]> git.ipfire.org Git - thirdparty/gcc.git/blame - gcc/config/aarch64/aarch64.c
[testsuite] aarch64,arm Add missing quotes to expected error message
[thirdparty/gcc.git] / gcc / config / aarch64 / aarch64.c
CommitLineData
bdb7bf8a 1/* Machine description for AArch64 architecture.
a5544970 2 Copyright (C) 2009-2019 Free Software Foundation, Inc.
43e9d192
IB
3 Contributed by ARM Ltd.
4
5 This file is part of GCC.
6
7 GCC is free software; you can redistribute it and/or modify it
8 under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 3, or (at your option)
10 any later version.
11
12 GCC is distributed in the hope that it will be useful, but
13 WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 General Public License for more details.
16
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
20
8fcc61f8
RS
21#define IN_TARGET_CODE 1
22
43e9d192 23#include "config.h"
01736018 24#define INCLUDE_STRING
43e9d192
IB
25#include "system.h"
26#include "coretypes.h"
c7131fb2 27#include "backend.h"
e11c4407
AM
28#include "target.h"
29#include "rtl.h"
c7131fb2 30#include "tree.h"
e73cf9a2 31#include "memmodel.h"
c7131fb2 32#include "gimple.h"
e11c4407
AM
33#include "cfghooks.h"
34#include "cfgloop.h"
c7131fb2 35#include "df.h"
e11c4407
AM
36#include "tm_p.h"
37#include "stringpool.h"
314e6352 38#include "attribs.h"
e11c4407
AM
39#include "optabs.h"
40#include "regs.h"
41#include "emit-rtl.h"
42#include "recog.h"
d9186814 43#include "cgraph.h"
e11c4407 44#include "diagnostic.h"
43e9d192 45#include "insn-attr.h"
40e23961 46#include "alias.h"
40e23961 47#include "fold-const.h"
d8a2d370
DN
48#include "stor-layout.h"
49#include "calls.h"
50#include "varasm.h"
43e9d192 51#include "output.h"
36566b39 52#include "flags.h"
36566b39 53#include "explow.h"
43e9d192
IB
54#include "expr.h"
55#include "reload.h"
43e9d192 56#include "langhooks.h"
5a2c8331 57#include "opts.h"
2d6bc7fa 58#include "params.h"
45b0be94 59#include "gimplify.h"
43e9d192 60#include "dwarf2.h"
61d371eb 61#include "gimple-iterator.h"
8990e73a 62#include "tree-vectorizer.h"
d1bcc29f 63#include "aarch64-cost-tables.h"
0ee859b5 64#include "dumpfile.h"
9b2b7279 65#include "builtins.h"
8baff86e 66#include "rtl-iter.h"
9bbe08fe 67#include "tm-constrs.h"
d03f7e44 68#include "sched-int.h"
d78006d9 69#include "target-globals.h"
a3eb8a52 70#include "common/common-target.h"
43cacb12 71#include "cfgrtl.h"
51b86113
DM
72#include "selftest.h"
73#include "selftest-rtl.h"
43cacb12 74#include "rtx-vector-builder.h"
d9186814 75#include "intl.h"
43e9d192 76
994c5d85 77/* This file should be included last. */
d58627a0
RS
78#include "target-def.h"
79
28514dda
YZ
80/* Defined for convenience. */
81#define POINTER_BYTES (POINTER_SIZE / BITS_PER_UNIT)
82
b187677b 83/* Information about a legitimate vector immediate operand. */
48063b9d
IB
84struct simd_immediate_info
85{
b187677b
RS
86 enum insn_type { MOV, MVN };
87 enum modifier_type { LSL, MSL };
88
89 simd_immediate_info () {}
90 simd_immediate_info (scalar_float_mode, rtx);
91 simd_immediate_info (scalar_int_mode, unsigned HOST_WIDE_INT,
92 insn_type = MOV, modifier_type = LSL,
93 unsigned int = 0);
43cacb12 94 simd_immediate_info (scalar_mode, rtx, rtx);
b187677b
RS
95
96 /* The mode of the elements. */
97 scalar_mode elt_mode;
98
43cacb12
RS
99 /* The value of each element if all elements are the same, or the
100 first value if the constant is a series. */
48063b9d 101 rtx value;
b187677b 102
43cacb12
RS
103 /* The value of the step if the constant is a series, null otherwise. */
104 rtx step;
105
b187677b
RS
106 /* The instruction to use to move the immediate into a vector. */
107 insn_type insn;
108
109 /* The kind of shift modifier to use, and the number of bits to shift.
110 This is (LSL, 0) if no shift is needed. */
111 modifier_type modifier;
112 unsigned int shift;
48063b9d
IB
113};
114
b187677b
RS
115/* Construct a floating-point immediate in which each element has mode
116 ELT_MODE_IN and value VALUE_IN. */
117inline simd_immediate_info
118::simd_immediate_info (scalar_float_mode elt_mode_in, rtx value_in)
43cacb12 119 : elt_mode (elt_mode_in), value (value_in), step (NULL_RTX), insn (MOV),
b187677b
RS
120 modifier (LSL), shift (0)
121{}
122
123/* Construct an integer immediate in which each element has mode ELT_MODE_IN
124 and value VALUE_IN. The other parameters are as for the structure
125 fields. */
126inline simd_immediate_info
127::simd_immediate_info (scalar_int_mode elt_mode_in,
128 unsigned HOST_WIDE_INT value_in,
129 insn_type insn_in, modifier_type modifier_in,
130 unsigned int shift_in)
131 : elt_mode (elt_mode_in), value (gen_int_mode (value_in, elt_mode_in)),
43cacb12
RS
132 step (NULL_RTX), insn (insn_in), modifier (modifier_in), shift (shift_in)
133{}
134
135/* Construct an integer immediate in which each element has mode ELT_MODE_IN
136 and where element I is equal to VALUE_IN + I * STEP_IN. */
137inline simd_immediate_info
138::simd_immediate_info (scalar_mode elt_mode_in, rtx value_in, rtx step_in)
139 : elt_mode (elt_mode_in), value (value_in), step (step_in), insn (MOV),
140 modifier (LSL), shift (0)
b187677b
RS
141{}
142
43e9d192
IB
143/* The current code model. */
144enum aarch64_code_model aarch64_cmodel;
145
43cacb12
RS
146/* The number of 64-bit elements in an SVE vector. */
147poly_uint16 aarch64_sve_vg;
148
43e9d192
IB
149#ifdef HAVE_AS_TLS
150#undef TARGET_HAVE_TLS
151#define TARGET_HAVE_TLS 1
152#endif
153
ef4bddc2
RS
154static bool aarch64_composite_type_p (const_tree, machine_mode);
155static bool aarch64_vfp_is_call_or_return_candidate (machine_mode,
43e9d192 156 const_tree,
ef4bddc2 157 machine_mode *, int *,
43e9d192
IB
158 bool *);
159static void aarch64_elf_asm_constructor (rtx, int) ATTRIBUTE_UNUSED;
160static void aarch64_elf_asm_destructor (rtx, int) ATTRIBUTE_UNUSED;
43e9d192 161static void aarch64_override_options_after_change (void);
ef4bddc2 162static bool aarch64_vector_mode_supported_p (machine_mode);
ef4bddc2 163static int aarch64_address_cost (rtx, machine_mode, addr_space_t, bool);
7df76747
N
164static bool aarch64_builtin_support_vector_misalignment (machine_mode mode,
165 const_tree type,
166 int misalignment,
167 bool is_packed);
43cacb12 168static machine_mode aarch64_simd_container_mode (scalar_mode, poly_int64);
a25831ac
AV
169static bool aarch64_print_address_internal (FILE*, machine_mode, rtx,
170 aarch64_addr_query_type);
eb471ba3 171static HOST_WIDE_INT aarch64_clamp_to_uimm12_shift (HOST_WIDE_INT val);
88b08073 172
0c6caaf8
RL
173/* Major revision number of the ARM Architecture implemented by the target. */
174unsigned aarch64_architecture_version;
175
43e9d192 176/* The processor for which instructions should be scheduled. */
02fdbd5b 177enum aarch64_processor aarch64_tune = cortexa53;
43e9d192 178
43e9d192
IB
179/* Mask to specify which instruction scheduling options should be used. */
180unsigned long aarch64_tune_flags = 0;
181
1be34295 182/* Global flag for PC relative loads. */
9ee6540a 183bool aarch64_pcrelative_literal_loads;
1be34295 184
d6cb6d6a
WD
185/* Global flag for whether frame pointer is enabled. */
186bool aarch64_use_frame_pointer;
187
efac62a3
ST
188#define BRANCH_PROTECT_STR_MAX 255
189char *accepted_branch_protection_string = NULL;
190
191static enum aarch64_parse_opt_result
192aarch64_parse_branch_protection (const char*, char**);
193
8dec06f2
JG
194/* Support for command line parsing of boolean flags in the tuning
195 structures. */
196struct aarch64_flag_desc
197{
198 const char* name;
199 unsigned int flag;
200};
201
ed9fa8d2 202#define AARCH64_FUSION_PAIR(name, internal_name) \
8dec06f2
JG
203 { name, AARCH64_FUSE_##internal_name },
204static const struct aarch64_flag_desc aarch64_fusible_pairs[] =
205{
206 { "none", AARCH64_FUSE_NOTHING },
207#include "aarch64-fusion-pairs.def"
208 { "all", AARCH64_FUSE_ALL },
209 { NULL, AARCH64_FUSE_NOTHING }
210};
8dec06f2 211
a339a01c 212#define AARCH64_EXTRA_TUNING_OPTION(name, internal_name) \
8dec06f2
JG
213 { name, AARCH64_EXTRA_TUNE_##internal_name },
214static const struct aarch64_flag_desc aarch64_tuning_flags[] =
215{
216 { "none", AARCH64_EXTRA_TUNE_NONE },
217#include "aarch64-tuning-flags.def"
218 { "all", AARCH64_EXTRA_TUNE_ALL },
219 { NULL, AARCH64_EXTRA_TUNE_NONE }
220};
8dec06f2 221
43e9d192
IB
222/* Tuning parameters. */
223
43e9d192
IB
224static const struct cpu_addrcost_table generic_addrcost_table =
225{
67747367 226 {
2fae724a 227 1, /* hi */
bd95e655
JG
228 0, /* si */
229 0, /* di */
2fae724a 230 1, /* ti */
67747367 231 },
bd95e655
JG
232 0, /* pre_modify */
233 0, /* post_modify */
234 0, /* register_offset */
783879e6
EM
235 0, /* register_sextend */
236 0, /* register_zextend */
bd95e655 237 0 /* imm_offset */
43e9d192
IB
238};
239
5ec1ae3b
EM
240static const struct cpu_addrcost_table exynosm1_addrcost_table =
241{
242 {
243 0, /* hi */
244 0, /* si */
245 0, /* di */
246 2, /* ti */
247 },
248 0, /* pre_modify */
249 0, /* post_modify */
250 1, /* register_offset */
251 1, /* register_sextend */
252 2, /* register_zextend */
253 0, /* imm_offset */
254};
255
381e27aa
PT
256static const struct cpu_addrcost_table xgene1_addrcost_table =
257{
381e27aa 258 {
bd95e655
JG
259 1, /* hi */
260 0, /* si */
261 0, /* di */
262 1, /* ti */
381e27aa 263 },
bd95e655 264 1, /* pre_modify */
52ddefd8 265 1, /* post_modify */
bd95e655 266 0, /* register_offset */
783879e6
EM
267 1, /* register_sextend */
268 1, /* register_zextend */
bd95e655 269 0, /* imm_offset */
381e27aa
PT
270};
271
d1261ac6 272static const struct cpu_addrcost_table thunderx2t99_addrcost_table =
ad611a4c
VP
273{
274 {
5f407e57
AP
275 1, /* hi */
276 1, /* si */
277 1, /* di */
ad611a4c
VP
278 2, /* ti */
279 },
280 0, /* pre_modify */
281 0, /* post_modify */
282 2, /* register_offset */
283 3, /* register_sextend */
284 3, /* register_zextend */
285 0, /* imm_offset */
286};
287
910f72e7
SZ
288static const struct cpu_addrcost_table tsv110_addrcost_table =
289{
290 {
291 1, /* hi */
292 0, /* si */
293 0, /* di */
294 1, /* ti */
295 },
296 0, /* pre_modify */
297 0, /* post_modify */
298 0, /* register_offset */
299 1, /* register_sextend */
300 1, /* register_zextend */
301 0, /* imm_offset */
302};
303
8d39ea2f
LM
304static const struct cpu_addrcost_table qdf24xx_addrcost_table =
305{
306 {
307 1, /* hi */
308 1, /* si */
309 1, /* di */
310 2, /* ti */
311 },
312 1, /* pre_modify */
313 1, /* post_modify */
314 3, /* register_offset */
31508b39 315 3, /* register_sextend */
8d39ea2f
LM
316 3, /* register_zextend */
317 2, /* imm_offset */
318};
319
43e9d192
IB
320static const struct cpu_regmove_cost generic_regmove_cost =
321{
bd95e655 322 1, /* GP2GP */
3969c510
WD
323 /* Avoid the use of slow int<->fp moves for spilling by setting
324 their cost higher than memmov_cost. */
bd95e655
JG
325 5, /* GP2FP */
326 5, /* FP2GP */
327 2 /* FP2FP */
43e9d192
IB
328};
329
e4a9c55a
WD
330static const struct cpu_regmove_cost cortexa57_regmove_cost =
331{
bd95e655 332 1, /* GP2GP */
e4a9c55a
WD
333 /* Avoid the use of slow int<->fp moves for spilling by setting
334 their cost higher than memmov_cost. */
bd95e655
JG
335 5, /* GP2FP */
336 5, /* FP2GP */
337 2 /* FP2FP */
e4a9c55a
WD
338};
339
340static const struct cpu_regmove_cost cortexa53_regmove_cost =
341{
bd95e655 342 1, /* GP2GP */
e4a9c55a
WD
343 /* Avoid the use of slow int<->fp moves for spilling by setting
344 their cost higher than memmov_cost. */
bd95e655
JG
345 5, /* GP2FP */
346 5, /* FP2GP */
347 2 /* FP2FP */
e4a9c55a
WD
348};
349
5ec1ae3b
EM
350static const struct cpu_regmove_cost exynosm1_regmove_cost =
351{
352 1, /* GP2GP */
353 /* Avoid the use of slow int<->fp moves for spilling by setting
354 their cost higher than memmov_cost (actual, 4 and 9). */
355 9, /* GP2FP */
356 9, /* FP2GP */
357 1 /* FP2FP */
358};
359
d1bcc29f
AP
360static const struct cpu_regmove_cost thunderx_regmove_cost =
361{
bd95e655
JG
362 2, /* GP2GP */
363 2, /* GP2FP */
364 6, /* FP2GP */
365 4 /* FP2FP */
d1bcc29f
AP
366};
367
381e27aa
PT
368static const struct cpu_regmove_cost xgene1_regmove_cost =
369{
bd95e655 370 1, /* GP2GP */
381e27aa
PT
371 /* Avoid the use of slow int<->fp moves for spilling by setting
372 their cost higher than memmov_cost. */
bd95e655
JG
373 8, /* GP2FP */
374 8, /* FP2GP */
375 2 /* FP2FP */
381e27aa
PT
376};
377
ee446d9f
JW
378static const struct cpu_regmove_cost qdf24xx_regmove_cost =
379{
380 2, /* GP2GP */
381 /* Avoid the use of int<->fp moves for spilling. */
382 6, /* GP2FP */
383 6, /* FP2GP */
384 4 /* FP2FP */
385};
386
d1261ac6 387static const struct cpu_regmove_cost thunderx2t99_regmove_cost =
ad611a4c
VP
388{
389 1, /* GP2GP */
390 /* Avoid the use of int<->fp moves for spilling. */
391 8, /* GP2FP */
392 8, /* FP2GP */
393 4 /* FP2FP */
394};
395
910f72e7
SZ
396static const struct cpu_regmove_cost tsv110_regmove_cost =
397{
398 1, /* GP2GP */
399 /* Avoid the use of slow int<->fp moves for spilling by setting
400 their cost higher than memmov_cost. */
401 2, /* GP2FP */
402 3, /* FP2GP */
403 2 /* FP2FP */
404};
405
8990e73a 406/* Generic costs for vector insn classes. */
8990e73a
TB
407static const struct cpu_vector_cost generic_vector_cost =
408{
cd8ae5ed
AP
409 1, /* scalar_int_stmt_cost */
410 1, /* scalar_fp_stmt_cost */
bd95e655
JG
411 1, /* scalar_load_cost */
412 1, /* scalar_store_cost */
cd8ae5ed
AP
413 1, /* vec_int_stmt_cost */
414 1, /* vec_fp_stmt_cost */
c428f91c 415 2, /* vec_permute_cost */
bd95e655
JG
416 1, /* vec_to_scalar_cost */
417 1, /* scalar_to_vec_cost */
418 1, /* vec_align_load_cost */
419 1, /* vec_unalign_load_cost */
420 1, /* vec_unalign_store_cost */
421 1, /* vec_store_cost */
422 3, /* cond_taken_branch_cost */
423 1 /* cond_not_taken_branch_cost */
8990e73a
TB
424};
425
e75bc10e
LM
426/* QDF24XX costs for vector insn classes. */
427static const struct cpu_vector_cost qdf24xx_vector_cost =
428{
429 1, /* scalar_int_stmt_cost */
430 1, /* scalar_fp_stmt_cost */
431 1, /* scalar_load_cost */
432 1, /* scalar_store_cost */
433 1, /* vec_int_stmt_cost */
434 3, /* vec_fp_stmt_cost */
435 2, /* vec_permute_cost */
436 1, /* vec_to_scalar_cost */
437 1, /* scalar_to_vec_cost */
438 1, /* vec_align_load_cost */
439 1, /* vec_unalign_load_cost */
440 1, /* vec_unalign_store_cost */
441 1, /* vec_store_cost */
442 3, /* cond_taken_branch_cost */
443 1 /* cond_not_taken_branch_cost */
444};
445
c3f20327
AP
446/* ThunderX costs for vector insn classes. */
447static const struct cpu_vector_cost thunderx_vector_cost =
448{
cd8ae5ed
AP
449 1, /* scalar_int_stmt_cost */
450 1, /* scalar_fp_stmt_cost */
c3f20327
AP
451 3, /* scalar_load_cost */
452 1, /* scalar_store_cost */
cd8ae5ed 453 4, /* vec_int_stmt_cost */
b29d7591 454 1, /* vec_fp_stmt_cost */
c3f20327
AP
455 4, /* vec_permute_cost */
456 2, /* vec_to_scalar_cost */
457 2, /* scalar_to_vec_cost */
458 3, /* vec_align_load_cost */
7e87a3d9
AP
459 5, /* vec_unalign_load_cost */
460 5, /* vec_unalign_store_cost */
c3f20327
AP
461 1, /* vec_store_cost */
462 3, /* cond_taken_branch_cost */
463 3 /* cond_not_taken_branch_cost */
464};
465
910f72e7
SZ
466static const struct cpu_vector_cost tsv110_vector_cost =
467{
468 1, /* scalar_int_stmt_cost */
469 1, /* scalar_fp_stmt_cost */
470 5, /* scalar_load_cost */
471 1, /* scalar_store_cost */
472 2, /* vec_int_stmt_cost */
473 2, /* vec_fp_stmt_cost */
474 2, /* vec_permute_cost */
475 3, /* vec_to_scalar_cost */
476 2, /* scalar_to_vec_cost */
477 5, /* vec_align_load_cost */
478 5, /* vec_unalign_load_cost */
479 1, /* vec_unalign_store_cost */
480 1, /* vec_store_cost */
481 1, /* cond_taken_branch_cost */
482 1 /* cond_not_taken_branch_cost */
483};
484
60bff090 485/* Generic costs for vector insn classes. */
60bff090
JG
486static const struct cpu_vector_cost cortexa57_vector_cost =
487{
cd8ae5ed
AP
488 1, /* scalar_int_stmt_cost */
489 1, /* scalar_fp_stmt_cost */
bd95e655
JG
490 4, /* scalar_load_cost */
491 1, /* scalar_store_cost */
cd8ae5ed
AP
492 2, /* vec_int_stmt_cost */
493 2, /* vec_fp_stmt_cost */
c428f91c 494 3, /* vec_permute_cost */
bd95e655
JG
495 8, /* vec_to_scalar_cost */
496 8, /* scalar_to_vec_cost */
db4a1c18
WD
497 4, /* vec_align_load_cost */
498 4, /* vec_unalign_load_cost */
bd95e655
JG
499 1, /* vec_unalign_store_cost */
500 1, /* vec_store_cost */
501 1, /* cond_taken_branch_cost */
502 1 /* cond_not_taken_branch_cost */
60bff090
JG
503};
504
5ec1ae3b
EM
505static const struct cpu_vector_cost exynosm1_vector_cost =
506{
cd8ae5ed
AP
507 1, /* scalar_int_stmt_cost */
508 1, /* scalar_fp_stmt_cost */
5ec1ae3b
EM
509 5, /* scalar_load_cost */
510 1, /* scalar_store_cost */
cd8ae5ed
AP
511 3, /* vec_int_stmt_cost */
512 3, /* vec_fp_stmt_cost */
c428f91c 513 3, /* vec_permute_cost */
5ec1ae3b
EM
514 3, /* vec_to_scalar_cost */
515 3, /* scalar_to_vec_cost */
516 5, /* vec_align_load_cost */
517 5, /* vec_unalign_load_cost */
518 1, /* vec_unalign_store_cost */
519 1, /* vec_store_cost */
520 1, /* cond_taken_branch_cost */
521 1 /* cond_not_taken_branch_cost */
522};
523
381e27aa 524/* Generic costs for vector insn classes. */
381e27aa
PT
525static const struct cpu_vector_cost xgene1_vector_cost =
526{
cd8ae5ed
AP
527 1, /* scalar_int_stmt_cost */
528 1, /* scalar_fp_stmt_cost */
bd95e655
JG
529 5, /* scalar_load_cost */
530 1, /* scalar_store_cost */
cd8ae5ed
AP
531 2, /* vec_int_stmt_cost */
532 2, /* vec_fp_stmt_cost */
c428f91c 533 2, /* vec_permute_cost */
bd95e655
JG
534 4, /* vec_to_scalar_cost */
535 4, /* scalar_to_vec_cost */
536 10, /* vec_align_load_cost */
537 10, /* vec_unalign_load_cost */
538 2, /* vec_unalign_store_cost */
539 2, /* vec_store_cost */
540 2, /* cond_taken_branch_cost */
541 1 /* cond_not_taken_branch_cost */
381e27aa
PT
542};
543
ad611a4c 544/* Costs for vector insn classes for Vulcan. */
d1261ac6 545static const struct cpu_vector_cost thunderx2t99_vector_cost =
ad611a4c 546{
cd8ae5ed
AP
547 1, /* scalar_int_stmt_cost */
548 6, /* scalar_fp_stmt_cost */
ad611a4c
VP
549 4, /* scalar_load_cost */
550 1, /* scalar_store_cost */
cd8ae5ed
AP
551 5, /* vec_int_stmt_cost */
552 6, /* vec_fp_stmt_cost */
ad611a4c
VP
553 3, /* vec_permute_cost */
554 6, /* vec_to_scalar_cost */
555 5, /* scalar_to_vec_cost */
556 8, /* vec_align_load_cost */
557 8, /* vec_unalign_load_cost */
558 4, /* vec_unalign_store_cost */
559 4, /* vec_store_cost */
560 2, /* cond_taken_branch_cost */
561 1 /* cond_not_taken_branch_cost */
562};
563
b9066f5a
MW
564/* Generic costs for branch instructions. */
565static const struct cpu_branch_cost generic_branch_cost =
566{
9094d4a4
WD
567 1, /* Predictable. */
568 3 /* Unpredictable. */
b9066f5a
MW
569};
570
9acc9cbe
EM
571/* Generic approximation modes. */
572static const cpu_approx_modes generic_approx_modes =
573{
79a2bc2d 574 AARCH64_APPROX_NONE, /* division */
98daafa0 575 AARCH64_APPROX_NONE, /* sqrt */
9acc9cbe
EM
576 AARCH64_APPROX_NONE /* recip_sqrt */
577};
578
579/* Approximation modes for Exynos M1. */
580static const cpu_approx_modes exynosm1_approx_modes =
581{
79a2bc2d 582 AARCH64_APPROX_NONE, /* division */
98daafa0 583 AARCH64_APPROX_ALL, /* sqrt */
9acc9cbe
EM
584 AARCH64_APPROX_ALL /* recip_sqrt */
585};
586
587/* Approximation modes for X-Gene 1. */
588static const cpu_approx_modes xgene1_approx_modes =
589{
79a2bc2d 590 AARCH64_APPROX_NONE, /* division */
98daafa0 591 AARCH64_APPROX_NONE, /* sqrt */
9acc9cbe
EM
592 AARCH64_APPROX_ALL /* recip_sqrt */
593};
594
9d2c6e2e
MK
595/* Generic prefetch settings (which disable prefetch). */
596static const cpu_prefetch_tune generic_prefetch_tune =
597{
598 0, /* num_slots */
599 -1, /* l1_cache_size */
600 -1, /* l1_cache_line_size */
16b2cafd 601 -1, /* l2_cache_size */
d2ff35c0 602 true, /* prefetch_dynamic_strides */
59100dfc 603 -1, /* minimum_stride */
16b2cafd 604 -1 /* default_opt_level */
9d2c6e2e
MK
605};
606
607static const cpu_prefetch_tune exynosm1_prefetch_tune =
608{
609 0, /* num_slots */
610 -1, /* l1_cache_size */
611 64, /* l1_cache_line_size */
16b2cafd 612 -1, /* l2_cache_size */
d2ff35c0 613 true, /* prefetch_dynamic_strides */
59100dfc 614 -1, /* minimum_stride */
16b2cafd 615 -1 /* default_opt_level */
9d2c6e2e
MK
616};
617
618static const cpu_prefetch_tune qdf24xx_prefetch_tune =
619{
70c51b58
MK
620 4, /* num_slots */
621 32, /* l1_cache_size */
9d2c6e2e 622 64, /* l1_cache_line_size */
725e2110 623 512, /* l2_cache_size */
d2ff35c0 624 false, /* prefetch_dynamic_strides */
59100dfc
LM
625 2048, /* minimum_stride */
626 3 /* default_opt_level */
9d2c6e2e
MK
627};
628
f1e247d0
AP
629static const cpu_prefetch_tune thunderxt88_prefetch_tune =
630{
631 8, /* num_slots */
632 32, /* l1_cache_size */
633 128, /* l1_cache_line_size */
634 16*1024, /* l2_cache_size */
d2ff35c0 635 true, /* prefetch_dynamic_strides */
59100dfc 636 -1, /* minimum_stride */
f1e247d0
AP
637 3 /* default_opt_level */
638};
639
640static const cpu_prefetch_tune thunderx_prefetch_tune =
641{
642 8, /* num_slots */
643 32, /* l1_cache_size */
644 128, /* l1_cache_line_size */
645 -1, /* l2_cache_size */
d2ff35c0 646 true, /* prefetch_dynamic_strides */
59100dfc 647 -1, /* minimum_stride */
f1e247d0
AP
648 -1 /* default_opt_level */
649};
650
9d2c6e2e
MK
651static const cpu_prefetch_tune thunderx2t99_prefetch_tune =
652{
f1e247d0
AP
653 8, /* num_slots */
654 32, /* l1_cache_size */
9d2c6e2e 655 64, /* l1_cache_line_size */
f1e247d0 656 256, /* l2_cache_size */
d2ff35c0 657 true, /* prefetch_dynamic_strides */
59100dfc 658 -1, /* minimum_stride */
16b2cafd 659 -1 /* default_opt_level */
9d2c6e2e
MK
660};
661
910f72e7
SZ
662static const cpu_prefetch_tune tsv110_prefetch_tune =
663{
664 0, /* num_slots */
665 64, /* l1_cache_size */
666 64, /* l1_cache_line_size */
667 512, /* l2_cache_size */
668 true, /* prefetch_dynamic_strides */
669 -1, /* minimum_stride */
670 -1 /* default_opt_level */
671};
672
d5e9851e
CM
673static const cpu_prefetch_tune xgene1_prefetch_tune =
674{
675 8, /* num_slots */
676 32, /* l1_cache_size */
677 64, /* l1_cache_line_size */
678 256, /* l2_cache_size */
679 true, /* prefetch_dynamic_strides */
680 -1, /* minimum_stride */
681 -1 /* default_opt_level */
682};
683
43e9d192
IB
684static const struct tune_params generic_tunings =
685{
4e2cd668 686 &cortexa57_extra_costs,
43e9d192
IB
687 &generic_addrcost_table,
688 &generic_regmove_cost,
8990e73a 689 &generic_vector_cost,
b9066f5a 690 &generic_branch_cost,
9acc9cbe 691 &generic_approx_modes,
2d56d6ba 692 SVE_NOT_IMPLEMENTED, /* sve_width */
bd95e655
JG
693 4, /* memmov_cost */
694 2, /* issue_rate */
e0701ef0 695 (AARCH64_FUSE_AES_AESMC), /* fusible_ops */
c518c102
ML
696 "8", /* function_align. */
697 "4", /* jump_align. */
698 "8", /* loop_align. */
cee66c68
WD
699 2, /* int_reassoc_width. */
700 4, /* fp_reassoc_width. */
50093a33
WD
701 1, /* vec_reassoc_width. */
702 2, /* min_div_recip_mul_sf. */
dfba575f 703 2, /* min_div_recip_mul_df. */
50487d79 704 0, /* max_case_values. */
3b4c0f7e 705 tune_params::AUTOPREFETCHER_WEAK, /* autoprefetcher_model. */
9d2c6e2e
MK
706 (AARCH64_EXTRA_TUNE_NONE), /* tune_flags. */
707 &generic_prefetch_tune
43e9d192
IB
708};
709
1c72a3ca
JG
710static const struct tune_params cortexa35_tunings =
711{
712 &cortexa53_extra_costs,
713 &generic_addrcost_table,
714 &cortexa53_regmove_cost,
715 &generic_vector_cost,
aca97ef8 716 &generic_branch_cost,
9acc9cbe 717 &generic_approx_modes,
2d56d6ba 718 SVE_NOT_IMPLEMENTED, /* sve_width */
1c72a3ca
JG
719 4, /* memmov_cost */
720 1, /* issue_rate */
0bc24338 721 (AARCH64_FUSE_AES_AESMC | AARCH64_FUSE_MOV_MOVK | AARCH64_FUSE_ADRP_ADD
1c72a3ca 722 | AARCH64_FUSE_MOVK_MOVK | AARCH64_FUSE_ADRP_LDR), /* fusible_ops */
c518c102
ML
723 "16", /* function_align. */
724 "4", /* jump_align. */
725 "8", /* loop_align. */
1c72a3ca
JG
726 2, /* int_reassoc_width. */
727 4, /* fp_reassoc_width. */
728 1, /* vec_reassoc_width. */
729 2, /* min_div_recip_mul_sf. */
730 2, /* min_div_recip_mul_df. */
731 0, /* max_case_values. */
1c72a3ca 732 tune_params::AUTOPREFETCHER_WEAK, /* autoprefetcher_model. */
9d2c6e2e
MK
733 (AARCH64_EXTRA_TUNE_NONE), /* tune_flags. */
734 &generic_prefetch_tune
1c72a3ca
JG
735};
736
984239ad
KT
737static const struct tune_params cortexa53_tunings =
738{
739 &cortexa53_extra_costs,
740 &generic_addrcost_table,
e4a9c55a 741 &cortexa53_regmove_cost,
984239ad 742 &generic_vector_cost,
aca97ef8 743 &generic_branch_cost,
9acc9cbe 744 &generic_approx_modes,
2d56d6ba 745 SVE_NOT_IMPLEMENTED, /* sve_width */
bd95e655
JG
746 4, /* memmov_cost */
747 2, /* issue_rate */
00a8574a 748 (AARCH64_FUSE_AES_AESMC | AARCH64_FUSE_MOV_MOVK | AARCH64_FUSE_ADRP_ADD
e9a3a175 749 | AARCH64_FUSE_MOVK_MOVK | AARCH64_FUSE_ADRP_LDR), /* fusible_ops */
c518c102
ML
750 "16", /* function_align. */
751 "4", /* jump_align. */
752 "8", /* loop_align. */
cee66c68
WD
753 2, /* int_reassoc_width. */
754 4, /* fp_reassoc_width. */
50093a33
WD
755 1, /* vec_reassoc_width. */
756 2, /* min_div_recip_mul_sf. */
dfba575f 757 2, /* min_div_recip_mul_df. */
50487d79 758 0, /* max_case_values. */
2d6bc7fa 759 tune_params::AUTOPREFETCHER_WEAK, /* autoprefetcher_model. */
9d2c6e2e
MK
760 (AARCH64_EXTRA_TUNE_NONE), /* tune_flags. */
761 &generic_prefetch_tune
984239ad
KT
762};
763
4fd92af6
KT
764static const struct tune_params cortexa57_tunings =
765{
766 &cortexa57_extra_costs,
a39d4348 767 &generic_addrcost_table,
e4a9c55a 768 &cortexa57_regmove_cost,
60bff090 769 &cortexa57_vector_cost,
aca97ef8 770 &generic_branch_cost,
9acc9cbe 771 &generic_approx_modes,
2d56d6ba 772 SVE_NOT_IMPLEMENTED, /* sve_width */
bd95e655
JG
773 4, /* memmov_cost */
774 3, /* issue_rate */
00a8574a 775 (AARCH64_FUSE_AES_AESMC | AARCH64_FUSE_MOV_MOVK | AARCH64_FUSE_ADRP_ADD
e9a3a175 776 | AARCH64_FUSE_MOVK_MOVK), /* fusible_ops */
c518c102
ML
777 "16", /* function_align. */
778 "4", /* jump_align. */
779 "8", /* loop_align. */
cee66c68
WD
780 2, /* int_reassoc_width. */
781 4, /* fp_reassoc_width. */
50093a33
WD
782 1, /* vec_reassoc_width. */
783 2, /* min_div_recip_mul_sf. */
dfba575f 784 2, /* min_div_recip_mul_df. */
50487d79 785 0, /* max_case_values. */
2d6bc7fa 786 tune_params::AUTOPREFETCHER_WEAK, /* autoprefetcher_model. */
9d2c6e2e
MK
787 (AARCH64_EXTRA_TUNE_RENAME_FMA_REGS), /* tune_flags. */
788 &generic_prefetch_tune
dfba575f
JG
789};
790
791static const struct tune_params cortexa72_tunings =
792{
793 &cortexa57_extra_costs,
a39d4348 794 &generic_addrcost_table,
dfba575f
JG
795 &cortexa57_regmove_cost,
796 &cortexa57_vector_cost,
aca97ef8 797 &generic_branch_cost,
9acc9cbe 798 &generic_approx_modes,
2d56d6ba 799 SVE_NOT_IMPLEMENTED, /* sve_width */
dfba575f
JG
800 4, /* memmov_cost */
801 3, /* issue_rate */
00a8574a 802 (AARCH64_FUSE_AES_AESMC | AARCH64_FUSE_MOV_MOVK | AARCH64_FUSE_ADRP_ADD
dfba575f 803 | AARCH64_FUSE_MOVK_MOVK), /* fusible_ops */
c518c102
ML
804 "16", /* function_align. */
805 "4", /* jump_align. */
806 "8", /* loop_align. */
dfba575f
JG
807 2, /* int_reassoc_width. */
808 4, /* fp_reassoc_width. */
809 1, /* vec_reassoc_width. */
810 2, /* min_div_recip_mul_sf. */
811 2, /* min_div_recip_mul_df. */
50487d79 812 0, /* max_case_values. */
0bc24338 813 tune_params::AUTOPREFETCHER_WEAK, /* autoprefetcher_model. */
9d2c6e2e
MK
814 (AARCH64_EXTRA_TUNE_NONE), /* tune_flags. */
815 &generic_prefetch_tune
4fd92af6
KT
816};
817
4fb570c4
KT
818static const struct tune_params cortexa73_tunings =
819{
820 &cortexa57_extra_costs,
a39d4348 821 &generic_addrcost_table,
4fb570c4
KT
822 &cortexa57_regmove_cost,
823 &cortexa57_vector_cost,
aca97ef8 824 &generic_branch_cost,
4fb570c4 825 &generic_approx_modes,
2d56d6ba 826 SVE_NOT_IMPLEMENTED, /* sve_width */
4fb570c4
KT
827 4, /* memmov_cost. */
828 2, /* issue_rate. */
829 (AARCH64_FUSE_AES_AESMC | AARCH64_FUSE_MOV_MOVK | AARCH64_FUSE_ADRP_ADD
830 | AARCH64_FUSE_MOVK_MOVK | AARCH64_FUSE_ADRP_LDR), /* fusible_ops */
c518c102
ML
831 "16", /* function_align. */
832 "4", /* jump_align. */
833 "8", /* loop_align. */
4fb570c4
KT
834 2, /* int_reassoc_width. */
835 4, /* fp_reassoc_width. */
836 1, /* vec_reassoc_width. */
837 2, /* min_div_recip_mul_sf. */
838 2, /* min_div_recip_mul_df. */
839 0, /* max_case_values. */
4fb570c4 840 tune_params::AUTOPREFETCHER_WEAK, /* autoprefetcher_model. */
9d2c6e2e
MK
841 (AARCH64_EXTRA_TUNE_NONE), /* tune_flags. */
842 &generic_prefetch_tune
4fb570c4
KT
843};
844
9d2c6e2e
MK
845
846
5ec1ae3b
EM
847static const struct tune_params exynosm1_tunings =
848{
849 &exynosm1_extra_costs,
850 &exynosm1_addrcost_table,
851 &exynosm1_regmove_cost,
852 &exynosm1_vector_cost,
853 &generic_branch_cost,
9acc9cbe 854 &exynosm1_approx_modes,
2d56d6ba 855 SVE_NOT_IMPLEMENTED, /* sve_width */
5ec1ae3b
EM
856 4, /* memmov_cost */
857 3, /* issue_rate */
25cc2199 858 (AARCH64_FUSE_AES_AESMC), /* fusible_ops */
c518c102
ML
859 "4", /* function_align. */
860 "4", /* jump_align. */
861 "4", /* loop_align. */
5ec1ae3b
EM
862 2, /* int_reassoc_width. */
863 4, /* fp_reassoc_width. */
864 1, /* vec_reassoc_width. */
865 2, /* min_div_recip_mul_sf. */
866 2, /* min_div_recip_mul_df. */
867 48, /* max_case_values. */
220379df 868 tune_params::AUTOPREFETCHER_WEAK, /* autoprefetcher_model. */
9d2c6e2e
MK
869 (AARCH64_EXTRA_TUNE_NONE), /* tune_flags. */
870 &exynosm1_prefetch_tune
5ec1ae3b
EM
871};
872
f1e247d0
AP
873static const struct tune_params thunderxt88_tunings =
874{
875 &thunderx_extra_costs,
876 &generic_addrcost_table,
877 &thunderx_regmove_cost,
878 &thunderx_vector_cost,
879 &generic_branch_cost,
880 &generic_approx_modes,
2d56d6ba 881 SVE_NOT_IMPLEMENTED, /* sve_width */
f1e247d0
AP
882 6, /* memmov_cost */
883 2, /* issue_rate */
884 AARCH64_FUSE_CMP_BRANCH, /* fusible_ops */
c518c102
ML
885 "8", /* function_align. */
886 "8", /* jump_align. */
887 "8", /* loop_align. */
f1e247d0
AP
888 2, /* int_reassoc_width. */
889 4, /* fp_reassoc_width. */
890 1, /* vec_reassoc_width. */
891 2, /* min_div_recip_mul_sf. */
892 2, /* min_div_recip_mul_df. */
893 0, /* max_case_values. */
894 tune_params::AUTOPREFETCHER_OFF, /* autoprefetcher_model. */
895 (AARCH64_EXTRA_TUNE_SLOW_UNALIGNED_LDPW), /* tune_flags. */
896 &thunderxt88_prefetch_tune
897};
898
d1bcc29f
AP
899static const struct tune_params thunderx_tunings =
900{
901 &thunderx_extra_costs,
902 &generic_addrcost_table,
903 &thunderx_regmove_cost,
c3f20327 904 &thunderx_vector_cost,
b9066f5a 905 &generic_branch_cost,
9acc9cbe 906 &generic_approx_modes,
2d56d6ba 907 SVE_NOT_IMPLEMENTED, /* sve_width */
bd95e655
JG
908 6, /* memmov_cost */
909 2, /* issue_rate */
e9a3a175 910 AARCH64_FUSE_CMP_BRANCH, /* fusible_ops */
c518c102
ML
911 "8", /* function_align. */
912 "8", /* jump_align. */
913 "8", /* loop_align. */
cee66c68
WD
914 2, /* int_reassoc_width. */
915 4, /* fp_reassoc_width. */
50093a33
WD
916 1, /* vec_reassoc_width. */
917 2, /* min_div_recip_mul_sf. */
dfba575f 918 2, /* min_div_recip_mul_df. */
50487d79 919 0, /* max_case_values. */
2d6bc7fa 920 tune_params::AUTOPREFETCHER_OFF, /* autoprefetcher_model. */
b10f1009
AP
921 (AARCH64_EXTRA_TUNE_SLOW_UNALIGNED_LDPW
922 | AARCH64_EXTRA_TUNE_CHEAP_SHIFT_EXTEND), /* tune_flags. */
f1e247d0 923 &thunderx_prefetch_tune
d1bcc29f
AP
924};
925
910f72e7
SZ
926static const struct tune_params tsv110_tunings =
927{
928 &tsv110_extra_costs,
929 &tsv110_addrcost_table,
930 &tsv110_regmove_cost,
931 &tsv110_vector_cost,
932 &generic_branch_cost,
933 &generic_approx_modes,
2d56d6ba 934 SVE_NOT_IMPLEMENTED, /* sve_width */
910f72e7
SZ
935 4, /* memmov_cost */
936 4, /* issue_rate */
937 (AARCH64_FUSE_AES_AESMC | AARCH64_FUSE_CMP_BRANCH
938 | AARCH64_FUSE_ALU_BRANCH), /* fusible_ops */
939 "16", /* function_align. */
940 "4", /* jump_align. */
941 "8", /* loop_align. */
942 2, /* int_reassoc_width. */
943 4, /* fp_reassoc_width. */
944 1, /* vec_reassoc_width. */
945 2, /* min_div_recip_mul_sf. */
946 2, /* min_div_recip_mul_df. */
947 0, /* max_case_values. */
948 tune_params::AUTOPREFETCHER_WEAK, /* autoprefetcher_model. */
949 (AARCH64_EXTRA_TUNE_NONE), /* tune_flags. */
950 &tsv110_prefetch_tune
951};
952
381e27aa 953static const struct tune_params xgene1_tunings =
e02669db
CM
954{
955 &xgene1_extra_costs,
956 &xgene1_addrcost_table,
957 &xgene1_regmove_cost,
958 &xgene1_vector_cost,
959 &generic_branch_cost,
960 &xgene1_approx_modes,
2d56d6ba 961 SVE_NOT_IMPLEMENTED, /* sve_width */
e02669db
CM
962 6, /* memmov_cost */
963 4, /* issue_rate */
964 AARCH64_FUSE_NOTHING, /* fusible_ops */
965 "16", /* function_align. */
966 "16", /* jump_align. */
967 "16", /* loop_align. */
968 2, /* int_reassoc_width. */
969 4, /* fp_reassoc_width. */
970 1, /* vec_reassoc_width. */
971 2, /* min_div_recip_mul_sf. */
972 2, /* min_div_recip_mul_df. */
973 17, /* max_case_values. */
974 tune_params::AUTOPREFETCHER_OFF, /* autoprefetcher_model. */
975 (AARCH64_EXTRA_TUNE_NO_LDP_STP_QREGS), /* tune_flags. */
976 &xgene1_prefetch_tune
977};
978
979static const struct tune_params emag_tunings =
381e27aa
PT
980{
981 &xgene1_extra_costs,
982 &xgene1_addrcost_table,
983 &xgene1_regmove_cost,
984 &xgene1_vector_cost,
b9066f5a 985 &generic_branch_cost,
9acc9cbe 986 &xgene1_approx_modes,
2d56d6ba 987 SVE_NOT_IMPLEMENTED,
bd95e655
JG
988 6, /* memmov_cost */
989 4, /* issue_rate */
e9a3a175 990 AARCH64_FUSE_NOTHING, /* fusible_ops */
c518c102 991 "16", /* function_align. */
cf28c77e 992 "16", /* jump_align. */
c518c102 993 "16", /* loop_align. */
381e27aa
PT
994 2, /* int_reassoc_width. */
995 4, /* fp_reassoc_width. */
50093a33
WD
996 1, /* vec_reassoc_width. */
997 2, /* min_div_recip_mul_sf. */
dfba575f 998 2, /* min_div_recip_mul_df. */
cf28c77e 999 17, /* max_case_values. */
2d6bc7fa 1000 tune_params::AUTOPREFETCHER_OFF, /* autoprefetcher_model. */
9f5361c8 1001 (AARCH64_EXTRA_TUNE_NO_LDP_STP_QREGS), /* tune_flags. */
d5e9851e 1002 &xgene1_prefetch_tune
381e27aa
PT
1003};
1004
ee446d9f
JW
1005static const struct tune_params qdf24xx_tunings =
1006{
1007 &qdf24xx_extra_costs,
8d39ea2f 1008 &qdf24xx_addrcost_table,
ee446d9f 1009 &qdf24xx_regmove_cost,
e75bc10e 1010 &qdf24xx_vector_cost,
ee446d9f
JW
1011 &generic_branch_cost,
1012 &generic_approx_modes,
2d56d6ba 1013 SVE_NOT_IMPLEMENTED, /* sve_width */
ee446d9f
JW
1014 4, /* memmov_cost */
1015 4, /* issue_rate */
1016 (AARCH64_FUSE_MOV_MOVK | AARCH64_FUSE_ADRP_ADD
1017 | AARCH64_FUSE_MOVK_MOVK), /* fuseable_ops */
c518c102
ML
1018 "16", /* function_align. */
1019 "8", /* jump_align. */
1020 "16", /* loop_align. */
ee446d9f
JW
1021 2, /* int_reassoc_width. */
1022 4, /* fp_reassoc_width. */
1023 1, /* vec_reassoc_width. */
1024 2, /* min_div_recip_mul_sf. */
1025 2, /* min_div_recip_mul_df. */
1026 0, /* max_case_values. */
4f2a94e6 1027 tune_params::AUTOPREFETCHER_WEAK, /* autoprefetcher_model. */
a98824ac 1028 AARCH64_EXTRA_TUNE_RENAME_LOAD_REGS, /* tune_flags. */
9d2c6e2e 1029 &qdf24xx_prefetch_tune
ee446d9f
JW
1030};
1031
52ee8191
SP
1032/* Tuning structure for the Qualcomm Saphira core. Default to falkor values
1033 for now. */
1034static const struct tune_params saphira_tunings =
1035{
1036 &generic_extra_costs,
1037 &generic_addrcost_table,
1038 &generic_regmove_cost,
1039 &generic_vector_cost,
1040 &generic_branch_cost,
1041 &generic_approx_modes,
2d56d6ba 1042 SVE_NOT_IMPLEMENTED, /* sve_width */
52ee8191
SP
1043 4, /* memmov_cost */
1044 4, /* issue_rate */
1045 (AARCH64_FUSE_MOV_MOVK | AARCH64_FUSE_ADRP_ADD
1046 | AARCH64_FUSE_MOVK_MOVK), /* fuseable_ops */
c518c102
ML
1047 "16", /* function_align. */
1048 "8", /* jump_align. */
1049 "16", /* loop_align. */
52ee8191
SP
1050 2, /* int_reassoc_width. */
1051 4, /* fp_reassoc_width. */
1052 1, /* vec_reassoc_width. */
1053 2, /* min_div_recip_mul_sf. */
1054 2, /* min_div_recip_mul_df. */
1055 0, /* max_case_values. */
1056 tune_params::AUTOPREFETCHER_WEAK, /* autoprefetcher_model. */
1057 (AARCH64_EXTRA_TUNE_NONE), /* tune_flags. */
1058 &generic_prefetch_tune
1059};
1060
d1261ac6 1061static const struct tune_params thunderx2t99_tunings =
ad611a4c 1062{
d1261ac6
AP
1063 &thunderx2t99_extra_costs,
1064 &thunderx2t99_addrcost_table,
1065 &thunderx2t99_regmove_cost,
1066 &thunderx2t99_vector_cost,
aca97ef8 1067 &generic_branch_cost,
ad611a4c 1068 &generic_approx_modes,
2d56d6ba 1069 SVE_NOT_IMPLEMENTED, /* sve_width */
ad611a4c
VP
1070 4, /* memmov_cost. */
1071 4, /* issue_rate. */
00c7c57f
JB
1072 (AARCH64_FUSE_CMP_BRANCH | AARCH64_FUSE_AES_AESMC
1073 | AARCH64_FUSE_ALU_BRANCH), /* fusible_ops */
c518c102
ML
1074 "16", /* function_align. */
1075 "8", /* jump_align. */
1076 "16", /* loop_align. */
ad611a4c
VP
1077 3, /* int_reassoc_width. */
1078 2, /* fp_reassoc_width. */
1079 2, /* vec_reassoc_width. */
1080 2, /* min_div_recip_mul_sf. */
1081 2, /* min_div_recip_mul_df. */
1082 0, /* max_case_values. */
f1e247d0 1083 tune_params::AUTOPREFETCHER_WEAK, /* autoprefetcher_model. */
9d2c6e2e
MK
1084 (AARCH64_EXTRA_TUNE_NONE), /* tune_flags. */
1085 &thunderx2t99_prefetch_tune
ad611a4c
VP
1086};
1087
9ed6834d 1088static const struct tune_params neoversen1_tunings =
fc881de2
KT
1089{
1090 &cortexa57_extra_costs,
1091 &generic_addrcost_table,
1092 &generic_regmove_cost,
1093 &cortexa57_vector_cost,
1094 &generic_branch_cost,
1095 &generic_approx_modes,
1096 SVE_NOT_IMPLEMENTED, /* sve_width */
1097 4, /* memmov_cost */
1098 3, /* issue_rate */
1099 AARCH64_FUSE_AES_AESMC, /* fusible_ops */
1100 "32:16", /* function_align. */
1101 "32:16", /* jump_align. */
1102 "32:16", /* loop_align. */
1103 2, /* int_reassoc_width. */
1104 4, /* fp_reassoc_width. */
1105 2, /* vec_reassoc_width. */
1106 2, /* min_div_recip_mul_sf. */
1107 2, /* min_div_recip_mul_df. */
1108 0, /* max_case_values. */
1109 tune_params::AUTOPREFETCHER_WEAK, /* autoprefetcher_model. */
1110 (AARCH64_EXTRA_TUNE_NONE), /* tune_flags. */
1111 &generic_prefetch_tune
1112};
1113
8dec06f2
JG
1114/* Support for fine-grained override of the tuning structures. */
1115struct aarch64_tuning_override_function
1116{
1117 const char* name;
1118 void (*parse_override)(const char*, struct tune_params*);
1119};
1120
1121static void aarch64_parse_fuse_string (const char*, struct tune_params*);
1122static void aarch64_parse_tune_string (const char*, struct tune_params*);
886f092f 1123static void aarch64_parse_sve_width_string (const char*, struct tune_params*);
8dec06f2
JG
1124
1125static const struct aarch64_tuning_override_function
1126aarch64_tuning_override_functions[] =
1127{
1128 { "fuse", aarch64_parse_fuse_string },
1129 { "tune", aarch64_parse_tune_string },
886f092f 1130 { "sve_width", aarch64_parse_sve_width_string },
8dec06f2
JG
1131 { NULL, NULL }
1132};
1133
43e9d192
IB
1134/* A processor implementing AArch64. */
1135struct processor
1136{
1137 const char *const name;
46806c44
KT
1138 enum aarch64_processor ident;
1139 enum aarch64_processor sched_core;
393ae126 1140 enum aarch64_arch arch;
0c6caaf8 1141 unsigned architecture_version;
43e9d192
IB
1142 const unsigned long flags;
1143 const struct tune_params *const tune;
1144};
1145
393ae126
KT
1146/* Architectures implementing AArch64. */
1147static const struct processor all_architectures[] =
1148{
1149#define AARCH64_ARCH(NAME, CORE, ARCH_IDENT, ARCH_REV, FLAGS) \
1150 {NAME, CORE, CORE, AARCH64_ARCH_##ARCH_IDENT, ARCH_REV, FLAGS, NULL},
1151#include "aarch64-arches.def"
393ae126
KT
1152 {NULL, aarch64_none, aarch64_none, aarch64_no_arch, 0, 0, NULL}
1153};
1154
43e9d192
IB
1155/* Processor cores implementing AArch64. */
1156static const struct processor all_cores[] =
1157{
e8fcc9fa 1158#define AARCH64_CORE(NAME, IDENT, SCHED, ARCH, FLAGS, COSTS, IMP, PART, VARIANT) \
393ae126
KT
1159 {NAME, IDENT, SCHED, AARCH64_ARCH_##ARCH, \
1160 all_architectures[AARCH64_ARCH_##ARCH].architecture_version, \
1161 FLAGS, &COSTS##_tunings},
43e9d192 1162#include "aarch64-cores.def"
393ae126
KT
1163 {"generic", generic, cortexa53, AARCH64_ARCH_8A, 8,
1164 AARCH64_FL_FOR_ARCH8, &generic_tunings},
1165 {NULL, aarch64_none, aarch64_none, aarch64_no_arch, 0, 0, NULL}
43e9d192
IB
1166};
1167
43e9d192 1168
361fb3ee
KT
1169/* Target specification. These are populated by the -march, -mtune, -mcpu
1170 handling code or by target attributes. */
43e9d192
IB
1171static const struct processor *selected_arch;
1172static const struct processor *selected_cpu;
1173static const struct processor *selected_tune;
1174
b175b679
JG
1175/* The current tuning set. */
1176struct tune_params aarch64_tune_params = generic_tunings;
1177
a0d0b980
SE
1178/* Table of machine attributes. */
1179static const struct attribute_spec aarch64_attribute_table[] =
1180{
1181 /* { name, min_len, max_len, decl_req, type_req, fn_type_req,
1182 affects_type_identity, handler, exclude } */
497f281c 1183 { "aarch64_vector_pcs", 0, 0, false, true, true, true, NULL, NULL },
a0d0b980
SE
1184 { NULL, 0, 0, false, false, false, false, NULL, NULL }
1185};
1186
43e9d192
IB
1187#define AARCH64_CPU_DEFAULT_FLAGS ((selected_cpu) ? selected_cpu->flags : 0)
1188
1189/* An ISA extension in the co-processor and main instruction set space. */
1190struct aarch64_option_extension
1191{
1192 const char *const name;
1193 const unsigned long flags_on;
1194 const unsigned long flags_off;
1195};
1196
43e9d192
IB
1197typedef enum aarch64_cond_code
1198{
1199 AARCH64_EQ = 0, AARCH64_NE, AARCH64_CS, AARCH64_CC, AARCH64_MI, AARCH64_PL,
1200 AARCH64_VS, AARCH64_VC, AARCH64_HI, AARCH64_LS, AARCH64_GE, AARCH64_LT,
1201 AARCH64_GT, AARCH64_LE, AARCH64_AL, AARCH64_NV
1202}
1203aarch64_cc;
1204
1205#define AARCH64_INVERSE_CONDITION_CODE(X) ((aarch64_cc) (((int) X) ^ 1))
1206
efac62a3
ST
1207struct aarch64_branch_protect_type
1208{
1209 /* The type's name that the user passes to the branch-protection option
1210 string. */
1211 const char* name;
1212 /* Function to handle the protection type and set global variables.
1213 First argument is the string token corresponding with this type and the
1214 second argument is the next token in the option string.
1215 Return values:
1216 * AARCH64_PARSE_OK: Handling was sucessful.
1217 * AARCH64_INVALID_ARG: The type is invalid in this context and the caller
1218 should print an error.
1219 * AARCH64_INVALID_FEATURE: The type is invalid and the handler prints its
1220 own error. */
1221 enum aarch64_parse_opt_result (*handler)(char*, char*);
1222 /* A list of types that can follow this type in the option string. */
1223 const aarch64_branch_protect_type* subtypes;
1224 unsigned int num_subtypes;
1225};
1226
1227static enum aarch64_parse_opt_result
1228aarch64_handle_no_branch_protection (char* str, char* rest)
1229{
1230 aarch64_ra_sign_scope = AARCH64_FUNCTION_NONE;
30afdf34 1231 aarch64_enable_bti = 0;
efac62a3
ST
1232 if (rest)
1233 {
1234 error ("unexpected %<%s%> after %<%s%>", rest, str);
1235 return AARCH64_PARSE_INVALID_FEATURE;
1236 }
1237 return AARCH64_PARSE_OK;
1238}
1239
1240static enum aarch64_parse_opt_result
1241aarch64_handle_standard_branch_protection (char* str, char* rest)
1242{
1243 aarch64_ra_sign_scope = AARCH64_FUNCTION_NON_LEAF;
30afdf34 1244 aarch64_enable_bti = 1;
efac62a3
ST
1245 if (rest)
1246 {
1247 error ("unexpected %<%s%> after %<%s%>", rest, str);
1248 return AARCH64_PARSE_INVALID_FEATURE;
1249 }
1250 return AARCH64_PARSE_OK;
1251}
1252
1253static enum aarch64_parse_opt_result
1254aarch64_handle_pac_ret_protection (char* str ATTRIBUTE_UNUSED,
1255 char* rest ATTRIBUTE_UNUSED)
1256{
1257 aarch64_ra_sign_scope = AARCH64_FUNCTION_NON_LEAF;
1258 return AARCH64_PARSE_OK;
1259}
1260
1261static enum aarch64_parse_opt_result
1262aarch64_handle_pac_ret_leaf (char* str ATTRIBUTE_UNUSED,
1263 char* rest ATTRIBUTE_UNUSED)
1264{
1265 aarch64_ra_sign_scope = AARCH64_FUNCTION_ALL;
1266 return AARCH64_PARSE_OK;
1267}
1268
30afdf34
SD
1269static enum aarch64_parse_opt_result
1270aarch64_handle_bti_protection (char* str ATTRIBUTE_UNUSED,
1271 char* rest ATTRIBUTE_UNUSED)
1272{
1273 aarch64_enable_bti = 1;
1274 return AARCH64_PARSE_OK;
1275}
1276
efac62a3
ST
1277static const struct aarch64_branch_protect_type aarch64_pac_ret_subtypes[] = {
1278 { "leaf", aarch64_handle_pac_ret_leaf, NULL, 0 },
1279 { NULL, NULL, NULL, 0 }
1280};
1281
1282static const struct aarch64_branch_protect_type aarch64_branch_protect_types[] = {
1283 { "none", aarch64_handle_no_branch_protection, NULL, 0 },
1284 { "standard", aarch64_handle_standard_branch_protection, NULL, 0 },
1285 { "pac-ret", aarch64_handle_pac_ret_protection, aarch64_pac_ret_subtypes,
1286 ARRAY_SIZE (aarch64_pac_ret_subtypes) },
30afdf34 1287 { "bti", aarch64_handle_bti_protection, NULL, 0 },
efac62a3
ST
1288 { NULL, NULL, NULL, 0 }
1289};
1290
43e9d192
IB
1291/* The condition codes of the processor, and the inverse function. */
1292static const char * const aarch64_condition_codes[] =
1293{
1294 "eq", "ne", "cs", "cc", "mi", "pl", "vs", "vc",
1295 "hi", "ls", "ge", "lt", "gt", "le", "al", "nv"
1296};
1297
973d2e01
TP
1298/* Generate code to enable conditional branches in functions over 1 MiB. */
1299const char *
1300aarch64_gen_far_branch (rtx * operands, int pos_label, const char * dest,
1301 const char * branch_format)
1302{
1303 rtx_code_label * tmp_label = gen_label_rtx ();
1304 char label_buf[256];
1305 char buffer[128];
1306 ASM_GENERATE_INTERNAL_LABEL (label_buf, dest,
1307 CODE_LABEL_NUMBER (tmp_label));
1308 const char *label_ptr = targetm.strip_name_encoding (label_buf);
1309 rtx dest_label = operands[pos_label];
1310 operands[pos_label] = tmp_label;
1311
1312 snprintf (buffer, sizeof (buffer), "%s%s", branch_format, label_ptr);
1313 output_asm_insn (buffer, operands);
1314
1315 snprintf (buffer, sizeof (buffer), "b\t%%l%d\n%s:", pos_label, label_ptr);
1316 operands[pos_label] = dest_label;
1317 output_asm_insn (buffer, operands);
1318 return "";
1319}
1320
261fb553 1321void
fc29dfc9 1322aarch64_err_no_fpadvsimd (machine_mode mode)
261fb553 1323{
261fb553 1324 if (TARGET_GENERAL_REGS_ONLY)
fc29dfc9
SE
1325 if (FLOAT_MODE_P (mode))
1326 error ("%qs is incompatible with the use of floating-point types",
1327 "-mgeneral-regs-only");
1328 else
1329 error ("%qs is incompatible with the use of vector types",
1330 "-mgeneral-regs-only");
261fb553 1331 else
fc29dfc9
SE
1332 if (FLOAT_MODE_P (mode))
1333 error ("%qs feature modifier is incompatible with the use of"
1334 " floating-point types", "+nofp");
1335 else
1336 error ("%qs feature modifier is incompatible with the use of"
1337 " vector types", "+nofp");
261fb553
AL
1338}
1339
c64f7d37 1340/* Implement TARGET_IRA_CHANGE_PSEUDO_ALLOCNO_CLASS.
2eb2847e
WD
1341 The register allocator chooses POINTER_AND_FP_REGS if FP_REGS and
1342 GENERAL_REGS have the same cost - even if POINTER_AND_FP_REGS has a much
1343 higher cost. POINTER_AND_FP_REGS is also used if the cost of both FP_REGS
1344 and GENERAL_REGS is lower than the memory cost (in this case the best class
1345 is the lowest cost one). Using POINTER_AND_FP_REGS irrespectively of its
1346 cost results in bad allocations with many redundant int<->FP moves which
1347 are expensive on various cores.
1348 To avoid this we don't allow POINTER_AND_FP_REGS as the allocno class, but
1349 force a decision between FP_REGS and GENERAL_REGS. We use the allocno class
1350 if it isn't POINTER_AND_FP_REGS. Similarly, use the best class if it isn't
1351 POINTER_AND_FP_REGS. Otherwise set the allocno class depending on the mode.
31e2b5a3
WD
1352 The result of this is that it is no longer inefficient to have a higher
1353 memory move cost than the register move cost.
1354*/
c64f7d37
WD
1355
1356static reg_class_t
31e2b5a3
WD
1357aarch64_ira_change_pseudo_allocno_class (int regno, reg_class_t allocno_class,
1358 reg_class_t best_class)
c64f7d37 1359{
b8506a8a 1360 machine_mode mode;
c64f7d37 1361
67e5c59a
RS
1362 if (!reg_class_subset_p (GENERAL_REGS, allocno_class)
1363 || !reg_class_subset_p (FP_REGS, allocno_class))
c64f7d37
WD
1364 return allocno_class;
1365
67e5c59a
RS
1366 if (!reg_class_subset_p (GENERAL_REGS, best_class)
1367 || !reg_class_subset_p (FP_REGS, best_class))
31e2b5a3
WD
1368 return best_class;
1369
c64f7d37
WD
1370 mode = PSEUDO_REGNO_MODE (regno);
1371 return FLOAT_MODE_P (mode) || VECTOR_MODE_P (mode) ? FP_REGS : GENERAL_REGS;
1372}
1373
26e0ff94 1374static unsigned int
b8506a8a 1375aarch64_min_divisions_for_recip_mul (machine_mode mode)
26e0ff94 1376{
50093a33 1377 if (GET_MODE_UNIT_SIZE (mode) == 4)
b175b679
JG
1378 return aarch64_tune_params.min_div_recip_mul_sf;
1379 return aarch64_tune_params.min_div_recip_mul_df;
26e0ff94
WD
1380}
1381
b5b33e11 1382/* Return the reassociation width of treeop OPC with mode MODE. */
cee66c68 1383static int
b5b33e11 1384aarch64_reassociation_width (unsigned opc, machine_mode mode)
cee66c68
WD
1385{
1386 if (VECTOR_MODE_P (mode))
b175b679 1387 return aarch64_tune_params.vec_reassoc_width;
cee66c68 1388 if (INTEGRAL_MODE_P (mode))
b175b679 1389 return aarch64_tune_params.int_reassoc_width;
b5b33e11
WD
1390 /* Avoid reassociating floating point addition so we emit more FMAs. */
1391 if (FLOAT_MODE_P (mode) && opc != PLUS_EXPR)
b175b679 1392 return aarch64_tune_params.fp_reassoc_width;
cee66c68
WD
1393 return 1;
1394}
1395
43e9d192
IB
1396/* Provide a mapping from gcc register numbers to dwarf register numbers. */
1397unsigned
1398aarch64_dbx_register_number (unsigned regno)
1399{
1400 if (GP_REGNUM_P (regno))
1401 return AARCH64_DWARF_R0 + regno - R0_REGNUM;
1402 else if (regno == SP_REGNUM)
1403 return AARCH64_DWARF_SP;
1404 else if (FP_REGNUM_P (regno))
1405 return AARCH64_DWARF_V0 + regno - V0_REGNUM;
43cacb12
RS
1406 else if (PR_REGNUM_P (regno))
1407 return AARCH64_DWARF_P0 + regno - P0_REGNUM;
1408 else if (regno == VG_REGNUM)
1409 return AARCH64_DWARF_VG;
43e9d192
IB
1410
1411 /* Return values >= DWARF_FRAME_REGISTERS indicate that there is no
1412 equivalent DWARF register. */
1413 return DWARF_FRAME_REGISTERS;
1414}
1415
43cacb12
RS
1416/* Return true if MODE is any of the Advanced SIMD structure modes. */
1417static bool
1418aarch64_advsimd_struct_mode_p (machine_mode mode)
1419{
1420 return (TARGET_SIMD
1421 && (mode == OImode || mode == CImode || mode == XImode));
1422}
1423
1424/* Return true if MODE is an SVE predicate mode. */
1425static bool
1426aarch64_sve_pred_mode_p (machine_mode mode)
1427{
1428 return (TARGET_SVE
1429 && (mode == VNx16BImode
1430 || mode == VNx8BImode
1431 || mode == VNx4BImode
1432 || mode == VNx2BImode));
1433}
1434
1435/* Three mutually-exclusive flags describing a vector or predicate type. */
1436const unsigned int VEC_ADVSIMD = 1;
1437const unsigned int VEC_SVE_DATA = 2;
1438const unsigned int VEC_SVE_PRED = 4;
1439/* Can be used in combination with VEC_ADVSIMD or VEC_SVE_DATA to indicate
1440 a structure of 2, 3 or 4 vectors. */
1441const unsigned int VEC_STRUCT = 8;
1442/* Useful combinations of the above. */
1443const unsigned int VEC_ANY_SVE = VEC_SVE_DATA | VEC_SVE_PRED;
1444const unsigned int VEC_ANY_DATA = VEC_ADVSIMD | VEC_SVE_DATA;
1445
1446/* Return a set of flags describing the vector properties of mode MODE.
1447 Ignore modes that are not supported by the current target. */
1448static unsigned int
1449aarch64_classify_vector_mode (machine_mode mode)
1450{
1451 if (aarch64_advsimd_struct_mode_p (mode))
1452 return VEC_ADVSIMD | VEC_STRUCT;
1453
1454 if (aarch64_sve_pred_mode_p (mode))
1455 return VEC_SVE_PRED;
1456
1457 scalar_mode inner = GET_MODE_INNER (mode);
1458 if (VECTOR_MODE_P (mode)
1459 && (inner == QImode
1460 || inner == HImode
1461 || inner == HFmode
1462 || inner == SImode
1463 || inner == SFmode
1464 || inner == DImode
1465 || inner == DFmode))
1466 {
9f4cbab8
RS
1467 if (TARGET_SVE)
1468 {
1469 if (known_eq (GET_MODE_BITSIZE (mode), BITS_PER_SVE_VECTOR))
1470 return VEC_SVE_DATA;
1471 if (known_eq (GET_MODE_BITSIZE (mode), BITS_PER_SVE_VECTOR * 2)
1472 || known_eq (GET_MODE_BITSIZE (mode), BITS_PER_SVE_VECTOR * 3)
1473 || known_eq (GET_MODE_BITSIZE (mode), BITS_PER_SVE_VECTOR * 4))
1474 return VEC_SVE_DATA | VEC_STRUCT;
1475 }
43cacb12
RS
1476
1477 /* This includes V1DF but not V1DI (which doesn't exist). */
1478 if (TARGET_SIMD
1479 && (known_eq (GET_MODE_BITSIZE (mode), 64)
1480 || known_eq (GET_MODE_BITSIZE (mode), 128)))
1481 return VEC_ADVSIMD;
1482 }
1483
1484 return 0;
1485}
1486
1487/* Return true if MODE is any of the data vector modes, including
1488 structure modes. */
43e9d192 1489static bool
43cacb12 1490aarch64_vector_data_mode_p (machine_mode mode)
43e9d192 1491{
43cacb12 1492 return aarch64_classify_vector_mode (mode) & VEC_ANY_DATA;
43e9d192
IB
1493}
1494
43cacb12
RS
1495/* Return true if MODE is an SVE data vector mode; either a single vector
1496 or a structure of vectors. */
43e9d192 1497static bool
43cacb12 1498aarch64_sve_data_mode_p (machine_mode mode)
43e9d192 1499{
43cacb12 1500 return aarch64_classify_vector_mode (mode) & VEC_SVE_DATA;
43e9d192
IB
1501}
1502
9f4cbab8
RS
1503/* Implement target hook TARGET_ARRAY_MODE. */
1504static opt_machine_mode
1505aarch64_array_mode (machine_mode mode, unsigned HOST_WIDE_INT nelems)
1506{
1507 if (aarch64_classify_vector_mode (mode) == VEC_SVE_DATA
1508 && IN_RANGE (nelems, 2, 4))
1509 return mode_for_vector (GET_MODE_INNER (mode),
1510 GET_MODE_NUNITS (mode) * nelems);
1511
1512 return opt_machine_mode ();
1513}
1514
43e9d192
IB
1515/* Implement target hook TARGET_ARRAY_MODE_SUPPORTED_P. */
1516static bool
ef4bddc2 1517aarch64_array_mode_supported_p (machine_mode mode,
43e9d192
IB
1518 unsigned HOST_WIDE_INT nelems)
1519{
1520 if (TARGET_SIMD
635e66fe
AL
1521 && (AARCH64_VALID_SIMD_QREG_MODE (mode)
1522 || AARCH64_VALID_SIMD_DREG_MODE (mode))
43e9d192
IB
1523 && (nelems >= 2 && nelems <= 4))
1524 return true;
1525
1526 return false;
1527}
1528
43cacb12
RS
1529/* Return the SVE predicate mode to use for elements that have
1530 ELEM_NBYTES bytes, if such a mode exists. */
1531
1532opt_machine_mode
1533aarch64_sve_pred_mode (unsigned int elem_nbytes)
1534{
1535 if (TARGET_SVE)
1536 {
1537 if (elem_nbytes == 1)
1538 return VNx16BImode;
1539 if (elem_nbytes == 2)
1540 return VNx8BImode;
1541 if (elem_nbytes == 4)
1542 return VNx4BImode;
1543 if (elem_nbytes == 8)
1544 return VNx2BImode;
1545 }
1546 return opt_machine_mode ();
1547}
1548
1549/* Implement TARGET_VECTORIZE_GET_MASK_MODE. */
1550
1551static opt_machine_mode
1552aarch64_get_mask_mode (poly_uint64 nunits, poly_uint64 nbytes)
1553{
1554 if (TARGET_SVE && known_eq (nbytes, BYTES_PER_SVE_VECTOR))
1555 {
1556 unsigned int elem_nbytes = vector_element_size (nbytes, nunits);
1557 machine_mode pred_mode;
1558 if (aarch64_sve_pred_mode (elem_nbytes).exists (&pred_mode))
1559 return pred_mode;
1560 }
1561
1562 return default_get_mask_mode (nunits, nbytes);
1563}
1564
b41d1f6e
RS
1565/* Implement TARGET_PREFERRED_ELSE_VALUE. For binary operations,
1566 prefer to use the first arithmetic operand as the else value if
1567 the else value doesn't matter, since that exactly matches the SVE
1568 destructive merging form. For ternary operations we could either
1569 pick the first operand and use FMAD-like instructions or the last
1570 operand and use FMLA-like instructions; the latter seems more
1571 natural. */
6a86928d
RS
1572
1573static tree
b41d1f6e 1574aarch64_preferred_else_value (unsigned, tree, unsigned int nops, tree *ops)
6a86928d 1575{
b41d1f6e 1576 return nops == 3 ? ops[2] : ops[0];
6a86928d
RS
1577}
1578
c43f4279 1579/* Implement TARGET_HARD_REGNO_NREGS. */
43e9d192 1580
c43f4279 1581static unsigned int
ef4bddc2 1582aarch64_hard_regno_nregs (unsigned regno, machine_mode mode)
43e9d192 1583{
6a70badb
RS
1584 /* ??? Logically we should only need to provide a value when
1585 HARD_REGNO_MODE_OK says that the combination is valid,
1586 but at the moment we need to handle all modes. Just ignore
1587 any runtime parts for registers that can't store them. */
1588 HOST_WIDE_INT lowest_size = constant_lower_bound (GET_MODE_SIZE (mode));
43e9d192
IB
1589 switch (aarch64_regno_regclass (regno))
1590 {
1591 case FP_REGS:
1592 case FP_LO_REGS:
43cacb12
RS
1593 if (aarch64_sve_data_mode_p (mode))
1594 return exact_div (GET_MODE_SIZE (mode),
1595 BYTES_PER_SVE_VECTOR).to_constant ();
6a70badb 1596 return CEIL (lowest_size, UNITS_PER_VREG);
43cacb12
RS
1597 case PR_REGS:
1598 case PR_LO_REGS:
1599 case PR_HI_REGS:
1600 return 1;
43e9d192 1601 default:
6a70badb 1602 return CEIL (lowest_size, UNITS_PER_WORD);
43e9d192
IB
1603 }
1604 gcc_unreachable ();
1605}
1606
f939c3e6 1607/* Implement TARGET_HARD_REGNO_MODE_OK. */
43e9d192 1608
f939c3e6 1609static bool
ef4bddc2 1610aarch64_hard_regno_mode_ok (unsigned regno, machine_mode mode)
43e9d192
IB
1611{
1612 if (GET_MODE_CLASS (mode) == MODE_CC)
1613 return regno == CC_REGNUM;
1614
43cacb12
RS
1615 if (regno == VG_REGNUM)
1616 /* This must have the same size as _Unwind_Word. */
1617 return mode == DImode;
1618
1619 unsigned int vec_flags = aarch64_classify_vector_mode (mode);
1620 if (vec_flags & VEC_SVE_PRED)
1621 return PR_REGNUM_P (regno);
1622
1623 if (PR_REGNUM_P (regno))
1624 return 0;
1625
9259db42
YZ
1626 if (regno == SP_REGNUM)
1627 /* The purpose of comparing with ptr_mode is to support the
1628 global register variable associated with the stack pointer
1629 register via the syntax of asm ("wsp") in ILP32. */
1630 return mode == Pmode || mode == ptr_mode;
1631
1632 if (regno == FRAME_POINTER_REGNUM || regno == ARG_POINTER_REGNUM)
43e9d192
IB
1633 return mode == Pmode;
1634
563cc649
RH
1635 if (GP_REGNUM_P (regno))
1636 {
1637 if (known_le (GET_MODE_SIZE (mode), 8))
1638 return true;
1639 else if (known_le (GET_MODE_SIZE (mode), 16))
1640 return (regno & 1) == 0;
1641 }
1642 else if (FP_REGNUM_P (regno))
43e9d192 1643 {
43cacb12 1644 if (vec_flags & VEC_STRUCT)
4edd6298 1645 return end_hard_regno (mode, regno) - 1 <= V31_REGNUM;
43e9d192 1646 else
43cacb12 1647 return !VECTOR_MODE_P (mode) || vec_flags != 0;
43e9d192
IB
1648 }
1649
f939c3e6 1650 return false;
43e9d192
IB
1651}
1652
a0d0b980
SE
1653/* Return true if this is a definition of a vectorized simd function. */
1654
1655static bool
1656aarch64_simd_decl_p (tree fndecl)
1657{
1658 tree fntype;
1659
1660 if (fndecl == NULL)
1661 return false;
1662 fntype = TREE_TYPE (fndecl);
1663 if (fntype == NULL)
1664 return false;
1665
1666 /* Functions with the aarch64_vector_pcs attribute use the simd ABI. */
1667 if (lookup_attribute ("aarch64_vector_pcs", TYPE_ATTRIBUTES (fntype)) != NULL)
1668 return true;
1669
1670 return false;
1671}
1672
1673/* Return the mode a register save/restore should use. DImode for integer
1674 registers, DFmode for FP registers in non-SIMD functions (they only save
1675 the bottom half of a 128-bit register), or TFmode for FP registers in
1676 SIMD functions. */
1677
1678static machine_mode
1679aarch64_reg_save_mode (tree fndecl, unsigned regno)
1680{
1681 return GP_REGNUM_P (regno)
1682 ? E_DImode
1683 : (aarch64_simd_decl_p (fndecl) ? E_TFmode : E_DFmode);
1684}
1685
b3650d40
SE
1686/* Return true if the instruction is a call to a SIMD function, false
1687 if the callee is not a SIMD function or if we do not know anything
1688 about the callee. */
1689
1690static bool
1691aarch64_simd_call_p (rtx_insn *insn)
1692{
1693 rtx symbol;
1694 rtx call;
1695 tree fndecl;
1696
1697 gcc_assert (CALL_P (insn));
1698 call = get_call_rtx_from (insn);
1699 symbol = XEXP (XEXP (call, 0), 0);
1700 if (GET_CODE (symbol) != SYMBOL_REF)
1701 return false;
1702 fndecl = SYMBOL_REF_DECL (symbol);
1703 if (!fndecl)
1704 return false;
1705
1706 return aarch64_simd_decl_p (fndecl);
1707}
1708
1709/* Implement TARGET_REMOVE_EXTRA_CALL_PRESERVED_REGS. If INSN calls
1710 a function that uses the SIMD ABI, take advantage of the extra
1711 call-preserved registers that the ABI provides. */
1712
1713void
1714aarch64_remove_extra_call_preserved_regs (rtx_insn *insn,
1715 HARD_REG_SET *return_set)
1716{
1717 if (aarch64_simd_call_p (insn))
1718 {
1719 for (int regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
1720 if (FP_SIMD_SAVED_REGNUM_P (regno))
1721 CLEAR_HARD_REG_BIT (*return_set, regno);
1722 }
1723}
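/* For example, for a call to a function declared with
   __attribute__ ((aarch64_vector_pcs)), V8-V23 are removed from the
   set of registers assumed to be clobbered, reflecting the extra
   call-preserved registers that the vector PCS guarantees.  */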
1724
80ec73f4
RS
1725/* Implement TARGET_HARD_REGNO_CALL_PART_CLOBBERED. The callee only saves
1726 the lower 64 bits of a 128-bit register. Tell the compiler the callee
1727 clobbers the top 64 bits when restoring the bottom 64 bits. */
1728
1729static bool
473574ee
SE
1730aarch64_hard_regno_call_part_clobbered (rtx_insn *insn, unsigned int regno,
1731 machine_mode mode)
80ec73f4 1732{
473574ee
SE
1733 bool simd_p = insn && CALL_P (insn) && aarch64_simd_call_p (insn);
1734 return FP_REGNUM_P (regno)
1735 && maybe_gt (GET_MODE_SIZE (mode), simd_p ? 16 : 8);
1736}
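/* For example, if V8 holds a TFmode value (16 bytes) across a call:
   an ordinary call preserves only the low 8 bytes, so
   maybe_gt (16, 8) is true and the register is treated as partially
   clobbered, whereas a call to an aarch64_vector_pcs function
   preserves all 16 bytes, so maybe_gt (16, 16) is false.  */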
1737
1738/* Implement TARGET_RETURN_CALL_WITH_MAX_CLOBBERS. */
1739
1740rtx_insn *
1741aarch64_return_call_with_max_clobbers (rtx_insn *call_1, rtx_insn *call_2)
1742{
1743 gcc_assert (CALL_P (call_1) && CALL_P (call_2));
1744
1745 if (!aarch64_simd_call_p (call_1) || aarch64_simd_call_p (call_2))
1746 return call_1;
1747 else
1748 return call_2;
80ec73f4
RS
1749}
1750
43cacb12
RS
1751/* Implement REGMODE_NATURAL_SIZE. */
1752poly_uint64
1753aarch64_regmode_natural_size (machine_mode mode)
1754{
1755 /* The natural size for SVE data modes is one SVE data vector,
1756 and similarly for predicates. We can't independently modify
1757 anything smaller than that. */
1758 /* ??? For now, only do this for variable-width SVE registers.
1759 Doing it for constant-sized registers breaks lower-subreg.c. */
1760 /* ??? And once that's fixed, we should probably have similar
1761 code for Advanced SIMD. */
1762 if (!aarch64_sve_vg.is_constant ())
1763 {
1764 unsigned int vec_flags = aarch64_classify_vector_mode (mode);
1765 if (vec_flags & VEC_SVE_PRED)
1766 return BYTES_PER_SVE_PRED;
1767 if (vec_flags & VEC_SVE_DATA)
1768 return BYTES_PER_SVE_VECTOR;
1769 }
1770 return UNITS_PER_WORD;
1771}
1772
73d9ac6a 1773/* Implement HARD_REGNO_CALLER_SAVE_MODE. */
ef4bddc2 1774machine_mode
43cacb12
RS
1775aarch64_hard_regno_caller_save_mode (unsigned regno, unsigned,
1776 machine_mode mode)
1777{
1778 /* The predicate mode determines which bits are significant and
1779 which are "don't care". Decreasing the number of lanes would
1780 lose data while increasing the number of lanes would make bits
1781 unnecessarily significant. */
1782 if (PR_REGNUM_P (regno))
1783 return mode;
6a70badb
RS
1784 if (known_ge (GET_MODE_SIZE (mode), 4))
1785 return mode;
73d9ac6a 1786 else
6a70badb 1787 return SImode;
73d9ac6a
IB
1788}
1789
231c52ae
ST
1790/* Return true if I's bits are consecutive ones from the MSB. */
1791bool
1792aarch64_high_bits_all_ones_p (HOST_WIDE_INT i)
1793{
1794 return exact_log2 (-i) != HOST_WIDE_INT_M1;
1795}
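/* Worked example: for i = 0xffffffffffff0000 (ones from the MSB down
   to bit 16, zeros below), -i = 0x10000 and exact_log2 (-i) = 16,
   which is not -1, so the function returns true.  For
   i = 0xffff00000000ffff, -i = 0xffffffff0001 is not a power of two,
   so the function returns false.  */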
1796
58e17cf8
RS
1797/* Implement TARGET_CONSTANT_ALIGNMENT. Make strings word-aligned so
1798 that strcpy from constants will be faster. */
1799
1800static HOST_WIDE_INT
1801aarch64_constant_alignment (const_tree exp, HOST_WIDE_INT align)
1802{
1803 if (TREE_CODE (exp) == STRING_CST && !optimize_size)
1804 return MAX (align, BITS_PER_WORD);
1805 return align;
1806}
1807
43e9d192
IB
1808/* Return true if calls to DECL should be treated as
1809 long-calls (i.e. called via a register). */
1810static bool
1811aarch64_decl_is_long_call_p (const_tree decl ATTRIBUTE_UNUSED)
1812{
1813 return false;
1814}
1815
1816/* Return true if calls to symbol-ref SYM should be treated as
1817 long-calls (i.e. called via a register). */
1818bool
1819aarch64_is_long_call_p (rtx sym)
1820{
1821 return aarch64_decl_is_long_call_p (SYMBOL_REF_DECL (sym));
1822}
1823
b60d63cb
JW
1824/* Return true if calls to symbol-ref SYM should not go through
1825 plt stubs. */
1826
1827bool
1828aarch64_is_noplt_call_p (rtx sym)
1829{
1830 const_tree decl = SYMBOL_REF_DECL (sym);
1831
1832 if (flag_pic
1833 && decl
1834 && (!flag_plt
1835 || lookup_attribute ("noplt", DECL_ATTRIBUTES (decl)))
1836 && !targetm.binds_local_p (decl))
1837 return true;
1838
1839 return false;
1840}
1841
43e9d192
IB
1842/* Return true if the offsets to a zero/sign-extract operation
1843 represent an expression that matches an extend operation. The
1844 operands represent the parameters from
1845
4745e701 1846 (extract:MODE (mult (reg) (MULT_IMM)) (EXTRACT_IMM) (const_int 0)). */
43e9d192 1847bool
77e994c9 1848aarch64_is_extend_from_extract (scalar_int_mode mode, rtx mult_imm,
43e9d192
IB
1849 rtx extract_imm)
1850{
1851 HOST_WIDE_INT mult_val, extract_val;
1852
1853 if (! CONST_INT_P (mult_imm) || ! CONST_INT_P (extract_imm))
1854 return false;
1855
1856 mult_val = INTVAL (mult_imm);
1857 extract_val = INTVAL (extract_imm);
1858
1859 if (extract_val > 8
1860 && extract_val < GET_MODE_BITSIZE (mode)
1861 && exact_log2 (extract_val & ~7) > 0
1862 && (extract_val & 7) <= 4
1863 && mult_val == (1 << (extract_val & 7)))
1864 return true;
1865
1866 return false;
1867}
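/* Worked example: with mult_imm == 4 and extract_imm == 34 in DImode,
   extract_val & ~7 == 32, extract_val & 7 == 2 and 4 == 1 << 2, so the
   function returns true: extracting the low 34 bits of (reg * 4) is
   equivalent to extending the low 32 bits of reg and shifting the
   result left by 2, matching extended-register forms such as
   "add x0, x1, w2, sxtw #2".  */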
1868
1869/* Emit an insn that's a simple single-set. Both the operands must be
1870 known to be valid. */
827ab47a 1871inline static rtx_insn *
43e9d192
IB
1872emit_set_insn (rtx x, rtx y)
1873{
f7df4a84 1874 return emit_insn (gen_rtx_SET (x, y));
43e9d192
IB
1875}
1876
1877/* X and Y are two things to compare using CODE. Emit the compare insn and
1878 return the rtx for register 0 in the proper mode. */
1879rtx
1880aarch64_gen_compare_reg (RTX_CODE code, rtx x, rtx y)
1881{
ef4bddc2 1882 machine_mode mode = SELECT_CC_MODE (code, x, y);
43e9d192
IB
1883 rtx cc_reg = gen_rtx_REG (mode, CC_REGNUM);
1884
1885 emit_set_insn (cc_reg, gen_rtx_COMPARE (mode, x, y));
1886 return cc_reg;
1887}
1888
d400fda3
RH
1889/* Similarly, but maybe zero-extend Y if Y_MODE < SImode. */
1890
1891static rtx
1892aarch64_gen_compare_reg_maybe_ze (RTX_CODE code, rtx x, rtx y,
1893 machine_mode y_mode)
1894{
1895 if (y_mode == E_QImode || y_mode == E_HImode)
1896 {
1897 if (CONST_INT_P (y))
1898 y = GEN_INT (INTVAL (y) & GET_MODE_MASK (y_mode));
1899 else
1900 {
1901 rtx t, cc_reg;
1902 machine_mode cc_mode;
1903
1904 t = gen_rtx_ZERO_EXTEND (SImode, y);
1905 t = gen_rtx_COMPARE (CC_SWPmode, t, x);
1906 cc_mode = CC_SWPmode;
1907 cc_reg = gen_rtx_REG (cc_mode, CC_REGNUM);
1908 emit_set_insn (cc_reg, t);
1909 return cc_reg;
1910 }
1911 }
1912
1913 return aarch64_gen_compare_reg (code, x, y);
1914}
1915
43e9d192
IB
1916/* Build the SYMBOL_REF for __tls_get_addr. */
1917
1918static GTY(()) rtx tls_get_addr_libfunc;
1919
1920rtx
1921aarch64_tls_get_addr (void)
1922{
1923 if (!tls_get_addr_libfunc)
1924 tls_get_addr_libfunc = init_one_libfunc ("__tls_get_addr");
1925 return tls_get_addr_libfunc;
1926}
1927
1928/* Return the TLS model to use for ADDR. */
1929
1930static enum tls_model
1931tls_symbolic_operand_type (rtx addr)
1932{
1933 enum tls_model tls_kind = TLS_MODEL_NONE;
43e9d192
IB
1934 if (GET_CODE (addr) == CONST)
1935 {
6a70badb
RS
1936 poly_int64 addend;
1937 rtx sym = strip_offset (addr, &addend);
43e9d192
IB
1938 if (GET_CODE (sym) == SYMBOL_REF)
1939 tls_kind = SYMBOL_REF_TLS_MODEL (sym);
1940 }
1941 else if (GET_CODE (addr) == SYMBOL_REF)
1942 tls_kind = SYMBOL_REF_TLS_MODEL (addr);
1943
1944 return tls_kind;
1945}
1946
1947/* We'll allow lo_sum's in addresses among our legitimate addresses,
1948 so that combine can take care of combining addresses where
1949 necessary, but for generation purposes, we'll generate the address
1950 as:
1951 RTL Absolute
1952 tmp = hi (symbol_ref); adrp x1, foo
1953 dest = lo_sum (tmp, symbol_ref); add dest, x1, :lo_12:foo
1954 nop
1955
1956 PIC TLS
1957 adrp x1, :got:foo adrp tmp, :tlsgd:foo
1958 ldr x1, [:got_lo12:foo] add dest, tmp, :tlsgd_lo12:foo
1959 bl __tls_get_addr
1960 nop
1961
1962 Load TLS symbol, depending on TLS mechanism and TLS access model.
1963
1964 Global Dynamic - Traditional TLS:
1965 adrp tmp, :tlsgd:imm
1966 add dest, tmp, #:tlsgd_lo12:imm
1967 bl __tls_get_addr
1968
1969 Global Dynamic - TLS Descriptors:
1970 adrp dest, :tlsdesc:imm
1971 ldr tmp, [dest, #:tlsdesc_lo12:imm]
1972 add dest, dest, #:tlsdesc_lo12:imm
1973 blr tmp
1974 mrs tp, tpidr_el0
1975 add dest, dest, tp
1976
1977 Initial Exec:
1978 mrs tp, tpidr_el0
1979 adrp tmp, :gottprel:imm
1980 ldr dest, [tmp, #:gottprel_lo12:imm]
1981 add dest, dest, tp
1982
1983 Local Exec:
1984 mrs tp, tpidr_el0
0699caae
RL
1985 add t0, tp, #:tprel_hi12:imm, lsl #12
1986 add t0, t0, #:tprel_lo12_nc:imm
43e9d192
IB
1987*/
1988
1989static void
1990aarch64_load_symref_appropriately (rtx dest, rtx imm,
1991 enum aarch64_symbol_type type)
1992{
1993 switch (type)
1994 {
1995 case SYMBOL_SMALL_ABSOLUTE:
1996 {
28514dda 1997 /* In ILP32, the mode of dest can be either SImode or DImode. */
43e9d192 1998 rtx tmp_reg = dest;
ef4bddc2 1999 machine_mode mode = GET_MODE (dest);
28514dda
YZ
2000
2001 gcc_assert (mode == Pmode || mode == ptr_mode);
2002
43e9d192 2003 if (can_create_pseudo_p ())
28514dda 2004 tmp_reg = gen_reg_rtx (mode);
43e9d192 2005
28514dda 2006 emit_move_insn (tmp_reg, gen_rtx_HIGH (mode, imm));
43e9d192
IB
2007 emit_insn (gen_add_losym (dest, tmp_reg, imm));
2008 return;
2009 }
2010
a5350ddc 2011 case SYMBOL_TINY_ABSOLUTE:
f7df4a84 2012 emit_insn (gen_rtx_SET (dest, imm));
a5350ddc
CSS
2013 return;
2014
1b1e81f8
JW
2015 case SYMBOL_SMALL_GOT_28K:
2016 {
2017 machine_mode mode = GET_MODE (dest);
2018 rtx gp_rtx = pic_offset_table_rtx;
53021678
JW
2019 rtx insn;
2020 rtx mem;
1b1e81f8
JW
2021
2022 /* NOTE: pic_offset_table_rtx can be NULL_RTX, because we can reach
2023 here before rtl expansion. Tree IVOPT will generate rtl patterns to
2024 decide rtx costs, in which case pic_offset_table_rtx is not
2025 initialized. In that case there is no need to generate the first adrp
026c3cfd 2026 instruction, as the final cost for global variable access is
1b1e81f8
JW
2027 one instruction. */
2028 if (gp_rtx != NULL)
2029 {
2030 /* -fpic for -mcmodel=small allows a 32K GOT table size (but since we
2031 are using the page base as the GOT base, the first page may be wasted;
2032 in the worst case there is only 28K of space for the GOT).
2033
2034 The generated instruction sequence for accessing a global variable
2035 is:
2036
a3957742 2037 ldr reg, [pic_offset_table_rtx, #:gotpage_lo15:sym]
1b1e81f8
JW
2038
2039 Only one instruction is needed. But we must initialize
2040 pic_offset_table_rtx properly. We generate the initialization insn for
2041 every global access, and allow CSE to remove all redundant ones.
2042
2043 The final instruction sequence will look like the following
2044 for multiple global variable accesses.
2045
a3957742 2046 adrp pic_offset_table_rtx, _GLOBAL_OFFSET_TABLE_
1b1e81f8 2047
a3957742
JW
2048 ldr reg, [pic_offset_table_rtx, #:gotpage_lo15:sym1]
2049 ldr reg, [pic_offset_table_rtx, #:gotpage_lo15:sym2]
2050 ldr reg, [pic_offset_table_rtx, #:gotpage_lo15:sym3]
2051 ... */
1b1e81f8
JW
2052
2053 rtx s = gen_rtx_SYMBOL_REF (Pmode, "_GLOBAL_OFFSET_TABLE_");
2054 crtl->uses_pic_offset_table = 1;
2055 emit_move_insn (gp_rtx, gen_rtx_HIGH (Pmode, s));
2056
2057 if (mode != GET_MODE (gp_rtx))
4ba8f0a3
AP
2058 gp_rtx = gen_lowpart (mode, gp_rtx);
2059
1b1e81f8
JW
2060 }
2061
2062 if (mode == ptr_mode)
2063 {
2064 if (mode == DImode)
53021678 2065 insn = gen_ldr_got_small_28k_di (dest, gp_rtx, imm);
1b1e81f8 2066 else
53021678
JW
2067 insn = gen_ldr_got_small_28k_si (dest, gp_rtx, imm);
2068
2069 mem = XVECEXP (SET_SRC (insn), 0, 0);
1b1e81f8
JW
2070 }
2071 else
2072 {
2073 gcc_assert (mode == Pmode);
53021678
JW
2074
2075 insn = gen_ldr_got_small_28k_sidi (dest, gp_rtx, imm);
2076 mem = XVECEXP (XEXP (SET_SRC (insn), 0), 0, 0);
1b1e81f8
JW
2077 }
2078
53021678
JW
2079 /* The operand is expected to be a MEM. Whenever the related insn
2080 pattern changes, the code above which calculates mem should be
2081 updated. */
2082 gcc_assert (GET_CODE (mem) == MEM);
2083 MEM_READONLY_P (mem) = 1;
2084 MEM_NOTRAP_P (mem) = 1;
2085 emit_insn (insn);
1b1e81f8
JW
2086 return;
2087 }
2088
6642bdb4 2089 case SYMBOL_SMALL_GOT_4G:
43e9d192 2090 {
28514dda
YZ
2091 /* In ILP32, the mode of dest can be either SImode or DImode,
2092 while the got entry is always of SImode size. The mode of
2093 dest depends on how dest is used: if dest is assigned to a
2094 pointer (e.g. stored in memory), it has SImode; it may have
2095 DImode if dest is dereferenced to access the memory.
2096 This is why we have to handle three different ldr_got_small
2097 patterns here (two patterns for ILP32). */
53021678
JW
2098
2099 rtx insn;
2100 rtx mem;
43e9d192 2101 rtx tmp_reg = dest;
ef4bddc2 2102 machine_mode mode = GET_MODE (dest);
28514dda 2103
43e9d192 2104 if (can_create_pseudo_p ())
28514dda
YZ
2105 tmp_reg = gen_reg_rtx (mode);
2106
2107 emit_move_insn (tmp_reg, gen_rtx_HIGH (mode, imm));
2108 if (mode == ptr_mode)
2109 {
2110 if (mode == DImode)
53021678 2111 insn = gen_ldr_got_small_di (dest, tmp_reg, imm);
28514dda 2112 else
53021678
JW
2113 insn = gen_ldr_got_small_si (dest, tmp_reg, imm);
2114
2115 mem = XVECEXP (SET_SRC (insn), 0, 0);
28514dda
YZ
2116 }
2117 else
2118 {
2119 gcc_assert (mode == Pmode);
53021678
JW
2120
2121 insn = gen_ldr_got_small_sidi (dest, tmp_reg, imm);
2122 mem = XVECEXP (XEXP (SET_SRC (insn), 0), 0, 0);
28514dda
YZ
2123 }
2124
53021678
JW
2125 gcc_assert (GET_CODE (mem) == MEM);
2126 MEM_READONLY_P (mem) = 1;
2127 MEM_NOTRAP_P (mem) = 1;
2128 emit_insn (insn);
43e9d192
IB
2129 return;
2130 }
2131
2132 case SYMBOL_SMALL_TLSGD:
2133 {
5d8a22a5 2134 rtx_insn *insns;
23b88fda
N
2135 machine_mode mode = GET_MODE (dest);
2136 rtx result = gen_rtx_REG (mode, R0_REGNUM);
43e9d192
IB
2137
2138 start_sequence ();
23b88fda
N
2139 if (TARGET_ILP32)
2140 aarch64_emit_call_insn (gen_tlsgd_small_si (result, imm));
2141 else
2142 aarch64_emit_call_insn (gen_tlsgd_small_di (result, imm));
43e9d192
IB
2143 insns = get_insns ();
2144 end_sequence ();
2145
2146 RTL_CONST_CALL_P (insns) = 1;
2147 emit_libcall_block (insns, dest, result, imm);
2148 return;
2149 }
2150
2151 case SYMBOL_SMALL_TLSDESC:
2152 {
ef4bddc2 2153 machine_mode mode = GET_MODE (dest);
621ad2de 2154 rtx x0 = gen_rtx_REG (mode, R0_REGNUM);
43e9d192
IB
2155 rtx tp;
2156
621ad2de
AP
2157 gcc_assert (mode == Pmode || mode == ptr_mode);
2158
2876a13f
JW
2159 /* In ILP32, the got entry is always of SImode size. Unlike
2160 small GOT, the dest is fixed at reg 0. */
2161 if (TARGET_ILP32)
2162 emit_insn (gen_tlsdesc_small_si (imm));
621ad2de 2163 else
2876a13f 2164 emit_insn (gen_tlsdesc_small_di (imm));
43e9d192 2165 tp = aarch64_load_tp (NULL);
621ad2de
AP
2166
2167 if (mode != Pmode)
2168 tp = gen_lowpart (mode, tp);
2169
2876a13f 2170 emit_insn (gen_rtx_SET (dest, gen_rtx_PLUS (mode, tp, x0)));
241dbd9d
QZ
2171 if (REG_P (dest))
2172 set_unique_reg_note (get_last_insn (), REG_EQUIV, imm);
43e9d192
IB
2173 return;
2174 }
2175
79496620 2176 case SYMBOL_SMALL_TLSIE:
43e9d192 2177 {
621ad2de
AP
2178 /* In ILP32, the mode of dest can be either SImode or DImode,
2179 while the got entry is always of SImode size. The mode of
2180 dest depends on how dest is used: if dest is assigned to a
2181 pointer (e.g. stored in memory), it has SImode; it may have
2182 DImode if dest is dereferenced to access the memory.
2183 This is why we have to handle three different tlsie_small
2184 patterns here (two patterns for ILP32). */
ef4bddc2 2185 machine_mode mode = GET_MODE (dest);
621ad2de 2186 rtx tmp_reg = gen_reg_rtx (mode);
43e9d192 2187 rtx tp = aarch64_load_tp (NULL);
621ad2de
AP
2188
2189 if (mode == ptr_mode)
2190 {
2191 if (mode == DImode)
2192 emit_insn (gen_tlsie_small_di (tmp_reg, imm));
2193 else
2194 {
2195 emit_insn (gen_tlsie_small_si (tmp_reg, imm));
2196 tp = gen_lowpart (mode, tp);
2197 }
2198 }
2199 else
2200 {
2201 gcc_assert (mode == Pmode);
2202 emit_insn (gen_tlsie_small_sidi (tmp_reg, imm));
2203 }
2204
f7df4a84 2205 emit_insn (gen_rtx_SET (dest, gen_rtx_PLUS (mode, tp, tmp_reg)));
241dbd9d
QZ
2206 if (REG_P (dest))
2207 set_unique_reg_note (get_last_insn (), REG_EQUIV, imm);
43e9d192
IB
2208 return;
2209 }
2210
cbf5629e 2211 case SYMBOL_TLSLE12:
d18ba284 2212 case SYMBOL_TLSLE24:
cbf5629e
JW
2213 case SYMBOL_TLSLE32:
2214 case SYMBOL_TLSLE48:
43e9d192 2215 {
cbf5629e 2216 machine_mode mode = GET_MODE (dest);
43e9d192 2217 rtx tp = aarch64_load_tp (NULL);
e6f7f0e9 2218
cbf5629e
JW
2219 if (mode != Pmode)
2220 tp = gen_lowpart (mode, tp);
2221
2222 switch (type)
2223 {
2224 case SYMBOL_TLSLE12:
2225 emit_insn ((mode == DImode ? gen_tlsle12_di : gen_tlsle12_si)
2226 (dest, tp, imm));
2227 break;
2228 case SYMBOL_TLSLE24:
2229 emit_insn ((mode == DImode ? gen_tlsle24_di : gen_tlsle24_si)
2230 (dest, tp, imm));
2231 break;
2232 case SYMBOL_TLSLE32:
2233 emit_insn ((mode == DImode ? gen_tlsle32_di : gen_tlsle32_si)
2234 (dest, imm));
2235 emit_insn ((mode == DImode ? gen_adddi3 : gen_addsi3)
2236 (dest, dest, tp));
2237 break;
2238 case SYMBOL_TLSLE48:
2239 emit_insn ((mode == DImode ? gen_tlsle48_di : gen_tlsle48_si)
2240 (dest, imm));
2241 emit_insn ((mode == DImode ? gen_adddi3 : gen_addsi3)
2242 (dest, dest, tp));
2243 break;
2244 default:
2245 gcc_unreachable ();
2246 }
e6f7f0e9 2247
241dbd9d
QZ
2248 if (REG_P (dest))
2249 set_unique_reg_note (get_last_insn (), REG_EQUIV, imm);
43e9d192
IB
2250 return;
2251 }
2252
87dd8ab0
MS
2253 case SYMBOL_TINY_GOT:
2254 emit_insn (gen_ldr_got_tiny (dest, imm));
2255 return;
2256
5ae7caad
JW
2257 case SYMBOL_TINY_TLSIE:
2258 {
2259 machine_mode mode = GET_MODE (dest);
2260 rtx tp = aarch64_load_tp (NULL);
2261
2262 if (mode == ptr_mode)
2263 {
2264 if (mode == DImode)
2265 emit_insn (gen_tlsie_tiny_di (dest, imm, tp));
2266 else
2267 {
2268 tp = gen_lowpart (mode, tp);
2269 emit_insn (gen_tlsie_tiny_si (dest, imm, tp));
2270 }
2271 }
2272 else
2273 {
2274 gcc_assert (mode == Pmode);
2275 emit_insn (gen_tlsie_tiny_sidi (dest, imm, tp));
2276 }
2277
241dbd9d
QZ
2278 if (REG_P (dest))
2279 set_unique_reg_note (get_last_insn (), REG_EQUIV, imm);
5ae7caad
JW
2280 return;
2281 }
2282
43e9d192
IB
2283 default:
2284 gcc_unreachable ();
2285 }
2286}
2287
2288/* Emit a move from SRC to DEST. Assume that the move expanders can
2289 handle all moves if !can_create_pseudo_p (). The distinction is
2290 important because, unlike emit_move_insn, the move expanders know
2291 how to force Pmode objects into the constant pool even when the
2292 constant pool address is not itself legitimate. */
2293static rtx
2294aarch64_emit_move (rtx dest, rtx src)
2295{
2296 return (can_create_pseudo_p ()
2297 ? emit_move_insn (dest, src)
2298 : emit_move_insn_1 (dest, src));
2299}
2300
f22d7973
RS
2301/* Apply UNOPTAB to OP and store the result in DEST. */
2302
2303static void
2304aarch64_emit_unop (rtx dest, optab unoptab, rtx op)
2305{
2306 rtx tmp = expand_unop (GET_MODE (dest), unoptab, op, dest, 0);
2307 if (dest != tmp)
2308 emit_move_insn (dest, tmp);
2309}
2310
2311/* Apply BINOPTAB to OP0 and OP1 and store the result in DEST. */
2312
2313static void
2314aarch64_emit_binop (rtx dest, optab binoptab, rtx op0, rtx op1)
2315{
2316 rtx tmp = expand_binop (GET_MODE (dest), binoptab, op0, op1, dest, 0,
2317 OPTAB_DIRECT);
2318 if (dest != tmp)
2319 emit_move_insn (dest, tmp);
2320}
2321
030d03b8
RE
2322/* Split a 128-bit move operation into two 64-bit move operations,
2323 taking care to handle partial overlap of register to register
2324 copies. Special cases are needed when moving between GP regs and
2325 FP regs. SRC can be a register, constant or memory; DST a register
2326 or memory. If either operand is memory it must not have any side
2327 effects. */
43e9d192
IB
2328void
2329aarch64_split_128bit_move (rtx dst, rtx src)
2330{
030d03b8
RE
2331 rtx dst_lo, dst_hi;
2332 rtx src_lo, src_hi;
43e9d192 2333
ef4bddc2 2334 machine_mode mode = GET_MODE (dst);
12dc6974 2335
030d03b8
RE
2336 gcc_assert (mode == TImode || mode == TFmode);
2337 gcc_assert (!(side_effects_p (src) || side_effects_p (dst)));
2338 gcc_assert (mode == GET_MODE (src) || GET_MODE (src) == VOIDmode);
43e9d192
IB
2339
2340 if (REG_P (dst) && REG_P (src))
2341 {
030d03b8
RE
2342 int src_regno = REGNO (src);
2343 int dst_regno = REGNO (dst);
43e9d192 2344
030d03b8 2345 /* Handle FP <-> GP regs. */
43e9d192
IB
2346 if (FP_REGNUM_P (dst_regno) && GP_REGNUM_P (src_regno))
2347 {
030d03b8
RE
2348 src_lo = gen_lowpart (word_mode, src);
2349 src_hi = gen_highpart (word_mode, src);
2350
0016d8d9
RS
2351 emit_insn (gen_aarch64_movlow_di (mode, dst, src_lo));
2352 emit_insn (gen_aarch64_movhigh_di (mode, dst, src_hi));
030d03b8 2353 return;
43e9d192
IB
2354 }
2355 else if (GP_REGNUM_P (dst_regno) && FP_REGNUM_P (src_regno))
2356 {
030d03b8
RE
2357 dst_lo = gen_lowpart (word_mode, dst);
2358 dst_hi = gen_highpart (word_mode, dst);
2359
0016d8d9
RS
2360 emit_insn (gen_aarch64_movdi_low (mode, dst_lo, src));
2361 emit_insn (gen_aarch64_movdi_high (mode, dst_hi, src));
030d03b8 2362 return;
43e9d192 2363 }
43e9d192
IB
2364 }
2365
030d03b8
RE
2366 dst_lo = gen_lowpart (word_mode, dst);
2367 dst_hi = gen_highpart (word_mode, dst);
2368 src_lo = gen_lowpart (word_mode, src);
2369 src_hi = gen_highpart_mode (word_mode, mode, src);
2370
2371 /* At most one pairing may overlap. */
2372 if (reg_overlap_mentioned_p (dst_lo, src_hi))
2373 {
2374 aarch64_emit_move (dst_hi, src_hi);
2375 aarch64_emit_move (dst_lo, src_lo);
2376 }
2377 else
2378 {
2379 aarch64_emit_move (dst_lo, src_lo);
2380 aarch64_emit_move (dst_hi, src_hi);
2381 }
43e9d192
IB
2382}
2383
2384bool
2385aarch64_split_128bit_move_p (rtx dst, rtx src)
2386{
2387 return (! REG_P (src)
2388 || ! (FP_REGNUM_P (REGNO (dst)) && FP_REGNUM_P (REGNO (src))));
2389}
2390
8b033a8a
SN
2391/* Split a complex SIMD combine. */
2392
2393void
2394aarch64_split_simd_combine (rtx dst, rtx src1, rtx src2)
2395{
ef4bddc2
RS
2396 machine_mode src_mode = GET_MODE (src1);
2397 machine_mode dst_mode = GET_MODE (dst);
8b033a8a
SN
2398
2399 gcc_assert (VECTOR_MODE_P (dst_mode));
a977dc0c
MC
2400 gcc_assert (register_operand (dst, dst_mode)
2401 && register_operand (src1, src_mode)
2402 && register_operand (src2, src_mode));
8b033a8a 2403
0016d8d9 2404 emit_insn (gen_aarch64_simd_combine (src_mode, dst, src1, src2));
a977dc0c 2405 return;
8b033a8a
SN
2406}
2407
fd4842cd
SN
2408/* Split a complex SIMD move. */
2409
2410void
2411aarch64_split_simd_move (rtx dst, rtx src)
2412{
ef4bddc2
RS
2413 machine_mode src_mode = GET_MODE (src);
2414 machine_mode dst_mode = GET_MODE (dst);
fd4842cd
SN
2415
2416 gcc_assert (VECTOR_MODE_P (dst_mode));
2417
2418 if (REG_P (dst) && REG_P (src))
2419 {
2420 gcc_assert (VECTOR_MODE_P (src_mode));
0016d8d9 2421 emit_insn (gen_aarch64_split_simd_mov (src_mode, dst, src));
fd4842cd
SN
2422 }
2423}
2424
ef22810a
RH
2425bool
2426aarch64_zero_extend_const_eq (machine_mode xmode, rtx x,
2427 machine_mode ymode, rtx y)
2428{
2429 rtx r = simplify_const_unary_operation (ZERO_EXTEND, xmode, y, ymode);
2430 gcc_assert (r != NULL);
2431 return rtx_equal_p (x, r);
2432}
2433
2434
43e9d192 2435static rtx
ef4bddc2 2436aarch64_force_temporary (machine_mode mode, rtx x, rtx value)
43e9d192
IB
2437{
2438 if (can_create_pseudo_p ())
e18b4a81 2439 return force_reg (mode, value);
43e9d192
IB
2440 else
2441 {
f5470a77
RS
2442 gcc_assert (x);
2443 aarch64_emit_move (x, value);
43e9d192
IB
2444 return x;
2445 }
2446}
2447
43cacb12
RS
2448/* Return true if we can move VALUE into a register using a single
2449 CNT[BHWD] instruction. */
2450
2451static bool
2452aarch64_sve_cnt_immediate_p (poly_int64 value)
2453{
2454 HOST_WIDE_INT factor = value.coeffs[0];
2455 /* The coefficient must be [1, 16] * {2, 4, 8, 16}. */
2456 return (value.coeffs[1] == factor
2457 && IN_RANGE (factor, 2, 16 * 16)
2458 && (factor & 1) == 0
2459 && factor <= 16 * (factor & -factor));
2460}
2461
2462/* Likewise for rtx X. */
2463
2464bool
2465aarch64_sve_cnt_immediate_p (rtx x)
2466{
2467 poly_int64 value;
2468 return poly_int_rtx_p (x, &value) && aarch64_sve_cnt_immediate_p (value);
2469}
2470
2471/* Return the asm string for an instruction with a CNT-like vector size
2472 operand (a vector pattern followed by a multiplier in the range [1, 16]).
2473 PREFIX is the mnemonic without the size suffix and OPERANDS is the
2474 first part of the operands template (the part that comes before the
2475 vector size itself). FACTOR is the number of quadwords.
2476 NELTS_PER_VQ, if nonzero, is the number of elements in each quadword.
2477 If it is zero, we can use any element size. */
2478
2479static char *
2480aarch64_output_sve_cnt_immediate (const char *prefix, const char *operands,
2481 unsigned int factor,
2482 unsigned int nelts_per_vq)
2483{
2484 static char buffer[sizeof ("sqincd\t%x0, %w0, all, mul #16")];
2485
2486 if (nelts_per_vq == 0)
2487 /* There is some overlap in the ranges of the four CNT instructions.
2488 Here we always use the smallest possible element size, so that the
2489 multiplier is 1 wherever possible. */
2490 nelts_per_vq = factor & -factor;
2491 int shift = std::min (exact_log2 (nelts_per_vq), 4);
2492 gcc_assert (IN_RANGE (shift, 1, 4));
2493 char suffix = "dwhb"[shift - 1];
2494
2495 factor >>= shift;
2496 unsigned int written;
2497 if (factor == 1)
2498 written = snprintf (buffer, sizeof (buffer), "%s%c\t%s",
2499 prefix, suffix, operands);
2500 else
2501 written = snprintf (buffer, sizeof (buffer), "%s%c\t%s, all, mul #%d",
2502 prefix, suffix, operands, factor);
2503 gcc_assert (written < sizeof (buffer));
2504 return buffer;
2505}
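/* Output examples (illustrative): PREFIX "cnt", OPERANDS "%x0",
   FACTOR 4, NELTS_PER_VQ 4 prints as "cntw\t%x0"; FACTOR 32 with
   NELTS_PER_VQ 0 picks the smallest element size (bytes) and prints as
   "cntb\t%x0, all, mul #2"; PREFIX "inc", FACTOR 40, NELTS_PER_VQ 8
   prints as "inch\t%x0, all, mul #5".  */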
2506
2507/* Return the asm string for an instruction with a CNT-like vector size
2508 operand (a vector pattern followed by a multiplier in the range [1, 16]).
2509 PREFIX is the mnemonic without the size suffix and OPERANDS is the
2510 first part of the operands template (the part that comes before the
2511 vector size itself). X is the value of the vector size operand,
2512 as a polynomial integer rtx. */
2513
2514char *
2515aarch64_output_sve_cnt_immediate (const char *prefix, const char *operands,
2516 rtx x)
2517{
2518 poly_int64 value = rtx_to_poly_int64 (x);
2519 gcc_assert (aarch64_sve_cnt_immediate_p (value));
2520 return aarch64_output_sve_cnt_immediate (prefix, operands,
2521 value.coeffs[1], 0);
2522}
2523
2524/* Return true if we can add VALUE to a register using a single ADDVL
2525 or ADDPL instruction. */
2526
2527static bool
2528aarch64_sve_addvl_addpl_immediate_p (poly_int64 value)
2529{
2530 HOST_WIDE_INT factor = value.coeffs[0];
2531 if (factor == 0 || value.coeffs[1] != factor)
2532 return false;
2533 /* FACTOR counts VG / 2, so a value of 2 is one predicate width
2534 and a value of 16 is one vector width. */
2535 return (((factor & 15) == 0 && IN_RANGE (factor, -32 * 16, 31 * 16))
2536 || ((factor & 1) == 0 && IN_RANGE (factor, -32 * 2, 31 * 2)));
2537}
2538
2539/* Likewise for rtx X. */
2540
2541bool
2542aarch64_sve_addvl_addpl_immediate_p (rtx x)
2543{
2544 poly_int64 value;
2545 return (poly_int_rtx_p (x, &value)
2546 && aarch64_sve_addvl_addpl_immediate_p (value));
2547}
2548
2549/* Return the asm string for adding ADDVL or ADDPL immediate OFFSET to operand 1
2550 and storing the result in operand 0. */
2551
2552char *
2553aarch64_output_sve_addvl_addpl (rtx dest, rtx base, rtx offset)
2554{
2555 static char buffer[sizeof ("addpl\t%x0, %x1, #-") + 3 * sizeof (int)];
2556 poly_int64 offset_value = rtx_to_poly_int64 (offset);
2557 gcc_assert (aarch64_sve_addvl_addpl_immediate_p (offset_value));
2558
2559 /* Use INC or DEC if possible. */
2560 if (rtx_equal_p (dest, base) && GP_REGNUM_P (REGNO (dest)))
2561 {
2562 if (aarch64_sve_cnt_immediate_p (offset_value))
2563 return aarch64_output_sve_cnt_immediate ("inc", "%x0",
2564 offset_value.coeffs[1], 0);
2565 if (aarch64_sve_cnt_immediate_p (-offset_value))
2566 return aarch64_output_sve_cnt_immediate ("dec", "%x0",
2567 -offset_value.coeffs[1], 0);
2568 }
2569
2570 int factor = offset_value.coeffs[1];
2571 if ((factor & 15) == 0)
2572 snprintf (buffer, sizeof (buffer), "addvl\t%%x0, %%x1, #%d", factor / 16);
2573 else
2574 snprintf (buffer, sizeof (buffer), "addpl\t%%x0, %%x1, #%d", factor / 2);
2575 return buffer;
2576}
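/* Examples (illustrative): with distinct DEST and BASE registers, an
   OFFSET of one full vector length, poly_int64 (16, 16), prints as
   "addvl\t%x0, %x1, #1", and minus one predicate length,
   poly_int64 (-2, -2), prints as "addpl\t%x0, %x1, #-1"; when DEST and
   BASE are the same GP register, poly_int64 (2, 2) is emitted as the
   shorter "incd\t%x0".  */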
2577
2578/* Return true if X is a valid immediate for an SVE vector INC or DEC
2579 instruction. If it is, store the number of elements in each vector
2580 quadword in *NELTS_PER_VQ_OUT (if nonnull) and store the multiplication
2581 factor in *FACTOR_OUT (if nonnull). */
2582
2583bool
2584aarch64_sve_inc_dec_immediate_p (rtx x, int *factor_out,
2585 unsigned int *nelts_per_vq_out)
2586{
2587 rtx elt;
2588 poly_int64 value;
2589
2590 if (!const_vec_duplicate_p (x, &elt)
2591 || !poly_int_rtx_p (elt, &value))
2592 return false;
2593
2594 unsigned int nelts_per_vq = 128 / GET_MODE_UNIT_BITSIZE (GET_MODE (x));
2595 if (nelts_per_vq != 8 && nelts_per_vq != 4 && nelts_per_vq != 2)
2596 /* There's no vector INCB. */
2597 return false;
2598
2599 HOST_WIDE_INT factor = value.coeffs[0];
2600 if (value.coeffs[1] != factor)
2601 return false;
2602
2603 /* The coefficient must be [1, 16] * NELTS_PER_VQ. */
2604 if ((factor % nelts_per_vq) != 0
2605 || !IN_RANGE (abs (factor), nelts_per_vq, 16 * nelts_per_vq))
2606 return false;
2607
2608 if (factor_out)
2609 *factor_out = factor;
2610 if (nelts_per_vq_out)
2611 *nelts_per_vq_out = nelts_per_vq;
2612 return true;
2613}
2614
2615/* Return true if X is a valid immediate for an SVE vector INC or DEC
2616 instruction. */
2617
2618bool
2619aarch64_sve_inc_dec_immediate_p (rtx x)
2620{
2621 return aarch64_sve_inc_dec_immediate_p (x, NULL, NULL);
2622}
2623
2624/* Return the asm template for an SVE vector INC or DEC instruction.
2625 OPERANDS gives the operands before the vector count and X is the
2626 value of the vector count operand itself. */
2627
2628char *
2629aarch64_output_sve_inc_dec_immediate (const char *operands, rtx x)
2630{
2631 int factor;
2632 unsigned int nelts_per_vq;
2633 if (!aarch64_sve_inc_dec_immediate_p (x, &factor, &nelts_per_vq))
2634 gcc_unreachable ();
2635 if (factor < 0)
2636 return aarch64_output_sve_cnt_immediate ("dec", operands, -factor,
2637 nelts_per_vq);
2638 else
2639 return aarch64_output_sve_cnt_immediate ("inc", operands, factor,
2640 nelts_per_vq);
2641}
43e9d192 2642
82614948
RR
2643static int
2644aarch64_internal_mov_immediate (rtx dest, rtx imm, bool generate,
77e994c9 2645 scalar_int_mode mode)
43e9d192 2646{
43e9d192 2647 int i;
9a4865db
WD
2648 unsigned HOST_WIDE_INT val, val2, mask;
2649 int one_match, zero_match;
2650 int num_insns;
43e9d192 2651
9a4865db
WD
2652 val = INTVAL (imm);
2653
2654 if (aarch64_move_imm (val, mode))
43e9d192 2655 {
82614948 2656 if (generate)
f7df4a84 2657 emit_insn (gen_rtx_SET (dest, imm));
9a4865db 2658 return 1;
43e9d192
IB
2659 }
2660
9de00935
TC
2661 /* Check to see if the low 32 bits are either 0xffffXXXX or 0xXXXXffff
2662 (with XXXX non-zero). In that case check to see if the move can be done in
2663 a smaller mode. */
2664 val2 = val & 0xffffffff;
2665 if (mode == DImode
2666 && aarch64_move_imm (val2, SImode)
2667 && (((val >> 32) & 0xffff) == 0 || (val >> 48) == 0))
2668 {
2669 if (generate)
2670 emit_insn (gen_rtx_SET (dest, GEN_INT (val2)));
2671
2672 /* Check if we have to emit a second instruction by checking to see
2673 if any of the upper 32 bits of the original DI mode value is set. */
2674 if (val == val2)
2675 return 1;
2676
2677 i = (val >> 48) ? 48 : 32;
2678
2679 if (generate)
2680 emit_insn (gen_insv_immdi (dest, GEN_INT (i),
2681 GEN_INT ((val >> i) & 0xffff)));
2682
2683 return 2;
2684 }
2685
9a4865db 2686 if ((val >> 32) == 0 || mode == SImode)
43e9d192 2687 {
82614948
RR
2688 if (generate)
2689 {
9a4865db
WD
2690 emit_insn (gen_rtx_SET (dest, GEN_INT (val & 0xffff)));
2691 if (mode == SImode)
2692 emit_insn (gen_insv_immsi (dest, GEN_INT (16),
2693 GEN_INT ((val >> 16) & 0xffff)));
2694 else
2695 emit_insn (gen_insv_immdi (dest, GEN_INT (16),
2696 GEN_INT ((val >> 16) & 0xffff)));
82614948 2697 }
9a4865db 2698 return 2;
43e9d192
IB
2699 }
2700
2701 /* Remaining cases are all for DImode. */
2702
43e9d192 2703 mask = 0xffff;
9a4865db
WD
2704 zero_match = ((val & mask) == 0) + ((val & (mask << 16)) == 0) +
2705 ((val & (mask << 32)) == 0) + ((val & (mask << 48)) == 0);
2706 one_match = ((~val & mask) == 0) + ((~val & (mask << 16)) == 0) +
2707 ((~val & (mask << 32)) == 0) + ((~val & (mask << 48)) == 0);
43e9d192 2708
62c8d76c 2709 if (zero_match != 2 && one_match != 2)
43e9d192 2710 {
62c8d76c
WD
2711 /* Try emitting a bitmask immediate with a movk replacing 16 bits.
2712 For a 64-bit bitmask try whether changing 16 bits to all ones or
2713 zeroes creates a valid bitmask. To check any repeated bitmask,
2714 try using 16 bits from the other 32-bit half of val. */
43e9d192 2715
62c8d76c 2716 for (i = 0; i < 64; i += 16, mask <<= 16)
43e9d192 2717 {
62c8d76c
WD
2718 val2 = val & ~mask;
2719 if (val2 != val && aarch64_bitmask_imm (val2, mode))
2720 break;
2721 val2 = val | mask;
2722 if (val2 != val && aarch64_bitmask_imm (val2, mode))
2723 break;
2724 val2 = val2 & ~mask;
2725 val2 = val2 | (((val2 >> 32) | (val2 << 32)) & mask);
2726 if (val2 != val && aarch64_bitmask_imm (val2, mode))
2727 break;
43e9d192 2728 }
62c8d76c 2729 if (i != 64)
43e9d192 2730 {
62c8d76c 2731 if (generate)
43e9d192 2732 {
62c8d76c
WD
2733 emit_insn (gen_rtx_SET (dest, GEN_INT (val2)));
2734 emit_insn (gen_insv_immdi (dest, GEN_INT (i),
9a4865db 2735 GEN_INT ((val >> i) & 0xffff)));
43e9d192 2736 }
1312b1ba 2737 return 2;
43e9d192
IB
2738 }
2739 }
2740
9a4865db
WD
2741 /* Generate 2-4 instructions, skipping 16 bits of all zeroes or ones which
2742 are emitted by the initial mov. If one_match > zero_match, skip set bits,
2743 otherwise skip zero bits. */
2c274197 2744
9a4865db 2745 num_insns = 1;
43e9d192 2746 mask = 0xffff;
9a4865db
WD
2747 val2 = one_match > zero_match ? ~val : val;
2748 i = (val2 & mask) != 0 ? 0 : (val2 & (mask << 16)) != 0 ? 16 : 32;
2749
2750 if (generate)
2751 emit_insn (gen_rtx_SET (dest, GEN_INT (one_match > zero_match
2752 ? (val | ~(mask << i))
2753 : (val & (mask << i)))));
2754 for (i += 16; i < 64; i += 16)
43e9d192 2755 {
9a4865db
WD
2756 if ((val2 & (mask << i)) == 0)
2757 continue;
2758 if (generate)
2759 emit_insn (gen_insv_immdi (dest, GEN_INT (i),
2760 GEN_INT ((val >> i) & 0xffff)));
2761 num_insns ++;
82614948
RR
2762 }
2763
2764 return num_insns;
2765}
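/* Worked example (illustrative): materializing 0x123456789abcdef0 in
   DImode finds no 16-bit chunk that is all-zeros or all-ones and no
   bitmask-plus-MOVK variant, so it falls through to the final loop and
   takes 4 instructions:

     mov  x0, 0xdef0
     movk x0, 0x9abc, lsl 16
     movk x0, 0x5678, lsl 32
     movk x0, 0x1234, lsl 48

   whereas 0xffffffff12345678 has two all-ones chunks (one_match == 2),
   so it starts from the complemented value and needs only
   "mov x0, 0xffffffffffff5678; movk x0, 0x1234, lsl 16".  */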
2766
c0bb5bc5
WD
2767/* Return whether imm is a 128-bit immediate which is simple enough to
2768 expand inline. */
2769bool
2770aarch64_mov128_immediate (rtx imm)
2771{
2772 if (GET_CODE (imm) == CONST_INT)
2773 return true;
2774
2775 gcc_assert (CONST_WIDE_INT_NUNITS (imm) == 2);
2776
2777 rtx lo = GEN_INT (CONST_WIDE_INT_ELT (imm, 0));
2778 rtx hi = GEN_INT (CONST_WIDE_INT_ELT (imm, 1));
2779
2780 return aarch64_internal_mov_immediate (NULL_RTX, lo, false, DImode)
2781 + aarch64_internal_mov_immediate (NULL_RTX, hi, false, DImode) <= 4;
2782}
2783
2784
43cacb12
RS
2785/* Return the number of temporary registers that aarch64_add_offset_1
2786 would need to add OFFSET to a register. */
2787
2788static unsigned int
2789aarch64_add_offset_1_temporaries (HOST_WIDE_INT offset)
2790{
2791 return abs_hwi (offset) < 0x1000000 ? 0 : 1;
2792}
2793
f5470a77
RS
2794/* A subroutine of aarch64_add_offset. Set DEST to SRC + OFFSET for
2795 a non-polynomial OFFSET. MODE is the mode of the addition.
2796 FRAME_RELATED_P is true if the RTX_FRAME_RELATED flag should
2797 be set and CFA adjustments added to the generated instructions.
2798
2799 TEMP1, if nonnull, is a register of mode MODE that can be used as a
2800 temporary if register allocation is already complete. This temporary
2801 register may overlap DEST but must not overlap SRC. If TEMP1 is known
2802 to hold abs (OFFSET), EMIT_MOVE_IMM can be set to false to avoid emitting
2803 the immediate again.
0100c5f9
RS
2804
2805 Since this function may be used to adjust the stack pointer, we must
2806 ensure that it cannot cause transient stack deallocation (for example
2807 by first incrementing SP and then decrementing when adjusting by a
2808 large immediate). */
2809
2810static void
f5470a77
RS
2811aarch64_add_offset_1 (scalar_int_mode mode, rtx dest,
2812 rtx src, HOST_WIDE_INT offset, rtx temp1,
2813 bool frame_related_p, bool emit_move_imm)
0100c5f9 2814{
f5470a77
RS
2815 gcc_assert (emit_move_imm || temp1 != NULL_RTX);
2816 gcc_assert (temp1 == NULL_RTX || !reg_overlap_mentioned_p (temp1, src));
2817
2818 HOST_WIDE_INT moffset = abs_hwi (offset);
0100c5f9
RS
2819 rtx_insn *insn;
2820
f5470a77
RS
2821 if (!moffset)
2822 {
2823 if (!rtx_equal_p (dest, src))
2824 {
2825 insn = emit_insn (gen_rtx_SET (dest, src));
2826 RTX_FRAME_RELATED_P (insn) = frame_related_p;
2827 }
2828 return;
2829 }
0100c5f9
RS
2830
2831 /* Single instruction adjustment. */
f5470a77 2832 if (aarch64_uimm12_shift (moffset))
0100c5f9 2833 {
f5470a77 2834 insn = emit_insn (gen_add3_insn (dest, src, GEN_INT (offset)));
0100c5f9
RS
2835 RTX_FRAME_RELATED_P (insn) = frame_related_p;
2836 return;
2837 }
2838
f5470a77
RS
2839 /* Emit 2 additions/subtractions if the adjustment is less than 24 bits
2840 and either:
2841
2842 a) the offset cannot be loaded by a 16-bit move or
2843 b) there is no spare register into which we can move it. */
2844 if (moffset < 0x1000000
2845 && ((!temp1 && !can_create_pseudo_p ())
2846 || !aarch64_move_imm (moffset, mode)))
0100c5f9 2847 {
f5470a77 2848 HOST_WIDE_INT low_off = moffset & 0xfff;
0100c5f9 2849
f5470a77
RS
2850 low_off = offset < 0 ? -low_off : low_off;
2851 insn = emit_insn (gen_add3_insn (dest, src, GEN_INT (low_off)));
0100c5f9 2852 RTX_FRAME_RELATED_P (insn) = frame_related_p;
f5470a77 2853 insn = emit_insn (gen_add2_insn (dest, GEN_INT (offset - low_off)));
0100c5f9
RS
2854 RTX_FRAME_RELATED_P (insn) = frame_related_p;
2855 return;
2856 }
2857
2858 /* Emit a move immediate if required and an addition/subtraction. */
0100c5f9 2859 if (emit_move_imm)
f5470a77
RS
2860 {
2861 gcc_assert (temp1 != NULL_RTX || can_create_pseudo_p ());
2862 temp1 = aarch64_force_temporary (mode, temp1, GEN_INT (moffset));
2863 }
2864 insn = emit_insn (offset < 0
2865 ? gen_sub3_insn (dest, src, temp1)
2866 : gen_add3_insn (dest, src, temp1));
0100c5f9
RS
2867 if (frame_related_p)
2868 {
2869 RTX_FRAME_RELATED_P (insn) = frame_related_p;
f5470a77
RS
2870 rtx adj = plus_constant (mode, src, offset);
2871 add_reg_note (insn, REG_CFA_ADJUST_CFA, gen_rtx_SET (dest, adj));
0100c5f9
RS
2872 }
2873}
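/* Worked example: adjusting by OFFSET == 0x123456 takes the
   two-addition path, since the value is below 0x1000000 but cannot be
   loaded with a single MOV immediate:

     add dest, src, #0x456
     add dest, dest, #0x123000   (a 12-bit immediate shifted left by 12)

   Adjustments of 24 bits or more load the value into TEMP1 first and
   then use a single register-register ADD or SUB.  */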
2874
43cacb12
RS
2875/* Return the number of temporary registers that aarch64_add_offset
2876 would need to move OFFSET into a register or add OFFSET to a register;
2877 ADD_P is true if we want the latter rather than the former. */
2878
2879static unsigned int
2880aarch64_offset_temporaries (bool add_p, poly_int64 offset)
2881{
2882 /* This follows the same structure as aarch64_add_offset. */
2883 if (add_p && aarch64_sve_addvl_addpl_immediate_p (offset))
2884 return 0;
2885
2886 unsigned int count = 0;
2887 HOST_WIDE_INT factor = offset.coeffs[1];
2888 HOST_WIDE_INT constant = offset.coeffs[0] - factor;
2889 poly_int64 poly_offset (factor, factor);
2890 if (add_p && aarch64_sve_addvl_addpl_immediate_p (poly_offset))
2891 /* Need one register for the ADDVL/ADDPL result. */
2892 count += 1;
2893 else if (factor != 0)
2894 {
2895 factor = abs (factor);
2896 if (factor > 16 * (factor & -factor))
2897 /* Need one register for the CNT result and one for the multiplication
2898 factor. If necessary, the second temporary can be reused for the
2899 constant part of the offset. */
2900 return 2;
2901 /* Need one register for the CNT result (which might then
2902 be shifted). */
2903 count += 1;
2904 }
2905 return count + aarch64_add_offset_1_temporaries (constant);
2906}
2907
2908/* If X can be represented as a poly_int64, return the number
2909 of temporaries that are required to add it to a register.
2910 Return -1 otherwise. */
2911
2912int
2913aarch64_add_offset_temporaries (rtx x)
2914{
2915 poly_int64 offset;
2916 if (!poly_int_rtx_p (x, &offset))
2917 return -1;
2918 return aarch64_offset_temporaries (true, offset);
2919}
2920
f5470a77
RS
2921/* Set DEST to SRC + OFFSET. MODE is the mode of the addition.
2922 FRAME_RELATED_P is true if the RTX_FRAME_RELATED flag should
2923 be set and CFA adjustments added to the generated instructions.
2924
2925 TEMP1, if nonnull, is a register of mode MODE that can be used as a
2926 temporary if register allocation is already complete. This temporary
43cacb12
RS
2927 register may overlap DEST if !FRAME_RELATED_P but must not overlap SRC.
2928 If TEMP1 is known to hold abs (OFFSET), EMIT_MOVE_IMM can be set to
2929 false to avoid emitting the immediate again.
2930
2931 TEMP2, if nonnull, is a second temporary register that doesn't
2932 overlap either DEST or REG.
f5470a77
RS
2933
2934 Since this function may be used to adjust the stack pointer, we must
2935 ensure that it cannot cause transient stack deallocation (for example
2936 by first incrementing SP and then decrementing when adjusting by a
2937 large immediate). */
2938
2939static void
2940aarch64_add_offset (scalar_int_mode mode, rtx dest, rtx src,
43cacb12
RS
2941 poly_int64 offset, rtx temp1, rtx temp2,
2942 bool frame_related_p, bool emit_move_imm = true)
0100c5f9 2943{
f5470a77
RS
2944 gcc_assert (emit_move_imm || temp1 != NULL_RTX);
2945 gcc_assert (temp1 == NULL_RTX || !reg_overlap_mentioned_p (temp1, src));
43cacb12
RS
2946 gcc_assert (temp1 == NULL_RTX
2947 || !frame_related_p
2948 || !reg_overlap_mentioned_p (temp1, dest));
2949 gcc_assert (temp2 == NULL_RTX || !reg_overlap_mentioned_p (dest, temp2));
2950
2951 /* Try using ADDVL or ADDPL to add the whole value. */
2952 if (src != const0_rtx && aarch64_sve_addvl_addpl_immediate_p (offset))
2953 {
2954 rtx offset_rtx = gen_int_mode (offset, mode);
2955 rtx_insn *insn = emit_insn (gen_add3_insn (dest, src, offset_rtx));
2956 RTX_FRAME_RELATED_P (insn) = frame_related_p;
2957 return;
2958 }
2959
2960 /* Coefficient 1 is multiplied by the number of 128-bit blocks in an
2961 SVE vector register, over and above the minimum size of 128 bits.
2962 This is equivalent to half the value returned by CNTD with a
2963 vector shape of ALL. */
2964 HOST_WIDE_INT factor = offset.coeffs[1];
2965 HOST_WIDE_INT constant = offset.coeffs[0] - factor;
2966
2967 /* Try using ADDVL or ADDPL to add the VG-based part. */
2968 poly_int64 poly_offset (factor, factor);
2969 if (src != const0_rtx
2970 && aarch64_sve_addvl_addpl_immediate_p (poly_offset))
2971 {
2972 rtx offset_rtx = gen_int_mode (poly_offset, mode);
2973 if (frame_related_p)
2974 {
2975 rtx_insn *insn = emit_insn (gen_add3_insn (dest, src, offset_rtx));
2976 RTX_FRAME_RELATED_P (insn) = true;
2977 src = dest;
2978 }
2979 else
2980 {
2981 rtx addr = gen_rtx_PLUS (mode, src, offset_rtx);
2982 src = aarch64_force_temporary (mode, temp1, addr);
2983 temp1 = temp2;
2984 temp2 = NULL_RTX;
2985 }
2986 }
2987 /* Otherwise use a CNT-based sequence. */
2988 else if (factor != 0)
2989 {
2990 /* Use a subtraction if we have a negative factor. */
2991 rtx_code code = PLUS;
2992 if (factor < 0)
2993 {
2994 factor = -factor;
2995 code = MINUS;
2996 }
2997
2998 /* Calculate CNTD * FACTOR / 2. First try to fold the division
2999 into the multiplication. */
3000 rtx val;
3001 int shift = 0;
3002 if (factor & 1)
3003 /* Use a right shift by 1. */
3004 shift = -1;
3005 else
3006 factor /= 2;
3007 HOST_WIDE_INT low_bit = factor & -factor;
3008 if (factor <= 16 * low_bit)
3009 {
3010 if (factor > 16 * 8)
3011 {
3012 /* "CNTB Xn, ALL, MUL #FACTOR" is out of range, so calculate
3013 the value with the minimum multiplier and shift it into
3014 position. */
3015 int extra_shift = exact_log2 (low_bit);
3016 shift += extra_shift;
3017 factor >>= extra_shift;
3018 }
3019 val = gen_int_mode (poly_int64 (factor * 2, factor * 2), mode);
3020 }
3021 else
3022 {
3023 /* Use CNTD, then multiply it by FACTOR. */
3024 val = gen_int_mode (poly_int64 (2, 2), mode);
3025 val = aarch64_force_temporary (mode, temp1, val);
3026
3027 /* Go back to using a negative multiplication factor if we have
3028 no register from which to subtract. */
3029 if (code == MINUS && src == const0_rtx)
3030 {
3031 factor = -factor;
3032 code = PLUS;
3033 }
3034 rtx coeff1 = gen_int_mode (factor, mode);
3035 coeff1 = aarch64_force_temporary (mode, temp2, coeff1);
3036 val = gen_rtx_MULT (mode, val, coeff1);
3037 }
3038
3039 if (shift > 0)
3040 {
3041 /* Multiply by 1 << SHIFT. */
3042 val = aarch64_force_temporary (mode, temp1, val);
3043 val = gen_rtx_ASHIFT (mode, val, GEN_INT (shift));
3044 }
3045 else if (shift == -1)
3046 {
3047 /* Divide by 2. */
3048 val = aarch64_force_temporary (mode, temp1, val);
3049 val = gen_rtx_ASHIFTRT (mode, val, const1_rtx);
3050 }
3051
3052 /* Calculate SRC +/- CNTD * FACTOR / 2. */
3053 if (src != const0_rtx)
3054 {
3055 val = aarch64_force_temporary (mode, temp1, val);
3056 val = gen_rtx_fmt_ee (code, mode, src, val);
3057 }
3058 else if (code == MINUS)
3059 {
3060 val = aarch64_force_temporary (mode, temp1, val);
3061 val = gen_rtx_NEG (mode, val);
3062 }
3063
3064 if (constant == 0 || frame_related_p)
3065 {
3066 rtx_insn *insn = emit_insn (gen_rtx_SET (dest, val));
3067 if (frame_related_p)
3068 {
3069 RTX_FRAME_RELATED_P (insn) = true;
3070 add_reg_note (insn, REG_CFA_ADJUST_CFA,
3071 gen_rtx_SET (dest, plus_constant (Pmode, src,
3072 poly_offset)));
3073 }
3074 src = dest;
3075 if (constant == 0)
3076 return;
3077 }
3078 else
3079 {
3080 src = aarch64_force_temporary (mode, temp1, val);
3081 temp1 = temp2;
3082 temp2 = NULL_RTX;
3083 }
3084
3085 emit_move_imm = true;
3086 }
f5470a77 3087
f5470a77
RS
3088 aarch64_add_offset_1 (mode, dest, src, constant, temp1,
3089 frame_related_p, emit_move_imm);
0100c5f9
RS
3090}
3091
43cacb12
RS
3092/* Like aarch64_add_offset, but the offset is given as an rtx rather
3093 than a poly_int64. */
3094
3095void
3096aarch64_split_add_offset (scalar_int_mode mode, rtx dest, rtx src,
3097 rtx offset_rtx, rtx temp1, rtx temp2)
3098{
3099 aarch64_add_offset (mode, dest, src, rtx_to_poly_int64 (offset_rtx),
3100 temp1, temp2, false);
3101}
3102
f5470a77
RS
3103/* Add DELTA to the stack pointer, marking the instructions frame-related.
3104 TEMP1 is available as a temporary if nonnull. EMIT_MOVE_IMM is false
3105 if TEMP1 already contains abs (DELTA). */
3106
0100c5f9 3107static inline void
43cacb12 3108aarch64_add_sp (rtx temp1, rtx temp2, poly_int64 delta, bool emit_move_imm)
0100c5f9 3109{
f5470a77 3110 aarch64_add_offset (Pmode, stack_pointer_rtx, stack_pointer_rtx, delta,
43cacb12 3111 temp1, temp2, true, emit_move_imm);
0100c5f9
RS
3112}
3113
f5470a77
RS
3114/* Subtract DELTA from the stack pointer, marking the instructions
3115 frame-related if FRAME_RELATED_P. TEMP1 is available as a temporary
3116 if nonnull. */
3117
0100c5f9 3118static inline void
cd1bef27
JL
3119aarch64_sub_sp (rtx temp1, rtx temp2, poly_int64 delta, bool frame_related_p,
3120 bool emit_move_imm = true)
0100c5f9 3121{
f5470a77 3122 aarch64_add_offset (Pmode, stack_pointer_rtx, stack_pointer_rtx, -delta,
cd1bef27 3123 temp1, temp2, frame_related_p, emit_move_imm);
0100c5f9 3124}
82614948 3125
43cacb12
RS
3126/* Set DEST to (vec_series BASE STEP). */
3127
3128static void
3129aarch64_expand_vec_series (rtx dest, rtx base, rtx step)
82614948
RR
3130{
3131 machine_mode mode = GET_MODE (dest);
43cacb12
RS
3132 scalar_mode inner = GET_MODE_INNER (mode);
3133
3134 /* Each operand can be a register or an immediate in the range [-16, 15]. */
3135 if (!aarch64_sve_index_immediate_p (base))
3136 base = force_reg (inner, base);
3137 if (!aarch64_sve_index_immediate_p (step))
3138 step = force_reg (inner, step);
3139
3140 emit_set_insn (dest, gen_rtx_VEC_SERIES (mode, base, step));
3141}
82614948 3142
43cacb12
RS
3143/* Try to duplicate SRC into SVE register DEST, given that SRC is an
3144 integer of mode INT_MODE. Return true on success. */
3145
3146static bool
3147aarch64_expand_sve_widened_duplicate (rtx dest, scalar_int_mode src_mode,
3148 rtx src)
3149{
3150 /* If the constant is smaller than 128 bits, we can do the move
3151 using a vector of SRC_MODEs. */
3152 if (src_mode != TImode)
3153 {
3154 poly_uint64 count = exact_div (GET_MODE_SIZE (GET_MODE (dest)),
3155 GET_MODE_SIZE (src_mode));
3156 machine_mode dup_mode = mode_for_vector (src_mode, count).require ();
3157 emit_move_insn (gen_lowpart (dup_mode, dest),
3158 gen_const_vec_duplicate (dup_mode, src));
3159 return true;
3160 }
3161
947b1372 3162 /* Use LD1RQ[BHWD] to load the 128 bits from memory. */
43cacb12
RS
3163 src = force_const_mem (src_mode, src);
3164 if (!src)
3165 return false;
3166
3167 /* Make sure that the address is legitimate. */
3168 if (!aarch64_sve_ld1r_operand_p (src))
3169 {
3170 rtx addr = force_reg (Pmode, XEXP (src, 0));
3171 src = replace_equiv_address (src, addr);
3172 }
3173
947b1372
RS
3174 machine_mode mode = GET_MODE (dest);
3175 unsigned int elem_bytes = GET_MODE_UNIT_SIZE (mode);
3176 machine_mode pred_mode = aarch64_sve_pred_mode (elem_bytes).require ();
3177 rtx ptrue = force_reg (pred_mode, CONSTM1_RTX (pred_mode));
3178 src = gen_rtx_UNSPEC (mode, gen_rtvec (2, ptrue, src), UNSPEC_LD1RQ);
3179 emit_insn (gen_rtx_SET (dest, src));
43cacb12
RS
3180 return true;
3181}
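/* For example, a VNx4SImode constant that repeats two 32-bit values
   A, B, A, B, ... reaches here (from the function below) with SRC_MODE
   == DImode: the combined 64-bit value is simply duplicated as a
   VNx2DImode view of the same register.  A full 128-bit repeating
   block instead has SRC_MODE == TImode and is loaded from the constant
   pool with an LD1RQ, e.g. "ld1rqd {z0.d}, p0/z, [x0]", which
   replicates the 128-bit block across the whole vector.  */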
3182
3183/* Expand a move of general CONST_VECTOR SRC into DEST, given that it
3184 isn't a simple duplicate or series. */
3185
3186static void
3187aarch64_expand_sve_const_vector (rtx dest, rtx src)
3188{
3189 machine_mode mode = GET_MODE (src);
3190 unsigned int npatterns = CONST_VECTOR_NPATTERNS (src);
3191 unsigned int nelts_per_pattern = CONST_VECTOR_NELTS_PER_PATTERN (src);
3192 gcc_assert (npatterns > 1);
3193
3194 if (nelts_per_pattern == 1)
3195 {
3196 /* The constant is a repeating sequence of at least two elements,
3197 where the repeating elements occupy no more than 128 bits.
3198 Get an integer representation of the replicated value. */
8179efe0
RS
3199 scalar_int_mode int_mode;
3200 if (BYTES_BIG_ENDIAN)
3201 /* For now, always use LD1RQ to load the value on big-endian
3202 targets, since the handling of smaller integers includes a
3203 subreg that is semantically an element reverse. */
3204 int_mode = TImode;
3205 else
3206 {
3207 unsigned int int_bits = GET_MODE_UNIT_BITSIZE (mode) * npatterns;
3208 gcc_assert (int_bits <= 128);
3209 int_mode = int_mode_for_size (int_bits, 0).require ();
3210 }
43cacb12
RS
3211 rtx int_value = simplify_gen_subreg (int_mode, src, mode, 0);
3212 if (int_value
3213 && aarch64_expand_sve_widened_duplicate (dest, int_mode, int_value))
3214 return;
3215 }
3216
3217 /* Expand each pattern individually. */
3218 rtx_vector_builder builder;
3219 auto_vec<rtx, 16> vectors (npatterns);
3220 for (unsigned int i = 0; i < npatterns; ++i)
3221 {
3222 builder.new_vector (mode, 1, nelts_per_pattern);
3223 for (unsigned int j = 0; j < nelts_per_pattern; ++j)
3224 builder.quick_push (CONST_VECTOR_ELT (src, i + j * npatterns));
3225 vectors.quick_push (force_reg (mode, builder.build ()));
3226 }
3227
3228 /* Use permutes to interleave the separate vectors. */
3229 while (npatterns > 1)
3230 {
3231 npatterns /= 2;
3232 for (unsigned int i = 0; i < npatterns; ++i)
3233 {
3234 rtx tmp = (npatterns == 1 ? dest : gen_reg_rtx (mode));
3235 rtvec v = gen_rtvec (2, vectors[i], vectors[i + npatterns]);
3236 emit_set_insn (tmp, gen_rtx_UNSPEC (mode, v, UNSPEC_ZIP1));
3237 vectors[i] = tmp;
3238 }
3239 }
3240 gcc_assert (vectors[0] == dest);
3241}
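/* Interleaving example: a two-pattern constant such as
   { 0, 100, 1, 100, 2, 100, ... } (pattern 0 is the series 0, 1, 2, ...
   and pattern 1 is the constant 100) is built by expanding each pattern
   into its own vector and then combining them with a ZIP1 permute, so
   that element 2*k of the result comes from the first vector and
   element 2*k+1 from the second.  With four patterns, two rounds of
   ZIP1 are needed.  */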
3242
3243/* Set DEST to immediate IMM. For SVE vector modes, GEN_VEC_DUPLICATE
3244 is a pattern that can be used to set DEST to a replicated scalar
3245 element. */
3246
3247void
3248aarch64_expand_mov_immediate (rtx dest, rtx imm,
3249 rtx (*gen_vec_duplicate) (rtx, rtx))
3250{
3251 machine_mode mode = GET_MODE (dest);
82614948
RR
3252
3253 /* Check on what type of symbol it is. */
77e994c9
RS
3254 scalar_int_mode int_mode;
3255 if ((GET_CODE (imm) == SYMBOL_REF
3256 || GET_CODE (imm) == LABEL_REF
43cacb12
RS
3257 || GET_CODE (imm) == CONST
3258 || GET_CODE (imm) == CONST_POLY_INT)
77e994c9 3259 && is_a <scalar_int_mode> (mode, &int_mode))
82614948 3260 {
43cacb12
RS
3261 rtx mem;
3262 poly_int64 offset;
3263 HOST_WIDE_INT const_offset;
82614948
RR
3264 enum aarch64_symbol_type sty;
3265
3266 /* If we have (const (plus symbol offset)), separate out the offset
3267 before we start classifying the symbol. */
43cacb12 3268 rtx base = strip_offset (imm, &offset);
82614948 3269
43cacb12
RS
3270 /* We must always add an offset involving VL separately, rather than
3271 folding it into the relocation. */
3272 if (!offset.is_constant (&const_offset))
3273 {
3274 if (base == const0_rtx && aarch64_sve_cnt_immediate_p (offset))
3275 emit_insn (gen_rtx_SET (dest, imm));
3276 else
3277 {
3278 /* Do arithmetic on 32-bit values if the result is smaller
3279 than that. */
3280 if (partial_subreg_p (int_mode, SImode))
3281 {
3282 /* It is invalid to do symbol calculations in modes
3283 narrower than SImode. */
3284 gcc_assert (base == const0_rtx);
3285 dest = gen_lowpart (SImode, dest);
3286 int_mode = SImode;
3287 }
3288 if (base != const0_rtx)
3289 {
3290 base = aarch64_force_temporary (int_mode, dest, base);
3291 aarch64_add_offset (int_mode, dest, base, offset,
3292 NULL_RTX, NULL_RTX, false);
3293 }
3294 else
3295 aarch64_add_offset (int_mode, dest, base, offset,
3296 dest, NULL_RTX, false);
3297 }
3298 return;
3299 }
3300
3301 sty = aarch64_classify_symbol (base, const_offset);
82614948
RR
3302 switch (sty)
3303 {
3304 case SYMBOL_FORCE_TO_MEM:
43cacb12 3305 if (const_offset != 0
77e994c9 3306 && targetm.cannot_force_const_mem (int_mode, imm))
82614948
RR
3307 {
3308 gcc_assert (can_create_pseudo_p ());
77e994c9 3309 base = aarch64_force_temporary (int_mode, dest, base);
43cacb12
RS
3310 aarch64_add_offset (int_mode, dest, base, const_offset,
3311 NULL_RTX, NULL_RTX, false);
82614948
RR
3312 return;
3313 }
b4f50fd4 3314
82614948
RR
3315 mem = force_const_mem (ptr_mode, imm);
3316 gcc_assert (mem);
b4f50fd4
RR
3317
3318 /* If we aren't generating PC relative literals, then
3319 we need to expand the literal pool access carefully.
3320 This is something that needs to be done in a number
3321 of places, so could well live as a separate function. */
9ee6540a 3322 if (!aarch64_pcrelative_literal_loads)
b4f50fd4
RR
3323 {
3324 gcc_assert (can_create_pseudo_p ());
3325 base = gen_reg_rtx (ptr_mode);
3326 aarch64_expand_mov_immediate (base, XEXP (mem, 0));
00eee3fa
WD
3327 if (ptr_mode != Pmode)
3328 base = convert_memory_address (Pmode, base);
b4f50fd4
RR
3329 mem = gen_rtx_MEM (ptr_mode, base);
3330 }
3331
77e994c9
RS
3332 if (int_mode != ptr_mode)
3333 mem = gen_rtx_ZERO_EXTEND (int_mode, mem);
b4f50fd4 3334
f7df4a84 3335 emit_insn (gen_rtx_SET (dest, mem));
b4f50fd4 3336
82614948
RR
3337 return;
3338
3339 case SYMBOL_SMALL_TLSGD:
3340 case SYMBOL_SMALL_TLSDESC:
79496620 3341 case SYMBOL_SMALL_TLSIE:
1b1e81f8 3342 case SYMBOL_SMALL_GOT_28K:
6642bdb4 3343 case SYMBOL_SMALL_GOT_4G:
82614948 3344 case SYMBOL_TINY_GOT:
5ae7caad 3345 case SYMBOL_TINY_TLSIE:
43cacb12 3346 if (const_offset != 0)
82614948
RR
3347 {
 3348 gcc_assert (can_create_pseudo_p ());
77e994c9 3349 base = aarch64_force_temporary (int_mode, dest, base);
43cacb12
RS
3350 aarch64_add_offset (int_mode, dest, base, const_offset,
3351 NULL_RTX, NULL_RTX, false);
82614948
RR
3352 return;
3353 }
3354 /* FALLTHRU */
3355
82614948
RR
3356 case SYMBOL_SMALL_ABSOLUTE:
3357 case SYMBOL_TINY_ABSOLUTE:
cbf5629e 3358 case SYMBOL_TLSLE12:
d18ba284 3359 case SYMBOL_TLSLE24:
cbf5629e
JW
3360 case SYMBOL_TLSLE32:
3361 case SYMBOL_TLSLE48:
82614948
RR
3362 aarch64_load_symref_appropriately (dest, imm, sty);
3363 return;
3364
3365 default:
3366 gcc_unreachable ();
3367 }
3368 }
3369
3370 if (!CONST_INT_P (imm))
3371 {
43cacb12
RS
3372 rtx base, step, value;
3373 if (GET_CODE (imm) == HIGH
3374 || aarch64_simd_valid_immediate (imm, NULL))
f7df4a84 3375 emit_insn (gen_rtx_SET (dest, imm));
43cacb12
RS
3376 else if (const_vec_series_p (imm, &base, &step))
3377 aarch64_expand_vec_series (dest, base, step);
3378 else if (const_vec_duplicate_p (imm, &value))
3379 {
3380 /* If the constant is out of range of an SVE vector move,
3381 load it from memory if we can, otherwise move it into
3382 a register and use a DUP. */
3383 scalar_mode inner_mode = GET_MODE_INNER (mode);
3384 rtx op = force_const_mem (inner_mode, value);
3385 if (!op)
3386 op = force_reg (inner_mode, value);
3387 else if (!aarch64_sve_ld1r_operand_p (op))
3388 {
3389 rtx addr = force_reg (Pmode, XEXP (op, 0));
3390 op = replace_equiv_address (op, addr);
3391 }
3392 emit_insn (gen_vec_duplicate (dest, op));
3393 }
3394 else if (GET_CODE (imm) == CONST_VECTOR
3395 && !GET_MODE_NUNITS (GET_MODE (imm)).is_constant ())
3396 aarch64_expand_sve_const_vector (dest, imm);
82614948 3397 else
43cacb12 3398 {
82614948
RR
3399 rtx mem = force_const_mem (mode, imm);
3400 gcc_assert (mem);
43cacb12 3401 emit_move_insn (dest, mem);
43e9d192 3402 }
82614948
RR
3403
3404 return;
43e9d192 3405 }
82614948 3406
77e994c9
RS
3407 aarch64_internal_mov_immediate (dest, imm, true,
3408 as_a <scalar_int_mode> (mode));
43e9d192
IB
3409}
3410
43cacb12
RS
3411/* Emit an SVE predicated move from SRC to DEST. PRED is a predicate
3412 that is known to contain PTRUE. */
3413
3414void
3415aarch64_emit_sve_pred_move (rtx dest, rtx pred, rtx src)
3416{
0c63a8ee
TC
3417 expand_operand ops[3];
3418 machine_mode mode = GET_MODE (dest);
3419 create_output_operand (&ops[0], dest, mode);
3420 create_input_operand (&ops[1], pred, GET_MODE(pred));
 3420 create_input_operand (&ops[1], pred, GET_MODE (pred));
3422 expand_insn (code_for_aarch64_pred_mov (mode), 3, ops);
43cacb12
RS
3423}
3424
3425/* Expand a pre-RA SVE data move from SRC to DEST in which at least one
3426 operand is in memory. In this case we need to use the predicated LD1
3427 and ST1 instead of LDR and STR, both for correctness on big-endian
3428 targets and because LD1 and ST1 support a wider range of addressing modes.
3429 PRED_MODE is the mode of the predicate.
3430
3431 See the comment at the head of aarch64-sve.md for details about the
3432 big-endian handling. */
3433
3434void
3435aarch64_expand_sve_mem_move (rtx dest, rtx src, machine_mode pred_mode)
3436{
3437 machine_mode mode = GET_MODE (dest);
3438 rtx ptrue = force_reg (pred_mode, CONSTM1_RTX (pred_mode));
3439 if (!register_operand (src, mode)
3440 && !register_operand (dest, mode))
3441 {
3442 rtx tmp = gen_reg_rtx (mode);
3443 if (MEM_P (src))
3444 aarch64_emit_sve_pred_move (tmp, ptrue, src);
3445 else
3446 emit_move_insn (tmp, src);
3447 src = tmp;
3448 }
3449 aarch64_emit_sve_pred_move (dest, ptrue, src);
3450}
3451
002092be
RS
3452/* Called only on big-endian targets. See whether an SVE vector move
3453 from SRC to DEST is effectively a REV[BHW] instruction, because at
3454 least one operand is a subreg of an SVE vector that has wider or
3455 narrower elements. Return true and emit the instruction if so.
3456
3457 For example:
3458
3459 (set (reg:VNx8HI R1) (subreg:VNx8HI (reg:VNx16QI R2) 0))
3460
3461 represents a VIEW_CONVERT between the following vectors, viewed
3462 in memory order:
3463
3464 R2: { [0].high, [0].low, [1].high, [1].low, ... }
3465 R1: { [0], [1], [2], [3], ... }
3466
3467 The high part of lane X in R2 should therefore correspond to lane X*2
3468 of R1, but the register representations are:
3469
3470 msb lsb
3471 R2: ...... [1].high [1].low [0].high [0].low
3472 R1: ...... [3] [2] [1] [0]
3473
3474 where the low part of lane X in R2 corresponds to lane X*2 in R1.
3475 We therefore need a reverse operation to swap the high and low values
3476 around.
3477
3478 This is purely an optimization. Without it we would spill the
3479 subreg operand to the stack in one mode and reload it in the
3480 other mode, which has the same effect as the REV. */
3481
3482bool
3483aarch64_maybe_expand_sve_subreg_move (rtx dest, rtx src)
3484{
3485 gcc_assert (BYTES_BIG_ENDIAN);
3486 if (GET_CODE (dest) == SUBREG)
3487 dest = SUBREG_REG (dest);
3488 if (GET_CODE (src) == SUBREG)
3489 src = SUBREG_REG (src);
3490
3491 /* The optimization handles two single SVE REGs with different element
3492 sizes. */
3493 if (!REG_P (dest)
3494 || !REG_P (src)
3495 || aarch64_classify_vector_mode (GET_MODE (dest)) != VEC_SVE_DATA
3496 || aarch64_classify_vector_mode (GET_MODE (src)) != VEC_SVE_DATA
3497 || (GET_MODE_UNIT_SIZE (GET_MODE (dest))
3498 == GET_MODE_UNIT_SIZE (GET_MODE (src))))
3499 return false;
3500
3501 /* Generate *aarch64_sve_mov<mode>_subreg_be. */
3502 rtx ptrue = force_reg (VNx16BImode, CONSTM1_RTX (VNx16BImode));
3503 rtx unspec = gen_rtx_UNSPEC (GET_MODE (dest), gen_rtvec (2, ptrue, src),
3504 UNSPEC_REV_SUBREG);
3505 emit_insn (gen_rtx_SET (dest, unspec));
3506 return true;
3507}
3508
3509/* Return a copy of X with mode MODE, without changing its other
3510 attributes. Unlike gen_lowpart, this doesn't care whether the
3511 mode change is valid. */
3512
3513static rtx
3514aarch64_replace_reg_mode (rtx x, machine_mode mode)
3515{
3516 if (GET_MODE (x) == mode)
3517 return x;
3518
3519 x = shallow_copy_rtx (x);
3520 set_mode_and_regno (x, mode, REGNO (x));
3521 return x;
3522}
3523
3524/* Split a *aarch64_sve_mov<mode>_subreg_be pattern with the given
3525 operands. */
3526
3527void
3528aarch64_split_sve_subreg_move (rtx dest, rtx ptrue, rtx src)
3529{
3530 /* Decide which REV operation we need. The mode with narrower elements
3531 determines the mode of the operands and the mode with the wider
3532 elements determines the reverse width. */
3533 machine_mode mode_with_wider_elts = GET_MODE (dest);
3534 machine_mode mode_with_narrower_elts = GET_MODE (src);
3535 if (GET_MODE_UNIT_SIZE (mode_with_wider_elts)
3536 < GET_MODE_UNIT_SIZE (mode_with_narrower_elts))
3537 std::swap (mode_with_wider_elts, mode_with_narrower_elts);
3538
3539 unsigned int wider_bytes = GET_MODE_UNIT_SIZE (mode_with_wider_elts);
3540 unsigned int unspec;
3541 if (wider_bytes == 8)
3542 unspec = UNSPEC_REV64;
3543 else if (wider_bytes == 4)
3544 unspec = UNSPEC_REV32;
3545 else if (wider_bytes == 2)
3546 unspec = UNSPEC_REV16;
3547 else
3548 gcc_unreachable ();
3549 machine_mode pred_mode = aarch64_sve_pred_mode (wider_bytes).require ();
3550
3551 /* Emit:
3552
3553 (set DEST (unspec [PTRUE (unspec [SRC] UNSPEC_REV<nn>)]
3554 UNSPEC_MERGE_PTRUE))
3555
3556 with the appropriate modes. */
3557 ptrue = gen_lowpart (pred_mode, ptrue);
3558 dest = aarch64_replace_reg_mode (dest, mode_with_narrower_elts);
3559 src = aarch64_replace_reg_mode (src, mode_with_narrower_elts);
3560 src = gen_rtx_UNSPEC (mode_with_narrower_elts, gen_rtvec (1, src), unspec);
3561 src = gen_rtx_UNSPEC (mode_with_narrower_elts, gen_rtvec (2, ptrue, src),
3562 UNSPEC_MERGE_PTRUE);
3563 emit_insn (gen_rtx_SET (dest, src));
3564}
3565
43e9d192 3566static bool
fee9ba42
JW
3567aarch64_function_ok_for_sibcall (tree decl ATTRIBUTE_UNUSED,
3568 tree exp ATTRIBUTE_UNUSED)
43e9d192 3569{
a0d0b980
SE
3570 if (aarch64_simd_decl_p (cfun->decl) != aarch64_simd_decl_p (decl))
3571 return false;
3572
43e9d192
IB
3573 return true;
3574}
3575
3576/* Implement TARGET_PASS_BY_REFERENCE. */
3577
3578static bool
3579aarch64_pass_by_reference (cumulative_args_t pcum ATTRIBUTE_UNUSED,
ef4bddc2 3580 machine_mode mode,
43e9d192
IB
3581 const_tree type,
3582 bool named ATTRIBUTE_UNUSED)
3583{
3584 HOST_WIDE_INT size;
ef4bddc2 3585 machine_mode dummymode;
43e9d192
IB
3586 int nregs;
3587
3588 /* GET_MODE_SIZE (BLKmode) is useless since it is 0. */
6a70badb
RS
3589 if (mode == BLKmode && type)
3590 size = int_size_in_bytes (type);
3591 else
3592 /* No frontends can create types with variable-sized modes, so we
3593 shouldn't be asked to pass or return them. */
3594 size = GET_MODE_SIZE (mode).to_constant ();
43e9d192 3595
aadc1c43
MHD
3596 /* Aggregates are passed by reference based on their size. */
3597 if (type && AGGREGATE_TYPE_P (type))
43e9d192 3598 {
aadc1c43 3599 size = int_size_in_bytes (type);
43e9d192
IB
3600 }
3601
3602 /* Variable sized arguments are always returned by reference. */
3603 if (size < 0)
3604 return true;
3605
3606 /* Can this be a candidate to be passed in fp/simd register(s)? */
3607 if (aarch64_vfp_is_call_or_return_candidate (mode, type,
3608 &dummymode, &nregs,
3609 NULL))
3610 return false;
3611
3612 /* Arguments which are variable sized or larger than 2 registers are
 3613 passed by reference unless they are a homogeneous floating point
3614 aggregate. */
3615 return size > 2 * UNITS_PER_WORD;
3616}
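/* Editorial sketch (not part of GCC): the decision above reduced to a
   standalone predicate.  UNITS_PER_WORD is 8 on AArch64, and
   IS_HFA_OR_HVA stands in for a true result from
   aarch64_vfp_is_call_or_return_candidate; the name
   example_pass_by_reference is illustrative only.  */
static bool
example_pass_by_reference (long size_in_bytes, bool is_hfa_or_hva)
{
  /* Variable-sized arguments are always passed by reference.  */
  if (size_in_bytes < 0)
    return true;
  /* HFA/HVA candidates are passed in FP/SIMD registers or on the stack.  */
  if (is_hfa_or_hva)
    return false;
  /* Everything larger than two 8-byte registers goes by reference.  */
  return size_in_bytes > 2 * 8;
}

/* e.g. a struct of three 8-byte integers (24 bytes, not an HFA) is passed
   by reference, while a struct of four doubles (32 bytes, an HFA) is not.  */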
3617
3618/* Return TRUE if VALTYPE is padded to its least significant bits. */
3619static bool
3620aarch64_return_in_msb (const_tree valtype)
3621{
ef4bddc2 3622 machine_mode dummy_mode;
43e9d192
IB
3623 int dummy_int;
3624
3625 /* Never happens in little-endian mode. */
3626 if (!BYTES_BIG_ENDIAN)
3627 return false;
3628
3629 /* Only composite types smaller than or equal to 16 bytes can
3630 be potentially returned in registers. */
3631 if (!aarch64_composite_type_p (valtype, TYPE_MODE (valtype))
3632 || int_size_in_bytes (valtype) <= 0
3633 || int_size_in_bytes (valtype) > 16)
3634 return false;
3635
3636 /* But not a composite that is an HFA (Homogeneous Floating-point Aggregate)
3637 or an HVA (Homogeneous Short-Vector Aggregate); such a special composite
3638 is always passed/returned in the least significant bits of fp/simd
3639 register(s). */
3640 if (aarch64_vfp_is_call_or_return_candidate (TYPE_MODE (valtype), valtype,
3641 &dummy_mode, &dummy_int, NULL))
3642 return false;
3643
3644 return true;
3645}
3646
3647/* Implement TARGET_FUNCTION_VALUE.
3648 Define how to find the value returned by a function. */
3649
3650static rtx
3651aarch64_function_value (const_tree type, const_tree func,
3652 bool outgoing ATTRIBUTE_UNUSED)
3653{
ef4bddc2 3654 machine_mode mode;
43e9d192
IB
3655 int unsignedp;
3656 int count;
ef4bddc2 3657 machine_mode ag_mode;
43e9d192
IB
3658
3659 mode = TYPE_MODE (type);
3660 if (INTEGRAL_TYPE_P (type))
3661 mode = promote_function_mode (type, mode, &unsignedp, func, 1);
3662
3663 if (aarch64_return_in_msb (type))
3664 {
3665 HOST_WIDE_INT size = int_size_in_bytes (type);
3666
3667 if (size % UNITS_PER_WORD != 0)
3668 {
3669 size += UNITS_PER_WORD - size % UNITS_PER_WORD;
f4b31647 3670 mode = int_mode_for_size (size * BITS_PER_UNIT, 0).require ();
43e9d192
IB
3671 }
3672 }
3673
3674 if (aarch64_vfp_is_call_or_return_candidate (mode, type,
3675 &ag_mode, &count, NULL))
3676 {
3677 if (!aarch64_composite_type_p (type, mode))
3678 {
3679 gcc_assert (count == 1 && mode == ag_mode);
3680 return gen_rtx_REG (mode, V0_REGNUM);
3681 }
3682 else
3683 {
3684 int i;
3685 rtx par;
3686
3687 par = gen_rtx_PARALLEL (mode, rtvec_alloc (count));
3688 for (i = 0; i < count; i++)
3689 {
3690 rtx tmp = gen_rtx_REG (ag_mode, V0_REGNUM + i);
6a70badb
RS
3691 rtx offset = gen_int_mode (i * GET_MODE_SIZE (ag_mode), Pmode);
3692 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp, offset);
43e9d192
IB
3693 XVECEXP (par, 0, i) = tmp;
3694 }
3695 return par;
3696 }
3697 }
3698 else
3699 return gen_rtx_REG (mode, R0_REGNUM);
3700}
3701
3702/* Implements TARGET_FUNCTION_VALUE_REGNO_P.
3703 Return true if REGNO is the number of a hard register in which the values
3704 of called function may come back. */
3705
3706static bool
3707aarch64_function_value_regno_p (const unsigned int regno)
3708{
3709 /* Maximum of 16 bytes can be returned in the general registers. Examples
3710 of 16-byte return values are: 128-bit integers and 16-byte small
3711 structures (excluding homogeneous floating-point aggregates). */
3712 if (regno == R0_REGNUM || regno == R1_REGNUM)
3713 return true;
3714
3715 /* Up to four fp/simd registers can return a function value, e.g. a
3716 homogeneous floating-point aggregate having four members. */
3717 if (regno >= V0_REGNUM && regno < V0_REGNUM + HA_MAX_NUM_FLDS)
d5726973 3718 return TARGET_FLOAT;
43e9d192
IB
3719
3720 return false;
3721}
3722
3723/* Implement TARGET_RETURN_IN_MEMORY.
3724
3725 If the type T of the result of a function is such that
3726 void func (T arg)
3727 would require that arg be passed as a value in a register (or set of
3728 registers) according to the parameter passing rules, then the result
3729 is returned in the same registers as would be used for such an
3730 argument. */
3731
3732static bool
3733aarch64_return_in_memory (const_tree type, const_tree fndecl ATTRIBUTE_UNUSED)
3734{
3735 HOST_WIDE_INT size;
ef4bddc2 3736 machine_mode ag_mode;
43e9d192
IB
3737 int count;
3738
3739 if (!AGGREGATE_TYPE_P (type)
3740 && TREE_CODE (type) != COMPLEX_TYPE
3741 && TREE_CODE (type) != VECTOR_TYPE)
 3742 /* Simple scalar types are always returned in registers. */
3743 return false;
3744
3745 if (aarch64_vfp_is_call_or_return_candidate (TYPE_MODE (type),
3746 type,
3747 &ag_mode,
3748 &count,
3749 NULL))
3750 return false;
3751
 3752 /* Types larger than 2 registers are returned in memory. */
3753 size = int_size_in_bytes (type);
3754 return (size < 0 || size > 2 * UNITS_PER_WORD);
3755}
3756
3757static bool
ef4bddc2 3758aarch64_vfp_is_call_candidate (cumulative_args_t pcum_v, machine_mode mode,
43e9d192
IB
3759 const_tree type, int *nregs)
3760{
3761 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
3762 return aarch64_vfp_is_call_or_return_candidate (mode,
3763 type,
3764 &pcum->aapcs_vfp_rmode,
3765 nregs,
3766 NULL);
3767}
3768
985b8393 3769/* Given MODE and TYPE of a function argument, return the alignment in
43e9d192 3770 bits. The idea is to suppress any stronger alignment requested by
c590597c
RE
3771 the user and opt for the natural alignment (specified in AAPCS64 \S
3772 4.1). ABI_BREAK is set to true if the alignment was incorrectly
3773 calculated in versions of GCC prior to GCC-9. This is a helper
3774 function for local use only. */
43e9d192 3775
985b8393 3776static unsigned int
c590597c
RE
3777aarch64_function_arg_alignment (machine_mode mode, const_tree type,
3778 bool *abi_break)
43e9d192 3779{
c590597c 3780 *abi_break = false;
75d6cc81 3781 if (!type)
985b8393 3782 return GET_MODE_ALIGNMENT (mode);
2ec07fa6 3783
75d6cc81 3784 if (integer_zerop (TYPE_SIZE (type)))
985b8393 3785 return 0;
43e9d192 3786
75d6cc81
AL
3787 gcc_assert (TYPE_MODE (type) == mode);
3788
3789 if (!AGGREGATE_TYPE_P (type))
985b8393 3790 return TYPE_ALIGN (TYPE_MAIN_VARIANT (type));
75d6cc81
AL
3791
3792 if (TREE_CODE (type) == ARRAY_TYPE)
985b8393 3793 return TYPE_ALIGN (TREE_TYPE (type));
75d6cc81 3794
985b8393 3795 unsigned int alignment = 0;
c590597c 3796 unsigned int bitfield_alignment = 0;
75d6cc81 3797 for (tree field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
985b8393 3798 if (TREE_CODE (field) == FIELD_DECL)
c590597c
RE
3799 {
3800 alignment = std::max (alignment, DECL_ALIGN (field));
3801 if (DECL_BIT_FIELD_TYPE (field))
3802 bitfield_alignment
3803 = std::max (bitfield_alignment,
3804 TYPE_ALIGN (DECL_BIT_FIELD_TYPE (field)));
3805 }
3806
3807 if (bitfield_alignment > alignment)
3808 {
3809 *abi_break = true;
3810 return bitfield_alignment;
3811 }
43e9d192 3812
985b8393 3813 return alignment;
43e9d192
IB
3814}
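/* Editorial sketch (not part of GCC): the aggregate case of the rule
   above on a toy field descriptor, to make the GCC 9.1 bit-field change
   concrete.  example_field and example_aggregate_alignment are
   illustrative names, not GCC APIs; alignments are in bits, as above.  */

#include <algorithm>

struct example_field
{
  unsigned int decl_align;	/* DECL_ALIGN of the member.  */
  bool is_bit_field;		/* Whether DECL_BIT_FIELD_TYPE is set.  */
  unsigned int bit_field_type_align; /* TYPE_ALIGN of the declared type.  */
};

static unsigned int
example_aggregate_alignment (const example_field *fields, int nfields,
			     bool *abi_break)
{
  unsigned int alignment = 0;
  unsigned int bitfield_alignment = 0;
  *abi_break = false;

  for (int i = 0; i < nfields; ++i)
    {
      alignment = std::max (alignment, fields[i].decl_align);
      if (fields[i].is_bit_field)
	bitfield_alignment = std::max (bitfield_alignment,
				       fields[i].bit_field_type_align);
    }

  /* Versions before GCC 9.1 looked only at DECL_ALIGN, which can
     under-report the alignment when the most-aligned member is a
     bit-field; here the larger declared-type alignment wins and the
     caller is told the ABI changed.  */
  if (bitfield_alignment > alignment)
    {
      *abi_break = true;
      return bitfield_alignment;
    }
  return alignment;
}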
3815
3816/* Layout a function argument according to the AAPCS64 rules. The rule
3817 numbers refer to the rule numbers in the AAPCS64. */
3818
3819static void
ef4bddc2 3820aarch64_layout_arg (cumulative_args_t pcum_v, machine_mode mode,
43e9d192
IB
3821 const_tree type,
3822 bool named ATTRIBUTE_UNUSED)
3823{
3824 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
3825 int ncrn, nvrn, nregs;
3826 bool allocate_ncrn, allocate_nvrn;
3abf17cf 3827 HOST_WIDE_INT size;
c590597c 3828 bool abi_break;
43e9d192
IB
3829
3830 /* We need to do this once per argument. */
3831 if (pcum->aapcs_arg_processed)
3832 return;
3833
3834 pcum->aapcs_arg_processed = true;
3835
3abf17cf 3836 /* Size in bytes, rounded to the nearest multiple of 8 bytes. */
6a70badb
RS
3837 if (type)
3838 size = int_size_in_bytes (type);
3839 else
3840 /* No frontends can create types with variable-sized modes, so we
3841 shouldn't be asked to pass or return them. */
3842 size = GET_MODE_SIZE (mode).to_constant ();
3843 size = ROUND_UP (size, UNITS_PER_WORD);
3abf17cf 3844
43e9d192
IB
3845 allocate_ncrn = (type) ? !(FLOAT_TYPE_P (type)) : !FLOAT_MODE_P (mode);
3846 allocate_nvrn = aarch64_vfp_is_call_candidate (pcum_v,
3847 mode,
3848 type,
3849 &nregs);
3850
 3851 /* allocate_ncrn may be a false positive, but allocate_nvrn is quite reliable.
3852 The following code thus handles passing by SIMD/FP registers first. */
3853
3854 nvrn = pcum->aapcs_nvrn;
3855
 3856 /* C1 - C5 for floating point, homogeneous floating-point aggregates (HFA)
 3857 and homogeneous short-vector aggregates (HVA). */
3858 if (allocate_nvrn)
3859 {
261fb553 3860 if (!TARGET_FLOAT)
fc29dfc9 3861 aarch64_err_no_fpadvsimd (mode);
261fb553 3862
43e9d192
IB
3863 if (nvrn + nregs <= NUM_FP_ARG_REGS)
3864 {
3865 pcum->aapcs_nextnvrn = nvrn + nregs;
3866 if (!aarch64_composite_type_p (type, mode))
3867 {
3868 gcc_assert (nregs == 1);
3869 pcum->aapcs_reg = gen_rtx_REG (mode, V0_REGNUM + nvrn);
3870 }
3871 else
3872 {
3873 rtx par;
3874 int i;
3875 par = gen_rtx_PARALLEL (mode, rtvec_alloc (nregs));
3876 for (i = 0; i < nregs; i++)
3877 {
3878 rtx tmp = gen_rtx_REG (pcum->aapcs_vfp_rmode,
3879 V0_REGNUM + nvrn + i);
6a70badb
RS
3880 rtx offset = gen_int_mode
3881 (i * GET_MODE_SIZE (pcum->aapcs_vfp_rmode), Pmode);
3882 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp, offset);
43e9d192
IB
3883 XVECEXP (par, 0, i) = tmp;
3884 }
3885 pcum->aapcs_reg = par;
3886 }
3887 return;
3888 }
3889 else
3890 {
3891 /* C.3 NSRN is set to 8. */
3892 pcum->aapcs_nextnvrn = NUM_FP_ARG_REGS;
3893 goto on_stack;
3894 }
3895 }
3896
3897 ncrn = pcum->aapcs_ncrn;
3abf17cf 3898 nregs = size / UNITS_PER_WORD;
43e9d192
IB
3899
 3900 /* C6 - C9, though the sign and zero extension semantics are
 3901 handled elsewhere. This is the case where the argument fits
 3902 entirely in general registers. */
3903 if (allocate_ncrn && (ncrn + nregs <= NUM_ARG_REGS))
3904 {
43e9d192
IB
3905 gcc_assert (nregs == 0 || nregs == 1 || nregs == 2);
3906
3907 /* C.8 if the argument has an alignment of 16 then the NGRN is
c590597c 3908 rounded up to the next even number. */
985b8393
JJ
3909 if (nregs == 2
3910 && ncrn % 2
2ec07fa6 3911 /* The == 16 * BITS_PER_UNIT instead of >= 16 * BITS_PER_UNIT
985b8393 3912 comparison is there because for > 16 * BITS_PER_UNIT
2ec07fa6
RR
3913 alignment nregs should be > 2 and therefore it should be
3914 passed by reference rather than value. */
c590597c
RE
3915 && (aarch64_function_arg_alignment (mode, type, &abi_break)
3916 == 16 * BITS_PER_UNIT))
985b8393 3917 {
c590597c
RE
3918 if (abi_break && warn_psabi && currently_expanding_gimple_stmt)
3919 inform (input_location, "parameter passing for argument of type "
3920 "%qT changed in GCC 9.1", type);
985b8393
JJ
3921 ++ncrn;
3922 gcc_assert (ncrn + nregs <= NUM_ARG_REGS);
43e9d192 3923 }
2ec07fa6 3924
43e9d192 3925 /* NREGS can be 0 when e.g. an empty structure is to be passed.
c590597c 3926 A reg is still generated for it, but the caller should be smart
43e9d192
IB
3927 enough not to use it. */
3928 if (nregs == 0 || nregs == 1 || GET_MODE_CLASS (mode) == MODE_INT)
2ec07fa6 3929 pcum->aapcs_reg = gen_rtx_REG (mode, R0_REGNUM + ncrn);
43e9d192
IB
3930 else
3931 {
3932 rtx par;
3933 int i;
3934
3935 par = gen_rtx_PARALLEL (mode, rtvec_alloc (nregs));
3936 for (i = 0; i < nregs; i++)
3937 {
3938 rtx tmp = gen_rtx_REG (word_mode, R0_REGNUM + ncrn + i);
3939 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp,
3940 GEN_INT (i * UNITS_PER_WORD));
3941 XVECEXP (par, 0, i) = tmp;
3942 }
3943 pcum->aapcs_reg = par;
3944 }
3945
3946 pcum->aapcs_nextncrn = ncrn + nregs;
3947 return;
3948 }
3949
3950 /* C.11 */
3951 pcum->aapcs_nextncrn = NUM_ARG_REGS;
3952
3953 /* The argument is passed on stack; record the needed number of words for
3abf17cf 3954 this argument and align the total size if necessary. */
43e9d192 3955on_stack:
3abf17cf 3956 pcum->aapcs_stack_words = size / UNITS_PER_WORD;
2ec07fa6 3957
c590597c
RE
3958 if (aarch64_function_arg_alignment (mode, type, &abi_break)
3959 == 16 * BITS_PER_UNIT)
3960 {
3961 int new_size = ROUND_UP (pcum->aapcs_stack_size, 16 / UNITS_PER_WORD);
3962 if (pcum->aapcs_stack_size != new_size)
3963 {
3964 if (abi_break && warn_psabi && currently_expanding_gimple_stmt)
3965 inform (input_location, "parameter passing for argument of type "
3966 "%qT changed in GCC 9.1", type);
3967 pcum->aapcs_stack_size = new_size;
3968 }
3969 }
43e9d192
IB
3970 return;
3971}
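/* Editorial sketch (not part of GCC): the core-register book-keeping of
   rules C.8-C.11 above, stripped of the RTL generation.  Given the next
   free core register *NCRN (x0 is 0) plus the argument's size and
   alignment in bytes, return the first register used, or -1 when the
   argument goes on the stack.  All names are illustrative; the FP/SIMD
   path and the psABI diagnostics are ignored here.  */
static int
example_allocate_core_regs (int *ncrn, long size_in_bytes, int align_in_bytes)
{
  const int num_arg_regs = 8;			/* x0-x7.  */
  long size = (size_in_bytes + 7) & -8;		/* Round up to 8 bytes.  */
  int nregs = size / 8;

  if (*ncrn + nregs > num_arg_regs)
    return -1;					/* C.11: passed on the stack.  */

  /* C.8: a 16-byte-aligned argument must start at an even-numbered
     register, so an odd NCRN is rounded up first.  */
  if (nregs == 2 && (*ncrn & 1) && align_in_bytes == 16)
    ++*ncrn;

  int first = *ncrn;
  *ncrn += nregs;
  return first;
}

/* e.g. with *ncrn == 1, a 16-byte struct with 16-byte alignment lands in
   x2/x3 and x1 stays unused, mirroring the rounding applied above to
   pcum->aapcs_ncrn.  */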
3972
3973/* Implement TARGET_FUNCTION_ARG. */
3974
3975static rtx
ef4bddc2 3976aarch64_function_arg (cumulative_args_t pcum_v, machine_mode mode,
43e9d192
IB
3977 const_tree type, bool named)
3978{
3979 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
3980 gcc_assert (pcum->pcs_variant == ARM_PCS_AAPCS64);
3981
3982 if (mode == VOIDmode)
3983 return NULL_RTX;
3984
3985 aarch64_layout_arg (pcum_v, mode, type, named);
3986 return pcum->aapcs_reg;
3987}
3988
3989void
3990aarch64_init_cumulative_args (CUMULATIVE_ARGS *pcum,
3991 const_tree fntype ATTRIBUTE_UNUSED,
3992 rtx libname ATTRIBUTE_UNUSED,
3993 const_tree fndecl ATTRIBUTE_UNUSED,
3994 unsigned n_named ATTRIBUTE_UNUSED)
3995{
3996 pcum->aapcs_ncrn = 0;
3997 pcum->aapcs_nvrn = 0;
3998 pcum->aapcs_nextncrn = 0;
3999 pcum->aapcs_nextnvrn = 0;
4000 pcum->pcs_variant = ARM_PCS_AAPCS64;
4001 pcum->aapcs_reg = NULL_RTX;
4002 pcum->aapcs_arg_processed = false;
4003 pcum->aapcs_stack_words = 0;
4004 pcum->aapcs_stack_size = 0;
4005
261fb553
AL
4006 if (!TARGET_FLOAT
4007 && fndecl && TREE_PUBLIC (fndecl)
4008 && fntype && fntype != error_mark_node)
4009 {
4010 const_tree type = TREE_TYPE (fntype);
4011 machine_mode mode ATTRIBUTE_UNUSED; /* To pass pointer as argument. */
4012 int nregs ATTRIBUTE_UNUSED; /* Likewise. */
4013 if (aarch64_vfp_is_call_or_return_candidate (TYPE_MODE (type), type,
4014 &mode, &nregs, NULL))
fc29dfc9 4015 aarch64_err_no_fpadvsimd (TYPE_MODE (type));
261fb553 4016 }
43e9d192
IB
4017 return;
4018}
4019
4020static void
4021aarch64_function_arg_advance (cumulative_args_t pcum_v,
ef4bddc2 4022 machine_mode mode,
43e9d192
IB
4023 const_tree type,
4024 bool named)
4025{
4026 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
4027 if (pcum->pcs_variant == ARM_PCS_AAPCS64)
4028 {
4029 aarch64_layout_arg (pcum_v, mode, type, named);
4030 gcc_assert ((pcum->aapcs_reg != NULL_RTX)
4031 != (pcum->aapcs_stack_words != 0));
4032 pcum->aapcs_arg_processed = false;
4033 pcum->aapcs_ncrn = pcum->aapcs_nextncrn;
4034 pcum->aapcs_nvrn = pcum->aapcs_nextnvrn;
4035 pcum->aapcs_stack_size += pcum->aapcs_stack_words;
4036 pcum->aapcs_stack_words = 0;
4037 pcum->aapcs_reg = NULL_RTX;
4038 }
4039}
4040
4041bool
4042aarch64_function_arg_regno_p (unsigned regno)
4043{
4044 return ((GP_REGNUM_P (regno) && regno < R0_REGNUM + NUM_ARG_REGS)
4045 || (FP_REGNUM_P (regno) && regno < V0_REGNUM + NUM_FP_ARG_REGS));
4046}
4047
4048/* Implement FUNCTION_ARG_BOUNDARY. Every parameter gets at least
4049 PARM_BOUNDARY bits of alignment, but will be given anything up
4050 to STACK_BOUNDARY bits if the type requires it. This makes sure
4051 that both before and after the layout of each argument, the Next
4052 Stacked Argument Address (NSAA) will have a minimum alignment of
4053 8 bytes. */
4054
4055static unsigned int
ef4bddc2 4056aarch64_function_arg_boundary (machine_mode mode, const_tree type)
43e9d192 4057{
c590597c
RE
4058 bool abi_break;
4059 unsigned int alignment = aarch64_function_arg_alignment (mode, type,
4060 &abi_break);
 4061 if (abi_break && warn_psabi)
4062 inform (input_location, "parameter passing for argument of type "
4063 "%qT changed in GCC 9.1", type);
4064
985b8393 4065 return MIN (MAX (alignment, PARM_BOUNDARY), STACK_BOUNDARY);
43e9d192
IB
4066}
4067
43cacb12
RS
4068/* Implement TARGET_GET_RAW_RESULT_MODE and TARGET_GET_RAW_ARG_MODE. */
4069
4070static fixed_size_mode
4071aarch64_get_reg_raw_mode (int regno)
4072{
4073 if (TARGET_SVE && FP_REGNUM_P (regno))
4074 /* Don't use the SVE part of the register for __builtin_apply and
4075 __builtin_return. The SVE registers aren't used by the normal PCS,
4076 so using them there would be a waste of time. The PCS extensions
4077 for SVE types are fundamentally incompatible with the
4078 __builtin_return/__builtin_apply interface. */
4079 return as_a <fixed_size_mode> (V16QImode);
4080 return default_get_reg_raw_mode (regno);
4081}
4082
76b0cbf8 4083/* Implement TARGET_FUNCTION_ARG_PADDING.
43e9d192
IB
4084
4085 Small aggregate types are placed in the lowest memory address.
4086
4087 The related parameter passing rules are B.4, C.3, C.5 and C.14. */
4088
76b0cbf8
RS
4089static pad_direction
4090aarch64_function_arg_padding (machine_mode mode, const_tree type)
43e9d192
IB
4091{
4092 /* On little-endian targets, the least significant byte of every stack
4093 argument is passed at the lowest byte address of the stack slot. */
4094 if (!BYTES_BIG_ENDIAN)
76b0cbf8 4095 return PAD_UPWARD;
43e9d192 4096
00edcfbe 4097 /* Otherwise, integral, floating-point and pointer types are padded downward:
43e9d192
IB
4098 the least significant byte of a stack argument is passed at the highest
4099 byte address of the stack slot. */
4100 if (type
00edcfbe
YZ
4101 ? (INTEGRAL_TYPE_P (type) || SCALAR_FLOAT_TYPE_P (type)
4102 || POINTER_TYPE_P (type))
43e9d192 4103 : (SCALAR_INT_MODE_P (mode) || SCALAR_FLOAT_MODE_P (mode)))
76b0cbf8 4104 return PAD_DOWNWARD;
43e9d192
IB
4105
4106 /* Everything else padded upward, i.e. data in first byte of stack slot. */
76b0cbf8 4107 return PAD_UPWARD;
43e9d192
IB
4108}
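/* Editorial sketch (not part of GCC): the padding rule above as a
   standalone predicate.  IS_SCALAR stands for the integral /
   floating-point / pointer test; the names are illustrative only.  */
static bool
example_pad_downward (bool bytes_big_endian, bool is_scalar)
{
  /* Little-endian: everything is padded upward.  */
  if (!bytes_big_endian)
    return false;
  /* Big-endian: scalars are padded downward, aggregates upward.  */
  return is_scalar;
}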
4109
4110/* Similarly, for use by BLOCK_REG_PADDING (MODE, TYPE, FIRST).
4111
4112 It specifies padding for the last (may also be the only)
4113 element of a block move between registers and memory. If
4114 assuming the block is in the memory, padding upward means that
4115 the last element is padded after its highest significant byte,
4116 while in downward padding, the last element is padded at the
4117 its least significant byte side.
4118
4119 Small aggregates and small complex types are always padded
4120 upwards.
4121
4122 We don't need to worry about homogeneous floating-point or
4123 short-vector aggregates; their move is not affected by the
4124 padding direction determined here. Regardless of endianness,
4125 each element of such an aggregate is put in the least
4126 significant bits of a fp/simd register.
4127
4128 Return !BYTES_BIG_ENDIAN if the least significant byte of the
4129 register has useful data, and return the opposite if the most
4130 significant byte does. */
4131
4132bool
ef4bddc2 4133aarch64_pad_reg_upward (machine_mode mode, const_tree type,
43e9d192
IB
4134 bool first ATTRIBUTE_UNUSED)
4135{
4136
4137 /* Small composite types are always padded upward. */
4138 if (BYTES_BIG_ENDIAN && aarch64_composite_type_p (type, mode))
4139 {
6a70badb
RS
4140 HOST_WIDE_INT size;
4141 if (type)
4142 size = int_size_in_bytes (type);
4143 else
4144 /* No frontends can create types with variable-sized modes, so we
4145 shouldn't be asked to pass or return them. */
4146 size = GET_MODE_SIZE (mode).to_constant ();
43e9d192
IB
4147 if (size < 2 * UNITS_PER_WORD)
4148 return true;
4149 }
4150
4151 /* Otherwise, use the default padding. */
4152 return !BYTES_BIG_ENDIAN;
4153}
4154
095a2d76 4155static scalar_int_mode
43e9d192
IB
4156aarch64_libgcc_cmp_return_mode (void)
4157{
4158 return SImode;
4159}
4160
a3eb8a52
EB
4161#define PROBE_INTERVAL (1 << STACK_CHECK_PROBE_INTERVAL_EXP)
4162
4163/* We use the 12-bit shifted immediate arithmetic instructions so values
4164 must be multiple of (1 << 12), i.e. 4096. */
4165#define ARITH_FACTOR 4096
4166
4167#if (PROBE_INTERVAL % ARITH_FACTOR) != 0
4168#error Cannot use simple address calculation for stack probing
4169#endif
4170
4171/* The pair of scratch registers used for stack probing. */
8921ccbb
OH
4172#define PROBE_STACK_FIRST_REG R9_REGNUM
4173#define PROBE_STACK_SECOND_REG R10_REGNUM
a3eb8a52 4174
6a70badb 4175/* Emit code to probe a range of stack addresses from FIRST to FIRST+POLY_SIZE,
a3eb8a52
EB
4176 inclusive. These are offsets from the current stack pointer. */
4177
4178static void
6a70badb 4179aarch64_emit_probe_stack_range (HOST_WIDE_INT first, poly_int64 poly_size)
a3eb8a52 4180{
6a70badb
RS
4181 HOST_WIDE_INT size;
4182 if (!poly_size.is_constant (&size))
4183 {
4184 sorry ("stack probes for SVE frames");
4185 return;
4186 }
4187
5f5c5e0f 4188 rtx reg1 = gen_rtx_REG (Pmode, PROBE_STACK_FIRST_REG);
a3eb8a52
EB
4189
4190 /* See the same assertion on PROBE_INTERVAL above. */
4191 gcc_assert ((first % ARITH_FACTOR) == 0);
4192
4193 /* See if we have a constant small number of probes to generate. If so,
4194 that's the easy case. */
4195 if (size <= PROBE_INTERVAL)
4196 {
4197 const HOST_WIDE_INT base = ROUND_UP (size, ARITH_FACTOR);
4198
4199 emit_set_insn (reg1,
5f5c5e0f 4200 plus_constant (Pmode,
a3eb8a52 4201 stack_pointer_rtx, -(first + base)));
5f5c5e0f 4202 emit_stack_probe (plus_constant (Pmode, reg1, base - size));
a3eb8a52
EB
4203 }
4204
4205 /* The run-time loop is made up of 8 insns in the generic case while the
 4206 compile-time loop is made up of 4+2*(n-2) insns for n intervals. */
4207 else if (size <= 4 * PROBE_INTERVAL)
4208 {
4209 HOST_WIDE_INT i, rem;
4210
4211 emit_set_insn (reg1,
5f5c5e0f 4212 plus_constant (Pmode,
a3eb8a52
EB
4213 stack_pointer_rtx,
4214 -(first + PROBE_INTERVAL)));
4215 emit_stack_probe (reg1);
4216
4217 /* Probe at FIRST + N * PROBE_INTERVAL for values of N from 2 until
4218 it exceeds SIZE. If only two probes are needed, this will not
4219 generate any code. Then probe at FIRST + SIZE. */
4220 for (i = 2 * PROBE_INTERVAL; i < size; i += PROBE_INTERVAL)
4221 {
4222 emit_set_insn (reg1,
5f5c5e0f 4223 plus_constant (Pmode, reg1, -PROBE_INTERVAL));
a3eb8a52
EB
4224 emit_stack_probe (reg1);
4225 }
4226
4227 rem = size - (i - PROBE_INTERVAL);
4228 if (rem > 256)
4229 {
4230 const HOST_WIDE_INT base = ROUND_UP (rem, ARITH_FACTOR);
4231
5f5c5e0f
EB
4232 emit_set_insn (reg1, plus_constant (Pmode, reg1, -base));
4233 emit_stack_probe (plus_constant (Pmode, reg1, base - rem));
a3eb8a52
EB
4234 }
4235 else
5f5c5e0f 4236 emit_stack_probe (plus_constant (Pmode, reg1, -rem));
a3eb8a52
EB
4237 }
4238
4239 /* Otherwise, do the same as above, but in a loop. Note that we must be
4240 extra careful with variables wrapping around because we might be at
4241 the very top (or the very bottom) of the address space and we have
4242 to be able to handle this case properly; in particular, we use an
4243 equality test for the loop condition. */
4244 else
4245 {
5f5c5e0f 4246 rtx reg2 = gen_rtx_REG (Pmode, PROBE_STACK_SECOND_REG);
a3eb8a52
EB
4247
4248 /* Step 1: round SIZE to the previous multiple of the interval. */
4249
4250 HOST_WIDE_INT rounded_size = size & -PROBE_INTERVAL;
4251
4252
4253 /* Step 2: compute initial and final value of the loop counter. */
4254
4255 /* TEST_ADDR = SP + FIRST. */
4256 emit_set_insn (reg1,
5f5c5e0f 4257 plus_constant (Pmode, stack_pointer_rtx, -first));
a3eb8a52
EB
4258
4259 /* LAST_ADDR = SP + FIRST + ROUNDED_SIZE. */
13f752b2
JL
4260 HOST_WIDE_INT adjustment = - (first + rounded_size);
4261 if (! aarch64_uimm12_shift (adjustment))
4262 {
4263 aarch64_internal_mov_immediate (reg2, GEN_INT (adjustment),
4264 true, Pmode);
4265 emit_set_insn (reg2, gen_rtx_PLUS (Pmode, stack_pointer_rtx, reg2));
4266 }
4267 else
8dd64cdf
EB
4268 emit_set_insn (reg2,
4269 plus_constant (Pmode, stack_pointer_rtx, adjustment));
4270
a3eb8a52
EB
4271 /* Step 3: the loop
4272
4273 do
4274 {
4275 TEST_ADDR = TEST_ADDR + PROBE_INTERVAL
4276 probe at TEST_ADDR
4277 }
4278 while (TEST_ADDR != LAST_ADDR)
4279
4280 probes at FIRST + N * PROBE_INTERVAL for values of N from 1
4281 until it is equal to ROUNDED_SIZE. */
4282
5f5c5e0f 4283 emit_insn (gen_probe_stack_range (reg1, reg1, reg2));
a3eb8a52
EB
4284
4285
4286 /* Step 4: probe at FIRST + SIZE if we cannot assert at compile-time
4287 that SIZE is equal to ROUNDED_SIZE. */
4288
4289 if (size != rounded_size)
4290 {
4291 HOST_WIDE_INT rem = size - rounded_size;
4292
4293 if (rem > 256)
4294 {
4295 const HOST_WIDE_INT base = ROUND_UP (rem, ARITH_FACTOR);
4296
5f5c5e0f
EB
4297 emit_set_insn (reg2, plus_constant (Pmode, reg2, -base));
4298 emit_stack_probe (plus_constant (Pmode, reg2, base - rem));
a3eb8a52
EB
4299 }
4300 else
5f5c5e0f 4301 emit_stack_probe (plus_constant (Pmode, reg2, -rem));
a3eb8a52
EB
4302 }
4303 }
4304
4305 /* Make sure nothing is scheduled before we are done. */
4306 emit_insn (gen_blockage ());
4307}
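/* Editorial sketch (not part of GCC): the net effect the three cases
   above have in common, expressed as the offsets below the incoming SP
   that end up being probed for a given FIRST and SIZE.  PROBE_INTERVAL
   is 4096 here (STACK_CHECK_PROBE_INTERVAL_EXP == 12); this models the
   intent, not the exact instruction sequences emitted.  */

#include <stdio.h>

static void
example_probe_offsets (long first, long size)
{
  const long probe_interval = 4096;

  /* Probes at FIRST + N * PROBE_INTERVAL for N = 1, 2, ... while the
     offset is still inside the region being allocated...  */
  for (long offset = probe_interval; offset < size; offset += probe_interval)
    printf ("probe at [sp - %ld]\n", first + offset);

  /* ... plus a final probe at FIRST + SIZE, so that no gap of
     PROBE_INTERVAL bytes or more is left unprobed.  */
  printf ("probe at [sp - %ld]\n", first + size);
}

/* e.g. example_probe_offsets (0, 10000) reports probes at sp-4096,
   sp-8192 and sp-10000.  */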
4308
4309/* Probe a range of stack addresses from REG1 to REG2 inclusive. These are
4310 absolute addresses. */
4311
4312const char *
4313aarch64_output_probe_stack_range (rtx reg1, rtx reg2)
4314{
4315 static int labelno = 0;
4316 char loop_lab[32];
4317 rtx xops[2];
4318
4319 ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno++);
4320
4321 /* Loop. */
4322 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);
4323
cd1bef27
JL
4324 HOST_WIDE_INT stack_clash_probe_interval
4325 = 1 << PARAM_VALUE (PARAM_STACK_CLASH_PROTECTION_GUARD_SIZE);
4326
a3eb8a52
EB
4327 /* TEST_ADDR = TEST_ADDR + PROBE_INTERVAL. */
4328 xops[0] = reg1;
cd1bef27
JL
4329 HOST_WIDE_INT interval;
4330 if (flag_stack_clash_protection)
4331 interval = stack_clash_probe_interval;
4332 else
4333 interval = PROBE_INTERVAL;
4334
4335 gcc_assert (aarch64_uimm12_shift (interval));
4336 xops[1] = GEN_INT (interval);
4337
a3eb8a52
EB
4338 output_asm_insn ("sub\t%0, %0, %1", xops);
4339
cd1bef27
JL
4340 /* If doing stack clash protection then we probe up by the ABI specified
4341 amount. We do this because we're dropping full pages at a time in the
4342 loop. But if we're doing non-stack clash probing, probe at SP 0. */
4343 if (flag_stack_clash_protection)
4344 xops[1] = GEN_INT (STACK_CLASH_CALLER_GUARD);
4345 else
4346 xops[1] = CONST0_RTX (GET_MODE (xops[1]));
4347
4348 /* Probe at TEST_ADDR. If we're inside the loop it is always safe to probe
4349 by this amount for each iteration. */
4350 output_asm_insn ("str\txzr, [%0, %1]", xops);
a3eb8a52
EB
4351
4352 /* Test if TEST_ADDR == LAST_ADDR. */
4353 xops[1] = reg2;
4354 output_asm_insn ("cmp\t%0, %1", xops);
4355
4356 /* Branch. */
4357 fputs ("\tb.ne\t", asm_out_file);
4358 assemble_name_raw (asm_out_file, loop_lab);
4359 fputc ('\n', asm_out_file);
4360
4361 return "";
4362}
4363
eb471ba3
TC
4364/* Emit the probe loop for doing stack clash probes and stack adjustments for
4365 SVE. This emits probes from BASE to BASE - ADJUSTMENT based on a guard size
4366 of GUARD_SIZE. When a probe is emitted it is done at most
4367 MIN_PROBE_THRESHOLD bytes from the current BASE at an interval of
4368 at most MIN_PROBE_THRESHOLD. By the end of this function
4369 BASE = BASE - ADJUSTMENT. */
4370
4371const char *
4372aarch64_output_probe_sve_stack_clash (rtx base, rtx adjustment,
4373 rtx min_probe_threshold, rtx guard_size)
4374{
4375 /* This function is not allowed to use any instruction generation function
4376 like gen_ and friends. If you do you'll likely ICE during CFG validation,
4377 so instead emit the code you want using output_asm_insn. */
4378 gcc_assert (flag_stack_clash_protection);
4379 gcc_assert (CONST_INT_P (min_probe_threshold) && CONST_INT_P (guard_size));
4380 gcc_assert (INTVAL (guard_size) > INTVAL (min_probe_threshold));
4381
4382 /* The minimum required allocation before the residual requires probing. */
4383 HOST_WIDE_INT residual_probe_guard = INTVAL (min_probe_threshold);
4384
4385 /* Clamp the value down to the nearest value that can be used with a cmp. */
4386 residual_probe_guard = aarch64_clamp_to_uimm12_shift (residual_probe_guard);
4387 rtx probe_offset_value_rtx = gen_int_mode (residual_probe_guard, Pmode);
4388
4389 gcc_assert (INTVAL (min_probe_threshold) >= residual_probe_guard);
4390 gcc_assert (aarch64_uimm12_shift (residual_probe_guard));
4391
4392 static int labelno = 0;
4393 char loop_start_lab[32];
4394 char loop_end_lab[32];
4395 rtx xops[2];
4396
4397 ASM_GENERATE_INTERNAL_LABEL (loop_start_lab, "SVLPSPL", labelno);
4398 ASM_GENERATE_INTERNAL_LABEL (loop_end_lab, "SVLPEND", labelno++);
4399
4400 /* Emit loop start label. */
4401 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_start_lab);
4402
4403 /* ADJUSTMENT < RESIDUAL_PROBE_GUARD. */
4404 xops[0] = adjustment;
4405 xops[1] = probe_offset_value_rtx;
4406 output_asm_insn ("cmp\t%0, %1", xops);
4407
4408 /* Branch to end if not enough adjustment to probe. */
4409 fputs ("\tb.lt\t", asm_out_file);
4410 assemble_name_raw (asm_out_file, loop_end_lab);
4411 fputc ('\n', asm_out_file);
4412
4413 /* BASE = BASE - RESIDUAL_PROBE_GUARD. */
4414 xops[0] = base;
4415 xops[1] = probe_offset_value_rtx;
4416 output_asm_insn ("sub\t%0, %0, %1", xops);
4417
4418 /* Probe at BASE. */
4419 xops[1] = const0_rtx;
4420 output_asm_insn ("str\txzr, [%0, %1]", xops);
4421
4422 /* ADJUSTMENT = ADJUSTMENT - RESIDUAL_PROBE_GUARD. */
4423 xops[0] = adjustment;
4424 xops[1] = probe_offset_value_rtx;
4425 output_asm_insn ("sub\t%0, %0, %1", xops);
4426
4427 /* Branch to start if still more bytes to allocate. */
4428 fputs ("\tb\t", asm_out_file);
4429 assemble_name_raw (asm_out_file, loop_start_lab);
4430 fputc ('\n', asm_out_file);
4431
 4432 /* No more probing needed; exit the loop. */
4433 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_end_lab);
4434
4435 /* BASE = BASE - ADJUSTMENT. */
4436 xops[0] = base;
4437 xops[1] = adjustment;
4438 output_asm_insn ("sub\t%0, %0, %1", xops);
4439 return "";
4440}
4441
d6cb6d6a
WD
4442/* Determine whether a frame chain needs to be generated. */
4443static bool
4444aarch64_needs_frame_chain (void)
4445{
4446 /* Force a frame chain for EH returns so the return address is at FP+8. */
4447 if (frame_pointer_needed || crtl->calls_eh_return)
4448 return true;
4449
4450 /* A leaf function cannot have calls or write LR. */
4451 bool is_leaf = crtl->is_leaf && !df_regs_ever_live_p (LR_REGNUM);
4452
4453 /* Don't use a frame chain in leaf functions if leaf frame pointers
4454 are disabled. */
4455 if (flag_omit_leaf_frame_pointer && is_leaf)
4456 return false;
4457
4458 return aarch64_use_frame_pointer;
4459}
4460
43e9d192
IB
4461/* Mark the registers that need to be saved by the callee and calculate
4462 the size of the callee-saved registers area and frame record (both FP
33a2e348 4463 and LR may be omitted). */
43e9d192
IB
4464static void
4465aarch64_layout_frame (void)
4466{
4467 HOST_WIDE_INT offset = 0;
4b0685d9 4468 int regno, last_fp_reg = INVALID_REGNUM;
a0d0b980 4469 bool simd_function = aarch64_simd_decl_p (cfun->decl);
43e9d192 4470
d6cb6d6a 4471 cfun->machine->frame.emit_frame_chain = aarch64_needs_frame_chain ();
7040939b 4472
8c6e3b23
TC
4473 /* Adjust the outgoing arguments size if required. Keep it in sync with what
4474 the mid-end is doing. */
4475 crtl->outgoing_args_size = STACK_DYNAMIC_OFFSET (cfun);
4476
97826595
MS
4477#define SLOT_NOT_REQUIRED (-2)
4478#define SLOT_REQUIRED (-1)
4479
71bfb77a
WD
4480 cfun->machine->frame.wb_candidate1 = INVALID_REGNUM;
4481 cfun->machine->frame.wb_candidate2 = INVALID_REGNUM;
363ffa50 4482
a0d0b980
SE
4483 /* If this is a non-leaf simd function with calls we assume that
4484 at least one of those calls is to a non-simd function and thus
4485 we must save V8 to V23 in the prologue. */
4486
4487 if (simd_function && !crtl->is_leaf)
4488 {
4489 for (regno = V0_REGNUM; regno <= V31_REGNUM; regno++)
4490 if (FP_SIMD_SAVED_REGNUM_P (regno))
4491 df_set_regs_ever_live (regno, true);
4492 }
4493
43e9d192
IB
4494 /* First mark all the registers that really need to be saved... */
4495 for (regno = R0_REGNUM; regno <= R30_REGNUM; regno++)
97826595 4496 cfun->machine->frame.reg_offset[regno] = SLOT_NOT_REQUIRED;
43e9d192
IB
4497
4498 for (regno = V0_REGNUM; regno <= V31_REGNUM; regno++)
97826595 4499 cfun->machine->frame.reg_offset[regno] = SLOT_NOT_REQUIRED;
43e9d192
IB
4500
4501 /* ... that includes the eh data registers (if needed)... */
4502 if (crtl->calls_eh_return)
4503 for (regno = 0; EH_RETURN_DATA_REGNO (regno) != INVALID_REGNUM; regno++)
97826595
MS
4504 cfun->machine->frame.reg_offset[EH_RETURN_DATA_REGNO (regno)]
4505 = SLOT_REQUIRED;
43e9d192
IB
4506
4507 /* ... and any callee saved register that dataflow says is live. */
4508 for (regno = R0_REGNUM; regno <= R30_REGNUM; regno++)
4509 if (df_regs_ever_live_p (regno)
1c923b60
JW
4510 && (regno == R30_REGNUM
4511 || !call_used_regs[regno]))
97826595 4512 cfun->machine->frame.reg_offset[regno] = SLOT_REQUIRED;
43e9d192
IB
4513
4514 for (regno = V0_REGNUM; regno <= V31_REGNUM; regno++)
4515 if (df_regs_ever_live_p (regno)
a0d0b980
SE
4516 && (!call_used_regs[regno]
4517 || (simd_function && FP_SIMD_SAVED_REGNUM_P (regno))))
4b0685d9
WD
4518 {
4519 cfun->machine->frame.reg_offset[regno] = SLOT_REQUIRED;
4520 last_fp_reg = regno;
4521 }
43e9d192 4522
204d2c03 4523 if (cfun->machine->frame.emit_frame_chain)
43e9d192 4524 {
2e1cdae5 4525 /* FP and LR are placed in the linkage record. */
43e9d192 4526 cfun->machine->frame.reg_offset[R29_REGNUM] = 0;
363ffa50 4527 cfun->machine->frame.wb_candidate1 = R29_REGNUM;
2e1cdae5 4528 cfun->machine->frame.reg_offset[R30_REGNUM] = UNITS_PER_WORD;
363ffa50 4529 cfun->machine->frame.wb_candidate2 = R30_REGNUM;
1f7bffd0
WD
4530 offset = 2 * UNITS_PER_WORD;
4531 }
43e9d192 4532
db6b62a8
TC
4533 /* With stack-clash, LR must be saved in non-leaf functions. */
4534 gcc_assert (crtl->is_leaf
4535 || (cfun->machine->frame.reg_offset[R30_REGNUM]
4536 != SLOT_NOT_REQUIRED));
4537
43e9d192 4538 /* Now assign stack slots for them. */
2e1cdae5 4539 for (regno = R0_REGNUM; regno <= R30_REGNUM; regno++)
97826595 4540 if (cfun->machine->frame.reg_offset[regno] == SLOT_REQUIRED)
43e9d192
IB
4541 {
4542 cfun->machine->frame.reg_offset[regno] = offset;
71bfb77a 4543 if (cfun->machine->frame.wb_candidate1 == INVALID_REGNUM)
363ffa50 4544 cfun->machine->frame.wb_candidate1 = regno;
71bfb77a 4545 else if (cfun->machine->frame.wb_candidate2 == INVALID_REGNUM)
363ffa50 4546 cfun->machine->frame.wb_candidate2 = regno;
43e9d192
IB
4547 offset += UNITS_PER_WORD;
4548 }
4549
4b0685d9
WD
4550 HOST_WIDE_INT max_int_offset = offset;
4551 offset = ROUND_UP (offset, STACK_BOUNDARY / BITS_PER_UNIT);
4552 bool has_align_gap = offset != max_int_offset;
4553
43e9d192 4554 for (regno = V0_REGNUM; regno <= V31_REGNUM; regno++)
97826595 4555 if (cfun->machine->frame.reg_offset[regno] == SLOT_REQUIRED)
43e9d192 4556 {
4b0685d9
WD
4557 /* If there is an alignment gap between integer and fp callee-saves,
4558 allocate the last fp register to it if possible. */
a0d0b980
SE
4559 if (regno == last_fp_reg
4560 && has_align_gap
4561 && !simd_function
4562 && (offset & 8) == 0)
4b0685d9
WD
4563 {
4564 cfun->machine->frame.reg_offset[regno] = max_int_offset;
4565 break;
4566 }
4567
43e9d192 4568 cfun->machine->frame.reg_offset[regno] = offset;
71bfb77a 4569 if (cfun->machine->frame.wb_candidate1 == INVALID_REGNUM)
363ffa50 4570 cfun->machine->frame.wb_candidate1 = regno;
71bfb77a 4571 else if (cfun->machine->frame.wb_candidate2 == INVALID_REGNUM
363ffa50
JW
4572 && cfun->machine->frame.wb_candidate1 >= V0_REGNUM)
4573 cfun->machine->frame.wb_candidate2 = regno;
a0d0b980 4574 offset += simd_function ? UNITS_PER_VREG : UNITS_PER_WORD;
43e9d192
IB
4575 }
4576
4f59f9f2 4577 offset = ROUND_UP (offset, STACK_BOUNDARY / BITS_PER_UNIT);
43e9d192
IB
4578
4579 cfun->machine->frame.saved_regs_size = offset;
1c960e02 4580
71bfb77a
WD
4581 HOST_WIDE_INT varargs_and_saved_regs_size
4582 = offset + cfun->machine->frame.saved_varargs_size;
4583
1c960e02 4584 cfun->machine->frame.hard_fp_offset
6a70badb
RS
4585 = aligned_upper_bound (varargs_and_saved_regs_size
4586 + get_frame_size (),
4587 STACK_BOUNDARY / BITS_PER_UNIT);
1c960e02 4588
6a70badb
RS
4589 /* Both these values are already aligned. */
4590 gcc_assert (multiple_p (crtl->outgoing_args_size,
4591 STACK_BOUNDARY / BITS_PER_UNIT));
1c960e02 4592 cfun->machine->frame.frame_size
6a70badb
RS
4593 = (cfun->machine->frame.hard_fp_offset
4594 + crtl->outgoing_args_size);
1c960e02 4595
71bfb77a
WD
4596 cfun->machine->frame.locals_offset = cfun->machine->frame.saved_varargs_size;
4597
4598 cfun->machine->frame.initial_adjust = 0;
4599 cfun->machine->frame.final_adjust = 0;
4600 cfun->machine->frame.callee_adjust = 0;
4601 cfun->machine->frame.callee_offset = 0;
4602
4603 HOST_WIDE_INT max_push_offset = 0;
4604 if (cfun->machine->frame.wb_candidate2 != INVALID_REGNUM)
4605 max_push_offset = 512;
4606 else if (cfun->machine->frame.wb_candidate1 != INVALID_REGNUM)
4607 max_push_offset = 256;
4608
6a70badb
RS
4609 HOST_WIDE_INT const_size, const_fp_offset;
4610 if (cfun->machine->frame.frame_size.is_constant (&const_size)
4611 && const_size < max_push_offset
4612 && known_eq (crtl->outgoing_args_size, 0))
71bfb77a
WD
4613 {
4614 /* Simple, small frame with no outgoing arguments:
4615 stp reg1, reg2, [sp, -frame_size]!
4616 stp reg3, reg4, [sp, 16] */
6a70badb 4617 cfun->machine->frame.callee_adjust = const_size;
71bfb77a 4618 }
6a70badb
RS
4619 else if (known_lt (crtl->outgoing_args_size
4620 + cfun->machine->frame.saved_regs_size, 512)
71bfb77a 4621 && !(cfun->calls_alloca
6a70badb
RS
4622 && known_lt (cfun->machine->frame.hard_fp_offset,
4623 max_push_offset)))
71bfb77a
WD
4624 {
4625 /* Frame with small outgoing arguments:
4626 sub sp, sp, frame_size
4627 stp reg1, reg2, [sp, outgoing_args_size]
4628 stp reg3, reg4, [sp, outgoing_args_size + 16] */
4629 cfun->machine->frame.initial_adjust = cfun->machine->frame.frame_size;
4630 cfun->machine->frame.callee_offset
4631 = cfun->machine->frame.frame_size - cfun->machine->frame.hard_fp_offset;
4632 }
6a70badb
RS
4633 else if (cfun->machine->frame.hard_fp_offset.is_constant (&const_fp_offset)
4634 && const_fp_offset < max_push_offset)
71bfb77a
WD
4635 {
4636 /* Frame with large outgoing arguments but a small local area:
4637 stp reg1, reg2, [sp, -hard_fp_offset]!
4638 stp reg3, reg4, [sp, 16]
4639 sub sp, sp, outgoing_args_size */
6a70badb 4640 cfun->machine->frame.callee_adjust = const_fp_offset;
71bfb77a
WD
4641 cfun->machine->frame.final_adjust
4642 = cfun->machine->frame.frame_size - cfun->machine->frame.callee_adjust;
4643 }
71bfb77a
WD
4644 else
4645 {
4646 /* Frame with large local area and outgoing arguments using frame pointer:
4647 sub sp, sp, hard_fp_offset
4648 stp x29, x30, [sp, 0]
4649 add x29, sp, 0
4650 stp reg3, reg4, [sp, 16]
4651 sub sp, sp, outgoing_args_size */
4652 cfun->machine->frame.initial_adjust = cfun->machine->frame.hard_fp_offset;
4653 cfun->machine->frame.final_adjust
4654 = cfun->machine->frame.frame_size - cfun->machine->frame.initial_adjust;
4655 }
4656
43e9d192
IB
4657 cfun->machine->frame.laid_out = true;
4658}
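/* Editorial sketch (not part of GCC): the four-way choice above reduced
   to plain integers so the conditions are easier to see.  All values are
   in bytes; MAX_PUSH is 512 or 256 depending on whether a second
   write-back candidate exists, as above.  The poly_int "is this constant"
   checks that guard the first and third cases for SVE frames are ignored
   here, and all names are illustrative.  */
enum example_frame_shape
{
  EXAMPLE_SMALL_FRAME,		/* stp reg1, reg2, [sp, -frame_size]!  */
  EXAMPLE_SMALL_OUTGOING_ARGS,	/* sub sp; stp ..., [sp, outgoing_args]  */
  EXAMPLE_LARGE_OUTGOING_ARGS,	/* stp ..., [sp, -hard_fp_offset]!; sub sp  */
  EXAMPLE_GENERAL_FRAME		/* sub sp; stp x29, x30; add x29; sub sp  */
};

static enum example_frame_shape
example_choose_frame_shape (long frame_size, long outgoing_args_size,
			    long saved_regs_size, long hard_fp_offset,
			    long max_push, bool calls_alloca)
{
  if (frame_size < max_push && outgoing_args_size == 0)
    return EXAMPLE_SMALL_FRAME;

  if (outgoing_args_size + saved_regs_size < 512
      && !(calls_alloca && hard_fp_offset < max_push))
    return EXAMPLE_SMALL_OUTGOING_ARGS;

  if (hard_fp_offset < max_push)
    return EXAMPLE_LARGE_OUTGOING_ARGS;

  return EXAMPLE_GENERAL_FRAME;
}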
4659
04ddfe06
KT
4660/* Return true if the register REGNO is saved on entry to
4661 the current function. */
4662
43e9d192
IB
4663static bool
4664aarch64_register_saved_on_entry (int regno)
4665{
97826595 4666 return cfun->machine->frame.reg_offset[regno] >= 0;
43e9d192
IB
4667}
4668
04ddfe06
KT
4669/* Return the next register up from REGNO up to LIMIT for the callee
4670 to save. */
4671
64dedd72
JW
4672static unsigned
4673aarch64_next_callee_save (unsigned regno, unsigned limit)
4674{
4675 while (regno <= limit && !aarch64_register_saved_on_entry (regno))
4676 regno ++;
4677 return regno;
4678}
43e9d192 4679
04ddfe06
KT
4680/* Push the register number REGNO of mode MODE to the stack with write-back
4681 adjusting the stack by ADJUSTMENT. */
4682
c5e1f66e 4683static void
ef4bddc2 4684aarch64_pushwb_single_reg (machine_mode mode, unsigned regno,
c5e1f66e
JW
4685 HOST_WIDE_INT adjustment)
4686 {
4687 rtx base_rtx = stack_pointer_rtx;
4688 rtx insn, reg, mem;
4689
4690 reg = gen_rtx_REG (mode, regno);
4691 mem = gen_rtx_PRE_MODIFY (Pmode, base_rtx,
4692 plus_constant (Pmode, base_rtx, -adjustment));
30079dde 4693 mem = gen_frame_mem (mode, mem);
c5e1f66e
JW
4694
4695 insn = emit_move_insn (mem, reg);
4696 RTX_FRAME_RELATED_P (insn) = 1;
4697}
4698
04ddfe06
KT
4699/* Generate and return an instruction to store the pair of registers
4700 REG and REG2 of mode MODE to location BASE with write-back adjusting
4701 the stack location BASE by ADJUSTMENT. */
4702
80c11907 4703static rtx
ef4bddc2 4704aarch64_gen_storewb_pair (machine_mode mode, rtx base, rtx reg, rtx reg2,
80c11907
JW
4705 HOST_WIDE_INT adjustment)
4706{
4707 switch (mode)
4708 {
4e10a5a7 4709 case E_DImode:
80c11907
JW
4710 return gen_storewb_pairdi_di (base, base, reg, reg2,
4711 GEN_INT (-adjustment),
4712 GEN_INT (UNITS_PER_WORD - adjustment));
4e10a5a7 4713 case E_DFmode:
80c11907
JW
4714 return gen_storewb_pairdf_di (base, base, reg, reg2,
4715 GEN_INT (-adjustment),
4716 GEN_INT (UNITS_PER_WORD - adjustment));
a0d0b980
SE
4717 case E_TFmode:
4718 return gen_storewb_pairtf_di (base, base, reg, reg2,
4719 GEN_INT (-adjustment),
4720 GEN_INT (UNITS_PER_VREG - adjustment));
80c11907
JW
4721 default:
4722 gcc_unreachable ();
4723 }
4724}
4725
04ddfe06
KT
4726/* Push registers numbered REGNO1 and REGNO2 to the stack, adjusting the
4727 stack pointer by ADJUSTMENT. */
4728
80c11907 4729static void
89ac681e 4730aarch64_push_regs (unsigned regno1, unsigned regno2, HOST_WIDE_INT adjustment)
80c11907 4731{
5d8a22a5 4732 rtx_insn *insn;
a0d0b980 4733 machine_mode mode = aarch64_reg_save_mode (cfun->decl, regno1);
89ac681e 4734
71bfb77a 4735 if (regno2 == INVALID_REGNUM)
89ac681e
WD
4736 return aarch64_pushwb_single_reg (mode, regno1, adjustment);
4737
80c11907
JW
4738 rtx reg1 = gen_rtx_REG (mode, regno1);
4739 rtx reg2 = gen_rtx_REG (mode, regno2);
4740
4741 insn = emit_insn (aarch64_gen_storewb_pair (mode, stack_pointer_rtx, reg1,
4742 reg2, adjustment));
4743 RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 2)) = 1;
80c11907
JW
4744 RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 1)) = 1;
4745 RTX_FRAME_RELATED_P (insn) = 1;
4746}
4747
04ddfe06
KT
 4748/* Load the pair of registers REG and REG2 of mode MODE from stack location BASE,
4749 adjusting it by ADJUSTMENT afterwards. */
4750
159313d9 4751static rtx
ef4bddc2 4752aarch64_gen_loadwb_pair (machine_mode mode, rtx base, rtx reg, rtx reg2,
159313d9
JW
4753 HOST_WIDE_INT adjustment)
4754{
4755 switch (mode)
4756 {
4e10a5a7 4757 case E_DImode:
159313d9 4758 return gen_loadwb_pairdi_di (base, base, reg, reg2, GEN_INT (adjustment),
3e322b3f 4759 GEN_INT (UNITS_PER_WORD));
4e10a5a7 4760 case E_DFmode:
159313d9 4761 return gen_loadwb_pairdf_di (base, base, reg, reg2, GEN_INT (adjustment),
3e322b3f 4762 GEN_INT (UNITS_PER_WORD));
a0d0b980
SE
4763 case E_TFmode:
4764 return gen_loadwb_pairtf_di (base, base, reg, reg2, GEN_INT (adjustment),
4765 GEN_INT (UNITS_PER_VREG));
159313d9
JW
4766 default:
4767 gcc_unreachable ();
4768 }
4769}
4770
04ddfe06
KT
4771/* Pop the two registers numbered REGNO1, REGNO2 from the stack, adjusting it
4772 afterwards by ADJUSTMENT and writing the appropriate REG_CFA_RESTORE notes
4773 into CFI_OPS. */
4774
89ac681e
WD
4775static void
4776aarch64_pop_regs (unsigned regno1, unsigned regno2, HOST_WIDE_INT adjustment,
4777 rtx *cfi_ops)
4778{
a0d0b980 4779 machine_mode mode = aarch64_reg_save_mode (cfun->decl, regno1);
89ac681e
WD
4780 rtx reg1 = gen_rtx_REG (mode, regno1);
4781
4782 *cfi_ops = alloc_reg_note (REG_CFA_RESTORE, reg1, *cfi_ops);
4783
71bfb77a 4784 if (regno2 == INVALID_REGNUM)
89ac681e
WD
4785 {
4786 rtx mem = plus_constant (Pmode, stack_pointer_rtx, adjustment);
4787 mem = gen_rtx_POST_MODIFY (Pmode, stack_pointer_rtx, mem);
30079dde 4788 emit_move_insn (reg1, gen_frame_mem (mode, mem));
89ac681e
WD
4789 }
4790 else
4791 {
4792 rtx reg2 = gen_rtx_REG (mode, regno2);
4793 *cfi_ops = alloc_reg_note (REG_CFA_RESTORE, reg2, *cfi_ops);
4794 emit_insn (aarch64_gen_loadwb_pair (mode, stack_pointer_rtx, reg1,
4795 reg2, adjustment));
4796 }
4797}
4798
04ddfe06
KT
4799/* Generate and return a store pair instruction of mode MODE to store
4800 register REG1 to MEM1 and register REG2 to MEM2. */
4801
72df5c1f 4802static rtx
ef4bddc2 4803aarch64_gen_store_pair (machine_mode mode, rtx mem1, rtx reg1, rtx mem2,
72df5c1f
JW
4804 rtx reg2)
4805{
4806 switch (mode)
4807 {
4e10a5a7 4808 case E_DImode:
dfe1da23 4809 return gen_store_pair_dw_didi (mem1, reg1, mem2, reg2);
72df5c1f 4810
4e10a5a7 4811 case E_DFmode:
dfe1da23 4812 return gen_store_pair_dw_dfdf (mem1, reg1, mem2, reg2);
72df5c1f 4813
a0d0b980
SE
4814 case E_TFmode:
4815 return gen_store_pair_dw_tftf (mem1, reg1, mem2, reg2);
4816
72df5c1f
JW
4817 default:
4818 gcc_unreachable ();
4819 }
4820}
4821
04ddfe06
KT
 4822/* Generate and return a load pair instruction of mode MODE to load register
4823 REG1 from MEM1 and register REG2 from MEM2. */
4824
72df5c1f 4825static rtx
ef4bddc2 4826aarch64_gen_load_pair (machine_mode mode, rtx reg1, rtx mem1, rtx reg2,
72df5c1f
JW
4827 rtx mem2)
4828{
4829 switch (mode)
4830 {
4e10a5a7 4831 case E_DImode:
dfe1da23 4832 return gen_load_pair_dw_didi (reg1, mem1, reg2, mem2);
72df5c1f 4833
4e10a5a7 4834 case E_DFmode:
dfe1da23 4835 return gen_load_pair_dw_dfdf (reg1, mem1, reg2, mem2);
72df5c1f 4836
a0d0b980
SE
4837 case E_TFmode:
4838 return gen_load_pair_dw_tftf (reg1, mem1, reg2, mem2);
4839
72df5c1f
JW
4840 default:
4841 gcc_unreachable ();
4842 }
4843}
4844
db58fd89
JW
4845/* Return TRUE if return address signing should be enabled for the current
4846 function, otherwise return FALSE. */
4847
4848bool
4849aarch64_return_address_signing_enabled (void)
4850{
 4851 /* This function should only be called after the frame has been laid out. */
4852 gcc_assert (cfun->machine->frame.laid_out);
4853
4854 /* If signing scope is AARCH64_FUNCTION_NON_LEAF, we only sign a leaf function
 4855 if its LR is pushed onto the stack. */
4856 return (aarch64_ra_sign_scope == AARCH64_FUNCTION_ALL
4857 || (aarch64_ra_sign_scope == AARCH64_FUNCTION_NON_LEAF
4858 && cfun->machine->frame.reg_offset[LR_REGNUM] >= 0));
4859}
4860
30afdf34
SD
4861/* Return TRUE if Branch Target Identification Mechanism is enabled. */
4862bool
4863aarch64_bti_enabled (void)
4864{
4865 return (aarch64_enable_bti == 1);
4866}
4867
04ddfe06
KT
4868/* Emit code to save the callee-saved registers from register number START
4869 to LIMIT to the stack at the location starting at offset START_OFFSET,
4870 skipping any write-back candidates if SKIP_WB is true. */
43e9d192 4871
43e9d192 4872static void
6a70badb 4873aarch64_save_callee_saves (machine_mode mode, poly_int64 start_offset,
ae13fce3 4874 unsigned start, unsigned limit, bool skip_wb)
43e9d192 4875{
5d8a22a5 4876 rtx_insn *insn;
43e9d192
IB
4877 unsigned regno;
4878 unsigned regno2;
4879
0ec74a1e 4880 for (regno = aarch64_next_callee_save (start, limit);
64dedd72
JW
4881 regno <= limit;
4882 regno = aarch64_next_callee_save (regno + 1, limit))
43e9d192 4883 {
ae13fce3 4884 rtx reg, mem;
6a70badb 4885 poly_int64 offset;
a0d0b980 4886 int offset_diff;
64dedd72 4887
ae13fce3
JW
4888 if (skip_wb
4889 && (regno == cfun->machine->frame.wb_candidate1
4890 || regno == cfun->machine->frame.wb_candidate2))
4891 continue;
4892
827ab47a
KT
4893 if (cfun->machine->reg_is_wrapped_separately[regno])
4894 continue;
4895
ae13fce3
JW
4896 reg = gen_rtx_REG (mode, regno);
4897 offset = start_offset + cfun->machine->frame.reg_offset[regno];
30079dde
WD
4898 mem = gen_frame_mem (mode, plus_constant (Pmode, stack_pointer_rtx,
4899 offset));
64dedd72
JW
4900
4901 regno2 = aarch64_next_callee_save (regno + 1, limit);
a0d0b980
SE
4902 offset_diff = cfun->machine->frame.reg_offset[regno2]
4903 - cfun->machine->frame.reg_offset[regno];
64dedd72
JW
4904
4905 if (regno2 <= limit
827ab47a 4906 && !cfun->machine->reg_is_wrapped_separately[regno2]
a0d0b980 4907 && known_eq (GET_MODE_SIZE (mode), offset_diff))
43e9d192 4908 {
0ec74a1e 4909 rtx reg2 = gen_rtx_REG (mode, regno2);
64dedd72
JW
4910 rtx mem2;
4911
4912 offset = start_offset + cfun->machine->frame.reg_offset[regno2];
30079dde
WD
4913 mem2 = gen_frame_mem (mode, plus_constant (Pmode, stack_pointer_rtx,
4914 offset));
8ed2fc62
JW
4915 insn = emit_insn (aarch64_gen_store_pair (mode, mem, reg, mem2,
4916 reg2));
0b4a9743 4917
64dedd72
JW
4918 /* The first part of a frame-related parallel insn is
4919 always assumed to be relevant to the frame
4920 calculations; subsequent parts, are only
4921 frame-related if explicitly marked. */
4922 RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 1)) = 1;
4923 regno = regno2;
4924 }
4925 else
8ed2fc62
JW
4926 insn = emit_move_insn (mem, reg);
4927
4928 RTX_FRAME_RELATED_P (insn) = 1;
4929 }
4930}
4931
04ddfe06
KT
4932/* Emit code to restore the callee registers of mode MODE from register
4933 number START up to and including LIMIT. Restore from the stack offset
4934 START_OFFSET, skipping any write-back candidates if SKIP_WB is true.
4935 Write the appropriate REG_CFA_RESTORE notes into CFI_OPS. */
4936
8ed2fc62 4937static void
ef4bddc2 4938aarch64_restore_callee_saves (machine_mode mode,
6a70badb 4939 poly_int64 start_offset, unsigned start,
dd991abb 4940 unsigned limit, bool skip_wb, rtx *cfi_ops)
8ed2fc62 4941{
8ed2fc62 4942 rtx base_rtx = stack_pointer_rtx;
8ed2fc62
JW
4943 unsigned regno;
4944 unsigned regno2;
6a70badb 4945 poly_int64 offset;
8ed2fc62
JW
4946
4947 for (regno = aarch64_next_callee_save (start, limit);
4948 regno <= limit;
4949 regno = aarch64_next_callee_save (regno + 1, limit))
4950 {
827ab47a
KT
4951 if (cfun->machine->reg_is_wrapped_separately[regno])
4952 continue;
4953
ae13fce3 4954 rtx reg, mem;
a0d0b980 4955 int offset_diff;
8ed2fc62 4956
ae13fce3
JW
4957 if (skip_wb
4958 && (regno == cfun->machine->frame.wb_candidate1
4959 || regno == cfun->machine->frame.wb_candidate2))
4960 continue;
4961
4962 reg = gen_rtx_REG (mode, regno);
8ed2fc62 4963 offset = start_offset + cfun->machine->frame.reg_offset[regno];
30079dde 4964 mem = gen_frame_mem (mode, plus_constant (Pmode, base_rtx, offset));
8ed2fc62
JW
4965
4966 regno2 = aarch64_next_callee_save (regno + 1, limit);
a0d0b980
SE
4967 offset_diff = cfun->machine->frame.reg_offset[regno2]
4968 - cfun->machine->frame.reg_offset[regno];
8ed2fc62
JW
4969
4970 if (regno2 <= limit
827ab47a 4971 && !cfun->machine->reg_is_wrapped_separately[regno2]
a0d0b980 4972 && known_eq (GET_MODE_SIZE (mode), offset_diff))
64dedd72 4973 {
8ed2fc62
JW
4974 rtx reg2 = gen_rtx_REG (mode, regno2);
4975 rtx mem2;
4976
4977 offset = start_offset + cfun->machine->frame.reg_offset[regno2];
30079dde 4978 mem2 = gen_frame_mem (mode, plus_constant (Pmode, base_rtx, offset));
dd991abb 4979 emit_insn (aarch64_gen_load_pair (mode, reg, mem, reg2, mem2));
8ed2fc62 4980
dd991abb 4981 *cfi_ops = alloc_reg_note (REG_CFA_RESTORE, reg2, *cfi_ops);
8ed2fc62 4982 regno = regno2;
43e9d192 4983 }
8ed2fc62 4984 else
dd991abb
RH
4985 emit_move_insn (reg, mem);
4986 *cfi_ops = alloc_reg_note (REG_CFA_RESTORE, reg, *cfi_ops);
43e9d192 4987 }
43e9d192
IB
4988}
4989
43cacb12
RS
4990/* Return true if OFFSET is a signed 4-bit value multiplied by the size
4991 of MODE. */
4992
4993static inline bool
4994offset_4bit_signed_scaled_p (machine_mode mode, poly_int64 offset)
4995{
4996 HOST_WIDE_INT multiple;
4997 return (constant_multiple_p (offset, GET_MODE_SIZE (mode), &multiple)
4998 && IN_RANGE (multiple, -8, 7));
4999}
5000
5001/* Return true if OFFSET is an unsigned 6-bit value multiplied by the size
5002 of MODE. */
5003
5004static inline bool
5005offset_6bit_unsigned_scaled_p (machine_mode mode, poly_int64 offset)
5006{
5007 HOST_WIDE_INT multiple;
5008 return (constant_multiple_p (offset, GET_MODE_SIZE (mode), &multiple)
5009 && IN_RANGE (multiple, 0, 63));
5010}
5011
5012/* Return true if OFFSET is a signed 7-bit value multiplied by the size
5013 of MODE. */
5014
5015bool
5016aarch64_offset_7bit_signed_scaled_p (machine_mode mode, poly_int64 offset)
5017{
5018 HOST_WIDE_INT multiple;
5019 return (constant_multiple_p (offset, GET_MODE_SIZE (mode), &multiple)
5020 && IN_RANGE (multiple, -64, 63));
5021}
5022
5023/* Return true if OFFSET is a signed 9-bit value. */
5024
3c5af608
MM
5025bool
5026aarch64_offset_9bit_signed_unscaled_p (machine_mode mode ATTRIBUTE_UNUSED,
5027 poly_int64 offset)
827ab47a 5028{
6a70badb
RS
5029 HOST_WIDE_INT const_offset;
5030 return (offset.is_constant (&const_offset)
5031 && IN_RANGE (const_offset, -256, 255));
827ab47a
KT
5032}
5033
43cacb12
RS
5034/* Return true if OFFSET is a signed 9-bit value multiplied by the size
5035 of MODE. */
5036
827ab47a 5037static inline bool
43cacb12 5038offset_9bit_signed_scaled_p (machine_mode mode, poly_int64 offset)
827ab47a 5039{
6a70badb
RS
5040 HOST_WIDE_INT multiple;
5041 return (constant_multiple_p (offset, GET_MODE_SIZE (mode), &multiple)
43cacb12 5042 && IN_RANGE (multiple, -256, 255));
827ab47a
KT
5043}
5044
43cacb12
RS
5045/* Return true if OFFSET is an unsigned 12-bit value multiplied by the size
5046 of MODE. */
5047
5048static inline bool
5049offset_12bit_unsigned_scaled_p (machine_mode mode, poly_int64 offset)
827ab47a 5050{
6a70badb
RS
5051 HOST_WIDE_INT multiple;
5052 return (constant_multiple_p (offset, GET_MODE_SIZE (mode), &multiple)
43cacb12 5053 && IN_RANGE (multiple, 0, 4095));
827ab47a
KT
5054}
5055
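The predicates above all follow the same pattern: divide the byte offset by the access size and check the quotient against the immediate range of the corresponding addressing mode. Below is a minimal standalone sketch of the 12-bit unsigned scaled case, using a hypothetical helper name and assuming a constant (non-SVE) offset; it is an illustration, not GCC code.

/* Standalone sketch: an LDR/STR with an unsigned scaled 12-bit immediate
   reaches offsets that are a multiple of the access size with a quotient
   in [0, 4095].  */
#include <stdbool.h>
#include <stdio.h>

static bool fits_ldr_uimm12 (long offset, long access_size)
{
  return offset >= 0
         && offset % access_size == 0
         && offset / access_size <= 4095;
}

int main (void)
{
  /* 8-byte access: 32760 = 4095 * 8 is the largest reachable offset.  */
  printf ("%d\n", fits_ldr_uimm12 (32760, 8));  /* 1 */
  printf ("%d\n", fits_ldr_uimm12 (32768, 8));  /* 0: quotient is 4096 */
  printf ("%d\n", fits_ldr_uimm12 (12, 8));     /* 0: not a multiple of 8 */
  return 0;
}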
5056/* Implement TARGET_SHRINK_WRAP_GET_SEPARATE_COMPONENTS. */
5057
5058static sbitmap
5059aarch64_get_separate_components (void)
5060{
827ab47a
KT
5061 sbitmap components = sbitmap_alloc (LAST_SAVED_REGNUM + 1);
5062 bitmap_clear (components);
5063
5064 /* The registers we need saved to the frame. */
5065 for (unsigned regno = 0; regno <= LAST_SAVED_REGNUM; regno++)
5066 if (aarch64_register_saved_on_entry (regno))
5067 {
6a70badb 5068 poly_int64 offset = cfun->machine->frame.reg_offset[regno];
827ab47a
KT
5069 if (!frame_pointer_needed)
5070 offset += cfun->machine->frame.frame_size
5071 - cfun->machine->frame.hard_fp_offset;
5072 /* Check that we can access the stack slot of the register with one
5073 direct load with no adjustments needed. */
5074 if (offset_12bit_unsigned_scaled_p (DImode, offset))
5075 bitmap_set_bit (components, regno);
5076 }
5077
5078 /* Don't mess with the hard frame pointer. */
5079 if (frame_pointer_needed)
5080 bitmap_clear_bit (components, HARD_FRAME_POINTER_REGNUM);
5081
5082 unsigned reg1 = cfun->machine->frame.wb_candidate1;
5083 unsigned reg2 = cfun->machine->frame.wb_candidate2;
0795f659 5084 /* If registers have been chosen to be stored/restored with
827ab47a
KT
5085 writeback, don't interfere with them to avoid having to output explicit
5086 stack adjustment instructions. */
5087 if (reg2 != INVALID_REGNUM)
5088 bitmap_clear_bit (components, reg2);
5089 if (reg1 != INVALID_REGNUM)
5090 bitmap_clear_bit (components, reg1);
5091
5092 bitmap_clear_bit (components, LR_REGNUM);
5093 bitmap_clear_bit (components, SP_REGNUM);
5094
5095 return components;
5096}
5097
5098/* Implement TARGET_SHRINK_WRAP_COMPONENTS_FOR_BB. */
5099
5100static sbitmap
5101aarch64_components_for_bb (basic_block bb)
5102{
5103 bitmap in = DF_LIVE_IN (bb);
5104 bitmap gen = &DF_LIVE_BB_INFO (bb)->gen;
5105 bitmap kill = &DF_LIVE_BB_INFO (bb)->kill;
a0d0b980 5106 bool simd_function = aarch64_simd_decl_p (cfun->decl);
827ab47a
KT
5107
5108 sbitmap components = sbitmap_alloc (LAST_SAVED_REGNUM + 1);
5109 bitmap_clear (components);
5110
5111 /* GPRs are used in a bb if they are in the IN, GEN, or KILL sets. */
5112 for (unsigned regno = 0; regno <= LAST_SAVED_REGNUM; regno++)
a0d0b980
SE
5113 if ((!call_used_regs[regno]
5114 || (simd_function && FP_SIMD_SAVED_REGNUM_P (regno)))
827ab47a
KT
5115 && (bitmap_bit_p (in, regno)
5116 || bitmap_bit_p (gen, regno)
5117 || bitmap_bit_p (kill, regno)))
3f26f054
WD
5118 {
5119 unsigned regno2, offset, offset2;
5120 bitmap_set_bit (components, regno);
5121
5122 /* If there is a callee-save at an adjacent offset, add it as well
5123 to increase the use of LDP/STP. */
5124 offset = cfun->machine->frame.reg_offset[regno];
5125 regno2 = ((offset & 8) == 0) ? regno + 1 : regno - 1;
5126
5127 if (regno2 <= LAST_SAVED_REGNUM)
5128 {
5129 offset2 = cfun->machine->frame.reg_offset[regno2];
5130 if ((offset & ~8) == (offset2 & ~8))
5131 bitmap_set_bit (components, regno2);
5132 }
5133 }
827ab47a
KT
5134
5135 return components;
5136}
5137
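As a rough standalone illustration of the pairing test above (a sketch, not GCC code): two 8-byte callee-save slots can be merged into one LDP/STP only when their offsets fall in the same 16-byte-aligned pair, which is what the (offset & ~8) == (offset2 & ~8) check expresses.

/* Standalone sketch: slots at offsets 16 and 24 share the 16-byte-aligned
   pair starting at 16, so a single STP can save both; slots at 24 and 32
   straddle a pair boundary and cannot be merged.  */
#include <stdbool.h>
#include <stdio.h>

static bool same_stp_pair (unsigned off1, unsigned off2)
{
  return (off1 & ~8u) == (off2 & ~8u);
}

int main (void)
{
  printf ("%d\n", same_stp_pair (16, 24));  /* 1 */
  printf ("%d\n", same_stp_pair (24, 32));  /* 0 */
  return 0;
}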
5138/* Implement TARGET_SHRINK_WRAP_DISQUALIFY_COMPONENTS.
5139 Nothing to do for aarch64. */
5140
5141static void
5142aarch64_disqualify_components (sbitmap, edge, sbitmap, bool)
5143{
5144}
5145
5146/* Return the next set bit in BMP from START onwards. Return the total number
5147 of bits in BMP if no set bit is found at or after START. */
5148
5149static unsigned int
5150aarch64_get_next_set_bit (sbitmap bmp, unsigned int start)
5151{
5152 unsigned int nbits = SBITMAP_SIZE (bmp);
5153 if (start == nbits)
5154 return start;
5155
5156 gcc_assert (start < nbits);
5157 for (unsigned int i = start; i < nbits; i++)
5158 if (bitmap_bit_p (bmp, i))
5159 return i;
5160
5161 return nbits;
5162}
5163
5164/* Do the work for aarch64_emit_prologue_components and
5165 aarch64_emit_epilogue_components. COMPONENTS is the bitmap of registers
5166 to save/restore, PROLOGUE_P indicates whether to emit the prologue sequence
5167 for these components or the epilogue sequence. That is, it determines
5168 whether we should emit stores or loads and what kind of CFA notes to attach
5169 to the insns. Otherwise the logic for the two sequences is very
5170 similar. */
5171
5172static void
5173aarch64_process_components (sbitmap components, bool prologue_p)
5174{
5175 rtx ptr_reg = gen_rtx_REG (Pmode, frame_pointer_needed
5176 ? HARD_FRAME_POINTER_REGNUM
5177 : STACK_POINTER_REGNUM);
5178
5179 unsigned last_regno = SBITMAP_SIZE (components);
5180 unsigned regno = aarch64_get_next_set_bit (components, R0_REGNUM);
5181 rtx_insn *insn = NULL;
5182
5183 while (regno != last_regno)
5184 {
a0d0b980
SE
5185 /* AAPCS64 section 5.1.2 requires only the low 64 bits to be saved,
5186 so DFmode for the vector registers is enough. For simd functions
5187 we want to save the low 128 bits. */
5188 machine_mode mode = aarch64_reg_save_mode (cfun->decl, regno);
5189
827ab47a 5190 rtx reg = gen_rtx_REG (mode, regno);
6a70badb 5191 poly_int64 offset = cfun->machine->frame.reg_offset[regno];
827ab47a
KT
5192 if (!frame_pointer_needed)
5193 offset += cfun->machine->frame.frame_size
5194 - cfun->machine->frame.hard_fp_offset;
5195 rtx addr = plus_constant (Pmode, ptr_reg, offset);
5196 rtx mem = gen_frame_mem (mode, addr);
5197
5198 rtx set = prologue_p ? gen_rtx_SET (mem, reg) : gen_rtx_SET (reg, mem);
5199 unsigned regno2 = aarch64_get_next_set_bit (components, regno + 1);
5200 /* No more registers to handle after REGNO.
5201 Emit a single save/restore and exit. */
5202 if (regno2 == last_regno)
5203 {
5204 insn = emit_insn (set);
5205 RTX_FRAME_RELATED_P (insn) = 1;
5206 if (prologue_p)
5207 add_reg_note (insn, REG_CFA_OFFSET, copy_rtx (set));
5208 else
5209 add_reg_note (insn, REG_CFA_RESTORE, reg);
5210 break;
5211 }
5212
6a70badb 5213 poly_int64 offset2 = cfun->machine->frame.reg_offset[regno2];
827ab47a
KT
5214 /* The next register is not of the same class or its offset is not
5215 mergeable with the current one into a pair. */
5216 if (!satisfies_constraint_Ump (mem)
5217 || GP_REGNUM_P (regno) != GP_REGNUM_P (regno2)
a0d0b980 5218 || (aarch64_simd_decl_p (cfun->decl) && FP_REGNUM_P (regno))
6a70badb
RS
5219 || maybe_ne ((offset2 - cfun->machine->frame.reg_offset[regno]),
5220 GET_MODE_SIZE (mode)))
827ab47a
KT
5221 {
5222 insn = emit_insn (set);
5223 RTX_FRAME_RELATED_P (insn) = 1;
5224 if (prologue_p)
5225 add_reg_note (insn, REG_CFA_OFFSET, copy_rtx (set));
5226 else
5227 add_reg_note (insn, REG_CFA_RESTORE, reg);
5228
5229 regno = regno2;
5230 continue;
5231 }
5232
5233 /* REGNO2 can be saved/restored in a pair with REGNO. */
5234 rtx reg2 = gen_rtx_REG (mode, regno2);
5235 if (!frame_pointer_needed)
5236 offset2 += cfun->machine->frame.frame_size
5237 - cfun->machine->frame.hard_fp_offset;
5238 rtx addr2 = plus_constant (Pmode, ptr_reg, offset2);
5239 rtx mem2 = gen_frame_mem (mode, addr2);
5240 rtx set2 = prologue_p ? gen_rtx_SET (mem2, reg2)
5241 : gen_rtx_SET (reg2, mem2);
5242
5243 if (prologue_p)
5244 insn = emit_insn (aarch64_gen_store_pair (mode, mem, reg, mem2, reg2));
5245 else
5246 insn = emit_insn (aarch64_gen_load_pair (mode, reg, mem, reg2, mem2));
5247
5248 RTX_FRAME_RELATED_P (insn) = 1;
5249 if (prologue_p)
5250 {
5251 add_reg_note (insn, REG_CFA_OFFSET, set);
5252 add_reg_note (insn, REG_CFA_OFFSET, set2);
5253 }
5254 else
5255 {
5256 add_reg_note (insn, REG_CFA_RESTORE, reg);
5257 add_reg_note (insn, REG_CFA_RESTORE, reg2);
5258 }
5259
5260 regno = aarch64_get_next_set_bit (components, regno2 + 1);
5261 }
5262}
5263
5264/* Implement TARGET_SHRINK_WRAP_EMIT_PROLOGUE_COMPONENTS. */
5265
5266static void
5267aarch64_emit_prologue_components (sbitmap components)
5268{
5269 aarch64_process_components (components, true);
5270}
5271
5272/* Implement TARGET_SHRINK_WRAP_EMIT_EPILOGUE_COMPONENTS. */
5273
5274static void
5275aarch64_emit_epilogue_components (sbitmap components)
5276{
5277 aarch64_process_components (components, false);
5278}
5279
5280/* Implement TARGET_SHRINK_WRAP_SET_HANDLED_COMPONENTS. */
5281
5282static void
5283aarch64_set_handled_components (sbitmap components)
5284{
5285 for (unsigned regno = 0; regno <= LAST_SAVED_REGNUM; regno++)
5286 if (bitmap_bit_p (components, regno))
5287 cfun->machine->reg_is_wrapped_separately[regno] = true;
5288}
5289
8c6e3b23
TC
5290/* On AArch64 we have an ABI defined safe buffer. This constant is used to
5291 determine the probe offset for alloca. */
5292
5293static HOST_WIDE_INT
5294aarch64_stack_clash_protection_alloca_probe_range (void)
5295{
5296 return STACK_CLASH_CALLER_GUARD;
5297}
5298
5299
cd1bef27
JL
5300/* Allocate POLY_SIZE bytes of stack space using TEMP1 and TEMP2 as scratch
5301 registers. If POLY_SIZE is not large enough to require a probe this function
5302 will only adjust the stack. When allocating the stack space
5303 FRAME_RELATED_P is then used to indicate if the allocation is frame related.
5304 FINAL_ADJUSTMENT_P indicates whether we are allocating the outgoing
5305 arguments. If we are then we ensure that any allocation larger than the ABI
5306 defined buffer needs a probe so that the invariant of having a 1KB buffer is
5307 maintained.
5308
5309 We emit barriers after each stack adjustment to prevent optimizations from
5310 breaking the invariant that we never drop the stack more than a page. This
5311 invariant is needed to make it easier to correctly handle asynchronous
5312 events: if we were to drop the stack by more than a page before emitting
5313 the probes for it, and a signal arrived somewhere in between, then the
5314 signal handler would not know the state of the stack and could make no
5315 assumptions about which pages have been probed. */
5316
5317static void
5318aarch64_allocate_and_probe_stack_space (rtx temp1, rtx temp2,
5319 poly_int64 poly_size,
5320 bool frame_related_p,
5321 bool final_adjustment_p)
5322{
5323 HOST_WIDE_INT guard_size
5324 = 1 << PARAM_VALUE (PARAM_STACK_CLASH_PROTECTION_GUARD_SIZE);
5325 HOST_WIDE_INT guard_used_by_caller = STACK_CLASH_CALLER_GUARD;
5326 /* When doing the final adjustment for the outgoing argument size we can't
5327 assume that LR was saved at position 0. So subtract its offset from the
5328 ABI safe buffer so that we don't accidentally allow an adjustment that
5329 would result in an allocation larger than the ABI buffer without
5330 probing. */
5331 HOST_WIDE_INT min_probe_threshold
5332 = final_adjustment_p
5333 ? guard_used_by_caller - cfun->machine->frame.reg_offset[LR_REGNUM]
5334 : guard_size - guard_used_by_caller;
5335
5336 poly_int64 frame_size = cfun->machine->frame.frame_size;
5337
5338 /* We should always have a positive probe threshold. */
5339 gcc_assert (min_probe_threshold > 0);
5340
5341 if (flag_stack_clash_protection && !final_adjustment_p)
5342 {
5343 poly_int64 initial_adjust = cfun->machine->frame.initial_adjust;
5344 poly_int64 final_adjust = cfun->machine->frame.final_adjust;
5345
5346 if (known_eq (frame_size, 0))
5347 {
5348 dump_stack_clash_frame_info (NO_PROBE_NO_FRAME, false);
5349 }
5350 else if (known_lt (initial_adjust, guard_size - guard_used_by_caller)
5351 && known_lt (final_adjust, guard_used_by_caller))
5352 {
5353 dump_stack_clash_frame_info (NO_PROBE_SMALL_FRAME, true);
5354 }
5355 }
5356
cd1bef27
JL
5357 /* If SIZE is not large enough to require probing, just adjust the stack and
5358 exit. */
eb471ba3 5359 if (known_lt (poly_size, min_probe_threshold)
cd1bef27
JL
5360 || !flag_stack_clash_protection)
5361 {
5362 aarch64_sub_sp (temp1, temp2, poly_size, frame_related_p);
5363 return;
5364 }
5365
eb471ba3
TC
5366 HOST_WIDE_INT size;
5367 /* Handle the SVE non-constant case first. */
5368 if (!poly_size.is_constant (&size))
5369 {
5370 if (dump_file)
5371 {
5372 fprintf (dump_file, "Stack clash SVE prologue: ");
5373 print_dec (poly_size, dump_file);
5374 fprintf (dump_file, " bytes, dynamic probing will be required.\n");
5375 }
5376
5377 /* First calculate the amount of bytes we're actually spilling. */
5378 aarch64_add_offset (Pmode, temp1, CONST0_RTX (Pmode),
5379 poly_size, temp1, temp2, false, true);
5380
5381 rtx_insn *insn = get_last_insn ();
5382
5383 if (frame_related_p)
5384 {
5385 /* This is done to provide unwinding information for the stack
5386 adjustments we're about to do, however to prevent the optimizers
143d3b15 5387 from removing the R11 move and leaving the CFA note (which would be
eb471ba3
TC
5388 very wrong) we tie the old and new stack pointer together.
5389 The tie will expand to nothing but the optimizers will not touch
5390 the instruction. */
143d3b15 5391 rtx stack_ptr_copy = gen_rtx_REG (Pmode, STACK_CLASH_SVE_CFA_REGNUM);
eb471ba3
TC
5392 emit_move_insn (stack_ptr_copy, stack_pointer_rtx);
5393 emit_insn (gen_stack_tie (stack_ptr_copy, stack_pointer_rtx));
5394
5395 /* We want the CFA independent of the stack pointer for the
5396 duration of the loop. */
5397 add_reg_note (insn, REG_CFA_DEF_CFA, stack_ptr_copy);
5398 RTX_FRAME_RELATED_P (insn) = 1;
5399 }
5400
5401 rtx probe_const = gen_int_mode (min_probe_threshold, Pmode);
5402 rtx guard_const = gen_int_mode (guard_size, Pmode);
5403
5404 insn = emit_insn (gen_probe_sve_stack_clash (Pmode, stack_pointer_rtx,
5405 stack_pointer_rtx, temp1,
5406 probe_const, guard_const));
5407
5408 /* Now reset the CFA register if needed. */
5409 if (frame_related_p)
5410 {
5411 add_reg_note (insn, REG_CFA_DEF_CFA,
5412 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
5413 gen_int_mode (poly_size, Pmode)));
5414 RTX_FRAME_RELATED_P (insn) = 1;
5415 }
5416
5417 return;
5418 }
5419
cd1bef27
JL
5420 if (dump_file)
5421 fprintf (dump_file,
eb471ba3
TC
5422 "Stack clash AArch64 prologue: " HOST_WIDE_INT_PRINT_DEC
5423 " bytes, probing will be required.\n", size);
cd1bef27
JL
5424
5425 /* Round size to the nearest multiple of guard_size, and calculate the
5426 residual as the difference between the original size and the rounded
5427 size. */
5428 HOST_WIDE_INT rounded_size = ROUND_DOWN (size, guard_size);
5429 HOST_WIDE_INT residual = size - rounded_size;
5430
5431 /* We can handle a small number of allocations/probes inline. Otherwise
5432 punt to a loop. */
5433 if (rounded_size <= STACK_CLASH_MAX_UNROLL_PAGES * guard_size)
5434 {
5435 for (HOST_WIDE_INT i = 0; i < rounded_size; i += guard_size)
5436 {
5437 aarch64_sub_sp (NULL, temp2, guard_size, true);
5438 emit_stack_probe (plus_constant (Pmode, stack_pointer_rtx,
5439 guard_used_by_caller));
5440 emit_insn (gen_blockage ());
5441 }
5442 dump_stack_clash_frame_info (PROBE_INLINE, size != rounded_size);
5443 }
5444 else
5445 {
5446 /* Compute the ending address. */
5447 aarch64_add_offset (Pmode, temp1, stack_pointer_rtx, -rounded_size,
5448 temp1, NULL, false, true);
5449 rtx_insn *insn = get_last_insn ();
5450
5451 /* For the initial allocation, we don't have a frame pointer
5452 set up, so we always need CFI notes. If we're doing the
5453 final allocation, then we may have a frame pointer, in which
5454 case it is the CFA, otherwise we need CFI notes.
5455
5456 We can determine which allocation we are doing by looking at
5457 the value of FRAME_RELATED_P since the final allocations are not
5458 frame related. */
5459 if (frame_related_p)
5460 {
5461 /* We want the CFA independent of the stack pointer for the
5462 duration of the loop. */
5463 add_reg_note (insn, REG_CFA_DEF_CFA,
5464 plus_constant (Pmode, temp1, rounded_size));
5465 RTX_FRAME_RELATED_P (insn) = 1;
5466 }
5467
5468 /* This allocates and probes the stack. Note that this re-uses some of
5469 the existing Ada stack protection code. However we are guaranteed not
5470 to enter the non loop or residual branches of that code.
5471
5472 The non-loop part won't be entered because if our allocation amount
5473 doesn't require a loop, the case above would handle it.
5474
5475 The residual branch won't be entered because TEMP1 is a multiple of
5476 the allocation size. The residual will always be 0. As such, the only
5477 part we are actually using from that code is the loop setup. The
5478 actual probing is done in aarch64_output_probe_stack_range. */
5479 insn = emit_insn (gen_probe_stack_range (stack_pointer_rtx,
5480 stack_pointer_rtx, temp1));
5481
5482 /* Now reset the CFA register if needed. */
5483 if (frame_related_p)
5484 {
5485 add_reg_note (insn, REG_CFA_DEF_CFA,
5486 plus_constant (Pmode, stack_pointer_rtx, rounded_size));
5487 RTX_FRAME_RELATED_P (insn) = 1;
5488 }
5489
5490 emit_insn (gen_blockage ());
5491 dump_stack_clash_frame_info (PROBE_LOOP, size != rounded_size);
5492 }
5493
5494 /* Handle any residuals. Residuals of at least MIN_PROBE_THRESHOLD have to
5495 be probed. This maintains the requirement that each page is probed at
5496 least once. For initial probing we probe only if the allocation is
5497 more than GUARD_SIZE - buffer, and for the outgoing arguments we probe
5498 if the amount is larger than buffer. GUARD_SIZE - buffer + buffer ==
5499 GUARD_SIZE. This means that any allocation large enough to trigger a
5500 probe here gets at least one, and any allocation too small for this code
5501 to emit anything will already have had its page probed by the saving of
5502 FP/LR, either by this function or by one of its callees. If we don't
5503 have any callees then we won't have more stack adjustments and so are
5504 still safe. */
5505 if (residual)
5506 {
5507 HOST_WIDE_INT residual_probe_offset = guard_used_by_caller;
5508 /* If we're doing final adjustments, and we've done any full page
5509 allocations then any residual needs to be probed. */
5510 if (final_adjustment_p && rounded_size != 0)
5511 min_probe_threshold = 0;
5512 /* If doing a small final adjustment, we always probe at offset 0.
5513 This is done to avoid issues when LR is not at position 0 or when
5514 the final adjustment is smaller than the probing offset. */
5515 else if (final_adjustment_p && rounded_size == 0)
5516 residual_probe_offset = 0;
5517
5518 aarch64_sub_sp (temp1, temp2, residual, frame_related_p);
5519 if (residual >= min_probe_threshold)
5520 {
5521 if (dump_file)
5522 fprintf (dump_file,
5523 "Stack clash AArch64 prologue residuals: "
5524 HOST_WIDE_INT_PRINT_DEC " bytes, probing will be required."
5525 "\n", residual);
5526
5527 emit_stack_probe (plus_constant (Pmode, stack_pointer_rtx,
5528 residual_probe_offset));
5529 emit_insn (gen_blockage ());
5530 }
5531 }
5532}
5533
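To make the rounding arithmetic above concrete, here is a standalone sketch under the default 64KB guard with the 1KB caller-reserved buffer; the constants and numbers are illustrative only and are not read from GCC.

/* Standalone sketch of the allocation split used above: a requested
   allocation is broken into whole guard-sized pages (each probed once)
   plus a residual that is probed only if it exceeds the threshold.  */
#include <stdio.h>

int main (void)
{
  long guard_size = 64 * 1024;          /* probing interval */
  long caller_buffer = 1024;            /* buffer reserved for the caller */
  long min_probe_threshold = guard_size - caller_buffer;   /* 63KB */

  long size = 200 * 1024;                                   /* frame size */
  long rounded_size = (size / guard_size) * guard_size;     /* ROUND_DOWN */
  long residual = size - rounded_size;

  /* 3 probed pages of 64KB plus an 8KB residual; the residual is below
     the 63KB threshold, so its page is covered by the FP/LR save.  */
  printf ("pages=%ld residual=%ld probe_residual=%d\n",
          rounded_size / guard_size, residual,
          residual >= min_probe_threshold);
  return 0;
}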
a0d0b980
SE
5534/* Return 1 if the register is used by the epilogue. We need to say the
5535 return register is used, but only after epilogue generation is complete.
5536 Note that in the case of sibcalls, the values "used by the epilogue" are
5537 considered live at the start of the called function.
5538
5539 For SIMD functions we need to return 1 for FP registers that are saved and
5540 restored by a function but are not zero in call_used_regs. If we do not do
5541 this optimizations may remove the restore of the register. */
5542
5543int
5544aarch64_epilogue_uses (int regno)
5545{
5546 if (epilogue_completed)
5547 {
5548 if (regno == LR_REGNUM)
5549 return 1;
5550 if (aarch64_simd_decl_p (cfun->decl) && FP_SIMD_SAVED_REGNUM_P (regno))
5551 return 1;
5552 }
5553 return 0;
5554}
5555
43cacb12
RS
5556/* Add a REG_CFA_EXPRESSION note to INSN to say that register REG
5557 is saved at BASE + OFFSET. */
5558
5559static void
5560aarch64_add_cfa_expression (rtx_insn *insn, unsigned int reg,
5561 rtx base, poly_int64 offset)
5562{
5563 rtx mem = gen_frame_mem (DImode, plus_constant (Pmode, base, offset));
5564 add_reg_note (insn, REG_CFA_EXPRESSION,
5565 gen_rtx_SET (mem, regno_reg_rtx[reg]));
5566}
5567
43e9d192
IB
5568/* AArch64 stack frames generated by this compiler look like:
5569
5570 +-------------------------------+
5571 | |
5572 | incoming stack arguments |
5573 | |
34834420
MS
5574 +-------------------------------+
5575 | | <-- incoming stack pointer (aligned)
43e9d192
IB
5576 | callee-allocated save area |
5577 | for register varargs |
5578 | |
34834420
MS
5579 +-------------------------------+
5580 | local variables | <-- frame_pointer_rtx
43e9d192
IB
5581 | |
5582 +-------------------------------+
cd1bef27 5583 | padding | \
454fdba9 5584 +-------------------------------+ |
454fdba9 5585 | callee-saved registers | | frame.saved_regs_size
454fdba9
RL
5586 +-------------------------------+ |
5587 | LR' | |
5588 +-------------------------------+ |
34834420
MS
5589 | FP' | / <- hard_frame_pointer_rtx (aligned)
5590 +-------------------------------+
43e9d192
IB
5591 | dynamic allocation |
5592 +-------------------------------+
34834420
MS
5593 | padding |
5594 +-------------------------------+
5595 | outgoing stack arguments | <-- arg_pointer
5596 | |
5597 +-------------------------------+
5598 | | <-- stack_pointer_rtx (aligned)
43e9d192 5599
34834420
MS
5600 Dynamic stack allocations via alloca() decrease stack_pointer_rtx
5601 but leave frame_pointer_rtx and hard_frame_pointer_rtx
cd1bef27
JL
5602 unchanged.
5603
5604 By default for stack-clash we assume the guard is at least 64KB, but this
5605 value is configurable to either 4KB or 64KB. We also force the guard size to
5606 be the same as the probing interval and both values are kept in sync.
5607
5608 With those assumptions the callee can allocate up to 63KB (or 3KB depending
5609 on the guard size) of stack space without probing.
5610
5611 When probing is needed, we emit a probe at the start of the prologue
5612 and every PARAM_STACK_CLASH_PROTECTION_GUARD_SIZE bytes thereafter.
5613
5614 We have to track how much space has been allocated and the only stores
5615 to the stack we track as implicit probes are the FP/LR stores.
5616
5617 For outgoing arguments we probe if the size is larger than 1KB, such that
143d3b15
TC
5618 the ABI specified buffer is maintained for the next callee.
5619
5620 The following registers are reserved during frame layout and should not be
5621 used for any other purpose:
5622
5623 - r11: Used by stack clash protection when SVE is enabled.
5624 - r12(EP0) and r13(EP1): Used as temporaries for stack adjustment.
5625 - r14 and r15: Used for speculation tracking.
5626 - r16(IP0), r17(IP1): Used by indirect tailcalls.
5627 - r30(LR), r29(FP): Used by standard frame layout.
5628
5629 These registers must be avoided in frame layout related code unless the
5630 explicit intention is to interact with one of the features listed above. */
43e9d192
IB
5631
5632/* Generate the prologue instructions for entry into a function.
5633 Establish the stack frame by decreasing the stack pointer with a
5634 properly calculated size and, if necessary, create a frame record
5635 filled with the values of LR and previous frame pointer. The
6991c977 5636 current FP is also set up if it is in use. */
43e9d192
IB
5637
5638void
5639aarch64_expand_prologue (void)
5640{
6a70badb
RS
5641 poly_int64 frame_size = cfun->machine->frame.frame_size;
5642 poly_int64 initial_adjust = cfun->machine->frame.initial_adjust;
71bfb77a 5643 HOST_WIDE_INT callee_adjust = cfun->machine->frame.callee_adjust;
6a70badb
RS
5644 poly_int64 final_adjust = cfun->machine->frame.final_adjust;
5645 poly_int64 callee_offset = cfun->machine->frame.callee_offset;
71bfb77a
WD
5646 unsigned reg1 = cfun->machine->frame.wb_candidate1;
5647 unsigned reg2 = cfun->machine->frame.wb_candidate2;
204d2c03 5648 bool emit_frame_chain = cfun->machine->frame.emit_frame_chain;
71bfb77a 5649 rtx_insn *insn;
43e9d192 5650
db58fd89
JW
5651 /* Sign return address for functions. */
5652 if (aarch64_return_address_signing_enabled ())
27169e45
JW
5653 {
5654 insn = emit_insn (gen_pacisp ());
5655 add_reg_note (insn, REG_CFA_TOGGLE_RA_MANGLE, const0_rtx);
5656 RTX_FRAME_RELATED_P (insn) = 1;
5657 }
db58fd89 5658
dd991abb 5659 if (flag_stack_usage_info)
6a70badb 5660 current_function_static_stack_size = constant_lower_bound (frame_size);
43e9d192 5661
a3eb8a52
EB
5662 if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK)
5663 {
5664 if (crtl->is_leaf && !cfun->calls_alloca)
5665 {
6a70badb
RS
5666 if (maybe_gt (frame_size, PROBE_INTERVAL)
5667 && maybe_gt (frame_size, get_stack_check_protect ()))
8c1dd970
JL
5668 aarch64_emit_probe_stack_range (get_stack_check_protect (),
5669 (frame_size
5670 - get_stack_check_protect ()));
a3eb8a52 5671 }
6a70badb 5672 else if (maybe_gt (frame_size, 0))
8c1dd970 5673 aarch64_emit_probe_stack_range (get_stack_check_protect (), frame_size);
a3eb8a52
EB
5674 }
5675
901e66e0
SD
5676 rtx tmp0_rtx = gen_rtx_REG (Pmode, EP0_REGNUM);
5677 rtx tmp1_rtx = gen_rtx_REG (Pmode, EP1_REGNUM);
f5470a77 5678
cd1bef27
JL
5679 /* In theory we should never have both an initial adjustment
5680 and a callee save adjustment. Verify that is the case since the
5681 code below does not handle it for -fstack-clash-protection. */
5682 gcc_assert (known_eq (initial_adjust, 0) || callee_adjust == 0);
5683
5684 /* Will only probe if the initial adjustment is larger than the guard
5685 less the amount of the guard reserved for use by the caller's
5686 outgoing args. */
901e66e0 5687 aarch64_allocate_and_probe_stack_space (tmp0_rtx, tmp1_rtx, initial_adjust,
cd1bef27 5688 true, false);
43e9d192 5689
71bfb77a
WD
5690 if (callee_adjust != 0)
5691 aarch64_push_regs (reg1, reg2, callee_adjust);
43e9d192 5692
204d2c03 5693 if (emit_frame_chain)
43e9d192 5694 {
43cacb12 5695 poly_int64 reg_offset = callee_adjust;
71bfb77a 5696 if (callee_adjust == 0)
43cacb12
RS
5697 {
5698 reg1 = R29_REGNUM;
5699 reg2 = R30_REGNUM;
5700 reg_offset = callee_offset;
5701 aarch64_save_callee_saves (DImode, reg_offset, reg1, reg2, false);
5702 }
f5470a77 5703 aarch64_add_offset (Pmode, hard_frame_pointer_rtx,
43cacb12 5704 stack_pointer_rtx, callee_offset,
901e66e0 5705 tmp1_rtx, tmp0_rtx, frame_pointer_needed);
43cacb12
RS
5706 if (frame_pointer_needed && !frame_size.is_constant ())
5707 {
5708 /* Variable-sized frames need to describe the save slot
5709 address using DW_CFA_expression rather than DW_CFA_offset.
5710 This means that, without taking further action, the
5711 locations of the registers that we've already saved would
5712 remain based on the stack pointer even after we redefine
5713 the CFA based on the frame pointer. We therefore need new
5714 DW_CFA_expressions to re-express the save slots with addresses
5715 based on the frame pointer. */
5716 rtx_insn *insn = get_last_insn ();
5717 gcc_assert (RTX_FRAME_RELATED_P (insn));
5718
5719 /* Add an explicit CFA definition if this was previously
5720 implicit. */
5721 if (!find_reg_note (insn, REG_CFA_ADJUST_CFA, NULL_RTX))
5722 {
5723 rtx src = plus_constant (Pmode, stack_pointer_rtx,
5724 callee_offset);
5725 add_reg_note (insn, REG_CFA_ADJUST_CFA,
5726 gen_rtx_SET (hard_frame_pointer_rtx, src));
5727 }
5728
5729 /* Change the save slot expressions for the registers that
5730 we've already saved. */
5731 reg_offset -= callee_offset;
5732 aarch64_add_cfa_expression (insn, reg2, hard_frame_pointer_rtx,
5733 reg_offset + UNITS_PER_WORD);
5734 aarch64_add_cfa_expression (insn, reg1, hard_frame_pointer_rtx,
5735 reg_offset);
5736 }
71bfb77a 5737 emit_insn (gen_stack_tie (stack_pointer_rtx, hard_frame_pointer_rtx));
43e9d192 5738 }
71bfb77a
WD
5739
5740 aarch64_save_callee_saves (DImode, callee_offset, R0_REGNUM, R30_REGNUM,
204d2c03 5741 callee_adjust != 0 || emit_frame_chain);
a0d0b980
SE
5742 if (aarch64_simd_decl_p (cfun->decl))
5743 aarch64_save_callee_saves (TFmode, callee_offset, V0_REGNUM, V31_REGNUM,
5744 callee_adjust != 0 || emit_frame_chain);
5745 else
5746 aarch64_save_callee_saves (DFmode, callee_offset, V0_REGNUM, V31_REGNUM,
5747 callee_adjust != 0 || emit_frame_chain);
cd1bef27
JL
5748
5749 /* We may need to probe the final adjustment if it is larger than the guard
5750 that is assumed by the called function. */
901e66e0 5751 aarch64_allocate_and_probe_stack_space (tmp1_rtx, tmp0_rtx, final_adjust,
cd1bef27 5752 !frame_pointer_needed, true);
43e9d192
IB
5753}
5754
4f942779
RL
5755/* Return TRUE if we can use a simple_return insn.
5756
5757 This function checks whether the callee saved stack is empty, which
5758 means no restore actions are need. The pro_and_epilogue will use
5759 this to check whether shrink-wrapping opt is feasible. */
5760
5761bool
5762aarch64_use_return_insn_p (void)
5763{
5764 if (!reload_completed)
5765 return false;
5766
5767 if (crtl->profile)
5768 return false;
5769
6a70badb 5770 return known_eq (cfun->machine->frame.frame_size, 0);
4f942779
RL
5771}
5772
a0d0b980
SE
5773/* Return false for non-leaf SIMD functions in order to avoid
5774 shrink-wrapping them; shrink-wrapping would lose the necessary
5775 save/restore of FP registers. */
5776
5777bool
5778aarch64_use_simple_return_insn_p (void)
5779{
5780 if (aarch64_simd_decl_p (cfun->decl) && !crtl->is_leaf)
5781 return false;
5782
5783 return true;
5784}
5785
71bfb77a
WD
5786/* Generate the epilogue instructions for returning from a function.
5787 This is almost exactly the reverse of the prolog sequence, except
5788 that we need to insert barriers to avoid scheduling loads that read
5789 from a deallocated stack, and we optimize the unwind records by
5790 emitting them all together if possible. */
43e9d192
IB
5791void
5792aarch64_expand_epilogue (bool for_sibcall)
5793{
6a70badb 5794 poly_int64 initial_adjust = cfun->machine->frame.initial_adjust;
71bfb77a 5795 HOST_WIDE_INT callee_adjust = cfun->machine->frame.callee_adjust;
6a70badb
RS
5796 poly_int64 final_adjust = cfun->machine->frame.final_adjust;
5797 poly_int64 callee_offset = cfun->machine->frame.callee_offset;
71bfb77a
WD
5798 unsigned reg1 = cfun->machine->frame.wb_candidate1;
5799 unsigned reg2 = cfun->machine->frame.wb_candidate2;
5800 rtx cfi_ops = NULL;
5801 rtx_insn *insn;
901e66e0
SD
5802 /* A stack clash protection prologue may not have left EP0_REGNUM or
5803 EP1_REGNUM in a usable state. The same is true for allocations
43cacb12 5804 with an SVE component, since we then need both temporary registers
cd1bef27
JL
5805 for each allocation. For stack clash we are in a usable state if
5806 the adjustment is less than GUARD_SIZE - GUARD_USED_BY_CALLER. */
5807 HOST_WIDE_INT guard_size
5808 = 1 << PARAM_VALUE (PARAM_STACK_CLASH_PROTECTION_GUARD_SIZE);
5809 HOST_WIDE_INT guard_used_by_caller = STACK_CLASH_CALLER_GUARD;
5810
5811 /* We can re-use the registers when the allocation amount is smaller than
5812 guard_size - guard_used_by_caller because we won't be doing any probes
5813 then. In such situations the register should remain live with the correct
5814 value. */
43cacb12 5815 bool can_inherit_p = (initial_adjust.is_constant ()
cd1bef27
JL
5816 && final_adjust.is_constant ())
5817 && (!flag_stack_clash_protection
901e66e0
SD
5818 || known_lt (initial_adjust,
5819 guard_size - guard_used_by_caller));
44c0e7b9 5820
71bfb77a 5821 /* We need to add memory barrier to prevent read from deallocated stack. */
6a70badb
RS
5822 bool need_barrier_p
5823 = maybe_ne (get_frame_size ()
5824 + cfun->machine->frame.saved_varargs_size, 0);
43e9d192 5825
71bfb77a 5826 /* Emit a barrier to prevent loads from a deallocated stack. */
6a70badb
RS
5827 if (maybe_gt (final_adjust, crtl->outgoing_args_size)
5828 || cfun->calls_alloca
8144a493 5829 || crtl->calls_eh_return)
43e9d192 5830 {
71bfb77a
WD
5831 emit_insn (gen_stack_tie (stack_pointer_rtx, stack_pointer_rtx));
5832 need_barrier_p = false;
5833 }
7e8c2bd5 5834
71bfb77a
WD
5835 /* Restore the stack pointer from the frame pointer if it may not
5836 be the same as the stack pointer. */
901e66e0
SD
5837 rtx tmp0_rtx = gen_rtx_REG (Pmode, EP0_REGNUM);
5838 rtx tmp1_rtx = gen_rtx_REG (Pmode, EP1_REGNUM);
6a70badb
RS
5839 if (frame_pointer_needed
5840 && (maybe_ne (final_adjust, 0) || cfun->calls_alloca))
f5470a77
RS
5841 /* If writeback is used when restoring callee-saves, the CFA
5842 is restored on the instruction doing the writeback. */
5843 aarch64_add_offset (Pmode, stack_pointer_rtx,
5844 hard_frame_pointer_rtx, -callee_offset,
901e66e0 5845 tmp1_rtx, tmp0_rtx, callee_adjust == 0);
71bfb77a 5846 else
cd1bef27
JL
5847 /* The case where we need to re-use the register here is very rare, so
5848 avoid the complicated condition and just always emit a move if the
5849 immediate doesn't fit. */
901e66e0 5850 aarch64_add_sp (tmp1_rtx, tmp0_rtx, final_adjust, true);
43e9d192 5851
71bfb77a
WD
5852 aarch64_restore_callee_saves (DImode, callee_offset, R0_REGNUM, R30_REGNUM,
5853 callee_adjust != 0, &cfi_ops);
a0d0b980
SE
5854 if (aarch64_simd_decl_p (cfun->decl))
5855 aarch64_restore_callee_saves (TFmode, callee_offset, V0_REGNUM, V31_REGNUM,
5856 callee_adjust != 0, &cfi_ops);
5857 else
5858 aarch64_restore_callee_saves (DFmode, callee_offset, V0_REGNUM, V31_REGNUM,
5859 callee_adjust != 0, &cfi_ops);
43e9d192 5860
71bfb77a
WD
5861 if (need_barrier_p)
5862 emit_insn (gen_stack_tie (stack_pointer_rtx, stack_pointer_rtx));
5863
5864 if (callee_adjust != 0)
5865 aarch64_pop_regs (reg1, reg2, callee_adjust, &cfi_ops);
5866
6a70badb 5867 if (callee_adjust != 0 || maybe_gt (initial_adjust, 65536))
71bfb77a
WD
5868 {
5869 /* Emit delayed restores and set the CFA to be SP + initial_adjust. */
89ac681e 5870 insn = get_last_insn ();
71bfb77a
WD
5871 rtx new_cfa = plus_constant (Pmode, stack_pointer_rtx, initial_adjust);
5872 REG_NOTES (insn) = alloc_reg_note (REG_CFA_DEF_CFA, new_cfa, cfi_ops);
43e9d192 5873 RTX_FRAME_RELATED_P (insn) = 1;
71bfb77a 5874 cfi_ops = NULL;
43e9d192
IB
5875 }
5876
901e66e0
SD
5877 /* Liveness of EP0_REGNUM cannot be trusted across function calls either, so
5878 restrict the emit_move optimization to leaf functions. */
5879 aarch64_add_sp (tmp0_rtx, tmp1_rtx, initial_adjust,
5880 (!can_inherit_p || !crtl->is_leaf
5881 || df_regs_ever_live_p (EP0_REGNUM)));
7e8c2bd5 5882
71bfb77a
WD
5883 if (cfi_ops)
5884 {
5885 /* Emit delayed restores and reset the CFA to be SP. */
5886 insn = get_last_insn ();
5887 cfi_ops = alloc_reg_note (REG_CFA_DEF_CFA, stack_pointer_rtx, cfi_ops);
5888 REG_NOTES (insn) = cfi_ops;
5889 RTX_FRAME_RELATED_P (insn) = 1;
dd991abb
RH
5890 }
5891
db58fd89
JW
5892 /* We prefer to emit the combined return/authenticate instruction RETAA,
5893 however there are three cases in which we must instead emit an explicit
5894 authentication instruction.
5895
5896 1) Sibcalls don't return in a normal way, so if we're about to call one
5897 we must authenticate.
5898
5899 2) The RETAA instruction is not available before ARMv8.3-A, so if we are
5900 generating code for !TARGET_ARMV8_3 we can't use it and must
5901 explicitly authenticate.
5902
5903 3) On an eh_return path we make extra stack adjustments to update the
5904 canonical frame address to be the exception handler's CFA. We want
5905 to authenticate using the CFA of the function which calls eh_return.
5906 */
5907 if (aarch64_return_address_signing_enabled ()
5908 && (for_sibcall || !TARGET_ARMV8_3 || crtl->calls_eh_return))
27169e45
JW
5909 {
5910 insn = emit_insn (gen_autisp ());
5911 add_reg_note (insn, REG_CFA_TOGGLE_RA_MANGLE, const0_rtx);
5912 RTX_FRAME_RELATED_P (insn) = 1;
5913 }
db58fd89 5914
dd991abb 5915 /* Stack adjustment for exception handler. */
b5b9147d 5916 if (crtl->calls_eh_return && !for_sibcall)
dd991abb
RH
5917 {
5918 /* We need to unwind the stack by the offset computed by
5919 EH_RETURN_STACKADJ_RTX. We have already reset the CFA
5920 to be SP; letting the CFA move during this adjustment
5921 is just as correct as retaining the CFA from the body
5922 of the function. Therefore, do nothing special. */
5923 emit_insn (gen_add2_insn (stack_pointer_rtx, EH_RETURN_STACKADJ_RTX));
43e9d192
IB
5924 }
5925
5926 emit_use (gen_rtx_REG (DImode, LR_REGNUM));
5927 if (!for_sibcall)
5928 emit_jump_insn (ret_rtx);
5929}
5930
8144a493
WD
5931/* Implement EH_RETURN_HANDLER_RTX. EH returns need to either return
5932 normally or return to a previous frame after unwinding.
1c960e02 5933
8144a493
WD
5934 An EH return uses a single shared return sequence. The epilogue is
5935 exactly like a normal epilogue except that it has an extra input
5936 register (EH_RETURN_STACKADJ_RTX) which contains the stack adjustment
5937 that must be applied after the frame has been destroyed. An extra label
5938 is inserted before the epilogue which initializes this register to zero,
5939 and this is the entry point for a normal return.
43e9d192 5940
8144a493
WD
5941 An actual EH return updates the return address, initializes the stack
5942 adjustment and jumps directly into the epilogue (bypassing the zeroing
5943 of the adjustment). Since the return address is typically saved on the
5944 stack when a function makes a call, the saved LR must be updated outside
5945 the epilogue.
43e9d192 5946
8144a493
WD
5947 This poses problems as the store is generated well before the epilogue,
5948 so the offset of LR is not known yet. Also, optimizations will remove the
5949 store as it appears dead, even after the epilogue is generated (as the
5950 base or offset for loading LR is different in many cases).
43e9d192 5951
8144a493
WD
5952 To avoid these problems this implementation forces the frame pointer
5953 in eh_return functions so that the location of LR is fixed and known early.
5954 It also marks the store volatile, so no optimization is permitted to
5955 remove the store. */
5956rtx
5957aarch64_eh_return_handler_rtx (void)
5958{
5959 rtx tmp = gen_frame_mem (Pmode,
5960 plus_constant (Pmode, hard_frame_pointer_rtx, UNITS_PER_WORD));
43e9d192 5961
8144a493
WD
5962 /* Mark the store volatile, so no optimization is permitted to remove it. */
5963 MEM_VOLATILE_P (tmp) = true;
5964 return tmp;
43e9d192
IB
5965}
5966
43e9d192
IB
5967/* Output code to add DELTA to the first argument, and then jump
5968 to FUNCTION. Used for C++ multiple inheritance. */
5969static void
5970aarch64_output_mi_thunk (FILE *file, tree thunk ATTRIBUTE_UNUSED,
5971 HOST_WIDE_INT delta,
5972 HOST_WIDE_INT vcall_offset,
5973 tree function)
5974{
5975 /* The this pointer is always in x0. Note that this differs from
5976 Arm where the this pointer may be bumped to r1 if r0 is required
5977 to return a pointer to an aggregate. On AArch64 a result value
5978 pointer will be in x8. */
5979 int this_regno = R0_REGNUM;
5d8a22a5
DM
5980 rtx this_rtx, temp0, temp1, addr, funexp;
5981 rtx_insn *insn;
6b5777c6 5982 const char *fnname = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (thunk));
43e9d192 5983
c904388d
SD
5984 if (aarch64_bti_enabled ())
5985 emit_insn (gen_bti_c());
5986
75f1d6fc
SN
5987 reload_completed = 1;
5988 emit_note (NOTE_INSN_PROLOGUE_END);
43e9d192 5989
f5470a77 5990 this_rtx = gen_rtx_REG (Pmode, this_regno);
901e66e0
SD
5991 temp0 = gen_rtx_REG (Pmode, EP0_REGNUM);
5992 temp1 = gen_rtx_REG (Pmode, EP1_REGNUM);
f5470a77 5993
43e9d192 5994 if (vcall_offset == 0)
43cacb12 5995 aarch64_add_offset (Pmode, this_rtx, this_rtx, delta, temp1, temp0, false);
43e9d192
IB
5996 else
5997 {
28514dda 5998 gcc_assert ((vcall_offset & (POINTER_BYTES - 1)) == 0);
43e9d192 5999
75f1d6fc
SN
6000 addr = this_rtx;
6001 if (delta != 0)
6002 {
6003 if (delta >= -256 && delta < 256)
6004 addr = gen_rtx_PRE_MODIFY (Pmode, this_rtx,
6005 plus_constant (Pmode, this_rtx, delta));
6006 else
43cacb12
RS
6007 aarch64_add_offset (Pmode, this_rtx, this_rtx, delta,
6008 temp1, temp0, false);
43e9d192
IB
6009 }
6010
28514dda
YZ
6011 if (Pmode == ptr_mode)
6012 aarch64_emit_move (temp0, gen_rtx_MEM (ptr_mode, addr));
6013 else
6014 aarch64_emit_move (temp0,
6015 gen_rtx_ZERO_EXTEND (Pmode,
6016 gen_rtx_MEM (ptr_mode, addr)));
75f1d6fc 6017
28514dda 6018 if (vcall_offset >= -256 && vcall_offset < 4096 * POINTER_BYTES)
75f1d6fc 6019 addr = plus_constant (Pmode, temp0, vcall_offset);
43e9d192
IB
6020 else
6021 {
f43657b4
JW
6022 aarch64_internal_mov_immediate (temp1, GEN_INT (vcall_offset), true,
6023 Pmode);
75f1d6fc 6024 addr = gen_rtx_PLUS (Pmode, temp0, temp1);
43e9d192
IB
6025 }
6026
28514dda
YZ
6027 if (Pmode == ptr_mode)
6028 aarch64_emit_move (temp1, gen_rtx_MEM (ptr_mode,addr));
6029 else
6030 aarch64_emit_move (temp1,
6031 gen_rtx_SIGN_EXTEND (Pmode,
6032 gen_rtx_MEM (ptr_mode, addr)));
6033
75f1d6fc 6034 emit_insn (gen_add2_insn (this_rtx, temp1));
43e9d192
IB
6035 }
6036
75f1d6fc
SN
6037 /* Generate a tail call to the target function. */
6038 if (!TREE_USED (function))
6039 {
6040 assemble_external (function);
6041 TREE_USED (function) = 1;
6042 }
6043 funexp = XEXP (DECL_RTL (function), 0);
6044 funexp = gen_rtx_MEM (FUNCTION_MODE, funexp);
6045 insn = emit_call_insn (gen_sibcall (funexp, const0_rtx, NULL_RTX));
6046 SIBLING_CALL_P (insn) = 1;
6047
6048 insn = get_insns ();
6049 shorten_branches (insn);
6b5777c6
MF
6050
6051 assemble_start_function (thunk, fnname);
75f1d6fc
SN
6052 final_start_function (insn, file, 1);
6053 final (insn, file, 1);
43e9d192 6054 final_end_function ();
6b5777c6 6055 assemble_end_function (thunk, fnname);
75f1d6fc
SN
6056
6057 /* Stop pretending to be a post-reload pass. */
6058 reload_completed = 0;
43e9d192
IB
6059}
6060
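Roughly, the thunk that this function emits behaves like the following C model (a loose sketch of the semantics, assuming the usual vtable layout; the names are illustrative and this is not the emitted code).

/* Loose C model of an adjusting thunk: bump the this pointer by DELTA,
   optionally add a further adjustment loaded from the vtable at
   VCALL_OFFSET, then tail-call the real method.  */
#include <stddef.h>

typedef void (*method_fn) (void *thisp);

void thunk_model (void *thisp, ptrdiff_t delta, ptrdiff_t vcall_offset,
                  method_fn function)
{
  thisp = (char *) thisp + delta;
  if (vcall_offset != 0)
    {
      /* The (already adjusted) object starts with a vtable pointer; the
         extra adjustment lives at VCALL_OFFSET bytes from the vtable.  */
      char *vtable = *(char **) thisp;
      ptrdiff_t adjust = *(ptrdiff_t *) (vtable + vcall_offset);
      thisp = (char *) thisp + adjust;
    }
  function (thisp);   /* in the real thunk this is a tail call */
}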
43e9d192
IB
6061static bool
6062aarch64_tls_referenced_p (rtx x)
6063{
6064 if (!TARGET_HAVE_TLS)
6065 return false;
e7de8563
RS
6066 subrtx_iterator::array_type array;
6067 FOR_EACH_SUBRTX (iter, array, x, ALL)
6068 {
6069 const_rtx x = *iter;
6070 if (GET_CODE (x) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (x) != 0)
6071 return true;
6072 /* Don't recurse into UNSPEC_TLS looking for TLS symbols; these are
6073 TLS offsets, not real symbol references. */
6074 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
6075 iter.skip_subrtxes ();
6076 }
6077 return false;
43e9d192
IB
6078}
6079
6080
43e9d192
IB
6081/* Return true if val can be encoded as a 12-bit unsigned immediate with
6082 a left shift of 0 or 12 bits. */
6083bool
6084aarch64_uimm12_shift (HOST_WIDE_INT val)
6085{
6086 return ((val & (((HOST_WIDE_INT) 0xfff) << 0)) == val
6087 || (val & (((HOST_WIDE_INT) 0xfff) << 12)) == val
6088 );
6089}
6090
eb471ba3
TC
6091/* Returns the largest value no greater than VAL that will fit as a 12-bit
6092 unsigned immediate created with a left shift of 0 or 12. */
6093static HOST_WIDE_INT
6094aarch64_clamp_to_uimm12_shift (HOST_WIDE_INT val)
6095{
6096 /* Check to see if the value fits in 24 bits, as that is the maximum we can
6097 handle correctly. */
6098 gcc_assert ((val & 0xffffff) == val);
6099
6100 if (((val & 0xfff) << 0) == val)
6101 return val;
6102
6103 return val & (0xfff << 12);
6104}
43e9d192
IB
6105
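A standalone sketch of the two helpers above (illustrative only, not the GCC code): an ADD/SUB immediate must be a 12-bit value optionally shifted left by 12, and a 24-bit adjustment that does not fit is rounded down to the largest value that does, leaving a remainder for a second instruction.

/* Standalone sketch of the uimm12 encoding rule and the clamping step.  */
#include <stdbool.h>
#include <stdio.h>

static bool uimm12_shift (long val)
{
  return (val & 0xfff) == val || (val & (0xfffl << 12)) == val;
}

static long clamp_to_uimm12_shift (long val)
{
  if ((val & 0xfff) == val)
    return val;
  return val & (0xfffl << 12);   /* keep only the high 12 of 24 bits */
}

int main (void)
{
  printf ("%d\n", uimm12_shift (0xabc));      /* 1: fits unshifted */
  printf ("%d\n", uimm12_shift (0xabc000));   /* 1: fits shifted by 12 */
  printf ("%d\n", uimm12_shift (0x123456));   /* 0 */
  /* 0x123456 clamps to 0x123000; the 0x456 remainder needs a second add.  */
  printf ("0x%lx\n", clamp_to_uimm12_shift (0x123456));
  return 0;
}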
6106/* Return true if val is an immediate that can be loaded into a
6107 register by a MOVZ instruction. */
6108static bool
77e994c9 6109aarch64_movw_imm (HOST_WIDE_INT val, scalar_int_mode mode)
43e9d192
IB
6110{
6111 if (GET_MODE_SIZE (mode) > 4)
6112 {
6113 if ((val & (((HOST_WIDE_INT) 0xffff) << 32)) == val
6114 || (val & (((HOST_WIDE_INT) 0xffff) << 48)) == val)
6115 return 1;
6116 }
6117 else
6118 {
43cacb12
RS
6119 /* Ignore sign extension. */
6120 val &= (HOST_WIDE_INT) 0xffffffff;
6121 }
6122 return ((val & (((HOST_WIDE_INT) 0xffff) << 0)) == val
6123 || (val & (((HOST_WIDE_INT) 0xffff) << 16)) == val);
6124}
6125
6126/* VAL is a value with the inner mode of MODE. Replicate it to fill a
6127 64-bit (DImode) integer. */
6128
6129static unsigned HOST_WIDE_INT
6130aarch64_replicate_bitmask_imm (unsigned HOST_WIDE_INT val, machine_mode mode)
6131{
6132 unsigned int size = GET_MODE_UNIT_PRECISION (mode);
6133 while (size < 64)
6134 {
6135 val &= (HOST_WIDE_INT_1U << size) - 1;
6136 val |= val << size;
6137 size *= 2;
43e9d192 6138 }
43cacb12 6139 return val;
43e9d192
IB
6140}
6141
a64c73a2
WD
6142/* Multipliers for repeating bitmasks of width 32, 16, 8, 4, and 2. */
6143
6144static const unsigned HOST_WIDE_INT bitmask_imm_mul[] =
6145 {
6146 0x0000000100000001ull,
6147 0x0001000100010001ull,
6148 0x0101010101010101ull,
6149 0x1111111111111111ull,
6150 0x5555555555555555ull,
6151 };
6152
43e9d192
IB
6153
6154/* Return true if val is a valid bitmask immediate. */
a64c73a2 6155
43e9d192 6156bool
a64c73a2 6157aarch64_bitmask_imm (HOST_WIDE_INT val_in, machine_mode mode)
43e9d192 6158{
a64c73a2
WD
6159 unsigned HOST_WIDE_INT val, tmp, mask, first_one, next_one;
6160 int bits;
6161
6162 /* Check for a single sequence of one bits and return quickly if so.
6163 The special cases of all ones and all zeroes returns false. */
43cacb12 6164 val = aarch64_replicate_bitmask_imm (val_in, mode);
a64c73a2
WD
6165 tmp = val + (val & -val);
6166
6167 if (tmp == (tmp & -tmp))
6168 return (val + 1) > 1;
6169
6170 /* Replicate 32-bit immediates so we can treat them as 64-bit. */
6171 if (mode == SImode)
6172 val = (val << 32) | (val & 0xffffffff);
6173
6174 /* Invert if the immediate doesn't start with a zero bit - this means we
6175 only need to search for sequences of one bits. */
6176 if (val & 1)
6177 val = ~val;
6178
6179 /* Find the first set bit and set tmp to val with the first sequence of one
6180 bits removed. Return success if there is a single sequence of ones. */
6181 first_one = val & -val;
6182 tmp = val & (val + first_one);
6183
6184 if (tmp == 0)
6185 return true;
6186
6187 /* Find the next set bit and compute the difference in bit position. */
6188 next_one = tmp & -tmp;
6189 bits = clz_hwi (first_one) - clz_hwi (next_one);
6190 mask = val ^ tmp;
6191
6192 /* Check the bit position difference is a power of 2, and that the first
6193 sequence of one bits fits within 'bits' bits. */
6194 if ((mask >> bits) != 0 || bits != (bits & -bits))
6195 return false;
6196
6197 /* Check the sequence of one bits is repeated 64/bits times. */
6198 return val == mask * bitmask_imm_mul[__builtin_clz (bits) - 26];
43e9d192
IB
6199}
6200
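The replicate-and-check trick above is compact but subtle. For reference, here is a minimal standalone checker written directly from the encoding rule (an element of 2, 4, 8, 16, 32 or 64 bits that is a rotated run of contiguous ones, replicated to 64 bits; all-zeros and all-ones are excluded). It illustrates the same predicate but is not the GCC implementation.

/* Standalone sketch of the bitmask-immediate rule.  */
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

static bool bitmask_imm64 (uint64_t val)
{
  if (val == 0 || val == UINT64_MAX)
    return false;
  for (unsigned size = 2; size <= 64; size *= 2)
    {
      uint64_t mask = size == 64 ? UINT64_MAX : ((uint64_t) 1 << size) - 1;
      uint64_t elt = val & mask;

      /* The element must repeat across all 64 bits.  */
      bool repeats = true;
      for (unsigned i = size; i < 64; i += size)
        if (((val >> i) & mask) != elt)
          {
            repeats = false;
            break;
          }
      if (!repeats)
        continue;

      /* Some rotation of the element must be of the form 2^n - 1,
         i.e. a contiguous run of ones starting at bit 0.  */
      for (unsigned r = 0; r < size; r++)
        {
          uint64_t rot = r == 0 ? elt
                                : ((elt >> r) | (elt << (size - r))) & mask;
          if (rot != 0 && (rot & (rot + 1)) == 0)
            return true;
        }
      return false;
    }
  return false;
}

int main (void)
{
  printf ("%d\n", bitmask_imm64 (0x5555555555555555ull));  /* 1 */
  printf ("%d\n", bitmask_imm64 (0x00ff00ff00ff00ffull));  /* 1 */
  printf ("%d\n", bitmask_imm64 (0x0000000000000007ull));  /* 1 */
  printf ("%d\n", bitmask_imm64 (0x0505050505050505ull));  /* 0 */
  return 0;
}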
43fd192f
MC
6201/* Create mask of ones, covering the lowest to highest bits set in VAL_IN.
6202 Assumed precondition: VAL_IN is not zero. */
6203
6204unsigned HOST_WIDE_INT
6205aarch64_and_split_imm1 (HOST_WIDE_INT val_in)
6206{
6207 int lowest_bit_set = ctz_hwi (val_in);
6208 int highest_bit_set = floor_log2 (val_in);
6209 gcc_assert (val_in != 0);
6210
6211 return ((HOST_WIDE_INT_UC (2) << highest_bit_set) -
6212 (HOST_WIDE_INT_1U << lowest_bit_set));
6213}
6214
6215/* Create a constant where all bits outside the range from the lowest to the
6216 highest set bit of VAL_IN are set to 1. */
6217
6218unsigned HOST_WIDE_INT
6219aarch64_and_split_imm2 (HOST_WIDE_INT val_in)
6220{
6221 return val_in | ~aarch64_and_split_imm1 (val_in);
6222}
6223
6224/* Return true if VAL_IN is a valid 'and' bitmask immediate. */
6225
6226bool
6227aarch64_and_bitmask_imm (unsigned HOST_WIDE_INT val_in, machine_mode mode)
6228{
77e994c9
RS
6229 scalar_int_mode int_mode;
6230 if (!is_a <scalar_int_mode> (mode, &int_mode))
6231 return false;
6232
6233 if (aarch64_bitmask_imm (val_in, int_mode))
43fd192f
MC
6234 return false;
6235
77e994c9 6236 if (aarch64_move_imm (val_in, int_mode))
43fd192f
MC
6237 return false;
6238
6239 unsigned HOST_WIDE_INT imm2 = aarch64_and_split_imm2 (val_in);
6240
77e994c9 6241 return aarch64_bitmask_imm (imm2, int_mode);
43fd192f 6242}
43e9d192
IB
6243
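A short worked example of the split enabled by the three functions above (a sketch, not GCC code): imm1 covers the span from the lowest to the highest set bit, imm2 sets every bit outside that span, and imm1 & imm2 recovers the original value, so x & val can be computed with two AND-immediate instructions when both parts are valid bitmask immediates.

/* Standalone sketch of the and-immediate split: imm1 & imm2 == val, so
   (x & imm1) & imm2 == x & val.  */
#include <stdint.h>
#include <stdio.h>

static uint64_t split_imm1 (uint64_t val)  /* ones from lowest to highest set bit */
{
  int lo = __builtin_ctzll (val);
  int hi = 63 - __builtin_clzll (val);
  return ((uint64_t) 2 << hi) - ((uint64_t) 1 << lo);
}

static uint64_t split_imm2 (uint64_t val)  /* val with bits outside the span set */
{
  return val | ~split_imm1 (val);
}

int main (void)
{
  uint64_t val = 0x0000fff000ff0000ull;    /* not a single run of ones */
  uint64_t imm1 = split_imm1 (val);
  uint64_t imm2 = split_imm2 (val);
  printf ("imm1=%016llx imm2=%016llx ok=%d\n",
          (unsigned long long) imm1, (unsigned long long) imm2,
          (imm1 & imm2) == val);
  return 0;
}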
6244/* Return true if val is an immediate that can be loaded into a
6245 register in a single instruction. */
6246bool
ef4bddc2 6247aarch64_move_imm (HOST_WIDE_INT val, machine_mode mode)
43e9d192 6248{
77e994c9
RS
6249 scalar_int_mode int_mode;
6250 if (!is_a <scalar_int_mode> (mode, &int_mode))
6251 return false;
6252
6253 if (aarch64_movw_imm (val, int_mode) || aarch64_movw_imm (~val, int_mode))
43e9d192 6254 return 1;
77e994c9 6255 return aarch64_bitmask_imm (val, int_mode);
43e9d192
IB
6256}
6257
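Putting the pieces together, aarch64_move_imm accepts a constant that a single MOVZ, MOVN or bitmask ORR can materialize. A standalone sketch of the MOVZ/MOVN half (hypothetical helper name, not GCC code):

/* Standalone sketch: a 64-bit constant is MOVZ-encodable if it has at most
   one non-zero 16-bit chunk; it is MOVN-encodable if its complement is.  */
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

static bool one_chunk (uint64_t val)
{
  for (int shift = 0; shift < 64; shift += 16)
    if ((val & ((uint64_t) 0xffff << shift)) == val)
      return true;
  return false;
}

int main (void)
{
  uint64_t a = 0x0000ffff00000000ull;
  uint64_t b = 0xffffffff1234ffffull;
  printf ("%d\n", one_chunk (a));                      /* 1: MOVZ */
  printf ("%d\n", one_chunk (~b));                     /* 1: MOVN of ~b */
  printf ("%d\n", one_chunk (0x12340000ffff0000ull));  /* 0: needs MOVK too */
  return 0;
}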
6258static bool
ef4bddc2 6259aarch64_cannot_force_const_mem (machine_mode mode ATTRIBUTE_UNUSED, rtx x)
43e9d192
IB
6260{
6261 rtx base, offset;
7eda14e1 6262
43e9d192
IB
6263 if (GET_CODE (x) == HIGH)
6264 return true;
6265
43cacb12
RS
6266 /* There's no way to calculate VL-based values using relocations. */
6267 subrtx_iterator::array_type array;
6268 FOR_EACH_SUBRTX (iter, array, x, ALL)
6269 if (GET_CODE (*iter) == CONST_POLY_INT)
6270 return true;
6271
43e9d192
IB
6272 split_const (x, &base, &offset);
6273 if (GET_CODE (base) == SYMBOL_REF || GET_CODE (base) == LABEL_REF)
28514dda 6274 {
43cacb12 6275 if (aarch64_classify_symbol (base, INTVAL (offset))
28514dda
YZ
6276 != SYMBOL_FORCE_TO_MEM)
6277 return true;
6278 else
6279 /* Avoid generating a 64-bit relocation in ILP32; leave
6280 to aarch64_expand_mov_immediate to handle it properly. */
6281 return mode != ptr_mode;
6282 }
43e9d192
IB
6283
6284 return aarch64_tls_referenced_p (x);
6285}
6286
e79136e4
WD
6287/* Implement TARGET_CASE_VALUES_THRESHOLD.
6288 The expansion for a table switch is quite expensive due to the number
6289 of instructions, the table lookup and hard to predict indirect jump.
6290 When optimizing for speed, and -O3 enabled, use the per-core tuning if
6291 set, otherwise use tables for > 16 cases as a tradeoff between size and
6292 performance. When optimizing for size, use the default setting. */
50487d79
EM
6293
6294static unsigned int
6295aarch64_case_values_threshold (void)
6296{
6297 /* Use the specified limit for the number of cases before using jump
6298 tables at higher optimization levels. */
6299 if (optimize > 2
6300 && selected_cpu->tune->max_case_values != 0)
6301 return selected_cpu->tune->max_case_values;
6302 else
e79136e4 6303 return optimize_size ? default_case_values_threshold () : 17;
50487d79
EM
6304}
6305
43e9d192
IB
6306/* Return true if register REGNO is a valid index register.
6307 STRICT_P is true if REG_OK_STRICT is in effect. */
6308
6309bool
6310aarch64_regno_ok_for_index_p (int regno, bool strict_p)
6311{
6312 if (!HARD_REGISTER_NUM_P (regno))
6313 {
6314 if (!strict_p)
6315 return true;
6316
6317 if (!reg_renumber)
6318 return false;
6319
6320 regno = reg_renumber[regno];
6321 }
6322 return GP_REGNUM_P (regno);
6323}
6324
6325/* Return true if register REGNO is a valid base register for mode MODE.
6326 STRICT_P is true if REG_OK_STRICT is in effect. */
6327
6328bool
6329aarch64_regno_ok_for_base_p (int regno, bool strict_p)
6330{
6331 if (!HARD_REGISTER_NUM_P (regno))
6332 {
6333 if (!strict_p)
6334 return true;
6335
6336 if (!reg_renumber)
6337 return false;
6338
6339 regno = reg_renumber[regno];
6340 }
6341
6342 /* The fake registers will be eliminated to either the stack or
6343 hard frame pointer, both of which are usually valid base registers.
6344 Reload deals with the cases where the eliminated form isn't valid. */
6345 return (GP_REGNUM_P (regno)
6346 || regno == SP_REGNUM
6347 || regno == FRAME_POINTER_REGNUM
6348 || regno == ARG_POINTER_REGNUM);
6349}
6350
6351/* Return true if X is a valid base register for mode MODE.
6352 STRICT_P is true if REG_OK_STRICT is in effect. */
6353
6354static bool
6355aarch64_base_register_rtx_p (rtx x, bool strict_p)
6356{
76160199
RS
6357 if (!strict_p
6358 && GET_CODE (x) == SUBREG
6359 && contains_reg_of_mode[GENERAL_REGS][GET_MODE (SUBREG_REG (x))])
43e9d192
IB
6360 x = SUBREG_REG (x);
6361
6362 return (REG_P (x) && aarch64_regno_ok_for_base_p (REGNO (x), strict_p));
6363}
6364
6365/* Return true if address offset is a valid index. If it is, fill in INFO
6366 appropriately. STRICT_P is true if REG_OK_STRICT is in effect. */
6367
6368static bool
6369aarch64_classify_index (struct aarch64_address_info *info, rtx x,
ef4bddc2 6370 machine_mode mode, bool strict_p)
43e9d192
IB
6371{
6372 enum aarch64_address_type type;
6373 rtx index;
6374 int shift;
6375
6376 /* (reg:P) */
6377 if ((REG_P (x) || GET_CODE (x) == SUBREG)
6378 && GET_MODE (x) == Pmode)
6379 {
6380 type = ADDRESS_REG_REG;
6381 index = x;
6382 shift = 0;
6383 }
6384 /* (sign_extend:DI (reg:SI)) */
6385 else if ((GET_CODE (x) == SIGN_EXTEND
6386 || GET_CODE (x) == ZERO_EXTEND)
6387 && GET_MODE (x) == DImode
6388 && GET_MODE (XEXP (x, 0)) == SImode)
6389 {
6390 type = (GET_CODE (x) == SIGN_EXTEND)
6391 ? ADDRESS_REG_SXTW : ADDRESS_REG_UXTW;
6392 index = XEXP (x, 0);
6393 shift = 0;
6394 }
6395 /* (mult:DI (sign_extend:DI (reg:SI)) (const_int scale)) */
6396 else if (GET_CODE (x) == MULT
6397 && (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
6398 || GET_CODE (XEXP (x, 0)) == ZERO_EXTEND)
6399 && GET_MODE (XEXP (x, 0)) == DImode
6400 && GET_MODE (XEXP (XEXP (x, 0), 0)) == SImode
6401 && CONST_INT_P (XEXP (x, 1)))
6402 {
6403 type = (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND)
6404 ? ADDRESS_REG_SXTW : ADDRESS_REG_UXTW;
6405 index = XEXP (XEXP (x, 0), 0);
6406 shift = exact_log2 (INTVAL (XEXP (x, 1)));
6407 }
6408 /* (ashift:DI (sign_extend:DI (reg:SI)) (const_int shift)) */
6409 else if (GET_CODE (x) == ASHIFT
6410 && (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
6411 || GET_CODE (XEXP (x, 0)) == ZERO_EXTEND)
6412 && GET_MODE (XEXP (x, 0)) == DImode
6413 && GET_MODE (XEXP (XEXP (x, 0), 0)) == SImode
6414 && CONST_INT_P (XEXP (x, 1)))
6415 {
6416 type = (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND)
6417 ? ADDRESS_REG_SXTW : ADDRESS_REG_UXTW;
6418 index = XEXP (XEXP (x, 0), 0);
6419 shift = INTVAL (XEXP (x, 1));
6420 }
6421 /* (sign_extract:DI (mult:DI (reg:DI) (const_int scale)) 32+shift 0) */
6422 else if ((GET_CODE (x) == SIGN_EXTRACT
6423 || GET_CODE (x) == ZERO_EXTRACT)
6424 && GET_MODE (x) == DImode
6425 && GET_CODE (XEXP (x, 0)) == MULT
6426 && GET_MODE (XEXP (XEXP (x, 0), 0)) == DImode
6427 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))
6428 {
6429 type = (GET_CODE (x) == SIGN_EXTRACT)
6430 ? ADDRESS_REG_SXTW : ADDRESS_REG_UXTW;
6431 index = XEXP (XEXP (x, 0), 0);
6432 shift = exact_log2 (INTVAL (XEXP (XEXP (x, 0), 1)));
6433 if (INTVAL (XEXP (x, 1)) != 32 + shift
6434 || INTVAL (XEXP (x, 2)) != 0)
6435 shift = -1;
6436 }
6437 /* (and:DI (mult:DI (reg:DI) (const_int scale))
6438 (const_int 0xffffffff<<shift)) */
6439 else if (GET_CODE (x) == AND
6440 && GET_MODE (x) == DImode
6441 && GET_CODE (XEXP (x, 0)) == MULT
6442 && GET_MODE (XEXP (XEXP (x, 0), 0)) == DImode
6443 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
6444 && CONST_INT_P (XEXP (x, 1)))
6445 {
6446 type = ADDRESS_REG_UXTW;
6447 index = XEXP (XEXP (x, 0), 0);
6448 shift = exact_log2 (INTVAL (XEXP (XEXP (x, 0), 1)));
6449 if (INTVAL (XEXP (x, 1)) != (HOST_WIDE_INT)0xffffffff << shift)
6450 shift = -1;
6451 }
6452 /* (sign_extract:DI (ashift:DI (reg:DI) (const_int shift)) 32+shift 0) */
6453 else if ((GET_CODE (x) == SIGN_EXTRACT
6454 || GET_CODE (x) == ZERO_EXTRACT)
6455 && GET_MODE (x) == DImode
6456 && GET_CODE (XEXP (x, 0)) == ASHIFT
6457 && GET_MODE (XEXP (XEXP (x, 0), 0)) == DImode
6458 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))
6459 {
6460 type = (GET_CODE (x) == SIGN_EXTRACT)
6461 ? ADDRESS_REG_SXTW : ADDRESS_REG_UXTW;
6462 index = XEXP (XEXP (x, 0), 0);
6463 shift = INTVAL (XEXP (XEXP (x, 0), 1));
6464 if (INTVAL (XEXP (x, 1)) != 32 + shift
6465 || INTVAL (XEXP (x, 2)) != 0)
6466 shift = -1;
6467 }
6468 /* (and:DI (ashift:DI (reg:DI) (const_int shift))
6469 (const_int 0xffffffff<<shift)) */
6470 else if (GET_CODE (x) == AND
6471 && GET_MODE (x) == DImode
6472 && GET_CODE (XEXP (x, 0)) == ASHIFT
6473 && GET_MODE (XEXP (XEXP (x, 0), 0)) == DImode
6474 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
6475 && CONST_INT_P (XEXP (x, 1)))
6476 {
6477 type = ADDRESS_REG_UXTW;
6478 index = XEXP (XEXP (x, 0), 0);
6479 shift = INTVAL (XEXP (XEXP (x, 0), 1));
6480 if (INTVAL (XEXP (x, 1)) != (HOST_WIDE_INT)0xffffffff << shift)
6481 shift = -1;
6482 }
6483 /* (mult:P (reg:P) (const_int scale)) */
6484 else if (GET_CODE (x) == MULT
6485 && GET_MODE (x) == Pmode
6486 && GET_MODE (XEXP (x, 0)) == Pmode
6487 && CONST_INT_P (XEXP (x, 1)))
6488 {
6489 type = ADDRESS_REG_REG;
6490 index = XEXP (x, 0);
6491 shift = exact_log2 (INTVAL (XEXP (x, 1)));
6492 }
6493 /* (ashift:P (reg:P) (const_int shift)) */
6494 else if (GET_CODE (x) == ASHIFT
6495 && GET_MODE (x) == Pmode
6496 && GET_MODE (XEXP (x, 0)) == Pmode
6497 && CONST_INT_P (XEXP (x, 1)))
6498 {
6499 type = ADDRESS_REG_REG;
6500 index = XEXP (x, 0);
6501 shift = INTVAL (XEXP (x, 1));
6502 }
6503 else
6504 return false;
6505
76160199
RS
6506 if (!strict_p
6507 && GET_CODE (index) == SUBREG
6508 && contains_reg_of_mode[GENERAL_REGS][GET_MODE (SUBREG_REG (index))])
43e9d192
IB
6509 index = SUBREG_REG (index);
6510
43cacb12
RS
6511 if (aarch64_sve_data_mode_p (mode))
6512 {
6513 if (type != ADDRESS_REG_REG
6514 || (1 << shift) != GET_MODE_UNIT_SIZE (mode))
6515 return false;
6516 }
6517 else
6518 {
6519 if (shift != 0
6520 && !(IN_RANGE (shift, 1, 3)
6521 && known_eq (1 << shift, GET_MODE_SIZE (mode))))
6522 return false;
6523 }
6524
6525 if (REG_P (index)
43e9d192
IB
6526 && aarch64_regno_ok_for_index_p (REGNO (index), strict_p))
6527 {
6528 info->type = type;
6529 info->offset = index;
6530 info->shift = shift;
6531 return true;
6532 }
6533
6534 return false;
6535}
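
/* For illustration, some index expressions accepted above, as combined with
   a base register (here x0) by aarch64_classify_address, and the addressing
   syntax they correspond to (register numbers are arbitrary examples):

     (reg:DI x1)                                 -> [x0, x1]
     (ashift:DI (reg:DI x1) (const_int 3))       -> [x0, x1, lsl #3]  (8-byte access)
     (sign_extend:DI (reg:SI w1))                -> [x0, w1, sxtw]
     (mult:DI (sign_extend:DI (reg:SI w1))
              (const_int 4))                     -> [x0, w1, sxtw #2] (4-byte access)

   For SVE data modes only the unextended register index is accepted, and the
   scale must equal the element size, as checked above.  */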
6536
abc52318
KT
6537/* Return true if MODE is one of the modes for which we
6538 support LDP/STP operations. */
6539
6540static bool
6541aarch64_mode_valid_for_sched_fusion_p (machine_mode mode)
6542{
6543 return mode == SImode || mode == DImode
6544 || mode == SFmode || mode == DFmode
6545 || (aarch64_vector_mode_supported_p (mode)
6546 && (known_eq (GET_MODE_SIZE (mode), 8)
6547 || (known_eq (GET_MODE_SIZE (mode), 16)
6548 && (aarch64_tune_params.extra_tuning_flags
6549 & AARCH64_EXTRA_TUNE_NO_LDP_STP_QREGS) == 0)));
abc52318
KT
6550}
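
/* For example, two adjacent DImode loads from consecutive addresses can be
   brought together by sched fusion so that they can later be combined into a
   single load pair such as "ldp x0, x1, [x2]"; 16-byte vector accesses are
   only considered when the active tuning does not set
   AARCH64_EXTRA_TUNE_NO_LDP_STP_QREGS.  (Illustrative example; the actual
   pairing is performed by later passes.)  */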
6551
9e0218fc
RH
6552/* Return true if REGNO is a virtual pointer register, or an eliminable
6553 "soft" frame register. Like REGNO_PTR_FRAME_P except that we don't
6554 include stack_pointer or hard_frame_pointer. */
6555static bool
6556virt_or_elim_regno_p (unsigned regno)
6557{
6558 return ((regno >= FIRST_VIRTUAL_REGISTER
6559 && regno <= LAST_VIRTUAL_POINTER_REGISTER)
6560 || regno == FRAME_POINTER_REGNUM
6561 || regno == ARG_POINTER_REGNUM);
6562}
6563
a97d8b98
RS
6564/* Return true if X is a valid address of type TYPE for machine mode MODE.
6565 If it is, fill in INFO appropriately. STRICT_P is true if
6566 REG_OK_STRICT is in effect. */
43e9d192 6567
a98824ac 6568bool
43e9d192 6569aarch64_classify_address (struct aarch64_address_info *info,
a97d8b98 6570 rtx x, machine_mode mode, bool strict_p,
a98824ac 6571 aarch64_addr_query_type type)
43e9d192
IB
6572{
6573 enum rtx_code code = GET_CODE (x);
6574 rtx op0, op1;
dc640181
RS
6575 poly_int64 offset;
6576
6a70badb 6577 HOST_WIDE_INT const_size;
2d8c6dc1 6578
80d43579
WD
6579 /* On BE, we use load/store pair for all large int mode load/stores.
6580 TI/TFmode may also use a load/store pair. */
43cacb12
RS
6581 unsigned int vec_flags = aarch64_classify_vector_mode (mode);
6582 bool advsimd_struct_p = (vec_flags == (VEC_ADVSIMD | VEC_STRUCT));
a97d8b98 6583 bool load_store_pair_p = (type == ADDR_QUERY_LDP_STP
a25831ac 6584 || type == ADDR_QUERY_LDP_STP_N
80d43579
WD
6585 || mode == TImode
6586 || mode == TFmode
43cacb12 6587 || (BYTES_BIG_ENDIAN && advsimd_struct_p));
2d8c6dc1 6588
a25831ac
AV
6589  /* If we are dealing with ADDR_QUERY_LDP_STP_N, the incoming mode
6590     corresponds to the actual size of the memory being loaded/stored and
6591     the mode used for the addressing calculation is half of that.  */
6592 if (type == ADDR_QUERY_LDP_STP_N
6593 && known_eq (GET_MODE_SIZE (mode), 16))
6594 mode = DFmode;
6595
6a70badb 6596 bool allow_reg_index_p = (!load_store_pair_p
43cacb12
RS
6597 && (known_lt (GET_MODE_SIZE (mode), 16)
6598 || vec_flags == VEC_ADVSIMD
6599 || vec_flags == VEC_SVE_DATA));
6600
6601 /* For SVE, only accept [Rn], [Rn, Rm, LSL #shift] and
6602 [Rn, #offset, MUL VL]. */
6603 if ((vec_flags & (VEC_SVE_DATA | VEC_SVE_PRED)) != 0
6604 && (code != REG && code != PLUS))
6605 return false;
2d8c6dc1
AH
6606
6607 /* On LE, for AdvSIMD, don't support anything other than POST_INC or
6608 REG addressing. */
43cacb12
RS
6609 if (advsimd_struct_p
6610 && !BYTES_BIG_ENDIAN
43e9d192
IB
6611 && (code != POST_INC && code != REG))
6612 return false;
6613
43cacb12
RS
6614 gcc_checking_assert (GET_MODE (x) == VOIDmode
6615 || SCALAR_INT_MODE_P (GET_MODE (x)));
6616
43e9d192
IB
6617 switch (code)
6618 {
6619 case REG:
6620 case SUBREG:
6621 info->type = ADDRESS_REG_IMM;
6622 info->base = x;
6623 info->offset = const0_rtx;
dc640181 6624 info->const_offset = 0;
43e9d192
IB
6625 return aarch64_base_register_rtx_p (x, strict_p);
6626
6627 case PLUS:
6628 op0 = XEXP (x, 0);
6629 op1 = XEXP (x, 1);
15c0c5c9
JW
6630
6631 if (! strict_p
4aa81c2e 6632 && REG_P (op0)
9e0218fc 6633 && virt_or_elim_regno_p (REGNO (op0))
dc640181 6634 && poly_int_rtx_p (op1, &offset))
15c0c5c9
JW
6635 {
6636 info->type = ADDRESS_REG_IMM;
6637 info->base = op0;
6638 info->offset = op1;
dc640181 6639 info->const_offset = offset;
15c0c5c9
JW
6640
6641 return true;
6642 }
6643
6a70badb 6644 if (maybe_ne (GET_MODE_SIZE (mode), 0)
6645 && aarch64_base_register_rtx_p (op0, strict_p)
6646 && poly_int_rtx_p (op1, &offset))
43e9d192 6647 {
43e9d192
IB
6648 info->type = ADDRESS_REG_IMM;
6649 info->base = op0;
6650 info->offset = op1;
dc640181 6651 info->const_offset = offset;
43e9d192
IB
6652
6653 /* TImode and TFmode values are allowed in both pairs of X
6654 registers and individual Q registers. The available
6655 address modes are:
6656 X,X: 7-bit signed scaled offset
6657 Q: 9-bit signed offset
6658 We conservatively require an offset representable in either mode.
6659	     When performing the check for pairs of X registers, i.e. LDP/STP,
6660	     pass down DImode since that is the natural size of the LDP/STP
6661	     instruction memory accesses.  */
43e9d192 6662 if (mode == TImode || mode == TFmode)
8ed49fab 6663 return (aarch64_offset_7bit_signed_scaled_p (DImode, offset)
3c5af608 6664 && (aarch64_offset_9bit_signed_unscaled_p (mode, offset)
8734dfac 6665 || offset_12bit_unsigned_scaled_p (mode, offset)));
43e9d192 6666
2d8c6dc1
AH
6667	  /* A 7-bit offset check because OImode will emit an ldp/stp
6668 instruction (only big endian will get here).
6669 For ldp/stp instructions, the offset is scaled for the size of a
6670 single element of the pair. */
6671 if (mode == OImode)
6672 return aarch64_offset_7bit_signed_scaled_p (TImode, offset);
6673
6674	  /* Three 9/12-bit offset checks because CImode will emit three
6675 ldr/str instructions (only big endian will get here). */
6676 if (mode == CImode)
6677 return (aarch64_offset_7bit_signed_scaled_p (TImode, offset)
6678 && (aarch64_offset_9bit_signed_unscaled_p (V16QImode,
6679 offset + 32)
2d8c6dc1
AH
6680 || offset_12bit_unsigned_scaled_p (V16QImode,
6681 offset + 32)));
6682
6683	  /* Two 7-bit offset checks because XImode will emit two ldp/stp
6684 instructions (only big endian will get here). */
6685 if (mode == XImode)
6686 return (aarch64_offset_7bit_signed_scaled_p (TImode, offset)
6687 && aarch64_offset_7bit_signed_scaled_p (TImode,
6688 offset + 32));
6689
43cacb12
RS
6690 /* Make "m" use the LD1 offset range for SVE data modes, so
6691 that pre-RTL optimizers like ivopts will work to that
6692 instead of the wider LDR/STR range. */
6693 if (vec_flags == VEC_SVE_DATA)
6694 return (type == ADDR_QUERY_M
6695 ? offset_4bit_signed_scaled_p (mode, offset)
6696 : offset_9bit_signed_scaled_p (mode, offset));
6697
9f4cbab8
RS
6698 if (vec_flags == (VEC_SVE_DATA | VEC_STRUCT))
6699 {
6700 poly_int64 end_offset = (offset
6701 + GET_MODE_SIZE (mode)
6702 - BYTES_PER_SVE_VECTOR);
6703 return (type == ADDR_QUERY_M
6704 ? offset_4bit_signed_scaled_p (mode, offset)
6705 : (offset_9bit_signed_scaled_p (SVE_BYTE_MODE, offset)
6706 && offset_9bit_signed_scaled_p (SVE_BYTE_MODE,
6707 end_offset)));
6708 }
6709
43cacb12
RS
6710 if (vec_flags == VEC_SVE_PRED)
6711 return offset_9bit_signed_scaled_p (mode, offset);
6712
2d8c6dc1 6713 if (load_store_pair_p)
6a70badb 6714 return ((known_eq (GET_MODE_SIZE (mode), 4)
6715 || known_eq (GET_MODE_SIZE (mode), 8)
6716 || known_eq (GET_MODE_SIZE (mode), 16))
44707478 6717 && aarch64_offset_7bit_signed_scaled_p (mode, offset));
43e9d192 6718 else
3c5af608 6719 return (aarch64_offset_9bit_signed_unscaled_p (mode, offset)
43e9d192
IB
6720 || offset_12bit_unsigned_scaled_p (mode, offset));
6721 }
6722
6723 if (allow_reg_index_p)
6724 {
6725 /* Look for base + (scaled/extended) index register. */
6726 if (aarch64_base_register_rtx_p (op0, strict_p)
6727 && aarch64_classify_index (info, op1, mode, strict_p))
6728 {
6729 info->base = op0;
6730 return true;
6731 }
6732 if (aarch64_base_register_rtx_p (op1, strict_p)
6733 && aarch64_classify_index (info, op0, mode, strict_p))
6734 {
6735 info->base = op1;
6736 return true;
6737 }
6738 }
6739
6740 return false;
6741
6742 case POST_INC:
6743 case POST_DEC:
6744 case PRE_INC:
6745 case PRE_DEC:
6746 info->type = ADDRESS_REG_WB;
6747 info->base = XEXP (x, 0);
6748 info->offset = NULL_RTX;
6749 return aarch64_base_register_rtx_p (info->base, strict_p);
6750
6751 case POST_MODIFY:
6752 case PRE_MODIFY:
6753 info->type = ADDRESS_REG_WB;
6754 info->base = XEXP (x, 0);
6755 if (GET_CODE (XEXP (x, 1)) == PLUS
dc640181 6756 && poly_int_rtx_p (XEXP (XEXP (x, 1), 1), &offset)
43e9d192
IB
6757 && rtx_equal_p (XEXP (XEXP (x, 1), 0), info->base)
6758 && aarch64_base_register_rtx_p (info->base, strict_p))
6759 {
43e9d192 6760 info->offset = XEXP (XEXP (x, 1), 1);
dc640181 6761 info->const_offset = offset;
43e9d192
IB
6762
6763 /* TImode and TFmode values are allowed in both pairs of X
6764 registers and individual Q registers. The available
6765 address modes are:
6766 X,X: 7-bit signed scaled offset
6767 Q: 9-bit signed offset
6768	     We conservatively require an offset representable in either mode.  */
6770 if (mode == TImode || mode == TFmode)
44707478 6771 return (aarch64_offset_7bit_signed_scaled_p (mode, offset)
3c5af608 6772 && aarch64_offset_9bit_signed_unscaled_p (mode, offset));
43e9d192 6773
2d8c6dc1 6774 if (load_store_pair_p)
6a70badb 6775 return ((known_eq (GET_MODE_SIZE (mode), 4)
6776 || known_eq (GET_MODE_SIZE (mode), 8)
6777 || known_eq (GET_MODE_SIZE (mode), 16))
44707478 6778 && aarch64_offset_7bit_signed_scaled_p (mode, offset));
43e9d192 6779 else
3c5af608 6780 return aarch64_offset_9bit_signed_unscaled_p (mode, offset);
43e9d192
IB
6781 }
6782 return false;
6783
6784 case CONST:
6785 case SYMBOL_REF:
6786 case LABEL_REF:
79517551
SN
6787 /* load literal: pc-relative constant pool entry. Only supported
6788	     for SImode or larger.  */
43e9d192 6789 info->type = ADDRESS_SYMBOLIC;
2d8c6dc1 6790
6a70badb
RS
6791 if (!load_store_pair_p
6792 && GET_MODE_SIZE (mode).is_constant (&const_size)
6793 && const_size >= 4)
43e9d192
IB
6794 {
6795 rtx sym, addend;
6796
6797 split_const (x, &sym, &addend);
b4f50fd4
RR
6798 return ((GET_CODE (sym) == LABEL_REF
6799 || (GET_CODE (sym) == SYMBOL_REF
6800 && CONSTANT_POOL_ADDRESS_P (sym)
9ee6540a 6801 && aarch64_pcrelative_literal_loads)));
43e9d192
IB
6802 }
6803 return false;
6804
6805 case LO_SUM:
6806 info->type = ADDRESS_LO_SUM;
6807 info->base = XEXP (x, 0);
6808 info->offset = XEXP (x, 1);
6809 if (allow_reg_index_p
6810 && aarch64_base_register_rtx_p (info->base, strict_p))
6811 {
6812 rtx sym, offs;
6813 split_const (info->offset, &sym, &offs);
6814 if (GET_CODE (sym) == SYMBOL_REF
43cacb12
RS
6815 && (aarch64_classify_symbol (sym, INTVAL (offs))
6816 == SYMBOL_SMALL_ABSOLUTE))
43e9d192
IB
6817 {
6818 /* The symbol and offset must be aligned to the access size. */
6819 unsigned int align;
43e9d192
IB
6820
6821 if (CONSTANT_POOL_ADDRESS_P (sym))
6822 align = GET_MODE_ALIGNMENT (get_pool_mode (sym));
6823 else if (TREE_CONSTANT_POOL_ADDRESS_P (sym))
6824 {
6825 tree exp = SYMBOL_REF_DECL (sym);
6826 align = TYPE_ALIGN (TREE_TYPE (exp));
58e17cf8 6827 align = aarch64_constant_alignment (exp, align);
43e9d192
IB
6828 }
6829 else if (SYMBOL_REF_DECL (sym))
6830 align = DECL_ALIGN (SYMBOL_REF_DECL (sym));
6c031d8d
KV
6831 else if (SYMBOL_REF_HAS_BLOCK_INFO_P (sym)
6832 && SYMBOL_REF_BLOCK (sym) != NULL)
6833 align = SYMBOL_REF_BLOCK (sym)->alignment;
43e9d192
IB
6834 else
6835 align = BITS_PER_UNIT;
6836
6a70badb
RS
6837 poly_int64 ref_size = GET_MODE_SIZE (mode);
6838 if (known_eq (ref_size, 0))
43e9d192
IB
6839 ref_size = GET_MODE_SIZE (DImode);
6840
6a70badb
RS
6841 return (multiple_p (INTVAL (offs), ref_size)
6842 && multiple_p (align / BITS_PER_UNIT, ref_size));
43e9d192
IB
6843 }
6844 }
6845 return false;
6846
6847 default:
6848 return false;
6849 }
6850}
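
/* For illustration, typical address forms classified above (register numbers
   and offsets are arbitrary examples):

     [x0]                       ADDRESS_REG_IMM, zero offset
     [x0, #256]                 ADDRESS_REG_IMM
     [x0, x1, lsl #3]           ADDRESS_REG_REG (scaled index, 8-byte access)
     [x0, w1, sxtw #2]          ADDRESS_REG_SXTW
     [x0, #16]! and [x0], #16   ADDRESS_REG_WB (pre/post modify)
     [x0, #:lo12:sym]           ADDRESS_LO_SUM
     pc-relative literal        ADDRESS_SYMBOLIC
     [x0, #3, mul vl]           ADDRESS_REG_IMM for SVE data/predicate modes

   Which of these are actually legal also depends on MODE and on the query
   TYPE, as checked above.  */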
6851
9bf2f779
KT
6852/* Return true if the address X is valid for a PRFM instruction.
6853 STRICT_P is true if we should do strict checking with
6854 aarch64_classify_address. */
6855
6856bool
6857aarch64_address_valid_for_prefetch_p (rtx x, bool strict_p)
6858{
6859 struct aarch64_address_info addr;
6860
6861 /* PRFM accepts the same addresses as DImode... */
a97d8b98 6862 bool res = aarch64_classify_address (&addr, x, DImode, strict_p);
9bf2f779
KT
6863 if (!res)
6864 return false;
6865
6866 /* ... except writeback forms. */
6867 return addr.type != ADDRESS_REG_WB;
6868}
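
/* For example, "prfm pldl1keep, [x0, #256]" uses a DImode-style base plus
   immediate address and is accepted, whereas a writeback form such as
   "[x0, #8]!" is rejected by the check above.  (Illustrative only.)  */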
6869
43e9d192
IB
6870bool
6871aarch64_symbolic_address_p (rtx x)
6872{
6873 rtx offset;
6874
6875 split_const (x, &x, &offset);
6876 return GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == LABEL_REF;
6877}
6878
a6e0bfa7 6879/* Classify the base of symbolic expression X. */
da4f13a4
MS
6880
6881enum aarch64_symbol_type
a6e0bfa7 6882aarch64_classify_symbolic_expression (rtx x)
43e9d192
IB
6883{
6884 rtx offset;
da4f13a4 6885
43e9d192 6886 split_const (x, &x, &offset);
43cacb12 6887 return aarch64_classify_symbol (x, INTVAL (offset));
43e9d192
IB
6888}
6889
6890
6891/* Return TRUE if X is a legitimate address for accessing memory in
6892 mode MODE. */
6893static bool
ef4bddc2 6894aarch64_legitimate_address_hook_p (machine_mode mode, rtx x, bool strict_p)
43e9d192
IB
6895{
6896 struct aarch64_address_info addr;
6897
a97d8b98 6898 return aarch64_classify_address (&addr, x, mode, strict_p);
43e9d192
IB
6899}
6900
a97d8b98
RS
6901/* Return TRUE if X is a legitimate address of type TYPE for accessing
6902 memory in mode MODE. STRICT_P is true if REG_OK_STRICT is in effect. */
43e9d192 6903bool
a97d8b98
RS
6904aarch64_legitimate_address_p (machine_mode mode, rtx x, bool strict_p,
6905 aarch64_addr_query_type type)
43e9d192
IB
6906{
6907 struct aarch64_address_info addr;
6908
a97d8b98 6909 return aarch64_classify_address (&addr, x, mode, strict_p, type);
43e9d192
IB
6910}
6911
9005477f
RS
6912/* Implement TARGET_LEGITIMIZE_ADDRESS_DISPLACEMENT. */
6913
491ec060 6914static bool
9005477f
RS
6915aarch64_legitimize_address_displacement (rtx *offset1, rtx *offset2,
6916 poly_int64 orig_offset,
6917 machine_mode mode)
491ec060 6918{
6a70badb
RS
6919 HOST_WIDE_INT size;
6920 if (GET_MODE_SIZE (mode).is_constant (&size))
6921 {
9005477f
RS
6922 HOST_WIDE_INT const_offset, second_offset;
6923
6924 /* A general SVE offset is A * VQ + B. Remove the A component from
6925 coefficient 0 in order to get the constant B. */
6926 const_offset = orig_offset.coeffs[0] - orig_offset.coeffs[1];
6927
6928 /* Split an out-of-range address displacement into a base and
6929 offset. Use 4KB range for 1- and 2-byte accesses and a 16KB
6930 range otherwise to increase opportunities for sharing the base
6931 address of different sizes. Unaligned accesses use the signed
6932 9-bit range, TImode/TFmode use the intersection of signed
6933 scaled 7-bit and signed 9-bit offset. */
6a70badb 6934 if (mode == TImode || mode == TFmode)
9005477f
RS
6935 second_offset = ((const_offset + 0x100) & 0x1f8) - 0x100;
6936 else if ((const_offset & (size - 1)) != 0)
6937 second_offset = ((const_offset + 0x100) & 0x1ff) - 0x100;
6a70badb 6938 else
9005477f 6939 second_offset = const_offset & (size < 4 ? 0xfff : 0x3ffc);
491ec060 6940
9005477f
RS
6941 if (second_offset == 0 || known_eq (orig_offset, second_offset))
6942 return false;
6943
6944 /* Split the offset into second_offset and the rest. */
6945 *offset1 = gen_int_mode (orig_offset - second_offset, Pmode);
6946 *offset2 = gen_int_mode (second_offset, Pmode);
6947 return true;
6948 }
6949 else
6950 {
6951 /* Get the mode we should use as the basis of the range. For structure
6952 modes this is the mode of one vector. */
6953 unsigned int vec_flags = aarch64_classify_vector_mode (mode);
6954 machine_mode step_mode
6955 = (vec_flags & VEC_STRUCT) != 0 ? SVE_BYTE_MODE : mode;
6956
6957 /* Get the "mul vl" multiplier we'd like to use. */
6958 HOST_WIDE_INT factor = GET_MODE_SIZE (step_mode).coeffs[1];
6959 HOST_WIDE_INT vnum = orig_offset.coeffs[1] / factor;
6960 if (vec_flags & VEC_SVE_DATA)
6961 /* LDR supports a 9-bit range, but the move patterns for
6962 structure modes require all vectors to be in range of the
6963	 same base.  The simplest way of accommodating that while still
6964 promoting reuse of anchor points between different modes is
6965 to use an 8-bit range unconditionally. */
6966 vnum = ((vnum + 128) & 255) - 128;
6967 else
6968 /* Predicates are only handled singly, so we might as well use
6969 the full range. */
6970 vnum = ((vnum + 256) & 511) - 256;
6971 if (vnum == 0)
6972 return false;
6973
6974 /* Convert the "mul vl" multiplier into a byte offset. */
6975 poly_int64 second_offset = GET_MODE_SIZE (step_mode) * vnum;
6976 if (known_eq (second_offset, orig_offset))
6977 return false;
6978
6979 /* Split the offset into second_offset and the rest. */
6980 *offset1 = gen_int_mode (orig_offset - second_offset, Pmode);
6981 *offset2 = gen_int_mode (second_offset, Pmode);
6a70badb
RS
6982 return true;
6983 }
491ec060
WD
6984}
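
/* A worked example for the constant-size path above (values chosen for
   illustration): an SImode access at base + 0x8004 is aligned, so
   second_offset = 0x8004 & 0x3ffc = 0x4 and the remainder is 0x8000.
   The split lets the base + 0x8000 anchor be computed once (e.g. with a
   single ADD) and shared by neighbouring accesses, each of which then uses a
   small immediate offset such as [x1, #4].  */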
6985
a2170965
TC
6986/* Return the binary representation of floating point constant VALUE in INTVAL.
6987 If the value cannot be converted, return false without setting INTVAL.
6988   The conversion is done in the mode of VALUE.  */
6989bool
6990aarch64_reinterpret_float_as_int (rtx value, unsigned HOST_WIDE_INT *intval)
6991{
6992
6993 /* We make a general exception for 0. */
6994 if (aarch64_float_const_zero_rtx_p (value))
6995 {
6996 *intval = 0;
6997 return true;
6998 }
6999
0d0e0188 7000 scalar_float_mode mode;
a2170965 7001 if (GET_CODE (value) != CONST_DOUBLE
0d0e0188 7002 || !is_a <scalar_float_mode> (GET_MODE (value), &mode)
a2170965
TC
7003 || GET_MODE_BITSIZE (mode) > HOST_BITS_PER_WIDE_INT
7004 /* Only support up to DF mode. */
7005 || GET_MODE_BITSIZE (mode) > GET_MODE_BITSIZE (DFmode))
7006 return false;
7007
7008 unsigned HOST_WIDE_INT ival = 0;
7009
7010 long res[2];
7011 real_to_target (res,
7012 CONST_DOUBLE_REAL_VALUE (value),
7013 REAL_MODE_FORMAT (mode));
7014
5c22bb48
TC
7015 if (mode == DFmode)
7016 {
7017 int order = BYTES_BIG_ENDIAN ? 1 : 0;
7018 ival = zext_hwi (res[order], 32);
7019 ival |= (zext_hwi (res[1 - order], 32) << 32);
7020 }
7021 else
7022 ival = zext_hwi (res[0], 32);
a2170965
TC
7023
7024 *intval = ival;
7025 return true;
7026}
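
/* For example, the DFmode constant 1.0 yields the IEEE-754 bit pattern
   0x3ff0000000000000 and the SFmode constant 1.0 yields 0x3f800000
   (zero-extended into *INTVAL).  Values wider than DFmode are rejected
   above.  (Illustrative values only.)  */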
7027
7028/* Return TRUE if rtx X is an immediate constant that can be moved using a
7029 single MOV(+MOVK) followed by an FMOV. */
7030bool
7031aarch64_float_const_rtx_p (rtx x)
7032{
7033 machine_mode mode = GET_MODE (x);
7034 if (mode == VOIDmode)
7035 return false;
7036
7037  /* Determine whether it's cheaper to write float constants as
7038     mov/movk pairs rather than as ldr/adrp pairs.  */
7039 unsigned HOST_WIDE_INT ival;
7040
7041 if (GET_CODE (x) == CONST_DOUBLE
7042 && SCALAR_FLOAT_MODE_P (mode)
7043 && aarch64_reinterpret_float_as_int (x, &ival))
7044 {
77e994c9
RS
7045 scalar_int_mode imode = (mode == HFmode
7046 ? SImode
7047 : int_mode_for_mode (mode).require ());
a2170965
TC
7048 int num_instr = aarch64_internal_mov_immediate
7049 (NULL_RTX, gen_int_mode (ival, imode), false, imode);
7050 return num_instr < 3;
7051 }
7052
7053 return false;
7054}
7055
43e9d192
IB
7056/* Return TRUE if rtx X is immediate constant 0.0 */
7057bool
3520f7cc 7058aarch64_float_const_zero_rtx_p (rtx x)
43e9d192 7059{
43e9d192
IB
7060 if (GET_MODE (x) == VOIDmode)
7061 return false;
7062
34a72c33 7063 if (REAL_VALUE_MINUS_ZERO (*CONST_DOUBLE_REAL_VALUE (x)))
43e9d192 7064 return !HONOR_SIGNED_ZEROS (GET_MODE (x));
34a72c33 7065 return real_equal (CONST_DOUBLE_REAL_VALUE (x), &dconst0);
43e9d192
IB
7066}
7067
a2170965
TC
7068/* Return TRUE if rtx X is an immediate constant that fits in a single
7069 MOVI immediate operation. */
7070bool
7071aarch64_can_const_movi_rtx_p (rtx x, machine_mode mode)
7072{
7073 if (!TARGET_SIMD)
7074 return false;
7075
77e994c9
RS
7076 machine_mode vmode;
7077 scalar_int_mode imode;
a2170965
TC
7078 unsigned HOST_WIDE_INT ival;
7079
7080 if (GET_CODE (x) == CONST_DOUBLE
7081 && SCALAR_FLOAT_MODE_P (mode))
7082 {
7083 if (!aarch64_reinterpret_float_as_int (x, &ival))
7084 return false;
7085
35c38fa6
TC
7086 /* We make a general exception for 0. */
7087 if (aarch64_float_const_zero_rtx_p (x))
7088 return true;
7089
304b9962 7090 imode = int_mode_for_mode (mode).require ();
a2170965
TC
7091 }
7092 else if (GET_CODE (x) == CONST_INT
77e994c9
RS
7093 && is_a <scalar_int_mode> (mode, &imode))
7094 ival = INTVAL (x);
a2170965
TC
7095 else
7096 return false;
7097
7098  /* Use a 64-bit mode for everything except for DI/DF mode, where we use
7099     a 128-bit vector mode.  */
77e994c9 7100 int width = GET_MODE_BITSIZE (imode) == 64 ? 128 : 64;
a2170965
TC
7101
7102 vmode = aarch64_simd_container_mode (imode, width);
7103 rtx v_op = aarch64_simd_gen_const_vector_dup (vmode, ival);
7104
b187677b 7105 return aarch64_simd_valid_immediate (v_op, NULL);
a2170965
TC
7106}
7107
7108
70f09188
AP
7109/* Return the fixed registers used for condition codes. */
7110
7111static bool
7112aarch64_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
7113{
7114 *p1 = CC_REGNUM;
7115 *p2 = INVALID_REGNUM;
7116 return true;
7117}
7118
47210a04
RL
7119/* This function is used by the call expanders of the machine description.
7120 RESULT is the register in which the result is returned. It's NULL for
7121 "call" and "sibcall".
7122 MEM is the location of the function call.
7123 SIBCALL indicates whether this function call is normal call or sibling call.
7124 It will generate different pattern accordingly. */
7125
7126void
7127aarch64_expand_call (rtx result, rtx mem, bool sibcall)
7128{
7129 rtx call, callee, tmp;
7130 rtvec vec;
7131 machine_mode mode;
7132
7133 gcc_assert (MEM_P (mem));
7134 callee = XEXP (mem, 0);
7135 mode = GET_MODE (callee);
7136 gcc_assert (mode == Pmode);
7137
7138 /* Decide if we should generate indirect calls by loading the
7139 address of the callee into a register before performing
7140 the branch-and-link. */
7141 if (SYMBOL_REF_P (callee)
7142 ? (aarch64_is_long_call_p (callee)
7143 || aarch64_is_noplt_call_p (callee))
7144 : !REG_P (callee))
7145 XEXP (mem, 0) = force_reg (mode, callee);
7146
7147 call = gen_rtx_CALL (VOIDmode, mem, const0_rtx);
7148
7149 if (result != NULL_RTX)
7150 call = gen_rtx_SET (result, call);
7151
7152 if (sibcall)
7153 tmp = ret_rtx;
7154 else
7155 tmp = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (Pmode, LR_REGNUM));
7156
7157 vec = gen_rtvec (2, call, tmp);
7158 call = gen_rtx_PARALLEL (VOIDmode, vec);
7159
7160 aarch64_emit_call_insn (call);
7161}
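
/* For illustration: a direct call whose callee is a SYMBOL_REF and is
   neither a long call nor a no-PLT call keeps the symbolic address and
   normally assembles to "bl func"; otherwise the address is forced into a
   register and the call becomes an indirect "blr <reg>".  Sibling calls
   carry ret_rtx instead of a clobber of the link register and use "b"/"br".
   (Sketch of the usual outcome; the final instruction comes from the call
   patterns in the .md file.)  */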
7162
78607708
TV
7163/* Emit call insn with PAT and do aarch64-specific handling. */
7164
d07a3fed 7165void
78607708
TV
7166aarch64_emit_call_insn (rtx pat)
7167{
7168 rtx insn = emit_call_insn (pat);
7169
7170 rtx *fusage = &CALL_INSN_FUNCTION_USAGE (insn);
7171 clobber_reg (fusage, gen_rtx_REG (word_mode, IP0_REGNUM));
7172 clobber_reg (fusage, gen_rtx_REG (word_mode, IP1_REGNUM));
7173}
7174
ef4bddc2 7175machine_mode
43e9d192
IB
7176aarch64_select_cc_mode (RTX_CODE code, rtx x, rtx y)
7177{
f7343f20
RE
7178 machine_mode mode_x = GET_MODE (x);
7179 rtx_code code_x = GET_CODE (x);
7180
43e9d192
IB
7181 /* All floating point compares return CCFP if it is an equality
7182 comparison, and CCFPE otherwise. */
f7343f20 7183 if (GET_MODE_CLASS (mode_x) == MODE_FLOAT)
43e9d192
IB
7184 {
7185 switch (code)
7186 {
7187 case EQ:
7188 case NE:
7189 case UNORDERED:
7190 case ORDERED:
7191 case UNLT:
7192 case UNLE:
7193 case UNGT:
7194 case UNGE:
7195 case UNEQ:
43e9d192
IB
7196 return CCFPmode;
7197
7198 case LT:
7199 case LE:
7200 case GT:
7201 case GE:
8332c5ee 7202 case LTGT:
43e9d192
IB
7203 return CCFPEmode;
7204
7205 default:
7206 gcc_unreachable ();
7207 }
7208 }
7209
2b8568fe
KT
7210 /* Equality comparisons of short modes against zero can be performed
7211 using the TST instruction with the appropriate bitmask. */
f73dc006 7212 if (y == const0_rtx && (REG_P (x) || SUBREG_P (x))
2b8568fe 7213 && (code == EQ || code == NE)
f7343f20 7214 && (mode_x == HImode || mode_x == QImode))
2b8568fe
KT
7215 return CC_NZmode;
7216
b06335f9
KT
7217 /* Similarly, comparisons of zero_extends from shorter modes can
7218 be performed using an ANDS with an immediate mask. */
f7343f20
RE
7219 if (y == const0_rtx && code_x == ZERO_EXTEND
7220 && (mode_x == SImode || mode_x == DImode)
b06335f9
KT
7221 && (GET_MODE (XEXP (x, 0)) == HImode || GET_MODE (XEXP (x, 0)) == QImode)
7222 && (code == EQ || code == NE))
7223 return CC_NZmode;
7224
f7343f20 7225 if ((mode_x == SImode || mode_x == DImode)
43e9d192
IB
7226 && y == const0_rtx
7227 && (code == EQ || code == NE || code == LT || code == GE)
f7343f20
RE
7228 && (code_x == PLUS || code_x == MINUS || code_x == AND
7229 || code_x == NEG
7230 || (code_x == ZERO_EXTRACT && CONST_INT_P (XEXP (x, 1))
7325d85a 7231 && CONST_INT_P (XEXP (x, 2)))))
43e9d192
IB
7232 return CC_NZmode;
7233
1c992d1e 7234 /* A compare with a shifted operand. Because of canonicalization,
43e9d192
IB
7235 the comparison will have to be swapped when we emit the assembly
7236 code. */
f7343f20 7237 if ((mode_x == SImode || mode_x == DImode)
ffa8a921 7238 && (REG_P (y) || GET_CODE (y) == SUBREG || y == const0_rtx)
f7343f20
RE
7239 && (code_x == ASHIFT || code_x == ASHIFTRT
7240 || code_x == LSHIFTRT
7241 || code_x == ZERO_EXTEND || code_x == SIGN_EXTEND))
43e9d192
IB
7242 return CC_SWPmode;
7243
1c992d1e
RE
7244 /* Similarly for a negated operand, but we can only do this for
7245 equalities. */
f7343f20 7246 if ((mode_x == SImode || mode_x == DImode)
4aa81c2e 7247 && (REG_P (y) || GET_CODE (y) == SUBREG)
1c992d1e 7248 && (code == EQ || code == NE)
f7343f20 7249 && code_x == NEG)
1c992d1e
RE
7250 return CC_Zmode;
7251
f7343f20
RE
7252 /* A test for unsigned overflow from an addition. */
7253 if ((mode_x == DImode || mode_x == TImode)
7254 && (code == LTU || code == GEU)
7255 && code_x == PLUS
7256 && rtx_equal_p (XEXP (x, 0), y))
ef22810a
RH
7257 return CC_Cmode;
7258
f7343f20
RE
7259 /* A test for unsigned overflow from an add with carry. */
7260 if ((mode_x == DImode || mode_x == TImode)
7261 && (code == LTU || code == GEU)
7262 && code_x == PLUS
7263 && CONST_SCALAR_INT_P (y)
7264 && (rtx_mode_t (y, mode_x)
7265 == (wi::shwi (1, mode_x)
7266 << (GET_MODE_BITSIZE (mode_x).to_constant () / 2))))
7267 return CC_ADCmode;
7268
30c46053 7269 /* A test for signed overflow. */
f7343f20 7270 if ((mode_x == DImode || mode_x == TImode)
30c46053 7271 && code == NE
f7343f20 7272 && code_x == PLUS
30c46053
MC
7273 && GET_CODE (y) == SIGN_EXTEND)
7274 return CC_Vmode;
7275
43e9d192
IB
7276 /* For everything else, return CCmode. */
7277 return CCmode;
7278}
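
/* Two illustrative cases of the classification above: comparing
   (plus:DI x y) against zero with EQ/NE/LT/GE selects CC_NZmode, so the
   addition and the comparison can be combined into a single flag-setting
   ADDS; comparing (ashift:SI x 2) against a register selects CC_SWPmode
   because the shifted operand can only be the second operand of the
   eventual CMP, and the condition is swapped when the assembly is emitted.
   (Examples only; see the conditions above for the precise requirements.)  */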
7279
3dfa7055 7280static int
b8506a8a 7281aarch64_get_condition_code_1 (machine_mode, enum rtx_code);
3dfa7055 7282
cd5660ab 7283int
43e9d192
IB
7284aarch64_get_condition_code (rtx x)
7285{
ef4bddc2 7286 machine_mode mode = GET_MODE (XEXP (x, 0));
43e9d192
IB
7287 enum rtx_code comp_code = GET_CODE (x);
7288
7289 if (GET_MODE_CLASS (mode) != MODE_CC)
7290 mode = SELECT_CC_MODE (comp_code, XEXP (x, 0), XEXP (x, 1));
3dfa7055
ZC
7291 return aarch64_get_condition_code_1 (mode, comp_code);
7292}
43e9d192 7293
3dfa7055 7294static int
b8506a8a 7295aarch64_get_condition_code_1 (machine_mode mode, enum rtx_code comp_code)
3dfa7055 7296{
43e9d192
IB
7297 switch (mode)
7298 {
4e10a5a7
RS
7299 case E_CCFPmode:
7300 case E_CCFPEmode:
43e9d192
IB
7301 switch (comp_code)
7302 {
7303 case GE: return AARCH64_GE;
7304 case GT: return AARCH64_GT;
7305 case LE: return AARCH64_LS;
7306 case LT: return AARCH64_MI;
7307 case NE: return AARCH64_NE;
7308 case EQ: return AARCH64_EQ;
7309 case ORDERED: return AARCH64_VC;
7310 case UNORDERED: return AARCH64_VS;
7311 case UNLT: return AARCH64_LT;
7312 case UNLE: return AARCH64_LE;
7313 case UNGT: return AARCH64_HI;
7314 case UNGE: return AARCH64_PL;
cd5660ab 7315 default: return -1;
43e9d192
IB
7316 }
7317 break;
7318
4e10a5a7 7319 case E_CCmode:
43e9d192
IB
7320 switch (comp_code)
7321 {
7322 case NE: return AARCH64_NE;
7323 case EQ: return AARCH64_EQ;
7324 case GE: return AARCH64_GE;
7325 case GT: return AARCH64_GT;
7326 case LE: return AARCH64_LE;
7327 case LT: return AARCH64_LT;
7328 case GEU: return AARCH64_CS;
7329 case GTU: return AARCH64_HI;
7330 case LEU: return AARCH64_LS;
7331 case LTU: return AARCH64_CC;
cd5660ab 7332 default: return -1;
43e9d192
IB
7333 }
7334 break;
7335
4e10a5a7 7336 case E_CC_SWPmode:
43e9d192
IB
7337 switch (comp_code)
7338 {
7339 case NE: return AARCH64_NE;
7340 case EQ: return AARCH64_EQ;
7341 case GE: return AARCH64_LE;
7342 case GT: return AARCH64_LT;
7343 case LE: return AARCH64_GE;
7344 case LT: return AARCH64_GT;
7345 case GEU: return AARCH64_LS;
7346 case GTU: return AARCH64_CC;
7347 case LEU: return AARCH64_CS;
7348 case LTU: return AARCH64_HI;
cd5660ab 7349 default: return -1;
43e9d192
IB
7350 }
7351 break;
7352
4e10a5a7 7353 case E_CC_NZmode:
43e9d192
IB
7354 switch (comp_code)
7355 {
7356 case NE: return AARCH64_NE;
7357 case EQ: return AARCH64_EQ;
7358 case GE: return AARCH64_PL;
7359 case LT: return AARCH64_MI;
cd5660ab 7360 default: return -1;
43e9d192
IB
7361 }
7362 break;
7363
4e10a5a7 7364 case E_CC_Zmode:
1c992d1e
RE
7365 switch (comp_code)
7366 {
7367 case NE: return AARCH64_NE;
7368 case EQ: return AARCH64_EQ;
cd5660ab 7369 default: return -1;
1c992d1e
RE
7370 }
7371 break;
7372
4e10a5a7 7373 case E_CC_Cmode:
ef22810a
RH
7374 switch (comp_code)
7375 {
f7343f20
RE
7376 case LTU: return AARCH64_CS;
7377 case GEU: return AARCH64_CC;
7378 default: return -1;
7379 }
7380 break;
7381
7382 case E_CC_ADCmode:
7383 switch (comp_code)
7384 {
7385 case GEU: return AARCH64_CS;
7386 case LTU: return AARCH64_CC;
ef22810a
RH
7387 default: return -1;
7388 }
7389 break;
7390
30c46053
MC
7391 case E_CC_Vmode:
7392 switch (comp_code)
7393 {
7394 case NE: return AARCH64_VS;
7395 case EQ: return AARCH64_VC;
7396 default: return -1;
7397 }
7398 break;
7399
43e9d192 7400 default:
cd5660ab 7401 return -1;
43e9d192 7402 }
3dfa7055 7403
3dfa7055 7404 return -1;
43e9d192
IB
7405}
7406
ddeabd3e
AL
7407bool
7408aarch64_const_vec_all_same_in_range_p (rtx x,
6a70badb
RS
7409 HOST_WIDE_INT minval,
7410 HOST_WIDE_INT maxval)
ddeabd3e 7411{
6a70badb
RS
7412 rtx elt;
7413 return (const_vec_duplicate_p (x, &elt)
7414 && CONST_INT_P (elt)
7415 && IN_RANGE (INTVAL (elt), minval, maxval));
ddeabd3e
AL
7416}
7417
7418bool
7419aarch64_const_vec_all_same_int_p (rtx x, HOST_WIDE_INT val)
7420{
7421 return aarch64_const_vec_all_same_in_range_p (x, val, val);
7422}
7423
43cacb12
RS
7424/* Return true if VEC is a constant in which every element is in the range
7425 [MINVAL, MAXVAL]. The elements do not need to have the same value. */
7426
7427static bool
7428aarch64_const_vec_all_in_range_p (rtx vec,
7429 HOST_WIDE_INT minval,
7430 HOST_WIDE_INT maxval)
7431{
7432 if (GET_CODE (vec) != CONST_VECTOR
7433 || GET_MODE_CLASS (GET_MODE (vec)) != MODE_VECTOR_INT)
7434 return false;
7435
7436 int nunits;
7437 if (!CONST_VECTOR_STEPPED_P (vec))
7438 nunits = const_vector_encoded_nelts (vec);
7439 else if (!CONST_VECTOR_NUNITS (vec).is_constant (&nunits))
7440 return false;
7441
7442 for (int i = 0; i < nunits; i++)
7443 {
7444 rtx vec_elem = CONST_VECTOR_ELT (vec, i);
7445 if (!CONST_INT_P (vec_elem)
7446 || !IN_RANGE (INTVAL (vec_elem), minval, maxval))
7447 return false;
7448 }
7449 return true;
7450}
43e9d192 7451
cf670503
ZC
7452/* N Z C V. */
7453#define AARCH64_CC_V 1
7454#define AARCH64_CC_C (1 << 1)
7455#define AARCH64_CC_Z (1 << 2)
7456#define AARCH64_CC_N (1 << 3)
7457
c8012fbc
WD
7458/* N Z C V flags for ccmp. Indexed by AARCH64_COND_CODE. */
7459static const int aarch64_nzcv_codes[] =
7460{
7461 0, /* EQ, Z == 1. */
7462 AARCH64_CC_Z, /* NE, Z == 0. */
7463 0, /* CS, C == 1. */
7464 AARCH64_CC_C, /* CC, C == 0. */
7465 0, /* MI, N == 1. */
7466 AARCH64_CC_N, /* PL, N == 0. */
7467 0, /* VS, V == 1. */
7468 AARCH64_CC_V, /* VC, V == 0. */
7469 0, /* HI, C ==1 && Z == 0. */
7470 AARCH64_CC_C, /* LS, !(C == 1 && Z == 0). */
7471 AARCH64_CC_V, /* GE, N == V. */
7472 0, /* LT, N != V. */
7473 AARCH64_CC_Z, /* GT, Z == 0 && N == V. */
7474 0, /* LE, !(Z == 0 && N == V). */
7475 0, /* AL, Any. */
7476 0 /* NV, Any. */
cf670503
ZC
7477};
7478
43cacb12
RS
7479/* Print floating-point vector immediate operand X to F, negating it
7480 first if NEGATE is true. Return true on success, false if it isn't
7481 a constant we can handle. */
7482
7483static bool
7484aarch64_print_vector_float_operand (FILE *f, rtx x, bool negate)
7485{
7486 rtx elt;
7487
7488 if (!const_vec_duplicate_p (x, &elt))
7489 return false;
7490
7491 REAL_VALUE_TYPE r = *CONST_DOUBLE_REAL_VALUE (elt);
7492 if (negate)
7493 r = real_value_negate (&r);
7494
7495 /* We only handle the SVE single-bit immediates here. */
7496 if (real_equal (&r, &dconst0))
7497 asm_fprintf (f, "0.0");
7498 else if (real_equal (&r, &dconst1))
7499 asm_fprintf (f, "1.0");
7500 else if (real_equal (&r, &dconsthalf))
7501 asm_fprintf (f, "0.5");
7502 else
7503 return false;
7504
7505 return true;
7506}
7507
9f4cbab8
RS
7508/* Return the equivalent letter for size. */
7509static char
7510sizetochar (int size)
7511{
7512 switch (size)
7513 {
7514 case 64: return 'd';
7515 case 32: return 's';
7516 case 16: return 'h';
7517 case 8 : return 'b';
7518 default: gcc_unreachable ();
7519 }
7520}
7521
bcf19844
JW
7522/* Print operand X to file F in a target specific manner according to CODE.
7523 The acceptable formatting commands given by CODE are:
7524 'c': An integer or symbol address without a preceding #
7525 sign.
43cacb12
RS
7526 'C': Take the duplicated element in a vector constant
7527 and print it in hex.
7528 'D': Take the duplicated element in a vector constant
7529 and print it as an unsigned integer, in decimal.
bcf19844
JW
7530 'e': Print the sign/zero-extend size as a character 8->b,
7531 16->h, 32->w.
7532 'p': Prints N such that 2^N == X (X must be power of 2 and
7533 const int).
7534 'P': Print the number of non-zero bits in X (a const_int).
7535 'H': Print the higher numbered register of a pair (TImode)
7536 of regs.
7537 'm': Print a condition (eq, ne, etc).
7538 'M': Same as 'm', but invert condition.
43cacb12
RS
7539 'N': Take the duplicated element in a vector constant
7540 and print the negative of it in decimal.
bcf19844
JW
7541 'b/h/s/d/q': Print a scalar FP/SIMD register name.
7542 'S/T/U/V': Print a FP/SIMD register name for a register list.
7543 The register printed is the FP/SIMD register name
7544 of X + 0/1/2/3 for S/T/U/V.
7545 'R': Print a scalar FP/SIMD register name + 1.
7546 'X': Print bottom 16 bits of integer constant in hex.
7547 'w/x': Print a general register name or the zero register
7548 (32-bit or 64-bit).
7549 '0': Print a normal operand, if it's a general register,
7550 then we assume DImode.
7551 'k': Print NZCV for conditional compare instructions.
7552 'A': Output address constant representing the first
7553 argument of X, specifying a relocation offset
7554 if appropriate.
7555 'L': Output constant address specified by X
7556 with a relocation offset if appropriate.
7557 'G': Prints address of X, specifying a PC relative
7558 relocation mode if appropriate.
7559 'y': Output address of LDP or STP - this is used for
7560 some LDP/STPs which don't use a PARALLEL in their
7561 pattern (so the mode needs to be adjusted).
7562 'z': Output address of a typical LDP or STP. */
bcf19844 7563
cc8ca59e
JB
7564static void
7565aarch64_print_operand (FILE *f, rtx x, int code)
43e9d192 7566{
43cacb12 7567 rtx elt;
43e9d192
IB
7568 switch (code)
7569 {
f541a481
KT
7570 case 'c':
7571 switch (GET_CODE (x))
7572 {
7573 case CONST_INT:
7574 fprintf (f, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
7575 break;
7576
7577 case SYMBOL_REF:
7578 output_addr_const (f, x);
7579 break;
7580
7581 case CONST:
7582 if (GET_CODE (XEXP (x, 0)) == PLUS
7583 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF)
7584 {
7585 output_addr_const (f, x);
7586 break;
7587 }
7588 /* Fall through. */
7589
7590 default:
ee61f880 7591 output_operand_lossage ("unsupported operand for code '%c'", code);
f541a481
KT
7592 }
7593 break;
7594
43e9d192 7595 case 'e':
43e9d192
IB
7596 {
7597 int n;
7598
4aa81c2e 7599 if (!CONST_INT_P (x)
43e9d192
IB
7600 || (n = exact_log2 (INTVAL (x) & ~7)) <= 0)
7601 {
7602 output_operand_lossage ("invalid operand for '%%%c'", code);
7603 return;
7604 }
7605
7606 switch (n)
7607 {
7608 case 3:
7609 fputc ('b', f);
7610 break;
7611 case 4:
7612 fputc ('h', f);
7613 break;
7614 case 5:
7615 fputc ('w', f);
7616 break;
7617 default:
7618 output_operand_lossage ("invalid operand for '%%%c'", code);
7619 return;
7620 }
7621 }
7622 break;
7623
7624 case 'p':
7625 {
7626 int n;
7627
4aa81c2e 7628 if (!CONST_INT_P (x) || (n = exact_log2 (INTVAL (x))) < 0)
43e9d192
IB
7629 {
7630 output_operand_lossage ("invalid operand for '%%%c'", code);
7631 return;
7632 }
7633
7634 asm_fprintf (f, "%d", n);
7635 }
7636 break;
7637
7638 case 'P':
4aa81c2e 7639 if (!CONST_INT_P (x))
43e9d192
IB
7640 {
7641 output_operand_lossage ("invalid operand for '%%%c'", code);
7642 return;
7643 }
7644
8d55c61b 7645 asm_fprintf (f, "%u", popcount_hwi (INTVAL (x)));
43e9d192
IB
7646 break;
7647
7648 case 'H':
c0111dc4
RE
7649 if (x == const0_rtx)
7650 {
7651 asm_fprintf (f, "xzr");
7652 break;
7653 }
7654
4aa81c2e 7655 if (!REG_P (x) || !GP_REGNUM_P (REGNO (x) + 1))
43e9d192
IB
7656 {
7657 output_operand_lossage ("invalid operand for '%%%c'", code);
7658 return;
7659 }
7660
01a3a324 7661 asm_fprintf (f, "%s", reg_names [REGNO (x) + 1]);
43e9d192
IB
7662 break;
7663
43e9d192 7664 case 'M':
c8012fbc 7665 case 'm':
cd5660ab
KT
7666 {
7667 int cond_code;
c8012fbc
WD
7668 /* CONST_TRUE_RTX means al/nv (al is the default, don't print it). */
7669 if (x == const_true_rtx)
cd5660ab 7670 {
c8012fbc
WD
7671 if (code == 'M')
7672 fputs ("nv", f);
cd5660ab
KT
7673 return;
7674 }
43e9d192 7675
cd5660ab
KT
7676 if (!COMPARISON_P (x))
7677 {
7678 output_operand_lossage ("invalid operand for '%%%c'", code);
7679 return;
7680 }
c8012fbc 7681
cd5660ab
KT
7682 cond_code = aarch64_get_condition_code (x);
7683 gcc_assert (cond_code >= 0);
c8012fbc
WD
7684 if (code == 'M')
7685 cond_code = AARCH64_INVERSE_CONDITION_CODE (cond_code);
7686 fputs (aarch64_condition_codes[cond_code], f);
cd5660ab 7687 }
43e9d192
IB
7688 break;
7689
43cacb12
RS
7690 case 'N':
7691 if (!const_vec_duplicate_p (x, &elt))
7692 {
7693 output_operand_lossage ("invalid vector constant");
7694 return;
7695 }
7696
7697 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_VECTOR_INT)
7698 asm_fprintf (f, "%wd", -INTVAL (elt));
7699 else if (GET_MODE_CLASS (GET_MODE (x)) == MODE_VECTOR_FLOAT
7700 && aarch64_print_vector_float_operand (f, x, true))
7701 ;
7702 else
7703 {
7704 output_operand_lossage ("invalid vector constant");
7705 return;
7706 }
7707 break;
7708
43e9d192
IB
7709 case 'b':
7710 case 'h':
7711 case 's':
7712 case 'd':
7713 case 'q':
43e9d192
IB
7714 if (!REG_P (x) || !FP_REGNUM_P (REGNO (x)))
7715 {
7716 output_operand_lossage ("incompatible floating point / vector register operand for '%%%c'", code);
7717 return;
7718 }
50ce6f88 7719 asm_fprintf (f, "%c%d", code, REGNO (x) - V0_REGNUM);
43e9d192
IB
7720 break;
7721
7722 case 'S':
7723 case 'T':
7724 case 'U':
7725 case 'V':
43e9d192
IB
7726 if (!REG_P (x) || !FP_REGNUM_P (REGNO (x)))
7727 {
7728 output_operand_lossage ("incompatible floating point / vector register operand for '%%%c'", code);
7729 return;
7730 }
43cacb12
RS
7731 asm_fprintf (f, "%c%d",
7732 aarch64_sve_data_mode_p (GET_MODE (x)) ? 'z' : 'v',
7733 REGNO (x) - V0_REGNUM + (code - 'S'));
43e9d192
IB
7734 break;
7735
2d8c6dc1 7736 case 'R':
2d8c6dc1
AH
7737 if (!REG_P (x) || !FP_REGNUM_P (REGNO (x)))
7738 {
7739 output_operand_lossage ("incompatible floating point / vector register operand for '%%%c'", code);
7740 return;
7741 }
7742 asm_fprintf (f, "q%d", REGNO (x) - V0_REGNUM + 1);
7743 break;
7744
a05c0ddf 7745 case 'X':
4aa81c2e 7746 if (!CONST_INT_P (x))
a05c0ddf
IB
7747 {
7748 output_operand_lossage ("invalid operand for '%%%c'", code);
7749 return;
7750 }
50d38551 7751 asm_fprintf (f, "0x%wx", UINTVAL (x) & 0xffff);
a05c0ddf
IB
7752 break;
7753
43cacb12
RS
7754 case 'C':
7755 {
7756 /* Print a replicated constant in hex. */
7757 if (!const_vec_duplicate_p (x, &elt) || !CONST_INT_P (elt))
7758 {
7759 output_operand_lossage ("invalid operand for '%%%c'", code);
7760 return;
7761 }
7762 scalar_mode inner_mode = GET_MODE_INNER (GET_MODE (x));
7763 asm_fprintf (f, "0x%wx", UINTVAL (elt) & GET_MODE_MASK (inner_mode));
7764 }
7765 break;
7766
7767 case 'D':
7768 {
7769 /* Print a replicated constant in decimal, treating it as
7770 unsigned. */
7771 if (!const_vec_duplicate_p (x, &elt) || !CONST_INT_P (elt))
7772 {
7773 output_operand_lossage ("invalid operand for '%%%c'", code);
7774 return;
7775 }
7776 scalar_mode inner_mode = GET_MODE_INNER (GET_MODE (x));
7777 asm_fprintf (f, "%wd", UINTVAL (elt) & GET_MODE_MASK (inner_mode));
7778 }
7779 break;
7780
43e9d192
IB
7781 case 'w':
7782 case 'x':
3520f7cc
JG
7783 if (x == const0_rtx
7784 || (CONST_DOUBLE_P (x) && aarch64_float_const_zero_rtx_p (x)))
43e9d192 7785 {
50ce6f88 7786 asm_fprintf (f, "%czr", code);
43e9d192
IB
7787 break;
7788 }
7789
7790 if (REG_P (x) && GP_REGNUM_P (REGNO (x)))
7791 {
50ce6f88 7792 asm_fprintf (f, "%c%d", code, REGNO (x) - R0_REGNUM);
43e9d192
IB
7793 break;
7794 }
7795
7796 if (REG_P (x) && REGNO (x) == SP_REGNUM)
7797 {
50ce6f88 7798 asm_fprintf (f, "%ssp", code == 'w' ? "w" : "");
43e9d192
IB
7799 break;
7800 }
7801
7802 /* Fall through */
7803
7804 case 0:
43e9d192
IB
7805 if (x == NULL)
7806 {
7807 output_operand_lossage ("missing operand");
7808 return;
7809 }
7810
7811 switch (GET_CODE (x))
7812 {
7813 case REG:
43cacb12 7814 if (aarch64_sve_data_mode_p (GET_MODE (x)))
9f4cbab8
RS
7815 {
7816 if (REG_NREGS (x) == 1)
7817 asm_fprintf (f, "z%d", REGNO (x) - V0_REGNUM);
7818 else
7819 {
7820 char suffix
7821 = sizetochar (GET_MODE_UNIT_BITSIZE (GET_MODE (x)));
7822 asm_fprintf (f, "{z%d.%c - z%d.%c}",
7823 REGNO (x) - V0_REGNUM, suffix,
7824 END_REGNO (x) - V0_REGNUM - 1, suffix);
7825 }
7826 }
43cacb12
RS
7827 else
7828 asm_fprintf (f, "%s", reg_names [REGNO (x)]);
43e9d192
IB
7829 break;
7830
7831 case MEM:
cc8ca59e 7832 output_address (GET_MODE (x), XEXP (x, 0));
43e9d192
IB
7833 break;
7834
7835 case LABEL_REF:
7836 case SYMBOL_REF:
7837 output_addr_const (asm_out_file, x);
7838 break;
7839
7840 case CONST_INT:
7841 asm_fprintf (f, "%wd", INTVAL (x));
7842 break;
7843
43cacb12
RS
7844 case CONST:
7845 if (!VECTOR_MODE_P (GET_MODE (x)))
3520f7cc 7846 {
43cacb12
RS
7847 output_addr_const (asm_out_file, x);
7848 break;
3520f7cc 7849 }
43cacb12
RS
7850 /* fall through */
7851
7852 case CONST_VECTOR:
7853 if (!const_vec_duplicate_p (x, &elt))
3520f7cc 7854 {
43cacb12
RS
7855 output_operand_lossage ("invalid vector constant");
7856 return;
3520f7cc 7857 }
43cacb12
RS
7858
7859 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_VECTOR_INT)
7860 asm_fprintf (f, "%wd", INTVAL (elt));
7861 else if (GET_MODE_CLASS (GET_MODE (x)) == MODE_VECTOR_FLOAT
7862 && aarch64_print_vector_float_operand (f, x, false))
7863 ;
3520f7cc 7864 else
43cacb12
RS
7865 {
7866 output_operand_lossage ("invalid vector constant");
7867 return;
7868 }
43e9d192
IB
7869 break;
7870
3520f7cc 7871 case CONST_DOUBLE:
2ca5b430
KT
7872 /* Since we define TARGET_SUPPORTS_WIDE_INT we shouldn't ever
7873 be getting CONST_DOUBLEs holding integers. */
7874 gcc_assert (GET_MODE (x) != VOIDmode);
7875 if (aarch64_float_const_zero_rtx_p (x))
3520f7cc
JG
7876 {
7877 fputc ('0', f);
7878 break;
7879 }
7880 else if (aarch64_float_const_representable_p (x))
7881 {
7882#define buf_size 20
7883 char float_buf[buf_size] = {'\0'};
34a72c33
RS
7884 real_to_decimal_for_mode (float_buf,
7885 CONST_DOUBLE_REAL_VALUE (x),
3520f7cc
JG
7886 buf_size, buf_size,
7887 1, GET_MODE (x));
7888 asm_fprintf (asm_out_file, "%s", float_buf);
7889 break;
7890#undef buf_size
7891 }
7892 output_operand_lossage ("invalid constant");
7893 return;
43e9d192
IB
7894 default:
7895 output_operand_lossage ("invalid operand");
7896 return;
7897 }
7898 break;
7899
7900 case 'A':
7901 if (GET_CODE (x) == HIGH)
7902 x = XEXP (x, 0);
7903
a6e0bfa7 7904 switch (aarch64_classify_symbolic_expression (x))
43e9d192 7905 {
6642bdb4 7906 case SYMBOL_SMALL_GOT_4G:
43e9d192
IB
7907 asm_fprintf (asm_out_file, ":got:");
7908 break;
7909
7910 case SYMBOL_SMALL_TLSGD:
7911 asm_fprintf (asm_out_file, ":tlsgd:");
7912 break;
7913
7914 case SYMBOL_SMALL_TLSDESC:
7915 asm_fprintf (asm_out_file, ":tlsdesc:");
7916 break;
7917
79496620 7918 case SYMBOL_SMALL_TLSIE:
43e9d192
IB
7919 asm_fprintf (asm_out_file, ":gottprel:");
7920 break;
7921
d18ba284 7922 case SYMBOL_TLSLE24:
43e9d192
IB
7923 asm_fprintf (asm_out_file, ":tprel:");
7924 break;
7925
87dd8ab0
MS
7926 case SYMBOL_TINY_GOT:
7927 gcc_unreachable ();
7928 break;
7929
43e9d192
IB
7930 default:
7931 break;
7932 }
7933 output_addr_const (asm_out_file, x);
7934 break;
7935
7936 case 'L':
a6e0bfa7 7937 switch (aarch64_classify_symbolic_expression (x))
43e9d192 7938 {
6642bdb4 7939 case SYMBOL_SMALL_GOT_4G:
43e9d192
IB
7940 asm_fprintf (asm_out_file, ":lo12:");
7941 break;
7942
7943 case SYMBOL_SMALL_TLSGD:
7944 asm_fprintf (asm_out_file, ":tlsgd_lo12:");
7945 break;
7946
7947 case SYMBOL_SMALL_TLSDESC:
7948 asm_fprintf (asm_out_file, ":tlsdesc_lo12:");
7949 break;
7950
79496620 7951 case SYMBOL_SMALL_TLSIE:
43e9d192
IB
7952 asm_fprintf (asm_out_file, ":gottprel_lo12:");
7953 break;
7954
cbf5629e
JW
7955 case SYMBOL_TLSLE12:
7956 asm_fprintf (asm_out_file, ":tprel_lo12:");
7957 break;
7958
d18ba284 7959 case SYMBOL_TLSLE24:
43e9d192
IB
7960 asm_fprintf (asm_out_file, ":tprel_lo12_nc:");
7961 break;
7962
87dd8ab0
MS
7963 case SYMBOL_TINY_GOT:
7964 asm_fprintf (asm_out_file, ":got:");
7965 break;
7966
5ae7caad
JW
7967 case SYMBOL_TINY_TLSIE:
7968 asm_fprintf (asm_out_file, ":gottprel:");
7969 break;
7970
43e9d192
IB
7971 default:
7972 break;
7973 }
7974 output_addr_const (asm_out_file, x);
7975 break;
7976
7977 case 'G':
a6e0bfa7 7978 switch (aarch64_classify_symbolic_expression (x))
43e9d192 7979 {
d18ba284 7980 case SYMBOL_TLSLE24:
43e9d192
IB
7981 asm_fprintf (asm_out_file, ":tprel_hi12:");
7982 break;
7983 default:
7984 break;
7985 }
7986 output_addr_const (asm_out_file, x);
7987 break;
7988
cf670503
ZC
7989 case 'k':
7990 {
c8012fbc 7991 HOST_WIDE_INT cond_code;
cf670503 7992
c8012fbc 7993 if (!CONST_INT_P (x))
cf670503
ZC
7994 {
7995 output_operand_lossage ("invalid operand for '%%%c'", code);
7996 return;
7997 }
7998
c8012fbc
WD
7999 cond_code = INTVAL (x);
8000 gcc_assert (cond_code >= 0 && cond_code <= AARCH64_NV);
8001 asm_fprintf (f, "%d", aarch64_nzcv_codes[cond_code]);
cf670503
ZC
8002 }
8003 break;
8004
e69a816d
WD
8005 case 'y':
8006 case 'z':
8007 {
8008 machine_mode mode = GET_MODE (x);
8009
c348cab0 8010 if (GET_CODE (x) != MEM
6a70badb 8011 || (code == 'y' && maybe_ne (GET_MODE_SIZE (mode), 16)))
e69a816d
WD
8012 {
8013 output_operand_lossage ("invalid operand for '%%%c'", code);
8014 return;
8015 }
8016
a25831ac
AV
8017 if (!aarch64_print_address_internal (f, mode, XEXP (x, 0),
8018 code == 'y'
8019 ? ADDR_QUERY_LDP_STP_N
8020 : ADDR_QUERY_LDP_STP))
c348cab0 8021 output_operand_lossage ("invalid operand prefix '%%%c'", code);
e69a816d
WD
8022 }
8023 break;
8024
43e9d192
IB
8025 default:
8026 output_operand_lossage ("invalid operand prefix '%%%c'", code);
8027 return;
8028 }
8029}
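
/* For illustration, in a machine-description template whose operand 0 is a
   general register: "%x0" prints the 64-bit name (e.g. x3), "%w0" the 32-bit
   name (w3), and plain "%0" on a general register assumes DImode as
   documented above; "%k<n>" on a CONST_INT operand prints the NZCV immediate
   used by conditional-compare instructions.  (Register numbers are arbitrary
   examples.)  */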
8030
e69a816d
WD
8031/* Print address 'x' of a memory access with mode 'mode'.
8032 'op' is the context required by aarch64_classify_address. It can either be
8033 MEM for a normal memory access or PARALLEL for LDP/STP. */
c348cab0 8034static bool
a97d8b98
RS
8035aarch64_print_address_internal (FILE *f, machine_mode mode, rtx x,
8036 aarch64_addr_query_type type)
43e9d192
IB
8037{
8038 struct aarch64_address_info addr;
6a70badb 8039 unsigned int size;
43e9d192 8040
e69a816d 8041 /* Check all addresses are Pmode - including ILP32. */
31460ed2
JJ
8042 if (GET_MODE (x) != Pmode
8043 && (!CONST_INT_P (x)
8044 || trunc_int_for_mode (INTVAL (x), Pmode) != INTVAL (x)))
8045 {
8046 output_operand_lossage ("invalid address mode");
8047 return false;
8048 }
e69a816d 8049
a97d8b98 8050 if (aarch64_classify_address (&addr, x, mode, true, type))
43e9d192
IB
8051 switch (addr.type)
8052 {
8053 case ADDRESS_REG_IMM:
dc640181 8054 if (known_eq (addr.const_offset, 0))
01a3a324 8055 asm_fprintf (f, "[%s]", reg_names [REGNO (addr.base)]);
43cacb12
RS
8056 else if (aarch64_sve_data_mode_p (mode))
8057 {
8058 HOST_WIDE_INT vnum
8059 = exact_div (addr.const_offset,
8060 BYTES_PER_SVE_VECTOR).to_constant ();
8061 asm_fprintf (f, "[%s, #%wd, mul vl]",
8062 reg_names[REGNO (addr.base)], vnum);
8063 }
8064 else if (aarch64_sve_pred_mode_p (mode))
8065 {
8066 HOST_WIDE_INT vnum
8067 = exact_div (addr.const_offset,
8068 BYTES_PER_SVE_PRED).to_constant ();
8069 asm_fprintf (f, "[%s, #%wd, mul vl]",
8070 reg_names[REGNO (addr.base)], vnum);
8071 }
43e9d192 8072 else
16a3246f 8073 asm_fprintf (f, "[%s, %wd]", reg_names [REGNO (addr.base)],
43e9d192 8074 INTVAL (addr.offset));
c348cab0 8075 return true;
43e9d192
IB
8076
8077 case ADDRESS_REG_REG:
8078 if (addr.shift == 0)
16a3246f 8079 asm_fprintf (f, "[%s, %s]", reg_names [REGNO (addr.base)],
01a3a324 8080 reg_names [REGNO (addr.offset)]);
43e9d192 8081 else
16a3246f 8082 asm_fprintf (f, "[%s, %s, lsl %u]", reg_names [REGNO (addr.base)],
01a3a324 8083 reg_names [REGNO (addr.offset)], addr.shift);
c348cab0 8084 return true;
43e9d192
IB
8085
8086 case ADDRESS_REG_UXTW:
8087 if (addr.shift == 0)
16a3246f 8088 asm_fprintf (f, "[%s, w%d, uxtw]", reg_names [REGNO (addr.base)],
43e9d192
IB
8089 REGNO (addr.offset) - R0_REGNUM);
8090 else
16a3246f 8091 asm_fprintf (f, "[%s, w%d, uxtw %u]", reg_names [REGNO (addr.base)],
43e9d192 8092 REGNO (addr.offset) - R0_REGNUM, addr.shift);
c348cab0 8093 return true;
43e9d192
IB
8094
8095 case ADDRESS_REG_SXTW:
8096 if (addr.shift == 0)
16a3246f 8097 asm_fprintf (f, "[%s, w%d, sxtw]", reg_names [REGNO (addr.base)],
43e9d192
IB
8098 REGNO (addr.offset) - R0_REGNUM);
8099 else
16a3246f 8100 asm_fprintf (f, "[%s, w%d, sxtw %u]", reg_names [REGNO (addr.base)],
43e9d192 8101 REGNO (addr.offset) - R0_REGNUM, addr.shift);
c348cab0 8102 return true;
43e9d192
IB
8103
8104 case ADDRESS_REG_WB:
6a70badb
RS
8105 /* Writeback is only supported for fixed-width modes. */
8106 size = GET_MODE_SIZE (mode).to_constant ();
43e9d192
IB
8107 switch (GET_CODE (x))
8108 {
8109 case PRE_INC:
6a70badb 8110 asm_fprintf (f, "[%s, %d]!", reg_names [REGNO (addr.base)], size);
c348cab0 8111 return true;
43e9d192 8112 case POST_INC:
6a70badb 8113 asm_fprintf (f, "[%s], %d", reg_names [REGNO (addr.base)], size);
c348cab0 8114 return true;
43e9d192 8115 case PRE_DEC:
6a70badb 8116 asm_fprintf (f, "[%s, -%d]!", reg_names [REGNO (addr.base)], size);
c348cab0 8117 return true;
43e9d192 8118 case POST_DEC:
6a70badb 8119 asm_fprintf (f, "[%s], -%d", reg_names [REGNO (addr.base)], size);
c348cab0 8120 return true;
43e9d192 8121 case PRE_MODIFY:
6a70badb 8122 asm_fprintf (f, "[%s, %wd]!", reg_names[REGNO (addr.base)],
43e9d192 8123 INTVAL (addr.offset));
c348cab0 8124 return true;
43e9d192 8125 case POST_MODIFY:
6a70badb 8126 asm_fprintf (f, "[%s], %wd", reg_names[REGNO (addr.base)],
43e9d192 8127 INTVAL (addr.offset));
c348cab0 8128 return true;
43e9d192
IB
8129 default:
8130 break;
8131 }
8132 break;
8133
8134 case ADDRESS_LO_SUM:
16a3246f 8135 asm_fprintf (f, "[%s, #:lo12:", reg_names [REGNO (addr.base)]);
43e9d192
IB
8136 output_addr_const (f, addr.offset);
8137 asm_fprintf (f, "]");
c348cab0 8138 return true;
43e9d192
IB
8139
8140 case ADDRESS_SYMBOLIC:
d6591257 8141 output_addr_const (f, x);
c348cab0 8142 return true;
43e9d192
IB
8143 }
8144
c348cab0 8145 return false;
43e9d192
IB
8146}
8147
e69a816d
WD
8148/* Print address 'x' of a memory access with mode 'mode'. */
8149static void
8150aarch64_print_operand_address (FILE *f, machine_mode mode, rtx x)
8151{
43cacb12 8152 if (!aarch64_print_address_internal (f, mode, x, ADDR_QUERY_ANY))
c348cab0 8153 output_addr_const (f, x);
e69a816d
WD
8154}
8155
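/* Return true if X mentions a label. LABEL_REFs that appear inside
   UNSPEC_TLS operands are constant offsets rather than real label
   references, so they are ignored (see below). */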
43e9d192
IB
8156bool
8157aarch64_label_mentioned_p (rtx x)
8158{
8159 const char *fmt;
8160 int i;
8161
8162 if (GET_CODE (x) == LABEL_REF)
8163 return true;
8164
8165 /* UNSPEC_TLS entries for a symbol include a LABEL_REF for the
8166 referencing instruction, but they are constant offsets, not
8167 symbols. */
8168 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
8169 return false;
8170
8171 fmt = GET_RTX_FORMAT (GET_CODE (x));
8172 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
8173 {
8174 if (fmt[i] == 'E')
8175 {
8176 int j;
8177
8178 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
8179 if (aarch64_label_mentioned_p (XVECEXP (x, i, j)))
8180 return 1;
8181 }
8182 else if (fmt[i] == 'e' && aarch64_label_mentioned_p (XEXP (x, i)))
8183 return 1;
8184 }
8185
8186 return 0;
8187}
8188
8189/* Implement REGNO_REG_CLASS. */
8190
8191enum reg_class
8192aarch64_regno_regclass (unsigned regno)
8193{
8194 if (GP_REGNUM_P (regno))
a4a182c6 8195 return GENERAL_REGS;
43e9d192
IB
8196
8197 if (regno == SP_REGNUM)
8198 return STACK_REG;
8199
8200 if (regno == FRAME_POINTER_REGNUM
8201 || regno == ARG_POINTER_REGNUM)
f24bb080 8202 return POINTER_REGS;
43e9d192
IB
8203
8204 if (FP_REGNUM_P (regno))
8205 return FP_LO_REGNUM_P (regno) ? FP_LO_REGS : FP_REGS;
8206
43cacb12
RS
8207 if (PR_REGNUM_P (regno))
8208 return PR_LO_REGNUM_P (regno) ? PR_LO_REGS : PR_HI_REGS;
8209
43e9d192
IB
8210 return NO_REGS;
8211}
8212
6a70badb
RS
8213/* OFFSET is an address offset for mode MODE, which has SIZE bytes.
8214 If OFFSET is out of range, return an offset of an anchor point
8215 that is in range. Return 0 otherwise. */
8216
8217static HOST_WIDE_INT
8218aarch64_anchor_offset (HOST_WIDE_INT offset, HOST_WIDE_INT size,
8219 machine_mode mode)
8220{
8221 /* Does it look like we'll need a 16-byte load/store-pair operation? */
8222 if (size > 16)
8223 return (offset + 0x400) & ~0x7f0;
8224
8225 /* For offsets that aren't a multiple of the access size, the limit is
8226 -256...255. */
8227 if (offset & (size - 1))
8228 {
8229 /* BLKmode typically uses LDP of X-registers. */
8230 if (mode == BLKmode)
8231 return (offset + 512) & ~0x3ff;
8232 return (offset + 0x100) & ~0x1ff;
8233 }
8234
8235 /* Small negative offsets are supported. */
8236 if (IN_RANGE (offset, -256, 0))
8237 return 0;
8238
8239 if (mode == TImode || mode == TFmode)
8240 return (offset + 0x100) & ~0x1ff;
8241
8242 /* Use a 12-bit offset, scaled by the access size. */
8243 return offset & (~0xfff * size);
8244}
8245
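/* Implement TARGET_LEGITIMIZE_ADDRESS. */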
0c4ec427 8246static rtx
ef4bddc2 8247aarch64_legitimize_address (rtx x, rtx /* orig_x */, machine_mode mode)
0c4ec427
RE
8248{
8249 /* Try to split X+CONST into Y=X+(CONST & ~mask), Y+(CONST&mask),
8250 where mask is selected by alignment and size of the offset.
8251 We try to pick as large a range for the offset as possible to
8252 maximize the chance of a CSE. However, for aligned addresses
8253 we limit the range to 4k so that structures with different sized
e8426e0a
BC
8254 elements are likely to use the same base. We need to be careful
8255 not to split a CONST for some forms of address expression, otherwise
8256 it will generate sub-optimal code. */
0c4ec427
RE
8257
8258 if (GET_CODE (x) == PLUS && CONST_INT_P (XEXP (x, 1)))
8259 {
9e0218fc 8260 rtx base = XEXP (x, 0);
17d7bdd8 8261 rtx offset_rtx = XEXP (x, 1);
9e0218fc 8262 HOST_WIDE_INT offset = INTVAL (offset_rtx);
0c4ec427 8263
9e0218fc 8264 if (GET_CODE (base) == PLUS)
e8426e0a 8265 {
9e0218fc
RH
8266 rtx op0 = XEXP (base, 0);
8267 rtx op1 = XEXP (base, 1);
8268
8269 /* Force any scaling into a temp for CSE. */
8270 op0 = force_reg (Pmode, op0);
8271 op1 = force_reg (Pmode, op1);
8272
8273 /* Let the pointer register be in op0. */
8274 if (REG_POINTER (op1))
8275 std::swap (op0, op1);
8276
8277 /* If the pointer is virtual or frame related, then we know that
8278 virtual register instantiation or register elimination is going
8279 to apply a second constant. We want the two constants folded
8280 together easily. Therefore, emit as (OP0 + CONST) + OP1. */
8281 if (virt_or_elim_regno_p (REGNO (op0)))
e8426e0a 8282 {
9e0218fc
RH
8283 base = expand_binop (Pmode, add_optab, op0, offset_rtx,
8284 NULL_RTX, true, OPTAB_DIRECT);
8285 return gen_rtx_PLUS (Pmode, base, op1);
e8426e0a 8286 }
e8426e0a 8287
9e0218fc
RH
8288 /* Otherwise, in order to encourage CSE (and thence loop strength
8289 reduce) scaled addresses, emit as (OP0 + OP1) + CONST. */
8290 base = expand_binop (Pmode, add_optab, op0, op1,
8291 NULL_RTX, true, OPTAB_DIRECT);
8292 x = gen_rtx_PLUS (Pmode, base, offset_rtx);
e8426e0a
BC
8293 }
8294
6a70badb
RS
8295 HOST_WIDE_INT size;
8296 if (GET_MODE_SIZE (mode).is_constant (&size))
ff0f3f1c 8297 {
6a70badb
RS
8298 HOST_WIDE_INT base_offset = aarch64_anchor_offset (offset, size,
8299 mode);
8300 if (base_offset != 0)
8301 {
8302 base = plus_constant (Pmode, base, base_offset);
8303 base = force_operand (base, NULL_RTX);
8304 return plus_constant (Pmode, base, offset - base_offset);
8305 }
9e0218fc 8306 }
0c4ec427
RE
8307 }
8308
8309 return x;
8310}
8311
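/* Implement TARGET_SECONDARY_RELOAD. */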
43e9d192
IB
8312static reg_class_t
8313aarch64_secondary_reload (bool in_p ATTRIBUTE_UNUSED, rtx x,
8314 reg_class_t rclass,
ef4bddc2 8315 machine_mode mode,
43e9d192
IB
8316 secondary_reload_info *sri)
8317{
9a1b9cb4
RS
8318 /* Use aarch64_sve_reload_be for SVE reloads that cannot be handled
8319 directly by the *aarch64_sve_mov<mode>_be move pattern. See the
8320 comment at the head of aarch64-sve.md for more details about the
8321 big-endian handling. */
43cacb12
RS
8322 if (BYTES_BIG_ENDIAN
8323 && reg_class_subset_p (rclass, FP_REGS)
9a1b9cb4
RS
8324 && !((REG_P (x) && HARD_REGISTER_P (x))
8325 || aarch64_simd_valid_immediate (x, NULL))
43cacb12
RS
8326 && aarch64_sve_data_mode_p (mode))
8327 {
8328 sri->icode = CODE_FOR_aarch64_sve_reload_be;
8329 return NO_REGS;
8330 }
b4f50fd4
RR
8331
8332 /* If we have to disable direct literal pool loads and stores because the
8333 function is too big, then we need a scratch register. */
8334 if (MEM_P (x) && GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x)
8335 && (SCALAR_FLOAT_MODE_P (GET_MODE (x))
8336 || targetm.vector_mode_supported_p (GET_MODE (x)))
9ee6540a 8337 && !aarch64_pcrelative_literal_loads)
b4f50fd4 8338 {
0016d8d9 8339 sri->icode = code_for_aarch64_reload_movcp (mode, DImode);
b4f50fd4
RR
8340 return NO_REGS;
8341 }
8342
43e9d192
IB
8343 /* Without the TARGET_SIMD instructions we cannot move a Q register
8344 to a Q register directly. We need a scratch. */
8345 if (REG_P (x) && (mode == TFmode || mode == TImode) && mode == GET_MODE (x)
8346 && FP_REGNUM_P (REGNO (x)) && !TARGET_SIMD
8347 && reg_class_subset_p (rclass, FP_REGS))
8348 {
0016d8d9 8349 sri->icode = code_for_aarch64_reload_mov (mode);
43e9d192
IB
8350 return NO_REGS;
8351 }
8352
8353 /* A TFmode or TImode memory access should be handled via an FP_REG
8354 because AArch64 has richer addressing modes for LDR/STR instructions
8355 than LDP/STP instructions. */
d5726973 8356 if (TARGET_FLOAT && rclass == GENERAL_REGS
6a70badb 8357 && known_eq (GET_MODE_SIZE (mode), 16) && MEM_P (x))
43e9d192
IB
8358 return FP_REGS;
8359
8360 if (rclass == FP_REGS && (mode == TImode || mode == TFmode) && CONSTANT_P(x))
a4a182c6 8361 return GENERAL_REGS;
43e9d192
IB
8362
8363 return NO_REGS;
8364}
8365
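/* Implement TARGET_CAN_ELIMINATE. */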
8366static bool
6216fd90 8367aarch64_can_eliminate (const int from ATTRIBUTE_UNUSED, const int to)
43e9d192 8368{
6216fd90 8369 gcc_assert (from == ARG_POINTER_REGNUM || from == FRAME_POINTER_REGNUM);
43e9d192 8370
6216fd90
WD
8371 /* If we need a frame pointer, ARG_POINTER_REGNUM and FRAME_POINTER_REGNUM
8372 can only eliminate to HARD_FRAME_POINTER_REGNUM. */
43e9d192 8373 if (frame_pointer_needed)
6216fd90 8374 return to == HARD_FRAME_POINTER_REGNUM;
43e9d192
IB
8375 return true;
8376}
8377
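/* Implement INITIAL_ELIMINATION_OFFSET.  Return the offset from register
   FROM to register TO, based on the current frame layout. */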
6a70badb 8378poly_int64
43e9d192
IB
8379aarch64_initial_elimination_offset (unsigned from, unsigned to)
8380{
78c29983
MS
8381 if (to == HARD_FRAME_POINTER_REGNUM)
8382 {
8383 if (from == ARG_POINTER_REGNUM)
71bfb77a 8384 return cfun->machine->frame.hard_fp_offset;
78c29983
MS
8385
8386 if (from == FRAME_POINTER_REGNUM)
71bfb77a
WD
8387 return cfun->machine->frame.hard_fp_offset
8388 - cfun->machine->frame.locals_offset;
78c29983
MS
8389 }
8390
8391 if (to == STACK_POINTER_REGNUM)
8392 {
8393 if (from == FRAME_POINTER_REGNUM)
71bfb77a
WD
8394 return cfun->machine->frame.frame_size
8395 - cfun->machine->frame.locals_offset;
78c29983
MS
8396 }
8397
1c960e02 8398 return cfun->machine->frame.frame_size;
43e9d192
IB
8399}
8400
43e9d192
IB
8401/* Implement RETURN_ADDR_RTX. We do not support moving back to a
8402 previous frame. */
8403
8404rtx
8405aarch64_return_addr (int count, rtx frame ATTRIBUTE_UNUSED)
8406{
8407 if (count != 0)
8408 return const0_rtx;
8409 return get_hard_reg_initial_val (Pmode, LR_REGNUM);
8410}
8411
8412
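/* Implement TARGET_ASM_TRAMPOLINE_TEMPLATE.  Output the fixed part of
   the trampoline code to F. */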
8413static void
8414aarch64_asm_trampoline_template (FILE *f)
8415{
b5f794b4
SD
8416 int offset1 = 16;
8417 int offset2 = 20;
8418
8419 if (aarch64_bti_enabled ())
8420 {
8421 asm_fprintf (f, "\thint\t34 // bti c\n");
8422 offset1 -= 4;
8423 offset2 -= 4;
8424 }
8425
28514dda
YZ
8426 if (TARGET_ILP32)
8427 {
b5f794b4
SD
8428 asm_fprintf (f, "\tldr\tw%d, .+%d\n", IP1_REGNUM - R0_REGNUM, offset1);
8429 asm_fprintf (f, "\tldr\tw%d, .+%d\n", STATIC_CHAIN_REGNUM - R0_REGNUM,
8430 offset1);
28514dda
YZ
8431 }
8432 else
8433 {
b5f794b4
SD
8434 asm_fprintf (f, "\tldr\t%s, .+%d\n", reg_names [IP1_REGNUM], offset1);
8435 asm_fprintf (f, "\tldr\t%s, .+%d\n", reg_names [STATIC_CHAIN_REGNUM],
8436 offset2);
28514dda 8437 }
01a3a324 8438 asm_fprintf (f, "\tbr\t%s\n", reg_names [IP1_REGNUM]);
b5f794b4
SD
8439
8440 /* The trampoline needs an extra padding instruction. If BTI is
8441 enabled, the padding instruction is replaced by the BTI instruction
8442 at the beginning. */
8443 if (!aarch64_bti_enabled ())
8444 assemble_aligned_integer (4, const0_rtx);
8445
28514dda
YZ
8446 assemble_aligned_integer (POINTER_BYTES, const0_rtx);
8447 assemble_aligned_integer (POINTER_BYTES, const0_rtx);
43e9d192
IB
8448}
8449
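/* Implement TARGET_TRAMPOLINE_INIT.  Copy the trampoline template into
   M_TRAMP and fill in the target function address and the static chain
   value. */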
8450static void
8451aarch64_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
8452{
8453 rtx fnaddr, mem, a_tramp;
28514dda 8454 const int tramp_code_sz = 16;
43e9d192
IB
8455
8456 /* Don't need to copy the trailing D-words; we fill those in below. */
8457 emit_block_move (m_tramp, assemble_trampoline_template (),
28514dda
YZ
8458 GEN_INT (tramp_code_sz), BLOCK_OP_NORMAL);
8459 mem = adjust_address (m_tramp, ptr_mode, tramp_code_sz);
43e9d192 8460 fnaddr = XEXP (DECL_RTL (fndecl), 0);
28514dda
YZ
8461 if (GET_MODE (fnaddr) != ptr_mode)
8462 fnaddr = convert_memory_address (ptr_mode, fnaddr);
43e9d192
IB
8463 emit_move_insn (mem, fnaddr);
8464
28514dda 8465 mem = adjust_address (m_tramp, ptr_mode, tramp_code_sz + POINTER_BYTES);
43e9d192
IB
8466 emit_move_insn (mem, chain_value);
8467
8468 /* XXX We should really define a "clear_cache" pattern and use
8469 gen_clear_cache(). */
8470 a_tramp = XEXP (m_tramp, 0);
8471 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__clear_cache"),
db69559b 8472 LCT_NORMAL, VOIDmode, a_tramp, ptr_mode,
28514dda
YZ
8473 plus_constant (ptr_mode, a_tramp, TRAMPOLINE_SIZE),
8474 ptr_mode);
43e9d192
IB
8475}
8476
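/* Implement TARGET_CLASS_MAX_NREGS. */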
8477static unsigned char
ef4bddc2 8478aarch64_class_max_nregs (reg_class_t regclass, machine_mode mode)
43e9d192 8479{
6a70badb
RS
8480 /* ??? Logically we should only need to provide a value when
8481 HARD_REGNO_MODE_OK says that at least one register in REGCLASS
8482 can hold MODE, but at the moment we need to handle all modes.
8483 Just ignore any runtime parts for registers that can't store them. */
8484 HOST_WIDE_INT lowest_size = constant_lower_bound (GET_MODE_SIZE (mode));
43cacb12 8485 unsigned int nregs;
43e9d192
IB
8486 switch (regclass)
8487 {
d677263e 8488 case TAILCALL_ADDR_REGS:
43e9d192
IB
8489 case POINTER_REGS:
8490 case GENERAL_REGS:
8491 case ALL_REGS:
f25a140b 8492 case POINTER_AND_FP_REGS:
43e9d192
IB
8493 case FP_REGS:
8494 case FP_LO_REGS:
43cacb12
RS
8495 if (aarch64_sve_data_mode_p (mode)
8496 && constant_multiple_p (GET_MODE_SIZE (mode),
8497 BYTES_PER_SVE_VECTOR, &nregs))
8498 return nregs;
8499 return (aarch64_vector_data_mode_p (mode)
6a70badb
RS
8500 ? CEIL (lowest_size, UNITS_PER_VREG)
8501 : CEIL (lowest_size, UNITS_PER_WORD));
43e9d192 8502 case STACK_REG:
43cacb12
RS
8503 case PR_REGS:
8504 case PR_LO_REGS:
8505 case PR_HI_REGS:
43e9d192
IB
8506 return 1;
8507
8508 case NO_REGS:
8509 return 0;
8510
8511 default:
8512 break;
8513 }
8514 gcc_unreachable ();
8515}
8516
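/* Implement TARGET_PREFERRED_RELOAD_CLASS. */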
8517static reg_class_t
78d8b9f0 8518aarch64_preferred_reload_class (rtx x, reg_class_t regclass)
43e9d192 8519{
51bb310d 8520 if (regclass == POINTER_REGS)
78d8b9f0
IB
8521 return GENERAL_REGS;
8522
51bb310d
MS
8523 if (regclass == STACK_REG)
8524 {
8525 if (REG_P(x)
8526 && reg_class_subset_p (REGNO_REG_CLASS (REGNO (x)), POINTER_REGS))
8527 return regclass;
8528
8529 return NO_REGS;
8530 }
8531
27bd251b
IB
8532 /* Register elimination can result in a request for
8533 SP+constant->FP_REGS. We cannot support such operations, which
8534 use SP as the source and an FP_REG as the destination, so reject them
8535 right now. */
8536 if (! reg_class_subset_p (regclass, GENERAL_REGS) && GET_CODE (x) == PLUS)
8537 {
8538 rtx lhs = XEXP (x, 0);
8539
8540 /* Look through a possible SUBREG introduced by ILP32. */
8541 if (GET_CODE (lhs) == SUBREG)
8542 lhs = SUBREG_REG (lhs);
8543
8544 gcc_assert (REG_P (lhs));
8545 gcc_assert (reg_class_subset_p (REGNO_REG_CLASS (REGNO (lhs)),
8546 POINTER_REGS));
8547 return NO_REGS;
8548 }
8549
78d8b9f0 8550 return regclass;
43e9d192
IB
8551}
8552
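/* Implement ASM_OUTPUT_LABELREF.  Print a reference to the label NAME
   to F; %U adds the user label prefix. */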
8553void
8554aarch64_asm_output_labelref (FILE* f, const char *name)
8555{
8556 asm_fprintf (f, "%U%s", name);
8557}
8558
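/* Implement TARGET_ASM_CONSTRUCTOR.  Emit a reference to SYMBOL in a
   .init_array section, using a priority-specific section name when
   PRIORITY is not the default. */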
8559static void
8560aarch64_elf_asm_constructor (rtx symbol, int priority)
8561{
8562 if (priority == DEFAULT_INIT_PRIORITY)
8563 default_ctor_section_asm_out_constructor (symbol, priority);
8564 else
8565 {
8566 section *s;
53d190c1
AT
8567 /* While priority is known to be in range [0, 65535], so 18 bytes
8568 would be enough, the compiler might not know that. To avoid
8569 -Wformat-truncation false positive, use a larger size. */
8570 char buf[23];
43e9d192 8571 snprintf (buf, sizeof (buf), ".init_array.%.5u", priority);
fcef3abd 8572 s = get_section (buf, SECTION_WRITE | SECTION_NOTYPE, NULL);
43e9d192
IB
8573 switch_to_section (s);
8574 assemble_align (POINTER_SIZE);
28514dda 8575 assemble_aligned_integer (POINTER_BYTES, symbol);
43e9d192
IB
8576 }
8577}
8578
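/* Implement TARGET_ASM_DESTRUCTOR.  As for the constructor case above,
   but emitting into a .fini_array section. */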
8579static void
8580aarch64_elf_asm_destructor (rtx symbol, int priority)
8581{
8582 if (priority == DEFAULT_INIT_PRIORITY)
8583 default_dtor_section_asm_out_destructor (symbol, priority);
8584 else
8585 {
8586 section *s;
53d190c1
AT
8587 /* While priority is known to be in range [0, 65535], so 18 bytes
8588 would be enough, the compiler might not know that. To avoid
8589 -Wformat-truncation false positive, use a larger size. */
8590 char buf[23];
43e9d192 8591 snprintf (buf, sizeof (buf), ".fini_array.%.5u", priority);
fcef3abd 8592 s = get_section (buf, SECTION_WRITE | SECTION_NOTYPE, NULL);
43e9d192
IB
8593 switch_to_section (s);
8594 assemble_align (POINTER_SIZE);
28514dda 8595 assemble_aligned_integer (POINTER_BYTES, symbol);
43e9d192
IB
8596 }
8597}
8598
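/* Output the dispatch sequence for a jump table; used by the casesi
   patterns in aarch64.md.  OPERANDS are the operands of that pattern. */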
8599const char*
8600aarch64_output_casesi (rtx *operands)
8601{
8602 char buf[100];
8603 char label[100];
b32d5189 8604 rtx diff_vec = PATTERN (NEXT_INSN (as_a <rtx_insn *> (operands[2])));
43e9d192
IB
8605 int index;
8606 static const char *const patterns[4][2] =
8607 {
8608 {
8609 "ldrb\t%w3, [%0,%w1,uxtw]",
8610 "add\t%3, %4, %w3, sxtb #2"
8611 },
8612 {
8613 "ldrh\t%w3, [%0,%w1,uxtw #1]",
8614 "add\t%3, %4, %w3, sxth #2"
8615 },
8616 {
8617 "ldr\t%w3, [%0,%w1,uxtw #2]",
8618 "add\t%3, %4, %w3, sxtw #2"
8619 },
8620 /* We assume that DImode is only generated when not optimizing and
8621 that we don't really need 64-bit address offsets. That would
8622 imply an object file with 8GB of code in a single function! */
8623 {
8624 "ldr\t%w3, [%0,%w1,uxtw #2]",
8625 "add\t%3, %4, %w3, sxtw #2"
8626 }
8627 };
8628
8629 gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC);
8630
77e994c9
RS
8631 scalar_int_mode mode = as_a <scalar_int_mode> (GET_MODE (diff_vec));
8632 index = exact_log2 (GET_MODE_SIZE (mode));
43e9d192
IB
8633
8634 gcc_assert (index >= 0 && index <= 3);
8635
8636 /* Need to implement table size reduction, by changing the code below. */
8637 output_asm_insn (patterns[index][0], operands);
8638 ASM_GENERATE_INTERNAL_LABEL (label, "Lrtx", CODE_LABEL_NUMBER (operands[2]));
8639 snprintf (buf, sizeof (buf),
8640 "adr\t%%4, %s", targetm.strip_name_encoding (label));
8641 output_asm_insn (buf, operands);
8642 output_asm_insn (patterns[index][1], operands);
8643 output_asm_insn ("br\t%3", operands);
8644 assemble_label (asm_out_file, label);
8645 return "";
8646}
8647
8648
8649/* Return size in bits of an arithmetic operand which is shifted/scaled and
8650 masked such that it is suitable for a UXTB, UXTH, or UXTW extend
8651 operator. */
8652
8653int
8654aarch64_uxt_size (int shift, HOST_WIDE_INT mask)
8655{
8656 if (shift >= 0 && shift <= 3)
8657 {
8658 int size;
8659 for (size = 8; size <= 32; size *= 2)
8660 {
8661 HOST_WIDE_INT bits = ((HOST_WIDE_INT)1U << size) - 1;
8662 if (mask == bits << shift)
8663 return size;
8664 }
8665 }
8666 return 0;
8667}
8668
e78d485e
RR
8669 /* Constant pools are per-function only when PC-relative literal
8670 loads are enabled or we are using the large memory
8671 model. */
8672
8673static inline bool
8674aarch64_can_use_per_function_literal_pools_p (void)
8675{
9ee6540a 8676 return (aarch64_pcrelative_literal_loads
e78d485e
RR
8677 || aarch64_cmodel == AARCH64_CMODEL_LARGE);
8678}
8679
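/* Implement TARGET_USE_BLOCKS_FOR_CONSTANT_P. */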
43e9d192 8680static bool
e78d485e 8681aarch64_use_blocks_for_constant_p (machine_mode, const_rtx)
43e9d192 8682{
74a9301d
VM
8683 /* We can't use blocks for constants when we're using a per-function
8684 constant pool. */
8685 return !aarch64_can_use_per_function_literal_pools_p ();
43e9d192
IB
8686}
8687
e78d485e
RR
8688/* Select appropriate section for constants depending
8689 on where we place literal pools. */
8690
43e9d192 8691static section *
e78d485e
RR
8692aarch64_select_rtx_section (machine_mode mode,
8693 rtx x,
8694 unsigned HOST_WIDE_INT align)
43e9d192 8695{
e78d485e
RR
8696 if (aarch64_can_use_per_function_literal_pools_p ())
8697 return function_section (current_function_decl);
43e9d192 8698
e78d485e
RR
8699 return default_elf_select_rtx_section (mode, x, align);
8700}
43e9d192 8701
5fca7b66
RH
8702/* Implement ASM_OUTPUT_POOL_EPILOGUE. */
8703void
8704aarch64_asm_output_pool_epilogue (FILE *f, const char *, tree,
8705 HOST_WIDE_INT offset)
8706{
8707 /* When using per-function literal pools, we must ensure that any code
8708 section is aligned to the minimal instruction length, lest we get
8709 errors from the assembler re "unaligned instructions". */
8710 if ((offset & 3) && aarch64_can_use_per_function_literal_pools_p ())
8711 ASM_OUTPUT_ALIGN (f, 2);
8712}
8713
43e9d192
IB
8714/* Costs. */
8715
8716/* Helper function for rtx cost calculation. Strip a shift expression
8717 from X. Returns the inner operand if successful, or the original
8718 expression on failure. */
8719static rtx
8720aarch64_strip_shift (rtx x)
8721{
8722 rtx op = x;
8723
57b77d46
RE
8724 /* We accept both ROTATERT and ROTATE: since the RHS must be a constant
8725 we can convert both to ROR during final output. */
43e9d192
IB
8726 if ((GET_CODE (op) == ASHIFT
8727 || GET_CODE (op) == ASHIFTRT
57b77d46
RE
8728 || GET_CODE (op) == LSHIFTRT
8729 || GET_CODE (op) == ROTATERT
8730 || GET_CODE (op) == ROTATE)
43e9d192
IB
8731 && CONST_INT_P (XEXP (op, 1)))
8732 return XEXP (op, 0);
8733
8734 if (GET_CODE (op) == MULT
8735 && CONST_INT_P (XEXP (op, 1))
8736 && ((unsigned) exact_log2 (INTVAL (XEXP (op, 1)))) < 64)
8737 return XEXP (op, 0);
8738
8739 return x;
8740}
8741
4745e701 8742/* Helper function for rtx cost calculation. Strip an extend
43e9d192
IB
8743 expression from X. Returns the inner operand if successful, or the
8744 original expression on failure. We deal with a number of possible
b10f1009
AP
8745 canonicalization variations here. If STRIP_SHIFT is true, then
8746 we can strip off a shift also. */
43e9d192 8747static rtx
b10f1009 8748aarch64_strip_extend (rtx x, bool strip_shift)
43e9d192 8749{
77e994c9 8750 scalar_int_mode mode;
43e9d192
IB
8751 rtx op = x;
8752
77e994c9
RS
8753 if (!is_a <scalar_int_mode> (GET_MODE (op), &mode))
8754 return op;
8755
43e9d192
IB
8756 /* Zero and sign extraction of a widened value. */
8757 if ((GET_CODE (op) == ZERO_EXTRACT || GET_CODE (op) == SIGN_EXTRACT)
8758 && XEXP (op, 2) == const0_rtx
4745e701 8759 && GET_CODE (XEXP (op, 0)) == MULT
77e994c9 8760 && aarch64_is_extend_from_extract (mode, XEXP (XEXP (op, 0), 1),
43e9d192
IB
8761 XEXP (op, 1)))
8762 return XEXP (XEXP (op, 0), 0);
8763
8764 /* It can also be represented (for zero-extend) as an AND with an
8765 immediate. */
8766 if (GET_CODE (op) == AND
8767 && GET_CODE (XEXP (op, 0)) == MULT
8768 && CONST_INT_P (XEXP (XEXP (op, 0), 1))
8769 && CONST_INT_P (XEXP (op, 1))
8770 && aarch64_uxt_size (exact_log2 (INTVAL (XEXP (XEXP (op, 0), 1))),
8771 INTVAL (XEXP (op, 1))) != 0)
8772 return XEXP (XEXP (op, 0), 0);
8773
8774 /* Now handle extended register, as this may also have an optional
8775 left shift by 1..4. */
b10f1009
AP
8776 if (strip_shift
8777 && GET_CODE (op) == ASHIFT
43e9d192
IB
8778 && CONST_INT_P (XEXP (op, 1))
8779 && ((unsigned HOST_WIDE_INT) INTVAL (XEXP (op, 1))) <= 4)
8780 op = XEXP (op, 0);
8781
8782 if (GET_CODE (op) == ZERO_EXTEND
8783 || GET_CODE (op) == SIGN_EXTEND)
8784 op = XEXP (op, 0);
8785
8786 if (op != x)
8787 return op;
8788
4745e701
JG
8789 return x;
8790}
8791
0a78ebe4
KT
8792/* Return true iff CODE is a shift supported in combination
8793 with arithmetic instructions. */
4d1919ed 8794
0a78ebe4
KT
8795static bool
8796aarch64_shift_p (enum rtx_code code)
8797{
8798 return code == ASHIFT || code == ASHIFTRT || code == LSHIFTRT;
8799}
8800
b10f1009
AP
8801
8802/* Return true iff X is a cheap shift without a sign extend. */
8803
8804static bool
8805aarch64_cheap_mult_shift_p (rtx x)
8806{
8807 rtx op0, op1;
8808
8809 op0 = XEXP (x, 0);
8810 op1 = XEXP (x, 1);
8811
8812 if (!(aarch64_tune_params.extra_tuning_flags
8813 & AARCH64_EXTRA_TUNE_CHEAP_SHIFT_EXTEND))
8814 return false;
8815
8816 if (GET_CODE (op0) == SIGN_EXTEND)
8817 return false;
8818
8819 if (GET_CODE (x) == ASHIFT && CONST_INT_P (op1)
8820 && UINTVAL (op1) <= 4)
8821 return true;
8822
8823 if (GET_CODE (x) != MULT || !CONST_INT_P (op1))
8824 return false;
8825
8826 HOST_WIDE_INT l2 = exact_log2 (INTVAL (op1));
8827
8828 if (l2 > 0 && l2 <= 4)
8829 return true;
8830
8831 return false;
8832}
8833
4745e701 8834/* Helper function for rtx cost calculation. Calculate the cost of
0a78ebe4
KT
8835 a MULT or ASHIFT, which may be part of a compound PLUS/MINUS rtx.
8836 Return the calculated cost of the expression, recursing manually in to
4745e701
JG
8837 operands where needed. */
8838
8839static int
e548c9df 8840aarch64_rtx_mult_cost (rtx x, enum rtx_code code, int outer, bool speed)
4745e701
JG
8841{
8842 rtx op0, op1;
8843 const struct cpu_cost_table *extra_cost
b175b679 8844 = aarch64_tune_params.insn_extra_cost;
4745e701 8845 int cost = 0;
0a78ebe4 8846 bool compound_p = (outer == PLUS || outer == MINUS);
ef4bddc2 8847 machine_mode mode = GET_MODE (x);
4745e701
JG
8848
8849 gcc_checking_assert (code == MULT);
8850
8851 op0 = XEXP (x, 0);
8852 op1 = XEXP (x, 1);
8853
8854 if (VECTOR_MODE_P (mode))
8855 mode = GET_MODE_INNER (mode);
8856
8857 /* Integer multiply/fma. */
8858 if (GET_MODE_CLASS (mode) == MODE_INT)
8859 {
8860 /* The multiply will be canonicalized as a shift, cost it as such. */
0a78ebe4
KT
8861 if (aarch64_shift_p (GET_CODE (x))
8862 || (CONST_INT_P (op1)
8863 && exact_log2 (INTVAL (op1)) > 0))
4745e701 8864 {
0a78ebe4
KT
8865 bool is_extend = GET_CODE (op0) == ZERO_EXTEND
8866 || GET_CODE (op0) == SIGN_EXTEND;
4745e701
JG
8867 if (speed)
8868 {
0a78ebe4
KT
8869 if (compound_p)
8870 {
b10f1009
AP
8871 /* If the shift is considered cheap,
8872 then don't add any cost. */
8873 if (aarch64_cheap_mult_shift_p (x))
8874 ;
8875 else if (REG_P (op1))
0a78ebe4
KT
8876 /* ARITH + shift-by-register. */
8877 cost += extra_cost->alu.arith_shift_reg;
8878 else if (is_extend)
8879 /* ARITH + extended register. We don't have a cost field
8880 for ARITH+EXTEND+SHIFT, so use extend_arith here. */
8881 cost += extra_cost->alu.extend_arith;
8882 else
8883 /* ARITH + shift-by-immediate. */
8884 cost += extra_cost->alu.arith_shift;
8885 }
4745e701
JG
8886 else
8887 /* LSL (immediate). */
0a78ebe4
KT
8888 cost += extra_cost->alu.shift;
8889
4745e701 8890 }
0a78ebe4
KT
8891 /* Strip extends as we will have costed them in the case above. */
8892 if (is_extend)
b10f1009 8893 op0 = aarch64_strip_extend (op0, true);
4745e701 8894
e548c9df 8895 cost += rtx_cost (op0, VOIDmode, code, 0, speed);
4745e701
JG
8896
8897 return cost;
8898 }
8899
d2ac256b
KT
8900 /* MNEG or [US]MNEGL. Extract the NEG operand and indicate that it's a
8901 compound and let the below cases handle it. After all, MNEG is a
8902 special-case alias of MSUB. */
8903 if (GET_CODE (op0) == NEG)
8904 {
8905 op0 = XEXP (op0, 0);
8906 compound_p = true;
8907 }
8908
4745e701
JG
8909 /* Integer multiplies or FMAs have zero/sign extending variants. */
8910 if ((GET_CODE (op0) == ZERO_EXTEND
8911 && GET_CODE (op1) == ZERO_EXTEND)
8912 || (GET_CODE (op0) == SIGN_EXTEND
8913 && GET_CODE (op1) == SIGN_EXTEND))
8914 {
e548c9df
AM
8915 cost += rtx_cost (XEXP (op0, 0), VOIDmode, MULT, 0, speed);
8916 cost += rtx_cost (XEXP (op1, 0), VOIDmode, MULT, 1, speed);
4745e701
JG
8917
8918 if (speed)
8919 {
0a78ebe4 8920 if (compound_p)
d2ac256b 8921 /* SMADDL/UMADDL/UMSUBL/SMSUBL. */
4745e701
JG
8922 cost += extra_cost->mult[0].extend_add;
8923 else
8924 /* MUL/SMULL/UMULL. */
8925 cost += extra_cost->mult[0].extend;
8926 }
8927
8928 return cost;
8929 }
8930
d2ac256b 8931 /* This is either an integer multiply or a MADD. In both cases
4745e701 8932 we want to recurse and cost the operands. */
e548c9df
AM
8933 cost += rtx_cost (op0, mode, MULT, 0, speed);
8934 cost += rtx_cost (op1, mode, MULT, 1, speed);
4745e701
JG
8935
8936 if (speed)
8937 {
0a78ebe4 8938 if (compound_p)
d2ac256b 8939 /* MADD/MSUB. */
4745e701
JG
8940 cost += extra_cost->mult[mode == DImode].add;
8941 else
8942 /* MUL. */
8943 cost += extra_cost->mult[mode == DImode].simple;
8944 }
8945
8946 return cost;
8947 }
8948 else
8949 {
8950 if (speed)
8951 {
3d840f7d 8952 /* Floating-point FMA/FMUL can also support negations of the
d318517d
SN
8953 operands, unless the rounding mode is upward or downward in
8954 which case FNMUL is different than FMUL with operand negation. */
8955 bool neg0 = GET_CODE (op0) == NEG;
8956 bool neg1 = GET_CODE (op1) == NEG;
8957 if (compound_p || !flag_rounding_math || (neg0 && neg1))
8958 {
8959 if (neg0)
8960 op0 = XEXP (op0, 0);
8961 if (neg1)
8962 op1 = XEXP (op1, 0);
8963 }
4745e701 8964
0a78ebe4 8965 if (compound_p)
4745e701
JG
8966 /* FMADD/FNMADD/FNMSUB/FMSUB. */
8967 cost += extra_cost->fp[mode == DFmode].fma;
8968 else
3d840f7d 8969 /* FMUL/FNMUL. */
4745e701
JG
8970 cost += extra_cost->fp[mode == DFmode].mult;
8971 }
8972
e548c9df
AM
8973 cost += rtx_cost (op0, mode, MULT, 0, speed);
8974 cost += rtx_cost (op1, mode, MULT, 1, speed);
4745e701
JG
8975 return cost;
8976 }
43e9d192
IB
8977}
8978
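/* Implement TARGET_ADDRESS_COST. */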
67747367
JG
8979static int
8980aarch64_address_cost (rtx x,
ef4bddc2 8981 machine_mode mode,
67747367
JG
8982 addr_space_t as ATTRIBUTE_UNUSED,
8983 bool speed)
8984{
8985 enum rtx_code c = GET_CODE (x);
b175b679 8986 const struct cpu_addrcost_table *addr_cost = aarch64_tune_params.addr_cost;
67747367
JG
8987 struct aarch64_address_info info;
8988 int cost = 0;
8989 info.shift = 0;
8990
a97d8b98 8991 if (!aarch64_classify_address (&info, x, mode, false))
67747367
JG
8992 {
8993 if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF)
8994 {
8995 /* This is a CONST or SYMBOL ref which will be split
8996 in a different way depending on the code model in use.
8997 Cost it through the generic infrastructure. */
e548c9df 8998 int cost_symbol_ref = rtx_cost (x, Pmode, MEM, 1, speed);
67747367
JG
8999 /* Divide through by the cost of one instruction to
9000 bring it to the same units as the address costs. */
9001 cost_symbol_ref /= COSTS_N_INSNS (1);
9002 /* The cost is then the cost of preparing the address,
9003 followed by an immediate (possibly 0) offset. */
9004 return cost_symbol_ref + addr_cost->imm_offset;
9005 }
9006 else
9007 {
9008 /* This is most likely a jump table from a case
9009 statement. */
9010 return addr_cost->register_offset;
9011 }
9012 }
9013
9014 switch (info.type)
9015 {
9016 case ADDRESS_LO_SUM:
9017 case ADDRESS_SYMBOLIC:
9018 case ADDRESS_REG_IMM:
9019 cost += addr_cost->imm_offset;
9020 break;
9021
9022 case ADDRESS_REG_WB:
9023 if (c == PRE_INC || c == PRE_DEC || c == PRE_MODIFY)
9024 cost += addr_cost->pre_modify;
9025 else if (c == POST_INC || c == POST_DEC || c == POST_MODIFY)
9026 cost += addr_cost->post_modify;
9027 else
9028 gcc_unreachable ();
9029
9030 break;
9031
9032 case ADDRESS_REG_REG:
9033 cost += addr_cost->register_offset;
9034 break;
9035
67747367 9036 case ADDRESS_REG_SXTW:
783879e6
EM
9037 cost += addr_cost->register_sextend;
9038 break;
9039
9040 case ADDRESS_REG_UXTW:
9041 cost += addr_cost->register_zextend;
67747367
JG
9042 break;
9043
9044 default:
9045 gcc_unreachable ();
9046 }
9047
9048
9049 if (info.shift > 0)
9050 {
9051 /* For the sake of calculating the cost of the shifted register
9052 component, we can treat same sized modes in the same way. */
6a70badb
RS
9053 if (known_eq (GET_MODE_BITSIZE (mode), 16))
9054 cost += addr_cost->addr_scale_costs.hi;
9055 else if (known_eq (GET_MODE_BITSIZE (mode), 32))
9056 cost += addr_cost->addr_scale_costs.si;
9057 else if (known_eq (GET_MODE_BITSIZE (mode), 64))
9058 cost += addr_cost->addr_scale_costs.di;
9059 else
9060 /* We can't tell, or this is a 128-bit vector. */
9061 cost += addr_cost->addr_scale_costs.ti;
67747367
JG
9062 }
9063
9064 return cost;
9065}
9066
b9066f5a
MW
9067/* Return the cost of a branch. If SPEED_P is true then the compiler is
9068 optimizing for speed. If PREDICTABLE_P is true then the branch is predicted
9069 to be taken. */
9070
9071int
9072aarch64_branch_cost (bool speed_p, bool predictable_p)
9073{
9074 /* When optimizing for speed, use the cost of unpredictable branches. */
9075 const struct cpu_branch_cost *branch_costs =
b175b679 9076 aarch64_tune_params.branch_costs;
b9066f5a
MW
9077
9078 if (!speed_p || predictable_p)
9079 return branch_costs->predictable;
9080 else
9081 return branch_costs->unpredictable;
9082}
9083
7cc2145f
JG
9084/* Return true if the RTX X in mode MODE is a zero or sign extract
9085 usable in an ADD or SUB (extended register) instruction. */
9086static bool
77e994c9 9087aarch64_rtx_arith_op_extract_p (rtx x, scalar_int_mode mode)
7cc2145f
JG
9088{
9089 /* Catch add with a sign extract.
9090 This is add_<optab><mode>_multp2. */
9091 if (GET_CODE (x) == SIGN_EXTRACT
9092 || GET_CODE (x) == ZERO_EXTRACT)
9093 {
9094 rtx op0 = XEXP (x, 0);
9095 rtx op1 = XEXP (x, 1);
9096 rtx op2 = XEXP (x, 2);
9097
9098 if (GET_CODE (op0) == MULT
9099 && CONST_INT_P (op1)
9100 && op2 == const0_rtx
9101 && CONST_INT_P (XEXP (op0, 1))
9102 && aarch64_is_extend_from_extract (mode,
9103 XEXP (op0, 1),
9104 op1))
9105 {
9106 return true;
9107 }
9108 }
e47c4031
KT
9109 /* The simple case <ARITH>, XD, XN, XM, [us]xt.
9110 No shift. */
9111 else if (GET_CODE (x) == SIGN_EXTEND
9112 || GET_CODE (x) == ZERO_EXTEND)
9113 return REG_P (XEXP (x, 0));
7cc2145f
JG
9114
9115 return false;
9116}
9117
61263118
KT
9118static bool
9119aarch64_frint_unspec_p (unsigned int u)
9120{
9121 switch (u)
9122 {
9123 case UNSPEC_FRINTZ:
9124 case UNSPEC_FRINTP:
9125 case UNSPEC_FRINTM:
9126 case UNSPEC_FRINTA:
9127 case UNSPEC_FRINTN:
9128 case UNSPEC_FRINTX:
9129 case UNSPEC_FRINTI:
9130 return true;
9131
9132 default:
9133 return false;
9134 }
9135}
9136
fb0cb7fa
KT
9137/* Return true iff X is an rtx that will match an extr instruction
9138 i.e. as described in the *extr<mode>5_insn family of patterns.
9139 OP0 and OP1 will be set to the operands of the shifts involved
9140 on success and will be NULL_RTX otherwise. */
9141
9142static bool
9143aarch64_extr_rtx_p (rtx x, rtx *res_op0, rtx *res_op1)
9144{
9145 rtx op0, op1;
77e994c9
RS
9146 scalar_int_mode mode;
9147 if (!is_a <scalar_int_mode> (GET_MODE (x), &mode))
9148 return false;
fb0cb7fa
KT
9149
9150 *res_op0 = NULL_RTX;
9151 *res_op1 = NULL_RTX;
9152
9153 if (GET_CODE (x) != IOR)
9154 return false;
9155
9156 op0 = XEXP (x, 0);
9157 op1 = XEXP (x, 1);
9158
9159 if ((GET_CODE (op0) == ASHIFT && GET_CODE (op1) == LSHIFTRT)
9160 || (GET_CODE (op1) == ASHIFT && GET_CODE (op0) == LSHIFTRT))
9161 {
9162 /* Canonicalise locally to ashift in op0, lshiftrt in op1. */
9163 if (GET_CODE (op1) == ASHIFT)
9164 std::swap (op0, op1);
9165
9166 if (!CONST_INT_P (XEXP (op0, 1)) || !CONST_INT_P (XEXP (op1, 1)))
9167 return false;
9168
9169 unsigned HOST_WIDE_INT shft_amnt_0 = UINTVAL (XEXP (op0, 1));
9170 unsigned HOST_WIDE_INT shft_amnt_1 = UINTVAL (XEXP (op1, 1));
9171
9172 if (shft_amnt_0 < GET_MODE_BITSIZE (mode)
9173 && shft_amnt_0 + shft_amnt_1 == GET_MODE_BITSIZE (mode))
9174 {
9175 *res_op0 = XEXP (op0, 0);
9176 *res_op1 = XEXP (op1, 0);
9177 return true;
9178 }
9179 }
9180
9181 return false;
9182}
9183
2d5ffe46
AP
9184/* Calculate the cost of calculating (if_then_else (OP0) (OP1) (OP2)),
9185 storing it in *COST. Result is true if the total cost of the operation
9186 has now been calculated. */
9187static bool
9188aarch64_if_then_else_costs (rtx op0, rtx op1, rtx op2, int *cost, bool speed)
9189{
b9e3afe9
AP
9190 rtx inner;
9191 rtx comparator;
9192 enum rtx_code cmpcode;
9193
9194 if (COMPARISON_P (op0))
9195 {
9196 inner = XEXP (op0, 0);
9197 comparator = XEXP (op0, 1);
9198 cmpcode = GET_CODE (op0);
9199 }
9200 else
9201 {
9202 inner = op0;
9203 comparator = const0_rtx;
9204 cmpcode = NE;
9205 }
9206
2d5ffe46
AP
9207 if (GET_CODE (op1) == PC || GET_CODE (op2) == PC)
9208 {
9209 /* Conditional branch. */
b9e3afe9 9210 if (GET_MODE_CLASS (GET_MODE (inner)) == MODE_CC)
2d5ffe46
AP
9211 return true;
9212 else
9213 {
b9e3afe9 9214 if (cmpcode == NE || cmpcode == EQ)
2d5ffe46 9215 {
2d5ffe46
AP
9216 if (comparator == const0_rtx)
9217 {
9218 /* TBZ/TBNZ/CBZ/CBNZ. */
9219 if (GET_CODE (inner) == ZERO_EXTRACT)
9220 /* TBZ/TBNZ. */
e548c9df
AM
9221 *cost += rtx_cost (XEXP (inner, 0), VOIDmode,
9222 ZERO_EXTRACT, 0, speed);
9223 else
9224 /* CBZ/CBNZ. */
9225 *cost += rtx_cost (inner, VOIDmode, cmpcode, 0, speed);
2d5ffe46
AP
9226
9227 return true;
9228 }
9229 }
b9e3afe9 9230 else if (cmpcode == LT || cmpcode == GE)
2d5ffe46 9231 {
2d5ffe46
AP
9232 /* TBZ/TBNZ. */
9233 if (comparator == const0_rtx)
9234 return true;
9235 }
9236 }
9237 }
b9e3afe9 9238 else if (GET_MODE_CLASS (GET_MODE (inner)) == MODE_CC)
2d5ffe46 9239 {
786298dc 9240 /* CCMP. */
6dfeb7ce 9241 if (GET_CODE (op1) == COMPARE)
786298dc
WD
9242 {
9243 /* Increase cost of CCMP reg, 0, imm, CC to prefer CMP reg, 0. */
9244 if (XEXP (op1, 1) == const0_rtx)
9245 *cost += 1;
9246 if (speed)
9247 {
9248 machine_mode mode = GET_MODE (XEXP (op1, 0));
9249 const struct cpu_cost_table *extra_cost
9250 = aarch64_tune_params.insn_extra_cost;
9251
9252 if (GET_MODE_CLASS (mode) == MODE_INT)
9253 *cost += extra_cost->alu.arith;
9254 else
9255 *cost += extra_cost->fp[mode == DFmode].compare;
9256 }
9257 return true;
9258 }
9259
2d5ffe46
AP
9260 /* It's a conditional operation based on the status flags,
9261 so it must be some flavor of CSEL. */
9262
9263 /* CSNEG, CSINV, and CSINC are handled for free as part of CSEL. */
9264 if (GET_CODE (op1) == NEG
9265 || GET_CODE (op1) == NOT
9266 || (GET_CODE (op1) == PLUS && XEXP (op1, 1) == const1_rtx))
9267 op1 = XEXP (op1, 0);
bad00732
KT
9268 else if (GET_CODE (op1) == ZERO_EXTEND && GET_CODE (op2) == ZERO_EXTEND)
9269 {
9270 /* CSEL with zero-extension (*cmovdi_insn_uxtw). */
9271 op1 = XEXP (op1, 0);
9272 op2 = XEXP (op2, 0);
9273 }
2d5ffe46 9274
e548c9df
AM
9275 *cost += rtx_cost (op1, VOIDmode, IF_THEN_ELSE, 1, speed);
9276 *cost += rtx_cost (op2, VOIDmode, IF_THEN_ELSE, 2, speed);
2d5ffe46
AP
9277 return true;
9278 }
9279
9280 /* We don't know what this is, cost all operands. */
9281 return false;
9282}
9283
283b6c85
KT
9284/* Check whether X is a bitfield operation of the form shift + extend that
9285 maps down to a UBFIZ/SBFIZ/UBFX/SBFX instruction. If so, return the
9286 operand to which the bitfield operation is applied. Otherwise return
9287 NULL_RTX. */
9288
9289static rtx
9290aarch64_extend_bitfield_pattern_p (rtx x)
9291{
9292 rtx_code outer_code = GET_CODE (x);
9293 machine_mode outer_mode = GET_MODE (x);
9294
9295 if (outer_code != ZERO_EXTEND && outer_code != SIGN_EXTEND
9296 && outer_mode != SImode && outer_mode != DImode)
9297 return NULL_RTX;
9298
9299 rtx inner = XEXP (x, 0);
9300 rtx_code inner_code = GET_CODE (inner);
9301 machine_mode inner_mode = GET_MODE (inner);
9302 rtx op = NULL_RTX;
9303
9304 switch (inner_code)
9305 {
9306 case ASHIFT:
9307 if (CONST_INT_P (XEXP (inner, 1))
9308 && (inner_mode == QImode || inner_mode == HImode))
9309 op = XEXP (inner, 0);
9310 break;
9311 case LSHIFTRT:
9312 if (outer_code == ZERO_EXTEND && CONST_INT_P (XEXP (inner, 1))
9313 && (inner_mode == QImode || inner_mode == HImode))
9314 op = XEXP (inner, 0);
9315 break;
9316 case ASHIFTRT:
9317 if (outer_code == SIGN_EXTEND && CONST_INT_P (XEXP (inner, 1))
9318 && (inner_mode == QImode || inner_mode == HImode))
9319 op = XEXP (inner, 0);
9320 break;
9321 default:
9322 break;
9323 }
9324
9325 return op;
9326}
9327
8c83f71d
KT
9328/* Return true if the mask and a shift amount from an RTX of the form
9329 (x << SHFT_AMNT) & MASK are valid to combine into a UBFIZ instruction of
9330 mode MODE. See the *andim_ashift<mode>_bfiz pattern. */
9331
9332bool
77e994c9
RS
9333aarch64_mask_and_shift_for_ubfiz_p (scalar_int_mode mode, rtx mask,
9334 rtx shft_amnt)
8c83f71d
KT
9335{
9336 return CONST_INT_P (mask) && CONST_INT_P (shft_amnt)
9337 && INTVAL (shft_amnt) < GET_MODE_BITSIZE (mode)
9338 && exact_log2 ((INTVAL (mask) >> INTVAL (shft_amnt)) + 1) >= 0
1b6acf23
WD
9339 && (INTVAL (mask)
9340 & ((HOST_WIDE_INT_1U << INTVAL (shft_amnt)) - 1)) == 0;
8c83f71d
KT
9341}
9342
6a0d3939
SE
9343/* Return true if the masks and a shift amount from an RTX of the form
9344 ((x & MASK1) | ((y << SHIFT_AMNT) & MASK2)) are valid to combine into
9345 a BFI instruction of mode MODE. See the *aarch64_bfi patterns. */
9346
9347bool
9348aarch64_masks_and_shift_for_bfi_p (scalar_int_mode mode,
9349 unsigned HOST_WIDE_INT mask1,
9350 unsigned HOST_WIDE_INT shft_amnt,
9351 unsigned HOST_WIDE_INT mask2)
9352{
9353 unsigned HOST_WIDE_INT t;
9354
9355 /* Verify that there is no overlap in what bits are set in the two masks. */
9356 if (mask1 != ~mask2)
9357 return false;
9358
9359 /* Verify that mask2 is not all zeros or ones. */
9360 if (mask2 == 0 || mask2 == HOST_WIDE_INT_M1U)
9361 return false;
9362
9363 /* The shift amount should always be less than the mode size. */
9364 gcc_assert (shft_amnt < GET_MODE_BITSIZE (mode));
9365
9366 /* Verify that the mask being shifted is contiguous and would be in the
9367 least significant bits after shifting by shft_amnt. */
9368 t = mask2 + (HOST_WIDE_INT_1U << shft_amnt);
9369 return (t == (t & -t));
9370}
9371
43e9d192
IB
9372/* Calculate the cost of calculating X, storing it in *COST. Result
9373 is true if the total cost of the operation has now been calculated. */
9374static bool
e548c9df 9375aarch64_rtx_costs (rtx x, machine_mode mode, int outer ATTRIBUTE_UNUSED,
43e9d192
IB
9376 int param ATTRIBUTE_UNUSED, int *cost, bool speed)
9377{
a8eecd00 9378 rtx op0, op1, op2;
73250c4c 9379 const struct cpu_cost_table *extra_cost
b175b679 9380 = aarch64_tune_params.insn_extra_cost;
e548c9df 9381 int code = GET_CODE (x);
b4206259 9382 scalar_int_mode int_mode;
43e9d192 9383
7fc5ef02
JG
9384 /* By default, assume that everything has equivalent cost to the
9385 cheapest instruction. Any additional costs are applied as a delta
9386 above this default. */
9387 *cost = COSTS_N_INSNS (1);
9388
43e9d192
IB
9389 switch (code)
9390 {
9391 case SET:
ba123b0d
JG
9392 /* The cost depends entirely on the operands to SET. */
9393 *cost = 0;
43e9d192
IB
9394 op0 = SET_DEST (x);
9395 op1 = SET_SRC (x);
9396
9397 switch (GET_CODE (op0))
9398 {
9399 case MEM:
9400 if (speed)
2961177e
JG
9401 {
9402 rtx address = XEXP (op0, 0);
b6875aac
KV
9403 if (VECTOR_MODE_P (mode))
9404 *cost += extra_cost->ldst.storev;
9405 else if (GET_MODE_CLASS (mode) == MODE_INT)
2961177e
JG
9406 *cost += extra_cost->ldst.store;
9407 else if (mode == SFmode)
9408 *cost += extra_cost->ldst.storef;
9409 else if (mode == DFmode)
9410 *cost += extra_cost->ldst.stored;
9411
9412 *cost +=
9413 COSTS_N_INSNS (aarch64_address_cost (address, mode,
9414 0, speed));
9415 }
43e9d192 9416
e548c9df 9417 *cost += rtx_cost (op1, mode, SET, 1, speed);
43e9d192
IB
9418 return true;
9419
9420 case SUBREG:
9421 if (! REG_P (SUBREG_REG (op0)))
e548c9df 9422 *cost += rtx_cost (SUBREG_REG (op0), VOIDmode, SET, 0, speed);
ba123b0d 9423
43e9d192
IB
9424 /* Fall through. */
9425 case REG:
b6875aac
KV
9426 /* The cost is one per vector-register copied. */
9427 if (VECTOR_MODE_P (GET_MODE (op0)) && REG_P (op1))
9428 {
fe1447a1
RS
9429 int nregs = aarch64_hard_regno_nregs (V0_REGNUM, GET_MODE (op0));
9430 *cost = COSTS_N_INSNS (nregs);
b6875aac 9431 }
ba123b0d
JG
9432 /* const0_rtx is in general free, but we will use an
9433 instruction to set a register to 0. */
b6875aac
KV
9434 else if (REG_P (op1) || op1 == const0_rtx)
9435 {
9436 /* The cost is 1 per register copied. */
fe1447a1
RS
9437 int nregs = aarch64_hard_regno_nregs (R0_REGNUM, GET_MODE (op0));
9438 *cost = COSTS_N_INSNS (nregs);
b6875aac 9439 }
ba123b0d
JG
9440 else
9441 /* Cost is just the cost of the RHS of the set. */
e548c9df 9442 *cost += rtx_cost (op1, mode, SET, 1, speed);
43e9d192
IB
9443 return true;
9444
ba123b0d 9445 case ZERO_EXTRACT:
43e9d192 9446 case SIGN_EXTRACT:
ba123b0d
JG
9447 /* Bit-field insertion. Strip any redundant widening of
9448 the RHS to meet the width of the target. */
43e9d192
IB
9449 if (GET_CODE (op1) == SUBREG)
9450 op1 = SUBREG_REG (op1);
9451 if ((GET_CODE (op1) == ZERO_EXTEND
9452 || GET_CODE (op1) == SIGN_EXTEND)
4aa81c2e 9453 && CONST_INT_P (XEXP (op0, 1))
77e994c9
RS
9454 && is_a <scalar_int_mode> (GET_MODE (XEXP (op1, 0)), &int_mode)
9455 && GET_MODE_BITSIZE (int_mode) >= INTVAL (XEXP (op0, 1)))
43e9d192 9456 op1 = XEXP (op1, 0);
ba123b0d
JG
9457
9458 if (CONST_INT_P (op1))
9459 {
9460 /* MOV immediate is assumed to always be cheap. */
9461 *cost = COSTS_N_INSNS (1);
9462 }
9463 else
9464 {
9465 /* BFM. */
9466 if (speed)
9467 *cost += extra_cost->alu.bfi;
e548c9df 9468 *cost += rtx_cost (op1, VOIDmode, (enum rtx_code) code, 1, speed);
ba123b0d
JG
9469 }
9470
43e9d192
IB
9471 return true;
9472
9473 default:
ba123b0d
JG
9474 /* We can't make sense of this, assume default cost. */
9475 *cost = COSTS_N_INSNS (1);
61263118 9476 return false;
43e9d192
IB
9477 }
9478 return false;
9479
9dfc162c
JG
9480 case CONST_INT:
9481 /* If an instruction can incorporate a constant within the
9482 instruction, the instruction's expression avoids calling
9483 rtx_cost() on the constant. If rtx_cost() is called on a
9484 constant, then it is usually because the constant must be
9485 moved into a register by one or more instructions.
9486
9487 The exception is constant 0, which can be expressed
9488 as XZR/WZR and is therefore free. The exception to this is
9489 if we have (set (reg) (const0_rtx)) in which case we must cost
9490 the move. However, we can catch that when we cost the SET, so
9491 we don't need to consider that here. */
9492 if (x == const0_rtx)
9493 *cost = 0;
9494 else
9495 {
9496 /* To an approximation, the cost of building any other
9497 constant is proportional to the number of instructions
9498 required to build it. This is true whether we
9499 are compiling for SPEED or otherwise. */
77e994c9
RS
9500 if (!is_a <scalar_int_mode> (mode, &int_mode))
9501 int_mode = word_mode;
82614948 9502 *cost = COSTS_N_INSNS (aarch64_internal_mov_immediate
77e994c9 9503 (NULL_RTX, x, false, int_mode));
9dfc162c
JG
9504 }
9505 return true;
9506
9507 case CONST_DOUBLE:
a2170965
TC
9508
9509 /* First determine number of instructions to do the move
9510 as an integer constant. */
9511 if (!aarch64_float_const_representable_p (x)
9512 && !aarch64_can_const_movi_rtx_p (x, mode)
9513 && aarch64_float_const_rtx_p (x))
9514 {
9515 unsigned HOST_WIDE_INT ival;
9516 bool succeed = aarch64_reinterpret_float_as_int (x, &ival);
9517 gcc_assert (succeed);
9518
77e994c9
RS
9519 scalar_int_mode imode = (mode == HFmode
9520 ? SImode
9521 : int_mode_for_mode (mode).require ());
a2170965
TC
9522 int ncost = aarch64_internal_mov_immediate
9523 (NULL_RTX, gen_int_mode (ival, imode), false, imode);
9524 *cost += COSTS_N_INSNS (ncost);
9525 return true;
9526 }
9527
9dfc162c
JG
9528 if (speed)
9529 {
9530 /* mov[df,sf]_aarch64. */
9531 if (aarch64_float_const_representable_p (x))
9532 /* FMOV (scalar immediate). */
9533 *cost += extra_cost->fp[mode == DFmode].fpconst;
9534 else if (!aarch64_float_const_zero_rtx_p (x))
9535 {
9536 /* This will be a load from memory. */
9537 if (mode == DFmode)
9538 *cost += extra_cost->ldst.loadd;
9539 else
9540 *cost += extra_cost->ldst.loadf;
9541 }
9542 else
9543 /* Otherwise this is +0.0. We get this using MOVI d0, #0
9544 or MOV v0.s[0], wzr - neither of which is modeled by the
9545 cost tables. Just use the default cost. */
9546 {
9547 }
9548 }
9549
9550 return true;
9551
43e9d192
IB
9552 case MEM:
9553 if (speed)
2961177e
JG
9554 {
9555 /* For loads we want the base cost of a load, plus an
9556 approximation for the additional cost of the addressing
9557 mode. */
9558 rtx address = XEXP (x, 0);
b6875aac
KV
9559 if (VECTOR_MODE_P (mode))
9560 *cost += extra_cost->ldst.loadv;
9561 else if (GET_MODE_CLASS (mode) == MODE_INT)
2961177e
JG
9562 *cost += extra_cost->ldst.load;
9563 else if (mode == SFmode)
9564 *cost += extra_cost->ldst.loadf;
9565 else if (mode == DFmode)
9566 *cost += extra_cost->ldst.loadd;
9567
9568 *cost +=
9569 COSTS_N_INSNS (aarch64_address_cost (address, mode,
9570 0, speed));
9571 }
43e9d192
IB
9572
9573 return true;
9574
9575 case NEG:
4745e701
JG
9576 op0 = XEXP (x, 0);
9577
b6875aac
KV
9578 if (VECTOR_MODE_P (mode))
9579 {
9580 if (speed)
9581 {
9582 /* FNEG. */
9583 *cost += extra_cost->vect.alu;
9584 }
9585 return false;
9586 }
9587
e548c9df
AM
9588 if (GET_MODE_CLASS (mode) == MODE_INT)
9589 {
4745e701
JG
9590 if (GET_RTX_CLASS (GET_CODE (op0)) == RTX_COMPARE
9591 || GET_RTX_CLASS (GET_CODE (op0)) == RTX_COMM_COMPARE)
9592 {
9593 /* CSETM. */
e548c9df 9594 *cost += rtx_cost (XEXP (op0, 0), VOIDmode, NEG, 0, speed);
4745e701
JG
9595 return true;
9596 }
9597
9598 /* Cost this as SUB wzr, X. */
e548c9df 9599 op0 = CONST0_RTX (mode);
4745e701
JG
9600 op1 = XEXP (x, 0);
9601 goto cost_minus;
9602 }
9603
e548c9df 9604 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
4745e701
JG
9605 {
9606 /* Support (neg(fma...)) as a single instruction only if
9607 sign of zeros is unimportant. This matches the decision
9608 making in aarch64.md. */
9609 if (GET_CODE (op0) == FMA && !HONOR_SIGNED_ZEROS (GET_MODE (op0)))
9610 {
9611 /* FNMADD. */
e548c9df 9612 *cost = rtx_cost (op0, mode, NEG, 0, speed);
4745e701
JG
9613 return true;
9614 }
d318517d
SN
9615 if (GET_CODE (op0) == MULT)
9616 {
9617 /* FNMUL. */
9618 *cost = rtx_cost (op0, mode, NEG, 0, speed);
9619 return true;
9620 }
4745e701
JG
9621 if (speed)
9622 /* FNEG. */
9623 *cost += extra_cost->fp[mode == DFmode].neg;
9624 return false;
9625 }
9626
9627 return false;
43e9d192 9628
781aeb73
KT
9629 case CLRSB:
9630 case CLZ:
9631 if (speed)
b6875aac
KV
9632 {
9633 if (VECTOR_MODE_P (mode))
9634 *cost += extra_cost->vect.alu;
9635 else
9636 *cost += extra_cost->alu.clz;
9637 }
781aeb73
KT
9638
9639 return false;
9640
43e9d192
IB
9641 case COMPARE:
9642 op0 = XEXP (x, 0);
9643 op1 = XEXP (x, 1);
9644
9645 if (op1 == const0_rtx
9646 && GET_CODE (op0) == AND)
9647 {
9648 x = op0;
e548c9df 9649 mode = GET_MODE (op0);
43e9d192
IB
9650 goto cost_logic;
9651 }
9652
a8eecd00
JG
9653 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT)
9654 {
9655 /* TODO: A write to the CC flags possibly costs extra, this
9656 needs encoding in the cost tables. */
9657
e548c9df 9658 mode = GET_MODE (op0);
a8eecd00
JG
9659 /* ANDS. */
9660 if (GET_CODE (op0) == AND)
9661 {
9662 x = op0;
9663 goto cost_logic;
9664 }
9665
9666 if (GET_CODE (op0) == PLUS)
9667 {
9668 /* ADDS (and CMN alias). */
9669 x = op0;
9670 goto cost_plus;
9671 }
9672
9673 if (GET_CODE (op0) == MINUS)
9674 {
9675 /* SUBS. */
9676 x = op0;
9677 goto cost_minus;
9678 }
9679
345854d8
KT
9680 if (GET_CODE (op0) == ZERO_EXTRACT && op1 == const0_rtx
9681 && GET_MODE (x) == CC_NZmode && CONST_INT_P (XEXP (op0, 1))
9682 && CONST_INT_P (XEXP (op0, 2)))
9683 {
9684 /* COMPARE of ZERO_EXTRACT form of TST-immediate.
9685 Handle it here directly rather than going to cost_logic
9686 since we know the immediate generated for the TST is valid
9687 so we can avoid creating an intermediate rtx for it only
9688 for costing purposes. */
9689 if (speed)
9690 *cost += extra_cost->alu.logical;
9691
9692 *cost += rtx_cost (XEXP (op0, 0), GET_MODE (op0),
9693 ZERO_EXTRACT, 0, speed);
9694 return true;
9695 }
9696
a8eecd00
JG
9697 if (GET_CODE (op1) == NEG)
9698 {
9699 /* CMN. */
9700 if (speed)
9701 *cost += extra_cost->alu.arith;
9702
e548c9df
AM
9703 *cost += rtx_cost (op0, mode, COMPARE, 0, speed);
9704 *cost += rtx_cost (XEXP (op1, 0), mode, NEG, 1, speed);
a8eecd00
JG
9705 return true;
9706 }
9707
9708 /* CMP.
9709
9710 Compare can freely swap the order of operands, and
9711 canonicalization puts the more complex operation first.
9712 But the integer MINUS logic expects the shift/extend
9713 operation in op1. */
9714 if (! (REG_P (op0)
9715 || (GET_CODE (op0) == SUBREG && REG_P (SUBREG_REG (op0)))))
9716 {
9717 op0 = XEXP (x, 1);
9718 op1 = XEXP (x, 0);
9719 }
9720 goto cost_minus;
9721 }
9722
9723 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_FLOAT)
9724 {
9725 /* FCMP. */
9726 if (speed)
9727 *cost += extra_cost->fp[mode == DFmode].compare;
9728
9729 if (CONST_DOUBLE_P (op1) && aarch64_float_const_zero_rtx_p (op1))
9730 {
e548c9df 9731 *cost += rtx_cost (op0, VOIDmode, COMPARE, 0, speed);
a8eecd00
JG
9732 /* FCMP supports constant 0.0 for no extra cost. */
9733 return true;
9734 }
9735 return false;
9736 }
9737
b6875aac
KV
9738 if (VECTOR_MODE_P (mode))
9739 {
9740 /* Vector compare. */
9741 if (speed)
9742 *cost += extra_cost->vect.alu;
9743
9744 if (aarch64_float_const_zero_rtx_p (op1))
9745 {
9746 /* Vector cm (eq|ge|gt|lt|le) supports constant 0.0 for no extra
9747 cost. */
9748 return true;
9749 }
9750 return false;
9751 }
a8eecd00 9752 return false;
43e9d192
IB
9753
9754 case MINUS:
4745e701
JG
9755 {
9756 op0 = XEXP (x, 0);
9757 op1 = XEXP (x, 1);
9758
9759cost_minus:
e548c9df 9760 *cost += rtx_cost (op0, mode, MINUS, 0, speed);
23cb6618 9761
4745e701
JG
9762 /* Detect valid immediates. */
9763 if ((GET_MODE_CLASS (mode) == MODE_INT
9764 || (GET_MODE_CLASS (mode) == MODE_CC
9765 && GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT))
9766 && CONST_INT_P (op1)
9767 && aarch64_uimm12_shift (INTVAL (op1)))
9768 {
4745e701
JG
9769 if (speed)
9770 /* SUB(S) (immediate). */
9771 *cost += extra_cost->alu.arith;
9772 return true;
4745e701
JG
9773 }
9774
7cc2145f 9775 /* Look for SUB (extended register). */
77e994c9
RS
9776 if (is_a <scalar_int_mode> (mode, &int_mode)
9777 && aarch64_rtx_arith_op_extract_p (op1, int_mode))
7cc2145f
JG
9778 {
9779 if (speed)
2533c820 9780 *cost += extra_cost->alu.extend_arith;
7cc2145f 9781
b10f1009 9782 op1 = aarch64_strip_extend (op1, true);
e47c4031 9783 *cost += rtx_cost (op1, VOIDmode,
e548c9df 9784 (enum rtx_code) GET_CODE (op1), 0, speed);
7cc2145f
JG
9785 return true;
9786 }
9787
b10f1009 9788 rtx new_op1 = aarch64_strip_extend (op1, false);
4745e701
JG
9789
9790 /* Cost this as an FMA-alike operation. */
9791 if ((GET_CODE (new_op1) == MULT
0a78ebe4 9792 || aarch64_shift_p (GET_CODE (new_op1)))
4745e701
JG
9793 && code != COMPARE)
9794 {
9795 *cost += aarch64_rtx_mult_cost (new_op1, MULT,
9796 (enum rtx_code) code,
9797 speed);
4745e701
JG
9798 return true;
9799 }
43e9d192 9800
e548c9df 9801 *cost += rtx_cost (new_op1, VOIDmode, MINUS, 1, speed);
43e9d192 9802
4745e701
JG
9803 if (speed)
9804 {
b6875aac
KV
9805 if (VECTOR_MODE_P (mode))
9806 {
9807 /* Vector SUB. */
9808 *cost += extra_cost->vect.alu;
9809 }
9810 else if (GET_MODE_CLASS (mode) == MODE_INT)
9811 {
9812 /* SUB(S). */
9813 *cost += extra_cost->alu.arith;
9814 }
4745e701 9815 else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
b6875aac
KV
9816 {
9817 /* FSUB. */
9818 *cost += extra_cost->fp[mode == DFmode].addsub;
9819 }
4745e701
JG
9820 }
9821 return true;
9822 }
43e9d192
IB
9823
9824 case PLUS:
4745e701
JG
9825 {
9826 rtx new_op0;
43e9d192 9827
4745e701
JG
9828 op0 = XEXP (x, 0);
9829 op1 = XEXP (x, 1);
43e9d192 9830
a8eecd00 9831cost_plus:
4745e701
JG
9832 if (GET_RTX_CLASS (GET_CODE (op0)) == RTX_COMPARE
9833 || GET_RTX_CLASS (GET_CODE (op0)) == RTX_COMM_COMPARE)
9834 {
9835 /* CSINC. */
e548c9df
AM
9836 *cost += rtx_cost (XEXP (op0, 0), mode, PLUS, 0, speed);
9837 *cost += rtx_cost (op1, mode, PLUS, 1, speed);
4745e701
JG
9838 return true;
9839 }
43e9d192 9840
4745e701 9841 if (GET_MODE_CLASS (mode) == MODE_INT
43cacb12
RS
9842 && ((CONST_INT_P (op1) && aarch64_uimm12_shift (INTVAL (op1)))
9843 || aarch64_sve_addvl_addpl_immediate (op1, mode)))
4745e701 9844 {
e548c9df 9845 *cost += rtx_cost (op0, mode, PLUS, 0, speed);
43e9d192 9846
4745e701
JG
9847 if (speed)
9848 /* ADD (immediate). */
9849 *cost += extra_cost->alu.arith;
9850 return true;
9851 }
9852
e548c9df 9853 *cost += rtx_cost (op1, mode, PLUS, 1, speed);
23cb6618 9854
7cc2145f 9855 /* Look for ADD (extended register). */
77e994c9
RS
9856 if (is_a <scalar_int_mode> (mode, &int_mode)
9857 && aarch64_rtx_arith_op_extract_p (op0, int_mode))
7cc2145f
JG
9858 {
9859 if (speed)
2533c820 9860 *cost += extra_cost->alu.extend_arith;
7cc2145f 9861
b10f1009 9862 op0 = aarch64_strip_extend (op0, true);
e47c4031 9863 *cost += rtx_cost (op0, VOIDmode,
e548c9df 9864 (enum rtx_code) GET_CODE (op0), 0, speed);
7cc2145f
JG
9865 return true;
9866 }
9867
4745e701
JG
9868 /* Strip any extend, leave shifts behind as we will
9869 cost them through mult_cost. */
b10f1009 9870 new_op0 = aarch64_strip_extend (op0, false);
4745e701
JG
9871
9872 if (GET_CODE (new_op0) == MULT
0a78ebe4 9873 || aarch64_shift_p (GET_CODE (new_op0)))
4745e701
JG
9874 {
9875 *cost += aarch64_rtx_mult_cost (new_op0, MULT, PLUS,
9876 speed);
4745e701
JG
9877 return true;
9878 }
9879
e548c9df 9880 *cost += rtx_cost (new_op0, VOIDmode, PLUS, 0, speed);
4745e701
JG
9881
9882 if (speed)
9883 {
b6875aac
KV
9884 if (VECTOR_MODE_P (mode))
9885 {
9886 /* Vector ADD. */
9887 *cost += extra_cost->vect.alu;
9888 }
9889 else if (GET_MODE_CLASS (mode) == MODE_INT)
9890 {
9891 /* ADD. */
9892 *cost += extra_cost->alu.arith;
9893 }
4745e701 9894 else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
b6875aac
KV
9895 {
9896 /* FADD. */
9897 *cost += extra_cost->fp[mode == DFmode].addsub;
9898 }
4745e701
JG
9899 }
9900 return true;
9901 }
43e9d192 9902
18b42b2a
KT
9903 case BSWAP:
9904 *cost = COSTS_N_INSNS (1);
9905
9906 if (speed)
b6875aac
KV
9907 {
9908 if (VECTOR_MODE_P (mode))
9909 *cost += extra_cost->vect.alu;
9910 else
9911 *cost += extra_cost->alu.rev;
9912 }
18b42b2a
KT
9913 return false;
9914
43e9d192 9915 case IOR:
f7d5cf8d
KT
9916 if (aarch_rev16_p (x))
9917 {
9918 *cost = COSTS_N_INSNS (1);
9919
b6875aac
KV
9920 if (speed)
9921 {
9922 if (VECTOR_MODE_P (mode))
9923 *cost += extra_cost->vect.alu;
9924 else
9925 *cost += extra_cost->alu.rev;
9926 }
9927 return true;
f7d5cf8d 9928 }
fb0cb7fa
KT
9929
9930 if (aarch64_extr_rtx_p (x, &op0, &op1))
9931 {
e548c9df
AM
9932 *cost += rtx_cost (op0, mode, IOR, 0, speed);
9933 *cost += rtx_cost (op1, mode, IOR, 1, speed);
fb0cb7fa
KT
9934 if (speed)
9935 *cost += extra_cost->alu.shift;
9936
9937 return true;
9938 }
f7d5cf8d 9939 /* Fall through. */
43e9d192
IB
9940 case XOR:
9941 case AND:
9942 cost_logic:
9943 op0 = XEXP (x, 0);
9944 op1 = XEXP (x, 1);
9945
b6875aac
KV
9946 if (VECTOR_MODE_P (mode))
9947 {
9948 if (speed)
9949 *cost += extra_cost->vect.alu;
9950 return true;
9951 }
9952
268c3b47
JG
9953 if (code == AND
9954 && GET_CODE (op0) == MULT
9955 && CONST_INT_P (XEXP (op0, 1))
9956 && CONST_INT_P (op1)
9957 && aarch64_uxt_size (exact_log2 (INTVAL (XEXP (op0, 1))),
9958 INTVAL (op1)) != 0)
9959 {
9960 /* This is a UBFM/SBFM. */
e548c9df 9961 *cost += rtx_cost (XEXP (op0, 0), mode, ZERO_EXTRACT, 0, speed);
268c3b47
JG
9962 if (speed)
9963 *cost += extra_cost->alu.bfx;
9964 return true;
9965 }
9966
b4206259 9967 if (is_int_mode (mode, &int_mode))
43e9d192 9968 {
8c83f71d 9969 if (CONST_INT_P (op1))
43e9d192 9970 {
8c83f71d
KT
9971 /* We have a mask + shift version of a UBFIZ
9972 i.e. the *andim_ashift<mode>_bfiz pattern. */
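	      /* For example, (x << 4) & 0xff0 can be emitted as a single
		 UBFIZ w0, w0, 4, 8.  */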
9973 if (GET_CODE (op0) == ASHIFT
b4206259
RS
9974 && aarch64_mask_and_shift_for_ubfiz_p (int_mode, op1,
9975 XEXP (op0, 1)))
8c83f71d 9976 {
b4206259 9977 *cost += rtx_cost (XEXP (op0, 0), int_mode,
8c83f71d
KT
9978 (enum rtx_code) code, 0, speed);
9979 if (speed)
9980 *cost += extra_cost->alu.bfx;
268c3b47 9981
8c83f71d
KT
9982 return true;
9983 }
b4206259 9984 else if (aarch64_bitmask_imm (INTVAL (op1), int_mode))
8c83f71d
KT
9985 {
 9986 /* We possibly get the immediate for free; this is not
9987 modelled. */
b4206259
RS
9988 *cost += rtx_cost (op0, int_mode,
9989 (enum rtx_code) code, 0, speed);
8c83f71d
KT
9990 if (speed)
9991 *cost += extra_cost->alu.logical;
268c3b47 9992
8c83f71d
KT
9993 return true;
9994 }
43e9d192
IB
9995 }
9996 else
9997 {
268c3b47
JG
9998 rtx new_op0 = op0;
9999
10000 /* Handle ORN, EON, or BIC. */
43e9d192
IB
10001 if (GET_CODE (op0) == NOT)
10002 op0 = XEXP (op0, 0);
268c3b47
JG
10003
10004 new_op0 = aarch64_strip_shift (op0);
10005
10006 /* If we had a shift on op0 then this is a logical-shift-
10007 by-register/immediate operation. Otherwise, this is just
10008 a logical operation. */
10009 if (speed)
10010 {
10011 if (new_op0 != op0)
10012 {
10013 /* Shift by immediate. */
10014 if (CONST_INT_P (XEXP (op0, 1)))
10015 *cost += extra_cost->alu.log_shift;
10016 else
10017 *cost += extra_cost->alu.log_shift_reg;
10018 }
10019 else
10020 *cost += extra_cost->alu.logical;
10021 }
10022
10023 /* In both cases we want to cost both operands. */
b4206259
RS
10024 *cost += rtx_cost (new_op0, int_mode, (enum rtx_code) code,
10025 0, speed);
10026 *cost += rtx_cost (op1, int_mode, (enum rtx_code) code,
10027 1, speed);
268c3b47
JG
10028
10029 return true;
43e9d192 10030 }
43e9d192
IB
10031 }
10032 return false;
10033
268c3b47 10034 case NOT:
6365da9e
KT
10035 x = XEXP (x, 0);
10036 op0 = aarch64_strip_shift (x);
10037
b6875aac
KV
10038 if (VECTOR_MODE_P (mode))
10039 {
10040 /* Vector NOT. */
10041 *cost += extra_cost->vect.alu;
10042 return false;
10043 }
10044
6365da9e
KT
10045 /* MVN-shifted-reg. */
10046 if (op0 != x)
10047 {
e548c9df 10048 *cost += rtx_cost (op0, mode, (enum rtx_code) code, 0, speed);
6365da9e
KT
10049
10050 if (speed)
10051 *cost += extra_cost->alu.log_shift;
10052
10053 return true;
10054 }
10055 /* EON can have two forms: (xor (not a) b) but also (not (xor a b)).
10056 Handle the second form here taking care that 'a' in the above can
10057 be a shift. */
10058 else if (GET_CODE (op0) == XOR)
10059 {
10060 rtx newop0 = XEXP (op0, 0);
10061 rtx newop1 = XEXP (op0, 1);
10062 rtx op0_stripped = aarch64_strip_shift (newop0);
10063
e548c9df
AM
10064 *cost += rtx_cost (newop1, mode, (enum rtx_code) code, 1, speed);
10065 *cost += rtx_cost (op0_stripped, mode, XOR, 0, speed);
6365da9e
KT
10066
10067 if (speed)
10068 {
10069 if (op0_stripped != newop0)
10070 *cost += extra_cost->alu.log_shift;
10071 else
10072 *cost += extra_cost->alu.logical;
10073 }
10074
10075 return true;
10076 }
268c3b47
JG
10077 /* MVN. */
10078 if (speed)
10079 *cost += extra_cost->alu.logical;
10080
268c3b47
JG
10081 return false;
10082
43e9d192 10083 case ZERO_EXTEND:
b1685e62
JG
10084
10085 op0 = XEXP (x, 0);
10086 /* If a value is written in SI mode, then zero extended to DI
10087 mode, the operation will in general be free as a write to
10088 a 'w' register implicitly zeroes the upper bits of an 'x'
10089 register. However, if this is
10090
10091 (set (reg) (zero_extend (reg)))
10092
10093 we must cost the explicit register move. */
10094 if (mode == DImode
10095 && GET_MODE (op0) == SImode
10096 && outer == SET)
10097 {
e548c9df 10098 int op_cost = rtx_cost (op0, VOIDmode, ZERO_EXTEND, 0, speed);
b1685e62 10099
dde23f43
KM
10100 /* If OP_COST is non-zero, then the cost of the zero extend
10101 is effectively the cost of the inner operation. Otherwise
10102 we have a MOV instruction and we take the cost from the MOV
10103 itself. This is true independently of whether we are
10104 optimizing for space or time. */
10105 if (op_cost)
b1685e62
JG
10106 *cost = op_cost;
10107
10108 return true;
10109 }
e548c9df 10110 else if (MEM_P (op0))
43e9d192 10111 {
b1685e62 10112 /* All loads can zero extend to any size for free. */
e548c9df 10113 *cost = rtx_cost (op0, VOIDmode, ZERO_EXTEND, param, speed);
43e9d192
IB
10114 return true;
10115 }
b1685e62 10116
283b6c85
KT
10117 op0 = aarch64_extend_bitfield_pattern_p (x);
10118 if (op0)
10119 {
10120 *cost += rtx_cost (op0, mode, ZERO_EXTEND, 0, speed);
10121 if (speed)
10122 *cost += extra_cost->alu.bfx;
10123 return true;
10124 }
10125
b1685e62 10126 if (speed)
b6875aac
KV
10127 {
10128 if (VECTOR_MODE_P (mode))
10129 {
10130 /* UMOV. */
10131 *cost += extra_cost->vect.alu;
10132 }
10133 else
10134 {
63715e5e
WD
10135 /* We generate an AND instead of UXTB/UXTH. */
10136 *cost += extra_cost->alu.logical;
b6875aac
KV
10137 }
10138 }
43e9d192
IB
10139 return false;
10140
10141 case SIGN_EXTEND:
b1685e62 10142 if (MEM_P (XEXP (x, 0)))
43e9d192 10143 {
b1685e62
JG
10144 /* LDRSH. */
10145 if (speed)
10146 {
10147 rtx address = XEXP (XEXP (x, 0), 0);
10148 *cost += extra_cost->ldst.load_sign_extend;
10149
10150 *cost +=
10151 COSTS_N_INSNS (aarch64_address_cost (address, mode,
10152 0, speed));
10153 }
43e9d192
IB
10154 return true;
10155 }
b1685e62 10156
283b6c85
KT
10157 op0 = aarch64_extend_bitfield_pattern_p (x);
10158 if (op0)
10159 {
10160 *cost += rtx_cost (op0, mode, SIGN_EXTEND, 0, speed);
10161 if (speed)
10162 *cost += extra_cost->alu.bfx;
10163 return true;
10164 }
10165
b1685e62 10166 if (speed)
b6875aac
KV
10167 {
10168 if (VECTOR_MODE_P (mode))
10169 *cost += extra_cost->vect.alu;
10170 else
10171 *cost += extra_cost->alu.extend;
10172 }
43e9d192
IB
10173 return false;
10174
ba0cfa17
JG
10175 case ASHIFT:
10176 op0 = XEXP (x, 0);
10177 op1 = XEXP (x, 1);
10178
10179 if (CONST_INT_P (op1))
10180 {
ba0cfa17 10181 if (speed)
b6875aac
KV
10182 {
10183 if (VECTOR_MODE_P (mode))
10184 {
10185 /* Vector shift (immediate). */
10186 *cost += extra_cost->vect.alu;
10187 }
10188 else
10189 {
 10190 /* LSL (immediate), UBFM, UBFIZ and friends. These are all
10191 aliases. */
10192 *cost += extra_cost->alu.shift;
10193 }
10194 }
ba0cfa17
JG
10195
10196 /* We can incorporate zero/sign extend for free. */
10197 if (GET_CODE (op0) == ZERO_EXTEND
10198 || GET_CODE (op0) == SIGN_EXTEND)
10199 op0 = XEXP (op0, 0);
10200
e548c9df 10201 *cost += rtx_cost (op0, VOIDmode, ASHIFT, 0, speed);
ba0cfa17
JG
10202 return true;
10203 }
10204 else
10205 {
7813b280 10206 if (VECTOR_MODE_P (mode))
b6875aac 10207 {
7813b280
KT
10208 if (speed)
10209 /* Vector shift (register). */
10210 *cost += extra_cost->vect.alu;
10211 }
10212 else
10213 {
10214 if (speed)
10215 /* LSLV. */
10216 *cost += extra_cost->alu.shift_reg;
10217
10218 if (GET_CODE (op1) == AND && REG_P (XEXP (op1, 0))
10219 && CONST_INT_P (XEXP (op1, 1))
6a70badb
RS
10220 && known_eq (INTVAL (XEXP (op1, 1)),
10221 GET_MODE_BITSIZE (mode) - 1))
b6875aac 10222 {
7813b280
KT
10223 *cost += rtx_cost (op0, mode, (rtx_code) code, 0, speed);
10224 /* We already demanded XEXP (op1, 0) to be REG_P, so
10225 don't recurse into it. */
10226 return true;
b6875aac
KV
10227 }
10228 }
ba0cfa17
JG
10229 return false; /* All arguments need to be in registers. */
10230 }
10231
43e9d192 10232 case ROTATE:
43e9d192
IB
10233 case ROTATERT:
10234 case LSHIFTRT:
43e9d192 10235 case ASHIFTRT:
ba0cfa17
JG
10236 op0 = XEXP (x, 0);
10237 op1 = XEXP (x, 1);
43e9d192 10238
ba0cfa17
JG
10239 if (CONST_INT_P (op1))
10240 {
10241 /* ASR (immediate) and friends. */
10242 if (speed)
b6875aac
KV
10243 {
10244 if (VECTOR_MODE_P (mode))
10245 *cost += extra_cost->vect.alu;
10246 else
10247 *cost += extra_cost->alu.shift;
10248 }
43e9d192 10249
e548c9df 10250 *cost += rtx_cost (op0, mode, (enum rtx_code) code, 0, speed);
ba0cfa17
JG
10251 return true;
10252 }
10253 else
10254 {
7813b280 10255 if (VECTOR_MODE_P (mode))
b6875aac 10256 {
7813b280
KT
10257 if (speed)
10258 /* Vector shift (register). */
b6875aac 10259 *cost += extra_cost->vect.alu;
7813b280
KT
10260 }
10261 else
10262 {
10263 if (speed)
10264 /* ASR (register) and friends. */
b6875aac 10265 *cost += extra_cost->alu.shift_reg;
7813b280
KT
10266
10267 if (GET_CODE (op1) == AND && REG_P (XEXP (op1, 0))
10268 && CONST_INT_P (XEXP (op1, 1))
6a70badb
RS
10269 && known_eq (INTVAL (XEXP (op1, 1)),
10270 GET_MODE_BITSIZE (mode) - 1))
7813b280
KT
10271 {
10272 *cost += rtx_cost (op0, mode, (rtx_code) code, 0, speed);
10273 /* We already demanded XEXP (op1, 0) to be REG_P, so
10274 don't recurse into it. */
10275 return true;
10276 }
b6875aac 10277 }
ba0cfa17
JG
10278 return false; /* All arguments need to be in registers. */
10279 }
43e9d192 10280
909734be
JG
10281 case SYMBOL_REF:
10282
1b1e81f8
JW
10283 if (aarch64_cmodel == AARCH64_CMODEL_LARGE
10284 || aarch64_cmodel == AARCH64_CMODEL_SMALL_SPIC)
909734be
JG
10285 {
10286 /* LDR. */
10287 if (speed)
10288 *cost += extra_cost->ldst.load;
10289 }
10290 else if (aarch64_cmodel == AARCH64_CMODEL_SMALL
10291 || aarch64_cmodel == AARCH64_CMODEL_SMALL_PIC)
10292 {
10293 /* ADRP, followed by ADD. */
10294 *cost += COSTS_N_INSNS (1);
10295 if (speed)
10296 *cost += 2 * extra_cost->alu.arith;
10297 }
10298 else if (aarch64_cmodel == AARCH64_CMODEL_TINY
10299 || aarch64_cmodel == AARCH64_CMODEL_TINY_PIC)
10300 {
10301 /* ADR. */
10302 if (speed)
10303 *cost += extra_cost->alu.arith;
10304 }
10305
10306 if (flag_pic)
10307 {
10308 /* One extra load instruction, after accessing the GOT. */
10309 *cost += COSTS_N_INSNS (1);
10310 if (speed)
10311 *cost += extra_cost->ldst.load;
10312 }
43e9d192
IB
10313 return true;
10314
909734be 10315 case HIGH:
43e9d192 10316 case LO_SUM:
909734be
JG
10317 /* ADRP/ADD (immediate). */
10318 if (speed)
10319 *cost += extra_cost->alu.arith;
43e9d192
IB
10320 return true;
10321
10322 case ZERO_EXTRACT:
10323 case SIGN_EXTRACT:
7cc2145f
JG
10324 /* UBFX/SBFX. */
10325 if (speed)
b6875aac
KV
10326 {
10327 if (VECTOR_MODE_P (mode))
10328 *cost += extra_cost->vect.alu;
10329 else
10330 *cost += extra_cost->alu.bfx;
10331 }
7cc2145f
JG
10332
10333 /* We can trust that the immediates used will be correct (there
10334 are no by-register forms), so we need only cost op0. */
e548c9df 10335 *cost += rtx_cost (XEXP (x, 0), VOIDmode, (enum rtx_code) code, 0, speed);
43e9d192
IB
10336 return true;
10337
10338 case MULT:
4745e701
JG
10339 *cost += aarch64_rtx_mult_cost (x, MULT, 0, speed);
10340 /* aarch64_rtx_mult_cost always handles recursion to its
10341 operands. */
10342 return true;
43e9d192
IB
10343
10344 case MOD:
4f58fe36
KT
10345 /* We can expand signed mod by power of 2 using a NEGS, two parallel
10346 ANDs and a CSNEG. Assume here that CSNEG is the same as the cost of
10347 an unconditional negate. This case should only ever be reached through
10348 the set_smod_pow2_cheap check in expmed.c. */
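      /* As a rough sketch, x % 4 in SImode expands to something like:
	     negs  w1, w0
	     and   w0, w0, 3
	     and   w1, w1, 3
	     csneg w0, w0, w1, mi
	 which matches the four instructions costed below.  */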
10349 if (CONST_INT_P (XEXP (x, 1))
10350 && exact_log2 (INTVAL (XEXP (x, 1))) > 0
10351 && (mode == SImode || mode == DImode))
10352 {
10353 /* We expand to 4 instructions. Reset the baseline. */
10354 *cost = COSTS_N_INSNS (4);
10355
10356 if (speed)
10357 *cost += 2 * extra_cost->alu.logical
10358 + 2 * extra_cost->alu.arith;
10359
10360 return true;
10361 }
10362
10363 /* Fall-through. */
43e9d192 10364 case UMOD:
43e9d192
IB
10365 if (speed)
10366 {
cb9ac430 10367 /* Slightly prefer UMOD over SMOD. */
b6875aac
KV
10368 if (VECTOR_MODE_P (mode))
10369 *cost += extra_cost->vect.alu;
e548c9df
AM
10370 else if (GET_MODE_CLASS (mode) == MODE_INT)
10371 *cost += (extra_cost->mult[mode == DImode].add
cb9ac430
TC
10372 + extra_cost->mult[mode == DImode].idiv
10373 + (code == MOD ? 1 : 0));
43e9d192
IB
10374 }
10375 return false; /* All arguments need to be in registers. */
10376
10377 case DIV:
10378 case UDIV:
4105fe38 10379 case SQRT:
43e9d192
IB
10380 if (speed)
10381 {
b6875aac
KV
10382 if (VECTOR_MODE_P (mode))
10383 *cost += extra_cost->vect.alu;
10384 else if (GET_MODE_CLASS (mode) == MODE_INT)
4105fe38
JG
10385 /* There is no integer SQRT, so only DIV and UDIV can get
10386 here. */
cb9ac430
TC
10387 *cost += (extra_cost->mult[mode == DImode].idiv
 10388 /* Slightly prefer UDIV over SDIV. */
10389 + (code == DIV ? 1 : 0));
4105fe38
JG
10390 else
10391 *cost += extra_cost->fp[mode == DFmode].div;
43e9d192
IB
10392 }
10393 return false; /* All arguments need to be in registers. */
10394
a8eecd00 10395 case IF_THEN_ELSE:
2d5ffe46
AP
10396 return aarch64_if_then_else_costs (XEXP (x, 0), XEXP (x, 1),
10397 XEXP (x, 2), cost, speed);
a8eecd00
JG
10398
10399 case EQ:
10400 case NE:
10401 case GT:
10402 case GTU:
10403 case LT:
10404 case LTU:
10405 case GE:
10406 case GEU:
10407 case LE:
10408 case LEU:
10409
10410 return false; /* All arguments must be in registers. */
10411
b292109f
JG
10412 case FMA:
10413 op0 = XEXP (x, 0);
10414 op1 = XEXP (x, 1);
10415 op2 = XEXP (x, 2);
10416
10417 if (speed)
b6875aac
KV
10418 {
10419 if (VECTOR_MODE_P (mode))
10420 *cost += extra_cost->vect.alu;
10421 else
10422 *cost += extra_cost->fp[mode == DFmode].fma;
10423 }
b292109f
JG
10424
10425 /* FMSUB, FNMADD, and FNMSUB are free. */
10426 if (GET_CODE (op0) == NEG)
10427 op0 = XEXP (op0, 0);
10428
10429 if (GET_CODE (op2) == NEG)
10430 op2 = XEXP (op2, 0);
10431
10432 /* aarch64_fnma4_elt_to_64v2df has the NEG as operand 1,
10433 and the by-element operand as operand 0. */
10434 if (GET_CODE (op1) == NEG)
10435 op1 = XEXP (op1, 0);
10436
10437 /* Catch vector-by-element operations. The by-element operand can
10438 either be (vec_duplicate (vec_select (x))) or just
10439 (vec_select (x)), depending on whether we are multiplying by
10440 a vector or a scalar.
10441
10442 Canonicalization is not very good in these cases, FMA4 will put the
10443 by-element operand as operand 0, FNMA4 will have it as operand 1. */
10444 if (GET_CODE (op0) == VEC_DUPLICATE)
10445 op0 = XEXP (op0, 0);
10446 else if (GET_CODE (op1) == VEC_DUPLICATE)
10447 op1 = XEXP (op1, 0);
10448
10449 if (GET_CODE (op0) == VEC_SELECT)
10450 op0 = XEXP (op0, 0);
10451 else if (GET_CODE (op1) == VEC_SELECT)
10452 op1 = XEXP (op1, 0);
10453
10454 /* If the remaining parameters are not registers,
10455 get the cost to put them into registers. */
e548c9df
AM
10456 *cost += rtx_cost (op0, mode, FMA, 0, speed);
10457 *cost += rtx_cost (op1, mode, FMA, 1, speed);
10458 *cost += rtx_cost (op2, mode, FMA, 2, speed);
b292109f
JG
10459 return true;
10460
5e2a765b
KT
10461 case FLOAT:
10462 case UNSIGNED_FLOAT:
10463 if (speed)
10464 *cost += extra_cost->fp[mode == DFmode].fromint;
10465 return false;
10466
b292109f
JG
10467 case FLOAT_EXTEND:
10468 if (speed)
b6875aac
KV
10469 {
10470 if (VECTOR_MODE_P (mode))
10471 {
 10472 /* Vector widen. */
10473 *cost += extra_cost->vect.alu;
10474 }
10475 else
10476 *cost += extra_cost->fp[mode == DFmode].widen;
10477 }
b292109f
JG
10478 return false;
10479
10480 case FLOAT_TRUNCATE:
10481 if (speed)
b6875aac
KV
10482 {
10483 if (VECTOR_MODE_P (mode))
10484 {
 10485 /* Vector conversion. */
10486 *cost += extra_cost->vect.alu;
10487 }
10488 else
10489 *cost += extra_cost->fp[mode == DFmode].narrow;
10490 }
b292109f
JG
10491 return false;
10492
61263118
KT
10493 case FIX:
10494 case UNSIGNED_FIX:
10495 x = XEXP (x, 0);
10496 /* Strip the rounding part. They will all be implemented
10497 by the fcvt* family of instructions anyway. */
10498 if (GET_CODE (x) == UNSPEC)
10499 {
10500 unsigned int uns_code = XINT (x, 1);
10501
10502 if (uns_code == UNSPEC_FRINTA
10503 || uns_code == UNSPEC_FRINTM
10504 || uns_code == UNSPEC_FRINTN
10505 || uns_code == UNSPEC_FRINTP
10506 || uns_code == UNSPEC_FRINTZ)
10507 x = XVECEXP (x, 0, 0);
10508 }
10509
10510 if (speed)
b6875aac
KV
10511 {
10512 if (VECTOR_MODE_P (mode))
10513 *cost += extra_cost->vect.alu;
10514 else
10515 *cost += extra_cost->fp[GET_MODE (x) == DFmode].toint;
10516 }
39252973
KT
10517
10518 /* We can combine fmul by a power of 2 followed by a fcvt into a single
10519 fixed-point fcvt. */
10520 if (GET_CODE (x) == MULT
10521 && ((VECTOR_MODE_P (mode)
10522 && aarch64_vec_fpconst_pow_of_2 (XEXP (x, 1)) > 0)
10523 || aarch64_fpconst_pow_of_2 (XEXP (x, 1)) > 0))
10524 {
10525 *cost += rtx_cost (XEXP (x, 0), VOIDmode, (rtx_code) code,
10526 0, speed);
10527 return true;
10528 }
10529
e548c9df 10530 *cost += rtx_cost (x, VOIDmode, (enum rtx_code) code, 0, speed);
61263118
KT
10531 return true;
10532
b292109f 10533 case ABS:
b6875aac
KV
10534 if (VECTOR_MODE_P (mode))
10535 {
10536 /* ABS (vector). */
10537 if (speed)
10538 *cost += extra_cost->vect.alu;
10539 }
10540 else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
b292109f 10541 {
19261b99
KT
10542 op0 = XEXP (x, 0);
10543
10544 /* FABD, which is analogous to FADD. */
10545 if (GET_CODE (op0) == MINUS)
10546 {
e548c9df
AM
10547 *cost += rtx_cost (XEXP (op0, 0), mode, MINUS, 0, speed);
10548 *cost += rtx_cost (XEXP (op0, 1), mode, MINUS, 1, speed);
19261b99
KT
10549 if (speed)
10550 *cost += extra_cost->fp[mode == DFmode].addsub;
10551
10552 return true;
10553 }
10554 /* Simple FABS is analogous to FNEG. */
b292109f
JG
10555 if (speed)
10556 *cost += extra_cost->fp[mode == DFmode].neg;
10557 }
10558 else
10559 {
10560 /* Integer ABS will either be split to
10561 two arithmetic instructions, or will be an ABS
10562 (scalar), which we don't model. */
10563 *cost = COSTS_N_INSNS (2);
10564 if (speed)
10565 *cost += 2 * extra_cost->alu.arith;
10566 }
10567 return false;
10568
10569 case SMAX:
10570 case SMIN:
10571 if (speed)
10572 {
b6875aac
KV
10573 if (VECTOR_MODE_P (mode))
10574 *cost += extra_cost->vect.alu;
10575 else
10576 {
10577 /* FMAXNM/FMINNM/FMAX/FMIN.
10578 TODO: This may not be accurate for all implementations, but
10579 we do not model this in the cost tables. */
10580 *cost += extra_cost->fp[mode == DFmode].addsub;
10581 }
b292109f
JG
10582 }
10583 return false;
10584
61263118
KT
10585 case UNSPEC:
10586 /* The floating point round to integer frint* instructions. */
10587 if (aarch64_frint_unspec_p (XINT (x, 1)))
10588 {
10589 if (speed)
10590 *cost += extra_cost->fp[mode == DFmode].roundint;
10591
10592 return false;
10593 }
781aeb73
KT
10594
10595 if (XINT (x, 1) == UNSPEC_RBIT)
10596 {
10597 if (speed)
10598 *cost += extra_cost->alu.rev;
10599
10600 return false;
10601 }
61263118
KT
10602 break;
10603
fb620c4a
JG
10604 case TRUNCATE:
10605
10606 /* Decompose <su>muldi3_highpart. */
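      /* Roughly, this matches the RTL for the high half of a 64x64->128-bit
	 multiply, e.g. (uint64_t) (((unsigned __int128) a * b) >> 64),
	 which maps to a single UMULH (or SMULH for the signed variant).  */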
10607 if (/* (truncate:DI */
10608 mode == DImode
10609 /* (lshiftrt:TI */
10610 && GET_MODE (XEXP (x, 0)) == TImode
10611 && GET_CODE (XEXP (x, 0)) == LSHIFTRT
10612 /* (mult:TI */
10613 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
10614 /* (ANY_EXTEND:TI (reg:DI))
10615 (ANY_EXTEND:TI (reg:DI))) */
10616 && ((GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == ZERO_EXTEND
10617 && GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)) == ZERO_EXTEND)
10618 || (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == SIGN_EXTEND
10619 && GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)) == SIGN_EXTEND))
10620 && GET_MODE (XEXP (XEXP (XEXP (XEXP (x, 0), 0), 0), 0)) == DImode
10621 && GET_MODE (XEXP (XEXP (XEXP (XEXP (x, 0), 0), 1), 0)) == DImode
10622 /* (const_int 64) */
10623 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
10624 && UINTVAL (XEXP (XEXP (x, 0), 1)) == 64)
10625 {
10626 /* UMULH/SMULH. */
10627 if (speed)
10628 *cost += extra_cost->mult[mode == DImode].extend;
e548c9df
AM
10629 *cost += rtx_cost (XEXP (XEXP (XEXP (XEXP (x, 0), 0), 0), 0),
10630 mode, MULT, 0, speed);
10631 *cost += rtx_cost (XEXP (XEXP (XEXP (XEXP (x, 0), 0), 1), 0),
10632 mode, MULT, 1, speed);
fb620c4a
JG
10633 return true;
10634 }
10635
10636 /* Fall through. */
43e9d192 10637 default:
61263118 10638 break;
43e9d192 10639 }
61263118 10640
c10e3d7f
AP
10641 if (dump_file
10642 && flag_aarch64_verbose_cost)
61263118
KT
10643 fprintf (dump_file,
10644 "\nFailed to cost RTX. Assuming default cost.\n");
10645
10646 return true;
43e9d192
IB
10647}
10648
0ee859b5
JG
 10649/* Wrapper around aarch64_rtx_costs that dumps the partial or total cost
10650 calculated for X. This cost is stored in *COST. Returns true
10651 if the total cost of X was calculated. */
10652static bool
e548c9df 10653aarch64_rtx_costs_wrapper (rtx x, machine_mode mode, int outer,
0ee859b5
JG
10654 int param, int *cost, bool speed)
10655{
e548c9df 10656 bool result = aarch64_rtx_costs (x, mode, outer, param, cost, speed);
0ee859b5 10657
c10e3d7f
AP
10658 if (dump_file
10659 && flag_aarch64_verbose_cost)
0ee859b5
JG
10660 {
10661 print_rtl_single (dump_file, x);
10662 fprintf (dump_file, "\n%s cost: %d (%s)\n",
10663 speed ? "Hot" : "Cold",
10664 *cost, result ? "final" : "partial");
10665 }
10666
10667 return result;
10668}
10669
43e9d192 10670static int
ef4bddc2 10671aarch64_register_move_cost (machine_mode mode,
8a3a7e67 10672 reg_class_t from_i, reg_class_t to_i)
43e9d192 10673{
8a3a7e67
RH
10674 enum reg_class from = (enum reg_class) from_i;
10675 enum reg_class to = (enum reg_class) to_i;
43e9d192 10676 const struct cpu_regmove_cost *regmove_cost
b175b679 10677 = aarch64_tune_params.regmove_cost;
43e9d192 10678
3be07662 10679 /* Caller save and pointer regs are equivalent to GENERAL_REGS. */
d677263e 10680 if (to == TAILCALL_ADDR_REGS || to == POINTER_REGS)
3be07662
WD
10681 to = GENERAL_REGS;
10682
d677263e 10683 if (from == TAILCALL_ADDR_REGS || from == POINTER_REGS)
3be07662
WD
10684 from = GENERAL_REGS;
10685
6ee70f81
AP
10686 /* Moving between GPR and stack cost is the same as GP2GP. */
10687 if ((from == GENERAL_REGS && to == STACK_REG)
10688 || (to == GENERAL_REGS && from == STACK_REG))
10689 return regmove_cost->GP2GP;
10690
10691 /* To/From the stack register, we move via the gprs. */
10692 if (to == STACK_REG || from == STACK_REG)
10693 return aarch64_register_move_cost (mode, from, GENERAL_REGS)
10694 + aarch64_register_move_cost (mode, GENERAL_REGS, to);
10695
6a70badb 10696 if (known_eq (GET_MODE_SIZE (mode), 16))
8919453c
WD
10697 {
10698 /* 128-bit operations on general registers require 2 instructions. */
10699 if (from == GENERAL_REGS && to == GENERAL_REGS)
10700 return regmove_cost->GP2GP * 2;
10701 else if (from == GENERAL_REGS)
10702 return regmove_cost->GP2FP * 2;
10703 else if (to == GENERAL_REGS)
10704 return regmove_cost->FP2GP * 2;
10705
10706 /* When AdvSIMD instructions are disabled it is not possible to move
10707 a 128-bit value directly between Q registers. This is handled in
10708 secondary reload. A general register is used as a scratch to move
10709 the upper DI value and the lower DI value is moved directly,
10710 hence the cost is the sum of three moves. */
10711 if (! TARGET_SIMD)
10712 return regmove_cost->GP2FP + regmove_cost->FP2GP + regmove_cost->FP2FP;
10713
10714 return regmove_cost->FP2FP;
10715 }
10716
43e9d192
IB
10717 if (from == GENERAL_REGS && to == GENERAL_REGS)
10718 return regmove_cost->GP2GP;
10719 else if (from == GENERAL_REGS)
10720 return regmove_cost->GP2FP;
10721 else if (to == GENERAL_REGS)
10722 return regmove_cost->FP2GP;
10723
43e9d192
IB
10724 return regmove_cost->FP2FP;
10725}
10726
10727static int
ef4bddc2 10728aarch64_memory_move_cost (machine_mode mode ATTRIBUTE_UNUSED,
43e9d192
IB
10729 reg_class_t rclass ATTRIBUTE_UNUSED,
10730 bool in ATTRIBUTE_UNUSED)
10731{
b175b679 10732 return aarch64_tune_params.memmov_cost;
43e9d192
IB
10733}
10734
0c30e0f3
EM
10735/* Return true if it is safe and beneficial to use the approximate rsqrt optabs
10736 to optimize 1.0/sqrt. */
ee62a5a6
RS
10737
10738static bool
9acc9cbe 10739use_rsqrt_p (machine_mode mode)
ee62a5a6
RS
10740{
10741 return (!flag_trapping_math
10742 && flag_unsafe_math_optimizations
9acc9cbe
EM
10743 && ((aarch64_tune_params.approx_modes->recip_sqrt
10744 & AARCH64_APPROX_MODE (mode))
1a33079e 10745 || flag_mrecip_low_precision_sqrt));
ee62a5a6
RS
10746}
10747
0c30e0f3
EM
10748/* Function to decide when to use the approximate reciprocal square root
10749 builtin. */
a6fc00da
BH
10750
10751static tree
ee62a5a6 10752aarch64_builtin_reciprocal (tree fndecl)
a6fc00da 10753{
9acc9cbe
EM
10754 machine_mode mode = TYPE_MODE (TREE_TYPE (fndecl));
10755
10756 if (!use_rsqrt_p (mode))
a6fc00da 10757 return NULL_TREE;
ee62a5a6 10758 return aarch64_builtin_rsqrt (DECL_FUNCTION_CODE (fndecl));
a6fc00da
BH
10759}
10760
98daafa0
EM
10761/* Emit instruction sequence to compute either the approximate square root
10762 or its approximate reciprocal, depending on the flag RECP, and return
10763 whether the sequence was emitted or not. */
a6fc00da 10764
98daafa0
EM
10765bool
10766aarch64_emit_approx_sqrt (rtx dst, rtx src, bool recp)
a6fc00da 10767{
98daafa0 10768 machine_mode mode = GET_MODE (dst);
daef0a8c
JW
10769
10770 if (GET_MODE_INNER (mode) == HFmode)
2e19adc8
RE
10771 {
10772 gcc_assert (!recp);
10773 return false;
10774 }
10775
2e19adc8
RE
10776 if (!recp)
10777 {
10778 if (!(flag_mlow_precision_sqrt
10779 || (aarch64_tune_params.approx_modes->sqrt
10780 & AARCH64_APPROX_MODE (mode))))
10781 return false;
10782
10783 if (flag_finite_math_only
10784 || flag_trapping_math
10785 || !flag_unsafe_math_optimizations
10786 || optimize_function_for_size_p (cfun))
10787 return false;
10788 }
10789 else
10790 /* Caller assumes we cannot fail. */
10791 gcc_assert (use_rsqrt_p (mode));
daef0a8c 10792
ddc203a7 10793 machine_mode mmsk = mode_for_int_vector (mode).require ();
98daafa0
EM
10794 rtx xmsk = gen_reg_rtx (mmsk);
10795 if (!recp)
2e19adc8
RE
10796 /* When calculating the approximate square root, compare the
10797 argument with 0.0 and create a mask. */
10798 emit_insn (gen_rtx_SET (xmsk,
10799 gen_rtx_NEG (mmsk,
10800 gen_rtx_EQ (mmsk, src,
10801 CONST0_RTX (mode)))));
a6fc00da 10802
98daafa0
EM
10803 /* Estimate the approximate reciprocal square root. */
10804 rtx xdst = gen_reg_rtx (mode);
0016d8d9 10805 emit_insn (gen_aarch64_rsqrte (mode, xdst, src));
a6fc00da 10806
98daafa0
EM
10807 /* Iterate over the series twice for SF and thrice for DF. */
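  /* (The FRSQRTE estimate is only accurate to roughly 8 bits, and each
     Newton-Raphson step roughly doubles the number of correct bits, so
     two steps suffice for an SF significand and three for DF.)  */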
10808 int iterations = (GET_MODE_INNER (mode) == DFmode) ? 3 : 2;
a6fc00da 10809
98daafa0
EM
10810 /* Optionally iterate over the series once less for faster performance
10811 while sacrificing the accuracy. */
10812 if ((recp && flag_mrecip_low_precision_sqrt)
10813 || (!recp && flag_mlow_precision_sqrt))
a6fc00da
BH
10814 iterations--;
10815
98daafa0
EM
10816 /* Iterate over the series to calculate the approximate reciprocal square
10817 root. */
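  /* Each pass uses FRSQRTS to compute x1 = (3 - src * xdst * xdst) / 2,
     the Newton-Raphson correction factor for 1/sqrt (src); the multiply
     by x1 is done inside the loop except on the last step, where it is
     folded into the finalization below.  */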
10818 rtx x1 = gen_reg_rtx (mode);
10819 while (iterations--)
a6fc00da 10820 {
a6fc00da 10821 rtx x2 = gen_reg_rtx (mode);
98daafa0
EM
10822 emit_set_insn (x2, gen_rtx_MULT (mode, xdst, xdst));
10823
0016d8d9 10824 emit_insn (gen_aarch64_rsqrts (mode, x1, src, x2));
a6fc00da 10825
98daafa0
EM
10826 if (iterations > 0)
10827 emit_set_insn (xdst, gen_rtx_MULT (mode, xdst, x1));
10828 }
10829
10830 if (!recp)
10831 {
10832 /* Qualify the approximate reciprocal square root when the argument is
10833 0.0 by squashing the intermediary result to 0.0. */
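      /* (The reciprocal square root estimate for a 0.0 input is +Inf, and
	 multiplying that back by the 0.0 input would give a NaN rather than
	 the expected 0.0, hence the masking below.)  */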
10834 rtx xtmp = gen_reg_rtx (mmsk);
10835 emit_set_insn (xtmp, gen_rtx_AND (mmsk, gen_rtx_NOT (mmsk, xmsk),
10836 gen_rtx_SUBREG (mmsk, xdst, 0)));
10837 emit_move_insn (xdst, gen_rtx_SUBREG (mode, xtmp, 0));
a6fc00da 10838
98daafa0
EM
10839 /* Calculate the approximate square root. */
10840 emit_set_insn (xdst, gen_rtx_MULT (mode, xdst, src));
a6fc00da
BH
10841 }
10842
98daafa0
EM
10843 /* Finalize the approximation. */
10844 emit_set_insn (dst, gen_rtx_MULT (mode, xdst, x1));
10845
10846 return true;
a6fc00da
BH
10847}
10848
79a2bc2d
EM
10849/* Emit the instruction sequence to compute the approximation for the division
10850 of NUM by DEN in QUO and return whether the sequence was emitted or not. */
10851
10852bool
10853aarch64_emit_approx_div (rtx quo, rtx num, rtx den)
10854{
10855 machine_mode mode = GET_MODE (quo);
33d72b63
JW
10856
10857 if (GET_MODE_INNER (mode) == HFmode)
10858 return false;
10859
79a2bc2d
EM
10860 bool use_approx_division_p = (flag_mlow_precision_div
10861 || (aarch64_tune_params.approx_modes->division
10862 & AARCH64_APPROX_MODE (mode)));
10863
10864 if (!flag_finite_math_only
10865 || flag_trapping_math
10866 || !flag_unsafe_math_optimizations
10867 || optimize_function_for_size_p (cfun)
10868 || !use_approx_division_p)
10869 return false;
10870
1be49a38
RR
10871 if (!TARGET_SIMD && VECTOR_MODE_P (mode))
10872 return false;
10873
79a2bc2d
EM
10874 /* Estimate the approximate reciprocal. */
10875 rtx xrcp = gen_reg_rtx (mode);
0016d8d9 10876 emit_insn (gen_aarch64_frecpe (mode, xrcp, den));
79a2bc2d
EM
10877
10878 /* Iterate over the series twice for SF and thrice for DF. */
10879 int iterations = (GET_MODE_INNER (mode) == DFmode) ? 3 : 2;
10880
10881 /* Optionally iterate over the series once less for faster performance,
10882 while sacrificing the accuracy. */
10883 if (flag_mlow_precision_div)
10884 iterations--;
10885
10886 /* Iterate over the series to calculate the approximate reciprocal. */
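  /* Each pass uses FRECPS to compute xtmp = 2 - den * xrcp, the
     Newton-Raphson correction factor for 1/den; as for the square root
     above, the multiply for the final step is folded into the
     finalization.  */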
10887 rtx xtmp = gen_reg_rtx (mode);
10888 while (iterations--)
10889 {
0016d8d9 10890 emit_insn (gen_aarch64_frecps (mode, xtmp, xrcp, den));
79a2bc2d
EM
10891
10892 if (iterations > 0)
10893 emit_set_insn (xrcp, gen_rtx_MULT (mode, xrcp, xtmp));
10894 }
10895
10896 if (num != CONST1_RTX (mode))
10897 {
10898 /* As the approximate reciprocal of DEN is already calculated, only
10899 calculate the approximate division when NUM is not 1.0. */
10900 rtx xnum = force_reg (mode, num);
10901 emit_set_insn (xrcp, gen_rtx_MULT (mode, xrcp, xnum));
10902 }
10903
10904 /* Finalize the approximation. */
10905 emit_set_insn (quo, gen_rtx_MULT (mode, xrcp, xtmp));
10906 return true;
10907}
10908
d126a4ae
AP
10909/* Return the number of instructions that can be issued per cycle. */
10910static int
10911aarch64_sched_issue_rate (void)
10912{
b175b679 10913 return aarch64_tune_params.issue_rate;
d126a4ae
AP
10914}
10915
d03f7e44
MK
10916static int
10917aarch64_sched_first_cycle_multipass_dfa_lookahead (void)
10918{
10919 int issue_rate = aarch64_sched_issue_rate ();
10920
10921 return issue_rate > 1 && !sched_fusion ? issue_rate : 0;
10922}
10923
2d6bc7fa
KT
10924
10925/* Implement TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD as
10926 autopref_multipass_dfa_lookahead_guard from haifa-sched.c. It only
10927 has an effect if PARAM_SCHED_AUTOPREF_QUEUE_DEPTH > 0. */
10928
10929static int
10930aarch64_first_cycle_multipass_dfa_lookahead_guard (rtx_insn *insn,
10931 int ready_index)
10932{
10933 return autopref_multipass_dfa_lookahead_guard (insn, ready_index);
10934}
10935
10936
8990e73a
TB
10937/* Vectorizer cost model target hooks. */
10938
10939/* Implement targetm.vectorize.builtin_vectorization_cost. */
10940static int
10941aarch64_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
10942 tree vectype,
10943 int misalign ATTRIBUTE_UNUSED)
10944{
10945 unsigned elements;
cd8ae5ed
AP
10946 const cpu_vector_cost *costs = aarch64_tune_params.vec_costs;
10947 bool fp = false;
10948
10949 if (vectype != NULL)
10950 fp = FLOAT_TYPE_P (vectype);
8990e73a
TB
10951
10952 switch (type_of_cost)
10953 {
10954 case scalar_stmt:
cd8ae5ed 10955 return fp ? costs->scalar_fp_stmt_cost : costs->scalar_int_stmt_cost;
8990e73a
TB
10956
10957 case scalar_load:
cd8ae5ed 10958 return costs->scalar_load_cost;
8990e73a
TB
10959
10960 case scalar_store:
cd8ae5ed 10961 return costs->scalar_store_cost;
8990e73a
TB
10962
10963 case vector_stmt:
cd8ae5ed 10964 return fp ? costs->vec_fp_stmt_cost : costs->vec_int_stmt_cost;
8990e73a
TB
10965
10966 case vector_load:
cd8ae5ed 10967 return costs->vec_align_load_cost;
8990e73a
TB
10968
10969 case vector_store:
cd8ae5ed 10970 return costs->vec_store_cost;
8990e73a
TB
10971
10972 case vec_to_scalar:
cd8ae5ed 10973 return costs->vec_to_scalar_cost;
8990e73a
TB
10974
10975 case scalar_to_vec:
cd8ae5ed 10976 return costs->scalar_to_vec_cost;
8990e73a
TB
10977
10978 case unaligned_load:
cc9fe6bb 10979 case vector_gather_load:
cd8ae5ed 10980 return costs->vec_unalign_load_cost;
8990e73a
TB
10981
10982 case unaligned_store:
cc9fe6bb 10983 case vector_scatter_store:
cd8ae5ed 10984 return costs->vec_unalign_store_cost;
8990e73a
TB
10985
10986 case cond_branch_taken:
cd8ae5ed 10987 return costs->cond_taken_branch_cost;
8990e73a
TB
10988
10989 case cond_branch_not_taken:
cd8ae5ed 10990 return costs->cond_not_taken_branch_cost;
8990e73a
TB
10991
10992 case vec_perm:
cd8ae5ed 10993 return costs->vec_permute_cost;
c428f91c 10994
8990e73a 10995 case vec_promote_demote:
cd8ae5ed 10996 return fp ? costs->vec_fp_stmt_cost : costs->vec_int_stmt_cost;
8990e73a
TB
10997
10998 case vec_construct:
6a70badb 10999 elements = estimated_poly_value (TYPE_VECTOR_SUBPARTS (vectype));
8990e73a
TB
11000 return elements / 2 + 1;
11001
11002 default:
11003 gcc_unreachable ();
11004 }
11005}
11006
11007/* Implement targetm.vectorize.add_stmt_cost. */
11008static unsigned
11009aarch64_add_stmt_cost (void *data, int count, enum vect_cost_for_stmt kind,
11010 struct _stmt_vec_info *stmt_info, int misalign,
11011 enum vect_cost_model_location where)
11012{
11013 unsigned *cost = (unsigned *) data;
11014 unsigned retval = 0;
11015
11016 if (flag_vect_cost_model)
11017 {
11018 tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
11019 int stmt_cost =
11020 aarch64_builtin_vectorization_cost (kind, vectype, misalign);
11021
11022 /* Statements in an inner loop relative to the loop being
11023 vectorized are weighted more heavily. The value here is
058e4c71 11024 arbitrary and could potentially be improved with analysis. */
8990e73a 11025 if (where == vect_body && stmt_info && stmt_in_inner_loop_p (stmt_info))
058e4c71 11026 count *= 50; /* FIXME */
8990e73a
TB
11027
11028 retval = (unsigned) (count * stmt_cost);
11029 cost[where] += retval;
11030 }
11031
11032 return retval;
11033}
11034
0cfff2a1 11035static void initialize_aarch64_code_model (struct gcc_options *);
43e9d192 11036
0cfff2a1
KT
11037/* Parse the TO_PARSE string and put the architecture struct that it
11038 selects into RES and the architectural features into ISA_FLAGS.
11039 Return an aarch64_parse_opt_result describing the parse result.
c7887347
ML
11040 If there is an error parsing, RES and ISA_FLAGS are left unchanged.
11041 When the TO_PARSE string contains an invalid extension,
11042 a copy of the string is created and stored to INVALID_EXTENSION. */
43e9d192 11043
0cfff2a1
KT
11044static enum aarch64_parse_opt_result
11045aarch64_parse_arch (const char *to_parse, const struct processor **res,
c7887347 11046 unsigned long *isa_flags, std::string *invalid_extension)
43e9d192 11047{
ff150bc4 11048 const char *ext;
43e9d192 11049 const struct processor *arch;
43e9d192
IB
11050 size_t len;
11051
ff150bc4 11052 ext = strchr (to_parse, '+');
43e9d192
IB
11053
11054 if (ext != NULL)
ff150bc4 11055 len = ext - to_parse;
43e9d192 11056 else
ff150bc4 11057 len = strlen (to_parse);
43e9d192
IB
11058
11059 if (len == 0)
0cfff2a1
KT
11060 return AARCH64_PARSE_MISSING_ARG;
11061
43e9d192 11062
0cfff2a1 11063 /* Loop through the list of supported ARCHes to find a match. */
43e9d192
IB
11064 for (arch = all_architectures; arch->name != NULL; arch++)
11065 {
ff150bc4
ML
11066 if (strlen (arch->name) == len
11067 && strncmp (arch->name, to_parse, len) == 0)
43e9d192 11068 {
0cfff2a1 11069 unsigned long isa_temp = arch->flags;
43e9d192
IB
11070
11071 if (ext != NULL)
11072 {
0cfff2a1
KT
11073 /* TO_PARSE string contains at least one extension. */
11074 enum aarch64_parse_opt_result ext_res
c7887347 11075 = aarch64_parse_extension (ext, &isa_temp, invalid_extension);
43e9d192 11076
0cfff2a1
KT
11077 if (ext_res != AARCH64_PARSE_OK)
11078 return ext_res;
ffee7aa9 11079 }
0cfff2a1
KT
11080 /* Extension parsing was successful. Confirm the result
11081 arch and ISA flags. */
11082 *res = arch;
11083 *isa_flags = isa_temp;
11084 return AARCH64_PARSE_OK;
43e9d192
IB
11085 }
11086 }
11087
11088 /* ARCH name not found in list. */
0cfff2a1 11089 return AARCH64_PARSE_INVALID_ARG;
43e9d192
IB
11090}
11091
0cfff2a1
KT
11092/* Parse the TO_PARSE string and put the result tuning in RES and the
11093 architecture flags in ISA_FLAGS. Return an aarch64_parse_opt_result
11094 describing the parse result. If there is an error parsing, RES and
c7887347
ML
11095 ISA_FLAGS are left unchanged.
11096 When the TO_PARSE string contains an invalid extension,
11097 a copy of the string is created and stored to INVALID_EXTENSION. */
43e9d192 11098
0cfff2a1
KT
11099static enum aarch64_parse_opt_result
11100aarch64_parse_cpu (const char *to_parse, const struct processor **res,
c7887347 11101 unsigned long *isa_flags, std::string *invalid_extension)
43e9d192 11102{
ff150bc4 11103 const char *ext;
43e9d192 11104 const struct processor *cpu;
43e9d192
IB
11105 size_t len;
11106
ff150bc4 11107 ext = strchr (to_parse, '+');
43e9d192
IB
11108
11109 if (ext != NULL)
ff150bc4 11110 len = ext - to_parse;
43e9d192 11111 else
ff150bc4 11112 len = strlen (to_parse);
43e9d192
IB
11113
11114 if (len == 0)
0cfff2a1
KT
11115 return AARCH64_PARSE_MISSING_ARG;
11116
43e9d192
IB
11117
11118 /* Loop through the list of supported CPUs to find a match. */
11119 for (cpu = all_cores; cpu->name != NULL; cpu++)
11120 {
ff150bc4 11121 if (strlen (cpu->name) == len && strncmp (cpu->name, to_parse, len) == 0)
43e9d192 11122 {
0cfff2a1
KT
11123 unsigned long isa_temp = cpu->flags;
11124
43e9d192
IB
11125
11126 if (ext != NULL)
11127 {
0cfff2a1
KT
11128 /* TO_PARSE string contains at least one extension. */
11129 enum aarch64_parse_opt_result ext_res
c7887347 11130 = aarch64_parse_extension (ext, &isa_temp, invalid_extension);
43e9d192 11131
0cfff2a1
KT
11132 if (ext_res != AARCH64_PARSE_OK)
11133 return ext_res;
11134 }
 11135 /* Extension parsing was successful. Confirm the result
11136 cpu and ISA flags. */
11137 *res = cpu;
11138 *isa_flags = isa_temp;
11139 return AARCH64_PARSE_OK;
43e9d192
IB
11140 }
11141 }
11142
11143 /* CPU name not found in list. */
0cfff2a1 11144 return AARCH64_PARSE_INVALID_ARG;
43e9d192
IB
11145}
11146
0cfff2a1
KT
11147/* Parse the TO_PARSE string and put the cpu it selects into RES.
11148 Return an aarch64_parse_opt_result describing the parse result.
11149 If the parsing fails the RES does not change. */
43e9d192 11150
0cfff2a1
KT
11151static enum aarch64_parse_opt_result
11152aarch64_parse_tune (const char *to_parse, const struct processor **res)
43e9d192
IB
11153{
11154 const struct processor *cpu;
43e9d192
IB
11155
11156 /* Loop through the list of supported CPUs to find a match. */
11157 for (cpu = all_cores; cpu->name != NULL; cpu++)
11158 {
ff150bc4 11159 if (strcmp (cpu->name, to_parse) == 0)
43e9d192 11160 {
0cfff2a1
KT
11161 *res = cpu;
11162 return AARCH64_PARSE_OK;
43e9d192
IB
11163 }
11164 }
11165
11166 /* CPU name not found in list. */
0cfff2a1 11167 return AARCH64_PARSE_INVALID_ARG;
43e9d192
IB
11168}
11169
8dec06f2
JG
11170/* Parse TOKEN, which has length LENGTH to see if it is an option
11171 described in FLAG. If it is, return the index bit for that fusion type.
11172 If not, error (printing OPTION_NAME) and return zero. */
11173
11174static unsigned int
11175aarch64_parse_one_option_token (const char *token,
11176 size_t length,
11177 const struct aarch64_flag_desc *flag,
11178 const char *option_name)
11179{
11180 for (; flag->name != NULL; flag++)
11181 {
11182 if (length == strlen (flag->name)
11183 && !strncmp (flag->name, token, length))
11184 return flag->flag;
11185 }
11186
a3f9f006 11187 error ("unknown flag passed in %<-moverride=%s%> (%s)", option_name, token);
8dec06f2
JG
11188 return 0;
11189}
11190
11191/* Parse OPTION which is a comma-separated list of flags to enable.
11192 FLAGS gives the list of flags we understand, INITIAL_STATE gives any
11193 default state we inherit from the CPU tuning structures. OPTION_NAME
11194 gives the top-level option we are parsing in the -moverride string,
11195 for use in error messages. */
11196
11197static unsigned int
11198aarch64_parse_boolean_options (const char *option,
11199 const struct aarch64_flag_desc *flags,
11200 unsigned int initial_state,
11201 const char *option_name)
11202{
11203 const char separator = '.';
11204 const char* specs = option;
11205 const char* ntoken = option;
11206 unsigned int found_flags = initial_state;
11207
11208 while ((ntoken = strchr (specs, separator)))
11209 {
11210 size_t token_length = ntoken - specs;
11211 unsigned token_ops = aarch64_parse_one_option_token (specs,
11212 token_length,
11213 flags,
11214 option_name);
11215 /* If we find "none" (or, for simplicity's sake, an error) anywhere
11216 in the token stream, reset the supported operations. So:
11217
11218 adrp+add.cmp+branch.none.adrp+add
11219
11220 would have the result of turning on only adrp+add fusion. */
11221 if (!token_ops)
11222 found_flags = 0;
11223
11224 found_flags |= token_ops;
11225 specs = ++ntoken;
11226 }
11227
 11228 /* If we ended with a trailing separator, the string is ill-formed. */
11229 if (!(*specs))
11230 {
11231 error ("%s string ill-formed\n", option_name);
11232 return 0;
11233 }
11234
11235 /* We still have one more token to parse. */
11236 size_t token_length = strlen (specs);
11237 unsigned token_ops = aarch64_parse_one_option_token (specs,
11238 token_length,
11239 flags,
11240 option_name);
11241 if (!token_ops)
11242 found_flags = 0;
11243
11244 found_flags |= token_ops;
11245 return found_flags;
11246}
11247
11248/* Support for overriding instruction fusion. */
11249
11250static void
11251aarch64_parse_fuse_string (const char *fuse_string,
11252 struct tune_params *tune)
11253{
11254 tune->fusible_ops = aarch64_parse_boolean_options (fuse_string,
11255 aarch64_fusible_pairs,
11256 tune->fusible_ops,
11257 "fuse=");
11258}
11259
11260/* Support for overriding other tuning flags. */
11261
11262static void
11263aarch64_parse_tune_string (const char *tune_string,
11264 struct tune_params *tune)
11265{
11266 tune->extra_tuning_flags
11267 = aarch64_parse_boolean_options (tune_string,
11268 aarch64_tuning_flags,
11269 tune->extra_tuning_flags,
11270 "tune=");
11271}
11272
886f092f
KT
11273/* Parse the sve_width tuning moverride string in TUNE_STRING.
11274 Accept the valid SVE vector widths allowed by
11275 aarch64_sve_vector_bits_enum and use it to override sve_width
11276 in TUNE. */
11277
11278static void
11279aarch64_parse_sve_width_string (const char *tune_string,
11280 struct tune_params *tune)
11281{
11282 int width = -1;
11283
11284 int n = sscanf (tune_string, "%d", &width);
11285 if (n == EOF)
11286 {
11287 error ("invalid format for sve_width");
11288 return;
11289 }
11290 switch (width)
11291 {
11292 case SVE_128:
11293 case SVE_256:
11294 case SVE_512:
11295 case SVE_1024:
11296 case SVE_2048:
11297 break;
11298 default:
11299 error ("invalid sve_width value: %d", width);
11300 }
11301 tune->sve_width = (enum aarch64_sve_vector_bits_enum) width;
11302}
11303
8dec06f2
JG
11304/* Parse TOKEN, which has length LENGTH to see if it is a tuning option
11305 we understand. If it is, extract the option string and handoff to
11306 the appropriate function. */
11307
11308void
11309aarch64_parse_one_override_token (const char* token,
11310 size_t length,
11311 struct tune_params *tune)
11312{
11313 const struct aarch64_tuning_override_function *fn
11314 = aarch64_tuning_override_functions;
11315
11316 const char *option_part = strchr (token, '=');
11317 if (!option_part)
11318 {
11319 error ("tuning string missing in option (%s)", token);
11320 return;
11321 }
11322
11323 /* Get the length of the option name. */
11324 length = option_part - token;
11325 /* Skip the '=' to get to the option string. */
11326 option_part++;
11327
11328 for (; fn->name != NULL; fn++)
11329 {
11330 if (!strncmp (fn->name, token, length))
11331 {
11332 fn->parse_override (option_part, tune);
11333 return;
11334 }
11335 }
11336
11337 error ("unknown tuning option (%s)",token);
11338 return;
11339}
11340
5eee3c34
JW
11341/* A checking mechanism for the implementation of the tls size. */
11342
11343static void
11344initialize_aarch64_tls_size (struct gcc_options *opts)
11345{
11346 if (aarch64_tls_size == 0)
11347 aarch64_tls_size = 24;
11348
11349 switch (opts->x_aarch64_cmodel_var)
11350 {
11351 case AARCH64_CMODEL_TINY:
 11352 /* Both the default and maximum TLS size allowed under tiny are 1M, which
11353 needs two instructions to address, so we clamp the size to 24. */
11354 if (aarch64_tls_size > 24)
11355 aarch64_tls_size = 24;
11356 break;
11357 case AARCH64_CMODEL_SMALL:
11358 /* The maximum TLS size allowed under small is 4G. */
11359 if (aarch64_tls_size > 32)
11360 aarch64_tls_size = 32;
11361 break;
11362 case AARCH64_CMODEL_LARGE:
11363 /* The maximum TLS size allowed under large is 16E.
11364 FIXME: 16E should be 64bit, we only support 48bit offset now. */
11365 if (aarch64_tls_size > 48)
11366 aarch64_tls_size = 48;
11367 break;
11368 default:
11369 gcc_unreachable ();
11370 }
11371
11372 return;
11373}
11374
8dec06f2
JG
11375/* Parse STRING looking for options in the format:
11376 string :: option:string
11377 option :: name=substring
11378 name :: {a-z}
11379 substring :: defined by option. */
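   /* For example, a string such as fuse=adrp+add.cmp+branch (as passed via
      -moverride=) adds the adrp+add and cmp+branch fusion pairs on top of
      the CPU's defaults; several name=substring options can be chained
      together with ':' separators.  */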
11380
11381static void
11382aarch64_parse_override_string (const char* input_string,
11383 struct tune_params* tune)
11384{
11385 const char separator = ':';
11386 size_t string_length = strlen (input_string) + 1;
11387 char *string_root = (char *) xmalloc (sizeof (*string_root) * string_length);
11388 char *string = string_root;
11389 strncpy (string, input_string, string_length);
11390 string[string_length - 1] = '\0';
11391
11392 char* ntoken = string;
11393
11394 while ((ntoken = strchr (string, separator)))
11395 {
11396 size_t token_length = ntoken - string;
11397 /* Make this substring look like a string. */
11398 *ntoken = '\0';
11399 aarch64_parse_one_override_token (string, token_length, tune);
11400 string = ++ntoken;
11401 }
11402
11403 /* One last option to parse. */
11404 aarch64_parse_one_override_token (string, strlen (string), tune);
11405 free (string_root);
11406}
43e9d192 11407
43e9d192
IB
11408
11409static void
0cfff2a1 11410aarch64_override_options_after_change_1 (struct gcc_options *opts)
43e9d192 11411{
efac62a3
ST
11412 if (accepted_branch_protection_string)
11413 {
11414 opts->x_aarch64_branch_protection_string
11415 = xstrdup (accepted_branch_protection_string);
11416 }
11417
acea40ac
WD
11418 /* PR 70044: We have to be careful about being called multiple times for the
11419 same function. This means all changes should be repeatable. */
11420
d6cb6d6a
WD
11421 /* Set aarch64_use_frame_pointer based on -fno-omit-frame-pointer.
11422 Disable the frame pointer flag so the mid-end will not use a frame
11423 pointer in leaf functions in order to support -fomit-leaf-frame-pointer.
11424 Set x_flag_omit_frame_pointer to the special value 2 to differentiate
11425 between -fomit-frame-pointer (1) and -fno-omit-frame-pointer (2). */
11426 aarch64_use_frame_pointer = opts->x_flag_omit_frame_pointer != 1;
acea40ac 11427 if (opts->x_flag_omit_frame_pointer == 0)
a3dc8760 11428 opts->x_flag_omit_frame_pointer = 2;
43e9d192 11429
1be34295 11430 /* If not optimizing for size, set the default
0cfff2a1
KT
11431 alignment to what the target wants. */
11432 if (!opts->x_optimize_size)
43e9d192 11433 {
c518c102
ML
11434 if (opts->x_flag_align_loops && !opts->x_str_align_loops)
11435 opts->x_str_align_loops = aarch64_tune_params.loop_align;
11436 if (opts->x_flag_align_jumps && !opts->x_str_align_jumps)
11437 opts->x_str_align_jumps = aarch64_tune_params.jump_align;
11438 if (opts->x_flag_align_functions && !opts->x_str_align_functions)
11439 opts->x_str_align_functions = aarch64_tune_params.function_align;
43e9d192 11440 }
b4f50fd4 11441
9ee6540a
WD
11442 /* We default to no pc-relative literal loads. */
11443
11444 aarch64_pcrelative_literal_loads = false;
11445
11446 /* If -mpc-relative-literal-loads is set on the command line, this
b4f50fd4 11447 implies that the user asked for PC relative literal loads. */
9ee6540a
WD
11448 if (opts->x_pcrelative_literal_loads == 1)
11449 aarch64_pcrelative_literal_loads = true;
b4f50fd4 11450
9ee6540a
WD
11451 /* In the tiny memory model it makes no sense to disallow PC relative
11452 literal pool loads. */
11453 if (aarch64_cmodel == AARCH64_CMODEL_TINY
11454 || aarch64_cmodel == AARCH64_CMODEL_TINY_PIC)
11455 aarch64_pcrelative_literal_loads = true;
98daafa0
EM
11456
11457 /* When enabling the lower precision Newton series for the square root, also
11458 enable it for the reciprocal square root, since the latter is an
11459 intermediary step for the former. */
11460 if (flag_mlow_precision_sqrt)
11461 flag_mrecip_low_precision_sqrt = true;
0cfff2a1 11462}
43e9d192 11463
0cfff2a1
KT
11464/* 'Unpack' up the internal tuning structs and update the options
11465 in OPTS. The caller must have set up selected_tune and selected_arch
11466 as all the other target-specific codegen decisions are
11467 derived from them. */
11468
e4ea20c8 11469void
0cfff2a1
KT
11470aarch64_override_options_internal (struct gcc_options *opts)
11471{
11472 aarch64_tune_flags = selected_tune->flags;
11473 aarch64_tune = selected_tune->sched_core;
11474 /* Make a copy of the tuning parameters attached to the core, which
11475 we may later overwrite. */
11476 aarch64_tune_params = *(selected_tune->tune);
11477 aarch64_architecture_version = selected_arch->architecture_version;
11478
11479 if (opts->x_aarch64_override_tune_string)
11480 aarch64_parse_override_string (opts->x_aarch64_override_tune_string,
11481 &aarch64_tune_params);
11482
11483 /* This target defaults to strict volatile bitfields. */
11484 if (opts->x_flag_strict_volatile_bitfields < 0 && abi_version_at_least (2))
11485 opts->x_flag_strict_volatile_bitfields = 1;
11486
cd0b2d36
RR
11487 if (aarch64_stack_protector_guard == SSP_GLOBAL
11488 && opts->x_aarch64_stack_protector_guard_offset_str)
11489 {
41804907 11490 error ("incompatible options %<-mstack-protector-guard=global%> and "
63d42e89 11491 "%<-mstack-protector-guard-offset=%s%>",
cd0b2d36
RR
11492 aarch64_stack_protector_guard_offset_str);
11493 }
11494
11495 if (aarch64_stack_protector_guard == SSP_SYSREG
11496 && !(opts->x_aarch64_stack_protector_guard_offset_str
11497 && opts->x_aarch64_stack_protector_guard_reg_str))
11498 {
a3f9f006
ML
11499 error ("both %<-mstack-protector-guard-offset%> and "
11500 "%<-mstack-protector-guard-reg%> must be used "
11501 "with %<-mstack-protector-guard=sysreg%>");
cd0b2d36
RR
11502 }
11503
11504 if (opts->x_aarch64_stack_protector_guard_reg_str)
11505 {
11506 if (strlen (opts->x_aarch64_stack_protector_guard_reg_str) > 100)
11507 error ("specify a system register with a small string length.");
11508 }
11509
11510 if (opts->x_aarch64_stack_protector_guard_offset_str)
11511 {
11512 char *end;
11513 const char *str = aarch64_stack_protector_guard_offset_str;
11514 errno = 0;
11515 long offs = strtol (aarch64_stack_protector_guard_offset_str, &end, 0);
11516 if (!*str || *end || errno)
11517 error ("%qs is not a valid offset in %qs", str,
63d42e89 11518 "-mstack-protector-guard-offset=");
cd0b2d36
RR
11519 aarch64_stack_protector_guard_offset = offs;
11520 }
11521
0cfff2a1 11522 initialize_aarch64_code_model (opts);
5eee3c34 11523 initialize_aarch64_tls_size (opts);
63892fa2 11524
2d6bc7fa
KT
11525 int queue_depth = 0;
11526 switch (aarch64_tune_params.autoprefetcher_model)
11527 {
11528 case tune_params::AUTOPREFETCHER_OFF:
11529 queue_depth = -1;
11530 break;
11531 case tune_params::AUTOPREFETCHER_WEAK:
11532 queue_depth = 0;
11533 break;
11534 case tune_params::AUTOPREFETCHER_STRONG:
11535 queue_depth = max_insn_queue_index + 1;
11536 break;
11537 default:
11538 gcc_unreachable ();
11539 }
11540
11541 /* We don't mind passing in global_options_set here as we don't use
11542 the *options_set structs anyway. */
11543 maybe_set_param_value (PARAM_SCHED_AUTOPREF_QUEUE_DEPTH,
11544 queue_depth,
11545 opts->x_param_values,
11546 global_options_set.x_param_values);
11547
9d2c6e2e
MK
11548 /* Set up parameters to be used in prefetching algorithm. Do not
11549 override the defaults unless we are tuning for a core we have
11550 researched values for. */
11551 if (aarch64_tune_params.prefetch->num_slots > 0)
11552 maybe_set_param_value (PARAM_SIMULTANEOUS_PREFETCHES,
11553 aarch64_tune_params.prefetch->num_slots,
11554 opts->x_param_values,
11555 global_options_set.x_param_values);
11556 if (aarch64_tune_params.prefetch->l1_cache_size >= 0)
11557 maybe_set_param_value (PARAM_L1_CACHE_SIZE,
11558 aarch64_tune_params.prefetch->l1_cache_size,
11559 opts->x_param_values,
11560 global_options_set.x_param_values);
11561 if (aarch64_tune_params.prefetch->l1_cache_line_size >= 0)
50487d79 11562 maybe_set_param_value (PARAM_L1_CACHE_LINE_SIZE,
9d2c6e2e
MK
11563 aarch64_tune_params.prefetch->l1_cache_line_size,
11564 opts->x_param_values,
11565 global_options_set.x_param_values);
11566 if (aarch64_tune_params.prefetch->l2_cache_size >= 0)
11567 maybe_set_param_value (PARAM_L2_CACHE_SIZE,
11568 aarch64_tune_params.prefetch->l2_cache_size,
50487d79
EM
11569 opts->x_param_values,
11570 global_options_set.x_param_values);
d2ff35c0
LM
11571 if (!aarch64_tune_params.prefetch->prefetch_dynamic_strides)
11572 maybe_set_param_value (PARAM_PREFETCH_DYNAMIC_STRIDES,
11573 0,
11574 opts->x_param_values,
11575 global_options_set.x_param_values);
59100dfc
LM
11576 if (aarch64_tune_params.prefetch->minimum_stride >= 0)
11577 maybe_set_param_value (PARAM_PREFETCH_MINIMUM_STRIDE,
11578 aarch64_tune_params.prefetch->minimum_stride,
11579 opts->x_param_values,
11580 global_options_set.x_param_values);
50487d79 11581
13494fcb
WD
11582 /* Use the alternative scheduling-pressure algorithm by default. */
11583 maybe_set_param_value (PARAM_SCHED_PRESSURE_ALGORITHM, SCHED_PRESSURE_MODEL,
11584 opts->x_param_values,
11585 global_options_set.x_param_values);
11586
fbe9af50
TC
11587 /* If the user hasn't changed it via configure then set the default to 64 KB
11588 for the backend. */
11589 maybe_set_param_value (PARAM_STACK_CLASH_PROTECTION_GUARD_SIZE,
11590 DEFAULT_STK_CLASH_GUARD_SIZE == 0
11591 ? 16 : DEFAULT_STK_CLASH_GUARD_SIZE,
11592 opts->x_param_values,
11593 global_options_set.x_param_values);
11594
11595 /* Validate the guard size. */
11596 int guard_size = PARAM_VALUE (PARAM_STACK_CLASH_PROTECTION_GUARD_SIZE);
fbe9af50
TC
11597
11598 /* Enforce that interval is the same size as size so the mid-end does the
11599 right thing. */
11600 maybe_set_param_value (PARAM_STACK_CLASH_PROTECTION_PROBE_INTERVAL,
11601 guard_size,
11602 opts->x_param_values,
11603 global_options_set.x_param_values);
11604
11605 /* The maybe_set calls won't update the value if the user has explicitly set
11606 one. Which means we need to validate that probing interval and guard size
11607 are equal. */
11608 int probe_interval
11609 = PARAM_VALUE (PARAM_STACK_CLASH_PROTECTION_PROBE_INTERVAL);
11610 if (guard_size != probe_interval)
904f3daa
ML
11611 error ("stack clash guard size %<%d%> must be equal to probing interval "
11612 "%<%d%>", guard_size, probe_interval);
fbe9af50 11613
16b2cafd
MK
11614 /* Enable sw prefetching at specified optimization level for
 11615 CPUs that have prefetch. Lower the optimization level threshold by 1
11616 when profiling is enabled. */
11617 if (opts->x_flag_prefetch_loop_arrays < 0
11618 && !opts->x_optimize_size
11619 && aarch64_tune_params.prefetch->default_opt_level >= 0
11620 && opts->x_optimize >= aarch64_tune_params.prefetch->default_opt_level)
11621 opts->x_flag_prefetch_loop_arrays = 1;
11622
266c2b54
ML
11623 if (opts->x_aarch64_arch_string == NULL)
11624 opts->x_aarch64_arch_string = selected_arch->name;
11625 if (opts->x_aarch64_cpu_string == NULL)
11626 opts->x_aarch64_cpu_string = selected_cpu->name;
11627 if (opts->x_aarch64_tune_string == NULL)
11628 opts->x_aarch64_tune_string = selected_tune->name;
11629
0cfff2a1
KT
11630 aarch64_override_options_after_change_1 (opts);
11631}
43e9d192 11632
01f44038
KT
11633/* Print a hint with a suggestion for a core or architecture name that
11634 most closely resembles what the user passed in STR. ARCH is true if
11635 the user is asking for an architecture name. ARCH is false if the user
11636 is asking for a core name. */
11637
11638static void
11639aarch64_print_hint_for_core_or_arch (const char *str, bool arch)
11640{
11641 auto_vec<const char *> candidates;
11642 const struct processor *entry = arch ? all_architectures : all_cores;
11643 for (; entry->name != NULL; entry++)
11644 candidates.safe_push (entry->name);
a08b5429
ML
11645
11646#ifdef HAVE_LOCAL_CPU_DETECT
11647 /* Also add "native" as a possible value. */
11648 if (arch)
11649 candidates.safe_push ("native");
11650#endif
11651
01f44038
KT
11652 char *s;
11653 const char *hint = candidates_list_and_hint (str, s, candidates);
11654 if (hint)
11655 inform (input_location, "valid arguments are: %s;"
11656 " did you mean %qs?", s, hint);
6285e915
ML
11657 else
11658 inform (input_location, "valid arguments are: %s", s);
11659
01f44038
KT
11660 XDELETEVEC (s);
11661}
11662
11663/* Print a hint with a suggestion for a core name that most closely resembles
11664 what the user passed in STR. */
11665
11666inline static void
11667aarch64_print_hint_for_core (const char *str)
11668{
11669 aarch64_print_hint_for_core_or_arch (str, false);
11670}
11671
11672/* Print a hint with a suggestion for an architecture name that most closely
11673 resembles what the user passed in STR. */
11674
11675inline static void
11676aarch64_print_hint_for_arch (const char *str)
11677{
11678 aarch64_print_hint_for_core_or_arch (str, true);
11679}
11680
c7887347
ML
11681
11682/* Print a hint with a suggestion for an extension name
11683 that most closely resembles what the user passed in STR. */
11684
11685void
11686aarch64_print_hint_for_extensions (const std::string &str)
11687{
11688 auto_vec<const char *> candidates;
11689 aarch64_get_all_extension_candidates (&candidates);
11690 char *s;
11691 const char *hint = candidates_list_and_hint (str.c_str (), s, candidates);
11692 if (hint)
11693 inform (input_location, "valid arguments are: %s;"
11694 " did you mean %qs?", s, hint);
11695 else
11696 inform (input_location, "valid arguments are: %s", s);
11697
11698 XDELETEVEC (s);
11699}
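/* For illustration: given a misspelled extension such as "crpto", the hint
   machinery above would typically emit a note along the lines of

     note: valid arguments are: crc crypto fp simd ...; did you mean 'crypto'?

   The exact candidate list and suggestion depend on the extensions known to
   this particular build, so the text shown here is only an approximation.  */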
11700
0cfff2a1
KT
11701/* Validate a command-line -mcpu option. Parse the cpu and extensions (if any)
11702 specified in STR and throw errors if appropriate. Put the results if
361fb3ee
KT
11703 they are valid in RES and ISA_FLAGS. Return whether the option is
11704 valid. */
43e9d192 11705
361fb3ee 11706static bool
0cfff2a1
KT
11707aarch64_validate_mcpu (const char *str, const struct processor **res,
11708 unsigned long *isa_flags)
11709{
c7887347 11710 std::string invalid_extension;
0cfff2a1 11711 enum aarch64_parse_opt_result parse_res
c7887347 11712 = aarch64_parse_cpu (str, res, isa_flags, &invalid_extension);
0cfff2a1
KT
11713
11714 if (parse_res == AARCH64_PARSE_OK)
361fb3ee 11715 return true;
0cfff2a1
KT
11716
11717 switch (parse_res)
11718 {
11719 case AARCH64_PARSE_MISSING_ARG:
fb241da2 11720 error ("missing cpu name in %<-mcpu=%s%>", str);
0cfff2a1
KT
11721 break;
11722 case AARCH64_PARSE_INVALID_ARG:
a3f9f006 11723 error ("unknown value %qs for %<-mcpu%>", str);
01f44038 11724 aarch64_print_hint_for_core (str);
0cfff2a1
KT
11725 break;
11726 case AARCH64_PARSE_INVALID_FEATURE:
c7887347
ML
11727 error ("invalid feature modifier %qs in %<-mcpu=%s%>",
11728 invalid_extension.c_str (), str);
11729 aarch64_print_hint_for_extensions (invalid_extension);
0cfff2a1
KT
11730 break;
11731 default:
11732 gcc_unreachable ();
11733 }
361fb3ee
KT
11734
11735 return false;
0cfff2a1
KT
11736}
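/* Worked examples of the diagnostics above (the command lines and core names
   are illustrative; the sets accepted depend on this build):

     -mcpu=                   -> "missing cpu name in '-mcpu='"
     -mcpu=foo                -> "unknown value 'foo' for '-mcpu'" plus a
                                 core-name hint from aarch64_print_hint_for_core
     -mcpu=cortex-a57+nofoo   -> "invalid feature modifier 'nofoo' in
                                 '-mcpu=cortex-a57+nofoo'" plus an extension hint
     -mcpu=cortex-a57+crypto  -> accepted; *RES and *ISA_FLAGS are filled in.  */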
11737
efac62a3
ST
11738/* Parses CONST_STR for branch protection features specified in
11739 aarch64_branch_protect_types, and sets any global variables required. Returns
11740 the parsing result and assigns LAST_STR to the last processed token from
11741 CONST_STR so that it can be used for error reporting. */
11742
11743static enum aarch64_parse_opt_result
11744aarch64_parse_branch_protection (const char *const_str,
11745 char **last_str)
11746{
11747 char *str_root = xstrdup (const_str);
11748 char* token_save = NULL;
11749 char *str = strtok_r (str_root, "+", &token_save);
11750 enum aarch64_parse_opt_result res = AARCH64_PARSE_OK;
11751 if (!str)
11752 res = AARCH64_PARSE_MISSING_ARG;
11753 else
11754 {
11755 char *next_str = strtok_r (NULL, "+", &token_save);
11756 /* Reset the branch protection features to their defaults. */
11757 aarch64_handle_no_branch_protection (NULL, NULL);
11758
11759 while (str && res == AARCH64_PARSE_OK)
11760 {
11761 const aarch64_branch_protect_type* type = aarch64_branch_protect_types;
11762 bool found = false;
11763 /* Search for this type. */
11764 while (type && type->name && !found && res == AARCH64_PARSE_OK)
11765 {
11766 if (strcmp (str, type->name) == 0)
11767 {
11768 found = true;
11769 res = type->handler (str, next_str);
11770 str = next_str;
11771 next_str = strtok_r (NULL, "+", &token_save);
11772 }
11773 else
11774 type++;
11775 }
11776 if (found && res == AARCH64_PARSE_OK)
11777 {
11778 bool found_subtype = true;
11779 /* Loop through each token until we find one that isn't a
11780 subtype. */
11781 while (found_subtype)
11782 {
11783 found_subtype = false;
11784 const aarch64_branch_protect_type *subtype = type->subtypes;
11785 /* Search for the subtype. */
11786 while (str && subtype && subtype->name && !found_subtype
11787 && res == AARCH64_PARSE_OK)
11788 {
11789 if (strcmp (str, subtype->name) == 0)
11790 {
11791 found_subtype = true;
11792 res = subtype->handler (str, next_str);
11793 str = next_str;
11794 next_str = strtok_r (NULL, "+", &token_save);
11795 }
11796 else
11797 subtype++;
11798 }
11799 }
11800 }
11801 else if (!found)
11802 res = AARCH64_PARSE_INVALID_ARG;
11803 }
11804 }
11805 /* Copy the last processed token into the argument to pass it back.
11806 Used by option and attribute validation to print the offending token. */
11807 if (last_str)
11808 {
11809 if (str) strcpy (*last_str, str);
11810 else *last_str = NULL;
11811 }
11812 if (res == AARCH64_PARSE_OK)
11813 {
11814 /* If needed, allocate the accepted string, then copy in CONST_STR.
11815 Used by override_option_after_change_1. */
11816 if (!accepted_branch_protection_string)
11817 accepted_branch_protection_string = (char *) xmalloc (
11818 BRANCH_PROTECT_STR_MAX
11819 + 1);
11820 strncpy (accepted_branch_protection_string, const_str,
11821 BRANCH_PROTECT_STR_MAX + 1);
11822 /* Forcibly null-terminate. */
11823 accepted_branch_protection_string[BRANCH_PROTECT_STR_MAX] = '\0';
11824 }
11825 return res;
11826}
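/* Illustrative walk-through of the tokenizer above, using the documented
   -mbranch-protection values (assumed to be the ones registered in
   aarch64_branch_protect_types for this build):

     "standard"      -> a single type token; its handler enables the defaults.
     "pac-ret+leaf"  -> "pac-ret" matches a type, then "leaf" is consumed by
                        the subtype loop as a modifier of "pac-ret".
     "pac-ret+bti"   -> "pac-ret" matches a type; "bti" is not one of its
                        subtypes, so the outer loop matches it as a new type.
     "pac-ret+foo"   -> "foo" matches neither a type nor a subtype, so the
                        result is AARCH64_PARSE_INVALID_ARG and LAST_STR points
                        at "foo" for the caller's diagnostic.  */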
11827
11828static bool
11829aarch64_validate_mbranch_protection (const char *const_str)
11830{
11831 char *str = (char *) xmalloc (strlen (const_str) + 1);
11832 enum aarch64_parse_opt_result res =
11833 aarch64_parse_branch_protection (const_str, &str);
11834 if (res == AARCH64_PARSE_INVALID_ARG)
a9c697b8 11835 error ("invalid argument %<%s%> for %<-mbranch-protection=%>", str);
efac62a3 11836 else if (res == AARCH64_PARSE_MISSING_ARG)
a9c697b8 11837 error ("missing argument for %<-mbranch-protection=%>");
efac62a3
ST
11838 free (str);
11839 return res == AARCH64_PARSE_OK;
11840}
11841
0cfff2a1
KT
11842/* Validate a command-line -march option. Parse the arch and extensions
11843 (if any) specified in STR and throw errors if appropriate. Put the
361fb3ee
KT
11844 results, if they are valid, in RES and ISA_FLAGS. Return whether the
11845 option is valid. */
0cfff2a1 11846
361fb3ee 11847static bool
0cfff2a1 11848aarch64_validate_march (const char *str, const struct processor **res,
01f44038 11849 unsigned long *isa_flags)
0cfff2a1 11850{
c7887347 11851 std::string invalid_extension;
0cfff2a1 11852 enum aarch64_parse_opt_result parse_res
c7887347 11853 = aarch64_parse_arch (str, res, isa_flags, &invalid_extension);
0cfff2a1
KT
11854
11855 if (parse_res == AARCH64_PARSE_OK)
361fb3ee 11856 return true;
0cfff2a1
KT
11857
11858 switch (parse_res)
11859 {
11860 case AARCH64_PARSE_MISSING_ARG:
fb241da2 11861 error ("missing arch name in %<-march=%s%>", str);
0cfff2a1
KT
11862 break;
11863 case AARCH64_PARSE_INVALID_ARG:
a3f9f006 11864 error ("unknown value %qs for %<-march%>", str);
01f44038 11865 aarch64_print_hint_for_arch (str);
0cfff2a1
KT
11866 break;
11867 case AARCH64_PARSE_INVALID_FEATURE:
c7887347
ML
11868 error ("invalid feature modifier %qs in %<-march=%s%>",
11869 invalid_extension.c_str (), str);
11870 aarch64_print_hint_for_extensions (invalid_extension);
0cfff2a1
KT
11871 break;
11872 default:
11873 gcc_unreachable ();
11874 }
361fb3ee
KT
11875
11876 return false;
0cfff2a1
KT
11877}
11878
11879/* Validate a command-line -mtune option. Parse the cpu
11880 specified in STR and throw errors if appropriate. Put the
361fb3ee
KT
11881 result, if it is valid, in RES. Return whether the option is
11882 valid. */
0cfff2a1 11883
361fb3ee 11884static bool
0cfff2a1
KT
11885aarch64_validate_mtune (const char *str, const struct processor **res)
11886{
11887 enum aarch64_parse_opt_result parse_res
11888 = aarch64_parse_tune (str, res);
11889
11890 if (parse_res == AARCH64_PARSE_OK)
361fb3ee 11891 return true;
0cfff2a1
KT
11892
11893 switch (parse_res)
11894 {
11895 case AARCH64_PARSE_MISSING_ARG:
fb241da2 11896 error ("missing cpu name in %<-mtune=%s%>", str);
0cfff2a1
KT
11897 break;
11898 case AARCH64_PARSE_INVALID_ARG:
a3f9f006 11899 error ("unknown value %qs for %<-mtune%>", str);
01f44038 11900 aarch64_print_hint_for_core (str);
0cfff2a1
KT
11901 break;
11902 default:
11903 gcc_unreachable ();
11904 }
361fb3ee
KT
11905 return false;
11906}
11907
11908/* Return the CPU corresponding to the enum CPU.
11909 If it doesn't specify a cpu, return the default. */
11910
11911static const struct processor *
11912aarch64_get_tune_cpu (enum aarch64_processor cpu)
11913{
11914 if (cpu != aarch64_none)
11915 return &all_cores[cpu];
11916
11917 /* The & 0x3f is to extract the bottom 6 bits that encode the
11918 default cpu as selected by the --with-cpu GCC configure option
11919 in config.gcc.
11920 ???: The whole TARGET_CPU_DEFAULT and AARCH64_CPU_DEFAULT_FLAGS
11921 flags mechanism should be reworked to make it more sane. */
11922 return &all_cores[TARGET_CPU_DEFAULT & 0x3f];
11923}
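/* A sketch of the TARGET_CPU_DEFAULT encoding relied on above and in
   aarch64_override_options (the exact layout is produced by config.gcc at
   configure time):

     index = TARGET_CPU_DEFAULT & 0x3f;   bits [5:0]  - default core index
     flags = TARGET_CPU_DEFAULT >> 6;     bits above  - default ISA flags

   so a configure-time --with-cpu selection packs both the core identity and
   its feature flags into a single integer.  */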
11924
11925/* Return the architecture corresponding to the enum ARCH.
11926 If it doesn't specify a valid architecture, return the default. */
11927
11928static const struct processor *
11929aarch64_get_arch (enum aarch64_arch arch)
11930{
11931 if (arch != aarch64_no_arch)
11932 return &all_architectures[arch];
11933
11934 const struct processor *cpu = &all_cores[TARGET_CPU_DEFAULT & 0x3f];
11935
11936 return &all_architectures[cpu->arch];
0cfff2a1
KT
11937}
11938
43cacb12
RS
11939/* Return the VG value associated with -msve-vector-bits= value VALUE. */
11940
11941static poly_uint16
11942aarch64_convert_sve_vector_bits (aarch64_sve_vector_bits_enum value)
11943{
11944 /* For now generate vector-length agnostic code for -msve-vector-bits=128.
11945 This ensures we can clearly distinguish SVE and Advanced SIMD modes when
11946 deciding which .md file patterns to use and when deciding whether
11947 something is a legitimate address or constant. */
11948 if (value == SVE_SCALABLE || value == SVE_128)
11949 return poly_uint16 (2, 2);
11950 else
11951 return (int) value / 64;
11952}
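/* Illustrative mapping implemented above, using the 64-bit VG granule:
   for fixed lengths the VG count is simply bits / 64.

     -msve-vector-bits=256      -> VG = 256 / 64 = 4
     -msve-vector-bits=512      -> VG = 8
     -msve-vector-bits=2048     -> VG = 32
     -msve-vector-bits=scalable -> poly_uint16 (2, 2), i.e. length-agnostic
     -msve-vector-bits=128      -> poly_uint16 (2, 2) as well (see above).  */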
11953
0cfff2a1
KT
11954/* Implement TARGET_OPTION_OVERRIDE. This is called once at the beginning
11955 and is used to parse the -m{cpu,tune,arch} strings and set up the initial
11956 tuning structs. In particular it must set selected_tune and
11957 aarch64_isa_flags that define the available ISA features and tuning
11958 decisions. It must also set selected_arch as this will be used to
11959 output the .arch asm tags for each function. */
11960
11961static void
11962aarch64_override_options (void)
11963{
11964 unsigned long cpu_isa = 0;
11965 unsigned long arch_isa = 0;
11966 aarch64_isa_flags = 0;
11967
361fb3ee
KT
11968 bool valid_cpu = true;
11969 bool valid_tune = true;
11970 bool valid_arch = true;
11971
0cfff2a1
KT
11972 selected_cpu = NULL;
11973 selected_arch = NULL;
11974 selected_tune = NULL;
11975
efac62a3
ST
11976 if (aarch64_branch_protection_string)
11977 aarch64_validate_mbranch_protection (aarch64_branch_protection_string);
11978
0cfff2a1
KT
11979 /* -mcpu=CPU is shorthand for -march=ARCH_FOR_CPU, -mtune=CPU.
11980 If either of -march or -mtune is given, they override their
11981 respective component of -mcpu. */
11982 if (aarch64_cpu_string)
361fb3ee
KT
11983 valid_cpu = aarch64_validate_mcpu (aarch64_cpu_string, &selected_cpu,
11984 &cpu_isa);
0cfff2a1
KT
11985
11986 if (aarch64_arch_string)
361fb3ee
KT
11987 valid_arch = aarch64_validate_march (aarch64_arch_string, &selected_arch,
11988 &arch_isa);
0cfff2a1
KT
11989
11990 if (aarch64_tune_string)
361fb3ee 11991 valid_tune = aarch64_validate_mtune (aarch64_tune_string, &selected_tune);
43e9d192 11992
6881e3c1
OH
11993#ifdef SUBTARGET_OVERRIDE_OPTIONS
11994 SUBTARGET_OVERRIDE_OPTIONS;
11995#endif
11996
43e9d192
IB
11997 /* If the user did not specify a processor, choose the default
11998 one for them. This will be the CPU set during configuration using
a3cd0246 11999 --with-cpu, otherwise it is "generic". */
43e9d192
IB
12000 if (!selected_cpu)
12001 {
0cfff2a1
KT
12002 if (selected_arch)
12003 {
12004 selected_cpu = &all_cores[selected_arch->ident];
12005 aarch64_isa_flags = arch_isa;
361fb3ee 12006 explicit_arch = selected_arch->arch;
0cfff2a1
KT
12007 }
12008 else
12009 {
361fb3ee
KT
12010 /* Get default configure-time CPU. */
12011 selected_cpu = aarch64_get_tune_cpu (aarch64_none);
0cfff2a1
KT
12012 aarch64_isa_flags = TARGET_CPU_DEFAULT >> 6;
12013 }
361fb3ee
KT
12014
12015 if (selected_tune)
12016 explicit_tune_core = selected_tune->ident;
0cfff2a1
KT
12017 }
12018 /* If both -mcpu and -march are specified check that they are architecturally
12019 compatible, warn if they're not and prefer the -march ISA flags. */
12020 else if (selected_arch)
12021 {
12022 if (selected_arch->arch != selected_cpu->arch)
12023 {
a3f9f006 12024 warning (0, "switch %<-mcpu=%s%> conflicts with %<-march=%s%> switch",
0cfff2a1
KT
12025 all_architectures[selected_cpu->arch].name,
12026 selected_arch->name);
12027 }
12028 aarch64_isa_flags = arch_isa;
361fb3ee
KT
12029 explicit_arch = selected_arch->arch;
12030 explicit_tune_core = selected_tune ? selected_tune->ident
12031 : selected_cpu->ident;
0cfff2a1
KT
12032 }
12033 else
12034 {
12035 /* -mcpu but no -march. */
12036 aarch64_isa_flags = cpu_isa;
361fb3ee
KT
12037 explicit_tune_core = selected_tune ? selected_tune->ident
12038 : selected_cpu->ident;
12039 gcc_assert (selected_cpu);
12040 selected_arch = &all_architectures[selected_cpu->arch];
12041 explicit_arch = selected_arch->arch;
43e9d192
IB
12042 }
12043
0cfff2a1
KT
12044 /* Set the arch as well, as we will need it when outputting
12045 the .arch directive in assembly. */
12046 if (!selected_arch)
12047 {
12048 gcc_assert (selected_cpu);
12049 selected_arch = &all_architectures[selected_cpu->arch];
12050 }
43e9d192 12051
43e9d192 12052 if (!selected_tune)
3edaf26d 12053 selected_tune = selected_cpu;
43e9d192 12054
c7ff4f0f
SD
12055 if (aarch64_enable_bti == 2)
12056 {
12057#ifdef TARGET_ENABLE_BTI
12058 aarch64_enable_bti = 1;
12059#else
12060 aarch64_enable_bti = 0;
12061#endif
12062 }
12063
12064 /* Return address signing is currently not supported for ILP32 targets. For
12065 LP64 targets use the configured option in the absence of a command-line
12066 option for -mbranch-protection. */
12067 if (!TARGET_ILP32 && accepted_branch_protection_string == NULL)
12068 {
12069#ifdef TARGET_ENABLE_PAC_RET
12070 aarch64_ra_sign_scope = AARCH64_FUNCTION_NON_LEAF;
c7ff4f0f
SD
12071#else
12072 aarch64_ra_sign_scope = AARCH64_FUNCTION_NONE;
12073#endif
12074 }
12075
0cfff2a1
KT
12076#ifndef HAVE_AS_MABI_OPTION
12077 /* The compiler may have been configured with 2.23.* binutils, which does
12078 not have support for ILP32. */
12079 if (TARGET_ILP32)
a3f9f006 12080 error ("assembler does not support %<-mabi=ilp32%>");
0cfff2a1 12081#endif
43e9d192 12082
43cacb12
RS
12083 /* Convert -msve-vector-bits to a VG count. */
12084 aarch64_sve_vg = aarch64_convert_sve_vector_bits (aarch64_sve_vector_bits);
12085
db58fd89 12086 if (aarch64_ra_sign_scope != AARCH64_FUNCTION_NONE && TARGET_ILP32)
a3f9f006 12087 sorry ("return address signing is only supported for %<-mabi=lp64%>");
db58fd89 12088
361fb3ee
KT
12089 /* Make sure we properly set up the explicit options. */
12090 if ((aarch64_cpu_string && valid_cpu)
12091 || (aarch64_tune_string && valid_tune))
12092 gcc_assert (explicit_tune_core != aarch64_none);
12093
12094 if ((aarch64_cpu_string && valid_cpu)
12095 || (aarch64_arch_string && valid_arch))
12096 gcc_assert (explicit_arch != aarch64_no_arch);
12097
5f7dbaa0
RE
12098 /* The pass to insert speculation tracking runs before
12099 shrink-wrapping and the latter does not know how to update the
12100 tracking status. So disable shrink-wrapping in this case. */
12101 if (aarch64_track_speculation)
12102 flag_shrink_wrap = 0;
12103
0cfff2a1
KT
12104 aarch64_override_options_internal (&global_options);
12105
12106 /* Save these options as the default ones in case we push and pop them later
12107 while processing functions with potential target attributes. */
12108 target_option_default_node = target_option_current_node
12109 = build_target_option_node (&global_options);
43e9d192
IB
12110}
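/* Worked examples of the -mcpu/-march/-mtune interplay implemented above
   (the core and architecture names are only illustrative):

     -mcpu=cortex-a57
       -> selected_cpu, selected_arch and selected_tune all follow cortex-a57;
          aarch64_isa_flags comes from the cpu's ISA (cpu_isa).
     -march=armv8.2-a
       -> selected_arch is armv8.2-a and selected_cpu defaults to that
          architecture's representative core; aarch64_isa_flags = arch_isa.
     -mcpu=cortex-a53 -march=armv8.2-a
       -> the architectures differ, so a warning is issued and the -march ISA
          flags win, while cortex-a53 is still used for tuning.
     nothing given
       -> the configure-time --with-cpu default, otherwise "generic".  */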
12111
12112/* Implement targetm.override_options_after_change. */
12113
12114static void
12115aarch64_override_options_after_change (void)
12116{
0cfff2a1 12117 aarch64_override_options_after_change_1 (&global_options);
43e9d192
IB
12118}
12119
12120static struct machine_function *
12121aarch64_init_machine_status (void)
12122{
12123 struct machine_function *machine;
766090c2 12124 machine = ggc_cleared_alloc<machine_function> ();
43e9d192
IB
12125 return machine;
12126}
12127
12128void
12129aarch64_init_expanders (void)
12130{
12131 init_machine_status = aarch64_init_machine_status;
12132}
12133
12134/* Set up aarch64_cmodel from the -mcmodel= setting and the PIC flags, rejecting combinations that are not supported. */
12135static void
0cfff2a1 12136initialize_aarch64_code_model (struct gcc_options *opts)
43e9d192 12137{
0cfff2a1 12138 if (opts->x_flag_pic)
43e9d192 12139 {
0cfff2a1 12140 switch (opts->x_aarch64_cmodel_var)
43e9d192
IB
12141 {
12142 case AARCH64_CMODEL_TINY:
12143 aarch64_cmodel = AARCH64_CMODEL_TINY_PIC;
12144 break;
12145 case AARCH64_CMODEL_SMALL:
34ecdb0f 12146#ifdef HAVE_AS_SMALL_PIC_RELOCS
1b1e81f8
JW
12147 aarch64_cmodel = (flag_pic == 2
12148 ? AARCH64_CMODEL_SMALL_PIC
12149 : AARCH64_CMODEL_SMALL_SPIC);
34ecdb0f
JW
12150#else
12151 aarch64_cmodel = AARCH64_CMODEL_SMALL_PIC;
12152#endif
43e9d192
IB
12153 break;
12154 case AARCH64_CMODEL_LARGE:
a3f9f006 12155 sorry ("code model %qs with %<-f%s%>", "large",
0cfff2a1 12156 opts->x_flag_pic > 1 ? "PIC" : "pic");
1c652781 12157 break;
43e9d192
IB
12158 default:
12159 gcc_unreachable ();
12160 }
12161 }
12162 else
0cfff2a1 12163 aarch64_cmodel = opts->x_aarch64_cmodel_var;
43e9d192
IB
12164}
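/* Worked example of the mapping above (assuming an assembler with
   HAVE_AS_SMALL_PIC_RELOCS, i.e. small PIC relocation support):

     -mcmodel=tiny  -fpic/-fPIC -> AARCH64_CMODEL_TINY_PIC
     -mcmodel=small -fpic       -> AARCH64_CMODEL_SMALL_SPIC (small GOT)
     -mcmodel=small -fPIC       -> AARCH64_CMODEL_SMALL_PIC
     -mcmodel=large -fpic/-fPIC -> rejected with sorry ()
     no -fpic/-fPIC             -> the -mcmodel= value is used unchanged.  */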
12165
361fb3ee
KT
12166/* Implement TARGET_OPTION_SAVE. */
12167
12168static void
12169aarch64_option_save (struct cl_target_option *ptr, struct gcc_options *opts)
12170{
12171 ptr->x_aarch64_override_tune_string = opts->x_aarch64_override_tune_string;
efac62a3
ST
12172 ptr->x_aarch64_branch_protection_string
12173 = opts->x_aarch64_branch_protection_string;
361fb3ee
KT
12174}
12175
12176/* Implement TARGET_OPTION_RESTORE. Restore the backend codegen decisions
12177 using the information saved in PTR. */
12178
12179static void
12180aarch64_option_restore (struct gcc_options *opts, struct cl_target_option *ptr)
12181{
12182 opts->x_explicit_tune_core = ptr->x_explicit_tune_core;
12183 selected_tune = aarch64_get_tune_cpu (ptr->x_explicit_tune_core);
12184 opts->x_explicit_arch = ptr->x_explicit_arch;
12185 selected_arch = aarch64_get_arch (ptr->x_explicit_arch);
12186 opts->x_aarch64_override_tune_string = ptr->x_aarch64_override_tune_string;
efac62a3
ST
12187 opts->x_aarch64_branch_protection_string
12188 = ptr->x_aarch64_branch_protection_string;
12189 if (opts->x_aarch64_branch_protection_string)
12190 {
12191 aarch64_parse_branch_protection (opts->x_aarch64_branch_protection_string,
12192 NULL);
12193 }
361fb3ee
KT
12194
12195 aarch64_override_options_internal (opts);
12196}
12197
12198/* Implement TARGET_OPTION_PRINT. */
12199
12200static void
12201aarch64_option_print (FILE *file, int indent, struct cl_target_option *ptr)
12202{
12203 const struct processor *cpu
12204 = aarch64_get_tune_cpu (ptr->x_explicit_tune_core);
12205 unsigned long isa_flags = ptr->x_aarch64_isa_flags;
12206 const struct processor *arch = aarch64_get_arch (ptr->x_explicit_arch);
054b4005 12207 std::string extension
04a99ebe 12208 = aarch64_get_extension_string_for_isa_flags (isa_flags, arch->flags);
361fb3ee
KT
12209
12210 fprintf (file, "%*sselected tune = %s\n", indent, "", cpu->name);
054b4005
JG
12211 fprintf (file, "%*sselected arch = %s%s\n", indent, "",
12212 arch->name, extension.c_str ());
361fb3ee
KT
12213}
12214
d78006d9
KT
12215static GTY(()) tree aarch64_previous_fndecl;
12216
e4ea20c8
KT
12217void
12218aarch64_reset_previous_fndecl (void)
12219{
12220 aarch64_previous_fndecl = NULL;
12221}
12222
acfc1ac1
KT
12223/* Restore or save the TREE_TARGET_GLOBALS from or to NEW_TREE.
12224 Used by aarch64_set_current_function and aarch64_pragma_target_parse to
12225 make sure optab availability predicates are recomputed when necessary. */
12226
12227void
12228aarch64_save_restore_target_globals (tree new_tree)
12229{
12230 if (TREE_TARGET_GLOBALS (new_tree))
12231 restore_target_globals (TREE_TARGET_GLOBALS (new_tree));
12232 else if (new_tree == target_option_default_node)
12233 restore_target_globals (&default_target_globals);
12234 else
12235 TREE_TARGET_GLOBALS (new_tree) = save_target_globals_default_opts ();
12236}
12237
d78006d9
KT
12238/* Implement TARGET_SET_CURRENT_FUNCTION. Unpack the codegen decisions
12239 like tuning and ISA features from the DECL_FUNCTION_SPECIFIC_TARGET
12240 of the function, if such exists. This function may be called multiple
12241 times on a single function so use aarch64_previous_fndecl to avoid
12242 setting up identical state. */
12243
12244static void
12245aarch64_set_current_function (tree fndecl)
12246{
acfc1ac1
KT
12247 if (!fndecl || fndecl == aarch64_previous_fndecl)
12248 return;
12249
d78006d9
KT
12250 tree old_tree = (aarch64_previous_fndecl
12251 ? DECL_FUNCTION_SPECIFIC_TARGET (aarch64_previous_fndecl)
12252 : NULL_TREE);
12253
acfc1ac1 12254 tree new_tree = DECL_FUNCTION_SPECIFIC_TARGET (fndecl);
d78006d9 12255
acfc1ac1
KT
12256 /* If current function has no attributes but the previous one did,
12257 use the default node. */
12258 if (!new_tree && old_tree)
12259 new_tree = target_option_default_node;
d78006d9 12260
acfc1ac1
KT
12261 /* If nothing to do, return. #pragma GCC reset or #pragma GCC pop to
12262 the default have been handled by aarch64_save_restore_target_globals from
12263 aarch64_pragma_target_parse. */
12264 if (old_tree == new_tree)
12265 return;
d78006d9 12266
acfc1ac1 12267 aarch64_previous_fndecl = fndecl;
6e17a23b 12268
acfc1ac1
KT
12269 /* First set the target options. */
12270 cl_target_option_restore (&global_options, TREE_TARGET_OPTION (new_tree));
6e17a23b 12271
acfc1ac1 12272 aarch64_save_restore_target_globals (new_tree);
d78006d9 12273}
361fb3ee 12274
5a2c8331
KT
12275/* Enum describing the various ways we can handle attributes.
12276 In many cases we can reuse the generic option handling machinery. */
12277
12278enum aarch64_attr_opt_type
12279{
12280 aarch64_attr_mask, /* Attribute should set a bit in target_flags. */
12281 aarch64_attr_bool, /* Attribute sets or unsets a boolean variable. */
12282 aarch64_attr_enum, /* Attribute sets an enum variable. */
12283 aarch64_attr_custom /* Attribute requires a custom handling function. */
12284};
12285
12286/* All the information needed to handle a target attribute.
12287 NAME is the name of the attribute.
9c582551 12288 ATTR_TYPE specifies the type of behavior of the attribute as described
5a2c8331
KT
12289 in the definition of enum aarch64_attr_opt_type.
12290 ALLOW_NEG is true if the attribute supports a "no-" form.
ab93e9b7
SE
12291 HANDLER is the function that takes the attribute string as an argument.
12292 It is needed only when the ATTR_TYPE is aarch64_attr_custom.
5a2c8331 12293 OPT_NUM is the enum specifying the option that the attribute modifies.
9c582551 12294 This is needed for attributes that mirror the behavior of a command-line
5a2c8331
KT
12295 option, that is it has ATTR_TYPE aarch64_attr_mask, aarch64_attr_bool or
12296 aarch64_attr_enum. */
12297
12298struct aarch64_attribute_info
12299{
12300 const char *name;
12301 enum aarch64_attr_opt_type attr_type;
12302 bool allow_neg;
ab93e9b7 12303 bool (*handler) (const char *);
5a2c8331
KT
12304 enum opt_code opt_num;
12305};
12306
ab93e9b7 12307/* Handle the ARCH_STR argument to the arch= target attribute. */
5a2c8331
KT
12308
12309static bool
ab93e9b7 12310aarch64_handle_attr_arch (const char *str)
5a2c8331
KT
12311{
12312 const struct processor *tmp_arch = NULL;
c7887347 12313 std::string invalid_extension;
5a2c8331 12314 enum aarch64_parse_opt_result parse_res
c7887347 12315 = aarch64_parse_arch (str, &tmp_arch, &aarch64_isa_flags, &invalid_extension);
5a2c8331
KT
12316
12317 if (parse_res == AARCH64_PARSE_OK)
12318 {
12319 gcc_assert (tmp_arch);
12320 selected_arch = tmp_arch;
12321 explicit_arch = selected_arch->arch;
12322 return true;
12323 }
12324
12325 switch (parse_res)
12326 {
12327 case AARCH64_PARSE_MISSING_ARG:
ab93e9b7 12328 error ("missing name in %<target(\"arch=\")%> pragma or attribute");
5a2c8331
KT
12329 break;
12330 case AARCH64_PARSE_INVALID_ARG:
ab93e9b7 12331 error ("invalid name (\"%s\") in %<target(\"arch=\")%> pragma or attribute", str);
01f44038 12332 aarch64_print_hint_for_arch (str);
5a2c8331
KT
12333 break;
12334 case AARCH64_PARSE_INVALID_FEATURE:
c7887347
ML
12335 error ("invalid feature modifier %s of value (\"%s\") in "
12336 "%<target()%> pragma or attribute", invalid_extension.c_str (), str);
12337 aarch64_print_hint_for_extensions (invalid_extension);
5a2c8331
KT
12338 break;
12339 default:
12340 gcc_unreachable ();
12341 }
12342
12343 return false;
12344}
12345
ab93e9b7 12346/* Handle the argument CPU_STR to the cpu= target attribute. */
5a2c8331
KT
12347
12348static bool
ab93e9b7 12349aarch64_handle_attr_cpu (const char *str)
5a2c8331
KT
12350{
12351 const struct processor *tmp_cpu = NULL;
c7887347 12352 std::string invalid_extension;
5a2c8331 12353 enum aarch64_parse_opt_result parse_res
c7887347 12354 = aarch64_parse_cpu (str, &tmp_cpu, &aarch64_isa_flags, &invalid_extension);
5a2c8331
KT
12355
12356 if (parse_res == AARCH64_PARSE_OK)
12357 {
12358 gcc_assert (tmp_cpu);
12359 selected_tune = tmp_cpu;
12360 explicit_tune_core = selected_tune->ident;
12361
12362 selected_arch = &all_architectures[tmp_cpu->arch];
12363 explicit_arch = selected_arch->arch;
12364 return true;
12365 }
12366
12367 switch (parse_res)
12368 {
12369 case AARCH64_PARSE_MISSING_ARG:
ab93e9b7 12370 error ("missing name in %<target(\"cpu=\")%> pragma or attribute");
5a2c8331
KT
12371 break;
12372 case AARCH64_PARSE_INVALID_ARG:
ab93e9b7 12373 error ("invalid name (\"%s\") in %<target(\"cpu=\")%> pragma or attribute", str);
01f44038 12374 aarch64_print_hint_for_core (str);
5a2c8331
KT
12375 break;
12376 case AARCH64_PARSE_INVALID_FEATURE:
c7887347
ML
12377 error ("invalid feature modifier %s of value (\"%s\") in "
12378 "%<target()%> pragma or attribute", invalid_extension.c_str (), str);
12379 aarch64_print_hint_for_extensions (invalid_extension);
5a2c8331
KT
12380 break;
12381 default:
12382 gcc_unreachable ();
12383 }
12384
12385 return false;
12386}
12387
efac62a3
ST
12388/* Handle the argument STR to the branch-protection= attribute. */
12389
12390 static bool
12391 aarch64_handle_attr_branch_protection (const char *str)
12392 {
12393 char *err_str = (char *) xmalloc (strlen (str) + 1);
12394 enum aarch64_parse_opt_result res = aarch64_parse_branch_protection (str,
12395 &err_str);
12396 bool success = false;
12397 switch (res)
12398 {
12399 case AARCH64_PARSE_MISSING_ARG:
12400 error ("missing argument to %<target(\"branch-protection=\")%> pragma or"
12401 " attribute");
12402 break;
12403 case AARCH64_PARSE_INVALID_ARG:
12404 error ("invalid protection type (\"%s\") in %<target(\"branch-protection"
12405 "=\")%> pragma or attribute", err_str);
12406 break;
12407 case AARCH64_PARSE_OK:
12408 success = true;
12409 /* Fall through. */
12410 case AARCH64_PARSE_INVALID_FEATURE:
12411 break;
12412 default:
12413 gcc_unreachable ();
12414 }
12415 free (err_str);
12416 return success;
12417 }
12418
ab93e9b7 12419/* Handle the argument STR to the tune= target attribute. */
5a2c8331
KT
12420
12421static bool
ab93e9b7 12422aarch64_handle_attr_tune (const char *str)
5a2c8331
KT
12423{
12424 const struct processor *tmp_tune = NULL;
12425 enum aarch64_parse_opt_result parse_res
12426 = aarch64_parse_tune (str, &tmp_tune);
12427
12428 if (parse_res == AARCH64_PARSE_OK)
12429 {
12430 gcc_assert (tmp_tune);
12431 selected_tune = tmp_tune;
12432 explicit_tune_core = selected_tune->ident;
12433 return true;
12434 }
12435
12436 switch (parse_res)
12437 {
12438 case AARCH64_PARSE_INVALID_ARG:
ab93e9b7 12439 error ("invalid name (\"%s\") in %<target(\"tune=\")%> pragma or attribute", str);
01f44038 12440 aarch64_print_hint_for_core (str);
5a2c8331
KT
12441 break;
12442 default:
12443 gcc_unreachable ();
12444 }
12445
12446 return false;
12447}
12448
12449/* Parse an architecture extensions target attribute string specified in STR.
12450 For example "+fp+nosimd". Show any errors if needed. Return TRUE
12451 if successful. Update aarch64_isa_flags to reflect the ISA features
ab93e9b7 12452 modified. */
5a2c8331
KT
12453
12454static bool
ab93e9b7 12455aarch64_handle_attr_isa_flags (char *str)
5a2c8331
KT
12456{
12457 enum aarch64_parse_opt_result parse_res;
12458 unsigned long isa_flags = aarch64_isa_flags;
12459
e4ea20c8
KT
12460 /* We allow "+nothing" in the beginning to clear out all architectural
12461 features if the user wants to handpick specific features. */
12462 if (strncmp ("+nothing", str, 8) == 0)
12463 {
12464 isa_flags = 0;
12465 str += 8;
12466 }
12467
c7887347
ML
12468 std::string invalid_extension;
12469 parse_res = aarch64_parse_extension (str, &isa_flags, &invalid_extension);
5a2c8331
KT
12470
12471 if (parse_res == AARCH64_PARSE_OK)
12472 {
12473 aarch64_isa_flags = isa_flags;
12474 return true;
12475 }
12476
12477 switch (parse_res)
12478 {
12479 case AARCH64_PARSE_MISSING_ARG:
ab93e9b7 12480 error ("missing value in %<target()%> pragma or attribute");
5a2c8331
KT
12481 break;
12482
12483 case AARCH64_PARSE_INVALID_FEATURE:
c7887347
ML
12484 error ("invalid feature modifier %s of value (\"%s\") in "
12485 "%<target()%> pragma or attribute", invalid_extension.c_str (), str);
5a2c8331
KT
12486 break;
12487
12488 default:
12489 gcc_unreachable ();
12490 }
12491
12492 return false;
12493}
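/* Illustrative uses of the "+..." attribute form handled above (the extension
   names are the usual ones for this port and may vary by build):

     __attribute__ ((target ("+crc+nosimd")))    starts from the prevailing
         aarch64_isa_flags and applies each modifier in turn.
     __attribute__ ((target ("+nothing+simd")))  clears all architectural
         features first, then enables just the ones spelled out (plus any
         features they imply, as decided by aarch64_parse_extension).  */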
12494
12495/* The target attributes that we support. On top of these we also support just
12496 ISA extensions, like __attribute__ ((target ("+crc"))), but that case is
12497 handled explicitly in aarch64_process_one_target_attr. */
12498
12499static const struct aarch64_attribute_info aarch64_attributes[] =
12500{
12501 { "general-regs-only", aarch64_attr_mask, false, NULL,
12502 OPT_mgeneral_regs_only },
12503 { "fix-cortex-a53-835769", aarch64_attr_bool, true, NULL,
12504 OPT_mfix_cortex_a53_835769 },
48bb1a55
CL
12505 { "fix-cortex-a53-843419", aarch64_attr_bool, true, NULL,
12506 OPT_mfix_cortex_a53_843419 },
5a2c8331 12507 { "cmodel", aarch64_attr_enum, false, NULL, OPT_mcmodel_ },
675d044c 12508 { "strict-align", aarch64_attr_mask, true, NULL, OPT_mstrict_align },
5a2c8331
KT
12509 { "omit-leaf-frame-pointer", aarch64_attr_bool, true, NULL,
12510 OPT_momit_leaf_frame_pointer },
12511 { "tls-dialect", aarch64_attr_enum, false, NULL, OPT_mtls_dialect_ },
12512 { "arch", aarch64_attr_custom, false, aarch64_handle_attr_arch,
12513 OPT_march_ },
12514 { "cpu", aarch64_attr_custom, false, aarch64_handle_attr_cpu, OPT_mcpu_ },
12515 { "tune", aarch64_attr_custom, false, aarch64_handle_attr_tune,
12516 OPT_mtune_ },
efac62a3
ST
12517 { "branch-protection", aarch64_attr_custom, false,
12518 aarch64_handle_attr_branch_protection, OPT_mbranch_protection_ },
db58fd89
JW
12519 { "sign-return-address", aarch64_attr_enum, false, NULL,
12520 OPT_msign_return_address_ },
5a2c8331
KT
12521 { NULL, aarch64_attr_custom, false, NULL, OPT____ }
12522};
12523
12524/* Parse ARG_STR which contains the definition of one target attribute.
ab93e9b7 12525 Show appropriate errors if any or return true if the attribute is valid. */
5a2c8331
KT
12526
12527static bool
ab93e9b7 12528aarch64_process_one_target_attr (char *arg_str)
5a2c8331
KT
12529{
12530 bool invert = false;
12531
12532 size_t len = strlen (arg_str);
12533
12534 if (len == 0)
12535 {
ab93e9b7 12536 error ("malformed %<target()%> pragma or attribute");
5a2c8331
KT
12537 return false;
12538 }
12539
12540 char *str_to_check = (char *) alloca (len + 1);
12541 strcpy (str_to_check, arg_str);
12542
5a2c8331
KT
12543 /* We have something like __attribute__ ((target ("+fp+nosimd"))).
12544 It is easier to detect and handle it explicitly here rather than going
12545 through the machinery for the rest of the target attributes in this
12546 function. */
12547 if (*str_to_check == '+')
ab93e9b7 12548 return aarch64_handle_attr_isa_flags (str_to_check);
5a2c8331
KT
12549
12550 if (len > 3 && strncmp (str_to_check, "no-", 3) == 0)
12551 {
12552 invert = true;
12553 str_to_check += 3;
12554 }
12555 char *arg = strchr (str_to_check, '=');
12556
12557 /* If we found opt=foo then terminate STR_TO_CHECK at the '='
12558 and point ARG to "foo". */
12559 if (arg)
12560 {
12561 *arg = '\0';
12562 arg++;
12563 }
12564 const struct aarch64_attribute_info *p_attr;
16d12992 12565 bool found = false;
5a2c8331
KT
12566 for (p_attr = aarch64_attributes; p_attr->name; p_attr++)
12567 {
12568 /* If the names don't match up, or the user has given an argument
12569 to an attribute that doesn't accept one, or didn't give an argument
12570 to an attribute that expects one, fail to match. */
12571 if (strcmp (str_to_check, p_attr->name) != 0)
12572 continue;
12573
16d12992 12574 found = true;
5a2c8331
KT
12575 bool attr_need_arg_p = p_attr->attr_type == aarch64_attr_custom
12576 || p_attr->attr_type == aarch64_attr_enum;
12577
12578 if (attr_need_arg_p ^ (arg != NULL))
12579 {
ab93e9b7 12580 error ("pragma or attribute %<target(\"%s\")%> does not accept an argument", str_to_check);
5a2c8331
KT
12581 return false;
12582 }
12583
12584 /* If the name matches but the attribute does not allow "no-" versions
12585 then we can't match. */
12586 if (invert && !p_attr->allow_neg)
12587 {
ab93e9b7 12588 error ("pragma or attribute %<target(\"%s\")%> does not allow a negated form", str_to_check);
5a2c8331
KT
12589 return false;
12590 }
12591
12592 switch (p_attr->attr_type)
12593 {
12594 /* Has a custom handler registered.
12595 For example, cpu=, arch=, tune=. */
12596 case aarch64_attr_custom:
12597 gcc_assert (p_attr->handler);
ab93e9b7 12598 if (!p_attr->handler (arg))
5a2c8331
KT
12599 return false;
12600 break;
12601
12602 /* Either set or unset a boolean option. */
12603 case aarch64_attr_bool:
12604 {
12605 struct cl_decoded_option decoded;
12606
12607 generate_option (p_attr->opt_num, NULL, !invert,
12608 CL_TARGET, &decoded);
12609 aarch64_handle_option (&global_options, &global_options_set,
12610 &decoded, input_location);
12611 break;
12612 }
12613 /* Set or unset a bit in the target_flags. aarch64_handle_option
12614 should know what mask to apply given the option number. */
12615 case aarch64_attr_mask:
12616 {
12617 struct cl_decoded_option decoded;
12618 /* We only need to specify the option number.
12619 aarch64_handle_option will know which mask to apply. */
12620 decoded.opt_index = p_attr->opt_num;
12621 decoded.value = !invert;
12622 aarch64_handle_option (&global_options, &global_options_set,
12623 &decoded, input_location);
12624 break;
12625 }
12626 /* Use the option setting machinery to set an option to an enum. */
12627 case aarch64_attr_enum:
12628 {
12629 gcc_assert (arg);
12630 bool valid;
12631 int value;
12632 valid = opt_enum_arg_to_value (p_attr->opt_num, arg,
12633 &value, CL_TARGET);
12634 if (valid)
12635 {
12636 set_option (&global_options, NULL, p_attr->opt_num, value,
12637 NULL, DK_UNSPECIFIED, input_location,
12638 global_dc);
12639 }
12640 else
12641 {
ab93e9b7 12642 error ("pragma or attribute %<target(\"%s=%s\")%> is not valid", str_to_check, arg);
5a2c8331
KT
12643 }
12644 break;
12645 }
12646 default:
12647 gcc_unreachable ();
12648 }
12649 }
12650
16d12992
KT
12651 /* If we reached here we either have found an attribute and validated
12652 it or didn't match any. If we matched an attribute but its arguments
12653 were malformed we will have returned false already. */
12654 return found;
5a2c8331
KT
12655}
12656
12657/* Count how many times the character C appears in
12658 NULL-terminated string STR. */
12659
12660static unsigned int
12661num_occurences_in_str (char c, char *str)
12662{
12663 unsigned int res = 0;
12664 while (*str != '\0')
12665 {
12666 if (*str == c)
12667 res++;
12668
12669 str++;
12670 }
12671
12672 return res;
12673}
12674
12675/* Parse the tree in ARGS that contains the target attribute information
ab93e9b7 12676 and update the global target options space. */
5a2c8331
KT
12677
12678bool
ab93e9b7 12679aarch64_process_target_attr (tree args)
5a2c8331
KT
12680{
12681 if (TREE_CODE (args) == TREE_LIST)
12682 {
12683 do
12684 {
12685 tree head = TREE_VALUE (args);
12686 if (head)
12687 {
ab93e9b7 12688 if (!aarch64_process_target_attr (head))
5a2c8331
KT
12689 return false;
12690 }
12691 args = TREE_CHAIN (args);
12692 } while (args);
12693
12694 return true;
12695 }
3b6cb9e3
ML
12696
12697 if (TREE_CODE (args) != STRING_CST)
12698 {
12699 error ("attribute %<target%> argument not a string");
12700 return false;
12701 }
5a2c8331
KT
12702
12703 size_t len = strlen (TREE_STRING_POINTER (args));
12704 char *str_to_check = (char *) alloca (len + 1);
12705 strcpy (str_to_check, TREE_STRING_POINTER (args));
12706
12707 if (len == 0)
12708 {
ab93e9b7 12709 error ("malformed %<target()%> pragma or attribute");
5a2c8331
KT
12710 return false;
12711 }
12712
12713 /* Used to catch empty tokens between commas, i.e.
12714 attribute ((target ("attr1,,attr2"))). */
12715 unsigned int num_commas = num_occurences_in_str (',', str_to_check);
12716
12717 /* Handle multiple target attributes separated by ','. */
7185a4eb 12718 char *token = strtok_r (str_to_check, ",", &str_to_check);
5a2c8331
KT
12719
12720 unsigned int num_attrs = 0;
12721 while (token)
12722 {
12723 num_attrs++;
ab93e9b7 12724 if (!aarch64_process_one_target_attr (token))
5a2c8331 12725 {
ab93e9b7 12726 error ("pragma or attribute %<target(\"%s\")%> is not valid", token);
5a2c8331
KT
12727 return false;
12728 }
12729
7185a4eb 12730 token = strtok_r (NULL, ",", &str_to_check);
5a2c8331
KT
12731 }
12732
12733 if (num_attrs != num_commas + 1)
12734 {
ab93e9b7 12735 error ("malformed %<target(\"%s\")%> pragma or attribute", TREE_STRING_POINTER (args));
5a2c8331
KT
12736 return false;
12737 }
12738
12739 return true;
12740}
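/* A few illustrative attribute strings for the parser above (the core and
   architecture names are only examples):

     __attribute__ ((target ("arch=armv8.1-a+crc")))           custom "arch" handler
     __attribute__ ((target ("cpu=cortex-a57,tune=cortex-a72"))) two comma-separated attrs
     __attribute__ ((target ("no-fix-cortex-a53-835769")))      negated boolean attr
     __attribute__ ((target ("cmodel=small,,strict-align")))    rejected: the empty
         token makes num_attrs != num_commas + 1.  */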
12741
12742/* Implement TARGET_OPTION_VALID_ATTRIBUTE_P. This is used to
12743 process attribute ((target ("..."))). */
12744
12745static bool
12746aarch64_option_valid_attribute_p (tree fndecl, tree, tree args, int)
12747{
12748 struct cl_target_option cur_target;
12749 bool ret;
12750 tree old_optimize;
12751 tree new_target, new_optimize;
12752 tree existing_target = DECL_FUNCTION_SPECIFIC_TARGET (fndecl);
91d0e8de
KT
12753
12754 /* If what we're processing is the current pragma string then the
12755 target option node is already stored in target_option_current_node
12756 by aarch64_pragma_target_parse in aarch64-c.c. Use that to avoid
12757 having to re-parse the string. This is especially useful to keep
12758 arm_neon.h compile times down since that header contains a lot
12759 of intrinsics enclosed in pragmas. */
12760 if (!existing_target && args == current_target_pragma)
12761 {
12762 DECL_FUNCTION_SPECIFIC_TARGET (fndecl) = target_option_current_node;
12763 return true;
12764 }
5a2c8331
KT
12765 tree func_optimize = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl);
12766
12767 old_optimize = build_optimization_node (&global_options);
12768 func_optimize = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl);
12769
12770 /* If the function changed the optimization levels as well as setting
12771 target options, start with the optimizations specified. */
12772 if (func_optimize && func_optimize != old_optimize)
12773 cl_optimization_restore (&global_options,
12774 TREE_OPTIMIZATION (func_optimize));
12775
12776 /* Save the current target options to restore at the end. */
12777 cl_target_option_save (&cur_target, &global_options);
12778
12779 /* If fndecl already has some target attributes applied to it, unpack
12780 them so that we add this attribute on top of them, rather than
12781 overwriting them. */
12782 if (existing_target)
12783 {
12784 struct cl_target_option *existing_options
12785 = TREE_TARGET_OPTION (existing_target);
12786
12787 if (existing_options)
12788 cl_target_option_restore (&global_options, existing_options);
12789 }
12790 else
12791 cl_target_option_restore (&global_options,
12792 TREE_TARGET_OPTION (target_option_current_node));
12793
ab93e9b7 12794 ret = aarch64_process_target_attr (args);
5a2c8331
KT
12795
12796 /* Set up any additional state. */
12797 if (ret)
12798 {
12799 aarch64_override_options_internal (&global_options);
e95a988a
KT
12800 /* Initialize SIMD builtins if we haven't already.
12801 Set current_target_pragma to NULL for the duration so that
12802 the builtin initialization code doesn't try to tag the functions
12803 being built with the attributes specified by any current pragma, thus
12804 going into an infinite recursion. */
12805 if (TARGET_SIMD)
12806 {
12807 tree saved_current_target_pragma = current_target_pragma;
12808 current_target_pragma = NULL;
12809 aarch64_init_simd_builtins ();
12810 current_target_pragma = saved_current_target_pragma;
12811 }
5a2c8331
KT
12812 new_target = build_target_option_node (&global_options);
12813 }
12814 else
12815 new_target = NULL;
12816
12817 new_optimize = build_optimization_node (&global_options);
12818
12819 if (fndecl && ret)
12820 {
12821 DECL_FUNCTION_SPECIFIC_TARGET (fndecl) = new_target;
12822
12823 if (old_optimize != new_optimize)
12824 DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl) = new_optimize;
12825 }
12826
12827 cl_target_option_restore (&global_options, &cur_target);
12828
12829 if (old_optimize != new_optimize)
12830 cl_optimization_restore (&global_options,
12831 TREE_OPTIMIZATION (old_optimize));
12832 return ret;
12833}
12834
1fd8d40c
KT
12835/* Helper for aarch64_can_inline_p. In the case where CALLER and CALLEE are
12836 tri-bool options (yes, no, don't care) and the default value is
12837 DEF, determine whether to reject inlining. */
12838
12839static bool
12840aarch64_tribools_ok_for_inlining_p (int caller, int callee,
12841 int dont_care, int def)
12842{
12843 /* If the callee doesn't care, always allow inlining. */
12844 if (callee == dont_care)
12845 return true;
12846
12847 /* If the caller doesn't care, always allow inlining. */
12848 if (caller == dont_care)
12849 return true;
12850
12851 /* Otherwise, allow inlining if either the callee and caller values
12852 agree, or if the callee is using the default value. */
12853 return (callee == caller || callee == def);
12854}
12855
12856/* Implement TARGET_CAN_INLINE_P. Decide whether it is valid
12857 to inline CALLEE into CALLER based on target-specific info.
12858 Make sure that the caller and callee have compatible architectural
12859 features. Then go through the other possible target attributes
12860 and see if they can block inlining. Try not to reject always_inline
12861 callees unless they are incompatible architecturally. */
12862
12863static bool
12864aarch64_can_inline_p (tree caller, tree callee)
12865{
12866 tree caller_tree = DECL_FUNCTION_SPECIFIC_TARGET (caller);
12867 tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (callee);
12868
1fd8d40c
KT
12869 struct cl_target_option *caller_opts
12870 = TREE_TARGET_OPTION (caller_tree ? caller_tree
12871 : target_option_default_node);
12872
675d044c
SD
12873 struct cl_target_option *callee_opts
12874 = TREE_TARGET_OPTION (callee_tree ? callee_tree
12875 : target_option_default_node);
1fd8d40c
KT
12876
12877 /* Callee's ISA flags should be a subset of the caller's. */
12878 if ((caller_opts->x_aarch64_isa_flags & callee_opts->x_aarch64_isa_flags)
12879 != callee_opts->x_aarch64_isa_flags)
12880 return false;
12881
12882 /* Allow non-strict aligned functions inlining into strict
12883 aligned ones. */
12884 if ((TARGET_STRICT_ALIGN_P (caller_opts->x_target_flags)
12885 != TARGET_STRICT_ALIGN_P (callee_opts->x_target_flags))
12886 && !(!TARGET_STRICT_ALIGN_P (callee_opts->x_target_flags)
12887 && TARGET_STRICT_ALIGN_P (caller_opts->x_target_flags)))
12888 return false;
12889
12890 bool always_inline = lookup_attribute ("always_inline",
12891 DECL_ATTRIBUTES (callee));
12892
12893 /* If the architectural features match up and the callee is always_inline
12894 then the other attributes don't matter. */
12895 if (always_inline)
12896 return true;
12897
12898 if (caller_opts->x_aarch64_cmodel_var
12899 != callee_opts->x_aarch64_cmodel_var)
12900 return false;
12901
12902 if (caller_opts->x_aarch64_tls_dialect
12903 != callee_opts->x_aarch64_tls_dialect)
12904 return false;
12905
12906 /* Honour explicit requests to workaround errata. */
12907 if (!aarch64_tribools_ok_for_inlining_p (
12908 caller_opts->x_aarch64_fix_a53_err835769,
12909 callee_opts->x_aarch64_fix_a53_err835769,
12910 2, TARGET_FIX_ERR_A53_835769_DEFAULT))
12911 return false;
12912
48bb1a55
CL
12913 if (!aarch64_tribools_ok_for_inlining_p (
12914 caller_opts->x_aarch64_fix_a53_err843419,
12915 callee_opts->x_aarch64_fix_a53_err843419,
12916 2, TARGET_FIX_ERR_A53_843419))
12917 return false;
12918
1fd8d40c
KT
12919 /* If the user explicitly specified -momit-leaf-frame-pointer for the
12920 caller and callee and they don't match up, reject inlining. */
12921 if (!aarch64_tribools_ok_for_inlining_p (
12922 caller_opts->x_flag_omit_leaf_frame_pointer,
12923 callee_opts->x_flag_omit_leaf_frame_pointer,
12924 2, 1))
12925 return false;
12926
12927 /* If the callee has specific tuning overrides, respect them. */
12928 if (callee_opts->x_aarch64_override_tune_string != NULL
12929 && caller_opts->x_aarch64_override_tune_string == NULL)
12930 return false;
12931
12932 /* If the user specified tuning override strings for the
12933 caller and callee and they don't match up, reject inlining.
12934 We just do a string compare here, we don't analyze the meaning
12935 of the string, as it would be too costly for little gain. */
12936 if (callee_opts->x_aarch64_override_tune_string
12937 && caller_opts->x_aarch64_override_tune_string
12938 && (strcmp (callee_opts->x_aarch64_override_tune_string,
12939 caller_opts->x_aarch64_override_tune_string) != 0))
12940 return false;
12941
12942 return true;
12943}
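/* Worked example of the ISA-subset rule above (the feature name is
   illustrative):

     caller compiled with plain -march=armv8-a
     callee declared __attribute__ ((target ("+crypto"), always_inline))

   The callee's ISA flags include crypto, which is not a subset of the
   caller's, so inlining is rejected even though the callee is always_inline;
   always_inline only bypasses the non-architectural checks further down.  */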
12944
43e9d192
IB
12945/* Return true if SYMBOL_REF X binds locally. */
12946
12947static bool
12948aarch64_symbol_binds_local_p (const_rtx x)
12949{
12950 return (SYMBOL_REF_DECL (x)
12951 ? targetm.binds_local_p (SYMBOL_REF_DECL (x))
12952 : SYMBOL_REF_LOCAL_P (x));
12953}
12954
12955/* Return true if SYMBOL_REF X is thread local. */
12956static bool
12957aarch64_tls_symbol_p (rtx x)
12958{
12959 if (! TARGET_HAVE_TLS)
12960 return false;
12961
12962 if (GET_CODE (x) != SYMBOL_REF)
12963 return false;
12964
12965 return SYMBOL_REF_TLS_MODEL (x) != 0;
12966}
12967
12968/* Classify a TLS symbol into one of the TLS kinds. */
12969enum aarch64_symbol_type
12970aarch64_classify_tls_symbol (rtx x)
12971{
12972 enum tls_model tls_kind = tls_symbolic_operand_type (x);
12973
12974 switch (tls_kind)
12975 {
12976 case TLS_MODEL_GLOBAL_DYNAMIC:
12977 case TLS_MODEL_LOCAL_DYNAMIC:
12978 return TARGET_TLS_DESC ? SYMBOL_SMALL_TLSDESC : SYMBOL_SMALL_TLSGD;
12979
12980 case TLS_MODEL_INITIAL_EXEC:
5ae7caad
JW
12981 switch (aarch64_cmodel)
12982 {
12983 case AARCH64_CMODEL_TINY:
12984 case AARCH64_CMODEL_TINY_PIC:
12985 return SYMBOL_TINY_TLSIE;
12986 default:
79496620 12987 return SYMBOL_SMALL_TLSIE;
5ae7caad 12988 }
43e9d192
IB
12989
12990 case TLS_MODEL_LOCAL_EXEC:
cbf5629e
JW
12991 if (aarch64_tls_size == 12)
12992 return SYMBOL_TLSLE12;
12993 else if (aarch64_tls_size == 24)
12994 return SYMBOL_TLSLE24;
12995 else if (aarch64_tls_size == 32)
12996 return SYMBOL_TLSLE32;
12997 else if (aarch64_tls_size == 48)
12998 return SYMBOL_TLSLE48;
12999 else
13000 gcc_unreachable ();
43e9d192
IB
13001
13002 case TLS_MODEL_EMULATED:
13003 case TLS_MODEL_NONE:
13004 return SYMBOL_FORCE_TO_MEM;
13005
13006 default:
13007 gcc_unreachable ();
13008 }
13009}
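/* Illustrative mapping of TLS access models to the symbol types returned
   above (assuming the small code model and TLS descriptors enabled, which is
   the usual configuration):

     global-dynamic / local-dynamic  -> SYMBOL_SMALL_TLSDESC
     initial-exec                    -> SYMBOL_SMALL_TLSIE
     local-exec with -mtls-size=24   -> SYMBOL_TLSLE24
     emulated TLS or no TLS model    -> SYMBOL_FORCE_TO_MEM  */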
13010
43cacb12
RS
13011/* Return the correct method for accessing X + OFFSET, where X is either
13012 a SYMBOL_REF or LABEL_REF. */
17f4d4bf 13013
43e9d192 13014enum aarch64_symbol_type
43cacb12 13015aarch64_classify_symbol (rtx x, HOST_WIDE_INT offset)
43e9d192
IB
13016{
13017 if (GET_CODE (x) == LABEL_REF)
13018 {
13019 switch (aarch64_cmodel)
13020 {
13021 case AARCH64_CMODEL_LARGE:
13022 return SYMBOL_FORCE_TO_MEM;
13023
13024 case AARCH64_CMODEL_TINY_PIC:
13025 case AARCH64_CMODEL_TINY:
a5350ddc
CSS
13026 return SYMBOL_TINY_ABSOLUTE;
13027
1b1e81f8 13028 case AARCH64_CMODEL_SMALL_SPIC:
43e9d192
IB
13029 case AARCH64_CMODEL_SMALL_PIC:
13030 case AARCH64_CMODEL_SMALL:
13031 return SYMBOL_SMALL_ABSOLUTE;
13032
13033 default:
13034 gcc_unreachable ();
13035 }
13036 }
13037
17f4d4bf 13038 if (GET_CODE (x) == SYMBOL_REF)
43e9d192 13039 {
43e9d192
IB
13040 if (aarch64_tls_symbol_p (x))
13041 return aarch64_classify_tls_symbol (x);
13042
17f4d4bf
CSS
13043 switch (aarch64_cmodel)
13044 {
13045 case AARCH64_CMODEL_TINY:
15f6e0da 13046 /* When we retrieve symbol + offset address, we have to make sure
f8b756b7
TB
13047 the offset does not cause overflow of the final address. But
13048 we have no way of knowing the address of symbol at compile time
13049 so we can't accurately say if the distance between the PC and
13050 symbol + offset is outside the addressable range of +/-1M in the
13051 TINY code model. So we rely on images not being greater than
13052 1M and cap the offset at 1M and anything beyond 1M will have to
15f6e0da
RR
13053 be loaded using an alternative mechanism. Furthermore if the
13054 symbol is a weak reference to something that isn't known to
13055 resolve to a symbol in this module, then force to memory. */
13056 if ((SYMBOL_REF_WEAK (x)
13057 && !aarch64_symbol_binds_local_p (x))
43cacb12 13058 || !IN_RANGE (offset, -1048575, 1048575))
a5350ddc
CSS
13059 return SYMBOL_FORCE_TO_MEM;
13060 return SYMBOL_TINY_ABSOLUTE;
13061
17f4d4bf 13062 case AARCH64_CMODEL_SMALL:
f8b756b7
TB
13063 /* Same reasoning as the tiny code model, but the offset cap here is
13064 4G. */
15f6e0da
RR
13065 if ((SYMBOL_REF_WEAK (x)
13066 && !aarch64_symbol_binds_local_p (x))
43cacb12 13067 || !IN_RANGE (offset, HOST_WIDE_INT_C (-4294967263),
3ff5d1f0 13068 HOST_WIDE_INT_C (4294967264)))
17f4d4bf
CSS
13069 return SYMBOL_FORCE_TO_MEM;
13070 return SYMBOL_SMALL_ABSOLUTE;
43e9d192 13071
17f4d4bf 13072 case AARCH64_CMODEL_TINY_PIC:
38e6c9a6 13073 if (!aarch64_symbol_binds_local_p (x))
87dd8ab0 13074 return SYMBOL_TINY_GOT;
38e6c9a6
MS
13075 return SYMBOL_TINY_ABSOLUTE;
13076
1b1e81f8 13077 case AARCH64_CMODEL_SMALL_SPIC:
17f4d4bf
CSS
13078 case AARCH64_CMODEL_SMALL_PIC:
13079 if (!aarch64_symbol_binds_local_p (x))
1b1e81f8
JW
13080 return (aarch64_cmodel == AARCH64_CMODEL_SMALL_SPIC
13081 ? SYMBOL_SMALL_GOT_28K : SYMBOL_SMALL_GOT_4G);
17f4d4bf 13082 return SYMBOL_SMALL_ABSOLUTE;
43e9d192 13083
9ee6540a
WD
13084 case AARCH64_CMODEL_LARGE:
13085 /* This is alright even in PIC code as the constant
13086 pool reference is always PC relative and within
13087 the same translation unit. */
d47d34bb 13088 if (!aarch64_pcrelative_literal_loads && CONSTANT_POOL_ADDRESS_P (x))
9ee6540a
WD
13089 return SYMBOL_SMALL_ABSOLUTE;
13090 else
13091 return SYMBOL_FORCE_TO_MEM;
13092
17f4d4bf
CSS
13093 default:
13094 gcc_unreachable ();
13095 }
43e9d192 13096 }
17f4d4bf 13097
43e9d192
IB
13098 /* By default push everything into the constant pool. */
13099 return SYMBOL_FORCE_TO_MEM;
13100}
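/* Worked example of the offset capping described above, for the tiny code
   model (the symbol name is illustrative):

     extern int arr[];
     arr + 500000   -> offset within +/-1M, classified as SYMBOL_TINY_ABSOLUTE
     arr + 2000000  -> offset beyond the 1M cap, so SYMBOL_FORCE_TO_MEM

   The same scheme applies to the small model, with a 4G cap instead.  */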
13101
43e9d192
IB
13102bool
13103aarch64_constant_address_p (rtx x)
13104{
13105 return (CONSTANT_P (x) && memory_address_p (DImode, x));
13106}
13107
13108bool
13109aarch64_legitimate_pic_operand_p (rtx x)
13110{
13111 if (GET_CODE (x) == SYMBOL_REF
13112 || (GET_CODE (x) == CONST
13113 && GET_CODE (XEXP (x, 0)) == PLUS
13114 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF))
13115 return false;
13116
13117 return true;
13118}
13119
26895c21
WD
13120/* Implement TARGET_LEGITIMATE_CONSTANT_P hook. Return true for constants
13121 that should be rematerialized rather than spilled. */
3520f7cc 13122
43e9d192 13123static bool
ef4bddc2 13124aarch64_legitimate_constant_p (machine_mode mode, rtx x)
43e9d192 13125{
26895c21 13126 /* Support CSE and rematerialization of common constants. */
c0bb5bc5 13127 if (CONST_INT_P (x)
9f7b87ca 13128 || (CONST_DOUBLE_P (x) && GET_MODE_CLASS (mode) == MODE_FLOAT)
c0bb5bc5 13129 || GET_CODE (x) == CONST_VECTOR)
26895c21
WD
13130 return true;
13131
43cacb12
RS
13132 /* Do not allow vector struct mode constants for Advanced SIMD.
13133 We could support 0 and -1 easily, but they need support in
13134 aarch64-simd.md. */
13135 unsigned int vec_flags = aarch64_classify_vector_mode (mode);
13136 if (vec_flags == (VEC_ADVSIMD | VEC_STRUCT))
43e9d192
IB
13137 return false;
13138
43cacb12
RS
13139 /* Only accept variable-length vector constants if they can be
13140 handled directly.
13141
13142 ??? It would be possible to handle rematerialization of other
13143 constants via secondary reloads. */
13144 if (vec_flags & VEC_ANY_SVE)
13145 return aarch64_simd_valid_immediate (x, NULL);
13146
509bb9b6
RS
13147 if (GET_CODE (x) == HIGH)
13148 x = XEXP (x, 0);
13149
43cacb12
RS
13150 /* Accept polynomial constants that can be calculated by using the
13151 destination of a move as the sole temporary. Constants that
13152 require a second temporary cannot be rematerialized (they can't be
13153 forced to memory and also aren't legitimate constants). */
13154 poly_int64 offset;
13155 if (poly_int_rtx_p (x, &offset))
13156 return aarch64_offset_temporaries (false, offset) <= 1;
13157
13158 /* If an offset is being added to something else, we need to allow the
13159 base to be moved into the destination register, meaning that there
13160 are no free temporaries for the offset. */
13161 x = strip_offset (x, &offset);
13162 if (!offset.is_constant () && aarch64_offset_temporaries (true, offset) > 0)
13163 return false;
26895c21 13164
43cacb12
RS
13165 /* Do not allow const (plus (anchor_symbol, const_int)). */
13166 if (maybe_ne (offset, 0) && SYMBOL_REF_P (x) && SYMBOL_REF_ANCHOR_P (x))
13167 return false;
26895c21 13168
f28e54bd
WD
13169 /* Treat symbols as constants. Avoid TLS symbols as they are complex,
13170 so spilling them is better than rematerialization. */
13171 if (SYMBOL_REF_P (x) && !SYMBOL_REF_TLS_MODEL (x))
13172 return true;
13173
26895c21
WD
13174 /* Label references are always constant. */
13175 if (GET_CODE (x) == LABEL_REF)
13176 return true;
13177
13178 return false;
43e9d192
IB
13179}
13180
a5bc806c 13181rtx
43e9d192
IB
13182aarch64_load_tp (rtx target)
13183{
13184 if (!target
13185 || GET_MODE (target) != Pmode
13186 || !register_operand (target, Pmode))
13187 target = gen_reg_rtx (Pmode);
13188
13189 /* Can return in any reg. */
13190 emit_insn (gen_aarch64_load_tp_hard (target));
13191 return target;
13192}
13193
43e9d192
IB
13194/* On AAPCS systems, this is the "struct __va_list". */
13195static GTY(()) tree va_list_type;
13196
13197/* Implement TARGET_BUILD_BUILTIN_VA_LIST.
13198 Return the type to use as __builtin_va_list.
13199
13200 AAPCS64 \S 7.1.4 requires that va_list be a typedef for a type defined as:
13201
13202 struct __va_list
13203 {
13204 void *__stack;
13205 void *__gr_top;
13206 void *__vr_top;
13207 int __gr_offs;
13208 int __vr_offs;
13209 }; */
13210
13211static tree
13212aarch64_build_builtin_va_list (void)
13213{
13214 tree va_list_name;
13215 tree f_stack, f_grtop, f_vrtop, f_groff, f_vroff;
13216
13217 /* Create the type. */
13218 va_list_type = lang_hooks.types.make_type (RECORD_TYPE);
13219 /* Give it the required name. */
13220 va_list_name = build_decl (BUILTINS_LOCATION,
13221 TYPE_DECL,
13222 get_identifier ("__va_list"),
13223 va_list_type);
13224 DECL_ARTIFICIAL (va_list_name) = 1;
13225 TYPE_NAME (va_list_type) = va_list_name;
665c56c6 13226 TYPE_STUB_DECL (va_list_type) = va_list_name;
43e9d192
IB
13227
13228 /* Create the fields. */
13229 f_stack = build_decl (BUILTINS_LOCATION,
13230 FIELD_DECL, get_identifier ("__stack"),
13231 ptr_type_node);
13232 f_grtop = build_decl (BUILTINS_LOCATION,
13233 FIELD_DECL, get_identifier ("__gr_top"),
13234 ptr_type_node);
13235 f_vrtop = build_decl (BUILTINS_LOCATION,
13236 FIELD_DECL, get_identifier ("__vr_top"),
13237 ptr_type_node);
13238 f_groff = build_decl (BUILTINS_LOCATION,
13239 FIELD_DECL, get_identifier ("__gr_offs"),
13240 integer_type_node);
13241 f_vroff = build_decl (BUILTINS_LOCATION,
13242 FIELD_DECL, get_identifier ("__vr_offs"),
13243 integer_type_node);
13244
88e3bdd1 13245 /* Tell tree-stdarg pass about our internal offset fields.
3fd6b9cc
JW
13246	 NOTE: va_list_gpr/fpr_counter_field are only used for tree comparison
13247	 purposes, to identify whether the code is updating va_list internal
13248	 offset fields in an irregular way.  */
13249 va_list_gpr_counter_field = f_groff;
13250 va_list_fpr_counter_field = f_vroff;
13251
43e9d192
IB
13252 DECL_ARTIFICIAL (f_stack) = 1;
13253 DECL_ARTIFICIAL (f_grtop) = 1;
13254 DECL_ARTIFICIAL (f_vrtop) = 1;
13255 DECL_ARTIFICIAL (f_groff) = 1;
13256 DECL_ARTIFICIAL (f_vroff) = 1;
13257
13258 DECL_FIELD_CONTEXT (f_stack) = va_list_type;
13259 DECL_FIELD_CONTEXT (f_grtop) = va_list_type;
13260 DECL_FIELD_CONTEXT (f_vrtop) = va_list_type;
13261 DECL_FIELD_CONTEXT (f_groff) = va_list_type;
13262 DECL_FIELD_CONTEXT (f_vroff) = va_list_type;
13263
13264 TYPE_FIELDS (va_list_type) = f_stack;
13265 DECL_CHAIN (f_stack) = f_grtop;
13266 DECL_CHAIN (f_grtop) = f_vrtop;
13267 DECL_CHAIN (f_vrtop) = f_groff;
13268 DECL_CHAIN (f_groff) = f_vroff;
13269
13270 /* Compute its layout. */
13271 layout_type (va_list_type);
13272
13273 return va_list_type;
13274}
13275
13276/* Implement TARGET_EXPAND_BUILTIN_VA_START. */
13277static void
13278aarch64_expand_builtin_va_start (tree valist, rtx nextarg ATTRIBUTE_UNUSED)
13279{
13280 const CUMULATIVE_ARGS *cum;
13281 tree f_stack, f_grtop, f_vrtop, f_groff, f_vroff;
13282 tree stack, grtop, vrtop, groff, vroff;
13283 tree t;
88e3bdd1
JW
13284 int gr_save_area_size = cfun->va_list_gpr_size;
13285 int vr_save_area_size = cfun->va_list_fpr_size;
43e9d192
IB
13286 int vr_offset;
13287
13288 cum = &crtl->args.info;
88e3bdd1
JW
13289 if (cfun->va_list_gpr_size)
13290 gr_save_area_size = MIN ((NUM_ARG_REGS - cum->aapcs_ncrn) * UNITS_PER_WORD,
13291 cfun->va_list_gpr_size);
13292 if (cfun->va_list_fpr_size)
13293 vr_save_area_size = MIN ((NUM_FP_ARG_REGS - cum->aapcs_nvrn)
13294 * UNITS_PER_VREG, cfun->va_list_fpr_size);
43e9d192 13295
d5726973 13296 if (!TARGET_FLOAT)
43e9d192 13297 {
261fb553 13298 gcc_assert (cum->aapcs_nvrn == 0);
43e9d192
IB
13299 vr_save_area_size = 0;
13300 }
13301
13302 f_stack = TYPE_FIELDS (va_list_type_node);
13303 f_grtop = DECL_CHAIN (f_stack);
13304 f_vrtop = DECL_CHAIN (f_grtop);
13305 f_groff = DECL_CHAIN (f_vrtop);
13306 f_vroff = DECL_CHAIN (f_groff);
13307
13308 stack = build3 (COMPONENT_REF, TREE_TYPE (f_stack), valist, f_stack,
13309 NULL_TREE);
13310 grtop = build3 (COMPONENT_REF, TREE_TYPE (f_grtop), valist, f_grtop,
13311 NULL_TREE);
13312 vrtop = build3 (COMPONENT_REF, TREE_TYPE (f_vrtop), valist, f_vrtop,
13313 NULL_TREE);
13314 groff = build3 (COMPONENT_REF, TREE_TYPE (f_groff), valist, f_groff,
13315 NULL_TREE);
13316 vroff = build3 (COMPONENT_REF, TREE_TYPE (f_vroff), valist, f_vroff,
13317 NULL_TREE);
13318
13319 /* Emit code to initialize STACK, which points to the next varargs stack
13320 argument. CUM->AAPCS_STACK_SIZE gives the number of stack words used
13321 by named arguments. STACK is 8-byte aligned. */
13322 t = make_tree (TREE_TYPE (stack), virtual_incoming_args_rtx);
13323 if (cum->aapcs_stack_size > 0)
13324 t = fold_build_pointer_plus_hwi (t, cum->aapcs_stack_size * UNITS_PER_WORD);
13325 t = build2 (MODIFY_EXPR, TREE_TYPE (stack), stack, t);
13326 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
13327
13328 /* Emit code to initialize GRTOP, the top of the GR save area.
13329 virtual_incoming_args_rtx should have been 16 byte aligned. */
13330 t = make_tree (TREE_TYPE (grtop), virtual_incoming_args_rtx);
13331 t = build2 (MODIFY_EXPR, TREE_TYPE (grtop), grtop, t);
13332 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
13333
13334 /* Emit code to initialize VRTOP, the top of the VR save area.
13335 This address is gr_save_area_bytes below GRTOP, rounded
13336 down to the next 16-byte boundary. */
13337 t = make_tree (TREE_TYPE (vrtop), virtual_incoming_args_rtx);
4f59f9f2
UB
13338 vr_offset = ROUND_UP (gr_save_area_size,
13339 STACK_BOUNDARY / BITS_PER_UNIT);
43e9d192
IB
13340
13341 if (vr_offset)
13342 t = fold_build_pointer_plus_hwi (t, -vr_offset);
13343 t = build2 (MODIFY_EXPR, TREE_TYPE (vrtop), vrtop, t);
13344 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
13345
13346 /* Emit code to initialize GROFF, the offset from GRTOP of the
13347 next GPR argument. */
13348 t = build2 (MODIFY_EXPR, TREE_TYPE (groff), groff,
13349 build_int_cst (TREE_TYPE (groff), -gr_save_area_size));
13350 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
13351
13352	  /* Likewise emit code to initialize VROFF, the offset from VRTOP
13353 of the next VR argument. */
13354 t = build2 (MODIFY_EXPR, TREE_TYPE (vroff), vroff,
13355 build_int_cst (TREE_TYPE (vroff), -vr_save_area_size));
13356 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
13357}
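/* Worked example (assuming the stdarg pass has not limited the save areas
   and TARGET_FLOAT is set): for a callee with two named integer arguments
   and one named floating-point argument, aapcs_ncrn == 2 and aapcs_nvrn == 1,
   so __gr_offs is initialized to -(8 - 2) * 8 = -48 and __vr_offs to
   -(8 - 1) * 16 = -112, with __gr_top and __vr_top pointing just past the
   respective register save areas.  */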
13358
13359/* Implement TARGET_GIMPLIFY_VA_ARG_EXPR. */
13360
13361static tree
13362aarch64_gimplify_va_arg_expr (tree valist, tree type, gimple_seq *pre_p,
13363 gimple_seq *post_p ATTRIBUTE_UNUSED)
13364{
13365 tree addr;
13366 bool indirect_p;
13367 bool is_ha; /* is HFA or HVA. */
13368 bool dw_align; /* double-word align. */
ef4bddc2 13369 machine_mode ag_mode = VOIDmode;
43e9d192 13370 int nregs;
ef4bddc2 13371 machine_mode mode;
43e9d192
IB
13372
13373 tree f_stack, f_grtop, f_vrtop, f_groff, f_vroff;
13374 tree stack, f_top, f_off, off, arg, roundup, on_stack;
13375 HOST_WIDE_INT size, rsize, adjust, align;
13376 tree t, u, cond1, cond2;
13377
13378 indirect_p = pass_by_reference (NULL, TYPE_MODE (type), type, false);
13379 if (indirect_p)
13380 type = build_pointer_type (type);
13381
13382 mode = TYPE_MODE (type);
13383
13384 f_stack = TYPE_FIELDS (va_list_type_node);
13385 f_grtop = DECL_CHAIN (f_stack);
13386 f_vrtop = DECL_CHAIN (f_grtop);
13387 f_groff = DECL_CHAIN (f_vrtop);
13388 f_vroff = DECL_CHAIN (f_groff);
13389
13390 stack = build3 (COMPONENT_REF, TREE_TYPE (f_stack), unshare_expr (valist),
13391 f_stack, NULL_TREE);
13392 size = int_size_in_bytes (type);
c590597c
RE
13393
13394 bool abi_break;
13395 align
13396 = aarch64_function_arg_alignment (mode, type, &abi_break) / BITS_PER_UNIT;
43e9d192
IB
13397
13398 dw_align = false;
13399 adjust = 0;
13400 if (aarch64_vfp_is_call_or_return_candidate (mode,
13401 type,
13402 &ag_mode,
13403 &nregs,
13404 &is_ha))
13405 {
6a70badb
RS
13406 /* No frontends can create types with variable-sized modes, so we
13407 shouldn't be asked to pass or return them. */
13408 unsigned int ag_size = GET_MODE_SIZE (ag_mode).to_constant ();
13409
43e9d192 13410 /* TYPE passed in fp/simd registers. */
d5726973 13411 if (!TARGET_FLOAT)
fc29dfc9 13412 aarch64_err_no_fpadvsimd (mode);
43e9d192
IB
13413
13414 f_top = build3 (COMPONENT_REF, TREE_TYPE (f_vrtop),
13415 unshare_expr (valist), f_vrtop, NULL_TREE);
13416 f_off = build3 (COMPONENT_REF, TREE_TYPE (f_vroff),
13417 unshare_expr (valist), f_vroff, NULL_TREE);
13418
13419 rsize = nregs * UNITS_PER_VREG;
13420
13421 if (is_ha)
13422 {
6a70badb
RS
13423 if (BYTES_BIG_ENDIAN && ag_size < UNITS_PER_VREG)
13424 adjust = UNITS_PER_VREG - ag_size;
43e9d192 13425 }
76b0cbf8 13426 else if (BLOCK_REG_PADDING (mode, type, 1) == PAD_DOWNWARD
43e9d192
IB
13427 && size < UNITS_PER_VREG)
13428 {
13429 adjust = UNITS_PER_VREG - size;
13430 }
13431 }
13432 else
13433 {
13434 /* TYPE passed in general registers. */
13435 f_top = build3 (COMPONENT_REF, TREE_TYPE (f_grtop),
13436 unshare_expr (valist), f_grtop, NULL_TREE);
13437 f_off = build3 (COMPONENT_REF, TREE_TYPE (f_groff),
13438 unshare_expr (valist), f_groff, NULL_TREE);
4f59f9f2 13439 rsize = ROUND_UP (size, UNITS_PER_WORD);
43e9d192
IB
13440 nregs = rsize / UNITS_PER_WORD;
13441
13442 if (align > 8)
c590597c
RE
13443 {
13444 if (abi_break && warn_psabi)
13445 inform (input_location, "parameter passing for argument of type "
13446 "%qT changed in GCC 9.1", type);
13447 dw_align = true;
13448 }
43e9d192 13449
76b0cbf8 13450 if (BLOCK_REG_PADDING (mode, type, 1) == PAD_DOWNWARD
43e9d192
IB
13451 && size < UNITS_PER_WORD)
13452 {
13453 adjust = UNITS_PER_WORD - size;
13454 }
13455 }
13456
13457 /* Get a local temporary for the field value. */
13458 off = get_initialized_tmp_var (f_off, pre_p, NULL);
13459
13460 /* Emit code to branch if off >= 0. */
13461 t = build2 (GE_EXPR, boolean_type_node, off,
13462 build_int_cst (TREE_TYPE (off), 0));
13463 cond1 = build3 (COND_EXPR, ptr_type_node, t, NULL_TREE, NULL_TREE);
13464
13465 if (dw_align)
13466 {
13467 /* Emit: offs = (offs + 15) & -16. */
13468 t = build2 (PLUS_EXPR, TREE_TYPE (off), off,
13469 build_int_cst (TREE_TYPE (off), 15));
13470 t = build2 (BIT_AND_EXPR, TREE_TYPE (off), t,
13471 build_int_cst (TREE_TYPE (off), -16));
13472 roundup = build2 (MODIFY_EXPR, TREE_TYPE (off), off, t);
13473 }
13474 else
13475 roundup = NULL;
13476
13477 /* Update ap.__[g|v]r_offs */
13478 t = build2 (PLUS_EXPR, TREE_TYPE (off), off,
13479 build_int_cst (TREE_TYPE (off), rsize));
13480 t = build2 (MODIFY_EXPR, TREE_TYPE (f_off), unshare_expr (f_off), t);
13481
13482 /* String up. */
13483 if (roundup)
13484 t = build2 (COMPOUND_EXPR, TREE_TYPE (t), roundup, t);
13485
13486 /* [cond2] if (ap.__[g|v]r_offs > 0) */
13487 u = build2 (GT_EXPR, boolean_type_node, unshare_expr (f_off),
13488 build_int_cst (TREE_TYPE (f_off), 0));
13489 cond2 = build3 (COND_EXPR, ptr_type_node, u, NULL_TREE, NULL_TREE);
13490
13491 /* String up: make sure the assignment happens before the use. */
13492 t = build2 (COMPOUND_EXPR, TREE_TYPE (cond2), t, cond2);
13493 COND_EXPR_ELSE (cond1) = t;
13494
13495 /* Prepare the trees handling the argument that is passed on the stack;
13496	     the top-level node will be stored in ON_STACK.  */
13497 arg = get_initialized_tmp_var (stack, pre_p, NULL);
13498 if (align > 8)
13499 {
13500 /* if (alignof(type) > 8) (arg = arg + 15) & -16; */
4bdc2738 13501 t = fold_build_pointer_plus_hwi (arg, 15);
43e9d192
IB
13502 t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
13503 build_int_cst (TREE_TYPE (t), -16));
43e9d192
IB
13504 roundup = build2 (MODIFY_EXPR, TREE_TYPE (arg), arg, t);
13505 }
13506 else
13507 roundup = NULL;
13508 /* Advance ap.__stack */
4bdc2738 13509 t = fold_build_pointer_plus_hwi (arg, size + 7);
43e9d192
IB
13510 t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
13511 build_int_cst (TREE_TYPE (t), -8));
43e9d192
IB
13512 t = build2 (MODIFY_EXPR, TREE_TYPE (stack), unshare_expr (stack), t);
13513 /* String up roundup and advance. */
13514 if (roundup)
13515 t = build2 (COMPOUND_EXPR, TREE_TYPE (t), roundup, t);
13516 /* String up with arg */
13517 on_stack = build2 (COMPOUND_EXPR, TREE_TYPE (arg), t, arg);
13518 /* Big-endianness related address adjustment. */
76b0cbf8 13519 if (BLOCK_REG_PADDING (mode, type, 1) == PAD_DOWNWARD
43e9d192
IB
13520 && size < UNITS_PER_WORD)
13521 {
13522 t = build2 (POINTER_PLUS_EXPR, TREE_TYPE (arg), arg,
13523 size_int (UNITS_PER_WORD - size));
13524 on_stack = build2 (COMPOUND_EXPR, TREE_TYPE (arg), on_stack, t);
13525 }
13526
13527 COND_EXPR_THEN (cond1) = unshare_expr (on_stack);
13528 COND_EXPR_THEN (cond2) = unshare_expr (on_stack);
13529
13530 /* Adjustment to OFFSET in the case of BIG_ENDIAN. */
13531 t = off;
13532 if (adjust)
13533 t = build2 (PREINCREMENT_EXPR, TREE_TYPE (off), off,
13534 build_int_cst (TREE_TYPE (off), adjust));
13535
13536 t = fold_convert (sizetype, t);
13537 t = build2 (POINTER_PLUS_EXPR, TREE_TYPE (f_top), f_top, t);
13538
13539 if (is_ha)
13540 {
13541 /* type ha; // treat as "struct {ftype field[n];}"
13542 ... [computing offs]
13543	 for (i = 0; i < nregs; ++i, offs += 16)
13544 ha.field[i] = *((ftype *)(ap.__vr_top + offs));
13545 return ha; */
13546 int i;
13547 tree tmp_ha, field_t, field_ptr_t;
13548
13549 /* Declare a local variable. */
13550 tmp_ha = create_tmp_var_raw (type, "ha");
13551 gimple_add_tmp_var (tmp_ha);
13552
13553 /* Establish the base type. */
13554 switch (ag_mode)
13555 {
4e10a5a7 13556 case E_SFmode:
43e9d192
IB
13557 field_t = float_type_node;
13558 field_ptr_t = float_ptr_type_node;
13559 break;
4e10a5a7 13560 case E_DFmode:
43e9d192
IB
13561 field_t = double_type_node;
13562 field_ptr_t = double_ptr_type_node;
13563 break;
4e10a5a7 13564 case E_TFmode:
43e9d192
IB
13565 field_t = long_double_type_node;
13566 field_ptr_t = long_double_ptr_type_node;
13567 break;
4e10a5a7 13568 case E_HFmode:
1b62ed4f
JG
13569 field_t = aarch64_fp16_type_node;
13570 field_ptr_t = aarch64_fp16_ptr_type_node;
43e9d192 13571 break;
4e10a5a7
RS
13572 case E_V2SImode:
13573 case E_V4SImode:
43e9d192
IB
13574 {
13575 tree innertype = make_signed_type (GET_MODE_PRECISION (SImode));
13576 field_t = build_vector_type_for_mode (innertype, ag_mode);
13577 field_ptr_t = build_pointer_type (field_t);
13578 }
13579 break;
13580 default:
13581 gcc_assert (0);
13582 }
13583
13584	      /* *(field_ptr_t)&ha = *((field_ptr_t)vr_saved_area)  */
13585 tmp_ha = build1 (ADDR_EXPR, field_ptr_t, tmp_ha);
13586 addr = t;
13587 t = fold_convert (field_ptr_t, addr);
13588 t = build2 (MODIFY_EXPR, field_t,
13589 build1 (INDIRECT_REF, field_t, tmp_ha),
13590 build1 (INDIRECT_REF, field_t, t));
13591
13592 /* ha.field[i] = *((field_ptr_t)vr_saved_area + i) */
13593 for (i = 1; i < nregs; ++i)
13594 {
13595 addr = fold_build_pointer_plus_hwi (addr, UNITS_PER_VREG);
13596 u = fold_convert (field_ptr_t, addr);
13597 u = build2 (MODIFY_EXPR, field_t,
13598 build2 (MEM_REF, field_t, tmp_ha,
13599 build_int_cst (field_ptr_t,
13600 (i *
13601 int_size_in_bytes (field_t)))),
13602 build1 (INDIRECT_REF, field_t, u));
13603 t = build2 (COMPOUND_EXPR, TREE_TYPE (t), t, u);
13604 }
13605
13606 u = fold_convert (TREE_TYPE (f_top), tmp_ha);
13607 t = build2 (COMPOUND_EXPR, TREE_TYPE (f_top), t, u);
13608 }
13609
13610 COND_EXPR_ELSE (cond2) = t;
13611 addr = fold_convert (build_pointer_type (type), cond1);
13612 addr = build_va_arg_indirect_ref (addr);
13613
13614 if (indirect_p)
13615 addr = build_va_arg_indirect_ref (addr);
13616
13617 return addr;
13618}
13619
13620/* Implement TARGET_SETUP_INCOMING_VARARGS. */
13621
13622static void
ef4bddc2 13623aarch64_setup_incoming_varargs (cumulative_args_t cum_v, machine_mode mode,
43e9d192
IB
13624 tree type, int *pretend_size ATTRIBUTE_UNUSED,
13625 int no_rtl)
13626{
13627 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
13628 CUMULATIVE_ARGS local_cum;
88e3bdd1
JW
13629 int gr_saved = cfun->va_list_gpr_size;
13630 int vr_saved = cfun->va_list_fpr_size;
43e9d192
IB
13631
13632 /* The caller has advanced CUM up to, but not beyond, the last named
13633 argument. Advance a local copy of CUM past the last "real" named
13634 argument, to find out how many registers are left over. */
13635 local_cum = *cum;
13636	  aarch64_function_arg_advance (pack_cumulative_args (&local_cum), mode, type, true);
13637
88e3bdd1
JW
13638	  /* Find out how many registers we need to save.
13639	     Honor tree-stdarg analysis results.  */
13640 if (cfun->va_list_gpr_size)
13641 gr_saved = MIN (NUM_ARG_REGS - local_cum.aapcs_ncrn,
13642 cfun->va_list_gpr_size / UNITS_PER_WORD);
13643 if (cfun->va_list_fpr_size)
13644 vr_saved = MIN (NUM_FP_ARG_REGS - local_cum.aapcs_nvrn,
13645 cfun->va_list_fpr_size / UNITS_PER_VREG);
43e9d192 13646
d5726973 13647 if (!TARGET_FLOAT)
43e9d192 13648 {
261fb553 13649 gcc_assert (local_cum.aapcs_nvrn == 0);
43e9d192
IB
13650 vr_saved = 0;
13651 }
13652
13653 if (!no_rtl)
13654 {
13655 if (gr_saved > 0)
13656 {
13657 rtx ptr, mem;
13658
13659 /* virtual_incoming_args_rtx should have been 16-byte aligned. */
13660 ptr = plus_constant (Pmode, virtual_incoming_args_rtx,
13661 - gr_saved * UNITS_PER_WORD);
13662 mem = gen_frame_mem (BLKmode, ptr);
13663 set_mem_alias_set (mem, get_varargs_alias_set ());
13664
13665 move_block_from_reg (local_cum.aapcs_ncrn + R0_REGNUM,
13666 mem, gr_saved);
13667 }
13668 if (vr_saved > 0)
13669 {
13670 /* We can't use move_block_from_reg, because it will use
13671 the wrong mode, storing D regs only. */
ef4bddc2 13672 machine_mode mode = TImode;
88e3bdd1 13673 int off, i, vr_start;
43e9d192
IB
13674
13675 /* Set OFF to the offset from virtual_incoming_args_rtx of
13676 the first vector register. The VR save area lies below
13677 the GR one, and is aligned to 16 bytes. */
4f59f9f2
UB
13678 off = -ROUND_UP (gr_saved * UNITS_PER_WORD,
13679 STACK_BOUNDARY / BITS_PER_UNIT);
43e9d192
IB
13680 off -= vr_saved * UNITS_PER_VREG;
13681
88e3bdd1
JW
13682 vr_start = V0_REGNUM + local_cum.aapcs_nvrn;
13683 for (i = 0; i < vr_saved; ++i)
43e9d192
IB
13684 {
13685 rtx ptr, mem;
13686
13687 ptr = plus_constant (Pmode, virtual_incoming_args_rtx, off);
13688 mem = gen_frame_mem (mode, ptr);
13689 set_mem_alias_set (mem, get_varargs_alias_set ());
88e3bdd1 13690 aarch64_emit_move (mem, gen_rtx_REG (mode, vr_start + i));
43e9d192
IB
13691 off += UNITS_PER_VREG;
13692 }
13693 }
13694 }
13695
13696 /* We don't save the size into *PRETEND_SIZE because we want to avoid
13697 any complication of having crtl->args.pretend_args_size changed. */
8799637a 13698 cfun->machine->frame.saved_varargs_size
4f59f9f2
UB
13699 = (ROUND_UP (gr_saved * UNITS_PER_WORD,
13700 STACK_BOUNDARY / BITS_PER_UNIT)
43e9d192
IB
13701 + vr_saved * UNITS_PER_VREG);
13702}
13703
13704static void
13705aarch64_conditional_register_usage (void)
13706{
13707 int i;
13708 if (!TARGET_FLOAT)
13709 {
13710 for (i = V0_REGNUM; i <= V31_REGNUM; i++)
13711 {
13712 fixed_regs[i] = 1;
13713 call_used_regs[i] = 1;
13714 }
13715 }
43cacb12
RS
13716 if (!TARGET_SVE)
13717 for (i = P0_REGNUM; i <= P15_REGNUM; i++)
13718 {
13719 fixed_regs[i] = 1;
13720 call_used_regs[i] = 1;
13721 }
3751345d
RE
13722
13723 /* When tracking speculation, we need a couple of call-clobbered registers
13724 to track the speculation state. It would be nice to just use
13725 IP0 and IP1, but currently there are numerous places that just
13726	     assume these registers are free for other uses (e.g. pointer
13727 authentication). */
13728 if (aarch64_track_speculation)
13729 {
13730 fixed_regs[SPECULATION_TRACKER_REGNUM] = 1;
13731 call_used_regs[SPECULATION_TRACKER_REGNUM] = 1;
13732 fixed_regs[SPECULATION_SCRATCH_REGNUM] = 1;
13733 call_used_regs[SPECULATION_SCRATCH_REGNUM] = 1;
13734 }
43e9d192
IB
13735}
13736
13737/* Walk down the type tree of TYPE counting consecutive base elements.
13738 If *MODEP is VOIDmode, then set it to the first valid floating point
13739 type. If a non-floating point type is found, or if a floating point
13740 type that doesn't match a non-VOIDmode *MODEP is found, then return -1,
13741 otherwise return the count in the sub-tree. */
13742static int
ef4bddc2 13743aapcs_vfp_sub_candidate (const_tree type, machine_mode *modep)
43e9d192 13744{
ef4bddc2 13745 machine_mode mode;
43e9d192
IB
13746 HOST_WIDE_INT size;
13747
13748 switch (TREE_CODE (type))
13749 {
13750 case REAL_TYPE:
13751 mode = TYPE_MODE (type);
1b62ed4f
JG
13752 if (mode != DFmode && mode != SFmode
13753 && mode != TFmode && mode != HFmode)
43e9d192
IB
13754 return -1;
13755
13756 if (*modep == VOIDmode)
13757 *modep = mode;
13758
13759 if (*modep == mode)
13760 return 1;
13761
13762 break;
13763
13764 case COMPLEX_TYPE:
13765 mode = TYPE_MODE (TREE_TYPE (type));
1b62ed4f
JG
13766 if (mode != DFmode && mode != SFmode
13767 && mode != TFmode && mode != HFmode)
43e9d192
IB
13768 return -1;
13769
13770 if (*modep == VOIDmode)
13771 *modep = mode;
13772
13773 if (*modep == mode)
13774 return 2;
13775
13776 break;
13777
13778 case VECTOR_TYPE:
13779 /* Use V2SImode and V4SImode as representatives of all 64-bit
13780 and 128-bit vector types. */
13781 size = int_size_in_bytes (type);
13782 switch (size)
13783 {
13784 case 8:
13785 mode = V2SImode;
13786 break;
13787 case 16:
13788 mode = V4SImode;
13789 break;
13790 default:
13791 return -1;
13792 }
13793
13794 if (*modep == VOIDmode)
13795 *modep = mode;
13796
13797 /* Vector modes are considered to be opaque: two vectors are
13798 equivalent for the purposes of being homogeneous aggregates
13799 if they are the same size. */
13800 if (*modep == mode)
13801 return 1;
13802
13803 break;
13804
13805 case ARRAY_TYPE:
13806 {
13807 int count;
13808 tree index = TYPE_DOMAIN (type);
13809
807e902e
KZ
13810	/* Can't handle incomplete types or sizes that are not
13811 fixed. */
13812 if (!COMPLETE_TYPE_P (type)
13813 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
43e9d192
IB
13814 return -1;
13815
13816 count = aapcs_vfp_sub_candidate (TREE_TYPE (type), modep);
13817 if (count == -1
13818 || !index
13819 || !TYPE_MAX_VALUE (index)
cc269bb6 13820 || !tree_fits_uhwi_p (TYPE_MAX_VALUE (index))
43e9d192 13821 || !TYPE_MIN_VALUE (index)
cc269bb6 13822 || !tree_fits_uhwi_p (TYPE_MIN_VALUE (index))
43e9d192
IB
13823 || count < 0)
13824 return -1;
13825
ae7e9ddd
RS
13826 count *= (1 + tree_to_uhwi (TYPE_MAX_VALUE (index))
13827 - tree_to_uhwi (TYPE_MIN_VALUE (index)));
43e9d192
IB
13828
13829 /* There must be no padding. */
6a70badb
RS
13830 if (maybe_ne (wi::to_poly_wide (TYPE_SIZE (type)),
13831 count * GET_MODE_BITSIZE (*modep)))
43e9d192
IB
13832 return -1;
13833
13834 return count;
13835 }
13836
13837 case RECORD_TYPE:
13838 {
13839 int count = 0;
13840 int sub_count;
13841 tree field;
13842
807e902e
KZ
13843	/* Can't handle incomplete types or sizes that are not
13844 fixed. */
13845 if (!COMPLETE_TYPE_P (type)
13846 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
43e9d192
IB
13847 return -1;
13848
13849 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
13850 {
13851 if (TREE_CODE (field) != FIELD_DECL)
13852 continue;
13853
13854 sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep);
13855 if (sub_count < 0)
13856 return -1;
13857 count += sub_count;
13858 }
13859
13860 /* There must be no padding. */
6a70badb
RS
13861 if (maybe_ne (wi::to_poly_wide (TYPE_SIZE (type)),
13862 count * GET_MODE_BITSIZE (*modep)))
43e9d192
IB
13863 return -1;
13864
13865 return count;
13866 }
13867
13868 case UNION_TYPE:
13869 case QUAL_UNION_TYPE:
13870 {
13871 /* These aren't very interesting except in a degenerate case. */
13872 int count = 0;
13873 int sub_count;
13874 tree field;
13875
807e902e
KZ
13876	/* Can't handle incomplete types or sizes that are not
13877 fixed. */
13878 if (!COMPLETE_TYPE_P (type)
13879 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
43e9d192
IB
13880 return -1;
13881
13882 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
13883 {
13884 if (TREE_CODE (field) != FIELD_DECL)
13885 continue;
13886
13887 sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep);
13888 if (sub_count < 0)
13889 return -1;
13890 count = count > sub_count ? count : sub_count;
13891 }
13892
13893 /* There must be no padding. */
6a70badb
RS
13894 if (maybe_ne (wi::to_poly_wide (TYPE_SIZE (type)),
13895 count * GET_MODE_BITSIZE (*modep)))
43e9d192
IB
13896 return -1;
13897
13898 return count;
13899 }
13900
13901 default:
13902 break;
13903 }
13904
13905 return -1;
13906}
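/* For example, struct { double x, y, z; } yields a count of 3 with *MODEP
   set to DFmode (a valid homogeneous aggregate), whereas
   struct { float x; double y; } yields -1 because the field modes do not
   agree.  */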
13907
b6ec6215
KT
13908/* Return TRUE if the type, as described by TYPE and MODE, is a short vector
13909 type as described in AAPCS64 \S 4.1.2.
13910
13911 See the comment above aarch64_composite_type_p for the notes on MODE. */
13912
13913static bool
13914aarch64_short_vector_p (const_tree type,
13915 machine_mode mode)
13916{
6a70badb 13917 poly_int64 size = -1;
b6ec6215
KT
13918
13919 if (type && TREE_CODE (type) == VECTOR_TYPE)
13920 size = int_size_in_bytes (type);
13921 else if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
13922 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
13923 size = GET_MODE_SIZE (mode);
13924
6a70badb 13925 return known_eq (size, 8) || known_eq (size, 16);
b6ec6215
KT
13926}
13927
43e9d192
IB
13928/* Return TRUE if the type, as described by TYPE and MODE, is a composite
13929 type as described in AAPCS64 \S 4.3. This includes aggregate, union and
13930 array types. The C99 floating-point complex types are also considered
13931 as composite types, according to AAPCS64 \S 7.1.1. The complex integer
13932 types, which are GCC extensions and out of the scope of AAPCS64, are
13933 treated as composite types here as well.
13934
13935 Note that MODE itself is not sufficient in determining whether a type
13936 is such a composite type or not. This is because
13937 stor-layout.c:compute_record_mode may have already changed the MODE
13938 (BLKmode) of a RECORD_TYPE TYPE to some other mode. For example, a
13939 structure with only one field may have its MODE set to the mode of the
13940 field. Also an integer mode whose size matches the size of the
13941 RECORD_TYPE type may be used to substitute the original mode
13942 (i.e. BLKmode) in certain circumstances. In other words, MODE cannot be
13943 solely relied on. */
13944
13945static bool
13946aarch64_composite_type_p (const_tree type,
ef4bddc2 13947 machine_mode mode)
43e9d192 13948{
b6ec6215
KT
13949 if (aarch64_short_vector_p (type, mode))
13950 return false;
13951
43e9d192
IB
13952 if (type && (AGGREGATE_TYPE_P (type) || TREE_CODE (type) == COMPLEX_TYPE))
13953 return true;
13954
13955 if (mode == BLKmode
13956 || GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT
13957 || GET_MODE_CLASS (mode) == MODE_COMPLEX_INT)
13958 return true;
13959
13960 return false;
13961}
13962
43e9d192
IB
13963/* Return TRUE if an argument, whose type is described by TYPE and MODE,
13964 shall be passed or returned in simd/fp register(s) (providing these
13965 parameter passing registers are available).
13966
13967 Upon successful return, *COUNT returns the number of needed registers,
13968	   *BASE_MODE returns the mode of the individual register and when IS_HA
13969 is not NULL, *IS_HA indicates whether or not the argument is a homogeneous
13970 floating-point aggregate or a homogeneous short-vector aggregate. */
13971
13972static bool
ef4bddc2 13973aarch64_vfp_is_call_or_return_candidate (machine_mode mode,
43e9d192 13974 const_tree type,
ef4bddc2 13975 machine_mode *base_mode,
43e9d192
IB
13976 int *count,
13977 bool *is_ha)
13978{
ef4bddc2 13979 machine_mode new_mode = VOIDmode;
43e9d192
IB
13980 bool composite_p = aarch64_composite_type_p (type, mode);
13981
13982 if (is_ha != NULL) *is_ha = false;
13983
13984 if ((!composite_p && GET_MODE_CLASS (mode) == MODE_FLOAT)
13985 || aarch64_short_vector_p (type, mode))
13986 {
13987 *count = 1;
13988 new_mode = mode;
13989 }
13990 else if (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT)
13991 {
13992 if (is_ha != NULL) *is_ha = true;
13993 *count = 2;
13994 new_mode = GET_MODE_INNER (mode);
13995 }
13996 else if (type && composite_p)
13997 {
13998 int ag_count = aapcs_vfp_sub_candidate (type, &new_mode);
13999
14000 if (ag_count > 0 && ag_count <= HA_MAX_NUM_FLDS)
14001 {
14002 if (is_ha != NULL) *is_ha = true;
14003 *count = ag_count;
14004 }
14005 else
14006 return false;
14007 }
14008 else
14009 return false;
14010
14011 *base_mode = new_mode;
14012 return true;
14013}
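/* For example, _Complex double is treated as a homogeneous aggregate of two
   DFmode values (*COUNT == 2, *BASE_MODE == DFmode, *IS_HA set), while
   struct { float f[5]; } is rejected because it would need more than
   HA_MAX_NUM_FLDS registers.  */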
14014
14015/* Implement TARGET_STRUCT_VALUE_RTX. */
14016
14017static rtx
14018aarch64_struct_value_rtx (tree fndecl ATTRIBUTE_UNUSED,
14019 int incoming ATTRIBUTE_UNUSED)
14020{
14021 return gen_rtx_REG (Pmode, AARCH64_STRUCT_VALUE_REGNUM);
14022}
14023
14024/* Implements target hook vector_mode_supported_p. */
14025static bool
ef4bddc2 14026aarch64_vector_mode_supported_p (machine_mode mode)
43e9d192 14027{
43cacb12
RS
14028 unsigned int vec_flags = aarch64_classify_vector_mode (mode);
14029 return vec_flags != 0 && (vec_flags & VEC_STRUCT) == 0;
43e9d192
IB
14030}
14031
b7342d25
IB
14032/* Return appropriate SIMD container
14033 for MODE within a vector of WIDTH bits. */
ef4bddc2 14034static machine_mode
43cacb12 14035aarch64_simd_container_mode (scalar_mode mode, poly_int64 width)
43e9d192 14036{
43cacb12
RS
14037 if (TARGET_SVE && known_eq (width, BITS_PER_SVE_VECTOR))
14038 switch (mode)
14039 {
14040 case E_DFmode:
14041 return VNx2DFmode;
14042 case E_SFmode:
14043 return VNx4SFmode;
14044 case E_HFmode:
14045 return VNx8HFmode;
14046 case E_DImode:
14047 return VNx2DImode;
14048 case E_SImode:
14049 return VNx4SImode;
14050 case E_HImode:
14051 return VNx8HImode;
14052 case E_QImode:
14053 return VNx16QImode;
14054 default:
14055 return word_mode;
14056 }
14057
14058 gcc_assert (known_eq (width, 64) || known_eq (width, 128));
43e9d192 14059 if (TARGET_SIMD)
b7342d25 14060 {
43cacb12 14061 if (known_eq (width, 128))
b7342d25
IB
14062 switch (mode)
14063 {
4e10a5a7 14064 case E_DFmode:
b7342d25 14065 return V2DFmode;
4e10a5a7 14066 case E_SFmode:
b7342d25 14067 return V4SFmode;
4e10a5a7 14068 case E_HFmode:
b719f884 14069 return V8HFmode;
4e10a5a7 14070 case E_SImode:
b7342d25 14071 return V4SImode;
4e10a5a7 14072 case E_HImode:
b7342d25 14073 return V8HImode;
4e10a5a7 14074 case E_QImode:
b7342d25 14075 return V16QImode;
4e10a5a7 14076 case E_DImode:
b7342d25
IB
14077 return V2DImode;
14078 default:
14079 break;
14080 }
14081 else
14082 switch (mode)
14083 {
4e10a5a7 14084 case E_SFmode:
b7342d25 14085 return V2SFmode;
4e10a5a7 14086 case E_HFmode:
b719f884 14087 return V4HFmode;
4e10a5a7 14088 case E_SImode:
b7342d25 14089 return V2SImode;
4e10a5a7 14090 case E_HImode:
b7342d25 14091 return V4HImode;
4e10a5a7 14092 case E_QImode:
b7342d25
IB
14093 return V8QImode;
14094 default:
14095 break;
14096 }
14097 }
43e9d192
IB
14098 return word_mode;
14099}
14100
b7342d25 14101/* Return 128-bit container as the preferred SIMD mode for MODE. */
ef4bddc2 14102static machine_mode
005ba29c 14103aarch64_preferred_simd_mode (scalar_mode mode)
b7342d25 14104{
43cacb12
RS
14105 poly_int64 bits = TARGET_SVE ? BITS_PER_SVE_VECTOR : 128;
14106 return aarch64_simd_container_mode (mode, bits);
b7342d25
IB
14107}
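/* For example, the preferred SIMD mode for SFmode is V4SFmode (a 128-bit
   container) when only Advanced SIMD is available, and VNx4SFmode when SVE
   is enabled.  */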
14108
86e36728 14109/* Return a list of possible vector sizes for the vectorizer
3b357264 14110 to iterate over. */
86e36728
RS
14111static void
14112aarch64_autovectorize_vector_sizes (vector_sizes *sizes)
3b357264 14113{
43cacb12
RS
14114 if (TARGET_SVE)
14115 sizes->safe_push (BYTES_PER_SVE_VECTOR);
86e36728
RS
14116 sizes->safe_push (16);
14117 sizes->safe_push (8);
3b357264
JG
14118}
14119
ac2b960f
YZ
14120/* Implement TARGET_MANGLE_TYPE. */
14121
6f549691 14122static const char *
ac2b960f
YZ
14123aarch64_mangle_type (const_tree type)
14124{
14125 /* The AArch64 ABI documents say that "__va_list" has to be
17f8ace2 14126 mangled as if it is in the "std" namespace. */
ac2b960f
YZ
14127 if (lang_hooks.types_compatible_p (CONST_CAST_TREE (type), va_list_type))
14128 return "St9__va_list";
14129
c2ec330c
AL
14130 /* Half-precision float. */
14131 if (TREE_CODE (type) == REAL_TYPE && TYPE_PRECISION (type) == 16)
14132 return "Dh";
14133
f9d53c27
TB
14134 /* Mangle AArch64-specific internal types. TYPE_NAME is non-NULL_TREE for
14135 builtin types. */
14136 if (TYPE_NAME (type) != NULL)
14137 return aarch64_mangle_builtin_type (type);
c6fc9e43 14138
ac2b960f
YZ
14139 /* Use the default mangling. */
14140 return NULL;
14141}
14142
75cf1494
KT
14143/* Find the first rtx_insn before insn that will generate an assembly
14144 instruction. */
14145
14146static rtx_insn *
14147aarch64_prev_real_insn (rtx_insn *insn)
14148{
14149 if (!insn)
14150 return NULL;
14151
14152 do
14153 {
14154 insn = prev_real_insn (insn);
14155 }
14156 while (insn && recog_memoized (insn) < 0);
14157
14158 return insn;
14159}
14160
14161static bool
14162is_madd_op (enum attr_type t1)
14163{
14164 unsigned int i;
14165 /* A number of these may be AArch32 only. */
14166 enum attr_type mlatypes[] = {
14167 TYPE_MLA, TYPE_MLAS, TYPE_SMLAD, TYPE_SMLADX, TYPE_SMLAL, TYPE_SMLALD,
14168 TYPE_SMLALS, TYPE_SMLALXY, TYPE_SMLAWX, TYPE_SMLAWY, TYPE_SMLAXY,
14169	    TYPE_SMMLA, TYPE_UMLAL, TYPE_UMLALS, TYPE_SMLSD, TYPE_SMLSDX, TYPE_SMLSLD
14170 };
14171
14172 for (i = 0; i < sizeof (mlatypes) / sizeof (enum attr_type); i++)
14173 {
14174 if (t1 == mlatypes[i])
14175 return true;
14176 }
14177
14178 return false;
14179}
14180
14181/* Check if there is a register dependency between a load and the insn
14182 for which we hold recog_data. */
14183
14184static bool
14185dep_between_memop_and_curr (rtx memop)
14186{
14187 rtx load_reg;
14188 int opno;
14189
8baff86e 14190 gcc_assert (GET_CODE (memop) == SET);
75cf1494
KT
14191
14192 if (!REG_P (SET_DEST (memop)))
14193 return false;
14194
14195 load_reg = SET_DEST (memop);
8baff86e 14196 for (opno = 1; opno < recog_data.n_operands; opno++)
75cf1494
KT
14197 {
14198 rtx operand = recog_data.operand[opno];
14199 if (REG_P (operand)
14200 && reg_overlap_mentioned_p (load_reg, operand))
14201 return true;
14202
14203 }
14204 return false;
14205}
14206
8baff86e
KT
14207
14208/* When working around the Cortex-A53 erratum 835769,
14209 given rtx_insn INSN, return true if it is a 64-bit multiply-accumulate
14210 instruction and has a preceding memory instruction such that a NOP
14211 should be inserted between them. */
14212
75cf1494
KT
14213bool
14214aarch64_madd_needs_nop (rtx_insn* insn)
14215{
14216 enum attr_type attr_type;
14217 rtx_insn *prev;
14218 rtx body;
14219
b32c1043 14220 if (!TARGET_FIX_ERR_A53_835769)
75cf1494
KT
14221 return false;
14222
e322d6e3 14223 if (!INSN_P (insn) || recog_memoized (insn) < 0)
75cf1494
KT
14224 return false;
14225
14226 attr_type = get_attr_type (insn);
14227 if (!is_madd_op (attr_type))
14228 return false;
14229
14230 prev = aarch64_prev_real_insn (insn);
3fea1a75
KT
14231 /* aarch64_prev_real_insn can call recog_memoized on insns other than INSN.
14232 Restore recog state to INSN to avoid state corruption. */
14233 extract_constrain_insn_cached (insn);
14234
550e2205 14235 if (!prev || !contains_mem_rtx_p (PATTERN (prev)))
75cf1494
KT
14236 return false;
14237
14238 body = single_set (prev);
14239
14240 /* If the previous insn is a memory op and there is no dependency between
8baff86e
KT
14241 it and the DImode madd, emit a NOP between them. If body is NULL then we
14242 have a complex memory operation, probably a load/store pair.
14243 Be conservative for now and emit a NOP. */
14244 if (GET_MODE (recog_data.operand[0]) == DImode
14245 && (!body || !dep_between_memop_and_curr (body)))
75cf1494
KT
14246 return true;
14247
14248 return false;
14249
14250}
14251
8baff86e
KT
14252
14253/* Implement FINAL_PRESCAN_INSN. */
14254
75cf1494
KT
14255void
14256aarch64_final_prescan_insn (rtx_insn *insn)
14257{
14258 if (aarch64_madd_needs_nop (insn))
14259 fprintf (asm_out_file, "\tnop // between mem op and mult-accumulate\n");
14260}
14261
14262
43cacb12
RS
14263/* Return true if BASE_OR_STEP is a valid immediate operand for an SVE INDEX
14264 instruction. */
14265
14266bool
14267aarch64_sve_index_immediate_p (rtx base_or_step)
14268{
14269 return (CONST_INT_P (base_or_step)
14270 && IN_RANGE (INTVAL (base_or_step), -16, 15));
14271}
14272
14273/* Return true if X is a valid immediate for the SVE ADD and SUB
14274 instructions. Negate X first if NEGATE_P is true. */
14275
14276bool
14277aarch64_sve_arith_immediate_p (rtx x, bool negate_p)
14278{
14279 rtx elt;
14280
14281 if (!const_vec_duplicate_p (x, &elt)
14282 || !CONST_INT_P (elt))
14283 return false;
14284
14285 HOST_WIDE_INT val = INTVAL (elt);
14286 if (negate_p)
14287 val = -val;
14288 val &= GET_MODE_MASK (GET_MODE_INNER (GET_MODE (x)));
14289
14290 if (val & 0xff)
14291 return IN_RANGE (val, 0, 0xff);
14292 return IN_RANGE (val, 0, 0xff00);
14293}
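/* For example, with .s (SImode) elements a duplicated 17 or 0x1100 is a
   valid ADD/SUB immediate (an unsigned 8-bit value, optionally shifted left
   by 8), whereas a duplicated 0x101 is not, since it occupies both byte
   positions.  */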
14294
14295/* Return true if X is a valid immediate operand for an SVE logical
14296 instruction such as AND. */
14297
14298bool
14299aarch64_sve_bitmask_immediate_p (rtx x)
14300{
14301 rtx elt;
14302
14303 return (const_vec_duplicate_p (x, &elt)
14304 && CONST_INT_P (elt)
14305 && aarch64_bitmask_imm (INTVAL (elt),
14306 GET_MODE_INNER (GET_MODE (x))));
14307}
14308
14309/* Return true if X is a valid immediate for the SVE DUP and CPY
14310 instructions. */
14311
14312bool
14313aarch64_sve_dup_immediate_p (rtx x)
14314{
14315 rtx elt;
14316
14317 if (!const_vec_duplicate_p (x, &elt)
14318 || !CONST_INT_P (elt))
14319 return false;
14320
14321 HOST_WIDE_INT val = INTVAL (elt);
14322 if (val & 0xff)
14323 return IN_RANGE (val, -0x80, 0x7f);
14324 return IN_RANGE (val, -0x8000, 0x7f00);
14325}
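/* For example, a duplicated 0x45 or 0x4500 is accepted (a signed 8-bit
   value, optionally shifted left by 8), whereas a duplicated 0x145 is
   rejected.  */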
14326
14327/* Return true if X is a valid immediate operand for an SVE CMP instruction.
14328 SIGNED_P says whether the operand is signed rather than unsigned. */
14329
14330bool
14331aarch64_sve_cmp_immediate_p (rtx x, bool signed_p)
14332{
14333 rtx elt;
14334
14335 return (const_vec_duplicate_p (x, &elt)
14336 && CONST_INT_P (elt)
14337 && (signed_p
14338 ? IN_RANGE (INTVAL (elt), -16, 15)
14339 : IN_RANGE (INTVAL (elt), 0, 127)));
14340}
14341
14342/* Return true if X is a valid immediate operand for an SVE FADD or FSUB
14343 instruction. Negate X first if NEGATE_P is true. */
14344
14345bool
14346aarch64_sve_float_arith_immediate_p (rtx x, bool negate_p)
14347{
14348 rtx elt;
14349 REAL_VALUE_TYPE r;
14350
14351 if (!const_vec_duplicate_p (x, &elt)
14352 || GET_CODE (elt) != CONST_DOUBLE)
14353 return false;
14354
14355 r = *CONST_DOUBLE_REAL_VALUE (elt);
14356
14357 if (negate_p)
14358 r = real_value_negate (&r);
14359
14360 if (real_equal (&r, &dconst1))
14361 return true;
14362 if (real_equal (&r, &dconsthalf))
14363 return true;
14364 return false;
14365}
14366
14367/* Return true if X is a valid immediate operand for an SVE FMUL
14368 instruction. */
14369
14370bool
14371aarch64_sve_float_mul_immediate_p (rtx x)
14372{
14373 rtx elt;
14374
14375 /* GCC will never generate a multiply with an immediate of 2, so there is no
14376 point testing for it (even though it is a valid constant). */
14377 return (const_vec_duplicate_p (x, &elt)
14378 && GET_CODE (elt) == CONST_DOUBLE
14379 && real_equal (CONST_DOUBLE_REAL_VALUE (elt), &dconsthalf));
14380}
14381
b187677b
RS
14382/* Return true if replicating VAL32 is a valid 2-byte or 4-byte immediate
14383 for the Advanced SIMD operation described by WHICH and INSN. If INFO
14384 is nonnull, use it to describe valid immediates. */
3520f7cc 14385static bool
b187677b
RS
14386aarch64_advsimd_valid_immediate_hs (unsigned int val32,
14387 simd_immediate_info *info,
14388 enum simd_immediate_check which,
14389 simd_immediate_info::insn_type insn)
14390{
14391 /* Try a 4-byte immediate with LSL. */
14392 for (unsigned int shift = 0; shift < 32; shift += 8)
14393 if ((val32 & (0xff << shift)) == val32)
14394 {
14395 if (info)
14396 *info = simd_immediate_info (SImode, val32 >> shift, insn,
14397 simd_immediate_info::LSL, shift);
14398 return true;
14399 }
3520f7cc 14400
b187677b
RS
14401 /* Try a 2-byte immediate with LSL. */
14402 unsigned int imm16 = val32 & 0xffff;
14403 if (imm16 == (val32 >> 16))
14404 for (unsigned int shift = 0; shift < 16; shift += 8)
14405 if ((imm16 & (0xff << shift)) == imm16)
48063b9d 14406 {
b187677b
RS
14407 if (info)
14408 *info = simd_immediate_info (HImode, imm16 >> shift, insn,
14409 simd_immediate_info::LSL, shift);
14410 return true;
48063b9d 14411 }
3520f7cc 14412
b187677b
RS
14413 /* Try a 4-byte immediate with MSL, except for cases that MVN
14414 can handle. */
14415 if (which == AARCH64_CHECK_MOV)
14416 for (unsigned int shift = 8; shift < 24; shift += 8)
14417 {
14418 unsigned int low = (1 << shift) - 1;
14419 if (((val32 & (0xff << shift)) | low) == val32)
14420 {
14421 if (info)
14422 *info = simd_immediate_info (SImode, val32 >> shift, insn,
14423 simd_immediate_info::MSL, shift);
14424 return true;
14425 }
14426 }
43e9d192 14427
b187677b
RS
14428 return false;
14429}
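/* For example, VAL32 == 0x00ab0000 matches the 4-byte case as 0xab with
   LSL #16, and when checking MOV immediates VAL32 == 0x0002ffff matches the
   MSL form as 0x02 with MSL #16 (the bits below the shift are all ones).  */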
14430
14431/* Return true if replicating VAL64 is a valid immediate for the
14432 Advanced SIMD operation described by WHICH. If INFO is nonnull,
14433 use it to describe valid immediates. */
14434static bool
14435aarch64_advsimd_valid_immediate (unsigned HOST_WIDE_INT val64,
14436 simd_immediate_info *info,
14437 enum simd_immediate_check which)
14438{
14439 unsigned int val32 = val64 & 0xffffffff;
14440 unsigned int val16 = val64 & 0xffff;
14441 unsigned int val8 = val64 & 0xff;
14442
14443 if (val32 == (val64 >> 32))
43e9d192 14444 {
b187677b
RS
14445 if ((which & AARCH64_CHECK_ORR) != 0
14446 && aarch64_advsimd_valid_immediate_hs (val32, info, which,
14447 simd_immediate_info::MOV))
14448 return true;
43e9d192 14449
b187677b
RS
14450 if ((which & AARCH64_CHECK_BIC) != 0
14451 && aarch64_advsimd_valid_immediate_hs (~val32, info, which,
14452 simd_immediate_info::MVN))
14453 return true;
ee78df47 14454
b187677b
RS
14455 /* Try using a replicated byte. */
14456 if (which == AARCH64_CHECK_MOV
14457 && val16 == (val32 >> 16)
14458 && val8 == (val16 >> 8))
ee78df47 14459 {
b187677b
RS
14460 if (info)
14461 *info = simd_immediate_info (QImode, val8);
14462 return true;
ee78df47 14463 }
43e9d192
IB
14464 }
14465
b187677b
RS
14466 /* Try using a bit-to-bytemask. */
14467 if (which == AARCH64_CHECK_MOV)
43e9d192 14468 {
b187677b
RS
14469 unsigned int i;
14470 for (i = 0; i < 64; i += 8)
ab6501d7 14471 {
b187677b
RS
14472 unsigned char byte = (val64 >> i) & 0xff;
14473 if (byte != 0 && byte != 0xff)
14474 break;
ab6501d7 14475 }
b187677b 14476 if (i == 64)
ab6501d7 14477 {
b187677b
RS
14478 if (info)
14479 *info = simd_immediate_info (DImode, val64);
14480 return true;
ab6501d7 14481 }
43e9d192 14482 }
b187677b
RS
14483 return false;
14484}
43e9d192 14485
43cacb12
RS
14486/* Return true if replicating VAL64 gives a valid immediate for an SVE MOV
14487 instruction. If INFO is nonnull, use it to describe valid immediates. */
14488
14489static bool
14490aarch64_sve_valid_immediate (unsigned HOST_WIDE_INT val64,
14491 simd_immediate_info *info)
14492{
14493 scalar_int_mode mode = DImode;
14494 unsigned int val32 = val64 & 0xffffffff;
14495 if (val32 == (val64 >> 32))
14496 {
14497 mode = SImode;
14498 unsigned int val16 = val32 & 0xffff;
14499 if (val16 == (val32 >> 16))
14500 {
14501 mode = HImode;
14502 unsigned int val8 = val16 & 0xff;
14503 if (val8 == (val16 >> 8))
14504 mode = QImode;
14505 }
14506 }
14507 HOST_WIDE_INT val = trunc_int_for_mode (val64, mode);
14508 if (IN_RANGE (val, -0x80, 0x7f))
14509 {
14510 /* DUP with no shift. */
14511 if (info)
14512 *info = simd_immediate_info (mode, val);
14513 return true;
14514 }
14515 if ((val & 0xff) == 0 && IN_RANGE (val, -0x8000, 0x7f00))
14516 {
14517 /* DUP with LSL #8. */
14518 if (info)
14519 *info = simd_immediate_info (mode, val);
14520 return true;
14521 }
14522 if (aarch64_bitmask_imm (val64, mode))
14523 {
14524 /* DUPM. */
14525 if (info)
14526 *info = simd_immediate_info (mode, val);
14527 return true;
14528 }
14529 return false;
14530}
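/* For example, VAL64 == 0x2929292929292929 narrows to a QImode DUP of 41,
   and VAL64 == 0x1200120012001200 narrows to an HImode DUP of 0x12 with
   LSL #8.  */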
14531
b187677b
RS
14532/* Return true if OP is a valid SIMD immediate for the operation
14533 described by WHICH. If INFO is nonnull, use it to describe valid
14534 immediates. */
14535bool
14536aarch64_simd_valid_immediate (rtx op, simd_immediate_info *info,
14537 enum simd_immediate_check which)
14538{
43cacb12
RS
14539 machine_mode mode = GET_MODE (op);
14540 unsigned int vec_flags = aarch64_classify_vector_mode (mode);
14541 if (vec_flags == 0 || vec_flags == (VEC_ADVSIMD | VEC_STRUCT))
14542 return false;
14543
14544 scalar_mode elt_mode = GET_MODE_INNER (mode);
f9093f23 14545 rtx base, step;
b187677b 14546 unsigned int n_elts;
f9093f23
RS
14547 if (GET_CODE (op) == CONST_VECTOR
14548 && CONST_VECTOR_DUPLICATE_P (op))
14549 n_elts = CONST_VECTOR_NPATTERNS (op);
43cacb12
RS
14550 else if ((vec_flags & VEC_SVE_DATA)
14551 && const_vec_series_p (op, &base, &step))
14552 {
14553 gcc_assert (GET_MODE_CLASS (mode) == MODE_VECTOR_INT);
14554 if (!aarch64_sve_index_immediate_p (base)
14555 || !aarch64_sve_index_immediate_p (step))
14556 return false;
14557
14558 if (info)
14559 *info = simd_immediate_info (elt_mode, base, step);
14560 return true;
14561 }
6a70badb
RS
14562 else if (GET_CODE (op) == CONST_VECTOR
14563 && CONST_VECTOR_NUNITS (op).is_constant (&n_elts))
14564 /* N_ELTS set above. */;
b187677b 14565 else
d8edd899 14566 return false;
43e9d192 14567
43cacb12
RS
14568 /* Handle PFALSE and PTRUE. */
14569 if (vec_flags & VEC_SVE_PRED)
14570 return (op == CONST0_RTX (mode)
14571 || op == CONSTM1_RTX (mode));
14572
b187677b 14573 scalar_float_mode elt_float_mode;
f9093f23
RS
14574 if (n_elts == 1
14575 && is_a <scalar_float_mode> (elt_mode, &elt_float_mode))
43e9d192 14576 {
f9093f23
RS
14577 rtx elt = CONST_VECTOR_ENCODED_ELT (op, 0);
14578 if (aarch64_float_const_zero_rtx_p (elt)
14579 || aarch64_float_const_representable_p (elt))
14580 {
14581 if (info)
14582 *info = simd_immediate_info (elt_float_mode, elt);
14583 return true;
14584 }
b187677b 14585 }
43e9d192 14586
b187677b
RS
14587 unsigned int elt_size = GET_MODE_SIZE (elt_mode);
14588 if (elt_size > 8)
14589 return false;
e4f0f84d 14590
b187677b 14591 scalar_int_mode elt_int_mode = int_mode_for_mode (elt_mode).require ();
43e9d192 14592
b187677b
RS
14593 /* Expand the vector constant out into a byte vector, with the least
14594 significant byte of the register first. */
14595 auto_vec<unsigned char, 16> bytes;
14596 bytes.reserve (n_elts * elt_size);
14597 for (unsigned int i = 0; i < n_elts; i++)
14598 {
f9093f23
RS
14599 /* The vector is provided in gcc endian-neutral fashion.
14600 For aarch64_be Advanced SIMD, it must be laid out in the vector
14601 register in reverse order. */
14602 bool swap_p = ((vec_flags & VEC_ADVSIMD) != 0 && BYTES_BIG_ENDIAN);
14603 rtx elt = CONST_VECTOR_ELT (op, swap_p ? (n_elts - 1 - i) : i);
43e9d192 14604
b187677b
RS
14605 if (elt_mode != elt_int_mode)
14606 elt = gen_lowpart (elt_int_mode, elt);
43e9d192 14607
b187677b
RS
14608 if (!CONST_INT_P (elt))
14609 return false;
43e9d192 14610
b187677b
RS
14611 unsigned HOST_WIDE_INT elt_val = INTVAL (elt);
14612 for (unsigned int byte = 0; byte < elt_size; byte++)
48063b9d 14613 {
b187677b
RS
14614 bytes.quick_push (elt_val & 0xff);
14615 elt_val >>= BITS_PER_UNIT;
48063b9d 14616 }
43e9d192
IB
14617 }
14618
b187677b
RS
14619 /* The immediate must repeat every eight bytes. */
14620 unsigned int nbytes = bytes.length ();
14621 for (unsigned i = 8; i < nbytes; ++i)
14622 if (bytes[i] != bytes[i - 8])
14623 return false;
14624
14625 /* Get the repeating 8-byte value as an integer. No endian correction
14626 is needed here because bytes is already in lsb-first order. */
14627 unsigned HOST_WIDE_INT val64 = 0;
14628 for (unsigned int i = 0; i < 8; i++)
14629 val64 |= ((unsigned HOST_WIDE_INT) bytes[i % nbytes]
14630 << (i * BITS_PER_UNIT));
14631
43cacb12
RS
14632 if (vec_flags & VEC_SVE_DATA)
14633 return aarch64_sve_valid_immediate (val64, info);
14634 else
14635 return aarch64_advsimd_valid_immediate (val64, info, which);
14636}
14637
14638/* Check whether X is a VEC_SERIES-like constant that starts at 0 and
14639 has a step in the range of INDEX. Return the index expression if so,
14640 otherwise return null. */
14641rtx
14642aarch64_check_zero_based_sve_index_immediate (rtx x)
14643{
14644 rtx base, step;
14645 if (const_vec_series_p (x, &base, &step)
14646 && base == const0_rtx
14647 && aarch64_sve_index_immediate_p (step))
14648 return step;
14649 return NULL_RTX;
43e9d192
IB
14650}
14651
43e9d192
IB
14652/* Check that immediate shift constants are within range.  */
14653bool
ef4bddc2 14654aarch64_simd_shift_imm_p (rtx x, machine_mode mode, bool left)
43e9d192
IB
14655{
14656 int bit_width = GET_MODE_UNIT_SIZE (mode) * BITS_PER_UNIT;
14657 if (left)
ddeabd3e 14658 return aarch64_const_vec_all_same_in_range_p (x, 0, bit_width - 1);
43e9d192 14659 else
ddeabd3e 14660 return aarch64_const_vec_all_same_in_range_p (x, 1, bit_width);
43e9d192
IB
14661}
14662
7325d85a
KT
14663/* Return the bitmask CONST_INT to select the bits required by a zero extract
14664 operation of width WIDTH at bit position POS. */
14665
14666rtx
14667aarch64_mask_from_zextract_ops (rtx width, rtx pos)
14668{
14669 gcc_assert (CONST_INT_P (width));
14670 gcc_assert (CONST_INT_P (pos));
14671
14672 unsigned HOST_WIDE_INT mask
14673 = ((unsigned HOST_WIDE_INT) 1 << UINTVAL (width)) - 1;
14674 return GEN_INT (mask << UINTVAL (pos));
14675}
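/* For example (a sketch):
     rtx mask = aarch64_mask_from_zextract_ops (GEN_INT (8), GEN_INT (16));
   yields the constant 0xff0000, selecting bits 16..23.  */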
14676
83f8c414 14677bool
a6e0bfa7 14678aarch64_mov_operand_p (rtx x, machine_mode mode)
83f8c414 14679{
83f8c414
CSS
14680 if (GET_CODE (x) == HIGH
14681 && aarch64_valid_symref (XEXP (x, 0), GET_MODE (XEXP (x, 0))))
14682 return true;
14683
82614948 14684 if (CONST_INT_P (x))
83f8c414
CSS
14685 return true;
14686
43cacb12
RS
14687 if (VECTOR_MODE_P (GET_MODE (x)))
14688 return aarch64_simd_valid_immediate (x, NULL);
14689
83f8c414
CSS
14690 if (GET_CODE (x) == SYMBOL_REF && mode == DImode && CONSTANT_ADDRESS_P (x))
14691 return true;
14692
43cacb12
RS
14693 if (aarch64_sve_cnt_immediate_p (x))
14694 return true;
14695
a6e0bfa7 14696 return aarch64_classify_symbolic_expression (x)
a5350ddc 14697 == SYMBOL_TINY_ABSOLUTE;
83f8c414
CSS
14698}
14699
43e9d192
IB
14700/* Return a const_int vector of VAL. */
14701rtx
ab014eb3 14702aarch64_simd_gen_const_vector_dup (machine_mode mode, HOST_WIDE_INT val)
43e9d192 14703{
59d06c05
RS
14704 rtx c = gen_int_mode (val, GET_MODE_INNER (mode));
14705 return gen_const_vec_duplicate (mode, c);
43e9d192
IB
14706}
14707
051d0e2f
SN
14708/* Check OP is a legal scalar immediate for the MOVI instruction. */
14709
14710bool
77e994c9 14711aarch64_simd_scalar_immediate_valid_for_move (rtx op, scalar_int_mode mode)
051d0e2f 14712{
ef4bddc2 14713 machine_mode vmode;
051d0e2f 14714
43cacb12 14715 vmode = aarch64_simd_container_mode (mode, 64);
051d0e2f 14716 rtx op_v = aarch64_simd_gen_const_vector_dup (vmode, INTVAL (op));
b187677b 14717 return aarch64_simd_valid_immediate (op_v, NULL);
051d0e2f
SN
14718}
14719
988fa693
JG
14720/* Construct and return a PARALLEL RTX vector with elements numbering the
14721 lanes of either the high (HIGH == TRUE) or low (HIGH == FALSE) half of
14722 the vector - from the perspective of the architecture. This does not
14723 line up with GCC's perspective on lane numbers, so we end up with
14724 different masks depending on our target endian-ness. The diagram
14725 below may help. We must draw the distinction when building masks
14726 which select one half of the vector. An instruction selecting
14727 architectural low-lanes for a big-endian target, must be described using
14728 a mask selecting GCC high-lanes.
14729
14730 Big-Endian Little-Endian
14731
14732GCC 0 1 2 3 3 2 1 0
14733 | x | x | x | x | | x | x | x | x |
14734Architecture 3 2 1 0 3 2 1 0
14735
14736Low Mask: { 2, 3 } { 0, 1 }
14737High Mask: { 0, 1 } { 2, 3 }
f5cbabc1
RS
14738
14739 MODE Is the mode of the vector and NUNITS is the number of units in it. */
988fa693 14740
43e9d192 14741rtx
f5cbabc1 14742aarch64_simd_vect_par_cnst_half (machine_mode mode, int nunits, bool high)
43e9d192 14743{
43e9d192 14744 rtvec v = rtvec_alloc (nunits / 2);
988fa693
JG
14745 int high_base = nunits / 2;
14746 int low_base = 0;
14747 int base;
43e9d192
IB
14748 rtx t1;
14749 int i;
14750
988fa693
JG
14751 if (BYTES_BIG_ENDIAN)
14752 base = high ? low_base : high_base;
14753 else
14754 base = high ? high_base : low_base;
14755
14756 for (i = 0; i < nunits / 2; i++)
43e9d192
IB
14757 RTVEC_ELT (v, i) = GEN_INT (base + i);
14758
14759 t1 = gen_rtx_PARALLEL (mode, v);
14760 return t1;
14761}
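/* For example, for V4SImode with NUNITS == 4 and HIGH == true this returns
   (parallel [2 3]) on little-endian and (parallel [0 1]) on big-endian,
   matching the diagram above.  */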
14762
988fa693
JG
14763/* Check OP for validity as a PARALLEL RTX vector with elements
14764 numbering the lanes of either the high (HIGH == TRUE) or low lanes,
14765 from the perspective of the architecture. See the diagram above
14766 aarch64_simd_vect_par_cnst_half for more details. */
14767
14768bool
ef4bddc2 14769aarch64_simd_check_vect_par_cnst_half (rtx op, machine_mode mode,
988fa693
JG
14770 bool high)
14771{
6a70badb
RS
14772 int nelts;
14773 if (!VECTOR_MODE_P (mode) || !GET_MODE_NUNITS (mode).is_constant (&nelts))
f5cbabc1
RS
14774 return false;
14775
6a70badb 14776 rtx ideal = aarch64_simd_vect_par_cnst_half (mode, nelts, high);
988fa693
JG
14777 HOST_WIDE_INT count_op = XVECLEN (op, 0);
14778 HOST_WIDE_INT count_ideal = XVECLEN (ideal, 0);
14779 int i = 0;
14780
988fa693
JG
14781 if (count_op != count_ideal)
14782 return false;
14783
14784 for (i = 0; i < count_ideal; i++)
14785 {
14786 rtx elt_op = XVECEXP (op, 0, i);
14787 rtx elt_ideal = XVECEXP (ideal, 0, i);
14788
4aa81c2e 14789 if (!CONST_INT_P (elt_op)
988fa693
JG
14790 || INTVAL (elt_ideal) != INTVAL (elt_op))
14791 return false;
14792 }
14793 return true;
14794}
14795
43e9d192
IB
14796/* Bounds-check lanes. Ensure OPERAND lies between LOW (inclusive) and
14797 HIGH (exclusive). */
14798void
46ed6024
CB
14799aarch64_simd_lane_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high,
14800 const_tree exp)
43e9d192
IB
14801{
14802 HOST_WIDE_INT lane;
4aa81c2e 14803 gcc_assert (CONST_INT_P (operand));
43e9d192
IB
14804 lane = INTVAL (operand);
14805
14806 if (lane < low || lane >= high)
46ed6024
CB
14807 {
14808 if (exp)
cf0c27ef 14809 error ("%Klane %wd out of range %wd - %wd", exp, lane, low, high - 1);
46ed6024 14810 else
cf0c27ef 14811 error ("lane %wd out of range %wd - %wd", lane, low, high - 1);
46ed6024 14812 }
43e9d192
IB
14813}
14814
7ac29c0f
RS
14815/* Peform endian correction on lane number N, which indexes a vector
14816 of mode MODE, and return the result as an SImode rtx. */
14817
14818rtx
14819aarch64_endian_lane_rtx (machine_mode mode, unsigned int n)
14820{
14821 return gen_int_mode (ENDIAN_LANE_N (GET_MODE_NUNITS (mode), n), SImode);
14822}
14823
43e9d192 14824/* Return TRUE if OP is a valid vector addressing mode. */
43cacb12 14825
43e9d192
IB
14826bool
14827aarch64_simd_mem_operand_p (rtx op)
14828{
14829 return MEM_P (op) && (GET_CODE (XEXP (op, 0)) == POST_INC
4aa81c2e 14830 || REG_P (XEXP (op, 0)));
43e9d192
IB
14831}
14832
43cacb12
RS
14833/* Return true if OP is a valid MEM operand for an SVE LD1R instruction. */
14834
14835bool
14836aarch64_sve_ld1r_operand_p (rtx op)
14837{
14838 struct aarch64_address_info addr;
14839 scalar_mode mode;
14840
14841 return (MEM_P (op)
14842 && is_a <scalar_mode> (GET_MODE (op), &mode)
14843 && aarch64_classify_address (&addr, XEXP (op, 0), mode, false)
14844 && addr.type == ADDRESS_REG_IMM
14845 && offset_6bit_unsigned_scaled_p (mode, addr.const_offset));
14846}
14847
14848/* Return true if OP is a valid MEM operand for an SVE LDR instruction.
14849 The conditions for STR are the same. */
14850bool
14851aarch64_sve_ldr_operand_p (rtx op)
14852{
14853 struct aarch64_address_info addr;
14854
14855 return (MEM_P (op)
14856 && aarch64_classify_address (&addr, XEXP (op, 0), GET_MODE (op),
14857 false, ADDR_QUERY_ANY)
14858 && addr.type == ADDRESS_REG_IMM);
14859}
14860
9f4cbab8
RS
14861/* Return true if OP is a valid MEM operand for an SVE_STRUCT mode.
14862 We need to be able to access the individual pieces, so the range
14863 is different from LD[234] and ST[234]. */
14864bool
14865aarch64_sve_struct_memory_operand_p (rtx op)
14866{
14867 if (!MEM_P (op))
14868 return false;
14869
14870 machine_mode mode = GET_MODE (op);
14871 struct aarch64_address_info addr;
14872 if (!aarch64_classify_address (&addr, XEXP (op, 0), SVE_BYTE_MODE, false,
14873 ADDR_QUERY_ANY)
14874 || addr.type != ADDRESS_REG_IMM)
14875 return false;
14876
14877 poly_int64 first = addr.const_offset;
14878 poly_int64 last = first + GET_MODE_SIZE (mode) - BYTES_PER_SVE_VECTOR;
14879 return (offset_4bit_signed_scaled_p (SVE_BYTE_MODE, first)
14880 && offset_4bit_signed_scaled_p (SVE_BYTE_MODE, last));
14881}
14882
2d8c6dc1
AH
14883/* Emit a register copy from operand to operand, taking care not to
14884 early-clobber source registers in the process.
43e9d192 14885
2d8c6dc1
AH
14886 COUNT is the number of components into which the copy needs to be
14887 decomposed. */
43e9d192 14888void
b8506a8a 14889aarch64_simd_emit_reg_reg_move (rtx *operands, machine_mode mode,
2d8c6dc1 14890 unsigned int count)
43e9d192
IB
14891{
14892 unsigned int i;
2d8c6dc1
AH
14893 int rdest = REGNO (operands[0]);
14894 int rsrc = REGNO (operands[1]);
43e9d192
IB
14895
14896 if (!reg_overlap_mentioned_p (operands[0], operands[1])
2d8c6dc1
AH
14897 || rdest < rsrc)
14898 for (i = 0; i < count; i++)
14899 emit_move_insn (gen_rtx_REG (mode, rdest + i),
14900 gen_rtx_REG (mode, rsrc + i));
43e9d192 14901 else
2d8c6dc1
AH
14902 for (i = 0; i < count; i++)
14903 emit_move_insn (gen_rtx_REG (mode, rdest + count - i - 1),
14904 gen_rtx_REG (mode, rsrc + count - i - 1));
43e9d192
IB
14905}
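/* Illustrative sketch (not part of GCC and excluded from the build): the
   copy above picks its direction the way memmove does, so that no source
   register is clobbered before it has been read.  copy_regs below is a
   scalar model invented for this example, operating on an array that
   stands in for the register file.  */
#if 0
#include <assert.h>

static void
copy_regs (int *regs, int rdest, int rsrc, int count)
{
  /* Copy forwards unless the destination overlaps a later source.  */
  if (rdest < rsrc || rdest >= rsrc + count)
    for (int i = 0; i < count; i++)
      regs[rdest + i] = regs[rsrc + i];
  else
    for (int i = count - 1; i >= 0; i--)
      regs[rdest + i] = regs[rsrc + i];
}

int
main (void)
{
  int regs[8] = { 0, 1, 2, 3, 4, 5, 6, 7 };
  copy_regs (regs, 1, 0, 3);	/* Overlapping copy, done backwards.  */
  assert (regs[1] == 0 && regs[2] == 1 && regs[3] == 2);
  return 0;
}
#endif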
14906
668046d1 14907/* Compute and return the length of aarch64_simd_reglist<mode>, where <mode> is
6ec0e5b9 14908 one of VSTRUCT modes: OI, CI, or XI. */
668046d1 14909int
b8506a8a 14910aarch64_simd_attr_length_rglist (machine_mode mode)
668046d1 14911{
6a70badb
RS
14912 /* This is only used (and only meaningful) for Advanced SIMD, not SVE. */
14913 return (GET_MODE_SIZE (mode).to_constant () / UNITS_PER_VREG) * 4;
668046d1
DS
14914}
14915
db0253a4 14916/* Implement target hook TARGET_VECTOR_ALIGNMENT. The AAPCS64 sets the maximum
43cacb12
RS
14917 alignment of a vector to 128 bits. SVE predicates have an alignment of
14918 16 bits. */
db0253a4
TB
14919static HOST_WIDE_INT
14920aarch64_simd_vector_alignment (const_tree type)
14921{
43cacb12
RS
14922 if (TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
14923 /* ??? Checking the mode isn't ideal, but VECTOR_BOOLEAN_TYPE_P can
14924 be set for non-predicate vectors of booleans. Modes are the most
14925 direct way we have of identifying real SVE predicate types. */
14926 return GET_MODE_CLASS (TYPE_MODE (type)) == MODE_VECTOR_BOOL ? 16 : 128;
6c76c0e4 14927 return wi::umin (wi::to_wide (TYPE_SIZE (type)), 128).to_uhwi ();
db0253a4
TB
14928}
14929
43cacb12 14930/* Implement target hook TARGET_VECTORIZE_PREFERRED_VECTOR_ALIGNMENT. */
ca31798e 14931static poly_uint64
43cacb12
RS
14932aarch64_vectorize_preferred_vector_alignment (const_tree type)
14933{
14934 if (aarch64_sve_data_mode_p (TYPE_MODE (type)))
14935 {
14936 /* If the length of the vector is fixed, try to align to that length,
14937 otherwise don't try to align at all. */
14938 HOST_WIDE_INT result;
14939 if (!BITS_PER_SVE_VECTOR.is_constant (&result))
14940 result = TYPE_ALIGN (TREE_TYPE (type));
14941 return result;
14942 }
14943 return TYPE_ALIGN (type);
14944}
14945
db0253a4
TB
14946/* Implement target hook TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE. */
14947static bool
14948aarch64_simd_vector_alignment_reachable (const_tree type, bool is_packed)
14949{
14950 if (is_packed)
14951 return false;
14952
43cacb12
RS
14953 /* For fixed-length vectors, check that the vectorizer will aim for
14954 full-vector alignment. This isn't true for generic GCC vectors
14955 that are wider than the ABI maximum of 128 bits. */
ca31798e
AV
14956 poly_uint64 preferred_alignment =
14957 aarch64_vectorize_preferred_vector_alignment (type);
43cacb12 14958 if (TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
ca31798e
AV
14959 && maybe_ne (wi::to_widest (TYPE_SIZE (type)),
14960 preferred_alignment))
db0253a4
TB
14961 return false;
14962
14963 /* Vectors whose size is <= BIGGEST_ALIGNMENT are naturally aligned. */
14964 return true;
14965}
14966
7df76747
N
14967/* Return true if the vector misalignment factor is supported by the
14968 target. */
14969static bool
14970aarch64_builtin_support_vector_misalignment (machine_mode mode,
14971 const_tree type, int misalignment,
14972 bool is_packed)
14973{
14974 if (TARGET_SIMD && STRICT_ALIGNMENT)
14975 {
 14976 /* Return false if the movmisalign pattern is not supported for this mode. */
14977 if (optab_handler (movmisalign_optab, mode) == CODE_FOR_nothing)
14978 return false;
14979
a509c571 14980 /* Misalignment factor is unknown at compile time. */
7df76747 14981 if (misalignment == -1)
a509c571 14982 return false;
7df76747
N
14983 }
14984 return default_builtin_support_vector_misalignment (mode, type, misalignment,
14985 is_packed);
14986}
14987
4369c11e
TB
14988/* If VALS is a vector constant that can be loaded into a register
14989 using DUP, generate instructions to do so and return an RTX to
14990 assign to the register. Otherwise return NULL_RTX. */
14991static rtx
14992aarch64_simd_dup_constant (rtx vals)
14993{
ef4bddc2
RS
14994 machine_mode mode = GET_MODE (vals);
14995 machine_mode inner_mode = GET_MODE_INNER (mode);
4369c11e 14996 rtx x;
4369c11e 14997
92695fbb 14998 if (!const_vec_duplicate_p (vals, &x))
4369c11e
TB
14999 return NULL_RTX;
15000
15001 /* We can load this constant by using DUP and a constant in a
15002 single ARM register. This will be cheaper than a vector
15003 load. */
92695fbb 15004 x = copy_to_mode_reg (inner_mode, x);
59d06c05 15005 return gen_vec_duplicate (mode, x);
4369c11e
TB
15006}
15007
15008
15009/* Generate code to load VALS, which is a PARALLEL containing only
15010 constants (for vec_init) or CONST_VECTOR, efficiently into a
15011 register. Returns an RTX to copy into the register, or NULL_RTX
67914693 15012 for a PARALLEL that cannot be converted into a CONST_VECTOR. */
1df3f464 15013static rtx
4369c11e
TB
15014aarch64_simd_make_constant (rtx vals)
15015{
ef4bddc2 15016 machine_mode mode = GET_MODE (vals);
4369c11e
TB
15017 rtx const_dup;
15018 rtx const_vec = NULL_RTX;
4369c11e
TB
15019 int n_const = 0;
15020 int i;
15021
15022 if (GET_CODE (vals) == CONST_VECTOR)
15023 const_vec = vals;
15024 else if (GET_CODE (vals) == PARALLEL)
15025 {
15026 /* A CONST_VECTOR must contain only CONST_INTs and
15027 CONST_DOUBLEs, but CONSTANT_P allows more (e.g. SYMBOL_REF).
15028 Only store valid constants in a CONST_VECTOR. */
6a70badb 15029 int n_elts = XVECLEN (vals, 0);
4369c11e
TB
15030 for (i = 0; i < n_elts; ++i)
15031 {
15032 rtx x = XVECEXP (vals, 0, i);
15033 if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
15034 n_const++;
15035 }
15036 if (n_const == n_elts)
15037 const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0));
15038 }
15039 else
15040 gcc_unreachable ();
15041
15042 if (const_vec != NULL_RTX
b187677b 15043 && aarch64_simd_valid_immediate (const_vec, NULL))
4369c11e
TB
15044 /* Load using MOVI/MVNI. */
15045 return const_vec;
15046 else if ((const_dup = aarch64_simd_dup_constant (vals)) != NULL_RTX)
15047 /* Loaded using DUP. */
15048 return const_dup;
15049 else if (const_vec != NULL_RTX)
67914693 15050 /* Load from constant pool. We cannot take advantage of single-cycle
4369c11e
TB
15051 LD1 because we need a PC-relative addressing mode. */
15052 return const_vec;
15053 else
15054 /* A PARALLEL containing something not valid inside CONST_VECTOR.
67914693 15055 We cannot construct an initializer. */
4369c11e
TB
15056 return NULL_RTX;
15057}
15058
35a093b6
JG
15059/* Expand a vector initialisation sequence, such that TARGET is
15060 initialised to contain VALS. */
15061
4369c11e
TB
15062void
15063aarch64_expand_vector_init (rtx target, rtx vals)
15064{
ef4bddc2 15065 machine_mode mode = GET_MODE (target);
146c2e3a 15066 scalar_mode inner_mode = GET_MODE_INNER (mode);
35a093b6 15067 /* The number of vector elements. */
6a70badb 15068 int n_elts = XVECLEN (vals, 0);
35a093b6 15069 /* The number of vector elements which are not constant. */
8b66a2d4
AL
15070 int n_var = 0;
15071 rtx any_const = NULL_RTX;
35a093b6
JG
15072 /* The first element of vals. */
15073 rtx v0 = XVECEXP (vals, 0, 0);
4369c11e 15074 bool all_same = true;
4369c11e 15075
35a093b6 15076 /* Count the number of variable elements to initialise. */
8b66a2d4 15077 for (int i = 0; i < n_elts; ++i)
4369c11e 15078 {
8b66a2d4 15079 rtx x = XVECEXP (vals, 0, i);
35a093b6 15080 if (!(CONST_INT_P (x) || CONST_DOUBLE_P (x)))
8b66a2d4
AL
15081 ++n_var;
15082 else
15083 any_const = x;
4369c11e 15084
35a093b6 15085 all_same &= rtx_equal_p (x, v0);
4369c11e
TB
15086 }
15087
35a093b6
JG
15088 /* No variable elements, hand off to aarch64_simd_make_constant which knows
15089 how best to handle this. */
4369c11e
TB
15090 if (n_var == 0)
15091 {
15092 rtx constant = aarch64_simd_make_constant (vals);
15093 if (constant != NULL_RTX)
15094 {
15095 emit_move_insn (target, constant);
15096 return;
15097 }
15098 }
15099
15100 /* Splat a single non-constant element if we can. */
15101 if (all_same)
15102 {
35a093b6 15103 rtx x = copy_to_mode_reg (inner_mode, v0);
59d06c05 15104 aarch64_emit_move (target, gen_vec_duplicate (mode, x));
4369c11e
TB
15105 return;
15106 }
15107
85c1b6d7
AP
15108 enum insn_code icode = optab_handler (vec_set_optab, mode);
15109 gcc_assert (icode != CODE_FOR_nothing);
15110
15111 /* If there are only variable elements, try to optimize
15112 the insertion using dup for the most common element
15113 followed by insertions. */
15114
15115 /* The algorithm will fill matches[*][0] with the earliest matching element,
15116 and matches[X][1] with the count of duplicate elements (if X is the
15117 earliest element which has duplicates). */
15118
15119 if (n_var == n_elts && n_elts <= 16)
15120 {
15121 int matches[16][2] = {0};
15122 for (int i = 0; i < n_elts; i++)
15123 {
15124 for (int j = 0; j <= i; j++)
15125 {
15126 if (rtx_equal_p (XVECEXP (vals, 0, i), XVECEXP (vals, 0, j)))
15127 {
15128 matches[i][0] = j;
15129 matches[j][1]++;
15130 break;
15131 }
15132 }
15133 }
15134 int maxelement = 0;
15135 int maxv = 0;
15136 for (int i = 0; i < n_elts; i++)
15137 if (matches[i][1] > maxv)
15138 {
15139 maxelement = i;
15140 maxv = matches[i][1];
15141 }
15142
b4e2cd5b
JG
15143 /* Create a duplicate of the most common element, unless all elements
15144 are equally useless to us, in which case just immediately set the
15145 vector register using the first element. */
15146
15147 if (maxv == 1)
15148 {
15149 /* For vectors of two 64-bit elements, we can do even better. */
15150 if (n_elts == 2
15151 && (inner_mode == E_DImode
15152 || inner_mode == E_DFmode))
15153
15154 {
15155 rtx x0 = XVECEXP (vals, 0, 0);
15156 rtx x1 = XVECEXP (vals, 0, 1);
15157 /* Combine can pick up this case, but handling it directly
15158 here leaves clearer RTL.
15159
15160 This is load_pair_lanes<mode>, and also gives us a clean-up
15161 for store_pair_lanes<mode>. */
15162 if (memory_operand (x0, inner_mode)
15163 && memory_operand (x1, inner_mode)
15164 && !STRICT_ALIGNMENT
15165 && rtx_equal_p (XEXP (x1, 0),
15166 plus_constant (Pmode,
15167 XEXP (x0, 0),
15168 GET_MODE_SIZE (inner_mode))))
15169 {
15170 rtx t;
15171 if (inner_mode == DFmode)
15172 t = gen_load_pair_lanesdf (target, x0, x1);
15173 else
15174 t = gen_load_pair_lanesdi (target, x0, x1);
15175 emit_insn (t);
15176 return;
15177 }
15178 }
15179 /* The subreg-move sequence below will move into lane zero of the
15180 vector register. For big-endian we want that position to hold
15181 the last element of VALS. */
15182 maxelement = BYTES_BIG_ENDIAN ? n_elts - 1 : 0;
15183 rtx x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, maxelement));
15184 aarch64_emit_move (target, lowpart_subreg (mode, x, inner_mode));
15185 }
15186 else
15187 {
15188 rtx x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, maxelement));
15189 aarch64_emit_move (target, gen_vec_duplicate (mode, x));
15190 }
85c1b6d7
AP
15191
15192 /* Insert the rest. */
15193 for (int i = 0; i < n_elts; i++)
15194 {
15195 rtx x = XVECEXP (vals, 0, i);
15196 if (matches[i][0] == maxelement)
15197 continue;
15198 x = copy_to_mode_reg (inner_mode, x);
15199 emit_insn (GEN_FCN (icode) (target, x, GEN_INT (i)));
15200 }
15201 return;
15202 }
15203
35a093b6
JG
15204 /* Initialise a vector which is part-variable. We want to first try
15205 to build those lanes which are constant in the most efficient way we
15206 can. */
15207 if (n_var != n_elts)
4369c11e
TB
15208 {
15209 rtx copy = copy_rtx (vals);
4369c11e 15210
8b66a2d4
AL
15211 /* Load constant part of vector. We really don't care what goes into the
15212 parts we will overwrite, but we're more likely to be able to load the
15213 constant efficiently if it has fewer, larger, repeating parts
15214 (see aarch64_simd_valid_immediate). */
15215 for (int i = 0; i < n_elts; i++)
15216 {
15217 rtx x = XVECEXP (vals, 0, i);
15218 if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
15219 continue;
15220 rtx subst = any_const;
15221 for (int bit = n_elts / 2; bit > 0; bit /= 2)
15222 {
15223 /* Look in the copied vector, as more elements are const. */
15224 rtx test = XVECEXP (copy, 0, i ^ bit);
15225 if (CONST_INT_P (test) || CONST_DOUBLE_P (test))
15226 {
15227 subst = test;
15228 break;
15229 }
15230 }
15231 XVECEXP (copy, 0, i) = subst;
15232 }
4369c11e 15233 aarch64_expand_vector_init (target, copy);
35a093b6 15234 }
4369c11e 15235
35a093b6 15236 /* Insert the variable lanes directly. */
8b66a2d4 15237 for (int i = 0; i < n_elts; i++)
35a093b6
JG
15238 {
15239 rtx x = XVECEXP (vals, 0, i);
15240 if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
15241 continue;
15242 x = copy_to_mode_reg (inner_mode, x);
15243 emit_insn (GEN_FCN (icode) (target, x, GEN_INT (i)));
15244 }
4369c11e
TB
15245}
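/* Illustrative sketch (not part of GCC and excluded from the build): when
   every lane is variable, the code above first finds the element with the
   most duplicates, DUPs it across the whole register and then inserts the
   remaining lanes individually.  most_common_index below is a standalone
   model of the matches[][] bookkeeping, invented for this example and
   shown on plain integers.  */
#if 0
#include <assert.h>

static int
most_common_index (const int *vals, int n)
{
  int matches[16][2] = { { 0 } };
  for (int i = 0; i < n; i++)
    for (int j = 0; j <= i; j++)
      if (vals[i] == vals[j])
	{
	  matches[i][0] = j;	/* Earliest element equal to vals[i].  */
	  matches[j][1]++;	/* Duplicate count for that element.  */
	  break;
	}
  int best = 0, best_count = 0;
  for (int i = 0; i < n; i++)
    if (matches[i][1] > best_count)
      {
	best = i;
	best_count = matches[i][1];
      }
  return best;
}

int
main (void)
{
  int vals[4] = { 7, 3, 7, 7 };
  assert (most_common_index (vals, 4) == 0);	/* 7 occurs three times.  */
  return 0;
}
#endif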
15246
43e9d192 15247static unsigned HOST_WIDE_INT
ef4bddc2 15248aarch64_shift_truncation_mask (machine_mode mode)
43e9d192 15249{
43cacb12
RS
15250 if (!SHIFT_COUNT_TRUNCATED || aarch64_vector_data_mode_p (mode))
15251 return 0;
15252 return GET_MODE_UNIT_BITSIZE (mode) - 1;
43e9d192
IB
15253}
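/* Illustrative sketch (not part of GCC and excluded from the build): a
   non-zero mask from the hook above tells the middle-end that shift counts
   are interpreted modulo the element width (mask 63 for 64-bit elements,
   31 for 32-bit ones), while a mask of 0 means no truncation may be
   assumed.  truncate_shift_count is a name invented for this example.  */
#if 0
#include <assert.h>

static unsigned int
truncate_shift_count (unsigned int count, unsigned int mask)
{
  /* A zero mask means the count must be used as written.  */
  return mask ? (count & mask) : count;
}

int
main (void)
{
  assert (truncate_shift_count (65, 63) == 1);	 /* 64-bit elements.  */
  assert (truncate_shift_count (65, 0) == 65);	 /* No truncation assumed.  */
  return 0;
}
#endif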
15254
43e9d192
IB
15255/* Select a format to encode pointers in exception handling data. */
15256int
15257aarch64_asm_preferred_eh_data_format (int code ATTRIBUTE_UNUSED, int global)
15258{
15259 int type;
15260 switch (aarch64_cmodel)
15261 {
15262 case AARCH64_CMODEL_TINY:
15263 case AARCH64_CMODEL_TINY_PIC:
15264 case AARCH64_CMODEL_SMALL:
15265 case AARCH64_CMODEL_SMALL_PIC:
1b1e81f8 15266 case AARCH64_CMODEL_SMALL_SPIC:
43e9d192
IB
15267 /* text+got+data < 4Gb. 4-byte signed relocs are sufficient
15268 for everything. */
15269 type = DW_EH_PE_sdata4;
15270 break;
15271 default:
15272 /* No assumptions here. 8-byte relocs required. */
15273 type = DW_EH_PE_sdata8;
15274 break;
15275 }
15276 return (global ? DW_EH_PE_indirect : 0) | DW_EH_PE_pcrel | type;
15277}
15278
e1c1ecb0
KT
15279/* The last .arch and .tune assembly strings that we printed. */
15280static std::string aarch64_last_printed_arch_string;
15281static std::string aarch64_last_printed_tune_string;
15282
361fb3ee
KT
15283/* Implement ASM_DECLARE_FUNCTION_NAME. Output the ISA features used
15284 by the function fndecl. */
15285
15286void
15287aarch64_declare_function_name (FILE *stream, const char* name,
15288 tree fndecl)
15289{
15290 tree target_parts = DECL_FUNCTION_SPECIFIC_TARGET (fndecl);
15291
15292 struct cl_target_option *targ_options;
15293 if (target_parts)
15294 targ_options = TREE_TARGET_OPTION (target_parts);
15295 else
15296 targ_options = TREE_TARGET_OPTION (target_option_current_node);
15297 gcc_assert (targ_options);
15298
15299 const struct processor *this_arch
15300 = aarch64_get_arch (targ_options->x_explicit_arch);
15301
054b4005
JG
15302 unsigned long isa_flags = targ_options->x_aarch64_isa_flags;
15303 std::string extension
04a99ebe
JG
15304 = aarch64_get_extension_string_for_isa_flags (isa_flags,
15305 this_arch->flags);
e1c1ecb0
KT
15306 /* Only update the assembler .arch string if it is distinct from the last
15307 such string we printed. */
15308 std::string to_print = this_arch->name + extension;
15309 if (to_print != aarch64_last_printed_arch_string)
15310 {
15311 asm_fprintf (asm_out_file, "\t.arch %s\n", to_print.c_str ());
15312 aarch64_last_printed_arch_string = to_print;
15313 }
361fb3ee
KT
15314
 15315 /* Print the cpu name we're tuning for in the comments; it might be
e1c1ecb0
KT
15316 useful to readers of the generated asm. Do it only when it changes
15317 from function to function and verbose assembly is requested. */
361fb3ee
KT
15318 const struct processor *this_tune
15319 = aarch64_get_tune_cpu (targ_options->x_explicit_tune_core);
15320
e1c1ecb0
KT
15321 if (flag_debug_asm && aarch64_last_printed_tune_string != this_tune->name)
15322 {
15323 asm_fprintf (asm_out_file, "\t" ASM_COMMENT_START ".tune %s\n",
15324 this_tune->name);
15325 aarch64_last_printed_tune_string = this_tune->name;
15326 }
361fb3ee
KT
15327
15328 /* Don't forget the type directive for ELF. */
15329 ASM_OUTPUT_TYPE_DIRECTIVE (stream, name, "function");
15330 ASM_OUTPUT_LABEL (stream, name);
15331}
15332
e1c1ecb0
KT
15333/* Implements TARGET_ASM_FILE_START. Output the assembly header. */
15334
15335static void
15336aarch64_start_file (void)
15337{
15338 struct cl_target_option *default_options
15339 = TREE_TARGET_OPTION (target_option_default_node);
15340
15341 const struct processor *default_arch
15342 = aarch64_get_arch (default_options->x_explicit_arch);
15343 unsigned long default_isa_flags = default_options->x_aarch64_isa_flags;
15344 std::string extension
04a99ebe
JG
15345 = aarch64_get_extension_string_for_isa_flags (default_isa_flags,
15346 default_arch->flags);
e1c1ecb0
KT
15347
15348 aarch64_last_printed_arch_string = default_arch->name + extension;
15349 aarch64_last_printed_tune_string = "";
15350 asm_fprintf (asm_out_file, "\t.arch %s\n",
15351 aarch64_last_printed_arch_string.c_str ());
15352
15353 default_file_start ();
15354}
15355
0462169c
SN
15356/* Emit load exclusive. */
15357
15358static void
ef4bddc2 15359aarch64_emit_load_exclusive (machine_mode mode, rtx rval,
0462169c
SN
15360 rtx mem, rtx model_rtx)
15361{
0016d8d9 15362 emit_insn (gen_aarch64_load_exclusive (mode, rval, mem, model_rtx));
0462169c
SN
15363}
15364
15365/* Emit store exclusive. */
15366
15367static void
ef4bddc2 15368aarch64_emit_store_exclusive (machine_mode mode, rtx bval,
0462169c
SN
15369 rtx rval, rtx mem, rtx model_rtx)
15370{
0016d8d9 15371 emit_insn (gen_aarch64_store_exclusive (mode, bval, rval, mem, model_rtx));
0462169c
SN
15372}
15373
15374/* Mark the previous jump instruction as unlikely. */
15375
15376static void
15377aarch64_emit_unlikely_jump (rtx insn)
15378{
f370536c 15379 rtx_insn *jump = emit_jump_insn (insn);
5fa396ad 15380 add_reg_br_prob_note (jump, profile_probability::very_unlikely ());
0462169c
SN
15381}
15382
15383/* Expand a compare and swap pattern. */
15384
15385void
15386aarch64_expand_compare_and_swap (rtx operands[])
15387{
d400fda3
RH
15388 rtx bval, rval, mem, oldval, newval, is_weak, mod_s, mod_f, x, cc_reg;
15389 machine_mode mode, r_mode;
0462169c
SN
15390
15391 bval = operands[0];
15392 rval = operands[1];
15393 mem = operands[2];
15394 oldval = operands[3];
15395 newval = operands[4];
15396 is_weak = operands[5];
15397 mod_s = operands[6];
15398 mod_f = operands[7];
15399 mode = GET_MODE (mem);
0462169c
SN
15400
15401 /* Normally the succ memory model must be stronger than fail, but in the
15402 unlikely event of fail being ACQUIRE and succ being RELEASE we need to
15403 promote succ to ACQ_REL so that we don't lose the acquire semantics. */
46b35980
AM
15404 if (is_mm_acquire (memmodel_from_int (INTVAL (mod_f)))
15405 && is_mm_release (memmodel_from_int (INTVAL (mod_s))))
0462169c
SN
15406 mod_s = GEN_INT (MEMMODEL_ACQ_REL);
15407
d400fda3
RH
15408 r_mode = mode;
15409 if (mode == QImode || mode == HImode)
0462169c 15410 {
d400fda3
RH
15411 r_mode = SImode;
15412 rval = gen_reg_rtx (r_mode);
0462169c
SN
15413 }
15414
b0770c0f 15415 if (TARGET_LSE)
77f33f44
RH
15416 {
15417 /* The CAS insn requires oldval and rval overlap, but we need to
15418 have a copy of oldval saved across the operation to tell if
15419 the operation is successful. */
d400fda3
RH
15420 if (reg_overlap_mentioned_p (rval, oldval))
15421 rval = copy_to_mode_reg (r_mode, oldval);
77f33f44 15422 else
d400fda3
RH
15423 emit_move_insn (rval, gen_lowpart (r_mode, oldval));
15424
77f33f44
RH
15425 emit_insn (gen_aarch64_compare_and_swap_lse (mode, rval, mem,
15426 newval, mod_s));
d400fda3 15427 cc_reg = aarch64_gen_compare_reg_maybe_ze (NE, rval, oldval, mode);
77f33f44 15428 }
b0770c0f 15429 else
d400fda3
RH
15430 {
15431 /* The oldval predicate varies by mode. Test it and force to reg. */
15432 insn_code code = code_for_aarch64_compare_and_swap (mode);
15433 if (!insn_data[code].operand[2].predicate (oldval, mode))
15434 oldval = force_reg (mode, oldval);
0462169c 15435
d400fda3
RH
15436 emit_insn (GEN_FCN (code) (rval, mem, oldval, newval,
15437 is_weak, mod_s, mod_f));
15438 cc_reg = gen_rtx_REG (CCmode, CC_REGNUM);
15439 }
15440
15441 if (r_mode != mode)
77f33f44
RH
15442 rval = gen_lowpart (mode, rval);
15443 emit_move_insn (operands[1], rval);
0462169c 15444
d400fda3 15445 x = gen_rtx_EQ (SImode, cc_reg, const0_rtx);
f7df4a84 15446 emit_insn (gen_rtx_SET (bval, x));
0462169c
SN
15447}
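/* Illustrative sketch (not part of GCC and excluded from the build): the
   memory-model fixup above can be pictured at the source level with C11
   atomics.  cas_with_orders is a helper invented for this example; it only
   models the promotion of a RELEASE success order to ACQ_REL when the
   failure order is ACQUIRE, so the acquire semantics are not lost.  */
#if 0
#include <stdatomic.h>
#include <stdbool.h>

static bool
cas_with_orders (atomic_int *mem, int *expected, int desired,
		 memory_order succ, memory_order fail)
{
  /* Keep the acquire semantics even if the caller only asked for release
     ordering on success.  */
  if (fail == memory_order_acquire && succ == memory_order_release)
    succ = memory_order_acq_rel;
  return atomic_compare_exchange_strong_explicit (mem, expected, desired,
						  succ, fail);
}
#endif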
15448
f70fb3b6
MW
15449/* Emit a barrier, that is appropriate for memory model MODEL, at the end of a
15450 sequence implementing an atomic operation. */
15451
15452static void
15453aarch64_emit_post_barrier (enum memmodel model)
15454{
15455 const enum memmodel base_model = memmodel_base (model);
15456
15457 if (is_mm_sync (model)
15458 && (base_model == MEMMODEL_ACQUIRE
15459 || base_model == MEMMODEL_ACQ_REL
15460 || base_model == MEMMODEL_SEQ_CST))
15461 {
15462 emit_insn (gen_mem_thread_fence (GEN_INT (MEMMODEL_SEQ_CST)));
15463 }
15464}
15465
0462169c
SN
15466/* Split a compare and swap pattern. */
15467
15468void
15469aarch64_split_compare_and_swap (rtx operands[])
15470{
15471 rtx rval, mem, oldval, newval, scratch;
ef4bddc2 15472 machine_mode mode;
0462169c 15473 bool is_weak;
5d8a22a5
DM
15474 rtx_code_label *label1, *label2;
15475 rtx x, cond;
ab876106
MW
15476 enum memmodel model;
15477 rtx model_rtx;
0462169c
SN
15478
15479 rval = operands[0];
15480 mem = operands[1];
15481 oldval = operands[2];
15482 newval = operands[3];
15483 is_weak = (operands[4] != const0_rtx);
ab876106 15484 model_rtx = operands[5];
0462169c
SN
15485 scratch = operands[7];
15486 mode = GET_MODE (mem);
ab876106 15487 model = memmodel_from_int (INTVAL (model_rtx));
0462169c 15488
17f47f86
KT
 15489 /* When OLDVAL is zero and we want the strong version, we can emit a tighter
15490 loop:
15491 .label1:
15492 LD[A]XR rval, [mem]
15493 CBNZ rval, .label2
15494 ST[L]XR scratch, newval, [mem]
15495 CBNZ scratch, .label1
15496 .label2:
15497 CMP rval, 0. */
15498 bool strong_zero_p = !is_weak && oldval == const0_rtx;
15499
5d8a22a5 15500 label1 = NULL;
0462169c
SN
15501 if (!is_weak)
15502 {
15503 label1 = gen_label_rtx ();
15504 emit_label (label1);
15505 }
15506 label2 = gen_label_rtx ();
15507
ab876106
MW
15508 /* The initial load can be relaxed for a __sync operation since a final
15509 barrier will be emitted to stop code hoisting. */
15510 if (is_mm_sync (model))
15511 aarch64_emit_load_exclusive (mode, rval, mem,
15512 GEN_INT (MEMMODEL_RELAXED));
15513 else
15514 aarch64_emit_load_exclusive (mode, rval, mem, model_rtx);
0462169c 15515
17f47f86
KT
15516 if (strong_zero_p)
15517 {
6e1eaca9
RE
15518 if (aarch64_track_speculation)
15519 {
15520 /* Emit an explicit compare instruction, so that we can correctly
15521 track the condition codes. */
15522 rtx cc_reg = aarch64_gen_compare_reg (NE, rval, const0_rtx);
15523 x = gen_rtx_NE (GET_MODE (cc_reg), cc_reg, const0_rtx);
15524 }
15525 else
15526 x = gen_rtx_NE (VOIDmode, rval, const0_rtx);
15527
17f47f86
KT
15528 x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
15529 gen_rtx_LABEL_REF (Pmode, label2), pc_rtx);
15530 aarch64_emit_unlikely_jump (gen_rtx_SET (pc_rtx, x));
15531 }
15532 else
15533 {
d400fda3 15534 cond = aarch64_gen_compare_reg_maybe_ze (NE, rval, oldval, mode);
17f47f86
KT
15535 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
15536 x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
d400fda3 15537 gen_rtx_LABEL_REF (Pmode, label2), pc_rtx);
17f47f86
KT
15538 aarch64_emit_unlikely_jump (gen_rtx_SET (pc_rtx, x));
15539 }
0462169c 15540
ab876106 15541 aarch64_emit_store_exclusive (mode, scratch, mem, newval, model_rtx);
0462169c
SN
15542
15543 if (!is_weak)
15544 {
6e1eaca9
RE
15545 if (aarch64_track_speculation)
15546 {
15547 /* Emit an explicit compare instruction, so that we can correctly
15548 track the condition codes. */
15549 rtx cc_reg = aarch64_gen_compare_reg (NE, scratch, const0_rtx);
15550 x = gen_rtx_NE (GET_MODE (cc_reg), cc_reg, const0_rtx);
15551 }
15552 else
15553 x = gen_rtx_NE (VOIDmode, scratch, const0_rtx);
15554
0462169c
SN
15555 x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
15556 gen_rtx_LABEL_REF (Pmode, label1), pc_rtx);
f7df4a84 15557 aarch64_emit_unlikely_jump (gen_rtx_SET (pc_rtx, x));
0462169c
SN
15558 }
15559 else
15560 {
15561 cond = gen_rtx_REG (CCmode, CC_REGNUM);
15562 x = gen_rtx_COMPARE (CCmode, scratch, const0_rtx);
f7df4a84 15563 emit_insn (gen_rtx_SET (cond, x));
0462169c
SN
15564 }
15565
15566 emit_label (label2);
17f47f86
KT
15567 /* If we used a CBNZ in the exchange loop emit an explicit compare with RVAL
15568 to set the condition flags. If this is not used it will be removed by
15569 later passes. */
15570 if (strong_zero_p)
15571 {
15572 cond = gen_rtx_REG (CCmode, CC_REGNUM);
15573 x = gen_rtx_COMPARE (CCmode, rval, const0_rtx);
15574 emit_insn (gen_rtx_SET (cond, x));
15575 }
ab876106
MW
15576 /* Emit any final barrier needed for a __sync operation. */
15577 if (is_mm_sync (model))
15578 aarch64_emit_post_barrier (model);
0462169c 15579}
9cd7b720 15580
0462169c
SN
15581/* Split an atomic operation. */
15582
15583void
15584aarch64_split_atomic_op (enum rtx_code code, rtx old_out, rtx new_out, rtx mem,
9cd7b720 15585 rtx value, rtx model_rtx, rtx cond)
0462169c 15586{
ef4bddc2
RS
15587 machine_mode mode = GET_MODE (mem);
15588 machine_mode wmode = (mode == DImode ? DImode : SImode);
f70fb3b6
MW
15589 const enum memmodel model = memmodel_from_int (INTVAL (model_rtx));
15590 const bool is_sync = is_mm_sync (model);
5d8a22a5
DM
15591 rtx_code_label *label;
15592 rtx x;
0462169c 15593
9cd7b720 15594 /* Split the atomic operation into a sequence. */
0462169c
SN
15595 label = gen_label_rtx ();
15596 emit_label (label);
15597
15598 if (new_out)
15599 new_out = gen_lowpart (wmode, new_out);
15600 if (old_out)
15601 old_out = gen_lowpart (wmode, old_out);
15602 else
15603 old_out = new_out;
15604 value = simplify_gen_subreg (wmode, value, mode, 0);
15605
f70fb3b6
MW
15606 /* The initial load can be relaxed for a __sync operation since a final
15607 barrier will be emitted to stop code hoisting. */
15608 if (is_sync)
15609 aarch64_emit_load_exclusive (mode, old_out, mem,
15610 GEN_INT (MEMMODEL_RELAXED));
15611 else
15612 aarch64_emit_load_exclusive (mode, old_out, mem, model_rtx);
0462169c
SN
15613
15614 switch (code)
15615 {
15616 case SET:
15617 new_out = value;
15618 break;
15619
15620 case NOT:
15621 x = gen_rtx_AND (wmode, old_out, value);
f7df4a84 15622 emit_insn (gen_rtx_SET (new_out, x));
0462169c 15623 x = gen_rtx_NOT (wmode, new_out);
f7df4a84 15624 emit_insn (gen_rtx_SET (new_out, x));
0462169c
SN
15625 break;
15626
15627 case MINUS:
15628 if (CONST_INT_P (value))
15629 {
15630 value = GEN_INT (-INTVAL (value));
15631 code = PLUS;
15632 }
15633 /* Fall through. */
15634
15635 default:
15636 x = gen_rtx_fmt_ee (code, wmode, old_out, value);
f7df4a84 15637 emit_insn (gen_rtx_SET (new_out, x));
0462169c
SN
15638 break;
15639 }
15640
15641 aarch64_emit_store_exclusive (mode, cond, mem,
15642 gen_lowpart (mode, new_out), model_rtx);
15643
6e1eaca9
RE
15644 if (aarch64_track_speculation)
15645 {
15646 /* Emit an explicit compare instruction, so that we can correctly
15647 track the condition codes. */
15648 rtx cc_reg = aarch64_gen_compare_reg (NE, cond, const0_rtx);
15649 x = gen_rtx_NE (GET_MODE (cc_reg), cc_reg, const0_rtx);
15650 }
15651 else
15652 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
15653
0462169c
SN
15654 x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
15655 gen_rtx_LABEL_REF (Pmode, label), pc_rtx);
f7df4a84 15656 aarch64_emit_unlikely_jump (gen_rtx_SET (pc_rtx, x));
f70fb3b6
MW
15657
15658 /* Emit any final barrier needed for a __sync operation. */
15659 if (is_sync)
15660 aarch64_emit_post_barrier (model);
0462169c
SN
15661}
15662
c2ec330c
AL
15663static void
15664aarch64_init_libfuncs (void)
15665{
15666 /* Half-precision float operations. The compiler handles all operations
15667 with NULL libfuncs by converting to SFmode. */
15668
15669 /* Conversions. */
15670 set_conv_libfunc (trunc_optab, HFmode, SFmode, "__gnu_f2h_ieee");
15671 set_conv_libfunc (sext_optab, SFmode, HFmode, "__gnu_h2f_ieee");
15672
15673 /* Arithmetic. */
15674 set_optab_libfunc (add_optab, HFmode, NULL);
15675 set_optab_libfunc (sdiv_optab, HFmode, NULL);
15676 set_optab_libfunc (smul_optab, HFmode, NULL);
15677 set_optab_libfunc (neg_optab, HFmode, NULL);
15678 set_optab_libfunc (sub_optab, HFmode, NULL);
15679
15680 /* Comparisons. */
15681 set_optab_libfunc (eq_optab, HFmode, NULL);
15682 set_optab_libfunc (ne_optab, HFmode, NULL);
15683 set_optab_libfunc (lt_optab, HFmode, NULL);
15684 set_optab_libfunc (le_optab, HFmode, NULL);
15685 set_optab_libfunc (ge_optab, HFmode, NULL);
15686 set_optab_libfunc (gt_optab, HFmode, NULL);
15687 set_optab_libfunc (unord_optab, HFmode, NULL);
15688}
15689
43e9d192 15690/* Target hook for c_mode_for_suffix. */
ef4bddc2 15691static machine_mode
43e9d192
IB
15692aarch64_c_mode_for_suffix (char suffix)
15693{
15694 if (suffix == 'q')
15695 return TFmode;
15696
15697 return VOIDmode;
15698}
15699
3520f7cc
JG
15700/* We can only represent floating point constants which will fit in
15701 "quarter-precision" values. These values are characterised by
15702 a sign bit, a 4-bit mantissa and a 3-bit exponent. And are given
15703 by:
15704
15705 (-1)^s * (n/16) * 2^r
15706
15707 Where:
15708 's' is the sign bit.
15709 'n' is an integer in the range 16 <= n <= 31.
15710 'r' is an integer in the range -3 <= r <= 4. */
15711
15712/* Return true iff X can be represented by a quarter-precision
 15713 floating point immediate operand. Note, we cannot represent 0.0. */
15714bool
15715aarch64_float_const_representable_p (rtx x)
15716{
15717 /* This represents our current view of how many bits
15718 make up the mantissa. */
15719 int point_pos = 2 * HOST_BITS_PER_WIDE_INT - 1;
ba96cdfb 15720 int exponent;
3520f7cc 15721 unsigned HOST_WIDE_INT mantissa, mask;
3520f7cc 15722 REAL_VALUE_TYPE r, m;
807e902e 15723 bool fail;
3520f7cc
JG
15724
15725 if (!CONST_DOUBLE_P (x))
15726 return false;
15727
a4518821
RS
15728 if (GET_MODE (x) == VOIDmode
15729 || (GET_MODE (x) == HFmode && !TARGET_FP_F16INST))
94bfa2da
TV
15730 return false;
15731
34a72c33 15732 r = *CONST_DOUBLE_REAL_VALUE (x);
3520f7cc
JG
15733
15734 /* We cannot represent infinities, NaNs or +/-zero. We won't
15735 know if we have +zero until we analyse the mantissa, but we
15736 can reject the other invalid values. */
15737 if (REAL_VALUE_ISINF (r) || REAL_VALUE_ISNAN (r)
15738 || REAL_VALUE_MINUS_ZERO (r))
15739 return false;
15740
ba96cdfb 15741 /* Extract exponent. */
3520f7cc
JG
15742 r = real_value_abs (&r);
15743 exponent = REAL_EXP (&r);
15744
15745 /* For the mantissa, we expand into two HOST_WIDE_INTS, apart from the
15746 highest (sign) bit, with a fixed binary point at bit point_pos.
15747 m1 holds the low part of the mantissa, m2 the high part.
15748 WARNING: If we ever have a representation using more than 2 * H_W_I - 1
15749 bits for the mantissa, this can fail (low bits will be lost). */
15750 real_ldexp (&m, &r, point_pos - exponent);
807e902e 15751 wide_int w = real_to_integer (&m, &fail, HOST_BITS_PER_WIDE_INT * 2);
3520f7cc
JG
15752
15753 /* If the low part of the mantissa has bits set we cannot represent
15754 the value. */
d9074b29 15755 if (w.ulow () != 0)
3520f7cc
JG
15756 return false;
15757 /* We have rejected the lower HOST_WIDE_INT, so update our
15758 understanding of how many bits lie in the mantissa and
15759 look only at the high HOST_WIDE_INT. */
807e902e 15760 mantissa = w.elt (1);
3520f7cc
JG
15761 point_pos -= HOST_BITS_PER_WIDE_INT;
15762
15763 /* We can only represent values with a mantissa of the form 1.xxxx. */
15764 mask = ((unsigned HOST_WIDE_INT)1 << (point_pos - 5)) - 1;
15765 if ((mantissa & mask) != 0)
15766 return false;
15767
15768 /* Having filtered unrepresentable values, we may now remove all
15769 but the highest 5 bits. */
15770 mantissa >>= point_pos - 5;
15771
15772 /* We cannot represent the value 0.0, so reject it. This is handled
15773 elsewhere. */
15774 if (mantissa == 0)
15775 return false;
15776
15777 /* Then, as bit 4 is always set, we can mask it off, leaving
15778 the mantissa in the range [0, 15]. */
15779 mantissa &= ~(1 << 4);
15780 gcc_assert (mantissa <= 15);
15781
15782 /* GCC internally does not use IEEE754-like encoding (where normalized
15783 significands are in the range [1, 2). GCC uses [0.5, 1) (see real.c).
15784 Our mantissa values are shifted 4 places to the left relative to
15785 normalized IEEE754 so we must modify the exponent returned by REAL_EXP
15786 by 5 places to correct for GCC's representation. */
15787 exponent = 5 - exponent;
15788
15789 return (exponent >= 0 && exponent <= 7);
15790}
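/* Illustrative sketch (not part of GCC and excluded from the build): the
   bit-level test above accepts exactly the values of the form
   (-1)^s * (n/16) * 2^r with 16 <= n <= 31 and -3 <= r <= 4.  The
   brute-force checker below, quarter_precision_p, is invented for this
   example and enumerates that form directly rather than inspecting the
   mantissa bits.  */
#if 0
#include <assert.h>
#include <math.h>
#include <stdbool.h>

static bool
quarter_precision_p (double x)
{
  if (x == 0.0 || !isfinite (x))
    return false;
  x = fabs (x);
  for (int r = -3; r <= 4; r++)
    for (int n = 16; n <= 31; n++)
      if (x == ldexp ((double) n / 16.0, r))
	return true;
  return false;
}

int
main (void)
{
  assert (quarter_precision_p (1.0));	 /* n = 16, r = 0.  */
  assert (quarter_precision_p (-31.0));	 /* n = 31, r = 4.  */
  assert (!quarter_precision_p (0.1));	 /* Not exactly representable.  */
  return 0;
}
#endif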
15791
ab6501d7
SD
15792/* Returns the string with the instruction for AdvSIMD MOVI, MVNI, ORR or BIC
15793 immediate with a CONST_VECTOR of MODE and WIDTH. WHICH selects whether to
15794 output MOVI/MVNI, ORR or BIC immediate. */
3520f7cc 15795char*
b187677b 15796aarch64_output_simd_mov_immediate (rtx const_vector, unsigned width,
ab6501d7 15797 enum simd_immediate_check which)
3520f7cc 15798{
3ea63f60 15799 bool is_valid;
3520f7cc 15800 static char templ[40];
3520f7cc 15801 const char *mnemonic;
e4f0f84d 15802 const char *shift_op;
3520f7cc 15803 unsigned int lane_count = 0;
81c2dfb9 15804 char element_char;
3520f7cc 15805
b187677b 15806 struct simd_immediate_info info;
48063b9d
IB
15807
15808 /* This will return true to show const_vector is legal for use as either
ab6501d7
SD
 15809 an AdvSIMD MOVI instruction (or, implicitly, MVNI), ORR or BIC immediate.
15810 It will also update INFO to show how the immediate should be generated.
15811 WHICH selects whether to check for MOVI/MVNI, ORR or BIC. */
b187677b 15812 is_valid = aarch64_simd_valid_immediate (const_vector, &info, which);
3520f7cc
JG
15813 gcc_assert (is_valid);
15814
b187677b
RS
15815 element_char = sizetochar (GET_MODE_BITSIZE (info.elt_mode));
15816 lane_count = width / GET_MODE_BITSIZE (info.elt_mode);
48063b9d 15817
b187677b 15818 if (GET_MODE_CLASS (info.elt_mode) == MODE_FLOAT)
3520f7cc 15819 {
b187677b 15820 gcc_assert (info.shift == 0 && info.insn == simd_immediate_info::MOV);
0d8e1702
KT
15821 /* For FP zero change it to a CONST_INT 0 and use the integer SIMD
15822 move immediate path. */
48063b9d
IB
15823 if (aarch64_float_const_zero_rtx_p (info.value))
15824 info.value = GEN_INT (0);
15825 else
15826 {
83faf7d0 15827 const unsigned int buf_size = 20;
48063b9d 15828 char float_buf[buf_size] = {'\0'};
34a72c33
RS
15829 real_to_decimal_for_mode (float_buf,
15830 CONST_DOUBLE_REAL_VALUE (info.value),
b187677b 15831 buf_size, buf_size, 1, info.elt_mode);
48063b9d
IB
15832
15833 if (lane_count == 1)
15834 snprintf (templ, sizeof (templ), "fmov\t%%d0, %s", float_buf);
15835 else
15836 snprintf (templ, sizeof (templ), "fmov\t%%0.%d%c, %s",
81c2dfb9 15837 lane_count, element_char, float_buf);
48063b9d
IB
15838 return templ;
15839 }
3520f7cc 15840 }
3520f7cc 15841
0d8e1702 15842 gcc_assert (CONST_INT_P (info.value));
ab6501d7
SD
15843
15844 if (which == AARCH64_CHECK_MOV)
15845 {
b187677b
RS
15846 mnemonic = info.insn == simd_immediate_info::MVN ? "mvni" : "movi";
15847 shift_op = info.modifier == simd_immediate_info::MSL ? "msl" : "lsl";
ab6501d7
SD
15848 if (lane_count == 1)
15849 snprintf (templ, sizeof (templ), "%s\t%%d0, " HOST_WIDE_INT_PRINT_HEX,
15850 mnemonic, UINTVAL (info.value));
15851 else if (info.shift)
15852 snprintf (templ, sizeof (templ), "%s\t%%0.%d%c, "
15853 HOST_WIDE_INT_PRINT_HEX ", %s %d", mnemonic, lane_count,
15854 element_char, UINTVAL (info.value), shift_op, info.shift);
15855 else
15856 snprintf (templ, sizeof (templ), "%s\t%%0.%d%c, "
15857 HOST_WIDE_INT_PRINT_HEX, mnemonic, lane_count,
15858 element_char, UINTVAL (info.value));
15859 }
3520f7cc 15860 else
ab6501d7
SD
15861 {
15862 /* For AARCH64_CHECK_BIC and AARCH64_CHECK_ORR. */
b187677b 15863 mnemonic = info.insn == simd_immediate_info::MVN ? "bic" : "orr";
ab6501d7
SD
15864 if (info.shift)
15865 snprintf (templ, sizeof (templ), "%s\t%%0.%d%c, #"
15866 HOST_WIDE_INT_PRINT_DEC ", %s #%d", mnemonic, lane_count,
15867 element_char, UINTVAL (info.value), "lsl", info.shift);
15868 else
15869 snprintf (templ, sizeof (templ), "%s\t%%0.%d%c, #"
15870 HOST_WIDE_INT_PRINT_DEC, mnemonic, lane_count,
15871 element_char, UINTVAL (info.value));
15872 }
3520f7cc
JG
15873 return templ;
15874}
15875
b7342d25 15876char*
77e994c9 15877aarch64_output_scalar_simd_mov_immediate (rtx immediate, scalar_int_mode mode)
b7342d25 15878{
a2170965
TC
15879
15880 /* If a floating point number was passed and we desire to use it in an
15881 integer mode do the conversion to integer. */
15882 if (CONST_DOUBLE_P (immediate) && GET_MODE_CLASS (mode) == MODE_INT)
15883 {
15884 unsigned HOST_WIDE_INT ival;
15885 if (!aarch64_reinterpret_float_as_int (immediate, &ival))
15886 gcc_unreachable ();
15887 immediate = gen_int_mode (ival, mode);
15888 }
15889
ef4bddc2 15890 machine_mode vmode;
a2170965
TC
 15891 /* Use a 64-bit mode for everything except for DI/DF mode, where we use
 15892 a 128-bit vector mode. */
15893 int width = GET_MODE_BITSIZE (mode) == 64 ? 128 : 64;
b7342d25 15894
a2170965 15895 vmode = aarch64_simd_container_mode (mode, width);
b7342d25 15896 rtx v_op = aarch64_simd_gen_const_vector_dup (vmode, INTVAL (immediate));
b187677b 15897 return aarch64_output_simd_mov_immediate (v_op, width);
b7342d25
IB
15898}
15899
43cacb12
RS
15900/* Return the output string to use for moving immediate CONST_VECTOR
15901 into an SVE register. */
15902
15903char *
15904aarch64_output_sve_mov_immediate (rtx const_vector)
15905{
15906 static char templ[40];
15907 struct simd_immediate_info info;
15908 char element_char;
15909
15910 bool is_valid = aarch64_simd_valid_immediate (const_vector, &info);
15911 gcc_assert (is_valid);
15912
15913 element_char = sizetochar (GET_MODE_BITSIZE (info.elt_mode));
15914
15915 if (info.step)
15916 {
15917 snprintf (templ, sizeof (templ), "index\t%%0.%c, #"
15918 HOST_WIDE_INT_PRINT_DEC ", #" HOST_WIDE_INT_PRINT_DEC,
15919 element_char, INTVAL (info.value), INTVAL (info.step));
15920 return templ;
15921 }
15922
15923 if (GET_MODE_CLASS (info.elt_mode) == MODE_FLOAT)
15924 {
15925 if (aarch64_float_const_zero_rtx_p (info.value))
15926 info.value = GEN_INT (0);
15927 else
15928 {
15929 const int buf_size = 20;
15930 char float_buf[buf_size] = {};
15931 real_to_decimal_for_mode (float_buf,
15932 CONST_DOUBLE_REAL_VALUE (info.value),
15933 buf_size, buf_size, 1, info.elt_mode);
15934
15935 snprintf (templ, sizeof (templ), "fmov\t%%0.%c, #%s",
15936 element_char, float_buf);
15937 return templ;
15938 }
15939 }
15940
15941 snprintf (templ, sizeof (templ), "mov\t%%0.%c, #" HOST_WIDE_INT_PRINT_DEC,
15942 element_char, INTVAL (info.value));
15943 return templ;
15944}
15945
15946/* Return the asm format for a PTRUE instruction whose destination has
15947 mode MODE. SUFFIX is the element size suffix. */
15948
15949char *
15950aarch64_output_ptrue (machine_mode mode, char suffix)
15951{
15952 unsigned int nunits;
15953 static char buf[sizeof ("ptrue\t%0.N, vlNNNNN")];
15954 if (GET_MODE_NUNITS (mode).is_constant (&nunits))
15955 snprintf (buf, sizeof (buf), "ptrue\t%%0.%c, vl%d", suffix, nunits);
15956 else
15957 snprintf (buf, sizeof (buf), "ptrue\t%%0.%c, all", suffix);
15958 return buf;
15959}
15960
88b08073
JG
15961/* Split operands into moves from op[1] + op[2] into op[0]. */
15962
15963void
15964aarch64_split_combinev16qi (rtx operands[3])
15965{
15966 unsigned int dest = REGNO (operands[0]);
15967 unsigned int src1 = REGNO (operands[1]);
15968 unsigned int src2 = REGNO (operands[2]);
ef4bddc2 15969 machine_mode halfmode = GET_MODE (operands[1]);
462a99aa 15970 unsigned int halfregs = REG_NREGS (operands[1]);
88b08073
JG
15971 rtx destlo, desthi;
15972
15973 gcc_assert (halfmode == V16QImode);
15974
15975 if (src1 == dest && src2 == dest + halfregs)
15976 {
15977 /* No-op move. Can't split to nothing; emit something. */
15978 emit_note (NOTE_INSN_DELETED);
15979 return;
15980 }
15981
15982 /* Preserve register attributes for variable tracking. */
15983 destlo = gen_rtx_REG_offset (operands[0], halfmode, dest, 0);
15984 desthi = gen_rtx_REG_offset (operands[0], halfmode, dest + halfregs,
15985 GET_MODE_SIZE (halfmode));
15986
15987 /* Special case of reversed high/low parts. */
15988 if (reg_overlap_mentioned_p (operands[2], destlo)
15989 && reg_overlap_mentioned_p (operands[1], desthi))
15990 {
15991 emit_insn (gen_xorv16qi3 (operands[1], operands[1], operands[2]));
15992 emit_insn (gen_xorv16qi3 (operands[2], operands[1], operands[2]));
15993 emit_insn (gen_xorv16qi3 (operands[1], operands[1], operands[2]));
15994 }
15995 else if (!reg_overlap_mentioned_p (operands[2], destlo))
15996 {
15997 /* Try to avoid unnecessary moves if part of the result
15998 is in the right place already. */
15999 if (src1 != dest)
16000 emit_move_insn (destlo, operands[1]);
16001 if (src2 != dest + halfregs)
16002 emit_move_insn (desthi, operands[2]);
16003 }
16004 else
16005 {
16006 if (src2 != dest + halfregs)
16007 emit_move_insn (desthi, operands[2]);
16008 if (src1 != dest)
16009 emit_move_insn (destlo, operands[1]);
16010 }
16011}
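/* Illustrative sketch (not part of GCC and excluded from the build): the
   reversed high/low case above uses the classic three-XOR swap so the two
   halves can be exchanged without a scratch register.  xor_swap below is a
   scalar model invented for this example.  */
#if 0
#include <assert.h>

static void
xor_swap (unsigned int *a, unsigned int *b)
{
  /* Only valid when A and B are distinct objects.  */
  *a ^= *b;
  *b ^= *a;
  *a ^= *b;
}

int
main (void)
{
  unsigned int x = 0x1234, y = 0xabcd;
  xor_swap (&x, &y);
  assert (x == 0xabcd && y == 0x1234);
  return 0;
}
#endif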
16012
16013/* vec_perm support. */
16014
88b08073
JG
16015struct expand_vec_perm_d
16016{
16017 rtx target, op0, op1;
e3342de4 16018 vec_perm_indices perm;
ef4bddc2 16019 machine_mode vmode;
43cacb12 16020 unsigned int vec_flags;
88b08073
JG
16021 bool one_vector_p;
16022 bool testing_p;
16023};
16024
16025/* Generate a variable permutation. */
16026
16027static void
16028aarch64_expand_vec_perm_1 (rtx target, rtx op0, rtx op1, rtx sel)
16029{
ef4bddc2 16030 machine_mode vmode = GET_MODE (target);
88b08073
JG
16031 bool one_vector_p = rtx_equal_p (op0, op1);
16032
16033 gcc_checking_assert (vmode == V8QImode || vmode == V16QImode);
16034 gcc_checking_assert (GET_MODE (op0) == vmode);
16035 gcc_checking_assert (GET_MODE (op1) == vmode);
16036 gcc_checking_assert (GET_MODE (sel) == vmode);
16037 gcc_checking_assert (TARGET_SIMD);
16038
16039 if (one_vector_p)
16040 {
16041 if (vmode == V8QImode)
16042 {
16043 /* Expand the argument to a V16QI mode by duplicating it. */
16044 rtx pair = gen_reg_rtx (V16QImode);
16045 emit_insn (gen_aarch64_combinev8qi (pair, op0, op0));
16046 emit_insn (gen_aarch64_tbl1v8qi (target, pair, sel));
16047 }
16048 else
16049 {
16050 emit_insn (gen_aarch64_tbl1v16qi (target, op0, sel));
16051 }
16052 }
16053 else
16054 {
16055 rtx pair;
16056
16057 if (vmode == V8QImode)
16058 {
16059 pair = gen_reg_rtx (V16QImode);
16060 emit_insn (gen_aarch64_combinev8qi (pair, op0, op1));
16061 emit_insn (gen_aarch64_tbl1v8qi (target, pair, sel));
16062 }
16063 else
16064 {
16065 pair = gen_reg_rtx (OImode);
16066 emit_insn (gen_aarch64_combinev16qi (pair, op0, op1));
16067 emit_insn (gen_aarch64_tbl2v16qi (target, pair, sel));
16068 }
16069 }
16070}
16071
80940017
RS
16072/* Expand a vec_perm with the operands given by TARGET, OP0, OP1 and SEL.
16073 NELT is the number of elements in the vector. */
16074
88b08073 16075void
80940017
RS
16076aarch64_expand_vec_perm (rtx target, rtx op0, rtx op1, rtx sel,
16077 unsigned int nelt)
88b08073 16078{
ef4bddc2 16079 machine_mode vmode = GET_MODE (target);
88b08073 16080 bool one_vector_p = rtx_equal_p (op0, op1);
f7c4e5b8 16081 rtx mask;
88b08073
JG
16082
16083 /* The TBL instruction does not use a modulo index, so we must take care
16084 of that ourselves. */
f7c4e5b8
AL
16085 mask = aarch64_simd_gen_const_vector_dup (vmode,
16086 one_vector_p ? nelt - 1 : 2 * nelt - 1);
88b08073
JG
16087 sel = expand_simple_binop (vmode, AND, sel, mask, NULL, 0, OPTAB_LIB_WIDEN);
16088
f7c4e5b8
AL
16089 /* For big-endian, we also need to reverse the index within the vector
16090 (but not which vector). */
16091 if (BYTES_BIG_ENDIAN)
16092 {
16093 /* If one_vector_p, mask is a vector of (nelt - 1)'s already. */
16094 if (!one_vector_p)
16095 mask = aarch64_simd_gen_const_vector_dup (vmode, nelt - 1);
16096 sel = expand_simple_binop (vmode, XOR, sel, mask,
16097 NULL, 0, OPTAB_LIB_WIDEN);
16098 }
88b08073
JG
16099 aarch64_expand_vec_perm_1 (target, op0, op1, sel);
16100}
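/* Illustrative sketch (not part of GCC and excluded from the build): a
   scalar model of the index mangling above.  TBL does not wrap
   out-of-range selector bytes, so each index is first reduced modulo the
   number of selectable elements; on big-endian it is then XORed with
   nelt - 1 to reverse the position within each input vector.
   mangle_tbl_index and big_endian_p are names invented for this example.  */
#if 0
static unsigned int
mangle_tbl_index (unsigned int idx, unsigned int nelt, int one_vector_p,
		  int big_endian_p)
{
  idx &= (one_vector_p ? nelt : 2 * nelt) - 1;	/* Force modulo behaviour.  */
  if (big_endian_p)
    idx ^= nelt - 1;		/* Reverse the lane within its input.  */
  return idx;
}
#endif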
16101
43cacb12
RS
16102/* Generate (set TARGET (unspec [OP0 OP1] CODE)). */
16103
16104static void
16105emit_unspec2 (rtx target, int code, rtx op0, rtx op1)
16106{
16107 emit_insn (gen_rtx_SET (target,
16108 gen_rtx_UNSPEC (GET_MODE (target),
16109 gen_rtvec (2, op0, op1), code)));
16110}
16111
16112/* Expand an SVE vec_perm with the given operands. */
16113
16114void
16115aarch64_expand_sve_vec_perm (rtx target, rtx op0, rtx op1, rtx sel)
16116{
16117 machine_mode data_mode = GET_MODE (target);
16118 machine_mode sel_mode = GET_MODE (sel);
16119 /* Enforced by the pattern condition. */
16120 int nunits = GET_MODE_NUNITS (sel_mode).to_constant ();
16121
16122 /* Note: vec_perm indices are supposed to wrap when they go beyond the
16123 size of the two value vectors, i.e. the upper bits of the indices
16124 are effectively ignored. SVE TBL instead produces 0 for any
16125 out-of-range indices, so we need to modulo all the vec_perm indices
16126 to ensure they are all in range. */
16127 rtx sel_reg = force_reg (sel_mode, sel);
16128
16129 /* Check if the sel only references the first values vector. */
16130 if (GET_CODE (sel) == CONST_VECTOR
16131 && aarch64_const_vec_all_in_range_p (sel, 0, nunits - 1))
16132 {
16133 emit_unspec2 (target, UNSPEC_TBL, op0, sel_reg);
16134 return;
16135 }
16136
16137 /* Check if the two values vectors are the same. */
16138 if (rtx_equal_p (op0, op1))
16139 {
16140 rtx max_sel = aarch64_simd_gen_const_vector_dup (sel_mode, nunits - 1);
16141 rtx sel_mod = expand_simple_binop (sel_mode, AND, sel_reg, max_sel,
16142 NULL, 0, OPTAB_DIRECT);
16143 emit_unspec2 (target, UNSPEC_TBL, op0, sel_mod);
16144 return;
16145 }
16146
 16147 /* Run TBL on each value vector and combine the results. */
16148
16149 rtx res0 = gen_reg_rtx (data_mode);
16150 rtx res1 = gen_reg_rtx (data_mode);
16151 rtx neg_num_elems = aarch64_simd_gen_const_vector_dup (sel_mode, -nunits);
16152 if (GET_CODE (sel) != CONST_VECTOR
16153 || !aarch64_const_vec_all_in_range_p (sel, 0, 2 * nunits - 1))
16154 {
16155 rtx max_sel = aarch64_simd_gen_const_vector_dup (sel_mode,
16156 2 * nunits - 1);
16157 sel_reg = expand_simple_binop (sel_mode, AND, sel_reg, max_sel,
16158 NULL, 0, OPTAB_DIRECT);
16159 }
16160 emit_unspec2 (res0, UNSPEC_TBL, op0, sel_reg);
16161 rtx sel_sub = expand_simple_binop (sel_mode, PLUS, sel_reg, neg_num_elems,
16162 NULL, 0, OPTAB_DIRECT);
16163 emit_unspec2 (res1, UNSPEC_TBL, op1, sel_sub);
16164 if (GET_MODE_CLASS (data_mode) == MODE_VECTOR_INT)
16165 emit_insn (gen_rtx_SET (target, gen_rtx_IOR (data_mode, res0, res1)));
16166 else
16167 emit_unspec2 (target, UNSPEC_IORF, res0, res1);
16168}
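/* Illustrative sketch (not part of GCC and excluded from the build): a
   scalar model of the two-TBL sequence above.  Because SVE TBL yields 0
   for an out-of-range index, looking up OP0 with the raw index and OP1
   with the index minus NUNITS produces at most one non-zero result per
   lane, so the two results can be combined with a bitwise OR.  The helper
   names below are invented for this example.  */
#if 0
static unsigned int
sve_tbl (const unsigned int *vec, unsigned int nunits, unsigned int idx)
{
  return idx < nunits ? vec[idx] : 0;	/* Out-of-range index reads as 0.  */
}

static unsigned int
two_vector_permute (const unsigned int *op0, const unsigned int *op1,
		    unsigned int nunits, unsigned int idx)
{
  idx &= 2 * nunits - 1;	/* Modulo the combined index range.  */
  /* For idx < nunits the subtraction wraps to a large unsigned value,
     so the OP1 lookup contributes 0, and vice versa.  */
  return sve_tbl (op0, nunits, idx) | sve_tbl (op1, nunits, idx - nunits);
}
#endif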
16169
cc4d934f
JG
16170/* Recognize patterns suitable for the TRN instructions. */
16171static bool
16172aarch64_evpc_trn (struct expand_vec_perm_d *d)
16173{
6a70badb
RS
16174 HOST_WIDE_INT odd;
16175 poly_uint64 nelt = d->perm.length ();
cc4d934f 16176 rtx out, in0, in1, x;
ef4bddc2 16177 machine_mode vmode = d->vmode;
cc4d934f
JG
16178
16179 if (GET_MODE_UNIT_SIZE (vmode) > 8)
16180 return false;
16181
16182 /* Note that these are little-endian tests.
16183 We correct for big-endian later. */
6a70badb
RS
16184 if (!d->perm[0].is_constant (&odd)
16185 || (odd != 0 && odd != 1)
326ac20e
RS
16186 || !d->perm.series_p (0, 2, odd, 2)
16187 || !d->perm.series_p (1, 2, nelt + odd, 2))
cc4d934f 16188 return false;
cc4d934f
JG
16189
16190 /* Success! */
16191 if (d->testing_p)
16192 return true;
16193
16194 in0 = d->op0;
16195 in1 = d->op1;
43cacb12
RS
16196 /* We don't need a big-endian lane correction for SVE; see the comment
16197 at the head of aarch64-sve.md for details. */
16198 if (BYTES_BIG_ENDIAN && d->vec_flags == VEC_ADVSIMD)
cc4d934f
JG
16199 {
16200 x = in0, in0 = in1, in1 = x;
16201 odd = !odd;
16202 }
16203 out = d->target;
16204
3f8334a5
RS
16205 emit_set_insn (out, gen_rtx_UNSPEC (vmode, gen_rtvec (2, in0, in1),
16206 odd ? UNSPEC_TRN2 : UNSPEC_TRN1));
cc4d934f
JG
16207 return true;
16208}
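/* Illustrative sketch (not part of GCC and excluded from the build): the
   series checks above accept exactly the TRN1/TRN2 index patterns.  For a
   vector of NELT elements, TRN1 selects lanes { 0, NELT, 2, NELT + 2, ... }
   and TRN2 selects { 1, NELT + 1, 3, NELT + 3, ... }.  trn_indices below is
   a generator invented for this example.  */
#if 0
static void
trn_indices (unsigned int *perm, unsigned int nelt, int odd)
{
  for (unsigned int i = 0; i < nelt; i += 2)
    {
      perm[i] = i + odd;		/* Lane taken from the first input.  */
      perm[i + 1] = nelt + i + odd;	/* Lane taken from the second input.  */
    }
}
#endif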
16209
16210/* Recognize patterns suitable for the UZP instructions. */
16211static bool
16212aarch64_evpc_uzp (struct expand_vec_perm_d *d)
16213{
6a70badb 16214 HOST_WIDE_INT odd;
cc4d934f 16215 rtx out, in0, in1, x;
ef4bddc2 16216 machine_mode vmode = d->vmode;
cc4d934f
JG
16217
16218 if (GET_MODE_UNIT_SIZE (vmode) > 8)
16219 return false;
16220
16221 /* Note that these are little-endian tests.
16222 We correct for big-endian later. */
6a70badb
RS
16223 if (!d->perm[0].is_constant (&odd)
16224 || (odd != 0 && odd != 1)
326ac20e 16225 || !d->perm.series_p (0, 1, odd, 2))
cc4d934f 16226 return false;
cc4d934f
JG
16227
16228 /* Success! */
16229 if (d->testing_p)
16230 return true;
16231
16232 in0 = d->op0;
16233 in1 = d->op1;
43cacb12
RS
16234 /* We don't need a big-endian lane correction for SVE; see the comment
16235 at the head of aarch64-sve.md for details. */
16236 if (BYTES_BIG_ENDIAN && d->vec_flags == VEC_ADVSIMD)
cc4d934f
JG
16237 {
16238 x = in0, in0 = in1, in1 = x;
16239 odd = !odd;
16240 }
16241 out = d->target;
16242
3f8334a5
RS
16243 emit_set_insn (out, gen_rtx_UNSPEC (vmode, gen_rtvec (2, in0, in1),
16244 odd ? UNSPEC_UZP2 : UNSPEC_UZP1));
cc4d934f
JG
16245 return true;
16246}
16247
16248/* Recognize patterns suitable for the ZIP instructions. */
16249static bool
16250aarch64_evpc_zip (struct expand_vec_perm_d *d)
16251{
6a70badb
RS
16252 unsigned int high;
16253 poly_uint64 nelt = d->perm.length ();
cc4d934f 16254 rtx out, in0, in1, x;
ef4bddc2 16255 machine_mode vmode = d->vmode;
cc4d934f
JG
16256
16257 if (GET_MODE_UNIT_SIZE (vmode) > 8)
16258 return false;
16259
16260 /* Note that these are little-endian tests.
16261 We correct for big-endian later. */
6a70badb
RS
16262 poly_uint64 first = d->perm[0];
16263 if ((maybe_ne (first, 0U) && maybe_ne (first * 2, nelt))
16264 || !d->perm.series_p (0, 2, first, 1)
16265 || !d->perm.series_p (1, 2, first + nelt, 1))
cc4d934f 16266 return false;
6a70badb 16267 high = maybe_ne (first, 0U);
cc4d934f
JG
16268
16269 /* Success! */
16270 if (d->testing_p)
16271 return true;
16272
16273 in0 = d->op0;
16274 in1 = d->op1;
43cacb12
RS
16275 /* We don't need a big-endian lane correction for SVE; see the comment
16276 at the head of aarch64-sve.md for details. */
16277 if (BYTES_BIG_ENDIAN && d->vec_flags == VEC_ADVSIMD)
cc4d934f
JG
16278 {
16279 x = in0, in0 = in1, in1 = x;
16280 high = !high;
16281 }
16282 out = d->target;
16283
3f8334a5
RS
16284 emit_set_insn (out, gen_rtx_UNSPEC (vmode, gen_rtvec (2, in0, in1),
16285 high ? UNSPEC_ZIP2 : UNSPEC_ZIP1));
cc4d934f
JG
16286 return true;
16287}
16288
ae0533da
AL
16289/* Recognize patterns for the EXT insn. */
16290
16291static bool
16292aarch64_evpc_ext (struct expand_vec_perm_d *d)
16293{
6a70badb 16294 HOST_WIDE_INT location;
ae0533da
AL
16295 rtx offset;
16296
6a70badb
RS
16297 /* The first element always refers to the first vector.
16298 Check if the extracted indices are increasing by one. */
43cacb12
RS
16299 if (d->vec_flags == VEC_SVE_PRED
16300 || !d->perm[0].is_constant (&location)
6a70badb 16301 || !d->perm.series_p (0, 1, location, 1))
326ac20e 16302 return false;
ae0533da 16303
ae0533da
AL
16304 /* Success! */
16305 if (d->testing_p)
16306 return true;
16307
b31e65bb 16308 /* The case where (location == 0) is a no-op for both big- and little-endian,
43cacb12 16309 and is removed by the mid-end at optimization levels -O1 and higher.
b31e65bb 16310
43cacb12
RS
16311 We don't need a big-endian lane correction for SVE; see the comment
16312 at the head of aarch64-sve.md for details. */
16313 if (BYTES_BIG_ENDIAN && location != 0 && d->vec_flags == VEC_ADVSIMD)
ae0533da
AL
16314 {
16315 /* After setup, we want the high elements of the first vector (stored
16316 at the LSB end of the register), and the low elements of the second
16317 vector (stored at the MSB end of the register). So swap. */
cb5c6c29 16318 std::swap (d->op0, d->op1);
6a70badb
RS
16319 /* location != 0 (above), so safe to assume (nelt - location) < nelt.
16320 to_constant () is safe since this is restricted to Advanced SIMD
16321 vectors. */
16322 location = d->perm.length ().to_constant () - location;
ae0533da
AL
16323 }
16324
16325 offset = GEN_INT (location);
3f8334a5
RS
16326 emit_set_insn (d->target,
16327 gen_rtx_UNSPEC (d->vmode,
16328 gen_rtvec (3, d->op0, d->op1, offset),
16329 UNSPEC_EXT));
ae0533da
AL
16330 return true;
16331}
16332
43cacb12
RS
16333/* Recognize patterns for the REV{64,32,16} insns, which reverse elements
16334 within each 64-bit, 32-bit or 16-bit granule. */
923fcec3
AL
16335
16336static bool
43cacb12 16337aarch64_evpc_rev_local (struct expand_vec_perm_d *d)
923fcec3 16338{
6a70badb
RS
16339 HOST_WIDE_INT diff;
16340 unsigned int i, size, unspec;
43cacb12 16341 machine_mode pred_mode;
923fcec3 16342
43cacb12
RS
16343 if (d->vec_flags == VEC_SVE_PRED
16344 || !d->one_vector_p
6a70badb 16345 || !d->perm[0].is_constant (&diff))
923fcec3
AL
16346 return false;
16347
3f8334a5
RS
16348 size = (diff + 1) * GET_MODE_UNIT_SIZE (d->vmode);
16349 if (size == 8)
43cacb12
RS
16350 {
16351 unspec = UNSPEC_REV64;
16352 pred_mode = VNx2BImode;
16353 }
3f8334a5 16354 else if (size == 4)
43cacb12
RS
16355 {
16356 unspec = UNSPEC_REV32;
16357 pred_mode = VNx4BImode;
16358 }
3f8334a5 16359 else if (size == 2)
43cacb12
RS
16360 {
16361 unspec = UNSPEC_REV16;
16362 pred_mode = VNx8BImode;
16363 }
3f8334a5
RS
16364 else
16365 return false;
923fcec3 16366
326ac20e
RS
16367 unsigned int step = diff + 1;
16368 for (i = 0; i < step; ++i)
16369 if (!d->perm.series_p (i, step, diff - i, step))
16370 return false;
923fcec3
AL
16371
16372 /* Success! */
16373 if (d->testing_p)
16374 return true;
16375
43cacb12
RS
16376 rtx src = gen_rtx_UNSPEC (d->vmode, gen_rtvec (1, d->op0), unspec);
16377 if (d->vec_flags == VEC_SVE_DATA)
16378 {
16379 rtx pred = force_reg (pred_mode, CONSTM1_RTX (pred_mode));
16380 src = gen_rtx_UNSPEC (d->vmode, gen_rtvec (2, pred, src),
16381 UNSPEC_MERGE_PTRUE);
16382 }
16383 emit_set_insn (d->target, src);
16384 return true;
16385}
16386
16387/* Recognize patterns for the REV insn, which reverses elements within
16388 a full vector. */
16389
16390static bool
16391aarch64_evpc_rev_global (struct expand_vec_perm_d *d)
16392{
16393 poly_uint64 nelt = d->perm.length ();
16394
16395 if (!d->one_vector_p || d->vec_flags != VEC_SVE_DATA)
16396 return false;
16397
16398 if (!d->perm.series_p (0, 1, nelt - 1, -1))
16399 return false;
16400
16401 /* Success! */
16402 if (d->testing_p)
16403 return true;
16404
16405 rtx src = gen_rtx_UNSPEC (d->vmode, gen_rtvec (1, d->op0), UNSPEC_REV);
16406 emit_set_insn (d->target, src);
923fcec3
AL
16407 return true;
16408}
16409
91bd4114
JG
16410static bool
16411aarch64_evpc_dup (struct expand_vec_perm_d *d)
16412{
91bd4114
JG
16413 rtx out = d->target;
16414 rtx in0;
6a70badb 16415 HOST_WIDE_INT elt;
ef4bddc2 16416 machine_mode vmode = d->vmode;
91bd4114
JG
16417 rtx lane;
16418
43cacb12
RS
16419 if (d->vec_flags == VEC_SVE_PRED
16420 || d->perm.encoding ().encoded_nelts () != 1
6a70badb 16421 || !d->perm[0].is_constant (&elt))
326ac20e
RS
16422 return false;
16423
43cacb12
RS
16424 if (d->vec_flags == VEC_SVE_DATA && elt >= 64 * GET_MODE_UNIT_SIZE (vmode))
16425 return false;
16426
326ac20e
RS
16427 /* Success! */
16428 if (d->testing_p)
16429 return true;
16430
91bd4114
JG
16431 /* The generic preparation in aarch64_expand_vec_perm_const_1
16432 swaps the operand order and the permute indices if it finds
16433 d->perm[0] to be in the second operand. Thus, we can always
16434 use d->op0 and need not do any extra arithmetic to get the
16435 correct lane number. */
16436 in0 = d->op0;
f901401e 16437 lane = GEN_INT (elt); /* The pattern corrects for big-endian. */
91bd4114 16438
3f8334a5
RS
16439 rtx parallel = gen_rtx_PARALLEL (vmode, gen_rtvec (1, lane));
16440 rtx select = gen_rtx_VEC_SELECT (GET_MODE_INNER (vmode), in0, parallel);
16441 emit_set_insn (out, gen_rtx_VEC_DUPLICATE (vmode, select));
91bd4114
JG
16442 return true;
16443}
16444
88b08073
JG
16445static bool
16446aarch64_evpc_tbl (struct expand_vec_perm_d *d)
16447{
43cacb12 16448 rtx rperm[MAX_COMPILE_TIME_VEC_BYTES], sel;
ef4bddc2 16449 machine_mode vmode = d->vmode;
6a70badb
RS
16450
16451 /* Make sure that the indices are constant. */
16452 unsigned int encoded_nelts = d->perm.encoding ().encoded_nelts ();
16453 for (unsigned int i = 0; i < encoded_nelts; ++i)
16454 if (!d->perm[i].is_constant ())
16455 return false;
88b08073 16456
88b08073
JG
16457 if (d->testing_p)
16458 return true;
16459
16460 /* Generic code will try constant permutation twice. Once with the
16461 original mode and again with the elements lowered to QImode.
16462 So wait and don't do the selector expansion ourselves. */
16463 if (vmode != V8QImode && vmode != V16QImode)
16464 return false;
16465
6a70badb
RS
16466 /* to_constant is safe since this routine is specific to Advanced SIMD
16467 vectors. */
16468 unsigned int nelt = d->perm.length ().to_constant ();
16469 for (unsigned int i = 0; i < nelt; ++i)
 16470 /* If big-endian and two vectors, we end up with a weird mixed-endian
16471 mode on NEON. Reverse the index within each word but not the word
16472 itself. to_constant is safe because we checked is_constant above. */
16473 rperm[i] = GEN_INT (BYTES_BIG_ENDIAN
16474 ? d->perm[i].to_constant () ^ (nelt - 1)
16475 : d->perm[i].to_constant ());
bbcc9c00 16476
16477 sel = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, rperm));
16478 sel = force_reg (vmode, sel);
16479
16480 aarch64_expand_vec_perm_1 (d->target, d->op0, d->op1, sel);
16481 return true;
16482}
16483
16484/* Try to implement D using an SVE TBL instruction. */
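/* Unlike the Advanced SIMD version above, the selector here is a vector of
   element indices rather than byte indices, and two-input permutations are
   expanded via aarch64_expand_sve_vec_perm.  */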
16485
16486static bool
16487aarch64_evpc_sve_tbl (struct expand_vec_perm_d *d)
16488{
16489 unsigned HOST_WIDE_INT nelt;
16490
16491 /* Permuting two variable-length vectors could overflow the
16492 index range. */
16493 if (!d->one_vector_p && !d->perm.length ().is_constant (&nelt))
16494 return false;
16495
16496 if (d->testing_p)
16497 return true;
16498
16499 machine_mode sel_mode = mode_for_int_vector (d->vmode).require ();
16500 rtx sel = vec_perm_indices_to_rtx (sel_mode, d->perm);
16501 if (d->one_vector_p)
16502 emit_unspec2 (d->target, UNSPEC_TBL, d->op0, force_reg (sel_mode, sel));
16503 else
16504 aarch64_expand_sve_vec_perm (d->target, d->op0, d->op1, sel);
16505 return true;
16506}
16507
16508static bool
16509aarch64_expand_vec_perm_const_1 (struct expand_vec_perm_d *d)
16510{
16511 /* The pattern matching functions above are written to look for a small
16512 number to begin the sequence (0, 1, N/2). If we begin with an index
16513 from the second operand, we can swap the operands. */
16514 poly_int64 nelt = d->perm.length ();
16515 if (known_ge (d->perm[0], nelt))
88b08073 16516 {
e3342de4 16517 d->perm.rotate_inputs (1);
cb5c6c29 16518 std::swap (d->op0, d->op1);
16519 }
16520
16521 if ((d->vec_flags == VEC_ADVSIMD
16522 || d->vec_flags == VEC_SVE_DATA
16523 || d->vec_flags == VEC_SVE_PRED)
16524 && known_gt (nelt, 1))
cc4d934f 16525 {
16526 if (aarch64_evpc_rev_local (d))
16527 return true;
16528 else if (aarch64_evpc_rev_global (d))
16529 return true;
16530 else if (aarch64_evpc_ext (d))
ae0533da 16531 return true;
16532 else if (aarch64_evpc_dup (d))
16533 return true;
ae0533da 16534 else if (aarch64_evpc_zip (d))
16535 return true;
16536 else if (aarch64_evpc_uzp (d))
16537 return true;
16538 else if (aarch64_evpc_trn (d))
16539 return true;
16540 if (d->vec_flags == VEC_SVE_DATA)
16541 return aarch64_evpc_sve_tbl (d);
4ec8bb67 16542 else if (d->vec_flags == VEC_ADVSIMD)
43cacb12 16543 return aarch64_evpc_tbl (d);
cc4d934f 16544 }
16545 return false;
16546}
16547
f151c9e1 16548/* Implement TARGET_VECTORIZE_VEC_PERM_CONST. */
88b08073 16549
16550static bool
16551aarch64_vectorize_vec_perm_const (machine_mode vmode, rtx target, rtx op0,
16552 rtx op1, const vec_perm_indices &sel)
16553{
16554 struct expand_vec_perm_d d;
88b08073 16555
326ac20e 16556 /* Check whether the mask can be applied to a single vector. */
16557 if (sel.ninputs () == 1
16558 || (op0 && rtx_equal_p (op0, op1)))
16559 d.one_vector_p = true;
16560 else if (sel.all_from_input_p (0))
88b08073 16561 {
16562 d.one_vector_p = true;
16563 op1 = op0;
88b08073 16564 }
326ac20e 16565 else if (sel.all_from_input_p (1))
88b08073 16566 {
88b08073 16567 d.one_vector_p = true;
326ac20e 16568 op0 = op1;
88b08073 16569 }
16570 else
16571 d.one_vector_p = false;
88b08073 16572
16573 d.perm.new_vector (sel.encoding (), d.one_vector_p ? 1 : 2,
16574 sel.nelts_per_input ());
16575 d.vmode = vmode;
43cacb12 16576 d.vec_flags = aarch64_classify_vector_mode (d.vmode);
16577 d.target = target;
16578 d.op0 = op0;
16579 d.op1 = op1;
16580 d.testing_p = !target;
e3342de4 16581
16582 if (!d.testing_p)
16583 return aarch64_expand_vec_perm_const_1 (&d);
88b08073 16584
326ac20e 16585 rtx_insn *last = get_last_insn ();
f151c9e1 16586 bool ret = aarch64_expand_vec_perm_const_1 (&d);
326ac20e 16587 gcc_assert (last == get_last_insn ());
16588
16589 return ret;
16590}
16591
16592/* Generate a byte permute mask for a register of mode MODE,
16593 which has NUNITS units. */
16594
668046d1 16595rtx
73e3da51 16596aarch64_reverse_mask (machine_mode mode, unsigned int nunits)
16597{
16598 /* We have to reverse each vector because we don't have
16599 a permuted load that can reverse-load according to ABI rules. */
16600 rtx mask;
16601 rtvec v = rtvec_alloc (16);
16602 unsigned int i, j;
16603 unsigned int usize = GET_MODE_UNIT_SIZE (mode);
16604
16605 gcc_assert (BYTES_BIG_ENDIAN);
16606 gcc_assert (AARCH64_VALID_SIMD_QREG_MODE (mode));
16607
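  /* For example, for V4SImode this builds the byte selector
     { 3,2,1,0, 7,6,5,4, 11,10,9,8, 15,14,13,12 }, i.e. the bytes are
     reversed within each element but the elements stay in place.  */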
16608 for (i = 0; i < nunits; i++)
16609 for (j = 0; j < usize; j++)
16610 RTVEC_ELT (v, i * usize + j) = GEN_INT ((i + 1) * usize - 1 - j);
16611 mask = gen_rtx_CONST_VECTOR (V16QImode, v);
16612 return force_reg (V16QImode, mask);
16613}
16614
16615/* Return true if X is a valid second operand for the SVE instruction
16616 that implements integer comparison OP_CODE. */
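/* The signed and unsigned comparison forms accept different immediate
   ranges, hence the two separate aarch64_sve_cmp_immediate_p checks below.  */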
16617
16618static bool
16619aarch64_sve_cmp_operand_p (rtx_code op_code, rtx x)
16620{
16621 if (register_operand (x, VOIDmode))
16622 return true;
16623
16624 switch (op_code)
16625 {
16626 case LTU:
16627 case LEU:
16628 case GEU:
16629 case GTU:
16630 return aarch64_sve_cmp_immediate_p (x, false);
16631 case LT:
16632 case LE:
16633 case GE:
16634 case GT:
16635 case NE:
16636 case EQ:
16637 return aarch64_sve_cmp_immediate_p (x, true);
16638 default:
16639 gcc_unreachable ();
16640 }
16641}
16642
f22d7973
RS
16643/* Use predicated SVE instructions to implement the equivalent of:
16644
16645 (set TARGET OP)
16646
16647 given that PTRUE is an all-true predicate of the appropriate mode. */
16648
16649static void
16650aarch64_emit_sve_ptrue_op (rtx target, rtx ptrue, rtx op)
16651{
16652 rtx unspec = gen_rtx_UNSPEC (GET_MODE (target),
16653 gen_rtvec (2, ptrue, op),
16654 UNSPEC_MERGE_PTRUE);
16655 rtx_insn *insn = emit_set_insn (target, unspec);
16656 set_unique_reg_note (insn, REG_EQUAL, copy_rtx (op));
16657}
16658
16659/* Likewise, but also clobber the condition codes. */
16660
16661static void
16662aarch64_emit_sve_ptrue_op_cc (rtx target, rtx ptrue, rtx op)
16663{
16664 rtx unspec = gen_rtx_UNSPEC (GET_MODE (target),
16665 gen_rtvec (2, ptrue, op),
16666 UNSPEC_MERGE_PTRUE);
16667 rtx_insn *insn = emit_insn (gen_set_clobber_cc (target, unspec));
16668 set_unique_reg_note (insn, REG_EQUAL, copy_rtx (op));
16669}
16670
43cacb12
RS
16671/* Return the UNSPEC_COND_* code for comparison CODE. */
16672
16673static unsigned int
16674aarch64_unspec_cond_code (rtx_code code)
16675{
16676 switch (code)
16677 {
16678 case NE:
16679 return UNSPEC_COND_NE;
16680 case EQ:
16681 return UNSPEC_COND_EQ;
16682 case LT:
16683 return UNSPEC_COND_LT;
16684 case GT:
16685 return UNSPEC_COND_GT;
16686 case LE:
16687 return UNSPEC_COND_LE;
16688 case GE:
16689 return UNSPEC_COND_GE;
43cacb12
RS
16690 default:
16691 gcc_unreachable ();
16692 }
16693}
16694
f22d7973 16695/* Emit:
43cacb12 16696
f22d7973
RS
16697 (set TARGET (unspec [PRED OP0 OP1] UNSPEC_COND_<X>))
16698
16699 where <X> is the operation associated with comparison CODE. This form
16700 of instruction is used when (and (CODE OP0 OP1) PRED) would have different
16701 semantics, such as when PRED might not be all-true and when comparing
16702 inactive lanes could have side effects. */
16703
16704static void
16705aarch64_emit_sve_predicated_cond (rtx target, rtx_code code,
16706 rtx pred, rtx op0, rtx op1)
43cacb12 16707{
f22d7973
RS
16708 rtx unspec = gen_rtx_UNSPEC (GET_MODE (pred),
16709 gen_rtvec (3, pred, op0, op1),
16710 aarch64_unspec_cond_code (code));
16711 emit_set_insn (target, unspec);
43cacb12
RS
16712}
16713
f22d7973 16714/* Expand an SVE integer comparison using the SVE equivalent of:
43cacb12 16715
f22d7973 16716 (set TARGET (CODE OP0 OP1)). */
43cacb12
RS
16717
16718void
16719aarch64_expand_sve_vec_cmp_int (rtx target, rtx_code code, rtx op0, rtx op1)
16720{
16721 machine_mode pred_mode = GET_MODE (target);
16722 machine_mode data_mode = GET_MODE (op0);
16723
16724 if (!aarch64_sve_cmp_operand_p (code, op1))
16725 op1 = force_reg (data_mode, op1);
16726
16727 rtx ptrue = force_reg (pred_mode, CONSTM1_RTX (pred_mode));
f22d7973
RS
16728 rtx cond = gen_rtx_fmt_ee (code, pred_mode, op0, op1);
16729 aarch64_emit_sve_ptrue_op_cc (target, ptrue, cond);
43cacb12
RS
16730}
16731
f22d7973 16732/* Emit the SVE equivalent of:
43cacb12 16733
f22d7973
RS
16734 (set TMP1 (CODE1 OP0 OP1))
16735 (set TMP2 (CODE2 OP0 OP1))
16736 (set TARGET (ior:PRED_MODE TMP1 TMP2))
43cacb12 16737
f22d7973 16738 PTRUE is an all-true predicate with the same mode as TARGET. */
43cacb12
RS
16739
16740static void
f22d7973
RS
16741aarch64_emit_sve_or_conds (rtx target, rtx_code code1, rtx_code code2,
16742 rtx ptrue, rtx op0, rtx op1)
43cacb12 16743{
f22d7973 16744 machine_mode pred_mode = GET_MODE (ptrue);
43cacb12 16745 rtx tmp1 = gen_reg_rtx (pred_mode);
f22d7973
RS
16746 aarch64_emit_sve_ptrue_op (tmp1, ptrue,
16747 gen_rtx_fmt_ee (code1, pred_mode, op0, op1));
43cacb12 16748 rtx tmp2 = gen_reg_rtx (pred_mode);
f22d7973
RS
16749 aarch64_emit_sve_ptrue_op (tmp2, ptrue,
16750 gen_rtx_fmt_ee (code2, pred_mode, op0, op1));
16751 aarch64_emit_binop (target, ior_optab, tmp1, tmp2);
43cacb12
RS
16752}
16753
f22d7973 16754/* Emit the SVE equivalent of:
43cacb12 16755
f22d7973
RS
16756 (set TMP (CODE OP0 OP1))
16757 (set TARGET (not TMP))
43cacb12 16758
f22d7973 16759 PTRUE is an all-true predicate with the same mode as TARGET. */
43cacb12
RS
16760
16761static void
f22d7973
RS
16762aarch64_emit_sve_inverted_cond (rtx target, rtx ptrue, rtx_code code,
16763 rtx op0, rtx op1)
43cacb12 16764{
f22d7973
RS
16765 machine_mode pred_mode = GET_MODE (ptrue);
16766 rtx tmp = gen_reg_rtx (pred_mode);
16767 aarch64_emit_sve_ptrue_op (tmp, ptrue,
16768 gen_rtx_fmt_ee (code, pred_mode, op0, op1));
16769 aarch64_emit_unop (target, one_cmpl_optab, tmp);
43cacb12
RS
16770}
16771
f22d7973 16772/* Expand an SVE floating-point comparison using the SVE equivalent of:
43cacb12 16773
f22d7973 16774 (set TARGET (CODE OP0 OP1))
43cacb12
RS
16775
16776 If CAN_INVERT_P is true, the caller can also handle inverted results;
16777 return true if the result is in fact inverted. */
16778
16779bool
16780aarch64_expand_sve_vec_cmp_float (rtx target, rtx_code code,
16781 rtx op0, rtx op1, bool can_invert_p)
16782{
16783 machine_mode pred_mode = GET_MODE (target);
16784 machine_mode data_mode = GET_MODE (op0);
16785
16786 rtx ptrue = force_reg (pred_mode, CONSTM1_RTX (pred_mode));
16787 switch (code)
16788 {
16789 case UNORDERED:
16790 /* UNORDERED has no immediate form. */
16791 op1 = force_reg (data_mode, op1);
f22d7973 16792 /* fall through */
43cacb12
RS
16793 case LT:
16794 case LE:
16795 case GT:
16796 case GE:
16797 case EQ:
16798 case NE:
f22d7973
RS
16799 {
16800 /* There is native support for the comparison. */
16801 rtx cond = gen_rtx_fmt_ee (code, pred_mode, op0, op1);
16802 aarch64_emit_sve_ptrue_op (target, ptrue, cond);
16803 return false;
16804 }
43cacb12
RS
16805
16806 case LTGT:
16807 /* This is a trapping operation (LT or GT). */
f22d7973 16808 aarch64_emit_sve_or_conds (target, LT, GT, ptrue, op0, op1);
43cacb12
RS
16809 return false;
16810
16811 case UNEQ:
16812 if (!flag_trapping_math)
16813 {
16814 /* This would trap for signaling NaNs. */
16815 op1 = force_reg (data_mode, op1);
f22d7973 16816 aarch64_emit_sve_or_conds (target, UNORDERED, EQ, ptrue, op0, op1);
43cacb12
RS
16817 return false;
16818 }
16819 /* fall through */
43cacb12
RS
16820 case UNLT:
16821 case UNLE:
16822 case UNGT:
16823 case UNGE:
f22d7973
RS
16824 if (flag_trapping_math)
16825 {
16826 /* Work out which elements are ordered. */
16827 rtx ordered = gen_reg_rtx (pred_mode);
16828 op1 = force_reg (data_mode, op1);
16829 aarch64_emit_sve_inverted_cond (ordered, ptrue, UNORDERED, op0, op1);
16830
16831 /* Test the opposite condition for the ordered elements,
16832 then invert the result. */
16833 if (code == UNEQ)
16834 code = NE;
16835 else
16836 code = reverse_condition_maybe_unordered (code);
16837 if (can_invert_p)
16838 {
16839 aarch64_emit_sve_predicated_cond (target, code,
16840 ordered, op0, op1);
16841 return true;
16842 }
16843 rtx tmp = gen_reg_rtx (pred_mode);
16844 aarch64_emit_sve_predicated_cond (tmp, code, ordered, op0, op1);
16845 aarch64_emit_unop (target, one_cmpl_optab, tmp);
16846 return false;
16847 }
16848 break;
16849
16850 case ORDERED:
16851 /* ORDERED has no immediate form. */
16852 op1 = force_reg (data_mode, op1);
16853 break;
43cacb12
RS
16854
16855 default:
16856 gcc_unreachable ();
16857 }
f22d7973
RS
16858
16859 /* There is native support for the inverse comparison. */
16860 code = reverse_condition_maybe_unordered (code);
16861 if (can_invert_p)
16862 {
16863 rtx cond = gen_rtx_fmt_ee (code, pred_mode, op0, op1);
16864 aarch64_emit_sve_ptrue_op (target, ptrue, cond);
16865 return true;
16866 }
16867 aarch64_emit_sve_inverted_cond (target, ptrue, code, op0, op1);
16868 return false;
43cacb12
RS
16869}
16870
16871/* Expand an SVE vcond pattern with operands OPS. DATA_MODE is the mode
16872 of the data being selected and CMP_MODE is the mode of the values being
16873 compared. */
16874
16875void
16876aarch64_expand_sve_vcond (machine_mode data_mode, machine_mode cmp_mode,
16877 rtx *ops)
16878{
16879 machine_mode pred_mode
16880 = aarch64_get_mask_mode (GET_MODE_NUNITS (cmp_mode),
16881 GET_MODE_SIZE (cmp_mode)).require ();
16882 rtx pred = gen_reg_rtx (pred_mode);
16883 if (FLOAT_MODE_P (cmp_mode))
16884 {
16885 if (aarch64_expand_sve_vec_cmp_float (pred, GET_CODE (ops[3]),
16886 ops[4], ops[5], true))
16887 std::swap (ops[1], ops[2]);
16888 }
16889 else
16890 aarch64_expand_sve_vec_cmp_int (pred, GET_CODE (ops[3]), ops[4], ops[5]);
16891
16892 rtvec vec = gen_rtvec (3, pred, ops[1], ops[2]);
16893 emit_set_insn (ops[0], gen_rtx_UNSPEC (data_mode, vec, UNSPEC_SEL));
16894}
16895
16896/* Implement TARGET_MODES_TIEABLE_P. In principle we should always return
16897 true. However due to issues with register allocation it is preferable
16898 to avoid tying integer scalar and FP scalar modes. Executing integer
16899 operations in general registers is better than treating them as scalar
16900 vector operations. This reduces latency and avoids redundant int<->FP
16901 moves. So tie modes if they are either the same class, or vector modes
16902 with other vector modes, vector structs or any scalar mode. */
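/* For example, SImode and DImode tie (same class), DImode and DFmode do not,
   and a vector mode such as V4SImode ties with any scalar mode.  */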
97e1ad78 16903
99e1629f 16904static bool
ef4bddc2 16905aarch64_modes_tieable_p (machine_mode mode1, machine_mode mode2)
16906{
16907 if (GET_MODE_CLASS (mode1) == GET_MODE_CLASS (mode2))
16908 return true;
16909
16910 /* We specifically want to allow elements of "structure" modes to
16911 be tieable to the structure. This more general condition allows
16912 other rarer situations too. The reason we don't extend this to
16913 predicate modes is that there are no predicate structure modes
16914 nor any specific instructions for extracting part of a predicate
16915 register. */
16916 if (aarch64_vector_data_mode_p (mode1)
16917 && aarch64_vector_data_mode_p (mode2))
16918 return true;
16919
16920 /* Also allow any scalar modes with vectors. */
16921 if (aarch64_vector_mode_supported_p (mode1)
16922 || aarch64_vector_mode_supported_p (mode2))
16923 return true;
16924
16925 return false;
16926}
16927
e2c75eea
JG
16928/* Return a new RTX holding the result of moving POINTER forward by
16929 AMOUNT bytes. */
16930
16931static rtx
6a70badb 16932aarch64_move_pointer (rtx pointer, poly_int64 amount)
e2c75eea
JG
16933{
16934 rtx next = plus_constant (Pmode, XEXP (pointer, 0), amount);
16935
16936 return adjust_automodify_address (pointer, GET_MODE (pointer),
16937 next, amount);
16938}
16939
16940/* Return a new RTX holding the result of moving POINTER forward by the
16941 size of the mode it points to. */
16942
16943static rtx
16944aarch64_progress_pointer (rtx pointer)
16945{
6a70badb 16946 return aarch64_move_pointer (pointer, GET_MODE_SIZE (GET_MODE (pointer)));
e2c75eea
JG
16947}
16948
16949/* Copy one MODE sized block from SRC to DST, then progress SRC and DST by
16950 MODE bytes. */
16951
16952static void
16953aarch64_copy_one_block_and_progress_pointers (rtx *src, rtx *dst,
ef4bddc2 16954 machine_mode mode)
e2c75eea
JG
16955{
16956 rtx reg = gen_reg_rtx (mode);
16957
16958 /* "Cast" the pointers to the correct mode. */
16959 *src = adjust_address (*src, mode, 0);
16960 *dst = adjust_address (*dst, mode, 0);
16961 /* Emit the memcpy. */
16962 emit_move_insn (reg, *src);
16963 emit_move_insn (*dst, reg);
16964 /* Move the pointers forward. */
16965 *src = aarch64_progress_pointer (*src);
16966 *dst = aarch64_progress_pointer (*dst);
16967}
16968
16969/* Expand movmem, as if from a __builtin_memcpy. Return true if
16970 we succeed, otherwise return false. */
16971
16972bool
16973aarch64_expand_movmem (rtx *operands)
16974{
89c52e5e 16975 int n, mode_bits;
16976 rtx dst = operands[0];
16977 rtx src = operands[1];
16978 rtx base;
89c52e5e 16979 machine_mode cur_mode = BLKmode, next_mode;
16980 bool speed_p = !optimize_function_for_size_p (cfun);
16981
16982 /* When optimizing for size, give a better estimate of the length of a
16983 memcpy call, but use the default otherwise. Moves larger than 8 bytes
16984 will always require an even number of instructions to do now. And each
16985 operation requires both a load+store, so divide the max number by 2. */
16986 int max_num_moves = (speed_p ? 16 : AARCH64_CALL_RATIO) / 2;
16987
16988 /* We can't do anything smart if the amount to copy is not constant. */
16989 if (!CONST_INT_P (operands[2]))
16990 return false;
16991
89c52e5e 16992 n = INTVAL (operands[2]);
e2c75eea 16993
16994 /* Try to keep the number of instructions low. For all cases we will do at
16995 most two moves for the residual amount, since we'll always overlap the
16996 remainder. */
16997 if (((n / 16) + (n % 16 ? 2 : 0)) > max_num_moves)
16998 return false;
16999
17000 base = copy_to_mode_reg (Pmode, XEXP (dst, 0));
17001 dst = adjust_automodify_address (dst, VOIDmode, base, 0);
17002
17003 base = copy_to_mode_reg (Pmode, XEXP (src, 0));
17004 src = adjust_automodify_address (src, VOIDmode, base, 0);
17005
17006 /* Convert n to bits to make the rest of the code simpler. */
17007 n = n * BITS_PER_UNIT;
e2c75eea 17008
17009 /* Maximum amount to copy in one go. The AArch64 back-end has integer modes
17010 larger than TImode, but we should not use them for loads/stores here. */
17011 const int copy_limit = GET_MODE_BITSIZE (TImode);
17012
89c52e5e 17013 while (n > 0)
e2c75eea 17014 {
17015 /* Find the largest mode in which to do the copy without over-reading
17016 or over-writing. */
17017 opt_scalar_int_mode mode_iter;
17018 FOR_EACH_MODE_IN_CLASS (mode_iter, MODE_INT)
f7e1d19d 17019 if (GET_MODE_BITSIZE (mode_iter.require ()) <= MIN (n, copy_limit))
89c52e5e 17020 cur_mode = mode_iter.require ();
e2c75eea 17021
89c52e5e 17022 gcc_assert (cur_mode != BLKmode);
e2c75eea 17023
17024 mode_bits = GET_MODE_BITSIZE (cur_mode).to_constant ();
17025 aarch64_copy_one_block_and_progress_pointers (&src, &dst, cur_mode);
e2c75eea 17026
89c52e5e 17027 n -= mode_bits;
e2c75eea 17028
17029 /* Do certain trailing copies as overlapping if it's going to be
17030 cheaper, i.e. fewer instructions to do so. For instance, for a 15-byte
17031 copy it's more efficient to do two overlapping 8-byte copies than
17032 8 + 6 + 1. */
f7e1d19d 17033 if (n > 0 && n <= 8 * BITS_PER_UNIT)
89c52e5e 17034 {
17035 next_mode = smallest_mode_for_size (n, MODE_INT);
17036 int n_bits = GET_MODE_BITSIZE (next_mode).to_constant ();
17037 src = aarch64_move_pointer (src, (n - n_bits) / BITS_PER_UNIT);
17038 dst = aarch64_move_pointer (dst, (n - n_bits) / BITS_PER_UNIT);
17039 n = n_bits;
17040 }
17041 }
17042
17043 return true;
17044}
17045
17046/* Split a DImode store of a CONST_INT SRC to MEM DST as two
17047 SImode stores. Handle the case when the constant has identical
17048 bottom and top halves. This is beneficial when the two stores can be
17049 merged into an STP and we avoid synthesising potentially expensive
17050 immediates twice. Return true if such a split is possible. */
17051
17052bool
17053aarch64_split_dimode_const_store (rtx dst, rtx src)
17054{
17055 rtx lo = gen_lowpart (SImode, src);
17056 rtx hi = gen_highpart_mode (SImode, DImode, src);
17057
17058 bool size_p = optimize_function_for_size_p (cfun);
17059
17060 if (!rtx_equal_p (lo, hi))
17061 return false;
17062
17063 unsigned int orig_cost
17064 = aarch64_internal_mov_immediate (NULL_RTX, src, false, DImode);
17065 unsigned int lo_cost
17066 = aarch64_internal_mov_immediate (NULL_RTX, lo, false, SImode);
17067
17068 /* We want to transform:
17069 MOV x1, 49370
17070 MOVK x1, 0x140, lsl 16
17071 MOVK x1, 0xc0da, lsl 32
17072 MOVK x1, 0x140, lsl 48
17073 STR x1, [x0]
17074 into:
17075 MOV w1, 49370
17076 MOVK w1, 0x140, lsl 16
17077 STP w1, w1, [x0]
17078 So we want to perform this only when we save two instructions
17079 or more. When optimizing for size, however, accept any code size
17080 savings we can. */
17081 if (size_p && orig_cost <= lo_cost)
17082 return false;
17083
17084 if (!size_p
17085 && (orig_cost <= lo_cost + 1))
17086 return false;
17087
17088 rtx mem_lo = adjust_address (dst, SImode, 0);
17089 if (!aarch64_mem_pair_operand (mem_lo, SImode))
17090 return false;
17091
17092 rtx tmp_reg = gen_reg_rtx (SImode);
17093 aarch64_expand_mov_immediate (tmp_reg, lo);
17094 rtx mem_hi = aarch64_move_pointer (mem_lo, GET_MODE_SIZE (SImode));
17095 /* Don't emit an explicit store pair as this may not always be profitable.
17096 Let the sched-fusion logic decide whether to merge them. */
17097 emit_move_insn (mem_lo, tmp_reg);
17098 emit_move_insn (mem_hi, tmp_reg);
17099
17100 return true;
17101}
17102
30c46053
MC
17103/* Generate RTL for a conditional branch with rtx comparison CODE in
17104 mode CC_MODE. The destination of the unlikely conditional branch
17105 is LABEL_REF. */
17106
17107void
17108aarch64_gen_unlikely_cbranch (enum rtx_code code, machine_mode cc_mode,
17109 rtx label_ref)
17110{
17111 rtx x;
17112 x = gen_rtx_fmt_ee (code, VOIDmode,
17113 gen_rtx_REG (cc_mode, CC_REGNUM),
17114 const0_rtx);
17115
17116 x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
17117 gen_rtx_LABEL_REF (VOIDmode, label_ref),
17118 pc_rtx);
17119 aarch64_emit_unlikely_jump (gen_rtx_SET (pc_rtx, x));
17120}
17121
17122/* Generate DImode scratch registers for 128-bit (TImode) addition.
17123
17124 OP1 represents the TImode destination operand 1
17125 OP2 represents the TImode destination operand 2
17126 LOW_DEST represents the low half (DImode) of TImode operand 0
17127 LOW_IN1 represents the low half (DImode) of TImode operand 1
17128 LOW_IN2 represents the low half (DImode) of TImode operand 2
17129 HIGH_DEST represents the high half (DImode) of TImode operand 0
17130 HIGH_IN1 represents the high half (DImode) of TImode operand 1
17131 HIGH_IN2 represents the high half (DImode) of TImode operand 2. */
17132
17133void
17134aarch64_addti_scratch_regs (rtx op1, rtx op2, rtx *low_dest,
17135 rtx *low_in1, rtx *low_in2,
17136 rtx *high_dest, rtx *high_in1,
17137 rtx *high_in2)
17138{
17139 *low_dest = gen_reg_rtx (DImode);
17140 *low_in1 = gen_lowpart (DImode, op1);
17141 *low_in2 = simplify_gen_subreg (DImode, op2, TImode,
17142 subreg_lowpart_offset (DImode, TImode));
17143 *high_dest = gen_reg_rtx (DImode);
17144 *high_in1 = gen_highpart (DImode, op1);
17145 *high_in2 = simplify_gen_subreg (DImode, op2, TImode,
17146 subreg_highpart_offset (DImode, TImode));
17147}
17148
17149/* Generate DImode scratch registers for 128-bit (TImode) subtraction.
17150
17151 This function differs from 'aarch64_addti_scratch_regs' in that
17152 OP1 can be an immediate constant (zero). We must call
17153 subreg_highpart_offset with DImode and TImode arguments, otherwise
17154 VOIDmode will be used for the const_int which generates an internal
17155 error from subreg_size_highpart_offset which does not expect a size of zero.
17156
17157 OP1 represents the TImode destination operand 1
17158 OP2 represents the TImode destination operand 2
17159 LOW_DEST represents the low half (DImode) of TImode operand 0
17160 LOW_IN1 represents the low half (DImode) of TImode operand 1
17161 LOW_IN2 represents the low half (DImode) of TImode operand 2
17162 HIGH_DEST represents the high half (DImode) of TImode operand 0
17163 HIGH_IN1 represents the high half (DImode) of TImode operand 1
17164 HIGH_IN2 represents the high half (DImode) of TImode operand 2. */
17165
17166
17167void
17168aarch64_subvti_scratch_regs (rtx op1, rtx op2, rtx *low_dest,
17169 rtx *low_in1, rtx *low_in2,
17170 rtx *high_dest, rtx *high_in1,
17171 rtx *high_in2)
17172{
17173 *low_dest = gen_reg_rtx (DImode);
17174 *low_in1 = simplify_gen_subreg (DImode, op1, TImode,
17175 subreg_lowpart_offset (DImode, TImode));
17176
17177 *low_in2 = simplify_gen_subreg (DImode, op2, TImode,
17178 subreg_lowpart_offset (DImode, TImode));
17179 *high_dest = gen_reg_rtx (DImode);
17180
17181 *high_in1 = simplify_gen_subreg (DImode, op1, TImode,
17182 subreg_highpart_offset (DImode, TImode));
17183 *high_in2 = simplify_gen_subreg (DImode, op2, TImode,
17184 subreg_highpart_offset (DImode, TImode));
17185}
17186
17187/* Generate RTL for 128-bit (TImode) subtraction with overflow.
17188
17189 OP0 represents the TImode destination operand 0
17190 LOW_DEST represents the low half (DImode) of TImode operand 0
17191 LOW_IN1 represents the low half (DImode) of TImode operand 1
17192 LOW_IN2 represents the low half (DImode) of TImode operand 2
17193 HIGH_DEST represents the high half (DImode) of TImode operand 0
17194 HIGH_IN1 represents the high half (DImode) of TImode operand 1
a58fe3c5
RE
17195 HIGH_IN2 represents the high half (DImode) of TImode operand 2
17196 UNSIGNED_P is true if the operation is being performed on unsigned
17197 values. */
30c46053
MC
17198void
17199aarch64_expand_subvti (rtx op0, rtx low_dest, rtx low_in1,
17200 rtx low_in2, rtx high_dest, rtx high_in1,
a58fe3c5 17201 rtx high_in2, bool unsigned_p)
30c46053
MC
17202{
17203 if (low_in2 == const0_rtx)
17204 {
17205 low_dest = low_in1;
a58fe3c5
RE
17206 high_in2 = force_reg (DImode, high_in2);
17207 if (unsigned_p)
17208 emit_insn (gen_subdi3_compare1 (high_dest, high_in1, high_in2));
17209 else
17210 emit_insn (gen_subvdi_insn (high_dest, high_in1, high_in2));
30c46053
MC
17211 }
17212 else
17213 {
17214 if (CONST_INT_P (low_in2))
17215 {
30c46053 17216 high_in2 = force_reg (DImode, high_in2);
a58fe3c5
RE
17217 emit_insn (gen_subdi3_compare1_imm (low_dest, low_in1, low_in2,
17218 GEN_INT (-INTVAL (low_in2))));
30c46053
MC
17219 }
17220 else
17221 emit_insn (gen_subdi3_compare1 (low_dest, low_in1, low_in2));
a58fe3c5
RE
17222
17223 if (unsigned_p)
17224 emit_insn (gen_usubdi3_carryinC (high_dest, high_in1, high_in2));
17225 else
17226 emit_insn (gen_subdi3_carryinV (high_dest, high_in1, high_in2));
30c46053
MC
17227 }
17228
17229 emit_move_insn (gen_lowpart (DImode, op0), low_dest);
17230 emit_move_insn (gen_highpart (DImode, op0), high_dest);
17231
17232}
17233
a3125fc2
CL
17234/* Implement the TARGET_ASAN_SHADOW_OFFSET hook. */
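/* With ASan's default shadow scale of 8, an AArch64 shadow address is
   computed as (addr >> 3) + (1 << 36).  */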
17235
17236static unsigned HOST_WIDE_INT
17237aarch64_asan_shadow_offset (void)
17238{
17239 return (HOST_WIDE_INT_1 << 36);
17240}
17241
5f3bc026 17242static rtx
cb4347e8 17243aarch64_gen_ccmp_first (rtx_insn **prep_seq, rtx_insn **gen_seq,
5f3bc026
ZC
17244 int code, tree treeop0, tree treeop1)
17245{
c8012fbc
WD
17246 machine_mode op_mode, cmp_mode, cc_mode = CCmode;
17247 rtx op0, op1;
5f3bc026 17248 int unsignedp = TYPE_UNSIGNED (TREE_TYPE (treeop0));
c8012fbc 17249 insn_code icode;
5f3bc026
ZC
17250 struct expand_operand ops[4];
17251
5f3bc026
ZC
17252 start_sequence ();
17253 expand_operands (treeop0, treeop1, NULL_RTX, &op0, &op1, EXPAND_NORMAL);
17254
17255 op_mode = GET_MODE (op0);
17256 if (op_mode == VOIDmode)
17257 op_mode = GET_MODE (op1);
17258
17259 switch (op_mode)
17260 {
4e10a5a7
RS
17261 case E_QImode:
17262 case E_HImode:
17263 case E_SImode:
5f3bc026
ZC
17264 cmp_mode = SImode;
17265 icode = CODE_FOR_cmpsi;
17266 break;
17267
4e10a5a7 17268 case E_DImode:
5f3bc026
ZC
17269 cmp_mode = DImode;
17270 icode = CODE_FOR_cmpdi;
17271 break;
17272
4e10a5a7 17273 case E_SFmode:
786e3c06
WD
17274 cmp_mode = SFmode;
17275 cc_mode = aarch64_select_cc_mode ((rtx_code) code, op0, op1);
17276 icode = cc_mode == CCFPEmode ? CODE_FOR_fcmpesf : CODE_FOR_fcmpsf;
17277 break;
17278
4e10a5a7 17279 case E_DFmode:
786e3c06
WD
17280 cmp_mode = DFmode;
17281 cc_mode = aarch64_select_cc_mode ((rtx_code) code, op0, op1);
17282 icode = cc_mode == CCFPEmode ? CODE_FOR_fcmpedf : CODE_FOR_fcmpdf;
17283 break;
17284
5f3bc026
ZC
17285 default:
17286 end_sequence ();
17287 return NULL_RTX;
17288 }
17289
c8012fbc
WD
17290 op0 = prepare_operand (icode, op0, 0, op_mode, cmp_mode, unsignedp);
17291 op1 = prepare_operand (icode, op1, 1, op_mode, cmp_mode, unsignedp);
5f3bc026
ZC
17292 if (!op0 || !op1)
17293 {
17294 end_sequence ();
17295 return NULL_RTX;
17296 }
17297 *prep_seq = get_insns ();
17298 end_sequence ();
17299
c8012fbc
WD
17300 create_fixed_operand (&ops[0], op0);
17301 create_fixed_operand (&ops[1], op1);
5f3bc026
ZC
17302
17303 start_sequence ();
c8012fbc 17304 if (!maybe_expand_insn (icode, 2, ops))
5f3bc026
ZC
17305 {
17306 end_sequence ();
17307 return NULL_RTX;
17308 }
17309 *gen_seq = get_insns ();
17310 end_sequence ();
17311
c8012fbc
WD
17312 return gen_rtx_fmt_ee ((rtx_code) code, cc_mode,
17313 gen_rtx_REG (cc_mode, CC_REGNUM), const0_rtx);
5f3bc026
ZC
17314}
17315
17316static rtx
cb4347e8
TS
17317aarch64_gen_ccmp_next (rtx_insn **prep_seq, rtx_insn **gen_seq, rtx prev,
17318 int cmp_code, tree treeop0, tree treeop1, int bit_code)
5f3bc026 17319{
c8012fbc
WD
17320 rtx op0, op1, target;
17321 machine_mode op_mode, cmp_mode, cc_mode = CCmode;
5f3bc026 17322 int unsignedp = TYPE_UNSIGNED (TREE_TYPE (treeop0));
c8012fbc 17323 insn_code icode;
5f3bc026 17324 struct expand_operand ops[6];
c8012fbc 17325 int aarch64_cond;
5f3bc026 17326
cb4347e8 17327 push_to_sequence (*prep_seq);
5f3bc026
ZC
17328 expand_operands (treeop0, treeop1, NULL_RTX, &op0, &op1, EXPAND_NORMAL);
17329
17330 op_mode = GET_MODE (op0);
17331 if (op_mode == VOIDmode)
17332 op_mode = GET_MODE (op1);
17333
17334 switch (op_mode)
17335 {
4e10a5a7
RS
17336 case E_QImode:
17337 case E_HImode:
17338 case E_SImode:
5f3bc026 17339 cmp_mode = SImode;
c8012fbc 17340 icode = CODE_FOR_ccmpsi;
5f3bc026
ZC
17341 break;
17342
4e10a5a7 17343 case E_DImode:
5f3bc026 17344 cmp_mode = DImode;
c8012fbc 17345 icode = CODE_FOR_ccmpdi;
5f3bc026
ZC
17346 break;
17347
4e10a5a7 17348 case E_SFmode:
786e3c06
WD
17349 cmp_mode = SFmode;
17350 cc_mode = aarch64_select_cc_mode ((rtx_code) cmp_code, op0, op1);
17351 icode = cc_mode == CCFPEmode ? CODE_FOR_fccmpesf : CODE_FOR_fccmpsf;
17352 break;
17353
4e10a5a7 17354 case E_DFmode:
786e3c06
WD
17355 cmp_mode = DFmode;
17356 cc_mode = aarch64_select_cc_mode ((rtx_code) cmp_code, op0, op1);
17357 icode = cc_mode == CCFPEmode ? CODE_FOR_fccmpedf : CODE_FOR_fccmpdf;
17358 break;
17359
5f3bc026
ZC
17360 default:
17361 end_sequence ();
17362 return NULL_RTX;
17363 }
17364
17365 op0 = prepare_operand (icode, op0, 2, op_mode, cmp_mode, unsignedp);
17366 op1 = prepare_operand (icode, op1, 3, op_mode, cmp_mode, unsignedp);
17367 if (!op0 || !op1)
17368 {
17369 end_sequence ();
17370 return NULL_RTX;
17371 }
17372 *prep_seq = get_insns ();
17373 end_sequence ();
17374
17375 target = gen_rtx_REG (cc_mode, CC_REGNUM);
c8012fbc 17376 aarch64_cond = aarch64_get_condition_code_1 (cc_mode, (rtx_code) cmp_code);
5f3bc026 17377
c8012fbc
WD
17378 if (bit_code != AND)
17379 {
17380 prev = gen_rtx_fmt_ee (REVERSE_CONDITION (GET_CODE (prev),
17381 GET_MODE (XEXP (prev, 0))),
17382 VOIDmode, XEXP (prev, 0), const0_rtx);
17383 aarch64_cond = AARCH64_INVERSE_CONDITION_CODE (aarch64_cond);
17384 }
17385
17386 create_fixed_operand (&ops[0], XEXP (prev, 0));
5f3bc026
ZC
17387 create_fixed_operand (&ops[1], target);
17388 create_fixed_operand (&ops[2], op0);
17389 create_fixed_operand (&ops[3], op1);
c8012fbc
WD
17390 create_fixed_operand (&ops[4], prev);
17391 create_fixed_operand (&ops[5], GEN_INT (aarch64_cond));
5f3bc026 17392
cb4347e8 17393 push_to_sequence (*gen_seq);
5f3bc026
ZC
17394 if (!maybe_expand_insn (icode, 6, ops))
17395 {
17396 end_sequence ();
17397 return NULL_RTX;
17398 }
17399
17400 *gen_seq = get_insns ();
17401 end_sequence ();
17402
c8012fbc 17403 return gen_rtx_fmt_ee ((rtx_code) cmp_code, VOIDmode, target, const0_rtx);
5f3bc026
ZC
17404}
17405
17406#undef TARGET_GEN_CCMP_FIRST
17407#define TARGET_GEN_CCMP_FIRST aarch64_gen_ccmp_first
17408
17409#undef TARGET_GEN_CCMP_NEXT
17410#define TARGET_GEN_CCMP_NEXT aarch64_gen_ccmp_next
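/* Together these hooks let chains of comparisons such as (a < b && c == d)
   be expanded as a CMP followed by CCMP instructions, so only the final
   condition flags need to be tested.  */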
17411
6a569cdd
KT
17412/* Implement TARGET_SCHED_MACRO_FUSION_P. Return true if target supports
17413 instruction fusion of some sort. */
17414
17415static bool
17416aarch64_macro_fusion_p (void)
17417{
b175b679 17418 return aarch64_tune_params.fusible_ops != AARCH64_FUSE_NOTHING;
6a569cdd
KT
17419}
17420
17421
17422/* Implement TARGET_SCHED_MACRO_FUSION_PAIR_P. Return true if PREV and CURR
17423 should be kept together during scheduling. */
17424
17425static bool
17426aarch_macro_fusion_pair_p (rtx_insn *prev, rtx_insn *curr)
17427{
17428 rtx set_dest;
17429 rtx prev_set = single_set (prev);
17430 rtx curr_set = single_set (curr);
17431 /* prev and curr are simple SET insns i.e. no flag setting or branching. */
17432 bool simple_sets_p = prev_set && curr_set && !any_condjump_p (curr);
17433
17434 if (!aarch64_macro_fusion_p ())
17435 return false;
17436
d7b03373 17437 if (simple_sets_p && aarch64_fusion_enabled_p (AARCH64_FUSE_MOV_MOVK))
6a569cdd
KT
17438 {
17439 /* We are trying to match:
17440 prev (mov) == (set (reg r0) (const_int imm16))
17441 curr (movk) == (set (zero_extract (reg r0)
17442 (const_int 16)
17443 (const_int 16))
17444 (const_int imm16_1)) */
17445
17446 set_dest = SET_DEST (curr_set);
17447
17448 if (GET_CODE (set_dest) == ZERO_EXTRACT
17449 && CONST_INT_P (SET_SRC (curr_set))
17450 && CONST_INT_P (SET_SRC (prev_set))
17451 && CONST_INT_P (XEXP (set_dest, 2))
17452 && INTVAL (XEXP (set_dest, 2)) == 16
17453 && REG_P (XEXP (set_dest, 0))
17454 && REG_P (SET_DEST (prev_set))
17455 && REGNO (XEXP (set_dest, 0)) == REGNO (SET_DEST (prev_set)))
17456 {
17457 return true;
17458 }
17459 }
17460
d7b03373 17461 if (simple_sets_p && aarch64_fusion_enabled_p (AARCH64_FUSE_ADRP_ADD))
9bbe08fe
KT
17462 {
17463
17464 /* We're trying to match:
17465 prev (adrp) == (set (reg r1)
17466 (high (symbol_ref ("SYM"))))
17467 curr (add) == (set (reg r0)
17468 (lo_sum (reg r1)
17469 (symbol_ref ("SYM"))))
17470 Note that r0 need not necessarily be the same as r1, especially
17471 during pre-regalloc scheduling. */
17472
17473 if (satisfies_constraint_Ush (SET_SRC (prev_set))
17474 && REG_P (SET_DEST (prev_set)) && REG_P (SET_DEST (curr_set)))
17475 {
17476 if (GET_CODE (SET_SRC (curr_set)) == LO_SUM
17477 && REG_P (XEXP (SET_SRC (curr_set), 0))
17478 && REGNO (XEXP (SET_SRC (curr_set), 0))
17479 == REGNO (SET_DEST (prev_set))
17480 && rtx_equal_p (XEXP (SET_SRC (prev_set), 0),
17481 XEXP (SET_SRC (curr_set), 1)))
17482 return true;
17483 }
17484 }
17485
d7b03373 17486 if (simple_sets_p && aarch64_fusion_enabled_p (AARCH64_FUSE_MOVK_MOVK))
cd0cb232
KT
17487 {
17488
17489 /* We're trying to match:
17490 prev (movk) == (set (zero_extract (reg r0)
17491 (const_int 16)
17492 (const_int 32))
17493 (const_int imm16_1))
17494 curr (movk) == (set (zero_extract (reg r0)
17495 (const_int 16)
17496 (const_int 48))
17497 (const_int imm16_2)) */
17498
17499 if (GET_CODE (SET_DEST (prev_set)) == ZERO_EXTRACT
17500 && GET_CODE (SET_DEST (curr_set)) == ZERO_EXTRACT
17501 && REG_P (XEXP (SET_DEST (prev_set), 0))
17502 && REG_P (XEXP (SET_DEST (curr_set), 0))
17503 && REGNO (XEXP (SET_DEST (prev_set), 0))
17504 == REGNO (XEXP (SET_DEST (curr_set), 0))
17505 && CONST_INT_P (XEXP (SET_DEST (prev_set), 2))
17506 && CONST_INT_P (XEXP (SET_DEST (curr_set), 2))
17507 && INTVAL (XEXP (SET_DEST (prev_set), 2)) == 32
17508 && INTVAL (XEXP (SET_DEST (curr_set), 2)) == 48
17509 && CONST_INT_P (SET_SRC (prev_set))
17510 && CONST_INT_P (SET_SRC (curr_set)))
17511 return true;
17512
17513 }
d7b03373 17514 if (simple_sets_p && aarch64_fusion_enabled_p (AARCH64_FUSE_ADRP_LDR))
d8354ad7
KT
17515 {
17516 /* We're trying to match:
17517 prev (adrp) == (set (reg r0)
17518 (high (symbol_ref ("SYM"))))
17519 curr (ldr) == (set (reg r1)
17520 (mem (lo_sum (reg r0)
17521 (symbol_ref ("SYM")))))
17522 or
17523 curr (ldr) == (set (reg r1)
17524 (zero_extend (mem
17525 (lo_sum (reg r0)
17526 (symbol_ref ("SYM")))))) */
17527 if (satisfies_constraint_Ush (SET_SRC (prev_set))
17528 && REG_P (SET_DEST (prev_set)) && REG_P (SET_DEST (curr_set)))
17529 {
17530 rtx curr_src = SET_SRC (curr_set);
17531
17532 if (GET_CODE (curr_src) == ZERO_EXTEND)
17533 curr_src = XEXP (curr_src, 0);
17534
17535 if (MEM_P (curr_src) && GET_CODE (XEXP (curr_src, 0)) == LO_SUM
17536 && REG_P (XEXP (XEXP (curr_src, 0), 0))
17537 && REGNO (XEXP (XEXP (curr_src, 0), 0))
17538 == REGNO (SET_DEST (prev_set))
17539 && rtx_equal_p (XEXP (XEXP (curr_src, 0), 1),
17540 XEXP (SET_SRC (prev_set), 0)))
17541 return true;
17542 }
17543 }
cd0cb232 17544
d7b03373 17545 if (aarch64_fusion_enabled_p (AARCH64_FUSE_AES_AESMC)
00a8574a
WD
17546 && aarch_crypto_can_dual_issue (prev, curr))
17547 return true;
17548
d7b03373 17549 if (aarch64_fusion_enabled_p (AARCH64_FUSE_CMP_BRANCH)
3759108f
AP
17550 && any_condjump_p (curr))
17551 {
509f819a
N
17552 unsigned int condreg1, condreg2;
17553 rtx cc_reg_1;
17554 aarch64_fixed_condition_code_regs (&condreg1, &condreg2);
17555 cc_reg_1 = gen_rtx_REG (CCmode, condreg1);
17556
17557 if (reg_referenced_p (cc_reg_1, PATTERN (curr))
17558 && prev
17559 && modified_in_p (cc_reg_1, prev))
17560 {
17561 enum attr_type prev_type = get_attr_type (prev);
17562
17563 /* FIXME: this misses some instructions which are considered simple
17564 arithmetic instructions for ThunderX. Simple shifts are missed here. */
17565 if (prev_type == TYPE_ALUS_SREG
17566 || prev_type == TYPE_ALUS_IMM
17567 || prev_type == TYPE_LOGICS_REG
17568 || prev_type == TYPE_LOGICS_IMM)
17569 return true;
17570 }
3759108f
AP
17571 }
17572
bee7e0fc
AP
17573 if (prev_set
17574 && curr_set
17575 && aarch64_fusion_enabled_p (AARCH64_FUSE_ALU_BRANCH)
00c7c57f
JB
17576 && any_condjump_p (curr))
17577 {
17578 /* We're trying to match:
17579 prev (alu_insn) == (set (r0) plus ((r0) (r1/imm)))
17580 curr (cbz) == (set (pc) (if_then_else (eq/ne) (r0)
17581 (const_int 0))
17582 (label_ref ("SYM"))
17583 (pc)) */
17584 if (SET_DEST (curr_set) == (pc_rtx)
17585 && GET_CODE (SET_SRC (curr_set)) == IF_THEN_ELSE
17586 && REG_P (XEXP (XEXP (SET_SRC (curr_set), 0), 0))
17587 && REG_P (SET_DEST (prev_set))
17588 && REGNO (SET_DEST (prev_set))
17589 == REGNO (XEXP (XEXP (SET_SRC (curr_set), 0), 0)))
17590 {
17591 /* Fuse ALU operations followed by conditional branch instruction. */
17592 switch (get_attr_type (prev))
17593 {
17594 case TYPE_ALU_IMM:
17595 case TYPE_ALU_SREG:
17596 case TYPE_ADC_REG:
17597 case TYPE_ADC_IMM:
17598 case TYPE_ADCS_REG:
17599 case TYPE_ADCS_IMM:
17600 case TYPE_LOGIC_REG:
17601 case TYPE_LOGIC_IMM:
17602 case TYPE_CSEL:
17603 case TYPE_ADR:
17604 case TYPE_MOV_IMM:
17605 case TYPE_SHIFT_REG:
17606 case TYPE_SHIFT_IMM:
17607 case TYPE_BFM:
17608 case TYPE_RBIT:
17609 case TYPE_REV:
17610 case TYPE_EXTEND:
17611 return true;
17612
17613 default:;
17614 }
17615 }
17616 }
17617
6a569cdd
KT
17618 return false;
17619}
17620
f2879a90
KT
17621/* Return true iff the instruction fusion described by OP is enabled. */
17622
17623bool
17624aarch64_fusion_enabled_p (enum aarch64_fusion_pairs op)
17625{
17626 return (aarch64_tune_params.fusible_ops & op) != 0;
17627}
17628
350013bc
BC
17629/* If MEM is in the form of [base+offset], extract the two parts
17630 of address and set to BASE and OFFSET, otherwise return false
17631 after clearing BASE and OFFSET. */
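/* For example, (mem (plus (reg) (const_int 16))) yields that register as BASE
   and (const_int 16) as OFFSET, while a bare (mem (reg)) yields an OFFSET of
   const0_rtx.  */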
17632
17633bool
17634extract_base_offset_in_addr (rtx mem, rtx *base, rtx *offset)
17635{
17636 rtx addr;
17637
17638 gcc_assert (MEM_P (mem));
17639
17640 addr = XEXP (mem, 0);
17641
17642 if (REG_P (addr))
17643 {
17644 *base = addr;
17645 *offset = const0_rtx;
17646 return true;
17647 }
17648
17649 if (GET_CODE (addr) == PLUS
17650 && REG_P (XEXP (addr, 0)) && CONST_INT_P (XEXP (addr, 1)))
17651 {
17652 *base = XEXP (addr, 0);
17653 *offset = XEXP (addr, 1);
17654 return true;
17655 }
17656
17657 *base = NULL_RTX;
17658 *offset = NULL_RTX;
17659
17660 return false;
17661}
17662
17663/* Types for scheduling fusion. */
17664enum sched_fusion_type
17665{
17666 SCHED_FUSION_NONE = 0,
17667 SCHED_FUSION_LD_SIGN_EXTEND,
17668 SCHED_FUSION_LD_ZERO_EXTEND,
17669 SCHED_FUSION_LD,
17670 SCHED_FUSION_ST,
17671 SCHED_FUSION_NUM
17672};
17673
17674/* If INSN is a load or store of address in the form of [base+offset],
17675 extract the two parts and set to BASE and OFFSET. Return scheduling
17676 fusion type this INSN is. */
17677
17678static enum sched_fusion_type
17679fusion_load_store (rtx_insn *insn, rtx *base, rtx *offset)
17680{
17681 rtx x, dest, src;
17682 enum sched_fusion_type fusion = SCHED_FUSION_LD;
17683
17684 gcc_assert (INSN_P (insn));
17685 x = PATTERN (insn);
17686 if (GET_CODE (x) != SET)
17687 return SCHED_FUSION_NONE;
17688
17689 src = SET_SRC (x);
17690 dest = SET_DEST (x);
17691
abc52318
KT
17692 machine_mode dest_mode = GET_MODE (dest);
17693
17694 if (!aarch64_mode_valid_for_sched_fusion_p (dest_mode))
350013bc
BC
17695 return SCHED_FUSION_NONE;
17696
17697 if (GET_CODE (src) == SIGN_EXTEND)
17698 {
17699 fusion = SCHED_FUSION_LD_SIGN_EXTEND;
17700 src = XEXP (src, 0);
17701 if (GET_CODE (src) != MEM || GET_MODE (src) != SImode)
17702 return SCHED_FUSION_NONE;
17703 }
17704 else if (GET_CODE (src) == ZERO_EXTEND)
17705 {
17706 fusion = SCHED_FUSION_LD_ZERO_EXTEND;
17707 src = XEXP (src, 0);
17708 if (GET_CODE (src) != MEM || GET_MODE (src) != SImode)
17709 return SCHED_FUSION_NONE;
17710 }
17711
17712 if (GET_CODE (src) == MEM && REG_P (dest))
17713 extract_base_offset_in_addr (src, base, offset);
17714 else if (GET_CODE (dest) == MEM && (REG_P (src) || src == const0_rtx))
17715 {
17716 fusion = SCHED_FUSION_ST;
17717 extract_base_offset_in_addr (dest, base, offset);
17718 }
17719 else
17720 return SCHED_FUSION_NONE;
17721
17722 if (*base == NULL_RTX || *offset == NULL_RTX)
17723 fusion = SCHED_FUSION_NONE;
17724
17725 return fusion;
17726}
17727
17728/* Implement the TARGET_SCHED_FUSION_PRIORITY hook.
17729
17730 Currently we only support fusing ldr or str instructions, so FUSION_PRI
17731 and PRI are only calculated for these instructions. For other instructions,
17732 FUSION_PRI and PRI are simply set to MAX_PRI - 1. In the future, other
17733 types of instruction fusion can be added by returning different priorities.
17734
17735 It's important that irrelevant instructions get the largest FUSION_PRI. */
17736
17737static void
17738aarch64_sched_fusion_priority (rtx_insn *insn, int max_pri,
17739 int *fusion_pri, int *pri)
17740{
17741 int tmp, off_val;
17742 rtx base, offset;
17743 enum sched_fusion_type fusion;
17744
17745 gcc_assert (INSN_P (insn));
17746
17747 tmp = max_pri - 1;
17748 fusion = fusion_load_store (insn, &base, &offset);
17749 if (fusion == SCHED_FUSION_NONE)
17750 {
17751 *pri = tmp;
17752 *fusion_pri = tmp;
17753 return;
17754 }
17755
17756 /* Set FUSION_PRI according to fusion type and base register. */
17757 *fusion_pri = tmp - fusion * FIRST_PSEUDO_REGISTER - REGNO (base);
17758
17759 /* Calculate PRI. */
17760 tmp /= 2;
17761
17762 /* INSN with smaller offset goes first. */
17763 off_val = (int)(INTVAL (offset));
17764 if (off_val >= 0)
17765 tmp -= (off_val & 0xfffff);
17766 else
17767 tmp += ((- off_val) & 0xfffff);
17768
17769 *pri = tmp;
17770 return;
17771}
17772
9bca63d4
WD
17773/* Implement the TARGET_SCHED_ADJUST_PRIORITY hook.
17774 Adjust priority of sha1h instructions so they are scheduled before
17775 other SHA1 instructions. */
17776
17777static int
17778aarch64_sched_adjust_priority (rtx_insn *insn, int priority)
17779{
17780 rtx x = PATTERN (insn);
17781
17782 if (GET_CODE (x) == SET)
17783 {
17784 x = SET_SRC (x);
17785
17786 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_SHA1H)
17787 return priority + 10;
17788 }
17789
17790 return priority;
17791}
17792
350013bc
BC
17793/* Given OPERANDS of consecutive load/store, check if we can merge
17794 them into ldp/stp. LOAD is true if they are load instructions.
17795 MODE is the mode of memory operands. */
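/* For example, the adjacent loads ldr w0, [x2] and ldr w1, [x2, 4] pass these
   checks and can be combined into ldp w0, w1, [x2].  */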
17796
17797bool
17798aarch64_operands_ok_for_ldpstp (rtx *operands, bool load,
b8506a8a 17799 machine_mode mode)
350013bc
BC
17800{
17801 HOST_WIDE_INT offval_1, offval_2, msize;
17802 enum reg_class rclass_1, rclass_2;
17803 rtx mem_1, mem_2, reg_1, reg_2, base_1, base_2, offset_1, offset_2;
17804
17805 if (load)
17806 {
17807 mem_1 = operands[1];
17808 mem_2 = operands[3];
17809 reg_1 = operands[0];
17810 reg_2 = operands[2];
17811 gcc_assert (REG_P (reg_1) && REG_P (reg_2));
17812 if (REGNO (reg_1) == REGNO (reg_2))
17813 return false;
17814 }
17815 else
17816 {
17817 mem_1 = operands[0];
17818 mem_2 = operands[2];
17819 reg_1 = operands[1];
17820 reg_2 = operands[3];
17821 }
17822
bf84ac44
AP
17823 /* The mems cannot be volatile. */
17824 if (MEM_VOLATILE_P (mem_1) || MEM_VOLATILE_P (mem_2))
17825 return false;
17826
54700e2e
AP
17827 /* If we have SImode and slow unaligned ldp,
17828 check the alignment to be at least 8 byte. */
17829 if (mode == SImode
17830 && (aarch64_tune_params.extra_tuning_flags
17831 & AARCH64_EXTRA_TUNE_SLOW_UNALIGNED_LDPW)
17832 && !optimize_size
17833 && MEM_ALIGN (mem_1) < 8 * BITS_PER_UNIT)
17834 return false;
17835
350013bc
BC
17836 /* Check if the addresses are in the form of [base+offset]. */
17837 extract_base_offset_in_addr (mem_1, &base_1, &offset_1);
17838 if (base_1 == NULL_RTX || offset_1 == NULL_RTX)
17839 return false;
17840 extract_base_offset_in_addr (mem_2, &base_2, &offset_2);
17841 if (base_2 == NULL_RTX || offset_2 == NULL_RTX)
17842 return false;
17843
17844 /* Check if the bases are same. */
17845 if (!rtx_equal_p (base_1, base_2))
17846 return false;
17847
dfe1da23
JW
17848 /* The operands must be of the same size. */
17849 gcc_assert (known_eq (GET_MODE_SIZE (GET_MODE (mem_1)),
17850 GET_MODE_SIZE (GET_MODE (mem_2))));
17851
350013bc
BC
17852 offval_1 = INTVAL (offset_1);
17853 offval_2 = INTVAL (offset_2);
6a70badb
RS
17854 /* We should only be trying this for fixed-sized modes. There is no
17855 SVE LDP/STP instruction. */
17856 msize = GET_MODE_SIZE (mode).to_constant ();
350013bc
BC
17857 /* Check if the offsets are consecutive. */
17858 if (offval_1 != (offval_2 + msize) && offval_2 != (offval_1 + msize))
17859 return false;
17860
17861 /* Check if the addresses are clobbered by load. */
17862 if (load)
17863 {
17864 if (reg_mentioned_p (reg_1, mem_1))
17865 return false;
17866
17867 /* In increasing order, the last load can clobber the address. */
17868 if (offval_1 > offval_2 && reg_mentioned_p (reg_2, mem_2))
9b56ec11 17869 return false;
350013bc
BC
17870 }
17871
9b56ec11
JW
17872 /* One of the memory accesses must be a mempair operand.
17873 If it is not the first one, they need to be swapped by the
17874 peephole. */
17875 if (!aarch64_mem_pair_operand (mem_1, GET_MODE (mem_1))
17876 && !aarch64_mem_pair_operand (mem_2, GET_MODE (mem_2)))
17877 return false;
17878
350013bc
BC
17879 if (REG_P (reg_1) && FP_REGNUM_P (REGNO (reg_1)))
17880 rclass_1 = FP_REGS;
17881 else
17882 rclass_1 = GENERAL_REGS;
17883
17884 if (REG_P (reg_2) && FP_REGNUM_P (REGNO (reg_2)))
17885 rclass_2 = FP_REGS;
17886 else
17887 rclass_2 = GENERAL_REGS;
17888
17889 /* Check if the registers are of same class. */
17890 if (rclass_1 != rclass_2)
17891 return false;
17892
17893 return true;
17894}
17895
9b56ec11
JW
17896/* Given OPERANDS of consecutive load/store that can be merged,
17897 swap them if they are not in ascending order. */
17898void
17899aarch64_swap_ldrstr_operands (rtx* operands, bool load)
17900{
17901 rtx mem_1, mem_2, base_1, base_2, offset_1, offset_2;
17902 HOST_WIDE_INT offval_1, offval_2;
17903
17904 if (load)
17905 {
17906 mem_1 = operands[1];
17907 mem_2 = operands[3];
17908 }
17909 else
17910 {
17911 mem_1 = operands[0];
17912 mem_2 = operands[2];
17913 }
17914
17915 extract_base_offset_in_addr (mem_1, &base_1, &offset_1);
17916 extract_base_offset_in_addr (mem_2, &base_2, &offset_2);
17917
17918 offval_1 = INTVAL (offset_1);
17919 offval_2 = INTVAL (offset_2);
17920
17921 if (offval_1 > offval_2)
17922 {
17923 /* Irrespective of whether this is a load or a store,
17924 we do the same swap. */
17925 std::swap (operands[0], operands[2]);
17926 std::swap (operands[1], operands[3]);
17927 }
17928}
17929
d0b51297
JW
17930/* Taking X and Y to be HOST_WIDE_INT pointers, return the result of a
17931 comparison between the two. */
17932int
17933aarch64_host_wide_int_compare (const void *x, const void *y)
17934{
17935 return wi::cmps (* ((const HOST_WIDE_INT *) x),
17936 * ((const HOST_WIDE_INT *) y));
17937}
17938
17939/* Taking X and Y to be pairs of RTX, one pointing to a MEM rtx and the
17940 other pointing to a REG rtx containing an offset, compare the offsets
17941 of the two pairs.
17942
17943 Return:
17944
17945 1 iff offset (X) > offset (Y)
17946 0 iff offset (X) == offset (Y)
17947 -1 iff offset (X) < offset (Y) */
17948int
17949aarch64_ldrstr_offset_compare (const void *x, const void *y)
17950{
17951 const rtx * operands_1 = (const rtx *) x;
17952 const rtx * operands_2 = (const rtx *) y;
17953 rtx mem_1, mem_2, base, offset_1, offset_2;
17954
17955 if (MEM_P (operands_1[0]))
17956 mem_1 = operands_1[0];
17957 else
17958 mem_1 = operands_1[1];
17959
17960 if (MEM_P (operands_2[0]))
17961 mem_2 = operands_2[0];
17962 else
17963 mem_2 = operands_2[1];
17964
17965 /* Extract the offsets. */
17966 extract_base_offset_in_addr (mem_1, &base, &offset_1);
17967 extract_base_offset_in_addr (mem_2, &base, &offset_2);
17968
17969 gcc_assert (offset_1 != NULL_RTX && offset_2 != NULL_RTX);
17970
17971 return wi::cmps (INTVAL (offset_1), INTVAL (offset_2));
17972}
17973
17974/* Given OPERANDS of consecutive load/store, check if we can merge
17975 them into ldp/stp by adjusting the offset. LOAD is true if they
17976 are load instructions. MODE is the mode of memory operands.
17977
17978 Given below consecutive stores:
17979
17980 str w1, [xb, 0x100]
17981 str w1, [xb, 0x104]
17982 str w1, [xb, 0x108]
17983 str w1, [xb, 0x10c]
17984
17985 Though the offsets are out of the range supported by stp, we can
17986 still pair them after adjusting the offset, like:
17987
17988 add scratch, xb, 0x100
17989 stp w1, w1, [scratch]
17990 stp w1, w1, [scratch, 0x8]
17991
17992 The peephole patterns detecting this opportunity should guarantee
17993 the scratch register is available. */
17994
17995bool
17996aarch64_operands_adjust_ok_for_ldpstp (rtx *operands, bool load,
146c2e3a 17997 scalar_mode mode)
350013bc 17998{
34d7854d
JW
17999 const int num_insns = 4;
18000 enum reg_class rclass;
18001 HOST_WIDE_INT offvals[num_insns], msize;
18002 rtx mem[num_insns], reg[num_insns], base[num_insns], offset[num_insns];
350013bc
BC
18003
18004 if (load)
18005 {
34d7854d
JW
18006 for (int i = 0; i < num_insns; i++)
18007 {
18008 reg[i] = operands[2 * i];
18009 mem[i] = operands[2 * i + 1];
18010
18011 gcc_assert (REG_P (reg[i]));
18012 }
d0b51297
JW
18013
18014 /* Do not attempt to merge the loads if the loads clobber each other. */
18015 for (int i = 0; i < 8; i += 2)
18016 for (int j = i + 2; j < 8; j += 2)
18017 if (reg_overlap_mentioned_p (operands[i], operands[j]))
18018 return false;
350013bc
BC
18019 }
18020 else
34d7854d
JW
18021 for (int i = 0; i < num_insns; i++)
18022 {
18023 mem[i] = operands[2 * i];
18024 reg[i] = operands[2 * i + 1];
18025 }
350013bc 18026
34d7854d
JW
18027 /* Skip if memory operand is by itself valid for ldp/stp. */
18028 if (!MEM_P (mem[0]) || aarch64_mem_pair_operand (mem[0], mode))
bf84ac44
AP
18029 return false;
18030
34d7854d
JW
18031 for (int i = 0; i < num_insns; i++)
18032 {
18033 /* The mems cannot be volatile. */
18034 if (MEM_VOLATILE_P (mem[i]))
18035 return false;
18036
18037 /* Check if the addresses are in the form of [base+offset]. */
18038 extract_base_offset_in_addr (mem[i], base + i, offset + i);
18039 if (base[i] == NULL_RTX || offset[i] == NULL_RTX)
18040 return false;
18041 }
18042
363b395b
JW
18043 /* Check if the registers are of same class. */
18044 rclass = REG_P (reg[0]) && FP_REGNUM_P (REGNO (reg[0]))
18045 ? FP_REGS : GENERAL_REGS;
18046
18047 for (int i = 1; i < num_insns; i++)
18048 if (REG_P (reg[i]) && FP_REGNUM_P (REGNO (reg[i])))
18049 {
18050 if (rclass != FP_REGS)
18051 return false;
18052 }
18053 else
18054 {
18055 if (rclass != GENERAL_REGS)
18056 return false;
18057 }
18058
18059 /* Only the last register in the order in which they occur
18060 may be clobbered by the load. */
18061 if (rclass == GENERAL_REGS && load)
18062 for (int i = 0; i < num_insns - 1; i++)
34d7854d
JW
18063 if (reg_mentioned_p (reg[i], mem[i]))
18064 return false;
350013bc
BC
18065
18066 /* Check if the bases are same. */
34d7854d
JW
18067 for (int i = 0; i < num_insns - 1; i++)
18068 if (!rtx_equal_p (base[i], base[i + 1]))
18069 return false;
18070
18071 for (int i = 0; i < num_insns; i++)
18072 offvals[i] = INTVAL (offset[i]);
350013bc 18073
350013bc 18074 msize = GET_MODE_SIZE (mode);
d0b51297
JW
18075
18076 /* Check if the offsets can be put in the right order to do a ldp/stp. */
34d7854d
JW
18077 qsort (offvals, num_insns, sizeof (HOST_WIDE_INT),
18078 aarch64_host_wide_int_compare);
d0b51297
JW
18079
18080 if (!(offvals[1] == offvals[0] + msize
18081 && offvals[3] == offvals[2] + msize))
350013bc
BC
18082 return false;
18083
d0b51297
JW
18084 /* Check that offsets are within range of each other. The ldp/stp
18085 instructions have 7 bit immediate offsets, so use 0x80. */
18086 if (offvals[2] - offvals[0] >= msize * 0x80)
18087 return false;
350013bc 18088
d0b51297
JW
18089 /* The offsets must be aligned with respect to each other. */
18090 if (offvals[0] % msize != offvals[2] % msize)
18091 return false;
18092
54700e2e
AP
18093 /* If we have SImode and slow unaligned ldp,
18094 check the alignment to be at least 8 byte. */
18095 if (mode == SImode
18096 && (aarch64_tune_params.extra_tuning_flags
34d7854d 18097 & AARCH64_EXTRA_TUNE_SLOW_UNALIGNED_LDPW)
54700e2e 18098 && !optimize_size
34d7854d 18099 && MEM_ALIGN (mem[0]) < 8 * BITS_PER_UNIT)
54700e2e
AP
18100 return false;
18101
350013bc
BC
18102 return true;
18103}
18104
18105/* Given OPERANDS of consecutive load/store, this function pairs them
d0b51297
JW
18106 into LDP/STP after adjusting the offset. It depends on the fact
18107 that the operands can be sorted so the offsets are correct for STP.
350013bc
BC
18108 MODE is the mode of memory operands. CODE is the rtl operator
18109 which should be applied to all memory operands, it's SIGN_EXTEND,
18110 ZERO_EXTEND or UNKNOWN. */
18111
18112bool
18113aarch64_gen_adjusted_ldpstp (rtx *operands, bool load,
146c2e3a 18114 scalar_mode mode, RTX_CODE code)
350013bc 18115{
d0b51297 18116 rtx base, offset_1, offset_3, t1, t2;
350013bc 18117 rtx mem_1, mem_2, mem_3, mem_4;
d0b51297
JW
18118 rtx temp_operands[8];
18119 HOST_WIDE_INT off_val_1, off_val_3, base_off, new_off_1, new_off_3,
18120 stp_off_upper_limit, stp_off_lower_limit, msize;
9b56ec11 18121
d0b51297
JW
18122 /* We make changes on a copy as we may still bail out. */
18123 for (int i = 0; i < 8; i ++)
18124 temp_operands[i] = operands[i];
9b56ec11 18125
d0b51297
JW
18126 /* Sort the operands. */
18127 qsort (temp_operands, 4, 2 * sizeof (rtx *), aarch64_ldrstr_offset_compare);
9b56ec11 18128
350013bc
BC
18129 if (load)
18130 {
d0b51297
JW
18131 mem_1 = temp_operands[1];
18132 mem_2 = temp_operands[3];
18133 mem_3 = temp_operands[5];
18134 mem_4 = temp_operands[7];
350013bc
BC
18135 }
18136 else
18137 {
d0b51297
JW
18138 mem_1 = temp_operands[0];
18139 mem_2 = temp_operands[2];
18140 mem_3 = temp_operands[4];
18141 mem_4 = temp_operands[6];
350013bc
BC
18142 gcc_assert (code == UNKNOWN);
18143 }
18144
18145 extract_base_offset_in_addr (mem_1, &base, &offset_1);
18146 extract_base_offset_in_addr (mem_3, &base, &offset_3);
18147 gcc_assert (base != NULL_RTX && offset_1 != NULL_RTX
18148 && offset_3 != NULL_RTX);
18149
18150 /* Adjust the offset so it can fit in an LDP/STP instruction. */
18151 msize = GET_MODE_SIZE (mode);
18152 stp_off_upper_limit = msize * (0x40 - 1);
18153 stp_off_lower_limit = - msize * 0x40;
18154
18155 off_val_1 = INTVAL (offset_1);
18156 off_val_3 = INTVAL (offset_3);
18157
18158 /* The base offset is optimally halfway between the two STP/LDP offsets. */
18159 if (msize <= 4)
18160 base_off = (off_val_1 + off_val_3) / 2;
18161 else
18162 /* However, due to issues with negative LDP/STP offset generation for
18163 larger modes (DF, DI and vector modes), we must not use negative
18164 addresses smaller than 9 signed unadjusted bits can store. This
18165 provides the most range in this case. */
18166 base_off = off_val_1;
18167
18168 /* Adjust the base so that it is aligned with the addresses but still
18169 optimal. */
18170 if (base_off % msize != off_val_1 % msize)
18171 /* Fix the offset, bearing in mind we want to make it bigger not
18172 smaller. */
18173 base_off += (((base_off % msize) - (off_val_1 % msize)) + msize) % msize;
18174 else if (msize <= 4)
18175 /* The negative range of LDP/STP is one larger than the positive range. */
18176 base_off += msize;
18177
18178 /* Check if base offset is too big or too small. We can attempt to resolve
18179 this issue by setting it to the maximum value and seeing if the offsets
18180 still fit. */
18181 if (base_off >= 0x1000)
18182 {
18183 base_off = 0x1000 - 1;
18184 /* We must still make sure that the base offset is aligned with respect
18185 to the address. But it may not be made any bigger. */
18186 base_off -= (((base_off % msize) - (off_val_1 % msize)) + msize) % msize;
18187 }
18188
18189 /* Likewise for the case where the base is too small. */
18190 if (base_off <= -0x1000)
18191 {
18192 base_off = -0x1000 + 1;
18193 base_off += (((base_off % msize) - (off_val_1 % msize)) + msize) % msize;
18194 }
18195
18196 /* Offset of the first STP/LDP. */
18197 new_off_1 = off_val_1 - base_off;
18198
18199 /* Offset of the second STP/LDP. */
18200 new_off_3 = off_val_3 - base_off;
18201
18202 /* The offsets must be within the range of the LDP/STP instructions. */
18203 if (new_off_1 > stp_off_upper_limit || new_off_1 < stp_off_lower_limit
18204 || new_off_3 > stp_off_upper_limit || new_off_3 < stp_off_lower_limit)
18205 return false;
18206
18207 replace_equiv_address_nv (mem_1, plus_constant (Pmode, operands[8],
18208 new_off_1), true);
18209 replace_equiv_address_nv (mem_2, plus_constant (Pmode, operands[8],
18210 new_off_1 + msize), true);
18211 replace_equiv_address_nv (mem_3, plus_constant (Pmode, operands[8],
18212 new_off_3), true);
18213 replace_equiv_address_nv (mem_4, plus_constant (Pmode, operands[8],
18214 new_off_3 + msize), true);
18215
18216 if (!aarch64_mem_pair_operand (mem_1, mode)
18217 || !aarch64_mem_pair_operand (mem_3, mode))
18218 return false;
18219
18220 if (code == ZERO_EXTEND)
18221 {
18222 mem_1 = gen_rtx_ZERO_EXTEND (DImode, mem_1);
18223 mem_2 = gen_rtx_ZERO_EXTEND (DImode, mem_2);
18224 mem_3 = gen_rtx_ZERO_EXTEND (DImode, mem_3);
18225 mem_4 = gen_rtx_ZERO_EXTEND (DImode, mem_4);
18226 }
18227 else if (code == SIGN_EXTEND)
18228 {
18229 mem_1 = gen_rtx_SIGN_EXTEND (DImode, mem_1);
18230 mem_2 = gen_rtx_SIGN_EXTEND (DImode, mem_2);
18231 mem_3 = gen_rtx_SIGN_EXTEND (DImode, mem_3);
18232 mem_4 = gen_rtx_SIGN_EXTEND (DImode, mem_4);
18233 }
18234
18235 if (load)
18236 {
18237 operands[0] = temp_operands[0];
18238 operands[1] = mem_1;
18239 operands[2] = temp_operands[2];
18240 operands[3] = mem_2;
18241 operands[4] = temp_operands[4];
18242 operands[5] = mem_3;
18243 operands[6] = temp_operands[6];
18244 operands[7] = mem_4;
18245 }
18246 else
18247 {
18248 operands[0] = mem_1;
18249 operands[1] = temp_operands[1];
18250 operands[2] = mem_2;
18251 operands[3] = temp_operands[3];
18252 operands[4] = mem_3;
18253 operands[5] = temp_operands[5];
18254 operands[6] = mem_4;
18255 operands[7] = temp_operands[7];
18256 }
18257
18258 /* Emit adjusting instruction. */
18259 emit_insn (gen_rtx_SET (operands[8], plus_constant (DImode, base, base_off)));
18260 /* Emit ldp/stp instructions. */
18261 t1 = gen_rtx_SET (operands[0], operands[1]);
18262 t2 = gen_rtx_SET (operands[2], operands[3]);
18263 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, t1, t2)));
18264 t1 = gen_rtx_SET (operands[4], operands[5]);
18265 t2 = gen_rtx_SET (operands[6], operands[7]);
18266 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, t1, t2)));
18267 return true;
18268}
18269
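/* Illustrative note (editorial, not part of the original file): with four
   SImode accesses at offsets 100, 104, 108 and 112 (msize == 4),
   off_val_1 == 100 and off_val_3 == 108, so base_off starts at 104 and is
   bumped to 108 by the "negative range is one larger" adjustment.  The
   function then emits one ADD setting operands[8] to base + 108 followed
   by two pair accesses at offsets -8 and 0, both well inside the signed
   7-bit scaled LDP/STP range.  */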
18270/* Implement TARGET_VECTORIZE_EMPTY_MASK_IS_EXPENSIVE. Assume for now that
18271 it isn't worth branching around empty masked ops (including masked
18272 stores). */
18273
18274static bool
18275aarch64_empty_mask_is_expensive (unsigned)
18276{
18277 return false;
18278}
18279
18280 /* Return true if a pseudo register should be created and used to hold
18281 the GOT address for PIC code. */
18282
18283bool
18284aarch64_use_pseudo_pic_reg (void)
18285{
18286 return aarch64_cmodel == AARCH64_CMODEL_SMALL_SPIC;
18287}
18288
18289/* Implement TARGET_UNSPEC_MAY_TRAP_P. */
18290
18291static int
18292aarch64_unspec_may_trap_p (const_rtx x, unsigned flags)
18293{
18294 switch (XINT (x, 1))
18295 {
18296 case UNSPEC_GOTSMALLPIC:
18297 case UNSPEC_GOTSMALLPIC28K:
18298 case UNSPEC_GOTTINYPIC:
18299 return 0;
18300 default:
18301 break;
18302 }
18303
18304 return default_unspec_may_trap_p (x, flags);
18305}
18306
18307
18308 /* If X is a positive CONST_DOUBLE with a value that is a power of 2,
18309 return the log2 of that value. Otherwise return -1. */
18310
18311int
18312aarch64_fpconst_pow_of_2 (rtx x)
18313{
18314 const REAL_VALUE_TYPE *r;
18315
18316 if (!CONST_DOUBLE_P (x))
18317 return -1;
18318
18319 r = CONST_DOUBLE_REAL_VALUE (x);
18320
18321 if (REAL_VALUE_NEGATIVE (*r)
18322 || REAL_VALUE_ISNAN (*r)
18323 || REAL_VALUE_ISINF (*r)
18324 || !real_isinteger (r, DFmode))
18325 return -1;
18326
18327 return exact_log2 (real_to_integer (r));
18328}
18329
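/* Illustrative note (editorial): aarch64_fpconst_pow_of_2 maps 8.0 to 3
   and 1.0 to 0, while 5.0, -4.0 and 0.5 all map to -1 because they are
   not positive integral powers of 2.  */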
18330/* If X is a vector of equal CONST_DOUBLE values and that value is
18331 Y, return the aarch64_fpconst_pow_of_2 of Y. Otherwise return -1. */
18332
18333int
18334aarch64_vec_fpconst_pow_of_2 (rtx x)
18335{
18336 int nelts;
18337 if (GET_CODE (x) != CONST_VECTOR
18338 || !CONST_VECTOR_NUNITS (x).is_constant (&nelts))
18339 return -1;
18340
18341 if (GET_MODE_CLASS (GET_MODE (x)) != MODE_VECTOR_FLOAT)
18342 return -1;
18343
18344 int firstval = aarch64_fpconst_pow_of_2 (CONST_VECTOR_ELT (x, 0));
18345 if (firstval <= 0)
18346 return -1;
18347
18348 for (int i = 1; i < nelts; i++)
18349 if (aarch64_fpconst_pow_of_2 (CONST_VECTOR_ELT (x, i)) != firstval)
18350 return -1;
18351
18352 return firstval;
18353}
18354
18355/* Implement TARGET_PROMOTED_TYPE to promote 16-bit floating point types
18356 to float.
18357
18358 __fp16 always promotes through this hook.
18359 _Float16 may promote if TARGET_FLT_EVAL_METHOD is 16, but we do that
18360 through the generic excess precision logic rather than here. */
18361
18362static tree
18363aarch64_promoted_type (const_tree t)
18364{
18365 if (SCALAR_FLOAT_TYPE_P (t)
18366 && TYPE_MAIN_VARIANT (t) == aarch64_fp16_type_node)
18367 return float_type_node;
18368
18369 return NULL_TREE;
18370}
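/* Illustrative note (editorial): because of this hook, an expression such
   as "a + b" with __fp16 operands is evaluated in float and only narrowed
   back to __fp16 when the result is stored, matching the ACLE semantics
   of __fp16; _Float16 is instead handled by the excess-precision logic in
   aarch64_excess_precision below.  */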
18371
18372/* Implement the TARGET_OPTAB_SUPPORTED_P hook. */
18373
18374static bool
18375 aarch64_optab_supported_p (int op, machine_mode mode1, machine_mode,
18376 optimization_type opt_type)
18377{
18378 switch (op)
18379 {
18380 case rsqrt_optab:
18381 return opt_type == OPTIMIZE_FOR_SPEED && use_rsqrt_p (mode1);
18382
18383 default:
18384 return true;
18385 }
18386}
18387
18388/* Implement the TARGET_DWARF_POLY_INDETERMINATE_VALUE hook. */
18389
18390static unsigned int
18391aarch64_dwarf_poly_indeterminate_value (unsigned int i, unsigned int *factor,
18392 int *offset)
18393{
18394 /* Polynomial invariant 1 == (VG / 2) - 1. */
18395 gcc_assert (i == 1);
18396 *factor = 2;
18397 *offset = 1;
18398 return AARCH64_DWARF_VG;
18399}
18400
18401/* Implement TARGET_LIBGCC_FLOATING_POINT_MODE_SUPPORTED_P - return TRUE
18402 if MODE is HFmode, and punt to the generic implementation otherwise. */
18403
18404static bool
18405 aarch64_libgcc_floating_mode_supported_p (scalar_float_mode mode)
18406{
18407 return (mode == HFmode
18408 ? true
18409 : default_libgcc_floating_mode_supported_p (mode));
18410}
18411
18412/* Implement TARGET_SCALAR_MODE_SUPPORTED_P - return TRUE
18413 if MODE is HFmode, and punt to the generic implementation otherwise. */
18414
18415static bool
18416 aarch64_scalar_mode_supported_p (scalar_mode mode)
18417{
18418 return (mode == HFmode
18419 ? true
18420 : default_scalar_mode_supported_p (mode));
18421}
18422
18423/* Set the value of FLT_EVAL_METHOD.
18424 ISO/IEC TS 18661-3 defines two values that we'd like to make use of:
18425
18426 0: evaluate all operations and constants, whose semantic type has at
18427 most the range and precision of type float, to the range and
18428 precision of float; evaluate all other operations and constants to
18429 the range and precision of the semantic type;
18430
18431 N, where _FloatN is a supported interchange floating type:
18432 evaluate all operations and constants, whose semantic type has at
18433 most the range and precision of _FloatN type, to the range and
18434 precision of the _FloatN type; evaluate all other operations and
18435 constants to the range and precision of the semantic type;
18436
18437 If we have the ARMv8.2-A extensions then we support _Float16 in native
18438 precision, so we should set this to 16. Otherwise, we support the type,
18439 but want to evaluate expressions in float precision, so set this to
18440 0. */
18441
18442static enum flt_eval_method
18443aarch64_excess_precision (enum excess_precision_type type)
18444{
18445 switch (type)
18446 {
18447 case EXCESS_PRECISION_TYPE_FAST:
18448 case EXCESS_PRECISION_TYPE_STANDARD:
18449 /* We can calculate either in 16-bit range and precision or
18450 32-bit range and precision. Make that decision based on whether
18451 we have native support for the ARMv8.2-A 16-bit floating-point
18452 instructions or not. */
18453 return (TARGET_FP_F16INST
18454 ? FLT_EVAL_METHOD_PROMOTE_TO_FLOAT16
18455 : FLT_EVAL_METHOD_PROMOTE_TO_FLOAT);
18456 case EXCESS_PRECISION_TYPE_IMPLICIT:
18457 return FLT_EVAL_METHOD_PROMOTE_TO_FLOAT16;
18458 default:
18459 gcc_unreachable ();
18460 }
18461 return FLT_EVAL_METHOD_UNPREDICTABLE;
18462}
18463
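/* Illustrative note (editorial): for "_Float16 x, y, z; ... x * y + z ...",
   compiling with the ARMv8.2-A FP16 instructions available keeps the whole
   expression in _Float16 (FLT_EVAL_METHOD == 16), whereas without them the
   operands are widened and the expression is evaluated in float before
   being narrowed back on assignment.  */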
18464/* Implement TARGET_SCHED_CAN_SPECULATE_INSN. Return true if INSN can be
18465 scheduled for speculative execution. Reject the long-running division
18466 and square-root instructions. */
18467
18468static bool
18469aarch64_sched_can_speculate_insn (rtx_insn *insn)
18470{
18471 switch (get_attr_type (insn))
18472 {
18473 case TYPE_SDIV:
18474 case TYPE_UDIV:
18475 case TYPE_FDIVS:
18476 case TYPE_FDIVD:
18477 case TYPE_FSQRTS:
18478 case TYPE_FSQRTD:
18479 case TYPE_NEON_FP_SQRT_S:
18480 case TYPE_NEON_FP_SQRT_D:
18481 case TYPE_NEON_FP_SQRT_S_Q:
18482 case TYPE_NEON_FP_SQRT_D_Q:
18483 case TYPE_NEON_FP_DIV_S:
18484 case TYPE_NEON_FP_DIV_D:
18485 case TYPE_NEON_FP_DIV_S_Q:
18486 case TYPE_NEON_FP_DIV_D_Q:
18487 return false;
18488 default:
18489 return true;
18490 }
18491}
18492
18493/* Implement TARGET_COMPUTE_PRESSURE_CLASSES. */
18494
18495static int
18496aarch64_compute_pressure_classes (reg_class *classes)
18497{
18498 int i = 0;
18499 classes[i++] = GENERAL_REGS;
18500 classes[i++] = FP_REGS;
18501 /* PR_REGS isn't a useful pressure class because many predicate pseudo
18502 registers need to go in PR_LO_REGS at some point during their
18503 lifetime. Splitting it into two halves has the effect of making
18504 all predicates count against PR_LO_REGS, so that we try whenever
18505 possible to restrict the number of live predicates to 8. This
18506 greatly reduces the amount of spilling in certain loops. */
18507 classes[i++] = PR_LO_REGS;
18508 classes[i++] = PR_HI_REGS;
18509 return i;
18510}
18511
18512/* Implement TARGET_CAN_CHANGE_MODE_CLASS. */
18513
18514static bool
18515aarch64_can_change_mode_class (machine_mode from,
18516 machine_mode to, reg_class_t)
18517{
18518 if (BYTES_BIG_ENDIAN)
18519 {
18520 bool from_sve_p = aarch64_sve_data_mode_p (from);
18521 bool to_sve_p = aarch64_sve_data_mode_p (to);
18522
18523 /* Don't allow changes between SVE data modes and non-SVE modes.
18524 See the comment at the head of aarch64-sve.md for details. */
18525 if (from_sve_p != to_sve_p)
18526 return false;
18527
18528 /* Don't allow changes in element size: lane 0 of the new vector
18529 would not then be lane 0 of the old vector. See the comment
18530 above aarch64_maybe_expand_sve_subreg_move for a more detailed
18531 description.
18532
18533 In the worst case, this forces a register to be spilled in
18534 one mode and reloaded in the other, which handles the
18535 endianness correctly. */
18536 if (from_sve_p && GET_MODE_UNIT_SIZE (from) != GET_MODE_UNIT_SIZE (to))
18537 return false;
18538 }
18539 return true;
18540}
18541
18542/* Implement TARGET_EARLY_REMAT_MODES. */
18543
18544static void
18545aarch64_select_early_remat_modes (sbitmap modes)
18546{
18547 /* SVE values are not normally live across a call, so it should be
18548 worth doing early rematerialization even in VL-specific mode. */
18549 for (int i = 0; i < NUM_MACHINE_MODES; ++i)
18550 {
18551 machine_mode mode = (machine_mode) i;
18552 unsigned int vec_flags = aarch64_classify_vector_mode (mode);
18553 if (vec_flags & VEC_ANY_SVE)
18554 bitmap_set_bit (modes, i);
18555 }
18556}
18557
18558/* Override the default target speculation_safe_value. */
18559static rtx
18560aarch64_speculation_safe_value (machine_mode mode,
18561 rtx result, rtx val, rtx failval)
18562{
18563 /* Maybe we should warn if falling back to hard barriers. They are
18564 likely to be noticeably more expensive than the alternative below. */
18565 if (!aarch64_track_speculation)
18566 return default_speculation_safe_value (mode, result, val, failval);
18567
18568 if (!REG_P (val))
18569 val = copy_to_mode_reg (mode, val);
18570
18571 if (!aarch64_reg_or_zero (failval, mode))
18572 failval = copy_to_mode_reg (mode, failval);
18573
18574 emit_insn (gen_despeculate_copy (mode, result, val, failval));
18575 return result;
18576}
18577
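/* Illustrative usage (editorial sketch): the generic built-in that this
   hook expands can be used to sanitise an index after a bounds check:

     if (i < bound)
       val = table[__builtin_speculation_safe_value (i)];

   With -mtrack-speculation the copy above becomes a conditional-select
   sequence driven by the speculation-tracking register; otherwise the
   default expansion falls back to a full speculation barrier.  */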
18578/* Implement TARGET_ESTIMATED_POLY_VALUE.
18579 Look into the tuning structure for an estimate.
18580 VAL.coeffs[1] is multiplied by the number of VQ chunks over the initial
18581 Advanced SIMD 128 bits. */
18582
18583static HOST_WIDE_INT
18584aarch64_estimated_poly_value (poly_int64 val)
18585{
18586 enum aarch64_sve_vector_bits_enum width_source
18587 = aarch64_tune_params.sve_width;
18588
18589 /* If we still don't have an estimate, use the default. */
18590 if (width_source == SVE_SCALABLE)
18591 return default_estimated_poly_value (val);
18592
18593 HOST_WIDE_INT over_128 = width_source - 128;
18594 return val.coeffs[0] + val.coeffs[1] * over_128 / 128;
18595}
18596
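/* Illustrative note (editorial): the number of bytes in one SVE data
   vector is the poly_int 16 + 16x.  If the selected tuning sets sve_width
   to 256, the estimate above is 16 + 16 * (256 - 128) / 128 = 32 bytes,
   i.e. costs are computed as if the part implemented 256-bit SVE.  */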
18597
18598/* Return true for types that could be supported as SIMD return or
18599 argument types. */
18600
18601static bool
18602supported_simd_type (tree t)
18603{
18604 if (SCALAR_FLOAT_TYPE_P (t) || INTEGRAL_TYPE_P (t) || POINTER_TYPE_P (t))
18605 {
18606 HOST_WIDE_INT s = tree_to_shwi (TYPE_SIZE_UNIT (t));
18607 return s == 1 || s == 2 || s == 4 || s == 8;
18608 }
18609 return false;
18610}
18611
18612/* Return true for types that currently are supported as SIMD return
18613 or argument types. */
18614
18615static bool
18616currently_supported_simd_type (tree t, tree b)
18617{
18618 if (COMPLEX_FLOAT_TYPE_P (t))
18619 return false;
18620
18621 if (TYPE_SIZE (t) != TYPE_SIZE (b))
18622 return false;
18623
18624 return supported_simd_type (t);
18625}
18626
18627/* Implement TARGET_SIMD_CLONE_COMPUTE_VECSIZE_AND_SIMDLEN. */
18628
18629static int
18630aarch64_simd_clone_compute_vecsize_and_simdlen (struct cgraph_node *node,
18631 struct cgraph_simd_clone *clonei,
18632 tree base_type, int num)
18633{
18634 tree t, ret_type, arg_type;
18635 unsigned int elt_bits, vec_bits, count;
18636
18637 if (!TARGET_SIMD)
18638 return 0;
18639
18640 if (clonei->simdlen
18641 && (clonei->simdlen < 2
18642 || clonei->simdlen > 1024
18643 || (clonei->simdlen & (clonei->simdlen - 1)) != 0))
18644 {
18645 warning_at (DECL_SOURCE_LOCATION (node->decl), 0,
18646 "unsupported simdlen %d", clonei->simdlen);
18647 return 0;
18648 }
18649
18650 ret_type = TREE_TYPE (TREE_TYPE (node->decl));
18651 if (TREE_CODE (ret_type) != VOID_TYPE
18652 && !currently_supported_simd_type (ret_type, base_type))
18653 {
18654 if (TYPE_SIZE (ret_type) != TYPE_SIZE (base_type))
18655 warning_at (DECL_SOURCE_LOCATION (node->decl), 0,
18656 "GCC does not currently support mixed size types "
18657 "for %<simd%> functions");
18658 else if (supported_simd_type (ret_type))
18659 warning_at (DECL_SOURCE_LOCATION (node->decl), 0,
18660 "GCC does not currently support return type %qT "
18661 "for %<simd%> functions", ret_type);
18662 else
18663 warning_at (DECL_SOURCE_LOCATION (node->decl), 0,
18664 "unsupported return type %qT for %<simd%> functions",
18665 ret_type);
18666 return 0;
18667 }
18668
18669 for (t = DECL_ARGUMENTS (node->decl); t; t = DECL_CHAIN (t))
18670 {
18671 arg_type = TREE_TYPE (t);
18672
18673 if (!currently_supported_simd_type (arg_type, base_type))
18674 {
18675 if (TYPE_SIZE (arg_type) != TYPE_SIZE (base_type))
18676 warning_at (DECL_SOURCE_LOCATION (node->decl), 0,
18677 "GCC does not currently support mixed size types "
18678 "for %<simd%> functions");
18679 else
18680 warning_at (DECL_SOURCE_LOCATION (node->decl), 0,
18681 "GCC does not currently support argument type %qT "
18682 "for %<simd%> functions", arg_type);
18683 return 0;
18684 }
18685 }
18686
18687 clonei->vecsize_mangle = 'n';
18688 clonei->mask_mode = VOIDmode;
18689 elt_bits = GET_MODE_BITSIZE (SCALAR_TYPE_MODE (base_type));
18690 if (clonei->simdlen == 0)
18691 {
18692 count = 2;
18693 vec_bits = (num == 0 ? 64 : 128);
18694 clonei->simdlen = vec_bits / elt_bits;
18695 }
18696 else
18697 {
18698 count = 1;
18699 vec_bits = clonei->simdlen * elt_bits;
18700 if (vec_bits != 64 && vec_bits != 128)
18701 {
18702 warning_at (DECL_SOURCE_LOCATION (node->decl), 0,
18703 "GCC does not currently support simdlen %d for type %qT",
18704 clonei->simdlen, base_type);
18705 return 0;
18706 }
18707 }
18708 clonei->vecsize_int = vec_bits;
18709 clonei->vecsize_float = vec_bits;
18710 return count;
18711}
18712
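/* Illustrative note (editorial): for "#pragma omp declare simd" applied to
   "float foo (float x)", base_type is float (elt_bits == 32) and no
   simdlen is given, so the function above advertises two Advanced SIMD
   clones, one with simdlen 2 (64-bit vectors) and one with simdlen 4
   (128-bit vectors); aarch64_simd_clone_adjust below then tags them with
   the aarch64_vector_pcs attribute.  */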
18713/* Implement TARGET_SIMD_CLONE_ADJUST. */
18714
18715static void
18716aarch64_simd_clone_adjust (struct cgraph_node *node)
18717{
18718 /* Add aarch64_vector_pcs target attribute to SIMD clones so they
18719 use the correct ABI. */
18720
18721 tree t = TREE_TYPE (node->decl);
18722 TYPE_ATTRIBUTES (t) = make_attribute ("aarch64_vector_pcs", "default",
18723 TYPE_ATTRIBUTES (t));
18724}
18725
18726/* Implement TARGET_SIMD_CLONE_USABLE. */
18727
18728static int
18729aarch64_simd_clone_usable (struct cgraph_node *node)
18730{
18731 switch (node->simdclone->vecsize_mangle)
18732 {
18733 case 'n':
18734 if (!TARGET_SIMD)
18735 return -1;
18736 return 0;
18737 default:
18738 gcc_unreachable ();
18739 }
18740}
18741
18742/* Implement TARGET_COMP_TYPE_ATTRIBUTES */
18743
18744static int
18745aarch64_comp_type_attributes (const_tree type1, const_tree type2)
18746{
18747 if (lookup_attribute ("aarch64_vector_pcs", TYPE_ATTRIBUTES (type1))
18748 != lookup_attribute ("aarch64_vector_pcs", TYPE_ATTRIBUTES (type2)))
18749 return 0;
18750 return 1;
18751}
18752
18753/* Implement TARGET_GET_MULTILIB_ABI_NAME */
18754
18755static const char *
18756aarch64_get_multilib_abi_name (void)
18757{
18758 if (TARGET_BIG_END)
18759 return TARGET_ILP32 ? "aarch64_be_ilp32" : "aarch64_be";
18760 return TARGET_ILP32 ? "aarch64_ilp32" : "aarch64";
18761}
18762
18763 /* Implement TARGET_STACK_PROTECT_GUARD. In the case of a
18764 global variable based guard, use the default; otherwise
18765 return a null tree. */
18766static tree
18767aarch64_stack_protect_guard (void)
18768{
18769 if (aarch64_stack_protector_guard == SSP_GLOBAL)
18770 return default_stack_protect_guard ();
18771
18772 return NULL_TREE;
18773}
18774
18775/* Implement TARGET_ASM_FILE_END for AArch64. This adds the AArch64 GNU NOTE
18776 section at the end if needed. */
18777#define GNU_PROPERTY_AARCH64_FEATURE_1_AND 0xc0000000
18778#define GNU_PROPERTY_AARCH64_FEATURE_1_BTI (1U << 0)
18779#define GNU_PROPERTY_AARCH64_FEATURE_1_PAC (1U << 1)
18780void
18781aarch64_file_end_indicate_exec_stack ()
18782{
18783 file_end_indicate_exec_stack ();
18784
18785 unsigned feature_1_and = 0;
18786 if (aarch64_bti_enabled ())
18787 feature_1_and |= GNU_PROPERTY_AARCH64_FEATURE_1_BTI;
18788
18789 if (aarch64_ra_sign_scope != AARCH64_FUNCTION_NONE)
18790 feature_1_and |= GNU_PROPERTY_AARCH64_FEATURE_1_PAC;
18791
18792 if (feature_1_and)
18793 {
18794 /* Generate .note.gnu.property section. */
18795 switch_to_section (get_section (".note.gnu.property",
18796 SECTION_NOTYPE, NULL));
18797
18798 /* PT_NOTE header: namesz, descsz, type.
18799 namesz = 4 ("GNU\0")
18800 descsz = 16 (Size of the program property array)
18801 [(12 + padding) * Number of array elements]
18802 type = 5 (NT_GNU_PROPERTY_TYPE_0). */
18803 assemble_align (POINTER_SIZE);
18804 assemble_integer (GEN_INT (4), 4, 32, 1);
18805 assemble_integer (GEN_INT (ROUND_UP (12, POINTER_BYTES)), 4, 32, 1);
18806 assemble_integer (GEN_INT (5), 4, 32, 1);
18807
18808 /* PT_NOTE name. */
18809 assemble_string ("GNU", 4);
18810
18811 /* PT_NOTE contents for NT_GNU_PROPERTY_TYPE_0:
18812 type = GNU_PROPERTY_AARCH64_FEATURE_1_AND
18813 datasz = 4
18814 data = feature_1_and. */
18815 assemble_integer (GEN_INT (GNU_PROPERTY_AARCH64_FEATURE_1_AND), 4, 32, 1);
18816 assemble_integer (GEN_INT (4), 4, 32, 1);
18817 assemble_integer (GEN_INT (feature_1_and), 4, 32, 1);
18818
18819 /* Pad the size of the note to the required alignment. */
18820 assemble_align (POINTER_SIZE);
18821 }
18822}
18823#undef GNU_PROPERTY_AARCH64_FEATURE_1_PAC
18824#undef GNU_PROPERTY_AARCH64_FEATURE_1_BTI
18825#undef GNU_PROPERTY_AARCH64_FEATURE_1_AND
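/* Illustrative sketch (editorial): with both BTI and PAC-RET enabled, the
   function above emits approximately

	.section	.note.gnu.property,"a"
	.align	3
	.word	4		// namesz ("GNU\0")
	.word	16		// descsz
	.word	5		// NT_GNU_PROPERTY_TYPE_0
	.string	"GNU"
	.word	0xc0000000	// GNU_PROPERTY_AARCH64_FEATURE_1_AND
	.word	4		// datasz
	.word	3		// BTI | PAC
	.align	3

   so that linkers and loaders can tell which branch-protection features
   the object file supports.  */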
18826
18827/* Target-specific selftests. */
18828
18829#if CHECKING_P
18830
18831namespace selftest {
18832
18833/* Selftest for the RTL loader.
18834 Verify that the RTL loader copes with a dump from
18835 print_rtx_function. This is essentially just a test that class
18836 function_reader can handle a real dump, but it also verifies
18837 that lookup_reg_by_dump_name correctly handles hard regs.
18838 The presence of hard reg names in the dump means that the test is
18839 target-specific, hence it is in this file. */
18840
18841static void
18842aarch64_test_loading_full_dump ()
18843{
18844 rtl_dump_test t (SELFTEST_LOCATION, locate_file ("aarch64/times-two.rtl"));
18845
18846 ASSERT_STREQ ("times_two", IDENTIFIER_POINTER (DECL_NAME (cfun->decl)));
18847
18848 rtx_insn *insn_1 = get_insn_by_uid (1);
18849 ASSERT_EQ (NOTE, GET_CODE (insn_1));
18850
18851 rtx_insn *insn_15 = get_insn_by_uid (15);
18852 ASSERT_EQ (INSN, GET_CODE (insn_15));
18853 ASSERT_EQ (USE, GET_CODE (PATTERN (insn_15)));
18854
18855 /* Verify crtl->return_rtx. */
18856 ASSERT_EQ (REG, GET_CODE (crtl->return_rtx));
18857 ASSERT_EQ (0, REGNO (crtl->return_rtx));
18858 ASSERT_EQ (SImode, GET_MODE (crtl->return_rtx));
18859}
18860
18861/* Run all target-specific selftests. */
18862
18863static void
18864aarch64_run_selftests (void)
18865{
18866 aarch64_test_loading_full_dump ();
18867}
18868
18869} // namespace selftest
18870
18871#endif /* #if CHECKING_P */
18872
18873#undef TARGET_STACK_PROTECT_GUARD
18874#define TARGET_STACK_PROTECT_GUARD aarch64_stack_protect_guard
18875
18876#undef TARGET_ADDRESS_COST
18877#define TARGET_ADDRESS_COST aarch64_address_cost
18878
18879 /* This hook determines whether unnamed bitfields affect the alignment
18880 of the containing structure. The hook returns true if the structure
18881 should inherit the alignment requirements of an unnamed bitfield's
18882 type. */
18883#undef TARGET_ALIGN_ANON_BITFIELD
18884#define TARGET_ALIGN_ANON_BITFIELD hook_bool_void_true
18885
18886#undef TARGET_ASM_ALIGNED_DI_OP
18887#define TARGET_ASM_ALIGNED_DI_OP "\t.xword\t"
18888
18889#undef TARGET_ASM_ALIGNED_HI_OP
18890#define TARGET_ASM_ALIGNED_HI_OP "\t.hword\t"
18891
18892#undef TARGET_ASM_ALIGNED_SI_OP
18893#define TARGET_ASM_ALIGNED_SI_OP "\t.word\t"
18894
18895#undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
18896#define TARGET_ASM_CAN_OUTPUT_MI_THUNK \
18897 hook_bool_const_tree_hwi_hwi_const_tree_true
18898
18899#undef TARGET_ASM_FILE_START
18900#define TARGET_ASM_FILE_START aarch64_start_file
18901
18902#undef TARGET_ASM_OUTPUT_MI_THUNK
18903#define TARGET_ASM_OUTPUT_MI_THUNK aarch64_output_mi_thunk
18904
18905#undef TARGET_ASM_SELECT_RTX_SECTION
18906#define TARGET_ASM_SELECT_RTX_SECTION aarch64_select_rtx_section
18907
18908#undef TARGET_ASM_TRAMPOLINE_TEMPLATE
18909#define TARGET_ASM_TRAMPOLINE_TEMPLATE aarch64_asm_trampoline_template
18910
18911#undef TARGET_BUILD_BUILTIN_VA_LIST
18912#define TARGET_BUILD_BUILTIN_VA_LIST aarch64_build_builtin_va_list
18913
18914#undef TARGET_CALLEE_COPIES
18915#define TARGET_CALLEE_COPIES hook_bool_CUMULATIVE_ARGS_mode_tree_bool_false
18916
18917#undef TARGET_CAN_ELIMINATE
18918#define TARGET_CAN_ELIMINATE aarch64_can_eliminate
18919
18920#undef TARGET_CAN_INLINE_P
18921#define TARGET_CAN_INLINE_P aarch64_can_inline_p
18922
18923#undef TARGET_CANNOT_FORCE_CONST_MEM
18924#define TARGET_CANNOT_FORCE_CONST_MEM aarch64_cannot_force_const_mem
18925
18926#undef TARGET_CASE_VALUES_THRESHOLD
18927#define TARGET_CASE_VALUES_THRESHOLD aarch64_case_values_threshold
18928
18929#undef TARGET_CONDITIONAL_REGISTER_USAGE
18930#define TARGET_CONDITIONAL_REGISTER_USAGE aarch64_conditional_register_usage
18931
18932/* Only the least significant bit is used for initialization guard
18933 variables. */
18934#undef TARGET_CXX_GUARD_MASK_BIT
18935#define TARGET_CXX_GUARD_MASK_BIT hook_bool_void_true
18936
18937#undef TARGET_C_MODE_FOR_SUFFIX
18938#define TARGET_C_MODE_FOR_SUFFIX aarch64_c_mode_for_suffix
18939
18940#ifdef TARGET_BIG_ENDIAN_DEFAULT
18941#undef TARGET_DEFAULT_TARGET_FLAGS
18942#define TARGET_DEFAULT_TARGET_FLAGS (MASK_BIG_END)
18943#endif
18944
18945#undef TARGET_CLASS_MAX_NREGS
18946#define TARGET_CLASS_MAX_NREGS aarch64_class_max_nregs
18947
18948#undef TARGET_BUILTIN_DECL
18949#define TARGET_BUILTIN_DECL aarch64_builtin_decl
18950
18951#undef TARGET_BUILTIN_RECIPROCAL
18952#define TARGET_BUILTIN_RECIPROCAL aarch64_builtin_reciprocal
18953
18954#undef TARGET_C_EXCESS_PRECISION
18955#define TARGET_C_EXCESS_PRECISION aarch64_excess_precision
18956
18957#undef TARGET_EXPAND_BUILTIN
18958#define TARGET_EXPAND_BUILTIN aarch64_expand_builtin
18959
18960#undef TARGET_EXPAND_BUILTIN_VA_START
18961#define TARGET_EXPAND_BUILTIN_VA_START aarch64_expand_builtin_va_start
18962
18963#undef TARGET_FOLD_BUILTIN
18964#define TARGET_FOLD_BUILTIN aarch64_fold_builtin
18965
18966#undef TARGET_FUNCTION_ARG
18967#define TARGET_FUNCTION_ARG aarch64_function_arg
18968
18969#undef TARGET_FUNCTION_ARG_ADVANCE
18970#define TARGET_FUNCTION_ARG_ADVANCE aarch64_function_arg_advance
18971
18972#undef TARGET_FUNCTION_ARG_BOUNDARY
18973#define TARGET_FUNCTION_ARG_BOUNDARY aarch64_function_arg_boundary
18974
18975#undef TARGET_FUNCTION_ARG_PADDING
18976#define TARGET_FUNCTION_ARG_PADDING aarch64_function_arg_padding
18977
18978#undef TARGET_GET_RAW_RESULT_MODE
18979#define TARGET_GET_RAW_RESULT_MODE aarch64_get_reg_raw_mode
18980#undef TARGET_GET_RAW_ARG_MODE
18981#define TARGET_GET_RAW_ARG_MODE aarch64_get_reg_raw_mode
18982
18983#undef TARGET_FUNCTION_OK_FOR_SIBCALL
18984#define TARGET_FUNCTION_OK_FOR_SIBCALL aarch64_function_ok_for_sibcall
18985
18986#undef TARGET_FUNCTION_VALUE
18987#define TARGET_FUNCTION_VALUE aarch64_function_value
18988
18989#undef TARGET_FUNCTION_VALUE_REGNO_P
18990#define TARGET_FUNCTION_VALUE_REGNO_P aarch64_function_value_regno_p
18991
18992#undef TARGET_GIMPLE_FOLD_BUILTIN
18993#define TARGET_GIMPLE_FOLD_BUILTIN aarch64_gimple_fold_builtin
18994
18995#undef TARGET_GIMPLIFY_VA_ARG_EXPR
18996#define TARGET_GIMPLIFY_VA_ARG_EXPR aarch64_gimplify_va_arg_expr
18997
18998#undef TARGET_INIT_BUILTINS
18999#define TARGET_INIT_BUILTINS aarch64_init_builtins
19000
19001#undef TARGET_IRA_CHANGE_PSEUDO_ALLOCNO_CLASS
19002#define TARGET_IRA_CHANGE_PSEUDO_ALLOCNO_CLASS \
19003 aarch64_ira_change_pseudo_allocno_class
19004
19005#undef TARGET_LEGITIMATE_ADDRESS_P
19006#define TARGET_LEGITIMATE_ADDRESS_P aarch64_legitimate_address_hook_p
19007
19008#undef TARGET_LEGITIMATE_CONSTANT_P
19009#define TARGET_LEGITIMATE_CONSTANT_P aarch64_legitimate_constant_p
19010
19011#undef TARGET_LEGITIMIZE_ADDRESS_DISPLACEMENT
19012#define TARGET_LEGITIMIZE_ADDRESS_DISPLACEMENT \
19013 aarch64_legitimize_address_displacement
19014
19015#undef TARGET_LIBGCC_CMP_RETURN_MODE
19016#define TARGET_LIBGCC_CMP_RETURN_MODE aarch64_libgcc_cmp_return_mode
19017
19018#undef TARGET_LIBGCC_FLOATING_MODE_SUPPORTED_P
19019#define TARGET_LIBGCC_FLOATING_MODE_SUPPORTED_P \
19020aarch64_libgcc_floating_mode_supported_p
19021
19022#undef TARGET_MANGLE_TYPE
19023#define TARGET_MANGLE_TYPE aarch64_mangle_type
19024
19025#undef TARGET_MEMORY_MOVE_COST
19026#define TARGET_MEMORY_MOVE_COST aarch64_memory_move_cost
19027
19028#undef TARGET_MIN_DIVISIONS_FOR_RECIP_MUL
19029#define TARGET_MIN_DIVISIONS_FOR_RECIP_MUL aarch64_min_divisions_for_recip_mul
19030
19031#undef TARGET_MUST_PASS_IN_STACK
19032#define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size
19033
19034/* This target hook should return true if accesses to volatile bitfields
19035 should use the narrowest mode possible. It should return false if these
19036 accesses should use the bitfield container type. */
19037#undef TARGET_NARROW_VOLATILE_BITFIELD
19038#define TARGET_NARROW_VOLATILE_BITFIELD hook_bool_void_false
19039
19040#undef TARGET_OPTION_OVERRIDE
19041#define TARGET_OPTION_OVERRIDE aarch64_override_options
19042
19043#undef TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE
19044#define TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE \
19045 aarch64_override_options_after_change
19046
19047#undef TARGET_OPTION_SAVE
19048#define TARGET_OPTION_SAVE aarch64_option_save
19049
19050#undef TARGET_OPTION_RESTORE
19051#define TARGET_OPTION_RESTORE aarch64_option_restore
19052
19053#undef TARGET_OPTION_PRINT
19054#define TARGET_OPTION_PRINT aarch64_option_print
19055
19056#undef TARGET_OPTION_VALID_ATTRIBUTE_P
19057#define TARGET_OPTION_VALID_ATTRIBUTE_P aarch64_option_valid_attribute_p
19058
19059#undef TARGET_SET_CURRENT_FUNCTION
19060#define TARGET_SET_CURRENT_FUNCTION aarch64_set_current_function
19061
19062#undef TARGET_PASS_BY_REFERENCE
19063#define TARGET_PASS_BY_REFERENCE aarch64_pass_by_reference
19064
19065#undef TARGET_PREFERRED_RELOAD_CLASS
19066#define TARGET_PREFERRED_RELOAD_CLASS aarch64_preferred_reload_class
19067
19068#undef TARGET_SCHED_REASSOCIATION_WIDTH
19069#define TARGET_SCHED_REASSOCIATION_WIDTH aarch64_reassociation_width
19070
19071#undef TARGET_PROMOTED_TYPE
19072#define TARGET_PROMOTED_TYPE aarch64_promoted_type
19073
19074#undef TARGET_SECONDARY_RELOAD
19075#define TARGET_SECONDARY_RELOAD aarch64_secondary_reload
19076
19077#undef TARGET_SHIFT_TRUNCATION_MASK
19078#define TARGET_SHIFT_TRUNCATION_MASK aarch64_shift_truncation_mask
19079
19080#undef TARGET_SETUP_INCOMING_VARARGS
19081#define TARGET_SETUP_INCOMING_VARARGS aarch64_setup_incoming_varargs
19082
19083#undef TARGET_STRUCT_VALUE_RTX
19084#define TARGET_STRUCT_VALUE_RTX aarch64_struct_value_rtx
19085
19086#undef TARGET_REGISTER_MOVE_COST
19087#define TARGET_REGISTER_MOVE_COST aarch64_register_move_cost
19088
19089#undef TARGET_RETURN_IN_MEMORY
19090#define TARGET_RETURN_IN_MEMORY aarch64_return_in_memory
19091
19092#undef TARGET_RETURN_IN_MSB
19093#define TARGET_RETURN_IN_MSB aarch64_return_in_msb
19094
19095#undef TARGET_RTX_COSTS
19096 #define TARGET_RTX_COSTS aarch64_rtx_costs_wrapper
19097
19098#undef TARGET_SCALAR_MODE_SUPPORTED_P
19099#define TARGET_SCALAR_MODE_SUPPORTED_P aarch64_scalar_mode_supported_p
19100
19101#undef TARGET_SCHED_ISSUE_RATE
19102#define TARGET_SCHED_ISSUE_RATE aarch64_sched_issue_rate
19103
19104#undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
19105#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
19106 aarch64_sched_first_cycle_multipass_dfa_lookahead
19107
19108#undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD
19109#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD \
19110 aarch64_first_cycle_multipass_dfa_lookahead_guard
19111
19112#undef TARGET_SHRINK_WRAP_GET_SEPARATE_COMPONENTS
19113#define TARGET_SHRINK_WRAP_GET_SEPARATE_COMPONENTS \
19114 aarch64_get_separate_components
19115
19116#undef TARGET_SHRINK_WRAP_COMPONENTS_FOR_BB
19117#define TARGET_SHRINK_WRAP_COMPONENTS_FOR_BB \
19118 aarch64_components_for_bb
19119
19120#undef TARGET_SHRINK_WRAP_DISQUALIFY_COMPONENTS
19121#define TARGET_SHRINK_WRAP_DISQUALIFY_COMPONENTS \
19122 aarch64_disqualify_components
19123
19124#undef TARGET_SHRINK_WRAP_EMIT_PROLOGUE_COMPONENTS
19125#define TARGET_SHRINK_WRAP_EMIT_PROLOGUE_COMPONENTS \
19126 aarch64_emit_prologue_components
19127
19128#undef TARGET_SHRINK_WRAP_EMIT_EPILOGUE_COMPONENTS
19129#define TARGET_SHRINK_WRAP_EMIT_EPILOGUE_COMPONENTS \
19130 aarch64_emit_epilogue_components
19131
19132#undef TARGET_SHRINK_WRAP_SET_HANDLED_COMPONENTS
19133#define TARGET_SHRINK_WRAP_SET_HANDLED_COMPONENTS \
19134 aarch64_set_handled_components
19135
19136#undef TARGET_TRAMPOLINE_INIT
19137#define TARGET_TRAMPOLINE_INIT aarch64_trampoline_init
19138
19139#undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
19140#define TARGET_USE_BLOCKS_FOR_CONSTANT_P aarch64_use_blocks_for_constant_p
19141
19142#undef TARGET_VECTOR_MODE_SUPPORTED_P
19143#define TARGET_VECTOR_MODE_SUPPORTED_P aarch64_vector_mode_supported_p
19144
19145#undef TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT
19146#define TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT \
19147 aarch64_builtin_support_vector_misalignment
19148
19149#undef TARGET_ARRAY_MODE
19150#define TARGET_ARRAY_MODE aarch64_array_mode
19151
19152#undef TARGET_ARRAY_MODE_SUPPORTED_P
19153#define TARGET_ARRAY_MODE_SUPPORTED_P aarch64_array_mode_supported_p
19154
19155#undef TARGET_VECTORIZE_ADD_STMT_COST
19156#define TARGET_VECTORIZE_ADD_STMT_COST aarch64_add_stmt_cost
19157
19158#undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
19159#define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
19160 aarch64_builtin_vectorization_cost
19161
19162#undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
19163#define TARGET_VECTORIZE_PREFERRED_SIMD_MODE aarch64_preferred_simd_mode
19164
19165#undef TARGET_VECTORIZE_BUILTINS
19166#define TARGET_VECTORIZE_BUILTINS
19167
19168#undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
19169#define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
19170 aarch64_builtin_vectorized_function
19171
19172#undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES
19173#define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES \
19174 aarch64_autovectorize_vector_sizes
19175
19176#undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV
19177#define TARGET_ATOMIC_ASSIGN_EXPAND_FENV \
19178 aarch64_atomic_assign_expand_fenv
19179
19180/* Section anchor support. */
19181
19182#undef TARGET_MIN_ANCHOR_OFFSET
19183#define TARGET_MIN_ANCHOR_OFFSET -256
19184
19185/* Limit the maximum anchor offset to 4k-1, since that's the limit for a
19186 byte offset; we can do much more for larger data types, but have no way
19187 to determine the size of the access. We assume accesses are aligned. */
19188#undef TARGET_MAX_ANCHOR_OFFSET
19189#define TARGET_MAX_ANCHOR_OFFSET 4095
19190
19191#undef TARGET_VECTOR_ALIGNMENT
19192#define TARGET_VECTOR_ALIGNMENT aarch64_simd_vector_alignment
19193
19194#undef TARGET_VECTORIZE_PREFERRED_VECTOR_ALIGNMENT
19195#define TARGET_VECTORIZE_PREFERRED_VECTOR_ALIGNMENT \
19196 aarch64_vectorize_preferred_vector_alignment
19197#undef TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE
19198#define TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE \
19199 aarch64_simd_vector_alignment_reachable
19200
19201/* vec_perm support. */
19202
19203#undef TARGET_VECTORIZE_VEC_PERM_CONST
19204#define TARGET_VECTORIZE_VEC_PERM_CONST \
19205 aarch64_vectorize_vec_perm_const
19206
19207#undef TARGET_VECTORIZE_GET_MASK_MODE
19208#define TARGET_VECTORIZE_GET_MASK_MODE aarch64_get_mask_mode
19209#undef TARGET_VECTORIZE_EMPTY_MASK_IS_EXPENSIVE
19210#define TARGET_VECTORIZE_EMPTY_MASK_IS_EXPENSIVE \
19211 aarch64_empty_mask_is_expensive
19212#undef TARGET_PREFERRED_ELSE_VALUE
19213#define TARGET_PREFERRED_ELSE_VALUE \
19214 aarch64_preferred_else_value
19215
19216#undef TARGET_INIT_LIBFUNCS
19217#define TARGET_INIT_LIBFUNCS aarch64_init_libfuncs
19218
19219 #undef TARGET_FIXED_CONDITION_CODE_REGS
19220#define TARGET_FIXED_CONDITION_CODE_REGS aarch64_fixed_condition_code_regs
19221
19222#undef TARGET_FLAGS_REGNUM
19223#define TARGET_FLAGS_REGNUM CC_REGNUM
19224
19225#undef TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS
19226#define TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS true
19227
19228#undef TARGET_ASAN_SHADOW_OFFSET
19229#define TARGET_ASAN_SHADOW_OFFSET aarch64_asan_shadow_offset
19230
19231#undef TARGET_LEGITIMIZE_ADDRESS
19232#define TARGET_LEGITIMIZE_ADDRESS aarch64_legitimize_address
19233
19234#undef TARGET_SCHED_CAN_SPECULATE_INSN
19235#define TARGET_SCHED_CAN_SPECULATE_INSN aarch64_sched_can_speculate_insn
19236
19237#undef TARGET_CAN_USE_DOLOOP_P
19238#define TARGET_CAN_USE_DOLOOP_P can_use_doloop_if_innermost
19239
19240#undef TARGET_SCHED_ADJUST_PRIORITY
19241#define TARGET_SCHED_ADJUST_PRIORITY aarch64_sched_adjust_priority
19242
19243#undef TARGET_SCHED_MACRO_FUSION_P
19244#define TARGET_SCHED_MACRO_FUSION_P aarch64_macro_fusion_p
19245
19246#undef TARGET_SCHED_MACRO_FUSION_PAIR_P
19247#define TARGET_SCHED_MACRO_FUSION_PAIR_P aarch_macro_fusion_pair_p
19248
19249#undef TARGET_SCHED_FUSION_PRIORITY
19250#define TARGET_SCHED_FUSION_PRIORITY aarch64_sched_fusion_priority
19251
19252#undef TARGET_UNSPEC_MAY_TRAP_P
19253#define TARGET_UNSPEC_MAY_TRAP_P aarch64_unspec_may_trap_p
19254
19255#undef TARGET_USE_PSEUDO_PIC_REG
19256#define TARGET_USE_PSEUDO_PIC_REG aarch64_use_pseudo_pic_reg
19257
19258#undef TARGET_PRINT_OPERAND
19259#define TARGET_PRINT_OPERAND aarch64_print_operand
19260
19261#undef TARGET_PRINT_OPERAND_ADDRESS
19262#define TARGET_PRINT_OPERAND_ADDRESS aarch64_print_operand_address
19263
19264#undef TARGET_OPTAB_SUPPORTED_P
19265#define TARGET_OPTAB_SUPPORTED_P aarch64_optab_supported_p
19266
19267#undef TARGET_OMIT_STRUCT_RETURN_REG
19268#define TARGET_OMIT_STRUCT_RETURN_REG true
19269
19270#undef TARGET_DWARF_POLY_INDETERMINATE_VALUE
19271#define TARGET_DWARF_POLY_INDETERMINATE_VALUE \
19272 aarch64_dwarf_poly_indeterminate_value
19273
19274/* The architecture reserves bits 0 and 1 so use bit 2 for descriptors. */
19275#undef TARGET_CUSTOM_FUNCTION_DESCRIPTORS
19276#define TARGET_CUSTOM_FUNCTION_DESCRIPTORS 4
19277
19278#undef TARGET_HARD_REGNO_NREGS
19279#define TARGET_HARD_REGNO_NREGS aarch64_hard_regno_nregs
19280#undef TARGET_HARD_REGNO_MODE_OK
19281#define TARGET_HARD_REGNO_MODE_OK aarch64_hard_regno_mode_ok
19282
19283#undef TARGET_MODES_TIEABLE_P
19284#define TARGET_MODES_TIEABLE_P aarch64_modes_tieable_p
19285
19286#undef TARGET_HARD_REGNO_CALL_PART_CLOBBERED
19287#define TARGET_HARD_REGNO_CALL_PART_CLOBBERED \
19288 aarch64_hard_regno_call_part_clobbered
19289
19290#undef TARGET_REMOVE_EXTRA_CALL_PRESERVED_REGS
19291#define TARGET_REMOVE_EXTRA_CALL_PRESERVED_REGS \
19292 aarch64_remove_extra_call_preserved_regs
19293
19294#undef TARGET_RETURN_CALL_WITH_MAX_CLOBBERS
19295#define TARGET_RETURN_CALL_WITH_MAX_CLOBBERS \
19296 aarch64_return_call_with_max_clobbers
19297
19298#undef TARGET_CONSTANT_ALIGNMENT
19299#define TARGET_CONSTANT_ALIGNMENT aarch64_constant_alignment
19300
19301#undef TARGET_STACK_CLASH_PROTECTION_ALLOCA_PROBE_RANGE
19302#define TARGET_STACK_CLASH_PROTECTION_ALLOCA_PROBE_RANGE \
19303 aarch64_stack_clash_protection_alloca_probe_range
19304
19305#undef TARGET_COMPUTE_PRESSURE_CLASSES
19306#define TARGET_COMPUTE_PRESSURE_CLASSES aarch64_compute_pressure_classes
19307
19308#undef TARGET_CAN_CHANGE_MODE_CLASS
19309#define TARGET_CAN_CHANGE_MODE_CLASS aarch64_can_change_mode_class
19310
19311#undef TARGET_SELECT_EARLY_REMAT_MODES
19312#define TARGET_SELECT_EARLY_REMAT_MODES aarch64_select_early_remat_modes
19313
19314#undef TARGET_SPECULATION_SAFE_VALUE
19315#define TARGET_SPECULATION_SAFE_VALUE aarch64_speculation_safe_value
19316
19317#undef TARGET_ESTIMATED_POLY_VALUE
19318#define TARGET_ESTIMATED_POLY_VALUE aarch64_estimated_poly_value
19319
19320#undef TARGET_ATTRIBUTE_TABLE
19321#define TARGET_ATTRIBUTE_TABLE aarch64_attribute_table
19322
19323#undef TARGET_SIMD_CLONE_COMPUTE_VECSIZE_AND_SIMDLEN
19324#define TARGET_SIMD_CLONE_COMPUTE_VECSIZE_AND_SIMDLEN \
19325 aarch64_simd_clone_compute_vecsize_and_simdlen
19326
19327#undef TARGET_SIMD_CLONE_ADJUST
19328#define TARGET_SIMD_CLONE_ADJUST aarch64_simd_clone_adjust
19329
19330#undef TARGET_SIMD_CLONE_USABLE
19331#define TARGET_SIMD_CLONE_USABLE aarch64_simd_clone_usable
19332
19333#undef TARGET_COMP_TYPE_ATTRIBUTES
19334#define TARGET_COMP_TYPE_ATTRIBUTES aarch64_comp_type_attributes
19335
19336#undef TARGET_GET_MULTILIB_ABI_NAME
19337#define TARGET_GET_MULTILIB_ABI_NAME aarch64_get_multilib_abi_name
19338
19339#if CHECKING_P
19340#undef TARGET_RUN_TARGET_SELFTESTS
19341#define TARGET_RUN_TARGET_SELFTESTS selftest::aarch64_run_selftests
19342#endif /* #if CHECKING_P */
19343
19344struct gcc_target targetm = TARGET_INITIALIZER;
19345
19346#include "gt-aarch64.h"