/* Machine description for AArch64 architecture.
   Copyright (C) 2009-2019 Free Software Foundation, Inc.
   Contributed by ARM Ltd.

   This file is part of GCC.

   GCC is free software; you can redistribute it and/or modify it
   under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 3, or (at your option)
   any later version.

   GCC is distributed in the hope that it will be useful, but
   WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with GCC; see the file COPYING3.  If not see
   <http://www.gnu.org/licenses/>.  */

#define IN_TARGET_CODE 1

#include "config.h"
#define INCLUDE_STRING
#include "system.h"
#include "coretypes.h"
#include "backend.h"
#include "target.h"
#include "rtl.h"
#include "tree.h"
#include "memmodel.h"
#include "gimple.h"
#include "cfghooks.h"
#include "cfgloop.h"
#include "df.h"
#include "tm_p.h"
#include "stringpool.h"
#include "attribs.h"
#include "optabs.h"
#include "regs.h"
#include "emit-rtl.h"
#include "recog.h"
#include "cgraph.h"
#include "diagnostic.h"
#include "insn-attr.h"
#include "alias.h"
#include "fold-const.h"
#include "stor-layout.h"
#include "calls.h"
#include "varasm.h"
#include "output.h"
#include "flags.h"
#include "explow.h"
#include "expr.h"
#include "reload.h"
#include "langhooks.h"
#include "opts.h"
#include "params.h"
#include "gimplify.h"
#include "dwarf2.h"
#include "gimple-iterator.h"
#include "tree-vectorizer.h"
#include "aarch64-cost-tables.h"
#include "dumpfile.h"
#include "builtins.h"
#include "rtl-iter.h"
#include "tm-constrs.h"
#include "sched-int.h"
#include "target-globals.h"
#include "common/common-target.h"
#include "cfgrtl.h"
#include "selftest.h"
#include "selftest-rtl.h"
#include "rtx-vector-builder.h"
#include "intl.h"

/* This file should be included last.  */
#include "target-def.h"

/* Defined for convenience.  */
#define POINTER_BYTES (POINTER_SIZE / BITS_PER_UNIT)

/* Information about a legitimate vector immediate operand.  */
struct simd_immediate_info
{
  enum insn_type { MOV, MVN, INDEX, PTRUE };
  enum modifier_type { LSL, MSL };

  simd_immediate_info () {}
  simd_immediate_info (scalar_float_mode, rtx);
  simd_immediate_info (scalar_int_mode, unsigned HOST_WIDE_INT,
                       insn_type = MOV, modifier_type = LSL,
                       unsigned int = 0);
  simd_immediate_info (scalar_mode, rtx, rtx);
  simd_immediate_info (scalar_int_mode, aarch64_svpattern);

  /* The mode of the elements.  */
  scalar_mode elt_mode;

  /* The instruction to use to move the immediate into a vector.  */
  insn_type insn;

  union
  {
    /* For MOV and MVN.  */
    struct
    {
      /* The value of each element.  */
      rtx value;

      /* The kind of shift modifier to use, and the number of bits to shift.
         This is (LSL, 0) if no shift is needed.  */
      modifier_type modifier;
      unsigned int shift;
    } mov;

    /* For INDEX.  */
    struct
    {
      /* The value of the first element and the step to be added for each
         subsequent element.  */
      rtx base, step;
    } index;

    /* For PTRUE.  */
    aarch64_svpattern pattern;
  } u;
};

/* Construct a floating-point immediate in which each element has mode
   ELT_MODE_IN and value VALUE_IN.  */
inline simd_immediate_info
::simd_immediate_info (scalar_float_mode elt_mode_in, rtx value_in)
  : elt_mode (elt_mode_in), insn (MOV)
{
  u.mov.value = value_in;
  u.mov.modifier = LSL;
  u.mov.shift = 0;
}

/* Construct an integer immediate in which each element has mode ELT_MODE_IN
   and value VALUE_IN.  The other parameters are as for the structure
   fields.  */
inline simd_immediate_info
::simd_immediate_info (scalar_int_mode elt_mode_in,
                       unsigned HOST_WIDE_INT value_in,
                       insn_type insn_in, modifier_type modifier_in,
                       unsigned int shift_in)
  : elt_mode (elt_mode_in), insn (insn_in)
{
  u.mov.value = gen_int_mode (value_in, elt_mode_in);
  u.mov.modifier = modifier_in;
  u.mov.shift = shift_in;
}

/* Construct an integer immediate in which each element has mode ELT_MODE_IN
   and where element I is equal to BASE_IN + I * STEP_IN.  */
inline simd_immediate_info
::simd_immediate_info (scalar_mode elt_mode_in, rtx base_in, rtx step_in)
  : elt_mode (elt_mode_in), insn (INDEX)
{
  u.index.base = base_in;
  u.index.step = step_in;
}

/* Construct a predicate that controls elements of mode ELT_MODE_IN
   and has PTRUE pattern PATTERN_IN.  */
inline simd_immediate_info
::simd_immediate_info (scalar_int_mode elt_mode_in,
                       aarch64_svpattern pattern_in)
  : elt_mode (elt_mode_in), insn (PTRUE)
{
  u.pattern = pattern_in;
}

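/* As an illustrative sketch only: an INDEX immediate describing the SImode
   series { 0, 2, 4, ... } could be built with the (base, step) constructor
   above as

     simd_immediate_info (SImode, const0_rtx, GEN_INT (2));

   while a MOV of the byte value 0x55 would use the integer constructor with
   its default insn and modifier arguments.  */
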
/* The current code model.  */
enum aarch64_code_model aarch64_cmodel;

/* The number of 64-bit elements in an SVE vector.  */
poly_uint16 aarch64_sve_vg;

#ifdef HAVE_AS_TLS
#undef TARGET_HAVE_TLS
#define TARGET_HAVE_TLS 1
#endif

static bool aarch64_composite_type_p (const_tree, machine_mode);
static bool aarch64_vfp_is_call_or_return_candidate (machine_mode,
                                                     const_tree,
                                                     machine_mode *, int *,
                                                     bool *);
static void aarch64_elf_asm_constructor (rtx, int) ATTRIBUTE_UNUSED;
static void aarch64_elf_asm_destructor (rtx, int) ATTRIBUTE_UNUSED;
static void aarch64_override_options_after_change (void);
static bool aarch64_vector_mode_supported_p (machine_mode);
static int aarch64_address_cost (rtx, machine_mode, addr_space_t, bool);
static bool aarch64_builtin_support_vector_misalignment (machine_mode mode,
                                                         const_tree type,
                                                         int misalignment,
                                                         bool is_packed);
static machine_mode aarch64_simd_container_mode (scalar_mode, poly_int64);
static bool aarch64_print_address_internal (FILE*, machine_mode, rtx,
                                            aarch64_addr_query_type);
static HOST_WIDE_INT aarch64_clamp_to_uimm12_shift (HOST_WIDE_INT val);

/* Major revision number of the ARM Architecture implemented by the target.  */
unsigned aarch64_architecture_version;

/* The processor for which instructions should be scheduled.  */
enum aarch64_processor aarch64_tune = cortexa53;

/* Mask to specify which instruction scheduling options should be used.  */
uint64_t aarch64_tune_flags = 0;

/* Global flag for PC relative loads.  */
bool aarch64_pcrelative_literal_loads;

/* Global flag for whether frame pointer is enabled.  */
bool aarch64_use_frame_pointer;

#define BRANCH_PROTECT_STR_MAX 255
char *accepted_branch_protection_string = NULL;

static enum aarch64_parse_opt_result
aarch64_parse_branch_protection (const char*, char**);

/* Support for command line parsing of boolean flags in the tuning
   structures.  */
struct aarch64_flag_desc
{
  const char* name;
  unsigned int flag;
};

#define AARCH64_FUSION_PAIR(name, internal_name) \
  { name, AARCH64_FUSE_##internal_name },
static const struct aarch64_flag_desc aarch64_fusible_pairs[] =
{
  { "none", AARCH64_FUSE_NOTHING },
#include "aarch64-fusion-pairs.def"
  { "all", AARCH64_FUSE_ALL },
  { NULL, AARCH64_FUSE_NOTHING }
};

#define AARCH64_EXTRA_TUNING_OPTION(name, internal_name) \
  { name, AARCH64_EXTRA_TUNE_##internal_name },
static const struct aarch64_flag_desc aarch64_tuning_flags[] =
{
  { "none", AARCH64_EXTRA_TUNE_NONE },
#include "aarch64-tuning-flags.def"
  { "all", AARCH64_EXTRA_TUNE_ALL },
  { NULL, AARCH64_EXTRA_TUNE_NONE }
};

/* Tuning parameters.  */

static const struct cpu_addrcost_table generic_addrcost_table =
{
  {
    1, /* hi  */
    0, /* si  */
    0, /* di  */
    1, /* ti  */
  },
  0, /* pre_modify  */
  0, /* post_modify  */
  0, /* register_offset  */
  0, /* register_sextend  */
  0, /* register_zextend  */
  0 /* imm_offset  */
};

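/* Address costs for the Exynos M1.  */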
static const struct cpu_addrcost_table exynosm1_addrcost_table =
{
  {
    0, /* hi  */
    0, /* si  */
    0, /* di  */
    2, /* ti  */
  },
  0, /* pre_modify  */
  0, /* post_modify  */
  1, /* register_offset  */
  1, /* register_sextend  */
  2, /* register_zextend  */
  0, /* imm_offset  */
};

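/* Address costs for X-Gene 1.  */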
static const struct cpu_addrcost_table xgene1_addrcost_table =
{
  {
    1, /* hi  */
    0, /* si  */
    0, /* di  */
    1, /* ti  */
  },
  1, /* pre_modify  */
  1, /* post_modify  */
  0, /* register_offset  */
  1, /* register_sextend  */
  1, /* register_zextend  */
  0, /* imm_offset  */
};

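/* Address costs for ThunderX2 T99.  */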
static const struct cpu_addrcost_table thunderx2t99_addrcost_table =
{
  {
    1, /* hi  */
    1, /* si  */
    1, /* di  */
    2, /* ti  */
  },
  0, /* pre_modify  */
  0, /* post_modify  */
  2, /* register_offset  */
  3, /* register_sextend  */
  3, /* register_zextend  */
  0, /* imm_offset  */
};

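/* Address costs for TSV110.  */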
static const struct cpu_addrcost_table tsv110_addrcost_table =
{
  {
    1, /* hi  */
    0, /* si  */
    0, /* di  */
    1, /* ti  */
  },
  0, /* pre_modify  */
  0, /* post_modify  */
  0, /* register_offset  */
  1, /* register_sextend  */
  1, /* register_zextend  */
  0, /* imm_offset  */
};

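/* Address costs for the Qualcomm QDF24xx.  */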
static const struct cpu_addrcost_table qdf24xx_addrcost_table =
{
  {
    1, /* hi  */
    1, /* si  */
    1, /* di  */
    2, /* ti  */
  },
  1, /* pre_modify  */
  1, /* post_modify  */
  3, /* register_offset  */
  3, /* register_sextend  */
  3, /* register_zextend  */
  2, /* imm_offset  */
};

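/* Generic costs for register moves within and between the GP and FP
   register files.  */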
static const struct cpu_regmove_cost generic_regmove_cost =
{
  1, /* GP2GP  */
  /* Avoid the use of slow int<->fp moves for spilling by setting
     their cost higher than memmov_cost.  */
  5, /* GP2FP  */
  5, /* FP2GP  */
  2 /* FP2FP  */
};

static const struct cpu_regmove_cost cortexa57_regmove_cost =
{
  1, /* GP2GP  */
  /* Avoid the use of slow int<->fp moves for spilling by setting
     their cost higher than memmov_cost.  */
  5, /* GP2FP  */
  5, /* FP2GP  */
  2 /* FP2FP  */
};

static const struct cpu_regmove_cost cortexa53_regmove_cost =
{
  1, /* GP2GP  */
  /* Avoid the use of slow int<->fp moves for spilling by setting
     their cost higher than memmov_cost.  */
  5, /* GP2FP  */
  5, /* FP2GP  */
  2 /* FP2FP  */
};

static const struct cpu_regmove_cost exynosm1_regmove_cost =
{
  1, /* GP2GP  */
  /* Avoid the use of slow int<->fp moves for spilling by setting
     their cost higher than memmov_cost (actual, 4 and 9).  */
  9, /* GP2FP  */
  9, /* FP2GP  */
  1 /* FP2FP  */
};

static const struct cpu_regmove_cost thunderx_regmove_cost =
{
  2, /* GP2GP  */
  2, /* GP2FP  */
  6, /* FP2GP  */
  4 /* FP2FP  */
};

static const struct cpu_regmove_cost xgene1_regmove_cost =
{
  1, /* GP2GP  */
  /* Avoid the use of slow int<->fp moves for spilling by setting
     their cost higher than memmov_cost.  */
  8, /* GP2FP  */
  8, /* FP2GP  */
  2 /* FP2FP  */
};

static const struct cpu_regmove_cost qdf24xx_regmove_cost =
{
  2, /* GP2GP  */
  /* Avoid the use of int<->fp moves for spilling.  */
  6, /* GP2FP  */
  6, /* FP2GP  */
  4 /* FP2FP  */
};

static const struct cpu_regmove_cost thunderx2t99_regmove_cost =
{
  1, /* GP2GP  */
  /* Avoid the use of int<->fp moves for spilling.  */
  8, /* GP2FP  */
  8, /* FP2GP  */
  4 /* FP2FP  */
};

static const struct cpu_regmove_cost tsv110_regmove_cost =
{
  1, /* GP2GP  */
  /* Avoid the use of slow int<->fp moves for spilling by setting
     their cost higher than memmov_cost.  */
  2, /* GP2FP  */
  3, /* FP2GP  */
  2 /* FP2FP  */
};

/* Generic costs for vector insn classes.  */
static const struct cpu_vector_cost generic_vector_cost =
{
  1, /* scalar_int_stmt_cost  */
  1, /* scalar_fp_stmt_cost  */
  1, /* scalar_load_cost  */
  1, /* scalar_store_cost  */
  1, /* vec_int_stmt_cost  */
  1, /* vec_fp_stmt_cost  */
  2, /* vec_permute_cost  */
  1, /* vec_to_scalar_cost  */
  1, /* scalar_to_vec_cost  */
  1, /* vec_align_load_cost  */
  1, /* vec_unalign_load_cost  */
  1, /* vec_unalign_store_cost  */
  1, /* vec_store_cost  */
  3, /* cond_taken_branch_cost  */
  1 /* cond_not_taken_branch_cost  */
};

/* QDF24XX costs for vector insn classes.  */
static const struct cpu_vector_cost qdf24xx_vector_cost =
{
  1, /* scalar_int_stmt_cost  */
  1, /* scalar_fp_stmt_cost  */
  1, /* scalar_load_cost  */
  1, /* scalar_store_cost  */
  1, /* vec_int_stmt_cost  */
  3, /* vec_fp_stmt_cost  */
  2, /* vec_permute_cost  */
  1, /* vec_to_scalar_cost  */
  1, /* scalar_to_vec_cost  */
  1, /* vec_align_load_cost  */
  1, /* vec_unalign_load_cost  */
  1, /* vec_unalign_store_cost  */
  1, /* vec_store_cost  */
  3, /* cond_taken_branch_cost  */
  1 /* cond_not_taken_branch_cost  */
};

/* ThunderX costs for vector insn classes.  */
static const struct cpu_vector_cost thunderx_vector_cost =
{
  1, /* scalar_int_stmt_cost  */
  1, /* scalar_fp_stmt_cost  */
  3, /* scalar_load_cost  */
  1, /* scalar_store_cost  */
  4, /* vec_int_stmt_cost  */
  1, /* vec_fp_stmt_cost  */
  4, /* vec_permute_cost  */
  2, /* vec_to_scalar_cost  */
  2, /* scalar_to_vec_cost  */
  3, /* vec_align_load_cost  */
  5, /* vec_unalign_load_cost  */
  5, /* vec_unalign_store_cost  */
  1, /* vec_store_cost  */
  3, /* cond_taken_branch_cost  */
  3 /* cond_not_taken_branch_cost  */
};

static const struct cpu_vector_cost tsv110_vector_cost =
{
  1, /* scalar_int_stmt_cost  */
  1, /* scalar_fp_stmt_cost  */
  5, /* scalar_load_cost  */
  1, /* scalar_store_cost  */
  2, /* vec_int_stmt_cost  */
  2, /* vec_fp_stmt_cost  */
  2, /* vec_permute_cost  */
  3, /* vec_to_scalar_cost  */
  2, /* scalar_to_vec_cost  */
  5, /* vec_align_load_cost  */
  5, /* vec_unalign_load_cost  */
  1, /* vec_unalign_store_cost  */
  1, /* vec_store_cost  */
  1, /* cond_taken_branch_cost  */
  1 /* cond_not_taken_branch_cost  */
};

/* Cortex-A57 costs for vector insn classes.  */
static const struct cpu_vector_cost cortexa57_vector_cost =
{
  1, /* scalar_int_stmt_cost  */
  1, /* scalar_fp_stmt_cost  */
  4, /* scalar_load_cost  */
  1, /* scalar_store_cost  */
  2, /* vec_int_stmt_cost  */
  2, /* vec_fp_stmt_cost  */
  3, /* vec_permute_cost  */
  8, /* vec_to_scalar_cost  */
  8, /* scalar_to_vec_cost  */
  4, /* vec_align_load_cost  */
  4, /* vec_unalign_load_cost  */
  1, /* vec_unalign_store_cost  */
  1, /* vec_store_cost  */
  1, /* cond_taken_branch_cost  */
  1 /* cond_not_taken_branch_cost  */
};

static const struct cpu_vector_cost exynosm1_vector_cost =
{
  1, /* scalar_int_stmt_cost  */
  1, /* scalar_fp_stmt_cost  */
  5, /* scalar_load_cost  */
  1, /* scalar_store_cost  */
  3, /* vec_int_stmt_cost  */
  3, /* vec_fp_stmt_cost  */
  3, /* vec_permute_cost  */
  3, /* vec_to_scalar_cost  */
  3, /* scalar_to_vec_cost  */
  5, /* vec_align_load_cost  */
  5, /* vec_unalign_load_cost  */
  1, /* vec_unalign_store_cost  */
  1, /* vec_store_cost  */
  1, /* cond_taken_branch_cost  */
  1 /* cond_not_taken_branch_cost  */
};

/* X-Gene 1 costs for vector insn classes.  */
static const struct cpu_vector_cost xgene1_vector_cost =
{
  1, /* scalar_int_stmt_cost  */
  1, /* scalar_fp_stmt_cost  */
  5, /* scalar_load_cost  */
  1, /* scalar_store_cost  */
  2, /* vec_int_stmt_cost  */
  2, /* vec_fp_stmt_cost  */
  2, /* vec_permute_cost  */
  4, /* vec_to_scalar_cost  */
  4, /* scalar_to_vec_cost  */
  10, /* vec_align_load_cost  */
  10, /* vec_unalign_load_cost  */
  2, /* vec_unalign_store_cost  */
  2, /* vec_store_cost  */
  2, /* cond_taken_branch_cost  */
  1 /* cond_not_taken_branch_cost  */
};

/* Costs for vector insn classes for Vulcan (ThunderX2 T99).  */
static const struct cpu_vector_cost thunderx2t99_vector_cost =
{
  1, /* scalar_int_stmt_cost  */
  6, /* scalar_fp_stmt_cost  */
  4, /* scalar_load_cost  */
  1, /* scalar_store_cost  */
  5, /* vec_int_stmt_cost  */
  6, /* vec_fp_stmt_cost  */
  3, /* vec_permute_cost  */
  6, /* vec_to_scalar_cost  */
  5, /* scalar_to_vec_cost  */
  8, /* vec_align_load_cost  */
  8, /* vec_unalign_load_cost  */
  4, /* vec_unalign_store_cost  */
  4, /* vec_store_cost  */
  2, /* cond_taken_branch_cost  */
  1 /* cond_not_taken_branch_cost  */
};

/* Generic costs for branch instructions.  */
static const struct cpu_branch_cost generic_branch_cost =
{
  1,  /* Predictable.  */
  3   /* Unpredictable.  */
};

/* Generic approximation modes.  */
static const cpu_approx_modes generic_approx_modes =
{
  AARCH64_APPROX_NONE, /* division  */
  AARCH64_APPROX_NONE, /* sqrt  */
  AARCH64_APPROX_NONE  /* recip_sqrt  */
};

/* Approximation modes for Exynos M1.  */
static const cpu_approx_modes exynosm1_approx_modes =
{
  AARCH64_APPROX_NONE, /* division  */
  AARCH64_APPROX_ALL,  /* sqrt  */
  AARCH64_APPROX_ALL   /* recip_sqrt  */
};

/* Approximation modes for X-Gene 1.  */
static const cpu_approx_modes xgene1_approx_modes =
{
  AARCH64_APPROX_NONE, /* division  */
  AARCH64_APPROX_NONE, /* sqrt  */
  AARCH64_APPROX_ALL   /* recip_sqrt  */
};

/* Generic prefetch settings (which disable prefetch).  */
static const cpu_prefetch_tune generic_prefetch_tune =
{
  0,     /* num_slots  */
  -1,    /* l1_cache_size  */
  -1,    /* l1_cache_line_size  */
  -1,    /* l2_cache_size  */
  true,  /* prefetch_dynamic_strides  */
  -1,    /* minimum_stride  */
  -1     /* default_opt_level  */
};

static const cpu_prefetch_tune exynosm1_prefetch_tune =
{
  0,     /* num_slots  */
  -1,    /* l1_cache_size  */
  64,    /* l1_cache_line_size  */
  -1,    /* l2_cache_size  */
  true,  /* prefetch_dynamic_strides  */
  -1,    /* minimum_stride  */
  -1     /* default_opt_level  */
};

static const cpu_prefetch_tune qdf24xx_prefetch_tune =
{
  4,     /* num_slots  */
  32,    /* l1_cache_size  */
  64,    /* l1_cache_line_size  */
  512,   /* l2_cache_size  */
  false, /* prefetch_dynamic_strides  */
  2048,  /* minimum_stride  */
  3      /* default_opt_level  */
};

static const cpu_prefetch_tune thunderxt88_prefetch_tune =
{
  8,       /* num_slots  */
  32,      /* l1_cache_size  */
  128,     /* l1_cache_line_size  */
  16*1024, /* l2_cache_size  */
  true,    /* prefetch_dynamic_strides  */
  -1,      /* minimum_stride  */
  3        /* default_opt_level  */
};

static const cpu_prefetch_tune thunderx_prefetch_tune =
{
  8,     /* num_slots  */
  32,    /* l1_cache_size  */
  128,   /* l1_cache_line_size  */
  -1,    /* l2_cache_size  */
  true,  /* prefetch_dynamic_strides  */
  -1,    /* minimum_stride  */
  -1     /* default_opt_level  */
};

static const cpu_prefetch_tune thunderx2t99_prefetch_tune =
{
  8,     /* num_slots  */
  32,    /* l1_cache_size  */
  64,    /* l1_cache_line_size  */
  256,   /* l2_cache_size  */
  true,  /* prefetch_dynamic_strides  */
  -1,    /* minimum_stride  */
  -1     /* default_opt_level  */
};

static const cpu_prefetch_tune tsv110_prefetch_tune =
{
  0,     /* num_slots  */
  64,    /* l1_cache_size  */
  64,    /* l1_cache_line_size  */
  512,   /* l2_cache_size  */
  true,  /* prefetch_dynamic_strides  */
  -1,    /* minimum_stride  */
  -1     /* default_opt_level  */
};

static const cpu_prefetch_tune xgene1_prefetch_tune =
{
  8,     /* num_slots  */
  32,    /* l1_cache_size  */
  64,    /* l1_cache_line_size  */
  256,   /* l2_cache_size  */
  true,  /* prefetch_dynamic_strides  */
  -1,    /* minimum_stride  */
  -1     /* default_opt_level  */
};

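/* Generic tuning parameters; these are also the default tuning set
   (see aarch64_tune_params below).  */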
static const struct tune_params generic_tunings =
{
  &cortexa57_extra_costs,
  &generic_addrcost_table,
  &generic_regmove_cost,
  &generic_vector_cost,
  &generic_branch_cost,
  &generic_approx_modes,
  SVE_NOT_IMPLEMENTED, /* sve_width  */
  4, /* memmov_cost  */
  2, /* issue_rate  */
  (AARCH64_FUSE_AES_AESMC), /* fusible_ops  */
  "16:12", /* function_align.  */
  "4",     /* jump_align.  */
  "8",     /* loop_align.  */
  2, /* int_reassoc_width.  */
  4, /* fp_reassoc_width.  */
  1, /* vec_reassoc_width.  */
  2, /* min_div_recip_mul_sf.  */
  2, /* min_div_recip_mul_df.  */
  0, /* max_case_values.  */
  tune_params::AUTOPREFETCHER_WEAK, /* autoprefetcher_model.  */
  (AARCH64_EXTRA_TUNE_NONE), /* tune_flags.  */
  &generic_prefetch_tune
};

static const struct tune_params cortexa35_tunings =
{
  &cortexa53_extra_costs,
  &generic_addrcost_table,
  &cortexa53_regmove_cost,
  &generic_vector_cost,
  &generic_branch_cost,
  &generic_approx_modes,
  SVE_NOT_IMPLEMENTED, /* sve_width  */
  4, /* memmov_cost  */
  1, /* issue_rate  */
  (AARCH64_FUSE_AES_AESMC | AARCH64_FUSE_MOV_MOVK | AARCH64_FUSE_ADRP_ADD
   | AARCH64_FUSE_MOVK_MOVK | AARCH64_FUSE_ADRP_LDR), /* fusible_ops  */
  "16", /* function_align.  */
  "4",  /* jump_align.  */
  "8",  /* loop_align.  */
  2, /* int_reassoc_width.  */
  4, /* fp_reassoc_width.  */
  1, /* vec_reassoc_width.  */
  2, /* min_div_recip_mul_sf.  */
  2, /* min_div_recip_mul_df.  */
  0, /* max_case_values.  */
  tune_params::AUTOPREFETCHER_WEAK, /* autoprefetcher_model.  */
  (AARCH64_EXTRA_TUNE_NONE), /* tune_flags.  */
  &generic_prefetch_tune
};

static const struct tune_params cortexa53_tunings =
{
  &cortexa53_extra_costs,
  &generic_addrcost_table,
  &cortexa53_regmove_cost,
  &generic_vector_cost,
  &generic_branch_cost,
  &generic_approx_modes,
  SVE_NOT_IMPLEMENTED, /* sve_width  */
  4, /* memmov_cost  */
  2, /* issue_rate  */
  (AARCH64_FUSE_AES_AESMC | AARCH64_FUSE_MOV_MOVK | AARCH64_FUSE_ADRP_ADD
   | AARCH64_FUSE_MOVK_MOVK | AARCH64_FUSE_ADRP_LDR), /* fusible_ops  */
  "16", /* function_align.  */
  "4",  /* jump_align.  */
  "8",  /* loop_align.  */
  2, /* int_reassoc_width.  */
  4, /* fp_reassoc_width.  */
  1, /* vec_reassoc_width.  */
  2, /* min_div_recip_mul_sf.  */
  2, /* min_div_recip_mul_df.  */
  0, /* max_case_values.  */
  tune_params::AUTOPREFETCHER_WEAK, /* autoprefetcher_model.  */
  (AARCH64_EXTRA_TUNE_NONE), /* tune_flags.  */
  &generic_prefetch_tune
};

static const struct tune_params cortexa57_tunings =
{
  &cortexa57_extra_costs,
  &generic_addrcost_table,
  &cortexa57_regmove_cost,
  &cortexa57_vector_cost,
  &generic_branch_cost,
  &generic_approx_modes,
  SVE_NOT_IMPLEMENTED, /* sve_width  */
  4, /* memmov_cost  */
  3, /* issue_rate  */
  (AARCH64_FUSE_AES_AESMC | AARCH64_FUSE_MOV_MOVK | AARCH64_FUSE_ADRP_ADD
   | AARCH64_FUSE_MOVK_MOVK), /* fusible_ops  */
  "16", /* function_align.  */
  "4",  /* jump_align.  */
  "8",  /* loop_align.  */
  2, /* int_reassoc_width.  */
  4, /* fp_reassoc_width.  */
  1, /* vec_reassoc_width.  */
  2, /* min_div_recip_mul_sf.  */
  2, /* min_div_recip_mul_df.  */
  0, /* max_case_values.  */
  tune_params::AUTOPREFETCHER_WEAK, /* autoprefetcher_model.  */
  (AARCH64_EXTRA_TUNE_RENAME_FMA_REGS), /* tune_flags.  */
  &generic_prefetch_tune
};

static const struct tune_params cortexa72_tunings =
{
  &cortexa57_extra_costs,
  &generic_addrcost_table,
  &cortexa57_regmove_cost,
  &cortexa57_vector_cost,
  &generic_branch_cost,
  &generic_approx_modes,
  SVE_NOT_IMPLEMENTED, /* sve_width  */
  4, /* memmov_cost  */
  3, /* issue_rate  */
  (AARCH64_FUSE_AES_AESMC | AARCH64_FUSE_MOV_MOVK | AARCH64_FUSE_ADRP_ADD
   | AARCH64_FUSE_MOVK_MOVK), /* fusible_ops  */
  "16", /* function_align.  */
  "4",  /* jump_align.  */
  "8",  /* loop_align.  */
  2, /* int_reassoc_width.  */
  4, /* fp_reassoc_width.  */
  1, /* vec_reassoc_width.  */
  2, /* min_div_recip_mul_sf.  */
  2, /* min_div_recip_mul_df.  */
  0, /* max_case_values.  */
  tune_params::AUTOPREFETCHER_WEAK, /* autoprefetcher_model.  */
  (AARCH64_EXTRA_TUNE_NONE), /* tune_flags.  */
  &generic_prefetch_tune
};

static const struct tune_params cortexa73_tunings =
{
  &cortexa57_extra_costs,
  &generic_addrcost_table,
  &cortexa57_regmove_cost,
  &cortexa57_vector_cost,
  &generic_branch_cost,
  &generic_approx_modes,
  SVE_NOT_IMPLEMENTED, /* sve_width  */
  4, /* memmov_cost.  */
  2, /* issue_rate.  */
  (AARCH64_FUSE_AES_AESMC | AARCH64_FUSE_MOV_MOVK | AARCH64_FUSE_ADRP_ADD
   | AARCH64_FUSE_MOVK_MOVK | AARCH64_FUSE_ADRP_LDR), /* fusible_ops  */
  "16", /* function_align.  */
  "4",  /* jump_align.  */
  "8",  /* loop_align.  */
  2, /* int_reassoc_width.  */
  4, /* fp_reassoc_width.  */
  1, /* vec_reassoc_width.  */
  2, /* min_div_recip_mul_sf.  */
  2, /* min_div_recip_mul_df.  */
  0, /* max_case_values.  */
  tune_params::AUTOPREFETCHER_WEAK, /* autoprefetcher_model.  */
  (AARCH64_EXTRA_TUNE_NONE), /* tune_flags.  */
  &generic_prefetch_tune
};

static const struct tune_params exynosm1_tunings =
{
  &exynosm1_extra_costs,
  &exynosm1_addrcost_table,
  &exynosm1_regmove_cost,
  &exynosm1_vector_cost,
  &generic_branch_cost,
  &exynosm1_approx_modes,
  SVE_NOT_IMPLEMENTED, /* sve_width  */
  4, /* memmov_cost  */
  3, /* issue_rate  */
  (AARCH64_FUSE_AES_AESMC), /* fusible_ops  */
  "4", /* function_align.  */
  "4", /* jump_align.  */
  "4", /* loop_align.  */
  2, /* int_reassoc_width.  */
  4, /* fp_reassoc_width.  */
  1, /* vec_reassoc_width.  */
  2, /* min_div_recip_mul_sf.  */
  2, /* min_div_recip_mul_df.  */
  48, /* max_case_values.  */
  tune_params::AUTOPREFETCHER_WEAK, /* autoprefetcher_model.  */
  (AARCH64_EXTRA_TUNE_NONE), /* tune_flags.  */
  &exynosm1_prefetch_tune
};

static const struct tune_params thunderxt88_tunings =
{
  &thunderx_extra_costs,
  &generic_addrcost_table,
  &thunderx_regmove_cost,
  &thunderx_vector_cost,
  &generic_branch_cost,
  &generic_approx_modes,
  SVE_NOT_IMPLEMENTED, /* sve_width  */
  6, /* memmov_cost  */
  2, /* issue_rate  */
  AARCH64_FUSE_CMP_BRANCH, /* fusible_ops  */
  "8", /* function_align.  */
  "8", /* jump_align.  */
  "8", /* loop_align.  */
  2, /* int_reassoc_width.  */
  4, /* fp_reassoc_width.  */
  1, /* vec_reassoc_width.  */
  2, /* min_div_recip_mul_sf.  */
  2, /* min_div_recip_mul_df.  */
  0, /* max_case_values.  */
  tune_params::AUTOPREFETCHER_OFF, /* autoprefetcher_model.  */
  (AARCH64_EXTRA_TUNE_SLOW_UNALIGNED_LDPW), /* tune_flags.  */
  &thunderxt88_prefetch_tune
};

static const struct tune_params thunderx_tunings =
{
  &thunderx_extra_costs,
  &generic_addrcost_table,
  &thunderx_regmove_cost,
  &thunderx_vector_cost,
  &generic_branch_cost,
  &generic_approx_modes,
  SVE_NOT_IMPLEMENTED, /* sve_width  */
  6, /* memmov_cost  */
  2, /* issue_rate  */
  AARCH64_FUSE_CMP_BRANCH, /* fusible_ops  */
  "8", /* function_align.  */
  "8", /* jump_align.  */
  "8", /* loop_align.  */
  2, /* int_reassoc_width.  */
  4, /* fp_reassoc_width.  */
  1, /* vec_reassoc_width.  */
  2, /* min_div_recip_mul_sf.  */
  2, /* min_div_recip_mul_df.  */
  0, /* max_case_values.  */
  tune_params::AUTOPREFETCHER_OFF, /* autoprefetcher_model.  */
  (AARCH64_EXTRA_TUNE_SLOW_UNALIGNED_LDPW
   | AARCH64_EXTRA_TUNE_CHEAP_SHIFT_EXTEND), /* tune_flags.  */
  &thunderx_prefetch_tune
};

static const struct tune_params tsv110_tunings =
{
  &tsv110_extra_costs,
  &tsv110_addrcost_table,
  &tsv110_regmove_cost,
  &tsv110_vector_cost,
  &generic_branch_cost,
  &generic_approx_modes,
  SVE_NOT_IMPLEMENTED, /* sve_width  */
  4, /* memmov_cost  */
  4, /* issue_rate  */
  (AARCH64_FUSE_AES_AESMC | AARCH64_FUSE_CMP_BRANCH
   | AARCH64_FUSE_ALU_BRANCH), /* fusible_ops  */
  "16", /* function_align.  */
  "4",  /* jump_align.  */
  "8",  /* loop_align.  */
  2, /* int_reassoc_width.  */
  4, /* fp_reassoc_width.  */
  1, /* vec_reassoc_width.  */
  2, /* min_div_recip_mul_sf.  */
  2, /* min_div_recip_mul_df.  */
  0, /* max_case_values.  */
  tune_params::AUTOPREFETCHER_WEAK, /* autoprefetcher_model.  */
  (AARCH64_EXTRA_TUNE_NONE), /* tune_flags.  */
  &tsv110_prefetch_tune
};

static const struct tune_params xgene1_tunings =
{
  &xgene1_extra_costs,
  &xgene1_addrcost_table,
  &xgene1_regmove_cost,
  &xgene1_vector_cost,
  &generic_branch_cost,
  &xgene1_approx_modes,
  SVE_NOT_IMPLEMENTED, /* sve_width  */
  6, /* memmov_cost  */
  4, /* issue_rate  */
  AARCH64_FUSE_NOTHING, /* fusible_ops  */
  "16", /* function_align.  */
  "16", /* jump_align.  */
  "16", /* loop_align.  */
  2, /* int_reassoc_width.  */
  4, /* fp_reassoc_width.  */
  1, /* vec_reassoc_width.  */
  2, /* min_div_recip_mul_sf.  */
  2, /* min_div_recip_mul_df.  */
  17, /* max_case_values.  */
  tune_params::AUTOPREFETCHER_OFF, /* autoprefetcher_model.  */
  (AARCH64_EXTRA_TUNE_NO_LDP_STP_QREGS), /* tune_flags.  */
  &xgene1_prefetch_tune
};

static const struct tune_params emag_tunings =
{
  &xgene1_extra_costs,
  &xgene1_addrcost_table,
  &xgene1_regmove_cost,
  &xgene1_vector_cost,
  &generic_branch_cost,
  &xgene1_approx_modes,
  SVE_NOT_IMPLEMENTED, /* sve_width  */
  6, /* memmov_cost  */
  4, /* issue_rate  */
  AARCH64_FUSE_NOTHING, /* fusible_ops  */
  "16", /* function_align.  */
  "16", /* jump_align.  */
  "16", /* loop_align.  */
  2, /* int_reassoc_width.  */
  4, /* fp_reassoc_width.  */
  1, /* vec_reassoc_width.  */
  2, /* min_div_recip_mul_sf.  */
  2, /* min_div_recip_mul_df.  */
  17, /* max_case_values.  */
  tune_params::AUTOPREFETCHER_OFF, /* autoprefetcher_model.  */
  (AARCH64_EXTRA_TUNE_NO_LDP_STP_QREGS), /* tune_flags.  */
  &xgene1_prefetch_tune
};

static const struct tune_params qdf24xx_tunings =
{
  &qdf24xx_extra_costs,
  &qdf24xx_addrcost_table,
  &qdf24xx_regmove_cost,
  &qdf24xx_vector_cost,
  &generic_branch_cost,
  &generic_approx_modes,
  SVE_NOT_IMPLEMENTED, /* sve_width  */
  4, /* memmov_cost  */
  4, /* issue_rate  */
  (AARCH64_FUSE_MOV_MOVK | AARCH64_FUSE_ADRP_ADD
   | AARCH64_FUSE_MOVK_MOVK), /* fusible_ops  */
  "16", /* function_align.  */
  "8",  /* jump_align.  */
  "16", /* loop_align.  */
  2, /* int_reassoc_width.  */
  4, /* fp_reassoc_width.  */
  1, /* vec_reassoc_width.  */
  2, /* min_div_recip_mul_sf.  */
  2, /* min_div_recip_mul_df.  */
  0, /* max_case_values.  */
  tune_params::AUTOPREFETCHER_WEAK, /* autoprefetcher_model.  */
  AARCH64_EXTRA_TUNE_RENAME_LOAD_REGS, /* tune_flags.  */
  &qdf24xx_prefetch_tune
};

/* Tuning structure for the Qualcomm Saphira core.  Default to falkor values
   for now.  */
static const struct tune_params saphira_tunings =
{
  &generic_extra_costs,
  &generic_addrcost_table,
  &generic_regmove_cost,
  &generic_vector_cost,
  &generic_branch_cost,
  &generic_approx_modes,
  SVE_NOT_IMPLEMENTED, /* sve_width  */
  4, /* memmov_cost  */
  4, /* issue_rate  */
  (AARCH64_FUSE_MOV_MOVK | AARCH64_FUSE_ADRP_ADD
   | AARCH64_FUSE_MOVK_MOVK), /* fusible_ops  */
  "16", /* function_align.  */
  "8",  /* jump_align.  */
  "16", /* loop_align.  */
  2, /* int_reassoc_width.  */
  4, /* fp_reassoc_width.  */
  1, /* vec_reassoc_width.  */
  2, /* min_div_recip_mul_sf.  */
  2, /* min_div_recip_mul_df.  */
  0, /* max_case_values.  */
  tune_params::AUTOPREFETCHER_WEAK, /* autoprefetcher_model.  */
  (AARCH64_EXTRA_TUNE_NONE), /* tune_flags.  */
  &generic_prefetch_tune
};

static const struct tune_params thunderx2t99_tunings =
{
  &thunderx2t99_extra_costs,
  &thunderx2t99_addrcost_table,
  &thunderx2t99_regmove_cost,
  &thunderx2t99_vector_cost,
  &generic_branch_cost,
  &generic_approx_modes,
  SVE_NOT_IMPLEMENTED, /* sve_width  */
  4, /* memmov_cost.  */
  4, /* issue_rate.  */
  (AARCH64_FUSE_CMP_BRANCH | AARCH64_FUSE_AES_AESMC
   | AARCH64_FUSE_ALU_BRANCH), /* fusible_ops  */
  "16", /* function_align.  */
  "8",  /* jump_align.  */
  "16", /* loop_align.  */
  3, /* int_reassoc_width.  */
  2, /* fp_reassoc_width.  */
  2, /* vec_reassoc_width.  */
  2, /* min_div_recip_mul_sf.  */
  2, /* min_div_recip_mul_df.  */
  0, /* max_case_values.  */
  tune_params::AUTOPREFETCHER_WEAK, /* autoprefetcher_model.  */
  (AARCH64_EXTRA_TUNE_NONE), /* tune_flags.  */
  &thunderx2t99_prefetch_tune
};

static const struct tune_params neoversen1_tunings =
{
  &cortexa57_extra_costs,
  &generic_addrcost_table,
  &generic_regmove_cost,
  &cortexa57_vector_cost,
  &generic_branch_cost,
  &generic_approx_modes,
  SVE_NOT_IMPLEMENTED, /* sve_width  */
  4, /* memmov_cost  */
  3, /* issue_rate  */
  AARCH64_FUSE_AES_AESMC, /* fusible_ops  */
  "32:16", /* function_align.  */
  "32:16", /* jump_align.  */
  "32:16", /* loop_align.  */
  2, /* int_reassoc_width.  */
  4, /* fp_reassoc_width.  */
  2, /* vec_reassoc_width.  */
  2, /* min_div_recip_mul_sf.  */
  2, /* min_div_recip_mul_df.  */
  0, /* max_case_values.  */
  tune_params::AUTOPREFETCHER_WEAK, /* autoprefetcher_model.  */
  (AARCH64_EXTRA_TUNE_NONE), /* tune_flags.  */
  &generic_prefetch_tune
};

/* Support for fine-grained override of the tuning structures.  */
struct aarch64_tuning_override_function
{
  const char* name;
  void (*parse_override)(const char*, struct tune_params*);
};

static void aarch64_parse_fuse_string (const char*, struct tune_params*);
static void aarch64_parse_tune_string (const char*, struct tune_params*);
static void aarch64_parse_sve_width_string (const char*, struct tune_params*);

static const struct aarch64_tuning_override_function
aarch64_tuning_override_functions[] =
{
  { "fuse", aarch64_parse_fuse_string },
  { "tune", aarch64_parse_tune_string },
  { "sve_width", aarch64_parse_sve_width_string },
  { NULL, NULL }
};

/* A processor implementing AArch64.  */
struct processor
{
  const char *const name;
  enum aarch64_processor ident;
  enum aarch64_processor sched_core;
  enum aarch64_arch arch;
  unsigned architecture_version;
  const uint64_t flags;
  const struct tune_params *const tune;
};

/* Architectures implementing AArch64.  */
static const struct processor all_architectures[] =
{
#define AARCH64_ARCH(NAME, CORE, ARCH_IDENT, ARCH_REV, FLAGS) \
  {NAME, CORE, CORE, AARCH64_ARCH_##ARCH_IDENT, ARCH_REV, FLAGS, NULL},
#include "aarch64-arches.def"
  {NULL, aarch64_none, aarch64_none, aarch64_no_arch, 0, 0, NULL}
};

/* Processor cores implementing AArch64.  */
static const struct processor all_cores[] =
{
#define AARCH64_CORE(NAME, IDENT, SCHED, ARCH, FLAGS, COSTS, IMP, PART, VARIANT) \
  {NAME, IDENT, SCHED, AARCH64_ARCH_##ARCH, \
   all_architectures[AARCH64_ARCH_##ARCH].architecture_version, \
   FLAGS, &COSTS##_tunings},
#include "aarch64-cores.def"
  {"generic", generic, cortexa53, AARCH64_ARCH_8A, 8,
   AARCH64_FL_FOR_ARCH8, &generic_tunings},
  {NULL, aarch64_none, aarch64_none, aarch64_no_arch, 0, 0, NULL}
};


/* Target specification.  These are populated by the -march, -mtune, -mcpu
   handling code or by target attributes.  */
static const struct processor *selected_arch;
static const struct processor *selected_cpu;
static const struct processor *selected_tune;

enum aarch64_key_type aarch64_ra_sign_key = AARCH64_KEY_A;

/* The current tuning set.  */
struct tune_params aarch64_tune_params = generic_tunings;

/* Table of machine attributes.  */
static const struct attribute_spec aarch64_attribute_table[] =
{
  /* { name, min_len, max_len, decl_req, type_req, fn_type_req,
       affects_type_identity, handler, exclude } */
  { "aarch64_vector_pcs", 0, 0, false, true, true, true, NULL, NULL },
  { NULL, 0, 0, false, false, false, false, NULL, NULL }
};

#define AARCH64_CPU_DEFAULT_FLAGS ((selected_cpu) ? selected_cpu->flags : 0)

/* An ISA extension in the co-processor and main instruction set space.  */
struct aarch64_option_extension
{
  const char *const name;
  const unsigned long flags_on;
  const unsigned long flags_off;
};

typedef enum aarch64_cond_code
{
  AARCH64_EQ = 0, AARCH64_NE, AARCH64_CS, AARCH64_CC, AARCH64_MI, AARCH64_PL,
  AARCH64_VS, AARCH64_VC, AARCH64_HI, AARCH64_LS, AARCH64_GE, AARCH64_LT,
  AARCH64_GT, AARCH64_LE, AARCH64_AL, AARCH64_NV
}
aarch64_cc;

#define AARCH64_INVERSE_CONDITION_CODE(X) ((aarch64_cc) (((int) X) ^ 1))

struct aarch64_branch_protect_type
{
  /* The type's name that the user passes to the branch-protection option
     string.  */
  const char* name;
  /* Function to handle the protection type and set global variables.
     First argument is the string token corresponding with this type and the
     second argument is the next token in the option string.
     Return values:
     * AARCH64_PARSE_OK: Handling was successful.
     * AARCH64_INVALID_ARG: The type is invalid in this context and the caller
       should print an error.
     * AARCH64_INVALID_FEATURE: The type is invalid and the handler prints its
       own error.  */
  enum aarch64_parse_opt_result (*handler)(char*, char*);
  /* A list of types that can follow this type in the option string.  */
  const aarch64_branch_protect_type* subtypes;
  unsigned int num_subtypes;
};

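/* Handlers for the branch-protection types accepted by -mbranch-protection.
   Each handler records the requested protection in the global state
   (aarch64_ra_sign_scope, aarch64_ra_sign_key, aarch64_enable_bti) and
   returns a parse result.  */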
static enum aarch64_parse_opt_result
aarch64_handle_no_branch_protection (char* str, char* rest)
{
  aarch64_ra_sign_scope = AARCH64_FUNCTION_NONE;
  aarch64_enable_bti = 0;
  if (rest)
    {
      error ("unexpected %<%s%> after %<%s%>", rest, str);
      return AARCH64_PARSE_INVALID_FEATURE;
    }
  return AARCH64_PARSE_OK;
}

static enum aarch64_parse_opt_result
aarch64_handle_standard_branch_protection (char* str, char* rest)
{
  aarch64_ra_sign_scope = AARCH64_FUNCTION_NON_LEAF;
  aarch64_ra_sign_key = AARCH64_KEY_A;
  aarch64_enable_bti = 1;
  if (rest)
    {
      error ("unexpected %<%s%> after %<%s%>", rest, str);
      return AARCH64_PARSE_INVALID_FEATURE;
    }
  return AARCH64_PARSE_OK;
}

static enum aarch64_parse_opt_result
aarch64_handle_pac_ret_protection (char* str ATTRIBUTE_UNUSED,
                                   char* rest ATTRIBUTE_UNUSED)
{
  aarch64_ra_sign_scope = AARCH64_FUNCTION_NON_LEAF;
  aarch64_ra_sign_key = AARCH64_KEY_A;
  return AARCH64_PARSE_OK;
}

static enum aarch64_parse_opt_result
aarch64_handle_pac_ret_leaf (char* str ATTRIBUTE_UNUSED,
                             char* rest ATTRIBUTE_UNUSED)
{
  aarch64_ra_sign_scope = AARCH64_FUNCTION_ALL;
  return AARCH64_PARSE_OK;
}

static enum aarch64_parse_opt_result
aarch64_handle_pac_ret_b_key (char* str ATTRIBUTE_UNUSED,
                              char* rest ATTRIBUTE_UNUSED)
{
  aarch64_ra_sign_key = AARCH64_KEY_B;
  return AARCH64_PARSE_OK;
}

static enum aarch64_parse_opt_result
aarch64_handle_bti_protection (char* str ATTRIBUTE_UNUSED,
                               char* rest ATTRIBUTE_UNUSED)
{
  aarch64_enable_bti = 1;
  return AARCH64_PARSE_OK;
}

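/* The subtypes that may follow "pac-ret" in a branch-protection string,
   terminated by a null entry.  */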
static const struct aarch64_branch_protect_type aarch64_pac_ret_subtypes[] = {
  { "leaf", aarch64_handle_pac_ret_leaf, NULL, 0 },
  { "b-key", aarch64_handle_pac_ret_b_key, NULL, 0 },
  { NULL, NULL, NULL, 0 }
};

static const struct aarch64_branch_protect_type aarch64_branch_protect_types[] = {
  { "none", aarch64_handle_no_branch_protection, NULL, 0 },
  { "standard", aarch64_handle_standard_branch_protection, NULL, 0 },
  { "pac-ret", aarch64_handle_pac_ret_protection, aarch64_pac_ret_subtypes,
    ARRAY_SIZE (aarch64_pac_ret_subtypes) },
  { "bti", aarch64_handle_bti_protection, NULL, 0 },
  { NULL, NULL, NULL, 0 }
};

/* The condition codes of the processor, and the inverse function.  */
static const char * const aarch64_condition_codes[] =
{
  "eq", "ne", "cs", "cc", "mi", "pl", "vs", "vc",
  "hi", "ls", "ge", "lt", "gt", "le", "al", "nv"
};

/* The preferred condition codes for SVE conditions.  */
static const char *const aarch64_sve_condition_codes[] =
{
  "none", "any", "nlast", "last", "first", "nfrst", "vs", "vc",
  "pmore", "plast", "tcont", "tstop", "gt", "le", "al", "nv"
};

/* Return the assembly token for svpattern value PATTERN.  */

static const char *
svpattern_token (enum aarch64_svpattern pattern)
{
  switch (pattern)
    {
#define CASE(UPPER, LOWER, VALUE) case AARCH64_SV_##UPPER: return #LOWER;
    AARCH64_FOR_SVPATTERN (CASE)
#undef CASE
    case AARCH64_NUM_SVPATTERNS:
      break;
    }
  gcc_unreachable ();
}

/* Generate code to enable conditional branches in functions over 1 MiB.  */
const char *
aarch64_gen_far_branch (rtx * operands, int pos_label, const char * dest,
                        const char * branch_format)
{
  rtx_code_label * tmp_label = gen_label_rtx ();
  char label_buf[256];
  char buffer[128];
  ASM_GENERATE_INTERNAL_LABEL (label_buf, dest,
                               CODE_LABEL_NUMBER (tmp_label));
  const char *label_ptr = targetm.strip_name_encoding (label_buf);
  rtx dest_label = operands[pos_label];
  operands[pos_label] = tmp_label;

  snprintf (buffer, sizeof (buffer), "%s%s", branch_format, label_ptr);
  output_asm_insn (buffer, operands);

  snprintf (buffer, sizeof (buffer), "b\t%%l%d\n%s:", pos_label, label_ptr);
  operands[pos_label] = dest_label;
  output_asm_insn (buffer, operands);
  return "";
}

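/* Report an error when MODE needs floating-point or Advanced SIMD registers
   but the current options make them unavailable (-mgeneral-regs-only or
   the +nofp feature modifier).  */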
void
aarch64_err_no_fpadvsimd (machine_mode mode)
{
  if (TARGET_GENERAL_REGS_ONLY)
    if (FLOAT_MODE_P (mode))
      error ("%qs is incompatible with the use of floating-point types",
             "-mgeneral-regs-only");
    else
      error ("%qs is incompatible with the use of vector types",
             "-mgeneral-regs-only");
  else
    if (FLOAT_MODE_P (mode))
      error ("%qs feature modifier is incompatible with the use of"
             " floating-point types", "+nofp");
    else
      error ("%qs feature modifier is incompatible with the use of"
             " vector types", "+nofp");
}

/* Implement TARGET_IRA_CHANGE_PSEUDO_ALLOCNO_CLASS.
   The register allocator chooses POINTER_AND_FP_REGS if FP_REGS and
   GENERAL_REGS have the same cost - even if POINTER_AND_FP_REGS has a much
   higher cost.  POINTER_AND_FP_REGS is also used if the cost of both FP_REGS
   and GENERAL_REGS is lower than the memory cost (in this case the best class
   is the lowest cost one).  Using POINTER_AND_FP_REGS irrespectively of its
   cost results in bad allocations with many redundant int<->FP moves which
   are expensive on various cores.
   To avoid this we don't allow POINTER_AND_FP_REGS as the allocno class, but
   force a decision between FP_REGS and GENERAL_REGS.  We use the allocno class
   if it isn't POINTER_AND_FP_REGS.  Similarly, use the best class if it isn't
   POINTER_AND_FP_REGS.  Otherwise set the allocno class depending on the mode.
   The result of this is that it is no longer inefficient to have a higher
   memory move cost than the register move cost.  */

static reg_class_t
aarch64_ira_change_pseudo_allocno_class (int regno, reg_class_t allocno_class,
                                         reg_class_t best_class)
{
  machine_mode mode;

  if (!reg_class_subset_p (GENERAL_REGS, allocno_class)
      || !reg_class_subset_p (FP_REGS, allocno_class))
    return allocno_class;

  if (!reg_class_subset_p (GENERAL_REGS, best_class)
      || !reg_class_subset_p (FP_REGS, best_class))
    return best_class;

  mode = PSEUDO_REGNO_MODE (regno);
  return FLOAT_MODE_P (mode) || VECTOR_MODE_P (mode) ? FP_REGS : GENERAL_REGS;
}

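/* Return the minimum number of divisions by the same divisor that makes a
   reciprocal-multiplication sequence worthwhile, taken from the current
   tuning: min_div_recip_mul_sf for 32-bit units, min_div_recip_mul_df
   otherwise.  */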
static unsigned int
aarch64_min_divisions_for_recip_mul (machine_mode mode)
{
  if (GET_MODE_UNIT_SIZE (mode) == 4)
    return aarch64_tune_params.min_div_recip_mul_sf;
  return aarch64_tune_params.min_div_recip_mul_df;
}

/* Return the reassociation width of treeop OPC with mode MODE.  */
static int
aarch64_reassociation_width (unsigned opc, machine_mode mode)
{
  if (VECTOR_MODE_P (mode))
    return aarch64_tune_params.vec_reassoc_width;
  if (INTEGRAL_MODE_P (mode))
    return aarch64_tune_params.int_reassoc_width;
  /* Avoid reassociating floating point addition so we emit more FMAs.  */
  if (FLOAT_MODE_P (mode) && opc != PLUS_EXPR)
    return aarch64_tune_params.fp_reassoc_width;
  return 1;
}

/* Provide a mapping from gcc register numbers to dwarf register numbers.  */
unsigned
aarch64_dbx_register_number (unsigned regno)
{
  if (GP_REGNUM_P (regno))
    return AARCH64_DWARF_R0 + regno - R0_REGNUM;
  else if (regno == SP_REGNUM)
    return AARCH64_DWARF_SP;
  else if (FP_REGNUM_P (regno))
    return AARCH64_DWARF_V0 + regno - V0_REGNUM;
  else if (PR_REGNUM_P (regno))
    return AARCH64_DWARF_P0 + regno - P0_REGNUM;
  else if (regno == VG_REGNUM)
    return AARCH64_DWARF_VG;

  /* Return values >= DWARF_FRAME_REGISTERS indicate that there is no
     equivalent DWARF register.  */
  return DWARF_FRAME_REGISTERS;
}

/* If X is a CONST_DOUBLE, return its bit representation as a constant
   integer, otherwise return X unmodified.  */
static rtx
aarch64_bit_representation (rtx x)
{
  if (CONST_DOUBLE_P (x))
    x = gen_lowpart (int_mode_for_mode (GET_MODE (x)).require (), x);
  return x;
}

/* Return true if MODE is any of the Advanced SIMD structure modes.  */
static bool
aarch64_advsimd_struct_mode_p (machine_mode mode)
{
  return (TARGET_SIMD
          && (mode == OImode || mode == CImode || mode == XImode));
}

/* Return true if MODE is an SVE predicate mode.  */
static bool
aarch64_sve_pred_mode_p (machine_mode mode)
{
  return (TARGET_SVE
          && (mode == VNx16BImode
              || mode == VNx8BImode
              || mode == VNx4BImode
              || mode == VNx2BImode));
}

/* Three mutually-exclusive flags describing a vector or predicate type.  */
const unsigned int VEC_ADVSIMD  = 1;
const unsigned int VEC_SVE_DATA = 2;
const unsigned int VEC_SVE_PRED = 4;
/* Can be used in combination with VEC_ADVSIMD or VEC_SVE_DATA to indicate
   a structure of 2, 3 or 4 vectors.  */
const unsigned int VEC_STRUCT   = 8;
/* Useful combinations of the above.  */
const unsigned int VEC_ANY_SVE  = VEC_SVE_DATA | VEC_SVE_PRED;
const unsigned int VEC_ANY_DATA = VEC_ADVSIMD | VEC_SVE_DATA;

/* Return a set of flags describing the vector properties of mode MODE.
   Ignore modes that are not supported by the current target.  */
static unsigned int
aarch64_classify_vector_mode (machine_mode mode)
{
  if (aarch64_advsimd_struct_mode_p (mode))
    return VEC_ADVSIMD | VEC_STRUCT;

  if (aarch64_sve_pred_mode_p (mode))
    return VEC_SVE_PRED;

  /* Make the decision based on the mode's enum value rather than its
     properties, so that we keep the correct classification regardless
     of -msve-vector-bits.  */
  switch (mode)
    {
    /* Single SVE vectors.  */
    case E_VNx16QImode:
    case E_VNx8HImode:
    case E_VNx4SImode:
    case E_VNx2DImode:
    case E_VNx8HFmode:
    case E_VNx4SFmode:
    case E_VNx2DFmode:
      return TARGET_SVE ? VEC_SVE_DATA : 0;

    /* x2 SVE vectors.  */
    case E_VNx32QImode:
    case E_VNx16HImode:
    case E_VNx8SImode:
    case E_VNx4DImode:
    case E_VNx16HFmode:
    case E_VNx8SFmode:
    case E_VNx4DFmode:
    /* x3 SVE vectors.  */
    case E_VNx48QImode:
    case E_VNx24HImode:
    case E_VNx12SImode:
    case E_VNx6DImode:
    case E_VNx24HFmode:
    case E_VNx12SFmode:
    case E_VNx6DFmode:
    /* x4 SVE vectors.  */
    case E_VNx64QImode:
    case E_VNx32HImode:
    case E_VNx16SImode:
    case E_VNx8DImode:
    case E_VNx32HFmode:
    case E_VNx16SFmode:
    case E_VNx8DFmode:
      return TARGET_SVE ? VEC_SVE_DATA | VEC_STRUCT : 0;

    /* 64-bit Advanced SIMD vectors.  */
    case E_V8QImode:
    case E_V4HImode:
    case E_V2SImode:
    /* ...E_V1DImode doesn't exist.  */
    case E_V4HFmode:
    case E_V2SFmode:
    case E_V1DFmode:
    /* 128-bit Advanced SIMD vectors.  */
    case E_V16QImode:
    case E_V8HImode:
    case E_V4SImode:
    case E_V2DImode:
    case E_V8HFmode:
    case E_V4SFmode:
    case E_V2DFmode:
      return TARGET_SIMD ? VEC_ADVSIMD : 0;

    default:
      return 0;
    }
}

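/* As an illustrative summary of the cases above: with SVE enabled,
   VNx4SImode classifies as VEC_SVE_DATA and VNx8SImode as
   VEC_SVE_DATA | VEC_STRUCT, while with Advanced SIMD enabled V4SImode
   classifies as VEC_ADVSIMD.  Unsupported modes classify as 0.  */
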
1600/* Return true if MODE is any of the data vector modes, including
1601 structure modes. */
43e9d192 1602static bool
43cacb12 1603aarch64_vector_data_mode_p (machine_mode mode)
43e9d192 1604{
43cacb12 1605 return aarch64_classify_vector_mode (mode) & VEC_ANY_DATA;
43e9d192
IB
1606}
1607
43cacb12
RS
1608/* Return true if MODE is an SVE data vector mode; either a single vector
1609 or a structure of vectors. */
43e9d192 1610static bool
43cacb12 1611aarch64_sve_data_mode_p (machine_mode mode)
43e9d192 1612{
43cacb12 1613 return aarch64_classify_vector_mode (mode) & VEC_SVE_DATA;
43e9d192
IB
1614}
1615
9f4cbab8
RS
1616/* Implement target hook TARGET_ARRAY_MODE. */
1617static opt_machine_mode
1618aarch64_array_mode (machine_mode mode, unsigned HOST_WIDE_INT nelems)
1619{
1620 if (aarch64_classify_vector_mode (mode) == VEC_SVE_DATA
1621 && IN_RANGE (nelems, 2, 4))
1622 return mode_for_vector (GET_MODE_INNER (mode),
1623 GET_MODE_NUNITS (mode) * nelems);
1624
1625 return opt_machine_mode ();
1626}
1627
43e9d192
IB
1628/* Implement target hook TARGET_ARRAY_MODE_SUPPORTED_P. */
1629static bool
ef4bddc2 1630aarch64_array_mode_supported_p (machine_mode mode,
43e9d192
IB
1631 unsigned HOST_WIDE_INT nelems)
1632{
1633 if (TARGET_SIMD
635e66fe
AL
1634 && (AARCH64_VALID_SIMD_QREG_MODE (mode)
1635 || AARCH64_VALID_SIMD_DREG_MODE (mode))
43e9d192
IB
1636 && (nelems >= 2 && nelems <= 4))
1637 return true;
1638
1639 return false;
1640}
1641
43cacb12
RS
1642/* Return the SVE predicate mode to use for elements that have
1643 ELEM_NBYTES bytes, if such a mode exists. */
1644
1645opt_machine_mode
1646aarch64_sve_pred_mode (unsigned int elem_nbytes)
1647{
1648 if (TARGET_SVE)
1649 {
1650 if (elem_nbytes == 1)
1651 return VNx16BImode;
1652 if (elem_nbytes == 2)
1653 return VNx8BImode;
1654 if (elem_nbytes == 4)
1655 return VNx4BImode;
1656 if (elem_nbytes == 8)
1657 return VNx2BImode;
1658 }
1659 return opt_machine_mode ();
1660}
1661
1662/* Implement TARGET_VECTORIZE_GET_MASK_MODE. */
1663
1664static opt_machine_mode
1665aarch64_get_mask_mode (poly_uint64 nunits, poly_uint64 nbytes)
1666{
1667 if (TARGET_SVE && known_eq (nbytes, BYTES_PER_SVE_VECTOR))
1668 {
1669 unsigned int elem_nbytes = vector_element_size (nbytes, nunits);
1670 machine_mode pred_mode;
1671 if (aarch64_sve_pred_mode (elem_nbytes).exists (&pred_mode))
1672 return pred_mode;
1673 }
1674
1675 return default_get_mask_mode (nunits, nbytes);
1676}
1677
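/* Illustrative worked example: for a full-width SVE comparison on
   VNx4SImode, NBYTES equals BYTES_PER_SVE_VECTOR and NUNITS is the
   matching 4-per-128-bit element count, so vector_element_size gives
   4 bytes per element and the mask mode chosen above is VNx4BImode.  */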
1044fa32
RS
1678/* Return the integer element mode associated with SVE mode MODE. */
1679
1680static scalar_int_mode
1681aarch64_sve_element_int_mode (machine_mode mode)
1682{
1683 unsigned int elt_bits = vector_element_size (BITS_PER_SVE_VECTOR,
1684 GET_MODE_NUNITS (mode));
1685 return int_mode_for_size (elt_bits, 0).require ();
1686}
1687
b41d1f6e
RS
1688/* Implement TARGET_PREFERRED_ELSE_VALUE. For binary operations,
1689 prefer to use the first arithmetic operand as the else value if
1690 the else value doesn't matter, since that exactly matches the SVE
1691 destructive merging form. For ternary operations we could either
1692 pick the first operand and use FMAD-like instructions or the last
1693 operand and use FMLA-like instructions; the latter seems more
1694 natural. */
6a86928d
RS
1695
1696static tree
b41d1f6e 1697aarch64_preferred_else_value (unsigned, tree, unsigned int nops, tree *ops)
6a86928d 1698{
b41d1f6e 1699 return nops == 3 ? ops[2] : ops[0];
6a86928d
RS
1700}
1701
c43f4279 1702/* Implement TARGET_HARD_REGNO_NREGS. */
43e9d192 1703
c43f4279 1704static unsigned int
ef4bddc2 1705aarch64_hard_regno_nregs (unsigned regno, machine_mode mode)
43e9d192 1706{
6a70badb
RS
1707 /* ??? Logically we should only need to provide a value when
1708 HARD_REGNO_MODE_OK says that the combination is valid,
1709 but at the moment we need to handle all modes. Just ignore
1710 any runtime parts for registers that can't store them. */
1711 HOST_WIDE_INT lowest_size = constant_lower_bound (GET_MODE_SIZE (mode));
43e9d192
IB
1712 switch (aarch64_regno_regclass (regno))
1713 {
1714 case FP_REGS:
1715 case FP_LO_REGS:
163b1f6a 1716 case FP_LO8_REGS:
43cacb12
RS
1717 if (aarch64_sve_data_mode_p (mode))
1718 return exact_div (GET_MODE_SIZE (mode),
1719 BYTES_PER_SVE_VECTOR).to_constant ();
6a70badb 1720 return CEIL (lowest_size, UNITS_PER_VREG);
43cacb12
RS
1721 case PR_REGS:
1722 case PR_LO_REGS:
1723 case PR_HI_REGS:
1724 return 1;
43e9d192 1725 default:
6a70badb 1726 return CEIL (lowest_size, UNITS_PER_WORD);
43e9d192
IB
1727 }
1728 gcc_unreachable ();
1729}
1730
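/* Illustrative worked example: V4SImode (16 bytes) occupies a single
   FP/SIMD register via CEIL (16, UNITS_PER_VREG), TImode needs two GP
   registers via CEIL (16, UNITS_PER_WORD), and an x2 SVE tuple such as
   VNx8SImode occupies exactly two FP registers through the exact_div
   case above.  */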
f939c3e6 1731/* Implement TARGET_HARD_REGNO_MODE_OK. */
43e9d192 1732
f939c3e6 1733static bool
ef4bddc2 1734aarch64_hard_regno_mode_ok (unsigned regno, machine_mode mode)
43e9d192
IB
1735{
1736 if (GET_MODE_CLASS (mode) == MODE_CC)
1737 return regno == CC_REGNUM;
1738
43cacb12
RS
1739 if (regno == VG_REGNUM)
1740 /* This must have the same size as _Unwind_Word. */
1741 return mode == DImode;
1742
1743 unsigned int vec_flags = aarch64_classify_vector_mode (mode);
1744 if (vec_flags & VEC_SVE_PRED)
1745 return PR_REGNUM_P (regno);
1746
1747 if (PR_REGNUM_P (regno))
1748 return 0;
1749
9259db42
YZ
1750 if (regno == SP_REGNUM)
1751 /* The purpose of comparing with ptr_mode is to support the
1752 global register variable associated with the stack pointer
1753 register via the syntax of asm ("wsp") in ILP32. */
1754 return mode == Pmode || mode == ptr_mode;
1755
1756 if (regno == FRAME_POINTER_REGNUM || regno == ARG_POINTER_REGNUM)
43e9d192
IB
1757 return mode == Pmode;
1758
563cc649
RH
1759 if (GP_REGNUM_P (regno))
1760 {
1761 if (known_le (GET_MODE_SIZE (mode), 8))
1762 return true;
1763 else if (known_le (GET_MODE_SIZE (mode), 16))
1764 return (regno & 1) == 0;
1765 }
1766 else if (FP_REGNUM_P (regno))
43e9d192 1767 {
43cacb12 1768 if (vec_flags & VEC_STRUCT)
4edd6298 1769 return end_hard_regno (mode, regno) - 1 <= V31_REGNUM;
43e9d192 1770 else
43cacb12 1771 return !VECTOR_MODE_P (mode) || vec_flags != 0;
43e9d192
IB
1772 }
1773
f939c3e6 1774 return false;
43e9d192
IB
1775}
1776
a0d0b980
SE
1777/* Return true if this is a definition of a vectorized simd function. */
1778
1779static bool
1780aarch64_simd_decl_p (tree fndecl)
1781{
1782 tree fntype;
1783
1784 if (fndecl == NULL)
1785 return false;
1786 fntype = TREE_TYPE (fndecl);
1787 if (fntype == NULL)
1788 return false;
1789
1790 /* Functions with the aarch64_vector_pcs attribute use the simd ABI. */
1791 if (lookup_attribute ("aarch64_vector_pcs", TYPE_ATTRIBUTES (fntype)) != NULL)
1792 return true;
1793
1794 return false;
1795}
1796
1797/* Return the mode a register save/restore should use. DImode for integer
1798 registers, DFmode for FP registers in non-SIMD functions (they only save
1799 the bottom half of a 128-bit register), or TFmode for FP registers in
1800 SIMD functions. */
1801
1802static machine_mode
1803aarch64_reg_save_mode (tree fndecl, unsigned regno)
1804{
1805 return GP_REGNUM_P (regno)
1806 ? E_DImode
1807 : (aarch64_simd_decl_p (fndecl) ? E_TFmode : E_DFmode);
1808}
1809
b3650d40
SE
1810/* Return true if the instruction is a call to a SIMD function, false
1811 if it is not a SIMD function or if we do not know anything about
1812 the function. */
1813
1814static bool
1815aarch64_simd_call_p (rtx_insn *insn)
1816{
1817 rtx symbol;
1818 rtx call;
1819 tree fndecl;
1820
1821 gcc_assert (CALL_P (insn));
1822 call = get_call_rtx_from (insn);
1823 symbol = XEXP (XEXP (call, 0), 0);
1824 if (GET_CODE (symbol) != SYMBOL_REF)
1825 return false;
1826 fndecl = SYMBOL_REF_DECL (symbol);
1827 if (!fndecl)
1828 return false;
1829
1830 return aarch64_simd_decl_p (fndecl);
1831}
1832
1833/* Implement TARGET_REMOVE_EXTRA_CALL_PRESERVED_REGS. If INSN calls
1834 a function that uses the SIMD ABI, take advantage of the extra
1835 call-preserved registers that the ABI provides. */
1836
1837void
1838aarch64_remove_extra_call_preserved_regs (rtx_insn *insn,
1839 HARD_REG_SET *return_set)
1840{
1841 if (aarch64_simd_call_p (insn))
1842 {
1843 for (int regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
1844 if (FP_SIMD_SAVED_REGNUM_P (regno))
1845 CLEAR_HARD_REG_BIT (*return_set, regno);
1846 }
1847}
1848
80ec73f4
RS
1849/* Implement TARGET_HARD_REGNO_CALL_PART_CLOBBERED. The callee only saves
1850 the lower 64 bits of a 128-bit register. Tell the compiler the callee
1851 clobbers the top 64 bits when restoring the bottom 64 bits. */
1852
1853static bool
473574ee
SE
1854aarch64_hard_regno_call_part_clobbered (rtx_insn *insn, unsigned int regno,
1855 machine_mode mode)
80ec73f4 1856{
473574ee
SE
1857 bool simd_p = insn && CALL_P (insn) && aarch64_simd_call_p (insn);
1858 return FP_REGNUM_P (regno)
1859 && maybe_gt (GET_MODE_SIZE (mode), simd_p ? 16 : 8);
1860}
1861
1862/* Implement TARGET_RETURN_CALL_WITH_MAX_CLOBBERS. */
1863
1864rtx_insn *
1865aarch64_return_call_with_max_clobbers (rtx_insn *call_1, rtx_insn *call_2)
1866{
1867 gcc_assert (CALL_P (call_1) && CALL_P (call_2));
1868
1869 if (!aarch64_simd_call_p (call_1) || aarch64_simd_call_p (call_2))
1870 return call_1;
1871 else
1872 return call_2;
80ec73f4
RS
1873}
1874
43cacb12
RS
1875/* Implement REGMODE_NATURAL_SIZE. */
1876poly_uint64
1877aarch64_regmode_natural_size (machine_mode mode)
1878{
1879 /* The natural size for SVE data modes is one SVE data vector,
1880 and similarly for predicates. We can't independently modify
1881 anything smaller than that. */
1882 /* ??? For now, only do this for variable-width SVE registers.
1883 Doing it for constant-sized registers breaks lower-subreg.c. */
1884 /* ??? And once that's fixed, we should probably have similar
1885 code for Advanced SIMD. */
1886 if (!aarch64_sve_vg.is_constant ())
1887 {
1888 unsigned int vec_flags = aarch64_classify_vector_mode (mode);
1889 if (vec_flags & VEC_SVE_PRED)
1890 return BYTES_PER_SVE_PRED;
1891 if (vec_flags & VEC_SVE_DATA)
1892 return BYTES_PER_SVE_VECTOR;
1893 }
1894 return UNITS_PER_WORD;
1895}
1896
73d9ac6a 1897/* Implement HARD_REGNO_CALLER_SAVE_MODE. */
ef4bddc2 1898machine_mode
43cacb12
RS
1899aarch64_hard_regno_caller_save_mode (unsigned regno, unsigned,
1900 machine_mode mode)
1901{
1902 /* The predicate mode determines which bits are significant and
1903 which are "don't care". Decreasing the number of lanes would
1904 lose data while increasing the number of lanes would make bits
1905 unnecessarily significant. */
1906 if (PR_REGNUM_P (regno))
1907 return mode;
6a70badb
RS
1908 if (known_ge (GET_MODE_SIZE (mode), 4))
1909 return mode;
73d9ac6a 1910 else
6a70badb 1911 return SImode;
73d9ac6a
IB
1912}
1913
231c52ae
ST
1914/* Return true if I's bits are consecutive ones from the MSB. */
1915bool
1916aarch64_high_bits_all_ones_p (HOST_WIDE_INT i)
1917{
1918 return exact_log2 (-i) != HOST_WIDE_INT_M1;
1919}
1920
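/* Illustrative worked example: for i == 0xffff000000000000, -i is
   0x0001000000000000 == 2^48, so exact_log2 (-i) is 48 and the function
   returns true; for i == 0xff00ff0000000000 the set bits are not
   consecutive from the MSB, -i is not a power of two, exact_log2
   returns HOST_WIDE_INT_M1 and the function returns false.  */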
58e17cf8
RS
1921/* Implement TARGET_CONSTANT_ALIGNMENT. Make strings word-aligned so
1922 that strcpy from constants will be faster. */
1923
1924static HOST_WIDE_INT
1925aarch64_constant_alignment (const_tree exp, HOST_WIDE_INT align)
1926{
1927 if (TREE_CODE (exp) == STRING_CST && !optimize_size)
1928 return MAX (align, BITS_PER_WORD);
1929 return align;
1930}
1931
43e9d192
IB
1932/* Return true if calls to DECL should be treated as
1933 long-calls (i.e. called via a register). */
1934static bool
1935aarch64_decl_is_long_call_p (const_tree decl ATTRIBUTE_UNUSED)
1936{
1937 return false;
1938}
1939
1940/* Return true if calls to symbol-ref SYM should be treated as
1941 long-calls (i.e. called via a register). */
1942bool
1943aarch64_is_long_call_p (rtx sym)
1944{
1945 return aarch64_decl_is_long_call_p (SYMBOL_REF_DECL (sym));
1946}
1947
b60d63cb
JW
1948/* Return true if calls to symbol-ref SYM should not go through
1949 plt stubs. */
1950
1951bool
1952aarch64_is_noplt_call_p (rtx sym)
1953{
1954 const_tree decl = SYMBOL_REF_DECL (sym);
1955
1956 if (flag_pic
1957 && decl
1958 && (!flag_plt
1959 || lookup_attribute ("noplt", DECL_ATTRIBUTES (decl)))
1960 && !targetm.binds_local_p (decl))
1961 return true;
1962
1963 return false;
1964}
1965
43e9d192
IB
1966/* Return true if the offsets to a zero/sign-extract operation
1967 represent an expression that matches an extend operation. The
1968 operands represent the parameters from
1969
4745e701 1970 (extract:MODE (mult (reg) (MULT_IMM)) (EXTRACT_IMM) (const_int 0)). */
43e9d192 1971bool
77e994c9 1972aarch64_is_extend_from_extract (scalar_int_mode mode, rtx mult_imm,
43e9d192
IB
1973 rtx extract_imm)
1974{
1975 HOST_WIDE_INT mult_val, extract_val;
1976
1977 if (! CONST_INT_P (mult_imm) || ! CONST_INT_P (extract_imm))
1978 return false;
1979
1980 mult_val = INTVAL (mult_imm);
1981 extract_val = INTVAL (extract_imm);
1982
1983 if (extract_val > 8
1984 && extract_val < GET_MODE_BITSIZE (mode)
1985 && exact_log2 (extract_val & ~7) > 0
1986 && (extract_val & 7) <= 4
1987 && mult_val == (1 << (extract_val & 7)))
1988 return true;
1989
1990 return false;
1991}
1992
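/* Illustrative worked example: a zero-extended byte shifted left by 3
   appears as
     (extract:DI (mult (reg) (const_int 8)) (const_int 11) (const_int 0))
   so EXTRACT_IMM is 11 and MULT_IMM is 8: 11 > 8, 11 < 64,
   exact_log2 (11 & ~7) == 3 > 0, (11 & 7) == 3 <= 4 and 8 == 1 << 3,
   hence the function returns true.  */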
1993/* Emit an insn that's a simple single-set. Both the operands must be
1994 known to be valid. */
827ab47a 1995inline static rtx_insn *
43e9d192
IB
1996emit_set_insn (rtx x, rtx y)
1997{
f7df4a84 1998 return emit_insn (gen_rtx_SET (x, y));
43e9d192
IB
1999}
2000
2001/* X and Y are two things to compare using CODE. Emit the compare insn and
2002 return the rtx for register 0 in the proper mode. */
2003rtx
2004aarch64_gen_compare_reg (RTX_CODE code, rtx x, rtx y)
2005{
ef4bddc2 2006 machine_mode mode = SELECT_CC_MODE (code, x, y);
43e9d192
IB
2007 rtx cc_reg = gen_rtx_REG (mode, CC_REGNUM);
2008
2009 emit_set_insn (cc_reg, gen_rtx_COMPARE (mode, x, y));
2010 return cc_reg;
2011}
2012
d400fda3
RH
2013/* Similarly, but maybe zero-extend Y if Y_MODE < SImode. */
2014
2015static rtx
2016aarch64_gen_compare_reg_maybe_ze (RTX_CODE code, rtx x, rtx y,
2017 machine_mode y_mode)
2018{
2019 if (y_mode == E_QImode || y_mode == E_HImode)
2020 {
2021 if (CONST_INT_P (y))
2022 y = GEN_INT (INTVAL (y) & GET_MODE_MASK (y_mode));
2023 else
2024 {
2025 rtx t, cc_reg;
2026 machine_mode cc_mode;
2027
2028 t = gen_rtx_ZERO_EXTEND (SImode, y);
2029 t = gen_rtx_COMPARE (CC_SWPmode, t, x);
2030 cc_mode = CC_SWPmode;
2031 cc_reg = gen_rtx_REG (cc_mode, CC_REGNUM);
2032 emit_set_insn (cc_reg, t);
2033 return cc_reg;
2034 }
2035 }
2036
2037 return aarch64_gen_compare_reg (code, x, y);
2038}
2039
43e9d192
IB
2040/* Build the SYMBOL_REF for __tls_get_addr. */
2041
2042static GTY(()) rtx tls_get_addr_libfunc;
2043
2044rtx
2045aarch64_tls_get_addr (void)
2046{
2047 if (!tls_get_addr_libfunc)
2048 tls_get_addr_libfunc = init_one_libfunc ("__tls_get_addr");
2049 return tls_get_addr_libfunc;
2050}
2051
2052/* Return the TLS model to use for ADDR. */
2053
2054static enum tls_model
2055tls_symbolic_operand_type (rtx addr)
2056{
2057 enum tls_model tls_kind = TLS_MODEL_NONE;
43e9d192
IB
2058 if (GET_CODE (addr) == CONST)
2059 {
6a70badb
RS
2060 poly_int64 addend;
2061 rtx sym = strip_offset (addr, &addend);
43e9d192
IB
2062 if (GET_CODE (sym) == SYMBOL_REF)
2063 tls_kind = SYMBOL_REF_TLS_MODEL (sym);
2064 }
2065 else if (GET_CODE (addr) == SYMBOL_REF)
2066 tls_kind = SYMBOL_REF_TLS_MODEL (addr);
2067
2068 return tls_kind;
2069}
2070
2071/* We'll allow lo_sums in our legitimate addresses so that
2072 combine can take care of combining addresses where
2073 necessary, but for generation purposes we'll generate the address
2074 as:
2075 RTL Absolute
2076 tmp = hi (symbol_ref); adrp x1, foo
2077 dest = lo_sum (tmp, symbol_ref); add dest, x1, :lo_12:foo
2078 nop
2079
2080 PIC TLS
2081 adrp x1, :got:foo adrp tmp, :tlsgd:foo
2082 ldr x1, [:got_lo12:foo] add dest, tmp, :tlsgd_lo12:foo
2083 bl __tls_get_addr
2084 nop
2085
2086 Load TLS symbol, depending on TLS mechanism and TLS access model.
2087
2088 Global Dynamic - Traditional TLS:
2089 adrp tmp, :tlsgd:imm
2090 add dest, tmp, #:tlsgd_lo12:imm
2091 bl __tls_get_addr
2092
2093 Global Dynamic - TLS Descriptors:
2094 adrp dest, :tlsdesc:imm
2095 ldr tmp, [dest, #:tlsdesc_lo12:imm]
2096 add dest, dest, #:tlsdesc_lo12:imm
2097 blr tmp
2098 mrs tp, tpidr_el0
2099 add dest, dest, tp
2100
2101 Initial Exec:
2102 mrs tp, tpidr_el0
2103 adrp tmp, :gottprel:imm
2104 ldr dest, [tmp, #:gottprel_lo12:imm]
2105 add dest, dest, tp
2106
2107 Local Exec:
2108 mrs tp, tpidr_el0
0699caae
RL
2109 add t0, tp, #:tprel_hi12:imm, lsl #12
2110 add t0, t0, #:tprel_lo12_nc:imm
43e9d192
IB
2111*/
2112
2113static void
2114aarch64_load_symref_appropriately (rtx dest, rtx imm,
2115 enum aarch64_symbol_type type)
2116{
2117 switch (type)
2118 {
2119 case SYMBOL_SMALL_ABSOLUTE:
2120 {
28514dda 2121 /* In ILP32, the mode of dest can be either SImode or DImode. */
43e9d192 2122 rtx tmp_reg = dest;
ef4bddc2 2123 machine_mode mode = GET_MODE (dest);
28514dda
YZ
2124
2125 gcc_assert (mode == Pmode || mode == ptr_mode);
2126
43e9d192 2127 if (can_create_pseudo_p ())
28514dda 2128 tmp_reg = gen_reg_rtx (mode);
43e9d192 2129
28514dda 2130 emit_move_insn (tmp_reg, gen_rtx_HIGH (mode, imm));
43e9d192
IB
2131 emit_insn (gen_add_losym (dest, tmp_reg, imm));
2132 return;
2133 }
2134
a5350ddc 2135 case SYMBOL_TINY_ABSOLUTE:
f7df4a84 2136 emit_insn (gen_rtx_SET (dest, imm));
a5350ddc
CSS
2137 return;
2138
1b1e81f8
JW
2139 case SYMBOL_SMALL_GOT_28K:
2140 {
2141 machine_mode mode = GET_MODE (dest);
2142 rtx gp_rtx = pic_offset_table_rtx;
53021678
JW
2143 rtx insn;
2144 rtx mem;
1b1e81f8
JW
2145
2146 /* NOTE: pic_offset_table_rtx can be NULL_RTX, because we can reach
2147 here before rtl expand. Tree IVOPT will generate an rtl pattern to
2148 decide rtx costs, in which case pic_offset_table_rtx is not
2149 initialized. In that case there is no need to generate the first adrp
026c3cfd 2150 instruction, as the final cost for global variable access is
1b1e81f8
JW
2151 one instruction. */
2152 if (gp_rtx != NULL)
2153 {
2154 /* -fpic for -mcmodel=small allows a 32K GOT table size (but since we
2155 use the page base as the GOT base, the first page may be wasted; in
2156 the worst case there is only 28K of space for the GOT).
2157
2158 The generated instruction sequence for accessing a global variable
2159 is:
2160
a3957742 2161 ldr reg, [pic_offset_table_rtx, #:gotpage_lo15:sym]
1b1e81f8
JW
2162
2163 Only one instruction is needed. But we must initialize
2164 pic_offset_table_rtx properly. We generate an initialization insn for
2165 every global access, and allow CSE to remove all redundant ones.
2166
2167 The final instruction sequence will look like the following
2168 for multiple global variable accesses.
2169
a3957742 2170 adrp pic_offset_table_rtx, _GLOBAL_OFFSET_TABLE_
1b1e81f8 2171
a3957742
JW
2172 ldr reg, [pic_offset_table_rtx, #:gotpage_lo15:sym1]
2173 ldr reg, [pic_offset_table_rtx, #:gotpage_lo15:sym2]
2174 ldr reg, [pic_offset_table_rtx, #:gotpage_lo15:sym3]
2175 ... */
1b1e81f8
JW
2176
2177 rtx s = gen_rtx_SYMBOL_REF (Pmode, "_GLOBAL_OFFSET_TABLE_");
2178 crtl->uses_pic_offset_table = 1;
2179 emit_move_insn (gp_rtx, gen_rtx_HIGH (Pmode, s));
2180
2181 if (mode != GET_MODE (gp_rtx))
4ba8f0a3
AP
2182 gp_rtx = gen_lowpart (mode, gp_rtx);
2183
1b1e81f8
JW
2184 }
2185
2186 if (mode == ptr_mode)
2187 {
2188 if (mode == DImode)
53021678 2189 insn = gen_ldr_got_small_28k_di (dest, gp_rtx, imm);
1b1e81f8 2190 else
53021678
JW
2191 insn = gen_ldr_got_small_28k_si (dest, gp_rtx, imm);
2192
2193 mem = XVECEXP (SET_SRC (insn), 0, 0);
1b1e81f8
JW
2194 }
2195 else
2196 {
2197 gcc_assert (mode == Pmode);
53021678
JW
2198
2199 insn = gen_ldr_got_small_28k_sidi (dest, gp_rtx, imm);
2200 mem = XVECEXP (XEXP (SET_SRC (insn), 0), 0, 0);
1b1e81f8
JW
2201 }
2202
53021678
JW
2203 /* The operand is expected to be a MEM. Whenever the related insn
2204 pattern is changed, the above code which calculates mem should be
2205 updated. */
2206 gcc_assert (GET_CODE (mem) == MEM);
2207 MEM_READONLY_P (mem) = 1;
2208 MEM_NOTRAP_P (mem) = 1;
2209 emit_insn (insn);
1b1e81f8
JW
2210 return;
2211 }
2212
6642bdb4 2213 case SYMBOL_SMALL_GOT_4G:
43e9d192 2214 {
28514dda
YZ
2215 /* In ILP32, the mode of dest can be either SImode or DImode,
2216 while the got entry is always of SImode size. The mode of
2217 dest depends on how dest is used: if dest is assigned to a
2218 pointer (e.g. in the memory), it has SImode; it may have
2219 DImode if dest is dereferenced to access the memory.
2220 This is why we have to handle three different ldr_got_small
2221 patterns here (two patterns for ILP32). */
53021678
JW
2222
2223 rtx insn;
2224 rtx mem;
43e9d192 2225 rtx tmp_reg = dest;
ef4bddc2 2226 machine_mode mode = GET_MODE (dest);
28514dda 2227
43e9d192 2228 if (can_create_pseudo_p ())
28514dda
YZ
2229 tmp_reg = gen_reg_rtx (mode);
2230
2231 emit_move_insn (tmp_reg, gen_rtx_HIGH (mode, imm));
2232 if (mode == ptr_mode)
2233 {
2234 if (mode == DImode)
53021678 2235 insn = gen_ldr_got_small_di (dest, tmp_reg, imm);
28514dda 2236 else
53021678
JW
2237 insn = gen_ldr_got_small_si (dest, tmp_reg, imm);
2238
2239 mem = XVECEXP (SET_SRC (insn), 0, 0);
28514dda
YZ
2240 }
2241 else
2242 {
2243 gcc_assert (mode == Pmode);
53021678
JW
2244
2245 insn = gen_ldr_got_small_sidi (dest, tmp_reg, imm);
2246 mem = XVECEXP (XEXP (SET_SRC (insn), 0), 0, 0);
28514dda
YZ
2247 }
2248
53021678
JW
2249 gcc_assert (GET_CODE (mem) == MEM);
2250 MEM_READONLY_P (mem) = 1;
2251 MEM_NOTRAP_P (mem) = 1;
2252 emit_insn (insn);
43e9d192
IB
2253 return;
2254 }
2255
2256 case SYMBOL_SMALL_TLSGD:
2257 {
5d8a22a5 2258 rtx_insn *insns;
23b88fda
N
2259 machine_mode mode = GET_MODE (dest);
2260 rtx result = gen_rtx_REG (mode, R0_REGNUM);
43e9d192
IB
2261
2262 start_sequence ();
23b88fda
N
2263 if (TARGET_ILP32)
2264 aarch64_emit_call_insn (gen_tlsgd_small_si (result, imm));
2265 else
2266 aarch64_emit_call_insn (gen_tlsgd_small_di (result, imm));
43e9d192
IB
2267 insns = get_insns ();
2268 end_sequence ();
2269
2270 RTL_CONST_CALL_P (insns) = 1;
2271 emit_libcall_block (insns, dest, result, imm);
2272 return;
2273 }
2274
2275 case SYMBOL_SMALL_TLSDESC:
2276 {
ef4bddc2 2277 machine_mode mode = GET_MODE (dest);
621ad2de 2278 rtx x0 = gen_rtx_REG (mode, R0_REGNUM);
43e9d192
IB
2279 rtx tp;
2280
621ad2de
AP
2281 gcc_assert (mode == Pmode || mode == ptr_mode);
2282
2876a13f
JW
2283 /* In ILP32, the got entry is always of SImode size. Unlike
2284 small GOT, the dest is fixed at reg 0. */
2285 if (TARGET_ILP32)
2286 emit_insn (gen_tlsdesc_small_si (imm));
621ad2de 2287 else
2876a13f 2288 emit_insn (gen_tlsdesc_small_di (imm));
43e9d192 2289 tp = aarch64_load_tp (NULL);
621ad2de
AP
2290
2291 if (mode != Pmode)
2292 tp = gen_lowpart (mode, tp);
2293
2876a13f 2294 emit_insn (gen_rtx_SET (dest, gen_rtx_PLUS (mode, tp, x0)));
241dbd9d
QZ
2295 if (REG_P (dest))
2296 set_unique_reg_note (get_last_insn (), REG_EQUIV, imm);
43e9d192
IB
2297 return;
2298 }
2299
79496620 2300 case SYMBOL_SMALL_TLSIE:
43e9d192 2301 {
621ad2de
AP
2302 /* In ILP32, the mode of dest can be either SImode or DImode,
2303 while the got entry is always of SImode size. The mode of
2304 dest depends on how dest is used: if dest is assigned to a
2305 pointer (e.g. in the memory), it has SImode; it may have
2306 DImode if dest is dereferenced to access the memory.
2307 This is why we have to handle three different tlsie_small
2308 patterns here (two patterns for ILP32). */
ef4bddc2 2309 machine_mode mode = GET_MODE (dest);
621ad2de 2310 rtx tmp_reg = gen_reg_rtx (mode);
43e9d192 2311 rtx tp = aarch64_load_tp (NULL);
621ad2de
AP
2312
2313 if (mode == ptr_mode)
2314 {
2315 if (mode == DImode)
2316 emit_insn (gen_tlsie_small_di (tmp_reg, imm));
2317 else
2318 {
2319 emit_insn (gen_tlsie_small_si (tmp_reg, imm));
2320 tp = gen_lowpart (mode, tp);
2321 }
2322 }
2323 else
2324 {
2325 gcc_assert (mode == Pmode);
2326 emit_insn (gen_tlsie_small_sidi (tmp_reg, imm));
2327 }
2328
f7df4a84 2329 emit_insn (gen_rtx_SET (dest, gen_rtx_PLUS (mode, tp, tmp_reg)));
241dbd9d
QZ
2330 if (REG_P (dest))
2331 set_unique_reg_note (get_last_insn (), REG_EQUIV, imm);
43e9d192
IB
2332 return;
2333 }
2334
cbf5629e 2335 case SYMBOL_TLSLE12:
d18ba284 2336 case SYMBOL_TLSLE24:
cbf5629e
JW
2337 case SYMBOL_TLSLE32:
2338 case SYMBOL_TLSLE48:
43e9d192 2339 {
cbf5629e 2340 machine_mode mode = GET_MODE (dest);
43e9d192 2341 rtx tp = aarch64_load_tp (NULL);
e6f7f0e9 2342
cbf5629e
JW
2343 if (mode != Pmode)
2344 tp = gen_lowpart (mode, tp);
2345
2346 switch (type)
2347 {
2348 case SYMBOL_TLSLE12:
2349 emit_insn ((mode == DImode ? gen_tlsle12_di : gen_tlsle12_si)
2350 (dest, tp, imm));
2351 break;
2352 case SYMBOL_TLSLE24:
2353 emit_insn ((mode == DImode ? gen_tlsle24_di : gen_tlsle24_si)
2354 (dest, tp, imm));
2355 break;
2356 case SYMBOL_TLSLE32:
2357 emit_insn ((mode == DImode ? gen_tlsle32_di : gen_tlsle32_si)
2358 (dest, imm));
2359 emit_insn ((mode == DImode ? gen_adddi3 : gen_addsi3)
2360 (dest, dest, tp));
2361 break;
2362 case SYMBOL_TLSLE48:
2363 emit_insn ((mode == DImode ? gen_tlsle48_di : gen_tlsle48_si)
2364 (dest, imm));
2365 emit_insn ((mode == DImode ? gen_adddi3 : gen_addsi3)
2366 (dest, dest, tp));
2367 break;
2368 default:
2369 gcc_unreachable ();
2370 }
e6f7f0e9 2371
241dbd9d
QZ
2372 if (REG_P (dest))
2373 set_unique_reg_note (get_last_insn (), REG_EQUIV, imm);
43e9d192
IB
2374 return;
2375 }
2376
87dd8ab0
MS
2377 case SYMBOL_TINY_GOT:
2378 emit_insn (gen_ldr_got_tiny (dest, imm));
2379 return;
2380
5ae7caad
JW
2381 case SYMBOL_TINY_TLSIE:
2382 {
2383 machine_mode mode = GET_MODE (dest);
2384 rtx tp = aarch64_load_tp (NULL);
2385
2386 if (mode == ptr_mode)
2387 {
2388 if (mode == DImode)
2389 emit_insn (gen_tlsie_tiny_di (dest, imm, tp));
2390 else
2391 {
2392 tp = gen_lowpart (mode, tp);
2393 emit_insn (gen_tlsie_tiny_si (dest, imm, tp));
2394 }
2395 }
2396 else
2397 {
2398 gcc_assert (mode == Pmode);
2399 emit_insn (gen_tlsie_tiny_sidi (dest, imm, tp));
2400 }
2401
241dbd9d
QZ
2402 if (REG_P (dest))
2403 set_unique_reg_note (get_last_insn (), REG_EQUIV, imm);
5ae7caad
JW
2404 return;
2405 }
2406
43e9d192
IB
2407 default:
2408 gcc_unreachable ();
2409 }
2410}
2411
2412/* Emit a move from SRC to DEST. Assume that the move expanders can
2413 handle all moves if !can_create_pseudo_p (). The distinction is
2414 important because, unlike emit_move_insn, the move expanders know
2415 how to force Pmode objects into the constant pool even when the
2416 constant pool address is not itself legitimate. */
2417static rtx
2418aarch64_emit_move (rtx dest, rtx src)
2419{
2420 return (can_create_pseudo_p ()
2421 ? emit_move_insn (dest, src)
2422 : emit_move_insn_1 (dest, src));
2423}
2424
f22d7973
RS
2425/* Apply UNOPTAB to OP and store the result in DEST. */
2426
2427static void
2428aarch64_emit_unop (rtx dest, optab unoptab, rtx op)
2429{
2430 rtx tmp = expand_unop (GET_MODE (dest), unoptab, op, dest, 0);
2431 if (dest != tmp)
2432 emit_move_insn (dest, tmp);
2433}
2434
2435/* Apply BINOPTAB to OP0 and OP1 and store the result in DEST. */
2436
2437static void
2438aarch64_emit_binop (rtx dest, optab binoptab, rtx op0, rtx op1)
2439{
2440 rtx tmp = expand_binop (GET_MODE (dest), binoptab, op0, op1, dest, 0,
2441 OPTAB_DIRECT);
2442 if (dest != tmp)
2443 emit_move_insn (dest, tmp);
2444}
2445
030d03b8
RE
2446/* Split a 128-bit move operation into two 64-bit move operations,
2447 taking care to handle partial overlap of register to register
2448 copies. Special cases are needed when moving between GP regs and
2449 FP regs. SRC can be a register, constant or memory; DST a register
2450 or memory. If either operand is memory it must not have any side
2451 effects. */
43e9d192
IB
2452void
2453aarch64_split_128bit_move (rtx dst, rtx src)
2454{
030d03b8
RE
2455 rtx dst_lo, dst_hi;
2456 rtx src_lo, src_hi;
43e9d192 2457
ef4bddc2 2458 machine_mode mode = GET_MODE (dst);
12dc6974 2459
030d03b8
RE
2460 gcc_assert (mode == TImode || mode == TFmode);
2461 gcc_assert (!(side_effects_p (src) || side_effects_p (dst)));
2462 gcc_assert (mode == GET_MODE (src) || GET_MODE (src) == VOIDmode);
43e9d192
IB
2463
2464 if (REG_P (dst) && REG_P (src))
2465 {
030d03b8
RE
2466 int src_regno = REGNO (src);
2467 int dst_regno = REGNO (dst);
43e9d192 2468
030d03b8 2469 /* Handle FP <-> GP regs. */
43e9d192
IB
2470 if (FP_REGNUM_P (dst_regno) && GP_REGNUM_P (src_regno))
2471 {
030d03b8
RE
2472 src_lo = gen_lowpart (word_mode, src);
2473 src_hi = gen_highpart (word_mode, src);
2474
0016d8d9
RS
2475 emit_insn (gen_aarch64_movlow_di (mode, dst, src_lo));
2476 emit_insn (gen_aarch64_movhigh_di (mode, dst, src_hi));
030d03b8 2477 return;
43e9d192
IB
2478 }
2479 else if (GP_REGNUM_P (dst_regno) && FP_REGNUM_P (src_regno))
2480 {
030d03b8
RE
2481 dst_lo = gen_lowpart (word_mode, dst);
2482 dst_hi = gen_highpart (word_mode, dst);
2483
0016d8d9
RS
2484 emit_insn (gen_aarch64_movdi_low (mode, dst_lo, src));
2485 emit_insn (gen_aarch64_movdi_high (mode, dst_hi, src));
030d03b8 2486 return;
43e9d192 2487 }
43e9d192
IB
2488 }
2489
030d03b8
RE
2490 dst_lo = gen_lowpart (word_mode, dst);
2491 dst_hi = gen_highpart (word_mode, dst);
2492 src_lo = gen_lowpart (word_mode, src);
2493 src_hi = gen_highpart_mode (word_mode, mode, src);
2494
2495 /* At most one pairing may overlap. */
2496 if (reg_overlap_mentioned_p (dst_lo, src_hi))
2497 {
2498 aarch64_emit_move (dst_hi, src_hi);
2499 aarch64_emit_move (dst_lo, src_lo);
2500 }
2501 else
2502 {
2503 aarch64_emit_move (dst_lo, src_lo);
2504 aarch64_emit_move (dst_hi, src_hi);
2505 }
43e9d192
IB
2506}
2507
2508bool
2509aarch64_split_128bit_move_p (rtx dst, rtx src)
2510{
2511 return (! REG_P (src)
2512 || ! (FP_REGNUM_P (REGNO (dst)) && FP_REGNUM_P (REGNO (src))));
2513}
2514
8b033a8a
SN
2515/* Split a complex SIMD combine. */
2516
2517void
2518aarch64_split_simd_combine (rtx dst, rtx src1, rtx src2)
2519{
ef4bddc2
RS
2520 machine_mode src_mode = GET_MODE (src1);
2521 machine_mode dst_mode = GET_MODE (dst);
8b033a8a
SN
2522
2523 gcc_assert (VECTOR_MODE_P (dst_mode));
a977dc0c
MC
2524 gcc_assert (register_operand (dst, dst_mode)
2525 && register_operand (src1, src_mode)
2526 && register_operand (src2, src_mode));
8b033a8a 2527
0016d8d9 2528 emit_insn (gen_aarch64_simd_combine (src_mode, dst, src1, src2));
a977dc0c 2529 return;
8b033a8a
SN
2530}
2531
fd4842cd
SN
2532/* Split a complex SIMD move. */
2533
2534void
2535aarch64_split_simd_move (rtx dst, rtx src)
2536{
ef4bddc2
RS
2537 machine_mode src_mode = GET_MODE (src);
2538 machine_mode dst_mode = GET_MODE (dst);
fd4842cd
SN
2539
2540 gcc_assert (VECTOR_MODE_P (dst_mode));
2541
2542 if (REG_P (dst) && REG_P (src))
2543 {
2544 gcc_assert (VECTOR_MODE_P (src_mode));
0016d8d9 2545 emit_insn (gen_aarch64_split_simd_mov (src_mode, dst, src));
fd4842cd
SN
2546 }
2547}
2548
ef22810a
RH
2549bool
2550aarch64_zero_extend_const_eq (machine_mode xmode, rtx x,
2551 machine_mode ymode, rtx y)
2552{
2553 rtx r = simplify_const_unary_operation (ZERO_EXTEND, xmode, y, ymode);
2554 gcc_assert (r != NULL);
2555 return rtx_equal_p (x, r);
2556}
2557
2558
678faefc
RS
2559/* Return TARGET if it is nonnull and a register of mode MODE.
2560 Otherwise, return a fresh register of mode MODE if we can,
2561 or TARGET reinterpreted as MODE if we can't. */
2562
2563static rtx
2564aarch64_target_reg (rtx target, machine_mode mode)
2565{
2566 if (target && REG_P (target) && GET_MODE (target) == mode)
2567 return target;
2568 if (!can_create_pseudo_p ())
2569 {
2570 gcc_assert (target);
2571 return gen_lowpart (mode, target);
2572 }
2573 return gen_reg_rtx (mode);
2574}
2575
2576/* Return a register that contains the constant in BUILDER, given that
2577 the constant is a legitimate move operand. Use TARGET as the register
2578 if it is nonnull and convenient. */
2579
2580static rtx
2581aarch64_emit_set_immediate (rtx target, rtx_vector_builder &builder)
2582{
2583 rtx src = builder.build ();
2584 target = aarch64_target_reg (target, GET_MODE (src));
2585 emit_insn (gen_rtx_SET (target, src));
2586 return target;
2587}
2588
43e9d192 2589static rtx
ef4bddc2 2590aarch64_force_temporary (machine_mode mode, rtx x, rtx value)
43e9d192
IB
2591{
2592 if (can_create_pseudo_p ())
e18b4a81 2593 return force_reg (mode, value);
43e9d192
IB
2594 else
2595 {
f5470a77
RS
2596 gcc_assert (x);
2597 aarch64_emit_move (x, value);
43e9d192
IB
2598 return x;
2599 }
2600}
2601
0b1fe8cf
RS
2602/* Return true if predicate value X is a constant in which every element
2603 is a CONST_INT. When returning true, describe X in BUILDER as a VNx16BI
2604 value, i.e. as a predicate in which all bits are significant. */
2605
2606static bool
2607aarch64_get_sve_pred_bits (rtx_vector_builder &builder, rtx x)
2608{
2609 if (GET_CODE (x) != CONST_VECTOR)
2610 return false;
2611
2612 unsigned int factor = vector_element_size (GET_MODE_NUNITS (VNx16BImode),
2613 GET_MODE_NUNITS (GET_MODE (x)));
2614 unsigned int npatterns = CONST_VECTOR_NPATTERNS (x) * factor;
2615 unsigned int nelts_per_pattern = CONST_VECTOR_NELTS_PER_PATTERN (x);
2616 builder.new_vector (VNx16BImode, npatterns, nelts_per_pattern);
2617
2618 unsigned int nelts = const_vector_encoded_nelts (x);
2619 for (unsigned int i = 0; i < nelts; ++i)
2620 {
2621 rtx elt = CONST_VECTOR_ENCODED_ELT (x, i);
2622 if (!CONST_INT_P (elt))
2623 return false;
2624
2625 builder.quick_push (elt);
2626 for (unsigned int j = 1; j < factor; ++j)
2627 builder.quick_push (const0_rtx);
2628 }
2629 builder.finalize ();
2630 return true;
2631}
2632
2633/* BUILDER contains a predicate constant of mode VNx16BI. Return the
2634 widest predicate element size it can have (that is, the largest size
2635 for which each element would still be 0 or 1). */
2636
2637unsigned int
2638aarch64_widest_sve_pred_elt_size (rtx_vector_builder &builder)
2639{
2640 /* Start with the most optimistic assumption: that we only need
2641 one bit per pattern. This is what we will use if only the first
2642 bit in each pattern is ever set. */
2643 unsigned int mask = GET_MODE_SIZE (DImode);
2644 mask |= builder.npatterns ();
2645
2646 /* Look for set bits. */
2647 unsigned int nelts = builder.encoded_nelts ();
2648 for (unsigned int i = 1; i < nelts; ++i)
2649 if (INTVAL (builder.elt (i)) != 0)
2650 {
2651 if (i & 1)
2652 return 1;
2653 mask |= i;
2654 }
2655 return mask & -mask;
2656}
2657
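/* Illustrative worked example: for a builder with 4 patterns in which
   set bits only ever appear at indices that are multiples of 4, MASK
   accumulates 8 | 4 and the function returns 4, i.e. the constant can
   be viewed as a predicate with 4-byte elements; as soon as a set bit
   appears at an odd index the function returns 1 (byte elements).  */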
2658/* BUILDER is a predicate constant of mode VNx16BI. Consider the value
2659 that the constant would have with predicate element size ELT_SIZE
2660 (ignoring the upper bits in each element) and return:
2661
2662 * -1 if all bits are set
2663 * N if the predicate has N leading set bits followed by all clear bits
2664 * 0 if the predicate does not have any of these forms. */
2665
2666int
2667aarch64_partial_ptrue_length (rtx_vector_builder &builder,
2668 unsigned int elt_size)
2669{
2670 /* If nelts_per_pattern is 3, we have set bits followed by clear bits
2671 followed by set bits. */
2672 if (builder.nelts_per_pattern () == 3)
2673 return 0;
2674
2675 /* Skip over leading set bits. */
2676 unsigned int nelts = builder.encoded_nelts ();
2677 unsigned int i = 0;
2678 for (; i < nelts; i += elt_size)
2679 if (INTVAL (builder.elt (i)) == 0)
2680 break;
2681 unsigned int vl = i / elt_size;
2682
2683 /* Check for the all-true case. */
2684 if (i == nelts)
2685 return -1;
2686
2687 /* If nelts_per_pattern is 1, then either VL is zero, or we have a
2688 repeating pattern of set bits followed by clear bits. */
2689 if (builder.nelts_per_pattern () != 2)
2690 return 0;
2691
2692 /* We have a "foreground" value and a duplicated "background" value.
2693 If the background might repeat and the last set bit belongs to it,
2694 we might have set bits followed by clear bits followed by set bits. */
2695 if (i > builder.npatterns () && maybe_ne (nelts, builder.full_nelts ()))
2696 return 0;
2697
2698 /* Make sure that the rest are all clear. */
2699 for (; i < nelts; i += elt_size)
2700 if (INTVAL (builder.elt (i)) != 0)
2701 return 0;
2702
2703 return vl;
2704}
2705
2706/* See if there is an svpattern that encodes an SVE predicate of mode
2707 PRED_MODE in which the first VL bits are set and the rest are clear.
2708 Return the pattern if so, otherwise return AARCH64_NUM_SVPATTERNS.
2709 A VL of -1 indicates an all-true vector. */
2710
2711aarch64_svpattern
2712aarch64_svpattern_for_vl (machine_mode pred_mode, int vl)
2713{
2714 if (vl < 0)
2715 return AARCH64_SV_ALL;
2716
2717 if (maybe_gt (vl, GET_MODE_NUNITS (pred_mode)))
2718 return AARCH64_NUM_SVPATTERNS;
2719
2720 if (vl >= 1 && vl <= 8)
2721 return aarch64_svpattern (AARCH64_SV_VL1 + (vl - 1));
2722
2723 if (vl >= 16 && vl <= 256 && pow2p_hwi (vl))
2724 return aarch64_svpattern (AARCH64_SV_VL16 + (exact_log2 (vl) - 4));
2725
2726 int max_vl;
2727 if (GET_MODE_NUNITS (pred_mode).is_constant (&max_vl))
2728 {
2729 if (vl == (max_vl / 3) * 3)
2730 return AARCH64_SV_MUL3;
2731 /* These would only trigger for non-power-of-2 lengths. */
2732 if (vl == (max_vl & -4))
2733 return AARCH64_SV_MUL4;
2734 if (vl == (1 << floor_log2 (max_vl)))
2735 return AARCH64_SV_POW2;
2736 if (vl == max_vl)
2737 return AARCH64_SV_ALL;
2738 }
2739 return AARCH64_NUM_SVPATTERNS;
2740}
2741
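/* Illustrative worked example (assuming VL does not exceed the number
   of elements in PRED_MODE): a VL of 5 maps to AARCH64_SV_VL1 + 4
   (the VL5 pattern), a VL of 16 maps to AARCH64_SV_VL16, and a VL of
   -1 (all-true) maps to AARCH64_SV_ALL; lengths that match none of the
   fixed patterns fall through to AARCH64_NUM_SVPATTERNS.  */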
34467289
RS
2742/* Return a VNx16BImode constant in which every sequence of ELT_SIZE
2743 bits has the lowest bit set and the upper bits clear. This is the
2744 VNx16BImode equivalent of a PTRUE for controlling elements of
2745 ELT_SIZE bytes. However, because the constant is VNx16BImode,
2746 all bits are significant, even the upper zeros. */
2747
2748rtx
2749aarch64_ptrue_all (unsigned int elt_size)
2750{
2751 rtx_vector_builder builder (VNx16BImode, elt_size, 1);
2752 builder.quick_push (const1_rtx);
2753 for (unsigned int i = 1; i < elt_size; ++i)
2754 builder.quick_push (const0_rtx);
2755 return builder.build ();
2756}
2757
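/* Illustrative worked example: aarch64_ptrue_all (2) builds the
   VNx16BImode constant { 1, 0, 1, 0, ... }, i.e. the low bit of every
   16-bit predicate element set and the upper bit of each element
   clear, which is the VNx16BImode view of a PTRUE for .H elements.  */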
16de3637
RS
2758/* Return an all-true predicate register of mode MODE. */
2759
2760rtx
2761aarch64_ptrue_reg (machine_mode mode)
2762{
2763 gcc_assert (GET_MODE_CLASS (mode) == MODE_VECTOR_BOOL);
678faefc
RS
2764 rtx reg = force_reg (VNx16BImode, CONSTM1_RTX (VNx16BImode));
2765 return gen_lowpart (mode, reg);
16de3637
RS
2766}
2767
e7053b0c
RS
2768/* Return an all-false predicate register of mode MODE. */
2769
2770rtx
2771aarch64_pfalse_reg (machine_mode mode)
2772{
2773 gcc_assert (GET_MODE_CLASS (mode) == MODE_VECTOR_BOOL);
678faefc
RS
2774 rtx reg = force_reg (VNx16BImode, CONST0_RTX (VNx16BImode));
2775 return gen_lowpart (mode, reg);
2776}
2777
c9c5a809
RS
2778/* Return true if predicate PRED1[0] is true whenever predicate PRED2 is
2779 true, or alternatively if we know that the operation predicated by
2780 PRED1[0] is safe to perform whenever PRED2 is true. PRED1[1] is a
2781 aarch64_sve_gp_strictness operand that describes the operation
2782 predicated by PRED1[0]. */
2783
2784bool
2785aarch64_sve_pred_dominates_p (rtx *pred1, rtx pred2)
2786{
2787 machine_mode mode = GET_MODE (pred2);
2788 gcc_assert (GET_MODE_CLASS (mode) == MODE_VECTOR_BOOL
2789 && mode == GET_MODE (pred1[0])
2790 && aarch64_sve_gp_strictness (pred1[1], SImode));
2791 return (pred1[0] == CONSTM1_RTX (mode)
2792 || INTVAL (pred1[1]) == SVE_RELAXED_GP
2793 || rtx_equal_p (pred1[0], pred2));
2794}
2795
00fa90d9
RS
2796/* PRED1[0] is a PTEST predicate and PRED1[1] is an aarch64_sve_ptrue_flag
2797 for it. PRED2[0] is the predicate for the instruction whose result
2798 is tested by the PTEST and PRED2[1] is again an aarch64_sve_ptrue_flag
2799 for it. Return true if we can prove that the two predicates are
2800 equivalent for PTEST purposes; that is, if we can replace PRED2[0]
2801 with PRED1[0] without changing behavior. */
2802
2803bool
2804aarch64_sve_same_pred_for_ptest_p (rtx *pred1, rtx *pred2)
2805{
2806 machine_mode mode = GET_MODE (pred1[0]);
2807 gcc_assert (GET_MODE_CLASS (mode) == MODE_VECTOR_BOOL
2808 && mode == GET_MODE (pred2[0])
2809 && aarch64_sve_ptrue_flag (pred1[1], SImode)
2810 && aarch64_sve_ptrue_flag (pred2[1], SImode));
2811
2812 bool ptrue1_p = (pred1[0] == CONSTM1_RTX (mode)
2813 || INTVAL (pred1[1]) == SVE_KNOWN_PTRUE);
2814 bool ptrue2_p = (pred2[0] == CONSTM1_RTX (mode)
2815 || INTVAL (pred2[1]) == SVE_KNOWN_PTRUE);
2816 return (ptrue1_p && ptrue2_p) || rtx_equal_p (pred1[0], pred2[0]);
2817}
2818
2819/* Emit a comparison CMP between OP0 and OP1, both of which have mode
2820 DATA_MODE, and return the result in a predicate of mode PRED_MODE.
2821 Use TARGET as the target register if nonnull and convenient. */
2822
2823static rtx
2824aarch64_sve_emit_int_cmp (rtx target, machine_mode pred_mode, rtx_code cmp,
2825 machine_mode data_mode, rtx op1, rtx op2)
2826{
2827 insn_code icode = code_for_aarch64_pred_cmp (cmp, data_mode);
2828 expand_operand ops[5];
2829 create_output_operand (&ops[0], target, pred_mode);
2830 create_input_operand (&ops[1], CONSTM1_RTX (pred_mode), pred_mode);
2831 create_integer_operand (&ops[2], SVE_KNOWN_PTRUE);
2832 create_input_operand (&ops[3], op1, data_mode);
2833 create_input_operand (&ops[4], op2, data_mode);
2834 expand_insn (icode, 5, ops);
2835 return ops[0].value;
2836}
2837
678faefc
RS
2838/* Use a comparison to convert integer vector SRC into MODE, which is
2839 the corresponding SVE predicate mode. Use TARGET for the result
2840 if it's nonnull and convenient. */
2841
2842static rtx
2843aarch64_convert_sve_data_to_pred (rtx target, machine_mode mode, rtx src)
2844{
2845 machine_mode src_mode = GET_MODE (src);
00fa90d9
RS
2846 return aarch64_sve_emit_int_cmp (target, mode, NE, src_mode,
2847 src, CONST0_RTX (src_mode));
e7053b0c
RS
2848}
2849
43cacb12
RS
2850/* Return true if we can move VALUE into a register using a single
2851 CNT[BHWD] instruction. */
2852
2853static bool
2854aarch64_sve_cnt_immediate_p (poly_int64 value)
2855{
2856 HOST_WIDE_INT factor = value.coeffs[0];
2857 /* The coefficient must be [1, 16] * {2, 4, 8, 16}. */
2858 return (value.coeffs[1] == factor
2859 && IN_RANGE (factor, 2, 16 * 16)
2860 && (factor & 1) == 0
2861 && factor <= 16 * (factor & -factor));
2862}
2863
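/* A minimal standalone sketch of the factor test above, using a plain
   integer in place of poly_int64 (illustrative only; the helper name is
   made up, and it assumes the runtime and constant coefficients are
   already known to be equal):  */

#include <stdbool.h>

static bool
sve_cnt_factor_p (long long factor)
{
  /* [1, 16] * {2, 4, 8, 16}: even, at most 256, and no more than
     16 times its lowest set bit.  */
  return (factor >= 2
	  && factor <= 16 * 16
	  && (factor & 1) == 0
	  && factor <= 16 * (factor & -factor));
}

/* For example, sve_cnt_factor_p (16) and sve_cnt_factor_p (48) hold
   (a plain CNTB and "cntb ..., all, mul #3" respectively), while
   sve_cnt_factor_p (3) and sve_cnt_factor_p (20 * 16) do not.  */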
2864/* Likewise for rtx X. */
2865
2866bool
2867aarch64_sve_cnt_immediate_p (rtx x)
2868{
2869 poly_int64 value;
2870 return poly_int_rtx_p (x, &value) && aarch64_sve_cnt_immediate_p (value);
2871}
2872
2873/* Return the asm string for an instruction with a CNT-like vector size
2874 operand (a vector pattern followed by a multiplier in the range [1, 16]).
2875 PREFIX is the mnemonic without the size suffix and OPERANDS is the
2876 first part of the operands template (the part that comes before the
2877 vector size itself). FACTOR is the number of quadwords.
2878 NELTS_PER_VQ, if nonzero, is the number of elements in each quadword.
2879 If it is zero, we can use any element size. */
2880
2881static char *
2882aarch64_output_sve_cnt_immediate (const char *prefix, const char *operands,
2883 unsigned int factor,
2884 unsigned int nelts_per_vq)
2885{
2886 static char buffer[sizeof ("sqincd\t%x0, %w0, all, mul #16")];
2887
2888 if (nelts_per_vq == 0)
2889 /* There is some overlap in the ranges of the four CNT instructions.
2890 Here we always use the smallest possible element size, so that the
2891 multiplier is 1 wherever possible. */
2892 nelts_per_vq = factor & -factor;
2893 int shift = std::min (exact_log2 (nelts_per_vq), 4);
2894 gcc_assert (IN_RANGE (shift, 1, 4));
2895 char suffix = "dwhb"[shift - 1];
2896
2897 factor >>= shift;
2898 unsigned int written;
2899 if (factor == 1)
2900 written = snprintf (buffer, sizeof (buffer), "%s%c\t%s",
2901 prefix, suffix, operands);
2902 else
2903 written = snprintf (buffer, sizeof (buffer), "%s%c\t%s, all, mul #%d",
2904 prefix, suffix, operands, factor);
2905 gcc_assert (written < sizeof (buffer));
2906 return buffer;
2907}
2908
2909/* Return the asm string for an instruction with a CNT-like vector size
2910 operand (a vector pattern followed by a multiplier in the range [1, 16]).
2911 PREFIX is the mnemonic without the size suffix and OPERANDS is the
2912 first part of the operands template (the part that comes before the
2913 vector size itself). X is the value of the vector size operand,
2914 as a polynomial integer rtx. */
2915
2916char *
2917aarch64_output_sve_cnt_immediate (const char *prefix, const char *operands,
2918 rtx x)
2919{
2920 poly_int64 value = rtx_to_poly_int64 (x);
2921 gcc_assert (aarch64_sve_cnt_immediate_p (value));
2922 return aarch64_output_sve_cnt_immediate (prefix, operands,
2923 value.coeffs[1], 0);
2924}
2925
2926/* Return true if we can add VALUE to a register using a single ADDVL
2927 or ADDPL instruction. */
2928
2929static bool
2930aarch64_sve_addvl_addpl_immediate_p (poly_int64 value)
2931{
2932 HOST_WIDE_INT factor = value.coeffs[0];
2933 if (factor == 0 || value.coeffs[1] != factor)
2934 return false;
2935 /* FACTOR counts VG / 2, so a value of 2 is one predicate width
2936 and a value of 16 is one vector width. */
2937 return (((factor & 15) == 0 && IN_RANGE (factor, -32 * 16, 31 * 16))
2938 || ((factor & 1) == 0 && IN_RANGE (factor, -32 * 2, 31 * 2)));
2939}
2940
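/* Illustrative worked example: poly_int64 (32, 32) has factor == 32,
   a multiple of 16 within [-32 * 16, 31 * 16], so it can be added with
   "addvl x0, x1, #2"; poly_int64 (6, 6) has factor == 6, which is even
   and within [-32 * 2, 31 * 2], so it becomes "addpl x0, x1, #3".  */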
2941/* Likewise for rtx X. */
2942
2943bool
2944aarch64_sve_addvl_addpl_immediate_p (rtx x)
2945{
2946 poly_int64 value;
2947 return (poly_int_rtx_p (x, &value)
2948 && aarch64_sve_addvl_addpl_immediate_p (value));
2949}
2950
2951/* Return the asm string for adding ADDVL or ADDPL immediate X to operand 1
2952 and storing the result in operand 0. */
2953
2954char *
2955aarch64_output_sve_addvl_addpl (rtx dest, rtx base, rtx offset)
2956{
2957 static char buffer[sizeof ("addpl\t%x0, %x1, #-") + 3 * sizeof (int)];
2958 poly_int64 offset_value = rtx_to_poly_int64 (offset);
2959 gcc_assert (aarch64_sve_addvl_addpl_immediate_p (offset_value));
2960
2961 /* Use INC or DEC if possible. */
2962 if (rtx_equal_p (dest, base) && GP_REGNUM_P (REGNO (dest)))
2963 {
2964 if (aarch64_sve_cnt_immediate_p (offset_value))
2965 return aarch64_output_sve_cnt_immediate ("inc", "%x0",
2966 offset_value.coeffs[1], 0);
2967 if (aarch64_sve_cnt_immediate_p (-offset_value))
2968 return aarch64_output_sve_cnt_immediate ("dec", "%x0",
2969 -offset_value.coeffs[1], 0);
2970 }
2971
2972 int factor = offset_value.coeffs[1];
2973 if ((factor & 15) == 0)
2974 snprintf (buffer, sizeof (buffer), "addvl\t%%x0, %%x1, #%d", factor / 16);
2975 else
2976 snprintf (buffer, sizeof (buffer), "addpl\t%%x0, %%x1, #%d", factor / 2);
2977 return buffer;
2978}
2979
2980/* Return true if X is a valid immediate for an SVE vector INC or DEC
2981 instruction. If it is, store the number of elements in each vector
2982 quadword in *NELTS_PER_VQ_OUT (if nonnull) and store the multiplication
2983 factor in *FACTOR_OUT (if nonnull). */
2984
2985bool
2986aarch64_sve_inc_dec_immediate_p (rtx x, int *factor_out,
2987 unsigned int *nelts_per_vq_out)
2988{
2989 rtx elt;
2990 poly_int64 value;
2991
2992 if (!const_vec_duplicate_p (x, &elt)
2993 || !poly_int_rtx_p (elt, &value))
2994 return false;
2995
2996 unsigned int nelts_per_vq = 128 / GET_MODE_UNIT_BITSIZE (GET_MODE (x));
2997 if (nelts_per_vq != 8 && nelts_per_vq != 4 && nelts_per_vq != 2)
2998 /* There's no vector INCB. */
2999 return false;
3000
3001 HOST_WIDE_INT factor = value.coeffs[0];
3002 if (value.coeffs[1] != factor)
3003 return false;
3004
3005 /* The coefficient must be [1, 16] * NELTS_PER_VQ. */
3006 if ((factor % nelts_per_vq) != 0
3007 || !IN_RANGE (abs (factor), nelts_per_vq, 16 * nelts_per_vq))
3008 return false;
3009
3010 if (factor_out)
3011 *factor_out = factor;
3012 if (nelts_per_vq_out)
3013 *nelts_per_vq_out = nelts_per_vq;
3014 return true;
3015}
3016
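/* Illustrative worked example: a VNx4SImode vector duplicate of
   poly_int64 (8, 8) has 32-bit elements, so NELTS_PER_VQ is 4;
   factor == 8 is a multiple of 4 within [4, 64], so the constant is
   valid and the output routine below prints "incw ..., all, mul #2".  */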
3017/* Return true if X is a valid immediate for an SVE vector INC or DEC
3018 instruction. */
3019
3020bool
3021aarch64_sve_inc_dec_immediate_p (rtx x)
3022{
3023 return aarch64_sve_inc_dec_immediate_p (x, NULL, NULL);
3024}
3025
3026/* Return the asm template for an SVE vector INC or DEC instruction.
3027 OPERANDS gives the operands before the vector count and X is the
3028 value of the vector count operand itself. */
3029
3030char *
3031aarch64_output_sve_inc_dec_immediate (const char *operands, rtx x)
3032{
3033 int factor;
3034 unsigned int nelts_per_vq;
3035 if (!aarch64_sve_inc_dec_immediate_p (x, &factor, &nelts_per_vq))
3036 gcc_unreachable ();
3037 if (factor < 0)
3038 return aarch64_output_sve_cnt_immediate ("dec", operands, -factor,
3039 nelts_per_vq);
3040 else
3041 return aarch64_output_sve_cnt_immediate ("inc", operands, factor,
3042 nelts_per_vq);
3043}
43e9d192 3044
82614948
RR
3045static int
3046aarch64_internal_mov_immediate (rtx dest, rtx imm, bool generate,
77e994c9 3047 scalar_int_mode mode)
43e9d192 3048{
43e9d192 3049 int i;
9a4865db
WD
3050 unsigned HOST_WIDE_INT val, val2, mask;
3051 int one_match, zero_match;
3052 int num_insns;
43e9d192 3053
9a4865db
WD
3054 val = INTVAL (imm);
3055
3056 if (aarch64_move_imm (val, mode))
43e9d192 3057 {
82614948 3058 if (generate)
f7df4a84 3059 emit_insn (gen_rtx_SET (dest, imm));
9a4865db 3060 return 1;
43e9d192
IB
3061 }
3062
9de00935
TC
3063 /* Check to see if the low 32 bits are either 0xffffXXXX or 0xXXXXffff
3064 (with XXXX non-zero). In that case check to see if the move can be done in
3065 a smaller mode. */
3066 val2 = val & 0xffffffff;
3067 if (mode == DImode
3068 && aarch64_move_imm (val2, SImode)
3069 && (((val >> 32) & 0xffff) == 0 || (val >> 48) == 0))
3070 {
3071 if (generate)
3072 emit_insn (gen_rtx_SET (dest, GEN_INT (val2)));
3073
3074 /* Check if we have to emit a second instruction by checking to see
3075 if any of the upper 32 bits of the original DI mode value is set. */
3076 if (val == val2)
3077 return 1;
3078
3079 i = (val >> 48) ? 48 : 32;
3080
3081 if (generate)
3082 emit_insn (gen_insv_immdi (dest, GEN_INT (i),
3083 GEN_INT ((val >> i) & 0xffff)));
3084
3085 return 2;
3086 }
3087
9a4865db 3088 if ((val >> 32) == 0 || mode == SImode)
43e9d192 3089 {
82614948
RR
3090 if (generate)
3091 {
9a4865db
WD
3092 emit_insn (gen_rtx_SET (dest, GEN_INT (val & 0xffff)));
3093 if (mode == SImode)
3094 emit_insn (gen_insv_immsi (dest, GEN_INT (16),
3095 GEN_INT ((val >> 16) & 0xffff)));
3096 else
3097 emit_insn (gen_insv_immdi (dest, GEN_INT (16),
3098 GEN_INT ((val >> 16) & 0xffff)));
82614948 3099 }
9a4865db 3100 return 2;
43e9d192
IB
3101 }
3102
3103 /* Remaining cases are all for DImode. */
3104
43e9d192 3105 mask = 0xffff;
9a4865db
WD
3106 zero_match = ((val & mask) == 0) + ((val & (mask << 16)) == 0) +
3107 ((val & (mask << 32)) == 0) + ((val & (mask << 48)) == 0);
3108 one_match = ((~val & mask) == 0) + ((~val & (mask << 16)) == 0) +
3109 ((~val & (mask << 32)) == 0) + ((~val & (mask << 48)) == 0);
43e9d192 3110
62c8d76c 3111 if (zero_match != 2 && one_match != 2)
43e9d192 3112 {
62c8d76c
WD
3113 /* Try emitting a bitmask immediate with a movk replacing 16 bits.
3114 For a 64-bit bitmask try whether changing 16 bits to all ones or
3115 zeroes creates a valid bitmask. To check any repeated bitmask,
3116 try using 16 bits from the other 32-bit half of val. */
43e9d192 3117
62c8d76c 3118 for (i = 0; i < 64; i += 16, mask <<= 16)
43e9d192 3119 {
62c8d76c
WD
3120 val2 = val & ~mask;
3121 if (val2 != val && aarch64_bitmask_imm (val2, mode))
3122 break;
3123 val2 = val | mask;
3124 if (val2 != val && aarch64_bitmask_imm (val2, mode))
3125 break;
3126 val2 = val2 & ~mask;
3127 val2 = val2 | (((val2 >> 32) | (val2 << 32)) & mask);
3128 if (val2 != val && aarch64_bitmask_imm (val2, mode))
3129 break;
43e9d192 3130 }
62c8d76c 3131 if (i != 64)
43e9d192 3132 {
62c8d76c 3133 if (generate)
43e9d192 3134 {
62c8d76c
WD
3135 emit_insn (gen_rtx_SET (dest, GEN_INT (val2)));
3136 emit_insn (gen_insv_immdi (dest, GEN_INT (i),
9a4865db 3137 GEN_INT ((val >> i) & 0xffff)));
43e9d192 3138 }
1312b1ba 3139 return 2;
43e9d192
IB
3140 }
3141 }
3142
9a4865db
WD
3143 /* Generate 2-4 instructions, skipping 16 bits of all zeroes or ones which
3144 are emitted by the initial mov. If one_match > zero_match, skip set bits,
3145 otherwise skip zero bits. */
2c274197 3146
9a4865db 3147 num_insns = 1;
43e9d192 3148 mask = 0xffff;
9a4865db
WD
3149 val2 = one_match > zero_match ? ~val : val;
3150 i = (val2 & mask) != 0 ? 0 : (val2 & (mask << 16)) != 0 ? 16 : 32;
3151
3152 if (generate)
3153 emit_insn (gen_rtx_SET (dest, GEN_INT (one_match > zero_match
3154 ? (val | ~(mask << i))
3155 : (val & (mask << i)))));
3156 for (i += 16; i < 64; i += 16)
43e9d192 3157 {
9a4865db
WD
3158 if ((val2 & (mask << i)) == 0)
3159 continue;
3160 if (generate)
3161 emit_insn (gen_insv_immdi (dest, GEN_INT (i),
3162 GEN_INT ((val >> i) & 0xffff)));
3163 num_insns ++;
82614948
RR
3164 }
3165
3166 return num_insns;
3167}
3168
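/* A minimal standalone sketch of the chunk-counting idea used on the
   final path above (illustrative only; the helper name is made up and
   the earlier MOV/MOVN, bitmask and 32-bit shortcuts are ignored):
   choose an all-zero or all-one 16-bit "background" and count the
   chunks of VAL that differ from it -- one MOV/MOVN for the first
   differing chunk plus one MOVK for each additional one.  */

static int
mov_imm_chunk_count (unsigned long long val)
{
  int zeros = 0, ones = 0;
  for (int i = 0; i < 64; i += 16)
    {
      unsigned int chunk = (val >> i) & 0xffff;
      zeros += (chunk == 0);
      ones += (chunk == 0xffff);
    }
  unsigned long long background = ones > zeros ? 0xffffULL : 0;
  int count = 0;
  for (int i = 0; i < 64; i += 16)
    count += (((val >> i) & 0xffff) != background);
  return count ? count : 1;
}

/* For example, mov_imm_chunk_count (0x1234567800000000ULL) is 2:
   a MOVZ of 0x5678 at bit position 32 followed by a MOVK of 0x1234
   at bit position 48.  */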
c0bb5bc5
WD
3169/* Return whether imm is a 128-bit immediate which is simple enough to
3170 expand inline. */
3171bool
3172aarch64_mov128_immediate (rtx imm)
3173{
3174 if (GET_CODE (imm) == CONST_INT)
3175 return true;
3176
3177 gcc_assert (CONST_WIDE_INT_NUNITS (imm) == 2);
3178
3179 rtx lo = GEN_INT (CONST_WIDE_INT_ELT (imm, 0));
3180 rtx hi = GEN_INT (CONST_WIDE_INT_ELT (imm, 1));
3181
3182 return aarch64_internal_mov_immediate (NULL_RTX, lo, false, DImode)
3183 + aarch64_internal_mov_immediate (NULL_RTX, hi, false, DImode) <= 4;
3184}
3185
3186
43cacb12
RS
3187/* Return the number of temporary registers that aarch64_add_offset_1
3188 would need to add OFFSET to a register. */
3189
3190static unsigned int
3191aarch64_add_offset_1_temporaries (HOST_WIDE_INT offset)
3192{
3193 return abs_hwi (offset) < 0x1000000 ? 0 : 1;
3194}
3195
f5470a77
RS
3196/* A subroutine of aarch64_add_offset. Set DEST to SRC + OFFSET for
3197 a non-polynomial OFFSET. MODE is the mode of the addition.
3198 FRAME_RELATED_P is true if the RTX_FRAME_RELATED flag should
3199 be set and CFA adjustments added to the generated instructions.
3200
3201 TEMP1, if nonnull, is a register of mode MODE that can be used as a
3202 temporary if register allocation is already complete. This temporary
3203 register may overlap DEST but must not overlap SRC. If TEMP1 is known
3204 to hold abs (OFFSET), EMIT_MOVE_IMM can be set to false to avoid emitting
3205 the immediate again.
0100c5f9
RS
3206
3207 Since this function may be used to adjust the stack pointer, we must
3208 ensure that it cannot cause transient stack deallocation (for example
3209 by first incrementing SP and then decrementing when adjusting by a
3210 large immediate). */
3211
3212static void
f5470a77
RS
3213aarch64_add_offset_1 (scalar_int_mode mode, rtx dest,
3214 rtx src, HOST_WIDE_INT offset, rtx temp1,
3215 bool frame_related_p, bool emit_move_imm)
0100c5f9 3216{
f5470a77
RS
3217 gcc_assert (emit_move_imm || temp1 != NULL_RTX);
3218 gcc_assert (temp1 == NULL_RTX || !reg_overlap_mentioned_p (temp1, src));
3219
3220 HOST_WIDE_INT moffset = abs_hwi (offset);
0100c5f9
RS
3221 rtx_insn *insn;
3222
f5470a77
RS
3223 if (!moffset)
3224 {
3225 if (!rtx_equal_p (dest, src))
3226 {
3227 insn = emit_insn (gen_rtx_SET (dest, src));
3228 RTX_FRAME_RELATED_P (insn) = frame_related_p;
3229 }
3230 return;
3231 }
0100c5f9
RS
3232
3233 /* Single instruction adjustment. */
f5470a77 3234 if (aarch64_uimm12_shift (moffset))
0100c5f9 3235 {
f5470a77 3236 insn = emit_insn (gen_add3_insn (dest, src, GEN_INT (offset)));
0100c5f9
RS
3237 RTX_FRAME_RELATED_P (insn) = frame_related_p;
3238 return;
3239 }
3240
f5470a77
RS
3241 /* Emit 2 additions/subtractions if the adjustment is less than 24 bits
3242 and either:
3243
3244 a) the offset cannot be loaded by a 16-bit move or
3245 b) there is no spare register into which we can move it. */
3246 if (moffset < 0x1000000
3247 && ((!temp1 && !can_create_pseudo_p ())
3248 || !aarch64_move_imm (moffset, mode)))
0100c5f9 3249 {
f5470a77 3250 HOST_WIDE_INT low_off = moffset & 0xfff;
0100c5f9 3251
f5470a77
RS
3252 low_off = offset < 0 ? -low_off : low_off;
3253 insn = emit_insn (gen_add3_insn (dest, src, GEN_INT (low_off)));
0100c5f9 3254 RTX_FRAME_RELATED_P (insn) = frame_related_p;
f5470a77 3255 insn = emit_insn (gen_add2_insn (dest, GEN_INT (offset - low_off)));
0100c5f9
RS
3256 RTX_FRAME_RELATED_P (insn) = frame_related_p;
3257 return;
3258 }
3259
3260 /* Emit a move immediate if required and an addition/subtraction. */
0100c5f9 3261 if (emit_move_imm)
f5470a77
RS
3262 {
3263 gcc_assert (temp1 != NULL_RTX || can_create_pseudo_p ());
3264 temp1 = aarch64_force_temporary (mode, temp1, GEN_INT (moffset));
3265 }
3266 insn = emit_insn (offset < 0
3267 ? gen_sub3_insn (dest, src, temp1)
3268 : gen_add3_insn (dest, src, temp1));
0100c5f9
RS
3269 if (frame_related_p)
3270 {
3271 RTX_FRAME_RELATED_P (insn) = frame_related_p;
f5470a77
RS
3272 rtx adj = plus_constant (mode, src, offset);
3273 add_reg_note (insn, REG_CFA_ADJUST_CFA, gen_rtx_SET (dest, adj));
0100c5f9
RS
3274 }
3275}
3276
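/* Illustrative worked example: adding 0x123456 to a register cannot use
   a single 12-bit (optionally shifted) immediate, but it is below
   0x1000000 and not a MOV-able immediate, so the code above splits it
   into "add dest, src, #0x456" followed by "add dest, dest, #0x123000";
   an offset of 0x12345678 instead takes the final branch: the immediate
   is built in TEMP1 with a MOV/MOVK pair and then added (or subtracted,
   for negative offsets) as a register operand.  */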
43cacb12
RS
3277/* Return the number of temporary registers that aarch64_add_offset
3278 would need to move OFFSET into a register or add OFFSET to a register;
3279 ADD_P is true if we want the latter rather than the former. */
3280
3281static unsigned int
3282aarch64_offset_temporaries (bool add_p, poly_int64 offset)
3283{
3284 /* This follows the same structure as aarch64_add_offset. */
3285 if (add_p && aarch64_sve_addvl_addpl_immediate_p (offset))
3286 return 0;
3287
3288 unsigned int count = 0;
3289 HOST_WIDE_INT factor = offset.coeffs[1];
3290 HOST_WIDE_INT constant = offset.coeffs[0] - factor;
3291 poly_int64 poly_offset (factor, factor);
3292 if (add_p && aarch64_sve_addvl_addpl_immediate_p (poly_offset))
3293 /* Need one register for the ADDVL/ADDPL result. */
3294 count += 1;
3295 else if (factor != 0)
3296 {
3297 factor = abs (factor);
3298 if (factor > 16 * (factor & -factor))
3299 /* Need one register for the CNT result and one for the multiplication
3300 factor. If necessary, the second temporary can be reused for the
3301 constant part of the offset. */
3302 return 2;
3303 /* Need one register for the CNT result (which might then
3304 be shifted). */
3305 count += 1;
3306 }
3307 return count + aarch64_add_offset_1_temporaries (constant);
3308}
3309
3310/* If X can be represented as a poly_int64, return the number
3311 of temporaries that are required to add it to a register.
3312 Return -1 otherwise. */
3313
3314int
3315aarch64_add_offset_temporaries (rtx x)
3316{
3317 poly_int64 offset;
3318 if (!poly_int_rtx_p (x, &offset))
3319 return -1;
3320 return aarch64_offset_temporaries (true, offset);
3321}
3322
f5470a77
RS
3323/* Set DEST to SRC + OFFSET. MODE is the mode of the addition.
3324 FRAME_RELATED_P is true if the RTX_FRAME_RELATED flag should
3325 be set and CFA adjustments added to the generated instructions.
3326
3327 TEMP1, if nonnull, is a register of mode MODE that can be used as a
3328 temporary if register allocation is already complete. This temporary
43cacb12
RS
3329 register may overlap DEST if !FRAME_RELATED_P but must not overlap SRC.
3330 If TEMP1 is known to hold abs (OFFSET), EMIT_MOVE_IMM can be set to
3331 false to avoid emitting the immediate again.
3332
3333 TEMP2, if nonnull, is a second temporary register that doesn't
 3334 overlap either DEST or SRC. */
f5470a77
RS
3335
3336 Since this function may be used to adjust the stack pointer, we must
3337 ensure that it cannot cause transient stack deallocation (for example
3338 by first incrementing SP and then decrementing when adjusting by a
3339 large immediate). */
3340
3341static void
3342aarch64_add_offset (scalar_int_mode mode, rtx dest, rtx src,
43cacb12
RS
3343 poly_int64 offset, rtx temp1, rtx temp2,
3344 bool frame_related_p, bool emit_move_imm = true)
0100c5f9 3345{
f5470a77
RS
3346 gcc_assert (emit_move_imm || temp1 != NULL_RTX);
3347 gcc_assert (temp1 == NULL_RTX || !reg_overlap_mentioned_p (temp1, src));
43cacb12
RS
3348 gcc_assert (temp1 == NULL_RTX
3349 || !frame_related_p
3350 || !reg_overlap_mentioned_p (temp1, dest));
3351 gcc_assert (temp2 == NULL_RTX || !reg_overlap_mentioned_p (dest, temp2));
3352
3353 /* Try using ADDVL or ADDPL to add the whole value. */
3354 if (src != const0_rtx && aarch64_sve_addvl_addpl_immediate_p (offset))
3355 {
3356 rtx offset_rtx = gen_int_mode (offset, mode);
3357 rtx_insn *insn = emit_insn (gen_add3_insn (dest, src, offset_rtx));
3358 RTX_FRAME_RELATED_P (insn) = frame_related_p;
3359 return;
3360 }
3361
3362 /* Coefficient 1 is multiplied by the number of 128-bit blocks in an
3363 SVE vector register, over and above the minimum size of 128 bits.
3364 This is equivalent to half the value returned by CNTD with a
3365 vector shape of ALL. */
3366 HOST_WIDE_INT factor = offset.coeffs[1];
3367 HOST_WIDE_INT constant = offset.coeffs[0] - factor;
3368
3369 /* Try using ADDVL or ADDPL to add the VG-based part. */
3370 poly_int64 poly_offset (factor, factor);
3371 if (src != const0_rtx
3372 && aarch64_sve_addvl_addpl_immediate_p (poly_offset))
3373 {
3374 rtx offset_rtx = gen_int_mode (poly_offset, mode);
3375 if (frame_related_p)
3376 {
3377 rtx_insn *insn = emit_insn (gen_add3_insn (dest, src, offset_rtx));
3378 RTX_FRAME_RELATED_P (insn) = true;
3379 src = dest;
3380 }
3381 else
3382 {
3383 rtx addr = gen_rtx_PLUS (mode, src, offset_rtx);
3384 src = aarch64_force_temporary (mode, temp1, addr);
3385 temp1 = temp2;
3386 temp2 = NULL_RTX;
3387 }
3388 }
3389 /* Otherwise use a CNT-based sequence. */
3390 else if (factor != 0)
3391 {
3392 /* Use a subtraction if we have a negative factor. */
3393 rtx_code code = PLUS;
3394 if (factor < 0)
3395 {
3396 factor = -factor;
3397 code = MINUS;
3398 }
3399
3400 /* Calculate CNTD * FACTOR / 2. First try to fold the division
3401 into the multiplication. */
3402 rtx val;
3403 int shift = 0;
3404 if (factor & 1)
3405 /* Use a right shift by 1. */
3406 shift = -1;
3407 else
3408 factor /= 2;
3409 HOST_WIDE_INT low_bit = factor & -factor;
3410 if (factor <= 16 * low_bit)
3411 {
3412 if (factor > 16 * 8)
3413 {
3414 /* "CNTB Xn, ALL, MUL #FACTOR" is out of range, so calculate
3415 the value with the minimum multiplier and shift it into
3416 position. */
3417 int extra_shift = exact_log2 (low_bit);
3418 shift += extra_shift;
3419 factor >>= extra_shift;
3420 }
3421 val = gen_int_mode (poly_int64 (factor * 2, factor * 2), mode);
3422 }
3423 else
3424 {
3425 /* Use CNTD, then multiply it by FACTOR. */
3426 val = gen_int_mode (poly_int64 (2, 2), mode);
3427 val = aarch64_force_temporary (mode, temp1, val);
3428
3429 /* Go back to using a negative multiplication factor if we have
3430 no register from which to subtract. */
3431 if (code == MINUS && src == const0_rtx)
3432 {
3433 factor = -factor;
3434 code = PLUS;
3435 }
3436 rtx coeff1 = gen_int_mode (factor, mode);
3437 coeff1 = aarch64_force_temporary (mode, temp2, coeff1);
3438 val = gen_rtx_MULT (mode, val, coeff1);
3439 }
3440
3441 if (shift > 0)
3442 {
3443 /* Multiply by 1 << SHIFT. */
3444 val = aarch64_force_temporary (mode, temp1, val);
3445 val = gen_rtx_ASHIFT (mode, val, GEN_INT (shift));
3446 }
3447 else if (shift == -1)
3448 {
3449 /* Divide by 2. */
3450 val = aarch64_force_temporary (mode, temp1, val);
3451 val = gen_rtx_ASHIFTRT (mode, val, const1_rtx);
3452 }
3453
3454 /* Calculate SRC +/- CNTD * FACTOR / 2. */
3455 if (src != const0_rtx)
3456 {
3457 val = aarch64_force_temporary (mode, temp1, val);
3458 val = gen_rtx_fmt_ee (code, mode, src, val);
3459 }
3460 else if (code == MINUS)
3461 {
3462 val = aarch64_force_temporary (mode, temp1, val);
3463 val = gen_rtx_NEG (mode, val);
3464 }
3465
3466 if (constant == 0 || frame_related_p)
3467 {
3468 rtx_insn *insn = emit_insn (gen_rtx_SET (dest, val));
3469 if (frame_related_p)
3470 {
3471 RTX_FRAME_RELATED_P (insn) = true;
3472 add_reg_note (insn, REG_CFA_ADJUST_CFA,
3473 gen_rtx_SET (dest, plus_constant (Pmode, src,
3474 poly_offset)));
3475 }
3476 src = dest;
3477 if (constant == 0)
3478 return;
3479 }
3480 else
3481 {
3482 src = aarch64_force_temporary (mode, temp1, val);
3483 temp1 = temp2;
3484 temp2 = NULL_RTX;
3485 }
3486
3487 emit_move_imm = true;
3488 }
f5470a77 3489
f5470a77
RS
3490 aarch64_add_offset_1 (mode, dest, src, constant, temp1,
3491 frame_related_p, emit_move_imm);
0100c5f9
RS
3492}
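/* Worked example of the CNT-based sequence above, with values chosen
   purely for illustration: suppose the VL-based part of OFFSET has
   FACTOR == 768, i.e. 48 vector lengths, which is outside the ADDVL
   immediate range of [-32, 31].  FACTOR is even, so it is halved to 384
   with no trailing right shift needed.  LOW_BIT is then 128, and
   although 384 <= 16 * 128, we have 384 > 16 * 8, so the minimum
   multiplier is used instead: EXTRA_SHIFT becomes 7, FACTOR becomes 3
   and VAL is the poly constant {6, 6} (3 * CNTD), which is then shifted
   left by 7 to give 768 * VQ == FACTOR * CNTD / 2, using only TEMP1.  */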
3493
43cacb12
RS
3494/* Like aarch64_add_offset, but the offset is given as an rtx rather
3495 than a poly_int64. */
3496
3497void
3498aarch64_split_add_offset (scalar_int_mode mode, rtx dest, rtx src,
3499 rtx offset_rtx, rtx temp1, rtx temp2)
3500{
3501 aarch64_add_offset (mode, dest, src, rtx_to_poly_int64 (offset_rtx),
3502 temp1, temp2, false);
3503}
3504
f5470a77
RS
3505/* Add DELTA to the stack pointer, marking the instructions frame-related.
3506 TEMP1 is available as a temporary if nonnull. EMIT_MOVE_IMM is false
3507 if TEMP1 already contains abs (DELTA). */
3508
0100c5f9 3509static inline void
43cacb12 3510aarch64_add_sp (rtx temp1, rtx temp2, poly_int64 delta, bool emit_move_imm)
0100c5f9 3511{
f5470a77 3512 aarch64_add_offset (Pmode, stack_pointer_rtx, stack_pointer_rtx, delta,
43cacb12 3513 temp1, temp2, true, emit_move_imm);
0100c5f9
RS
3514}
3515
f5470a77
RS
3516/* Subtract DELTA from the stack pointer, marking the instructions
3517 frame-related if FRAME_RELATED_P. TEMP1 is available as a temporary
3518 if nonnull. */
3519
0100c5f9 3520static inline void
cd1bef27
JL
3521aarch64_sub_sp (rtx temp1, rtx temp2, poly_int64 delta, bool frame_related_p,
3522 bool emit_move_imm = true)
0100c5f9 3523{
f5470a77 3524 aarch64_add_offset (Pmode, stack_pointer_rtx, stack_pointer_rtx, -delta,
cd1bef27 3525 temp1, temp2, frame_related_p, emit_move_imm);
0100c5f9 3526}
82614948 3527
43cacb12
RS
3528/* Set DEST to (vec_series BASE STEP). */
3529
3530static void
3531aarch64_expand_vec_series (rtx dest, rtx base, rtx step)
82614948
RR
3532{
3533 machine_mode mode = GET_MODE (dest);
43cacb12
RS
3534 scalar_mode inner = GET_MODE_INNER (mode);
3535
3536 /* Each operand can be a register or an immediate in the range [-16, 15]. */
3537 if (!aarch64_sve_index_immediate_p (base))
3538 base = force_reg (inner, base);
3539 if (!aarch64_sve_index_immediate_p (step))
3540 step = force_reg (inner, step);
3541
3542 emit_set_insn (dest, gen_rtx_VEC_SERIES (mode, base, step));
3543}
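/* For example, the constant series { 0, 1, 2, 3, ... } in VNx4SImode
   becomes (vec_series (const_int 0) (const_int 1)), which the SVE
   patterns can implement as a single INDEX instruction; a step such as
   23 lies outside [-16, 15] and would be forced into a register
   first.  */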
82614948 3544
4aeb1ba7
RS
3545/* Duplicate 128-bit Advanced SIMD vector SRC so that it fills an SVE
3546 register of mode MODE. Use TARGET for the result if it's nonnull
3547 and convenient.
3548
3549 The two vector modes must have the same element mode. The behavior
3550 is to duplicate architectural lane N of SRC into architectural lanes
3551 N + I * STEP of the result. On big-endian targets, architectural
3552 lane 0 of an Advanced SIMD vector is the last element of the vector
3553 in memory layout, so for big-endian targets this operation has the
3554 effect of reversing SRC before duplicating it. Callers need to
3555 account for this. */
43cacb12 3556
4aeb1ba7
RS
3557rtx
3558aarch64_expand_sve_dupq (rtx target, machine_mode mode, rtx src)
3559{
3560 machine_mode src_mode = GET_MODE (src);
3561 gcc_assert (GET_MODE_INNER (mode) == GET_MODE_INNER (src_mode));
3562 insn_code icode = (BYTES_BIG_ENDIAN
3563 ? code_for_aarch64_vec_duplicate_vq_be (mode)
3564 : code_for_aarch64_vec_duplicate_vq_le (mode));
3565
3566 unsigned int i = 0;
3567 expand_operand ops[3];
3568 create_output_operand (&ops[i++], target, mode);
3569 create_output_operand (&ops[i++], src, src_mode);
3570 if (BYTES_BIG_ENDIAN)
3571 {
3572 /* Create a PARALLEL describing the reversal of SRC. */
3573 unsigned int nelts_per_vq = 128 / GET_MODE_UNIT_BITSIZE (mode);
3574 rtx sel = aarch64_gen_stepped_int_parallel (nelts_per_vq,
3575 nelts_per_vq - 1, -1);
3576 create_fixed_operand (&ops[i++], sel);
43cacb12 3577 }
4aeb1ba7
RS
3578 expand_insn (icode, i, ops);
3579 return ops[0].value;
3580}
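/* As a concrete illustration: with MODE == VNx4SImode there are four .S
   lanes per 128-bit quadword, so architectural lane 1 of SRC is
   duplicated into lanes 1, 5, 9, ... of the result, with the extra
   big-endian reversal described above applied first where needed.  */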
3581
3582/* Try to force 128-bit vector value SRC into memory and use LD1RQ to fetch
3583 the memory image into DEST. Return true on success. */
43cacb12 3584
4aeb1ba7
RS
3585static bool
3586aarch64_expand_sve_ld1rq (rtx dest, rtx src)
3587{
3588 src = force_const_mem (GET_MODE (src), src);
43cacb12
RS
3589 if (!src)
3590 return false;
3591
3592 /* Make sure that the address is legitimate. */
4aeb1ba7 3593 if (!aarch64_sve_ld1rq_operand_p (src))
43cacb12
RS
3594 {
3595 rtx addr = force_reg (Pmode, XEXP (src, 0));
3596 src = replace_equiv_address (src, addr);
3597 }
3598
947b1372
RS
3599 machine_mode mode = GET_MODE (dest);
3600 unsigned int elem_bytes = GET_MODE_UNIT_SIZE (mode);
3601 machine_mode pred_mode = aarch64_sve_pred_mode (elem_bytes).require ();
16de3637 3602 rtx ptrue = aarch64_ptrue_reg (pred_mode);
4aeb1ba7 3603 emit_insn (gen_aarch64_sve_ld1rq (mode, dest, src, ptrue));
43cacb12
RS
3604 return true;
3605}
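/* The effect is that the 128-bit value is materialized once in the
   constant pool and then broadcast; for example, a VNx4SI constant that
   repeats { 1, 2, 3, 4 } becomes a single predicated LD1RQW that copies
   those sixteen bytes into every 128-bit granule of DEST.  */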
3606
4aeb1ba7
RS
3607/* Return a register containing CONST_VECTOR SRC, given that SRC has an
3608 SVE data mode and isn't a legitimate constant. Use TARGET for the
3609 result if convenient.
43cacb12 3610
4aeb1ba7
RS
3611 The returned register can have whatever mode seems most natural
3612 given the contents of SRC. */
3613
3614static rtx
3615aarch64_expand_sve_const_vector (rtx target, rtx src)
43cacb12
RS
3616{
3617 machine_mode mode = GET_MODE (src);
3618 unsigned int npatterns = CONST_VECTOR_NPATTERNS (src);
3619 unsigned int nelts_per_pattern = CONST_VECTOR_NELTS_PER_PATTERN (src);
4aeb1ba7
RS
3620 scalar_mode elt_mode = GET_MODE_INNER (mode);
3621 unsigned int elt_bits = GET_MODE_BITSIZE (elt_mode);
3622 unsigned int encoded_bits = npatterns * nelts_per_pattern * elt_bits;
3623
3624 if (nelts_per_pattern == 1 && encoded_bits == 128)
3625 {
3626 /* The constant is a duplicated quadword but can't be narrowed
3627 beyond a quadword. Get the memory image of the first quadword
3628 as a 128-bit vector and try using LD1RQ to load it from memory.
3629
3630 The effect for both endiannesses is to load memory lane N into
3631 architectural lanes N + I * STEP of the result. On big-endian
3632 targets, the layout of the 128-bit vector in an Advanced SIMD
3633 register would be different from its layout in an SVE register,
3634 but this 128-bit vector is a memory value only. */
3635 machine_mode vq_mode = aarch64_vq_mode (elt_mode).require ();
3636 rtx vq_value = simplify_gen_subreg (vq_mode, src, mode, 0);
3637 if (vq_value && aarch64_expand_sve_ld1rq (target, vq_value))
3638 return target;
3639 }
3640
3641 if (nelts_per_pattern == 1 && encoded_bits < 128)
3642 {
3643 /* The vector is a repeating sequence of 64 bits or fewer.
3644 See if we can load them using an Advanced SIMD move and then
3645 duplicate it to fill a vector. This is better than using a GPR
3646 move because it keeps everything in the same register file. */
3647 machine_mode vq_mode = aarch64_vq_mode (elt_mode).require ();
3648 rtx_vector_builder builder (vq_mode, npatterns, 1);
3649 for (unsigned int i = 0; i < npatterns; ++i)
3650 {
3651 /* We want memory lane N to go into architectural lane N,
3652 so reverse for big-endian targets. The DUP .Q pattern
3653 has a compensating reverse built-in. */
3654 unsigned int srci = BYTES_BIG_ENDIAN ? npatterns - i - 1 : i;
3655 builder.quick_push (CONST_VECTOR_ENCODED_ELT (src, srci));
3656 }
3657 rtx vq_src = builder.build ();
3658 if (aarch64_simd_valid_immediate (vq_src, NULL))
3659 {
3660 vq_src = force_reg (vq_mode, vq_src);
3661 return aarch64_expand_sve_dupq (target, mode, vq_src);
3662 }
3663
3664 /* Get an integer representation of the repeating part of Advanced
3665 SIMD vector VQ_SRC. This preserves the endianness of VQ_SRC,
3666 which for big-endian targets is lane-swapped wrt a normal
3667 Advanced SIMD vector. This means that for both endiannesses,
3668 memory lane N of SVE vector SRC corresponds to architectural
3669 lane N of a register holding VQ_SRC. This in turn means that
3670 memory lane 0 of SVE vector SRC is in the lsb of VQ_SRC (viewed
3671 as a single 128-bit value) and thus that memory lane 0 of SRC is
3672 in the lsb of the integer. Duplicating the integer therefore
3673 ensures that memory lane N of SRC goes into architectural lane
 3674 N + I * NPATTERNS of the SVE register. */
3675 scalar_mode int_mode = int_mode_for_size (encoded_bits, 0).require ();
3676 rtx elt_value = simplify_gen_subreg (int_mode, vq_src, vq_mode, 0);
3677 if (elt_value)
3678 {
3679 /* Pretend that we had a vector of INT_MODE to start with. */
3680 elt_mode = int_mode;
3681 mode = aarch64_full_sve_mode (int_mode).require ();
3682
3683 /* If the integer can be moved into a general register by a
3684 single instruction, do that and duplicate the result. */
3685 if (CONST_INT_P (elt_value)
3686 && aarch64_move_imm (INTVAL (elt_value), elt_mode))
3687 {
3688 elt_value = force_reg (elt_mode, elt_value);
3689 return expand_vector_broadcast (mode, elt_value);
3690 }
3691 }
3692 else if (npatterns == 1)
3693 /* We're duplicating a single value, but can't do better than
3694 force it to memory and load from there. This handles things
3695 like symbolic constants. */
3696 elt_value = CONST_VECTOR_ENCODED_ELT (src, 0);
43cacb12 3697
4aeb1ba7 3698 if (elt_value)
8179efe0 3699 {
4aeb1ba7
RS
3700 /* Load the element from memory if we can, otherwise move it into
3701 a register and use a DUP. */
3702 rtx op = force_const_mem (elt_mode, elt_value);
3703 if (!op)
3704 op = force_reg (elt_mode, elt_value);
3705 return expand_vector_broadcast (mode, op);
8179efe0 3706 }
43cacb12
RS
3707 }
3708
4aeb1ba7
RS
3709 /* Try using INDEX. */
3710 rtx base, step;
3711 if (const_vec_series_p (src, &base, &step))
3712 {
3713 aarch64_expand_vec_series (target, base, step);
3714 return target;
3715 }
3716
3717 /* From here on, it's better to force the whole constant to memory
3718 if we can. */
3719 if (GET_MODE_NUNITS (mode).is_constant ())
3720 return NULL_RTX;
3721
43cacb12 3722 /* Expand each pattern individually. */
4aeb1ba7 3723 gcc_assert (npatterns > 1);
43cacb12
RS
3724 rtx_vector_builder builder;
3725 auto_vec<rtx, 16> vectors (npatterns);
3726 for (unsigned int i = 0; i < npatterns; ++i)
3727 {
3728 builder.new_vector (mode, 1, nelts_per_pattern);
3729 for (unsigned int j = 0; j < nelts_per_pattern; ++j)
3730 builder.quick_push (CONST_VECTOR_ELT (src, i + j * npatterns));
3731 vectors.quick_push (force_reg (mode, builder.build ()));
3732 }
3733
3734 /* Use permutes to interleave the separate vectors. */
3735 while (npatterns > 1)
3736 {
3737 npatterns /= 2;
3738 for (unsigned int i = 0; i < npatterns; ++i)
3739 {
4aeb1ba7 3740 rtx tmp = (npatterns == 1 ? target : gen_reg_rtx (mode));
43cacb12
RS
3741 rtvec v = gen_rtvec (2, vectors[i], vectors[i + npatterns]);
3742 emit_set_insn (tmp, gen_rtx_UNSPEC (mode, v, UNSPEC_ZIP1));
3743 vectors[i] = tmp;
3744 }
3745 }
4aeb1ba7
RS
3746 gcc_assert (vectors[0] == target);
3747 return target;
43cacb12
RS
3748}
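/* To illustrate the interleaving step at the end: for a constant that
   repeats the four elements { a, b, c, d }, the loop starts from the
   broadcast vectors A, B, C and D.  The first round produces
   ZIP1 (A, C) = { a, c, a, c, ... } and ZIP1 (B, D) = { b, d, b, d, ... },
   and the second round zips those two into { a, b, c, d, a, b, c, d, ... },
   which is the required constant.  */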
3749
678faefc
RS
3750/* Use WHILE to set a predicate register of mode MODE in which the first
3751 VL bits are set and the rest are clear. Use TARGET for the register
3752 if it's nonnull and convenient. */
0b1fe8cf 3753
678faefc
RS
3754static rtx
3755aarch64_sve_move_pred_via_while (rtx target, machine_mode mode,
3756 unsigned int vl)
0b1fe8cf
RS
3757{
3758 rtx limit = force_reg (DImode, gen_int_mode (vl, DImode));
678faefc
RS
3759 target = aarch64_target_reg (target, mode);
3760 emit_insn (gen_while_ult (DImode, mode, target, const0_rtx, limit));
3761 return target;
3762}
3763
2803bc3b
RS
3764static rtx
3765aarch64_expand_sve_const_pred_1 (rtx, rtx_vector_builder &, bool);
3766
3767/* BUILDER is a constant predicate in which the index of every set bit
3768 is a multiple of ELT_SIZE (which is <= 8). Try to load the constant
3769 by inverting every element at a multiple of ELT_SIZE and EORing the
3770 result with an ELT_SIZE PTRUE.
3771
3772 Return a register that contains the constant on success, otherwise
3773 return null. Use TARGET as the register if it is nonnull and
3774 convenient. */
3775
3776static rtx
3777aarch64_expand_sve_const_pred_eor (rtx target, rtx_vector_builder &builder,
3778 unsigned int elt_size)
3779{
3780 /* Invert every element at a multiple of ELT_SIZE, keeping the
3781 other bits zero. */
3782 rtx_vector_builder inv_builder (VNx16BImode, builder.npatterns (),
3783 builder.nelts_per_pattern ());
3784 for (unsigned int i = 0; i < builder.encoded_nelts (); ++i)
3785 if ((i & (elt_size - 1)) == 0 && INTVAL (builder.elt (i)) == 0)
3786 inv_builder.quick_push (const1_rtx);
3787 else
3788 inv_builder.quick_push (const0_rtx);
3789 inv_builder.finalize ();
3790
3791 /* See if we can load the constant cheaply. */
3792 rtx inv = aarch64_expand_sve_const_pred_1 (NULL_RTX, inv_builder, false);
3793 if (!inv)
3794 return NULL_RTX;
3795
3796 /* EOR the result with an ELT_SIZE PTRUE. */
3797 rtx mask = aarch64_ptrue_all (elt_size);
3798 mask = force_reg (VNx16BImode, mask);
3799 target = aarch64_target_reg (target, VNx16BImode);
3800 emit_insn (gen_aarch64_pred_z (XOR, VNx16BImode, target, mask, inv, mask));
3801 return target;
3802}
3803
3804/* BUILDER is a constant predicate in which the index of every set bit
3805 is a multiple of ELT_SIZE (which is <= 8). Try to load the constant
3806 using a TRN1 of size PERMUTE_SIZE, which is >= ELT_SIZE. Return the
3807 register on success, otherwise return null. Use TARGET as the register
3808 if nonnull and convenient. */
3809
3810static rtx
3811aarch64_expand_sve_const_pred_trn (rtx target, rtx_vector_builder &builder,
3812 unsigned int elt_size,
3813 unsigned int permute_size)
3814{
3815 /* We're going to split the constant into two new constants A and B,
3816 with element I of BUILDER going into A if (I & PERMUTE_SIZE) == 0
3817 and into B otherwise. E.g. for PERMUTE_SIZE == 4 && ELT_SIZE == 1:
3818
3819 A: { 0, 1, 2, 3, _, _, _, _, 8, 9, 10, 11, _, _, _, _ }
3820 B: { 4, 5, 6, 7, _, _, _, _, 12, 13, 14, 15, _, _, _, _ }
3821
3822 where _ indicates elements that will be discarded by the permute.
3823
3824 First calculate the ELT_SIZEs for A and B. */
3825 unsigned int a_elt_size = GET_MODE_SIZE (DImode);
3826 unsigned int b_elt_size = GET_MODE_SIZE (DImode);
3827 for (unsigned int i = 0; i < builder.encoded_nelts (); i += elt_size)
3828 if (INTVAL (builder.elt (i)) != 0)
3829 {
3830 if (i & permute_size)
3831 b_elt_size |= i - permute_size;
3832 else
3833 a_elt_size |= i;
3834 }
3835 a_elt_size &= -a_elt_size;
3836 b_elt_size &= -b_elt_size;
3837
3838 /* Now construct the vectors themselves. */
3839 rtx_vector_builder a_builder (VNx16BImode, builder.npatterns (),
3840 builder.nelts_per_pattern ());
3841 rtx_vector_builder b_builder (VNx16BImode, builder.npatterns (),
3842 builder.nelts_per_pattern ());
3843 unsigned int nelts = builder.encoded_nelts ();
3844 for (unsigned int i = 0; i < nelts; ++i)
3845 if (i & (elt_size - 1))
3846 {
3847 a_builder.quick_push (const0_rtx);
3848 b_builder.quick_push (const0_rtx);
3849 }
3850 else if ((i & permute_size) == 0)
3851 {
3852 /* The A and B elements are significant. */
3853 a_builder.quick_push (builder.elt (i));
3854 b_builder.quick_push (builder.elt (i + permute_size));
3855 }
3856 else
3857 {
3858 /* The A and B elements are going to be discarded, so pick whatever
3859 is likely to give a nice constant. We are targeting element
3860 sizes A_ELT_SIZE and B_ELT_SIZE for A and B respectively,
3861 with the aim of each being a sequence of ones followed by
3862 a sequence of zeros. So:
3863
3864 * if X_ELT_SIZE <= PERMUTE_SIZE, the best approach is to
3865 duplicate the last X_ELT_SIZE element, to extend the
3866 current sequence of ones or zeros.
3867
3868 * if X_ELT_SIZE > PERMUTE_SIZE, the best approach is to add a
3869 zero, so that the constant really does have X_ELT_SIZE and
3870 not a smaller size. */
3871 if (a_elt_size > permute_size)
3872 a_builder.quick_push (const0_rtx);
3873 else
3874 a_builder.quick_push (a_builder.elt (i - a_elt_size));
3875 if (b_elt_size > permute_size)
3876 b_builder.quick_push (const0_rtx);
3877 else
3878 b_builder.quick_push (b_builder.elt (i - b_elt_size));
3879 }
3880 a_builder.finalize ();
3881 b_builder.finalize ();
3882
3883 /* Try loading A into a register. */
3884 rtx_insn *last = get_last_insn ();
3885 rtx a = aarch64_expand_sve_const_pred_1 (NULL_RTX, a_builder, false);
3886 if (!a)
3887 return NULL_RTX;
3888
3889 /* Try loading B into a register. */
3890 rtx b = a;
3891 if (a_builder != b_builder)
3892 {
3893 b = aarch64_expand_sve_const_pred_1 (NULL_RTX, b_builder, false);
3894 if (!b)
3895 {
3896 delete_insns_since (last);
3897 return NULL_RTX;
3898 }
3899 }
3900
3901 /* Emit the TRN1 itself. */
3902 machine_mode mode = aarch64_sve_pred_mode (permute_size).require ();
3903 target = aarch64_target_reg (target, mode);
3904 emit_insn (gen_aarch64_sve (UNSPEC_TRN1, mode, target,
3905 gen_lowpart (mode, a),
3906 gen_lowpart (mode, b)));
3907 return target;
3908}
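/* The TRN1 above operates on PERMUTE_SIZE-sized predicate elements and
   interleaves the even-numbered elements of A and B, so in the
   PERMUTE_SIZE == 4, ELT_SIZE == 1 example above it reassembles
   { 0, 1, 2, ..., 15 } from the two simpler constants.  */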
3909
678faefc
RS
3910/* Subroutine of aarch64_expand_sve_const_pred. Try to load the VNx16BI
3911 constant in BUILDER into an SVE predicate register. Return the register
3912 on success, otherwise return null. Use TARGET for the register if
2803bc3b
RS
3913 nonnull and convenient.
3914
3915 ALLOW_RECURSE_P is true if we can use methods that would call this
3916 function recursively. */
678faefc
RS
3917
3918static rtx
2803bc3b
RS
3919aarch64_expand_sve_const_pred_1 (rtx target, rtx_vector_builder &builder,
3920 bool allow_recurse_p)
678faefc
RS
3921{
3922 if (builder.encoded_nelts () == 1)
3923 /* A PFALSE or a PTRUE .B ALL. */
3924 return aarch64_emit_set_immediate (target, builder);
3925
3926 unsigned int elt_size = aarch64_widest_sve_pred_elt_size (builder);
3927 if (int vl = aarch64_partial_ptrue_length (builder, elt_size))
3928 {
3929 /* If we can load the constant using PTRUE, use it as-is. */
3930 machine_mode mode = aarch64_sve_pred_mode (elt_size).require ();
3931 if (aarch64_svpattern_for_vl (mode, vl) != AARCH64_NUM_SVPATTERNS)
3932 return aarch64_emit_set_immediate (target, builder);
3933
3934 /* Otherwise use WHILE to set the first VL bits. */
3935 return aarch64_sve_move_pred_via_while (target, mode, vl);
3936 }
3937
2803bc3b
RS
3938 if (!allow_recurse_p)
3939 return NULL_RTX;
3940
3941 /* Try inverting the vector in element size ELT_SIZE and then EORing
3942 the result with an ELT_SIZE PTRUE. */
3943 if (INTVAL (builder.elt (0)) == 0)
3944 if (rtx res = aarch64_expand_sve_const_pred_eor (target, builder,
3945 elt_size))
3946 return res;
3947
3948 /* Try using TRN1 to permute two simpler constants. */
3949 for (unsigned int i = elt_size; i <= 8; i *= 2)
3950 if (rtx res = aarch64_expand_sve_const_pred_trn (target, builder,
3951 elt_size, i))
3952 return res;
3953
678faefc
RS
3954 return NULL_RTX;
3955}
3956
3957/* Return an SVE predicate register that contains the VNx16BImode
3958 constant in BUILDER, without going through the move expanders.
3959
3960 The returned register can have whatever mode seems most natural
3961 given the contents of BUILDER. Use TARGET for the result if
3962 convenient. */
3963
3964static rtx
3965aarch64_expand_sve_const_pred (rtx target, rtx_vector_builder &builder)
3966{
3967 /* Try loading the constant using pure predicate operations. */
2803bc3b 3968 if (rtx res = aarch64_expand_sve_const_pred_1 (target, builder, true))
678faefc
RS
3969 return res;
3970
3971 /* Try forcing the constant to memory. */
3972 if (builder.full_nelts ().is_constant ())
3973 if (rtx mem = force_const_mem (VNx16BImode, builder.build ()))
3974 {
3975 target = aarch64_target_reg (target, VNx16BImode);
3976 emit_move_insn (target, mem);
3977 return target;
3978 }
3979
3980 /* The last resort is to load the constant as an integer and then
3981 compare it against zero. Use -1 for set bits in order to increase
 3982 the chances of using SVE DUPM or an Advanced SIMD byte mask. */
3983 rtx_vector_builder int_builder (VNx16QImode, builder.npatterns (),
3984 builder.nelts_per_pattern ());
3985 for (unsigned int i = 0; i < builder.encoded_nelts (); ++i)
3986 int_builder.quick_push (INTVAL (builder.elt (i))
3987 ? constm1_rtx : const0_rtx);
3988 return aarch64_convert_sve_data_to_pred (target, VNx16BImode,
3989 int_builder.build ());
0b1fe8cf
RS
3990}
3991
4aeb1ba7 3992/* Set DEST to immediate IMM. */
43cacb12
RS
3993
3994void
4aeb1ba7 3995aarch64_expand_mov_immediate (rtx dest, rtx imm)
43cacb12
RS
3996{
3997 machine_mode mode = GET_MODE (dest);
82614948
RR
3998
3999 /* Check on what type of symbol it is. */
77e994c9
RS
4000 scalar_int_mode int_mode;
4001 if ((GET_CODE (imm) == SYMBOL_REF
4002 || GET_CODE (imm) == LABEL_REF
43cacb12
RS
4003 || GET_CODE (imm) == CONST
4004 || GET_CODE (imm) == CONST_POLY_INT)
77e994c9 4005 && is_a <scalar_int_mode> (mode, &int_mode))
82614948 4006 {
43cacb12
RS
4007 rtx mem;
4008 poly_int64 offset;
4009 HOST_WIDE_INT const_offset;
82614948
RR
4010 enum aarch64_symbol_type sty;
4011
4012 /* If we have (const (plus symbol offset)), separate out the offset
4013 before we start classifying the symbol. */
43cacb12 4014 rtx base = strip_offset (imm, &offset);
82614948 4015
43cacb12
RS
4016 /* We must always add an offset involving VL separately, rather than
4017 folding it into the relocation. */
4018 if (!offset.is_constant (&const_offset))
4019 {
4020 if (base == const0_rtx && aarch64_sve_cnt_immediate_p (offset))
4021 emit_insn (gen_rtx_SET (dest, imm));
4022 else
4023 {
4024 /* Do arithmetic on 32-bit values if the result is smaller
4025 than that. */
4026 if (partial_subreg_p (int_mode, SImode))
4027 {
4028 /* It is invalid to do symbol calculations in modes
4029 narrower than SImode. */
4030 gcc_assert (base == const0_rtx);
4031 dest = gen_lowpart (SImode, dest);
4032 int_mode = SImode;
4033 }
4034 if (base != const0_rtx)
4035 {
4036 base = aarch64_force_temporary (int_mode, dest, base);
4037 aarch64_add_offset (int_mode, dest, base, offset,
4038 NULL_RTX, NULL_RTX, false);
4039 }
4040 else
4041 aarch64_add_offset (int_mode, dest, base, offset,
4042 dest, NULL_RTX, false);
4043 }
4044 return;
4045 }
4046
4047 sty = aarch64_classify_symbol (base, const_offset);
82614948
RR
4048 switch (sty)
4049 {
4050 case SYMBOL_FORCE_TO_MEM:
43cacb12 4051 if (const_offset != 0
77e994c9 4052 && targetm.cannot_force_const_mem (int_mode, imm))
82614948
RR
4053 {
4054 gcc_assert (can_create_pseudo_p ());
77e994c9 4055 base = aarch64_force_temporary (int_mode, dest, base);
43cacb12
RS
4056 aarch64_add_offset (int_mode, dest, base, const_offset,
4057 NULL_RTX, NULL_RTX, false);
82614948
RR
4058 return;
4059 }
b4f50fd4 4060
82614948
RR
4061 mem = force_const_mem (ptr_mode, imm);
4062 gcc_assert (mem);
b4f50fd4
RR
4063
4064 /* If we aren't generating PC relative literals, then
4065 we need to expand the literal pool access carefully.
4066 This is something that needs to be done in a number
4067 of places, so could well live as a separate function. */
9ee6540a 4068 if (!aarch64_pcrelative_literal_loads)
b4f50fd4
RR
4069 {
4070 gcc_assert (can_create_pseudo_p ());
4071 base = gen_reg_rtx (ptr_mode);
4072 aarch64_expand_mov_immediate (base, XEXP (mem, 0));
00eee3fa
WD
4073 if (ptr_mode != Pmode)
4074 base = convert_memory_address (Pmode, base);
b4f50fd4
RR
4075 mem = gen_rtx_MEM (ptr_mode, base);
4076 }
4077
77e994c9
RS
4078 if (int_mode != ptr_mode)
4079 mem = gen_rtx_ZERO_EXTEND (int_mode, mem);
b4f50fd4 4080
f7df4a84 4081 emit_insn (gen_rtx_SET (dest, mem));
b4f50fd4 4082
82614948
RR
4083 return;
4084
4085 case SYMBOL_SMALL_TLSGD:
4086 case SYMBOL_SMALL_TLSDESC:
79496620 4087 case SYMBOL_SMALL_TLSIE:
1b1e81f8 4088 case SYMBOL_SMALL_GOT_28K:
6642bdb4 4089 case SYMBOL_SMALL_GOT_4G:
82614948 4090 case SYMBOL_TINY_GOT:
5ae7caad 4091 case SYMBOL_TINY_TLSIE:
43cacb12 4092 if (const_offset != 0)
82614948
RR
4093 {
 4094 gcc_assert (can_create_pseudo_p ());
77e994c9 4095 base = aarch64_force_temporary (int_mode, dest, base);
43cacb12
RS
4096 aarch64_add_offset (int_mode, dest, base, const_offset,
4097 NULL_RTX, NULL_RTX, false);
82614948
RR
4098 return;
4099 }
4100 /* FALLTHRU */
4101
82614948
RR
4102 case SYMBOL_SMALL_ABSOLUTE:
4103 case SYMBOL_TINY_ABSOLUTE:
cbf5629e 4104 case SYMBOL_TLSLE12:
d18ba284 4105 case SYMBOL_TLSLE24:
cbf5629e
JW
4106 case SYMBOL_TLSLE32:
4107 case SYMBOL_TLSLE48:
82614948
RR
4108 aarch64_load_symref_appropriately (dest, imm, sty);
4109 return;
4110
4111 default:
4112 gcc_unreachable ();
4113 }
4114 }
4115
4116 if (!CONST_INT_P (imm))
4117 {
678faefc
RS
4118 if (GET_MODE_CLASS (mode) == MODE_VECTOR_BOOL)
4119 {
4120 /* Only the low bit of each .H, .S and .D element is defined,
4121 so we can set the upper bits to whatever we like. If the
4122 predicate is all-true in MODE, prefer to set all the undefined
4123 bits as well, so that we can share a single .B predicate for
4124 all modes. */
4125 if (imm == CONSTM1_RTX (mode))
4126 imm = CONSTM1_RTX (VNx16BImode);
4127
4128 /* All methods for constructing predicate modes wider than VNx16BI
4129 will set the upper bits of each element to zero. Expose this
4130 by moving such constants as a VNx16BI, so that all bits are
4131 significant and so that constants for different modes can be
4132 shared. The wider constant will still be available as a
4133 REG_EQUAL note. */
4134 rtx_vector_builder builder;
4135 if (aarch64_get_sve_pred_bits (builder, imm))
4136 {
4137 rtx res = aarch64_expand_sve_const_pred (dest, builder);
4138 if (dest != res)
4139 emit_move_insn (dest, gen_lowpart (mode, res));
4140 return;
4141 }
4142 }
4143
43cacb12
RS
4144 if (GET_CODE (imm) == HIGH
4145 || aarch64_simd_valid_immediate (imm, NULL))
43cacb12 4146 {
4aeb1ba7
RS
4147 emit_insn (gen_rtx_SET (dest, imm));
4148 return;
43e9d192 4149 }
82614948 4150
4aeb1ba7
RS
4151 if (GET_CODE (imm) == CONST_VECTOR && aarch64_sve_data_mode_p (mode))
4152 if (rtx res = aarch64_expand_sve_const_vector (dest, imm))
4153 {
4154 if (dest != res)
4155 emit_insn (gen_aarch64_sve_reinterpret (mode, dest, res));
4156 return;
4157 }
4158
4159 rtx mem = force_const_mem (mode, imm);
4160 gcc_assert (mem);
4161 emit_move_insn (dest, mem);
82614948 4162 return;
43e9d192 4163 }
82614948 4164
77e994c9
RS
4165 aarch64_internal_mov_immediate (dest, imm, true,
4166 as_a <scalar_int_mode> (mode));
43e9d192
IB
4167}
4168
43cacb12
RS
4169/* Emit an SVE predicated move from SRC to DEST. PRED is a predicate
4170 that is known to contain PTRUE. */
4171
4172void
4173aarch64_emit_sve_pred_move (rtx dest, rtx pred, rtx src)
4174{
0c63a8ee
TC
4175 expand_operand ops[3];
4176 machine_mode mode = GET_MODE (dest);
4177 create_output_operand (&ops[0], dest, mode);
4178 create_input_operand (&ops[1], pred, GET_MODE(pred));
4179 create_input_operand (&ops[2], src, mode);
f2b29269 4180 temporary_volatile_ok v (true);
0c63a8ee 4181 expand_insn (code_for_aarch64_pred_mov (mode), 3, ops);
43cacb12
RS
4182}
4183
4184/* Expand a pre-RA SVE data move from SRC to DEST in which at least one
4185 operand is in memory. In this case we need to use the predicated LD1
4186 and ST1 instead of LDR and STR, both for correctness on big-endian
4187 targets and because LD1 and ST1 support a wider range of addressing modes.
4188 PRED_MODE is the mode of the predicate.
4189
4190 See the comment at the head of aarch64-sve.md for details about the
4191 big-endian handling. */
4192
4193void
4194aarch64_expand_sve_mem_move (rtx dest, rtx src, machine_mode pred_mode)
4195{
4196 machine_mode mode = GET_MODE (dest);
16de3637 4197 rtx ptrue = aarch64_ptrue_reg (pred_mode);
43cacb12
RS
4198 if (!register_operand (src, mode)
4199 && !register_operand (dest, mode))
4200 {
4201 rtx tmp = gen_reg_rtx (mode);
4202 if (MEM_P (src))
4203 aarch64_emit_sve_pred_move (tmp, ptrue, src);
4204 else
4205 emit_move_insn (tmp, src);
4206 src = tmp;
4207 }
4208 aarch64_emit_sve_pred_move (dest, ptrue, src);
4209}
4210
002092be
RS
4211/* Called only on big-endian targets. See whether an SVE vector move
4212 from SRC to DEST is effectively a REV[BHW] instruction, because at
4213 least one operand is a subreg of an SVE vector that has wider or
4214 narrower elements. Return true and emit the instruction if so.
4215
4216 For example:
4217
4218 (set (reg:VNx8HI R1) (subreg:VNx8HI (reg:VNx16QI R2) 0))
4219
4220 represents a VIEW_CONVERT between the following vectors, viewed
4221 in memory order:
4222
4223 R2: { [0].high, [0].low, [1].high, [1].low, ... }
4224 R1: { [0], [1], [2], [3], ... }
4225
4226 The high part of lane X in R2 should therefore correspond to lane X*2
4227 of R1, but the register representations are:
4228
4229 msb lsb
4230 R2: ...... [1].high [1].low [0].high [0].low
4231 R1: ...... [3] [2] [1] [0]
4232
4233 where the low part of lane X in R2 corresponds to lane X*2 in R1.
4234 We therefore need a reverse operation to swap the high and low values
4235 around.
4236
4237 This is purely an optimization. Without it we would spill the
4238 subreg operand to the stack in one mode and reload it in the
4239 other mode, which has the same effect as the REV. */
4240
4241bool
4242aarch64_maybe_expand_sve_subreg_move (rtx dest, rtx src)
4243{
4244 gcc_assert (BYTES_BIG_ENDIAN);
4245 if (GET_CODE (dest) == SUBREG)
4246 dest = SUBREG_REG (dest);
4247 if (GET_CODE (src) == SUBREG)
4248 src = SUBREG_REG (src);
4249
4250 /* The optimization handles two single SVE REGs with different element
4251 sizes. */
4252 if (!REG_P (dest)
4253 || !REG_P (src)
4254 || aarch64_classify_vector_mode (GET_MODE (dest)) != VEC_SVE_DATA
4255 || aarch64_classify_vector_mode (GET_MODE (src)) != VEC_SVE_DATA
4256 || (GET_MODE_UNIT_SIZE (GET_MODE (dest))
4257 == GET_MODE_UNIT_SIZE (GET_MODE (src))))
4258 return false;
4259
4260 /* Generate *aarch64_sve_mov<mode>_subreg_be. */
16de3637 4261 rtx ptrue = aarch64_ptrue_reg (VNx16BImode);
002092be
RS
4262 rtx unspec = gen_rtx_UNSPEC (GET_MODE (dest), gen_rtvec (2, ptrue, src),
4263 UNSPEC_REV_SUBREG);
4264 emit_insn (gen_rtx_SET (dest, unspec));
4265 return true;
4266}
4267
4268/* Return a copy of X with mode MODE, without changing its other
4269 attributes. Unlike gen_lowpart, this doesn't care whether the
4270 mode change is valid. */
4271
4272static rtx
4273aarch64_replace_reg_mode (rtx x, machine_mode mode)
4274{
4275 if (GET_MODE (x) == mode)
4276 return x;
4277
4278 x = shallow_copy_rtx (x);
4279 set_mode_and_regno (x, mode, REGNO (x));
4280 return x;
4281}
4282
4283/* Split a *aarch64_sve_mov<mode>_subreg_be pattern with the given
4284 operands. */
4285
4286void
4287aarch64_split_sve_subreg_move (rtx dest, rtx ptrue, rtx src)
4288{
4289 /* Decide which REV operation we need. The mode with narrower elements
4290 determines the mode of the operands and the mode with the wider
4291 elements determines the reverse width. */
4292 machine_mode mode_with_wider_elts = GET_MODE (dest);
4293 machine_mode mode_with_narrower_elts = GET_MODE (src);
4294 if (GET_MODE_UNIT_SIZE (mode_with_wider_elts)
4295 < GET_MODE_UNIT_SIZE (mode_with_narrower_elts))
4296 std::swap (mode_with_wider_elts, mode_with_narrower_elts);
4297
4298 unsigned int wider_bytes = GET_MODE_UNIT_SIZE (mode_with_wider_elts);
4299 unsigned int unspec;
4300 if (wider_bytes == 8)
4301 unspec = UNSPEC_REV64;
4302 else if (wider_bytes == 4)
4303 unspec = UNSPEC_REV32;
4304 else if (wider_bytes == 2)
4305 unspec = UNSPEC_REV16;
4306 else
4307 gcc_unreachable ();
4308 machine_mode pred_mode = aarch64_sve_pred_mode (wider_bytes).require ();
4309
4310 /* Emit:
4311
06308276 4312 (set DEST (unspec [PTRUE (unspec [SRC] UNSPEC_REV<nn>)] UNSPEC_PRED_X))
002092be
RS
4313
4314 with the appropriate modes. */
4315 ptrue = gen_lowpart (pred_mode, ptrue);
4316 dest = aarch64_replace_reg_mode (dest, mode_with_narrower_elts);
4317 src = aarch64_replace_reg_mode (src, mode_with_narrower_elts);
4318 src = gen_rtx_UNSPEC (mode_with_narrower_elts, gen_rtvec (1, src), unspec);
4319 src = gen_rtx_UNSPEC (mode_with_narrower_elts, gen_rtvec (2, ptrue, src),
06308276 4320 UNSPEC_PRED_X);
002092be
RS
4321 emit_insn (gen_rtx_SET (dest, src));
4322}
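/* For example, a move between a VNx16QI register and a VNx8HI subreg of
   it is split with both operands taken in the byte mode, a VNx8BI
   governing predicate and UNSPEC_REV16, i.e. a predicated reversal of
   the bytes within each halfword.  */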
4323
43e9d192 4324static bool
fee9ba42
JW
4325aarch64_function_ok_for_sibcall (tree decl ATTRIBUTE_UNUSED,
4326 tree exp ATTRIBUTE_UNUSED)
43e9d192 4327{
a0d0b980
SE
4328 if (aarch64_simd_decl_p (cfun->decl) != aarch64_simd_decl_p (decl))
4329 return false;
4330
43e9d192
IB
4331 return true;
4332}
4333
4334/* Implement TARGET_PASS_BY_REFERENCE. */
4335
4336static bool
4337aarch64_pass_by_reference (cumulative_args_t pcum ATTRIBUTE_UNUSED,
ef4bddc2 4338 machine_mode mode,
43e9d192
IB
4339 const_tree type,
4340 bool named ATTRIBUTE_UNUSED)
4341{
4342 HOST_WIDE_INT size;
ef4bddc2 4343 machine_mode dummymode;
43e9d192
IB
4344 int nregs;
4345
4346 /* GET_MODE_SIZE (BLKmode) is useless since it is 0. */
6a70badb
RS
4347 if (mode == BLKmode && type)
4348 size = int_size_in_bytes (type);
4349 else
4350 /* No frontends can create types with variable-sized modes, so we
4351 shouldn't be asked to pass or return them. */
4352 size = GET_MODE_SIZE (mode).to_constant ();
43e9d192 4353
aadc1c43
MHD
4354 /* Aggregates are passed by reference based on their size. */
4355 if (type && AGGREGATE_TYPE_P (type))
43e9d192 4356 {
aadc1c43 4357 size = int_size_in_bytes (type);
43e9d192
IB
4358 }
4359
 4360 /* Variable-sized arguments are always passed by reference. */
4361 if (size < 0)
4362 return true;
4363
4364 /* Can this be a candidate to be passed in fp/simd register(s)? */
4365 if (aarch64_vfp_is_call_or_return_candidate (mode, type,
4366 &dummymode, &nregs,
4367 NULL))
4368 return false;
4369
4370 /* Arguments which are variable sized or larger than 2 registers are
 4371 passed by reference unless they are a homogeneous floating-point
4372 aggregate. */
4373 return size > 2 * UNITS_PER_WORD;
4374}
4375
4376/* Return TRUE if VALTYPE is padded to its least significant bits. */
4377static bool
4378aarch64_return_in_msb (const_tree valtype)
4379{
ef4bddc2 4380 machine_mode dummy_mode;
43e9d192
IB
4381 int dummy_int;
4382
4383 /* Never happens in little-endian mode. */
4384 if (!BYTES_BIG_ENDIAN)
4385 return false;
4386
4387 /* Only composite types smaller than or equal to 16 bytes can
4388 be potentially returned in registers. */
4389 if (!aarch64_composite_type_p (valtype, TYPE_MODE (valtype))
4390 || int_size_in_bytes (valtype) <= 0
4391 || int_size_in_bytes (valtype) > 16)
4392 return false;
4393
4394 /* But not a composite that is an HFA (Homogeneous Floating-point Aggregate)
4395 or an HVA (Homogeneous Short-Vector Aggregate); such a special composite
4396 is always passed/returned in the least significant bits of fp/simd
4397 register(s). */
4398 if (aarch64_vfp_is_call_or_return_candidate (TYPE_MODE (valtype), valtype,
4399 &dummy_mode, &dummy_int, NULL))
4400 return false;
4401
4402 return true;
4403}
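/* For example, on big-endian a 6-byte structure returned in X0 occupies
   the most significant 48 bits of the register, with the padding in the
   low bits; the check above excludes HFAs and HVAs, which are instead
   returned in the least significant bits of FP/SIMD registers.  */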
4404
4405/* Implement TARGET_FUNCTION_VALUE.
4406 Define how to find the value returned by a function. */
4407
4408static rtx
4409aarch64_function_value (const_tree type, const_tree func,
4410 bool outgoing ATTRIBUTE_UNUSED)
4411{
ef4bddc2 4412 machine_mode mode;
43e9d192
IB
4413 int unsignedp;
4414 int count;
ef4bddc2 4415 machine_mode ag_mode;
43e9d192
IB
4416
4417 mode = TYPE_MODE (type);
4418 if (INTEGRAL_TYPE_P (type))
4419 mode = promote_function_mode (type, mode, &unsignedp, func, 1);
4420
4421 if (aarch64_return_in_msb (type))
4422 {
4423 HOST_WIDE_INT size = int_size_in_bytes (type);
4424
4425 if (size % UNITS_PER_WORD != 0)
4426 {
4427 size += UNITS_PER_WORD - size % UNITS_PER_WORD;
f4b31647 4428 mode = int_mode_for_size (size * BITS_PER_UNIT, 0).require ();
43e9d192
IB
4429 }
4430 }
4431
4432 if (aarch64_vfp_is_call_or_return_candidate (mode, type,
4433 &ag_mode, &count, NULL))
4434 {
4435 if (!aarch64_composite_type_p (type, mode))
4436 {
4437 gcc_assert (count == 1 && mode == ag_mode);
4438 return gen_rtx_REG (mode, V0_REGNUM);
4439 }
4440 else
4441 {
4442 int i;
4443 rtx par;
4444
4445 par = gen_rtx_PARALLEL (mode, rtvec_alloc (count));
4446 for (i = 0; i < count; i++)
4447 {
4448 rtx tmp = gen_rtx_REG (ag_mode, V0_REGNUM + i);
6a70badb
RS
4449 rtx offset = gen_int_mode (i * GET_MODE_SIZE (ag_mode), Pmode);
4450 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp, offset);
43e9d192
IB
4451 XVECEXP (par, 0, i) = tmp;
4452 }
4453 return par;
4454 }
4455 }
4456 else
4457 return gen_rtx_REG (mode, R0_REGNUM);
4458}
4459
4460/* Implements TARGET_FUNCTION_VALUE_REGNO_P.
4461 Return true if REGNO is the number of a hard register in which the values
4462 of called function may come back. */
4463
4464static bool
4465aarch64_function_value_regno_p (const unsigned int regno)
4466{
4467 /* Maximum of 16 bytes can be returned in the general registers. Examples
4468 of 16-byte return values are: 128-bit integers and 16-byte small
4469 structures (excluding homogeneous floating-point aggregates). */
4470 if (regno == R0_REGNUM || regno == R1_REGNUM)
4471 return true;
4472
4473 /* Up to four fp/simd registers can return a function value, e.g. a
4474 homogeneous floating-point aggregate having four members. */
4475 if (regno >= V0_REGNUM && regno < V0_REGNUM + HA_MAX_NUM_FLDS)
d5726973 4476 return TARGET_FLOAT;
43e9d192
IB
4477
4478 return false;
4479}
4480
4481/* Implement TARGET_RETURN_IN_MEMORY.
4482
4483 If the type T of the result of a function is such that
4484 void func (T arg)
4485 would require that arg be passed as a value in a register (or set of
4486 registers) according to the parameter passing rules, then the result
4487 is returned in the same registers as would be used for such an
4488 argument. */
4489
4490static bool
4491aarch64_return_in_memory (const_tree type, const_tree fndecl ATTRIBUTE_UNUSED)
4492{
4493 HOST_WIDE_INT size;
ef4bddc2 4494 machine_mode ag_mode;
43e9d192
IB
4495 int count;
4496
4497 if (!AGGREGATE_TYPE_P (type)
4498 && TREE_CODE (type) != COMPLEX_TYPE
4499 && TREE_CODE (type) != VECTOR_TYPE)
 4500 /* Simple scalar types are always returned in registers. */
4501 return false;
4502
4503 if (aarch64_vfp_is_call_or_return_candidate (TYPE_MODE (type),
4504 type,
4505 &ag_mode,
4506 &count,
4507 NULL))
4508 return false;
4509
 4510 /* Types larger than 2 registers are returned in memory. */
4511 size = int_size_in_bytes (type);
4512 return (size < 0 || size > 2 * UNITS_PER_WORD);
4513}
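/* For example, a structure of four doubles is an HFA and is returned in
   V0-V3 even though it is 32 bytes, a 12-byte structure of three ints is
   returned in general registers, and a 24-byte non-homogeneous structure
   is returned in memory.  */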
4514
4515static bool
ef4bddc2 4516aarch64_vfp_is_call_candidate (cumulative_args_t pcum_v, machine_mode mode,
43e9d192
IB
4517 const_tree type, int *nregs)
4518{
4519 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
4520 return aarch64_vfp_is_call_or_return_candidate (mode,
4521 type,
4522 &pcum->aapcs_vfp_rmode,
4523 nregs,
4524 NULL);
4525}
4526
985b8393 4527/* Given MODE and TYPE of a function argument, return the alignment in
43e9d192 4528 bits. The idea is to suppress any stronger alignment requested by
c590597c
RE
4529 the user and opt for the natural alignment (specified in AAPCS64 \S
4530 4.1). ABI_BREAK is set to true if the alignment was incorrectly
4531 calculated in versions of GCC prior to GCC-9. This is a helper
4532 function for local use only. */
43e9d192 4533
985b8393 4534static unsigned int
c590597c
RE
4535aarch64_function_arg_alignment (machine_mode mode, const_tree type,
4536 bool *abi_break)
43e9d192 4537{
c590597c 4538 *abi_break = false;
75d6cc81 4539 if (!type)
985b8393 4540 return GET_MODE_ALIGNMENT (mode);
2ec07fa6 4541
75d6cc81 4542 if (integer_zerop (TYPE_SIZE (type)))
985b8393 4543 return 0;
43e9d192 4544
75d6cc81
AL
4545 gcc_assert (TYPE_MODE (type) == mode);
4546
4547 if (!AGGREGATE_TYPE_P (type))
985b8393 4548 return TYPE_ALIGN (TYPE_MAIN_VARIANT (type));
75d6cc81
AL
4549
4550 if (TREE_CODE (type) == ARRAY_TYPE)
985b8393 4551 return TYPE_ALIGN (TREE_TYPE (type));
75d6cc81 4552
985b8393 4553 unsigned int alignment = 0;
c590597c 4554 unsigned int bitfield_alignment = 0;
75d6cc81 4555 for (tree field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
985b8393 4556 if (TREE_CODE (field) == FIELD_DECL)
c590597c
RE
4557 {
4558 alignment = std::max (alignment, DECL_ALIGN (field));
4559 if (DECL_BIT_FIELD_TYPE (field))
4560 bitfield_alignment
4561 = std::max (bitfield_alignment,
4562 TYPE_ALIGN (DECL_BIT_FIELD_TYPE (field)));
4563 }
4564
4565 if (bitfield_alignment > alignment)
4566 {
4567 *abi_break = true;
4568 return bitfield_alignment;
4569 }
43e9d192 4570
985b8393 4571 return alignment;
43e9d192
IB
4572}
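/* The effect is that over-alignment applied to the aggregate type as a
   whole is ignored -- only the alignment of the individual fields
   counts -- while a bit-field declared with a more aligned type can
   raise the result to that type's alignment, with ABI_BREAK recording
   that releases before GCC 9.1 computed the alignment differently in
   that case.  */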
4573
4574/* Layout a function argument according to the AAPCS64 rules. The rule
4575 numbers refer to the rule numbers in the AAPCS64. */
4576
4577static void
ef4bddc2 4578aarch64_layout_arg (cumulative_args_t pcum_v, machine_mode mode,
43e9d192
IB
4579 const_tree type,
4580 bool named ATTRIBUTE_UNUSED)
4581{
4582 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
4583 int ncrn, nvrn, nregs;
4584 bool allocate_ncrn, allocate_nvrn;
3abf17cf 4585 HOST_WIDE_INT size;
c590597c 4586 bool abi_break;
43e9d192
IB
4587
4588 /* We need to do this once per argument. */
4589 if (pcum->aapcs_arg_processed)
4590 return;
4591
4592 pcum->aapcs_arg_processed = true;
4593
3abf17cf 4594 /* Size in bytes, rounded up to a multiple of 8 bytes. */
6a70badb
RS
4595 if (type)
4596 size = int_size_in_bytes (type);
4597 else
4598 /* No frontends can create types with variable-sized modes, so we
4599 shouldn't be asked to pass or return them. */
4600 size = GET_MODE_SIZE (mode).to_constant ();
4601 size = ROUND_UP (size, UNITS_PER_WORD);
3abf17cf 4602
43e9d192
IB
4603 allocate_ncrn = (type) ? !(FLOAT_TYPE_P (type)) : !FLOAT_MODE_P (mode);
4604 allocate_nvrn = aarch64_vfp_is_call_candidate (pcum_v,
4605 mode,
4606 type,
4607 &nregs);
4608
4609 /* allocate_ncrn may be false-positive, but allocate_nvrn is quite reliable.
4610 The following code thus handles passing by SIMD/FP registers first. */
4611
4612 nvrn = pcum->aapcs_nvrn;
4613
 4614 /* C.1 - C.5 for floating point, homogeneous floating-point aggregates (HFA)
 4615 and homogeneous short-vector aggregates (HVA). */
4616 if (allocate_nvrn)
4617 {
261fb553 4618 if (!TARGET_FLOAT)
fc29dfc9 4619 aarch64_err_no_fpadvsimd (mode);
261fb553 4620
43e9d192
IB
4621 if (nvrn + nregs <= NUM_FP_ARG_REGS)
4622 {
4623 pcum->aapcs_nextnvrn = nvrn + nregs;
4624 if (!aarch64_composite_type_p (type, mode))
4625 {
4626 gcc_assert (nregs == 1);
4627 pcum->aapcs_reg = gen_rtx_REG (mode, V0_REGNUM + nvrn);
4628 }
4629 else
4630 {
4631 rtx par;
4632 int i;
4633 par = gen_rtx_PARALLEL (mode, rtvec_alloc (nregs));
4634 for (i = 0; i < nregs; i++)
4635 {
4636 rtx tmp = gen_rtx_REG (pcum->aapcs_vfp_rmode,
4637 V0_REGNUM + nvrn + i);
6a70badb
RS
4638 rtx offset = gen_int_mode
4639 (i * GET_MODE_SIZE (pcum->aapcs_vfp_rmode), Pmode);
4640 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp, offset);
43e9d192
IB
4641 XVECEXP (par, 0, i) = tmp;
4642 }
4643 pcum->aapcs_reg = par;
4644 }
4645 return;
4646 }
4647 else
4648 {
4649 /* C.3 NSRN is set to 8. */
4650 pcum->aapcs_nextnvrn = NUM_FP_ARG_REGS;
4651 goto on_stack;
4652 }
4653 }
4654
4655 ncrn = pcum->aapcs_ncrn;
3abf17cf 4656 nregs = size / UNITS_PER_WORD;
43e9d192
IB
4657
 4658 /* C.6 - C.9, though the sign and zero extension semantics are
 4659 handled elsewhere. This is the case where the argument fits
 4660 entirely in general registers. */
4661 if (allocate_ncrn && (ncrn + nregs <= NUM_ARG_REGS))
4662 {
43e9d192
IB
4663 gcc_assert (nregs == 0 || nregs == 1 || nregs == 2);
4664
4665 /* C.8 if the argument has an alignment of 16 then the NGRN is
c590597c 4666 rounded up to the next even number. */
985b8393
JJ
4667 if (nregs == 2
4668 && ncrn % 2
2ec07fa6 4669 /* The == 16 * BITS_PER_UNIT instead of >= 16 * BITS_PER_UNIT
985b8393 4670 comparison is there because for > 16 * BITS_PER_UNIT
2ec07fa6
RR
4671 alignment nregs should be > 2 and therefore it should be
4672 passed by reference rather than value. */
c590597c
RE
4673 && (aarch64_function_arg_alignment (mode, type, &abi_break)
4674 == 16 * BITS_PER_UNIT))
985b8393 4675 {
c590597c
RE
4676 if (abi_break && warn_psabi && currently_expanding_gimple_stmt)
4677 inform (input_location, "parameter passing for argument of type "
4678 "%qT changed in GCC 9.1", type);
985b8393
JJ
4679 ++ncrn;
4680 gcc_assert (ncrn + nregs <= NUM_ARG_REGS);
43e9d192 4681 }
2ec07fa6 4682
43e9d192 4683 /* NREGS can be 0 when e.g. an empty structure is to be passed.
c590597c 4684 A reg is still generated for it, but the caller should be smart
43e9d192
IB
4685 enough not to use it. */
4686 if (nregs == 0 || nregs == 1 || GET_MODE_CLASS (mode) == MODE_INT)
2ec07fa6 4687 pcum->aapcs_reg = gen_rtx_REG (mode, R0_REGNUM + ncrn);
43e9d192
IB
4688 else
4689 {
4690 rtx par;
4691 int i;
4692
4693 par = gen_rtx_PARALLEL (mode, rtvec_alloc (nregs));
4694 for (i = 0; i < nregs; i++)
4695 {
4696 rtx tmp = gen_rtx_REG (word_mode, R0_REGNUM + ncrn + i);
4697 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp,
4698 GEN_INT (i * UNITS_PER_WORD));
4699 XVECEXP (par, 0, i) = tmp;
4700 }
4701 pcum->aapcs_reg = par;
4702 }
4703
4704 pcum->aapcs_nextncrn = ncrn + nregs;
4705 return;
4706 }
4707
4708 /* C.11 */
4709 pcum->aapcs_nextncrn = NUM_ARG_REGS;
4710
4711 /* The argument is passed on stack; record the needed number of words for
3abf17cf 4712 this argument and align the total size if necessary. */
43e9d192 4713on_stack:
3abf17cf 4714 pcum->aapcs_stack_words = size / UNITS_PER_WORD;
2ec07fa6 4715
c590597c
RE
4716 if (aarch64_function_arg_alignment (mode, type, &abi_break)
4717 == 16 * BITS_PER_UNIT)
4718 {
4719 int new_size = ROUND_UP (pcum->aapcs_stack_size, 16 / UNITS_PER_WORD);
4720 if (pcum->aapcs_stack_size != new_size)
4721 {
4722 if (abi_break && warn_psabi && currently_expanding_gimple_stmt)
4723 inform (input_location, "parameter passing for argument of type "
4724 "%qT changed in GCC 9.1", type);
4725 pcum->aapcs_stack_size = new_size;
4726 }
4727 }
43e9d192
IB
4728 return;
4729}
4730
4731/* Implement TARGET_FUNCTION_ARG. */
4732
4733static rtx
ef4bddc2 4734aarch64_function_arg (cumulative_args_t pcum_v, machine_mode mode,
43e9d192
IB
4735 const_tree type, bool named)
4736{
4737 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
4738 gcc_assert (pcum->pcs_variant == ARM_PCS_AAPCS64);
4739
4740 if (mode == VOIDmode)
4741 return NULL_RTX;
4742
4743 aarch64_layout_arg (pcum_v, mode, type, named);
4744 return pcum->aapcs_reg;
4745}
4746
4747void
4748aarch64_init_cumulative_args (CUMULATIVE_ARGS *pcum,
4749 const_tree fntype ATTRIBUTE_UNUSED,
4750 rtx libname ATTRIBUTE_UNUSED,
4751 const_tree fndecl ATTRIBUTE_UNUSED,
4752 unsigned n_named ATTRIBUTE_UNUSED)
4753{
4754 pcum->aapcs_ncrn = 0;
4755 pcum->aapcs_nvrn = 0;
4756 pcum->aapcs_nextncrn = 0;
4757 pcum->aapcs_nextnvrn = 0;
4758 pcum->pcs_variant = ARM_PCS_AAPCS64;
4759 pcum->aapcs_reg = NULL_RTX;
4760 pcum->aapcs_arg_processed = false;
4761 pcum->aapcs_stack_words = 0;
4762 pcum->aapcs_stack_size = 0;
4763
261fb553
AL
4764 if (!TARGET_FLOAT
4765 && fndecl && TREE_PUBLIC (fndecl)
4766 && fntype && fntype != error_mark_node)
4767 {
4768 const_tree type = TREE_TYPE (fntype);
4769 machine_mode mode ATTRIBUTE_UNUSED; /* To pass pointer as argument. */
4770 int nregs ATTRIBUTE_UNUSED; /* Likewise. */
4771 if (aarch64_vfp_is_call_or_return_candidate (TYPE_MODE (type), type,
4772 &mode, &nregs, NULL))
fc29dfc9 4773 aarch64_err_no_fpadvsimd (TYPE_MODE (type));
261fb553 4774 }
43e9d192
IB
4775 return;
4776}
4777
4778static void
4779aarch64_function_arg_advance (cumulative_args_t pcum_v,
ef4bddc2 4780 machine_mode mode,
43e9d192
IB
4781 const_tree type,
4782 bool named)
4783{
4784 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
4785 if (pcum->pcs_variant == ARM_PCS_AAPCS64)
4786 {
4787 aarch64_layout_arg (pcum_v, mode, type, named);
4788 gcc_assert ((pcum->aapcs_reg != NULL_RTX)
4789 != (pcum->aapcs_stack_words != 0));
4790 pcum->aapcs_arg_processed = false;
4791 pcum->aapcs_ncrn = pcum->aapcs_nextncrn;
4792 pcum->aapcs_nvrn = pcum->aapcs_nextnvrn;
4793 pcum->aapcs_stack_size += pcum->aapcs_stack_words;
4794 pcum->aapcs_stack_words = 0;
4795 pcum->aapcs_reg = NULL_RTX;
4796 }
4797}
4798
4799bool
4800aarch64_function_arg_regno_p (unsigned regno)
4801{
4802 return ((GP_REGNUM_P (regno) && regno < R0_REGNUM + NUM_ARG_REGS)
4803 || (FP_REGNUM_P (regno) && regno < V0_REGNUM + NUM_FP_ARG_REGS));
4804}
4805
4806/* Implement FUNCTION_ARG_BOUNDARY. Every parameter gets at least
4807 PARM_BOUNDARY bits of alignment, but will be given anything up
4808 to STACK_BOUNDARY bits if the type requires it. This makes sure
4809 that both before and after the layout of each argument, the Next
4810 Stacked Argument Address (NSAA) will have a minimum alignment of
4811 8 bytes. */
4812
4813static unsigned int
ef4bddc2 4814aarch64_function_arg_boundary (machine_mode mode, const_tree type)
43e9d192 4815{
c590597c
RE
4816 bool abi_break;
4817 unsigned int alignment = aarch64_function_arg_alignment (mode, type,
4818 &abi_break);
 4819 if (abi_break && warn_psabi)
4820 inform (input_location, "parameter passing for argument of type "
4821 "%qT changed in GCC 9.1", type);
4822
985b8393 4823 return MIN (MAX (alignment, PARM_BOUNDARY), STACK_BOUNDARY);
43e9d192
IB
4824}
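/* With AArch64's PARM_BOUNDARY of 64 and STACK_BOUNDARY of 128, this
   means e.g. that a plain int argument still gets a 64-bit slot boundary
   while a 16-byte-aligned aggregate gets 128 bits, and nothing larger
   than 128 bits is ever requested.  */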
4825
43cacb12
RS
4826/* Implement TARGET_GET_RAW_RESULT_MODE and TARGET_GET_RAW_ARG_MODE. */
4827
4828static fixed_size_mode
4829aarch64_get_reg_raw_mode (int regno)
4830{
4831 if (TARGET_SVE && FP_REGNUM_P (regno))
4832 /* Don't use the SVE part of the register for __builtin_apply and
4833 __builtin_return. The SVE registers aren't used by the normal PCS,
4834 so using them there would be a waste of time. The PCS extensions
4835 for SVE types are fundamentally incompatible with the
4836 __builtin_return/__builtin_apply interface. */
4837 return as_a <fixed_size_mode> (V16QImode);
4838 return default_get_reg_raw_mode (regno);
4839}
4840
76b0cbf8 4841/* Implement TARGET_FUNCTION_ARG_PADDING.
43e9d192
IB
4842
4843 Small aggregate types are placed in the lowest memory address.
4844
4845 The related parameter passing rules are B.4, C.3, C.5 and C.14. */
4846
76b0cbf8
RS
4847static pad_direction
4848aarch64_function_arg_padding (machine_mode mode, const_tree type)
43e9d192
IB
4849{
4850 /* On little-endian targets, the least significant byte of every stack
4851 argument is passed at the lowest byte address of the stack slot. */
4852 if (!BYTES_BIG_ENDIAN)
76b0cbf8 4853 return PAD_UPWARD;
43e9d192 4854
00edcfbe 4855 /* Otherwise, integral, floating-point and pointer types are padded downward:
43e9d192
IB
4856 the least significant byte of a stack argument is passed at the highest
4857 byte address of the stack slot. */
4858 if (type
00edcfbe
YZ
4859 ? (INTEGRAL_TYPE_P (type) || SCALAR_FLOAT_TYPE_P (type)
4860 || POINTER_TYPE_P (type))
43e9d192 4861 : (SCALAR_INT_MODE_P (mode) || SCALAR_FLOAT_MODE_P (mode)))
76b0cbf8 4862 return PAD_DOWNWARD;
43e9d192
IB
4863
4864 /* Everything else padded upward, i.e. data in first byte of stack slot. */
76b0cbf8 4865 return PAD_UPWARD;
43e9d192
IB
4866}
4867
4868/* Similarly, for use by BLOCK_REG_PADDING (MODE, TYPE, FIRST).
4869
 4870 It specifies padding for the last (possibly the only)
 4871 element of a block move between registers and memory.
 4872 Assuming the block is in memory, padding upward means that
 4873 the last element is padded after its most significant byte,
 4874 while with downward padding the last element is padded on
 4875 its least significant byte side.
4876
4877 Small aggregates and small complex types are always padded
4878 upwards.
4879
4880 We don't need to worry about homogeneous floating-point or
4881 short-vector aggregates; their move is not affected by the
4882 padding direction determined here. Regardless of endianness,
4883 each element of such an aggregate is put in the least
4884 significant bits of a fp/simd register.
4885
4886 Return !BYTES_BIG_ENDIAN if the least significant byte of the
4887 register has useful data, and return the opposite if the most
4888 significant byte does. */
4889
4890bool
ef4bddc2 4891aarch64_pad_reg_upward (machine_mode mode, const_tree type,
43e9d192
IB
4892 bool first ATTRIBUTE_UNUSED)
4893{
4894
4895 /* Small composite types are always padded upward. */
4896 if (BYTES_BIG_ENDIAN && aarch64_composite_type_p (type, mode))
4897 {
6a70badb
RS
4898 HOST_WIDE_INT size;
4899 if (type)
4900 size = int_size_in_bytes (type);
4901 else
4902 /* No frontends can create types with variable-sized modes, so we
4903 shouldn't be asked to pass or return them. */
4904 size = GET_MODE_SIZE (mode).to_constant ();
43e9d192
IB
4905 if (size < 2 * UNITS_PER_WORD)
4906 return true;
4907 }
4908
4909 /* Otherwise, use the default padding. */
4910 return !BYTES_BIG_ENDIAN;
4911}
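/* Editorial sketch (hypothetical sizes): on a big-endian target a
   12-byte composite type has int_size_in_bytes == 12, which is below
   2 * UNITS_PER_WORD (16), so the function above returns true and the
   block is padded upward; a 24-byte composite instead falls through to
   the default return of !BYTES_BIG_ENDIAN, i.e. downward padding on
   big-endian.  */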
4912
095a2d76 4913static scalar_int_mode
43e9d192
IB
4914aarch64_libgcc_cmp_return_mode (void)
4915{
4916 return SImode;
4917}
4918
a3eb8a52
EB
4919#define PROBE_INTERVAL (1 << STACK_CHECK_PROBE_INTERVAL_EXP)
4920
4921/* We use the 12-bit shifted immediate arithmetic instructions so values
4922 must be a multiple of (1 << 12), i.e. 4096. */
4923#define ARITH_FACTOR 4096
4924
4925#if (PROBE_INTERVAL % ARITH_FACTOR) != 0
4926#error Cannot use simple address calculation for stack probing
4927#endif
4928
4929/* The pair of scratch registers used for stack probing. */
8921ccbb
OH
4930#define PROBE_STACK_FIRST_REG R9_REGNUM
4931#define PROBE_STACK_SECOND_REG R10_REGNUM
a3eb8a52 4932
6a70badb 4933/* Emit code to probe a range of stack addresses from FIRST to FIRST+POLY_SIZE,
a3eb8a52
EB
4934 inclusive. These are offsets from the current stack pointer. */
4935
4936static void
6a70badb 4937aarch64_emit_probe_stack_range (HOST_WIDE_INT first, poly_int64 poly_size)
a3eb8a52 4938{
6a70badb
RS
4939 HOST_WIDE_INT size;
4940 if (!poly_size.is_constant (&size))
4941 {
4942 sorry ("stack probes for SVE frames");
4943 return;
4944 }
4945
5f5c5e0f 4946 rtx reg1 = gen_rtx_REG (Pmode, PROBE_STACK_FIRST_REG);
a3eb8a52
EB
4947
4948 /* See the same assertion on PROBE_INTERVAL above. */
4949 gcc_assert ((first % ARITH_FACTOR) == 0);
4950
4951 /* See if we have a constant small number of probes to generate. If so,
4952 that's the easy case. */
4953 if (size <= PROBE_INTERVAL)
4954 {
4955 const HOST_WIDE_INT base = ROUND_UP (size, ARITH_FACTOR);
4956
4957 emit_set_insn (reg1,
5f5c5e0f 4958 plus_constant (Pmode,
a3eb8a52 4959 stack_pointer_rtx, -(first + base)));
5f5c5e0f 4960 emit_stack_probe (plus_constant (Pmode, reg1, base - size));
a3eb8a52
EB
4961 }
4962
4963 /* The run-time loop is made up of 8 insns in the generic case while the
4964 compile-time loop is made up of 4+2*(n-2) insns for n intervals. */
4965 else if (size <= 4 * PROBE_INTERVAL)
4966 {
4967 HOST_WIDE_INT i, rem;
4968
4969 emit_set_insn (reg1,
5f5c5e0f 4970 plus_constant (Pmode,
a3eb8a52
EB
4971 stack_pointer_rtx,
4972 -(first + PROBE_INTERVAL)));
4973 emit_stack_probe (reg1);
4974
4975 /* Probe at FIRST + N * PROBE_INTERVAL for values of N from 2 until
4976 it exceeds SIZE. If only two probes are needed, this will not
4977 generate any code. Then probe at FIRST + SIZE. */
4978 for (i = 2 * PROBE_INTERVAL; i < size; i += PROBE_INTERVAL)
4979 {
4980 emit_set_insn (reg1,
5f5c5e0f 4981 plus_constant (Pmode, reg1, -PROBE_INTERVAL));
a3eb8a52
EB
4982 emit_stack_probe (reg1);
4983 }
4984
4985 rem = size - (i - PROBE_INTERVAL);
4986 if (rem > 256)
4987 {
4988 const HOST_WIDE_INT base = ROUND_UP (rem, ARITH_FACTOR);
4989
5f5c5e0f
EB
4990 emit_set_insn (reg1, plus_constant (Pmode, reg1, -base));
4991 emit_stack_probe (plus_constant (Pmode, reg1, base - rem));
a3eb8a52
EB
4992 }
4993 else
5f5c5e0f 4994 emit_stack_probe (plus_constant (Pmode, reg1, -rem));
a3eb8a52
EB
4995 }
4996
4997 /* Otherwise, do the same as above, but in a loop. Note that we must be
4998 extra careful with variables wrapping around because we might be at
4999 the very top (or the very bottom) of the address space and we have
5000 to be able to handle this case properly; in particular, we use an
5001 equality test for the loop condition. */
5002 else
5003 {
5f5c5e0f 5004 rtx reg2 = gen_rtx_REG (Pmode, PROBE_STACK_SECOND_REG);
a3eb8a52
EB
5005
5006 /* Step 1: round SIZE to the previous multiple of the interval. */
5007
5008 HOST_WIDE_INT rounded_size = size & -PROBE_INTERVAL;
5009
5010
5011 /* Step 2: compute initial and final value of the loop counter. */
5012
5013 /* TEST_ADDR = SP + FIRST. */
5014 emit_set_insn (reg1,
5f5c5e0f 5015 plus_constant (Pmode, stack_pointer_rtx, -first));
a3eb8a52
EB
5016
5017 /* LAST_ADDR = SP + FIRST + ROUNDED_SIZE. */
13f752b2
JL
5018 HOST_WIDE_INT adjustment = - (first + rounded_size);
5019 if (! aarch64_uimm12_shift (adjustment))
5020 {
5021 aarch64_internal_mov_immediate (reg2, GEN_INT (adjustment),
5022 true, Pmode);
5023 emit_set_insn (reg2, gen_rtx_PLUS (Pmode, stack_pointer_rtx, reg2));
5024 }
5025 else
8dd64cdf
EB
5026 emit_set_insn (reg2,
5027 plus_constant (Pmode, stack_pointer_rtx, adjustment));
5028
a3eb8a52
EB
5029 /* Step 3: the loop
5030
5031 do
5032 {
5033 TEST_ADDR = TEST_ADDR + PROBE_INTERVAL
5034 probe at TEST_ADDR
5035 }
5036 while (TEST_ADDR != LAST_ADDR)
5037
5038 probes at FIRST + N * PROBE_INTERVAL for values of N from 1
5039 until it is equal to ROUNDED_SIZE. */
5040
5f5c5e0f 5041 emit_insn (gen_probe_stack_range (reg1, reg1, reg2));
a3eb8a52
EB
5042
5043
5044 /* Step 4: probe at FIRST + SIZE if we cannot assert at compile-time
5045 that SIZE is equal to ROUNDED_SIZE. */
5046
5047 if (size != rounded_size)
5048 {
5049 HOST_WIDE_INT rem = size - rounded_size;
5050
5051 if (rem > 256)
5052 {
5053 const HOST_WIDE_INT base = ROUND_UP (rem, ARITH_FACTOR);
5054
5f5c5e0f
EB
5055 emit_set_insn (reg2, plus_constant (Pmode, reg2, -base));
5056 emit_stack_probe (plus_constant (Pmode, reg2, base - rem));
a3eb8a52
EB
5057 }
5058 else
5f5c5e0f 5059 emit_stack_probe (plus_constant (Pmode, reg2, -rem));
a3eb8a52
EB
5060 }
5061 }
5062
5063 /* Make sure nothing is scheduled before we are done. */
5064 emit_insn (gen_blockage ());
5065}
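/* Editorial sketch of the simplest case above (SIZE <= PROBE_INTERVAL),
   assuming for illustration that PROBE_INTERVAL is 4096, FIRST is 8192
   and SIZE is 2000: BASE is ROUND_UP (2000, 4096) == 4096, so the
   emitted sequence is roughly

     sub  x9, sp, #12288         // reg1 = SP - (FIRST + BASE)
     str  xzr, [x9, #2096]       // probe at SP - (FIRST + SIZE)

   where x9 is PROBE_STACK_FIRST_REG and the probe is the store of zero
   produced by emit_stack_probe on this target.  */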
5066
5067/* Probe a range of stack addresses from REG1 to REG2 inclusive. These are
5068 absolute addresses. */
5069
5070const char *
5071aarch64_output_probe_stack_range (rtx reg1, rtx reg2)
5072{
5073 static int labelno = 0;
5074 char loop_lab[32];
5075 rtx xops[2];
5076
5077 ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno++);
5078
5079 /* Loop. */
5080 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);
5081
cd1bef27
JL
5082 HOST_WIDE_INT stack_clash_probe_interval
5083 = 1 << PARAM_VALUE (PARAM_STACK_CLASH_PROTECTION_GUARD_SIZE);
5084
a3eb8a52
EB
5085 /* TEST_ADDR = TEST_ADDR + PROBE_INTERVAL. */
5086 xops[0] = reg1;
cd1bef27
JL
5087 HOST_WIDE_INT interval;
5088 if (flag_stack_clash_protection)
5089 interval = stack_clash_probe_interval;
5090 else
5091 interval = PROBE_INTERVAL;
5092
5093 gcc_assert (aarch64_uimm12_shift (interval));
5094 xops[1] = GEN_INT (interval);
5095
a3eb8a52
EB
5096 output_asm_insn ("sub\t%0, %0, %1", xops);
5097
cd1bef27
JL
5098 /* If doing stack clash protection then we probe up by the ABI specified
5099 amount. We do this because we're dropping full pages at a time in the
5100 loop. But if we're doing non-stack clash probing, probe at SP 0. */
5101 if (flag_stack_clash_protection)
5102 xops[1] = GEN_INT (STACK_CLASH_CALLER_GUARD);
5103 else
5104 xops[1] = CONST0_RTX (GET_MODE (xops[1]));
5105
5106 /* Probe at TEST_ADDR. If we're inside the loop it is always safe to probe
5107 by this amount for each iteration. */
5108 output_asm_insn ("str\txzr, [%0, %1]", xops);
a3eb8a52
EB
5109
5110 /* Test if TEST_ADDR == LAST_ADDR. */
5111 xops[1] = reg2;
5112 output_asm_insn ("cmp\t%0, %1", xops);
5113
5114 /* Branch. */
5115 fputs ("\tb.ne\t", asm_out_file);
5116 assemble_name_raw (asm_out_file, loop_lab);
5117 fputc ('\n', asm_out_file);
5118
5119 return "";
5120}
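/* Editorial sketch of the assembly emitted above in the non-stack-clash
   case, assuming REG1/REG2 are the x9/x10 scratch registers chosen by
   aarch64_emit_probe_stack_range and an illustrative 4096-byte
   PROBE_INTERVAL:

     .LPSRL0:
         sub  x9, x9, #4096       // TEST_ADDR -= PROBE_INTERVAL
         str  xzr, [x9, 0]        // probe at TEST_ADDR
         cmp  x9, x10             // reached LAST_ADDR?
         b.ne .LPSRL0

   With -fstack-clash-protection the subtraction uses the guard size
   instead and the probe is placed STACK_CLASH_CALLER_GUARD bytes above
   TEST_ADDR.  */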
5121
eb471ba3
TC
5122/* Emit the probe loop for doing stack clash probes and stack adjustments for
5123 SVE. This emits probes from BASE to BASE - ADJUSTMENT based on a guard size
5124 of GUARD_SIZE. When a probe is emitted it is done at most
5125 MIN_PROBE_THRESHOLD bytes from the current BASE at an interval of
5126 at most MIN_PROBE_THRESHOLD. By the end of this function
5127 BASE = BASE - ADJUSTMENT. */
5128
5129const char *
5130aarch64_output_probe_sve_stack_clash (rtx base, rtx adjustment,
5131 rtx min_probe_threshold, rtx guard_size)
5132{
5133 /* This function is not allowed to use any instruction generation function
5134 like gen_ and friends. If you do, you'll likely ICE during CFG validation,
5135 so instead emit the code you want using output_asm_insn. */
5136 gcc_assert (flag_stack_clash_protection);
5137 gcc_assert (CONST_INT_P (min_probe_threshold) && CONST_INT_P (guard_size));
5138 gcc_assert (INTVAL (guard_size) > INTVAL (min_probe_threshold));
5139
5140 /* The minimum required allocation before the residual requires probing. */
5141 HOST_WIDE_INT residual_probe_guard = INTVAL (min_probe_threshold);
5142
5143 /* Clamp the value down to the nearest value that can be used with a cmp. */
5144 residual_probe_guard = aarch64_clamp_to_uimm12_shift (residual_probe_guard);
5145 rtx probe_offset_value_rtx = gen_int_mode (residual_probe_guard, Pmode);
5146
5147 gcc_assert (INTVAL (min_probe_threshold) >= residual_probe_guard);
5148 gcc_assert (aarch64_uimm12_shift (residual_probe_guard));
5149
5150 static int labelno = 0;
5151 char loop_start_lab[32];
5152 char loop_end_lab[32];
5153 rtx xops[2];
5154
5155 ASM_GENERATE_INTERNAL_LABEL (loop_start_lab, "SVLPSPL", labelno);
5156 ASM_GENERATE_INTERNAL_LABEL (loop_end_lab, "SVLPEND", labelno++);
5157
5158 /* Emit loop start label. */
5159 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_start_lab);
5160
5161 /* ADJUSTMENT < RESIDUAL_PROBE_GUARD. */
5162 xops[0] = adjustment;
5163 xops[1] = probe_offset_value_rtx;
5164 output_asm_insn ("cmp\t%0, %1", xops);
5165
5166 /* Branch to end if not enough adjustment to probe. */
5167 fputs ("\tb.lt\t", asm_out_file);
5168 assemble_name_raw (asm_out_file, loop_end_lab);
5169 fputc ('\n', asm_out_file);
5170
5171 /* BASE = BASE - RESIDUAL_PROBE_GUARD. */
5172 xops[0] = base;
5173 xops[1] = probe_offset_value_rtx;
5174 output_asm_insn ("sub\t%0, %0, %1", xops);
5175
5176 /* Probe at BASE. */
5177 xops[1] = const0_rtx;
5178 output_asm_insn ("str\txzr, [%0, %1]", xops);
5179
5180 /* ADJUSTMENT = ADJUSTMENT - RESIDUAL_PROBE_GUARD. */
5181 xops[0] = adjustment;
5182 xops[1] = probe_offset_value_rtx;
5183 output_asm_insn ("sub\t%0, %0, %1", xops);
5184
5185 /* Branch to start if still more bytes to allocate. */
5186 fputs ("\tb\t", asm_out_file);
5187 assemble_name_raw (asm_out_file, loop_start_lab);
5188 fputc ('\n', asm_out_file);
5189
5190 /* No probe needed; fall through to the final adjustment. */
5191 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_end_lab);
5192
5193 /* BASE = BASE - ADJUSTMENT. */
5194 xops[0] = base;
5195 xops[1] = adjustment;
5196 output_asm_insn ("sub\t%0, %0, %1", xops);
5197 return "";
5198}
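/* Editorial sketch of the sequence emitted above, using x0 for BASE and
   x1 for ADJUSTMENT purely for illustration (RPG stands for the clamped
   RESIDUAL_PROBE_GUARD immediate):

     .SVLPSPL0:
         cmp  x1, #RPG            // enough left to need a probe?
         b.lt .SVLPEND0
         sub  x0, x0, #RPG        // BASE -= RPG
         str  xzr, [x0, 0]        // probe at BASE
         sub  x1, x1, #RPG        // ADJUSTMENT -= RPG
         b    .SVLPSPL0
     .SVLPEND0:
         sub  x0, x0, x1          // apply the remaining adjustment

   The remaining adjustment is below the probe threshold by construction,
   so it is applied without a further probe.  */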
5199
d6cb6d6a
WD
5200/* Determine whether a frame chain needs to be generated. */
5201static bool
5202aarch64_needs_frame_chain (void)
5203{
5204 /* Force a frame chain for EH returns so the return address is at FP+8. */
5205 if (frame_pointer_needed || crtl->calls_eh_return)
5206 return true;
5207
5208 /* A leaf function cannot have calls or write LR. */
5209 bool is_leaf = crtl->is_leaf && !df_regs_ever_live_p (LR_REGNUM);
5210
5211 /* Don't use a frame chain in leaf functions if leaf frame pointers
5212 are disabled. */
5213 if (flag_omit_leaf_frame_pointer && is_leaf)
5214 return false;
5215
5216 return aarch64_use_frame_pointer;
5217}
5218
43e9d192
IB
5219/* Mark the registers that need to be saved by the callee and calculate
5220 the size of the callee-saved registers area and frame record (both FP
33a2e348 5221 and LR may be omitted). */
43e9d192
IB
5222static void
5223aarch64_layout_frame (void)
5224{
5225 HOST_WIDE_INT offset = 0;
4b0685d9 5226 int regno, last_fp_reg = INVALID_REGNUM;
a0d0b980 5227 bool simd_function = aarch64_simd_decl_p (cfun->decl);
43e9d192 5228
d6cb6d6a 5229 cfun->machine->frame.emit_frame_chain = aarch64_needs_frame_chain ();
7040939b 5230
8c6e3b23
TC
5231 /* Adjust the outgoing arguments size if required. Keep it in sync with what
5232 the mid-end is doing. */
5233 crtl->outgoing_args_size = STACK_DYNAMIC_OFFSET (cfun);
5234
97826595
MS
5235#define SLOT_NOT_REQUIRED (-2)
5236#define SLOT_REQUIRED (-1)
5237
71bfb77a
WD
5238 cfun->machine->frame.wb_candidate1 = INVALID_REGNUM;
5239 cfun->machine->frame.wb_candidate2 = INVALID_REGNUM;
363ffa50 5240
a0d0b980
SE
5241 /* If this is a non-leaf simd function with calls we assume that
5242 at least one of those calls is to a non-simd function and thus
5243 we must save V8 to V23 in the prologue. */
5244
5245 if (simd_function && !crtl->is_leaf)
5246 {
5247 for (regno = V0_REGNUM; regno <= V31_REGNUM; regno++)
5248 if (FP_SIMD_SAVED_REGNUM_P (regno))
5249 df_set_regs_ever_live (regno, true);
5250 }
5251
43e9d192
IB
5252 /* First mark all the registers that really need to be saved... */
5253 for (regno = R0_REGNUM; regno <= R30_REGNUM; regno++)
97826595 5254 cfun->machine->frame.reg_offset[regno] = SLOT_NOT_REQUIRED;
43e9d192
IB
5255
5256 for (regno = V0_REGNUM; regno <= V31_REGNUM; regno++)
97826595 5257 cfun->machine->frame.reg_offset[regno] = SLOT_NOT_REQUIRED;
43e9d192
IB
5258
5259 /* ... that includes the eh data registers (if needed)... */
5260 if (crtl->calls_eh_return)
5261 for (regno = 0; EH_RETURN_DATA_REGNO (regno) != INVALID_REGNUM; regno++)
97826595
MS
5262 cfun->machine->frame.reg_offset[EH_RETURN_DATA_REGNO (regno)]
5263 = SLOT_REQUIRED;
43e9d192
IB
5264
5265 /* ... and any callee saved register that dataflow says is live. */
5266 for (regno = R0_REGNUM; regno <= R30_REGNUM; regno++)
5267 if (df_regs_ever_live_p (regno)
1c923b60
JW
5268 && (regno == R30_REGNUM
5269 || !call_used_regs[regno]))
97826595 5270 cfun->machine->frame.reg_offset[regno] = SLOT_REQUIRED;
43e9d192
IB
5271
5272 for (regno = V0_REGNUM; regno <= V31_REGNUM; regno++)
5273 if (df_regs_ever_live_p (regno)
a0d0b980
SE
5274 && (!call_used_regs[regno]
5275 || (simd_function && FP_SIMD_SAVED_REGNUM_P (regno))))
4b0685d9
WD
5276 {
5277 cfun->machine->frame.reg_offset[regno] = SLOT_REQUIRED;
5278 last_fp_reg = regno;
5279 }
43e9d192 5280
204d2c03 5281 if (cfun->machine->frame.emit_frame_chain)
43e9d192 5282 {
2e1cdae5 5283 /* FP and LR are placed in the linkage record. */
43e9d192 5284 cfun->machine->frame.reg_offset[R29_REGNUM] = 0;
363ffa50 5285 cfun->machine->frame.wb_candidate1 = R29_REGNUM;
2e1cdae5 5286 cfun->machine->frame.reg_offset[R30_REGNUM] = UNITS_PER_WORD;
363ffa50 5287 cfun->machine->frame.wb_candidate2 = R30_REGNUM;
1f7bffd0
WD
5288 offset = 2 * UNITS_PER_WORD;
5289 }
43e9d192 5290
db6b62a8
TC
5291 /* With stack-clash, LR must be saved in non-leaf functions. */
5292 gcc_assert (crtl->is_leaf
5293 || (cfun->machine->frame.reg_offset[R30_REGNUM]
5294 != SLOT_NOT_REQUIRED));
5295
43e9d192 5296 /* Now assign stack slots for them. */
2e1cdae5 5297 for (regno = R0_REGNUM; regno <= R30_REGNUM; regno++)
97826595 5298 if (cfun->machine->frame.reg_offset[regno] == SLOT_REQUIRED)
43e9d192
IB
5299 {
5300 cfun->machine->frame.reg_offset[regno] = offset;
71bfb77a 5301 if (cfun->machine->frame.wb_candidate1 == INVALID_REGNUM)
363ffa50 5302 cfun->machine->frame.wb_candidate1 = regno;
71bfb77a 5303 else if (cfun->machine->frame.wb_candidate2 == INVALID_REGNUM)
363ffa50 5304 cfun->machine->frame.wb_candidate2 = regno;
43e9d192
IB
5305 offset += UNITS_PER_WORD;
5306 }
5307
4b0685d9
WD
5308 HOST_WIDE_INT max_int_offset = offset;
5309 offset = ROUND_UP (offset, STACK_BOUNDARY / BITS_PER_UNIT);
5310 bool has_align_gap = offset != max_int_offset;
5311
43e9d192 5312 for (regno = V0_REGNUM; regno <= V31_REGNUM; regno++)
97826595 5313 if (cfun->machine->frame.reg_offset[regno] == SLOT_REQUIRED)
43e9d192 5314 {
4b0685d9
WD
5315 /* If there is an alignment gap between integer and fp callee-saves,
5316 allocate the last fp register to it if possible. */
a0d0b980
SE
5317 if (regno == last_fp_reg
5318 && has_align_gap
5319 && !simd_function
5320 && (offset & 8) == 0)
4b0685d9
WD
5321 {
5322 cfun->machine->frame.reg_offset[regno] = max_int_offset;
5323 break;
5324 }
5325
43e9d192 5326 cfun->machine->frame.reg_offset[regno] = offset;
71bfb77a 5327 if (cfun->machine->frame.wb_candidate1 == INVALID_REGNUM)
363ffa50 5328 cfun->machine->frame.wb_candidate1 = regno;
71bfb77a 5329 else if (cfun->machine->frame.wb_candidate2 == INVALID_REGNUM
363ffa50
JW
5330 && cfun->machine->frame.wb_candidate1 >= V0_REGNUM)
5331 cfun->machine->frame.wb_candidate2 = regno;
a0d0b980 5332 offset += simd_function ? UNITS_PER_VREG : UNITS_PER_WORD;
43e9d192
IB
5333 }
5334
4f59f9f2 5335 offset = ROUND_UP (offset, STACK_BOUNDARY / BITS_PER_UNIT);
43e9d192
IB
5336
5337 cfun->machine->frame.saved_regs_size = offset;
1c960e02 5338
71bfb77a
WD
5339 HOST_WIDE_INT varargs_and_saved_regs_size
5340 = offset + cfun->machine->frame.saved_varargs_size;
5341
1c960e02 5342 cfun->machine->frame.hard_fp_offset
6a70badb
RS
5343 = aligned_upper_bound (varargs_and_saved_regs_size
5344 + get_frame_size (),
5345 STACK_BOUNDARY / BITS_PER_UNIT);
1c960e02 5346
6a70badb
RS
5347 /* Both these values are already aligned. */
5348 gcc_assert (multiple_p (crtl->outgoing_args_size,
5349 STACK_BOUNDARY / BITS_PER_UNIT));
1c960e02 5350 cfun->machine->frame.frame_size
6a70badb
RS
5351 = (cfun->machine->frame.hard_fp_offset
5352 + crtl->outgoing_args_size);
1c960e02 5353
71bfb77a
WD
5354 cfun->machine->frame.locals_offset = cfun->machine->frame.saved_varargs_size;
5355
5356 cfun->machine->frame.initial_adjust = 0;
5357 cfun->machine->frame.final_adjust = 0;
5358 cfun->machine->frame.callee_adjust = 0;
5359 cfun->machine->frame.callee_offset = 0;
5360
5361 HOST_WIDE_INT max_push_offset = 0;
5362 if (cfun->machine->frame.wb_candidate2 != INVALID_REGNUM)
5363 max_push_offset = 512;
5364 else if (cfun->machine->frame.wb_candidate1 != INVALID_REGNUM)
5365 max_push_offset = 256;
5366
6a70badb
RS
5367 HOST_WIDE_INT const_size, const_fp_offset;
5368 if (cfun->machine->frame.frame_size.is_constant (&const_size)
5369 && const_size < max_push_offset
5370 && known_eq (crtl->outgoing_args_size, 0))
71bfb77a
WD
5371 {
5372 /* Simple, small frame with no outgoing arguments:
5373 stp reg1, reg2, [sp, -frame_size]!
5374 stp reg3, reg4, [sp, 16] */
6a70badb 5375 cfun->machine->frame.callee_adjust = const_size;
71bfb77a 5376 }
6a70badb
RS
5377 else if (known_lt (crtl->outgoing_args_size
5378 + cfun->machine->frame.saved_regs_size, 512)
71bfb77a 5379 && !(cfun->calls_alloca
6a70badb
RS
5380 && known_lt (cfun->machine->frame.hard_fp_offset,
5381 max_push_offset)))
71bfb77a
WD
5382 {
5383 /* Frame with small outgoing arguments:
5384 sub sp, sp, frame_size
5385 stp reg1, reg2, [sp, outgoing_args_size]
5386 stp reg3, reg4, [sp, outgoing_args_size + 16] */
5387 cfun->machine->frame.initial_adjust = cfun->machine->frame.frame_size;
5388 cfun->machine->frame.callee_offset
5389 = cfun->machine->frame.frame_size - cfun->machine->frame.hard_fp_offset;
5390 }
6a70badb
RS
5391 else if (cfun->machine->frame.hard_fp_offset.is_constant (&const_fp_offset)
5392 && const_fp_offset < max_push_offset)
71bfb77a
WD
5393 {
5394 /* Frame with large outgoing arguments but a small local area:
5395 stp reg1, reg2, [sp, -hard_fp_offset]!
5396 stp reg3, reg4, [sp, 16]
5397 sub sp, sp, outgoing_args_size */
6a70badb 5398 cfun->machine->frame.callee_adjust = const_fp_offset;
71bfb77a
WD
5399 cfun->machine->frame.final_adjust
5400 = cfun->machine->frame.frame_size - cfun->machine->frame.callee_adjust;
5401 }
71bfb77a
WD
5402 else
5403 {
5404 /* Frame with large local area and outgoing arguments using frame pointer:
5405 sub sp, sp, hard_fp_offset
5406 stp x29, x30, [sp, 0]
5407 add x29, sp, 0
5408 stp reg3, reg4, [sp, 16]
5409 sub sp, sp, outgoing_args_size */
5410 cfun->machine->frame.initial_adjust = cfun->machine->frame.hard_fp_offset;
5411 cfun->machine->frame.final_adjust
5412 = cfun->machine->frame.frame_size - cfun->machine->frame.initial_adjust;
5413 }
5414
43e9d192
IB
5415 cfun->machine->frame.laid_out = true;
5416}
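/* Editorial example of the case selection above, with hypothetical
   numbers: a function with 32 bytes of locals, no outgoing arguments
   and only x29/x30 to save has saved_regs_size == 16 and
   frame_size == 48.  That is below max_push_offset (512, since both
   write-back candidates exist) and outgoing_args_size is 0, so the
   first case applies and the whole frame is allocated by the
   write-back push:

     stp x29, x30, [sp, -48]!

   If the same function also needed 1024 bytes of outgoing arguments,
   the second case would no longer apply (1024 + 16 is not below 512),
   but hard_fp_offset (48) is still below max_push_offset, so the third
   case is used:

     stp x29, x30, [sp, -48]!
     sub sp, sp, 1024  */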
5417
04ddfe06
KT
5418/* Return true if the register REGNO is saved on entry to
5419 the current function. */
5420
43e9d192
IB
5421static bool
5422aarch64_register_saved_on_entry (int regno)
5423{
97826595 5424 return cfun->machine->frame.reg_offset[regno] >= 0;
43e9d192
IB
5425}
5426
04ddfe06
KT
5427/* Return the next register up from REGNO up to LIMIT for the callee
5428 to save. */
5429
64dedd72
JW
5430static unsigned
5431aarch64_next_callee_save (unsigned regno, unsigned limit)
5432{
5433 while (regno <= limit && !aarch64_register_saved_on_entry (regno))
5434 regno ++;
5435 return regno;
5436}
43e9d192 5437
04ddfe06
KT
5438/* Push the register number REGNO of mode MODE to the stack with write-back
5439 adjusting the stack by ADJUSTMENT. */
5440
c5e1f66e 5441static void
ef4bddc2 5442aarch64_pushwb_single_reg (machine_mode mode, unsigned regno,
c5e1f66e
JW
5443 HOST_WIDE_INT adjustment)
5444 {
5445 rtx base_rtx = stack_pointer_rtx;
5446 rtx insn, reg, mem;
5447
5448 reg = gen_rtx_REG (mode, regno);
5449 mem = gen_rtx_PRE_MODIFY (Pmode, base_rtx,
5450 plus_constant (Pmode, base_rtx, -adjustment));
30079dde 5451 mem = gen_frame_mem (mode, mem);
c5e1f66e
JW
5452
5453 insn = emit_move_insn (mem, reg);
5454 RTX_FRAME_RELATED_P (insn) = 1;
5455}
5456
04ddfe06
KT
5457/* Generate and return an instruction to store the pair of registers
5458 REG and REG2 of mode MODE to location BASE with write-back adjusting
5459 the stack location BASE by ADJUSTMENT. */
5460
80c11907 5461static rtx
ef4bddc2 5462aarch64_gen_storewb_pair (machine_mode mode, rtx base, rtx reg, rtx reg2,
80c11907
JW
5463 HOST_WIDE_INT adjustment)
5464{
5465 switch (mode)
5466 {
4e10a5a7 5467 case E_DImode:
80c11907
JW
5468 return gen_storewb_pairdi_di (base, base, reg, reg2,
5469 GEN_INT (-adjustment),
5470 GEN_INT (UNITS_PER_WORD - adjustment));
4e10a5a7 5471 case E_DFmode:
80c11907
JW
5472 return gen_storewb_pairdf_di (base, base, reg, reg2,
5473 GEN_INT (-adjustment),
5474 GEN_INT (UNITS_PER_WORD - adjustment));
a0d0b980
SE
5475 case E_TFmode:
5476 return gen_storewb_pairtf_di (base, base, reg, reg2,
5477 GEN_INT (-adjustment),
5478 GEN_INT (UNITS_PER_VREG - adjustment));
80c11907
JW
5479 default:
5480 gcc_unreachable ();
5481 }
5482}
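/* Editorial sketch: for E_DImode and ADJUSTMENT == 48 the pattern built
   above corresponds to a single pre-indexed store pair such as

     stp x29, x30, [sp, -48]!

   (register numbers are illustrative), which performs the stack
   adjustment and saves both registers in one instruction; the second
   register is stored UNITS_PER_WORD bytes above the first.  */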
5483
04ddfe06
KT
5484/* Push registers numbered REGNO1 and REGNO2 to the stack, adjusting the
5485 stack pointer by ADJUSTMENT. */
5486
80c11907 5487static void
89ac681e 5488aarch64_push_regs (unsigned regno1, unsigned regno2, HOST_WIDE_INT adjustment)
80c11907 5489{
5d8a22a5 5490 rtx_insn *insn;
a0d0b980 5491 machine_mode mode = aarch64_reg_save_mode (cfun->decl, regno1);
89ac681e 5492
71bfb77a 5493 if (regno2 == INVALID_REGNUM)
89ac681e
WD
5494 return aarch64_pushwb_single_reg (mode, regno1, adjustment);
5495
80c11907
JW
5496 rtx reg1 = gen_rtx_REG (mode, regno1);
5497 rtx reg2 = gen_rtx_REG (mode, regno2);
5498
5499 insn = emit_insn (aarch64_gen_storewb_pair (mode, stack_pointer_rtx, reg1,
5500 reg2, adjustment));
5501 RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 2)) = 1;
80c11907
JW
5502 RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 1)) = 1;
5503 RTX_FRAME_RELATED_P (insn) = 1;
5504}
5505
04ddfe06
KT
5506/* Load the pair of registers REG and REG2 of mode MODE from stack location BASE,
5507 adjusting it by ADJUSTMENT afterwards. */
5508
159313d9 5509static rtx
ef4bddc2 5510aarch64_gen_loadwb_pair (machine_mode mode, rtx base, rtx reg, rtx reg2,
159313d9
JW
5511 HOST_WIDE_INT adjustment)
5512{
5513 switch (mode)
5514 {
4e10a5a7 5515 case E_DImode:
159313d9 5516 return gen_loadwb_pairdi_di (base, base, reg, reg2, GEN_INT (adjustment),
3e322b3f 5517 GEN_INT (UNITS_PER_WORD));
4e10a5a7 5518 case E_DFmode:
159313d9 5519 return gen_loadwb_pairdf_di (base, base, reg, reg2, GEN_INT (adjustment),
3e322b3f 5520 GEN_INT (UNITS_PER_WORD));
a0d0b980
SE
5521 case E_TFmode:
5522 return gen_loadwb_pairtf_di (base, base, reg, reg2, GEN_INT (adjustment),
5523 GEN_INT (UNITS_PER_VREG));
159313d9
JW
5524 default:
5525 gcc_unreachable ();
5526 }
5527}
5528
04ddfe06
KT
5529/* Pop the two registers numbered REGNO1, REGNO2 from the stack, adjusting it
5530 afterwards by ADJUSTMENT and writing the appropriate REG_CFA_RESTORE notes
5531 into CFI_OPS. */
5532
89ac681e
WD
5533static void
5534aarch64_pop_regs (unsigned regno1, unsigned regno2, HOST_WIDE_INT adjustment,
5535 rtx *cfi_ops)
5536{
a0d0b980 5537 machine_mode mode = aarch64_reg_save_mode (cfun->decl, regno1);
89ac681e
WD
5538 rtx reg1 = gen_rtx_REG (mode, regno1);
5539
5540 *cfi_ops = alloc_reg_note (REG_CFA_RESTORE, reg1, *cfi_ops);
5541
71bfb77a 5542 if (regno2 == INVALID_REGNUM)
89ac681e
WD
5543 {
5544 rtx mem = plus_constant (Pmode, stack_pointer_rtx, adjustment);
5545 mem = gen_rtx_POST_MODIFY (Pmode, stack_pointer_rtx, mem);
30079dde 5546 emit_move_insn (reg1, gen_frame_mem (mode, mem));
89ac681e
WD
5547 }
5548 else
5549 {
5550 rtx reg2 = gen_rtx_REG (mode, regno2);
5551 *cfi_ops = alloc_reg_note (REG_CFA_RESTORE, reg2, *cfi_ops);
5552 emit_insn (aarch64_gen_loadwb_pair (mode, stack_pointer_rtx, reg1,
5553 reg2, adjustment));
5554 }
5555}
5556
04ddfe06
KT
5557/* Generate and return a store pair instruction of mode MODE to store
5558 register REG1 to MEM1 and register REG2 to MEM2. */
5559
72df5c1f 5560static rtx
ef4bddc2 5561aarch64_gen_store_pair (machine_mode mode, rtx mem1, rtx reg1, rtx mem2,
72df5c1f
JW
5562 rtx reg2)
5563{
5564 switch (mode)
5565 {
4e10a5a7 5566 case E_DImode:
dfe1da23 5567 return gen_store_pair_dw_didi (mem1, reg1, mem2, reg2);
72df5c1f 5568
4e10a5a7 5569 case E_DFmode:
dfe1da23 5570 return gen_store_pair_dw_dfdf (mem1, reg1, mem2, reg2);
72df5c1f 5571
a0d0b980
SE
5572 case E_TFmode:
5573 return gen_store_pair_dw_tftf (mem1, reg1, mem2, reg2);
5574
72df5c1f
JW
5575 default:
5576 gcc_unreachable ();
5577 }
5578}
5579
04ddfe06
KT
5580/* Generate and return a load pair instruction of mode MODE to load register
5581 REG1 from MEM1 and register REG2 from MEM2. */
5582
72df5c1f 5583static rtx
ef4bddc2 5584aarch64_gen_load_pair (machine_mode mode, rtx reg1, rtx mem1, rtx reg2,
72df5c1f
JW
5585 rtx mem2)
5586{
5587 switch (mode)
5588 {
4e10a5a7 5589 case E_DImode:
dfe1da23 5590 return gen_load_pair_dw_didi (reg1, mem1, reg2, mem2);
72df5c1f 5591
4e10a5a7 5592 case E_DFmode:
dfe1da23 5593 return gen_load_pair_dw_dfdf (reg1, mem1, reg2, mem2);
72df5c1f 5594
a0d0b980
SE
5595 case E_TFmode:
5596 return gen_load_pair_dw_tftf (reg1, mem1, reg2, mem2);
5597
72df5c1f
JW
5598 default:
5599 gcc_unreachable ();
5600 }
5601}
5602
db58fd89
JW
5603/* Return TRUE if return address signing should be enabled for the current
5604 function, otherwise return FALSE. */
5605
5606bool
5607aarch64_return_address_signing_enabled (void)
5608{
5609 /* This function should only be called after frame laid out. */
5610 gcc_assert (cfun->machine->frame.laid_out);
5611
5612 /* If signing scope is AARCH64_FUNCTION_NON_LEAF, we only sign a leaf function
8fc16d72 5613 if its LR is pushed onto stack. */
db58fd89
JW
5614 return (aarch64_ra_sign_scope == AARCH64_FUNCTION_ALL
5615 || (aarch64_ra_sign_scope == AARCH64_FUNCTION_NON_LEAF
5616 && cfun->machine->frame.reg_offset[LR_REGNUM] >= 0));
5617}
5618
30afdf34
SD
5619/* Return TRUE if Branch Target Identification Mechanism is enabled. */
5620bool
5621aarch64_bti_enabled (void)
5622{
5623 return (aarch64_enable_bti == 1);
5624}
5625
04ddfe06
KT
5626/* Emit code to save the callee-saved registers from register number START
5627 to LIMIT to the stack at the location starting at offset START_OFFSET,
5628 skipping any write-back candidates if SKIP_WB is true. */
43e9d192 5629
43e9d192 5630static void
6a70badb 5631aarch64_save_callee_saves (machine_mode mode, poly_int64 start_offset,
ae13fce3 5632 unsigned start, unsigned limit, bool skip_wb)
43e9d192 5633{
5d8a22a5 5634 rtx_insn *insn;
43e9d192
IB
5635 unsigned regno;
5636 unsigned regno2;
5637
0ec74a1e 5638 for (regno = aarch64_next_callee_save (start, limit);
64dedd72
JW
5639 regno <= limit;
5640 regno = aarch64_next_callee_save (regno + 1, limit))
43e9d192 5641 {
ae13fce3 5642 rtx reg, mem;
6a70badb 5643 poly_int64 offset;
a0d0b980 5644 int offset_diff;
64dedd72 5645
ae13fce3
JW
5646 if (skip_wb
5647 && (regno == cfun->machine->frame.wb_candidate1
5648 || regno == cfun->machine->frame.wb_candidate2))
5649 continue;
5650
827ab47a
KT
5651 if (cfun->machine->reg_is_wrapped_separately[regno])
5652 continue;
5653
ae13fce3
JW
5654 reg = gen_rtx_REG (mode, regno);
5655 offset = start_offset + cfun->machine->frame.reg_offset[regno];
30079dde
WD
5656 mem = gen_frame_mem (mode, plus_constant (Pmode, stack_pointer_rtx,
5657 offset));
64dedd72
JW
5658
5659 regno2 = aarch64_next_callee_save (regno + 1, limit);
a0d0b980
SE
5660 offset_diff = cfun->machine->frame.reg_offset[regno2]
5661 - cfun->machine->frame.reg_offset[regno];
64dedd72
JW
5662
5663 if (regno2 <= limit
827ab47a 5664 && !cfun->machine->reg_is_wrapped_separately[regno2]
a0d0b980 5665 && known_eq (GET_MODE_SIZE (mode), offset_diff))
43e9d192 5666 {
0ec74a1e 5667 rtx reg2 = gen_rtx_REG (mode, regno2);
64dedd72
JW
5668 rtx mem2;
5669
5670 offset = start_offset + cfun->machine->frame.reg_offset[regno2];
30079dde
WD
5671 mem2 = gen_frame_mem (mode, plus_constant (Pmode, stack_pointer_rtx,
5672 offset));
8ed2fc62
JW
5673 insn = emit_insn (aarch64_gen_store_pair (mode, mem, reg, mem2,
5674 reg2));
0b4a9743 5675
64dedd72
JW
5676 /* The first part of a frame-related parallel insn is
5677 always assumed to be relevant to the frame
5678 calculations; subsequent parts, are only
5679 frame-related if explicitly marked. */
5680 RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 1)) = 1;
5681 regno = regno2;
5682 }
5683 else
8ed2fc62
JW
5684 insn = emit_move_insn (mem, reg);
5685
5686 RTX_FRAME_RELATED_P (insn) = 1;
5687 }
5688}
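/* Editorial sketch: if, say, x19 and x20 both need saving and their
   frame.reg_offset values differ by exactly GET_MODE_SIZE (DImode), the
   loop above emits one store pair for them, e.g.

     stp x19, x20, [sp, 16]

   (the 16 is an illustrative START_OFFSET + reg_offset), while a
   register whose successor is not saved, is wrapped separately or is a
   skipped write-back candidate falls back to a single str.  */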
5689
04ddfe06
KT
5690/* Emit code to restore the callee registers of mode MODE from register
5691 number START up to and including LIMIT. Restore from the stack offset
5692 START_OFFSET, skipping any write-back candidates if SKIP_WB is true.
5693 Write the appropriate REG_CFA_RESTORE notes into CFI_OPS. */
5694
8ed2fc62 5695static void
ef4bddc2 5696aarch64_restore_callee_saves (machine_mode mode,
6a70badb 5697 poly_int64 start_offset, unsigned start,
dd991abb 5698 unsigned limit, bool skip_wb, rtx *cfi_ops)
8ed2fc62 5699{
8ed2fc62 5700 rtx base_rtx = stack_pointer_rtx;
8ed2fc62
JW
5701 unsigned regno;
5702 unsigned regno2;
6a70badb 5703 poly_int64 offset;
8ed2fc62
JW
5704
5705 for (regno = aarch64_next_callee_save (start, limit);
5706 regno <= limit;
5707 regno = aarch64_next_callee_save (regno + 1, limit))
5708 {
827ab47a
KT
5709 if (cfun->machine->reg_is_wrapped_separately[regno])
5710 continue;
5711
ae13fce3 5712 rtx reg, mem;
a0d0b980 5713 int offset_diff;
8ed2fc62 5714
ae13fce3
JW
5715 if (skip_wb
5716 && (regno == cfun->machine->frame.wb_candidate1
5717 || regno == cfun->machine->frame.wb_candidate2))
5718 continue;
5719
5720 reg = gen_rtx_REG (mode, regno);
8ed2fc62 5721 offset = start_offset + cfun->machine->frame.reg_offset[regno];
30079dde 5722 mem = gen_frame_mem (mode, plus_constant (Pmode, base_rtx, offset));
8ed2fc62
JW
5723
5724 regno2 = aarch64_next_callee_save (regno + 1, limit);
a0d0b980
SE
5725 offset_diff = cfun->machine->frame.reg_offset[regno2]
5726 - cfun->machine->frame.reg_offset[regno];
8ed2fc62
JW
5727
5728 if (regno2 <= limit
827ab47a 5729 && !cfun->machine->reg_is_wrapped_separately[regno2]
a0d0b980 5730 && known_eq (GET_MODE_SIZE (mode), offset_diff))
64dedd72 5731 {
8ed2fc62
JW
5732 rtx reg2 = gen_rtx_REG (mode, regno2);
5733 rtx mem2;
5734
5735 offset = start_offset + cfun->machine->frame.reg_offset[regno2];
30079dde 5736 mem2 = gen_frame_mem (mode, plus_constant (Pmode, base_rtx, offset));
dd991abb 5737 emit_insn (aarch64_gen_load_pair (mode, reg, mem, reg2, mem2));
8ed2fc62 5738
dd991abb 5739 *cfi_ops = alloc_reg_note (REG_CFA_RESTORE, reg2, *cfi_ops);
8ed2fc62 5740 regno = regno2;
43e9d192 5741 }
8ed2fc62 5742 else
dd991abb
RH
5743 emit_move_insn (reg, mem);
5744 *cfi_ops = alloc_reg_note (REG_CFA_RESTORE, reg, *cfi_ops);
43e9d192 5745 }
43e9d192
IB
5746}
5747
43cacb12
RS
5748/* Return true if OFFSET is a signed 4-bit value multiplied by the size
5749 of MODE. */
5750
5751static inline bool
5752offset_4bit_signed_scaled_p (machine_mode mode, poly_int64 offset)
5753{
5754 HOST_WIDE_INT multiple;
5755 return (constant_multiple_p (offset, GET_MODE_SIZE (mode), &multiple)
5756 && IN_RANGE (multiple, -8, 7));
5757}
5758
5759/* Return true if OFFSET is an unsigned 6-bit value multiplied by the size
5760 of MODE. */
5761
5762static inline bool
5763offset_6bit_unsigned_scaled_p (machine_mode mode, poly_int64 offset)
5764{
5765 HOST_WIDE_INT multiple;
5766 return (constant_multiple_p (offset, GET_MODE_SIZE (mode), &multiple)
5767 && IN_RANGE (multiple, 0, 63));
5768}
5769
5770/* Return true if OFFSET is a signed 7-bit value multiplied by the size
5771 of MODE. */
5772
5773bool
5774aarch64_offset_7bit_signed_scaled_p (machine_mode mode, poly_int64 offset)
5775{
5776 HOST_WIDE_INT multiple;
5777 return (constant_multiple_p (offset, GET_MODE_SIZE (mode), &multiple)
5778 && IN_RANGE (multiple, -64, 63));
5779}
5780
5781/* Return true if OFFSET is a signed 9-bit value. */
5782
3c5af608
MM
5783bool
5784aarch64_offset_9bit_signed_unscaled_p (machine_mode mode ATTRIBUTE_UNUSED,
5785 poly_int64 offset)
827ab47a 5786{
6a70badb
RS
5787 HOST_WIDE_INT const_offset;
5788 return (offset.is_constant (&const_offset)
5789 && IN_RANGE (const_offset, -256, 255));
827ab47a
KT
5790}
5791
43cacb12
RS
5792/* Return true if OFFSET is a signed 9-bit value multiplied by the size
5793 of MODE. */
5794
827ab47a 5795static inline bool
43cacb12 5796offset_9bit_signed_scaled_p (machine_mode mode, poly_int64 offset)
827ab47a 5797{
6a70badb
RS
5798 HOST_WIDE_INT multiple;
5799 return (constant_multiple_p (offset, GET_MODE_SIZE (mode), &multiple)
43cacb12 5800 && IN_RANGE (multiple, -256, 255));
827ab47a
KT
5801}
5802
43cacb12
RS
5803/* Return true if OFFSET is an unsigned 12-bit value multiplied by the size
5804 of MODE. */
5805
5806static inline bool
5807offset_12bit_unsigned_scaled_p (machine_mode mode, poly_int64 offset)
827ab47a 5808{
6a70badb
RS
5809 HOST_WIDE_INT multiple;
5810 return (constant_multiple_p (offset, GET_MODE_SIZE (mode), &multiple)
43cacb12 5811 && IN_RANGE (multiple, 0, 4095));
827ab47a
KT
5812}
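/* Editorial examples for the offset predicates above, taking MODE as
   DImode (8-byte units): offset_4bit_signed_scaled_p accepts -64..56,
   offset_6bit_unsigned_scaled_p accepts 0..504,
   aarch64_offset_7bit_signed_scaled_p accepts -512..504,
   aarch64_offset_9bit_signed_unscaled_p accepts any byte offset in
   -256..255, offset_9bit_signed_scaled_p accepts -2048..2040 and
   offset_12bit_unsigned_scaled_p accepts 0..32760, all in multiples of
   8 except the unscaled 9-bit form.  The last of these matches the
   LDR/STR unsigned-immediate range used by
   aarch64_get_separate_components below.  */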
5813
5814/* Implement TARGET_SHRINK_WRAP_GET_SEPARATE_COMPONENTS. */
5815
5816static sbitmap
5817aarch64_get_separate_components (void)
5818{
827ab47a
KT
5819 sbitmap components = sbitmap_alloc (LAST_SAVED_REGNUM + 1);
5820 bitmap_clear (components);
5821
5822 /* The registers we need saved to the frame. */
5823 for (unsigned regno = 0; regno <= LAST_SAVED_REGNUM; regno++)
5824 if (aarch64_register_saved_on_entry (regno))
5825 {
6a70badb 5826 poly_int64 offset = cfun->machine->frame.reg_offset[regno];
827ab47a
KT
5827 if (!frame_pointer_needed)
5828 offset += cfun->machine->frame.frame_size
5829 - cfun->machine->frame.hard_fp_offset;
5830 /* Check that we can access the stack slot of the register with one
5831 direct load with no adjustments needed. */
5832 if (offset_12bit_unsigned_scaled_p (DImode, offset))
5833 bitmap_set_bit (components, regno);
5834 }
5835
5836 /* Don't mess with the hard frame pointer. */
5837 if (frame_pointer_needed)
5838 bitmap_clear_bit (components, HARD_FRAME_POINTER_REGNUM);
5839
5840 unsigned reg1 = cfun->machine->frame.wb_candidate1;
5841 unsigned reg2 = cfun->machine->frame.wb_candidate2;
0795f659 5842 /* If registers have been chosen to be stored/restored with
827ab47a
KT
5843 writeback, don't interfere with them to avoid having to output explicit
5844 stack adjustment instructions. */
5845 if (reg2 != INVALID_REGNUM)
5846 bitmap_clear_bit (components, reg2);
5847 if (reg1 != INVALID_REGNUM)
5848 bitmap_clear_bit (components, reg1);
5849
5850 bitmap_clear_bit (components, LR_REGNUM);
5851 bitmap_clear_bit (components, SP_REGNUM);
5852
5853 return components;
5854}
5855
5856/* Implement TARGET_SHRINK_WRAP_COMPONENTS_FOR_BB. */
5857
5858static sbitmap
5859aarch64_components_for_bb (basic_block bb)
5860{
5861 bitmap in = DF_LIVE_IN (bb);
5862 bitmap gen = &DF_LIVE_BB_INFO (bb)->gen;
5863 bitmap kill = &DF_LIVE_BB_INFO (bb)->kill;
a0d0b980 5864 bool simd_function = aarch64_simd_decl_p (cfun->decl);
827ab47a
KT
5865
5866 sbitmap components = sbitmap_alloc (LAST_SAVED_REGNUM + 1);
5867 bitmap_clear (components);
5868
5869 /* GPRs are used in a bb if they are in the IN, GEN, or KILL sets. */
5870 for (unsigned regno = 0; regno <= LAST_SAVED_REGNUM; regno++)
a0d0b980
SE
5871 if ((!call_used_regs[regno]
5872 || (simd_function && FP_SIMD_SAVED_REGNUM_P (regno)))
827ab47a
KT
5873 && (bitmap_bit_p (in, regno)
5874 || bitmap_bit_p (gen, regno)
5875 || bitmap_bit_p (kill, regno)))
3f26f054
WD
5876 {
5877 unsigned regno2, offset, offset2;
5878 bitmap_set_bit (components, regno);
5879
5880 /* If there is a callee-save at an adjacent offset, add it too
5881 to increase the use of LDP/STP. */
5882 offset = cfun->machine->frame.reg_offset[regno];
5883 regno2 = ((offset & 8) == 0) ? regno + 1 : regno - 1;
5884
5885 if (regno2 <= LAST_SAVED_REGNUM)
5886 {
5887 offset2 = cfun->machine->frame.reg_offset[regno2];
5888 if ((offset & ~8) == (offset2 & ~8))
5889 bitmap_set_bit (components, regno2);
5890 }
5891 }
827ab47a
KT
5892
5893 return components;
5894}
5895
5896/* Implement TARGET_SHRINK_WRAP_DISQUALIFY_COMPONENTS.
5897 Nothing to do for aarch64. */
5898
5899static void
5900aarch64_disqualify_components (sbitmap, edge, sbitmap, bool)
5901{
5902}
5903
5904/* Return the next set bit in BMP from START onwards. Return the total number
5905 of bits in BMP if no set bit is found at or after START. */
5906
5907static unsigned int
5908aarch64_get_next_set_bit (sbitmap bmp, unsigned int start)
5909{
5910 unsigned int nbits = SBITMAP_SIZE (bmp);
5911 if (start == nbits)
5912 return start;
5913
5914 gcc_assert (start < nbits);
5915 for (unsigned int i = start; i < nbits; i++)
5916 if (bitmap_bit_p (bmp, i))
5917 return i;
5918
5919 return nbits;
5920}
5921
5922/* Do the work for aarch64_emit_prologue_components and
5923 aarch64_emit_epilogue_components. COMPONENTS is the bitmap of registers
5924 to save/restore, PROLOGUE_P indicates whether to emit the prologue sequence
5925 for these components or the epilogue sequence. That is, it determines
5926 whether we should emit stores or loads and what kind of CFA notes to attach
5927 to the insns. Otherwise the logic for the two sequences is very
5928 similar. */
5929
5930static void
5931aarch64_process_components (sbitmap components, bool prologue_p)
5932{
5933 rtx ptr_reg = gen_rtx_REG (Pmode, frame_pointer_needed
5934 ? HARD_FRAME_POINTER_REGNUM
5935 : STACK_POINTER_REGNUM);
5936
5937 unsigned last_regno = SBITMAP_SIZE (components);
5938 unsigned regno = aarch64_get_next_set_bit (components, R0_REGNUM);
5939 rtx_insn *insn = NULL;
5940
5941 while (regno != last_regno)
5942 {
a0d0b980
SE
5943 /* AAPCS64 section 5.1.2 requires only the low 64 bits to be saved
5944 so DFmode for the vector registers is enough. For simd functions
5945 we want to save the low 128 bits. */
5946 machine_mode mode = aarch64_reg_save_mode (cfun->decl, regno);
5947
827ab47a 5948 rtx reg = gen_rtx_REG (mode, regno);
6a70badb 5949 poly_int64 offset = cfun->machine->frame.reg_offset[regno];
827ab47a
KT
5950 if (!frame_pointer_needed)
5951 offset += cfun->machine->frame.frame_size
5952 - cfun->machine->frame.hard_fp_offset;
5953 rtx addr = plus_constant (Pmode, ptr_reg, offset);
5954 rtx mem = gen_frame_mem (mode, addr);
5955
5956 rtx set = prologue_p ? gen_rtx_SET (mem, reg) : gen_rtx_SET (reg, mem);
5957 unsigned regno2 = aarch64_get_next_set_bit (components, regno + 1);
5958 /* No more registers to handle after REGNO.
5959 Emit a single save/restore and exit. */
5960 if (regno2 == last_regno)
5961 {
5962 insn = emit_insn (set);
5963 RTX_FRAME_RELATED_P (insn) = 1;
5964 if (prologue_p)
5965 add_reg_note (insn, REG_CFA_OFFSET, copy_rtx (set));
5966 else
5967 add_reg_note (insn, REG_CFA_RESTORE, reg);
5968 break;
5969 }
5970
6a70badb 5971 poly_int64 offset2 = cfun->machine->frame.reg_offset[regno2];
827ab47a
KT
5972 /* The next register is not of the same class or its offset is not
5973 mergeable with the current one into a pair. */
5974 if (!satisfies_constraint_Ump (mem)
5975 || GP_REGNUM_P (regno) != GP_REGNUM_P (regno2)
a0d0b980 5976 || (aarch64_simd_decl_p (cfun->decl) && FP_REGNUM_P (regno))
6a70badb
RS
5977 || maybe_ne ((offset2 - cfun->machine->frame.reg_offset[regno]),
5978 GET_MODE_SIZE (mode)))
827ab47a
KT
5979 {
5980 insn = emit_insn (set);
5981 RTX_FRAME_RELATED_P (insn) = 1;
5982 if (prologue_p)
5983 add_reg_note (insn, REG_CFA_OFFSET, copy_rtx (set));
5984 else
5985 add_reg_note (insn, REG_CFA_RESTORE, reg);
5986
5987 regno = regno2;
5988 continue;
5989 }
5990
5991 /* REGNO2 can be saved/restored in a pair with REGNO. */
5992 rtx reg2 = gen_rtx_REG (mode, regno2);
5993 if (!frame_pointer_needed)
5994 offset2 += cfun->machine->frame.frame_size
5995 - cfun->machine->frame.hard_fp_offset;
5996 rtx addr2 = plus_constant (Pmode, ptr_reg, offset2);
5997 rtx mem2 = gen_frame_mem (mode, addr2);
5998 rtx set2 = prologue_p ? gen_rtx_SET (mem2, reg2)
5999 : gen_rtx_SET (reg2, mem2);
6000
6001 if (prologue_p)
6002 insn = emit_insn (aarch64_gen_store_pair (mode, mem, reg, mem2, reg2));
6003 else
6004 insn = emit_insn (aarch64_gen_load_pair (mode, reg, mem, reg2, mem2));
6005
6006 RTX_FRAME_RELATED_P (insn) = 1;
6007 if (prologue_p)
6008 {
6009 add_reg_note (insn, REG_CFA_OFFSET, set);
6010 add_reg_note (insn, REG_CFA_OFFSET, set2);
6011 }
6012 else
6013 {
6014 add_reg_note (insn, REG_CFA_RESTORE, reg);
6015 add_reg_note (insn, REG_CFA_RESTORE, reg2);
6016 }
6017
6018 regno = aarch64_get_next_set_bit (components, regno2 + 1);
6019 }
6020}
6021
6022/* Implement TARGET_SHRINK_WRAP_EMIT_PROLOGUE_COMPONENTS. */
6023
6024static void
6025aarch64_emit_prologue_components (sbitmap components)
6026{
6027 aarch64_process_components (components, true);
6028}
6029
6030/* Implement TARGET_SHRINK_WRAP_EMIT_EPILOGUE_COMPONENTS. */
6031
6032static void
6033aarch64_emit_epilogue_components (sbitmap components)
6034{
6035 aarch64_process_components (components, false);
6036}
6037
6038/* Implement TARGET_SHRINK_WRAP_SET_HANDLED_COMPONENTS. */
6039
6040static void
6041aarch64_set_handled_components (sbitmap components)
6042{
6043 for (unsigned regno = 0; regno <= LAST_SAVED_REGNUM; regno++)
6044 if (bitmap_bit_p (components, regno))
6045 cfun->machine->reg_is_wrapped_separately[regno] = true;
6046}
6047
8c6e3b23
TC
6048/* On AArch64 we have an ABI-defined safe buffer. This constant is used to
6049 determine the probe offset for alloca. */
6050
6051static HOST_WIDE_INT
6052aarch64_stack_clash_protection_alloca_probe_range (void)
6053{
6054 return STACK_CLASH_CALLER_GUARD;
6055}
6056
6057
cd1bef27
JL
6058/* Allocate POLY_SIZE bytes of stack space using TEMP1 and TEMP2 as scratch
6059 registers. If POLY_SIZE is not large enough to require a probe this function
6060 will only adjust the stack. When allocating the stack space
6061 FRAME_RELATED_P is then used to indicate if the allocation is frame related.
6062 FINAL_ADJUSTMENT_P indicates whether we are allocating the outgoing
6063 arguments. If we are then we ensure that any allocation larger than the ABI
6064 defined buffer needs a probe so that the invariant of having a 1KB buffer is
6065 maintained.
6066
6067 We emit barriers after each stack adjustment to prevent optimizations from
6068 breaking the invariant that we never drop the stack more than a page. This
6069 invariant is needed to make it easier to correctly handle asynchronous
6070 events. For example, if we allowed the stack to drop by more than a page
6071 and then issued several probes to catch up, a signal taken somewhere in
6072 between would leave the handler not knowing the state of the stack and
6073 unable to assume anything about which pages have been probed. */
6074
6075static void
6076aarch64_allocate_and_probe_stack_space (rtx temp1, rtx temp2,
6077 poly_int64 poly_size,
6078 bool frame_related_p,
6079 bool final_adjustment_p)
6080{
6081 HOST_WIDE_INT guard_size
6082 = 1 << PARAM_VALUE (PARAM_STACK_CLASH_PROTECTION_GUARD_SIZE);
6083 HOST_WIDE_INT guard_used_by_caller = STACK_CLASH_CALLER_GUARD;
6084 /* When doing the final adjustment for the outgoing argument size we can't
6085 assume that LR was saved at position 0. So subtract its offset from the
6086 ABI safe buffer so that we don't accidentally allow an adjustment that
6087 would result in an allocation larger than the ABI buffer without
6088 probing. */
6089 HOST_WIDE_INT min_probe_threshold
6090 = final_adjustment_p
6091 ? guard_used_by_caller - cfun->machine->frame.reg_offset[LR_REGNUM]
6092 : guard_size - guard_used_by_caller;
6093
6094 poly_int64 frame_size = cfun->machine->frame.frame_size;
6095
6096 /* We should always have a positive probe threshold. */
6097 gcc_assert (min_probe_threshold > 0);
6098
6099 if (flag_stack_clash_protection && !final_adjustment_p)
6100 {
6101 poly_int64 initial_adjust = cfun->machine->frame.initial_adjust;
6102 poly_int64 final_adjust = cfun->machine->frame.final_adjust;
6103
6104 if (known_eq (frame_size, 0))
6105 {
6106 dump_stack_clash_frame_info (NO_PROBE_NO_FRAME, false);
6107 }
6108 else if (known_lt (initial_adjust, guard_size - guard_used_by_caller)
6109 && known_lt (final_adjust, guard_used_by_caller))
6110 {
6111 dump_stack_clash_frame_info (NO_PROBE_SMALL_FRAME, true);
6112 }
6113 }
6114
cd1bef27
JL
6115 /* If SIZE is not large enough to require probing, just adjust the stack and
6116 exit. */
eb471ba3 6117 if (known_lt (poly_size, min_probe_threshold)
cd1bef27
JL
6118 || !flag_stack_clash_protection)
6119 {
6120 aarch64_sub_sp (temp1, temp2, poly_size, frame_related_p);
6121 return;
6122 }
6123
eb471ba3
TC
6124 HOST_WIDE_INT size;
6125 /* Handle the SVE non-constant case first. */
6126 if (!poly_size.is_constant (&size))
6127 {
6128 if (dump_file)
6129 {
6130 fprintf (dump_file, "Stack clash SVE prologue: ");
6131 print_dec (poly_size, dump_file);
6132 fprintf (dump_file, " bytes, dynamic probing will be required.\n");
6133 }
6134
6135 /* First calculate the amount of bytes we're actually spilling. */
6136 aarch64_add_offset (Pmode, temp1, CONST0_RTX (Pmode),
6137 poly_size, temp1, temp2, false, true);
6138
6139 rtx_insn *insn = get_last_insn ();
6140
6141 if (frame_related_p)
6142 {
6143 /* This is done to provide unwinding information for the stack
6144 adjustments we're about to do; however, to prevent the optimizers
143d3b15 6145 from removing the R11 move and leaving the CFA note (which would be
eb471ba3
TC
6146 very wrong) we tie the old and new stack pointer together.
6147 The tie will expand to nothing but the optimizers will not touch
6148 the instruction. */
143d3b15 6149 rtx stack_ptr_copy = gen_rtx_REG (Pmode, STACK_CLASH_SVE_CFA_REGNUM);
eb471ba3
TC
6150 emit_move_insn (stack_ptr_copy, stack_pointer_rtx);
6151 emit_insn (gen_stack_tie (stack_ptr_copy, stack_pointer_rtx));
6152
6153 /* We want the CFA independent of the stack pointer for the
6154 duration of the loop. */
6155 add_reg_note (insn, REG_CFA_DEF_CFA, stack_ptr_copy);
6156 RTX_FRAME_RELATED_P (insn) = 1;
6157 }
6158
6159 rtx probe_const = gen_int_mode (min_probe_threshold, Pmode);
6160 rtx guard_const = gen_int_mode (guard_size, Pmode);
6161
6162 insn = emit_insn (gen_probe_sve_stack_clash (Pmode, stack_pointer_rtx,
6163 stack_pointer_rtx, temp1,
6164 probe_const, guard_const));
6165
6166 /* Now reset the CFA register if needed. */
6167 if (frame_related_p)
6168 {
6169 add_reg_note (insn, REG_CFA_DEF_CFA,
6170 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
6171 gen_int_mode (poly_size, Pmode)));
6172 RTX_FRAME_RELATED_P (insn) = 1;
6173 }
6174
6175 return;
6176 }
6177
cd1bef27
JL
6178 if (dump_file)
6179 fprintf (dump_file,
eb471ba3
TC
6180 "Stack clash AArch64 prologue: " HOST_WIDE_INT_PRINT_DEC
6181 " bytes, probing will be required.\n", size);
cd1bef27
JL
6182
6183 /* Round size down to a multiple of guard_size, and calculate the
6184 residual as the difference between the original size and the rounded
6185 size. */
6186 HOST_WIDE_INT rounded_size = ROUND_DOWN (size, guard_size);
6187 HOST_WIDE_INT residual = size - rounded_size;
6188
6189 /* We can handle a small number of allocations/probes inline. Otherwise
6190 punt to a loop. */
6191 if (rounded_size <= STACK_CLASH_MAX_UNROLL_PAGES * guard_size)
6192 {
6193 for (HOST_WIDE_INT i = 0; i < rounded_size; i += guard_size)
6194 {
6195 aarch64_sub_sp (NULL, temp2, guard_size, true);
6196 emit_stack_probe (plus_constant (Pmode, stack_pointer_rtx,
6197 guard_used_by_caller));
6198 emit_insn (gen_blockage ());
6199 }
6200 dump_stack_clash_frame_info (PROBE_INLINE, size != rounded_size);
6201 }
6202 else
6203 {
6204 /* Compute the ending address. */
6205 aarch64_add_offset (Pmode, temp1, stack_pointer_rtx, -rounded_size,
6206 temp1, NULL, false, true);
6207 rtx_insn *insn = get_last_insn ();
6208
6209 /* For the initial allocation, we don't have a frame pointer
6210 set up, so we always need CFI notes. If we're doing the
6211 final allocation, then we may have a frame pointer, in which
6212 case it is the CFA, otherwise we need CFI notes.
6213
6214 We can determine which allocation we are doing by looking at
6215 the value of FRAME_RELATED_P since the final allocations are not
6216 frame related. */
6217 if (frame_related_p)
6218 {
6219 /* We want the CFA independent of the stack pointer for the
6220 duration of the loop. */
6221 add_reg_note (insn, REG_CFA_DEF_CFA,
6222 plus_constant (Pmode, temp1, rounded_size));
6223 RTX_FRAME_RELATED_P (insn) = 1;
6224 }
6225
6226 /* This allocates and probes the stack. Note that this re-uses some of
6227 the existing Ada stack protection code. However we are guaranteed not
6228 to enter the non-loop or residual branches of that code.
6229
6230 The non-loop part won't be entered because if our allocation amount
6231 doesn't require a loop, the case above would handle it.
6232
6233 The residual amount won't be entered because TEMP1 is a multiple of
6234 the allocation size. The residual will always be 0. As such, the only
6235 part we are actually using from that code is the loop setup. The
6236 actual probing is done in aarch64_output_probe_stack_range. */
6237 insn = emit_insn (gen_probe_stack_range (stack_pointer_rtx,
6238 stack_pointer_rtx, temp1));
6239
6240 /* Now reset the CFA register if needed. */
6241 if (frame_related_p)
6242 {
6243 add_reg_note (insn, REG_CFA_DEF_CFA,
6244 plus_constant (Pmode, stack_pointer_rtx, rounded_size));
6245 RTX_FRAME_RELATED_P (insn) = 1;
6246 }
6247
6248 emit_insn (gen_blockage ());
6249 dump_stack_clash_frame_info (PROBE_LOOP, size != rounded_size);
6250 }
6251
6252 /* Handle any residuals. Residuals of at least MIN_PROBE_THRESHOLD have to
6253 be probed. This maintains the requirement that each page is probed at
6254 least once. For initial probing we probe only if the allocation is
6255 more than GUARD_SIZE - buffer, and for the outgoing arguments we probe
6256 if the amount is larger than buffer. GUARD_SIZE - buffer + buffer ==
6257 GUARD_SIZE. This ensures that for any allocation large enough to
6258 trigger a probe here we emit at least one, and that for allocations
6259 too small for this code to emit anything the page will already have
6260 been probed by the saving of FP/LR, either in this function or in one
6261 of its callees. If we have no callees then there are no further stack
6262 adjustments and we are still safe. */
6263 if (residual)
6264 {
6265 HOST_WIDE_INT residual_probe_offset = guard_used_by_caller;
6266 /* If we're doing final adjustments, and we've done any full page
6267 allocations then any residual needs to be probed. */
6268 if (final_adjustment_p && rounded_size != 0)
6269 min_probe_threshold = 0;
6270 /* If doing a small final adjustment, we always probe at offset 0.
6271 This is done to avoid issues when LR is not at position 0 or when
6272 the final adjustment is smaller than the probing offset. */
6273 else if (final_adjustment_p && rounded_size == 0)
6274 residual_probe_offset = 0;
6275
6276 aarch64_sub_sp (temp1, temp2, residual, frame_related_p);
6277 if (residual >= min_probe_threshold)
6278 {
6279 if (dump_file)
6280 fprintf (dump_file,
6281 "Stack clash AArch64 prologue residuals: "
6282 HOST_WIDE_INT_PRINT_DEC " bytes, probing will be required."
6283 "\n", residual);
6284
6285 emit_stack_probe (plus_constant (Pmode, stack_pointer_rtx,
6286 residual_probe_offset));
6287 emit_insn (gen_blockage ());
6288 }
6289 }
6290}
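/* Editorial example (hypothetical sizes, assuming the default 64KB guard,
   the 1KB STACK_CLASH_CALLER_GUARD buffer, and that 192KB is within the
   STACK_CLASH_MAX_UNROLL_PAGES limit): an initial adjustment of 200KB
   gives rounded_size == 192KB and residual == 8KB.  The unrolled branch
   above then emits three

     sub sp, sp, #65536
     str xzr, [sp, 1024]

   pairs (each followed by a scheduling barrier), and the 8KB residual is
   allocated with a plain "sub sp, sp, #8192" and left unprobed, since it
   is below the guard_size - guard_used_by_caller threshold for the
   initial allocation.  */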
6291
a0d0b980
SE
6292/* Return 1 if the register is used by the epilogue. We need to say the
6293 return register is used, but only after epilogue generation is complete.
6294 Note that in the case of sibcalls, the values "used by the epilogue" are
6295 considered live at the start of the called function.
6296
6297 For SIMD functions we need to return 1 for FP registers that are saved and
6298 restored by a function but are not zero in call_used_regs. If we do not do
6299 this, optimizations may remove the restore of the register. */
6300
6301int
6302aarch64_epilogue_uses (int regno)
6303{
6304 if (epilogue_completed)
6305 {
6306 if (regno == LR_REGNUM)
6307 return 1;
6308 if (aarch64_simd_decl_p (cfun->decl) && FP_SIMD_SAVED_REGNUM_P (regno))
6309 return 1;
6310 }
6311 return 0;
6312}
6313
43cacb12
RS
6314/* Add a REG_CFA_EXPRESSION note to INSN to say that register REG
6315 is saved at BASE + OFFSET. */
6316
6317static void
6318aarch64_add_cfa_expression (rtx_insn *insn, unsigned int reg,
6319 rtx base, poly_int64 offset)
6320{
6321 rtx mem = gen_frame_mem (DImode, plus_constant (Pmode, base, offset));
6322 add_reg_note (insn, REG_CFA_EXPRESSION,
6323 gen_rtx_SET (mem, regno_reg_rtx[reg]));
6324}
6325
43e9d192
IB
6326/* AArch64 stack frames generated by this compiler look like:
6327
6328 +-------------------------------+
6329 | |
6330 | incoming stack arguments |
6331 | |
34834420
MS
6332 +-------------------------------+
6333 | | <-- incoming stack pointer (aligned)
43e9d192
IB
6334 | callee-allocated save area |
6335 | for register varargs |
6336 | |
34834420
MS
6337 +-------------------------------+
6338 | local variables | <-- frame_pointer_rtx
43e9d192
IB
6339 | |
6340 +-------------------------------+
cd1bef27 6341 | padding | \
454fdba9 6342 +-------------------------------+ |
454fdba9 6343 | callee-saved registers | | frame.saved_regs_size
454fdba9
RL
6344 +-------------------------------+ |
6345 | LR' | |
6346 +-------------------------------+ |
34834420
MS
6347 | FP' | / <- hard_frame_pointer_rtx (aligned)
6348 +-------------------------------+
43e9d192
IB
6349 | dynamic allocation |
6350 +-------------------------------+
34834420
MS
6351 | padding |
6352 +-------------------------------+
6353 | outgoing stack arguments | <-- arg_pointer
6354 | |
6355 +-------------------------------+
6356 | | <-- stack_pointer_rtx (aligned)
43e9d192 6357
34834420
MS
6358 Dynamic stack allocations via alloca() decrease stack_pointer_rtx
6359 but leave frame_pointer_rtx and hard_frame_pointer_rtx
cd1bef27
JL
6360 unchanged.
6361
6362 By default for stack-clash we assume the guard is at least 64KB, but this
6363 value is configurable to either 4KB or 64KB. We also force the guard size to
6364 be the same as the probing interval and both values are kept in sync.
6365
6366 With those assumptions the callee can allocate up to 63KB (or 3KB depending
6367 on the guard size) of stack space without probing.
6368
6369 When probing is needed, we emit a probe at the start of the prologue
6370 and every PARAM_STACK_CLASH_PROTECTION_GUARD_SIZE bytes thereafter.
6371
6372 We have to track how much space has been allocated and the only stores
6373 to the stack we track as implicit probes are the FP/LR stores.
6374
6375 For outgoing arguments we probe if the size is larger than 1KB, such that
143d3b15
TC
6376 the ABI specified buffer is maintained for the next callee.
6377
6378 The following registers are reserved during frame layout and should not be
6379 used for any other purpose:
6380
6381 - r11: Used by stack clash protection when SVE is enabled.
6382 - r12(EP0) and r13(EP1): Used as temporaries for stack adjustment.
6383 - r14 and r15: Used for speculation tracking.
6384 - r16(IP0), r17(IP1): Used by indirect tailcalls.
6385 - r30(LR), r29(FP): Used by standard frame layout.
6386
6387 These registers must be avoided in frame layout related code unless the
6388 explicit intention is to interact with one of the features listed above. */
43e9d192
IB
6389
6390/* Generate the prologue instructions for entry into a function.
6391 Establish the stack frame by decreasing the stack pointer with a
6392 properly calculated size and, if necessary, create a frame record
6393 filled with the values of LR and previous frame pointer. The
6991c977 6394 current FP is also set up if it is in use. */
43e9d192
IB
6395
6396void
6397aarch64_expand_prologue (void)
6398{
6a70badb
RS
6399 poly_int64 frame_size = cfun->machine->frame.frame_size;
6400 poly_int64 initial_adjust = cfun->machine->frame.initial_adjust;
71bfb77a 6401 HOST_WIDE_INT callee_adjust = cfun->machine->frame.callee_adjust;
6a70badb
RS
6402 poly_int64 final_adjust = cfun->machine->frame.final_adjust;
6403 poly_int64 callee_offset = cfun->machine->frame.callee_offset;
71bfb77a
WD
6404 unsigned reg1 = cfun->machine->frame.wb_candidate1;
6405 unsigned reg2 = cfun->machine->frame.wb_candidate2;
204d2c03 6406 bool emit_frame_chain = cfun->machine->frame.emit_frame_chain;
71bfb77a 6407 rtx_insn *insn;
43e9d192 6408
db58fd89
JW
6409 /* Sign return address for functions. */
6410 if (aarch64_return_address_signing_enabled ())
27169e45 6411 {
8fc16d72
ST
6412 switch (aarch64_ra_sign_key)
6413 {
6414 case AARCH64_KEY_A:
6415 insn = emit_insn (gen_paciasp ());
6416 break;
6417 case AARCH64_KEY_B:
6418 insn = emit_insn (gen_pacibsp ());
6419 break;
6420 default:
6421 gcc_unreachable ();
6422 }
27169e45
JW
6423 add_reg_note (insn, REG_CFA_TOGGLE_RA_MANGLE, const0_rtx);
6424 RTX_FRAME_RELATED_P (insn) = 1;
6425 }
db58fd89 6426
dd991abb 6427 if (flag_stack_usage_info)
6a70badb 6428 current_function_static_stack_size = constant_lower_bound (frame_size);
43e9d192 6429
a3eb8a52
EB
6430 if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK)
6431 {
6432 if (crtl->is_leaf && !cfun->calls_alloca)
6433 {
6a70badb
RS
6434 if (maybe_gt (frame_size, PROBE_INTERVAL)
6435 && maybe_gt (frame_size, get_stack_check_protect ()))
8c1dd970
JL
6436 aarch64_emit_probe_stack_range (get_stack_check_protect (),
6437 (frame_size
6438 - get_stack_check_protect ()));
a3eb8a52 6439 }
6a70badb 6440 else if (maybe_gt (frame_size, 0))
8c1dd970 6441 aarch64_emit_probe_stack_range (get_stack_check_protect (), frame_size);
a3eb8a52
EB
6442 }
6443
901e66e0
SD
6444 rtx tmp0_rtx = gen_rtx_REG (Pmode, EP0_REGNUM);
6445 rtx tmp1_rtx = gen_rtx_REG (Pmode, EP1_REGNUM);
f5470a77 6446
cd1bef27
JL
6447 /* In theory we should never have both an initial adjustment
6448 and a callee save adjustment. Verify that is the case since the
6449 code below does not handle it for -fstack-clash-protection. */
6450 gcc_assert (known_eq (initial_adjust, 0) || callee_adjust == 0);
6451
6452 /* Will only probe if the initial adjustment is larger than the guard
6453 less the amount of the guard reserved for use by the caller's
6454 outgoing args. */
901e66e0 6455 aarch64_allocate_and_probe_stack_space (tmp0_rtx, tmp1_rtx, initial_adjust,
cd1bef27 6456 true, false);
43e9d192 6457
71bfb77a
WD
6458 if (callee_adjust != 0)
6459 aarch64_push_regs (reg1, reg2, callee_adjust);
43e9d192 6460
204d2c03 6461 if (emit_frame_chain)
43e9d192 6462 {
43cacb12 6463 poly_int64 reg_offset = callee_adjust;
71bfb77a 6464 if (callee_adjust == 0)
43cacb12
RS
6465 {
6466 reg1 = R29_REGNUM;
6467 reg2 = R30_REGNUM;
6468 reg_offset = callee_offset;
6469 aarch64_save_callee_saves (DImode, reg_offset, reg1, reg2, false);
6470 }
f5470a77 6471 aarch64_add_offset (Pmode, hard_frame_pointer_rtx,
43cacb12 6472 stack_pointer_rtx, callee_offset,
901e66e0 6473 tmp1_rtx, tmp0_rtx, frame_pointer_needed);
43cacb12
RS
6474 if (frame_pointer_needed && !frame_size.is_constant ())
6475 {
6476 /* Variable-sized frames need to describe the save slot
6477 address using DW_CFA_expression rather than DW_CFA_offset.
6478 This means that, without taking further action, the
6479 locations of the registers that we've already saved would
6480 remain based on the stack pointer even after we redefine
6481 the CFA based on the frame pointer. We therefore need new
6482 DW_CFA_expressions to re-express the save slots with addresses
6483 based on the frame pointer. */
6484 rtx_insn *insn = get_last_insn ();
6485 gcc_assert (RTX_FRAME_RELATED_P (insn));
6486
6487 /* Add an explicit CFA definition if this was previously
6488 implicit. */
6489 if (!find_reg_note (insn, REG_CFA_ADJUST_CFA, NULL_RTX))
6490 {
6491 rtx src = plus_constant (Pmode, stack_pointer_rtx,
6492 callee_offset);
6493 add_reg_note (insn, REG_CFA_ADJUST_CFA,
6494 gen_rtx_SET (hard_frame_pointer_rtx, src));
6495 }
6496
6497 /* Change the save slot expressions for the registers that
6498 we've already saved. */
6499 reg_offset -= callee_offset;
6500 aarch64_add_cfa_expression (insn, reg2, hard_frame_pointer_rtx,
6501 reg_offset + UNITS_PER_WORD);
6502 aarch64_add_cfa_expression (insn, reg1, hard_frame_pointer_rtx,
6503 reg_offset);
6504 }
71bfb77a 6505 emit_insn (gen_stack_tie (stack_pointer_rtx, hard_frame_pointer_rtx));
43e9d192 6506 }
71bfb77a
WD
6507
6508 aarch64_save_callee_saves (DImode, callee_offset, R0_REGNUM, R30_REGNUM,
204d2c03 6509 callee_adjust != 0 || emit_frame_chain);
a0d0b980
SE
6510 if (aarch64_simd_decl_p (cfun->decl))
6511 aarch64_save_callee_saves (TFmode, callee_offset, V0_REGNUM, V31_REGNUM,
6512 callee_adjust != 0 || emit_frame_chain);
6513 else
6514 aarch64_save_callee_saves (DFmode, callee_offset, V0_REGNUM, V31_REGNUM,
6515 callee_adjust != 0 || emit_frame_chain);
cd1bef27
JL
6516
6517 /* We may need to probe the final adjustment if it is larger than the guard
 6518 that is assumed by the callee. */
901e66e0 6519 aarch64_allocate_and_probe_stack_space (tmp1_rtx, tmp0_rtx, final_adjust,
cd1bef27 6520 !frame_pointer_needed, true);
43e9d192
IB
6521}
6522
4f942779
RL
6523/* Return TRUE if we can use a simple_return insn.
6524
6525 This function checks whether the callee saved stack is empty, which
 6526 means no restore actions are needed. The pro_and_epilogue pass uses
 6527 this to check whether the shrink-wrapping optimization is feasible. */
6528
6529bool
6530aarch64_use_return_insn_p (void)
6531{
6532 if (!reload_completed)
6533 return false;
6534
6535 if (crtl->profile)
6536 return false;
6537
6a70badb 6538 return known_eq (cfun->machine->frame.frame_size, 0);
4f942779
RL
6539}
6540
a0d0b980
SE
6541/* Return false for non-leaf SIMD functions in order to avoid
 6542 shrink-wrapping them, since that would lose the necessary
 6543 save/restore of FP registers. */
6544
6545bool
6546aarch64_use_simple_return_insn_p (void)
6547{
6548 if (aarch64_simd_decl_p (cfun->decl) && !crtl->is_leaf)
6549 return false;
6550
6551 return true;
6552}
6553
71bfb77a
WD
6554/* Generate the epilogue instructions for returning from a function.
 6555 This is almost exactly the reverse of the prologue sequence, except
6556 that we need to insert barriers to avoid scheduling loads that read
6557 from a deallocated stack, and we optimize the unwind records by
6558 emitting them all together if possible. */
43e9d192
IB
6559void
6560aarch64_expand_epilogue (bool for_sibcall)
6561{
6a70badb 6562 poly_int64 initial_adjust = cfun->machine->frame.initial_adjust;
71bfb77a 6563 HOST_WIDE_INT callee_adjust = cfun->machine->frame.callee_adjust;
6a70badb
RS
6564 poly_int64 final_adjust = cfun->machine->frame.final_adjust;
6565 poly_int64 callee_offset = cfun->machine->frame.callee_offset;
71bfb77a
WD
6566 unsigned reg1 = cfun->machine->frame.wb_candidate1;
6567 unsigned reg2 = cfun->machine->frame.wb_candidate2;
6568 rtx cfi_ops = NULL;
6569 rtx_insn *insn;
901e66e0
SD
6570 /* A stack clash protection prologue may not have left EP0_REGNUM or
6571 EP1_REGNUM in a usable state. The same is true for allocations
43cacb12 6572 with an SVE component, since we then need both temporary registers
cd1bef27
JL
6573 for each allocation. For stack clash we are in a usable state if
6574 the adjustment is less than GUARD_SIZE - GUARD_USED_BY_CALLER. */
6575 HOST_WIDE_INT guard_size
6576 = 1 << PARAM_VALUE (PARAM_STACK_CLASH_PROTECTION_GUARD_SIZE);
6577 HOST_WIDE_INT guard_used_by_caller = STACK_CLASH_CALLER_GUARD;
6578
6579 /* We can re-use the registers when the allocation amount is smaller than
6580 guard_size - guard_used_by_caller because we won't be doing any probes
6581 then. In such situations the register should remain live with the correct
6582 value. */
43cacb12 6583 bool can_inherit_p = (initial_adjust.is_constant ()
cd1bef27
JL
6584 && final_adjust.is_constant ())
6585 && (!flag_stack_clash_protection
901e66e0
SD
6586 || known_lt (initial_adjust,
6587 guard_size - guard_used_by_caller));
44c0e7b9 6588
71bfb77a 6589 /* We need to add memory barrier to prevent read from deallocated stack. */
6a70badb
RS
6590 bool need_barrier_p
6591 = maybe_ne (get_frame_size ()
6592 + cfun->machine->frame.saved_varargs_size, 0);
43e9d192 6593
71bfb77a 6594 /* Emit a barrier to prevent loads from a deallocated stack. */
6a70badb
RS
6595 if (maybe_gt (final_adjust, crtl->outgoing_args_size)
6596 || cfun->calls_alloca
8144a493 6597 || crtl->calls_eh_return)
43e9d192 6598 {
71bfb77a
WD
6599 emit_insn (gen_stack_tie (stack_pointer_rtx, stack_pointer_rtx));
6600 need_barrier_p = false;
6601 }
7e8c2bd5 6602
71bfb77a
WD
6603 /* Restore the stack pointer from the frame pointer if it may not
6604 be the same as the stack pointer. */
901e66e0
SD
6605 rtx tmp0_rtx = gen_rtx_REG (Pmode, EP0_REGNUM);
6606 rtx tmp1_rtx = gen_rtx_REG (Pmode, EP1_REGNUM);
6a70badb
RS
6607 if (frame_pointer_needed
6608 && (maybe_ne (final_adjust, 0) || cfun->calls_alloca))
f5470a77
RS
6609 /* If writeback is used when restoring callee-saves, the CFA
6610 is restored on the instruction doing the writeback. */
6611 aarch64_add_offset (Pmode, stack_pointer_rtx,
6612 hard_frame_pointer_rtx, -callee_offset,
901e66e0 6613 tmp1_rtx, tmp0_rtx, callee_adjust == 0);
71bfb77a 6614 else
cd1bef27
JL
6615 /* The case where we need to re-use the register here is very rare, so
6616 avoid the complicated condition and just always emit a move if the
6617 immediate doesn't fit. */
901e66e0 6618 aarch64_add_sp (tmp1_rtx, tmp0_rtx, final_adjust, true);
43e9d192 6619
71bfb77a
WD
6620 aarch64_restore_callee_saves (DImode, callee_offset, R0_REGNUM, R30_REGNUM,
6621 callee_adjust != 0, &cfi_ops);
a0d0b980
SE
6622 if (aarch64_simd_decl_p (cfun->decl))
6623 aarch64_restore_callee_saves (TFmode, callee_offset, V0_REGNUM, V31_REGNUM,
6624 callee_adjust != 0, &cfi_ops);
6625 else
6626 aarch64_restore_callee_saves (DFmode, callee_offset, V0_REGNUM, V31_REGNUM,
6627 callee_adjust != 0, &cfi_ops);
43e9d192 6628
71bfb77a
WD
6629 if (need_barrier_p)
6630 emit_insn (gen_stack_tie (stack_pointer_rtx, stack_pointer_rtx));
6631
6632 if (callee_adjust != 0)
6633 aarch64_pop_regs (reg1, reg2, callee_adjust, &cfi_ops);
6634
6a70badb 6635 if (callee_adjust != 0 || maybe_gt (initial_adjust, 65536))
71bfb77a
WD
6636 {
6637 /* Emit delayed restores and set the CFA to be SP + initial_adjust. */
89ac681e 6638 insn = get_last_insn ();
71bfb77a
WD
6639 rtx new_cfa = plus_constant (Pmode, stack_pointer_rtx, initial_adjust);
6640 REG_NOTES (insn) = alloc_reg_note (REG_CFA_DEF_CFA, new_cfa, cfi_ops);
43e9d192 6641 RTX_FRAME_RELATED_P (insn) = 1;
71bfb77a 6642 cfi_ops = NULL;
43e9d192
IB
6643 }
6644
901e66e0
SD
 6645 /* Liveness of EP0_REGNUM cannot be trusted across function calls either, so
 6646 restrict the emit_move optimization to leaf functions. */
6647 aarch64_add_sp (tmp0_rtx, tmp1_rtx, initial_adjust,
6648 (!can_inherit_p || !crtl->is_leaf
6649 || df_regs_ever_live_p (EP0_REGNUM)));
7e8c2bd5 6650
71bfb77a
WD
6651 if (cfi_ops)
6652 {
6653 /* Emit delayed restores and reset the CFA to be SP. */
6654 insn = get_last_insn ();
6655 cfi_ops = alloc_reg_note (REG_CFA_DEF_CFA, stack_pointer_rtx, cfi_ops);
6656 REG_NOTES (insn) = cfi_ops;
6657 RTX_FRAME_RELATED_P (insn) = 1;
dd991abb
RH
6658 }
6659
db58fd89
JW
6660 /* We prefer to emit the combined return/authenticate instruction RETAA,
6661 however there are three cases in which we must instead emit an explicit
6662 authentication instruction.
6663
6664 1) Sibcalls don't return in a normal way, so if we're about to call one
6665 we must authenticate.
6666
6667 2) The RETAA instruction is not available before ARMv8.3-A, so if we are
6668 generating code for !TARGET_ARMV8_3 we can't use it and must
6669 explicitly authenticate.
6670
6671 3) On an eh_return path we make extra stack adjustments to update the
6672 canonical frame address to be the exception handler's CFA. We want
6673 to authenticate using the CFA of the function which calls eh_return.
6674 */
6675 if (aarch64_return_address_signing_enabled ()
6676 && (for_sibcall || !TARGET_ARMV8_3 || crtl->calls_eh_return))
27169e45 6677 {
8fc16d72
ST
6678 switch (aarch64_ra_sign_key)
6679 {
6680 case AARCH64_KEY_A:
6681 insn = emit_insn (gen_autiasp ());
6682 break;
6683 case AARCH64_KEY_B:
6684 insn = emit_insn (gen_autibsp ());
6685 break;
6686 default:
6687 gcc_unreachable ();
6688 }
27169e45
JW
6689 add_reg_note (insn, REG_CFA_TOGGLE_RA_MANGLE, const0_rtx);
6690 RTX_FRAME_RELATED_P (insn) = 1;
6691 }
db58fd89 6692
dd991abb 6693 /* Stack adjustment for exception handler. */
b5b9147d 6694 if (crtl->calls_eh_return && !for_sibcall)
dd991abb
RH
6695 {
6696 /* We need to unwind the stack by the offset computed by
6697 EH_RETURN_STACKADJ_RTX. We have already reset the CFA
6698 to be SP; letting the CFA move during this adjustment
6699 is just as correct as retaining the CFA from the body
6700 of the function. Therefore, do nothing special. */
6701 emit_insn (gen_add2_insn (stack_pointer_rtx, EH_RETURN_STACKADJ_RTX));
43e9d192
IB
6702 }
6703
6704 emit_use (gen_rtx_REG (DImode, LR_REGNUM));
6705 if (!for_sibcall)
6706 emit_jump_insn (ret_rtx);
6707}
6708
8144a493
WD
6709/* Implement EH_RETURN_HANDLER_RTX. EH returns need to either return
6710 normally or return to a previous frame after unwinding.
1c960e02 6711
8144a493
WD
6712 An EH return uses a single shared return sequence. The epilogue is
6713 exactly like a normal epilogue except that it has an extra input
6714 register (EH_RETURN_STACKADJ_RTX) which contains the stack adjustment
6715 that must be applied after the frame has been destroyed. An extra label
6716 is inserted before the epilogue which initializes this register to zero,
6717 and this is the entry point for a normal return.
43e9d192 6718
8144a493
WD
6719 An actual EH return updates the return address, initializes the stack
6720 adjustment and jumps directly into the epilogue (bypassing the zeroing
6721 of the adjustment). Since the return address is typically saved on the
6722 stack when a function makes a call, the saved LR must be updated outside
6723 the epilogue.
43e9d192 6724
8144a493
WD
6725 This poses problems as the store is generated well before the epilogue,
6726 so the offset of LR is not known yet. Also optimizations will remove the
6727 store as it appears dead, even after the epilogue is generated (as the
6728 base or offset for loading LR is different in many cases).
43e9d192 6729
8144a493
WD
6730 To avoid these problems this implementation forces the frame pointer
6731 in eh_return functions so that the location of LR is fixed and known early.
6732 It also marks the store volatile, so no optimization is permitted to
6733 remove the store. */
6734rtx
6735aarch64_eh_return_handler_rtx (void)
6736{
6737 rtx tmp = gen_frame_mem (Pmode,
6738 plus_constant (Pmode, hard_frame_pointer_rtx, UNITS_PER_WORD));
43e9d192 6739
8144a493
WD
6740 /* Mark the store volatile, so no optimization is permitted to remove it. */
6741 MEM_VOLATILE_P (tmp) = true;
6742 return tmp;
43e9d192
IB
6743}
6744
43e9d192
IB
6745/* Output code to add DELTA to the first argument, and then jump
6746 to FUNCTION. Used for C++ multiple inheritance. */
6747static void
6748aarch64_output_mi_thunk (FILE *file, tree thunk ATTRIBUTE_UNUSED,
6749 HOST_WIDE_INT delta,
6750 HOST_WIDE_INT vcall_offset,
6751 tree function)
6752{
6753 /* The this pointer is always in x0. Note that this differs from
 6754 Arm where the this pointer may be bumped to r1 if r0 is required
6755 to return a pointer to an aggregate. On AArch64 a result value
6756 pointer will be in x8. */
6757 int this_regno = R0_REGNUM;
5d8a22a5
DM
6758 rtx this_rtx, temp0, temp1, addr, funexp;
6759 rtx_insn *insn;
6b5777c6 6760 const char *fnname = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (thunk));
43e9d192 6761
c904388d
SD
6762 if (aarch64_bti_enabled ())
6763 emit_insn (gen_bti_c());
6764
75f1d6fc
SN
6765 reload_completed = 1;
6766 emit_note (NOTE_INSN_PROLOGUE_END);
43e9d192 6767
f5470a77 6768 this_rtx = gen_rtx_REG (Pmode, this_regno);
901e66e0
SD
6769 temp0 = gen_rtx_REG (Pmode, EP0_REGNUM);
6770 temp1 = gen_rtx_REG (Pmode, EP1_REGNUM);
f5470a77 6771
43e9d192 6772 if (vcall_offset == 0)
43cacb12 6773 aarch64_add_offset (Pmode, this_rtx, this_rtx, delta, temp1, temp0, false);
43e9d192
IB
6774 else
6775 {
28514dda 6776 gcc_assert ((vcall_offset & (POINTER_BYTES - 1)) == 0);
43e9d192 6777
75f1d6fc
SN
6778 addr = this_rtx;
6779 if (delta != 0)
6780 {
6781 if (delta >= -256 && delta < 256)
6782 addr = gen_rtx_PRE_MODIFY (Pmode, this_rtx,
6783 plus_constant (Pmode, this_rtx, delta));
6784 else
43cacb12
RS
6785 aarch64_add_offset (Pmode, this_rtx, this_rtx, delta,
6786 temp1, temp0, false);
43e9d192
IB
6787 }
6788
28514dda
YZ
6789 if (Pmode == ptr_mode)
6790 aarch64_emit_move (temp0, gen_rtx_MEM (ptr_mode, addr));
6791 else
6792 aarch64_emit_move (temp0,
6793 gen_rtx_ZERO_EXTEND (Pmode,
6794 gen_rtx_MEM (ptr_mode, addr)));
75f1d6fc 6795
28514dda 6796 if (vcall_offset >= -256 && vcall_offset < 4096 * POINTER_BYTES)
75f1d6fc 6797 addr = plus_constant (Pmode, temp0, vcall_offset);
43e9d192
IB
6798 else
6799 {
f43657b4
JW
6800 aarch64_internal_mov_immediate (temp1, GEN_INT (vcall_offset), true,
6801 Pmode);
75f1d6fc 6802 addr = gen_rtx_PLUS (Pmode, temp0, temp1);
43e9d192
IB
6803 }
6804
28514dda
YZ
6805 if (Pmode == ptr_mode)
6806 aarch64_emit_move (temp1, gen_rtx_MEM (ptr_mode,addr));
6807 else
6808 aarch64_emit_move (temp1,
6809 gen_rtx_SIGN_EXTEND (Pmode,
6810 gen_rtx_MEM (ptr_mode, addr)));
6811
75f1d6fc 6812 emit_insn (gen_add2_insn (this_rtx, temp1));
43e9d192
IB
6813 }
6814
75f1d6fc
SN
6815 /* Generate a tail call to the target function. */
6816 if (!TREE_USED (function))
6817 {
6818 assemble_external (function);
6819 TREE_USED (function) = 1;
6820 }
6821 funexp = XEXP (DECL_RTL (function), 0);
6822 funexp = gen_rtx_MEM (FUNCTION_MODE, funexp);
6823 insn = emit_call_insn (gen_sibcall (funexp, const0_rtx, NULL_RTX));
6824 SIBLING_CALL_P (insn) = 1;
6825
6826 insn = get_insns ();
6827 shorten_branches (insn);
6b5777c6
MF
6828
6829 assemble_start_function (thunk, fnname);
75f1d6fc
SN
6830 final_start_function (insn, file, 1);
6831 final (insn, file, 1);
43e9d192 6832 final_end_function ();
6b5777c6 6833 assemble_end_function (thunk, fnname);
75f1d6fc
SN
6834
6835 /* Stop pretending to be a post-reload pass. */
6836 reload_completed = 0;
43e9d192
IB
6837}
6838
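The pointer adjustment that the emitted thunk performs can be written as plain C (a hypothetical sketch of the arithmetic only; the real thunk keeps the value in x0 and then tail-calls FUNCTION):

/* Adjust THIS_PTR by DELTA and, when VCALL_OFFSET is nonzero, by a further
   displacement loaded from the object's vtable.  */
static void *
thunk_this_adjust (void *this_ptr, long delta, long vcall_offset)
{
  char *p = (char *) this_ptr + delta;
  if (vcall_offset != 0)
    {
      /* The adjusted object starts with its vtable pointer; the extra
         adjustment is stored in the vtable at byte offset VCALL_OFFSET.  */
      char *vtable = *(char **) p;
      p += *(long *) (vtable + vcall_offset);
    }
  return p;
}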
43e9d192
IB
6839static bool
6840aarch64_tls_referenced_p (rtx x)
6841{
6842 if (!TARGET_HAVE_TLS)
6843 return false;
e7de8563
RS
6844 subrtx_iterator::array_type array;
6845 FOR_EACH_SUBRTX (iter, array, x, ALL)
6846 {
6847 const_rtx x = *iter;
6848 if (GET_CODE (x) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (x) != 0)
6849 return true;
6850 /* Don't recurse into UNSPEC_TLS looking for TLS symbols; these are
6851 TLS offsets, not real symbol references. */
6852 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
6853 iter.skip_subrtxes ();
6854 }
6855 return false;
43e9d192
IB
6856}
6857
6858
43e9d192
IB
6859/* Return true if val can be encoded as a 12-bit unsigned immediate with
6860 a left shift of 0 or 12 bits. */
6861bool
6862aarch64_uimm12_shift (HOST_WIDE_INT val)
6863{
6864 return ((val & (((HOST_WIDE_INT) 0xfff) << 0)) == val
6865 || (val & (((HOST_WIDE_INT) 0xfff) << 12)) == val
6866 );
6867}
6868
eb471ba3
TC
6869/* Return VAL rounded down to the nearest value that fits as a 12-bit unsigned
 6870 immediate with a left shift of 0 or 12. */
6871static HOST_WIDE_INT
6872aarch64_clamp_to_uimm12_shift (HOST_WIDE_INT val)
6873{
6874 /* Check to see if the value fits in 24 bits, as that is the maximum we can
6875 handle correctly. */
6876 gcc_assert ((val & 0xffffff) == val);
6877
6878 if (((val & 0xfff) << 0) == val)
6879 return val;
6880
6881 return val & (0xfff << 12);
6882}
43e9d192
IB
6883
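As a standalone sketch of the two helpers above (an illustration, not this file's code): an add/sub immediate is a 12-bit value optionally shifted left by 12, so 0xabc and 0xabc000 are encodable while 0x123456 is not and clamps to 0x123000.

static int
uimm12_shift_ok (long long val)
{
  return (val & 0xfffLL) == val || (val & (0xfffLL << 12)) == val;
}

static long long
clamp_to_uimm12_shift (long long val)  /* VAL is assumed to fit in 24 bits */
{
  return (val & 0xfff) == val ? val : val & (0xfffLL << 12);
}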
6884/* Return true if val is an immediate that can be loaded into a
6885 register by a MOVZ instruction. */
6886static bool
77e994c9 6887aarch64_movw_imm (HOST_WIDE_INT val, scalar_int_mode mode)
43e9d192
IB
6888{
6889 if (GET_MODE_SIZE (mode) > 4)
6890 {
6891 if ((val & (((HOST_WIDE_INT) 0xffff) << 32)) == val
6892 || (val & (((HOST_WIDE_INT) 0xffff) << 48)) == val)
6893 return 1;
6894 }
6895 else
6896 {
43cacb12
RS
6897 /* Ignore sign extension. */
6898 val &= (HOST_WIDE_INT) 0xffffffff;
6899 }
6900 return ((val & (((HOST_WIDE_INT) 0xffff) << 0)) == val
6901 || (val & (((HOST_WIDE_INT) 0xffff) << 16)) == val);
6902}
6903
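A standalone sketch of the MOVZ test above (an illustration only): the value must consist of a single 16-bit chunk at bit position 0, 16, 32 or 48, with every other bit clear; aarch64_move_imm below applies the same test to ~val to cover MOVN.

#include <stdbool.h>
#include <stdint.h>

static bool
movz_imm_ok (uint64_t val)
{
  for (int shift = 0; shift < 64; shift += 16)
    if ((val & ~(UINT64_C (0xffff) << shift)) == 0)
      return true;
  return false;
}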
6904/* VAL is a value with the inner mode of MODE. Replicate it to fill a
6905 64-bit (DImode) integer. */
6906
6907static unsigned HOST_WIDE_INT
6908aarch64_replicate_bitmask_imm (unsigned HOST_WIDE_INT val, machine_mode mode)
6909{
6910 unsigned int size = GET_MODE_UNIT_PRECISION (mode);
6911 while (size < 64)
6912 {
6913 val &= (HOST_WIDE_INT_1U << size) - 1;
6914 val |= val << size;
6915 size *= 2;
43e9d192 6916 }
43cacb12 6917 return val;
43e9d192
IB
6918}
6919
a64c73a2
WD
6920/* Multipliers for repeating bitmasks of width 32, 16, 8, 4, and 2. */
6921
6922static const unsigned HOST_WIDE_INT bitmask_imm_mul[] =
6923 {
6924 0x0000000100000001ull,
6925 0x0001000100010001ull,
6926 0x0101010101010101ull,
6927 0x1111111111111111ull,
6928 0x5555555555555555ull,
6929 };
6930
43e9d192
IB
6931
6932/* Return true if val is a valid bitmask immediate. */
a64c73a2 6933
43e9d192 6934bool
a64c73a2 6935aarch64_bitmask_imm (HOST_WIDE_INT val_in, machine_mode mode)
43e9d192 6936{
a64c73a2
WD
6937 unsigned HOST_WIDE_INT val, tmp, mask, first_one, next_one;
6938 int bits;
6939
6940 /* Check for a single sequence of one bits and return quickly if so.
6941 The special cases of all ones and all zeroes returns false. */
43cacb12 6942 val = aarch64_replicate_bitmask_imm (val_in, mode);
a64c73a2
WD
6943 tmp = val + (val & -val);
6944
6945 if (tmp == (tmp & -tmp))
6946 return (val + 1) > 1;
6947
6948 /* Replicate 32-bit immediates so we can treat them as 64-bit. */
6949 if (mode == SImode)
6950 val = (val << 32) | (val & 0xffffffff);
6951
6952 /* Invert if the immediate doesn't start with a zero bit - this means we
6953 only need to search for sequences of one bits. */
6954 if (val & 1)
6955 val = ~val;
6956
6957 /* Find the first set bit and set tmp to val with the first sequence of one
6958 bits removed. Return success if there is a single sequence of ones. */
6959 first_one = val & -val;
6960 tmp = val & (val + first_one);
6961
6962 if (tmp == 0)
6963 return true;
6964
6965 /* Find the next set bit and compute the difference in bit position. */
6966 next_one = tmp & -tmp;
6967 bits = clz_hwi (first_one) - clz_hwi (next_one);
6968 mask = val ^ tmp;
6969
6970 /* Check the bit position difference is a power of 2, and that the first
6971 sequence of one bits fits within 'bits' bits. */
6972 if ((mask >> bits) != 0 || bits != (bits & -bits))
6973 return false;
6974
6975 /* Check the sequence of one bits is repeated 64/bits times. */
6976 return val == mask * bitmask_imm_mul[__builtin_clz (bits) - 26];
43e9d192
IB
6977}
6978
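The function above is a fast bit-trick form of the architectural definition of a logical immediate: a contiguous run of ones, possibly rotated, replicated across the register at a power-of-two element size from 2 to 64 bits, with all-zeros and all-ones excluded. For example, 0x00ff00ff00ff00ff is valid (an 8-bit run replicated every 16 bits) while 0x0000ffff0000fffe is not. A slower but literal standalone sketch of that definition (an illustration, not the code GCC uses):

#include <stdbool.h>
#include <stdint.h>

static bool
is_logical_imm64 (uint64_t val)
{
  if (val == 0 || val == ~UINT64_C (0))
    return false;                       /* never encodable */
  for (unsigned esize = 2; esize <= 64; esize *= 2)
    {
      uint64_t emask = esize == 64 ? ~UINT64_C (0)
                                   : (UINT64_C (1) << esize) - 1;
      uint64_t elt = val & emask;

      /* The low element must replicate across the whole register.  */
      bool replicated = true;
      for (unsigned i = esize; i < 64 && replicated; i += esize)
        replicated = ((val >> i) & emask) == elt;
      if (!replicated)
        continue;

      /* The element's set bits must form one contiguous run under some
         rotation (the run may wrap around the element).  */
      for (unsigned r = 0; r < esize; r++)
        {
          uint64_t rot = r == 0 ? elt
                       : ((elt >> r) | (elt << (esize - r))) & emask;
          if (rot != 0 && (rot & (rot + 1)) == 0)
            return true;
        }
    }
  return false;
}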
43fd192f
MC
6979/* Create mask of ones, covering the lowest to highest bits set in VAL_IN.
6980 Assumed precondition: VAL_IN Is not zero. */
6981
6982unsigned HOST_WIDE_INT
6983aarch64_and_split_imm1 (HOST_WIDE_INT val_in)
6984{
6985 int lowest_bit_set = ctz_hwi (val_in);
6986 int highest_bit_set = floor_log2 (val_in);
6987 gcc_assert (val_in != 0);
6988
6989 return ((HOST_WIDE_INT_UC (2) << highest_bit_set) -
6990 (HOST_WIDE_INT_1U << lowest_bit_set));
6991}
6992
6993/* Create constant where bits outside of lowest bit set to highest bit set
6994 are set to 1. */
6995
6996unsigned HOST_WIDE_INT
6997aarch64_and_split_imm2 (HOST_WIDE_INT val_in)
6998{
6999 return val_in | ~aarch64_and_split_imm1 (val_in);
7000}
7001
7002/* Return true if VAL_IN is a valid 'and' bitmask immediate. */
7003
7004bool
7005aarch64_and_bitmask_imm (unsigned HOST_WIDE_INT val_in, machine_mode mode)
7006{
77e994c9
RS
7007 scalar_int_mode int_mode;
7008 if (!is_a <scalar_int_mode> (mode, &int_mode))
7009 return false;
7010
7011 if (aarch64_bitmask_imm (val_in, int_mode))
43fd192f
MC
7012 return false;
7013
77e994c9 7014 if (aarch64_move_imm (val_in, int_mode))
43fd192f
MC
7015 return false;
7016
7017 unsigned HOST_WIDE_INT imm2 = aarch64_and_split_imm2 (val_in);
7018
77e994c9 7019 return aarch64_bitmask_imm (imm2, int_mode);
43fd192f 7020}
43e9d192
IB
7021
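When an AND constant fails both tests above, the two split helpers produce a pair of logical immediates whose conjunction reproduces it. A standalone sketch with a worked example (an illustration, not the GCC helpers): 0x000ff0f0 is not a logical immediate, but imm1 = 0x000ffff0 and imm2 (whose low 32 bits are 0xfffff0ff, with all higher bits set) both are, and (x & imm1) & imm2 == x & 0x000ff0f0.

#include <stdint.h>

/* Ones covering the lowest through highest set bit of VAL (VAL != 0).  */
static uint64_t
and_split_imm1_sketch (uint64_t val)
{
  int lo = __builtin_ctzll (val);
  int hi = 63 - __builtin_clzll (val);
  return (UINT64_C (2) << hi) - (UINT64_C (1) << lo);
}

/* VAL with every bit outside that range forced to one.  */
static uint64_t
and_split_imm2_sketch (uint64_t val)
{
  return val | ~and_split_imm1_sketch (val);
}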
7022/* Return true if val is an immediate that can be loaded into a
7023 register in a single instruction. */
7024bool
ef4bddc2 7025aarch64_move_imm (HOST_WIDE_INT val, machine_mode mode)
43e9d192 7026{
77e994c9
RS
7027 scalar_int_mode int_mode;
7028 if (!is_a <scalar_int_mode> (mode, &int_mode))
7029 return false;
7030
7031 if (aarch64_movw_imm (val, int_mode) || aarch64_movw_imm (~val, int_mode))
43e9d192 7032 return 1;
77e994c9 7033 return aarch64_bitmask_imm (val, int_mode);
43e9d192
IB
7034}
7035
7036static bool
ef4bddc2 7037aarch64_cannot_force_const_mem (machine_mode mode ATTRIBUTE_UNUSED, rtx x)
43e9d192
IB
7038{
7039 rtx base, offset;
7eda14e1 7040
43e9d192
IB
7041 if (GET_CODE (x) == HIGH)
7042 return true;
7043
43cacb12
RS
7044 /* There's no way to calculate VL-based values using relocations. */
7045 subrtx_iterator::array_type array;
7046 FOR_EACH_SUBRTX (iter, array, x, ALL)
7047 if (GET_CODE (*iter) == CONST_POLY_INT)
7048 return true;
7049
43e9d192
IB
7050 split_const (x, &base, &offset);
7051 if (GET_CODE (base) == SYMBOL_REF || GET_CODE (base) == LABEL_REF)
28514dda 7052 {
43cacb12 7053 if (aarch64_classify_symbol (base, INTVAL (offset))
28514dda
YZ
7054 != SYMBOL_FORCE_TO_MEM)
7055 return true;
7056 else
7057 /* Avoid generating a 64-bit relocation in ILP32; leave
7058 to aarch64_expand_mov_immediate to handle it properly. */
7059 return mode != ptr_mode;
7060 }
43e9d192
IB
7061
7062 return aarch64_tls_referenced_p (x);
7063}
7064
e79136e4
WD
7065/* Implement TARGET_CASE_VALUES_THRESHOLD.
7066 The expansion for a table switch is quite expensive due to the number
 7067 of instructions, the table lookup and the hard-to-predict indirect jump.
 7068 When optimizing for speed at -O3, use the per-core tuning if
7069 set, otherwise use tables for > 16 cases as a tradeoff between size and
7070 performance. When optimizing for size, use the default setting. */
50487d79
EM
7071
7072static unsigned int
7073aarch64_case_values_threshold (void)
7074{
7075 /* Use the specified limit for the number of cases before using jump
7076 tables at higher optimization levels. */
7077 if (optimize > 2
7078 && selected_cpu->tune->max_case_values != 0)
7079 return selected_cpu->tune->max_case_values;
7080 else
e79136e4 7081 return optimize_size ? default_case_values_threshold () : 17;
50487d79
EM
7082}
7083
43e9d192
IB
7084/* Return true if register REGNO is a valid index register.
7085 STRICT_P is true if REG_OK_STRICT is in effect. */
7086
7087bool
7088aarch64_regno_ok_for_index_p (int regno, bool strict_p)
7089{
7090 if (!HARD_REGISTER_NUM_P (regno))
7091 {
7092 if (!strict_p)
7093 return true;
7094
7095 if (!reg_renumber)
7096 return false;
7097
7098 regno = reg_renumber[regno];
7099 }
7100 return GP_REGNUM_P (regno);
7101}
7102
7103/* Return true if register REGNO is a valid base register for mode MODE.
7104 STRICT_P is true if REG_OK_STRICT is in effect. */
7105
7106bool
7107aarch64_regno_ok_for_base_p (int regno, bool strict_p)
7108{
7109 if (!HARD_REGISTER_NUM_P (regno))
7110 {
7111 if (!strict_p)
7112 return true;
7113
7114 if (!reg_renumber)
7115 return false;
7116
7117 regno = reg_renumber[regno];
7118 }
7119
7120 /* The fake registers will be eliminated to either the stack or
7121 hard frame pointer, both of which are usually valid base registers.
7122 Reload deals with the cases where the eliminated form isn't valid. */
7123 return (GP_REGNUM_P (regno)
7124 || regno == SP_REGNUM
7125 || regno == FRAME_POINTER_REGNUM
7126 || regno == ARG_POINTER_REGNUM);
7127}
7128
7129/* Return true if X is a valid base register for mode MODE.
7130 STRICT_P is true if REG_OK_STRICT is in effect. */
7131
7132static bool
7133aarch64_base_register_rtx_p (rtx x, bool strict_p)
7134{
76160199
RS
7135 if (!strict_p
7136 && GET_CODE (x) == SUBREG
7137 && contains_reg_of_mode[GENERAL_REGS][GET_MODE (SUBREG_REG (x))])
43e9d192
IB
7138 x = SUBREG_REG (x);
7139
7140 return (REG_P (x) && aarch64_regno_ok_for_base_p (REGNO (x), strict_p));
7141}
7142
7143/* Return true if address offset is a valid index. If it is, fill in INFO
7144 appropriately. STRICT_P is true if REG_OK_STRICT is in effect. */
7145
7146static bool
7147aarch64_classify_index (struct aarch64_address_info *info, rtx x,
ef4bddc2 7148 machine_mode mode, bool strict_p)
43e9d192
IB
7149{
7150 enum aarch64_address_type type;
7151 rtx index;
7152 int shift;
7153
7154 /* (reg:P) */
7155 if ((REG_P (x) || GET_CODE (x) == SUBREG)
7156 && GET_MODE (x) == Pmode)
7157 {
7158 type = ADDRESS_REG_REG;
7159 index = x;
7160 shift = 0;
7161 }
7162 /* (sign_extend:DI (reg:SI)) */
7163 else if ((GET_CODE (x) == SIGN_EXTEND
7164 || GET_CODE (x) == ZERO_EXTEND)
7165 && GET_MODE (x) == DImode
7166 && GET_MODE (XEXP (x, 0)) == SImode)
7167 {
7168 type = (GET_CODE (x) == SIGN_EXTEND)
7169 ? ADDRESS_REG_SXTW : ADDRESS_REG_UXTW;
7170 index = XEXP (x, 0);
7171 shift = 0;
7172 }
7173 /* (mult:DI (sign_extend:DI (reg:SI)) (const_int scale)) */
7174 else if (GET_CODE (x) == MULT
7175 && (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
7176 || GET_CODE (XEXP (x, 0)) == ZERO_EXTEND)
7177 && GET_MODE (XEXP (x, 0)) == DImode
7178 && GET_MODE (XEXP (XEXP (x, 0), 0)) == SImode
7179 && CONST_INT_P (XEXP (x, 1)))
7180 {
7181 type = (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND)
7182 ? ADDRESS_REG_SXTW : ADDRESS_REG_UXTW;
7183 index = XEXP (XEXP (x, 0), 0);
7184 shift = exact_log2 (INTVAL (XEXP (x, 1)));
7185 }
7186 /* (ashift:DI (sign_extend:DI (reg:SI)) (const_int shift)) */
7187 else if (GET_CODE (x) == ASHIFT
7188 && (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
7189 || GET_CODE (XEXP (x, 0)) == ZERO_EXTEND)
7190 && GET_MODE (XEXP (x, 0)) == DImode
7191 && GET_MODE (XEXP (XEXP (x, 0), 0)) == SImode
7192 && CONST_INT_P (XEXP (x, 1)))
7193 {
7194 type = (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND)
7195 ? ADDRESS_REG_SXTW : ADDRESS_REG_UXTW;
7196 index = XEXP (XEXP (x, 0), 0);
7197 shift = INTVAL (XEXP (x, 1));
7198 }
7199 /* (sign_extract:DI (mult:DI (reg:DI) (const_int scale)) 32+shift 0) */
7200 else if ((GET_CODE (x) == SIGN_EXTRACT
7201 || GET_CODE (x) == ZERO_EXTRACT)
7202 && GET_MODE (x) == DImode
7203 && GET_CODE (XEXP (x, 0)) == MULT
7204 && GET_MODE (XEXP (XEXP (x, 0), 0)) == DImode
7205 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))
7206 {
7207 type = (GET_CODE (x) == SIGN_EXTRACT)
7208 ? ADDRESS_REG_SXTW : ADDRESS_REG_UXTW;
7209 index = XEXP (XEXP (x, 0), 0);
7210 shift = exact_log2 (INTVAL (XEXP (XEXP (x, 0), 1)));
7211 if (INTVAL (XEXP (x, 1)) != 32 + shift
7212 || INTVAL (XEXP (x, 2)) != 0)
7213 shift = -1;
7214 }
7215 /* (and:DI (mult:DI (reg:DI) (const_int scale))
7216 (const_int 0xffffffff<<shift)) */
7217 else if (GET_CODE (x) == AND
7218 && GET_MODE (x) == DImode
7219 && GET_CODE (XEXP (x, 0)) == MULT
7220 && GET_MODE (XEXP (XEXP (x, 0), 0)) == DImode
7221 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
7222 && CONST_INT_P (XEXP (x, 1)))
7223 {
7224 type = ADDRESS_REG_UXTW;
7225 index = XEXP (XEXP (x, 0), 0);
7226 shift = exact_log2 (INTVAL (XEXP (XEXP (x, 0), 1)));
7227 if (INTVAL (XEXP (x, 1)) != (HOST_WIDE_INT)0xffffffff << shift)
7228 shift = -1;
7229 }
7230 /* (sign_extract:DI (ashift:DI (reg:DI) (const_int shift)) 32+shift 0) */
7231 else if ((GET_CODE (x) == SIGN_EXTRACT
7232 || GET_CODE (x) == ZERO_EXTRACT)
7233 && GET_MODE (x) == DImode
7234 && GET_CODE (XEXP (x, 0)) == ASHIFT
7235 && GET_MODE (XEXP (XEXP (x, 0), 0)) == DImode
7236 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))
7237 {
7238 type = (GET_CODE (x) == SIGN_EXTRACT)
7239 ? ADDRESS_REG_SXTW : ADDRESS_REG_UXTW;
7240 index = XEXP (XEXP (x, 0), 0);
7241 shift = INTVAL (XEXP (XEXP (x, 0), 1));
7242 if (INTVAL (XEXP (x, 1)) != 32 + shift
7243 || INTVAL (XEXP (x, 2)) != 0)
7244 shift = -1;
7245 }
7246 /* (and:DI (ashift:DI (reg:DI) (const_int shift))
7247 (const_int 0xffffffff<<shift)) */
7248 else if (GET_CODE (x) == AND
7249 && GET_MODE (x) == DImode
7250 && GET_CODE (XEXP (x, 0)) == ASHIFT
7251 && GET_MODE (XEXP (XEXP (x, 0), 0)) == DImode
7252 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
7253 && CONST_INT_P (XEXP (x, 1)))
7254 {
7255 type = ADDRESS_REG_UXTW;
7256 index = XEXP (XEXP (x, 0), 0);
7257 shift = INTVAL (XEXP (XEXP (x, 0), 1));
7258 if (INTVAL (XEXP (x, 1)) != (HOST_WIDE_INT)0xffffffff << shift)
7259 shift = -1;
7260 }
7261 /* (mult:P (reg:P) (const_int scale)) */
7262 else if (GET_CODE (x) == MULT
7263 && GET_MODE (x) == Pmode
7264 && GET_MODE (XEXP (x, 0)) == Pmode
7265 && CONST_INT_P (XEXP (x, 1)))
7266 {
7267 type = ADDRESS_REG_REG;
7268 index = XEXP (x, 0);
7269 shift = exact_log2 (INTVAL (XEXP (x, 1)));
7270 }
7271 /* (ashift:P (reg:P) (const_int shift)) */
7272 else if (GET_CODE (x) == ASHIFT
7273 && GET_MODE (x) == Pmode
7274 && GET_MODE (XEXP (x, 0)) == Pmode
7275 && CONST_INT_P (XEXP (x, 1)))
7276 {
7277 type = ADDRESS_REG_REG;
7278 index = XEXP (x, 0);
7279 shift = INTVAL (XEXP (x, 1));
7280 }
7281 else
7282 return false;
7283
76160199
RS
7284 if (!strict_p
7285 && GET_CODE (index) == SUBREG
7286 && contains_reg_of_mode[GENERAL_REGS][GET_MODE (SUBREG_REG (index))])
43e9d192
IB
7287 index = SUBREG_REG (index);
7288
43cacb12
RS
7289 if (aarch64_sve_data_mode_p (mode))
7290 {
7291 if (type != ADDRESS_REG_REG
7292 || (1 << shift) != GET_MODE_UNIT_SIZE (mode))
7293 return false;
7294 }
7295 else
7296 {
7297 if (shift != 0
7298 && !(IN_RANGE (shift, 1, 3)
7299 && known_eq (1 << shift, GET_MODE_SIZE (mode))))
7300 return false;
7301 }
7302
7303 if (REG_P (index)
43e9d192
IB
7304 && aarch64_regno_ok_for_index_p (REGNO (index), strict_p))
7305 {
7306 info->type = type;
7307 info->offset = index;
7308 info->shift = shift;
7309 return true;
7310 }
7311
7312 return false;
7313}
7314
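For the non-SVE case at the end of this function, the accepted shift corresponds directly to the register-offset addressing forms [Xn, Xm] and [Xn, Xm, LSL #s]: a scale is only legal when it matches the access size. A standalone sketch of that rule (an illustration only):

#include <stdbool.h>

static bool
index_shift_ok (int shift, int mode_bytes)
{
  /* [Xn, Xm] always works; [Xn, Xm, LSL #shift] needs 1 << shift to equal
     the access size (LSL #1 for 2 bytes, #2 for 4 bytes, #3 for 8 bytes).  */
  return shift == 0
         || (shift >= 1 && shift <= 3 && (1 << shift) == mode_bytes);
}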
abc52318
KT
7315/* Return true if MODE is one of the modes for which we
7316 support LDP/STP operations. */
7317
7318static bool
7319aarch64_mode_valid_for_sched_fusion_p (machine_mode mode)
7320{
7321 return mode == SImode || mode == DImode
7322 || mode == SFmode || mode == DFmode
7323 || (aarch64_vector_mode_supported_p (mode)
9f5361c8
KT
7324 && (known_eq (GET_MODE_SIZE (mode), 8)
7325 || (known_eq (GET_MODE_SIZE (mode), 16)
7326 && (aarch64_tune_params.extra_tuning_flags
7327 & AARCH64_EXTRA_TUNE_NO_LDP_STP_QREGS) == 0)));
abc52318
KT
7328}
7329
9e0218fc
RH
7330/* Return true if REGNO is a virtual pointer register, or an eliminable
7331 "soft" frame register. Like REGNO_PTR_FRAME_P except that we don't
7332 include stack_pointer or hard_frame_pointer. */
7333static bool
7334virt_or_elim_regno_p (unsigned regno)
7335{
7336 return ((regno >= FIRST_VIRTUAL_REGISTER
7337 && regno <= LAST_VIRTUAL_POINTER_REGISTER)
7338 || regno == FRAME_POINTER_REGNUM
7339 || regno == ARG_POINTER_REGNUM);
7340}
7341
a97d8b98
RS
7342/* Return true if X is a valid address of type TYPE for machine mode MODE.
7343 If it is, fill in INFO appropriately. STRICT_P is true if
7344 REG_OK_STRICT is in effect. */
43e9d192 7345
a98824ac 7346bool
43e9d192 7347aarch64_classify_address (struct aarch64_address_info *info,
a97d8b98 7348 rtx x, machine_mode mode, bool strict_p,
a98824ac 7349 aarch64_addr_query_type type)
43e9d192
IB
7350{
7351 enum rtx_code code = GET_CODE (x);
7352 rtx op0, op1;
dc640181
RS
7353 poly_int64 offset;
7354
6a70badb 7355 HOST_WIDE_INT const_size;
2d8c6dc1 7356
80d43579
WD
7357 /* On BE, we use load/store pair for all large int mode load/stores.
7358 TI/TFmode may also use a load/store pair. */
43cacb12
RS
7359 unsigned int vec_flags = aarch64_classify_vector_mode (mode);
7360 bool advsimd_struct_p = (vec_flags == (VEC_ADVSIMD | VEC_STRUCT));
a97d8b98 7361 bool load_store_pair_p = (type == ADDR_QUERY_LDP_STP
a25831ac 7362 || type == ADDR_QUERY_LDP_STP_N
80d43579
WD
7363 || mode == TImode
7364 || mode == TFmode
43cacb12 7365 || (BYTES_BIG_ENDIAN && advsimd_struct_p));
2d8c6dc1 7366
a25831ac
AV
 7367 /* For ADDR_QUERY_LDP_STP_N, the incoming mode corresponds to the full size
 7368 of the memory being loaded/stored, while the mode used for the addressing
 7369 calculation is half of that. */
7370 if (type == ADDR_QUERY_LDP_STP_N
7371 && known_eq (GET_MODE_SIZE (mode), 16))
7372 mode = DFmode;
7373
6a70badb 7374 bool allow_reg_index_p = (!load_store_pair_p
43cacb12
RS
7375 && (known_lt (GET_MODE_SIZE (mode), 16)
7376 || vec_flags == VEC_ADVSIMD
fa9863e7 7377 || vec_flags & VEC_SVE_DATA));
43cacb12
RS
7378
7379 /* For SVE, only accept [Rn], [Rn, Rm, LSL #shift] and
7380 [Rn, #offset, MUL VL]. */
7381 if ((vec_flags & (VEC_SVE_DATA | VEC_SVE_PRED)) != 0
7382 && (code != REG && code != PLUS))
7383 return false;
2d8c6dc1
AH
7384
7385 /* On LE, for AdvSIMD, don't support anything other than POST_INC or
7386 REG addressing. */
43cacb12
RS
7387 if (advsimd_struct_p
7388 && !BYTES_BIG_ENDIAN
43e9d192
IB
7389 && (code != POST_INC && code != REG))
7390 return false;
7391
43cacb12
RS
7392 gcc_checking_assert (GET_MODE (x) == VOIDmode
7393 || SCALAR_INT_MODE_P (GET_MODE (x)));
7394
43e9d192
IB
7395 switch (code)
7396 {
7397 case REG:
7398 case SUBREG:
7399 info->type = ADDRESS_REG_IMM;
7400 info->base = x;
7401 info->offset = const0_rtx;
dc640181 7402 info->const_offset = 0;
43e9d192
IB
7403 return aarch64_base_register_rtx_p (x, strict_p);
7404
7405 case PLUS:
7406 op0 = XEXP (x, 0);
7407 op1 = XEXP (x, 1);
15c0c5c9
JW
7408
7409 if (! strict_p
4aa81c2e 7410 && REG_P (op0)
9e0218fc 7411 && virt_or_elim_regno_p (REGNO (op0))
dc640181 7412 && poly_int_rtx_p (op1, &offset))
15c0c5c9
JW
7413 {
7414 info->type = ADDRESS_REG_IMM;
7415 info->base = op0;
7416 info->offset = op1;
dc640181 7417 info->const_offset = offset;
15c0c5c9
JW
7418
7419 return true;
7420 }
7421
6a70badb 7422 if (maybe_ne (GET_MODE_SIZE (mode), 0)
dc640181
RS
7423 && aarch64_base_register_rtx_p (op0, strict_p)
7424 && poly_int_rtx_p (op1, &offset))
43e9d192 7425 {
43e9d192
IB
7426 info->type = ADDRESS_REG_IMM;
7427 info->base = op0;
7428 info->offset = op1;
dc640181 7429 info->const_offset = offset;
43e9d192
IB
7430
7431 /* TImode and TFmode values are allowed in both pairs of X
7432 registers and individual Q registers. The available
7433 address modes are:
7434 X,X: 7-bit signed scaled offset
7435 Q: 9-bit signed offset
7436 We conservatively require an offset representable in either mode.
8ed49fab
KT
 7437 When performing the check for pairs of X registers, i.e. LDP/STP,
7438 pass down DImode since that is the natural size of the LDP/STP
7439 instruction memory accesses. */
43e9d192 7440 if (mode == TImode || mode == TFmode)
8ed49fab 7441 return (aarch64_offset_7bit_signed_scaled_p (DImode, offset)
3c5af608 7442 && (aarch64_offset_9bit_signed_unscaled_p (mode, offset)
8734dfac 7443 || offset_12bit_unsigned_scaled_p (mode, offset)));
43e9d192 7444
2d8c6dc1
AH
7445 /* A 7bit offset check because OImode will emit a ldp/stp
7446 instruction (only big endian will get here).
7447 For ldp/stp instructions, the offset is scaled for the size of a
7448 single element of the pair. */
7449 if (mode == OImode)
7450 return aarch64_offset_7bit_signed_scaled_p (TImode, offset);
7451
7452 /* Three 9/12 bit offsets checks because CImode will emit three
7453 ldr/str instructions (only big endian will get here). */
7454 if (mode == CImode)
7455 return (aarch64_offset_7bit_signed_scaled_p (TImode, offset)
3c5af608
MM
7456 && (aarch64_offset_9bit_signed_unscaled_p (V16QImode,
7457 offset + 32)
2d8c6dc1
AH
7458 || offset_12bit_unsigned_scaled_p (V16QImode,
7459 offset + 32)));
7460
7461 /* Two 7bit offsets checks because XImode will emit two ldp/stp
7462 instructions (only big endian will get here). */
7463 if (mode == XImode)
7464 return (aarch64_offset_7bit_signed_scaled_p (TImode, offset)
7465 && aarch64_offset_7bit_signed_scaled_p (TImode,
7466 offset + 32));
7467
43cacb12
RS
7468 /* Make "m" use the LD1 offset range for SVE data modes, so
7469 that pre-RTL optimizers like ivopts will work to that
7470 instead of the wider LDR/STR range. */
7471 if (vec_flags == VEC_SVE_DATA)
7472 return (type == ADDR_QUERY_M
7473 ? offset_4bit_signed_scaled_p (mode, offset)
7474 : offset_9bit_signed_scaled_p (mode, offset));
7475
9f4cbab8
RS
7476 if (vec_flags == (VEC_SVE_DATA | VEC_STRUCT))
7477 {
7478 poly_int64 end_offset = (offset
7479 + GET_MODE_SIZE (mode)
7480 - BYTES_PER_SVE_VECTOR);
7481 return (type == ADDR_QUERY_M
7482 ? offset_4bit_signed_scaled_p (mode, offset)
7483 : (offset_9bit_signed_scaled_p (SVE_BYTE_MODE, offset)
7484 && offset_9bit_signed_scaled_p (SVE_BYTE_MODE,
7485 end_offset)));
7486 }
7487
43cacb12
RS
7488 if (vec_flags == VEC_SVE_PRED)
7489 return offset_9bit_signed_scaled_p (mode, offset);
7490
2d8c6dc1 7491 if (load_store_pair_p)
6a70badb 7492 return ((known_eq (GET_MODE_SIZE (mode), 4)
9f5361c8
KT
7493 || known_eq (GET_MODE_SIZE (mode), 8)
7494 || known_eq (GET_MODE_SIZE (mode), 16))
44707478 7495 && aarch64_offset_7bit_signed_scaled_p (mode, offset));
43e9d192 7496 else
3c5af608 7497 return (aarch64_offset_9bit_signed_unscaled_p (mode, offset)
43e9d192
IB
7498 || offset_12bit_unsigned_scaled_p (mode, offset));
7499 }
7500
7501 if (allow_reg_index_p)
7502 {
7503 /* Look for base + (scaled/extended) index register. */
7504 if (aarch64_base_register_rtx_p (op0, strict_p)
7505 && aarch64_classify_index (info, op1, mode, strict_p))
7506 {
7507 info->base = op0;
7508 return true;
7509 }
7510 if (aarch64_base_register_rtx_p (op1, strict_p)
7511 && aarch64_classify_index (info, op0, mode, strict_p))
7512 {
7513 info->base = op1;
7514 return true;
7515 }
7516 }
7517
7518 return false;
7519
7520 case POST_INC:
7521 case POST_DEC:
7522 case PRE_INC:
7523 case PRE_DEC:
7524 info->type = ADDRESS_REG_WB;
7525 info->base = XEXP (x, 0);
7526 info->offset = NULL_RTX;
7527 return aarch64_base_register_rtx_p (info->base, strict_p);
7528
7529 case POST_MODIFY:
7530 case PRE_MODIFY:
7531 info->type = ADDRESS_REG_WB;
7532 info->base = XEXP (x, 0);
7533 if (GET_CODE (XEXP (x, 1)) == PLUS
dc640181 7534 && poly_int_rtx_p (XEXP (XEXP (x, 1), 1), &offset)
43e9d192
IB
7535 && rtx_equal_p (XEXP (XEXP (x, 1), 0), info->base)
7536 && aarch64_base_register_rtx_p (info->base, strict_p))
7537 {
43e9d192 7538 info->offset = XEXP (XEXP (x, 1), 1);
dc640181 7539 info->const_offset = offset;
43e9d192
IB
7540
7541 /* TImode and TFmode values are allowed in both pairs of X
7542 registers and individual Q registers. The available
7543 address modes are:
7544 X,X: 7-bit signed scaled offset
7545 Q: 9-bit signed offset
7546 We conservatively require an offset representable in either mode.
7547 */
7548 if (mode == TImode || mode == TFmode)
44707478 7549 return (aarch64_offset_7bit_signed_scaled_p (mode, offset)
3c5af608 7550 && aarch64_offset_9bit_signed_unscaled_p (mode, offset));
43e9d192 7551
2d8c6dc1 7552 if (load_store_pair_p)
6a70badb 7553 return ((known_eq (GET_MODE_SIZE (mode), 4)
9f5361c8
KT
7554 || known_eq (GET_MODE_SIZE (mode), 8)
7555 || known_eq (GET_MODE_SIZE (mode), 16))
44707478 7556 && aarch64_offset_7bit_signed_scaled_p (mode, offset));
43e9d192 7557 else
3c5af608 7558 return aarch64_offset_9bit_signed_unscaled_p (mode, offset);
43e9d192
IB
7559 }
7560 return false;
7561
7562 case CONST:
7563 case SYMBOL_REF:
7564 case LABEL_REF:
79517551
SN
7565 /* load literal: pc-relative constant pool entry. Only supported
 7566 for SImode or larger. */
43e9d192 7567 info->type = ADDRESS_SYMBOLIC;
2d8c6dc1 7568
6a70badb
RS
7569 if (!load_store_pair_p
7570 && GET_MODE_SIZE (mode).is_constant (&const_size)
7571 && const_size >= 4)
43e9d192
IB
7572 {
7573 rtx sym, addend;
7574
7575 split_const (x, &sym, &addend);
b4f50fd4
RR
7576 return ((GET_CODE (sym) == LABEL_REF
7577 || (GET_CODE (sym) == SYMBOL_REF
7578 && CONSTANT_POOL_ADDRESS_P (sym)
9ee6540a 7579 && aarch64_pcrelative_literal_loads)));
43e9d192
IB
7580 }
7581 return false;
7582
7583 case LO_SUM:
7584 info->type = ADDRESS_LO_SUM;
7585 info->base = XEXP (x, 0);
7586 info->offset = XEXP (x, 1);
7587 if (allow_reg_index_p
7588 && aarch64_base_register_rtx_p (info->base, strict_p))
7589 {
7590 rtx sym, offs;
7591 split_const (info->offset, &sym, &offs);
7592 if (GET_CODE (sym) == SYMBOL_REF
43cacb12
RS
7593 && (aarch64_classify_symbol (sym, INTVAL (offs))
7594 == SYMBOL_SMALL_ABSOLUTE))
43e9d192
IB
7595 {
7596 /* The symbol and offset must be aligned to the access size. */
7597 unsigned int align;
43e9d192
IB
7598
7599 if (CONSTANT_POOL_ADDRESS_P (sym))
7600 align = GET_MODE_ALIGNMENT (get_pool_mode (sym));
7601 else if (TREE_CONSTANT_POOL_ADDRESS_P (sym))
7602 {
7603 tree exp = SYMBOL_REF_DECL (sym);
7604 align = TYPE_ALIGN (TREE_TYPE (exp));
58e17cf8 7605 align = aarch64_constant_alignment (exp, align);
43e9d192
IB
7606 }
7607 else if (SYMBOL_REF_DECL (sym))
7608 align = DECL_ALIGN (SYMBOL_REF_DECL (sym));
6c031d8d
KV
7609 else if (SYMBOL_REF_HAS_BLOCK_INFO_P (sym)
7610 && SYMBOL_REF_BLOCK (sym) != NULL)
7611 align = SYMBOL_REF_BLOCK (sym)->alignment;
43e9d192
IB
7612 else
7613 align = BITS_PER_UNIT;
7614
6a70badb
RS
7615 poly_int64 ref_size = GET_MODE_SIZE (mode);
7616 if (known_eq (ref_size, 0))
43e9d192
IB
7617 ref_size = GET_MODE_SIZE (DImode);
7618
6a70badb
RS
7619 return (multiple_p (INTVAL (offs), ref_size)
7620 && multiple_p (align / BITS_PER_UNIT, ref_size));
43e9d192
IB
7621 }
7622 }
7623 return false;
7624
7625 default:
7626 return false;
7627 }
7628}
7629
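The three scalar immediate-offset ranges tested repeatedly above can be summarised by standalone predicates (a sketch, not the helpers this file calls; offsets and access sizes are in bytes):

#include <stdbool.h>

/* LDP/STP: signed 7-bit immediate, scaled by the access size.  */
static bool
offset_7bit_signed_scaled (long long offset, int size)
{
  return offset % size == 0 && offset >= -64 * size && offset <= 63 * size;
}

/* LDUR/STUR: signed 9-bit immediate, unscaled.  */
static bool
offset_9bit_signed_unscaled (long long offset)
{
  return offset >= -256 && offset <= 255;
}

/* LDR/STR: unsigned 12-bit immediate, scaled by the access size.  */
static bool
offset_12bit_unsigned_scaled (long long offset, int size)
{
  return offset % size == 0 && offset >= 0 && offset <= 4095 * size;
}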
9bf2f779
KT
7630/* Return true if the address X is valid for a PRFM instruction.
7631 STRICT_P is true if we should do strict checking with
7632 aarch64_classify_address. */
7633
7634bool
7635aarch64_address_valid_for_prefetch_p (rtx x, bool strict_p)
7636{
7637 struct aarch64_address_info addr;
7638
7639 /* PRFM accepts the same addresses as DImode... */
a97d8b98 7640 bool res = aarch64_classify_address (&addr, x, DImode, strict_p);
9bf2f779
KT
7641 if (!res)
7642 return false;
7643
7644 /* ... except writeback forms. */
7645 return addr.type != ADDRESS_REG_WB;
7646}
7647
43e9d192
IB
7648bool
7649aarch64_symbolic_address_p (rtx x)
7650{
7651 rtx offset;
7652
7653 split_const (x, &x, &offset);
7654 return GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == LABEL_REF;
7655}
7656
a6e0bfa7 7657/* Classify the base of symbolic expression X. */
da4f13a4
MS
7658
7659enum aarch64_symbol_type
a6e0bfa7 7660aarch64_classify_symbolic_expression (rtx x)
43e9d192
IB
7661{
7662 rtx offset;
da4f13a4 7663
43e9d192 7664 split_const (x, &x, &offset);
43cacb12 7665 return aarch64_classify_symbol (x, INTVAL (offset));
43e9d192
IB
7666}
7667
7668
7669/* Return TRUE if X is a legitimate address for accessing memory in
7670 mode MODE. */
7671static bool
ef4bddc2 7672aarch64_legitimate_address_hook_p (machine_mode mode, rtx x, bool strict_p)
43e9d192
IB
7673{
7674 struct aarch64_address_info addr;
7675
a97d8b98 7676 return aarch64_classify_address (&addr, x, mode, strict_p);
43e9d192
IB
7677}
7678
a97d8b98
RS
7679/* Return TRUE if X is a legitimate address of type TYPE for accessing
7680 memory in mode MODE. STRICT_P is true if REG_OK_STRICT is in effect. */
43e9d192 7681bool
a97d8b98
RS
7682aarch64_legitimate_address_p (machine_mode mode, rtx x, bool strict_p,
7683 aarch64_addr_query_type type)
43e9d192
IB
7684{
7685 struct aarch64_address_info addr;
7686
a97d8b98 7687 return aarch64_classify_address (&addr, x, mode, strict_p, type);
43e9d192
IB
7688}
7689
9005477f
RS
7690/* Implement TARGET_LEGITIMIZE_ADDRESS_DISPLACEMENT. */
7691
491ec060 7692static bool
9005477f
RS
7693aarch64_legitimize_address_displacement (rtx *offset1, rtx *offset2,
7694 poly_int64 orig_offset,
7695 machine_mode mode)
491ec060 7696{
6a70badb
RS
7697 HOST_WIDE_INT size;
7698 if (GET_MODE_SIZE (mode).is_constant (&size))
7699 {
9005477f
RS
7700 HOST_WIDE_INT const_offset, second_offset;
7701
7702 /* A general SVE offset is A * VQ + B. Remove the A component from
7703 coefficient 0 in order to get the constant B. */
7704 const_offset = orig_offset.coeffs[0] - orig_offset.coeffs[1];
7705
7706 /* Split an out-of-range address displacement into a base and
7707 offset. Use 4KB range for 1- and 2-byte accesses and a 16KB
7708 range otherwise to increase opportunities for sharing the base
7709 address of different sizes. Unaligned accesses use the signed
7710 9-bit range, TImode/TFmode use the intersection of signed
7711 scaled 7-bit and signed 9-bit offset. */
6a70badb 7712 if (mode == TImode || mode == TFmode)
9005477f
RS
7713 second_offset = ((const_offset + 0x100) & 0x1f8) - 0x100;
7714 else if ((const_offset & (size - 1)) != 0)
7715 second_offset = ((const_offset + 0x100) & 0x1ff) - 0x100;
6a70badb 7716 else
9005477f 7717 second_offset = const_offset & (size < 4 ? 0xfff : 0x3ffc);
491ec060 7718
9005477f
RS
7719 if (second_offset == 0 || known_eq (orig_offset, second_offset))
7720 return false;
7721
7722 /* Split the offset into second_offset and the rest. */
7723 *offset1 = gen_int_mode (orig_offset - second_offset, Pmode);
7724 *offset2 = gen_int_mode (second_offset, Pmode);
7725 return true;
7726 }
7727 else
7728 {
7729 /* Get the mode we should use as the basis of the range. For structure
7730 modes this is the mode of one vector. */
7731 unsigned int vec_flags = aarch64_classify_vector_mode (mode);
7732 machine_mode step_mode
7733 = (vec_flags & VEC_STRUCT) != 0 ? SVE_BYTE_MODE : mode;
7734
7735 /* Get the "mul vl" multiplier we'd like to use. */
7736 HOST_WIDE_INT factor = GET_MODE_SIZE (step_mode).coeffs[1];
7737 HOST_WIDE_INT vnum = orig_offset.coeffs[1] / factor;
7738 if (vec_flags & VEC_SVE_DATA)
7739 /* LDR supports a 9-bit range, but the move patterns for
7740 structure modes require all vectors to be in range of the
 7741 same base. The simplest way of accommodating that while still
7742 promoting reuse of anchor points between different modes is
7743 to use an 8-bit range unconditionally. */
7744 vnum = ((vnum + 128) & 255) - 128;
7745 else
7746 /* Predicates are only handled singly, so we might as well use
7747 the full range. */
7748 vnum = ((vnum + 256) & 511) - 256;
7749 if (vnum == 0)
7750 return false;
7751
7752 /* Convert the "mul vl" multiplier into a byte offset. */
7753 poly_int64 second_offset = GET_MODE_SIZE (step_mode) * vnum;
7754 if (known_eq (second_offset, orig_offset))
7755 return false;
7756
7757 /* Split the offset into second_offset and the rest. */
7758 *offset1 = gen_int_mode (orig_offset - second_offset, Pmode);
7759 *offset2 = gen_int_mode (second_offset, Pmode);
6a70badb
RS
7760 return true;
7761 }
491ec060
WD
7762}
7763
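For the constant-size branch above, a worked example of the split for an aligned 4-byte access (a hypothetical sketch; the SVE branch instead splits on the "mul vl" multiplier): an offset of 0x12345678 becomes an anchor of 0x12344000 plus a residue of 0x1678, and the residue then fits the unsigned scaled 12-bit addressing form.

#include <stdint.h>

static void
split_offset_aligned_word (int64_t offset, int64_t *anchor, int64_t *residue)
{
  /* Keep the part representable as an unsigned scaled 12-bit offset for a
     4-byte access (a multiple of 4 up to 0x3ffc); the remainder becomes the
     anchor that is added to the base register separately.  */
  *residue = offset & 0x3ffc;
  *anchor = offset - *residue;
}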
a2170965
TC
7764/* Return the binary representation of floating point constant VALUE in INTVAL.
7765 If the value cannot be converted, return false without setting INTVAL.
7766 The conversion is done in the given MODE. */
7767bool
7768aarch64_reinterpret_float_as_int (rtx value, unsigned HOST_WIDE_INT *intval)
7769{
7770
7771 /* We make a general exception for 0. */
7772 if (aarch64_float_const_zero_rtx_p (value))
7773 {
7774 *intval = 0;
7775 return true;
7776 }
7777
0d0e0188 7778 scalar_float_mode mode;
a2170965 7779 if (GET_CODE (value) != CONST_DOUBLE
0d0e0188 7780 || !is_a <scalar_float_mode> (GET_MODE (value), &mode)
a2170965
TC
7781 || GET_MODE_BITSIZE (mode) > HOST_BITS_PER_WIDE_INT
7782 /* Only support up to DF mode. */
7783 || GET_MODE_BITSIZE (mode) > GET_MODE_BITSIZE (DFmode))
7784 return false;
7785
7786 unsigned HOST_WIDE_INT ival = 0;
7787
7788 long res[2];
7789 real_to_target (res,
7790 CONST_DOUBLE_REAL_VALUE (value),
7791 REAL_MODE_FORMAT (mode));
7792
5c22bb48
TC
7793 if (mode == DFmode)
7794 {
7795 int order = BYTES_BIG_ENDIAN ? 1 : 0;
7796 ival = zext_hwi (res[order], 32);
7797 ival |= (zext_hwi (res[1 - order], 32) << 32);
7798 }
7799 else
7800 ival = zext_hwi (res[0], 32);
a2170965
TC
7801
7802 *intval = ival;
7803 return true;
7804}
7805
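As a standalone illustration of the reinterpretation (a sketch using memcpy rather than real_to_target): 1.0 has the bit pattern 0x3ff0000000000000, whose only nonzero 16-bit chunk is the top one, so the function below will find that it can be materialised with a single MOVZ followed by an FMOV.

#include <stdint.h>
#include <string.h>

static uint64_t
double_bits (double d)
{
  uint64_t bits;
  memcpy (&bits, &d, sizeof bits);  /* well-defined bit reinterpretation */
  return bits;
}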
7806/* Return TRUE if rtx X is an immediate constant that can be moved using a
7807 single MOV(+MOVK) followed by an FMOV. */
7808bool
7809aarch64_float_const_rtx_p (rtx x)
7810{
7811 machine_mode mode = GET_MODE (x);
7812 if (mode == VOIDmode)
7813 return false;
7814
7815 /* Determine whether it's cheaper to write float constants as
 7816 mov/movk pairs rather than as ldr/adrp pairs. */
7817 unsigned HOST_WIDE_INT ival;
7818
7819 if (GET_CODE (x) == CONST_DOUBLE
7820 && SCALAR_FLOAT_MODE_P (mode)
7821 && aarch64_reinterpret_float_as_int (x, &ival))
7822 {
77e994c9
RS
7823 scalar_int_mode imode = (mode == HFmode
7824 ? SImode
7825 : int_mode_for_mode (mode).require ());
a2170965
TC
7826 int num_instr = aarch64_internal_mov_immediate
7827 (NULL_RTX, gen_int_mode (ival, imode), false, imode);
7828 return num_instr < 3;
7829 }
7830
7831 return false;
7832}
7833
43e9d192
IB
7834/* Return TRUE if rtx X is immediate constant 0.0 */
7835bool
3520f7cc 7836aarch64_float_const_zero_rtx_p (rtx x)
43e9d192 7837{
43e9d192
IB
7838 if (GET_MODE (x) == VOIDmode)
7839 return false;
7840
34a72c33 7841 if (REAL_VALUE_MINUS_ZERO (*CONST_DOUBLE_REAL_VALUE (x)))
43e9d192 7842 return !HONOR_SIGNED_ZEROS (GET_MODE (x));
34a72c33 7843 return real_equal (CONST_DOUBLE_REAL_VALUE (x), &dconst0);
43e9d192
IB
7844}
7845
a2170965
TC
7846/* Return TRUE if rtx X is immediate constant that fits in a single
7847 MOVI immediate operation. */
7848bool
7849aarch64_can_const_movi_rtx_p (rtx x, machine_mode mode)
7850{
7851 if (!TARGET_SIMD)
7852 return false;
7853
77e994c9
RS
7854 machine_mode vmode;
7855 scalar_int_mode imode;
a2170965
TC
7856 unsigned HOST_WIDE_INT ival;
7857
7858 if (GET_CODE (x) == CONST_DOUBLE
7859 && SCALAR_FLOAT_MODE_P (mode))
7860 {
7861 if (!aarch64_reinterpret_float_as_int (x, &ival))
7862 return false;
7863
35c38fa6
TC
7864 /* We make a general exception for 0. */
7865 if (aarch64_float_const_zero_rtx_p (x))
7866 return true;
7867
304b9962 7868 imode = int_mode_for_mode (mode).require ();
a2170965
TC
7869 }
7870 else if (GET_CODE (x) == CONST_INT
77e994c9
RS
7871 && is_a <scalar_int_mode> (mode, &imode))
7872 ival = INTVAL (x);
a2170965
TC
7873 else
7874 return false;
7875
 7876 /* Use a 64-bit container mode for everything except DI/DF mode, where we
 7877 use a 128-bit vector mode. */
77e994c9 7878 int width = GET_MODE_BITSIZE (imode) == 64 ? 128 : 64;
a2170965
TC
7879
7880 vmode = aarch64_simd_container_mode (imode, width);
7881 rtx v_op = aarch64_simd_gen_const_vector_dup (vmode, ival);
7882
b187677b 7883 return aarch64_simd_valid_immediate (v_op, NULL);
a2170965
TC
7884}
7885
7886
70f09188
AP
7887/* Return the fixed registers used for condition codes. */
7888
7889static bool
7890aarch64_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
7891{
7892 *p1 = CC_REGNUM;
7893 *p2 = INVALID_REGNUM;
7894 return true;
7895}
7896
47210a04
RL
7897/* This function is used by the call expanders of the machine description.
7898 RESULT is the register in which the result is returned. It's NULL for
7899 "call" and "sibcall".
7900 MEM is the location of the function call.
 7901 SIBCALL indicates whether this function call is a normal call or a sibling
 7902 call; a different pattern is generated accordingly. */
7903
7904void
7905aarch64_expand_call (rtx result, rtx mem, bool sibcall)
7906{
7907 rtx call, callee, tmp;
7908 rtvec vec;
7909 machine_mode mode;
7910
7911 gcc_assert (MEM_P (mem));
7912 callee = XEXP (mem, 0);
7913 mode = GET_MODE (callee);
7914 gcc_assert (mode == Pmode);
7915
7916 /* Decide if we should generate indirect calls by loading the
7917 address of the callee into a register before performing
7918 the branch-and-link. */
7919 if (SYMBOL_REF_P (callee)
7920 ? (aarch64_is_long_call_p (callee)
7921 || aarch64_is_noplt_call_p (callee))
7922 : !REG_P (callee))
7923 XEXP (mem, 0) = force_reg (mode, callee);
7924
7925 call = gen_rtx_CALL (VOIDmode, mem, const0_rtx);
7926
7927 if (result != NULL_RTX)
7928 call = gen_rtx_SET (result, call);
7929
7930 if (sibcall)
7931 tmp = ret_rtx;
7932 else
7933 tmp = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (Pmode, LR_REGNUM));
7934
7935 vec = gen_rtvec (2, call, tmp);
7936 call = gen_rtx_PARALLEL (VOIDmode, vec);
7937
7938 aarch64_emit_call_insn (call);
7939}
7940
78607708
TV
7941/* Emit call insn with PAT and do aarch64-specific handling. */
7942
d07a3fed 7943void
78607708
TV
7944aarch64_emit_call_insn (rtx pat)
7945{
7946 rtx insn = emit_call_insn (pat);
7947
7948 rtx *fusage = &CALL_INSN_FUNCTION_USAGE (insn);
7949 clobber_reg (fusage, gen_rtx_REG (word_mode, IP0_REGNUM));
7950 clobber_reg (fusage, gen_rtx_REG (word_mode, IP1_REGNUM));
7951}
7952
ef4bddc2 7953machine_mode
43e9d192
IB
7954aarch64_select_cc_mode (RTX_CODE code, rtx x, rtx y)
7955{
f7343f20
RE
7956 machine_mode mode_x = GET_MODE (x);
7957 rtx_code code_x = GET_CODE (x);
7958
43e9d192
IB
7959 /* All floating point compares return CCFP if it is an equality
7960 comparison, and CCFPE otherwise. */
f7343f20 7961 if (GET_MODE_CLASS (mode_x) == MODE_FLOAT)
43e9d192
IB
7962 {
7963 switch (code)
7964 {
7965 case EQ:
7966 case NE:
7967 case UNORDERED:
7968 case ORDERED:
7969 case UNLT:
7970 case UNLE:
7971 case UNGT:
7972 case UNGE:
7973 case UNEQ:
43e9d192
IB
7974 return CCFPmode;
7975
7976 case LT:
7977 case LE:
7978 case GT:
7979 case GE:
8332c5ee 7980 case LTGT:
43e9d192
IB
7981 return CCFPEmode;
7982
7983 default:
7984 gcc_unreachable ();
7985 }
7986 }
7987
2b8568fe
KT
7988 /* Equality comparisons of short modes against zero can be performed
7989 using the TST instruction with the appropriate bitmask. */
f73dc006 7990 if (y == const0_rtx && (REG_P (x) || SUBREG_P (x))
2b8568fe 7991 && (code == EQ || code == NE)
f7343f20 7992 && (mode_x == HImode || mode_x == QImode))
2b8568fe
KT
7993 return CC_NZmode;
7994
b06335f9
KT
7995 /* Similarly, comparisons of zero_extends from shorter modes can
7996 be performed using an ANDS with an immediate mask. */
f7343f20
RE
7997 if (y == const0_rtx && code_x == ZERO_EXTEND
7998 && (mode_x == SImode || mode_x == DImode)
b06335f9
KT
7999 && (GET_MODE (XEXP (x, 0)) == HImode || GET_MODE (XEXP (x, 0)) == QImode)
8000 && (code == EQ || code == NE))
8001 return CC_NZmode;
8002
f7343f20 8003 if ((mode_x == SImode || mode_x == DImode)
43e9d192
IB
8004 && y == const0_rtx
8005 && (code == EQ || code == NE || code == LT || code == GE)
f7343f20
RE
8006 && (code_x == PLUS || code_x == MINUS || code_x == AND
8007 || code_x == NEG
8008 || (code_x == ZERO_EXTRACT && CONST_INT_P (XEXP (x, 1))
7325d85a 8009 && CONST_INT_P (XEXP (x, 2)))))
43e9d192
IB
8010 return CC_NZmode;
8011
1c992d1e 8012 /* A compare with a shifted operand. Because of canonicalization,
43e9d192
IB
8013 the comparison will have to be swapped when we emit the assembly
8014 code. */
f7343f20 8015 if ((mode_x == SImode || mode_x == DImode)
ffa8a921 8016 && (REG_P (y) || GET_CODE (y) == SUBREG || y == const0_rtx)
f7343f20
RE
8017 && (code_x == ASHIFT || code_x == ASHIFTRT
8018 || code_x == LSHIFTRT
8019 || code_x == ZERO_EXTEND || code_x == SIGN_EXTEND))
43e9d192
IB
8020 return CC_SWPmode;
8021
1c992d1e
RE
8022 /* Similarly for a negated operand, but we can only do this for
8023 equalities. */
f7343f20 8024 if ((mode_x == SImode || mode_x == DImode)
4aa81c2e 8025 && (REG_P (y) || GET_CODE (y) == SUBREG)
1c992d1e 8026 && (code == EQ || code == NE)
f7343f20 8027 && code_x == NEG)
1c992d1e
RE
8028 return CC_Zmode;
8029
f7343f20
RE
8030 /* A test for unsigned overflow from an addition. */
8031 if ((mode_x == DImode || mode_x == TImode)
8032 && (code == LTU || code == GEU)
8033 && code_x == PLUS
8034 && rtx_equal_p (XEXP (x, 0), y))
ef22810a
RH
8035 return CC_Cmode;
8036
f7343f20
RE
8037 /* A test for unsigned overflow from an add with carry. */
8038 if ((mode_x == DImode || mode_x == TImode)
8039 && (code == LTU || code == GEU)
8040 && code_x == PLUS
8041 && CONST_SCALAR_INT_P (y)
8042 && (rtx_mode_t (y, mode_x)
8043 == (wi::shwi (1, mode_x)
8044 << (GET_MODE_BITSIZE (mode_x).to_constant () / 2))))
8045 return CC_ADCmode;
8046
30c46053 8047 /* A test for signed overflow. */
f7343f20 8048 if ((mode_x == DImode || mode_x == TImode)
30c46053 8049 && code == NE
f7343f20 8050 && code_x == PLUS
30c46053
MC
8051 && GET_CODE (y) == SIGN_EXTEND)
8052 return CC_Vmode;
8053
43e9d192
IB
8054 /* For everything else, return CCmode. */
8055 return CCmode;
8056}
8057
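/* A standalone sketch (compiled separately, not part of this file) of the
   floating-point branch of aarch64_select_cc_mode above: EQ/NE,
   ORDERED/UNORDERED and the UN* codes select CCFPmode, while LT/LE/GT/GE
   and LTGT select CCFPEmode.  The enum and helper below are hypothetical
   stand-ins for the RTL codes and CC modes; codes outside this set are
   rejected by the real function rather than falling to a default.  */
#include <stdio.h>

enum sketch_cmp { S_EQ, S_NE, S_UNORDERED, S_ORDERED, S_UNLT, S_UNLE,
                  S_UNGT, S_UNGE, S_UNEQ, S_LT, S_LE, S_GT, S_GE, S_LTGT };

static const char *
sketch_fp_cc_mode (enum sketch_cmp code)
{
  switch (code)
    {
    case S_LT: case S_LE: case S_GT: case S_GE: case S_LTGT:
      return "CCFPEmode";
    default:
      return "CCFPmode";
    }
}

int
main (void)
{
  printf ("LT   -> %s\n", sketch_fp_cc_mode (S_LT));   /* CCFPEmode */
  printf ("UNEQ -> %s\n", sketch_fp_cc_mode (S_UNEQ)); /* CCFPmode  */
  return 0;
}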
3dfa7055 8058static int
b8506a8a 8059aarch64_get_condition_code_1 (machine_mode, enum rtx_code);
3dfa7055 8060
cd5660ab 8061int
43e9d192
IB
8062aarch64_get_condition_code (rtx x)
8063{
ef4bddc2 8064 machine_mode mode = GET_MODE (XEXP (x, 0));
43e9d192
IB
8065 enum rtx_code comp_code = GET_CODE (x);
8066
8067 if (GET_MODE_CLASS (mode) != MODE_CC)
8068 mode = SELECT_CC_MODE (comp_code, XEXP (x, 0), XEXP (x, 1));
3dfa7055
ZC
8069 return aarch64_get_condition_code_1 (mode, comp_code);
8070}
43e9d192 8071
3dfa7055 8072static int
b8506a8a 8073aarch64_get_condition_code_1 (machine_mode mode, enum rtx_code comp_code)
3dfa7055 8074{
43e9d192
IB
8075 switch (mode)
8076 {
4e10a5a7
RS
8077 case E_CCFPmode:
8078 case E_CCFPEmode:
43e9d192
IB
8079 switch (comp_code)
8080 {
8081 case GE: return AARCH64_GE;
8082 case GT: return AARCH64_GT;
8083 case LE: return AARCH64_LS;
8084 case LT: return AARCH64_MI;
8085 case NE: return AARCH64_NE;
8086 case EQ: return AARCH64_EQ;
8087 case ORDERED: return AARCH64_VC;
8088 case UNORDERED: return AARCH64_VS;
8089 case UNLT: return AARCH64_LT;
8090 case UNLE: return AARCH64_LE;
8091 case UNGT: return AARCH64_HI;
8092 case UNGE: return AARCH64_PL;
cd5660ab 8093 default: return -1;
43e9d192
IB
8094 }
8095 break;
8096
4e10a5a7 8097 case E_CCmode:
43e9d192
IB
8098 switch (comp_code)
8099 {
8100 case NE: return AARCH64_NE;
8101 case EQ: return AARCH64_EQ;
8102 case GE: return AARCH64_GE;
8103 case GT: return AARCH64_GT;
8104 case LE: return AARCH64_LE;
8105 case LT: return AARCH64_LT;
8106 case GEU: return AARCH64_CS;
8107 case GTU: return AARCH64_HI;
8108 case LEU: return AARCH64_LS;
8109 case LTU: return AARCH64_CC;
cd5660ab 8110 default: return -1;
43e9d192
IB
8111 }
8112 break;
8113
4e10a5a7 8114 case E_CC_SWPmode:
43e9d192
IB
8115 switch (comp_code)
8116 {
8117 case NE: return AARCH64_NE;
8118 case EQ: return AARCH64_EQ;
8119 case GE: return AARCH64_LE;
8120 case GT: return AARCH64_LT;
8121 case LE: return AARCH64_GE;
8122 case LT: return AARCH64_GT;
8123 case GEU: return AARCH64_LS;
8124 case GTU: return AARCH64_CC;
8125 case LEU: return AARCH64_CS;
8126 case LTU: return AARCH64_HI;
cd5660ab 8127 default: return -1;
43e9d192
IB
8128 }
8129 break;
8130
57d6f4d0
RS
8131 case E_CC_NZCmode:
8132 switch (comp_code)
8133 {
8134 case NE: return AARCH64_NE; /* = any */
8135 case EQ: return AARCH64_EQ; /* = none */
8136 case GE: return AARCH64_PL; /* = nfrst */
8137 case LT: return AARCH64_MI; /* = first */
8138 case GEU: return AARCH64_CS; /* = nlast */
8139 case GTU: return AARCH64_HI; /* = pmore */
8140 case LEU: return AARCH64_LS; /* = plast */
8141 case LTU: return AARCH64_CC; /* = last */
8142 default: return -1;
8143 }
8144 break;
8145
4e10a5a7 8146 case E_CC_NZmode:
43e9d192
IB
8147 switch (comp_code)
8148 {
8149 case NE: return AARCH64_NE;
8150 case EQ: return AARCH64_EQ;
8151 case GE: return AARCH64_PL;
8152 case LT: return AARCH64_MI;
cd5660ab 8153 default: return -1;
43e9d192
IB
8154 }
8155 break;
8156
4e10a5a7 8157 case E_CC_Zmode:
1c992d1e
RE
8158 switch (comp_code)
8159 {
8160 case NE: return AARCH64_NE;
8161 case EQ: return AARCH64_EQ;
cd5660ab 8162 default: return -1;
1c992d1e
RE
8163 }
8164 break;
8165
4e10a5a7 8166 case E_CC_Cmode:
ef22810a
RH
8167 switch (comp_code)
8168 {
f7343f20
RE
8169 case LTU: return AARCH64_CS;
8170 case GEU: return AARCH64_CC;
8171 default: return -1;
8172 }
8173 break;
8174
8175 case E_CC_ADCmode:
8176 switch (comp_code)
8177 {
8178 case GEU: return AARCH64_CS;
8179 case LTU: return AARCH64_CC;
ef22810a
RH
8180 default: return -1;
8181 }
8182 break;
8183
30c46053
MC
8184 case E_CC_Vmode:
8185 switch (comp_code)
8186 {
8187 case NE: return AARCH64_VS;
8188 case EQ: return AARCH64_VC;
8189 default: return -1;
8190 }
8191 break;
8192
43e9d192 8193 default:
cd5660ab 8194 return -1;
43e9d192 8195 }
3dfa7055 8196
3dfa7055 8197 return -1;
43e9d192
IB
8198}
8199
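/* A standalone sketch (compiled separately, not part of this file) of the
   CC_SWPmode rows in the switch above: when the comparison was
   canonicalized with its operands swapped, each RTL code maps to the
   AArch64 condition that tests the operands in the opposite order.  The
   table is copied verbatim from the switch; the names are illustrative.  */
#include <stdio.h>

static const char *const sketch_swp_map[][2] =
{
  {"NE", "ne"}, {"EQ", "eq"}, {"GE", "le"}, {"GT", "lt"},
  {"LE", "ge"}, {"LT", "gt"}, {"GEU", "ls"}, {"GTU", "cc"},
  {"LEU", "cs"}, {"LTU", "hi"},
};

int
main (void)
{
  for (unsigned i = 0; i < sizeof sketch_swp_map / sizeof sketch_swp_map[0]; i++)
    printf ("%-3s -> %s\n", sketch_swp_map[i][0], sketch_swp_map[i][1]);
  return 0;
}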
ddeabd3e
AL
8200bool
8201aarch64_const_vec_all_same_in_range_p (rtx x,
6a70badb
RS
8202 HOST_WIDE_INT minval,
8203 HOST_WIDE_INT maxval)
ddeabd3e 8204{
6a70badb
RS
8205 rtx elt;
8206 return (const_vec_duplicate_p (x, &elt)
8207 && CONST_INT_P (elt)
8208 && IN_RANGE (INTVAL (elt), minval, maxval));
ddeabd3e
AL
8209}
8210
8211bool
8212aarch64_const_vec_all_same_int_p (rtx x, HOST_WIDE_INT val)
8213{
8214 return aarch64_const_vec_all_same_in_range_p (x, val, val);
8215}
8216
43cacb12
RS
8217/* Return true if VEC is a constant in which every element is in the range
8218 [MINVAL, MAXVAL]. The elements do not need to have the same value. */
8219
8220static bool
8221aarch64_const_vec_all_in_range_p (rtx vec,
8222 HOST_WIDE_INT minval,
8223 HOST_WIDE_INT maxval)
8224{
8225 if (GET_CODE (vec) != CONST_VECTOR
8226 || GET_MODE_CLASS (GET_MODE (vec)) != MODE_VECTOR_INT)
8227 return false;
8228
8229 int nunits;
8230 if (!CONST_VECTOR_STEPPED_P (vec))
8231 nunits = const_vector_encoded_nelts (vec);
8232 else if (!CONST_VECTOR_NUNITS (vec).is_constant (&nunits))
8233 return false;
8234
8235 for (int i = 0; i < nunits; i++)
8236 {
8237 rtx vec_elem = CONST_VECTOR_ELT (vec, i);
8238 if (!CONST_INT_P (vec_elem)
8239 || !IN_RANGE (INTVAL (vec_elem), minval, maxval))
8240 return false;
8241 }
8242 return true;
8243}
43e9d192 8244
cf670503
ZC
8245/* N Z C V. */
8246#define AARCH64_CC_V 1
8247#define AARCH64_CC_C (1 << 1)
8248#define AARCH64_CC_Z (1 << 2)
8249#define AARCH64_CC_N (1 << 3)
8250
c8012fbc
WD
8251/* N Z C V flags for ccmp. Indexed by AARCH64_COND_CODE. */
8252static const int aarch64_nzcv_codes[] =
8253{
8254 0, /* EQ, Z == 1. */
8255 AARCH64_CC_Z, /* NE, Z == 0. */
8256 0, /* CS, C == 1. */
8257 AARCH64_CC_C, /* CC, C == 0. */
8258 0, /* MI, N == 1. */
8259 AARCH64_CC_N, /* PL, N == 0. */
8260 0, /* VS, V == 1. */
8261 AARCH64_CC_V, /* VC, V == 0. */
8262   AARCH64_CC_C,	/* LS, !(C == 1 && Z == 0).  */
8263 AARCH64_CC_C, /* LS, !(C == 1 && Z == 0). */
8264 AARCH64_CC_V, /* GE, N == V. */
8265 0, /* LT, N != V. */
8266 AARCH64_CC_Z, /* GT, Z == 0 && N == V. */
8267 0, /* LE, !(Z == 0 && N == V). */
8268 0, /* AL, Any. */
8269 0 /* NV, Any. */
cf670503
ZC
8270};
8271
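/* A standalone sketch (compiled separately, not part of this file) showing
   how the '%k' output code below turns a condition index into the 4-bit
   NZCV immediate of a CCMP instruction.  The table is a verbatim copy of
   aarch64_nzcv_codes above, indexed in the same order as its comments
   (EQ, NE, CS, CC, MI, PL, VS, VC, HI, LS, GE, LT, GT, LE, AL, NV); the
   macro and array names are hypothetical.  */
#include <stdio.h>

#define S_CC_V 1
#define S_CC_C (1 << 1)
#define S_CC_Z (1 << 2)
#define S_CC_N (1 << 3)

static const int sketch_nzcv_codes[] =
{
  0, S_CC_Z, 0, S_CC_C, 0, S_CC_N, 0, S_CC_V,
  0, S_CC_C, S_CC_V, 0, S_CC_Z, 0, 0, 0
};

int
main (void)
{
  /* Index 10 is GE in the ordering above, so '%k' prints 1 (only the
     V bit) for that condition; index 1 is NE, which prints 4 (Z bit).  */
  printf ("GE -> #%d\n", sketch_nzcv_codes[10]);
  printf ("NE -> #%d\n", sketch_nzcv_codes[1]);
  return 0;
}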
43cacb12
RS
8272/* Print floating-point vector immediate operand X to F, negating it
8273 first if NEGATE is true. Return true on success, false if it isn't
8274 a constant we can handle. */
8275
8276static bool
8277aarch64_print_vector_float_operand (FILE *f, rtx x, bool negate)
8278{
8279 rtx elt;
8280
8281 if (!const_vec_duplicate_p (x, &elt))
8282 return false;
8283
8284 REAL_VALUE_TYPE r = *CONST_DOUBLE_REAL_VALUE (elt);
8285 if (negate)
8286 r = real_value_negate (&r);
8287
d29f7dd5
RS
8288 /* Handle the SVE single-bit immediates specially, since they have a
8289 fixed form in the assembly syntax. */
43cacb12
RS
8290 if (real_equal (&r, &dconst0))
8291 asm_fprintf (f, "0.0");
8292 else if (real_equal (&r, &dconst1))
8293 asm_fprintf (f, "1.0");
8294 else if (real_equal (&r, &dconsthalf))
8295 asm_fprintf (f, "0.5");
8296 else
d29f7dd5
RS
8297 {
8298 const int buf_size = 20;
8299 char float_buf[buf_size] = {'\0'};
8300 real_to_decimal_for_mode (float_buf, &r, buf_size, buf_size,
8301 1, GET_MODE (elt));
8302 asm_fprintf (f, "%s", float_buf);
8303 }
43cacb12
RS
8304
8305 return true;
8306}
8307
9f4cbab8
RS
8308/* Return the equivalent letter for size. */
8309static char
8310sizetochar (int size)
8311{
8312 switch (size)
8313 {
8314 case 64: return 'd';
8315 case 32: return 's';
8316 case 16: return 'h';
8317 case 8 : return 'b';
8318 default: gcc_unreachable ();
8319 }
8320}
8321
bcf19844
JW
8322/* Print operand X to file F in a target specific manner according to CODE.
8323 The acceptable formatting commands given by CODE are:
8324 'c': An integer or symbol address without a preceding #
8325 sign.
43cacb12
RS
8326 'C': Take the duplicated element in a vector constant
8327 and print it in hex.
8328 'D': Take the duplicated element in a vector constant
8329 and print it as an unsigned integer, in decimal.
bcf19844
JW
8330 'e': Print the sign/zero-extend size as a character 8->b,
8331 16->h, 32->w.
d29f7dd5
RS
8332 'I': If the operand is a duplicated vector constant,
8333 replace it with the duplicated scalar. If the
8334 operand is then a floating-point constant, replace
8335 it with the integer bit representation. Print the
8336 transformed constant as a signed decimal number.
bcf19844
JW
8337     'p':		Prints N such that 2^N == X (X must be a power of 2 and
8338			a const int).
8339 'P': Print the number of non-zero bits in X (a const_int).
8340 'H': Print the higher numbered register of a pair (TImode)
8341 of regs.
8342 'm': Print a condition (eq, ne, etc).
8343 'M': Same as 'm', but invert condition.
43cacb12
RS
8344 'N': Take the duplicated element in a vector constant
8345 and print the negative of it in decimal.
bcf19844
JW
8346 'b/h/s/d/q': Print a scalar FP/SIMD register name.
8347 'S/T/U/V': Print a FP/SIMD register name for a register list.
8348 The register printed is the FP/SIMD register name
8349 of X + 0/1/2/3 for S/T/U/V.
8350 'R': Print a scalar FP/SIMD register name + 1.
8351 'X': Print bottom 16 bits of integer constant in hex.
8352 'w/x': Print a general register name or the zero register
8353 (32-bit or 64-bit).
8354     '0':		Print a normal operand; if it's a general register,
8355			then we assume DImode.
8356 'k': Print NZCV for conditional compare instructions.
8357 'A': Output address constant representing the first
8358 argument of X, specifying a relocation offset
8359 if appropriate.
8360 'L': Output constant address specified by X
8361 with a relocation offset if appropriate.
8362 'G': Prints address of X, specifying a PC relative
e69a816d
WD
8363 relocation mode if appropriate.
8364 'y': Output address of LDP or STP - this is used for
8365 some LDP/STPs which don't use a PARALLEL in their
8366 pattern (so the mode needs to be adjusted).
8367 'z': Output address of a typical LDP or STP. */
bcf19844 8368
cc8ca59e
JB
8369static void
8370aarch64_print_operand (FILE *f, rtx x, int code)
43e9d192 8371{
43cacb12 8372 rtx elt;
43e9d192
IB
8373 switch (code)
8374 {
f541a481
KT
8375 case 'c':
8376 switch (GET_CODE (x))
8377 {
8378 case CONST_INT:
8379 fprintf (f, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
8380 break;
8381
8382 case SYMBOL_REF:
8383 output_addr_const (f, x);
8384 break;
8385
8386 case CONST:
8387 if (GET_CODE (XEXP (x, 0)) == PLUS
8388 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF)
8389 {
8390 output_addr_const (f, x);
8391 break;
8392 }
8393 /* Fall through. */
8394
8395 default:
ee61f880 8396 output_operand_lossage ("unsupported operand for code '%c'", code);
f541a481
KT
8397 }
8398 break;
8399
43e9d192 8400 case 'e':
43e9d192
IB
8401 {
8402 int n;
8403
4aa81c2e 8404 if (!CONST_INT_P (x)
43e9d192
IB
8405 || (n = exact_log2 (INTVAL (x) & ~7)) <= 0)
8406 {
8407 output_operand_lossage ("invalid operand for '%%%c'", code);
8408 return;
8409 }
8410
8411 switch (n)
8412 {
8413 case 3:
8414 fputc ('b', f);
8415 break;
8416 case 4:
8417 fputc ('h', f);
8418 break;
8419 case 5:
8420 fputc ('w', f);
8421 break;
8422 default:
8423 output_operand_lossage ("invalid operand for '%%%c'", code);
8424 return;
8425 }
8426 }
8427 break;
8428
8429 case 'p':
8430 {
8431 int n;
8432
4aa81c2e 8433 if (!CONST_INT_P (x) || (n = exact_log2 (INTVAL (x))) < 0)
43e9d192
IB
8434 {
8435 output_operand_lossage ("invalid operand for '%%%c'", code);
8436 return;
8437 }
8438
8439 asm_fprintf (f, "%d", n);
8440 }
8441 break;
8442
8443 case 'P':
4aa81c2e 8444 if (!CONST_INT_P (x))
43e9d192
IB
8445 {
8446 output_operand_lossage ("invalid operand for '%%%c'", code);
8447 return;
8448 }
8449
8d55c61b 8450 asm_fprintf (f, "%u", popcount_hwi (INTVAL (x)));
43e9d192
IB
8451 break;
8452
8453 case 'H':
c0111dc4
RE
8454 if (x == const0_rtx)
8455 {
8456 asm_fprintf (f, "xzr");
8457 break;
8458 }
8459
4aa81c2e 8460 if (!REG_P (x) || !GP_REGNUM_P (REGNO (x) + 1))
43e9d192
IB
8461 {
8462 output_operand_lossage ("invalid operand for '%%%c'", code);
8463 return;
8464 }
8465
01a3a324 8466 asm_fprintf (f, "%s", reg_names [REGNO (x) + 1]);
43e9d192
IB
8467 break;
8468
d29f7dd5
RS
8469 case 'I':
8470 {
8471 x = aarch64_bit_representation (unwrap_const_vec_duplicate (x));
8472 if (CONST_INT_P (x))
8473 asm_fprintf (f, "%wd", INTVAL (x));
8474 else
8475 {
8476 output_operand_lossage ("invalid operand for '%%%c'", code);
8477 return;
8478 }
8479 break;
8480 }
8481
43e9d192 8482 case 'M':
c8012fbc 8483 case 'm':
cd5660ab
KT
8484 {
8485 int cond_code;
c8012fbc
WD
8486 /* CONST_TRUE_RTX means al/nv (al is the default, don't print it). */
8487 if (x == const_true_rtx)
cd5660ab 8488 {
c8012fbc
WD
8489 if (code == 'M')
8490 fputs ("nv", f);
cd5660ab
KT
8491 return;
8492 }
43e9d192 8493
cd5660ab
KT
8494 if (!COMPARISON_P (x))
8495 {
8496 output_operand_lossage ("invalid operand for '%%%c'", code);
8497 return;
8498 }
c8012fbc 8499
cd5660ab
KT
8500 cond_code = aarch64_get_condition_code (x);
8501 gcc_assert (cond_code >= 0);
c8012fbc
WD
8502 if (code == 'M')
8503 cond_code = AARCH64_INVERSE_CONDITION_CODE (cond_code);
57d6f4d0
RS
8504 if (GET_MODE (XEXP (x, 0)) == CC_NZCmode)
8505 fputs (aarch64_sve_condition_codes[cond_code], f);
8506 else
8507 fputs (aarch64_condition_codes[cond_code], f);
cd5660ab 8508 }
43e9d192
IB
8509 break;
8510
43cacb12
RS
8511 case 'N':
8512 if (!const_vec_duplicate_p (x, &elt))
8513 {
8514 output_operand_lossage ("invalid vector constant");
8515 return;
8516 }
8517
8518 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_VECTOR_INT)
8519 asm_fprintf (f, "%wd", -INTVAL (elt));
8520 else if (GET_MODE_CLASS (GET_MODE (x)) == MODE_VECTOR_FLOAT
8521 && aarch64_print_vector_float_operand (f, x, true))
8522 ;
8523 else
8524 {
8525 output_operand_lossage ("invalid vector constant");
8526 return;
8527 }
8528 break;
8529
43e9d192
IB
8530 case 'b':
8531 case 'h':
8532 case 's':
8533 case 'd':
8534 case 'q':
43e9d192
IB
8535 if (!REG_P (x) || !FP_REGNUM_P (REGNO (x)))
8536 {
8537 output_operand_lossage ("incompatible floating point / vector register operand for '%%%c'", code);
8538 return;
8539 }
50ce6f88 8540 asm_fprintf (f, "%c%d", code, REGNO (x) - V0_REGNUM);
43e9d192
IB
8541 break;
8542
8543 case 'S':
8544 case 'T':
8545 case 'U':
8546 case 'V':
43e9d192
IB
8547 if (!REG_P (x) || !FP_REGNUM_P (REGNO (x)))
8548 {
8549 output_operand_lossage ("incompatible floating point / vector register operand for '%%%c'", code);
8550 return;
8551 }
43cacb12
RS
8552 asm_fprintf (f, "%c%d",
8553 aarch64_sve_data_mode_p (GET_MODE (x)) ? 'z' : 'v',
8554 REGNO (x) - V0_REGNUM + (code - 'S'));
43e9d192
IB
8555 break;
8556
2d8c6dc1 8557 case 'R':
2d8c6dc1
AH
8558 if (!REG_P (x) || !FP_REGNUM_P (REGNO (x)))
8559 {
8560 output_operand_lossage ("incompatible floating point / vector register operand for '%%%c'", code);
8561 return;
8562 }
8563 asm_fprintf (f, "q%d", REGNO (x) - V0_REGNUM + 1);
8564 break;
8565
a05c0ddf 8566 case 'X':
4aa81c2e 8567 if (!CONST_INT_P (x))
a05c0ddf
IB
8568 {
8569 output_operand_lossage ("invalid operand for '%%%c'", code);
8570 return;
8571 }
50d38551 8572 asm_fprintf (f, "0x%wx", UINTVAL (x) & 0xffff);
a05c0ddf
IB
8573 break;
8574
43cacb12
RS
8575 case 'C':
8576 {
8577 /* Print a replicated constant in hex. */
8578 if (!const_vec_duplicate_p (x, &elt) || !CONST_INT_P (elt))
8579 {
8580 output_operand_lossage ("invalid operand for '%%%c'", code);
8581 return;
8582 }
8583 scalar_mode inner_mode = GET_MODE_INNER (GET_MODE (x));
8584 asm_fprintf (f, "0x%wx", UINTVAL (elt) & GET_MODE_MASK (inner_mode));
8585 }
8586 break;
8587
8588 case 'D':
8589 {
8590 /* Print a replicated constant in decimal, treating it as
8591 unsigned. */
8592 if (!const_vec_duplicate_p (x, &elt) || !CONST_INT_P (elt))
8593 {
8594 output_operand_lossage ("invalid operand for '%%%c'", code);
8595 return;
8596 }
8597 scalar_mode inner_mode = GET_MODE_INNER (GET_MODE (x));
8598 asm_fprintf (f, "%wd", UINTVAL (elt) & GET_MODE_MASK (inner_mode));
8599 }
8600 break;
8601
43e9d192
IB
8602 case 'w':
8603 case 'x':
3520f7cc
JG
8604 if (x == const0_rtx
8605 || (CONST_DOUBLE_P (x) && aarch64_float_const_zero_rtx_p (x)))
43e9d192 8606 {
50ce6f88 8607 asm_fprintf (f, "%czr", code);
43e9d192
IB
8608 break;
8609 }
8610
8611 if (REG_P (x) && GP_REGNUM_P (REGNO (x)))
8612 {
50ce6f88 8613 asm_fprintf (f, "%c%d", code, REGNO (x) - R0_REGNUM);
43e9d192
IB
8614 break;
8615 }
8616
8617 if (REG_P (x) && REGNO (x) == SP_REGNUM)
8618 {
50ce6f88 8619 asm_fprintf (f, "%ssp", code == 'w' ? "w" : "");
43e9d192
IB
8620 break;
8621 }
8622
8623 /* Fall through */
8624
8625 case 0:
43e9d192
IB
8626 if (x == NULL)
8627 {
8628 output_operand_lossage ("missing operand");
8629 return;
8630 }
8631
8632 switch (GET_CODE (x))
8633 {
8634 case REG:
43cacb12 8635 if (aarch64_sve_data_mode_p (GET_MODE (x)))
9f4cbab8
RS
8636 {
8637 if (REG_NREGS (x) == 1)
8638 asm_fprintf (f, "z%d", REGNO (x) - V0_REGNUM);
8639 else
8640 {
8641 char suffix
8642 = sizetochar (GET_MODE_UNIT_BITSIZE (GET_MODE (x)));
8643 asm_fprintf (f, "{z%d.%c - z%d.%c}",
8644 REGNO (x) - V0_REGNUM, suffix,
8645 END_REGNO (x) - V0_REGNUM - 1, suffix);
8646 }
8647 }
43cacb12
RS
8648 else
8649 asm_fprintf (f, "%s", reg_names [REGNO (x)]);
43e9d192
IB
8650 break;
8651
8652 case MEM:
cc8ca59e 8653 output_address (GET_MODE (x), XEXP (x, 0));
43e9d192
IB
8654 break;
8655
8656 case LABEL_REF:
8657 case SYMBOL_REF:
8658 output_addr_const (asm_out_file, x);
8659 break;
8660
8661 case CONST_INT:
8662 asm_fprintf (f, "%wd", INTVAL (x));
8663 break;
8664
43cacb12
RS
8665 case CONST:
8666 if (!VECTOR_MODE_P (GET_MODE (x)))
3520f7cc 8667 {
43cacb12
RS
8668 output_addr_const (asm_out_file, x);
8669 break;
3520f7cc 8670 }
43cacb12
RS
8671 /* fall through */
8672
8673 case CONST_VECTOR:
8674 if (!const_vec_duplicate_p (x, &elt))
3520f7cc 8675 {
43cacb12
RS
8676 output_operand_lossage ("invalid vector constant");
8677 return;
3520f7cc 8678 }
43cacb12
RS
8679
8680 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_VECTOR_INT)
8681 asm_fprintf (f, "%wd", INTVAL (elt));
8682 else if (GET_MODE_CLASS (GET_MODE (x)) == MODE_VECTOR_FLOAT
8683 && aarch64_print_vector_float_operand (f, x, false))
8684 ;
3520f7cc 8685 else
43cacb12
RS
8686 {
8687 output_operand_lossage ("invalid vector constant");
8688 return;
8689 }
43e9d192
IB
8690 break;
8691
3520f7cc 8692 case CONST_DOUBLE:
2ca5b430
KT
8693 /* Since we define TARGET_SUPPORTS_WIDE_INT we shouldn't ever
8694 be getting CONST_DOUBLEs holding integers. */
8695 gcc_assert (GET_MODE (x) != VOIDmode);
8696 if (aarch64_float_const_zero_rtx_p (x))
3520f7cc
JG
8697 {
8698 fputc ('0', f);
8699 break;
8700 }
8701 else if (aarch64_float_const_representable_p (x))
8702 {
8703#define buf_size 20
8704 char float_buf[buf_size] = {'\0'};
34a72c33
RS
8705 real_to_decimal_for_mode (float_buf,
8706 CONST_DOUBLE_REAL_VALUE (x),
3520f7cc
JG
8707 buf_size, buf_size,
8708 1, GET_MODE (x));
8709 asm_fprintf (asm_out_file, "%s", float_buf);
8710 break;
8711#undef buf_size
8712 }
8713 output_operand_lossage ("invalid constant");
8714 return;
43e9d192
IB
8715 default:
8716 output_operand_lossage ("invalid operand");
8717 return;
8718 }
8719 break;
8720
8721 case 'A':
8722 if (GET_CODE (x) == HIGH)
8723 x = XEXP (x, 0);
8724
a6e0bfa7 8725 switch (aarch64_classify_symbolic_expression (x))
43e9d192 8726 {
6642bdb4 8727 case SYMBOL_SMALL_GOT_4G:
43e9d192
IB
8728 asm_fprintf (asm_out_file, ":got:");
8729 break;
8730
8731 case SYMBOL_SMALL_TLSGD:
8732 asm_fprintf (asm_out_file, ":tlsgd:");
8733 break;
8734
8735 case SYMBOL_SMALL_TLSDESC:
8736 asm_fprintf (asm_out_file, ":tlsdesc:");
8737 break;
8738
79496620 8739 case SYMBOL_SMALL_TLSIE:
43e9d192
IB
8740 asm_fprintf (asm_out_file, ":gottprel:");
8741 break;
8742
d18ba284 8743 case SYMBOL_TLSLE24:
43e9d192
IB
8744 asm_fprintf (asm_out_file, ":tprel:");
8745 break;
8746
87dd8ab0
MS
8747 case SYMBOL_TINY_GOT:
8748 gcc_unreachable ();
8749 break;
8750
43e9d192
IB
8751 default:
8752 break;
8753 }
8754 output_addr_const (asm_out_file, x);
8755 break;
8756
8757 case 'L':
a6e0bfa7 8758 switch (aarch64_classify_symbolic_expression (x))
43e9d192 8759 {
6642bdb4 8760 case SYMBOL_SMALL_GOT_4G:
43e9d192
IB
8761 asm_fprintf (asm_out_file, ":lo12:");
8762 break;
8763
8764 case SYMBOL_SMALL_TLSGD:
8765 asm_fprintf (asm_out_file, ":tlsgd_lo12:");
8766 break;
8767
8768 case SYMBOL_SMALL_TLSDESC:
8769 asm_fprintf (asm_out_file, ":tlsdesc_lo12:");
8770 break;
8771
79496620 8772 case SYMBOL_SMALL_TLSIE:
43e9d192
IB
8773 asm_fprintf (asm_out_file, ":gottprel_lo12:");
8774 break;
8775
cbf5629e
JW
8776 case SYMBOL_TLSLE12:
8777 asm_fprintf (asm_out_file, ":tprel_lo12:");
8778 break;
8779
d18ba284 8780 case SYMBOL_TLSLE24:
43e9d192
IB
8781 asm_fprintf (asm_out_file, ":tprel_lo12_nc:");
8782 break;
8783
87dd8ab0
MS
8784 case SYMBOL_TINY_GOT:
8785 asm_fprintf (asm_out_file, ":got:");
8786 break;
8787
5ae7caad
JW
8788 case SYMBOL_TINY_TLSIE:
8789 asm_fprintf (asm_out_file, ":gottprel:");
8790 break;
8791
43e9d192
IB
8792 default:
8793 break;
8794 }
8795 output_addr_const (asm_out_file, x);
8796 break;
8797
8798 case 'G':
a6e0bfa7 8799 switch (aarch64_classify_symbolic_expression (x))
43e9d192 8800 {
d18ba284 8801 case SYMBOL_TLSLE24:
43e9d192
IB
8802 asm_fprintf (asm_out_file, ":tprel_hi12:");
8803 break;
8804 default:
8805 break;
8806 }
8807 output_addr_const (asm_out_file, x);
8808 break;
8809
cf670503
ZC
8810 case 'k':
8811 {
c8012fbc 8812 HOST_WIDE_INT cond_code;
cf670503 8813
c8012fbc 8814 if (!CONST_INT_P (x))
cf670503
ZC
8815 {
8816 output_operand_lossage ("invalid operand for '%%%c'", code);
8817 return;
8818 }
8819
c8012fbc
WD
8820 cond_code = INTVAL (x);
8821 gcc_assert (cond_code >= 0 && cond_code <= AARCH64_NV);
8822 asm_fprintf (f, "%d", aarch64_nzcv_codes[cond_code]);
cf670503
ZC
8823 }
8824 break;
8825
e69a816d
WD
8826 case 'y':
8827 case 'z':
8828 {
8829 machine_mode mode = GET_MODE (x);
8830
c348cab0 8831 if (GET_CODE (x) != MEM
6a70badb 8832 || (code == 'y' && maybe_ne (GET_MODE_SIZE (mode), 16)))
e69a816d
WD
8833 {
8834 output_operand_lossage ("invalid operand for '%%%c'", code);
8835 return;
8836 }
8837
a25831ac
AV
8838 if (!aarch64_print_address_internal (f, mode, XEXP (x, 0),
8839 code == 'y'
8840 ? ADDR_QUERY_LDP_STP_N
8841 : ADDR_QUERY_LDP_STP))
c348cab0 8842 output_operand_lossage ("invalid operand prefix '%%%c'", code);
e69a816d
WD
8843 }
8844 break;
8845
43e9d192
IB
8846 default:
8847 output_operand_lossage ("invalid operand prefix '%%%c'", code);
8848 return;
8849 }
8850}
8851
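/* A standalone sketch (compiled separately, not part of this file) of the
   arithmetic behind the 'p' and 'P' output codes documented above: 'p'
   prints the log2 of a power-of-two constant and 'P' prints its population
   count.  __builtin_ctzll and __builtin_popcountll are used here as
   stand-ins for GCC's exact_log2 and popcount_hwi.  */
#include <stdio.h>

int
main (void)
{
  unsigned long long x = 0x1000;   /* 2^12 */
  unsigned long long m = 0x00ff;   /* eight set bits */

  printf ("%%p of %#llx -> %d\n", x, __builtin_ctzll (x));      /* 12 */
  printf ("%%P of %#llx -> %d\n", m, __builtin_popcountll (m)); /* 8  */
  return 0;
}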
e69a816d
WD
8852/* Print address 'x' of a memory access with mode 'mode'.
8853 'op' is the context required by aarch64_classify_address. It can either be
8854 MEM for a normal memory access or PARALLEL for LDP/STP. */
c348cab0 8855static bool
a97d8b98
RS
8856aarch64_print_address_internal (FILE *f, machine_mode mode, rtx x,
8857 aarch64_addr_query_type type)
43e9d192
IB
8858{
8859 struct aarch64_address_info addr;
6a70badb 8860 unsigned int size;
43e9d192 8861
e69a816d 8862 /* Check all addresses are Pmode - including ILP32. */
31460ed2
JJ
8863 if (GET_MODE (x) != Pmode
8864 && (!CONST_INT_P (x)
8865 || trunc_int_for_mode (INTVAL (x), Pmode) != INTVAL (x)))
8866 {
8867 output_operand_lossage ("invalid address mode");
8868 return false;
8869 }
e69a816d 8870
a97d8b98 8871 if (aarch64_classify_address (&addr, x, mode, true, type))
43e9d192
IB
8872 switch (addr.type)
8873 {
8874 case ADDRESS_REG_IMM:
dc640181 8875 if (known_eq (addr.const_offset, 0))
01a3a324 8876 asm_fprintf (f, "[%s]", reg_names [REGNO (addr.base)]);
43cacb12
RS
8877 else if (aarch64_sve_data_mode_p (mode))
8878 {
8879 HOST_WIDE_INT vnum
8880 = exact_div (addr.const_offset,
8881 BYTES_PER_SVE_VECTOR).to_constant ();
8882 asm_fprintf (f, "[%s, #%wd, mul vl]",
8883 reg_names[REGNO (addr.base)], vnum);
8884 }
8885 else if (aarch64_sve_pred_mode_p (mode))
8886 {
8887 HOST_WIDE_INT vnum
8888 = exact_div (addr.const_offset,
8889 BYTES_PER_SVE_PRED).to_constant ();
8890 asm_fprintf (f, "[%s, #%wd, mul vl]",
8891 reg_names[REGNO (addr.base)], vnum);
8892 }
43e9d192 8893 else
16a3246f 8894 asm_fprintf (f, "[%s, %wd]", reg_names [REGNO (addr.base)],
43e9d192 8895 INTVAL (addr.offset));
c348cab0 8896 return true;
43e9d192
IB
8897
8898 case ADDRESS_REG_REG:
8899 if (addr.shift == 0)
16a3246f 8900 asm_fprintf (f, "[%s, %s]", reg_names [REGNO (addr.base)],
01a3a324 8901 reg_names [REGNO (addr.offset)]);
43e9d192 8902 else
16a3246f 8903 asm_fprintf (f, "[%s, %s, lsl %u]", reg_names [REGNO (addr.base)],
01a3a324 8904 reg_names [REGNO (addr.offset)], addr.shift);
c348cab0 8905 return true;
43e9d192
IB
8906
8907 case ADDRESS_REG_UXTW:
8908 if (addr.shift == 0)
16a3246f 8909 asm_fprintf (f, "[%s, w%d, uxtw]", reg_names [REGNO (addr.base)],
43e9d192
IB
8910 REGNO (addr.offset) - R0_REGNUM);
8911 else
16a3246f 8912 asm_fprintf (f, "[%s, w%d, uxtw %u]", reg_names [REGNO (addr.base)],
43e9d192 8913 REGNO (addr.offset) - R0_REGNUM, addr.shift);
c348cab0 8914 return true;
43e9d192
IB
8915
8916 case ADDRESS_REG_SXTW:
8917 if (addr.shift == 0)
16a3246f 8918 asm_fprintf (f, "[%s, w%d, sxtw]", reg_names [REGNO (addr.base)],
43e9d192
IB
8919 REGNO (addr.offset) - R0_REGNUM);
8920 else
16a3246f 8921 asm_fprintf (f, "[%s, w%d, sxtw %u]", reg_names [REGNO (addr.base)],
43e9d192 8922 REGNO (addr.offset) - R0_REGNUM, addr.shift);
c348cab0 8923 return true;
43e9d192
IB
8924
8925 case ADDRESS_REG_WB:
6a70badb
RS
8926 /* Writeback is only supported for fixed-width modes. */
8927 size = GET_MODE_SIZE (mode).to_constant ();
43e9d192
IB
8928 switch (GET_CODE (x))
8929 {
8930 case PRE_INC:
6a70badb 8931 asm_fprintf (f, "[%s, %d]!", reg_names [REGNO (addr.base)], size);
c348cab0 8932 return true;
43e9d192 8933 case POST_INC:
6a70badb 8934 asm_fprintf (f, "[%s], %d", reg_names [REGNO (addr.base)], size);
c348cab0 8935 return true;
43e9d192 8936 case PRE_DEC:
6a70badb 8937 asm_fprintf (f, "[%s, -%d]!", reg_names [REGNO (addr.base)], size);
c348cab0 8938 return true;
43e9d192 8939 case POST_DEC:
6a70badb 8940 asm_fprintf (f, "[%s], -%d", reg_names [REGNO (addr.base)], size);
c348cab0 8941 return true;
43e9d192 8942 case PRE_MODIFY:
6a70badb 8943 asm_fprintf (f, "[%s, %wd]!", reg_names[REGNO (addr.base)],
43e9d192 8944 INTVAL (addr.offset));
c348cab0 8945 return true;
43e9d192 8946 case POST_MODIFY:
6a70badb 8947 asm_fprintf (f, "[%s], %wd", reg_names[REGNO (addr.base)],
43e9d192 8948 INTVAL (addr.offset));
c348cab0 8949 return true;
43e9d192
IB
8950 default:
8951 break;
8952 }
8953 break;
8954
8955 case ADDRESS_LO_SUM:
16a3246f 8956 asm_fprintf (f, "[%s, #:lo12:", reg_names [REGNO (addr.base)]);
43e9d192
IB
8957 output_addr_const (f, addr.offset);
8958 asm_fprintf (f, "]");
c348cab0 8959 return true;
43e9d192
IB
8960
8961 case ADDRESS_SYMBOLIC:
d6591257 8962 output_addr_const (f, x);
c348cab0 8963 return true;
43e9d192
IB
8964 }
8965
c348cab0 8966 return false;
43e9d192
IB
8967}
8968
e69a816d
WD
8969/* Print address 'x' of a memory access with mode 'mode'. */
8970static void
8971aarch64_print_operand_address (FILE *f, machine_mode mode, rtx x)
8972{
43cacb12 8973 if (!aarch64_print_address_internal (f, mode, x, ADDR_QUERY_ANY))
c348cab0 8974 output_addr_const (f, x);
e69a816d
WD
8975}
8976
43e9d192
IB
8977bool
8978aarch64_label_mentioned_p (rtx x)
8979{
8980 const char *fmt;
8981 int i;
8982
8983 if (GET_CODE (x) == LABEL_REF)
8984 return true;
8985
8986 /* UNSPEC_TLS entries for a symbol include a LABEL_REF for the
8987 referencing instruction, but they are constant offsets, not
8988 symbols. */
8989 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
8990 return false;
8991
8992 fmt = GET_RTX_FORMAT (GET_CODE (x));
8993 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
8994 {
8995 if (fmt[i] == 'E')
8996 {
8997 int j;
8998
8999 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
9000 if (aarch64_label_mentioned_p (XVECEXP (x, i, j)))
9001 return 1;
9002 }
9003 else if (fmt[i] == 'e' && aarch64_label_mentioned_p (XEXP (x, i)))
9004 return 1;
9005 }
9006
9007 return 0;
9008}
9009
9010/* Implement REGNO_REG_CLASS. */
9011
9012enum reg_class
9013aarch64_regno_regclass (unsigned regno)
9014{
9015 if (GP_REGNUM_P (regno))
a4a182c6 9016 return GENERAL_REGS;
43e9d192
IB
9017
9018 if (regno == SP_REGNUM)
9019 return STACK_REG;
9020
9021 if (regno == FRAME_POINTER_REGNUM
9022 || regno == ARG_POINTER_REGNUM)
f24bb080 9023 return POINTER_REGS;
43e9d192
IB
9024
9025 if (FP_REGNUM_P (regno))
163b1f6a
RS
9026 return (FP_LO8_REGNUM_P (regno) ? FP_LO8_REGS
9027 : FP_LO_REGNUM_P (regno) ? FP_LO_REGS : FP_REGS);
43e9d192 9028
43cacb12
RS
9029 if (PR_REGNUM_P (regno))
9030 return PR_LO_REGNUM_P (regno) ? PR_LO_REGS : PR_HI_REGS;
9031
43e9d192
IB
9032 return NO_REGS;
9033}
9034
6a70badb
RS
9035/* OFFSET is an address offset for mode MODE, which has SIZE bytes.
9036 If OFFSET is out of range, return an offset of an anchor point
9037 that is in range. Return 0 otherwise. */
9038
9039static HOST_WIDE_INT
9040aarch64_anchor_offset (HOST_WIDE_INT offset, HOST_WIDE_INT size,
9041 machine_mode mode)
9042{
9043 /* Does it look like we'll need a 16-byte load/store-pair operation? */
9044 if (size > 16)
9045 return (offset + 0x400) & ~0x7f0;
9046
9047 /* For offsets that aren't a multiple of the access size, the limit is
9048 -256...255. */
9049 if (offset & (size - 1))
9050 {
9051 /* BLKmode typically uses LDP of X-registers. */
9052 if (mode == BLKmode)
9053 return (offset + 512) & ~0x3ff;
9054 return (offset + 0x100) & ~0x1ff;
9055 }
9056
9057 /* Small negative offsets are supported. */
9058 if (IN_RANGE (offset, -256, 0))
9059 return 0;
9060
9061 if (mode == TImode || mode == TFmode)
9062 return (offset + 0x100) & ~0x1ff;
9063
9064  /* Use a 12-bit offset scaled by the access size.  */
9065 return offset & (~0xfff * size);
9066}
9067
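/* A standalone worked example (compiled separately, not part of this file)
   of the anchoring arithmetic above.  The copy below follows the branches
   of aarch64_anchor_offset verbatim, except that the BLKmode and
   TImode/TFmode special cases are omitted for brevity; the function name
   is hypothetical.  */
#include <stdio.h>

static long
sketch_anchor_offset (long offset, long size)
{
  if (size > 16)
    return (offset + 0x400) & ~0x7f0;
  if (offset & (size - 1))
    return (offset + 0x100) & ~0x1ff;
  if (offset >= -256 && offset <= 0)
    return 0;
  return offset & (~0xfff * size);
}

int
main (void)
{
  /* Unaligned word offset: anchored to a -256..255 window.  */
  printf ("%#lx\n", sketch_anchor_offset (0x3001, 4));  /* 0x3000  */
  /* Aligned but outside the scaled 12-bit range: anchored downwards.  */
  printf ("%#lx\n", sketch_anchor_offset (0x13000, 4)); /* 0x10000 */
  /* Small negative offsets need no anchor at all.  */
  printf ("%#lx\n", sketch_anchor_offset (-0x40, 4));   /* 0       */
  return 0;
}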
0c4ec427 9068static rtx
ef4bddc2 9069aarch64_legitimize_address (rtx x, rtx /* orig_x */, machine_mode mode)
0c4ec427
RE
9070{
9071 /* Try to split X+CONST into Y=X+(CONST & ~mask), Y+(CONST&mask),
9072 where mask is selected by alignment and size of the offset.
9073 We try to pick as large a range for the offset as possible to
9074 maximize the chance of a CSE. However, for aligned addresses
9075 we limit the range to 4k so that structures with different sized
e8426e0a
BC
9076 elements are likely to use the same base. We need to be careful
9077 not to split a CONST for some forms of address expression, otherwise
9078 it will generate sub-optimal code. */
0c4ec427
RE
9079
9080 if (GET_CODE (x) == PLUS && CONST_INT_P (XEXP (x, 1)))
9081 {
9e0218fc 9082 rtx base = XEXP (x, 0);
17d7bdd8 9083 rtx offset_rtx = XEXP (x, 1);
9e0218fc 9084 HOST_WIDE_INT offset = INTVAL (offset_rtx);
0c4ec427 9085
9e0218fc 9086 if (GET_CODE (base) == PLUS)
e8426e0a 9087 {
9e0218fc
RH
9088 rtx op0 = XEXP (base, 0);
9089 rtx op1 = XEXP (base, 1);
9090
9091 /* Force any scaling into a temp for CSE. */
9092 op0 = force_reg (Pmode, op0);
9093 op1 = force_reg (Pmode, op1);
9094
9095 /* Let the pointer register be in op0. */
9096 if (REG_POINTER (op1))
9097 std::swap (op0, op1);
9098
9099 /* If the pointer is virtual or frame related, then we know that
9100 virtual register instantiation or register elimination is going
9101 to apply a second constant. We want the two constants folded
9102 together easily. Therefore, emit as (OP0 + CONST) + OP1. */
9103 if (virt_or_elim_regno_p (REGNO (op0)))
e8426e0a 9104 {
9e0218fc
RH
9105 base = expand_binop (Pmode, add_optab, op0, offset_rtx,
9106 NULL_RTX, true, OPTAB_DIRECT);
9107 return gen_rtx_PLUS (Pmode, base, op1);
e8426e0a 9108 }
e8426e0a 9109
9e0218fc
RH
9110 /* Otherwise, in order to encourage CSE (and thence loop strength
9111 reduce) scaled addresses, emit as (OP0 + OP1) + CONST. */
9112 base = expand_binop (Pmode, add_optab, op0, op1,
9113 NULL_RTX, true, OPTAB_DIRECT);
9114 x = gen_rtx_PLUS (Pmode, base, offset_rtx);
e8426e0a
BC
9115 }
9116
6a70badb
RS
9117 HOST_WIDE_INT size;
9118 if (GET_MODE_SIZE (mode).is_constant (&size))
ff0f3f1c 9119 {
6a70badb
RS
9120 HOST_WIDE_INT base_offset = aarch64_anchor_offset (offset, size,
9121 mode);
9122 if (base_offset != 0)
9123 {
9124 base = plus_constant (Pmode, base, base_offset);
9125 base = force_operand (base, NULL_RTX);
9126 return plus_constant (Pmode, base, offset - base_offset);
9127 }
9e0218fc 9128 }
0c4ec427
RE
9129 }
9130
9131 return x;
9132}
9133
43e9d192
IB
9134static reg_class_t
9135aarch64_secondary_reload (bool in_p ATTRIBUTE_UNUSED, rtx x,
9136 reg_class_t rclass,
ef4bddc2 9137 machine_mode mode,
43e9d192
IB
9138 secondary_reload_info *sri)
9139{
9a1b9cb4
RS
9140 /* Use aarch64_sve_reload_be for SVE reloads that cannot be handled
9141 directly by the *aarch64_sve_mov<mode>_be move pattern. See the
9142 comment at the head of aarch64-sve.md for more details about the
9143 big-endian handling. */
43cacb12
RS
9144 if (BYTES_BIG_ENDIAN
9145 && reg_class_subset_p (rclass, FP_REGS)
9a1b9cb4
RS
9146 && !((REG_P (x) && HARD_REGISTER_P (x))
9147 || aarch64_simd_valid_immediate (x, NULL))
43cacb12
RS
9148 && aarch64_sve_data_mode_p (mode))
9149 {
9150 sri->icode = CODE_FOR_aarch64_sve_reload_be;
9151 return NO_REGS;
9152 }
b4f50fd4
RR
9153
9154 /* If we have to disable direct literal pool loads and stores because the
9155 function is too big, then we need a scratch register. */
9156 if (MEM_P (x) && GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x)
9157 && (SCALAR_FLOAT_MODE_P (GET_MODE (x))
9158 || targetm.vector_mode_supported_p (GET_MODE (x)))
9ee6540a 9159 && !aarch64_pcrelative_literal_loads)
b4f50fd4 9160 {
0016d8d9 9161 sri->icode = code_for_aarch64_reload_movcp (mode, DImode);
b4f50fd4
RR
9162 return NO_REGS;
9163 }
9164
43e9d192
IB
9165 /* Without the TARGET_SIMD instructions we cannot move a Q register
9166 to a Q register directly. We need a scratch. */
9167 if (REG_P (x) && (mode == TFmode || mode == TImode) && mode == GET_MODE (x)
9168 && FP_REGNUM_P (REGNO (x)) && !TARGET_SIMD
9169 && reg_class_subset_p (rclass, FP_REGS))
9170 {
0016d8d9 9171 sri->icode = code_for_aarch64_reload_mov (mode);
43e9d192
IB
9172 return NO_REGS;
9173 }
9174
9175  /* A TFmode or TImode memory access should be handled via an FP_REG
9176 because AArch64 has richer addressing modes for LDR/STR instructions
9177 than LDP/STP instructions. */
d5726973 9178 if (TARGET_FLOAT && rclass == GENERAL_REGS
6a70badb 9179 && known_eq (GET_MODE_SIZE (mode), 16) && MEM_P (x))
43e9d192
IB
9180 return FP_REGS;
9181
9182 if (rclass == FP_REGS && (mode == TImode || mode == TFmode) && CONSTANT_P(x))
a4a182c6 9183 return GENERAL_REGS;
43e9d192
IB
9184
9185 return NO_REGS;
9186}
9187
9188static bool
6216fd90 9189aarch64_can_eliminate (const int from ATTRIBUTE_UNUSED, const int to)
43e9d192 9190{
6216fd90 9191 gcc_assert (from == ARG_POINTER_REGNUM || from == FRAME_POINTER_REGNUM);
43e9d192 9192
6216fd90
WD
9193 /* If we need a frame pointer, ARG_POINTER_REGNUM and FRAME_POINTER_REGNUM
9194 can only eliminate to HARD_FRAME_POINTER_REGNUM. */
43e9d192 9195 if (frame_pointer_needed)
6216fd90 9196 return to == HARD_FRAME_POINTER_REGNUM;
43e9d192
IB
9197 return true;
9198}
9199
6a70badb 9200poly_int64
43e9d192
IB
9201aarch64_initial_elimination_offset (unsigned from, unsigned to)
9202{
78c29983
MS
9203 if (to == HARD_FRAME_POINTER_REGNUM)
9204 {
9205 if (from == ARG_POINTER_REGNUM)
71bfb77a 9206 return cfun->machine->frame.hard_fp_offset;
78c29983
MS
9207
9208 if (from == FRAME_POINTER_REGNUM)
71bfb77a
WD
9209 return cfun->machine->frame.hard_fp_offset
9210 - cfun->machine->frame.locals_offset;
78c29983
MS
9211 }
9212
9213 if (to == STACK_POINTER_REGNUM)
9214 {
9215 if (from == FRAME_POINTER_REGNUM)
71bfb77a
WD
9216 return cfun->machine->frame.frame_size
9217 - cfun->machine->frame.locals_offset;
78c29983
MS
9218 }
9219
1c960e02 9220 return cfun->machine->frame.frame_size;
43e9d192
IB
9221}
9222
43e9d192
IB
9223/* Implement RETURN_ADDR_RTX. We do not support moving back to a
9224 previous frame. */
9225
9226rtx
9227aarch64_return_addr (int count, rtx frame ATTRIBUTE_UNUSED)
9228{
9229 if (count != 0)
9230 return const0_rtx;
9231 return get_hard_reg_initial_val (Pmode, LR_REGNUM);
9232}
9233
9234
9235static void
9236aarch64_asm_trampoline_template (FILE *f)
9237{
b5f794b4
SD
9238 int offset1 = 16;
9239 int offset2 = 20;
9240
9241 if (aarch64_bti_enabled ())
9242 {
9243 asm_fprintf (f, "\thint\t34 // bti c\n");
9244 offset1 -= 4;
9245 offset2 -= 4;
9246 }
9247
28514dda
YZ
9248 if (TARGET_ILP32)
9249 {
b5f794b4
SD
9250 asm_fprintf (f, "\tldr\tw%d, .+%d\n", IP1_REGNUM - R0_REGNUM, offset1);
9251 asm_fprintf (f, "\tldr\tw%d, .+%d\n", STATIC_CHAIN_REGNUM - R0_REGNUM,
9252 offset1);
28514dda
YZ
9253 }
9254 else
9255 {
b5f794b4
SD
9256 asm_fprintf (f, "\tldr\t%s, .+%d\n", reg_names [IP1_REGNUM], offset1);
9257 asm_fprintf (f, "\tldr\t%s, .+%d\n", reg_names [STATIC_CHAIN_REGNUM],
9258 offset2);
28514dda 9259 }
01a3a324 9260 asm_fprintf (f, "\tbr\t%s\n", reg_names [IP1_REGNUM]);
b5f794b4
SD
9261
9262  /* The trampoline needs an extra padding instruction.  If BTI is
9263     enabled, the padding instruction is replaced by the BTI instruction
9264     at the beginning.  */
9265 if (!aarch64_bti_enabled ())
9266 assemble_aligned_integer (4, const0_rtx);
9267
28514dda
YZ
9268 assemble_aligned_integer (POINTER_BYTES, const0_rtx);
9269 assemble_aligned_integer (POINTER_BYTES, const0_rtx);
43e9d192
IB
9270}
9271
9272static void
9273aarch64_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
9274{
9275 rtx fnaddr, mem, a_tramp;
28514dda 9276 const int tramp_code_sz = 16;
43e9d192
IB
9277
9278  /* Don't need to copy the trailing D-words; we fill those in below.  */
9279 emit_block_move (m_tramp, assemble_trampoline_template (),
28514dda
YZ
9280 GEN_INT (tramp_code_sz), BLOCK_OP_NORMAL);
9281 mem = adjust_address (m_tramp, ptr_mode, tramp_code_sz);
43e9d192 9282 fnaddr = XEXP (DECL_RTL (fndecl), 0);
28514dda
YZ
9283 if (GET_MODE (fnaddr) != ptr_mode)
9284 fnaddr = convert_memory_address (ptr_mode, fnaddr);
43e9d192
IB
9285 emit_move_insn (mem, fnaddr);
9286
28514dda 9287 mem = adjust_address (m_tramp, ptr_mode, tramp_code_sz + POINTER_BYTES);
43e9d192
IB
9288 emit_move_insn (mem, chain_value);
9289
9290 /* XXX We should really define a "clear_cache" pattern and use
9291 gen_clear_cache(). */
9292 a_tramp = XEXP (m_tramp, 0);
9293 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__clear_cache"),
db69559b 9294 LCT_NORMAL, VOIDmode, a_tramp, ptr_mode,
28514dda
YZ
9295 plus_constant (ptr_mode, a_tramp, TRAMPOLINE_SIZE),
9296 ptr_mode);
43e9d192
IB
9297}
9298
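/* A standalone sketch (compiled separately, not part of this file) of the
   LP64 trampoline layout produced by the two functions above: 16 bytes of
   code followed by the target function address and the static chain value.
   The struct name is hypothetical and the assertion is only illustrative;
   under ILP32 the two trailing slots are 4-byte pointers instead.  */
#include <stdint.h>
#include <assert.h>

struct sketch_aarch64_trampoline
{
  uint32_t code[4];       /* tramp_code_sz == 16 bytes of instructions.  */
  uint64_t func_addr;     /* Written at offset tramp_code_sz.            */
  uint64_t static_chain;  /* Written at offset tramp_code_sz + 8.        */
};

int
main (void)
{
  /* 32 bytes in total for LP64.  */
  assert (sizeof (struct sketch_aarch64_trampoline) == 32);
  return 0;
}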
9299static unsigned char
ef4bddc2 9300aarch64_class_max_nregs (reg_class_t regclass, machine_mode mode)
43e9d192 9301{
6a70badb
RS
9302 /* ??? Logically we should only need to provide a value when
9303 HARD_REGNO_MODE_OK says that at least one register in REGCLASS
9304 can hold MODE, but at the moment we need to handle all modes.
9305 Just ignore any runtime parts for registers that can't store them. */
9306 HOST_WIDE_INT lowest_size = constant_lower_bound (GET_MODE_SIZE (mode));
43cacb12 9307 unsigned int nregs;
43e9d192
IB
9308 switch (regclass)
9309 {
d677263e 9310 case TAILCALL_ADDR_REGS:
43e9d192
IB
9311 case POINTER_REGS:
9312 case GENERAL_REGS:
9313 case ALL_REGS:
f25a140b 9314 case POINTER_AND_FP_REGS:
43e9d192
IB
9315 case FP_REGS:
9316 case FP_LO_REGS:
163b1f6a 9317 case FP_LO8_REGS:
43cacb12
RS
9318 if (aarch64_sve_data_mode_p (mode)
9319 && constant_multiple_p (GET_MODE_SIZE (mode),
9320 BYTES_PER_SVE_VECTOR, &nregs))
9321 return nregs;
9322 return (aarch64_vector_data_mode_p (mode)
6a70badb
RS
9323 ? CEIL (lowest_size, UNITS_PER_VREG)
9324 : CEIL (lowest_size, UNITS_PER_WORD));
43e9d192 9325 case STACK_REG:
43cacb12
RS
9326 case PR_REGS:
9327 case PR_LO_REGS:
9328 case PR_HI_REGS:
43e9d192
IB
9329 return 1;
9330
9331 case NO_REGS:
9332 return 0;
9333
9334 default:
9335 break;
9336 }
9337 gcc_unreachable ();
9338}
9339
9340static reg_class_t
78d8b9f0 9341aarch64_preferred_reload_class (rtx x, reg_class_t regclass)
43e9d192 9342{
51bb310d 9343 if (regclass == POINTER_REGS)
78d8b9f0
IB
9344 return GENERAL_REGS;
9345
51bb310d
MS
9346 if (regclass == STACK_REG)
9347 {
9348 if (REG_P(x)
9349 && reg_class_subset_p (REGNO_REG_CLASS (REGNO (x)), POINTER_REGS))
9350 return regclass;
9351
9352 return NO_REGS;
9353 }
9354
27bd251b
IB
9355  /* Register elimination can result in a request for
9356     SP+constant->FP_REGS.  We cannot support such operations, which
9357     use SP as the source and an FP_REG as the destination, so reject
9358     them outright.  */
9359 if (! reg_class_subset_p (regclass, GENERAL_REGS) && GET_CODE (x) == PLUS)
9360 {
9361 rtx lhs = XEXP (x, 0);
9362
9363 /* Look through a possible SUBREG introduced by ILP32. */
9364 if (GET_CODE (lhs) == SUBREG)
9365 lhs = SUBREG_REG (lhs);
9366
9367 gcc_assert (REG_P (lhs));
9368 gcc_assert (reg_class_subset_p (REGNO_REG_CLASS (REGNO (lhs)),
9369 POINTER_REGS));
9370 return NO_REGS;
9371 }
9372
78d8b9f0 9373 return regclass;
43e9d192
IB
9374}
9375
9376void
9377aarch64_asm_output_labelref (FILE* f, const char *name)
9378{
9379 asm_fprintf (f, "%U%s", name);
9380}
9381
9382static void
9383aarch64_elf_asm_constructor (rtx symbol, int priority)
9384{
9385 if (priority == DEFAULT_INIT_PRIORITY)
9386 default_ctor_section_asm_out_constructor (symbol, priority);
9387 else
9388 {
9389 section *s;
53d190c1
AT
9390      /* Although priority is known to be in the range [0, 65535], meaning
9391	 18 bytes would be enough, the compiler might not know that.  To avoid
9392	 a -Wformat-truncation false positive, use a larger size.  */
9393 char buf[23];
43e9d192 9394 snprintf (buf, sizeof (buf), ".init_array.%.5u", priority);
fcef3abd 9395 s = get_section (buf, SECTION_WRITE | SECTION_NOTYPE, NULL);
43e9d192
IB
9396 switch_to_section (s);
9397 assemble_align (POINTER_SIZE);
28514dda 9398 assemble_aligned_integer (POINTER_BYTES, symbol);
43e9d192
IB
9399 }
9400}
9401
9402static void
9403aarch64_elf_asm_destructor (rtx symbol, int priority)
9404{
9405 if (priority == DEFAULT_INIT_PRIORITY)
9406 default_dtor_section_asm_out_destructor (symbol, priority);
9407 else
9408 {
9409 section *s;
53d190c1
AT
9410      /* Although priority is known to be in the range [0, 65535], meaning
9411	 18 bytes would be enough, the compiler might not know that.  To avoid
9412	 a -Wformat-truncation false positive, use a larger size.  */
9413 char buf[23];
43e9d192 9414 snprintf (buf, sizeof (buf), ".fini_array.%.5u", priority);
fcef3abd 9415 s = get_section (buf, SECTION_WRITE | SECTION_NOTYPE, NULL);
43e9d192
IB
9416 switch_to_section (s);
9417 assemble_align (POINTER_SIZE);
28514dda 9418 assemble_aligned_integer (POINTER_BYTES, symbol);
43e9d192
IB
9419 }
9420}
9421
9422const char*
9423aarch64_output_casesi (rtx *operands)
9424{
9425 char buf[100];
9426 char label[100];
b32d5189 9427 rtx diff_vec = PATTERN (NEXT_INSN (as_a <rtx_insn *> (operands[2])));
43e9d192
IB
9428 int index;
9429 static const char *const patterns[4][2] =
9430 {
9431 {
9432 "ldrb\t%w3, [%0,%w1,uxtw]",
9433 "add\t%3, %4, %w3, sxtb #2"
9434 },
9435 {
9436 "ldrh\t%w3, [%0,%w1,uxtw #1]",
9437 "add\t%3, %4, %w3, sxth #2"
9438 },
9439 {
9440 "ldr\t%w3, [%0,%w1,uxtw #2]",
9441 "add\t%3, %4, %w3, sxtw #2"
9442 },
9443 /* We assume that DImode is only generated when not optimizing and
9444 that we don't really need 64-bit address offsets. That would
9445 imply an object file with 8GB of code in a single function! */
9446 {
9447 "ldr\t%w3, [%0,%w1,uxtw #2]",
9448 "add\t%3, %4, %w3, sxtw #2"
9449 }
9450 };
9451
9452 gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC);
9453
77e994c9
RS
9454 scalar_int_mode mode = as_a <scalar_int_mode> (GET_MODE (diff_vec));
9455 index = exact_log2 (GET_MODE_SIZE (mode));
43e9d192
IB
9456
9457 gcc_assert (index >= 0 && index <= 3);
9458
9459  /* Need to implement table size reduction, by changing the code below.  */
9460 output_asm_insn (patterns[index][0], operands);
9461 ASM_GENERATE_INTERNAL_LABEL (label, "Lrtx", CODE_LABEL_NUMBER (operands[2]));
9462 snprintf (buf, sizeof (buf),
9463 "adr\t%%4, %s", targetm.strip_name_encoding (label));
9464 output_asm_insn (buf, operands);
9465 output_asm_insn (patterns[index][1], operands);
9466 output_asm_insn ("br\t%3", operands);
9467 assemble_label (asm_out_file, label);
9468 return "";
9469}
9470
9471
9472/* Return size in bits of an arithmetic operand which is shifted/scaled and
9473 masked such that it is suitable for a UXTB, UXTH, or UXTW extend
9474 operator. */
9475
9476int
9477aarch64_uxt_size (int shift, HOST_WIDE_INT mask)
9478{
9479 if (shift >= 0 && shift <= 3)
9480 {
9481 int size;
9482 for (size = 8; size <= 32; size *= 2)
9483 {
9484 HOST_WIDE_INT bits = ((HOST_WIDE_INT)1U << size) - 1;
9485 if (mask == bits << shift)
9486 return size;
9487 }
9488 }
9489 return 0;
9490}
9491
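/* A standalone copy (compiled separately, not part of this file) of the
   check above, showing which shifted masks correspond to a UXTB, UXTH or
   UXTW extend operator.  The wrapper name is hypothetical.  */
#include <stdio.h>

static int
sketch_uxt_size (int shift, long long mask)
{
  if (shift >= 0 && shift <= 3)
    for (int size = 8; size <= 32; size *= 2)
      {
	long long bits = ((long long) 1 << size) - 1;
	if (mask == bits << shift)
	  return size;
      }
  return 0;
}

int
main (void)
{
  printf ("%d\n", sketch_uxt_size (2, 0xffLL << 2));       /* 8  -> UXTB */
  printf ("%d\n", sketch_uxt_size (0, 0xffff));            /* 16 -> UXTH */
  printf ("%d\n", sketch_uxt_size (1, 0xffffffffLL << 1)); /* 32 -> UXTW */
  printf ("%d\n", sketch_uxt_size (0, 0x7f));              /* 0  -> no match */
  return 0;
}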
e78d485e
RR
9492/* Constant pools are per-function only when PC-relative
9493   literal loads are enabled or we are in the large memory
9494   model.  */
9495
9496static inline bool
9497aarch64_can_use_per_function_literal_pools_p (void)
9498{
9ee6540a 9499 return (aarch64_pcrelative_literal_loads
e78d485e
RR
9500 || aarch64_cmodel == AARCH64_CMODEL_LARGE);
9501}
9502
43e9d192 9503static bool
e78d485e 9504aarch64_use_blocks_for_constant_p (machine_mode, const_rtx)
43e9d192 9505{
74a9301d
VM
9506 /* We can't use blocks for constants when we're using a per-function
9507 constant pool. */
9508 return !aarch64_can_use_per_function_literal_pools_p ();
43e9d192
IB
9509}
9510
e78d485e
RR
9511/* Select appropriate section for constants depending
9512 on where we place literal pools. */
9513
43e9d192 9514static section *
e78d485e
RR
9515aarch64_select_rtx_section (machine_mode mode,
9516 rtx x,
9517 unsigned HOST_WIDE_INT align)
43e9d192 9518{
e78d485e
RR
9519 if (aarch64_can_use_per_function_literal_pools_p ())
9520 return function_section (current_function_decl);
43e9d192 9521
e78d485e
RR
9522 return default_elf_select_rtx_section (mode, x, align);
9523}
43e9d192 9524
5fca7b66
RH
9525/* Implement ASM_OUTPUT_POOL_EPILOGUE. */
9526void
9527aarch64_asm_output_pool_epilogue (FILE *f, const char *, tree,
9528 HOST_WIDE_INT offset)
9529{
9530 /* When using per-function literal pools, we must ensure that any code
9531 section is aligned to the minimal instruction length, lest we get
9532 errors from the assembler re "unaligned instructions". */
9533 if ((offset & 3) && aarch64_can_use_per_function_literal_pools_p ())
9534 ASM_OUTPUT_ALIGN (f, 2);
9535}
9536
43e9d192
IB
9537/* Costs. */
9538
9539/* Helper function for rtx cost calculation. Strip a shift expression
9540 from X. Returns the inner operand if successful, or the original
9541 expression on failure. */
9542static rtx
9543aarch64_strip_shift (rtx x)
9544{
9545 rtx op = x;
9546
57b77d46
RE
9547 /* We accept both ROTATERT and ROTATE: since the RHS must be a constant
9548 we can convert both to ROR during final output. */
43e9d192
IB
9549 if ((GET_CODE (op) == ASHIFT
9550 || GET_CODE (op) == ASHIFTRT
57b77d46
RE
9551 || GET_CODE (op) == LSHIFTRT
9552 || GET_CODE (op) == ROTATERT
9553 || GET_CODE (op) == ROTATE)
43e9d192
IB
9554 && CONST_INT_P (XEXP (op, 1)))
9555 return XEXP (op, 0);
9556
9557 if (GET_CODE (op) == MULT
9558 && CONST_INT_P (XEXP (op, 1))
9559 && ((unsigned) exact_log2 (INTVAL (XEXP (op, 1)))) < 64)
9560 return XEXP (op, 0);
9561
9562 return x;
9563}
9564
4745e701 9565/* Helper function for rtx cost calculation. Strip an extend
43e9d192
IB
9566 expression from X. Returns the inner operand if successful, or the
9567 original expression on failure. We deal with a number of possible
b10f1009
AP
9568 canonicalization variations here. If STRIP_SHIFT is true, then
9569 we can strip off a shift also. */
43e9d192 9570static rtx
b10f1009 9571aarch64_strip_extend (rtx x, bool strip_shift)
43e9d192 9572{
77e994c9 9573 scalar_int_mode mode;
43e9d192
IB
9574 rtx op = x;
9575
77e994c9
RS
9576 if (!is_a <scalar_int_mode> (GET_MODE (op), &mode))
9577 return op;
9578
43e9d192
IB
9579 /* Zero and sign extraction of a widened value. */
9580 if ((GET_CODE (op) == ZERO_EXTRACT || GET_CODE (op) == SIGN_EXTRACT)
9581 && XEXP (op, 2) == const0_rtx
4745e701 9582 && GET_CODE (XEXP (op, 0)) == MULT
77e994c9 9583 && aarch64_is_extend_from_extract (mode, XEXP (XEXP (op, 0), 1),
43e9d192
IB
9584 XEXP (op, 1)))
9585 return XEXP (XEXP (op, 0), 0);
9586
9587 /* It can also be represented (for zero-extend) as an AND with an
9588 immediate. */
9589 if (GET_CODE (op) == AND
9590 && GET_CODE (XEXP (op, 0)) == MULT
9591 && CONST_INT_P (XEXP (XEXP (op, 0), 1))
9592 && CONST_INT_P (XEXP (op, 1))
9593 && aarch64_uxt_size (exact_log2 (INTVAL (XEXP (XEXP (op, 0), 1))),
9594 INTVAL (XEXP (op, 1))) != 0)
9595 return XEXP (XEXP (op, 0), 0);
9596
9597 /* Now handle extended register, as this may also have an optional
9598 left shift by 1..4. */
b10f1009
AP
9599 if (strip_shift
9600 && GET_CODE (op) == ASHIFT
43e9d192
IB
9601 && CONST_INT_P (XEXP (op, 1))
9602 && ((unsigned HOST_WIDE_INT) INTVAL (XEXP (op, 1))) <= 4)
9603 op = XEXP (op, 0);
9604
9605 if (GET_CODE (op) == ZERO_EXTEND
9606 || GET_CODE (op) == SIGN_EXTEND)
9607 op = XEXP (op, 0);
9608
9609 if (op != x)
9610 return op;
9611
4745e701
JG
9612 return x;
9613}
9614
0a78ebe4
KT
9615/* Return true iff CODE is a shift supported in combination
9616 with arithmetic instructions. */
4d1919ed 9617
0a78ebe4
KT
9618static bool
9619aarch64_shift_p (enum rtx_code code)
9620{
9621 return code == ASHIFT || code == ASHIFTRT || code == LSHIFTRT;
9622}
9623
b10f1009
AP
9624
9625/* Return true iff X is a cheap shift without a sign extend. */
9626
9627static bool
9628aarch64_cheap_mult_shift_p (rtx x)
9629{
9630 rtx op0, op1;
9631
9632 op0 = XEXP (x, 0);
9633 op1 = XEXP (x, 1);
9634
9635 if (!(aarch64_tune_params.extra_tuning_flags
9636 & AARCH64_EXTRA_TUNE_CHEAP_SHIFT_EXTEND))
9637 return false;
9638
9639 if (GET_CODE (op0) == SIGN_EXTEND)
9640 return false;
9641
9642 if (GET_CODE (x) == ASHIFT && CONST_INT_P (op1)
9643 && UINTVAL (op1) <= 4)
9644 return true;
9645
9646 if (GET_CODE (x) != MULT || !CONST_INT_P (op1))
9647 return false;
9648
9649 HOST_WIDE_INT l2 = exact_log2 (INTVAL (op1));
9650
9651 if (l2 > 0 && l2 <= 4)
9652 return true;
9653
9654 return false;
9655}
9656
4745e701 9657/* Helper function for rtx cost calculation. Calculate the cost of
0a78ebe4
KT
9658 a MULT or ASHIFT, which may be part of a compound PLUS/MINUS rtx.
9659   Return the calculated cost of the expression, recursing manually into
4745e701
JG
9660 operands where needed. */
9661
9662static int
e548c9df 9663aarch64_rtx_mult_cost (rtx x, enum rtx_code code, int outer, bool speed)
4745e701
JG
9664{
9665 rtx op0, op1;
9666 const struct cpu_cost_table *extra_cost
b175b679 9667 = aarch64_tune_params.insn_extra_cost;
4745e701 9668 int cost = 0;
0a78ebe4 9669 bool compound_p = (outer == PLUS || outer == MINUS);
ef4bddc2 9670 machine_mode mode = GET_MODE (x);
4745e701
JG
9671
9672 gcc_checking_assert (code == MULT);
9673
9674 op0 = XEXP (x, 0);
9675 op1 = XEXP (x, 1);
9676
9677 if (VECTOR_MODE_P (mode))
9678 mode = GET_MODE_INNER (mode);
9679
9680 /* Integer multiply/fma. */
9681 if (GET_MODE_CLASS (mode) == MODE_INT)
9682 {
9683 /* The multiply will be canonicalized as a shift, cost it as such. */
0a78ebe4
KT
9684 if (aarch64_shift_p (GET_CODE (x))
9685 || (CONST_INT_P (op1)
9686 && exact_log2 (INTVAL (op1)) > 0))
4745e701 9687 {
0a78ebe4
KT
9688 bool is_extend = GET_CODE (op0) == ZERO_EXTEND
9689 || GET_CODE (op0) == SIGN_EXTEND;
4745e701
JG
9690 if (speed)
9691 {
0a78ebe4
KT
9692 if (compound_p)
9693 {
b10f1009
AP
9694 /* If the shift is considered cheap,
9695 then don't add any cost. */
9696 if (aarch64_cheap_mult_shift_p (x))
9697 ;
9698 else if (REG_P (op1))
0a78ebe4
KT
9699 /* ARITH + shift-by-register. */
9700 cost += extra_cost->alu.arith_shift_reg;
9701 else if (is_extend)
9702 /* ARITH + extended register. We don't have a cost field
9703 for ARITH+EXTEND+SHIFT, so use extend_arith here. */
9704 cost += extra_cost->alu.extend_arith;
9705 else
9706 /* ARITH + shift-by-immediate. */
9707 cost += extra_cost->alu.arith_shift;
9708 }
4745e701
JG
9709 else
9710 /* LSL (immediate). */
0a78ebe4
KT
9711 cost += extra_cost->alu.shift;
9712
4745e701 9713 }
0a78ebe4
KT
9714 /* Strip extends as we will have costed them in the case above. */
9715 if (is_extend)
b10f1009 9716 op0 = aarch64_strip_extend (op0, true);
4745e701 9717
e548c9df 9718 cost += rtx_cost (op0, VOIDmode, code, 0, speed);
4745e701
JG
9719
9720 return cost;
9721 }
9722
d2ac256b
KT
9723 /* MNEG or [US]MNEGL. Extract the NEG operand and indicate that it's a
9724 compound and let the below cases handle it. After all, MNEG is a
9725 special-case alias of MSUB. */
9726 if (GET_CODE (op0) == NEG)
9727 {
9728 op0 = XEXP (op0, 0);
9729 compound_p = true;
9730 }
9731
4745e701
JG
9732 /* Integer multiplies or FMAs have zero/sign extending variants. */
9733 if ((GET_CODE (op0) == ZERO_EXTEND
9734 && GET_CODE (op1) == ZERO_EXTEND)
9735 || (GET_CODE (op0) == SIGN_EXTEND
9736 && GET_CODE (op1) == SIGN_EXTEND))
9737 {
e548c9df
AM
9738 cost += rtx_cost (XEXP (op0, 0), VOIDmode, MULT, 0, speed);
9739 cost += rtx_cost (XEXP (op1, 0), VOIDmode, MULT, 1, speed);
4745e701
JG
9740
9741 if (speed)
9742 {
0a78ebe4 9743 if (compound_p)
d2ac256b 9744 /* SMADDL/UMADDL/UMSUBL/SMSUBL. */
4745e701
JG
9745 cost += extra_cost->mult[0].extend_add;
9746 else
9747 /* MUL/SMULL/UMULL. */
9748 cost += extra_cost->mult[0].extend;
9749 }
9750
9751 return cost;
9752 }
9753
d2ac256b 9754 /* This is either an integer multiply or a MADD. In both cases
4745e701 9755 we want to recurse and cost the operands. */
e548c9df
AM
9756 cost += rtx_cost (op0, mode, MULT, 0, speed);
9757 cost += rtx_cost (op1, mode, MULT, 1, speed);
4745e701
JG
9758
9759 if (speed)
9760 {
0a78ebe4 9761 if (compound_p)
d2ac256b 9762 /* MADD/MSUB. */
4745e701
JG
9763 cost += extra_cost->mult[mode == DImode].add;
9764 else
9765 /* MUL. */
9766 cost += extra_cost->mult[mode == DImode].simple;
9767 }
9768
9769 return cost;
9770 }
9771 else
9772 {
9773 if (speed)
9774 {
3d840f7d 9775 /* Floating-point FMA/FMUL can also support negations of the
d318517d
SN
9776 operands, unless the rounding mode is upward or downward in
9777 which case FNMUL is different from FMUL with operand negation. */
9778 bool neg0 = GET_CODE (op0) == NEG;
9779 bool neg1 = GET_CODE (op1) == NEG;
9780 if (compound_p || !flag_rounding_math || (neg0 && neg1))
9781 {
9782 if (neg0)
9783 op0 = XEXP (op0, 0);
9784 if (neg1)
9785 op1 = XEXP (op1, 0);
9786 }
4745e701 9787
0a78ebe4 9788 if (compound_p)
4745e701
JG
9789 /* FMADD/FNMADD/FNMSUB/FMSUB. */
9790 cost += extra_cost->fp[mode == DFmode].fma;
9791 else
3d840f7d 9792 /* FMUL/FNMUL. */
4745e701
JG
9793 cost += extra_cost->fp[mode == DFmode].mult;
9794 }
9795
e548c9df
AM
9796 cost += rtx_cost (op0, mode, MULT, 0, speed);
9797 cost += rtx_cost (op1, mode, MULT, 1, speed);
4745e701
JG
9798 return cost;
9799 }
43e9d192
IB
9800}
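/* As a worked illustration (hypothetical operands, not taken from a real
   dump): costing X = (mult:DI (reg:DI x1) (const_int 4)) with OUTER == PLUS
   and SPEED set takes the shift path above, because exact_log2 (4) > 0.
   COMPOUND_P is true, so on tunings where aarch64_cheap_mult_shift_p is
   false the function adds extra_cost->alu.arith_shift (the cost of an
   arithmetic op with a shifted-register operand) plus the recursive cost of
   the register operand, matching an add-with-shifted-register instruction
   such as "add x0, x2, x1, lsl #2".  */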
9801
67747367
JG
9802static int
9803aarch64_address_cost (rtx x,
ef4bddc2 9804 machine_mode mode,
67747367
JG
9805 addr_space_t as ATTRIBUTE_UNUSED,
9806 bool speed)
9807{
9808 enum rtx_code c = GET_CODE (x);
b175b679 9809 const struct cpu_addrcost_table *addr_cost = aarch64_tune_params.addr_cost;
67747367
JG
9810 struct aarch64_address_info info;
9811 int cost = 0;
9812 info.shift = 0;
9813
a97d8b98 9814 if (!aarch64_classify_address (&info, x, mode, false))
67747367
JG
9815 {
9816 if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF)
9817 {
9818 /* This is a CONST or SYMBOL ref which will be split
9819 in a different way depending on the code model in use.
9820 Cost it through the generic infrastructure. */
e548c9df 9821 int cost_symbol_ref = rtx_cost (x, Pmode, MEM, 1, speed);
67747367
JG
9822 /* Divide through by the cost of one instruction to
9823 bring it to the same units as the address costs. */
9824 cost_symbol_ref /= COSTS_N_INSNS (1);
9825 /* The cost is then the cost of preparing the address,
9826 followed by an immediate (possibly 0) offset. */
9827 return cost_symbol_ref + addr_cost->imm_offset;
9828 }
9829 else
9830 {
9831 /* This is most likely a jump table from a case
9832 statement. */
9833 return addr_cost->register_offset;
9834 }
9835 }
9836
9837 switch (info.type)
9838 {
9839 case ADDRESS_LO_SUM:
9840 case ADDRESS_SYMBOLIC:
9841 case ADDRESS_REG_IMM:
9842 cost += addr_cost->imm_offset;
9843 break;
9844
9845 case ADDRESS_REG_WB:
9846 if (c == PRE_INC || c == PRE_DEC || c == PRE_MODIFY)
9847 cost += addr_cost->pre_modify;
9848 else if (c == POST_INC || c == POST_DEC || c == POST_MODIFY)
9849 cost += addr_cost->post_modify;
9850 else
9851 gcc_unreachable ();
9852
9853 break;
9854
9855 case ADDRESS_REG_REG:
9856 cost += addr_cost->register_offset;
9857 break;
9858
67747367 9859 case ADDRESS_REG_SXTW:
783879e6
EM
9860 cost += addr_cost->register_sextend;
9861 break;
9862
9863 case ADDRESS_REG_UXTW:
9864 cost += addr_cost->register_zextend;
67747367
JG
9865 break;
9866
9867 default:
9868 gcc_unreachable ();
9869 }
9870
9871
9872 if (info.shift > 0)
9873 {
9874 /* For the sake of calculating the cost of the shifted register
9875 component, we can treat same-sized modes in the same way. */
6a70badb
RS
9876 if (known_eq (GET_MODE_BITSIZE (mode), 16))
9877 cost += addr_cost->addr_scale_costs.hi;
9878 else if (known_eq (GET_MODE_BITSIZE (mode), 32))
9879 cost += addr_cost->addr_scale_costs.si;
9880 else if (known_eq (GET_MODE_BITSIZE (mode), 64))
9881 cost += addr_cost->addr_scale_costs.di;
9882 else
9883 /* We can't tell, or this is a 128-bit vector. */
9884 cost += addr_cost->addr_scale_costs.ti;
67747367
JG
9885 }
9886
9887 return cost;
9888}
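/* A hypothetical example of the cost composition above: for an SImode
   access whose address is [x0, w1, sxtw #2], aarch64_classify_address
   reports ADDRESS_REG_SXTW with a shift of 2, so the returned cost is
   addr_cost->register_sextend plus addr_cost->addr_scale_costs.si.  */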
9889
b9066f5a
MW
9890/* Return the cost of a branch. If SPEED_P is true then the compiler is
9891 optimizing for speed. If PREDICTABLE_P is true then the branch is predicted
9892 to be taken. */
9893
9894int
9895aarch64_branch_cost (bool speed_p, bool predictable_p)
9896{
9897 /* When optimizing for speed, use the cost of unpredictable branches. */
9898 const struct cpu_branch_cost *branch_costs =
b175b679 9899 aarch64_tune_params.branch_costs;
b9066f5a
MW
9900
9901 if (!speed_p || predictable_p)
9902 return branch_costs->predictable;
9903 else
9904 return branch_costs->unpredictable;
9905}
9906
7cc2145f
JG
9907/* Return true if the RTX X in mode MODE is a zero or sign extract
9908 usable in an ADD or SUB (extended register) instruction. */
9909static bool
77e994c9 9910aarch64_rtx_arith_op_extract_p (rtx x, scalar_int_mode mode)
7cc2145f
JG
9911{
9912 /* Catch add with a sign extract.
9913 This is add_<optab><mode>_multp2. */
9914 if (GET_CODE (x) == SIGN_EXTRACT
9915 || GET_CODE (x) == ZERO_EXTRACT)
9916 {
9917 rtx op0 = XEXP (x, 0);
9918 rtx op1 = XEXP (x, 1);
9919 rtx op2 = XEXP (x, 2);
9920
9921 if (GET_CODE (op0) == MULT
9922 && CONST_INT_P (op1)
9923 && op2 == const0_rtx
9924 && CONST_INT_P (XEXP (op0, 1))
9925 && aarch64_is_extend_from_extract (mode,
9926 XEXP (op0, 1),
9927 op1))
9928 {
9929 return true;
9930 }
9931 }
e47c4031
KT
9932 /* The simple case <ARITH>, XD, XN, XM, [us]xt.
9933 No shift. */
9934 else if (GET_CODE (x) == SIGN_EXTEND
9935 || GET_CODE (x) == ZERO_EXTEND)
9936 return REG_P (XEXP (x, 0));
7cc2145f
JG
9937
9938 return false;
9939}
9940
61263118
KT
9941static bool
9942aarch64_frint_unspec_p (unsigned int u)
9943{
9944 switch (u)
9945 {
9946 case UNSPEC_FRINTZ:
9947 case UNSPEC_FRINTP:
9948 case UNSPEC_FRINTM:
9949 case UNSPEC_FRINTA:
9950 case UNSPEC_FRINTN:
9951 case UNSPEC_FRINTX:
9952 case UNSPEC_FRINTI:
9953 return true;
9954
9955 default:
9956 return false;
9957 }
9958}
9959
fb0cb7fa
KT
9960/* Return true iff X is an rtx that will match an extr instruction
9961 i.e. as described in the *extr<mode>5_insn family of patterns.
9962 OP0 and OP1 will be set to the operands of the shifts involved
9963 on success and will be NULL_RTX otherwise. */
9964
9965static bool
9966aarch64_extr_rtx_p (rtx x, rtx *res_op0, rtx *res_op1)
9967{
9968 rtx op0, op1;
77e994c9
RS
9969 scalar_int_mode mode;
9970 if (!is_a <scalar_int_mode> (GET_MODE (x), &mode))
9971 return false;
fb0cb7fa
KT
9972
9973 *res_op0 = NULL_RTX;
9974 *res_op1 = NULL_RTX;
9975
9976 if (GET_CODE (x) != IOR)
9977 return false;
9978
9979 op0 = XEXP (x, 0);
9980 op1 = XEXP (x, 1);
9981
9982 if ((GET_CODE (op0) == ASHIFT && GET_CODE (op1) == LSHIFTRT)
9983 || (GET_CODE (op1) == ASHIFT && GET_CODE (op0) == LSHIFTRT))
9984 {
9985 /* Canonicalise locally to ashift in op0, lshiftrt in op1. */
9986 if (GET_CODE (op1) == ASHIFT)
9987 std::swap (op0, op1);
9988
9989 if (!CONST_INT_P (XEXP (op0, 1)) || !CONST_INT_P (XEXP (op1, 1)))
9990 return false;
9991
9992 unsigned HOST_WIDE_INT shft_amnt_0 = UINTVAL (XEXP (op0, 1));
9993 unsigned HOST_WIDE_INT shft_amnt_1 = UINTVAL (XEXP (op1, 1));
9994
9995 if (shft_amnt_0 < GET_MODE_BITSIZE (mode)
9996 && shft_amnt_0 + shft_amnt_1 == GET_MODE_BITSIZE (mode))
9997 {
9998 *res_op0 = XEXP (op0, 0);
9999 *res_op1 = XEXP (op1, 0);
10000 return true;
10001 }
10002 }
10003
10004 return false;
10005}
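/* For instance (a made-up RTX for illustration): given
     (ior:SI (ashift:SI (reg a) (const_int 10))
             (lshiftrt:SI (reg b) (const_int 22)))
   the two shift amounts sum to 32, the SImode bitsize, so the function
   succeeds with *RES_OP0 = a and *RES_OP1 = b; the insn emitted for this
   form is an EXTR whose immediate is the right-shift amount, 22 here.  */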
10006
2d5ffe46
AP
10007/* Calculate the cost of calculating (if_then_else (OP0) (OP1) (OP2)),
10008 storing it in *COST. Result is true if the total cost of the operation
10009 has now been calculated. */
10010static bool
10011aarch64_if_then_else_costs (rtx op0, rtx op1, rtx op2, int *cost, bool speed)
10012{
b9e3afe9
AP
10013 rtx inner;
10014 rtx comparator;
10015 enum rtx_code cmpcode;
10016
10017 if (COMPARISON_P (op0))
10018 {
10019 inner = XEXP (op0, 0);
10020 comparator = XEXP (op0, 1);
10021 cmpcode = GET_CODE (op0);
10022 }
10023 else
10024 {
10025 inner = op0;
10026 comparator = const0_rtx;
10027 cmpcode = NE;
10028 }
10029
2d5ffe46
AP
10030 if (GET_CODE (op1) == PC || GET_CODE (op2) == PC)
10031 {
10032 /* Conditional branch. */
b9e3afe9 10033 if (GET_MODE_CLASS (GET_MODE (inner)) == MODE_CC)
2d5ffe46
AP
10034 return true;
10035 else
10036 {
b9e3afe9 10037 if (cmpcode == NE || cmpcode == EQ)
2d5ffe46 10038 {
2d5ffe46
AP
10039 if (comparator == const0_rtx)
10040 {
10041 /* TBZ/TBNZ/CBZ/CBNZ. */
10042 if (GET_CODE (inner) == ZERO_EXTRACT)
10043 /* TBZ/TBNZ. */
e548c9df
AM
10044 *cost += rtx_cost (XEXP (inner, 0), VOIDmode,
10045 ZERO_EXTRACT, 0, speed);
10046 else
10047 /* CBZ/CBNZ. */
10048 *cost += rtx_cost (inner, VOIDmode, cmpcode, 0, speed);
2d5ffe46
AP
10049
10050 return true;
10051 }
10052 }
b9e3afe9 10053 else if (cmpcode == LT || cmpcode == GE)
2d5ffe46 10054 {
2d5ffe46
AP
10055 /* TBZ/TBNZ. */
10056 if (comparator == const0_rtx)
10057 return true;
10058 }
10059 }
10060 }
b9e3afe9 10061 else if (GET_MODE_CLASS (GET_MODE (inner)) == MODE_CC)
2d5ffe46 10062 {
786298dc 10063 /* CCMP. */
6dfeb7ce 10064 if (GET_CODE (op1) == COMPARE)
786298dc
WD
10065 {
10066 /* Increase cost of CCMP reg, 0, imm, CC to prefer CMP reg, 0. */
10067 if (XEXP (op1, 1) == const0_rtx)
10068 *cost += 1;
10069 if (speed)
10070 {
10071 machine_mode mode = GET_MODE (XEXP (op1, 0));
10072 const struct cpu_cost_table *extra_cost
10073 = aarch64_tune_params.insn_extra_cost;
10074
10075 if (GET_MODE_CLASS (mode) == MODE_INT)
10076 *cost += extra_cost->alu.arith;
10077 else
10078 *cost += extra_cost->fp[mode == DFmode].compare;
10079 }
10080 return true;
10081 }
10082
2d5ffe46
AP
10083 /* It's a conditional operation based on the status flags,
10084 so it must be some flavor of CSEL. */
10085
10086 /* CSNEG, CSINV, and CSINC are handled for free as part of CSEL. */
10087 if (GET_CODE (op1) == NEG
10088 || GET_CODE (op1) == NOT
10089 || (GET_CODE (op1) == PLUS && XEXP (op1, 1) == const1_rtx))
10090 op1 = XEXP (op1, 0);
bad00732
KT
10091 else if (GET_CODE (op1) == ZERO_EXTEND && GET_CODE (op2) == ZERO_EXTEND)
10092 {
10093 /* CSEL with zero-extension (*cmovdi_insn_uxtw). */
10094 op1 = XEXP (op1, 0);
10095 op2 = XEXP (op2, 0);
10096 }
2d5ffe46 10097
e548c9df
AM
10098 *cost += rtx_cost (op1, VOIDmode, IF_THEN_ELSE, 1, speed);
10099 *cost += rtx_cost (op2, VOIDmode, IF_THEN_ELSE, 2, speed);
2d5ffe46
AP
10100 return true;
10101 }
10102
10103 /* We don't know what this is, cost all operands. */
10104 return false;
10105}
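/* As an illustration (hypothetical RTX): for the jump pattern
     (if_then_else (eq (reg x1) (const_int 0)) (label_ref L) (pc))
   OP2 is (pc), CMPCODE is EQ and the comparator is const0_rtx, so the
   code above costs it as a CBZ/CBNZ: only the cost of the compared
   register is added and the function returns true.  */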
10106
283b6c85
KT
10107/* Check whether X is a bitfield operation of the form shift + extend that
10108 maps down to a UBFIZ/SBFIZ/UBFX/SBFX instruction. If so, return the
10109 operand to which the bitfield operation is applied. Otherwise return
10110 NULL_RTX. */
10111
10112static rtx
10113aarch64_extend_bitfield_pattern_p (rtx x)
10114{
10115 rtx_code outer_code = GET_CODE (x);
10116 machine_mode outer_mode = GET_MODE (x);
10117
10118 if (outer_code != ZERO_EXTEND && outer_code != SIGN_EXTEND
10119 && outer_mode != SImode && outer_mode != DImode)
10120 return NULL_RTX;
10121
10122 rtx inner = XEXP (x, 0);
10123 rtx_code inner_code = GET_CODE (inner);
10124 machine_mode inner_mode = GET_MODE (inner);
10125 rtx op = NULL_RTX;
10126
10127 switch (inner_code)
10128 {
10129 case ASHIFT:
10130 if (CONST_INT_P (XEXP (inner, 1))
10131 && (inner_mode == QImode || inner_mode == HImode))
10132 op = XEXP (inner, 0);
10133 break;
10134 case LSHIFTRT:
10135 if (outer_code == ZERO_EXTEND && CONST_INT_P (XEXP (inner, 1))
10136 && (inner_mode == QImode || inner_mode == HImode))
10137 op = XEXP (inner, 0);
10138 break;
10139 case ASHIFTRT:
10140 if (outer_code == SIGN_EXTEND && CONST_INT_P (XEXP (inner, 1))
10141 && (inner_mode == QImode || inner_mode == HImode))
10142 op = XEXP (inner, 0);
10143 break;
10144 default:
10145 break;
10146 }
10147
10148 return op;
10149}
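/* Example of a match (illustrative only): for
     (zero_extend:SI (lshiftrt:HI (reg:HI r) (const_int 3)))
   the LSHIFTRT arm above fires and the function returns (reg:HI r);
   the shift-plus-extend combination corresponds to a single UBFX of r.  */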
10150
8c83f71d
KT
10151/* Return true if the mask and a shift amount from an RTX of the form
10152 (x << SHFT_AMNT) & MASK are valid to combine into a UBFIZ instruction of
10153 mode MODE. See the *andim_ashift<mode>_bfiz pattern. */
10154
10155bool
77e994c9
RS
10156aarch64_mask_and_shift_for_ubfiz_p (scalar_int_mode mode, rtx mask,
10157 rtx shft_amnt)
8c83f71d
KT
10158{
10159 return CONST_INT_P (mask) && CONST_INT_P (shft_amnt)
10160 && INTVAL (shft_amnt) < GET_MODE_BITSIZE (mode)
10161 && exact_log2 ((INTVAL (mask) >> INTVAL (shft_amnt)) + 1) >= 0
1b6acf23
WD
10162 && (INTVAL (mask)
10163 & ((HOST_WIDE_INT_1U << INTVAL (shft_amnt)) - 1)) == 0;
8c83f71d
KT
10164}
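/* A concrete (made-up) example: in SImode, MASK == 0xff0 and
   SHFT_AMNT == 4 pass all three checks, since (0xff0 >> 4) + 1 == 0x100
   is a power of two and no mask bit lies below the shift amount; the
   pair therefore describes a UBFIZ with lsb 4 and width 8.  */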
10165
6a0d3939
SE
10166/* Return true if the masks and a shift amount from an RTX of the form
10167 ((x & MASK1) | ((y << SHIFT_AMNT) & MASK2)) are valid to combine into
10168 a BFI instruction of mode MODE. See *aarch64_bfi patterns. */
10169
10170bool
10171aarch64_masks_and_shift_for_bfi_p (scalar_int_mode mode,
10172 unsigned HOST_WIDE_INT mask1,
10173 unsigned HOST_WIDE_INT shft_amnt,
10174 unsigned HOST_WIDE_INT mask2)
10175{
10176 unsigned HOST_WIDE_INT t;
10177
10178 /* Verify that there is no overlap in what bits are set in the two masks. */
10179 if (mask1 != ~mask2)
10180 return false;
10181
10182 /* Verify that mask2 is not all zeros or ones. */
10183 if (mask2 == 0 || mask2 == HOST_WIDE_INT_M1U)
10184 return false;
10185
10186 /* The shift amount should always be less than the mode size. */
10187 gcc_assert (shft_amnt < GET_MODE_BITSIZE (mode));
10188
10189 /* Verify that the mask being shifted is contiguous and would be in the
10190 least significant bits after shifting by shft_amnt. */
10191 t = mask2 + (HOST_WIDE_INT_1U << shft_amnt);
10192 return (t == (t & -t));
10193}
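/* Illustrative values (not from the original source): in DImode,
   MASK1 == 0xffff0000ffffffff, SHFT_AMNT == 32 and
   MASK2 == 0x0000ffff00000000 satisfy every check above: MASK1 == ~MASK2,
   and MASK2 is a contiguous 16-bit field that lands in the least
   significant bits once shifted right by 32, so the combination maps to
   a BFI inserting 16 bits at bit position 32.  */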
10194
43e9d192
IB
10195/* Calculate the cost of calculating X, storing it in *COST. Result
10196 is true if the total cost of the operation has now been calculated. */
10197static bool
e548c9df 10198aarch64_rtx_costs (rtx x, machine_mode mode, int outer ATTRIBUTE_UNUSED,
43e9d192
IB
10199 int param ATTRIBUTE_UNUSED, int *cost, bool speed)
10200{
a8eecd00 10201 rtx op0, op1, op2;
73250c4c 10202 const struct cpu_cost_table *extra_cost
b175b679 10203 = aarch64_tune_params.insn_extra_cost;
e548c9df 10204 int code = GET_CODE (x);
b4206259 10205 scalar_int_mode int_mode;
43e9d192 10206
7fc5ef02
JG
10207 /* By default, assume that everything has equivalent cost to the
10208 cheapest instruction. Any additional costs are applied as a delta
10209 above this default. */
10210 *cost = COSTS_N_INSNS (1);
10211
43e9d192
IB
10212 switch (code)
10213 {
10214 case SET:
ba123b0d
JG
10215 /* The cost depends entirely on the operands to SET. */
10216 *cost = 0;
43e9d192
IB
10217 op0 = SET_DEST (x);
10218 op1 = SET_SRC (x);
10219
10220 switch (GET_CODE (op0))
10221 {
10222 case MEM:
10223 if (speed)
2961177e
JG
10224 {
10225 rtx address = XEXP (op0, 0);
b6875aac
KV
10226 if (VECTOR_MODE_P (mode))
10227 *cost += extra_cost->ldst.storev;
10228 else if (GET_MODE_CLASS (mode) == MODE_INT)
2961177e
JG
10229 *cost += extra_cost->ldst.store;
10230 else if (mode == SFmode)
10231 *cost += extra_cost->ldst.storef;
10232 else if (mode == DFmode)
10233 *cost += extra_cost->ldst.stored;
10234
10235 *cost +=
10236 COSTS_N_INSNS (aarch64_address_cost (address, mode,
10237 0, speed));
10238 }
43e9d192 10239
e548c9df 10240 *cost += rtx_cost (op1, mode, SET, 1, speed);
43e9d192
IB
10241 return true;
10242
10243 case SUBREG:
10244 if (! REG_P (SUBREG_REG (op0)))
e548c9df 10245 *cost += rtx_cost (SUBREG_REG (op0), VOIDmode, SET, 0, speed);
ba123b0d 10246
43e9d192
IB
10247 /* Fall through. */
10248 case REG:
b6875aac
KV
10249 /* The cost is one per vector-register copied. */
10250 if (VECTOR_MODE_P (GET_MODE (op0)) && REG_P (op1))
10251 {
fe1447a1
RS
10252 int nregs = aarch64_hard_regno_nregs (V0_REGNUM, GET_MODE (op0));
10253 *cost = COSTS_N_INSNS (nregs);
b6875aac 10254 }
ba123b0d
JG
10255 /* const0_rtx is in general free, but we will use an
10256 instruction to set a register to 0. */
b6875aac
KV
10257 else if (REG_P (op1) || op1 == const0_rtx)
10258 {
10259 /* The cost is 1 per register copied. */
fe1447a1
RS
10260 int nregs = aarch64_hard_regno_nregs (R0_REGNUM, GET_MODE (op0));
10261 *cost = COSTS_N_INSNS (nregs);
b6875aac 10262 }
ba123b0d
JG
10263 else
10264 /* Cost is just the cost of the RHS of the set. */
e548c9df 10265 *cost += rtx_cost (op1, mode, SET, 1, speed);
43e9d192
IB
10266 return true;
10267
ba123b0d 10268 case ZERO_EXTRACT:
43e9d192 10269 case SIGN_EXTRACT:
ba123b0d
JG
10270 /* Bit-field insertion. Strip any redundant widening of
10271 the RHS to meet the width of the target. */
43e9d192
IB
10272 if (GET_CODE (op1) == SUBREG)
10273 op1 = SUBREG_REG (op1);
10274 if ((GET_CODE (op1) == ZERO_EXTEND
10275 || GET_CODE (op1) == SIGN_EXTEND)
4aa81c2e 10276 && CONST_INT_P (XEXP (op0, 1))
77e994c9
RS
10277 && is_a <scalar_int_mode> (GET_MODE (XEXP (op1, 0)), &int_mode)
10278 && GET_MODE_BITSIZE (int_mode) >= INTVAL (XEXP (op0, 1)))
43e9d192 10279 op1 = XEXP (op1, 0);
ba123b0d
JG
10280
10281 if (CONST_INT_P (op1))
10282 {
10283 /* MOV immediate is assumed to always be cheap. */
10284 *cost = COSTS_N_INSNS (1);
10285 }
10286 else
10287 {
10288 /* BFM. */
10289 if (speed)
10290 *cost += extra_cost->alu.bfi;
e548c9df 10291 *cost += rtx_cost (op1, VOIDmode, (enum rtx_code) code, 1, speed);
ba123b0d
JG
10292 }
10293
43e9d192
IB
10294 return true;
10295
10296 default:
ba123b0d
JG
10297 /* We can't make sense of this, assume default cost. */
10298 *cost = COSTS_N_INSNS (1);
61263118 10299 return false;
43e9d192
IB
10300 }
10301 return false;
10302
9dfc162c
JG
10303 case CONST_INT:
10304 /* If an instruction can incorporate a constant within the
10305 instruction, the instruction's expression avoids calling
10306 rtx_cost() on the constant. If rtx_cost() is called on a
10307 constant, then it is usually because the constant must be
10308 moved into a register by one or more instructions.
10309
10310 The exception is constant 0, which can be expressed
10311 as XZR/WZR and is therefore free. The exception to this is
10312 if we have (set (reg) (const0_rtx)) in which case we must cost
10313 the move. However, we can catch that when we cost the SET, so
10314 we don't need to consider that here. */
10315 if (x == const0_rtx)
10316 *cost = 0;
10317 else
10318 {
10319 /* To an approximation, the cost of building any other constant
10320 is proportional to the number of instructions required to
10321 build that constant. This is true whether we
10322 are compiling for SPEED or otherwise. */
77e994c9
RS
10323 if (!is_a <scalar_int_mode> (mode, &int_mode))
10324 int_mode = word_mode;
82614948 10325 *cost = COSTS_N_INSNS (aarch64_internal_mov_immediate
77e994c9 10326 (NULL_RTX, x, false, int_mode));
9dfc162c
JG
10327 }
10328 return true;
10329
10330 case CONST_DOUBLE:
a2170965
TC
10331
10332 /* First determine number of instructions to do the move
10333 as an integer constant. */
10334 if (!aarch64_float_const_representable_p (x)
10335 && !aarch64_can_const_movi_rtx_p (x, mode)
10336 && aarch64_float_const_rtx_p (x))
10337 {
10338 unsigned HOST_WIDE_INT ival;
10339 bool succeed = aarch64_reinterpret_float_as_int (x, &ival);
10340 gcc_assert (succeed);
10341
77e994c9
RS
10342 scalar_int_mode imode = (mode == HFmode
10343 ? SImode
10344 : int_mode_for_mode (mode).require ());
a2170965
TC
10345 int ncost = aarch64_internal_mov_immediate
10346 (NULL_RTX, gen_int_mode (ival, imode), false, imode);
10347 *cost += COSTS_N_INSNS (ncost);
10348 return true;
10349 }
10350
9dfc162c
JG
10351 if (speed)
10352 {
10353 /* mov[df,sf]_aarch64. */
10354 if (aarch64_float_const_representable_p (x))
10355 /* FMOV (scalar immediate). */
10356 *cost += extra_cost->fp[mode == DFmode].fpconst;
10357 else if (!aarch64_float_const_zero_rtx_p (x))
10358 {
10359 /* This will be a load from memory. */
10360 if (mode == DFmode)
10361 *cost += extra_cost->ldst.loadd;
10362 else
10363 *cost += extra_cost->ldst.loadf;
10364 }
10365 else
10366 /* Otherwise this is +0.0. We get this using MOVI d0, #0
10367 or MOV v0.s[0], wzr - neither of which is modeled by the
10368 cost tables. Just use the default cost. */
10369 {
10370 }
10371 }
10372
10373 return true;
10374
43e9d192
IB
10375 case MEM:
10376 if (speed)
2961177e
JG
10377 {
10378 /* For loads we want the base cost of a load, plus an
10379 approximation for the additional cost of the addressing
10380 mode. */
10381 rtx address = XEXP (x, 0);
b6875aac
KV
10382 if (VECTOR_MODE_P (mode))
10383 *cost += extra_cost->ldst.loadv;
10384 else if (GET_MODE_CLASS (mode) == MODE_INT)
2961177e
JG
10385 *cost += extra_cost->ldst.load;
10386 else if (mode == SFmode)
10387 *cost += extra_cost->ldst.loadf;
10388 else if (mode == DFmode)
10389 *cost += extra_cost->ldst.loadd;
10390
10391 *cost +=
10392 COSTS_N_INSNS (aarch64_address_cost (address, mode,
10393 0, speed));
10394 }
43e9d192
IB
10395
10396 return true;
10397
10398 case NEG:
4745e701
JG
10399 op0 = XEXP (x, 0);
10400
b6875aac
KV
10401 if (VECTOR_MODE_P (mode))
10402 {
10403 if (speed)
10404 {
10405 /* FNEG. */
10406 *cost += extra_cost->vect.alu;
10407 }
10408 return false;
10409 }
10410
e548c9df
AM
10411 if (GET_MODE_CLASS (mode) == MODE_INT)
10412 {
4745e701
JG
10413 if (GET_RTX_CLASS (GET_CODE (op0)) == RTX_COMPARE
10414 || GET_RTX_CLASS (GET_CODE (op0)) == RTX_COMM_COMPARE)
10415 {
10416 /* CSETM. */
e548c9df 10417 *cost += rtx_cost (XEXP (op0, 0), VOIDmode, NEG, 0, speed);
4745e701
JG
10418 return true;
10419 }
10420
10421 /* Cost this as SUB wzr, X. */
e548c9df 10422 op0 = CONST0_RTX (mode);
4745e701
JG
10423 op1 = XEXP (x, 0);
10424 goto cost_minus;
10425 }
10426
e548c9df 10427 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
4745e701
JG
10428 {
10429 /* Support (neg(fma...)) as a single instruction only if
10430 sign of zeros is unimportant. This matches the decision
10431 making in aarch64.md. */
10432 if (GET_CODE (op0) == FMA && !HONOR_SIGNED_ZEROS (GET_MODE (op0)))
10433 {
10434 /* FNMADD. */
e548c9df 10435 *cost = rtx_cost (op0, mode, NEG, 0, speed);
4745e701
JG
10436 return true;
10437 }
d318517d
SN
10438 if (GET_CODE (op0) == MULT)
10439 {
10440 /* FNMUL. */
10441 *cost = rtx_cost (op0, mode, NEG, 0, speed);
10442 return true;
10443 }
4745e701
JG
10444 if (speed)
10445 /* FNEG. */
10446 *cost += extra_cost->fp[mode == DFmode].neg;
10447 return false;
10448 }
10449
10450 return false;
43e9d192 10451
781aeb73
KT
10452 case CLRSB:
10453 case CLZ:
10454 if (speed)
b6875aac
KV
10455 {
10456 if (VECTOR_MODE_P (mode))
10457 *cost += extra_cost->vect.alu;
10458 else
10459 *cost += extra_cost->alu.clz;
10460 }
781aeb73
KT
10461
10462 return false;
10463
43e9d192
IB
10464 case COMPARE:
10465 op0 = XEXP (x, 0);
10466 op1 = XEXP (x, 1);
10467
10468 if (op1 == const0_rtx
10469 && GET_CODE (op0) == AND)
10470 {
10471 x = op0;
e548c9df 10472 mode = GET_MODE (op0);
43e9d192
IB
10473 goto cost_logic;
10474 }
10475
a8eecd00
JG
10476 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT)
10477 {
10478 /* TODO: A write to the CC flags possibly costs extra, this
10479 needs encoding in the cost tables. */
10480
e548c9df 10481 mode = GET_MODE (op0);
a8eecd00
JG
10482 /* ANDS. */
10483 if (GET_CODE (op0) == AND)
10484 {
10485 x = op0;
10486 goto cost_logic;
10487 }
10488
10489 if (GET_CODE (op0) == PLUS)
10490 {
10491 /* ADDS (and CMN alias). */
10492 x = op0;
10493 goto cost_plus;
10494 }
10495
10496 if (GET_CODE (op0) == MINUS)
10497 {
10498 /* SUBS. */
10499 x = op0;
10500 goto cost_minus;
10501 }
10502
345854d8
KT
10503 if (GET_CODE (op0) == ZERO_EXTRACT && op1 == const0_rtx
10504 && GET_MODE (x) == CC_NZmode && CONST_INT_P (XEXP (op0, 1))
10505 && CONST_INT_P (XEXP (op0, 2)))
10506 {
10507 /* COMPARE of ZERO_EXTRACT form of TST-immediate.
10508 Handle it here directly rather than going to cost_logic
10509 since we know the immediate generated for the TST is valid
10510 so we can avoid creating an intermediate rtx for it only
10511 for costing purposes. */
10512 if (speed)
10513 *cost += extra_cost->alu.logical;
10514
10515 *cost += rtx_cost (XEXP (op0, 0), GET_MODE (op0),
10516 ZERO_EXTRACT, 0, speed);
10517 return true;
10518 }
10519
a8eecd00
JG
10520 if (GET_CODE (op1) == NEG)
10521 {
10522 /* CMN. */
10523 if (speed)
10524 *cost += extra_cost->alu.arith;
10525
e548c9df
AM
10526 *cost += rtx_cost (op0, mode, COMPARE, 0, speed);
10527 *cost += rtx_cost (XEXP (op1, 0), mode, NEG, 1, speed);
a8eecd00
JG
10528 return true;
10529 }
10530
10531 /* CMP.
10532
10533 Compare can freely swap the order of operands, and
10534 canonicalization puts the more complex operation first.
10535 But the integer MINUS logic expects the shift/extend
10536 operation in op1. */
10537 if (! (REG_P (op0)
10538 || (GET_CODE (op0) == SUBREG && REG_P (SUBREG_REG (op0)))))
10539 {
10540 op0 = XEXP (x, 1);
10541 op1 = XEXP (x, 0);
10542 }
10543 goto cost_minus;
10544 }
10545
10546 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_FLOAT)
10547 {
10548 /* FCMP. */
10549 if (speed)
10550 *cost += extra_cost->fp[mode == DFmode].compare;
10551
10552 if (CONST_DOUBLE_P (op1) && aarch64_float_const_zero_rtx_p (op1))
10553 {
e548c9df 10554 *cost += rtx_cost (op0, VOIDmode, COMPARE, 0, speed);
a8eecd00
JG
10555 /* FCMP supports constant 0.0 for no extra cost. */
10556 return true;
10557 }
10558 return false;
10559 }
10560
b6875aac
KV
10561 if (VECTOR_MODE_P (mode))
10562 {
10563 /* Vector compare. */
10564 if (speed)
10565 *cost += extra_cost->vect.alu;
10566
10567 if (aarch64_float_const_zero_rtx_p (op1))
10568 {
10569 /* Vector cm (eq|ge|gt|lt|le) supports constant 0.0 for no extra
10570 cost. */
10571 return true;
10572 }
10573 return false;
10574 }
a8eecd00 10575 return false;
43e9d192
IB
10576
10577 case MINUS:
4745e701
JG
10578 {
10579 op0 = XEXP (x, 0);
10580 op1 = XEXP (x, 1);
10581
10582cost_minus:
e548c9df 10583 *cost += rtx_cost (op0, mode, MINUS, 0, speed);
23cb6618 10584
4745e701
JG
10585 /* Detect valid immediates. */
10586 if ((GET_MODE_CLASS (mode) == MODE_INT
10587 || (GET_MODE_CLASS (mode) == MODE_CC
10588 && GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT))
10589 && CONST_INT_P (op1)
10590 && aarch64_uimm12_shift (INTVAL (op1)))
10591 {
4745e701
JG
10592 if (speed)
10593 /* SUB(S) (immediate). */
10594 *cost += extra_cost->alu.arith;
10595 return true;
4745e701
JG
10596 }
10597
7cc2145f 10598 /* Look for SUB (extended register). */
77e994c9
RS
10599 if (is_a <scalar_int_mode> (mode, &int_mode)
10600 && aarch64_rtx_arith_op_extract_p (op1, int_mode))
7cc2145f
JG
10601 {
10602 if (speed)
2533c820 10603 *cost += extra_cost->alu.extend_arith;
7cc2145f 10604
b10f1009 10605 op1 = aarch64_strip_extend (op1, true);
e47c4031 10606 *cost += rtx_cost (op1, VOIDmode,
e548c9df 10607 (enum rtx_code) GET_CODE (op1), 0, speed);
7cc2145f
JG
10608 return true;
10609 }
10610
b10f1009 10611 rtx new_op1 = aarch64_strip_extend (op1, false);
4745e701
JG
10612
10613 /* Cost this as an FMA-alike operation. */
10614 if ((GET_CODE (new_op1) == MULT
0a78ebe4 10615 || aarch64_shift_p (GET_CODE (new_op1)))
4745e701
JG
10616 && code != COMPARE)
10617 {
10618 *cost += aarch64_rtx_mult_cost (new_op1, MULT,
10619 (enum rtx_code) code,
10620 speed);
4745e701
JG
10621 return true;
10622 }
43e9d192 10623
e548c9df 10624 *cost += rtx_cost (new_op1, VOIDmode, MINUS, 1, speed);
43e9d192 10625
4745e701
JG
10626 if (speed)
10627 {
b6875aac
KV
10628 if (VECTOR_MODE_P (mode))
10629 {
10630 /* Vector SUB. */
10631 *cost += extra_cost->vect.alu;
10632 }
10633 else if (GET_MODE_CLASS (mode) == MODE_INT)
10634 {
10635 /* SUB(S). */
10636 *cost += extra_cost->alu.arith;
10637 }
4745e701 10638 else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
b6875aac
KV
10639 {
10640 /* FSUB. */
10641 *cost += extra_cost->fp[mode == DFmode].addsub;
10642 }
4745e701
JG
10643 }
10644 return true;
10645 }
43e9d192
IB
10646
10647 case PLUS:
4745e701
JG
10648 {
10649 rtx new_op0;
43e9d192 10650
4745e701
JG
10651 op0 = XEXP (x, 0);
10652 op1 = XEXP (x, 1);
43e9d192 10653
a8eecd00 10654cost_plus:
4745e701
JG
10655 if (GET_RTX_CLASS (GET_CODE (op0)) == RTX_COMPARE
10656 || GET_RTX_CLASS (GET_CODE (op0)) == RTX_COMM_COMPARE)
10657 {
10658 /* CSINC. */
e548c9df
AM
10659 *cost += rtx_cost (XEXP (op0, 0), mode, PLUS, 0, speed);
10660 *cost += rtx_cost (op1, mode, PLUS, 1, speed);
4745e701
JG
10661 return true;
10662 }
43e9d192 10663
4745e701 10664 if (GET_MODE_CLASS (mode) == MODE_INT
43cacb12
RS
10665 && ((CONST_INT_P (op1) && aarch64_uimm12_shift (INTVAL (op1)))
10666 || aarch64_sve_addvl_addpl_immediate (op1, mode)))
4745e701 10667 {
e548c9df 10668 *cost += rtx_cost (op0, mode, PLUS, 0, speed);
43e9d192 10669
4745e701
JG
10670 if (speed)
10671 /* ADD (immediate). */
10672 *cost += extra_cost->alu.arith;
10673 return true;
10674 }
10675
e548c9df 10676 *cost += rtx_cost (op1, mode, PLUS, 1, speed);
23cb6618 10677
7cc2145f 10678 /* Look for ADD (extended register). */
77e994c9
RS
10679 if (is_a <scalar_int_mode> (mode, &int_mode)
10680 && aarch64_rtx_arith_op_extract_p (op0, int_mode))
7cc2145f
JG
10681 {
10682 if (speed)
2533c820 10683 *cost += extra_cost->alu.extend_arith;
7cc2145f 10684
b10f1009 10685 op0 = aarch64_strip_extend (op0, true);
e47c4031 10686 *cost += rtx_cost (op0, VOIDmode,
e548c9df 10687 (enum rtx_code) GET_CODE (op0), 0, speed);
7cc2145f
JG
10688 return true;
10689 }
10690
4745e701
JG
10691 /* Strip any extend, leave shifts behind as we will
10692 cost them through mult_cost. */
b10f1009 10693 new_op0 = aarch64_strip_extend (op0, false);
4745e701
JG
10694
10695 if (GET_CODE (new_op0) == MULT
0a78ebe4 10696 || aarch64_shift_p (GET_CODE (new_op0)))
4745e701
JG
10697 {
10698 *cost += aarch64_rtx_mult_cost (new_op0, MULT, PLUS,
10699 speed);
4745e701
JG
10700 return true;
10701 }
10702
e548c9df 10703 *cost += rtx_cost (new_op0, VOIDmode, PLUS, 0, speed);
4745e701
JG
10704
10705 if (speed)
10706 {
b6875aac
KV
10707 if (VECTOR_MODE_P (mode))
10708 {
10709 /* Vector ADD. */
10710 *cost += extra_cost->vect.alu;
10711 }
10712 else if (GET_MODE_CLASS (mode) == MODE_INT)
10713 {
10714 /* ADD. */
10715 *cost += extra_cost->alu.arith;
10716 }
4745e701 10717 else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
b6875aac
KV
10718 {
10719 /* FADD. */
10720 *cost += extra_cost->fp[mode == DFmode].addsub;
10721 }
4745e701
JG
10722 }
10723 return true;
10724 }
43e9d192 10725
18b42b2a
KT
10726 case BSWAP:
10727 *cost = COSTS_N_INSNS (1);
10728
10729 if (speed)
b6875aac
KV
10730 {
10731 if (VECTOR_MODE_P (mode))
10732 *cost += extra_cost->vect.alu;
10733 else
10734 *cost += extra_cost->alu.rev;
10735 }
18b42b2a
KT
10736 return false;
10737
43e9d192 10738 case IOR:
f7d5cf8d
KT
10739 if (aarch_rev16_p (x))
10740 {
10741 *cost = COSTS_N_INSNS (1);
10742
b6875aac
KV
10743 if (speed)
10744 {
10745 if (VECTOR_MODE_P (mode))
10746 *cost += extra_cost->vect.alu;
10747 else
10748 *cost += extra_cost->alu.rev;
10749 }
10750 return true;
f7d5cf8d 10751 }
fb0cb7fa
KT
10752
10753 if (aarch64_extr_rtx_p (x, &op0, &op1))
10754 {
e548c9df
AM
10755 *cost += rtx_cost (op0, mode, IOR, 0, speed);
10756 *cost += rtx_cost (op1, mode, IOR, 1, speed);
fb0cb7fa
KT
10757 if (speed)
10758 *cost += extra_cost->alu.shift;
10759
10760 return true;
10761 }
f7d5cf8d 10762 /* Fall through. */
43e9d192
IB
10763 case XOR:
10764 case AND:
10765 cost_logic:
10766 op0 = XEXP (x, 0);
10767 op1 = XEXP (x, 1);
10768
b6875aac
KV
10769 if (VECTOR_MODE_P (mode))
10770 {
10771 if (speed)
10772 *cost += extra_cost->vect.alu;
10773 return true;
10774 }
10775
268c3b47
JG
10776 if (code == AND
10777 && GET_CODE (op0) == MULT
10778 && CONST_INT_P (XEXP (op0, 1))
10779 && CONST_INT_P (op1)
10780 && aarch64_uxt_size (exact_log2 (INTVAL (XEXP (op0, 1))),
10781 INTVAL (op1)) != 0)
10782 {
10783 /* This is a UBFM/SBFM. */
e548c9df 10784 *cost += rtx_cost (XEXP (op0, 0), mode, ZERO_EXTRACT, 0, speed);
268c3b47
JG
10785 if (speed)
10786 *cost += extra_cost->alu.bfx;
10787 return true;
10788 }
10789
b4206259 10790 if (is_int_mode (mode, &int_mode))
43e9d192 10791 {
8c83f71d 10792 if (CONST_INT_P (op1))
43e9d192 10793 {
8c83f71d
KT
10794 /* We have a mask + shift version of a UBFIZ
10795 i.e. the *andim_ashift<mode>_bfiz pattern. */
10796 if (GET_CODE (op0) == ASHIFT
b4206259
RS
10797 && aarch64_mask_and_shift_for_ubfiz_p (int_mode, op1,
10798 XEXP (op0, 1)))
8c83f71d 10799 {
b4206259 10800 *cost += rtx_cost (XEXP (op0, 0), int_mode,
8c83f71d
KT
10801 (enum rtx_code) code, 0, speed);
10802 if (speed)
10803 *cost += extra_cost->alu.bfx;
268c3b47 10804
8c83f71d
KT
10805 return true;
10806 }
b4206259 10807 else if (aarch64_bitmask_imm (INTVAL (op1), int_mode))
8c83f71d
KT
10808 {
10809 /* We possibly get the immediate for free, this is not
10810 modelled. */
b4206259
RS
10811 *cost += rtx_cost (op0, int_mode,
10812 (enum rtx_code) code, 0, speed);
8c83f71d
KT
10813 if (speed)
10814 *cost += extra_cost->alu.logical;
268c3b47 10815
8c83f71d
KT
10816 return true;
10817 }
43e9d192
IB
10818 }
10819 else
10820 {
268c3b47
JG
10821 rtx new_op0 = op0;
10822
10823 /* Handle ORN, EON, or BIC. */
43e9d192
IB
10824 if (GET_CODE (op0) == NOT)
10825 op0 = XEXP (op0, 0);
268c3b47
JG
10826
10827 new_op0 = aarch64_strip_shift (op0);
10828
10829 /* If we had a shift on op0 then this is a logical-shift-
10830 by-register/immediate operation. Otherwise, this is just
10831 a logical operation. */
10832 if (speed)
10833 {
10834 if (new_op0 != op0)
10835 {
10836 /* Shift by immediate. */
10837 if (CONST_INT_P (XEXP (op0, 1)))
10838 *cost += extra_cost->alu.log_shift;
10839 else
10840 *cost += extra_cost->alu.log_shift_reg;
10841 }
10842 else
10843 *cost += extra_cost->alu.logical;
10844 }
10845
10846 /* In both cases we want to cost both operands. */
b4206259
RS
10847 *cost += rtx_cost (new_op0, int_mode, (enum rtx_code) code,
10848 0, speed);
10849 *cost += rtx_cost (op1, int_mode, (enum rtx_code) code,
10850 1, speed);
268c3b47
JG
10851
10852 return true;
43e9d192 10853 }
43e9d192
IB
10854 }
10855 return false;
10856
268c3b47 10857 case NOT:
6365da9e
KT
10858 x = XEXP (x, 0);
10859 op0 = aarch64_strip_shift (x);
10860
b6875aac
KV
10861 if (VECTOR_MODE_P (mode))
10862 {
10863 /* Vector NOT. */
10864 *cost += extra_cost->vect.alu;
10865 return false;
10866 }
10867
6365da9e
KT
10868 /* MVN-shifted-reg. */
10869 if (op0 != x)
10870 {
e548c9df 10871 *cost += rtx_cost (op0, mode, (enum rtx_code) code, 0, speed);
6365da9e
KT
10872
10873 if (speed)
10874 *cost += extra_cost->alu.log_shift;
10875
10876 return true;
10877 }
10878 /* EON can have two forms: (xor (not a) b) but also (not (xor a b)).
10879 Handle the second form here taking care that 'a' in the above can
10880 be a shift. */
10881 else if (GET_CODE (op0) == XOR)
10882 {
10883 rtx newop0 = XEXP (op0, 0);
10884 rtx newop1 = XEXP (op0, 1);
10885 rtx op0_stripped = aarch64_strip_shift (newop0);
10886
e548c9df
AM
10887 *cost += rtx_cost (newop1, mode, (enum rtx_code) code, 1, speed);
10888 *cost += rtx_cost (op0_stripped, mode, XOR, 0, speed);
6365da9e
KT
10889
10890 if (speed)
10891 {
10892 if (op0_stripped != newop0)
10893 *cost += extra_cost->alu.log_shift;
10894 else
10895 *cost += extra_cost->alu.logical;
10896 }
10897
10898 return true;
10899 }
268c3b47
JG
10900 /* MVN. */
10901 if (speed)
10902 *cost += extra_cost->alu.logical;
10903
268c3b47
JG
10904 return false;
10905
43e9d192 10906 case ZERO_EXTEND:
b1685e62
JG
10907
10908 op0 = XEXP (x, 0);
10909 /* If a value is written in SI mode, then zero extended to DI
10910 mode, the operation will in general be free as a write to
10911 a 'w' register implicitly zeroes the upper bits of an 'x'
10912 register. However, if this is
10913
10914 (set (reg) (zero_extend (reg)))
10915
10916 we must cost the explicit register move. */
10917 if (mode == DImode
10918 && GET_MODE (op0) == SImode
10919 && outer == SET)
10920 {
e548c9df 10921 int op_cost = rtx_cost (op0, VOIDmode, ZERO_EXTEND, 0, speed);
b1685e62 10922
dde23f43
KM
10923 /* If OP_COST is non-zero, then the cost of the zero extend
10924 is effectively the cost of the inner operation. Otherwise
10925 we have a MOV instruction and we take the cost from the MOV
10926 itself. This is true independently of whether we are
10927 optimizing for space or time. */
10928 if (op_cost)
b1685e62
JG
10929 *cost = op_cost;
10930
10931 return true;
10932 }
e548c9df 10933 else if (MEM_P (op0))
43e9d192 10934 {
b1685e62 10935 /* All loads can zero extend to any size for free. */
e548c9df 10936 *cost = rtx_cost (op0, VOIDmode, ZERO_EXTEND, param, speed);
43e9d192
IB
10937 return true;
10938 }
b1685e62 10939
283b6c85
KT
10940 op0 = aarch64_extend_bitfield_pattern_p (x);
10941 if (op0)
10942 {
10943 *cost += rtx_cost (op0, mode, ZERO_EXTEND, 0, speed);
10944 if (speed)
10945 *cost += extra_cost->alu.bfx;
10946 return true;
10947 }
10948
b1685e62 10949 if (speed)
b6875aac
KV
10950 {
10951 if (VECTOR_MODE_P (mode))
10952 {
10953 /* UMOV. */
10954 *cost += extra_cost->vect.alu;
10955 }
10956 else
10957 {
63715e5e
WD
10958 /* We generate an AND instead of UXTB/UXTH. */
10959 *cost += extra_cost->alu.logical;
b6875aac
KV
10960 }
10961 }
43e9d192
IB
10962 return false;
10963
10964 case SIGN_EXTEND:
b1685e62 10965 if (MEM_P (XEXP (x, 0)))
43e9d192 10966 {
b1685e62
JG
10967 /* LDRSH. */
10968 if (speed)
10969 {
10970 rtx address = XEXP (XEXP (x, 0), 0);
10971 *cost += extra_cost->ldst.load_sign_extend;
10972
10973 *cost +=
10974 COSTS_N_INSNS (aarch64_address_cost (address, mode,
10975 0, speed));
10976 }
43e9d192
IB
10977 return true;
10978 }
b1685e62 10979
283b6c85
KT
10980 op0 = aarch64_extend_bitfield_pattern_p (x);
10981 if (op0)
10982 {
10983 *cost += rtx_cost (op0, mode, SIGN_EXTEND, 0, speed);
10984 if (speed)
10985 *cost += extra_cost->alu.bfx;
10986 return true;
10987 }
10988
b1685e62 10989 if (speed)
b6875aac
KV
10990 {
10991 if (VECTOR_MODE_P (mode))
10992 *cost += extra_cost->vect.alu;
10993 else
10994 *cost += extra_cost->alu.extend;
10995 }
43e9d192
IB
10996 return false;
10997
ba0cfa17
JG
10998 case ASHIFT:
10999 op0 = XEXP (x, 0);
11000 op1 = XEXP (x, 1);
11001
11002 if (CONST_INT_P (op1))
11003 {
ba0cfa17 11004 if (speed)
b6875aac
KV
11005 {
11006 if (VECTOR_MODE_P (mode))
11007 {
11008 /* Vector shift (immediate). */
11009 *cost += extra_cost->vect.alu;
11010 }
11011 else
11012 {
11013 /* LSL (immediate), UBFM, UBFIZ and friends. These are all
11014 aliases. */
11015 *cost += extra_cost->alu.shift;
11016 }
11017 }
ba0cfa17
JG
11018
11019 /* We can incorporate zero/sign extend for free. */
11020 if (GET_CODE (op0) == ZERO_EXTEND
11021 || GET_CODE (op0) == SIGN_EXTEND)
11022 op0 = XEXP (op0, 0);
11023
e548c9df 11024 *cost += rtx_cost (op0, VOIDmode, ASHIFT, 0, speed);
ba0cfa17
JG
11025 return true;
11026 }
11027 else
11028 {
7813b280 11029 if (VECTOR_MODE_P (mode))
b6875aac 11030 {
7813b280
KT
11031 if (speed)
11032 /* Vector shift (register). */
11033 *cost += extra_cost->vect.alu;
11034 }
11035 else
11036 {
11037 if (speed)
11038 /* LSLV. */
11039 *cost += extra_cost->alu.shift_reg;
11040
11041 if (GET_CODE (op1) == AND && REG_P (XEXP (op1, 0))
11042 && CONST_INT_P (XEXP (op1, 1))
6a70badb
RS
11043 && known_eq (INTVAL (XEXP (op1, 1)),
11044 GET_MODE_BITSIZE (mode) - 1))
b6875aac 11045 {
7813b280
KT
11046 *cost += rtx_cost (op0, mode, (rtx_code) code, 0, speed);
11047 /* We already demanded XEXP (op1, 0) to be REG_P, so
11048 don't recurse into it. */
11049 return true;
b6875aac
KV
11050 }
11051 }
ba0cfa17
JG
11052 return false; /* All arguments need to be in registers. */
11053 }
11054
43e9d192 11055 case ROTATE:
43e9d192
IB
11056 case ROTATERT:
11057 case LSHIFTRT:
43e9d192 11058 case ASHIFTRT:
ba0cfa17
JG
11059 op0 = XEXP (x, 0);
11060 op1 = XEXP (x, 1);
43e9d192 11061
ba0cfa17
JG
11062 if (CONST_INT_P (op1))
11063 {
11064 /* ASR (immediate) and friends. */
11065 if (speed)
b6875aac
KV
11066 {
11067 if (VECTOR_MODE_P (mode))
11068 *cost += extra_cost->vect.alu;
11069 else
11070 *cost += extra_cost->alu.shift;
11071 }
43e9d192 11072
e548c9df 11073 *cost += rtx_cost (op0, mode, (enum rtx_code) code, 0, speed);
ba0cfa17
JG
11074 return true;
11075 }
11076 else
11077 {
7813b280 11078 if (VECTOR_MODE_P (mode))
b6875aac 11079 {
7813b280
KT
11080 if (speed)
11081 /* Vector shift (register). */
b6875aac 11082 *cost += extra_cost->vect.alu;
7813b280
KT
11083 }
11084 else
11085 {
11086 if (speed)
11087 /* ASR (register) and friends. */
b6875aac 11088 *cost += extra_cost->alu.shift_reg;
7813b280
KT
11089
11090 if (GET_CODE (op1) == AND && REG_P (XEXP (op1, 0))
11091 && CONST_INT_P (XEXP (op1, 1))
6a70badb
RS
11092 && known_eq (INTVAL (XEXP (op1, 1)),
11093 GET_MODE_BITSIZE (mode) - 1))
7813b280
KT
11094 {
11095 *cost += rtx_cost (op0, mode, (rtx_code) code, 0, speed);
11096 /* We already demanded XEXP (op1, 0) to be REG_P, so
11097 don't recurse into it. */
11098 return true;
11099 }
b6875aac 11100 }
ba0cfa17
JG
11101 return false; /* All arguments need to be in registers. */
11102 }
43e9d192 11103
909734be
JG
11104 case SYMBOL_REF:
11105
1b1e81f8
JW
11106 if (aarch64_cmodel == AARCH64_CMODEL_LARGE
11107 || aarch64_cmodel == AARCH64_CMODEL_SMALL_SPIC)
909734be
JG
11108 {
11109 /* LDR. */
11110 if (speed)
11111 *cost += extra_cost->ldst.load;
11112 }
11113 else if (aarch64_cmodel == AARCH64_CMODEL_SMALL
11114 || aarch64_cmodel == AARCH64_CMODEL_SMALL_PIC)
11115 {
11116 /* ADRP, followed by ADD. */
11117 *cost += COSTS_N_INSNS (1);
11118 if (speed)
11119 *cost += 2 * extra_cost->alu.arith;
11120 }
11121 else if (aarch64_cmodel == AARCH64_CMODEL_TINY
11122 || aarch64_cmodel == AARCH64_CMODEL_TINY_PIC)
11123 {
11124 /* ADR. */
11125 if (speed)
11126 *cost += extra_cost->alu.arith;
11127 }
11128
11129 if (flag_pic)
11130 {
11131 /* One extra load instruction, after accessing the GOT. */
11132 *cost += COSTS_N_INSNS (1);
11133 if (speed)
11134 *cost += extra_cost->ldst.load;
11135 }
43e9d192
IB
11136 return true;
11137
909734be 11138 case HIGH:
43e9d192 11139 case LO_SUM:
909734be
JG
11140 /* ADRP/ADD (immediate). */
11141 if (speed)
11142 *cost += extra_cost->alu.arith;
43e9d192
IB
11143 return true;
11144
11145 case ZERO_EXTRACT:
11146 case SIGN_EXTRACT:
7cc2145f
JG
11147 /* UBFX/SBFX. */
11148 if (speed)
b6875aac
KV
11149 {
11150 if (VECTOR_MODE_P (mode))
11151 *cost += extra_cost->vect.alu;
11152 else
11153 *cost += extra_cost->alu.bfx;
11154 }
7cc2145f
JG
11155
11156 /* We can trust that the immediates used will be correct (there
11157 are no by-register forms), so we need only cost op0. */
e548c9df 11158 *cost += rtx_cost (XEXP (x, 0), VOIDmode, (enum rtx_code) code, 0, speed);
43e9d192
IB
11159 return true;
11160
11161 case MULT:
4745e701
JG
11162 *cost += aarch64_rtx_mult_cost (x, MULT, 0, speed);
11163 /* aarch64_rtx_mult_cost always handles recursion to its
11164 operands. */
11165 return true;
43e9d192
IB
11166
11167 case MOD:
4f58fe36
KT
11168 /* We can expand signed mod by a power of 2 using a NEGS, two parallel
11169 ANDs and a CSNEG. Assume here that CSNEG costs the same as an
11170 unconditional negate. This case should only ever be reached through
11171 the set_smod_pow2_cheap check in expmed.c. */
11172 if (CONST_INT_P (XEXP (x, 1))
11173 && exact_log2 (INTVAL (XEXP (x, 1))) > 0
11174 && (mode == SImode || mode == DImode))
11175 {
11176 /* We expand to 4 instructions. Reset the baseline. */
11177 *cost = COSTS_N_INSNS (4);
11178
11179 if (speed)
11180 *cost += 2 * extra_cost->alu.logical
11181 + 2 * extra_cost->alu.arith;
11182
11183 return true;
11184 }
11185
11186 /* Fall-through. */
43e9d192 11187 case UMOD:
43e9d192
IB
11188 if (speed)
11189 {
cb9ac430 11190 /* Slightly prefer UMOD over SMOD. */
b6875aac
KV
11191 if (VECTOR_MODE_P (mode))
11192 *cost += extra_cost->vect.alu;
e548c9df
AM
11193 else if (GET_MODE_CLASS (mode) == MODE_INT)
11194 *cost += (extra_cost->mult[mode == DImode].add
cb9ac430
TC
11195 + extra_cost->mult[mode == DImode].idiv
11196 + (code == MOD ? 1 : 0));
43e9d192
IB
11197 }
11198 return false; /* All arguments need to be in registers. */
11199
11200 case DIV:
11201 case UDIV:
4105fe38 11202 case SQRT:
43e9d192
IB
11203 if (speed)
11204 {
b6875aac
KV
11205 if (VECTOR_MODE_P (mode))
11206 *cost += extra_cost->vect.alu;
11207 else if (GET_MODE_CLASS (mode) == MODE_INT)
4105fe38
JG
11208 /* There is no integer SQRT, so only DIV and UDIV can get
11209 here. */
cb9ac430
TC
11210 *cost += (extra_cost->mult[mode == DImode].idiv
11211 /* Slightly prefer UDIV over SDIV. */
11212 + (code == DIV ? 1 : 0));
4105fe38
JG
11213 else
11214 *cost += extra_cost->fp[mode == DFmode].div;
43e9d192
IB
11215 }
11216 return false; /* All arguments need to be in registers. */
11217
a8eecd00 11218 case IF_THEN_ELSE:
2d5ffe46
AP
11219 return aarch64_if_then_else_costs (XEXP (x, 0), XEXP (x, 1),
11220 XEXP (x, 2), cost, speed);
a8eecd00
JG
11221
11222 case EQ:
11223 case NE:
11224 case GT:
11225 case GTU:
11226 case LT:
11227 case LTU:
11228 case GE:
11229 case GEU:
11230 case LE:
11231 case LEU:
11232
11233 return false; /* All arguments must be in registers. */
11234
b292109f
JG
11235 case FMA:
11236 op0 = XEXP (x, 0);
11237 op1 = XEXP (x, 1);
11238 op2 = XEXP (x, 2);
11239
11240 if (speed)
b6875aac
KV
11241 {
11242 if (VECTOR_MODE_P (mode))
11243 *cost += extra_cost->vect.alu;
11244 else
11245 *cost += extra_cost->fp[mode == DFmode].fma;
11246 }
b292109f
JG
11247
11248 /* FMSUB, FNMADD, and FNMSUB are free. */
11249 if (GET_CODE (op0) == NEG)
11250 op0 = XEXP (op0, 0);
11251
11252 if (GET_CODE (op2) == NEG)
11253 op2 = XEXP (op2, 0);
11254
11255 /* aarch64_fnma4_elt_to_64v2df has the NEG as operand 1,
11256 and the by-element operand as operand 0. */
11257 if (GET_CODE (op1) == NEG)
11258 op1 = XEXP (op1, 0);
11259
11260 /* Catch vector-by-element operations. The by-element operand can
11261 either be (vec_duplicate (vec_select (x))) or just
11262 (vec_select (x)), depending on whether we are multiplying by
11263 a vector or a scalar.
11264
11265 Canonicalization is not very good in these cases: FMA4 will put the
11266 by-element operand as operand 0, FNMA4 will have it as operand 1. */
11267 if (GET_CODE (op0) == VEC_DUPLICATE)
11268 op0 = XEXP (op0, 0);
11269 else if (GET_CODE (op1) == VEC_DUPLICATE)
11270 op1 = XEXP (op1, 0);
11271
11272 if (GET_CODE (op0) == VEC_SELECT)
11273 op0 = XEXP (op0, 0);
11274 else if (GET_CODE (op1) == VEC_SELECT)
11275 op1 = XEXP (op1, 0);
11276
11277 /* If the remaining parameters are not registers,
11278 get the cost to put them into registers. */
e548c9df
AM
11279 *cost += rtx_cost (op0, mode, FMA, 0, speed);
11280 *cost += rtx_cost (op1, mode, FMA, 1, speed);
11281 *cost += rtx_cost (op2, mode, FMA, 2, speed);
b292109f
JG
11282 return true;
11283
5e2a765b
KT
11284 case FLOAT:
11285 case UNSIGNED_FLOAT:
11286 if (speed)
11287 *cost += extra_cost->fp[mode == DFmode].fromint;
11288 return false;
11289
b292109f
JG
11290 case FLOAT_EXTEND:
11291 if (speed)
b6875aac
KV
11292 {
11293 if (VECTOR_MODE_P (mode))
11294 {
11295 /* Vector widening conversion. */
11296 *cost += extra_cost->vect.alu;
11297 }
11298 else
11299 *cost += extra_cost->fp[mode == DFmode].widen;
11300 }
b292109f
JG
11301 return false;
11302
11303 case FLOAT_TRUNCATE:
11304 if (speed)
b6875aac
KV
11305 {
11306 if (VECTOR_MODE_P (mode))
11307 {
11309 /* Vector narrowing conversion. */
11309 *cost += extra_cost->vect.alu;
11310 }
11311 else
11312 *cost += extra_cost->fp[mode == DFmode].narrow;
11313 }
b292109f
JG
11314 return false;
11315
61263118
KT
11316 case FIX:
11317 case UNSIGNED_FIX:
11318 x = XEXP (x, 0);
11319 /* Strip the rounding part. They will all be implemented
11320 by the fcvt* family of instructions anyway. */
11321 if (GET_CODE (x) == UNSPEC)
11322 {
11323 unsigned int uns_code = XINT (x, 1);
11324
11325 if (uns_code == UNSPEC_FRINTA
11326 || uns_code == UNSPEC_FRINTM
11327 || uns_code == UNSPEC_FRINTN
11328 || uns_code == UNSPEC_FRINTP
11329 || uns_code == UNSPEC_FRINTZ)
11330 x = XVECEXP (x, 0, 0);
11331 }
11332
11333 if (speed)
b6875aac
KV
11334 {
11335 if (VECTOR_MODE_P (mode))
11336 *cost += extra_cost->vect.alu;
11337 else
11338 *cost += extra_cost->fp[GET_MODE (x) == DFmode].toint;
11339 }
39252973
KT
11340
11341 /* We can combine fmul by a power of 2 followed by a fcvt into a single
11342 fixed-point fcvt. */
11343 if (GET_CODE (x) == MULT
11344 && ((VECTOR_MODE_P (mode)
11345 && aarch64_vec_fpconst_pow_of_2 (XEXP (x, 1)) > 0)
11346 || aarch64_fpconst_pow_of_2 (XEXP (x, 1)) > 0))
11347 {
11348 *cost += rtx_cost (XEXP (x, 0), VOIDmode, (rtx_code) code,
11349 0, speed);
11350 return true;
11351 }
11352
e548c9df 11353 *cost += rtx_cost (x, VOIDmode, (enum rtx_code) code, 0, speed);
61263118
KT
11354 return true;
11355
b292109f 11356 case ABS:
b6875aac
KV
11357 if (VECTOR_MODE_P (mode))
11358 {
11359 /* ABS (vector). */
11360 if (speed)
11361 *cost += extra_cost->vect.alu;
11362 }
11363 else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
b292109f 11364 {
19261b99
KT
11365 op0 = XEXP (x, 0);
11366
11367 /* FABD, which is analogous to FADD. */
11368 if (GET_CODE (op0) == MINUS)
11369 {
e548c9df
AM
11370 *cost += rtx_cost (XEXP (op0, 0), mode, MINUS, 0, speed);
11371 *cost += rtx_cost (XEXP (op0, 1), mode, MINUS, 1, speed);
19261b99
KT
11372 if (speed)
11373 *cost += extra_cost->fp[mode == DFmode].addsub;
11374
11375 return true;
11376 }
11377 /* Simple FABS is analogous to FNEG. */
b292109f
JG
11378 if (speed)
11379 *cost += extra_cost->fp[mode == DFmode].neg;
11380 }
11381 else
11382 {
11383 /* Integer ABS will either be split to
11384 two arithmetic instructions, or will be an ABS
11385 (scalar), which we don't model. */
11386 *cost = COSTS_N_INSNS (2);
11387 if (speed)
11388 *cost += 2 * extra_cost->alu.arith;
11389 }
11390 return false;
11391
11392 case SMAX:
11393 case SMIN:
11394 if (speed)
11395 {
b6875aac
KV
11396 if (VECTOR_MODE_P (mode))
11397 *cost += extra_cost->vect.alu;
11398 else
11399 {
11400 /* FMAXNM/FMINNM/FMAX/FMIN.
11401 TODO: This may not be accurate for all implementations, but
11402 we do not model this in the cost tables. */
11403 *cost += extra_cost->fp[mode == DFmode].addsub;
11404 }
b292109f
JG
11405 }
11406 return false;
11407
61263118
KT
11408 case UNSPEC:
11409 /* The floating point round to integer frint* instructions. */
11410 if (aarch64_frint_unspec_p (XINT (x, 1)))
11411 {
11412 if (speed)
11413 *cost += extra_cost->fp[mode == DFmode].roundint;
11414
11415 return false;
11416 }
781aeb73
KT
11417
11418 if (XINT (x, 1) == UNSPEC_RBIT)
11419 {
11420 if (speed)
11421 *cost += extra_cost->alu.rev;
11422
11423 return false;
11424 }
61263118
KT
11425 break;
11426
fb620c4a
JG
11427 case TRUNCATE:
11428
11429 /* Decompose <su>muldi3_highpart. */
11430 if (/* (truncate:DI */
11431 mode == DImode
11432 /* (lshiftrt:TI */
11433 && GET_MODE (XEXP (x, 0)) == TImode
11434 && GET_CODE (XEXP (x, 0)) == LSHIFTRT
11435 /* (mult:TI */
11436 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
11437 /* (ANY_EXTEND:TI (reg:DI))
11438 (ANY_EXTEND:TI (reg:DI))) */
11439 && ((GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == ZERO_EXTEND
11440 && GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)) == ZERO_EXTEND)
11441 || (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == SIGN_EXTEND
11442 && GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)) == SIGN_EXTEND))
11443 && GET_MODE (XEXP (XEXP (XEXP (XEXP (x, 0), 0), 0), 0)) == DImode
11444 && GET_MODE (XEXP (XEXP (XEXP (XEXP (x, 0), 0), 1), 0)) == DImode
11445 /* (const_int 64) */
11446 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
11447 && UINTVAL (XEXP (XEXP (x, 0), 1)) == 64)
11448 {
11449 /* UMULH/SMULH. */
11450 if (speed)
11451 *cost += extra_cost->mult[mode == DImode].extend;
e548c9df
AM
11452 *cost += rtx_cost (XEXP (XEXP (XEXP (XEXP (x, 0), 0), 0), 0),
11453 mode, MULT, 0, speed);
11454 *cost += rtx_cost (XEXP (XEXP (XEXP (XEXP (x, 0), 0), 1), 0),
11455 mode, MULT, 1, speed);
fb620c4a
JG
11456 return true;
11457 }
11458
11459 /* Fall through. */
43e9d192 11460 default:
61263118 11461 break;
43e9d192 11462 }
61263118 11463
c10e3d7f
AP
11464 if (dump_file
11465 && flag_aarch64_verbose_cost)
61263118
KT
11466 fprintf (dump_file,
11467 "\nFailed to cost RTX. Assuming default cost.\n");
11468
11469 return true;
43e9d192
IB
11470}
11471
0ee859b5
JG
11472/* Wrapper around aarch64_rtx_costs, dumps the partial, or total cost
11473 calculated for X. This cost is stored in *COST. Returns true
11474 if the total cost of X was calculated. */
11475static bool
e548c9df 11476aarch64_rtx_costs_wrapper (rtx x, machine_mode mode, int outer,
0ee859b5
JG
11477 int param, int *cost, bool speed)
11478{
e548c9df 11479 bool result = aarch64_rtx_costs (x, mode, outer, param, cost, speed);
0ee859b5 11480
c10e3d7f
AP
11481 if (dump_file
11482 && flag_aarch64_verbose_cost)
0ee859b5
JG
11483 {
11484 print_rtl_single (dump_file, x);
11485 fprintf (dump_file, "\n%s cost: %d (%s)\n",
11486 speed ? "Hot" : "Cold",
11487 *cost, result ? "final" : "partial");
11488 }
11489
11490 return result;
11491}
11492
43e9d192 11493static int
ef4bddc2 11494aarch64_register_move_cost (machine_mode mode,
8a3a7e67 11495 reg_class_t from_i, reg_class_t to_i)
43e9d192 11496{
8a3a7e67
RH
11497 enum reg_class from = (enum reg_class) from_i;
11498 enum reg_class to = (enum reg_class) to_i;
43e9d192 11499 const struct cpu_regmove_cost *regmove_cost
b175b679 11500 = aarch64_tune_params.regmove_cost;
43e9d192 11501
3be07662 11502 /* Caller save and pointer regs are equivalent to GENERAL_REGS. */
d677263e 11503 if (to == TAILCALL_ADDR_REGS || to == POINTER_REGS)
3be07662
WD
11504 to = GENERAL_REGS;
11505
d677263e 11506 if (from == TAILCALL_ADDR_REGS || from == POINTER_REGS)
3be07662
WD
11507 from = GENERAL_REGS;
11508
6ee70f81
AP
11509 /* Moving between GPR and stack cost is the same as GP2GP. */
11510 if ((from == GENERAL_REGS && to == STACK_REG)
11511 || (to == GENERAL_REGS && from == STACK_REG))
11512 return regmove_cost->GP2GP;
11513
11514 /* To/From the stack register, we move via the gprs. */
11515 if (to == STACK_REG || from == STACK_REG)
11516 return aarch64_register_move_cost (mode, from, GENERAL_REGS)
11517 + aarch64_register_move_cost (mode, GENERAL_REGS, to);
11518
6a70badb 11519 if (known_eq (GET_MODE_SIZE (mode), 16))
8919453c
WD
11520 {
11521 /* 128-bit operations on general registers require 2 instructions. */
11522 if (from == GENERAL_REGS && to == GENERAL_REGS)
11523 return regmove_cost->GP2GP * 2;
11524 else if (from == GENERAL_REGS)
11525 return regmove_cost->GP2FP * 2;
11526 else if (to == GENERAL_REGS)
11527 return regmove_cost->FP2GP * 2;
11528
11529 /* When AdvSIMD instructions are disabled it is not possible to move
11530 a 128-bit value directly between Q registers. This is handled in
11531 secondary reload. A general register is used as a scratch to move
11532 the upper DI value and the lower DI value is moved directly,
11533 hence the cost is the sum of three moves. */
11534 if (! TARGET_SIMD)
11535 return regmove_cost->GP2FP + regmove_cost->FP2GP + regmove_cost->FP2FP;
11536
11537 return regmove_cost->FP2FP;
11538 }
11539
43e9d192
IB
11540 if (from == GENERAL_REGS && to == GENERAL_REGS)
11541 return regmove_cost->GP2GP;
11542 else if (from == GENERAL_REGS)
11543 return regmove_cost->GP2FP;
11544 else if (to == GENERAL_REGS)
11545 return regmove_cost->FP2GP;
11546
43e9d192
IB
11547 return regmove_cost->FP2FP;
11548}
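/* Two illustrative cases of the logic above (hypothetical queries): moving
   a TImode value between two general registers returns 2 * GP2GP, since a
   128-bit copy needs two X-register moves; and when !TARGET_SIMD a 128-bit
   FP-to-FP move returns GP2FP + FP2GP + FP2FP, reflecting the scratch
   general register used by the secondary reload path.  */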
11549
11550static int
ef4bddc2 11551aarch64_memory_move_cost (machine_mode mode ATTRIBUTE_UNUSED,
43e9d192
IB
11552 reg_class_t rclass ATTRIBUTE_UNUSED,
11553 bool in ATTRIBUTE_UNUSED)
11554{
b175b679 11555 return aarch64_tune_params.memmov_cost;
43e9d192
IB
11556}
11557
0c30e0f3
EM
11558/* Return true if it is safe and beneficial to use the approximate rsqrt optabs
11559 to optimize 1.0/sqrt. */
ee62a5a6
RS
11560
11561static bool
9acc9cbe 11562use_rsqrt_p (machine_mode mode)
ee62a5a6
RS
11563{
11564 return (!flag_trapping_math
11565 && flag_unsafe_math_optimizations
9acc9cbe
EM
11566 && ((aarch64_tune_params.approx_modes->recip_sqrt
11567 & AARCH64_APPROX_MODE (mode))
1a33079e 11568 || flag_mrecip_low_precision_sqrt));
ee62a5a6
RS
11569}
11570
0c30e0f3
EM
11571/* Function to decide when to use the approximate reciprocal square root
11572 builtin. */
a6fc00da
BH
11573
11574static tree
ee62a5a6 11575aarch64_builtin_reciprocal (tree fndecl)
a6fc00da 11576{
9acc9cbe
EM
11577 machine_mode mode = TYPE_MODE (TREE_TYPE (fndecl));
11578
11579 if (!use_rsqrt_p (mode))
a6fc00da 11580 return NULL_TREE;
4d732405 11581 return aarch64_builtin_rsqrt (DECL_MD_FUNCTION_CODE (fndecl));
a6fc00da
BH
11582}
11583
98daafa0
EM
11584/* Emit instruction sequence to compute either the approximate square root
11585 or its approximate reciprocal, depending on the flag RECP, and return
11586 whether the sequence was emitted or not. */
a6fc00da 11587
98daafa0
EM
11588bool
11589aarch64_emit_approx_sqrt (rtx dst, rtx src, bool recp)
a6fc00da 11590{
98daafa0 11591 machine_mode mode = GET_MODE (dst);
daef0a8c
JW
11592
11593 if (GET_MODE_INNER (mode) == HFmode)
2e19adc8
RE
11594 {
11595 gcc_assert (!recp);
11596 return false;
11597 }
11598
2e19adc8
RE
11599 if (!recp)
11600 {
11601 if (!(flag_mlow_precision_sqrt
11602 || (aarch64_tune_params.approx_modes->sqrt
11603 & AARCH64_APPROX_MODE (mode))))
11604 return false;
11605
11606 if (flag_finite_math_only
11607 || flag_trapping_math
11608 || !flag_unsafe_math_optimizations
11609 || optimize_function_for_size_p (cfun))
11610 return false;
11611 }
11612 else
11613 /* Caller assumes we cannot fail. */
11614 gcc_assert (use_rsqrt_p (mode));
daef0a8c 11615
ddc203a7 11616 machine_mode mmsk = mode_for_int_vector (mode).require ();
98daafa0
EM
11617 rtx xmsk = gen_reg_rtx (mmsk);
11618 if (!recp)
2e19adc8
RE
11619 /* When calculating the approximate square root, compare the
11620 argument with 0.0 and create a mask. */
11621 emit_insn (gen_rtx_SET (xmsk,
11622 gen_rtx_NEG (mmsk,
11623 gen_rtx_EQ (mmsk, src,
11624 CONST0_RTX (mode)))));
a6fc00da 11625
98daafa0
EM
11626 /* Estimate the approximate reciprocal square root. */
11627 rtx xdst = gen_reg_rtx (mode);
0016d8d9 11628 emit_insn (gen_aarch64_rsqrte (mode, xdst, src));
a6fc00da 11629
98daafa0
EM
11630 /* Iterate over the series twice for SF and thrice for DF. */
11631 int iterations = (GET_MODE_INNER (mode) == DFmode) ? 3 : 2;
a6fc00da 11632
98daafa0
EM
11633 /* Optionally iterate over the series once less for faster performance
11634 while sacrificing accuracy. */
11635 if ((recp && flag_mrecip_low_precision_sqrt)
11636 || (!recp && flag_mlow_precision_sqrt))
a6fc00da
BH
11637 iterations--;
11638
98daafa0
EM
11639 /* Iterate over the series to calculate the approximate reciprocal square
11640 root. */
11641 rtx x1 = gen_reg_rtx (mode);
11642 while (iterations--)
a6fc00da 11643 {
a6fc00da 11644 rtx x2 = gen_reg_rtx (mode);
98daafa0
EM
11645 emit_set_insn (x2, gen_rtx_MULT (mode, xdst, xdst));
11646
0016d8d9 11647 emit_insn (gen_aarch64_rsqrts (mode, x1, src, x2));
a6fc00da 11648
98daafa0
EM
11649 if (iterations > 0)
11650 emit_set_insn (xdst, gen_rtx_MULT (mode, xdst, x1));
11651 }
11652
11653 if (!recp)
11654 {
11655 /* Qualify the approximate reciprocal square root when the argument is
11656 0.0 by squashing the intermediary result to 0.0. */
11657 rtx xtmp = gen_reg_rtx (mmsk);
11658 emit_set_insn (xtmp, gen_rtx_AND (mmsk, gen_rtx_NOT (mmsk, xmsk),
11659 gen_rtx_SUBREG (mmsk, xdst, 0)));
11660 emit_move_insn (xdst, gen_rtx_SUBREG (mode, xtmp, 0));
a6fc00da 11661
98daafa0
EM
11662 /* Calculate the approximate square root. */
11663 emit_set_insn (xdst, gen_rtx_MULT (mode, xdst, src));
a6fc00da
BH
11664 }
11665
98daafa0
EM
11666 /* Finalize the approximation. */
11667 emit_set_insn (dst, gen_rtx_MULT (mode, xdst, x1));
11668
11669 return true;
a6fc00da
BH
11670}
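/* Illustrative sketch (not part of aarch64.c): the scalar arithmetic behind
   the sequence emitted above.  FRSQRTE supplies a rough estimate E of
   1/sqrt(X); each FRSQRTS + FMUL pair then performs one Newton-Raphson step
   E' = E * (3 - X*E*E) / 2 (FRSQRTS computes (3 - a*b) / 2), and the square
   root itself is recovered as X * E.  The initial estimate below is a crude
   stand-in for the hardware estimate and is only adequate for operands
   reasonably close to 1.0.  */

static double
sketch_newton_sqrt (double x, int iterations)
{
  double e = 1.0;			/* stand-in for the FRSQRTE estimate */
  while (iterations-- > 0)
    e = e * (3.0 - x * e * e) / 2.0;	/* one FRSQRTS + FMUL step */
  return x * e;				/* sqrt(x) ~= x * (1/sqrt(x)) */
}

/* E.g. sketch_newton_sqrt (2.0, 5) gives ~1.41421.  */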
11671
79a2bc2d
EM
11672/* Emit the instruction sequence to compute the approximation for the division
11673 of NUM by DEN in QUO and return whether the sequence was emitted or not. */
11674
11675bool
11676aarch64_emit_approx_div (rtx quo, rtx num, rtx den)
11677{
11678 machine_mode mode = GET_MODE (quo);
33d72b63
JW
11679
11680 if (GET_MODE_INNER (mode) == HFmode)
11681 return false;
11682
79a2bc2d
EM
11683 bool use_approx_division_p = (flag_mlow_precision_div
11684 || (aarch64_tune_params.approx_modes->division
11685 & AARCH64_APPROX_MODE (mode)));
11686
11687 if (!flag_finite_math_only
11688 || flag_trapping_math
11689 || !flag_unsafe_math_optimizations
11690 || optimize_function_for_size_p (cfun)
11691 || !use_approx_division_p)
11692 return false;
11693
1be49a38
RR
11694 if (!TARGET_SIMD && VECTOR_MODE_P (mode))
11695 return false;
11696
79a2bc2d
EM
11697 /* Estimate the approximate reciprocal. */
11698 rtx xrcp = gen_reg_rtx (mode);
0016d8d9 11699 emit_insn (gen_aarch64_frecpe (mode, xrcp, den));
79a2bc2d
EM
11700
11701 /* Iterate over the series twice for SF and thrice for DF. */
11702 int iterations = (GET_MODE_INNER (mode) == DFmode) ? 3 : 2;
11703
11704 /* Optionally iterate over the series once less for faster performance,
11705 while sacrificing accuracy. */
11706 if (flag_mlow_precision_div)
11707 iterations--;
11708
11709 /* Iterate over the series to calculate the approximate reciprocal. */
11710 rtx xtmp = gen_reg_rtx (mode);
11711 while (iterations--)
11712 {
0016d8d9 11713 emit_insn (gen_aarch64_frecps (mode, xtmp, xrcp, den));
79a2bc2d
EM
11714
11715 if (iterations > 0)
11716 emit_set_insn (xrcp, gen_rtx_MULT (mode, xrcp, xtmp));
11717 }
11718
11719 if (num != CONST1_RTX (mode))
11720 {
11721 /* As the approximate reciprocal of DEN is already calculated, only
11722 calculate the approximate division when NUM is not 1.0. */
11723 rtx xnum = force_reg (mode, num);
11724 emit_set_insn (xrcp, gen_rtx_MULT (mode, xrcp, xnum));
11725 }
11726
11727 /* Finalize the approximation. */
11728 emit_set_insn (quo, gen_rtx_MULT (mode, xrcp, xtmp));
11729 return true;
11730}
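/* Illustrative sketch (not part of aarch64.c): the scalar arithmetic behind
   the division sequence above.  FRECPE supplies a rough estimate E of 1/DEN;
   each FRECPS + FMUL pair performs one Newton-Raphson step
   E' = E * (2 - DEN*E) (FRECPS computes 2 - a*b), and the quotient is
   NUM * E.  As with the square-root sketch, the initial estimate is a crude
   stand-in for the hardware estimate and only suits divisors near 1.0.  */

static double
sketch_newton_div (double num, double den, int iterations)
{
  double e = 1.0;			/* stand-in for the FRECPE estimate */
  while (iterations-- > 0)
    e = e * (2.0 - den * e);		/* one FRECPS + FMUL step */
  return num * e;			/* num * (1/den) */
}

/* E.g. sketch_newton_div (1.0, 1.25, 5) gives ~0.8.  */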
11731
d126a4ae
AP
11732/* Return the number of instructions that can be issued per cycle. */
11733static int
11734aarch64_sched_issue_rate (void)
11735{
b175b679 11736 return aarch64_tune_params.issue_rate;
d126a4ae
AP
11737}
11738
d03f7e44
MK
11739static int
11740aarch64_sched_first_cycle_multipass_dfa_lookahead (void)
11741{
11742 int issue_rate = aarch64_sched_issue_rate ();
11743
11744 return issue_rate > 1 && !sched_fusion ? issue_rate : 0;
11745}
11746
2d6bc7fa
KT
11747
11748/* Implement TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD as
11749 autopref_multipass_dfa_lookahead_guard from haifa-sched.c. It only
11750 has an effect if PARAM_SCHED_AUTOPREF_QUEUE_DEPTH > 0. */
11751
11752static int
11753aarch64_first_cycle_multipass_dfa_lookahead_guard (rtx_insn *insn,
11754 int ready_index)
11755{
11756 return autopref_multipass_dfa_lookahead_guard (insn, ready_index);
11757}
11758
11759
8990e73a
TB
11760/* Vectorizer cost model target hooks. */
11761
11762/* Implement targetm.vectorize.builtin_vectorization_cost. */
11763static int
11764aarch64_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
11765 tree vectype,
11766 int misalign ATTRIBUTE_UNUSED)
11767{
11768 unsigned elements;
cd8ae5ed
AP
11769 const cpu_vector_cost *costs = aarch64_tune_params.vec_costs;
11770 bool fp = false;
11771
11772 if (vectype != NULL)
11773 fp = FLOAT_TYPE_P (vectype);
8990e73a
TB
11774
11775 switch (type_of_cost)
11776 {
11777 case scalar_stmt:
cd8ae5ed 11778 return fp ? costs->scalar_fp_stmt_cost : costs->scalar_int_stmt_cost;
8990e73a
TB
11779
11780 case scalar_load:
cd8ae5ed 11781 return costs->scalar_load_cost;
8990e73a
TB
11782
11783 case scalar_store:
cd8ae5ed 11784 return costs->scalar_store_cost;
8990e73a
TB
11785
11786 case vector_stmt:
cd8ae5ed 11787 return fp ? costs->vec_fp_stmt_cost : costs->vec_int_stmt_cost;
8990e73a
TB
11788
11789 case vector_load:
cd8ae5ed 11790 return costs->vec_align_load_cost;
8990e73a
TB
11791
11792 case vector_store:
cd8ae5ed 11793 return costs->vec_store_cost;
8990e73a
TB
11794
11795 case vec_to_scalar:
cd8ae5ed 11796 return costs->vec_to_scalar_cost;
8990e73a
TB
11797
11798 case scalar_to_vec:
cd8ae5ed 11799 return costs->scalar_to_vec_cost;
8990e73a
TB
11800
11801 case unaligned_load:
cc9fe6bb 11802 case vector_gather_load:
cd8ae5ed 11803 return costs->vec_unalign_load_cost;
8990e73a
TB
11804
11805 case unaligned_store:
cc9fe6bb 11806 case vector_scatter_store:
cd8ae5ed 11807 return costs->vec_unalign_store_cost;
8990e73a
TB
11808
11809 case cond_branch_taken:
cd8ae5ed 11810 return costs->cond_taken_branch_cost;
8990e73a
TB
11811
11812 case cond_branch_not_taken:
cd8ae5ed 11813 return costs->cond_not_taken_branch_cost;
8990e73a
TB
11814
11815 case vec_perm:
cd8ae5ed 11816 return costs->vec_permute_cost;
c428f91c 11817
8990e73a 11818 case vec_promote_demote:
cd8ae5ed 11819 return fp ? costs->vec_fp_stmt_cost : costs->vec_int_stmt_cost;
8990e73a
TB
11820
11821 case vec_construct:
6a70badb 11822 elements = estimated_poly_value (TYPE_VECTOR_SUBPARTS (vectype));
8990e73a
TB
11823 return elements / 2 + 1;
11824
11825 default:
11826 gcc_unreachable ();
11827 }
11828}
11829
11830/* Implement targetm.vectorize.add_stmt_cost. */
11831static unsigned
11832aarch64_add_stmt_cost (void *data, int count, enum vect_cost_for_stmt kind,
11833 struct _stmt_vec_info *stmt_info, int misalign,
11834 enum vect_cost_model_location where)
11835{
11836 unsigned *cost = (unsigned *) data;
11837 unsigned retval = 0;
11838
11839 if (flag_vect_cost_model)
11840 {
11841 tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
11842 int stmt_cost =
11843 aarch64_builtin_vectorization_cost (kind, vectype, misalign);
11844
11845 /* Statements in an inner loop relative to the loop being
11846 vectorized are weighted more heavily. The value here is
058e4c71 11847 arbitrary and could potentially be improved with analysis. */
8990e73a 11848 if (where == vect_body && stmt_info && stmt_in_inner_loop_p (stmt_info))
058e4c71 11849 count *= 50; /* FIXME */
8990e73a
TB
11850
11851 retval = (unsigned) (count * stmt_cost);
11852 cost[where] += retval;
11853 }
11854
11855 return retval;
11856}
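/* Illustrative sketch (not part of aarch64.c): how the hook above folds a
   statement into the three cost buckets (prologue, body, epilogue).  The
   x50 inner-loop weighting mirrors the code above; the per-statement cost
   of 1 and the scenario (one aligned vector load in the inner loop of the
   nest being vectorized) are hypothetical.  */

enum sketch_where { SKETCH_PROLOGUE, SKETCH_BODY, SKETCH_EPILOGUE };

static unsigned
sketch_add_stmt_cost (unsigned cost[3], int count, int stmt_cost,
		      enum sketch_where where, int in_inner_loop)
{
  if (where == SKETCH_BODY && in_inner_loop)
    count *= 50;			/* same weighting as above */
  unsigned retval = (unsigned) (count * stmt_cost);
  cost[where] += retval;
  return retval;
}

/* A vector load with statement cost 1 adds 50 to cost[SKETCH_BODY] when it
   sits in the inner loop, but only 1 when it does not.  */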
11857
0cfff2a1 11858static void initialize_aarch64_code_model (struct gcc_options *);
43e9d192 11859
0cfff2a1
KT
11860/* Parse the TO_PARSE string and put the architecture struct that it
11861 selects into RES and the architectural features into ISA_FLAGS.
11862 Return an aarch64_parse_opt_result describing the parse result.
c7887347
ML
11863 If there is an error parsing, RES and ISA_FLAGS are left unchanged.
11864 When the TO_PARSE string contains an invalid extension,
11865 a copy of the string is created and stored to INVALID_EXTENSION. */
43e9d192 11866
0cfff2a1
KT
11867static enum aarch64_parse_opt_result
11868aarch64_parse_arch (const char *to_parse, const struct processor **res,
28108a53 11869 uint64_t *isa_flags, std::string *invalid_extension)
43e9d192 11870{
ff150bc4 11871 const char *ext;
43e9d192 11872 const struct processor *arch;
43e9d192
IB
11873 size_t len;
11874
ff150bc4 11875 ext = strchr (to_parse, '+');
43e9d192
IB
11876
11877 if (ext != NULL)
ff150bc4 11878 len = ext - to_parse;
43e9d192 11879 else
ff150bc4 11880 len = strlen (to_parse);
43e9d192
IB
11881
11882 if (len == 0)
0cfff2a1
KT
11883 return AARCH64_PARSE_MISSING_ARG;
11884
43e9d192 11885
0cfff2a1 11886 /* Loop through the list of supported ARCHes to find a match. */
43e9d192
IB
11887 for (arch = all_architectures; arch->name != NULL; arch++)
11888 {
ff150bc4
ML
11889 if (strlen (arch->name) == len
11890 && strncmp (arch->name, to_parse, len) == 0)
43e9d192 11891 {
28108a53 11892 uint64_t isa_temp = arch->flags;
43e9d192
IB
11893
11894 if (ext != NULL)
11895 {
0cfff2a1
KT
11896 /* TO_PARSE string contains at least one extension. */
11897 enum aarch64_parse_opt_result ext_res
c7887347 11898 = aarch64_parse_extension (ext, &isa_temp, invalid_extension);
43e9d192 11899
0cfff2a1
KT
11900 if (ext_res != AARCH64_PARSE_OK)
11901 return ext_res;
ffee7aa9 11902 }
0cfff2a1
KT
11903 /* Extension parsing was successful. Confirm the result
11904 arch and ISA flags. */
11905 *res = arch;
11906 *isa_flags = isa_temp;
11907 return AARCH64_PARSE_OK;
43e9d192
IB
11908 }
11909 }
11910
11911 /* ARCH name not found in list. */
0cfff2a1 11912 return AARCH64_PARSE_INVALID_ARG;
43e9d192
IB
11913}
11914
0cfff2a1
KT
11915/* Parse the TO_PARSE string and put the result tuning in RES and the
11916 architecture flags in ISA_FLAGS. Return an aarch64_parse_opt_result
11917 describing the parse result. If there is an error parsing, RES and
c7887347
ML
11918 ISA_FLAGS are left unchanged.
11919 When the TO_PARSE string contains an invalid extension,
11920 a copy of the string is created and stored to INVALID_EXTENSION. */
43e9d192 11921
0cfff2a1
KT
11922static enum aarch64_parse_opt_result
11923aarch64_parse_cpu (const char *to_parse, const struct processor **res,
28108a53 11924 uint64_t *isa_flags, std::string *invalid_extension)
43e9d192 11925{
ff150bc4 11926 const char *ext;
43e9d192 11927 const struct processor *cpu;
43e9d192
IB
11928 size_t len;
11929
ff150bc4 11930 ext = strchr (to_parse, '+');
43e9d192
IB
11931
11932 if (ext != NULL)
ff150bc4 11933 len = ext - to_parse;
43e9d192 11934 else
ff150bc4 11935 len = strlen (to_parse);
43e9d192
IB
11936
11937 if (len == 0)
0cfff2a1
KT
11938 return AARCH64_PARSE_MISSING_ARG;
11939
43e9d192
IB
11940
11941 /* Loop through the list of supported CPUs to find a match. */
11942 for (cpu = all_cores; cpu->name != NULL; cpu++)
11943 {
ff150bc4 11944 if (strlen (cpu->name) == len && strncmp (cpu->name, to_parse, len) == 0)
43e9d192 11945 {
28108a53 11946 uint64_t isa_temp = cpu->flags;
0cfff2a1 11947
43e9d192
IB
11948
11949 if (ext != NULL)
11950 {
0cfff2a1
KT
11951 /* TO_PARSE string contains at least one extension. */
11952 enum aarch64_parse_opt_result ext_res
c7887347 11953 = aarch64_parse_extension (ext, &isa_temp, invalid_extension);
43e9d192 11954
0cfff2a1
KT
11955 if (ext_res != AARCH64_PARSE_OK)
11956 return ext_res;
11957 }
11958 /* Extension parsing was successful. Confirm the result
11959 cpu and ISA flags. */
11960 *res = cpu;
11961 *isa_flags = isa_temp;
11962 return AARCH64_PARSE_OK;
43e9d192
IB
11963 }
11964 }
11965
11966 /* CPU name not found in list. */
0cfff2a1 11967 return AARCH64_PARSE_INVALID_ARG;
43e9d192
IB
11968}
11969
0cfff2a1
KT
11970/* Parse the TO_PARSE string and put the cpu it selects into RES.
11971 Return an aarch64_parse_opt_result describing the parse result.
11972 If the parsing fails the RES does not change. */
43e9d192 11973
0cfff2a1
KT
11974static enum aarch64_parse_opt_result
11975aarch64_parse_tune (const char *to_parse, const struct processor **res)
43e9d192
IB
11976{
11977 const struct processor *cpu;
43e9d192
IB
11978
11979 /* Loop through the list of supported CPUs to find a match. */
11980 for (cpu = all_cores; cpu->name != NULL; cpu++)
11981 {
ff150bc4 11982 if (strcmp (cpu->name, to_parse) == 0)
43e9d192 11983 {
0cfff2a1
KT
11984 *res = cpu;
11985 return AARCH64_PARSE_OK;
43e9d192
IB
11986 }
11987 }
11988
11989 /* CPU name not found in list. */
0cfff2a1 11990 return AARCH64_PARSE_INVALID_ARG;
43e9d192
IB
11991}
11992
8dec06f2
JG
11993/* Parse TOKEN, which has length LENGTH, to see if it is an option
11994 described in FLAG. If it is, return the index bit for that fusion type.
11995 If not, error (printing OPTION_NAME) and return zero. */
11996
11997static unsigned int
11998aarch64_parse_one_option_token (const char *token,
11999 size_t length,
12000 const struct aarch64_flag_desc *flag,
12001 const char *option_name)
12002{
12003 for (; flag->name != NULL; flag++)
12004 {
12005 if (length == strlen (flag->name)
12006 && !strncmp (flag->name, token, length))
12007 return flag->flag;
12008 }
12009
a3f9f006 12010 error ("unknown flag passed in %<-moverride=%s%> (%s)", option_name, token);
8dec06f2
JG
12011 return 0;
12012}
12013
12014/* Parse OPTION which is a comma-separated list of flags to enable.
12015 FLAGS gives the list of flags we understand, INITIAL_STATE gives any
12016 default state we inherit from the CPU tuning structures. OPTION_NAME
12017 gives the top-level option we are parsing in the -moverride string,
12018 for use in error messages. */
12019
12020static unsigned int
12021aarch64_parse_boolean_options (const char *option,
12022 const struct aarch64_flag_desc *flags,
12023 unsigned int initial_state,
12024 const char *option_name)
12025{
12026 const char separator = '.';
12027 const char* specs = option;
12028 const char* ntoken = option;
12029 unsigned int found_flags = initial_state;
12030
12031 while ((ntoken = strchr (specs, separator)))
12032 {
12033 size_t token_length = ntoken - specs;
12034 unsigned token_ops = aarch64_parse_one_option_token (specs,
12035 token_length,
12036 flags,
12037 option_name);
12038 /* If we find "none" (or, for simplicity's sake, an error) anywhere
12039 in the token stream, reset the supported operations. So:
12040
12041 adrp+add.cmp+branch.none.adrp+add
12042
12043 would have the result of turning on only adrp+add fusion. */
12044 if (!token_ops)
12045 found_flags = 0;
12046
12047 found_flags |= token_ops;
12048 specs = ++ntoken;
12049 }
12050
12051 /* The string ended with a trailing separator; report an error. */
12052 if (!(*specs))
12053 {
12054 error ("%s string ill-formed\n", option_name);
12055 return 0;
12056 }
12057
12058 /* We still have one more token to parse. */
12059 size_t token_length = strlen (specs);
12060 unsigned token_ops = aarch64_parse_one_option_token (specs,
12061 token_length,
12062 flags,
12063 option_name);
12064 if (!token_ops)
12065 found_flags = 0;
12066
12067 found_flags |= token_ops;
12068 return found_flags;
12069}
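/* Illustrative sketch (not part of aarch64.c): the folding behaviour of
   aarch64_parse_boolean_options on a '.'-separated list.  The flag names
   and bit values below are hypothetical; the reset-on-"none" semantics
   mirror the comment in the function above (an unknown token, which the
   real code also diagnoses, resets the set in the same way).  */

#include <string.h>

struct sketch_flag { const char *name; unsigned int flag; };

static const struct sketch_flag sketch_flags[] = {
  { "none", 0 }, { "adrp+add", 1u << 0 }, { "cmp+branch", 1u << 1 }, { 0, 0 }
};

static unsigned int
sketch_lookup (const char *token, size_t length)
{
  for (const struct sketch_flag *f = sketch_flags; f->name; f++)
    if (length == strlen (f->name) && !strncmp (f->name, token, length))
      return f->flag;
  return 0;
}

static unsigned int
sketch_parse_booleans (const char *option)
{
  unsigned int found = 0;
  const char *specs = option, *ntoken;
  while ((ntoken = strchr (specs, '.')))
    {
      unsigned int ops = sketch_lookup (specs, (size_t) (ntoken - specs));
      found = ops ? (found | ops) : 0;	/* "none" resets the set */
      specs = ntoken + 1;
    }
  unsigned int ops = sketch_lookup (specs, strlen (specs));
  return ops ? (found | ops) : 0;
}

/* sketch_parse_booleans ("adrp+add.cmp+branch.none.adrp+add") returns
   1u << 0, i.e. only adrp+add fusion ends up enabled, as in the comment
   above.  */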
12070
12071/* Support for overriding instruction fusion. */
12072
12073static void
12074aarch64_parse_fuse_string (const char *fuse_string,
12075 struct tune_params *tune)
12076{
12077 tune->fusible_ops = aarch64_parse_boolean_options (fuse_string,
12078 aarch64_fusible_pairs,
12079 tune->fusible_ops,
12080 "fuse=");
12081}
12082
12083/* Support for overriding other tuning flags. */
12084
12085static void
12086aarch64_parse_tune_string (const char *tune_string,
12087 struct tune_params *tune)
12088{
12089 tune->extra_tuning_flags
12090 = aarch64_parse_boolean_options (tune_string,
12091 aarch64_tuning_flags,
12092 tune->extra_tuning_flags,
12093 "tune=");
12094}
12095
886f092f
KT
12096/* Parse the sve_width tuning moverride string in TUNE_STRING.
12097 Accept the valid SVE vector widths allowed by
12098 aarch64_sve_vector_bits_enum and use it to override sve_width
12099 in TUNE. */
12100
12101static void
12102aarch64_parse_sve_width_string (const char *tune_string,
12103 struct tune_params *tune)
12104{
12105 int width = -1;
12106
12107 int n = sscanf (tune_string, "%d", &width);
12108 if (n != 1)
12109 {
12110 error ("invalid format for sve_width");
12111 return;
12112 }
12113 switch (width)
12114 {
12115 case SVE_128:
12116 case SVE_256:
12117 case SVE_512:
12118 case SVE_1024:
12119 case SVE_2048:
12120 break;
12121 default:
12122 error ("invalid sve_width value: %d", width);
12123 }
12124 tune->sve_width = (enum aarch64_sve_vector_bits_enum) width;
12125}
12126
8dec06f2
JG
12127/* Parse TOKEN, which has length LENGTH, to see if it is a tuning option
 12128 we understand. If it is, extract the option string and hand it off to
12129 the appropriate function. */
12130
12131void
12132aarch64_parse_one_override_token (const char* token,
12133 size_t length,
12134 struct tune_params *tune)
12135{
12136 const struct aarch64_tuning_override_function *fn
12137 = aarch64_tuning_override_functions;
12138
12139 const char *option_part = strchr (token, '=');
12140 if (!option_part)
12141 {
12142 error ("tuning string missing in option (%s)", token);
12143 return;
12144 }
12145
12146 /* Get the length of the option name. */
12147 length = option_part - token;
12148 /* Skip the '=' to get to the option string. */
12149 option_part++;
12150
12151 for (; fn->name != NULL; fn++)
12152 {
12153 if (!strncmp (fn->name, token, length))
12154 {
12155 fn->parse_override (option_part, tune);
12156 return;
12157 }
12158 }
12159
12160 error ("unknown tuning option (%s)",token);
12161 return;
12162}
12163
5eee3c34
JW
12164 /* Set the default TLS size and clamp it to what the code model in OPTS allows. */
12165
12166static void
12167initialize_aarch64_tls_size (struct gcc_options *opts)
12168{
12169 if (aarch64_tls_size == 0)
12170 aarch64_tls_size = 24;
12171
12172 switch (opts->x_aarch64_cmodel_var)
12173 {
12174 case AARCH64_CMODEL_TINY:
12175 /* Both the default and the maximum TLS size allowed under tiny are 1M, which
 12176 needs two instructions to address, so we clamp the size to 24. */
12177 if (aarch64_tls_size > 24)
12178 aarch64_tls_size = 24;
12179 break;
12180 case AARCH64_CMODEL_SMALL:
12181 /* The maximum TLS size allowed under small is 4G. */
12182 if (aarch64_tls_size > 32)
12183 aarch64_tls_size = 32;
12184 break;
12185 case AARCH64_CMODEL_LARGE:
12186 /* The maximum TLS size allowed under large is 16E.
12187 FIXME: 16E should be 64bit, we only support 48bit offset now. */
12188 if (aarch64_tls_size > 48)
12189 aarch64_tls_size = 48;
12190 break;
12191 default:
12192 gcc_unreachable ();
12193 }
12194
12195 return;
12196}
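/* Illustrative sketch (not part of aarch64.c): the clamping above as a
   pure function over the accepted -mtls-size values (12, 24, 32, 48).
   Requesting 48 under the small code model yields 32, and anything above
   24 under tiny yields 24.  */

enum sketch_cmodel { SKETCH_TINY, SKETCH_SMALL, SKETCH_LARGE };

static int
sketch_clamp_tls_size (enum sketch_cmodel cmodel, int size)
{
  switch (cmodel)
    {
    case SKETCH_TINY:  return size > 24 ? 24 : size;
    case SKETCH_SMALL: return size > 32 ? 32 : size;
    case SKETCH_LARGE: return size > 48 ? 48 : size;
    }
  return size;
}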
12197
8dec06f2
JG
12198/* Parse STRING looking for options in the format:
12199 string :: option:string
12200 option :: name=substring
12201 name :: {a-z}
12202 substring :: defined by option. */
12203
12204static void
12205aarch64_parse_override_string (const char* input_string,
12206 struct tune_params* tune)
12207{
12208 const char separator = ':';
12209 size_t string_length = strlen (input_string) + 1;
12210 char *string_root = (char *) xmalloc (sizeof (*string_root) * string_length);
12211 char *string = string_root;
12212 strncpy (string, input_string, string_length);
12213 string[string_length - 1] = '\0';
12214
12215 char* ntoken = string;
12216
12217 while ((ntoken = strchr (string, separator)))
12218 {
12219 size_t token_length = ntoken - string;
12220 /* NUL-terminate this token so it can be handled as a string. */
12221 *ntoken = '\0';
12222 aarch64_parse_one_override_token (string, token_length, tune);
12223 string = ++ntoken;
12224 }
12225
12226 /* One last option to parse. */
12227 aarch64_parse_one_override_token (string, strlen (string), tune);
12228 free (string_root);
12229}
43e9d192 12230
43e9d192
IB
12231
12232static void
0cfff2a1 12233aarch64_override_options_after_change_1 (struct gcc_options *opts)
43e9d192 12234{
efac62a3
ST
12235 if (accepted_branch_protection_string)
12236 {
12237 opts->x_aarch64_branch_protection_string
12238 = xstrdup (accepted_branch_protection_string);
12239 }
12240
acea40ac
WD
12241 /* PR 70044: We have to be careful about being called multiple times for the
12242 same function. This means all changes should be repeatable. */
12243
d6cb6d6a
WD
12244 /* Set aarch64_use_frame_pointer based on -fno-omit-frame-pointer.
12245 Disable the frame pointer flag so the mid-end will not use a frame
12246 pointer in leaf functions in order to support -fomit-leaf-frame-pointer.
12247 Set x_flag_omit_frame_pointer to the special value 2 to differentiate
12248 between -fomit-frame-pointer (1) and -fno-omit-frame-pointer (2). */
12249 aarch64_use_frame_pointer = opts->x_flag_omit_frame_pointer != 1;
acea40ac 12250 if (opts->x_flag_omit_frame_pointer == 0)
a3dc8760 12251 opts->x_flag_omit_frame_pointer = 2;
43e9d192 12252
1be34295 12253 /* If not optimizing for size, set the default
0cfff2a1
KT
12254 alignment to what the target wants. */
12255 if (!opts->x_optimize_size)
43e9d192 12256 {
c518c102
ML
12257 if (opts->x_flag_align_loops && !opts->x_str_align_loops)
12258 opts->x_str_align_loops = aarch64_tune_params.loop_align;
12259 if (opts->x_flag_align_jumps && !opts->x_str_align_jumps)
12260 opts->x_str_align_jumps = aarch64_tune_params.jump_align;
12261 if (opts->x_flag_align_functions && !opts->x_str_align_functions)
12262 opts->x_str_align_functions = aarch64_tune_params.function_align;
43e9d192 12263 }
b4f50fd4 12264
9ee6540a
WD
12265 /* We default to no pc-relative literal loads. */
12266
12267 aarch64_pcrelative_literal_loads = false;
12268
12269 /* If -mpc-relative-literal-loads is set on the command line, this
b4f50fd4 12270 implies that the user asked for PC relative literal loads. */
9ee6540a
WD
12271 if (opts->x_pcrelative_literal_loads == 1)
12272 aarch64_pcrelative_literal_loads = true;
b4f50fd4 12273
9ee6540a
WD
12274 /* In the tiny memory model it makes no sense to disallow PC relative
12275 literal pool loads. */
12276 if (aarch64_cmodel == AARCH64_CMODEL_TINY
12277 || aarch64_cmodel == AARCH64_CMODEL_TINY_PIC)
12278 aarch64_pcrelative_literal_loads = true;
98daafa0
EM
12279
12280 /* When enabling the lower precision Newton series for the square root, also
12281 enable it for the reciprocal square root, since the latter is an
12282 intermediary step for the former. */
12283 if (flag_mlow_precision_sqrt)
12284 flag_mrecip_low_precision_sqrt = true;
0cfff2a1 12285}
43e9d192 12286
0cfff2a1
KT
12287/* 'Unpack' up the internal tuning structs and update the options
12288 in OPTS. The caller must have set up selected_tune and selected_arch
12289 as all the other target-specific codegen decisions are
12290 derived from them. */
12291
e4ea20c8 12292void
0cfff2a1
KT
12293aarch64_override_options_internal (struct gcc_options *opts)
12294{
12295 aarch64_tune_flags = selected_tune->flags;
12296 aarch64_tune = selected_tune->sched_core;
12297 /* Make a copy of the tuning parameters attached to the core, which
12298 we may later overwrite. */
12299 aarch64_tune_params = *(selected_tune->tune);
12300 aarch64_architecture_version = selected_arch->architecture_version;
12301
12302 if (opts->x_aarch64_override_tune_string)
12303 aarch64_parse_override_string (opts->x_aarch64_override_tune_string,
12304 &aarch64_tune_params);
12305
12306 /* This target defaults to strict volatile bitfields. */
12307 if (opts->x_flag_strict_volatile_bitfields < 0 && abi_version_at_least (2))
12308 opts->x_flag_strict_volatile_bitfields = 1;
12309
cd0b2d36
RR
12310 if (aarch64_stack_protector_guard == SSP_GLOBAL
12311 && opts->x_aarch64_stack_protector_guard_offset_str)
12312 {
41804907 12313 error ("incompatible options %<-mstack-protector-guard=global%> and "
63d42e89 12314 "%<-mstack-protector-guard-offset=%s%>",
cd0b2d36
RR
12315 aarch64_stack_protector_guard_offset_str);
12316 }
12317
12318 if (aarch64_stack_protector_guard == SSP_SYSREG
12319 && !(opts->x_aarch64_stack_protector_guard_offset_str
12320 && opts->x_aarch64_stack_protector_guard_reg_str))
12321 {
a3f9f006
ML
12322 error ("both %<-mstack-protector-guard-offset%> and "
12323 "%<-mstack-protector-guard-reg%> must be used "
12324 "with %<-mstack-protector-guard=sysreg%>");
cd0b2d36
RR
12325 }
12326
12327 if (opts->x_aarch64_stack_protector_guard_reg_str)
12328 {
12329 if (strlen (opts->x_aarch64_stack_protector_guard_reg_str) > 100)
12330 error ("specify a system register with a small string length.");
12331 }
12332
12333 if (opts->x_aarch64_stack_protector_guard_offset_str)
12334 {
12335 char *end;
12336 const char *str = aarch64_stack_protector_guard_offset_str;
12337 errno = 0;
12338 long offs = strtol (aarch64_stack_protector_guard_offset_str, &end, 0);
12339 if (!*str || *end || errno)
12340 error ("%qs is not a valid offset in %qs", str,
63d42e89 12341 "-mstack-protector-guard-offset=");
cd0b2d36
RR
12342 aarch64_stack_protector_guard_offset = offs;
12343 }
12344
0cfff2a1 12345 initialize_aarch64_code_model (opts);
5eee3c34 12346 initialize_aarch64_tls_size (opts);
63892fa2 12347
2d6bc7fa
KT
12348 int queue_depth = 0;
12349 switch (aarch64_tune_params.autoprefetcher_model)
12350 {
12351 case tune_params::AUTOPREFETCHER_OFF:
12352 queue_depth = -1;
12353 break;
12354 case tune_params::AUTOPREFETCHER_WEAK:
12355 queue_depth = 0;
12356 break;
12357 case tune_params::AUTOPREFETCHER_STRONG:
12358 queue_depth = max_insn_queue_index + 1;
12359 break;
12360 default:
12361 gcc_unreachable ();
12362 }
12363
12364 /* We don't mind passing in global_options_set here as we don't use
12365 the *options_set structs anyway. */
12366 maybe_set_param_value (PARAM_SCHED_AUTOPREF_QUEUE_DEPTH,
12367 queue_depth,
12368 opts->x_param_values,
12369 global_options_set.x_param_values);
12370
9d2c6e2e
MK
12371 /* Set up parameters to be used in prefetching algorithm. Do not
12372 override the defaults unless we are tuning for a core we have
12373 researched values for. */
12374 if (aarch64_tune_params.prefetch->num_slots > 0)
12375 maybe_set_param_value (PARAM_SIMULTANEOUS_PREFETCHES,
12376 aarch64_tune_params.prefetch->num_slots,
12377 opts->x_param_values,
12378 global_options_set.x_param_values);
12379 if (aarch64_tune_params.prefetch->l1_cache_size >= 0)
12380 maybe_set_param_value (PARAM_L1_CACHE_SIZE,
12381 aarch64_tune_params.prefetch->l1_cache_size,
12382 opts->x_param_values,
12383 global_options_set.x_param_values);
12384 if (aarch64_tune_params.prefetch->l1_cache_line_size >= 0)
50487d79 12385 maybe_set_param_value (PARAM_L1_CACHE_LINE_SIZE,
9d2c6e2e
MK
12386 aarch64_tune_params.prefetch->l1_cache_line_size,
12387 opts->x_param_values,
12388 global_options_set.x_param_values);
12389 if (aarch64_tune_params.prefetch->l2_cache_size >= 0)
12390 maybe_set_param_value (PARAM_L2_CACHE_SIZE,
12391 aarch64_tune_params.prefetch->l2_cache_size,
50487d79
EM
12392 opts->x_param_values,
12393 global_options_set.x_param_values);
d2ff35c0
LM
12394 if (!aarch64_tune_params.prefetch->prefetch_dynamic_strides)
12395 maybe_set_param_value (PARAM_PREFETCH_DYNAMIC_STRIDES,
12396 0,
12397 opts->x_param_values,
12398 global_options_set.x_param_values);
59100dfc
LM
12399 if (aarch64_tune_params.prefetch->minimum_stride >= 0)
12400 maybe_set_param_value (PARAM_PREFETCH_MINIMUM_STRIDE,
12401 aarch64_tune_params.prefetch->minimum_stride,
12402 opts->x_param_values,
12403 global_options_set.x_param_values);
50487d79 12404
13494fcb
WD
12405 /* Use the alternative scheduling-pressure algorithm by default. */
12406 maybe_set_param_value (PARAM_SCHED_PRESSURE_ALGORITHM, SCHED_PRESSURE_MODEL,
12407 opts->x_param_values,
12408 global_options_set.x_param_values);
12409
fbe9af50
TC
12410 /* If the user hasn't changed it via configure then set the default to 64 KB
12411 for the backend. */
12412 maybe_set_param_value (PARAM_STACK_CLASH_PROTECTION_GUARD_SIZE,
12413 DEFAULT_STK_CLASH_GUARD_SIZE == 0
12414 ? 16 : DEFAULT_STK_CLASH_GUARD_SIZE,
12415 opts->x_param_values,
12416 global_options_set.x_param_values);
12417
12418 /* Validate the guard size. */
12419 int guard_size = PARAM_VALUE (PARAM_STACK_CLASH_PROTECTION_GUARD_SIZE);
fbe9af50
TC
12420
12421 /* Enforce that interval is the same size as size so the mid-end does the
12422 right thing. */
12423 maybe_set_param_value (PARAM_STACK_CLASH_PROTECTION_PROBE_INTERVAL,
12424 guard_size,
12425 opts->x_param_values,
12426 global_options_set.x_param_values);
12427
12428 /* The maybe_set calls won't update the value if the user has explicitly set
12429 one. Which means we need to validate that probing interval and guard size
12430 are equal. */
12431 int probe_interval
12432 = PARAM_VALUE (PARAM_STACK_CLASH_PROTECTION_PROBE_INTERVAL);
12433 if (guard_size != probe_interval)
904f3daa
ML
12434 error ("stack clash guard size %<%d%> must be equal to probing interval "
12435 "%<%d%>", guard_size, probe_interval);
fbe9af50 12436
16b2cafd
MK
12437 /* Enable sw prefetching at specified optimization level for
12438 CPUS that have prefetch. Lower optimization level threshold by 1
12439 when profiling is enabled. */
12440 if (opts->x_flag_prefetch_loop_arrays < 0
12441 && !opts->x_optimize_size
12442 && aarch64_tune_params.prefetch->default_opt_level >= 0
12443 && opts->x_optimize >= aarch64_tune_params.prefetch->default_opt_level)
12444 opts->x_flag_prefetch_loop_arrays = 1;
12445
266c2b54
ML
12446 if (opts->x_aarch64_arch_string == NULL)
12447 opts->x_aarch64_arch_string = selected_arch->name;
12448 if (opts->x_aarch64_cpu_string == NULL)
12449 opts->x_aarch64_cpu_string = selected_cpu->name;
12450 if (opts->x_aarch64_tune_string == NULL)
12451 opts->x_aarch64_tune_string = selected_tune->name;
12452
0cfff2a1
KT
12453 aarch64_override_options_after_change_1 (opts);
12454}
43e9d192 12455
01f44038
KT
12456/* Print a hint with a suggestion for a core or architecture name that
12457 most closely resembles what the user passed in STR. ARCH is true if
12458 the user is asking for an architecture name. ARCH is false if the user
12459 is asking for a core name. */
12460
12461static void
12462aarch64_print_hint_for_core_or_arch (const char *str, bool arch)
12463{
12464 auto_vec<const char *> candidates;
12465 const struct processor *entry = arch ? all_architectures : all_cores;
12466 for (; entry->name != NULL; entry++)
12467 candidates.safe_push (entry->name);
a08b5429
ML
12468
12469#ifdef HAVE_LOCAL_CPU_DETECT
12470 /* Add also "native" as possible value. */
12471 if (arch)
12472 candidates.safe_push ("native");
12473#endif
12474
01f44038
KT
12475 char *s;
12476 const char *hint = candidates_list_and_hint (str, s, candidates);
12477 if (hint)
12478 inform (input_location, "valid arguments are: %s;"
12479 " did you mean %qs?", s, hint);
6285e915
ML
12480 else
12481 inform (input_location, "valid arguments are: %s", s);
12482
01f44038
KT
12483 XDELETEVEC (s);
12484}
12485
12486/* Print a hint with a suggestion for a core name that most closely resembles
12487 what the user passed in STR. */
12488
12489inline static void
12490aarch64_print_hint_for_core (const char *str)
12491{
12492 aarch64_print_hint_for_core_or_arch (str, false);
12493}
12494
12495/* Print a hint with a suggestion for an architecture name that most closely
12496 resembles what the user passed in STR. */
12497
12498inline static void
12499aarch64_print_hint_for_arch (const char *str)
12500{
12501 aarch64_print_hint_for_core_or_arch (str, true);
12502}
12503
c7887347
ML
12504
12505/* Print a hint with a suggestion for an extension name
12506 that most closely resembles what the user passed in STR. */
12507
12508void
12509aarch64_print_hint_for_extensions (const std::string &str)
12510{
12511 auto_vec<const char *> candidates;
12512 aarch64_get_all_extension_candidates (&candidates);
12513 char *s;
12514 const char *hint = candidates_list_and_hint (str.c_str (), s, candidates);
12515 if (hint)
12516 inform (input_location, "valid arguments are: %s;"
12517 " did you mean %qs?", s, hint);
12518 else
12519 inform (input_location, "valid arguments are: %s", s);
12520
12521 XDELETEVEC (s);
12522}
12523
0cfff2a1
KT
12524/* Validate a command-line -mcpu option. Parse the cpu and extensions (if any)
12525 specified in STR and throw errors if appropriate. Put the results if
361fb3ee
KT
12526 they are valid in RES and ISA_FLAGS. Return whether the option is
12527 valid. */
43e9d192 12528
361fb3ee 12529static bool
0cfff2a1 12530aarch64_validate_mcpu (const char *str, const struct processor **res,
28108a53 12531 uint64_t *isa_flags)
0cfff2a1 12532{
c7887347 12533 std::string invalid_extension;
0cfff2a1 12534 enum aarch64_parse_opt_result parse_res
c7887347 12535 = aarch64_parse_cpu (str, res, isa_flags, &invalid_extension);
0cfff2a1
KT
12536
12537 if (parse_res == AARCH64_PARSE_OK)
361fb3ee 12538 return true;
0cfff2a1
KT
12539
12540 switch (parse_res)
12541 {
12542 case AARCH64_PARSE_MISSING_ARG:
fb241da2 12543 error ("missing cpu name in %<-mcpu=%s%>", str);
0cfff2a1
KT
12544 break;
12545 case AARCH64_PARSE_INVALID_ARG:
a3f9f006 12546 error ("unknown value %qs for %<-mcpu%>", str);
01f44038 12547 aarch64_print_hint_for_core (str);
0cfff2a1
KT
12548 break;
12549 case AARCH64_PARSE_INVALID_FEATURE:
c7887347
ML
12550 error ("invalid feature modifier %qs in %<-mcpu=%s%>",
12551 invalid_extension.c_str (), str);
12552 aarch64_print_hint_for_extensions (invalid_extension);
0cfff2a1
KT
12553 break;
12554 default:
12555 gcc_unreachable ();
12556 }
361fb3ee
KT
12557
12558 return false;
0cfff2a1
KT
12559}
12560
efac62a3
ST
12561/* Parses CONST_STR for branch protection features specified in
12562 aarch64_branch_protect_types, and set any global variables required. Returns
12563 the parsing result and assigns LAST_STR to the last processed token from
12564 CONST_STR so that it can be used for error reporting. */
12565
12566static enum
12567aarch64_parse_opt_result aarch64_parse_branch_protection (const char *const_str,
12568 char** last_str)
12569{
12570 char *str_root = xstrdup (const_str);
12571 char* token_save = NULL;
12572 char *str = strtok_r (str_root, "+", &token_save);
12573 enum aarch64_parse_opt_result res = AARCH64_PARSE_OK;
12574 if (!str)
12575 res = AARCH64_PARSE_MISSING_ARG;
12576 else
12577 {
12578 char *next_str = strtok_r (NULL, "+", &token_save);
12579 /* Reset the branch protection features to their defaults. */
12580 aarch64_handle_no_branch_protection (NULL, NULL);
12581
12582 while (str && res == AARCH64_PARSE_OK)
12583 {
12584 const aarch64_branch_protect_type* type = aarch64_branch_protect_types;
12585 bool found = false;
12586 /* Search for this type. */
12587 while (type && type->name && !found && res == AARCH64_PARSE_OK)
12588 {
12589 if (strcmp (str, type->name) == 0)
12590 {
12591 found = true;
12592 res = type->handler (str, next_str);
12593 str = next_str;
12594 next_str = strtok_r (NULL, "+", &token_save);
12595 }
12596 else
12597 type++;
12598 }
12599 if (found && res == AARCH64_PARSE_OK)
12600 {
12601 bool found_subtype = true;
12602 /* Loop through each token until we find one that isn't a
12603 subtype. */
12604 while (found_subtype)
12605 {
12606 found_subtype = false;
12607 const aarch64_branch_protect_type *subtype = type->subtypes;
12608 /* Search for the subtype. */
12609 while (str && subtype && subtype->name && !found_subtype
12610 && res == AARCH64_PARSE_OK)
12611 {
12612 if (strcmp (str, subtype->name) == 0)
12613 {
12614 found_subtype = true;
12615 res = subtype->handler (str, next_str);
12616 str = next_str;
12617 next_str = strtok_r (NULL, "+", &token_save);
12618 }
12619 else
12620 subtype++;
12621 }
12622 }
12623 }
12624 else if (!found)
12625 res = AARCH64_PARSE_INVALID_ARG;
12626 }
12627 }
12628 /* Copy the last processed token into the argument to pass it back.
12629 Used by option and attribute validation to print the offending token. */
12630 if (last_str)
12631 {
12632 if (str) strcpy (*last_str, str);
12633 else *last_str = NULL;
12634 }
12635 if (res == AARCH64_PARSE_OK)
12636 {
12637 /* If needed, alloc the accepted string then copy in const_str.
12638 Used by override_option_after_change_1. */
12639 if (!accepted_branch_protection_string)
12640 accepted_branch_protection_string = (char *) xmalloc (
12641 BRANCH_PROTECT_STR_MAX
12642 + 1);
12643 strncpy (accepted_branch_protection_string, const_str,
12644 BRANCH_PROTECT_STR_MAX + 1);
12645 /* Forcibly null-terminate. */
12646 accepted_branch_protection_string[BRANCH_PROTECT_STR_MAX] = '\0';
12647 }
12648 return res;
12649}
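/* Illustrative sketch (not part of aarch64.c): the tokenization the parser
   above performs on a -mbranch-protection value.  Tokens are split on '+'
   with strtok_r; each token is matched first against the top-level types
   and, once one matches, the following tokens are tried against that
   type's subtypes (e.g. "leaf" as a subtype of "pac-ret").  Only the
   splitting is shown here; the type tables live elsewhere in this file.  */

#include <stdio.h>
#include <string.h>

static void
sketch_split_branch_protection (const char *value)
{
  char copy[128];			/* the real code uses xstrdup */
  strncpy (copy, value, sizeof copy - 1);
  copy[sizeof copy - 1] = '\0';

  char *save = NULL;
  for (char *tok = strtok_r (copy, "+", &save); tok;
       tok = strtok_r (NULL, "+", &save))
    printf ("token: %s\n", tok);
}

/* sketch_split_branch_protection ("pac-ret+leaf+bti") prints "pac-ret",
   "leaf" and "bti" in that order.  */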
12650
12651static bool
12652aarch64_validate_mbranch_protection (const char *const_str)
12653{
12654 char *str = (char *) xmalloc (strlen (const_str) + 1);
12655 enum aarch64_parse_opt_result res =
12656 aarch64_parse_branch_protection (const_str, &str);
12657 if (res == AARCH64_PARSE_INVALID_ARG)
a9c697b8 12658 error ("invalid argument %<%s%> for %<-mbranch-protection=%>", str);
efac62a3 12659 else if (res == AARCH64_PARSE_MISSING_ARG)
a9c697b8 12660 error ("missing argument for %<-mbranch-protection=%>");
efac62a3
ST
12661 free (str);
12662 return res == AARCH64_PARSE_OK;
12663}
12664
0cfff2a1
KT
12665/* Validate a command-line -march option. Parse the arch and extensions
12666 (if any) specified in STR and throw errors if appropriate. Put the
361fb3ee
KT
12667 results, if they are valid, in RES and ISA_FLAGS. Return whether the
12668 option is valid. */
0cfff2a1 12669
361fb3ee 12670static bool
0cfff2a1 12671aarch64_validate_march (const char *str, const struct processor **res,
28108a53 12672 uint64_t *isa_flags)
0cfff2a1 12673{
c7887347 12674 std::string invalid_extension;
0cfff2a1 12675 enum aarch64_parse_opt_result parse_res
c7887347 12676 = aarch64_parse_arch (str, res, isa_flags, &invalid_extension);
0cfff2a1
KT
12677
12678 if (parse_res == AARCH64_PARSE_OK)
361fb3ee 12679 return true;
0cfff2a1
KT
12680
12681 switch (parse_res)
12682 {
12683 case AARCH64_PARSE_MISSING_ARG:
fb241da2 12684 error ("missing arch name in %<-march=%s%>", str);
0cfff2a1
KT
12685 break;
12686 case AARCH64_PARSE_INVALID_ARG:
a3f9f006 12687 error ("unknown value %qs for %<-march%>", str);
01f44038 12688 aarch64_print_hint_for_arch (str);
0cfff2a1
KT
12689 break;
12690 case AARCH64_PARSE_INVALID_FEATURE:
c7887347
ML
12691 error ("invalid feature modifier %qs in %<-march=%s%>",
12692 invalid_extension.c_str (), str);
12693 aarch64_print_hint_for_extensions (invalid_extension);
0cfff2a1
KT
12694 break;
12695 default:
12696 gcc_unreachable ();
12697 }
361fb3ee
KT
12698
12699 return false;
0cfff2a1
KT
12700}
12701
12702/* Validate a command-line -mtune option. Parse the cpu
12703 specified in STR and throw errors if appropriate. Put the
361fb3ee
KT
12704 result, if it is valid, in RES. Return whether the option is
12705 valid. */
0cfff2a1 12706
361fb3ee 12707static bool
0cfff2a1
KT
12708aarch64_validate_mtune (const char *str, const struct processor **res)
12709{
12710 enum aarch64_parse_opt_result parse_res
12711 = aarch64_parse_tune (str, res);
12712
12713 if (parse_res == AARCH64_PARSE_OK)
361fb3ee 12714 return true;
0cfff2a1
KT
12715
12716 switch (parse_res)
12717 {
12718 case AARCH64_PARSE_MISSING_ARG:
fb241da2 12719 error ("missing cpu name in %<-mtune=%s%>", str);
0cfff2a1
KT
12720 break;
12721 case AARCH64_PARSE_INVALID_ARG:
a3f9f006 12722 error ("unknown value %qs for %<-mtune%>", str);
01f44038 12723 aarch64_print_hint_for_core (str);
0cfff2a1
KT
12724 break;
12725 default:
12726 gcc_unreachable ();
12727 }
361fb3ee
KT
12728 return false;
12729}
12730
12731/* Return the CPU corresponding to the enum CPU.
12732 If it doesn't specify a cpu, return the default. */
12733
12734static const struct processor *
12735aarch64_get_tune_cpu (enum aarch64_processor cpu)
12736{
12737 if (cpu != aarch64_none)
12738 return &all_cores[cpu];
12739
12740 /* The & 0x3f is to extract the bottom 6 bits that encode the
12741 default cpu as selected by the --with-cpu GCC configure option
12742 in config.gcc.
12743 ???: The whole TARGET_CPU_DEFAULT and AARCH64_CPU_DEFAULT_FLAGS
12744 flags mechanism should be reworked to make it more sane. */
12745 return &all_cores[TARGET_CPU_DEFAULT & 0x3f];
12746}
12747
12748/* Return the architecture corresponding to the enum ARCH.
12749 If it doesn't specify a valid architecture, return the default. */
12750
12751static const struct processor *
12752aarch64_get_arch (enum aarch64_arch arch)
12753{
12754 if (arch != aarch64_no_arch)
12755 return &all_architectures[arch];
12756
12757 const struct processor *cpu = &all_cores[TARGET_CPU_DEFAULT & 0x3f];
12758
12759 return &all_architectures[cpu->arch];
0cfff2a1
KT
12760}
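/* Illustrative sketch (not part of aarch64.c): the packing that the
   "& 0x3f" above and the ">> 6" in aarch64_override_options undo.  The
   configure-time default keeps the CPU identifier in the low 6 bits and
   the default ISA flag bits above them.  The helper name and types below
   are hypothetical.  */

static unsigned long long
sketch_pack_cpu_default (unsigned int cpu_ident, unsigned long long isa_flags)
{
  return (unsigned long long) (cpu_ident & 0x3f) | (isa_flags << 6);
}

/* Decoding: cpu = packed & 0x3f and flags = packed >> 6, which is exactly
   what aarch64_get_tune_cpu and aarch64_override_options do with
   TARGET_CPU_DEFAULT.  */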
12761
43cacb12
RS
12762/* Return the VG value associated with -msve-vector-bits= value VALUE. */
12763
12764static poly_uint16
12765aarch64_convert_sve_vector_bits (aarch64_sve_vector_bits_enum value)
12766{
12767 /* For now generate vector-length agnostic code for -msve-vector-bits=128.
12768 This ensures we can clearly distinguish SVE and Advanced SIMD modes when
12769 deciding which .md file patterns to use and when deciding whether
12770 something is a legitimate address or constant. */
12771 if (value == SVE_SCALABLE || value == SVE_128)
12772 return poly_uint16 (2, 2);
12773 else
12774 return (int) value / 64;
12775}
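/* Illustrative sketch (not part of aarch64.c): the -msve-vector-bits=N to
   VG (number of 64-bit granules) mapping used above, ignoring the poly
   representation: 256 -> 4, 512 -> 8, 1024 -> 16, 2048 -> 32.  The
   "scalable" and 128-bit cases instead stay length-agnostic, i.e. the
   (2, 2) poly value above, meaning 2 + 2*x granules.  */

static int
sketch_sve_bits_to_vg (int bits)
{
  return bits / 64;	/* e.g. sketch_sve_bits_to_vg (256) == 4 */
}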
12776
0cfff2a1
KT
12777/* Implement TARGET_OPTION_OVERRIDE. This is called once in the beginning
12778 and is used to parse the -m{cpu,tune,arch} strings and setup the initial
12779 tuning structs. In particular it must set selected_tune and
12780 aarch64_isa_flags that define the available ISA features and tuning
12781 decisions. It must also set selected_arch as this will be used to
12782 output the .arch asm tags for each function. */
12783
12784static void
12785aarch64_override_options (void)
12786{
28108a53
MM
12787 uint64_t cpu_isa = 0;
12788 uint64_t arch_isa = 0;
0cfff2a1
KT
12789 aarch64_isa_flags = 0;
12790
361fb3ee
KT
12791 bool valid_cpu = true;
12792 bool valid_tune = true;
12793 bool valid_arch = true;
12794
0cfff2a1
KT
12795 selected_cpu = NULL;
12796 selected_arch = NULL;
12797 selected_tune = NULL;
12798
efac62a3
ST
12799 if (aarch64_branch_protection_string)
12800 aarch64_validate_mbranch_protection (aarch64_branch_protection_string);
12801
0cfff2a1
KT
12802 /* -mcpu=CPU is shorthand for -march=ARCH_FOR_CPU, -mtune=CPU.
12803 If either of -march or -mtune is given, they override their
12804 respective component of -mcpu. */
12805 if (aarch64_cpu_string)
361fb3ee
KT
12806 valid_cpu = aarch64_validate_mcpu (aarch64_cpu_string, &selected_cpu,
12807 &cpu_isa);
0cfff2a1
KT
12808
12809 if (aarch64_arch_string)
361fb3ee
KT
12810 valid_arch = aarch64_validate_march (aarch64_arch_string, &selected_arch,
12811 &arch_isa);
0cfff2a1
KT
12812
12813 if (aarch64_tune_string)
361fb3ee 12814 valid_tune = aarch64_validate_mtune (aarch64_tune_string, &selected_tune);
43e9d192 12815
6881e3c1
OH
12816#ifdef SUBTARGET_OVERRIDE_OPTIONS
12817 SUBTARGET_OVERRIDE_OPTIONS;
12818#endif
12819
43e9d192
IB
12820 /* If the user did not specify a processor, choose the default
12821 one for them. This will be the CPU set during configuration using
a3cd0246 12822 --with-cpu, otherwise it is "generic". */
43e9d192
IB
12823 if (!selected_cpu)
12824 {
0cfff2a1
KT
12825 if (selected_arch)
12826 {
12827 selected_cpu = &all_cores[selected_arch->ident];
12828 aarch64_isa_flags = arch_isa;
361fb3ee 12829 explicit_arch = selected_arch->arch;
0cfff2a1
KT
12830 }
12831 else
12832 {
361fb3ee
KT
12833 /* Get default configure-time CPU. */
12834 selected_cpu = aarch64_get_tune_cpu (aarch64_none);
0cfff2a1
KT
12835 aarch64_isa_flags = TARGET_CPU_DEFAULT >> 6;
12836 }
361fb3ee
KT
12837
12838 if (selected_tune)
12839 explicit_tune_core = selected_tune->ident;
0cfff2a1
KT
12840 }
12841 /* If both -mcpu and -march are specified check that they are architecturally
12842 compatible, warn if they're not and prefer the -march ISA flags. */
12843 else if (selected_arch)
12844 {
12845 if (selected_arch->arch != selected_cpu->arch)
12846 {
a3f9f006 12847 warning (0, "switch %<-mcpu=%s%> conflicts with %<-march=%s%> switch",
0cfff2a1
KT
12848 all_architectures[selected_cpu->arch].name,
12849 selected_arch->name);
12850 }
12851 aarch64_isa_flags = arch_isa;
361fb3ee
KT
12852 explicit_arch = selected_arch->arch;
12853 explicit_tune_core = selected_tune ? selected_tune->ident
12854 : selected_cpu->ident;
0cfff2a1
KT
12855 }
12856 else
12857 {
12858 /* -mcpu but no -march. */
12859 aarch64_isa_flags = cpu_isa;
361fb3ee
KT
12860 explicit_tune_core = selected_tune ? selected_tune->ident
12861 : selected_cpu->ident;
12862 gcc_assert (selected_cpu);
12863 selected_arch = &all_architectures[selected_cpu->arch];
12864 explicit_arch = selected_arch->arch;
43e9d192
IB
12865 }
12866
0cfff2a1
KT
12867 /* Set the arch as well, since we will need it when outputting
 12868 the .arch directive in assembly. */
12869 if (!selected_arch)
12870 {
12871 gcc_assert (selected_cpu);
12872 selected_arch = &all_architectures[selected_cpu->arch];
12873 }
43e9d192 12874
43e9d192 12875 if (!selected_tune)
3edaf26d 12876 selected_tune = selected_cpu;
43e9d192 12877
c7ff4f0f
SD
12878 if (aarch64_enable_bti == 2)
12879 {
12880#ifdef TARGET_ENABLE_BTI
12881 aarch64_enable_bti = 1;
12882#else
12883 aarch64_enable_bti = 0;
12884#endif
12885 }
12886
12887 /* Return address signing is currently not supported for ILP32 targets. For
12888 LP64 targets use the configured option in the absence of a command-line
12889 option for -mbranch-protection. */
12890 if (!TARGET_ILP32 && accepted_branch_protection_string == NULL)
12891 {
12892#ifdef TARGET_ENABLE_PAC_RET
12893 aarch64_ra_sign_scope = AARCH64_FUNCTION_NON_LEAF;
c7ff4f0f
SD
12894#else
12895 aarch64_ra_sign_scope = AARCH64_FUNCTION_NONE;
12896#endif
12897 }
12898
0cfff2a1
KT
12899#ifndef HAVE_AS_MABI_OPTION
12900 /* The compiler may have been configured with 2.23.* binutils, which does
12901 not have support for ILP32. */
12902 if (TARGET_ILP32)
a3f9f006 12903 error ("assembler does not support %<-mabi=ilp32%>");
0cfff2a1 12904#endif
43e9d192 12905
43cacb12
RS
12906 /* Convert -msve-vector-bits to a VG count. */
12907 aarch64_sve_vg = aarch64_convert_sve_vector_bits (aarch64_sve_vector_bits);
12908
db58fd89 12909 if (aarch64_ra_sign_scope != AARCH64_FUNCTION_NONE && TARGET_ILP32)
a3f9f006 12910 sorry ("return address signing is only supported for %<-mabi=lp64%>");
db58fd89 12911
361fb3ee
KT
12912 /* Make sure we properly set up the explicit options. */
12913 if ((aarch64_cpu_string && valid_cpu)
12914 || (aarch64_tune_string && valid_tune))
12915 gcc_assert (explicit_tune_core != aarch64_none);
12916
12917 if ((aarch64_cpu_string && valid_cpu)
12918 || (aarch64_arch_string && valid_arch))
12919 gcc_assert (explicit_arch != aarch64_no_arch);
12920
5f7dbaa0
RE
12921 /* The pass to insert speculation tracking runs before
12922 shrink-wrapping and the latter does not know how to update the
12923 tracking status. So disable it in this case. */
12924 if (aarch64_track_speculation)
12925 flag_shrink_wrap = 0;
12926
0cfff2a1
KT
12927 aarch64_override_options_internal (&global_options);
12928
12929 /* Save these options as the default ones in case we push and pop them later
12930 while processing functions with potential target attributes. */
12931 target_option_default_node = target_option_current_node
12932 = build_target_option_node (&global_options);
43e9d192
IB
12933}
12934
12935/* Implement targetm.override_options_after_change. */
12936
12937static void
12938aarch64_override_options_after_change (void)
12939{
0cfff2a1 12940 aarch64_override_options_after_change_1 (&global_options);
43e9d192
IB
12941}
12942
12943static struct machine_function *
12944aarch64_init_machine_status (void)
12945{
12946 struct machine_function *machine;
766090c2 12947 machine = ggc_cleared_alloc<machine_function> ();
43e9d192
IB
12948 return machine;
12949}
12950
12951void
12952aarch64_init_expanders (void)
12953{
12954 init_machine_status = aarch64_init_machine_status;
12955}
12956
12957/* Set aarch64_cmodel from the code model selected in OPTS, adjusting for PIC. */
12958static void
0cfff2a1 12959initialize_aarch64_code_model (struct gcc_options *opts)
43e9d192 12960{
0cfff2a1 12961 if (opts->x_flag_pic)
43e9d192 12962 {
0cfff2a1 12963 switch (opts->x_aarch64_cmodel_var)
43e9d192
IB
12964 {
12965 case AARCH64_CMODEL_TINY:
12966 aarch64_cmodel = AARCH64_CMODEL_TINY_PIC;
12967 break;
12968 case AARCH64_CMODEL_SMALL:
34ecdb0f 12969#ifdef HAVE_AS_SMALL_PIC_RELOCS
1b1e81f8
JW
12970 aarch64_cmodel = (flag_pic == 2
12971 ? AARCH64_CMODEL_SMALL_PIC
12972 : AARCH64_CMODEL_SMALL_SPIC);
34ecdb0f
JW
12973#else
12974 aarch64_cmodel = AARCH64_CMODEL_SMALL_PIC;
12975#endif
43e9d192
IB
12976 break;
12977 case AARCH64_CMODEL_LARGE:
a3f9f006 12978 sorry ("code model %qs with %<-f%s%>", "large",
0cfff2a1 12979 opts->x_flag_pic > 1 ? "PIC" : "pic");
1c652781 12980 break;
43e9d192
IB
12981 default:
12982 gcc_unreachable ();
12983 }
12984 }
12985 else
0cfff2a1 12986 aarch64_cmodel = opts->x_aarch64_cmodel_var;
43e9d192
IB
12987}
12988
361fb3ee
KT
12989/* Implement TARGET_OPTION_SAVE. */
12990
12991static void
12992aarch64_option_save (struct cl_target_option *ptr, struct gcc_options *opts)
12993{
12994 ptr->x_aarch64_override_tune_string = opts->x_aarch64_override_tune_string;
efac62a3
ST
12995 ptr->x_aarch64_branch_protection_string
12996 = opts->x_aarch64_branch_protection_string;
361fb3ee
KT
12997}
12998
12999/* Implements TARGET_OPTION_RESTORE. Restore the backend codegen decisions
13000 using the information saved in PTR. */
13001
13002static void
13003aarch64_option_restore (struct gcc_options *opts, struct cl_target_option *ptr)
13004{
13005 opts->x_explicit_tune_core = ptr->x_explicit_tune_core;
13006 selected_tune = aarch64_get_tune_cpu (ptr->x_explicit_tune_core);
13007 opts->x_explicit_arch = ptr->x_explicit_arch;
13008 selected_arch = aarch64_get_arch (ptr->x_explicit_arch);
13009 opts->x_aarch64_override_tune_string = ptr->x_aarch64_override_tune_string;
efac62a3
ST
13010 opts->x_aarch64_branch_protection_string
13011 = ptr->x_aarch64_branch_protection_string;
13012 if (opts->x_aarch64_branch_protection_string)
13013 {
13014 aarch64_parse_branch_protection (opts->x_aarch64_branch_protection_string,
13015 NULL);
13016 }
361fb3ee
KT
13017
13018 aarch64_override_options_internal (opts);
13019}
13020
13021/* Implement TARGET_OPTION_PRINT. */
13022
13023static void
13024aarch64_option_print (FILE *file, int indent, struct cl_target_option *ptr)
13025{
13026 const struct processor *cpu
13027 = aarch64_get_tune_cpu (ptr->x_explicit_tune_core);
28108a53 13028 uint64_t isa_flags = ptr->x_aarch64_isa_flags;
361fb3ee 13029 const struct processor *arch = aarch64_get_arch (ptr->x_explicit_arch);
054b4005 13030 std::string extension
04a99ebe 13031 = aarch64_get_extension_string_for_isa_flags (isa_flags, arch->flags);
361fb3ee
KT
13032
13033 fprintf (file, "%*sselected tune = %s\n", indent, "", cpu->name);
054b4005
JG
13034 fprintf (file, "%*sselected arch = %s%s\n", indent, "",
13035 arch->name, extension.c_str ());
361fb3ee
KT
13036}
13037
d78006d9
KT
13038static GTY(()) tree aarch64_previous_fndecl;
13039
e4ea20c8
KT
13040void
13041aarch64_reset_previous_fndecl (void)
13042{
13043 aarch64_previous_fndecl = NULL;
13044}
13045
acfc1ac1
KT
13046/* Restore or save the TREE_TARGET_GLOBALS from or to NEW_TREE.
13047 Used by aarch64_set_current_function and aarch64_pragma_target_parse to
13048 make sure optab availability predicates are recomputed when necessary. */
13049
13050void
13051aarch64_save_restore_target_globals (tree new_tree)
13052{
13053 if (TREE_TARGET_GLOBALS (new_tree))
13054 restore_target_globals (TREE_TARGET_GLOBALS (new_tree));
13055 else if (new_tree == target_option_default_node)
13056 restore_target_globals (&default_target_globals);
13057 else
13058 TREE_TARGET_GLOBALS (new_tree) = save_target_globals_default_opts ();
13059}
13060
d78006d9
KT
13061/* Implement TARGET_SET_CURRENT_FUNCTION. Unpack the codegen decisions
13062 like tuning and ISA features from the DECL_FUNCTION_SPECIFIC_TARGET
13063 of the function, if such exists. This function may be called multiple
13064 times on a single function so use aarch64_previous_fndecl to avoid
13065 setting up identical state. */
13066
13067static void
13068aarch64_set_current_function (tree fndecl)
13069{
acfc1ac1
KT
13070 if (!fndecl || fndecl == aarch64_previous_fndecl)
13071 return;
13072
d78006d9
KT
13073 tree old_tree = (aarch64_previous_fndecl
13074 ? DECL_FUNCTION_SPECIFIC_TARGET (aarch64_previous_fndecl)
13075 : NULL_TREE);
13076
acfc1ac1 13077 tree new_tree = DECL_FUNCTION_SPECIFIC_TARGET (fndecl);
d78006d9 13078
acfc1ac1
KT
13079 /* If current function has no attributes but the previous one did,
13080 use the default node. */
13081 if (!new_tree && old_tree)
13082 new_tree = target_option_default_node;
d78006d9 13083
acfc1ac1
KT
13084 /* If nothing to do, return. #pragma GCC reset or #pragma GCC pop to
13085 the default have been handled by aarch64_save_restore_target_globals from
13086 aarch64_pragma_target_parse. */
13087 if (old_tree == new_tree)
13088 return;
d78006d9 13089
acfc1ac1 13090 aarch64_previous_fndecl = fndecl;
6e17a23b 13091
acfc1ac1
KT
13092 /* First set the target options. */
13093 cl_target_option_restore (&global_options, TREE_TARGET_OPTION (new_tree));
6e17a23b 13094
acfc1ac1 13095 aarch64_save_restore_target_globals (new_tree);
d78006d9 13096}
361fb3ee 13097
5a2c8331
KT
13098/* Enum describing the various ways we can handle attributes.
13099 In many cases we can reuse the generic option handling machinery. */
13100
13101enum aarch64_attr_opt_type
13102{
13103 aarch64_attr_mask, /* Attribute should set a bit in target_flags. */
13104 aarch64_attr_bool, /* Attribute sets or unsets a boolean variable. */
13105 aarch64_attr_enum, /* Attribute sets an enum variable. */
13106 aarch64_attr_custom /* Attribute requires a custom handling function. */
13107};
13108
13109/* All the information needed to handle a target attribute.
13110 NAME is the name of the attribute.
9c582551 13111 ATTR_TYPE specifies the type of behavior of the attribute as described
5a2c8331
KT
13112 in the definition of enum aarch64_attr_opt_type.
13113 ALLOW_NEG is true if the attribute supports a "no-" form.
ab93e9b7
SE
13114 HANDLER is the function that takes the attribute string as an argument
13115 It is needed only when the ATTR_TYPE is aarch64_attr_custom.
5a2c8331 13116 OPT_NUM is the enum specifying the option that the attribute modifies.
9c582551 13117 This is needed for attributes that mirror the behavior of a command-line
5a2c8331
KT
13118 option, that is it has ATTR_TYPE aarch64_attr_mask, aarch64_attr_bool or
13119 aarch64_attr_enum. */
13120
13121struct aarch64_attribute_info
13122{
13123 const char *name;
13124 enum aarch64_attr_opt_type attr_type;
13125 bool allow_neg;
ab93e9b7 13126 bool (*handler) (const char *);
5a2c8331
KT
13127 enum opt_code opt_num;
13128};
13129
ab93e9b7 13130/* Handle the ARCH_STR argument to the arch= target attribute. */
5a2c8331
KT
13131
13132static bool
ab93e9b7 13133aarch64_handle_attr_arch (const char *str)
5a2c8331
KT
13134{
13135 const struct processor *tmp_arch = NULL;
c7887347 13136 std::string invalid_extension;
5a2c8331 13137 enum aarch64_parse_opt_result parse_res
c7887347 13138 = aarch64_parse_arch (str, &tmp_arch, &aarch64_isa_flags, &invalid_extension);
5a2c8331
KT
13139
13140 if (parse_res == AARCH64_PARSE_OK)
13141 {
13142 gcc_assert (tmp_arch);
13143 selected_arch = tmp_arch;
13144 explicit_arch = selected_arch->arch;
13145 return true;
13146 }
13147
13148 switch (parse_res)
13149 {
13150 case AARCH64_PARSE_MISSING_ARG:
ab93e9b7 13151 error ("missing name in %<target(\"arch=\")%> pragma or attribute");
5a2c8331
KT
13152 break;
13153 case AARCH64_PARSE_INVALID_ARG:
ab93e9b7 13154 error ("invalid name (\"%s\") in %<target(\"arch=\")%> pragma or attribute", str);
01f44038 13155 aarch64_print_hint_for_arch (str);
5a2c8331
KT
13156 break;
13157 case AARCH64_PARSE_INVALID_FEATURE:
c7887347
ML
13158 error ("invalid feature modifier %s of value (\"%s\") in "
13159 "%<target()%> pragma or attribute", invalid_extension.c_str (), str);
13160 aarch64_print_hint_for_extensions (invalid_extension);
5a2c8331
KT
13161 break;
13162 default:
13163 gcc_unreachable ();
13164 }
13165
13166 return false;
13167}
13168
ab93e9b7 13169/* Handle the argument CPU_STR to the cpu= target attribute. */
5a2c8331
KT
13170
13171static bool
ab93e9b7 13172aarch64_handle_attr_cpu (const char *str)
5a2c8331
KT
13173{
13174 const struct processor *tmp_cpu = NULL;
c7887347 13175 std::string invalid_extension;
5a2c8331 13176 enum aarch64_parse_opt_result parse_res
c7887347 13177 = aarch64_parse_cpu (str, &tmp_cpu, &aarch64_isa_flags, &invalid_extension);
5a2c8331
KT
13178
13179 if (parse_res == AARCH64_PARSE_OK)
13180 {
13181 gcc_assert (tmp_cpu);
13182 selected_tune = tmp_cpu;
13183 explicit_tune_core = selected_tune->ident;
13184
13185 selected_arch = &all_architectures[tmp_cpu->arch];
13186 explicit_arch = selected_arch->arch;
13187 return true;
13188 }
13189
13190 switch (parse_res)
13191 {
13192 case AARCH64_PARSE_MISSING_ARG:
ab93e9b7 13193 error ("missing name in %<target(\"cpu=\")%> pragma or attribute");
5a2c8331
KT
13194 break;
13195 case AARCH64_PARSE_INVALID_ARG:
ab93e9b7 13196 error ("invalid name (\"%s\") in %<target(\"cpu=\")%> pragma or attribute", str);
01f44038 13197 aarch64_print_hint_for_core (str);
5a2c8331
KT
13198 break;
13199 case AARCH64_PARSE_INVALID_FEATURE:
c7887347
ML
13200 error ("invalid feature modifier %s of value (\"%s\") in "
13201 "%<target()%> pragma or attribute", invalid_extension.c_str (), str);
13202 aarch64_print_hint_for_extensions (invalid_extension);
5a2c8331
KT
13203 break;
13204 default:
13205 gcc_unreachable ();
13206 }
13207
13208 return false;
13209}
13210
efac62a3
ST
13211/* Handle the argument STR to the branch-protection= attribute. */
13212
13213 static bool
13214 aarch64_handle_attr_branch_protection (const char *str)
13215 {
13216 char *err_str = (char *) xmalloc (strlen (str) + 1);
13217 enum aarch64_parse_opt_result res = aarch64_parse_branch_protection (str,
13218 &err_str);
13219 bool success = false;
13220 switch (res)
13221 {
13222 case AARCH64_PARSE_MISSING_ARG:
13223 error ("missing argument to %<target(\"branch-protection=\")%> pragma or"
13224 " attribute");
13225 break;
13226 case AARCH64_PARSE_INVALID_ARG:
13227 error ("invalid protection type (\"%s\") in %<target(\"branch-protection"
13228 "=\")%> pragma or attribute", err_str);
13229 break;
13230 case AARCH64_PARSE_OK:
13231 success = true;
13232 /* Fall through. */
13233 case AARCH64_PARSE_INVALID_FEATURE:
13234 break;
13235 default:
13236 gcc_unreachable ();
13237 }
13238 free (err_str);
13239 return success;
13240 }
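/* Illustrative example (editor's addition, not part of the original source)
   of user code routed through the handler above.  The function name is
   hypothetical; "standard" is the documented value that enables the default
   set of protections (return-address signing and BTI).

     __attribute__ ((target ("branch-protection=standard")))
     int hypothetical_protected_fn (int x) { return x + 1; }  */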
13241
ab93e9b7 13242/* Handle the argument STR to the tune= target attribute. */
5a2c8331
KT
13243
13244static bool
ab93e9b7 13245aarch64_handle_attr_tune (const char *str)
5a2c8331
KT
13246{
13247 const struct processor *tmp_tune = NULL;
13248 enum aarch64_parse_opt_result parse_res
13249 = aarch64_parse_tune (str, &tmp_tune);
13250
13251 if (parse_res == AARCH64_PARSE_OK)
13252 {
13253 gcc_assert (tmp_tune);
13254 selected_tune = tmp_tune;
13255 explicit_tune_core = selected_tune->ident;
13256 return true;
13257 }
13258
13259 switch (parse_res)
13260 {
13261 case AARCH64_PARSE_INVALID_ARG:
ab93e9b7 13262 error ("invalid name (\"%s\") in %<target(\"tune=\")%> pragma or attribute", str);
01f44038 13263 aarch64_print_hint_for_core (str);
5a2c8331
KT
13264 break;
13265 default:
13266 gcc_unreachable ();
13267 }
13268
13269 return false;
13270}
13271
13272/* Parse an architecture extensions target attribute string specified in STR.
13273 For example "+fp+nosimd". Show any errors if needed. Return TRUE
13274 if successful. Update aarch64_isa_flags to reflect the ISA features
ab93e9b7 13275 modified. */
5a2c8331
KT
13276
13277static bool
ab93e9b7 13278aarch64_handle_attr_isa_flags (char *str)
5a2c8331
KT
13279{
13280 enum aarch64_parse_opt_result parse_res;
28108a53 13281 uint64_t isa_flags = aarch64_isa_flags;
5a2c8331 13282
e4ea20c8
KT
13283 /* We allow "+nothing" in the beginning to clear out all architectural
13284 features if the user wants to handpick specific features. */
13285 if (strncmp ("+nothing", str, 8) == 0)
13286 {
13287 isa_flags = 0;
13288 str += 8;
13289 }
13290
c7887347
ML
13291 std::string invalid_extension;
13292 parse_res = aarch64_parse_extension (str, &isa_flags, &invalid_extension);
5a2c8331
KT
13293
13294 if (parse_res == AARCH64_PARSE_OK)
13295 {
13296 aarch64_isa_flags = isa_flags;
13297 return true;
13298 }
13299
13300 switch (parse_res)
13301 {
13302 case AARCH64_PARSE_MISSING_ARG:
ab93e9b7 13303 error ("missing value in %<target()%> pragma or attribute");
5a2c8331
KT
13304 break;
13305
13306 case AARCH64_PARSE_INVALID_FEATURE:
c7887347
ML
13307 error ("invalid feature modifier %s of value (\"%s\") in "
13308 "%<target()%> pragma or attribute", invalid_extension.c_str (), str);
5a2c8331
KT
13309 break;
13310
13311 default:
13312 gcc_unreachable ();
13313 }
13314
13315 return false;
13316}
13317
13318/* The target attributes that we support. On top of these we also support just
13319 ISA extensions, like __attribute__ ((target ("+crc"))), but that case is
13320 handled explicitly in aarch64_process_one_target_attr. */
13321
13322static const struct aarch64_attribute_info aarch64_attributes[] =
13323{
13324 { "general-regs-only", aarch64_attr_mask, false, NULL,
13325 OPT_mgeneral_regs_only },
13326 { "fix-cortex-a53-835769", aarch64_attr_bool, true, NULL,
13327 OPT_mfix_cortex_a53_835769 },
48bb1a55
CL
13328 { "fix-cortex-a53-843419", aarch64_attr_bool, true, NULL,
13329 OPT_mfix_cortex_a53_843419 },
5a2c8331 13330 { "cmodel", aarch64_attr_enum, false, NULL, OPT_mcmodel_ },
675d044c 13331 { "strict-align", aarch64_attr_mask, true, NULL, OPT_mstrict_align },
5a2c8331
KT
13332 { "omit-leaf-frame-pointer", aarch64_attr_bool, true, NULL,
13333 OPT_momit_leaf_frame_pointer },
13334 { "tls-dialect", aarch64_attr_enum, false, NULL, OPT_mtls_dialect_ },
13335 { "arch", aarch64_attr_custom, false, aarch64_handle_attr_arch,
13336 OPT_march_ },
13337 { "cpu", aarch64_attr_custom, false, aarch64_handle_attr_cpu, OPT_mcpu_ },
13338 { "tune", aarch64_attr_custom, false, aarch64_handle_attr_tune,
13339 OPT_mtune_ },
efac62a3
ST
13340 { "branch-protection", aarch64_attr_custom, false,
13341 aarch64_handle_attr_branch_protection, OPT_mbranch_protection_ },
db58fd89
JW
13342 { "sign-return-address", aarch64_attr_enum, false, NULL,
13343 OPT_msign_return_address_ },
5a2c8331
KT
13344 { NULL, aarch64_attr_custom, false, NULL, OPT____ }
13345};
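/* Illustrative examples (editor's addition, not part of the original source)
   of attribute strings dispatched through the table above.  The function
   names are hypothetical; the attribute values follow the documented forms.

     Custom handler (aarch64_handle_attr_arch):
       __attribute__ ((target ("arch=armv8-a+crc")))
       int hypothetical_crc_fn (int x) { return x; }

     Negated boolean form:
       __attribute__ ((target ("no-omit-leaf-frame-pointer")))
       int hypothetical_leaf_fn (int x) { return x; }

     Bare ISA-extension form, handled by aarch64_handle_attr_isa_flags:
       __attribute__ ((target ("+nothing+fp")))
       double hypothetical_fp_fn (double a) { return a; }  */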
13346
13347/* Parse ARG_STR which contains the definition of one target attribute.
ab93e9b7 13348 Show appropriate errors if any or return true if the attribute is valid. */
5a2c8331
KT
13349
13350static bool
ab93e9b7 13351aarch64_process_one_target_attr (char *arg_str)
5a2c8331
KT
13352{
13353 bool invert = false;
13354
13355 size_t len = strlen (arg_str);
13356
13357 if (len == 0)
13358 {
ab93e9b7 13359 error ("malformed %<target()%> pragma or attribute");
5a2c8331
KT
13360 return false;
13361 }
13362
13363 char *str_to_check = (char *) alloca (len + 1);
13364 strcpy (str_to_check, arg_str);
13365
5a2c8331
KT
13366 /* We have something like __attribute__ ((target ("+fp+nosimd"))).
13367 It is easier to detect and handle it explicitly here rather than going
13368 through the machinery for the rest of the target attributes in this
13369 function. */
13370 if (*str_to_check == '+')
ab93e9b7 13371 return aarch64_handle_attr_isa_flags (str_to_check);
5a2c8331
KT
13372
13373 if (len > 3 && strncmp (str_to_check, "no-", 3) == 0)
13374 {
13375 invert = true;
13376 str_to_check += 3;
13377 }
13378 char *arg = strchr (str_to_check, '=');
13379
13380 /* If we found opt=foo then terminate STR_TO_CHECK at the '='
13381 and point ARG to "foo". */
13382 if (arg)
13383 {
13384 *arg = '\0';
13385 arg++;
13386 }
13387 const struct aarch64_attribute_info *p_attr;
16d12992 13388 bool found = false;
5a2c8331
KT
13389 for (p_attr = aarch64_attributes; p_attr->name; p_attr++)
13390 {
13391 /* If the names don't match up, or the user has given an argument
13392 to an attribute that doesn't accept one, or didn't give an argument
13393 to an attribute that expects one, fail to match. */
13394 if (strcmp (str_to_check, p_attr->name) != 0)
13395 continue;
13396
16d12992 13397 found = true;
5a2c8331
KT
13398 bool attr_need_arg_p = p_attr->attr_type == aarch64_attr_custom
13399 || p_attr->attr_type == aarch64_attr_enum;
13400
13401 if (attr_need_arg_p ^ (arg != NULL))
13402 {
ab93e9b7 13403 error ("pragma or attribute %<target(\"%s\")%> does not accept an argument", str_to_check);
5a2c8331
KT
13404 return false;
13405 }
13406
13407 /* If the name matches but the attribute does not allow "no-" versions
13408 then we can't match. */
13409 if (invert && !p_attr->allow_neg)
13410 {
ab93e9b7 13411 error ("pragma or attribute %<target(\"%s\")%> does not allow a negated form", str_to_check);
5a2c8331
KT
13412 return false;
13413 }
13414
13415 switch (p_attr->attr_type)
13416 {
13417 /* Has a custom handler registered.
13418 For example, cpu=, arch=, tune=. */
13419 case aarch64_attr_custom:
13420 gcc_assert (p_attr->handler);
ab93e9b7 13421 if (!p_attr->handler (arg))
5a2c8331
KT
13422 return false;
13423 break;
13424
13425 /* Either set or unset a boolean option. */
13426 case aarch64_attr_bool:
13427 {
13428 struct cl_decoded_option decoded;
13429
13430 generate_option (p_attr->opt_num, NULL, !invert,
13431 CL_TARGET, &decoded);
13432 aarch64_handle_option (&global_options, &global_options_set,
13433 &decoded, input_location);
13434 break;
13435 }
13436 /* Set or unset a bit in the target_flags. aarch64_handle_option
13437 should know what mask to apply given the option number. */
13438 case aarch64_attr_mask:
13439 {
13440 struct cl_decoded_option decoded;
13441 /* We only need to specify the option number.
13442 aarch64_handle_option will know which mask to apply. */
13443 decoded.opt_index = p_attr->opt_num;
13444 decoded.value = !invert;
13445 aarch64_handle_option (&global_options, &global_options_set,
13446 &decoded, input_location);
13447 break;
13448 }
13449 /* Use the option setting machinery to set an option to an enum. */
13450 case aarch64_attr_enum:
13451 {
13452 gcc_assert (arg);
13453 bool valid;
13454 int value;
13455 valid = opt_enum_arg_to_value (p_attr->opt_num, arg,
13456 &value, CL_TARGET);
13457 if (valid)
13458 {
13459 set_option (&global_options, NULL, p_attr->opt_num, value,
13460 NULL, DK_UNSPECIFIED, input_location,
13461 global_dc);
13462 }
13463 else
13464 {
ab93e9b7 13465 error ("pragma or attribute %<target(\"%s=%s\")%> is not valid", str_to_check, arg);
5a2c8331
KT
13466 }
13467 break;
13468 }
13469 default:
13470 gcc_unreachable ();
13471 }
13472 }
13473
16d12992
KT
13474 /* If we reached here we either have found an attribute and validated
13475 it or didn't match any. If we matched an attribute but its arguments
13476 were malformed we will have returned false already. */
13477 return found;
5a2c8331
KT
13478}
13479
13480/* Count how many times the character C appears in
13481 NULL-terminated string STR. */
13482
13483static unsigned int
13484num_occurences_in_str (char c, char *str)
13485{
13486 unsigned int res = 0;
13487 while (*str != '\0')
13488 {
13489 if (*str == c)
13490 res++;
13491
13492 str++;
13493 }
13494
13495 return res;
13496}
13497
13498/* Parse the tree in ARGS that contains the target attribute information
ab93e9b7 13499 and update the global target options space. */
5a2c8331
KT
13500
13501bool
ab93e9b7 13502aarch64_process_target_attr (tree args)
5a2c8331
KT
13503{
13504 if (TREE_CODE (args) == TREE_LIST)
13505 {
13506 do
13507 {
13508 tree head = TREE_VALUE (args);
13509 if (head)
13510 {
ab93e9b7 13511 if (!aarch64_process_target_attr (head))
5a2c8331
KT
13512 return false;
13513 }
13514 args = TREE_CHAIN (args);
13515 } while (args);
13516
13517 return true;
13518 }
3b6cb9e3
ML
13519
13520 if (TREE_CODE (args) != STRING_CST)
13521 {
13522 error ("attribute %<target%> argument not a string");
13523 return false;
13524 }
5a2c8331
KT
13525
13526 size_t len = strlen (TREE_STRING_POINTER (args));
13527 char *str_to_check = (char *) alloca (len + 1);
13528 strcpy (str_to_check, TREE_STRING_POINTER (args));
13529
13530 if (len == 0)
13531 {
ab93e9b7 13532 error ("malformed %<target()%> pragma or attribute");
5a2c8331
KT
13533 return false;
13534 }
13535
13536 /* Used to catch empty spaces between commas i.e.
13537 attribute ((target ("attr1,,attr2"))). */
13538 unsigned int num_commas = num_occurences_in_str (',', str_to_check);
13539
13540 /* Handle multiple target attributes separated by ','. */
7185a4eb 13541 char *token = strtok_r (str_to_check, ",", &str_to_check);
5a2c8331
KT
13542
13543 unsigned int num_attrs = 0;
13544 while (token)
13545 {
13546 num_attrs++;
ab93e9b7 13547 if (!aarch64_process_one_target_attr (token))
5a2c8331 13548 {
ab93e9b7 13549 error ("pragma or attribute %<target(\"%s\")%> is not valid", token);
5a2c8331
KT
13550 return false;
13551 }
13552
7185a4eb 13553 token = strtok_r (NULL, ",", &str_to_check);
5a2c8331
KT
13554 }
13555
13556 if (num_attrs != num_commas + 1)
13557 {
ab93e9b7 13558 error ("malformed %<target(\"%s\")%> pragma or attribute", TREE_STRING_POINTER (args));
5a2c8331
KT
13559 return false;
13560 }
13561
13562 return true;
13563}
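/* Worked example (editor's addition, not part of the original source) of the
   comma splitting and validation above, using a hypothetical declaration:

     __attribute__ ((target ("arch=armv8-a,tune=cortex-a53")))
     int hypothetical_tuned_fn (int x) { return x; }

   The string contains one comma and yields two tokens, so num_attrs equals
   num_commas + 1 and both tokens are processed.  A string such as
   "arch=armv8-a,,tune=cortex-a53" also yields two tokens but contains two
   commas, so the final check rejects it as malformed.  */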
13564
13565/* Implement TARGET_OPTION_VALID_ATTRIBUTE_P. This is used to
13566 process attribute ((target ("..."))). */
13567
13568static bool
13569aarch64_option_valid_attribute_p (tree fndecl, tree, tree args, int)
13570{
13571 struct cl_target_option cur_target;
13572 bool ret;
13573 tree old_optimize;
13574 tree new_target, new_optimize;
13575 tree existing_target = DECL_FUNCTION_SPECIFIC_TARGET (fndecl);
91d0e8de
KT
13576
13577 /* If what we're processing is the current pragma string then the
13578 target option node is already stored in target_option_current_node
13579 by aarch64_pragma_target_parse in aarch64-c.c. Use that to avoid
13580 having to re-parse the string. This is especially useful to keep
13581 arm_neon.h compile times down since that header contains a lot
13582 of intrinsics enclosed in pragmas. */
13583 if (!existing_target && args == current_target_pragma)
13584 {
13585 DECL_FUNCTION_SPECIFIC_TARGET (fndecl) = target_option_current_node;
13586 return true;
13587 }
5a2c8331
KT
13588 tree func_optimize = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl);
13589
13590 old_optimize = build_optimization_node (&global_options);
13591 func_optimize = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl);
13592
13593 /* If the function changed the optimization levels as well as setting
13594 target options, start with the optimizations specified. */
13595 if (func_optimize && func_optimize != old_optimize)
13596 cl_optimization_restore (&global_options,
13597 TREE_OPTIMIZATION (func_optimize));
13598
13599 /* Save the current target options to restore at the end. */
13600 cl_target_option_save (&cur_target, &global_options);
13601
13602 /* If fndecl already has some target attributes applied to it, unpack
13603 them so that we add this attribute on top of them, rather than
13604 overwriting them. */
13605 if (existing_target)
13606 {
13607 struct cl_target_option *existing_options
13608 = TREE_TARGET_OPTION (existing_target);
13609
13610 if (existing_options)
13611 cl_target_option_restore (&global_options, existing_options);
13612 }
13613 else
13614 cl_target_option_restore (&global_options,
13615 TREE_TARGET_OPTION (target_option_current_node));
13616
ab93e9b7 13617 ret = aarch64_process_target_attr (args);
5a2c8331
KT
13618
13619 /* Set up any additional state. */
13620 if (ret)
13621 {
13622 aarch64_override_options_internal (&global_options);
e95a988a
KT
13623 /* Initialize SIMD builtins if we haven't already.
13624 Set current_target_pragma to NULL for the duration so that
13625 the builtin initialization code doesn't try to tag the functions
13626 being built with the attributes specified by any current pragma, thus
13627 going into an infinite recursion. */
13628 if (TARGET_SIMD)
13629 {
13630 tree saved_current_target_pragma = current_target_pragma;
13631 current_target_pragma = NULL;
13632 aarch64_init_simd_builtins ();
13633 current_target_pragma = saved_current_target_pragma;
13634 }
5a2c8331
KT
13635 new_target = build_target_option_node (&global_options);
13636 }
13637 else
13638 new_target = NULL;
13639
13640 new_optimize = build_optimization_node (&global_options);
13641
13642 if (fndecl && ret)
13643 {
13644 DECL_FUNCTION_SPECIFIC_TARGET (fndecl) = new_target;
13645
13646 if (old_optimize != new_optimize)
13647 DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl) = new_optimize;
13648 }
13649
13650 cl_target_option_restore (&global_options, &cur_target);
13651
13652 if (old_optimize != new_optimize)
13653 cl_optimization_restore (&global_options,
13654 TREE_OPTIMIZATION (old_optimize));
13655 return ret;
13656}
13657
1fd8d40c
KT
13658/* Helper for aarch64_can_inline_p. In the case where CALLER and CALLEE are
13659 tri-bool options (yes, no, don't care) and the default value is
13660 DEF, determine whether to reject inlining. */
13661
13662static bool
13663aarch64_tribools_ok_for_inlining_p (int caller, int callee,
13664 int dont_care, int def)
13665{
13666 /* If the callee doesn't care, always allow inlining. */
13667 if (callee == dont_care)
13668 return true;
13669
13670 /* If the caller doesn't care, always allow inlining. */
13671 if (caller == dont_care)
13672 return true;
13673
13674 /* Otherwise, allow inlining if either the callee and caller values
13675 agree, or if the callee is using the default value. */
13676 return (callee == caller || callee == def);
13677}
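/* Worked example (editor's addition, not part of the original source) for the
   tri-bool helper above, taking the -mfix-cortex-a53-835769 workaround where
   DONT_CARE is 2:

     caller = 1 (on),  callee = 2 (unset)              inlining allowed
     caller = 2,       callee = 1                      inlining allowed
     caller = 0 (off), callee = 1, default = 0         inlining rejected
     caller = 0,       callee = 0                      inlining allowed  */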
13678
13679/* Implement TARGET_CAN_INLINE_P. Decide whether it is valid
13680 to inline CALLEE into CALLER based on target-specific info.
13681 Make sure that the caller and callee have compatible architectural
13682 features. Then go through the other possible target attributes
13683 and see if they can block inlining. Try not to reject always_inline
13684 callees unless they are incompatible architecturally. */
13685
13686static bool
13687aarch64_can_inline_p (tree caller, tree callee)
13688{
13689 tree caller_tree = DECL_FUNCTION_SPECIFIC_TARGET (caller);
13690 tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (callee);
13691
1fd8d40c
KT
13692 struct cl_target_option *caller_opts
13693 = TREE_TARGET_OPTION (caller_tree ? caller_tree
13694 : target_option_default_node);
13695
675d044c
SD
13696 struct cl_target_option *callee_opts
13697 = TREE_TARGET_OPTION (callee_tree ? callee_tree
13698 : target_option_default_node);
1fd8d40c
KT
13699
13700 /* Callee's ISA flags should be a subset of the caller's. */
13701 if ((caller_opts->x_aarch64_isa_flags & callee_opts->x_aarch64_isa_flags)
13702 != callee_opts->x_aarch64_isa_flags)
13703 return false;
13704
13705 /* Allow non-strict aligned functions inlining into strict
13706 aligned ones. */
13707 if ((TARGET_STRICT_ALIGN_P (caller_opts->x_target_flags)
13708 != TARGET_STRICT_ALIGN_P (callee_opts->x_target_flags))
13709 && !(!TARGET_STRICT_ALIGN_P (callee_opts->x_target_flags)
13710 && TARGET_STRICT_ALIGN_P (caller_opts->x_target_flags)))
13711 return false;
13712
13713 bool always_inline = lookup_attribute ("always_inline",
13714 DECL_ATTRIBUTES (callee));
13715
13716 /* If the architectural features match up and the callee is always_inline
13717 then the other attributes don't matter. */
13718 if (always_inline)
13719 return true;
13720
13721 if (caller_opts->x_aarch64_cmodel_var
13722 != callee_opts->x_aarch64_cmodel_var)
13723 return false;
13724
13725 if (caller_opts->x_aarch64_tls_dialect
13726 != callee_opts->x_aarch64_tls_dialect)
13727 return false;
13728
13729 /* Honour explicit requests to workaround errata. */
13730 if (!aarch64_tribools_ok_for_inlining_p (
13731 caller_opts->x_aarch64_fix_a53_err835769,
13732 callee_opts->x_aarch64_fix_a53_err835769,
13733 2, TARGET_FIX_ERR_A53_835769_DEFAULT))
13734 return false;
13735
48bb1a55
CL
13736 if (!aarch64_tribools_ok_for_inlining_p (
13737 caller_opts->x_aarch64_fix_a53_err843419,
13738 callee_opts->x_aarch64_fix_a53_err843419,
13739 2, TARGET_FIX_ERR_A53_843419))
13740 return false;
13741
1fd8d40c
KT
13742 /* If the user explicitly specified -momit-leaf-frame-pointer for the
13743 caller and callee and they don't match up, reject inlining. */
13744 if (!aarch64_tribools_ok_for_inlining_p (
13745 caller_opts->x_flag_omit_leaf_frame_pointer,
13746 callee_opts->x_flag_omit_leaf_frame_pointer,
13747 2, 1))
13748 return false;
13749
13750 /* If the callee has specific tuning overrides, respect them. */
13751 if (callee_opts->x_aarch64_override_tune_string != NULL
13752 && caller_opts->x_aarch64_override_tune_string == NULL)
13753 return false;
13754
13755 /* If the user specified tuning override strings for the
13756 caller and callee and they don't match up, reject inlining.
13757 We just do a string compare here, we don't analyze the meaning
13758 of the string, as it would be too costly for little gain. */
13759 if (callee_opts->x_aarch64_override_tune_string
13760 && caller_opts->x_aarch64_override_tune_string
13761 && (strcmp (callee_opts->x_aarch64_override_tune_string,
13762 caller_opts->x_aarch64_override_tune_string) != 0))
13763 return false;
13764
13765 return true;
13766}
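/* Illustrative example (editor's addition, not part of the original source)
   of the ISA-subset rule above, with hypothetical functions:

     __attribute__ ((target ("+sve")))
     static int callee_sve (int x) { return x; }

     static int caller_plain (int x) { return callee_sve (x); }

   callee_sve's ISA flags include SVE while caller_plain's do not, so the
   subset test fails and callee_sve is not inlined into caller_plain.  The
   reverse direction (a +sve caller inlining a plain callee) passes the
   test.  */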
13767
43e9d192
IB
13768/* Return true if SYMBOL_REF X binds locally. */
13769
13770static bool
13771aarch64_symbol_binds_local_p (const_rtx x)
13772{
13773 return (SYMBOL_REF_DECL (x)
13774 ? targetm.binds_local_p (SYMBOL_REF_DECL (x))
13775 : SYMBOL_REF_LOCAL_P (x));
13776}
13777
13778/* Return true if SYMBOL_REF X is thread local */
13779static bool
13780aarch64_tls_symbol_p (rtx x)
13781{
13782 if (! TARGET_HAVE_TLS)
13783 return false;
13784
13785 if (GET_CODE (x) != SYMBOL_REF)
13786 return false;
13787
13788 return SYMBOL_REF_TLS_MODEL (x) != 0;
13789}
13790
13791/* Classify a TLS symbol into one of the TLS kinds. */
13792enum aarch64_symbol_type
13793aarch64_classify_tls_symbol (rtx x)
13794{
13795 enum tls_model tls_kind = tls_symbolic_operand_type (x);
13796
13797 switch (tls_kind)
13798 {
13799 case TLS_MODEL_GLOBAL_DYNAMIC:
13800 case TLS_MODEL_LOCAL_DYNAMIC:
13801 return TARGET_TLS_DESC ? SYMBOL_SMALL_TLSDESC : SYMBOL_SMALL_TLSGD;
13802
13803 case TLS_MODEL_INITIAL_EXEC:
5ae7caad
JW
13804 switch (aarch64_cmodel)
13805 {
13806 case AARCH64_CMODEL_TINY:
13807 case AARCH64_CMODEL_TINY_PIC:
13808 return SYMBOL_TINY_TLSIE;
13809 default:
79496620 13810 return SYMBOL_SMALL_TLSIE;
5ae7caad 13811 }
43e9d192
IB
13812
13813 case TLS_MODEL_LOCAL_EXEC:
cbf5629e
JW
13814 if (aarch64_tls_size == 12)
13815 return SYMBOL_TLSLE12;
13816 else if (aarch64_tls_size == 24)
13817 return SYMBOL_TLSLE24;
13818 else if (aarch64_tls_size == 32)
13819 return SYMBOL_TLSLE32;
13820 else if (aarch64_tls_size == 48)
13821 return SYMBOL_TLSLE48;
13822 else
13823 gcc_unreachable ();
43e9d192
IB
13824
13825 case TLS_MODEL_EMULATED:
13826 case TLS_MODEL_NONE:
13827 return SYMBOL_FORCE_TO_MEM;
13828
13829 default:
13830 gcc_unreachable ();
13831 }
13832}
13833
43cacb12
RS
13834/* Return the correct method for accessing X + OFFSET, where X is either
13835 a SYMBOL_REF or LABEL_REF. */
17f4d4bf 13836
43e9d192 13837enum aarch64_symbol_type
43cacb12 13838aarch64_classify_symbol (rtx x, HOST_WIDE_INT offset)
43e9d192
IB
13839{
13840 if (GET_CODE (x) == LABEL_REF)
13841 {
13842 switch (aarch64_cmodel)
13843 {
13844 case AARCH64_CMODEL_LARGE:
13845 return SYMBOL_FORCE_TO_MEM;
13846
13847 case AARCH64_CMODEL_TINY_PIC:
13848 case AARCH64_CMODEL_TINY:
a5350ddc
CSS
13849 return SYMBOL_TINY_ABSOLUTE;
13850
1b1e81f8 13851 case AARCH64_CMODEL_SMALL_SPIC:
43e9d192
IB
13852 case AARCH64_CMODEL_SMALL_PIC:
13853 case AARCH64_CMODEL_SMALL:
13854 return SYMBOL_SMALL_ABSOLUTE;
13855
13856 default:
13857 gcc_unreachable ();
13858 }
13859 }
13860
17f4d4bf 13861 if (GET_CODE (x) == SYMBOL_REF)
43e9d192 13862 {
43e9d192
IB
13863 if (aarch64_tls_symbol_p (x))
13864 return aarch64_classify_tls_symbol (x);
13865
17f4d4bf
CSS
13866 switch (aarch64_cmodel)
13867 {
13868 case AARCH64_CMODEL_TINY:
15f6e0da 13869 /* When we retrieve symbol + offset address, we have to make sure
f8b756b7
TB
13870 the offset does not cause overflow of the final address. But
13871 we have no way of knowing the address of symbol at compile time
13872 so we can't accurately say if the distance between the PC and
13873 symbol + offset is outside the addressable range of +/-1M in the
13874 TINY code model. So we rely on images not being greater than
13875 1M and cap the offset at 1M and anything beyond 1M will have to
15f6e0da
RR
13876 be loaded using an alternative mechanism. Furthermore if the
13877 symbol is a weak reference to something that isn't known to
13878 resolve to a symbol in this module, then force to memory. */
13879 if ((SYMBOL_REF_WEAK (x)
13880 && !aarch64_symbol_binds_local_p (x))
43cacb12 13881 || !IN_RANGE (offset, -1048575, 1048575))
a5350ddc
CSS
13882 return SYMBOL_FORCE_TO_MEM;
13883 return SYMBOL_TINY_ABSOLUTE;
13884
17f4d4bf 13885 case AARCH64_CMODEL_SMALL:
f8b756b7
TB
13886 /* Same reasoning as the tiny code model, but the offset cap here is
13887 4G. */
15f6e0da
RR
13888 if ((SYMBOL_REF_WEAK (x)
13889 && !aarch64_symbol_binds_local_p (x))
43cacb12 13890 || !IN_RANGE (offset, HOST_WIDE_INT_C (-4294967263),
3ff5d1f0 13891 HOST_WIDE_INT_C (4294967264)))
17f4d4bf
CSS
13892 return SYMBOL_FORCE_TO_MEM;
13893 return SYMBOL_SMALL_ABSOLUTE;
43e9d192 13894
17f4d4bf 13895 case AARCH64_CMODEL_TINY_PIC:
38e6c9a6 13896 if (!aarch64_symbol_binds_local_p (x))
87dd8ab0 13897 return SYMBOL_TINY_GOT;
38e6c9a6
MS
13898 return SYMBOL_TINY_ABSOLUTE;
13899
1b1e81f8 13900 case AARCH64_CMODEL_SMALL_SPIC:
17f4d4bf
CSS
13901 case AARCH64_CMODEL_SMALL_PIC:
13902 if (!aarch64_symbol_binds_local_p (x))
1b1e81f8
JW
13903 return (aarch64_cmodel == AARCH64_CMODEL_SMALL_SPIC
13904 ? SYMBOL_SMALL_GOT_28K : SYMBOL_SMALL_GOT_4G);
17f4d4bf 13905 return SYMBOL_SMALL_ABSOLUTE;
43e9d192 13906
9ee6540a
WD
13907 case AARCH64_CMODEL_LARGE:
13908 /* This is alright even in PIC code as the constant
13909 pool reference is always PC relative and within
13910 the same translation unit. */
d47d34bb 13911 if (!aarch64_pcrelative_literal_loads && CONSTANT_POOL_ADDRESS_P (x))
9ee6540a
WD
13912 return SYMBOL_SMALL_ABSOLUTE;
13913 else
13914 return SYMBOL_FORCE_TO_MEM;
13915
17f4d4bf
CSS
13916 default:
13917 gcc_unreachable ();
13918 }
43e9d192 13919 }
17f4d4bf 13920
43e9d192
IB
13921 /* By default push everything into the constant pool. */
13922 return SYMBOL_FORCE_TO_MEM;
13923}
13924
43e9d192
IB
13925bool
13926aarch64_constant_address_p (rtx x)
13927{
13928 return (CONSTANT_P (x) && memory_address_p (DImode, x));
13929}
13930
13931bool
13932aarch64_legitimate_pic_operand_p (rtx x)
13933{
13934 if (GET_CODE (x) == SYMBOL_REF
13935 || (GET_CODE (x) == CONST
13936 && GET_CODE (XEXP (x, 0)) == PLUS
13937 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF))
13938 return false;
13939
13940 return true;
13941}
13942
26895c21
WD
13943/* Implement TARGET_LEGITIMATE_CONSTANT_P hook. Return true for constants
13944 that should be rematerialized rather than spilled. */
3520f7cc 13945
43e9d192 13946static bool
ef4bddc2 13947aarch64_legitimate_constant_p (machine_mode mode, rtx x)
43e9d192 13948{
26895c21 13949 /* Support CSE and rematerialization of common constants. */
c0bb5bc5 13950 if (CONST_INT_P (x)
9f7b87ca 13951 || (CONST_DOUBLE_P (x) && GET_MODE_CLASS (mode) == MODE_FLOAT)
c0bb5bc5 13952 || GET_CODE (x) == CONST_VECTOR)
26895c21
WD
13953 return true;
13954
43cacb12
RS
13955 /* Do not allow vector struct mode constants for Advanced SIMD.
13956 We could support 0 and -1 easily, but they need support in
13957 aarch64-simd.md. */
13958 unsigned int vec_flags = aarch64_classify_vector_mode (mode);
13959 if (vec_flags == (VEC_ADVSIMD | VEC_STRUCT))
43e9d192
IB
13960 return false;
13961
43cacb12
RS
13962 /* Only accept variable-length vector constants if they can be
13963 handled directly.
13964
13965 ??? It would be possible to handle rematerialization of other
13966 constants via secondary reloads. */
13967 if (vec_flags & VEC_ANY_SVE)
13968 return aarch64_simd_valid_immediate (x, NULL);
13969
509bb9b6
RS
13970 if (GET_CODE (x) == HIGH)
13971 x = XEXP (x, 0);
13972
43cacb12
RS
13973 /* Accept polynomial constants that can be calculated by using the
13974 destination of a move as the sole temporary. Constants that
13975 require a second temporary cannot be rematerialized (they can't be
13976 forced to memory and also aren't legitimate constants). */
13977 poly_int64 offset;
13978 if (poly_int_rtx_p (x, &offset))
13979 return aarch64_offset_temporaries (false, offset) <= 1;
13980
13981 /* If an offset is being added to something else, we need to allow the
13982 base to be moved into the destination register, meaning that there
13983 are no free temporaries for the offset. */
13984 x = strip_offset (x, &offset);
13985 if (!offset.is_constant () && aarch64_offset_temporaries (true, offset) > 0)
13986 return false;
26895c21 13987
43cacb12
RS
13988 /* Do not allow const (plus (anchor_symbol, const_int)). */
13989 if (maybe_ne (offset, 0) && SYMBOL_REF_P (x) && SYMBOL_REF_ANCHOR_P (x))
13990 return false;
26895c21 13991
f28e54bd
WD
13992 /* Treat symbols as constants. Avoid TLS symbols as they are complex,
13993 so spilling them is better than rematerialization. */
13994 if (SYMBOL_REF_P (x) && !SYMBOL_REF_TLS_MODEL (x))
13995 return true;
13996
26895c21
WD
13997 /* Label references are always constant. */
13998 if (GET_CODE (x) == LABEL_REF)
13999 return true;
14000
14001 return false;
43e9d192
IB
14002}
14003
a5bc806c 14004rtx
43e9d192
IB
14005aarch64_load_tp (rtx target)
14006{
14007 if (!target
14008 || GET_MODE (target) != Pmode
14009 || !register_operand (target, Pmode))
14010 target = gen_reg_rtx (Pmode);
14011
14012 /* Can return in any reg. */
14013 emit_insn (gen_aarch64_load_tp_hard (target));
14014 return target;
14015}
14016
43e9d192
IB
14017/* On AAPCS systems, this is the "struct __va_list". */
14018static GTY(()) tree va_list_type;
14019
14020/* Implement TARGET_BUILD_BUILTIN_VA_LIST.
14021 Return the type to use as __builtin_va_list.
14022
14023 AAPCS64 \S 7.1.4 requires that va_list be a typedef for a type defined as:
14024
14025 struct __va_list
14026 {
14027 void *__stack;
14028 void *__gr_top;
14029 void *__vr_top;
14030 int __gr_offs;
14031 int __vr_offs;
14032 }; */
14033
14034static tree
14035aarch64_build_builtin_va_list (void)
14036{
14037 tree va_list_name;
14038 tree f_stack, f_grtop, f_vrtop, f_groff, f_vroff;
14039
14040 /* Create the type. */
14041 va_list_type = lang_hooks.types.make_type (RECORD_TYPE);
14042 /* Give it the required name. */
14043 va_list_name = build_decl (BUILTINS_LOCATION,
14044 TYPE_DECL,
14045 get_identifier ("__va_list"),
14046 va_list_type);
14047 DECL_ARTIFICIAL (va_list_name) = 1;
14048 TYPE_NAME (va_list_type) = va_list_name;
665c56c6 14049 TYPE_STUB_DECL (va_list_type) = va_list_name;
43e9d192
IB
14050
14051 /* Create the fields. */
14052 f_stack = build_decl (BUILTINS_LOCATION,
14053 FIELD_DECL, get_identifier ("__stack"),
14054 ptr_type_node);
14055 f_grtop = build_decl (BUILTINS_LOCATION,
14056 FIELD_DECL, get_identifier ("__gr_top"),
14057 ptr_type_node);
14058 f_vrtop = build_decl (BUILTINS_LOCATION,
14059 FIELD_DECL, get_identifier ("__vr_top"),
14060 ptr_type_node);
14061 f_groff = build_decl (BUILTINS_LOCATION,
14062 FIELD_DECL, get_identifier ("__gr_offs"),
14063 integer_type_node);
14064 f_vroff = build_decl (BUILTINS_LOCATION,
14065 FIELD_DECL, get_identifier ("__vr_offs"),
14066 integer_type_node);
14067
88e3bdd1 14068 /* Tell tree-stdarg pass about our internal offset fields.
3fd6b9cc
JW
14069 NOTE: va_list_gpr/fpr_counter_field are only used for tree comparison
14070 purposes, to identify whether the code is updating the va_list internal
14071 offset fields in an irregular way. */
14072 va_list_gpr_counter_field = f_groff;
14073 va_list_fpr_counter_field = f_vroff;
14074
43e9d192
IB
14075 DECL_ARTIFICIAL (f_stack) = 1;
14076 DECL_ARTIFICIAL (f_grtop) = 1;
14077 DECL_ARTIFICIAL (f_vrtop) = 1;
14078 DECL_ARTIFICIAL (f_groff) = 1;
14079 DECL_ARTIFICIAL (f_vroff) = 1;
14080
14081 DECL_FIELD_CONTEXT (f_stack) = va_list_type;
14082 DECL_FIELD_CONTEXT (f_grtop) = va_list_type;
14083 DECL_FIELD_CONTEXT (f_vrtop) = va_list_type;
14084 DECL_FIELD_CONTEXT (f_groff) = va_list_type;
14085 DECL_FIELD_CONTEXT (f_vroff) = va_list_type;
14086
14087 TYPE_FIELDS (va_list_type) = f_stack;
14088 DECL_CHAIN (f_stack) = f_grtop;
14089 DECL_CHAIN (f_grtop) = f_vrtop;
14090 DECL_CHAIN (f_vrtop) = f_groff;
14091 DECL_CHAIN (f_groff) = f_vroff;
14092
14093 /* Compute its layout. */
14094 layout_type (va_list_type);
14095
14096 return va_list_type;
14097}
14098
14099/* Implement TARGET_EXPAND_BUILTIN_VA_START. */
14100static void
14101aarch64_expand_builtin_va_start (tree valist, rtx nextarg ATTRIBUTE_UNUSED)
14102{
14103 const CUMULATIVE_ARGS *cum;
14104 tree f_stack, f_grtop, f_vrtop, f_groff, f_vroff;
14105 tree stack, grtop, vrtop, groff, vroff;
14106 tree t;
88e3bdd1
JW
14107 int gr_save_area_size = cfun->va_list_gpr_size;
14108 int vr_save_area_size = cfun->va_list_fpr_size;
43e9d192
IB
14109 int vr_offset;
14110
14111 cum = &crtl->args.info;
88e3bdd1
JW
14112 if (cfun->va_list_gpr_size)
14113 gr_save_area_size = MIN ((NUM_ARG_REGS - cum->aapcs_ncrn) * UNITS_PER_WORD,
14114 cfun->va_list_gpr_size);
14115 if (cfun->va_list_fpr_size)
14116 vr_save_area_size = MIN ((NUM_FP_ARG_REGS - cum->aapcs_nvrn)
14117 * UNITS_PER_VREG, cfun->va_list_fpr_size);
43e9d192 14118
d5726973 14119 if (!TARGET_FLOAT)
43e9d192 14120 {
261fb553 14121 gcc_assert (cum->aapcs_nvrn == 0);
43e9d192
IB
14122 vr_save_area_size = 0;
14123 }
14124
14125 f_stack = TYPE_FIELDS (va_list_type_node);
14126 f_grtop = DECL_CHAIN (f_stack);
14127 f_vrtop = DECL_CHAIN (f_grtop);
14128 f_groff = DECL_CHAIN (f_vrtop);
14129 f_vroff = DECL_CHAIN (f_groff);
14130
14131 stack = build3 (COMPONENT_REF, TREE_TYPE (f_stack), valist, f_stack,
14132 NULL_TREE);
14133 grtop = build3 (COMPONENT_REF, TREE_TYPE (f_grtop), valist, f_grtop,
14134 NULL_TREE);
14135 vrtop = build3 (COMPONENT_REF, TREE_TYPE (f_vrtop), valist, f_vrtop,
14136 NULL_TREE);
14137 groff = build3 (COMPONENT_REF, TREE_TYPE (f_groff), valist, f_groff,
14138 NULL_TREE);
14139 vroff = build3 (COMPONENT_REF, TREE_TYPE (f_vroff), valist, f_vroff,
14140 NULL_TREE);
14141
14142 /* Emit code to initialize STACK, which points to the next varargs stack
14143 argument. CUM->AAPCS_STACK_SIZE gives the number of stack words used
14144 by named arguments. STACK is 8-byte aligned. */
14145 t = make_tree (TREE_TYPE (stack), virtual_incoming_args_rtx);
14146 if (cum->aapcs_stack_size > 0)
14147 t = fold_build_pointer_plus_hwi (t, cum->aapcs_stack_size * UNITS_PER_WORD);
14148 t = build2 (MODIFY_EXPR, TREE_TYPE (stack), stack, t);
14149 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
14150
14151 /* Emit code to initialize GRTOP, the top of the GR save area.
14152 virtual_incoming_args_rtx should have been 16 byte aligned. */
14153 t = make_tree (TREE_TYPE (grtop), virtual_incoming_args_rtx);
14154 t = build2 (MODIFY_EXPR, TREE_TYPE (grtop), grtop, t);
14155 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
14156
14157 /* Emit code to initialize VRTOP, the top of the VR save area.
14158 This address is gr_save_area_bytes below GRTOP, rounded
14159 down to the next 16-byte boundary. */
14160 t = make_tree (TREE_TYPE (vrtop), virtual_incoming_args_rtx);
4f59f9f2
UB
14161 vr_offset = ROUND_UP (gr_save_area_size,
14162 STACK_BOUNDARY / BITS_PER_UNIT);
43e9d192
IB
14163
14164 if (vr_offset)
14165 t = fold_build_pointer_plus_hwi (t, -vr_offset);
14166 t = build2 (MODIFY_EXPR, TREE_TYPE (vrtop), vrtop, t);
14167 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
14168
14169 /* Emit code to initialize GROFF, the offset from GRTOP of the
14170 next GPR argument. */
14171 t = build2 (MODIFY_EXPR, TREE_TYPE (groff), groff,
14172 build_int_cst (TREE_TYPE (groff), -gr_save_area_size));
14173 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
14174
14175 /* Likewise emit code to initialize VROFF, the offset from FTOP
14176 of the next VR argument. */
14177 t = build2 (MODIFY_EXPR, TREE_TYPE (vroff), vroff,
14178 build_int_cst (TREE_TYPE (vroff), -vr_save_area_size));
14179 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
14180}
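/* Worked example (editor's addition, not part of the original source) of the
   save-area sizing above for a hypothetical variadic function:

     int hypothetical_sum (int n, ...);

   One general register (w0) is consumed by the named argument N, so before
   any tree-stdarg shrinking gr_save_area_size is (8 - 1) * 8 = 56 bytes
   (x1-x7) and vr_save_area_size is 8 * 16 = 128 bytes (q0-q7).  __vr_top
   then sits ROUND_UP (56, 16) = 64 bytes below __gr_top.  */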
14181
14182/* Implement TARGET_GIMPLIFY_VA_ARG_EXPR. */
14183
14184static tree
14185aarch64_gimplify_va_arg_expr (tree valist, tree type, gimple_seq *pre_p,
14186 gimple_seq *post_p ATTRIBUTE_UNUSED)
14187{
14188 tree addr;
14189 bool indirect_p;
14190 bool is_ha; /* is HFA or HVA. */
14191 bool dw_align; /* double-word align. */
ef4bddc2 14192 machine_mode ag_mode = VOIDmode;
43e9d192 14193 int nregs;
ef4bddc2 14194 machine_mode mode;
43e9d192
IB
14195
14196 tree f_stack, f_grtop, f_vrtop, f_groff, f_vroff;
14197 tree stack, f_top, f_off, off, arg, roundup, on_stack;
14198 HOST_WIDE_INT size, rsize, adjust, align;
14199 tree t, u, cond1, cond2;
14200
14201 indirect_p = pass_by_reference (NULL, TYPE_MODE (type), type, false);
14202 if (indirect_p)
14203 type = build_pointer_type (type);
14204
14205 mode = TYPE_MODE (type);
14206
14207 f_stack = TYPE_FIELDS (va_list_type_node);
14208 f_grtop = DECL_CHAIN (f_stack);
14209 f_vrtop = DECL_CHAIN (f_grtop);
14210 f_groff = DECL_CHAIN (f_vrtop);
14211 f_vroff = DECL_CHAIN (f_groff);
14212
14213 stack = build3 (COMPONENT_REF, TREE_TYPE (f_stack), unshare_expr (valist),
14214 f_stack, NULL_TREE);
14215 size = int_size_in_bytes (type);
c590597c
RE
14216
14217 bool abi_break;
14218 align
14219 = aarch64_function_arg_alignment (mode, type, &abi_break) / BITS_PER_UNIT;
43e9d192
IB
14220
14221 dw_align = false;
14222 adjust = 0;
14223 if (aarch64_vfp_is_call_or_return_candidate (mode,
14224 type,
14225 &ag_mode,
14226 &nregs,
14227 &is_ha))
14228 {
6a70badb
RS
14229 /* No frontends can create types with variable-sized modes, so we
14230 shouldn't be asked to pass or return them. */
14231 unsigned int ag_size = GET_MODE_SIZE (ag_mode).to_constant ();
14232
43e9d192 14233 /* TYPE passed in fp/simd registers. */
d5726973 14234 if (!TARGET_FLOAT)
fc29dfc9 14235 aarch64_err_no_fpadvsimd (mode);
43e9d192
IB
14236
14237 f_top = build3 (COMPONENT_REF, TREE_TYPE (f_vrtop),
14238 unshare_expr (valist), f_vrtop, NULL_TREE);
14239 f_off = build3 (COMPONENT_REF, TREE_TYPE (f_vroff),
14240 unshare_expr (valist), f_vroff, NULL_TREE);
14241
14242 rsize = nregs * UNITS_PER_VREG;
14243
14244 if (is_ha)
14245 {
6a70badb
RS
14246 if (BYTES_BIG_ENDIAN && ag_size < UNITS_PER_VREG)
14247 adjust = UNITS_PER_VREG - ag_size;
43e9d192 14248 }
76b0cbf8 14249 else if (BLOCK_REG_PADDING (mode, type, 1) == PAD_DOWNWARD
43e9d192
IB
14250 && size < UNITS_PER_VREG)
14251 {
14252 adjust = UNITS_PER_VREG - size;
14253 }
14254 }
14255 else
14256 {
14257 /* TYPE passed in general registers. */
14258 f_top = build3 (COMPONENT_REF, TREE_TYPE (f_grtop),
14259 unshare_expr (valist), f_grtop, NULL_TREE);
14260 f_off = build3 (COMPONENT_REF, TREE_TYPE (f_groff),
14261 unshare_expr (valist), f_groff, NULL_TREE);
4f59f9f2 14262 rsize = ROUND_UP (size, UNITS_PER_WORD);
43e9d192
IB
14263 nregs = rsize / UNITS_PER_WORD;
14264
14265 if (align > 8)
c590597c
RE
14266 {
14267 if (abi_break && warn_psabi)
14268 inform (input_location, "parameter passing for argument of type "
14269 "%qT changed in GCC 9.1", type);
14270 dw_align = true;
14271 }
43e9d192 14272
76b0cbf8 14273 if (BLOCK_REG_PADDING (mode, type, 1) == PAD_DOWNWARD
43e9d192
IB
14274 && size < UNITS_PER_WORD)
14275 {
14276 adjust = UNITS_PER_WORD - size;
14277 }
14278 }
14279
14280 /* Get a local temporary for the field value. */
14281 off = get_initialized_tmp_var (f_off, pre_p, NULL);
14282
14283 /* Emit code to branch if off >= 0. */
14284 t = build2 (GE_EXPR, boolean_type_node, off,
14285 build_int_cst (TREE_TYPE (off), 0));
14286 cond1 = build3 (COND_EXPR, ptr_type_node, t, NULL_TREE, NULL_TREE);
14287
14288 if (dw_align)
14289 {
14290 /* Emit: offs = (offs + 15) & -16. */
14291 t = build2 (PLUS_EXPR, TREE_TYPE (off), off,
14292 build_int_cst (TREE_TYPE (off), 15));
14293 t = build2 (BIT_AND_EXPR, TREE_TYPE (off), t,
14294 build_int_cst (TREE_TYPE (off), -16));
14295 roundup = build2 (MODIFY_EXPR, TREE_TYPE (off), off, t);
14296 }
14297 else
14298 roundup = NULL;
14299
14300 /* Update ap.__[g|v]r_offs */
14301 t = build2 (PLUS_EXPR, TREE_TYPE (off), off,
14302 build_int_cst (TREE_TYPE (off), rsize));
14303 t = build2 (MODIFY_EXPR, TREE_TYPE (f_off), unshare_expr (f_off), t);
14304
14305 /* String up. */
14306 if (roundup)
14307 t = build2 (COMPOUND_EXPR, TREE_TYPE (t), roundup, t);
14308
14309 /* [cond2] if (ap.__[g|v]r_offs > 0) */
14310 u = build2 (GT_EXPR, boolean_type_node, unshare_expr (f_off),
14311 build_int_cst (TREE_TYPE (f_off), 0));
14312 cond2 = build3 (COND_EXPR, ptr_type_node, u, NULL_TREE, NULL_TREE);
14313
14314 /* String up: make sure the assignment happens before the use. */
14315 t = build2 (COMPOUND_EXPR, TREE_TYPE (cond2), t, cond2);
14316 COND_EXPR_ELSE (cond1) = t;
14317
14318 /* Prepare the trees handling the argument that is passed on the stack;
14319 the top-level node will be stored in ON_STACK. */
14320 arg = get_initialized_tmp_var (stack, pre_p, NULL);
14321 if (align > 8)
14322 {
14323 /* if (alignof(type) > 8) (arg = arg + 15) & -16; */
4bdc2738 14324 t = fold_build_pointer_plus_hwi (arg, 15);
43e9d192
IB
14325 t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
14326 build_int_cst (TREE_TYPE (t), -16));
43e9d192
IB
14327 roundup = build2 (MODIFY_EXPR, TREE_TYPE (arg), arg, t);
14328 }
14329 else
14330 roundup = NULL;
14331 /* Advance ap.__stack */
4bdc2738 14332 t = fold_build_pointer_plus_hwi (arg, size + 7);
43e9d192
IB
14333 t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
14334 build_int_cst (TREE_TYPE (t), -8));
43e9d192
IB
14335 t = build2 (MODIFY_EXPR, TREE_TYPE (stack), unshare_expr (stack), t);
14336 /* String up roundup and advance. */
14337 if (roundup)
14338 t = build2 (COMPOUND_EXPR, TREE_TYPE (t), roundup, t);
14339 /* String up with arg */
14340 on_stack = build2 (COMPOUND_EXPR, TREE_TYPE (arg), t, arg);
14341 /* Big-endianness related address adjustment. */
76b0cbf8 14342 if (BLOCK_REG_PADDING (mode, type, 1) == PAD_DOWNWARD
43e9d192
IB
14343 && size < UNITS_PER_WORD)
14344 {
14345 t = build2 (POINTER_PLUS_EXPR, TREE_TYPE (arg), arg,
14346 size_int (UNITS_PER_WORD - size));
14347 on_stack = build2 (COMPOUND_EXPR, TREE_TYPE (arg), on_stack, t);
14348 }
14349
14350 COND_EXPR_THEN (cond1) = unshare_expr (on_stack);
14351 COND_EXPR_THEN (cond2) = unshare_expr (on_stack);
14352
14353 /* Adjustment to OFFSET in the case of BIG_ENDIAN. */
14354 t = off;
14355 if (adjust)
14356 t = build2 (PREINCREMENT_EXPR, TREE_TYPE (off), off,
14357 build_int_cst (TREE_TYPE (off), adjust));
14358
14359 t = fold_convert (sizetype, t);
14360 t = build2 (POINTER_PLUS_EXPR, TREE_TYPE (f_top), f_top, t);
14361
14362 if (is_ha)
14363 {
14364 /* type ha; // treat as "struct {ftype field[n];}"
14365 ... [computing offs]
14366 for (i = 0; i < nregs; ++i, offs += 16)
14367 ha.field[i] = *((ftype *)(ap.__vr_top + offs));
14368 return ha; */
14369 int i;
14370 tree tmp_ha, field_t, field_ptr_t;
14371
14372 /* Declare a local variable. */
14373 tmp_ha = create_tmp_var_raw (type, "ha");
14374 gimple_add_tmp_var (tmp_ha);
14375
14376 /* Establish the base type. */
14377 switch (ag_mode)
14378 {
4e10a5a7 14379 case E_SFmode:
43e9d192
IB
14380 field_t = float_type_node;
14381 field_ptr_t = float_ptr_type_node;
14382 break;
4e10a5a7 14383 case E_DFmode:
43e9d192
IB
14384 field_t = double_type_node;
14385 field_ptr_t = double_ptr_type_node;
14386 break;
4e10a5a7 14387 case E_TFmode:
43e9d192
IB
14388 field_t = long_double_type_node;
14389 field_ptr_t = long_double_ptr_type_node;
14390 break;
4e10a5a7 14391 case E_HFmode:
1b62ed4f
JG
14392 field_t = aarch64_fp16_type_node;
14393 field_ptr_t = aarch64_fp16_ptr_type_node;
43e9d192 14394 break;
4e10a5a7
RS
14395 case E_V2SImode:
14396 case E_V4SImode:
43e9d192
IB
14397 {
14398 tree innertype = make_signed_type (GET_MODE_PRECISION (SImode));
14399 field_t = build_vector_type_for_mode (innertype, ag_mode);
14400 field_ptr_t = build_pointer_type (field_t);
14401 }
14402 break;
14403 default:
14404 gcc_assert (0);
14405 }
14406
14407 /* *(field_ptr_t)&ha = *((field_ptr_t)vr_saved_area) */
14408 tmp_ha = build1 (ADDR_EXPR, field_ptr_t, tmp_ha);
14409 addr = t;
14410 t = fold_convert (field_ptr_t, addr);
14411 t = build2 (MODIFY_EXPR, field_t,
14412 build1 (INDIRECT_REF, field_t, tmp_ha),
14413 build1 (INDIRECT_REF, field_t, t));
14414
14415 /* ha.field[i] = *((field_ptr_t)vr_saved_area + i) */
14416 for (i = 1; i < nregs; ++i)
14417 {
14418 addr = fold_build_pointer_plus_hwi (addr, UNITS_PER_VREG);
14419 u = fold_convert (field_ptr_t, addr);
14420 u = build2 (MODIFY_EXPR, field_t,
14421 build2 (MEM_REF, field_t, tmp_ha,
14422 build_int_cst (field_ptr_t,
14423 (i *
14424 int_size_in_bytes (field_t)))),
14425 build1 (INDIRECT_REF, field_t, u));
14426 t = build2 (COMPOUND_EXPR, TREE_TYPE (t), t, u);
14427 }
14428
14429 u = fold_convert (TREE_TYPE (f_top), tmp_ha);
14430 t = build2 (COMPOUND_EXPR, TREE_TYPE (f_top), t, u);
14431 }
14432
14433 COND_EXPR_ELSE (cond2) = t;
14434 addr = fold_convert (build_pointer_type (type), cond1);
14435 addr = build_va_arg_indirect_ref (addr);
14436
14437 if (indirect_p)
14438 addr = build_va_arg_indirect_ref (addr);
14439
14440 return addr;
14441}
14442
14443/* Implement TARGET_SETUP_INCOMING_VARARGS. */
14444
14445static void
ef4bddc2 14446aarch64_setup_incoming_varargs (cumulative_args_t cum_v, machine_mode mode,
43e9d192
IB
14447 tree type, int *pretend_size ATTRIBUTE_UNUSED,
14448 int no_rtl)
14449{
14450 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
14451 CUMULATIVE_ARGS local_cum;
88e3bdd1
JW
14452 int gr_saved = cfun->va_list_gpr_size;
14453 int vr_saved = cfun->va_list_fpr_size;
43e9d192
IB
14454
14455 /* The caller has advanced CUM up to, but not beyond, the last named
14456 argument. Advance a local copy of CUM past the last "real" named
14457 argument, to find out how many registers are left over. */
14458 local_cum = *cum;
14459 aarch64_function_arg_advance (pack_cumulative_args(&local_cum), mode, type, true);
14460
88e3bdd1
JW
14461 /* Find out how many registers we need to save.
14462 Honor the tree-stdarg analysis results. */
14463 if (cfun->va_list_gpr_size)
14464 gr_saved = MIN (NUM_ARG_REGS - local_cum.aapcs_ncrn,
14465 cfun->va_list_gpr_size / UNITS_PER_WORD);
14466 if (cfun->va_list_fpr_size)
14467 vr_saved = MIN (NUM_FP_ARG_REGS - local_cum.aapcs_nvrn,
14468 cfun->va_list_fpr_size / UNITS_PER_VREG);
43e9d192 14469
d5726973 14470 if (!TARGET_FLOAT)
43e9d192 14471 {
261fb553 14472 gcc_assert (local_cum.aapcs_nvrn == 0);
43e9d192
IB
14473 vr_saved = 0;
14474 }
14475
14476 if (!no_rtl)
14477 {
14478 if (gr_saved > 0)
14479 {
14480 rtx ptr, mem;
14481
14482 /* virtual_incoming_args_rtx should have been 16-byte aligned. */
14483 ptr = plus_constant (Pmode, virtual_incoming_args_rtx,
14484 - gr_saved * UNITS_PER_WORD);
14485 mem = gen_frame_mem (BLKmode, ptr);
14486 set_mem_alias_set (mem, get_varargs_alias_set ());
14487
14488 move_block_from_reg (local_cum.aapcs_ncrn + R0_REGNUM,
14489 mem, gr_saved);
14490 }
14491 if (vr_saved > 0)
14492 {
14493 /* We can't use move_block_from_reg, because it will use
14494 the wrong mode, storing D regs only. */
ef4bddc2 14495 machine_mode mode = TImode;
88e3bdd1 14496 int off, i, vr_start;
43e9d192
IB
14497
14498 /* Set OFF to the offset from virtual_incoming_args_rtx of
14499 the first vector register. The VR save area lies below
14500 the GR one, and is aligned to 16 bytes. */
4f59f9f2
UB
14501 off = -ROUND_UP (gr_saved * UNITS_PER_WORD,
14502 STACK_BOUNDARY / BITS_PER_UNIT);
43e9d192
IB
14503 off -= vr_saved * UNITS_PER_VREG;
14504
88e3bdd1
JW
14505 vr_start = V0_REGNUM + local_cum.aapcs_nvrn;
14506 for (i = 0; i < vr_saved; ++i)
43e9d192
IB
14507 {
14508 rtx ptr, mem;
14509
14510 ptr = plus_constant (Pmode, virtual_incoming_args_rtx, off);
14511 mem = gen_frame_mem (mode, ptr);
14512 set_mem_alias_set (mem, get_varargs_alias_set ());
88e3bdd1 14513 aarch64_emit_move (mem, gen_rtx_REG (mode, vr_start + i));
43e9d192
IB
14514 off += UNITS_PER_VREG;
14515 }
14516 }
14517 }
14518
14519 /* We don't save the size into *PRETEND_SIZE because we want to avoid
14520 any complication of having crtl->args.pretend_args_size changed. */
8799637a 14521 cfun->machine->frame.saved_varargs_size
4f59f9f2
UB
14522 = (ROUND_UP (gr_saved * UNITS_PER_WORD,
14523 STACK_BOUNDARY / BITS_PER_UNIT)
43e9d192
IB
14524 + vr_saved * UNITS_PER_VREG);
14525}
14526
14527static void
14528aarch64_conditional_register_usage (void)
14529{
14530 int i;
14531 if (!TARGET_FLOAT)
14532 {
14533 for (i = V0_REGNUM; i <= V31_REGNUM; i++)
14534 {
14535 fixed_regs[i] = 1;
14536 call_used_regs[i] = 1;
14537 }
14538 }
43cacb12
RS
14539 if (!TARGET_SVE)
14540 for (i = P0_REGNUM; i <= P15_REGNUM; i++)
14541 {
14542 fixed_regs[i] = 1;
14543 call_used_regs[i] = 1;
14544 }
3751345d
RE
14545
14546 /* When tracking speculation, we need a couple of call-clobbered registers
14547 to track the speculation state. It would be nice to just use
14548 IP0 and IP1, but currently there are numerous places that just
14549 assume these registers are free for other uses (eg pointer
14550 authentication). */
14551 if (aarch64_track_speculation)
14552 {
14553 fixed_regs[SPECULATION_TRACKER_REGNUM] = 1;
14554 call_used_regs[SPECULATION_TRACKER_REGNUM] = 1;
14555 fixed_regs[SPECULATION_SCRATCH_REGNUM] = 1;
14556 call_used_regs[SPECULATION_SCRATCH_REGNUM] = 1;
14557 }
43e9d192
IB
14558}
14559
14560/* Walk down the type tree of TYPE counting consecutive base elements.
14561 If *MODEP is VOIDmode, then set it to the first valid floating point
14562 type. If a non-floating point type is found, or if a floating point
14563 type that doesn't match a non-VOIDmode *MODEP is found, then return -1,
14564 otherwise return the count in the sub-tree. */
14565static int
ef4bddc2 14566aapcs_vfp_sub_candidate (const_tree type, machine_mode *modep)
43e9d192 14567{
ef4bddc2 14568 machine_mode mode;
43e9d192
IB
14569 HOST_WIDE_INT size;
14570
14571 switch (TREE_CODE (type))
14572 {
14573 case REAL_TYPE:
14574 mode = TYPE_MODE (type);
1b62ed4f
JG
14575 if (mode != DFmode && mode != SFmode
14576 && mode != TFmode && mode != HFmode)
43e9d192
IB
14577 return -1;
14578
14579 if (*modep == VOIDmode)
14580 *modep = mode;
14581
14582 if (*modep == mode)
14583 return 1;
14584
14585 break;
14586
14587 case COMPLEX_TYPE:
14588 mode = TYPE_MODE (TREE_TYPE (type));
1b62ed4f
JG
14589 if (mode != DFmode && mode != SFmode
14590 && mode != TFmode && mode != HFmode)
43e9d192
IB
14591 return -1;
14592
14593 if (*modep == VOIDmode)
14594 *modep = mode;
14595
14596 if (*modep == mode)
14597 return 2;
14598
14599 break;
14600
14601 case VECTOR_TYPE:
14602 /* Use V2SImode and V4SImode as representatives of all 64-bit
14603 and 128-bit vector types. */
14604 size = int_size_in_bytes (type);
14605 switch (size)
14606 {
14607 case 8:
14608 mode = V2SImode;
14609 break;
14610 case 16:
14611 mode = V4SImode;
14612 break;
14613 default:
14614 return -1;
14615 }
14616
14617 if (*modep == VOIDmode)
14618 *modep = mode;
14619
14620 /* Vector modes are considered to be opaque: two vectors are
14621 equivalent for the purposes of being homogeneous aggregates
14622 if they are the same size. */
14623 if (*modep == mode)
14624 return 1;
14625
14626 break;
14627
14628 case ARRAY_TYPE:
14629 {
14630 int count;
14631 tree index = TYPE_DOMAIN (type);
14632
807e902e
KZ
14633 /* Can't handle incomplete types nor sizes that are not
14634 fixed. */
14635 if (!COMPLETE_TYPE_P (type)
14636 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
43e9d192
IB
14637 return -1;
14638
14639 count = aapcs_vfp_sub_candidate (TREE_TYPE (type), modep);
14640 if (count == -1
14641 || !index
14642 || !TYPE_MAX_VALUE (index)
cc269bb6 14643 || !tree_fits_uhwi_p (TYPE_MAX_VALUE (index))
43e9d192 14644 || !TYPE_MIN_VALUE (index)
cc269bb6 14645 || !tree_fits_uhwi_p (TYPE_MIN_VALUE (index))
43e9d192
IB
14646 || count < 0)
14647 return -1;
14648
ae7e9ddd
RS
14649 count *= (1 + tree_to_uhwi (TYPE_MAX_VALUE (index))
14650 - tree_to_uhwi (TYPE_MIN_VALUE (index)));
43e9d192
IB
14651
14652 /* There must be no padding. */
6a70badb
RS
14653 if (maybe_ne (wi::to_poly_wide (TYPE_SIZE (type)),
14654 count * GET_MODE_BITSIZE (*modep)))
43e9d192
IB
14655 return -1;
14656
14657 return count;
14658 }
14659
14660 case RECORD_TYPE:
14661 {
14662 int count = 0;
14663 int sub_count;
14664 tree field;
14665
807e902e
KZ
14666 /* Can't handle incomplete types nor sizes that are not
14667 fixed. */
14668 if (!COMPLETE_TYPE_P (type)
14669 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
43e9d192
IB
14670 return -1;
14671
14672 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
14673 {
14674 if (TREE_CODE (field) != FIELD_DECL)
14675 continue;
14676
14677 sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep);
14678 if (sub_count < 0)
14679 return -1;
14680 count += sub_count;
14681 }
14682
14683 /* There must be no padding. */
6a70badb
RS
14684 if (maybe_ne (wi::to_poly_wide (TYPE_SIZE (type)),
14685 count * GET_MODE_BITSIZE (*modep)))
43e9d192
IB
14686 return -1;
14687
14688 return count;
14689 }
14690
14691 case UNION_TYPE:
14692 case QUAL_UNION_TYPE:
14693 {
14694 /* These aren't very interesting except in a degenerate case. */
14695 int count = 0;
14696 int sub_count;
14697 tree field;
14698
807e902e
KZ
14699 /* Can't handle incomplete types nor sizes that are not
14700 fixed. */
14701 if (!COMPLETE_TYPE_P (type)
14702 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
43e9d192
IB
14703 return -1;
14704
14705 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
14706 {
14707 if (TREE_CODE (field) != FIELD_DECL)
14708 continue;
14709
14710 sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep);
14711 if (sub_count < 0)
14712 return -1;
14713 count = count > sub_count ? count : sub_count;
14714 }
14715
14716 /* There must be no padding. */
6a70badb
RS
14717 if (maybe_ne (wi::to_poly_wide (TYPE_SIZE (type)),
14718 count * GET_MODE_BITSIZE (*modep)))
43e9d192
IB
14719 return -1;
14720
14721 return count;
14722 }
14723
14724 default:
14725 break;
14726 }
14727
14728 return -1;
14729}
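/* Illustrative examples (editor's addition, not part of the original source)
   of the element counting above, with hypothetical type names:

     struct hfa3 { float a, b, c; };         counts as 3 SFmode elements
     _Complex double                         counts as 2 DFmode elements
     struct mixed { double d; float f; };    returns -1 (element modes differ)
     struct big { double d[5]; };            counts as 5, later rejected by the
                                             HA_MAX_NUM_FLDS check  */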
14730
b6ec6215
KT
14731/* Return TRUE if the type, as described by TYPE and MODE, is a short vector
14732 type as described in AAPCS64 \S 4.1.2.
14733
14734 See the comment above aarch64_composite_type_p for the notes on MODE. */
14735
14736static bool
14737aarch64_short_vector_p (const_tree type,
14738 machine_mode mode)
14739{
6a70badb 14740 poly_int64 size = -1;
b6ec6215
KT
14741
14742 if (type && TREE_CODE (type) == VECTOR_TYPE)
14743 size = int_size_in_bytes (type);
14744 else if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
14745 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
14746 size = GET_MODE_SIZE (mode);
14747
6a70badb 14748 return known_eq (size, 8) || known_eq (size, 16);
b6ec6215
KT
14749}
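/* Illustrative examples (editor's addition, not part of the original source)
   of the size test above, using GCC vector_size types:

     typedef int v2si __attribute__ ((vector_size (8)));    8 bytes, short vector
     typedef int v4si __attribute__ ((vector_size (16)));   16 bytes, short vector
     typedef int v8si __attribute__ ((vector_size (32)));   32 bytes, not a
                                                             short vector  */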
14750
43e9d192
IB
14751/* Return TRUE if the type, as described by TYPE and MODE, is a composite
14752 type as described in AAPCS64 \S 4.3. This includes aggregate, union and
14753 array types. The C99 floating-point complex types are also considered
14754 as composite types, according to AAPCS64 \S 7.1.1. The complex integer
14755 types, which are GCC extensions and out of the scope of AAPCS64, are
14756 treated as composite types here as well.
14757
14758 Note that MODE itself is not sufficient in determining whether a type
14759 is such a composite type or not. This is because
14760 stor-layout.c:compute_record_mode may have already changed the MODE
14761 (BLKmode) of a RECORD_TYPE TYPE to some other mode. For example, a
14762 structure with only one field may have its MODE set to the mode of the
14763 field. Also an integer mode whose size matches the size of the
14764 RECORD_TYPE type may be used to substitute the original mode
14765 (i.e. BLKmode) in certain circumstances. In other words, MODE cannot be
14766 solely relied on. */
14767
14768static bool
14769aarch64_composite_type_p (const_tree type,
ef4bddc2 14770 machine_mode mode)
43e9d192 14771{
b6ec6215
KT
14772 if (aarch64_short_vector_p (type, mode))
14773 return false;
14774
43e9d192
IB
14775 if (type && (AGGREGATE_TYPE_P (type) || TREE_CODE (type) == COMPLEX_TYPE))
14776 return true;
14777
14778 if (mode == BLKmode
14779 || GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT
14780 || GET_MODE_CLASS (mode) == MODE_COMPLEX_INT)
14781 return true;
14782
14783 return false;
14784}
14785
43e9d192
IB
14786/* Return TRUE if an argument, whose type is described by TYPE and MODE,
14787 shall be passed or returned in simd/fp register(s) (providing these
14788 parameter passing registers are available).
14789
14790 Upon successful return, *COUNT returns the number of needed registers,
14791 *BASE_MODE returns the mode of the individual register and when IS_HA
14792 is not NULL, *IS_HA indicates whether or not the argument is a homogeneous
14793 floating-point aggregate or a homogeneous short-vector aggregate. */
14794
14795static bool
ef4bddc2 14796aarch64_vfp_is_call_or_return_candidate (machine_mode mode,
43e9d192 14797 const_tree type,
ef4bddc2 14798 machine_mode *base_mode,
43e9d192
IB
14799 int *count,
14800 bool *is_ha)
14801{
ef4bddc2 14802 machine_mode new_mode = VOIDmode;
43e9d192
IB
14803 bool composite_p = aarch64_composite_type_p (type, mode);
14804
14805 if (is_ha != NULL) *is_ha = false;
14806
14807 if ((!composite_p && GET_MODE_CLASS (mode) == MODE_FLOAT)
14808 || aarch64_short_vector_p (type, mode))
14809 {
14810 *count = 1;
14811 new_mode = mode;
14812 }
14813 else if (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT)
14814 {
14815 if (is_ha != NULL) *is_ha = true;
14816 *count = 2;
14817 new_mode = GET_MODE_INNER (mode);
14818 }
14819 else if (type && composite_p)
14820 {
14821 int ag_count = aapcs_vfp_sub_candidate (type, &new_mode);
14822
14823 if (ag_count > 0 && ag_count <= HA_MAX_NUM_FLDS)
14824 {
14825 if (is_ha != NULL) *is_ha = true;
14826 *count = ag_count;
14827 }
14828 else
14829 return false;
14830 }
14831 else
14832 return false;
14833
14834 *base_mode = new_mode;
14835 return true;
14836}
14837
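/* Illustrative standalone sketch, not part of aarch64.c: a simplified model
   of the homogeneous-aggregate test above, assuming every field is a
   'double'.  The real code (aapcs_vfp_sub_candidate) also handles float,
   vector and complex fields and nested aggregates; the helper name below is
   invented for this example.  It mirrors the two requirements checked above:
   at most HA_MAX_NUM_FLDS (4) equal floating-point fields, and no padding,
   i.e. the aggregate size must equal count * field size.  */
#include <stdio.h>

/* Return the number of FP registers needed, or -1 if the aggregate is not a
   homogeneous floating-point aggregate under this simplified model.  */
static int
simple_hfa_candidate (unsigned int aggregate_size, unsigned int n_fields,
                      unsigned int field_size)
{
  if (n_fields == 0 || n_fields > 4)
    return -1;
  /* "There must be no padding."  */
  if (aggregate_size != n_fields * field_size)
    return -1;
  return (int) n_fields;
}

int
main (void)
{
  /* struct { double x, y; }: 16 bytes, two fields -> 2 registers.  */
  printf ("%d\n", simple_hfa_candidate (16, 2, 8));
  /* A 24-byte aggregate holding two doubles (so with padding) -> rejected.  */
  printf ("%d\n", simple_hfa_candidate (24, 2, 8));
  /* Five doubles -> rejected, more than HA_MAX_NUM_FLDS.  */
  printf ("%d\n", simple_hfa_candidate (40, 5, 8));
  return 0;
}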
14838/* Implement TARGET_STRUCT_VALUE_RTX. */
14839
14840static rtx
14841aarch64_struct_value_rtx (tree fndecl ATTRIBUTE_UNUSED,
14842 int incoming ATTRIBUTE_UNUSED)
14843{
14844 return gen_rtx_REG (Pmode, AARCH64_STRUCT_VALUE_REGNUM);
14845}
14846
14847/* Implements target hook vector_mode_supported_p. */
14848static bool
ef4bddc2 14849aarch64_vector_mode_supported_p (machine_mode mode)
43e9d192 14850{
43cacb12
RS
14851 unsigned int vec_flags = aarch64_classify_vector_mode (mode);
14852 return vec_flags != 0 && (vec_flags & VEC_STRUCT) == 0;
43e9d192
IB
14853}
14854
4aeb1ba7
RS
14855/* Return the full-width SVE vector mode for element mode MODE, if one
14856 exists. */
14857opt_machine_mode
14858aarch64_full_sve_mode (scalar_mode mode)
14859{
14860 switch (mode)
14861 {
14862 case E_DFmode:
14863 return VNx2DFmode;
14864 case E_SFmode:
14865 return VNx4SFmode;
14866 case E_HFmode:
14867 return VNx8HFmode;
14868 case E_DImode:
14869 return VNx2DImode;
14870 case E_SImode:
14871 return VNx4SImode;
14872 case E_HImode:
14873 return VNx8HImode;
14874 case E_QImode:
14875 return VNx16QImode;
14876 default:
14877 return opt_machine_mode ();
14878 }
14879}
14880
14881/* Return the 128-bit Advanced SIMD vector mode for element mode MODE,
14882 if it exists. */
14883opt_machine_mode
14884aarch64_vq_mode (scalar_mode mode)
14885{
14886 switch (mode)
14887 {
14888 case E_DFmode:
14889 return V2DFmode;
14890 case E_SFmode:
14891 return V4SFmode;
14892 case E_HFmode:
14893 return V8HFmode;
14894 case E_SImode:
14895 return V4SImode;
14896 case E_HImode:
14897 return V8HImode;
14898 case E_QImode:
14899 return V16QImode;
14900 case E_DImode:
14901 return V2DImode;
14902 default:
14903 return opt_machine_mode ();
14904 }
14905}
14906
b7342d25
IB
14907/* Return appropriate SIMD container
14908 for MODE within a vector of WIDTH bits. */
ef4bddc2 14909static machine_mode
43cacb12 14910aarch64_simd_container_mode (scalar_mode mode, poly_int64 width)
43e9d192 14911{
43cacb12 14912 if (TARGET_SVE && known_eq (width, BITS_PER_SVE_VECTOR))
4aeb1ba7 14913 return aarch64_full_sve_mode (mode).else_mode (word_mode);
43cacb12
RS
14914
14915 gcc_assert (known_eq (width, 64) || known_eq (width, 128));
43e9d192 14916 if (TARGET_SIMD)
b7342d25 14917 {
43cacb12 14918 if (known_eq (width, 128))
4aeb1ba7 14919 return aarch64_vq_mode (mode).else_mode (word_mode);
b7342d25
IB
14920 else
14921 switch (mode)
14922 {
4e10a5a7 14923 case E_SFmode:
b7342d25 14924 return V2SFmode;
4e10a5a7 14925 case E_HFmode:
b719f884 14926 return V4HFmode;
4e10a5a7 14927 case E_SImode:
b7342d25 14928 return V2SImode;
4e10a5a7 14929 case E_HImode:
b7342d25 14930 return V4HImode;
4e10a5a7 14931 case E_QImode:
b7342d25
IB
14932 return V8QImode;
14933 default:
14934 break;
14935 }
14936 }
43e9d192
IB
14937 return word_mode;
14938}
14939
b7342d25 14940/* Return 128-bit container as the preferred SIMD mode for MODE. */
ef4bddc2 14941static machine_mode
005ba29c 14942aarch64_preferred_simd_mode (scalar_mode mode)
b7342d25 14943{
43cacb12
RS
14944 poly_int64 bits = TARGET_SVE ? BITS_PER_SVE_VECTOR : 128;
14945 return aarch64_simd_container_mode (mode, bits);
b7342d25
IB
14946}
14947
86e36728 14948/* Return a list of possible vector sizes for the vectorizer
3b357264 14949 to iterate over. */
86e36728 14950static void
f63445e5 14951aarch64_autovectorize_vector_sizes (vector_sizes *sizes, bool)
3b357264 14952{
43cacb12
RS
14953 if (TARGET_SVE)
14954 sizes->safe_push (BYTES_PER_SVE_VECTOR);
86e36728
RS
14955 sizes->safe_push (16);
14956 sizes->safe_push (8);
3b357264
JG
14957}
14958
ac2b960f
YZ
14959/* Implement TARGET_MANGLE_TYPE. */
14960
6f549691 14961static const char *
ac2b960f
YZ
14962aarch64_mangle_type (const_tree type)
14963{
14964 /* The AArch64 ABI documents say that "__va_list" has to be
17f8ace2 14965 mangled as if it is in the "std" namespace. */
ac2b960f
YZ
14966 if (lang_hooks.types_compatible_p (CONST_CAST_TREE (type), va_list_type))
14967 return "St9__va_list";
14968
c2ec330c
AL
14969 /* Half-precision float. */
14970 if (TREE_CODE (type) == REAL_TYPE && TYPE_PRECISION (type) == 16)
14971 return "Dh";
14972
f9d53c27
TB
14973 /* Mangle AArch64-specific internal types. TYPE_NAME is non-NULL_TREE for
14974 builtin types. */
14975 if (TYPE_NAME (type) != NULL)
14976 return aarch64_mangle_builtin_type (type);
c6fc9e43 14977
ac2b960f
YZ
14978 /* Use the default mangling. */
14979 return NULL;
14980}
14981
75cf1494
KT
14982/* Find the first rtx_insn before insn that will generate an assembly
14983 instruction. */
14984
14985static rtx_insn *
14986aarch64_prev_real_insn (rtx_insn *insn)
14987{
14988 if (!insn)
14989 return NULL;
14990
14991 do
14992 {
14993 insn = prev_real_insn (insn);
14994 }
14995 while (insn && recog_memoized (insn) < 0);
14996
14997 return insn;
14998}
14999
15000static bool
15001is_madd_op (enum attr_type t1)
15002{
15003 unsigned int i;
15004 /* A number of these may be AArch32 only. */
15005 enum attr_type mlatypes[] = {
15006 TYPE_MLA, TYPE_MLAS, TYPE_SMLAD, TYPE_SMLADX, TYPE_SMLAL, TYPE_SMLALD,
15007 TYPE_SMLALS, TYPE_SMLALXY, TYPE_SMLAWX, TYPE_SMLAWY, TYPE_SMLAXY,
15008 TYPE_SMMLA, TYPE_UMLAL, TYPE_UMLALS, TYPE_SMLSD, TYPE_SMLSDX, TYPE_SMLSLD
15009 };
15010
15011 for (i = 0; i < sizeof (mlatypes) / sizeof (enum attr_type); i++)
15012 {
15013 if (t1 == mlatypes[i])
15014 return true;
15015 }
15016
15017 return false;
15018}
15019
15020/* Check if there is a register dependency between a load and the insn
15021 for which we hold recog_data. */
15022
15023static bool
15024dep_between_memop_and_curr (rtx memop)
15025{
15026 rtx load_reg;
15027 int opno;
15028
8baff86e 15029 gcc_assert (GET_CODE (memop) == SET);
75cf1494
KT
15030
15031 if (!REG_P (SET_DEST (memop)))
15032 return false;
15033
15034 load_reg = SET_DEST (memop);
8baff86e 15035 for (opno = 1; opno < recog_data.n_operands; opno++)
75cf1494
KT
15036 {
15037 rtx operand = recog_data.operand[opno];
15038 if (REG_P (operand)
15039 && reg_overlap_mentioned_p (load_reg, operand))
15040 return true;
15041
15042 }
15043 return false;
15044}
15045
8baff86e
KT
15046
15047/* When working around the Cortex-A53 erratum 835769,
15048 given rtx_insn INSN, return true if it is a 64-bit multiply-accumulate
15049 instruction and has a preceding memory instruction such that a NOP
15050 should be inserted between them. */
15051
75cf1494
KT
15052bool
15053aarch64_madd_needs_nop (rtx_insn* insn)
15054{
15055 enum attr_type attr_type;
15056 rtx_insn *prev;
15057 rtx body;
15058
b32c1043 15059 if (!TARGET_FIX_ERR_A53_835769)
75cf1494
KT
15060 return false;
15061
e322d6e3 15062 if (!INSN_P (insn) || recog_memoized (insn) < 0)
75cf1494
KT
15063 return false;
15064
15065 attr_type = get_attr_type (insn);
15066 if (!is_madd_op (attr_type))
15067 return false;
15068
15069 prev = aarch64_prev_real_insn (insn);
3fea1a75
KT
15070 /* aarch64_prev_real_insn can call recog_memoized on insns other than INSN.
15071 Restore recog state to INSN to avoid state corruption. */
15072 extract_constrain_insn_cached (insn);
15073
550e2205 15074 if (!prev || !contains_mem_rtx_p (PATTERN (prev)))
75cf1494
KT
15075 return false;
15076
15077 body = single_set (prev);
15078
15079 /* If the previous insn is a memory op and there is no dependency between
8baff86e
KT
15080 it and the DImode madd, emit a NOP between them. If body is NULL then we
15081 have a complex memory operation, probably a load/store pair.
15082 Be conservative for now and emit a NOP. */
15083 if (GET_MODE (recog_data.operand[0]) == DImode
15084 && (!body || !dep_between_memop_and_curr (body)))
75cf1494
KT
15085 return true;
15086
15087 return false;
15088
15089}
15090
8baff86e
KT
15091
15092/* Implement FINAL_PRESCAN_INSN. */
15093
75cf1494
KT
15094void
15095aarch64_final_prescan_insn (rtx_insn *insn)
15096{
15097 if (aarch64_madd_needs_nop (insn))
15098 fprintf (asm_out_file, "\tnop // between mem op and mult-accumulate\n");
15099}
15100
15101
43cacb12
RS
15102/* Return true if BASE_OR_STEP is a valid immediate operand for an SVE INDEX
15103 instruction. */
15104
15105bool
15106aarch64_sve_index_immediate_p (rtx base_or_step)
15107{
15108 return (CONST_INT_P (base_or_step)
15109 && IN_RANGE (INTVAL (base_or_step), -16, 15));
15110}
15111
15112/* Return true if X is a valid immediate for the SVE ADD and SUB
15113 instructions. Negate X first if NEGATE_P is true. */
15114
15115bool
15116aarch64_sve_arith_immediate_p (rtx x, bool negate_p)
15117{
15118 rtx elt;
15119
15120 if (!const_vec_duplicate_p (x, &elt)
15121 || !CONST_INT_P (elt))
15122 return false;
15123
15124 HOST_WIDE_INT val = INTVAL (elt);
15125 if (negate_p)
15126 val = -val;
15127 val &= GET_MODE_MASK (GET_MODE_INNER (GET_MODE (x)));
15128
15129 if (val & 0xff)
15130 return IN_RANGE (val, 0, 0xff);
15131 return IN_RANGE (val, 0, 0xff00);
15132}
15133
15134/* Return true if X is a valid immediate operand for an SVE logical
15135 instruction such as AND. */
15136
15137bool
15138aarch64_sve_bitmask_immediate_p (rtx x)
15139{
15140 rtx elt;
15141
15142 return (const_vec_duplicate_p (x, &elt)
15143 && CONST_INT_P (elt)
15144 && aarch64_bitmask_imm (INTVAL (elt),
15145 GET_MODE_INNER (GET_MODE (x))));
15146}
15147
15148/* Return true if X is a valid immediate for the SVE DUP and CPY
15149 instructions. */
15150
15151bool
15152aarch64_sve_dup_immediate_p (rtx x)
15153{
d29f7dd5
RS
15154 x = aarch64_bit_representation (unwrap_const_vec_duplicate (x));
15155 if (!CONST_INT_P (x))
43cacb12
RS
15156 return false;
15157
d29f7dd5 15158 HOST_WIDE_INT val = INTVAL (x);
43cacb12
RS
15159 if (val & 0xff)
15160 return IN_RANGE (val, -0x80, 0x7f);
15161 return IN_RANGE (val, -0x8000, 0x7f00);
15162}
15163
15164/* Return true if X is a valid immediate operand for an SVE CMP instruction.
15165 SIGNED_P says whether the operand is signed rather than unsigned. */
15166
15167bool
15168aarch64_sve_cmp_immediate_p (rtx x, bool signed_p)
15169{
15170 rtx elt;
15171
15172 return (const_vec_duplicate_p (x, &elt)
15173 && CONST_INT_P (elt)
15174 && (signed_p
15175 ? IN_RANGE (INTVAL (elt), -16, 15)
15176 : IN_RANGE (INTVAL (elt), 0, 127)));
15177}
15178
15179/* Return true if X is a valid immediate operand for an SVE FADD or FSUB
15180 instruction. Negate X first if NEGATE_P is true. */
15181
15182bool
15183aarch64_sve_float_arith_immediate_p (rtx x, bool negate_p)
15184{
15185 rtx elt;
15186 REAL_VALUE_TYPE r;
15187
15188 if (!const_vec_duplicate_p (x, &elt)
15189 || GET_CODE (elt) != CONST_DOUBLE)
15190 return false;
15191
15192 r = *CONST_DOUBLE_REAL_VALUE (elt);
15193
15194 if (negate_p)
15195 r = real_value_negate (&r);
15196
15197 if (real_equal (&r, &dconst1))
15198 return true;
15199 if (real_equal (&r, &dconsthalf))
15200 return true;
15201 return false;
15202}
15203
15204/* Return true if X is a valid immediate operand for an SVE FMUL
15205 instruction. */
15206
15207bool
15208aarch64_sve_float_mul_immediate_p (rtx x)
15209{
15210 rtx elt;
15211
15212 /* GCC will never generate a multiply with an immediate of 2, so there is no
15213 point testing for it (even though it is a valid constant). */
15214 return (const_vec_duplicate_p (x, &elt)
15215 && GET_CODE (elt) == CONST_DOUBLE
15216 && real_equal (CONST_DOUBLE_REAL_VALUE (elt), &dconsthalf));
15217}
15218
b187677b
RS
15219/* Return true if replicating VAL32 is a valid 2-byte or 4-byte immediate
15220 for the Advanced SIMD operation described by WHICH and INSN. If INFO
15221 is nonnull, use it to describe valid immediates. */
3520f7cc 15222static bool
b187677b
RS
15223aarch64_advsimd_valid_immediate_hs (unsigned int val32,
15224 simd_immediate_info *info,
15225 enum simd_immediate_check which,
15226 simd_immediate_info::insn_type insn)
15227{
15228 /* Try a 4-byte immediate with LSL. */
15229 for (unsigned int shift = 0; shift < 32; shift += 8)
15230 if ((val32 & (0xff << shift)) == val32)
15231 {
15232 if (info)
15233 *info = simd_immediate_info (SImode, val32 >> shift, insn,
15234 simd_immediate_info::LSL, shift);
15235 return true;
15236 }
3520f7cc 15237
b187677b
RS
15238 /* Try a 2-byte immediate with LSL. */
15239 unsigned int imm16 = val32 & 0xffff;
15240 if (imm16 == (val32 >> 16))
15241 for (unsigned int shift = 0; shift < 16; shift += 8)
15242 if ((imm16 & (0xff << shift)) == imm16)
48063b9d 15243 {
b187677b
RS
15244 if (info)
15245 *info = simd_immediate_info (HImode, imm16 >> shift, insn,
15246 simd_immediate_info::LSL, shift);
15247 return true;
48063b9d 15248 }
3520f7cc 15249
b187677b
RS
15250 /* Try a 4-byte immediate with MSL, except for cases that MVN
15251 can handle. */
15252 if (which == AARCH64_CHECK_MOV)
15253 for (unsigned int shift = 8; shift < 24; shift += 8)
15254 {
15255 unsigned int low = (1 << shift) - 1;
15256 if (((val32 & (0xff << shift)) | low) == val32)
15257 {
15258 if (info)
15259 *info = simd_immediate_info (SImode, val32 >> shift, insn,
15260 simd_immediate_info::MSL, shift);
15261 return true;
15262 }
15263 }
43e9d192 15264
b187677b
RS
15265 return false;
15266}
15267
15268/* Return true if replicating VAL64 is a valid immediate for the
15269 Advanced SIMD operation described by WHICH. If INFO is nonnull,
15270 use it to describe valid immediates. */
15271static bool
15272aarch64_advsimd_valid_immediate (unsigned HOST_WIDE_INT val64,
15273 simd_immediate_info *info,
15274 enum simd_immediate_check which)
15275{
15276 unsigned int val32 = val64 & 0xffffffff;
15277 unsigned int val16 = val64 & 0xffff;
15278 unsigned int val8 = val64 & 0xff;
15279
15280 if (val32 == (val64 >> 32))
43e9d192 15281 {
b187677b
RS
15282 if ((which & AARCH64_CHECK_ORR) != 0
15283 && aarch64_advsimd_valid_immediate_hs (val32, info, which,
15284 simd_immediate_info::MOV))
15285 return true;
43e9d192 15286
b187677b
RS
15287 if ((which & AARCH64_CHECK_BIC) != 0
15288 && aarch64_advsimd_valid_immediate_hs (~val32, info, which,
15289 simd_immediate_info::MVN))
15290 return true;
ee78df47 15291
b187677b
RS
15292 /* Try using a replicated byte. */
15293 if (which == AARCH64_CHECK_MOV
15294 && val16 == (val32 >> 16)
15295 && val8 == (val16 >> 8))
ee78df47 15296 {
b187677b
RS
15297 if (info)
15298 *info = simd_immediate_info (QImode, val8);
15299 return true;
ee78df47 15300 }
43e9d192
IB
15301 }
15302
b187677b
RS
15303 /* Try using a bit-to-bytemask. */
15304 if (which == AARCH64_CHECK_MOV)
43e9d192 15305 {
b187677b
RS
15306 unsigned int i;
15307 for (i = 0; i < 64; i += 8)
ab6501d7 15308 {
b187677b
RS
15309 unsigned char byte = (val64 >> i) & 0xff;
15310 if (byte != 0 && byte != 0xff)
15311 break;
ab6501d7 15312 }
b187677b 15313 if (i == 64)
ab6501d7 15314 {
b187677b
RS
15315 if (info)
15316 *info = simd_immediate_info (DImode, val64);
15317 return true;
ab6501d7 15318 }
43e9d192 15319 }
b187677b
RS
15320 return false;
15321}
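/* Illustrative standalone sketch, not part of aarch64.c: the core shift
   tests of aarch64_advsimd_valid_immediate_hs above, restated over a plain
   uint32_t.  A 32-bit replicated value is a candidate if it is a single
   byte at a byte boundary (4-byte form with LSL), or if its two 16-bit
   halves agree and that half is a single byte at a byte boundary (2-byte
   form with LSL).  The MSL and MVN variants are left out; the helper name
   is invented for this example.  */
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

static bool
movi_hs_lsl_candidate (uint32_t val32)
{
  /* 4-byte immediate with LSL #0/#8/#16/#24.  */
  for (unsigned int shift = 0; shift < 32; shift += 8)
    if ((val32 & (0xffu << shift)) == val32)
      return true;

  /* 2-byte immediate with LSL #0/#8, provided both halves agree.  */
  uint32_t imm16 = val32 & 0xffff;
  if (imm16 == (val32 >> 16))
    for (unsigned int shift = 0; shift < 16; shift += 8)
      if ((imm16 & (0xffu << shift)) == imm16)
        return true;

  return false;
}

int
main (void)
{
  printf ("%d\n", movi_hs_lsl_candidate (0x00ab0000));  /* 1: byte << 16  */
  printf ("%d\n", movi_hs_lsl_candidate (0x00cd00cd));  /* 1: 2-byte form */
  printf ("%d\n", movi_hs_lsl_candidate (0x12345678));  /* 0 */
  return 0;
}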
43e9d192 15322
43cacb12
RS
15323/* Return true if replicating VAL64 gives a valid immediate for an SVE MOV
15324 instruction. If INFO is nonnull, use it to describe valid immediates. */
15325
15326static bool
15327aarch64_sve_valid_immediate (unsigned HOST_WIDE_INT val64,
15328 simd_immediate_info *info)
15329{
15330 scalar_int_mode mode = DImode;
15331 unsigned int val32 = val64 & 0xffffffff;
15332 if (val32 == (val64 >> 32))
15333 {
15334 mode = SImode;
15335 unsigned int val16 = val32 & 0xffff;
15336 if (val16 == (val32 >> 16))
15337 {
15338 mode = HImode;
15339 unsigned int val8 = val16 & 0xff;
15340 if (val8 == (val16 >> 8))
15341 mode = QImode;
15342 }
15343 }
15344 HOST_WIDE_INT val = trunc_int_for_mode (val64, mode);
15345 if (IN_RANGE (val, -0x80, 0x7f))
15346 {
15347 /* DUP with no shift. */
15348 if (info)
15349 *info = simd_immediate_info (mode, val);
15350 return true;
15351 }
15352 if ((val & 0xff) == 0 && IN_RANGE (val, -0x8000, 0x7f00))
15353 {
15354 /* DUP with LSL #8. */
15355 if (info)
15356 *info = simd_immediate_info (mode, val);
15357 return true;
15358 }
15359 if (aarch64_bitmask_imm (val64, mode))
15360 {
15361 /* DUPM. */
15362 if (info)
15363 *info = simd_immediate_info (mode, val);
15364 return true;
15365 }
15366 return false;
15367}
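/* Illustrative standalone sketch, not part of aarch64.c: the narrowing step
   performed by aarch64_sve_valid_immediate above.  Given the replicated
   64-bit value, find the smallest element width (in bits) at which the
   value is still a pure replication; the DUP/DUP-with-LSL/DUPM range checks
   are then applied at that width.  The helper name is invented for this
   example.  */
#include <stdint.h>
#include <stdio.h>

static unsigned int
sve_dup_element_bits (uint64_t val64)
{
  unsigned int bits = 64;
  uint32_t val32 = val64 & 0xffffffff;
  if (val32 == (uint32_t) (val64 >> 32))
    {
      bits = 32;
      uint32_t val16 = val32 & 0xffff;
      if (val16 == (val32 >> 16))
        {
          bits = 16;
          uint32_t val8 = val16 & 0xff;
          if (val8 == (val16 >> 8))
            bits = 8;
        }
    }
  return bits;
}

int
main (void)
{
  printf ("%u\n", sve_dup_element_bits (0x0101010101010101ull)); /* 8  */
  printf ("%u\n", sve_dup_element_bits (0x00ff00ff00ff00ffull)); /* 16 */
  printf ("%u\n", sve_dup_element_bits (0x1234567812345678ull)); /* 32 */
  printf ("%u\n", sve_dup_element_bits (0x0123456789abcdefull)); /* 64 */
  return 0;
}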
15368
0b1fe8cf
RS
15369/* Return true if X is a valid SVE predicate. If INFO is nonnull, use
15370 it to describe valid immediates. */
15371
15372static bool
15373aarch64_sve_pred_valid_immediate (rtx x, simd_immediate_info *info)
15374{
15375 if (x == CONST0_RTX (GET_MODE (x)))
15376 {
15377 if (info)
15378 *info = simd_immediate_info (DImode, 0);
15379 return true;
15380 }
15381
15382 /* Analyze the value as a VNx16BImode. This should be relatively
15383 efficient, since rtx_vector_builder has enough built-in capacity
15384 to store all VLA predicate constants without needing the heap. */
15385 rtx_vector_builder builder;
15386 if (!aarch64_get_sve_pred_bits (builder, x))
15387 return false;
15388
15389 unsigned int elt_size = aarch64_widest_sve_pred_elt_size (builder);
15390 if (int vl = aarch64_partial_ptrue_length (builder, elt_size))
15391 {
15392 machine_mode mode = aarch64_sve_pred_mode (elt_size).require ();
15393 aarch64_svpattern pattern = aarch64_svpattern_for_vl (mode, vl);
15394 if (pattern != AARCH64_NUM_SVPATTERNS)
15395 {
15396 if (info)
15397 {
15398 scalar_int_mode int_mode = aarch64_sve_element_int_mode (mode);
15399 *info = simd_immediate_info (int_mode, pattern);
15400 }
15401 return true;
15402 }
15403 }
15404 return false;
15405}
15406
b187677b
RS
15407/* Return true if OP is a valid SIMD immediate for the operation
15408 described by WHICH. If INFO is nonnull, use it to describe valid
15409 immediates. */
15410bool
15411aarch64_simd_valid_immediate (rtx op, simd_immediate_info *info,
15412 enum simd_immediate_check which)
15413{
43cacb12
RS
15414 machine_mode mode = GET_MODE (op);
15415 unsigned int vec_flags = aarch64_classify_vector_mode (mode);
15416 if (vec_flags == 0 || vec_flags == (VEC_ADVSIMD | VEC_STRUCT))
15417 return false;
15418
0b1fe8cf
RS
15419 if (vec_flags & VEC_SVE_PRED)
15420 return aarch64_sve_pred_valid_immediate (op, info);
15421
43cacb12 15422 scalar_mode elt_mode = GET_MODE_INNER (mode);
f9093f23 15423 rtx base, step;
b187677b 15424 unsigned int n_elts;
f9093f23
RS
15425 if (GET_CODE (op) == CONST_VECTOR
15426 && CONST_VECTOR_DUPLICATE_P (op))
15427 n_elts = CONST_VECTOR_NPATTERNS (op);
43cacb12
RS
15428 else if ((vec_flags & VEC_SVE_DATA)
15429 && const_vec_series_p (op, &base, &step))
15430 {
15431 gcc_assert (GET_MODE_CLASS (mode) == MODE_VECTOR_INT);
15432 if (!aarch64_sve_index_immediate_p (base)
15433 || !aarch64_sve_index_immediate_p (step))
15434 return false;
15435
15436 if (info)
15437 *info = simd_immediate_info (elt_mode, base, step);
15438 return true;
15439 }
6a70badb
RS
15440 else if (GET_CODE (op) == CONST_VECTOR
15441 && CONST_VECTOR_NUNITS (op).is_constant (&n_elts))
15442 /* N_ELTS set above. */;
b187677b 15443 else
d8edd899 15444 return false;
43e9d192 15445
b187677b 15446 scalar_float_mode elt_float_mode;
f9093f23
RS
15447 if (n_elts == 1
15448 && is_a <scalar_float_mode> (elt_mode, &elt_float_mode))
43e9d192 15449 {
f9093f23
RS
15450 rtx elt = CONST_VECTOR_ENCODED_ELT (op, 0);
15451 if (aarch64_float_const_zero_rtx_p (elt)
15452 || aarch64_float_const_representable_p (elt))
15453 {
15454 if (info)
15455 *info = simd_immediate_info (elt_float_mode, elt);
15456 return true;
15457 }
b187677b 15458 }
43e9d192 15459
b187677b
RS
15460 unsigned int elt_size = GET_MODE_SIZE (elt_mode);
15461 if (elt_size > 8)
15462 return false;
e4f0f84d 15463
b187677b 15464 scalar_int_mode elt_int_mode = int_mode_for_mode (elt_mode).require ();
43e9d192 15465
b187677b
RS
15466 /* Expand the vector constant out into a byte vector, with the least
15467 significant byte of the register first. */
15468 auto_vec<unsigned char, 16> bytes;
15469 bytes.reserve (n_elts * elt_size);
15470 for (unsigned int i = 0; i < n_elts; i++)
15471 {
f9093f23
RS
15472 /* The vector is provided in gcc endian-neutral fashion.
15473 For aarch64_be Advanced SIMD, it must be laid out in the vector
15474 register in reverse order. */
15475 bool swap_p = ((vec_flags & VEC_ADVSIMD) != 0 && BYTES_BIG_ENDIAN);
15476 rtx elt = CONST_VECTOR_ELT (op, swap_p ? (n_elts - 1 - i) : i);
43e9d192 15477
b187677b
RS
15478 if (elt_mode != elt_int_mode)
15479 elt = gen_lowpart (elt_int_mode, elt);
43e9d192 15480
b187677b
RS
15481 if (!CONST_INT_P (elt))
15482 return false;
43e9d192 15483
b187677b
RS
15484 unsigned HOST_WIDE_INT elt_val = INTVAL (elt);
15485 for (unsigned int byte = 0; byte < elt_size; byte++)
48063b9d 15486 {
b187677b
RS
15487 bytes.quick_push (elt_val & 0xff);
15488 elt_val >>= BITS_PER_UNIT;
48063b9d 15489 }
43e9d192
IB
15490 }
15491
b187677b
RS
15492 /* The immediate must repeat every eight bytes. */
15493 unsigned int nbytes = bytes.length ();
15494 for (unsigned i = 8; i < nbytes; ++i)
15495 if (bytes[i] != bytes[i - 8])
15496 return false;
15497
15498 /* Get the repeating 8-byte value as an integer. No endian correction
15499 is needed here because bytes is already in lsb-first order. */
15500 unsigned HOST_WIDE_INT val64 = 0;
15501 for (unsigned int i = 0; i < 8; i++)
15502 val64 |= ((unsigned HOST_WIDE_INT) bytes[i % nbytes]
15503 << (i * BITS_PER_UNIT));
15504
43cacb12
RS
15505 if (vec_flags & VEC_SVE_DATA)
15506 return aarch64_sve_valid_immediate (val64, info);
15507 else
15508 return aarch64_advsimd_valid_immediate (val64, info, which);
15509}
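/* Illustrative standalone sketch, not part of aarch64.c: the byte-expansion
   step of aarch64_simd_valid_immediate above.  The vector elements are
   flattened into bytes, least significant byte first; the constant is only
   a candidate if the byte sequence repeats every eight bytes, and those
   eight bytes then form the VAL64 handed to the SVE and Advanced SIMD
   checkers.  Big-endian lane correction is ignored here and the helper name
   is invented for this example.  */
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/* Flatten N_ELTS elements of ELT_SIZE bytes each (at most 64 bytes total)
   and return true, setting *VAL64, if the byte pattern has period eight.  */
static bool
repeating_val64 (const uint64_t *elts, unsigned int n_elts,
                 unsigned int elt_size, uint64_t *val64)
{
  unsigned char bytes[64];
  unsigned int nbytes = 0;

  for (unsigned int i = 0; i < n_elts; i++)
    for (unsigned int byte = 0; byte < elt_size; byte++)
      bytes[nbytes++] = (elts[i] >> (8 * byte)) & 0xff;

  /* The immediate must repeat every eight bytes.  */
  for (unsigned int i = 8; i < nbytes; i++)
    if (bytes[i] != bytes[i - 8])
      return false;

  *val64 = 0;
  for (unsigned int i = 0; i < 8; i++)
    *val64 |= (uint64_t) bytes[i % nbytes] << (8 * i);
  return true;
}

int
main (void)
{
  /* A V4SI-style constant of four identical 32-bit elements.  */
  uint64_t dup[4] = { 0x000000ab, 0x000000ab, 0x000000ab, 0x000000ab };
  /* Four distinct 32-bit elements: no 8-byte period, so rejected.  */
  uint64_t mixed[4] = { 0x000000ab, 0x000000cd, 0x000000ef, 0x00000012 };
  uint64_t val64;

  if (repeating_val64 (dup, 4, 4, &val64))
    printf ("val64 = %016llx\n", (unsigned long long) val64);
  printf ("mixed ok? %d\n", repeating_val64 (mixed, 4, 4, &val64));
  return 0;
}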
15510
15511/* Check whether X is a VEC_SERIES-like constant that starts at 0 and
15512 has a step in the range of INDEX. Return the index expression if so,
15513 otherwise return null. */
15514rtx
15515aarch64_check_zero_based_sve_index_immediate (rtx x)
15516{
15517 rtx base, step;
15518 if (const_vec_series_p (x, &base, &step)
15519 && base == const0_rtx
15520 && aarch64_sve_index_immediate_p (step))
15521 return step;
15522 return NULL_RTX;
43e9d192
IB
15523}
15524
43e9d192
IB
15525/* Check if immediate shift constants are within range. */
15526bool
ef4bddc2 15527aarch64_simd_shift_imm_p (rtx x, machine_mode mode, bool left)
43e9d192
IB
15528{
15529 int bit_width = GET_MODE_UNIT_SIZE (mode) * BITS_PER_UNIT;
15530 if (left)
ddeabd3e 15531 return aarch64_const_vec_all_same_in_range_p (x, 0, bit_width - 1);
43e9d192 15532 else
ddeabd3e 15533 return aarch64_const_vec_all_same_in_range_p (x, 1, bit_width);
43e9d192
IB
15534}
15535
7325d85a
KT
15536/* Return the bitmask CONST_INT to select the bits required by a zero extract
15537 operation of width WIDTH at bit position POS. */
15538
15539rtx
15540aarch64_mask_from_zextract_ops (rtx width, rtx pos)
15541{
15542 gcc_assert (CONST_INT_P (width));
15543 gcc_assert (CONST_INT_P (pos));
15544
15545 unsigned HOST_WIDE_INT mask
15546 = ((unsigned HOST_WIDE_INT) 1 << UINTVAL (width)) - 1;
15547 return GEN_INT (mask << UINTVAL (pos));
15548}
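/* Illustrative standalone sketch, not part of aarch64.c: the mask computed
   by aarch64_mask_from_zextract_ops above, i.e. WIDTH set bits starting at
   bit POS, ((1 << width) - 1) << pos.  WIDTH is assumed to be less than 64
   here; the helper name is invented for this example.  */
#include <stdio.h>

static unsigned long long
zextract_mask (unsigned int width, unsigned int pos)
{
  return ((1ull << width) - 1) << pos;
}

int
main (void)
{
  /* An 8-bit field at bit 16 selects bits 16..23.  */
  printf ("%#llx\n", zextract_mask (8, 16));   /* 0xff0000 */
  printf ("%#llx\n", zextract_mask (12, 4));   /* 0xfff0 */
  return 0;
}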
15549
83f8c414 15550bool
a6e0bfa7 15551aarch64_mov_operand_p (rtx x, machine_mode mode)
83f8c414 15552{
83f8c414
CSS
15553 if (GET_CODE (x) == HIGH
15554 && aarch64_valid_symref (XEXP (x, 0), GET_MODE (XEXP (x, 0))))
15555 return true;
15556
82614948 15557 if (CONST_INT_P (x))
83f8c414
CSS
15558 return true;
15559
43cacb12 15560 if (VECTOR_MODE_P (GET_MODE (x)))
678faefc
RS
15561 {
15562 /* Require predicate constants to be VNx16BI before RA, so that we
15563 force everything to have a canonical form. */
15564 if (!lra_in_progress
15565 && !reload_completed
15566 && GET_MODE_CLASS (GET_MODE (x)) == MODE_VECTOR_BOOL
15567 && GET_MODE (x) != VNx16BImode)
15568 return false;
15569
15570 return aarch64_simd_valid_immediate (x, NULL);
15571 }
43cacb12 15572
83f8c414
CSS
15573 if (GET_CODE (x) == SYMBOL_REF && mode == DImode && CONSTANT_ADDRESS_P (x))
15574 return true;
15575
43cacb12
RS
15576 if (aarch64_sve_cnt_immediate_p (x))
15577 return true;
15578
a6e0bfa7 15579 return aarch64_classify_symbolic_expression (x)
a5350ddc 15580 == SYMBOL_TINY_ABSOLUTE;
83f8c414
CSS
15581}
15582
43e9d192
IB
15583/* Return a const_int vector of VAL. */
15584rtx
ab014eb3 15585aarch64_simd_gen_const_vector_dup (machine_mode mode, HOST_WIDE_INT val)
43e9d192 15586{
59d06c05
RS
15587 rtx c = gen_int_mode (val, GET_MODE_INNER (mode));
15588 return gen_const_vec_duplicate (mode, c);
43e9d192
IB
15589}
15590
051d0e2f
SN
15591/* Check OP is a legal scalar immediate for the MOVI instruction. */
15592
15593bool
77e994c9 15594aarch64_simd_scalar_immediate_valid_for_move (rtx op, scalar_int_mode mode)
051d0e2f 15595{
ef4bddc2 15596 machine_mode vmode;
051d0e2f 15597
43cacb12 15598 vmode = aarch64_simd_container_mode (mode, 64);
051d0e2f 15599 rtx op_v = aarch64_simd_gen_const_vector_dup (vmode, INTVAL (op));
b187677b 15600 return aarch64_simd_valid_immediate (op_v, NULL);
051d0e2f
SN
15601}
15602
988fa693
JG
15603/* Construct and return a PARALLEL RTX vector with elements numbering the
15604 lanes of either the high (HIGH == TRUE) or low (HIGH == FALSE) half of
15605 the vector - from the perspective of the architecture. This does not
15606 line up with GCC's perspective on lane numbers, so we end up with
15607 different masks depending on our target endian-ness. The diagram
15608 below may help. We must draw the distinction when building masks
15609 which select one half of the vector. An instruction selecting
15610 architectural low-lanes for a big-endian target, must be described using
15611 a mask selecting GCC high-lanes.
15612
15613 Big-Endian Little-Endian
15614
15615GCC 0 1 2 3 3 2 1 0
15616 | x | x | x | x | | x | x | x | x |
15617Architecture 3 2 1 0 3 2 1 0
15618
15619Low Mask: { 2, 3 } { 0, 1 }
15620High Mask: { 0, 1 } { 2, 3 }
f5cbabc1
RS
15621
15622 MODE is the mode of the vector and NUNITS is the number of units in it. */
988fa693 15623
43e9d192 15624rtx
f5cbabc1 15625aarch64_simd_vect_par_cnst_half (machine_mode mode, int nunits, bool high)
43e9d192 15626{
43e9d192 15627 rtvec v = rtvec_alloc (nunits / 2);
988fa693
JG
15628 int high_base = nunits / 2;
15629 int low_base = 0;
15630 int base;
43e9d192
IB
15631 rtx t1;
15632 int i;
15633
988fa693
JG
15634 if (BYTES_BIG_ENDIAN)
15635 base = high ? low_base : high_base;
15636 else
15637 base = high ? high_base : low_base;
15638
15639 for (i = 0; i < nunits / 2; i++)
43e9d192
IB
15640 RTVEC_ELT (v, i) = GEN_INT (base + i);
15641
15642 t1 = gen_rtx_PARALLEL (mode, v);
15643 return t1;
15644}
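/* Illustrative standalone sketch, not part of aarch64.c: the lane numbers
   generated by aarch64_simd_vect_par_cnst_half above.  For a vector of
   NUNITS lanes it prints the GCC lane indices that select the architectural
   low or high half, showing how the choice flips on big-endian targets as
   in the diagram above.  The helper name is invented for this example.  */
#include <stdbool.h>
#include <stdio.h>

static void
print_half_mask (unsigned int nunits, bool high, bool big_endian)
{
  unsigned int high_base = nunits / 2;
  unsigned int low_base = 0;
  unsigned int base = big_endian ? (high ? low_base : high_base)
                                 : (high ? high_base : low_base);

  printf ("%s mask, %s-endian: {", high ? "high" : "low",
          big_endian ? "big" : "little");
  for (unsigned int i = 0; i < nunits / 2; i++)
    printf (" %u", base + i);
  printf (" }\n");
}

int
main (void)
{
  /* Matches the 4-lane diagram above: the low mask is { 0, 1 } on
     little-endian but { 2, 3 } on big-endian, and vice versa for high.  */
  print_half_mask (4, false, false);
  print_half_mask (4, false, true);
  print_half_mask (4, true, false);
  print_half_mask (4, true, true);
  return 0;
}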
15645
988fa693
JG
15646/* Check OP for validity as a PARALLEL RTX vector with elements
15647 numbering the lanes of either the high (HIGH == TRUE) or low lanes,
15648 from the perspective of the architecture. See the diagram above
15649 aarch64_simd_vect_par_cnst_half for more details. */
15650
15651bool
ef4bddc2 15652aarch64_simd_check_vect_par_cnst_half (rtx op, machine_mode mode,
988fa693
JG
15653 bool high)
15654{
6a70badb
RS
15655 int nelts;
15656 if (!VECTOR_MODE_P (mode) || !GET_MODE_NUNITS (mode).is_constant (&nelts))
f5cbabc1
RS
15657 return false;
15658
6a70badb 15659 rtx ideal = aarch64_simd_vect_par_cnst_half (mode, nelts, high);
988fa693
JG
15660 HOST_WIDE_INT count_op = XVECLEN (op, 0);
15661 HOST_WIDE_INT count_ideal = XVECLEN (ideal, 0);
15662 int i = 0;
15663
988fa693
JG
15664 if (count_op != count_ideal)
15665 return false;
15666
15667 for (i = 0; i < count_ideal; i++)
15668 {
15669 rtx elt_op = XVECEXP (op, 0, i);
15670 rtx elt_ideal = XVECEXP (ideal, 0, i);
15671
4aa81c2e 15672 if (!CONST_INT_P (elt_op)
988fa693
JG
15673 || INTVAL (elt_ideal) != INTVAL (elt_op))
15674 return false;
15675 }
15676 return true;
15677}
15678
4aeb1ba7
RS
15679/* Return a PARALLEL containing NELTS elements, with element I equal
15680 to BASE + I * STEP. */
15681
15682rtx
15683aarch64_gen_stepped_int_parallel (unsigned int nelts, int base, int step)
15684{
15685 rtvec vec = rtvec_alloc (nelts);
15686 for (unsigned int i = 0; i < nelts; ++i)
15687 RTVEC_ELT (vec, i) = gen_int_mode (base + i * step, DImode);
15688 return gen_rtx_PARALLEL (VOIDmode, vec);
15689}
15690
15691/* Return true if OP is a PARALLEL of CONST_INTs that form a linear
15692 series with step STEP. */
15693
15694bool
15695aarch64_stepped_int_parallel_p (rtx op, int step)
15696{
15697 if (GET_CODE (op) != PARALLEL || !CONST_INT_P (XVECEXP (op, 0, 0)))
15698 return false;
15699
15700 unsigned HOST_WIDE_INT base = UINTVAL (XVECEXP (op, 0, 0));
15701 for (int i = 1; i < XVECLEN (op, 0); ++i)
15702 if (!CONST_INT_P (XVECEXP (op, 0, i))
15703 || UINTVAL (XVECEXP (op, 0, i)) != base + i * step)
15704 return false;
15705
15706 return true;
15707}
15708
43e9d192
IB
15709/* Bounds-check lanes. Ensure OPERAND lies between LOW (inclusive) and
15710 HIGH (exclusive). */
15711void
46ed6024
CB
15712aarch64_simd_lane_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high,
15713 const_tree exp)
43e9d192
IB
15714{
15715 HOST_WIDE_INT lane;
4aa81c2e 15716 gcc_assert (CONST_INT_P (operand));
43e9d192
IB
15717 lane = INTVAL (operand);
15718
15719 if (lane < low || lane >= high)
46ed6024
CB
15720 {
15721 if (exp)
cf0c27ef 15722 error ("%Klane %wd out of range %wd - %wd", exp, lane, low, high - 1);
46ed6024 15723 else
cf0c27ef 15724 error ("lane %wd out of range %wd - %wd", lane, low, high - 1);
46ed6024 15725 }
43e9d192
IB
15726}
15727
7ac29c0f
RS
15728/* Perform endian correction on lane number N, which indexes a vector
15729 of mode MODE, and return the result as an SImode rtx. */
15730
15731rtx
15732aarch64_endian_lane_rtx (machine_mode mode, unsigned int n)
15733{
15734 return gen_int_mode (ENDIAN_LANE_N (GET_MODE_NUNITS (mode), n), SImode);
15735}
15736
43e9d192 15737/* Return TRUE if OP is a valid vector addressing mode. */
43cacb12 15738
43e9d192
IB
15739bool
15740aarch64_simd_mem_operand_p (rtx op)
15741{
15742 return MEM_P (op) && (GET_CODE (XEXP (op, 0)) == POST_INC
4aa81c2e 15743 || REG_P (XEXP (op, 0)));
43e9d192
IB
15744}
15745
43cacb12
RS
15746/* Return true if OP is a valid MEM operand for an SVE LD1R instruction. */
15747
15748bool
15749aarch64_sve_ld1r_operand_p (rtx op)
15750{
15751 struct aarch64_address_info addr;
15752 scalar_mode mode;
15753
15754 return (MEM_P (op)
15755 && is_a <scalar_mode> (GET_MODE (op), &mode)
15756 && aarch64_classify_address (&addr, XEXP (op, 0), mode, false)
15757 && addr.type == ADDRESS_REG_IMM
15758 && offset_6bit_unsigned_scaled_p (mode, addr.const_offset));
15759}
15760
4aeb1ba7
RS
15761/* Return true if OP is a valid MEM operand for an SVE LD1RQ instruction. */
15762bool
15763aarch64_sve_ld1rq_operand_p (rtx op)
15764{
15765 struct aarch64_address_info addr;
15766 scalar_mode elem_mode = GET_MODE_INNER (GET_MODE (op));
15767 if (!MEM_P (op)
15768 || !aarch64_classify_address (&addr, XEXP (op, 0), elem_mode, false))
15769 return false;
15770
15771 if (addr.type == ADDRESS_REG_IMM)
15772 return offset_4bit_signed_scaled_p (TImode, addr.const_offset);
15773
15774 if (addr.type == ADDRESS_REG_REG)
15775 return (1U << addr.shift) == GET_MODE_SIZE (elem_mode);
15776
15777 return false;
15778}
15779
43cacb12
RS
15780/* Return true if OP is a valid MEM operand for an SVE LDR instruction.
15781 The conditions for STR are the same. */
15782bool
15783aarch64_sve_ldr_operand_p (rtx op)
15784{
15785 struct aarch64_address_info addr;
15786
15787 return (MEM_P (op)
15788 && aarch64_classify_address (&addr, XEXP (op, 0), GET_MODE (op),
15789 false, ADDR_QUERY_ANY)
15790 && addr.type == ADDRESS_REG_IMM);
15791}
15792
9f4cbab8
RS
15793/* Return true if OP is a valid MEM operand for an SVE_STRUCT mode.
15794 We need to be able to access the individual pieces, so the range
15795 is different from LD[234] and ST[234]. */
15796bool
15797aarch64_sve_struct_memory_operand_p (rtx op)
15798{
15799 if (!MEM_P (op))
15800 return false;
15801
15802 machine_mode mode = GET_MODE (op);
15803 struct aarch64_address_info addr;
15804 if (!aarch64_classify_address (&addr, XEXP (op, 0), SVE_BYTE_MODE, false,
15805 ADDR_QUERY_ANY)
15806 || addr.type != ADDRESS_REG_IMM)
15807 return false;
15808
15809 poly_int64 first = addr.const_offset;
15810 poly_int64 last = first + GET_MODE_SIZE (mode) - BYTES_PER_SVE_VECTOR;
15811 return (offset_4bit_signed_scaled_p (SVE_BYTE_MODE, first)
15812 && offset_4bit_signed_scaled_p (SVE_BYTE_MODE, last));
15813}
15814
2d8c6dc1
AH
15815/* Emit a register copy from operand to operand, taking care not to
15816 early-clobber source registers in the process.
43e9d192 15817
2d8c6dc1
AH
15818 COUNT is the number of components into which the copy needs to be
15819 decomposed. */
43e9d192 15820void
b8506a8a 15821aarch64_simd_emit_reg_reg_move (rtx *operands, machine_mode mode,
2d8c6dc1 15822 unsigned int count)
43e9d192
IB
15823{
15824 unsigned int i;
2d8c6dc1
AH
15825 int rdest = REGNO (operands[0]);
15826 int rsrc = REGNO (operands[1]);
43e9d192
IB
15827
15828 if (!reg_overlap_mentioned_p (operands[0], operands[1])
2d8c6dc1
AH
15829 || rdest < rsrc)
15830 for (i = 0; i < count; i++)
15831 emit_move_insn (gen_rtx_REG (mode, rdest + i),
15832 gen_rtx_REG (mode, rsrc + i));
43e9d192 15833 else
2d8c6dc1
AH
15834 for (i = 0; i < count; i++)
15835 emit_move_insn (gen_rtx_REG (mode, rdest + count - i - 1),
15836 gen_rtx_REG (mode, rsrc + count - i - 1));
43e9d192
IB
15837}
15838
668046d1 15839/* Compute and return the length of aarch64_simd_reglist<mode>, where <mode> is
6ec0e5b9 15840 one of VSTRUCT modes: OI, CI, or XI. */
668046d1 15841int
b8506a8a 15842aarch64_simd_attr_length_rglist (machine_mode mode)
668046d1 15843{
6a70badb
RS
15844 /* This is only used (and only meaningful) for Advanced SIMD, not SVE. */
15845 return (GET_MODE_SIZE (mode).to_constant () / UNITS_PER_VREG) * 4;
668046d1
DS
15846}
15847
db0253a4 15848/* Implement target hook TARGET_VECTOR_ALIGNMENT. The AAPCS64 sets the maximum
43cacb12
RS
15849 alignment of a vector to 128 bits. SVE predicates have an alignment of
15850 16 bits. */
db0253a4
TB
15851static HOST_WIDE_INT
15852aarch64_simd_vector_alignment (const_tree type)
15853{
43cacb12
RS
15854 if (TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
15855 /* ??? Checking the mode isn't ideal, but VECTOR_BOOLEAN_TYPE_P can
15856 be set for non-predicate vectors of booleans. Modes are the most
15857 direct way we have of identifying real SVE predicate types. */
15858 return GET_MODE_CLASS (TYPE_MODE (type)) == MODE_VECTOR_BOOL ? 16 : 128;
6c76c0e4 15859 return wi::umin (wi::to_wide (TYPE_SIZE (type)), 128).to_uhwi ();
db0253a4
TB
15860}
15861
43cacb12 15862/* Implement target hook TARGET_VECTORIZE_PREFERRED_VECTOR_ALIGNMENT. */
ca31798e 15863static poly_uint64
43cacb12
RS
15864aarch64_vectorize_preferred_vector_alignment (const_tree type)
15865{
15866 if (aarch64_sve_data_mode_p (TYPE_MODE (type)))
15867 {
15868 /* If the length of the vector is fixed, try to align to that length,
15869 otherwise don't try to align at all. */
15870 HOST_WIDE_INT result;
15871 if (!BITS_PER_SVE_VECTOR.is_constant (&result))
15872 result = TYPE_ALIGN (TREE_TYPE (type));
15873 return result;
15874 }
15875 return TYPE_ALIGN (type);
15876}
15877
db0253a4
TB
15878/* Implement target hook TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE. */
15879static bool
15880aarch64_simd_vector_alignment_reachable (const_tree type, bool is_packed)
15881{
15882 if (is_packed)
15883 return false;
15884
43cacb12
RS
15885 /* For fixed-length vectors, check that the vectorizer will aim for
15886 full-vector alignment. This isn't true for generic GCC vectors
15887 that are wider than the ABI maximum of 128 bits. */
ca31798e
AV
15888 poly_uint64 preferred_alignment =
15889 aarch64_vectorize_preferred_vector_alignment (type);
43cacb12 15890 if (TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
ca31798e
AV
15891 && maybe_ne (wi::to_widest (TYPE_SIZE (type)),
15892 preferred_alignment))
db0253a4
TB
15893 return false;
15894
15895 /* Vectors whose size is <= BIGGEST_ALIGNMENT are naturally aligned. */
15896 return true;
15897}
15898
7df76747
N
15899/* Return true if the vector misalignment factor is supported by the
15900 target. */
15901static bool
15902aarch64_builtin_support_vector_misalignment (machine_mode mode,
15903 const_tree type, int misalignment,
15904 bool is_packed)
15905{
15906 if (TARGET_SIMD && STRICT_ALIGNMENT)
15907 {
15909      /* Return false if the movmisalign pattern is not supported for this mode. */
15909 if (optab_handler (movmisalign_optab, mode) == CODE_FOR_nothing)
15910 return false;
15911
a509c571 15912 /* Misalignment factor is unknown at compile time. */
7df76747 15913 if (misalignment == -1)
a509c571 15914 return false;
7df76747
N
15915 }
15916 return default_builtin_support_vector_misalignment (mode, type, misalignment,
15917 is_packed);
15918}
15919
4369c11e
TB
15920/* If VALS is a vector constant that can be loaded into a register
15921 using DUP, generate instructions to do so and return an RTX to
15922 assign to the register. Otherwise return NULL_RTX. */
15923static rtx
15924aarch64_simd_dup_constant (rtx vals)
15925{
ef4bddc2
RS
15926 machine_mode mode = GET_MODE (vals);
15927 machine_mode inner_mode = GET_MODE_INNER (mode);
4369c11e 15928 rtx x;
4369c11e 15929
92695fbb 15930 if (!const_vec_duplicate_p (vals, &x))
4369c11e
TB
15931 return NULL_RTX;
15932
15933 /* We can load this constant by using DUP and a constant in a
15934 single ARM register. This will be cheaper than a vector
15935 load. */
92695fbb 15936 x = copy_to_mode_reg (inner_mode, x);
59d06c05 15937 return gen_vec_duplicate (mode, x);
4369c11e
TB
15938}
15939
15940
15941/* Generate code to load VALS, which is a PARALLEL containing only
15942 constants (for vec_init) or CONST_VECTOR, efficiently into a
15943 register. Returns an RTX to copy into the register, or NULL_RTX
67914693 15944 for a PARALLEL that cannot be converted into a CONST_VECTOR. */
1df3f464 15945static rtx
4369c11e
TB
15946aarch64_simd_make_constant (rtx vals)
15947{
ef4bddc2 15948 machine_mode mode = GET_MODE (vals);
4369c11e
TB
15949 rtx const_dup;
15950 rtx const_vec = NULL_RTX;
4369c11e
TB
15951 int n_const = 0;
15952 int i;
15953
15954 if (GET_CODE (vals) == CONST_VECTOR)
15955 const_vec = vals;
15956 else if (GET_CODE (vals) == PARALLEL)
15957 {
15958 /* A CONST_VECTOR must contain only CONST_INTs and
15959 CONST_DOUBLEs, but CONSTANT_P allows more (e.g. SYMBOL_REF).
15960 Only store valid constants in a CONST_VECTOR. */
6a70badb 15961 int n_elts = XVECLEN (vals, 0);
4369c11e
TB
15962 for (i = 0; i < n_elts; ++i)
15963 {
15964 rtx x = XVECEXP (vals, 0, i);
15965 if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
15966 n_const++;
15967 }
15968 if (n_const == n_elts)
15969 const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0));
15970 }
15971 else
15972 gcc_unreachable ();
15973
15974 if (const_vec != NULL_RTX
b187677b 15975 && aarch64_simd_valid_immediate (const_vec, NULL))
4369c11e
TB
15976 /* Load using MOVI/MVNI. */
15977 return const_vec;
15978 else if ((const_dup = aarch64_simd_dup_constant (vals)) != NULL_RTX)
15979 /* Loaded using DUP. */
15980 return const_dup;
15981 else if (const_vec != NULL_RTX)
67914693 15982 /* Load from constant pool. We cannot take advantage of single-cycle
4369c11e
TB
15983 LD1 because we need a PC-relative addressing mode. */
15984 return const_vec;
15985 else
15986 /* A PARALLEL containing something not valid inside CONST_VECTOR.
67914693 15987 We cannot construct an initializer. */
4369c11e
TB
15988 return NULL_RTX;
15989}
15990
35a093b6
JG
15991/* Expand a vector initialisation sequence, such that TARGET is
15992 initialised to contain VALS. */
15993
4369c11e
TB
15994void
15995aarch64_expand_vector_init (rtx target, rtx vals)
15996{
ef4bddc2 15997 machine_mode mode = GET_MODE (target);
146c2e3a 15998 scalar_mode inner_mode = GET_MODE_INNER (mode);
35a093b6 15999 /* The number of vector elements. */
6a70badb 16000 int n_elts = XVECLEN (vals, 0);
35a093b6 16001 /* The number of vector elements which are not constant. */
8b66a2d4
AL
16002 int n_var = 0;
16003 rtx any_const = NULL_RTX;
35a093b6
JG
16004 /* The first element of vals. */
16005 rtx v0 = XVECEXP (vals, 0, 0);
4369c11e 16006 bool all_same = true;
4369c11e 16007
41dab855
KT
16008 /* This is a special vec_init<M><N> where N is not an element mode but a
16009 vector mode with half the elements of M. We expect to find two entries
16010     of mode N in VALS and we must put their concatenation into TARGET. */
16011 if (XVECLEN (vals, 0) == 2 && VECTOR_MODE_P (GET_MODE (XVECEXP (vals, 0, 0))))
16012 {
16013 gcc_assert (known_eq (GET_MODE_SIZE (mode),
16014 2 * GET_MODE_SIZE (GET_MODE (XVECEXP (vals, 0, 0)))));
16015 rtx lo = XVECEXP (vals, 0, 0);
16016 rtx hi = XVECEXP (vals, 0, 1);
16017 machine_mode narrow_mode = GET_MODE (lo);
16018 gcc_assert (GET_MODE_INNER (narrow_mode) == inner_mode);
16019 gcc_assert (narrow_mode == GET_MODE (hi));
16020
16021 /* When we want to concatenate a half-width vector with zeroes we can
16022 use the aarch64_combinez[_be] patterns. Just make sure that the
16023 zeroes are in the right half. */
16024 if (BYTES_BIG_ENDIAN
16025 && aarch64_simd_imm_zero (lo, narrow_mode)
16026 && general_operand (hi, narrow_mode))
16027 emit_insn (gen_aarch64_combinez_be (narrow_mode, target, hi, lo));
16028 else if (!BYTES_BIG_ENDIAN
16029 && aarch64_simd_imm_zero (hi, narrow_mode)
16030 && general_operand (lo, narrow_mode))
16031 emit_insn (gen_aarch64_combinez (narrow_mode, target, lo, hi));
16032 else
16033 {
16034 /* Else create the two half-width registers and combine them. */
16035 if (!REG_P (lo))
16036 lo = force_reg (GET_MODE (lo), lo);
16037 if (!REG_P (hi))
16038 hi = force_reg (GET_MODE (hi), hi);
16039
16040 if (BYTES_BIG_ENDIAN)
16041 std::swap (lo, hi);
16042 emit_insn (gen_aarch64_simd_combine (narrow_mode, target, lo, hi));
16043 }
16044 return;
16045 }
16046
35a093b6 16047 /* Count the number of variable elements to initialise. */
8b66a2d4 16048 for (int i = 0; i < n_elts; ++i)
4369c11e 16049 {
8b66a2d4 16050 rtx x = XVECEXP (vals, 0, i);
35a093b6 16051 if (!(CONST_INT_P (x) || CONST_DOUBLE_P (x)))
8b66a2d4
AL
16052 ++n_var;
16053 else
16054 any_const = x;
4369c11e 16055
35a093b6 16056 all_same &= rtx_equal_p (x, v0);
4369c11e
TB
16057 }
16058
35a093b6
JG
16059 /* No variable elements, hand off to aarch64_simd_make_constant which knows
16060 how best to handle this. */
4369c11e
TB
16061 if (n_var == 0)
16062 {
16063 rtx constant = aarch64_simd_make_constant (vals);
16064 if (constant != NULL_RTX)
16065 {
16066 emit_move_insn (target, constant);
16067 return;
16068 }
16069 }
16070
16071 /* Splat a single non-constant element if we can. */
16072 if (all_same)
16073 {
35a093b6 16074 rtx x = copy_to_mode_reg (inner_mode, v0);
59d06c05 16075 aarch64_emit_move (target, gen_vec_duplicate (mode, x));
4369c11e
TB
16076 return;
16077 }
16078
85c1b6d7
AP
16079 enum insn_code icode = optab_handler (vec_set_optab, mode);
16080 gcc_assert (icode != CODE_FOR_nothing);
16081
16082 /* If there are only variable elements, try to optimize
16083 the insertion using dup for the most common element
16084 followed by insertions. */
16085
16086 /* The algorithm will fill matches[*][0] with the earliest matching element,
16087 and matches[X][1] with the count of duplicate elements (if X is the
16088 earliest element which has duplicates). */
16089
16090 if (n_var == n_elts && n_elts <= 16)
16091 {
16092 int matches[16][2] = {0};
16093 for (int i = 0; i < n_elts; i++)
16094 {
16095 for (int j = 0; j <= i; j++)
16096 {
16097 if (rtx_equal_p (XVECEXP (vals, 0, i), XVECEXP (vals, 0, j)))
16098 {
16099 matches[i][0] = j;
16100 matches[j][1]++;
16101 break;
16102 }
16103 }
16104 }
16105 int maxelement = 0;
16106 int maxv = 0;
16107 for (int i = 0; i < n_elts; i++)
16108 if (matches[i][1] > maxv)
16109 {
16110 maxelement = i;
16111 maxv = matches[i][1];
16112 }
16113
b4e2cd5b
JG
16114 /* Create a duplicate of the most common element, unless all elements
16115 are equally useless to us, in which case just immediately set the
16116 vector register using the first element. */
16117
16118 if (maxv == 1)
16119 {
16120 /* For vectors of two 64-bit elements, we can do even better. */
16121 if (n_elts == 2
16122 && (inner_mode == E_DImode
16123 || inner_mode == E_DFmode))
16124
16125 {
16126 rtx x0 = XVECEXP (vals, 0, 0);
16127 rtx x1 = XVECEXP (vals, 0, 1);
16128 /* Combine can pick up this case, but handling it directly
16129 here leaves clearer RTL.
16130
16131 This is load_pair_lanes<mode>, and also gives us a clean-up
16132 for store_pair_lanes<mode>. */
16133 if (memory_operand (x0, inner_mode)
16134 && memory_operand (x1, inner_mode)
16135 && !STRICT_ALIGNMENT
16136 && rtx_equal_p (XEXP (x1, 0),
16137 plus_constant (Pmode,
16138 XEXP (x0, 0),
16139 GET_MODE_SIZE (inner_mode))))
16140 {
16141 rtx t;
16142 if (inner_mode == DFmode)
16143 t = gen_load_pair_lanesdf (target, x0, x1);
16144 else
16145 t = gen_load_pair_lanesdi (target, x0, x1);
16146 emit_insn (t);
16147 return;
16148 }
16149 }
16150 /* The subreg-move sequence below will move into lane zero of the
16151 vector register. For big-endian we want that position to hold
16152 the last element of VALS. */
16153 maxelement = BYTES_BIG_ENDIAN ? n_elts - 1 : 0;
16154 rtx x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, maxelement));
16155 aarch64_emit_move (target, lowpart_subreg (mode, x, inner_mode));
16156 }
16157 else
16158 {
16159 rtx x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, maxelement));
16160 aarch64_emit_move (target, gen_vec_duplicate (mode, x));
16161 }
85c1b6d7
AP
16162
16163 /* Insert the rest. */
16164 for (int i = 0; i < n_elts; i++)
16165 {
16166 rtx x = XVECEXP (vals, 0, i);
16167 if (matches[i][0] == maxelement)
16168 continue;
16169 x = copy_to_mode_reg (inner_mode, x);
16170 emit_insn (GEN_FCN (icode) (target, x, GEN_INT (i)));
16171 }
16172 return;
16173 }
16174
35a093b6
JG
16175 /* Initialise a vector which is part-variable. We want to first try
16176 to build those lanes which are constant in the most efficient way we
16177 can. */
16178 if (n_var != n_elts)
4369c11e
TB
16179 {
16180 rtx copy = copy_rtx (vals);
4369c11e 16181
8b66a2d4
AL
16182 /* Load constant part of vector. We really don't care what goes into the
16183 parts we will overwrite, but we're more likely to be able to load the
16184 constant efficiently if it has fewer, larger, repeating parts
16185 (see aarch64_simd_valid_immediate). */
16186 for (int i = 0; i < n_elts; i++)
16187 {
16188 rtx x = XVECEXP (vals, 0, i);
16189 if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
16190 continue;
16191 rtx subst = any_const;
16192 for (int bit = n_elts / 2; bit > 0; bit /= 2)
16193 {
16194 /* Look in the copied vector, as more elements are const. */
16195 rtx test = XVECEXP (copy, 0, i ^ bit);
16196 if (CONST_INT_P (test) || CONST_DOUBLE_P (test))
16197 {
16198 subst = test;
16199 break;
16200 }
16201 }
16202 XVECEXP (copy, 0, i) = subst;
16203 }
4369c11e 16204 aarch64_expand_vector_init (target, copy);
35a093b6 16205 }
4369c11e 16206
35a093b6 16207 /* Insert the variable lanes directly. */
8b66a2d4 16208 for (int i = 0; i < n_elts; i++)
35a093b6
JG
16209 {
16210 rtx x = XVECEXP (vals, 0, i);
16211 if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
16212 continue;
16213 x = copy_to_mode_reg (inner_mode, x);
16214 emit_insn (GEN_FCN (icode) (target, x, GEN_INT (i)));
16215 }
4369c11e
TB
16216}
16217
3a0afad0
PK
16218/* Emit RTL corresponding to:
16219 insr TARGET, ELEM. */
16220
16221static void
16222emit_insr (rtx target, rtx elem)
16223{
16224 machine_mode mode = GET_MODE (target);
16225 scalar_mode elem_mode = GET_MODE_INNER (mode);
16226 elem = force_reg (elem_mode, elem);
16227
16228 insn_code icode = optab_handler (vec_shl_insert_optab, mode);
16229 gcc_assert (icode != CODE_FOR_nothing);
16230 emit_insn (GEN_FCN (icode) (target, target, elem));
16231}
16232
16233/* Subroutine of aarch64_sve_expand_vector_init for handling
16234 trailing constants.
16235 This function works as follows:
16236 (a) Create a new vector consisting of trailing constants.
16237 (b) Initialize TARGET with the constant vector using emit_move_insn.
16238 (c) Insert remaining elements in TARGET using insr.
16239   NELTS is the total number of elements in the original vector, while
16240   NELTS_REQD is the number of elements that are actually
16241 significant.
16242
16243   ??? The heuristic used is to do the above only if the number of constants
16244   is at least half the total number of elements. May need fine tuning. */
16245
16246static bool
16247aarch64_sve_expand_vector_init_handle_trailing_constants
16248 (rtx target, const rtx_vector_builder &builder, int nelts, int nelts_reqd)
16249{
16250 machine_mode mode = GET_MODE (target);
16251 scalar_mode elem_mode = GET_MODE_INNER (mode);
16252 int n_trailing_constants = 0;
16253
16254 for (int i = nelts_reqd - 1;
16255 i >= 0 && aarch64_legitimate_constant_p (elem_mode, builder.elt (i));
16256 i--)
16257 n_trailing_constants++;
16258
16259 if (n_trailing_constants >= nelts_reqd / 2)
16260 {
16261 rtx_vector_builder v (mode, 1, nelts);
16262 for (int i = 0; i < nelts; i++)
16263 v.quick_push (builder.elt (i + nelts_reqd - n_trailing_constants));
16264 rtx const_vec = v.build ();
16265 emit_move_insn (target, const_vec);
16266
16267 for (int i = nelts_reqd - n_trailing_constants - 1; i >= 0; i--)
16268 emit_insr (target, builder.elt (i));
16269
16270 return true;
16271 }
16272
16273 return false;
16274}
16275
16276/* Subroutine of aarch64_sve_expand_vector_init.
16277 Works as follows:
16278 (a) Initialize TARGET by broadcasting element NELTS_REQD - 1 of BUILDER.
16279 (b) Skip trailing elements from BUILDER, which are the same as
16280 element NELTS_REQD - 1.
16281 (c) Insert earlier elements in reverse order in TARGET using insr. */
16282
16283static void
16284aarch64_sve_expand_vector_init_insert_elems (rtx target,
16285 const rtx_vector_builder &builder,
16286 int nelts_reqd)
16287{
16288 machine_mode mode = GET_MODE (target);
16289 scalar_mode elem_mode = GET_MODE_INNER (mode);
16290
16291 struct expand_operand ops[2];
16292 enum insn_code icode = optab_handler (vec_duplicate_optab, mode);
16293 gcc_assert (icode != CODE_FOR_nothing);
16294
16295 create_output_operand (&ops[0], target, mode);
16296 create_input_operand (&ops[1], builder.elt (nelts_reqd - 1), elem_mode);
16297 expand_insn (icode, 2, ops);
16298
16299 int ndups = builder.count_dups (nelts_reqd - 1, -1, -1);
16300 for (int i = nelts_reqd - ndups - 1; i >= 0; i--)
16301 emit_insr (target, builder.elt (i));
16302}
16303
16304/* Subroutine of aarch64_sve_expand_vector_init to handle the case
16305   when all trailing elements of BUILDER are the same.
16306 This works as follows:
16307 (a) Use expand_insn interface to broadcast last vector element in TARGET.
16308 (b) Insert remaining elements in TARGET using insr.
16309
16310   ??? The heuristic used is to do the above if the number of identical
16311   trailing elements is at least 3/4 of the total number of elements,
16312   loosely based on the heuristic from mostly_zeros_p. May need fine-tuning. */
16313
16314static bool
16315aarch64_sve_expand_vector_init_handle_trailing_same_elem
16316 (rtx target, const rtx_vector_builder &builder, int nelts_reqd)
16317{
16318 int ndups = builder.count_dups (nelts_reqd - 1, -1, -1);
16319 if (ndups >= (3 * nelts_reqd) / 4)
16320 {
16321 aarch64_sve_expand_vector_init_insert_elems (target, builder,
16322 nelts_reqd - ndups + 1);
16323 return true;
16324 }
16325
16326 return false;
16327}
16328
16329/* Initialize register TARGET from BUILDER. NELTS is the constant number
16330 of elements in BUILDER.
16331
16332 The function tries to initialize TARGET from BUILDER if it fits one
16333 of the special cases outlined below.
16334
16335 Failing that, the function divides BUILDER into two sub-vectors:
16336 v_even = even elements of BUILDER;
16337 v_odd = odd elements of BUILDER;
16338
16339 and recursively calls itself with v_even and v_odd.
16340
16341 if (recursive call succeeded for v_even or v_odd)
16342 TARGET = zip (v_even, v_odd)
16343
16344 The function returns true if it managed to build TARGET from BUILDER
16345 with one of the special cases, false otherwise.
16346
16347 Example: {a, 1, b, 2, c, 3, d, 4}
16348
16349 The vector gets divided into:
16350 v_even = {a, b, c, d}
16351 v_odd = {1, 2, 3, 4}
16352
16353 aarch64_sve_expand_vector_init(v_odd) hits case 1 and
16354   initializes tmp2 from constant vector v_odd using emit_move_insn.
16355
16356 aarch64_sve_expand_vector_init(v_even) fails since v_even contains
16357 4 elements, so we construct tmp1 from v_even using insr:
16358 tmp1 = dup(d)
16359 insr tmp1, c
16360 insr tmp1, b
16361 insr tmp1, a
16362
16363 And finally:
16364 TARGET = zip (tmp1, tmp2)
16365 which sets TARGET to {a, 1, b, 2, c, 3, d, 4}. */
16366
16367static bool
16368aarch64_sve_expand_vector_init (rtx target, const rtx_vector_builder &builder,
16369 int nelts, int nelts_reqd)
16370{
16371 machine_mode mode = GET_MODE (target);
16372
16373 /* Case 1: Vector contains trailing constants. */
16374
16375 if (aarch64_sve_expand_vector_init_handle_trailing_constants
16376 (target, builder, nelts, nelts_reqd))
16377 return true;
16378
16379 /* Case 2: Vector contains leading constants. */
16380
16381 rtx_vector_builder rev_builder (mode, 1, nelts_reqd);
16382 for (int i = 0; i < nelts_reqd; i++)
16383 rev_builder.quick_push (builder.elt (nelts_reqd - i - 1));
16384 rev_builder.finalize ();
16385
16386 if (aarch64_sve_expand_vector_init_handle_trailing_constants
16387 (target, rev_builder, nelts, nelts_reqd))
16388 {
16389 emit_insn (gen_aarch64_sve_rev (mode, target, target));
16390 return true;
16391 }
16392
16393 /* Case 3: Vector contains trailing same element. */
16394
16395 if (aarch64_sve_expand_vector_init_handle_trailing_same_elem
16396 (target, builder, nelts_reqd))
16397 return true;
16398
16399 /* Case 4: Vector contains leading same element. */
16400
16401 if (aarch64_sve_expand_vector_init_handle_trailing_same_elem
16402 (target, rev_builder, nelts_reqd) && nelts_reqd == nelts)
16403 {
16404 emit_insn (gen_aarch64_sve_rev (mode, target, target));
16405 return true;
16406 }
16407
16408 /* Avoid recursing below 4-elements.
16409 ??? The threshold 4 may need fine-tuning. */
16410
16411 if (nelts_reqd <= 4)
16412 return false;
16413
16414 rtx_vector_builder v_even (mode, 1, nelts);
16415 rtx_vector_builder v_odd (mode, 1, nelts);
16416
16417 for (int i = 0; i < nelts * 2; i += 2)
16418 {
16419 v_even.quick_push (builder.elt (i));
16420 v_odd.quick_push (builder.elt (i + 1));
16421 }
16422
16423 v_even.finalize ();
16424 v_odd.finalize ();
16425
16426 rtx tmp1 = gen_reg_rtx (mode);
16427 bool did_even_p = aarch64_sve_expand_vector_init (tmp1, v_even,
16428 nelts, nelts_reqd / 2);
16429
16430 rtx tmp2 = gen_reg_rtx (mode);
16431 bool did_odd_p = aarch64_sve_expand_vector_init (tmp2, v_odd,
16432 nelts, nelts_reqd / 2);
16433
16434 if (!did_even_p && !did_odd_p)
16435 return false;
16436
16437 /* Initialize v_even and v_odd using INSR if it didn't match any of the
16438 special cases and zip v_even, v_odd. */
16439
16440 if (!did_even_p)
16441 aarch64_sve_expand_vector_init_insert_elems (tmp1, v_even, nelts_reqd / 2);
16442
16443 if (!did_odd_p)
16444 aarch64_sve_expand_vector_init_insert_elems (tmp2, v_odd, nelts_reqd / 2);
16445
16446 rtvec v = gen_rtvec (2, tmp1, tmp2);
16447 emit_set_insn (target, gen_rtx_UNSPEC (mode, v, UNSPEC_ZIP1));
16448 return true;
16449}
16450
16451/* Initialize register TARGET from the elements in PARALLEL rtx VALS. */
16452
16453void
16454aarch64_sve_expand_vector_init (rtx target, rtx vals)
16455{
16456 machine_mode mode = GET_MODE (target);
16457 int nelts = XVECLEN (vals, 0);
16458
16459 rtx_vector_builder v (mode, 1, nelts);
16460 for (int i = 0; i < nelts; i++)
16461 v.quick_push (XVECEXP (vals, 0, i));
16462 v.finalize ();
16463
16464	  /* If neither sub-vector of v could be initialized specially,
16465	     then use INSR to insert all elements from v into TARGET.
16466	     ??? This might not be optimal for vectors with large
16467	     initializers of 16 elements or more.
16468	     For nelts < 4, it probably isn't useful to handle specially.  */
16469
16470 if (nelts < 4
16471 || !aarch64_sve_expand_vector_init (target, v, nelts, nelts))
16472 aarch64_sve_expand_vector_init_insert_elems (target, v, nelts);
16473}
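
As a sanity check on the recursion above, here is a minimal, self-contained C sketch (an editorial addition, not GCC code): splitting a sequence into its even- and odd-indexed halves and then re-interleaving them reproduces the original order, which is exactly the invariant the final ZIP1 relies on.

#include <assert.h>
#include <stddef.h>

/* Standalone model of the even/odd split followed by a zip.
   Assumes N is even and at most 64.  */
static void
split_and_zip (const int *in, int *out, size_t n)
{
  int even[32], odd[32];
  for (size_t i = 0; i < n / 2; i++)
    {
      even[i] = in[2 * i];        /* v_even */
      odd[i] = in[2 * i + 1];     /* v_odd */
    }
  for (size_t i = 0; i < n / 2; i++)
    {
      out[2 * i] = even[i];       /* zip interleaves the halves again */
      out[2 * i + 1] = odd[i];
    }
}

int
main (void)
{
  int in[8] = { 10, 1, 20, 2, 30, 3, 40, 4 };   /* {a, 1, b, 2, c, 3, d, 4} */
  int out[8];
  split_and_zip (in, out, 8);
  for (int i = 0; i < 8; i++)
    assert (in[i] == out[i]);
  return 0;
}
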
16474
43e9d192 16475static unsigned HOST_WIDE_INT
ef4bddc2 16476aarch64_shift_truncation_mask (machine_mode mode)
43e9d192 16477{
43cacb12
RS
16478 if (!SHIFT_COUNT_TRUNCATED || aarch64_vector_data_mode_p (mode))
16479 return 0;
16480 return GET_MODE_UNIT_BITSIZE (mode) - 1;
43e9d192
IB
16481}
16482
43e9d192
IB
16483/* Select a format to encode pointers in exception handling data. */
16484int
16485aarch64_asm_preferred_eh_data_format (int code ATTRIBUTE_UNUSED, int global)
16486{
16487 int type;
16488 switch (aarch64_cmodel)
16489 {
16490 case AARCH64_CMODEL_TINY:
16491 case AARCH64_CMODEL_TINY_PIC:
16492 case AARCH64_CMODEL_SMALL:
16493 case AARCH64_CMODEL_SMALL_PIC:
1b1e81f8 16494 case AARCH64_CMODEL_SMALL_SPIC:
43e9d192
IB
16495 /* text+got+data < 4Gb. 4-byte signed relocs are sufficient
16496 for everything. */
16497 type = DW_EH_PE_sdata4;
16498 break;
16499 default:
16500 /* No assumptions here. 8-byte relocs required. */
16501 type = DW_EH_PE_sdata8;
16502 break;
16503 }
16504 return (global ? DW_EH_PE_indirect : 0) | DW_EH_PE_pcrel | type;
16505}
16506
b07fc91c
SN
16507/* Output .variant_pcs for aarch64_vector_pcs function symbols. */
16508
16509static void
16510aarch64_asm_output_variant_pcs (FILE *stream, const tree decl, const char* name)
16511{
16512 if (aarch64_simd_decl_p (decl))
16513 {
16514 fprintf (stream, "\t.variant_pcs\t");
16515 assemble_name (stream, name);
16516 fprintf (stream, "\n");
16517 }
16518}
16519
e1c1ecb0
KT
16520/* The last .arch and .tune assembly strings that we printed. */
16521static std::string aarch64_last_printed_arch_string;
16522static std::string aarch64_last_printed_tune_string;
16523
361fb3ee
KT
16524/* Implement ASM_DECLARE_FUNCTION_NAME. Output the ISA features used
16525 by the function fndecl. */
16526
16527void
16528aarch64_declare_function_name (FILE *stream, const char* name,
16529 tree fndecl)
16530{
16531 tree target_parts = DECL_FUNCTION_SPECIFIC_TARGET (fndecl);
16532
16533 struct cl_target_option *targ_options;
16534 if (target_parts)
16535 targ_options = TREE_TARGET_OPTION (target_parts);
16536 else
16537 targ_options = TREE_TARGET_OPTION (target_option_current_node);
16538 gcc_assert (targ_options);
16539
16540 const struct processor *this_arch
16541 = aarch64_get_arch (targ_options->x_explicit_arch);
16542
28108a53 16543 uint64_t isa_flags = targ_options->x_aarch64_isa_flags;
054b4005 16544 std::string extension
04a99ebe
JG
16545 = aarch64_get_extension_string_for_isa_flags (isa_flags,
16546 this_arch->flags);
e1c1ecb0
KT
16547 /* Only update the assembler .arch string if it is distinct from the last
16548 such string we printed. */
16549 std::string to_print = this_arch->name + extension;
16550 if (to_print != aarch64_last_printed_arch_string)
16551 {
16552 asm_fprintf (asm_out_file, "\t.arch %s\n", to_print.c_str ());
16553 aarch64_last_printed_arch_string = to_print;
16554 }
361fb3ee
KT
16555
16556	  /* Print the cpu name we're tuning for in the comments; it might be
e1c1ecb0
KT
16557 useful to readers of the generated asm. Do it only when it changes
16558 from function to function and verbose assembly is requested. */
361fb3ee
KT
16559 const struct processor *this_tune
16560 = aarch64_get_tune_cpu (targ_options->x_explicit_tune_core);
16561
e1c1ecb0
KT
16562 if (flag_debug_asm && aarch64_last_printed_tune_string != this_tune->name)
16563 {
16564 asm_fprintf (asm_out_file, "\t" ASM_COMMENT_START ".tune %s\n",
16565 this_tune->name);
16566 aarch64_last_printed_tune_string = this_tune->name;
16567 }
361fb3ee 16568
b07fc91c
SN
16569 aarch64_asm_output_variant_pcs (stream, fndecl, name);
16570
361fb3ee
KT
16571 /* Don't forget the type directive for ELF. */
16572 ASM_OUTPUT_TYPE_DIRECTIVE (stream, name, "function");
16573 ASM_OUTPUT_LABEL (stream, name);
16574}
16575
b07fc91c
SN
16576/* Implement ASM_OUTPUT_DEF_FROM_DECLS. Output .variant_pcs for aliases. */
16577
16578void
16579aarch64_asm_output_alias (FILE *stream, const tree decl, const tree target)
16580{
16581 const char *name = XSTR (XEXP (DECL_RTL (decl), 0), 0);
16582 const char *value = IDENTIFIER_POINTER (target);
16583 aarch64_asm_output_variant_pcs (stream, decl, name);
16584 ASM_OUTPUT_DEF (stream, name, value);
16585}
16586
16587/* Implement ASM_OUTPUT_EXTERNAL. Output .variant_pcs for undefined
16588 function symbol references. */
16589
16590void
e8c47069 16591aarch64_asm_output_external (FILE *stream, tree decl, const char* name)
b07fc91c 16592{
e8c47069 16593 default_elf_asm_output_external (stream, decl, name);
b07fc91c
SN
16594 aarch64_asm_output_variant_pcs (stream, decl, name);
16595}
16596
8fc16d72
ST
16597/* Triggered after a .cfi_startproc directive is emitted into the assembly file.
16598 Used to output the .cfi_b_key_frame directive when signing the current
16599 function with the B key. */
16600
16601void
16602aarch64_post_cfi_startproc (FILE *f, tree ignored ATTRIBUTE_UNUSED)
16603{
2bdc7dcb 16604 if (cfun->machine->frame.laid_out && aarch64_return_address_signing_enabled ()
8fc16d72
ST
16605 && aarch64_ra_sign_key == AARCH64_KEY_B)
16606 asm_fprintf (f, "\t.cfi_b_key_frame\n");
16607}
16608
e1c1ecb0
KT
16609/* Implements TARGET_ASM_FILE_START. Output the assembly header. */
16610
16611static void
16612aarch64_start_file (void)
16613{
16614 struct cl_target_option *default_options
16615 = TREE_TARGET_OPTION (target_option_default_node);
16616
16617 const struct processor *default_arch
16618 = aarch64_get_arch (default_options->x_explicit_arch);
28108a53 16619 uint64_t default_isa_flags = default_options->x_aarch64_isa_flags;
e1c1ecb0 16620 std::string extension
04a99ebe
JG
16621 = aarch64_get_extension_string_for_isa_flags (default_isa_flags,
16622 default_arch->flags);
e1c1ecb0
KT
16623
16624 aarch64_last_printed_arch_string = default_arch->name + extension;
16625 aarch64_last_printed_tune_string = "";
16626 asm_fprintf (asm_out_file, "\t.arch %s\n",
16627 aarch64_last_printed_arch_string.c_str ());
16628
16629 default_file_start ();
16630}
16631
0462169c
SN
16632/* Emit load exclusive. */
16633
16634static void
ef4bddc2 16635aarch64_emit_load_exclusive (machine_mode mode, rtx rval,
0462169c
SN
16636 rtx mem, rtx model_rtx)
16637{
0016d8d9 16638 emit_insn (gen_aarch64_load_exclusive (mode, rval, mem, model_rtx));
0462169c
SN
16639}
16640
16641/* Emit store exclusive. */
16642
16643static void
ef4bddc2 16644aarch64_emit_store_exclusive (machine_mode mode, rtx bval,
0462169c
SN
16645 rtx rval, rtx mem, rtx model_rtx)
16646{
0016d8d9 16647 emit_insn (gen_aarch64_store_exclusive (mode, bval, rval, mem, model_rtx));
0462169c
SN
16648}
16649
16650/* Mark the previous jump instruction as unlikely. */
16651
16652static void
16653aarch64_emit_unlikely_jump (rtx insn)
16654{
f370536c 16655 rtx_insn *jump = emit_jump_insn (insn);
5fa396ad 16656 add_reg_br_prob_note (jump, profile_probability::very_unlikely ());
0462169c
SN
16657}
16658
16659/* Expand a compare and swap pattern. */
16660
16661void
16662aarch64_expand_compare_and_swap (rtx operands[])
16663{
d400fda3
RH
16664 rtx bval, rval, mem, oldval, newval, is_weak, mod_s, mod_f, x, cc_reg;
16665 machine_mode mode, r_mode;
0462169c
SN
16666
16667 bval = operands[0];
16668 rval = operands[1];
16669 mem = operands[2];
16670 oldval = operands[3];
16671 newval = operands[4];
16672 is_weak = operands[5];
16673 mod_s = operands[6];
16674 mod_f = operands[7];
16675 mode = GET_MODE (mem);
0462169c
SN
16676
16677 /* Normally the succ memory model must be stronger than fail, but in the
16678 unlikely event of fail being ACQUIRE and succ being RELEASE we need to
16679 promote succ to ACQ_REL so that we don't lose the acquire semantics. */
46b35980
AM
16680 if (is_mm_acquire (memmodel_from_int (INTVAL (mod_f)))
16681 && is_mm_release (memmodel_from_int (INTVAL (mod_s))))
0462169c
SN
16682 mod_s = GEN_INT (MEMMODEL_ACQ_REL);
16683
d400fda3
RH
16684 r_mode = mode;
16685 if (mode == QImode || mode == HImode)
0462169c 16686 {
d400fda3
RH
16687 r_mode = SImode;
16688 rval = gen_reg_rtx (r_mode);
0462169c
SN
16689 }
16690
b0770c0f 16691 if (TARGET_LSE)
77f33f44
RH
16692 {
16693 /* The CAS insn requires oldval and rval overlap, but we need to
16694 have a copy of oldval saved across the operation to tell if
16695 the operation is successful. */
d400fda3
RH
16696 if (reg_overlap_mentioned_p (rval, oldval))
16697 rval = copy_to_mode_reg (r_mode, oldval);
77f33f44 16698 else
d400fda3
RH
16699 emit_move_insn (rval, gen_lowpart (r_mode, oldval));
16700
77f33f44
RH
16701 emit_insn (gen_aarch64_compare_and_swap_lse (mode, rval, mem,
16702 newval, mod_s));
d400fda3 16703 cc_reg = aarch64_gen_compare_reg_maybe_ze (NE, rval, oldval, mode);
77f33f44 16704 }
b0770c0f 16705 else
d400fda3
RH
16706 {
16707 /* The oldval predicate varies by mode. Test it and force to reg. */
16708 insn_code code = code_for_aarch64_compare_and_swap (mode);
16709 if (!insn_data[code].operand[2].predicate (oldval, mode))
16710 oldval = force_reg (mode, oldval);
0462169c 16711
d400fda3
RH
16712 emit_insn (GEN_FCN (code) (rval, mem, oldval, newval,
16713 is_weak, mod_s, mod_f));
16714 cc_reg = gen_rtx_REG (CCmode, CC_REGNUM);
16715 }
16716
16717 if (r_mode != mode)
77f33f44
RH
16718 rval = gen_lowpart (mode, rval);
16719 emit_move_insn (operands[1], rval);
0462169c 16720
d400fda3 16721 x = gen_rtx_EQ (SImode, cc_reg, const0_rtx);
f7df4a84 16722 emit_insn (gen_rtx_SET (bval, x));
0462169c
SN
16723}
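
For context, a hedged example of the kind of source code that typically reaches this expander (the function name take_lock and the exact instruction selection are assumptions, not taken from this file): a strong compare-and-swap on an int. With TARGET_LSE the expander emits the single-instruction CAS form; otherwise the pattern is later split into an LDXR/STXR loop by aarch64_split_compare_and_swap below.

#include <stdatomic.h>
#include <stdbool.h>

/* Strong compare-and-swap; the zero expected value also matches the
   strong_zero_p fast path used by the non-LSE split.  */
bool
take_lock (atomic_int *lock)
{
  int expected = 0;
  return atomic_compare_exchange_strong_explicit (lock, &expected, 1,
                                                  memory_order_acquire,
                                                  memory_order_relaxed);
}
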
16724
f70fb3b6
MW
16725/* Emit a barrier, that is appropriate for memory model MODEL, at the end of a
16726 sequence implementing an atomic operation. */
16727
16728static void
16729aarch64_emit_post_barrier (enum memmodel model)
16730{
16731 const enum memmodel base_model = memmodel_base (model);
16732
16733 if (is_mm_sync (model)
16734 && (base_model == MEMMODEL_ACQUIRE
16735 || base_model == MEMMODEL_ACQ_REL
16736 || base_model == MEMMODEL_SEQ_CST))
16737 {
16738 emit_insn (gen_mem_thread_fence (GEN_INT (MEMMODEL_SEQ_CST)));
16739 }
16740}
16741
0462169c
SN
16742/* Split a compare and swap pattern. */
16743
16744void
16745aarch64_split_compare_and_swap (rtx operands[])
16746{
16747 rtx rval, mem, oldval, newval, scratch;
ef4bddc2 16748 machine_mode mode;
0462169c 16749 bool is_weak;
5d8a22a5
DM
16750 rtx_code_label *label1, *label2;
16751 rtx x, cond;
ab876106
MW
16752 enum memmodel model;
16753 rtx model_rtx;
0462169c
SN
16754
16755 rval = operands[0];
16756 mem = operands[1];
16757 oldval = operands[2];
16758 newval = operands[3];
16759 is_weak = (operands[4] != const0_rtx);
ab876106 16760 model_rtx = operands[5];
0462169c
SN
16761 scratch = operands[7];
16762 mode = GET_MODE (mem);
ab876106 16763 model = memmodel_from_int (INTVAL (model_rtx));
0462169c 16764
17f47f86
KT
16765 /* When OLDVAL is zero and we want the strong version we can emit a tighter
16766 loop:
16767 .label1:
16768 LD[A]XR rval, [mem]
16769 CBNZ rval, .label2
16770 ST[L]XR scratch, newval, [mem]
16771 CBNZ scratch, .label1
16772 .label2:
16773 CMP rval, 0. */
16774 bool strong_zero_p = !is_weak && oldval == const0_rtx;
16775
5d8a22a5 16776 label1 = NULL;
0462169c
SN
16777 if (!is_weak)
16778 {
16779 label1 = gen_label_rtx ();
16780 emit_label (label1);
16781 }
16782 label2 = gen_label_rtx ();
16783
ab876106
MW
16784 /* The initial load can be relaxed for a __sync operation since a final
16785 barrier will be emitted to stop code hoisting. */
16786 if (is_mm_sync (model))
16787 aarch64_emit_load_exclusive (mode, rval, mem,
16788 GEN_INT (MEMMODEL_RELAXED));
16789 else
16790 aarch64_emit_load_exclusive (mode, rval, mem, model_rtx);
0462169c 16791
17f47f86
KT
16792 if (strong_zero_p)
16793 {
6e1eaca9
RE
16794 if (aarch64_track_speculation)
16795 {
16796 /* Emit an explicit compare instruction, so that we can correctly
16797 track the condition codes. */
16798 rtx cc_reg = aarch64_gen_compare_reg (NE, rval, const0_rtx);
16799 x = gen_rtx_NE (GET_MODE (cc_reg), cc_reg, const0_rtx);
16800 }
16801 else
16802 x = gen_rtx_NE (VOIDmode, rval, const0_rtx);
16803
17f47f86
KT
16804 x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
16805 gen_rtx_LABEL_REF (Pmode, label2), pc_rtx);
16806 aarch64_emit_unlikely_jump (gen_rtx_SET (pc_rtx, x));
16807 }
16808 else
16809 {
d400fda3 16810 cond = aarch64_gen_compare_reg_maybe_ze (NE, rval, oldval, mode);
17f47f86
KT
16811 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
16812 x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
d400fda3 16813 gen_rtx_LABEL_REF (Pmode, label2), pc_rtx);
17f47f86
KT
16814 aarch64_emit_unlikely_jump (gen_rtx_SET (pc_rtx, x));
16815 }
0462169c 16816
ab876106 16817 aarch64_emit_store_exclusive (mode, scratch, mem, newval, model_rtx);
0462169c
SN
16818
16819 if (!is_weak)
16820 {
6e1eaca9
RE
16821 if (aarch64_track_speculation)
16822 {
16823 /* Emit an explicit compare instruction, so that we can correctly
16824 track the condition codes. */
16825 rtx cc_reg = aarch64_gen_compare_reg (NE, scratch, const0_rtx);
16826 x = gen_rtx_NE (GET_MODE (cc_reg), cc_reg, const0_rtx);
16827 }
16828 else
16829 x = gen_rtx_NE (VOIDmode, scratch, const0_rtx);
16830
0462169c
SN
16831 x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
16832 gen_rtx_LABEL_REF (Pmode, label1), pc_rtx);
f7df4a84 16833 aarch64_emit_unlikely_jump (gen_rtx_SET (pc_rtx, x));
0462169c
SN
16834 }
16835 else
16836 {
16837 cond = gen_rtx_REG (CCmode, CC_REGNUM);
16838 x = gen_rtx_COMPARE (CCmode, scratch, const0_rtx);
f7df4a84 16839 emit_insn (gen_rtx_SET (cond, x));
0462169c
SN
16840 }
16841
16842 emit_label (label2);
17f47f86
KT
16843	  /* If we used a CBNZ in the exchange loop, emit an explicit compare with RVAL
16844 to set the condition flags. If this is not used it will be removed by
16845 later passes. */
16846 if (strong_zero_p)
16847 {
16848 cond = gen_rtx_REG (CCmode, CC_REGNUM);
16849 x = gen_rtx_COMPARE (CCmode, rval, const0_rtx);
16850 emit_insn (gen_rtx_SET (cond, x));
16851 }
ab876106
MW
16852 /* Emit any final barrier needed for a __sync operation. */
16853 if (is_mm_sync (model))
16854 aarch64_emit_post_barrier (model);
0462169c 16855}
9cd7b720 16856
0462169c
SN
16857/* Split an atomic operation. */
16858
16859void
16860aarch64_split_atomic_op (enum rtx_code code, rtx old_out, rtx new_out, rtx mem,
9cd7b720 16861 rtx value, rtx model_rtx, rtx cond)
0462169c 16862{
ef4bddc2
RS
16863 machine_mode mode = GET_MODE (mem);
16864 machine_mode wmode = (mode == DImode ? DImode : SImode);
f70fb3b6
MW
16865 const enum memmodel model = memmodel_from_int (INTVAL (model_rtx));
16866 const bool is_sync = is_mm_sync (model);
5d8a22a5
DM
16867 rtx_code_label *label;
16868 rtx x;
0462169c 16869
9cd7b720 16870 /* Split the atomic operation into a sequence. */
0462169c
SN
16871 label = gen_label_rtx ();
16872 emit_label (label);
16873
16874 if (new_out)
16875 new_out = gen_lowpart (wmode, new_out);
16876 if (old_out)
16877 old_out = gen_lowpart (wmode, old_out);
16878 else
16879 old_out = new_out;
16880 value = simplify_gen_subreg (wmode, value, mode, 0);
16881
f70fb3b6
MW
16882 /* The initial load can be relaxed for a __sync operation since a final
16883 barrier will be emitted to stop code hoisting. */
16884 if (is_sync)
16885 aarch64_emit_load_exclusive (mode, old_out, mem,
16886 GEN_INT (MEMMODEL_RELAXED));
16887 else
16888 aarch64_emit_load_exclusive (mode, old_out, mem, model_rtx);
0462169c
SN
16889
16890 switch (code)
16891 {
16892 case SET:
16893 new_out = value;
16894 break;
16895
16896 case NOT:
16897 x = gen_rtx_AND (wmode, old_out, value);
f7df4a84 16898 emit_insn (gen_rtx_SET (new_out, x));
0462169c 16899 x = gen_rtx_NOT (wmode, new_out);
f7df4a84 16900 emit_insn (gen_rtx_SET (new_out, x));
0462169c
SN
16901 break;
16902
16903 case MINUS:
16904 if (CONST_INT_P (value))
16905 {
16906 value = GEN_INT (-INTVAL (value));
16907 code = PLUS;
16908 }
16909 /* Fall through. */
16910
16911 default:
16912 x = gen_rtx_fmt_ee (code, wmode, old_out, value);
f7df4a84 16913 emit_insn (gen_rtx_SET (new_out, x));
0462169c
SN
16914 break;
16915 }
16916
16917 aarch64_emit_store_exclusive (mode, cond, mem,
16918 gen_lowpart (mode, new_out), model_rtx);
16919
6e1eaca9
RE
16920 if (aarch64_track_speculation)
16921 {
16922 /* Emit an explicit compare instruction, so that we can correctly
16923 track the condition codes. */
16924 rtx cc_reg = aarch64_gen_compare_reg (NE, cond, const0_rtx);
16925 x = gen_rtx_NE (GET_MODE (cc_reg), cc_reg, const0_rtx);
16926 }
16927 else
16928 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
16929
0462169c
SN
16930 x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
16931 gen_rtx_LABEL_REF (Pmode, label), pc_rtx);
f7df4a84 16932 aarch64_emit_unlikely_jump (gen_rtx_SET (pc_rtx, x));
f70fb3b6
MW
16933
16934 /* Emit any final barrier needed for a __sync operation. */
16935 if (is_sync)
16936 aarch64_emit_post_barrier (model);
0462169c
SN
16937}
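
A hedged usage sketch for the split above (the function name bump is an assumption; the generated code depends on the -march flags): without LSE atomics, a relaxed fetch-and-add like this is expected to be expanded into the exclusive load / ADD / exclusive store retry loop that this routine builds, with the PLUS case of the switch supplying the ADD.

#include <stdatomic.h>

/* Without LSE, expected (not verified here) to become an exclusive
   load/op/store loop of the shape split above.  */
int
bump (atomic_int *counter)
{
  return atomic_fetch_add_explicit (counter, 1, memory_order_relaxed);
}
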
16938
c2ec330c
AL
16939static void
16940aarch64_init_libfuncs (void)
16941{
16942 /* Half-precision float operations. The compiler handles all operations
16943 with NULL libfuncs by converting to SFmode. */
16944
16945 /* Conversions. */
16946 set_conv_libfunc (trunc_optab, HFmode, SFmode, "__gnu_f2h_ieee");
16947 set_conv_libfunc (sext_optab, SFmode, HFmode, "__gnu_h2f_ieee");
16948
16949 /* Arithmetic. */
16950 set_optab_libfunc (add_optab, HFmode, NULL);
16951 set_optab_libfunc (sdiv_optab, HFmode, NULL);
16952 set_optab_libfunc (smul_optab, HFmode, NULL);
16953 set_optab_libfunc (neg_optab, HFmode, NULL);
16954 set_optab_libfunc (sub_optab, HFmode, NULL);
16955
16956 /* Comparisons. */
16957 set_optab_libfunc (eq_optab, HFmode, NULL);
16958 set_optab_libfunc (ne_optab, HFmode, NULL);
16959 set_optab_libfunc (lt_optab, HFmode, NULL);
16960 set_optab_libfunc (le_optab, HFmode, NULL);
16961 set_optab_libfunc (ge_optab, HFmode, NULL);
16962 set_optab_libfunc (gt_optab, HFmode, NULL);
16963 set_optab_libfunc (unord_optab, HFmode, NULL);
16964}
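
A hedged example of what the NULL HFmode libfuncs above imply (the function name is an assumption): an __fp16 addition has no HFmode add libfunc, so the arithmetic is expected to be carried out in SFmode as described in the comment above.

/* With the NULL entries above there is no HFmode add libfunc, so the
   addition is expected (not verified) to be performed in single
   precision and the result narrowed back to __fp16.  */
__fp16
add_hf (__fp16 a, __fp16 b)
{
  return a + b;
}
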
16965
43e9d192 16966/* Target hook for c_mode_for_suffix. */
ef4bddc2 16967static machine_mode
43e9d192
IB
16968aarch64_c_mode_for_suffix (char suffix)
16969{
16970 if (suffix == 'q')
16971 return TFmode;
16972
16973 return VOIDmode;
16974}
16975
3520f7cc
JG
16976/* We can only represent floating point constants which will fit in
16977 "quarter-precision" values. These values are characterised by
16978	   a sign bit, a 4-bit mantissa and a 3-bit exponent, and are given
16979	   by:
16980
16981 (-1)^s * (n/16) * 2^r
16982
16983 Where:
16984 's' is the sign bit.
16985 'n' is an integer in the range 16 <= n <= 31.
16986 'r' is an integer in the range -3 <= r <= 4. */
16987
16988/* Return true iff X can be represented as a quarter-precision
16989	   floating point immediate operand.  Note, we cannot represent 0.0.  */
16990bool
16991aarch64_float_const_representable_p (rtx x)
16992{
16993 /* This represents our current view of how many bits
16994 make up the mantissa. */
16995 int point_pos = 2 * HOST_BITS_PER_WIDE_INT - 1;
ba96cdfb 16996 int exponent;
3520f7cc 16997 unsigned HOST_WIDE_INT mantissa, mask;
3520f7cc 16998 REAL_VALUE_TYPE r, m;
807e902e 16999 bool fail;
3520f7cc 17000
d29f7dd5 17001 x = unwrap_const_vec_duplicate (x);
3520f7cc
JG
17002 if (!CONST_DOUBLE_P (x))
17003 return false;
17004
a4518821
RS
17005 if (GET_MODE (x) == VOIDmode
17006 || (GET_MODE (x) == HFmode && !TARGET_FP_F16INST))
94bfa2da
TV
17007 return false;
17008
34a72c33 17009 r = *CONST_DOUBLE_REAL_VALUE (x);
3520f7cc
JG
17010
17011 /* We cannot represent infinities, NaNs or +/-zero. We won't
17012 know if we have +zero until we analyse the mantissa, but we
17013 can reject the other invalid values. */
17014 if (REAL_VALUE_ISINF (r) || REAL_VALUE_ISNAN (r)
17015 || REAL_VALUE_MINUS_ZERO (r))
17016 return false;
17017
ba96cdfb 17018 /* Extract exponent. */
3520f7cc
JG
17019 r = real_value_abs (&r);
17020 exponent = REAL_EXP (&r);
17021
17022 /* For the mantissa, we expand into two HOST_WIDE_INTS, apart from the
17023 highest (sign) bit, with a fixed binary point at bit point_pos.
17024 m1 holds the low part of the mantissa, m2 the high part.
17025 WARNING: If we ever have a representation using more than 2 * H_W_I - 1
17026 bits for the mantissa, this can fail (low bits will be lost). */
17027 real_ldexp (&m, &r, point_pos - exponent);
807e902e 17028 wide_int w = real_to_integer (&m, &fail, HOST_BITS_PER_WIDE_INT * 2);
3520f7cc
JG
17029
17030 /* If the low part of the mantissa has bits set we cannot represent
17031 the value. */
d9074b29 17032 if (w.ulow () != 0)
3520f7cc
JG
17033 return false;
17034 /* We have rejected the lower HOST_WIDE_INT, so update our
17035 understanding of how many bits lie in the mantissa and
17036 look only at the high HOST_WIDE_INT. */
807e902e 17037 mantissa = w.elt (1);
3520f7cc
JG
17038 point_pos -= HOST_BITS_PER_WIDE_INT;
17039
17040 /* We can only represent values with a mantissa of the form 1.xxxx. */
17041 mask = ((unsigned HOST_WIDE_INT)1 << (point_pos - 5)) - 1;
17042 if ((mantissa & mask) != 0)
17043 return false;
17044
17045 /* Having filtered unrepresentable values, we may now remove all
17046 but the highest 5 bits. */
17047 mantissa >>= point_pos - 5;
17048
17049 /* We cannot represent the value 0.0, so reject it. This is handled
17050 elsewhere. */
17051 if (mantissa == 0)
17052 return false;
17053
17054 /* Then, as bit 4 is always set, we can mask it off, leaving
17055 the mantissa in the range [0, 15]. */
17056 mantissa &= ~(1 << 4);
17057 gcc_assert (mantissa <= 15);
17058
17059 /* GCC internally does not use IEEE754-like encoding (where normalized
17060	     significands are in the range [1, 2)).  GCC uses [0.5, 1) (see real.c).
17061	     Our mantissa values are shifted 4 places to the left relative to
17062	     normalized IEEE754, so we must modify the exponent returned by REAL_EXP
17063 by 5 places to correct for GCC's representation. */
17064 exponent = 5 - exponent;
17065
17066 return (exponent >= 0 && exponent <= 7);
17067}
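
A small standalone sketch (an editorial addition, not GCC code) of the value set described by the comment before this function: enumerating (-1)^s * (n/16) * 2^r with n in [16, 31] and r in [-3, 4] gives every representable quarter-precision constant, so for example 2.5 = (20/16) * 2^1 is representable while 0.1 is not.

#include <math.h>
#include <stdbool.h>
#include <stdio.h>

/* Brute-force check of X against the quarter-precision form.  */
static bool
quarter_precision_p (double x)
{
  for (int s = 0; s <= 1; s++)
    for (int n = 16; n <= 31; n++)
      for (int r = -3; r <= 4; r++)
        if (x == (s ? -1.0 : 1.0) * ((double) n / 16.0) * pow (2.0, r))
          return true;
  return false;
}

int
main (void)
{
  printf ("2.5: %d\n", quarter_precision_p (2.5));   /* 1: n = 20, r = 1 */
  printf ("0.1: %d\n", quarter_precision_p (0.1));   /* 0 */
  return 0;
}
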
17068
ab6501d7
SD
17069/* Returns the string with the instruction for AdvSIMD MOVI, MVNI, ORR or BIC
17070 immediate with a CONST_VECTOR of MODE and WIDTH. WHICH selects whether to
17071 output MOVI/MVNI, ORR or BIC immediate. */
3520f7cc 17072char*
b187677b 17073aarch64_output_simd_mov_immediate (rtx const_vector, unsigned width,
ab6501d7 17074 enum simd_immediate_check which)
3520f7cc 17075{
3ea63f60 17076 bool is_valid;
3520f7cc 17077 static char templ[40];
3520f7cc 17078 const char *mnemonic;
e4f0f84d 17079 const char *shift_op;
3520f7cc 17080 unsigned int lane_count = 0;
81c2dfb9 17081 char element_char;
3520f7cc 17082
b187677b 17083 struct simd_immediate_info info;
48063b9d
IB
17084
17085 /* This will return true to show const_vector is legal for use as either
ab6501d7
SD
17086	   an AdvSIMD MOVI instruction (or, implicitly, MVNI), ORR or BIC immediate.
17087 It will also update INFO to show how the immediate should be generated.
17088 WHICH selects whether to check for MOVI/MVNI, ORR or BIC. */
b187677b 17089 is_valid = aarch64_simd_valid_immediate (const_vector, &info, which);
3520f7cc
JG
17090 gcc_assert (is_valid);
17091
b187677b
RS
17092 element_char = sizetochar (GET_MODE_BITSIZE (info.elt_mode));
17093 lane_count = width / GET_MODE_BITSIZE (info.elt_mode);
48063b9d 17094
b187677b 17095 if (GET_MODE_CLASS (info.elt_mode) == MODE_FLOAT)
3520f7cc 17096 {
1da83cce
RS
17097 gcc_assert (info.insn == simd_immediate_info::MOV
17098 && info.u.mov.shift == 0);
0d8e1702
KT
17099 /* For FP zero change it to a CONST_INT 0 and use the integer SIMD
17100 move immediate path. */
1da83cce
RS
17101 if (aarch64_float_const_zero_rtx_p (info.u.mov.value))
17102 info.u.mov.value = GEN_INT (0);
48063b9d
IB
17103 else
17104 {
83faf7d0 17105 const unsigned int buf_size = 20;
48063b9d 17106 char float_buf[buf_size] = {'\0'};
34a72c33 17107 real_to_decimal_for_mode (float_buf,
1da83cce 17108 CONST_DOUBLE_REAL_VALUE (info.u.mov.value),
b187677b 17109 buf_size, buf_size, 1, info.elt_mode);
48063b9d
IB
17110
17111 if (lane_count == 1)
17112 snprintf (templ, sizeof (templ), "fmov\t%%d0, %s", float_buf);
17113 else
17114 snprintf (templ, sizeof (templ), "fmov\t%%0.%d%c, %s",
81c2dfb9 17115 lane_count, element_char, float_buf);
48063b9d
IB
17116 return templ;
17117 }
3520f7cc 17118 }
3520f7cc 17119
1da83cce 17120 gcc_assert (CONST_INT_P (info.u.mov.value));
ab6501d7
SD
17121
17122 if (which == AARCH64_CHECK_MOV)
17123 {
b187677b 17124 mnemonic = info.insn == simd_immediate_info::MVN ? "mvni" : "movi";
1da83cce
RS
17125 shift_op = (info.u.mov.modifier == simd_immediate_info::MSL
17126 ? "msl" : "lsl");
ab6501d7
SD
17127 if (lane_count == 1)
17128 snprintf (templ, sizeof (templ), "%s\t%%d0, " HOST_WIDE_INT_PRINT_HEX,
1da83cce
RS
17129 mnemonic, UINTVAL (info.u.mov.value));
17130 else if (info.u.mov.shift)
ab6501d7
SD
17131 snprintf (templ, sizeof (templ), "%s\t%%0.%d%c, "
17132 HOST_WIDE_INT_PRINT_HEX ", %s %d", mnemonic, lane_count,
1da83cce
RS
17133 element_char, UINTVAL (info.u.mov.value), shift_op,
17134 info.u.mov.shift);
ab6501d7
SD
17135 else
17136 snprintf (templ, sizeof (templ), "%s\t%%0.%d%c, "
17137 HOST_WIDE_INT_PRINT_HEX, mnemonic, lane_count,
1da83cce 17138 element_char, UINTVAL (info.u.mov.value));
ab6501d7 17139 }
3520f7cc 17140 else
ab6501d7
SD
17141 {
17142 /* For AARCH64_CHECK_BIC and AARCH64_CHECK_ORR. */
b187677b 17143 mnemonic = info.insn == simd_immediate_info::MVN ? "bic" : "orr";
1da83cce 17144 if (info.u.mov.shift)
ab6501d7
SD
17145 snprintf (templ, sizeof (templ), "%s\t%%0.%d%c, #"
17146 HOST_WIDE_INT_PRINT_DEC ", %s #%d", mnemonic, lane_count,
1da83cce
RS
17147 element_char, UINTVAL (info.u.mov.value), "lsl",
17148 info.u.mov.shift);
ab6501d7
SD
17149 else
17150 snprintf (templ, sizeof (templ), "%s\t%%0.%d%c, #"
17151 HOST_WIDE_INT_PRINT_DEC, mnemonic, lane_count,
1da83cce 17152 element_char, UINTVAL (info.u.mov.value));
ab6501d7 17153 }
3520f7cc
JG
17154 return templ;
17155}
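
As a hedged illustration of the MOVI templates above (the commented mnemonic is the expected shape for this immediate, not captured compiler output): a duplicated 32-bit constant such as 0x100 fits the shifted MOVI form, so the "%0.4s, value, lsl" variant of the template applies.

#include <arm_neon.h>

/* Expected (not verified) to be emitted as something like:
     movi    v0.4s, 0x1, lsl 8  */
uint32x4_t
all_0x100 (void)
{
  return vdupq_n_u32 (0x100);
}
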
17156
b7342d25 17157char*
77e994c9 17158aarch64_output_scalar_simd_mov_immediate (rtx immediate, scalar_int_mode mode)
b7342d25 17159{
a2170965
TC
17160
17161 /* If a floating point number was passed and we desire to use it in an
17162	     integer mode, do the conversion to integer.  */
17163 if (CONST_DOUBLE_P (immediate) && GET_MODE_CLASS (mode) == MODE_INT)
17164 {
17165 unsigned HOST_WIDE_INT ival;
17166 if (!aarch64_reinterpret_float_as_int (immediate, &ival))
17167 gcc_unreachable ();
17168 immediate = gen_int_mode (ival, mode);
17169 }
17170
ef4bddc2 17171 machine_mode vmode;
a2170965
TC
17172	  /* Use a 64-bit mode for everything except for DI/DF mode, where we use
17173	     a 128-bit vector mode.  */
17174 int width = GET_MODE_BITSIZE (mode) == 64 ? 128 : 64;
b7342d25 17175
a2170965 17176 vmode = aarch64_simd_container_mode (mode, width);
b7342d25 17177 rtx v_op = aarch64_simd_gen_const_vector_dup (vmode, INTVAL (immediate));
b187677b 17178 return aarch64_output_simd_mov_immediate (v_op, width);
b7342d25
IB
17179}
17180
43cacb12
RS
17181/* Return the output string to use for moving immediate CONST_VECTOR
17182 into an SVE register. */
17183
17184char *
17185aarch64_output_sve_mov_immediate (rtx const_vector)
17186{
17187 static char templ[40];
17188 struct simd_immediate_info info;
17189 char element_char;
17190
17191 bool is_valid = aarch64_simd_valid_immediate (const_vector, &info);
17192 gcc_assert (is_valid);
17193
17194 element_char = sizetochar (GET_MODE_BITSIZE (info.elt_mode));
17195
1044fa32
RS
17196 machine_mode vec_mode = GET_MODE (const_vector);
17197 if (aarch64_sve_pred_mode_p (vec_mode))
17198 {
17199 static char buf[sizeof ("ptrue\t%0.N, vlNNNNN")];
0b1fe8cf
RS
17200 if (info.insn == simd_immediate_info::MOV)
17201 {
17202 gcc_assert (info.u.mov.value == const0_rtx);
17203 snprintf (buf, sizeof (buf), "pfalse\t%%0.b");
17204 }
1044fa32 17205 else
0b1fe8cf
RS
17206 {
17207 gcc_assert (info.insn == simd_immediate_info::PTRUE);
17208 unsigned int total_bytes;
17209 if (info.u.pattern == AARCH64_SV_ALL
17210 && BYTES_PER_SVE_VECTOR.is_constant (&total_bytes))
17211 snprintf (buf, sizeof (buf), "ptrue\t%%0.%c, vl%d", element_char,
17212 total_bytes / GET_MODE_SIZE (info.elt_mode));
17213 else
17214 snprintf (buf, sizeof (buf), "ptrue\t%%0.%c, %s", element_char,
17215 svpattern_token (info.u.pattern));
17216 }
1044fa32
RS
17217 return buf;
17218 }
17219
1da83cce 17220 if (info.insn == simd_immediate_info::INDEX)
43cacb12
RS
17221 {
17222 snprintf (templ, sizeof (templ), "index\t%%0.%c, #"
17223 HOST_WIDE_INT_PRINT_DEC ", #" HOST_WIDE_INT_PRINT_DEC,
1da83cce
RS
17224 element_char, INTVAL (info.u.index.base),
17225 INTVAL (info.u.index.step));
43cacb12
RS
17226 return templ;
17227 }
17228
17229 if (GET_MODE_CLASS (info.elt_mode) == MODE_FLOAT)
17230 {
1da83cce
RS
17231 if (aarch64_float_const_zero_rtx_p (info.u.mov.value))
17232 info.u.mov.value = GEN_INT (0);
43cacb12
RS
17233 else
17234 {
17235 const int buf_size = 20;
17236 char float_buf[buf_size] = {};
17237 real_to_decimal_for_mode (float_buf,
1da83cce 17238 CONST_DOUBLE_REAL_VALUE (info.u.mov.value),
43cacb12
RS
17239 buf_size, buf_size, 1, info.elt_mode);
17240
17241 snprintf (templ, sizeof (templ), "fmov\t%%0.%c, #%s",
17242 element_char, float_buf);
17243 return templ;
17244 }
17245 }
17246
17247 snprintf (templ, sizeof (templ), "mov\t%%0.%c, #" HOST_WIDE_INT_PRINT_DEC,
1da83cce 17248 element_char, INTVAL (info.u.mov.value));
43cacb12
RS
17249 return templ;
17250}
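
A hedged ACLE example for the INDEX branch above (assumes compilation with SVE enabled; the commented mnemonic is the expected shape, not captured output): a constant linear series maps to the index template.

#include <arm_sve.h>

/* Expected (not verified) to become:  index  z0.s, #0, #1  */
svint32_t
iota (void)
{
  return svindex_s32 (0, 1);
}
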
17251
88b08073
JG
17252/* Split operands into moves from op[1] + op[2] into op[0]. */
17253
17254void
17255aarch64_split_combinev16qi (rtx operands[3])
17256{
17257 unsigned int dest = REGNO (operands[0]);
17258 unsigned int src1 = REGNO (operands[1]);
17259 unsigned int src2 = REGNO (operands[2]);
ef4bddc2 17260 machine_mode halfmode = GET_MODE (operands[1]);
462a99aa 17261 unsigned int halfregs = REG_NREGS (operands[1]);
88b08073
JG
17262 rtx destlo, desthi;
17263
17264 gcc_assert (halfmode == V16QImode);
17265
17266 if (src1 == dest && src2 == dest + halfregs)
17267 {
17268 /* No-op move. Can't split to nothing; emit something. */
17269 emit_note (NOTE_INSN_DELETED);
17270 return;
17271 }
17272
17273 /* Preserve register attributes for variable tracking. */
17274 destlo = gen_rtx_REG_offset (operands[0], halfmode, dest, 0);
17275 desthi = gen_rtx_REG_offset (operands[0], halfmode, dest + halfregs,
17276 GET_MODE_SIZE (halfmode));
17277
17278 /* Special case of reversed high/low parts. */
17279 if (reg_overlap_mentioned_p (operands[2], destlo)
17280 && reg_overlap_mentioned_p (operands[1], desthi))
17281 {
17282 emit_insn (gen_xorv16qi3 (operands[1], operands[1], operands[2]));
17283 emit_insn (gen_xorv16qi3 (operands[2], operands[1], operands[2]));
17284 emit_insn (gen_xorv16qi3 (operands[1], operands[1], operands[2]));
17285 }
17286 else if (!reg_overlap_mentioned_p (operands[2], destlo))
17287 {
17288 /* Try to avoid unnecessary moves if part of the result
17289 is in the right place already. */
17290 if (src1 != dest)
17291 emit_move_insn (destlo, operands[1]);
17292 if (src2 != dest + halfregs)
17293 emit_move_insn (desthi, operands[2]);
17294 }
17295 else
17296 {
17297 if (src2 != dest + halfregs)
17298 emit_move_insn (desthi, operands[2]);
17299 if (src1 != dest)
17300 emit_move_insn (destlo, operands[1]);
17301 }
17302}
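
The reversed high/low case above exchanges the two halves without a scratch register by using three XORs; a minimal standalone C sketch (an editorial addition) of the same identity:

#include <assert.h>

int
main (void)
{
  unsigned int a = 0x1234, b = 0xabcd;
  /* Same three-XOR exchange as the V16QI case above.  */
  a ^= b;
  b ^= a;
  a ^= b;
  assert (a == 0xabcd && b == 0x1234);
  return 0;
}
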
17303
17304/* vec_perm support. */
17305
88b08073
JG
17306struct expand_vec_perm_d
17307{
17308 rtx target, op0, op1;
e3342de4 17309 vec_perm_indices perm;
ef4bddc2 17310 machine_mode vmode;
43cacb12 17311 unsigned int vec_flags;
88b08073
JG
17312 bool one_vector_p;
17313 bool testing_p;
17314};
17315
17316/* Generate a variable permutation. */
17317
17318static void
17319aarch64_expand_vec_perm_1 (rtx target, rtx op0, rtx op1, rtx sel)
17320{
ef4bddc2 17321 machine_mode vmode = GET_MODE (target);
88b08073
JG
17322 bool one_vector_p = rtx_equal_p (op0, op1);
17323
17324 gcc_checking_assert (vmode == V8QImode || vmode == V16QImode);
17325 gcc_checking_assert (GET_MODE (op0) == vmode);
17326 gcc_checking_assert (GET_MODE (op1) == vmode);
17327 gcc_checking_assert (GET_MODE (sel) == vmode);
17328 gcc_checking_assert (TARGET_SIMD);
17329
17330 if (one_vector_p)
17331 {
17332 if (vmode == V8QImode)
17333 {
17334 /* Expand the argument to a V16QI mode by duplicating it. */
17335 rtx pair = gen_reg_rtx (V16QImode);
17336 emit_insn (gen_aarch64_combinev8qi (pair, op0, op0));
17337 emit_insn (gen_aarch64_tbl1v8qi (target, pair, sel));
17338 }
17339 else
17340 {
17341 emit_insn (gen_aarch64_tbl1v16qi (target, op0, sel));
17342 }
17343 }
17344 else
17345 {
17346 rtx pair;
17347
17348 if (vmode == V8QImode)
17349 {
17350 pair = gen_reg_rtx (V16QImode);
17351 emit_insn (gen_aarch64_combinev8qi (pair, op0, op1));
17352 emit_insn (gen_aarch64_tbl1v8qi (target, pair, sel));
17353 }
17354 else
17355 {
17356 pair = gen_reg_rtx (OImode);
17357 emit_insn (gen_aarch64_combinev16qi (pair, op0, op1));
17358 emit_insn (gen_aarch64_tbl2v16qi (target, pair, sel));
17359 }
17360 }
17361}
17362
80940017
RS
17363/* Expand a vec_perm with the operands given by TARGET, OP0, OP1 and SEL.
17364 NELT is the number of elements in the vector. */
17365
88b08073 17366void
80940017
RS
17367aarch64_expand_vec_perm (rtx target, rtx op0, rtx op1, rtx sel,
17368 unsigned int nelt)
88b08073 17369{
ef4bddc2 17370 machine_mode vmode = GET_MODE (target);
88b08073 17371 bool one_vector_p = rtx_equal_p (op0, op1);
f7c4e5b8 17372 rtx mask;
88b08073
JG
17373
17374 /* The TBL instruction does not use a modulo index, so we must take care
17375 of that ourselves. */
f7c4e5b8
AL
17376 mask = aarch64_simd_gen_const_vector_dup (vmode,
17377 one_vector_p ? nelt - 1 : 2 * nelt - 1);
88b08073
JG
17378 sel = expand_simple_binop (vmode, AND, sel, mask, NULL, 0, OPTAB_LIB_WIDEN);
17379
f7c4e5b8
AL
17380 /* For big-endian, we also need to reverse the index within the vector
17381 (but not which vector). */
17382 if (BYTES_BIG_ENDIAN)
17383 {
17384 /* If one_vector_p, mask is a vector of (nelt - 1)'s already. */
17385 if (!one_vector_p)
17386 mask = aarch64_simd_gen_const_vector_dup (vmode, nelt - 1);
17387 sel = expand_simple_binop (vmode, XOR, sel, mask,
17388 NULL, 0, OPTAB_LIB_WIDEN);
17389 }
88b08073
JG
17390 aarch64_expand_vec_perm_1 (target, op0, op1, sel);
17391}
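
A standalone byte-level model (an editorial addition, not GCC code) of why the AND mask above is needed: vec_perm indices are defined modulo the number of input elements, whereas TBL returns 0 for out-of-range byte indices, so the indices must be reduced before the TBL.

#include <assert.h>

#define NELT 16                 /* one V16QImode input */

/* Model of a single-register TBL lookup.  */
static unsigned char
tbl1 (const unsigned char *table, unsigned char idx)
{
  return idx < NELT ? table[idx] : 0;
}

int
main (void)
{
  unsigned char v[NELT];
  for (int i = 0; i < NELT; i++)
    v[i] = 100 + i;

  unsigned char sel = 19;                        /* out of range */
  assert (tbl1 (v, sel) == 0);                   /* raw TBL yields 0 */
  assert (tbl1 (v, sel & (NELT - 1)) == v[3]);   /* masked index wraps */
  return 0;
}
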
17392
43cacb12
RS
17393/* Generate (set TARGET (unspec [OP0 OP1] CODE)). */
17394
17395static void
17396emit_unspec2 (rtx target, int code, rtx op0, rtx op1)
17397{
17398 emit_insn (gen_rtx_SET (target,
17399 gen_rtx_UNSPEC (GET_MODE (target),
17400 gen_rtvec (2, op0, op1), code)));
17401}
17402
17403/* Expand an SVE vec_perm with the given operands. */
17404
17405void
17406aarch64_expand_sve_vec_perm (rtx target, rtx op0, rtx op1, rtx sel)
17407{
17408 machine_mode data_mode = GET_MODE (target);
17409 machine_mode sel_mode = GET_MODE (sel);
17410 /* Enforced by the pattern condition. */
17411 int nunits = GET_MODE_NUNITS (sel_mode).to_constant ();
17412
17413 /* Note: vec_perm indices are supposed to wrap when they go beyond the
17414 size of the two value vectors, i.e. the upper bits of the indices
17415 are effectively ignored. SVE TBL instead produces 0 for any
17416 out-of-range indices, so we need to modulo all the vec_perm indices
17417 to ensure they are all in range. */
17418 rtx sel_reg = force_reg (sel_mode, sel);
17419
17420 /* Check if the sel only references the first values vector. */
17421 if (GET_CODE (sel) == CONST_VECTOR
17422 && aarch64_const_vec_all_in_range_p (sel, 0, nunits - 1))
17423 {
17424 emit_unspec2 (target, UNSPEC_TBL, op0, sel_reg);
17425 return;
17426 }
17427
17428 /* Check if the two values vectors are the same. */
17429 if (rtx_equal_p (op0, op1))
17430 {
17431 rtx max_sel = aarch64_simd_gen_const_vector_dup (sel_mode, nunits - 1);
17432 rtx sel_mod = expand_simple_binop (sel_mode, AND, sel_reg, max_sel,
17433 NULL, 0, OPTAB_DIRECT);
17434 emit_unspec2 (target, UNSPEC_TBL, op0, sel_mod);
17435 return;
17436 }
17437
17438	  /* Run TBL on each value vector and combine the results.  */
17439
17440 rtx res0 = gen_reg_rtx (data_mode);
17441 rtx res1 = gen_reg_rtx (data_mode);
17442 rtx neg_num_elems = aarch64_simd_gen_const_vector_dup (sel_mode, -nunits);
17443 if (GET_CODE (sel) != CONST_VECTOR
17444 || !aarch64_const_vec_all_in_range_p (sel, 0, 2 * nunits - 1))
17445 {
17446 rtx max_sel = aarch64_simd_gen_const_vector_dup (sel_mode,
17447 2 * nunits - 1);
17448 sel_reg = expand_simple_binop (sel_mode, AND, sel_reg, max_sel,
17449 NULL, 0, OPTAB_DIRECT);
17450 }
17451 emit_unspec2 (res0, UNSPEC_TBL, op0, sel_reg);
17452 rtx sel_sub = expand_simple_binop (sel_mode, PLUS, sel_reg, neg_num_elems,
17453 NULL, 0, OPTAB_DIRECT);
17454 emit_unspec2 (res1, UNSPEC_TBL, op1, sel_sub);
17455 if (GET_MODE_CLASS (data_mode) == MODE_VECTOR_INT)
17456 emit_insn (gen_rtx_SET (target, gen_rtx_IOR (data_mode, res0, res1)));
17457 else
17458 emit_unspec2 (target, UNSPEC_IORF, res0, res1);
17459}
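
A scalar sketch (an editorial addition) of the two-TBL fallback above: look up each input with the selector, shift the selector down by the element count for the second input, and OR the results, relying on out-of-range TBL lanes being zero.

#include <assert.h>

#define NELT 4

/* Scalar model of SVE TBL: out-of-range selectors produce 0.  */
static unsigned int
tbl (const unsigned int *v, long sel)
{
  return (sel >= 0 && sel < NELT) ? v[sel] : 0;
}

int
main (void)
{
  unsigned int op0[NELT] = { 10, 11, 12, 13 };
  unsigned int op1[NELT] = { 20, 21, 22, 23 };
  long sel[NELT] = { 1, 6, 3, 4 };      /* indices into the concatenation */

  for (int i = 0; i < NELT; i++)
    {
      unsigned int lo = tbl (op0, sel[i]);         /* first TBL */
      unsigned int hi = tbl (op1, sel[i] - NELT);  /* second TBL, shifted */
      unsigned int expect
        = sel[i] < NELT ? op0[sel[i]] : op1[sel[i] - NELT];
      assert ((lo | hi) == expect);                /* IOR combines them */
    }
  return 0;
}
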
17460
cc4d934f
JG
17461/* Recognize patterns suitable for the TRN instructions. */
17462static bool
17463aarch64_evpc_trn (struct expand_vec_perm_d *d)
17464{
6a70badb
RS
17465 HOST_WIDE_INT odd;
17466 poly_uint64 nelt = d->perm.length ();
cc4d934f 17467 rtx out, in0, in1, x;
ef4bddc2 17468 machine_mode vmode = d->vmode;
cc4d934f
JG
17469
17470 if (GET_MODE_UNIT_SIZE (vmode) > 8)
17471 return false;
17472
17473 /* Note that these are little-endian tests.
17474 We correct for big-endian later. */
6a70badb
RS
17475 if (!d->perm[0].is_constant (&odd)
17476 || (odd != 0 && odd != 1)
326ac20e
RS
17477 || !d->perm.series_p (0, 2, odd, 2)
17478 || !d->perm.series_p (1, 2, nelt + odd, 2))
cc4d934f 17479 return false;
cc4d934f
JG
17480
17481 /* Success! */
17482 if (d->testing_p)
17483 return true;
17484
17485 in0 = d->op0;
17486 in1 = d->op1;
43cacb12
RS
17487 /* We don't need a big-endian lane correction for SVE; see the comment
17488 at the head of aarch64-sve.md for details. */
17489 if (BYTES_BIG_ENDIAN && d->vec_flags == VEC_ADVSIMD)
cc4d934f
JG
17490 {
17491 x = in0, in0 = in1, in1 = x;
17492 odd = !odd;
17493 }
17494 out = d->target;
17495
3f8334a5
RS
17496 emit_set_insn (out, gen_rtx_UNSPEC (vmode, gen_rtvec (2, in0, in1),
17497 odd ? UNSPEC_TRN2 : UNSPEC_TRN1));
cc4d934f
JG
17498 return true;
17499}
17500
17501/* Recognize patterns suitable for the UZP instructions. */
17502static bool
17503aarch64_evpc_uzp (struct expand_vec_perm_d *d)
17504{
6a70badb 17505 HOST_WIDE_INT odd;
cc4d934f 17506 rtx out, in0, in1, x;
ef4bddc2 17507 machine_mode vmode = d->vmode;
cc4d934f
JG
17508
17509 if (GET_MODE_UNIT_SIZE (vmode) > 8)
17510 return false;
17511
17512 /* Note that these are little-endian tests.
17513 We correct for big-endian later. */
6a70badb
RS
17514 if (!d->perm[0].is_constant (&odd)
17515 || (odd != 0 && odd != 1)
326ac20e 17516 || !d->perm.series_p (0, 1, odd, 2))
cc4d934f 17517 return false;
cc4d934f
JG
17518
17519 /* Success! */
17520 if (d->testing_p)
17521 return true;
17522
17523 in0 = d->op0;
17524 in1 = d->op1;
43cacb12
RS
17525 /* We don't need a big-endian lane correction for SVE; see the comment
17526 at the head of aarch64-sve.md for details. */
17527 if (BYTES_BIG_ENDIAN && d->vec_flags == VEC_ADVSIMD)
cc4d934f
JG
17528 {
17529 x = in0, in0 = in1, in1 = x;
17530 odd = !odd;
17531 }
17532 out = d->target;
17533
3f8334a5
RS
17534 emit_set_insn (out, gen_rtx_UNSPEC (vmode, gen_rtvec (2, in0, in1),
17535 odd ? UNSPEC_UZP2 : UNSPEC_UZP1));
cc4d934f
JG
17536 return true;
17537}
17538
17539/* Recognize patterns suitable for the ZIP instructions. */
17540static bool
17541aarch64_evpc_zip (struct expand_vec_perm_d *d)
17542{
6a70badb
RS
17543 unsigned int high;
17544 poly_uint64 nelt = d->perm.length ();
cc4d934f 17545 rtx out, in0, in1, x;
ef4bddc2 17546 machine_mode vmode = d->vmode;
cc4d934f
JG
17547
17548 if (GET_MODE_UNIT_SIZE (vmode) > 8)
17549 return false;
17550
17551 /* Note that these are little-endian tests.
17552 We correct for big-endian later. */
6a70badb
RS
17553 poly_uint64 first = d->perm[0];
17554 if ((maybe_ne (first, 0U) && maybe_ne (first * 2, nelt))
17555 || !d->perm.series_p (0, 2, first, 1)
17556 || !d->perm.series_p (1, 2, first + nelt, 1))
cc4d934f 17557 return false;
6a70badb 17558 high = maybe_ne (first, 0U);
cc4d934f
JG
17559
17560 /* Success! */
17561 if (d->testing_p)
17562 return true;
17563
17564 in0 = d->op0;
17565 in1 = d->op1;
43cacb12
RS
17566 /* We don't need a big-endian lane correction for SVE; see the comment
17567 at the head of aarch64-sve.md for details. */
17568 if (BYTES_BIG_ENDIAN && d->vec_flags == VEC_ADVSIMD)
cc4d934f
JG
17569 {
17570 x = in0, in0 = in1, in1 = x;
17571 high = !high;
17572 }
17573 out = d->target;
17574
3f8334a5
RS
17575 emit_set_insn (out, gen_rtx_UNSPEC (vmode, gen_rtvec (2, in0, in1),
17576 high ? UNSPEC_ZIP2 : UNSPEC_ZIP1));
cc4d934f
JG
17577 return true;
17578}
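
Hedged GNU C examples of index patterns that the three recognizers above (TRN, UZP, ZIP) accept for a 4 x 32-bit vector; the commented instructions are the expected little-endian mappings, not captured compiler output.

typedef unsigned int v4si __attribute__ ((vector_size (16)));

/* Expected:  trn1  v0.4s, v0.4s, v1.4s  */
v4si
trn1_example (v4si a, v4si b)
{
  return __builtin_shuffle (a, b, (v4si) { 0, 4, 2, 6 });
}

/* Expected:  uzp1  v0.4s, v0.4s, v1.4s  */
v4si
uzp1_example (v4si a, v4si b)
{
  return __builtin_shuffle (a, b, (v4si) { 0, 2, 4, 6 });
}

/* Expected:  zip1  v0.4s, v0.4s, v1.4s  */
v4si
zip1_example (v4si a, v4si b)
{
  return __builtin_shuffle (a, b, (v4si) { 0, 4, 1, 5 });
}
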
17579
ae0533da
AL
17580/* Recognize patterns for the EXT insn. */
17581
17582static bool
17583aarch64_evpc_ext (struct expand_vec_perm_d *d)
17584{
6a70badb 17585 HOST_WIDE_INT location;
ae0533da
AL
17586 rtx offset;
17587
6a70badb
RS
17588 /* The first element always refers to the first vector.
17589 Check if the extracted indices are increasing by one. */
43cacb12
RS
17590 if (d->vec_flags == VEC_SVE_PRED
17591 || !d->perm[0].is_constant (&location)
6a70badb 17592 || !d->perm.series_p (0, 1, location, 1))
326ac20e 17593 return false;
ae0533da 17594
ae0533da
AL
17595 /* Success! */
17596 if (d->testing_p)
17597 return true;
17598
b31e65bb 17599 /* The case where (location == 0) is a no-op for both big- and little-endian,
43cacb12 17600 and is removed by the mid-end at optimization levels -O1 and higher.
b31e65bb 17601
43cacb12
RS
17602 We don't need a big-endian lane correction for SVE; see the comment
17603 at the head of aarch64-sve.md for details. */
17604 if (BYTES_BIG_ENDIAN && location != 0 && d->vec_flags == VEC_ADVSIMD)
ae0533da
AL
17605 {
17606 /* After setup, we want the high elements of the first vector (stored
17607 at the LSB end of the register), and the low elements of the second
17608 vector (stored at the MSB end of the register). So swap. */
cb5c6c29 17609 std::swap (d->op0, d->op1);
6a70badb
RS
17610 /* location != 0 (above), so safe to assume (nelt - location) < nelt.
17611 to_constant () is safe since this is restricted to Advanced SIMD
17612 vectors. */
17613 location = d->perm.length ().to_constant () - location;
ae0533da
AL
17614 }
17615
17616 offset = GEN_INT (location);
3f8334a5
RS
17617 emit_set_insn (d->target,
17618 gen_rtx_UNSPEC (d->vmode,
17619 gen_rtvec (3, d->op0, d->op1, offset),
17620 UNSPEC_EXT));
ae0533da
AL
17621 return true;
17622}
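
A hedged example for the EXT recognizer above: indices that start at a non-zero element and increase by one across the two inputs. The byte offset in the comment is the little-endian expectation for 32-bit elements, not captured output.

typedef unsigned int v4si __attribute__ ((vector_size (16)));

/* Expected (not verified):  ext  v0.16b, v0.16b, v1.16b, #4  */
v4si
ext_example (v4si a, v4si b)
{
  return __builtin_shuffle (a, b, (v4si) { 1, 2, 3, 4 });
}
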
17623
43cacb12
RS
17624/* Recognize patterns for the REV{64,32,16} insns, which reverse elements
17625 within each 64-bit, 32-bit or 16-bit granule. */
923fcec3
AL
17626
17627static bool
43cacb12 17628aarch64_evpc_rev_local (struct expand_vec_perm_d *d)
923fcec3 17629{
6a70badb
RS
17630 HOST_WIDE_INT diff;
17631 unsigned int i, size, unspec;
43cacb12 17632 machine_mode pred_mode;
923fcec3 17633
43cacb12
RS
17634 if (d->vec_flags == VEC_SVE_PRED
17635 || !d->one_vector_p
6a70badb 17636 || !d->perm[0].is_constant (&diff))
923fcec3
AL
17637 return false;
17638
3f8334a5
RS
17639 size = (diff + 1) * GET_MODE_UNIT_SIZE (d->vmode);
17640 if (size == 8)
43cacb12
RS
17641 {
17642 unspec = UNSPEC_REV64;
17643 pred_mode = VNx2BImode;
17644 }
3f8334a5 17645 else if (size == 4)
43cacb12
RS
17646 {
17647 unspec = UNSPEC_REV32;
17648 pred_mode = VNx4BImode;
17649 }
3f8334a5 17650 else if (size == 2)
43cacb12
RS
17651 {
17652 unspec = UNSPEC_REV16;
17653 pred_mode = VNx8BImode;
17654 }
3f8334a5
RS
17655 else
17656 return false;
923fcec3 17657
326ac20e
RS
17658 unsigned int step = diff + 1;
17659 for (i = 0; i < step; ++i)
17660 if (!d->perm.series_p (i, step, diff - i, step))
17661 return false;
923fcec3
AL
17662
17663 /* Success! */
17664 if (d->testing_p)
17665 return true;
17666
43cacb12
RS
17667 rtx src = gen_rtx_UNSPEC (d->vmode, gen_rtvec (1, d->op0), unspec);
17668 if (d->vec_flags == VEC_SVE_DATA)
17669 {
16de3637 17670 rtx pred = aarch64_ptrue_reg (pred_mode);
43cacb12 17671 src = gen_rtx_UNSPEC (d->vmode, gen_rtvec (2, pred, src),
06308276 17672 UNSPEC_PRED_X);
43cacb12
RS
17673 }
17674 emit_set_insn (d->target, src);
17675 return true;
17676}
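
A hedged example for the local-reversal recognizer above: swapping adjacent 32-bit elements reverses within each 64-bit granule, which is the REV64 case.

typedef unsigned int v4si __attribute__ ((vector_size (16)));

/* Expected (not verified):  rev64  v0.4s, v0.4s  */
v4si
rev64_example (v4si a)
{
  return __builtin_shuffle (a, (v4si) { 1, 0, 3, 2 });
}
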
17677
17678/* Recognize patterns for the REV insn, which reverses elements within
17679 a full vector. */
17680
17681static bool
17682aarch64_evpc_rev_global (struct expand_vec_perm_d *d)
17683{
17684 poly_uint64 nelt = d->perm.length ();
17685
17686 if (!d->one_vector_p || d->vec_flags != VEC_SVE_DATA)
17687 return false;
17688
17689 if (!d->perm.series_p (0, 1, nelt - 1, -1))
17690 return false;
17691
17692 /* Success! */
17693 if (d->testing_p)
17694 return true;
17695
17696 rtx src = gen_rtx_UNSPEC (d->vmode, gen_rtvec (1, d->op0), UNSPEC_REV);
17697 emit_set_insn (d->target, src);
923fcec3
AL
17698 return true;
17699}
17700
91bd4114
JG
17701static bool
17702aarch64_evpc_dup (struct expand_vec_perm_d *d)
17703{
91bd4114
JG
17704 rtx out = d->target;
17705 rtx in0;
6a70badb 17706 HOST_WIDE_INT elt;
ef4bddc2 17707 machine_mode vmode = d->vmode;
91bd4114
JG
17708 rtx lane;
17709
43cacb12
RS
17710 if (d->vec_flags == VEC_SVE_PRED
17711 || d->perm.encoding ().encoded_nelts () != 1
6a70badb 17712 || !d->perm[0].is_constant (&elt))
326ac20e
RS
17713 return false;
17714
43cacb12
RS
17715 if (d->vec_flags == VEC_SVE_DATA && elt >= 64 * GET_MODE_UNIT_SIZE (vmode))
17716 return false;
17717
326ac20e
RS
17718 /* Success! */
17719 if (d->testing_p)
17720 return true;
17721
91bd4114
JG
17722 /* The generic preparation in aarch64_expand_vec_perm_const_1
17723 swaps the operand order and the permute indices if it finds
17724 d->perm[0] to be in the second operand. Thus, we can always
17725 use d->op0 and need not do any extra arithmetic to get the
17726 correct lane number. */
17727 in0 = d->op0;
f901401e 17728 lane = GEN_INT (elt); /* The pattern corrects for big-endian. */
91bd4114 17729
3f8334a5
RS
17730 rtx parallel = gen_rtx_PARALLEL (vmode, gen_rtvec (1, lane));
17731 rtx select = gen_rtx_VEC_SELECT (GET_MODE_INNER (vmode), in0, parallel);
17732 emit_set_insn (out, gen_rtx_VEC_DUPLICATE (vmode, select));
91bd4114
JG
17733 return true;
17734}
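
And a hedged example for the duplicate recognizer above: a permutation whose encoding is a single repeated lane index.

typedef unsigned int v4si __attribute__ ((vector_size (16)));

/* Expected (not verified):  dup  v0.4s, v0.s[2]  */
v4si
dup_example (v4si a)
{
  return __builtin_shuffle (a, (v4si) { 2, 2, 2, 2 });
}
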
17735
88b08073
JG
17736static bool
17737aarch64_evpc_tbl (struct expand_vec_perm_d *d)
17738{
43cacb12 17739 rtx rperm[MAX_COMPILE_TIME_VEC_BYTES], sel;
ef4bddc2 17740 machine_mode vmode = d->vmode;
6a70badb
RS
17741
17742 /* Make sure that the indices are constant. */
17743 unsigned int encoded_nelts = d->perm.encoding ().encoded_nelts ();
17744 for (unsigned int i = 0; i < encoded_nelts; ++i)
17745 if (!d->perm[i].is_constant ())
17746 return false;
88b08073 17747
88b08073
JG
17748 if (d->testing_p)
17749 return true;
17750
17751 /* Generic code will try constant permutation twice. Once with the
17752 original mode and again with the elements lowered to QImode.
17753 So wait and don't do the selector expansion ourselves. */
17754 if (vmode != V8QImode && vmode != V16QImode)
17755 return false;
17756
6a70badb
RS
17757 /* to_constant is safe since this routine is specific to Advanced SIMD
17758 vectors. */
17759 unsigned int nelt = d->perm.length ().to_constant ();
17760 for (unsigned int i = 0; i < nelt; ++i)
17761	    /* If big-endian and two vectors, we end up with a weird mixed-endian
17762 mode on NEON. Reverse the index within each word but not the word
17763 itself. to_constant is safe because we checked is_constant above. */
17764 rperm[i] = GEN_INT (BYTES_BIG_ENDIAN
17765 ? d->perm[i].to_constant () ^ (nelt - 1)
17766 : d->perm[i].to_constant ());
bbcc9c00 17767
88b08073
JG
17768 sel = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, rperm));
17769 sel = force_reg (vmode, sel);
17770
17771 aarch64_expand_vec_perm_1 (d->target, d->op0, d->op1, sel);
17772 return true;
17773}
17774
43cacb12
RS
17775/* Try to implement D using an SVE TBL instruction. */
17776
17777static bool
17778aarch64_evpc_sve_tbl (struct expand_vec_perm_d *d)
17779{
17780 unsigned HOST_WIDE_INT nelt;
17781
17782 /* Permuting two variable-length vectors could overflow the
17783 index range. */
17784 if (!d->one_vector_p && !d->perm.length ().is_constant (&nelt))
17785 return false;
17786
17787 if (d->testing_p)
17788 return true;
17789
17790 machine_mode sel_mode = mode_for_int_vector (d->vmode).require ();
17791 rtx sel = vec_perm_indices_to_rtx (sel_mode, d->perm);
e25c95ef
RS
17792 if (d->one_vector_p)
17793 emit_unspec2 (d->target, UNSPEC_TBL, d->op0, force_reg (sel_mode, sel));
17794 else
17795 aarch64_expand_sve_vec_perm (d->target, d->op0, d->op1, sel);
43cacb12
RS
17796 return true;
17797}
17798
88b08073
JG
17799static bool
17800aarch64_expand_vec_perm_const_1 (struct expand_vec_perm_d *d)
17801{
17802 /* The pattern matching functions above are written to look for a small
17803 number to begin the sequence (0, 1, N/2). If we begin with an index
17804 from the second operand, we can swap the operands. */
6a70badb
RS
17805 poly_int64 nelt = d->perm.length ();
17806 if (known_ge (d->perm[0], nelt))
88b08073 17807 {
e3342de4 17808 d->perm.rotate_inputs (1);
cb5c6c29 17809 std::swap (d->op0, d->op1);
88b08073
JG
17810 }
17811
43cacb12
RS
17812 if ((d->vec_flags == VEC_ADVSIMD
17813 || d->vec_flags == VEC_SVE_DATA
17814 || d->vec_flags == VEC_SVE_PRED)
17815 && known_gt (nelt, 1))
cc4d934f 17816 {
43cacb12
RS
17817 if (aarch64_evpc_rev_local (d))
17818 return true;
17819 else if (aarch64_evpc_rev_global (d))
923fcec3
AL
17820 return true;
17821 else if (aarch64_evpc_ext (d))
ae0533da 17822 return true;
f901401e
AL
17823 else if (aarch64_evpc_dup (d))
17824 return true;
ae0533da 17825 else if (aarch64_evpc_zip (d))
cc4d934f
JG
17826 return true;
17827 else if (aarch64_evpc_uzp (d))
17828 return true;
17829 else if (aarch64_evpc_trn (d))
17830 return true;
43cacb12
RS
17831 if (d->vec_flags == VEC_SVE_DATA)
17832 return aarch64_evpc_sve_tbl (d);
4ec8bb67 17833 else if (d->vec_flags == VEC_ADVSIMD)
43cacb12 17834 return aarch64_evpc_tbl (d);
cc4d934f 17835 }
88b08073
JG
17836 return false;
17837}
17838
f151c9e1 17839/* Implement TARGET_VECTORIZE_VEC_PERM_CONST. */
88b08073 17840
f151c9e1
RS
17841static bool
17842aarch64_vectorize_vec_perm_const (machine_mode vmode, rtx target, rtx op0,
17843 rtx op1, const vec_perm_indices &sel)
88b08073
JG
17844{
17845 struct expand_vec_perm_d d;
88b08073 17846
326ac20e 17847 /* Check whether the mask can be applied to a single vector. */
e25c95ef
RS
17848 if (sel.ninputs () == 1
17849 || (op0 && rtx_equal_p (op0, op1)))
326ac20e
RS
17850 d.one_vector_p = true;
17851 else if (sel.all_from_input_p (0))
88b08073 17852 {
326ac20e
RS
17853 d.one_vector_p = true;
17854 op1 = op0;
88b08073 17855 }
326ac20e 17856 else if (sel.all_from_input_p (1))
88b08073 17857 {
88b08073 17858 d.one_vector_p = true;
326ac20e 17859 op0 = op1;
88b08073 17860 }
326ac20e
RS
17861 else
17862 d.one_vector_p = false;
88b08073 17863
326ac20e
RS
17864 d.perm.new_vector (sel.encoding (), d.one_vector_p ? 1 : 2,
17865 sel.nelts_per_input ());
17866 d.vmode = vmode;
43cacb12 17867 d.vec_flags = aarch64_classify_vector_mode (d.vmode);
326ac20e
RS
17868 d.target = target;
17869 d.op0 = op0;
17870 d.op1 = op1;
17871 d.testing_p = !target;
e3342de4 17872
f151c9e1
RS
17873 if (!d.testing_p)
17874 return aarch64_expand_vec_perm_const_1 (&d);
88b08073 17875
326ac20e 17876 rtx_insn *last = get_last_insn ();
f151c9e1 17877 bool ret = aarch64_expand_vec_perm_const_1 (&d);
326ac20e 17878 gcc_assert (last == get_last_insn ());
88b08073
JG
17879
17880 return ret;
17881}
17882
73e3da51
RS
17883/* Generate a byte permute mask for a register of mode MODE,
17884 which has NUNITS units. */
17885
668046d1 17886rtx
73e3da51 17887aarch64_reverse_mask (machine_mode mode, unsigned int nunits)
668046d1
DS
17888{
17889	  /* We have to reverse each vector because we don't have
17890 a permuted load that can reverse-load according to ABI rules. */
17891 rtx mask;
17892 rtvec v = rtvec_alloc (16);
73e3da51
RS
17893 unsigned int i, j;
17894 unsigned int usize = GET_MODE_UNIT_SIZE (mode);
668046d1
DS
17895
17896 gcc_assert (BYTES_BIG_ENDIAN);
17897 gcc_assert (AARCH64_VALID_SIMD_QREG_MODE (mode));
17898
17899 for (i = 0; i < nunits; i++)
17900 for (j = 0; j < usize; j++)
17901 RTVEC_ELT (v, i * usize + j) = GEN_INT ((i + 1) * usize - 1 - j);
17902 mask = gen_rtx_CONST_VECTOR (V16QImode, v);
17903 return force_reg (V16QImode, mask);
17904}
17905
4a942af6 17906/* Expand an SVE integer comparison using the SVE equivalent of:
f22d7973 17907
4a942af6
RS
17908 (set TARGET (CODE OP0 OP1)). */
17909
17910void
17911aarch64_expand_sve_vec_cmp_int (rtx target, rtx_code code, rtx op0, rtx op1)
f22d7973 17912{
4a942af6
RS
17913 machine_mode pred_mode = GET_MODE (target);
17914 machine_mode data_mode = GET_MODE (op0);
00fa90d9
RS
17915 rtx res = aarch64_sve_emit_int_cmp (target, pred_mode, code, data_mode,
17916 op0, op1);
17917 if (!rtx_equal_p (target, res))
17918 emit_move_insn (target, res);
f22d7973
RS
17919}
17920
43cacb12
RS
17921/* Return the UNSPEC_COND_* code for comparison CODE. */
17922
17923static unsigned int
17924aarch64_unspec_cond_code (rtx_code code)
17925{
17926 switch (code)
17927 {
17928 case NE:
cb18e86d 17929 return UNSPEC_COND_FCMNE;
43cacb12 17930 case EQ:
cb18e86d 17931 return UNSPEC_COND_FCMEQ;
43cacb12 17932 case LT:
cb18e86d 17933 return UNSPEC_COND_FCMLT;
43cacb12 17934 case GT:
cb18e86d 17935 return UNSPEC_COND_FCMGT;
43cacb12 17936 case LE:
cb18e86d 17937 return UNSPEC_COND_FCMLE;
43cacb12 17938 case GE:
cb18e86d 17939 return UNSPEC_COND_FCMGE;
4a942af6
RS
17940 case UNORDERED:
17941 return UNSPEC_COND_FCMUO;
43cacb12
RS
17942 default:
17943 gcc_unreachable ();
17944 }
17945}
17946
f22d7973 17947/* Emit:
43cacb12 17948
4a942af6 17949 (set TARGET (unspec [PRED KNOWN_PTRUE_P OP0 OP1] UNSPEC_COND_<X>))
f22d7973 17950
4a942af6
RS
17951 where <X> is the operation associated with comparison CODE.
17952 KNOWN_PTRUE_P is true if PRED is known to be a PTRUE. */
f22d7973
RS
17953
17954static void
4a942af6
RS
17955aarch64_emit_sve_fp_cond (rtx target, rtx_code code, rtx pred,
17956 bool known_ptrue_p, rtx op0, rtx op1)
43cacb12 17957{
4a942af6 17958 rtx flag = gen_int_mode (known_ptrue_p, SImode);
f22d7973 17959 rtx unspec = gen_rtx_UNSPEC (GET_MODE (pred),
4a942af6 17960 gen_rtvec (4, pred, flag, op0, op1),
f22d7973
RS
17961 aarch64_unspec_cond_code (code));
17962 emit_set_insn (target, unspec);
43cacb12
RS
17963}
17964
f22d7973 17965/* Emit the SVE equivalent of:
43cacb12 17966
4a942af6
RS
17967 (set TMP1 (unspec [PRED KNOWN_PTRUE_P OP0 OP1] UNSPEC_COND_<X1>))
17968 (set TMP2 (unspec [PRED KNOWN_PTRUE_P OP0 OP1] UNSPEC_COND_<X2>))
f22d7973 17969 (set TARGET (ior:PRED_MODE TMP1 TMP2))
43cacb12 17970
4a942af6
RS
17971 where <Xi> is the operation associated with comparison CODEi.
17972 KNOWN_PTRUE_P is true if PRED is known to be a PTRUE. */
43cacb12
RS
17973
17974static void
4a942af6
RS
17975aarch64_emit_sve_or_fp_conds (rtx target, rtx_code code1, rtx_code code2,
17976 rtx pred, bool known_ptrue_p, rtx op0, rtx op1)
43cacb12 17977{
4a942af6 17978 machine_mode pred_mode = GET_MODE (pred);
43cacb12 17979 rtx tmp1 = gen_reg_rtx (pred_mode);
4a942af6 17980 aarch64_emit_sve_fp_cond (tmp1, code1, pred, known_ptrue_p, op0, op1);
43cacb12 17981 rtx tmp2 = gen_reg_rtx (pred_mode);
4a942af6 17982 aarch64_emit_sve_fp_cond (tmp2, code2, pred, known_ptrue_p, op0, op1);
f22d7973 17983 aarch64_emit_binop (target, ior_optab, tmp1, tmp2);
43cacb12
RS
17984}
17985
f22d7973 17986/* Emit the SVE equivalent of:
43cacb12 17987
4a942af6 17988 (set TMP (unspec [PRED KNOWN_PTRUE_P OP0 OP1] UNSPEC_COND_<X>))
f22d7973 17989 (set TARGET (not TMP))
43cacb12 17990
4a942af6
RS
17991 where <X> is the operation associated with comparison CODE.
17992 KNOWN_PTRUE_P is true if PRED is known to be a PTRUE. */
43cacb12
RS
17993
17994static void
4a942af6
RS
17995aarch64_emit_sve_invert_fp_cond (rtx target, rtx_code code, rtx pred,
17996 bool known_ptrue_p, rtx op0, rtx op1)
43cacb12 17997{
4a942af6 17998 machine_mode pred_mode = GET_MODE (pred);
f22d7973 17999 rtx tmp = gen_reg_rtx (pred_mode);
4a942af6 18000 aarch64_emit_sve_fp_cond (tmp, code, pred, known_ptrue_p, op0, op1);
f22d7973 18001 aarch64_emit_unop (target, one_cmpl_optab, tmp);
43cacb12
RS
18002}
18003
f22d7973 18004/* Expand an SVE floating-point comparison using the SVE equivalent of:
43cacb12 18005
f22d7973 18006 (set TARGET (CODE OP0 OP1))
43cacb12
RS
18007
18008 If CAN_INVERT_P is true, the caller can also handle inverted results;
18009 return true if the result is in fact inverted. */
18010
18011bool
18012aarch64_expand_sve_vec_cmp_float (rtx target, rtx_code code,
18013 rtx op0, rtx op1, bool can_invert_p)
18014{
18015 machine_mode pred_mode = GET_MODE (target);
18016 machine_mode data_mode = GET_MODE (op0);
18017
16de3637 18018 rtx ptrue = aarch64_ptrue_reg (pred_mode);
43cacb12
RS
18019 switch (code)
18020 {
18021 case UNORDERED:
18022 /* UNORDERED has no immediate form. */
18023 op1 = force_reg (data_mode, op1);
f22d7973 18024 /* fall through */
43cacb12
RS
18025 case LT:
18026 case LE:
18027 case GT:
18028 case GE:
18029 case EQ:
18030 case NE:
f22d7973
RS
18031 {
18032 /* There is native support for the comparison. */
4a942af6 18033 aarch64_emit_sve_fp_cond (target, code, ptrue, true, op0, op1);
f22d7973
RS
18034 return false;
18035 }
43cacb12
RS
18036
18037 case LTGT:
18038 /* This is a trapping operation (LT or GT). */
4a942af6 18039 aarch64_emit_sve_or_fp_conds (target, LT, GT, ptrue, true, op0, op1);
43cacb12
RS
18040 return false;
18041
18042 case UNEQ:
18043 if (!flag_trapping_math)
18044 {
18045 /* This would trap for signaling NaNs. */
18046 op1 = force_reg (data_mode, op1);
4a942af6
RS
18047 aarch64_emit_sve_or_fp_conds (target, UNORDERED, EQ,
18048 ptrue, true, op0, op1);
43cacb12
RS
18049 return false;
18050 }
18051 /* fall through */
43cacb12
RS
18052 case UNLT:
18053 case UNLE:
18054 case UNGT:
18055 case UNGE:
f22d7973
RS
18056 if (flag_trapping_math)
18057 {
18058 /* Work out which elements are ordered. */
18059 rtx ordered = gen_reg_rtx (pred_mode);
18060 op1 = force_reg (data_mode, op1);
4a942af6
RS
18061 aarch64_emit_sve_invert_fp_cond (ordered, UNORDERED,
18062 ptrue, true, op0, op1);
f22d7973
RS
18063
18064 /* Test the opposite condition for the ordered elements,
18065 then invert the result. */
18066 if (code == UNEQ)
18067 code = NE;
18068 else
18069 code = reverse_condition_maybe_unordered (code);
18070 if (can_invert_p)
18071 {
4a942af6
RS
18072 aarch64_emit_sve_fp_cond (target, code,
18073 ordered, false, op0, op1);
f22d7973
RS
18074 return true;
18075 }
4a942af6
RS
18076 aarch64_emit_sve_invert_fp_cond (target, code,
18077 ordered, false, op0, op1);
f22d7973
RS
18078 return false;
18079 }
18080 break;
18081
18082 case ORDERED:
18083 /* ORDERED has no immediate form. */
18084 op1 = force_reg (data_mode, op1);
18085 break;
43cacb12
RS
18086
18087 default:
18088 gcc_unreachable ();
18089 }
f22d7973
RS
18090
18091 /* There is native support for the inverse comparison. */
18092 code = reverse_condition_maybe_unordered (code);
18093 if (can_invert_p)
18094 {
4a942af6 18095 aarch64_emit_sve_fp_cond (target, code, ptrue, true, op0, op1);
f22d7973
RS
18096 return true;
18097 }
4a942af6 18098 aarch64_emit_sve_invert_fp_cond (target, code, ptrue, true, op0, op1);
f22d7973 18099 return false;
43cacb12
RS
18100}
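The trapping-math path above relies on a simple identity: an "unordered or CMP" test equals the negation of the reversed ordered comparison restricted to the ordered lanes, so NaN operands are never fed to a trapping compare.  A minimal standalone C sketch of that identity (illustrative only, not GCC code; the helper names are made up):

#include <math.h>
#include <stdbool.h>
#include <stdio.h>

/* UNGE computed the way the expansion above does it: work out which inputs
   are ordered, test the reversed condition (LT) only on those, and invert
   the combined result.  Short-circuit && mirrors the predicated compare.  */
static bool
unge_via_ordered (double x, double y)
{
  bool ordered = !isunordered (x, y);
  return !(ordered && x < y);
}

/* Reference definition: unordered, or greater-than-or-equal.  */
static bool
unge_ref (double x, double y)
{
  return isunordered (x, y) || x >= y;
}

int
main (void)
{
  double vals[] = { -1.0, 0.0, 2.0, NAN };
  for (int i = 0; i < 4; i++)
    for (int j = 0; j < 4; j++)
      if (unge_via_ordered (vals[i], vals[j]) != unge_ref (vals[i], vals[j]))
	printf ("mismatch for %g, %g\n", vals[i], vals[j]);
  return 0;
}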
18101
18102/* Expand an SVE vcond pattern with operands OPS. DATA_MODE is the mode
18103 of the data being selected and CMP_MODE is the mode of the values being
18104 compared. */
18105
18106void
18107aarch64_expand_sve_vcond (machine_mode data_mode, machine_mode cmp_mode,
18108 rtx *ops)
18109{
18110 machine_mode pred_mode
18111 = aarch64_get_mask_mode (GET_MODE_NUNITS (cmp_mode),
18112 GET_MODE_SIZE (cmp_mode)).require ();
18113 rtx pred = gen_reg_rtx (pred_mode);
18114 if (FLOAT_MODE_P (cmp_mode))
18115 {
18116 if (aarch64_expand_sve_vec_cmp_float (pred, GET_CODE (ops[3]),
18117 ops[4], ops[5], true))
18118 std::swap (ops[1], ops[2]);
18119 }
18120 else
18121 aarch64_expand_sve_vec_cmp_int (pred, GET_CODE (ops[3]), ops[4], ops[5]);
18122
d29f7dd5
RS
18123 if (!aarch64_sve_reg_or_dup_imm (ops[1], data_mode))
18124 ops[1] = force_reg (data_mode, ops[1]);
18125 /* The "false" value can only be zero if the "true" value is a constant. */
18126 if (register_operand (ops[1], data_mode)
18127 || !aarch64_simd_reg_or_zero (ops[2], data_mode))
18128 ops[2] = force_reg (data_mode, ops[2]);
18129
43cacb12
RS
18130 rtvec vec = gen_rtvec (3, pred, ops[1], ops[2]);
18131 emit_set_insn (ops[0], gen_rtx_UNSPEC (data_mode, vec, UNSPEC_SEL));
18132}
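For reference, the UNSPEC_SEL emitted at the end behaves like a per-lane select: the comparison fills a predicate and the select then picks the "true" or "false" data value for each lane.  A tiny illustrative sketch of those semantics in plain C (not GCC internals):

/* Element-wise model of the final SEL.  */
void
vcond_semantics (int *result, const _Bool *pred,
		 const int *val_true, const int *val_false, int nelts)
{
  for (int i = 0; i < nelts; i++)
    result[i] = pred[i] ? val_true[i] : val_false[i];
}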
18133
99e1629f
RS
18134/* Implement TARGET_MODES_TIEABLE_P. In principle we should always return
 18135	   true.  However, due to issues with register allocation it is preferable
 18136	   to avoid tying integer scalar and FP scalar modes.  Executing integer
18137 operations in general registers is better than treating them as scalar
18138 vector operations. This reduces latency and avoids redundant int<->FP
18139 moves. So tie modes if they are either the same class, or vector modes
18140 with other vector modes, vector structs or any scalar mode. */
97e1ad78 18141
99e1629f 18142static bool
ef4bddc2 18143aarch64_modes_tieable_p (machine_mode mode1, machine_mode mode2)
97e1ad78
JG
18144{
18145 if (GET_MODE_CLASS (mode1) == GET_MODE_CLASS (mode2))
18146 return true;
18147
18148 /* We specifically want to allow elements of "structure" modes to
18149 be tieable to the structure. This more general condition allows
43cacb12
RS
18150 other rarer situations too. The reason we don't extend this to
18151 predicate modes is that there are no predicate structure modes
18152 nor any specific instructions for extracting part of a predicate
18153 register. */
18154 if (aarch64_vector_data_mode_p (mode1)
18155 && aarch64_vector_data_mode_p (mode2))
61f17a5c
WD
18156 return true;
18157
18158 /* Also allow any scalar modes with vectors. */
18159 if (aarch64_vector_mode_supported_p (mode1)
18160 || aarch64_vector_mode_supported_p (mode2))
97e1ad78
JG
18161 return true;
18162
18163 return false;
18164}
18165
e2c75eea
JG
18166/* Return a new RTX holding the result of moving POINTER forward by
18167 AMOUNT bytes. */
18168
18169static rtx
6a70badb 18170aarch64_move_pointer (rtx pointer, poly_int64 amount)
e2c75eea
JG
18171{
18172 rtx next = plus_constant (Pmode, XEXP (pointer, 0), amount);
18173
18174 return adjust_automodify_address (pointer, GET_MODE (pointer),
18175 next, amount);
18176}
18177
18178/* Return a new RTX holding the result of moving POINTER forward by the
18179 size of the mode it points to. */
18180
18181static rtx
18182aarch64_progress_pointer (rtx pointer)
18183{
6a70badb 18184 return aarch64_move_pointer (pointer, GET_MODE_SIZE (GET_MODE (pointer)));
e2c75eea
JG
18185}
18186
18187/* Copy one MODE sized block from SRC to DST, then progress SRC and DST by
18188 MODE bytes. */
18189
18190static void
18191aarch64_copy_one_block_and_progress_pointers (rtx *src, rtx *dst,
ef4bddc2 18192 machine_mode mode)
e2c75eea
JG
18193{
18194 rtx reg = gen_reg_rtx (mode);
18195
18196 /* "Cast" the pointers to the correct mode. */
18197 *src = adjust_address (*src, mode, 0);
18198 *dst = adjust_address (*dst, mode, 0);
18199 /* Emit the memcpy. */
18200 emit_move_insn (reg, *src);
18201 emit_move_insn (*dst, reg);
18202 /* Move the pointers forward. */
18203 *src = aarch64_progress_pointer (*src);
18204 *dst = aarch64_progress_pointer (*dst);
18205}
18206
76715c32 18207/* Expand cpymem, as if from a __builtin_memcpy. Return true if
e2c75eea
JG
18208 we succeed, otherwise return false. */
18209
18210bool
76715c32 18211aarch64_expand_cpymem (rtx *operands)
e2c75eea 18212{
89c52e5e 18213 int n, mode_bits;
e2c75eea
JG
18214 rtx dst = operands[0];
18215 rtx src = operands[1];
18216 rtx base;
89c52e5e 18217 machine_mode cur_mode = BLKmode, next_mode;
e2c75eea
JG
18218 bool speed_p = !optimize_function_for_size_p (cfun);
18219
18220 /* When optimizing for size, give a better estimate of the length of a
89c52e5e
TC
18221 memcpy call, but use the default otherwise. Moves larger than 8 bytes
18222 will always require an even number of instructions to do now. And each
 18223	   operation requires both a load+store, so divide the max number by 2.  */
18224 int max_num_moves = (speed_p ? 16 : AARCH64_CALL_RATIO) / 2;
e2c75eea
JG
18225
18226 /* We can't do anything smart if the amount to copy is not constant. */
18227 if (!CONST_INT_P (operands[2]))
18228 return false;
18229
89c52e5e 18230 n = INTVAL (operands[2]);
e2c75eea 18231
89c52e5e
TC
18232 /* Try to keep the number of instructions low. For all cases we will do at
18233 most two moves for the residual amount, since we'll always overlap the
18234 remainder. */
18235 if (((n / 16) + (n % 16 ? 2 : 0)) > max_num_moves)
e2c75eea
JG
18236 return false;
18237
18238 base = copy_to_mode_reg (Pmode, XEXP (dst, 0));
18239 dst = adjust_automodify_address (dst, VOIDmode, base, 0);
18240
18241 base = copy_to_mode_reg (Pmode, XEXP (src, 0));
18242 src = adjust_automodify_address (src, VOIDmode, base, 0);
18243
89c52e5e
TC
18244 /* Convert n to bits to make the rest of the code simpler. */
18245 n = n * BITS_PER_UNIT;
e2c75eea 18246
f7e1d19d
TC
18247 /* Maximum amount to copy in one go. The AArch64 back-end has integer modes
18248 larger than TImode, but we should not use them for loads/stores here. */
18249 const int copy_limit = GET_MODE_BITSIZE (TImode);
18250
89c52e5e 18251 while (n > 0)
e2c75eea 18252 {
89c52e5e
TC
 18253	      /* Find the largest mode in which to do the copy without over-reading
18254 or writing. */
18255 opt_scalar_int_mode mode_iter;
18256 FOR_EACH_MODE_IN_CLASS (mode_iter, MODE_INT)
f7e1d19d 18257 if (GET_MODE_BITSIZE (mode_iter.require ()) <= MIN (n, copy_limit))
89c52e5e 18258 cur_mode = mode_iter.require ();
e2c75eea 18259
89c52e5e 18260 gcc_assert (cur_mode != BLKmode);
e2c75eea 18261
89c52e5e
TC
18262 mode_bits = GET_MODE_BITSIZE (cur_mode).to_constant ();
18263 aarch64_copy_one_block_and_progress_pointers (&src, &dst, cur_mode);
e2c75eea 18264
89c52e5e 18265 n -= mode_bits;
e2c75eea 18266
89c52e5e
TC
18267 /* Do certain trailing copies as overlapping if it's going to be
 18268	 cheaper, i.e. fewer instructions are needed.  For instance, for a 15
18269 byte copy it's more efficient to do two overlapping 8 byte copies than
18270 8 + 6 + 1. */
f7e1d19d 18271 if (n > 0 && n <= 8 * BITS_PER_UNIT)
89c52e5e 18272 {
f7e1d19d
TC
18273 next_mode = smallest_mode_for_size (n, MODE_INT);
18274 int n_bits = GET_MODE_BITSIZE (next_mode).to_constant ();
89c52e5e
TC
18275 src = aarch64_move_pointer (src, (n - n_bits) / BITS_PER_UNIT);
18276 dst = aarch64_move_pointer (dst, (n - n_bits) / BITS_PER_UNIT);
18277 n = n_bits;
e2c75eea
JG
18278 }
18279 }
18280
18281 return true;
18282}
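The chunking strategy above (widest mode up to TImode, plus a single overlapping tail copy) can be illustrated with plain memcpy calls.  A hedged standalone sketch, not the back-end code itself; the function name is invented for the example:

#include <stddef.h>
#include <string.h>

/* Copy N bytes from SRC to DST the way the expander above does it: pick the
   widest power-of-two chunk no larger than 16 bytes that still fits, and
   finish a short tail with one copy that overlaps the bytes just written,
   e.g. 15 bytes become two overlapping 8-byte copies.  */
void
copy_like_cpymem (unsigned char *dst, const unsigned char *src, size_t n)
{
  const size_t copy_limit = 16;		/* analogue of TImode above */
  size_t offset = 0;

  while (n > 0)
    {
      size_t chunk = 1;
      while (chunk * 2 <= n && chunk * 2 <= copy_limit)
	chunk *= 2;

      memcpy (dst + offset, src + offset, chunk);
      offset += chunk;
      n -= chunk;

      if (n > 0 && n <= 8)
	{
	  /* Round the tail up to a power of two and overlap it with the
	     previous chunk so that it ends exactly at the last byte.  */
	  size_t tail = 1;
	  while (tail < n)
	    tail *= 2;
	  offset -= tail - n;
	  n = tail;
	}
    }
}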
18283
141a3ccf
KT
18284/* Split a DImode store of a CONST_INT SRC to MEM DST as two
18285 SImode stores. Handle the case when the constant has identical
18286 bottom and top halves. This is beneficial when the two stores can be
18287 merged into an STP and we avoid synthesising potentially expensive
18288 immediates twice. Return true if such a split is possible. */
18289
18290bool
18291aarch64_split_dimode_const_store (rtx dst, rtx src)
18292{
18293 rtx lo = gen_lowpart (SImode, src);
18294 rtx hi = gen_highpart_mode (SImode, DImode, src);
18295
18296 bool size_p = optimize_function_for_size_p (cfun);
18297
18298 if (!rtx_equal_p (lo, hi))
18299 return false;
18300
18301 unsigned int orig_cost
18302 = aarch64_internal_mov_immediate (NULL_RTX, src, false, DImode);
18303 unsigned int lo_cost
18304 = aarch64_internal_mov_immediate (NULL_RTX, lo, false, SImode);
18305
18306 /* We want to transform:
18307 MOV x1, 49370
18308 MOVK x1, 0x140, lsl 16
18309 MOVK x1, 0xc0da, lsl 32
18310 MOVK x1, 0x140, lsl 48
18311 STR x1, [x0]
18312 into:
18313 MOV w1, 49370
18314 MOVK w1, 0x140, lsl 16
18315 STP w1, w1, [x0]
18316 So we want to perform this only when we save two instructions
18317 or more. When optimizing for size, however, accept any code size
18318 savings we can. */
18319 if (size_p && orig_cost <= lo_cost)
18320 return false;
18321
18322 if (!size_p
18323 && (orig_cost <= lo_cost + 1))
18324 return false;
18325
18326 rtx mem_lo = adjust_address (dst, SImode, 0);
18327 if (!aarch64_mem_pair_operand (mem_lo, SImode))
18328 return false;
18329
18330 rtx tmp_reg = gen_reg_rtx (SImode);
18331 aarch64_expand_mov_immediate (tmp_reg, lo);
18332 rtx mem_hi = aarch64_move_pointer (mem_lo, GET_MODE_SIZE (SImode));
18333 /* Don't emit an explicit store pair as this may not be always profitable.
18334 Let the sched-fusion logic decide whether to merge them. */
18335 emit_move_insn (mem_lo, tmp_reg);
18336 emit_move_insn (mem_hi, tmp_reg);
18337
18338 return true;
18339}
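The transformation above hinges on one property of the constant: its two 32-bit halves are identical, so a single cheaper 32-bit immediate can be stored twice.  A minimal standalone illustration (the function name is made up for this sketch):

#include <stdint.h>
#include <string.h>

/* Store VAL to DST as two identical 32-bit words when its halves match,
   e.g. 0x0140c0da0140c0da; otherwise report that the caller should keep
   the plain 64-bit store.  */
int
store_dimode_const_as_two_words (void *dst, uint64_t val)
{
  uint32_t lo = (uint32_t) val;
  uint32_t hi = (uint32_t) (val >> 32);

  if (lo != hi)
    return 0;				/* halves differ: no saving */

  memcpy (dst, &lo, sizeof lo);			/* str w1, [x0]     */
  memcpy ((char *) dst + 4, &lo, sizeof lo);	/* str w1, [x0, 4]  */
  return 1;
}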
18340
30c46053
MC
18341/* Generate RTL for a conditional branch with rtx comparison CODE in
18342 mode CC_MODE. The destination of the unlikely conditional branch
18343 is LABEL_REF. */
18344
18345void
18346aarch64_gen_unlikely_cbranch (enum rtx_code code, machine_mode cc_mode,
18347 rtx label_ref)
18348{
18349 rtx x;
18350 x = gen_rtx_fmt_ee (code, VOIDmode,
18351 gen_rtx_REG (cc_mode, CC_REGNUM),
18352 const0_rtx);
18353
18354 x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
18355 gen_rtx_LABEL_REF (VOIDmode, label_ref),
18356 pc_rtx);
18357 aarch64_emit_unlikely_jump (gen_rtx_SET (pc_rtx, x));
18358}
18359
18360/* Generate DImode scratch registers for 128-bit (TImode) addition.
18361
18362 OP1 represents the TImode destination operand 1
18363 OP2 represents the TImode destination operand 2
18364 LOW_DEST represents the low half (DImode) of TImode operand 0
18365 LOW_IN1 represents the low half (DImode) of TImode operand 1
18366 LOW_IN2 represents the low half (DImode) of TImode operand 2
18367 HIGH_DEST represents the high half (DImode) of TImode operand 0
18368 HIGH_IN1 represents the high half (DImode) of TImode operand 1
18369 HIGH_IN2 represents the high half (DImode) of TImode operand 2. */
18370
18371void
18372aarch64_addti_scratch_regs (rtx op1, rtx op2, rtx *low_dest,
18373 rtx *low_in1, rtx *low_in2,
18374 rtx *high_dest, rtx *high_in1,
18375 rtx *high_in2)
18376{
18377 *low_dest = gen_reg_rtx (DImode);
18378 *low_in1 = gen_lowpart (DImode, op1);
18379 *low_in2 = simplify_gen_subreg (DImode, op2, TImode,
18380 subreg_lowpart_offset (DImode, TImode));
18381 *high_dest = gen_reg_rtx (DImode);
18382 *high_in1 = gen_highpart (DImode, op1);
18383 *high_in2 = simplify_gen_subreg (DImode, op2, TImode,
18384 subreg_highpart_offset (DImode, TImode));
18385}
18386
18387/* Generate DImode scratch registers for 128-bit (TImode) subtraction.
18388
 18389	   This function differs from 'aarch64_addti_scratch_regs' in that
18390 OP1 can be an immediate constant (zero). We must call
18391 subreg_highpart_offset with DImode and TImode arguments, otherwise
 18392	   VOIDmode will be used for the const_int, which generates an internal
 18393	   error from subreg_size_highpart_offset, which does not expect a size of zero.
18394
18395 OP1 represents the TImode destination operand 1
18396 OP2 represents the TImode destination operand 2
18397 LOW_DEST represents the low half (DImode) of TImode operand 0
18398 LOW_IN1 represents the low half (DImode) of TImode operand 1
18399 LOW_IN2 represents the low half (DImode) of TImode operand 2
18400 HIGH_DEST represents the high half (DImode) of TImode operand 0
18401 HIGH_IN1 represents the high half (DImode) of TImode operand 1
18402 HIGH_IN2 represents the high half (DImode) of TImode operand 2. */
18403
18404
18405void
18406aarch64_subvti_scratch_regs (rtx op1, rtx op2, rtx *low_dest,
18407 rtx *low_in1, rtx *low_in2,
18408 rtx *high_dest, rtx *high_in1,
18409 rtx *high_in2)
18410{
18411 *low_dest = gen_reg_rtx (DImode);
18412 *low_in1 = simplify_gen_subreg (DImode, op1, TImode,
18413 subreg_lowpart_offset (DImode, TImode));
18414
18415 *low_in2 = simplify_gen_subreg (DImode, op2, TImode,
18416 subreg_lowpart_offset (DImode, TImode));
18417 *high_dest = gen_reg_rtx (DImode);
18418
18419 *high_in1 = simplify_gen_subreg (DImode, op1, TImode,
18420 subreg_highpart_offset (DImode, TImode));
18421 *high_in2 = simplify_gen_subreg (DImode, op2, TImode,
18422 subreg_highpart_offset (DImode, TImode));
18423}
18424
18425/* Generate RTL for 128-bit (TImode) subtraction with overflow.
18426
18427 OP0 represents the TImode destination operand 0
18428 LOW_DEST represents the low half (DImode) of TImode operand 0
18429 LOW_IN1 represents the low half (DImode) of TImode operand 1
18430 LOW_IN2 represents the low half (DImode) of TImode operand 2
18431 HIGH_DEST represents the high half (DImode) of TImode operand 0
18432 HIGH_IN1 represents the high half (DImode) of TImode operand 1
a58fe3c5
RE
18433 HIGH_IN2 represents the high half (DImode) of TImode operand 2
18434 UNSIGNED_P is true if the operation is being performed on unsigned
18435 values. */
30c46053
MC
18436void
18437aarch64_expand_subvti (rtx op0, rtx low_dest, rtx low_in1,
18438 rtx low_in2, rtx high_dest, rtx high_in1,
a58fe3c5 18439 rtx high_in2, bool unsigned_p)
30c46053
MC
18440{
18441 if (low_in2 == const0_rtx)
18442 {
18443 low_dest = low_in1;
a58fe3c5
RE
18444 high_in2 = force_reg (DImode, high_in2);
18445 if (unsigned_p)
18446 emit_insn (gen_subdi3_compare1 (high_dest, high_in1, high_in2));
18447 else
18448 emit_insn (gen_subvdi_insn (high_dest, high_in1, high_in2));
30c46053
MC
18449 }
18450 else
18451 {
18452 if (CONST_INT_P (low_in2))
18453 {
30c46053 18454 high_in2 = force_reg (DImode, high_in2);
a58fe3c5
RE
18455 emit_insn (gen_subdi3_compare1_imm (low_dest, low_in1, low_in2,
18456 GEN_INT (-INTVAL (low_in2))));
30c46053
MC
18457 }
18458 else
18459 emit_insn (gen_subdi3_compare1 (low_dest, low_in1, low_in2));
a58fe3c5
RE
18460
18461 if (unsigned_p)
18462 emit_insn (gen_usubdi3_carryinC (high_dest, high_in1, high_in2));
18463 else
18464 emit_insn (gen_subdi3_carryinV (high_dest, high_in1, high_in2));
30c46053
MC
18465 }
18466
18467 emit_move_insn (gen_lowpart (DImode, op0), low_dest);
18468 emit_move_insn (gen_highpart (DImode, op0), high_dest);
18469
18470}
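Written out on plain 64-bit halves, the subtraction that the SUBS/SBCS pair above implements looks like the following.  This is an illustrative sketch, not the expander itself:

#include <stdint.h>

struct u128 { uint64_t lo, hi; };

/* Double-word subtraction: the borrow out of the low half feeds into the
   high half, exactly as SUBS followed by SBCS does.  */
struct u128
sub_u128 (struct u128 a, struct u128 b)
{
  struct u128 r;
  r.lo = a.lo - b.lo;			/* SUBS xlo, alo, blo */
  uint64_t borrow = a.lo < b.lo;	/* borrow from the low half */
  r.hi = a.hi - b.hi - borrow;		/* SBCS xhi, ahi, bhi */
  return r;
}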
18471
a3125fc2
CL
18472/* Implement the TARGET_ASAN_SHADOW_OFFSET hook. */
18473
18474static unsigned HOST_WIDE_INT
18475aarch64_asan_shadow_offset (void)
18476{
10078f3e
AP
18477 if (TARGET_ILP32)
18478 return (HOST_WIDE_INT_1 << 29);
18479 else
18480 return (HOST_WIDE_INT_1 << 36);
a3125fc2
CL
18481}
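For context, AddressSanitizer combines the offset returned here with the usual shadow mapping of one shadow byte per 8-byte granule.  A sketch under that assumption (the shadow scale of 3 is the common ASan default, not something this hook controls):

#include <stdint.h>

/* Shadow byte for an application address: (addr >> 3) + offset, where the
   offset is the value returned by the hook above (1 << 36 for LP64,
   1 << 29 for ILP32).  */
uintptr_t
asan_shadow_byte_address (uintptr_t addr, uintptr_t shadow_offset)
{
  return (addr >> 3) + shadow_offset;
}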
18482
5f3bc026 18483static rtx
cb4347e8 18484aarch64_gen_ccmp_first (rtx_insn **prep_seq, rtx_insn **gen_seq,
5f3bc026
ZC
18485 int code, tree treeop0, tree treeop1)
18486{
c8012fbc
WD
18487 machine_mode op_mode, cmp_mode, cc_mode = CCmode;
18488 rtx op0, op1;
5f3bc026 18489 int unsignedp = TYPE_UNSIGNED (TREE_TYPE (treeop0));
c8012fbc 18490 insn_code icode;
5f3bc026
ZC
18491 struct expand_operand ops[4];
18492
5f3bc026
ZC
18493 start_sequence ();
18494 expand_operands (treeop0, treeop1, NULL_RTX, &op0, &op1, EXPAND_NORMAL);
18495
18496 op_mode = GET_MODE (op0);
18497 if (op_mode == VOIDmode)
18498 op_mode = GET_MODE (op1);
18499
18500 switch (op_mode)
18501 {
4e10a5a7
RS
18502 case E_QImode:
18503 case E_HImode:
18504 case E_SImode:
5f3bc026
ZC
18505 cmp_mode = SImode;
18506 icode = CODE_FOR_cmpsi;
18507 break;
18508
4e10a5a7 18509 case E_DImode:
5f3bc026
ZC
18510 cmp_mode = DImode;
18511 icode = CODE_FOR_cmpdi;
18512 break;
18513
4e10a5a7 18514 case E_SFmode:
786e3c06
WD
18515 cmp_mode = SFmode;
18516 cc_mode = aarch64_select_cc_mode ((rtx_code) code, op0, op1);
18517 icode = cc_mode == CCFPEmode ? CODE_FOR_fcmpesf : CODE_FOR_fcmpsf;
18518 break;
18519
4e10a5a7 18520 case E_DFmode:
786e3c06
WD
18521 cmp_mode = DFmode;
18522 cc_mode = aarch64_select_cc_mode ((rtx_code) code, op0, op1);
18523 icode = cc_mode == CCFPEmode ? CODE_FOR_fcmpedf : CODE_FOR_fcmpdf;
18524 break;
18525
5f3bc026
ZC
18526 default:
18527 end_sequence ();
18528 return NULL_RTX;
18529 }
18530
c8012fbc
WD
18531 op0 = prepare_operand (icode, op0, 0, op_mode, cmp_mode, unsignedp);
18532 op1 = prepare_operand (icode, op1, 1, op_mode, cmp_mode, unsignedp);
5f3bc026
ZC
18533 if (!op0 || !op1)
18534 {
18535 end_sequence ();
18536 return NULL_RTX;
18537 }
18538 *prep_seq = get_insns ();
18539 end_sequence ();
18540
c8012fbc
WD
18541 create_fixed_operand (&ops[0], op0);
18542 create_fixed_operand (&ops[1], op1);
5f3bc026
ZC
18543
18544 start_sequence ();
c8012fbc 18545 if (!maybe_expand_insn (icode, 2, ops))
5f3bc026
ZC
18546 {
18547 end_sequence ();
18548 return NULL_RTX;
18549 }
18550 *gen_seq = get_insns ();
18551 end_sequence ();
18552
c8012fbc
WD
18553 return gen_rtx_fmt_ee ((rtx_code) code, cc_mode,
18554 gen_rtx_REG (cc_mode, CC_REGNUM), const0_rtx);
5f3bc026
ZC
18555}
18556
18557static rtx
cb4347e8
TS
18558aarch64_gen_ccmp_next (rtx_insn **prep_seq, rtx_insn **gen_seq, rtx prev,
18559 int cmp_code, tree treeop0, tree treeop1, int bit_code)
5f3bc026 18560{
c8012fbc
WD
18561 rtx op0, op1, target;
18562 machine_mode op_mode, cmp_mode, cc_mode = CCmode;
5f3bc026 18563 int unsignedp = TYPE_UNSIGNED (TREE_TYPE (treeop0));
c8012fbc 18564 insn_code icode;
5f3bc026 18565 struct expand_operand ops[6];
c8012fbc 18566 int aarch64_cond;
5f3bc026 18567
cb4347e8 18568 push_to_sequence (*prep_seq);
5f3bc026
ZC
18569 expand_operands (treeop0, treeop1, NULL_RTX, &op0, &op1, EXPAND_NORMAL);
18570
18571 op_mode = GET_MODE (op0);
18572 if (op_mode == VOIDmode)
18573 op_mode = GET_MODE (op1);
18574
18575 switch (op_mode)
18576 {
4e10a5a7
RS
18577 case E_QImode:
18578 case E_HImode:
18579 case E_SImode:
5f3bc026 18580 cmp_mode = SImode;
c8012fbc 18581 icode = CODE_FOR_ccmpsi;
5f3bc026
ZC
18582 break;
18583
4e10a5a7 18584 case E_DImode:
5f3bc026 18585 cmp_mode = DImode;
c8012fbc 18586 icode = CODE_FOR_ccmpdi;
5f3bc026
ZC
18587 break;
18588
4e10a5a7 18589 case E_SFmode:
786e3c06
WD
18590 cmp_mode = SFmode;
18591 cc_mode = aarch64_select_cc_mode ((rtx_code) cmp_code, op0, op1);
18592 icode = cc_mode == CCFPEmode ? CODE_FOR_fccmpesf : CODE_FOR_fccmpsf;
18593 break;
18594
4e10a5a7 18595 case E_DFmode:
786e3c06
WD
18596 cmp_mode = DFmode;
18597 cc_mode = aarch64_select_cc_mode ((rtx_code) cmp_code, op0, op1);
18598 icode = cc_mode == CCFPEmode ? CODE_FOR_fccmpedf : CODE_FOR_fccmpdf;
18599 break;
18600
5f3bc026
ZC
18601 default:
18602 end_sequence ();
18603 return NULL_RTX;
18604 }
18605
18606 op0 = prepare_operand (icode, op0, 2, op_mode, cmp_mode, unsignedp);
18607 op1 = prepare_operand (icode, op1, 3, op_mode, cmp_mode, unsignedp);
18608 if (!op0 || !op1)
18609 {
18610 end_sequence ();
18611 return NULL_RTX;
18612 }
18613 *prep_seq = get_insns ();
18614 end_sequence ();
18615
18616 target = gen_rtx_REG (cc_mode, CC_REGNUM);
c8012fbc 18617 aarch64_cond = aarch64_get_condition_code_1 (cc_mode, (rtx_code) cmp_code);
5f3bc026 18618
c8012fbc
WD
18619 if (bit_code != AND)
18620 {
18621 prev = gen_rtx_fmt_ee (REVERSE_CONDITION (GET_CODE (prev),
18622 GET_MODE (XEXP (prev, 0))),
18623 VOIDmode, XEXP (prev, 0), const0_rtx);
18624 aarch64_cond = AARCH64_INVERSE_CONDITION_CODE (aarch64_cond);
18625 }
18626
18627 create_fixed_operand (&ops[0], XEXP (prev, 0));
5f3bc026
ZC
18628 create_fixed_operand (&ops[1], target);
18629 create_fixed_operand (&ops[2], op0);
18630 create_fixed_operand (&ops[3], op1);
c8012fbc
WD
18631 create_fixed_operand (&ops[4], prev);
18632 create_fixed_operand (&ops[5], GEN_INT (aarch64_cond));
5f3bc026 18633
cb4347e8 18634 push_to_sequence (*gen_seq);
5f3bc026
ZC
18635 if (!maybe_expand_insn (icode, 6, ops))
18636 {
18637 end_sequence ();
18638 return NULL_RTX;
18639 }
18640
18641 *gen_seq = get_insns ();
18642 end_sequence ();
18643
c8012fbc 18644 return gen_rtx_fmt_ee ((rtx_code) cmp_code, VOIDmode, target, const0_rtx);
5f3bc026
ZC
18645}
18646
18647#undef TARGET_GEN_CCMP_FIRST
18648#define TARGET_GEN_CCMP_FIRST aarch64_gen_ccmp_first
18649
18650#undef TARGET_GEN_CCMP_NEXT
18651#define TARGET_GEN_CCMP_NEXT aarch64_gen_ccmp_next
18652
6a569cdd
KT
18653/* Implement TARGET_SCHED_MACRO_FUSION_P. Return true if target supports
18654 instruction fusion of some sort. */
18655
18656static bool
18657aarch64_macro_fusion_p (void)
18658{
b175b679 18659 return aarch64_tune_params.fusible_ops != AARCH64_FUSE_NOTHING;
6a569cdd
KT
18660}
18661
18662
18663/* Implement TARGET_SCHED_MACRO_FUSION_PAIR_P. Return true if PREV and CURR
18664 should be kept together during scheduling. */
18665
18666static bool
18667aarch_macro_fusion_pair_p (rtx_insn *prev, rtx_insn *curr)
18668{
18669 rtx set_dest;
18670 rtx prev_set = single_set (prev);
18671 rtx curr_set = single_set (curr);
18672 /* prev and curr are simple SET insns i.e. no flag setting or branching. */
18673 bool simple_sets_p = prev_set && curr_set && !any_condjump_p (curr);
18674
18675 if (!aarch64_macro_fusion_p ())
18676 return false;
18677
d7b03373 18678 if (simple_sets_p && aarch64_fusion_enabled_p (AARCH64_FUSE_MOV_MOVK))
6a569cdd
KT
18679 {
18680 /* We are trying to match:
18681 prev (mov) == (set (reg r0) (const_int imm16))
18682 curr (movk) == (set (zero_extract (reg r0)
18683 (const_int 16)
18684 (const_int 16))
18685 (const_int imm16_1)) */
18686
18687 set_dest = SET_DEST (curr_set);
18688
18689 if (GET_CODE (set_dest) == ZERO_EXTRACT
18690 && CONST_INT_P (SET_SRC (curr_set))
18691 && CONST_INT_P (SET_SRC (prev_set))
18692 && CONST_INT_P (XEXP (set_dest, 2))
18693 && INTVAL (XEXP (set_dest, 2)) == 16
18694 && REG_P (XEXP (set_dest, 0))
18695 && REG_P (SET_DEST (prev_set))
18696 && REGNO (XEXP (set_dest, 0)) == REGNO (SET_DEST (prev_set)))
18697 {
18698 return true;
18699 }
18700 }
18701
d7b03373 18702 if (simple_sets_p && aarch64_fusion_enabled_p (AARCH64_FUSE_ADRP_ADD))
9bbe08fe
KT
18703 {
18704
18705 /* We're trying to match:
18706 prev (adrp) == (set (reg r1)
18707 (high (symbol_ref ("SYM"))))
18708 curr (add) == (set (reg r0)
18709 (lo_sum (reg r1)
18710 (symbol_ref ("SYM"))))
18711 Note that r0 need not necessarily be the same as r1, especially
18712 during pre-regalloc scheduling. */
18713
18714 if (satisfies_constraint_Ush (SET_SRC (prev_set))
18715 && REG_P (SET_DEST (prev_set)) && REG_P (SET_DEST (curr_set)))
18716 {
18717 if (GET_CODE (SET_SRC (curr_set)) == LO_SUM
18718 && REG_P (XEXP (SET_SRC (curr_set), 0))
18719 && REGNO (XEXP (SET_SRC (curr_set), 0))
18720 == REGNO (SET_DEST (prev_set))
18721 && rtx_equal_p (XEXP (SET_SRC (prev_set), 0),
18722 XEXP (SET_SRC (curr_set), 1)))
18723 return true;
18724 }
18725 }
18726
d7b03373 18727 if (simple_sets_p && aarch64_fusion_enabled_p (AARCH64_FUSE_MOVK_MOVK))
cd0cb232
KT
18728 {
18729
18730 /* We're trying to match:
18731 prev (movk) == (set (zero_extract (reg r0)
18732 (const_int 16)
18733 (const_int 32))
18734 (const_int imm16_1))
18735 curr (movk) == (set (zero_extract (reg r0)
18736 (const_int 16)
18737 (const_int 48))
18738 (const_int imm16_2)) */
18739
18740 if (GET_CODE (SET_DEST (prev_set)) == ZERO_EXTRACT
18741 && GET_CODE (SET_DEST (curr_set)) == ZERO_EXTRACT
18742 && REG_P (XEXP (SET_DEST (prev_set), 0))
18743 && REG_P (XEXP (SET_DEST (curr_set), 0))
18744 && REGNO (XEXP (SET_DEST (prev_set), 0))
18745 == REGNO (XEXP (SET_DEST (curr_set), 0))
18746 && CONST_INT_P (XEXP (SET_DEST (prev_set), 2))
18747 && CONST_INT_P (XEXP (SET_DEST (curr_set), 2))
18748 && INTVAL (XEXP (SET_DEST (prev_set), 2)) == 32
18749 && INTVAL (XEXP (SET_DEST (curr_set), 2)) == 48
18750 && CONST_INT_P (SET_SRC (prev_set))
18751 && CONST_INT_P (SET_SRC (curr_set)))
18752 return true;
18753
18754 }
d7b03373 18755 if (simple_sets_p && aarch64_fusion_enabled_p (AARCH64_FUSE_ADRP_LDR))
d8354ad7
KT
18756 {
18757 /* We're trying to match:
18758 prev (adrp) == (set (reg r0)
18759 (high (symbol_ref ("SYM"))))
18760 curr (ldr) == (set (reg r1)
18761 (mem (lo_sum (reg r0)
18762 (symbol_ref ("SYM")))))
18763 or
18764 curr (ldr) == (set (reg r1)
18765 (zero_extend (mem
18766 (lo_sum (reg r0)
18767 (symbol_ref ("SYM")))))) */
18768 if (satisfies_constraint_Ush (SET_SRC (prev_set))
18769 && REG_P (SET_DEST (prev_set)) && REG_P (SET_DEST (curr_set)))
18770 {
18771 rtx curr_src = SET_SRC (curr_set);
18772
18773 if (GET_CODE (curr_src) == ZERO_EXTEND)
18774 curr_src = XEXP (curr_src, 0);
18775
18776 if (MEM_P (curr_src) && GET_CODE (XEXP (curr_src, 0)) == LO_SUM
18777 && REG_P (XEXP (XEXP (curr_src, 0), 0))
18778 && REGNO (XEXP (XEXP (curr_src, 0), 0))
18779 == REGNO (SET_DEST (prev_set))
18780 && rtx_equal_p (XEXP (XEXP (curr_src, 0), 1),
18781 XEXP (SET_SRC (prev_set), 0)))
18782 return true;
18783 }
18784 }
cd0cb232 18785
d7b03373 18786 if (aarch64_fusion_enabled_p (AARCH64_FUSE_CMP_BRANCH)
3759108f
AP
18787 && any_condjump_p (curr))
18788 {
509f819a
N
18789 unsigned int condreg1, condreg2;
18790 rtx cc_reg_1;
18791 aarch64_fixed_condition_code_regs (&condreg1, &condreg2);
18792 cc_reg_1 = gen_rtx_REG (CCmode, condreg1);
18793
18794 if (reg_referenced_p (cc_reg_1, PATTERN (curr))
18795 && prev
18796 && modified_in_p (cc_reg_1, prev))
18797 {
f8a27206
AP
18798 enum attr_type prev_type = get_attr_type (prev);
18799
509f819a
N
 18800	      /* FIXME: this misses some instructions that are considered simple arithmetic
18801 instructions for ThunderX. Simple shifts are missed here. */
18802 if (prev_type == TYPE_ALUS_SREG
18803 || prev_type == TYPE_ALUS_IMM
18804 || prev_type == TYPE_LOGICS_REG
18805 || prev_type == TYPE_LOGICS_IMM)
18806 return true;
18807 }
3759108f
AP
18808 }
18809
bee7e0fc
AP
18810 if (prev_set
18811 && curr_set
18812 && aarch64_fusion_enabled_p (AARCH64_FUSE_ALU_BRANCH)
00c7c57f
JB
18813 && any_condjump_p (curr))
18814 {
18815 /* We're trying to match:
 18816	 prev (alu_insn) == (set (r0) (plus (r0) (r1/imm)))
18817 curr (cbz) == (set (pc) (if_then_else (eq/ne) (r0)
18818 (const_int 0))
18819 (label_ref ("SYM"))
18820 (pc)) */
18821 if (SET_DEST (curr_set) == (pc_rtx)
18822 && GET_CODE (SET_SRC (curr_set)) == IF_THEN_ELSE
18823 && REG_P (XEXP (XEXP (SET_SRC (curr_set), 0), 0))
18824 && REG_P (SET_DEST (prev_set))
18825 && REGNO (SET_DEST (prev_set))
18826 == REGNO (XEXP (XEXP (SET_SRC (curr_set), 0), 0)))
18827 {
18828 /* Fuse ALU operations followed by conditional branch instruction. */
18829 switch (get_attr_type (prev))
18830 {
18831 case TYPE_ALU_IMM:
18832 case TYPE_ALU_SREG:
18833 case TYPE_ADC_REG:
18834 case TYPE_ADC_IMM:
18835 case TYPE_ADCS_REG:
18836 case TYPE_ADCS_IMM:
18837 case TYPE_LOGIC_REG:
18838 case TYPE_LOGIC_IMM:
18839 case TYPE_CSEL:
18840 case TYPE_ADR:
18841 case TYPE_MOV_IMM:
18842 case TYPE_SHIFT_REG:
18843 case TYPE_SHIFT_IMM:
18844 case TYPE_BFM:
18845 case TYPE_RBIT:
18846 case TYPE_REV:
18847 case TYPE_EXTEND:
18848 return true;
18849
18850 default:;
18851 }
18852 }
18853 }
18854
6a569cdd
KT
18855 return false;
18856}
18857
f2879a90
KT
18858/* Return true iff the instruction fusion described by OP is enabled. */
18859
18860bool
18861aarch64_fusion_enabled_p (enum aarch64_fusion_pairs op)
18862{
18863 return (aarch64_tune_params.fusible_ops & op) != 0;
18864}
18865
350013bc
BC
18866/* If MEM is in the form of [base+offset], extract the two parts
 18867	   of the address and set them to BASE and OFFSET, otherwise return false
18868 after clearing BASE and OFFSET. */
18869
18870bool
18871extract_base_offset_in_addr (rtx mem, rtx *base, rtx *offset)
18872{
18873 rtx addr;
18874
18875 gcc_assert (MEM_P (mem));
18876
18877 addr = XEXP (mem, 0);
18878
18879 if (REG_P (addr))
18880 {
18881 *base = addr;
18882 *offset = const0_rtx;
18883 return true;
18884 }
18885
18886 if (GET_CODE (addr) == PLUS
18887 && REG_P (XEXP (addr, 0)) && CONST_INT_P (XEXP (addr, 1)))
18888 {
18889 *base = XEXP (addr, 0);
18890 *offset = XEXP (addr, 1);
18891 return true;
18892 }
18893
18894 *base = NULL_RTX;
18895 *offset = NULL_RTX;
18896
18897 return false;
18898}
18899
18900/* Types for scheduling fusion. */
18901enum sched_fusion_type
18902{
18903 SCHED_FUSION_NONE = 0,
18904 SCHED_FUSION_LD_SIGN_EXTEND,
18905 SCHED_FUSION_LD_ZERO_EXTEND,
18906 SCHED_FUSION_LD,
18907 SCHED_FUSION_ST,
18908 SCHED_FUSION_NUM
18909};
18910
18911/* If INSN is a load or store of address in the form of [base+offset],
 18912	   extract the two parts and set them to BASE and OFFSET.  Return the
 18913	   scheduling fusion type of this INSN.  */
18914
18915static enum sched_fusion_type
18916fusion_load_store (rtx_insn *insn, rtx *base, rtx *offset)
18917{
18918 rtx x, dest, src;
18919 enum sched_fusion_type fusion = SCHED_FUSION_LD;
18920
18921 gcc_assert (INSN_P (insn));
18922 x = PATTERN (insn);
18923 if (GET_CODE (x) != SET)
18924 return SCHED_FUSION_NONE;
18925
18926 src = SET_SRC (x);
18927 dest = SET_DEST (x);
18928
abc52318
KT
18929 machine_mode dest_mode = GET_MODE (dest);
18930
18931 if (!aarch64_mode_valid_for_sched_fusion_p (dest_mode))
350013bc
BC
18932 return SCHED_FUSION_NONE;
18933
18934 if (GET_CODE (src) == SIGN_EXTEND)
18935 {
18936 fusion = SCHED_FUSION_LD_SIGN_EXTEND;
18937 src = XEXP (src, 0);
18938 if (GET_CODE (src) != MEM || GET_MODE (src) != SImode)
18939 return SCHED_FUSION_NONE;
18940 }
18941 else if (GET_CODE (src) == ZERO_EXTEND)
18942 {
18943 fusion = SCHED_FUSION_LD_ZERO_EXTEND;
18944 src = XEXP (src, 0);
18945 if (GET_CODE (src) != MEM || GET_MODE (src) != SImode)
18946 return SCHED_FUSION_NONE;
18947 }
18948
18949 if (GET_CODE (src) == MEM && REG_P (dest))
18950 extract_base_offset_in_addr (src, base, offset);
18951 else if (GET_CODE (dest) == MEM && (REG_P (src) || src == const0_rtx))
18952 {
18953 fusion = SCHED_FUSION_ST;
18954 extract_base_offset_in_addr (dest, base, offset);
18955 }
18956 else
18957 return SCHED_FUSION_NONE;
18958
18959 if (*base == NULL_RTX || *offset == NULL_RTX)
18960 fusion = SCHED_FUSION_NONE;
18961
18962 return fusion;
18963}
18964
18965/* Implement the TARGET_SCHED_FUSION_PRIORITY hook.
18966
 18967	   Currently we only support fusing ldr or str instructions, so FUSION_PRI
 18968	   and PRI are only calculated for these instructions.  For other instructions,
 18969	   FUSION_PRI and PRI are simply set to MAX_PRI - 1.  In the future, other
 18970	   types of instruction fusion can be added by returning different priorities.
18971
18972 It's important that irrelevant instructions get the largest FUSION_PRI. */
18973
18974static void
18975aarch64_sched_fusion_priority (rtx_insn *insn, int max_pri,
18976 int *fusion_pri, int *pri)
18977{
18978 int tmp, off_val;
18979 rtx base, offset;
18980 enum sched_fusion_type fusion;
18981
18982 gcc_assert (INSN_P (insn));
18983
18984 tmp = max_pri - 1;
18985 fusion = fusion_load_store (insn, &base, &offset);
18986 if (fusion == SCHED_FUSION_NONE)
18987 {
18988 *pri = tmp;
18989 *fusion_pri = tmp;
18990 return;
18991 }
18992
18993 /* Set FUSION_PRI according to fusion type and base register. */
18994 *fusion_pri = tmp - fusion * FIRST_PSEUDO_REGISTER - REGNO (base);
18995
18996 /* Calculate PRI. */
18997 tmp /= 2;
18998
18999 /* INSN with smaller offset goes first. */
19000 off_val = (int)(INTVAL (offset));
19001 if (off_val >= 0)
19002 tmp -= (off_val & 0xfffff);
19003 else
19004 tmp += ((- off_val) & 0xfffff);
19005
19006 *pri = tmp;
19007 return;
19008}
19009
9bca63d4
WD
19010/* Implement the TARGET_SCHED_ADJUST_PRIORITY hook.
19011 Adjust priority of sha1h instructions so they are scheduled before
19012 other SHA1 instructions. */
19013
19014static int
19015aarch64_sched_adjust_priority (rtx_insn *insn, int priority)
19016{
19017 rtx x = PATTERN (insn);
19018
19019 if (GET_CODE (x) == SET)
19020 {
19021 x = SET_SRC (x);
19022
19023 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_SHA1H)
19024 return priority + 10;
19025 }
19026
19027 return priority;
19028}
19029
350013bc
BC
19030/* Given OPERANDS of consecutive load/store, check if we can merge
19031 them into ldp/stp. LOAD is true if they are load instructions.
19032 MODE is the mode of memory operands. */
19033
19034bool
19035aarch64_operands_ok_for_ldpstp (rtx *operands, bool load,
b8506a8a 19036 machine_mode mode)
350013bc
BC
19037{
19038 HOST_WIDE_INT offval_1, offval_2, msize;
19039 enum reg_class rclass_1, rclass_2;
19040 rtx mem_1, mem_2, reg_1, reg_2, base_1, base_2, offset_1, offset_2;
19041
19042 if (load)
19043 {
19044 mem_1 = operands[1];
19045 mem_2 = operands[3];
19046 reg_1 = operands[0];
19047 reg_2 = operands[2];
19048 gcc_assert (REG_P (reg_1) && REG_P (reg_2));
19049 if (REGNO (reg_1) == REGNO (reg_2))
19050 return false;
19051 }
19052 else
19053 {
19054 mem_1 = operands[0];
19055 mem_2 = operands[2];
19056 reg_1 = operands[1];
19057 reg_2 = operands[3];
19058 }
19059
bf84ac44
AP
19060 /* The mems cannot be volatile. */
19061 if (MEM_VOLATILE_P (mem_1) || MEM_VOLATILE_P (mem_2))
19062 return false;
19063
54700e2e
AP
19064 /* If we have SImode and slow unaligned ldp,
 19065	     check that the alignment is at least 8 bytes.  */
19066 if (mode == SImode
19067 && (aarch64_tune_params.extra_tuning_flags
19068 & AARCH64_EXTRA_TUNE_SLOW_UNALIGNED_LDPW)
19069 && !optimize_size
19070 && MEM_ALIGN (mem_1) < 8 * BITS_PER_UNIT)
19071 return false;
19072
350013bc
BC
19073 /* Check if the addresses are in the form of [base+offset]. */
19074 extract_base_offset_in_addr (mem_1, &base_1, &offset_1);
19075 if (base_1 == NULL_RTX || offset_1 == NULL_RTX)
19076 return false;
19077 extract_base_offset_in_addr (mem_2, &base_2, &offset_2);
19078 if (base_2 == NULL_RTX || offset_2 == NULL_RTX)
19079 return false;
19080
19081 /* Check if the bases are same. */
19082 if (!rtx_equal_p (base_1, base_2))
19083 return false;
19084
dfe1da23
JW
19085 /* The operands must be of the same size. */
19086 gcc_assert (known_eq (GET_MODE_SIZE (GET_MODE (mem_1)),
19087 GET_MODE_SIZE (GET_MODE (mem_2))));
19088
350013bc
BC
19089 offval_1 = INTVAL (offset_1);
19090 offval_2 = INTVAL (offset_2);
6a70badb
RS
19091 /* We should only be trying this for fixed-sized modes. There is no
19092 SVE LDP/STP instruction. */
19093 msize = GET_MODE_SIZE (mode).to_constant ();
350013bc
BC
19094 /* Check if the offsets are consecutive. */
19095 if (offval_1 != (offval_2 + msize) && offval_2 != (offval_1 + msize))
19096 return false;
19097
19098 /* Check if the addresses are clobbered by load. */
19099 if (load)
19100 {
19101 if (reg_mentioned_p (reg_1, mem_1))
19102 return false;
19103
19104 /* In increasing order, the last load can clobber the address. */
19105 if (offval_1 > offval_2 && reg_mentioned_p (reg_2, mem_2))
9b56ec11 19106 return false;
350013bc
BC
19107 }
19108
9b56ec11
JW
19109 /* One of the memory accesses must be a mempair operand.
19110 If it is not the first one, they need to be swapped by the
19111 peephole. */
19112 if (!aarch64_mem_pair_operand (mem_1, GET_MODE (mem_1))
19113 && !aarch64_mem_pair_operand (mem_2, GET_MODE (mem_2)))
19114 return false;
19115
350013bc
BC
19116 if (REG_P (reg_1) && FP_REGNUM_P (REGNO (reg_1)))
19117 rclass_1 = FP_REGS;
19118 else
19119 rclass_1 = GENERAL_REGS;
19120
19121 if (REG_P (reg_2) && FP_REGNUM_P (REGNO (reg_2)))
19122 rclass_2 = FP_REGS;
19123 else
19124 rclass_2 = GENERAL_REGS;
19125
19126 /* Check if the registers are of same class. */
19127 if (rclass_1 != rclass_2)
19128 return false;
19129
19130 return true;
19131}
19132
9b56ec11
JW
19133/* Given OPERANDS of consecutive load/store that can be merged,
19134 swap them if they are not in ascending order. */
19135void
19136aarch64_swap_ldrstr_operands (rtx* operands, bool load)
19137{
19138 rtx mem_1, mem_2, base_1, base_2, offset_1, offset_2;
19139 HOST_WIDE_INT offval_1, offval_2;
19140
19141 if (load)
19142 {
19143 mem_1 = operands[1];
19144 mem_2 = operands[3];
19145 }
19146 else
19147 {
19148 mem_1 = operands[0];
19149 mem_2 = operands[2];
19150 }
19151
19152 extract_base_offset_in_addr (mem_1, &base_1, &offset_1);
19153 extract_base_offset_in_addr (mem_2, &base_2, &offset_2);
19154
19155 offval_1 = INTVAL (offset_1);
19156 offval_2 = INTVAL (offset_2);
19157
19158 if (offval_1 > offval_2)
19159 {
19160 /* Irrespective of whether this is a load or a store,
19161 we do the same swap. */
19162 std::swap (operands[0], operands[2]);
19163 std::swap (operands[1], operands[3]);
19164 }
19165}
19166
d0b51297
JW
19167/* Taking X and Y to be HOST_WIDE_INT pointers, return the result of a
19168 comparison between the two. */
19169int
19170aarch64_host_wide_int_compare (const void *x, const void *y)
19171{
19172 return wi::cmps (* ((const HOST_WIDE_INT *) x),
19173 * ((const HOST_WIDE_INT *) y));
19174}
19175
19176/* Taking X and Y to be pairs of RTX, one pointing to a MEM rtx and the
19177 other pointing to a REG rtx containing an offset, compare the offsets
19178 of the two pairs.
19179
19180 Return:
19181
19182 1 iff offset (X) > offset (Y)
19183 0 iff offset (X) == offset (Y)
19184 -1 iff offset (X) < offset (Y) */
19185int
19186aarch64_ldrstr_offset_compare (const void *x, const void *y)
19187{
19188 const rtx * operands_1 = (const rtx *) x;
19189 const rtx * operands_2 = (const rtx *) y;
19190 rtx mem_1, mem_2, base, offset_1, offset_2;
19191
19192 if (MEM_P (operands_1[0]))
19193 mem_1 = operands_1[0];
19194 else
19195 mem_1 = operands_1[1];
19196
19197 if (MEM_P (operands_2[0]))
19198 mem_2 = operands_2[0];
19199 else
19200 mem_2 = operands_2[1];
19201
19202 /* Extract the offsets. */
19203 extract_base_offset_in_addr (mem_1, &base, &offset_1);
19204 extract_base_offset_in_addr (mem_2, &base, &offset_2);
19205
19206 gcc_assert (offset_1 != NULL_RTX && offset_2 != NULL_RTX);
19207
19208 return wi::cmps (INTVAL (offset_1), INTVAL (offset_2));
19209}
19210
350013bc
BC
19211/* Given OPERANDS of consecutive load/store, check if we can merge
19212 them into ldp/stp by adjusting the offset. LOAD is true if they
19213 are load instructions. MODE is the mode of memory operands.
19214
19215 Given below consecutive stores:
19216
19217 str w1, [xb, 0x100]
19218 str w1, [xb, 0x104]
19219 str w1, [xb, 0x108]
19220 str w1, [xb, 0x10c]
19221
19222 Though the offsets are out of the range supported by stp, we can
19223 still pair them after adjusting the offset, like:
19224
19225 add scratch, xb, 0x100
19226 stp w1, w1, [scratch]
19227 stp w1, w1, [scratch, 0x8]
19228
19229 The peephole patterns detecting this opportunity should guarantee
 19230	   the scratch register is available.  */
19231
19232bool
19233aarch64_operands_adjust_ok_for_ldpstp (rtx *operands, bool load,
146c2e3a 19234 scalar_mode mode)
350013bc 19235{
34d7854d
JW
19236 const int num_insns = 4;
19237 enum reg_class rclass;
19238 HOST_WIDE_INT offvals[num_insns], msize;
19239 rtx mem[num_insns], reg[num_insns], base[num_insns], offset[num_insns];
350013bc
BC
19240
19241 if (load)
19242 {
34d7854d
JW
19243 for (int i = 0; i < num_insns; i++)
19244 {
19245 reg[i] = operands[2 * i];
19246 mem[i] = operands[2 * i + 1];
19247
19248 gcc_assert (REG_P (reg[i]));
19249 }
d0b51297
JW
19250
19251 /* Do not attempt to merge the loads if the loads clobber each other. */
19252 for (int i = 0; i < 8; i += 2)
19253 for (int j = i + 2; j < 8; j += 2)
19254 if (reg_overlap_mentioned_p (operands[i], operands[j]))
19255 return false;
350013bc
BC
19256 }
19257 else
34d7854d
JW
19258 for (int i = 0; i < num_insns; i++)
19259 {
19260 mem[i] = operands[2 * i];
19261 reg[i] = operands[2 * i + 1];
19262 }
350013bc 19263
34d7854d
JW
19264 /* Skip if memory operand is by itself valid for ldp/stp. */
19265 if (!MEM_P (mem[0]) || aarch64_mem_pair_operand (mem[0], mode))
bf84ac44
AP
19266 return false;
19267
34d7854d
JW
19268 for (int i = 0; i < num_insns; i++)
19269 {
19270 /* The mems cannot be volatile. */
19271 if (MEM_VOLATILE_P (mem[i]))
19272 return false;
19273
19274 /* Check if the addresses are in the form of [base+offset]. */
19275 extract_base_offset_in_addr (mem[i], base + i, offset + i);
19276 if (base[i] == NULL_RTX || offset[i] == NULL_RTX)
19277 return false;
19278 }
19279
363b395b
JW
19280 /* Check if the registers are of same class. */
19281 rclass = REG_P (reg[0]) && FP_REGNUM_P (REGNO (reg[0]))
19282 ? FP_REGS : GENERAL_REGS;
19283
19284 for (int i = 1; i < num_insns; i++)
19285 if (REG_P (reg[i]) && FP_REGNUM_P (REGNO (reg[i])))
19286 {
19287 if (rclass != FP_REGS)
19288 return false;
19289 }
19290 else
19291 {
19292 if (rclass != GENERAL_REGS)
19293 return false;
19294 }
19295
19296 /* Only the last register in the order in which they occur
19297 may be clobbered by the load. */
19298 if (rclass == GENERAL_REGS && load)
19299 for (int i = 0; i < num_insns - 1; i++)
34d7854d
JW
19300 if (reg_mentioned_p (reg[i], mem[i]))
19301 return false;
350013bc
BC
19302
19303 /* Check if the bases are same. */
34d7854d
JW
19304 for (int i = 0; i < num_insns - 1; i++)
19305 if (!rtx_equal_p (base[i], base[i + 1]))
19306 return false;
19307
19308 for (int i = 0; i < num_insns; i++)
19309 offvals[i] = INTVAL (offset[i]);
350013bc 19310
350013bc 19311 msize = GET_MODE_SIZE (mode);
d0b51297
JW
19312
19313 /* Check if the offsets can be put in the right order to do a ldp/stp. */
34d7854d
JW
19314 qsort (offvals, num_insns, sizeof (HOST_WIDE_INT),
19315 aarch64_host_wide_int_compare);
d0b51297
JW
19316
19317 if (!(offvals[1] == offvals[0] + msize
19318 && offvals[3] == offvals[2] + msize))
350013bc
BC
19319 return false;
19320
d0b51297
JW
19321 /* Check that offsets are within range of each other. The ldp/stp
19322 instructions have 7 bit immediate offsets, so use 0x80. */
19323 if (offvals[2] - offvals[0] >= msize * 0x80)
19324 return false;
350013bc 19325
d0b51297
JW
19326 /* The offsets must be aligned with respect to each other. */
19327 if (offvals[0] % msize != offvals[2] % msize)
19328 return false;
19329
54700e2e
AP
19330 /* If we have SImode and slow unaligned ldp,
 19331	     check that the alignment is at least 8 bytes.  */
19332 if (mode == SImode
19333 && (aarch64_tune_params.extra_tuning_flags
34d7854d 19334 & AARCH64_EXTRA_TUNE_SLOW_UNALIGNED_LDPW)
54700e2e 19335 && !optimize_size
34d7854d 19336 && MEM_ALIGN (mem[0]) < 8 * BITS_PER_UNIT)
54700e2e
AP
19337 return false;
19338
350013bc
BC
19339 return true;
19340}
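The offset conditions checked above can be summarised on four already-extracted byte offsets: after sorting, the accesses must form two adjacent pairs, the pairs must share alignment, and both must land within the 7-bit scaled LDP/STP range of one adjusted base.  A standalone sketch of just those checks (helper names are illustrative, the surrounding register and volatility checks are omitted):

#include <stdbool.h>
#include <stdlib.h>

static int
cmp_hwi (const void *x, const void *y)
{
  long long a = *(const long long *) x, b = *(const long long *) y;
  return (a > b) - (a < b);
}

bool
offsets_ok_for_adjusted_ldpstp (long long off[4], long long msize)
{
  qsort (off, 4, sizeof off[0], cmp_hwi);

  if (off[1] != off[0] + msize || off[3] != off[2] + msize)
    return false;			/* not two consecutive pairs */
  if (off[2] - off[0] >= msize * 0x80)
    return false;			/* pairs too far apart for one base */
  if (off[0] % msize != off[2] % msize)
    return false;			/* pairs not mutually aligned */
  return true;
}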
19341
19342/* Given OPERANDS of consecutive load/store, this function pairs them
d0b51297
JW
19343 into LDP/STP after adjusting the offset. It depends on the fact
19344 that the operands can be sorted so the offsets are correct for STP.
350013bc
BC
19345 MODE is the mode of memory operands. CODE is the rtl operator
19346 which should be applied to all memory operands, it's SIGN_EXTEND,
19347 ZERO_EXTEND or UNKNOWN. */
19348
19349bool
19350aarch64_gen_adjusted_ldpstp (rtx *operands, bool load,
146c2e3a 19351 scalar_mode mode, RTX_CODE code)
350013bc 19352{
d0b51297 19353 rtx base, offset_1, offset_3, t1, t2;
350013bc 19354 rtx mem_1, mem_2, mem_3, mem_4;
d0b51297
JW
19355 rtx temp_operands[8];
19356 HOST_WIDE_INT off_val_1, off_val_3, base_off, new_off_1, new_off_3,
19357 stp_off_upper_limit, stp_off_lower_limit, msize;
9b56ec11 19358
d0b51297
JW
19359 /* We make changes on a copy as we may still bail out. */
19360 for (int i = 0; i < 8; i ++)
19361 temp_operands[i] = operands[i];
9b56ec11 19362
d0b51297
JW
19363 /* Sort the operands. */
19364 qsort (temp_operands, 4, 2 * sizeof (rtx *), aarch64_ldrstr_offset_compare);
9b56ec11 19365
f6af9c21
RE
19366 /* Copy the memory operands so that if we have to bail for some
19367 reason the original addresses are unchanged. */
350013bc
BC
19368 if (load)
19369 {
f6af9c21
RE
19370 mem_1 = copy_rtx (temp_operands[1]);
19371 mem_2 = copy_rtx (temp_operands[3]);
19372 mem_3 = copy_rtx (temp_operands[5]);
19373 mem_4 = copy_rtx (temp_operands[7]);
350013bc
BC
19374 }
19375 else
19376 {
f6af9c21
RE
19377 mem_1 = copy_rtx (temp_operands[0]);
19378 mem_2 = copy_rtx (temp_operands[2]);
19379 mem_3 = copy_rtx (temp_operands[4]);
19380 mem_4 = copy_rtx (temp_operands[6]);
350013bc
BC
19381 gcc_assert (code == UNKNOWN);
19382 }
19383
9b56ec11 19384 extract_base_offset_in_addr (mem_1, &base, &offset_1);
d0b51297
JW
19385 extract_base_offset_in_addr (mem_3, &base, &offset_3);
19386 gcc_assert (base != NULL_RTX && offset_1 != NULL_RTX
19387 && offset_3 != NULL_RTX);
350013bc 19388
d0b51297 19389 /* Adjust offset so it can fit in LDP/STP instruction. */
350013bc 19390 msize = GET_MODE_SIZE (mode);
d0b51297
JW
19391 stp_off_upper_limit = msize * (0x40 - 1);
19392 stp_off_lower_limit = - msize * 0x40;
350013bc 19393
d0b51297
JW
19394 off_val_1 = INTVAL (offset_1);
19395 off_val_3 = INTVAL (offset_3);
19396
19397 /* The base offset is optimally half way between the two STP/LDP offsets. */
19398 if (msize <= 4)
19399 base_off = (off_val_1 + off_val_3) / 2;
19400 else
19401 /* However, due to issues with negative LDP/STP offset generation for
 19402	       larger modes (DF, DI and vector modes), we must not use negative
19403 addresses smaller than 9 signed unadjusted bits can store. This
19404 provides the most range in this case. */
19405 base_off = off_val_1;
19406
19407 /* Adjust the base so that it is aligned with the addresses but still
19408 optimal. */
19409 if (base_off % msize != off_val_1 % msize)
19410 /* Fix the offset, bearing in mind we want to make it bigger not
19411 smaller. */
19412 base_off += (((base_off % msize) - (off_val_1 % msize)) + msize) % msize;
19413 else if (msize <= 4)
19414 /* The negative range of LDP/STP is one larger than the positive range. */
19415 base_off += msize;
19416
19417 /* Check if base offset is too big or too small. We can attempt to resolve
19418 this issue by setting it to the maximum value and seeing if the offsets
19419 still fit. */
19420 if (base_off >= 0x1000)
350013bc 19421 {
d0b51297
JW
19422 base_off = 0x1000 - 1;
19423 /* We must still make sure that the base offset is aligned with respect
 19424	 to the address.  But it may not be made any bigger.  */
19425 base_off -= (((base_off % msize) - (off_val_1 % msize)) + msize) % msize;
350013bc
BC
19426 }
19427
d0b51297
JW
19428 /* Likewise for the case where the base is too small. */
19429 if (base_off <= -0x1000)
350013bc 19430 {
d0b51297
JW
19431 base_off = -0x1000 + 1;
19432 base_off += (((base_off % msize) - (off_val_1 % msize)) + msize) % msize;
350013bc
BC
19433 }
19434
d0b51297
JW
19435 /* Offset of the first STP/LDP. */
19436 new_off_1 = off_val_1 - base_off;
19437
19438 /* Offset of the second STP/LDP. */
19439 new_off_3 = off_val_3 - base_off;
350013bc 19440
d0b51297
JW
19441 /* The offsets must be within the range of the LDP/STP instructions. */
19442 if (new_off_1 > stp_off_upper_limit || new_off_1 < stp_off_lower_limit
19443 || new_off_3 > stp_off_upper_limit || new_off_3 < stp_off_lower_limit)
350013bc
BC
19444 return false;
19445
d0b51297
JW
19446 replace_equiv_address_nv (mem_1, plus_constant (Pmode, operands[8],
19447 new_off_1), true);
19448 replace_equiv_address_nv (mem_2, plus_constant (Pmode, operands[8],
19449 new_off_1 + msize), true);
19450 replace_equiv_address_nv (mem_3, plus_constant (Pmode, operands[8],
19451 new_off_3), true);
19452 replace_equiv_address_nv (mem_4, plus_constant (Pmode, operands[8],
19453 new_off_3 + msize), true);
19454
19455 if (!aarch64_mem_pair_operand (mem_1, mode)
19456 || !aarch64_mem_pair_operand (mem_3, mode))
19457 return false;
350013bc
BC
19458
19459 if (code == ZERO_EXTEND)
19460 {
19461 mem_1 = gen_rtx_ZERO_EXTEND (DImode, mem_1);
19462 mem_2 = gen_rtx_ZERO_EXTEND (DImode, mem_2);
19463 mem_3 = gen_rtx_ZERO_EXTEND (DImode, mem_3);
19464 mem_4 = gen_rtx_ZERO_EXTEND (DImode, mem_4);
19465 }
19466 else if (code == SIGN_EXTEND)
19467 {
19468 mem_1 = gen_rtx_SIGN_EXTEND (DImode, mem_1);
19469 mem_2 = gen_rtx_SIGN_EXTEND (DImode, mem_2);
19470 mem_3 = gen_rtx_SIGN_EXTEND (DImode, mem_3);
19471 mem_4 = gen_rtx_SIGN_EXTEND (DImode, mem_4);
19472 }
19473
19474 if (load)
19475 {
d0b51297 19476 operands[0] = temp_operands[0];
350013bc 19477 operands[1] = mem_1;
d0b51297 19478 operands[2] = temp_operands[2];
350013bc 19479 operands[3] = mem_2;
d0b51297 19480 operands[4] = temp_operands[4];
350013bc 19481 operands[5] = mem_3;
d0b51297 19482 operands[6] = temp_operands[6];
350013bc
BC
19483 operands[7] = mem_4;
19484 }
19485 else
19486 {
19487 operands[0] = mem_1;
d0b51297 19488 operands[1] = temp_operands[1];
350013bc 19489 operands[2] = mem_2;
d0b51297 19490 operands[3] = temp_operands[3];
350013bc 19491 operands[4] = mem_3;
d0b51297 19492 operands[5] = temp_operands[5];
350013bc 19493 operands[6] = mem_4;
d0b51297 19494 operands[7] = temp_operands[7];
350013bc
BC
19495 }
19496
19497 /* Emit adjusting instruction. */
d0b51297 19498 emit_insn (gen_rtx_SET (operands[8], plus_constant (DImode, base, base_off)));
350013bc 19499 /* Emit ldp/stp instructions. */
f7df4a84
RS
19500 t1 = gen_rtx_SET (operands[0], operands[1]);
19501 t2 = gen_rtx_SET (operands[2], operands[3]);
350013bc 19502 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, t1, t2)));
f7df4a84
RS
19503 t1 = gen_rtx_SET (operands[4], operands[5]);
19504 t2 = gen_rtx_SET (operands[6], operands[7]);
350013bc
BC
19505 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, t1, t2)));
19506 return true;
19507}
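A simplified model of the final range requirement above: once the anchor ADD of BASE_OFF is emitted, each pair's immediate becomes the original offset minus BASE_OFF and must fit the signed, msize-scaled 7-bit LDP/STP range.  The alignment fix-ups of the real code are deliberately omitted in this sketch:

#include <stdbool.h>

/* Do both adjusted immediates fit the LDP/STP offset range?  OFF1 and OFF3
   are the byte offsets of the first access of each pair, MSIZE the access
   size, BASE_OFF the anchor added to the base register.  */
bool
ldpstp_offsets_fit (long long off1, long long off3,
		    long long base_off, long long msize)
{
  long long lo = -msize * 0x40;		/* most negative immediate */
  long long hi = msize * (0x40 - 1);	/* most positive immediate */
  long long new1 = off1 - base_off;	/* first LDP/STP  */
  long long new3 = off3 - base_off;	/* second LDP/STP */
  return new1 >= lo && new1 <= hi && new3 >= lo && new3 <= hi;
}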
19508
76a34e3f
RS
19509/* Implement TARGET_VECTORIZE_EMPTY_MASK_IS_EXPENSIVE. Assume for now that
19510 it isn't worth branching around empty masked ops (including masked
19511 stores). */
19512
19513static bool
19514aarch64_empty_mask_is_expensive (unsigned)
19515{
19516 return false;
19517}
19518
1b1e81f8
JW
19519/* Return 1 if pseudo register should be created and used to hold
19520 GOT address for PIC code. */
19521
19522bool
19523aarch64_use_pseudo_pic_reg (void)
19524{
19525 return aarch64_cmodel == AARCH64_CMODEL_SMALL_SPIC;
19526}
19527
7b841a12
JW
19528/* Implement TARGET_UNSPEC_MAY_TRAP_P. */
19529
19530static int
19531aarch64_unspec_may_trap_p (const_rtx x, unsigned flags)
19532{
19533 switch (XINT (x, 1))
19534 {
19535 case UNSPEC_GOTSMALLPIC:
19536 case UNSPEC_GOTSMALLPIC28K:
19537 case UNSPEC_GOTTINYPIC:
19538 return 0;
19539 default:
19540 break;
19541 }
19542
19543 return default_unspec_may_trap_p (x, flags);
19544}
19545
39252973
KT
19546
19547/* If X is a positive CONST_DOUBLE with a value that is a power of 2
19548 return the log2 of that value. Otherwise return -1. */
19549
19550int
19551aarch64_fpconst_pow_of_2 (rtx x)
19552{
19553 const REAL_VALUE_TYPE *r;
19554
19555 if (!CONST_DOUBLE_P (x))
19556 return -1;
19557
19558 r = CONST_DOUBLE_REAL_VALUE (x);
19559
19560 if (REAL_VALUE_NEGATIVE (*r)
19561 || REAL_VALUE_ISNAN (*r)
19562 || REAL_VALUE_ISINF (*r)
19563 || !real_isinteger (r, DFmode))
19564 return -1;
19565
19566 return exact_log2 (real_to_integer (r));
19567}
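The same test expressed on a host double, for illustration only (frexp-based rather than GCC's real_* routines):

#include <math.h>

/* Return the exact log2 of a strictly positive power-of-two value, or -1
   otherwise (negative, zero, NaN, infinite, non-integer or not a power of
   two).  A power of two has a frexp mantissa of exactly 0.5.  */
int
fpconst_pow_of_2 (double x)
{
  if (!(x > 0) || isinf (x) || x != floor (x))
    return -1;				/* also rejects NaN: NaN > 0 is false */

  int exp;
  double m = frexp (x, &exp);		/* x = m * 2^exp, 0.5 <= m < 1 */
  return m == 0.5 ? exp - 1 : -1;
}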
19568
19569/* If X is a vector of equal CONST_DOUBLE values and that value is
19570 Y, return the aarch64_fpconst_pow_of_2 of Y. Otherwise return -1. */
19571
19572int
19573aarch64_vec_fpconst_pow_of_2 (rtx x)
19574{
6a70badb
RS
19575 int nelts;
19576 if (GET_CODE (x) != CONST_VECTOR
19577 || !CONST_VECTOR_NUNITS (x).is_constant (&nelts))
39252973
KT
19578 return -1;
19579
19580 if (GET_MODE_CLASS (GET_MODE (x)) != MODE_VECTOR_FLOAT)
19581 return -1;
19582
19583 int firstval = aarch64_fpconst_pow_of_2 (CONST_VECTOR_ELT (x, 0));
19584 if (firstval <= 0)
19585 return -1;
19586
6a70badb 19587 for (int i = 1; i < nelts; i++)
39252973
KT
19588 if (aarch64_fpconst_pow_of_2 (CONST_VECTOR_ELT (x, i)) != firstval)
19589 return -1;
19590
19591 return firstval;
19592}
19593
11e554b3
JG
19594/* Implement TARGET_PROMOTED_TYPE to promote 16-bit floating point types
19595 to float.
19596
19597 __fp16 always promotes through this hook.
19598 _Float16 may promote if TARGET_FLT_EVAL_METHOD is 16, but we do that
19599 through the generic excess precision logic rather than here. */
19600
c2ec330c
AL
19601static tree
19602aarch64_promoted_type (const_tree t)
19603{
11e554b3
JG
19604 if (SCALAR_FLOAT_TYPE_P (t)
19605 && TYPE_MAIN_VARIANT (t) == aarch64_fp16_type_node)
c2ec330c 19606 return float_type_node;
11e554b3 19607
c2ec330c
AL
19608 return NULL_TREE;
19609}
ee62a5a6
RS
19610
19611/* Implement the TARGET_OPTAB_SUPPORTED_P hook. */
19612
19613static bool
9acc9cbe 19614aarch64_optab_supported_p (int op, machine_mode mode1, machine_mode,
ee62a5a6
RS
19615 optimization_type opt_type)
19616{
19617 switch (op)
19618 {
19619 case rsqrt_optab:
9acc9cbe 19620 return opt_type == OPTIMIZE_FOR_SPEED && use_rsqrt_p (mode1);
ee62a5a6
RS
19621
19622 default:
19623 return true;
19624 }
19625}
19626
43cacb12
RS
19627/* Implement the TARGET_DWARF_POLY_INDETERMINATE_VALUE hook. */
19628
19629static unsigned int
19630aarch64_dwarf_poly_indeterminate_value (unsigned int i, unsigned int *factor,
19631 int *offset)
19632{
19633 /* Polynomial invariant 1 == (VG / 2) - 1. */
19634 gcc_assert (i == 1);
19635 *factor = 2;
19636 *offset = 1;
19637 return AARCH64_DWARF_VG;
19638}
19639
11e554b3
JG
19640/* Implement TARGET_LIBGCC_FLOATING_POINT_MODE_SUPPORTED_P - return TRUE
19641 if MODE is HFmode, and punt to the generic implementation otherwise. */
19642
19643static bool
7c5bd57a 19644aarch64_libgcc_floating_mode_supported_p (scalar_float_mode mode)
11e554b3
JG
19645{
19646 return (mode == HFmode
19647 ? true
19648 : default_libgcc_floating_mode_supported_p (mode));
19649}
19650
2e5f8203
JG
19651/* Implement TARGET_SCALAR_MODE_SUPPORTED_P - return TRUE
19652 if MODE is HFmode, and punt to the generic implementation otherwise. */
19653
19654static bool
18e2a8b8 19655aarch64_scalar_mode_supported_p (scalar_mode mode)
2e5f8203
JG
19656{
19657 return (mode == HFmode
19658 ? true
19659 : default_scalar_mode_supported_p (mode));
19660}
19661
11e554b3
JG
19662/* Set the value of FLT_EVAL_METHOD.
19663 ISO/IEC TS 18661-3 defines two values that we'd like to make use of:
19664
19665 0: evaluate all operations and constants, whose semantic type has at
19666 most the range and precision of type float, to the range and
19667 precision of float; evaluate all other operations and constants to
19668 the range and precision of the semantic type;
19669
19670 N, where _FloatN is a supported interchange floating type
19671 evaluate all operations and constants, whose semantic type has at
19672 most the range and precision of _FloatN type, to the range and
19673 precision of the _FloatN type; evaluate all other operations and
19674 constants to the range and precision of the semantic type;
19675
19676 If we have the ARMv8.2-A extensions then we support _Float16 in native
19677 precision, so we should set this to 16. Otherwise, we support the type,
19678 but want to evaluate expressions in float precision, so set this to
19679 0. */
19680
19681static enum flt_eval_method
19682aarch64_excess_precision (enum excess_precision_type type)
19683{
19684 switch (type)
19685 {
19686 case EXCESS_PRECISION_TYPE_FAST:
19687 case EXCESS_PRECISION_TYPE_STANDARD:
19688 /* We can calculate either in 16-bit range and precision or
19689 32-bit range and precision. Make that decision based on whether
19690 we have native support for the ARMv8.2-A 16-bit floating-point
19691 instructions or not. */
19692 return (TARGET_FP_F16INST
19693 ? FLT_EVAL_METHOD_PROMOTE_TO_FLOAT16
19694 : FLT_EVAL_METHOD_PROMOTE_TO_FLOAT);
19695 case EXCESS_PRECISION_TYPE_IMPLICIT:
19696 return FLT_EVAL_METHOD_PROMOTE_TO_FLOAT16;
19697 default:
19698 gcc_unreachable ();
19699 }
19700 return FLT_EVAL_METHOD_UNPREDICTABLE;
19701}
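/* Illustrative consequence of the above: when TARGET_FP_F16INST is set
   (for example by a hypothetical -march=armv8.2-a+fp16 compile),
   "_Float16 a, b; a * b" is evaluated directly in HFmode; without it
   the operands are promoted and the multiplication happens in float,
   with the result converted back to _Float16 only where the language
   semantics require it.  */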
19702
b48d6421
KT
19703/* Implement TARGET_SCHED_CAN_SPECULATE_INSN. Return true if INSN can be
19704 scheduled for speculative execution. Reject the long-running division
19705 and square-root instructions. */
19706
19707static bool
19708aarch64_sched_can_speculate_insn (rtx_insn *insn)
19709{
19710 switch (get_attr_type (insn))
19711 {
19712 case TYPE_SDIV:
19713 case TYPE_UDIV:
19714 case TYPE_FDIVS:
19715 case TYPE_FDIVD:
19716 case TYPE_FSQRTS:
19717 case TYPE_FSQRTD:
19718 case TYPE_NEON_FP_SQRT_S:
19719 case TYPE_NEON_FP_SQRT_D:
19720 case TYPE_NEON_FP_SQRT_S_Q:
19721 case TYPE_NEON_FP_SQRT_D_Q:
19722 case TYPE_NEON_FP_DIV_S:
19723 case TYPE_NEON_FP_DIV_D:
19724 case TYPE_NEON_FP_DIV_S_Q:
19725 case TYPE_NEON_FP_DIV_D_Q:
19726 return false;
19727 default:
19728 return true;
19729 }
19730}
19731
43cacb12
RS
19732/* Implement TARGET_COMPUTE_PRESSURE_CLASSES. */
19733
19734static int
19735aarch64_compute_pressure_classes (reg_class *classes)
19736{
19737 int i = 0;
19738 classes[i++] = GENERAL_REGS;
19739 classes[i++] = FP_REGS;
19740 /* PR_REGS isn't a useful pressure class because many predicate pseudo
19741 registers need to go in PR_LO_REGS at some point during their
19742 lifetime. Splitting it into two halves has the effect of making
19743 all predicates count against PR_LO_REGS, so that we try whenever
19744 possible to restrict the number of live predicates to 8. This
19745 greatly reduces the amount of spilling in certain loops. */
19746 classes[i++] = PR_LO_REGS;
19747 classes[i++] = PR_HI_REGS;
19748 return i;
19749}
19750
19751/* Implement TARGET_CAN_CHANGE_MODE_CLASS. */
19752
19753static bool
19754aarch64_can_change_mode_class (machine_mode from,
19755 machine_mode to, reg_class_t)
19756{
002092be
RS
19757 if (BYTES_BIG_ENDIAN)
19758 {
19759 bool from_sve_p = aarch64_sve_data_mode_p (from);
19760 bool to_sve_p = aarch64_sve_data_mode_p (to);
19761
19762 /* Don't allow changes between SVE data modes and non-SVE modes.
19763 See the comment at the head of aarch64-sve.md for details. */
19764 if (from_sve_p != to_sve_p)
19765 return false;
19766
19767 /* Don't allow changes in element size: lane 0 of the new vector
19768 would not then be lane 0 of the old vector. See the comment
19769 above aarch64_maybe_expand_sve_subreg_move for a more detailed
19770 description.
19771
19772 In the worst case, this forces a register to be spilled in
19773 one mode and reloaded in the other, which handles the
19774 endianness correctly. */
19775 if (from_sve_p && GET_MODE_UNIT_SIZE (from) != GET_MODE_UNIT_SIZE (to))
19776 return false;
19777 }
43cacb12
RS
19778 return true;
19779}
19780
5cce8171
RS
19781/* Implement TARGET_SELECT_EARLY_REMAT_MODES. */
19782
19783static void
19784aarch64_select_early_remat_modes (sbitmap modes)
19785{
19786 /* SVE values are not normally live across a call, so it should be
19787 worth doing early rematerialization even in VL-specific mode. */
19788 for (int i = 0; i < NUM_MACHINE_MODES; ++i)
19789 {
19790 machine_mode mode = (machine_mode) i;
19791 unsigned int vec_flags = aarch64_classify_vector_mode (mode);
19792 if (vec_flags & VEC_ANY_SVE)
19793 bitmap_set_bit (modes, i);
19794 }
19795}
19796
c0111dc4
RE
19797/* Override the default target speculation_safe_value. */
19798static rtx
19799aarch64_speculation_safe_value (machine_mode mode,
19800 rtx result, rtx val, rtx failval)
19801{
19802 /* Maybe we should warn if falling back to hard barriers. They are
19803 likely to be noticeably more expensive than the alternative below. */
19804 if (!aarch64_track_speculation)
19805 return default_speculation_safe_value (mode, result, val, failval);
19806
19807 if (!REG_P (val))
19808 val = copy_to_mode_reg (mode, val);
19809
19810 if (!aarch64_reg_or_zero (failval, mode))
19811 failval = copy_to_mode_reg (mode, failval);
19812
21cebf90 19813 emit_insn (gen_despeculate_copy (mode, result, val, failval));
c0111dc4
RE
19814 return result;
19815}
19816
2d56d6ba
KT
19817/* Implement TARGET_ESTIMATED_POLY_VALUE.
19818 Look into the tuning structure for an estimate.
19819 VAL.coeffs[1] is multiplied by the number of VQ chunks over the initial
19820 Advanced SIMD 128 bits. */
19821
19822static HOST_WIDE_INT
19823aarch64_estimated_poly_value (poly_int64 val)
19824{
19825 enum aarch64_sve_vector_bits_enum width_source
19826 = aarch64_tune_params.sve_width;
19827
19828 /* If we still don't have an estimate, use the default. */
19829 if (width_source == SVE_SCALABLE)
19830 return default_estimated_poly_value (val);
19831
19832 HOST_WIDE_INT over_128 = width_source - 128;
19833 return val.coeffs[0] + val.coeffs[1] * over_128 / 128;
19834}
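/* Worked example for the hook above (hypothetical tuning): with
   sve_width == 256 a poly_int64 of 2 + 2x, such as the element count
   of VNx2DImode, is estimated as 2 + 2 * (256 - 128) / 128 == 4; with
   sve_width == 512 the same value estimates to 8.  The guarded-out
   sketch below (helper name made up) mirrors the arithmetic on plain
   integers.  */
#if 0
static long
estimated_poly_value_sketch (long coeff0, long coeff1, long sve_width)
{
  /* COEFF1 counts how many extra units each 128-bit SVE chunk beyond
     the first contributes to the estimate.  */
  long over_128 = sve_width - 128;
  return coeff0 + coeff1 * over_128 / 128;
}
#endif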
19835
d9186814
SE
19836
19837/* Return true for types that could be supported as SIMD return or
19838 argument types. */
19839
19840static bool
19841supported_simd_type (tree t)
19842{
19843 if (SCALAR_FLOAT_TYPE_P (t) || INTEGRAL_TYPE_P (t) || POINTER_TYPE_P (t))
19844 {
19845 HOST_WIDE_INT s = tree_to_shwi (TYPE_SIZE_UNIT (t));
19846 return s == 1 || s == 2 || s == 4 || s == 8;
19847 }
19848 return false;
19849}
19850
19851/* Return true for types that currently are supported as SIMD return
19852 or argument types. */
19853
19854static bool
19855currently_supported_simd_type (tree t, tree b)
19856{
19857 if (COMPLEX_FLOAT_TYPE_P (t))
19858 return false;
19859
19860 if (TYPE_SIZE (t) != TYPE_SIZE (b))
19861 return false;
19862
19863 return supported_simd_type (t);
19864}
19865
19866/* Implement TARGET_SIMD_CLONE_COMPUTE_VECSIZE_AND_SIMDLEN. */
19867
19868static int
19869aarch64_simd_clone_compute_vecsize_and_simdlen (struct cgraph_node *node,
19870 struct cgraph_simd_clone *clonei,
19871 tree base_type, int num)
19872{
19873 tree t, ret_type, arg_type;
19874 unsigned int elt_bits, vec_bits, count;
19875
19876 if (!TARGET_SIMD)
19877 return 0;
19878
19879 if (clonei->simdlen
19880 && (clonei->simdlen < 2
19881 || clonei->simdlen > 1024
19882 || (clonei->simdlen & (clonei->simdlen - 1)) != 0))
19883 {
19884 warning_at (DECL_SOURCE_LOCATION (node->decl), 0,
19885 "unsupported simdlen %d", clonei->simdlen);
19886 return 0;
19887 }
19888
19889 ret_type = TREE_TYPE (TREE_TYPE (node->decl));
19890 if (TREE_CODE (ret_type) != VOID_TYPE
19891 && !currently_supported_simd_type (ret_type, base_type))
19892 {
19893 if (TYPE_SIZE (ret_type) != TYPE_SIZE (base_type))
19894 warning_at (DECL_SOURCE_LOCATION (node->decl), 0,
19895 "GCC does not currently support mixed size types "
19896 "for %<simd%> functions");
19897 else if (supported_simd_type (ret_type))
19898 warning_at (DECL_SOURCE_LOCATION (node->decl), 0,
19899 "GCC does not currently support return type %qT "
19900 "for %<simd%> functions", ret_type);
19901 else
19902 warning_at (DECL_SOURCE_LOCATION (node->decl), 0,
19903 "unsupported return type %qT for %<simd%> functions",
19904 ret_type);
19905 return 0;
19906 }
19907
19908 for (t = DECL_ARGUMENTS (node->decl); t; t = DECL_CHAIN (t))
19909 {
19910 arg_type = TREE_TYPE (t);
19911
19912 if (!currently_supported_simd_type (arg_type, base_type))
19913 {
19914 if (TYPE_SIZE (arg_type) != TYPE_SIZE (base_type))
19915 warning_at (DECL_SOURCE_LOCATION (node->decl), 0,
19916 "GCC does not currently support mixed size types "
19917 "for %<simd%> functions");
19918 else
19919 warning_at (DECL_SOURCE_LOCATION (node->decl), 0,
19920 "GCC does not currently support argument type %qT "
19921 "for %<simd%> functions", arg_type);
19922 return 0;
19923 }
19924 }
19925
19926 clonei->vecsize_mangle = 'n';
19927 clonei->mask_mode = VOIDmode;
19928 elt_bits = GET_MODE_BITSIZE (SCALAR_TYPE_MODE (base_type));
19929 if (clonei->simdlen == 0)
19930 {
19931 count = 2;
19932 vec_bits = (num == 0 ? 64 : 128);
19933 clonei->simdlen = vec_bits / elt_bits;
19934 }
19935 else
19936 {
19937 count = 1;
19938 vec_bits = clonei->simdlen * elt_bits;
19939 if (vec_bits != 64 && vec_bits != 128)
19940 {
19941 warning_at (DECL_SOURCE_LOCATION (node->decl), 0,
19942 "GCC does not currently support simdlen %d for type %qT",
19943 clonei->simdlen, base_type);
19944 return 0;
19945 }
19946 }
19947 clonei->vecsize_int = vec_bits;
19948 clonei->vecsize_float = vec_bits;
19949 return count;
19950}
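/* Worked example for the hook above (hypothetical declaration): for
   "#pragma omp declare simd" on "float f (float x)" with no simdlen
   clause, elt_bits is 32, so two clones are advertised: simdlen 2
   (64-bit vectors) and simdlen 4 (128-bit vectors), both with the 'n'
   mangling.  An explicit simdlen (8) on the same function gives
   vec_bits == 256 and is rejected with the warning above.  */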
19951
19952/* Implement TARGET_SIMD_CLONE_ADJUST. */
19953
19954static void
19955aarch64_simd_clone_adjust (struct cgraph_node *node)
19956{
19957 /* Add aarch64_vector_pcs target attribute to SIMD clones so they
19958 use the correct ABI. */
19959
19960 tree t = TREE_TYPE (node->decl);
19961 TYPE_ATTRIBUTES (t) = make_attribute ("aarch64_vector_pcs", "default",
19962 TYPE_ATTRIBUTES (t));
19963}
19964
19965/* Implement TARGET_SIMD_CLONE_USABLE. */
19966
19967static int
19968aarch64_simd_clone_usable (struct cgraph_node *node)
19969{
19970 switch (node->simdclone->vecsize_mangle)
19971 {
19972 case 'n':
19973 if (!TARGET_SIMD)
19974 return -1;
19975 return 0;
19976 default:
19977 gcc_unreachable ();
19978 }
19979}
19980
497f281c
SE
19981/* Implement TARGET_COMP_TYPE_ATTRIBUTES. */
19982
19983static int
19984aarch64_comp_type_attributes (const_tree type1, const_tree type2)
19985{
19986 if (lookup_attribute ("aarch64_vector_pcs", TYPE_ATTRIBUTES (type1))
19987 != lookup_attribute ("aarch64_vector_pcs", TYPE_ATTRIBUTES (type2)))
19988 return 0;
19989 return 1;
19990}
19991
3bac1e20
SE
19992/* Implement TARGET_GET_MULTILIB_ABI_NAME. */
19993
19994static const char *
19995aarch64_get_multilib_abi_name (void)
19996{
19997 if (TARGET_BIG_END)
19998 return TARGET_ILP32 ? "aarch64_be_ilp32" : "aarch64_be";
19999 return TARGET_ILP32 ? "aarch64_ilp32" : "aarch64";
20000}
20001
e76c8e56
JJ
20002/* Implement TARGET_STACK_PROTECT_GUARD. For a global variable
20003 based guard, use the default implementation; otherwise return
20004 a null tree. */
20005static tree
20006aarch64_stack_protect_guard (void)
20007{
20008 if (aarch64_stack_protector_guard == SSP_GLOBAL)
20009 return default_stack_protect_guard ();
20010
20011 return NULL_TREE;
20012}
20013
32efff9f
SD
20014/* Implement TARGET_ASM_FILE_END for AArch64. This adds the AArch64 GNU NOTE
20015 section at the end if needed. */
20016#define GNU_PROPERTY_AARCH64_FEATURE_1_AND 0xc0000000
20017#define GNU_PROPERTY_AARCH64_FEATURE_1_BTI (1U << 0)
20018#define GNU_PROPERTY_AARCH64_FEATURE_1_PAC (1U << 1)
20019void
20020aarch64_file_end_indicate_exec_stack ()
20021{
20022 file_end_indicate_exec_stack ();
20023
20024 unsigned feature_1_and = 0;
20025 if (aarch64_bti_enabled ())
20026 feature_1_and |= GNU_PROPERTY_AARCH64_FEATURE_1_BTI;
20027
20028 if (aarch64_ra_sign_scope != AARCH64_FUNCTION_NONE)
20029 feature_1_and |= GNU_PROPERTY_AARCH64_FEATURE_1_PAC;
20030
20031 if (feature_1_and)
20032 {
20033 /* Generate .note.gnu.property section. */
20034 switch_to_section (get_section (".note.gnu.property",
20035 SECTION_NOTYPE, NULL));
20036
20037 /* PT_NOTE header: namesz, descsz, type.
20038 namesz = 4 ("GNU\0")
20039 descsz = 16 (Size of the program property array)
20040 [(12 + padding) * Number of array elements]
20041 type = 5 (NT_GNU_PROPERTY_TYPE_0). */
20042 assemble_align (POINTER_SIZE);
20043 assemble_integer (GEN_INT (4), 4, 32, 1);
20044 assemble_integer (GEN_INT (ROUND_UP (12, POINTER_BYTES)), 4, 32, 1);
20045 assemble_integer (GEN_INT (5), 4, 32, 1);
20046
20047 /* PT_NOTE name. */
20048 assemble_string ("GNU", 4);
20049
20050 /* PT_NOTE contents for NT_GNU_PROPERTY_TYPE_0:
20051 type = GNU_PROPERTY_AARCH64_FEATURE_1_AND
20052 datasz = 4
20053 data = feature_1_and. */
20054 assemble_integer (GEN_INT (GNU_PROPERTY_AARCH64_FEATURE_1_AND), 4, 32, 1);
20055 assemble_integer (GEN_INT (4), 4, 32, 1);
20056 assemble_integer (GEN_INT (feature_1_and), 4, 32, 1);
20057
20058 /* Pad the size of the note to the required alignment. */
20059 assemble_align (POINTER_SIZE);
20060 }
20061}
20062#undef GNU_PROPERTY_AARCH64_FEATURE_1_PAC
20063#undef GNU_PROPERTY_AARCH64_FEATURE_1_BTI
20064#undef GNU_PROPERTY_AARCH64_FEATURE_1_AND
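/* Illustrative layout of the note emitted above (LP64, with both BTI
   and PAC enabled, e.g. via -mbranch-protection=standard):
   namesz = 4, descsz = 16, type = 5 (NT_GNU_PROPERTY_TYPE_0),
   name = "GNU\0", then one property with pr_type = 0xc0000000
   (GNU_PROPERTY_AARCH64_FEATURE_1_AND), pr_datasz = 4 and
   pr_data = 0x3 (BTI | PAC), followed by 4 bytes of padding so the
   descriptor size is a multiple of POINTER_BYTES.  */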
e76c8e56 20065
51b86113
DM
20066/* Target-specific selftests. */
20067
20068#if CHECKING_P
20069
20070namespace selftest {
20071
20072/* Selftest for the RTL loader.
20073 Verify that the RTL loader copes with a dump from
20074 print_rtx_function. This is essentially just a test that class
20075 function_reader can handle a real dump, but it also verifies
20076 that lookup_reg_by_dump_name correctly handles hard regs.
20077 The presence of hard reg names in the dump means that the test is
20078 target-specific, hence it is in this file. */
20079
20080static void
20081aarch64_test_loading_full_dump ()
20082{
20083 rtl_dump_test t (SELFTEST_LOCATION, locate_file ("aarch64/times-two.rtl"));
20084
20085 ASSERT_STREQ ("times_two", IDENTIFIER_POINTER (DECL_NAME (cfun->decl)));
20086
20087 rtx_insn *insn_1 = get_insn_by_uid (1);
20088 ASSERT_EQ (NOTE, GET_CODE (insn_1));
20089
20090 rtx_insn *insn_15 = get_insn_by_uid (15);
20091 ASSERT_EQ (INSN, GET_CODE (insn_15));
20092 ASSERT_EQ (USE, GET_CODE (PATTERN (insn_15)));
20093
20094 /* Verify crtl->return_rtx. */
20095 ASSERT_EQ (REG, GET_CODE (crtl->return_rtx));
20096 ASSERT_EQ (0, REGNO (crtl->return_rtx));
20097 ASSERT_EQ (SImode, GET_MODE (crtl->return_rtx));
20098}
20099
20100/* Run all target-specific selftests. */
20101
20102static void
20103aarch64_run_selftests (void)
20104{
20105 aarch64_test_loading_full_dump ();
20106}
20107
20108} // namespace selftest
20109
20110#endif /* #if CHECKING_P */
20111
cd0b2d36
RR
20112#undef TARGET_STACK_PROTECT_GUARD
20113#define TARGET_STACK_PROTECT_GUARD aarch64_stack_protect_guard
20114
43e9d192
IB
20115#undef TARGET_ADDRESS_COST
20116#define TARGET_ADDRESS_COST aarch64_address_cost
20117
20118/* This hook determines whether unnamed bitfields affect the alignment
20119 of the containing structure. The hook returns true if the structure
20120 should inherit the alignment requirements of an unnamed bitfield's
20121 type. */
20122#undef TARGET_ALIGN_ANON_BITFIELD
20123#define TARGET_ALIGN_ANON_BITFIELD hook_bool_void_true
20124
20125#undef TARGET_ASM_ALIGNED_DI_OP
20126#define TARGET_ASM_ALIGNED_DI_OP "\t.xword\t"
20127
20128#undef TARGET_ASM_ALIGNED_HI_OP
20129#define TARGET_ASM_ALIGNED_HI_OP "\t.hword\t"
20130
20131#undef TARGET_ASM_ALIGNED_SI_OP
20132#define TARGET_ASM_ALIGNED_SI_OP "\t.word\t"
20133
20134#undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
20135#define TARGET_ASM_CAN_OUTPUT_MI_THUNK \
20136 hook_bool_const_tree_hwi_hwi_const_tree_true
20137
e1c1ecb0
KT
20138#undef TARGET_ASM_FILE_START
20139#define TARGET_ASM_FILE_START aarch64_start_file
20140
43e9d192
IB
20141#undef TARGET_ASM_OUTPUT_MI_THUNK
20142#define TARGET_ASM_OUTPUT_MI_THUNK aarch64_output_mi_thunk
20143
20144#undef TARGET_ASM_SELECT_RTX_SECTION
20145#define TARGET_ASM_SELECT_RTX_SECTION aarch64_select_rtx_section
20146
20147#undef TARGET_ASM_TRAMPOLINE_TEMPLATE
20148#define TARGET_ASM_TRAMPOLINE_TEMPLATE aarch64_asm_trampoline_template
20149
20150#undef TARGET_BUILD_BUILTIN_VA_LIST
20151#define TARGET_BUILD_BUILTIN_VA_LIST aarch64_build_builtin_va_list
20152
20153#undef TARGET_CALLEE_COPIES
20154#define TARGET_CALLEE_COPIES hook_bool_CUMULATIVE_ARGS_mode_tree_bool_false
20155
20156#undef TARGET_CAN_ELIMINATE
20157#define TARGET_CAN_ELIMINATE aarch64_can_eliminate
20158
1fd8d40c
KT
20159#undef TARGET_CAN_INLINE_P
20160#define TARGET_CAN_INLINE_P aarch64_can_inline_p
20161
43e9d192
IB
20162#undef TARGET_CANNOT_FORCE_CONST_MEM
20163#define TARGET_CANNOT_FORCE_CONST_MEM aarch64_cannot_force_const_mem
20164
50487d79
EM
20165#undef TARGET_CASE_VALUES_THRESHOLD
20166#define TARGET_CASE_VALUES_THRESHOLD aarch64_case_values_threshold
20167
43e9d192
IB
20168#undef TARGET_CONDITIONAL_REGISTER_USAGE
20169#define TARGET_CONDITIONAL_REGISTER_USAGE aarch64_conditional_register_usage
20170
20171/* Only the least significant bit is used for initialization guard
20172 variables. */
20173#undef TARGET_CXX_GUARD_MASK_BIT
20174#define TARGET_CXX_GUARD_MASK_BIT hook_bool_void_true
20175
20176#undef TARGET_C_MODE_FOR_SUFFIX
20177#define TARGET_C_MODE_FOR_SUFFIX aarch64_c_mode_for_suffix
20178
20179#ifdef TARGET_BIG_ENDIAN_DEFAULT
20180#undef TARGET_DEFAULT_TARGET_FLAGS
20181#define TARGET_DEFAULT_TARGET_FLAGS (MASK_BIG_END)
20182#endif
20183
20184#undef TARGET_CLASS_MAX_NREGS
20185#define TARGET_CLASS_MAX_NREGS aarch64_class_max_nregs
20186
119103ca
JG
20187#undef TARGET_BUILTIN_DECL
20188#define TARGET_BUILTIN_DECL aarch64_builtin_decl
20189
a6fc00da
BH
20190#undef TARGET_BUILTIN_RECIPROCAL
20191#define TARGET_BUILTIN_RECIPROCAL aarch64_builtin_reciprocal
20192
11e554b3
JG
20193#undef TARGET_C_EXCESS_PRECISION
20194#define TARGET_C_EXCESS_PRECISION aarch64_excess_precision
20195
43e9d192
IB
20196#undef TARGET_EXPAND_BUILTIN
20197#define TARGET_EXPAND_BUILTIN aarch64_expand_builtin
20198
20199#undef TARGET_EXPAND_BUILTIN_VA_START
20200#define TARGET_EXPAND_BUILTIN_VA_START aarch64_expand_builtin_va_start
20201
9697e620
JG
20202#undef TARGET_FOLD_BUILTIN
20203#define TARGET_FOLD_BUILTIN aarch64_fold_builtin
20204
43e9d192
IB
20205#undef TARGET_FUNCTION_ARG
20206#define TARGET_FUNCTION_ARG aarch64_function_arg
20207
20208#undef TARGET_FUNCTION_ARG_ADVANCE
20209#define TARGET_FUNCTION_ARG_ADVANCE aarch64_function_arg_advance
20210
20211#undef TARGET_FUNCTION_ARG_BOUNDARY
20212#define TARGET_FUNCTION_ARG_BOUNDARY aarch64_function_arg_boundary
20213
76b0cbf8
RS
20214#undef TARGET_FUNCTION_ARG_PADDING
20215#define TARGET_FUNCTION_ARG_PADDING aarch64_function_arg_padding
20216
43cacb12
RS
20217#undef TARGET_GET_RAW_RESULT_MODE
20218#define TARGET_GET_RAW_RESULT_MODE aarch64_get_reg_raw_mode
20219#undef TARGET_GET_RAW_ARG_MODE
20220#define TARGET_GET_RAW_ARG_MODE aarch64_get_reg_raw_mode
20221
43e9d192
IB
20222#undef TARGET_FUNCTION_OK_FOR_SIBCALL
20223#define TARGET_FUNCTION_OK_FOR_SIBCALL aarch64_function_ok_for_sibcall
20224
20225#undef TARGET_FUNCTION_VALUE
20226#define TARGET_FUNCTION_VALUE aarch64_function_value
20227
20228#undef TARGET_FUNCTION_VALUE_REGNO_P
20229#define TARGET_FUNCTION_VALUE_REGNO_P aarch64_function_value_regno_p
20230
fc72cba7
AL
20231#undef TARGET_GIMPLE_FOLD_BUILTIN
20232#define TARGET_GIMPLE_FOLD_BUILTIN aarch64_gimple_fold_builtin
0ac198d3 20233
43e9d192
IB
20234#undef TARGET_GIMPLIFY_VA_ARG_EXPR
20235#define TARGET_GIMPLIFY_VA_ARG_EXPR aarch64_gimplify_va_arg_expr
20236
20237#undef TARGET_INIT_BUILTINS
20238#define TARGET_INIT_BUILTINS aarch64_init_builtins
20239
c64f7d37
WD
20240#undef TARGET_IRA_CHANGE_PSEUDO_ALLOCNO_CLASS
20241#define TARGET_IRA_CHANGE_PSEUDO_ALLOCNO_CLASS \
20242 aarch64_ira_change_pseudo_allocno_class
20243
43e9d192
IB
20244#undef TARGET_LEGITIMATE_ADDRESS_P
20245#define TARGET_LEGITIMATE_ADDRESS_P aarch64_legitimate_address_hook_p
20246
20247#undef TARGET_LEGITIMATE_CONSTANT_P
20248#define TARGET_LEGITIMATE_CONSTANT_P aarch64_legitimate_constant_p
20249
491ec060
WD
20250#undef TARGET_LEGITIMIZE_ADDRESS_DISPLACEMENT
20251#define TARGET_LEGITIMIZE_ADDRESS_DISPLACEMENT \
20252 aarch64_legitimize_address_displacement
20253
43e9d192
IB
20254#undef TARGET_LIBGCC_CMP_RETURN_MODE
20255#define TARGET_LIBGCC_CMP_RETURN_MODE aarch64_libgcc_cmp_return_mode
20256
11e554b3
JG
20257#undef TARGET_LIBGCC_FLOATING_MODE_SUPPORTED_P
20258#define TARGET_LIBGCC_FLOATING_MODE_SUPPORTED_P \
20259aarch64_libgcc_floating_mode_supported_p
20260
ac2b960f
YZ
20261#undef TARGET_MANGLE_TYPE
20262#define TARGET_MANGLE_TYPE aarch64_mangle_type
20263
43e9d192
IB
20264#undef TARGET_MEMORY_MOVE_COST
20265#define TARGET_MEMORY_MOVE_COST aarch64_memory_move_cost
20266
26e0ff94
WD
20267#undef TARGET_MIN_DIVISIONS_FOR_RECIP_MUL
20268#define TARGET_MIN_DIVISIONS_FOR_RECIP_MUL aarch64_min_divisions_for_recip_mul
20269
43e9d192
IB
20270#undef TARGET_MUST_PASS_IN_STACK
20271#define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size
20272
20273/* This target hook should return true if accesses to volatile bitfields
20274 should use the narrowest mode possible. It should return false if these
20275 accesses should use the bitfield container type. */
20276#undef TARGET_NARROW_VOLATILE_BITFIELD
20277#define TARGET_NARROW_VOLATILE_BITFIELD hook_bool_void_false
20278
20279#undef TARGET_OPTION_OVERRIDE
20280#define TARGET_OPTION_OVERRIDE aarch64_override_options
20281
20282#undef TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE
20283#define TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE \
20284 aarch64_override_options_after_change
20285
361fb3ee
KT
20286#undef TARGET_OPTION_SAVE
20287#define TARGET_OPTION_SAVE aarch64_option_save
20288
20289#undef TARGET_OPTION_RESTORE
20290#define TARGET_OPTION_RESTORE aarch64_option_restore
20291
20292#undef TARGET_OPTION_PRINT
20293#define TARGET_OPTION_PRINT aarch64_option_print
20294
5a2c8331
KT
20295#undef TARGET_OPTION_VALID_ATTRIBUTE_P
20296#define TARGET_OPTION_VALID_ATTRIBUTE_P aarch64_option_valid_attribute_p
20297
d78006d9
KT
20298#undef TARGET_SET_CURRENT_FUNCTION
20299#define TARGET_SET_CURRENT_FUNCTION aarch64_set_current_function
20300
43e9d192
IB
20301#undef TARGET_PASS_BY_REFERENCE
20302#define TARGET_PASS_BY_REFERENCE aarch64_pass_by_reference
20303
20304#undef TARGET_PREFERRED_RELOAD_CLASS
20305#define TARGET_PREFERRED_RELOAD_CLASS aarch64_preferred_reload_class
20306
cee66c68
WD
20307#undef TARGET_SCHED_REASSOCIATION_WIDTH
20308#define TARGET_SCHED_REASSOCIATION_WIDTH aarch64_reassociation_width
20309
c2ec330c
AL
20310#undef TARGET_PROMOTED_TYPE
20311#define TARGET_PROMOTED_TYPE aarch64_promoted_type
20312
43e9d192
IB
20313#undef TARGET_SECONDARY_RELOAD
20314#define TARGET_SECONDARY_RELOAD aarch64_secondary_reload
20315
20316#undef TARGET_SHIFT_TRUNCATION_MASK
20317#define TARGET_SHIFT_TRUNCATION_MASK aarch64_shift_truncation_mask
20318
20319#undef TARGET_SETUP_INCOMING_VARARGS
20320#define TARGET_SETUP_INCOMING_VARARGS aarch64_setup_incoming_varargs
20321
20322#undef TARGET_STRUCT_VALUE_RTX
20323#define TARGET_STRUCT_VALUE_RTX aarch64_struct_value_rtx
20324
20325#undef TARGET_REGISTER_MOVE_COST
20326#define TARGET_REGISTER_MOVE_COST aarch64_register_move_cost
20327
20328#undef TARGET_RETURN_IN_MEMORY
20329#define TARGET_RETURN_IN_MEMORY aarch64_return_in_memory
20330
20331#undef TARGET_RETURN_IN_MSB
20332#define TARGET_RETURN_IN_MSB aarch64_return_in_msb
20333
20334#undef TARGET_RTX_COSTS
7cc2145f 20335#define TARGET_RTX_COSTS aarch64_rtx_costs_wrapper
43e9d192 20336
2e5f8203
JG
20337#undef TARGET_SCALAR_MODE_SUPPORTED_P
20338#define TARGET_SCALAR_MODE_SUPPORTED_P aarch64_scalar_mode_supported_p
20339
d126a4ae
AP
20340#undef TARGET_SCHED_ISSUE_RATE
20341#define TARGET_SCHED_ISSUE_RATE aarch64_sched_issue_rate
20342
d03f7e44
MK
20343#undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
20344#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
20345 aarch64_sched_first_cycle_multipass_dfa_lookahead
20346
2d6bc7fa
KT
20347#undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD
20348#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD \
20349 aarch64_first_cycle_multipass_dfa_lookahead_guard
20350
827ab47a
KT
20351#undef TARGET_SHRINK_WRAP_GET_SEPARATE_COMPONENTS
20352#define TARGET_SHRINK_WRAP_GET_SEPARATE_COMPONENTS \
20353 aarch64_get_separate_components
20354
20355#undef TARGET_SHRINK_WRAP_COMPONENTS_FOR_BB
20356#define TARGET_SHRINK_WRAP_COMPONENTS_FOR_BB \
20357 aarch64_components_for_bb
20358
20359#undef TARGET_SHRINK_WRAP_DISQUALIFY_COMPONENTS
20360#define TARGET_SHRINK_WRAP_DISQUALIFY_COMPONENTS \
20361 aarch64_disqualify_components
20362
20363#undef TARGET_SHRINK_WRAP_EMIT_PROLOGUE_COMPONENTS
20364#define TARGET_SHRINK_WRAP_EMIT_PROLOGUE_COMPONENTS \
20365 aarch64_emit_prologue_components
20366
20367#undef TARGET_SHRINK_WRAP_EMIT_EPILOGUE_COMPONENTS
20368#define TARGET_SHRINK_WRAP_EMIT_EPILOGUE_COMPONENTS \
20369 aarch64_emit_epilogue_components
20370
20371#undef TARGET_SHRINK_WRAP_SET_HANDLED_COMPONENTS
20372#define TARGET_SHRINK_WRAP_SET_HANDLED_COMPONENTS \
20373 aarch64_set_handled_components
20374
43e9d192
IB
20375#undef TARGET_TRAMPOLINE_INIT
20376#define TARGET_TRAMPOLINE_INIT aarch64_trampoline_init
20377
20378#undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
20379#define TARGET_USE_BLOCKS_FOR_CONSTANT_P aarch64_use_blocks_for_constant_p
20380
20381#undef TARGET_VECTOR_MODE_SUPPORTED_P
20382#define TARGET_VECTOR_MODE_SUPPORTED_P aarch64_vector_mode_supported_p
20383
7df76747
N
20384#undef TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT
20385#define TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT \
20386 aarch64_builtin_support_vector_misalignment
20387
9f4cbab8
RS
20388#undef TARGET_ARRAY_MODE
20389#define TARGET_ARRAY_MODE aarch64_array_mode
20390
43e9d192
IB
20391#undef TARGET_ARRAY_MODE_SUPPORTED_P
20392#define TARGET_ARRAY_MODE_SUPPORTED_P aarch64_array_mode_supported_p
20393
8990e73a
TB
20394#undef TARGET_VECTORIZE_ADD_STMT_COST
20395#define TARGET_VECTORIZE_ADD_STMT_COST aarch64_add_stmt_cost
20396
20397#undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
20398#define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
20399 aarch64_builtin_vectorization_cost
20400
43e9d192
IB
20401#undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
20402#define TARGET_VECTORIZE_PREFERRED_SIMD_MODE aarch64_preferred_simd_mode
20403
42fc9a7f
JG
20404#undef TARGET_VECTORIZE_BUILTINS
20405#define TARGET_VECTORIZE_BUILTINS
20406
20407#undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
20408#define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
20409 aarch64_builtin_vectorized_function
20410
3b357264
JG
20411#undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES
20412#define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES \
20413 aarch64_autovectorize_vector_sizes
20414
aa87aced
KV
20415#undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV
20416#define TARGET_ATOMIC_ASSIGN_EXPAND_FENV \
20417 aarch64_atomic_assign_expand_fenv
20418
43e9d192
IB
20419/* Section anchor support. */
20420
20421#undef TARGET_MIN_ANCHOR_OFFSET
20422#define TARGET_MIN_ANCHOR_OFFSET -256
20423
20424/* Limit the maximum anchor offset to 4k-1, since that's the limit for a
20425 byte offset; we can do much more for larger data types, but have no way
20426 to determine the size of the access. We assume accesses are aligned. */
20427#undef TARGET_MAX_ANCHOR_OFFSET
20428#define TARGET_MAX_ANCHOR_OFFSET 4095
20429
db0253a4
TB
20430#undef TARGET_VECTOR_ALIGNMENT
20431#define TARGET_VECTOR_ALIGNMENT aarch64_simd_vector_alignment
20432
43cacb12
RS
20433#undef TARGET_VECTORIZE_PREFERRED_VECTOR_ALIGNMENT
20434#define TARGET_VECTORIZE_PREFERRED_VECTOR_ALIGNMENT \
20435 aarch64_vectorize_preferred_vector_alignment
db0253a4
TB
20436#undef TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE
20437#define TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE \
20438 aarch64_simd_vector_alignment_reachable
20439
88b08073
JG
20440/* vec_perm support. */
20441
f151c9e1
RS
20442#undef TARGET_VECTORIZE_VEC_PERM_CONST
20443#define TARGET_VECTORIZE_VEC_PERM_CONST \
20444 aarch64_vectorize_vec_perm_const
88b08073 20445
43cacb12
RS
20446#undef TARGET_VECTORIZE_GET_MASK_MODE
20447#define TARGET_VECTORIZE_GET_MASK_MODE aarch64_get_mask_mode
76a34e3f
RS
20448#undef TARGET_VECTORIZE_EMPTY_MASK_IS_EXPENSIVE
20449#define TARGET_VECTORIZE_EMPTY_MASK_IS_EXPENSIVE \
20450 aarch64_empty_mask_is_expensive
6a86928d
RS
20451#undef TARGET_PREFERRED_ELSE_VALUE
20452#define TARGET_PREFERRED_ELSE_VALUE \
20453 aarch64_preferred_else_value
43cacb12 20454
c2ec330c
AL
20455#undef TARGET_INIT_LIBFUNCS
20456#define TARGET_INIT_LIBFUNCS aarch64_init_libfuncs
70f09188 20457
706b2314 20458#undef TARGET_FIXED_CONDITION_CODE_REGS
70f09188
AP
20459#define TARGET_FIXED_CONDITION_CODE_REGS aarch64_fixed_condition_code_regs
20460
5cb74e90
RR
20461#undef TARGET_FLAGS_REGNUM
20462#define TARGET_FLAGS_REGNUM CC_REGNUM
20463
78607708
TV
20464#undef TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS
20465#define TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS true
20466
a3125fc2
CL
20467#undef TARGET_ASAN_SHADOW_OFFSET
20468#define TARGET_ASAN_SHADOW_OFFSET aarch64_asan_shadow_offset
20469
0c4ec427
RE
20470#undef TARGET_LEGITIMIZE_ADDRESS
20471#define TARGET_LEGITIMIZE_ADDRESS aarch64_legitimize_address
20472
b48d6421
KT
20473#undef TARGET_SCHED_CAN_SPECULATE_INSN
20474#define TARGET_SCHED_CAN_SPECULATE_INSN aarch64_sched_can_speculate_insn
20475
594bdd53
FY
20476#undef TARGET_CAN_USE_DOLOOP_P
20477#define TARGET_CAN_USE_DOLOOP_P can_use_doloop_if_innermost
20478
9bca63d4
WD
20479#undef TARGET_SCHED_ADJUST_PRIORITY
20480#define TARGET_SCHED_ADJUST_PRIORITY aarch64_sched_adjust_priority
20481
6a569cdd
KT
20482#undef TARGET_SCHED_MACRO_FUSION_P
20483#define TARGET_SCHED_MACRO_FUSION_P aarch64_macro_fusion_p
20484
20485#undef TARGET_SCHED_MACRO_FUSION_PAIR_P
20486#define TARGET_SCHED_MACRO_FUSION_PAIR_P aarch_macro_fusion_pair_p
20487
350013bc
BC
20488#undef TARGET_SCHED_FUSION_PRIORITY
20489#define TARGET_SCHED_FUSION_PRIORITY aarch64_sched_fusion_priority
20490
7b841a12
JW
20491#undef TARGET_UNSPEC_MAY_TRAP_P
20492#define TARGET_UNSPEC_MAY_TRAP_P aarch64_unspec_may_trap_p
20493
1b1e81f8
JW
20494#undef TARGET_USE_PSEUDO_PIC_REG
20495#define TARGET_USE_PSEUDO_PIC_REG aarch64_use_pseudo_pic_reg
20496
cc8ca59e
JB
20497#undef TARGET_PRINT_OPERAND
20498#define TARGET_PRINT_OPERAND aarch64_print_operand
20499
20500#undef TARGET_PRINT_OPERAND_ADDRESS
20501#define TARGET_PRINT_OPERAND_ADDRESS aarch64_print_operand_address
20502
ee62a5a6
RS
20503#undef TARGET_OPTAB_SUPPORTED_P
20504#define TARGET_OPTAB_SUPPORTED_P aarch64_optab_supported_p
20505
43203dea
RR
20506#undef TARGET_OMIT_STRUCT_RETURN_REG
20507#define TARGET_OMIT_STRUCT_RETURN_REG true
20508
43cacb12
RS
20509#undef TARGET_DWARF_POLY_INDETERMINATE_VALUE
20510#define TARGET_DWARF_POLY_INDETERMINATE_VALUE \
20511 aarch64_dwarf_poly_indeterminate_value
20512
f46fe37e
EB
20513/* The architecture reserves bits 0 and 1 so use bit 2 for descriptors. */
20514#undef TARGET_CUSTOM_FUNCTION_DESCRIPTORS
20515#define TARGET_CUSTOM_FUNCTION_DESCRIPTORS 4
20516
c43f4279
RS
20517#undef TARGET_HARD_REGNO_NREGS
20518#define TARGET_HARD_REGNO_NREGS aarch64_hard_regno_nregs
f939c3e6
RS
20519#undef TARGET_HARD_REGNO_MODE_OK
20520#define TARGET_HARD_REGNO_MODE_OK aarch64_hard_regno_mode_ok
20521
99e1629f
RS
20522#undef TARGET_MODES_TIEABLE_P
20523#define TARGET_MODES_TIEABLE_P aarch64_modes_tieable_p
20524
80ec73f4
RS
20525#undef TARGET_HARD_REGNO_CALL_PART_CLOBBERED
20526#define TARGET_HARD_REGNO_CALL_PART_CLOBBERED \
20527 aarch64_hard_regno_call_part_clobbered
20528
b3650d40
SE
20529#undef TARGET_REMOVE_EXTRA_CALL_PRESERVED_REGS
20530#define TARGET_REMOVE_EXTRA_CALL_PRESERVED_REGS \
20531 aarch64_remove_extra_call_preserved_regs
20532
473574ee
SE
20533#undef TARGET_RETURN_CALL_WITH_MAX_CLOBBERS
20534#define TARGET_RETURN_CALL_WITH_MAX_CLOBBERS \
20535 aarch64_return_call_with_max_clobbers
20536
58e17cf8
RS
20537#undef TARGET_CONSTANT_ALIGNMENT
20538#define TARGET_CONSTANT_ALIGNMENT aarch64_constant_alignment
20539
8c6e3b23
TC
20540#undef TARGET_STACK_CLASH_PROTECTION_ALLOCA_PROBE_RANGE
20541#define TARGET_STACK_CLASH_PROTECTION_ALLOCA_PROBE_RANGE \
20542 aarch64_stack_clash_protection_alloca_probe_range
20543
43cacb12
RS
20544#undef TARGET_COMPUTE_PRESSURE_CLASSES
20545#define TARGET_COMPUTE_PRESSURE_CLASSES aarch64_compute_pressure_classes
20546
20547#undef TARGET_CAN_CHANGE_MODE_CLASS
20548#define TARGET_CAN_CHANGE_MODE_CLASS aarch64_can_change_mode_class
20549
5cce8171
RS
20550#undef TARGET_SELECT_EARLY_REMAT_MODES
20551#define TARGET_SELECT_EARLY_REMAT_MODES aarch64_select_early_remat_modes
20552
c0111dc4
RE
20553#undef TARGET_SPECULATION_SAFE_VALUE
20554#define TARGET_SPECULATION_SAFE_VALUE aarch64_speculation_safe_value
20555
2d56d6ba
KT
20556#undef TARGET_ESTIMATED_POLY_VALUE
20557#define TARGET_ESTIMATED_POLY_VALUE aarch64_estimated_poly_value
20558
a0d0b980
SE
20559#undef TARGET_ATTRIBUTE_TABLE
20560#define TARGET_ATTRIBUTE_TABLE aarch64_attribute_table
20561
d9186814
SE
20562#undef TARGET_SIMD_CLONE_COMPUTE_VECSIZE_AND_SIMDLEN
20563#define TARGET_SIMD_CLONE_COMPUTE_VECSIZE_AND_SIMDLEN \
20564 aarch64_simd_clone_compute_vecsize_and_simdlen
20565
20566#undef TARGET_SIMD_CLONE_ADJUST
20567#define TARGET_SIMD_CLONE_ADJUST aarch64_simd_clone_adjust
20568
20569#undef TARGET_SIMD_CLONE_USABLE
20570#define TARGET_SIMD_CLONE_USABLE aarch64_simd_clone_usable
20571
497f281c
SE
20572#undef TARGET_COMP_TYPE_ATTRIBUTES
20573#define TARGET_COMP_TYPE_ATTRIBUTES aarch64_comp_type_attributes
20574
3bac1e20
SE
20575#undef TARGET_GET_MULTILIB_ABI_NAME
20576#define TARGET_GET_MULTILIB_ABI_NAME aarch64_get_multilib_abi_name
20577
51b86113
DM
20578#if CHECKING_P
20579#undef TARGET_RUN_TARGET_SELFTESTS
20580#define TARGET_RUN_TARGET_SELFTESTS selftest::aarch64_run_selftests
20581#endif /* #if CHECKING_P */
20582
8fc16d72
ST
20583#undef TARGET_ASM_POST_CFI_STARTPROC
20584#define TARGET_ASM_POST_CFI_STARTPROC aarch64_post_cfi_startproc
20585
43e9d192
IB
20586struct gcc_target targetm = TARGET_INITIALIZER;
20587
20588#include "gt-aarch64.h"