/* Machine description for AArch64 architecture.
   Copyright (C) 2009-2020 Free Software Foundation, Inc.
   Contributed by ARM Ltd.

   This file is part of GCC.

   GCC is free software; you can redistribute it and/or modify it
   under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 3, or (at your option)
   any later version.

   GCC is distributed in the hope that it will be useful, but
   WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with GCC; see the file COPYING3.  If not see
   <http://www.gnu.org/licenses/>.  */

#define IN_TARGET_CODE 1

#include "config.h"
#define INCLUDE_STRING
#include "system.h"
#include "coretypes.h"
#include "backend.h"
#include "target.h"
#include "rtl.h"
#include "tree.h"
#include "memmodel.h"
#include "gimple.h"
#include "cfghooks.h"
#include "cfgloop.h"
#include "df.h"
#include "tm_p.h"
#include "stringpool.h"
#include "attribs.h"
#include "optabs.h"
#include "regs.h"
#include "emit-rtl.h"
#include "recog.h"
#include "cgraph.h"
#include "diagnostic.h"
#include "insn-attr.h"
#include "alias.h"
#include "fold-const.h"
#include "stor-layout.h"
#include "calls.h"
#include "varasm.h"
#include "output.h"
#include "flags.h"
#include "explow.h"
#include "expr.h"
#include "reload.h"
#include "langhooks.h"
#include "opts.h"
#include "gimplify.h"
#include "dwarf2.h"
#include "gimple-iterator.h"
#include "tree-vectorizer.h"
#include "aarch64-cost-tables.h"
#include "dumpfile.h"
#include "builtins.h"
#include "rtl-iter.h"
#include "tm-constrs.h"
#include "sched-int.h"
#include "target-globals.h"
#include "common/common-target.h"
#include "cfgrtl.h"
#include "selftest.h"
#include "selftest-rtl.h"
#include "rtx-vector-builder.h"
#include "intl.h"
#include "expmed.h"
#include "function-abi.h"

/* This file should be included last.  */
#include "target-def.h"

/* Defined for convenience.  */
#define POINTER_BYTES (POINTER_SIZE / BITS_PER_UNIT)

/* Information about a legitimate vector immediate operand.  */
struct simd_immediate_info
{
  enum insn_type { MOV, MVN, INDEX, PTRUE };
  enum modifier_type { LSL, MSL };

  simd_immediate_info () {}
  simd_immediate_info (scalar_float_mode, rtx);
  simd_immediate_info (scalar_int_mode, unsigned HOST_WIDE_INT,
		       insn_type = MOV, modifier_type = LSL,
		       unsigned int = 0);
  simd_immediate_info (scalar_mode, rtx, rtx);
  simd_immediate_info (scalar_int_mode, aarch64_svpattern);

  /* The mode of the elements.  */
  scalar_mode elt_mode;

  /* The instruction to use to move the immediate into a vector.  */
  insn_type insn;

  union
  {
    /* For MOV and MVN.  */
    struct
    {
      /* The value of each element.  */
      rtx value;

      /* The kind of shift modifier to use, and the number of bits to shift.
	 This is (LSL, 0) if no shift is needed.  */
      modifier_type modifier;
      unsigned int shift;
    } mov;

    /* For INDEX.  */
    struct
    {
      /* The value of the first element and the step to be added for each
	 subsequent element.  */
      rtx base, step;
    } index;

    /* For PTRUE.  */
    aarch64_svpattern pattern;
  } u;
};

/* Construct a floating-point immediate in which each element has mode
   ELT_MODE_IN and value VALUE_IN.  */
inline simd_immediate_info
::simd_immediate_info (scalar_float_mode elt_mode_in, rtx value_in)
  : elt_mode (elt_mode_in), insn (MOV)
{
  u.mov.value = value_in;
  u.mov.modifier = LSL;
  u.mov.shift = 0;
}

/* Construct an integer immediate in which each element has mode ELT_MODE_IN
   and value VALUE_IN.  The other parameters are as for the structure
   fields.  */
inline simd_immediate_info
::simd_immediate_info (scalar_int_mode elt_mode_in,
		       unsigned HOST_WIDE_INT value_in,
		       insn_type insn_in, modifier_type modifier_in,
		       unsigned int shift_in)
  : elt_mode (elt_mode_in), insn (insn_in)
{
  u.mov.value = gen_int_mode (value_in, elt_mode_in);
  u.mov.modifier = modifier_in;
  u.mov.shift = shift_in;
}

/* Construct an integer immediate in which each element has mode ELT_MODE_IN
   and where element I is equal to BASE_IN + I * STEP_IN.  */
inline simd_immediate_info
::simd_immediate_info (scalar_mode elt_mode_in, rtx base_in, rtx step_in)
  : elt_mode (elt_mode_in), insn (INDEX)
{
  u.index.base = base_in;
  u.index.step = step_in;
}

/* Construct a predicate that controls elements of mode ELT_MODE_IN
   and has PTRUE pattern PATTERN_IN.  */
inline simd_immediate_info
::simd_immediate_info (scalar_int_mode elt_mode_in,
		       aarch64_svpattern pattern_in)
  : elt_mode (elt_mode_in), insn (PTRUE)
{
  u.pattern = pattern_in;
}

/* The current code model.  */
enum aarch64_code_model aarch64_cmodel;

/* The number of 64-bit elements in an SVE vector.  */
poly_uint16 aarch64_sve_vg;

#ifdef HAVE_AS_TLS
#undef TARGET_HAVE_TLS
#define TARGET_HAVE_TLS 1
#endif

static bool aarch64_composite_type_p (const_tree, machine_mode);
static bool aarch64_vfp_is_call_or_return_candidate (machine_mode,
						     const_tree,
						     machine_mode *, int *,
						     bool *);
static void aarch64_elf_asm_constructor (rtx, int) ATTRIBUTE_UNUSED;
static void aarch64_elf_asm_destructor (rtx, int) ATTRIBUTE_UNUSED;
static void aarch64_override_options_after_change (void);
static bool aarch64_vector_mode_supported_p (machine_mode);
static int aarch64_address_cost (rtx, machine_mode, addr_space_t, bool);
static bool aarch64_builtin_support_vector_misalignment (machine_mode mode,
							 const_tree type,
							 int misalignment,
							 bool is_packed);
static machine_mode aarch64_simd_container_mode (scalar_mode, poly_int64);
static bool aarch64_print_address_internal (FILE*, machine_mode, rtx,
					    aarch64_addr_query_type);
static HOST_WIDE_INT aarch64_clamp_to_uimm12_shift (HOST_WIDE_INT val);

/* Major revision number of the ARM Architecture implemented by the target.  */
unsigned aarch64_architecture_version;

/* The processor for which instructions should be scheduled.  */
enum aarch64_processor aarch64_tune = cortexa53;

/* Mask to specify which instruction scheduling options should be used.  */
uint64_t aarch64_tune_flags = 0;

/* Global flag for PC relative loads.  */
bool aarch64_pcrelative_literal_loads;

/* Global flag for whether frame pointer is enabled.  */
bool aarch64_use_frame_pointer;

#define BRANCH_PROTECT_STR_MAX 255
char *accepted_branch_protection_string = NULL;

static enum aarch64_parse_opt_result
aarch64_parse_branch_protection (const char*, char**);

/* Support for command line parsing of boolean flags in the tuning
   structures.  */
struct aarch64_flag_desc
{
  const char* name;
  unsigned int flag;
};

#define AARCH64_FUSION_PAIR(name, internal_name) \
  { name, AARCH64_FUSE_##internal_name },
static const struct aarch64_flag_desc aarch64_fusible_pairs[] =
{
  { "none", AARCH64_FUSE_NOTHING },
#include "aarch64-fusion-pairs.def"
  { "all", AARCH64_FUSE_ALL },
  { NULL, AARCH64_FUSE_NOTHING }
};

#define AARCH64_EXTRA_TUNING_OPTION(name, internal_name) \
  { name, AARCH64_EXTRA_TUNE_##internal_name },
static const struct aarch64_flag_desc aarch64_tuning_flags[] =
{
  { "none", AARCH64_EXTRA_TUNE_NONE },
#include "aarch64-tuning-flags.def"
  { "all", AARCH64_EXTRA_TUNE_ALL },
  { NULL, AARCH64_EXTRA_TUNE_NONE }
};

/* Tuning parameters.  */

static const struct cpu_addrcost_table generic_addrcost_table =
{
  {
    1, /* hi  */
    0, /* si  */
    0, /* di  */
    1, /* ti  */
  },
  0, /* pre_modify  */
  0, /* post_modify  */
  0, /* register_offset  */
  0, /* register_sextend  */
  0, /* register_zextend  */
  0 /* imm_offset  */
};

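/* Address-cost table for the Exynos M1.  */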
static const struct cpu_addrcost_table exynosm1_addrcost_table =
{
  {
    0, /* hi  */
    0, /* si  */
    0, /* di  */
    2, /* ti  */
  },
  0, /* pre_modify  */
  0, /* post_modify  */
  1, /* register_offset  */
  1, /* register_sextend  */
  2, /* register_zextend  */
  0, /* imm_offset  */
};

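/* Address-cost table for X-Gene 1.  */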
static const struct cpu_addrcost_table xgene1_addrcost_table =
{
  {
    1, /* hi  */
    0, /* si  */
    0, /* di  */
    1, /* ti  */
  },
  1, /* pre_modify  */
  1, /* post_modify  */
  0, /* register_offset  */
  1, /* register_sextend  */
  1, /* register_zextend  */
  0, /* imm_offset  */
};

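/* Address-cost table for the ThunderX2 T99.  */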
static const struct cpu_addrcost_table thunderx2t99_addrcost_table =
{
  {
    1, /* hi  */
    1, /* si  */
    1, /* di  */
    2, /* ti  */
  },
  0, /* pre_modify  */
  0, /* post_modify  */
  2, /* register_offset  */
  3, /* register_sextend  */
  3, /* register_zextend  */
  0, /* imm_offset  */
};

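/* Address-cost table for the TSV110.  */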
static const struct cpu_addrcost_table tsv110_addrcost_table =
{
  {
    1, /* hi  */
    0, /* si  */
    0, /* di  */
    1, /* ti  */
  },
  0, /* pre_modify  */
  0, /* post_modify  */
  0, /* register_offset  */
  1, /* register_sextend  */
  1, /* register_zextend  */
  0, /* imm_offset  */
};

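/* Address-cost table for the Qualcomm QDF24xx.  */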
static const struct cpu_addrcost_table qdf24xx_addrcost_table =
{
  {
    1, /* hi  */
    1, /* si  */
    1, /* di  */
    2, /* ti  */
  },
  1, /* pre_modify  */
  1, /* post_modify  */
  3, /* register_offset  */
  3, /* register_sextend  */
  3, /* register_zextend  */
  2, /* imm_offset  */
};

static const struct cpu_regmove_cost generic_regmove_cost =
{
  1, /* GP2GP  */
  /* Avoid the use of slow int<->fp moves for spilling by setting
     their cost higher than memmov_cost.  */
  5, /* GP2FP  */
  5, /* FP2GP  */
  2 /* FP2FP  */
};

static const struct cpu_regmove_cost cortexa57_regmove_cost =
{
  1, /* GP2GP  */
  /* Avoid the use of slow int<->fp moves for spilling by setting
     their cost higher than memmov_cost.  */
  5, /* GP2FP  */
  5, /* FP2GP  */
  2 /* FP2FP  */
};

static const struct cpu_regmove_cost cortexa53_regmove_cost =
{
  1, /* GP2GP  */
  /* Avoid the use of slow int<->fp moves for spilling by setting
     their cost higher than memmov_cost.  */
  5, /* GP2FP  */
  5, /* FP2GP  */
  2 /* FP2FP  */
};

static const struct cpu_regmove_cost exynosm1_regmove_cost =
{
  1, /* GP2GP  */
  /* Avoid the use of slow int<->fp moves for spilling by setting
     their cost higher than memmov_cost (actual, 4 and 9).  */
  9, /* GP2FP  */
  9, /* FP2GP  */
  1 /* FP2FP  */
};

static const struct cpu_regmove_cost thunderx_regmove_cost =
{
  2, /* GP2GP  */
  2, /* GP2FP  */
  6, /* FP2GP  */
  4 /* FP2FP  */
};

static const struct cpu_regmove_cost xgene1_regmove_cost =
{
  1, /* GP2GP  */
  /* Avoid the use of slow int<->fp moves for spilling by setting
     their cost higher than memmov_cost.  */
  8, /* GP2FP  */
  8, /* FP2GP  */
  2 /* FP2FP  */
};

static const struct cpu_regmove_cost qdf24xx_regmove_cost =
{
  2, /* GP2GP  */
  /* Avoid the use of int<->fp moves for spilling.  */
  6, /* GP2FP  */
  6, /* FP2GP  */
  4 /* FP2FP  */
};

static const struct cpu_regmove_cost thunderx2t99_regmove_cost =
{
  1, /* GP2GP  */
  /* Avoid the use of int<->fp moves for spilling.  */
  8, /* GP2FP  */
  8, /* FP2GP  */
  4 /* FP2FP  */
};

static const struct cpu_regmove_cost tsv110_regmove_cost =
{
  1, /* GP2GP  */
  /* Avoid the use of slow int<->fp moves for spilling by setting
     their cost higher than memmov_cost.  */
  2, /* GP2FP  */
  3, /* FP2GP  */
  2 /* FP2FP  */
};

/* Generic costs for vector insn classes.  */
static const struct cpu_vector_cost generic_vector_cost =
{
  1, /* scalar_int_stmt_cost  */
  1, /* scalar_fp_stmt_cost  */
  1, /* scalar_load_cost  */
  1, /* scalar_store_cost  */
  1, /* vec_int_stmt_cost  */
  1, /* vec_fp_stmt_cost  */
  2, /* vec_permute_cost  */
  2, /* vec_to_scalar_cost  */
  1, /* scalar_to_vec_cost  */
  1, /* vec_align_load_cost  */
  1, /* vec_unalign_load_cost  */
  1, /* vec_unalign_store_cost  */
  1, /* vec_store_cost  */
  3, /* cond_taken_branch_cost  */
  1 /* cond_not_taken_branch_cost  */
};

/* QDF24XX costs for vector insn classes.  */
static const struct cpu_vector_cost qdf24xx_vector_cost =
{
  1, /* scalar_int_stmt_cost  */
  1, /* scalar_fp_stmt_cost  */
  1, /* scalar_load_cost  */
  1, /* scalar_store_cost  */
  1, /* vec_int_stmt_cost  */
  3, /* vec_fp_stmt_cost  */
  2, /* vec_permute_cost  */
  1, /* vec_to_scalar_cost  */
  1, /* scalar_to_vec_cost  */
  1, /* vec_align_load_cost  */
  1, /* vec_unalign_load_cost  */
  1, /* vec_unalign_store_cost  */
  1, /* vec_store_cost  */
  3, /* cond_taken_branch_cost  */
  1 /* cond_not_taken_branch_cost  */
};

/* ThunderX costs for vector insn classes.  */
static const struct cpu_vector_cost thunderx_vector_cost =
{
  1, /* scalar_int_stmt_cost  */
  1, /* scalar_fp_stmt_cost  */
  3, /* scalar_load_cost  */
  1, /* scalar_store_cost  */
  4, /* vec_int_stmt_cost  */
  1, /* vec_fp_stmt_cost  */
  4, /* vec_permute_cost  */
  2, /* vec_to_scalar_cost  */
  2, /* scalar_to_vec_cost  */
  3, /* vec_align_load_cost  */
  5, /* vec_unalign_load_cost  */
  5, /* vec_unalign_store_cost  */
  1, /* vec_store_cost  */
  3, /* cond_taken_branch_cost  */
  3 /* cond_not_taken_branch_cost  */
};

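/* Costs for vector insn classes for the TSV110.  */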
static const struct cpu_vector_cost tsv110_vector_cost =
{
  1, /* scalar_int_stmt_cost  */
  1, /* scalar_fp_stmt_cost  */
  5, /* scalar_load_cost  */
  1, /* scalar_store_cost  */
  2, /* vec_int_stmt_cost  */
  2, /* vec_fp_stmt_cost  */
  2, /* vec_permute_cost  */
  3, /* vec_to_scalar_cost  */
  2, /* scalar_to_vec_cost  */
  5, /* vec_align_load_cost  */
  5, /* vec_unalign_load_cost  */
  1, /* vec_unalign_store_cost  */
  1, /* vec_store_cost  */
  1, /* cond_taken_branch_cost  */
  1 /* cond_not_taken_branch_cost  */
};

/* Generic costs for vector insn classes.  */
static const struct cpu_vector_cost cortexa57_vector_cost =
{
  1, /* scalar_int_stmt_cost  */
  1, /* scalar_fp_stmt_cost  */
  4, /* scalar_load_cost  */
  1, /* scalar_store_cost  */
  2, /* vec_int_stmt_cost  */
  2, /* vec_fp_stmt_cost  */
  3, /* vec_permute_cost  */
  8, /* vec_to_scalar_cost  */
  8, /* scalar_to_vec_cost  */
  4, /* vec_align_load_cost  */
  4, /* vec_unalign_load_cost  */
  1, /* vec_unalign_store_cost  */
  1, /* vec_store_cost  */
  1, /* cond_taken_branch_cost  */
  1 /* cond_not_taken_branch_cost  */
};

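/* Costs for vector insn classes for the Exynos M1.  */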
static const struct cpu_vector_cost exynosm1_vector_cost =
{
  1, /* scalar_int_stmt_cost  */
  1, /* scalar_fp_stmt_cost  */
  5, /* scalar_load_cost  */
  1, /* scalar_store_cost  */
  3, /* vec_int_stmt_cost  */
  3, /* vec_fp_stmt_cost  */
  3, /* vec_permute_cost  */
  3, /* vec_to_scalar_cost  */
  3, /* scalar_to_vec_cost  */
  5, /* vec_align_load_cost  */
  5, /* vec_unalign_load_cost  */
  1, /* vec_unalign_store_cost  */
  1, /* vec_store_cost  */
  1, /* cond_taken_branch_cost  */
  1 /* cond_not_taken_branch_cost  */
};

/* Generic costs for vector insn classes.  */
static const struct cpu_vector_cost xgene1_vector_cost =
{
  1, /* scalar_int_stmt_cost  */
  1, /* scalar_fp_stmt_cost  */
  5, /* scalar_load_cost  */
  1, /* scalar_store_cost  */
  2, /* vec_int_stmt_cost  */
  2, /* vec_fp_stmt_cost  */
  2, /* vec_permute_cost  */
  4, /* vec_to_scalar_cost  */
  4, /* scalar_to_vec_cost  */
  10, /* vec_align_load_cost  */
  10, /* vec_unalign_load_cost  */
  2, /* vec_unalign_store_cost  */
  2, /* vec_store_cost  */
  2, /* cond_taken_branch_cost  */
  1 /* cond_not_taken_branch_cost  */
};

/* Costs for vector insn classes for Vulcan.  */
static const struct cpu_vector_cost thunderx2t99_vector_cost =
{
  1, /* scalar_int_stmt_cost  */
  6, /* scalar_fp_stmt_cost  */
  4, /* scalar_load_cost  */
  1, /* scalar_store_cost  */
  5, /* vec_int_stmt_cost  */
  6, /* vec_fp_stmt_cost  */
  10, /* vec_permute_cost  */
  6, /* vec_to_scalar_cost  */
  5, /* scalar_to_vec_cost  */
  8, /* vec_align_load_cost  */
  8, /* vec_unalign_load_cost  */
  4, /* vec_unalign_store_cost  */
  4, /* vec_store_cost  */
  2, /* cond_taken_branch_cost  */
  1 /* cond_not_taken_branch_cost  */
};

/* Generic costs for branch instructions.  */
static const struct cpu_branch_cost generic_branch_cost =
{
  1,  /* Predictable.  */
  3   /* Unpredictable.  */
};

/* Generic approximation modes.  */
static const cpu_approx_modes generic_approx_modes =
{
  AARCH64_APPROX_NONE, /* division  */
  AARCH64_APPROX_NONE, /* sqrt  */
  AARCH64_APPROX_NONE /* recip_sqrt  */
};

/* Approximation modes for Exynos M1.  */
static const cpu_approx_modes exynosm1_approx_modes =
{
  AARCH64_APPROX_NONE, /* division  */
  AARCH64_APPROX_ALL, /* sqrt  */
  AARCH64_APPROX_ALL /* recip_sqrt  */
};

/* Approximation modes for X-Gene 1.  */
static const cpu_approx_modes xgene1_approx_modes =
{
  AARCH64_APPROX_NONE, /* division  */
  AARCH64_APPROX_NONE, /* sqrt  */
  AARCH64_APPROX_ALL /* recip_sqrt  */
};

/* Generic prefetch settings (which disable prefetch).  */
static const cpu_prefetch_tune generic_prefetch_tune =
{
  0, /* num_slots  */
  -1, /* l1_cache_size  */
  -1, /* l1_cache_line_size  */
  -1, /* l2_cache_size  */
  true, /* prefetch_dynamic_strides */
  -1, /* minimum_stride */
  -1 /* default_opt_level  */
};

static const cpu_prefetch_tune exynosm1_prefetch_tune =
{
  0, /* num_slots  */
  -1, /* l1_cache_size  */
  64, /* l1_cache_line_size  */
  -1, /* l2_cache_size  */
  true, /* prefetch_dynamic_strides */
  -1, /* minimum_stride */
  -1 /* default_opt_level  */
};

static const cpu_prefetch_tune qdf24xx_prefetch_tune =
{
  4, /* num_slots  */
  32, /* l1_cache_size  */
  64, /* l1_cache_line_size  */
  512, /* l2_cache_size  */
  false, /* prefetch_dynamic_strides */
  2048, /* minimum_stride */
  3 /* default_opt_level  */
};

static const cpu_prefetch_tune thunderxt88_prefetch_tune =
{
  8, /* num_slots  */
  32, /* l1_cache_size  */
  128, /* l1_cache_line_size  */
  16*1024, /* l2_cache_size  */
  true, /* prefetch_dynamic_strides */
  -1, /* minimum_stride */
  3 /* default_opt_level  */
};

static const cpu_prefetch_tune thunderx_prefetch_tune =
{
  8, /* num_slots  */
  32, /* l1_cache_size  */
  128, /* l1_cache_line_size  */
  -1, /* l2_cache_size  */
  true, /* prefetch_dynamic_strides */
  -1, /* minimum_stride */
  -1 /* default_opt_level  */
};

static const cpu_prefetch_tune thunderx2t99_prefetch_tune =
{
  8, /* num_slots  */
  32, /* l1_cache_size  */
  64, /* l1_cache_line_size  */
  256, /* l2_cache_size  */
  true, /* prefetch_dynamic_strides */
  -1, /* minimum_stride */
  -1 /* default_opt_level  */
};

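/* Prefetch settings for the TSV110.  */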
static const cpu_prefetch_tune tsv110_prefetch_tune =
{
  0, /* num_slots  */
  64, /* l1_cache_size  */
  64, /* l1_cache_line_size  */
  512, /* l2_cache_size  */
  true, /* prefetch_dynamic_strides */
  -1, /* minimum_stride */
  -1 /* default_opt_level  */
};

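/* Prefetch settings for X-Gene 1.  */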
static const cpu_prefetch_tune xgene1_prefetch_tune =
{
  8, /* num_slots  */
  32, /* l1_cache_size  */
  64, /* l1_cache_line_size  */
  256, /* l2_cache_size  */
  true, /* prefetch_dynamic_strides */
  -1, /* minimum_stride */
  -1 /* default_opt_level  */
};

static const struct tune_params generic_tunings =
{
  &cortexa57_extra_costs,
  &generic_addrcost_table,
  &generic_regmove_cost,
  &generic_vector_cost,
  &generic_branch_cost,
  &generic_approx_modes,
  SVE_NOT_IMPLEMENTED, /* sve_width  */
  4, /* memmov_cost  */
  2, /* issue_rate  */
  (AARCH64_FUSE_AES_AESMC), /* fusible_ops  */
  "16:12", /* function_align.  */
  "4", /* jump_align.  */
  "8", /* loop_align.  */
  2, /* int_reassoc_width.  */
  4, /* fp_reassoc_width.  */
  1, /* vec_reassoc_width.  */
  2, /* min_div_recip_mul_sf.  */
  2, /* min_div_recip_mul_df.  */
  0, /* max_case_values.  */
  tune_params::AUTOPREFETCHER_WEAK, /* autoprefetcher_model.  */
  (AARCH64_EXTRA_TUNE_NONE), /* tune_flags.  */
  &generic_prefetch_tune
};

static const struct tune_params cortexa35_tunings =
{
  &cortexa53_extra_costs,
  &generic_addrcost_table,
  &cortexa53_regmove_cost,
  &generic_vector_cost,
  &generic_branch_cost,
  &generic_approx_modes,
  SVE_NOT_IMPLEMENTED, /* sve_width  */
  4, /* memmov_cost  */
  1, /* issue_rate  */
  (AARCH64_FUSE_AES_AESMC | AARCH64_FUSE_MOV_MOVK | AARCH64_FUSE_ADRP_ADD
   | AARCH64_FUSE_MOVK_MOVK | AARCH64_FUSE_ADRP_LDR), /* fusible_ops  */
  "16", /* function_align.  */
  "4", /* jump_align.  */
  "8", /* loop_align.  */
  2, /* int_reassoc_width.  */
  4, /* fp_reassoc_width.  */
  1, /* vec_reassoc_width.  */
  2, /* min_div_recip_mul_sf.  */
  2, /* min_div_recip_mul_df.  */
  0, /* max_case_values.  */
  tune_params::AUTOPREFETCHER_WEAK, /* autoprefetcher_model.  */
  (AARCH64_EXTRA_TUNE_NONE), /* tune_flags.  */
  &generic_prefetch_tune
};

static const struct tune_params cortexa53_tunings =
{
  &cortexa53_extra_costs,
  &generic_addrcost_table,
  &cortexa53_regmove_cost,
  &generic_vector_cost,
  &generic_branch_cost,
  &generic_approx_modes,
  SVE_NOT_IMPLEMENTED, /* sve_width  */
  4, /* memmov_cost  */
  2, /* issue_rate  */
  (AARCH64_FUSE_AES_AESMC | AARCH64_FUSE_MOV_MOVK | AARCH64_FUSE_ADRP_ADD
   | AARCH64_FUSE_MOVK_MOVK | AARCH64_FUSE_ADRP_LDR), /* fusible_ops  */
  "16", /* function_align.  */
  "4", /* jump_align.  */
  "8", /* loop_align.  */
  2, /* int_reassoc_width.  */
  4, /* fp_reassoc_width.  */
  1, /* vec_reassoc_width.  */
  2, /* min_div_recip_mul_sf.  */
  2, /* min_div_recip_mul_df.  */
  0, /* max_case_values.  */
  tune_params::AUTOPREFETCHER_WEAK, /* autoprefetcher_model.  */
  (AARCH64_EXTRA_TUNE_NONE), /* tune_flags.  */
  &generic_prefetch_tune
};

static const struct tune_params cortexa57_tunings =
{
  &cortexa57_extra_costs,
  &generic_addrcost_table,
  &cortexa57_regmove_cost,
  &cortexa57_vector_cost,
  &generic_branch_cost,
  &generic_approx_modes,
  SVE_NOT_IMPLEMENTED, /* sve_width  */
  4, /* memmov_cost  */
  3, /* issue_rate  */
  (AARCH64_FUSE_AES_AESMC | AARCH64_FUSE_MOV_MOVK | AARCH64_FUSE_ADRP_ADD
   | AARCH64_FUSE_MOVK_MOVK), /* fusible_ops  */
  "16", /* function_align.  */
  "4", /* jump_align.  */
  "8", /* loop_align.  */
  2, /* int_reassoc_width.  */
  4, /* fp_reassoc_width.  */
  1, /* vec_reassoc_width.  */
  2, /* min_div_recip_mul_sf.  */
  2, /* min_div_recip_mul_df.  */
  0, /* max_case_values.  */
  tune_params::AUTOPREFETCHER_WEAK, /* autoprefetcher_model.  */
  (AARCH64_EXTRA_TUNE_RENAME_FMA_REGS), /* tune_flags.  */
  &generic_prefetch_tune
};

static const struct tune_params cortexa72_tunings =
{
  &cortexa57_extra_costs,
  &generic_addrcost_table,
  &cortexa57_regmove_cost,
  &cortexa57_vector_cost,
  &generic_branch_cost,
  &generic_approx_modes,
  SVE_NOT_IMPLEMENTED, /* sve_width  */
  4, /* memmov_cost  */
  3, /* issue_rate  */
  (AARCH64_FUSE_AES_AESMC | AARCH64_FUSE_MOV_MOVK | AARCH64_FUSE_ADRP_ADD
   | AARCH64_FUSE_MOVK_MOVK), /* fusible_ops  */
  "16", /* function_align.  */
  "4", /* jump_align.  */
  "8", /* loop_align.  */
  2, /* int_reassoc_width.  */
  4, /* fp_reassoc_width.  */
  1, /* vec_reassoc_width.  */
  2, /* min_div_recip_mul_sf.  */
  2, /* min_div_recip_mul_df.  */
  0, /* max_case_values.  */
  tune_params::AUTOPREFETCHER_WEAK, /* autoprefetcher_model.  */
  (AARCH64_EXTRA_TUNE_NONE), /* tune_flags.  */
  &generic_prefetch_tune
};

static const struct tune_params cortexa73_tunings =
{
  &cortexa57_extra_costs,
  &generic_addrcost_table,
  &cortexa57_regmove_cost,
  &cortexa57_vector_cost,
  &generic_branch_cost,
  &generic_approx_modes,
  SVE_NOT_IMPLEMENTED, /* sve_width  */
  4, /* memmov_cost.  */
  2, /* issue_rate.  */
  (AARCH64_FUSE_AES_AESMC | AARCH64_FUSE_MOV_MOVK | AARCH64_FUSE_ADRP_ADD
   | AARCH64_FUSE_MOVK_MOVK | AARCH64_FUSE_ADRP_LDR), /* fusible_ops  */
  "16", /* function_align.  */
  "4", /* jump_align.  */
  "8", /* loop_align.  */
  2, /* int_reassoc_width.  */
  4, /* fp_reassoc_width.  */
  1, /* vec_reassoc_width.  */
  2, /* min_div_recip_mul_sf.  */
  2, /* min_div_recip_mul_df.  */
  0, /* max_case_values.  */
  tune_params::AUTOPREFETCHER_WEAK, /* autoprefetcher_model.  */
  (AARCH64_EXTRA_TUNE_NONE), /* tune_flags.  */
  &generic_prefetch_tune
};

static const struct tune_params exynosm1_tunings =
{
  &exynosm1_extra_costs,
  &exynosm1_addrcost_table,
  &exynosm1_regmove_cost,
  &exynosm1_vector_cost,
  &generic_branch_cost,
  &exynosm1_approx_modes,
  SVE_NOT_IMPLEMENTED, /* sve_width  */
  4, /* memmov_cost  */
  3, /* issue_rate  */
  (AARCH64_FUSE_AES_AESMC), /* fusible_ops  */
  "4", /* function_align.  */
  "4", /* jump_align.  */
  "4", /* loop_align.  */
  2, /* int_reassoc_width.  */
  4, /* fp_reassoc_width.  */
  1, /* vec_reassoc_width.  */
  2, /* min_div_recip_mul_sf.  */
  2, /* min_div_recip_mul_df.  */
  48, /* max_case_values.  */
  tune_params::AUTOPREFETCHER_WEAK, /* autoprefetcher_model.  */
  (AARCH64_EXTRA_TUNE_NONE), /* tune_flags.  */
  &exynosm1_prefetch_tune
};

static const struct tune_params thunderxt88_tunings =
{
  &thunderx_extra_costs,
  &generic_addrcost_table,
  &thunderx_regmove_cost,
  &thunderx_vector_cost,
  &generic_branch_cost,
  &generic_approx_modes,
  SVE_NOT_IMPLEMENTED, /* sve_width  */
  6, /* memmov_cost  */
  2, /* issue_rate  */
  AARCH64_FUSE_ALU_BRANCH, /* fusible_ops  */
  "8", /* function_align.  */
  "8", /* jump_align.  */
  "8", /* loop_align.  */
  2, /* int_reassoc_width.  */
  4, /* fp_reassoc_width.  */
  1, /* vec_reassoc_width.  */
  2, /* min_div_recip_mul_sf.  */
  2, /* min_div_recip_mul_df.  */
  0, /* max_case_values.  */
  tune_params::AUTOPREFETCHER_OFF, /* autoprefetcher_model.  */
  (AARCH64_EXTRA_TUNE_SLOW_UNALIGNED_LDPW), /* tune_flags.  */
  &thunderxt88_prefetch_tune
};

static const struct tune_params thunderx_tunings =
{
  &thunderx_extra_costs,
  &generic_addrcost_table,
  &thunderx_regmove_cost,
  &thunderx_vector_cost,
  &generic_branch_cost,
  &generic_approx_modes,
  SVE_NOT_IMPLEMENTED, /* sve_width  */
  6, /* memmov_cost  */
  2, /* issue_rate  */
  AARCH64_FUSE_ALU_BRANCH, /* fusible_ops  */
  "8", /* function_align.  */
  "8", /* jump_align.  */
  "8", /* loop_align.  */
  2, /* int_reassoc_width.  */
  4, /* fp_reassoc_width.  */
  1, /* vec_reassoc_width.  */
  2, /* min_div_recip_mul_sf.  */
  2, /* min_div_recip_mul_df.  */
  0, /* max_case_values.  */
  tune_params::AUTOPREFETCHER_OFF, /* autoprefetcher_model.  */
  (AARCH64_EXTRA_TUNE_SLOW_UNALIGNED_LDPW
   | AARCH64_EXTRA_TUNE_CHEAP_SHIFT_EXTEND), /* tune_flags.  */
  &thunderx_prefetch_tune
};

static const struct tune_params tsv110_tunings =
{
  &tsv110_extra_costs,
  &tsv110_addrcost_table,
  &tsv110_regmove_cost,
  &tsv110_vector_cost,
  &generic_branch_cost,
  &generic_approx_modes,
  SVE_NOT_IMPLEMENTED, /* sve_width  */
  4, /* memmov_cost  */
  4, /* issue_rate  */
  (AARCH64_FUSE_AES_AESMC | AARCH64_FUSE_ALU_BRANCH
   | AARCH64_FUSE_ALU_CBZ), /* fusible_ops  */
  "16", /* function_align.  */
  "4", /* jump_align.  */
  "8", /* loop_align.  */
  2, /* int_reassoc_width.  */
  4, /* fp_reassoc_width.  */
  1, /* vec_reassoc_width.  */
  2, /* min_div_recip_mul_sf.  */
  2, /* min_div_recip_mul_df.  */
  0, /* max_case_values.  */
  tune_params::AUTOPREFETCHER_WEAK, /* autoprefetcher_model.  */
  (AARCH64_EXTRA_TUNE_NONE), /* tune_flags.  */
  &tsv110_prefetch_tune
};

static const struct tune_params xgene1_tunings =
{
  &xgene1_extra_costs,
  &xgene1_addrcost_table,
  &xgene1_regmove_cost,
  &xgene1_vector_cost,
  &generic_branch_cost,
  &xgene1_approx_modes,
  SVE_NOT_IMPLEMENTED, /* sve_width  */
  6, /* memmov_cost  */
  4, /* issue_rate  */
  AARCH64_FUSE_NOTHING, /* fusible_ops  */
  "16", /* function_align.  */
  "16", /* jump_align.  */
  "16", /* loop_align.  */
  2, /* int_reassoc_width.  */
  4, /* fp_reassoc_width.  */
  1, /* vec_reassoc_width.  */
  2, /* min_div_recip_mul_sf.  */
  2, /* min_div_recip_mul_df.  */
  17, /* max_case_values.  */
  tune_params::AUTOPREFETCHER_OFF, /* autoprefetcher_model.  */
  (AARCH64_EXTRA_TUNE_NO_LDP_STP_QREGS), /* tune_flags.  */
  &xgene1_prefetch_tune
};

static const struct tune_params emag_tunings =
{
  &xgene1_extra_costs,
  &xgene1_addrcost_table,
  &xgene1_regmove_cost,
  &xgene1_vector_cost,
  &generic_branch_cost,
  &xgene1_approx_modes,
  SVE_NOT_IMPLEMENTED,
  6, /* memmov_cost  */
  4, /* issue_rate  */
  AARCH64_FUSE_NOTHING, /* fusible_ops  */
  "16", /* function_align.  */
  "16", /* jump_align.  */
  "16", /* loop_align.  */
  2, /* int_reassoc_width.  */
  4, /* fp_reassoc_width.  */
  1, /* vec_reassoc_width.  */
  2, /* min_div_recip_mul_sf.  */
  2, /* min_div_recip_mul_df.  */
  17, /* max_case_values.  */
  tune_params::AUTOPREFETCHER_OFF, /* autoprefetcher_model.  */
  (AARCH64_EXTRA_TUNE_NO_LDP_STP_QREGS), /* tune_flags.  */
  &xgene1_prefetch_tune
};

static const struct tune_params qdf24xx_tunings =
{
  &qdf24xx_extra_costs,
  &qdf24xx_addrcost_table,
  &qdf24xx_regmove_cost,
  &qdf24xx_vector_cost,
  &generic_branch_cost,
  &generic_approx_modes,
  SVE_NOT_IMPLEMENTED, /* sve_width  */
  4, /* memmov_cost  */
  4, /* issue_rate  */
  (AARCH64_FUSE_MOV_MOVK | AARCH64_FUSE_ADRP_ADD
   | AARCH64_FUSE_MOVK_MOVK), /* fuseable_ops  */
  "16", /* function_align.  */
  "8", /* jump_align.  */
  "16", /* loop_align.  */
  2, /* int_reassoc_width.  */
  4, /* fp_reassoc_width.  */
  1, /* vec_reassoc_width.  */
  2, /* min_div_recip_mul_sf.  */
  2, /* min_div_recip_mul_df.  */
  0, /* max_case_values.  */
  tune_params::AUTOPREFETCHER_WEAK, /* autoprefetcher_model.  */
  AARCH64_EXTRA_TUNE_RENAME_LOAD_REGS, /* tune_flags.  */
  &qdf24xx_prefetch_tune
};

/* Tuning structure for the Qualcomm Saphira core.  Default to falkor values
   for now.  */
static const struct tune_params saphira_tunings =
{
  &generic_extra_costs,
  &generic_addrcost_table,
  &generic_regmove_cost,
  &generic_vector_cost,
  &generic_branch_cost,
  &generic_approx_modes,
  SVE_NOT_IMPLEMENTED, /* sve_width  */
  4, /* memmov_cost  */
  4, /* issue_rate  */
  (AARCH64_FUSE_MOV_MOVK | AARCH64_FUSE_ADRP_ADD
   | AARCH64_FUSE_MOVK_MOVK), /* fuseable_ops  */
  "16", /* function_align.  */
  "8", /* jump_align.  */
  "16", /* loop_align.  */
  2, /* int_reassoc_width.  */
  4, /* fp_reassoc_width.  */
  1, /* vec_reassoc_width.  */
  2, /* min_div_recip_mul_sf.  */
  2, /* min_div_recip_mul_df.  */
  0, /* max_case_values.  */
  tune_params::AUTOPREFETCHER_WEAK, /* autoprefetcher_model.  */
  (AARCH64_EXTRA_TUNE_NONE), /* tune_flags.  */
  &generic_prefetch_tune
};

static const struct tune_params thunderx2t99_tunings =
{
  &thunderx2t99_extra_costs,
  &thunderx2t99_addrcost_table,
  &thunderx2t99_regmove_cost,
  &thunderx2t99_vector_cost,
  &generic_branch_cost,
  &generic_approx_modes,
  SVE_NOT_IMPLEMENTED, /* sve_width  */
  4, /* memmov_cost.  */
  4, /* issue_rate.  */
  (AARCH64_FUSE_ALU_BRANCH | AARCH64_FUSE_AES_AESMC
   | AARCH64_FUSE_ALU_CBZ), /* fusible_ops  */
  "16", /* function_align.  */
  "8", /* jump_align.  */
  "16", /* loop_align.  */
  3, /* int_reassoc_width.  */
  2, /* fp_reassoc_width.  */
  2, /* vec_reassoc_width.  */
  2, /* min_div_recip_mul_sf.  */
  2, /* min_div_recip_mul_df.  */
  0, /* max_case_values.  */
  tune_params::AUTOPREFETCHER_WEAK, /* autoprefetcher_model.  */
  (AARCH64_EXTRA_TUNE_NONE), /* tune_flags.  */
  &thunderx2t99_prefetch_tune
};

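/* Tuning parameters for the Neoverse N1.  */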
static const struct tune_params neoversen1_tunings =
{
  &cortexa57_extra_costs,
  &generic_addrcost_table,
  &generic_regmove_cost,
  &cortexa57_vector_cost,
  &generic_branch_cost,
  &generic_approx_modes,
  SVE_NOT_IMPLEMENTED, /* sve_width  */
  4, /* memmov_cost  */
  3, /* issue_rate  */
  AARCH64_FUSE_AES_AESMC, /* fusible_ops  */
  "32:16", /* function_align.  */
  "32:16", /* jump_align.  */
  "32:16", /* loop_align.  */
  2, /* int_reassoc_width.  */
  4, /* fp_reassoc_width.  */
  2, /* vec_reassoc_width.  */
  2, /* min_div_recip_mul_sf.  */
  2, /* min_div_recip_mul_df.  */
  0, /* max_case_values.  */
  tune_params::AUTOPREFETCHER_WEAK, /* autoprefetcher_model.  */
  (AARCH64_EXTRA_TUNE_NONE), /* tune_flags.  */
  &generic_prefetch_tune
};

/* Support for fine-grained override of the tuning structures.  */
struct aarch64_tuning_override_function
{
  const char* name;
  void (*parse_override)(const char*, struct tune_params*);
};

static void aarch64_parse_fuse_string (const char*, struct tune_params*);
static void aarch64_parse_tune_string (const char*, struct tune_params*);
static void aarch64_parse_sve_width_string (const char*, struct tune_params*);

static const struct aarch64_tuning_override_function
aarch64_tuning_override_functions[] =
{
  { "fuse", aarch64_parse_fuse_string },
  { "tune", aarch64_parse_tune_string },
  { "sve_width", aarch64_parse_sve_width_string },
  { NULL, NULL }
};

/* A processor implementing AArch64.  */
struct processor
{
  const char *const name;
  enum aarch64_processor ident;
  enum aarch64_processor sched_core;
  enum aarch64_arch arch;
  unsigned architecture_version;
  const uint64_t flags;
  const struct tune_params *const tune;
};

/* Architectures implementing AArch64.  */
static const struct processor all_architectures[] =
{
#define AARCH64_ARCH(NAME, CORE, ARCH_IDENT, ARCH_REV, FLAGS) \
  {NAME, CORE, CORE, AARCH64_ARCH_##ARCH_IDENT, ARCH_REV, FLAGS, NULL},
#include "aarch64-arches.def"
  {NULL, aarch64_none, aarch64_none, aarch64_no_arch, 0, 0, NULL}
};

/* Processor cores implementing AArch64.  */
static const struct processor all_cores[] =
{
#define AARCH64_CORE(NAME, IDENT, SCHED, ARCH, FLAGS, COSTS, IMP, PART, VARIANT) \
  {NAME, IDENT, SCHED, AARCH64_ARCH_##ARCH, \
  all_architectures[AARCH64_ARCH_##ARCH].architecture_version, \
  FLAGS, &COSTS##_tunings},
#include "aarch64-cores.def"
  {"generic", generic, cortexa53, AARCH64_ARCH_8A, 8,
   AARCH64_FL_FOR_ARCH8, &generic_tunings},
  {NULL, aarch64_none, aarch64_none, aarch64_no_arch, 0, 0, NULL}
};

/* Target specification.  These are populated by the -march, -mtune, -mcpu
   handling code or by target attributes.  */
static const struct processor *selected_arch;
static const struct processor *selected_cpu;
static const struct processor *selected_tune;

enum aarch64_key_type aarch64_ra_sign_key = AARCH64_KEY_A;

/* The current tuning set.  */
struct tune_params aarch64_tune_params = generic_tunings;

/* Check whether an 'aarch64_vector_pcs' attribute is valid.  */

static tree
handle_aarch64_vector_pcs_attribute (tree *node, tree name, tree,
				     int, bool *no_add_attrs)
{
  /* Since we set fn_type_req to true, the caller should have checked
     this for us.  */
  gcc_assert (FUNC_OR_METHOD_TYPE_P (*node));
  switch ((arm_pcs) fntype_abi (*node).id ())
    {
    case ARM_PCS_AAPCS64:
    case ARM_PCS_SIMD:
      return NULL_TREE;

    case ARM_PCS_SVE:
      error ("the %qE attribute cannot be applied to an SVE function type",
	     name);
      *no_add_attrs = true;
      return NULL_TREE;

    case ARM_PCS_TLSDESC:
    case ARM_PCS_UNKNOWN:
      break;
    }
  gcc_unreachable ();
}

/* Table of machine attributes.  */
static const struct attribute_spec aarch64_attribute_table[] =
{
  /* { name, min_len, max_len, decl_req, type_req, fn_type_req,
       affects_type_identity, handler, exclude } */
  { "aarch64_vector_pcs", 0, 0, false, true, true, true,
    handle_aarch64_vector_pcs_attribute, NULL },
  { NULL, 0, 0, false, false, false, false, NULL, NULL }
};

#define AARCH64_CPU_DEFAULT_FLAGS ((selected_cpu) ? selected_cpu->flags : 0)

/* An ISA extension in the co-processor and main instruction set space.  */
struct aarch64_option_extension
{
  const char *const name;
  const unsigned long flags_on;
  const unsigned long flags_off;
};

typedef enum aarch64_cond_code
{
  AARCH64_EQ = 0, AARCH64_NE, AARCH64_CS, AARCH64_CC, AARCH64_MI, AARCH64_PL,
  AARCH64_VS, AARCH64_VC, AARCH64_HI, AARCH64_LS, AARCH64_GE, AARCH64_LT,
  AARCH64_GT, AARCH64_LE, AARCH64_AL, AARCH64_NV
}
aarch64_cc;

#define AARCH64_INVERSE_CONDITION_CODE(X) ((aarch64_cc) (((int) X) ^ 1))

struct aarch64_branch_protect_type
{
  /* The type's name that the user passes to the branch-protection option
     string.  */
  const char* name;
  /* Function to handle the protection type and set global variables.
     First argument is the string token corresponding with this type and the
     second argument is the next token in the option string.
     Return values:
     * AARCH64_PARSE_OK: Handling was successful.
     * AARCH64_INVALID_ARG: The type is invalid in this context and the caller
       should print an error.
     * AARCH64_INVALID_FEATURE: The type is invalid and the handler prints its
       own error.  */
  enum aarch64_parse_opt_result (*handler)(char*, char*);
  /* A list of types that can follow this type in the option string.  */
  const aarch64_branch_protect_type* subtypes;
  unsigned int num_subtypes;
};

static enum aarch64_parse_opt_result
aarch64_handle_no_branch_protection (char* str, char* rest)
{
  aarch64_ra_sign_scope = AARCH64_FUNCTION_NONE;
  aarch64_enable_bti = 0;
  if (rest)
    {
      error ("unexpected %<%s%> after %<%s%>", rest, str);
      return AARCH64_PARSE_INVALID_FEATURE;
    }
  return AARCH64_PARSE_OK;
}

static enum aarch64_parse_opt_result
aarch64_handle_standard_branch_protection (char* str, char* rest)
{
  aarch64_ra_sign_scope = AARCH64_FUNCTION_NON_LEAF;
  aarch64_ra_sign_key = AARCH64_KEY_A;
  aarch64_enable_bti = 1;
  if (rest)
    {
      error ("unexpected %<%s%> after %<%s%>", rest, str);
      return AARCH64_PARSE_INVALID_FEATURE;
    }
  return AARCH64_PARSE_OK;
}

static enum aarch64_parse_opt_result
aarch64_handle_pac_ret_protection (char* str ATTRIBUTE_UNUSED,
				   char* rest ATTRIBUTE_UNUSED)
{
  aarch64_ra_sign_scope = AARCH64_FUNCTION_NON_LEAF;
  aarch64_ra_sign_key = AARCH64_KEY_A;
  return AARCH64_PARSE_OK;
}

static enum aarch64_parse_opt_result
aarch64_handle_pac_ret_leaf (char* str ATTRIBUTE_UNUSED,
			     char* rest ATTRIBUTE_UNUSED)
{
  aarch64_ra_sign_scope = AARCH64_FUNCTION_ALL;
  return AARCH64_PARSE_OK;
}

static enum aarch64_parse_opt_result
aarch64_handle_pac_ret_b_key (char* str ATTRIBUTE_UNUSED,
			      char* rest ATTRIBUTE_UNUSED)
{
  aarch64_ra_sign_key = AARCH64_KEY_B;
  return AARCH64_PARSE_OK;
}

static enum aarch64_parse_opt_result
aarch64_handle_bti_protection (char* str ATTRIBUTE_UNUSED,
			       char* rest ATTRIBUTE_UNUSED)
{
  aarch64_enable_bti = 1;
  return AARCH64_PARSE_OK;
}

static const struct aarch64_branch_protect_type aarch64_pac_ret_subtypes[] = {
  { "leaf", aarch64_handle_pac_ret_leaf, NULL, 0 },
  { "b-key", aarch64_handle_pac_ret_b_key, NULL, 0 },
  { NULL, NULL, NULL, 0 }
};

static const struct aarch64_branch_protect_type aarch64_branch_protect_types[] = {
  { "none", aarch64_handle_no_branch_protection, NULL, 0 },
  { "standard", aarch64_handle_standard_branch_protection, NULL, 0 },
  { "pac-ret", aarch64_handle_pac_ret_protection, aarch64_pac_ret_subtypes,
    ARRAY_SIZE (aarch64_pac_ret_subtypes) },
  { "bti", aarch64_handle_bti_protection, NULL, 0 },
  { NULL, NULL, NULL, 0 }
};

/* The condition codes of the processor, and the inverse function.  */
static const char * const aarch64_condition_codes[] =
{
  "eq", "ne", "cs", "cc", "mi", "pl", "vs", "vc",
  "hi", "ls", "ge", "lt", "gt", "le", "al", "nv"
};

/* The preferred condition codes for SVE conditions.  */
static const char *const aarch64_sve_condition_codes[] =
{
  "none", "any", "nlast", "last", "first", "nfrst", "vs", "vc",
  "pmore", "plast", "tcont", "tstop", "gt", "le", "al", "nv"
};

/* Return the assembly token for svpattern value VALUE.  */

static const char *
svpattern_token (enum aarch64_svpattern pattern)
{
  switch (pattern)
    {
#define CASE(UPPER, LOWER, VALUE) case AARCH64_SV_##UPPER: return #LOWER;
    AARCH64_FOR_SVPATTERN (CASE)
#undef CASE
    case AARCH64_NUM_SVPATTERNS:
      break;
    }
  gcc_unreachable ();
}

/* Return the descriptor of the SIMD ABI.  */

static const predefined_function_abi &
aarch64_simd_abi (void)
{
  predefined_function_abi &simd_abi = function_abis[ARM_PCS_SIMD];
  if (!simd_abi.initialized_p ())
    {
      HARD_REG_SET full_reg_clobbers
	= default_function_abi.full_reg_clobbers ();
      for (int regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
	if (FP_SIMD_SAVED_REGNUM_P (regno))
	  CLEAR_HARD_REG_BIT (full_reg_clobbers, regno);
      simd_abi.initialize (ARM_PCS_SIMD, full_reg_clobbers);
    }
  return simd_abi;
}

/* Return the descriptor of the SVE PCS.  */

static const predefined_function_abi &
aarch64_sve_abi (void)
{
  predefined_function_abi &sve_abi = function_abis[ARM_PCS_SVE];
  if (!sve_abi.initialized_p ())
    {
      HARD_REG_SET full_reg_clobbers
	= default_function_abi.full_reg_clobbers ();
      for (int regno = V8_REGNUM; regno <= V23_REGNUM; ++regno)
	CLEAR_HARD_REG_BIT (full_reg_clobbers, regno);
      for (int regno = P4_REGNUM; regno <= P11_REGNUM; ++regno)
	CLEAR_HARD_REG_BIT (full_reg_clobbers, regno);
      sve_abi.initialize (ARM_PCS_SVE, full_reg_clobbers);
    }
  return sve_abi;
}

/* Generate code to enable conditional branches in functions over 1 MiB.  */
const char *
aarch64_gen_far_branch (rtx * operands, int pos_label, const char * dest,
			const char * branch_format)
{
  rtx_code_label * tmp_label = gen_label_rtx ();
  char label_buf[256];
  char buffer[128];
  ASM_GENERATE_INTERNAL_LABEL (label_buf, dest,
			       CODE_LABEL_NUMBER (tmp_label));
  const char *label_ptr = targetm.strip_name_encoding (label_buf);
  rtx dest_label = operands[pos_label];
  operands[pos_label] = tmp_label;

  snprintf (buffer, sizeof (buffer), "%s%s", branch_format, label_ptr);
  output_asm_insn (buffer, operands);

  snprintf (buffer, sizeof (buffer), "b\t%%l%d\n%s:", pos_label, label_ptr);
  operands[pos_label] = dest_label;
  output_asm_insn (buffer, operands);
  return "";
}

void
aarch64_err_no_fpadvsimd (machine_mode mode)
{
  if (TARGET_GENERAL_REGS_ONLY)
    if (FLOAT_MODE_P (mode))
      error ("%qs is incompatible with the use of floating-point types",
	     "-mgeneral-regs-only");
    else
      error ("%qs is incompatible with the use of vector types",
	     "-mgeneral-regs-only");
  else
    if (FLOAT_MODE_P (mode))
      error ("%qs feature modifier is incompatible with the use of"
	     " floating-point types", "+nofp");
    else
      error ("%qs feature modifier is incompatible with the use of"
	     " vector types", "+nofp");
}

/* Report when we try to do something that requires SVE when SVE is disabled.
   This is an error of last resort and isn't very high-quality.  It usually
   involves attempts to measure the vector length in some way.  */
static void
aarch64_report_sve_required (void)
{
  static bool reported_p = false;

  /* Avoid reporting a slew of messages for a single oversight.  */
  if (reported_p)
    return;

  error ("this operation requires the SVE ISA extension");
  inform (input_location, "you can enable SVE using the command-line"
	  " option %<-march%>, or by using the %<target%>"
	  " attribute or pragma");
  reported_p = true;
}

/* Return true if REGNO is P0-P15 or one of the special FFR-related
   registers.  */
inline bool
pr_or_ffr_regnum_p (unsigned int regno)
{
  return PR_REGNUM_P (regno) || regno == FFR_REGNUM || regno == FFRT_REGNUM;
}

/* Implement TARGET_IRA_CHANGE_PSEUDO_ALLOCNO_CLASS.
   The register allocator chooses POINTER_AND_FP_REGS if FP_REGS and
   GENERAL_REGS have the same cost - even if POINTER_AND_FP_REGS has a much
   higher cost.  POINTER_AND_FP_REGS is also used if the cost of both FP_REGS
   and GENERAL_REGS is lower than the memory cost (in this case the best class
   is the lowest cost one).  Using POINTER_AND_FP_REGS irrespectively of its
   cost results in bad allocations with many redundant int<->FP moves which
   are expensive on various cores.
   To avoid this we don't allow POINTER_AND_FP_REGS as the allocno class, but
   force a decision between FP_REGS and GENERAL_REGS.  We use the allocno class
   if it isn't POINTER_AND_FP_REGS.  Similarly, use the best class if it isn't
   POINTER_AND_FP_REGS.  Otherwise set the allocno class depending on the mode.
   The result of this is that it is no longer inefficient to have a higher
   memory move cost than the register move cost.
*/

static reg_class_t
aarch64_ira_change_pseudo_allocno_class (int regno, reg_class_t allocno_class,
					 reg_class_t best_class)
{
  machine_mode mode;

  if (!reg_class_subset_p (GENERAL_REGS, allocno_class)
      || !reg_class_subset_p (FP_REGS, allocno_class))
    return allocno_class;

  if (!reg_class_subset_p (GENERAL_REGS, best_class)
      || !reg_class_subset_p (FP_REGS, best_class))
    return best_class;

  mode = PSEUDO_REGNO_MODE (regno);
  return FLOAT_MODE_P (mode) || VECTOR_MODE_P (mode) ? FP_REGS : GENERAL_REGS;
}

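/* Implement TARGET_MIN_DIVISIONS_FOR_RECIP_MUL.  */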
static unsigned int
aarch64_min_divisions_for_recip_mul (machine_mode mode)
{
  if (GET_MODE_UNIT_SIZE (mode) == 4)
    return aarch64_tune_params.min_div_recip_mul_sf;
  return aarch64_tune_params.min_div_recip_mul_df;
}

b5b33e11 1545/* Return the reassociation width of treeop OPC with mode MODE. */
cee66c68 1546static int
b5b33e11 1547aarch64_reassociation_width (unsigned opc, machine_mode mode)
cee66c68
WD
1548{
1549 if (VECTOR_MODE_P (mode))
b175b679 1550 return aarch64_tune_params.vec_reassoc_width;
cee66c68 1551 if (INTEGRAL_MODE_P (mode))
b175b679 1552 return aarch64_tune_params.int_reassoc_width;
b5b33e11
WD
1553 /* Avoid reassociating floating point addition so we emit more FMAs. */
1554 if (FLOAT_MODE_P (mode) && opc != PLUS_EXPR)
b175b679 1555 return aarch64_tune_params.fp_reassoc_width;
cee66c68
WD
1556 return 1;
1557}
1558
43e9d192
IB
1559/* Provide a mapping from gcc register numbers to dwarf register numbers. */
1560unsigned
1561aarch64_dbx_register_number (unsigned regno)
1562{
1563 if (GP_REGNUM_P (regno))
1564 return AARCH64_DWARF_R0 + regno - R0_REGNUM;
1565 else if (regno == SP_REGNUM)
1566 return AARCH64_DWARF_SP;
1567 else if (FP_REGNUM_P (regno))
1568 return AARCH64_DWARF_V0 + regno - V0_REGNUM;
43cacb12
RS
1569 else if (PR_REGNUM_P (regno))
1570 return AARCH64_DWARF_P0 + regno - P0_REGNUM;
1571 else if (regno == VG_REGNUM)
1572 return AARCH64_DWARF_VG;
43e9d192
IB
1573
1574 /* Return values >= DWARF_FRAME_REGISTERS indicate that there is no
1575 equivalent DWARF register. */
1576 return DWARF_FRAME_REGISTERS;
1577}
1578
d29f7dd5
RS
1579/* If X is a CONST_DOUBLE, return its bit representation as a constant
1580 integer, otherwise return X unmodified. */
1581static rtx
1582aarch64_bit_representation (rtx x)
1583{
1584 if (CONST_DOUBLE_P (x))
1585 x = gen_lowpart (int_mode_for_mode (GET_MODE (x)).require (), x);
1586 return x;
1587}
1588
43cacb12
RS
1589/* Return true if MODE is any of the Advanced SIMD structure modes. */
1590static bool
1591aarch64_advsimd_struct_mode_p (machine_mode mode)
1592{
1593 return (TARGET_SIMD
1594 && (mode == OImode || mode == CImode || mode == XImode));
1595}
1596
1597/* Return true if MODE is an SVE predicate mode. */
1598static bool
1599aarch64_sve_pred_mode_p (machine_mode mode)
1600{
1601 return (TARGET_SVE
1602 && (mode == VNx16BImode
1603 || mode == VNx8BImode
1604 || mode == VNx4BImode
1605 || mode == VNx2BImode));
1606}
1607
1608/* Three mutually-exclusive flags describing a vector or predicate type. */
1609const unsigned int VEC_ADVSIMD = 1;
1610const unsigned int VEC_SVE_DATA = 2;
1611const unsigned int VEC_SVE_PRED = 4;
1612/* Can be used in combination with VEC_ADVSIMD or VEC_SVE_DATA to indicate
1613 a structure of 2, 3 or 4 vectors. */
1614const unsigned int VEC_STRUCT = 8;
550a3380
RS
1615/* Can be used in combination with VEC_SVE_DATA to indicate that the
1616 vector has fewer significant bytes than a full SVE vector. */
1617const unsigned int VEC_PARTIAL = 16;
43cacb12
RS
1618/* Useful combinations of the above. */
1619const unsigned int VEC_ANY_SVE = VEC_SVE_DATA | VEC_SVE_PRED;
1620const unsigned int VEC_ANY_DATA = VEC_ADVSIMD | VEC_SVE_DATA;
1621
1622/* Return a set of flags describing the vector properties of mode MODE.
1623 Ignore modes that are not supported by the current target. */
1624static unsigned int
1625aarch64_classify_vector_mode (machine_mode mode)
1626{
1627 if (aarch64_advsimd_struct_mode_p (mode))
1628 return VEC_ADVSIMD | VEC_STRUCT;
1629
1630 if (aarch64_sve_pred_mode_p (mode))
1631 return VEC_SVE_PRED;
1632
806f69cd
RS
1633 /* Make the decision based on the mode's enum value rather than its
1634 properties, so that we keep the correct classification regardless
1635 of -msve-vector-bits. */
1636 switch (mode)
43cacb12 1637 {
550a3380
RS
1638 /* Partial SVE QI vectors. */
1639 case E_VNx2QImode:
1640 case E_VNx4QImode:
1641 case E_VNx8QImode:
1642 /* Partial SVE HI vectors. */
1643 case E_VNx2HImode:
1644 case E_VNx4HImode:
1645 /* Partial SVE SI vector. */
1646 case E_VNx2SImode:
cc68f7c2
RS
1647 /* Partial SVE HF vectors. */
1648 case E_VNx2HFmode:
1649 case E_VNx4HFmode:
1650 /* Partial SVE SF vector. */
1651 case E_VNx2SFmode:
550a3380
RS
1652 return TARGET_SVE ? VEC_SVE_DATA | VEC_PARTIAL : 0;
1653
806f69cd
RS
1654 case E_VNx16QImode:
1655 case E_VNx8HImode:
1656 case E_VNx4SImode:
1657 case E_VNx2DImode:
1658 case E_VNx8HFmode:
1659 case E_VNx4SFmode:
1660 case E_VNx2DFmode:
1661 return TARGET_SVE ? VEC_SVE_DATA : 0;
1662
1663 /* x2 SVE vectors. */
1664 case E_VNx32QImode:
1665 case E_VNx16HImode:
1666 case E_VNx8SImode:
1667 case E_VNx4DImode:
1668 case E_VNx16HFmode:
1669 case E_VNx8SFmode:
1670 case E_VNx4DFmode:
1671 /* x3 SVE vectors. */
1672 case E_VNx48QImode:
1673 case E_VNx24HImode:
1674 case E_VNx12SImode:
1675 case E_VNx6DImode:
1676 case E_VNx24HFmode:
1677 case E_VNx12SFmode:
1678 case E_VNx6DFmode:
1679 /* x4 SVE vectors. */
1680 case E_VNx64QImode:
1681 case E_VNx32HImode:
1682 case E_VNx16SImode:
1683 case E_VNx8DImode:
1684 case E_VNx32HFmode:
1685 case E_VNx16SFmode:
1686 case E_VNx8DFmode:
1687 return TARGET_SVE ? VEC_SVE_DATA | VEC_STRUCT : 0;
1688
1689 /* 64-bit Advanced SIMD vectors. */
1690 case E_V8QImode:
1691 case E_V4HImode:
1692 case E_V2SImode:
1693 /* ...E_V1DImode doesn't exist. */
1694 case E_V4HFmode:
1695 case E_V2SFmode:
1696 case E_V1DFmode:
1697 /* 128-bit Advanced SIMD vectors. */
1698 case E_V16QImode:
1699 case E_V8HImode:
1700 case E_V4SImode:
1701 case E_V2DImode:
1702 case E_V8HFmode:
1703 case E_V4SFmode:
1704 case E_V2DFmode:
1705 return TARGET_SIMD ? VEC_ADVSIMD : 0;
1706
1707 default:
1708 return 0;
43cacb12 1709 }
43cacb12
RS
1710}
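
/* A few hand-worked examples of the classification above (illustrative
   only; the switch statement is the authoritative mapping), assuming
   both TARGET_SVE and TARGET_SIMD are enabled:

     aarch64_classify_vector_mode (VNx4SImode) == VEC_SVE_DATA
     aarch64_classify_vector_mode (VNx2SImode) == (VEC_SVE_DATA | VEC_PARTIAL)
     aarch64_classify_vector_mode (VNx8SImode) == (VEC_SVE_DATA | VEC_STRUCT)
     aarch64_classify_vector_mode (VNx4BImode) == VEC_SVE_PRED
     aarch64_classify_vector_mode (V4SImode)   == VEC_ADVSIMD
     aarch64_classify_vector_mode (DImode)     == 0  */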
1711
1712/* Return true if MODE is any of the data vector modes, including
1713 structure modes. */
43e9d192 1714static bool
43cacb12 1715aarch64_vector_data_mode_p (machine_mode mode)
43e9d192 1716{
43cacb12 1717 return aarch64_classify_vector_mode (mode) & VEC_ANY_DATA;
43e9d192
IB
1718}
1719
5c38705d
RS
1720/* Return true if MODE is any form of SVE mode, including predicates,
1721 vectors and structures. */
1722bool
1723aarch64_sve_mode_p (machine_mode mode)
1724{
1725 return aarch64_classify_vector_mode (mode) & VEC_ANY_SVE;
1726}
1727
43cacb12
RS
1728/* Return true if MODE is an SVE data vector mode; either a single vector
1729 or a structure of vectors. */
43e9d192 1730static bool
43cacb12 1731aarch64_sve_data_mode_p (machine_mode mode)
43e9d192 1732{
43cacb12 1733 return aarch64_classify_vector_mode (mode) & VEC_SVE_DATA;
43e9d192
IB
1734}
1735
550a3380
RS
1736/* Return the number of defined bytes in one constituent vector of
1737 SVE mode MODE, which has vector flags VEC_FLAGS. */
1738static poly_int64
1739aarch64_vl_bytes (machine_mode mode, unsigned int vec_flags)
1740{
1741 if (vec_flags & VEC_PARTIAL)
1742 /* A single partial vector. */
1743 return GET_MODE_SIZE (mode);
1744
1745 if (vec_flags & VEC_SVE_DATA)
1746 /* A single vector or a tuple. */
1747 return BYTES_PER_SVE_VECTOR;
1748
1749 /* A single predicate. */
1750 gcc_assert (vec_flags & VEC_SVE_PRED);
1751 return BYTES_PER_SVE_PRED;
1752}
1753
9f4cbab8
RS
1754/* Implement target hook TARGET_ARRAY_MODE. */
1755static opt_machine_mode
1756aarch64_array_mode (machine_mode mode, unsigned HOST_WIDE_INT nelems)
1757{
1758 if (aarch64_classify_vector_mode (mode) == VEC_SVE_DATA
1759 && IN_RANGE (nelems, 2, 4))
1760 return mode_for_vector (GET_MODE_INNER (mode),
1761 GET_MODE_NUNITS (mode) * nelems);
1762
1763 return opt_machine_mode ();
1764}
1765
43e9d192
IB
1766/* Implement target hook TARGET_ARRAY_MODE_SUPPORTED_P. */
1767static bool
ef4bddc2 1768aarch64_array_mode_supported_p (machine_mode mode,
43e9d192
IB
1769 unsigned HOST_WIDE_INT nelems)
1770{
1771 if (TARGET_SIMD
635e66fe
AL
1772 && (AARCH64_VALID_SIMD_QREG_MODE (mode)
1773 || AARCH64_VALID_SIMD_DREG_MODE (mode))
43e9d192
IB
1774 && (nelems >= 2 && nelems <= 4))
1775 return true;
1776
1777 return false;
1778}
1779
cc68f7c2
RS
1780/* MODE is some form of SVE vector mode. For data modes, return the number
1781 of vector register bits that each element of MODE occupies, such as 64
1782 for both VNx2DImode and VNx2SImode (where each 32-bit value is stored
1783 in a 64-bit container). For predicate modes, return the number of
1784 data bits controlled by each significant predicate bit. */
1785
1786static unsigned int
1787aarch64_sve_container_bits (machine_mode mode)
1788{
1789 unsigned int vec_flags = aarch64_classify_vector_mode (mode);
1790 poly_uint64 vector_bits = (vec_flags & (VEC_PARTIAL | VEC_SVE_PRED)
1791 ? BITS_PER_SVE_VECTOR
1792 : GET_MODE_BITSIZE (mode));
1793 return vector_element_size (vector_bits, GET_MODE_NUNITS (mode));
1794}
1795
43cacb12
RS
1796/* Return the SVE predicate mode to use for elements that have
1797 ELEM_NBYTES bytes, if such a mode exists. */
1798
1799opt_machine_mode
1800aarch64_sve_pred_mode (unsigned int elem_nbytes)
1801{
1802 if (TARGET_SVE)
1803 {
1804 if (elem_nbytes == 1)
1805 return VNx16BImode;
1806 if (elem_nbytes == 2)
1807 return VNx8BImode;
1808 if (elem_nbytes == 4)
1809 return VNx4BImode;
1810 if (elem_nbytes == 8)
1811 return VNx2BImode;
1812 }
1813 return opt_machine_mode ();
1814}
1815
cc68f7c2
RS
1816/* Return the SVE predicate mode that should be used to control
1817 SVE mode MODE. */
1818
1819machine_mode
1820aarch64_sve_pred_mode (machine_mode mode)
1821{
1822 unsigned int bits = aarch64_sve_container_bits (mode);
1823 return aarch64_sve_pred_mode (bits / BITS_PER_UNIT).require ();
1824}
1825
43cacb12
RS
1826/* Implement TARGET_VECTORIZE_GET_MASK_MODE. */
1827
1828static opt_machine_mode
10116ec1 1829aarch64_get_mask_mode (machine_mode mode)
43cacb12 1830{
10116ec1
RS
1831 unsigned int vec_flags = aarch64_classify_vector_mode (mode);
1832 if (vec_flags & VEC_SVE_DATA)
cc68f7c2 1833 return aarch64_sve_pred_mode (mode);
43cacb12 1834
10116ec1 1835 return default_get_mask_mode (mode);
43cacb12
RS
1836}
1837
d7a09c44
RS
1838/* Return the SVE vector mode that has NUNITS elements of mode INNER_MODE. */
1839
624d0f07 1840opt_machine_mode
d7a09c44
RS
1841aarch64_sve_data_mode (scalar_mode inner_mode, poly_uint64 nunits)
1842{
1843 enum mode_class mclass = (is_a <scalar_float_mode> (inner_mode)
1844 ? MODE_VECTOR_FLOAT : MODE_VECTOR_INT);
1845 machine_mode mode;
1846 FOR_EACH_MODE_IN_CLASS (mode, mclass)
1847 if (inner_mode == GET_MODE_INNER (mode)
1848 && known_eq (nunits, GET_MODE_NUNITS (mode))
1849 && aarch64_sve_data_mode_p (mode))
1850 return mode;
1851 return opt_machine_mode ();
1852}
1853
1044fa32
RS
1854/* Return the integer element mode associated with SVE mode MODE. */
1855
1856static scalar_int_mode
1857aarch64_sve_element_int_mode (machine_mode mode)
1858{
cc68f7c2
RS
1859 poly_uint64 vector_bits = (GET_MODE_CLASS (mode) == MODE_VECTOR_BOOL
1860 ? BITS_PER_SVE_VECTOR
1861 : GET_MODE_BITSIZE (mode));
1862 unsigned int elt_bits = vector_element_size (vector_bits,
1044fa32
RS
1863 GET_MODE_NUNITS (mode));
1864 return int_mode_for_size (elt_bits, 0).require ();
1865}
1866
cc68f7c2
RS
1867/* Return an integer element mode that contains exactly
1868 aarch64_sve_container_bits (MODE) bits. This is wider than
1869 aarch64_sve_element_int_mode if MODE is a partial vector,
1870 otherwise it's the same. */
1871
1872static scalar_int_mode
1873aarch64_sve_container_int_mode (machine_mode mode)
1874{
1875 return int_mode_for_size (aarch64_sve_container_bits (mode), 0).require ();
1876}
1877
d7a09c44 1878/* Return the integer vector mode associated with SVE mode MODE.
d083ee47 1879 Unlike related_int_vector_mode, this can handle the case in which
d7a09c44
RS
1880 MODE is a predicate (and thus has a different total size). */
1881
624d0f07 1882machine_mode
d7a09c44
RS
1883aarch64_sve_int_mode (machine_mode mode)
1884{
1885 scalar_int_mode int_mode = aarch64_sve_element_int_mode (mode);
1886 return aarch64_sve_data_mode (int_mode, GET_MODE_NUNITS (mode)).require ();
1887}
1888
74166aab
RS
1889/* Implement TARGET_VECTORIZE_RELATED_MODE. */
1890
1891static opt_machine_mode
1892aarch64_vectorize_related_mode (machine_mode vector_mode,
1893 scalar_mode element_mode,
1894 poly_uint64 nunits)
1895{
1896 unsigned int vec_flags = aarch64_classify_vector_mode (vector_mode);
1897
cc68f7c2
RS
1898 /* If we're operating on SVE vectors, try to return an SVE mode. */
1899 poly_uint64 sve_nunits;
1900 if ((vec_flags & VEC_SVE_DATA)
1901 && multiple_p (BYTES_PER_SVE_VECTOR,
1902 GET_MODE_SIZE (element_mode), &sve_nunits))
1903 {
1904 machine_mode sve_mode;
1905 if (maybe_ne (nunits, 0U))
1906 {
1907 /* Try to find a full or partial SVE mode with exactly
1908 NUNITS units. */
1909 if (multiple_p (sve_nunits, nunits)
1910 && aarch64_sve_data_mode (element_mode,
1911 nunits).exists (&sve_mode))
1912 return sve_mode;
1913 }
1914 else
1915 {
1916 /* Take the preferred number of units from the number of bytes
1917 that fit in VECTOR_MODE. We always start by "autodetecting"
1918 a full vector mode with preferred_simd_mode, so vectors
1919 chosen here will also be full vector modes. Then
1920 autovectorize_vector_modes tries smaller starting modes
1921 and thus smaller preferred numbers of units. */
1922 sve_nunits = ordered_min (sve_nunits, GET_MODE_SIZE (vector_mode));
1923 if (aarch64_sve_data_mode (element_mode,
1924 sve_nunits).exists (&sve_mode))
1925 return sve_mode;
1926 }
1927 }
1928
74166aab
RS
1929 /* Prefer to use 1 128-bit vector instead of 2 64-bit vectors. */
1930 if ((vec_flags & VEC_ADVSIMD)
1931 && known_eq (nunits, 0U)
1932 && known_eq (GET_MODE_BITSIZE (vector_mode), 64U)
1933 && maybe_ge (GET_MODE_BITSIZE (element_mode)
1934 * GET_MODE_NUNITS (vector_mode), 128U))
1935 {
1936 machine_mode res = aarch64_simd_container_mode (element_mode, 128);
1937 if (VECTOR_MODE_P (res))
1938 return res;
1939 }
1940
1941 return default_vectorize_related_mode (vector_mode, element_mode, nunits);
1942}
1943
b41d1f6e
RS
1944/* Implement TARGET_PREFERRED_ELSE_VALUE. For binary operations,
1945 prefer to use the first arithmetic operand as the else value if
1946 the else value doesn't matter, since that exactly matches the SVE
1947 destructive merging form. For ternary operations we could either
1948 pick the first operand and use FMAD-like instructions or the last
1949 operand and use FMLA-like instructions; the latter seems more
1950 natural. */
6a86928d
RS
1951
1952static tree
b41d1f6e 1953aarch64_preferred_else_value (unsigned, tree, unsigned int nops, tree *ops)
6a86928d 1954{
b41d1f6e 1955 return nops == 3 ? ops[2] : ops[0];
6a86928d
RS
1956}
1957
c43f4279 1958/* Implement TARGET_HARD_REGNO_NREGS. */
43e9d192 1959
c43f4279 1960static unsigned int
ef4bddc2 1961aarch64_hard_regno_nregs (unsigned regno, machine_mode mode)
43e9d192 1962{
6a70badb
RS
1963 /* ??? Logically we should only need to provide a value when
1964 HARD_REGNO_MODE_OK says that the combination is valid,
1965 but at the moment we need to handle all modes. Just ignore
1966 any runtime parts for registers that can't store them. */
1967 HOST_WIDE_INT lowest_size = constant_lower_bound (GET_MODE_SIZE (mode));
43e9d192
IB
1968 switch (aarch64_regno_regclass (regno))
1969 {
1970 case FP_REGS:
1971 case FP_LO_REGS:
163b1f6a 1972 case FP_LO8_REGS:
550a3380
RS
1973 {
1974 unsigned int vec_flags = aarch64_classify_vector_mode (mode);
1975 if (vec_flags & VEC_SVE_DATA)
1976 return exact_div (GET_MODE_SIZE (mode),
1977 aarch64_vl_bytes (mode, vec_flags)).to_constant ();
1978 return CEIL (lowest_size, UNITS_PER_VREG);
1979 }
43cacb12
RS
1980 case PR_REGS:
1981 case PR_LO_REGS:
1982 case PR_HI_REGS:
183bfdaf
RS
1983 case FFR_REGS:
1984 case PR_AND_FFR_REGS:
43cacb12 1985 return 1;
43e9d192 1986 default:
6a70badb 1987 return CEIL (lowest_size, UNITS_PER_WORD);
43e9d192
IB
1988 }
1989 gcc_unreachable ();
1990}
1991
f939c3e6 1992/* Implement TARGET_HARD_REGNO_MODE_OK. */
43e9d192 1993
f939c3e6 1994static bool
ef4bddc2 1995aarch64_hard_regno_mode_ok (unsigned regno, machine_mode mode)
43e9d192
IB
1996{
1997 if (GET_MODE_CLASS (mode) == MODE_CC)
1998 return regno == CC_REGNUM;
1999
43cacb12
RS
2000 if (regno == VG_REGNUM)
2001 /* This must have the same size as _Unwind_Word. */
2002 return mode == DImode;
2003
2004 unsigned int vec_flags = aarch64_classify_vector_mode (mode);
2005 if (vec_flags & VEC_SVE_PRED)
183bfdaf 2006 return pr_or_ffr_regnum_p (regno);
43cacb12 2007
183bfdaf
RS
2008 if (pr_or_ffr_regnum_p (regno))
2009 return false;
43cacb12 2010
9259db42
YZ
2011 if (regno == SP_REGNUM)
2012 /* The purpose of comparing with ptr_mode is to support the
2013 global register variable associated with the stack pointer
2014 register via the syntax of asm ("wsp") in ILP32. */
2015 return mode == Pmode || mode == ptr_mode;
2016
2017 if (regno == FRAME_POINTER_REGNUM || regno == ARG_POINTER_REGNUM)
43e9d192
IB
2018 return mode == Pmode;
2019
563cc649
RH
2020 if (GP_REGNUM_P (regno))
2021 {
aa1a2795
RS
2022 if (vec_flags & VEC_ANY_SVE)
2023 return false;
563cc649
RH
2024 if (known_le (GET_MODE_SIZE (mode), 8))
2025 return true;
aa1a2795 2026 if (known_le (GET_MODE_SIZE (mode), 16))
563cc649
RH
2027 return (regno & 1) == 0;
2028 }
2029 else if (FP_REGNUM_P (regno))
43e9d192 2030 {
43cacb12 2031 if (vec_flags & VEC_STRUCT)
4edd6298 2032 return end_hard_regno (mode, regno) - 1 <= V31_REGNUM;
43e9d192 2033 else
43cacb12 2034 return !VECTOR_MODE_P (mode) || vec_flags != 0;
43e9d192
IB
2035 }
2036
f939c3e6 2037 return false;
43e9d192
IB
2038}
2039
c600df9a
RS
2040/* Return true if TYPE is a type that should be passed or returned in
2041 SVE registers, assuming enough registers are available. When returning
2042 true, set *NUM_ZR and *NUM_PR to the number of required Z and P registers
2043 respectively. */
2044
2045static bool
2046aarch64_sve_argument_p (const_tree type, unsigned int *num_zr,
2047 unsigned int *num_pr)
2048{
2049 if (aarch64_sve::svbool_type_p (type))
2050 {
2051 *num_pr = 1;
2052 *num_zr = 0;
2053 return true;
2054 }
2055
2056 if (unsigned int nvectors = aarch64_sve::nvectors_if_data_type (type))
2057 {
2058 *num_pr = 0;
2059 *num_zr = nvectors;
2060 return true;
2061 }
2062
2063 return false;
2064}
2065
2066/* Return true if a function with type FNTYPE returns its value in
2067 SVE vector or predicate registers. */
2068
2069static bool
2070aarch64_returns_value_in_sve_regs_p (const_tree fntype)
2071{
2072 unsigned int num_zr, num_pr;
2073 tree return_type = TREE_TYPE (fntype);
2074 return (return_type != error_mark_node
2075 && aarch64_sve_argument_p (return_type, &num_zr, &num_pr));
2076}
2077
2078/* Return true if a function with type FNTYPE takes arguments in
2079 SVE vector or predicate registers. */
2080
2081static bool
2082aarch64_takes_arguments_in_sve_regs_p (const_tree fntype)
2083{
2084 CUMULATIVE_ARGS args_so_far_v;
2085 aarch64_init_cumulative_args (&args_so_far_v, NULL_TREE, NULL_RTX,
2086 NULL_TREE, 0, true);
2087 cumulative_args_t args_so_far = pack_cumulative_args (&args_so_far_v);
2088
2089 for (tree chain = TYPE_ARG_TYPES (fntype);
2090 chain && chain != void_list_node;
2091 chain = TREE_CHAIN (chain))
2092 {
2093 tree arg_type = TREE_VALUE (chain);
2094 if (arg_type == error_mark_node)
2095 return false;
2096
2097 function_arg_info arg (arg_type, /*named=*/true);
2098 apply_pass_by_reference_rules (&args_so_far_v, arg);
2099 unsigned int num_zr, num_pr;
2100 if (aarch64_sve_argument_p (arg.type, &num_zr, &num_pr))
2101 return true;
2102
2103 targetm.calls.function_arg_advance (args_so_far, arg);
2104 }
2105 return false;
2106}
2107
002ffd3c
RS
2108/* Implement TARGET_FNTYPE_ABI. */
2109
2110static const predefined_function_abi &
2111aarch64_fntype_abi (const_tree fntype)
2112{
2113 if (lookup_attribute ("aarch64_vector_pcs", TYPE_ATTRIBUTES (fntype)))
2114 return aarch64_simd_abi ();
c600df9a
RS
2115
2116 if (aarch64_returns_value_in_sve_regs_p (fntype)
2117 || aarch64_takes_arguments_in_sve_regs_p (fntype))
2118 return aarch64_sve_abi ();
2119
002ffd3c
RS
2120 return default_function_abi;
2121}
2122
c600df9a 2123/* Return true if we should emit CFI for register REGNO. */
a0d0b980
SE
2124
2125static bool
c600df9a 2126aarch64_emit_cfi_for_reg_p (unsigned int regno)
a0d0b980 2127{
c600df9a
RS
2128 return (GP_REGNUM_P (regno)
2129 || !default_function_abi.clobbers_full_reg_p (regno));
a0d0b980
SE
2130}
2131
c600df9a 2132/* Return the mode we should use to save and restore register REGNO. */
a0d0b980
SE
2133
2134static machine_mode
c600df9a 2135aarch64_reg_save_mode (unsigned int regno)
a0d0b980 2136{
c600df9a
RS
2137 if (GP_REGNUM_P (regno))
2138 return DImode;
2139
2140 if (FP_REGNUM_P (regno))
2141 switch (crtl->abi->id ())
2142 {
2143 case ARM_PCS_AAPCS64:
2144 /* Only the low 64 bits are saved by the base PCS. */
2145 return DFmode;
2146
2147 case ARM_PCS_SIMD:
2148 /* The vector PCS saves the low 128 bits (which is the full
2149 register on non-SVE targets). */
2150 return TFmode;
2151
2152 case ARM_PCS_SVE:
2153 /* Use vectors of DImode for registers that need frame
 2154 information, so that the first 64 bits of the save slot
2155 are always the equivalent of what storing D<n> would give. */
2156 if (aarch64_emit_cfi_for_reg_p (regno))
2157 return VNx2DImode;
2158
2159 /* Use vectors of bytes otherwise, so that the layout is
2160 endian-agnostic, and so that we can use LDR and STR for
2161 big-endian targets. */
2162 return VNx16QImode;
2163
2164 case ARM_PCS_TLSDESC:
2165 case ARM_PCS_UNKNOWN:
2166 break;
2167 }
2168
2169 if (PR_REGNUM_P (regno))
2170 /* Save the full predicate register. */
2171 return VNx16BImode;
2172
2173 gcc_unreachable ();
a0d0b980
SE
2174}
2175
5a5a3bc5 2176/* Implement TARGET_INSN_CALLEE_ABI. */
b3650d40 2177
5a5a3bc5
RS
2178const predefined_function_abi &
2179aarch64_insn_callee_abi (const rtx_insn *insn)
b3650d40 2180{
08cc4d92
RS
2181 rtx pat = PATTERN (insn);
2182 gcc_assert (GET_CODE (pat) == PARALLEL);
2183 rtx unspec = XVECEXP (pat, 0, 1);
2184 gcc_assert (GET_CODE (unspec) == UNSPEC
2185 && XINT (unspec, 1) == UNSPEC_CALLEE_ABI);
2186 return function_abis[INTVAL (XVECEXP (unspec, 0, 0))];
b3650d40
SE
2187}
2188
80ec73f4
RS
2189/* Implement TARGET_HARD_REGNO_CALL_PART_CLOBBERED. The callee only saves
2190 the lower 64 bits of a 128-bit register. Tell the compiler the callee
2191 clobbers the top 64 bits when restoring the bottom 64 bits. */
2192
2193static bool
6ee2cc70
RS
2194aarch64_hard_regno_call_part_clobbered (unsigned int abi_id,
2195 unsigned int regno,
473574ee 2196 machine_mode mode)
80ec73f4 2197{
c600df9a 2198 if (FP_REGNUM_P (regno) && abi_id != ARM_PCS_SVE)
51051f47 2199 {
51051f47
RS
2200 poly_int64 per_register_size = GET_MODE_SIZE (mode);
2201 unsigned int nregs = hard_regno_nregs (regno, mode);
2202 if (nregs > 1)
2203 per_register_size = exact_div (per_register_size, nregs);
bb6ce448
RS
2204 if (abi_id == ARM_PCS_SIMD || abi_id == ARM_PCS_TLSDESC)
2205 return maybe_gt (per_register_size, 16);
2206 return maybe_gt (per_register_size, 8);
51051f47
RS
2207 }
2208 return false;
473574ee
SE
2209}
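
/* A hand-worked illustration of the rule above (a sketch, not an
   exhaustive specification): for V0_REGNUM in TFmode, per_register_size
   is 16, so the base AAPCS64 ABI reports a partial clobber (16 > 8)
   while ARM_PCS_SIMD does not (16 is not greater than 16).  For GP
   registers, and for any register under ARM_PCS_SVE, the function
   returns false.  */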
2210
43cacb12
RS
2211/* Implement REGMODE_NATURAL_SIZE. */
2212poly_uint64
2213aarch64_regmode_natural_size (machine_mode mode)
2214{
2215 /* The natural size for SVE data modes is one SVE data vector,
2216 and similarly for predicates. We can't independently modify
2217 anything smaller than that. */
2218 /* ??? For now, only do this for variable-width SVE registers.
2219 Doing it for constant-sized registers breaks lower-subreg.c. */
2220 /* ??? And once that's fixed, we should probably have similar
2221 code for Advanced SIMD. */
2222 if (!aarch64_sve_vg.is_constant ())
2223 {
2224 unsigned int vec_flags = aarch64_classify_vector_mode (mode);
2225 if (vec_flags & VEC_SVE_PRED)
2226 return BYTES_PER_SVE_PRED;
2227 if (vec_flags & VEC_SVE_DATA)
2228 return BYTES_PER_SVE_VECTOR;
2229 }
2230 return UNITS_PER_WORD;
2231}
2232
73d9ac6a 2233/* Implement HARD_REGNO_CALLER_SAVE_MODE. */
ef4bddc2 2234machine_mode
43cacb12
RS
2235aarch64_hard_regno_caller_save_mode (unsigned regno, unsigned,
2236 machine_mode mode)
2237{
2238 /* The predicate mode determines which bits are significant and
2239 which are "don't care". Decreasing the number of lanes would
2240 lose data while increasing the number of lanes would make bits
2241 unnecessarily significant. */
2242 if (PR_REGNUM_P (regno))
2243 return mode;
6a70badb
RS
2244 if (known_ge (GET_MODE_SIZE (mode), 4))
2245 return mode;
73d9ac6a 2246 else
6a70badb 2247 return SImode;
73d9ac6a
IB
2248}
2249
231c52ae
ST
2250/* Return true if I's bits are consecutive ones from the MSB. */
2251bool
2252aarch64_high_bits_all_ones_p (HOST_WIDE_INT i)
2253{
2254 return exact_log2 (-i) != HOST_WIDE_INT_M1;
2255}
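
/* A couple of hand-worked cases for the check above (illustrative only):
   for i == (HOST_WIDE_INT) 0xffffffffffff0000, -i == 0x10000 and
   exact_log2 returns 16, so the result is true (the top 48 bits are all
   ones).  For i == 0x00ff0000 (or i == 0), -i is not a power of two, so
   the result is false.  */
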
2256
58e17cf8
RS
2257/* Implement TARGET_CONSTANT_ALIGNMENT. Make strings word-aligned so
2258 that strcpy from constants will be faster. */
2259
2260static HOST_WIDE_INT
2261aarch64_constant_alignment (const_tree exp, HOST_WIDE_INT align)
2262{
2263 if (TREE_CODE (exp) == STRING_CST && !optimize_size)
2264 return MAX (align, BITS_PER_WORD);
2265 return align;
2266}
2267
43e9d192
IB
2268/* Return true if calls to DECL should be treated as
 2269 long-calls (i.e. called via a register). */
2270static bool
2271aarch64_decl_is_long_call_p (const_tree decl ATTRIBUTE_UNUSED)
2272{
2273 return false;
2274}
2275
2276/* Return true if calls to symbol-ref SYM should be treated as
 2277 long-calls (i.e. called via a register). */
2278bool
2279aarch64_is_long_call_p (rtx sym)
2280{
2281 return aarch64_decl_is_long_call_p (SYMBOL_REF_DECL (sym));
2282}
2283
b60d63cb
JW
2284/* Return true if calls to symbol-ref SYM should not go through
2285 plt stubs. */
2286
2287bool
2288aarch64_is_noplt_call_p (rtx sym)
2289{
2290 const_tree decl = SYMBOL_REF_DECL (sym);
2291
2292 if (flag_pic
2293 && decl
2294 && (!flag_plt
2295 || lookup_attribute ("noplt", DECL_ATTRIBUTES (decl)))
2296 && !targetm.binds_local_p (decl))
2297 return true;
2298
2299 return false;
2300}
2301
43e9d192
IB
2302/* Return true if the offsets to a zero/sign-extract operation
2303 represent an expression that matches an extend operation. The
 2304 operands represent the parameters from
2305
4745e701 2306 (extract:MODE (mult (reg) (MULT_IMM)) (EXTRACT_IMM) (const_int 0)). */
43e9d192 2307bool
77e994c9 2308aarch64_is_extend_from_extract (scalar_int_mode mode, rtx mult_imm,
43e9d192
IB
2309 rtx extract_imm)
2310{
2311 HOST_WIDE_INT mult_val, extract_val;
2312
2313 if (! CONST_INT_P (mult_imm) || ! CONST_INT_P (extract_imm))
2314 return false;
2315
2316 mult_val = INTVAL (mult_imm);
2317 extract_val = INTVAL (extract_imm);
2318
2319 if (extract_val > 8
2320 && extract_val < GET_MODE_BITSIZE (mode)
2321 && exact_log2 (extract_val & ~7) > 0
2322 && (extract_val & 7) <= 4
2323 && mult_val == (1 << (extract_val & 7)))
2324 return true;
2325
2326 return false;
2327}
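
/* A hand-worked example of the conditions above (illustrative only):
   in DImode, extract_imm == 34 and mult_imm == 4 pass every test,
   since 34 > 8, 34 < 64, 34 & ~7 == 32 is a power of two,
   34 & 7 == 2 <= 4, and 4 == 1 << 2, so the function returns true.
   With mult_imm == 8 instead, the final test fails and the result
   is false.  */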
2328
2329/* Emit an insn that's a simple single-set. Both the operands must be
2330 known to be valid. */
827ab47a 2331inline static rtx_insn *
43e9d192
IB
2332emit_set_insn (rtx x, rtx y)
2333{
f7df4a84 2334 return emit_insn (gen_rtx_SET (x, y));
43e9d192
IB
2335}
2336
2337/* X and Y are two things to compare using CODE. Emit the compare insn and
2338 return the rtx for register 0 in the proper mode. */
2339rtx
2340aarch64_gen_compare_reg (RTX_CODE code, rtx x, rtx y)
2341{
4a2095eb
RH
2342 machine_mode cmp_mode = GET_MODE (x);
2343 machine_mode cc_mode;
2344 rtx cc_reg;
43e9d192 2345
4a2095eb
RH
2346 if (cmp_mode == TImode)
2347 {
2348 gcc_assert (code == NE);
2349
2350 cc_mode = CCmode;
2351 cc_reg = gen_rtx_REG (cc_mode, CC_REGNUM);
2352
2353 rtx x_lo = operand_subword (x, 0, 0, TImode);
2354 rtx y_lo = operand_subword (y, 0, 0, TImode);
2355 emit_set_insn (cc_reg, gen_rtx_COMPARE (cc_mode, x_lo, y_lo));
2356
2357 rtx x_hi = operand_subword (x, 1, 0, TImode);
2358 rtx y_hi = operand_subword (y, 1, 0, TImode);
2359 emit_insn (gen_ccmpdi (cc_reg, cc_reg, x_hi, y_hi,
2360 gen_rtx_EQ (cc_mode, cc_reg, const0_rtx),
2361 GEN_INT (AARCH64_EQ)));
2362 }
2363 else
2364 {
2365 cc_mode = SELECT_CC_MODE (code, x, y);
2366 cc_reg = gen_rtx_REG (cc_mode, CC_REGNUM);
2367 emit_set_insn (cc_reg, gen_rtx_COMPARE (cc_mode, x, y));
2368 }
43e9d192
IB
2369 return cc_reg;
2370}
2371
d400fda3
RH
2372/* Similarly, but maybe zero-extend Y if Y_MODE < SImode. */
2373
2374static rtx
2375aarch64_gen_compare_reg_maybe_ze (RTX_CODE code, rtx x, rtx y,
2376 machine_mode y_mode)
2377{
2378 if (y_mode == E_QImode || y_mode == E_HImode)
2379 {
2380 if (CONST_INT_P (y))
2381 y = GEN_INT (INTVAL (y) & GET_MODE_MASK (y_mode));
2382 else
2383 {
2384 rtx t, cc_reg;
2385 machine_mode cc_mode;
2386
2387 t = gen_rtx_ZERO_EXTEND (SImode, y);
2388 t = gen_rtx_COMPARE (CC_SWPmode, t, x);
2389 cc_mode = CC_SWPmode;
2390 cc_reg = gen_rtx_REG (cc_mode, CC_REGNUM);
2391 emit_set_insn (cc_reg, t);
2392 return cc_reg;
2393 }
2394 }
2395
846f78d4
PK
2396 if (!aarch64_plus_operand (y, y_mode))
2397 y = force_reg (y_mode, y);
2398
d400fda3
RH
2399 return aarch64_gen_compare_reg (code, x, y);
2400}
2401
43e9d192
IB
2402/* Build the SYMBOL_REF for __tls_get_addr. */
2403
2404static GTY(()) rtx tls_get_addr_libfunc;
2405
2406rtx
2407aarch64_tls_get_addr (void)
2408{
2409 if (!tls_get_addr_libfunc)
2410 tls_get_addr_libfunc = init_one_libfunc ("__tls_get_addr");
2411 return tls_get_addr_libfunc;
2412}
2413
2414/* Return the TLS model to use for ADDR. */
2415
2416static enum tls_model
2417tls_symbolic_operand_type (rtx addr)
2418{
2419 enum tls_model tls_kind = TLS_MODEL_NONE;
43e9d192
IB
2420 if (GET_CODE (addr) == CONST)
2421 {
6a70badb
RS
2422 poly_int64 addend;
2423 rtx sym = strip_offset (addr, &addend);
43e9d192
IB
2424 if (GET_CODE (sym) == SYMBOL_REF)
2425 tls_kind = SYMBOL_REF_TLS_MODEL (sym);
2426 }
2427 else if (GET_CODE (addr) == SYMBOL_REF)
2428 tls_kind = SYMBOL_REF_TLS_MODEL (addr);
2429
2430 return tls_kind;
2431}
2432
 2433/* We allow lo_sum's in our legitimate addresses, so that combine
 2434 can take care of combining addresses where necessary, but for
 2435 generation purposes we generate the address
 2436 as:
2437 RTL Absolute
2438 tmp = hi (symbol_ref); adrp x1, foo
2439 dest = lo_sum (tmp, symbol_ref); add dest, x1, :lo_12:foo
2440 nop
2441
2442 PIC TLS
2443 adrp x1, :got:foo adrp tmp, :tlsgd:foo
2444 ldr x1, [:got_lo12:foo] add dest, tmp, :tlsgd_lo12:foo
2445 bl __tls_get_addr
2446 nop
2447
2448 Load TLS symbol, depending on TLS mechanism and TLS access model.
2449
2450 Global Dynamic - Traditional TLS:
2451 adrp tmp, :tlsgd:imm
2452 add dest, tmp, #:tlsgd_lo12:imm
2453 bl __tls_get_addr
2454
2455 Global Dynamic - TLS Descriptors:
2456 adrp dest, :tlsdesc:imm
2457 ldr tmp, [dest, #:tlsdesc_lo12:imm]
2458 add dest, dest, #:tlsdesc_lo12:imm
2459 blr tmp
2460 mrs tp, tpidr_el0
2461 add dest, dest, tp
2462
2463 Initial Exec:
2464 mrs tp, tpidr_el0
2465 adrp tmp, :gottprel:imm
2466 ldr dest, [tmp, #:gottprel_lo12:imm]
2467 add dest, dest, tp
2468
2469 Local Exec:
2470 mrs tp, tpidr_el0
0699caae
RL
2471 add t0, tp, #:tprel_hi12:imm, lsl #12
2472 add t0, t0, #:tprel_lo12_nc:imm
43e9d192
IB
2473*/
2474
2475static void
2476aarch64_load_symref_appropriately (rtx dest, rtx imm,
2477 enum aarch64_symbol_type type)
2478{
2479 switch (type)
2480 {
2481 case SYMBOL_SMALL_ABSOLUTE:
2482 {
28514dda 2483 /* In ILP32, the mode of dest can be either SImode or DImode. */
43e9d192 2484 rtx tmp_reg = dest;
ef4bddc2 2485 machine_mode mode = GET_MODE (dest);
28514dda
YZ
2486
2487 gcc_assert (mode == Pmode || mode == ptr_mode);
2488
43e9d192 2489 if (can_create_pseudo_p ())
28514dda 2490 tmp_reg = gen_reg_rtx (mode);
43e9d192 2491
28514dda 2492 emit_move_insn (tmp_reg, gen_rtx_HIGH (mode, imm));
43e9d192
IB
2493 emit_insn (gen_add_losym (dest, tmp_reg, imm));
2494 return;
2495 }
2496
a5350ddc 2497 case SYMBOL_TINY_ABSOLUTE:
f7df4a84 2498 emit_insn (gen_rtx_SET (dest, imm));
a5350ddc
CSS
2499 return;
2500
1b1e81f8
JW
2501 case SYMBOL_SMALL_GOT_28K:
2502 {
2503 machine_mode mode = GET_MODE (dest);
2504 rtx gp_rtx = pic_offset_table_rtx;
53021678
JW
2505 rtx insn;
2506 rtx mem;
1b1e81f8
JW
2507
2508 /* NOTE: pic_offset_table_rtx can be NULL_RTX, because we can reach
2509 here before rtl expand. Tree IVOPT will generate rtl pattern to
2510 decide rtx costs, in which case pic_offset_table_rtx is not
 2511 initialized. In that case there is no need to generate the first adrp
026c3cfd 2512 instruction, as the final cost of a global variable access is
1b1e81f8
JW
2513 one instruction. */
2514 if (gp_rtx != NULL)
2515 {
 2516 /* -fpic for -mcmodel=small allows a 32K GOT table size (but since we
 2517 use the page base as the GOT base, the first page may be wasted;
 2518 in the worst case only 28K of GOT space is left).
2519
 2520 The generated instruction sequence for accessing a global
 2521 variable is:
2522
a3957742 2523 ldr reg, [pic_offset_table_rtx, #:gotpage_lo15:sym]
1b1e81f8
JW
2524
 2525 Only one instruction is needed. But we must initialize
 2526 pic_offset_table_rtx properly. We generate an initialization insn for
 2527 every global access, and allow CSE to remove all redundant ones.
2528
 2529 The final instruction sequence will look like the following
 2530 for multiple global variable accesses.
2531
a3957742 2532 adrp pic_offset_table_rtx, _GLOBAL_OFFSET_TABLE_
1b1e81f8 2533
a3957742
JW
2534 ldr reg, [pic_offset_table_rtx, #:gotpage_lo15:sym1]
2535 ldr reg, [pic_offset_table_rtx, #:gotpage_lo15:sym2]
2536 ldr reg, [pic_offset_table_rtx, #:gotpage_lo15:sym3]
2537 ... */
1b1e81f8
JW
2538
2539 rtx s = gen_rtx_SYMBOL_REF (Pmode, "_GLOBAL_OFFSET_TABLE_");
2540 crtl->uses_pic_offset_table = 1;
2541 emit_move_insn (gp_rtx, gen_rtx_HIGH (Pmode, s));
2542
2543 if (mode != GET_MODE (gp_rtx))
4ba8f0a3
AP
2544 gp_rtx = gen_lowpart (mode, gp_rtx);
2545
1b1e81f8
JW
2546 }
2547
2548 if (mode == ptr_mode)
2549 {
2550 if (mode == DImode)
53021678 2551 insn = gen_ldr_got_small_28k_di (dest, gp_rtx, imm);
1b1e81f8 2552 else
53021678
JW
2553 insn = gen_ldr_got_small_28k_si (dest, gp_rtx, imm);
2554
2555 mem = XVECEXP (SET_SRC (insn), 0, 0);
1b1e81f8
JW
2556 }
2557 else
2558 {
2559 gcc_assert (mode == Pmode);
53021678
JW
2560
2561 insn = gen_ldr_got_small_28k_sidi (dest, gp_rtx, imm);
2562 mem = XVECEXP (XEXP (SET_SRC (insn), 0), 0, 0);
1b1e81f8
JW
2563 }
2564
53021678
JW
 2565 /* The operand is expected to be a MEM. Whenever the related insn
 2566 pattern changes, the code above that computes MEM should be
 2567 updated accordingly. */
2568 gcc_assert (GET_CODE (mem) == MEM);
2569 MEM_READONLY_P (mem) = 1;
2570 MEM_NOTRAP_P (mem) = 1;
2571 emit_insn (insn);
1b1e81f8
JW
2572 return;
2573 }
2574
6642bdb4 2575 case SYMBOL_SMALL_GOT_4G:
43e9d192 2576 {
28514dda
YZ
2577 /* In ILP32, the mode of dest can be either SImode or DImode,
2578 while the got entry is always of SImode size. The mode of
2579 dest depends on how dest is used: if dest is assigned to a
2580 pointer (e.g. in the memory), it has SImode; it may have
 2581 DImode if dest is dereferenced to access the memory.
2582 This is why we have to handle three different ldr_got_small
2583 patterns here (two patterns for ILP32). */
53021678
JW
2584
2585 rtx insn;
2586 rtx mem;
43e9d192 2587 rtx tmp_reg = dest;
ef4bddc2 2588 machine_mode mode = GET_MODE (dest);
28514dda 2589
43e9d192 2590 if (can_create_pseudo_p ())
28514dda
YZ
2591 tmp_reg = gen_reg_rtx (mode);
2592
2593 emit_move_insn (tmp_reg, gen_rtx_HIGH (mode, imm));
2594 if (mode == ptr_mode)
2595 {
2596 if (mode == DImode)
53021678 2597 insn = gen_ldr_got_small_di (dest, tmp_reg, imm);
28514dda 2598 else
53021678
JW
2599 insn = gen_ldr_got_small_si (dest, tmp_reg, imm);
2600
2601 mem = XVECEXP (SET_SRC (insn), 0, 0);
28514dda
YZ
2602 }
2603 else
2604 {
2605 gcc_assert (mode == Pmode);
53021678
JW
2606
2607 insn = gen_ldr_got_small_sidi (dest, tmp_reg, imm);
2608 mem = XVECEXP (XEXP (SET_SRC (insn), 0), 0, 0);
28514dda
YZ
2609 }
2610
53021678
JW
2611 gcc_assert (GET_CODE (mem) == MEM);
2612 MEM_READONLY_P (mem) = 1;
2613 MEM_NOTRAP_P (mem) = 1;
2614 emit_insn (insn);
43e9d192
IB
2615 return;
2616 }
2617
2618 case SYMBOL_SMALL_TLSGD:
2619 {
5d8a22a5 2620 rtx_insn *insns;
23b88fda
N
2621 machine_mode mode = GET_MODE (dest);
2622 rtx result = gen_rtx_REG (mode, R0_REGNUM);
43e9d192
IB
2623
2624 start_sequence ();
23b88fda
N
2625 if (TARGET_ILP32)
2626 aarch64_emit_call_insn (gen_tlsgd_small_si (result, imm));
2627 else
2628 aarch64_emit_call_insn (gen_tlsgd_small_di (result, imm));
43e9d192
IB
2629 insns = get_insns ();
2630 end_sequence ();
2631
2632 RTL_CONST_CALL_P (insns) = 1;
2633 emit_libcall_block (insns, dest, result, imm);
2634 return;
2635 }
2636
2637 case SYMBOL_SMALL_TLSDESC:
2638 {
ef4bddc2 2639 machine_mode mode = GET_MODE (dest);
621ad2de 2640 rtx x0 = gen_rtx_REG (mode, R0_REGNUM);
43e9d192
IB
2641 rtx tp;
2642
621ad2de
AP
2643 gcc_assert (mode == Pmode || mode == ptr_mode);
2644
2876a13f
JW
2645 /* In ILP32, the got entry is always of SImode size. Unlike
2646 small GOT, the dest is fixed at reg 0. */
2647 if (TARGET_ILP32)
2648 emit_insn (gen_tlsdesc_small_si (imm));
621ad2de 2649 else
2876a13f 2650 emit_insn (gen_tlsdesc_small_di (imm));
43e9d192 2651 tp = aarch64_load_tp (NULL);
621ad2de
AP
2652
2653 if (mode != Pmode)
2654 tp = gen_lowpart (mode, tp);
2655
2876a13f 2656 emit_insn (gen_rtx_SET (dest, gen_rtx_PLUS (mode, tp, x0)));
241dbd9d
QZ
2657 if (REG_P (dest))
2658 set_unique_reg_note (get_last_insn (), REG_EQUIV, imm);
43e9d192
IB
2659 return;
2660 }
2661
79496620 2662 case SYMBOL_SMALL_TLSIE:
43e9d192 2663 {
621ad2de
AP
2664 /* In ILP32, the mode of dest can be either SImode or DImode,
2665 while the got entry is always of SImode size. The mode of
2666 dest depends on how dest is used: if dest is assigned to a
2667 pointer (e.g. in the memory), it has SImode; it may have
 2668 DImode if dest is dereferenced to access the memory.
2669 This is why we have to handle three different tlsie_small
2670 patterns here (two patterns for ILP32). */
ef4bddc2 2671 machine_mode mode = GET_MODE (dest);
621ad2de 2672 rtx tmp_reg = gen_reg_rtx (mode);
43e9d192 2673 rtx tp = aarch64_load_tp (NULL);
621ad2de
AP
2674
2675 if (mode == ptr_mode)
2676 {
2677 if (mode == DImode)
2678 emit_insn (gen_tlsie_small_di (tmp_reg, imm));
2679 else
2680 {
2681 emit_insn (gen_tlsie_small_si (tmp_reg, imm));
2682 tp = gen_lowpart (mode, tp);
2683 }
2684 }
2685 else
2686 {
2687 gcc_assert (mode == Pmode);
2688 emit_insn (gen_tlsie_small_sidi (tmp_reg, imm));
2689 }
2690
f7df4a84 2691 emit_insn (gen_rtx_SET (dest, gen_rtx_PLUS (mode, tp, tmp_reg)));
241dbd9d
QZ
2692 if (REG_P (dest))
2693 set_unique_reg_note (get_last_insn (), REG_EQUIV, imm);
43e9d192
IB
2694 return;
2695 }
2696
cbf5629e 2697 case SYMBOL_TLSLE12:
d18ba284 2698 case SYMBOL_TLSLE24:
cbf5629e
JW
2699 case SYMBOL_TLSLE32:
2700 case SYMBOL_TLSLE48:
43e9d192 2701 {
cbf5629e 2702 machine_mode mode = GET_MODE (dest);
43e9d192 2703 rtx tp = aarch64_load_tp (NULL);
e6f7f0e9 2704
cbf5629e
JW
2705 if (mode != Pmode)
2706 tp = gen_lowpart (mode, tp);
2707
2708 switch (type)
2709 {
2710 case SYMBOL_TLSLE12:
2711 emit_insn ((mode == DImode ? gen_tlsle12_di : gen_tlsle12_si)
2712 (dest, tp, imm));
2713 break;
2714 case SYMBOL_TLSLE24:
2715 emit_insn ((mode == DImode ? gen_tlsle24_di : gen_tlsle24_si)
2716 (dest, tp, imm));
2717 break;
2718 case SYMBOL_TLSLE32:
2719 emit_insn ((mode == DImode ? gen_tlsle32_di : gen_tlsle32_si)
2720 (dest, imm));
2721 emit_insn ((mode == DImode ? gen_adddi3 : gen_addsi3)
2722 (dest, dest, tp));
2723 break;
2724 case SYMBOL_TLSLE48:
2725 emit_insn ((mode == DImode ? gen_tlsle48_di : gen_tlsle48_si)
2726 (dest, imm));
2727 emit_insn ((mode == DImode ? gen_adddi3 : gen_addsi3)
2728 (dest, dest, tp));
2729 break;
2730 default:
2731 gcc_unreachable ();
2732 }
e6f7f0e9 2733
241dbd9d
QZ
2734 if (REG_P (dest))
2735 set_unique_reg_note (get_last_insn (), REG_EQUIV, imm);
43e9d192
IB
2736 return;
2737 }
2738
87dd8ab0
MS
2739 case SYMBOL_TINY_GOT:
2740 emit_insn (gen_ldr_got_tiny (dest, imm));
2741 return;
2742
5ae7caad
JW
2743 case SYMBOL_TINY_TLSIE:
2744 {
2745 machine_mode mode = GET_MODE (dest);
2746 rtx tp = aarch64_load_tp (NULL);
2747
2748 if (mode == ptr_mode)
2749 {
2750 if (mode == DImode)
2751 emit_insn (gen_tlsie_tiny_di (dest, imm, tp));
2752 else
2753 {
2754 tp = gen_lowpart (mode, tp);
2755 emit_insn (gen_tlsie_tiny_si (dest, imm, tp));
2756 }
2757 }
2758 else
2759 {
2760 gcc_assert (mode == Pmode);
2761 emit_insn (gen_tlsie_tiny_sidi (dest, imm, tp));
2762 }
2763
241dbd9d
QZ
2764 if (REG_P (dest))
2765 set_unique_reg_note (get_last_insn (), REG_EQUIV, imm);
5ae7caad
JW
2766 return;
2767 }
2768
43e9d192
IB
2769 default:
2770 gcc_unreachable ();
2771 }
2772}
2773
2774/* Emit a move from SRC to DEST. Assume that the move expanders can
2775 handle all moves if !can_create_pseudo_p (). The distinction is
2776 important because, unlike emit_move_insn, the move expanders know
2777 how to force Pmode objects into the constant pool even when the
2778 constant pool address is not itself legitimate. */
2779static rtx
2780aarch64_emit_move (rtx dest, rtx src)
2781{
2782 return (can_create_pseudo_p ()
2783 ? emit_move_insn (dest, src)
2784 : emit_move_insn_1 (dest, src));
2785}
2786
f22d7973
RS
2787/* Apply UNOPTAB to OP and store the result in DEST. */
2788
2789static void
2790aarch64_emit_unop (rtx dest, optab unoptab, rtx op)
2791{
2792 rtx tmp = expand_unop (GET_MODE (dest), unoptab, op, dest, 0);
2793 if (dest != tmp)
2794 emit_move_insn (dest, tmp);
2795}
2796
2797/* Apply BINOPTAB to OP0 and OP1 and store the result in DEST. */
2798
2799static void
2800aarch64_emit_binop (rtx dest, optab binoptab, rtx op0, rtx op1)
2801{
2802 rtx tmp = expand_binop (GET_MODE (dest), binoptab, op0, op1, dest, 0,
2803 OPTAB_DIRECT);
2804 if (dest != tmp)
2805 emit_move_insn (dest, tmp);
2806}
2807
030d03b8
RE
2808/* Split a 128-bit move operation into two 64-bit move operations,
2809 taking care to handle partial overlap of register to register
2810 copies. Special cases are needed when moving between GP regs and
2811 FP regs. SRC can be a register, constant or memory; DST a register
2812 or memory. If either operand is memory it must not have any side
2813 effects. */
43e9d192
IB
2814void
2815aarch64_split_128bit_move (rtx dst, rtx src)
2816{
030d03b8
RE
2817 rtx dst_lo, dst_hi;
2818 rtx src_lo, src_hi;
43e9d192 2819
ef4bddc2 2820 machine_mode mode = GET_MODE (dst);
12dc6974 2821
030d03b8
RE
2822 gcc_assert (mode == TImode || mode == TFmode);
2823 gcc_assert (!(side_effects_p (src) || side_effects_p (dst)));
2824 gcc_assert (mode == GET_MODE (src) || GET_MODE (src) == VOIDmode);
43e9d192
IB
2825
2826 if (REG_P (dst) && REG_P (src))
2827 {
030d03b8
RE
2828 int src_regno = REGNO (src);
2829 int dst_regno = REGNO (dst);
43e9d192 2830
030d03b8 2831 /* Handle FP <-> GP regs. */
43e9d192
IB
2832 if (FP_REGNUM_P (dst_regno) && GP_REGNUM_P (src_regno))
2833 {
030d03b8
RE
2834 src_lo = gen_lowpart (word_mode, src);
2835 src_hi = gen_highpart (word_mode, src);
2836
0016d8d9
RS
2837 emit_insn (gen_aarch64_movlow_di (mode, dst, src_lo));
2838 emit_insn (gen_aarch64_movhigh_di (mode, dst, src_hi));
030d03b8 2839 return;
43e9d192
IB
2840 }
2841 else if (GP_REGNUM_P (dst_regno) && FP_REGNUM_P (src_regno))
2842 {
030d03b8
RE
2843 dst_lo = gen_lowpart (word_mode, dst);
2844 dst_hi = gen_highpart (word_mode, dst);
2845
0016d8d9
RS
2846 emit_insn (gen_aarch64_movdi_low (mode, dst_lo, src));
2847 emit_insn (gen_aarch64_movdi_high (mode, dst_hi, src));
030d03b8 2848 return;
43e9d192 2849 }
43e9d192
IB
2850 }
2851
030d03b8
RE
2852 dst_lo = gen_lowpart (word_mode, dst);
2853 dst_hi = gen_highpart (word_mode, dst);
2854 src_lo = gen_lowpart (word_mode, src);
2855 src_hi = gen_highpart_mode (word_mode, mode, src);
2856
2857 /* At most one pairing may overlap. */
2858 if (reg_overlap_mentioned_p (dst_lo, src_hi))
2859 {
2860 aarch64_emit_move (dst_hi, src_hi);
2861 aarch64_emit_move (dst_lo, src_lo);
2862 }
2863 else
2864 {
2865 aarch64_emit_move (dst_lo, src_lo);
2866 aarch64_emit_move (dst_hi, src_hi);
2867 }
43e9d192
IB
2868}
2869
2870bool
2871aarch64_split_128bit_move_p (rtx dst, rtx src)
2872{
2873 return (! REG_P (src)
2874 || ! (FP_REGNUM_P (REGNO (dst)) && FP_REGNUM_P (REGNO (src))));
2875}
2876
8b033a8a
SN
2877/* Split a complex SIMD combine. */
2878
2879void
2880aarch64_split_simd_combine (rtx dst, rtx src1, rtx src2)
2881{
ef4bddc2
RS
2882 machine_mode src_mode = GET_MODE (src1);
2883 machine_mode dst_mode = GET_MODE (dst);
8b033a8a
SN
2884
2885 gcc_assert (VECTOR_MODE_P (dst_mode));
a977dc0c
MC
2886 gcc_assert (register_operand (dst, dst_mode)
2887 && register_operand (src1, src_mode)
2888 && register_operand (src2, src_mode));
8b033a8a 2889
0016d8d9 2890 emit_insn (gen_aarch64_simd_combine (src_mode, dst, src1, src2));
a977dc0c 2891 return;
8b033a8a
SN
2892}
2893
fd4842cd
SN
2894/* Split a complex SIMD move. */
2895
2896void
2897aarch64_split_simd_move (rtx dst, rtx src)
2898{
ef4bddc2
RS
2899 machine_mode src_mode = GET_MODE (src);
2900 machine_mode dst_mode = GET_MODE (dst);
fd4842cd
SN
2901
2902 gcc_assert (VECTOR_MODE_P (dst_mode));
2903
2904 if (REG_P (dst) && REG_P (src))
2905 {
2906 gcc_assert (VECTOR_MODE_P (src_mode));
0016d8d9 2907 emit_insn (gen_aarch64_split_simd_mov (src_mode, dst, src));
fd4842cd
SN
2908 }
2909}
2910
ef22810a
RH
2911bool
2912aarch64_zero_extend_const_eq (machine_mode xmode, rtx x,
2913 machine_mode ymode, rtx y)
2914{
2915 rtx r = simplify_const_unary_operation (ZERO_EXTEND, xmode, y, ymode);
2916 gcc_assert (r != NULL);
2917 return rtx_equal_p (x, r);
2918}
ef22810a 2919
678faefc
RS
2920/* Return TARGET if it is nonnull and a register of mode MODE.
2921 Otherwise, return a fresh register of mode MODE if we can,
2922 or TARGET reinterpreted as MODE if we can't. */
2923
2924static rtx
2925aarch64_target_reg (rtx target, machine_mode mode)
2926{
2927 if (target && REG_P (target) && GET_MODE (target) == mode)
2928 return target;
2929 if (!can_create_pseudo_p ())
2930 {
2931 gcc_assert (target);
2932 return gen_lowpart (mode, target);
2933 }
2934 return gen_reg_rtx (mode);
2935}
2936
2937/* Return a register that contains the constant in BUILDER, given that
2938 the constant is a legitimate move operand. Use TARGET as the register
2939 if it is nonnull and convenient. */
2940
2941static rtx
2942aarch64_emit_set_immediate (rtx target, rtx_vector_builder &builder)
2943{
2944 rtx src = builder.build ();
2945 target = aarch64_target_reg (target, GET_MODE (src));
2946 emit_insn (gen_rtx_SET (target, src));
2947 return target;
2948}
2949
43e9d192 2950static rtx
ef4bddc2 2951aarch64_force_temporary (machine_mode mode, rtx x, rtx value)
43e9d192
IB
2952{
2953 if (can_create_pseudo_p ())
e18b4a81 2954 return force_reg (mode, value);
43e9d192
IB
2955 else
2956 {
f5470a77
RS
2957 gcc_assert (x);
2958 aarch64_emit_move (x, value);
43e9d192
IB
2959 return x;
2960 }
2961}
2962
0b1fe8cf
RS
2963/* Return true if predicate value X is a constant in which every element
2964 is a CONST_INT. When returning true, describe X in BUILDER as a VNx16BI
2965 value, i.e. as a predicate in which all bits are significant. */
2966
2967static bool
2968aarch64_get_sve_pred_bits (rtx_vector_builder &builder, rtx x)
2969{
2970 if (GET_CODE (x) != CONST_VECTOR)
2971 return false;
2972
2973 unsigned int factor = vector_element_size (GET_MODE_NUNITS (VNx16BImode),
2974 GET_MODE_NUNITS (GET_MODE (x)));
2975 unsigned int npatterns = CONST_VECTOR_NPATTERNS (x) * factor;
2976 unsigned int nelts_per_pattern = CONST_VECTOR_NELTS_PER_PATTERN (x);
2977 builder.new_vector (VNx16BImode, npatterns, nelts_per_pattern);
2978
2979 unsigned int nelts = const_vector_encoded_nelts (x);
2980 for (unsigned int i = 0; i < nelts; ++i)
2981 {
2982 rtx elt = CONST_VECTOR_ENCODED_ELT (x, i);
2983 if (!CONST_INT_P (elt))
2984 return false;
2985
2986 builder.quick_push (elt);
2987 for (unsigned int j = 1; j < factor; ++j)
2988 builder.quick_push (const0_rtx);
2989 }
2990 builder.finalize ();
2991 return true;
2992}
2993
2994/* BUILDER contains a predicate constant of mode VNx16BI. Return the
2995 widest predicate element size it can have (that is, the largest size
2996 for which each element would still be 0 or 1). */
2997
2998unsigned int
2999aarch64_widest_sve_pred_elt_size (rtx_vector_builder &builder)
3000{
3001 /* Start with the most optimistic assumption: that we only need
3002 one bit per pattern. This is what we will use if only the first
3003 bit in each pattern is ever set. */
3004 unsigned int mask = GET_MODE_SIZE (DImode);
3005 mask |= builder.npatterns ();
3006
3007 /* Look for set bits. */
3008 unsigned int nelts = builder.encoded_nelts ();
3009 for (unsigned int i = 1; i < nelts; ++i)
3010 if (INTVAL (builder.elt (i)) != 0)
3011 {
3012 if (i & 1)
3013 return 1;
3014 mask |= i;
3015 }
3016 return mask & -mask;
3017}
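
/* A hand-worked example of the computation above (illustrative only):
   if the builder has 4 patterns and the only set bits at nonzero
   encoded indices are at indices 4 and 8, then
   mask == 8 | 4 | 4 | 8 == 12 and mask & -mask == 4, so the widest
   usable predicate element size is 4 bytes.  A set bit at any odd
   index forces the result down to 1.  */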
3018
624d0f07
RS
3019/* If VNx16BImode rtx X is a canonical PTRUE for a predicate mode,
3020 return that predicate mode, otherwise return opt_machine_mode (). */
3021
3022opt_machine_mode
3023aarch64_ptrue_all_mode (rtx x)
3024{
3025 gcc_assert (GET_MODE (x) == VNx16BImode);
3026 if (GET_CODE (x) != CONST_VECTOR
3027 || !CONST_VECTOR_DUPLICATE_P (x)
3028 || !CONST_INT_P (CONST_VECTOR_ENCODED_ELT (x, 0))
3029 || INTVAL (CONST_VECTOR_ENCODED_ELT (x, 0)) == 0)
3030 return opt_machine_mode ();
3031
3032 unsigned int nelts = const_vector_encoded_nelts (x);
3033 for (unsigned int i = 1; i < nelts; ++i)
3034 if (CONST_VECTOR_ENCODED_ELT (x, i) != const0_rtx)
3035 return opt_machine_mode ();
3036
3037 return aarch64_sve_pred_mode (nelts);
3038}
3039
0b1fe8cf
RS
3040/* BUILDER is a predicate constant of mode VNx16BI. Consider the value
3041 that the constant would have with predicate element size ELT_SIZE
3042 (ignoring the upper bits in each element) and return:
3043
3044 * -1 if all bits are set
3045 * N if the predicate has N leading set bits followed by all clear bits
3046 * 0 if the predicate does not have any of these forms. */
3047
3048int
3049aarch64_partial_ptrue_length (rtx_vector_builder &builder,
3050 unsigned int elt_size)
3051{
3052 /* If nelts_per_pattern is 3, we have set bits followed by clear bits
3053 followed by set bits. */
3054 if (builder.nelts_per_pattern () == 3)
3055 return 0;
3056
3057 /* Skip over leading set bits. */
3058 unsigned int nelts = builder.encoded_nelts ();
3059 unsigned int i = 0;
3060 for (; i < nelts; i += elt_size)
3061 if (INTVAL (builder.elt (i)) == 0)
3062 break;
3063 unsigned int vl = i / elt_size;
3064
3065 /* Check for the all-true case. */
3066 if (i == nelts)
3067 return -1;
3068
3069 /* If nelts_per_pattern is 1, then either VL is zero, or we have a
3070 repeating pattern of set bits followed by clear bits. */
3071 if (builder.nelts_per_pattern () != 2)
3072 return 0;
3073
3074 /* We have a "foreground" value and a duplicated "background" value.
3075 If the background might repeat and the last set bit belongs to it,
3076 we might have set bits followed by clear bits followed by set bits. */
3077 if (i > builder.npatterns () && maybe_ne (nelts, builder.full_nelts ()))
3078 return 0;
3079
3080 /* Make sure that the rest are all clear. */
3081 for (; i < nelts; i += elt_size)
3082 if (INTVAL (builder.elt (i)) != 0)
3083 return 0;
3084
3085 return vl;
3086}
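
/* Hand-worked examples for the function above (illustrative only,
   assuming the natural VNx16BI encoding of each value): with
   ELT_SIZE == 4, a constant whose significant bits are
   {1,0,0,0, 1,0,0,0, 0,0,...} yields 2 (a VL2 predicate of .S
   elements); the same constant with ELT_SIZE == 2 yields 0, because a
   set bit (index 4) follows a clear element; an all-ones constant
   yields -1 for any ELT_SIZE.  */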
3087
3088/* See if there is an svpattern that encodes an SVE predicate of mode
3089 PRED_MODE in which the first VL bits are set and the rest are clear.
3090 Return the pattern if so, otherwise return AARCH64_NUM_SVPATTERNS.
3091 A VL of -1 indicates an all-true vector. */
3092
3093aarch64_svpattern
3094aarch64_svpattern_for_vl (machine_mode pred_mode, int vl)
3095{
3096 if (vl < 0)
3097 return AARCH64_SV_ALL;
3098
3099 if (maybe_gt (vl, GET_MODE_NUNITS (pred_mode)))
3100 return AARCH64_NUM_SVPATTERNS;
3101
3102 if (vl >= 1 && vl <= 8)
3103 return aarch64_svpattern (AARCH64_SV_VL1 + (vl - 1));
3104
3105 if (vl >= 16 && vl <= 256 && pow2p_hwi (vl))
3106 return aarch64_svpattern (AARCH64_SV_VL16 + (exact_log2 (vl) - 4));
3107
3108 int max_vl;
3109 if (GET_MODE_NUNITS (pred_mode).is_constant (&max_vl))
3110 {
3111 if (vl == (max_vl / 3) * 3)
3112 return AARCH64_SV_MUL3;
3113 /* These would only trigger for non-power-of-2 lengths. */
3114 if (vl == (max_vl & -4))
3115 return AARCH64_SV_MUL4;
3116 if (vl == (1 << floor_log2 (max_vl)))
3117 return AARCH64_SV_POW2;
3118 if (vl == max_vl)
3119 return AARCH64_SV_ALL;
3120 }
3121 return AARCH64_NUM_SVPATTERNS;
3122}
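
/* Hand-worked examples for the mapping above (illustrative only),
   taking PRED_MODE == VNx16BImode: a VL of -1 gives AARCH64_SV_ALL and
   a VL of 5 gives AARCH64_SV_VL5.  A VL of 12 gives
   AARCH64_NUM_SVPATTERNS when the vector length is not fixed at
   compile time, since no svpattern encodes exactly 12 elements there.
   With -msve-vector-bits=512 (64 lanes), a VL of 64 gives
   AARCH64_SV_VL64.  */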
3123
34467289
RS
3124/* Return a VNx16BImode constant in which every sequence of ELT_SIZE
3125 bits has the lowest bit set and the upper bits clear. This is the
3126 VNx16BImode equivalent of a PTRUE for controlling elements of
3127 ELT_SIZE bytes. However, because the constant is VNx16BImode,
3128 all bits are significant, even the upper zeros. */
3129
3130rtx
3131aarch64_ptrue_all (unsigned int elt_size)
3132{
3133 rtx_vector_builder builder (VNx16BImode, elt_size, 1);
3134 builder.quick_push (const1_rtx);
3135 for (unsigned int i = 1; i < elt_size; ++i)
3136 builder.quick_push (const0_rtx);
3137 return builder.build ();
3138}
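
/* For example (illustrative only), aarch64_ptrue_all (2) builds the
   VNx16BImode constant {1, 0, 1, 0, ...}: the value that a PTRUE with
   a .H element size would leave in a predicate register, but with the
   odd (upper) bits explicitly zero and significant.  */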
3139
16de3637
RS
3140/* Return an all-true predicate register of mode MODE. */
3141
3142rtx
3143aarch64_ptrue_reg (machine_mode mode)
3144{
3145 gcc_assert (GET_MODE_CLASS (mode) == MODE_VECTOR_BOOL);
678faefc
RS
3146 rtx reg = force_reg (VNx16BImode, CONSTM1_RTX (VNx16BImode));
3147 return gen_lowpart (mode, reg);
16de3637
RS
3148}
3149
e7053b0c
RS
3150/* Return an all-false predicate register of mode MODE. */
3151
3152rtx
3153aarch64_pfalse_reg (machine_mode mode)
3154{
3155 gcc_assert (GET_MODE_CLASS (mode) == MODE_VECTOR_BOOL);
678faefc
RS
3156 rtx reg = force_reg (VNx16BImode, CONST0_RTX (VNx16BImode));
3157 return gen_lowpart (mode, reg);
3158}
3159
c9c5a809
RS
3160/* Return true if predicate PRED1[0] is true whenever predicate PRED2 is
3161 true, or alternatively if we know that the operation predicated by
3162 PRED1[0] is safe to perform whenever PRED2 is true. PRED1[1] is a
3163 aarch64_sve_gp_strictness operand that describes the operation
3164 predicated by PRED1[0]. */
3165
3166bool
3167aarch64_sve_pred_dominates_p (rtx *pred1, rtx pred2)
3168{
3169 machine_mode mode = GET_MODE (pred2);
3170 gcc_assert (GET_MODE_CLASS (mode) == MODE_VECTOR_BOOL
3171 && mode == GET_MODE (pred1[0])
3172 && aarch64_sve_gp_strictness (pred1[1], SImode));
3173 return (pred1[0] == CONSTM1_RTX (mode)
3174 || INTVAL (pred1[1]) == SVE_RELAXED_GP
3175 || rtx_equal_p (pred1[0], pred2));
3176}
3177
00fa90d9
RS
3178/* PRED1[0] is a PTEST predicate and PRED1[1] is an aarch64_sve_ptrue_flag
3179 for it. PRED2[0] is the predicate for the instruction whose result
3180 is tested by the PTEST and PRED2[1] is again an aarch64_sve_ptrue_flag
3181 for it. Return true if we can prove that the two predicates are
3182 equivalent for PTEST purposes; that is, if we can replace PRED2[0]
3183 with PRED1[0] without changing behavior. */
3184
3185bool
3186aarch64_sve_same_pred_for_ptest_p (rtx *pred1, rtx *pred2)
3187{
3188 machine_mode mode = GET_MODE (pred1[0]);
3189 gcc_assert (GET_MODE_CLASS (mode) == MODE_VECTOR_BOOL
3190 && mode == GET_MODE (pred2[0])
3191 && aarch64_sve_ptrue_flag (pred1[1], SImode)
3192 && aarch64_sve_ptrue_flag (pred2[1], SImode));
3193
3194 bool ptrue1_p = (pred1[0] == CONSTM1_RTX (mode)
3195 || INTVAL (pred1[1]) == SVE_KNOWN_PTRUE);
3196 bool ptrue2_p = (pred2[0] == CONSTM1_RTX (mode)
3197 || INTVAL (pred2[1]) == SVE_KNOWN_PTRUE);
3198 return (ptrue1_p && ptrue2_p) || rtx_equal_p (pred1[0], pred2[0]);
3199}
3200
3201/* Emit a comparison CMP between OP0 and OP1, both of which have mode
3202 DATA_MODE, and return the result in a predicate of mode PRED_MODE.
3203 Use TARGET as the target register if nonnull and convenient. */
3204
3205static rtx
3206aarch64_sve_emit_int_cmp (rtx target, machine_mode pred_mode, rtx_code cmp,
3207 machine_mode data_mode, rtx op1, rtx op2)
3208{
3209 insn_code icode = code_for_aarch64_pred_cmp (cmp, data_mode);
3210 expand_operand ops[5];
3211 create_output_operand (&ops[0], target, pred_mode);
3212 create_input_operand (&ops[1], CONSTM1_RTX (pred_mode), pred_mode);
3213 create_integer_operand (&ops[2], SVE_KNOWN_PTRUE);
3214 create_input_operand (&ops[3], op1, data_mode);
3215 create_input_operand (&ops[4], op2, data_mode);
3216 expand_insn (icode, 5, ops);
3217 return ops[0].value;
3218}
3219
678faefc
RS
3220/* Use a comparison to convert integer vector SRC into MODE, which is
3221 the corresponding SVE predicate mode. Use TARGET for the result
3222 if it's nonnull and convenient. */
3223
624d0f07 3224rtx
678faefc
RS
3225aarch64_convert_sve_data_to_pred (rtx target, machine_mode mode, rtx src)
3226{
3227 machine_mode src_mode = GET_MODE (src);
00fa90d9
RS
3228 return aarch64_sve_emit_int_cmp (target, mode, NE, src_mode,
3229 src, CONST0_RTX (src_mode));
e7053b0c
RS
3230}
3231
624d0f07
RS
3232/* Return the assembly token for svprfop value PRFOP. */
3233
3234static const char *
3235svprfop_token (enum aarch64_svprfop prfop)
3236{
3237 switch (prfop)
3238 {
3239#define CASE(UPPER, LOWER, VALUE) case AARCH64_SV_##UPPER: return #LOWER;
3240 AARCH64_FOR_SVPRFOP (CASE)
3241#undef CASE
3242 case AARCH64_NUM_SVPRFOPS:
3243 break;
3244 }
3245 gcc_unreachable ();
3246}
3247
3248/* Return the assembly string for an SVE prefetch operation with
3249 mnemonic MNEMONIC, given that PRFOP_RTX is the prefetch operation
3250 and that SUFFIX is the format for the remaining operands. */
3251
3252char *
3253aarch64_output_sve_prefetch (const char *mnemonic, rtx prfop_rtx,
3254 const char *suffix)
3255{
3256 static char buffer[128];
3257 aarch64_svprfop prfop = (aarch64_svprfop) INTVAL (prfop_rtx);
3258 unsigned int written = snprintf (buffer, sizeof (buffer), "%s\t%s, %s",
3259 mnemonic, svprfop_token (prfop), suffix);
3260 gcc_assert (written < sizeof (buffer));
3261 return buffer;
3262}
3263
3264/* Check whether we can calculate the number of elements in PATTERN
3265 at compile time, given that there are NELTS_PER_VQ elements per
3266 128-bit block. Return the value if so, otherwise return -1. */
3267
3268HOST_WIDE_INT
3269aarch64_fold_sve_cnt_pat (aarch64_svpattern pattern, unsigned int nelts_per_vq)
3270{
3271 unsigned int vl, const_vg;
3272 if (pattern >= AARCH64_SV_VL1 && pattern <= AARCH64_SV_VL8)
3273 vl = 1 + (pattern - AARCH64_SV_VL1);
3274 else if (pattern >= AARCH64_SV_VL16 && pattern <= AARCH64_SV_VL256)
3275 vl = 16 << (pattern - AARCH64_SV_VL16);
3276 else if (aarch64_sve_vg.is_constant (&const_vg))
3277 {
3278 /* There are two vector granules per quadword. */
3279 unsigned int nelts = (const_vg / 2) * nelts_per_vq;
3280 switch (pattern)
3281 {
3282 case AARCH64_SV_POW2: return 1 << floor_log2 (nelts);
3283 case AARCH64_SV_MUL4: return nelts & -4;
3284 case AARCH64_SV_MUL3: return (nelts / 3) * 3;
3285 case AARCH64_SV_ALL: return nelts;
3286 default: gcc_unreachable ();
3287 }
3288 }
3289 else
3290 return -1;
3291
3292 /* There are two vector granules per quadword. */
3293 poly_uint64 nelts_all = exact_div (aarch64_sve_vg, 2) * nelts_per_vq;
3294 if (known_le (vl, nelts_all))
3295 return vl;
3296
3297 /* Requesting more elements than are available results in a PFALSE. */
3298 if (known_gt (vl, nelts_all))
3299 return 0;
3300
3301 return -1;
3302}
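
/* A hand-worked example of the folding above (illustrative only),
   assuming -msve-vector-bits=256 so that aarch64_sve_vg is the
   constant 4: with NELTS_PER_VQ == 4 (32-bit elements) there are
   8 elements in total, so AARCH64_SV_ALL and AARCH64_SV_POW2 fold to 8,
   AARCH64_SV_MUL3 folds to 6, AARCH64_SV_VL4 folds to 4, and
   AARCH64_SV_VL16 folds to 0 (more elements than are available).  */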
3303
43cacb12
RS
3304/* Return true if we can move VALUE into a register using a single
3305 CNT[BHWD] instruction. */
3306
3307static bool
3308aarch64_sve_cnt_immediate_p (poly_int64 value)
3309{
3310 HOST_WIDE_INT factor = value.coeffs[0];
3311 /* The coefficient must be [1, 16] * {2, 4, 8, 16}. */
3312 return (value.coeffs[1] == factor
3313 && IN_RANGE (factor, 2, 16 * 16)
3314 && (factor & 1) == 0
3315 && factor <= 16 * (factor & -factor));
3316}
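
/* Hand-worked examples for the test above (illustrative only), writing
   poly_int64 values as {coeffs[0], coeffs[1]} and assuming the usual
   {2, 2} encoding of the number of 64-bit elements in an SVE vector:
   {2, 2} is accepted (a plain CNTD) and {32, 32} is accepted too
   (CNTB with a multiplier of 2).  {3, 3} is rejected because the
   factor is odd, and {34, 34} is rejected because 34 exceeds 16 times
   its lowest set bit (2).  */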
3317
3318/* Likewise for rtx X. */
3319
3320bool
3321aarch64_sve_cnt_immediate_p (rtx x)
3322{
3323 poly_int64 value;
3324 return poly_int_rtx_p (x, &value) && aarch64_sve_cnt_immediate_p (value);
3325}
3326
3327/* Return the asm string for an instruction with a CNT-like vector size
3328 operand (a vector pattern followed by a multiplier in the range [1, 16]).
3329 PREFIX is the mnemonic without the size suffix and OPERANDS is the
3330 first part of the operands template (the part that comes before the
139df05a
RS
3331 vector size itself). PATTERN is the pattern to use. FACTOR is the
3332 number of quadwords. NELTS_PER_VQ, if nonzero, is the number of elements
3333 in each quadword. If it is zero, we can use any element size. */
43cacb12
RS
3334
3335static char *
3336aarch64_output_sve_cnt_immediate (const char *prefix, const char *operands,
139df05a 3337 aarch64_svpattern pattern,
43cacb12
RS
3338 unsigned int factor,
3339 unsigned int nelts_per_vq)
3340{
139df05a 3341 static char buffer[sizeof ("sqincd\t%x0, %w0, vl256, mul #16")];
43cacb12
RS
3342
3343 if (nelts_per_vq == 0)
3344 /* There is some overlap in the ranges of the four CNT instructions.
3345 Here we always use the smallest possible element size, so that the
3346 multiplier is 1 wherever possible. */
3347 nelts_per_vq = factor & -factor;
3348 int shift = std::min (exact_log2 (nelts_per_vq), 4);
3349 gcc_assert (IN_RANGE (shift, 1, 4));
3350 char suffix = "dwhb"[shift - 1];
3351
3352 factor >>= shift;
3353 unsigned int written;
139df05a 3354 if (pattern == AARCH64_SV_ALL && factor == 1)
43cacb12
RS
3355 written = snprintf (buffer, sizeof (buffer), "%s%c\t%s",
3356 prefix, suffix, operands);
139df05a
RS
3357 else if (factor == 1)
3358 written = snprintf (buffer, sizeof (buffer), "%s%c\t%s, %s",
3359 prefix, suffix, operands, svpattern_token (pattern));
43cacb12 3360 else
139df05a
RS
3361 written = snprintf (buffer, sizeof (buffer), "%s%c\t%s, %s, mul #%d",
3362 prefix, suffix, operands, svpattern_token (pattern),
3363 factor);
43cacb12
RS
3364 gcc_assert (written < sizeof (buffer));
3365 return buffer;
3366}
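
/* Some examples of the strings built above, for PREFIX "cnt",
   PATTERN AARCH64_SV_ALL and OPERANDS "%x0": a FACTOR of 16 with
   NELTS_PER_VQ == 0 picks byte elements and gives "cntb\t%x0";
   a FACTOR of 64 gives "cntb\t%x0, all, mul #4"; and a FACTOR of 8
   with NELTS_PER_VQ == 4 gives "cntw\t%x0, all, mul #2".  */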
3367
3368/* Return the asm string for an instruction with a CNT-like vector size
3369 operand (a vector pattern followed by a multiplier in the range [1, 16]).
3370 PREFIX is the mnemonic without the size suffix and OPERANDS is the
3371 first part of the operands template (the part that comes before the
3372 vector size itself). X is the value of the vector size operand,
139df05a
RS
3373 as a polynomial integer rtx; we need to convert this into an "all"
3374 pattern with a multiplier. */
43cacb12
RS
3375
3376char *
3377aarch64_output_sve_cnt_immediate (const char *prefix, const char *operands,
3378 rtx x)
3379{
3380 poly_int64 value = rtx_to_poly_int64 (x);
3381 gcc_assert (aarch64_sve_cnt_immediate_p (value));
139df05a 3382 return aarch64_output_sve_cnt_immediate (prefix, operands, AARCH64_SV_ALL,
43cacb12
RS
3383 value.coeffs[1], 0);
3384}
3385
624d0f07
RS
3386/* Return the asm string for an instruction with a CNT-like vector size
3387 operand (a vector pattern followed by a multiplier in the range [1, 16]).
3388 PREFIX is the mnemonic without the size suffix and OPERANDS is the
3389 first part of the operands template (the part that comes before the
3390 vector size itself). CNT_PAT[0..2] are the operands of the
3391 UNSPEC_SVE_CNT_PAT; see aarch64_sve_cnt_pat for details. */
3392
3393char *
3394aarch64_output_sve_cnt_pat_immediate (const char *prefix,
3395 const char *operands, rtx *cnt_pat)
3396{
3397 aarch64_svpattern pattern = (aarch64_svpattern) INTVAL (cnt_pat[0]);
3398 unsigned int nelts_per_vq = INTVAL (cnt_pat[1]);
3399 unsigned int factor = INTVAL (cnt_pat[2]) * nelts_per_vq;
3400 return aarch64_output_sve_cnt_immediate (prefix, operands, pattern,
3401 factor, nelts_per_vq);
3402}
3403
0fdc30bc
RS
3404/* Return true if we can add X using a single SVE INC or DEC instruction. */
3405
3406bool
3407aarch64_sve_scalar_inc_dec_immediate_p (rtx x)
3408{
3409 poly_int64 value;
3410 return (poly_int_rtx_p (x, &value)
3411 && (aarch64_sve_cnt_immediate_p (value)
3412 || aarch64_sve_cnt_immediate_p (-value)));
3413}
3414
3415/* Return the asm string for adding SVE INC/DEC immediate OFFSET to
3416 operand 0. */
3417
3418char *
3419aarch64_output_sve_scalar_inc_dec (rtx offset)
3420{
3421 poly_int64 offset_value = rtx_to_poly_int64 (offset);
3422 gcc_assert (offset_value.coeffs[0] == offset_value.coeffs[1]);
3423 if (offset_value.coeffs[1] > 0)
139df05a 3424 return aarch64_output_sve_cnt_immediate ("inc", "%x0", AARCH64_SV_ALL,
0fdc30bc
RS
3425 offset_value.coeffs[1], 0);
3426 else
139df05a 3427 return aarch64_output_sve_cnt_immediate ("dec", "%x0", AARCH64_SV_ALL,
0fdc30bc
RS
3428 -offset_value.coeffs[1], 0);
3429}
3430
43cacb12
RS
3431/* Return true if we can add VALUE to a register using a single ADDVL
3432 or ADDPL instruction. */
3433
3434static bool
3435aarch64_sve_addvl_addpl_immediate_p (poly_int64 value)
3436{
3437 HOST_WIDE_INT factor = value.coeffs[0];
3438 if (factor == 0 || value.coeffs[1] != factor)
3439 return false;
3440 /* FACTOR counts VG / 2, so a value of 2 is one predicate width
3441 and a value of 16 is one vector width. */
3442 return (((factor & 15) == 0 && IN_RANGE (factor, -32 * 16, 31 * 16))
3443 || ((factor & 1) == 0 && IN_RANGE (factor, -32 * 2, 31 * 2)));
3444}
3445
3446/* Likewise for rtx X. */
3447
3448bool
3449aarch64_sve_addvl_addpl_immediate_p (rtx x)
3450{
3451 poly_int64 value;
3452 return (poly_int_rtx_p (x, &value)
3453 && aarch64_sve_addvl_addpl_immediate_p (value));
3454}
3455
0fdc30bc
RS
3456/* Return the asm string for adding ADDVL or ADDPL immediate OFFSET
3457 to operand 1 and storing the result in operand 0. */
43cacb12
RS
3458
3459char *
0fdc30bc 3460aarch64_output_sve_addvl_addpl (rtx offset)
43cacb12
RS
3461{
3462 static char buffer[sizeof ("addpl\t%x0, %x1, #-") + 3 * sizeof (int)];
3463 poly_int64 offset_value = rtx_to_poly_int64 (offset);
3464 gcc_assert (aarch64_sve_addvl_addpl_immediate_p (offset_value));
3465
43cacb12
RS
3466 int factor = offset_value.coeffs[1];
3467 if ((factor & 15) == 0)
3468 snprintf (buffer, sizeof (buffer), "addvl\t%%x0, %%x1, #%d", factor / 16);
3469 else
3470 snprintf (buffer, sizeof (buffer), "addpl\t%%x0, %%x1, #%d", factor / 2);
3471 return buffer;
3472}
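
/* For example, an OFFSET of (32, 32) (two full vectors) gives
   "addvl\t%x0, %x1, #2" and an OFFSET of (6, 6) (three predicate
   widths) gives "addpl\t%x0, %x1, #3".  An odd factor such as (3, 3)
   is rejected by aarch64_sve_addvl_addpl_immediate_p above.  */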
3473
3474/* Return true if X is a valid immediate for an SVE vector INC or DEC
3475 instruction. If it is, store the number of elements in each vector
3476 quadword in *NELTS_PER_VQ_OUT (if nonnull) and store the multiplication
3477 factor in *FACTOR_OUT (if nonnull). */
3478
3479bool
0fdc30bc
RS
3480aarch64_sve_vector_inc_dec_immediate_p (rtx x, int *factor_out,
3481 unsigned int *nelts_per_vq_out)
43cacb12
RS
3482{
3483 rtx elt;
3484 poly_int64 value;
3485
3486 if (!const_vec_duplicate_p (x, &elt)
3487 || !poly_int_rtx_p (elt, &value))
3488 return false;
3489
3490 unsigned int nelts_per_vq = 128 / GET_MODE_UNIT_BITSIZE (GET_MODE (x));
3491 if (nelts_per_vq != 8 && nelts_per_vq != 4 && nelts_per_vq != 2)
3492 /* There's no vector INCB. */
3493 return false;
3494
3495 HOST_WIDE_INT factor = value.coeffs[0];
3496 if (value.coeffs[1] != factor)
3497 return false;
3498
3499 /* The coefficient must be [1, 16] * NELTS_PER_VQ. */
3500 if ((factor % nelts_per_vq) != 0
3501 || !IN_RANGE (abs (factor), nelts_per_vq, 16 * nelts_per_vq))
3502 return false;
3503
3504 if (factor_out)
3505 *factor_out = factor;
3506 if (nelts_per_vq_out)
3507 *nelts_per_vq_out = nelts_per_vq;
3508 return true;
3509}
3510
3511/* Return true if X is a valid immediate for an SVE vector INC or DEC
3512 instruction. */
3513
3514bool
0fdc30bc 3515aarch64_sve_vector_inc_dec_immediate_p (rtx x)
43cacb12 3516{
0fdc30bc 3517 return aarch64_sve_vector_inc_dec_immediate_p (x, NULL, NULL);
43cacb12
RS
3518}
3519
3520/* Return the asm template for an SVE vector INC or DEC instruction.
3521 OPERANDS gives the operands before the vector count and X is the
3522 value of the vector count operand itself. */
3523
3524char *
0fdc30bc 3525aarch64_output_sve_vector_inc_dec (const char *operands, rtx x)
43cacb12
RS
3526{
3527 int factor;
3528 unsigned int nelts_per_vq;
0fdc30bc 3529 if (!aarch64_sve_vector_inc_dec_immediate_p (x, &factor, &nelts_per_vq))
43cacb12
RS
3530 gcc_unreachable ();
3531 if (factor < 0)
139df05a
RS
3532 return aarch64_output_sve_cnt_immediate ("dec", operands, AARCH64_SV_ALL,
3533 -factor, nelts_per_vq);
43cacb12 3534 else
139df05a
RS
3535 return aarch64_output_sve_cnt_immediate ("inc", operands, AARCH64_SV_ALL,
3536 factor, nelts_per_vq);
43cacb12 3537}
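
/* For example, a VNx4SI count in which every element is (8, 8) has
   FACTOR == 8 and NELTS_PER_VQ == 4, giving
   "incw\t<operands>, all, mul #2"; the negated value gives the
   corresponding "decw" form.  */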
43e9d192 3538
82614948
RR
3539static int
3540aarch64_internal_mov_immediate (rtx dest, rtx imm, bool generate,
77e994c9 3541 scalar_int_mode mode)
43e9d192 3542{
43e9d192 3543 int i;
9a4865db
WD
3544 unsigned HOST_WIDE_INT val, val2, mask;
3545 int one_match, zero_match;
3546 int num_insns;
43e9d192 3547
9a4865db
WD
3548 val = INTVAL (imm);
3549
3550 if (aarch64_move_imm (val, mode))
43e9d192 3551 {
82614948 3552 if (generate)
f7df4a84 3553 emit_insn (gen_rtx_SET (dest, imm));
9a4865db 3554 return 1;
43e9d192
IB
3555 }
3556
9de00935
TC
3557 /* Check to see if the low 32 bits are either 0xffffXXXX or 0xXXXXffff
3558 (with XXXX non-zero). In that case check to see if the move can be done in
3559 a smaller mode. */
3560 val2 = val & 0xffffffff;
3561 if (mode == DImode
3562 && aarch64_move_imm (val2, SImode)
3563 && (((val >> 32) & 0xffff) == 0 || (val >> 48) == 0))
3564 {
3565 if (generate)
3566 emit_insn (gen_rtx_SET (dest, GEN_INT (val2)));
3567
3568 /* Check if we have to emit a second instruction by checking to see
3569 if any of the upper 32 bits of the original DI mode value is set. */
3570 if (val == val2)
3571 return 1;
3572
3573 i = (val >> 48) ? 48 : 32;
3574
3575 if (generate)
3576 emit_insn (gen_insv_immdi (dest, GEN_INT (i),
3577 GEN_INT ((val >> i) & 0xffff)));
3578
3579 return 2;
3580 }
3581
9a4865db 3582 if ((val >> 32) == 0 || mode == SImode)
43e9d192 3583 {
82614948
RR
3584 if (generate)
3585 {
9a4865db
WD
3586 emit_insn (gen_rtx_SET (dest, GEN_INT (val & 0xffff)));
3587 if (mode == SImode)
3588 emit_insn (gen_insv_immsi (dest, GEN_INT (16),
3589 GEN_INT ((val >> 16) & 0xffff)));
3590 else
3591 emit_insn (gen_insv_immdi (dest, GEN_INT (16),
3592 GEN_INT ((val >> 16) & 0xffff)));
82614948 3593 }
9a4865db 3594 return 2;
43e9d192
IB
3595 }
3596
3597 /* Remaining cases are all for DImode. */
3598
43e9d192 3599 mask = 0xffff;
9a4865db
WD
3600 zero_match = ((val & mask) == 0) + ((val & (mask << 16)) == 0) +
3601 ((val & (mask << 32)) == 0) + ((val & (mask << 48)) == 0);
3602 one_match = ((~val & mask) == 0) + ((~val & (mask << 16)) == 0) +
3603 ((~val & (mask << 32)) == 0) + ((~val & (mask << 48)) == 0);
43e9d192 3604
62c8d76c 3605 if (zero_match != 2 && one_match != 2)
43e9d192 3606 {
62c8d76c
WD
3607 /* Try emitting a bitmask immediate with a movk replacing 16 bits.
3608 For a 64-bit bitmask try whether changing 16 bits to all ones or
3609 zeroes creates a valid bitmask. To check any repeated bitmask,
3610 try using 16 bits from the other 32-bit half of val. */
43e9d192 3611
62c8d76c 3612 for (i = 0; i < 64; i += 16, mask <<= 16)
43e9d192 3613 {
62c8d76c
WD
3614 val2 = val & ~mask;
3615 if (val2 != val && aarch64_bitmask_imm (val2, mode))
3616 break;
3617 val2 = val | mask;
3618 if (val2 != val && aarch64_bitmask_imm (val2, mode))
3619 break;
3620 val2 = val2 & ~mask;
3621 val2 = val2 | (((val2 >> 32) | (val2 << 32)) & mask);
3622 if (val2 != val && aarch64_bitmask_imm (val2, mode))
3623 break;
43e9d192 3624 }
62c8d76c 3625 if (i != 64)
43e9d192 3626 {
62c8d76c 3627 if (generate)
43e9d192 3628 {
62c8d76c
WD
3629 emit_insn (gen_rtx_SET (dest, GEN_INT (val2)));
3630 emit_insn (gen_insv_immdi (dest, GEN_INT (i),
9a4865db 3631 GEN_INT ((val >> i) & 0xffff)));
43e9d192 3632 }
1312b1ba 3633 return 2;
43e9d192
IB
3634 }
3635 }
3636
9a4865db
WD
3637 /* Generate 2-4 instructions, skipping 16 bits of all zeroes or ones which
3638 are emitted by the initial mov. If one_match > zero_match, skip set bits,
3639 otherwise skip zero bits. */
2c274197 3640
9a4865db 3641 num_insns = 1;
43e9d192 3642 mask = 0xffff;
9a4865db
WD
3643 val2 = one_match > zero_match ? ~val : val;
3644 i = (val2 & mask) != 0 ? 0 : (val2 & (mask << 16)) != 0 ? 16 : 32;
3645
3646 if (generate)
3647 emit_insn (gen_rtx_SET (dest, GEN_INT (one_match > zero_match
3648 ? (val | ~(mask << i))
3649 : (val & (mask << i)))));
3650 for (i += 16; i < 64; i += 16)
43e9d192 3651 {
9a4865db
WD
3652 if ((val2 & (mask << i)) == 0)
3653 continue;
3654 if (generate)
3655 emit_insn (gen_insv_immdi (dest, GEN_INT (i),
3656 GEN_INT ((val >> i) & 0xffff)));
3657 num_insns ++;
82614948
RR
3658 }
3659
3660 return num_insns;
3661}
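
/* As an example of the splitting above, 0x0000123400005678 is loaded
   as a MOV of 0x5678 followed by a MOVK of 0x1234 into bits [47:32]
   (two instructions), whereas a value such as 0x1234567887654321,
   which has no all-zero or all-one 16-bit chunk and no bitmask form,
   needs a MOV plus three MOVKs (four instructions).  */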
3662
c0bb5bc5
WD
3663/* Return whether imm is a 128-bit immediate which is simple enough to
3664 expand inline. */
3665bool
3666aarch64_mov128_immediate (rtx imm)
3667{
3668 if (GET_CODE (imm) == CONST_INT)
3669 return true;
3670
3671 gcc_assert (CONST_WIDE_INT_NUNITS (imm) == 2);
3672
3673 rtx lo = GEN_INT (CONST_WIDE_INT_ELT (imm, 0));
3674 rtx hi = GEN_INT (CONST_WIDE_INT_ELT (imm, 1));
3675
3676 return aarch64_internal_mov_immediate (NULL_RTX, lo, false, DImode)
3677 + aarch64_internal_mov_immediate (NULL_RTX, hi, false, DImode) <= 4;
3678}
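
/* For example, a CONST_WIDE_INT whose 64-bit halves are 0x1 and
   0xffff0000ffff0000 (a bitmask immediate) costs 1 + 1 == 2
   instructions and is expanded inline, whereas one whose halves each
   need a MOV plus three MOVKs costs 8 and is rejected.  */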
3679
3680
43cacb12
RS
3681/* Return the number of temporary registers that aarch64_add_offset_1
3682 would need to add OFFSET to a register. */
3683
3684static unsigned int
3685aarch64_add_offset_1_temporaries (HOST_WIDE_INT offset)
3686{
3687 return abs_hwi (offset) < 0x1000000 ? 0 : 1;
3688}
3689
f5470a77
RS
3690/* A subroutine of aarch64_add_offset. Set DEST to SRC + OFFSET for
3691 a non-polynomial OFFSET. MODE is the mode of the addition.
3692 FRAME_RELATED_P is true if the RTX_FRAME_RELATED flag should
3693 be set and CFA adjustments added to the generated instructions.
3694
3695 TEMP1, if nonnull, is a register of mode MODE that can be used as a
3696 temporary if register allocation is already complete. This temporary
3697 register may overlap DEST but must not overlap SRC. If TEMP1 is known
3698 to hold abs (OFFSET), EMIT_MOVE_IMM can be set to false to avoid emitting
3699 the immediate again.
0100c5f9
RS
3700
3701 Since this function may be used to adjust the stack pointer, we must
3702 ensure that it cannot cause transient stack deallocation (for example
3703 by first incrementing SP and then decrementing when adjusting by a
3704 large immediate). */
3705
3706static void
f5470a77
RS
3707aarch64_add_offset_1 (scalar_int_mode mode, rtx dest,
3708 rtx src, HOST_WIDE_INT offset, rtx temp1,
3709 bool frame_related_p, bool emit_move_imm)
0100c5f9 3710{
f5470a77
RS
3711 gcc_assert (emit_move_imm || temp1 != NULL_RTX);
3712 gcc_assert (temp1 == NULL_RTX || !reg_overlap_mentioned_p (temp1, src));
3713
3714 HOST_WIDE_INT moffset = abs_hwi (offset);
0100c5f9
RS
3715 rtx_insn *insn;
3716
f5470a77
RS
3717 if (!moffset)
3718 {
3719 if (!rtx_equal_p (dest, src))
3720 {
3721 insn = emit_insn (gen_rtx_SET (dest, src));
3722 RTX_FRAME_RELATED_P (insn) = frame_related_p;
3723 }
3724 return;
3725 }
0100c5f9
RS
3726
3727 /* Single instruction adjustment. */
f5470a77 3728 if (aarch64_uimm12_shift (moffset))
0100c5f9 3729 {
f5470a77 3730 insn = emit_insn (gen_add3_insn (dest, src, GEN_INT (offset)));
0100c5f9
RS
3731 RTX_FRAME_RELATED_P (insn) = frame_related_p;
3732 return;
3733 }
3734
f5470a77
RS
3735 /* Emit 2 additions/subtractions if the adjustment is less than 24 bits
3736 and either:
3737
3738 a) the offset cannot be loaded by a 16-bit move or
3739 b) there is no spare register into which we can move it. */
3740 if (moffset < 0x1000000
3741 && ((!temp1 && !can_create_pseudo_p ())
3742 || !aarch64_move_imm (moffset, mode)))
0100c5f9 3743 {
f5470a77 3744 HOST_WIDE_INT low_off = moffset & 0xfff;
0100c5f9 3745
f5470a77
RS
3746 low_off = offset < 0 ? -low_off : low_off;
3747 insn = emit_insn (gen_add3_insn (dest, src, GEN_INT (low_off)));
0100c5f9 3748 RTX_FRAME_RELATED_P (insn) = frame_related_p;
f5470a77 3749 insn = emit_insn (gen_add2_insn (dest, GEN_INT (offset - low_off)));
0100c5f9
RS
3750 RTX_FRAME_RELATED_P (insn) = frame_related_p;
3751 return;
3752 }
3753
3754 /* Emit a move immediate if required and an addition/subtraction. */
0100c5f9 3755 if (emit_move_imm)
f5470a77
RS
3756 {
3757 gcc_assert (temp1 != NULL_RTX || can_create_pseudo_p ());
3758 temp1 = aarch64_force_temporary (mode, temp1, GEN_INT (moffset));
3759 }
3760 insn = emit_insn (offset < 0
3761 ? gen_sub3_insn (dest, src, temp1)
3762 : gen_add3_insn (dest, src, temp1));
0100c5f9
RS
3763 if (frame_related_p)
3764 {
3765 RTX_FRAME_RELATED_P (insn) = frame_related_p;
f5470a77
RS
3766 rtx adj = plus_constant (mode, src, offset);
3767 add_reg_note (insn, REG_CFA_ADJUST_CFA, gen_rtx_SET (dest, adj));
0100c5f9
RS
3768 }
3769}
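
/* For example, an OFFSET of 4096 is a shifted 12-bit immediate and
   needs a single "add dest, src, #4096", whereas an OFFSET of
   0x123456 (which is not a valid move immediate) uses the
   two-addition form "add dest, src, #0x456" followed by
   "add dest, dest, #0x123000".  */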
3770
43cacb12
RS
3771/* Return the number of temporary registers that aarch64_add_offset
3772 would need to move OFFSET into a register or add OFFSET to a register;
3773 ADD_P is true if we want the latter rather than the former. */
3774
3775static unsigned int
3776aarch64_offset_temporaries (bool add_p, poly_int64 offset)
3777{
3778 /* This follows the same structure as aarch64_add_offset. */
3779 if (add_p && aarch64_sve_addvl_addpl_immediate_p (offset))
3780 return 0;
3781
3782 unsigned int count = 0;
3783 HOST_WIDE_INT factor = offset.coeffs[1];
3784 HOST_WIDE_INT constant = offset.coeffs[0] - factor;
3785 poly_int64 poly_offset (factor, factor);
3786 if (add_p && aarch64_sve_addvl_addpl_immediate_p (poly_offset))
3787 /* Need one register for the ADDVL/ADDPL result. */
3788 count += 1;
3789 else if (factor != 0)
3790 {
3791 factor = abs (factor);
3792 if (factor > 16 * (factor & -factor))
3793 /* Need one register for the CNT result and one for the multiplication
3794 factor. If necessary, the second temporary can be reused for the
3795 constant part of the offset. */
3796 return 2;
3797 /* Need one register for the CNT result (which might then
3798 be shifted). */
3799 count += 1;
3800 }
3801 return count + aarch64_add_offset_1_temporaries (constant);
3802}
3803
3804/* If X can be represented as a poly_int64, return the number
3805 of temporaries that are required to add it to a register.
3806 Return -1 otherwise. */
3807
3808int
3809aarch64_add_offset_temporaries (rtx x)
3810{
3811 poly_int64 offset;
3812 if (!poly_int_rtx_p (x, &offset))
3813 return -1;
3814 return aarch64_offset_temporaries (true, offset);
3815}
3816
f5470a77
RS
3817/* Set DEST to SRC + OFFSET. MODE is the mode of the addition.
3818 FRAME_RELATED_P is true if the RTX_FRAME_RELATED flag should
3819 be set and CFA adjustments added to the generated instructions.
3820
3821 TEMP1, if nonnull, is a register of mode MODE that can be used as a
3822 temporary if register allocation is already complete. This temporary
43cacb12
RS
3823 register may overlap DEST if !FRAME_RELATED_P but must not overlap SRC.
3824 If TEMP1 is known to hold abs (OFFSET), EMIT_MOVE_IMM can be set to
3825 false to avoid emitting the immediate again.
3826
3827 TEMP2, if nonnull, is a second temporary register that doesn't
3828 overlap either DEST or SRC.
f5470a77
RS
3829
3830 Since this function may be used to adjust the stack pointer, we must
3831 ensure that it cannot cause transient stack deallocation (for example
3832 by first incrementing SP and then decrementing when adjusting by a
3833 large immediate). */
3834
3835static void
3836aarch64_add_offset (scalar_int_mode mode, rtx dest, rtx src,
43cacb12
RS
3837 poly_int64 offset, rtx temp1, rtx temp2,
3838 bool frame_related_p, bool emit_move_imm = true)
0100c5f9 3839{
f5470a77
RS
3840 gcc_assert (emit_move_imm || temp1 != NULL_RTX);
3841 gcc_assert (temp1 == NULL_RTX || !reg_overlap_mentioned_p (temp1, src));
43cacb12
RS
3842 gcc_assert (temp1 == NULL_RTX
3843 || !frame_related_p
3844 || !reg_overlap_mentioned_p (temp1, dest));
3845 gcc_assert (temp2 == NULL_RTX || !reg_overlap_mentioned_p (dest, temp2));
3846
3847 /* Try using ADDVL or ADDPL to add the whole value. */
3848 if (src != const0_rtx && aarch64_sve_addvl_addpl_immediate_p (offset))
3849 {
3850 rtx offset_rtx = gen_int_mode (offset, mode);
3851 rtx_insn *insn = emit_insn (gen_add3_insn (dest, src, offset_rtx));
3852 RTX_FRAME_RELATED_P (insn) = frame_related_p;
3853 return;
3854 }
3855
3856 /* Coefficient 1 is multiplied by the number of 128-bit blocks in an
3857 SVE vector register, over and above the minimum size of 128 bits.
3858 This is equivalent to half the value returned by CNTD with a
3859 vector shape of ALL. */
3860 HOST_WIDE_INT factor = offset.coeffs[1];
3861 HOST_WIDE_INT constant = offset.coeffs[0] - factor;
3862
3863 /* Try using ADDVL or ADDPL to add the VG-based part. */
3864 poly_int64 poly_offset (factor, factor);
3865 if (src != const0_rtx
3866 && aarch64_sve_addvl_addpl_immediate_p (poly_offset))
3867 {
3868 rtx offset_rtx = gen_int_mode (poly_offset, mode);
3869 if (frame_related_p)
3870 {
3871 rtx_insn *insn = emit_insn (gen_add3_insn (dest, src, offset_rtx));
3872 RTX_FRAME_RELATED_P (insn) = true;
3873 src = dest;
3874 }
3875 else
3876 {
3877 rtx addr = gen_rtx_PLUS (mode, src, offset_rtx);
3878 src = aarch64_force_temporary (mode, temp1, addr);
3879 temp1 = temp2;
3880 temp2 = NULL_RTX;
3881 }
3882 }
3883 /* Otherwise use a CNT-based sequence. */
3884 else if (factor != 0)
3885 {
3886 /* Use a subtraction if we have a negative factor. */
3887 rtx_code code = PLUS;
3888 if (factor < 0)
3889 {
3890 factor = -factor;
3891 code = MINUS;
3892 }
3893
3894 /* Calculate CNTD * FACTOR / 2. First try to fold the division
3895 into the multiplication. */
3896 rtx val;
3897 int shift = 0;
3898 if (factor & 1)
3899 /* Use a right shift by 1. */
3900 shift = -1;
3901 else
3902 factor /= 2;
3903 HOST_WIDE_INT low_bit = factor & -factor;
3904 if (factor <= 16 * low_bit)
3905 {
3906 if (factor > 16 * 8)
3907 {
3908 /* "CNTB Xn, ALL, MUL #FACTOR" is out of range, so calculate
3909 the value with the minimum multiplier and shift it into
3910 position. */
3911 int extra_shift = exact_log2 (low_bit);
3912 shift += extra_shift;
3913 factor >>= extra_shift;
3914 }
3915 val = gen_int_mode (poly_int64 (factor * 2, factor * 2), mode);
3916 }
3917 else
3918 {
7d8bdfa7
RS
3919 /* Base the factor on LOW_BIT if we can calculate LOW_BIT
3920 directly, since that should increase the chances of being
3921 able to use a shift and add sequence. If LOW_BIT itself
3922 is out of range, just use CNTD. */
3923 if (low_bit <= 16 * 8)
3924 factor /= low_bit;
3925 else
3926 low_bit = 1;
3927
3928 val = gen_int_mode (poly_int64 (low_bit * 2, low_bit * 2), mode);
43cacb12
RS
3929 val = aarch64_force_temporary (mode, temp1, val);
3930
7d8bdfa7
RS
3931 if (can_create_pseudo_p ())
3932 {
3933 rtx coeff1 = gen_int_mode (factor, mode);
3934 val = expand_mult (mode, val, coeff1, NULL_RTX, false, true);
3935 }
3936 else
43cacb12 3937 {
7d8bdfa7
RS
3938 /* Go back to using a negative multiplication factor if we have
3939 no register from which to subtract. */
3940 if (code == MINUS && src == const0_rtx)
3941 {
3942 factor = -factor;
3943 code = PLUS;
3944 }
3945 rtx coeff1 = gen_int_mode (factor, mode);
3946 coeff1 = aarch64_force_temporary (mode, temp2, coeff1);
3947 val = gen_rtx_MULT (mode, val, coeff1);
43cacb12 3948 }
43cacb12
RS
3949 }
3950
3951 if (shift > 0)
3952 {
3953 /* Multiply by 1 << SHIFT. */
3954 val = aarch64_force_temporary (mode, temp1, val);
3955 val = gen_rtx_ASHIFT (mode, val, GEN_INT (shift));
3956 }
3957 else if (shift == -1)
3958 {
3959 /* Divide by 2. */
3960 val = aarch64_force_temporary (mode, temp1, val);
3961 val = gen_rtx_ASHIFTRT (mode, val, const1_rtx);
3962 }
3963
3964 /* Calculate SRC +/- CNTD * FACTOR / 2. */
3965 if (src != const0_rtx)
3966 {
3967 val = aarch64_force_temporary (mode, temp1, val);
3968 val = gen_rtx_fmt_ee (code, mode, src, val);
3969 }
3970 else if (code == MINUS)
3971 {
3972 val = aarch64_force_temporary (mode, temp1, val);
3973 val = gen_rtx_NEG (mode, val);
3974 }
3975
3976 if (constant == 0 || frame_related_p)
3977 {
3978 rtx_insn *insn = emit_insn (gen_rtx_SET (dest, val));
3979 if (frame_related_p)
3980 {
3981 RTX_FRAME_RELATED_P (insn) = true;
3982 add_reg_note (insn, REG_CFA_ADJUST_CFA,
3983 gen_rtx_SET (dest, plus_constant (Pmode, src,
3984 poly_offset)));
3985 }
3986 src = dest;
3987 if (constant == 0)
3988 return;
3989 }
3990 else
3991 {
3992 src = aarch64_force_temporary (mode, temp1, val);
3993 temp1 = temp2;
3994 temp2 = NULL_RTX;
3995 }
3996
3997 emit_move_imm = true;
3998 }
f5470a77 3999
f5470a77
RS
4000 aarch64_add_offset_1 (mode, dest, src, constant, temp1,
4001 frame_related_p, emit_move_imm);
0100c5f9
RS
4002}
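
/* For example, adding the poly_int64 (18, 2) -- 16 bytes plus one
   predicate width -- splits into FACTOR == 2 and CONSTANT == 16,
   giving conceptually "addpl dest, src, #1" followed by
   "add dest, dest, #16"; the exact choice of temporaries depends on
   FRAME_RELATED_P and on whether registers have been allocated.  */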
4003
43cacb12
RS
4004/* Like aarch64_add_offset, but the offset is given as an rtx rather
4005 than a poly_int64. */
4006
4007void
4008aarch64_split_add_offset (scalar_int_mode mode, rtx dest, rtx src,
4009 rtx offset_rtx, rtx temp1, rtx temp2)
4010{
4011 aarch64_add_offset (mode, dest, src, rtx_to_poly_int64 (offset_rtx),
4012 temp1, temp2, false);
4013}
4014
f5470a77
RS
4015/* Add DELTA to the stack pointer, marking the instructions frame-related.
4016 TEMP1 is available as a temporary if nonnull. EMIT_MOVE_IMM is false
4017 if TEMP1 already contains abs (DELTA). */
4018
0100c5f9 4019static inline void
43cacb12 4020aarch64_add_sp (rtx temp1, rtx temp2, poly_int64 delta, bool emit_move_imm)
0100c5f9 4021{
f5470a77 4022 aarch64_add_offset (Pmode, stack_pointer_rtx, stack_pointer_rtx, delta,
43cacb12 4023 temp1, temp2, true, emit_move_imm);
0100c5f9
RS
4024}
4025
f5470a77
RS
4026/* Subtract DELTA from the stack pointer, marking the instructions
4027 frame-related if FRAME_RELATED_P. TEMP1 is available as a temporary
4028 if nonnull. */
4029
0100c5f9 4030static inline void
cd1bef27
JL
4031aarch64_sub_sp (rtx temp1, rtx temp2, poly_int64 delta, bool frame_related_p,
4032 bool emit_move_imm = true)
0100c5f9 4033{
f5470a77 4034 aarch64_add_offset (Pmode, stack_pointer_rtx, stack_pointer_rtx, -delta,
cd1bef27 4035 temp1, temp2, frame_related_p, emit_move_imm);
0100c5f9 4036}
82614948 4037
43cacb12
RS
4038/* Set DEST to (vec_series BASE STEP). */
4039
4040static void
4041aarch64_expand_vec_series (rtx dest, rtx base, rtx step)
82614948
RR
4042{
4043 machine_mode mode = GET_MODE (dest);
43cacb12
RS
4044 scalar_mode inner = GET_MODE_INNER (mode);
4045
4046 /* Each operand can be a register or an immediate in the range [-16, 15]. */
4047 if (!aarch64_sve_index_immediate_p (base))
4048 base = force_reg (inner, base);
4049 if (!aarch64_sve_index_immediate_p (step))
4050 step = force_reg (inner, step);
4051
4052 emit_set_insn (dest, gen_rtx_VEC_SERIES (mode, base, step));
4053}
82614948 4054
4aeb1ba7
RS
4055/* Duplicate 128-bit Advanced SIMD vector SRC so that it fills an SVE
4056 register of mode MODE. Use TARGET for the result if it's nonnull
4057 and convenient.
4058
4059 The two vector modes must have the same element mode. The behavior
4060 is to duplicate architectural lane N of SRC into architectural lanes
4061 N + I * STEP of the result. On big-endian targets, architectural
4062 lane 0 of an Advanced SIMD vector is the last element of the vector
4063 in memory layout, so for big-endian targets this operation has the
4064 effect of reversing SRC before duplicating it. Callers need to
4065 account for this. */
43cacb12 4066
4aeb1ba7
RS
4067rtx
4068aarch64_expand_sve_dupq (rtx target, machine_mode mode, rtx src)
4069{
4070 machine_mode src_mode = GET_MODE (src);
4071 gcc_assert (GET_MODE_INNER (mode) == GET_MODE_INNER (src_mode));
4072 insn_code icode = (BYTES_BIG_ENDIAN
4073 ? code_for_aarch64_vec_duplicate_vq_be (mode)
4074 : code_for_aarch64_vec_duplicate_vq_le (mode));
4075
4076 unsigned int i = 0;
4077 expand_operand ops[3];
4078 create_output_operand (&ops[i++], target, mode);
4079 create_output_operand (&ops[i++], src, src_mode);
4080 if (BYTES_BIG_ENDIAN)
4081 {
4082 /* Create a PARALLEL describing the reversal of SRC. */
4083 unsigned int nelts_per_vq = 128 / GET_MODE_UNIT_BITSIZE (mode);
4084 rtx sel = aarch64_gen_stepped_int_parallel (nelts_per_vq,
4085 nelts_per_vq - 1, -1);
4086 create_fixed_operand (&ops[i++], sel);
43cacb12 4087 }
4aeb1ba7
RS
4088 expand_insn (icode, i, ops);
4089 return ops[0].value;
4090}
4091
4092/* Try to force 128-bit vector value SRC into memory and use LD1RQ to fetch
4093 the memory image into DEST. Return true on success. */
43cacb12 4094
4aeb1ba7
RS
4095static bool
4096aarch64_expand_sve_ld1rq (rtx dest, rtx src)
4097{
4098 src = force_const_mem (GET_MODE (src), src);
43cacb12
RS
4099 if (!src)
4100 return false;
4101
4102 /* Make sure that the address is legitimate. */
4aeb1ba7 4103 if (!aarch64_sve_ld1rq_operand_p (src))
43cacb12
RS
4104 {
4105 rtx addr = force_reg (Pmode, XEXP (src, 0));
4106 src = replace_equiv_address (src, addr);
4107 }
4108
947b1372 4109 machine_mode mode = GET_MODE (dest);
cc68f7c2 4110 machine_mode pred_mode = aarch64_sve_pred_mode (mode);
16de3637 4111 rtx ptrue = aarch64_ptrue_reg (pred_mode);
4aeb1ba7 4112 emit_insn (gen_aarch64_sve_ld1rq (mode, dest, src, ptrue));
43cacb12
RS
4113 return true;
4114}
4115
4aeb1ba7
RS
4116/* Return a register containing CONST_VECTOR SRC, given that SRC has an
4117 SVE data mode and isn't a legitimate constant. Use TARGET for the
4118 result if convenient.
43cacb12 4119
4aeb1ba7
RS
4120 The returned register can have whatever mode seems most natural
4121 given the contents of SRC. */
4122
4123static rtx
4124aarch64_expand_sve_const_vector (rtx target, rtx src)
43cacb12
RS
4125{
4126 machine_mode mode = GET_MODE (src);
4127 unsigned int npatterns = CONST_VECTOR_NPATTERNS (src);
4128 unsigned int nelts_per_pattern = CONST_VECTOR_NELTS_PER_PATTERN (src);
4aeb1ba7
RS
4129 scalar_mode elt_mode = GET_MODE_INNER (mode);
4130 unsigned int elt_bits = GET_MODE_BITSIZE (elt_mode);
cc68f7c2
RS
4131 unsigned int container_bits = aarch64_sve_container_bits (mode);
4132 unsigned int encoded_bits = npatterns * nelts_per_pattern * container_bits;
4133
4134 if (nelts_per_pattern == 1
4135 && encoded_bits <= 128
4136 && container_bits != elt_bits)
4137 {
4138 /* We have a partial vector mode and a constant whose full-vector
4139 equivalent would occupy a repeating 128-bit sequence. Build that
4140 full-vector equivalent instead, so that we have the option of
4141 using LD1RQ and Advanced SIMD operations. */
4142 unsigned int repeat = container_bits / elt_bits;
4143 machine_mode full_mode = aarch64_full_sve_mode (elt_mode).require ();
4144 rtx_vector_builder builder (full_mode, npatterns * repeat, 1);
4145 for (unsigned int i = 0; i < npatterns; ++i)
4146 for (unsigned int j = 0; j < repeat; ++j)
4147 builder.quick_push (CONST_VECTOR_ENCODED_ELT (src, i));
4148 target = aarch64_target_reg (target, full_mode);
4149 return aarch64_expand_sve_const_vector (target, builder.build ());
4150 }
4aeb1ba7
RS
4151
4152 if (nelts_per_pattern == 1 && encoded_bits == 128)
4153 {
4154 /* The constant is a duplicated quadword but can't be narrowed
4155 beyond a quadword. Get the memory image of the first quadword
4156 as a 128-bit vector and try using LD1RQ to load it from memory.
4157
4158 The effect for both endiannesses is to load memory lane N into
4159 architectural lanes N + I * STEP of the result. On big-endian
4160 targets, the layout of the 128-bit vector in an Advanced SIMD
4161 register would be different from its layout in an SVE register,
4162 but this 128-bit vector is a memory value only. */
4163 machine_mode vq_mode = aarch64_vq_mode (elt_mode).require ();
4164 rtx vq_value = simplify_gen_subreg (vq_mode, src, mode, 0);
4165 if (vq_value && aarch64_expand_sve_ld1rq (target, vq_value))
4166 return target;
4167 }
4168
4169 if (nelts_per_pattern == 1 && encoded_bits < 128)
4170 {
4171 /* The vector is a repeating sequence of 64 bits or fewer.
4172 See if we can load the sequence using an Advanced SIMD move and then
4173 duplicate it to fill a vector. This is better than using a GPR
4174 move because it keeps everything in the same register file. */
4175 machine_mode vq_mode = aarch64_vq_mode (elt_mode).require ();
4176 rtx_vector_builder builder (vq_mode, npatterns, 1);
4177 for (unsigned int i = 0; i < npatterns; ++i)
4178 {
4179 /* We want memory lane N to go into architectural lane N,
4180 so reverse for big-endian targets. The DUP .Q pattern
4181 has a compensating reverse built-in. */
4182 unsigned int srci = BYTES_BIG_ENDIAN ? npatterns - i - 1 : i;
4183 builder.quick_push (CONST_VECTOR_ENCODED_ELT (src, srci));
4184 }
4185 rtx vq_src = builder.build ();
4186 if (aarch64_simd_valid_immediate (vq_src, NULL))
4187 {
4188 vq_src = force_reg (vq_mode, vq_src);
4189 return aarch64_expand_sve_dupq (target, mode, vq_src);
4190 }
4191
4192 /* Get an integer representation of the repeating part of Advanced
4193 SIMD vector VQ_SRC. This preserves the endianness of VQ_SRC,
4194 which for big-endian targets is lane-swapped wrt a normal
4195 Advanced SIMD vector. This means that for both endiannesses,
4196 memory lane N of SVE vector SRC corresponds to architectural
4197 lane N of a register holding VQ_SRC. This in turn means that
4198 memory lane 0 of SVE vector SRC is in the lsb of VQ_SRC (viewed
4199 as a single 128-bit value) and thus that memory lane 0 of SRC is
4200 in the lsb of the integer. Duplicating the integer therefore
4201 ensures that memory lane N of SRC goes into architectural lane
4202 N + I * STEP of the SVE register. */
4203 scalar_mode int_mode = int_mode_for_size (encoded_bits, 0).require ();
4204 rtx elt_value = simplify_gen_subreg (int_mode, vq_src, vq_mode, 0);
4205 if (elt_value)
4206 {
4207 /* Pretend that we had a vector of INT_MODE to start with. */
4208 elt_mode = int_mode;
4209 mode = aarch64_full_sve_mode (int_mode).require ();
4210
4211 /* If the integer can be moved into a general register by a
4212 single instruction, do that and duplicate the result. */
4213 if (CONST_INT_P (elt_value)
4214 && aarch64_move_imm (INTVAL (elt_value), elt_mode))
4215 {
4216 elt_value = force_reg (elt_mode, elt_value);
4217 return expand_vector_broadcast (mode, elt_value);
4218 }
4219 }
4220 else if (npatterns == 1)
4221 /* We're duplicating a single value, but can't do better than
4222 force it to memory and load from there. This handles things
4223 like symbolic constants. */
4224 elt_value = CONST_VECTOR_ENCODED_ELT (src, 0);
43cacb12 4225
4aeb1ba7 4226 if (elt_value)
8179efe0 4227 {
4aeb1ba7
RS
4228 /* Load the element from memory if we can, otherwise move it into
4229 a register and use a DUP. */
4230 rtx op = force_const_mem (elt_mode, elt_value);
4231 if (!op)
4232 op = force_reg (elt_mode, elt_value);
4233 return expand_vector_broadcast (mode, op);
8179efe0 4234 }
43cacb12
RS
4235 }
4236
4aeb1ba7
RS
4237 /* Try using INDEX. */
4238 rtx base, step;
4239 if (const_vec_series_p (src, &base, &step))
4240 {
4241 aarch64_expand_vec_series (target, base, step);
4242 return target;
4243 }
4244
4245 /* From here on, it's better to force the whole constant to memory
4246 if we can. */
4247 if (GET_MODE_NUNITS (mode).is_constant ())
4248 return NULL_RTX;
4249
43cacb12 4250 /* Expand each pattern individually. */
4aeb1ba7 4251 gcc_assert (npatterns > 1);
43cacb12
RS
4252 rtx_vector_builder builder;
4253 auto_vec<rtx, 16> vectors (npatterns);
4254 for (unsigned int i = 0; i < npatterns; ++i)
4255 {
4256 builder.new_vector (mode, 1, nelts_per_pattern);
4257 for (unsigned int j = 0; j < nelts_per_pattern; ++j)
4258 builder.quick_push (CONST_VECTOR_ELT (src, i + j * npatterns));
4259 vectors.quick_push (force_reg (mode, builder.build ()));
4260 }
4261
4262 /* Use permutes to interleave the separate vectors. */
4263 while (npatterns > 1)
4264 {
4265 npatterns /= 2;
4266 for (unsigned int i = 0; i < npatterns; ++i)
4267 {
4aeb1ba7 4268 rtx tmp = (npatterns == 1 ? target : gen_reg_rtx (mode));
43cacb12
RS
4269 rtvec v = gen_rtvec (2, vectors[i], vectors[i + npatterns]);
4270 emit_set_insn (tmp, gen_rtx_UNSPEC (mode, v, UNSPEC_ZIP1));
4271 vectors[i] = tmp;
4272 }
4273 }
4aeb1ba7
RS
4274 gcc_assert (vectors[0] == target);
4275 return target;
43cacb12
RS
4276}
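
/* As an example of the interleaving step above, a variable-length
   VNx4SI constant { 0, 16, 1, 17, 2, 18, ... } has two patterns, each
   of which is itself a linear series ({ 0, 1, 2, ... } and
   { 16, 17, 18, ... }), so it is built as two INDEX instructions whose
   results are combined by a single ZIP1.  */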
4277
678faefc
RS
4278/* Use WHILE to set a predicate register of mode MODE in which the first
4279 VL bits are set and the rest are clear. Use TARGET for the register
4280 if it's nonnull and convenient. */
0b1fe8cf 4281
678faefc
RS
4282static rtx
4283aarch64_sve_move_pred_via_while (rtx target, machine_mode mode,
4284 unsigned int vl)
0b1fe8cf
RS
4285{
4286 rtx limit = force_reg (DImode, gen_int_mode (vl, DImode));
678faefc 4287 target = aarch64_target_reg (target, mode);
624d0f07
RS
4288 emit_insn (gen_while (UNSPEC_WHILE_LO, DImode, mode,
4289 target, const0_rtx, limit));
678faefc
RS
4290 return target;
4291}
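
/* For example, a predicate in which only the first three .S elements
   are set is built by loading 3 into a scalar register and emitting
   "whilelo p<n>.s, xzr, x<m>" via the UNSPEC_WHILE_LO pattern; the
   register numbers depend on allocation.  */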
4292
2803bc3b
RS
4293static rtx
4294aarch64_expand_sve_const_pred_1 (rtx, rtx_vector_builder &, bool);
4295
4296/* BUILDER is a constant predicate in which the index of every set bit
4297 is a multiple of ELT_SIZE (which is <= 8). Try to load the constant
4298 by inverting every element at a multiple of ELT_SIZE and EORing the
4299 result with an ELT_SIZE PTRUE.
4300
4301 Return a register that contains the constant on success, otherwise
4302 return null. Use TARGET as the register if it is nonnull and
4303 convenient. */
4304
4305static rtx
4306aarch64_expand_sve_const_pred_eor (rtx target, rtx_vector_builder &builder,
4307 unsigned int elt_size)
4308{
4309 /* Invert every element at a multiple of ELT_SIZE, keeping the
4310 other bits zero. */
4311 rtx_vector_builder inv_builder (VNx16BImode, builder.npatterns (),
4312 builder.nelts_per_pattern ());
4313 for (unsigned int i = 0; i < builder.encoded_nelts (); ++i)
4314 if ((i & (elt_size - 1)) == 0 && INTVAL (builder.elt (i)) == 0)
4315 inv_builder.quick_push (const1_rtx);
4316 else
4317 inv_builder.quick_push (const0_rtx);
4318 inv_builder.finalize ();
4319
4320 /* See if we can load the constant cheaply. */
4321 rtx inv = aarch64_expand_sve_const_pred_1 (NULL_RTX, inv_builder, false);
4322 if (!inv)
4323 return NULL_RTX;
4324
4325 /* EOR the result with an ELT_SIZE PTRUE. */
4326 rtx mask = aarch64_ptrue_all (elt_size);
4327 mask = force_reg (VNx16BImode, mask);
4328 target = aarch64_target_reg (target, VNx16BImode);
4329 emit_insn (gen_aarch64_pred_z (XOR, VNx16BImode, target, mask, inv, mask));
4330 return target;
4331}
4332
4333/* BUILDER is a constant predicate in which the index of every set bit
4334 is a multiple of ELT_SIZE (which is <= 8). Try to load the constant
4335 using a TRN1 of size PERMUTE_SIZE, which is >= ELT_SIZE. Return the
4336 register on success, otherwise return null. Use TARGET as the register
4337 if nonnull and convenient. */
4338
4339static rtx
4340aarch64_expand_sve_const_pred_trn (rtx target, rtx_vector_builder &builder,
4341 unsigned int elt_size,
4342 unsigned int permute_size)
4343{
4344 /* We're going to split the constant into two new constants A and B,
4345 with element I of BUILDER going into A if (I & PERMUTE_SIZE) == 0
4346 and into B otherwise. E.g. for PERMUTE_SIZE == 4 && ELT_SIZE == 1:
4347
4348 A: { 0, 1, 2, 3, _, _, _, _, 8, 9, 10, 11, _, _, _, _ }
4349 B: { 4, 5, 6, 7, _, _, _, _, 12, 13, 14, 15, _, _, _, _ }
4350
4351 where _ indicates elements that will be discarded by the permute.
4352
4353 First calculate the ELT_SIZEs for A and B. */
4354 unsigned int a_elt_size = GET_MODE_SIZE (DImode);
4355 unsigned int b_elt_size = GET_MODE_SIZE (DImode);
4356 for (unsigned int i = 0; i < builder.encoded_nelts (); i += elt_size)
4357 if (INTVAL (builder.elt (i)) != 0)
4358 {
4359 if (i & permute_size)
4360 b_elt_size |= i - permute_size;
4361 else
4362 a_elt_size |= i;
4363 }
4364 a_elt_size &= -a_elt_size;
4365 b_elt_size &= -b_elt_size;
4366
4367 /* Now construct the vectors themselves. */
4368 rtx_vector_builder a_builder (VNx16BImode, builder.npatterns (),
4369 builder.nelts_per_pattern ());
4370 rtx_vector_builder b_builder (VNx16BImode, builder.npatterns (),
4371 builder.nelts_per_pattern ());
4372 unsigned int nelts = builder.encoded_nelts ();
4373 for (unsigned int i = 0; i < nelts; ++i)
4374 if (i & (elt_size - 1))
4375 {
4376 a_builder.quick_push (const0_rtx);
4377 b_builder.quick_push (const0_rtx);
4378 }
4379 else if ((i & permute_size) == 0)
4380 {
4381 /* The A and B elements are significant. */
4382 a_builder.quick_push (builder.elt (i));
4383 b_builder.quick_push (builder.elt (i + permute_size));
4384 }
4385 else
4386 {
4387 /* The A and B elements are going to be discarded, so pick whatever
4388 is likely to give a nice constant. We are targeting element
4389 sizes A_ELT_SIZE and B_ELT_SIZE for A and B respectively,
4390 with the aim of each being a sequence of ones followed by
4391 a sequence of zeros. So:
4392
4393 * if X_ELT_SIZE <= PERMUTE_SIZE, the best approach is to
4394 duplicate the last X_ELT_SIZE element, to extend the
4395 current sequence of ones or zeros.
4396
4397 * if X_ELT_SIZE > PERMUTE_SIZE, the best approach is to add a
4398 zero, so that the constant really does have X_ELT_SIZE and
4399 not a smaller size. */
4400 if (a_elt_size > permute_size)
4401 a_builder.quick_push (const0_rtx);
4402 else
4403 a_builder.quick_push (a_builder.elt (i - a_elt_size));
4404 if (b_elt_size > permute_size)
4405 b_builder.quick_push (const0_rtx);
4406 else
4407 b_builder.quick_push (b_builder.elt (i - b_elt_size));
4408 }
4409 a_builder.finalize ();
4410 b_builder.finalize ();
4411
4412 /* Try loading A into a register. */
4413 rtx_insn *last = get_last_insn ();
4414 rtx a = aarch64_expand_sve_const_pred_1 (NULL_RTX, a_builder, false);
4415 if (!a)
4416 return NULL_RTX;
4417
4418 /* Try loading B into a register. */
4419 rtx b = a;
4420 if (a_builder != b_builder)
4421 {
4422 b = aarch64_expand_sve_const_pred_1 (NULL_RTX, b_builder, false);
4423 if (!b)
4424 {
4425 delete_insns_since (last);
4426 return NULL_RTX;
4427 }
4428 }
4429
4430 /* Emit the TRN1 itself. */
4431 machine_mode mode = aarch64_sve_pred_mode (permute_size).require ();
4432 target = aarch64_target_reg (target, mode);
4433 emit_insn (gen_aarch64_sve (UNSPEC_TRN1, mode, target,
4434 gen_lowpart (mode, a),
4435 gen_lowpart (mode, b)));
4436 return target;
4437}
4438
678faefc
RS
4439/* Subroutine of aarch64_expand_sve_const_pred. Try to load the VNx16BI
4440 constant in BUILDER into an SVE predicate register. Return the register
4441 on success, otherwise return null. Use TARGET for the register if
2803bc3b
RS
4442 nonnull and convenient.
4443
4444 ALLOW_RECURSE_P is true if we can use methods that would call this
4445 function recursively. */
678faefc
RS
4446
4447static rtx
2803bc3b
RS
4448aarch64_expand_sve_const_pred_1 (rtx target, rtx_vector_builder &builder,
4449 bool allow_recurse_p)
678faefc
RS
4450{
4451 if (builder.encoded_nelts () == 1)
4452 /* A PFALSE or a PTRUE .B ALL. */
4453 return aarch64_emit_set_immediate (target, builder);
4454
4455 unsigned int elt_size = aarch64_widest_sve_pred_elt_size (builder);
4456 if (int vl = aarch64_partial_ptrue_length (builder, elt_size))
4457 {
4458 /* If we can load the constant using PTRUE, use it as-is. */
4459 machine_mode mode = aarch64_sve_pred_mode (elt_size).require ();
4460 if (aarch64_svpattern_for_vl (mode, vl) != AARCH64_NUM_SVPATTERNS)
4461 return aarch64_emit_set_immediate (target, builder);
4462
4463 /* Otherwise use WHILE to set the first VL bits. */
4464 return aarch64_sve_move_pred_via_while (target, mode, vl);
4465 }
4466
2803bc3b
RS
4467 if (!allow_recurse_p)
4468 return NULL_RTX;
4469
4470 /* Try inverting the vector in element size ELT_SIZE and then EORing
4471 the result with an ELT_SIZE PTRUE. */
4472 if (INTVAL (builder.elt (0)) == 0)
4473 if (rtx res = aarch64_expand_sve_const_pred_eor (target, builder,
4474 elt_size))
4475 return res;
4476
4477 /* Try using TRN1 to permute two simpler constants. */
4478 for (unsigned int i = elt_size; i <= 8; i *= 2)
4479 if (rtx res = aarch64_expand_sve_const_pred_trn (target, builder,
4480 elt_size, i))
4481 return res;
4482
678faefc
RS
4483 return NULL_RTX;
4484}
4485
4486/* Return an SVE predicate register that contains the VNx16BImode
4487 constant in BUILDER, without going through the move expanders.
4488
4489 The returned register can have whatever mode seems most natural
4490 given the contents of BUILDER. Use TARGET for the result if
4491 convenient. */
4492
4493static rtx
4494aarch64_expand_sve_const_pred (rtx target, rtx_vector_builder &builder)
4495{
4496 /* Try loading the constant using pure predicate operations. */
2803bc3b 4497 if (rtx res = aarch64_expand_sve_const_pred_1 (target, builder, true))
678faefc
RS
4498 return res;
4499
4500 /* Try forcing the constant to memory. */
4501 if (builder.full_nelts ().is_constant ())
4502 if (rtx mem = force_const_mem (VNx16BImode, builder.build ()))
4503 {
4504 target = aarch64_target_reg (target, VNx16BImode);
4505 emit_move_insn (target, mem);
4506 return target;
4507 }
4508
4509 /* The last resort is to load the constant as an integer and then
4510 compare it against zero. Use -1 for set bits in order to increase
4511 the chances of using SVE DUPM or an Advanced SIMD byte mask. */
4512 rtx_vector_builder int_builder (VNx16QImode, builder.npatterns (),
4513 builder.nelts_per_pattern ());
4514 for (unsigned int i = 0; i < builder.encoded_nelts (); ++i)
4515 int_builder.quick_push (INTVAL (builder.elt (i))
4516 ? constm1_rtx : const0_rtx);
4517 return aarch64_convert_sve_data_to_pred (target, VNx16BImode,
4518 int_builder.build ());
0b1fe8cf
RS
4519}
4520
4aeb1ba7 4521/* Set DEST to immediate IMM. */
43cacb12
RS
4522
4523void
4aeb1ba7 4524aarch64_expand_mov_immediate (rtx dest, rtx imm)
43cacb12
RS
4525{
4526 machine_mode mode = GET_MODE (dest);
82614948
RR
4527
4528 /* Check on what type of symbol it is. */
77e994c9
RS
4529 scalar_int_mode int_mode;
4530 if ((GET_CODE (imm) == SYMBOL_REF
4531 || GET_CODE (imm) == LABEL_REF
43cacb12
RS
4532 || GET_CODE (imm) == CONST
4533 || GET_CODE (imm) == CONST_POLY_INT)
77e994c9 4534 && is_a <scalar_int_mode> (mode, &int_mode))
82614948 4535 {
43cacb12
RS
4536 rtx mem;
4537 poly_int64 offset;
4538 HOST_WIDE_INT const_offset;
82614948
RR
4539 enum aarch64_symbol_type sty;
4540
4541 /* If we have (const (plus symbol offset)), separate out the offset
4542 before we start classifying the symbol. */
43cacb12 4543 rtx base = strip_offset (imm, &offset);
82614948 4544
43cacb12
RS
4545 /* We must always add an offset involving VL separately, rather than
4546 folding it into the relocation. */
4547 if (!offset.is_constant (&const_offset))
4548 {
c0e0174b
RS
4549 if (!TARGET_SVE)
4550 {
4551 aarch64_report_sve_required ();
4552 return;
4553 }
43cacb12
RS
4554 if (base == const0_rtx && aarch64_sve_cnt_immediate_p (offset))
4555 emit_insn (gen_rtx_SET (dest, imm));
4556 else
4557 {
4558 /* Do arithmetic on 32-bit values if the result is smaller
4559 than that. */
4560 if (partial_subreg_p (int_mode, SImode))
4561 {
4562 /* It is invalid to do symbol calculations in modes
4563 narrower than SImode. */
4564 gcc_assert (base == const0_rtx);
4565 dest = gen_lowpart (SImode, dest);
4566 int_mode = SImode;
4567 }
4568 if (base != const0_rtx)
4569 {
4570 base = aarch64_force_temporary (int_mode, dest, base);
4571 aarch64_add_offset (int_mode, dest, base, offset,
4572 NULL_RTX, NULL_RTX, false);
4573 }
4574 else
4575 aarch64_add_offset (int_mode, dest, base, offset,
4576 dest, NULL_RTX, false);
4577 }
4578 return;
4579 }
4580
4581 sty = aarch64_classify_symbol (base, const_offset);
82614948
RR
4582 switch (sty)
4583 {
4584 case SYMBOL_FORCE_TO_MEM:
43cacb12 4585 if (const_offset != 0
77e994c9 4586 && targetm.cannot_force_const_mem (int_mode, imm))
82614948
RR
4587 {
4588 gcc_assert (can_create_pseudo_p ());
77e994c9 4589 base = aarch64_force_temporary (int_mode, dest, base);
43cacb12
RS
4590 aarch64_add_offset (int_mode, dest, base, const_offset,
4591 NULL_RTX, NULL_RTX, false);
82614948
RR
4592 return;
4593 }
b4f50fd4 4594
82614948
RR
4595 mem = force_const_mem (ptr_mode, imm);
4596 gcc_assert (mem);
b4f50fd4
RR
4597
4598 /* If we aren't generating PC relative literals, then
4599 we need to expand the literal pool access carefully.
4600 This is something that needs to be done in a number
4601 of places, so could well live as a separate function. */
9ee6540a 4602 if (!aarch64_pcrelative_literal_loads)
b4f50fd4
RR
4603 {
4604 gcc_assert (can_create_pseudo_p ());
4605 base = gen_reg_rtx (ptr_mode);
4606 aarch64_expand_mov_immediate (base, XEXP (mem, 0));
00eee3fa
WD
4607 if (ptr_mode != Pmode)
4608 base = convert_memory_address (Pmode, base);
b4f50fd4
RR
4609 mem = gen_rtx_MEM (ptr_mode, base);
4610 }
4611
77e994c9
RS
4612 if (int_mode != ptr_mode)
4613 mem = gen_rtx_ZERO_EXTEND (int_mode, mem);
b4f50fd4 4614
f7df4a84 4615 emit_insn (gen_rtx_SET (dest, mem));
b4f50fd4 4616
82614948
RR
4617 return;
4618
4619 case SYMBOL_SMALL_TLSGD:
4620 case SYMBOL_SMALL_TLSDESC:
79496620 4621 case SYMBOL_SMALL_TLSIE:
1b1e81f8 4622 case SYMBOL_SMALL_GOT_28K:
6642bdb4 4623 case SYMBOL_SMALL_GOT_4G:
82614948 4624 case SYMBOL_TINY_GOT:
5ae7caad 4625 case SYMBOL_TINY_TLSIE:
43cacb12 4626 if (const_offset != 0)
82614948
RR
4627 {
4628 gcc_assert (can_create_pseudo_p ());
77e994c9 4629 base = aarch64_force_temporary (int_mode, dest, base);
43cacb12
RS
4630 aarch64_add_offset (int_mode, dest, base, const_offset,
4631 NULL_RTX, NULL_RTX, false);
82614948
RR
4632 return;
4633 }
4634 /* FALLTHRU */
4635
82614948
RR
4636 case SYMBOL_SMALL_ABSOLUTE:
4637 case SYMBOL_TINY_ABSOLUTE:
cbf5629e 4638 case SYMBOL_TLSLE12:
d18ba284 4639 case SYMBOL_TLSLE24:
cbf5629e
JW
4640 case SYMBOL_TLSLE32:
4641 case SYMBOL_TLSLE48:
82614948
RR
4642 aarch64_load_symref_appropriately (dest, imm, sty);
4643 return;
4644
4645 default:
4646 gcc_unreachable ();
4647 }
4648 }
4649
4650 if (!CONST_INT_P (imm))
4651 {
678faefc
RS
4652 if (GET_MODE_CLASS (mode) == MODE_VECTOR_BOOL)
4653 {
4654 /* Only the low bit of each .H, .S and .D element is defined,
4655 so we can set the upper bits to whatever we like. If the
4656 predicate is all-true in MODE, prefer to set all the undefined
4657 bits as well, so that we can share a single .B predicate for
4658 all modes. */
4659 if (imm == CONSTM1_RTX (mode))
4660 imm = CONSTM1_RTX (VNx16BImode);
4661
4662 /* All methods for constructing predicate modes wider than VNx16BI
4663 will set the upper bits of each element to zero. Expose this
4664 by moving such constants as a VNx16BI, so that all bits are
4665 significant and so that constants for different modes can be
4666 shared. The wider constant will still be available as a
4667 REG_EQUAL note. */
4668 rtx_vector_builder builder;
4669 if (aarch64_get_sve_pred_bits (builder, imm))
4670 {
4671 rtx res = aarch64_expand_sve_const_pred (dest, builder);
4672 if (dest != res)
4673 emit_move_insn (dest, gen_lowpart (mode, res));
4674 return;
4675 }
4676 }
4677
43cacb12
RS
4678 if (GET_CODE (imm) == HIGH
4679 || aarch64_simd_valid_immediate (imm, NULL))
43cacb12 4680 {
4aeb1ba7
RS
4681 emit_insn (gen_rtx_SET (dest, imm));
4682 return;
43e9d192 4683 }
82614948 4684
4aeb1ba7
RS
4685 if (GET_CODE (imm) == CONST_VECTOR && aarch64_sve_data_mode_p (mode))
4686 if (rtx res = aarch64_expand_sve_const_vector (dest, imm))
4687 {
4688 if (dest != res)
4689 emit_insn (gen_aarch64_sve_reinterpret (mode, dest, res));
4690 return;
4691 }
4692
4693 rtx mem = force_const_mem (mode, imm);
4694 gcc_assert (mem);
4695 emit_move_insn (dest, mem);
82614948 4696 return;
43e9d192 4697 }
82614948 4698
77e994c9
RS
4699 aarch64_internal_mov_immediate (dest, imm, true,
4700 as_a <scalar_int_mode> (mode));
43e9d192
IB
4701}
4702
43cacb12
RS
4703/* Emit an SVE predicated move from SRC to DEST. PRED is a predicate
4704 that is known to contain PTRUE. */
4705
4706void
4707aarch64_emit_sve_pred_move (rtx dest, rtx pred, rtx src)
4708{
0c63a8ee
TC
4709 expand_operand ops[3];
4710 machine_mode mode = GET_MODE (dest);
4711 create_output_operand (&ops[0], dest, mode);
4712 create_input_operand (&ops[1], pred, GET_MODE (pred));
4713 create_input_operand (&ops[2], src, mode);
f2b29269 4714 temporary_volatile_ok v (true);
0c63a8ee 4715 expand_insn (code_for_aarch64_pred_mov (mode), 3, ops);
43cacb12
RS
4716}
4717
4718/* Expand a pre-RA SVE data move from SRC to DEST in which at least one
4719 operand is in memory. In this case we need to use the predicated LD1
4720 and ST1 instead of LDR and STR, both for correctness on big-endian
4721 targets and because LD1 and ST1 support a wider range of addressing modes.
4722 PRED_MODE is the mode of the predicate.
4723
4724 See the comment at the head of aarch64-sve.md for details about the
4725 big-endian handling. */
4726
4727void
4728aarch64_expand_sve_mem_move (rtx dest, rtx src, machine_mode pred_mode)
4729{
4730 machine_mode mode = GET_MODE (dest);
16de3637 4731 rtx ptrue = aarch64_ptrue_reg (pred_mode);
43cacb12
RS
4732 if (!register_operand (src, mode)
4733 && !register_operand (dest, mode))
4734 {
4735 rtx tmp = gen_reg_rtx (mode);
4736 if (MEM_P (src))
4737 aarch64_emit_sve_pred_move (tmp, ptrue, src);
4738 else
4739 emit_move_insn (tmp, src);
4740 src = tmp;
4741 }
4742 aarch64_emit_sve_pred_move (dest, ptrue, src);
4743}
4744
002092be
RS
4745/* Called only on big-endian targets. See whether an SVE vector move
4746 from SRC to DEST is effectively a REV[BHW] instruction, because at
4747 least one operand is a subreg of an SVE vector that has wider or
4748 narrower elements. Return true and emit the instruction if so.
4749
4750 For example:
4751
4752 (set (reg:VNx8HI R1) (subreg:VNx8HI (reg:VNx16QI R2) 0))
4753
4754 represents a VIEW_CONVERT between the following vectors, viewed
4755 in memory order:
4756
4757 R2: { [0].high, [0].low, [1].high, [1].low, ... }
4758 R1: { [0], [1], [2], [3], ... }
4759
4760 The high part of lane X in R2 should therefore correspond to lane X*2
4761 of R1, but the register representations are:
4762
4763 msb lsb
4764 R2: ...... [1].high [1].low [0].high [0].low
4765 R1: ...... [3] [2] [1] [0]
4766
4767 where the low part of lane X in R2 corresponds to lane X*2 in R1.
4768 We therefore need a reverse operation to swap the high and low values
4769 around.
4770
4771 This is purely an optimization. Without it we would spill the
4772 subreg operand to the stack in one mode and reload it in the
4773 other mode, which has the same effect as the REV. */
4774
4775bool
4776aarch64_maybe_expand_sve_subreg_move (rtx dest, rtx src)
4777{
4778 gcc_assert (BYTES_BIG_ENDIAN);
4779 if (GET_CODE (dest) == SUBREG)
4780 dest = SUBREG_REG (dest);
4781 if (GET_CODE (src) == SUBREG)
4782 src = SUBREG_REG (src);
4783
4784 /* The optimization handles two single SVE REGs with different element
4785 sizes. */
4786 if (!REG_P (dest)
4787 || !REG_P (src)
4788 || aarch64_classify_vector_mode (GET_MODE (dest)) != VEC_SVE_DATA
4789 || aarch64_classify_vector_mode (GET_MODE (src)) != VEC_SVE_DATA
4790 || (GET_MODE_UNIT_SIZE (GET_MODE (dest))
4791 == GET_MODE_UNIT_SIZE (GET_MODE (src))))
4792 return false;
4793
4794 /* Generate *aarch64_sve_mov<mode>_subreg_be. */
16de3637 4795 rtx ptrue = aarch64_ptrue_reg (VNx16BImode);
002092be
RS
4796 rtx unspec = gen_rtx_UNSPEC (GET_MODE (dest), gen_rtvec (2, ptrue, src),
4797 UNSPEC_REV_SUBREG);
4798 emit_insn (gen_rtx_SET (dest, unspec));
4799 return true;
4800}
4801
4802/* Return a copy of X with mode MODE, without changing its other
4803 attributes. Unlike gen_lowpart, this doesn't care whether the
4804 mode change is valid. */
4805
624d0f07 4806rtx
002092be
RS
4807aarch64_replace_reg_mode (rtx x, machine_mode mode)
4808{
4809 if (GET_MODE (x) == mode)
4810 return x;
4811
4812 x = shallow_copy_rtx (x);
4813 set_mode_and_regno (x, mode, REGNO (x));
4814 return x;
4815}
4816
d7a09c44
RS
4817/* Return the SVE REV[BHW] unspec for reversing quantities of mode MODE
4818 stored in wider integer containers. */
4819
4820static unsigned int
4821aarch64_sve_rev_unspec (machine_mode mode)
4822{
4823 switch (GET_MODE_UNIT_SIZE (mode))
4824 {
4825 case 1: return UNSPEC_REVB;
4826 case 2: return UNSPEC_REVH;
4827 case 4: return UNSPEC_REVW;
4828 }
4829 gcc_unreachable ();
4830}
4831
002092be
RS
4832/* Split a *aarch64_sve_mov<mode>_subreg_be pattern with the given
4833 operands. */
4834
4835void
4836aarch64_split_sve_subreg_move (rtx dest, rtx ptrue, rtx src)
4837{
d7a09c44
RS
4838 /* Decide which REV operation we need. The mode with wider elements
4839 determines the mode of the operands and the mode with the narrower
002092be
RS
4840 elements determines the reverse width. */
4841 machine_mode mode_with_wider_elts = GET_MODE (dest);
4842 machine_mode mode_with_narrower_elts = GET_MODE (src);
4843 if (GET_MODE_UNIT_SIZE (mode_with_wider_elts)
4844 < GET_MODE_UNIT_SIZE (mode_with_narrower_elts))
4845 std::swap (mode_with_wider_elts, mode_with_narrower_elts);
4846
d7a09c44 4847 unsigned int unspec = aarch64_sve_rev_unspec (mode_with_narrower_elts);
cc68f7c2 4848 machine_mode pred_mode = aarch64_sve_pred_mode (mode_with_wider_elts);
002092be 4849
d7a09c44 4850 /* Get the operands in the appropriate modes and emit the instruction. */
002092be 4851 ptrue = gen_lowpart (pred_mode, ptrue);
d7a09c44
RS
4852 dest = aarch64_replace_reg_mode (dest, mode_with_wider_elts);
4853 src = aarch64_replace_reg_mode (src, mode_with_wider_elts);
4854 emit_insn (gen_aarch64_pred (unspec, mode_with_wider_elts,
4855 dest, ptrue, src));
002092be
RS
4856}
4857
43e9d192 4858static bool
c600df9a 4859aarch64_function_ok_for_sibcall (tree, tree exp)
43e9d192 4860{
c600df9a 4861 if (crtl->abi->id () != expr_callee_abi (exp).id ())
a0d0b980
SE
4862 return false;
4863
43e9d192
IB
4864 return true;
4865}
4866
4867/* Implement TARGET_PASS_BY_REFERENCE. */
4868
4869static bool
c600df9a
RS
4870aarch64_pass_by_reference (cumulative_args_t pcum_v,
4871 const function_arg_info &arg)
43e9d192 4872{
c600df9a 4873 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
43e9d192 4874 HOST_WIDE_INT size;
ef4bddc2 4875 machine_mode dummymode;
43e9d192
IB
4876 int nregs;
4877
c600df9a
RS
4878 unsigned int num_zr, num_pr;
4879 if (arg.type && aarch64_sve_argument_p (arg.type, &num_zr, &num_pr))
4880 {
4881 if (pcum && !pcum->silent_p && !TARGET_SVE)
4882 /* We can't gracefully recover at this point, so make this a
4883 fatal error. */
4884 fatal_error (input_location, "arguments of type %qT require"
4885 " the SVE ISA extension", arg.type);
4886
4887 /* Variadic SVE types are passed by reference. Normal non-variadic
4888 arguments are too if we've run out of registers. */
4889 return (!arg.named
4890 || pcum->aapcs_nvrn + num_zr > NUM_FP_ARG_REGS
4891 || pcum->aapcs_nprn + num_pr > NUM_PR_ARG_REGS);
4892 }
4893
43e9d192 4894 /* GET_MODE_SIZE (BLKmode) is useless since it is 0. */
52090e4d
RS
4895 if (arg.mode == BLKmode && arg.type)
4896 size = int_size_in_bytes (arg.type);
6a70badb
RS
4897 else
4898 /* No frontends can create types with variable-sized modes, so we
4899 shouldn't be asked to pass or return them. */
52090e4d 4900 size = GET_MODE_SIZE (arg.mode).to_constant ();
43e9d192 4901
aadc1c43 4902 /* Aggregates are passed by reference based on their size. */
52090e4d
RS
4903 if (arg.aggregate_type_p ())
4904 size = int_size_in_bytes (arg.type);
43e9d192
IB
4905
 4906 /* Variable sized arguments are always passed by reference. */
4907 if (size < 0)
4908 return true;
4909
4910 /* Can this be a candidate to be passed in fp/simd register(s)? */
52090e4d 4911 if (aarch64_vfp_is_call_or_return_candidate (arg.mode, arg.type,
43e9d192
IB
4912 &dummymode, &nregs,
4913 NULL))
4914 return false;
4915
4916 /* Arguments which are variable sized or larger than 2 registers are
 4917 passed by reference unless they are a homogeneous floating point
4918 aggregate. */
4919 return size > 2 * UNITS_PER_WORD;
4920}
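
/* For illustration (a sketch of the rules above, not a complete statement of
   the PCS): a plain "struct { long long a, b, c; }" is 24 bytes, which
   exceeds 2 * UNITS_PER_WORD, so it is passed by reference, whereas a
   "struct { double a, b, c, d; }" is a 32-byte HFA and therefore remains a
   fp/simd candidate, passed by value in V registers.  */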
4921
4922/* Return TRUE if VALTYPE is padded to its least significant bits. */
4923static bool
4924aarch64_return_in_msb (const_tree valtype)
4925{
ef4bddc2 4926 machine_mode dummy_mode;
43e9d192
IB
4927 int dummy_int;
4928
4929 /* Never happens in little-endian mode. */
4930 if (!BYTES_BIG_ENDIAN)
4931 return false;
4932
4933 /* Only composite types smaller than or equal to 16 bytes can
4934 be potentially returned in registers. */
4935 if (!aarch64_composite_type_p (valtype, TYPE_MODE (valtype))
4936 || int_size_in_bytes (valtype) <= 0
4937 || int_size_in_bytes (valtype) > 16)
4938 return false;
4939
4940 /* But not a composite that is an HFA (Homogeneous Floating-point Aggregate)
4941 or an HVA (Homogeneous Short-Vector Aggregate); such a special composite
4942 is always passed/returned in the least significant bits of fp/simd
4943 register(s). */
4944 if (aarch64_vfp_is_call_or_return_candidate (TYPE_MODE (valtype), valtype,
4945 &dummy_mode, &dummy_int, NULL))
4946 return false;
4947
4948 return true;
4949}
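
/* For example (purely illustrative): on big-endian, a 3-byte
   "struct { char c[3]; }" satisfies the checks above and is returned in the
   most significant bytes of X0, whereas a "struct { float x, y; }" is an HFA
   and is instead returned in the low bits of S0 and S1, so this hook
   returns false for it.  */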
4950
6aa5370c
RS
4951/* Subroutine of aarch64_function_value. MODE is the mode of the argument
4952 after promotion, and after partial SVE types have been replaced by
4953 their integer equivalents. */
43e9d192 4954static rtx
6aa5370c 4955aarch64_function_value_1 (const_tree type, machine_mode mode)
43e9d192 4956{
c600df9a
RS
4957 unsigned int num_zr, num_pr;
4958 if (type && aarch64_sve_argument_p (type, &num_zr, &num_pr))
4959 {
4960 /* Don't raise an error here if we're called when SVE is disabled,
4961 since this is really just a query function. Other code must
4962 do that where appropriate. */
4963 mode = TYPE_MODE_RAW (type);
4964 gcc_assert (VECTOR_MODE_P (mode)
4965 && (!TARGET_SVE || aarch64_sve_mode_p (mode)));
4966
4967 if (num_zr > 0 && num_pr == 0)
4968 return gen_rtx_REG (mode, V0_REGNUM);
4969
4970 if (num_zr == 0 && num_pr == 1)
4971 return gen_rtx_REG (mode, P0_REGNUM);
4972
4973 gcc_unreachable ();
4974 }
4975
4976 /* Generic vectors that map to SVE modes with -msve-vector-bits=N are
4977 returned in memory, not by value. */
4978 gcc_assert (!aarch64_sve_mode_p (mode));
4979
43e9d192
IB
4980 if (aarch64_return_in_msb (type))
4981 {
4982 HOST_WIDE_INT size = int_size_in_bytes (type);
4983
4984 if (size % UNITS_PER_WORD != 0)
4985 {
4986 size += UNITS_PER_WORD - size % UNITS_PER_WORD;
f4b31647 4987 mode = int_mode_for_size (size * BITS_PER_UNIT, 0).require ();
43e9d192
IB
4988 }
4989 }
4990
6aa5370c
RS
4991 int count;
4992 machine_mode ag_mode;
43e9d192
IB
4993 if (aarch64_vfp_is_call_or_return_candidate (mode, type,
4994 &ag_mode, &count, NULL))
4995 {
4996 if (!aarch64_composite_type_p (type, mode))
4997 {
4998 gcc_assert (count == 1 && mode == ag_mode);
4999 return gen_rtx_REG (mode, V0_REGNUM);
5000 }
5001 else
5002 {
5003 int i;
5004 rtx par;
5005
5006 par = gen_rtx_PARALLEL (mode, rtvec_alloc (count));
5007 for (i = 0; i < count; i++)
5008 {
5009 rtx tmp = gen_rtx_REG (ag_mode, V0_REGNUM + i);
6a70badb
RS
5010 rtx offset = gen_int_mode (i * GET_MODE_SIZE (ag_mode), Pmode);
5011 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp, offset);
43e9d192
IB
5012 XVECEXP (par, 0, i) = tmp;
5013 }
5014 return par;
5015 }
5016 }
5017 else
5018 return gen_rtx_REG (mode, R0_REGNUM);
5019}
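
/* As a sketch of the composite case above: for an HFA such as
   "struct { double x, y; }", AG_MODE is DFmode and COUNT is 2, so the
   return value is described by a PARALLEL along the lines of

     (parallel [(expr_list (reg:DF v0) (const_int 0))
		(expr_list (reg:DF v1) (const_int 8))])

   i.e. one V register per member, at increasing byte offsets.  */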
5020
6aa5370c
RS
5021/* Implement TARGET_FUNCTION_VALUE.
5022 Define how to find the value returned by a function. */
5023
5024static rtx
5025aarch64_function_value (const_tree type, const_tree func,
5026 bool outgoing ATTRIBUTE_UNUSED)
5027{
5028 machine_mode mode;
5029 int unsignedp;
5030
5031 mode = TYPE_MODE (type);
5032 if (INTEGRAL_TYPE_P (type))
5033 mode = promote_function_mode (type, mode, &unsignedp, func, 1);
5034
5035 /* Vector types can acquire a partial SVE mode using things like
5036 __attribute__((vector_size(N))), and this is potentially useful.
5037 However, the choice of mode doesn't affect the type's ABI identity,
5038 so we should treat the types as though they had the associated
5039 integer mode, just like they did before SVE was introduced.
5040
5041 We know that the vector must be 128 bits or smaller, otherwise we'd
5042 have returned it in memory instead. */
5043 unsigned int vec_flags = aarch64_classify_vector_mode (mode);
5044 if ((vec_flags & VEC_ANY_SVE) && (vec_flags & VEC_PARTIAL))
5045 {
5046 scalar_int_mode int_mode = int_mode_for_mode (mode).require ();
5047 rtx reg = aarch64_function_value_1 (type, int_mode);
5048 /* Vector types are never returned in the MSB and are never split. */
5049 gcc_assert (REG_P (reg) && GET_MODE (reg) == int_mode);
5050 rtx pair = gen_rtx_EXPR_LIST (VOIDmode, reg, const0_rtx);
5051 return gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, pair));
5052 }
5053
5054 return aarch64_function_value_1 (type, mode);
5055}
5056
43e9d192
IB
5057/* Implements TARGET_FUNCTION_VALUE_REGNO_P.
5058 Return true if REGNO is the number of a hard register in which the values
5059 of called function may come back. */
5060
5061static bool
5062aarch64_function_value_regno_p (const unsigned int regno)
5063{
5064 /* Maximum of 16 bytes can be returned in the general registers. Examples
5065 of 16-byte return values are: 128-bit integers and 16-byte small
5066 structures (excluding homogeneous floating-point aggregates). */
5067 if (regno == R0_REGNUM || regno == R1_REGNUM)
5068 return true;
5069
5070 /* Up to four fp/simd registers can return a function value, e.g. a
5071 homogeneous floating-point aggregate having four members. */
5072 if (regno >= V0_REGNUM && regno < V0_REGNUM + HA_MAX_NUM_FLDS)
d5726973 5073 return TARGET_FLOAT;
43e9d192
IB
5074
5075 return false;
5076}
5077
5078/* Implement TARGET_RETURN_IN_MEMORY.
5079
5080 If the type T of the result of a function is such that
5081 void func (T arg)
5082 would require that arg be passed as a value in a register (or set of
5083 registers) according to the parameter passing rules, then the result
5084 is returned in the same registers as would be used for such an
5085 argument. */
5086
5087static bool
5088aarch64_return_in_memory (const_tree type, const_tree fndecl ATTRIBUTE_UNUSED)
5089{
5090 HOST_WIDE_INT size;
ef4bddc2 5091 machine_mode ag_mode;
43e9d192
IB
5092 int count;
5093
5094 if (!AGGREGATE_TYPE_P (type)
5095 && TREE_CODE (type) != COMPLEX_TYPE
5096 && TREE_CODE (type) != VECTOR_TYPE)
 5097 /* Simple scalar types are always returned in registers. */
5098 return false;
5099
c600df9a
RS
5100 unsigned int num_zr, num_pr;
5101 if (type && aarch64_sve_argument_p (type, &num_zr, &num_pr))
5102 {
5103 /* All SVE types we support fit in registers. For example, it isn't
5104 yet possible to define an aggregate of 9+ SVE vectors or 5+ SVE
5105 predicates. */
5106 gcc_assert (num_zr <= NUM_FP_ARG_REGS && num_pr <= NUM_PR_ARG_REGS);
5107 return false;
5108 }
5109
43e9d192
IB
5110 if (aarch64_vfp_is_call_or_return_candidate (TYPE_MODE (type),
5111 type,
5112 &ag_mode,
5113 &count,
5114 NULL))
5115 return false;
5116
 5117 /* Types larger than 2 registers are returned in memory. */
5118 size = int_size_in_bytes (type);
5119 return (size < 0 || size > 2 * UNITS_PER_WORD);
5120}
5121
5122static bool
ef4bddc2 5123aarch64_vfp_is_call_candidate (cumulative_args_t pcum_v, machine_mode mode,
43e9d192
IB
5124 const_tree type, int *nregs)
5125{
5126 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
5127 return aarch64_vfp_is_call_or_return_candidate (mode,
5128 type,
5129 &pcum->aapcs_vfp_rmode,
5130 nregs,
5131 NULL);
5132}
5133
985b8393 5134/* Given MODE and TYPE of a function argument, return the alignment in
43e9d192 5135 bits. The idea is to suppress any stronger alignment requested by
c590597c
RE
5136 the user and opt for the natural alignment (specified in AAPCS64 \S
5137 4.1). ABI_BREAK is set to true if the alignment was incorrectly
5138 calculated in versions of GCC prior to GCC-9. This is a helper
5139 function for local use only. */
43e9d192 5140
985b8393 5141static unsigned int
c590597c
RE
5142aarch64_function_arg_alignment (machine_mode mode, const_tree type,
5143 bool *abi_break)
43e9d192 5144{
c590597c 5145 *abi_break = false;
75d6cc81 5146 if (!type)
985b8393 5147 return GET_MODE_ALIGNMENT (mode);
2ec07fa6 5148
75d6cc81 5149 if (integer_zerop (TYPE_SIZE (type)))
985b8393 5150 return 0;
43e9d192 5151
75d6cc81
AL
5152 gcc_assert (TYPE_MODE (type) == mode);
5153
5154 if (!AGGREGATE_TYPE_P (type))
985b8393 5155 return TYPE_ALIGN (TYPE_MAIN_VARIANT (type));
75d6cc81
AL
5156
5157 if (TREE_CODE (type) == ARRAY_TYPE)
985b8393 5158 return TYPE_ALIGN (TREE_TYPE (type));
75d6cc81 5159
985b8393 5160 unsigned int alignment = 0;
c590597c 5161 unsigned int bitfield_alignment = 0;
75d6cc81 5162 for (tree field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
985b8393 5163 if (TREE_CODE (field) == FIELD_DECL)
c590597c
RE
5164 {
5165 alignment = std::max (alignment, DECL_ALIGN (field));
5166 if (DECL_BIT_FIELD_TYPE (field))
5167 bitfield_alignment
5168 = std::max (bitfield_alignment,
5169 TYPE_ALIGN (DECL_BIT_FIELD_TYPE (field)));
5170 }
5171
5172 if (bitfield_alignment > alignment)
5173 {
5174 *abi_break = true;
5175 return bitfield_alignment;
5176 }
43e9d192 5177
985b8393 5178 return alignment;
43e9d192
IB
5179}
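
/* A hedged sketch of the ABI_BREAK mechanism above: if every FIELD_DECL of
   an aggregate only requires 32-bit alignment but one of them is a bit-field
   declared with a 64-bit type (say "long long b : 8"), then
   BITFIELD_ALIGNMENT (64) exceeds ALIGNMENT (32); the function returns
   64 bits and sets *ABI_BREAK so that callers can emit the -Wpsabi note
   about the GCC 9.1 change.  */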
5180
5181/* Layout a function argument according to the AAPCS64 rules. The rule
6aa5370c
RS
5182 numbers refer to the rule numbers in the AAPCS64. ORIG_MODE is the
5183 mode that was originally given to us by the target hook, whereas the
5184 mode in ARG might be the result of replacing partial SVE modes with
5185 the equivalent integer mode. */
43e9d192
IB
5186
5187static void
6aa5370c
RS
5188aarch64_layout_arg (cumulative_args_t pcum_v, const function_arg_info &arg,
5189 machine_mode orig_mode)
43e9d192
IB
5190{
5191 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
c600df9a
RS
5192 tree type = arg.type;
5193 machine_mode mode = arg.mode;
43e9d192
IB
5194 int ncrn, nvrn, nregs;
5195 bool allocate_ncrn, allocate_nvrn;
3abf17cf 5196 HOST_WIDE_INT size;
c590597c 5197 bool abi_break;
43e9d192
IB
5198
5199 /* We need to do this once per argument. */
5200 if (pcum->aapcs_arg_processed)
5201 return;
5202
6aa5370c
RS
5203 /* Vector types can acquire a partial SVE mode using things like
5204 __attribute__((vector_size(N))), and this is potentially useful.
5205 However, the choice of mode doesn't affect the type's ABI identity,
5206 so we should treat the types as though they had the associated
5207 integer mode, just like they did before SVE was introduced.
5208
5209 We know that the vector must be 128 bits or smaller, otherwise we'd
5210 have passed it by reference instead. */
5211 unsigned int vec_flags = aarch64_classify_vector_mode (mode);
5212 if ((vec_flags & VEC_ANY_SVE) && (vec_flags & VEC_PARTIAL))
5213 {
5214 function_arg_info tmp_arg = arg;
5215 tmp_arg.mode = int_mode_for_mode (mode).require ();
5216 aarch64_layout_arg (pcum_v, tmp_arg, orig_mode);
5217 if (rtx reg = pcum->aapcs_reg)
5218 {
5219 gcc_assert (REG_P (reg) && GET_MODE (reg) == tmp_arg.mode);
5220 rtx pair = gen_rtx_EXPR_LIST (VOIDmode, reg, const0_rtx);
5221 pcum->aapcs_reg = gen_rtx_PARALLEL (mode, gen_rtvec (1, pair));
5222 }
5223 return;
5224 }
5225
43e9d192
IB
5226 pcum->aapcs_arg_processed = true;
5227
c600df9a
RS
5228 unsigned int num_zr, num_pr;
5229 if (type && aarch64_sve_argument_p (type, &num_zr, &num_pr))
5230 {
5231 /* The PCS says that it is invalid to pass an SVE value to an
5232 unprototyped function. There is no ABI-defined location we
5233 can return in this case, so we have no real choice but to raise
5234 an error immediately, even though this is only a query function. */
5235 if (arg.named && pcum->pcs_variant != ARM_PCS_SVE)
5236 {
5237 gcc_assert (!pcum->silent_p);
5238 error ("SVE type %qT cannot be passed to an unprototyped function",
5239 arg.type);
5240 /* Avoid repeating the message, and avoid tripping the assert
5241 below. */
5242 pcum->pcs_variant = ARM_PCS_SVE;
5243 }
5244
5245 /* We would have converted the argument into pass-by-reference
5246 form if it didn't fit in registers. */
5247 pcum->aapcs_nextnvrn = pcum->aapcs_nvrn + num_zr;
5248 pcum->aapcs_nextnprn = pcum->aapcs_nprn + num_pr;
5249 gcc_assert (arg.named
5250 && pcum->pcs_variant == ARM_PCS_SVE
5251 && aarch64_sve_mode_p (mode)
5252 && pcum->aapcs_nextnvrn <= NUM_FP_ARG_REGS
5253 && pcum->aapcs_nextnprn <= NUM_PR_ARG_REGS);
5254
5255 if (num_zr > 0 && num_pr == 0)
5256 pcum->aapcs_reg = gen_rtx_REG (mode, V0_REGNUM + pcum->aapcs_nvrn);
5257 else if (num_zr == 0 && num_pr == 1)
5258 pcum->aapcs_reg = gen_rtx_REG (mode, P0_REGNUM + pcum->aapcs_nprn);
5259 else
5260 gcc_unreachable ();
5261 return;
5262 }
5263
5264 /* Generic vectors that map to SVE modes with -msve-vector-bits=N are
5265 passed by reference, not by value. */
5266 gcc_assert (!aarch64_sve_mode_p (mode));
5267
3abf17cf 5268 /* Size in bytes, rounded to the nearest multiple of 8 bytes. */
6a70badb
RS
5269 if (type)
5270 size = int_size_in_bytes (type);
5271 else
5272 /* No frontends can create types with variable-sized modes, so we
5273 shouldn't be asked to pass or return them. */
5274 size = GET_MODE_SIZE (mode).to_constant ();
5275 size = ROUND_UP (size, UNITS_PER_WORD);
3abf17cf 5276
43e9d192
IB
5277 allocate_ncrn = (type) ? !(FLOAT_TYPE_P (type)) : !FLOAT_MODE_P (mode);
5278 allocate_nvrn = aarch64_vfp_is_call_candidate (pcum_v,
5279 mode,
5280 type,
5281 &nregs);
5282
5283 /* allocate_ncrn may be false-positive, but allocate_nvrn is quite reliable.
5284 The following code thus handles passing by SIMD/FP registers first. */
5285
5286 nvrn = pcum->aapcs_nvrn;
5287
 5288 /* C1 - C5 for floating point, homogeneous floating-point aggregates (HFA)
 5289 and homogeneous short-vector aggregates (HVA). */
5290 if (allocate_nvrn)
5291 {
c600df9a 5292 if (!pcum->silent_p && !TARGET_FLOAT)
fc29dfc9 5293 aarch64_err_no_fpadvsimd (mode);
261fb553 5294
43e9d192
IB
5295 if (nvrn + nregs <= NUM_FP_ARG_REGS)
5296 {
5297 pcum->aapcs_nextnvrn = nvrn + nregs;
5298 if (!aarch64_composite_type_p (type, mode))
5299 {
5300 gcc_assert (nregs == 1);
5301 pcum->aapcs_reg = gen_rtx_REG (mode, V0_REGNUM + nvrn);
5302 }
5303 else
5304 {
5305 rtx par;
5306 int i;
5307 par = gen_rtx_PARALLEL (mode, rtvec_alloc (nregs));
5308 for (i = 0; i < nregs; i++)
5309 {
5310 rtx tmp = gen_rtx_REG (pcum->aapcs_vfp_rmode,
5311 V0_REGNUM + nvrn + i);
6a70badb
RS
5312 rtx offset = gen_int_mode
5313 (i * GET_MODE_SIZE (pcum->aapcs_vfp_rmode), Pmode);
5314 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp, offset);
43e9d192
IB
5315 XVECEXP (par, 0, i) = tmp;
5316 }
5317 pcum->aapcs_reg = par;
5318 }
5319 return;
5320 }
5321 else
5322 {
5323 /* C.3 NSRN is set to 8. */
5324 pcum->aapcs_nextnvrn = NUM_FP_ARG_REGS;
5325 goto on_stack;
5326 }
5327 }
5328
5329 ncrn = pcum->aapcs_ncrn;
3abf17cf 5330 nregs = size / UNITS_PER_WORD;
43e9d192
IB
5331
 5332 /* C6 - C9, though the sign and zero extension semantics are
 5333 handled elsewhere. This is the case where the argument fits
 5334 entirely in general registers. */
5335 if (allocate_ncrn && (ncrn + nregs <= NUM_ARG_REGS))
5336 {
43e9d192
IB
5337 gcc_assert (nregs == 0 || nregs == 1 || nregs == 2);
5338
5339 /* C.8 if the argument has an alignment of 16 then the NGRN is
c590597c 5340 rounded up to the next even number. */
985b8393
JJ
5341 if (nregs == 2
5342 && ncrn % 2
2ec07fa6 5343 /* The == 16 * BITS_PER_UNIT instead of >= 16 * BITS_PER_UNIT
985b8393 5344 comparison is there because for > 16 * BITS_PER_UNIT
2ec07fa6
RR
5345 alignment nregs should be > 2 and therefore it should be
5346 passed by reference rather than value. */
6aa5370c 5347 && (aarch64_function_arg_alignment (orig_mode, type, &abi_break)
c590597c 5348 == 16 * BITS_PER_UNIT))
985b8393 5349 {
c590597c
RE
5350 if (abi_break && warn_psabi && currently_expanding_gimple_stmt)
5351 inform (input_location, "parameter passing for argument of type "
5352 "%qT changed in GCC 9.1", type);
985b8393
JJ
5353 ++ncrn;
5354 gcc_assert (ncrn + nregs <= NUM_ARG_REGS);
43e9d192 5355 }
2ec07fa6 5356
43e9d192 5357 /* NREGS can be 0 when e.g. an empty structure is to be passed.
c590597c 5358 A reg is still generated for it, but the caller should be smart
43e9d192
IB
5359 enough not to use it. */
5360 if (nregs == 0 || nregs == 1 || GET_MODE_CLASS (mode) == MODE_INT)
2ec07fa6 5361 pcum->aapcs_reg = gen_rtx_REG (mode, R0_REGNUM + ncrn);
43e9d192
IB
5362 else
5363 {
5364 rtx par;
5365 int i;
5366
5367 par = gen_rtx_PARALLEL (mode, rtvec_alloc (nregs));
5368 for (i = 0; i < nregs; i++)
5369 {
5370 rtx tmp = gen_rtx_REG (word_mode, R0_REGNUM + ncrn + i);
5371 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp,
5372 GEN_INT (i * UNITS_PER_WORD));
5373 XVECEXP (par, 0, i) = tmp;
5374 }
5375 pcum->aapcs_reg = par;
5376 }
5377
5378 pcum->aapcs_nextncrn = ncrn + nregs;
5379 return;
5380 }
5381
5382 /* C.11 */
5383 pcum->aapcs_nextncrn = NUM_ARG_REGS;
5384
5385 /* The argument is passed on stack; record the needed number of words for
3abf17cf 5386 this argument and align the total size if necessary. */
43e9d192 5387on_stack:
3abf17cf 5388 pcum->aapcs_stack_words = size / UNITS_PER_WORD;
2ec07fa6 5389
6aa5370c 5390 if (aarch64_function_arg_alignment (orig_mode, type, &abi_break)
c590597c
RE
5391 == 16 * BITS_PER_UNIT)
5392 {
5393 int new_size = ROUND_UP (pcum->aapcs_stack_size, 16 / UNITS_PER_WORD);
5394 if (pcum->aapcs_stack_size != new_size)
5395 {
5396 if (abi_break && warn_psabi && currently_expanding_gimple_stmt)
5397 inform (input_location, "parameter passing for argument of type "
5398 "%qT changed in GCC 9.1", type);
5399 pcum->aapcs_stack_size = new_size;
5400 }
5401 }
43e9d192
IB
5402 return;
5403}
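
/* An illustrative consequence of rule C.8 handled above: a 16-byte-aligned
   argument such as an __int128 that would otherwise start at an odd-numbered
   general register is delayed by one register so that it occupies an
   even/odd pair (say x2/x3 rather than x1/x2); the on-stack case likewise
   rounds the NSAA up to a 16-byte boundary.  */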
5404
5405/* Implement TARGET_FUNCTION_ARG. */
5406
5407static rtx
6783fdb7 5408aarch64_function_arg (cumulative_args_t pcum_v, const function_arg_info &arg)
43e9d192
IB
5409{
5410 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
08cc4d92 5411 gcc_assert (pcum->pcs_variant == ARM_PCS_AAPCS64
c600df9a
RS
5412 || pcum->pcs_variant == ARM_PCS_SIMD
5413 || pcum->pcs_variant == ARM_PCS_SVE);
43e9d192 5414
6783fdb7 5415 if (arg.end_marker_p ())
08cc4d92 5416 return gen_int_mode (pcum->pcs_variant, DImode);
43e9d192 5417
6aa5370c 5418 aarch64_layout_arg (pcum_v, arg, arg.mode);
43e9d192
IB
5419 return pcum->aapcs_reg;
5420}
5421
5422void
5423aarch64_init_cumulative_args (CUMULATIVE_ARGS *pcum,
08cc4d92
RS
5424 const_tree fntype,
5425 rtx libname ATTRIBUTE_UNUSED,
5426 const_tree fndecl ATTRIBUTE_UNUSED,
c600df9a
RS
5427 unsigned n_named ATTRIBUTE_UNUSED,
5428 bool silent_p)
43e9d192
IB
5429{
5430 pcum->aapcs_ncrn = 0;
5431 pcum->aapcs_nvrn = 0;
c600df9a 5432 pcum->aapcs_nprn = 0;
43e9d192
IB
5433 pcum->aapcs_nextncrn = 0;
5434 pcum->aapcs_nextnvrn = 0;
c600df9a 5435 pcum->aapcs_nextnprn = 0;
08cc4d92
RS
5436 if (fntype)
5437 pcum->pcs_variant = (arm_pcs) fntype_abi (fntype).id ();
5438 else
5439 pcum->pcs_variant = ARM_PCS_AAPCS64;
43e9d192
IB
5440 pcum->aapcs_reg = NULL_RTX;
5441 pcum->aapcs_arg_processed = false;
5442 pcum->aapcs_stack_words = 0;
5443 pcum->aapcs_stack_size = 0;
c600df9a 5444 pcum->silent_p = silent_p;
43e9d192 5445
c600df9a
RS
5446 if (!silent_p
5447 && !TARGET_FLOAT
261fb553
AL
5448 && fndecl && TREE_PUBLIC (fndecl)
5449 && fntype && fntype != error_mark_node)
5450 {
5451 const_tree type = TREE_TYPE (fntype);
5452 machine_mode mode ATTRIBUTE_UNUSED; /* To pass pointer as argument. */
5453 int nregs ATTRIBUTE_UNUSED; /* Likewise. */
5454 if (aarch64_vfp_is_call_or_return_candidate (TYPE_MODE (type), type,
5455 &mode, &nregs, NULL))
fc29dfc9 5456 aarch64_err_no_fpadvsimd (TYPE_MODE (type));
261fb553 5457 }
c600df9a
RS
5458
5459 if (!silent_p
5460 && !TARGET_SVE
5461 && pcum->pcs_variant == ARM_PCS_SVE)
5462 {
5463 /* We can't gracefully recover at this point, so make this a
5464 fatal error. */
5465 if (fndecl)
5466 fatal_error (input_location, "%qE requires the SVE ISA extension",
5467 fndecl);
5468 else
5469 fatal_error (input_location, "calls to functions of type %qT require"
5470 " the SVE ISA extension", fntype);
5471 }
43e9d192
IB
5472}
5473
5474static void
5475aarch64_function_arg_advance (cumulative_args_t pcum_v,
6930c98c 5476 const function_arg_info &arg)
43e9d192
IB
5477{
5478 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
08cc4d92 5479 if (pcum->pcs_variant == ARM_PCS_AAPCS64
c600df9a
RS
5480 || pcum->pcs_variant == ARM_PCS_SIMD
5481 || pcum->pcs_variant == ARM_PCS_SVE)
43e9d192 5482 {
6aa5370c 5483 aarch64_layout_arg (pcum_v, arg, arg.mode);
43e9d192
IB
5484 gcc_assert ((pcum->aapcs_reg != NULL_RTX)
5485 != (pcum->aapcs_stack_words != 0));
5486 pcum->aapcs_arg_processed = false;
5487 pcum->aapcs_ncrn = pcum->aapcs_nextncrn;
5488 pcum->aapcs_nvrn = pcum->aapcs_nextnvrn;
c600df9a 5489 pcum->aapcs_nprn = pcum->aapcs_nextnprn;
43e9d192
IB
5490 pcum->aapcs_stack_size += pcum->aapcs_stack_words;
5491 pcum->aapcs_stack_words = 0;
5492 pcum->aapcs_reg = NULL_RTX;
5493 }
5494}
5495
5496bool
5497aarch64_function_arg_regno_p (unsigned regno)
5498{
5499 return ((GP_REGNUM_P (regno) && regno < R0_REGNUM + NUM_ARG_REGS)
5500 || (FP_REGNUM_P (regno) && regno < V0_REGNUM + NUM_FP_ARG_REGS));
5501}
5502
5503/* Implement FUNCTION_ARG_BOUNDARY. Every parameter gets at least
5504 PARM_BOUNDARY bits of alignment, but will be given anything up
5505 to STACK_BOUNDARY bits if the type requires it. This makes sure
5506 that both before and after the layout of each argument, the Next
5507 Stacked Argument Address (NSAA) will have a minimum alignment of
5508 8 bytes. */
5509
5510static unsigned int
ef4bddc2 5511aarch64_function_arg_boundary (machine_mode mode, const_tree type)
43e9d192 5512{
c590597c
RE
5513 bool abi_break;
5514 unsigned int alignment = aarch64_function_arg_alignment (mode, type,
5515 &abi_break);
 5516 if (abi_break && warn_psabi)
5517 inform (input_location, "parameter passing for argument of type "
5518 "%qT changed in GCC 9.1", type);
5519
985b8393 5520 return MIN (MAX (alignment, PARM_BOUNDARY), STACK_BOUNDARY);
43e9d192
IB
5521}
5522
43cacb12
RS
5523/* Implement TARGET_GET_RAW_RESULT_MODE and TARGET_GET_RAW_ARG_MODE. */
5524
5525static fixed_size_mode
5526aarch64_get_reg_raw_mode (int regno)
5527{
5528 if (TARGET_SVE && FP_REGNUM_P (regno))
5529 /* Don't use the SVE part of the register for __builtin_apply and
5530 __builtin_return. The SVE registers aren't used by the normal PCS,
5531 so using them there would be a waste of time. The PCS extensions
5532 for SVE types are fundamentally incompatible with the
5533 __builtin_return/__builtin_apply interface. */
5534 return as_a <fixed_size_mode> (V16QImode);
5535 return default_get_reg_raw_mode (regno);
5536}
5537
76b0cbf8 5538/* Implement TARGET_FUNCTION_ARG_PADDING.
43e9d192
IB
5539
5540 Small aggregate types are placed in the lowest memory address.
5541
5542 The related parameter passing rules are B.4, C.3, C.5 and C.14. */
5543
76b0cbf8
RS
5544static pad_direction
5545aarch64_function_arg_padding (machine_mode mode, const_tree type)
43e9d192
IB
5546{
5547 /* On little-endian targets, the least significant byte of every stack
5548 argument is passed at the lowest byte address of the stack slot. */
5549 if (!BYTES_BIG_ENDIAN)
76b0cbf8 5550 return PAD_UPWARD;
43e9d192 5551
00edcfbe 5552 /* Otherwise, integral, floating-point and pointer types are padded downward:
43e9d192
IB
5553 the least significant byte of a stack argument is passed at the highest
5554 byte address of the stack slot. */
5555 if (type
00edcfbe
YZ
5556 ? (INTEGRAL_TYPE_P (type) || SCALAR_FLOAT_TYPE_P (type)
5557 || POINTER_TYPE_P (type))
43e9d192 5558 : (SCALAR_INT_MODE_P (mode) || SCALAR_FLOAT_MODE_P (mode)))
76b0cbf8 5559 return PAD_DOWNWARD;
43e9d192
IB
5560
5561 /* Everything else padded upward, i.e. data in first byte of stack slot. */
76b0cbf8 5562 return PAD_UPWARD;
43e9d192
IB
5563}
5564
5565/* Similarly, for use by BLOCK_REG_PADDING (MODE, TYPE, FIRST).
5566
 5567 It specifies padding for the last (possibly the only)
 5568 element of a block move between registers and memory.
 5569 Assuming the block is in memory, padding upward means that
 5570 the last element is padded after its most significant byte,
 5571 while with downward padding the last element is padded at
 5572 its least significant byte side.
5573
5574 Small aggregates and small complex types are always padded
5575 upwards.
5576
5577 We don't need to worry about homogeneous floating-point or
5578 short-vector aggregates; their move is not affected by the
5579 padding direction determined here. Regardless of endianness,
5580 each element of such an aggregate is put in the least
5581 significant bits of a fp/simd register.
5582
5583 Return !BYTES_BIG_ENDIAN if the least significant byte of the
5584 register has useful data, and return the opposite if the most
5585 significant byte does. */
5586
5587bool
ef4bddc2 5588aarch64_pad_reg_upward (machine_mode mode, const_tree type,
43e9d192
IB
5589 bool first ATTRIBUTE_UNUSED)
5590{
5591
5592 /* Small composite types are always padded upward. */
5593 if (BYTES_BIG_ENDIAN && aarch64_composite_type_p (type, mode))
5594 {
6a70badb
RS
5595 HOST_WIDE_INT size;
5596 if (type)
5597 size = int_size_in_bytes (type);
5598 else
5599 /* No frontends can create types with variable-sized modes, so we
5600 shouldn't be asked to pass or return them. */
5601 size = GET_MODE_SIZE (mode).to_constant ();
43e9d192
IB
5602 if (size < 2 * UNITS_PER_WORD)
5603 return true;
5604 }
5605
5606 /* Otherwise, use the default padding. */
5607 return !BYTES_BIG_ENDIAN;
5608}
5609
095a2d76 5610static scalar_int_mode
43e9d192
IB
5611aarch64_libgcc_cmp_return_mode (void)
5612{
5613 return SImode;
5614}
5615
a3eb8a52
EB
5616#define PROBE_INTERVAL (1 << STACK_CHECK_PROBE_INTERVAL_EXP)
5617
5618/* We use the 12-bit shifted immediate arithmetic instructions so values
 5619 must be a multiple of (1 << 12), i.e. 4096. */
5620#define ARITH_FACTOR 4096
5621
5622#if (PROBE_INTERVAL % ARITH_FACTOR) != 0
5623#error Cannot use simple address calculation for stack probing
5624#endif
5625
5626/* The pair of scratch registers used for stack probing. */
8921ccbb
OH
5627#define PROBE_STACK_FIRST_REG R9_REGNUM
5628#define PROBE_STACK_SECOND_REG R10_REGNUM
a3eb8a52 5629
6a70badb 5630/* Emit code to probe a range of stack addresses from FIRST to FIRST+POLY_SIZE,
a3eb8a52
EB
5631 inclusive. These are offsets from the current stack pointer. */
5632
5633static void
6a70badb 5634aarch64_emit_probe_stack_range (HOST_WIDE_INT first, poly_int64 poly_size)
a3eb8a52 5635{
6a70badb
RS
5636 HOST_WIDE_INT size;
5637 if (!poly_size.is_constant (&size))
5638 {
5639 sorry ("stack probes for SVE frames");
5640 return;
5641 }
5642
5f5c5e0f 5643 rtx reg1 = gen_rtx_REG (Pmode, PROBE_STACK_FIRST_REG);
a3eb8a52
EB
5644
5645 /* See the same assertion on PROBE_INTERVAL above. */
5646 gcc_assert ((first % ARITH_FACTOR) == 0);
5647
5648 /* See if we have a constant small number of probes to generate. If so,
5649 that's the easy case. */
5650 if (size <= PROBE_INTERVAL)
5651 {
5652 const HOST_WIDE_INT base = ROUND_UP (size, ARITH_FACTOR);
5653
5654 emit_set_insn (reg1,
5f5c5e0f 5655 plus_constant (Pmode,
a3eb8a52 5656 stack_pointer_rtx, -(first + base)));
5f5c5e0f 5657 emit_stack_probe (plus_constant (Pmode, reg1, base - size));
a3eb8a52
EB
5658 }
5659
5660 /* The run-time loop is made up of 8 insns in the generic case while the
5661 compile-time loop is made up of 4+2*(n-2) insns for n # of intervals. */
5662 else if (size <= 4 * PROBE_INTERVAL)
5663 {
5664 HOST_WIDE_INT i, rem;
5665
5666 emit_set_insn (reg1,
5f5c5e0f 5667 plus_constant (Pmode,
a3eb8a52
EB
5668 stack_pointer_rtx,
5669 -(first + PROBE_INTERVAL)));
5670 emit_stack_probe (reg1);
5671
5672 /* Probe at FIRST + N * PROBE_INTERVAL for values of N from 2 until
5673 it exceeds SIZE. If only two probes are needed, this will not
5674 generate any code. Then probe at FIRST + SIZE. */
5675 for (i = 2 * PROBE_INTERVAL; i < size; i += PROBE_INTERVAL)
5676 {
5677 emit_set_insn (reg1,
5f5c5e0f 5678 plus_constant (Pmode, reg1, -PROBE_INTERVAL));
a3eb8a52
EB
5679 emit_stack_probe (reg1);
5680 }
5681
5682 rem = size - (i - PROBE_INTERVAL);
5683 if (rem > 256)
5684 {
5685 const HOST_WIDE_INT base = ROUND_UP (rem, ARITH_FACTOR);
5686
5f5c5e0f
EB
5687 emit_set_insn (reg1, plus_constant (Pmode, reg1, -base));
5688 emit_stack_probe (plus_constant (Pmode, reg1, base - rem));
a3eb8a52
EB
5689 }
5690 else
5f5c5e0f 5691 emit_stack_probe (plus_constant (Pmode, reg1, -rem));
a3eb8a52
EB
5692 }
5693
5694 /* Otherwise, do the same as above, but in a loop. Note that we must be
5695 extra careful with variables wrapping around because we might be at
5696 the very top (or the very bottom) of the address space and we have
5697 to be able to handle this case properly; in particular, we use an
5698 equality test for the loop condition. */
5699 else
5700 {
5f5c5e0f 5701 rtx reg2 = gen_rtx_REG (Pmode, PROBE_STACK_SECOND_REG);
a3eb8a52
EB
5702
5703 /* Step 1: round SIZE to the previous multiple of the interval. */
5704
5705 HOST_WIDE_INT rounded_size = size & -PROBE_INTERVAL;
5706
5707
5708 /* Step 2: compute initial and final value of the loop counter. */
5709
5710 /* TEST_ADDR = SP + FIRST. */
5711 emit_set_insn (reg1,
5f5c5e0f 5712 plus_constant (Pmode, stack_pointer_rtx, -first));
a3eb8a52
EB
5713
5714 /* LAST_ADDR = SP + FIRST + ROUNDED_SIZE. */
13f752b2
JL
5715 HOST_WIDE_INT adjustment = - (first + rounded_size);
5716 if (! aarch64_uimm12_shift (adjustment))
5717 {
5718 aarch64_internal_mov_immediate (reg2, GEN_INT (adjustment),
5719 true, Pmode);
5720 emit_set_insn (reg2, gen_rtx_PLUS (Pmode, stack_pointer_rtx, reg2));
5721 }
5722 else
8dd64cdf
EB
5723 emit_set_insn (reg2,
5724 plus_constant (Pmode, stack_pointer_rtx, adjustment));
5725
a3eb8a52
EB
5726 /* Step 3: the loop
5727
5728 do
5729 {
5730 TEST_ADDR = TEST_ADDR + PROBE_INTERVAL
5731 probe at TEST_ADDR
5732 }
5733 while (TEST_ADDR != LAST_ADDR)
5734
5735 probes at FIRST + N * PROBE_INTERVAL for values of N from 1
5736 until it is equal to ROUNDED_SIZE. */
5737
5f5c5e0f 5738 emit_insn (gen_probe_stack_range (reg1, reg1, reg2));
a3eb8a52
EB
5739
5740
5741 /* Step 4: probe at FIRST + SIZE if we cannot assert at compile-time
5742 that SIZE is equal to ROUNDED_SIZE. */
5743
5744 if (size != rounded_size)
5745 {
5746 HOST_WIDE_INT rem = size - rounded_size;
5747
5748 if (rem > 256)
5749 {
5750 const HOST_WIDE_INT base = ROUND_UP (rem, ARITH_FACTOR);
5751
5f5c5e0f
EB
5752 emit_set_insn (reg2, plus_constant (Pmode, reg2, -base));
5753 emit_stack_probe (plus_constant (Pmode, reg2, base - rem));
a3eb8a52
EB
5754 }
5755 else
5f5c5e0f 5756 emit_stack_probe (plus_constant (Pmode, reg2, -rem));
a3eb8a52
EB
5757 }
5758 }
5759
5760 /* Make sure nothing is scheduled before we are done. */
5761 emit_insn (gen_blockage ());
5762}
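
/* A worked example of the unrolled case above, assuming PROBE_INTERVAL is
   4096 and FIRST is 0: for SIZE == 10000, probes are emitted at sp - 4096
   and sp - 8192; the residual is 10000 - 8192 = 1808 > 256, so REG1 is
   dropped by ROUND_UP (1808, 4096) = 4096 to sp - 12288 and the final probe
   is emitted at sp - 12288 + 2288 = sp - 10000.  */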
5763
5764/* Probe a range of stack addresses from REG1 to REG2 inclusive. These are
5765 absolute addresses. */
5766
5767const char *
5768aarch64_output_probe_stack_range (rtx reg1, rtx reg2)
5769{
5770 static int labelno = 0;
5771 char loop_lab[32];
5772 rtx xops[2];
5773
5774 ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno++);
5775
5776 /* Loop. */
5777 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);
5778
cd1bef27 5779 HOST_WIDE_INT stack_clash_probe_interval
028d4092 5780 = 1 << param_stack_clash_protection_guard_size;
cd1bef27 5781
a3eb8a52
EB
5782 /* TEST_ADDR = TEST_ADDR + PROBE_INTERVAL. */
5783 xops[0] = reg1;
cd1bef27
JL
5784 HOST_WIDE_INT interval;
5785 if (flag_stack_clash_protection)
5786 interval = stack_clash_probe_interval;
5787 else
5788 interval = PROBE_INTERVAL;
5789
5790 gcc_assert (aarch64_uimm12_shift (interval));
5791 xops[1] = GEN_INT (interval);
5792
a3eb8a52
EB
5793 output_asm_insn ("sub\t%0, %0, %1", xops);
5794
cd1bef27
JL
5795 /* If doing stack clash protection then we probe up by the ABI specified
5796 amount. We do this because we're dropping full pages at a time in the
5797 loop. But if we're doing non-stack clash probing, probe at SP 0. */
5798 if (flag_stack_clash_protection)
5799 xops[1] = GEN_INT (STACK_CLASH_CALLER_GUARD);
5800 else
5801 xops[1] = CONST0_RTX (GET_MODE (xops[1]));
5802
5803 /* Probe at TEST_ADDR. If we're inside the loop it is always safe to probe
5804 by this amount for each iteration. */
5805 output_asm_insn ("str\txzr, [%0, %1]", xops);
a3eb8a52
EB
5806
5807 /* Test if TEST_ADDR == LAST_ADDR. */
5808 xops[1] = reg2;
5809 output_asm_insn ("cmp\t%0, %1", xops);
5810
5811 /* Branch. */
5812 fputs ("\tb.ne\t", asm_out_file);
5813 assemble_name_raw (asm_out_file, loop_lab);
5814 fputc ('\n', asm_out_file);
5815
5816 return "";
5817}
5818
eb471ba3
TC
5819/* Emit the probe loop for doing stack clash probes and stack adjustments for
5820 SVE. This emits probes from BASE to BASE - ADJUSTMENT based on a guard size
5821 of GUARD_SIZE. When a probe is emitted it is done at most
5822 MIN_PROBE_THRESHOLD bytes from the current BASE at an interval of
5823 at most MIN_PROBE_THRESHOLD. By the end of this function
5824 BASE = BASE - ADJUSTMENT. */
5825
5826const char *
5827aarch64_output_probe_sve_stack_clash (rtx base, rtx adjustment,
5828 rtx min_probe_threshold, rtx guard_size)
5829{
5830 /* This function is not allowed to use any instruction generation function
5831 like gen_ and friends. If you do you'll likely ICE during CFG validation,
5832 so instead emit the code you want using output_asm_insn. */
5833 gcc_assert (flag_stack_clash_protection);
5834 gcc_assert (CONST_INT_P (min_probe_threshold) && CONST_INT_P (guard_size));
5835 gcc_assert (INTVAL (guard_size) > INTVAL (min_probe_threshold));
5836
5837 /* The minimum required allocation before the residual requires probing. */
5838 HOST_WIDE_INT residual_probe_guard = INTVAL (min_probe_threshold);
5839
5840 /* Clamp the value down to the nearest value that can be used with a cmp. */
5841 residual_probe_guard = aarch64_clamp_to_uimm12_shift (residual_probe_guard);
5842 rtx probe_offset_value_rtx = gen_int_mode (residual_probe_guard, Pmode);
5843
5844 gcc_assert (INTVAL (min_probe_threshold) >= residual_probe_guard);
5845 gcc_assert (aarch64_uimm12_shift (residual_probe_guard));
5846
5847 static int labelno = 0;
5848 char loop_start_lab[32];
5849 char loop_end_lab[32];
5850 rtx xops[2];
5851
5852 ASM_GENERATE_INTERNAL_LABEL (loop_start_lab, "SVLPSPL", labelno);
5853 ASM_GENERATE_INTERNAL_LABEL (loop_end_lab, "SVLPEND", labelno++);
5854
5855 /* Emit loop start label. */
5856 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_start_lab);
5857
5858 /* ADJUSTMENT < RESIDUAL_PROBE_GUARD. */
5859 xops[0] = adjustment;
5860 xops[1] = probe_offset_value_rtx;
5861 output_asm_insn ("cmp\t%0, %1", xops);
5862
5863 /* Branch to end if not enough adjustment to probe. */
5864 fputs ("\tb.lt\t", asm_out_file);
5865 assemble_name_raw (asm_out_file, loop_end_lab);
5866 fputc ('\n', asm_out_file);
5867
5868 /* BASE = BASE - RESIDUAL_PROBE_GUARD. */
5869 xops[0] = base;
5870 xops[1] = probe_offset_value_rtx;
5871 output_asm_insn ("sub\t%0, %0, %1", xops);
5872
5873 /* Probe at BASE. */
5874 xops[1] = const0_rtx;
5875 output_asm_insn ("str\txzr, [%0, %1]", xops);
5876
5877 /* ADJUSTMENT = ADJUSTMENT - RESIDUAL_PROBE_GUARD. */
5878 xops[0] = adjustment;
5879 xops[1] = probe_offset_value_rtx;
5880 output_asm_insn ("sub\t%0, %0, %1", xops);
5881
5882 /* Branch to start if still more bytes to allocate. */
5883 fputs ("\tb\t", asm_out_file);
5884 assemble_name_raw (asm_out_file, loop_start_lab);
5885 fputc ('\n', asm_out_file);
5886
 5887 /* No further probe is needed; leave the loop. */
5888 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_end_lab);
5889
5890 /* BASE = BASE - ADJUSTMENT. */
5891 xops[0] = base;
5892 xops[1] = adjustment;
5893 output_asm_insn ("sub\t%0, %0, %1", xops);
5894 return "";
5895}
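
/* A sketch of the code this prints, with BASE == sp, ADJUSTMENT held in a
   scratch register (x16 here is only illustrative) and a residual probe
   guard of 4096:

	.SVLPSPL0:
		cmp	x16, 4096
		b.lt	.SVLPEND0
		sub	sp, sp, 4096
		str	xzr, [sp, 0]
		sub	x16, x16, 4096
		b	.SVLPSPL0
	.SVLPEND0:
		sub	sp, sp, x16  */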
5896
d6cb6d6a
WD
5897/* Determine whether a frame chain needs to be generated. */
5898static bool
5899aarch64_needs_frame_chain (void)
5900{
5901 /* Force a frame chain for EH returns so the return address is at FP+8. */
5902 if (frame_pointer_needed || crtl->calls_eh_return)
5903 return true;
5904
5905 /* A leaf function cannot have calls or write LR. */
5906 bool is_leaf = crtl->is_leaf && !df_regs_ever_live_p (LR_REGNUM);
5907
5908 /* Don't use a frame chain in leaf functions if leaf frame pointers
5909 are disabled. */
5910 if (flag_omit_leaf_frame_pointer && is_leaf)
5911 return false;
5912
5913 return aarch64_use_frame_pointer;
5914}
5915
43e9d192
IB
5916/* Mark the registers that need to be saved by the callee and calculate
5917 the size of the callee-saved registers area and frame record (both FP
33a2e348 5918 and LR may be omitted). */
43e9d192
IB
5919static void
5920aarch64_layout_frame (void)
5921{
c600df9a 5922 poly_int64 offset = 0;
4b0685d9 5923 int regno, last_fp_reg = INVALID_REGNUM;
c600df9a
RS
5924 machine_mode vector_save_mode = aarch64_reg_save_mode (V8_REGNUM);
5925 poly_int64 vector_save_size = GET_MODE_SIZE (vector_save_mode);
5926 bool frame_related_fp_reg_p = false;
ab43763e 5927 aarch64_frame &frame = cfun->machine->frame;
43e9d192 5928
ab43763e 5929 frame.emit_frame_chain = aarch64_needs_frame_chain ();
7040939b 5930
8c6e3b23
TC
5931 /* Adjust the outgoing arguments size if required. Keep it in sync with what
5932 the mid-end is doing. */
5933 crtl->outgoing_args_size = STACK_DYNAMIC_OFFSET (cfun);
5934
97826595
MS
5935#define SLOT_NOT_REQUIRED (-2)
5936#define SLOT_REQUIRED (-1)
5937
ab43763e
RS
5938 frame.wb_candidate1 = INVALID_REGNUM;
5939 frame.wb_candidate2 = INVALID_REGNUM;
c600df9a 5940 frame.spare_pred_reg = INVALID_REGNUM;
363ffa50 5941
43e9d192 5942 /* First mark all the registers that really need to be saved... */
c600df9a 5943 for (regno = 0; regno <= LAST_SAVED_REGNUM; regno++)
ab43763e 5944 frame.reg_offset[regno] = SLOT_NOT_REQUIRED;
43e9d192
IB
5945
5946 /* ... that includes the eh data registers (if needed)... */
5947 if (crtl->calls_eh_return)
5948 for (regno = 0; EH_RETURN_DATA_REGNO (regno) != INVALID_REGNUM; regno++)
ab43763e 5949 frame.reg_offset[EH_RETURN_DATA_REGNO (regno)] = SLOT_REQUIRED;
43e9d192
IB
5950
5951 /* ... and any callee saved register that dataflow says is live. */
5952 for (regno = R0_REGNUM; regno <= R30_REGNUM; regno++)
5953 if (df_regs_ever_live_p (regno)
dcdd0f05 5954 && !fixed_regs[regno]
1c923b60 5955 && (regno == R30_REGNUM
dcdd0f05 5956 || !crtl->abi->clobbers_full_reg_p (regno)))
ab43763e 5957 frame.reg_offset[regno] = SLOT_REQUIRED;
43e9d192
IB
5958
5959 for (regno = V0_REGNUM; regno <= V31_REGNUM; regno++)
5960 if (df_regs_ever_live_p (regno)
dcdd0f05
RS
5961 && !fixed_regs[regno]
5962 && !crtl->abi->clobbers_full_reg_p (regno))
4b0685d9 5963 {
ab43763e 5964 frame.reg_offset[regno] = SLOT_REQUIRED;
4b0685d9 5965 last_fp_reg = regno;
c600df9a
RS
5966 if (aarch64_emit_cfi_for_reg_p (regno))
5967 frame_related_fp_reg_p = true;
4b0685d9 5968 }
43e9d192 5969
c600df9a
RS
5970 /* Big-endian SVE frames need a spare predicate register in order
5971 to save Z8-Z15. Decide which register they should use. Prefer
5972 an unused argument register if possible, so that we don't force P4
5973 to be saved unnecessarily. */
5974 if (frame_related_fp_reg_p
5975 && crtl->abi->id () == ARM_PCS_SVE
5976 && BYTES_BIG_ENDIAN)
5977 {
5978 bitmap live1 = df_get_live_out (ENTRY_BLOCK_PTR_FOR_FN (cfun));
5979 bitmap live2 = df_get_live_in (EXIT_BLOCK_PTR_FOR_FN (cfun));
5980 for (regno = P0_REGNUM; regno <= P7_REGNUM; regno++)
5981 if (!bitmap_bit_p (live1, regno) && !bitmap_bit_p (live2, regno))
5982 break;
5983 gcc_assert (regno <= P7_REGNUM);
5984 frame.spare_pred_reg = regno;
5985 df_set_regs_ever_live (regno, true);
5986 }
5987
5988 for (regno = P0_REGNUM; regno <= P15_REGNUM; regno++)
5989 if (df_regs_ever_live_p (regno)
5990 && !fixed_regs[regno]
5991 && !crtl->abi->clobbers_full_reg_p (regno))
5992 frame.reg_offset[regno] = SLOT_REQUIRED;
5993
5994 /* With stack-clash, LR must be saved in non-leaf functions. */
5995 gcc_assert (crtl->is_leaf
5996 || maybe_ne (frame.reg_offset[R30_REGNUM], SLOT_NOT_REQUIRED));
5997
5998 /* Now assign stack slots for the registers. Start with the predicate
5999 registers, since predicate LDR and STR have a relatively small
6000 offset range. These saves happen below the hard frame pointer. */
6001 for (regno = P0_REGNUM; regno <= P15_REGNUM; regno++)
6002 if (known_eq (frame.reg_offset[regno], SLOT_REQUIRED))
6003 {
6004 frame.reg_offset[regno] = offset;
6005 offset += BYTES_PER_SVE_PRED;
6006 }
6007
6008 /* We save a maximum of 8 predicate registers, and since vector
6009 registers are 8 times the size of a predicate register, all the
6010 saved predicates fit within a single vector. Doing this also
6011 rounds the offset to a 128-bit boundary. */
6012 if (maybe_ne (offset, 0))
6013 {
6014 gcc_assert (known_le (offset, vector_save_size));
6015 offset = vector_save_size;
6016 }
6017
6018 /* If we need to save any SVE vector registers, add them next. */
6019 if (last_fp_reg != (int) INVALID_REGNUM && crtl->abi->id () == ARM_PCS_SVE)
6020 for (regno = V0_REGNUM; regno <= V31_REGNUM; regno++)
6021 if (known_eq (frame.reg_offset[regno], SLOT_REQUIRED))
6022 {
6023 frame.reg_offset[regno] = offset;
6024 offset += vector_save_size;
6025 }
6026
6027 /* OFFSET is now the offset of the hard frame pointer from the bottom
6028 of the callee save area. */
6029 bool saves_below_hard_fp_p = maybe_ne (offset, 0);
6030 frame.below_hard_fp_saved_regs_size = offset;
ab43763e 6031 if (frame.emit_frame_chain)
43e9d192 6032 {
2e1cdae5 6033 /* FP and LR are placed in the linkage record. */
c600df9a 6034 frame.reg_offset[R29_REGNUM] = offset;
ab43763e 6035 frame.wb_candidate1 = R29_REGNUM;
c600df9a 6036 frame.reg_offset[R30_REGNUM] = offset + UNITS_PER_WORD;
ab43763e 6037 frame.wb_candidate2 = R30_REGNUM;
c600df9a 6038 offset += 2 * UNITS_PER_WORD;
1f7bffd0 6039 }
43e9d192 6040
2e1cdae5 6041 for (regno = R0_REGNUM; regno <= R30_REGNUM; regno++)
c600df9a 6042 if (known_eq (frame.reg_offset[regno], SLOT_REQUIRED))
43e9d192 6043 {
ab43763e
RS
6044 frame.reg_offset[regno] = offset;
6045 if (frame.wb_candidate1 == INVALID_REGNUM)
6046 frame.wb_candidate1 = regno;
6047 else if (frame.wb_candidate2 == INVALID_REGNUM)
6048 frame.wb_candidate2 = regno;
43e9d192
IB
6049 offset += UNITS_PER_WORD;
6050 }
6051
c600df9a
RS
6052 poly_int64 max_int_offset = offset;
6053 offset = aligned_upper_bound (offset, STACK_BOUNDARY / BITS_PER_UNIT);
6054 bool has_align_gap = maybe_ne (offset, max_int_offset);
4b0685d9 6055
43e9d192 6056 for (regno = V0_REGNUM; regno <= V31_REGNUM; regno++)
c600df9a 6057 if (known_eq (frame.reg_offset[regno], SLOT_REQUIRED))
43e9d192 6058 {
4b0685d9
WD
6059 /* If there is an alignment gap between integer and fp callee-saves,
6060 allocate the last fp register to it if possible. */
a0d0b980
SE
6061 if (regno == last_fp_reg
6062 && has_align_gap
c600df9a
RS
6063 && known_eq (vector_save_size, 8)
6064 && multiple_p (offset, 16))
4b0685d9 6065 {
ab43763e 6066 frame.reg_offset[regno] = max_int_offset;
4b0685d9
WD
6067 break;
6068 }
6069
ab43763e
RS
6070 frame.reg_offset[regno] = offset;
6071 if (frame.wb_candidate1 == INVALID_REGNUM)
6072 frame.wb_candidate1 = regno;
6073 else if (frame.wb_candidate2 == INVALID_REGNUM
6074 && frame.wb_candidate1 >= V0_REGNUM)
6075 frame.wb_candidate2 = regno;
c600df9a 6076 offset += vector_save_size;
43e9d192
IB
6077 }
6078
c600df9a 6079 offset = aligned_upper_bound (offset, STACK_BOUNDARY / BITS_PER_UNIT);
43e9d192 6080
ab43763e 6081 frame.saved_regs_size = offset;
1c960e02 6082
c600df9a 6083 poly_int64 varargs_and_saved_regs_size = offset + frame.saved_varargs_size;
71bfb77a 6084
c600df9a 6085 poly_int64 above_outgoing_args
6a70badb
RS
6086 = aligned_upper_bound (varargs_and_saved_regs_size
6087 + get_frame_size (),
6088 STACK_BOUNDARY / BITS_PER_UNIT);
1c960e02 6089
c600df9a
RS
6090 frame.hard_fp_offset
6091 = above_outgoing_args - frame.below_hard_fp_saved_regs_size;
6092
6a70badb
RS
6093 /* Both these values are already aligned. */
6094 gcc_assert (multiple_p (crtl->outgoing_args_size,
6095 STACK_BOUNDARY / BITS_PER_UNIT));
c600df9a 6096 frame.frame_size = above_outgoing_args + crtl->outgoing_args_size;
1c960e02 6097
ab43763e 6098 frame.locals_offset = frame.saved_varargs_size;
71bfb77a 6099
ab43763e
RS
6100 frame.initial_adjust = 0;
6101 frame.final_adjust = 0;
6102 frame.callee_adjust = 0;
c600df9a 6103 frame.sve_callee_adjust = 0;
ab43763e 6104 frame.callee_offset = 0;
71bfb77a
WD
6105
6106 HOST_WIDE_INT max_push_offset = 0;
ab43763e 6107 if (frame.wb_candidate2 != INVALID_REGNUM)
71bfb77a 6108 max_push_offset = 512;
ab43763e 6109 else if (frame.wb_candidate1 != INVALID_REGNUM)
71bfb77a
WD
6110 max_push_offset = 256;
6111
9b17a646 6112 HOST_WIDE_INT const_size, const_outgoing_args_size, const_fp_offset;
c600df9a 6113 HOST_WIDE_INT const_saved_regs_size;
ab43763e 6114 if (frame.frame_size.is_constant (&const_size)
6a70badb 6115 && const_size < max_push_offset
c600df9a 6116 && known_eq (frame.hard_fp_offset, const_size))
71bfb77a
WD
6117 {
6118 /* Simple, small frame with no outgoing arguments:
c600df9a 6119
71bfb77a
WD
6120 stp reg1, reg2, [sp, -frame_size]!
6121 stp reg3, reg4, [sp, 16] */
ab43763e 6122 frame.callee_adjust = const_size;
71bfb77a 6123 }
9b17a646 6124 else if (crtl->outgoing_args_size.is_constant (&const_outgoing_args_size)
c600df9a
RS
6125 && frame.saved_regs_size.is_constant (&const_saved_regs_size)
6126 && const_outgoing_args_size + const_saved_regs_size < 512
6127 /* We could handle this case even with outgoing args, provided
6128 that the number of args left us with valid offsets for all
6129 predicate and vector save slots. It's such a rare case that
6130 it hardly seems worth the effort though. */
6131 && (!saves_below_hard_fp_p || const_outgoing_args_size == 0)
71bfb77a 6132 && !(cfun->calls_alloca
9b17a646
RS
6133 && frame.hard_fp_offset.is_constant (&const_fp_offset)
6134 && const_fp_offset < max_push_offset))
71bfb77a
WD
6135 {
6136 /* Frame with small outgoing arguments:
c600df9a 6137
71bfb77a
WD
6138 sub sp, sp, frame_size
6139 stp reg1, reg2, [sp, outgoing_args_size]
6140 stp reg3, reg4, [sp, outgoing_args_size + 16] */
ab43763e 6141 frame.initial_adjust = frame.frame_size;
9b17a646 6142 frame.callee_offset = const_outgoing_args_size;
71bfb77a 6143 }
c600df9a
RS
6144 else if (saves_below_hard_fp_p
6145 && known_eq (frame.saved_regs_size,
6146 frame.below_hard_fp_saved_regs_size))
6147 {
6148 /* Frame in which all saves are SVE saves:
6149
6150 sub sp, sp, hard_fp_offset + below_hard_fp_saved_regs_size
6151 save SVE registers relative to SP
6152 sub sp, sp, outgoing_args_size */
6153 frame.initial_adjust = (frame.hard_fp_offset
6154 + frame.below_hard_fp_saved_regs_size);
6155 frame.final_adjust = crtl->outgoing_args_size;
6156 }
ab43763e 6157 else if (frame.hard_fp_offset.is_constant (&const_fp_offset)
6a70badb 6158 && const_fp_offset < max_push_offset)
71bfb77a 6159 {
c600df9a
RS
6160 /* Frame with large outgoing arguments or SVE saves, but with
6161 a small local area:
6162
71bfb77a
WD
6163 stp reg1, reg2, [sp, -hard_fp_offset]!
6164 stp reg3, reg4, [sp, 16]
c600df9a
RS
6165 [sub sp, sp, below_hard_fp_saved_regs_size]
6166 [save SVE registers relative to SP]
71bfb77a 6167 sub sp, sp, outgoing_args_size */
ab43763e 6168 frame.callee_adjust = const_fp_offset;
c600df9a 6169 frame.sve_callee_adjust = frame.below_hard_fp_saved_regs_size;
8e66b377 6170 frame.final_adjust = crtl->outgoing_args_size;
71bfb77a 6171 }
71bfb77a
WD
6172 else
6173 {
c600df9a
RS
6174 /* Frame with large local area and outgoing arguments or SVE saves,
6175 using frame pointer:
6176
71bfb77a
WD
6177 sub sp, sp, hard_fp_offset
6178 stp x29, x30, [sp, 0]
6179 add x29, sp, 0
6180 stp reg3, reg4, [sp, 16]
c600df9a
RS
6181 [sub sp, sp, below_hard_fp_saved_regs_size]
6182 [save SVE registers relative to SP]
71bfb77a 6183 sub sp, sp, outgoing_args_size */
ab43763e 6184 frame.initial_adjust = frame.hard_fp_offset;
c600df9a 6185 frame.sve_callee_adjust = frame.below_hard_fp_saved_regs_size;
8e66b377 6186 frame.final_adjust = crtl->outgoing_args_size;
71bfb77a
WD
6187 }
6188
8e66b377
RS
6189 /* Make sure the individual adjustments add up to the full frame size. */
6190 gcc_assert (known_eq (frame.initial_adjust
6191 + frame.callee_adjust
c600df9a 6192 + frame.sve_callee_adjust
8e66b377
RS
6193 + frame.final_adjust, frame.frame_size));
6194
ab43763e 6195 frame.laid_out = true;
43e9d192
IB
6196}
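
/* A small worked example of the first case above (values are only
   illustrative): a function that needs a frame chain but has no locals,
   no outgoing arguments and no other saves gets frame_size == 16 ==
   hard_fp_offset, so callee_adjust is 16 and the whole allocation is a
   single "stp x29, x30, [sp, -16]!", with initial_adjust and final_adjust
   both zero.  */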
6197
04ddfe06
KT
6198/* Return true if the register REGNO is saved on entry to
6199 the current function. */
6200
43e9d192
IB
6201static bool
6202aarch64_register_saved_on_entry (int regno)
6203{
c600df9a 6204 return known_ge (cfun->machine->frame.reg_offset[regno], 0);
43e9d192
IB
6205}
6206
04ddfe06
KT
6207/* Return the next register up from REGNO up to LIMIT for the callee
6208 to save. */
6209
64dedd72
JW
6210static unsigned
6211aarch64_next_callee_save (unsigned regno, unsigned limit)
6212{
6213 while (regno <= limit && !aarch64_register_saved_on_entry (regno))
6214 regno ++;
6215 return regno;
6216}
43e9d192 6217
04ddfe06
KT
6218/* Push the register number REGNO of mode MODE to the stack with write-back
6219 adjusting the stack by ADJUSTMENT. */
6220
c5e1f66e 6221static void
ef4bddc2 6222aarch64_pushwb_single_reg (machine_mode mode, unsigned regno,
c5e1f66e
JW
6223 HOST_WIDE_INT adjustment)
6224 {
6225 rtx base_rtx = stack_pointer_rtx;
6226 rtx insn, reg, mem;
6227
6228 reg = gen_rtx_REG (mode, regno);
6229 mem = gen_rtx_PRE_MODIFY (Pmode, base_rtx,
6230 plus_constant (Pmode, base_rtx, -adjustment));
30079dde 6231 mem = gen_frame_mem (mode, mem);
c5e1f66e
JW
6232
6233 insn = emit_move_insn (mem, reg);
6234 RTX_FRAME_RELATED_P (insn) = 1;
6235}
6236
04ddfe06
KT
6237/* Generate and return an instruction to store the pair of registers
6238 REG and REG2 of mode MODE to location BASE with write-back adjusting
6239 the stack location BASE by ADJUSTMENT. */
6240
80c11907 6241static rtx
ef4bddc2 6242aarch64_gen_storewb_pair (machine_mode mode, rtx base, rtx reg, rtx reg2,
80c11907
JW
6243 HOST_WIDE_INT adjustment)
6244{
6245 switch (mode)
6246 {
4e10a5a7 6247 case E_DImode:
80c11907
JW
6248 return gen_storewb_pairdi_di (base, base, reg, reg2,
6249 GEN_INT (-adjustment),
6250 GEN_INT (UNITS_PER_WORD - adjustment));
4e10a5a7 6251 case E_DFmode:
80c11907
JW
6252 return gen_storewb_pairdf_di (base, base, reg, reg2,
6253 GEN_INT (-adjustment),
6254 GEN_INT (UNITS_PER_WORD - adjustment));
a0d0b980
SE
6255 case E_TFmode:
6256 return gen_storewb_pairtf_di (base, base, reg, reg2,
6257 GEN_INT (-adjustment),
6258 GEN_INT (UNITS_PER_VREG - adjustment));
80c11907
JW
6259 default:
6260 gcc_unreachable ();
6261 }
6262}
6263
04ddfe06
KT
6264/* Push registers numbered REGNO1 and REGNO2 to the stack, adjusting the
6265 stack pointer by ADJUSTMENT. */
6266
80c11907 6267static void
89ac681e 6268aarch64_push_regs (unsigned regno1, unsigned regno2, HOST_WIDE_INT adjustment)
80c11907 6269{
5d8a22a5 6270 rtx_insn *insn;
c600df9a 6271 machine_mode mode = aarch64_reg_save_mode (regno1);
89ac681e 6272
71bfb77a 6273 if (regno2 == INVALID_REGNUM)
89ac681e
WD
6274 return aarch64_pushwb_single_reg (mode, regno1, adjustment);
6275
80c11907
JW
6276 rtx reg1 = gen_rtx_REG (mode, regno1);
6277 rtx reg2 = gen_rtx_REG (mode, regno2);
6278
6279 insn = emit_insn (aarch64_gen_storewb_pair (mode, stack_pointer_rtx, reg1,
6280 reg2, adjustment));
6281 RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 2)) = 1;
80c11907
JW
6282 RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 1)) = 1;
6283 RTX_FRAME_RELATED_P (insn) = 1;
6284}
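
/* For instance (illustrative only): pushing x19 and x20 with an ADJUSTMENT
   of 32 uses the DImode write-back store-pair pattern and assembles to
   roughly "stp x19, x20, [sp, -32]!", leaving the 16 bytes above the pair
   free for later callee saves.  */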
6285
04ddfe06
KT
6286/* Load the pair of registers REG, REG2 of mode MODE from stack location BASE,
6287 adjusting it by ADJUSTMENT afterwards. */
6288
159313d9 6289static rtx
ef4bddc2 6290aarch64_gen_loadwb_pair (machine_mode mode, rtx base, rtx reg, rtx reg2,
159313d9
JW
6291 HOST_WIDE_INT adjustment)
6292{
6293 switch (mode)
6294 {
4e10a5a7 6295 case E_DImode:
159313d9 6296 return gen_loadwb_pairdi_di (base, base, reg, reg2, GEN_INT (adjustment),
3e322b3f 6297 GEN_INT (UNITS_PER_WORD));
4e10a5a7 6298 case E_DFmode:
159313d9 6299 return gen_loadwb_pairdf_di (base, base, reg, reg2, GEN_INT (adjustment),
3e322b3f 6300 GEN_INT (UNITS_PER_WORD));
a0d0b980
SE
6301 case E_TFmode:
6302 return gen_loadwb_pairtf_di (base, base, reg, reg2, GEN_INT (adjustment),
6303 GEN_INT (UNITS_PER_VREG));
159313d9
JW
6304 default:
6305 gcc_unreachable ();
6306 }
6307}
6308
04ddfe06
KT
6309/* Pop the two registers numbered REGNO1, REGNO2 from the stack, adjusting it
6310 afterwards by ADJUSTMENT and writing the appropriate REG_CFA_RESTORE notes
6311 into CFI_OPS. */
6312
89ac681e
WD
6313static void
6314aarch64_pop_regs (unsigned regno1, unsigned regno2, HOST_WIDE_INT adjustment,
6315 rtx *cfi_ops)
6316{
c600df9a 6317 machine_mode mode = aarch64_reg_save_mode (regno1);
89ac681e
WD
6318 rtx reg1 = gen_rtx_REG (mode, regno1);
6319
6320 *cfi_ops = alloc_reg_note (REG_CFA_RESTORE, reg1, *cfi_ops);
6321
71bfb77a 6322 if (regno2 == INVALID_REGNUM)
89ac681e
WD
6323 {
6324 rtx mem = plus_constant (Pmode, stack_pointer_rtx, adjustment);
6325 mem = gen_rtx_POST_MODIFY (Pmode, stack_pointer_rtx, mem);
30079dde 6326 emit_move_insn (reg1, gen_frame_mem (mode, mem));
89ac681e
WD
6327 }
6328 else
6329 {
6330 rtx reg2 = gen_rtx_REG (mode, regno2);
6331 *cfi_ops = alloc_reg_note (REG_CFA_RESTORE, reg2, *cfi_ops);
6332 emit_insn (aarch64_gen_loadwb_pair (mode, stack_pointer_rtx, reg1,
6333 reg2, adjustment));
6334 }
6335}
6336
04ddfe06
KT
6337/* Generate and return a store pair instruction of mode MODE to store
6338 register REG1 to MEM1 and register REG2 to MEM2. */
6339
72df5c1f 6340static rtx
ef4bddc2 6341aarch64_gen_store_pair (machine_mode mode, rtx mem1, rtx reg1, rtx mem2,
72df5c1f
JW
6342 rtx reg2)
6343{
6344 switch (mode)
6345 {
4e10a5a7 6346 case E_DImode:
dfe1da23 6347 return gen_store_pair_dw_didi (mem1, reg1, mem2, reg2);
72df5c1f 6348
4e10a5a7 6349 case E_DFmode:
dfe1da23 6350 return gen_store_pair_dw_dfdf (mem1, reg1, mem2, reg2);
72df5c1f 6351
a0d0b980
SE
6352 case E_TFmode:
6353 return gen_store_pair_dw_tftf (mem1, reg1, mem2, reg2);
6354
72df5c1f
JW
6355 default:
6356 gcc_unreachable ();
6357 }
6358}
6359
04ddfe06
KT
6360/* Generate and return a load pair instruction of mode MODE to load register
6361 REG1 from MEM1 and register REG2 from MEM2. */
6362
72df5c1f 6363static rtx
ef4bddc2 6364aarch64_gen_load_pair (machine_mode mode, rtx reg1, rtx mem1, rtx reg2,
72df5c1f
JW
6365 rtx mem2)
6366{
6367 switch (mode)
6368 {
4e10a5a7 6369 case E_DImode:
dfe1da23 6370 return gen_load_pair_dw_didi (reg1, mem1, reg2, mem2);
72df5c1f 6371
4e10a5a7 6372 case E_DFmode:
dfe1da23 6373 return gen_load_pair_dw_dfdf (reg1, mem1, reg2, mem2);
72df5c1f 6374
a0d0b980
SE
6375 case E_TFmode:
6376 return gen_load_pair_dw_tftf (reg1, mem1, reg2, mem2);
6377
72df5c1f
JW
6378 default:
6379 gcc_unreachable ();
6380 }
6381}
6382
db58fd89
JW
6383/* Return TRUE if return address signing should be enabled for the current
6384 function, otherwise return FALSE. */
6385
6386bool
6387aarch64_return_address_signing_enabled (void)
6388{
6389 /* This function should only be called after the frame is laid out. */
6390 gcc_assert (cfun->machine->frame.laid_out);
6391
6392 /* If signing scope is AARCH64_FUNCTION_NON_LEAF, we only sign a leaf function
8fc16d72 6393 if its LR is pushed onto the stack. */
db58fd89
JW
6394 return (aarch64_ra_sign_scope == AARCH64_FUNCTION_ALL
6395 || (aarch64_ra_sign_scope == AARCH64_FUNCTION_NON_LEAF
c600df9a 6396 && known_ge (cfun->machine->frame.reg_offset[LR_REGNUM], 0)));
db58fd89
JW
6397}
6398
30afdf34
SD
6399/* Return TRUE if Branch Target Identification Mechanism is enabled. */
6400bool
6401aarch64_bti_enabled (void)
6402{
6403 return (aarch64_enable_bti == 1);
6404}
6405
c600df9a
RS
6406/* The caller is going to use ST1D or LD1D to save or restore an SVE
6407 register in mode MODE at BASE_RTX + OFFSET, where OFFSET is in
6408 the range [1, 16] * GET_MODE_SIZE (MODE). Prepare for this by:
6409
6410 (1) updating BASE_RTX + OFFSET so that it is a legitimate ST1D
6411 or LD1D address
6412
6413 (2) setting PTRUE to a valid predicate register for the ST1D or LD1D,
6414 if the variable isn't already nonnull
6415
6416 (1) is needed when OFFSET is in the range [8, 16] * GET_MODE_SIZE (MODE).
6417 Handle this case using a temporary base register that is suitable for
6418 all offsets in that range. Use ANCHOR_REG as this base register if it
6419 is nonnull, otherwise create a new register and store it in ANCHOR_REG. */
6420
6421static inline void
6422aarch64_adjust_sve_callee_save_base (machine_mode mode, rtx &base_rtx,
6423 rtx &anchor_reg, poly_int64 &offset,
6424 rtx &ptrue)
6425{
6426 if (maybe_ge (offset, 8 * GET_MODE_SIZE (mode)))
6427 {
6428 /* This is the maximum valid offset of the anchor from the base.
6429 Lower values would be valid too. */
6430 poly_int64 anchor_offset = 16 * GET_MODE_SIZE (mode);
6431 if (!anchor_reg)
6432 {
6433 anchor_reg = gen_rtx_REG (Pmode, STACK_CLASH_SVE_CFA_REGNUM);
6434 emit_insn (gen_add3_insn (anchor_reg, base_rtx,
6435 gen_int_mode (anchor_offset, Pmode)));
6436 }
6437 base_rtx = anchor_reg;
6438 offset -= anchor_offset;
6439 }
6440 if (!ptrue)
6441 {
6442 int pred_reg = cfun->machine->frame.spare_pred_reg;
6443 emit_move_insn (gen_rtx_REG (VNx16BImode, pred_reg),
6444 CONSTM1_RTX (VNx16BImode));
6445 ptrue = gen_rtx_REG (VNx2BImode, pred_reg);
6446 }
6447}
6448
6449/* Add a REG_CFA_EXPRESSION note to INSN to say that register REG
6450 is saved at BASE + OFFSET. */
6451
6452static void
6453aarch64_add_cfa_expression (rtx_insn *insn, rtx reg,
6454 rtx base, poly_int64 offset)
6455{
6456 rtx mem = gen_frame_mem (GET_MODE (reg),
6457 plus_constant (Pmode, base, offset));
6458 add_reg_note (insn, REG_CFA_EXPRESSION, gen_rtx_SET (mem, reg));
6459}
6460
04ddfe06
KT
6461/* Emit code to save the callee-saved registers from register number START
6462 to LIMIT to the stack at the location starting at offset START_OFFSET,
c600df9a
RS
6463 skipping any write-back candidates if SKIP_WB is true. HARD_FP_VALID_P
6464 is true if the hard frame pointer has been set up. */
43e9d192 6465
43e9d192 6466static void
c600df9a
RS
6467aarch64_save_callee_saves (poly_int64 start_offset,
6468 unsigned start, unsigned limit, bool skip_wb,
6469 bool hard_fp_valid_p)
43e9d192 6470{
5d8a22a5 6471 rtx_insn *insn;
43e9d192
IB
6472 unsigned regno;
6473 unsigned regno2;
c600df9a 6474 rtx anchor_reg = NULL_RTX, ptrue = NULL_RTX;
43e9d192 6475
0ec74a1e 6476 for (regno = aarch64_next_callee_save (start, limit);
64dedd72
JW
6477 regno <= limit;
6478 regno = aarch64_next_callee_save (regno + 1, limit))
43e9d192 6479 {
ae13fce3 6480 rtx reg, mem;
6a70badb 6481 poly_int64 offset;
c600df9a 6482 bool frame_related_p = aarch64_emit_cfi_for_reg_p (regno);
64dedd72 6483
ae13fce3
JW
6484 if (skip_wb
6485 && (regno == cfun->machine->frame.wb_candidate1
6486 || regno == cfun->machine->frame.wb_candidate2))
6487 continue;
6488
827ab47a 6489 if (cfun->machine->reg_is_wrapped_separately[regno])
c600df9a 6490 continue;
827ab47a 6491
c600df9a 6492 machine_mode mode = aarch64_reg_save_mode (regno);
ae13fce3
JW
6493 reg = gen_rtx_REG (mode, regno);
6494 offset = start_offset + cfun->machine->frame.reg_offset[regno];
c600df9a
RS
6495 rtx base_rtx = stack_pointer_rtx;
6496 poly_int64 sp_offset = offset;
64dedd72 6497
c600df9a
RS
6498 HOST_WIDE_INT const_offset;
6499 if (mode == VNx2DImode && BYTES_BIG_ENDIAN)
6500 aarch64_adjust_sve_callee_save_base (mode, base_rtx, anchor_reg,
6501 offset, ptrue);
6502 else if (GP_REGNUM_P (regno)
6503 && (!offset.is_constant (&const_offset) || const_offset >= 512))
6504 {
6505 gcc_assert (known_eq (start_offset, 0));
6506 poly_int64 fp_offset
6507 = cfun->machine->frame.below_hard_fp_saved_regs_size;
6508 if (hard_fp_valid_p)
6509 base_rtx = hard_frame_pointer_rtx;
6510 else
6511 {
6512 if (!anchor_reg)
6513 {
6514 anchor_reg = gen_rtx_REG (Pmode, STACK_CLASH_SVE_CFA_REGNUM);
6515 emit_insn (gen_add3_insn (anchor_reg, base_rtx,
6516 gen_int_mode (fp_offset, Pmode)));
6517 }
6518 base_rtx = anchor_reg;
6519 }
6520 offset -= fp_offset;
6521 }
6522 mem = gen_frame_mem (mode, plus_constant (Pmode, base_rtx, offset));
6523 bool need_cfa_note_p = (base_rtx != stack_pointer_rtx);
64dedd72 6524
c600df9a
RS
6525 if (!aarch64_sve_mode_p (mode)
6526 && (regno2 = aarch64_next_callee_save (regno + 1, limit)) <= limit
827ab47a 6527 && !cfun->machine->reg_is_wrapped_separately[regno2]
c600df9a
RS
6528 && known_eq (GET_MODE_SIZE (mode),
6529 cfun->machine->frame.reg_offset[regno2]
6530 - cfun->machine->frame.reg_offset[regno]))
43e9d192 6531 {
0ec74a1e 6532 rtx reg2 = gen_rtx_REG (mode, regno2);
64dedd72
JW
6533 rtx mem2;
6534
c600df9a
RS
6535 offset += GET_MODE_SIZE (mode);
6536 mem2 = gen_frame_mem (mode, plus_constant (Pmode, base_rtx, offset));
8ed2fc62
JW
6537 insn = emit_insn (aarch64_gen_store_pair (mode, mem, reg, mem2,
6538 reg2));
0b4a9743 6539
64dedd72
JW
6540 /* The first part of a frame-related parallel insn is
6541 always assumed to be relevant to the frame
6542 calculations; subsequent parts are only
6543 frame-related if explicitly marked. */
c600df9a
RS
6544 if (aarch64_emit_cfi_for_reg_p (regno2))
6545 {
6546 if (need_cfa_note_p)
6547 aarch64_add_cfa_expression (insn, reg2, stack_pointer_rtx,
6548 sp_offset + GET_MODE_SIZE (mode));
6549 else
6550 RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 1)) = 1;
6551 }
6552
64dedd72
JW
6553 regno = regno2;
6554 }
c600df9a
RS
6555 else if (mode == VNx2DImode && BYTES_BIG_ENDIAN)
6556 {
6557 insn = emit_insn (gen_aarch64_pred_mov (mode, mem, ptrue, reg));
6558 need_cfa_note_p = true;
6559 }
6560 else if (aarch64_sve_mode_p (mode))
6561 insn = emit_insn (gen_rtx_SET (mem, reg));
64dedd72 6562 else
8ed2fc62
JW
6563 insn = emit_move_insn (mem, reg);
6564
c600df9a
RS
6565 RTX_FRAME_RELATED_P (insn) = frame_related_p;
6566 if (frame_related_p && need_cfa_note_p)
6567 aarch64_add_cfa_expression (insn, reg, stack_pointer_rtx, sp_offset);
8ed2fc62
JW
6568 }
6569}
6570
c600df9a
RS
6571/* Emit code to restore the callee registers from register number START
6572 up to and including LIMIT. Restore from the stack offset START_OFFSET,
6573 skipping any write-back candidates if SKIP_WB is true. Write the
6574 appropriate REG_CFA_RESTORE notes into CFI_OPS. */
04ddfe06 6575
8ed2fc62 6576static void
c600df9a 6577aarch64_restore_callee_saves (poly_int64 start_offset, unsigned start,
dd991abb 6578 unsigned limit, bool skip_wb, rtx *cfi_ops)
8ed2fc62 6579{
8ed2fc62
JW
6580 unsigned regno;
6581 unsigned regno2;
6a70badb 6582 poly_int64 offset;
c600df9a 6583 rtx anchor_reg = NULL_RTX, ptrue = NULL_RTX;
8ed2fc62
JW
6584
6585 for (regno = aarch64_next_callee_save (start, limit);
6586 regno <= limit;
6587 regno = aarch64_next_callee_save (regno + 1, limit))
6588 {
c600df9a 6589 bool frame_related_p = aarch64_emit_cfi_for_reg_p (regno);
827ab47a 6590 if (cfun->machine->reg_is_wrapped_separately[regno])
c600df9a 6591 continue;
827ab47a 6592
ae13fce3 6593 rtx reg, mem;
8ed2fc62 6594
ae13fce3
JW
6595 if (skip_wb
6596 && (regno == cfun->machine->frame.wb_candidate1
6597 || regno == cfun->machine->frame.wb_candidate2))
6598 continue;
6599
c600df9a 6600 machine_mode mode = aarch64_reg_save_mode (regno);
ae13fce3 6601 reg = gen_rtx_REG (mode, regno);
8ed2fc62 6602 offset = start_offset + cfun->machine->frame.reg_offset[regno];
c600df9a
RS
6603 rtx base_rtx = stack_pointer_rtx;
6604 if (mode == VNx2DImode && BYTES_BIG_ENDIAN)
6605 aarch64_adjust_sve_callee_save_base (mode, base_rtx, anchor_reg,
6606 offset, ptrue);
30079dde 6607 mem = gen_frame_mem (mode, plus_constant (Pmode, base_rtx, offset));
8ed2fc62 6608
c600df9a
RS
6609 if (!aarch64_sve_mode_p (mode)
6610 && (regno2 = aarch64_next_callee_save (regno + 1, limit)) <= limit
827ab47a 6611 && !cfun->machine->reg_is_wrapped_separately[regno2]
c600df9a
RS
6612 && known_eq (GET_MODE_SIZE (mode),
6613 cfun->machine->frame.reg_offset[regno2]
6614 - cfun->machine->frame.reg_offset[regno]))
64dedd72 6615 {
8ed2fc62
JW
6616 rtx reg2 = gen_rtx_REG (mode, regno2);
6617 rtx mem2;
6618
c600df9a 6619 offset += GET_MODE_SIZE (mode);
30079dde 6620 mem2 = gen_frame_mem (mode, plus_constant (Pmode, base_rtx, offset));
dd991abb 6621 emit_insn (aarch64_gen_load_pair (mode, reg, mem, reg2, mem2));
8ed2fc62 6622
dd991abb 6623 *cfi_ops = alloc_reg_note (REG_CFA_RESTORE, reg2, *cfi_ops);
8ed2fc62 6624 regno = regno2;
43e9d192 6625 }
c600df9a
RS
6626 else if (mode == VNx2DImode && BYTES_BIG_ENDIAN)
6627 emit_insn (gen_aarch64_pred_mov (mode, reg, ptrue, mem));
6628 else if (aarch64_sve_mode_p (mode))
6629 emit_insn (gen_rtx_SET (reg, mem));
8ed2fc62 6630 else
dd991abb 6631 emit_move_insn (reg, mem);
c600df9a
RS
6632 if (frame_related_p)
6633 *cfi_ops = alloc_reg_note (REG_CFA_RESTORE, reg, *cfi_ops);
43e9d192 6634 }
43e9d192
IB
6635}
6636
43cacb12
RS
6637/* Return true if OFFSET is a signed 4-bit value multiplied by the size
6638 of MODE. */
6639
6640static inline bool
6641offset_4bit_signed_scaled_p (machine_mode mode, poly_int64 offset)
6642{
6643 HOST_WIDE_INT multiple;
6644 return (constant_multiple_p (offset, GET_MODE_SIZE (mode), &multiple)
6645 && IN_RANGE (multiple, -8, 7));
6646}
6647
6648/* Return true if OFFSET is an unsigned 6-bit value multiplied by the size
6649 of MODE. */
6650
6651static inline bool
6652offset_6bit_unsigned_scaled_p (machine_mode mode, poly_int64 offset)
6653{
6654 HOST_WIDE_INT multiple;
6655 return (constant_multiple_p (offset, GET_MODE_SIZE (mode), &multiple)
6656 && IN_RANGE (multiple, 0, 63));
6657}
6658
6659/* Return true if OFFSET is a signed 7-bit value multiplied by the size
6660 of MODE. */
6661
6662bool
6663aarch64_offset_7bit_signed_scaled_p (machine_mode mode, poly_int64 offset)
6664{
6665 HOST_WIDE_INT multiple;
6666 return (constant_multiple_p (offset, GET_MODE_SIZE (mode), &multiple)
6667 && IN_RANGE (multiple, -64, 63));
6668}
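/* Illustrative addition, not part of the original source: a minimal sketch of
   how the scaled-offset predicates above behave for DImode (8-byte) slots.
   The helper name below is made up for illustration and the block is guarded
   out of the build with #if 0, so nothing here is compiled.  */
#if 0
static void
aarch64_offset_predicate_examples (void)
{
  /* 504 = 63 * 8, the largest positive offset in the signed 7-bit range.  */
  gcc_checking_assert (aarch64_offset_7bit_signed_scaled_p (DImode, 504));
  /* -520 = -65 * 8 lies one slot outside the signed 7-bit range.  */
  gcc_checking_assert (!aarch64_offset_7bit_signed_scaled_p (DImode, -520));
  /* 4 is not a multiple of the mode size, so it is rejected outright.  */
  gcc_checking_assert (!aarch64_offset_7bit_signed_scaled_p (DImode, 4));
}
#endif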
6669
6670/* Return true if OFFSET is a signed 9-bit value. */
6671
3c5af608
MM
6672bool
6673aarch64_offset_9bit_signed_unscaled_p (machine_mode mode ATTRIBUTE_UNUSED,
6674 poly_int64 offset)
827ab47a 6675{
6a70badb
RS
6676 HOST_WIDE_INT const_offset;
6677 return (offset.is_constant (&const_offset)
6678 && IN_RANGE (const_offset, -256, 255));
827ab47a
KT
6679}
6680
43cacb12
RS
6681/* Return true if OFFSET is a signed 9-bit value multiplied by the size
6682 of MODE. */
6683
827ab47a 6684static inline bool
43cacb12 6685offset_9bit_signed_scaled_p (machine_mode mode, poly_int64 offset)
827ab47a 6686{
6a70badb
RS
6687 HOST_WIDE_INT multiple;
6688 return (constant_multiple_p (offset, GET_MODE_SIZE (mode), &multiple)
43cacb12 6689 && IN_RANGE (multiple, -256, 255));
827ab47a
KT
6690}
6691
43cacb12
RS
6692/* Return true if OFFSET is an unsigned 12-bit value multiplied by the size
6693 of MODE. */
6694
6695static inline bool
6696offset_12bit_unsigned_scaled_p (machine_mode mode, poly_int64 offset)
827ab47a 6697{
6a70badb
RS
6698 HOST_WIDE_INT multiple;
6699 return (constant_multiple_p (offset, GET_MODE_SIZE (mode), &multiple)
43cacb12 6700 && IN_RANGE (multiple, 0, 4095));
827ab47a
KT
6701}
6702
6703/* Implement TARGET_SHRINK_WRAP_GET_SEPARATE_COMPONENTS. */
6704
6705static sbitmap
6706aarch64_get_separate_components (void)
6707{
827ab47a
KT
6708 sbitmap components = sbitmap_alloc (LAST_SAVED_REGNUM + 1);
6709 bitmap_clear (components);
6710
6711 /* The registers we need saved to the frame. */
6712 for (unsigned regno = 0; regno <= LAST_SAVED_REGNUM; regno++)
6713 if (aarch64_register_saved_on_entry (regno))
6714 {
c600df9a
RS
6715 /* Punt on saves and restores that use ST1D and LD1D. We could
6716 try to be smarter, but it would involve making sure that the
6717 spare predicate register itself is safe to use at the save
6718 and restore points. Also, when a frame pointer is being used,
6719 the slots are often out of reach of ST1D and LD1D anyway. */
6720 machine_mode mode = aarch64_reg_save_mode (regno);
6721 if (mode == VNx2DImode && BYTES_BIG_ENDIAN)
6722 continue;
6723
6a70badb 6724 poly_int64 offset = cfun->machine->frame.reg_offset[regno];
c600df9a
RS
6725
6726 /* If the register is saved in the first SVE save slot, we use
6727 it as a stack probe for -fstack-clash-protection. */
6728 if (flag_stack_clash_protection
6729 && maybe_ne (cfun->machine->frame.below_hard_fp_saved_regs_size, 0)
6730 && known_eq (offset, 0))
6731 continue;
6732
6733 /* Get the offset relative to the register we'll use. */
6734 if (frame_pointer_needed)
6735 offset -= cfun->machine->frame.below_hard_fp_saved_regs_size;
6736 else
6737 offset += crtl->outgoing_args_size;
6738
827ab47a
KT
6739 /* Check that we can access the stack slot of the register with one
6740 direct load with no adjustments needed. */
c600df9a
RS
6741 if (aarch64_sve_mode_p (mode)
6742 ? offset_9bit_signed_scaled_p (mode, offset)
6743 : offset_12bit_unsigned_scaled_p (mode, offset))
827ab47a
KT
6744 bitmap_set_bit (components, regno);
6745 }
6746
6747 /* Don't mess with the hard frame pointer. */
6748 if (frame_pointer_needed)
6749 bitmap_clear_bit (components, HARD_FRAME_POINTER_REGNUM);
6750
c600df9a
RS
6751 /* If the spare predicate register used by big-endian SVE code
6752 is call-preserved, it must be saved in the main prologue
6753 before any saves that use it. */
6754 if (cfun->machine->frame.spare_pred_reg != INVALID_REGNUM)
6755 bitmap_clear_bit (components, cfun->machine->frame.spare_pred_reg);
6756
827ab47a
KT
6757 unsigned reg1 = cfun->machine->frame.wb_candidate1;
6758 unsigned reg2 = cfun->machine->frame.wb_candidate2;
0795f659 6759 /* If registers have been chosen to be stored/restored with
827ab47a
KT
6760 writeback, don't interfere with them to avoid having to output explicit
6761 stack adjustment instructions. */
6762 if (reg2 != INVALID_REGNUM)
6763 bitmap_clear_bit (components, reg2);
6764 if (reg1 != INVALID_REGNUM)
6765 bitmap_clear_bit (components, reg1);
6766
6767 bitmap_clear_bit (components, LR_REGNUM);
6768 bitmap_clear_bit (components, SP_REGNUM);
6769
6770 return components;
6771}
6772
6773/* Implement TARGET_SHRINK_WRAP_COMPONENTS_FOR_BB. */
6774
6775static sbitmap
6776aarch64_components_for_bb (basic_block bb)
6777{
6778 bitmap in = DF_LIVE_IN (bb);
6779 bitmap gen = &DF_LIVE_BB_INFO (bb)->gen;
6780 bitmap kill = &DF_LIVE_BB_INFO (bb)->kill;
6781
6782 sbitmap components = sbitmap_alloc (LAST_SAVED_REGNUM + 1);
6783 bitmap_clear (components);
6784
ce9d2a37
RS
6785 /* Clobbered registers don't generate values in any meaningful sense,
6786 since nothing after the clobber can rely on their value. And we can't
6787 say that partially-clobbered registers are unconditionally killed,
6788 because whether they're killed or not depends on the mode of the
6789 value they're holding. Thus partially call-clobbered registers
6790 appear in neither the kill set nor the gen set.
6791
6792 Check manually for any calls that clobber more of a register than the
6793 current function can. */
6794 function_abi_aggregator callee_abis;
6795 rtx_insn *insn;
6796 FOR_BB_INSNS (bb, insn)
6797 if (CALL_P (insn))
6798 callee_abis.note_callee_abi (insn_callee_abi (insn));
6799 HARD_REG_SET extra_caller_saves = callee_abis.caller_save_regs (*crtl->abi);
6800
827ab47a
KT
6801 /* GPRs are used in a bb if they are in the IN, GEN, or KILL sets. */
6802 for (unsigned regno = 0; regno <= LAST_SAVED_REGNUM; regno++)
dcdd0f05
RS
6803 if (!fixed_regs[regno]
6804 && !crtl->abi->clobbers_full_reg_p (regno)
ce9d2a37
RS
6805 && (TEST_HARD_REG_BIT (extra_caller_saves, regno)
6806 || bitmap_bit_p (in, regno)
6807 || bitmap_bit_p (gen, regno)
6808 || bitmap_bit_p (kill, regno)))
3f26f054 6809 {
3f26f054
WD
6810 bitmap_set_bit (components, regno);
6811
6812 /* If there is a callee-save at an adjacent offset, add it too
6813 to increase the use of LDP/STP. */
c600df9a
RS
6814 poly_int64 offset = cfun->machine->frame.reg_offset[regno];
6815 unsigned regno2 = multiple_p (offset, 16) ? regno + 1 : regno - 1;
3f26f054
WD
6816
6817 if (regno2 <= LAST_SAVED_REGNUM)
6818 {
c600df9a
RS
6819 poly_int64 offset2 = cfun->machine->frame.reg_offset[regno2];
6820 if (regno < regno2
6821 ? known_eq (offset + 8, offset2)
6822 : multiple_p (offset2, 16) && known_eq (offset2 + 8, offset))
3f26f054
WD
6823 bitmap_set_bit (components, regno2);
6824 }
6825 }
827ab47a
KT
6826
6827 return components;
6828}
6829
6830/* Implement TARGET_SHRINK_WRAP_DISQUALIFY_COMPONENTS.
6831 Nothing to do for aarch64. */
6832
6833static void
6834aarch64_disqualify_components (sbitmap, edge, sbitmap, bool)
6835{
6836}
6837
6838/* Return the next set bit in BMP from START onwards. Return the total number
6839 of bits in BMP if no set bit is found at or after START. */
6840
6841static unsigned int
6842aarch64_get_next_set_bit (sbitmap bmp, unsigned int start)
6843{
6844 unsigned int nbits = SBITMAP_SIZE (bmp);
6845 if (start == nbits)
6846 return start;
6847
6848 gcc_assert (start < nbits);
6849 for (unsigned int i = start; i < nbits; i++)
6850 if (bitmap_bit_p (bmp, i))
6851 return i;
6852
6853 return nbits;
6854}
6855
6856/* Do the work for aarch64_emit_prologue_components and
6857 aarch64_emit_epilogue_components. COMPONENTS is the bitmap of registers
6858 to save/restore, PROLOGUE_P indicates whether to emit the prologue sequence
6859 for these components or the epilogue sequence. That is, it determines
6860 whether we should emit stores or loads and what kind of CFA notes to attach
6861 to the insns. Otherwise the logic for the two sequences is very
6862 similar. */
6863
6864static void
6865aarch64_process_components (sbitmap components, bool prologue_p)
6866{
6867 rtx ptr_reg = gen_rtx_REG (Pmode, frame_pointer_needed
6868 ? HARD_FRAME_POINTER_REGNUM
6869 : STACK_POINTER_REGNUM);
6870
6871 unsigned last_regno = SBITMAP_SIZE (components);
6872 unsigned regno = aarch64_get_next_set_bit (components, R0_REGNUM);
6873 rtx_insn *insn = NULL;
6874
6875 while (regno != last_regno)
6876 {
c600df9a
RS
6877 bool frame_related_p = aarch64_emit_cfi_for_reg_p (regno);
6878 machine_mode mode = aarch64_reg_save_mode (regno);
a0d0b980 6879
827ab47a 6880 rtx reg = gen_rtx_REG (mode, regno);
6a70badb 6881 poly_int64 offset = cfun->machine->frame.reg_offset[regno];
c600df9a
RS
6882 if (frame_pointer_needed)
6883 offset -= cfun->machine->frame.below_hard_fp_saved_regs_size;
6884 else
6885 offset += crtl->outgoing_args_size;
6886
827ab47a
KT
6887 rtx addr = plus_constant (Pmode, ptr_reg, offset);
6888 rtx mem = gen_frame_mem (mode, addr);
6889
6890 rtx set = prologue_p ? gen_rtx_SET (mem, reg) : gen_rtx_SET (reg, mem);
6891 unsigned regno2 = aarch64_get_next_set_bit (components, regno + 1);
6892 /* No more registers to handle after REGNO.
6893 Emit a single save/restore and exit. */
6894 if (regno2 == last_regno)
6895 {
6896 insn = emit_insn (set);
c600df9a
RS
6897 if (frame_related_p)
6898 {
6899 RTX_FRAME_RELATED_P (insn) = 1;
6900 if (prologue_p)
6901 add_reg_note (insn, REG_CFA_OFFSET, copy_rtx (set));
6902 else
6903 add_reg_note (insn, REG_CFA_RESTORE, reg);
6904 }
827ab47a
KT
6905 break;
6906 }
6907
6a70badb 6908 poly_int64 offset2 = cfun->machine->frame.reg_offset[regno2];
827ab47a
KT
6909 /* The next register is not of the same class or its offset is not
6910 mergeable with the current one into a pair. */
c600df9a
RS
6911 if (aarch64_sve_mode_p (mode)
6912 || !satisfies_constraint_Ump (mem)
827ab47a 6913 || GP_REGNUM_P (regno) != GP_REGNUM_P (regno2)
dcdd0f05 6914 || (crtl->abi->id () == ARM_PCS_SIMD && FP_REGNUM_P (regno))
6a70badb
RS
6915 || maybe_ne ((offset2 - cfun->machine->frame.reg_offset[regno]),
6916 GET_MODE_SIZE (mode)))
827ab47a
KT
6917 {
6918 insn = emit_insn (set);
c600df9a
RS
6919 if (frame_related_p)
6920 {
6921 RTX_FRAME_RELATED_P (insn) = 1;
6922 if (prologue_p)
6923 add_reg_note (insn, REG_CFA_OFFSET, copy_rtx (set));
6924 else
6925 add_reg_note (insn, REG_CFA_RESTORE, reg);
6926 }
827ab47a
KT
6927
6928 regno = regno2;
6929 continue;
6930 }
6931
c600df9a
RS
6932 bool frame_related2_p = aarch64_emit_cfi_for_reg_p (regno2);
6933
827ab47a
KT
6934 /* REGNO2 can be saved/restored in a pair with REGNO. */
6935 rtx reg2 = gen_rtx_REG (mode, regno2);
c600df9a
RS
6936 if (frame_pointer_needed)
6937 offset2 -= cfun->machine->frame.below_hard_fp_saved_regs_size;
6938 else
6939 offset2 += crtl->outgoing_args_size;
827ab47a
KT
6940 rtx addr2 = plus_constant (Pmode, ptr_reg, offset2);
6941 rtx mem2 = gen_frame_mem (mode, addr2);
6942 rtx set2 = prologue_p ? gen_rtx_SET (mem2, reg2)
6943 : gen_rtx_SET (reg2, mem2);
6944
6945 if (prologue_p)
6946 insn = emit_insn (aarch64_gen_store_pair (mode, mem, reg, mem2, reg2));
6947 else
6948 insn = emit_insn (aarch64_gen_load_pair (mode, reg, mem, reg2, mem2));
6949
c600df9a 6950 if (frame_related_p || frame_related2_p)
827ab47a 6951 {
c600df9a
RS
6952 RTX_FRAME_RELATED_P (insn) = 1;
6953 if (prologue_p)
6954 {
6955 if (frame_related_p)
6956 add_reg_note (insn, REG_CFA_OFFSET, set);
6957 if (frame_related2_p)
6958 add_reg_note (insn, REG_CFA_OFFSET, set2);
6959 }
6960 else
6961 {
6962 if (frame_related_p)
6963 add_reg_note (insn, REG_CFA_RESTORE, reg);
6964 if (frame_related2_p)
6965 add_reg_note (insn, REG_CFA_RESTORE, reg2);
6966 }
827ab47a
KT
6967 }
6968
6969 regno = aarch64_get_next_set_bit (components, regno2 + 1);
6970 }
6971}
6972
6973/* Implement TARGET_SHRINK_WRAP_EMIT_PROLOGUE_COMPONENTS. */
6974
6975static void
6976aarch64_emit_prologue_components (sbitmap components)
6977{
6978 aarch64_process_components (components, true);
6979}
6980
6981/* Implement TARGET_SHRINK_WRAP_EMIT_EPILOGUE_COMPONENTS. */
6982
6983static void
6984aarch64_emit_epilogue_components (sbitmap components)
6985{
6986 aarch64_process_components (components, false);
6987}
6988
6989/* Implement TARGET_SHRINK_WRAP_SET_HANDLED_COMPONENTS. */
6990
6991static void
6992aarch64_set_handled_components (sbitmap components)
6993{
6994 for (unsigned regno = 0; regno <= LAST_SAVED_REGNUM; regno++)
6995 if (bitmap_bit_p (components, regno))
6996 cfun->machine->reg_is_wrapped_separately[regno] = true;
6997}
6998
8c6e3b23
TC
6999/* On AArch64 we have an ABI defined safe buffer. This constant is used to
7000 determine the probe offset for alloca. */
7001
7002static HOST_WIDE_INT
7003aarch64_stack_clash_protection_alloca_probe_range (void)
7004{
7005 return STACK_CLASH_CALLER_GUARD;
7006}
7007
7008
cd1bef27
JL
7009/* Allocate POLY_SIZE bytes of stack space using TEMP1 and TEMP2 as scratch
7010 registers. If POLY_SIZE is not large enough to require a probe this function
7011 will only adjust the stack. When allocating the stack space
7012 FRAME_RELATED_P is then used to indicate if the allocation is frame related.
7013 FINAL_ADJUSTMENT_P indicates whether we are allocating the outgoing
7014 arguments. If we are, then we ensure that any allocation larger than the ABI
7015 defined buffer needs a probe so that the invariant of having a 1KB buffer is
7016 maintained.
7017
7018 We emit barriers after each stack adjustment to prevent optimizations from
7019 breaking the invariant that we never drop the stack more than a page. This
7020 invariant is needed to make it easier to correctly handle asynchronous
7021 events: e.g. if we were to allow the stack to be dropped by more than a page
7022 and then have multiple probes outstanding, and we take a signal somewhere in
7023 between, then the signal handler doesn't know the state of the stack and can
7024 make no assumptions about which pages have been probed. */
7025
7026static void
7027aarch64_allocate_and_probe_stack_space (rtx temp1, rtx temp2,
7028 poly_int64 poly_size,
7029 bool frame_related_p,
7030 bool final_adjustment_p)
7031{
7032 HOST_WIDE_INT guard_size
028d4092 7033 = 1 << param_stack_clash_protection_guard_size;
cd1bef27 7034 HOST_WIDE_INT guard_used_by_caller = STACK_CLASH_CALLER_GUARD;
cd1bef27 7035 HOST_WIDE_INT min_probe_threshold
c600df9a
RS
7036 = (final_adjustment_p
7037 ? guard_used_by_caller
7038 : guard_size - guard_used_by_caller);
7039 /* When doing the final adjustment for the outgoing arguments, take into
7040 account any unprobed space there is above the current SP. There are
7041 two cases:
7042
7043 - When saving SVE registers below the hard frame pointer, we force
7044 the lowest save to take place in the prologue before doing the final
7045 adjustment (i.e. we don't allow the save to be shrink-wrapped).
7046 This acts as a probe at SP, so there is no unprobed space.
7047
7048 - When there are no SVE register saves, we use the store of the link
7049 register as a probe. We can't assume that LR was saved at position 0
7050 though, so treat any space below it as unprobed. */
7051 if (final_adjustment_p
7052 && known_eq (cfun->machine->frame.below_hard_fp_saved_regs_size, 0))
7053 {
7054 poly_int64 lr_offset = cfun->machine->frame.reg_offset[LR_REGNUM];
7055 if (known_ge (lr_offset, 0))
7056 min_probe_threshold -= lr_offset.to_constant ();
7057 else
7058 gcc_assert (!flag_stack_clash_protection || known_eq (poly_size, 0));
7059 }
cd1bef27
JL
7060
7061 poly_int64 frame_size = cfun->machine->frame.frame_size;
7062
7063 /* We should always have a positive probe threshold. */
7064 gcc_assert (min_probe_threshold > 0);
7065
7066 if (flag_stack_clash_protection && !final_adjustment_p)
7067 {
7068 poly_int64 initial_adjust = cfun->machine->frame.initial_adjust;
c600df9a 7069 poly_int64 sve_callee_adjust = cfun->machine->frame.sve_callee_adjust;
cd1bef27
JL
7070 poly_int64 final_adjust = cfun->machine->frame.final_adjust;
7071
7072 if (known_eq (frame_size, 0))
7073 {
7074 dump_stack_clash_frame_info (NO_PROBE_NO_FRAME, false);
7075 }
c600df9a
RS
7076 else if (known_lt (initial_adjust + sve_callee_adjust,
7077 guard_size - guard_used_by_caller)
cd1bef27
JL
7078 && known_lt (final_adjust, guard_used_by_caller))
7079 {
7080 dump_stack_clash_frame_info (NO_PROBE_SMALL_FRAME, true);
7081 }
7082 }
7083
cd1bef27
JL
7084 /* If SIZE is not large enough to require probing, just adjust the stack and
7085 exit. */
eb471ba3 7086 if (known_lt (poly_size, min_probe_threshold)
cd1bef27
JL
7087 || !flag_stack_clash_protection)
7088 {
7089 aarch64_sub_sp (temp1, temp2, poly_size, frame_related_p);
7090 return;
7091 }
7092
eb471ba3
TC
7093 HOST_WIDE_INT size;
7094 /* Handle the SVE non-constant case first. */
7095 if (!poly_size.is_constant (&size))
7096 {
7097 if (dump_file)
7098 {
7099 fprintf (dump_file, "Stack clash SVE prologue: ");
7100 print_dec (poly_size, dump_file);
7101 fprintf (dump_file, " bytes, dynamic probing will be required.\n");
7102 }
7103
7104 /* First calculate the amount of bytes we're actually spilling. */
7105 aarch64_add_offset (Pmode, temp1, CONST0_RTX (Pmode),
7106 poly_size, temp1, temp2, false, true);
7107
7108 rtx_insn *insn = get_last_insn ();
7109
7110 if (frame_related_p)
7111 {
7112 /* This is done to provide unwinding information for the stack
7113 adjustments we're about to do, however to prevent the optimizers
143d3b15 7114 from removing the R11 move and leaving the CFA note (which would be
eb471ba3
TC
7115 very wrong) we tie the old and new stack pointer together.
7116 The tie will expand to nothing but the optimizers will not touch
7117 the instruction. */
143d3b15 7118 rtx stack_ptr_copy = gen_rtx_REG (Pmode, STACK_CLASH_SVE_CFA_REGNUM);
eb471ba3
TC
7119 emit_move_insn (stack_ptr_copy, stack_pointer_rtx);
7120 emit_insn (gen_stack_tie (stack_ptr_copy, stack_pointer_rtx));
7121
7122 /* We want the CFA independent of the stack pointer for the
7123 duration of the loop. */
7124 add_reg_note (insn, REG_CFA_DEF_CFA, stack_ptr_copy);
7125 RTX_FRAME_RELATED_P (insn) = 1;
7126 }
7127
7128 rtx probe_const = gen_int_mode (min_probe_threshold, Pmode);
7129 rtx guard_const = gen_int_mode (guard_size, Pmode);
7130
7131 insn = emit_insn (gen_probe_sve_stack_clash (Pmode, stack_pointer_rtx,
7132 stack_pointer_rtx, temp1,
7133 probe_const, guard_const));
7134
7135 /* Now reset the CFA register if needed. */
7136 if (frame_related_p)
7137 {
7138 add_reg_note (insn, REG_CFA_DEF_CFA,
7139 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
7140 gen_int_mode (poly_size, Pmode)));
7141 RTX_FRAME_RELATED_P (insn) = 1;
7142 }
7143
7144 return;
7145 }
7146
cd1bef27
JL
7147 if (dump_file)
7148 fprintf (dump_file,
eb471ba3
TC
7149 "Stack clash AArch64 prologue: " HOST_WIDE_INT_PRINT_DEC
7150 " bytes, probing will be required.\n", size);
cd1bef27
JL
7151
7152 /* Round size to the nearest multiple of guard_size, and calculate the
7153 residual as the difference between the original size and the rounded
7154 size. */
7155 HOST_WIDE_INT rounded_size = ROUND_DOWN (size, guard_size);
7156 HOST_WIDE_INT residual = size - rounded_size;
7157
7158 /* We can handle a small number of allocations/probes inline. Otherwise
7159 punt to a loop. */
7160 if (rounded_size <= STACK_CLASH_MAX_UNROLL_PAGES * guard_size)
7161 {
7162 for (HOST_WIDE_INT i = 0; i < rounded_size; i += guard_size)
7163 {
7164 aarch64_sub_sp (NULL, temp2, guard_size, true);
7165 emit_stack_probe (plus_constant (Pmode, stack_pointer_rtx,
7166 guard_used_by_caller));
7167 emit_insn (gen_blockage ());
7168 }
7169 dump_stack_clash_frame_info (PROBE_INLINE, size != rounded_size);
7170 }
7171 else
7172 {
7173 /* Compute the ending address. */
7174 aarch64_add_offset (Pmode, temp1, stack_pointer_rtx, -rounded_size,
7175 temp1, NULL, false, true);
7176 rtx_insn *insn = get_last_insn ();
7177
7178 /* For the initial allocation, we don't have a frame pointer
7179 set up, so we always need CFI notes. If we're doing the
7180 final allocation, then we may have a frame pointer, in which
7181 case it is the CFA, otherwise we need CFI notes.
7182
7183 We can determine which allocation we are doing by looking at
7184 the value of FRAME_RELATED_P since the final allocations are not
7185 frame related. */
7186 if (frame_related_p)
7187 {
7188 /* We want the CFA independent of the stack pointer for the
7189 duration of the loop. */
7190 add_reg_note (insn, REG_CFA_DEF_CFA,
7191 plus_constant (Pmode, temp1, rounded_size));
7192 RTX_FRAME_RELATED_P (insn) = 1;
7193 }
7194
7195 /* This allocates and probes the stack. Note that this re-uses some of
7196 the existing Ada stack protection code. However we are guaranteed not
7197 to enter the non-loop or residual branches of that code.
7198
7199 The non-loop part won't be entered because if our allocation amount
7200 doesn't require a loop, the case above would handle it.
7201
7202 The residual amount won't be entered because TEMP1 is a multiple of
7203 the allocation size. The residual will always be 0. As such, the only
7204 part we are actually using from that code is the loop setup. The
7205 actual probing is done in aarch64_output_probe_stack_range. */
7206 insn = emit_insn (gen_probe_stack_range (stack_pointer_rtx,
7207 stack_pointer_rtx, temp1));
7208
7209 /* Now reset the CFA register if needed. */
7210 if (frame_related_p)
7211 {
7212 add_reg_note (insn, REG_CFA_DEF_CFA,
7213 plus_constant (Pmode, stack_pointer_rtx, rounded_size));
7214 RTX_FRAME_RELATED_P (insn) = 1;
7215 }
7216
7217 emit_insn (gen_blockage ());
7218 dump_stack_clash_frame_info (PROBE_LOOP, size != rounded_size);
7219 }
7220
7221 /* Handle any residuals. Residuals of at least MIN_PROBE_THRESHOLD have to
7222 be probed. This maintains the requirement that each page is probed at
7223 least once. For initial probing we probe only if the allocation is
7224 more than GUARD_SIZE - buffer, and for the outgoing arguments we probe
7225 if the amount is larger than buffer. GUARD_SIZE - buffer + buffer ==
7226 GUARD_SIZE. This means that for any allocation that is large enough to
7227 trigger a probe here, we'll have at least one, and if they're not large
7228 enough for this code to emit anything for them, the page would have been
7229 probed by the saving of FP/LR either by this function or any callees. If
7230 we don't have any callees then we won't have more stack adjustments and so
7231 are still safe. */
7232 if (residual)
7233 {
7234 HOST_WIDE_INT residual_probe_offset = guard_used_by_caller;
7235 /* If we're doing final adjustments, and we've done any full page
7236 allocations then any residual needs to be probed. */
7237 if (final_adjustment_p && rounded_size != 0)
7238 min_probe_threshold = 0;
7239 /* If doing a small final adjustment, we always probe at offset 0.
7240 This is done to avoid issues when LR is not at position 0 or when
7241 the final adjustment is smaller than the probing offset. */
7242 else if (final_adjustment_p && rounded_size == 0)
7243 residual_probe_offset = 0;
7244
7245 aarch64_sub_sp (temp1, temp2, residual, frame_related_p);
7246 if (residual >= min_probe_threshold)
7247 {
7248 if (dump_file)
7249 fprintf (dump_file,
7250 "Stack clash AArch64 prologue residuals: "
7251 HOST_WIDE_INT_PRINT_DEC " bytes, probing will be required."
7252 "\n", residual);
7253
7254 emit_stack_probe (plus_constant (Pmode, stack_pointer_rtx,
7255 residual_probe_offset));
7256 emit_insn (gen_blockage ());
7257 }
7258 }
7259}
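/* Illustrative addition, not part of the original source: a worked example of
   the probe thresholds computed above, assuming the default 64KB guard and
   the 1KB STACK_CLASH_CALLER_GUARD buffer (both are configurable, so these
   numbers are an assumption rather than a fixed property):

     guard_size = 65536, guard_used_by_caller = 1024
     initial/SVE adjustments:  min_probe_threshold = 65536 - 1024 = 64512
     final (outgoing args):    min_probe_threshold = 1024

   so a frame that allocates less than 63KB up front and keeps its outgoing
   arguments under 1KB falls into the NO_PROBE_SMALL_FRAME case and needs no
   explicit probe at all.  */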
7260
a0d0b980
SE
7261/* Return 1 if the register is used by the epilogue. We need to say the
7262 return register is used, but only after epilogue generation is complete.
7263 Note that in the case of sibcalls, the values "used by the epilogue" are
7264 considered live at the start of the called function.
7265
7266 For SIMD functions we need to return 1 for FP registers that are saved and
7267 restored by a function but are not zero in call_used_regs. If we do not do
7268 this, optimizations may remove the restore of the register. */
7269
7270int
7271aarch64_epilogue_uses (int regno)
7272{
7273 if (epilogue_completed)
7274 {
7275 if (regno == LR_REGNUM)
7276 return 1;
a0d0b980
SE
7277 }
7278 return 0;
7279}
7280
43e9d192
IB
7281/* AArch64 stack frames generated by this compiler look like:
7282
7283 +-------------------------------+
7284 | |
7285 | incoming stack arguments |
7286 | |
34834420
MS
7287 +-------------------------------+
7288 | | <-- incoming stack pointer (aligned)
43e9d192
IB
7289 | callee-allocated save area |
7290 | for register varargs |
7291 | |
34834420
MS
7292 +-------------------------------+
7293 | local variables | <-- frame_pointer_rtx
43e9d192
IB
7294 | |
7295 +-------------------------------+
cd1bef27 7296 | padding | \
454fdba9 7297 +-------------------------------+ |
454fdba9 7298 | callee-saved registers | | frame.saved_regs_size
454fdba9
RL
7299 +-------------------------------+ |
7300 | LR' | |
7301 +-------------------------------+ |
c600df9a
RS
7302 | FP' | |
7303 +-------------------------------+ |<- hard_frame_pointer_rtx (aligned)
7304 | SVE vector registers | | \
7305 +-------------------------------+ | | below_hard_fp_saved_regs_size
7306 | SVE predicate registers | / /
7307 +-------------------------------+
43e9d192
IB
7308 | dynamic allocation |
7309 +-------------------------------+
34834420
MS
7310 | padding |
7311 +-------------------------------+
7312 | outgoing stack arguments | <-- arg_pointer
7313 | |
7314 +-------------------------------+
7315 | | <-- stack_pointer_rtx (aligned)
43e9d192 7316
34834420
MS
7317 Dynamic stack allocations via alloca() decrease stack_pointer_rtx
7318 but leave frame_pointer_rtx and hard_frame_pointer_rtx
cd1bef27
JL
7319 unchanged.
7320
7321 By default for stack-clash we assume the guard is at least 64KB, but this
7322 value is configurable to either 4KB or 64KB. We also force the guard size to
7323 be the same as the probing interval and both values are kept in sync.
7324
7325 With those assumptions the callee can allocate up to 63KB (or 3KB depending
7326 on the guard size) of stack space without probing.
7327
7328 When probing is needed, we emit a probe at the start of the prologue
7329 and every PARAM_STACK_CLASH_PROTECTION_GUARD_SIZE bytes thereafter.
7330
7331 We have to track how much space has been allocated and the only stores
7332 to the stack we track as implicit probes are the FP/LR stores.
7333
7334 For outgoing arguments we probe if the size is larger than 1KB, such that
143d3b15
TC
7335 the ABI specified buffer is maintained for the next callee.
7336
7337 The following registers are reserved during frame layout and should not be
7338 used for any other purpose:
7339
c600df9a
RS
7340 - r11: Used by stack clash protection when SVE is enabled, and also
7341 as an anchor register when saving and restoring registers
143d3b15
TC
7342 - r12(EP0) and r13(EP1): Used as temporaries for stack adjustment.
7343 - r14 and r15: Used for speculation tracking.
7344 - r16(IP0), r17(IP1): Used by indirect tailcalls.
7345 - r30(LR), r29(FP): Used by standard frame layout.
7346
7347 These registers must be avoided in frame layout related code unless the
7348 explicit intention is to interact with one of the features listed above. */
43e9d192
IB
7349
7350/* Generate the prologue instructions for entry into a function.
7351 Establish the stack frame by decreasing the stack pointer with a
7352 properly calculated size and, if necessary, create a frame record
7353 filled with the values of LR and previous frame pointer. The
6991c977 7354 current FP is also set up if it is in use. */
43e9d192
IB
7355
7356void
7357aarch64_expand_prologue (void)
7358{
6a70badb
RS
7359 poly_int64 frame_size = cfun->machine->frame.frame_size;
7360 poly_int64 initial_adjust = cfun->machine->frame.initial_adjust;
71bfb77a 7361 HOST_WIDE_INT callee_adjust = cfun->machine->frame.callee_adjust;
6a70badb
RS
7362 poly_int64 final_adjust = cfun->machine->frame.final_adjust;
7363 poly_int64 callee_offset = cfun->machine->frame.callee_offset;
c600df9a
RS
7364 poly_int64 sve_callee_adjust = cfun->machine->frame.sve_callee_adjust;
7365 poly_int64 below_hard_fp_saved_regs_size
7366 = cfun->machine->frame.below_hard_fp_saved_regs_size;
71bfb77a
WD
7367 unsigned reg1 = cfun->machine->frame.wb_candidate1;
7368 unsigned reg2 = cfun->machine->frame.wb_candidate2;
204d2c03 7369 bool emit_frame_chain = cfun->machine->frame.emit_frame_chain;
71bfb77a 7370 rtx_insn *insn;
43e9d192 7371
c600df9a
RS
7372 if (flag_stack_clash_protection && known_eq (callee_adjust, 0))
7373 {
7374 /* Fold the SVE allocation into the initial allocation.
7375 We don't do this in aarch64_layout_frame to avoid pessimizing
7376 the epilogue code. */
7377 initial_adjust += sve_callee_adjust;
7378 sve_callee_adjust = 0;
7379 }
7380
db58fd89
JW
7381 /* Sign return address for functions. */
7382 if (aarch64_return_address_signing_enabled ())
27169e45 7383 {
8fc16d72
ST
7384 switch (aarch64_ra_sign_key)
7385 {
7386 case AARCH64_KEY_A:
7387 insn = emit_insn (gen_paciasp ());
7388 break;
7389 case AARCH64_KEY_B:
7390 insn = emit_insn (gen_pacibsp ());
7391 break;
7392 default:
7393 gcc_unreachable ();
7394 }
27169e45
JW
7395 add_reg_note (insn, REG_CFA_TOGGLE_RA_MANGLE, const0_rtx);
7396 RTX_FRAME_RELATED_P (insn) = 1;
7397 }
db58fd89 7398
dd991abb 7399 if (flag_stack_usage_info)
6a70badb 7400 current_function_static_stack_size = constant_lower_bound (frame_size);
43e9d192 7401
a3eb8a52
EB
7402 if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK)
7403 {
7404 if (crtl->is_leaf && !cfun->calls_alloca)
7405 {
6a70badb
RS
7406 if (maybe_gt (frame_size, PROBE_INTERVAL)
7407 && maybe_gt (frame_size, get_stack_check_protect ()))
8c1dd970
JL
7408 aarch64_emit_probe_stack_range (get_stack_check_protect (),
7409 (frame_size
7410 - get_stack_check_protect ()));
a3eb8a52 7411 }
6a70badb 7412 else if (maybe_gt (frame_size, 0))
8c1dd970 7413 aarch64_emit_probe_stack_range (get_stack_check_protect (), frame_size);
a3eb8a52
EB
7414 }
7415
901e66e0
SD
7416 rtx tmp0_rtx = gen_rtx_REG (Pmode, EP0_REGNUM);
7417 rtx tmp1_rtx = gen_rtx_REG (Pmode, EP1_REGNUM);
f5470a77 7418
cd1bef27
JL
7419 /* In theory we should never have both an initial adjustment
7420 and a callee save adjustment. Verify that is the case since the
7421 code below does not handle it for -fstack-clash-protection. */
7422 gcc_assert (known_eq (initial_adjust, 0) || callee_adjust == 0);
7423
7424 /* Will only probe if the initial adjustment is larger than the guard
7425 less the amount of the guard reserved for use by the caller's
7426 outgoing args. */
901e66e0 7427 aarch64_allocate_and_probe_stack_space (tmp0_rtx, tmp1_rtx, initial_adjust,
cd1bef27 7428 true, false);
43e9d192 7429
71bfb77a
WD
7430 if (callee_adjust != 0)
7431 aarch64_push_regs (reg1, reg2, callee_adjust);
43e9d192 7432
c600df9a
RS
7433 /* The offset of the frame chain record (if any) from the current SP. */
7434 poly_int64 chain_offset = (initial_adjust + callee_adjust
7435 - cfun->machine->frame.hard_fp_offset);
7436 gcc_assert (known_ge (chain_offset, 0));
7437
7438 /* The offset of the bottom of the save area from the current SP. */
7439 poly_int64 saved_regs_offset = chain_offset - below_hard_fp_saved_regs_size;
7440
204d2c03 7441 if (emit_frame_chain)
43e9d192 7442 {
71bfb77a 7443 if (callee_adjust == 0)
43cacb12
RS
7444 {
7445 reg1 = R29_REGNUM;
7446 reg2 = R30_REGNUM;
c600df9a
RS
7447 aarch64_save_callee_saves (saved_regs_offset, reg1, reg2,
7448 false, false);
43cacb12 7449 }
c600df9a
RS
7450 else
7451 gcc_assert (known_eq (chain_offset, 0));
f5470a77 7452 aarch64_add_offset (Pmode, hard_frame_pointer_rtx,
c600df9a 7453 stack_pointer_rtx, chain_offset,
901e66e0 7454 tmp1_rtx, tmp0_rtx, frame_pointer_needed);
43cacb12
RS
7455 if (frame_pointer_needed && !frame_size.is_constant ())
7456 {
7457 /* Variable-sized frames need to describe the save slot
7458 address using DW_CFA_expression rather than DW_CFA_offset.
7459 This means that, without taking further action, the
7460 locations of the registers that we've already saved would
7461 remain based on the stack pointer even after we redefine
7462 the CFA based on the frame pointer. We therefore need new
7463 DW_CFA_expressions to re-express the save slots with addresses
7464 based on the frame pointer. */
7465 rtx_insn *insn = get_last_insn ();
7466 gcc_assert (RTX_FRAME_RELATED_P (insn));
7467
7468 /* Add an explicit CFA definition if this was previously
7469 implicit. */
7470 if (!find_reg_note (insn, REG_CFA_ADJUST_CFA, NULL_RTX))
7471 {
7472 rtx src = plus_constant (Pmode, stack_pointer_rtx,
7473 callee_offset);
7474 add_reg_note (insn, REG_CFA_ADJUST_CFA,
7475 gen_rtx_SET (hard_frame_pointer_rtx, src));
7476 }
7477
7478 /* Change the save slot expressions for the registers that
7479 we've already saved. */
c600df9a
RS
7480 aarch64_add_cfa_expression (insn, regno_reg_rtx[reg2],
7481 hard_frame_pointer_rtx, UNITS_PER_WORD);
7482 aarch64_add_cfa_expression (insn, regno_reg_rtx[reg1],
7483 hard_frame_pointer_rtx, 0);
43cacb12 7484 }
71bfb77a 7485 emit_insn (gen_stack_tie (stack_pointer_rtx, hard_frame_pointer_rtx));
43e9d192 7486 }
71bfb77a 7487
c600df9a
RS
7488 aarch64_save_callee_saves (saved_regs_offset, R0_REGNUM, R30_REGNUM,
7489 callee_adjust != 0 || emit_frame_chain,
7490 emit_frame_chain);
7491 if (maybe_ne (sve_callee_adjust, 0))
7492 {
7493 gcc_assert (!flag_stack_clash_protection
7494 || known_eq (initial_adjust, 0));
7495 aarch64_allocate_and_probe_stack_space (tmp1_rtx, tmp0_rtx,
7496 sve_callee_adjust,
7497 !frame_pointer_needed, false);
7498 saved_regs_offset += sve_callee_adjust;
7499 }
7500 aarch64_save_callee_saves (saved_regs_offset, P0_REGNUM, P15_REGNUM,
7501 false, emit_frame_chain);
7502 aarch64_save_callee_saves (saved_regs_offset, V0_REGNUM, V31_REGNUM,
7503 callee_adjust != 0 || emit_frame_chain,
7504 emit_frame_chain);
cd1bef27
JL
7505
7506 /* We may need to probe the final adjustment if it is larger than the guard
7507 that is assumed by the callee. */
901e66e0 7508 aarch64_allocate_and_probe_stack_space (tmp1_rtx, tmp0_rtx, final_adjust,
cd1bef27 7509 !frame_pointer_needed, true);
43e9d192
IB
7510}
7511
4f942779
RL
7512/* Return TRUE if we can use a simple_return insn.
7513
7514 This function checks whether the callee saved stack is empty, which
7515 means no restore actions are needed. The pro_and_epilogue will use
7516 this to check whether shrink-wrapping opt is feasible. */
7517
7518bool
7519aarch64_use_return_insn_p (void)
7520{
7521 if (!reload_completed)
7522 return false;
7523
7524 if (crtl->profile)
7525 return false;
7526
6a70badb 7527 return known_eq (cfun->machine->frame.frame_size, 0);
4f942779
RL
7528}
7529
71bfb77a
WD
7530/* Generate the epilogue instructions for returning from a function.
7531 This is almost exactly the reverse of the prolog sequence, except
7532 that we need to insert barriers to avoid scheduling loads that read
7533 from a deallocated stack, and we optimize the unwind records by
7534 emitting them all together if possible. */
43e9d192
IB
7535void
7536aarch64_expand_epilogue (bool for_sibcall)
7537{
6a70badb 7538 poly_int64 initial_adjust = cfun->machine->frame.initial_adjust;
71bfb77a 7539 HOST_WIDE_INT callee_adjust = cfun->machine->frame.callee_adjust;
6a70badb
RS
7540 poly_int64 final_adjust = cfun->machine->frame.final_adjust;
7541 poly_int64 callee_offset = cfun->machine->frame.callee_offset;
c600df9a
RS
7542 poly_int64 sve_callee_adjust = cfun->machine->frame.sve_callee_adjust;
7543 poly_int64 below_hard_fp_saved_regs_size
7544 = cfun->machine->frame.below_hard_fp_saved_regs_size;
71bfb77a
WD
7545 unsigned reg1 = cfun->machine->frame.wb_candidate1;
7546 unsigned reg2 = cfun->machine->frame.wb_candidate2;
7547 rtx cfi_ops = NULL;
7548 rtx_insn *insn;
901e66e0
SD
7549 /* A stack clash protection prologue may not have left EP0_REGNUM or
7550 EP1_REGNUM in a usable state. The same is true for allocations
43cacb12 7551 with an SVE component, since we then need both temporary registers
cd1bef27
JL
7552 for each allocation. For stack clash we are in a usable state if
7553 the adjustment is less than GUARD_SIZE - GUARD_USED_BY_CALLER. */
7554 HOST_WIDE_INT guard_size
028d4092 7555 = 1 << param_stack_clash_protection_guard_size;
cd1bef27
JL
7556 HOST_WIDE_INT guard_used_by_caller = STACK_CLASH_CALLER_GUARD;
7557
c600df9a
RS
7558 /* We can re-use the registers when:
7559
7560 (a) the deallocation amount is the same as the corresponding
7561 allocation amount (which is false if we combine the initial
7562 and SVE callee save allocations in the prologue); and
7563
7564 (b) the allocation amount doesn't need a probe (which is false
7565 if the amount is guard_size - guard_used_by_caller or greater).
7566
7567 In such situations the register should remain live with the correct
cd1bef27 7568 value. */
43cacb12 7569 bool can_inherit_p = (initial_adjust.is_constant ()
c600df9a 7570 && final_adjust.is_constant ()
cd1bef27 7571 && (!flag_stack_clash_protection
c600df9a
RS
7572 || (known_lt (initial_adjust,
7573 guard_size - guard_used_by_caller)
7574 && known_eq (sve_callee_adjust, 0))));
44c0e7b9 7575
71bfb77a 7576 /* We need to add a memory barrier to prevent reads from deallocated stack. */
6a70badb
RS
7577 bool need_barrier_p
7578 = maybe_ne (get_frame_size ()
7579 + cfun->machine->frame.saved_varargs_size, 0);
43e9d192 7580
71bfb77a 7581 /* Emit a barrier to prevent loads from a deallocated stack. */
6a70badb
RS
7582 if (maybe_gt (final_adjust, crtl->outgoing_args_size)
7583 || cfun->calls_alloca
8144a493 7584 || crtl->calls_eh_return)
43e9d192 7585 {
71bfb77a
WD
7586 emit_insn (gen_stack_tie (stack_pointer_rtx, stack_pointer_rtx));
7587 need_barrier_p = false;
7588 }
7e8c2bd5 7589
71bfb77a
WD
7590 /* Restore the stack pointer from the frame pointer if it may not
7591 be the same as the stack pointer. */
901e66e0
SD
7592 rtx tmp0_rtx = gen_rtx_REG (Pmode, EP0_REGNUM);
7593 rtx tmp1_rtx = gen_rtx_REG (Pmode, EP1_REGNUM);
6a70badb
RS
7594 if (frame_pointer_needed
7595 && (maybe_ne (final_adjust, 0) || cfun->calls_alloca))
f5470a77
RS
7596 /* If writeback is used when restoring callee-saves, the CFA
7597 is restored on the instruction doing the writeback. */
7598 aarch64_add_offset (Pmode, stack_pointer_rtx,
c600df9a
RS
7599 hard_frame_pointer_rtx,
7600 -callee_offset - below_hard_fp_saved_regs_size,
901e66e0 7601 tmp1_rtx, tmp0_rtx, callee_adjust == 0);
71bfb77a 7602 else
cd1bef27
JL
7603 /* The case where we need to re-use the register here is very rare, so
7604 avoid the complicated condition and just always emit a move if the
7605 immediate doesn't fit. */
901e66e0 7606 aarch64_add_sp (tmp1_rtx, tmp0_rtx, final_adjust, true);
43e9d192 7607
c600df9a
RS
7608 /* Restore the vector registers before the predicate registers,
7609 so that we can use P4 as a temporary for big-endian SVE frames. */
7610 aarch64_restore_callee_saves (callee_offset, V0_REGNUM, V31_REGNUM,
7611 callee_adjust != 0, &cfi_ops);
7612 aarch64_restore_callee_saves (callee_offset, P0_REGNUM, P15_REGNUM,
7613 false, &cfi_ops);
7614 if (maybe_ne (sve_callee_adjust, 0))
7615 aarch64_add_sp (NULL_RTX, NULL_RTX, sve_callee_adjust, true);
7616 aarch64_restore_callee_saves (callee_offset - sve_callee_adjust,
7617 R0_REGNUM, R30_REGNUM,
71bfb77a 7618 callee_adjust != 0, &cfi_ops);
43e9d192 7619
71bfb77a
WD
7620 if (need_barrier_p)
7621 emit_insn (gen_stack_tie (stack_pointer_rtx, stack_pointer_rtx));
7622
7623 if (callee_adjust != 0)
7624 aarch64_pop_regs (reg1, reg2, callee_adjust, &cfi_ops);
7625
6a70badb 7626 if (callee_adjust != 0 || maybe_gt (initial_adjust, 65536))
71bfb77a
WD
7627 {
7628 /* Emit delayed restores and set the CFA to be SP + initial_adjust. */
89ac681e 7629 insn = get_last_insn ();
71bfb77a
WD
7630 rtx new_cfa = plus_constant (Pmode, stack_pointer_rtx, initial_adjust);
7631 REG_NOTES (insn) = alloc_reg_note (REG_CFA_DEF_CFA, new_cfa, cfi_ops);
43e9d192 7632 RTX_FRAME_RELATED_P (insn) = 1;
71bfb77a 7633 cfi_ops = NULL;
43e9d192
IB
7634 }
7635
901e66e0
SD
7636 /* Liveness of EP0_REGNUM cannot be trusted across function calls either, so
7637 restrict the emit_move optimization to leaf functions. */
7638 aarch64_add_sp (tmp0_rtx, tmp1_rtx, initial_adjust,
7639 (!can_inherit_p || !crtl->is_leaf
7640 || df_regs_ever_live_p (EP0_REGNUM)));
7e8c2bd5 7641
71bfb77a
WD
7642 if (cfi_ops)
7643 {
7644 /* Emit delayed restores and reset the CFA to be SP. */
7645 insn = get_last_insn ();
7646 cfi_ops = alloc_reg_note (REG_CFA_DEF_CFA, stack_pointer_rtx, cfi_ops);
7647 REG_NOTES (insn) = cfi_ops;
7648 RTX_FRAME_RELATED_P (insn) = 1;
dd991abb
RH
7649 }
7650
db58fd89
JW
7651 /* We prefer to emit the combined return/authenticate instruction RETAA,
7652 however there are three cases in which we must instead emit an explicit
7653 authentication instruction.
7654
7655 1) Sibcalls don't return in a normal way, so if we're about to call one
7656 we must authenticate.
7657
7658 2) The RETAA instruction is not available before ARMv8.3-A, so if we are
7659 generating code for !TARGET_ARMV8_3 we can't use it and must
7660 explicitly authenticate.
7661
7662 3) On an eh_return path we make extra stack adjustments to update the
7663 canonical frame address to be the exception handler's CFA. We want
7664 to authenticate using the CFA of the function which calls eh_return.
7665 */
7666 if (aarch64_return_address_signing_enabled ()
7667 && (for_sibcall || !TARGET_ARMV8_3 || crtl->calls_eh_return))
27169e45 7668 {
8fc16d72
ST
7669 switch (aarch64_ra_sign_key)
7670 {
7671 case AARCH64_KEY_A:
7672 insn = emit_insn (gen_autiasp ());
7673 break;
7674 case AARCH64_KEY_B:
7675 insn = emit_insn (gen_autibsp ());
7676 break;
7677 default:
7678 gcc_unreachable ();
7679 }
27169e45
JW
7680 add_reg_note (insn, REG_CFA_TOGGLE_RA_MANGLE, const0_rtx);
7681 RTX_FRAME_RELATED_P (insn) = 1;
7682 }
db58fd89 7683
dd991abb 7684 /* Stack adjustment for exception handler. */
b5b9147d 7685 if (crtl->calls_eh_return && !for_sibcall)
dd991abb
RH
7686 {
7687 /* We need to unwind the stack by the offset computed by
7688 EH_RETURN_STACKADJ_RTX. We have already reset the CFA
7689 to be SP; letting the CFA move during this adjustment
7690 is just as correct as retaining the CFA from the body
7691 of the function. Therefore, do nothing special. */
7692 emit_insn (gen_add2_insn (stack_pointer_rtx, EH_RETURN_STACKADJ_RTX));
43e9d192
IB
7693 }
7694
7695 emit_use (gen_rtx_REG (DImode, LR_REGNUM));
7696 if (!for_sibcall)
7697 emit_jump_insn (ret_rtx);
7698}
7699
8144a493
WD
7700/* Implement EH_RETURN_HANDLER_RTX. EH returns need to either return
7701 normally or return to a previous frame after unwinding.
1c960e02 7702
8144a493
WD
7703 An EH return uses a single shared return sequence. The epilogue is
7704 exactly like a normal epilogue except that it has an extra input
7705 register (EH_RETURN_STACKADJ_RTX) which contains the stack adjustment
7706 that must be applied after the frame has been destroyed. An extra label
7707 is inserted before the epilogue which initializes this register to zero,
7708 and this is the entry point for a normal return.
43e9d192 7709
8144a493
WD
7710 An actual EH return updates the return address, initializes the stack
7711 adjustment and jumps directly into the epilogue (bypassing the zeroing
7712 of the adjustment). Since the return address is typically saved on the
7713 stack when a function makes a call, the saved LR must be updated outside
7714 the epilogue.
43e9d192 7715
8144a493
WD
7716 This poses problems as the store is generated well before the epilogue,
7717 so the offset of LR is not known yet. Also optimizations will remove the
7718 store as it appears dead, even after the epilogue is generated (as the
7719 base or offset for loading LR is different in many cases).
43e9d192 7720
8144a493
WD
7721 To avoid these problems this implementation forces the frame pointer
7722 in eh_return functions so that the location of LR is fixed and known early.
7723 It also marks the store volatile, so no optimization is permitted to
7724 remove the store. */
7725rtx
7726aarch64_eh_return_handler_rtx (void)
7727{
7728 rtx tmp = gen_frame_mem (Pmode,
7729 plus_constant (Pmode, hard_frame_pointer_rtx, UNITS_PER_WORD));
43e9d192 7730
8144a493
WD
7731 /* Mark the store volatile, so no optimization is permitted to remove it. */
7732 MEM_VOLATILE_P (tmp) = true;
7733 return tmp;
43e9d192
IB
7734}
7735
43e9d192
IB
7736/* Output code to add DELTA to the first argument, and then jump
7737 to FUNCTION. Used for C++ multiple inheritance. */
7738static void
7739aarch64_output_mi_thunk (FILE *file, tree thunk ATTRIBUTE_UNUSED,
7740 HOST_WIDE_INT delta,
7741 HOST_WIDE_INT vcall_offset,
7742 tree function)
7743{
7744 /* The this pointer is always in x0. Note that this differs from
7745 Arm where the this pointer may be bumped to r1 if r0 is required
7746 to return a pointer to an aggregate. On AArch64 a result value
7747 pointer will be in x8. */
7748 int this_regno = R0_REGNUM;
5d8a22a5
DM
7749 rtx this_rtx, temp0, temp1, addr, funexp;
7750 rtx_insn *insn;
6b5777c6 7751 const char *fnname = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (thunk));
43e9d192 7752
c904388d
SD
7753 if (aarch64_bti_enabled ())
7754 emit_insn (gen_bti_c());
7755
75f1d6fc
SN
7756 reload_completed = 1;
7757 emit_note (NOTE_INSN_PROLOGUE_END);
43e9d192 7758
f5470a77 7759 this_rtx = gen_rtx_REG (Pmode, this_regno);
901e66e0
SD
7760 temp0 = gen_rtx_REG (Pmode, EP0_REGNUM);
7761 temp1 = gen_rtx_REG (Pmode, EP1_REGNUM);
f5470a77 7762
43e9d192 7763 if (vcall_offset == 0)
43cacb12 7764 aarch64_add_offset (Pmode, this_rtx, this_rtx, delta, temp1, temp0, false);
43e9d192
IB
7765 else
7766 {
28514dda 7767 gcc_assert ((vcall_offset & (POINTER_BYTES - 1)) == 0);
43e9d192 7768
75f1d6fc
SN
7769 addr = this_rtx;
7770 if (delta != 0)
7771 {
7772 if (delta >= -256 && delta < 256)
7773 addr = gen_rtx_PRE_MODIFY (Pmode, this_rtx,
7774 plus_constant (Pmode, this_rtx, delta));
7775 else
43cacb12
RS
7776 aarch64_add_offset (Pmode, this_rtx, this_rtx, delta,
7777 temp1, temp0, false);
43e9d192
IB
7778 }
7779
28514dda
YZ
7780 if (Pmode == ptr_mode)
7781 aarch64_emit_move (temp0, gen_rtx_MEM (ptr_mode, addr));
7782 else
7783 aarch64_emit_move (temp0,
7784 gen_rtx_ZERO_EXTEND (Pmode,
7785 gen_rtx_MEM (ptr_mode, addr)));
75f1d6fc 7786
28514dda 7787 if (vcall_offset >= -256 && vcall_offset < 4096 * POINTER_BYTES)
75f1d6fc 7788 addr = plus_constant (Pmode, temp0, vcall_offset);
43e9d192
IB
7789 else
7790 {
f43657b4
JW
7791 aarch64_internal_mov_immediate (temp1, GEN_INT (vcall_offset), true,
7792 Pmode);
75f1d6fc 7793 addr = gen_rtx_PLUS (Pmode, temp0, temp1);
43e9d192
IB
7794 }
7795
28514dda
YZ
7796 if (Pmode == ptr_mode)
7797 aarch64_emit_move (temp1, gen_rtx_MEM (ptr_mode, addr));
7798 else
7799 aarch64_emit_move (temp1,
7800 gen_rtx_SIGN_EXTEND (Pmode,
7801 gen_rtx_MEM (ptr_mode, addr)));
7802
75f1d6fc 7803 emit_insn (gen_add2_insn (this_rtx, temp1));
43e9d192
IB
7804 }
7805
75f1d6fc
SN
7806 /* Generate a tail call to the target function. */
7807 if (!TREE_USED (function))
7808 {
7809 assemble_external (function);
7810 TREE_USED (function) = 1;
7811 }
7812 funexp = XEXP (DECL_RTL (function), 0);
7813 funexp = gen_rtx_MEM (FUNCTION_MODE, funexp);
08cc4d92
RS
7814 rtx callee_abi = gen_int_mode (fndecl_abi (function).id (), DImode);
7815 insn = emit_call_insn (gen_sibcall (funexp, const0_rtx, callee_abi));
75f1d6fc
SN
7816 SIBLING_CALL_P (insn) = 1;
7817
7818 insn = get_insns ();
7819 shorten_branches (insn);
6b5777c6
MF
7820
7821 assemble_start_function (thunk, fnname);
75f1d6fc
SN
7822 final_start_function (insn, file, 1);
7823 final (insn, file, 1);
43e9d192 7824 final_end_function ();
6b5777c6 7825 assemble_end_function (thunk, fnname);
75f1d6fc
SN
7826
7827 /* Stop pretending to be a post-reload pass. */
7828 reload_completed = 0;
43e9d192
IB
7829}
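/* A rough illustration of the output: for a thunk with delta == 8 and
   vcall_offset == 0, the code above emits approximately

	bti	c		// only when BTI is enabled
	add	x0, x0, 8
	b	<function>

   i.e. adjust the this pointer in x0 and tail-call the target.  */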
7830
43e9d192
IB
7831static bool
7832aarch64_tls_referenced_p (rtx x)
7833{
7834 if (!TARGET_HAVE_TLS)
7835 return false;
e7de8563
RS
7836 subrtx_iterator::array_type array;
7837 FOR_EACH_SUBRTX (iter, array, x, ALL)
7838 {
7839 const_rtx x = *iter;
7840 if (GET_CODE (x) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (x) != 0)
7841 return true;
7842 /* Don't recurse into UNSPEC_TLS looking for TLS symbols; these are
7843 TLS offsets, not real symbol references. */
7844 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
7845 iter.skip_subrtxes ();
7846 }
7847 return false;
43e9d192
IB
7848}
7849
7850
43e9d192
IB
7851/* Return true if val can be encoded as a 12-bit unsigned immediate with
7852 a left shift of 0 or 12 bits. */
7853bool
7854aarch64_uimm12_shift (HOST_WIDE_INT val)
7855{
7856 return ((val & (((HOST_WIDE_INT) 0xfff) << 0)) == val
7857 || (val & (((HOST_WIDE_INT) 0xfff) << 12)) == val
7858 );
7859}
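/* Worked examples for illustration:
   aarch64_uimm12_shift (0xfff)    -> true   (12-bit value, shift 0)
   aarch64_uimm12_shift (0xabc000) -> true   (0xabc << 12)
   aarch64_uimm12_shift (0x1001)   -> false  (spans both halves).  */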
7860
eb471ba3
TC
7861/* Returns the nearest value to VAL that will fit as a 12-bit unsigned immediate
7862 that can be created with a left shift of 0 or 12. */
7863static HOST_WIDE_INT
7864aarch64_clamp_to_uimm12_shift (HOST_WIDE_INT val)
7865{
7866 /* Check to see if the value fits in 24 bits, as that is the maximum we can
7867 handle correctly. */
7868 gcc_assert ((val & 0xffffff) == val);
7869
7870 if (((val & 0xfff) << 0) == val)
7871 return val;
7872
7873 return val & (0xfff << 12);
7874}
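/* Worked example for illustration:
   aarch64_clamp_to_uimm12_shift (0x123456) == 0x123000; the low 12 bits
   are dropped so the result is encodable with a shift of 12.  */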
43e9d192
IB
7875
7876/* Return true if val is an immediate that can be loaded into a
7877 register by a MOVZ instruction. */
7878static bool
77e994c9 7879aarch64_movw_imm (HOST_WIDE_INT val, scalar_int_mode mode)
43e9d192
IB
7880{
7881 if (GET_MODE_SIZE (mode) > 4)
7882 {
7883 if ((val & (((HOST_WIDE_INT) 0xffff) << 32)) == val
7884 || (val & (((HOST_WIDE_INT) 0xffff) << 48)) == val)
7885 return 1;
7886 }
7887 else
7888 {
43cacb12
RS
7889 /* Ignore sign extension. */
7890 val &= (HOST_WIDE_INT) 0xffffffff;
7891 }
7892 return ((val & (((HOST_WIDE_INT) 0xffff) << 0)) == val
7893 || (val & (((HOST_WIDE_INT) 0xffff) << 16)) == val);
7894}
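/* Worked examples for illustration (DImode):
   aarch64_movw_imm (0xbeef, DImode)     -> true   (16 bits, shift 0)
   aarch64_movw_imm (0x12340000, DImode) -> true   (0x1234 << 16)
   aarch64_movw_imm (0x10001, DImode)    -> false  (needs MOVZ + MOVK)
   The caller below also tries ~val so that MOVN cases are caught.  */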
7895
7896/* VAL is a value with the inner mode of MODE. Replicate it to fill a
7897 64-bit (DImode) integer. */
7898
7899static unsigned HOST_WIDE_INT
7900aarch64_replicate_bitmask_imm (unsigned HOST_WIDE_INT val, machine_mode mode)
7901{
7902 unsigned int size = GET_MODE_UNIT_PRECISION (mode);
7903 while (size < 64)
7904 {
7905 val &= (HOST_WIDE_INT_1U << size) - 1;
7906 val |= val << size;
7907 size *= 2;
43e9d192 7908 }
43cacb12 7909 return val;
43e9d192
IB
7910}
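/* Worked example for illustration: replicating the 8-bit element 0xf0,
   aarch64_replicate_bitmask_imm (0xf0, QImode) == 0xf0f0f0f0f0f0f0f0.  */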
7911
a64c73a2
WD
7912/* Multipliers for repeating bitmasks of width 32, 16, 8, 4, and 2. */
7913
7914static const unsigned HOST_WIDE_INT bitmask_imm_mul[] =
7915 {
7916 0x0000000100000001ull,
7917 0x0001000100010001ull,
7918 0x0101010101010101ull,
7919 0x1111111111111111ull,
7920 0x5555555555555555ull,
7921 };
7922
43e9d192
IB
7923
7924/* Return true if val is a valid bitmask immediate. */
a64c73a2 7925
43e9d192 7926bool
a64c73a2 7927aarch64_bitmask_imm (HOST_WIDE_INT val_in, machine_mode mode)
43e9d192 7928{
a64c73a2
WD
7929 unsigned HOST_WIDE_INT val, tmp, mask, first_one, next_one;
7930 int bits;
7931
7932 /* Check for a single sequence of one bits and return quickly if so.
7933 The special cases of all ones and all zeroes return false. */
43cacb12 7934 val = aarch64_replicate_bitmask_imm (val_in, mode);
a64c73a2
WD
7935 tmp = val + (val & -val);
7936
7937 if (tmp == (tmp & -tmp))
7938 return (val + 1) > 1;
7939
7940 /* Replicate 32-bit immediates so we can treat them as 64-bit. */
7941 if (mode == SImode)
7942 val = (val << 32) | (val & 0xffffffff);
7943
7944 /* Invert if the immediate doesn't start with a zero bit - this means we
7945 only need to search for sequences of one bits. */
7946 if (val & 1)
7947 val = ~val;
7948
7949 /* Find the first set bit and set tmp to val with the first sequence of one
7950 bits removed. Return success if there is a single sequence of ones. */
7951 first_one = val & -val;
7952 tmp = val & (val + first_one);
7953
7954 if (tmp == 0)
7955 return true;
7956
7957 /* Find the next set bit and compute the difference in bit position. */
7958 next_one = tmp & -tmp;
7959 bits = clz_hwi (first_one) - clz_hwi (next_one);
7960 mask = val ^ tmp;
7961
7962 /* Check the bit position difference is a power of 2, and that the first
7963 sequence of one bits fits within 'bits' bits. */
7964 if ((mask >> bits) != 0 || bits != (bits & -bits))
7965 return false;
7966
7967 /* Check the sequence of one bits is repeated 64/bits times. */
7968 return val == mask * bitmask_imm_mul[__builtin_clz (bits) - 26];
43e9d192
IB
7969}
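/* Worked examples for illustration:
   aarch64_bitmask_imm (0x00ff00ff00ff00ff, DImode) -> true
     (a run of eight ones repeated in every 16-bit chunk)
   aarch64_bitmask_imm (0x12345678, DImode)         -> false
     (not a repeated, rotated run of ones)
   0 and ~0 are rejected, as noted in the comment above.  */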
7970
43fd192f
MC
7971/* Create mask of ones, covering the lowest to highest bits set in VAL_IN.
7972 Assumed precondition: VAL_IN is not zero. */
7973
7974unsigned HOST_WIDE_INT
7975aarch64_and_split_imm1 (HOST_WIDE_INT val_in)
7976{
7977 int lowest_bit_set = ctz_hwi (val_in);
7978 int highest_bit_set = floor_log2 (val_in);
7979 gcc_assert (val_in != 0);
7980
7981 return ((HOST_WIDE_INT_UC (2) << highest_bit_set) -
7982 (HOST_WIDE_INT_1U << lowest_bit_set));
7983}
7984
7985/* Create a constant in which all bits outside the range from the lowest set
7986 bit to the highest set bit of VAL_IN are set to 1. */
7987
7988unsigned HOST_WIDE_INT
7989aarch64_and_split_imm2 (HOST_WIDE_INT val_in)
7990{
7991 return val_in | ~aarch64_and_split_imm1 (val_in);
7992}
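/* Worked example for illustration: for VAL_IN == 0x1010 (bits 4 and 12 set),
   aarch64_and_split_imm1 (0x1010) == 0x1ff0              (ones over bits 4..12)
   aarch64_and_split_imm2 (0x1010) == 0xfffffffffffff01f  (all bits outside
   that range set as well).  */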
7993
7994/* Return true if VAL_IN is a valid 'and' bitmask immediate. */
7995
7996bool
7997aarch64_and_bitmask_imm (unsigned HOST_WIDE_INT val_in, machine_mode mode)
7998{
77e994c9
RS
7999 scalar_int_mode int_mode;
8000 if (!is_a <scalar_int_mode> (mode, &int_mode))
8001 return false;
8002
8003 if (aarch64_bitmask_imm (val_in, int_mode))
43fd192f
MC
8004 return false;
8005
77e994c9 8006 if (aarch64_move_imm (val_in, int_mode))
43fd192f
MC
8007 return false;
8008
8009 unsigned HOST_WIDE_INT imm2 = aarch64_and_split_imm2 (val_in);
8010
77e994c9 8011 return aarch64_bitmask_imm (imm2, int_mode);
43fd192f 8012}
43e9d192
IB
8013
8014/* Return true if val is an immediate that can be loaded into a
8015 register in a single instruction. */
8016bool
ef4bddc2 8017aarch64_move_imm (HOST_WIDE_INT val, machine_mode mode)
43e9d192 8018{
77e994c9
RS
8019 scalar_int_mode int_mode;
8020 if (!is_a <scalar_int_mode> (mode, &int_mode))
8021 return false;
8022
8023 if (aarch64_movw_imm (val, int_mode) || aarch64_movw_imm (~val, int_mode))
43e9d192 8024 return 1;
77e994c9 8025 return aarch64_bitmask_imm (val, int_mode);
43e9d192
IB
8026}
8027
8028static bool
ef4bddc2 8029aarch64_cannot_force_const_mem (machine_mode mode ATTRIBUTE_UNUSED, rtx x)
43e9d192
IB
8030{
8031 rtx base, offset;
7eda14e1 8032
43e9d192
IB
8033 if (GET_CODE (x) == HIGH)
8034 return true;
8035
43cacb12
RS
8036 /* There's no way to calculate VL-based values using relocations. */
8037 subrtx_iterator::array_type array;
8038 FOR_EACH_SUBRTX (iter, array, x, ALL)
8039 if (GET_CODE (*iter) == CONST_POLY_INT)
8040 return true;
8041
43e9d192
IB
8042 split_const (x, &base, &offset);
8043 if (GET_CODE (base) == SYMBOL_REF || GET_CODE (base) == LABEL_REF)
28514dda 8044 {
43cacb12 8045 if (aarch64_classify_symbol (base, INTVAL (offset))
28514dda
YZ
8046 != SYMBOL_FORCE_TO_MEM)
8047 return true;
8048 else
8049 /* Avoid generating a 64-bit relocation in ILP32; leave
8050 to aarch64_expand_mov_immediate to handle it properly. */
8051 return mode != ptr_mode;
8052 }
43e9d192
IB
8053
8054 return aarch64_tls_referenced_p (x);
8055}
8056
e79136e4
WD
8057/* Implement TARGET_CASE_VALUES_THRESHOLD.
8058 The expansion for a table switch is quite expensive due to the number
8059 of instructions, the table lookup and the hard-to-predict indirect jump.
8060 When optimizing for speed at -O3 or higher, use the per-core tuning if it
8061 is set; otherwise use tables for more than 16 cases as a trade-off between
8062 size and performance. When optimizing for size, use the default setting. */
50487d79
EM
8063
8064static unsigned int
8065aarch64_case_values_threshold (void)
8066{
8067 /* Use the specified limit for the number of cases before using jump
8068 tables at higher optimization levels. */
8069 if (optimize > 2
8070 && selected_cpu->tune->max_case_values != 0)
8071 return selected_cpu->tune->max_case_values;
8072 else
e79136e4 8073 return optimize_size ? default_case_values_threshold () : 17;
50487d79
EM
8074}
8075
43e9d192
IB
8076/* Return true if register REGNO is a valid index register.
8077 STRICT_P is true if REG_OK_STRICT is in effect. */
8078
8079bool
8080aarch64_regno_ok_for_index_p (int regno, bool strict_p)
8081{
8082 if (!HARD_REGISTER_NUM_P (regno))
8083 {
8084 if (!strict_p)
8085 return true;
8086
8087 if (!reg_renumber)
8088 return false;
8089
8090 regno = reg_renumber[regno];
8091 }
8092 return GP_REGNUM_P (regno);
8093}
8094
8095/* Return true if register REGNO is a valid base register for mode MODE.
8096 STRICT_P is true if REG_OK_STRICT is in effect. */
8097
8098bool
8099aarch64_regno_ok_for_base_p (int regno, bool strict_p)
8100{
8101 if (!HARD_REGISTER_NUM_P (regno))
8102 {
8103 if (!strict_p)
8104 return true;
8105
8106 if (!reg_renumber)
8107 return false;
8108
8109 regno = reg_renumber[regno];
8110 }
8111
8112 /* The fake registers will be eliminated to either the stack or
8113 hard frame pointer, both of which are usually valid base registers.
8114 Reload deals with the cases where the eliminated form isn't valid. */
8115 return (GP_REGNUM_P (regno)
8116 || regno == SP_REGNUM
8117 || regno == FRAME_POINTER_REGNUM
8118 || regno == ARG_POINTER_REGNUM);
8119}
8120
8121/* Return true if X is a valid base register for mode MODE.
8122 STRICT_P is true if REG_OK_STRICT is in effect. */
8123
8124static bool
8125aarch64_base_register_rtx_p (rtx x, bool strict_p)
8126{
76160199
RS
8127 if (!strict_p
8128 && GET_CODE (x) == SUBREG
8129 && contains_reg_of_mode[GENERAL_REGS][GET_MODE (SUBREG_REG (x))])
43e9d192
IB
8130 x = SUBREG_REG (x);
8131
8132 return (REG_P (x) && aarch64_regno_ok_for_base_p (REGNO (x), strict_p));
8133}
8134
8135/* Return true if address offset is a valid index. If it is, fill in INFO
8136 appropriately. STRICT_P is true if REG_OK_STRICT is in effect. */
8137
8138static bool
8139aarch64_classify_index (struct aarch64_address_info *info, rtx x,
ef4bddc2 8140 machine_mode mode, bool strict_p)
43e9d192
IB
8141{
8142 enum aarch64_address_type type;
8143 rtx index;
8144 int shift;
8145
8146 /* (reg:P) */
8147 if ((REG_P (x) || GET_CODE (x) == SUBREG)
8148 && GET_MODE (x) == Pmode)
8149 {
8150 type = ADDRESS_REG_REG;
8151 index = x;
8152 shift = 0;
8153 }
8154 /* (sign_extend:DI (reg:SI)) */
8155 else if ((GET_CODE (x) == SIGN_EXTEND
8156 || GET_CODE (x) == ZERO_EXTEND)
8157 && GET_MODE (x) == DImode
8158 && GET_MODE (XEXP (x, 0)) == SImode)
8159 {
8160 type = (GET_CODE (x) == SIGN_EXTEND)
8161 ? ADDRESS_REG_SXTW : ADDRESS_REG_UXTW;
8162 index = XEXP (x, 0);
8163 shift = 0;
8164 }
8165 /* (mult:DI (sign_extend:DI (reg:SI)) (const_int scale)) */
8166 else if (GET_CODE (x) == MULT
8167 && (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
8168 || GET_CODE (XEXP (x, 0)) == ZERO_EXTEND)
8169 && GET_MODE (XEXP (x, 0)) == DImode
8170 && GET_MODE (XEXP (XEXP (x, 0), 0)) == SImode
8171 && CONST_INT_P (XEXP (x, 1)))
8172 {
8173 type = (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND)
8174 ? ADDRESS_REG_SXTW : ADDRESS_REG_UXTW;
8175 index = XEXP (XEXP (x, 0), 0);
8176 shift = exact_log2 (INTVAL (XEXP (x, 1)));
8177 }
8178 /* (ashift:DI (sign_extend:DI (reg:SI)) (const_int shift)) */
8179 else if (GET_CODE (x) == ASHIFT
8180 && (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
8181 || GET_CODE (XEXP (x, 0)) == ZERO_EXTEND)
8182 && GET_MODE (XEXP (x, 0)) == DImode
8183 && GET_MODE (XEXP (XEXP (x, 0), 0)) == SImode
8184 && CONST_INT_P (XEXP (x, 1)))
8185 {
8186 type = (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND)
8187 ? ADDRESS_REG_SXTW : ADDRESS_REG_UXTW;
8188 index = XEXP (XEXP (x, 0), 0);
8189 shift = INTVAL (XEXP (x, 1));
8190 }
8191 /* (sign_extract:DI (mult:DI (reg:DI) (const_int scale)) 32+shift 0) */
8192 else if ((GET_CODE (x) == SIGN_EXTRACT
8193 || GET_CODE (x) == ZERO_EXTRACT)
8194 && GET_MODE (x) == DImode
8195 && GET_CODE (XEXP (x, 0)) == MULT
8196 && GET_MODE (XEXP (XEXP (x, 0), 0)) == DImode
8197 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))
8198 {
8199 type = (GET_CODE (x) == SIGN_EXTRACT)
8200 ? ADDRESS_REG_SXTW : ADDRESS_REG_UXTW;
8201 index = XEXP (XEXP (x, 0), 0);
8202 shift = exact_log2 (INTVAL (XEXP (XEXP (x, 0), 1)));
8203 if (INTVAL (XEXP (x, 1)) != 32 + shift
8204 || INTVAL (XEXP (x, 2)) != 0)
8205 shift = -1;
8206 }
8207 /* (and:DI (mult:DI (reg:DI) (const_int scale))
8208 (const_int 0xffffffff<<shift)) */
8209 else if (GET_CODE (x) == AND
8210 && GET_MODE (x) == DImode
8211 && GET_CODE (XEXP (x, 0)) == MULT
8212 && GET_MODE (XEXP (XEXP (x, 0), 0)) == DImode
8213 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
8214 && CONST_INT_P (XEXP (x, 1)))
8215 {
8216 type = ADDRESS_REG_UXTW;
8217 index = XEXP (XEXP (x, 0), 0);
8218 shift = exact_log2 (INTVAL (XEXP (XEXP (x, 0), 1)));
8219 if (INTVAL (XEXP (x, 1)) != (HOST_WIDE_INT)0xffffffff << shift)
8220 shift = -1;
8221 }
8222 /* (sign_extract:DI (ashift:DI (reg:DI) (const_int shift)) 32+shift 0) */
8223 else if ((GET_CODE (x) == SIGN_EXTRACT
8224 || GET_CODE (x) == ZERO_EXTRACT)
8225 && GET_MODE (x) == DImode
8226 && GET_CODE (XEXP (x, 0)) == ASHIFT
8227 && GET_MODE (XEXP (XEXP (x, 0), 0)) == DImode
8228 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))
8229 {
8230 type = (GET_CODE (x) == SIGN_EXTRACT)
8231 ? ADDRESS_REG_SXTW : ADDRESS_REG_UXTW;
8232 index = XEXP (XEXP (x, 0), 0);
8233 shift = INTVAL (XEXP (XEXP (x, 0), 1));
8234 if (INTVAL (XEXP (x, 1)) != 32 + shift
8235 || INTVAL (XEXP (x, 2)) != 0)
8236 shift = -1;
8237 }
8238 /* (and:DI (ashift:DI (reg:DI) (const_int shift))
8239 (const_int 0xffffffff<<shift)) */
8240 else if (GET_CODE (x) == AND
8241 && GET_MODE (x) == DImode
8242 && GET_CODE (XEXP (x, 0)) == ASHIFT
8243 && GET_MODE (XEXP (XEXP (x, 0), 0)) == DImode
8244 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
8245 && CONST_INT_P (XEXP (x, 1)))
8246 {
8247 type = ADDRESS_REG_UXTW;
8248 index = XEXP (XEXP (x, 0), 0);
8249 shift = INTVAL (XEXP (XEXP (x, 0), 1));
8250 if (INTVAL (XEXP (x, 1)) != (HOST_WIDE_INT)0xffffffff << shift)
8251 shift = -1;
8252 }
8253 /* (mult:P (reg:P) (const_int scale)) */
8254 else if (GET_CODE (x) == MULT
8255 && GET_MODE (x) == Pmode
8256 && GET_MODE (XEXP (x, 0)) == Pmode
8257 && CONST_INT_P (XEXP (x, 1)))
8258 {
8259 type = ADDRESS_REG_REG;
8260 index = XEXP (x, 0);
8261 shift = exact_log2 (INTVAL (XEXP (x, 1)));
8262 }
8263 /* (ashift:P (reg:P) (const_int shift)) */
8264 else if (GET_CODE (x) == ASHIFT
8265 && GET_MODE (x) == Pmode
8266 && GET_MODE (XEXP (x, 0)) == Pmode
8267 && CONST_INT_P (XEXP (x, 1)))
8268 {
8269 type = ADDRESS_REG_REG;
8270 index = XEXP (x, 0);
8271 shift = INTVAL (XEXP (x, 1));
8272 }
8273 else
8274 return false;
8275
76160199
RS
8276 if (!strict_p
8277 && GET_CODE (index) == SUBREG
8278 && contains_reg_of_mode[GENERAL_REGS][GET_MODE (SUBREG_REG (index))])
43e9d192
IB
8279 index = SUBREG_REG (index);
8280
43cacb12
RS
8281 if (aarch64_sve_data_mode_p (mode))
8282 {
8283 if (type != ADDRESS_REG_REG
8284 || (1 << shift) != GET_MODE_UNIT_SIZE (mode))
8285 return false;
8286 }
8287 else
8288 {
8289 if (shift != 0
8290 && !(IN_RANGE (shift, 1, 3)
8291 && known_eq (1 << shift, GET_MODE_SIZE (mode))))
8292 return false;
8293 }
8294
8295 if (REG_P (index)
43e9d192
IB
8296 && aarch64_regno_ok_for_index_p (REGNO (index), strict_p))
8297 {
8298 info->type = type;
8299 info->offset = index;
8300 info->shift = shift;
8301 return true;
8302 }
8303
8304 return false;
8305}
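/* Examples for illustration: for a DImode access, an index of the form
     (ashift:DI (reg:DI x1) (const_int 3))
   is classified as ADDRESS_REG_REG with shift 3 ([base, x1, lsl #3]);
   for an SImode access, an index of the form
     (mult:DI (sign_extend:DI (reg:SI w1)) (const_int 4))
   is classified as ADDRESS_REG_SXTW with shift 2 ([base, w1, sxtw #2]).  */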
8306
abc52318
KT
8307/* Return true if MODE is one of the modes for which we
8308 support LDP/STP operations. */
8309
8310static bool
8311aarch64_mode_valid_for_sched_fusion_p (machine_mode mode)
8312{
8313 return mode == SImode || mode == DImode
8314 || mode == SFmode || mode == DFmode
8315 || (aarch64_vector_mode_supported_p (mode)
9f5361c8
KT
8316 && (known_eq (GET_MODE_SIZE (mode), 8)
8317 || (known_eq (GET_MODE_SIZE (mode), 16)
8318 && (aarch64_tune_params.extra_tuning_flags
8319 & AARCH64_EXTRA_TUNE_NO_LDP_STP_QREGS) == 0)));
abc52318
KT
8320}
8321
9e0218fc
RH
8322/* Return true if REGNO is a virtual pointer register, or an eliminable
8323 "soft" frame register. Like REGNO_PTR_FRAME_P except that we don't
8324 include stack_pointer or hard_frame_pointer. */
8325static bool
8326virt_or_elim_regno_p (unsigned regno)
8327{
8328 return ((regno >= FIRST_VIRTUAL_REGISTER
8329 && regno <= LAST_VIRTUAL_POINTER_REGISTER)
8330 || regno == FRAME_POINTER_REGNUM
8331 || regno == ARG_POINTER_REGNUM);
8332}
8333
a97d8b98
RS
8334/* Return true if X is a valid address of type TYPE for machine mode MODE.
8335 If it is, fill in INFO appropriately. STRICT_P is true if
8336 REG_OK_STRICT is in effect. */
43e9d192 8337
a98824ac 8338bool
43e9d192 8339aarch64_classify_address (struct aarch64_address_info *info,
a97d8b98 8340 rtx x, machine_mode mode, bool strict_p,
a98824ac 8341 aarch64_addr_query_type type)
43e9d192
IB
8342{
8343 enum rtx_code code = GET_CODE (x);
8344 rtx op0, op1;
dc640181
RS
8345 poly_int64 offset;
8346
6a70badb 8347 HOST_WIDE_INT const_size;
2d8c6dc1 8348
550a3380
RS
8349 /* Whether a vector mode is partial doesn't affect address legitimacy.
8350 Partial vectors like VNx8QImode allow the same indexed addressing
8351 mode and MUL VL addressing mode as full vectors like VNx16QImode;
8352 in both cases, MUL VL counts multiples of GET_MODE_SIZE. */
8353 unsigned int vec_flags = aarch64_classify_vector_mode (mode);
8354 vec_flags &= ~VEC_PARTIAL;
8355
80d43579
WD
8356 /* On BE, we use load/store pair for all large int mode load/stores.
8357 TI/TFmode may also use a load/store pair. */
43cacb12 8358 bool advsimd_struct_p = (vec_flags == (VEC_ADVSIMD | VEC_STRUCT));
a97d8b98 8359 bool load_store_pair_p = (type == ADDR_QUERY_LDP_STP
a25831ac 8360 || type == ADDR_QUERY_LDP_STP_N
80d43579
WD
8361 || mode == TImode
8362 || mode == TFmode
43cacb12 8363 || (BYTES_BIG_ENDIAN && advsimd_struct_p));
2d8c6dc1 8364
a25831ac
AV
8365 /* If we are dealing with ADDR_QUERY_LDP_STP_N, the incoming mode describes
8366 the size of the whole pair being loaded/stored, while the mode used for
8367 the address calculation is half of that size. */
8368 if (type == ADDR_QUERY_LDP_STP_N
8369 && known_eq (GET_MODE_SIZE (mode), 16))
8370 mode = DFmode;
8371
6a70badb 8372 bool allow_reg_index_p = (!load_store_pair_p
43cacb12
RS
8373 && (known_lt (GET_MODE_SIZE (mode), 16)
8374 || vec_flags == VEC_ADVSIMD
fa9863e7 8375 || vec_flags & VEC_SVE_DATA));
43cacb12
RS
8376
8377 /* For SVE, only accept [Rn], [Rn, Rm, LSL #shift] and
8378 [Rn, #offset, MUL VL]. */
8379 if ((vec_flags & (VEC_SVE_DATA | VEC_SVE_PRED)) != 0
8380 && (code != REG && code != PLUS))
8381 return false;
2d8c6dc1
AH
8382
8383 /* On LE, for AdvSIMD, don't support anything other than POST_INC or
8384 REG addressing. */
43cacb12
RS
8385 if (advsimd_struct_p
8386 && !BYTES_BIG_ENDIAN
43e9d192
IB
8387 && (code != POST_INC && code != REG))
8388 return false;
8389
43cacb12
RS
8390 gcc_checking_assert (GET_MODE (x) == VOIDmode
8391 || SCALAR_INT_MODE_P (GET_MODE (x)));
8392
43e9d192
IB
8393 switch (code)
8394 {
8395 case REG:
8396 case SUBREG:
8397 info->type = ADDRESS_REG_IMM;
8398 info->base = x;
8399 info->offset = const0_rtx;
dc640181 8400 info->const_offset = 0;
43e9d192
IB
8401 return aarch64_base_register_rtx_p (x, strict_p);
8402
8403 case PLUS:
8404 op0 = XEXP (x, 0);
8405 op1 = XEXP (x, 1);
15c0c5c9
JW
8406
8407 if (! strict_p
4aa81c2e 8408 && REG_P (op0)
9e0218fc 8409 && virt_or_elim_regno_p (REGNO (op0))
dc640181 8410 && poly_int_rtx_p (op1, &offset))
15c0c5c9
JW
8411 {
8412 info->type = ADDRESS_REG_IMM;
8413 info->base = op0;
8414 info->offset = op1;
dc640181 8415 info->const_offset = offset;
15c0c5c9
JW
8416
8417 return true;
8418 }
8419
6a70badb 8420 if (maybe_ne (GET_MODE_SIZE (mode), 0)
dc640181
RS
8421 && aarch64_base_register_rtx_p (op0, strict_p)
8422 && poly_int_rtx_p (op1, &offset))
43e9d192 8423 {
43e9d192
IB
8424 info->type = ADDRESS_REG_IMM;
8425 info->base = op0;
8426 info->offset = op1;
dc640181 8427 info->const_offset = offset;
43e9d192
IB
8428
8429 /* TImode and TFmode values are allowed in both pairs of X
8430 registers and individual Q registers. The available
8431 address modes are:
8432 X,X: 7-bit signed scaled offset
8433 Q: 9-bit signed offset
8434 We conservatively require an offset representable in either mode.
8ed49fab
KT
8435 When performing the check for pairs of X registers i.e. LDP/STP
8436 pass down DImode since that is the natural size of the LDP/STP
8437 instruction memory accesses. */
43e9d192 8438 if (mode == TImode || mode == TFmode)
8ed49fab 8439 return (aarch64_offset_7bit_signed_scaled_p (DImode, offset)
3c5af608 8440 && (aarch64_offset_9bit_signed_unscaled_p (mode, offset)
8734dfac 8441 || offset_12bit_unsigned_scaled_p (mode, offset)));
43e9d192 8442
2d8c6dc1
AH
8443 /* A 7bit offset check because OImode will emit a ldp/stp
8444 instruction (only big endian will get here).
8445 For ldp/stp instructions, the offset is scaled for the size of a
8446 single element of the pair. */
8447 if (mode == OImode)
8448 return aarch64_offset_7bit_signed_scaled_p (TImode, offset);
8449
8450 /* Three 9/12 bit offsets checks because CImode will emit three
8451 ldr/str instructions (only big endian will get here). */
8452 if (mode == CImode)
8453 return (aarch64_offset_7bit_signed_scaled_p (TImode, offset)
3c5af608
MM
8454 && (aarch64_offset_9bit_signed_unscaled_p (V16QImode,
8455 offset + 32)
2d8c6dc1
AH
8456 || offset_12bit_unsigned_scaled_p (V16QImode,
8457 offset + 32)));
8458
8459 /* Two 7bit offsets checks because XImode will emit two ldp/stp
8460 instructions (only big endian will get here). */
8461 if (mode == XImode)
8462 return (aarch64_offset_7bit_signed_scaled_p (TImode, offset)
8463 && aarch64_offset_7bit_signed_scaled_p (TImode,
8464 offset + 32));
8465
43cacb12
RS
8466 /* Make "m" use the LD1 offset range for SVE data modes, so
8467 that pre-RTL optimizers like ivopts will work to that
8468 instead of the wider LDR/STR range. */
8469 if (vec_flags == VEC_SVE_DATA)
8470 return (type == ADDR_QUERY_M
8471 ? offset_4bit_signed_scaled_p (mode, offset)
8472 : offset_9bit_signed_scaled_p (mode, offset));
8473
9f4cbab8
RS
8474 if (vec_flags == (VEC_SVE_DATA | VEC_STRUCT))
8475 {
8476 poly_int64 end_offset = (offset
8477 + GET_MODE_SIZE (mode)
8478 - BYTES_PER_SVE_VECTOR);
8479 return (type == ADDR_QUERY_M
8480 ? offset_4bit_signed_scaled_p (mode, offset)
8481 : (offset_9bit_signed_scaled_p (SVE_BYTE_MODE, offset)
8482 && offset_9bit_signed_scaled_p (SVE_BYTE_MODE,
8483 end_offset)));
8484 }
8485
43cacb12
RS
8486 if (vec_flags == VEC_SVE_PRED)
8487 return offset_9bit_signed_scaled_p (mode, offset);
8488
2d8c6dc1 8489 if (load_store_pair_p)
6a70badb 8490 return ((known_eq (GET_MODE_SIZE (mode), 4)
9f5361c8
KT
8491 || known_eq (GET_MODE_SIZE (mode), 8)
8492 || known_eq (GET_MODE_SIZE (mode), 16))
44707478 8493 && aarch64_offset_7bit_signed_scaled_p (mode, offset));
43e9d192 8494 else
3c5af608 8495 return (aarch64_offset_9bit_signed_unscaled_p (mode, offset)
43e9d192
IB
8496 || offset_12bit_unsigned_scaled_p (mode, offset));
8497 }
8498
8499 if (allow_reg_index_p)
8500 {
8501 /* Look for base + (scaled/extended) index register. */
8502 if (aarch64_base_register_rtx_p (op0, strict_p)
8503 && aarch64_classify_index (info, op1, mode, strict_p))
8504 {
8505 info->base = op0;
8506 return true;
8507 }
8508 if (aarch64_base_register_rtx_p (op1, strict_p)
8509 && aarch64_classify_index (info, op0, mode, strict_p))
8510 {
8511 info->base = op1;
8512 return true;
8513 }
8514 }
8515
8516 return false;
8517
8518 case POST_INC:
8519 case POST_DEC:
8520 case PRE_INC:
8521 case PRE_DEC:
8522 info->type = ADDRESS_REG_WB;
8523 info->base = XEXP (x, 0);
8524 info->offset = NULL_RTX;
8525 return aarch64_base_register_rtx_p (info->base, strict_p);
8526
8527 case POST_MODIFY:
8528 case PRE_MODIFY:
8529 info->type = ADDRESS_REG_WB;
8530 info->base = XEXP (x, 0);
8531 if (GET_CODE (XEXP (x, 1)) == PLUS
dc640181 8532 && poly_int_rtx_p (XEXP (XEXP (x, 1), 1), &offset)
43e9d192
IB
8533 && rtx_equal_p (XEXP (XEXP (x, 1), 0), info->base)
8534 && aarch64_base_register_rtx_p (info->base, strict_p))
8535 {
43e9d192 8536 info->offset = XEXP (XEXP (x, 1), 1);
dc640181 8537 info->const_offset = offset;
43e9d192
IB
8538
8539 /* TImode and TFmode values are allowed in both pairs of X
8540 registers and individual Q registers. The available
8541 address modes are:
8542 X,X: 7-bit signed scaled offset
8543 Q: 9-bit signed offset
8544 We conservatively require an offset representable in either mode.
8545 */
8546 if (mode == TImode || mode == TFmode)
44707478 8547 return (aarch64_offset_7bit_signed_scaled_p (mode, offset)
3c5af608 8548 && aarch64_offset_9bit_signed_unscaled_p (mode, offset));
43e9d192 8549
2d8c6dc1 8550 if (load_store_pair_p)
6a70badb 8551 return ((known_eq (GET_MODE_SIZE (mode), 4)
9f5361c8
KT
8552 || known_eq (GET_MODE_SIZE (mode), 8)
8553 || known_eq (GET_MODE_SIZE (mode), 16))
44707478 8554 && aarch64_offset_7bit_signed_scaled_p (mode, offset));
43e9d192 8555 else
3c5af608 8556 return aarch64_offset_9bit_signed_unscaled_p (mode, offset);
43e9d192
IB
8557 }
8558 return false;
8559
8560 case CONST:
8561 case SYMBOL_REF:
8562 case LABEL_REF:
79517551
SN
8563 /* load literal: pc-relative constant pool entry. Only supported
8564 for SI mode or larger. */
43e9d192 8565 info->type = ADDRESS_SYMBOLIC;
2d8c6dc1 8566
6a70badb
RS
8567 if (!load_store_pair_p
8568 && GET_MODE_SIZE (mode).is_constant (&const_size)
8569 && const_size >= 4)
43e9d192
IB
8570 {
8571 rtx sym, addend;
8572
8573 split_const (x, &sym, &addend);
b4f50fd4
RR
8574 return ((GET_CODE (sym) == LABEL_REF
8575 || (GET_CODE (sym) == SYMBOL_REF
8576 && CONSTANT_POOL_ADDRESS_P (sym)
9ee6540a 8577 && aarch64_pcrelative_literal_loads)));
43e9d192
IB
8578 }
8579 return false;
8580
8581 case LO_SUM:
8582 info->type = ADDRESS_LO_SUM;
8583 info->base = XEXP (x, 0);
8584 info->offset = XEXP (x, 1);
8585 if (allow_reg_index_p
8586 && aarch64_base_register_rtx_p (info->base, strict_p))
8587 {
8588 rtx sym, offs;
8589 split_const (info->offset, &sym, &offs);
8590 if (GET_CODE (sym) == SYMBOL_REF
43cacb12
RS
8591 && (aarch64_classify_symbol (sym, INTVAL (offs))
8592 == SYMBOL_SMALL_ABSOLUTE))
43e9d192
IB
8593 {
8594 /* The symbol and offset must be aligned to the access size. */
8595 unsigned int align;
43e9d192
IB
8596
8597 if (CONSTANT_POOL_ADDRESS_P (sym))
8598 align = GET_MODE_ALIGNMENT (get_pool_mode (sym));
8599 else if (TREE_CONSTANT_POOL_ADDRESS_P (sym))
8600 {
8601 tree exp = SYMBOL_REF_DECL (sym);
8602 align = TYPE_ALIGN (TREE_TYPE (exp));
58e17cf8 8603 align = aarch64_constant_alignment (exp, align);
43e9d192
IB
8604 }
8605 else if (SYMBOL_REF_DECL (sym))
8606 align = DECL_ALIGN (SYMBOL_REF_DECL (sym));
6c031d8d
KV
8607 else if (SYMBOL_REF_HAS_BLOCK_INFO_P (sym)
8608 && SYMBOL_REF_BLOCK (sym) != NULL)
8609 align = SYMBOL_REF_BLOCK (sym)->alignment;
43e9d192
IB
8610 else
8611 align = BITS_PER_UNIT;
8612
6a70badb
RS
8613 poly_int64 ref_size = GET_MODE_SIZE (mode);
8614 if (known_eq (ref_size, 0))
43e9d192
IB
8615 ref_size = GET_MODE_SIZE (DImode);
8616
6a70badb
RS
8617 return (multiple_p (INTVAL (offs), ref_size)
8618 && multiple_p (align / BITS_PER_UNIT, ref_size));
43e9d192
IB
8619 }
8620 }
8621 return false;
8622
8623 default:
8624 return false;
8625 }
8626}
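/* A summary for illustration: for a plain DImode access, the REG + IMM case
   above accepts either a signed 9-bit unscaled offset (-256..255) or an
   unsigned 12-bit scaled offset (0..32760 in multiples of 8), whereas an
   LDP/STP query (ADDR_QUERY_LDP_STP) is limited to the signed 7-bit scaled
   range (-512..504 in multiples of 8).  */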
8627
9bf2f779
KT
8628/* Return true if the address X is valid for a PRFM instruction.
8629 STRICT_P is true if we should do strict checking with
8630 aarch64_classify_address. */
8631
8632bool
8633aarch64_address_valid_for_prefetch_p (rtx x, bool strict_p)
8634{
8635 struct aarch64_address_info addr;
8636
8637 /* PRFM accepts the same addresses as DImode... */
a97d8b98 8638 bool res = aarch64_classify_address (&addr, x, DImode, strict_p);
9bf2f779
KT
8639 if (!res)
8640 return false;
8641
8642 /* ... except writeback forms. */
8643 return addr.type != ADDRESS_REG_WB;
8644}
8645
43e9d192
IB
8646bool
8647aarch64_symbolic_address_p (rtx x)
8648{
8649 rtx offset;
8650
8651 split_const (x, &x, &offset);
8652 return GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == LABEL_REF;
8653}
8654
a6e0bfa7 8655/* Classify the base of symbolic expression X. */
da4f13a4
MS
8656
8657enum aarch64_symbol_type
a6e0bfa7 8658aarch64_classify_symbolic_expression (rtx x)
43e9d192
IB
8659{
8660 rtx offset;
da4f13a4 8661
43e9d192 8662 split_const (x, &x, &offset);
43cacb12 8663 return aarch64_classify_symbol (x, INTVAL (offset));
43e9d192
IB
8664}
8665
8666
8667/* Return TRUE if X is a legitimate address for accessing memory in
8668 mode MODE. */
8669static bool
ef4bddc2 8670aarch64_legitimate_address_hook_p (machine_mode mode, rtx x, bool strict_p)
43e9d192
IB
8671{
8672 struct aarch64_address_info addr;
8673
a97d8b98 8674 return aarch64_classify_address (&addr, x, mode, strict_p);
43e9d192
IB
8675}
8676
a97d8b98
RS
8677/* Return TRUE if X is a legitimate address of type TYPE for accessing
8678 memory in mode MODE. STRICT_P is true if REG_OK_STRICT is in effect. */
43e9d192 8679bool
a97d8b98
RS
8680aarch64_legitimate_address_p (machine_mode mode, rtx x, bool strict_p,
8681 aarch64_addr_query_type type)
43e9d192
IB
8682{
8683 struct aarch64_address_info addr;
8684
a97d8b98 8685 return aarch64_classify_address (&addr, x, mode, strict_p, type);
43e9d192
IB
8686}
8687
9005477f
RS
8688/* Implement TARGET_LEGITIMIZE_ADDRESS_DISPLACEMENT. */
8689
491ec060 8690static bool
9005477f
RS
8691aarch64_legitimize_address_displacement (rtx *offset1, rtx *offset2,
8692 poly_int64 orig_offset,
8693 machine_mode mode)
491ec060 8694{
6a70badb
RS
8695 HOST_WIDE_INT size;
8696 if (GET_MODE_SIZE (mode).is_constant (&size))
8697 {
9005477f
RS
8698 HOST_WIDE_INT const_offset, second_offset;
8699
8700 /* A general SVE offset is A * VQ + B. Remove the A component from
8701 coefficient 0 in order to get the constant B. */
8702 const_offset = orig_offset.coeffs[0] - orig_offset.coeffs[1];
8703
8704 /* Split an out-of-range address displacement into a base and
8705 offset. Use 4KB range for 1- and 2-byte accesses and a 16KB
8706 range otherwise to increase opportunities for sharing the base
8707 address of different sizes. Unaligned accesses use the signed
8708 9-bit range; TImode/TFmode use the intersection of signed
8709 scaled 7-bit and signed 9-bit offset. */
6a70badb 8710 if (mode == TImode || mode == TFmode)
9005477f
RS
8711 second_offset = ((const_offset + 0x100) & 0x1f8) - 0x100;
8712 else if ((const_offset & (size - 1)) != 0)
8713 second_offset = ((const_offset + 0x100) & 0x1ff) - 0x100;
6a70badb 8714 else
9005477f 8715 second_offset = const_offset & (size < 4 ? 0xfff : 0x3ffc);
491ec060 8716
9005477f
RS
8717 if (second_offset == 0 || known_eq (orig_offset, second_offset))
8718 return false;
8719
8720 /* Split the offset into second_offset and the rest. */
8721 *offset1 = gen_int_mode (orig_offset - second_offset, Pmode);
8722 *offset2 = gen_int_mode (second_offset, Pmode);
8723 return true;
8724 }
8725 else
8726 {
8727 /* Get the mode we should use as the basis of the range. For structure
8728 modes this is the mode of one vector. */
8729 unsigned int vec_flags = aarch64_classify_vector_mode (mode);
8730 machine_mode step_mode
8731 = (vec_flags & VEC_STRUCT) != 0 ? SVE_BYTE_MODE : mode;
8732
8733 /* Get the "mul vl" multiplier we'd like to use. */
8734 HOST_WIDE_INT factor = GET_MODE_SIZE (step_mode).coeffs[1];
8735 HOST_WIDE_INT vnum = orig_offset.coeffs[1] / factor;
8736 if (vec_flags & VEC_SVE_DATA)
8737 /* LDR supports a 9-bit range, but the move patterns for
8738 structure modes require all vectors to be in range of the
8739 same base. The simplest way of accommodating that while still
8740 promoting reuse of anchor points between different modes is
8741 to use an 8-bit range unconditionally. */
8742 vnum = ((vnum + 128) & 255) - 128;
8743 else
8744 /* Predicates are only handled singly, so we might as well use
8745 the full range. */
8746 vnum = ((vnum + 256) & 511) - 256;
8747 if (vnum == 0)
8748 return false;
8749
8750 /* Convert the "mul vl" multiplier into a byte offset. */
8751 poly_int64 second_offset = GET_MODE_SIZE (step_mode) * vnum;
8752 if (known_eq (second_offset, orig_offset))
8753 return false;
8754
8755 /* Split the offset into second_offset and the rest. */
8756 *offset1 = gen_int_mode (orig_offset - second_offset, Pmode);
8757 *offset2 = gen_int_mode (second_offset, Pmode);
6a70badb
RS
8758 return true;
8759 }
491ec060
WD
8760}
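/* Worked example for illustration: for a DImode access at offset 0x10008,
   the constant-size path above computes
     second_offset = 0x10008 & 0x3ffc = 0x8,
   so the displacement is split into an anchor of 0x10000 plus an in-range
   offset of 8.  */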
8761
a2170965
TC
8762/* Return the binary representation of floating point constant VALUE in INTVAL.
8763 If the value cannot be converted, return false without setting INTVAL.
8764 The conversion is done in the given MODE. */
8765bool
8766aarch64_reinterpret_float_as_int (rtx value, unsigned HOST_WIDE_INT *intval)
8767{
8768
8769 /* We make a general exception for 0. */
8770 if (aarch64_float_const_zero_rtx_p (value))
8771 {
8772 *intval = 0;
8773 return true;
8774 }
8775
0d0e0188 8776 scalar_float_mode mode;
a2170965 8777 if (GET_CODE (value) != CONST_DOUBLE
0d0e0188 8778 || !is_a <scalar_float_mode> (GET_MODE (value), &mode)
a2170965
TC
8779 || GET_MODE_BITSIZE (mode) > HOST_BITS_PER_WIDE_INT
8780 /* Only support up to DF mode. */
8781 || GET_MODE_BITSIZE (mode) > GET_MODE_BITSIZE (DFmode))
8782 return false;
8783
8784 unsigned HOST_WIDE_INT ival = 0;
8785
8786 long res[2];
8787 real_to_target (res,
8788 CONST_DOUBLE_REAL_VALUE (value),
8789 REAL_MODE_FORMAT (mode));
8790
5c22bb48
TC
8791 if (mode == DFmode)
8792 {
8793 int order = BYTES_BIG_ENDIAN ? 1 : 0;
8794 ival = zext_hwi (res[order], 32);
8795 ival |= (zext_hwi (res[1 - order], 32) << 32);
8796 }
8797 else
8798 ival = zext_hwi (res[0], 32);
a2170965
TC
8799
8800 *intval = ival;
8801 return true;
8802}
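/* Worked examples for illustration:
   1.0 (DFmode)  -> 0x3ff0000000000000
   -2.0 (DFmode) -> 0xc000000000000000
   The first is a single MOVZ (0x3ff0 << 48), which is why
   aarch64_float_const_rtx_p below prefers a mov/fmov sequence over a
   literal load for such constants.  */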
8803
8804/* Return TRUE if rtx X is an immediate constant that can be moved using a
8805 single MOV(+MOVK) followed by an FMOV. */
8806bool
8807aarch64_float_const_rtx_p (rtx x)
8808{
8809 machine_mode mode = GET_MODE (x);
8810 if (mode == VOIDmode)
8811 return false;
8812
8813 /* Determine whether it's cheaper to write float constants as
8814 mov/movk pairs than as ldr/adrp pairs. */
8815 unsigned HOST_WIDE_INT ival;
8816
8817 if (GET_CODE (x) == CONST_DOUBLE
8818 && SCALAR_FLOAT_MODE_P (mode)
8819 && aarch64_reinterpret_float_as_int (x, &ival))
8820 {
77e994c9
RS
8821 scalar_int_mode imode = (mode == HFmode
8822 ? SImode
8823 : int_mode_for_mode (mode).require ());
a2170965
TC
8824 int num_instr = aarch64_internal_mov_immediate
8825 (NULL_RTX, gen_int_mode (ival, imode), false, imode);
8826 return num_instr < 3;
8827 }
8828
8829 return false;
8830}
8831
43e9d192
IB
8832/* Return TRUE if rtx X is immediate constant 0.0 */
8833bool
3520f7cc 8834aarch64_float_const_zero_rtx_p (rtx x)
43e9d192 8835{
43e9d192
IB
8836 if (GET_MODE (x) == VOIDmode)
8837 return false;
8838
34a72c33 8839 if (REAL_VALUE_MINUS_ZERO (*CONST_DOUBLE_REAL_VALUE (x)))
43e9d192 8840 return !HONOR_SIGNED_ZEROS (GET_MODE (x));
34a72c33 8841 return real_equal (CONST_DOUBLE_REAL_VALUE (x), &dconst0);
43e9d192
IB
8842}
8843
a2170965
TC
8844/* Return TRUE if rtx X is immediate constant that fits in a single
8845 MOVI immediate operation. */
8846bool
8847aarch64_can_const_movi_rtx_p (rtx x, machine_mode mode)
8848{
8849 if (!TARGET_SIMD)
8850 return false;
8851
77e994c9
RS
8852 machine_mode vmode;
8853 scalar_int_mode imode;
a2170965
TC
8854 unsigned HOST_WIDE_INT ival;
8855
8856 if (GET_CODE (x) == CONST_DOUBLE
8857 && SCALAR_FLOAT_MODE_P (mode))
8858 {
8859 if (!aarch64_reinterpret_float_as_int (x, &ival))
8860 return false;
8861
35c38fa6
TC
8862 /* We make a general exception for 0. */
8863 if (aarch64_float_const_zero_rtx_p (x))
8864 return true;
8865
304b9962 8866 imode = int_mode_for_mode (mode).require ();
a2170965
TC
8867 }
8868 else if (GET_CODE (x) == CONST_INT
77e994c9
RS
8869 && is_a <scalar_int_mode> (mode, &imode))
8870 ival = INTVAL (x);
a2170965
TC
8871 else
8872 return false;
8873
8874 /* Use a 64-bit container for everything except DImode/DFmode, where we use
8875 a 128-bit vector mode. */
77e994c9 8876 int width = GET_MODE_BITSIZE (imode) == 64 ? 128 : 64;
a2170965
TC
8877
8878 vmode = aarch64_simd_container_mode (imode, width);
8879 rtx v_op = aarch64_simd_gen_const_vector_dup (vmode, ival);
8880
b187677b 8881 return aarch64_simd_valid_immediate (v_op, NULL);
a2170965
TC
8882}
8883
8884
70f09188
AP
8885/* Return the fixed registers used for condition codes. */
8886
8887static bool
8888aarch64_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
8889{
8890 *p1 = CC_REGNUM;
8891 *p2 = INVALID_REGNUM;
8892 return true;
8893}
8894
47210a04
RL
8895/* This function is used by the call expanders of the machine description.
8896 RESULT is the register in which the result is returned. It's NULL for
8897 "call" and "sibcall".
8898 MEM is the location of the function call.
08cc4d92 8899 CALLEE_ABI is a const_int that gives the arm_pcs of the callee.
47210a04
RL
8900 SIBCALL indicates whether this function call is normal call or sibling call.
8901 It will generate different pattern accordingly. */
8902
8903void
08cc4d92 8904aarch64_expand_call (rtx result, rtx mem, rtx callee_abi, bool sibcall)
47210a04
RL
8905{
8906 rtx call, callee, tmp;
8907 rtvec vec;
8908 machine_mode mode;
8909
8910 gcc_assert (MEM_P (mem));
8911 callee = XEXP (mem, 0);
8912 mode = GET_MODE (callee);
8913 gcc_assert (mode == Pmode);
8914
8915 /* Decide if we should generate indirect calls by loading the
8916 address of the callee into a register before performing
8917 the branch-and-link. */
8918 if (SYMBOL_REF_P (callee)
8919 ? (aarch64_is_long_call_p (callee)
8920 || aarch64_is_noplt_call_p (callee))
8921 : !REG_P (callee))
8922 XEXP (mem, 0) = force_reg (mode, callee);
8923
8924 call = gen_rtx_CALL (VOIDmode, mem, const0_rtx);
8925
8926 if (result != NULL_RTX)
8927 call = gen_rtx_SET (result, call);
8928
8929 if (sibcall)
8930 tmp = ret_rtx;
8931 else
8932 tmp = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (Pmode, LR_REGNUM));
8933
08cc4d92
RS
8934 gcc_assert (CONST_INT_P (callee_abi));
8935 callee_abi = gen_rtx_UNSPEC (DImode, gen_rtvec (1, callee_abi),
8936 UNSPEC_CALLEE_ABI);
8937
8938 vec = gen_rtvec (3, call, callee_abi, tmp);
47210a04
RL
8939 call = gen_rtx_PARALLEL (VOIDmode, vec);
8940
8941 aarch64_emit_call_insn (call);
8942}
8943
78607708
TV
8944/* Emit call insn with PAT and do aarch64-specific handling. */
8945
d07a3fed 8946void
78607708
TV
8947aarch64_emit_call_insn (rtx pat)
8948{
8949 rtx insn = emit_call_insn (pat);
8950
8951 rtx *fusage = &CALL_INSN_FUNCTION_USAGE (insn);
8952 clobber_reg (fusage, gen_rtx_REG (word_mode, IP0_REGNUM));
8953 clobber_reg (fusage, gen_rtx_REG (word_mode, IP1_REGNUM));
8954}
8955
ef4bddc2 8956machine_mode
43e9d192
IB
8957aarch64_select_cc_mode (RTX_CODE code, rtx x, rtx y)
8958{
f7343f20
RE
8959 machine_mode mode_x = GET_MODE (x);
8960 rtx_code code_x = GET_CODE (x);
8961
43e9d192
IB
8962 /* All floating point compares return CCFP if it is an equality
8963 comparison, and CCFPE otherwise. */
f7343f20 8964 if (GET_MODE_CLASS (mode_x) == MODE_FLOAT)
43e9d192
IB
8965 {
8966 switch (code)
8967 {
8968 case EQ:
8969 case NE:
8970 case UNORDERED:
8971 case ORDERED:
8972 case UNLT:
8973 case UNLE:
8974 case UNGT:
8975 case UNGE:
8976 case UNEQ:
43e9d192
IB
8977 return CCFPmode;
8978
8979 case LT:
8980 case LE:
8981 case GT:
8982 case GE:
8332c5ee 8983 case LTGT:
43e9d192
IB
8984 return CCFPEmode;
8985
8986 default:
8987 gcc_unreachable ();
8988 }
8989 }
8990
2b8568fe
KT
8991 /* Equality comparisons of short modes against zero can be performed
8992 using the TST instruction with the appropriate bitmask. */
f73dc006 8993 if (y == const0_rtx && (REG_P (x) || SUBREG_P (x))
2b8568fe 8994 && (code == EQ || code == NE)
f7343f20 8995 && (mode_x == HImode || mode_x == QImode))
2b8568fe
KT
8996 return CC_NZmode;
8997
b06335f9
KT
8998 /* Similarly, comparisons of zero_extends from shorter modes can
8999 be performed using an ANDS with an immediate mask. */
f7343f20
RE
9000 if (y == const0_rtx && code_x == ZERO_EXTEND
9001 && (mode_x == SImode || mode_x == DImode)
b06335f9
KT
9002 && (GET_MODE (XEXP (x, 0)) == HImode || GET_MODE (XEXP (x, 0)) == QImode)
9003 && (code == EQ || code == NE))
9004 return CC_NZmode;
9005
f7343f20 9006 if ((mode_x == SImode || mode_x == DImode)
43e9d192
IB
9007 && y == const0_rtx
9008 && (code == EQ || code == NE || code == LT || code == GE)
f7343f20
RE
9009 && (code_x == PLUS || code_x == MINUS || code_x == AND
9010 || code_x == NEG
9011 || (code_x == ZERO_EXTRACT && CONST_INT_P (XEXP (x, 1))
7325d85a 9012 && CONST_INT_P (XEXP (x, 2)))))
43e9d192
IB
9013 return CC_NZmode;
9014
1c992d1e 9015 /* A compare with a shifted operand. Because of canonicalization,
43e9d192
IB
9016 the comparison will have to be swapped when we emit the assembly
9017 code. */
f7343f20 9018 if ((mode_x == SImode || mode_x == DImode)
ffa8a921 9019 && (REG_P (y) || GET_CODE (y) == SUBREG || y == const0_rtx)
f7343f20
RE
9020 && (code_x == ASHIFT || code_x == ASHIFTRT
9021 || code_x == LSHIFTRT
9022 || code_x == ZERO_EXTEND || code_x == SIGN_EXTEND))
43e9d192
IB
9023 return CC_SWPmode;
9024
1c992d1e
RE
9025 /* Similarly for a negated operand, but we can only do this for
9026 equalities. */
f7343f20 9027 if ((mode_x == SImode || mode_x == DImode)
4aa81c2e 9028 && (REG_P (y) || GET_CODE (y) == SUBREG)
1c992d1e 9029 && (code == EQ || code == NE)
f7343f20 9030 && code_x == NEG)
1c992d1e
RE
9031 return CC_Zmode;
9032
f7343f20
RE
9033 /* A test for unsigned overflow from an addition. */
9034 if ((mode_x == DImode || mode_x == TImode)
9035 && (code == LTU || code == GEU)
9036 && code_x == PLUS
9037 && rtx_equal_p (XEXP (x, 0), y))
ef22810a
RH
9038 return CC_Cmode;
9039
f7343f20
RE
9040 /* A test for unsigned overflow from an add with carry. */
9041 if ((mode_x == DImode || mode_x == TImode)
9042 && (code == LTU || code == GEU)
9043 && code_x == PLUS
9044 && CONST_SCALAR_INT_P (y)
9045 && (rtx_mode_t (y, mode_x)
9046 == (wi::shwi (1, mode_x)
9047 << (GET_MODE_BITSIZE (mode_x).to_constant () / 2))))
9048 return CC_ADCmode;
9049
30c46053 9050 /* A test for signed overflow. */
f7343f20 9051 if ((mode_x == DImode || mode_x == TImode)
30c46053 9052 && code == NE
f7343f20 9053 && code_x == PLUS
30c46053
MC
9054 && GET_CODE (y) == SIGN_EXTEND)
9055 return CC_Vmode;
9056
43e9d192
IB
9057 /* For everything else, return CCmode. */
9058 return CCmode;
9059}
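/* Examples for illustration:
   - (eq (plus:DI x y) (const_int 0)) selects CC_NZmode, so the comparison
     can typically be folded into an ADDS instruction.
   - A floating-point LT or GE comparison selects CCFPEmode, while EQ/NE
     and the unordered variants select CCFPmode.  */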
9060
3dfa7055 9061static int
b8506a8a 9062aarch64_get_condition_code_1 (machine_mode, enum rtx_code);
3dfa7055 9063
cd5660ab 9064int
43e9d192
IB
9065aarch64_get_condition_code (rtx x)
9066{
ef4bddc2 9067 machine_mode mode = GET_MODE (XEXP (x, 0));
43e9d192
IB
9068 enum rtx_code comp_code = GET_CODE (x);
9069
9070 if (GET_MODE_CLASS (mode) != MODE_CC)
9071 mode = SELECT_CC_MODE (comp_code, XEXP (x, 0), XEXP (x, 1));
3dfa7055
ZC
9072 return aarch64_get_condition_code_1 (mode, comp_code);
9073}
43e9d192 9074
3dfa7055 9075static int
b8506a8a 9076aarch64_get_condition_code_1 (machine_mode mode, enum rtx_code comp_code)
3dfa7055 9077{
43e9d192
IB
9078 switch (mode)
9079 {
4e10a5a7
RS
9080 case E_CCFPmode:
9081 case E_CCFPEmode:
43e9d192
IB
9082 switch (comp_code)
9083 {
9084 case GE: return AARCH64_GE;
9085 case GT: return AARCH64_GT;
9086 case LE: return AARCH64_LS;
9087 case LT: return AARCH64_MI;
9088 case NE: return AARCH64_NE;
9089 case EQ: return AARCH64_EQ;
9090 case ORDERED: return AARCH64_VC;
9091 case UNORDERED: return AARCH64_VS;
9092 case UNLT: return AARCH64_LT;
9093 case UNLE: return AARCH64_LE;
9094 case UNGT: return AARCH64_HI;
9095 case UNGE: return AARCH64_PL;
cd5660ab 9096 default: return -1;
43e9d192
IB
9097 }
9098 break;
9099
4e10a5a7 9100 case E_CCmode:
43e9d192
IB
9101 switch (comp_code)
9102 {
9103 case NE: return AARCH64_NE;
9104 case EQ: return AARCH64_EQ;
9105 case GE: return AARCH64_GE;
9106 case GT: return AARCH64_GT;
9107 case LE: return AARCH64_LE;
9108 case LT: return AARCH64_LT;
9109 case GEU: return AARCH64_CS;
9110 case GTU: return AARCH64_HI;
9111 case LEU: return AARCH64_LS;
9112 case LTU: return AARCH64_CC;
cd5660ab 9113 default: return -1;
43e9d192
IB
9114 }
9115 break;
9116
4e10a5a7 9117 case E_CC_SWPmode:
43e9d192
IB
9118 switch (comp_code)
9119 {
9120 case NE: return AARCH64_NE;
9121 case EQ: return AARCH64_EQ;
9122 case GE: return AARCH64_LE;
9123 case GT: return AARCH64_LT;
9124 case LE: return AARCH64_GE;
9125 case LT: return AARCH64_GT;
9126 case GEU: return AARCH64_LS;
9127 case GTU: return AARCH64_CC;
9128 case LEU: return AARCH64_CS;
9129 case LTU: return AARCH64_HI;
cd5660ab 9130 default: return -1;
43e9d192
IB
9131 }
9132 break;
9133
57d6f4d0
RS
9134 case E_CC_NZCmode:
9135 switch (comp_code)
9136 {
9137 case NE: return AARCH64_NE; /* = any */
9138 case EQ: return AARCH64_EQ; /* = none */
9139 case GE: return AARCH64_PL; /* = nfrst */
9140 case LT: return AARCH64_MI; /* = first */
9141 case GEU: return AARCH64_CS; /* = nlast */
9142 case GTU: return AARCH64_HI; /* = pmore */
9143 case LEU: return AARCH64_LS; /* = plast */
9144 case LTU: return AARCH64_CC; /* = last */
9145 default: return -1;
9146 }
9147 break;
9148
4e10a5a7 9149 case E_CC_NZmode:
43e9d192
IB
9150 switch (comp_code)
9151 {
9152 case NE: return AARCH64_NE;
9153 case EQ: return AARCH64_EQ;
9154 case GE: return AARCH64_PL;
9155 case LT: return AARCH64_MI;
cd5660ab 9156 default: return -1;
43e9d192
IB
9157 }
9158 break;
9159
4e10a5a7 9160 case E_CC_Zmode:
1c992d1e
RE
9161 switch (comp_code)
9162 {
9163 case NE: return AARCH64_NE;
9164 case EQ: return AARCH64_EQ;
cd5660ab 9165 default: return -1;
1c992d1e
RE
9166 }
9167 break;
9168
4e10a5a7 9169 case E_CC_Cmode:
ef22810a
RH
9170 switch (comp_code)
9171 {
f7343f20
RE
9172 case LTU: return AARCH64_CS;
9173 case GEU: return AARCH64_CC;
9174 default: return -1;
9175 }
9176 break;
9177
9178 case E_CC_ADCmode:
9179 switch (comp_code)
9180 {
9181 case GEU: return AARCH64_CS;
9182 case LTU: return AARCH64_CC;
ef22810a
RH
9183 default: return -1;
9184 }
9185 break;
9186
30c46053
MC
9187 case E_CC_Vmode:
9188 switch (comp_code)
9189 {
9190 case NE: return AARCH64_VS;
9191 case EQ: return AARCH64_VC;
9192 default: return -1;
9193 }
9194 break;
9195
43e9d192 9196 default:
cd5660ab 9197 return -1;
43e9d192 9198 }
3dfa7055 9199
3dfa7055 9200 return -1;
43e9d192
IB
9201}
9202
ddeabd3e
AL
9203bool
9204aarch64_const_vec_all_same_in_range_p (rtx x,
6a70badb
RS
9205 HOST_WIDE_INT minval,
9206 HOST_WIDE_INT maxval)
ddeabd3e 9207{
6a70badb
RS
9208 rtx elt;
9209 return (const_vec_duplicate_p (x, &elt)
9210 && CONST_INT_P (elt)
9211 && IN_RANGE (INTVAL (elt), minval, maxval));
ddeabd3e
AL
9212}
9213
9214bool
9215aarch64_const_vec_all_same_int_p (rtx x, HOST_WIDE_INT val)
9216{
9217 return aarch64_const_vec_all_same_in_range_p (x, val, val);
9218}
9219
43cacb12
RS
9220/* Return true if VEC is a constant in which every element is in the range
9221 [MINVAL, MAXVAL]. The elements do not need to have the same value. */
9222
9223static bool
9224aarch64_const_vec_all_in_range_p (rtx vec,
9225 HOST_WIDE_INT minval,
9226 HOST_WIDE_INT maxval)
9227{
9228 if (GET_CODE (vec) != CONST_VECTOR
9229 || GET_MODE_CLASS (GET_MODE (vec)) != MODE_VECTOR_INT)
9230 return false;
9231
9232 int nunits;
9233 if (!CONST_VECTOR_STEPPED_P (vec))
9234 nunits = const_vector_encoded_nelts (vec);
9235 else if (!CONST_VECTOR_NUNITS (vec).is_constant (&nunits))
9236 return false;
9237
9238 for (int i = 0; i < nunits; i++)
9239 {
9240 rtx vec_elem = CONST_VECTOR_ELT (vec, i);
9241 if (!CONST_INT_P (vec_elem)
9242 || !IN_RANGE (INTVAL (vec_elem), minval, maxval))
9243 return false;
9244 }
9245 return true;
9246}
43e9d192 9247
cf670503
ZC
9248/* N Z C V. */
9249#define AARCH64_CC_V 1
9250#define AARCH64_CC_C (1 << 1)
9251#define AARCH64_CC_Z (1 << 2)
9252#define AARCH64_CC_N (1 << 3)
9253
c8012fbc
WD
9254/* N Z C V flags for ccmp. Indexed by AARCH64_COND_CODE. */
9255static const int aarch64_nzcv_codes[] =
9256{
9257 0, /* EQ, Z == 1. */
9258 AARCH64_CC_Z, /* NE, Z == 0. */
9259 0, /* CS, C == 1. */
9260 AARCH64_CC_C, /* CC, C == 0. */
9261 0, /* MI, N == 1. */
9262 AARCH64_CC_N, /* PL, N == 0. */
9263 0, /* VS, V == 1. */
9264 AARCH64_CC_V, /* VC, V == 0. */
9265 0, /* HI, C == 1 && Z == 0. */
9266 AARCH64_CC_C, /* LS, !(C == 1 && Z == 0). */
9267 AARCH64_CC_V, /* GE, N == V. */
9268 0, /* LT, N != V. */
9269 AARCH64_CC_Z, /* GT, Z == 0 && N == V. */
9270 0, /* LE, !(Z == 0 && N == V). */
9271 0, /* AL, Any. */
9272 0 /* NV, Any. */
cf670503
ZC
9273};
9274
43cacb12
RS
9275/* Print floating-point vector immediate operand X to F, negating it
9276 first if NEGATE is true. Return true on success, false if it isn't
9277 a constant we can handle. */
9278
9279static bool
9280aarch64_print_vector_float_operand (FILE *f, rtx x, bool negate)
9281{
9282 rtx elt;
9283
9284 if (!const_vec_duplicate_p (x, &elt))
9285 return false;
9286
9287 REAL_VALUE_TYPE r = *CONST_DOUBLE_REAL_VALUE (elt);
9288 if (negate)
9289 r = real_value_negate (&r);
9290
d29f7dd5
RS
9291 /* Handle the SVE single-bit immediates specially, since they have a
9292 fixed form in the assembly syntax. */
43cacb12
RS
9293 if (real_equal (&r, &dconst0))
9294 asm_fprintf (f, "0.0");
a19ba9e1
RS
9295 else if (real_equal (&r, &dconst2))
9296 asm_fprintf (f, "2.0");
43cacb12
RS
9297 else if (real_equal (&r, &dconst1))
9298 asm_fprintf (f, "1.0");
9299 else if (real_equal (&r, &dconsthalf))
9300 asm_fprintf (f, "0.5");
9301 else
d29f7dd5
RS
9302 {
9303 const int buf_size = 20;
9304 char float_buf[buf_size] = {'\0'};
9305 real_to_decimal_for_mode (float_buf, &r, buf_size, buf_size,
9306 1, GET_MODE (elt));
9307 asm_fprintf (f, "%s", float_buf);
9308 }
43cacb12
RS
9309
9310 return true;
9311}
9312
9f4cbab8
RS
9313/* Return the equivalent letter for size. */
9314static char
9315sizetochar (int size)
9316{
9317 switch (size)
9318 {
9319 case 64: return 'd';
9320 case 32: return 's';
9321 case 16: return 'h';
9322 case 8 : return 'b';
9323 default: gcc_unreachable ();
9324 }
9325}
9326
bcf19844
JW
9327/* Print operand X to file F in a target specific manner according to CODE.
9328 The acceptable formatting commands given by CODE are:
9329 'c': An integer or symbol address without a preceding #
9330 sign.
43cacb12
RS
9331 'C': Take the duplicated element in a vector constant
9332 and print it in hex.
9333 'D': Take the duplicated element in a vector constant
9334 and print it as an unsigned integer, in decimal.
bcf19844 9335 'e': Print the sign/zero-extend size as a character 8->b,
d113ece6
RS
9336 16->h, 32->w. Can also be used for masks:
9337 0xff->b, 0xffff->h, 0xffffffff->w.
d29f7dd5
RS
9338 'I': If the operand is a duplicated vector constant,
9339 replace it with the duplicated scalar. If the
9340 operand is then a floating-point constant, replace
9341 it with the integer bit representation. Print the
9342 transformed constant as a signed decimal number.
bcf19844
JW
9343 'p': Prints N such that 2^N == X (X must be power of 2 and
9344 const int).
9345 'P': Print the number of non-zero bits in X (a const_int).
9346 'H': Print the higher numbered register of a pair (TImode)
9347 of regs.
9348 'm': Print a condition (eq, ne, etc).
9349 'M': Same as 'm', but invert condition.
43cacb12
RS
9350 'N': Take the duplicated element in a vector constant
9351 and print the negative of it in decimal.
bcf19844
JW
9352 'b/h/s/d/q': Print a scalar FP/SIMD register name.
9353 'S/T/U/V': Print a FP/SIMD register name for a register list.
9354 The register printed is the FP/SIMD register name
9355 of X + 0/1/2/3 for S/T/U/V.
e3f15286 9356 'R': Print a scalar Integer/FP/SIMD register name + 1.
bcf19844
JW
9357 'X': Print bottom 16 bits of integer constant in hex.
9358 'w/x': Print a general register name or the zero register
9359 (32-bit or 64-bit).
9360 '0': Print a normal operand, if it's a general register,
9361 then we assume DImode.
9362 'k': Print NZCV for conditional compare instructions.
9363 'A': Output address constant representing the first
9364 argument of X, specifying a relocation offset
9365 if appropriate.
9366 'L': Output constant address specified by X
9367 with a relocation offset if appropriate.
9368 'G': Prints address of X, specifying a PC relative
e69a816d
WD
9369 relocation mode if appropriate.
9370 'y': Output address of LDP or STP - this is used for
9371 some LDP/STPs which don't use a PARALLEL in their
9372 pattern (so the mode needs to be adjusted).
9373 'z': Output address of a typical LDP or STP. */
bcf19844 9374
cc8ca59e
JB
9375static void
9376aarch64_print_operand (FILE *f, rtx x, int code)
43e9d192 9377{
43cacb12 9378 rtx elt;
43e9d192
IB
9379 switch (code)
9380 {
f541a481
KT
9381 case 'c':
9382 switch (GET_CODE (x))
9383 {
9384 case CONST_INT:
9385 fprintf (f, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
9386 break;
9387
9388 case SYMBOL_REF:
9389 output_addr_const (f, x);
9390 break;
9391
9392 case CONST:
9393 if (GET_CODE (XEXP (x, 0)) == PLUS
9394 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF)
9395 {
9396 output_addr_const (f, x);
9397 break;
9398 }
9399 /* Fall through. */
9400
9401 default:
ee61f880 9402 output_operand_lossage ("unsupported operand for code '%c'", code);
f541a481
KT
9403 }
9404 break;
9405
43e9d192 9406 case 'e':
43e9d192 9407 {
d113ece6
RS
9408 x = unwrap_const_vec_duplicate (x);
9409 if (!CONST_INT_P (x))
43e9d192
IB
9410 {
9411 output_operand_lossage ("invalid operand for '%%%c'", code);
9412 return;
9413 }
9414
d113ece6
RS
9415 HOST_WIDE_INT val = INTVAL (x);
9416 if ((val & ~7) == 8 || val == 0xff)
9417 fputc ('b', f);
9418 else if ((val & ~7) == 16 || val == 0xffff)
9419 fputc ('h', f);
9420 else if ((val & ~7) == 32 || val == 0xffffffff)
9421 fputc ('w', f);
9422 else
43e9d192 9423 {
43e9d192
IB
9424 output_operand_lossage ("invalid operand for '%%%c'", code);
9425 return;
9426 }
9427 }
9428 break;
9429
9430 case 'p':
9431 {
9432 int n;
9433
4aa81c2e 9434 if (!CONST_INT_P (x) || (n = exact_log2 (INTVAL (x))) < 0)
43e9d192
IB
9435 {
9436 output_operand_lossage ("invalid operand for '%%%c'", code);
9437 return;
9438 }
9439
9440 asm_fprintf (f, "%d", n);
9441 }
9442 break;
9443
9444 case 'P':
4aa81c2e 9445 if (!CONST_INT_P (x))
43e9d192
IB
9446 {
9447 output_operand_lossage ("invalid operand for '%%%c'", code);
9448 return;
9449 }
9450
8d55c61b 9451 asm_fprintf (f, "%u", popcount_hwi (INTVAL (x)));
43e9d192
IB
9452 break;
9453
9454 case 'H':
c0111dc4
RE
9455 if (x == const0_rtx)
9456 {
9457 asm_fprintf (f, "xzr");
9458 break;
9459 }
9460
4aa81c2e 9461 if (!REG_P (x) || !GP_REGNUM_P (REGNO (x) + 1))
43e9d192
IB
9462 {
9463 output_operand_lossage ("invalid operand for '%%%c'", code);
9464 return;
9465 }
9466
01a3a324 9467 asm_fprintf (f, "%s", reg_names [REGNO (x) + 1]);
43e9d192
IB
9468 break;
9469
d29f7dd5
RS
9470 case 'I':
9471 {
9472 x = aarch64_bit_representation (unwrap_const_vec_duplicate (x));
9473 if (CONST_INT_P (x))
9474 asm_fprintf (f, "%wd", INTVAL (x));
9475 else
9476 {
9477 output_operand_lossage ("invalid operand for '%%%c'", code);
9478 return;
9479 }
9480 break;
9481 }
9482
43e9d192 9483 case 'M':
c8012fbc 9484 case 'm':
cd5660ab
KT
9485 {
9486 int cond_code;
c8012fbc
WD
9487 /* CONST_TRUE_RTX means al/nv (al is the default, don't print it). */
9488 if (x == const_true_rtx)
cd5660ab 9489 {
c8012fbc
WD
9490 if (code == 'M')
9491 fputs ("nv", f);
cd5660ab
KT
9492 return;
9493 }
43e9d192 9494
cd5660ab
KT
9495 if (!COMPARISON_P (x))
9496 {
9497 output_operand_lossage ("invalid operand for '%%%c'", code);
9498 return;
9499 }
c8012fbc 9500
cd5660ab
KT
9501 cond_code = aarch64_get_condition_code (x);
9502 gcc_assert (cond_code >= 0);
c8012fbc
WD
9503 if (code == 'M')
9504 cond_code = AARCH64_INVERSE_CONDITION_CODE (cond_code);
57d6f4d0
RS
9505 if (GET_MODE (XEXP (x, 0)) == CC_NZCmode)
9506 fputs (aarch64_sve_condition_codes[cond_code], f);
9507 else
9508 fputs (aarch64_condition_codes[cond_code], f);
cd5660ab 9509 }
43e9d192
IB
9510 break;
9511
43cacb12
RS
9512 case 'N':
9513 if (!const_vec_duplicate_p (x, &elt))
9514 {
9515 output_operand_lossage ("invalid vector constant");
9516 return;
9517 }
9518
9519 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_VECTOR_INT)
9520 asm_fprintf (f, "%wd", -INTVAL (elt));
9521 else if (GET_MODE_CLASS (GET_MODE (x)) == MODE_VECTOR_FLOAT
9522 && aarch64_print_vector_float_operand (f, x, true))
9523 ;
9524 else
9525 {
9526 output_operand_lossage ("invalid vector constant");
9527 return;
9528 }
9529 break;
9530
43e9d192
IB
9531 case 'b':
9532 case 'h':
9533 case 's':
9534 case 'd':
9535 case 'q':
43e9d192
IB
9536 if (!REG_P (x) || !FP_REGNUM_P (REGNO (x)))
9537 {
9538 output_operand_lossage ("incompatible floating point / vector register operand for '%%%c'", code);
9539 return;
9540 }
50ce6f88 9541 asm_fprintf (f, "%c%d", code, REGNO (x) - V0_REGNUM);
43e9d192
IB
9542 break;
9543
9544 case 'S':
9545 case 'T':
9546 case 'U':
9547 case 'V':
43e9d192
IB
9548 if (!REG_P (x) || !FP_REGNUM_P (REGNO (x)))
9549 {
9550 output_operand_lossage ("incompatible floating point / vector register operand for '%%%c'", code);
9551 return;
9552 }
43cacb12
RS
9553 asm_fprintf (f, "%c%d",
9554 aarch64_sve_data_mode_p (GET_MODE (x)) ? 'z' : 'v',
9555 REGNO (x) - V0_REGNUM + (code - 'S'));
43e9d192
IB
9556 break;
9557
2d8c6dc1 9558 case 'R':
e3f15286
RH
9559 if (REG_P (x) && FP_REGNUM_P (REGNO (x)))
9560 asm_fprintf (f, "q%d", REGNO (x) - V0_REGNUM + 1);
9561 else if (REG_P (x) && GP_REGNUM_P (REGNO (x)))
9562 asm_fprintf (f, "x%d", REGNO (x) - R0_REGNUM + 1);
9563 else
9564 output_operand_lossage ("incompatible register operand for '%%%c'",
9565 code);
2d8c6dc1
AH
9566 break;
9567
a05c0ddf 9568 case 'X':
4aa81c2e 9569 if (!CONST_INT_P (x))
a05c0ddf
IB
9570 {
9571 output_operand_lossage ("invalid operand for '%%%c'", code);
9572 return;
9573 }
50d38551 9574 asm_fprintf (f, "0x%wx", UINTVAL (x) & 0xffff);
a05c0ddf
IB
9575 break;
9576
43cacb12
RS
9577 case 'C':
9578 {
9579 /* Print a replicated constant in hex. */
9580 if (!const_vec_duplicate_p (x, &elt) || !CONST_INT_P (elt))
9581 {
9582 output_operand_lossage ("invalid operand for '%%%c'", code);
9583 return;
9584 }
9585 scalar_mode inner_mode = GET_MODE_INNER (GET_MODE (x));
9586 asm_fprintf (f, "0x%wx", UINTVAL (elt) & GET_MODE_MASK (inner_mode));
9587 }
9588 break;
9589
9590 case 'D':
9591 {
9592 /* Print a replicated constant in decimal, treating it as
9593 unsigned. */
9594 if (!const_vec_duplicate_p (x, &elt) || !CONST_INT_P (elt))
9595 {
9596 output_operand_lossage ("invalid operand for '%%%c'", code);
9597 return;
9598 }
9599 scalar_mode inner_mode = GET_MODE_INNER (GET_MODE (x));
9600 asm_fprintf (f, "%wd", UINTVAL (elt) & GET_MODE_MASK (inner_mode));
9601 }
9602 break;
9603
43e9d192
IB
9604 case 'w':
9605 case 'x':
3520f7cc
JG
9606 if (x == const0_rtx
9607 || (CONST_DOUBLE_P (x) && aarch64_float_const_zero_rtx_p (x)))
43e9d192 9608 {
50ce6f88 9609 asm_fprintf (f, "%czr", code);
43e9d192
IB
9610 break;
9611 }
9612
9613 if (REG_P (x) && GP_REGNUM_P (REGNO (x)))
9614 {
50ce6f88 9615 asm_fprintf (f, "%c%d", code, REGNO (x) - R0_REGNUM);
43e9d192
IB
9616 break;
9617 }
9618
9619 if (REG_P (x) && REGNO (x) == SP_REGNUM)
9620 {
50ce6f88 9621 asm_fprintf (f, "%ssp", code == 'w' ? "w" : "");
43e9d192
IB
9622 break;
9623 }
9624
9625 /* Fall through */
9626
9627 case 0:
43e9d192
IB
9628 if (x == NULL)
9629 {
9630 output_operand_lossage ("missing operand");
9631 return;
9632 }
9633
9634 switch (GET_CODE (x))
9635 {
9636 case REG:
43cacb12 9637 if (aarch64_sve_data_mode_p (GET_MODE (x)))
9f4cbab8
RS
9638 {
9639 if (REG_NREGS (x) == 1)
9640 asm_fprintf (f, "z%d", REGNO (x) - V0_REGNUM);
9641 else
9642 {
9643 char suffix
9644 = sizetochar (GET_MODE_UNIT_BITSIZE (GET_MODE (x)));
9645 asm_fprintf (f, "{z%d.%c - z%d.%c}",
9646 REGNO (x) - V0_REGNUM, suffix,
9647 END_REGNO (x) - V0_REGNUM - 1, suffix);
9648 }
9649 }
43cacb12
RS
9650 else
9651 asm_fprintf (f, "%s", reg_names [REGNO (x)]);
43e9d192
IB
9652 break;
9653
9654 case MEM:
cc8ca59e 9655 output_address (GET_MODE (x), XEXP (x, 0));
43e9d192
IB
9656 break;
9657
9658 case LABEL_REF:
9659 case SYMBOL_REF:
9660 output_addr_const (asm_out_file, x);
9661 break;
9662
9663 case CONST_INT:
9664 asm_fprintf (f, "%wd", INTVAL (x));
9665 break;
9666
43cacb12
RS
9667 case CONST:
9668 if (!VECTOR_MODE_P (GET_MODE (x)))
3520f7cc 9669 {
43cacb12
RS
9670 output_addr_const (asm_out_file, x);
9671 break;
3520f7cc 9672 }
43cacb12
RS
9673 /* fall through */
9674
9675 case CONST_VECTOR:
9676 if (!const_vec_duplicate_p (x, &elt))
3520f7cc 9677 {
43cacb12
RS
9678 output_operand_lossage ("invalid vector constant");
9679 return;
3520f7cc 9680 }
43cacb12
RS
9681
9682 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_VECTOR_INT)
9683 asm_fprintf (f, "%wd", INTVAL (elt));
9684 else if (GET_MODE_CLASS (GET_MODE (x)) == MODE_VECTOR_FLOAT
9685 && aarch64_print_vector_float_operand (f, x, false))
9686 ;
3520f7cc 9687 else
43cacb12
RS
9688 {
9689 output_operand_lossage ("invalid vector constant");
9690 return;
9691 }
43e9d192
IB
9692 break;
9693
3520f7cc 9694 case CONST_DOUBLE:
2ca5b430
KT
9695 /* Since we define TARGET_SUPPORTS_WIDE_INT we shouldn't ever
9696 be getting CONST_DOUBLEs holding integers. */
9697 gcc_assert (GET_MODE (x) != VOIDmode);
9698 if (aarch64_float_const_zero_rtx_p (x))
3520f7cc
JG
9699 {
9700 fputc ('0', f);
9701 break;
9702 }
9703 else if (aarch64_float_const_representable_p (x))
9704 {
9705#define buf_size 20
9706 char float_buf[buf_size] = {'\0'};
34a72c33
RS
9707 real_to_decimal_for_mode (float_buf,
9708 CONST_DOUBLE_REAL_VALUE (x),
3520f7cc
JG
9709 buf_size, buf_size,
9710 1, GET_MODE (x));
9711 asm_fprintf (asm_out_file, "%s", float_buf);
9712 break;
9713#undef buf_size
9714 }
9715 output_operand_lossage ("invalid constant");
9716 return;
43e9d192
IB
9717 default:
9718 output_operand_lossage ("invalid operand");
9719 return;
9720 }
9721 break;
9722
9723 case 'A':
9724 if (GET_CODE (x) == HIGH)
9725 x = XEXP (x, 0);
9726
a6e0bfa7 9727 switch (aarch64_classify_symbolic_expression (x))
43e9d192 9728 {
6642bdb4 9729 case SYMBOL_SMALL_GOT_4G:
43e9d192
IB
9730 asm_fprintf (asm_out_file, ":got:");
9731 break;
9732
9733 case SYMBOL_SMALL_TLSGD:
9734 asm_fprintf (asm_out_file, ":tlsgd:");
9735 break;
9736
9737 case SYMBOL_SMALL_TLSDESC:
9738 asm_fprintf (asm_out_file, ":tlsdesc:");
9739 break;
9740
79496620 9741 case SYMBOL_SMALL_TLSIE:
43e9d192
IB
9742 asm_fprintf (asm_out_file, ":gottprel:");
9743 break;
9744
d18ba284 9745 case SYMBOL_TLSLE24:
43e9d192
IB
9746 asm_fprintf (asm_out_file, ":tprel:");
9747 break;
9748
87dd8ab0
MS
9749 case SYMBOL_TINY_GOT:
9750 gcc_unreachable ();
9751 break;
9752
43e9d192
IB
9753 default:
9754 break;
9755 }
9756 output_addr_const (asm_out_file, x);
9757 break;
9758
9759 case 'L':
a6e0bfa7 9760 switch (aarch64_classify_symbolic_expression (x))
43e9d192 9761 {
6642bdb4 9762 case SYMBOL_SMALL_GOT_4G:
43e9d192
IB
9763 asm_fprintf (asm_out_file, ":lo12:");
9764 break;
9765
9766 case SYMBOL_SMALL_TLSGD:
9767 asm_fprintf (asm_out_file, ":tlsgd_lo12:");
9768 break;
9769
9770 case SYMBOL_SMALL_TLSDESC:
9771 asm_fprintf (asm_out_file, ":tlsdesc_lo12:");
9772 break;
9773
79496620 9774 case SYMBOL_SMALL_TLSIE:
43e9d192
IB
9775 asm_fprintf (asm_out_file, ":gottprel_lo12:");
9776 break;
9777
cbf5629e
JW
9778 case SYMBOL_TLSLE12:
9779 asm_fprintf (asm_out_file, ":tprel_lo12:");
9780 break;
9781
d18ba284 9782 case SYMBOL_TLSLE24:
43e9d192
IB
9783 asm_fprintf (asm_out_file, ":tprel_lo12_nc:");
9784 break;
9785
87dd8ab0
MS
9786 case SYMBOL_TINY_GOT:
9787 asm_fprintf (asm_out_file, ":got:");
9788 break;
9789
5ae7caad
JW
9790 case SYMBOL_TINY_TLSIE:
9791 asm_fprintf (asm_out_file, ":gottprel:");
9792 break;
9793
43e9d192
IB
9794 default:
9795 break;
9796 }
9797 output_addr_const (asm_out_file, x);
9798 break;
9799
9800 case 'G':
a6e0bfa7 9801 switch (aarch64_classify_symbolic_expression (x))
43e9d192 9802 {
d18ba284 9803 case SYMBOL_TLSLE24:
43e9d192
IB
9804 asm_fprintf (asm_out_file, ":tprel_hi12:");
9805 break;
9806 default:
9807 break;
9808 }
9809 output_addr_const (asm_out_file, x);
9810 break;
9811
cf670503
ZC
9812 case 'k':
9813 {
c8012fbc 9814 HOST_WIDE_INT cond_code;
cf670503 9815
c8012fbc 9816 if (!CONST_INT_P (x))
cf670503
ZC
9817 {
9818 output_operand_lossage ("invalid operand for '%%%c'", code);
9819 return;
9820 }
9821
c8012fbc
WD
9822 cond_code = INTVAL (x);
9823 gcc_assert (cond_code >= 0 && cond_code <= AARCH64_NV);
9824 asm_fprintf (f, "%d", aarch64_nzcv_codes[cond_code]);
cf670503
ZC
9825 }
9826 break;
9827
e69a816d
WD
9828 case 'y':
9829 case 'z':
9830 {
9831 machine_mode mode = GET_MODE (x);
9832
c348cab0 9833 if (GET_CODE (x) != MEM
6a70badb 9834 || (code == 'y' && maybe_ne (GET_MODE_SIZE (mode), 16)))
e69a816d
WD
9835 {
9836 output_operand_lossage ("invalid operand for '%%%c'", code);
9837 return;
9838 }
9839
a25831ac
AV
9840 if (!aarch64_print_address_internal (f, mode, XEXP (x, 0),
9841 code == 'y'
9842 ? ADDR_QUERY_LDP_STP_N
9843 : ADDR_QUERY_LDP_STP))
c348cab0 9844 output_operand_lossage ("invalid operand prefix '%%%c'", code);
e69a816d
WD
9845 }
9846 break;
9847
43e9d192
IB
9848 default:
9849 output_operand_lossage ("invalid operand prefix '%%%c'", code);
9850 return;
9851 }
9852}
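/* Editorial sketch, not part of this file: the 'w'/'x' modifiers handled
   above are the same operand modifiers available to inline asm, e.g. "%w0"
   prints the 32-bit name of the register holding operand 0.  The helper
   name is illustrative and the asm only assembles when targeting AArch64.  */

static inline unsigned int
aarch64_add_one_sketch (unsigned int v)
{
  unsigned int out;
  __asm__ ("add %w0, %w1, #1" : "=r" (out) : "r" (v));
  return out;
}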
9853
e69a816d
WD
9854/* Print address 'x' of a memory access with mode 'mode'.
9855 'type' is the aarch64_addr_query_type context required by
9856 aarch64_classify_address, e.g. ADDR_QUERY_LDP_STP for an LDP/STP access. */
c348cab0 9857static bool
a97d8b98
RS
9858aarch64_print_address_internal (FILE *f, machine_mode mode, rtx x,
9859 aarch64_addr_query_type type)
43e9d192
IB
9860{
9861 struct aarch64_address_info addr;
550a3380 9862 unsigned int size, vec_flags;
43e9d192 9863
e69a816d 9864 /* Check all addresses are Pmode - including ILP32. */
31460ed2
JJ
9865 if (GET_MODE (x) != Pmode
9866 && (!CONST_INT_P (x)
9867 || trunc_int_for_mode (INTVAL (x), Pmode) != INTVAL (x)))
9868 {
9869 output_operand_lossage ("invalid address mode");
9870 return false;
9871 }
e69a816d 9872
a97d8b98 9873 if (aarch64_classify_address (&addr, x, mode, true, type))
43e9d192
IB
9874 switch (addr.type)
9875 {
9876 case ADDRESS_REG_IMM:
dc640181 9877 if (known_eq (addr.const_offset, 0))
43cacb12 9878 {
550a3380
RS
9879 asm_fprintf (f, "[%s]", reg_names[REGNO (addr.base)]);
9880 return true;
43cacb12 9881 }
550a3380
RS
9882
9883 vec_flags = aarch64_classify_vector_mode (mode);
9884 if (vec_flags & VEC_ANY_SVE)
43cacb12
RS
9885 {
9886 HOST_WIDE_INT vnum
9887 = exact_div (addr.const_offset,
550a3380 9888 aarch64_vl_bytes (mode, vec_flags)).to_constant ();
43cacb12
RS
9889 asm_fprintf (f, "[%s, #%wd, mul vl]",
9890 reg_names[REGNO (addr.base)], vnum);
550a3380 9891 return true;
43cacb12 9892 }
550a3380
RS
9893
9894 asm_fprintf (f, "[%s, %wd]", reg_names[REGNO (addr.base)],
9895 INTVAL (addr.offset));
c348cab0 9896 return true;
43e9d192
IB
9897
9898 case ADDRESS_REG_REG:
9899 if (addr.shift == 0)
16a3246f 9900 asm_fprintf (f, "[%s, %s]", reg_names [REGNO (addr.base)],
01a3a324 9901 reg_names [REGNO (addr.offset)]);
43e9d192 9902 else
16a3246f 9903 asm_fprintf (f, "[%s, %s, lsl %u]", reg_names [REGNO (addr.base)],
01a3a324 9904 reg_names [REGNO (addr.offset)], addr.shift);
c348cab0 9905 return true;
43e9d192
IB
9906
9907 case ADDRESS_REG_UXTW:
9908 if (addr.shift == 0)
16a3246f 9909 asm_fprintf (f, "[%s, w%d, uxtw]", reg_names [REGNO (addr.base)],
43e9d192
IB
9910 REGNO (addr.offset) - R0_REGNUM);
9911 else
16a3246f 9912 asm_fprintf (f, "[%s, w%d, uxtw %u]", reg_names [REGNO (addr.base)],
43e9d192 9913 REGNO (addr.offset) - R0_REGNUM, addr.shift);
c348cab0 9914 return true;
43e9d192
IB
9915
9916 case ADDRESS_REG_SXTW:
9917 if (addr.shift == 0)
16a3246f 9918 asm_fprintf (f, "[%s, w%d, sxtw]", reg_names [REGNO (addr.base)],
43e9d192
IB
9919 REGNO (addr.offset) - R0_REGNUM);
9920 else
16a3246f 9921 asm_fprintf (f, "[%s, w%d, sxtw %u]", reg_names [REGNO (addr.base)],
43e9d192 9922 REGNO (addr.offset) - R0_REGNUM, addr.shift);
c348cab0 9923 return true;
43e9d192
IB
9924
9925 case ADDRESS_REG_WB:
6a70badb
RS
9926 /* Writeback is only supported for fixed-width modes. */
9927 size = GET_MODE_SIZE (mode).to_constant ();
43e9d192
IB
9928 switch (GET_CODE (x))
9929 {
9930 case PRE_INC:
6a70badb 9931 asm_fprintf (f, "[%s, %d]!", reg_names [REGNO (addr.base)], size);
c348cab0 9932 return true;
43e9d192 9933 case POST_INC:
6a70badb 9934 asm_fprintf (f, "[%s], %d", reg_names [REGNO (addr.base)], size);
c348cab0 9935 return true;
43e9d192 9936 case PRE_DEC:
6a70badb 9937 asm_fprintf (f, "[%s, -%d]!", reg_names [REGNO (addr.base)], size);
c348cab0 9938 return true;
43e9d192 9939 case POST_DEC:
6a70badb 9940 asm_fprintf (f, "[%s], -%d", reg_names [REGNO (addr.base)], size);
c348cab0 9941 return true;
43e9d192 9942 case PRE_MODIFY:
6a70badb 9943 asm_fprintf (f, "[%s, %wd]!", reg_names[REGNO (addr.base)],
43e9d192 9944 INTVAL (addr.offset));
c348cab0 9945 return true;
43e9d192 9946 case POST_MODIFY:
6a70badb 9947 asm_fprintf (f, "[%s], %wd", reg_names[REGNO (addr.base)],
43e9d192 9948 INTVAL (addr.offset));
c348cab0 9949 return true;
43e9d192
IB
9950 default:
9951 break;
9952 }
9953 break;
9954
9955 case ADDRESS_LO_SUM:
16a3246f 9956 asm_fprintf (f, "[%s, #:lo12:", reg_names [REGNO (addr.base)]);
43e9d192
IB
9957 output_addr_const (f, addr.offset);
9958 asm_fprintf (f, "]");
c348cab0 9959 return true;
43e9d192
IB
9960
9961 case ADDRESS_SYMBOLIC:
d6591257 9962 output_addr_const (f, x);
c348cab0 9963 return true;
43e9d192
IB
9964 }
9965
c348cab0 9966 return false;
43e9d192
IB
9967}
9968
e69a816d
WD
9969/* Print address 'x' of a memory access with mode 'mode'. */
9970static void
9971aarch64_print_operand_address (FILE *f, machine_mode mode, rtx x)
9972{
43cacb12 9973 if (!aarch64_print_address_internal (f, mode, x, ADDR_QUERY_ANY))
c348cab0 9974 output_addr_const (f, x);
e69a816d
WD
9975}
9976
43e9d192
IB
9977bool
9978aarch64_label_mentioned_p (rtx x)
9979{
9980 const char *fmt;
9981 int i;
9982
9983 if (GET_CODE (x) == LABEL_REF)
9984 return true;
9985
9986 /* UNSPEC_TLS entries for a symbol include a LABEL_REF for the
9987 referencing instruction, but they are constant offsets, not
9988 symbols. */
9989 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
9990 return false;
9991
9992 fmt = GET_RTX_FORMAT (GET_CODE (x));
9993 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
9994 {
9995 if (fmt[i] == 'E')
9996 {
9997 int j;
9998
9999 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
10000 if (aarch64_label_mentioned_p (XVECEXP (x, i, j)))
10001 return 1;
10002 }
10003 else if (fmt[i] == 'e' && aarch64_label_mentioned_p (XEXP (x, i)))
10004 return 1;
10005 }
10006
10007 return 0;
10008}
10009
10010/* Implement REGNO_REG_CLASS. */
10011
10012enum reg_class
10013aarch64_regno_regclass (unsigned regno)
10014{
10015 if (GP_REGNUM_P (regno))
a4a182c6 10016 return GENERAL_REGS;
43e9d192
IB
10017
10018 if (regno == SP_REGNUM)
10019 return STACK_REG;
10020
10021 if (regno == FRAME_POINTER_REGNUM
10022 || regno == ARG_POINTER_REGNUM)
f24bb080 10023 return POINTER_REGS;
43e9d192
IB
10024
10025 if (FP_REGNUM_P (regno))
163b1f6a
RS
10026 return (FP_LO8_REGNUM_P (regno) ? FP_LO8_REGS
10027 : FP_LO_REGNUM_P (regno) ? FP_LO_REGS : FP_REGS);
43e9d192 10028
43cacb12
RS
10029 if (PR_REGNUM_P (regno))
10030 return PR_LO_REGNUM_P (regno) ? PR_LO_REGS : PR_HI_REGS;
10031
183bfdaf
RS
10032 if (regno == FFR_REGNUM || regno == FFRT_REGNUM)
10033 return FFR_REGS;
10034
43e9d192
IB
10035 return NO_REGS;
10036}
10037
6a70badb
RS
10038/* OFFSET is an address offset for mode MODE, which has SIZE bytes.
10039 If OFFSET is out of range, return an offset of an anchor point
10040 that is in range. Return 0 otherwise. */
10041
10042static HOST_WIDE_INT
10043aarch64_anchor_offset (HOST_WIDE_INT offset, HOST_WIDE_INT size,
10044 machine_mode mode)
10045{
10046 /* Does it look like we'll need a 16-byte load/store-pair operation? */
10047 if (size > 16)
10048 return (offset + 0x400) & ~0x7f0;
10049
10050 /* For offsets that aren't a multiple of the access size, the limit is
10051 -256...255. */
10052 if (offset & (size - 1))
10053 {
10054 /* BLKmode typically uses LDP of X-registers. */
10055 if (mode == BLKmode)
10056 return (offset + 512) & ~0x3ff;
10057 return (offset + 0x100) & ~0x1ff;
10058 }
10059
10060 /* Small negative offsets are supported. */
10061 if (IN_RANGE (offset, -256, 0))
10062 return 0;
10063
10064 if (mode == TImode || mode == TFmode)
10065 return (offset + 0x100) & ~0x1ff;
10066
10067 /* Use a 12-bit offset, scaled by the access size. */
10068 return offset & (~0xfff * size);
10069}
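/* Editorial sketch, not part of this file: a standalone copy of the
   anchoring rules above for the common cases (the BLKmode and TImode/TFmode
   branches are omitted).  For a 4-byte access at offset 0x4120 the anchor
   is 0x4000, leaving a residual of 0x120 that fits the scaled 12-bit
   unsigned immediate of a single LDR/STR.  */

static long long
aarch64_anchor_offset_sketch (long long offset, long long size)
{
  if (size > 16)
    return (offset + 0x400) & ~0x7f0;
  if (offset & (size - 1))
    return (offset + 0x100) & ~0x1ff;
  if (offset >= -256 && offset <= 0)
    return 0;
  return offset & (~0xfff * size);  /* Round down to a 12-bit scaled anchor.  */
}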
10070
0c4ec427 10071static rtx
ef4bddc2 10072aarch64_legitimize_address (rtx x, rtx /* orig_x */, machine_mode mode)
0c4ec427
RE
10073{
10074 /* Try to split X+CONST into Y=X+(CONST & ~mask), Y+(CONST&mask),
10075 where mask is selected by alignment and size of the offset.
10076 We try to pick as large a range for the offset as possible to
10077 maximize the chance of a CSE. However, for aligned addresses
10078 we limit the range to 4k so that structures with different sized
e8426e0a
BC
10079 elements are likely to use the same base. We need to be careful
10080 not to split a CONST for some forms of address expression, otherwise
10081 it will generate sub-optimal code. */
0c4ec427
RE
10082
10083 if (GET_CODE (x) == PLUS && CONST_INT_P (XEXP (x, 1)))
10084 {
9e0218fc 10085 rtx base = XEXP (x, 0);
17d7bdd8 10086 rtx offset_rtx = XEXP (x, 1);
9e0218fc 10087 HOST_WIDE_INT offset = INTVAL (offset_rtx);
0c4ec427 10088
9e0218fc 10089 if (GET_CODE (base) == PLUS)
e8426e0a 10090 {
9e0218fc
RH
10091 rtx op0 = XEXP (base, 0);
10092 rtx op1 = XEXP (base, 1);
10093
10094 /* Force any scaling into a temp for CSE. */
10095 op0 = force_reg (Pmode, op0);
10096 op1 = force_reg (Pmode, op1);
10097
10098 /* Let the pointer register be in op0. */
10099 if (REG_POINTER (op1))
10100 std::swap (op0, op1);
10101
10102 /* If the pointer is virtual or frame related, then we know that
10103 virtual register instantiation or register elimination is going
10104 to apply a second constant. We want the two constants folded
10105 together easily. Therefore, emit as (OP0 + CONST) + OP1. */
10106 if (virt_or_elim_regno_p (REGNO (op0)))
e8426e0a 10107 {
9e0218fc
RH
10108 base = expand_binop (Pmode, add_optab, op0, offset_rtx,
10109 NULL_RTX, true, OPTAB_DIRECT);
10110 return gen_rtx_PLUS (Pmode, base, op1);
e8426e0a 10111 }
e8426e0a 10112
9e0218fc
RH
10113 /* Otherwise, in order to encourage CSE (and thence loop strength
10114 reduction) of scaled addresses, emit as (OP0 + OP1) + CONST. */
10115 base = expand_binop (Pmode, add_optab, op0, op1,
10116 NULL_RTX, true, OPTAB_DIRECT);
10117 x = gen_rtx_PLUS (Pmode, base, offset_rtx);
e8426e0a
BC
10118 }
10119
6a70badb
RS
10120 HOST_WIDE_INT size;
10121 if (GET_MODE_SIZE (mode).is_constant (&size))
ff0f3f1c 10122 {
6a70badb
RS
10123 HOST_WIDE_INT base_offset = aarch64_anchor_offset (offset, size,
10124 mode);
10125 if (base_offset != 0)
10126 {
10127 base = plus_constant (Pmode, base, base_offset);
10128 base = force_operand (base, NULL_RTX);
10129 return plus_constant (Pmode, base, offset - base_offset);
10130 }
9e0218fc 10131 }
0c4ec427
RE
10132 }
10133
10134 return x;
10135}
10136
43e9d192
IB
10137static reg_class_t
10138aarch64_secondary_reload (bool in_p ATTRIBUTE_UNUSED, rtx x,
10139 reg_class_t rclass,
ef4bddc2 10140 machine_mode mode,
43e9d192
IB
10141 secondary_reload_info *sri)
10142{
cc68f7c2
RS
10143 /* Use aarch64_sve_reload_mem for SVE memory reloads that cannot use
10144 LDR and STR. See the comment at the head of aarch64-sve.md for
10145 more details about the big-endian handling. */
10146 if (reg_class_subset_p (rclass, FP_REGS)
9a1b9cb4
RS
10147 && !((REG_P (x) && HARD_REGISTER_P (x))
10148 || aarch64_simd_valid_immediate (x, NULL))
cc68f7c2 10149 && mode != VNx16QImode)
43cacb12 10150 {
cc68f7c2
RS
10151 unsigned int vec_flags = aarch64_classify_vector_mode (mode);
10152 if ((vec_flags & VEC_SVE_DATA)
10153 && ((vec_flags & VEC_PARTIAL) || BYTES_BIG_ENDIAN))
10154 {
10155 sri->icode = CODE_FOR_aarch64_sve_reload_mem;
10156 return NO_REGS;
10157 }
43cacb12 10158 }
b4f50fd4
RR
10159
10160 /* If we have to disable direct literal pool loads and stores because the
10161 function is too big, then we need a scratch register. */
10162 if (MEM_P (x) && GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x)
10163 && (SCALAR_FLOAT_MODE_P (GET_MODE (x))
10164 || targetm.vector_mode_supported_p (GET_MODE (x)))
9ee6540a 10165 && !aarch64_pcrelative_literal_loads)
b4f50fd4 10166 {
0016d8d9 10167 sri->icode = code_for_aarch64_reload_movcp (mode, DImode);
b4f50fd4
RR
10168 return NO_REGS;
10169 }
10170
43e9d192
IB
10171 /* Without the TARGET_SIMD instructions we cannot move a Q register
10172 to a Q register directly. We need a scratch. */
10173 if (REG_P (x) && (mode == TFmode || mode == TImode) && mode == GET_MODE (x)
10174 && FP_REGNUM_P (REGNO (x)) && !TARGET_SIMD
10175 && reg_class_subset_p (rclass, FP_REGS))
10176 {
0016d8d9 10177 sri->icode = code_for_aarch64_reload_mov (mode);
43e9d192
IB
10178 return NO_REGS;
10179 }
10180
10181 /* A TFmode or TImode memory access should be handled via FP_REGS
10182 because AArch64 has richer addressing modes for LDR/STR instructions
10183 than for LDP/STP instructions. */
d5726973 10184 if (TARGET_FLOAT && rclass == GENERAL_REGS
6a70badb 10185 && known_eq (GET_MODE_SIZE (mode), 16) && MEM_P (x))
43e9d192
IB
10186 return FP_REGS;
10187
10188 if (rclass == FP_REGS && (mode == TImode || mode == TFmode) && CONSTANT_P(x))
a4a182c6 10189 return GENERAL_REGS;
43e9d192
IB
10190
10191 return NO_REGS;
10192}
10193
10194static bool
6216fd90 10195aarch64_can_eliminate (const int from ATTRIBUTE_UNUSED, const int to)
43e9d192 10196{
6216fd90 10197 gcc_assert (from == ARG_POINTER_REGNUM || from == FRAME_POINTER_REGNUM);
43e9d192 10198
6216fd90
WD
10199 /* If we need a frame pointer, ARG_POINTER_REGNUM and FRAME_POINTER_REGNUM
10200 can only eliminate to HARD_FRAME_POINTER_REGNUM. */
43e9d192 10201 if (frame_pointer_needed)
6216fd90 10202 return to == HARD_FRAME_POINTER_REGNUM;
43e9d192
IB
10203 return true;
10204}
10205
6a70badb 10206poly_int64
43e9d192
IB
10207aarch64_initial_elimination_offset (unsigned from, unsigned to)
10208{
78c29983
MS
10209 if (to == HARD_FRAME_POINTER_REGNUM)
10210 {
10211 if (from == ARG_POINTER_REGNUM)
71bfb77a 10212 return cfun->machine->frame.hard_fp_offset;
78c29983
MS
10213
10214 if (from == FRAME_POINTER_REGNUM)
71bfb77a
WD
10215 return cfun->machine->frame.hard_fp_offset
10216 - cfun->machine->frame.locals_offset;
78c29983
MS
10217 }
10218
10219 if (to == STACK_POINTER_REGNUM)
10220 {
10221 if (from == FRAME_POINTER_REGNUM)
71bfb77a
WD
10222 return cfun->machine->frame.frame_size
10223 - cfun->machine->frame.locals_offset;
78c29983
MS
10224 }
10225
1c960e02 10226 return cfun->machine->frame.frame_size;
43e9d192
IB
10227}
10228
43e9d192
IB
10229/* Implement RETURN_ADDR_RTX. We do not support moving back to a
10230 previous frame. */
10231
10232rtx
10233aarch64_return_addr (int count, rtx frame ATTRIBUTE_UNUSED)
10234{
10235 if (count != 0)
10236 return const0_rtx;
10237 return get_hard_reg_initial_val (Pmode, LR_REGNUM);
10238}
10239
10240
10241static void
10242aarch64_asm_trampoline_template (FILE *f)
10243{
b5f794b4
SD
10244 int offset1 = 16;
10245 int offset2 = 20;
10246
10247 if (aarch64_bti_enabled ())
10248 {
10249 asm_fprintf (f, "\thint\t34 // bti c\n");
10250 offset1 -= 4;
10251 offset2 -= 4;
10252 }
10253
28514dda
YZ
10254 if (TARGET_ILP32)
10255 {
b5f794b4
SD
10256 asm_fprintf (f, "\tldr\tw%d, .+%d\n", IP1_REGNUM - R0_REGNUM, offset1);
10257 asm_fprintf (f, "\tldr\tw%d, .+%d\n", STATIC_CHAIN_REGNUM - R0_REGNUM,
10258 offset1);
28514dda
YZ
10259 }
10260 else
10261 {
b5f794b4
SD
10262 asm_fprintf (f, "\tldr\t%s, .+%d\n", reg_names [IP1_REGNUM], offset1);
10263 asm_fprintf (f, "\tldr\t%s, .+%d\n", reg_names [STATIC_CHAIN_REGNUM],
10264 offset2);
28514dda 10265 }
01a3a324 10266 asm_fprintf (f, "\tbr\t%s\n", reg_names [IP1_REGNUM]);
b5f794b4
SD
10267
10268 /* The trampoline needs an extra padding instruction. If BTI is
10269 enabled, the padding instruction is replaced by the BTI instruction
10270 at the beginning. */
10271 if (!aarch64_bti_enabled ())
10272 assemble_aligned_integer (4, const0_rtx);
10273
28514dda
YZ
10274 assemble_aligned_integer (POINTER_BYTES, const0_rtx);
10275 assemble_aligned_integer (POINTER_BYTES, const0_rtx);
43e9d192
IB
10276}
10277
10278static void
10279aarch64_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
10280{
10281 rtx fnaddr, mem, a_tramp;
28514dda 10282 const int tramp_code_sz = 16;
43e9d192
IB
10283
10284 /* Don't need to copy the trailing D-words, we fill those in below. */
10285 emit_block_move (m_tramp, assemble_trampoline_template (),
28514dda
YZ
10286 GEN_INT (tramp_code_sz), BLOCK_OP_NORMAL);
10287 mem = adjust_address (m_tramp, ptr_mode, tramp_code_sz);
43e9d192 10288 fnaddr = XEXP (DECL_RTL (fndecl), 0);
28514dda
YZ
10289 if (GET_MODE (fnaddr) != ptr_mode)
10290 fnaddr = convert_memory_address (ptr_mode, fnaddr);
43e9d192
IB
10291 emit_move_insn (mem, fnaddr);
10292
28514dda 10293 mem = adjust_address (m_tramp, ptr_mode, tramp_code_sz + POINTER_BYTES);
43e9d192
IB
10294 emit_move_insn (mem, chain_value);
10295
10296 /* XXX We should really define a "clear_cache" pattern and use
10297 gen_clear_cache(). */
10298 a_tramp = XEXP (m_tramp, 0);
10299 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__clear_cache"),
db69559b 10300 LCT_NORMAL, VOIDmode, a_tramp, ptr_mode,
28514dda
YZ
10301 plus_constant (ptr_mode, a_tramp, TRAMPOLINE_SIZE),
10302 ptr_mode);
43e9d192
IB
10303}
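/* Editorial sketch, not part of this file: the trampoline laid out by the
   two routines above, viewed as a struct for the LP64 case (ILP32 instead
   packs two 4-byte pointers at offsets 16 and 20).  */

struct aarch64_trampoline_layout_sketch
{
  unsigned int code[4];       /* 16 bytes: (bti c,) ldr, ldr, br, padding.  */
  unsigned long long fnaddr;  /* Written at offset tramp_code_sz (16).  */
  unsigned long long chain;   /* Written at offset 16 + POINTER_BYTES.  */
};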
10304
10305static unsigned char
ef4bddc2 10306aarch64_class_max_nregs (reg_class_t regclass, machine_mode mode)
43e9d192 10307{
6a70badb
RS
10308 /* ??? Logically we should only need to provide a value when
10309 HARD_REGNO_MODE_OK says that at least one register in REGCLASS
10310 can hold MODE, but at the moment we need to handle all modes.
10311 Just ignore any runtime parts for registers that can't store them. */
10312 HOST_WIDE_INT lowest_size = constant_lower_bound (GET_MODE_SIZE (mode));
550a3380 10313 unsigned int nregs, vec_flags;
43e9d192
IB
10314 switch (regclass)
10315 {
d677263e 10316 case TAILCALL_ADDR_REGS:
43e9d192
IB
10317 case POINTER_REGS:
10318 case GENERAL_REGS:
10319 case ALL_REGS:
f25a140b 10320 case POINTER_AND_FP_REGS:
43e9d192
IB
10321 case FP_REGS:
10322 case FP_LO_REGS:
163b1f6a 10323 case FP_LO8_REGS:
550a3380
RS
10324 vec_flags = aarch64_classify_vector_mode (mode);
10325 if ((vec_flags & VEC_SVE_DATA)
43cacb12 10326 && constant_multiple_p (GET_MODE_SIZE (mode),
550a3380 10327 aarch64_vl_bytes (mode, vec_flags), &nregs))
43cacb12 10328 return nregs;
550a3380 10329 return (vec_flags & VEC_ADVSIMD
6a70badb
RS
10330 ? CEIL (lowest_size, UNITS_PER_VREG)
10331 : CEIL (lowest_size, UNITS_PER_WORD));
43e9d192 10332 case STACK_REG:
43cacb12
RS
10333 case PR_REGS:
10334 case PR_LO_REGS:
10335 case PR_HI_REGS:
183bfdaf
RS
10336 case FFR_REGS:
10337 case PR_AND_FFR_REGS:
43e9d192
IB
10338 return 1;
10339
10340 case NO_REGS:
10341 return 0;
10342
10343 default:
10344 break;
10345 }
10346 gcc_unreachable ();
10347}
10348
10349static reg_class_t
78d8b9f0 10350aarch64_preferred_reload_class (rtx x, reg_class_t regclass)
43e9d192 10351{
51bb310d 10352 if (regclass == POINTER_REGS)
78d8b9f0
IB
10353 return GENERAL_REGS;
10354
51bb310d
MS
10355 if (regclass == STACK_REG)
10356 {
10357 if (REG_P(x)
10358 && reg_class_subset_p (REGNO_REG_CLASS (REGNO (x)), POINTER_REGS))
10359 return regclass;
10360
10361 return NO_REGS;
10362 }
10363
27bd251b
IB
10364 /* Register elimination can result in a request for
10365 SP+constant->FP_REGS. We cannot support such operations, which
10366 use SP as the source and an FP_REG as the destination, so reject
10367 them outright. */
10368 if (! reg_class_subset_p (regclass, GENERAL_REGS) && GET_CODE (x) == PLUS)
10369 {
10370 rtx lhs = XEXP (x, 0);
10371
10372 /* Look through a possible SUBREG introduced by ILP32. */
10373 if (GET_CODE (lhs) == SUBREG)
10374 lhs = SUBREG_REG (lhs);
10375
10376 gcc_assert (REG_P (lhs));
10377 gcc_assert (reg_class_subset_p (REGNO_REG_CLASS (REGNO (lhs)),
10378 POINTER_REGS));
10379 return NO_REGS;
10380 }
10381
78d8b9f0 10382 return regclass;
43e9d192
IB
10383}
10384
10385void
10386aarch64_asm_output_labelref (FILE* f, const char *name)
10387{
10388 asm_fprintf (f, "%U%s", name);
10389}
10390
10391static void
10392aarch64_elf_asm_constructor (rtx symbol, int priority)
10393{
10394 if (priority == DEFAULT_INIT_PRIORITY)
10395 default_ctor_section_asm_out_constructor (symbol, priority);
10396 else
10397 {
10398 section *s;
53d190c1
AT
10399 /* While priority is known to be in range [0, 65535], so 18 bytes
10400 would be enough, the compiler might not know that. To avoid
10401 -Wformat-truncation false positive, use a larger size. */
10402 char buf[23];
43e9d192 10403 snprintf (buf, sizeof (buf), ".init_array.%.5u", priority);
fcef3abd 10404 s = get_section (buf, SECTION_WRITE | SECTION_NOTYPE, NULL);
43e9d192
IB
10405 switch_to_section (s);
10406 assemble_align (POINTER_SIZE);
28514dda 10407 assemble_aligned_integer (POINTER_BYTES, symbol);
43e9d192
IB
10408 }
10409}
10410
10411static void
10412aarch64_elf_asm_destructor (rtx symbol, int priority)
10413{
10414 if (priority == DEFAULT_INIT_PRIORITY)
10415 default_dtor_section_asm_out_destructor (symbol, priority);
10416 else
10417 {
10418 section *s;
53d190c1
AT
10419 /* While priority is known to be in range [0, 65535], so 18 bytes
10420 would be enough, the compiler might not know that. To avoid
10421 -Wformat-truncation false positive, use a larger size. */
10422 char buf[23];
43e9d192 10423 snprintf (buf, sizeof (buf), ".fini_array.%.5u", priority);
fcef3abd 10424 s = get_section (buf, SECTION_WRITE | SECTION_NOTYPE, NULL);
43e9d192
IB
10425 switch_to_section (s);
10426 assemble_align (POINTER_SIZE);
28514dda 10427 assemble_aligned_integer (POINTER_BYTES, symbol);
43e9d192
IB
10428 }
10429}
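/* Editorial sketch, not part of this file: a prioritised constructor of the
   kind the hooks above handle.  Assuming the attribute priority is passed
   through unchanged, the "%.5u" format above would place it in a section
   named ".init_array.12345" (destructors go to ".fini_array.NNNNN").  */

__attribute__ ((constructor (12345)))
static void
aarch64_ctor_priority_sketch (void)
{
  /* Among prioritised constructors, lower numbers run earlier.  */
}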
10430
10431const char*
10432aarch64_output_casesi (rtx *operands)
10433{
10434 char buf[100];
10435 char label[100];
b32d5189 10436 rtx diff_vec = PATTERN (NEXT_INSN (as_a <rtx_insn *> (operands[2])));
43e9d192
IB
10437 int index;
10438 static const char *const patterns[4][2] =
10439 {
10440 {
10441 "ldrb\t%w3, [%0,%w1,uxtw]",
10442 "add\t%3, %4, %w3, sxtb #2"
10443 },
10444 {
10445 "ldrh\t%w3, [%0,%w1,uxtw #1]",
10446 "add\t%3, %4, %w3, sxth #2"
10447 },
10448 {
10449 "ldr\t%w3, [%0,%w1,uxtw #2]",
10450 "add\t%3, %4, %w3, sxtw #2"
10451 },
10452 /* We assume that DImode is only generated when not optimizing and
10453 that we don't really need 64-bit address offsets. That would
10454 imply an object file with 8GB of code in a single function! */
10455 {
10456 "ldr\t%w3, [%0,%w1,uxtw #2]",
10457 "add\t%3, %4, %w3, sxtw #2"
10458 }
10459 };
10460
10461 gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC);
10462
77e994c9
RS
10463 scalar_int_mode mode = as_a <scalar_int_mode> (GET_MODE (diff_vec));
10464 index = exact_log2 (GET_MODE_SIZE (mode));
43e9d192
IB
10465
10466 gcc_assert (index >= 0 && index <= 3);
10467
10468 /* Need to implement table size reduction, by changing the code below. */
10469 output_asm_insn (patterns[index][0], operands);
10470 ASM_GENERATE_INTERNAL_LABEL (label, "Lrtx", CODE_LABEL_NUMBER (operands[2]));
10471 snprintf (buf, sizeof (buf),
10472 "adr\t%%4, %s", targetm.strip_name_encoding (label));
10473 output_asm_insn (buf, operands);
10474 output_asm_insn (patterns[index][1], operands);
10475 output_asm_insn ("br\t%3", operands);
10476 assemble_label (asm_out_file, label);
10477 return "";
10478}
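/* Editorial sketch, not part of this file: a dense switch of the sort that
   is typically lowered through a casesi dispatch table.  For a byte-sized
   table the routine above emits, in order, patterns[0][0]
   ("ldrb\t%w3, [%0,%w1,uxtw]"), an "adr\t%4, .Lrtx<N>" to the table label,
   patterns[0][1] ("add\t%3, %4, %w3, sxtb #2") and finally "br\t%3".  */

static int
aarch64_casesi_sketch (int i)
{
  switch (i)
    {
    case 0: return 10;
    case 1: return 11;
    case 2: return 12;
    case 3: return 13;
    case 4: return 14;
    default: return -1;
    }
}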
10479
10480
10481/* Return size in bits of an arithmetic operand which is shifted/scaled and
10482 masked such that it is suitable for a UXTB, UXTH, or UXTW extend
10483 operator. */
10484
10485int
10486aarch64_uxt_size (int shift, HOST_WIDE_INT mask)
10487{
10488 if (shift >= 0 && shift <= 3)
10489 {
10490 int size;
10491 for (size = 8; size <= 32; size *= 2)
10492 {
10493 HOST_WIDE_INT bits = ((HOST_WIDE_INT)1U << size) - 1;
10494 if (mask == bits << shift)
10495 return size;
10496 }
10497 }
10498 return 0;
10499}
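/* Editorial sketch, not part of this file: a worked example of the loop
   above.  For shift 2 and mask 0x3fc we have (0xff << 2) == 0x3fc, so the
   operand behaves as an 8-bit value scaled by 4 and the function returns 8,
   meaning a UXTB-style extend is usable.  */

static int
aarch64_uxt_size_example (void)
{
  return ((0xffLL << 2) == 0x3fc) ? 8 : 0;  /* Evaluates to 8.  */
}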
10500
e78d485e
RR
10501/* Constant pools are per function only when PC-relative
10502 literal loads are in use or we are using the large memory
10503 model. */
10504
10505static inline bool
10506aarch64_can_use_per_function_literal_pools_p (void)
10507{
9ee6540a 10508 return (aarch64_pcrelative_literal_loads
e78d485e
RR
10509 || aarch64_cmodel == AARCH64_CMODEL_LARGE);
10510}
10511
43e9d192 10512static bool
e78d485e 10513aarch64_use_blocks_for_constant_p (machine_mode, const_rtx)
43e9d192 10514{
74a9301d
VM
10515 /* We can't use blocks for constants when we're using a per-function
10516 constant pool. */
10517 return !aarch64_can_use_per_function_literal_pools_p ();
43e9d192
IB
10518}
10519
e78d485e
RR
10520/* Select appropriate section for constants depending
10521 on where we place literal pools. */
10522
43e9d192 10523static section *
e78d485e
RR
10524aarch64_select_rtx_section (machine_mode mode,
10525 rtx x,
10526 unsigned HOST_WIDE_INT align)
43e9d192 10527{
e78d485e
RR
10528 if (aarch64_can_use_per_function_literal_pools_p ())
10529 return function_section (current_function_decl);
43e9d192 10530
e78d485e
RR
10531 return default_elf_select_rtx_section (mode, x, align);
10532}
43e9d192 10533
5fca7b66
RH
10534/* Implement ASM_OUTPUT_POOL_EPILOGUE. */
10535void
10536aarch64_asm_output_pool_epilogue (FILE *f, const char *, tree,
10537 HOST_WIDE_INT offset)
10538{
10539 /* When using per-function literal pools, we must ensure that any code
10540 section is aligned to the minimal instruction length, lest we get
10541 errors from the assembler re "unaligned instructions". */
10542 if ((offset & 3) && aarch64_can_use_per_function_literal_pools_p ())
10543 ASM_OUTPUT_ALIGN (f, 2);
10544}
10545
43e9d192
IB
10546/* Costs. */
10547
10548/* Helper function for rtx cost calculation. Strip a shift expression
10549 from X. Returns the inner operand if successful, or the original
10550 expression on failure. */
10551static rtx
10552aarch64_strip_shift (rtx x)
10553{
10554 rtx op = x;
10555
57b77d46
RE
10556 /* We accept both ROTATERT and ROTATE: since the RHS must be a constant
10557 we can convert both to ROR during final output. */
43e9d192
IB
10558 if ((GET_CODE (op) == ASHIFT
10559 || GET_CODE (op) == ASHIFTRT
57b77d46
RE
10560 || GET_CODE (op) == LSHIFTRT
10561 || GET_CODE (op) == ROTATERT
10562 || GET_CODE (op) == ROTATE)
43e9d192
IB
10563 && CONST_INT_P (XEXP (op, 1)))
10564 return XEXP (op, 0);
10565
10566 if (GET_CODE (op) == MULT
10567 && CONST_INT_P (XEXP (op, 1))
10568 && ((unsigned) exact_log2 (INTVAL (XEXP (op, 1)))) < 64)
10569 return XEXP (op, 0);
10570
10571 return x;
10572}
10573
4745e701 10574/* Helper function for rtx cost calculation. Strip an extend
43e9d192
IB
10575 expression from X. Returns the inner operand if successful, or the
10576 original expression on failure. We deal with a number of possible
b10f1009
AP
10577 canonicalization variations here. If STRIP_SHIFT is true, then
10578 we can strip off a shift also. */
43e9d192 10579static rtx
b10f1009 10580aarch64_strip_extend (rtx x, bool strip_shift)
43e9d192 10581{
77e994c9 10582 scalar_int_mode mode;
43e9d192
IB
10583 rtx op = x;
10584
77e994c9
RS
10585 if (!is_a <scalar_int_mode> (GET_MODE (op), &mode))
10586 return op;
10587
43e9d192
IB
10588 /* Zero and sign extraction of a widened value. */
10589 if ((GET_CODE (op) == ZERO_EXTRACT || GET_CODE (op) == SIGN_EXTRACT)
10590 && XEXP (op, 2) == const0_rtx
4745e701 10591 && GET_CODE (XEXP (op, 0)) == MULT
77e994c9 10592 && aarch64_is_extend_from_extract (mode, XEXP (XEXP (op, 0), 1),
43e9d192
IB
10593 XEXP (op, 1)))
10594 return XEXP (XEXP (op, 0), 0);
10595
10596 /* It can also be represented (for zero-extend) as an AND with an
10597 immediate. */
10598 if (GET_CODE (op) == AND
10599 && GET_CODE (XEXP (op, 0)) == MULT
10600 && CONST_INT_P (XEXP (XEXP (op, 0), 1))
10601 && CONST_INT_P (XEXP (op, 1))
10602 && aarch64_uxt_size (exact_log2 (INTVAL (XEXP (XEXP (op, 0), 1))),
10603 INTVAL (XEXP (op, 1))) != 0)
10604 return XEXP (XEXP (op, 0), 0);
10605
10606 /* Now handle extended register, as this may also have an optional
10607 left shift by 1..4. */
b10f1009
AP
10608 if (strip_shift
10609 && GET_CODE (op) == ASHIFT
43e9d192
IB
10610 && CONST_INT_P (XEXP (op, 1))
10611 && ((unsigned HOST_WIDE_INT) INTVAL (XEXP (op, 1))) <= 4)
10612 op = XEXP (op, 0);
10613
10614 if (GET_CODE (op) == ZERO_EXTEND
10615 || GET_CODE (op) == SIGN_EXTEND)
10616 op = XEXP (op, 0);
10617
10618 if (op != x)
10619 return op;
10620
4745e701
JG
10621 return x;
10622}
10623
0a78ebe4
KT
10624/* Return true iff CODE is a shift supported in combination
10625 with arithmetic instructions. */
4d1919ed 10626
0a78ebe4
KT
10627static bool
10628aarch64_shift_p (enum rtx_code code)
10629{
10630 return code == ASHIFT || code == ASHIFTRT || code == LSHIFTRT;
10631}
10632
b10f1009
AP
10633
10634/* Return true iff X is a cheap shift without a sign extend. */
10635
10636static bool
10637aarch64_cheap_mult_shift_p (rtx x)
10638{
10639 rtx op0, op1;
10640
10641 op0 = XEXP (x, 0);
10642 op1 = XEXP (x, 1);
10643
10644 if (!(aarch64_tune_params.extra_tuning_flags
10645 & AARCH64_EXTRA_TUNE_CHEAP_SHIFT_EXTEND))
10646 return false;
10647
10648 if (GET_CODE (op0) == SIGN_EXTEND)
10649 return false;
10650
10651 if (GET_CODE (x) == ASHIFT && CONST_INT_P (op1)
10652 && UINTVAL (op1) <= 4)
10653 return true;
10654
10655 if (GET_CODE (x) != MULT || !CONST_INT_P (op1))
10656 return false;
10657
10658 HOST_WIDE_INT l2 = exact_log2 (INTVAL (op1));
10659
10660 if (l2 > 0 && l2 <= 4)
10661 return true;
10662
10663 return false;
10664}
10665
4745e701 10666/* Helper function for rtx cost calculation. Calculate the cost of
0a78ebe4
KT
10667 a MULT or ASHIFT, which may be part of a compound PLUS/MINUS rtx.
10668 Return the calculated cost of the expression, recursing manually in to
4745e701
JG
10669 operands where needed. */
10670
10671static int
e548c9df 10672aarch64_rtx_mult_cost (rtx x, enum rtx_code code, int outer, bool speed)
4745e701
JG
10673{
10674 rtx op0, op1;
10675 const struct cpu_cost_table *extra_cost
b175b679 10676 = aarch64_tune_params.insn_extra_cost;
4745e701 10677 int cost = 0;
0a78ebe4 10678 bool compound_p = (outer == PLUS || outer == MINUS);
ef4bddc2 10679 machine_mode mode = GET_MODE (x);
4745e701
JG
10680
10681 gcc_checking_assert (code == MULT);
10682
10683 op0 = XEXP (x, 0);
10684 op1 = XEXP (x, 1);
10685
10686 if (VECTOR_MODE_P (mode))
10687 mode = GET_MODE_INNER (mode);
10688
10689 /* Integer multiply/fma. */
10690 if (GET_MODE_CLASS (mode) == MODE_INT)
10691 {
10692 /* The multiply will be canonicalized as a shift, cost it as such. */
0a78ebe4
KT
10693 if (aarch64_shift_p (GET_CODE (x))
10694 || (CONST_INT_P (op1)
10695 && exact_log2 (INTVAL (op1)) > 0))
4745e701 10696 {
0a78ebe4
KT
10697 bool is_extend = GET_CODE (op0) == ZERO_EXTEND
10698 || GET_CODE (op0) == SIGN_EXTEND;
4745e701
JG
10699 if (speed)
10700 {
0a78ebe4
KT
10701 if (compound_p)
10702 {
b10f1009
AP
10703 /* If the shift is considered cheap,
10704 then don't add any cost. */
10705 if (aarch64_cheap_mult_shift_p (x))
10706 ;
10707 else if (REG_P (op1))
0a78ebe4
KT
10708 /* ARITH + shift-by-register. */
10709 cost += extra_cost->alu.arith_shift_reg;
10710 else if (is_extend)
10711 /* ARITH + extended register. We don't have a cost field
10712 for ARITH+EXTEND+SHIFT, so use extend_arith here. */
10713 cost += extra_cost->alu.extend_arith;
10714 else
10715 /* ARITH + shift-by-immediate. */
10716 cost += extra_cost->alu.arith_shift;
10717 }
4745e701
JG
10718 else
10719 /* LSL (immediate). */
0a78ebe4
KT
10720 cost += extra_cost->alu.shift;
10721
4745e701 10722 }
0a78ebe4
KT
10723 /* Strip extends as we will have costed them in the case above. */
10724 if (is_extend)
b10f1009 10725 op0 = aarch64_strip_extend (op0, true);
4745e701 10726
e548c9df 10727 cost += rtx_cost (op0, VOIDmode, code, 0, speed);
4745e701
JG
10728
10729 return cost;
10730 }
10731
d2ac256b
KT
10732 /* MNEG or [US]MNEGL. Extract the NEG operand and indicate that it's a
10733 compound and let the below cases handle it. After all, MNEG is a
10734 special-case alias of MSUB. */
10735 if (GET_CODE (op0) == NEG)
10736 {
10737 op0 = XEXP (op0, 0);
10738 compound_p = true;
10739 }
10740
4745e701
JG
10741 /* Integer multiplies or FMAs have zero/sign extending variants. */
10742 if ((GET_CODE (op0) == ZERO_EXTEND
10743 && GET_CODE (op1) == ZERO_EXTEND)
10744 || (GET_CODE (op0) == SIGN_EXTEND
10745 && GET_CODE (op1) == SIGN_EXTEND))
10746 {
e548c9df
AM
10747 cost += rtx_cost (XEXP (op0, 0), VOIDmode, MULT, 0, speed);
10748 cost += rtx_cost (XEXP (op1, 0), VOIDmode, MULT, 1, speed);
4745e701
JG
10749
10750 if (speed)
10751 {
0a78ebe4 10752 if (compound_p)
d2ac256b 10753 /* SMADDL/UMADDL/UMSUBL/SMSUBL. */
4745e701
JG
10754 cost += extra_cost->mult[0].extend_add;
10755 else
10756 /* MUL/SMULL/UMULL. */
10757 cost += extra_cost->mult[0].extend;
10758 }
10759
10760 return cost;
10761 }
10762
d2ac256b 10763 /* This is either an integer multiply or a MADD. In both cases
4745e701 10764 we want to recurse and cost the operands. */
e548c9df
AM
10765 cost += rtx_cost (op0, mode, MULT, 0, speed);
10766 cost += rtx_cost (op1, mode, MULT, 1, speed);
4745e701
JG
10767
10768 if (speed)
10769 {
0a78ebe4 10770 if (compound_p)
d2ac256b 10771 /* MADD/MSUB. */
4745e701
JG
10772 cost += extra_cost->mult[mode == DImode].add;
10773 else
10774 /* MUL. */
10775 cost += extra_cost->mult[mode == DImode].simple;
10776 }
10777
10778 return cost;
10779 }
10780 else
10781 {
10782 if (speed)
10783 {
3d840f7d 10784 /* Floating-point FMA/FMUL can also support negations of the
d318517d
SN
10785 operands, unless the rounding mode is upward or downward in
10786 which case FNMUL is different than FMUL with operand negation. */
10787 bool neg0 = GET_CODE (op0) == NEG;
10788 bool neg1 = GET_CODE (op1) == NEG;
10789 if (compound_p || !flag_rounding_math || (neg0 && neg1))
10790 {
10791 if (neg0)
10792 op0 = XEXP (op0, 0);
10793 if (neg1)
10794 op1 = XEXP (op1, 0);
10795 }
4745e701 10796
0a78ebe4 10797 if (compound_p)
4745e701
JG
10798 /* FMADD/FNMADD/FNMSUB/FMSUB. */
10799 cost += extra_cost->fp[mode == DFmode].fma;
10800 else
3d840f7d 10801 /* FMUL/FNMUL. */
4745e701
JG
10802 cost += extra_cost->fp[mode == DFmode].mult;
10803 }
10804
e548c9df
AM
10805 cost += rtx_cost (op0, mode, MULT, 0, speed);
10806 cost += rtx_cost (op1, mode, MULT, 1, speed);
4745e701
JG
10807 return cost;
10808 }
43e9d192
IB
10809}
10810
67747367
JG
10811static int
10812aarch64_address_cost (rtx x,
ef4bddc2 10813 machine_mode mode,
67747367
JG
10814 addr_space_t as ATTRIBUTE_UNUSED,
10815 bool speed)
10816{
10817 enum rtx_code c = GET_CODE (x);
b175b679 10818 const struct cpu_addrcost_table *addr_cost = aarch64_tune_params.addr_cost;
67747367
JG
10819 struct aarch64_address_info info;
10820 int cost = 0;
10821 info.shift = 0;
10822
a97d8b98 10823 if (!aarch64_classify_address (&info, x, mode, false))
67747367
JG
10824 {
10825 if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF)
10826 {
10827 /* This is a CONST or SYMBOL ref which will be split
10828 in a different way depending on the code model in use.
10829 Cost it through the generic infrastructure. */
e548c9df 10830 int cost_symbol_ref = rtx_cost (x, Pmode, MEM, 1, speed);
67747367
JG
10831 /* Divide through by the cost of one instruction to
10832 bring it to the same units as the address costs. */
10833 cost_symbol_ref /= COSTS_N_INSNS (1);
10834 /* The cost is then the cost of preparing the address,
10835 followed by an immediate (possibly 0) offset. */
10836 return cost_symbol_ref + addr_cost->imm_offset;
10837 }
10838 else
10839 {
10840 /* This is most likely a jump table from a case
10841 statement. */
10842 return addr_cost->register_offset;
10843 }
10844 }
10845
10846 switch (info.type)
10847 {
10848 case ADDRESS_LO_SUM:
10849 case ADDRESS_SYMBOLIC:
10850 case ADDRESS_REG_IMM:
10851 cost += addr_cost->imm_offset;
10852 break;
10853
10854 case ADDRESS_REG_WB:
10855 if (c == PRE_INC || c == PRE_DEC || c == PRE_MODIFY)
10856 cost += addr_cost->pre_modify;
10857 else if (c == POST_INC || c == POST_DEC || c == POST_MODIFY)
10858 cost += addr_cost->post_modify;
10859 else
10860 gcc_unreachable ();
10861
10862 break;
10863
10864 case ADDRESS_REG_REG:
10865 cost += addr_cost->register_offset;
10866 break;
10867
67747367 10868 case ADDRESS_REG_SXTW:
783879e6
EM
10869 cost += addr_cost->register_sextend;
10870 break;
10871
10872 case ADDRESS_REG_UXTW:
10873 cost += addr_cost->register_zextend;
67747367
JG
10874 break;
10875
10876 default:
10877 gcc_unreachable ();
10878 }
10879
10880
10881 if (info.shift > 0)
10882 {
10883 /* For the sake of calculating the cost of the shifted register
10884 component, we can treat same sized modes in the same way. */
6a70badb
RS
10885 if (known_eq (GET_MODE_BITSIZE (mode), 16))
10886 cost += addr_cost->addr_scale_costs.hi;
10887 else if (known_eq (GET_MODE_BITSIZE (mode), 32))
10888 cost += addr_cost->addr_scale_costs.si;
10889 else if (known_eq (GET_MODE_BITSIZE (mode), 64))
10890 cost += addr_cost->addr_scale_costs.di;
10891 else
10892 /* We can't tell, or this is a 128-bit vector. */
10893 cost += addr_cost->addr_scale_costs.ti;
67747367
JG
10894 }
10895
10896 return cost;
10897}
10898
b9066f5a
MW
10899/* Return the cost of a branch. If SPEED_P is true then the compiler is
10900 optimizing for speed. If PREDICTABLE_P is true then the branch is predicted
10901 to be taken. */
10902
10903int
10904aarch64_branch_cost (bool speed_p, bool predictable_p)
10905{
10906 /* When optimizing for speed, use the cost of unpredictable branches. */
10907 const struct cpu_branch_cost *branch_costs =
b175b679 10908 aarch64_tune_params.branch_costs;
b9066f5a
MW
10909
10910 if (!speed_p || predictable_p)
10911 return branch_costs->predictable;
10912 else
10913 return branch_costs->unpredictable;
10914}
10915
7cc2145f
JG
10916/* Return true if the RTX X in mode MODE is a zero or sign extract
10917 usable in an ADD or SUB (extended register) instruction. */
10918static bool
77e994c9 10919aarch64_rtx_arith_op_extract_p (rtx x, scalar_int_mode mode)
7cc2145f
JG
10920{
10921 /* Catch add with a sign extract.
10922 This is add_<optab><mode>_multp2. */
10923 if (GET_CODE (x) == SIGN_EXTRACT
10924 || GET_CODE (x) == ZERO_EXTRACT)
10925 {
10926 rtx op0 = XEXP (x, 0);
10927 rtx op1 = XEXP (x, 1);
10928 rtx op2 = XEXP (x, 2);
10929
10930 if (GET_CODE (op0) == MULT
10931 && CONST_INT_P (op1)
10932 && op2 == const0_rtx
10933 && CONST_INT_P (XEXP (op0, 1))
10934 && aarch64_is_extend_from_extract (mode,
10935 XEXP (op0, 1),
10936 op1))
10937 {
10938 return true;
10939 }
10940 }
e47c4031
KT
10941 /* The simple case <ARITH>, XD, XN, XM, [us]xt.
10942 No shift. */
10943 else if (GET_CODE (x) == SIGN_EXTEND
10944 || GET_CODE (x) == ZERO_EXTEND)
10945 return REG_P (XEXP (x, 0));
7cc2145f
JG
10946
10947 return false;
10948}
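/* Editorial sketch, not part of this file: an addition whose second operand
   is a sign-extended 32-bit value -- the simple extend case accepted above,
   corresponding to the ADD (extended register) form, e.g.
   "add x0, x0, w1, sxtw" modulo register allocation.  */

static long long
aarch64_add_extended_sketch (long long a, int b)
{
  return a + b;  /* B is sign-extended to 64 bits before the addition.  */
}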
10949
61263118
KT
10950static bool
10951aarch64_frint_unspec_p (unsigned int u)
10952{
10953 switch (u)
10954 {
10955 case UNSPEC_FRINTZ:
10956 case UNSPEC_FRINTP:
10957 case UNSPEC_FRINTM:
10958 case UNSPEC_FRINTA:
10959 case UNSPEC_FRINTN:
10960 case UNSPEC_FRINTX:
10961 case UNSPEC_FRINTI:
10962 return true;
10963
10964 default:
10965 return false;
10966 }
10967}
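/* Editorial sketch, not part of this file: the rounding builtins whose
   AArch64 expansions use the FRINT* unspecs listed above (mapping shown in
   the comments; the exact code generated depends on options).  */

double aarch64_trunc_sketch (double x) { return __builtin_trunc (x); }          /* UNSPEC_FRINTZ, frintz.  */
double aarch64_ceil_sketch (double x) { return __builtin_ceil (x); }            /* UNSPEC_FRINTP, frintp.  */
double aarch64_floor_sketch (double x) { return __builtin_floor (x); }          /* UNSPEC_FRINTM, frintm.  */
double aarch64_round_sketch (double x) { return __builtin_round (x); }          /* UNSPEC_FRINTA, frinta.  */
double aarch64_rint_sketch (double x) { return __builtin_rint (x); }            /* UNSPEC_FRINTX, frintx.  */
double aarch64_nearbyint_sketch (double x) { return __builtin_nearbyint (x); }  /* UNSPEC_FRINTI, frinti.  */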
10968
fb0cb7fa
KT
10969/* Return true iff X is an rtx that will match an extr instruction
10970 i.e. as described in the *extr<mode>5_insn family of patterns.
10971 OP0 and OP1 will be set to the operands of the shifts involved
10972 on success and will be NULL_RTX otherwise. */
10973
10974static bool
10975aarch64_extr_rtx_p (rtx x, rtx *res_op0, rtx *res_op1)
10976{
10977 rtx op0, op1;
77e994c9
RS
10978 scalar_int_mode mode;
10979 if (!is_a <scalar_int_mode> (GET_MODE (x), &mode))
10980 return false;
fb0cb7fa
KT
10981
10982 *res_op0 = NULL_RTX;
10983 *res_op1 = NULL_RTX;
10984
10985 if (GET_CODE (x) != IOR)
10986 return false;
10987
10988 op0 = XEXP (x, 0);
10989 op1 = XEXP (x, 1);
10990
10991 if ((GET_CODE (op0) == ASHIFT && GET_CODE (op1) == LSHIFTRT)
10992 || (GET_CODE (op1) == ASHIFT && GET_CODE (op0) == LSHIFTRT))
10993 {
10994 /* Canonicalise locally to ashift in op0, lshiftrt in op1. */
10995 if (GET_CODE (op1) == ASHIFT)
10996 std::swap (op0, op1);
10997
10998 if (!CONST_INT_P (XEXP (op0, 1)) || !CONST_INT_P (XEXP (op1, 1)))
10999 return false;
11000
11001 unsigned HOST_WIDE_INT shft_amnt_0 = UINTVAL (XEXP (op0, 1));
11002 unsigned HOST_WIDE_INT shft_amnt_1 = UINTVAL (XEXP (op1, 1));
11003
11004 if (shft_amnt_0 < GET_MODE_BITSIZE (mode)
11005 && shft_amnt_0 + shft_amnt_1 == GET_MODE_BITSIZE (mode))
11006 {
11007 *res_op0 = XEXP (op0, 0);
11008 *res_op1 = XEXP (op1, 0);
11009 return true;
11010 }
11011 }
11012
11013 return false;
11014}
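/* Editorial sketch, not part of this file: the source-level shape the check
   above looks for.  With 32-bit operands and shift counts summing to 32,
   the IOR of the two shifts can be implemented by a single EXTR (or a ROR
   when both shifts apply to the same value).  */

static unsigned int
aarch64_extr_like_sketch (unsigned int hi, unsigned int lo)
{
  return (hi << 24) | (lo >> 8);  /* 24 + 8 == 32, the mode bitsize.  */
}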
11015
2d5ffe46
AP
11016/* Calculate the cost of calculating (if_then_else (OP0) (OP1) (OP2)),
11017 storing it in *COST. Result is true if the total cost of the operation
11018 has now been calculated. */
11019static bool
11020aarch64_if_then_else_costs (rtx op0, rtx op1, rtx op2, int *cost, bool speed)
11021{
b9e3afe9
AP
11022 rtx inner;
11023 rtx comparator;
11024 enum rtx_code cmpcode;
11025
11026 if (COMPARISON_P (op0))
11027 {
11028 inner = XEXP (op0, 0);
11029 comparator = XEXP (op0, 1);
11030 cmpcode = GET_CODE (op0);
11031 }
11032 else
11033 {
11034 inner = op0;
11035 comparator = const0_rtx;
11036 cmpcode = NE;
11037 }
11038
2d5ffe46
AP
11039 if (GET_CODE (op1) == PC || GET_CODE (op2) == PC)
11040 {
11041 /* Conditional branch. */
b9e3afe9 11042 if (GET_MODE_CLASS (GET_MODE (inner)) == MODE_CC)
2d5ffe46
AP
11043 return true;
11044 else
11045 {
b9e3afe9 11046 if (cmpcode == NE || cmpcode == EQ)
2d5ffe46 11047 {
2d5ffe46
AP
11048 if (comparator == const0_rtx)
11049 {
11050 /* TBZ/TBNZ/CBZ/CBNZ. */
11051 if (GET_CODE (inner) == ZERO_EXTRACT)
11052 /* TBZ/TBNZ. */
e548c9df
AM
11053 *cost += rtx_cost (XEXP (inner, 0), VOIDmode,
11054 ZERO_EXTRACT, 0, speed);
11055 else
11056 /* CBZ/CBNZ. */
11057 *cost += rtx_cost (inner, VOIDmode, cmpcode, 0, speed);
2d5ffe46
AP
11058
11059 return true;
11060 }
11061 }
b9e3afe9 11062 else if (cmpcode == LT || cmpcode == GE)
2d5ffe46 11063 {
2d5ffe46
AP
11064 /* TBZ/TBNZ. */
11065 if (comparator == const0_rtx)
11066 return true;
11067 }
11068 }
11069 }
b9e3afe9 11070 else if (GET_MODE_CLASS (GET_MODE (inner)) == MODE_CC)
2d5ffe46 11071 {
786298dc 11072 /* CCMP. */
6dfeb7ce 11073 if (GET_CODE (op1) == COMPARE)
786298dc
WD
11074 {
11075 /* Increase cost of CCMP reg, 0, imm, CC to prefer CMP reg, 0. */
11076 if (XEXP (op1, 1) == const0_rtx)
11077 *cost += 1;
11078 if (speed)
11079 {
11080 machine_mode mode = GET_MODE (XEXP (op1, 0));
11081 const struct cpu_cost_table *extra_cost
11082 = aarch64_tune_params.insn_extra_cost;
11083
11084 if (GET_MODE_CLASS (mode) == MODE_INT)
11085 *cost += extra_cost->alu.arith;
11086 else
11087 *cost += extra_cost->fp[mode == DFmode].compare;
11088 }
11089 return true;
11090 }
11091
2d5ffe46
AP
11092 /* It's a conditional operation based on the status flags,
11093 so it must be some flavor of CSEL. */
11094
11095 /* CSNEG, CSINV, and CSINC are handled for free as part of CSEL. */
11096 if (GET_CODE (op1) == NEG
11097 || GET_CODE (op1) == NOT
11098 || (GET_CODE (op1) == PLUS && XEXP (op1, 1) == const1_rtx))
11099 op1 = XEXP (op1, 0);
bad00732
KT
11100 else if (GET_CODE (op1) == ZERO_EXTEND && GET_CODE (op2) == ZERO_EXTEND)
11101 {
11102 /* CSEL with zero-extension (*cmovdi_insn_uxtw). */
11103 op1 = XEXP (op1, 0);
11104 op2 = XEXP (op2, 0);
11105 }
2d5ffe46 11106
e548c9df
AM
11107 *cost += rtx_cost (op1, VOIDmode, IF_THEN_ELSE, 1, speed);
11108 *cost += rtx_cost (op2, VOIDmode, IF_THEN_ELSE, 2, speed);
2d5ffe46
AP
11109 return true;
11110 }
11111
11112 /* We don't know what this is, cost all operands. */
11113 return false;
11114}
11115
283b6c85
KT
11116/* Check whether X is a bitfield operation of the form shift + extend that
11117 maps down to a UBFIZ/SBFIZ/UBFX/SBFX instruction. If so, return the
11118 operand to which the bitfield operation is applied. Otherwise return
11119 NULL_RTX. */
11120
11121static rtx
11122aarch64_extend_bitfield_pattern_p (rtx x)
11123{
11124 rtx_code outer_code = GET_CODE (x);
11125 machine_mode outer_mode = GET_MODE (x);
11126
11127 if (outer_code != ZERO_EXTEND && outer_code != SIGN_EXTEND
11128 && outer_mode != SImode && outer_mode != DImode)
11129 return NULL_RTX;
11130
11131 rtx inner = XEXP (x, 0);
11132 rtx_code inner_code = GET_CODE (inner);
11133 machine_mode inner_mode = GET_MODE (inner);
11134 rtx op = NULL_RTX;
11135
11136 switch (inner_code)
11137 {
11138 case ASHIFT:
11139 if (CONST_INT_P (XEXP (inner, 1))
11140 && (inner_mode == QImode || inner_mode == HImode))
11141 op = XEXP (inner, 0);
11142 break;
11143 case LSHIFTRT:
11144 if (outer_code == ZERO_EXTEND && CONST_INT_P (XEXP (inner, 1))
11145 && (inner_mode == QImode || inner_mode == HImode))
11146 op = XEXP (inner, 0);
11147 break;
11148 case ASHIFTRT:
11149 if (outer_code == SIGN_EXTEND && CONST_INT_P (XEXP (inner, 1))
11150 && (inner_mode == QImode || inner_mode == HImode))
11151 op = XEXP (inner, 0);
11152 break;
11153 default:
11154 break;
11155 }
11156
11157 return op;
11158}
11159
8c83f71d
KT
11160/* Return true if the mask and a shift amount from an RTX of the form
11161 (x << SHFT_AMNT) & MASK are valid to combine into a UBFIZ instruction of
11162 mode MODE. See the *andim_ashift<mode>_bfiz pattern. */
11163
11164bool
77e994c9
RS
11165aarch64_mask_and_shift_for_ubfiz_p (scalar_int_mode mode, rtx mask,
11166 rtx shft_amnt)
8c83f71d
KT
11167{
11168 return CONST_INT_P (mask) && CONST_INT_P (shft_amnt)
11169 && INTVAL (shft_amnt) < GET_MODE_BITSIZE (mode)
11170 && exact_log2 ((INTVAL (mask) >> INTVAL (shft_amnt)) + 1) >= 0
1b6acf23
WD
11171 && (INTVAL (mask)
11172 & ((HOST_WIDE_INT_1U << INTVAL (shft_amnt)) - 1)) == 0;
8c83f71d
KT
11173}
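/* Editorial sketch, not part of this file: a worked example of the test
   above for SImode.  With mask 0x00ffff00 and shift 8: the shift is below
   the mode width, (mask >> 8) + 1 == 0x10000 is a power of two, and no mask
   bit lies below bit 8, so the pair is accepted (a 16-bit field inserted at
   bit position 8).  */

static int
aarch64_ubfiz_mask_example (void)
{
  unsigned int mask = 0x00ffff00, shift = 8;
  unsigned int field = (mask >> shift) + 1;      /* 0x10000.  */
  return field != 0
         && (field & (field - 1)) == 0           /* Power of two.  */
         && (mask & ((1u << shift) - 1)) == 0;   /* Nothing below the shift.  */
}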
11174
6a0d3939
SE
11175/* Return true if the masks and a shift amount from an RTX of the form
11176 ((x & MASK1) | ((y << SHIFT_AMNT) & MASK2)) are valid to combine into
11177 a BFI instruction of mode MODE. See the *aarch64_bfi patterns. */
11178
11179bool
11180aarch64_masks_and_shift_for_bfi_p (scalar_int_mode mode,
11181 unsigned HOST_WIDE_INT mask1,
11182 unsigned HOST_WIDE_INT shft_amnt,
11183 unsigned HOST_WIDE_INT mask2)
11184{
11185 unsigned HOST_WIDE_INT t;
11186
11187 /* Verify that there is no overlap in what bits are set in the two masks. */
11188 if (mask1 != ~mask2)
11189 return false;
11190
11191 /* Verify that mask2 is not all zeros or ones. */
11192 if (mask2 == 0 || mask2 == HOST_WIDE_INT_M1U)
11193 return false;
11194
11195 /* The shift amount should always be less than the mode size. */
11196 gcc_assert (shft_amnt < GET_MODE_BITSIZE (mode));
11197
11198 /* Verify that the mask being shifted is contiguous and would be in the
11199 least significant bits after shifting by shft_amnt. */
11200 t = mask2 + (HOST_WIDE_INT_1U << shft_amnt);
11201 return (t == (t & -t));
11202}
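/* Editorial sketch, not part of this file: the contiguity test above on a
   concrete value.  For shift 8 and mask2 0xff00, mask2 + (1 << 8) is
   0x10000, a power of two, so mask2 is one contiguous run of bits starting
   at the shift position and the BFI form applies (with mask1 == ~mask2).  */

static int
aarch64_bfi_mask_example (void)
{
  unsigned long long mask2 = 0xff00, shift = 8;
  unsigned long long t = mask2 + (1ULL << shift);  /* 0x10000.  */
  return t == (t & -t);                            /* True: a single bit is set.  */
}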
11203
43e9d192
IB
11204/* Calculate the cost of calculating X, storing it in *COST. Result
11205 is true if the total cost of the operation has now been calculated. */
11206static bool
e548c9df 11207aarch64_rtx_costs (rtx x, machine_mode mode, int outer ATTRIBUTE_UNUSED,
43e9d192
IB
11208 int param ATTRIBUTE_UNUSED, int *cost, bool speed)
11209{
a8eecd00 11210 rtx op0, op1, op2;
73250c4c 11211 const struct cpu_cost_table *extra_cost
b175b679 11212 = aarch64_tune_params.insn_extra_cost;
e548c9df 11213 int code = GET_CODE (x);
b4206259 11214 scalar_int_mode int_mode;
43e9d192 11215
7fc5ef02
JG
11216 /* By default, assume that everything has equivalent cost to the
11217 cheapest instruction. Any additional costs are applied as a delta
11218 above this default. */
11219 *cost = COSTS_N_INSNS (1);
11220
43e9d192
IB
11221 switch (code)
11222 {
11223 case SET:
ba123b0d
JG
11224 /* The cost depends entirely on the operands to SET. */
11225 *cost = 0;
43e9d192
IB
11226 op0 = SET_DEST (x);
11227 op1 = SET_SRC (x);
11228
11229 switch (GET_CODE (op0))
11230 {
11231 case MEM:
11232 if (speed)
2961177e
JG
11233 {
11234 rtx address = XEXP (op0, 0);
b6875aac
KV
11235 if (VECTOR_MODE_P (mode))
11236 *cost += extra_cost->ldst.storev;
11237 else if (GET_MODE_CLASS (mode) == MODE_INT)
2961177e
JG
11238 *cost += extra_cost->ldst.store;
11239 else if (mode == SFmode)
11240 *cost += extra_cost->ldst.storef;
11241 else if (mode == DFmode)
11242 *cost += extra_cost->ldst.stored;
11243
11244 *cost +=
11245 COSTS_N_INSNS (aarch64_address_cost (address, mode,
11246 0, speed));
11247 }
43e9d192 11248
e548c9df 11249 *cost += rtx_cost (op1, mode, SET, 1, speed);
43e9d192
IB
11250 return true;
11251
11252 case SUBREG:
11253 if (! REG_P (SUBREG_REG (op0)))
e548c9df 11254 *cost += rtx_cost (SUBREG_REG (op0), VOIDmode, SET, 0, speed);
ba123b0d 11255
43e9d192
IB
11256 /* Fall through. */
11257 case REG:
b6875aac
KV
11258 /* The cost is one per vector-register copied. */
11259 if (VECTOR_MODE_P (GET_MODE (op0)) && REG_P (op1))
11260 {
fe1447a1
RS
11261 int nregs = aarch64_hard_regno_nregs (V0_REGNUM, GET_MODE (op0));
11262 *cost = COSTS_N_INSNS (nregs);
b6875aac 11263 }
ba123b0d
JG
11264 /* const0_rtx is in general free, but we will use an
11265 instruction to set a register to 0. */
b6875aac
KV
11266 else if (REG_P (op1) || op1 == const0_rtx)
11267 {
11268 /* The cost is 1 per register copied. */
fe1447a1
RS
11269 int nregs = aarch64_hard_regno_nregs (R0_REGNUM, GET_MODE (op0));
11270 *cost = COSTS_N_INSNS (nregs);
b6875aac 11271 }
ba123b0d
JG
11272 else
11273 /* Cost is just the cost of the RHS of the set. */
e548c9df 11274 *cost += rtx_cost (op1, mode, SET, 1, speed);
43e9d192
IB
11275 return true;
11276
ba123b0d 11277 case ZERO_EXTRACT:
43e9d192 11278 case SIGN_EXTRACT:
ba123b0d
JG
11279 /* Bit-field insertion. Strip any redundant widening of
11280 the RHS to meet the width of the target. */
43e9d192
IB
11281 if (GET_CODE (op1) == SUBREG)
11282 op1 = SUBREG_REG (op1);
11283 if ((GET_CODE (op1) == ZERO_EXTEND
11284 || GET_CODE (op1) == SIGN_EXTEND)
4aa81c2e 11285 && CONST_INT_P (XEXP (op0, 1))
77e994c9
RS
11286 && is_a <scalar_int_mode> (GET_MODE (XEXP (op1, 0)), &int_mode)
11287 && GET_MODE_BITSIZE (int_mode) >= INTVAL (XEXP (op0, 1)))
43e9d192 11288 op1 = XEXP (op1, 0);
ba123b0d
JG
11289
11290 if (CONST_INT_P (op1))
11291 {
11292 /* MOV immediate is assumed to always be cheap. */
11293 *cost = COSTS_N_INSNS (1);
11294 }
11295 else
11296 {
11297 /* BFM. */
11298 if (speed)
11299 *cost += extra_cost->alu.bfi;
e548c9df 11300 *cost += rtx_cost (op1, VOIDmode, (enum rtx_code) code, 1, speed);
ba123b0d
JG
11301 }
11302
43e9d192
IB
11303 return true;
11304
11305 default:
ba123b0d
JG
11306 /* We can't make sense of this, assume default cost. */
11307 *cost = COSTS_N_INSNS (1);
61263118 11308 return false;
43e9d192
IB
11309 }
11310 return false;
11311
9dfc162c
JG
11312 case CONST_INT:
11313 /* If an instruction can incorporate a constant directly, the
11314 instruction's expression avoids calling rtx_cost() on the
11315 constant. If rtx_cost() is called on a
11316 constant, then it is usually because the constant must be
11317 moved into a register by one or more instructions.
11318
11319 The exception is constant 0, which can be expressed
11320 as XZR/WZR and is therefore free. The exception to this is
11321 if we have (set (reg) (const0_rtx)) in which case we must cost
11322 the move. However, we can catch that when we cost the SET, so
11323 we don't need to consider that here. */
11324 if (x == const0_rtx)
11325 *cost = 0;
11326 else
11327 {
11328 /* To a first approximation, the cost of building any other
11329 constant is proportional to the number of instructions
11330 required to build that constant. This is true whether we
11331 are compiling for SPEED or otherwise. */
77e994c9
RS
11332 if (!is_a <scalar_int_mode> (mode, &int_mode))
11333 int_mode = word_mode;
82614948 11334 *cost = COSTS_N_INSNS (aarch64_internal_mov_immediate
77e994c9 11335 (NULL_RTX, x, false, int_mode));
9dfc162c
JG
11336 }
11337 return true;
11338
11339 case CONST_DOUBLE:
a2170965
TC
11340
11341 /* First determine number of instructions to do the move
11342 as an integer constant. */
11343 if (!aarch64_float_const_representable_p (x)
11344 && !aarch64_can_const_movi_rtx_p (x, mode)
11345 && aarch64_float_const_rtx_p (x))
11346 {
11347 unsigned HOST_WIDE_INT ival;
11348 bool succeed = aarch64_reinterpret_float_as_int (x, &ival);
11349 gcc_assert (succeed);
11350
77e994c9
RS
11351 scalar_int_mode imode = (mode == HFmode
11352 ? SImode
11353 : int_mode_for_mode (mode).require ());
a2170965
TC
11354 int ncost = aarch64_internal_mov_immediate
11355 (NULL_RTX, gen_int_mode (ival, imode), false, imode);
11356 *cost += COSTS_N_INSNS (ncost);
11357 return true;
11358 }
11359
9dfc162c
JG
11360 if (speed)
11361 {
11362 /* mov[df,sf]_aarch64. */
11363 if (aarch64_float_const_representable_p (x))
11364 /* FMOV (scalar immediate). */
11365 *cost += extra_cost->fp[mode == DFmode].fpconst;
11366 else if (!aarch64_float_const_zero_rtx_p (x))
11367 {
11368 /* This will be a load from memory. */
11369 if (mode == DFmode)
11370 *cost += extra_cost->ldst.loadd;
11371 else
11372 *cost += extra_cost->ldst.loadf;
11373 }
11374 else
11375 /* Otherwise this is +0.0. We get this using MOVI d0, #0
11376 or MOV v0.s[0], wzr - neither of which is modeled by the
11377 cost tables. Just use the default cost. */
11378 {
11379 }
11380 }
11381
11382 return true;
11383
43e9d192
IB
11384 case MEM:
11385 if (speed)
2961177e
JG
11386 {
11387 /* For loads we want the base cost of a load, plus an
11388 approximation for the additional cost of the addressing
11389 mode. */
11390 rtx address = XEXP (x, 0);
b6875aac
KV
11391 if (VECTOR_MODE_P (mode))
11392 *cost += extra_cost->ldst.loadv;
11393 else if (GET_MODE_CLASS (mode) == MODE_INT)
2961177e
JG
11394 *cost += extra_cost->ldst.load;
11395 else if (mode == SFmode)
11396 *cost += extra_cost->ldst.loadf;
11397 else if (mode == DFmode)
11398 *cost += extra_cost->ldst.loadd;
11399
11400 *cost +=
11401 COSTS_N_INSNS (aarch64_address_cost (address, mode,
11402 0, speed));
11403 }
43e9d192
IB
11404
11405 return true;
11406
11407 case NEG:
4745e701
JG
11408 op0 = XEXP (x, 0);
11409
b6875aac
KV
11410 if (VECTOR_MODE_P (mode))
11411 {
11412 if (speed)
11413 {
11414 /* FNEG. */
11415 *cost += extra_cost->vect.alu;
11416 }
11417 return false;
11418 }
11419
e548c9df
AM
11420 if (GET_MODE_CLASS (mode) == MODE_INT)
11421 {
4745e701
JG
11422 if (GET_RTX_CLASS (GET_CODE (op0)) == RTX_COMPARE
11423 || GET_RTX_CLASS (GET_CODE (op0)) == RTX_COMM_COMPARE)
11424 {
11425 /* CSETM. */
e548c9df 11426 *cost += rtx_cost (XEXP (op0, 0), VOIDmode, NEG, 0, speed);
4745e701
JG
11427 return true;
11428 }
11429
11430 /* Cost this as SUB wzr, X. */
e548c9df 11431 op0 = CONST0_RTX (mode);
4745e701
JG
11432 op1 = XEXP (x, 0);
11433 goto cost_minus;
11434 }
11435
e548c9df 11436 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
4745e701
JG
11437 {
11438 /* Support (neg(fma...)) as a single instruction only if the
11439 sign of zeros is unimportant. This matches the decision
11440 making in aarch64.md. */
11441 if (GET_CODE (op0) == FMA && !HONOR_SIGNED_ZEROS (GET_MODE (op0)))
11442 {
11443 /* FNMADD. */
e548c9df 11444 *cost = rtx_cost (op0, mode, NEG, 0, speed);
4745e701
JG
11445 return true;
11446 }
d318517d
SN
11447 if (GET_CODE (op0) == MULT)
11448 {
11449 /* FNMUL. */
11450 *cost = rtx_cost (op0, mode, NEG, 0, speed);
11451 return true;
11452 }
4745e701
JG
11453 if (speed)
11454 /* FNEG. */
11455 *cost += extra_cost->fp[mode == DFmode].neg;
11456 return false;
11457 }
11458
11459 return false;
43e9d192 11460
781aeb73
KT
11461 case CLRSB:
11462 case CLZ:
11463 if (speed)
b6875aac
KV
11464 {
11465 if (VECTOR_MODE_P (mode))
11466 *cost += extra_cost->vect.alu;
11467 else
11468 *cost += extra_cost->alu.clz;
11469 }
781aeb73
KT
11470
11471 return false;
11472
43e9d192
IB
11473 case COMPARE:
11474 op0 = XEXP (x, 0);
11475 op1 = XEXP (x, 1);
11476
11477 if (op1 == const0_rtx
11478 && GET_CODE (op0) == AND)
11479 {
11480 x = op0;
e548c9df 11481 mode = GET_MODE (op0);
43e9d192
IB
11482 goto cost_logic;
11483 }
11484
a8eecd00
JG
11485 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT)
11486 {
11487 /* TODO: A write to the CC flags possibly costs extra, this
11488 needs encoding in the cost tables. */
11489
e548c9df 11490 mode = GET_MODE (op0);
a8eecd00
JG
11491 /* ANDS. */
11492 if (GET_CODE (op0) == AND)
11493 {
11494 x = op0;
11495 goto cost_logic;
11496 }
11497
11498 if (GET_CODE (op0) == PLUS)
11499 {
11500 /* ADDS (and CMN alias). */
11501 x = op0;
11502 goto cost_plus;
11503 }
11504
11505 if (GET_CODE (op0) == MINUS)
11506 {
11507 /* SUBS. */
11508 x = op0;
11509 goto cost_minus;
11510 }
11511
345854d8
KT
11512 if (GET_CODE (op0) == ZERO_EXTRACT && op1 == const0_rtx
11513 && GET_MODE (x) == CC_NZmode && CONST_INT_P (XEXP (op0, 1))
11514 && CONST_INT_P (XEXP (op0, 2)))
11515 {
11516 /* COMPARE of ZERO_EXTRACT form of TST-immediate.
11517 Handle it here directly rather than going to cost_logic
11518 since we know the immediate generated for the TST is valid
11519 so we can avoid creating an intermediate rtx for it only
11520 for costing purposes. */
11521 if (speed)
11522 *cost += extra_cost->alu.logical;
11523
11524 *cost += rtx_cost (XEXP (op0, 0), GET_MODE (op0),
11525 ZERO_EXTRACT, 0, speed);
11526 return true;
11527 }
11528
a8eecd00
JG
11529 if (GET_CODE (op1) == NEG)
11530 {
11531 /* CMN. */
11532 if (speed)
11533 *cost += extra_cost->alu.arith;
11534
e548c9df
AM
11535 *cost += rtx_cost (op0, mode, COMPARE, 0, speed);
11536 *cost += rtx_cost (XEXP (op1, 0), mode, NEG, 1, speed);
a8eecd00
JG
11537 return true;
11538 }
11539
11540 /* CMP.
11541
11542 Compare can freely swap the order of operands, and
11543 canonicalization puts the more complex operation first.
11544 But the integer MINUS logic expects the shift/extend
11545 operation in op1. */
11546 if (! (REG_P (op0)
11547 || (GET_CODE (op0) == SUBREG && REG_P (SUBREG_REG (op0)))))
11548 {
11549 op0 = XEXP (x, 1);
11550 op1 = XEXP (x, 0);
11551 }
11552 goto cost_minus;
11553 }
11554
11555 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_FLOAT)
11556 {
11557 /* FCMP. */
11558 if (speed)
11559 *cost += extra_cost->fp[mode == DFmode].compare;
11560
11561 if (CONST_DOUBLE_P (op1) && aarch64_float_const_zero_rtx_p (op1))
11562 {
e548c9df 11563 *cost += rtx_cost (op0, VOIDmode, COMPARE, 0, speed);
a8eecd00
JG
11564 /* FCMP supports constant 0.0 for no extra cost. */
11565 return true;
11566 }
11567 return false;
11568 }
11569
b6875aac
KV
11570 if (VECTOR_MODE_P (mode))
11571 {
11572 /* Vector compare. */
11573 if (speed)
11574 *cost += extra_cost->vect.alu;
11575
11576 if (aarch64_float_const_zero_rtx_p (op1))
11577 {
11578 /* Vector cm (eq|ge|gt|lt|le) supports constant 0.0 for no extra
11579 cost. */
11580 return true;
11581 }
11582 return false;
11583 }
a8eecd00 11584 return false;
43e9d192
IB
11585
11586 case MINUS:
4745e701
JG
11587 {
11588 op0 = XEXP (x, 0);
11589 op1 = XEXP (x, 1);
11590
11591cost_minus:
e548c9df 11592 *cost += rtx_cost (op0, mode, MINUS, 0, speed);
23cb6618 11593
4745e701
JG
11594 /* Detect valid immediates. */
11595 if ((GET_MODE_CLASS (mode) == MODE_INT
11596 || (GET_MODE_CLASS (mode) == MODE_CC
11597 && GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT))
11598 && CONST_INT_P (op1)
11599 && aarch64_uimm12_shift (INTVAL (op1)))
11600 {
4745e701
JG
11601 if (speed)
11602 /* SUB(S) (immediate). */
11603 *cost += extra_cost->alu.arith;
11604 return true;
4745e701
JG
11605 }
11606
7cc2145f 11607 /* Look for SUB (extended register). */
77e994c9
RS
11608 if (is_a <scalar_int_mode> (mode, &int_mode)
11609 && aarch64_rtx_arith_op_extract_p (op1, int_mode))
7cc2145f
JG
11610 {
11611 if (speed)
2533c820 11612 *cost += extra_cost->alu.extend_arith;
7cc2145f 11613
b10f1009 11614 op1 = aarch64_strip_extend (op1, true);
e47c4031 11615 *cost += rtx_cost (op1, VOIDmode,
e548c9df 11616 (enum rtx_code) GET_CODE (op1), 0, speed);
7cc2145f
JG
11617 return true;
11618 }
11619
b10f1009 11620 rtx new_op1 = aarch64_strip_extend (op1, false);
4745e701
JG
11621
11622 /* Cost this as an FMA-alike operation. */
11623 if ((GET_CODE (new_op1) == MULT
0a78ebe4 11624 || aarch64_shift_p (GET_CODE (new_op1)))
4745e701
JG
11625 && code != COMPARE)
11626 {
11627 *cost += aarch64_rtx_mult_cost (new_op1, MULT,
11628 (enum rtx_code) code,
11629 speed);
4745e701
JG
11630 return true;
11631 }
43e9d192 11632
e548c9df 11633 *cost += rtx_cost (new_op1, VOIDmode, MINUS, 1, speed);
43e9d192 11634
4745e701
JG
11635 if (speed)
11636 {
b6875aac
KV
11637 if (VECTOR_MODE_P (mode))
11638 {
11639 /* Vector SUB. */
11640 *cost += extra_cost->vect.alu;
11641 }
11642 else if (GET_MODE_CLASS (mode) == MODE_INT)
11643 {
11644 /* SUB(S). */
11645 *cost += extra_cost->alu.arith;
11646 }
4745e701 11647 else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
b6875aac
KV
11648 {
11649 /* FSUB. */
11650 *cost += extra_cost->fp[mode == DFmode].addsub;
11651 }
4745e701
JG
11652 }
11653 return true;
11654 }
43e9d192
IB
11655
11656 case PLUS:
4745e701
JG
11657 {
11658 rtx new_op0;
43e9d192 11659
4745e701
JG
11660 op0 = XEXP (x, 0);
11661 op1 = XEXP (x, 1);
43e9d192 11662
a8eecd00 11663cost_plus:
4745e701
JG
11664 if (GET_RTX_CLASS (GET_CODE (op0)) == RTX_COMPARE
11665 || GET_RTX_CLASS (GET_CODE (op0)) == RTX_COMM_COMPARE)
11666 {
11667 /* CSINC. */
e548c9df
AM
11668 *cost += rtx_cost (XEXP (op0, 0), mode, PLUS, 0, speed);
11669 *cost += rtx_cost (op1, mode, PLUS, 1, speed);
4745e701
JG
11670 return true;
11671 }
43e9d192 11672
4745e701 11673 if (GET_MODE_CLASS (mode) == MODE_INT
835d50c6 11674 && (aarch64_plus_immediate (op1, mode)
43cacb12 11675 || aarch64_sve_addvl_addpl_immediate (op1, mode)))
4745e701 11676 {
e548c9df 11677 *cost += rtx_cost (op0, mode, PLUS, 0, speed);
43e9d192 11678
4745e701
JG
11679 if (speed)
11680 /* ADD (immediate). */
11681 *cost += extra_cost->alu.arith;
11682 return true;
11683 }
11684
e548c9df 11685 *cost += rtx_cost (op1, mode, PLUS, 1, speed);
23cb6618 11686
7cc2145f 11687 /* Look for ADD (extended register). */
77e994c9
RS
11688 if (is_a <scalar_int_mode> (mode, &int_mode)
11689 && aarch64_rtx_arith_op_extract_p (op0, int_mode))
7cc2145f
JG
11690 {
11691 if (speed)
2533c820 11692 *cost += extra_cost->alu.extend_arith;
7cc2145f 11693
b10f1009 11694 op0 = aarch64_strip_extend (op0, true);
e47c4031 11695 *cost += rtx_cost (op0, VOIDmode,
e548c9df 11696 (enum rtx_code) GET_CODE (op0), 0, speed);
7cc2145f
JG
11697 return true;
11698 }
11699
4745e701
JG
11700 /* Strip any extend, leave shifts behind as we will
11701 cost them through mult_cost. */
b10f1009 11702 new_op0 = aarch64_strip_extend (op0, false);
4745e701
JG
11703
11704 if (GET_CODE (new_op0) == MULT
0a78ebe4 11705 || aarch64_shift_p (GET_CODE (new_op0)))
4745e701
JG
11706 {
11707 *cost += aarch64_rtx_mult_cost (new_op0, MULT, PLUS,
11708 speed);
4745e701
JG
11709 return true;
11710 }
11711
e548c9df 11712 *cost += rtx_cost (new_op0, VOIDmode, PLUS, 0, speed);
4745e701
JG
11713
11714 if (speed)
11715 {
b6875aac
KV
11716 if (VECTOR_MODE_P (mode))
11717 {
11718 /* Vector ADD. */
11719 *cost += extra_cost->vect.alu;
11720 }
11721 else if (GET_MODE_CLASS (mode) == MODE_INT)
11722 {
11723 /* ADD. */
11724 *cost += extra_cost->alu.arith;
11725 }
4745e701 11726 else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
b6875aac
KV
11727 {
11728 /* FADD. */
11729 *cost += extra_cost->fp[mode == DFmode].addsub;
11730 }
4745e701
JG
11731 }
11732 return true;
11733 }
43e9d192 11734
18b42b2a
KT
11735 case BSWAP:
11736 *cost = COSTS_N_INSNS (1);
11737
11738 if (speed)
b6875aac
KV
11739 {
11740 if (VECTOR_MODE_P (mode))
11741 *cost += extra_cost->vect.alu;
11742 else
11743 *cost += extra_cost->alu.rev;
11744 }
18b42b2a
KT
11745 return false;
11746
43e9d192 11747 case IOR:
f7d5cf8d
KT
11748 if (aarch_rev16_p (x))
11749 {
11750 *cost = COSTS_N_INSNS (1);
11751
b6875aac
KV
11752 if (speed)
11753 {
11754 if (VECTOR_MODE_P (mode))
11755 *cost += extra_cost->vect.alu;
11756 else
11757 *cost += extra_cost->alu.rev;
11758 }
11759 return true;
f7d5cf8d 11760 }
fb0cb7fa
KT
11761
11762 if (aarch64_extr_rtx_p (x, &op0, &op1))
11763 {
e548c9df
AM
11764 *cost += rtx_cost (op0, mode, IOR, 0, speed);
11765 *cost += rtx_cost (op1, mode, IOR, 1, speed);
fb0cb7fa
KT
11766 if (speed)
11767 *cost += extra_cost->alu.shift;
11768
11769 return true;
11770 }
f7d5cf8d 11771 /* Fall through. */
43e9d192
IB
11772 case XOR:
11773 case AND:
11774 cost_logic:
11775 op0 = XEXP (x, 0);
11776 op1 = XEXP (x, 1);
11777
b6875aac
KV
11778 if (VECTOR_MODE_P (mode))
11779 {
11780 if (speed)
11781 *cost += extra_cost->vect.alu;
11782 return true;
11783 }
11784
268c3b47
JG
11785 if (code == AND
11786 && GET_CODE (op0) == MULT
11787 && CONST_INT_P (XEXP (op0, 1))
11788 && CONST_INT_P (op1)
11789 && aarch64_uxt_size (exact_log2 (INTVAL (XEXP (op0, 1))),
11790 INTVAL (op1)) != 0)
11791 {
11792 /* This is a UBFM/SBFM. */
e548c9df 11793 *cost += rtx_cost (XEXP (op0, 0), mode, ZERO_EXTRACT, 0, speed);
268c3b47
JG
11794 if (speed)
11795 *cost += extra_cost->alu.bfx;
11796 return true;
11797 }
11798
b4206259 11799 if (is_int_mode (mode, &int_mode))
43e9d192 11800 {
8c83f71d 11801 if (CONST_INT_P (op1))
43e9d192 11802 {
8c83f71d
KT
11803 /* We have a mask + shift version of a UBFIZ
11804 i.e. the *andim_ashift<mode>_bfiz pattern. */
11805 if (GET_CODE (op0) == ASHIFT
b4206259
RS
11806 && aarch64_mask_and_shift_for_ubfiz_p (int_mode, op1,
11807 XEXP (op0, 1)))
8c83f71d 11808 {
b4206259 11809 *cost += rtx_cost (XEXP (op0, 0), int_mode,
8c83f71d
KT
11810 (enum rtx_code) code, 0, speed);
11811 if (speed)
11812 *cost += extra_cost->alu.bfx;
268c3b47 11813
8c83f71d
KT
11814 return true;
11815 }
b4206259 11816 else if (aarch64_bitmask_imm (INTVAL (op1), int_mode))
8c83f71d
KT
11817 {
11818 /* We possibly get the immediate for free, this is not
11819 modelled. */
b4206259
RS
11820 *cost += rtx_cost (op0, int_mode,
11821 (enum rtx_code) code, 0, speed);
8c83f71d
KT
11822 if (speed)
11823 *cost += extra_cost->alu.logical;
268c3b47 11824
8c83f71d
KT
11825 return true;
11826 }
43e9d192
IB
11827 }
11828 else
11829 {
268c3b47
JG
11830 rtx new_op0 = op0;
11831
11832 /* Handle ORN, EON, or BIC. */
43e9d192
IB
11833 if (GET_CODE (op0) == NOT)
11834 op0 = XEXP (op0, 0);
268c3b47
JG
11835
11836 new_op0 = aarch64_strip_shift (op0);
11837
11838 /* If we had a shift on op0 then this is a logical-shift-
11839 by-register/immediate operation. Otherwise, this is just
11840 a logical operation. */
11841 if (speed)
11842 {
11843 if (new_op0 != op0)
11844 {
11845 /* Shift by immediate. */
11846 if (CONST_INT_P (XEXP (op0, 1)))
11847 *cost += extra_cost->alu.log_shift;
11848 else
11849 *cost += extra_cost->alu.log_shift_reg;
11850 }
11851 else
11852 *cost += extra_cost->alu.logical;
11853 }
11854
11855 /* In both cases we want to cost both operands. */
b4206259
RS
11856 *cost += rtx_cost (new_op0, int_mode, (enum rtx_code) code,
11857 0, speed);
11858 *cost += rtx_cost (op1, int_mode, (enum rtx_code) code,
11859 1, speed);
268c3b47
JG
11860
11861 return true;
43e9d192 11862 }
43e9d192
IB
11863 }
11864 return false;
11865
268c3b47 11866 case NOT:
6365da9e
KT
11867 x = XEXP (x, 0);
11868 op0 = aarch64_strip_shift (x);
11869
b6875aac
KV
11870 if (VECTOR_MODE_P (mode))
11871 {
11872 /* Vector NOT. */
11873 *cost += extra_cost->vect.alu;
11874 return false;
11875 }
11876
6365da9e
KT
11877 /* MVN-shifted-reg. */
11878 if (op0 != x)
11879 {
e548c9df 11880 *cost += rtx_cost (op0, mode, (enum rtx_code) code, 0, speed);
6365da9e
KT
11881
11882 if (speed)
11883 *cost += extra_cost->alu.log_shift;
11884
11885 return true;
11886 }
11887 /* EON can have two forms: (xor (not a) b) and (not (xor a b)).
11888 Handle the second form here, taking care that 'a' in the above can
11889 be a shift. */
11890 else if (GET_CODE (op0) == XOR)
11891 {
11892 rtx newop0 = XEXP (op0, 0);
11893 rtx newop1 = XEXP (op0, 1);
11894 rtx op0_stripped = aarch64_strip_shift (newop0);
11895
e548c9df
AM
11896 *cost += rtx_cost (newop1, mode, (enum rtx_code) code, 1, speed);
11897 *cost += rtx_cost (op0_stripped, mode, XOR, 0, speed);
6365da9e
KT
11898
11899 if (speed)
11900 {
11901 if (op0_stripped != newop0)
11902 *cost += extra_cost->alu.log_shift;
11903 else
11904 *cost += extra_cost->alu.logical;
11905 }
11906
11907 return true;
11908 }
268c3b47
JG
11909 /* MVN. */
11910 if (speed)
11911 *cost += extra_cost->alu.logical;
11912
268c3b47
JG
11913 return false;
11914
43e9d192 11915 case ZERO_EXTEND:
b1685e62
JG
11916
11917 op0 = XEXP (x, 0);
11918 /* If a value is written in SI mode, then zero extended to DI
11919 mode, the operation will in general be free as a write to
11920 a 'w' register implicitly zeroes the upper bits of an 'x'
11921 register. However, if this is
11922
11923 (set (reg) (zero_extend (reg)))
11924
11925 we must cost the explicit register move. */
11926 if (mode == DImode
11927 && GET_MODE (op0) == SImode
11928 && outer == SET)
11929 {
e548c9df 11930 int op_cost = rtx_cost (op0, VOIDmode, ZERO_EXTEND, 0, speed);
b1685e62 11931
dde23f43
KM
11932 /* If OP_COST is non-zero, then the cost of the zero extend
11933 is effectively the cost of the inner operation. Otherwise
11934 we have a MOV instruction and we take the cost from the MOV
11935 itself. This is true independently of whether we are
11936 optimizing for space or time. */
11937 if (op_cost)
b1685e62
JG
11938 *cost = op_cost;
11939
11940 return true;
11941 }
e548c9df 11942 else if (MEM_P (op0))
43e9d192 11943 {
b1685e62 11944 /* All loads can zero extend to any size for free. */
e548c9df 11945 *cost = rtx_cost (op0, VOIDmode, ZERO_EXTEND, param, speed);
43e9d192
IB
11946 return true;
11947 }
b1685e62 11948
283b6c85
KT
11949 op0 = aarch64_extend_bitfield_pattern_p (x);
11950 if (op0)
11951 {
11952 *cost += rtx_cost (op0, mode, ZERO_EXTEND, 0, speed);
11953 if (speed)
11954 *cost += extra_cost->alu.bfx;
11955 return true;
11956 }
11957
b1685e62 11958 if (speed)
b6875aac
KV
11959 {
11960 if (VECTOR_MODE_P (mode))
11961 {
11962 /* UMOV. */
11963 *cost += extra_cost->vect.alu;
11964 }
11965 else
11966 {
63715e5e
WD
11967 /* We generate an AND instead of UXTB/UXTH. */
11968 *cost += extra_cost->alu.logical;
b6875aac
KV
11969 }
11970 }
43e9d192
IB
11971 return false;
11972
11973 case SIGN_EXTEND:
b1685e62 11974 if (MEM_P (XEXP (x, 0)))
43e9d192 11975 {
b1685e62
JG
11976 /* LDRSH. */
11977 if (speed)
11978 {
11979 rtx address = XEXP (XEXP (x, 0), 0);
11980 *cost += extra_cost->ldst.load_sign_extend;
11981
11982 *cost +=
11983 COSTS_N_INSNS (aarch64_address_cost (address, mode,
11984 0, speed));
11985 }
43e9d192
IB
11986 return true;
11987 }
b1685e62 11988
283b6c85
KT
11989 op0 = aarch64_extend_bitfield_pattern_p (x);
11990 if (op0)
11991 {
11992 *cost += rtx_cost (op0, mode, SIGN_EXTEND, 0, speed);
11993 if (speed)
11994 *cost += extra_cost->alu.bfx;
11995 return true;
11996 }
11997
b1685e62 11998 if (speed)
b6875aac
KV
11999 {
12000 if (VECTOR_MODE_P (mode))
12001 *cost += extra_cost->vect.alu;
12002 else
12003 *cost += extra_cost->alu.extend;
12004 }
43e9d192
IB
12005 return false;
12006
ba0cfa17
JG
12007 case ASHIFT:
12008 op0 = XEXP (x, 0);
12009 op1 = XEXP (x, 1);
12010
12011 if (CONST_INT_P (op1))
12012 {
ba0cfa17 12013 if (speed)
b6875aac
KV
12014 {
12015 if (VECTOR_MODE_P (mode))
12016 {
12017 /* Vector shift (immediate). */
12018 *cost += extra_cost->vect.alu;
12019 }
12020 else
12021 {
12022 /* LSL (immediate), UBFM, UBFIZ and friends. These are all
12023 aliases. */
12024 *cost += extra_cost->alu.shift;
12025 }
12026 }
ba0cfa17
JG
12027
12028 /* We can incorporate zero/sign extend for free. */
12029 if (GET_CODE (op0) == ZERO_EXTEND
12030 || GET_CODE (op0) == SIGN_EXTEND)
12031 op0 = XEXP (op0, 0);
12032
e548c9df 12033 *cost += rtx_cost (op0, VOIDmode, ASHIFT, 0, speed);
ba0cfa17
JG
12034 return true;
12035 }
12036 else
12037 {
7813b280 12038 if (VECTOR_MODE_P (mode))
b6875aac 12039 {
7813b280
KT
12040 if (speed)
12041 /* Vector shift (register). */
12042 *cost += extra_cost->vect.alu;
12043 }
12044 else
12045 {
12046 if (speed)
12047 /* LSLV. */
12048 *cost += extra_cost->alu.shift_reg;
12049
12050 if (GET_CODE (op1) == AND && REG_P (XEXP (op1, 0))
12051 && CONST_INT_P (XEXP (op1, 1))
6a70badb
RS
12052 && known_eq (INTVAL (XEXP (op1, 1)),
12053 GET_MODE_BITSIZE (mode) - 1))
b6875aac 12054 {
7813b280
KT
12055 *cost += rtx_cost (op0, mode, (rtx_code) code, 0, speed);
12056 /* We already demanded XEXP (op1, 0) to be REG_P, so
12057 don't recurse into it. */
12058 return true;
b6875aac
KV
12059 }
12060 }
ba0cfa17
JG
12061 return false; /* All arguments need to be in registers. */
12062 }
12063
43e9d192 12064 case ROTATE:
43e9d192
IB
12065 case ROTATERT:
12066 case LSHIFTRT:
43e9d192 12067 case ASHIFTRT:
ba0cfa17
JG
12068 op0 = XEXP (x, 0);
12069 op1 = XEXP (x, 1);
43e9d192 12070
ba0cfa17
JG
12071 if (CONST_INT_P (op1))
12072 {
12073 /* ASR (immediate) and friends. */
12074 if (speed)
b6875aac
KV
12075 {
12076 if (VECTOR_MODE_P (mode))
12077 *cost += extra_cost->vect.alu;
12078 else
12079 *cost += extra_cost->alu.shift;
12080 }
43e9d192 12081
e548c9df 12082 *cost += rtx_cost (op0, mode, (enum rtx_code) code, 0, speed);
ba0cfa17
JG
12083 return true;
12084 }
12085 else
12086 {
7813b280 12087 if (VECTOR_MODE_P (mode))
b6875aac 12088 {
7813b280
KT
12089 if (speed)
12090 /* Vector shift (register). */
b6875aac 12091 *cost += extra_cost->vect.alu;
7813b280
KT
12092 }
12093 else
12094 {
12095 if (speed)
12096 /* ASR (register) and friends. */
b6875aac 12097 *cost += extra_cost->alu.shift_reg;
7813b280
KT
12098
12099 if (GET_CODE (op1) == AND && REG_P (XEXP (op1, 0))
12100 && CONST_INT_P (XEXP (op1, 1))
6a70badb
RS
12101 && known_eq (INTVAL (XEXP (op1, 1)),
12102 GET_MODE_BITSIZE (mode) - 1))
7813b280
KT
12103 {
12104 *cost += rtx_cost (op0, mode, (rtx_code) code, 0, speed);
12105 /* We already demanded XEXP (op1, 0) to be REG_P, so
12106 don't recurse into it. */
12107 return true;
12108 }
b6875aac 12109 }
ba0cfa17
JG
12110 return false; /* All arguments need to be in registers. */
12111 }
43e9d192 12112
909734be
JG
12113 case SYMBOL_REF:
12114
1b1e81f8
JW
12115 if (aarch64_cmodel == AARCH64_CMODEL_LARGE
12116 || aarch64_cmodel == AARCH64_CMODEL_SMALL_SPIC)
909734be
JG
12117 {
12118 /* LDR. */
12119 if (speed)
12120 *cost += extra_cost->ldst.load;
12121 }
12122 else if (aarch64_cmodel == AARCH64_CMODEL_SMALL
12123 || aarch64_cmodel == AARCH64_CMODEL_SMALL_PIC)
12124 {
12125 /* ADRP, followed by ADD. */
12126 *cost += COSTS_N_INSNS (1);
12127 if (speed)
12128 *cost += 2 * extra_cost->alu.arith;
12129 }
12130 else if (aarch64_cmodel == AARCH64_CMODEL_TINY
12131 || aarch64_cmodel == AARCH64_CMODEL_TINY_PIC)
12132 {
12133 /* ADR. */
12134 if (speed)
12135 *cost += extra_cost->alu.arith;
12136 }
12137
12138 if (flag_pic)
12139 {
12140 /* One extra load instruction, after accessing the GOT. */
12141 *cost += COSTS_N_INSNS (1);
12142 if (speed)
12143 *cost += extra_cost->ldst.load;
12144 }
43e9d192
IB
12145 return true;
12146
909734be 12147 case HIGH:
43e9d192 12148 case LO_SUM:
909734be
JG
12149 /* ADRP/ADD (immediate). */
12150 if (speed)
12151 *cost += extra_cost->alu.arith;
43e9d192
IB
12152 return true;
12153
12154 case ZERO_EXTRACT:
12155 case SIGN_EXTRACT:
7cc2145f
JG
12156 /* UBFX/SBFX. */
12157 if (speed)
b6875aac
KV
12158 {
12159 if (VECTOR_MODE_P (mode))
12160 *cost += extra_cost->vect.alu;
12161 else
12162 *cost += extra_cost->alu.bfx;
12163 }
7cc2145f
JG
12164
12165 /* We can trust that the immediates used will be correct (there
12166 are no by-register forms), so we need only cost op0. */
e548c9df 12167 *cost += rtx_cost (XEXP (x, 0), VOIDmode, (enum rtx_code) code, 0, speed);
43e9d192
IB
12168 return true;
12169
12170 case MULT:
4745e701
JG
12171 *cost += aarch64_rtx_mult_cost (x, MULT, 0, speed);
12172 /* aarch64_rtx_mult_cost always handles recursion to its
12173 operands. */
12174 return true;
43e9d192
IB
12175
12176 case MOD:
4f58fe36
KT
12177 /* We can expand signed mod by power of 2 using a NEGS, two parallel
12178 ANDs and a CSNEG. Assume here that CSNEG costs the same as an
12179 unconditional negate. This case should only ever be reached through
12180 the set_smod_pow2_cheap check in expmed.c. */
12181 if (CONST_INT_P (XEXP (x, 1))
12182 && exact_log2 (INTVAL (XEXP (x, 1))) > 0
12183 && (mode == SImode || mode == DImode))
12184 {
12185 /* We expand to 4 instructions. Reset the baseline. */
12186 *cost = COSTS_N_INSNS (4);
12187
12188 if (speed)
12189 *cost += 2 * extra_cost->alu.logical
12190 + 2 * extra_cost->alu.arith;
12191
12192 return true;
12193 }
12194
12195 /* Fall-through. */
43e9d192 12196 case UMOD:
43e9d192
IB
12197 if (speed)
12198 {
cb9ac430 12199 /* Slightly prefer UMOD over SMOD. */
b6875aac
KV
12200 if (VECTOR_MODE_P (mode))
12201 *cost += extra_cost->vect.alu;
e548c9df
AM
12202 else if (GET_MODE_CLASS (mode) == MODE_INT)
12203 *cost += (extra_cost->mult[mode == DImode].add
cb9ac430
TC
12204 + extra_cost->mult[mode == DImode].idiv
12205 + (code == MOD ? 1 : 0));
43e9d192
IB
12206 }
12207 return false; /* All arguments need to be in registers. */
12208
12209 case DIV:
12210 case UDIV:
4105fe38 12211 case SQRT:
43e9d192
IB
12212 if (speed)
12213 {
b6875aac
KV
12214 if (VECTOR_MODE_P (mode))
12215 *cost += extra_cost->vect.alu;
12216 else if (GET_MODE_CLASS (mode) == MODE_INT)
4105fe38
JG
12217 /* There is no integer SQRT, so only DIV and UDIV can get
12218 here. */
cb9ac430
TC
12219 *cost += (extra_cost->mult[mode == DImode].idiv
12220 /* Slightly prefer UDIV over SDIV. */
12221 + (code == DIV ? 1 : 0));
4105fe38
JG
12222 else
12223 *cost += extra_cost->fp[mode == DFmode].div;
43e9d192
IB
12224 }
12225 return false; /* All arguments need to be in registers. */
12226
a8eecd00 12227 case IF_THEN_ELSE:
2d5ffe46
AP
12228 return aarch64_if_then_else_costs (XEXP (x, 0), XEXP (x, 1),
12229 XEXP (x, 2), cost, speed);
a8eecd00
JG
12230
12231 case EQ:
12232 case NE:
12233 case GT:
12234 case GTU:
12235 case LT:
12236 case LTU:
12237 case GE:
12238 case GEU:
12239 case LE:
12240 case LEU:
12241
12242 return false; /* All arguments must be in registers. */
12243
b292109f
JG
12244 case FMA:
12245 op0 = XEXP (x, 0);
12246 op1 = XEXP (x, 1);
12247 op2 = XEXP (x, 2);
12248
12249 if (speed)
b6875aac
KV
12250 {
12251 if (VECTOR_MODE_P (mode))
12252 *cost += extra_cost->vect.alu;
12253 else
12254 *cost += extra_cost->fp[mode == DFmode].fma;
12255 }
b292109f
JG
12256
12257 /* FMSUB, FNMADD, and FNMSUB are free. */
12258 if (GET_CODE (op0) == NEG)
12259 op0 = XEXP (op0, 0);
12260
12261 if (GET_CODE (op2) == NEG)
12262 op2 = XEXP (op2, 0);
12263
12264 /* aarch64_fnma4_elt_to_64v2df has the NEG as operand 1,
12265 and the by-element operand as operand 0. */
12266 if (GET_CODE (op1) == NEG)
12267 op1 = XEXP (op1, 0);
12268
12269 /* Catch vector-by-element operations. The by-element operand can
12270 either be (vec_duplicate (vec_select (x))) or just
12271 (vec_select (x)), depending on whether we are multiplying by
12272 a vector or a scalar.
12273
12274 Canonicalization is not very good in these cases: FMA4 will put the
12275 by-element operand as operand 0, while FNMA4 will have it as operand 1. */
12276 if (GET_CODE (op0) == VEC_DUPLICATE)
12277 op0 = XEXP (op0, 0);
12278 else if (GET_CODE (op1) == VEC_DUPLICATE)
12279 op1 = XEXP (op1, 0);
12280
12281 if (GET_CODE (op0) == VEC_SELECT)
12282 op0 = XEXP (op0, 0);
12283 else if (GET_CODE (op1) == VEC_SELECT)
12284 op1 = XEXP (op1, 0);
12285
12286 /* If the remaining parameters are not registers,
12287 get the cost to put them into registers. */
e548c9df
AM
12288 *cost += rtx_cost (op0, mode, FMA, 0, speed);
12289 *cost += rtx_cost (op1, mode, FMA, 1, speed);
12290 *cost += rtx_cost (op2, mode, FMA, 2, speed);
b292109f
JG
12291 return true;
12292
5e2a765b
KT
12293 case FLOAT:
12294 case UNSIGNED_FLOAT:
12295 if (speed)
12296 *cost += extra_cost->fp[mode == DFmode].fromint;
12297 return false;
12298
b292109f
JG
12299 case FLOAT_EXTEND:
12300 if (speed)
b6875aac
KV
12301 {
12302 if (VECTOR_MODE_P (mode))
12303 {
12304 /* Vector widen. */
12305 *cost += extra_cost->vect.alu;
12306 }
12307 else
12308 *cost += extra_cost->fp[mode == DFmode].widen;
12309 }
b292109f
JG
12310 return false;
12311
12312 case FLOAT_TRUNCATE:
12313 if (speed)
b6875aac
KV
12314 {
12315 if (VECTOR_MODE_P (mode))
12316 {
12317 /* Vector conversion. */
12318 *cost += extra_cost->vect.alu;
12319 }
12320 else
12321 *cost += extra_cost->fp[mode == DFmode].narrow;
12322 }
b292109f
JG
12323 return false;
12324
61263118
KT
12325 case FIX:
12326 case UNSIGNED_FIX:
12327 x = XEXP (x, 0);
12328 /* Strip the rounding part. They will all be implemented
12329 by the fcvt* family of instructions anyway. */
12330 if (GET_CODE (x) == UNSPEC)
12331 {
12332 unsigned int uns_code = XINT (x, 1);
12333
12334 if (uns_code == UNSPEC_FRINTA
12335 || uns_code == UNSPEC_FRINTM
12336 || uns_code == UNSPEC_FRINTN
12337 || uns_code == UNSPEC_FRINTP
12338 || uns_code == UNSPEC_FRINTZ)
12339 x = XVECEXP (x, 0, 0);
12340 }
12341
12342 if (speed)
b6875aac
KV
12343 {
12344 if (VECTOR_MODE_P (mode))
12345 *cost += extra_cost->vect.alu;
12346 else
12347 *cost += extra_cost->fp[GET_MODE (x) == DFmode].toint;
12348 }
39252973
KT
12349
12350 /* We can combine fmul by a power of 2 followed by a fcvt into a single
12351 fixed-point fcvt. */
12352 if (GET_CODE (x) == MULT
12353 && ((VECTOR_MODE_P (mode)
12354 && aarch64_vec_fpconst_pow_of_2 (XEXP (x, 1)) > 0)
12355 || aarch64_fpconst_pow_of_2 (XEXP (x, 1)) > 0))
12356 {
12357 *cost += rtx_cost (XEXP (x, 0), VOIDmode, (rtx_code) code,
12358 0, speed);
12359 return true;
12360 }
12361
e548c9df 12362 *cost += rtx_cost (x, VOIDmode, (enum rtx_code) code, 0, speed);
61263118
KT
12363 return true;
12364
b292109f 12365 case ABS:
b6875aac
KV
12366 if (VECTOR_MODE_P (mode))
12367 {
12368 /* ABS (vector). */
12369 if (speed)
12370 *cost += extra_cost->vect.alu;
12371 }
12372 else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
b292109f 12373 {
19261b99
KT
12374 op0 = XEXP (x, 0);
12375
12376 /* FABD, which is analogous to FADD. */
12377 if (GET_CODE (op0) == MINUS)
12378 {
e548c9df
AM
12379 *cost += rtx_cost (XEXP (op0, 0), mode, MINUS, 0, speed);
12380 *cost += rtx_cost (XEXP (op0, 1), mode, MINUS, 1, speed);
19261b99
KT
12381 if (speed)
12382 *cost += extra_cost->fp[mode == DFmode].addsub;
12383
12384 return true;
12385 }
12386 /* Simple FABS is analogous to FNEG. */
b292109f
JG
12387 if (speed)
12388 *cost += extra_cost->fp[mode == DFmode].neg;
12389 }
12390 else
12391 {
12392 /* Integer ABS will either be split to
12393 two arithmetic instructions, or will be an ABS
12394 (scalar), which we don't model. */
12395 *cost = COSTS_N_INSNS (2);
12396 if (speed)
12397 *cost += 2 * extra_cost->alu.arith;
12398 }
12399 return false;
12400
12401 case SMAX:
12402 case SMIN:
12403 if (speed)
12404 {
b6875aac
KV
12405 if (VECTOR_MODE_P (mode))
12406 *cost += extra_cost->vect.alu;
12407 else
12408 {
12409 /* FMAXNM/FMINNM/FMAX/FMIN.
12410 TODO: This may not be accurate for all implementations, but
12411 we do not model this in the cost tables. */
12412 *cost += extra_cost->fp[mode == DFmode].addsub;
12413 }
b292109f
JG
12414 }
12415 return false;
12416
61263118
KT
12417 case UNSPEC:
12418 /* The floating point round to integer frint* instructions. */
12419 if (aarch64_frint_unspec_p (XINT (x, 1)))
12420 {
12421 if (speed)
12422 *cost += extra_cost->fp[mode == DFmode].roundint;
12423
12424 return false;
12425 }
781aeb73
KT
12426
12427 if (XINT (x, 1) == UNSPEC_RBIT)
12428 {
12429 if (speed)
12430 *cost += extra_cost->alu.rev;
12431
12432 return false;
12433 }
61263118
KT
12434 break;
12435
fb620c4a
JG
12436 case TRUNCATE:
12437
12438 /* Decompose <su>muldi3_highpart. */
12439 if (/* (truncate:DI */
12440 mode == DImode
12441 /* (lshiftrt:TI */
12442 && GET_MODE (XEXP (x, 0)) == TImode
12443 && GET_CODE (XEXP (x, 0)) == LSHIFTRT
12444 /* (mult:TI */
12445 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
12446 /* (ANY_EXTEND:TI (reg:DI))
12447 (ANY_EXTEND:TI (reg:DI))) */
12448 && ((GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == ZERO_EXTEND
12449 && GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)) == ZERO_EXTEND)
12450 || (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == SIGN_EXTEND
12451 && GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)) == SIGN_EXTEND))
12452 && GET_MODE (XEXP (XEXP (XEXP (XEXP (x, 0), 0), 0), 0)) == DImode
12453 && GET_MODE (XEXP (XEXP (XEXP (XEXP (x, 0), 0), 1), 0)) == DImode
12454 /* (const_int 64) */
12455 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
12456 && UINTVAL (XEXP (XEXP (x, 0), 1)) == 64)
12457 {
12458 /* UMULH/SMULH. */
12459 if (speed)
12460 *cost += extra_cost->mult[mode == DImode].extend;
e548c9df
AM
12461 *cost += rtx_cost (XEXP (XEXP (XEXP (XEXP (x, 0), 0), 0), 0),
12462 mode, MULT, 0, speed);
12463 *cost += rtx_cost (XEXP (XEXP (XEXP (XEXP (x, 0), 0), 1), 0),
12464 mode, MULT, 1, speed);
fb620c4a
JG
12465 return true;
12466 }
12467
12468 /* Fall through. */
43e9d192 12469 default:
61263118 12470 break;
43e9d192 12471 }
61263118 12472
c10e3d7f
AP
12473 if (dump_file
12474 && flag_aarch64_verbose_cost)
61263118
KT
12475 fprintf (dump_file,
12476 "\nFailed to cost RTX. Assuming default cost.\n");
12477
12478 return true;
43e9d192
IB
12479}
12480
0ee859b5
JG
12481/* Wrapper around aarch64_rtx_costs, dumps the partial, or total cost
12482 calculated for X. This cost is stored in *COST. Returns true
12483 if the total cost of X was calculated. */
12484static bool
e548c9df 12485aarch64_rtx_costs_wrapper (rtx x, machine_mode mode, int outer,
0ee859b5
JG
12486 int param, int *cost, bool speed)
12487{
e548c9df 12488 bool result = aarch64_rtx_costs (x, mode, outer, param, cost, speed);
0ee859b5 12489
c10e3d7f
AP
12490 if (dump_file
12491 && flag_aarch64_verbose_cost)
0ee859b5
JG
12492 {
12493 print_rtl_single (dump_file, x);
12494 fprintf (dump_file, "\n%s cost: %d (%s)\n",
12495 speed ? "Hot" : "Cold",
12496 *cost, result ? "final" : "partial");
12497 }
12498
12499 return result;
12500}
12501
43e9d192 12502static int
ef4bddc2 12503aarch64_register_move_cost (machine_mode mode,
8a3a7e67 12504 reg_class_t from_i, reg_class_t to_i)
43e9d192 12505{
8a3a7e67
RH
12506 enum reg_class from = (enum reg_class) from_i;
12507 enum reg_class to = (enum reg_class) to_i;
43e9d192 12508 const struct cpu_regmove_cost *regmove_cost
b175b679 12509 = aarch64_tune_params.regmove_cost;
43e9d192 12510
3be07662 12511 /* Caller save and pointer regs are equivalent to GENERAL_REGS. */
d677263e 12512 if (to == TAILCALL_ADDR_REGS || to == POINTER_REGS)
3be07662
WD
12513 to = GENERAL_REGS;
12514
d677263e 12515 if (from == TAILCALL_ADDR_REGS || from == POINTER_REGS)
3be07662
WD
12516 from = GENERAL_REGS;
12517
183bfdaf
RS
12518 /* Make RDFFR very expensive. In particular, if we know that the FFR
12519 contains a PTRUE (e.g. after a SETFFR), we must never use RDFFR
12520 as a way of obtaining a PTRUE. */
12521 if (GET_MODE_CLASS (mode) == MODE_VECTOR_BOOL
12522 && hard_reg_set_subset_p (reg_class_contents[from_i],
12523 reg_class_contents[FFR_REGS]))
12524 return 80;
12525
6ee70f81
AP
12526 /* Moving between GPR and stack cost is the same as GP2GP. */
12527 if ((from == GENERAL_REGS && to == STACK_REG)
12528 || (to == GENERAL_REGS && from == STACK_REG))
12529 return regmove_cost->GP2GP;
12530
12531 /* To/From the stack register, we move via the gprs. */
12532 if (to == STACK_REG || from == STACK_REG)
12533 return aarch64_register_move_cost (mode, from, GENERAL_REGS)
12534 + aarch64_register_move_cost (mode, GENERAL_REGS, to);
12535
6a70badb 12536 if (known_eq (GET_MODE_SIZE (mode), 16))
8919453c
WD
12537 {
12538 /* 128-bit operations on general registers require 2 instructions. */
12539 if (from == GENERAL_REGS && to == GENERAL_REGS)
12540 return regmove_cost->GP2GP * 2;
12541 else if (from == GENERAL_REGS)
12542 return regmove_cost->GP2FP * 2;
12543 else if (to == GENERAL_REGS)
12544 return regmove_cost->FP2GP * 2;
12545
12546 /* When AdvSIMD instructions are disabled it is not possible to move
12547 a 128-bit value directly between Q registers. This is handled in
12548 secondary reload. A general register is used as a scratch to move
12549 the upper DI value and the lower DI value is moved directly,
12550 hence the cost is the sum of three moves. */
12551 if (! TARGET_SIMD)
12552 return regmove_cost->GP2FP + regmove_cost->FP2GP + regmove_cost->FP2FP;
12553
12554 return regmove_cost->FP2FP;
12555 }
12556
43e9d192
IB
12557 if (from == GENERAL_REGS && to == GENERAL_REGS)
12558 return regmove_cost->GP2GP;
12559 else if (from == GENERAL_REGS)
12560 return regmove_cost->GP2FP;
12561 else if (to == GENERAL_REGS)
12562 return regmove_cost->FP2GP;
12563
43e9d192
IB
12564 return regmove_cost->FP2FP;
12565}
12566
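/* Toy standalone model (invented names and example cost numbers, not
   taken from any tuning table) of the 128-bit special cases handled
   above: GP<->GP and GP<->FP moves double in cost for a 16-byte mode,
   and without AdvSIMD an FP<->FP copy is synthesised via a general
   register, costing GP2FP + FP2GP + FP2FP.  */

#include <stdbool.h>
#include <stdio.h>

struct example_regmove_cost { int GP2GP, GP2FP, FP2GP, FP2FP; };

static int
q_reg_move_cost (const struct example_regmove_cost *c, bool from_fp,
                 bool to_fp, bool have_simd)
{
  if (!from_fp && !to_fp)
    return c->GP2GP * 2;
  if (!from_fp)
    return c->GP2FP * 2;
  if (!to_fp)
    return c->FP2GP * 2;
  return have_simd ? c->FP2FP : c->GP2FP + c->FP2GP + c->FP2FP;
}

int
main (void)
{
  struct example_regmove_cost c = { 1, 5, 5, 2 };  /* example numbers only */
  printf ("%d\n", q_reg_move_cost (&c, true, true, true));   /* 2 */
  printf ("%d\n", q_reg_move_cost (&c, true, true, false));  /* 12 */
  return 0;
}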
12567static int
ef4bddc2 12568aarch64_memory_move_cost (machine_mode mode ATTRIBUTE_UNUSED,
43e9d192
IB
12569 reg_class_t rclass ATTRIBUTE_UNUSED,
12570 bool in ATTRIBUTE_UNUSED)
12571{
b175b679 12572 return aarch64_tune_params.memmov_cost;
43e9d192
IB
12573}
12574
6d4d616a
RS
12575/* Implement TARGET_INIT_BUILTINS. */
12576static void
12577aarch64_init_builtins ()
12578{
12579 aarch64_general_init_builtins ();
624d0f07 12580 aarch64_sve::init_builtins ();
6d4d616a
RS
12581}
12582
12583/* Implement TARGET_FOLD_BUILTIN. */
12584static tree
12585aarch64_fold_builtin (tree fndecl, int nargs, tree *args, bool)
12586{
12587 unsigned int code = DECL_MD_FUNCTION_CODE (fndecl);
12588 unsigned int subcode = code >> AARCH64_BUILTIN_SHIFT;
12589 tree type = TREE_TYPE (TREE_TYPE (fndecl));
12590 switch (code & AARCH64_BUILTIN_CLASS)
12591 {
12592 case AARCH64_BUILTIN_GENERAL:
12593 return aarch64_general_fold_builtin (subcode, type, nargs, args);
624d0f07
RS
12594
12595 case AARCH64_BUILTIN_SVE:
12596 return NULL_TREE;
6d4d616a
RS
12597 }
12598 gcc_unreachable ();
12599}
12600
12601/* Implement TARGET_GIMPLE_FOLD_BUILTIN. */
12602static bool
12603aarch64_gimple_fold_builtin (gimple_stmt_iterator *gsi)
12604{
12605 gcall *stmt = as_a <gcall *> (gsi_stmt (*gsi));
12606 tree fndecl = gimple_call_fndecl (stmt);
12607 unsigned int code = DECL_MD_FUNCTION_CODE (fndecl);
12608 unsigned int subcode = code >> AARCH64_BUILTIN_SHIFT;
12609 gimple *new_stmt = NULL;
12610 switch (code & AARCH64_BUILTIN_CLASS)
12611 {
12612 case AARCH64_BUILTIN_GENERAL:
12613 new_stmt = aarch64_general_gimple_fold_builtin (subcode, stmt);
12614 break;
624d0f07
RS
12615
12616 case AARCH64_BUILTIN_SVE:
12617 new_stmt = aarch64_sve::gimple_fold_builtin (subcode, gsi, stmt);
12618 break;
6d4d616a
RS
12619 }
12620
12621 if (!new_stmt)
12622 return false;
12623
12624 gsi_replace (gsi, new_stmt, true);
12625 return true;
12626}
12627
12628/* Implement TARGET_EXPAND_BUILTIN. */
12629static rtx
c5dc215d 12630aarch64_expand_builtin (tree exp, rtx target, rtx, machine_mode, int ignore)
6d4d616a
RS
12631{
12632 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
12633 unsigned int code = DECL_MD_FUNCTION_CODE (fndecl);
12634 unsigned int subcode = code >> AARCH64_BUILTIN_SHIFT;
12635 switch (code & AARCH64_BUILTIN_CLASS)
12636 {
12637 case AARCH64_BUILTIN_GENERAL:
c5dc215d 12638 return aarch64_general_expand_builtin (subcode, exp, target, ignore);
624d0f07
RS
12639
12640 case AARCH64_BUILTIN_SVE:
12641 return aarch64_sve::expand_builtin (subcode, exp, target);
6d4d616a
RS
12642 }
12643 gcc_unreachable ();
12644}
12645
12646/* Implement TARGET_BUILTIN_DECL. */
12647static tree
12648aarch64_builtin_decl (unsigned int code, bool initialize_p)
12649{
12650 unsigned int subcode = code >> AARCH64_BUILTIN_SHIFT;
12651 switch (code & AARCH64_BUILTIN_CLASS)
12652 {
12653 case AARCH64_BUILTIN_GENERAL:
12654 return aarch64_general_builtin_decl (subcode, initialize_p);
624d0f07
RS
12655
12656 case AARCH64_BUILTIN_SVE:
12657 return aarch64_sve::builtin_decl (subcode, initialize_p);
6d4d616a
RS
12658 }
12659 gcc_unreachable ();
12660}
12661
0c30e0f3
EM
12662/* Return true if it is safe and beneficial to use the approximate rsqrt optabs
12663 to optimize 1.0/sqrt. */
ee62a5a6
RS
12664
12665static bool
9acc9cbe 12666use_rsqrt_p (machine_mode mode)
ee62a5a6
RS
12667{
12668 return (!flag_trapping_math
12669 && flag_unsafe_math_optimizations
9acc9cbe
EM
12670 && ((aarch64_tune_params.approx_modes->recip_sqrt
12671 & AARCH64_APPROX_MODE (mode))
1a33079e 12672 || flag_mrecip_low_precision_sqrt));
ee62a5a6
RS
12673}
12674
0c30e0f3
EM
12675/* Function to decide when to use the approximate reciprocal square root
12676 builtin. */
a6fc00da
BH
12677
12678static tree
ee62a5a6 12679aarch64_builtin_reciprocal (tree fndecl)
a6fc00da 12680{
9acc9cbe
EM
12681 machine_mode mode = TYPE_MODE (TREE_TYPE (fndecl));
12682
12683 if (!use_rsqrt_p (mode))
a6fc00da 12684 return NULL_TREE;
6d4d616a
RS
12685 unsigned int code = DECL_MD_FUNCTION_CODE (fndecl);
12686 unsigned int subcode = code >> AARCH64_BUILTIN_SHIFT;
12687 switch (code & AARCH64_BUILTIN_CLASS)
12688 {
12689 case AARCH64_BUILTIN_GENERAL:
12690 return aarch64_general_builtin_rsqrt (subcode);
624d0f07
RS
12691
12692 case AARCH64_BUILTIN_SVE:
12693 return NULL_TREE;
6d4d616a
RS
12694 }
12695 gcc_unreachable ();
a6fc00da
BH
12696}
12697
98daafa0
EM
12698/* Emit instruction sequence to compute either the approximate square root
12699 or its approximate reciprocal, depending on the flag RECP, and return
12700 whether the sequence was emitted or not. */
a6fc00da 12701
98daafa0
EM
12702bool
12703aarch64_emit_approx_sqrt (rtx dst, rtx src, bool recp)
a6fc00da 12704{
98daafa0 12705 machine_mode mode = GET_MODE (dst);
daef0a8c
JW
12706
12707 if (GET_MODE_INNER (mode) == HFmode)
2e19adc8
RE
12708 {
12709 gcc_assert (!recp);
12710 return false;
12711 }
12712
2e19adc8
RE
12713 if (!recp)
12714 {
12715 if (!(flag_mlow_precision_sqrt
12716 || (aarch64_tune_params.approx_modes->sqrt
12717 & AARCH64_APPROX_MODE (mode))))
12718 return false;
12719
12720 if (flag_finite_math_only
12721 || flag_trapping_math
12722 || !flag_unsafe_math_optimizations
12723 || optimize_function_for_size_p (cfun))
12724 return false;
12725 }
12726 else
12727 /* Caller assumes we cannot fail. */
12728 gcc_assert (use_rsqrt_p (mode));
daef0a8c 12729
d7814449 12730 machine_mode mmsk = (VECTOR_MODE_P (mode)
d083ee47 12731 ? related_int_vector_mode (mode).require ()
d7814449 12732 : int_mode_for_mode (mode).require ());
98daafa0
EM
12733 rtx xmsk = gen_reg_rtx (mmsk);
12734 if (!recp)
2e19adc8
RE
12735 /* When calculating the approximate square root, compare the
12736 argument with 0.0 and create a mask. */
12737 emit_insn (gen_rtx_SET (xmsk,
12738 gen_rtx_NEG (mmsk,
12739 gen_rtx_EQ (mmsk, src,
12740 CONST0_RTX (mode)))));
a6fc00da 12741
98daafa0
EM
12742 /* Estimate the approximate reciprocal square root. */
12743 rtx xdst = gen_reg_rtx (mode);
0016d8d9 12744 emit_insn (gen_aarch64_rsqrte (mode, xdst, src));
a6fc00da 12745
98daafa0
EM
12746 /* Iterate over the series twice for SF and thrice for DF. */
12747 int iterations = (GET_MODE_INNER (mode) == DFmode) ? 3 : 2;
a6fc00da 12748
98daafa0
EM
12749 /* Optionally iterate over the series once less for faster performance
12750 while sacrificing the accuracy. */
12751 if ((recp && flag_mrecip_low_precision_sqrt)
12752 || (!recp && flag_mlow_precision_sqrt))
a6fc00da
BH
12753 iterations--;
12754
98daafa0
EM
12755 /* Iterate over the series to calculate the approximate reciprocal square
12756 root. */
12757 rtx x1 = gen_reg_rtx (mode);
12758 while (iterations--)
a6fc00da 12759 {
a6fc00da 12760 rtx x2 = gen_reg_rtx (mode);
98daafa0
EM
12761 emit_set_insn (x2, gen_rtx_MULT (mode, xdst, xdst));
12762
0016d8d9 12763 emit_insn (gen_aarch64_rsqrts (mode, x1, src, x2));
a6fc00da 12764
98daafa0
EM
12765 if (iterations > 0)
12766 emit_set_insn (xdst, gen_rtx_MULT (mode, xdst, x1));
12767 }
12768
12769 if (!recp)
12770 {
12771 /* Qualify the approximate reciprocal square root when the argument is
12772 0.0 by squashing the intermediary result to 0.0. */
12773 rtx xtmp = gen_reg_rtx (mmsk);
12774 emit_set_insn (xtmp, gen_rtx_AND (mmsk, gen_rtx_NOT (mmsk, xmsk),
12775 gen_rtx_SUBREG (mmsk, xdst, 0)));
12776 emit_move_insn (xdst, gen_rtx_SUBREG (mode, xtmp, 0));
a6fc00da 12777
98daafa0
EM
12778 /* Calculate the approximate square root. */
12779 emit_set_insn (xdst, gen_rtx_MULT (mode, xdst, src));
a6fc00da
BH
12780 }
12781
98daafa0
EM
12782 /* Finalize the approximation. */
12783 emit_set_insn (dst, gen_rtx_MULT (mode, xdst, x1));
12784
12785 return true;
a6fc00da
BH
12786}
12787
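/* Scalar sketch (host 'double' arithmetic, invented names; not the
   generated code) of the refinement driven above: FRSQRTE supplies a
   rough estimate of 1/sqrt(d), and each FRSQRTS step contributes the
   factor (3 - d*x*x)/2, roughly doubling the number of correct bits,
   which is why the loop runs twice for SFmode and three times for
   DFmode.  */

#include <math.h>
#include <stdio.h>

static double
refine_rsqrt (double d, double x, int iterations)
{
  while (iterations-- > 0)
    x = x * (3.0 - d * x * x) / 2.0;
  return x;
}

int
main (void)
{
  double d = 2.0;
  double x0 = 0.7;   /* stand-in for the FRSQRTE estimate */
  printf ("approx: %.17g\n", refine_rsqrt (d, x0, 3));
  printf ("exact:  %.17g\n", 1.0 / sqrt (d));
  return 0;
}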
79a2bc2d
EM
12788/* Emit the instruction sequence to compute the approximation for the division
12789 of NUM by DEN in QUO and return whether the sequence was emitted or not. */
12790
12791bool
12792aarch64_emit_approx_div (rtx quo, rtx num, rtx den)
12793{
12794 machine_mode mode = GET_MODE (quo);
33d72b63
JW
12795
12796 if (GET_MODE_INNER (mode) == HFmode)
12797 return false;
12798
79a2bc2d
EM
12799 bool use_approx_division_p = (flag_mlow_precision_div
12800 || (aarch64_tune_params.approx_modes->division
12801 & AARCH64_APPROX_MODE (mode)));
12802
12803 if (!flag_finite_math_only
12804 || flag_trapping_math
12805 || !flag_unsafe_math_optimizations
12806 || optimize_function_for_size_p (cfun)
12807 || !use_approx_division_p)
12808 return false;
12809
1be49a38
RR
12810 if (!TARGET_SIMD && VECTOR_MODE_P (mode))
12811 return false;
12812
79a2bc2d
EM
12813 /* Estimate the approximate reciprocal. */
12814 rtx xrcp = gen_reg_rtx (mode);
0016d8d9 12815 emit_insn (gen_aarch64_frecpe (mode, xrcp, den));
79a2bc2d
EM
12816
12817 /* Iterate over the series twice for SF and thrice for DF. */
12818 int iterations = (GET_MODE_INNER (mode) == DFmode) ? 3 : 2;
12819
12820 /* Optionally iterate over the series once less for faster performance,
12821 while sacrificing the accuracy. */
12822 if (flag_mlow_precision_div)
12823 iterations--;
12824
12825 /* Iterate over the series to calculate the approximate reciprocal. */
12826 rtx xtmp = gen_reg_rtx (mode);
12827 while (iterations--)
12828 {
0016d8d9 12829 emit_insn (gen_aarch64_frecps (mode, xtmp, xrcp, den));
79a2bc2d
EM
12830
12831 if (iterations > 0)
12832 emit_set_insn (xrcp, gen_rtx_MULT (mode, xrcp, xtmp));
12833 }
12834
12835 if (num != CONST1_RTX (mode))
12836 {
12837 /* As the approximate reciprocal of DEN is already calculated, only
12838 calculate the approximate division when NUM is not 1.0. */
12839 rtx xnum = force_reg (mode, num);
12840 emit_set_insn (xrcp, gen_rtx_MULT (mode, xrcp, xnum));
12841 }
12842
12843 /* Finalize the approximation. */
12844 emit_set_insn (quo, gen_rtx_MULT (mode, xrcp, xtmp));
12845 return true;
12846}
12847
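/* Scalar sketch (host 'double' arithmetic, invented names; not the
   generated code) of the reciprocal refinement above: FRECPE supplies
   a rough 1/den and each FRECPS step contributes the factor
   (2 - den*x), the usual Newton-Raphson reciprocal iteration, again
   run twice for SFmode and three times for DFmode, with a final
   multiply by NUM when the numerator is not 1.0.  */

#include <stdio.h>

static double
approx_div (double num, double den, double est, int iterations)
{
  double x = est;
  while (iterations-- > 0)
    x = x * (2.0 - den * x);
  return num * x;
}

int
main (void)
{
  double est = 0.3;   /* stand-in for the FRECPE estimate of 1/3 */
  printf ("approx: %.17g\n", approx_div (7.0, 3.0, est, 3));
  printf ("exact:  %.17g\n", 7.0 / 3.0);
  return 0;
}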
d126a4ae
AP
12848/* Return the number of instructions that can be issued per cycle. */
12849static int
12850aarch64_sched_issue_rate (void)
12851{
b175b679 12852 return aarch64_tune_params.issue_rate;
d126a4ae
AP
12853}
12854
d0bc0cb6
RS
12855/* Implement TARGET_SCHED_VARIABLE_ISSUE. */
12856static int
12857aarch64_sched_variable_issue (FILE *, int, rtx_insn *insn, int more)
12858{
12859 if (DEBUG_INSN_P (insn))
12860 return more;
12861
12862 rtx_code code = GET_CODE (PATTERN (insn));
12863 if (code == USE || code == CLOBBER)
12864 return more;
12865
12866 if (get_attr_type (insn) == TYPE_NO_INSN)
12867 return more;
12868
12869 return more - 1;
12870}
12871
d03f7e44
MK
12872static int
12873aarch64_sched_first_cycle_multipass_dfa_lookahead (void)
12874{
12875 int issue_rate = aarch64_sched_issue_rate ();
12876
12877 return issue_rate > 1 && !sched_fusion ? issue_rate : 0;
12878}
12879
2d6bc7fa
KT
12880
12881/* Implement TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD as
12882 autopref_multipass_dfa_lookahead_guard from haifa-sched.c. It only
12883 has an effect if PARAM_SCHED_AUTOPREF_QUEUE_DEPTH > 0. */
12884
12885static int
12886aarch64_first_cycle_multipass_dfa_lookahead_guard (rtx_insn *insn,
12887 int ready_index)
12888{
12889 return autopref_multipass_dfa_lookahead_guard (insn, ready_index);
12890}
12891
12892
8990e73a
TB
12893/* Vectorizer cost model target hooks. */
12894
12895/* Implement targetm.vectorize.builtin_vectorization_cost. */
12896static int
12897aarch64_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
12898 tree vectype,
12899 int misalign ATTRIBUTE_UNUSED)
12900{
12901 unsigned elements;
cd8ae5ed
AP
12902 const cpu_vector_cost *costs = aarch64_tune_params.vec_costs;
12903 bool fp = false;
12904
12905 if (vectype != NULL)
12906 fp = FLOAT_TYPE_P (vectype);
8990e73a
TB
12907
12908 switch (type_of_cost)
12909 {
12910 case scalar_stmt:
cd8ae5ed 12911 return fp ? costs->scalar_fp_stmt_cost : costs->scalar_int_stmt_cost;
8990e73a
TB
12912
12913 case scalar_load:
cd8ae5ed 12914 return costs->scalar_load_cost;
8990e73a
TB
12915
12916 case scalar_store:
cd8ae5ed 12917 return costs->scalar_store_cost;
8990e73a
TB
12918
12919 case vector_stmt:
cd8ae5ed 12920 return fp ? costs->vec_fp_stmt_cost : costs->vec_int_stmt_cost;
8990e73a
TB
12921
12922 case vector_load:
cd8ae5ed 12923 return costs->vec_align_load_cost;
8990e73a
TB
12924
12925 case vector_store:
cd8ae5ed 12926 return costs->vec_store_cost;
8990e73a
TB
12927
12928 case vec_to_scalar:
cd8ae5ed 12929 return costs->vec_to_scalar_cost;
8990e73a
TB
12930
12931 case scalar_to_vec:
cd8ae5ed 12932 return costs->scalar_to_vec_cost;
8990e73a
TB
12933
12934 case unaligned_load:
cc9fe6bb 12935 case vector_gather_load:
cd8ae5ed 12936 return costs->vec_unalign_load_cost;
8990e73a
TB
12937
12938 case unaligned_store:
cc9fe6bb 12939 case vector_scatter_store:
cd8ae5ed 12940 return costs->vec_unalign_store_cost;
8990e73a
TB
12941
12942 case cond_branch_taken:
cd8ae5ed 12943 return costs->cond_taken_branch_cost;
8990e73a
TB
12944
12945 case cond_branch_not_taken:
cd8ae5ed 12946 return costs->cond_not_taken_branch_cost;
8990e73a
TB
12947
12948 case vec_perm:
cd8ae5ed 12949 return costs->vec_permute_cost;
c428f91c 12950
8990e73a 12951 case vec_promote_demote:
cd8ae5ed 12952 return fp ? costs->vec_fp_stmt_cost : costs->vec_int_stmt_cost;
8990e73a
TB
12953
12954 case vec_construct:
6a70badb 12955 elements = estimated_poly_value (TYPE_VECTOR_SUBPARTS (vectype));
8990e73a
TB
12956 return elements / 2 + 1;
12957
12958 default:
12959 gcc_unreachable ();
12960 }
12961}
12962
217ccab8
RS
12963/* Return true if STMT_INFO extends the result of a load. */
12964static bool
12965aarch64_extending_load_p (stmt_vec_info stmt_info)
12966{
12967 gassign *assign = dyn_cast <gassign *> (stmt_info->stmt);
12968 if (!assign || !CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (assign)))
12969 return false;
12970
12971 tree rhs = gimple_assign_rhs1 (stmt_info->stmt);
12972 tree lhs_type = TREE_TYPE (gimple_assign_lhs (assign));
12973 tree rhs_type = TREE_TYPE (rhs);
12974 if (!INTEGRAL_TYPE_P (lhs_type)
12975 || !INTEGRAL_TYPE_P (rhs_type)
12976 || TYPE_PRECISION (lhs_type) <= TYPE_PRECISION (rhs_type))
12977 return false;
12978
12979 stmt_vec_info def_stmt_info = stmt_info->vinfo->lookup_def (rhs);
12980 return (def_stmt_info
12981 && STMT_VINFO_DATA_REF (def_stmt_info)
12982 && DR_IS_READ (STMT_VINFO_DATA_REF (def_stmt_info)));
12983}
12984
2d56600c
RS
12985/* Return true if STMT_INFO is an integer truncation. */
12986static bool
12987aarch64_integer_truncation_p (stmt_vec_info stmt_info)
12988{
12989 gassign *assign = dyn_cast <gassign *> (stmt_info->stmt);
12990 if (!assign || !CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (assign)))
12991 return false;
12992
12993 tree lhs_type = TREE_TYPE (gimple_assign_lhs (assign));
12994 tree rhs_type = TREE_TYPE (gimple_assign_rhs1 (assign));
12995 return (INTEGRAL_TYPE_P (lhs_type)
12996 && INTEGRAL_TYPE_P (rhs_type)
12997 && TYPE_PRECISION (lhs_type) < TYPE_PRECISION (rhs_type));
12998}
12999
217ccab8
RS
13000/* STMT_COST is the cost calculated by aarch64_builtin_vectorization_cost
13001 for STMT_INFO, which has cost kind KIND. Adjust the cost as necessary
13002 for SVE targets. */
13003static unsigned int
13004aarch64_sve_adjust_stmt_cost (vect_cost_for_stmt kind, stmt_vec_info stmt_info,
13005 unsigned int stmt_cost)
13006{
13007 /* Unlike vec_promote_demote, vector_stmt conversions do not change the
13008 vector register size or number of units. Integer promotions of this
13009 type therefore map to SXT[BHW] or UXT[BHW].
13010
13011 Most loads have extending forms that can do the sign or zero extension
13012 on the fly. Optimistically assume that a load followed by an extension
13013 will fold to this form during combine, and that the extension therefore
13014 comes for free. */
13015 if (kind == vector_stmt && aarch64_extending_load_p (stmt_info))
13016 stmt_cost = 0;
13017
2d56600c
RS
13018 /* For similar reasons, vector_stmt integer truncations are a no-op,
13019 because we can just ignore the unused upper bits of the source. */
13020 if (kind == vector_stmt && aarch64_integer_truncation_p (stmt_info))
13021 stmt_cost = 0;
13022
217ccab8
RS
13023 return stmt_cost;
13024}
13025
8990e73a
TB
13026/* Implement targetm.vectorize.add_stmt_cost. */
13027static unsigned
13028aarch64_add_stmt_cost (void *data, int count, enum vect_cost_for_stmt kind,
13029 struct _stmt_vec_info *stmt_info, int misalign,
13030 enum vect_cost_model_location where)
13031{
13032 unsigned *cost = (unsigned *) data;
13033 unsigned retval = 0;
13034
13035 if (flag_vect_cost_model)
13036 {
13037 tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
13038 int stmt_cost =
13039 aarch64_builtin_vectorization_cost (kind, vectype, misalign);
13040
217ccab8
RS
13041 if (stmt_info && vectype && aarch64_sve_mode_p (TYPE_MODE (vectype)))
13042 stmt_cost = aarch64_sve_adjust_stmt_cost (kind, stmt_info, stmt_cost);
13043
8990e73a
TB
13044 /* Statements in an inner loop relative to the loop being
13045 vectorized are weighted more heavily. The value here is
058e4c71 13046 arbitrary and could potentially be improved with analysis. */
8990e73a 13047 if (where == vect_body && stmt_info && stmt_in_inner_loop_p (stmt_info))
058e4c71 13048 count *= 50; /* FIXME */
8990e73a
TB
13049
13050 retval = (unsigned) (count * stmt_cost);
13051 cost[where] += retval;
13052 }
13053
13054 return retval;
13055}
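/* Editorial sketch (not GCC code): the accumulation contract of the hook
   above, reduced to plain C.  Each call adds COUNT * STMT_COST into one of
   three buckets, and body statements inside an inner loop get the same
   arbitrary 50x weight used above.  The bucket layout and the weighting are
   taken from the real function; everything else is illustrative.  */
enum cost_bucket { BUCKET_PROLOGUE, BUCKET_BODY, BUCKET_EPILOGUE, NUM_BUCKETS };

static unsigned
add_stmt_cost_sketch (unsigned costs[NUM_BUCKETS], int count,
		      unsigned stmt_cost, enum cost_bucket where,
		      int in_inner_loop)
{
  if (where == BUCKET_BODY && in_inner_loop)
    count *= 50;	/* Same FIXME weighting as the hook above.  */
  unsigned retval = (unsigned) (count * stmt_cost);
  costs[where] += retval;
  return retval;
}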
13056
0cfff2a1 13057static void initialize_aarch64_code_model (struct gcc_options *);
43e9d192 13058
0cfff2a1
KT
13059/* Parse the TO_PARSE string and put the architecture struct that it
13060 selects into RES and the architectural features into ISA_FLAGS.
13061 Return an aarch64_parse_opt_result describing the parse result.
c7887347
ML
13062 If there is an error parsing, RES and ISA_FLAGS are left unchanged.
13063 When the TO_PARSE string contains an invalid extension,
13064 a copy of the string is created and stored to INVALID_EXTENSION. */
43e9d192 13065
0cfff2a1
KT
13066static enum aarch64_parse_opt_result
13067aarch64_parse_arch (const char *to_parse, const struct processor **res,
28108a53 13068 uint64_t *isa_flags, std::string *invalid_extension)
43e9d192 13069{
ff150bc4 13070 const char *ext;
43e9d192 13071 const struct processor *arch;
43e9d192
IB
13072 size_t len;
13073
ff150bc4 13074 ext = strchr (to_parse, '+');
43e9d192
IB
13075
13076 if (ext != NULL)
ff150bc4 13077 len = ext - to_parse;
43e9d192 13078 else
ff150bc4 13079 len = strlen (to_parse);
43e9d192
IB
13080
13081 if (len == 0)
0cfff2a1
KT
13082 return AARCH64_PARSE_MISSING_ARG;
13083
43e9d192 13084
0cfff2a1 13085 /* Loop through the list of supported ARCHes to find a match. */
43e9d192
IB
13086 for (arch = all_architectures; arch->name != NULL; arch++)
13087 {
ff150bc4
ML
13088 if (strlen (arch->name) == len
13089 && strncmp (arch->name, to_parse, len) == 0)
43e9d192 13090 {
28108a53 13091 uint64_t isa_temp = arch->flags;
43e9d192
IB
13092
13093 if (ext != NULL)
13094 {
0cfff2a1
KT
13095 /* TO_PARSE string contains at least one extension. */
13096 enum aarch64_parse_opt_result ext_res
c7887347 13097 = aarch64_parse_extension (ext, &isa_temp, invalid_extension);
43e9d192 13098
0cfff2a1
KT
13099 if (ext_res != AARCH64_PARSE_OK)
13100 return ext_res;
ffee7aa9 13101 }
0cfff2a1
KT
13102 /* Extension parsing was successful. Confirm the result
13103 arch and ISA flags. */
13104 *res = arch;
13105 *isa_flags = isa_temp;
13106 return AARCH64_PARSE_OK;
43e9d192
IB
13107 }
13108 }
13109
13110 /* ARCH name not found in list. */
0cfff2a1 13111 return AARCH64_PARSE_INVALID_ARG;
43e9d192
IB
13112}
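/* Editorial sketch (not GCC code) of the name/extension split used by
   aarch64_parse_arch above (and by aarch64_parse_cpu below): everything
   before the first '+' names the architecture or CPU, and the remainder,
   if any, is handed to the extension parser.  For example, "armv8.2-a+crypto"
   splits into "armv8.2-a" and "+crypto".  Uses only the standard C library;
   the function name is made up for illustration.  */
#include <stdio.h>
#include <string.h>

static void
split_arch_string (const char *to_parse)
{
  const char *ext = strchr (to_parse, '+');
  size_t len = ext ? (size_t) (ext - to_parse) : strlen (to_parse);
  if (len == 0)
    printf ("missing name in \"%s\"\n", to_parse);
  else
    printf ("name: \"%.*s\"  extensions: \"%s\"\n",
	    (int) len, to_parse, ext ? ext : "");
}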
13113
0cfff2a1
KT
13114/* Parse the TO_PARSE string and put the result tuning in RES and the
13115 architecture flags in ISA_FLAGS. Return an aarch64_parse_opt_result
13116 describing the parse result. If there is an error parsing, RES and
c7887347
ML
13117 ISA_FLAGS are left unchanged.
13118 When the TO_PARSE string contains an invalid extension,
13119 a copy of the string is created and stored to INVALID_EXTENSION. */
43e9d192 13120
0cfff2a1
KT
13121static enum aarch64_parse_opt_result
13122aarch64_parse_cpu (const char *to_parse, const struct processor **res,
28108a53 13123 uint64_t *isa_flags, std::string *invalid_extension)
43e9d192 13124{
ff150bc4 13125 const char *ext;
43e9d192 13126 const struct processor *cpu;
43e9d192
IB
13127 size_t len;
13128
ff150bc4 13129 ext = strchr (to_parse, '+');
43e9d192
IB
13130
13131 if (ext != NULL)
ff150bc4 13132 len = ext - to_parse;
43e9d192 13133 else
ff150bc4 13134 len = strlen (to_parse);
43e9d192
IB
13135
13136 if (len == 0)
0cfff2a1
KT
13137 return AARCH64_PARSE_MISSING_ARG;
13138
43e9d192
IB
13139
13140 /* Loop through the list of supported CPUs to find a match. */
13141 for (cpu = all_cores; cpu->name != NULL; cpu++)
13142 {
ff150bc4 13143 if (strlen (cpu->name) == len && strncmp (cpu->name, to_parse, len) == 0)
43e9d192 13144 {
28108a53 13145 uint64_t isa_temp = cpu->flags;
0cfff2a1 13146
43e9d192
IB
13147
13148 if (ext != NULL)
13149 {
0cfff2a1
KT
13150 /* TO_PARSE string contains at least one extension. */
13151 enum aarch64_parse_opt_result ext_res
c7887347 13152 = aarch64_parse_extension (ext, &isa_temp, invalid_extension);
43e9d192 13153
0cfff2a1
KT
13154 if (ext_res != AARCH64_PARSE_OK)
13155 return ext_res;
13156 }
13157	 /* Extension parsing was successful.  Confirm the result
13158 cpu and ISA flags. */
13159 *res = cpu;
13160 *isa_flags = isa_temp;
13161 return AARCH64_PARSE_OK;
43e9d192
IB
13162 }
13163 }
13164
13165 /* CPU name not found in list. */
0cfff2a1 13166 return AARCH64_PARSE_INVALID_ARG;
43e9d192
IB
13167}
13168
0cfff2a1
KT
13169/* Parse the TO_PARSE string and put the cpu it selects into RES.
13170 Return an aarch64_parse_opt_result describing the parse result.
13171 If the parsing fails the RES does not change. */
43e9d192 13172
0cfff2a1
KT
13173static enum aarch64_parse_opt_result
13174aarch64_parse_tune (const char *to_parse, const struct processor **res)
43e9d192
IB
13175{
13176 const struct processor *cpu;
43e9d192
IB
13177
13178 /* Loop through the list of supported CPUs to find a match. */
13179 for (cpu = all_cores; cpu->name != NULL; cpu++)
13180 {
ff150bc4 13181 if (strcmp (cpu->name, to_parse) == 0)
43e9d192 13182 {
0cfff2a1
KT
13183 *res = cpu;
13184 return AARCH64_PARSE_OK;
43e9d192
IB
13185 }
13186 }
13187
13188 /* CPU name not found in list. */
0cfff2a1 13189 return AARCH64_PARSE_INVALID_ARG;
43e9d192
IB
13190}
13191
8dec06f2
JG
13192/* Parse TOKEN, which has length LENGTH to see if it is an option
13193 described in FLAG. If it is, return the index bit for that fusion type.
13194 If not, error (printing OPTION_NAME) and return zero. */
13195
13196static unsigned int
13197aarch64_parse_one_option_token (const char *token,
13198 size_t length,
13199 const struct aarch64_flag_desc *flag,
13200 const char *option_name)
13201{
13202 for (; flag->name != NULL; flag++)
13203 {
13204 if (length == strlen (flag->name)
13205 && !strncmp (flag->name, token, length))
13206 return flag->flag;
13207 }
13208
a3f9f006 13209 error ("unknown flag passed in %<-moverride=%s%> (%s)", option_name, token);
8dec06f2
JG
13210 return 0;
13211}
13212
13213/* Parse OPTION which is a comma-separated list of flags to enable.
13214 FLAGS gives the list of flags we understand, INITIAL_STATE gives any
13215 default state we inherit from the CPU tuning structures. OPTION_NAME
13216 gives the top-level option we are parsing in the -moverride string,
13217 for use in error messages. */
13218
13219static unsigned int
13220aarch64_parse_boolean_options (const char *option,
13221 const struct aarch64_flag_desc *flags,
13222 unsigned int initial_state,
13223 const char *option_name)
13224{
13225 const char separator = '.';
13226 const char* specs = option;
13227 const char* ntoken = option;
13228 unsigned int found_flags = initial_state;
13229
13230 while ((ntoken = strchr (specs, separator)))
13231 {
13232 size_t token_length = ntoken - specs;
13233 unsigned token_ops = aarch64_parse_one_option_token (specs,
13234 token_length,
13235 flags,
13236 option_name);
13237 /* If we find "none" (or, for simplicity's sake, an error) anywhere
13238 in the token stream, reset the supported operations. So:
13239
13240 adrp+add.cmp+branch.none.adrp+add
13241
13242 would have the result of turning on only adrp+add fusion. */
13243 if (!token_ops)
13244 found_flags = 0;
13245
13246 found_flags |= token_ops;
13247 specs = ++ntoken;
13248 }
13249
13250	  /* The string ended with a trailing separator ('.'); diagnose it.  */
13251 if (!(*specs))
13252 {
13253 error ("%s string ill-formed\n", option_name);
13254 return 0;
13255 }
13256
13257 /* We still have one more token to parse. */
13258 size_t token_length = strlen (specs);
13259 unsigned token_ops = aarch64_parse_one_option_token (specs,
13260 token_length,
13261 flags,
13262 option_name);
13263 if (!token_ops)
13264 found_flags = 0;
13265
13266 found_flags |= token_ops;
13267 return found_flags;
13268}
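/* Editorial sketch (not GCC code) of the flag-folding rule implemented
   above: flags accumulate with OR, but an unrecognized token or "none"
   (both of which map to the flag value 0) resets everything seen so far,
   so "adrp+add.cmp+branch.none.adrp+add" ends up enabling only the final
   flag.  The token values here are hypothetical.  */
static unsigned int
fold_flag_tokens (const unsigned int *token_flags, int n_tokens,
		  unsigned int initial_state)
{
  unsigned int found_flags = initial_state;
  for (int i = 0; i < n_tokens; i++)
    {
      if (token_flags[i] == 0)	/* "none" or an unknown token.  */
	found_flags = 0;
      found_flags |= token_flags[i];
    }
  return found_flags;
}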
13269
13270/* Support for overriding instruction fusion. */
13271
13272static void
13273aarch64_parse_fuse_string (const char *fuse_string,
13274 struct tune_params *tune)
13275{
13276 tune->fusible_ops = aarch64_parse_boolean_options (fuse_string,
13277 aarch64_fusible_pairs,
13278 tune->fusible_ops,
13279 "fuse=");
13280}
13281
13282/* Support for overriding other tuning flags. */
13283
13284static void
13285aarch64_parse_tune_string (const char *tune_string,
13286 struct tune_params *tune)
13287{
13288 tune->extra_tuning_flags
13289 = aarch64_parse_boolean_options (tune_string,
13290 aarch64_tuning_flags,
13291 tune->extra_tuning_flags,
13292 "tune=");
13293}
13294
886f092f
KT
13295/* Parse the sve_width tuning moverride string in TUNE_STRING.
13296 Accept the valid SVE vector widths allowed by
13297 aarch64_sve_vector_bits_enum and use it to override sve_width
13298 in TUNE. */
13299
13300static void
13301aarch64_parse_sve_width_string (const char *tune_string,
13302 struct tune_params *tune)
13303{
13304 int width = -1;
13305
13306 int n = sscanf (tune_string, "%d", &width);
13307 if (n == EOF)
13308 {
13309 error ("invalid format for sve_width");
13310 return;
13311 }
13312 switch (width)
13313 {
13314 case SVE_128:
13315 case SVE_256:
13316 case SVE_512:
13317 case SVE_1024:
13318 case SVE_2048:
13319 break;
13320 default:
13321 error ("invalid sve_width value: %d", width);
13322 }
13323 tune->sve_width = (enum aarch64_sve_vector_bits_enum) width;
13324}
13325
8dec06f2
JG
13326/* Parse TOKEN, which has length LENGTH to see if it is a tuning option
13327	   we understand.  If it is, extract the option string and hand it off to
13328 the appropriate function. */
13329
13330void
13331aarch64_parse_one_override_token (const char* token,
13332 size_t length,
13333 struct tune_params *tune)
13334{
13335 const struct aarch64_tuning_override_function *fn
13336 = aarch64_tuning_override_functions;
13337
13338 const char *option_part = strchr (token, '=');
13339 if (!option_part)
13340 {
13341 error ("tuning string missing in option (%s)", token);
13342 return;
13343 }
13344
13345 /* Get the length of the option name. */
13346 length = option_part - token;
13347 /* Skip the '=' to get to the option string. */
13348 option_part++;
13349
13350 for (; fn->name != NULL; fn++)
13351 {
13352 if (!strncmp (fn->name, token, length))
13353 {
13354 fn->parse_override (option_part, tune);
13355 return;
13356 }
13357 }
13358
13359 error ("unknown tuning option (%s)",token);
13360 return;
13361}
13362
5eee3c34
JW
13363/* Validate the TLS size and clamp it to what the code model supports.  */
13364
13365static void
13366initialize_aarch64_tls_size (struct gcc_options *opts)
13367{
13368 if (aarch64_tls_size == 0)
13369 aarch64_tls_size = 24;
13370
13371 switch (opts->x_aarch64_cmodel_var)
13372 {
13373 case AARCH64_CMODEL_TINY:
13374	    /* Both the default and maximum TLS size allowed under tiny are 1M, which
13375	       needs two instructions to address, so we clamp the size to 24.  */
13376 if (aarch64_tls_size > 24)
13377 aarch64_tls_size = 24;
13378 break;
13379 case AARCH64_CMODEL_SMALL:
13380 /* The maximum TLS size allowed under small is 4G. */
13381 if (aarch64_tls_size > 32)
13382 aarch64_tls_size = 32;
13383 break;
13384 case AARCH64_CMODEL_LARGE:
13385 /* The maximum TLS size allowed under large is 16E.
13386 FIXME: 16E should be 64bit, we only support 48bit offset now. */
13387 if (aarch64_tls_size > 48)
13388 aarch64_tls_size = 48;
13389 break;
13390 default:
13391 gcc_unreachable ();
13392 }
13393
13394 return;
13395}
13396
8dec06f2
JG
13397/* Parse STRING looking for options in the format:
13398 string :: option:string
13399 option :: name=substring
13400 name :: {a-z}
13401 substring :: defined by option. */
13402
13403static void
13404aarch64_parse_override_string (const char* input_string,
13405 struct tune_params* tune)
13406{
13407 const char separator = ':';
13408 size_t string_length = strlen (input_string) + 1;
13409 char *string_root = (char *) xmalloc (sizeof (*string_root) * string_length);
13410 char *string = string_root;
13411 strncpy (string, input_string, string_length);
13412 string[string_length - 1] = '\0';
13413
13414 char* ntoken = string;
13415
13416 while ((ntoken = strchr (string, separator)))
13417 {
13418 size_t token_length = ntoken - string;
13419	      /* NUL-terminate this substring so it can be handled as a string on its own.  */
13420 *ntoken = '\0';
13421 aarch64_parse_one_override_token (string, token_length, tune);
13422 string = ++ntoken;
13423 }
13424
13425 /* One last option to parse. */
13426 aarch64_parse_one_override_token (string, strlen (string), tune);
13427 free (string_root);
13428}
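/* Editorial sketch (not GCC code): how a complete -moverride string is
   decomposed by the two functions above.  For instance
   -moverride=tune=no_use_of_x30:sve_width=256 is first split on ':' into
   "tune=no_use_of_x30" and "sve_width=256", and each token is then split at
   '=' into an option name and its value string.  Uses only the standard C
   library; the function name is illustrative.  */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

static void
dump_override_tokens (const char *input)
{
  size_t len = strlen (input) + 1;
  char *copy = (char *) malloc (len);
  if (!copy)
    return;
  memcpy (copy, input, len);
  for (char *tok = strtok (copy, ":"); tok; tok = strtok (NULL, ":"))
    {
      char *value = strchr (tok, '=');
      if (!value)
	printf ("tuning string missing in option (%s)\n", tok);
      else
	printf ("option \"%.*s\" -> value \"%s\"\n",
		(int) (value - tok), tok, value + 1);
    }
  free (copy);
}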
43e9d192 13429
43e9d192
IB
13430
13431static void
0cfff2a1 13432aarch64_override_options_after_change_1 (struct gcc_options *opts)
43e9d192 13433{
efac62a3
ST
13434 if (accepted_branch_protection_string)
13435 {
13436 opts->x_aarch64_branch_protection_string
13437 = xstrdup (accepted_branch_protection_string);
13438 }
13439
acea40ac
WD
13440 /* PR 70044: We have to be careful about being called multiple times for the
13441 same function. This means all changes should be repeatable. */
13442
d6cb6d6a
WD
13443 /* Set aarch64_use_frame_pointer based on -fno-omit-frame-pointer.
13444 Disable the frame pointer flag so the mid-end will not use a frame
13445 pointer in leaf functions in order to support -fomit-leaf-frame-pointer.
13446 Set x_flag_omit_frame_pointer to the special value 2 to differentiate
13447 between -fomit-frame-pointer (1) and -fno-omit-frame-pointer (2). */
13448 aarch64_use_frame_pointer = opts->x_flag_omit_frame_pointer != 1;
acea40ac 13449 if (opts->x_flag_omit_frame_pointer == 0)
a3dc8760 13450 opts->x_flag_omit_frame_pointer = 2;
43e9d192 13451
1be34295 13452 /* If not optimizing for size, set the default
0cfff2a1
KT
13453 alignment to what the target wants. */
13454 if (!opts->x_optimize_size)
43e9d192 13455 {
c518c102
ML
13456 if (opts->x_flag_align_loops && !opts->x_str_align_loops)
13457 opts->x_str_align_loops = aarch64_tune_params.loop_align;
13458 if (opts->x_flag_align_jumps && !opts->x_str_align_jumps)
13459 opts->x_str_align_jumps = aarch64_tune_params.jump_align;
13460 if (opts->x_flag_align_functions && !opts->x_str_align_functions)
13461 opts->x_str_align_functions = aarch64_tune_params.function_align;
43e9d192 13462 }
b4f50fd4 13463
9ee6540a
WD
13464 /* We default to no pc-relative literal loads. */
13465
13466 aarch64_pcrelative_literal_loads = false;
13467
13468 /* If -mpc-relative-literal-loads is set on the command line, this
b4f50fd4 13469 implies that the user asked for PC relative literal loads. */
9ee6540a
WD
13470 if (opts->x_pcrelative_literal_loads == 1)
13471 aarch64_pcrelative_literal_loads = true;
b4f50fd4 13472
9ee6540a
WD
13473 /* In the tiny memory model it makes no sense to disallow PC relative
13474 literal pool loads. */
13475 if (aarch64_cmodel == AARCH64_CMODEL_TINY
13476 || aarch64_cmodel == AARCH64_CMODEL_TINY_PIC)
13477 aarch64_pcrelative_literal_loads = true;
98daafa0
EM
13478
13479 /* When enabling the lower precision Newton series for the square root, also
13480 enable it for the reciprocal square root, since the latter is an
13481 intermediary step for the former. */
13482 if (flag_mlow_precision_sqrt)
13483 flag_mrecip_low_precision_sqrt = true;
0cfff2a1 13484}
43e9d192 13485
0cfff2a1
KT
13486/* 'Unpack' the internal tuning structs and update the options
13487 in OPTS. The caller must have set up selected_tune and selected_arch
13488 as all the other target-specific codegen decisions are
13489 derived from them. */
13490
e4ea20c8 13491void
0cfff2a1
KT
13492aarch64_override_options_internal (struct gcc_options *opts)
13493{
13494 aarch64_tune_flags = selected_tune->flags;
13495 aarch64_tune = selected_tune->sched_core;
13496 /* Make a copy of the tuning parameters attached to the core, which
13497 we may later overwrite. */
13498 aarch64_tune_params = *(selected_tune->tune);
13499 aarch64_architecture_version = selected_arch->architecture_version;
13500
13501 if (opts->x_aarch64_override_tune_string)
13502 aarch64_parse_override_string (opts->x_aarch64_override_tune_string,
13503 &aarch64_tune_params);
13504
13505 /* This target defaults to strict volatile bitfields. */
13506 if (opts->x_flag_strict_volatile_bitfields < 0 && abi_version_at_least (2))
13507 opts->x_flag_strict_volatile_bitfields = 1;
13508
cd0b2d36
RR
13509 if (aarch64_stack_protector_guard == SSP_GLOBAL
13510 && opts->x_aarch64_stack_protector_guard_offset_str)
13511 {
41804907 13512 error ("incompatible options %<-mstack-protector-guard=global%> and "
63d42e89 13513 "%<-mstack-protector-guard-offset=%s%>",
cd0b2d36
RR
13514 aarch64_stack_protector_guard_offset_str);
13515 }
13516
13517 if (aarch64_stack_protector_guard == SSP_SYSREG
13518 && !(opts->x_aarch64_stack_protector_guard_offset_str
13519 && opts->x_aarch64_stack_protector_guard_reg_str))
13520 {
a3f9f006
ML
13521 error ("both %<-mstack-protector-guard-offset%> and "
13522 "%<-mstack-protector-guard-reg%> must be used "
13523 "with %<-mstack-protector-guard=sysreg%>");
cd0b2d36
RR
13524 }
13525
13526 if (opts->x_aarch64_stack_protector_guard_reg_str)
13527 {
13528 if (strlen (opts->x_aarch64_stack_protector_guard_reg_str) > 100)
13529 error ("specify a system register with a small string length.");
13530 }
13531
13532 if (opts->x_aarch64_stack_protector_guard_offset_str)
13533 {
13534 char *end;
13535 const char *str = aarch64_stack_protector_guard_offset_str;
13536 errno = 0;
13537 long offs = strtol (aarch64_stack_protector_guard_offset_str, &end, 0);
13538 if (!*str || *end || errno)
13539 error ("%qs is not a valid offset in %qs", str,
63d42e89 13540 "-mstack-protector-guard-offset=");
cd0b2d36
RR
13541 aarch64_stack_protector_guard_offset = offs;
13542 }
13543
0cfff2a1 13544 initialize_aarch64_code_model (opts);
5eee3c34 13545 initialize_aarch64_tls_size (opts);
63892fa2 13546
2d6bc7fa
KT
13547 int queue_depth = 0;
13548 switch (aarch64_tune_params.autoprefetcher_model)
13549 {
13550 case tune_params::AUTOPREFETCHER_OFF:
13551 queue_depth = -1;
13552 break;
13553 case tune_params::AUTOPREFETCHER_WEAK:
13554 queue_depth = 0;
13555 break;
13556 case tune_params::AUTOPREFETCHER_STRONG:
13557 queue_depth = max_insn_queue_index + 1;
13558 break;
13559 default:
13560 gcc_unreachable ();
13561 }
13562
13563 /* We don't mind passing in global_options_set here as we don't use
13564 the *options_set structs anyway. */
028d4092
ML
13565 SET_OPTION_IF_UNSET (opts, &global_options_set,
13566 param_sched_autopref_queue_depth, queue_depth);
2d6bc7fa 13567
9d2c6e2e
MK
13568 /* Set up parameters to be used in prefetching algorithm. Do not
13569 override the defaults unless we are tuning for a core we have
13570 researched values for. */
13571 if (aarch64_tune_params.prefetch->num_slots > 0)
028d4092
ML
13572 SET_OPTION_IF_UNSET (opts, &global_options_set,
13573 param_simultaneous_prefetches,
13574 aarch64_tune_params.prefetch->num_slots);
9d2c6e2e 13575 if (aarch64_tune_params.prefetch->l1_cache_size >= 0)
028d4092
ML
13576 SET_OPTION_IF_UNSET (opts, &global_options_set,
13577 param_l1_cache_size,
13578 aarch64_tune_params.prefetch->l1_cache_size);
9d2c6e2e 13579 if (aarch64_tune_params.prefetch->l1_cache_line_size >= 0)
028d4092
ML
13580 SET_OPTION_IF_UNSET (opts, &global_options_set,
13581 param_l1_cache_line_size,
13582 aarch64_tune_params.prefetch->l1_cache_line_size);
9d2c6e2e 13583 if (aarch64_tune_params.prefetch->l2_cache_size >= 0)
028d4092
ML
13584 SET_OPTION_IF_UNSET (opts, &global_options_set,
13585 param_l2_cache_size,
13586 aarch64_tune_params.prefetch->l2_cache_size);
d2ff35c0 13587 if (!aarch64_tune_params.prefetch->prefetch_dynamic_strides)
028d4092
ML
13588 SET_OPTION_IF_UNSET (opts, &global_options_set,
13589 param_prefetch_dynamic_strides, 0);
59100dfc 13590 if (aarch64_tune_params.prefetch->minimum_stride >= 0)
028d4092
ML
13591 SET_OPTION_IF_UNSET (opts, &global_options_set,
13592 param_prefetch_minimum_stride,
13593 aarch64_tune_params.prefetch->minimum_stride);
50487d79 13594
13494fcb 13595 /* Use the alternative scheduling-pressure algorithm by default. */
028d4092
ML
13596 SET_OPTION_IF_UNSET (opts, &global_options_set,
13597 param_sched_pressure_algorithm,
13598 SCHED_PRESSURE_MODEL);
13494fcb 13599
fbe9af50 13600 /* Validate the guard size. */
028d4092 13601 int guard_size = param_stack_clash_protection_guard_size;
fbe9af50 13602
8100e93b
ML
13603 if (guard_size != 12 && guard_size != 16)
13604 error ("only values 12 (4 KB) and 16 (64 KB) are supported for guard "
13605 "size. Given value %d (%llu KB) is out of range",
13606 guard_size, (1ULL << guard_size) / 1024ULL);
13607
fbe9af50
TC
13608  /* Enforce that the probing interval is the same as the guard size so the
13609     mid-end does the right thing.  */
028d4092
ML
13610 SET_OPTION_IF_UNSET (opts, &global_options_set,
13611 param_stack_clash_protection_probe_interval,
13612 guard_size);
fbe9af50
TC
13613
13614  /* The maybe_set calls won't update the value if the user has explicitly set
13615     one, which means we need to validate that the probing interval and guard
13616     size are equal.  */
13617 int probe_interval
028d4092 13618 = param_stack_clash_protection_probe_interval;
fbe9af50 13619 if (guard_size != probe_interval)
904f3daa
ML
13620 error ("stack clash guard size %<%d%> must be equal to probing interval "
13621 "%<%d%>", guard_size, probe_interval);
fbe9af50 13622
16b2cafd
MK
13623 /* Enable sw prefetching at specified optimization level for
13624 CPUS that have prefetch. Lower optimization level threshold by 1
13625 when profiling is enabled. */
13626 if (opts->x_flag_prefetch_loop_arrays < 0
13627 && !opts->x_optimize_size
13628 && aarch64_tune_params.prefetch->default_opt_level >= 0
13629 && opts->x_optimize >= aarch64_tune_params.prefetch->default_opt_level)
13630 opts->x_flag_prefetch_loop_arrays = 1;
13631
266c2b54
ML
13632 if (opts->x_aarch64_arch_string == NULL)
13633 opts->x_aarch64_arch_string = selected_arch->name;
13634 if (opts->x_aarch64_cpu_string == NULL)
13635 opts->x_aarch64_cpu_string = selected_cpu->name;
13636 if (opts->x_aarch64_tune_string == NULL)
13637 opts->x_aarch64_tune_string = selected_tune->name;
13638
0cfff2a1
KT
13639 aarch64_override_options_after_change_1 (opts);
13640}
43e9d192 13641
01f44038
KT
13642/* Print a hint with a suggestion for a core or architecture name that
13643 most closely resembles what the user passed in STR. ARCH is true if
13644 the user is asking for an architecture name. ARCH is false if the user
13645 is asking for a core name. */
13646
13647static void
13648aarch64_print_hint_for_core_or_arch (const char *str, bool arch)
13649{
13650 auto_vec<const char *> candidates;
13651 const struct processor *entry = arch ? all_architectures : all_cores;
13652 for (; entry->name != NULL; entry++)
13653 candidates.safe_push (entry->name);
a08b5429
ML
13654
13655#ifdef HAVE_LOCAL_CPU_DETECT
13656 /* Add also "native" as possible value. */
13657 if (arch)
13658 candidates.safe_push ("native");
13659#endif
13660
01f44038
KT
13661 char *s;
13662 const char *hint = candidates_list_and_hint (str, s, candidates);
13663 if (hint)
13664 inform (input_location, "valid arguments are: %s;"
13665 " did you mean %qs?", s, hint);
6285e915
ML
13666 else
13667 inform (input_location, "valid arguments are: %s", s);
13668
01f44038
KT
13669 XDELETEVEC (s);
13670}
13671
13672/* Print a hint with a suggestion for a core name that most closely resembles
13673 what the user passed in STR. */
13674
13675inline static void
13676aarch64_print_hint_for_core (const char *str)
13677{
13678 aarch64_print_hint_for_core_or_arch (str, false);
13679}
13680
13681/* Print a hint with a suggestion for an architecture name that most closely
13682 resembles what the user passed in STR. */
13683
13684inline static void
13685aarch64_print_hint_for_arch (const char *str)
13686{
13687 aarch64_print_hint_for_core_or_arch (str, true);
13688}
13689
c7887347
ML
13690
13691/* Print a hint with a suggestion for an extension name
13692 that most closely resembles what the user passed in STR. */
13693
13694void
13695aarch64_print_hint_for_extensions (const std::string &str)
13696{
13697 auto_vec<const char *> candidates;
13698 aarch64_get_all_extension_candidates (&candidates);
13699 char *s;
13700 const char *hint = candidates_list_and_hint (str.c_str (), s, candidates);
13701 if (hint)
13702 inform (input_location, "valid arguments are: %s;"
13703 " did you mean %qs?", s, hint);
13704 else
13705    inform (input_location, "valid arguments are: %s", s);
13706
13707 XDELETEVEC (s);
13708}
13709
0cfff2a1
KT
13710/* Validate a command-line -mcpu option. Parse the cpu and extensions (if any)
13711 specified in STR and throw errors if appropriate. Put the results if
361fb3ee
KT
13712 they are valid in RES and ISA_FLAGS. Return whether the option is
13713 valid. */
43e9d192 13714
361fb3ee 13715static bool
0cfff2a1 13716aarch64_validate_mcpu (const char *str, const struct processor **res,
28108a53 13717 uint64_t *isa_flags)
0cfff2a1 13718{
c7887347 13719 std::string invalid_extension;
0cfff2a1 13720 enum aarch64_parse_opt_result parse_res
c7887347 13721 = aarch64_parse_cpu (str, res, isa_flags, &invalid_extension);
0cfff2a1
KT
13722
13723 if (parse_res == AARCH64_PARSE_OK)
361fb3ee 13724 return true;
0cfff2a1
KT
13725
13726 switch (parse_res)
13727 {
13728 case AARCH64_PARSE_MISSING_ARG:
fb241da2 13729 error ("missing cpu name in %<-mcpu=%s%>", str);
0cfff2a1
KT
13730 break;
13731 case AARCH64_PARSE_INVALID_ARG:
a3f9f006 13732 error ("unknown value %qs for %<-mcpu%>", str);
01f44038 13733 aarch64_print_hint_for_core (str);
0cfff2a1
KT
13734 break;
13735 case AARCH64_PARSE_INVALID_FEATURE:
c7887347
ML
13736 error ("invalid feature modifier %qs in %<-mcpu=%s%>",
13737 invalid_extension.c_str (), str);
13738 aarch64_print_hint_for_extensions (invalid_extension);
0cfff2a1
KT
13739 break;
13740 default:
13741 gcc_unreachable ();
13742 }
361fb3ee
KT
13743
13744 return false;
0cfff2a1
KT
13745}
13746
efac62a3
ST
13747/* Parses CONST_STR for branch protection features specified in
13748   aarch64_branch_protect_types, and sets any global variables required.  Returns
13749 the parsing result and assigns LAST_STR to the last processed token from
13750 CONST_STR so that it can be used for error reporting. */
13751
13752static enum
13753aarch64_parse_opt_result aarch64_parse_branch_protection (const char *const_str,
13754 char** last_str)
13755{
13756 char *str_root = xstrdup (const_str);
13757 char* token_save = NULL;
13758 char *str = strtok_r (str_root, "+", &token_save);
13759 enum aarch64_parse_opt_result res = AARCH64_PARSE_OK;
13760 if (!str)
13761 res = AARCH64_PARSE_MISSING_ARG;
13762 else
13763 {
13764 char *next_str = strtok_r (NULL, "+", &token_save);
13765 /* Reset the branch protection features to their defaults. */
13766 aarch64_handle_no_branch_protection (NULL, NULL);
13767
13768 while (str && res == AARCH64_PARSE_OK)
13769 {
13770 const aarch64_branch_protect_type* type = aarch64_branch_protect_types;
13771 bool found = false;
13772 /* Search for this type. */
13773 while (type && type->name && !found && res == AARCH64_PARSE_OK)
13774 {
13775 if (strcmp (str, type->name) == 0)
13776 {
13777 found = true;
13778 res = type->handler (str, next_str);
13779 str = next_str;
13780 next_str = strtok_r (NULL, "+", &token_save);
13781 }
13782 else
13783 type++;
13784 }
13785 if (found && res == AARCH64_PARSE_OK)
13786 {
13787 bool found_subtype = true;
13788 /* Loop through each token until we find one that isn't a
13789 subtype. */
13790 while (found_subtype)
13791 {
13792 found_subtype = false;
13793 const aarch64_branch_protect_type *subtype = type->subtypes;
13794 /* Search for the subtype. */
13795 while (str && subtype && subtype->name && !found_subtype
13796 && res == AARCH64_PARSE_OK)
13797 {
13798 if (strcmp (str, subtype->name) == 0)
13799 {
13800 found_subtype = true;
13801 res = subtype->handler (str, next_str);
13802 str = next_str;
13803 next_str = strtok_r (NULL, "+", &token_save);
13804 }
13805 else
13806 subtype++;
13807 }
13808 }
13809 }
13810 else if (!found)
13811 res = AARCH64_PARSE_INVALID_ARG;
13812 }
13813 }
13814 /* Copy the last processed token into the argument to pass it back.
13815 Used by option and attribute validation to print the offending token. */
13816 if (last_str)
13817 {
13818 if (str) strcpy (*last_str, str);
13819 else *last_str = NULL;
13820 }
13821 if (res == AARCH64_PARSE_OK)
13822 {
13823      /* If needed, allocate the accepted string, then copy in const_str.
13824	  Used by aarch64_override_options_after_change_1.  */
13825 if (!accepted_branch_protection_string)
13826 accepted_branch_protection_string = (char *) xmalloc (
13827 BRANCH_PROTECT_STR_MAX
13828 + 1);
13829 strncpy (accepted_branch_protection_string, const_str,
13830 BRANCH_PROTECT_STR_MAX + 1);
13831 /* Forcibly null-terminate. */
13832 accepted_branch_protection_string[BRANCH_PROTECT_STR_MAX] = '\0';
13833 }
13834 return res;
13835}
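/* Editorial sketch (not GCC code): a two-level name lookup over
   '+'-separated tokens, mirroring the shape -- though not the exact
   algorithm or contents -- of the type/subtype walk above.  With this
   structure "pac-ret+leaf" is accepted ("leaf" is a subtype of the
   preceding "pac-ret"), while "leaf" on its own is rejected because it
   only exists as a subtype.  The tables and names are hypothetical.  */
#include <stdio.h>
#include <string.h>

struct bp_type { const char *name; const char *const *subtypes; };
static const char *const pac_ret_subtypes[] = { "leaf", NULL };
static const struct bp_type bp_types[] = {
  { "none", NULL }, { "standard", NULL },
  { "pac-ret", pac_ret_subtypes }, { "bti", NULL }, { NULL, NULL }
};

static int
check_branch_protection (char *str)
{
  char *saveptr = NULL;
  const struct bp_type *cur = NULL;
  for (char *tok = strtok_r (str, "+", &saveptr); tok;
       tok = strtok_r (NULL, "+", &saveptr))
    {
      int found = 0;
      for (const struct bp_type *t = bp_types; t->name; t++)
	if (strcmp (tok, t->name) == 0)
	  {
	    cur = t;
	    found = 1;
	    break;
	  }
      if (!found && cur && cur->subtypes)
	for (const char *const *s = cur->subtypes; *s; s++)
	  if (strcmp (tok, *s) == 0)
	    {
	      found = 1;
	      break;
	    }
      if (!found)
	{
	  printf ("invalid token: %s\n", tok);
	  return 0;
	}
    }
  return 1;
}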
13836
13837static bool
13838aarch64_validate_mbranch_protection (const char *const_str)
13839{
13840  char *str = (char *) xmalloc (strlen (const_str) + 1);
13841 enum aarch64_parse_opt_result res =
13842 aarch64_parse_branch_protection (const_str, &str);
13843 if (res == AARCH64_PARSE_INVALID_ARG)
a9c697b8 13844 error ("invalid argument %<%s%> for %<-mbranch-protection=%>", str);
efac62a3 13845 else if (res == AARCH64_PARSE_MISSING_ARG)
a9c697b8 13846 error ("missing argument for %<-mbranch-protection=%>");
efac62a3
ST
13847 free (str);
13848 return res == AARCH64_PARSE_OK;
13849}
13850
0cfff2a1
KT
13851/* Validate a command-line -march option. Parse the arch and extensions
13852 (if any) specified in STR and throw errors if appropriate. Put the
361fb3ee
KT
13853 results, if they are valid, in RES and ISA_FLAGS. Return whether the
13854 option is valid. */
0cfff2a1 13855
361fb3ee 13856static bool
0cfff2a1 13857aarch64_validate_march (const char *str, const struct processor **res,
28108a53 13858 uint64_t *isa_flags)
0cfff2a1 13859{
c7887347 13860 std::string invalid_extension;
0cfff2a1 13861 enum aarch64_parse_opt_result parse_res
c7887347 13862 = aarch64_parse_arch (str, res, isa_flags, &invalid_extension);
0cfff2a1
KT
13863
13864 if (parse_res == AARCH64_PARSE_OK)
361fb3ee 13865 return true;
0cfff2a1
KT
13866
13867 switch (parse_res)
13868 {
13869 case AARCH64_PARSE_MISSING_ARG:
fb241da2 13870 error ("missing arch name in %<-march=%s%>", str);
0cfff2a1
KT
13871 break;
13872 case AARCH64_PARSE_INVALID_ARG:
a3f9f006 13873 error ("unknown value %qs for %<-march%>", str);
01f44038 13874 aarch64_print_hint_for_arch (str);
0cfff2a1
KT
13875 break;
13876 case AARCH64_PARSE_INVALID_FEATURE:
c7887347
ML
13877 error ("invalid feature modifier %qs in %<-march=%s%>",
13878 invalid_extension.c_str (), str);
13879 aarch64_print_hint_for_extensions (invalid_extension);
0cfff2a1
KT
13880 break;
13881 default:
13882 gcc_unreachable ();
13883 }
361fb3ee
KT
13884
13885 return false;
0cfff2a1
KT
13886}
13887
13888/* Validate a command-line -mtune option. Parse the cpu
13889 specified in STR and throw errors if appropriate. Put the
361fb3ee
KT
13890 result, if it is valid, in RES. Return whether the option is
13891 valid. */
0cfff2a1 13892
361fb3ee 13893static bool
0cfff2a1
KT
13894aarch64_validate_mtune (const char *str, const struct processor **res)
13895{
13896 enum aarch64_parse_opt_result parse_res
13897 = aarch64_parse_tune (str, res);
13898
13899 if (parse_res == AARCH64_PARSE_OK)
361fb3ee 13900 return true;
0cfff2a1
KT
13901
13902 switch (parse_res)
13903 {
13904 case AARCH64_PARSE_MISSING_ARG:
fb241da2 13905 error ("missing cpu name in %<-mtune=%s%>", str);
0cfff2a1
KT
13906 break;
13907 case AARCH64_PARSE_INVALID_ARG:
a3f9f006 13908 error ("unknown value %qs for %<-mtune%>", str);
01f44038 13909 aarch64_print_hint_for_core (str);
0cfff2a1
KT
13910 break;
13911 default:
13912 gcc_unreachable ();
13913 }
361fb3ee
KT
13914 return false;
13915}
13916
13917/* Return the CPU corresponding to the enum CPU.
13918 If it doesn't specify a cpu, return the default. */
13919
13920static const struct processor *
13921aarch64_get_tune_cpu (enum aarch64_processor cpu)
13922{
13923 if (cpu != aarch64_none)
13924 return &all_cores[cpu];
13925
13926 /* The & 0x3f is to extract the bottom 6 bits that encode the
13927 default cpu as selected by the --with-cpu GCC configure option
13928 in config.gcc.
13929 ???: The whole TARGET_CPU_DEFAULT and AARCH64_CPU_DEFAULT_FLAGS
13930 flags mechanism should be reworked to make it more sane. */
13931 return &all_cores[TARGET_CPU_DEFAULT & 0x3f];
13932}
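/* Editorial sketch (not GCC code) of the encoding referred to above: the
   configure-time default packs a CPU index into the bottom 6 bits and the
   associated ISA flag bits above them (compare the use of
   TARGET_CPU_DEFAULT >> 6 in aarch64_override_options below).  The helper
   names here are made up for illustration.  */
static unsigned long long
pack_cpu_default (unsigned int cpu_index, unsigned long long isa_flags)
{
  return (isa_flags << 6) | (cpu_index & 0x3f);
}

static unsigned int
unpack_cpu_index (unsigned long long packed)
{
  return packed & 0x3f;		/* Bottom 6 bits: the default CPU.  */
}

static unsigned long long
unpack_isa_flags (unsigned long long packed)
{
  return packed >> 6;		/* Remaining bits: the default ISA flags.  */
}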
13933
13934/* Return the architecture corresponding to the enum ARCH.
13935 If it doesn't specify a valid architecture, return the default. */
13936
13937static const struct processor *
13938aarch64_get_arch (enum aarch64_arch arch)
13939{
13940 if (arch != aarch64_no_arch)
13941 return &all_architectures[arch];
13942
13943 const struct processor *cpu = &all_cores[TARGET_CPU_DEFAULT & 0x3f];
13944
13945 return &all_architectures[cpu->arch];
0cfff2a1
KT
13946}
13947
43cacb12
RS
13948/* Return the VG value associated with -msve-vector-bits= value VALUE. */
13949
13950static poly_uint16
13951aarch64_convert_sve_vector_bits (aarch64_sve_vector_bits_enum value)
13952{
13953 /* For now generate vector-length agnostic code for -msve-vector-bits=128.
13954 This ensures we can clearly distinguish SVE and Advanced SIMD modes when
13955 deciding which .md file patterns to use and when deciding whether
13956 something is a legitimate address or constant. */
13957 if (value == SVE_SCALABLE || value == SVE_128)
13958 return poly_uint16 (2, 2);
13959 else
13960 return (int) value / 64;
13961}
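/* Editorial worked example (not GCC code): VG counts 64-bit granules, so
   -msve-vector-bits=256 gives VG = 4, 512 gives VG = 8, 1024 gives VG = 16
   and 2048 gives VG = 32, while both "scalable" and 128 map to the
   runtime-sized poly_uint16 (2, 2) for the reason explained above.  The
   helper below covers only the fixed-width cases and is illustrative.  */
static int
sve_vg_for_fixed_width (int bits)
{
  /* Fixed widths above 128 are multiples of 64 bits.  */
  return bits / 64;
}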
13962
0cfff2a1
KT
13963/* Implement TARGET_OPTION_OVERRIDE. This is called once in the beginning
13964 and is used to parse the -m{cpu,tune,arch} strings and setup the initial
13965 tuning structs. In particular it must set selected_tune and
13966 aarch64_isa_flags that define the available ISA features and tuning
13967 decisions. It must also set selected_arch as this will be used to
13968 output the .arch asm tags for each function. */
13969
13970static void
13971aarch64_override_options (void)
13972{
28108a53
MM
13973 uint64_t cpu_isa = 0;
13974 uint64_t arch_isa = 0;
0cfff2a1
KT
13975 aarch64_isa_flags = 0;
13976
361fb3ee
KT
13977 bool valid_cpu = true;
13978 bool valid_tune = true;
13979 bool valid_arch = true;
13980
0cfff2a1
KT
13981 selected_cpu = NULL;
13982 selected_arch = NULL;
13983 selected_tune = NULL;
13984
efac62a3
ST
13985 if (aarch64_branch_protection_string)
13986 aarch64_validate_mbranch_protection (aarch64_branch_protection_string);
13987
0cfff2a1
KT
13988 /* -mcpu=CPU is shorthand for -march=ARCH_FOR_CPU, -mtune=CPU.
13989 If either of -march or -mtune is given, they override their
13990 respective component of -mcpu. */
13991 if (aarch64_cpu_string)
361fb3ee
KT
13992 valid_cpu = aarch64_validate_mcpu (aarch64_cpu_string, &selected_cpu,
13993 &cpu_isa);
0cfff2a1
KT
13994
13995 if (aarch64_arch_string)
361fb3ee
KT
13996 valid_arch = aarch64_validate_march (aarch64_arch_string, &selected_arch,
13997 &arch_isa);
0cfff2a1
KT
13998
13999 if (aarch64_tune_string)
361fb3ee 14000 valid_tune = aarch64_validate_mtune (aarch64_tune_string, &selected_tune);
43e9d192 14001
6881e3c1
OH
14002#ifdef SUBTARGET_OVERRIDE_OPTIONS
14003 SUBTARGET_OVERRIDE_OPTIONS;
14004#endif
14005
43e9d192
IB
14006 /* If the user did not specify a processor, choose the default
14007 one for them. This will be the CPU set during configuration using
a3cd0246 14008 --with-cpu, otherwise it is "generic". */
43e9d192
IB
14009 if (!selected_cpu)
14010 {
0cfff2a1
KT
14011 if (selected_arch)
14012 {
14013 selected_cpu = &all_cores[selected_arch->ident];
14014 aarch64_isa_flags = arch_isa;
361fb3ee 14015 explicit_arch = selected_arch->arch;
0cfff2a1
KT
14016 }
14017 else
14018 {
361fb3ee
KT
14019 /* Get default configure-time CPU. */
14020 selected_cpu = aarch64_get_tune_cpu (aarch64_none);
0cfff2a1
KT
14021 aarch64_isa_flags = TARGET_CPU_DEFAULT >> 6;
14022 }
361fb3ee
KT
14023
14024 if (selected_tune)
14025 explicit_tune_core = selected_tune->ident;
0cfff2a1
KT
14026 }
14027 /* If both -mcpu and -march are specified check that they are architecturally
14028 compatible, warn if they're not and prefer the -march ISA flags. */
14029 else if (selected_arch)
14030 {
14031 if (selected_arch->arch != selected_cpu->arch)
14032 {
a3f9f006 14033 warning (0, "switch %<-mcpu=%s%> conflicts with %<-march=%s%> switch",
0cfff2a1
KT
14034 all_architectures[selected_cpu->arch].name,
14035 selected_arch->name);
14036 }
14037 aarch64_isa_flags = arch_isa;
361fb3ee
KT
14038 explicit_arch = selected_arch->arch;
14039 explicit_tune_core = selected_tune ? selected_tune->ident
14040 : selected_cpu->ident;
0cfff2a1
KT
14041 }
14042 else
14043 {
14044 /* -mcpu but no -march. */
14045 aarch64_isa_flags = cpu_isa;
361fb3ee
KT
14046 explicit_tune_core = selected_tune ? selected_tune->ident
14047 : selected_cpu->ident;
14048 gcc_assert (selected_cpu);
14049 selected_arch = &all_architectures[selected_cpu->arch];
14050 explicit_arch = selected_arch->arch;
43e9d192
IB
14051 }
14052
0cfff2a1
KT
14053  /* Set the arch as well, as we will need it when outputting
14054 the .arch directive in assembly. */
14055 if (!selected_arch)
14056 {
14057 gcc_assert (selected_cpu);
14058 selected_arch = &all_architectures[selected_cpu->arch];
14059 }
43e9d192 14060
43e9d192 14061 if (!selected_tune)
3edaf26d 14062 selected_tune = selected_cpu;
43e9d192 14063
c7ff4f0f
SD
14064 if (aarch64_enable_bti == 2)
14065 {
14066#ifdef TARGET_ENABLE_BTI
14067 aarch64_enable_bti = 1;
14068#else
14069 aarch64_enable_bti = 0;
14070#endif
14071 }
14072
14073 /* Return address signing is currently not supported for ILP32 targets. For
14074 LP64 targets use the configured option in the absence of a command-line
14075 option for -mbranch-protection. */
14076 if (!TARGET_ILP32 && accepted_branch_protection_string == NULL)
14077 {
14078#ifdef TARGET_ENABLE_PAC_RET
14079 aarch64_ra_sign_scope = AARCH64_FUNCTION_NON_LEAF;
c7ff4f0f
SD
14080#else
14081 aarch64_ra_sign_scope = AARCH64_FUNCTION_NONE;
14082#endif
14083 }
14084
0cfff2a1
KT
14085#ifndef HAVE_AS_MABI_OPTION
14086 /* The compiler may have been configured with 2.23.* binutils, which does
14087 not have support for ILP32. */
14088 if (TARGET_ILP32)
a3f9f006 14089 error ("assembler does not support %<-mabi=ilp32%>");
0cfff2a1 14090#endif
43e9d192 14091
43cacb12
RS
14092 /* Convert -msve-vector-bits to a VG count. */
14093 aarch64_sve_vg = aarch64_convert_sve_vector_bits (aarch64_sve_vector_bits);
14094
db58fd89 14095 if (aarch64_ra_sign_scope != AARCH64_FUNCTION_NONE && TARGET_ILP32)
a3f9f006 14096 sorry ("return address signing is only supported for %<-mabi=lp64%>");
db58fd89 14097
361fb3ee
KT
14098 /* Make sure we properly set up the explicit options. */
14099 if ((aarch64_cpu_string && valid_cpu)
14100 || (aarch64_tune_string && valid_tune))
14101 gcc_assert (explicit_tune_core != aarch64_none);
14102
14103 if ((aarch64_cpu_string && valid_cpu)
14104 || (aarch64_arch_string && valid_arch))
14105 gcc_assert (explicit_arch != aarch64_no_arch);
14106
5f7dbaa0
RE
14107 /* The pass to insert speculation tracking runs before
14108 shrink-wrapping and the latter does not know how to update the
14109 tracking status. So disable it in this case. */
14110 if (aarch64_track_speculation)
14111 flag_shrink_wrap = 0;
14112
0cfff2a1
KT
14113 aarch64_override_options_internal (&global_options);
14114
14115 /* Save these options as the default ones in case we push and pop them later
14116 while processing functions with potential target attributes. */
14117 target_option_default_node = target_option_current_node
14118 = build_target_option_node (&global_options);
43e9d192
IB
14119}
14120
14121/* Implement targetm.override_options_after_change. */
14122
14123static void
14124aarch64_override_options_after_change (void)
14125{
0cfff2a1 14126 aarch64_override_options_after_change_1 (&global_options);
43e9d192
IB
14127}
14128
14129static struct machine_function *
14130aarch64_init_machine_status (void)
14131{
14132 struct machine_function *machine;
766090c2 14133 machine = ggc_cleared_alloc<machine_function> ();
43e9d192
IB
14134 return machine;
14135}
14136
14137void
14138aarch64_init_expanders (void)
14139{
14140 init_machine_status = aarch64_init_machine_status;
14141}
14142
14143/* Validate and set the code model, taking the PIC options into account.  */
14144static void
0cfff2a1 14145initialize_aarch64_code_model (struct gcc_options *opts)
43e9d192 14146{
0cfff2a1 14147 if (opts->x_flag_pic)
43e9d192 14148 {
0cfff2a1 14149 switch (opts->x_aarch64_cmodel_var)
43e9d192
IB
14150 {
14151 case AARCH64_CMODEL_TINY:
14152 aarch64_cmodel = AARCH64_CMODEL_TINY_PIC;
14153 break;
14154 case AARCH64_CMODEL_SMALL:
34ecdb0f 14155#ifdef HAVE_AS_SMALL_PIC_RELOCS
1b1e81f8
JW
14156 aarch64_cmodel = (flag_pic == 2
14157 ? AARCH64_CMODEL_SMALL_PIC
14158 : AARCH64_CMODEL_SMALL_SPIC);
34ecdb0f
JW
14159#else
14160 aarch64_cmodel = AARCH64_CMODEL_SMALL_PIC;
14161#endif
43e9d192
IB
14162 break;
14163 case AARCH64_CMODEL_LARGE:
a3f9f006 14164 sorry ("code model %qs with %<-f%s%>", "large",
0cfff2a1 14165 opts->x_flag_pic > 1 ? "PIC" : "pic");
1c652781 14166 break;
43e9d192
IB
14167 default:
14168 gcc_unreachable ();
14169 }
14170 }
14171 else
0cfff2a1 14172 aarch64_cmodel = opts->x_aarch64_cmodel_var;
43e9d192
IB
14173}
14174
361fb3ee
KT
14175/* Implement TARGET_OPTION_SAVE. */
14176
14177static void
14178aarch64_option_save (struct cl_target_option *ptr, struct gcc_options *opts)
14179{
14180 ptr->x_aarch64_override_tune_string = opts->x_aarch64_override_tune_string;
efac62a3
ST
14181 ptr->x_aarch64_branch_protection_string
14182 = opts->x_aarch64_branch_protection_string;
361fb3ee
KT
14183}
14184
14185/* Implements TARGET_OPTION_RESTORE. Restore the backend codegen decisions
14186 using the information saved in PTR. */
14187
14188static void
14189aarch64_option_restore (struct gcc_options *opts, struct cl_target_option *ptr)
14190{
14191 opts->x_explicit_tune_core = ptr->x_explicit_tune_core;
14192 selected_tune = aarch64_get_tune_cpu (ptr->x_explicit_tune_core);
14193 opts->x_explicit_arch = ptr->x_explicit_arch;
14194 selected_arch = aarch64_get_arch (ptr->x_explicit_arch);
14195 opts->x_aarch64_override_tune_string = ptr->x_aarch64_override_tune_string;
efac62a3
ST
14196 opts->x_aarch64_branch_protection_string
14197 = ptr->x_aarch64_branch_protection_string;
14198 if (opts->x_aarch64_branch_protection_string)
14199 {
14200 aarch64_parse_branch_protection (opts->x_aarch64_branch_protection_string,
14201 NULL);
14202 }
361fb3ee
KT
14203
14204 aarch64_override_options_internal (opts);
14205}
14206
14207/* Implement TARGET_OPTION_PRINT. */
14208
14209static void
14210aarch64_option_print (FILE *file, int indent, struct cl_target_option *ptr)
14211{
14212 const struct processor *cpu
14213 = aarch64_get_tune_cpu (ptr->x_explicit_tune_core);
28108a53 14214 uint64_t isa_flags = ptr->x_aarch64_isa_flags;
361fb3ee 14215 const struct processor *arch = aarch64_get_arch (ptr->x_explicit_arch);
054b4005 14216 std::string extension
04a99ebe 14217 = aarch64_get_extension_string_for_isa_flags (isa_flags, arch->flags);
361fb3ee
KT
14218
14219 fprintf (file, "%*sselected tune = %s\n", indent, "", cpu->name);
054b4005
JG
14220 fprintf (file, "%*sselected arch = %s%s\n", indent, "",
14221 arch->name, extension.c_str ());
361fb3ee
KT
14222}
14223
d78006d9
KT
14224static GTY(()) tree aarch64_previous_fndecl;
14225
e4ea20c8
KT
14226void
14227aarch64_reset_previous_fndecl (void)
14228{
14229 aarch64_previous_fndecl = NULL;
14230}
14231
acfc1ac1
KT
14232/* Restore or save the TREE_TARGET_GLOBALS from or to NEW_TREE.
14233 Used by aarch64_set_current_function and aarch64_pragma_target_parse to
14234 make sure optab availability predicates are recomputed when necessary. */
14235
14236void
14237aarch64_save_restore_target_globals (tree new_tree)
14238{
14239 if (TREE_TARGET_GLOBALS (new_tree))
14240 restore_target_globals (TREE_TARGET_GLOBALS (new_tree));
14241 else if (new_tree == target_option_default_node)
14242 restore_target_globals (&default_target_globals);
14243 else
14244 TREE_TARGET_GLOBALS (new_tree) = save_target_globals_default_opts ();
14245}
14246
d78006d9
KT
14247/* Implement TARGET_SET_CURRENT_FUNCTION. Unpack the codegen decisions
14248 like tuning and ISA features from the DECL_FUNCTION_SPECIFIC_TARGET
14249 of the function, if such exists. This function may be called multiple
14250 times on a single function so use aarch64_previous_fndecl to avoid
14251 setting up identical state. */
14252
14253static void
14254aarch64_set_current_function (tree fndecl)
14255{
acfc1ac1
KT
14256 if (!fndecl || fndecl == aarch64_previous_fndecl)
14257 return;
14258
d78006d9
KT
14259 tree old_tree = (aarch64_previous_fndecl
14260 ? DECL_FUNCTION_SPECIFIC_TARGET (aarch64_previous_fndecl)
14261 : NULL_TREE);
14262
acfc1ac1 14263 tree new_tree = DECL_FUNCTION_SPECIFIC_TARGET (fndecl);
d78006d9 14264
acfc1ac1
KT
14265 /* If current function has no attributes but the previous one did,
14266 use the default node. */
14267 if (!new_tree && old_tree)
14268 new_tree = target_option_default_node;
d78006d9 14269
acfc1ac1
KT
14270 /* If nothing to do, return. #pragma GCC reset or #pragma GCC pop to
14271 the default have been handled by aarch64_save_restore_target_globals from
14272 aarch64_pragma_target_parse. */
14273 if (old_tree == new_tree)
14274 return;
d78006d9 14275
acfc1ac1 14276 aarch64_previous_fndecl = fndecl;
6e17a23b 14277
acfc1ac1
KT
14278 /* First set the target options. */
14279 cl_target_option_restore (&global_options, TREE_TARGET_OPTION (new_tree));
6e17a23b 14280
acfc1ac1 14281 aarch64_save_restore_target_globals (new_tree);
d78006d9 14282}
361fb3ee 14283
5a2c8331
KT
14284/* Enum describing the various ways we can handle attributes.
14285 In many cases we can reuse the generic option handling machinery. */
14286
14287enum aarch64_attr_opt_type
14288{
14289 aarch64_attr_mask, /* Attribute should set a bit in target_flags. */
14290 aarch64_attr_bool, /* Attribute sets or unsets a boolean variable. */
14291 aarch64_attr_enum, /* Attribute sets an enum variable. */
14292 aarch64_attr_custom /* Attribute requires a custom handling function. */
14293};
14294
14295/* All the information needed to handle a target attribute.
14296 NAME is the name of the attribute.
9c582551 14297 ATTR_TYPE specifies the type of behavior of the attribute as described
5a2c8331
KT
14298 in the definition of enum aarch64_attr_opt_type.
14299 ALLOW_NEG is true if the attribute supports a "no-" form.
ab93e9b7
SE
14300   HANDLER is the function that takes the attribute string as an argument.
14301 It is needed only when the ATTR_TYPE is aarch64_attr_custom.
5a2c8331 14302 OPT_NUM is the enum specifying the option that the attribute modifies.
9c582551 14303 This is needed for attributes that mirror the behavior of a command-line
5a2c8331
KT
14304 option, that is it has ATTR_TYPE aarch64_attr_mask, aarch64_attr_bool or
14305 aarch64_attr_enum. */
14306
14307struct aarch64_attribute_info
14308{
14309 const char *name;
14310 enum aarch64_attr_opt_type attr_type;
14311 bool allow_neg;
ab93e9b7 14312 bool (*handler) (const char *);
5a2c8331
KT
14313 enum opt_code opt_num;
14314};
14315
ab93e9b7 14316/* Handle the ARCH_STR argument to the arch= target attribute. */
5a2c8331
KT
14317
14318static bool
ab93e9b7 14319aarch64_handle_attr_arch (const char *str)
5a2c8331
KT
14320{
14321 const struct processor *tmp_arch = NULL;
c7887347 14322 std::string invalid_extension;
5a2c8331 14323 enum aarch64_parse_opt_result parse_res
c7887347 14324 = aarch64_parse_arch (str, &tmp_arch, &aarch64_isa_flags, &invalid_extension);
5a2c8331
KT
14325
14326 if (parse_res == AARCH64_PARSE_OK)
14327 {
14328 gcc_assert (tmp_arch);
14329 selected_arch = tmp_arch;
14330 explicit_arch = selected_arch->arch;
14331 return true;
14332 }
14333
14334 switch (parse_res)
14335 {
14336 case AARCH64_PARSE_MISSING_ARG:
ab93e9b7 14337 error ("missing name in %<target(\"arch=\")%> pragma or attribute");
5a2c8331
KT
14338 break;
14339 case AARCH64_PARSE_INVALID_ARG:
ab93e9b7 14340 error ("invalid name (\"%s\") in %<target(\"arch=\")%> pragma or attribute", str);
01f44038 14341 aarch64_print_hint_for_arch (str);
5a2c8331
KT
14342 break;
14343 case AARCH64_PARSE_INVALID_FEATURE:
c7887347
ML
14344 error ("invalid feature modifier %s of value (\"%s\") in "
14345 "%<target()%> pragma or attribute", invalid_extension.c_str (), str);
14346 aarch64_print_hint_for_extensions (invalid_extension);
5a2c8331
KT
14347 break;
14348 default:
14349 gcc_unreachable ();
14350 }
14351
14352 return false;
14353}
14354
ab93e9b7 14355/* Handle the argument CPU_STR to the cpu= target attribute. */
5a2c8331
KT
14356
14357static bool
ab93e9b7 14358aarch64_handle_attr_cpu (const char *str)
5a2c8331
KT
14359{
14360 const struct processor *tmp_cpu = NULL;
c7887347 14361 std::string invalid_extension;
5a2c8331 14362 enum aarch64_parse_opt_result parse_res
c7887347 14363 = aarch64_parse_cpu (str, &tmp_cpu, &aarch64_isa_flags, &invalid_extension);
5a2c8331
KT
14364
14365 if (parse_res == AARCH64_PARSE_OK)
14366 {
14367 gcc_assert (tmp_cpu);
14368 selected_tune = tmp_cpu;
14369 explicit_tune_core = selected_tune->ident;
14370
14371 selected_arch = &all_architectures[tmp_cpu->arch];
14372 explicit_arch = selected_arch->arch;
14373 return true;
14374 }
14375
14376 switch (parse_res)
14377 {
14378 case AARCH64_PARSE_MISSING_ARG:
ab93e9b7 14379 error ("missing name in %<target(\"cpu=\")%> pragma or attribute");
5a2c8331
KT
14380 break;
14381 case AARCH64_PARSE_INVALID_ARG:
ab93e9b7 14382 error ("invalid name (\"%s\") in %<target(\"cpu=\")%> pragma or attribute", str);
01f44038 14383 aarch64_print_hint_for_core (str);
5a2c8331
KT
14384 break;
14385 case AARCH64_PARSE_INVALID_FEATURE:
c7887347
ML
14386 error ("invalid feature modifier %s of value (\"%s\") in "
14387 "%<target()%> pragma or attribute", invalid_extension.c_str (), str);
14388 aarch64_print_hint_for_extensions (invalid_extension);
5a2c8331
KT
14389 break;
14390 default:
14391 gcc_unreachable ();
14392 }
14393
14394 return false;
14395}
14396
efac62a3
ST
14397/* Handle the argument STR to the branch-protection= attribute. */
14398
14399 static bool
14400 aarch64_handle_attr_branch_protection (const char* str)
14401 {
81e40f3a 14402 char *err_str = (char *) xmalloc (strlen (str) + 1);
efac62a3
ST
14403 enum aarch64_parse_opt_result res = aarch64_parse_branch_protection (str,
14404 &err_str);
14405 bool success = false;
14406 switch (res)
14407 {
14408 case AARCH64_PARSE_MISSING_ARG:
14409 error ("missing argument to %<target(\"branch-protection=\")%> pragma or"
14410 " attribute");
14411 break;
14412 case AARCH64_PARSE_INVALID_ARG:
14413 error ("invalid protection type (\"%s\") in %<target(\"branch-protection"
14414 "=\")%> pragma or attribute", err_str);
14415 break;
14416 case AARCH64_PARSE_OK:
14417 success = true;
14418 /* Fall through. */
14419 case AARCH64_PARSE_INVALID_FEATURE:
14420 break;
14421 default:
14422 gcc_unreachable ();
14423 }
14424 free (err_str);
14425 return success;
14426 }
14427
ab93e9b7 14428/* Handle the argument STR to the tune= target attribute. */
5a2c8331
KT
14429
14430static bool
ab93e9b7 14431aarch64_handle_attr_tune (const char *str)
5a2c8331
KT
14432{
14433 const struct processor *tmp_tune = NULL;
14434 enum aarch64_parse_opt_result parse_res
14435 = aarch64_parse_tune (str, &tmp_tune);
14436
14437 if (parse_res == AARCH64_PARSE_OK)
14438 {
14439 gcc_assert (tmp_tune);
14440 selected_tune = tmp_tune;
14441 explicit_tune_core = selected_tune->ident;
14442 return true;
14443 }
14444
14445 switch (parse_res)
14446 {
14447 case AARCH64_PARSE_INVALID_ARG:
ab93e9b7 14448 error ("invalid name (\"%s\") in %<target(\"tune=\")%> pragma or attribute", str);
01f44038 14449 aarch64_print_hint_for_core (str);
5a2c8331
KT
14450 break;
14451 default:
14452 gcc_unreachable ();
14453 }
14454
14455 return false;
14456}
14457
14458/* Parse an architecture extensions target attribute string specified in STR.
14459 For example "+fp+nosimd". Show any errors if needed. Return TRUE
14460 if successful. Update aarch64_isa_flags to reflect the ISA features
ab93e9b7 14461 modified. */
5a2c8331
KT
14462
14463static bool
ab93e9b7 14464aarch64_handle_attr_isa_flags (char *str)
5a2c8331
KT
14465{
14466 enum aarch64_parse_opt_result parse_res;
28108a53 14467 uint64_t isa_flags = aarch64_isa_flags;
5a2c8331 14468
e4ea20c8
KT
14469 /* We allow "+nothing" in the beginning to clear out all architectural
14470 features if the user wants to handpick specific features. */
14471 if (strncmp ("+nothing", str, 8) == 0)
14472 {
14473 isa_flags = 0;
14474 str += 8;
14475 }
14476
c7887347
ML
14477 std::string invalid_extension;
14478 parse_res = aarch64_parse_extension (str, &isa_flags, &invalid_extension);
5a2c8331
KT
14479
14480 if (parse_res == AARCH64_PARSE_OK)
14481 {
14482 aarch64_isa_flags = isa_flags;
14483 return true;
14484 }
14485
14486 switch (parse_res)
14487 {
14488 case AARCH64_PARSE_MISSING_ARG:
ab93e9b7 14489 error ("missing value in %<target()%> pragma or attribute");
5a2c8331
KT
14490 break;
14491
14492 case AARCH64_PARSE_INVALID_FEATURE:
c7887347
ML
14493 error ("invalid feature modifier %s of value (\"%s\") in "
14494 "%<target()%> pragma or attribute", invalid_extension.c_str (), str);
5a2c8331
KT
14495 break;
14496
14497 default:
14498 gcc_unreachable ();
14499 }
14500
14501 return false;
14502}
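/* Editorial usage example (not part of GCC itself): the "+nothing" form
   handled above clears all extension bits before re-enabling the features
   that follow, e.g. in a target attribute or pragma.  Whether a particular
   combination is accepted depends on the configured architecture; the
   function below is purely illustrative.  */
__attribute__ ((target ("+nothing+fp+simd")))
static int
fp_simd_only (int x)
{
  return x + 1;
}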
14503
14504/* The target attributes that we support. On top of these we also support just
14505 ISA extensions, like __attribute__ ((target ("+crc"))), but that case is
14506 handled explicitly in aarch64_process_one_target_attr. */
14507
14508static const struct aarch64_attribute_info aarch64_attributes[] =
14509{
14510 { "general-regs-only", aarch64_attr_mask, false, NULL,
14511 OPT_mgeneral_regs_only },
14512 { "fix-cortex-a53-835769", aarch64_attr_bool, true, NULL,
14513 OPT_mfix_cortex_a53_835769 },
48bb1a55
CL
14514 { "fix-cortex-a53-843419", aarch64_attr_bool, true, NULL,
14515 OPT_mfix_cortex_a53_843419 },
5a2c8331 14516 { "cmodel", aarch64_attr_enum, false, NULL, OPT_mcmodel_ },
675d044c 14517 { "strict-align", aarch64_attr_mask, true, NULL, OPT_mstrict_align },
5a2c8331
KT
14518 { "omit-leaf-frame-pointer", aarch64_attr_bool, true, NULL,
14519 OPT_momit_leaf_frame_pointer },
14520 { "tls-dialect", aarch64_attr_enum, false, NULL, OPT_mtls_dialect_ },
14521 { "arch", aarch64_attr_custom, false, aarch64_handle_attr_arch,
14522 OPT_march_ },
14523 { "cpu", aarch64_attr_custom, false, aarch64_handle_attr_cpu, OPT_mcpu_ },
14524 { "tune", aarch64_attr_custom, false, aarch64_handle_attr_tune,
14525 OPT_mtune_ },
efac62a3
ST
14526 { "branch-protection", aarch64_attr_custom, false,
14527 aarch64_handle_attr_branch_protection, OPT_mbranch_protection_ },
db58fd89
JW
14528 { "sign-return-address", aarch64_attr_enum, false, NULL,
14529 OPT_msign_return_address_ },
5a2c8331
KT
14530 { NULL, aarch64_attr_custom, false, NULL, OPT____ }
14531};
14532
14533/* Parse ARG_STR which contains the definition of one target attribute.
ab93e9b7 14534 Show appropriate errors if any or return true if the attribute is valid. */
5a2c8331
KT
14535
14536static bool
ab93e9b7 14537aarch64_process_one_target_attr (char *arg_str)
5a2c8331
KT
14538{
14539 bool invert = false;
14540
14541 size_t len = strlen (arg_str);
14542
14543 if (len == 0)
14544 {
ab93e9b7 14545 error ("malformed %<target()%> pragma or attribute");
5a2c8331
KT
14546 return false;
14547 }
14548
14549 char *str_to_check = (char *) alloca (len + 1);
14550 strcpy (str_to_check, arg_str);
14551
5a2c8331
KT
14552 /* We have something like __attribute__ ((target ("+fp+nosimd"))).
14553 It is easier to detect and handle it explicitly here rather than going
14554 through the machinery for the rest of the target attributes in this
14555 function. */
14556 if (*str_to_check == '+')
ab93e9b7 14557 return aarch64_handle_attr_isa_flags (str_to_check);
5a2c8331
KT
14558
14559 if (len > 3 && strncmp (str_to_check, "no-", 3) == 0)
14560 {
14561 invert = true;
14562 str_to_check += 3;
14563 }
14564 char *arg = strchr (str_to_check, '=');
14565
14566 /* If we found opt=foo then terminate STR_TO_CHECK at the '='
14567 and point ARG to "foo". */
14568 if (arg)
14569 {
14570 *arg = '\0';
14571 arg++;
14572 }
14573 const struct aarch64_attribute_info *p_attr;
16d12992 14574 bool found = false;
5a2c8331
KT
14575 for (p_attr = aarch64_attributes; p_attr->name; p_attr++)
14576 {
14577 /* If the names don't match up, or the user has given an argument
14578 to an attribute that doesn't accept one, or didn't give an argument
14579 to an attribute that expects one, fail to match. */
14580 if (strcmp (str_to_check, p_attr->name) != 0)
14581 continue;
14582
16d12992 14583 found = true;
5a2c8331
KT
14584 bool attr_need_arg_p = p_attr->attr_type == aarch64_attr_custom
14585 || p_attr->attr_type == aarch64_attr_enum;
14586
14587 if (attr_need_arg_p ^ (arg != NULL))
14588 {
ab93e9b7 14589 error ("pragma or attribute %<target(\"%s\")%> does not accept an argument", str_to_check);
5a2c8331
KT
14590 return false;
14591 }
14592
14593 /* If the name matches but the attribute does not allow "no-" versions
14594 then we can't match. */
14595 if (invert && !p_attr->allow_neg)
14596 {
ab93e9b7 14597 error ("pragma or attribute %<target(\"%s\")%> does not allow a negated form", str_to_check);
5a2c8331
KT
14598 return false;
14599 }
14600
14601 switch (p_attr->attr_type)
14602 {
14603 /* Has a custom handler registered.
14604 For example, cpu=, arch=, tune=. */
14605 case aarch64_attr_custom:
14606 gcc_assert (p_attr->handler);
ab93e9b7 14607 if (!p_attr->handler (arg))
5a2c8331
KT
14608 return false;
14609 break;
14610
14611 /* Either set or unset a boolean option. */
14612 case aarch64_attr_bool:
14613 {
14614 struct cl_decoded_option decoded;
14615
14616 generate_option (p_attr->opt_num, NULL, !invert,
14617 CL_TARGET, &decoded);
14618 aarch64_handle_option (&global_options, &global_options_set,
14619 &decoded, input_location);
14620 break;
14621 }
14622 /* Set or unset a bit in the target_flags. aarch64_handle_option
14623 should know what mask to apply given the option number. */
14624 case aarch64_attr_mask:
14625 {
14626 struct cl_decoded_option decoded;
14627 /* We only need to specify the option number.
14628 aarch64_handle_option will know which mask to apply. */
14629 decoded.opt_index = p_attr->opt_num;
14630 decoded.value = !invert;
14631 aarch64_handle_option (&global_options, &global_options_set,
14632 &decoded, input_location);
14633 break;
14634 }
14635 /* Use the option setting machinery to set an option to an enum. */
14636 case aarch64_attr_enum:
14637 {
14638 gcc_assert (arg);
14639 bool valid;
14640 int value;
14641 valid = opt_enum_arg_to_value (p_attr->opt_num, arg,
14642 &value, CL_TARGET);
14643 if (valid)
14644 {
14645 set_option (&global_options, NULL, p_attr->opt_num, value,
14646 NULL, DK_UNSPECIFIED, input_location,
14647 global_dc);
14648 }
14649 else
14650 {
ab93e9b7 14651 error ("pragma or attribute %<target(\"%s=%s\")%> is not valid", str_to_check, arg);
5a2c8331
KT
14652 }
14653 break;
14654 }
14655 default:
14656 gcc_unreachable ();
14657 }
14658 }
14659
16d12992
KT
 14660	  /* If we reached here we have either found an attribute and validated
 14661	     it or matched none at all.  If we matched an attribute but its arguments
14662 were malformed we will have returned false already. */
14663 return found;
5a2c8331
KT
14664}
14665
14666/* Count how many times the character C appears in
14667 NULL-terminated string STR. */
14668
14669static unsigned int
14670num_occurences_in_str (char c, char *str)
14671{
14672 unsigned int res = 0;
14673 while (*str != '\0')
14674 {
14675 if (*str == c)
14676 res++;
14677
14678 str++;
14679 }
14680
14681 return res;
14682}
14683
14684/* Parse the tree in ARGS that contains the target attribute information
ab93e9b7 14685 and update the global target options space. */
5a2c8331
KT
14686
14687bool
ab93e9b7 14688aarch64_process_target_attr (tree args)
5a2c8331
KT
14689{
14690 if (TREE_CODE (args) == TREE_LIST)
14691 {
14692 do
14693 {
14694 tree head = TREE_VALUE (args);
14695 if (head)
14696 {
ab93e9b7 14697 if (!aarch64_process_target_attr (head))
5a2c8331
KT
14698 return false;
14699 }
14700 args = TREE_CHAIN (args);
14701 } while (args);
14702
14703 return true;
14704 }
3b6cb9e3
ML
14705
14706 if (TREE_CODE (args) != STRING_CST)
14707 {
14708 error ("attribute %<target%> argument not a string");
14709 return false;
14710 }
5a2c8331
KT
14711
14712 size_t len = strlen (TREE_STRING_POINTER (args));
14713 char *str_to_check = (char *) alloca (len + 1);
14714 strcpy (str_to_check, TREE_STRING_POINTER (args));
14715
14716 if (len == 0)
14717 {
ab93e9b7 14718 error ("malformed %<target()%> pragma or attribute");
5a2c8331
KT
14719 return false;
14720 }
14721
 14722	  /* Used to catch empty entries between commas, e.g.
14723 attribute ((target ("attr1,,attr2"))). */
14724 unsigned int num_commas = num_occurences_in_str (',', str_to_check);
14725
14726 /* Handle multiple target attributes separated by ','. */
7185a4eb 14727 char *token = strtok_r (str_to_check, ",", &str_to_check);
5a2c8331
KT
14728
14729 unsigned int num_attrs = 0;
14730 while (token)
14731 {
14732 num_attrs++;
ab93e9b7 14733 if (!aarch64_process_one_target_attr (token))
5a2c8331 14734 {
ab93e9b7 14735 error ("pragma or attribute %<target(\"%s\")%> is not valid", token);
5a2c8331
KT
14736 return false;
14737 }
14738
7185a4eb 14739 token = strtok_r (NULL, ",", &str_to_check);
5a2c8331
KT
14740 }
14741
14742 if (num_attrs != num_commas + 1)
14743 {
ab93e9b7 14744 error ("malformed %<target(\"%s\")%> pragma or attribute", TREE_STRING_POINTER (args));
5a2c8331
KT
14745 return false;
14746 }
14747
14748 return true;
14749}
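
/* For example (illustrative), the following pragma is split into two
   attributes by the strtok_r loop above and both are accepted, whereas a
   string such as "arch=armv8-a+crc,,strict-align" is rejected because the
   empty entry makes num_attrs differ from num_commas + 1:

     #pragma GCC target ("arch=armv8-a+crc,strict-align")  */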
14750
14751/* Implement TARGET_OPTION_VALID_ATTRIBUTE_P. This is used to
14752 process attribute ((target ("..."))). */
14753
14754static bool
14755aarch64_option_valid_attribute_p (tree fndecl, tree, tree args, int)
14756{
14757 struct cl_target_option cur_target;
14758 bool ret;
14759 tree old_optimize;
14760 tree new_target, new_optimize;
14761 tree existing_target = DECL_FUNCTION_SPECIFIC_TARGET (fndecl);
91d0e8de
KT
14762
14763 /* If what we're processing is the current pragma string then the
14764 target option node is already stored in target_option_current_node
14765 by aarch64_pragma_target_parse in aarch64-c.c. Use that to avoid
14766 having to re-parse the string. This is especially useful to keep
14767 arm_neon.h compile times down since that header contains a lot
14768 of intrinsics enclosed in pragmas. */
14769 if (!existing_target && args == current_target_pragma)
14770 {
14771 DECL_FUNCTION_SPECIFIC_TARGET (fndecl) = target_option_current_node;
14772 return true;
14773 }
5a2c8331
KT
14774 tree func_optimize = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl);
14775
14776 old_optimize = build_optimization_node (&global_options);
14777 func_optimize = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl);
14778
14779 /* If the function changed the optimization levels as well as setting
14780 target options, start with the optimizations specified. */
14781 if (func_optimize && func_optimize != old_optimize)
14782 cl_optimization_restore (&global_options,
14783 TREE_OPTIMIZATION (func_optimize));
14784
14785 /* Save the current target options to restore at the end. */
14786 cl_target_option_save (&cur_target, &global_options);
14787
14788 /* If fndecl already has some target attributes applied to it, unpack
14789 them so that we add this attribute on top of them, rather than
14790 overwriting them. */
14791 if (existing_target)
14792 {
14793 struct cl_target_option *existing_options
14794 = TREE_TARGET_OPTION (existing_target);
14795
14796 if (existing_options)
14797 cl_target_option_restore (&global_options, existing_options);
14798 }
14799 else
14800 cl_target_option_restore (&global_options,
14801 TREE_TARGET_OPTION (target_option_current_node));
14802
ab93e9b7 14803 ret = aarch64_process_target_attr (args);
5a2c8331
KT
14804
14805 /* Set up any additional state. */
14806 if (ret)
14807 {
14808 aarch64_override_options_internal (&global_options);
e95a988a
KT
14809 /* Initialize SIMD builtins if we haven't already.
14810 Set current_target_pragma to NULL for the duration so that
14811 the builtin initialization code doesn't try to tag the functions
14812 being built with the attributes specified by any current pragma, thus
14813 going into an infinite recursion. */
14814 if (TARGET_SIMD)
14815 {
14816 tree saved_current_target_pragma = current_target_pragma;
14817 current_target_pragma = NULL;
14818 aarch64_init_simd_builtins ();
14819 current_target_pragma = saved_current_target_pragma;
14820 }
5a2c8331
KT
14821 new_target = build_target_option_node (&global_options);
14822 }
14823 else
14824 new_target = NULL;
14825
14826 new_optimize = build_optimization_node (&global_options);
14827
14828 if (fndecl && ret)
14829 {
14830 DECL_FUNCTION_SPECIFIC_TARGET (fndecl) = new_target;
14831
14832 if (old_optimize != new_optimize)
14833 DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl) = new_optimize;
14834 }
14835
14836 cl_target_option_restore (&global_options, &cur_target);
14837
14838 if (old_optimize != new_optimize)
14839 cl_optimization_restore (&global_options,
14840 TREE_OPTIMIZATION (old_optimize));
14841 return ret;
14842}
14843
1fd8d40c
KT
14844/* Helper for aarch64_can_inline_p. In the case where CALLER and CALLEE are
14845 tri-bool options (yes, no, don't care) and the default value is
 14846	   DEF, determine whether inlining should be allowed.  */
14847
14848static bool
14849aarch64_tribools_ok_for_inlining_p (int caller, int callee,
14850 int dont_care, int def)
14851{
14852 /* If the callee doesn't care, always allow inlining. */
14853 if (callee == dont_care)
14854 return true;
14855
14856 /* If the caller doesn't care, always allow inlining. */
14857 if (caller == dont_care)
14858 return true;
14859
14860 /* Otherwise, allow inlining if either the callee and caller values
14861 agree, or if the callee is using the default value. */
14862 return (callee == caller || callee == def);
14863}
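
/* A worked example (illustrative), using DONT_CARE == 2 and DEF == 1 as the
   -momit-leaf-frame-pointer check below does:

     caller  callee  result
       1       2     true   (callee doesn't care)
       2       0     true   (caller doesn't care)
       1       1     true   (values agree)
       0       1     true   (callee uses the default)
       1       0     false  (explicit mismatch)  */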
14864
14865/* Implement TARGET_CAN_INLINE_P. Decide whether it is valid
14866 to inline CALLEE into CALLER based on target-specific info.
14867 Make sure that the caller and callee have compatible architectural
14868 features. Then go through the other possible target attributes
14869 and see if they can block inlining. Try not to reject always_inline
14870 callees unless they are incompatible architecturally. */
14871
14872static bool
14873aarch64_can_inline_p (tree caller, tree callee)
14874{
14875 tree caller_tree = DECL_FUNCTION_SPECIFIC_TARGET (caller);
14876 tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (callee);
14877
1fd8d40c
KT
14878 struct cl_target_option *caller_opts
14879 = TREE_TARGET_OPTION (caller_tree ? caller_tree
14880 : target_option_default_node);
14881
675d044c
SD
14882 struct cl_target_option *callee_opts
14883 = TREE_TARGET_OPTION (callee_tree ? callee_tree
14884 : target_option_default_node);
1fd8d40c
KT
14885
14886 /* Callee's ISA flags should be a subset of the caller's. */
14887 if ((caller_opts->x_aarch64_isa_flags & callee_opts->x_aarch64_isa_flags)
14888 != callee_opts->x_aarch64_isa_flags)
14889 return false;
14890
 14891	  /* Allow a non-strict-align callee to be inlined into a strict-align
 14892	     caller; reject any other strict-align mismatch.  */
14893 if ((TARGET_STRICT_ALIGN_P (caller_opts->x_target_flags)
14894 != TARGET_STRICT_ALIGN_P (callee_opts->x_target_flags))
14895 && !(!TARGET_STRICT_ALIGN_P (callee_opts->x_target_flags)
14896 && TARGET_STRICT_ALIGN_P (caller_opts->x_target_flags)))
14897 return false;
14898
14899 bool always_inline = lookup_attribute ("always_inline",
14900 DECL_ATTRIBUTES (callee));
14901
14902 /* If the architectural features match up and the callee is always_inline
14903 then the other attributes don't matter. */
14904 if (always_inline)
14905 return true;
14906
14907 if (caller_opts->x_aarch64_cmodel_var
14908 != callee_opts->x_aarch64_cmodel_var)
14909 return false;
14910
14911 if (caller_opts->x_aarch64_tls_dialect
14912 != callee_opts->x_aarch64_tls_dialect)
14913 return false;
14914
 14915	  /* Honour explicit requests to work around errata.  */
14916 if (!aarch64_tribools_ok_for_inlining_p (
14917 caller_opts->x_aarch64_fix_a53_err835769,
14918 callee_opts->x_aarch64_fix_a53_err835769,
14919 2, TARGET_FIX_ERR_A53_835769_DEFAULT))
14920 return false;
14921
48bb1a55
CL
14922 if (!aarch64_tribools_ok_for_inlining_p (
14923 caller_opts->x_aarch64_fix_a53_err843419,
14924 callee_opts->x_aarch64_fix_a53_err843419,
14925 2, TARGET_FIX_ERR_A53_843419))
14926 return false;
14927
1fd8d40c
KT
14928 /* If the user explicitly specified -momit-leaf-frame-pointer for the
 14929	     caller and callee and they don't match up, reject inlining.  */
14930 if (!aarch64_tribools_ok_for_inlining_p (
14931 caller_opts->x_flag_omit_leaf_frame_pointer,
14932 callee_opts->x_flag_omit_leaf_frame_pointer,
14933 2, 1))
14934 return false;
14935
14936 /* If the callee has specific tuning overrides, respect them. */
14937 if (callee_opts->x_aarch64_override_tune_string != NULL
14938 && caller_opts->x_aarch64_override_tune_string == NULL)
14939 return false;
14940
14941 /* If the user specified tuning override strings for the
14942 caller and callee and they don't match up, reject inlining.
 14943	     We just do a string compare here; we don't analyze the meaning
14944 of the string, as it would be too costly for little gain. */
14945 if (callee_opts->x_aarch64_override_tune_string
14946 && caller_opts->x_aarch64_override_tune_string
14947 && (strcmp (callee_opts->x_aarch64_override_tune_string,
14948 caller_opts->x_aarch64_override_tune_string) != 0))
14949 return false;
14950
14951 return true;
14952}
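
/* For example (illustrative): a callee declared as

     __attribute__ ((target ("+sve"))) int f (void);

   is not inlined into a caller built for plain -march=armv8-a, because the
   callee's ISA flags are not a subset of the caller's; compiling the caller
   with SVE enabled as well makes that first check pass.  */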
14953
bb6ce448
RS
 14954/* Return the ID of the TLSDESC ABI, initializing the descriptor if it
 14955   hasn't been initialized already.  */
14956
14957unsigned int
14958aarch64_tlsdesc_abi_id ()
14959{
14960 predefined_function_abi &tlsdesc_abi = function_abis[ARM_PCS_TLSDESC];
14961 if (!tlsdesc_abi.initialized_p ())
14962 {
14963 HARD_REG_SET full_reg_clobbers;
14964 CLEAR_HARD_REG_SET (full_reg_clobbers);
14965 SET_HARD_REG_BIT (full_reg_clobbers, R0_REGNUM);
14966 SET_HARD_REG_BIT (full_reg_clobbers, CC_REGNUM);
14967 for (int regno = P0_REGNUM; regno <= P15_REGNUM; ++regno)
14968 SET_HARD_REG_BIT (full_reg_clobbers, regno);
14969 tlsdesc_abi.initialize (ARM_PCS_TLSDESC, full_reg_clobbers);
14970 }
14971 return tlsdesc_abi.id ();
14972}
14973
43e9d192
IB
14974/* Return true if SYMBOL_REF X binds locally. */
14975
14976static bool
14977aarch64_symbol_binds_local_p (const_rtx x)
14978{
14979 return (SYMBOL_REF_DECL (x)
14980 ? targetm.binds_local_p (SYMBOL_REF_DECL (x))
14981 : SYMBOL_REF_LOCAL_P (x));
14982}
14983
 14984/* Return true if SYMBOL_REF X is thread local.  */
14985static bool
14986aarch64_tls_symbol_p (rtx x)
14987{
14988 if (! TARGET_HAVE_TLS)
14989 return false;
14990
14991 if (GET_CODE (x) != SYMBOL_REF)
14992 return false;
14993
14994 return SYMBOL_REF_TLS_MODEL (x) != 0;
14995}
14996
14997/* Classify a TLS symbol into one of the TLS kinds. */
14998enum aarch64_symbol_type
14999aarch64_classify_tls_symbol (rtx x)
15000{
15001 enum tls_model tls_kind = tls_symbolic_operand_type (x);
15002
15003 switch (tls_kind)
15004 {
15005 case TLS_MODEL_GLOBAL_DYNAMIC:
15006 case TLS_MODEL_LOCAL_DYNAMIC:
15007 return TARGET_TLS_DESC ? SYMBOL_SMALL_TLSDESC : SYMBOL_SMALL_TLSGD;
15008
15009 case TLS_MODEL_INITIAL_EXEC:
5ae7caad
JW
15010 switch (aarch64_cmodel)
15011 {
15012 case AARCH64_CMODEL_TINY:
15013 case AARCH64_CMODEL_TINY_PIC:
15014 return SYMBOL_TINY_TLSIE;
15015 default:
79496620 15016 return SYMBOL_SMALL_TLSIE;
5ae7caad 15017 }
43e9d192
IB
15018
15019 case TLS_MODEL_LOCAL_EXEC:
cbf5629e
JW
15020 if (aarch64_tls_size == 12)
15021 return SYMBOL_TLSLE12;
15022 else if (aarch64_tls_size == 24)
15023 return SYMBOL_TLSLE24;
15024 else if (aarch64_tls_size == 32)
15025 return SYMBOL_TLSLE32;
15026 else if (aarch64_tls_size == 48)
15027 return SYMBOL_TLSLE48;
15028 else
15029 gcc_unreachable ();
43e9d192
IB
15030
15031 case TLS_MODEL_EMULATED:
15032 case TLS_MODEL_NONE:
15033 return SYMBOL_FORCE_TO_MEM;
15034
15035 default:
15036 gcc_unreachable ();
15037 }
15038}
15039
43cacb12
RS
15040/* Return the correct method for accessing X + OFFSET, where X is either
15041 a SYMBOL_REF or LABEL_REF. */
17f4d4bf 15042
43e9d192 15043enum aarch64_symbol_type
43cacb12 15044aarch64_classify_symbol (rtx x, HOST_WIDE_INT offset)
43e9d192
IB
15045{
15046 if (GET_CODE (x) == LABEL_REF)
15047 {
15048 switch (aarch64_cmodel)
15049 {
15050 case AARCH64_CMODEL_LARGE:
15051 return SYMBOL_FORCE_TO_MEM;
15052
15053 case AARCH64_CMODEL_TINY_PIC:
15054 case AARCH64_CMODEL_TINY:
a5350ddc
CSS
15055 return SYMBOL_TINY_ABSOLUTE;
15056
1b1e81f8 15057 case AARCH64_CMODEL_SMALL_SPIC:
43e9d192
IB
15058 case AARCH64_CMODEL_SMALL_PIC:
15059 case AARCH64_CMODEL_SMALL:
15060 return SYMBOL_SMALL_ABSOLUTE;
15061
15062 default:
15063 gcc_unreachable ();
15064 }
15065 }
15066
17f4d4bf 15067 if (GET_CODE (x) == SYMBOL_REF)
43e9d192 15068 {
43e9d192
IB
15069 if (aarch64_tls_symbol_p (x))
15070 return aarch64_classify_tls_symbol (x);
15071
17f4d4bf
CSS
15072 switch (aarch64_cmodel)
15073 {
15074 case AARCH64_CMODEL_TINY:
15f6e0da 15075 /* When we retrieve symbol + offset address, we have to make sure
f8b756b7
TB
15076 the offset does not cause overflow of the final address. But
15077 we have no way of knowing the address of symbol at compile time
15078 so we can't accurately say if the distance between the PC and
7d3b27ff
WD
 15079	     symbol + offset is outside the addressable range of +/-1MB in the
15080 TINY code model. So we limit the maximum offset to +/-64KB and
15081 assume the offset to the symbol is not larger than +/-(1MB - 64KB).
15082 If offset_within_block_p is true we allow larger offsets.
15083 Furthermore force to memory if the symbol is a weak reference to
15084 something that doesn't resolve to a symbol in this module. */
15085
15086 if (SYMBOL_REF_WEAK (x) && !aarch64_symbol_binds_local_p (x))
a5350ddc 15087 return SYMBOL_FORCE_TO_MEM;
7d3b27ff
WD
15088 if (!(IN_RANGE (offset, -0x10000, 0x10000)
15089 || offset_within_block_p (x, offset)))
15090 return SYMBOL_FORCE_TO_MEM;
15091
a5350ddc
CSS
15092 return SYMBOL_TINY_ABSOLUTE;
15093
17f4d4bf 15094 case AARCH64_CMODEL_SMALL:
f8b756b7 15095 /* Same reasoning as the tiny code model, but the offset cap here is
7d3b27ff
WD
15096 1MB, allowing +/-3.9GB for the offset to the symbol. */
15097
15098 if (SYMBOL_REF_WEAK (x) && !aarch64_symbol_binds_local_p (x))
17f4d4bf 15099 return SYMBOL_FORCE_TO_MEM;
7d3b27ff
WD
15100 if (!(IN_RANGE (offset, -0x100000, 0x100000)
15101 || offset_within_block_p (x, offset)))
15102 return SYMBOL_FORCE_TO_MEM;
15103
17f4d4bf 15104 return SYMBOL_SMALL_ABSOLUTE;
43e9d192 15105
17f4d4bf 15106 case AARCH64_CMODEL_TINY_PIC:
38e6c9a6 15107 if (!aarch64_symbol_binds_local_p (x))
87dd8ab0 15108 return SYMBOL_TINY_GOT;
38e6c9a6
MS
15109 return SYMBOL_TINY_ABSOLUTE;
15110
1b1e81f8 15111 case AARCH64_CMODEL_SMALL_SPIC:
17f4d4bf
CSS
15112 case AARCH64_CMODEL_SMALL_PIC:
15113 if (!aarch64_symbol_binds_local_p (x))
1b1e81f8
JW
15114 return (aarch64_cmodel == AARCH64_CMODEL_SMALL_SPIC
15115 ? SYMBOL_SMALL_GOT_28K : SYMBOL_SMALL_GOT_4G);
17f4d4bf 15116 return SYMBOL_SMALL_ABSOLUTE;
43e9d192 15117
9ee6540a
WD
15118 case AARCH64_CMODEL_LARGE:
15119 /* This is alright even in PIC code as the constant
15120 pool reference is always PC relative and within
15121 the same translation unit. */
d47d34bb 15122 if (!aarch64_pcrelative_literal_loads && CONSTANT_POOL_ADDRESS_P (x))
9ee6540a
WD
15123 return SYMBOL_SMALL_ABSOLUTE;
15124 else
15125 return SYMBOL_FORCE_TO_MEM;
15126
17f4d4bf
CSS
15127 default:
15128 gcc_unreachable ();
15129 }
43e9d192 15130 }
17f4d4bf 15131
43e9d192
IB
15132 /* By default push everything into the constant pool. */
15133 return SYMBOL_FORCE_TO_MEM;
15134}
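
/* For instance (illustrative): an access to "arr + 0x20000" (a 128KB
   offset) fails the +/-64KB test above and is forced to memory under the
   tiny code model unless offset_within_block_p holds, while the small code
   model accepts it because 0x20000 is within its +/-1MB cap.  */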
15135
43e9d192
IB
15136bool
15137aarch64_constant_address_p (rtx x)
15138{
15139 return (CONSTANT_P (x) && memory_address_p (DImode, x));
15140}
15141
15142bool
15143aarch64_legitimate_pic_operand_p (rtx x)
15144{
15145 if (GET_CODE (x) == SYMBOL_REF
15146 || (GET_CODE (x) == CONST
15147 && GET_CODE (XEXP (x, 0)) == PLUS
15148 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF))
15149 return false;
15150
15151 return true;
15152}
15153
26895c21
WD
15154/* Implement TARGET_LEGITIMATE_CONSTANT_P hook. Return true for constants
15155 that should be rematerialized rather than spilled. */
3520f7cc 15156
43e9d192 15157static bool
ef4bddc2 15158aarch64_legitimate_constant_p (machine_mode mode, rtx x)
43e9d192 15159{
26895c21 15160 /* Support CSE and rematerialization of common constants. */
c0bb5bc5 15161 if (CONST_INT_P (x)
9f7b87ca 15162 || (CONST_DOUBLE_P (x) && GET_MODE_CLASS (mode) == MODE_FLOAT)
c0bb5bc5 15163 || GET_CODE (x) == CONST_VECTOR)
26895c21
WD
15164 return true;
15165
43cacb12
RS
15166 /* Do not allow vector struct mode constants for Advanced SIMD.
15167 We could support 0 and -1 easily, but they need support in
15168 aarch64-simd.md. */
15169 unsigned int vec_flags = aarch64_classify_vector_mode (mode);
15170 if (vec_flags == (VEC_ADVSIMD | VEC_STRUCT))
43e9d192
IB
15171 return false;
15172
43cacb12
RS
15173 /* Only accept variable-length vector constants if they can be
15174 handled directly.
15175
15176 ??? It would be possible to handle rematerialization of other
15177 constants via secondary reloads. */
15178 if (vec_flags & VEC_ANY_SVE)
15179 return aarch64_simd_valid_immediate (x, NULL);
15180
509bb9b6
RS
15181 if (GET_CODE (x) == HIGH)
15182 x = XEXP (x, 0);
15183
43cacb12
RS
15184 /* Accept polynomial constants that can be calculated by using the
15185 destination of a move as the sole temporary. Constants that
15186 require a second temporary cannot be rematerialized (they can't be
15187 forced to memory and also aren't legitimate constants). */
15188 poly_int64 offset;
15189 if (poly_int_rtx_p (x, &offset))
15190 return aarch64_offset_temporaries (false, offset) <= 1;
15191
15192 /* If an offset is being added to something else, we need to allow the
15193 base to be moved into the destination register, meaning that there
15194 are no free temporaries for the offset. */
15195 x = strip_offset (x, &offset);
15196 if (!offset.is_constant () && aarch64_offset_temporaries (true, offset) > 0)
15197 return false;
26895c21 15198
43cacb12
RS
15199 /* Do not allow const (plus (anchor_symbol, const_int)). */
15200 if (maybe_ne (offset, 0) && SYMBOL_REF_P (x) && SYMBOL_REF_ANCHOR_P (x))
15201 return false;
26895c21 15202
f28e54bd
WD
15203 /* Treat symbols as constants. Avoid TLS symbols as they are complex,
15204 so spilling them is better than rematerialization. */
15205 if (SYMBOL_REF_P (x) && !SYMBOL_REF_TLS_MODEL (x))
15206 return true;
15207
26895c21
WD
15208 /* Label references are always constant. */
15209 if (GET_CODE (x) == LABEL_REF)
15210 return true;
15211
15212 return false;
43e9d192
IB
15213}
15214
a5bc806c 15215rtx
43e9d192
IB
15216aarch64_load_tp (rtx target)
15217{
15218 if (!target
15219 || GET_MODE (target) != Pmode
15220 || !register_operand (target, Pmode))
15221 target = gen_reg_rtx (Pmode);
15222
15223 /* Can return in any reg. */
15224 emit_insn (gen_aarch64_load_tp_hard (target));
15225 return target;
15226}
15227
43e9d192
IB
15228/* On AAPCS systems, this is the "struct __va_list". */
15229static GTY(()) tree va_list_type;
15230
15231/* Implement TARGET_BUILD_BUILTIN_VA_LIST.
15232 Return the type to use as __builtin_va_list.
15233
15234 AAPCS64 \S 7.1.4 requires that va_list be a typedef for a type defined as:
15235
15236 struct __va_list
15237 {
15238 void *__stack;
15239 void *__gr_top;
15240 void *__vr_top;
15241 int __gr_offs;
15242 int __vr_offs;
15243 }; */
15244
15245static tree
15246aarch64_build_builtin_va_list (void)
15247{
15248 tree va_list_name;
15249 tree f_stack, f_grtop, f_vrtop, f_groff, f_vroff;
15250
15251 /* Create the type. */
15252 va_list_type = lang_hooks.types.make_type (RECORD_TYPE);
15253 /* Give it the required name. */
15254 va_list_name = build_decl (BUILTINS_LOCATION,
15255 TYPE_DECL,
15256 get_identifier ("__va_list"),
15257 va_list_type);
15258 DECL_ARTIFICIAL (va_list_name) = 1;
15259 TYPE_NAME (va_list_type) = va_list_name;
665c56c6 15260 TYPE_STUB_DECL (va_list_type) = va_list_name;
43e9d192
IB
15261
15262 /* Create the fields. */
15263 f_stack = build_decl (BUILTINS_LOCATION,
15264 FIELD_DECL, get_identifier ("__stack"),
15265 ptr_type_node);
15266 f_grtop = build_decl (BUILTINS_LOCATION,
15267 FIELD_DECL, get_identifier ("__gr_top"),
15268 ptr_type_node);
15269 f_vrtop = build_decl (BUILTINS_LOCATION,
15270 FIELD_DECL, get_identifier ("__vr_top"),
15271 ptr_type_node);
15272 f_groff = build_decl (BUILTINS_LOCATION,
15273 FIELD_DECL, get_identifier ("__gr_offs"),
15274 integer_type_node);
15275 f_vroff = build_decl (BUILTINS_LOCATION,
15276 FIELD_DECL, get_identifier ("__vr_offs"),
15277 integer_type_node);
15278
88e3bdd1 15279 /* Tell tree-stdarg pass about our internal offset fields.
3fd6b9cc
JW
 15280	     NOTE: va_list_gpr/fpr_counter_field are only used for tree comparison
15281 purpose to identify whether the code is updating va_list internal
15282 offset fields through irregular way. */
15283 va_list_gpr_counter_field = f_groff;
15284 va_list_fpr_counter_field = f_vroff;
15285
43e9d192
IB
15286 DECL_ARTIFICIAL (f_stack) = 1;
15287 DECL_ARTIFICIAL (f_grtop) = 1;
15288 DECL_ARTIFICIAL (f_vrtop) = 1;
15289 DECL_ARTIFICIAL (f_groff) = 1;
15290 DECL_ARTIFICIAL (f_vroff) = 1;
15291
15292 DECL_FIELD_CONTEXT (f_stack) = va_list_type;
15293 DECL_FIELD_CONTEXT (f_grtop) = va_list_type;
15294 DECL_FIELD_CONTEXT (f_vrtop) = va_list_type;
15295 DECL_FIELD_CONTEXT (f_groff) = va_list_type;
15296 DECL_FIELD_CONTEXT (f_vroff) = va_list_type;
15297
15298 TYPE_FIELDS (va_list_type) = f_stack;
15299 DECL_CHAIN (f_stack) = f_grtop;
15300 DECL_CHAIN (f_grtop) = f_vrtop;
15301 DECL_CHAIN (f_vrtop) = f_groff;
15302 DECL_CHAIN (f_groff) = f_vroff;
15303
15304 /* Compute its layout. */
15305 layout_type (va_list_type);
15306
15307 return va_list_type;
15308}
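
/* As an illustrative note derived from the field list above (not a quote
   from the ABI document): on LP64 the layout is three 8-byte pointers
   followed by two 4-byte ints, so sizeof (__va_list) is
   3 * 8 + 2 * 4 == 32 with 8-byte alignment.  */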
15309
15310/* Implement TARGET_EXPAND_BUILTIN_VA_START. */
15311static void
15312aarch64_expand_builtin_va_start (tree valist, rtx nextarg ATTRIBUTE_UNUSED)
15313{
15314 const CUMULATIVE_ARGS *cum;
15315 tree f_stack, f_grtop, f_vrtop, f_groff, f_vroff;
15316 tree stack, grtop, vrtop, groff, vroff;
15317 tree t;
88e3bdd1
JW
15318 int gr_save_area_size = cfun->va_list_gpr_size;
15319 int vr_save_area_size = cfun->va_list_fpr_size;
43e9d192
IB
15320 int vr_offset;
15321
15322 cum = &crtl->args.info;
88e3bdd1
JW
15323 if (cfun->va_list_gpr_size)
15324 gr_save_area_size = MIN ((NUM_ARG_REGS - cum->aapcs_ncrn) * UNITS_PER_WORD,
15325 cfun->va_list_gpr_size);
15326 if (cfun->va_list_fpr_size)
15327 vr_save_area_size = MIN ((NUM_FP_ARG_REGS - cum->aapcs_nvrn)
15328 * UNITS_PER_VREG, cfun->va_list_fpr_size);
43e9d192 15329
d5726973 15330 if (!TARGET_FLOAT)
43e9d192 15331 {
261fb553 15332 gcc_assert (cum->aapcs_nvrn == 0);
43e9d192
IB
15333 vr_save_area_size = 0;
15334 }
15335
15336 f_stack = TYPE_FIELDS (va_list_type_node);
15337 f_grtop = DECL_CHAIN (f_stack);
15338 f_vrtop = DECL_CHAIN (f_grtop);
15339 f_groff = DECL_CHAIN (f_vrtop);
15340 f_vroff = DECL_CHAIN (f_groff);
15341
15342 stack = build3 (COMPONENT_REF, TREE_TYPE (f_stack), valist, f_stack,
15343 NULL_TREE);
15344 grtop = build3 (COMPONENT_REF, TREE_TYPE (f_grtop), valist, f_grtop,
15345 NULL_TREE);
15346 vrtop = build3 (COMPONENT_REF, TREE_TYPE (f_vrtop), valist, f_vrtop,
15347 NULL_TREE);
15348 groff = build3 (COMPONENT_REF, TREE_TYPE (f_groff), valist, f_groff,
15349 NULL_TREE);
15350 vroff = build3 (COMPONENT_REF, TREE_TYPE (f_vroff), valist, f_vroff,
15351 NULL_TREE);
15352
15353 /* Emit code to initialize STACK, which points to the next varargs stack
15354 argument. CUM->AAPCS_STACK_SIZE gives the number of stack words used
15355 by named arguments. STACK is 8-byte aligned. */
15356 t = make_tree (TREE_TYPE (stack), virtual_incoming_args_rtx);
15357 if (cum->aapcs_stack_size > 0)
15358 t = fold_build_pointer_plus_hwi (t, cum->aapcs_stack_size * UNITS_PER_WORD);
15359 t = build2 (MODIFY_EXPR, TREE_TYPE (stack), stack, t);
15360 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
15361
15362 /* Emit code to initialize GRTOP, the top of the GR save area.
15363 virtual_incoming_args_rtx should have been 16 byte aligned. */
15364 t = make_tree (TREE_TYPE (grtop), virtual_incoming_args_rtx);
15365 t = build2 (MODIFY_EXPR, TREE_TYPE (grtop), grtop, t);
15366 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
15367
15368 /* Emit code to initialize VRTOP, the top of the VR save area.
15369 This address is gr_save_area_bytes below GRTOP, rounded
15370 down to the next 16-byte boundary. */
15371 t = make_tree (TREE_TYPE (vrtop), virtual_incoming_args_rtx);
4f59f9f2
UB
15372 vr_offset = ROUND_UP (gr_save_area_size,
15373 STACK_BOUNDARY / BITS_PER_UNIT);
43e9d192
IB
15374
15375 if (vr_offset)
15376 t = fold_build_pointer_plus_hwi (t, -vr_offset);
15377 t = build2 (MODIFY_EXPR, TREE_TYPE (vrtop), vrtop, t);
15378 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
15379
15380 /* Emit code to initialize GROFF, the offset from GRTOP of the
15381 next GPR argument. */
15382 t = build2 (MODIFY_EXPR, TREE_TYPE (groff), groff,
15383 build_int_cst (TREE_TYPE (groff), -gr_save_area_size));
15384 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
15385
15386 /* Likewise emit code to initialize VROFF, the offset from FTOP
15387 of the next VR argument. */
15388 t = build2 (MODIFY_EXPR, TREE_TYPE (vroff), vroff,
15389 build_int_cst (TREE_TYPE (vroff), -vr_save_area_size));
15390 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
15391}
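
/* A worked example (illustrative, assuming the tree-stdarg limits do not
   restrict the save areas): if the named arguments consume x0..x2 and no
   FP registers, gr_save_area_size is (8 - 3) * 8 == 40 bytes and
   vr_save_area_size is 8 * 16 == 128 bytes, so __gr_offs is initialized to
   -40, __vr_offs to -128, and __vr_top lies ROUND_UP (40, 16) == 48 bytes
   below __gr_top.  */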
15392
15393/* Implement TARGET_GIMPLIFY_VA_ARG_EXPR. */
15394
15395static tree
15396aarch64_gimplify_va_arg_expr (tree valist, tree type, gimple_seq *pre_p,
15397 gimple_seq *post_p ATTRIBUTE_UNUSED)
15398{
15399 tree addr;
15400 bool indirect_p;
15401 bool is_ha; /* is HFA or HVA. */
15402 bool dw_align; /* double-word align. */
ef4bddc2 15403 machine_mode ag_mode = VOIDmode;
43e9d192 15404 int nregs;
ef4bddc2 15405 machine_mode mode;
43e9d192
IB
15406
15407 tree f_stack, f_grtop, f_vrtop, f_groff, f_vroff;
15408 tree stack, f_top, f_off, off, arg, roundup, on_stack;
15409 HOST_WIDE_INT size, rsize, adjust, align;
15410 tree t, u, cond1, cond2;
15411
fde65a89 15412 indirect_p = pass_va_arg_by_reference (type);
43e9d192
IB
15413 if (indirect_p)
15414 type = build_pointer_type (type);
15415
15416 mode = TYPE_MODE (type);
15417
15418 f_stack = TYPE_FIELDS (va_list_type_node);
15419 f_grtop = DECL_CHAIN (f_stack);
15420 f_vrtop = DECL_CHAIN (f_grtop);
15421 f_groff = DECL_CHAIN (f_vrtop);
15422 f_vroff = DECL_CHAIN (f_groff);
15423
15424 stack = build3 (COMPONENT_REF, TREE_TYPE (f_stack), unshare_expr (valist),
15425 f_stack, NULL_TREE);
15426 size = int_size_in_bytes (type);
c590597c
RE
15427
15428 bool abi_break;
15429 align
15430 = aarch64_function_arg_alignment (mode, type, &abi_break) / BITS_PER_UNIT;
43e9d192
IB
15431
15432 dw_align = false;
15433 adjust = 0;
15434 if (aarch64_vfp_is_call_or_return_candidate (mode,
15435 type,
15436 &ag_mode,
15437 &nregs,
15438 &is_ha))
15439 {
6a70badb
RS
15440 /* No frontends can create types with variable-sized modes, so we
15441 shouldn't be asked to pass or return them. */
15442 unsigned int ag_size = GET_MODE_SIZE (ag_mode).to_constant ();
15443
43e9d192 15444 /* TYPE passed in fp/simd registers. */
d5726973 15445 if (!TARGET_FLOAT)
fc29dfc9 15446 aarch64_err_no_fpadvsimd (mode);
43e9d192
IB
15447
15448 f_top = build3 (COMPONENT_REF, TREE_TYPE (f_vrtop),
15449 unshare_expr (valist), f_vrtop, NULL_TREE);
15450 f_off = build3 (COMPONENT_REF, TREE_TYPE (f_vroff),
15451 unshare_expr (valist), f_vroff, NULL_TREE);
15452
15453 rsize = nregs * UNITS_PER_VREG;
15454
15455 if (is_ha)
15456 {
6a70badb
RS
15457 if (BYTES_BIG_ENDIAN && ag_size < UNITS_PER_VREG)
15458 adjust = UNITS_PER_VREG - ag_size;
43e9d192 15459 }
76b0cbf8 15460 else if (BLOCK_REG_PADDING (mode, type, 1) == PAD_DOWNWARD
43e9d192
IB
15461 && size < UNITS_PER_VREG)
15462 {
15463 adjust = UNITS_PER_VREG - size;
15464 }
15465 }
15466 else
15467 {
15468 /* TYPE passed in general registers. */
15469 f_top = build3 (COMPONENT_REF, TREE_TYPE (f_grtop),
15470 unshare_expr (valist), f_grtop, NULL_TREE);
15471 f_off = build3 (COMPONENT_REF, TREE_TYPE (f_groff),
15472 unshare_expr (valist), f_groff, NULL_TREE);
4f59f9f2 15473 rsize = ROUND_UP (size, UNITS_PER_WORD);
43e9d192
IB
15474 nregs = rsize / UNITS_PER_WORD;
15475
15476 if (align > 8)
c590597c
RE
15477 {
15478 if (abi_break && warn_psabi)
15479 inform (input_location, "parameter passing for argument of type "
15480 "%qT changed in GCC 9.1", type);
15481 dw_align = true;
15482 }
43e9d192 15483
76b0cbf8 15484 if (BLOCK_REG_PADDING (mode, type, 1) == PAD_DOWNWARD
43e9d192
IB
15485 && size < UNITS_PER_WORD)
15486 {
15487 adjust = UNITS_PER_WORD - size;
15488 }
15489 }
15490
15491 /* Get a local temporary for the field value. */
15492 off = get_initialized_tmp_var (f_off, pre_p, NULL);
15493
15494 /* Emit code to branch if off >= 0. */
15495 t = build2 (GE_EXPR, boolean_type_node, off,
15496 build_int_cst (TREE_TYPE (off), 0));
15497 cond1 = build3 (COND_EXPR, ptr_type_node, t, NULL_TREE, NULL_TREE);
15498
15499 if (dw_align)
15500 {
15501 /* Emit: offs = (offs + 15) & -16. */
15502 t = build2 (PLUS_EXPR, TREE_TYPE (off), off,
15503 build_int_cst (TREE_TYPE (off), 15));
15504 t = build2 (BIT_AND_EXPR, TREE_TYPE (off), t,
15505 build_int_cst (TREE_TYPE (off), -16));
15506 roundup = build2 (MODIFY_EXPR, TREE_TYPE (off), off, t);
15507 }
15508 else
15509 roundup = NULL;
15510
15511 /* Update ap.__[g|v]r_offs */
15512 t = build2 (PLUS_EXPR, TREE_TYPE (off), off,
15513 build_int_cst (TREE_TYPE (off), rsize));
15514 t = build2 (MODIFY_EXPR, TREE_TYPE (f_off), unshare_expr (f_off), t);
15515
15516 /* String up. */
15517 if (roundup)
15518 t = build2 (COMPOUND_EXPR, TREE_TYPE (t), roundup, t);
15519
15520 /* [cond2] if (ap.__[g|v]r_offs > 0) */
15521 u = build2 (GT_EXPR, boolean_type_node, unshare_expr (f_off),
15522 build_int_cst (TREE_TYPE (f_off), 0));
15523 cond2 = build3 (COND_EXPR, ptr_type_node, u, NULL_TREE, NULL_TREE);
15524
15525 /* String up: make sure the assignment happens before the use. */
15526 t = build2 (COMPOUND_EXPR, TREE_TYPE (cond2), t, cond2);
15527 COND_EXPR_ELSE (cond1) = t;
15528
15529 /* Prepare the trees handling the argument that is passed on the stack;
 15530	     the top-level node is stored in ON_STACK.  */
15531 arg = get_initialized_tmp_var (stack, pre_p, NULL);
15532 if (align > 8)
15533 {
15534 /* if (alignof(type) > 8) (arg = arg + 15) & -16; */
4bdc2738 15535 t = fold_build_pointer_plus_hwi (arg, 15);
43e9d192
IB
15536 t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
15537 build_int_cst (TREE_TYPE (t), -16));
43e9d192
IB
15538 roundup = build2 (MODIFY_EXPR, TREE_TYPE (arg), arg, t);
15539 }
15540 else
15541 roundup = NULL;
15542 /* Advance ap.__stack */
4bdc2738 15543 t = fold_build_pointer_plus_hwi (arg, size + 7);
43e9d192
IB
15544 t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
15545 build_int_cst (TREE_TYPE (t), -8));
43e9d192
IB
15546 t = build2 (MODIFY_EXPR, TREE_TYPE (stack), unshare_expr (stack), t);
15547 /* String up roundup and advance. */
15548 if (roundup)
15549 t = build2 (COMPOUND_EXPR, TREE_TYPE (t), roundup, t);
15550 /* String up with arg */
15551 on_stack = build2 (COMPOUND_EXPR, TREE_TYPE (arg), t, arg);
15552 /* Big-endianness related address adjustment. */
76b0cbf8 15553 if (BLOCK_REG_PADDING (mode, type, 1) == PAD_DOWNWARD
43e9d192
IB
15554 && size < UNITS_PER_WORD)
15555 {
15556 t = build2 (POINTER_PLUS_EXPR, TREE_TYPE (arg), arg,
15557 size_int (UNITS_PER_WORD - size));
15558 on_stack = build2 (COMPOUND_EXPR, TREE_TYPE (arg), on_stack, t);
15559 }
15560
15561 COND_EXPR_THEN (cond1) = unshare_expr (on_stack);
15562 COND_EXPR_THEN (cond2) = unshare_expr (on_stack);
15563
15564 /* Adjustment to OFFSET in the case of BIG_ENDIAN. */
15565 t = off;
15566 if (adjust)
15567 t = build2 (PREINCREMENT_EXPR, TREE_TYPE (off), off,
15568 build_int_cst (TREE_TYPE (off), adjust));
15569
15570 t = fold_convert (sizetype, t);
15571 t = build2 (POINTER_PLUS_EXPR, TREE_TYPE (f_top), f_top, t);
15572
15573 if (is_ha)
15574 {
15575 /* type ha; // treat as "struct {ftype field[n];}"
15576 ... [computing offs]
 15577	     for (i = 0; i < nregs; ++i, offs += 16)
15578 ha.field[i] = *((ftype *)(ap.__vr_top + offs));
15579 return ha; */
15580 int i;
15581 tree tmp_ha, field_t, field_ptr_t;
15582
15583 /* Declare a local variable. */
15584 tmp_ha = create_tmp_var_raw (type, "ha");
15585 gimple_add_tmp_var (tmp_ha);
15586
15587 /* Establish the base type. */
15588 switch (ag_mode)
15589 {
4e10a5a7 15590 case E_SFmode:
43e9d192
IB
15591 field_t = float_type_node;
15592 field_ptr_t = float_ptr_type_node;
15593 break;
4e10a5a7 15594 case E_DFmode:
43e9d192
IB
15595 field_t = double_type_node;
15596 field_ptr_t = double_ptr_type_node;
15597 break;
4e10a5a7 15598 case E_TFmode:
43e9d192
IB
15599 field_t = long_double_type_node;
15600 field_ptr_t = long_double_ptr_type_node;
15601 break;
4e10a5a7 15602 case E_HFmode:
1b62ed4f
JG
15603 field_t = aarch64_fp16_type_node;
15604 field_ptr_t = aarch64_fp16_ptr_type_node;
43e9d192 15605 break;
4e10a5a7
RS
15606 case E_V2SImode:
15607 case E_V4SImode:
43e9d192
IB
15608 {
15609 tree innertype = make_signed_type (GET_MODE_PRECISION (SImode));
15610 field_t = build_vector_type_for_mode (innertype, ag_mode);
15611 field_ptr_t = build_pointer_type (field_t);
15612 }
15613 break;
15614 default:
15615 gcc_assert (0);
15616 }
15617
15618 /* *(field_ptr_t)&ha = *((field_ptr_t)vr_saved_area */
15619 tmp_ha = build1 (ADDR_EXPR, field_ptr_t, tmp_ha);
15620 addr = t;
15621 t = fold_convert (field_ptr_t, addr);
15622 t = build2 (MODIFY_EXPR, field_t,
15623 build1 (INDIRECT_REF, field_t, tmp_ha),
15624 build1 (INDIRECT_REF, field_t, t));
15625
15626 /* ha.field[i] = *((field_ptr_t)vr_saved_area + i) */
15627 for (i = 1; i < nregs; ++i)
15628 {
15629 addr = fold_build_pointer_plus_hwi (addr, UNITS_PER_VREG);
15630 u = fold_convert (field_ptr_t, addr);
15631 u = build2 (MODIFY_EXPR, field_t,
15632 build2 (MEM_REF, field_t, tmp_ha,
15633 build_int_cst (field_ptr_t,
15634 (i *
15635 int_size_in_bytes (field_t)))),
15636 build1 (INDIRECT_REF, field_t, u));
15637 t = build2 (COMPOUND_EXPR, TREE_TYPE (t), t, u);
15638 }
15639
15640 u = fold_convert (TREE_TYPE (f_top), tmp_ha);
15641 t = build2 (COMPOUND_EXPR, TREE_TYPE (f_top), t, u);
15642 }
15643
15644 COND_EXPR_ELSE (cond2) = t;
15645 addr = fold_convert (build_pointer_type (type), cond1);
15646 addr = build_va_arg_indirect_ref (addr);
15647
15648 if (indirect_p)
15649 addr = build_va_arg_indirect_ref (addr);
15650
15651 return addr;
15652}
15653
15654/* Implement TARGET_SETUP_INCOMING_VARARGS. */
15655
15656static void
e7056ca4
RS
15657aarch64_setup_incoming_varargs (cumulative_args_t cum_v,
15658 const function_arg_info &arg,
15659 int *pretend_size ATTRIBUTE_UNUSED, int no_rtl)
43e9d192
IB
15660{
15661 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
15662 CUMULATIVE_ARGS local_cum;
88e3bdd1
JW
15663 int gr_saved = cfun->va_list_gpr_size;
15664 int vr_saved = cfun->va_list_fpr_size;
43e9d192
IB
15665
15666 /* The caller has advanced CUM up to, but not beyond, the last named
15667 argument. Advance a local copy of CUM past the last "real" named
15668 argument, to find out how many registers are left over. */
15669 local_cum = *cum;
6930c98c 15670 aarch64_function_arg_advance (pack_cumulative_args(&local_cum), arg);
43e9d192 15671
88e3bdd1
JW
 15672	  /* Find out how many registers we need to save.
 15673	     Honor tree-stdarg analysis results.  */
15674 if (cfun->va_list_gpr_size)
15675 gr_saved = MIN (NUM_ARG_REGS - local_cum.aapcs_ncrn,
15676 cfun->va_list_gpr_size / UNITS_PER_WORD);
15677 if (cfun->va_list_fpr_size)
15678 vr_saved = MIN (NUM_FP_ARG_REGS - local_cum.aapcs_nvrn,
15679 cfun->va_list_fpr_size / UNITS_PER_VREG);
43e9d192 15680
d5726973 15681 if (!TARGET_FLOAT)
43e9d192 15682 {
261fb553 15683 gcc_assert (local_cum.aapcs_nvrn == 0);
43e9d192
IB
15684 vr_saved = 0;
15685 }
15686
15687 if (!no_rtl)
15688 {
15689 if (gr_saved > 0)
15690 {
15691 rtx ptr, mem;
15692
15693 /* virtual_incoming_args_rtx should have been 16-byte aligned. */
15694 ptr = plus_constant (Pmode, virtual_incoming_args_rtx,
15695 - gr_saved * UNITS_PER_WORD);
15696 mem = gen_frame_mem (BLKmode, ptr);
15697 set_mem_alias_set (mem, get_varargs_alias_set ());
15698
15699 move_block_from_reg (local_cum.aapcs_ncrn + R0_REGNUM,
15700 mem, gr_saved);
15701 }
15702 if (vr_saved > 0)
15703 {
15704 /* We can't use move_block_from_reg, because it will use
15705 the wrong mode, storing D regs only. */
ef4bddc2 15706 machine_mode mode = TImode;
88e3bdd1 15707 int off, i, vr_start;
43e9d192
IB
15708
15709 /* Set OFF to the offset from virtual_incoming_args_rtx of
15710 the first vector register. The VR save area lies below
15711 the GR one, and is aligned to 16 bytes. */
4f59f9f2
UB
15712 off = -ROUND_UP (gr_saved * UNITS_PER_WORD,
15713 STACK_BOUNDARY / BITS_PER_UNIT);
43e9d192
IB
15714 off -= vr_saved * UNITS_PER_VREG;
15715
88e3bdd1
JW
15716 vr_start = V0_REGNUM + local_cum.aapcs_nvrn;
15717 for (i = 0; i < vr_saved; ++i)
43e9d192
IB
15718 {
15719 rtx ptr, mem;
15720
15721 ptr = plus_constant (Pmode, virtual_incoming_args_rtx, off);
15722 mem = gen_frame_mem (mode, ptr);
15723 set_mem_alias_set (mem, get_varargs_alias_set ());
88e3bdd1 15724 aarch64_emit_move (mem, gen_rtx_REG (mode, vr_start + i));
43e9d192
IB
15725 off += UNITS_PER_VREG;
15726 }
15727 }
15728 }
15729
15730 /* We don't save the size into *PRETEND_SIZE because we want to avoid
15731 any complication of having crtl->args.pretend_args_size changed. */
8799637a 15732 cfun->machine->frame.saved_varargs_size
4f59f9f2
UB
15733 = (ROUND_UP (gr_saved * UNITS_PER_WORD,
15734 STACK_BOUNDARY / BITS_PER_UNIT)
43e9d192
IB
15735 + vr_saved * UNITS_PER_VREG);
15736}
15737
15738static void
15739aarch64_conditional_register_usage (void)
15740{
15741 int i;
15742 if (!TARGET_FLOAT)
15743 {
15744 for (i = V0_REGNUM; i <= V31_REGNUM; i++)
15745 {
15746 fixed_regs[i] = 1;
15747 call_used_regs[i] = 1;
15748 }
15749 }
43cacb12
RS
15750 if (!TARGET_SVE)
15751 for (i = P0_REGNUM; i <= P15_REGNUM; i++)
15752 {
15753 fixed_regs[i] = 1;
15754 call_used_regs[i] = 1;
15755 }
3751345d 15756
183bfdaf
RS
15757 /* Only allow the FFR and FFRT to be accessed via special patterns. */
15758 CLEAR_HARD_REG_BIT (operand_reg_set, FFR_REGNUM);
15759 CLEAR_HARD_REG_BIT (operand_reg_set, FFRT_REGNUM);
15760
3751345d
RE
15761 /* When tracking speculation, we need a couple of call-clobbered registers
15762 to track the speculation state. It would be nice to just use
15763 IP0 and IP1, but currently there are numerous places that just
 15764	     assume these registers are free for other uses (e.g. pointer
15765 authentication). */
15766 if (aarch64_track_speculation)
15767 {
15768 fixed_regs[SPECULATION_TRACKER_REGNUM] = 1;
15769 call_used_regs[SPECULATION_TRACKER_REGNUM] = 1;
15770 fixed_regs[SPECULATION_SCRATCH_REGNUM] = 1;
15771 call_used_regs[SPECULATION_SCRATCH_REGNUM] = 1;
15772 }
43e9d192
IB
15773}
15774
15775/* Walk down the type tree of TYPE counting consecutive base elements.
15776 If *MODEP is VOIDmode, then set it to the first valid floating point
15777 type. If a non-floating point type is found, or if a floating point
15778 type that doesn't match a non-VOIDmode *MODEP is found, then return -1,
15779 otherwise return the count in the sub-tree. */
15780static int
ef4bddc2 15781aapcs_vfp_sub_candidate (const_tree type, machine_mode *modep)
43e9d192 15782{
ef4bddc2 15783 machine_mode mode;
43e9d192
IB
15784 HOST_WIDE_INT size;
15785
c600df9a
RS
15786 /* SVE types (and types containing SVE types) must be handled
15787 before calling this function. */
15788 gcc_assert (!aarch64_sve::builtin_type_p (type));
15789
43e9d192
IB
15790 switch (TREE_CODE (type))
15791 {
15792 case REAL_TYPE:
15793 mode = TYPE_MODE (type);
1b62ed4f
JG
15794 if (mode != DFmode && mode != SFmode
15795 && mode != TFmode && mode != HFmode)
43e9d192
IB
15796 return -1;
15797
15798 if (*modep == VOIDmode)
15799 *modep = mode;
15800
15801 if (*modep == mode)
15802 return 1;
15803
15804 break;
15805
15806 case COMPLEX_TYPE:
15807 mode = TYPE_MODE (TREE_TYPE (type));
1b62ed4f
JG
15808 if (mode != DFmode && mode != SFmode
15809 && mode != TFmode && mode != HFmode)
43e9d192
IB
15810 return -1;
15811
15812 if (*modep == VOIDmode)
15813 *modep = mode;
15814
15815 if (*modep == mode)
15816 return 2;
15817
15818 break;
15819
15820 case VECTOR_TYPE:
15821 /* Use V2SImode and V4SImode as representatives of all 64-bit
15822 and 128-bit vector types. */
15823 size = int_size_in_bytes (type);
15824 switch (size)
15825 {
15826 case 8:
15827 mode = V2SImode;
15828 break;
15829 case 16:
15830 mode = V4SImode;
15831 break;
15832 default:
15833 return -1;
15834 }
15835
15836 if (*modep == VOIDmode)
15837 *modep = mode;
15838
15839 /* Vector modes are considered to be opaque: two vectors are
15840 equivalent for the purposes of being homogeneous aggregates
15841 if they are the same size. */
15842 if (*modep == mode)
15843 return 1;
15844
15845 break;
15846
15847 case ARRAY_TYPE:
15848 {
15849 int count;
15850 tree index = TYPE_DOMAIN (type);
15851
807e902e
KZ
15852 /* Can't handle incomplete types nor sizes that are not
15853 fixed. */
15854 if (!COMPLETE_TYPE_P (type)
15855 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
43e9d192
IB
15856 return -1;
15857
15858 count = aapcs_vfp_sub_candidate (TREE_TYPE (type), modep);
15859 if (count == -1
15860 || !index
15861 || !TYPE_MAX_VALUE (index)
cc269bb6 15862 || !tree_fits_uhwi_p (TYPE_MAX_VALUE (index))
43e9d192 15863 || !TYPE_MIN_VALUE (index)
cc269bb6 15864 || !tree_fits_uhwi_p (TYPE_MIN_VALUE (index))
43e9d192
IB
15865 || count < 0)
15866 return -1;
15867
ae7e9ddd
RS
15868 count *= (1 + tree_to_uhwi (TYPE_MAX_VALUE (index))
15869 - tree_to_uhwi (TYPE_MIN_VALUE (index)));
43e9d192
IB
15870
15871 /* There must be no padding. */
6a70badb
RS
15872 if (maybe_ne (wi::to_poly_wide (TYPE_SIZE (type)),
15873 count * GET_MODE_BITSIZE (*modep)))
43e9d192
IB
15874 return -1;
15875
15876 return count;
15877 }
15878
15879 case RECORD_TYPE:
15880 {
15881 int count = 0;
15882 int sub_count;
15883 tree field;
15884
807e902e
KZ
15885 /* Can't handle incomplete types nor sizes that are not
15886 fixed. */
15887 if (!COMPLETE_TYPE_P (type)
15888 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
43e9d192
IB
15889 return -1;
15890
15891 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
15892 {
15893 if (TREE_CODE (field) != FIELD_DECL)
15894 continue;
15895
15896 sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep);
15897 if (sub_count < 0)
15898 return -1;
15899 count += sub_count;
15900 }
15901
15902 /* There must be no padding. */
6a70badb
RS
15903 if (maybe_ne (wi::to_poly_wide (TYPE_SIZE (type)),
15904 count * GET_MODE_BITSIZE (*modep)))
43e9d192
IB
15905 return -1;
15906
15907 return count;
15908 }
15909
15910 case UNION_TYPE:
15911 case QUAL_UNION_TYPE:
15912 {
15913 /* These aren't very interesting except in a degenerate case. */
15914 int count = 0;
15915 int sub_count;
15916 tree field;
15917
807e902e
KZ
15918 /* Can't handle incomplete types nor sizes that are not
15919 fixed. */
15920 if (!COMPLETE_TYPE_P (type)
15921 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
43e9d192
IB
15922 return -1;
15923
15924 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
15925 {
15926 if (TREE_CODE (field) != FIELD_DECL)
15927 continue;
15928
15929 sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep);
15930 if (sub_count < 0)
15931 return -1;
15932 count = count > sub_count ? count : sub_count;
15933 }
15934
15935 /* There must be no padding. */
6a70badb
RS
15936 if (maybe_ne (wi::to_poly_wide (TYPE_SIZE (type)),
15937 count * GET_MODE_BITSIZE (*modep)))
43e9d192
IB
15938 return -1;
15939
15940 return count;
15941 }
15942
15943 default:
15944 break;
15945 }
15946
15947 return -1;
15948}
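
/* For example (illustrative):

     struct hfa { double a; double b; double c; };

   yields 3 with *MODEP set to DFmode, i.e. a homogeneous floating-point
   aggregate of three members, whereas

     struct mix { double a; int b; };

   yields -1 because the integer field matches none of the cases above.  */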
15949
b6ec6215
KT
15950/* Return TRUE if the type, as described by TYPE and MODE, is a short vector
15951 type as described in AAPCS64 \S 4.1.2.
15952
15953 See the comment above aarch64_composite_type_p for the notes on MODE. */
15954
15955static bool
15956aarch64_short_vector_p (const_tree type,
15957 machine_mode mode)
15958{
6a70badb 15959 poly_int64 size = -1;
b6ec6215 15960
c600df9a
RS
15961 if (type && aarch64_sve::builtin_type_p (type))
15962 return false;
15963
b6ec6215
KT
15964 if (type && TREE_CODE (type) == VECTOR_TYPE)
15965 size = int_size_in_bytes (type);
15966 else if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
15967 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
15968 size = GET_MODE_SIZE (mode);
15969
6a70badb 15970 return known_eq (size, 8) || known_eq (size, 16);
b6ec6215
KT
15971}
15972
43e9d192
IB
15973/* Return TRUE if the type, as described by TYPE and MODE, is a composite
15974 type as described in AAPCS64 \S 4.3. This includes aggregate, union and
15975 array types. The C99 floating-point complex types are also considered
15976 as composite types, according to AAPCS64 \S 7.1.1. The complex integer
15977 types, which are GCC extensions and out of the scope of AAPCS64, are
15978 treated as composite types here as well.
15979
15980 Note that MODE itself is not sufficient in determining whether a type
15981 is such a composite type or not. This is because
15982 stor-layout.c:compute_record_mode may have already changed the MODE
15983 (BLKmode) of a RECORD_TYPE TYPE to some other mode. For example, a
15984 structure with only one field may have its MODE set to the mode of the
15985 field. Also an integer mode whose size matches the size of the
15986 RECORD_TYPE type may be used to substitute the original mode
15987 (i.e. BLKmode) in certain circumstances. In other words, MODE cannot be
15988 solely relied on. */
15989
15990static bool
15991aarch64_composite_type_p (const_tree type,
ef4bddc2 15992 machine_mode mode)
43e9d192 15993{
b6ec6215
KT
15994 if (aarch64_short_vector_p (type, mode))
15995 return false;
15996
43e9d192
IB
15997 if (type && (AGGREGATE_TYPE_P (type) || TREE_CODE (type) == COMPLEX_TYPE))
15998 return true;
15999
16000 if (mode == BLKmode
16001 || GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT
16002 || GET_MODE_CLASS (mode) == MODE_COMPLEX_INT)
16003 return true;
16004
16005 return false;
16006}
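
/* For example (illustrative): a single-field wrapper such as

     struct wrap { float x; };

   may have its TYPE_MODE changed to SFmode by compute_record_mode, yet it is
   still classified as composite here because AGGREGATE_TYPE_P holds, while a
   16-byte Advanced SIMD type such as int32x4_t (V4SImode) is a short vector
   and therefore not composite.  */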
16007
43e9d192
IB
16008/* Return TRUE if an argument, whose type is described by TYPE and MODE,
16009 shall be passed or returned in simd/fp register(s) (providing these
16010 parameter passing registers are available).
16011
16012 Upon successful return, *COUNT returns the number of needed registers,
16013 *BASE_MODE returns the mode of the individual register and when IS_HAF
16014 is not NULL, *IS_HA indicates whether or not the argument is a homogeneous
16015 floating-point aggregate or a homogeneous short-vector aggregate. */
16016
16017static bool
ef4bddc2 16018aarch64_vfp_is_call_or_return_candidate (machine_mode mode,
43e9d192 16019 const_tree type,
ef4bddc2 16020 machine_mode *base_mode,
43e9d192
IB
16021 int *count,
16022 bool *is_ha)
16023{
c600df9a
RS
16024 if (is_ha != NULL) *is_ha = false;
16025
16026 if (type && aarch64_sve::builtin_type_p (type))
16027 return false;
16028
ef4bddc2 16029 machine_mode new_mode = VOIDmode;
43e9d192
IB
16030 bool composite_p = aarch64_composite_type_p (type, mode);
16031
43e9d192
IB
16032 if ((!composite_p && GET_MODE_CLASS (mode) == MODE_FLOAT)
16033 || aarch64_short_vector_p (type, mode))
16034 {
16035 *count = 1;
16036 new_mode = mode;
16037 }
16038 else if (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT)
16039 {
16040 if (is_ha != NULL) *is_ha = true;
16041 *count = 2;
16042 new_mode = GET_MODE_INNER (mode);
16043 }
16044 else if (type && composite_p)
16045 {
16046 int ag_count = aapcs_vfp_sub_candidate (type, &new_mode);
16047
16048 if (ag_count > 0 && ag_count <= HA_MAX_NUM_FLDS)
16049 {
16050 if (is_ha != NULL) *is_ha = true;
16051 *count = ag_count;
16052 }
16053 else
16054 return false;
16055 }
16056 else
16057 return false;
16058
16059 *base_mode = new_mode;
16060 return true;
16061}
16062
16063/* Implement TARGET_STRUCT_VALUE_RTX. */
16064
16065static rtx
16066aarch64_struct_value_rtx (tree fndecl ATTRIBUTE_UNUSED,
16067 int incoming ATTRIBUTE_UNUSED)
16068{
16069 return gen_rtx_REG (Pmode, AARCH64_STRUCT_VALUE_REGNUM);
16070}
16071
16072/* Implements target hook vector_mode_supported_p. */
16073static bool
ef4bddc2 16074aarch64_vector_mode_supported_p (machine_mode mode)
43e9d192 16075{
43cacb12 16076 unsigned int vec_flags = aarch64_classify_vector_mode (mode);
cc68f7c2 16077 return vec_flags != 0 && (vec_flags & VEC_STRUCT) == 0;
43e9d192
IB
16078}
16079
4aeb1ba7
RS
16080/* Return the full-width SVE vector mode for element mode MODE, if one
16081 exists. */
16082opt_machine_mode
16083aarch64_full_sve_mode (scalar_mode mode)
16084{
16085 switch (mode)
16086 {
16087 case E_DFmode:
16088 return VNx2DFmode;
16089 case E_SFmode:
16090 return VNx4SFmode;
16091 case E_HFmode:
16092 return VNx8HFmode;
16093 case E_DImode:
16094 return VNx2DImode;
16095 case E_SImode:
16096 return VNx4SImode;
16097 case E_HImode:
16098 return VNx8HImode;
16099 case E_QImode:
16100 return VNx16QImode;
16101 default:
16102 return opt_machine_mode ();
16103 }
16104}
16105
16106/* Return the 128-bit Advanced SIMD vector mode for element mode MODE,
16107 if it exists. */
16108opt_machine_mode
16109aarch64_vq_mode (scalar_mode mode)
16110{
16111 switch (mode)
16112 {
16113 case E_DFmode:
16114 return V2DFmode;
16115 case E_SFmode:
16116 return V4SFmode;
16117 case E_HFmode:
16118 return V8HFmode;
16119 case E_SImode:
16120 return V4SImode;
16121 case E_HImode:
16122 return V8HImode;
16123 case E_QImode:
16124 return V16QImode;
16125 case E_DImode:
16126 return V2DImode;
16127 default:
16128 return opt_machine_mode ();
16129 }
16130}
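
/* Example usage (illustrative): aarch64_vq_mode (E_SFmode) yields V4SFmode
   and aarch64_full_sve_mode (E_SFmode) yields VNx4SFmode; callers typically
   write

     aarch64_vq_mode (mode).else_mode (word_mode)

   to fall back to word_mode when no such vector mode exists.  */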
16131
b7342d25
IB
16132/* Return appropriate SIMD container
16133 for MODE within a vector of WIDTH bits. */
ef4bddc2 16134static machine_mode
43cacb12 16135aarch64_simd_container_mode (scalar_mode mode, poly_int64 width)
43e9d192 16136{
43cacb12 16137 if (TARGET_SVE && known_eq (width, BITS_PER_SVE_VECTOR))
4aeb1ba7 16138 return aarch64_full_sve_mode (mode).else_mode (word_mode);
43cacb12
RS
16139
16140 gcc_assert (known_eq (width, 64) || known_eq (width, 128));
43e9d192 16141 if (TARGET_SIMD)
b7342d25 16142 {
43cacb12 16143 if (known_eq (width, 128))
4aeb1ba7 16144 return aarch64_vq_mode (mode).else_mode (word_mode);
b7342d25
IB
16145 else
16146 switch (mode)
16147 {
4e10a5a7 16148 case E_SFmode:
b7342d25 16149 return V2SFmode;
4e10a5a7 16150 case E_HFmode:
b719f884 16151 return V4HFmode;
4e10a5a7 16152 case E_SImode:
b7342d25 16153 return V2SImode;
4e10a5a7 16154 case E_HImode:
b7342d25 16155 return V4HImode;
4e10a5a7 16156 case E_QImode:
b7342d25
IB
16157 return V8QImode;
16158 default:
16159 break;
16160 }
16161 }
43e9d192
IB
16162 return word_mode;
16163}
16164
b7342d25 16165/* Return 128-bit container as the preferred SIMD mode for MODE. */
ef4bddc2 16166static machine_mode
005ba29c 16167aarch64_preferred_simd_mode (scalar_mode mode)
b7342d25 16168{
43cacb12
RS
16169 poly_int64 bits = TARGET_SVE ? BITS_PER_SVE_VECTOR : 128;
16170 return aarch64_simd_container_mode (mode, bits);
b7342d25
IB
16171}
16172
86e36728 16173/* Return a list of possible vector sizes for the vectorizer
3b357264 16174 to iterate over. */
bcc7e346 16175static unsigned int
e021fb86 16176aarch64_autovectorize_vector_modes (vector_modes *modes, bool)
3b357264 16177{
cc68f7c2
RS
16178 static const machine_mode sve_modes[] = {
16179 /* Try using full vectors for all element types. */
16180 VNx16QImode,
16181
16182 /* Try using 16-bit containers for 8-bit elements and full vectors
16183 for wider elements. */
16184 VNx8QImode,
16185
16186 /* Try using 32-bit containers for 8-bit and 16-bit elements and
16187 full vectors for wider elements. */
16188 VNx4QImode,
74166aab 16189
cc68f7c2
RS
16190 /* Try using 64-bit containers for all element types. */
16191 VNx2QImode
16192 };
16193
16194 static const machine_mode advsimd_modes[] = {
16195 /* Try using 128-bit vectors for all element types. */
16196 V16QImode,
16197
16198 /* Try using 64-bit vectors for 8-bit elements and 128-bit vectors
16199 for wider elements. */
16200 V8QImode,
16201
16202 /* Try using 64-bit vectors for 16-bit elements and 128-bit vectors
16203 for wider elements.
16204
16205 TODO: We could support a limited form of V4QImode too, so that
16206 we use 32-bit vectors for 8-bit elements. */
16207 V4HImode,
16208
16209 /* Try using 64-bit vectors for 32-bit elements and 128-bit vectors
16210 for 64-bit elements.
74166aab 16211
cc68f7c2
RS
16212 TODO: We could similarly support limited forms of V2QImode and V2HImode
16213 for this case. */
16214 V2SImode
16215 };
74166aab 16216
cc68f7c2
RS
16217 /* Try using N-byte SVE modes only after trying N-byte Advanced SIMD mode.
16218 This is because:
74166aab 16219
cc68f7c2
RS
16220 - If we can't use N-byte Advanced SIMD vectors then the placement
16221 doesn't matter; we'll just continue as though the Advanced SIMD
16222 entry didn't exist.
74166aab 16223
cc68f7c2
RS
16224 - If an SVE main loop with N bytes ends up being cheaper than an
16225 Advanced SIMD main loop with N bytes then by default we'll replace
16226 the Advanced SIMD version with the SVE one.
74166aab 16227
cc68f7c2
RS
16228 - If an Advanced SIMD main loop with N bytes ends up being cheaper
16229 than an SVE main loop with N bytes then by default we'll try to
16230 use the SVE loop to vectorize the epilogue instead. */
16231 unsigned int sve_i = TARGET_SVE ? 0 : ARRAY_SIZE (sve_modes);
16232 unsigned int advsimd_i = 0;
16233 while (advsimd_i < ARRAY_SIZE (advsimd_modes))
16234 {
16235 if (sve_i < ARRAY_SIZE (sve_modes)
16236 && maybe_gt (GET_MODE_NUNITS (sve_modes[sve_i]),
16237 GET_MODE_NUNITS (advsimd_modes[advsimd_i])))
16238 modes->safe_push (sve_modes[sve_i++]);
16239 else
16240 modes->safe_push (advsimd_modes[advsimd_i++]);
16241 }
16242 while (sve_i < ARRAY_SIZE (sve_modes))
16243 modes->safe_push (sve_modes[sve_i++]);
bcc7e346 16244
eb23241b
RS
16245 unsigned int flags = 0;
16246 /* Consider enabling VECT_COMPARE_COSTS for SVE, both so that we
16247 can compare SVE against Advanced SIMD and so that we can compare
16248 multiple SVE vectorization approaches against each other. There's
16249 not really any point doing this for Advanced SIMD only, since the
16250 first mode that works should always be the best. */
16251 if (TARGET_SVE && aarch64_sve_compare_costs)
16252 flags |= VECT_COMPARE_COSTS;
16253 return flags;
3b357264
JG
16254}
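/* Illustrative sketch, not part of GCC: the interleaving loop above, recast
   on two plain arrays of element counts sorted in decreasing order.  GCC
   compares poly_int element counts with maybe_gt; here plain ints and a
   strict ">" stand in for that, so an SVE entry is taken first only when it
   has more elements, and equal-count candidates favour Advanced SIMD.  The
   function name and parameters are made up for the sketch.  */
static unsigned int
merge_mode_lists_sketch (const int *sve_units, unsigned int n_sve,
			 const int *advsimd_units, unsigned int n_advsimd,
			 int *out)
{
  unsigned int n = 0, sve_i = 0, advsimd_i = 0;
  while (advsimd_i < n_advsimd)
    {
      if (sve_i < n_sve && sve_units[sve_i] > advsimd_units[advsimd_i])
	out[n++] = sve_units[sve_i++];
      else
	out[n++] = advsimd_units[advsimd_i++];
    }
  while (sve_i < n_sve)
    out[n++] = sve_units[sve_i++];
  return n;
}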
16255
ac2b960f
YZ
16256/* Implement TARGET_MANGLE_TYPE. */
16257
6f549691 16258static const char *
ac2b960f
YZ
16259aarch64_mangle_type (const_tree type)
16260{
16261 /* The AArch64 ABI documents say that "__va_list" has to be
17f8ace2 16262 mangled as if it is in the "std" namespace. */
ac2b960f
YZ
16263 if (lang_hooks.types_compatible_p (CONST_CAST_TREE (type), va_list_type))
16264 return "St9__va_list";
16265
c2ec330c
AL
16266 /* Half-precision float. */
16267 if (TREE_CODE (type) == REAL_TYPE && TYPE_PRECISION (type) == 16)
16268 return "Dh";
16269
f9d53c27
TB
16270 /* Mangle AArch64-specific internal types. TYPE_NAME is non-NULL_TREE for
16271 builtin types. */
16272 if (TYPE_NAME (type) != NULL)
624d0f07
RS
16273 {
16274 const char *res;
16275 if ((res = aarch64_general_mangle_builtin_type (type))
16276 || (res = aarch64_sve::mangle_builtin_type (type)))
16277 return res;
16278 }
c6fc9e43 16279
ac2b960f
YZ
16280 /* Use the default mangling. */
16281 return NULL;
16282}
16283
65ef05d0
RS
16284/* Implement TARGET_VERIFY_TYPE_CONTEXT. */
16285
16286static bool
16287aarch64_verify_type_context (location_t loc, type_context_kind context,
16288 const_tree type, bool silent_p)
16289{
16290 return aarch64_sve::verify_type_context (loc, context, type, silent_p);
16291}
16292
75cf1494
KT
16293/* Find the first rtx_insn before insn that will generate an assembly
16294 instruction. */
16295
16296static rtx_insn *
16297aarch64_prev_real_insn (rtx_insn *insn)
16298{
16299 if (!insn)
16300 return NULL;
16301
16302 do
16303 {
16304 insn = prev_real_insn (insn);
16305 }
16306 while (insn && recog_memoized (insn) < 0);
16307
16308 return insn;
16309}
16310
16311static bool
16312is_madd_op (enum attr_type t1)
16313{
16314 unsigned int i;
16315 /* A number of these may be AArch32 only. */
16316 enum attr_type mlatypes[] = {
16317 TYPE_MLA, TYPE_MLAS, TYPE_SMLAD, TYPE_SMLADX, TYPE_SMLAL, TYPE_SMLALD,
16318 TYPE_SMLALS, TYPE_SMLALXY, TYPE_SMLAWX, TYPE_SMLAWY, TYPE_SMLAXY,
16319 TYPE_SMMLA, TYPE_UMLAL, TYPE_UMLALS, TYPE_SMLSD, TYPE_SMLSDX, TYPE_SMLSLD
16320 };
16321
16322 for (i = 0; i < sizeof (mlatypes) / sizeof (enum attr_type); i++)
16323 {
16324 if (t1 == mlatypes[i])
16325 return true;
16326 }
16327
16328 return false;
16329}
16330
16331/* Check if there is a register dependency between a load and the insn
16332 for which we hold recog_data. */
16333
16334static bool
16335dep_between_memop_and_curr (rtx memop)
16336{
16337 rtx load_reg;
16338 int opno;
16339
8baff86e 16340 gcc_assert (GET_CODE (memop) == SET);
75cf1494
KT
16341
16342 if (!REG_P (SET_DEST (memop)))
16343 return false;
16344
16345 load_reg = SET_DEST (memop);
8baff86e 16346 for (opno = 1; opno < recog_data.n_operands; opno++)
75cf1494
KT
16347 {
16348 rtx operand = recog_data.operand[opno];
16349 if (REG_P (operand)
16350 && reg_overlap_mentioned_p (load_reg, operand))
16351 return true;
16352
16353 }
16354 return false;
16355}
16356
8baff86e
KT
16357
16358/* When working around the Cortex-A53 erratum 835769,
16359 given rtx_insn INSN, return true if it is a 64-bit multiply-accumulate
16360 instruction and has a preceding memory instruction such that a NOP
16361 should be inserted between them. */
16362
75cf1494
KT
16363bool
16364aarch64_madd_needs_nop (rtx_insn* insn)
16365{
16366 enum attr_type attr_type;
16367 rtx_insn *prev;
16368 rtx body;
16369
b32c1043 16370 if (!TARGET_FIX_ERR_A53_835769)
75cf1494
KT
16371 return false;
16372
e322d6e3 16373 if (!INSN_P (insn) || recog_memoized (insn) < 0)
75cf1494
KT
16374 return false;
16375
16376 attr_type = get_attr_type (insn);
16377 if (!is_madd_op (attr_type))
16378 return false;
16379
16380 prev = aarch64_prev_real_insn (insn);
3fea1a75
KT
16381 /* aarch64_prev_real_insn can call recog_memoized on insns other than INSN.
16382 Restore recog state to INSN to avoid state corruption. */
16383 extract_constrain_insn_cached (insn);
16384
550e2205 16385 if (!prev || !contains_mem_rtx_p (PATTERN (prev)))
75cf1494
KT
16386 return false;
16387
16388 body = single_set (prev);
16389
16390 /* If the previous insn is a memory op and there is no dependency between
8baff86e
KT
16391 it and the DImode madd, emit a NOP between them. If body is NULL then we
16392 have a complex memory operation, probably a load/store pair.
16393 Be conservative for now and emit a NOP. */
16394 if (GET_MODE (recog_data.operand[0]) == DImode
16395 && (!body || !dep_between_memop_and_curr (body)))
75cf1494
KT
16396 return true;
16397
16398 return false;
16399
16400}
16401
8baff86e
KT
16402
16403/* Implement FINAL_PRESCAN_INSN. */
16404
75cf1494
KT
16405void
16406aarch64_final_prescan_insn (rtx_insn *insn)
16407{
16408 if (aarch64_madd_needs_nop (insn))
16409 fprintf (asm_out_file, "\tnop // between mem op and mult-accumulate\n");
16410}
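/* For illustration only (not taken from real compiler output, register
   numbers arbitrary): with -mfix-cortex-a53-835769, a sequence such as

	ldr	x1, [x2]
	madd	x0, x3, x4, x5

   is printed with a NOP between the load and the DImode multiply-accumulate:

	ldr	x1, [x2]
	nop	// between mem op and mult-accumulate
	madd	x0, x3, x4, x5  */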
16411
16412
43cacb12
RS
16413/* Return true if BASE_OR_STEP is a valid immediate operand for an SVE INDEX
16414 instruction. */
16415
16416bool
16417aarch64_sve_index_immediate_p (rtx base_or_step)
16418{
16419 return (CONST_INT_P (base_or_step)
16420 && IN_RANGE (INTVAL (base_or_step), -16, 15));
16421}
16422
16423/* Return true if X is a valid immediate for the SVE ADD and SUB
16424 instructions. Negate X first if NEGATE_P is true. */
16425
16426bool
16427aarch64_sve_arith_immediate_p (rtx x, bool negate_p)
16428{
16429 rtx elt;
16430
16431 if (!const_vec_duplicate_p (x, &elt)
16432 || !CONST_INT_P (elt))
16433 return false;
16434
16435 HOST_WIDE_INT val = INTVAL (elt);
16436 if (negate_p)
16437 val = -val;
16438 val &= GET_MODE_MASK (GET_MODE_INNER (GET_MODE (x)));
16439
16440 if (val & 0xff)
16441 return IN_RANGE (val, 0, 0xff);
16442 return IN_RANGE (val, 0, 0xff00);
16443}
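/* Illustrative sketch, not part of GCC: the acceptance test above, on a
   value that has already been optionally negated and masked to the element
   width.  An unsigned 8-bit immediate is allowed as-is, or shifted left by
   8 when its low byte is zero (the LSL #8 form of the SVE ADD/SUB
   immediate).  The function name is made up for the sketch.  */
static int
sve_arith_immediate_ok_sketch (unsigned long long val)
{
  if (val & 0xff)
    return val <= 0xff;
  return val <= 0xff00;
}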
16444
624d0f07
RS
16445/* Return true if X is a valid immediate for the SVE SQADD and SQSUB
16446 instructions. Negate X first if NEGATE_P is true. */
16447
16448bool
16449aarch64_sve_sqadd_sqsub_immediate_p (rtx x, bool negate_p)
16450{
16451 rtx elt;
16452
16453 if (!const_vec_duplicate_p (x, &elt)
16454 || !CONST_INT_P (elt))
16455 return false;
16456
16457 if (!aarch64_sve_arith_immediate_p (x, negate_p))
16458 return false;
16459
16460 /* After the optional negation, the immediate must be nonnegative.
16461 E.g. a saturating add of -127 must be done via SQSUB Zn.B, Zn.B, #127
16462 instead of SQADD Zn.B, Zn.B, #129. */
16463 return negate_p == (INTVAL (elt) < 0);
16464}
16465
43cacb12
RS
16466/* Return true if X is a valid immediate operand for an SVE logical
16467 instruction such as AND. */
16468
16469bool
16470aarch64_sve_bitmask_immediate_p (rtx x)
16471{
16472 rtx elt;
16473
16474 return (const_vec_duplicate_p (x, &elt)
16475 && CONST_INT_P (elt)
16476 && aarch64_bitmask_imm (INTVAL (elt),
16477 GET_MODE_INNER (GET_MODE (x))));
16478}
16479
16480/* Return true if X is a valid immediate for the SVE DUP and CPY
16481 instructions. */
16482
16483bool
16484aarch64_sve_dup_immediate_p (rtx x)
16485{
d29f7dd5
RS
16486 x = aarch64_bit_representation (unwrap_const_vec_duplicate (x));
16487 if (!CONST_INT_P (x))
43cacb12
RS
16488 return false;
16489
d29f7dd5 16490 HOST_WIDE_INT val = INTVAL (x);
43cacb12
RS
16491 if (val & 0xff)
16492 return IN_RANGE (val, -0x80, 0x7f);
16493 return IN_RANGE (val, -0x8000, 0x7f00);
16494}
16495
16496/* Return true if X is a valid immediate operand for an SVE CMP instruction.
16497 SIGNED_P says whether the operand is signed rather than unsigned. */
16498
16499bool
16500aarch64_sve_cmp_immediate_p (rtx x, bool signed_p)
16501{
6bc67182
RS
16502 x = unwrap_const_vec_duplicate (x);
16503 return (CONST_INT_P (x)
43cacb12 16504 && (signed_p
6bc67182
RS
16505 ? IN_RANGE (INTVAL (x), -16, 15)
16506 : IN_RANGE (INTVAL (x), 0, 127)));
43cacb12
RS
16507}
16508
16509/* Return true if X is a valid immediate operand for an SVE FADD or FSUB
16510 instruction. Negate X first if NEGATE_P is true. */
16511
16512bool
16513aarch64_sve_float_arith_immediate_p (rtx x, bool negate_p)
16514{
16515 rtx elt;
16516 REAL_VALUE_TYPE r;
16517
16518 if (!const_vec_duplicate_p (x, &elt)
16519 || GET_CODE (elt) != CONST_DOUBLE)
16520 return false;
16521
16522 r = *CONST_DOUBLE_REAL_VALUE (elt);
16523
16524 if (negate_p)
16525 r = real_value_negate (&r);
16526
16527 if (real_equal (&r, &dconst1))
16528 return true;
16529 if (real_equal (&r, &dconsthalf))
16530 return true;
16531 return false;
16532}
16533
16534/* Return true if X is a valid immediate operand for an SVE FMUL
16535 instruction. */
16536
16537bool
16538aarch64_sve_float_mul_immediate_p (rtx x)
16539{
16540 rtx elt;
16541
43cacb12
RS
16542 return (const_vec_duplicate_p (x, &elt)
16543 && GET_CODE (elt) == CONST_DOUBLE
a19ba9e1
RS
16544 && (real_equal (CONST_DOUBLE_REAL_VALUE (elt), &dconsthalf)
16545 || real_equal (CONST_DOUBLE_REAL_VALUE (elt), &dconst2)));
43cacb12
RS
16546}
16547
b187677b
RS
16548/* Return true if replicating VAL32 is a valid 2-byte or 4-byte immediate
16549 for the Advanced SIMD operation described by WHICH and INSN. If INFO
16550 is nonnull, use it to describe valid immediates. */
3520f7cc 16551static bool
b187677b
RS
16552aarch64_advsimd_valid_immediate_hs (unsigned int val32,
16553 simd_immediate_info *info,
16554 enum simd_immediate_check which,
16555 simd_immediate_info::insn_type insn)
16556{
16557 /* Try a 4-byte immediate with LSL. */
16558 for (unsigned int shift = 0; shift < 32; shift += 8)
16559 if ((val32 & (0xff << shift)) == val32)
16560 {
16561 if (info)
16562 *info = simd_immediate_info (SImode, val32 >> shift, insn,
16563 simd_immediate_info::LSL, shift);
16564 return true;
16565 }
3520f7cc 16566
b187677b
RS
16567 /* Try a 2-byte immediate with LSL. */
16568 unsigned int imm16 = val32 & 0xffff;
16569 if (imm16 == (val32 >> 16))
16570 for (unsigned int shift = 0; shift < 16; shift += 8)
16571 if ((imm16 & (0xff << shift)) == imm16)
48063b9d 16572 {
b187677b
RS
16573 if (info)
16574 *info = simd_immediate_info (HImode, imm16 >> shift, insn,
16575 simd_immediate_info::LSL, shift);
16576 return true;
48063b9d 16577 }
3520f7cc 16578
b187677b
RS
16579 /* Try a 4-byte immediate with MSL, except for cases that MVN
16580 can handle. */
16581 if (which == AARCH64_CHECK_MOV)
16582 for (unsigned int shift = 8; shift < 24; shift += 8)
16583 {
16584 unsigned int low = (1 << shift) - 1;
16585 if (((val32 & (0xff << shift)) | low) == val32)
16586 {
16587 if (info)
16588 *info = simd_immediate_info (SImode, val32 >> shift, insn,
16589 simd_immediate_info::MSL, shift);
16590 return true;
16591 }
16592 }
43e9d192 16593
b187677b
RS
16594 return false;
16595}
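/* Illustrative sketch, not part of GCC: decompose a replicated 32-bit value
   into the (imm8, LSL amount) pair that the 4-byte loop above looks for,
   returning -1 when no single-byte encoding exists.  For example,
   0x00560000 yields imm8 0x56 with a shift of 16, while 0x00560100 has two
   nonzero bytes and is rejected.  The names are made up for the sketch.  */
static int
advsimd_imm8_lsl_sketch (unsigned int val32, unsigned int *shift_out)
{
  for (unsigned int shift = 0; shift < 32; shift += 8)
    if ((val32 & (0xffu << shift)) == val32)
      {
	*shift_out = shift;
	return (int) ((val32 >> shift) & 0xff);
      }
  return -1;
}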
16596
16597/* Return true if replicating VAL64 is a valid immediate for the
16598 Advanced SIMD operation described by WHICH. If INFO is nonnull,
16599 use it to describe valid immediates. */
16600static bool
16601aarch64_advsimd_valid_immediate (unsigned HOST_WIDE_INT val64,
16602 simd_immediate_info *info,
16603 enum simd_immediate_check which)
16604{
16605 unsigned int val32 = val64 & 0xffffffff;
16606 unsigned int val16 = val64 & 0xffff;
16607 unsigned int val8 = val64 & 0xff;
16608
16609 if (val32 == (val64 >> 32))
43e9d192 16610 {
b187677b
RS
16611 if ((which & AARCH64_CHECK_ORR) != 0
16612 && aarch64_advsimd_valid_immediate_hs (val32, info, which,
16613 simd_immediate_info::MOV))
16614 return true;
43e9d192 16615
b187677b
RS
16616 if ((which & AARCH64_CHECK_BIC) != 0
16617 && aarch64_advsimd_valid_immediate_hs (~val32, info, which,
16618 simd_immediate_info::MVN))
16619 return true;
ee78df47 16620
b187677b
RS
16621 /* Try using a replicated byte. */
16622 if (which == AARCH64_CHECK_MOV
16623 && val16 == (val32 >> 16)
16624 && val8 == (val16 >> 8))
ee78df47 16625 {
b187677b
RS
16626 if (info)
16627 *info = simd_immediate_info (QImode, val8);
16628 return true;
ee78df47 16629 }
43e9d192
IB
16630 }
16631
b187677b
RS
16632 /* Try using a bit-to-bytemask. */
16633 if (which == AARCH64_CHECK_MOV)
43e9d192 16634 {
b187677b
RS
16635 unsigned int i;
16636 for (i = 0; i < 64; i += 8)
ab6501d7 16637 {
b187677b
RS
16638 unsigned char byte = (val64 >> i) & 0xff;
16639 if (byte != 0 && byte != 0xff)
16640 break;
ab6501d7 16641 }
b187677b 16642 if (i == 64)
ab6501d7 16643 {
b187677b
RS
16644 if (info)
16645 *info = simd_immediate_info (DImode, val64);
16646 return true;
ab6501d7 16647 }
43e9d192 16648 }
b187677b
RS
16649 return false;
16650}
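/* Illustrative sketch, not part of GCC: the bit-to-bytemask test above as a
   scalar predicate.  A 64-bit replicated value qualifies when every byte is
   either 0x00 or 0xff, so 0x00ff00ff00ff00ff is accepted while
   0x00ff00ff00ff00fe is not.  The function name is made up for the
   sketch.  */
static int
advsimd_bytemask_immediate_p_sketch (unsigned long long val64)
{
  for (unsigned int i = 0; i < 64; i += 8)
    {
      unsigned char byte = (val64 >> i) & 0xff;
      if (byte != 0 && byte != 0xff)
	return 0;
    }
  return 1;
}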
43e9d192 16651
43cacb12
RS
16652/* Return true if replicating VAL64 gives a valid immediate for an SVE MOV
16653 instruction. If INFO is nonnull, use it to describe valid immediates. */
16654
16655static bool
16656aarch64_sve_valid_immediate (unsigned HOST_WIDE_INT val64,
16657 simd_immediate_info *info)
16658{
16659 scalar_int_mode mode = DImode;
16660 unsigned int val32 = val64 & 0xffffffff;
16661 if (val32 == (val64 >> 32))
16662 {
16663 mode = SImode;
16664 unsigned int val16 = val32 & 0xffff;
16665 if (val16 == (val32 >> 16))
16666 {
16667 mode = HImode;
16668 unsigned int val8 = val16 & 0xff;
16669 if (val8 == (val16 >> 8))
16670 mode = QImode;
16671 }
16672 }
16673 HOST_WIDE_INT val = trunc_int_for_mode (val64, mode);
16674 if (IN_RANGE (val, -0x80, 0x7f))
16675 {
16676 /* DUP with no shift. */
16677 if (info)
16678 *info = simd_immediate_info (mode, val);
16679 return true;
16680 }
16681 if ((val & 0xff) == 0 && IN_RANGE (val, -0x8000, 0x7f00))
16682 {
16683 /* DUP with LSL #8. */
16684 if (info)
16685 *info = simd_immediate_info (mode, val);
16686 return true;
16687 }
16688 if (aarch64_bitmask_imm (val64, mode))
16689 {
16690 /* DUPM. */
16691 if (info)
16692 *info = simd_immediate_info (mode, val);
16693 return true;
16694 }
16695 return false;
16696}
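/* Illustrative sketch, not part of GCC: the mode-narrowing cascade above,
   returning the narrowest element width in bits whose replication
   reproduces VAL64.  For example, 0x0101010101010101 narrows to 8, while
   0x0001000200010002 only narrows to 32.  The name is made up for the
   sketch.  */
static unsigned int
sve_replicated_width_sketch (unsigned long long val64)
{
  unsigned int width = 64;
  unsigned long long val32 = val64 & 0xffffffff;
  if (val32 == (val64 >> 32))
    {
      width = 32;
      unsigned long long val16 = val32 & 0xffff;
      if (val16 == (val32 >> 16))
	{
	  width = 16;
	  if ((val16 & 0xff) == (val16 >> 8))
	    width = 8;
	}
    }
  return width;
}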
16697
624d0f07
RS
16698/* Return true if X is an UNSPEC_PTRUE constant of the form:
16699
16700 (const (unspec [PATTERN ZERO] UNSPEC_PTRUE))
16701
16702 where PATTERN is the svpattern as a CONST_INT and where ZERO
16703 is a zero constant of the required PTRUE mode (which can have
16704 fewer elements than X's mode, if zero bits are significant).
16705
16706 If so, and if INFO is nonnull, describe the immediate in INFO. */
16707bool
16708aarch64_sve_ptrue_svpattern_p (rtx x, struct simd_immediate_info *info)
16709{
16710 if (GET_CODE (x) != CONST)
16711 return false;
16712
16713 x = XEXP (x, 0);
16714 if (GET_CODE (x) != UNSPEC || XINT (x, 1) != UNSPEC_PTRUE)
16715 return false;
16716
16717 if (info)
16718 {
16719 aarch64_svpattern pattern
16720 = (aarch64_svpattern) INTVAL (XVECEXP (x, 0, 0));
16721 machine_mode pred_mode = GET_MODE (XVECEXP (x, 0, 1));
16722 scalar_int_mode int_mode = aarch64_sve_element_int_mode (pred_mode);
16723 *info = simd_immediate_info (int_mode, pattern);
16724 }
16725 return true;
16726}
16727
0b1fe8cf
RS
16728/* Return true if X is a valid SVE predicate. If INFO is nonnull, use
16729 it to describe valid immediates. */
16730
16731static bool
16732aarch64_sve_pred_valid_immediate (rtx x, simd_immediate_info *info)
16733{
624d0f07
RS
16734 if (aarch64_sve_ptrue_svpattern_p (x, info))
16735 return true;
16736
0b1fe8cf
RS
16737 if (x == CONST0_RTX (GET_MODE (x)))
16738 {
16739 if (info)
16740 *info = simd_immediate_info (DImode, 0);
16741 return true;
16742 }
16743
16744 /* Analyze the value as a VNx16BImode. This should be relatively
16745 efficient, since rtx_vector_builder has enough built-in capacity
16746 to store all VLA predicate constants without needing the heap. */
16747 rtx_vector_builder builder;
16748 if (!aarch64_get_sve_pred_bits (builder, x))
16749 return false;
16750
16751 unsigned int elt_size = aarch64_widest_sve_pred_elt_size (builder);
16752 if (int vl = aarch64_partial_ptrue_length (builder, elt_size))
16753 {
16754 machine_mode mode = aarch64_sve_pred_mode (elt_size).require ();
16755 aarch64_svpattern pattern = aarch64_svpattern_for_vl (mode, vl);
16756 if (pattern != AARCH64_NUM_SVPATTERNS)
16757 {
16758 if (info)
16759 {
16760 scalar_int_mode int_mode = aarch64_sve_element_int_mode (mode);
16761 *info = simd_immediate_info (int_mode, pattern);
16762 }
16763 return true;
16764 }
16765 }
16766 return false;
16767}
16768
b187677b
RS
16769/* Return true if OP is a valid SIMD immediate for the operation
16770 described by WHICH. If INFO is nonnull, use it to describe valid
16771 immediates. */
16772bool
16773aarch64_simd_valid_immediate (rtx op, simd_immediate_info *info,
16774 enum simd_immediate_check which)
16775{
43cacb12
RS
16776 machine_mode mode = GET_MODE (op);
16777 unsigned int vec_flags = aarch64_classify_vector_mode (mode);
16778 if (vec_flags == 0 || vec_flags == (VEC_ADVSIMD | VEC_STRUCT))
16779 return false;
16780
0b1fe8cf
RS
16781 if (vec_flags & VEC_SVE_PRED)
16782 return aarch64_sve_pred_valid_immediate (op, info);
16783
43cacb12 16784 scalar_mode elt_mode = GET_MODE_INNER (mode);
f9093f23 16785 rtx base, step;
b187677b 16786 unsigned int n_elts;
f9093f23
RS
16787 if (GET_CODE (op) == CONST_VECTOR
16788 && CONST_VECTOR_DUPLICATE_P (op))
16789 n_elts = CONST_VECTOR_NPATTERNS (op);
43cacb12
RS
16790 else if ((vec_flags & VEC_SVE_DATA)
16791 && const_vec_series_p (op, &base, &step))
16792 {
16793 gcc_assert (GET_MODE_CLASS (mode) == MODE_VECTOR_INT);
16794 if (!aarch64_sve_index_immediate_p (base)
16795 || !aarch64_sve_index_immediate_p (step))
16796 return false;
16797
16798 if (info)
cc68f7c2
RS
16799 {
16800 /* Get the corresponding container mode. E.g. an INDEX on V2SI
16801 should yield two integer values per 128-bit block, meaning
16802 that we need to treat it in the same way as V2DI and then
16803 ignore the upper 32 bits of each element. */
16804 elt_mode = aarch64_sve_container_int_mode (mode);
16805 *info = simd_immediate_info (elt_mode, base, step);
16806 }
43cacb12
RS
16807 return true;
16808 }
6a70badb
RS
16809 else if (GET_CODE (op) == CONST_VECTOR
16810 && CONST_VECTOR_NUNITS (op).is_constant (&n_elts))
16811 /* N_ELTS set above. */;
b187677b 16812 else
d8edd899 16813 return false;
43e9d192 16814
b187677b 16815 scalar_float_mode elt_float_mode;
f9093f23
RS
16816 if (n_elts == 1
16817 && is_a <scalar_float_mode> (elt_mode, &elt_float_mode))
43e9d192 16818 {
f9093f23
RS
16819 rtx elt = CONST_VECTOR_ENCODED_ELT (op, 0);
16820 if (aarch64_float_const_zero_rtx_p (elt)
16821 || aarch64_float_const_representable_p (elt))
16822 {
16823 if (info)
16824 *info = simd_immediate_info (elt_float_mode, elt);
16825 return true;
16826 }
b187677b 16827 }
43e9d192 16828
b23c6a2c
RS
16829 /* If all elements in an SVE vector have the same value, we have a free
16830 choice between using the element mode and using the container mode.
16831 Using the element mode means that unused parts of the vector are
16832 duplicates of the used elements, while using the container mode means
16833 that the unused parts are an extension of the used elements. Using the
16834 element mode is better for (say) VNx4HI 0x101, since 0x01010101 is valid
16835 for its container mode VNx4SI while 0x00000101 isn't.
16836
16837 If not all elements in an SVE vector have the same value, we need the
16838 transition from one element to the next to occur at container boundaries.
16839 E.g. a fixed-length VNx4HI containing { 1, 2, 3, 4 } should be treated
16840 in the same way as a VNx4SI containing { 1, 2, 3, 4 }. */
16841 scalar_int_mode elt_int_mode;
16842 if ((vec_flags & VEC_SVE_DATA) && n_elts > 1)
16843 elt_int_mode = aarch64_sve_container_int_mode (mode);
16844 else
16845 elt_int_mode = int_mode_for_mode (elt_mode).require ();
16846
16847 unsigned int elt_size = GET_MODE_SIZE (elt_int_mode);
b187677b
RS
16848 if (elt_size > 8)
16849 return false;
e4f0f84d 16850
b187677b
RS
16851 /* Expand the vector constant out into a byte vector, with the least
16852 significant byte of the register first. */
16853 auto_vec<unsigned char, 16> bytes;
16854 bytes.reserve (n_elts * elt_size);
16855 for (unsigned int i = 0; i < n_elts; i++)
16856 {
f9093f23
RS
16857 /* The vector is provided in gcc endian-neutral fashion.
16858 For aarch64_be Advanced SIMD, it must be laid out in the vector
16859 register in reverse order. */
16860 bool swap_p = ((vec_flags & VEC_ADVSIMD) != 0 && BYTES_BIG_ENDIAN);
16861 rtx elt = CONST_VECTOR_ELT (op, swap_p ? (n_elts - 1 - i) : i);
43e9d192 16862
b187677b
RS
16863 if (elt_mode != elt_int_mode)
16864 elt = gen_lowpart (elt_int_mode, elt);
43e9d192 16865
b187677b
RS
16866 if (!CONST_INT_P (elt))
16867 return false;
43e9d192 16868
b187677b
RS
16869 unsigned HOST_WIDE_INT elt_val = INTVAL (elt);
16870 for (unsigned int byte = 0; byte < elt_size; byte++)
48063b9d 16871 {
b187677b
RS
16872 bytes.quick_push (elt_val & 0xff);
16873 elt_val >>= BITS_PER_UNIT;
48063b9d 16874 }
43e9d192
IB
16875 }
16876
b187677b
RS
16877 /* The immediate must repeat every eight bytes. */
16878 unsigned int nbytes = bytes.length ();
16879 for (unsigned i = 8; i < nbytes; ++i)
16880 if (bytes[i] != bytes[i - 8])
16881 return false;
16882
16883 /* Get the repeating 8-byte value as an integer. No endian correction
16884 is needed here because bytes is already in lsb-first order. */
16885 unsigned HOST_WIDE_INT val64 = 0;
16886 for (unsigned int i = 0; i < 8; i++)
16887 val64 |= ((unsigned HOST_WIDE_INT) bytes[i % nbytes]
16888 << (i * BITS_PER_UNIT));
16889
43cacb12
RS
16890 if (vec_flags & VEC_SVE_DATA)
16891 return aarch64_sve_valid_immediate (val64, info);
16892 else
16893 return aarch64_advsimd_valid_immediate (val64, info, which);
16894}
16895
16896/* Check whether X is a VEC_SERIES-like constant that starts at 0 and
16897 has a step in the range of INDEX. Return the index expression if so,
16898 otherwise return null. */
16899rtx
16900aarch64_check_zero_based_sve_index_immediate (rtx x)
16901{
16902 rtx base, step;
16903 if (const_vec_series_p (x, &base, &step)
16904 && base == const0_rtx
16905 && aarch64_sve_index_immediate_p (step))
16906 return step;
16907 return NULL_RTX;
43e9d192
IB
16908}
16909
43e9d192
IB
16910/* Check if immediate shift constants are within range. */
16911bool
ef4bddc2 16912aarch64_simd_shift_imm_p (rtx x, machine_mode mode, bool left)
43e9d192 16913{
6bc67182
RS
16914 x = unwrap_const_vec_duplicate (x);
16915 if (!CONST_INT_P (x))
16916 return false;
43e9d192
IB
16917 int bit_width = GET_MODE_UNIT_SIZE (mode) * BITS_PER_UNIT;
16918 if (left)
6bc67182 16919 return IN_RANGE (INTVAL (x), 0, bit_width - 1);
43e9d192 16920 else
6bc67182 16921 return IN_RANGE (INTVAL (x), 1, bit_width);
43e9d192
IB
16922}
16923
7325d85a
KT
16924/* Return the bitmask CONST_INT to select the bits required by a zero extract
16925 operation of width WIDTH at bit position POS. */
16926
16927rtx
16928aarch64_mask_from_zextract_ops (rtx width, rtx pos)
16929{
16930 gcc_assert (CONST_INT_P (width));
16931 gcc_assert (CONST_INT_P (pos));
16932
16933 unsigned HOST_WIDE_INT mask
16934 = ((unsigned HOST_WIDE_INT) 1 << UINTVAL (width)) - 1;
16935 return GEN_INT (mask << UINTVAL (pos));
16936}
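/* Illustrative sketch, not part of GCC: the scalar form of the mask
   computed above, assuming WIDTH < 64 as in the zero_extract uses it is
   written for.  For example, width 8 at position 16 gives 0xff0000.  The
   name is made up for the sketch.  */
static unsigned long long
zextract_mask_sketch (unsigned int width, unsigned int pos)
{
  return (((unsigned long long) 1 << width) - 1) << pos;
}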
16937
83f8c414 16938bool
a6e0bfa7 16939aarch64_mov_operand_p (rtx x, machine_mode mode)
83f8c414 16940{
83f8c414
CSS
16941 if (GET_CODE (x) == HIGH
16942 && aarch64_valid_symref (XEXP (x, 0), GET_MODE (XEXP (x, 0))))
16943 return true;
16944
82614948 16945 if (CONST_INT_P (x))
83f8c414
CSS
16946 return true;
16947
43cacb12 16948 if (VECTOR_MODE_P (GET_MODE (x)))
678faefc
RS
16949 {
16950 /* Require predicate constants to be VNx16BI before RA, so that we
16951 force everything to have a canonical form. */
16952 if (!lra_in_progress
16953 && !reload_completed
16954 && GET_MODE_CLASS (GET_MODE (x)) == MODE_VECTOR_BOOL
16955 && GET_MODE (x) != VNx16BImode)
16956 return false;
16957
16958 return aarch64_simd_valid_immediate (x, NULL);
16959 }
43cacb12 16960
83f8c414
CSS
16961 if (GET_CODE (x) == SYMBOL_REF && mode == DImode && CONSTANT_ADDRESS_P (x))
16962 return true;
16963
c0e0174b 16964 if (TARGET_SVE && aarch64_sve_cnt_immediate_p (x))
43cacb12
RS
16965 return true;
16966
a6e0bfa7 16967 return aarch64_classify_symbolic_expression (x)
a5350ddc 16968 == SYMBOL_TINY_ABSOLUTE;
83f8c414
CSS
16969}
16970
43e9d192
IB
16971/* Return a const_int vector of VAL. */
16972rtx
ab014eb3 16973aarch64_simd_gen_const_vector_dup (machine_mode mode, HOST_WIDE_INT val)
43e9d192 16974{
59d06c05
RS
16975 rtx c = gen_int_mode (val, GET_MODE_INNER (mode));
16976 return gen_const_vec_duplicate (mode, c);
43e9d192
IB
16977}
16978
051d0e2f
SN
16979/* Check OP is a legal scalar immediate for the MOVI instruction. */
16980
16981bool
77e994c9 16982aarch64_simd_scalar_immediate_valid_for_move (rtx op, scalar_int_mode mode)
051d0e2f 16983{
ef4bddc2 16984 machine_mode vmode;
051d0e2f 16985
43cacb12 16986 vmode = aarch64_simd_container_mode (mode, 64);
051d0e2f 16987 rtx op_v = aarch64_simd_gen_const_vector_dup (vmode, INTVAL (op));
b187677b 16988 return aarch64_simd_valid_immediate (op_v, NULL);
051d0e2f
SN
16989}
16990
988fa693
JG
16991/* Construct and return a PARALLEL RTX vector with elements numbering the
16992 lanes of either the high (HIGH == TRUE) or low (HIGH == FALSE) half of
16993 the vector - from the perspective of the architecture. This does not
16994 line up with GCC's perspective on lane numbers, so we end up with
16995 different masks depending on our target endian-ness. The diagram
16996 below may help. We must draw the distinction when building masks
16997 which select one half of the vector. An instruction selecting
16998 architectural low-lanes for a big-endian target, must be described using
16999 a mask selecting GCC high-lanes.
17000
17001 Big-Endian Little-Endian
17002
17003GCC 0 1 2 3 3 2 1 0
17004 | x | x | x | x | | x | x | x | x |
17005Architecture 3 2 1 0 3 2 1 0
17006
17007Low Mask: { 2, 3 } { 0, 1 }
17008High Mask: { 0, 1 } { 2, 3 }
f5cbabc1
RS
17009
17010 MODE Is the mode of the vector and NUNITS is the number of units in it. */
988fa693 17011
43e9d192 17012rtx
f5cbabc1 17013aarch64_simd_vect_par_cnst_half (machine_mode mode, int nunits, bool high)
43e9d192 17014{
43e9d192 17015 rtvec v = rtvec_alloc (nunits / 2);
988fa693
JG
17016 int high_base = nunits / 2;
17017 int low_base = 0;
17018 int base;
43e9d192
IB
17019 rtx t1;
17020 int i;
17021
988fa693
JG
17022 if (BYTES_BIG_ENDIAN)
17023 base = high ? low_base : high_base;
17024 else
17025 base = high ? high_base : low_base;
17026
17027 for (i = 0; i < nunits / 2; i++)
43e9d192
IB
17028 RTVEC_ELT (v, i) = GEN_INT (base + i);
17029
17030 t1 = gen_rtx_PARALLEL (mode, v);
17031 return t1;
17032}
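/* Illustrative sketch, not part of GCC: the base-lane selection above as a
   standalone function, with BIG_ENDIAN_P standing in for BYTES_BIG_ENDIAN.
   For NUNITS == 4, the architectural low half corresponds to GCC lanes
   { 2, 3 } on big-endian and { 0, 1 } on little-endian, matching the
   diagram.  The name is made up for the sketch.  */
static int
vect_par_cnst_half_base_sketch (int nunits, int high, int big_endian_p)
{
  int high_base = nunits / 2;
  int low_base = 0;
  if (big_endian_p)
    return high ? low_base : high_base;
  return high ? high_base : low_base;
}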
17033
988fa693
JG
17034/* Check OP for validity as a PARALLEL RTX vector with elements
17035 numbering the lanes of either the high (HIGH == TRUE) or low lanes,
17036 from the perspective of the architecture. See the diagram above
17037 aarch64_simd_vect_par_cnst_half for more details. */
17038
17039bool
ef4bddc2 17040aarch64_simd_check_vect_par_cnst_half (rtx op, machine_mode mode,
988fa693
JG
17041 bool high)
17042{
6a70badb
RS
17043 int nelts;
17044 if (!VECTOR_MODE_P (mode) || !GET_MODE_NUNITS (mode).is_constant (&nelts))
f5cbabc1
RS
17045 return false;
17046
6a70badb 17047 rtx ideal = aarch64_simd_vect_par_cnst_half (mode, nelts, high);
988fa693
JG
17048 HOST_WIDE_INT count_op = XVECLEN (op, 0);
17049 HOST_WIDE_INT count_ideal = XVECLEN (ideal, 0);
17050 int i = 0;
17051
988fa693
JG
17052 if (count_op != count_ideal)
17053 return false;
17054
17055 for (i = 0; i < count_ideal; i++)
17056 {
17057 rtx elt_op = XVECEXP (op, 0, i);
17058 rtx elt_ideal = XVECEXP (ideal, 0, i);
17059
4aa81c2e 17060 if (!CONST_INT_P (elt_op)
988fa693
JG
17061 || INTVAL (elt_ideal) != INTVAL (elt_op))
17062 return false;
17063 }
17064 return true;
17065}
17066
4aeb1ba7
RS
17067/* Return a PARALLEL containing NELTS elements, with element I equal
17068 to BASE + I * STEP. */
17069
17070rtx
17071aarch64_gen_stepped_int_parallel (unsigned int nelts, int base, int step)
17072{
17073 rtvec vec = rtvec_alloc (nelts);
17074 for (unsigned int i = 0; i < nelts; ++i)
17075 RTVEC_ELT (vec, i) = gen_int_mode (base + i * step, DImode);
17076 return gen_rtx_PARALLEL (VOIDmode, vec);
17077}
17078
17079/* Return true if OP is a PARALLEL of CONST_INTs that form a linear
17080 series with step STEP. */
17081
17082bool
17083aarch64_stepped_int_parallel_p (rtx op, int step)
17084{
17085 if (GET_CODE (op) != PARALLEL || !CONST_INT_P (XVECEXP (op, 0, 0)))
17086 return false;
17087
17088 unsigned HOST_WIDE_INT base = UINTVAL (XVECEXP (op, 0, 0));
17089 for (int i = 1; i < XVECLEN (op, 0); ++i)
17090 if (!CONST_INT_P (XVECEXP (op, 0, i))
17091 || UINTVAL (XVECEXP (op, 0, i)) != base + i * step)
17092 return false;
17093
17094 return true;
17095}
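/* Illustrative sketch, not part of GCC: the same linear-series test on a
   plain array, accepting VALS when VALS[i] == VALS[0] + i * STEP for all i
   (ignoring the modular unsigned arithmetic used above).  The name and
   parameters are made up for the sketch.  */
static int
stepped_series_p_sketch (const long long *vals, int n, long long step)
{
  for (int i = 1; i < n; ++i)
    if (vals[i] != vals[0] + i * step)
      return 0;
  return 1;
}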
17096
43e9d192
IB
17097/* Bounds-check lanes. Ensure OPERAND lies between LOW (inclusive) and
17098 HIGH (exclusive). */
17099void
46ed6024
CB
17100aarch64_simd_lane_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high,
17101 const_tree exp)
43e9d192
IB
17102{
17103 HOST_WIDE_INT lane;
4aa81c2e 17104 gcc_assert (CONST_INT_P (operand));
43e9d192
IB
17105 lane = INTVAL (operand);
17106
17107 if (lane < low || lane >= high)
46ed6024
CB
17108 {
17109 if (exp)
cf0c27ef 17110 error ("%Klane %wd out of range %wd - %wd", exp, lane, low, high - 1);
46ed6024 17111 else
cf0c27ef 17112 error ("lane %wd out of range %wd - %wd", lane, low, high - 1);
46ed6024 17113 }
43e9d192
IB
17114}
17115
7ac29c0f
RS
17116/* Perform endian correction on lane number N, which indexes a vector
17117 of mode MODE, and return the result as an SImode rtx. */
17118
17119rtx
17120aarch64_endian_lane_rtx (machine_mode mode, unsigned int n)
17121{
17122 return gen_int_mode (ENDIAN_LANE_N (GET_MODE_NUNITS (mode), n), SImode);
17123}
17124
43e9d192 17125/* Return TRUE if OP is a valid vector addressing mode. */
43cacb12 17126
43e9d192
IB
17127bool
17128aarch64_simd_mem_operand_p (rtx op)
17129{
17130 return MEM_P (op) && (GET_CODE (XEXP (op, 0)) == POST_INC
4aa81c2e 17131 || REG_P (XEXP (op, 0)));
43e9d192
IB
17132}
17133
43cacb12
RS
17134/* Return true if OP is a valid MEM operand for an SVE LD1R instruction. */
17135
17136bool
17137aarch64_sve_ld1r_operand_p (rtx op)
17138{
17139 struct aarch64_address_info addr;
17140 scalar_mode mode;
17141
17142 return (MEM_P (op)
17143 && is_a <scalar_mode> (GET_MODE (op), &mode)
17144 && aarch64_classify_address (&addr, XEXP (op, 0), mode, false)
17145 && addr.type == ADDRESS_REG_IMM
17146 && offset_6bit_unsigned_scaled_p (mode, addr.const_offset));
17147}
17148
4aeb1ba7
RS
17149/* Return true if OP is a valid MEM operand for an SVE LD1RQ instruction. */
17150bool
17151aarch64_sve_ld1rq_operand_p (rtx op)
17152{
17153 struct aarch64_address_info addr;
17154 scalar_mode elem_mode = GET_MODE_INNER (GET_MODE (op));
17155 if (!MEM_P (op)
17156 || !aarch64_classify_address (&addr, XEXP (op, 0), elem_mode, false))
17157 return false;
17158
17159 if (addr.type == ADDRESS_REG_IMM)
17160 return offset_4bit_signed_scaled_p (TImode, addr.const_offset);
17161
17162 if (addr.type == ADDRESS_REG_REG)
17163 return (1U << addr.shift) == GET_MODE_SIZE (elem_mode);
17164
17165 return false;
17166}
17167
624d0f07
RS
17168/* Return true if OP is a valid MEM operand for an SVE LDFF1 instruction. */
17169bool
17170aarch64_sve_ldff1_operand_p (rtx op)
17171{
17172 if (!MEM_P (op))
17173 return false;
17174
17175 struct aarch64_address_info addr;
17176 if (!aarch64_classify_address (&addr, XEXP (op, 0), GET_MODE (op), false))
17177 return false;
17178
17179 if (addr.type == ADDRESS_REG_IMM)
17180 return known_eq (addr.const_offset, 0);
17181
17182 return addr.type == ADDRESS_REG_REG;
17183}
17184
17185/* Return true if OP is a valid MEM operand for an SVE LDNF1 instruction. */
17186bool
17187aarch64_sve_ldnf1_operand_p (rtx op)
17188{
17189 struct aarch64_address_info addr;
17190
17191 return (MEM_P (op)
17192 && aarch64_classify_address (&addr, XEXP (op, 0),
17193 GET_MODE (op), false)
17194 && addr.type == ADDRESS_REG_IMM);
17195}
17196
43cacb12
RS
17197/* Return true if OP is a valid MEM operand for an SVE LDR instruction.
17198 The conditions for STR are the same. */
17199bool
17200aarch64_sve_ldr_operand_p (rtx op)
17201{
17202 struct aarch64_address_info addr;
17203
17204 return (MEM_P (op)
17205 && aarch64_classify_address (&addr, XEXP (op, 0), GET_MODE (op),
17206 false, ADDR_QUERY_ANY)
17207 && addr.type == ADDRESS_REG_IMM);
17208}
17209
624d0f07
RS
17210/* Return true if OP is a valid address for an SVE PRF[BHWD] instruction,
17211 addressing memory of mode MODE. */
17212bool
17213aarch64_sve_prefetch_operand_p (rtx op, machine_mode mode)
17214{
17215 struct aarch64_address_info addr;
17216 if (!aarch64_classify_address (&addr, op, mode, false))
17217 return false;
17218
17219 if (addr.type == ADDRESS_REG_IMM)
17220 return known_eq (addr.const_offset, 0);
17221
17222 return addr.type == ADDRESS_REG_REG;
17223}
17224
9f4cbab8
RS
17225/* Return true if OP is a valid MEM operand for an SVE_STRUCT mode.
17226 We need to be able to access the individual pieces, so the range
17227 is different from LD[234] and ST[234]. */
17228bool
17229aarch64_sve_struct_memory_operand_p (rtx op)
17230{
17231 if (!MEM_P (op))
17232 return false;
17233
17234 machine_mode mode = GET_MODE (op);
17235 struct aarch64_address_info addr;
17236 if (!aarch64_classify_address (&addr, XEXP (op, 0), SVE_BYTE_MODE, false,
17237 ADDR_QUERY_ANY)
17238 || addr.type != ADDRESS_REG_IMM)
17239 return false;
17240
17241 poly_int64 first = addr.const_offset;
17242 poly_int64 last = first + GET_MODE_SIZE (mode) - BYTES_PER_SVE_VECTOR;
17243 return (offset_4bit_signed_scaled_p (SVE_BYTE_MODE, first)
17244 && offset_4bit_signed_scaled_p (SVE_BYTE_MODE, last));
17245}
17246
2d8c6dc1
AH
17247/* Emit a register copy from operand to operand, taking care not to
17248 early-clobber source registers in the process.
43e9d192 17249
2d8c6dc1
AH
17250 COUNT is the number of components into which the copy needs to be
17251 decomposed. */
43e9d192 17252void
b8506a8a 17253aarch64_simd_emit_reg_reg_move (rtx *operands, machine_mode mode,
2d8c6dc1 17254 unsigned int count)
43e9d192
IB
17255{
17256 unsigned int i;
2d8c6dc1
AH
17257 int rdest = REGNO (operands[0]);
17258 int rsrc = REGNO (operands[1]);
43e9d192
IB
17259
17260 if (!reg_overlap_mentioned_p (operands[0], operands[1])
2d8c6dc1
AH
17261 || rdest < rsrc)
17262 for (i = 0; i < count; i++)
17263 emit_move_insn (gen_rtx_REG (mode, rdest + i),
17264 gen_rtx_REG (mode, rsrc + i));
43e9d192 17265 else
2d8c6dc1
AH
17266 for (i = 0; i < count; i++)
17267 emit_move_insn (gen_rtx_REG (mode, rdest + count - i - 1),
17268 gen_rtx_REG (mode, rsrc + count - i - 1));
43e9d192
IB
17269}
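/* Illustrative sketch, not part of GCC: the same direction choice applied
   to a plain overlapping array copy.  Copying forwards is safe when the
   destination starts at or below the source; otherwise copying backwards
   ensures no source element is overwritten before it has been read, which
   is what the register-list copy above arranges.  The name is made up for
   the sketch.  */
static void
overlap_safe_copy_sketch (int *dst, const int *src, unsigned int count)
{
  if (dst <= src)
    for (unsigned int i = 0; i < count; i++)
      dst[i] = src[i];
  else
    for (unsigned int i = count; i-- > 0; )
      dst[i] = src[i];
}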
17270
668046d1 17271/* Compute and return the length of aarch64_simd_reglist<mode>, where <mode> is
6ec0e5b9 17272 one of VSTRUCT modes: OI, CI, or XI. */
668046d1 17273int
b8506a8a 17274aarch64_simd_attr_length_rglist (machine_mode mode)
668046d1 17275{
6a70badb
RS
17276 /* This is only used (and only meaningful) for Advanced SIMD, not SVE. */
17277 return (GET_MODE_SIZE (mode).to_constant () / UNITS_PER_VREG) * 4;
668046d1
DS
17278}
17279
db0253a4 17280/* Implement target hook TARGET_VECTOR_ALIGNMENT. The AAPCS64 sets the maximum
43cacb12
RS
17281 alignment of a vector to 128 bits. SVE predicates have an alignment of
17282 16 bits. */
db0253a4
TB
17283static HOST_WIDE_INT
17284aarch64_simd_vector_alignment (const_tree type)
17285{
07108a9e
RS
17286 /* ??? Checking the mode isn't ideal, but VECTOR_BOOLEAN_TYPE_P can
17287 be set for non-predicate vectors of booleans. Modes are the most
17288 direct way we have of identifying real SVE predicate types. */
17289 if (GET_MODE_CLASS (TYPE_MODE (type)) == MODE_VECTOR_BOOL)
17290 return 16;
cc68f7c2
RS
17291 widest_int min_size
17292 = constant_lower_bound (wi::to_poly_widest (TYPE_SIZE (type)));
17293 return wi::umin (min_size, 128).to_uhwi ();
db0253a4
TB
17294}
17295
43cacb12 17296/* Implement target hook TARGET_VECTORIZE_PREFERRED_VECTOR_ALIGNMENT. */
ca31798e 17297static poly_uint64
43cacb12
RS
17298aarch64_vectorize_preferred_vector_alignment (const_tree type)
17299{
17300 if (aarch64_sve_data_mode_p (TYPE_MODE (type)))
17301 {
17302 /* If the length of the vector is fixed, try to align to that length,
17303 otherwise don't try to align at all. */
17304 HOST_WIDE_INT result;
17305 if (!BITS_PER_SVE_VECTOR.is_constant (&result))
17306 result = TYPE_ALIGN (TREE_TYPE (type));
17307 return result;
17308 }
17309 return TYPE_ALIGN (type);
17310}
17311
db0253a4
TB
17312/* Implement target hook TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE. */
17313static bool
17314aarch64_simd_vector_alignment_reachable (const_tree type, bool is_packed)
17315{
17316 if (is_packed)
17317 return false;
17318
43cacb12
RS
17319 /* For fixed-length vectors, check that the vectorizer will aim for
17320 full-vector alignment. This isn't true for generic GCC vectors
17321 that are wider than the ABI maximum of 128 bits. */
ca31798e
AV
17322 poly_uint64 preferred_alignment =
17323 aarch64_vectorize_preferred_vector_alignment (type);
43cacb12 17324 if (TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
ca31798e
AV
17325 && maybe_ne (wi::to_widest (TYPE_SIZE (type)),
17326 preferred_alignment))
db0253a4
TB
17327 return false;
17328
17329 /* Vectors whose size is <= BIGGEST_ALIGNMENT are naturally aligned. */
17330 return true;
17331}
17332
7df76747
N
17333/* Return true if the vector misalignment factor is supported by the
17334 target. */
17335static bool
17336aarch64_builtin_support_vector_misalignment (machine_mode mode,
17337 const_tree type, int misalignment,
17338 bool is_packed)
17339{
17340 if (TARGET_SIMD && STRICT_ALIGNMENT)
17341 {
17342 /* Return false if the movmisalign pattern is not supported for this mode. */
17343 if (optab_handler (movmisalign_optab, mode) == CODE_FOR_nothing)
17344 return false;
17345
a509c571 17346 /* Misalignment factor is unknown at compile time. */
7df76747 17347 if (misalignment == -1)
a509c571 17348 return false;
7df76747
N
17349 }
17350 return default_builtin_support_vector_misalignment (mode, type, misalignment,
17351 is_packed);
17352}
17353
4369c11e
TB
17354/* If VALS is a vector constant that can be loaded into a register
17355 using DUP, generate instructions to do so and return an RTX to
17356 assign to the register. Otherwise return NULL_RTX. */
17357static rtx
17358aarch64_simd_dup_constant (rtx vals)
17359{
ef4bddc2
RS
17360 machine_mode mode = GET_MODE (vals);
17361 machine_mode inner_mode = GET_MODE_INNER (mode);
4369c11e 17362 rtx x;
4369c11e 17363
92695fbb 17364 if (!const_vec_duplicate_p (vals, &x))
4369c11e
TB
17365 return NULL_RTX;
17366
17367 /* We can load this constant by using DUP and a constant in a
17368 single ARM register. This will be cheaper than a vector
17369 load. */
92695fbb 17370 x = copy_to_mode_reg (inner_mode, x);
59d06c05 17371 return gen_vec_duplicate (mode, x);
4369c11e
TB
17372}
17373
17374
17375/* Generate code to load VALS, which is a PARALLEL containing only
17376 constants (for vec_init) or CONST_VECTOR, efficiently into a
17377 register. Returns an RTX to copy into the register, or NULL_RTX
67914693 17378 for a PARALLEL that cannot be converted into a CONST_VECTOR. */
1df3f464 17379static rtx
4369c11e
TB
17380aarch64_simd_make_constant (rtx vals)
17381{
ef4bddc2 17382 machine_mode mode = GET_MODE (vals);
4369c11e
TB
17383 rtx const_dup;
17384 rtx const_vec = NULL_RTX;
4369c11e
TB
17385 int n_const = 0;
17386 int i;
17387
17388 if (GET_CODE (vals) == CONST_VECTOR)
17389 const_vec = vals;
17390 else if (GET_CODE (vals) == PARALLEL)
17391 {
17392 /* A CONST_VECTOR must contain only CONST_INTs and
17393 CONST_DOUBLEs, but CONSTANT_P allows more (e.g. SYMBOL_REF).
17394 Only store valid constants in a CONST_VECTOR. */
6a70badb 17395 int n_elts = XVECLEN (vals, 0);
4369c11e
TB
17396 for (i = 0; i < n_elts; ++i)
17397 {
17398 rtx x = XVECEXP (vals, 0, i);
17399 if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
17400 n_const++;
17401 }
17402 if (n_const == n_elts)
17403 const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0));
17404 }
17405 else
17406 gcc_unreachable ();
17407
17408 if (const_vec != NULL_RTX
b187677b 17409 && aarch64_simd_valid_immediate (const_vec, NULL))
4369c11e
TB
17410 /* Load using MOVI/MVNI. */
17411 return const_vec;
17412 else if ((const_dup = aarch64_simd_dup_constant (vals)) != NULL_RTX)
17413 /* Loaded using DUP. */
17414 return const_dup;
17415 else if (const_vec != NULL_RTX)
67914693 17416 /* Load from constant pool. We cannot take advantage of single-cycle
4369c11e
TB
17417 LD1 because we need a PC-relative addressing mode. */
17418 return const_vec;
17419 else
17420 /* A PARALLEL containing something not valid inside CONST_VECTOR.
67914693 17421 We cannot construct an initializer. */
4369c11e
TB
17422 return NULL_RTX;
17423}
17424
35a093b6
JG
17425/* Expand a vector initialisation sequence, such that TARGET is
17426 initialised to contain VALS. */
17427
4369c11e
TB
17428void
17429aarch64_expand_vector_init (rtx target, rtx vals)
17430{
ef4bddc2 17431 machine_mode mode = GET_MODE (target);
146c2e3a 17432 scalar_mode inner_mode = GET_MODE_INNER (mode);
35a093b6 17433 /* The number of vector elements. */
6a70badb 17434 int n_elts = XVECLEN (vals, 0);
35a093b6 17435 /* The number of vector elements which are not constant. */
8b66a2d4
AL
17436 int n_var = 0;
17437 rtx any_const = NULL_RTX;
35a093b6
JG
17438 /* The first element of vals. */
17439 rtx v0 = XVECEXP (vals, 0, 0);
4369c11e 17440 bool all_same = true;
4369c11e 17441
41dab855
KT
17442 /* This is a special vec_init<M><N> where N is not an element mode but a
17443 vector mode with half the elements of M. We expect to find two entries
17444 of mode N in VALS and we must put their concatenation into TARGET. */
17445 if (XVECLEN (vals, 0) == 2 && VECTOR_MODE_P (GET_MODE (XVECEXP (vals, 0, 0))))
17446 {
17447 gcc_assert (known_eq (GET_MODE_SIZE (mode),
17448 2 * GET_MODE_SIZE (GET_MODE (XVECEXP (vals, 0, 0)))));
17449 rtx lo = XVECEXP (vals, 0, 0);
17450 rtx hi = XVECEXP (vals, 0, 1);
17451 machine_mode narrow_mode = GET_MODE (lo);
17452 gcc_assert (GET_MODE_INNER (narrow_mode) == inner_mode);
17453 gcc_assert (narrow_mode == GET_MODE (hi));
17454
17455 /* When we want to concatenate a half-width vector with zeroes we can
17456 use the aarch64_combinez[_be] patterns. Just make sure that the
17457 zeroes are in the right half. */
17458 if (BYTES_BIG_ENDIAN
17459 && aarch64_simd_imm_zero (lo, narrow_mode)
17460 && general_operand (hi, narrow_mode))
17461 emit_insn (gen_aarch64_combinez_be (narrow_mode, target, hi, lo));
17462 else if (!BYTES_BIG_ENDIAN
17463 && aarch64_simd_imm_zero (hi, narrow_mode)
17464 && general_operand (lo, narrow_mode))
17465 emit_insn (gen_aarch64_combinez (narrow_mode, target, lo, hi));
17466 else
17467 {
17468 /* Else create the two half-width registers and combine them. */
17469 if (!REG_P (lo))
17470 lo = force_reg (GET_MODE (lo), lo);
17471 if (!REG_P (hi))
17472 hi = force_reg (GET_MODE (hi), hi);
17473
17474 if (BYTES_BIG_ENDIAN)
17475 std::swap (lo, hi);
17476 emit_insn (gen_aarch64_simd_combine (narrow_mode, target, lo, hi));
17477 }
17478 return;
17479 }
17480
35a093b6 17481 /* Count the number of variable elements to initialise. */
8b66a2d4 17482 for (int i = 0; i < n_elts; ++i)
4369c11e 17483 {
8b66a2d4 17484 rtx x = XVECEXP (vals, 0, i);
35a093b6 17485 if (!(CONST_INT_P (x) || CONST_DOUBLE_P (x)))
8b66a2d4
AL
17486 ++n_var;
17487 else
17488 any_const = x;
4369c11e 17489
35a093b6 17490 all_same &= rtx_equal_p (x, v0);
4369c11e
TB
17491 }
17492
35a093b6
JG
17493 /* No variable elements, hand off to aarch64_simd_make_constant which knows
17494 how best to handle this. */
4369c11e
TB
17495 if (n_var == 0)
17496 {
17497 rtx constant = aarch64_simd_make_constant (vals);
17498 if (constant != NULL_RTX)
17499 {
17500 emit_move_insn (target, constant);
17501 return;
17502 }
17503 }
17504
17505 /* Splat a single non-constant element if we can. */
17506 if (all_same)
17507 {
35a093b6 17508 rtx x = copy_to_mode_reg (inner_mode, v0);
59d06c05 17509 aarch64_emit_move (target, gen_vec_duplicate (mode, x));
4369c11e
TB
17510 return;
17511 }
17512
85c1b6d7
AP
17513 enum insn_code icode = optab_handler (vec_set_optab, mode);
17514 gcc_assert (icode != CODE_FOR_nothing);
17515
17516 /* If there are only variable elements, try to optimize
17517 the insertion using dup for the most common element
17518 followed by insertions. */
17519
17520 /* The algorithm will fill matches[*][0] with the earliest matching element,
17521 and matches[X][1] with the count of duplicate elements (if X is the
17522 earliest element which has duplicates). */
17523
17524 if (n_var == n_elts && n_elts <= 16)
17525 {
17526 int matches[16][2] = {0};
17527 for (int i = 0; i < n_elts; i++)
17528 {
17529 for (int j = 0; j <= i; j++)
17530 {
17531 if (rtx_equal_p (XVECEXP (vals, 0, i), XVECEXP (vals, 0, j)))
17532 {
17533 matches[i][0] = j;
17534 matches[j][1]++;
17535 break;
17536 }
17537 }
17538 }
17539 int maxelement = 0;
17540 int maxv = 0;
17541 for (int i = 0; i < n_elts; i++)
17542 if (matches[i][1] > maxv)
17543 {
17544 maxelement = i;
17545 maxv = matches[i][1];
17546 }
17547
b4e2cd5b
JG
17548 /* Create a duplicate of the most common element, unless all elements
17549 are equally useless to us, in which case just immediately set the
17550 vector register using the first element. */
17551
17552 if (maxv == 1)
17553 {
17554 /* For vectors of two 64-bit elements, we can do even better. */
17555 if (n_elts == 2
17556 && (inner_mode == E_DImode
17557 || inner_mode == E_DFmode))
17558
17559 {
17560 rtx x0 = XVECEXP (vals, 0, 0);
17561 rtx x1 = XVECEXP (vals, 0, 1);
17562 /* Combine can pick up this case, but handling it directly
17563 here leaves clearer RTL.
17564
17565 This is load_pair_lanes<mode>, and also gives us a clean-up
17566 for store_pair_lanes<mode>. */
17567 if (memory_operand (x0, inner_mode)
17568 && memory_operand (x1, inner_mode)
17569 && !STRICT_ALIGNMENT
17570 && rtx_equal_p (XEXP (x1, 0),
17571 plus_constant (Pmode,
17572 XEXP (x0, 0),
17573 GET_MODE_SIZE (inner_mode))))
17574 {
17575 rtx t;
17576 if (inner_mode == DFmode)
17577 t = gen_load_pair_lanesdf (target, x0, x1);
17578 else
17579 t = gen_load_pair_lanesdi (target, x0, x1);
17580 emit_insn (t);
17581 return;
17582 }
17583 }
17584 /* The subreg-move sequence below will move into lane zero of the
17585 vector register. For big-endian we want that position to hold
17586 the last element of VALS. */
17587 maxelement = BYTES_BIG_ENDIAN ? n_elts - 1 : 0;
17588 rtx x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, maxelement));
17589 aarch64_emit_move (target, lowpart_subreg (mode, x, inner_mode));
17590 }
17591 else
17592 {
17593 rtx x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, maxelement));
17594 aarch64_emit_move (target, gen_vec_duplicate (mode, x));
17595 }
85c1b6d7
AP
17596
17597 /* Insert the rest. */
17598 for (int i = 0; i < n_elts; i++)
17599 {
17600 rtx x = XVECEXP (vals, 0, i);
17601 if (matches[i][0] == maxelement)
17602 continue;
17603 x = copy_to_mode_reg (inner_mode, x);
17604 emit_insn (GEN_FCN (icode) (target, x, GEN_INT (i)));
17605 }
17606 return;
17607 }
17608
35a093b6
JG
17609 /* Initialise a vector which is part-variable. We want to first try
17610 to build those lanes which are constant in the most efficient way we
17611 can. */
17612 if (n_var != n_elts)
4369c11e
TB
17613 {
17614 rtx copy = copy_rtx (vals);
4369c11e 17615
8b66a2d4
AL
17616 /* Load constant part of vector. We really don't care what goes into the
17617 parts we will overwrite, but we're more likely to be able to load the
17618 constant efficiently if it has fewer, larger, repeating parts
17619 (see aarch64_simd_valid_immediate). */
17620 for (int i = 0; i < n_elts; i++)
17621 {
17622 rtx x = XVECEXP (vals, 0, i);
17623 if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
17624 continue;
17625 rtx subst = any_const;
17626 for (int bit = n_elts / 2; bit > 0; bit /= 2)
17627 {
17628 /* Look in the copied vector, as more elements are const. */
17629 rtx test = XVECEXP (copy, 0, i ^ bit);
17630 if (CONST_INT_P (test) || CONST_DOUBLE_P (test))
17631 {
17632 subst = test;
17633 break;
17634 }
17635 }
17636 XVECEXP (copy, 0, i) = subst;
17637 }
4369c11e 17638 aarch64_expand_vector_init (target, copy);
35a093b6 17639 }
4369c11e 17640
35a093b6 17641 /* Insert the variable lanes directly. */
8b66a2d4 17642 for (int i = 0; i < n_elts; i++)
35a093b6
JG
17643 {
17644 rtx x = XVECEXP (vals, 0, i);
17645 if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
17646 continue;
17647 x = copy_to_mode_reg (inner_mode, x);
17648 emit_insn (GEN_FCN (icode) (target, x, GEN_INT (i)));
17649 }
4369c11e
TB
17650}
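/* Illustrative sketch, not part of GCC: the duplicate-counting scheme used
   above, on a plain array of at most 16 integer lane values.  matches[i][0]
   records the earliest lane equal to lane I, and matches[j][1] counts the
   lanes whose earliest match is lane J; the lane with the highest count is
   the best candidate for the initial DUP.  The name is made up for the
   sketch.  */
static int
most_common_lane_sketch (const int *lanes, int n_elts)
{
  int matches[16][2] = {0};
  for (int i = 0; i < n_elts; i++)
    for (int j = 0; j <= i; j++)
      if (lanes[i] == lanes[j])
	{
	  matches[i][0] = j;
	  matches[j][1]++;
	  break;
	}
  int maxelement = 0;
  int maxv = 0;
  for (int i = 0; i < n_elts; i++)
    if (matches[i][1] > maxv)
      {
	maxelement = i;
	maxv = matches[i][1];
      }
  return maxelement;
}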
17651
3a0afad0
PK
17652/* Emit RTL corresponding to:
17653 insr TARGET, ELEM. */
17654
17655static void
17656emit_insr (rtx target, rtx elem)
17657{
17658 machine_mode mode = GET_MODE (target);
17659 scalar_mode elem_mode = GET_MODE_INNER (mode);
17660 elem = force_reg (elem_mode, elem);
17661
17662 insn_code icode = optab_handler (vec_shl_insert_optab, mode);
17663 gcc_assert (icode != CODE_FOR_nothing);
17664 emit_insn (GEN_FCN (icode) (target, target, elem));
17665}
17666
17667/* Subroutine of aarch64_sve_expand_vector_init for handling
17668 trailing constants.
17669 This function works as follows:
17670 (a) Create a new vector consisting of trailing constants.
17671 (b) Initialize TARGET with the constant vector using emit_move_insn.
17672 (c) Insert remaining elements in TARGET using insr.
17673 NELTS is the total number of elements in the original vector, while
17674 NELTS_REQD is the number of elements that are actually
17675 significant.
17676
17677 ??? The heuristic used is to do the above only if the number of constants
17678 is at least half the total number of elements. May need fine tuning. */
17679
17680static bool
17681aarch64_sve_expand_vector_init_handle_trailing_constants
17682 (rtx target, const rtx_vector_builder &builder, int nelts, int nelts_reqd)
17683{
17684 machine_mode mode = GET_MODE (target);
17685 scalar_mode elem_mode = GET_MODE_INNER (mode);
17686 int n_trailing_constants = 0;
17687
17688 for (int i = nelts_reqd - 1;
17689 i >= 0 && aarch64_legitimate_constant_p (elem_mode, builder.elt (i));
17690 i--)
17691 n_trailing_constants++;
17692
17693 if (n_trailing_constants >= nelts_reqd / 2)
17694 {
17695 rtx_vector_builder v (mode, 1, nelts);
17696 for (int i = 0; i < nelts; i++)
17697 v.quick_push (builder.elt (i + nelts_reqd - n_trailing_constants));
17698 rtx const_vec = v.build ();
17699 emit_move_insn (target, const_vec);
17700
17701 for (int i = nelts_reqd - n_trailing_constants - 1; i >= 0; i--)
17702 emit_insr (target, builder.elt (i));
17703
17704 return true;
17705 }
17706
17707 return false;
17708}
17709
17710/* Subroutine of aarch64_sve_expand_vector_init.
17711 Works as follows:
17712 (a) Initialize TARGET by broadcasting element NELTS_REQD - 1 of BUILDER.
17713 (b) Skip trailing elements from BUILDER, which are the same as
17714 element NELTS_REQD - 1.
17715 (c) Insert earlier elements in reverse order in TARGET using insr. */
17716
17717static void
17718aarch64_sve_expand_vector_init_insert_elems (rtx target,
17719 const rtx_vector_builder &builder,
17720 int nelts_reqd)
17721{
17722 machine_mode mode = GET_MODE (target);
17723 scalar_mode elem_mode = GET_MODE_INNER (mode);
17724
17725 struct expand_operand ops[2];
17726 enum insn_code icode = optab_handler (vec_duplicate_optab, mode);
17727 gcc_assert (icode != CODE_FOR_nothing);
17728
17729 create_output_operand (&ops[0], target, mode);
17730 create_input_operand (&ops[1], builder.elt (nelts_reqd - 1), elem_mode);
17731 expand_insn (icode, 2, ops);
17732
17733 int ndups = builder.count_dups (nelts_reqd - 1, -1, -1);
17734 for (int i = nelts_reqd - ndups - 1; i >= 0; i--)
17735 emit_insr (target, builder.elt (i));
17736}
17737
17738/* Subroutine of aarch64_sve_expand_vector_init to handle the case
17739 when all trailing elements of the builder are the same.
17740 This works as follows:
17741 (a) Use expand_insn interface to broadcast last vector element in TARGET.
17742 (b) Insert remaining elements in TARGET using insr.
17743
17744 ??? The heuristic used is to do the above if the number of identical trailing
17745 elements is at least 3/4 of the total number of elements, loosely based on a
17746 heuristic from mostly_zeros_p. May need fine-tuning. */
17747
17748static bool
17749aarch64_sve_expand_vector_init_handle_trailing_same_elem
17750 (rtx target, const rtx_vector_builder &builder, int nelts_reqd)
17751{
17752 int ndups = builder.count_dups (nelts_reqd - 1, -1, -1);
17753 if (ndups >= (3 * nelts_reqd) / 4)
17754 {
17755 aarch64_sve_expand_vector_init_insert_elems (target, builder,
17756 nelts_reqd - ndups + 1);
17757 return true;
17758 }
17759
17760 return false;
17761}
17762
17763/* Initialize register TARGET from BUILDER. NELTS is the constant number
17764 of elements in BUILDER.
17765
17766 The function tries to initialize TARGET from BUILDER if it fits one
17767 of the special cases outlined below.
17768
17769 Failing that, the function divides BUILDER into two sub-vectors:
17770 v_even = even elements of BUILDER;
17771 v_odd = odd elements of BUILDER;
17772
17773 and recursively calls itself with v_even and v_odd.
17774
17775 if (recursive call succeeded for v_even or v_odd)
17776 TARGET = zip (v_even, v_odd)
17777
17778 The function returns true if it managed to build TARGET from BUILDER
17779 with one of the special cases, false otherwise.
17780
17781 Example: {a, 1, b, 2, c, 3, d, 4}
17782
17783 The vector gets divided into:
17784 v_even = {a, b, c, d}
17785 v_odd = {1, 2, 3, 4}
17786
17787 aarch64_sve_expand_vector_init(v_odd) hits case 1 and
17788 initializes tmp2 from the constant vector v_odd using emit_move_insn.
17789
17790 aarch64_sve_expand_vector_init(v_even) fails since v_even contains
17791 4 elements, so we construct tmp1 from v_even using insr:
17792 tmp1 = dup(d)
17793 insr tmp1, c
17794 insr tmp1, b
17795 insr tmp1, a
17796
17797 And finally:
17798 TARGET = zip (tmp1, tmp2)
17799 which sets TARGET to {a, 1, b, 2, c, 3, d, 4}. */
17800
17801static bool
17802aarch64_sve_expand_vector_init (rtx target, const rtx_vector_builder &builder,
17803 int nelts, int nelts_reqd)
17804{
17805 machine_mode mode = GET_MODE (target);
17806
17807 /* Case 1: Vector contains trailing constants. */
17808
17809 if (aarch64_sve_expand_vector_init_handle_trailing_constants
17810 (target, builder, nelts, nelts_reqd))
17811 return true;
17812
17813 /* Case 2: Vector contains leading constants. */
17814
17815 rtx_vector_builder rev_builder (mode, 1, nelts_reqd);
17816 for (int i = 0; i < nelts_reqd; i++)
17817 rev_builder.quick_push (builder.elt (nelts_reqd - i - 1));
17818 rev_builder.finalize ();
17819
17820 if (aarch64_sve_expand_vector_init_handle_trailing_constants
17821 (target, rev_builder, nelts, nelts_reqd))
17822 {
17823 emit_insn (gen_aarch64_sve_rev (mode, target, target));
17824 return true;
17825 }
17826
17827 /* Case 3: Vector contains trailing same element. */
17828
17829 if (aarch64_sve_expand_vector_init_handle_trailing_same_elem
17830 (target, builder, nelts_reqd))
17831 return true;
17832
17833 /* Case 4: Vector contains leading same element. */
17834
17835 if (aarch64_sve_expand_vector_init_handle_trailing_same_elem
17836 (target, rev_builder, nelts_reqd) && nelts_reqd == nelts)
17837 {
17838 emit_insn (gen_aarch64_sve_rev (mode, target, target));
17839 return true;
17840 }
17841
17842 /* Avoid recursing below 4-elements.
17843 ??? The threshold 4 may need fine-tuning. */
17844
17845 if (nelts_reqd <= 4)
17846 return false;
17847
17848 rtx_vector_builder v_even (mode, 1, nelts);
17849 rtx_vector_builder v_odd (mode, 1, nelts);
17850
17851 for (int i = 0; i < nelts * 2; i += 2)
17852 {
17853 v_even.quick_push (builder.elt (i));
17854 v_odd.quick_push (builder.elt (i + 1));
17855 }
17856
17857 v_even.finalize ();
17858 v_odd.finalize ();
17859
17860 rtx tmp1 = gen_reg_rtx (mode);
17861 bool did_even_p = aarch64_sve_expand_vector_init (tmp1, v_even,
17862 nelts, nelts_reqd / 2);
17863
17864 rtx tmp2 = gen_reg_rtx (mode);
17865 bool did_odd_p = aarch64_sve_expand_vector_init (tmp2, v_odd,
17866 nelts, nelts_reqd / 2);
17867
17868 if (!did_even_p && !did_odd_p)
17869 return false;
17870
17871 /* For whichever of v_even and v_odd did not match a special case,
17872 initialize its temporary using INSR, then zip the two results. */
17873
17874 if (!did_even_p)
17875 aarch64_sve_expand_vector_init_insert_elems (tmp1, v_even, nelts_reqd / 2);
17876
17877 if (!did_odd_p)
17878 aarch64_sve_expand_vector_init_insert_elems (tmp2, v_odd, nelts_reqd / 2);
17879
17880 rtvec v = gen_rtvec (2, tmp1, tmp2);
17881 emit_set_insn (target, gen_rtx_UNSPEC (mode, v, UNSPEC_ZIP1));
17882 return true;
17883}
17884
17885/* Initialize register TARGET from the elements in PARALLEL rtx VALS. */
17886
17887void
17888aarch64_sve_expand_vector_init (rtx target, rtx vals)
17889{
17890 machine_mode mode = GET_MODE (target);
17891 int nelts = XVECLEN (vals, 0);
17892
17893 rtx_vector_builder v (mode, 1, nelts);
17894 for (int i = 0; i < nelts; i++)
17895 v.quick_push (XVECEXP (vals, 0, i));
17896 v.finalize ();
17897
17898 /* If neither sub-vector of v could be initialized specially,
17899 then use INSR to insert all elements from v into TARGET.
17900 ??? This might not be optimal for vectors with large
17901 initializers, such as 16 elements or more.
17902 For nelts < 4, it probably isn't useful to handle specially. */
17903
17904 if (nelts < 4
17905 || !aarch64_sve_expand_vector_init (target, v, nelts, nelts))
17906 aarch64_sve_expand_vector_init_insert_elems (target, v, nelts);
17907}
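As a minimal standalone sketch (not part of aarch64.c), the even/odd split and zip recombination used by the recursive expander above can be modelled with plain integers in place of rtx elements; the array contents and names below are illustrative only.

#include <stdio.h>

#define N 8

int
main (void)
{
  /* Stand-in for the builder {a, 1, b, 2, c, 3, d, 4} from the comment,
     with a=10, b=20, c=30, d=40.  */
  int builder[N] = { 10, 1, 20, 2, 30, 3, 40, 4 };
  int v_even[N / 2], v_odd[N / 2], target[N];

  /* Split into even- and odd-indexed sub-vectors, as the expander does.  */
  for (int i = 0; i < N; i += 2)
    {
      v_even[i / 2] = builder[i];
      v_odd[i / 2] = builder[i + 1];
    }

  /* zip1 interleaves the two halves, reconstructing the original order.  */
  for (int i = 0; i < N / 2; i++)
    {
      target[2 * i] = v_even[i];
      target[2 * i + 1] = v_odd[i];
    }

  for (int i = 0; i < N; i++)
    printf ("%d ", target[i]);  /* Prints: 10 1 20 2 30 3 40 4  */
  printf ("\n");
  return 0;
}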
17908
b6c3aea1
RS
17909/* Check whether VALUE is a vector constant in which every element
17910 is either a power of 2 or a negated power of 2. If so, return
17911 a constant vector of log2s, and flip CODE between PLUS and MINUS
17912 if VALUE contains negated powers of 2. Return NULL_RTX otherwise. */
17913
17914static rtx
17915aarch64_convert_mult_to_shift (rtx value, rtx_code &code)
17916{
17917 if (GET_CODE (value) != CONST_VECTOR)
17918 return NULL_RTX;
17919
17920 rtx_vector_builder builder;
17921 if (!builder.new_unary_operation (GET_MODE (value), value, false))
17922 return NULL_RTX;
17923
17924 scalar_mode int_mode = GET_MODE_INNER (GET_MODE (value));
17925 /* 1 if the result of the multiplication must be negated,
17926 0 if it mustn't, or -1 if we don't yet care. */
17927 int negate = -1;
17928 unsigned int encoded_nelts = const_vector_encoded_nelts (value);
17929 for (unsigned int i = 0; i < encoded_nelts; ++i)
17930 {
17931 rtx elt = CONST_VECTOR_ENCODED_ELT (value, i);
17932 if (!CONST_SCALAR_INT_P (elt))
17933 return NULL_RTX;
17934 rtx_mode_t val (elt, int_mode);
17935 wide_int pow2 = wi::neg (val);
17936 if (val != pow2)
17937 {
17938 /* It matters whether we negate or not. Make that choice,
17939 and make sure that it's consistent with previous elements. */
17940 if (negate == !wi::neg_p (val))
17941 return NULL_RTX;
17942 negate = wi::neg_p (val);
17943 if (!negate)
17944 pow2 = val;
17945 }
17946 /* POW2 is now the value that we want to be a power of 2. */
17947 int shift = wi::exact_log2 (pow2);
17948 if (shift < 0)
17949 return NULL_RTX;
17950 builder.quick_push (gen_int_mode (shift, int_mode));
17951 }
17952 if (negate == -1)
17953 /* PLUS and MINUS are equivalent; canonicalize on PLUS. */
17954 code = PLUS;
17955 else if (negate == 1)
17956 code = code == PLUS ? MINUS : PLUS;
17957 return builder.build ();
17958}
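A minimal standalone sketch (not GCC code) of the per-element test that aarch64_convert_mult_to_shift applies: a multiplier of +/- 2^k becomes a shift by k, and a negated power of two flips the accumulate between PLUS and MINUS. The helper name below is hypothetical.

#include <stdio.h>

/* Return the shift amount if M is +/- 2^K and record in *NEGATE whether
   the product must be subtracted rather than added; return -1 if M is not
   a (possibly negated) power of two.  */
static int
mult_to_shift (long long m, int *negate)
{
  *negate = m < 0;
  unsigned long long pow2 = *negate ? -(unsigned long long) m
                                    : (unsigned long long) m;
  if (pow2 == 0 || (pow2 & (pow2 - 1)) != 0)
    return -1;
  int shift = 0;
  while ((pow2 >>= 1) != 0)
    shift++;
  return shift;
}

int
main (void)
{
  int negate;
  int shift = mult_to_shift (4, &negate);
  printf ("4  -> shift %d, negate %d\n", shift, negate);  /* shift 2, negate 0 */
  shift = mult_to_shift (-8, &negate);
  printf ("-8 -> shift %d, negate %d\n", shift, negate);  /* shift 3, negate 1 */
  return 0;
}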
17959
17960/* Prepare for an integer SVE multiply-add or multiply-subtract pattern;
17961 CODE is PLUS for the former and MINUS for the latter. OPERANDS is the
17962 operands array, in the same order as for fma_optab. Return true if
17963 the function emitted all the necessary instructions, false if the caller
17964 should generate the pattern normally with the new OPERANDS array. */
17965
17966bool
17967aarch64_prepare_sve_int_fma (rtx *operands, rtx_code code)
17968{
17969 machine_mode mode = GET_MODE (operands[0]);
17970 if (rtx shifts = aarch64_convert_mult_to_shift (operands[2], code))
17971 {
17972 rtx product = expand_binop (mode, vashl_optab, operands[1], shifts,
17973 NULL_RTX, true, OPTAB_DIRECT);
17974 force_expand_binop (mode, code == PLUS ? add_optab : sub_optab,
17975 operands[3], product, operands[0], true,
17976 OPTAB_DIRECT);
17977 return true;
17978 }
17979 operands[2] = force_reg (mode, operands[2]);
17980 return false;
17981}
17982
17983/* Likewise, but for a conditional pattern. */
17984
17985bool
17986aarch64_prepare_sve_cond_int_fma (rtx *operands, rtx_code code)
17987{
17988 machine_mode mode = GET_MODE (operands[0]);
17989 if (rtx shifts = aarch64_convert_mult_to_shift (operands[3], code))
17990 {
17991 rtx product = expand_binop (mode, vashl_optab, operands[2], shifts,
17992 NULL_RTX, true, OPTAB_DIRECT);
17993 emit_insn (gen_cond (code, mode, operands[0], operands[1],
17994 operands[4], product, operands[5]));
17995 return true;
17996 }
17997 operands[3] = force_reg (mode, operands[3]);
17998 return false;
17999}
18000
43e9d192 18001static unsigned HOST_WIDE_INT
ef4bddc2 18002aarch64_shift_truncation_mask (machine_mode mode)
43e9d192 18003{
43cacb12
RS
18004 if (!SHIFT_COUNT_TRUNCATED || aarch64_vector_data_mode_p (mode))
18005 return 0;
18006 return GET_MODE_UNIT_BITSIZE (mode) - 1;
43e9d192
IB
18007}
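A small standalone example (not GCC code) of what the returned mask means for scalar DImode, where GET_MODE_UNIT_BITSIZE - 1 is 63: AArch64 register-controlled shifts use the count modulo the register width, so masking the count costs nothing, while vector data modes get a mask of 0 (no truncation assumed).

#include <stdio.h>

int
main (void)
{
  unsigned long long x = 1;
  unsigned int count = 67;              /* 67 & 63 == 3 */

  /* Masking with the DImode truncation mask (63) matches what the hardware
     does with the shift count anyway, and keeps the C shift well defined.  */
  printf ("%llu\n", x << (count & 63)); /* Prints 8.  */
  return 0;
}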
18008
43e9d192
IB
18009/* Select a format to encode pointers in exception handling data. */
18010int
18011aarch64_asm_preferred_eh_data_format (int code ATTRIBUTE_UNUSED, int global)
18012{
18013 int type;
18014 switch (aarch64_cmodel)
18015 {
18016 case AARCH64_CMODEL_TINY:
18017 case AARCH64_CMODEL_TINY_PIC:
18018 case AARCH64_CMODEL_SMALL:
18019 case AARCH64_CMODEL_SMALL_PIC:
1b1e81f8 18020 case AARCH64_CMODEL_SMALL_SPIC:
43e9d192
IB
18021 /* text+got+data < 4Gb. 4-byte signed relocs are sufficient
18022 for everything. */
18023 type = DW_EH_PE_sdata4;
18024 break;
18025 default:
18026 /* No assumptions here. 8-byte relocs required. */
18027 type = DW_EH_PE_sdata8;
18028 break;
18029 }
18030 return (global ? DW_EH_PE_indirect : 0) | DW_EH_PE_pcrel | type;
18031}
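A standalone illustration (not GCC code) of the encoding byte this hook selects for a global symbol under the small code model; the DW_EH_PE_* values below are the standard GNU exception-handling pointer-encoding constants.

#include <stdio.h>

#define DW_EH_PE_sdata4   0x0b
#define DW_EH_PE_pcrel    0x10
#define DW_EH_PE_indirect 0x80

int
main (void)
{
  /* Small code model, global symbol: indirect, pc-relative, 4-byte signed.  */
  int encoding = DW_EH_PE_indirect | DW_EH_PE_pcrel | DW_EH_PE_sdata4;
  printf ("%#x\n", encoding);           /* Prints 0x9b.  */
  return 0;
}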
18032
b07fc91c
SN
18033/* Output .variant_pcs for aarch64_vector_pcs function symbols. */
18034
18035static void
18036aarch64_asm_output_variant_pcs (FILE *stream, const tree decl, const char* name)
18037{
c600df9a 18038 if (TREE_CODE (decl) == FUNCTION_DECL)
b07fc91c 18039 {
c600df9a
RS
18040 arm_pcs pcs = (arm_pcs) fndecl_abi (decl).id ();
18041 if (pcs == ARM_PCS_SIMD || pcs == ARM_PCS_SVE)
18042 {
18043 fprintf (stream, "\t.variant_pcs\t");
18044 assemble_name (stream, name);
18045 fprintf (stream, "\n");
18046 }
b07fc91c
SN
18047 }
18048}
18049
e1c1ecb0
KT
18050/* The last .arch and .tune assembly strings that we printed. */
18051static std::string aarch64_last_printed_arch_string;
18052static std::string aarch64_last_printed_tune_string;
18053
361fb3ee
KT
18054/* Implement ASM_DECLARE_FUNCTION_NAME. Output the ISA features used
18055 by the function fndecl. */
18056
18057void
18058aarch64_declare_function_name (FILE *stream, const char* name,
18059 tree fndecl)
18060{
18061 tree target_parts = DECL_FUNCTION_SPECIFIC_TARGET (fndecl);
18062
18063 struct cl_target_option *targ_options;
18064 if (target_parts)
18065 targ_options = TREE_TARGET_OPTION (target_parts);
18066 else
18067 targ_options = TREE_TARGET_OPTION (target_option_current_node);
18068 gcc_assert (targ_options);
18069
18070 const struct processor *this_arch
18071 = aarch64_get_arch (targ_options->x_explicit_arch);
18072
28108a53 18073 uint64_t isa_flags = targ_options->x_aarch64_isa_flags;
054b4005 18074 std::string extension
04a99ebe
JG
18075 = aarch64_get_extension_string_for_isa_flags (isa_flags,
18076 this_arch->flags);
e1c1ecb0
KT
18077 /* Only update the assembler .arch string if it is distinct from the last
18078 such string we printed. */
18079 std::string to_print = this_arch->name + extension;
18080 if (to_print != aarch64_last_printed_arch_string)
18081 {
18082 asm_fprintf (asm_out_file, "\t.arch %s\n", to_print.c_str ());
18083 aarch64_last_printed_arch_string = to_print;
18084 }
361fb3ee
KT
18085
18086 /* Print the cpu name we're tuning for in the comments; this might be
e1c1ecb0
KT
18087 useful to readers of the generated asm. Do it only when it changes
18088 from function to function and verbose assembly is requested. */
361fb3ee
KT
18089 const struct processor *this_tune
18090 = aarch64_get_tune_cpu (targ_options->x_explicit_tune_core);
18091
e1c1ecb0
KT
18092 if (flag_debug_asm && aarch64_last_printed_tune_string != this_tune->name)
18093 {
18094 asm_fprintf (asm_out_file, "\t" ASM_COMMENT_START ".tune %s\n",
18095 this_tune->name);
18096 aarch64_last_printed_tune_string = this_tune->name;
18097 }
361fb3ee 18098
b07fc91c
SN
18099 aarch64_asm_output_variant_pcs (stream, fndecl, name);
18100
361fb3ee
KT
18101 /* Don't forget the type directive for ELF. */
18102 ASM_OUTPUT_TYPE_DIRECTIVE (stream, name, "function");
18103 ASM_OUTPUT_LABEL (stream, name);
18104}
18105
b07fc91c
SN
18106/* Implement ASM_OUTPUT_DEF_FROM_DECLS. Output .variant_pcs for aliases. */
18107
18108void
18109aarch64_asm_output_alias (FILE *stream, const tree decl, const tree target)
18110{
18111 const char *name = XSTR (XEXP (DECL_RTL (decl), 0), 0);
18112 const char *value = IDENTIFIER_POINTER (target);
18113 aarch64_asm_output_variant_pcs (stream, decl, name);
18114 ASM_OUTPUT_DEF (stream, name, value);
18115}
18116
18117/* Implement ASM_OUTPUT_EXTERNAL. Output .variant_pcs for undefined
18118 function symbol references. */
18119
18120void
e8c47069 18121aarch64_asm_output_external (FILE *stream, tree decl, const char* name)
b07fc91c 18122{
e8c47069 18123 default_elf_asm_output_external (stream, decl, name);
b07fc91c
SN
18124 aarch64_asm_output_variant_pcs (stream, decl, name);
18125}
18126
8fc16d72
ST
18127/* Triggered after a .cfi_startproc directive is emitted into the assembly file.
18128 Used to output the .cfi_b_key_frame directive when signing the current
18129 function with the B key. */
18130
18131void
18132aarch64_post_cfi_startproc (FILE *f, tree ignored ATTRIBUTE_UNUSED)
18133{
2bdc7dcb 18134 if (cfun->machine->frame.laid_out && aarch64_return_address_signing_enabled ()
8fc16d72
ST
18135 && aarch64_ra_sign_key == AARCH64_KEY_B)
18136 asm_fprintf (f, "\t.cfi_b_key_frame\n");
18137}
18138
e1c1ecb0
KT
18139/* Implements TARGET_ASM_FILE_START. Output the assembly header. */
18140
18141static void
18142aarch64_start_file (void)
18143{
18144 struct cl_target_option *default_options
18145 = TREE_TARGET_OPTION (target_option_default_node);
18146
18147 const struct processor *default_arch
18148 = aarch64_get_arch (default_options->x_explicit_arch);
28108a53 18149 uint64_t default_isa_flags = default_options->x_aarch64_isa_flags;
e1c1ecb0 18150 std::string extension
04a99ebe
JG
18151 = aarch64_get_extension_string_for_isa_flags (default_isa_flags,
18152 default_arch->flags);
e1c1ecb0
KT
18153
18154 aarch64_last_printed_arch_string = default_arch->name + extension;
18155 aarch64_last_printed_tune_string = "";
18156 asm_fprintf (asm_out_file, "\t.arch %s\n",
18157 aarch64_last_printed_arch_string.c_str ());
18158
18159 default_file_start ();
18160}
18161
0462169c
SN
18162/* Emit load exclusive. */
18163
18164static void
ef4bddc2 18165aarch64_emit_load_exclusive (machine_mode mode, rtx rval,
0462169c
SN
18166 rtx mem, rtx model_rtx)
18167{
4a2095eb
RH
18168 if (mode == TImode)
18169 emit_insn (gen_aarch64_load_exclusive_pair (gen_lowpart (DImode, rval),
18170 gen_highpart (DImode, rval),
18171 mem, model_rtx));
18172 else
18173 emit_insn (gen_aarch64_load_exclusive (mode, rval, mem, model_rtx));
0462169c
SN
18174}
18175
18176/* Emit store exclusive. */
18177
18178static void
ef4bddc2 18179aarch64_emit_store_exclusive (machine_mode mode, rtx bval,
4a2095eb 18180 rtx mem, rtx rval, rtx model_rtx)
0462169c 18181{
4a2095eb
RH
18182 if (mode == TImode)
18183 emit_insn (gen_aarch64_store_exclusive_pair
18184 (bval, mem, operand_subword (rval, 0, 0, TImode),
18185 operand_subword (rval, 1, 0, TImode), model_rtx));
18186 else
18187 emit_insn (gen_aarch64_store_exclusive (mode, bval, mem, rval, model_rtx));
0462169c
SN
18188}
18189
18190/* Emit jump instruction INSN and mark it as very unlikely to be taken. */
18191
18192static void
18193aarch64_emit_unlikely_jump (rtx insn)
18194{
f370536c 18195 rtx_insn *jump = emit_jump_insn (insn);
5fa396ad 18196 add_reg_br_prob_note (jump, profile_probability::very_unlikely ());
0462169c
SN
18197}
18198
3950b229
RH
18199/* We store the names of the various atomic helpers in a 5x4 array.
18200 Return the libcall function given MODE, MODEL and NAMES. */
18201
18202rtx
18203aarch64_atomic_ool_func(machine_mode mode, rtx model_rtx,
18204 const atomic_ool_names *names)
18205{
18206 memmodel model = memmodel_base (INTVAL (model_rtx));
18207 int mode_idx, model_idx;
18208
18209 switch (mode)
18210 {
18211 case E_QImode:
18212 mode_idx = 0;
18213 break;
18214 case E_HImode:
18215 mode_idx = 1;
18216 break;
18217 case E_SImode:
18218 mode_idx = 2;
18219 break;
18220 case E_DImode:
18221 mode_idx = 3;
18222 break;
18223 case E_TImode:
18224 mode_idx = 4;
18225 break;
18226 default:
18227 gcc_unreachable ();
18228 }
18229
18230 switch (model)
18231 {
18232 case MEMMODEL_RELAXED:
18233 model_idx = 0;
18234 break;
18235 case MEMMODEL_CONSUME:
18236 case MEMMODEL_ACQUIRE:
18237 model_idx = 1;
18238 break;
18239 case MEMMODEL_RELEASE:
18240 model_idx = 2;
18241 break;
18242 case MEMMODEL_ACQ_REL:
18243 case MEMMODEL_SEQ_CST:
18244 model_idx = 3;
18245 break;
18246 default:
18247 gcc_unreachable ();
18248 }
18249
18250 return init_one_libfunc_visibility (names->str[mode_idx][model_idx],
18251 VISIBILITY_HIDDEN);
18252}
18253
18254#define DEF0(B, N) \
18255 { "__aarch64_" #B #N "_relax", \
18256 "__aarch64_" #B #N "_acq", \
18257 "__aarch64_" #B #N "_rel", \
18258 "__aarch64_" #B #N "_acq_rel" }
18259
18260#define DEF4(B) DEF0(B, 1), DEF0(B, 2), DEF0(B, 4), DEF0(B, 8), \
18261 { NULL, NULL, NULL, NULL }
18262#define DEF5(B) DEF0(B, 1), DEF0(B, 2), DEF0(B, 4), DEF0(B, 8), DEF0(B, 16)
18263
18264static const atomic_ool_names aarch64_ool_cas_names = { { DEF5(cas) } };
18265const atomic_ool_names aarch64_ool_swp_names = { { DEF4(swp) } };
18266const atomic_ool_names aarch64_ool_ldadd_names = { { DEF4(ldadd) } };
18267const atomic_ool_names aarch64_ool_ldset_names = { { DEF4(ldset) } };
18268const atomic_ool_names aarch64_ool_ldclr_names = { { DEF4(ldclr) } };
18269const atomic_ool_names aarch64_ool_ldeor_names = { { DEF4(ldeor) } };
18270
18271#undef DEF0
18272#undef DEF4
18273#undef DEF5
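A standalone sketch (not GCC code) of how the tables above map a mode and memory model onto a helper name; for example, SImode with MEMMODEL_SEQ_CST on the CAS table selects "__aarch64_cas4_acq_rel".

#include <stdio.h>

int
main (void)
{
  static const char *const size[] = { "1", "2", "4", "8", "16" };
  static const char *const model[] = { "_relax", "_acq", "_rel", "_acq_rel" };

  int mode_idx = 2;   /* SImode -> 4-byte access.  */
  int model_idx = 3;  /* MEMMODEL_SEQ_CST (and ACQ_REL).  */

  printf ("__aarch64_cas%s%s\n", size[mode_idx], model[model_idx]);
  /* Prints: __aarch64_cas4_acq_rel  */
  return 0;
}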
18274
0462169c
SN
18275/* Expand a compare and swap pattern. */
18276
18277void
18278aarch64_expand_compare_and_swap (rtx operands[])
18279{
d400fda3
RH
18280 rtx bval, rval, mem, oldval, newval, is_weak, mod_s, mod_f, x, cc_reg;
18281 machine_mode mode, r_mode;
0462169c
SN
18282
18283 bval = operands[0];
18284 rval = operands[1];
18285 mem = operands[2];
18286 oldval = operands[3];
18287 newval = operands[4];
18288 is_weak = operands[5];
18289 mod_s = operands[6];
18290 mod_f = operands[7];
18291 mode = GET_MODE (mem);
0462169c
SN
18292
18293 /* Normally the succ memory model must be stronger than fail, but in the
18294 unlikely event of fail being ACQUIRE and succ being RELEASE we need to
18295 promote succ to ACQ_REL so that we don't lose the acquire semantics. */
46b35980
AM
18296 if (is_mm_acquire (memmodel_from_int (INTVAL (mod_f)))
18297 && is_mm_release (memmodel_from_int (INTVAL (mod_s))))
0462169c
SN
18298 mod_s = GEN_INT (MEMMODEL_ACQ_REL);
18299
d400fda3
RH
18300 r_mode = mode;
18301 if (mode == QImode || mode == HImode)
0462169c 18302 {
d400fda3
RH
18303 r_mode = SImode;
18304 rval = gen_reg_rtx (r_mode);
0462169c
SN
18305 }
18306
b0770c0f 18307 if (TARGET_LSE)
77f33f44
RH
18308 {
18309 /* The CAS insn requires oldval and rval overlap, but we need to
18310 have a copy of oldval saved across the operation to tell if
18311 the operation is successful. */
d400fda3
RH
18312 if (reg_overlap_mentioned_p (rval, oldval))
18313 rval = copy_to_mode_reg (r_mode, oldval);
77f33f44 18314 else
d400fda3
RH
18315 emit_move_insn (rval, gen_lowpart (r_mode, oldval));
18316
77f33f44
RH
18317 emit_insn (gen_aarch64_compare_and_swap_lse (mode, rval, mem,
18318 newval, mod_s));
d400fda3 18319 cc_reg = aarch64_gen_compare_reg_maybe_ze (NE, rval, oldval, mode);
77f33f44 18320 }
3950b229
RH
18321 else if (TARGET_OUTLINE_ATOMICS)
18322 {
18323 /* Oldval must satisfy compare afterward. */
18324 if (!aarch64_plus_operand (oldval, mode))
18325 oldval = force_reg (mode, oldval);
18326 rtx func = aarch64_atomic_ool_func (mode, mod_s, &aarch64_ool_cas_names);
18327 rval = emit_library_call_value (func, NULL_RTX, LCT_NORMAL, r_mode,
18328 oldval, mode, newval, mode,
18329 XEXP (mem, 0), Pmode);
18330 cc_reg = aarch64_gen_compare_reg_maybe_ze (NE, rval, oldval, mode);
18331 }
b0770c0f 18332 else
d400fda3
RH
18333 {
18334 /* The oldval predicate varies by mode. Test it and force to reg. */
18335 insn_code code = code_for_aarch64_compare_and_swap (mode);
18336 if (!insn_data[code].operand[2].predicate (oldval, mode))
18337 oldval = force_reg (mode, oldval);
0462169c 18338
d400fda3
RH
18339 emit_insn (GEN_FCN (code) (rval, mem, oldval, newval,
18340 is_weak, mod_s, mod_f));
18341 cc_reg = gen_rtx_REG (CCmode, CC_REGNUM);
18342 }
18343
18344 if (r_mode != mode)
77f33f44
RH
18345 rval = gen_lowpart (mode, rval);
18346 emit_move_insn (operands[1], rval);
0462169c 18347
d400fda3 18348 x = gen_rtx_EQ (SImode, cc_reg, const0_rtx);
f7df4a84 18349 emit_insn (gen_rtx_SET (bval, x));
0462169c
SN
18350}
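The C-level operation that this expander serves, as a minimal standalone example (not GCC internals): depending on target options, the builtin below becomes an LSE CAS instruction, a call to one of the __aarch64_cas* outline helpers, or the LDXR/STXR loop produced by aarch64_split_compare_and_swap.

#include <stdio.h>

int
main (void)
{
  int mem = 41;
  int expected = 41;
  int desired = 42;

  /* Strong compare-and-swap, seq_cst on both success and failure.  */
  int ok = __atomic_compare_exchange_n (&mem, &expected, desired,
                                        /*weak=*/0, __ATOMIC_SEQ_CST,
                                        __ATOMIC_SEQ_CST);
  printf ("ok=%d mem=%d\n", ok, mem);   /* Prints: ok=1 mem=42  */
  return 0;
}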
18351
f70fb3b6
MW
18352/* Emit a barrier, that is appropriate for memory model MODEL, at the end of a
18353 sequence implementing an atomic operation. */
18354
18355static void
18356aarch64_emit_post_barrier (enum memmodel model)
18357{
18358 const enum memmodel base_model = memmodel_base (model);
18359
18360 if (is_mm_sync (model)
18361 && (base_model == MEMMODEL_ACQUIRE
18362 || base_model == MEMMODEL_ACQ_REL
18363 || base_model == MEMMODEL_SEQ_CST))
18364 {
18365 emit_insn (gen_mem_thread_fence (GEN_INT (MEMMODEL_SEQ_CST)));
18366 }
18367}
18368
0462169c
SN
18369/* Split a compare and swap pattern. */
18370
18371void
18372aarch64_split_compare_and_swap (rtx operands[])
18373{
b7e560de 18374 rtx rval, mem, oldval, newval, scratch, x, model_rtx;
ef4bddc2 18375 machine_mode mode;
0462169c 18376 bool is_weak;
5d8a22a5 18377 rtx_code_label *label1, *label2;
ab876106 18378 enum memmodel model;
0462169c
SN
18379
18380 rval = operands[0];
18381 mem = operands[1];
18382 oldval = operands[2];
18383 newval = operands[3];
18384 is_weak = (operands[4] != const0_rtx);
ab876106 18385 model_rtx = operands[5];
0462169c
SN
18386 scratch = operands[7];
18387 mode = GET_MODE (mem);
ab876106 18388 model = memmodel_from_int (INTVAL (model_rtx));
0462169c 18389
17f47f86
KT
18390 /* When OLDVAL is zero and we want the strong version we can emit a tighter
18391 loop:
18392 .label1:
18393 LD[A]XR rval, [mem]
18394 CBNZ rval, .label2
18395 ST[L]XR scratch, newval, [mem]
18396 CBNZ scratch, .label1
18397 .label2:
18398 CMP rval, 0. */
b7e560de
RH
18399 bool strong_zero_p = (!is_weak && !aarch64_track_speculation
18400 && oldval == const0_rtx && mode != TImode);
17f47f86 18401
5d8a22a5 18402 label1 = NULL;
0462169c
SN
18403 if (!is_weak)
18404 {
18405 label1 = gen_label_rtx ();
18406 emit_label (label1);
18407 }
18408 label2 = gen_label_rtx ();
18409
ab876106
MW
18410 /* The initial load can be relaxed for a __sync operation since a final
18411 barrier will be emitted to stop code hoisting. */
18412 if (is_mm_sync (model))
b7e560de 18413 aarch64_emit_load_exclusive (mode, rval, mem, GEN_INT (MEMMODEL_RELAXED));
ab876106
MW
18414 else
18415 aarch64_emit_load_exclusive (mode, rval, mem, model_rtx);
0462169c 18416
17f47f86 18417 if (strong_zero_p)
b7e560de 18418 x = gen_rtx_NE (VOIDmode, rval, const0_rtx);
17f47f86
KT
18419 else
18420 {
b7e560de
RH
18421 rtx cc_reg = aarch64_gen_compare_reg_maybe_ze (NE, rval, oldval, mode);
18422 x = gen_rtx_NE (VOIDmode, cc_reg, const0_rtx);
17f47f86 18423 }
b7e560de
RH
18424 x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
18425 gen_rtx_LABEL_REF (Pmode, label2), pc_rtx);
18426 aarch64_emit_unlikely_jump (gen_rtx_SET (pc_rtx, x));
0462169c 18427
ab876106 18428 aarch64_emit_store_exclusive (mode, scratch, mem, newval, model_rtx);
0462169c
SN
18429
18430 if (!is_weak)
18431 {
6e1eaca9
RE
18432 if (aarch64_track_speculation)
18433 {
18434 /* Emit an explicit compare instruction, so that we can correctly
18435 track the condition codes. */
18436 rtx cc_reg = aarch64_gen_compare_reg (NE, scratch, const0_rtx);
18437 x = gen_rtx_NE (GET_MODE (cc_reg), cc_reg, const0_rtx);
18438 }
18439 else
18440 x = gen_rtx_NE (VOIDmode, scratch, const0_rtx);
18441
0462169c
SN
18442 x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
18443 gen_rtx_LABEL_REF (Pmode, label1), pc_rtx);
f7df4a84 18444 aarch64_emit_unlikely_jump (gen_rtx_SET (pc_rtx, x));
0462169c
SN
18445 }
18446 else
b7e560de 18447 aarch64_gen_compare_reg (NE, scratch, const0_rtx);
0462169c
SN
18448
18449 emit_label (label2);
b7e560de 18450
17f47f86
KT
18451 /* If we used a CBNZ in the exchange loop emit an explicit compare with RVAL
18452 to set the condition flags. If this is not used it will be removed by
18453 later passes. */
18454 if (strong_zero_p)
b7e560de
RH
18455 aarch64_gen_compare_reg (NE, rval, const0_rtx);
18456
ab876106
MW
18457 /* Emit any final barrier needed for a __sync operation. */
18458 if (is_mm_sync (model))
18459 aarch64_emit_post_barrier (model);
0462169c 18460}
9cd7b720 18461
0462169c
SN
18462/* Split an atomic operation. */
18463
18464void
18465aarch64_split_atomic_op (enum rtx_code code, rtx old_out, rtx new_out, rtx mem,
9cd7b720 18466 rtx value, rtx model_rtx, rtx cond)
0462169c 18467{
ef4bddc2
RS
18468 machine_mode mode = GET_MODE (mem);
18469 machine_mode wmode = (mode == DImode ? DImode : SImode);
f70fb3b6
MW
18470 const enum memmodel model = memmodel_from_int (INTVAL (model_rtx));
18471 const bool is_sync = is_mm_sync (model);
5d8a22a5
DM
18472 rtx_code_label *label;
18473 rtx x;
0462169c 18474
9cd7b720 18475 /* Split the atomic operation into a sequence. */
0462169c
SN
18476 label = gen_label_rtx ();
18477 emit_label (label);
18478
18479 if (new_out)
18480 new_out = gen_lowpart (wmode, new_out);
18481 if (old_out)
18482 old_out = gen_lowpart (wmode, old_out);
18483 else
18484 old_out = new_out;
18485 value = simplify_gen_subreg (wmode, value, mode, 0);
18486
f70fb3b6
MW
18487 /* The initial load can be relaxed for a __sync operation since a final
18488 barrier will be emitted to stop code hoisting. */
18489 if (is_sync)
18490 aarch64_emit_load_exclusive (mode, old_out, mem,
18491 GEN_INT (MEMMODEL_RELAXED));
18492 else
18493 aarch64_emit_load_exclusive (mode, old_out, mem, model_rtx);
0462169c
SN
18494
18495 switch (code)
18496 {
18497 case SET:
18498 new_out = value;
18499 break;
18500
18501 case NOT:
18502 x = gen_rtx_AND (wmode, old_out, value);
f7df4a84 18503 emit_insn (gen_rtx_SET (new_out, x));
0462169c 18504 x = gen_rtx_NOT (wmode, new_out);
f7df4a84 18505 emit_insn (gen_rtx_SET (new_out, x));
0462169c
SN
18506 break;
18507
18508 case MINUS:
18509 if (CONST_INT_P (value))
18510 {
18511 value = GEN_INT (-INTVAL (value));
18512 code = PLUS;
18513 }
18514 /* Fall through. */
18515
18516 default:
18517 x = gen_rtx_fmt_ee (code, wmode, old_out, value);
f7df4a84 18518 emit_insn (gen_rtx_SET (new_out, x));
0462169c
SN
18519 break;
18520 }
18521
18522 aarch64_emit_store_exclusive (mode, cond, mem,
18523 gen_lowpart (mode, new_out), model_rtx);
18524
6e1eaca9
RE
18525 if (aarch64_track_speculation)
18526 {
18527 /* Emit an explicit compare instruction, so that we can correctly
18528 track the condition codes. */
18529 rtx cc_reg = aarch64_gen_compare_reg (NE, cond, const0_rtx);
18530 x = gen_rtx_NE (GET_MODE (cc_reg), cc_reg, const0_rtx);
18531 }
18532 else
18533 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
18534
0462169c
SN
18535 x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
18536 gen_rtx_LABEL_REF (Pmode, label), pc_rtx);
f7df4a84 18537 aarch64_emit_unlikely_jump (gen_rtx_SET (pc_rtx, x));
f70fb3b6
MW
18538
18539 /* Emit any final barrier needed for a __sync operation. */
18540 if (is_sync)
18541 aarch64_emit_post_barrier (model);
0462169c
SN
18542}
18543
c2ec330c
AL
18544static void
18545aarch64_init_libfuncs (void)
18546{
18547 /* Half-precision float operations. The compiler handles all operations
18548 with NULL libfuncs by converting to SFmode. */
18549
18550 /* Conversions. */
18551 set_conv_libfunc (trunc_optab, HFmode, SFmode, "__gnu_f2h_ieee");
18552 set_conv_libfunc (sext_optab, SFmode, HFmode, "__gnu_h2f_ieee");
18553
18554 /* Arithmetic. */
18555 set_optab_libfunc (add_optab, HFmode, NULL);
18556 set_optab_libfunc (sdiv_optab, HFmode, NULL);
18557 set_optab_libfunc (smul_optab, HFmode, NULL);
18558 set_optab_libfunc (neg_optab, HFmode, NULL);
18559 set_optab_libfunc (sub_optab, HFmode, NULL);
18560
18561 /* Comparisons. */
18562 set_optab_libfunc (eq_optab, HFmode, NULL);
18563 set_optab_libfunc (ne_optab, HFmode, NULL);
18564 set_optab_libfunc (lt_optab, HFmode, NULL);
18565 set_optab_libfunc (le_optab, HFmode, NULL);
18566 set_optab_libfunc (ge_optab, HFmode, NULL);
18567 set_optab_libfunc (gt_optab, HFmode, NULL);
18568 set_optab_libfunc (unord_optab, HFmode, NULL);
18569}
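A minimal standalone example (not GCC internals) of the user-level feature these registrations back: with the HFmode arithmetic and comparison libfuncs left NULL, __fp16 operands are widened to single precision, operated on in SFmode, and narrowed back on assignment, with __gnu_f2h_ieee and __gnu_h2f_ieee registered for the conversions.

int
main (void)
{
  __fp16 a = 1.5, b = 2.25;     /* __fp16 is the AArch64 half-precision type.  */
  __fp16 c = a + b;             /* Arithmetic is performed in single precision.  */
  return c == (__fp16) 3.75 ? 0 : 1;    /* Exit status 0: 3.75 is exact in half.  */
}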
18570
43e9d192 18571/* Target hook for c_mode_for_suffix. */
ef4bddc2 18572static machine_mode
43e9d192
IB
18573aarch64_c_mode_for_suffix (char suffix)
18574{
18575 if (suffix == 'q')
18576 return TFmode;
18577
18578 return VOIDmode;
18579}
18580
3520f7cc
JG
18581/* We can only represent floating point constants which will fit in
18582 "quarter-precision" values. These values are characterised by
18583 a sign bit, a 4-bit mantissa and a 3-bit exponent, and are given
18584 by:
18585
18586 (-1)^s * (n/16) * 2^r
18587
18588 Where:
18589 's' is the sign bit.
18590 'n' is an integer in the range 16 <= n <= 31.
18591 'r' is an integer in the range -3 <= r <= 4. */
18592
18593/* Return true iff X can be represented as a quarter-precision
18594 floating point immediate operand. Note, we cannot represent 0.0. */
18595bool
18596aarch64_float_const_representable_p (rtx x)
18597{
18598 /* This represents our current view of how many bits
18599 make up the mantissa. */
18600 int point_pos = 2 * HOST_BITS_PER_WIDE_INT - 1;
ba96cdfb 18601 int exponent;
3520f7cc 18602 unsigned HOST_WIDE_INT mantissa, mask;
3520f7cc 18603 REAL_VALUE_TYPE r, m;
807e902e 18604 bool fail;
3520f7cc 18605
d29f7dd5 18606 x = unwrap_const_vec_duplicate (x);
3520f7cc
JG
18607 if (!CONST_DOUBLE_P (x))
18608 return false;
18609
a4518821
RS
18610 if (GET_MODE (x) == VOIDmode
18611 || (GET_MODE (x) == HFmode && !TARGET_FP_F16INST))
94bfa2da
TV
18612 return false;
18613
34a72c33 18614 r = *CONST_DOUBLE_REAL_VALUE (x);
3520f7cc
JG
18615
18616 /* We cannot represent infinities, NaNs or +/-zero. We won't
18617 know if we have +zero until we analyse the mantissa, but we
18618 can reject the other invalid values. */
18619 if (REAL_VALUE_ISINF (r) || REAL_VALUE_ISNAN (r)
18620 || REAL_VALUE_MINUS_ZERO (r))
18621 return false;
18622
ba96cdfb 18623 /* Extract exponent. */
3520f7cc
JG
18624 r = real_value_abs (&r);
18625 exponent = REAL_EXP (&r);
18626
18627 /* For the mantissa, we expand into two HOST_WIDE_INTS, apart from the
18628 highest (sign) bit, with a fixed binary point at bit point_pos.
18629 m1 holds the low part of the mantissa, m2 the high part.
18630 WARNING: If we ever have a representation using more than 2 * H_W_I - 1
18631 bits for the mantissa, this can fail (low bits will be lost). */
18632 real_ldexp (&m, &r, point_pos - exponent);
807e902e 18633 wide_int w = real_to_integer (&m, &fail, HOST_BITS_PER_WIDE_INT * 2);
3520f7cc
JG
18634
18635 /* If the low part of the mantissa has bits set we cannot represent
18636 the value. */
d9074b29 18637 if (w.ulow () != 0)
3520f7cc
JG
18638 return false;
18639 /* We have rejected the lower HOST_WIDE_INT, so update our
18640 understanding of how many bits lie in the mantissa and
18641 look only at the high HOST_WIDE_INT. */
807e902e 18642 mantissa = w.elt (1);
3520f7cc
JG
18643 point_pos -= HOST_BITS_PER_WIDE_INT;
18644
18645 /* We can only represent values with a mantissa of the form 1.xxxx. */
18646 mask = ((unsigned HOST_WIDE_INT)1 << (point_pos - 5)) - 1;
18647 if ((mantissa & mask) != 0)
18648 return false;
18649
18650 /* Having filtered unrepresentable values, we may now remove all
18651 but the highest 5 bits. */
18652 mantissa >>= point_pos - 5;
18653
18654 /* We cannot represent the value 0.0, so reject it. This is handled
18655 elsewhere. */
18656 if (mantissa == 0)
18657 return false;
18658
18659 /* Then, as bit 4 is always set, we can mask it off, leaving
18660 the mantissa in the range [0, 15]. */
18661 mantissa &= ~(1 << 4);
18662 gcc_assert (mantissa <= 15);
18663
18664 /* GCC internally does not use IEEE754-like encoding (where normalized
18665 significands are in the range [1, 2)). GCC uses [0.5, 1) (see real.c).
18666 Our mantissa values are shifted 4 places to the left relative to
18667 normalized IEEE754 so we must modify the exponent returned by REAL_EXP
18668 by 5 places to correct for GCC's representation. */
18669 exponent = 5 - exponent;
18670
18671 return (exponent >= 0 && exponent <= 7);
18672}
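A standalone sketch (not GCC code) that enumerates the positive magnitudes accepted by the check above, straight from the (-1)^s * (n/16) * 2^r form described before the function: 16 values of n times 8 values of r gives 128 positive magnitudes (256 encodings including the sign).

#include <stdio.h>

int
main (void)
{
  for (int r = -3; r <= 4; r++)
    for (int n = 16; n <= 31; n++)
      {
        double scale = r >= 0 ? (double) (1 << r) : 1.0 / (1 << -r);
        printf ("n=%2d r=%+d -> %g\n", n, r, (n / 16.0) * scale);
      }
  return 0;
}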
18673
ab6501d7
SD
18674/* Returns the string with the instruction for AdvSIMD MOVI, MVNI, ORR or BIC
18675 immediate with a CONST_VECTOR of MODE and WIDTH. WHICH selects whether to
18676 output MOVI/MVNI, ORR or BIC immediate. */
3520f7cc 18677char*
b187677b 18678aarch64_output_simd_mov_immediate (rtx const_vector, unsigned width,
ab6501d7 18679 enum simd_immediate_check which)
3520f7cc 18680{
3ea63f60 18681 bool is_valid;
3520f7cc 18682 static char templ[40];
3520f7cc 18683 const char *mnemonic;
e4f0f84d 18684 const char *shift_op;
3520f7cc 18685 unsigned int lane_count = 0;
81c2dfb9 18686 char element_char;
3520f7cc 18687
b187677b 18688 struct simd_immediate_info info;
48063b9d
IB
18689
18690 /* This will return true to show const_vector is legal for use as either
ab6501d7
SD
18691 an AdvSIMD MOVI instruction (or, implicitly, MVNI), ORR or BIC immediate.
18692 It will also update INFO to show how the immediate should be generated.
18693 WHICH selects whether to check for MOVI/MVNI, ORR or BIC. */
b187677b 18694 is_valid = aarch64_simd_valid_immediate (const_vector, &info, which);
3520f7cc
JG
18695 gcc_assert (is_valid);
18696
b187677b
RS
18697 element_char = sizetochar (GET_MODE_BITSIZE (info.elt_mode));
18698 lane_count = width / GET_MODE_BITSIZE (info.elt_mode);
48063b9d 18699
b187677b 18700 if (GET_MODE_CLASS (info.elt_mode) == MODE_FLOAT)
3520f7cc 18701 {
1da83cce
RS
18702 gcc_assert (info.insn == simd_immediate_info::MOV
18703 && info.u.mov.shift == 0);
0d8e1702
KT
18704 /* For FP zero change it to a CONST_INT 0 and use the integer SIMD
18705 move immediate path. */
1da83cce
RS
18706 if (aarch64_float_const_zero_rtx_p (info.u.mov.value))
18707 info.u.mov.value = GEN_INT (0);
48063b9d
IB
18708 else
18709 {
83faf7d0 18710 const unsigned int buf_size = 20;
48063b9d 18711 char float_buf[buf_size] = {'\0'};
34a72c33 18712 real_to_decimal_for_mode (float_buf,
1da83cce 18713 CONST_DOUBLE_REAL_VALUE (info.u.mov.value),
b187677b 18714 buf_size, buf_size, 1, info.elt_mode);
48063b9d
IB
18715
18716 if (lane_count == 1)
18717 snprintf (templ, sizeof (templ), "fmov\t%%d0, %s", float_buf);
18718 else
18719 snprintf (templ, sizeof (templ), "fmov\t%%0.%d%c, %s",
81c2dfb9 18720 lane_count, element_char, float_buf);
48063b9d
IB
18721 return templ;
18722 }
3520f7cc 18723 }
3520f7cc 18724
1da83cce 18725 gcc_assert (CONST_INT_P (info.u.mov.value));
ab6501d7
SD
18726
18727 if (which == AARCH64_CHECK_MOV)
18728 {
b187677b 18729 mnemonic = info.insn == simd_immediate_info::MVN ? "mvni" : "movi";
1da83cce
RS
18730 shift_op = (info.u.mov.modifier == simd_immediate_info::MSL
18731 ? "msl" : "lsl");
ab6501d7
SD
18732 if (lane_count == 1)
18733 snprintf (templ, sizeof (templ), "%s\t%%d0, " HOST_WIDE_INT_PRINT_HEX,
1da83cce
RS
18734 mnemonic, UINTVAL (info.u.mov.value));
18735 else if (info.u.mov.shift)
ab6501d7
SD
18736 snprintf (templ, sizeof (templ), "%s\t%%0.%d%c, "
18737 HOST_WIDE_INT_PRINT_HEX ", %s %d", mnemonic, lane_count,
1da83cce
RS
18738 element_char, UINTVAL (info.u.mov.value), shift_op,
18739 info.u.mov.shift);
ab6501d7
SD
18740 else
18741 snprintf (templ, sizeof (templ), "%s\t%%0.%d%c, "
18742 HOST_WIDE_INT_PRINT_HEX, mnemonic, lane_count,
1da83cce 18743 element_char, UINTVAL (info.u.mov.value));
ab6501d7 18744 }
3520f7cc 18745 else
ab6501d7
SD
18746 {
18747 /* For AARCH64_CHECK_BIC and AARCH64_CHECK_ORR. */
b187677b 18748 mnemonic = info.insn == simd_immediate_info::MVN ? "bic" : "orr";
1da83cce 18749 if (info.u.mov.shift)
ab6501d7
SD
18750 snprintf (templ, sizeof (templ), "%s\t%%0.%d%c, #"
18751 HOST_WIDE_INT_PRINT_DEC ", %s #%d", mnemonic, lane_count,
1da83cce
RS
18752 element_char, UINTVAL (info.u.mov.value), "lsl",
18753 info.u.mov.shift);
ab6501d7
SD
18754 else
18755 snprintf (templ, sizeof (templ), "%s\t%%0.%d%c, #"
18756 HOST_WIDE_INT_PRINT_DEC, mnemonic, lane_count,
1da83cce 18757 element_char, UINTVAL (info.u.mov.value));
ab6501d7 18758 }
3520f7cc
JG
18759 return templ;
18760}
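A standalone sketch (not GCC code) of the template the MOV branch above produces for a V4SImode constant whose four elements are all 0x100, i.e. the 8-bit value 0x1 with an LSL 8 modifier; HOST_WIDE_INT_PRINT_HEX is assumed to print like "%#lx" here.

#include <stdio.h>

int
main (void)
{
  char templ[40];
  int lane_count = 4;
  char element_char = 's';
  unsigned long value = 0x1;
  int shift = 8;

  snprintf (templ, sizeof (templ), "%s\t%%0.%d%c, %#lx, %s %d",
            "movi", lane_count, element_char, value, "lsl", shift);
  puts (templ);         /* Prints: movi	%0.4s, 0x1, lsl 8  */
  return 0;
}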
18761
b7342d25 18762char*
77e994c9 18763aarch64_output_scalar_simd_mov_immediate (rtx immediate, scalar_int_mode mode)
b7342d25 18764{
a2170965
TC
18765
18766 /* If a floating point number was passed and we desire to use it in an
18767 integer mode do the conversion to integer. */
18768 if (CONST_DOUBLE_P (immediate) && GET_MODE_CLASS (mode) == MODE_INT)
18769 {
18770 unsigned HOST_WIDE_INT ival;
18771 if (!aarch64_reinterpret_float_as_int (immediate, &ival))
18772 gcc_unreachable ();
18773 immediate = gen_int_mode (ival, mode);
18774 }
18775
ef4bddc2 18776 machine_mode vmode;
a2170965
TC
18777 /* Use a 64-bit mode for everything except for DI/DF mode, where we use
18778 a 128-bit vector mode. */
18779 int width = GET_MODE_BITSIZE (mode) == 64 ? 128 : 64;
b7342d25 18780
a2170965 18781 vmode = aarch64_simd_container_mode (mode, width);
b7342d25 18782 rtx v_op = aarch64_simd_gen_const_vector_dup (vmode, INTVAL (immediate));
b187677b 18783 return aarch64_output_simd_mov_immediate (v_op, width);
b7342d25
IB
18784}
18785
43cacb12
RS
18786/* Return the output string to use for moving immediate CONST_VECTOR
18787 into an SVE register. */
18788
18789char *
18790aarch64_output_sve_mov_immediate (rtx const_vector)
18791{
18792 static char templ[40];
18793 struct simd_immediate_info info;
18794 char element_char;
18795
18796 bool is_valid = aarch64_simd_valid_immediate (const_vector, &info);
18797 gcc_assert (is_valid);
18798
18799 element_char = sizetochar (GET_MODE_BITSIZE (info.elt_mode));
18800
1044fa32
RS
18801 machine_mode vec_mode = GET_MODE (const_vector);
18802 if (aarch64_sve_pred_mode_p (vec_mode))
18803 {
18804 static char buf[sizeof ("ptrue\t%0.N, vlNNNNN")];
0b1fe8cf
RS
18805 if (info.insn == simd_immediate_info::MOV)
18806 {
18807 gcc_assert (info.u.mov.value == const0_rtx);
18808 snprintf (buf, sizeof (buf), "pfalse\t%%0.b");
18809 }
1044fa32 18810 else
0b1fe8cf
RS
18811 {
18812 gcc_assert (info.insn == simd_immediate_info::PTRUE);
18813 unsigned int total_bytes;
18814 if (info.u.pattern == AARCH64_SV_ALL
18815 && BYTES_PER_SVE_VECTOR.is_constant (&total_bytes))
18816 snprintf (buf, sizeof (buf), "ptrue\t%%0.%c, vl%d", element_char,
18817 total_bytes / GET_MODE_SIZE (info.elt_mode));
18818 else
18819 snprintf (buf, sizeof (buf), "ptrue\t%%0.%c, %s", element_char,
18820 svpattern_token (info.u.pattern));
18821 }
1044fa32
RS
18822 return buf;
18823 }
18824
1da83cce 18825 if (info.insn == simd_immediate_info::INDEX)
43cacb12
RS
18826 {
18827 snprintf (templ, sizeof (templ), "index\t%%0.%c, #"
18828 HOST_WIDE_INT_PRINT_DEC ", #" HOST_WIDE_INT_PRINT_DEC,
1da83cce
RS
18829 element_char, INTVAL (info.u.index.base),
18830 INTVAL (info.u.index.step));
43cacb12
RS
18831 return templ;
18832 }
18833
18834 if (GET_MODE_CLASS (info.elt_mode) == MODE_FLOAT)
18835 {
1da83cce
RS
18836 if (aarch64_float_const_zero_rtx_p (info.u.mov.value))
18837 info.u.mov.value = GEN_INT (0);
43cacb12
RS
18838 else
18839 {
18840 const int buf_size = 20;
18841 char float_buf[buf_size] = {};
18842 real_to_decimal_for_mode (float_buf,
1da83cce 18843 CONST_DOUBLE_REAL_VALUE (info.u.mov.value),
43cacb12
RS
18844 buf_size, buf_size, 1, info.elt_mode);
18845
18846 snprintf (templ, sizeof (templ), "fmov\t%%0.%c, #%s",
18847 element_char, float_buf);
18848 return templ;
18849 }
18850 }
18851
18852 snprintf (templ, sizeof (templ), "mov\t%%0.%c, #" HOST_WIDE_INT_PRINT_DEC,
1da83cce 18853 element_char, INTVAL (info.u.mov.value));
43cacb12
RS
18854 return templ;
18855}
18856
624d0f07
RS
18857/* Return the asm template for a PTRUES. CONST_UNSPEC is the
18858 aarch64_sve_ptrue_svpattern_immediate that describes the predicate
18859 pattern. */
18860
18861char *
18862aarch64_output_sve_ptrues (rtx const_unspec)
18863{
18864 static char templ[40];
18865
18866 struct simd_immediate_info info;
18867 bool is_valid = aarch64_simd_valid_immediate (const_unspec, &info);
18868 gcc_assert (is_valid && info.insn == simd_immediate_info::PTRUE);
18869
18870 char element_char = sizetochar (GET_MODE_BITSIZE (info.elt_mode));
18871 snprintf (templ, sizeof (templ), "ptrues\t%%0.%c, %s", element_char,
18872 svpattern_token (info.u.pattern));
18873 return templ;
18874}
18875
88b08073
JG
18876/* Split operands into moves from op[1] + op[2] into op[0]. */
18877
18878void
18879aarch64_split_combinev16qi (rtx operands[3])
18880{
18881 unsigned int dest = REGNO (operands[0]);
18882 unsigned int src1 = REGNO (operands[1]);
18883 unsigned int src2 = REGNO (operands[2]);
ef4bddc2 18884 machine_mode halfmode = GET_MODE (operands[1]);
462a99aa 18885 unsigned int halfregs = REG_NREGS (operands[1]);
88b08073
JG
18886 rtx destlo, desthi;
18887
18888 gcc_assert (halfmode == V16QImode);
18889
18890 if (src1 == dest && src2 == dest + halfregs)
18891 {
18892 /* No-op move. Can't split to nothing; emit something. */
18893 emit_note (NOTE_INSN_DELETED);
18894 return;
18895 }
18896
18897 /* Preserve register attributes for variable tracking. */
18898 destlo = gen_rtx_REG_offset (operands[0], halfmode, dest, 0);
18899 desthi = gen_rtx_REG_offset (operands[0], halfmode, dest + halfregs,
18900 GET_MODE_SIZE (halfmode));
18901
18902 /* Special case of reversed high/low parts. */
18903 if (reg_overlap_mentioned_p (operands[2], destlo)
18904 && reg_overlap_mentioned_p (operands[1], desthi))
18905 {
18906 emit_insn (gen_xorv16qi3 (operands[1], operands[1], operands[2]));
18907 emit_insn (gen_xorv16qi3 (operands[2], operands[1], operands[2]));
18908 emit_insn (gen_xorv16qi3 (operands[1], operands[1], operands[2]));
18909 }
18910 else if (!reg_overlap_mentioned_p (operands[2], destlo))
18911 {
18912 /* Try to avoid unnecessary moves if part of the result
18913 is in the right place already. */
18914 if (src1 != dest)
18915 emit_move_insn (destlo, operands[1]);
18916 if (src2 != dest + halfregs)
18917 emit_move_insn (desthi, operands[2]);
18918 }
18919 else
18920 {
18921 if (src2 != dest + halfregs)
18922 emit_move_insn (desthi, operands[2]);
18923 if (src1 != dest)
18924 emit_move_insn (destlo, operands[1]);
18925 }
18926}
18927
18928/* vec_perm support. */
18929
88b08073
JG
18930struct expand_vec_perm_d
18931{
18932 rtx target, op0, op1;
e3342de4 18933 vec_perm_indices perm;
ef4bddc2 18934 machine_mode vmode;
43cacb12 18935 unsigned int vec_flags;
88b08073
JG
18936 bool one_vector_p;
18937 bool testing_p;
18938};
18939
18940/* Generate a variable permutation. */
18941
18942static void
18943aarch64_expand_vec_perm_1 (rtx target, rtx op0, rtx op1, rtx sel)
18944{
ef4bddc2 18945 machine_mode vmode = GET_MODE (target);
88b08073
JG
18946 bool one_vector_p = rtx_equal_p (op0, op1);
18947
18948 gcc_checking_assert (vmode == V8QImode || vmode == V16QImode);
18949 gcc_checking_assert (GET_MODE (op0) == vmode);
18950 gcc_checking_assert (GET_MODE (op1) == vmode);
18951 gcc_checking_assert (GET_MODE (sel) == vmode);
18952 gcc_checking_assert (TARGET_SIMD);
18953
18954 if (one_vector_p)
18955 {
18956 if (vmode == V8QImode)
18957 {
18958 /* Expand the argument to a V16QI mode by duplicating it. */
18959 rtx pair = gen_reg_rtx (V16QImode);
18960 emit_insn (gen_aarch64_combinev8qi (pair, op0, op0));
18961 emit_insn (gen_aarch64_tbl1v8qi (target, pair, sel));
18962 }
18963 else
18964 {
18965 emit_insn (gen_aarch64_tbl1v16qi (target, op0, sel));
18966 }
18967 }
18968 else
18969 {
18970 rtx pair;
18971
18972 if (vmode == V8QImode)
18973 {
18974 pair = gen_reg_rtx (V16QImode);
18975 emit_insn (gen_aarch64_combinev8qi (pair, op0, op1));
18976 emit_insn (gen_aarch64_tbl1v8qi (target, pair, sel));
18977 }
18978 else
18979 {
18980 pair = gen_reg_rtx (OImode);
18981 emit_insn (gen_aarch64_combinev16qi (pair, op0, op1));
18982 emit_insn (gen_aarch64_tbl2v16qi (target, pair, sel));
18983 }
18984 }
18985}
18986
80940017
RS
18987/* Expand a vec_perm with the operands given by TARGET, OP0, OP1 and SEL.
18988 NELT is the number of elements in the vector. */
18989
88b08073 18990void
80940017
RS
18991aarch64_expand_vec_perm (rtx target, rtx op0, rtx op1, rtx sel,
18992 unsigned int nelt)
88b08073 18993{
ef4bddc2 18994 machine_mode vmode = GET_MODE (target);
88b08073 18995 bool one_vector_p = rtx_equal_p (op0, op1);
f7c4e5b8 18996 rtx mask;
88b08073
JG
18997
18998 /* The TBL instruction does not use a modulo index, so we must take care
18999 of that ourselves. */
f7c4e5b8
AL
19000 mask = aarch64_simd_gen_const_vector_dup (vmode,
19001 one_vector_p ? nelt - 1 : 2 * nelt - 1);
88b08073
JG
19002 sel = expand_simple_binop (vmode, AND, sel, mask, NULL, 0, OPTAB_LIB_WIDEN);
19003
f7c4e5b8
AL
19004 /* For big-endian, we also need to reverse the index within the vector
19005 (but not which vector). */
19006 if (BYTES_BIG_ENDIAN)
19007 {
19008 /* If one_vector_p, mask is a vector of (nelt - 1)'s already. */
19009 if (!one_vector_p)
19010 mask = aarch64_simd_gen_const_vector_dup (vmode, nelt - 1);
19011 sel = expand_simple_binop (vmode, XOR, sel, mask,
19012 NULL, 0, OPTAB_LIB_WIDEN);
19013 }
88b08073
JG
19014 aarch64_expand_vec_perm_1 (target, op0, op1, sel);
19015}
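A standalone sketch (not GCC code) of the index fixups applied before emitting TBL: TBL does not reduce the selector modulo the table size, so the vec_perm wrap-around is restored with an AND, and on big-endian the index within each input vector is additionally flipped with an XOR.

#include <stdio.h>

int
main (void)
{
  const int nelt = 16;          /* V16QImode, two input vectors.  */
  int sel = 35;                 /* An out-of-range vec_perm index.  */

  int masked = sel & (2 * nelt - 1);    /* 35 & 31 == 3, back in range.  */
  int flipped = masked ^ (nelt - 1);    /* Big-endian: reverse within the
                                           vector, but not which vector.  */
  printf ("masked=%d flipped=%d\n", masked, flipped);   /* 3 and 12 */
  return 0;
}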
19016
43cacb12
RS
19017/* Generate (set TARGET (unspec [OP0 OP1] CODE)). */
19018
19019static void
19020emit_unspec2 (rtx target, int code, rtx op0, rtx op1)
19021{
19022 emit_insn (gen_rtx_SET (target,
19023 gen_rtx_UNSPEC (GET_MODE (target),
19024 gen_rtvec (2, op0, op1), code)));
19025}
19026
19027/* Expand an SVE vec_perm with the given operands. */
19028
19029void
19030aarch64_expand_sve_vec_perm (rtx target, rtx op0, rtx op1, rtx sel)
19031{
19032 machine_mode data_mode = GET_MODE (target);
19033 machine_mode sel_mode = GET_MODE (sel);
19034 /* Enforced by the pattern condition. */
19035 int nunits = GET_MODE_NUNITS (sel_mode).to_constant ();
19036
19037 /* Note: vec_perm indices are supposed to wrap when they go beyond the
19038 size of the two value vectors, i.e. the upper bits of the indices
19039 are effectively ignored. SVE TBL instead produces 0 for any
19040 out-of-range indices, so we need to modulo all the vec_perm indices
19041 to ensure they are all in range. */
19042 rtx sel_reg = force_reg (sel_mode, sel);
19043
19044 /* Check if the sel only references the first values vector. */
19045 if (GET_CODE (sel) == CONST_VECTOR
19046 && aarch64_const_vec_all_in_range_p (sel, 0, nunits - 1))
19047 {
19048 emit_unspec2 (target, UNSPEC_TBL, op0, sel_reg);
19049 return;
19050 }
19051
19052 /* Check if the two values vectors are the same. */
19053 if (rtx_equal_p (op0, op1))
19054 {
19055 rtx max_sel = aarch64_simd_gen_const_vector_dup (sel_mode, nunits - 1);
19056 rtx sel_mod = expand_simple_binop (sel_mode, AND, sel_reg, max_sel,
19057 NULL, 0, OPTAB_DIRECT);
19058 emit_unspec2 (target, UNSPEC_TBL, op0, sel_mod);
19059 return;
19060 }
19061
19062 /* Run TBL for each value vector and combine the results. */
19063
19064 rtx res0 = gen_reg_rtx (data_mode);
19065 rtx res1 = gen_reg_rtx (data_mode);
19066 rtx neg_num_elems = aarch64_simd_gen_const_vector_dup (sel_mode, -nunits);
19067 if (GET_CODE (sel) != CONST_VECTOR
19068 || !aarch64_const_vec_all_in_range_p (sel, 0, 2 * nunits - 1))
19069 {
19070 rtx max_sel = aarch64_simd_gen_const_vector_dup (sel_mode,
19071 2 * nunits - 1);
19072 sel_reg = expand_simple_binop (sel_mode, AND, sel_reg, max_sel,
19073 NULL, 0, OPTAB_DIRECT);
19074 }
19075 emit_unspec2 (res0, UNSPEC_TBL, op0, sel_reg);
19076 rtx sel_sub = expand_simple_binop (sel_mode, PLUS, sel_reg, neg_num_elems,
19077 NULL, 0, OPTAB_DIRECT);
19078 emit_unspec2 (res1, UNSPEC_TBL, op1, sel_sub);
19079 if (GET_MODE_CLASS (data_mode) == MODE_VECTOR_INT)
19080 emit_insn (gen_rtx_SET (target, gen_rtx_IOR (data_mode, res0, res1)));
19081 else
19082 emit_unspec2 (target, UNSPEC_IORF, res0, res1);
19083}
19084
cc4d934f
JG
19085/* Recognize patterns suitable for the TRN instructions. */
19086static bool
19087aarch64_evpc_trn (struct expand_vec_perm_d *d)
19088{
6a70badb
RS
19089 HOST_WIDE_INT odd;
19090 poly_uint64 nelt = d->perm.length ();
cc4d934f 19091 rtx out, in0, in1, x;
ef4bddc2 19092 machine_mode vmode = d->vmode;
cc4d934f
JG
19093
19094 if (GET_MODE_UNIT_SIZE (vmode) > 8)
19095 return false;
19096
19097 /* Note that these are little-endian tests.
19098 We correct for big-endian later. */
6a70badb
RS
19099 if (!d->perm[0].is_constant (&odd)
19100 || (odd != 0 && odd != 1)
326ac20e
RS
19101 || !d->perm.series_p (0, 2, odd, 2)
19102 || !d->perm.series_p (1, 2, nelt + odd, 2))
cc4d934f 19103 return false;
cc4d934f
JG
19104
19105 /* Success! */
19106 if (d->testing_p)
19107 return true;
19108
19109 in0 = d->op0;
19110 in1 = d->op1;
43cacb12
RS
19111 /* We don't need a big-endian lane correction for SVE; see the comment
19112 at the head of aarch64-sve.md for details. */
19113 if (BYTES_BIG_ENDIAN && d->vec_flags == VEC_ADVSIMD)
cc4d934f
JG
19114 {
19115 x = in0, in0 = in1, in1 = x;
19116 odd = !odd;
19117 }
19118 out = d->target;
19119
3f8334a5
RS
19120 emit_set_insn (out, gen_rtx_UNSPEC (vmode, gen_rtvec (2, in0, in1),
19121 odd ? UNSPEC_TRN2 : UNSPEC_TRN1));
cc4d934f
JG
19122 return true;
19123}
19124
19125/* Recognize patterns suitable for the UZP instructions. */
19126static bool
19127aarch64_evpc_uzp (struct expand_vec_perm_d *d)
19128{
6a70badb 19129 HOST_WIDE_INT odd;
cc4d934f 19130 rtx out, in0, in1, x;
ef4bddc2 19131 machine_mode vmode = d->vmode;
cc4d934f
JG
19132
19133 if (GET_MODE_UNIT_SIZE (vmode) > 8)
19134 return false;
19135
19136 /* Note that these are little-endian tests.
19137 We correct for big-endian later. */
6a70badb
RS
19138 if (!d->perm[0].is_constant (&odd)
19139 || (odd != 0 && odd != 1)
326ac20e 19140 || !d->perm.series_p (0, 1, odd, 2))
cc4d934f 19141 return false;
cc4d934f
JG
19142
19143 /* Success! */
19144 if (d->testing_p)
19145 return true;
19146
19147 in0 = d->op0;
19148 in1 = d->op1;
43cacb12
RS
19149 /* We don't need a big-endian lane correction for SVE; see the comment
19150 at the head of aarch64-sve.md for details. */
19151 if (BYTES_BIG_ENDIAN && d->vec_flags == VEC_ADVSIMD)
cc4d934f
JG
19152 {
19153 x = in0, in0 = in1, in1 = x;
19154 odd = !odd;
19155 }
19156 out = d->target;
19157
3f8334a5
RS
19158 emit_set_insn (out, gen_rtx_UNSPEC (vmode, gen_rtvec (2, in0, in1),
19159 odd ? UNSPEC_UZP2 : UNSPEC_UZP1));
cc4d934f
JG
19160 return true;
19161}
19162
19163/* Recognize patterns suitable for the ZIP instructions. */
19164static bool
19165aarch64_evpc_zip (struct expand_vec_perm_d *d)
19166{
6a70badb
RS
19167 unsigned int high;
19168 poly_uint64 nelt = d->perm.length ();
cc4d934f 19169 rtx out, in0, in1, x;
ef4bddc2 19170 machine_mode vmode = d->vmode;
cc4d934f
JG
19171
19172 if (GET_MODE_UNIT_SIZE (vmode) > 8)
19173 return false;
19174
19175 /* Note that these are little-endian tests.
19176 We correct for big-endian later. */
6a70badb
RS
19177 poly_uint64 first = d->perm[0];
19178 if ((maybe_ne (first, 0U) && maybe_ne (first * 2, nelt))
19179 || !d->perm.series_p (0, 2, first, 1)
19180 || !d->perm.series_p (1, 2, first + nelt, 1))
cc4d934f 19181 return false;
6a70badb 19182 high = maybe_ne (first, 0U);
cc4d934f
JG
19183
19184 /* Success! */
19185 if (d->testing_p)
19186 return true;
19187
19188 in0 = d->op0;
19189 in1 = d->op1;
43cacb12
RS
19190 /* We don't need a big-endian lane correction for SVE; see the comment
19191 at the head of aarch64-sve.md for details. */
19192 if (BYTES_BIG_ENDIAN && d->vec_flags == VEC_ADVSIMD)
cc4d934f
JG
19193 {
19194 x = in0, in0 = in1, in1 = x;
19195 high = !high;
19196 }
19197 out = d->target;
19198
3f8334a5
RS
19199 emit_set_insn (out, gen_rtx_UNSPEC (vmode, gen_rtvec (2, in0, in1),
19200 high ? UNSPEC_ZIP2 : UNSPEC_ZIP1));
cc4d934f
JG
19201 return true;
19202}
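A standalone sketch (not GCC code) of the little-endian index pattern the recognizer above accepts, restated for concrete int arrays instead of vec_perm_indices; the hypothetical helper below mirrors the series checks (first index 0 for ZIP1 or nelt/2 for ZIP2, even positions counting up from it, odd positions counting up from it plus nelt).

#include <stdio.h>
#include <stdbool.h>

/* PERM has NELT entries selecting from two NELT-element inputs
   (indices 0 .. 2*nelt-1).  Return true for a ZIP1/ZIP2 pattern and set
   *HIGH to 0 for ZIP1, 1 for ZIP2.  */
static bool
is_zip (const int *perm, int nelt, int *high)
{
  int first = perm[0];
  if (first != 0 && first * 2 != nelt)
    return false;
  for (int i = 0; i < nelt; i += 2)
    if (perm[i] != first + i / 2 || perm[i + 1] != first + nelt + i / 2)
      return false;
  *high = first != 0;
  return true;
}

int
main (void)
{
  int zip1[] = { 0, 4, 1, 5 };  /* Interleave the low halves.  */
  int zip2[] = { 2, 6, 3, 7 };  /* Interleave the high halves.  */
  int uzp1[] = { 0, 2, 4, 6 };  /* Not a ZIP pattern.  */
  int high;

  printf ("%d\n", is_zip (zip1, 4, &high));     /* 1 (high == 0) */
  printf ("%d\n", is_zip (zip2, 4, &high));     /* 1 (high == 1) */
  printf ("%d\n", is_zip (uzp1, 4, &high));     /* 0 */
  return 0;
}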
19203
ae0533da
AL
19204/* Recognize patterns for the EXT insn. */
19205
19206static bool
19207aarch64_evpc_ext (struct expand_vec_perm_d *d)
19208{
6a70badb 19209 HOST_WIDE_INT location;
ae0533da
AL
19210 rtx offset;
19211
6a70badb
RS
19212 /* The first element always refers to the first vector.
19213 Check if the extracted indices are increasing by one. */
43cacb12
RS
19214 if (d->vec_flags == VEC_SVE_PRED
19215 || !d->perm[0].is_constant (&location)
6a70badb 19216 || !d->perm.series_p (0, 1, location, 1))
326ac20e 19217 return false;
ae0533da 19218
ae0533da
AL
19219 /* Success! */
19220 if (d->testing_p)
19221 return true;
19222
b31e65bb 19223 /* The case where (location == 0) is a no-op for both big- and little-endian,
43cacb12 19224 and is removed by the mid-end at optimization levels -O1 and higher.
b31e65bb 19225
43cacb12
RS
19226 We don't need a big-endian lane correction for SVE; see the comment
19227 at the head of aarch64-sve.md for details. */
19228 if (BYTES_BIG_ENDIAN && location != 0 && d->vec_flags == VEC_ADVSIMD)
ae0533da
AL
19229 {
19230 /* After setup, we want the high elements of the first vector (stored
19231 at the LSB end of the register), and the low elements of the second
19232 vector (stored at the MSB end of the register). So swap. */
cb5c6c29 19233 std::swap (d->op0, d->op1);
6a70badb
RS
19234 /* location != 0 (above), so safe to assume (nelt - location) < nelt.
19235 to_constant () is safe since this is restricted to Advanced SIMD
19236 vectors. */
19237 location = d->perm.length ().to_constant () - location;
ae0533da
AL
19238 }
19239
19240 offset = GEN_INT (location);
3f8334a5
RS
19241 emit_set_insn (d->target,
19242 gen_rtx_UNSPEC (d->vmode,
19243 gen_rtvec (3, d->op0, d->op1, offset),
19244 UNSPEC_EXT));
ae0533da
AL
19245 return true;
19246}
19247
43cacb12
RS
19248/* Recognize patterns for the REV{64,32,16} insns, which reverse elements
19249 within each 64-bit, 32-bit or 16-bit granule. */
923fcec3
AL
19250
19251static bool
43cacb12 19252aarch64_evpc_rev_local (struct expand_vec_perm_d *d)
923fcec3 19253{
6a70badb
RS
19254 HOST_WIDE_INT diff;
19255 unsigned int i, size, unspec;
43cacb12 19256 machine_mode pred_mode;
923fcec3 19257
43cacb12
RS
19258 if (d->vec_flags == VEC_SVE_PRED
19259 || !d->one_vector_p
6a70badb 19260 || !d->perm[0].is_constant (&diff))
923fcec3
AL
19261 return false;
19262
3f8334a5
RS
19263 size = (diff + 1) * GET_MODE_UNIT_SIZE (d->vmode);
19264 if (size == 8)
43cacb12
RS
19265 {
19266 unspec = UNSPEC_REV64;
19267 pred_mode = VNx2BImode;
19268 }
3f8334a5 19269 else if (size == 4)
43cacb12
RS
19270 {
19271 unspec = UNSPEC_REV32;
19272 pred_mode = VNx4BImode;
19273 }
3f8334a5 19274 else if (size == 2)
43cacb12
RS
19275 {
19276 unspec = UNSPEC_REV16;
19277 pred_mode = VNx8BImode;
19278 }
3f8334a5
RS
19279 else
19280 return false;
923fcec3 19281
326ac20e
RS
19282 unsigned int step = diff + 1;
19283 for (i = 0; i < step; ++i)
19284 if (!d->perm.series_p (i, step, diff - i, step))
19285 return false;
923fcec3
AL
19286
19287 /* Success! */
19288 if (d->testing_p)
19289 return true;
19290
43cacb12
RS
19291 if (d->vec_flags == VEC_SVE_DATA)
19292 {
d7a09c44
RS
19293 machine_mode int_mode = aarch64_sve_int_mode (pred_mode);
19294 rtx target = gen_reg_rtx (int_mode);
19295 if (BYTES_BIG_ENDIAN)
19296 /* The act of taking a subreg between INT_MODE and d->vmode
19297 is itself a reversing operation on big-endian targets;
19298 see the comment at the head of aarch64-sve.md for details.
19299 First reinterpret OP0 as INT_MODE without using a subreg
19300 and without changing the contents. */
19301 emit_insn (gen_aarch64_sve_reinterpret (int_mode, target, d->op0));
19302 else
19303 {
19304 /* For SVE we use REV[BHW] unspecs derived from the element size
19305 of d->vmode and vector modes whose elements have SIZE bytes.
19306 This ensures that the vector modes match the predicate modes. */
19307 int unspec = aarch64_sve_rev_unspec (d->vmode);
19308 rtx pred = aarch64_ptrue_reg (pred_mode);
19309 emit_insn (gen_aarch64_pred (unspec, int_mode, target, pred,
19310 gen_lowpart (int_mode, d->op0)));
19311 }
19312 emit_move_insn (d->target, gen_lowpart (d->vmode, target));
19313 return true;
43cacb12 19314 }
d7a09c44 19315 rtx src = gen_rtx_UNSPEC (d->vmode, gen_rtvec (1, d->op0), unspec);
43cacb12
RS
19316 emit_set_insn (d->target, src);
19317 return true;
19318}
19319
19320/* Recognize patterns for the REV insn, which reverses elements within
19321 a full vector. */
19322
19323static bool
19324aarch64_evpc_rev_global (struct expand_vec_perm_d *d)
19325{
19326 poly_uint64 nelt = d->perm.length ();
19327
28350fd1 19328 if (!d->one_vector_p || d->vec_flags == VEC_ADVSIMD)
43cacb12
RS
19329 return false;
19330
19331 if (!d->perm.series_p (0, 1, nelt - 1, -1))
19332 return false;
19333
19334 /* Success! */
19335 if (d->testing_p)
19336 return true;
19337
19338 rtx src = gen_rtx_UNSPEC (d->vmode, gen_rtvec (1, d->op0), UNSPEC_REV);
19339 emit_set_insn (d->target, src);
923fcec3
AL
19340 return true;
19341}
19342
91bd4114
JG
19343static bool
19344aarch64_evpc_dup (struct expand_vec_perm_d *d)
19345{
91bd4114
JG
19346 rtx out = d->target;
19347 rtx in0;
6a70badb 19348 HOST_WIDE_INT elt;
ef4bddc2 19349 machine_mode vmode = d->vmode;
91bd4114
JG
19350 rtx lane;
19351
43cacb12
RS
19352 if (d->vec_flags == VEC_SVE_PRED
19353 || d->perm.encoding ().encoded_nelts () != 1
6a70badb 19354 || !d->perm[0].is_constant (&elt))
326ac20e
RS
19355 return false;
19356
43cacb12
RS
19357 if (d->vec_flags == VEC_SVE_DATA && elt >= 64 * GET_MODE_UNIT_SIZE (vmode))
19358 return false;
19359
326ac20e
RS
19360 /* Success! */
19361 if (d->testing_p)
19362 return true;
19363
91bd4114
JG
19364 /* The generic preparation in aarch64_expand_vec_perm_const_1
19365 swaps the operand order and the permute indices if it finds
19366 d->perm[0] to be in the second operand. Thus, we can always
19367 use d->op0 and need not do any extra arithmetic to get the
19368 correct lane number. */
19369 in0 = d->op0;
f901401e 19370 lane = GEN_INT (elt); /* The pattern corrects for big-endian. */
91bd4114 19371
3f8334a5
RS
19372 rtx parallel = gen_rtx_PARALLEL (vmode, gen_rtvec (1, lane));
19373 rtx select = gen_rtx_VEC_SELECT (GET_MODE_INNER (vmode), in0, parallel);
19374 emit_set_insn (out, gen_rtx_VEC_DUPLICATE (vmode, select));
91bd4114
JG
19375 return true;
19376}
19377
88b08073
JG
19378static bool
19379aarch64_evpc_tbl (struct expand_vec_perm_d *d)
19380{
43cacb12 19381 rtx rperm[MAX_COMPILE_TIME_VEC_BYTES], sel;
ef4bddc2 19382 machine_mode vmode = d->vmode;
6a70badb
RS
19383
19384 /* Make sure that the indices are constant. */
19385 unsigned int encoded_nelts = d->perm.encoding ().encoded_nelts ();
19386 for (unsigned int i = 0; i < encoded_nelts; ++i)
19387 if (!d->perm[i].is_constant ())
19388 return false;
88b08073 19389
88b08073
JG
19390 if (d->testing_p)
19391 return true;
19392
19393 /* Generic code will try constant permutation twice. Once with the
19394 original mode and again with the elements lowered to QImode.
19395 So wait and don't do the selector expansion ourselves. */
19396 if (vmode != V8QImode && vmode != V16QImode)
19397 return false;
19398
6a70badb
RS
19399 /* to_constant is safe since this routine is specific to Advanced SIMD
19400 vectors. */
19401 unsigned int nelt = d->perm.length ().to_constant ();
19402 for (unsigned int i = 0; i < nelt; ++i)
19403 /* If big-endian and two vectors we end up with a weird mixed-endian
19404 mode on NEON. Reverse the index within each word but not the word
19405 itself. to_constant is safe because we checked is_constant above. */
19406 rperm[i] = GEN_INT (BYTES_BIG_ENDIAN
19407 ? d->perm[i].to_constant () ^ (nelt - 1)
19408 : d->perm[i].to_constant ());
bbcc9c00 19409
88b08073
JG
19410 sel = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, rperm));
19411 sel = force_reg (vmode, sel);
19412
19413 aarch64_expand_vec_perm_1 (d->target, d->op0, d->op1, sel);
19414 return true;
19415}
19416
43cacb12
RS
19417/* Try to implement D using an SVE TBL instruction. */
19418
19419static bool
19420aarch64_evpc_sve_tbl (struct expand_vec_perm_d *d)
19421{
19422 unsigned HOST_WIDE_INT nelt;
19423
19424 /* Permuting two variable-length vectors could overflow the
19425 index range. */
19426 if (!d->one_vector_p && !d->perm.length ().is_constant (&nelt))
19427 return false;
19428
19429 if (d->testing_p)
19430 return true;
19431
d083ee47 19432 machine_mode sel_mode = related_int_vector_mode (d->vmode).require ();
43cacb12 19433 rtx sel = vec_perm_indices_to_rtx (sel_mode, d->perm);
e25c95ef
RS
19434 if (d->one_vector_p)
19435 emit_unspec2 (d->target, UNSPEC_TBL, d->op0, force_reg (sel_mode, sel));
19436 else
19437 aarch64_expand_sve_vec_perm (d->target, d->op0, d->op1, sel);
43cacb12
RS
19438 return true;
19439}
19440
9556ef20
PK
19441/* Try to implement D using SVE SEL instruction. */
19442
19443static bool
19444aarch64_evpc_sel (struct expand_vec_perm_d *d)
19445{
19446 machine_mode vmode = d->vmode;
19447 int unit_size = GET_MODE_UNIT_SIZE (vmode);
19448
19449 if (d->vec_flags != VEC_SVE_DATA
19450 || unit_size > 8)
19451 return false;
19452
19453 int n_patterns = d->perm.encoding ().npatterns ();
19454 poly_int64 vec_len = d->perm.length ();
19455
19456 for (int i = 0; i < n_patterns; ++i)
19457 if (!known_eq (d->perm[i], i)
19458 && !known_eq (d->perm[i], vec_len + i))
19459 return false;
19460
19461 for (int i = n_patterns; i < n_patterns * 2; i++)
19462 if (!d->perm.series_p (i, n_patterns, i, n_patterns)
19463 && !d->perm.series_p (i, n_patterns, vec_len + i, n_patterns))
19464 return false;
19465
19466 if (d->testing_p)
19467 return true;
19468
cc68f7c2 19469 machine_mode pred_mode = aarch64_sve_pred_mode (vmode);
9556ef20
PK
19470
19471 rtx_vector_builder builder (pred_mode, n_patterns, 2);
19472 for (int i = 0; i < n_patterns * 2; i++)
19473 {
19474 rtx elem = known_eq (d->perm[i], i) ? CONST1_RTX (BImode)
19475 : CONST0_RTX (BImode);
19476 builder.quick_push (elem);
19477 }
19478
19479 rtx const_vec = builder.build ();
19480 rtx pred = force_reg (pred_mode, const_vec);
19481 emit_insn (gen_vcond_mask (vmode, vmode, d->target, d->op1, d->op0, pred));
19482 return true;
19483}
19484
88b08073
JG
19485static bool
19486aarch64_expand_vec_perm_const_1 (struct expand_vec_perm_d *d)
19487{
19488 /* The pattern matching functions above are written to look for a small
19489 number to begin the sequence (0, 1, N/2). If we begin with an index
19490 from the second operand, we can swap the operands. */
6a70badb
RS
19491 poly_int64 nelt = d->perm.length ();
19492 if (known_ge (d->perm[0], nelt))
88b08073 19493 {
e3342de4 19494 d->perm.rotate_inputs (1);
cb5c6c29 19495 std::swap (d->op0, d->op1);
88b08073
JG
19496 }
19497
43cacb12
RS
19498 if ((d->vec_flags == VEC_ADVSIMD
19499 || d->vec_flags == VEC_SVE_DATA
19500 || d->vec_flags == VEC_SVE_PRED)
19501 && known_gt (nelt, 1))
cc4d934f 19502 {
43cacb12
RS
19503 if (aarch64_evpc_rev_local (d))
19504 return true;
19505 else if (aarch64_evpc_rev_global (d))
923fcec3
AL
19506 return true;
19507 else if (aarch64_evpc_ext (d))
ae0533da 19508 return true;
f901401e
AL
19509 else if (aarch64_evpc_dup (d))
19510 return true;
ae0533da 19511 else if (aarch64_evpc_zip (d))
cc4d934f
JG
19512 return true;
19513 else if (aarch64_evpc_uzp (d))
19514 return true;
19515 else if (aarch64_evpc_trn (d))
19516 return true;
9556ef20
PK
19517 else if (aarch64_evpc_sel (d))
19518 return true;
43cacb12
RS
19519 if (d->vec_flags == VEC_SVE_DATA)
19520 return aarch64_evpc_sve_tbl (d);
4ec8bb67 19521 else if (d->vec_flags == VEC_ADVSIMD)
43cacb12 19522 return aarch64_evpc_tbl (d);
cc4d934f 19523 }
88b08073
JG
19524 return false;
19525}
19526
f151c9e1 19527/* Implement TARGET_VECTORIZE_VEC_PERM_CONST. */
88b08073 19528
f151c9e1
RS
19529static bool
19530aarch64_vectorize_vec_perm_const (machine_mode vmode, rtx target, rtx op0,
19531 rtx op1, const vec_perm_indices &sel)
88b08073
JG
19532{
19533 struct expand_vec_perm_d d;
88b08073 19534
326ac20e 19535 /* Check whether the mask can be applied to a single vector. */
e25c95ef
RS
19536 if (sel.ninputs () == 1
19537 || (op0 && rtx_equal_p (op0, op1)))
326ac20e
RS
19538 d.one_vector_p = true;
19539 else if (sel.all_from_input_p (0))
88b08073 19540 {
326ac20e
RS
19541 d.one_vector_p = true;
19542 op1 = op0;
88b08073 19543 }
326ac20e 19544 else if (sel.all_from_input_p (1))
88b08073 19545 {
88b08073 19546 d.one_vector_p = true;
326ac20e 19547 op0 = op1;
88b08073 19548 }
326ac20e
RS
19549 else
19550 d.one_vector_p = false;
88b08073 19551
326ac20e
RS
19552 d.perm.new_vector (sel.encoding (), d.one_vector_p ? 1 : 2,
19553 sel.nelts_per_input ());
19554 d.vmode = vmode;
43cacb12 19555 d.vec_flags = aarch64_classify_vector_mode (d.vmode);
326ac20e
RS
19556 d.target = target;
19557 d.op0 = op0;
19558 d.op1 = op1;
19559 d.testing_p = !target;
e3342de4 19560
f151c9e1
RS
19561 if (!d.testing_p)
19562 return aarch64_expand_vec_perm_const_1 (&d);
88b08073 19563
326ac20e 19564 rtx_insn *last = get_last_insn ();
f151c9e1 19565 bool ret = aarch64_expand_vec_perm_const_1 (&d);
326ac20e 19566 gcc_assert (last == get_last_insn ());
88b08073
JG
19567
19568 return ret;
19569}
19570
73e3da51
RS
19571/* Generate a byte permute mask for a register of mode MODE,
19572 which has NUNITS units. */
19573
668046d1 19574rtx
73e3da51 19575aarch64_reverse_mask (machine_mode mode, unsigned int nunits)
668046d1
DS
19576{
19577 /* We have to reverse each vector because we dont have
19578 a permuted load that can reverse-load according to ABI rules. */
19579 rtx mask;
19580 rtvec v = rtvec_alloc (16);
73e3da51
RS
19581 unsigned int i, j;
19582 unsigned int usize = GET_MODE_UNIT_SIZE (mode);
668046d1
DS
19583
19584 gcc_assert (BYTES_BIG_ENDIAN);
19585 gcc_assert (AARCH64_VALID_SIMD_QREG_MODE (mode));
19586
19587 for (i = 0; i < nunits; i++)
19588 for (j = 0; j < usize; j++)
19589 RTVEC_ELT (v, i * usize + j) = GEN_INT ((i + 1) * usize - 1 - j);
19590 mask = gen_rtx_CONST_VECTOR (V16QImode, v);
19591 return force_reg (V16QImode, mask);
19592}
19593
4a942af6 19594/* Expand an SVE integer comparison using the SVE equivalent of:
f22d7973 19595
4a942af6
RS
19596 (set TARGET (CODE OP0 OP1)). */
19597
19598void
19599aarch64_expand_sve_vec_cmp_int (rtx target, rtx_code code, rtx op0, rtx op1)
f22d7973 19600{
4a942af6
RS
19601 machine_mode pred_mode = GET_MODE (target);
19602 machine_mode data_mode = GET_MODE (op0);
00fa90d9
RS
19603 rtx res = aarch64_sve_emit_int_cmp (target, pred_mode, code, data_mode,
19604 op0, op1);
19605 if (!rtx_equal_p (target, res))
19606 emit_move_insn (target, res);
f22d7973
RS
19607}
19608
43cacb12
RS
19609/* Return the UNSPEC_COND_* code for comparison CODE. */
19610
19611static unsigned int
19612aarch64_unspec_cond_code (rtx_code code)
19613{
19614 switch (code)
19615 {
19616 case NE:
cb18e86d 19617 return UNSPEC_COND_FCMNE;
43cacb12 19618 case EQ:
cb18e86d 19619 return UNSPEC_COND_FCMEQ;
43cacb12 19620 case LT:
cb18e86d 19621 return UNSPEC_COND_FCMLT;
43cacb12 19622 case GT:
cb18e86d 19623 return UNSPEC_COND_FCMGT;
43cacb12 19624 case LE:
cb18e86d 19625 return UNSPEC_COND_FCMLE;
43cacb12 19626 case GE:
cb18e86d 19627 return UNSPEC_COND_FCMGE;
4a942af6
RS
19628 case UNORDERED:
19629 return UNSPEC_COND_FCMUO;
43cacb12
RS
19630 default:
19631 gcc_unreachable ();
19632 }
19633}
19634
f22d7973 19635/* Emit:
43cacb12 19636
4a942af6 19637 (set TARGET (unspec [PRED KNOWN_PTRUE_P OP0 OP1] UNSPEC_COND_<X>))
f22d7973 19638
4a942af6
RS
19639 where <X> is the operation associated with comparison CODE.
19640 KNOWN_PTRUE_P is true if PRED is known to be a PTRUE. */
f22d7973
RS
19641
19642static void
4a942af6
RS
19643aarch64_emit_sve_fp_cond (rtx target, rtx_code code, rtx pred,
19644 bool known_ptrue_p, rtx op0, rtx op1)
43cacb12 19645{
4a942af6 19646 rtx flag = gen_int_mode (known_ptrue_p, SImode);
f22d7973 19647 rtx unspec = gen_rtx_UNSPEC (GET_MODE (pred),
4a942af6 19648 gen_rtvec (4, pred, flag, op0, op1),
f22d7973
RS
19649 aarch64_unspec_cond_code (code));
19650 emit_set_insn (target, unspec);
43cacb12
RS
19651}
19652
f22d7973 19653/* Emit the SVE equivalent of:
43cacb12 19654
4a942af6
RS
19655 (set TMP1 (unspec [PRED KNOWN_PTRUE_P OP0 OP1] UNSPEC_COND_<X1>))
19656 (set TMP2 (unspec [PRED KNOWN_PTRUE_P OP0 OP1] UNSPEC_COND_<X2>))
f22d7973 19657 (set TARGET (ior:PRED_MODE TMP1 TMP2))
43cacb12 19658
4a942af6
RS
19659 where <Xi> is the operation associated with comparison CODEi.
19660 KNOWN_PTRUE_P is true if PRED is known to be a PTRUE. */
43cacb12
RS
19661
19662static void
4a942af6
RS
19663aarch64_emit_sve_or_fp_conds (rtx target, rtx_code code1, rtx_code code2,
19664 rtx pred, bool known_ptrue_p, rtx op0, rtx op1)
43cacb12 19665{
4a942af6 19666 machine_mode pred_mode = GET_MODE (pred);
43cacb12 19667 rtx tmp1 = gen_reg_rtx (pred_mode);
4a942af6 19668 aarch64_emit_sve_fp_cond (tmp1, code1, pred, known_ptrue_p, op0, op1);
43cacb12 19669 rtx tmp2 = gen_reg_rtx (pred_mode);
4a942af6 19670 aarch64_emit_sve_fp_cond (tmp2, code2, pred, known_ptrue_p, op0, op1);
f22d7973 19671 aarch64_emit_binop (target, ior_optab, tmp1, tmp2);
43cacb12
RS
19672}
19673
f22d7973 19674/* Emit the SVE equivalent of:
43cacb12 19675
4a942af6 19676 (set TMP (unspec [PRED KNOWN_PTRUE_P OP0 OP1] UNSPEC_COND_<X>))
f22d7973 19677 (set TARGET (not TMP))
43cacb12 19678
4a942af6
RS
19679 where <X> is the operation associated with comparison CODE.
19680 KNOWN_PTRUE_P is true if PRED is known to be a PTRUE. */
43cacb12
RS
19681
19682static void
4a942af6
RS
19683aarch64_emit_sve_invert_fp_cond (rtx target, rtx_code code, rtx pred,
19684 bool known_ptrue_p, rtx op0, rtx op1)
43cacb12 19685{
4a942af6 19686 machine_mode pred_mode = GET_MODE (pred);
f22d7973 19687 rtx tmp = gen_reg_rtx (pred_mode);
4a942af6 19688 aarch64_emit_sve_fp_cond (tmp, code, pred, known_ptrue_p, op0, op1);
f22d7973 19689 aarch64_emit_unop (target, one_cmpl_optab, tmp);
43cacb12
RS
19690}
19691
f22d7973 19692/* Expand an SVE floating-point comparison using the SVE equivalent of:
43cacb12 19693
f22d7973 19694 (set TARGET (CODE OP0 OP1))
43cacb12
RS
19695
19696 If CAN_INVERT_P is true, the caller can also handle inverted results;
19697 return true if the result is in fact inverted. */
19698
19699bool
19700aarch64_expand_sve_vec_cmp_float (rtx target, rtx_code code,
19701 rtx op0, rtx op1, bool can_invert_p)
19702{
19703 machine_mode pred_mode = GET_MODE (target);
19704 machine_mode data_mode = GET_MODE (op0);
19705
16de3637 19706 rtx ptrue = aarch64_ptrue_reg (pred_mode);
43cacb12
RS
19707 switch (code)
19708 {
19709 case UNORDERED:
19710 /* UNORDERED has no immediate form. */
19711 op1 = force_reg (data_mode, op1);
f22d7973 19712 /* fall through */
43cacb12
RS
19713 case LT:
19714 case LE:
19715 case GT:
19716 case GE:
19717 case EQ:
19718 case NE:
f22d7973
RS
19719 {
19720 /* There is native support for the comparison. */
4a942af6 19721 aarch64_emit_sve_fp_cond (target, code, ptrue, true, op0, op1);
f22d7973
RS
19722 return false;
19723 }
43cacb12
RS
19724
19725 case LTGT:
19726 /* This is a trapping operation (LT or GT). */
4a942af6 19727 aarch64_emit_sve_or_fp_conds (target, LT, GT, ptrue, true, op0, op1);
43cacb12
RS
19728 return false;
19729
19730 case UNEQ:
19731 if (!flag_trapping_math)
19732 {
19733 /* This would trap for signaling NaNs. */
19734 op1 = force_reg (data_mode, op1);
4a942af6
RS
19735 aarch64_emit_sve_or_fp_conds (target, UNORDERED, EQ,
19736 ptrue, true, op0, op1);
43cacb12
RS
19737 return false;
19738 }
19739 /* fall through */
43cacb12
RS
19740 case UNLT:
19741 case UNLE:
19742 case UNGT:
19743 case UNGE:
f22d7973
RS
19744 if (flag_trapping_math)
19745 {
19746 /* Work out which elements are ordered. */
19747 rtx ordered = gen_reg_rtx (pred_mode);
19748 op1 = force_reg (data_mode, op1);
4a942af6
RS
19749 aarch64_emit_sve_invert_fp_cond (ordered, UNORDERED,
19750 ptrue, true, op0, op1);
f22d7973
RS
19751
19752 /* Test the opposite condition for the ordered elements,
19753 then invert the result. */
19754 if (code == UNEQ)
19755 code = NE;
19756 else
19757 code = reverse_condition_maybe_unordered (code);
19758 if (can_invert_p)
19759 {
4a942af6
RS
19760 aarch64_emit_sve_fp_cond (target, code,
19761 ordered, false, op0, op1);
f22d7973
RS
19762 return true;
19763 }
4a942af6
RS
19764 aarch64_emit_sve_invert_fp_cond (target, code,
19765 ordered, false, op0, op1);
f22d7973
RS
19766 return false;
19767 }
19768 break;
19769
19770 case ORDERED:
19771 /* ORDERED has no immediate form. */
19772 op1 = force_reg (data_mode, op1);
19773 break;
43cacb12
RS
19774
19775 default:
19776 gcc_unreachable ();
19777 }
f22d7973
RS
19778
19779 /* There is native support for the inverse comparison. */
19780 code = reverse_condition_maybe_unordered (code);
19781 if (can_invert_p)
19782 {
4a942af6 19783 aarch64_emit_sve_fp_cond (target, code, ptrue, true, op0, op1);
f22d7973
RS
19784 return true;
19785 }
4a942af6 19786 aarch64_emit_sve_invert_fp_cond (target, code, ptrue, true, op0, op1);
f22d7973 19787 return false;
43cacb12
RS
19788}
19789
19790/* Expand an SVE vcond pattern with operands OPS. DATA_MODE is the mode
19791 of the data being selected and CMP_MODE is the mode of the values being
19792 compared. */
19793
19794void
19795aarch64_expand_sve_vcond (machine_mode data_mode, machine_mode cmp_mode,
19796 rtx *ops)
19797{
10116ec1 19798 machine_mode pred_mode = aarch64_get_mask_mode (cmp_mode).require ();
43cacb12
RS
19799 rtx pred = gen_reg_rtx (pred_mode);
19800 if (FLOAT_MODE_P (cmp_mode))
19801 {
19802 if (aarch64_expand_sve_vec_cmp_float (pred, GET_CODE (ops[3]),
19803 ops[4], ops[5], true))
19804 std::swap (ops[1], ops[2]);
19805 }
19806 else
19807 aarch64_expand_sve_vec_cmp_int (pred, GET_CODE (ops[3]), ops[4], ops[5]);
19808
d29f7dd5
RS
19809 if (!aarch64_sve_reg_or_dup_imm (ops[1], data_mode))
19810 ops[1] = force_reg (data_mode, ops[1]);
19811 /* The "false" value can only be zero if the "true" value is a constant. */
19812 if (register_operand (ops[1], data_mode)
19813 || !aarch64_simd_reg_or_zero (ops[2], data_mode))
19814 ops[2] = force_reg (data_mode, ops[2]);
19815
43cacb12
RS
19816 rtvec vec = gen_rtvec (3, pred, ops[1], ops[2]);
19817 emit_set_insn (ops[0], gen_rtx_UNSPEC (data_mode, vec, UNSPEC_SEL));
19818}
19819
99e1629f
RS
19820/* Implement TARGET_MODES_TIEABLE_P. In principle we should always return
19821    true.  However, due to issues with register allocation it is preferable
19822    to avoid tying integer scalar and FP scalar modes.  Executing integer
19823 operations in general registers is better than treating them as scalar
19824 vector operations. This reduces latency and avoids redundant int<->FP
19825 moves. So tie modes if they are either the same class, or vector modes
19826 with other vector modes, vector structs or any scalar mode. */
97e1ad78 19827
99e1629f 19828static bool
ef4bddc2 19829aarch64_modes_tieable_p (machine_mode mode1, machine_mode mode2)
97e1ad78
JG
19830{
19831 if (GET_MODE_CLASS (mode1) == GET_MODE_CLASS (mode2))
19832 return true;
19833
19834 /* We specifically want to allow elements of "structure" modes to
19835 be tieable to the structure. This more general condition allows
43cacb12
RS
19836 other rarer situations too. The reason we don't extend this to
19837 predicate modes is that there are no predicate structure modes
19838 nor any specific instructions for extracting part of a predicate
19839 register. */
19840 if (aarch64_vector_data_mode_p (mode1)
19841 && aarch64_vector_data_mode_p (mode2))
61f17a5c
WD
19842 return true;
19843
19844 /* Also allow any scalar modes with vectors. */
19845 if (aarch64_vector_mode_supported_p (mode1)
19846 || aarch64_vector_mode_supported_p (mode2))
97e1ad78
JG
19847 return true;
19848
19849 return false;
19850}
19851
e2c75eea
JG
19852/* Return a new RTX holding the result of moving POINTER forward by
19853 AMOUNT bytes. */
19854
19855static rtx
6a70badb 19856aarch64_move_pointer (rtx pointer, poly_int64 amount)
e2c75eea
JG
19857{
19858 rtx next = plus_constant (Pmode, XEXP (pointer, 0), amount);
19859
19860 return adjust_automodify_address (pointer, GET_MODE (pointer),
19861 next, amount);
19862}
19863
19864/* Return a new RTX holding the result of moving POINTER forward by the
19865 size of the mode it points to. */
19866
19867static rtx
19868aarch64_progress_pointer (rtx pointer)
19869{
6a70badb 19870 return aarch64_move_pointer (pointer, GET_MODE_SIZE (GET_MODE (pointer)));
e2c75eea
JG
19871}
19872
19873/* Copy one MODE sized block from SRC to DST, then progress SRC and DST by
19874 MODE bytes. */
19875
19876static void
19877aarch64_copy_one_block_and_progress_pointers (rtx *src, rtx *dst,
ef4bddc2 19878 machine_mode mode)
e2c75eea
JG
19879{
19880 rtx reg = gen_reg_rtx (mode);
19881
19882 /* "Cast" the pointers to the correct mode. */
19883 *src = adjust_address (*src, mode, 0);
19884 *dst = adjust_address (*dst, mode, 0);
19885 /* Emit the memcpy. */
19886 emit_move_insn (reg, *src);
19887 emit_move_insn (*dst, reg);
19888 /* Move the pointers forward. */
19889 *src = aarch64_progress_pointer (*src);
19890 *dst = aarch64_progress_pointer (*dst);
19891}
19892
76715c32 19893/* Expand cpymem, as if from a __builtin_memcpy. Return true if
e2c75eea
JG
19894 we succeed, otherwise return false. */
19895
19896bool
76715c32 19897aarch64_expand_cpymem (rtx *operands)
e2c75eea 19898{
89c52e5e 19899 int n, mode_bits;
e2c75eea
JG
19900 rtx dst = operands[0];
19901 rtx src = operands[1];
19902 rtx base;
89c52e5e 19903 machine_mode cur_mode = BLKmode, next_mode;
e2c75eea
JG
19904 bool speed_p = !optimize_function_for_size_p (cfun);
19905
19906 /* When optimizing for size, give a better estimate of the length of a
89c52e5e
TC
19907 	 memcpy call, but use the default otherwise.  Moves larger than 8 bytes
19908 	 will always require an even number of instructions to perform.  Each
19909 	 operation requires both a load and a store, so divide the max number by 2.  */
19910 int max_num_moves = (speed_p ? 16 : AARCH64_CALL_RATIO) / 2;
e2c75eea
JG
19911
19912 /* We can't do anything smart if the amount to copy is not constant. */
19913 if (!CONST_INT_P (operands[2]))
19914 return false;
19915
89c52e5e 19916 n = INTVAL (operands[2]);
e2c75eea 19917
89c52e5e
TC
19918 /* Try to keep the number of instructions low. For all cases we will do at
19919 most two moves for the residual amount, since we'll always overlap the
19920 remainder. */
19921 if (((n / 16) + (n % 16 ? 2 : 0)) > max_num_moves)
e2c75eea
JG
19922 return false;
19923
19924 base = copy_to_mode_reg (Pmode, XEXP (dst, 0));
19925 dst = adjust_automodify_address (dst, VOIDmode, base, 0);
19926
19927 base = copy_to_mode_reg (Pmode, XEXP (src, 0));
19928 src = adjust_automodify_address (src, VOIDmode, base, 0);
19929
89c52e5e
TC
19930 /* Convert n to bits to make the rest of the code simpler. */
19931 n = n * BITS_PER_UNIT;
e2c75eea 19932
f7e1d19d
TC
19933 /* Maximum amount to copy in one go. The AArch64 back-end has integer modes
19934 larger than TImode, but we should not use them for loads/stores here. */
19935 const int copy_limit = GET_MODE_BITSIZE (TImode);
19936
89c52e5e 19937 while (n > 0)
e2c75eea 19938 {
89c52e5e
TC
19939       /* Find the largest mode in which to do the copy without over-reading
19940 	 or over-writing.  */
19941 opt_scalar_int_mode mode_iter;
19942 FOR_EACH_MODE_IN_CLASS (mode_iter, MODE_INT)
f7e1d19d 19943 if (GET_MODE_BITSIZE (mode_iter.require ()) <= MIN (n, copy_limit))
89c52e5e 19944 cur_mode = mode_iter.require ();
e2c75eea 19945
89c52e5e 19946 gcc_assert (cur_mode != BLKmode);
e2c75eea 19947
89c52e5e
TC
19948 mode_bits = GET_MODE_BITSIZE (cur_mode).to_constant ();
19949 aarch64_copy_one_block_and_progress_pointers (&src, &dst, cur_mode);
e2c75eea 19950
89c52e5e 19951 n -= mode_bits;
e2c75eea 19952
89c52e5e
TC
19953       /* Do certain trailing copies as overlapping if it's going to be
19954 	 cheaper, i.e. fewer instructions.  For instance, for a 15-byte copy
19955 	 it's more efficient to do two overlapping 8-byte copies than
19956 	 8 + 6 + 1.  */
f7e1d19d 19957 if (n > 0 && n <= 8 * BITS_PER_UNIT)
89c52e5e 19958 {
f7e1d19d
TC
19959 next_mode = smallest_mode_for_size (n, MODE_INT);
19960 int n_bits = GET_MODE_BITSIZE (next_mode).to_constant ();
89c52e5e
TC
19961 src = aarch64_move_pointer (src, (n - n_bits) / BITS_PER_UNIT);
19962 dst = aarch64_move_pointer (dst, (n - n_bits) / BITS_PER_UNIT);
19963 n = n_bits;
e2c75eea
JG
19964 }
19965 }
19966
19967 return true;
19968}
19969
141a3ccf
KT
19970/* Split a DImode store of a CONST_INT SRC to MEM DST as two
19971 SImode stores. Handle the case when the constant has identical
19972 bottom and top halves. This is beneficial when the two stores can be
19973 merged into an STP and we avoid synthesising potentially expensive
19974 immediates twice. Return true if such a split is possible. */
19975
19976bool
19977aarch64_split_dimode_const_store (rtx dst, rtx src)
19978{
19979 rtx lo = gen_lowpart (SImode, src);
19980 rtx hi = gen_highpart_mode (SImode, DImode, src);
19981
19982 bool size_p = optimize_function_for_size_p (cfun);
19983
19984 if (!rtx_equal_p (lo, hi))
19985 return false;
19986
19987 unsigned int orig_cost
19988 = aarch64_internal_mov_immediate (NULL_RTX, src, false, DImode);
19989 unsigned int lo_cost
19990 = aarch64_internal_mov_immediate (NULL_RTX, lo, false, SImode);
19991
19992 /* We want to transform:
19993 MOV x1, 49370
19994 MOVK x1, 0x140, lsl 16
19995 MOVK x1, 0xc0da, lsl 32
19996 MOVK x1, 0x140, lsl 48
19997 STR x1, [x0]
19998 into:
19999 MOV w1, 49370
20000 MOVK w1, 0x140, lsl 16
20001 STP w1, w1, [x0]
20002 So we want to perform this only when we save two instructions
20003 or more. When optimizing for size, however, accept any code size
20004 savings we can. */
20005 if (size_p && orig_cost <= lo_cost)
20006 return false;
20007
20008 if (!size_p
20009 && (orig_cost <= lo_cost + 1))
20010 return false;
20011
20012 rtx mem_lo = adjust_address (dst, SImode, 0);
20013 if (!aarch64_mem_pair_operand (mem_lo, SImode))
20014 return false;
20015
20016 rtx tmp_reg = gen_reg_rtx (SImode);
20017 aarch64_expand_mov_immediate (tmp_reg, lo);
20018 rtx mem_hi = aarch64_move_pointer (mem_lo, GET_MODE_SIZE (SImode));
20019   /* Don't emit an explicit store pair as this may not always be profitable.
20020 Let the sched-fusion logic decide whether to merge them. */
20021 emit_move_insn (mem_lo, tmp_reg);
20022 emit_move_insn (mem_hi, tmp_reg);
20023
20024 return true;
20025}
20026
30c46053
MC
20027/* Generate RTL for a conditional branch with rtx comparison CODE in
20028 mode CC_MODE. The destination of the unlikely conditional branch
20029 is LABEL_REF. */
20030
20031void
20032aarch64_gen_unlikely_cbranch (enum rtx_code code, machine_mode cc_mode,
20033 rtx label_ref)
20034{
20035 rtx x;
20036 x = gen_rtx_fmt_ee (code, VOIDmode,
20037 gen_rtx_REG (cc_mode, CC_REGNUM),
20038 const0_rtx);
20039
20040 x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
20041 gen_rtx_LABEL_REF (VOIDmode, label_ref),
20042 pc_rtx);
20043 aarch64_emit_unlikely_jump (gen_rtx_SET (pc_rtx, x));
20044}
20045
20046/* Generate DImode scratch registers for 128-bit (TImode) addition.
20047
20048 OP1 represents the TImode destination operand 1
20049 OP2 represents the TImode destination operand 2
20050 LOW_DEST represents the low half (DImode) of TImode operand 0
20051 LOW_IN1 represents the low half (DImode) of TImode operand 1
20052 LOW_IN2 represents the low half (DImode) of TImode operand 2
20053 HIGH_DEST represents the high half (DImode) of TImode operand 0
20054 HIGH_IN1 represents the high half (DImode) of TImode operand 1
20055 HIGH_IN2 represents the high half (DImode) of TImode operand 2. */
20056
20057void
20058aarch64_addti_scratch_regs (rtx op1, rtx op2, rtx *low_dest,
20059 rtx *low_in1, rtx *low_in2,
20060 rtx *high_dest, rtx *high_in1,
20061 rtx *high_in2)
20062{
20063 *low_dest = gen_reg_rtx (DImode);
20064 *low_in1 = gen_lowpart (DImode, op1);
20065 *low_in2 = simplify_gen_subreg (DImode, op2, TImode,
20066 subreg_lowpart_offset (DImode, TImode));
20067 *high_dest = gen_reg_rtx (DImode);
20068 *high_in1 = gen_highpart (DImode, op1);
20069 *high_in2 = simplify_gen_subreg (DImode, op2, TImode,
20070 subreg_highpart_offset (DImode, TImode));
20071}
20072
20073/* Generate DImode scratch registers for 128-bit (TImode) subtraction.
20074
20075    This function differs from 'aarch64_addti_scratch_regs' in that
20076 OP1 can be an immediate constant (zero). We must call
20077 subreg_highpart_offset with DImode and TImode arguments, otherwise
20078 VOIDmode will be used for the const_int which generates an internal
20079 error from subreg_size_highpart_offset which does not expect a size of zero.
20080
20081 OP1 represents the TImode destination operand 1
20082 OP2 represents the TImode destination operand 2
20083 LOW_DEST represents the low half (DImode) of TImode operand 0
20084 LOW_IN1 represents the low half (DImode) of TImode operand 1
20085 LOW_IN2 represents the low half (DImode) of TImode operand 2
20086 HIGH_DEST represents the high half (DImode) of TImode operand 0
20087 HIGH_IN1 represents the high half (DImode) of TImode operand 1
20088 HIGH_IN2 represents the high half (DImode) of TImode operand 2. */
20089
20090
20091void
20092aarch64_subvti_scratch_regs (rtx op1, rtx op2, rtx *low_dest,
20093 rtx *low_in1, rtx *low_in2,
20094 rtx *high_dest, rtx *high_in1,
20095 rtx *high_in2)
20096{
20097 *low_dest = gen_reg_rtx (DImode);
20098 *low_in1 = simplify_gen_subreg (DImode, op1, TImode,
20099 subreg_lowpart_offset (DImode, TImode));
20100
20101 *low_in2 = simplify_gen_subreg (DImode, op2, TImode,
20102 subreg_lowpart_offset (DImode, TImode));
20103 *high_dest = gen_reg_rtx (DImode);
20104
20105 *high_in1 = simplify_gen_subreg (DImode, op1, TImode,
20106 subreg_highpart_offset (DImode, TImode));
20107 *high_in2 = simplify_gen_subreg (DImode, op2, TImode,
20108 subreg_highpart_offset (DImode, TImode));
20109}
20110
20111/* Generate RTL for 128-bit (TImode) subtraction with overflow.
20112
20113 OP0 represents the TImode destination operand 0
20114 LOW_DEST represents the low half (DImode) of TImode operand 0
20115 LOW_IN1 represents the low half (DImode) of TImode operand 1
20116 LOW_IN2 represents the low half (DImode) of TImode operand 2
20117 HIGH_DEST represents the high half (DImode) of TImode operand 0
20118 HIGH_IN1 represents the high half (DImode) of TImode operand 1
a58fe3c5
RE
20119 HIGH_IN2 represents the high half (DImode) of TImode operand 2
20120 UNSIGNED_P is true if the operation is being performed on unsigned
20121 values. */
30c46053
MC
20122void
20123aarch64_expand_subvti (rtx op0, rtx low_dest, rtx low_in1,
20124 rtx low_in2, rtx high_dest, rtx high_in1,
a58fe3c5 20125 rtx high_in2, bool unsigned_p)
30c46053
MC
20126{
20127 if (low_in2 == const0_rtx)
20128 {
20129 low_dest = low_in1;
a58fe3c5
RE
20130 high_in2 = force_reg (DImode, high_in2);
20131 if (unsigned_p)
20132 emit_insn (gen_subdi3_compare1 (high_dest, high_in1, high_in2));
20133 else
20134 emit_insn (gen_subvdi_insn (high_dest, high_in1, high_in2));
30c46053
MC
20135 }
20136 else
20137 {
20138 if (CONST_INT_P (low_in2))
20139 {
30c46053 20140 high_in2 = force_reg (DImode, high_in2);
a58fe3c5
RE
20141 emit_insn (gen_subdi3_compare1_imm (low_dest, low_in1, low_in2,
20142 GEN_INT (-INTVAL (low_in2))));
30c46053
MC
20143 }
20144 else
20145 emit_insn (gen_subdi3_compare1 (low_dest, low_in1, low_in2));
a58fe3c5
RE
20146
20147 if (unsigned_p)
20148 emit_insn (gen_usubdi3_carryinC (high_dest, high_in1, high_in2));
20149 else
20150 emit_insn (gen_subdi3_carryinV (high_dest, high_in1, high_in2));
30c46053
MC
20151 }
20152
20153 emit_move_insn (gen_lowpart (DImode, op0), low_dest);
20154 emit_move_insn (gen_highpart (DImode, op0), high_dest);
20155
20156}
20157
a3125fc2
CL
20158/* Implement the TARGET_ASAN_SHADOW_OFFSET hook. */
20159
20160static unsigned HOST_WIDE_INT
20161aarch64_asan_shadow_offset (void)
20162{
10078f3e
AP
20163 if (TARGET_ILP32)
20164 return (HOST_WIDE_INT_1 << 29);
20165 else
20166 return (HOST_WIDE_INT_1 << 36);
a3125fc2
CL
20167}
20168
5f3bc026 20169static rtx
cb4347e8 20170aarch64_gen_ccmp_first (rtx_insn **prep_seq, rtx_insn **gen_seq,
5f3bc026
ZC
20171 int code, tree treeop0, tree treeop1)
20172{
c8012fbc
WD
20173 machine_mode op_mode, cmp_mode, cc_mode = CCmode;
20174 rtx op0, op1;
5f3bc026 20175 int unsignedp = TYPE_UNSIGNED (TREE_TYPE (treeop0));
c8012fbc 20176 insn_code icode;
5f3bc026
ZC
20177 struct expand_operand ops[4];
20178
5f3bc026
ZC
20179 start_sequence ();
20180 expand_operands (treeop0, treeop1, NULL_RTX, &op0, &op1, EXPAND_NORMAL);
20181
20182 op_mode = GET_MODE (op0);
20183 if (op_mode == VOIDmode)
20184 op_mode = GET_MODE (op1);
20185
20186 switch (op_mode)
20187 {
4e10a5a7
RS
20188 case E_QImode:
20189 case E_HImode:
20190 case E_SImode:
5f3bc026
ZC
20191 cmp_mode = SImode;
20192 icode = CODE_FOR_cmpsi;
20193 break;
20194
4e10a5a7 20195 case E_DImode:
5f3bc026
ZC
20196 cmp_mode = DImode;
20197 icode = CODE_FOR_cmpdi;
20198 break;
20199
4e10a5a7 20200 case E_SFmode:
786e3c06
WD
20201 cmp_mode = SFmode;
20202 cc_mode = aarch64_select_cc_mode ((rtx_code) code, op0, op1);
20203 icode = cc_mode == CCFPEmode ? CODE_FOR_fcmpesf : CODE_FOR_fcmpsf;
20204 break;
20205
4e10a5a7 20206 case E_DFmode:
786e3c06
WD
20207 cmp_mode = DFmode;
20208 cc_mode = aarch64_select_cc_mode ((rtx_code) code, op0, op1);
20209 icode = cc_mode == CCFPEmode ? CODE_FOR_fcmpedf : CODE_FOR_fcmpdf;
20210 break;
20211
5f3bc026
ZC
20212 default:
20213 end_sequence ();
20214 return NULL_RTX;
20215 }
20216
c8012fbc
WD
20217 op0 = prepare_operand (icode, op0, 0, op_mode, cmp_mode, unsignedp);
20218 op1 = prepare_operand (icode, op1, 1, op_mode, cmp_mode, unsignedp);
5f3bc026
ZC
20219 if (!op0 || !op1)
20220 {
20221 end_sequence ();
20222 return NULL_RTX;
20223 }
20224 *prep_seq = get_insns ();
20225 end_sequence ();
20226
c8012fbc
WD
20227 create_fixed_operand (&ops[0], op0);
20228 create_fixed_operand (&ops[1], op1);
5f3bc026
ZC
20229
20230 start_sequence ();
c8012fbc 20231 if (!maybe_expand_insn (icode, 2, ops))
5f3bc026
ZC
20232 {
20233 end_sequence ();
20234 return NULL_RTX;
20235 }
20236 *gen_seq = get_insns ();
20237 end_sequence ();
20238
c8012fbc
WD
20239 return gen_rtx_fmt_ee ((rtx_code) code, cc_mode,
20240 gen_rtx_REG (cc_mode, CC_REGNUM), const0_rtx);
5f3bc026
ZC
20241}
20242
20243static rtx
cb4347e8
TS
20244aarch64_gen_ccmp_next (rtx_insn **prep_seq, rtx_insn **gen_seq, rtx prev,
20245 int cmp_code, tree treeop0, tree treeop1, int bit_code)
5f3bc026 20246{
c8012fbc
WD
20247 rtx op0, op1, target;
20248 machine_mode op_mode, cmp_mode, cc_mode = CCmode;
5f3bc026 20249 int unsignedp = TYPE_UNSIGNED (TREE_TYPE (treeop0));
c8012fbc 20250 insn_code icode;
5f3bc026 20251 struct expand_operand ops[6];
c8012fbc 20252 int aarch64_cond;
5f3bc026 20253
cb4347e8 20254 push_to_sequence (*prep_seq);
5f3bc026
ZC
20255 expand_operands (treeop0, treeop1, NULL_RTX, &op0, &op1, EXPAND_NORMAL);
20256
20257 op_mode = GET_MODE (op0);
20258 if (op_mode == VOIDmode)
20259 op_mode = GET_MODE (op1);
20260
20261 switch (op_mode)
20262 {
4e10a5a7
RS
20263 case E_QImode:
20264 case E_HImode:
20265 case E_SImode:
5f3bc026 20266 cmp_mode = SImode;
c8012fbc 20267 icode = CODE_FOR_ccmpsi;
5f3bc026
ZC
20268 break;
20269
4e10a5a7 20270 case E_DImode:
5f3bc026 20271 cmp_mode = DImode;
c8012fbc 20272 icode = CODE_FOR_ccmpdi;
5f3bc026
ZC
20273 break;
20274
4e10a5a7 20275 case E_SFmode:
786e3c06
WD
20276 cmp_mode = SFmode;
20277 cc_mode = aarch64_select_cc_mode ((rtx_code) cmp_code, op0, op1);
20278 icode = cc_mode == CCFPEmode ? CODE_FOR_fccmpesf : CODE_FOR_fccmpsf;
20279 break;
20280
4e10a5a7 20281 case E_DFmode:
786e3c06
WD
20282 cmp_mode = DFmode;
20283 cc_mode = aarch64_select_cc_mode ((rtx_code) cmp_code, op0, op1);
20284 icode = cc_mode == CCFPEmode ? CODE_FOR_fccmpedf : CODE_FOR_fccmpdf;
20285 break;
20286
5f3bc026
ZC
20287 default:
20288 end_sequence ();
20289 return NULL_RTX;
20290 }
20291
20292 op0 = prepare_operand (icode, op0, 2, op_mode, cmp_mode, unsignedp);
20293 op1 = prepare_operand (icode, op1, 3, op_mode, cmp_mode, unsignedp);
20294 if (!op0 || !op1)
20295 {
20296 end_sequence ();
20297 return NULL_RTX;
20298 }
20299 *prep_seq = get_insns ();
20300 end_sequence ();
20301
20302 target = gen_rtx_REG (cc_mode, CC_REGNUM);
c8012fbc 20303 aarch64_cond = aarch64_get_condition_code_1 (cc_mode, (rtx_code) cmp_code);
5f3bc026 20304
c8012fbc
WD
20305 if (bit_code != AND)
20306 {
20307 prev = gen_rtx_fmt_ee (REVERSE_CONDITION (GET_CODE (prev),
20308 GET_MODE (XEXP (prev, 0))),
20309 VOIDmode, XEXP (prev, 0), const0_rtx);
20310 aarch64_cond = AARCH64_INVERSE_CONDITION_CODE (aarch64_cond);
20311 }
20312
20313 create_fixed_operand (&ops[0], XEXP (prev, 0));
5f3bc026
ZC
20314 create_fixed_operand (&ops[1], target);
20315 create_fixed_operand (&ops[2], op0);
20316 create_fixed_operand (&ops[3], op1);
c8012fbc
WD
20317 create_fixed_operand (&ops[4], prev);
20318 create_fixed_operand (&ops[5], GEN_INT (aarch64_cond));
5f3bc026 20319
cb4347e8 20320 push_to_sequence (*gen_seq);
5f3bc026
ZC
20321 if (!maybe_expand_insn (icode, 6, ops))
20322 {
20323 end_sequence ();
20324 return NULL_RTX;
20325 }
20326
20327 *gen_seq = get_insns ();
20328 end_sequence ();
20329
c8012fbc 20330 return gen_rtx_fmt_ee ((rtx_code) cmp_code, VOIDmode, target, const0_rtx);
5f3bc026
ZC
20331}
20332
20333#undef TARGET_GEN_CCMP_FIRST
20334#define TARGET_GEN_CCMP_FIRST aarch64_gen_ccmp_first
20335
20336#undef TARGET_GEN_CCMP_NEXT
20337#define TARGET_GEN_CCMP_NEXT aarch64_gen_ccmp_next
20338
6a569cdd
KT
20339/* Implement TARGET_SCHED_MACRO_FUSION_P. Return true if target supports
20340 instruction fusion of some sort. */
20341
20342static bool
20343aarch64_macro_fusion_p (void)
20344{
b175b679 20345 return aarch64_tune_params.fusible_ops != AARCH64_FUSE_NOTHING;
6a569cdd
KT
20346}
20347
20348
20349/* Implement TARGET_SCHED_MACRO_FUSION_PAIR_P. Return true if PREV and CURR
20350 should be kept together during scheduling. */
20351
20352static bool
20353aarch_macro_fusion_pair_p (rtx_insn *prev, rtx_insn *curr)
20354{
20355 rtx set_dest;
20356 rtx prev_set = single_set (prev);
20357 rtx curr_set = single_set (curr);
20358 /* prev and curr are simple SET insns i.e. no flag setting or branching. */
20359 bool simple_sets_p = prev_set && curr_set && !any_condjump_p (curr);
20360
20361 if (!aarch64_macro_fusion_p ())
20362 return false;
20363
d7b03373 20364 if (simple_sets_p && aarch64_fusion_enabled_p (AARCH64_FUSE_MOV_MOVK))
6a569cdd
KT
20365 {
20366 /* We are trying to match:
20367 prev (mov) == (set (reg r0) (const_int imm16))
20368 curr (movk) == (set (zero_extract (reg r0)
20369 (const_int 16)
20370 (const_int 16))
20371 (const_int imm16_1)) */
20372
20373 set_dest = SET_DEST (curr_set);
20374
20375 if (GET_CODE (set_dest) == ZERO_EXTRACT
20376 && CONST_INT_P (SET_SRC (curr_set))
20377 && CONST_INT_P (SET_SRC (prev_set))
20378 && CONST_INT_P (XEXP (set_dest, 2))
20379 && INTVAL (XEXP (set_dest, 2)) == 16
20380 && REG_P (XEXP (set_dest, 0))
20381 && REG_P (SET_DEST (prev_set))
20382 && REGNO (XEXP (set_dest, 0)) == REGNO (SET_DEST (prev_set)))
20383 {
20384 return true;
20385 }
20386 }
20387
d7b03373 20388 if (simple_sets_p && aarch64_fusion_enabled_p (AARCH64_FUSE_ADRP_ADD))
9bbe08fe
KT
20389 {
20390
20391 /* We're trying to match:
20392 prev (adrp) == (set (reg r1)
20393 (high (symbol_ref ("SYM"))))
20394 curr (add) == (set (reg r0)
20395 (lo_sum (reg r1)
20396 (symbol_ref ("SYM"))))
20397 Note that r0 need not necessarily be the same as r1, especially
20398 during pre-regalloc scheduling. */
20399
20400 if (satisfies_constraint_Ush (SET_SRC (prev_set))
20401 && REG_P (SET_DEST (prev_set)) && REG_P (SET_DEST (curr_set)))
20402 {
20403 if (GET_CODE (SET_SRC (curr_set)) == LO_SUM
20404 && REG_P (XEXP (SET_SRC (curr_set), 0))
20405 && REGNO (XEXP (SET_SRC (curr_set), 0))
20406 == REGNO (SET_DEST (prev_set))
20407 && rtx_equal_p (XEXP (SET_SRC (prev_set), 0),
20408 XEXP (SET_SRC (curr_set), 1)))
20409 return true;
20410 }
20411 }
20412
d7b03373 20413 if (simple_sets_p && aarch64_fusion_enabled_p (AARCH64_FUSE_MOVK_MOVK))
cd0cb232
KT
20414 {
20415
20416 /* We're trying to match:
20417 prev (movk) == (set (zero_extract (reg r0)
20418 (const_int 16)
20419 (const_int 32))
20420 (const_int imm16_1))
20421 curr (movk) == (set (zero_extract (reg r0)
20422 (const_int 16)
20423 (const_int 48))
20424 (const_int imm16_2)) */
20425
20426 if (GET_CODE (SET_DEST (prev_set)) == ZERO_EXTRACT
20427 && GET_CODE (SET_DEST (curr_set)) == ZERO_EXTRACT
20428 && REG_P (XEXP (SET_DEST (prev_set), 0))
20429 && REG_P (XEXP (SET_DEST (curr_set), 0))
20430 && REGNO (XEXP (SET_DEST (prev_set), 0))
20431 == REGNO (XEXP (SET_DEST (curr_set), 0))
20432 && CONST_INT_P (XEXP (SET_DEST (prev_set), 2))
20433 && CONST_INT_P (XEXP (SET_DEST (curr_set), 2))
20434 && INTVAL (XEXP (SET_DEST (prev_set), 2)) == 32
20435 && INTVAL (XEXP (SET_DEST (curr_set), 2)) == 48
20436 && CONST_INT_P (SET_SRC (prev_set))
20437 && CONST_INT_P (SET_SRC (curr_set)))
20438 return true;
20439
20440 }
d7b03373 20441 if (simple_sets_p && aarch64_fusion_enabled_p (AARCH64_FUSE_ADRP_LDR))
d8354ad7
KT
20442 {
20443 /* We're trying to match:
20444 prev (adrp) == (set (reg r0)
20445 (high (symbol_ref ("SYM"))))
20446 curr (ldr) == (set (reg r1)
20447 (mem (lo_sum (reg r0)
20448 (symbol_ref ("SYM")))))
20449 or
20450 curr (ldr) == (set (reg r1)
20451 (zero_extend (mem
20452 (lo_sum (reg r0)
20453 (symbol_ref ("SYM")))))) */
20454 if (satisfies_constraint_Ush (SET_SRC (prev_set))
20455 && REG_P (SET_DEST (prev_set)) && REG_P (SET_DEST (curr_set)))
20456 {
20457 rtx curr_src = SET_SRC (curr_set);
20458
20459 if (GET_CODE (curr_src) == ZERO_EXTEND)
20460 curr_src = XEXP (curr_src, 0);
20461
20462 if (MEM_P (curr_src) && GET_CODE (XEXP (curr_src, 0)) == LO_SUM
20463 && REG_P (XEXP (XEXP (curr_src, 0), 0))
20464 && REGNO (XEXP (XEXP (curr_src, 0), 0))
20465 == REGNO (SET_DEST (prev_set))
20466 && rtx_equal_p (XEXP (XEXP (curr_src, 0), 1),
20467 XEXP (SET_SRC (prev_set), 0)))
20468 return true;
20469 }
20470 }
cd0cb232 20471
a4f3fa71 20472 /* Fuse compare (CMP/CMN/TST/BICS) and conditional branch. */
d7b03373 20473 if (aarch64_fusion_enabled_p (AARCH64_FUSE_CMP_BRANCH)
a4f3fa71
WD
20474 && prev_set && curr_set && any_condjump_p (curr)
20475 && GET_CODE (SET_SRC (prev_set)) == COMPARE
20476 && SCALAR_INT_MODE_P (GET_MODE (XEXP (SET_SRC (prev_set), 0)))
20477 && reg_referenced_p (SET_DEST (prev_set), PATTERN (curr)))
20478 return true;
20479
20480 /* Fuse flag-setting ALU instructions and conditional branch. */
20481 if (aarch64_fusion_enabled_p (AARCH64_FUSE_ALU_BRANCH)
3759108f
AP
20482 && any_condjump_p (curr))
20483 {
509f819a
N
20484 unsigned int condreg1, condreg2;
20485 rtx cc_reg_1;
20486 aarch64_fixed_condition_code_regs (&condreg1, &condreg2);
20487 cc_reg_1 = gen_rtx_REG (CCmode, condreg1);
20488
20489 if (reg_referenced_p (cc_reg_1, PATTERN (curr))
20490 && prev
20491 && modified_in_p (cc_reg_1, prev))
20492 {
f8a27206
AP
20493 enum attr_type prev_type = get_attr_type (prev);
20494
509f819a
N
20495 	  /* FIXME: this misses some instructions which are considered simple
20496 	     arithmetic instructions for ThunderX.  Simple shifts are missed here.  */
20497 if (prev_type == TYPE_ALUS_SREG
20498 || prev_type == TYPE_ALUS_IMM
20499 || prev_type == TYPE_LOGICS_REG
20500 || prev_type == TYPE_LOGICS_IMM)
20501 return true;
20502 }
3759108f
AP
20503 }
20504
a4f3fa71 20505 /* Fuse ALU instructions and CBZ/CBNZ. */
bee7e0fc
AP
20506 if (prev_set
20507 && curr_set
a4f3fa71 20508 && aarch64_fusion_enabled_p (AARCH64_FUSE_ALU_CBZ)
00c7c57f
JB
20509 && any_condjump_p (curr))
20510 {
20511 /* We're trying to match:
20512 prev (alu_insn) == (set (r0) plus ((r0) (r1/imm)))
20513 curr (cbz) == (set (pc) (if_then_else (eq/ne) (r0)
20514 (const_int 0))
20515 (label_ref ("SYM"))
20516 (pc)) */
20517 if (SET_DEST (curr_set) == (pc_rtx)
20518 && GET_CODE (SET_SRC (curr_set)) == IF_THEN_ELSE
20519 && REG_P (XEXP (XEXP (SET_SRC (curr_set), 0), 0))
20520 && REG_P (SET_DEST (prev_set))
20521 && REGNO (SET_DEST (prev_set))
20522 == REGNO (XEXP (XEXP (SET_SRC (curr_set), 0), 0)))
20523 {
20524 /* Fuse ALU operations followed by conditional branch instruction. */
20525 switch (get_attr_type (prev))
20526 {
20527 case TYPE_ALU_IMM:
20528 case TYPE_ALU_SREG:
20529 case TYPE_ADC_REG:
20530 case TYPE_ADC_IMM:
20531 case TYPE_ADCS_REG:
20532 case TYPE_ADCS_IMM:
20533 case TYPE_LOGIC_REG:
20534 case TYPE_LOGIC_IMM:
20535 case TYPE_CSEL:
20536 case TYPE_ADR:
20537 case TYPE_MOV_IMM:
20538 case TYPE_SHIFT_REG:
20539 case TYPE_SHIFT_IMM:
20540 case TYPE_BFM:
20541 case TYPE_RBIT:
20542 case TYPE_REV:
20543 case TYPE_EXTEND:
20544 return true;
20545
20546 default:;
20547 }
20548 }
20549 }
20550
6a569cdd
KT
20551 return false;
20552}
20553
f2879a90
KT
20554/* Return true iff the instruction fusion described by OP is enabled. */
20555
20556bool
20557aarch64_fusion_enabled_p (enum aarch64_fusion_pairs op)
20558{
20559 return (aarch64_tune_params.fusible_ops & op) != 0;
20560}
20561
350013bc
BC
20562/* If MEM is in the form of [base+offset], extract the two parts
20563    of the address and store them in BASE and OFFSET; otherwise return false
20564 after clearing BASE and OFFSET. */
20565
20566bool
20567extract_base_offset_in_addr (rtx mem, rtx *base, rtx *offset)
20568{
20569 rtx addr;
20570
20571 gcc_assert (MEM_P (mem));
20572
20573 addr = XEXP (mem, 0);
20574
20575 if (REG_P (addr))
20576 {
20577 *base = addr;
20578 *offset = const0_rtx;
20579 return true;
20580 }
20581
20582 if (GET_CODE (addr) == PLUS
20583 && REG_P (XEXP (addr, 0)) && CONST_INT_P (XEXP (addr, 1)))
20584 {
20585 *base = XEXP (addr, 0);
20586 *offset = XEXP (addr, 1);
20587 return true;
20588 }
20589
20590 *base = NULL_RTX;
20591 *offset = NULL_RTX;
20592
20593 return false;
20594}
20595
20596/* Types for scheduling fusion. */
20597enum sched_fusion_type
20598{
20599 SCHED_FUSION_NONE = 0,
20600 SCHED_FUSION_LD_SIGN_EXTEND,
20601 SCHED_FUSION_LD_ZERO_EXTEND,
20602 SCHED_FUSION_LD,
20603 SCHED_FUSION_ST,
20604 SCHED_FUSION_NUM
20605};
20606
20607 /* If INSN is a load or store whose address is in the form of [base+offset],
20608    extract the two parts and store them in BASE and OFFSET.  Return the
20609    scheduling fusion type of this INSN.  */
20610
20611static enum sched_fusion_type
20612fusion_load_store (rtx_insn *insn, rtx *base, rtx *offset)
20613{
20614 rtx x, dest, src;
20615 enum sched_fusion_type fusion = SCHED_FUSION_LD;
20616
20617 gcc_assert (INSN_P (insn));
20618 x = PATTERN (insn);
20619 if (GET_CODE (x) != SET)
20620 return SCHED_FUSION_NONE;
20621
20622 src = SET_SRC (x);
20623 dest = SET_DEST (x);
20624
abc52318
KT
20625 machine_mode dest_mode = GET_MODE (dest);
20626
20627 if (!aarch64_mode_valid_for_sched_fusion_p (dest_mode))
350013bc
BC
20628 return SCHED_FUSION_NONE;
20629
20630 if (GET_CODE (src) == SIGN_EXTEND)
20631 {
20632 fusion = SCHED_FUSION_LD_SIGN_EXTEND;
20633 src = XEXP (src, 0);
20634 if (GET_CODE (src) != MEM || GET_MODE (src) != SImode)
20635 return SCHED_FUSION_NONE;
20636 }
20637 else if (GET_CODE (src) == ZERO_EXTEND)
20638 {
20639 fusion = SCHED_FUSION_LD_ZERO_EXTEND;
20640 src = XEXP (src, 0);
20641 if (GET_CODE (src) != MEM || GET_MODE (src) != SImode)
20642 return SCHED_FUSION_NONE;
20643 }
20644
20645 if (GET_CODE (src) == MEM && REG_P (dest))
20646 extract_base_offset_in_addr (src, base, offset);
20647 else if (GET_CODE (dest) == MEM && (REG_P (src) || src == const0_rtx))
20648 {
20649 fusion = SCHED_FUSION_ST;
20650 extract_base_offset_in_addr (dest, base, offset);
20651 }
20652 else
20653 return SCHED_FUSION_NONE;
20654
20655 if (*base == NULL_RTX || *offset == NULL_RTX)
20656 fusion = SCHED_FUSION_NONE;
20657
20658 return fusion;
20659}
20660
20661/* Implement the TARGET_SCHED_FUSION_PRIORITY hook.
20662
20663    Currently we only support fusing ldr or str instructions, so FUSION_PRI
20664    and PRI are only calculated for these instructions.  For other instructions,
20665    FUSION_PRI and PRI are simply set to MAX_PRI - 1.  In the future, other
20666    types of instruction fusion can be added by returning different priorities.
20667
20668 It's important that irrelevant instructions get the largest FUSION_PRI. */
20669
20670static void
20671aarch64_sched_fusion_priority (rtx_insn *insn, int max_pri,
20672 int *fusion_pri, int *pri)
20673{
20674 int tmp, off_val;
20675 rtx base, offset;
20676 enum sched_fusion_type fusion;
20677
20678 gcc_assert (INSN_P (insn));
20679
20680 tmp = max_pri - 1;
20681 fusion = fusion_load_store (insn, &base, &offset);
20682 if (fusion == SCHED_FUSION_NONE)
20683 {
20684 *pri = tmp;
20685 *fusion_pri = tmp;
20686 return;
20687 }
20688
20689 /* Set FUSION_PRI according to fusion type and base register. */
20690 *fusion_pri = tmp - fusion * FIRST_PSEUDO_REGISTER - REGNO (base);
20691
20692 /* Calculate PRI. */
20693 tmp /= 2;
20694
20695 /* INSN with smaller offset goes first. */
20696 off_val = (int)(INTVAL (offset));
20697 if (off_val >= 0)
20698 tmp -= (off_val & 0xfffff);
20699 else
20700 tmp += ((- off_val) & 0xfffff);
20701
20702 *pri = tmp;
20703 return;
20704}
20705
9bca63d4
WD
20706/* Implement the TARGET_SCHED_ADJUST_PRIORITY hook.
20707 Adjust priority of sha1h instructions so they are scheduled before
20708 other SHA1 instructions. */
20709
20710static int
20711aarch64_sched_adjust_priority (rtx_insn *insn, int priority)
20712{
20713 rtx x = PATTERN (insn);
20714
20715 if (GET_CODE (x) == SET)
20716 {
20717 x = SET_SRC (x);
20718
20719 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_SHA1H)
20720 return priority + 10;
20721 }
20722
20723 return priority;
20724}
20725
350013bc
BC
20726/* Given OPERANDS of consecutive load/store, check if we can merge
20727 them into ldp/stp. LOAD is true if they are load instructions.
20728 MODE is the mode of memory operands. */
20729
20730bool
20731aarch64_operands_ok_for_ldpstp (rtx *operands, bool load,
b8506a8a 20732 machine_mode mode)
350013bc
BC
20733{
20734 HOST_WIDE_INT offval_1, offval_2, msize;
20735 enum reg_class rclass_1, rclass_2;
20736 rtx mem_1, mem_2, reg_1, reg_2, base_1, base_2, offset_1, offset_2;
20737
20738 if (load)
20739 {
20740 mem_1 = operands[1];
20741 mem_2 = operands[3];
20742 reg_1 = operands[0];
20743 reg_2 = operands[2];
20744 gcc_assert (REG_P (reg_1) && REG_P (reg_2));
20745 if (REGNO (reg_1) == REGNO (reg_2))
20746 return false;
20747 }
20748 else
20749 {
20750 mem_1 = operands[0];
20751 mem_2 = operands[2];
20752 reg_1 = operands[1];
20753 reg_2 = operands[3];
20754 }
20755
bf84ac44
AP
20756 /* The mems cannot be volatile. */
20757 if (MEM_VOLATILE_P (mem_1) || MEM_VOLATILE_P (mem_2))
20758 return false;
20759
54700e2e
AP
20760   /* If we have SImode and slow unaligned ldp,
20761      check that the alignment is at least 8 bytes.  */
20762 if (mode == SImode
20763 && (aarch64_tune_params.extra_tuning_flags
20764 & AARCH64_EXTRA_TUNE_SLOW_UNALIGNED_LDPW)
20765 && !optimize_size
20766 && MEM_ALIGN (mem_1) < 8 * BITS_PER_UNIT)
20767 return false;
20768
350013bc
BC
20769 /* Check if the addresses are in the form of [base+offset]. */
20770 extract_base_offset_in_addr (mem_1, &base_1, &offset_1);
20771 if (base_1 == NULL_RTX || offset_1 == NULL_RTX)
20772 return false;
20773 extract_base_offset_in_addr (mem_2, &base_2, &offset_2);
20774 if (base_2 == NULL_RTX || offset_2 == NULL_RTX)
20775 return false;
20776
20777 /* Check if the bases are same. */
20778 if (!rtx_equal_p (base_1, base_2))
20779 return false;
20780
dfe1da23
JW
20781 /* The operands must be of the same size. */
20782 gcc_assert (known_eq (GET_MODE_SIZE (GET_MODE (mem_1)),
20783 GET_MODE_SIZE (GET_MODE (mem_2))));
20784
350013bc
BC
20785 offval_1 = INTVAL (offset_1);
20786 offval_2 = INTVAL (offset_2);
6a70badb
RS
20787 /* We should only be trying this for fixed-sized modes. There is no
20788 SVE LDP/STP instruction. */
20789 msize = GET_MODE_SIZE (mode).to_constant ();
350013bc
BC
20790 /* Check if the offsets are consecutive. */
20791 if (offval_1 != (offval_2 + msize) && offval_2 != (offval_1 + msize))
20792 return false;
20793
20794 /* Check if the addresses are clobbered by load. */
20795 if (load)
20796 {
20797 if (reg_mentioned_p (reg_1, mem_1))
20798 return false;
20799
20800 /* In increasing order, the last load can clobber the address. */
20801 if (offval_1 > offval_2 && reg_mentioned_p (reg_2, mem_2))
9b56ec11 20802 return false;
350013bc
BC
20803 }
20804
9b56ec11
JW
20805 /* One of the memory accesses must be a mempair operand.
20806 If it is not the first one, they need to be swapped by the
20807 peephole. */
20808 if (!aarch64_mem_pair_operand (mem_1, GET_MODE (mem_1))
20809 && !aarch64_mem_pair_operand (mem_2, GET_MODE (mem_2)))
20810 return false;
20811
350013bc
BC
20812 if (REG_P (reg_1) && FP_REGNUM_P (REGNO (reg_1)))
20813 rclass_1 = FP_REGS;
20814 else
20815 rclass_1 = GENERAL_REGS;
20816
20817 if (REG_P (reg_2) && FP_REGNUM_P (REGNO (reg_2)))
20818 rclass_2 = FP_REGS;
20819 else
20820 rclass_2 = GENERAL_REGS;
20821
20822 /* Check if the registers are of same class. */
20823 if (rclass_1 != rclass_2)
20824 return false;
20825
20826 return true;
20827}
20828
9b56ec11
JW
20829/* Given OPERANDS of consecutive load/store that can be merged,
20830 swap them if they are not in ascending order. */
20831void
20832aarch64_swap_ldrstr_operands (rtx* operands, bool load)
20833{
20834 rtx mem_1, mem_2, base_1, base_2, offset_1, offset_2;
20835 HOST_WIDE_INT offval_1, offval_2;
20836
20837 if (load)
20838 {
20839 mem_1 = operands[1];
20840 mem_2 = operands[3];
20841 }
20842 else
20843 {
20844 mem_1 = operands[0];
20845 mem_2 = operands[2];
20846 }
20847
20848 extract_base_offset_in_addr (mem_1, &base_1, &offset_1);
20849 extract_base_offset_in_addr (mem_2, &base_2, &offset_2);
20850
20851 offval_1 = INTVAL (offset_1);
20852 offval_2 = INTVAL (offset_2);
20853
20854 if (offval_1 > offval_2)
20855 {
20856 /* Irrespective of whether this is a load or a store,
20857 we do the same swap. */
20858 std::swap (operands[0], operands[2]);
20859 std::swap (operands[1], operands[3]);
20860 }
20861}
20862
d0b51297
JW
20863/* Taking X and Y to be HOST_WIDE_INT pointers, return the result of a
20864 comparison between the two. */
20865int
20866aarch64_host_wide_int_compare (const void *x, const void *y)
20867{
20868 return wi::cmps (* ((const HOST_WIDE_INT *) x),
20869 * ((const HOST_WIDE_INT *) y));
20870}
20871
20872/* Taking X and Y to be pairs of RTX, one pointing to a MEM rtx and the
20873 other pointing to a REG rtx containing an offset, compare the offsets
20874 of the two pairs.
20875
20876 Return:
20877
20878 1 iff offset (X) > offset (Y)
20879 0 iff offset (X) == offset (Y)
20880 -1 iff offset (X) < offset (Y) */
20881int
20882aarch64_ldrstr_offset_compare (const void *x, const void *y)
20883{
20884 const rtx * operands_1 = (const rtx *) x;
20885 const rtx * operands_2 = (const rtx *) y;
20886 rtx mem_1, mem_2, base, offset_1, offset_2;
20887
20888 if (MEM_P (operands_1[0]))
20889 mem_1 = operands_1[0];
20890 else
20891 mem_1 = operands_1[1];
20892
20893 if (MEM_P (operands_2[0]))
20894 mem_2 = operands_2[0];
20895 else
20896 mem_2 = operands_2[1];
20897
20898 /* Extract the offsets. */
20899 extract_base_offset_in_addr (mem_1, &base, &offset_1);
20900 extract_base_offset_in_addr (mem_2, &base, &offset_2);
20901
20902 gcc_assert (offset_1 != NULL_RTX && offset_2 != NULL_RTX);
20903
20904 return wi::cmps (INTVAL (offset_1), INTVAL (offset_2));
20905}
20906
350013bc
BC
20907/* Given OPERANDS of consecutive load/store, check if we can merge
20908 them into ldp/stp by adjusting the offset. LOAD is true if they
20909 are load instructions. MODE is the mode of memory operands.
20910
20911 Given below consecutive stores:
20912
20913 str w1, [xb, 0x100]
20914 str w1, [xb, 0x104]
20915 str w1, [xb, 0x108]
20916 str w1, [xb, 0x10c]
20917
20918 Though the offsets are out of the range supported by stp, we can
20919 still pair them after adjusting the offset, like:
20920
20921 add scratch, xb, 0x100
20922 stp w1, w1, [scratch]
20923 stp w1, w1, [scratch, 0x8]
20924
20925 The peephole patterns detecting this opportunity should guarantee
20926    the scratch register is available.  */
20927
20928bool
20929aarch64_operands_adjust_ok_for_ldpstp (rtx *operands, bool load,
146c2e3a 20930 scalar_mode mode)
350013bc 20931{
34d7854d
JW
20932 const int num_insns = 4;
20933 enum reg_class rclass;
20934 HOST_WIDE_INT offvals[num_insns], msize;
20935 rtx mem[num_insns], reg[num_insns], base[num_insns], offset[num_insns];
350013bc
BC
20936
20937 if (load)
20938 {
34d7854d
JW
20939 for (int i = 0; i < num_insns; i++)
20940 {
20941 reg[i] = operands[2 * i];
20942 mem[i] = operands[2 * i + 1];
20943
20944 gcc_assert (REG_P (reg[i]));
20945 }
d0b51297
JW
20946
20947 /* Do not attempt to merge the loads if the loads clobber each other. */
20948 for (int i = 0; i < 8; i += 2)
20949 for (int j = i + 2; j < 8; j += 2)
20950 if (reg_overlap_mentioned_p (operands[i], operands[j]))
20951 return false;
350013bc
BC
20952 }
20953 else
34d7854d
JW
20954 for (int i = 0; i < num_insns; i++)
20955 {
20956 mem[i] = operands[2 * i];
20957 reg[i] = operands[2 * i + 1];
20958 }
350013bc 20959
34d7854d
JW
20960 /* Skip if memory operand is by itself valid for ldp/stp. */
20961 if (!MEM_P (mem[0]) || aarch64_mem_pair_operand (mem[0], mode))
bf84ac44
AP
20962 return false;
20963
34d7854d
JW
20964 for (int i = 0; i < num_insns; i++)
20965 {
20966 /* The mems cannot be volatile. */
20967 if (MEM_VOLATILE_P (mem[i]))
20968 return false;
20969
20970 /* Check if the addresses are in the form of [base+offset]. */
20971 extract_base_offset_in_addr (mem[i], base + i, offset + i);
20972 if (base[i] == NULL_RTX || offset[i] == NULL_RTX)
20973 return false;
20974 }
20975
363b395b
JW
20976 /* Check if the registers are of same class. */
20977 rclass = REG_P (reg[0]) && FP_REGNUM_P (REGNO (reg[0]))
20978 ? FP_REGS : GENERAL_REGS;
20979
20980 for (int i = 1; i < num_insns; i++)
20981 if (REG_P (reg[i]) && FP_REGNUM_P (REGNO (reg[i])))
20982 {
20983 if (rclass != FP_REGS)
20984 return false;
20985 }
20986 else
20987 {
20988 if (rclass != GENERAL_REGS)
20989 return false;
20990 }
20991
20992 /* Only the last register in the order in which they occur
20993 may be clobbered by the load. */
20994 if (rclass == GENERAL_REGS && load)
20995 for (int i = 0; i < num_insns - 1; i++)
34d7854d
JW
20996 if (reg_mentioned_p (reg[i], mem[i]))
20997 return false;
350013bc
BC
20998
20999 /* Check if the bases are same. */
34d7854d
JW
21000 for (int i = 0; i < num_insns - 1; i++)
21001 if (!rtx_equal_p (base[i], base[i + 1]))
21002 return false;
21003
21004 for (int i = 0; i < num_insns; i++)
21005 offvals[i] = INTVAL (offset[i]);
350013bc 21006
350013bc 21007 msize = GET_MODE_SIZE (mode);
d0b51297
JW
21008
21009 /* Check if the offsets can be put in the right order to do a ldp/stp. */
34d7854d
JW
21010 qsort (offvals, num_insns, sizeof (HOST_WIDE_INT),
21011 aarch64_host_wide_int_compare);
d0b51297
JW
21012
21013 if (!(offvals[1] == offvals[0] + msize
21014 && offvals[3] == offvals[2] + msize))
350013bc
BC
21015 return false;
21016
d0b51297
JW
21017 /* Check that offsets are within range of each other. The ldp/stp
21018 instructions have 7 bit immediate offsets, so use 0x80. */
21019 if (offvals[2] - offvals[0] >= msize * 0x80)
21020 return false;
350013bc 21021
d0b51297
JW
21022 /* The offsets must be aligned with respect to each other. */
21023 if (offvals[0] % msize != offvals[2] % msize)
21024 return false;
21025
54700e2e
AP
21026   /* If we have SImode and slow unaligned ldp,
21027      check that the alignment is at least 8 bytes.  */
21028 if (mode == SImode
21029 && (aarch64_tune_params.extra_tuning_flags
34d7854d 21030 & AARCH64_EXTRA_TUNE_SLOW_UNALIGNED_LDPW)
54700e2e 21031 && !optimize_size
34d7854d 21032 && MEM_ALIGN (mem[0]) < 8 * BITS_PER_UNIT)
54700e2e
AP
21033 return false;
21034
350013bc
BC
21035 return true;
21036}
21037
21038/* Given OPERANDS of consecutive load/store, this function pairs them
d0b51297
JW
21039 into LDP/STP after adjusting the offset. It depends on the fact
21040 that the operands can be sorted so the offsets are correct for STP.
350013bc
BC
21041	   MODE is the mode of the memory operands.  CODE is the rtl operator
21042	   which should be applied to all memory operands; it is SIGN_EXTEND,
21043	   ZERO_EXTEND or UNKNOWN.  */
21044
21045bool
21046aarch64_gen_adjusted_ldpstp (rtx *operands, bool load,
146c2e3a 21047 scalar_mode mode, RTX_CODE code)
350013bc 21048{
d0b51297 21049 rtx base, offset_1, offset_3, t1, t2;
350013bc 21050 rtx mem_1, mem_2, mem_3, mem_4;
d0b51297
JW
21051 rtx temp_operands[8];
21052 HOST_WIDE_INT off_val_1, off_val_3, base_off, new_off_1, new_off_3,
21053 stp_off_upper_limit, stp_off_lower_limit, msize;
9b56ec11 21054
d0b51297
JW
21055 /* We make changes on a copy as we may still bail out. */
21056 for (int i = 0; i < 8; i ++)
21057 temp_operands[i] = operands[i];
9b56ec11 21058
d0b51297
JW
21059 /* Sort the operands. */
21060 qsort (temp_operands, 4, 2 * sizeof (rtx *), aarch64_ldrstr_offset_compare);
9b56ec11 21061
f6af9c21
RE
21062 /* Copy the memory operands so that if we have to bail for some
21063 reason the original addresses are unchanged. */
350013bc
BC
21064 if (load)
21065 {
f6af9c21
RE
21066 mem_1 = copy_rtx (temp_operands[1]);
21067 mem_2 = copy_rtx (temp_operands[3]);
21068 mem_3 = copy_rtx (temp_operands[5]);
21069 mem_4 = copy_rtx (temp_operands[7]);
350013bc
BC
21070 }
21071 else
21072 {
f6af9c21
RE
21073 mem_1 = copy_rtx (temp_operands[0]);
21074 mem_2 = copy_rtx (temp_operands[2]);
21075 mem_3 = copy_rtx (temp_operands[4]);
21076 mem_4 = copy_rtx (temp_operands[6]);
350013bc
BC
21077 gcc_assert (code == UNKNOWN);
21078 }
21079
9b56ec11 21080 extract_base_offset_in_addr (mem_1, &base, &offset_1);
d0b51297
JW
21081 extract_base_offset_in_addr (mem_3, &base, &offset_3);
21082 gcc_assert (base != NULL_RTX && offset_1 != NULL_RTX
21083 && offset_3 != NULL_RTX);
350013bc 21084
d0b51297 21085 /* Adjust offset so it can fit in LDP/STP instruction. */
350013bc 21086 msize = GET_MODE_SIZE (mode);
d0b51297
JW
21087 stp_off_upper_limit = msize * (0x40 - 1);
21088 stp_off_lower_limit = - msize * 0x40;
350013bc 21089
d0b51297
JW
21090 off_val_1 = INTVAL (offset_1);
21091 off_val_3 = INTVAL (offset_3);
21092
21093	  /* The base offset is optimally halfway between the two STP/LDP offsets.  */
21094 if (msize <= 4)
21095 base_off = (off_val_1 + off_val_3) / 2;
21096 else
21097	    /* However, due to issues with negative LDP/STP offset generation for
21098	       larger modes (DF, DI and vector modes), we must not use negative
21099	       addresses smaller than 9 signed unadjusted bits can store.  This
21100	       provides the most range in this case.  */
21101 base_off = off_val_1;
21102
21103 /* Adjust the base so that it is aligned with the addresses but still
21104 optimal. */
21105 if (base_off % msize != off_val_1 % msize)
21106 /* Fix the offset, bearing in mind we want to make it bigger not
21107 smaller. */
21108 base_off += (((base_off % msize) - (off_val_1 % msize)) + msize) % msize;
21109 else if (msize <= 4)
21110 /* The negative range of LDP/STP is one larger than the positive range. */
21111 base_off += msize;
21112
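  /* Illustrative arithmetic for the adjustment above (not taken from a
     real compilation): with msize == 4, off_val_1 == 2 and off_val_3 == 14,
     base_off starts as (2 + 14) / 2 == 8.  Since 8 % 4 != 2 % 4, the
     correction (((0 - 2) + 4) % 4) == 2 raises base_off to 10, which is
     congruent to off_val_1 modulo msize, so the final immediates
     off_val - base_off are multiples of the access size as LDP/STP
     require.  */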
21113 /* Check if base offset is too big or too small. We can attempt to resolve
21114 this issue by setting it to the maximum value and seeing if the offsets
21115 still fit. */
21116 if (base_off >= 0x1000)
350013bc 21117 {
d0b51297
JW
21118 base_off = 0x1000 - 1;
21119	      /* We must still make sure that the base offset is aligned with respect
21120		 to the address.  But it may not be made any bigger.  */
21121 base_off -= (((base_off % msize) - (off_val_1 % msize)) + msize) % msize;
350013bc
BC
21122 }
21123
d0b51297
JW
21124 /* Likewise for the case where the base is too small. */
21125 if (base_off <= -0x1000)
350013bc 21126 {
d0b51297
JW
21127 base_off = -0x1000 + 1;
21128 base_off += (((base_off % msize) - (off_val_1 % msize)) + msize) % msize;
350013bc
BC
21129 }
21130
d0b51297
JW
21131 /* Offset of the first STP/LDP. */
21132 new_off_1 = off_val_1 - base_off;
21133
21134 /* Offset of the second STP/LDP. */
21135 new_off_3 = off_val_3 - base_off;
350013bc 21136
d0b51297
JW
21137 /* The offsets must be within the range of the LDP/STP instructions. */
21138 if (new_off_1 > stp_off_upper_limit || new_off_1 < stp_off_lower_limit
21139 || new_off_3 > stp_off_upper_limit || new_off_3 < stp_off_lower_limit)
350013bc
BC
21140 return false;
21141
d0b51297
JW
21142 replace_equiv_address_nv (mem_1, plus_constant (Pmode, operands[8],
21143 new_off_1), true);
21144 replace_equiv_address_nv (mem_2, plus_constant (Pmode, operands[8],
21145 new_off_1 + msize), true);
21146 replace_equiv_address_nv (mem_3, plus_constant (Pmode, operands[8],
21147 new_off_3), true);
21148 replace_equiv_address_nv (mem_4, plus_constant (Pmode, operands[8],
21149 new_off_3 + msize), true);
21150
21151 if (!aarch64_mem_pair_operand (mem_1, mode)
21152 || !aarch64_mem_pair_operand (mem_3, mode))
21153 return false;
350013bc
BC
21154
21155 if (code == ZERO_EXTEND)
21156 {
21157 mem_1 = gen_rtx_ZERO_EXTEND (DImode, mem_1);
21158 mem_2 = gen_rtx_ZERO_EXTEND (DImode, mem_2);
21159 mem_3 = gen_rtx_ZERO_EXTEND (DImode, mem_3);
21160 mem_4 = gen_rtx_ZERO_EXTEND (DImode, mem_4);
21161 }
21162 else if (code == SIGN_EXTEND)
21163 {
21164 mem_1 = gen_rtx_SIGN_EXTEND (DImode, mem_1);
21165 mem_2 = gen_rtx_SIGN_EXTEND (DImode, mem_2);
21166 mem_3 = gen_rtx_SIGN_EXTEND (DImode, mem_3);
21167 mem_4 = gen_rtx_SIGN_EXTEND (DImode, mem_4);
21168 }
21169
21170 if (load)
21171 {
d0b51297 21172 operands[0] = temp_operands[0];
350013bc 21173 operands[1] = mem_1;
d0b51297 21174 operands[2] = temp_operands[2];
350013bc 21175 operands[3] = mem_2;
d0b51297 21176 operands[4] = temp_operands[4];
350013bc 21177 operands[5] = mem_3;
d0b51297 21178 operands[6] = temp_operands[6];
350013bc
BC
21179 operands[7] = mem_4;
21180 }
21181 else
21182 {
21183 operands[0] = mem_1;
d0b51297 21184 operands[1] = temp_operands[1];
350013bc 21185 operands[2] = mem_2;
d0b51297 21186 operands[3] = temp_operands[3];
350013bc 21187 operands[4] = mem_3;
d0b51297 21188 operands[5] = temp_operands[5];
350013bc 21189 operands[6] = mem_4;
d0b51297 21190 operands[7] = temp_operands[7];
350013bc
BC
21191 }
21192
21193 /* Emit adjusting instruction. */
d0b51297 21194 emit_insn (gen_rtx_SET (operands[8], plus_constant (DImode, base, base_off)));
350013bc 21195 /* Emit ldp/stp instructions. */
f7df4a84
RS
21196 t1 = gen_rtx_SET (operands[0], operands[1]);
21197 t2 = gen_rtx_SET (operands[2], operands[3]);
350013bc 21198 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, t1, t2)));
f7df4a84
RS
21199 t1 = gen_rtx_SET (operands[4], operands[5]);
21200 t2 = gen_rtx_SET (operands[6], operands[7]);
350013bc
BC
21201 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, t1, t2)));
21202 return true;
21203}
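/* Shape of the emitted sequence (a sketch under assumed operands, not a
   literal trace): for a store variant with operands[8] chosen as x3 and
   the common base in x0, the three emit_insn calls above correspond to
   roughly

     add x3, x0, #base_off
     stp x1, x2, [x3, #new_off_1]
     stp x4, x5, [x3, #new_off_3]

   i.e. one adjusting add followed by two paired memory accesses; the
   actual registers and immediates depend entirely on the operands.  */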
21204
76a34e3f
RS
21205/* Implement TARGET_VECTORIZE_EMPTY_MASK_IS_EXPENSIVE. Assume for now that
21206 it isn't worth branching around empty masked ops (including masked
21207 stores). */
21208
21209static bool
21210aarch64_empty_mask_is_expensive (unsigned)
21211{
21212 return false;
21213}
21214
1b1e81f8
JW
21215/* Return 1 if pseudo register should be created and used to hold
21216 GOT address for PIC code. */
21217
21218bool
21219aarch64_use_pseudo_pic_reg (void)
21220{
21221 return aarch64_cmodel == AARCH64_CMODEL_SMALL_SPIC;
21222}
21223
7b841a12
JW
21224/* Implement TARGET_UNSPEC_MAY_TRAP_P. */
21225
21226static int
21227aarch64_unspec_may_trap_p (const_rtx x, unsigned flags)
21228{
21229 switch (XINT (x, 1))
21230 {
21231 case UNSPEC_GOTSMALLPIC:
21232 case UNSPEC_GOTSMALLPIC28K:
21233 case UNSPEC_GOTTINYPIC:
21234 return 0;
21235 default:
21236 break;
21237 }
21238
21239 return default_unspec_may_trap_p (x, flags);
21240}
21241
39252973
KT
21242
21243/* If X is a positive CONST_DOUBLE with a value that is a power of 2
21244 return the log2 of that value. Otherwise return -1. */
21245
21246int
21247aarch64_fpconst_pow_of_2 (rtx x)
21248{
21249 const REAL_VALUE_TYPE *r;
21250
21251 if (!CONST_DOUBLE_P (x))
21252 return -1;
21253
21254 r = CONST_DOUBLE_REAL_VALUE (x);
21255
21256 if (REAL_VALUE_NEGATIVE (*r)
21257 || REAL_VALUE_ISNAN (*r)
21258 || REAL_VALUE_ISINF (*r)
21259 || !real_isinteger (r, DFmode))
21260 return -1;
21261
21262 return exact_log2 (real_to_integer (r));
21263}
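/* Illustrative values: 1.0 -> 0, 4.0 -> 2 and 32.0 -> 5, while -2.0,
   0.75, 3.0, NaN and infinity all give -1 (negative, non-integral,
   not a power of two, or not finite).  */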
21264
188d0079
JH
21265/* If X is a positive CONST_DOUBLE with a value that is the reciprocal of a
21266   power of 2 (i.e. 1/2^n), return the number of float bits, e.g. for
21267   x == 1/2^n return n.  Otherwise return -1.  */
21268
21269int
21270aarch64_fpconst_pow2_recip (rtx x)
21271{
21272 REAL_VALUE_TYPE r0;
21273
21274 if (!CONST_DOUBLE_P (x))
21275 return -1;
21276
21277 r0 = *CONST_DOUBLE_REAL_VALUE (x);
21278 if (exact_real_inverse (DFmode, &r0)
21279 && !REAL_VALUE_NEGATIVE (r0))
21280 {
21281 int ret = exact_log2 (real_to_integer (&r0));
21282 if (ret >= 1 && ret <= 32)
21283 return ret;
21284 }
21285 return -1;
21286}
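/* Illustrative values: 0.5 -> 1, 0.25 -> 2 and 0x1p-32 -> 32, while
   2.0, 0.3 and 0x1p-33 give -1 (not the reciprocal of a power of two,
   or the exponent falls outside the accepted 1..32 range).  */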
21287
39252973
KT
21288/* If X is a vector of equal CONST_DOUBLE values and that value is
21289 Y, return the aarch64_fpconst_pow_of_2 of Y. Otherwise return -1. */
21290
21291int
21292aarch64_vec_fpconst_pow_of_2 (rtx x)
21293{
6a70badb
RS
21294 int nelts;
21295 if (GET_CODE (x) != CONST_VECTOR
21296 || !CONST_VECTOR_NUNITS (x).is_constant (&nelts))
39252973
KT
21297 return -1;
21298
21299 if (GET_MODE_CLASS (GET_MODE (x)) != MODE_VECTOR_FLOAT)
21300 return -1;
21301
21302 int firstval = aarch64_fpconst_pow_of_2 (CONST_VECTOR_ELT (x, 0));
21303 if (firstval <= 0)
21304 return -1;
21305
6a70badb 21306 for (int i = 1; i < nelts; i++)
39252973
KT
21307 if (aarch64_fpconst_pow_of_2 (CONST_VECTOR_ELT (x, i)) != firstval)
21308 return -1;
21309
21310 return firstval;
21311}
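/* Illustrative values: a V2DF constant { 8.0, 8.0 } gives 3, whereas
   { 4.0, 8.0 } and { 1.0, 1.0 } give -1 (the elements differ, or the
   common value 1.0 has a log2 of 0 and is rejected by the
   firstval <= 0 test).  */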
21312
11e554b3
JG
21313/* Implement TARGET_PROMOTED_TYPE to promote 16-bit floating point types
21314 to float.
21315
21316 __fp16 always promotes through this hook.
21317 _Float16 may promote if TARGET_FLT_EVAL_METHOD is 16, but we do that
21318 through the generic excess precision logic rather than here. */
21319
c2ec330c
AL
21320static tree
21321aarch64_promoted_type (const_tree t)
21322{
11e554b3
JG
21323 if (SCALAR_FLOAT_TYPE_P (t)
21324 && TYPE_MAIN_VARIANT (t) == aarch64_fp16_type_node)
c2ec330c 21325 return float_type_node;
11e554b3 21326
c2ec330c
AL
21327 return NULL_TREE;
21328}
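/* Illustrative effect (a sketch, not part of the original code): in

     __fp16 a, b, c;
     c = a + b;

   the addition is carried out in float and the result converted back to
   __fp16, because __fp16 promotes through this hook.  The same expression
   with _Float16 operands is instead governed by the excess precision
   handling in aarch64_excess_precision below.  */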
ee62a5a6
RS
21329
21330/* Implement the TARGET_OPTAB_SUPPORTED_P hook. */
21331
21332static bool
9acc9cbe 21333aarch64_optab_supported_p (int op, machine_mode mode1, machine_mode,
ee62a5a6
RS
21334 optimization_type opt_type)
21335{
21336 switch (op)
21337 {
21338 case rsqrt_optab:
9acc9cbe 21339 return opt_type == OPTIMIZE_FOR_SPEED && use_rsqrt_p (mode1);
ee62a5a6
RS
21340
21341 default:
21342 return true;
21343 }
21344}
21345
43cacb12
RS
21346/* Implement the TARGET_DWARF_POLY_INDETERMINATE_VALUE hook. */
21347
21348static unsigned int
21349aarch64_dwarf_poly_indeterminate_value (unsigned int i, unsigned int *factor,
21350 int *offset)
21351{
21352 /* Polynomial invariant 1 == (VG / 2) - 1. */
21353 gcc_assert (i == 1);
21354 *factor = 2;
21355 *offset = 1;
21356 return AARCH64_DWARF_VG;
21357}
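/* Worked example (illustrative): the hook tells the DWARF consumer to
   compute indeterminate 1 at run time as AARCH64_DWARF_VG / 2 - 1.
   For a 256-bit SVE vector length, VG (the number of 64-bit granules)
   is 4, so the indeterminate evaluates to 4 / 2 - 1 == 1, i.e. one
   128-bit chunk beyond the minimum vector length.  */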
21358
11e554b3
JG
21359/* Implement TARGET_LIBGCC_FLOATING_MODE_SUPPORTED_P - return TRUE
21360 if MODE is HFmode, and punt to the generic implementation otherwise. */
21361
21362static bool
7c5bd57a 21363aarch64_libgcc_floating_mode_supported_p (scalar_float_mode mode)
11e554b3
JG
21364{
21365 return (mode == HFmode
21366 ? true
21367 : default_libgcc_floating_mode_supported_p (mode));
21368}
21369
2e5f8203
JG
21370/* Implement TARGET_SCALAR_MODE_SUPPORTED_P - return TRUE
21371 if MODE is HFmode, and punt to the generic implementation otherwise. */
21372
21373static bool
18e2a8b8 21374aarch64_scalar_mode_supported_p (scalar_mode mode)
2e5f8203
JG
21375{
21376 return (mode == HFmode
21377 ? true
21378 : default_scalar_mode_supported_p (mode));
21379}
21380
11e554b3
JG
21381/* Set the value of FLT_EVAL_METHOD.
21382 ISO/IEC TS 18661-3 defines two values that we'd like to make use of:
21383
21384 0: evaluate all operations and constants, whose semantic type has at
21385 most the range and precision of type float, to the range and
21386 precision of float; evaluate all other operations and constants to
21387 the range and precision of the semantic type;
21388
21389   N, where _FloatN is a supported interchange floating type:
21390     evaluate all operations and constants, whose semantic type has at
21391     most the range and precision of the _FloatN type, to the range and
21392     precision of the _FloatN type; evaluate all other operations and
21393     constants to the range and precision of the semantic type;
21394
21395 If we have the ARMv8.2-A extensions then we support _Float16 in native
21396 precision, so we should set this to 16. Otherwise, we support the type,
21397 but want to evaluate expressions in float precision, so set this to
21398 0. */
21399
21400static enum flt_eval_method
21401aarch64_excess_precision (enum excess_precision_type type)
21402{
21403 switch (type)
21404 {
21405 case EXCESS_PRECISION_TYPE_FAST:
21406 case EXCESS_PRECISION_TYPE_STANDARD:
21407 /* We can calculate either in 16-bit range and precision or
21408 32-bit range and precision. Make that decision based on whether
21409 we have native support for the ARMv8.2-A 16-bit floating-point
21410 instructions or not. */
21411 return (TARGET_FP_F16INST
21412 ? FLT_EVAL_METHOD_PROMOTE_TO_FLOAT16
21413 : FLT_EVAL_METHOD_PROMOTE_TO_FLOAT);
21414 case EXCESS_PRECISION_TYPE_IMPLICIT:
21415 return FLT_EVAL_METHOD_PROMOTE_TO_FLOAT16;
21416 default:
21417 gcc_unreachable ();
21418 }
21419 return FLT_EVAL_METHOD_UNPREDICTABLE;
21420}
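/* Illustrative consequence (a sketch, not from the original code): in

     _Float16 x, y, z;
     z = x * y + z;

   the intermediate operations are evaluated in float when this hook
   returns FLT_EVAL_METHOD_PROMOTE_TO_FLOAT (no native FP16 support),
   but stay in _Float16 range and precision when TARGET_FP_F16INST
   makes it return FLT_EVAL_METHOD_PROMOTE_TO_FLOAT16.  */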
21421
b48d6421
KT
21422/* Implement TARGET_SCHED_CAN_SPECULATE_INSN. Return true if INSN can be
21423 scheduled for speculative execution. Reject the long-running division
21424 and square-root instructions. */
21425
21426static bool
21427aarch64_sched_can_speculate_insn (rtx_insn *insn)
21428{
21429 switch (get_attr_type (insn))
21430 {
21431 case TYPE_SDIV:
21432 case TYPE_UDIV:
21433 case TYPE_FDIVS:
21434 case TYPE_FDIVD:
21435 case TYPE_FSQRTS:
21436 case TYPE_FSQRTD:
21437 case TYPE_NEON_FP_SQRT_S:
21438 case TYPE_NEON_FP_SQRT_D:
21439 case TYPE_NEON_FP_SQRT_S_Q:
21440 case TYPE_NEON_FP_SQRT_D_Q:
21441 case TYPE_NEON_FP_DIV_S:
21442 case TYPE_NEON_FP_DIV_D:
21443 case TYPE_NEON_FP_DIV_S_Q:
21444 case TYPE_NEON_FP_DIV_D_Q:
21445 return false;
21446 default:
21447 return true;
21448 }
21449}
21450
43cacb12
RS
21451/* Implement TARGET_COMPUTE_PRESSURE_CLASSES. */
21452
21453static int
21454aarch64_compute_pressure_classes (reg_class *classes)
21455{
21456 int i = 0;
21457 classes[i++] = GENERAL_REGS;
21458 classes[i++] = FP_REGS;
21459 /* PR_REGS isn't a useful pressure class because many predicate pseudo
21460 registers need to go in PR_LO_REGS at some point during their
21461 lifetime. Splitting it into two halves has the effect of making
21462 all predicates count against PR_LO_REGS, so that we try whenever
21463 possible to restrict the number of live predicates to 8. This
21464 greatly reduces the amount of spilling in certain loops. */
21465 classes[i++] = PR_LO_REGS;
21466 classes[i++] = PR_HI_REGS;
21467 return i;
21468}
21469
21470/* Implement TARGET_CAN_CHANGE_MODE_CLASS. */
21471
21472static bool
21473aarch64_can_change_mode_class (machine_mode from,
21474 machine_mode to, reg_class_t)
21475{
76607e7e
RS
21476 unsigned int from_flags = aarch64_classify_vector_mode (from);
21477 unsigned int to_flags = aarch64_classify_vector_mode (to);
21478
21479 bool from_sve_p = (from_flags & VEC_ANY_SVE);
21480 bool to_sve_p = (to_flags & VEC_ANY_SVE);
21481
21482 bool from_partial_sve_p = from_sve_p && (from_flags & VEC_PARTIAL);
21483 bool to_partial_sve_p = to_sve_p && (to_flags & VEC_PARTIAL);
21484
21485 /* Don't allow changes between partial SVE modes and other modes.
21486 The contents of partial SVE modes are distributed evenly across
21487 the register, whereas GCC expects them to be clustered together. */
21488 if (from_partial_sve_p != to_partial_sve_p)
21489 return false;
21490
21491 /* Similarly reject changes between partial SVE modes that have
21492 different patterns of significant and insignificant bits. */
21493 if (from_partial_sve_p
21494 && (aarch64_sve_container_bits (from) != aarch64_sve_container_bits (to)
21495 || GET_MODE_UNIT_SIZE (from) != GET_MODE_UNIT_SIZE (to)))
21496 return false;
21497
002092be
RS
21498 if (BYTES_BIG_ENDIAN)
21499 {
002092be
RS
21500 /* Don't allow changes between SVE data modes and non-SVE modes.
21501 See the comment at the head of aarch64-sve.md for details. */
21502 if (from_sve_p != to_sve_p)
21503 return false;
21504
21505 /* Don't allow changes in element size: lane 0 of the new vector
21506 would not then be lane 0 of the old vector. See the comment
21507 above aarch64_maybe_expand_sve_subreg_move for a more detailed
21508 description.
21509
21510 In the worst case, this forces a register to be spilled in
21511 one mode and reloaded in the other, which handles the
21512 endianness correctly. */
21513 if (from_sve_p && GET_MODE_UNIT_SIZE (from) != GET_MODE_UNIT_SIZE (to))
21514 return false;
21515 }
43cacb12
RS
21516 return true;
21517}
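/* Illustrative case (assuming the usual SVE mode naming, e.g. VNx2SI as
   a partial mode whose 32-bit elements each sit in a 64-bit container
   and VNx4SI as the corresponding full mode): a mode change between
   VNx2SI and VNx4SI is rejected above because only one of the two is
   partial, whereas VNx4SI <-> VNx4SF is allowed on little-endian since
   neither mode is partial and the element size does not change.  */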
21518
5cce8171
RS
21519/* Implement TARGET_EARLY_REMAT_MODES. */
21520
21521static void
21522aarch64_select_early_remat_modes (sbitmap modes)
21523{
21524 /* SVE values are not normally live across a call, so it should be
21525 worth doing early rematerialization even in VL-specific mode. */
21526 for (int i = 0; i < NUM_MACHINE_MODES; ++i)
5c38705d
RS
21527 if (aarch64_sve_mode_p ((machine_mode) i))
21528 bitmap_set_bit (modes, i);
5cce8171
RS
21529}
21530
c0111dc4
RE
21531/* Override the default target speculation_safe_value. */
21532static rtx
21533aarch64_speculation_safe_value (machine_mode mode,
21534 rtx result, rtx val, rtx failval)
21535{
21536 /* Maybe we should warn if falling back to hard barriers. They are
21537     likely to be noticeably more expensive than the alternative below.  */
21538 if (!aarch64_track_speculation)
21539 return default_speculation_safe_value (mode, result, val, failval);
21540
21541 if (!REG_P (val))
21542 val = copy_to_mode_reg (mode, val);
21543
21544 if (!aarch64_reg_or_zero (failval, mode))
21545 failval = copy_to_mode_reg (mode, failval);
21546
21cebf90 21547 emit_insn (gen_despeculate_copy (mode, result, val, failval));
c0111dc4
RE
21548 return result;
21549}
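/* Usage sketch (illustrative, not from the original code): this hook
   backs __builtin_speculation_safe_value, as in

     idx = __builtin_speculation_safe_value (idx);
     val = table[idx];

   With -mtrack-speculation the copy becomes a conditional select against
   the speculation-tracking register (followed by a CSDB), forcing IDX to
   the fail value - zero by default - on a mis-speculated path; without
   that option the generic hook falls back to the hard barrier mentioned
   in the comment above.  */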
21550
2d56d6ba
KT
21551/* Implement TARGET_ESTIMATED_POLY_VALUE.
21552 Look into the tuning structure for an estimate.
21553 VAL.coeffs[1] is multiplied by the number of VQ chunks over the initial
21554 Advanced SIMD 128 bits. */
21555
21556static HOST_WIDE_INT
21557aarch64_estimated_poly_value (poly_int64 val)
21558{
21559 enum aarch64_sve_vector_bits_enum width_source
21560 = aarch64_tune_params.sve_width;
21561
21562 /* If we still don't have an estimate, use the default. */
21563 if (width_source == SVE_SCALABLE)
21564 return default_estimated_poly_value (val);
21565
21566 HOST_WIDE_INT over_128 = width_source - 128;
21567 return val.coeffs[0] + val.coeffs[1] * over_128 / 128;
21568}
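/* Worked example (illustrative): with sve_width == SVE_256, over_128 is
   128, so a poly_int64 such as the SVE vector byte size (16, 16) is
   estimated as 16 + 16 * 128 / 128 == 32 bytes, i.e. a 256-bit vector.
   With SVE_SCALABLE the generic default_estimated_poly_value is used
   instead.  */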
21569
d9186814
SE
21570
21571/* Return true for types that could be supported as SIMD return or
21572 argument types. */
21573
21574static bool
21575supported_simd_type (tree t)
21576{
21577 if (SCALAR_FLOAT_TYPE_P (t) || INTEGRAL_TYPE_P (t) || POINTER_TYPE_P (t))
21578 {
21579 HOST_WIDE_INT s = tree_to_shwi (TYPE_SIZE_UNIT (t));
21580 return s == 1 || s == 2 || s == 4 || s == 8;
21581 }
21582 return false;
21583}
21584
21585/* Return true for types that currently are supported as SIMD return
21586 or argument types. */
21587
21588static bool
21589currently_supported_simd_type (tree t, tree b)
21590{
21591 if (COMPLEX_FLOAT_TYPE_P (t))
21592 return false;
21593
21594 if (TYPE_SIZE (t) != TYPE_SIZE (b))
21595 return false;
21596
21597 return supported_simd_type (t);
21598}
21599
21600/* Implement TARGET_SIMD_CLONE_COMPUTE_VECSIZE_AND_SIMDLEN. */
21601
21602static int
21603aarch64_simd_clone_compute_vecsize_and_simdlen (struct cgraph_node *node,
21604 struct cgraph_simd_clone *clonei,
21605 tree base_type, int num)
21606{
21607 tree t, ret_type, arg_type;
21608 unsigned int elt_bits, vec_bits, count;
21609
21610 if (!TARGET_SIMD)
21611 return 0;
21612
21613 if (clonei->simdlen
21614 && (clonei->simdlen < 2
21615 || clonei->simdlen > 1024
21616 || (clonei->simdlen & (clonei->simdlen - 1)) != 0))
21617 {
21618 warning_at (DECL_SOURCE_LOCATION (node->decl), 0,
21619 "unsupported simdlen %d", clonei->simdlen);
21620 return 0;
21621 }
21622
21623 ret_type = TREE_TYPE (TREE_TYPE (node->decl));
21624 if (TREE_CODE (ret_type) != VOID_TYPE
21625 && !currently_supported_simd_type (ret_type, base_type))
21626 {
21627 if (TYPE_SIZE (ret_type) != TYPE_SIZE (base_type))
21628 warning_at (DECL_SOURCE_LOCATION (node->decl), 0,
21629 "GCC does not currently support mixed size types "
21630 "for %<simd%> functions");
21631 else if (supported_simd_type (ret_type))
21632 warning_at (DECL_SOURCE_LOCATION (node->decl), 0,
21633 "GCC does not currently support return type %qT "
21634 "for %<simd%> functions", ret_type);
21635 else
21636 warning_at (DECL_SOURCE_LOCATION (node->decl), 0,
21637 "unsupported return type %qT for %<simd%> functions",
21638 ret_type);
21639 return 0;
21640 }
21641
21642 for (t = DECL_ARGUMENTS (node->decl); t; t = DECL_CHAIN (t))
21643 {
21644 arg_type = TREE_TYPE (t);
21645
21646 if (!currently_supported_simd_type (arg_type, base_type))
21647 {
21648 if (TYPE_SIZE (arg_type) != TYPE_SIZE (base_type))
21649 warning_at (DECL_SOURCE_LOCATION (node->decl), 0,
21650 "GCC does not currently support mixed size types "
21651 "for %<simd%> functions");
21652 else
21653 warning_at (DECL_SOURCE_LOCATION (node->decl), 0,
21654 "GCC does not currently support argument type %qT "
21655 "for %<simd%> functions", arg_type);
21656 return 0;
21657 }
21658 }
21659
21660 clonei->vecsize_mangle = 'n';
21661 clonei->mask_mode = VOIDmode;
21662 elt_bits = GET_MODE_BITSIZE (SCALAR_TYPE_MODE (base_type));
21663 if (clonei->simdlen == 0)
21664 {
21665 count = 2;
21666 vec_bits = (num == 0 ? 64 : 128);
21667 clonei->simdlen = vec_bits / elt_bits;
21668 }
21669 else
21670 {
21671 count = 1;
21672 vec_bits = clonei->simdlen * elt_bits;
21673 if (vec_bits != 64 && vec_bits != 128)
21674 {
21675 warning_at (DECL_SOURCE_LOCATION (node->decl), 0,
21676 "GCC does not currently support simdlen %d for type %qT",
21677 clonei->simdlen, base_type);
21678 return 0;
21679 }
21680 }
21681 clonei->vecsize_int = vec_bits;
21682 clonei->vecsize_float = vec_bits;
21683 return count;
21684}
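/* Worked example (illustrative): for

     #pragma omp declare simd
     float f (float x);

   base_type is float (elt_bits == 32) and no simdlen clause is present,
   so two Advanced SIMD clones are produced: num == 0 gives vec_bits == 64
   and simdlen == 2, num == 1 gives vec_bits == 128 and simdlen == 4.
   An explicit simdlen whose vector size is neither 64 nor 128 bits
   (e.g. simdlen (8) for float) is diagnosed and rejected above.  */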
21685
21686/* Implement TARGET_SIMD_CLONE_ADJUST. */
21687
21688static void
21689aarch64_simd_clone_adjust (struct cgraph_node *node)
21690{
21691 /* Add aarch64_vector_pcs target attribute to SIMD clones so they
21692 use the correct ABI. */
21693
21694 tree t = TREE_TYPE (node->decl);
21695 TYPE_ATTRIBUTES (t) = make_attribute ("aarch64_vector_pcs", "default",
21696 TYPE_ATTRIBUTES (t));
21697}
21698
21699/* Implement TARGET_SIMD_CLONE_USABLE. */
21700
21701static int
21702aarch64_simd_clone_usable (struct cgraph_node *node)
21703{
21704 switch (node->simdclone->vecsize_mangle)
21705 {
21706 case 'n':
21707 if (!TARGET_SIMD)
21708 return -1;
21709 return 0;
21710 default:
21711 gcc_unreachable ();
21712 }
21713}
21714
497f281c
SE
21715/* Implement TARGET_COMP_TYPE_ATTRIBUTES */
21716
21717static int
21718aarch64_comp_type_attributes (const_tree type1, const_tree type2)
21719{
21720 if (lookup_attribute ("aarch64_vector_pcs", TYPE_ATTRIBUTES (type1))
21721 != lookup_attribute ("aarch64_vector_pcs", TYPE_ATTRIBUTES (type2)))
21722 return 0;
21723 return 1;
21724}
21725
3bac1e20
SE
21726/* Implement TARGET_GET_MULTILIB_ABI_NAME */
21727
21728static const char *
21729aarch64_get_multilib_abi_name (void)
21730{
21731 if (TARGET_BIG_END)
21732 return TARGET_ILP32 ? "aarch64_be_ilp32" : "aarch64_be";
21733 return TARGET_ILP32 ? "aarch64_ilp32" : "aarch64";
21734}
21735
e76c8e56
JJ
21736/* Implement TARGET_STACK_PROTECT_GUARD.  In the case of a
21737   global-variable-based guard, use the default; otherwise
21738   return a null tree.  */
21739static tree
21740aarch64_stack_protect_guard (void)
21741{
21742 if (aarch64_stack_protector_guard == SSP_GLOBAL)
21743 return default_stack_protect_guard ();
21744
21745 return NULL_TREE;
21746}
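/* Illustrative option combinations (a sketch of how this hook is
   reached): the default -mstack-protector-guard=global keeps the generic
   __stack_chk_guard variable, while -mstack-protector-guard=sysreg
   together with -mstack-protector-guard-reg= and
   -mstack-protector-guard-offset= selects the system-register based
   guard, for which this hook returns NULL_TREE.  */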
21747
32efff9f
SD
21748/* Implement TARGET_ASM_FILE_END for AArch64. This adds the AArch64 GNU NOTE
21749 section at the end if needed. */
21750#define GNU_PROPERTY_AARCH64_FEATURE_1_AND 0xc0000000
21751#define GNU_PROPERTY_AARCH64_FEATURE_1_BTI (1U << 0)
21752#define GNU_PROPERTY_AARCH64_FEATURE_1_PAC (1U << 1)
21753void
21754aarch64_file_end_indicate_exec_stack ()
21755{
21756 file_end_indicate_exec_stack ();
21757
21758 unsigned feature_1_and = 0;
21759 if (aarch64_bti_enabled ())
21760 feature_1_and |= GNU_PROPERTY_AARCH64_FEATURE_1_BTI;
21761
21762 if (aarch64_ra_sign_scope != AARCH64_FUNCTION_NONE)
21763 feature_1_and |= GNU_PROPERTY_AARCH64_FEATURE_1_PAC;
21764
21765 if (feature_1_and)
21766 {
21767 /* Generate .note.gnu.property section. */
21768 switch_to_section (get_section (".note.gnu.property",
21769 SECTION_NOTYPE, NULL));
21770
21771 /* PT_NOTE header: namesz, descsz, type.
21772 namesz = 4 ("GNU\0")
21773 descsz = 16 (Size of the program property array)
21774 [(12 + padding) * Number of array elements]
21775 type = 5 (NT_GNU_PROPERTY_TYPE_0). */
21776 assemble_align (POINTER_SIZE);
21777 assemble_integer (GEN_INT (4), 4, 32, 1);
21778 assemble_integer (GEN_INT (ROUND_UP (12, POINTER_BYTES)), 4, 32, 1);
21779 assemble_integer (GEN_INT (5), 4, 32, 1);
21780
21781 /* PT_NOTE name. */
21782 assemble_string ("GNU", 4);
21783
21784 /* PT_NOTE contents for NT_GNU_PROPERTY_TYPE_0:
21785 type = GNU_PROPERTY_AARCH64_FEATURE_1_AND
21786 datasz = 4
21787 data = feature_1_and. */
21788 assemble_integer (GEN_INT (GNU_PROPERTY_AARCH64_FEATURE_1_AND), 4, 32, 1);
21789 assemble_integer (GEN_INT (4), 4, 32, 1);
21790 assemble_integer (GEN_INT (feature_1_and), 4, 32, 1);
21791
21792 /* Pad the size of the note to the required alignment. */
21793 assemble_align (POINTER_SIZE);
21794 }
21795}
21796#undef GNU_PROPERTY_AARCH64_FEATURE_1_PAC
21797#undef GNU_PROPERTY_AARCH64_FEATURE_1_BTI
21798#undef GNU_PROPERTY_AARCH64_FEATURE_1_AND
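/* Illustrative note contents (approximate): with
   -mbranch-protection=standard both the BTI and PAC bits are set, so
   feature_1_and == 3 and the section emitted above contains, roughly,

     .word 4              // namesz ("GNU\0")
     .word 16             // descsz (one 12-byte property rounded up to 16)
     .word 5              // NT_GNU_PROPERTY_TYPE_0
     .asciz "GNU"
     .word 0xc0000000     // GNU_PROPERTY_AARCH64_FEATURE_1_AND
     .word 4              // datasz
     .word 3              // BTI | PAC
     // 4 bytes of padding to 8-byte alignment

   The exact directives depend on the generic assembly output routines;
   this is only a sketch of the layout.  */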
e76c8e56 21799
51b86113
DM
21800/* Target-specific selftests. */
21801
21802#if CHECKING_P
21803
21804namespace selftest {
21805
21806/* Selftest for the RTL loader.
21807 Verify that the RTL loader copes with a dump from
21808 print_rtx_function. This is essentially just a test that class
21809 function_reader can handle a real dump, but it also verifies
21810 that lookup_reg_by_dump_name correctly handles hard regs.
21811 The presence of hard reg names in the dump means that the test is
21812 target-specific, hence it is in this file. */
21813
21814static void
21815aarch64_test_loading_full_dump ()
21816{
21817 rtl_dump_test t (SELFTEST_LOCATION, locate_file ("aarch64/times-two.rtl"));
21818
21819 ASSERT_STREQ ("times_two", IDENTIFIER_POINTER (DECL_NAME (cfun->decl)));
21820
21821 rtx_insn *insn_1 = get_insn_by_uid (1);
21822 ASSERT_EQ (NOTE, GET_CODE (insn_1));
21823
21824 rtx_insn *insn_15 = get_insn_by_uid (15);
21825 ASSERT_EQ (INSN, GET_CODE (insn_15));
21826 ASSERT_EQ (USE, GET_CODE (PATTERN (insn_15)));
21827
21828 /* Verify crtl->return_rtx. */
21829 ASSERT_EQ (REG, GET_CODE (crtl->return_rtx));
21830 ASSERT_EQ (0, REGNO (crtl->return_rtx));
21831 ASSERT_EQ (SImode, GET_MODE (crtl->return_rtx));
21832}
21833
21834/* Run all target-specific selftests. */
21835
21836static void
21837aarch64_run_selftests (void)
21838{
21839 aarch64_test_loading_full_dump ();
21840}
21841
21842} // namespace selftest
21843
21844#endif /* #if CHECKING_P */
21845
cd0b2d36
RR
21846#undef TARGET_STACK_PROTECT_GUARD
21847#define TARGET_STACK_PROTECT_GUARD aarch64_stack_protect_guard
21848
43e9d192
IB
21849#undef TARGET_ADDRESS_COST
21850#define TARGET_ADDRESS_COST aarch64_address_cost
21851
21852/* This hook determines whether unnamed bitfields affect the alignment
21853 of the containing structure. The hook returns true if the structure
21854 should inherit the alignment requirements of an unnamed bitfield's
21855 type. */
21856#undef TARGET_ALIGN_ANON_BITFIELD
21857#define TARGET_ALIGN_ANON_BITFIELD hook_bool_void_true
21858
21859#undef TARGET_ASM_ALIGNED_DI_OP
21860#define TARGET_ASM_ALIGNED_DI_OP "\t.xword\t"
21861
21862#undef TARGET_ASM_ALIGNED_HI_OP
21863#define TARGET_ASM_ALIGNED_HI_OP "\t.hword\t"
21864
21865#undef TARGET_ASM_ALIGNED_SI_OP
21866#define TARGET_ASM_ALIGNED_SI_OP "\t.word\t"
21867
21868#undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
21869#define TARGET_ASM_CAN_OUTPUT_MI_THUNK \
21870 hook_bool_const_tree_hwi_hwi_const_tree_true
21871
e1c1ecb0
KT
21872#undef TARGET_ASM_FILE_START
21873#define TARGET_ASM_FILE_START aarch64_start_file
21874
43e9d192
IB
21875#undef TARGET_ASM_OUTPUT_MI_THUNK
21876#define TARGET_ASM_OUTPUT_MI_THUNK aarch64_output_mi_thunk
21877
21878#undef TARGET_ASM_SELECT_RTX_SECTION
21879#define TARGET_ASM_SELECT_RTX_SECTION aarch64_select_rtx_section
21880
21881#undef TARGET_ASM_TRAMPOLINE_TEMPLATE
21882#define TARGET_ASM_TRAMPOLINE_TEMPLATE aarch64_asm_trampoline_template
21883
21884#undef TARGET_BUILD_BUILTIN_VA_LIST
21885#define TARGET_BUILD_BUILTIN_VA_LIST aarch64_build_builtin_va_list
21886
21887#undef TARGET_CALLEE_COPIES
7256c719 21888#define TARGET_CALLEE_COPIES hook_bool_CUMULATIVE_ARGS_arg_info_false
43e9d192
IB
21889
21890#undef TARGET_CAN_ELIMINATE
21891#define TARGET_CAN_ELIMINATE aarch64_can_eliminate
21892
1fd8d40c
KT
21893#undef TARGET_CAN_INLINE_P
21894#define TARGET_CAN_INLINE_P aarch64_can_inline_p
21895
43e9d192
IB
21896#undef TARGET_CANNOT_FORCE_CONST_MEM
21897#define TARGET_CANNOT_FORCE_CONST_MEM aarch64_cannot_force_const_mem
21898
50487d79
EM
21899#undef TARGET_CASE_VALUES_THRESHOLD
21900#define TARGET_CASE_VALUES_THRESHOLD aarch64_case_values_threshold
21901
43e9d192
IB
21902#undef TARGET_CONDITIONAL_REGISTER_USAGE
21903#define TARGET_CONDITIONAL_REGISTER_USAGE aarch64_conditional_register_usage
21904
21905/* Only the least significant bit is used for initialization guard
21906 variables. */
21907#undef TARGET_CXX_GUARD_MASK_BIT
21908#define TARGET_CXX_GUARD_MASK_BIT hook_bool_void_true
21909
21910#undef TARGET_C_MODE_FOR_SUFFIX
21911#define TARGET_C_MODE_FOR_SUFFIX aarch64_c_mode_for_suffix
21912
21913#ifdef TARGET_BIG_ENDIAN_DEFAULT
21914#undef TARGET_DEFAULT_TARGET_FLAGS
21915#define TARGET_DEFAULT_TARGET_FLAGS (MASK_BIG_END)
21916#endif
21917
21918#undef TARGET_CLASS_MAX_NREGS
21919#define TARGET_CLASS_MAX_NREGS aarch64_class_max_nregs
21920
119103ca
JG
21921#undef TARGET_BUILTIN_DECL
21922#define TARGET_BUILTIN_DECL aarch64_builtin_decl
21923
a6fc00da
BH
21924#undef TARGET_BUILTIN_RECIPROCAL
21925#define TARGET_BUILTIN_RECIPROCAL aarch64_builtin_reciprocal
21926
11e554b3
JG
21927#undef TARGET_C_EXCESS_PRECISION
21928#define TARGET_C_EXCESS_PRECISION aarch64_excess_precision
21929
43e9d192
IB
21930#undef TARGET_EXPAND_BUILTIN
21931#define TARGET_EXPAND_BUILTIN aarch64_expand_builtin
21932
21933#undef TARGET_EXPAND_BUILTIN_VA_START
21934#define TARGET_EXPAND_BUILTIN_VA_START aarch64_expand_builtin_va_start
21935
9697e620
JG
21936#undef TARGET_FOLD_BUILTIN
21937#define TARGET_FOLD_BUILTIN aarch64_fold_builtin
21938
43e9d192
IB
21939#undef TARGET_FUNCTION_ARG
21940#define TARGET_FUNCTION_ARG aarch64_function_arg
21941
21942#undef TARGET_FUNCTION_ARG_ADVANCE
21943#define TARGET_FUNCTION_ARG_ADVANCE aarch64_function_arg_advance
21944
21945#undef TARGET_FUNCTION_ARG_BOUNDARY
21946#define TARGET_FUNCTION_ARG_BOUNDARY aarch64_function_arg_boundary
21947
76b0cbf8
RS
21948#undef TARGET_FUNCTION_ARG_PADDING
21949#define TARGET_FUNCTION_ARG_PADDING aarch64_function_arg_padding
21950
43cacb12
RS
21951#undef TARGET_GET_RAW_RESULT_MODE
21952#define TARGET_GET_RAW_RESULT_MODE aarch64_get_reg_raw_mode
21953#undef TARGET_GET_RAW_ARG_MODE
21954#define TARGET_GET_RAW_ARG_MODE aarch64_get_reg_raw_mode
21955
43e9d192
IB
21956#undef TARGET_FUNCTION_OK_FOR_SIBCALL
21957#define TARGET_FUNCTION_OK_FOR_SIBCALL aarch64_function_ok_for_sibcall
21958
21959#undef TARGET_FUNCTION_VALUE
21960#define TARGET_FUNCTION_VALUE aarch64_function_value
21961
21962#undef TARGET_FUNCTION_VALUE_REGNO_P
21963#define TARGET_FUNCTION_VALUE_REGNO_P aarch64_function_value_regno_p
21964
fc72cba7
AL
21965#undef TARGET_GIMPLE_FOLD_BUILTIN
21966#define TARGET_GIMPLE_FOLD_BUILTIN aarch64_gimple_fold_builtin
0ac198d3 21967
43e9d192
IB
21968#undef TARGET_GIMPLIFY_VA_ARG_EXPR
21969#define TARGET_GIMPLIFY_VA_ARG_EXPR aarch64_gimplify_va_arg_expr
21970
21971#undef TARGET_INIT_BUILTINS
21972#define TARGET_INIT_BUILTINS aarch64_init_builtins
21973
c64f7d37
WD
21974#undef TARGET_IRA_CHANGE_PSEUDO_ALLOCNO_CLASS
21975#define TARGET_IRA_CHANGE_PSEUDO_ALLOCNO_CLASS \
21976 aarch64_ira_change_pseudo_allocno_class
21977
43e9d192
IB
21978#undef TARGET_LEGITIMATE_ADDRESS_P
21979#define TARGET_LEGITIMATE_ADDRESS_P aarch64_legitimate_address_hook_p
21980
21981#undef TARGET_LEGITIMATE_CONSTANT_P
21982#define TARGET_LEGITIMATE_CONSTANT_P aarch64_legitimate_constant_p
21983
491ec060
WD
21984#undef TARGET_LEGITIMIZE_ADDRESS_DISPLACEMENT
21985#define TARGET_LEGITIMIZE_ADDRESS_DISPLACEMENT \
21986 aarch64_legitimize_address_displacement
21987
43e9d192
IB
21988#undef TARGET_LIBGCC_CMP_RETURN_MODE
21989#define TARGET_LIBGCC_CMP_RETURN_MODE aarch64_libgcc_cmp_return_mode
21990
11e554b3
JG
21991#undef TARGET_LIBGCC_FLOATING_MODE_SUPPORTED_P
21992#define TARGET_LIBGCC_FLOATING_MODE_SUPPORTED_P \
21993aarch64_libgcc_floating_mode_supported_p
21994
ac2b960f
YZ
21995#undef TARGET_MANGLE_TYPE
21996#define TARGET_MANGLE_TYPE aarch64_mangle_type
21997
65ef05d0
RS
21998#undef TARGET_VERIFY_TYPE_CONTEXT
21999#define TARGET_VERIFY_TYPE_CONTEXT aarch64_verify_type_context
22000
43e9d192
IB
22001#undef TARGET_MEMORY_MOVE_COST
22002#define TARGET_MEMORY_MOVE_COST aarch64_memory_move_cost
22003
26e0ff94
WD
22004#undef TARGET_MIN_DIVISIONS_FOR_RECIP_MUL
22005#define TARGET_MIN_DIVISIONS_FOR_RECIP_MUL aarch64_min_divisions_for_recip_mul
22006
43e9d192
IB
22007#undef TARGET_MUST_PASS_IN_STACK
22008#define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size
22009
22010/* This target hook should return true if accesses to volatile bitfields
22011 should use the narrowest mode possible. It should return false if these
22012 accesses should use the bitfield container type. */
22013#undef TARGET_NARROW_VOLATILE_BITFIELD
22014#define TARGET_NARROW_VOLATILE_BITFIELD hook_bool_void_false
22015
22016#undef TARGET_OPTION_OVERRIDE
22017#define TARGET_OPTION_OVERRIDE aarch64_override_options
22018
22019#undef TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE
22020#define TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE \
22021 aarch64_override_options_after_change
22022
361fb3ee
KT
22023#undef TARGET_OPTION_SAVE
22024#define TARGET_OPTION_SAVE aarch64_option_save
22025
22026#undef TARGET_OPTION_RESTORE
22027#define TARGET_OPTION_RESTORE aarch64_option_restore
22028
22029#undef TARGET_OPTION_PRINT
22030#define TARGET_OPTION_PRINT aarch64_option_print
22031
5a2c8331
KT
22032#undef TARGET_OPTION_VALID_ATTRIBUTE_P
22033#define TARGET_OPTION_VALID_ATTRIBUTE_P aarch64_option_valid_attribute_p
22034
d78006d9
KT
22035#undef TARGET_SET_CURRENT_FUNCTION
22036#define TARGET_SET_CURRENT_FUNCTION aarch64_set_current_function
22037
43e9d192
IB
22038#undef TARGET_PASS_BY_REFERENCE
22039#define TARGET_PASS_BY_REFERENCE aarch64_pass_by_reference
22040
22041#undef TARGET_PREFERRED_RELOAD_CLASS
22042#define TARGET_PREFERRED_RELOAD_CLASS aarch64_preferred_reload_class
22043
cee66c68
WD
22044#undef TARGET_SCHED_REASSOCIATION_WIDTH
22045#define TARGET_SCHED_REASSOCIATION_WIDTH aarch64_reassociation_width
22046
c2ec330c
AL
22047#undef TARGET_PROMOTED_TYPE
22048#define TARGET_PROMOTED_TYPE aarch64_promoted_type
22049
43e9d192
IB
22050#undef TARGET_SECONDARY_RELOAD
22051#define TARGET_SECONDARY_RELOAD aarch64_secondary_reload
22052
22053#undef TARGET_SHIFT_TRUNCATION_MASK
22054#define TARGET_SHIFT_TRUNCATION_MASK aarch64_shift_truncation_mask
22055
22056#undef TARGET_SETUP_INCOMING_VARARGS
22057#define TARGET_SETUP_INCOMING_VARARGS aarch64_setup_incoming_varargs
22058
22059#undef TARGET_STRUCT_VALUE_RTX
22060#define TARGET_STRUCT_VALUE_RTX aarch64_struct_value_rtx
22061
22062#undef TARGET_REGISTER_MOVE_COST
22063#define TARGET_REGISTER_MOVE_COST aarch64_register_move_cost
22064
22065#undef TARGET_RETURN_IN_MEMORY
22066#define TARGET_RETURN_IN_MEMORY aarch64_return_in_memory
22067
22068#undef TARGET_RETURN_IN_MSB
22069#define TARGET_RETURN_IN_MSB aarch64_return_in_msb
22070
22071#undef TARGET_RTX_COSTS
7cc2145f 22072#define TARGET_RTX_COSTS aarch64_rtx_costs_wrapper
43e9d192 22073
2e5f8203
JG
22074#undef TARGET_SCALAR_MODE_SUPPORTED_P
22075#define TARGET_SCALAR_MODE_SUPPORTED_P aarch64_scalar_mode_supported_p
22076
d126a4ae
AP
22077#undef TARGET_SCHED_ISSUE_RATE
22078#define TARGET_SCHED_ISSUE_RATE aarch64_sched_issue_rate
22079
d0bc0cb6
RS
22080#undef TARGET_SCHED_VARIABLE_ISSUE
22081#define TARGET_SCHED_VARIABLE_ISSUE aarch64_sched_variable_issue
22082
d03f7e44
MK
22083#undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
22084#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
22085 aarch64_sched_first_cycle_multipass_dfa_lookahead
22086
2d6bc7fa
KT
22087#undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD
22088#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD \
22089 aarch64_first_cycle_multipass_dfa_lookahead_guard
22090
827ab47a
KT
22091#undef TARGET_SHRINK_WRAP_GET_SEPARATE_COMPONENTS
22092#define TARGET_SHRINK_WRAP_GET_SEPARATE_COMPONENTS \
22093 aarch64_get_separate_components
22094
22095#undef TARGET_SHRINK_WRAP_COMPONENTS_FOR_BB
22096#define TARGET_SHRINK_WRAP_COMPONENTS_FOR_BB \
22097 aarch64_components_for_bb
22098
22099#undef TARGET_SHRINK_WRAP_DISQUALIFY_COMPONENTS
22100#define TARGET_SHRINK_WRAP_DISQUALIFY_COMPONENTS \
22101 aarch64_disqualify_components
22102
22103#undef TARGET_SHRINK_WRAP_EMIT_PROLOGUE_COMPONENTS
22104#define TARGET_SHRINK_WRAP_EMIT_PROLOGUE_COMPONENTS \
22105 aarch64_emit_prologue_components
22106
22107#undef TARGET_SHRINK_WRAP_EMIT_EPILOGUE_COMPONENTS
22108#define TARGET_SHRINK_WRAP_EMIT_EPILOGUE_COMPONENTS \
22109 aarch64_emit_epilogue_components
22110
22111#undef TARGET_SHRINK_WRAP_SET_HANDLED_COMPONENTS
22112#define TARGET_SHRINK_WRAP_SET_HANDLED_COMPONENTS \
22113 aarch64_set_handled_components
22114
43e9d192
IB
22115#undef TARGET_TRAMPOLINE_INIT
22116#define TARGET_TRAMPOLINE_INIT aarch64_trampoline_init
22117
22118#undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
22119#define TARGET_USE_BLOCKS_FOR_CONSTANT_P aarch64_use_blocks_for_constant_p
22120
22121#undef TARGET_VECTOR_MODE_SUPPORTED_P
22122#define TARGET_VECTOR_MODE_SUPPORTED_P aarch64_vector_mode_supported_p
22123
7df76747
N
22124#undef TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT
22125#define TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT \
22126 aarch64_builtin_support_vector_misalignment
22127
9f4cbab8
RS
22128#undef TARGET_ARRAY_MODE
22129#define TARGET_ARRAY_MODE aarch64_array_mode
22130
43e9d192
IB
22131#undef TARGET_ARRAY_MODE_SUPPORTED_P
22132#define TARGET_ARRAY_MODE_SUPPORTED_P aarch64_array_mode_supported_p
22133
8990e73a
TB
22134#undef TARGET_VECTORIZE_ADD_STMT_COST
22135#define TARGET_VECTORIZE_ADD_STMT_COST aarch64_add_stmt_cost
22136
22137#undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
22138#define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
22139 aarch64_builtin_vectorization_cost
22140
43e9d192
IB
22141#undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
22142#define TARGET_VECTORIZE_PREFERRED_SIMD_MODE aarch64_preferred_simd_mode
22143
42fc9a7f
JG
22144#undef TARGET_VECTORIZE_BUILTINS
22145#define TARGET_VECTORIZE_BUILTINS
22146
22147#undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
22148#define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
22149 aarch64_builtin_vectorized_function
22150
e021fb86
RS
22151#undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_MODES
22152#define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_MODES \
22153 aarch64_autovectorize_vector_modes
3b357264 22154
aa87aced
KV
22155#undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV
22156#define TARGET_ATOMIC_ASSIGN_EXPAND_FENV \
22157 aarch64_atomic_assign_expand_fenv
22158
43e9d192
IB
22159/* Section anchor support. */
22160
22161#undef TARGET_MIN_ANCHOR_OFFSET
22162#define TARGET_MIN_ANCHOR_OFFSET -256
22163
22164/* Limit the maximum anchor offset to 4k-1, since that's the limit for a
22165 byte offset; we can do much more for larger data types, but have no way
22166 to determine the size of the access. We assume accesses are aligned. */
22167#undef TARGET_MAX_ANCHOR_OFFSET
22168#define TARGET_MAX_ANCHOR_OFFSET 4095
22169
db0253a4
TB
22170#undef TARGET_VECTOR_ALIGNMENT
22171#define TARGET_VECTOR_ALIGNMENT aarch64_simd_vector_alignment
22172
43cacb12
RS
22173#undef TARGET_VECTORIZE_PREFERRED_VECTOR_ALIGNMENT
22174#define TARGET_VECTORIZE_PREFERRED_VECTOR_ALIGNMENT \
22175 aarch64_vectorize_preferred_vector_alignment
db0253a4
TB
22176#undef TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE
22177#define TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE \
22178 aarch64_simd_vector_alignment_reachable
22179
88b08073
JG
22180/* vec_perm support. */
22181
f151c9e1
RS
22182#undef TARGET_VECTORIZE_VEC_PERM_CONST
22183#define TARGET_VECTORIZE_VEC_PERM_CONST \
22184 aarch64_vectorize_vec_perm_const
88b08073 22185
74166aab
RS
22186#undef TARGET_VECTORIZE_RELATED_MODE
22187#define TARGET_VECTORIZE_RELATED_MODE aarch64_vectorize_related_mode
43cacb12
RS
22188#undef TARGET_VECTORIZE_GET_MASK_MODE
22189#define TARGET_VECTORIZE_GET_MASK_MODE aarch64_get_mask_mode
76a34e3f
RS
22190#undef TARGET_VECTORIZE_EMPTY_MASK_IS_EXPENSIVE
22191#define TARGET_VECTORIZE_EMPTY_MASK_IS_EXPENSIVE \
22192 aarch64_empty_mask_is_expensive
6a86928d
RS
22193#undef TARGET_PREFERRED_ELSE_VALUE
22194#define TARGET_PREFERRED_ELSE_VALUE \
22195 aarch64_preferred_else_value
43cacb12 22196
c2ec330c
AL
22197#undef TARGET_INIT_LIBFUNCS
22198#define TARGET_INIT_LIBFUNCS aarch64_init_libfuncs
70f09188 22199
706b2314 22200#undef TARGET_FIXED_CONDITION_CODE_REGS
70f09188
AP
22201#define TARGET_FIXED_CONDITION_CODE_REGS aarch64_fixed_condition_code_regs
22202
5cb74e90
RR
22203#undef TARGET_FLAGS_REGNUM
22204#define TARGET_FLAGS_REGNUM CC_REGNUM
22205
78607708
TV
22206#undef TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS
22207#define TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS true
22208
a3125fc2
CL
22209#undef TARGET_ASAN_SHADOW_OFFSET
22210#define TARGET_ASAN_SHADOW_OFFSET aarch64_asan_shadow_offset
22211
0c4ec427
RE
22212#undef TARGET_LEGITIMIZE_ADDRESS
22213#define TARGET_LEGITIMIZE_ADDRESS aarch64_legitimize_address
22214
b48d6421
KT
22215#undef TARGET_SCHED_CAN_SPECULATE_INSN
22216#define TARGET_SCHED_CAN_SPECULATE_INSN aarch64_sched_can_speculate_insn
22217
594bdd53
FY
22218#undef TARGET_CAN_USE_DOLOOP_P
22219#define TARGET_CAN_USE_DOLOOP_P can_use_doloop_if_innermost
22220
9bca63d4
WD
22221#undef TARGET_SCHED_ADJUST_PRIORITY
22222#define TARGET_SCHED_ADJUST_PRIORITY aarch64_sched_adjust_priority
22223
6a569cdd
KT
22224#undef TARGET_SCHED_MACRO_FUSION_P
22225#define TARGET_SCHED_MACRO_FUSION_P aarch64_macro_fusion_p
22226
22227#undef TARGET_SCHED_MACRO_FUSION_PAIR_P
22228#define TARGET_SCHED_MACRO_FUSION_PAIR_P aarch_macro_fusion_pair_p
22229
350013bc
BC
22230#undef TARGET_SCHED_FUSION_PRIORITY
22231#define TARGET_SCHED_FUSION_PRIORITY aarch64_sched_fusion_priority
22232
7b841a12
JW
22233#undef TARGET_UNSPEC_MAY_TRAP_P
22234#define TARGET_UNSPEC_MAY_TRAP_P aarch64_unspec_may_trap_p
22235
1b1e81f8
JW
22236#undef TARGET_USE_PSEUDO_PIC_REG
22237#define TARGET_USE_PSEUDO_PIC_REG aarch64_use_pseudo_pic_reg
22238
cc8ca59e
JB
22239#undef TARGET_PRINT_OPERAND
22240#define TARGET_PRINT_OPERAND aarch64_print_operand
22241
22242#undef TARGET_PRINT_OPERAND_ADDRESS
22243#define TARGET_PRINT_OPERAND_ADDRESS aarch64_print_operand_address
22244
ee62a5a6
RS
22245#undef TARGET_OPTAB_SUPPORTED_P
22246#define TARGET_OPTAB_SUPPORTED_P aarch64_optab_supported_p
22247
43203dea
RR
22248#undef TARGET_OMIT_STRUCT_RETURN_REG
22249#define TARGET_OMIT_STRUCT_RETURN_REG true
22250
43cacb12
RS
22251#undef TARGET_DWARF_POLY_INDETERMINATE_VALUE
22252#define TARGET_DWARF_POLY_INDETERMINATE_VALUE \
22253 aarch64_dwarf_poly_indeterminate_value
22254
f46fe37e
EB
22255/* The architecture reserves bits 0 and 1 so use bit 2 for descriptors. */
22256#undef TARGET_CUSTOM_FUNCTION_DESCRIPTORS
22257#define TARGET_CUSTOM_FUNCTION_DESCRIPTORS 4
22258
c43f4279
RS
22259#undef TARGET_HARD_REGNO_NREGS
22260#define TARGET_HARD_REGNO_NREGS aarch64_hard_regno_nregs
f939c3e6
RS
22261#undef TARGET_HARD_REGNO_MODE_OK
22262#define TARGET_HARD_REGNO_MODE_OK aarch64_hard_regno_mode_ok
22263
99e1629f
RS
22264#undef TARGET_MODES_TIEABLE_P
22265#define TARGET_MODES_TIEABLE_P aarch64_modes_tieable_p
22266
80ec73f4
RS
22267#undef TARGET_HARD_REGNO_CALL_PART_CLOBBERED
22268#define TARGET_HARD_REGNO_CALL_PART_CLOBBERED \
22269 aarch64_hard_regno_call_part_clobbered
22270
5a5a3bc5
RS
22271#undef TARGET_INSN_CALLEE_ABI
22272#define TARGET_INSN_CALLEE_ABI aarch64_insn_callee_abi
b3650d40 22273
58e17cf8
RS
22274#undef TARGET_CONSTANT_ALIGNMENT
22275#define TARGET_CONSTANT_ALIGNMENT aarch64_constant_alignment
22276
8c6e3b23
TC
22277#undef TARGET_STACK_CLASH_PROTECTION_ALLOCA_PROBE_RANGE
22278#define TARGET_STACK_CLASH_PROTECTION_ALLOCA_PROBE_RANGE \
22279 aarch64_stack_clash_protection_alloca_probe_range
22280
43cacb12
RS
22281#undef TARGET_COMPUTE_PRESSURE_CLASSES
22282#define TARGET_COMPUTE_PRESSURE_CLASSES aarch64_compute_pressure_classes
22283
22284#undef TARGET_CAN_CHANGE_MODE_CLASS
22285#define TARGET_CAN_CHANGE_MODE_CLASS aarch64_can_change_mode_class
22286
5cce8171
RS
22287#undef TARGET_SELECT_EARLY_REMAT_MODES
22288#define TARGET_SELECT_EARLY_REMAT_MODES aarch64_select_early_remat_modes
22289
c0111dc4
RE
22290#undef TARGET_SPECULATION_SAFE_VALUE
22291#define TARGET_SPECULATION_SAFE_VALUE aarch64_speculation_safe_value
22292
2d56d6ba
KT
22293#undef TARGET_ESTIMATED_POLY_VALUE
22294#define TARGET_ESTIMATED_POLY_VALUE aarch64_estimated_poly_value
22295
a0d0b980
SE
22296#undef TARGET_ATTRIBUTE_TABLE
22297#define TARGET_ATTRIBUTE_TABLE aarch64_attribute_table
22298
d9186814
SE
22299#undef TARGET_SIMD_CLONE_COMPUTE_VECSIZE_AND_SIMDLEN
22300#define TARGET_SIMD_CLONE_COMPUTE_VECSIZE_AND_SIMDLEN \
22301 aarch64_simd_clone_compute_vecsize_and_simdlen
22302
22303#undef TARGET_SIMD_CLONE_ADJUST
22304#define TARGET_SIMD_CLONE_ADJUST aarch64_simd_clone_adjust
22305
22306#undef TARGET_SIMD_CLONE_USABLE
22307#define TARGET_SIMD_CLONE_USABLE aarch64_simd_clone_usable
22308
497f281c
SE
22309#undef TARGET_COMP_TYPE_ATTRIBUTES
22310#define TARGET_COMP_TYPE_ATTRIBUTES aarch64_comp_type_attributes
22311
3bac1e20
SE
22312#undef TARGET_GET_MULTILIB_ABI_NAME
22313#define TARGET_GET_MULTILIB_ABI_NAME aarch64_get_multilib_abi_name
22314
002ffd3c
RS
22315#undef TARGET_FNTYPE_ABI
22316#define TARGET_FNTYPE_ABI aarch64_fntype_abi
22317
51b86113
DM
22318#if CHECKING_P
22319#undef TARGET_RUN_TARGET_SELFTESTS
22320#define TARGET_RUN_TARGET_SELFTESTS selftest::aarch64_run_selftests
22321#endif /* #if CHECKING_P */
22322
8fc16d72
ST
22323#undef TARGET_ASM_POST_CFI_STARTPROC
22324#define TARGET_ASM_POST_CFI_STARTPROC aarch64_post_cfi_startproc
22325
c600df9a
RS
22326#undef TARGET_STRICT_ARGUMENT_NAMING
22327#define TARGET_STRICT_ARGUMENT_NAMING hook_bool_CUMULATIVE_ARGS_true
22328
1a7a35c7
RH
22329#undef TARGET_MD_ASM_ADJUST
22330#define TARGET_MD_ASM_ADJUST arm_md_asm_adjust
22331
43e9d192
IB
22332struct gcc_target targetm = TARGET_INITIALIZER;
22333
22334#include "gt-aarch64.h"