/* Machine description for AArch64 architecture.
   Copyright (C) 2009-2020 Free Software Foundation, Inc.
   Contributed by ARM Ltd.

   This file is part of GCC.

   GCC is free software; you can redistribute it and/or modify it
   under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 3, or (at your option)
   any later version.

   GCC is distributed in the hope that it will be useful, but
   WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
   General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with GCC; see the file COPYING3. If not see
   <http://www.gnu.org/licenses/>. */

#define IN_TARGET_CODE 1

#include "config.h"
#define INCLUDE_STRING
#include "system.h"
#include "coretypes.h"
#include "backend.h"
#include "target.h"
#include "rtl.h"
#include "tree.h"
#include "memmodel.h"
#include "gimple.h"
#include "cfghooks.h"
#include "cfgloop.h"
#include "df.h"
#include "tm_p.h"
#include "stringpool.h"
#include "attribs.h"
#include "optabs.h"
#include "regs.h"
#include "emit-rtl.h"
#include "recog.h"
#include "cgraph.h"
#include "diagnostic.h"
#include "insn-attr.h"
#include "alias.h"
#include "fold-const.h"
#include "stor-layout.h"
#include "calls.h"
#include "varasm.h"
#include "output.h"
#include "flags.h"
#include "explow.h"
#include "expr.h"
#include "reload.h"
#include "langhooks.h"
#include "opts.h"
#include "gimplify.h"
#include "dwarf2.h"
#include "gimple-iterator.h"
#include "tree-vectorizer.h"
#include "aarch64-cost-tables.h"
#include "dumpfile.h"
#include "builtins.h"
#include "rtl-iter.h"
#include "tm-constrs.h"
#include "sched-int.h"
#include "target-globals.h"
#include "common/common-target.h"
#include "cfgrtl.h"
#include "selftest.h"
#include "selftest-rtl.h"
#include "rtx-vector-builder.h"
#include "intl.h"
#include "expmed.h"
#include "function-abi.h"

/* This file should be included last. */
#include "target-def.h"

/* Defined for convenience. */
#define POINTER_BYTES (POINTER_SIZE / BITS_PER_UNIT)

/* Information about a legitimate vector immediate operand. */
struct simd_immediate_info
{
  enum insn_type { MOV, MVN, INDEX, PTRUE };
  enum modifier_type { LSL, MSL };

  simd_immediate_info () {}
  simd_immediate_info (scalar_float_mode, rtx);
  simd_immediate_info (scalar_int_mode, unsigned HOST_WIDE_INT,
		       insn_type = MOV, modifier_type = LSL,
		       unsigned int = 0);
  simd_immediate_info (scalar_mode, rtx, rtx);
  simd_immediate_info (scalar_int_mode, aarch64_svpattern);

  /* The mode of the elements. */
  scalar_mode elt_mode;

  /* The instruction to use to move the immediate into a vector. */
  insn_type insn;

  union
  {
    /* For MOV and MVN. */
    struct
    {
      /* The value of each element. */
      rtx value;

      /* The kind of shift modifier to use, and the number of bits to shift.
	 This is (LSL, 0) if no shift is needed. */
      modifier_type modifier;
      unsigned int shift;
    } mov;

    /* For INDEX. */
    struct
    {
      /* The value of the first element and the step to be added for each
	 subsequent element. */
      rtx base, step;
    } index;

    /* For PTRUE. */
    aarch64_svpattern pattern;
  } u;
};

/* Construct a floating-point immediate in which each element has mode
   ELT_MODE_IN and value VALUE_IN. */
inline simd_immediate_info
::simd_immediate_info (scalar_float_mode elt_mode_in, rtx value_in)
  : elt_mode (elt_mode_in), insn (MOV)
{
  u.mov.value = value_in;
  u.mov.modifier = LSL;
  u.mov.shift = 0;
}

/* Construct an integer immediate in which each element has mode ELT_MODE_IN
   and value VALUE_IN. The other parameters are as for the structure
   fields. */
inline simd_immediate_info
::simd_immediate_info (scalar_int_mode elt_mode_in,
		       unsigned HOST_WIDE_INT value_in,
		       insn_type insn_in, modifier_type modifier_in,
		       unsigned int shift_in)
  : elt_mode (elt_mode_in), insn (insn_in)
{
  u.mov.value = gen_int_mode (value_in, elt_mode_in);
  u.mov.modifier = modifier_in;
  u.mov.shift = shift_in;
}

/* Construct an integer immediate in which each element has mode ELT_MODE_IN
   and where element I is equal to BASE_IN + I * STEP_IN. */
inline simd_immediate_info
::simd_immediate_info (scalar_mode elt_mode_in, rtx base_in, rtx step_in)
  : elt_mode (elt_mode_in), insn (INDEX)
{
  u.index.base = base_in;
  u.index.step = step_in;
}

/* Construct a predicate that controls elements of mode ELT_MODE_IN
   and has PTRUE pattern PATTERN_IN. */
inline simd_immediate_info
::simd_immediate_info (scalar_int_mode elt_mode_in,
		       aarch64_svpattern pattern_in)
  : elt_mode (elt_mode_in), insn (PTRUE)
{
  u.pattern = pattern_in;
}

namespace {

/* Describes types that map to Pure Scalable Types (PSTs) in the AAPCS64. */
class pure_scalable_type_info
{
public:
  /* Represents the result of analyzing a type. All values are nonzero,
     in the possibly forlorn hope that accidental conversions to bool
     trigger a warning. */
  enum analysis_result
  {
    /* The type does not have an ABI identity; i.e. it doesn't contain
       at least one object whose type is a Fundamental Data Type. */
    NO_ABI_IDENTITY = 1,

    /* The type is definitely a Pure Scalable Type. */
    IS_PST,

    /* The type is definitely not a Pure Scalable Type. */
    ISNT_PST,

    /* It doesn't matter for PCS purposes whether the type is a Pure
       Scalable Type or not, since the type will be handled the same
       way regardless.

       Specifically, this means that if the type is a Pure Scalable Type,
       there aren't enough argument registers to hold it, and so it will
       need to be passed or returned in memory. If the type isn't a
       Pure Scalable Type, it's too big to be passed or returned in core
       or SIMD&FP registers, and so again will need to go in memory. */
    DOESNT_MATTER
  };

  /* Aggregates of 17 bytes or more are normally passed and returned
     in memory, so aggregates of that size can safely be analyzed as
     DOESNT_MATTER. We need to be able to collect enough pieces to
     represent a PST that is smaller than that. Since predicates are
     2 bytes in size for -msve-vector-bits=128, that means we need to be
     able to store at least 8 pieces.

     We also need to be able to store enough pieces to represent
     a single vector in each vector argument register and a single
     predicate in each predicate argument register. This means that
     we need at least 12 pieces. */
  static const unsigned int MAX_PIECES = NUM_FP_ARG_REGS + NUM_PR_ARG_REGS;
#if __cplusplus >= 201103L
  static_assert (MAX_PIECES >= 8, "Need to store at least 8 predicates");
#endif

  /* Describes one piece of a PST. Each piece is one of:

     - a single Scalable Vector Type (SVT)
     - a single Scalable Predicate Type (SPT)
     - a PST containing 2, 3 or 4 SVTs, with no padding

     It either represents a single built-in type or a PST formed from
     multiple homogeneous built-in types. */
  struct piece
  {
    rtx get_rtx (unsigned int, unsigned int) const;

    /* The number of vector and predicate registers that the piece
       occupies. One of the two is always zero. */
    unsigned int num_zr;
    unsigned int num_pr;

    /* The mode of the registers described above. */
    machine_mode mode;

    /* If this piece is formed from multiple homogeneous built-in types,
       this is the mode of the built-in types, otherwise it is MODE. */
    machine_mode orig_mode;

    /* The offset in bytes of the piece from the start of the type. */
    poly_uint64_pod offset;
  };

  /* Divides types analyzed as IS_PST into individual pieces. The pieces
     are in memory order. */
  auto_vec<piece, MAX_PIECES> pieces;

  unsigned int num_zr () const;
  unsigned int num_pr () const;

  rtx get_rtx (machine_mode mode, unsigned int, unsigned int) const;

  analysis_result analyze (const_tree);
  bool analyze_registers (const_tree);

private:
  analysis_result analyze_array (const_tree);
  analysis_result analyze_record (const_tree);
  void add_piece (const piece &);
};
}

/* The current code model. */
enum aarch64_code_model aarch64_cmodel;

/* The number of 64-bit elements in an SVE vector. */
poly_uint16 aarch64_sve_vg;

#ifdef HAVE_AS_TLS
#undef TARGET_HAVE_TLS
#define TARGET_HAVE_TLS 1
#endif

static bool aarch64_composite_type_p (const_tree, machine_mode);
static bool aarch64_return_in_memory_1 (const_tree);
static bool aarch64_vfp_is_call_or_return_candidate (machine_mode,
						     const_tree,
						     machine_mode *, int *,
						     bool *, bool);
static void aarch64_elf_asm_constructor (rtx, int) ATTRIBUTE_UNUSED;
static void aarch64_elf_asm_destructor (rtx, int) ATTRIBUTE_UNUSED;
static void aarch64_override_options_after_change (void);
static bool aarch64_vector_mode_supported_p (machine_mode);
static int aarch64_address_cost (rtx, machine_mode, addr_space_t, bool);
static bool aarch64_builtin_support_vector_misalignment (machine_mode mode,
							 const_tree type,
							 int misalignment,
							 bool is_packed);
static machine_mode aarch64_simd_container_mode (scalar_mode, poly_int64);
static bool aarch64_print_address_internal (FILE*, machine_mode, rtx,
					    aarch64_addr_query_type);
static HOST_WIDE_INT aarch64_clamp_to_uimm12_shift (HOST_WIDE_INT val);

/* Major revision number of the ARM Architecture implemented by the target. */
unsigned aarch64_architecture_version;

/* The processor for which instructions should be scheduled. */
enum aarch64_processor aarch64_tune = cortexa53;

/* Mask to specify which instruction scheduling options should be used. */
uint64_t aarch64_tune_flags = 0;

/* Global flag for PC relative loads. */
bool aarch64_pcrelative_literal_loads;

/* Global flag for whether frame pointer is enabled. */
bool aarch64_use_frame_pointer;

#define BRANCH_PROTECT_STR_MAX 255
char *accepted_branch_protection_string = NULL;

static enum aarch64_parse_opt_result
aarch64_parse_branch_protection (const char*, char**);

/* Support for command line parsing of boolean flags in the tuning
   structures. */
struct aarch64_flag_desc
{
  const char* name;
  unsigned int flag;
};

#define AARCH64_FUSION_PAIR(name, internal_name) \
  { name, AARCH64_FUSE_##internal_name },
static const struct aarch64_flag_desc aarch64_fusible_pairs[] =
{
  { "none", AARCH64_FUSE_NOTHING },
#include "aarch64-fusion-pairs.def"
  { "all", AARCH64_FUSE_ALL },
  { NULL, AARCH64_FUSE_NOTHING }
};

#define AARCH64_EXTRA_TUNING_OPTION(name, internal_name) \
  { name, AARCH64_EXTRA_TUNE_##internal_name },
static const struct aarch64_flag_desc aarch64_tuning_flags[] =
{
  { "none", AARCH64_EXTRA_TUNE_NONE },
#include "aarch64-tuning-flags.def"
  { "all", AARCH64_EXTRA_TUNE_ALL },
  { NULL, AARCH64_EXTRA_TUNE_NONE }
};

/* Tuning parameters. */

static const struct cpu_addrcost_table generic_addrcost_table =
{
  {
    1, /* hi */
    0, /* si */
    0, /* di */
    1, /* ti */
  },
  0, /* pre_modify */
  0, /* post_modify */
  0, /* register_offset */
  0, /* register_sextend */
  0, /* register_zextend */
  0 /* imm_offset */
};

static const struct cpu_addrcost_table exynosm1_addrcost_table =
{
  {
    0, /* hi */
    0, /* si */
    0, /* di */
    2, /* ti */
  },
  0, /* pre_modify */
  0, /* post_modify */
  1, /* register_offset */
  1, /* register_sextend */
  2, /* register_zextend */
  0, /* imm_offset */
};

static const struct cpu_addrcost_table xgene1_addrcost_table =
{
  {
    1, /* hi */
    0, /* si */
    0, /* di */
    1, /* ti */
  },
  1, /* pre_modify */
  1, /* post_modify */
  0, /* register_offset */
  1, /* register_sextend */
  1, /* register_zextend */
  0, /* imm_offset */
};

static const struct cpu_addrcost_table thunderx2t99_addrcost_table =
{
  {
    1, /* hi */
    1, /* si */
    1, /* di */
    2, /* ti */
  },
  0, /* pre_modify */
  0, /* post_modify */
  2, /* register_offset */
  3, /* register_sextend */
  3, /* register_zextend */
  0, /* imm_offset */
};

static const struct cpu_addrcost_table thunderx3t110_addrcost_table =
{
  {
    1, /* hi */
    1, /* si */
    1, /* di */
    2, /* ti */
  },
  0, /* pre_modify */
  0, /* post_modify */
  2, /* register_offset */
  3, /* register_sextend */
  3, /* register_zextend */
  0, /* imm_offset */
};

static const struct cpu_addrcost_table tsv110_addrcost_table =
{
  {
    1, /* hi */
    0, /* si */
    0, /* di */
    1, /* ti */
  },
  0, /* pre_modify */
  0, /* post_modify */
  0, /* register_offset */
  1, /* register_sextend */
  1, /* register_zextend */
  0, /* imm_offset */
};

static const struct cpu_addrcost_table qdf24xx_addrcost_table =
{
  {
    1, /* hi */
    1, /* si */
    1, /* di */
    2, /* ti */
  },
  1, /* pre_modify */
  1, /* post_modify */
  3, /* register_offset */
  3, /* register_sextend */
  3, /* register_zextend */
  2, /* imm_offset */
};

static const struct cpu_regmove_cost generic_regmove_cost =
{
  1, /* GP2GP */
  /* Avoid the use of slow int<->fp moves for spilling by setting
     their cost higher than memmov_cost. */
  5, /* GP2FP */
  5, /* FP2GP */
  2 /* FP2FP */
};

static const struct cpu_regmove_cost cortexa57_regmove_cost =
{
  1, /* GP2GP */
  /* Avoid the use of slow int<->fp moves for spilling by setting
     their cost higher than memmov_cost. */
  5, /* GP2FP */
  5, /* FP2GP */
  2 /* FP2FP */
};

static const struct cpu_regmove_cost cortexa53_regmove_cost =
{
  1, /* GP2GP */
  /* Avoid the use of slow int<->fp moves for spilling by setting
     their cost higher than memmov_cost. */
  5, /* GP2FP */
  5, /* FP2GP */
  2 /* FP2FP */
};

static const struct cpu_regmove_cost exynosm1_regmove_cost =
{
  1, /* GP2GP */
  /* Avoid the use of slow int<->fp moves for spilling by setting
     their cost higher than memmov_cost (actual, 4 and 9). */
  9, /* GP2FP */
  9, /* FP2GP */
  1 /* FP2FP */
};

static const struct cpu_regmove_cost thunderx_regmove_cost =
{
  2, /* GP2GP */
  2, /* GP2FP */
  6, /* FP2GP */
  4 /* FP2FP */
};

static const struct cpu_regmove_cost xgene1_regmove_cost =
{
  1, /* GP2GP */
  /* Avoid the use of slow int<->fp moves for spilling by setting
     their cost higher than memmov_cost. */
  8, /* GP2FP */
  8, /* FP2GP */
  2 /* FP2FP */
};

static const struct cpu_regmove_cost qdf24xx_regmove_cost =
{
  2, /* GP2GP */
  /* Avoid the use of int<->fp moves for spilling. */
  6, /* GP2FP */
  6, /* FP2GP */
  4 /* FP2FP */
};

static const struct cpu_regmove_cost thunderx2t99_regmove_cost =
{
  1, /* GP2GP */
  /* Avoid the use of int<->fp moves for spilling. */
  5, /* GP2FP */
  6, /* FP2GP */
  3, /* FP2FP */
};

static const struct cpu_regmove_cost thunderx3t110_regmove_cost =
{
  1, /* GP2GP */
  /* Avoid the use of int<->fp moves for spilling. */
  4, /* GP2FP */
  5, /* FP2GP */
  4 /* FP2FP */
};

static const struct cpu_regmove_cost tsv110_regmove_cost =
{
  1, /* GP2GP */
  /* Avoid the use of slow int<->fp moves for spilling by setting
     their cost higher than memmov_cost. */
  2, /* GP2FP */
  3, /* FP2GP */
  2 /* FP2FP */
};

/* Generic costs for vector insn classes. */
static const struct cpu_vector_cost generic_vector_cost =
{
  1, /* scalar_int_stmt_cost */
  1, /* scalar_fp_stmt_cost */
  1, /* scalar_load_cost */
  1, /* scalar_store_cost */
  1, /* vec_int_stmt_cost */
  1, /* vec_fp_stmt_cost */
  2, /* vec_permute_cost */
  2, /* vec_to_scalar_cost */
  1, /* scalar_to_vec_cost */
  1, /* vec_align_load_cost */
  1, /* vec_unalign_load_cost */
  1, /* vec_unalign_store_cost */
  1, /* vec_store_cost */
  3, /* cond_taken_branch_cost */
  1 /* cond_not_taken_branch_cost */
};

/* QDF24XX costs for vector insn classes. */
static const struct cpu_vector_cost qdf24xx_vector_cost =
{
  1, /* scalar_int_stmt_cost */
  1, /* scalar_fp_stmt_cost */
  1, /* scalar_load_cost */
  1, /* scalar_store_cost */
  1, /* vec_int_stmt_cost */
  3, /* vec_fp_stmt_cost */
  2, /* vec_permute_cost */
  1, /* vec_to_scalar_cost */
  1, /* scalar_to_vec_cost */
  1, /* vec_align_load_cost */
  1, /* vec_unalign_load_cost */
  1, /* vec_unalign_store_cost */
  1, /* vec_store_cost */
  3, /* cond_taken_branch_cost */
  1 /* cond_not_taken_branch_cost */
};

/* ThunderX costs for vector insn classes. */
static const struct cpu_vector_cost thunderx_vector_cost =
{
  1, /* scalar_int_stmt_cost */
  1, /* scalar_fp_stmt_cost */
  3, /* scalar_load_cost */
  1, /* scalar_store_cost */
  4, /* vec_int_stmt_cost */
  1, /* vec_fp_stmt_cost */
  4, /* vec_permute_cost */
  2, /* vec_to_scalar_cost */
  2, /* scalar_to_vec_cost */
  3, /* vec_align_load_cost */
  5, /* vec_unalign_load_cost */
  5, /* vec_unalign_store_cost */
  1, /* vec_store_cost */
  3, /* cond_taken_branch_cost */
  3 /* cond_not_taken_branch_cost */
};

static const struct cpu_vector_cost tsv110_vector_cost =
{
  1, /* scalar_int_stmt_cost */
  1, /* scalar_fp_stmt_cost */
  5, /* scalar_load_cost */
  1, /* scalar_store_cost */
  2, /* vec_int_stmt_cost */
  2, /* vec_fp_stmt_cost */
  2, /* vec_permute_cost */
  3, /* vec_to_scalar_cost */
  2, /* scalar_to_vec_cost */
  5, /* vec_align_load_cost */
  5, /* vec_unalign_load_cost */
  1, /* vec_unalign_store_cost */
  1, /* vec_store_cost */
  1, /* cond_taken_branch_cost */
  1 /* cond_not_taken_branch_cost */
};

/* Cortex-A57 costs for vector insn classes. */
static const struct cpu_vector_cost cortexa57_vector_cost =
{
  1, /* scalar_int_stmt_cost */
  1, /* scalar_fp_stmt_cost */
  4, /* scalar_load_cost */
  1, /* scalar_store_cost */
  2, /* vec_int_stmt_cost */
  2, /* vec_fp_stmt_cost */
  3, /* vec_permute_cost */
  8, /* vec_to_scalar_cost */
  8, /* scalar_to_vec_cost */
  4, /* vec_align_load_cost */
  4, /* vec_unalign_load_cost */
  1, /* vec_unalign_store_cost */
  1, /* vec_store_cost */
  1, /* cond_taken_branch_cost */
  1 /* cond_not_taken_branch_cost */
};

static const struct cpu_vector_cost exynosm1_vector_cost =
{
  1, /* scalar_int_stmt_cost */
  1, /* scalar_fp_stmt_cost */
  5, /* scalar_load_cost */
  1, /* scalar_store_cost */
  3, /* vec_int_stmt_cost */
  3, /* vec_fp_stmt_cost */
  3, /* vec_permute_cost */
  3, /* vec_to_scalar_cost */
  3, /* scalar_to_vec_cost */
  5, /* vec_align_load_cost */
  5, /* vec_unalign_load_cost */
  1, /* vec_unalign_store_cost */
  1, /* vec_store_cost */
  1, /* cond_taken_branch_cost */
  1 /* cond_not_taken_branch_cost */
};

/* X-Gene 1 costs for vector insn classes. */
static const struct cpu_vector_cost xgene1_vector_cost =
{
  1, /* scalar_int_stmt_cost */
  1, /* scalar_fp_stmt_cost */
  5, /* scalar_load_cost */
  1, /* scalar_store_cost */
  2, /* vec_int_stmt_cost */
  2, /* vec_fp_stmt_cost */
  2, /* vec_permute_cost */
  4, /* vec_to_scalar_cost */
  4, /* scalar_to_vec_cost */
  10, /* vec_align_load_cost */
  10, /* vec_unalign_load_cost */
  2, /* vec_unalign_store_cost */
  2, /* vec_store_cost */
  2, /* cond_taken_branch_cost */
  1 /* cond_not_taken_branch_cost */
};

/* Costs for vector insn classes for Vulcan. */
static const struct cpu_vector_cost thunderx2t99_vector_cost =
{
  1, /* scalar_int_stmt_cost */
  6, /* scalar_fp_stmt_cost */
  4, /* scalar_load_cost */
  1, /* scalar_store_cost */
  4, /* vec_int_stmt_cost */
  5, /* vec_fp_stmt_cost */
  10, /* vec_permute_cost */
  6, /* vec_to_scalar_cost */
  5, /* scalar_to_vec_cost */
  4, /* vec_align_load_cost */
  4, /* vec_unalign_load_cost */
  1, /* vec_unalign_store_cost */
  1, /* vec_store_cost */
  2, /* cond_taken_branch_cost */
  1 /* cond_not_taken_branch_cost */
};

static const struct cpu_vector_cost thunderx3t110_vector_cost =
{
  1, /* scalar_int_stmt_cost */
  5, /* scalar_fp_stmt_cost */
  4, /* scalar_load_cost */
  1, /* scalar_store_cost */
  5, /* vec_int_stmt_cost */
  5, /* vec_fp_stmt_cost */
  10, /* vec_permute_cost */
  5, /* vec_to_scalar_cost */
  5, /* scalar_to_vec_cost */
  4, /* vec_align_load_cost */
  4, /* vec_unalign_load_cost */
  4, /* vec_unalign_store_cost */
  4, /* vec_store_cost */
  2, /* cond_taken_branch_cost */
  1 /* cond_not_taken_branch_cost */
};

/* Generic costs for branch instructions. */
static const struct cpu_branch_cost generic_branch_cost =
{
  1, /* Predictable. */
  3 /* Unpredictable. */
};

/* Generic approximation modes. */
static const cpu_approx_modes generic_approx_modes =
{
  AARCH64_APPROX_NONE, /* division */
  AARCH64_APPROX_NONE, /* sqrt */
  AARCH64_APPROX_NONE /* recip_sqrt */
};

/* Approximation modes for Exynos M1. */
static const cpu_approx_modes exynosm1_approx_modes =
{
  AARCH64_APPROX_NONE, /* division */
  AARCH64_APPROX_ALL, /* sqrt */
  AARCH64_APPROX_ALL /* recip_sqrt */
};

/* Approximation modes for X-Gene 1. */
static const cpu_approx_modes xgene1_approx_modes =
{
  AARCH64_APPROX_NONE, /* division */
  AARCH64_APPROX_NONE, /* sqrt */
  AARCH64_APPROX_ALL /* recip_sqrt */
};

/* Generic prefetch settings (which disable prefetch). */
static const cpu_prefetch_tune generic_prefetch_tune =
{
  0, /* num_slots */
  -1, /* l1_cache_size */
  -1, /* l1_cache_line_size */
  -1, /* l2_cache_size */
  true, /* prefetch_dynamic_strides */
  -1, /* minimum_stride */
  -1 /* default_opt_level */
};

static const cpu_prefetch_tune exynosm1_prefetch_tune =
{
  0, /* num_slots */
  -1, /* l1_cache_size */
  64, /* l1_cache_line_size */
  -1, /* l2_cache_size */
  true, /* prefetch_dynamic_strides */
  -1, /* minimum_stride */
  -1 /* default_opt_level */
};

static const cpu_prefetch_tune qdf24xx_prefetch_tune =
{
  4, /* num_slots */
  32, /* l1_cache_size */
  64, /* l1_cache_line_size */
  512, /* l2_cache_size */
  false, /* prefetch_dynamic_strides */
  2048, /* minimum_stride */
  3 /* default_opt_level */
};

static const cpu_prefetch_tune thunderxt88_prefetch_tune =
{
  8, /* num_slots */
  32, /* l1_cache_size */
  128, /* l1_cache_line_size */
  16*1024, /* l2_cache_size */
  true, /* prefetch_dynamic_strides */
  -1, /* minimum_stride */
  3 /* default_opt_level */
};

static const cpu_prefetch_tune thunderx_prefetch_tune =
{
  8, /* num_slots */
  32, /* l1_cache_size */
  128, /* l1_cache_line_size */
  -1, /* l2_cache_size */
  true, /* prefetch_dynamic_strides */
  -1, /* minimum_stride */
  -1 /* default_opt_level */
};

static const cpu_prefetch_tune thunderx2t99_prefetch_tune =
{
  8, /* num_slots */
  32, /* l1_cache_size */
  64, /* l1_cache_line_size */
  256, /* l2_cache_size */
  true, /* prefetch_dynamic_strides */
  -1, /* minimum_stride */
  -1 /* default_opt_level */
};

static const cpu_prefetch_tune thunderx3t110_prefetch_tune =
{
  8, /* num_slots */
  32, /* l1_cache_size */
  64, /* l1_cache_line_size */
  256, /* l2_cache_size */
  true, /* prefetch_dynamic_strides */
  -1, /* minimum_stride */
  -1 /* default_opt_level */
};

static const cpu_prefetch_tune tsv110_prefetch_tune =
{
  0, /* num_slots */
  64, /* l1_cache_size */
  64, /* l1_cache_line_size */
  512, /* l2_cache_size */
  true, /* prefetch_dynamic_strides */
  -1, /* minimum_stride */
  -1 /* default_opt_level */
};

static const cpu_prefetch_tune xgene1_prefetch_tune =
{
  8, /* num_slots */
  32, /* l1_cache_size */
  64, /* l1_cache_line_size */
  256, /* l2_cache_size */
  true, /* prefetch_dynamic_strides */
  -1, /* minimum_stride */
  -1 /* default_opt_level */
};

static const cpu_prefetch_tune a64fx_prefetch_tune =
{
  8, /* num_slots */
  64, /* l1_cache_size */
  256, /* l1_cache_line_size */
  32768, /* l2_cache_size */
  true, /* prefetch_dynamic_strides */
  -1, /* minimum_stride */
  -1 /* default_opt_level */
};

static const struct tune_params generic_tunings =
{
  &cortexa57_extra_costs,
  &generic_addrcost_table,
  &generic_regmove_cost,
  &generic_vector_cost,
  &generic_branch_cost,
  &generic_approx_modes,
  SVE_NOT_IMPLEMENTED, /* sve_width */
  4, /* memmov_cost */
  2, /* issue_rate */
  (AARCH64_FUSE_AES_AESMC | AARCH64_FUSE_CMP_BRANCH), /* fusible_ops */
  "16:12", /* function_align. */
  "4", /* jump_align. */
  "8", /* loop_align. */
  2, /* int_reassoc_width. */
  4, /* fp_reassoc_width. */
  1, /* vec_reassoc_width. */
  2, /* min_div_recip_mul_sf. */
  2, /* min_div_recip_mul_df. */
  0, /* max_case_values. */
  tune_params::AUTOPREFETCHER_WEAK, /* autoprefetcher_model. */
  (AARCH64_EXTRA_TUNE_NONE), /* tune_flags. */
  &generic_prefetch_tune
};

static const struct tune_params cortexa35_tunings =
{
  &cortexa53_extra_costs,
  &generic_addrcost_table,
  &cortexa53_regmove_cost,
  &generic_vector_cost,
  &generic_branch_cost,
  &generic_approx_modes,
  SVE_NOT_IMPLEMENTED, /* sve_width */
  4, /* memmov_cost */
  1, /* issue_rate */
  (AARCH64_FUSE_AES_AESMC | AARCH64_FUSE_MOV_MOVK | AARCH64_FUSE_ADRP_ADD
   | AARCH64_FUSE_MOVK_MOVK | AARCH64_FUSE_ADRP_LDR), /* fusible_ops */
  "16", /* function_align. */
  "4", /* jump_align. */
  "8", /* loop_align. */
  2, /* int_reassoc_width. */
  4, /* fp_reassoc_width. */
  1, /* vec_reassoc_width. */
  2, /* min_div_recip_mul_sf. */
  2, /* min_div_recip_mul_df. */
  0, /* max_case_values. */
  tune_params::AUTOPREFETCHER_WEAK, /* autoprefetcher_model. */
  (AARCH64_EXTRA_TUNE_NONE), /* tune_flags. */
  &generic_prefetch_tune
};

static const struct tune_params cortexa53_tunings =
{
  &cortexa53_extra_costs,
  &generic_addrcost_table,
  &cortexa53_regmove_cost,
  &generic_vector_cost,
  &generic_branch_cost,
  &generic_approx_modes,
  SVE_NOT_IMPLEMENTED, /* sve_width */
  4, /* memmov_cost */
  2, /* issue_rate */
  (AARCH64_FUSE_AES_AESMC | AARCH64_FUSE_MOV_MOVK | AARCH64_FUSE_ADRP_ADD
   | AARCH64_FUSE_MOVK_MOVK | AARCH64_FUSE_ADRP_LDR), /* fusible_ops */
  "16", /* function_align. */
  "4", /* jump_align. */
  "8", /* loop_align. */
  2, /* int_reassoc_width. */
  4, /* fp_reassoc_width. */
  1, /* vec_reassoc_width. */
  2, /* min_div_recip_mul_sf. */
  2, /* min_div_recip_mul_df. */
  0, /* max_case_values. */
  tune_params::AUTOPREFETCHER_WEAK, /* autoprefetcher_model. */
  (AARCH64_EXTRA_TUNE_NONE), /* tune_flags. */
  &generic_prefetch_tune
};

static const struct tune_params cortexa57_tunings =
{
  &cortexa57_extra_costs,
  &generic_addrcost_table,
  &cortexa57_regmove_cost,
  &cortexa57_vector_cost,
  &generic_branch_cost,
  &generic_approx_modes,
  SVE_NOT_IMPLEMENTED, /* sve_width */
  4, /* memmov_cost */
  3, /* issue_rate */
  (AARCH64_FUSE_AES_AESMC | AARCH64_FUSE_MOV_MOVK | AARCH64_FUSE_ADRP_ADD
   | AARCH64_FUSE_MOVK_MOVK), /* fusible_ops */
  "16", /* function_align. */
  "4", /* jump_align. */
  "8", /* loop_align. */
  2, /* int_reassoc_width. */
  4, /* fp_reassoc_width. */
  1, /* vec_reassoc_width. */
  2, /* min_div_recip_mul_sf. */
  2, /* min_div_recip_mul_df. */
  0, /* max_case_values. */
  tune_params::AUTOPREFETCHER_WEAK, /* autoprefetcher_model. */
  (AARCH64_EXTRA_TUNE_RENAME_FMA_REGS), /* tune_flags. */
  &generic_prefetch_tune
};

static const struct tune_params cortexa72_tunings =
{
  &cortexa57_extra_costs,
  &generic_addrcost_table,
  &cortexa57_regmove_cost,
  &cortexa57_vector_cost,
  &generic_branch_cost,
  &generic_approx_modes,
  SVE_NOT_IMPLEMENTED, /* sve_width */
  4, /* memmov_cost */
  3, /* issue_rate */
  (AARCH64_FUSE_AES_AESMC | AARCH64_FUSE_MOV_MOVK | AARCH64_FUSE_ADRP_ADD
   | AARCH64_FUSE_MOVK_MOVK), /* fusible_ops */
  "16", /* function_align. */
  "4", /* jump_align. */
  "8", /* loop_align. */
  2, /* int_reassoc_width. */
  4, /* fp_reassoc_width. */
  1, /* vec_reassoc_width. */
  2, /* min_div_recip_mul_sf. */
  2, /* min_div_recip_mul_df. */
  0, /* max_case_values. */
  tune_params::AUTOPREFETCHER_WEAK, /* autoprefetcher_model. */
  (AARCH64_EXTRA_TUNE_NONE), /* tune_flags. */
  &generic_prefetch_tune
};

static const struct tune_params cortexa73_tunings =
{
  &cortexa57_extra_costs,
  &generic_addrcost_table,
  &cortexa57_regmove_cost,
  &cortexa57_vector_cost,
  &generic_branch_cost,
  &generic_approx_modes,
  SVE_NOT_IMPLEMENTED, /* sve_width */
  4, /* memmov_cost. */
  2, /* issue_rate. */
  (AARCH64_FUSE_AES_AESMC | AARCH64_FUSE_MOV_MOVK | AARCH64_FUSE_ADRP_ADD
   | AARCH64_FUSE_MOVK_MOVK | AARCH64_FUSE_ADRP_LDR), /* fusible_ops */
  "16", /* function_align. */
  "4", /* jump_align. */
  "8", /* loop_align. */
  2, /* int_reassoc_width. */
  4, /* fp_reassoc_width. */
  1, /* vec_reassoc_width. */
  2, /* min_div_recip_mul_sf. */
  2, /* min_div_recip_mul_df. */
  0, /* max_case_values. */
  tune_params::AUTOPREFETCHER_WEAK, /* autoprefetcher_model. */
  (AARCH64_EXTRA_TUNE_NONE), /* tune_flags. */
  &generic_prefetch_tune
};

static const struct tune_params exynosm1_tunings =
{
  &exynosm1_extra_costs,
  &exynosm1_addrcost_table,
  &exynosm1_regmove_cost,
  &exynosm1_vector_cost,
  &generic_branch_cost,
  &exynosm1_approx_modes,
  SVE_NOT_IMPLEMENTED, /* sve_width */
  4, /* memmov_cost */
  3, /* issue_rate */
  (AARCH64_FUSE_AES_AESMC), /* fusible_ops */
  "4", /* function_align. */
  "4", /* jump_align. */
  "4", /* loop_align. */
  2, /* int_reassoc_width. */
  4, /* fp_reassoc_width. */
  1, /* vec_reassoc_width. */
  2, /* min_div_recip_mul_sf. */
  2, /* min_div_recip_mul_df. */
  48, /* max_case_values. */
  tune_params::AUTOPREFETCHER_WEAK, /* autoprefetcher_model. */
  (AARCH64_EXTRA_TUNE_NONE), /* tune_flags. */
  &exynosm1_prefetch_tune
};

static const struct tune_params thunderxt88_tunings =
{
  &thunderx_extra_costs,
  &generic_addrcost_table,
  &thunderx_regmove_cost,
  &thunderx_vector_cost,
  &generic_branch_cost,
  &generic_approx_modes,
  SVE_NOT_IMPLEMENTED, /* sve_width */
  6, /* memmov_cost */
  2, /* issue_rate */
  AARCH64_FUSE_ALU_BRANCH, /* fusible_ops */
  "8", /* function_align. */
  "8", /* jump_align. */
  "8", /* loop_align. */
  2, /* int_reassoc_width. */
  4, /* fp_reassoc_width. */
  1, /* vec_reassoc_width. */
  2, /* min_div_recip_mul_sf. */
  2, /* min_div_recip_mul_df. */
  0, /* max_case_values. */
  tune_params::AUTOPREFETCHER_OFF, /* autoprefetcher_model. */
  (AARCH64_EXTRA_TUNE_SLOW_UNALIGNED_LDPW), /* tune_flags. */
  &thunderxt88_prefetch_tune
};

static const struct tune_params thunderx_tunings =
{
  &thunderx_extra_costs,
  &generic_addrcost_table,
  &thunderx_regmove_cost,
  &thunderx_vector_cost,
  &generic_branch_cost,
  &generic_approx_modes,
  SVE_NOT_IMPLEMENTED, /* sve_width */
  6, /* memmov_cost */
  2, /* issue_rate */
  AARCH64_FUSE_ALU_BRANCH, /* fusible_ops */
  "8", /* function_align. */
  "8", /* jump_align. */
  "8", /* loop_align. */
  2, /* int_reassoc_width. */
  4, /* fp_reassoc_width. */
  1, /* vec_reassoc_width. */
  2, /* min_div_recip_mul_sf. */
  2, /* min_div_recip_mul_df. */
  0, /* max_case_values. */
  tune_params::AUTOPREFETCHER_OFF, /* autoprefetcher_model. */
  (AARCH64_EXTRA_TUNE_SLOW_UNALIGNED_LDPW
   | AARCH64_EXTRA_TUNE_CHEAP_SHIFT_EXTEND), /* tune_flags. */
  &thunderx_prefetch_tune
};

static const struct tune_params tsv110_tunings =
{
  &tsv110_extra_costs,
  &tsv110_addrcost_table,
  &tsv110_regmove_cost,
  &tsv110_vector_cost,
  &generic_branch_cost,
  &generic_approx_modes,
  SVE_NOT_IMPLEMENTED, /* sve_width */
  4, /* memmov_cost */
  4, /* issue_rate */
  (AARCH64_FUSE_AES_AESMC | AARCH64_FUSE_ALU_BRANCH
   | AARCH64_FUSE_ALU_CBZ), /* fusible_ops */
  "16", /* function_align. */
  "4", /* jump_align. */
  "8", /* loop_align. */
  2, /* int_reassoc_width. */
  4, /* fp_reassoc_width. */
  1, /* vec_reassoc_width. */
  2, /* min_div_recip_mul_sf. */
  2, /* min_div_recip_mul_df. */
  0, /* max_case_values. */
  tune_params::AUTOPREFETCHER_WEAK, /* autoprefetcher_model. */
  (AARCH64_EXTRA_TUNE_NONE), /* tune_flags. */
  &tsv110_prefetch_tune
};

static const struct tune_params xgene1_tunings =
{
  &xgene1_extra_costs,
  &xgene1_addrcost_table,
  &xgene1_regmove_cost,
  &xgene1_vector_cost,
  &generic_branch_cost,
  &xgene1_approx_modes,
  SVE_NOT_IMPLEMENTED, /* sve_width */
  6, /* memmov_cost */
  4, /* issue_rate */
  AARCH64_FUSE_NOTHING, /* fusible_ops */
  "16", /* function_align. */
  "16", /* jump_align. */
  "16", /* loop_align. */
  2, /* int_reassoc_width. */
  4, /* fp_reassoc_width. */
  1, /* vec_reassoc_width. */
  2, /* min_div_recip_mul_sf. */
  2, /* min_div_recip_mul_df. */
  17, /* max_case_values. */
  tune_params::AUTOPREFETCHER_OFF, /* autoprefetcher_model. */
  (AARCH64_EXTRA_TUNE_NO_LDP_STP_QREGS), /* tune_flags. */
  &xgene1_prefetch_tune
};

static const struct tune_params emag_tunings =
{
  &xgene1_extra_costs,
  &xgene1_addrcost_table,
  &xgene1_regmove_cost,
  &xgene1_vector_cost,
  &generic_branch_cost,
  &xgene1_approx_modes,
  SVE_NOT_IMPLEMENTED,
  6, /* memmov_cost */
  4, /* issue_rate */
  AARCH64_FUSE_NOTHING, /* fusible_ops */
  "16", /* function_align. */
  "16", /* jump_align. */
  "16", /* loop_align. */
  2, /* int_reassoc_width. */
  4, /* fp_reassoc_width. */
  1, /* vec_reassoc_width. */
  2, /* min_div_recip_mul_sf. */
  2, /* min_div_recip_mul_df. */
  17, /* max_case_values. */
  tune_params::AUTOPREFETCHER_OFF, /* autoprefetcher_model. */
  (AARCH64_EXTRA_TUNE_NO_LDP_STP_QREGS), /* tune_flags. */
  &xgene1_prefetch_tune
};

static const struct tune_params qdf24xx_tunings =
{
  &qdf24xx_extra_costs,
  &qdf24xx_addrcost_table,
  &qdf24xx_regmove_cost,
  &qdf24xx_vector_cost,
  &generic_branch_cost,
  &generic_approx_modes,
  SVE_NOT_IMPLEMENTED, /* sve_width */
  4, /* memmov_cost */
  4, /* issue_rate */
  (AARCH64_FUSE_MOV_MOVK | AARCH64_FUSE_ADRP_ADD
   | AARCH64_FUSE_MOVK_MOVK), /* fusible_ops */
  "16", /* function_align. */
  "8", /* jump_align. */
  "16", /* loop_align. */
  2, /* int_reassoc_width. */
  4, /* fp_reassoc_width. */
  1, /* vec_reassoc_width. */
  2, /* min_div_recip_mul_sf. */
  2, /* min_div_recip_mul_df. */
  0, /* max_case_values. */
  tune_params::AUTOPREFETCHER_WEAK, /* autoprefetcher_model. */
  AARCH64_EXTRA_TUNE_RENAME_LOAD_REGS, /* tune_flags. */
  &qdf24xx_prefetch_tune
};

/* Tuning structure for the Qualcomm Saphira core. Default to falkor values
   for now. */
static const struct tune_params saphira_tunings =
{
  &generic_extra_costs,
  &generic_addrcost_table,
  &generic_regmove_cost,
  &generic_vector_cost,
  &generic_branch_cost,
  &generic_approx_modes,
  SVE_NOT_IMPLEMENTED, /* sve_width */
  4, /* memmov_cost */
  4, /* issue_rate */
  (AARCH64_FUSE_MOV_MOVK | AARCH64_FUSE_ADRP_ADD
   | AARCH64_FUSE_MOVK_MOVK), /* fusible_ops */
  "16", /* function_align. */
  "8", /* jump_align. */
  "16", /* loop_align. */
  2, /* int_reassoc_width. */
  4, /* fp_reassoc_width. */
  1, /* vec_reassoc_width. */
  2, /* min_div_recip_mul_sf. */
  2, /* min_div_recip_mul_df. */
  0, /* max_case_values. */
  tune_params::AUTOPREFETCHER_WEAK, /* autoprefetcher_model. */
  (AARCH64_EXTRA_TUNE_NONE), /* tune_flags. */
  &generic_prefetch_tune
};

static const struct tune_params thunderx2t99_tunings =
{
  &thunderx2t99_extra_costs,
  &thunderx2t99_addrcost_table,
  &thunderx2t99_regmove_cost,
  &thunderx2t99_vector_cost,
  &generic_branch_cost,
  &generic_approx_modes,
  SVE_NOT_IMPLEMENTED, /* sve_width */
  4, /* memmov_cost. */
  4, /* issue_rate. */
  (AARCH64_FUSE_ALU_BRANCH | AARCH64_FUSE_AES_AESMC
   | AARCH64_FUSE_ALU_CBZ), /* fusible_ops */
  "16", /* function_align. */
  "8", /* jump_align. */
  "16", /* loop_align. */
  3, /* int_reassoc_width. */
  2, /* fp_reassoc_width. */
  2, /* vec_reassoc_width. */
  2, /* min_div_recip_mul_sf. */
  2, /* min_div_recip_mul_df. */
  0, /* max_case_values. */
  tune_params::AUTOPREFETCHER_WEAK, /* autoprefetcher_model. */
  (AARCH64_EXTRA_TUNE_NONE), /* tune_flags. */
  &thunderx2t99_prefetch_tune
};

static const struct tune_params thunderx3t110_tunings =
{
  &thunderx3t110_extra_costs,
  &thunderx3t110_addrcost_table,
  &thunderx3t110_regmove_cost,
  &thunderx3t110_vector_cost,
  &generic_branch_cost,
  &generic_approx_modes,
  SVE_NOT_IMPLEMENTED, /* sve_width */
  4, /* memmov_cost. */
  6, /* issue_rate. */
  (AARCH64_FUSE_ALU_BRANCH | AARCH64_FUSE_AES_AESMC
   | AARCH64_FUSE_ALU_CBZ), /* fusible_ops */
  "16", /* function_align. */
  "8", /* jump_align. */
  "16", /* loop_align. */
  3, /* int_reassoc_width. */
  2, /* fp_reassoc_width. */
  2, /* vec_reassoc_width. */
  2, /* min_div_recip_mul_sf. */
  2, /* min_div_recip_mul_df. */
  0, /* max_case_values. */
  tune_params::AUTOPREFETCHER_WEAK, /* autoprefetcher_model. */
  (AARCH64_EXTRA_TUNE_NONE), /* tune_flags. */
  &thunderx3t110_prefetch_tune
};

static const struct tune_params neoversen1_tunings =
{
  &cortexa57_extra_costs,
  &generic_addrcost_table,
  &generic_regmove_cost,
  &cortexa57_vector_cost,
  &generic_branch_cost,
  &generic_approx_modes,
  SVE_NOT_IMPLEMENTED, /* sve_width */
  4, /* memmov_cost */
  3, /* issue_rate */
  (AARCH64_FUSE_AES_AESMC | AARCH64_FUSE_CMP_BRANCH), /* fusible_ops */
  "32:16", /* function_align. */
  "4", /* jump_align. */
  "32:16", /* loop_align. */
  2, /* int_reassoc_width. */
  4, /* fp_reassoc_width. */
  2, /* vec_reassoc_width. */
  2, /* min_div_recip_mul_sf. */
  2, /* min_div_recip_mul_df. */
  0, /* max_case_values. */
  tune_params::AUTOPREFETCHER_WEAK, /* autoprefetcher_model. */
  (AARCH64_EXTRA_TUNE_NONE), /* tune_flags. */
  &generic_prefetch_tune
};

static const struct tune_params neoversev1_tunings =
{
  &cortexa57_extra_costs,
  &generic_addrcost_table,
  &generic_regmove_cost,
  &cortexa57_vector_cost,
  &generic_branch_cost,
  &generic_approx_modes,
  SVE_256, /* sve_width */
  4, /* memmov_cost */
  3, /* issue_rate */
  (AARCH64_FUSE_AES_AESMC | AARCH64_FUSE_CMP_BRANCH), /* fusible_ops */
  "32:16", /* function_align. */
  "4", /* jump_align. */
  "32:16", /* loop_align. */
  2, /* int_reassoc_width. */
  4, /* fp_reassoc_width. */
  2, /* vec_reassoc_width. */
  2, /* min_div_recip_mul_sf. */
  2, /* min_div_recip_mul_df. */
  0, /* max_case_values. */
  tune_params::AUTOPREFETCHER_WEAK, /* autoprefetcher_model. */
  (AARCH64_EXTRA_TUNE_NONE), /* tune_flags. */
  &generic_prefetch_tune
};

static const struct tune_params neoversen2_tunings =
{
  &cortexa57_extra_costs,
  &generic_addrcost_table,
  &generic_regmove_cost,
  &cortexa57_vector_cost,
  &generic_branch_cost,
  &generic_approx_modes,
  SVE_128, /* sve_width */
  4, /* memmov_cost */
  3, /* issue_rate */
  (AARCH64_FUSE_AES_AESMC | AARCH64_FUSE_CMP_BRANCH), /* fusible_ops */
  "32:16", /* function_align. */
  "4", /* jump_align. */
  "32:16", /* loop_align. */
  2, /* int_reassoc_width. */
  4, /* fp_reassoc_width. */
  2, /* vec_reassoc_width. */
  2, /* min_div_recip_mul_sf. */
  2, /* min_div_recip_mul_df. */
  0, /* max_case_values. */
  tune_params::AUTOPREFETCHER_WEAK, /* autoprefetcher_model. */
  (AARCH64_EXTRA_TUNE_NONE), /* tune_flags. */
  &generic_prefetch_tune
};

static const struct tune_params a64fx_tunings =
{
  &generic_extra_costs,
  &generic_addrcost_table,
  &generic_regmove_cost,
  &generic_vector_cost,
  &generic_branch_cost,
  &generic_approx_modes,
  SVE_512, /* sve_width */
  4, /* memmov_cost */
  7, /* issue_rate */
  (AARCH64_FUSE_AES_AESMC | AARCH64_FUSE_CMP_BRANCH), /* fusible_ops */
  "32", /* function_align. */
  "16", /* jump_align. */
  "32", /* loop_align. */
  4, /* int_reassoc_width. */
  2, /* fp_reassoc_width. */
  2, /* vec_reassoc_width. */
  2, /* min_div_recip_mul_sf. */
  2, /* min_div_recip_mul_df. */
  0, /* max_case_values. */
  tune_params::AUTOPREFETCHER_WEAK, /* autoprefetcher_model. */
  (AARCH64_EXTRA_TUNE_NONE), /* tune_flags. */
  &a64fx_prefetch_tune
};

/* Support for fine-grained override of the tuning structures. */
struct aarch64_tuning_override_function
{
  const char* name;
  void (*parse_override)(const char*, struct tune_params*);
};

static void aarch64_parse_fuse_string (const char*, struct tune_params*);
static void aarch64_parse_tune_string (const char*, struct tune_params*);
static void aarch64_parse_sve_width_string (const char*, struct tune_params*);

static const struct aarch64_tuning_override_function
aarch64_tuning_override_functions[] =
{
  { "fuse", aarch64_parse_fuse_string },
  { "tune", aarch64_parse_tune_string },
  { "sve_width", aarch64_parse_sve_width_string },
  { NULL, NULL }
};

/* A processor implementing AArch64. */
struct processor
{
  const char *const name;
  enum aarch64_processor ident;
  enum aarch64_processor sched_core;
  enum aarch64_arch arch;
  unsigned architecture_version;
  const uint64_t flags;
  const struct tune_params *const tune;
};

/* Architectures implementing AArch64. */
static const struct processor all_architectures[] =
{
#define AARCH64_ARCH(NAME, CORE, ARCH_IDENT, ARCH_REV, FLAGS) \
  {NAME, CORE, CORE, AARCH64_ARCH_##ARCH_IDENT, ARCH_REV, FLAGS, NULL},
#include "aarch64-arches.def"
  {NULL, aarch64_none, aarch64_none, aarch64_no_arch, 0, 0, NULL}
};

/* Processor cores implementing AArch64. */
static const struct processor all_cores[] =
{
#define AARCH64_CORE(NAME, IDENT, SCHED, ARCH, FLAGS, COSTS, IMP, PART, VARIANT) \
  {NAME, IDENT, SCHED, AARCH64_ARCH_##ARCH, \
   all_architectures[AARCH64_ARCH_##ARCH].architecture_version, \
   FLAGS, &COSTS##_tunings},
#include "aarch64-cores.def"
  {"generic", generic, cortexa53, AARCH64_ARCH_8A, 8,
   AARCH64_FL_FOR_ARCH8, &generic_tunings},
  {NULL, aarch64_none, aarch64_none, aarch64_no_arch, 0, 0, NULL}
};

/* Target specification. These are populated by the -march, -mtune, -mcpu
   handling code or by target attributes. */
static const struct processor *selected_arch;
static const struct processor *selected_cpu;
static const struct processor *selected_tune;

enum aarch64_key_type aarch64_ra_sign_key = AARCH64_KEY_A;

/* The current tuning set. */
struct tune_params aarch64_tune_params = generic_tunings;

/* Check whether an 'aarch64_vector_pcs' attribute is valid. */

static tree
handle_aarch64_vector_pcs_attribute (tree *node, tree name, tree,
				     int, bool *no_add_attrs)
{
  /* Since we set fn_type_req to true, the caller should have checked
     this for us. */
  gcc_assert (FUNC_OR_METHOD_TYPE_P (*node));
  switch ((arm_pcs) fntype_abi (*node).id ())
    {
    case ARM_PCS_AAPCS64:
    case ARM_PCS_SIMD:
      return NULL_TREE;

    case ARM_PCS_SVE:
      error ("the %qE attribute cannot be applied to an SVE function type",
	     name);
      *no_add_attrs = true;
      return NULL_TREE;

    case ARM_PCS_TLSDESC:
    case ARM_PCS_UNKNOWN:
      break;
    }
  gcc_unreachable ();
}

/* Table of machine attributes. */
static const struct attribute_spec aarch64_attribute_table[] =
{
  /* { name, min_len, max_len, decl_req, type_req, fn_type_req,
       affects_type_identity, handler, exclude } */
  { "aarch64_vector_pcs", 0, 0, false, true, true, true,
    handle_aarch64_vector_pcs_attribute, NULL },
  { "arm_sve_vector_bits", 1, 1, false, true, false, true,
    aarch64_sve::handle_arm_sve_vector_bits_attribute,
    NULL },
  { "Advanced SIMD type", 1, 1, false, true, false, true, NULL, NULL },
  { "SVE type", 3, 3, false, true, false, true, NULL, NULL },
  { "SVE sizeless type", 0, 0, false, true, false, true, NULL, NULL },
  { NULL, 0, 0, false, false, false, false, NULL, NULL }
};

#define AARCH64_CPU_DEFAULT_FLAGS ((selected_cpu) ? selected_cpu->flags : 0)

/* An ISA extension in the co-processor and main instruction set space. */
struct aarch64_option_extension
{
  const char *const name;
  const unsigned long flags_on;
  const unsigned long flags_off;
};

typedef enum aarch64_cond_code
{
  AARCH64_EQ = 0, AARCH64_NE, AARCH64_CS, AARCH64_CC, AARCH64_MI, AARCH64_PL,
  AARCH64_VS, AARCH64_VC, AARCH64_HI, AARCH64_LS, AARCH64_GE, AARCH64_LT,
  AARCH64_GT, AARCH64_LE, AARCH64_AL, AARCH64_NV
}
aarch64_cc;

#define AARCH64_INVERSE_CONDITION_CODE(X) ((aarch64_cc) (((int) X) ^ 1))

struct aarch64_branch_protect_type
{
  /* The type's name that the user passes to the branch-protection option
     string. */
  const char* name;
  /* Function to handle the protection type and set global variables.
     First argument is the string token corresponding with this type and the
     second argument is the next token in the option string.
     Return values:
     * AARCH64_PARSE_OK: Handling was successful.
     * AARCH64_PARSE_INVALID_ARG: The type is invalid in this context and the
       caller should print an error.
     * AARCH64_PARSE_INVALID_FEATURE: The type is invalid and the handler
       prints its own error. */
  enum aarch64_parse_opt_result (*handler)(char*, char*);
  /* A list of types that can follow this type in the option string. */
  const aarch64_branch_protect_type* subtypes;
  unsigned int num_subtypes;
};

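/* Handle the branch protection type "none": turn off both return-address
   signing and BTI. */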
static enum aarch64_parse_opt_result
aarch64_handle_no_branch_protection (char* str, char* rest)
{
  aarch64_ra_sign_scope = AARCH64_FUNCTION_NONE;
  aarch64_enable_bti = 0;
  if (rest)
    {
      error ("unexpected %<%s%> after %<%s%>", rest, str);
      return AARCH64_PARSE_INVALID_FEATURE;
    }
  return AARCH64_PARSE_OK;
}

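/* Handle the branch protection type "standard": enable both pac-ret
   (A key, non-leaf functions) and BTI. */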
static enum aarch64_parse_opt_result
aarch64_handle_standard_branch_protection (char* str, char* rest)
{
  aarch64_ra_sign_scope = AARCH64_FUNCTION_NON_LEAF;
  aarch64_ra_sign_key = AARCH64_KEY_A;
  aarch64_enable_bti = 1;
  if (rest)
    {
      error ("unexpected %<%s%> after %<%s%>", rest, str);
      return AARCH64_PARSE_INVALID_FEATURE;
    }
  return AARCH64_PARSE_OK;
}

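/* Handle the branch protection type "pac-ret": sign the return address of
   non-leaf functions with the A key. */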
static enum aarch64_parse_opt_result
aarch64_handle_pac_ret_protection (char* str ATTRIBUTE_UNUSED,
				   char* rest ATTRIBUTE_UNUSED)
{
  aarch64_ra_sign_scope = AARCH64_FUNCTION_NON_LEAF;
  aarch64_ra_sign_key = AARCH64_KEY_A;
  return AARCH64_PARSE_OK;
}

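/* Handle the pac-ret subtype "leaf": extend return-address signing to leaf
   functions as well. */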
static enum aarch64_parse_opt_result
aarch64_handle_pac_ret_leaf (char* str ATTRIBUTE_UNUSED,
			     char* rest ATTRIBUTE_UNUSED)
{
  aarch64_ra_sign_scope = AARCH64_FUNCTION_ALL;
  return AARCH64_PARSE_OK;
}

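/* Handle the pac-ret subtype "b-key": sign return addresses with the B key
   instead of the A key. */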
static enum aarch64_parse_opt_result
aarch64_handle_pac_ret_b_key (char* str ATTRIBUTE_UNUSED,
			      char* rest ATTRIBUTE_UNUSED)
{
  aarch64_ra_sign_key = AARCH64_KEY_B;
  return AARCH64_PARSE_OK;
}

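/* Handle the branch protection type "bti": enable Branch Target
   Identification. */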
static enum aarch64_parse_opt_result
aarch64_handle_bti_protection (char* str ATTRIBUTE_UNUSED,
			       char* rest ATTRIBUTE_UNUSED)
{
  aarch64_enable_bti = 1;
  return AARCH64_PARSE_OK;
}

static const struct aarch64_branch_protect_type aarch64_pac_ret_subtypes[] = {
  { "leaf", aarch64_handle_pac_ret_leaf, NULL, 0 },
  { "b-key", aarch64_handle_pac_ret_b_key, NULL, 0 },
  { NULL, NULL, NULL, 0 }
};

static const struct aarch64_branch_protect_type aarch64_branch_protect_types[] = {
  { "none", aarch64_handle_no_branch_protection, NULL, 0 },
  { "standard", aarch64_handle_standard_branch_protection, NULL, 0 },
  { "pac-ret", aarch64_handle_pac_ret_protection, aarch64_pac_ret_subtypes,
    ARRAY_SIZE (aarch64_pac_ret_subtypes) },
  { "bti", aarch64_handle_bti_protection, NULL, 0 },
  { NULL, NULL, NULL, 0 }
};

/* The condition codes of the processor, and the inverse function. */
static const char * const aarch64_condition_codes[] =
{
  "eq", "ne", "cs", "cc", "mi", "pl", "vs", "vc",
  "hi", "ls", "ge", "lt", "gt", "le", "al", "nv"
};

/* The preferred condition codes for SVE conditions. */
static const char *const aarch64_sve_condition_codes[] =
{
  "none", "any", "nlast", "last", "first", "nfrst", "vs", "vc",
  "pmore", "plast", "tcont", "tstop", "gt", "le", "al", "nv"
};

/* Return the assembly token for svpattern value PATTERN. */

static const char *
svpattern_token (enum aarch64_svpattern pattern)
{
  switch (pattern)
    {
#define CASE(UPPER, LOWER, VALUE) case AARCH64_SV_##UPPER: return #LOWER;
    AARCH64_FOR_SVPATTERN (CASE)
#undef CASE
    case AARCH64_NUM_SVPATTERNS:
      break;
    }
  gcc_unreachable ();
}

38e62001
RS
1672/* Return the location of a piece that is known to be passed or returned
1673 in registers. FIRST_ZR is the first unused vector argument register
1674 and FIRST_PR is the first unused predicate argument register. */
1675
1676rtx
1677pure_scalable_type_info::piece::get_rtx (unsigned int first_zr,
1678 unsigned int first_pr) const
1679{
1680 gcc_assert (VECTOR_MODE_P (mode)
1681 && first_zr + num_zr <= V0_REGNUM + NUM_FP_ARG_REGS
1682 && first_pr + num_pr <= P0_REGNUM + NUM_PR_ARG_REGS);
1683
1684 if (num_zr > 0 && num_pr == 0)
1685 return gen_rtx_REG (mode, first_zr);
1686
1687 if (num_zr == 0 && num_pr == 1)
1688 return gen_rtx_REG (mode, first_pr);
1689
1690 gcc_unreachable ();
1691}
1692
1693/* Return the total number of vector registers required by the PST. */
1694
1695unsigned int
1696pure_scalable_type_info::num_zr () const
1697{
1698 unsigned int res = 0;
1699 for (unsigned int i = 0; i < pieces.length (); ++i)
1700 res += pieces[i].num_zr;
1701 return res;
1702}
1703
1704/* Return the total number of predicate registers required by the PST. */
1705
1706unsigned int
1707pure_scalable_type_info::num_pr () const
1708{
1709 unsigned int res = 0;
1710 for (unsigned int i = 0; i < pieces.length (); ++i)
1711 res += pieces[i].num_pr;
1712 return res;
1713}
1714
1715/* Return the location of a PST that is known to be passed or returned
1716 in registers. FIRST_ZR is the first unused vector argument register
1717 and FIRST_PR is the first unused predicate argument register. */
1718
1719rtx
1720pure_scalable_type_info::get_rtx (machine_mode mode,
1721 unsigned int first_zr,
1722 unsigned int first_pr) const
1723{
1724 /* Try to return a single REG if possible. This leads to better
1725 code generation; it isn't required for correctness. */
1726 if (mode == pieces[0].mode)
1727 {
1728 gcc_assert (pieces.length () == 1);
1729 return pieces[0].get_rtx (first_zr, first_pr);
1730 }
1731
1732 /* Build up a PARALLEL that contains the individual pieces. */
1733 rtvec rtxes = rtvec_alloc (pieces.length ());
1734 for (unsigned int i = 0; i < pieces.length (); ++i)
1735 {
1736 rtx reg = pieces[i].get_rtx (first_zr, first_pr);
1737 rtx offset = gen_int_mode (pieces[i].offset, Pmode);
1738 RTVEC_ELT (rtxes, i) = gen_rtx_EXPR_LIST (VOIDmode, reg, offset);
1739 first_zr += pieces[i].num_zr;
1740 first_pr += pieces[i].num_pr;
1741 }
1742 return gen_rtx_PARALLEL (mode, rtxes);
1743}
1744
1745/* Analyze whether TYPE is a Pure Scalable Type according to the rules
1746 in the AAPCS64. */
1747
1748pure_scalable_type_info::analysis_result
1749pure_scalable_type_info::analyze (const_tree type)
1750{
1751 /* Prevent accidental reuse. */
1752 gcc_assert (pieces.is_empty ());
1753
1754 /* No code will be generated for erroneous types, so we won't establish
1755 an ABI mapping. */
1756 if (type == error_mark_node)
1757 return NO_ABI_IDENTITY;
1758
1759 /* Zero-sized types disappear in the language->ABI mapping. */
1760 if (TYPE_SIZE (type) && integer_zerop (TYPE_SIZE (type)))
1761 return NO_ABI_IDENTITY;
1762
1763 /* Check for SVTs, SPTs, and built-in tuple types that map to PSTs. */
1764 piece p = {};
1765 if (aarch64_sve::builtin_type_p (type, &p.num_zr, &p.num_pr))
1766 {
1767 machine_mode mode = TYPE_MODE_RAW (type);
1768 gcc_assert (VECTOR_MODE_P (mode)
1769 && (!TARGET_SVE || aarch64_sve_mode_p (mode)));
1770
1771 p.mode = p.orig_mode = mode;
1772 add_piece (p);
1773 return IS_PST;
1774 }
1775
1776 /* Check for user-defined PSTs. */
1777 if (TREE_CODE (type) == ARRAY_TYPE)
1778 return analyze_array (type);
1779 if (TREE_CODE (type) == RECORD_TYPE)
1780 return analyze_record (type);
1781
1782 return ISNT_PST;
1783}
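/* Two illustrative cases (using the ACLE names of the SVE builtin
   types checked above): struct { svfloat32_t v; svbool_t p; } analyzes
   as IS_PST with one vector piece and one predicate piece, whereas
   struct { svfloat32_t v; int i; } analyzes as ISNT_PST, because the
   int field has an ABI identity but is not itself a PST.  */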
1784
1785/* Analyze a type that is known not to be passed or returned in memory.
1786 Return true if it has an ABI identity and is a Pure Scalable Type. */
1787
1788bool
1789pure_scalable_type_info::analyze_registers (const_tree type)
1790{
1791 analysis_result result = analyze (type);
1792 gcc_assert (result != DOESNT_MATTER);
1793 return result == IS_PST;
1794}
1795
1796/* Subroutine of analyze for handling ARRAY_TYPEs. */
1797
1798pure_scalable_type_info::analysis_result
1799pure_scalable_type_info::analyze_array (const_tree type)
1800{
1801 /* Analyze the element type. */
1802 pure_scalable_type_info element_info;
1803 analysis_result result = element_info.analyze (TREE_TYPE (type));
1804 if (result != IS_PST)
1805 return result;
1806
1807 /* An array of unknown, flexible or variable length will be passed and
1808 returned by reference whatever we do. */
1809 tree nelts_minus_one = array_type_nelts (type);
1810 if (!tree_fits_uhwi_p (nelts_minus_one))
1811 return DOESNT_MATTER;
1812
1813 /* Likewise if the array is constant-sized but too big to be interesting.
1814 The double checks against MAX_PIECES are to protect against overflow. */
1815 unsigned HOST_WIDE_INT count = tree_to_uhwi (nelts_minus_one);
1816 if (count > MAX_PIECES)
1817 return DOESNT_MATTER;
1818 count += 1;
1819 if (count * element_info.pieces.length () > MAX_PIECES)
1820 return DOESNT_MATTER;
1821
1822 /* The above checks should have weeded out elements of unknown size. */
1823 poly_uint64 element_bytes;
1824 if (!poly_int_tree_p (TYPE_SIZE_UNIT (TREE_TYPE (type)), &element_bytes))
1825 gcc_unreachable ();
1826
1827 /* Build up the list of individual vectors and predicates. */
1828 gcc_assert (!element_info.pieces.is_empty ());
1829 for (unsigned int i = 0; i < count; ++i)
1830 for (unsigned int j = 0; j < element_info.pieces.length (); ++j)
1831 {
1832 piece p = element_info.pieces[j];
1833 p.offset += i * element_bytes;
1834 add_piece (p);
1835 }
1836 return IS_PST;
1837}
1838
1839/* Subroutine of analyze for handling RECORD_TYPEs. */
1840
1841pure_scalable_type_info::analysis_result
1842pure_scalable_type_info::analyze_record (const_tree type)
1843{
1844 for (tree field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
1845 {
1846 if (TREE_CODE (field) != FIELD_DECL)
1847 continue;
1848
1849 /* Zero-sized fields disappear in the language->ABI mapping. */
1850 if (DECL_SIZE (field) && integer_zerop (DECL_SIZE (field)))
1851 continue;
1852
1853 /* All fields with an ABI identity must be PSTs for the record as
1854 a whole to be a PST. If any individual field is too big to be
1855 interesting then the record is too. */
1856 pure_scalable_type_info field_info;
1857 analysis_result subresult = field_info.analyze (TREE_TYPE (field));
1858 if (subresult == NO_ABI_IDENTITY)
1859 continue;
1860 if (subresult != IS_PST)
1861 return subresult;
1862
1863 /* Since all previous fields are PSTs, we ought to be able to track
1864 the field offset using poly_ints. */
1865 tree bitpos = bit_position (field);
1866 gcc_assert (poly_int_tree_p (bitpos));
1867
1868 /* For the same reason, it shouldn't be possible to create a PST field
1869 whose offset isn't byte-aligned. */
1870 poly_widest_int wide_bytepos = exact_div (wi::to_poly_widest (bitpos),
1871 BITS_PER_UNIT);
1872
1873 /* Punt if the record is too big to be interesting. */
1874 poly_uint64 bytepos;
1875 if (!wide_bytepos.to_uhwi (&bytepos)
1876 || pieces.length () + field_info.pieces.length () > MAX_PIECES)
1877 return DOESNT_MATTER;
1878
1879 /* Add the individual vectors and predicates in the field to the
1880 record's list. */
1881 gcc_assert (!field_info.pieces.is_empty ());
1882 for (unsigned int i = 0; i < field_info.pieces.length (); ++i)
1883 {
1884 piece p = field_info.pieces[i];
1885 p.offset += bytepos;
1886 add_piece (p);
1887 }
1888 }
1889 /* Empty structures disappear in the language->ABI mapping. */
1890 return pieces.is_empty () ? NO_ABI_IDENTITY : IS_PST;
1891}
1892
1893/* Add P to the list of pieces in the type. */
1894
1895void
1896pure_scalable_type_info::add_piece (const piece &p)
1897{
1898 /* Try to fold the new piece into the previous one to form a
1899 single-mode PST. For example, if we see three consecutive vectors
1900 of the same mode, we can represent them using the corresponding
1901 3-tuple mode.
1902
1903 This is purely an optimization. */
1904 if (!pieces.is_empty ())
1905 {
1906 piece &prev = pieces.last ();
1907 gcc_assert (VECTOR_MODE_P (p.mode) && VECTOR_MODE_P (prev.mode));
1908 unsigned int nelems1, nelems2;
1909 if (prev.orig_mode == p.orig_mode
1910 && known_eq (prev.offset + GET_MODE_SIZE (prev.mode), p.offset)
1911 && constant_multiple_p (GET_MODE_NUNITS (prev.mode),
1912 GET_MODE_NUNITS (p.orig_mode), &nelems1)
1913 && constant_multiple_p (GET_MODE_NUNITS (p.mode),
1914 GET_MODE_NUNITS (p.orig_mode), &nelems2)
1915 && targetm.array_mode (p.orig_mode,
1916 nelems1 + nelems2).exists (&prev.mode))
1917 {
1918 prev.num_zr += p.num_zr;
1919 prev.num_pr += p.num_pr;
1920 return;
1921 }
1922 }
1923 pieces.quick_push (p);
1924}
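/* Concretely (illustrative): if two VNx4SImode pieces arrive with the
   second at offset BYTES_PER_SVE_VECTOR from the first, they are
   folded into a single piece whose mode is the x2 tuple VNx8SImode,
   with num_zr accumulated accordingly.  */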
1925
1926/* Return true if at least one possible value of type TYPE includes at
1927 least one object of Pure Scalable Type, in the sense of the AAPCS64.
1928
1929 This is a relatively expensive test for some types, so it should
1930 generally be made as late as possible. */
1931
1932static bool
1933aarch64_some_values_include_pst_objects_p (const_tree type)
1934{
1935 if (TYPE_SIZE (type) && integer_zerop (TYPE_SIZE (type)))
1936 return false;
1937
1938 if (aarch64_sve::builtin_type_p (type))
1939 return true;
1940
1941 if (TREE_CODE (type) == ARRAY_TYPE || TREE_CODE (type) == COMPLEX_TYPE)
1942 return aarch64_some_values_include_pst_objects_p (TREE_TYPE (type));
1943
1944 if (RECORD_OR_UNION_TYPE_P (type))
1945 for (tree field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
1946 if (TREE_CODE (field) == FIELD_DECL
1947 && aarch64_some_values_include_pst_objects_p (TREE_TYPE (field)))
1948 return true;
1949
1950 return false;
1951}
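/* For example (illustrative, using ACLE type names): an array of
   svint32_t, or a struct with an svbool_t member, makes this return
   true; a struct containing only scalar ints and floats does not.  */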
1952
002ffd3c
RS
1953/* Return the descriptor of the SIMD ABI. */
1954
1955static const predefined_function_abi &
1956aarch64_simd_abi (void)
1957{
1958 predefined_function_abi &simd_abi = function_abis[ARM_PCS_SIMD];
1959 if (!simd_abi.initialized_p ())
1960 {
1961 HARD_REG_SET full_reg_clobbers
1962 = default_function_abi.full_reg_clobbers ();
1963 for (int regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
1964 if (FP_SIMD_SAVED_REGNUM_P (regno))
1965 CLEAR_HARD_REG_BIT (full_reg_clobbers, regno);
1966 simd_abi.initialize (ARM_PCS_SIMD, full_reg_clobbers);
1967 }
1968 return simd_abi;
1969}
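/* Illustrative effect, assuming FP_SIMD_SAVED_REGNUM_P covers V8-V23
   as in the vector PCS: a call to a function declared with the
   aarch64_vector_pcs attribute does not clobber those registers here,
   so callers may keep values live in them across the call (subject to
   the partial-clobber rules further down, which still treat the bits
   above the low 128 as clobbered).  */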
1970
c600df9a
RS
1971/* Return the descriptor of the SVE PCS. */
1972
1973static const predefined_function_abi &
1974aarch64_sve_abi (void)
1975{
1976 predefined_function_abi &sve_abi = function_abis[ARM_PCS_SVE];
1977 if (!sve_abi.initialized_p ())
1978 {
1979 HARD_REG_SET full_reg_clobbers
1980 = default_function_abi.full_reg_clobbers ();
1981 for (int regno = V8_REGNUM; regno <= V23_REGNUM; ++regno)
1982 CLEAR_HARD_REG_BIT (full_reg_clobbers, regno);
cb26919c 1983 for (int regno = P4_REGNUM; regno <= P15_REGNUM; ++regno)
c600df9a
RS
1984 CLEAR_HARD_REG_BIT (full_reg_clobbers, regno);
1985 sve_abi.initialize (ARM_PCS_SVE, full_reg_clobbers);
1986 }
1987 return sve_abi;
1988}
1989
74b27d8e
RS
1990/* If X is an UNSPEC_SALT_ADDR expression, return the address that it
1991 wraps, otherwise return X itself. */
1992
1993static rtx
1994strip_salt (rtx x)
1995{
1996 rtx search = x;
1997 if (GET_CODE (search) == CONST)
1998 search = XEXP (search, 0);
1999 if (GET_CODE (search) == UNSPEC && XINT (search, 1) == UNSPEC_SALT_ADDR)
2000 x = XVECEXP (search, 0, 0);
2001 return x;
2002}
2003
2004/* Like strip_offset, but also strip any UNSPEC_SALT_ADDR from the
2005 expression. */
2006
2007static rtx
2008strip_offset_and_salt (rtx addr, poly_int64 *offset)
2009{
2010 return strip_salt (strip_offset (addr, offset));
2011}
2012
973d2e01
TP
2013/* Generate code to enable conditional branches in functions over 1 MiB. */
2014const char *
2015aarch64_gen_far_branch (rtx * operands, int pos_label, const char * dest,
2016 const char * branch_format)
2017{
2018 rtx_code_label * tmp_label = gen_label_rtx ();
2019 char label_buf[256];
2020 char buffer[128];
2021 ASM_GENERATE_INTERNAL_LABEL (label_buf, dest,
2022 CODE_LABEL_NUMBER (tmp_label));
2023 const char *label_ptr = targetm.strip_name_encoding (label_buf);
2024 rtx dest_label = operands[pos_label];
2025 operands[pos_label] = tmp_label;
2026
2027 snprintf (buffer, sizeof (buffer), "%s%s", branch_format, label_ptr);
2028 output_asm_insn (buffer, operands);
2029
2030 snprintf (buffer, sizeof (buffer), "b\t%%l%d\n%s:", pos_label, label_ptr);
2031 operands[pos_label] = dest_label;
2032 output_asm_insn (buffer, operands);
2033 return "";
2034}
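/* Illustrative output, assuming the caller passes the inverted
   condition in BRANCH_FORMAT (e.g. "b.ge\t" in place of an original
   "b.lt") and a "Lbcond"-style DEST:

	b.ge	.Lbcond7	// short-range branch over the far jump
	b	.Ltarget	// unconditional branch reaches the real label
   .Lbcond7:

   The label number (7 here) is whatever CODE_LABEL_NUMBER assigns.  */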
2035
261fb553 2036void
fc29dfc9 2037aarch64_err_no_fpadvsimd (machine_mode mode)
261fb553 2038{
261fb553 2039 if (TARGET_GENERAL_REGS_ONLY)
fc29dfc9
SE
2040 if (FLOAT_MODE_P (mode))
2041 error ("%qs is incompatible with the use of floating-point types",
2042 "-mgeneral-regs-only");
2043 else
2044 error ("%qs is incompatible with the use of vector types",
2045 "-mgeneral-regs-only");
261fb553 2046 else
fc29dfc9
SE
2047 if (FLOAT_MODE_P (mode))
2048 error ("%qs feature modifier is incompatible with the use of"
2049 " floating-point types", "+nofp");
2050 else
2051 error ("%qs feature modifier is incompatible with the use of"
2052 " vector types", "+nofp");
261fb553
AL
2053}
2054
c0e0174b
RS
2055/* Report when we try to do something that requires SVE when SVE is disabled.
2056 This is an error of last resort and isn't very high-quality. It usually
2057 involves attempts to measure the vector length in some way. */
2058static void
2059aarch64_report_sve_required (void)
2060{
2061 static bool reported_p = false;
2062
2063 /* Avoid reporting a slew of messages for a single oversight. */
2064 if (reported_p)
2065 return;
2066
2067 error ("this operation requires the SVE ISA extension");
2068 inform (input_location, "you can enable SVE using the command-line"
2069 " option %<-march%>, or by using the %<target%>"
2070 " attribute or pragma");
2071 reported_p = true;
2072}
2073
183bfdaf
RS
2074/* Return true if REGNO is P0-P15 or one of the special FFR-related
2075 registers. */
2076inline bool
2077pr_or_ffr_regnum_p (unsigned int regno)
2078{
2079 return PR_REGNUM_P (regno) || regno == FFR_REGNUM || regno == FFRT_REGNUM;
2080}
2081
c64f7d37 2082/* Implement TARGET_IRA_CHANGE_PSEUDO_ALLOCNO_CLASS.
2eb2847e
WD
2083 The register allocator chooses POINTER_AND_FP_REGS if FP_REGS and
2084 GENERAL_REGS have the same cost - even if POINTER_AND_FP_REGS has a much
2085 higher cost. POINTER_AND_FP_REGS is also used if the cost of both FP_REGS
2086 and GENERAL_REGS is lower than the memory cost (in this case the best class
2087 is the lowest cost one). Using POINTER_AND_FP_REGS irrespectively of its
2088 cost results in bad allocations with many redundant int<->FP moves which
2089 are expensive on various cores.
2090 To avoid this we don't allow POINTER_AND_FP_REGS as the allocno class, but
2091 force a decision between FP_REGS and GENERAL_REGS. We use the allocno class
2092 if it isn't POINTER_AND_FP_REGS. Similarly, use the best class if it isn't
2093 POINTER_AND_FP_REGS. Otherwise set the allocno class depending on the mode.
31e2b5a3
WD
2094 The result of this is that it is no longer inefficient to have a higher
2095 memory move cost than the register move cost.
2096*/
c64f7d37
WD
2097
2098static reg_class_t
31e2b5a3
WD
2099aarch64_ira_change_pseudo_allocno_class (int regno, reg_class_t allocno_class,
2100 reg_class_t best_class)
c64f7d37 2101{
b8506a8a 2102 machine_mode mode;
c64f7d37 2103
67e5c59a
RS
2104 if (!reg_class_subset_p (GENERAL_REGS, allocno_class)
2105 || !reg_class_subset_p (FP_REGS, allocno_class))
c64f7d37
WD
2106 return allocno_class;
2107
67e5c59a
RS
2108 if (!reg_class_subset_p (GENERAL_REGS, best_class)
2109 || !reg_class_subset_p (FP_REGS, best_class))
31e2b5a3
WD
2110 return best_class;
2111
c64f7d37
WD
2112 mode = PSEUDO_REGNO_MODE (regno);
2113 return FLOAT_MODE_P (mode) || VECTOR_MODE_P (mode) ? FP_REGS : GENERAL_REGS;
2114}
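/* For example (illustrative): a pseudo of mode DFmode or V4SImode
   whose class would otherwise have been POINTER_AND_FP_REGS is
   narrowed to FP_REGS here, while a DImode pseudo in the same
   situation gets GENERAL_REGS.  */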
2115
26e0ff94 2116static unsigned int
b8506a8a 2117aarch64_min_divisions_for_recip_mul (machine_mode mode)
26e0ff94 2118{
50093a33 2119 if (GET_MODE_UNIT_SIZE (mode) == 4)
b175b679
JG
2120 return aarch64_tune_params.min_div_recip_mul_sf;
2121 return aarch64_tune_params.min_div_recip_mul_df;
26e0ff94
WD
2122}
2123
b5b33e11 2124/* Return the reassociation width of treeop OPC with mode MODE. */
cee66c68 2125static int
b5b33e11 2126aarch64_reassociation_width (unsigned opc, machine_mode mode)
cee66c68
WD
2127{
2128 if (VECTOR_MODE_P (mode))
b175b679 2129 return aarch64_tune_params.vec_reassoc_width;
cee66c68 2130 if (INTEGRAL_MODE_P (mode))
b175b679 2131 return aarch64_tune_params.int_reassoc_width;
b5b33e11
WD
2132 /* Avoid reassociating floating point addition so we emit more FMAs. */
2133 if (FLOAT_MODE_P (mode) && opc != PLUS_EXPR)
b175b679 2134 return aarch64_tune_params.fp_reassoc_width;
cee66c68
WD
2135 return 1;
2136}
2137
43e9d192
IB
2138/* Provide a mapping from gcc register numbers to dwarf register numbers. */
2139unsigned
2140aarch64_dbx_register_number (unsigned regno)
2141{
2142 if (GP_REGNUM_P (regno))
2143 return AARCH64_DWARF_R0 + regno - R0_REGNUM;
2144 else if (regno == SP_REGNUM)
2145 return AARCH64_DWARF_SP;
2146 else if (FP_REGNUM_P (regno))
2147 return AARCH64_DWARF_V0 + regno - V0_REGNUM;
43cacb12
RS
2148 else if (PR_REGNUM_P (regno))
2149 return AARCH64_DWARF_P0 + regno - P0_REGNUM;
2150 else if (regno == VG_REGNUM)
2151 return AARCH64_DWARF_VG;
43e9d192
IB
2152
2153 /* Return values >= DWARF_FRAME_REGISTERS indicate that there is no
2154 equivalent DWARF register. */
2155 return DWARF_FRAME_REGISTERS;
2156}
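/* For example (in terms of the macros above, without assuming their
   numeric values): x5 maps to AARCH64_DWARF_R0 + 5, v3 to
   AARCH64_DWARF_V0 + 3, and p2 to AARCH64_DWARF_P0 + 2, while
   FFR_REGNUM falls through to the "no equivalent" return value.  */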
2157
d29f7dd5
RS
2158/* If X is a CONST_DOUBLE, return its bit representation as a constant
2159 integer, otherwise return X unmodified. */
2160static rtx
2161aarch64_bit_representation (rtx x)
2162{
2163 if (CONST_DOUBLE_P (x))
2164 x = gen_lowpart (int_mode_for_mode (GET_MODE (x)).require (), x);
2165 return x;
2166}
2167
43cacb12
RS
2168/* Return true if MODE is any of the Advanced SIMD structure modes. */
2169static bool
2170aarch64_advsimd_struct_mode_p (machine_mode mode)
2171{
2172 return (TARGET_SIMD
2173 && (mode == OImode || mode == CImode || mode == XImode));
2174}
2175
2176/* Return true if MODE is an SVE predicate mode. */
2177static bool
2178aarch64_sve_pred_mode_p (machine_mode mode)
2179{
2180 return (TARGET_SVE
2181 && (mode == VNx16BImode
2182 || mode == VNx8BImode
2183 || mode == VNx4BImode
2184 || mode == VNx2BImode));
2185}
2186
2187/* Three mutually-exclusive flags describing a vector or predicate type. */
2188const unsigned int VEC_ADVSIMD = 1;
2189const unsigned int VEC_SVE_DATA = 2;
2190const unsigned int VEC_SVE_PRED = 4;
2191/* Can be used in combination with VEC_ADVSIMD or VEC_SVE_DATA to indicate
2192 a structure of 2, 3 or 4 vectors. */
2193const unsigned int VEC_STRUCT = 8;
550a3380
RS
2194/* Can be used in combination with VEC_SVE_DATA to indicate that the
2195 vector has fewer significant bytes than a full SVE vector. */
2196const unsigned int VEC_PARTIAL = 16;
43cacb12
RS
2197/* Useful combinations of the above. */
2198const unsigned int VEC_ANY_SVE = VEC_SVE_DATA | VEC_SVE_PRED;
2199const unsigned int VEC_ANY_DATA = VEC_ADVSIMD | VEC_SVE_DATA;
2200
2201/* Return a set of flags describing the vector properties of mode MODE.
2202 Ignore modes that are not supported by the current target. */
2203static unsigned int
2204aarch64_classify_vector_mode (machine_mode mode)
2205{
2206 if (aarch64_advsimd_struct_mode_p (mode))
2207 return VEC_ADVSIMD | VEC_STRUCT;
2208
2209 if (aarch64_sve_pred_mode_p (mode))
2210 return VEC_SVE_PRED;
2211
806f69cd
RS
2212 /* Make the decision based on the mode's enum value rather than its
2213 properties, so that we keep the correct classification regardless
2214 of -msve-vector-bits. */
2215 switch (mode)
43cacb12 2216 {
550a3380
RS
2217 /* Partial SVE QI vectors. */
2218 case E_VNx2QImode:
2219 case E_VNx4QImode:
2220 case E_VNx8QImode:
2221 /* Partial SVE HI vectors. */
2222 case E_VNx2HImode:
2223 case E_VNx4HImode:
2224 /* Partial SVE SI vector. */
2225 case E_VNx2SImode:
cc68f7c2
RS
2226 /* Partial SVE HF vectors. */
2227 case E_VNx2HFmode:
2228 case E_VNx4HFmode:
2229 /* Partial SVE SF vector. */
2230 case E_VNx2SFmode:
550a3380
RS
2231 return TARGET_SVE ? VEC_SVE_DATA | VEC_PARTIAL : 0;
2232
806f69cd
RS
2233 case E_VNx16QImode:
2234 case E_VNx8HImode:
2235 case E_VNx4SImode:
2236 case E_VNx2DImode:
02fcd8ac 2237 case E_VNx8BFmode:
806f69cd
RS
2238 case E_VNx8HFmode:
2239 case E_VNx4SFmode:
2240 case E_VNx2DFmode:
2241 return TARGET_SVE ? VEC_SVE_DATA : 0;
2242
2243 /* x2 SVE vectors. */
2244 case E_VNx32QImode:
2245 case E_VNx16HImode:
2246 case E_VNx8SImode:
2247 case E_VNx4DImode:
02fcd8ac 2248 case E_VNx16BFmode:
806f69cd
RS
2249 case E_VNx16HFmode:
2250 case E_VNx8SFmode:
2251 case E_VNx4DFmode:
2252 /* x3 SVE vectors. */
2253 case E_VNx48QImode:
2254 case E_VNx24HImode:
2255 case E_VNx12SImode:
2256 case E_VNx6DImode:
02fcd8ac 2257 case E_VNx24BFmode:
806f69cd
RS
2258 case E_VNx24HFmode:
2259 case E_VNx12SFmode:
2260 case E_VNx6DFmode:
2261 /* x4 SVE vectors. */
2262 case E_VNx64QImode:
2263 case E_VNx32HImode:
2264 case E_VNx16SImode:
2265 case E_VNx8DImode:
02fcd8ac 2266 case E_VNx32BFmode:
806f69cd
RS
2267 case E_VNx32HFmode:
2268 case E_VNx16SFmode:
2269 case E_VNx8DFmode:
2270 return TARGET_SVE ? VEC_SVE_DATA | VEC_STRUCT : 0;
2271
2272 /* 64-bit Advanced SIMD vectors. */
2273 case E_V8QImode:
2274 case E_V4HImode:
2275 case E_V2SImode:
2276 /* ...E_V1DImode doesn't exist. */
2277 case E_V4HFmode:
abbe1ed2 2278 case E_V4BFmode:
806f69cd
RS
2279 case E_V2SFmode:
2280 case E_V1DFmode:
2281 /* 128-bit Advanced SIMD vectors. */
2282 case E_V16QImode:
2283 case E_V8HImode:
2284 case E_V4SImode:
2285 case E_V2DImode:
2286 case E_V8HFmode:
abbe1ed2 2287 case E_V8BFmode:
806f69cd
RS
2288 case E_V4SFmode:
2289 case E_V2DFmode:
2290 return TARGET_SIMD ? VEC_ADVSIMD : 0;
2291
2292 default:
2293 return 0;
43cacb12 2294 }
43cacb12
RS
2295}
2296
2297/* Return true if MODE is any of the data vector modes, including
2298 structure modes. */
43e9d192 2299static bool
43cacb12 2300aarch64_vector_data_mode_p (machine_mode mode)
43e9d192 2301{
43cacb12 2302 return aarch64_classify_vector_mode (mode) & VEC_ANY_DATA;
43e9d192
IB
2303}
2304
5c38705d
RS
2305/* Return true if MODE is any form of SVE mode, including predicates,
2306 vectors and structures. */
2307bool
2308aarch64_sve_mode_p (machine_mode mode)
2309{
2310 return aarch64_classify_vector_mode (mode) & VEC_ANY_SVE;
2311}
2312
43cacb12
RS
2313/* Return true if MODE is an SVE data vector mode; either a single vector
2314 or a structure of vectors. */
43e9d192 2315static bool
43cacb12 2316aarch64_sve_data_mode_p (machine_mode mode)
43e9d192 2317{
43cacb12 2318 return aarch64_classify_vector_mode (mode) & VEC_SVE_DATA;
43e9d192
IB
2319}
2320
550a3380
RS
2321/* Return the number of defined bytes in one constituent vector of
2322 SVE mode MODE, which has vector flags VEC_FLAGS. */
2323static poly_int64
2324aarch64_vl_bytes (machine_mode mode, unsigned int vec_flags)
2325{
2326 if (vec_flags & VEC_PARTIAL)
2327 /* A single partial vector. */
2328 return GET_MODE_SIZE (mode);
2329
2330 if (vec_flags & VEC_SVE_DATA)
2331 /* A single vector or a tuple. */
2332 return BYTES_PER_SVE_VECTOR;
2333
2334 /* A single predicate. */
2335 gcc_assert (vec_flags & VEC_SVE_PRED);
2336 return BYTES_PER_SVE_PRED;
2337}
2338
9f4cbab8
RS
2339/* Implement target hook TARGET_ARRAY_MODE. */
2340static opt_machine_mode
2341aarch64_array_mode (machine_mode mode, unsigned HOST_WIDE_INT nelems)
2342{
2343 if (aarch64_classify_vector_mode (mode) == VEC_SVE_DATA
2344 && IN_RANGE (nelems, 2, 4))
2345 return mode_for_vector (GET_MODE_INNER (mode),
2346 GET_MODE_NUNITS (mode) * nelems);
2347
2348 return opt_machine_mode ();
2349}
2350
43e9d192
IB
2351/* Implement target hook TARGET_ARRAY_MODE_SUPPORTED_P. */
2352static bool
ef4bddc2 2353aarch64_array_mode_supported_p (machine_mode mode,
43e9d192
IB
2354 unsigned HOST_WIDE_INT nelems)
2355{
2356 if (TARGET_SIMD
635e66fe
AL
2357 && (AARCH64_VALID_SIMD_QREG_MODE (mode)
2358 || AARCH64_VALID_SIMD_DREG_MODE (mode))
43e9d192
IB
2359 && (nelems >= 2 && nelems <= 4))
2360 return true;
2361
2362 return false;
2363}
2364
cc68f7c2
RS
2365/* MODE is some form of SVE vector mode. For data modes, return the number
2366 of vector register bits that each element of MODE occupies, such as 64
2367 for both VNx2DImode and VNx2SImode (where each 32-bit value is stored
2368 in a 64-bit container). For predicate modes, return the number of
2369 data bits controlled by each significant predicate bit. */
2370
2371static unsigned int
2372aarch64_sve_container_bits (machine_mode mode)
2373{
2374 unsigned int vec_flags = aarch64_classify_vector_mode (mode);
2375 poly_uint64 vector_bits = (vec_flags & (VEC_PARTIAL | VEC_SVE_PRED)
2376 ? BITS_PER_SVE_VECTOR
2377 : GET_MODE_BITSIZE (mode));
2378 return vector_element_size (vector_bits, GET_MODE_NUNITS (mode));
2379}
2380
43cacb12
RS
2381/* Return the SVE predicate mode to use for elements that have
2382 ELEM_NBYTES bytes, if such a mode exists. */
2383
2384opt_machine_mode
2385aarch64_sve_pred_mode (unsigned int elem_nbytes)
2386{
2387 if (TARGET_SVE)
2388 {
2389 if (elem_nbytes == 1)
2390 return VNx16BImode;
2391 if (elem_nbytes == 2)
2392 return VNx8BImode;
2393 if (elem_nbytes == 4)
2394 return VNx4BImode;
2395 if (elem_nbytes == 8)
2396 return VNx2BImode;
2397 }
2398 return opt_machine_mode ();
2399}
2400
cc68f7c2
RS
2401/* Return the SVE predicate mode that should be used to control
2402 SVE mode MODE. */
2403
2404machine_mode
2405aarch64_sve_pred_mode (machine_mode mode)
2406{
2407 unsigned int bits = aarch64_sve_container_bits (mode);
2408 return aarch64_sve_pred_mode (bits / BITS_PER_UNIT).require ();
2409}
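/* For example: a full VNx4SImode vector has 32-bit containers, so its
   governing predicate mode is VNx4BImode; a partial VNx2SImode vector
   keeps its elements in 64-bit containers, so it is governed by
   VNx2BImode instead.  */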
2410
43cacb12
RS
2411/* Implement TARGET_VECTORIZE_GET_MASK_MODE. */
2412
2413static opt_machine_mode
10116ec1 2414aarch64_get_mask_mode (machine_mode mode)
43cacb12 2415{
10116ec1
RS
2416 unsigned int vec_flags = aarch64_classify_vector_mode (mode);
2417 if (vec_flags & VEC_SVE_DATA)
cc68f7c2 2418 return aarch64_sve_pred_mode (mode);
43cacb12 2419
10116ec1 2420 return default_get_mask_mode (mode);
43cacb12
RS
2421}
2422
d7a09c44
RS
2423/* Return the SVE vector mode that has NUNITS elements of mode INNER_MODE. */
2424
624d0f07 2425opt_machine_mode
d7a09c44
RS
2426aarch64_sve_data_mode (scalar_mode inner_mode, poly_uint64 nunits)
2427{
2428 enum mode_class mclass = (is_a <scalar_float_mode> (inner_mode)
2429 ? MODE_VECTOR_FLOAT : MODE_VECTOR_INT);
2430 machine_mode mode;
2431 FOR_EACH_MODE_IN_CLASS (mode, mclass)
2432 if (inner_mode == GET_MODE_INNER (mode)
2433 && known_eq (nunits, GET_MODE_NUNITS (mode))
2434 && aarch64_sve_data_mode_p (mode))
2435 return mode;
2436 return opt_machine_mode ();
2437}
2438
1044fa32
RS
2439/* Return the integer element mode associated with SVE mode MODE. */
2440
2441static scalar_int_mode
2442aarch64_sve_element_int_mode (machine_mode mode)
2443{
cc68f7c2
RS
2444 poly_uint64 vector_bits = (GET_MODE_CLASS (mode) == MODE_VECTOR_BOOL
2445 ? BITS_PER_SVE_VECTOR
2446 : GET_MODE_BITSIZE (mode));
2447 unsigned int elt_bits = vector_element_size (vector_bits,
1044fa32
RS
2448 GET_MODE_NUNITS (mode));
2449 return int_mode_for_size (elt_bits, 0).require ();
2450}
2451
cc68f7c2
RS
2452/* Return an integer element mode that contains exactly
2453 aarch64_sve_container_bits (MODE) bits. This is wider than
2454 aarch64_sve_element_int_mode if MODE is a partial vector,
2455 otherwise it's the same. */
2456
2457static scalar_int_mode
2458aarch64_sve_container_int_mode (machine_mode mode)
2459{
2460 return int_mode_for_size (aarch64_sve_container_bits (mode), 0).require ();
2461}
2462
d7a09c44 2463/* Return the integer vector mode associated with SVE mode MODE.
d083ee47 2464 Unlike related_int_vector_mode, this can handle the case in which
d7a09c44
RS
2465 MODE is a predicate (and thus has a different total size). */
2466
624d0f07 2467machine_mode
d7a09c44
RS
2468aarch64_sve_int_mode (machine_mode mode)
2469{
2470 scalar_int_mode int_mode = aarch64_sve_element_int_mode (mode);
2471 return aarch64_sve_data_mode (int_mode, GET_MODE_NUNITS (mode)).require ();
2472}
2473
74166aab
RS
2474/* Implement TARGET_VECTORIZE_RELATED_MODE. */
2475
2476static opt_machine_mode
2477aarch64_vectorize_related_mode (machine_mode vector_mode,
2478 scalar_mode element_mode,
2479 poly_uint64 nunits)
2480{
2481 unsigned int vec_flags = aarch64_classify_vector_mode (vector_mode);
2482
cc68f7c2
RS
2483 /* If we're operating on SVE vectors, try to return an SVE mode. */
2484 poly_uint64 sve_nunits;
2485 if ((vec_flags & VEC_SVE_DATA)
2486 && multiple_p (BYTES_PER_SVE_VECTOR,
2487 GET_MODE_SIZE (element_mode), &sve_nunits))
2488 {
2489 machine_mode sve_mode;
2490 if (maybe_ne (nunits, 0U))
2491 {
2492 /* Try to find a full or partial SVE mode with exactly
2493 NUNITS units. */
2494 if (multiple_p (sve_nunits, nunits)
2495 && aarch64_sve_data_mode (element_mode,
2496 nunits).exists (&sve_mode))
2497 return sve_mode;
2498 }
2499 else
2500 {
2501 /* Take the preferred number of units from the number of bytes
2502 that fit in VECTOR_MODE. We always start by "autodetecting"
2503 a full vector mode with preferred_simd_mode, so vectors
2504 chosen here will also be full vector modes. Then
2505 autovectorize_vector_modes tries smaller starting modes
2506 and thus smaller preferred numbers of units. */
2507 sve_nunits = ordered_min (sve_nunits, GET_MODE_SIZE (vector_mode));
2508 if (aarch64_sve_data_mode (element_mode,
2509 sve_nunits).exists (&sve_mode))
2510 return sve_mode;
2511 }
2512 }
2513
74166aab
RS
2514 /* Prefer to use 1 128-bit vector instead of 2 64-bit vectors. */
2515 if ((vec_flags & VEC_ADVSIMD)
2516 && known_eq (nunits, 0U)
2517 && known_eq (GET_MODE_BITSIZE (vector_mode), 64U)
2518 && maybe_ge (GET_MODE_BITSIZE (element_mode)
2519 * GET_MODE_NUNITS (vector_mode), 128U))
2520 {
2521 machine_mode res = aarch64_simd_container_mode (element_mode, 128);
2522 if (VECTOR_MODE_P (res))
2523 return res;
2524 }
2525
2526 return default_vectorize_related_mode (vector_mode, element_mode, nunits);
2527}
2528
b41d1f6e
RS
2529/* Implement TARGET_PREFERRED_ELSE_VALUE. For binary operations,
2530 prefer to use the first arithmetic operand as the else value if
2531 the else value doesn't matter, since that exactly matches the SVE
2532 destructive merging form. For ternary operations we could either
2533 pick the first operand and use FMAD-like instructions or the last
2534 operand and use FMLA-like instructions; the latter seems more
2535 natural. */
6a86928d
RS
2536
2537static tree
b41d1f6e 2538aarch64_preferred_else_value (unsigned, tree, unsigned int nops, tree *ops)
6a86928d 2539{
b41d1f6e 2540 return nops == 3 ? ops[2] : ops[0];
6a86928d
RS
2541}
2542
c43f4279 2543/* Implement TARGET_HARD_REGNO_NREGS. */
43e9d192 2544
c43f4279 2545static unsigned int
ef4bddc2 2546aarch64_hard_regno_nregs (unsigned regno, machine_mode mode)
43e9d192 2547{
6a70badb
RS
2548 /* ??? Logically we should only need to provide a value when
2549 HARD_REGNO_MODE_OK says that the combination is valid,
2550 but at the moment we need to handle all modes. Just ignore
2551 any runtime parts for registers that can't store them. */
2552 HOST_WIDE_INT lowest_size = constant_lower_bound (GET_MODE_SIZE (mode));
43e9d192
IB
2553 switch (aarch64_regno_regclass (regno))
2554 {
2555 case FP_REGS:
2556 case FP_LO_REGS:
163b1f6a 2557 case FP_LO8_REGS:
550a3380
RS
2558 {
2559 unsigned int vec_flags = aarch64_classify_vector_mode (mode);
2560 if (vec_flags & VEC_SVE_DATA)
2561 return exact_div (GET_MODE_SIZE (mode),
2562 aarch64_vl_bytes (mode, vec_flags)).to_constant ();
2563 return CEIL (lowest_size, UNITS_PER_VREG);
2564 }
43cacb12
RS
2565 case PR_REGS:
2566 case PR_LO_REGS:
2567 case PR_HI_REGS:
183bfdaf
RS
2568 case FFR_REGS:
2569 case PR_AND_FFR_REGS:
43cacb12 2570 return 1;
43e9d192 2571 default:
6a70badb 2572 return CEIL (lowest_size, UNITS_PER_WORD);
43e9d192
IB
2573 }
2574 gcc_unreachable ();
2575}
2576
f939c3e6 2577/* Implement TARGET_HARD_REGNO_MODE_OK. */
43e9d192 2578
f939c3e6 2579static bool
ef4bddc2 2580aarch64_hard_regno_mode_ok (unsigned regno, machine_mode mode)
43e9d192
IB
2581{
2582 if (GET_MODE_CLASS (mode) == MODE_CC)
2583 return regno == CC_REGNUM;
2584
43cacb12
RS
2585 if (regno == VG_REGNUM)
2586 /* This must have the same size as _Unwind_Word. */
2587 return mode == DImode;
2588
2589 unsigned int vec_flags = aarch64_classify_vector_mode (mode);
2590 if (vec_flags & VEC_SVE_PRED)
183bfdaf 2591 return pr_or_ffr_regnum_p (regno);
43cacb12 2592
183bfdaf
RS
2593 if (pr_or_ffr_regnum_p (regno))
2594 return false;
43cacb12 2595
9259db42
YZ
2596 if (regno == SP_REGNUM)
2597 /* The purpose of comparing with ptr_mode is to support the
2598 global register variable associated with the stack pointer
2599 register via the syntax of asm ("wsp") in ILP32. */
2600 return mode == Pmode || mode == ptr_mode;
2601
2602 if (regno == FRAME_POINTER_REGNUM || regno == ARG_POINTER_REGNUM)
43e9d192
IB
2603 return mode == Pmode;
2604
563cc649
RH
2605 if (GP_REGNUM_P (regno))
2606 {
aa1a2795
RS
2607 if (vec_flags & VEC_ANY_SVE)
2608 return false;
563cc649
RH
2609 if (known_le (GET_MODE_SIZE (mode), 8))
2610 return true;
aa1a2795 2611 if (known_le (GET_MODE_SIZE (mode), 16))
563cc649
RH
2612 return (regno & 1) == 0;
2613 }
2614 else if (FP_REGNUM_P (regno))
43e9d192 2615 {
43cacb12 2616 if (vec_flags & VEC_STRUCT)
4edd6298 2617 return end_hard_regno (mode, regno) - 1 <= V31_REGNUM;
43e9d192 2618 else
43cacb12 2619 return !VECTOR_MODE_P (mode) || vec_flags != 0;
43e9d192
IB
2620 }
2621
f939c3e6 2622 return false;
43e9d192
IB
2623}
2624
c600df9a
RS
2625/* Return true if a function with type FNTYPE returns its value in
2626 SVE vector or predicate registers. */
2627
2628static bool
2629aarch64_returns_value_in_sve_regs_p (const_tree fntype)
2630{
c600df9a 2631 tree return_type = TREE_TYPE (fntype);
38e62001
RS
2632
2633 pure_scalable_type_info pst_info;
2634 switch (pst_info.analyze (return_type))
2635 {
2636 case pure_scalable_type_info::IS_PST:
2637 return (pst_info.num_zr () <= NUM_FP_ARG_REGS
2638 && pst_info.num_pr () <= NUM_PR_ARG_REGS);
2639
2640 case pure_scalable_type_info::DOESNT_MATTER:
2641 gcc_assert (aarch64_return_in_memory_1 (return_type));
2642 return false;
2643
2644 case pure_scalable_type_info::NO_ABI_IDENTITY:
2645 case pure_scalable_type_info::ISNT_PST:
2646 return false;
2647 }
2648 gcc_unreachable ();
c600df9a
RS
2649}
2650
2651/* Return true if a function with type FNTYPE takes arguments in
2652 SVE vector or predicate registers. */
2653
2654static bool
2655aarch64_takes_arguments_in_sve_regs_p (const_tree fntype)
2656{
2657 CUMULATIVE_ARGS args_so_far_v;
2658 aarch64_init_cumulative_args (&args_so_far_v, NULL_TREE, NULL_RTX,
2659 NULL_TREE, 0, true);
2660 cumulative_args_t args_so_far = pack_cumulative_args (&args_so_far_v);
2661
2662 for (tree chain = TYPE_ARG_TYPES (fntype);
2663 chain && chain != void_list_node;
2664 chain = TREE_CHAIN (chain))
2665 {
2666 tree arg_type = TREE_VALUE (chain);
2667 if (arg_type == error_mark_node)
2668 return false;
2669
2670 function_arg_info arg (arg_type, /*named=*/true);
2671 apply_pass_by_reference_rules (&args_so_far_v, arg);
38e62001
RS
2672 pure_scalable_type_info pst_info;
2673 if (pst_info.analyze_registers (arg.type))
2674 {
2675 unsigned int end_zr = args_so_far_v.aapcs_nvrn + pst_info.num_zr ();
2676 unsigned int end_pr = args_so_far_v.aapcs_nprn + pst_info.num_pr ();
2677 gcc_assert (end_zr <= NUM_FP_ARG_REGS && end_pr <= NUM_PR_ARG_REGS);
2678 return true;
2679 }
c600df9a
RS
2680
2681 targetm.calls.function_arg_advance (args_so_far, arg);
2682 }
2683 return false;
2684}
2685
002ffd3c
RS
2686/* Implement TARGET_FNTYPE_ABI. */
2687
2688static const predefined_function_abi &
2689aarch64_fntype_abi (const_tree fntype)
2690{
2691 if (lookup_attribute ("aarch64_vector_pcs", TYPE_ATTRIBUTES (fntype)))
2692 return aarch64_simd_abi ();
c600df9a
RS
2693
2694 if (aarch64_returns_value_in_sve_regs_p (fntype)
2695 || aarch64_takes_arguments_in_sve_regs_p (fntype))
2696 return aarch64_sve_abi ();
2697
002ffd3c
RS
2698 return default_function_abi;
2699}
2700
482b2b43
RS
2701/* Implement TARGET_COMPATIBLE_VECTOR_TYPES_P. */
2702
2703static bool
2704aarch64_compatible_vector_types_p (const_tree type1, const_tree type2)
2705{
2706 return (aarch64_sve::builtin_type_p (type1)
2707 == aarch64_sve::builtin_type_p (type2));
2708}
2709
c600df9a 2710/* Return true if we should emit CFI for register REGNO. */
a0d0b980
SE
2711
2712static bool
c600df9a 2713aarch64_emit_cfi_for_reg_p (unsigned int regno)
a0d0b980 2714{
c600df9a
RS
2715 return (GP_REGNUM_P (regno)
2716 || !default_function_abi.clobbers_full_reg_p (regno));
a0d0b980
SE
2717}
2718
c600df9a 2719/* Return the mode we should use to save and restore register REGNO. */
a0d0b980
SE
2720
2721static machine_mode
c600df9a 2722aarch64_reg_save_mode (unsigned int regno)
a0d0b980 2723{
c600df9a
RS
2724 if (GP_REGNUM_P (regno))
2725 return DImode;
2726
2727 if (FP_REGNUM_P (regno))
2728 switch (crtl->abi->id ())
2729 {
2730 case ARM_PCS_AAPCS64:
2731 /* Only the low 64 bits are saved by the base PCS. */
2732 return DFmode;
2733
2734 case ARM_PCS_SIMD:
2735 /* The vector PCS saves the low 128 bits (which is the full
2736 register on non-SVE targets). */
2737 return TFmode;
2738
2739 case ARM_PCS_SVE:
2740 /* Use vectors of DImode for registers that need frame
2741 information, so that the first 64 bytes of the save slot
2742 are always the equivalent of what storing D<n> would give. */
2743 if (aarch64_emit_cfi_for_reg_p (regno))
2744 return VNx2DImode;
2745
2746 /* Use vectors of bytes otherwise, so that the layout is
2747 endian-agnostic, and so that we can use LDR and STR for
2748 big-endian targets. */
2749 return VNx16QImode;
2750
2751 case ARM_PCS_TLSDESC:
2752 case ARM_PCS_UNKNOWN:
2753 break;
2754 }
2755
2756 if (PR_REGNUM_P (regno))
2757 /* Save the full predicate register. */
2758 return VNx16BImode;
2759
2760 gcc_unreachable ();
a0d0b980
SE
2761}
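/* So, for example: x19 is always saved in DImode; v8 is saved in
   DFmode under the base PCS, TFmode under the vector PCS, and
   VNx2DImode (or VNx16QImode when no CFI is needed) under the SVE PCS;
   p4 is saved in VNx16BImode.  */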
2762
5a5a3bc5 2763/* Implement TARGET_INSN_CALLEE_ABI. */
b3650d40 2764
5a5a3bc5
RS
2765const predefined_function_abi &
2766aarch64_insn_callee_abi (const rtx_insn *insn)
b3650d40 2767{
08cc4d92
RS
2768 rtx pat = PATTERN (insn);
2769 gcc_assert (GET_CODE (pat) == PARALLEL);
2770 rtx unspec = XVECEXP (pat, 0, 1);
2771 gcc_assert (GET_CODE (unspec) == UNSPEC
2772 && XINT (unspec, 1) == UNSPEC_CALLEE_ABI);
2773 return function_abis[INTVAL (XVECEXP (unspec, 0, 0))];
b3650d40
SE
2774}
2775
80ec73f4
RS
2776/* Implement TARGET_HARD_REGNO_CALL_PART_CLOBBERED. The callee only saves
2777 the lower 64 bits of a 128-bit register. Tell the compiler the callee
2778 clobbers the top 64 bits when restoring the bottom 64 bits. */
2779
2780static bool
6ee2cc70
RS
2781aarch64_hard_regno_call_part_clobbered (unsigned int abi_id,
2782 unsigned int regno,
473574ee 2783 machine_mode mode)
80ec73f4 2784{
c600df9a 2785 if (FP_REGNUM_P (regno) && abi_id != ARM_PCS_SVE)
51051f47 2786 {
51051f47
RS
2787 poly_int64 per_register_size = GET_MODE_SIZE (mode);
2788 unsigned int nregs = hard_regno_nregs (regno, mode);
2789 if (nregs > 1)
2790 per_register_size = exact_div (per_register_size, nregs);
bb6ce448
RS
2791 if (abi_id == ARM_PCS_SIMD || abi_id == ARM_PCS_TLSDESC)
2792 return maybe_gt (per_register_size, 16);
2793 return maybe_gt (per_register_size, 8);
51051f47
RS
2794 }
2795 return false;
473574ee
SE
2796}
2797
43cacb12
RS
2798/* Implement REGMODE_NATURAL_SIZE. */
2799poly_uint64
2800aarch64_regmode_natural_size (machine_mode mode)
2801{
2802 /* The natural size for SVE data modes is one SVE data vector,
2803 and similarly for predicates. We can't independently modify
2804 anything smaller than that. */
2805 /* ??? For now, only do this for variable-width SVE registers.
2806 Doing it for constant-sized registers breaks lower-subreg.c. */
2807 /* ??? And once that's fixed, we should probably have similar
2808 code for Advanced SIMD. */
2809 if (!aarch64_sve_vg.is_constant ())
2810 {
2811 unsigned int vec_flags = aarch64_classify_vector_mode (mode);
2812 if (vec_flags & VEC_SVE_PRED)
2813 return BYTES_PER_SVE_PRED;
2814 if (vec_flags & VEC_SVE_DATA)
2815 return BYTES_PER_SVE_VECTOR;
2816 }
2817 return UNITS_PER_WORD;
2818}
2819
73d9ac6a 2820/* Implement HARD_REGNO_CALLER_SAVE_MODE. */
ef4bddc2 2821machine_mode
43cacb12
RS
2822aarch64_hard_regno_caller_save_mode (unsigned regno, unsigned,
2823 machine_mode mode)
2824{
2825 /* The predicate mode determines which bits are significant and
2826 which are "don't care". Decreasing the number of lanes would
2827 lose data while increasing the number of lanes would make bits
2828 unnecessarily significant. */
2829 if (PR_REGNUM_P (regno))
2830 return mode;
6a70badb
RS
2831 if (known_ge (GET_MODE_SIZE (mode), 4))
2832 return mode;
73d9ac6a 2833 else
6a70badb 2834 return SImode;
73d9ac6a
IB
2835}
2836
231c52ae
ST
2837/* Return true if I's bits are consecutive ones from the MSB. */
2838bool
2839aarch64_high_bits_all_ones_p (HOST_WIDE_INT i)
2840{
2841 return exact_log2 (-i) != HOST_WIDE_INT_M1;
2842}
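/* For example: 0xff00000000000000 (viewed as a signed HOST_WIDE_INT)
   satisfies this test, since -i is a power of two, whereas
   0x0ff0000000000000 does not.  */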
2843
58e17cf8
RS
2844/* Implement TARGET_CONSTANT_ALIGNMENT. Make strings word-aligned so
2845 that strcpy from constants will be faster. */
2846
2847static HOST_WIDE_INT
2848aarch64_constant_alignment (const_tree exp, HOST_WIDE_INT align)
2849{
2850 if (TREE_CODE (exp) == STRING_CST && !optimize_size)
2851 return MAX (align, BITS_PER_WORD);
2852 return align;
2853}
2854
43e9d192
IB
2855/* Return true if calls to DECL should be treated as
2856 long-calls (ie called via a register). */
2857static bool
2858aarch64_decl_is_long_call_p (const_tree decl ATTRIBUTE_UNUSED)
2859{
2860 return false;
2861}
2862
2863/* Return true if calls to symbol-ref SYM should be treated as
2864 long-calls (ie called via a register). */
2865bool
2866aarch64_is_long_call_p (rtx sym)
2867{
2868 return aarch64_decl_is_long_call_p (SYMBOL_REF_DECL (sym));
2869}
2870
b60d63cb
JW
2871/* Return true if calls to symbol-ref SYM should not go through
2872 plt stubs. */
2873
2874bool
2875aarch64_is_noplt_call_p (rtx sym)
2876{
2877 const_tree decl = SYMBOL_REF_DECL (sym);
2878
2879 if (flag_pic
2880 && decl
2881 && (!flag_plt
2882 || lookup_attribute ("noplt", DECL_ATTRIBUTES (decl)))
2883 && !targetm.binds_local_p (decl))
2884 return true;
2885
2886 return false;
2887}
2888
43e9d192
IB
2889/* Emit an insn that's a simple single-set. Both the operands must be
2890 known to be valid. */
827ab47a 2891inline static rtx_insn *
43e9d192
IB
2892emit_set_insn (rtx x, rtx y)
2893{
f7df4a84 2894 return emit_insn (gen_rtx_SET (x, y));
43e9d192
IB
2895}
2896
2897/* X and Y are two things to compare using CODE. Emit the compare insn and
2898 return the rtx for register 0 in the proper mode. */
2899rtx
2900aarch64_gen_compare_reg (RTX_CODE code, rtx x, rtx y)
2901{
4a2095eb
RH
2902 machine_mode cmp_mode = GET_MODE (x);
2903 machine_mode cc_mode;
2904 rtx cc_reg;
43e9d192 2905
4a2095eb
RH
2906 if (cmp_mode == TImode)
2907 {
2908 gcc_assert (code == NE);
2909
2910 cc_mode = CCmode;
2911 cc_reg = gen_rtx_REG (cc_mode, CC_REGNUM);
2912
2913 rtx x_lo = operand_subword (x, 0, 0, TImode);
2914 rtx y_lo = operand_subword (y, 0, 0, TImode);
2915 emit_set_insn (cc_reg, gen_rtx_COMPARE (cc_mode, x_lo, y_lo));
2916
2917 rtx x_hi = operand_subword (x, 1, 0, TImode);
2918 rtx y_hi = operand_subword (y, 1, 0, TImode);
865257c4
RS
2919 emit_insn (gen_ccmpccdi (cc_reg, cc_reg, x_hi, y_hi,
2920 gen_rtx_EQ (cc_mode, cc_reg, const0_rtx),
2921 GEN_INT (AARCH64_EQ)));
4a2095eb
RH
2922 }
2923 else
2924 {
2925 cc_mode = SELECT_CC_MODE (code, x, y);
2926 cc_reg = gen_rtx_REG (cc_mode, CC_REGNUM);
2927 emit_set_insn (cc_reg, gen_rtx_COMPARE (cc_mode, x, y));
2928 }
43e9d192
IB
2929 return cc_reg;
2930}
2931
d400fda3
RH
2932/* Similarly, but maybe zero-extend Y if Y_MODE < SImode. */
2933
2934static rtx
2935aarch64_gen_compare_reg_maybe_ze (RTX_CODE code, rtx x, rtx y,
2936 machine_mode y_mode)
2937{
2938 if (y_mode == E_QImode || y_mode == E_HImode)
2939 {
2940 if (CONST_INT_P (y))
df562b12
JJ
2941 {
2942 y = GEN_INT (INTVAL (y) & GET_MODE_MASK (y_mode));
2943 y_mode = SImode;
2944 }
d400fda3
RH
2945 else
2946 {
2947 rtx t, cc_reg;
2948 machine_mode cc_mode;
2949
2950 t = gen_rtx_ZERO_EXTEND (SImode, y);
2951 t = gen_rtx_COMPARE (CC_SWPmode, t, x);
2952 cc_mode = CC_SWPmode;
2953 cc_reg = gen_rtx_REG (cc_mode, CC_REGNUM);
2954 emit_set_insn (cc_reg, t);
2955 return cc_reg;
2956 }
2957 }
2958
846f78d4
PK
2959 if (!aarch64_plus_operand (y, y_mode))
2960 y = force_reg (y_mode, y);
2961
d400fda3
RH
2962 return aarch64_gen_compare_reg (code, x, y);
2963}
2964
43e9d192
IB
2965/* Build the SYMBOL_REF for __tls_get_addr. */
2966
2967static GTY(()) rtx tls_get_addr_libfunc;
2968
2969rtx
2970aarch64_tls_get_addr (void)
2971{
2972 if (!tls_get_addr_libfunc)
2973 tls_get_addr_libfunc = init_one_libfunc ("__tls_get_addr");
2974 return tls_get_addr_libfunc;
2975}
2976
2977/* Return the TLS model to use for ADDR. */
2978
2979static enum tls_model
2980tls_symbolic_operand_type (rtx addr)
2981{
2982 enum tls_model tls_kind = TLS_MODEL_NONE;
74b27d8e
RS
2983 poly_int64 offset;
2984 addr = strip_offset_and_salt (addr, &offset);
2985 if (GET_CODE (addr) == SYMBOL_REF)
43e9d192
IB
2986 tls_kind = SYMBOL_REF_TLS_MODEL (addr);
2987
2988 return tls_kind;
2989}
2990
 2991/* We'll allow lo_sum's in our legitimate addresses, so that combine
 2992 can take care of combining addresses where
2993 necessary, but for generation purposes, we'll generate the address
2994 as :
2995 RTL Absolute
2996 tmp = hi (symbol_ref); adrp x1, foo
2997 dest = lo_sum (tmp, symbol_ref); add dest, x1, :lo_12:foo
2998 nop
2999
3000 PIC TLS
3001 adrp x1, :got:foo adrp tmp, :tlsgd:foo
3002 ldr x1, [:got_lo12:foo] add dest, tmp, :tlsgd_lo12:foo
3003 bl __tls_get_addr
3004 nop
3005
3006 Load TLS symbol, depending on TLS mechanism and TLS access model.
3007
3008 Global Dynamic - Traditional TLS:
3009 adrp tmp, :tlsgd:imm
3010 add dest, tmp, #:tlsgd_lo12:imm
3011 bl __tls_get_addr
3012
3013 Global Dynamic - TLS Descriptors:
3014 adrp dest, :tlsdesc:imm
3015 ldr tmp, [dest, #:tlsdesc_lo12:imm]
3016 add dest, dest, #:tlsdesc_lo12:imm
3017 blr tmp
3018 mrs tp, tpidr_el0
3019 add dest, dest, tp
3020
3021 Initial Exec:
3022 mrs tp, tpidr_el0
3023 adrp tmp, :gottprel:imm
3024 ldr dest, [tmp, #:gottprel_lo12:imm]
3025 add dest, dest, tp
3026
3027 Local Exec:
3028 mrs tp, tpidr_el0
0699caae
RL
3029 add t0, tp, #:tprel_hi12:imm, lsl #12
3030 add t0, t0, #:tprel_lo12_nc:imm
43e9d192
IB
3031*/
3032
3033static void
3034aarch64_load_symref_appropriately (rtx dest, rtx imm,
3035 enum aarch64_symbol_type type)
3036{
3037 switch (type)
3038 {
3039 case SYMBOL_SMALL_ABSOLUTE:
3040 {
28514dda 3041 /* In ILP32, the mode of dest can be either SImode or DImode. */
43e9d192 3042 rtx tmp_reg = dest;
ef4bddc2 3043 machine_mode mode = GET_MODE (dest);
28514dda
YZ
3044
3045 gcc_assert (mode == Pmode || mode == ptr_mode);
3046
43e9d192 3047 if (can_create_pseudo_p ())
28514dda 3048 tmp_reg = gen_reg_rtx (mode);
43e9d192 3049
28514dda 3050 emit_move_insn (tmp_reg, gen_rtx_HIGH (mode, imm));
43e9d192
IB
3051 emit_insn (gen_add_losym (dest, tmp_reg, imm));
3052 return;
3053 }
3054
a5350ddc 3055 case SYMBOL_TINY_ABSOLUTE:
f7df4a84 3056 emit_insn (gen_rtx_SET (dest, imm));
a5350ddc
CSS
3057 return;
3058
1b1e81f8
JW
3059 case SYMBOL_SMALL_GOT_28K:
3060 {
3061 machine_mode mode = GET_MODE (dest);
3062 rtx gp_rtx = pic_offset_table_rtx;
53021678
JW
3063 rtx insn;
3064 rtx mem;
1b1e81f8
JW
3065
3066 /* NOTE: pic_offset_table_rtx can be NULL_RTX, because we can reach
 3067 here before rtl expand.  Tree IVOPT will generate an rtl pattern to
 3068 decide rtx costs, in which case pic_offset_table_rtx is not
 3069 initialized.  In that case there is no need to generate the first adrp
026c3cfd 3070 instruction as the final cost for global variable access is
1b1e81f8
JW
3071 one instruction. */
3072 if (gp_rtx != NULL)
3073 {
 3074 /* -fpic for -mcmodel=small allows a 32K GOT table size (but since we
 3075 use the page base as the GOT base, the first page may be wasted;
 3076 in the worst scenario, there is only 28K of space for the GOT).
3077
 3078 The generated instruction sequence for accessing a global variable
3079 is:
3080
a3957742 3081 ldr reg, [pic_offset_table_rtx, #:gotpage_lo15:sym]
1b1e81f8
JW
3082
3083 Only one instruction needed. But we must initialize
 3084 pic_offset_table_rtx properly.  We generate an initialization insn for
 3085 every global access, and allow CSE to remove all redundant ones.
3086
 3087 The final instruction sequence will look like the following
 3088 for multiple global variable accesses.
3089
a3957742 3090 adrp pic_offset_table_rtx, _GLOBAL_OFFSET_TABLE_
1b1e81f8 3091
a3957742
JW
3092 ldr reg, [pic_offset_table_rtx, #:gotpage_lo15:sym1]
3093 ldr reg, [pic_offset_table_rtx, #:gotpage_lo15:sym2]
3094 ldr reg, [pic_offset_table_rtx, #:gotpage_lo15:sym3]
3095 ... */
1b1e81f8
JW
3096
3097 rtx s = gen_rtx_SYMBOL_REF (Pmode, "_GLOBAL_OFFSET_TABLE_");
3098 crtl->uses_pic_offset_table = 1;
3099 emit_move_insn (gp_rtx, gen_rtx_HIGH (Pmode, s));
3100
3101 if (mode != GET_MODE (gp_rtx))
4ba8f0a3
AP
3102 gp_rtx = gen_lowpart (mode, gp_rtx);
3103
1b1e81f8
JW
3104 }
3105
3106 if (mode == ptr_mode)
3107 {
3108 if (mode == DImode)
53021678 3109 insn = gen_ldr_got_small_28k_di (dest, gp_rtx, imm);
1b1e81f8 3110 else
53021678
JW
3111 insn = gen_ldr_got_small_28k_si (dest, gp_rtx, imm);
3112
3113 mem = XVECEXP (SET_SRC (insn), 0, 0);
1b1e81f8
JW
3114 }
3115 else
3116 {
3117 gcc_assert (mode == Pmode);
53021678
JW
3118
3119 insn = gen_ldr_got_small_28k_sidi (dest, gp_rtx, imm);
3120 mem = XVECEXP (XEXP (SET_SRC (insn), 0), 0, 0);
1b1e81f8
JW
3121 }
3122
53021678
JW
 3123 /* The operand is expected to be a MEM.  Whenever the related insn
 3124 pattern changes, the code above that calculates MEM should be
 3125 updated.  */
3126 gcc_assert (GET_CODE (mem) == MEM);
3127 MEM_READONLY_P (mem) = 1;
3128 MEM_NOTRAP_P (mem) = 1;
3129 emit_insn (insn);
1b1e81f8
JW
3130 return;
3131 }
3132
6642bdb4 3133 case SYMBOL_SMALL_GOT_4G:
43e9d192 3134 {
28514dda
YZ
3135 /* In ILP32, the mode of dest can be either SImode or DImode,
3136 while the got entry is always of SImode size. The mode of
3137 dest depends on how dest is used: if dest is assigned to a
 3138 pointer (e.g. in memory), it has SImode; it may have
 3139 DImode if dest is dereferenced to access the memory.
3140 This is why we have to handle three different ldr_got_small
3141 patterns here (two patterns for ILP32). */
53021678
JW
3142
3143 rtx insn;
3144 rtx mem;
43e9d192 3145 rtx tmp_reg = dest;
ef4bddc2 3146 machine_mode mode = GET_MODE (dest);
28514dda 3147
43e9d192 3148 if (can_create_pseudo_p ())
28514dda
YZ
3149 tmp_reg = gen_reg_rtx (mode);
3150
3151 emit_move_insn (tmp_reg, gen_rtx_HIGH (mode, imm));
3152 if (mode == ptr_mode)
3153 {
3154 if (mode == DImode)
53021678 3155 insn = gen_ldr_got_small_di (dest, tmp_reg, imm);
28514dda 3156 else
53021678
JW
3157 insn = gen_ldr_got_small_si (dest, tmp_reg, imm);
3158
3159 mem = XVECEXP (SET_SRC (insn), 0, 0);
28514dda
YZ
3160 }
3161 else
3162 {
3163 gcc_assert (mode == Pmode);
53021678
JW
3164
3165 insn = gen_ldr_got_small_sidi (dest, tmp_reg, imm);
3166 mem = XVECEXP (XEXP (SET_SRC (insn), 0), 0, 0);
28514dda
YZ
3167 }
3168
53021678
JW
3169 gcc_assert (GET_CODE (mem) == MEM);
3170 MEM_READONLY_P (mem) = 1;
3171 MEM_NOTRAP_P (mem) = 1;
3172 emit_insn (insn);
43e9d192
IB
3173 return;
3174 }
3175
3176 case SYMBOL_SMALL_TLSGD:
3177 {
5d8a22a5 3178 rtx_insn *insns;
87ca615a
AP
3179 /* The return type of __tls_get_addr is the C pointer type
3180 so use ptr_mode. */
3181 rtx result = gen_rtx_REG (ptr_mode, R0_REGNUM);
3182 rtx tmp_reg = dest;
3183
3184 if (GET_MODE (dest) != ptr_mode)
3185 tmp_reg = can_create_pseudo_p () ? gen_reg_rtx (ptr_mode) : result;
43e9d192
IB
3186
3187 start_sequence ();
87ca615a 3188 if (ptr_mode == SImode)
23b88fda
N
3189 aarch64_emit_call_insn (gen_tlsgd_small_si (result, imm));
3190 else
3191 aarch64_emit_call_insn (gen_tlsgd_small_di (result, imm));
43e9d192
IB
3192 insns = get_insns ();
3193 end_sequence ();
3194
3195 RTL_CONST_CALL_P (insns) = 1;
87ca615a
AP
3196 emit_libcall_block (insns, tmp_reg, result, imm);
3197 /* Convert back to the mode of the dest adding a zero_extend
3198 from SImode (ptr_mode) to DImode (Pmode). */
3199 if (dest != tmp_reg)
3200 convert_move (dest, tmp_reg, true);
43e9d192
IB
3201 return;
3202 }
3203
3204 case SYMBOL_SMALL_TLSDESC:
3205 {
ef4bddc2 3206 machine_mode mode = GET_MODE (dest);
621ad2de 3207 rtx x0 = gen_rtx_REG (mode, R0_REGNUM);
43e9d192
IB
3208 rtx tp;
3209
621ad2de
AP
3210 gcc_assert (mode == Pmode || mode == ptr_mode);
3211
2876a13f
JW
3212 /* In ILP32, the got entry is always of SImode size. Unlike
3213 small GOT, the dest is fixed at reg 0. */
3214 if (TARGET_ILP32)
3215 emit_insn (gen_tlsdesc_small_si (imm));
621ad2de 3216 else
2876a13f 3217 emit_insn (gen_tlsdesc_small_di (imm));
43e9d192 3218 tp = aarch64_load_tp (NULL);
621ad2de
AP
3219
3220 if (mode != Pmode)
3221 tp = gen_lowpart (mode, tp);
3222
2876a13f 3223 emit_insn (gen_rtx_SET (dest, gen_rtx_PLUS (mode, tp, x0)));
241dbd9d
QZ
3224 if (REG_P (dest))
3225 set_unique_reg_note (get_last_insn (), REG_EQUIV, imm);
43e9d192
IB
3226 return;
3227 }
3228
79496620 3229 case SYMBOL_SMALL_TLSIE:
43e9d192 3230 {
621ad2de
AP
3231 /* In ILP32, the mode of dest can be either SImode or DImode,
3232 while the got entry is always of SImode size. The mode of
3233 dest depends on how dest is used: if dest is assigned to a
 3234 pointer (e.g. in memory), it has SImode; it may have
 3235 DImode if dest is dereferenced to access the memory.
3236 This is why we have to handle three different tlsie_small
3237 patterns here (two patterns for ILP32). */
ef4bddc2 3238 machine_mode mode = GET_MODE (dest);
621ad2de 3239 rtx tmp_reg = gen_reg_rtx (mode);
43e9d192 3240 rtx tp = aarch64_load_tp (NULL);
621ad2de
AP
3241
3242 if (mode == ptr_mode)
3243 {
3244 if (mode == DImode)
3245 emit_insn (gen_tlsie_small_di (tmp_reg, imm));
3246 else
3247 {
3248 emit_insn (gen_tlsie_small_si (tmp_reg, imm));
3249 tp = gen_lowpart (mode, tp);
3250 }
3251 }
3252 else
3253 {
3254 gcc_assert (mode == Pmode);
3255 emit_insn (gen_tlsie_small_sidi (tmp_reg, imm));
3256 }
3257
f7df4a84 3258 emit_insn (gen_rtx_SET (dest, gen_rtx_PLUS (mode, tp, tmp_reg)));
241dbd9d
QZ
3259 if (REG_P (dest))
3260 set_unique_reg_note (get_last_insn (), REG_EQUIV, imm);
43e9d192
IB
3261 return;
3262 }
3263
cbf5629e 3264 case SYMBOL_TLSLE12:
d18ba284 3265 case SYMBOL_TLSLE24:
cbf5629e
JW
3266 case SYMBOL_TLSLE32:
3267 case SYMBOL_TLSLE48:
43e9d192 3268 {
cbf5629e 3269 machine_mode mode = GET_MODE (dest);
43e9d192 3270 rtx tp = aarch64_load_tp (NULL);
e6f7f0e9 3271
cbf5629e
JW
3272 if (mode != Pmode)
3273 tp = gen_lowpart (mode, tp);
3274
3275 switch (type)
3276 {
3277 case SYMBOL_TLSLE12:
3278 emit_insn ((mode == DImode ? gen_tlsle12_di : gen_tlsle12_si)
3279 (dest, tp, imm));
3280 break;
3281 case SYMBOL_TLSLE24:
3282 emit_insn ((mode == DImode ? gen_tlsle24_di : gen_tlsle24_si)
3283 (dest, tp, imm));
3284 break;
3285 case SYMBOL_TLSLE32:
3286 emit_insn ((mode == DImode ? gen_tlsle32_di : gen_tlsle32_si)
3287 (dest, imm));
3288 emit_insn ((mode == DImode ? gen_adddi3 : gen_addsi3)
3289 (dest, dest, tp));
3290 break;
3291 case SYMBOL_TLSLE48:
3292 emit_insn ((mode == DImode ? gen_tlsle48_di : gen_tlsle48_si)
3293 (dest, imm));
3294 emit_insn ((mode == DImode ? gen_adddi3 : gen_addsi3)
3295 (dest, dest, tp));
3296 break;
3297 default:
3298 gcc_unreachable ();
3299 }
e6f7f0e9 3300
241dbd9d
QZ
3301 if (REG_P (dest))
3302 set_unique_reg_note (get_last_insn (), REG_EQUIV, imm);
43e9d192
IB
3303 return;
3304 }
3305
87dd8ab0 3306 case SYMBOL_TINY_GOT:
d91480de
D
3307 {
3308 rtx insn;
3309 machine_mode mode = GET_MODE (dest);
3310
3311 if (mode == ptr_mode)
3312 insn = gen_ldr_got_tiny (mode, dest, imm);
3313 else
3314 {
3315 gcc_assert (mode == Pmode);
3316 insn = gen_ldr_got_tiny_sidi (dest, imm);
3317 }
3318
3319 emit_insn (insn);
3320 return;
3321 }
87dd8ab0 3322
5ae7caad
JW
3323 case SYMBOL_TINY_TLSIE:
3324 {
3325 machine_mode mode = GET_MODE (dest);
3326 rtx tp = aarch64_load_tp (NULL);
3327
3328 if (mode == ptr_mode)
3329 {
3330 if (mode == DImode)
3331 emit_insn (gen_tlsie_tiny_di (dest, imm, tp));
3332 else
3333 {
3334 tp = gen_lowpart (mode, tp);
3335 emit_insn (gen_tlsie_tiny_si (dest, imm, tp));
3336 }
3337 }
3338 else
3339 {
3340 gcc_assert (mode == Pmode);
3341 emit_insn (gen_tlsie_tiny_sidi (dest, imm, tp));
3342 }
3343
241dbd9d
QZ
3344 if (REG_P (dest))
3345 set_unique_reg_note (get_last_insn (), REG_EQUIV, imm);
5ae7caad
JW
3346 return;
3347 }
3348
43e9d192
IB
3349 default:
3350 gcc_unreachable ();
3351 }
3352}
3353
3354/* Emit a move from SRC to DEST. Assume that the move expanders can
3355 handle all moves if !can_create_pseudo_p (). The distinction is
3356 important because, unlike emit_move_insn, the move expanders know
3357 how to force Pmode objects into the constant pool even when the
3358 constant pool address is not itself legitimate. */
3359static rtx
3360aarch64_emit_move (rtx dest, rtx src)
3361{
3362 return (can_create_pseudo_p ()
3363 ? emit_move_insn (dest, src)
3364 : emit_move_insn_1 (dest, src));
3365}
3366
f22d7973
RS
3367/* Apply UNOPTAB to OP and store the result in DEST. */
3368
3369static void
3370aarch64_emit_unop (rtx dest, optab unoptab, rtx op)
3371{
3372 rtx tmp = expand_unop (GET_MODE (dest), unoptab, op, dest, 0);
3373 if (dest != tmp)
3374 emit_move_insn (dest, tmp);
3375}
3376
3377/* Apply BINOPTAB to OP0 and OP1 and store the result in DEST. */
3378
3379static void
3380aarch64_emit_binop (rtx dest, optab binoptab, rtx op0, rtx op1)
3381{
3382 rtx tmp = expand_binop (GET_MODE (dest), binoptab, op0, op1, dest, 0,
3383 OPTAB_DIRECT);
3384 if (dest != tmp)
3385 emit_move_insn (dest, tmp);
3386}
3387
030d03b8
RE
3388/* Split a 128-bit move operation into two 64-bit move operations,
3389 taking care to handle partial overlap of register to register
3390 copies. Special cases are needed when moving between GP regs and
3391 FP regs. SRC can be a register, constant or memory; DST a register
3392 or memory. If either operand is memory it must not have any side
3393 effects. */
43e9d192
IB
3394void
3395aarch64_split_128bit_move (rtx dst, rtx src)
3396{
030d03b8
RE
3397 rtx dst_lo, dst_hi;
3398 rtx src_lo, src_hi;
43e9d192 3399
ef4bddc2 3400 machine_mode mode = GET_MODE (dst);
12dc6974 3401
030d03b8
RE
3402 gcc_assert (mode == TImode || mode == TFmode);
3403 gcc_assert (!(side_effects_p (src) || side_effects_p (dst)));
3404 gcc_assert (mode == GET_MODE (src) || GET_MODE (src) == VOIDmode);
43e9d192
IB
3405
3406 if (REG_P (dst) && REG_P (src))
3407 {
030d03b8
RE
3408 int src_regno = REGNO (src);
3409 int dst_regno = REGNO (dst);
43e9d192 3410
030d03b8 3411 /* Handle FP <-> GP regs. */
43e9d192
IB
3412 if (FP_REGNUM_P (dst_regno) && GP_REGNUM_P (src_regno))
3413 {
030d03b8
RE
3414 src_lo = gen_lowpart (word_mode, src);
3415 src_hi = gen_highpart (word_mode, src);
3416
0016d8d9
RS
3417 emit_insn (gen_aarch64_movlow_di (mode, dst, src_lo));
3418 emit_insn (gen_aarch64_movhigh_di (mode, dst, src_hi));
030d03b8 3419 return;
43e9d192
IB
3420 }
3421 else if (GP_REGNUM_P (dst_regno) && FP_REGNUM_P (src_regno))
3422 {
030d03b8
RE
3423 dst_lo = gen_lowpart (word_mode, dst);
3424 dst_hi = gen_highpart (word_mode, dst);
3425
0016d8d9
RS
3426 emit_insn (gen_aarch64_movdi_low (mode, dst_lo, src));
3427 emit_insn (gen_aarch64_movdi_high (mode, dst_hi, src));
030d03b8 3428 return;
43e9d192 3429 }
43e9d192
IB
3430 }
3431
030d03b8
RE
3432 dst_lo = gen_lowpart (word_mode, dst);
3433 dst_hi = gen_highpart (word_mode, dst);
3434 src_lo = gen_lowpart (word_mode, src);
3435 src_hi = gen_highpart_mode (word_mode, mode, src);
3436
3437 /* At most one pairing may overlap. */
3438 if (reg_overlap_mentioned_p (dst_lo, src_hi))
3439 {
3440 aarch64_emit_move (dst_hi, src_hi);
3441 aarch64_emit_move (dst_lo, src_lo);
3442 }
3443 else
3444 {
3445 aarch64_emit_move (dst_lo, src_lo);
3446 aarch64_emit_move (dst_hi, src_hi);
3447 }
43e9d192
IB
3448}
3449
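/* Editorial sketch, not from the GCC sources (the helper name and the
   integer "register file" are made up for illustration): the overlap rule
   above in miniature.  If the low half of the destination aliases the high
   half of the source, a low-then-high copy would clobber src_hi before it
   is read, so the two halves are copied in the opposite order.  */
static void
split_copy_order_sketch (unsigned int dst_lo, unsigned int dst_hi,
                         unsigned int src_lo, unsigned int src_hi,
                         unsigned long long regs[])
{
  if (dst_lo == src_hi)
    {
      /* dst_lo would overwrite src_hi: move the high half first.  */
      regs[dst_hi] = regs[src_hi];
      regs[dst_lo] = regs[src_lo];
    }
  else
    {
      /* Default order; also safe when dst_hi aliases src_lo, because
         src_lo is read before dst_hi is written.  */
      regs[dst_lo] = regs[src_lo];
      regs[dst_hi] = regs[src_hi];
    }
}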
d4f9e819
RS
3450/* Return true if we should split a move from 128-bit value SRC
3451 to 128-bit register DEST. */
3452
43e9d192
IB
3453bool
3454aarch64_split_128bit_move_p (rtx dst, rtx src)
3455{
d4f9e819
RS
3456 if (FP_REGNUM_P (REGNO (dst)))
3457 return REG_P (src) && !FP_REGNUM_P (REGNO (src));
3458 /* All moves to GPRs need to be split. */
3459 return true;
43e9d192
IB
3460}
3461
8b033a8a
SN
3462/* Split a complex SIMD combine. */
3463
3464void
3465aarch64_split_simd_combine (rtx dst, rtx src1, rtx src2)
3466{
ef4bddc2
RS
3467 machine_mode src_mode = GET_MODE (src1);
3468 machine_mode dst_mode = GET_MODE (dst);
8b033a8a
SN
3469
3470 gcc_assert (VECTOR_MODE_P (dst_mode));
a977dc0c
MC
3471 gcc_assert (register_operand (dst, dst_mode)
3472 && register_operand (src1, src_mode)
3473 && register_operand (src2, src_mode));
8b033a8a 3474
0016d8d9 3475 emit_insn (gen_aarch64_simd_combine (src_mode, dst, src1, src2));
a977dc0c 3476 return;
8b033a8a
SN
3477}
3478
fd4842cd
SN
3479/* Split a complex SIMD move. */
3480
3481void
3482aarch64_split_simd_move (rtx dst, rtx src)
3483{
ef4bddc2
RS
3484 machine_mode src_mode = GET_MODE (src);
3485 machine_mode dst_mode = GET_MODE (dst);
fd4842cd
SN
3486
3487 gcc_assert (VECTOR_MODE_P (dst_mode));
3488
3489 if (REG_P (dst) && REG_P (src))
3490 {
3491 gcc_assert (VECTOR_MODE_P (src_mode));
0016d8d9 3492 emit_insn (gen_aarch64_split_simd_mov (src_mode, dst, src));
fd4842cd
SN
3493 }
3494}
3495
ef22810a
RH
3496bool
3497aarch64_zero_extend_const_eq (machine_mode xmode, rtx x,
3498 machine_mode ymode, rtx y)
3499{
3500 rtx r = simplify_const_unary_operation (ZERO_EXTEND, xmode, y, ymode);
3501 gcc_assert (r != NULL);
3502 return rtx_equal_p (x, r);
3503}
ef22810a 3504
678faefc
RS
3505/* Return TARGET if it is nonnull and a register of mode MODE.
3506 Otherwise, return a fresh register of mode MODE if we can,
3507 or TARGET reinterpreted as MODE if we can't. */
3508
3509static rtx
3510aarch64_target_reg (rtx target, machine_mode mode)
3511{
3512 if (target && REG_P (target) && GET_MODE (target) == mode)
3513 return target;
3514 if (!can_create_pseudo_p ())
3515 {
3516 gcc_assert (target);
3517 return gen_lowpart (mode, target);
3518 }
3519 return gen_reg_rtx (mode);
3520}
3521
3522/* Return a register that contains the constant in BUILDER, given that
3523 the constant is a legitimate move operand. Use TARGET as the register
3524 if it is nonnull and convenient. */
3525
3526static rtx
3527aarch64_emit_set_immediate (rtx target, rtx_vector_builder &builder)
3528{
3529 rtx src = builder.build ();
3530 target = aarch64_target_reg (target, GET_MODE (src));
3531 emit_insn (gen_rtx_SET (target, src));
3532 return target;
3533}
3534
43e9d192 3535static rtx
ef4bddc2 3536aarch64_force_temporary (machine_mode mode, rtx x, rtx value)
43e9d192
IB
3537{
3538 if (can_create_pseudo_p ())
e18b4a81 3539 return force_reg (mode, value);
43e9d192
IB
3540 else
3541 {
f5470a77
RS
3542 gcc_assert (x);
3543 aarch64_emit_move (x, value);
43e9d192
IB
3544 return x;
3545 }
3546}
3547
0b1fe8cf
RS
3548/* Return true if predicate value X is a constant in which every element
3549 is a CONST_INT. When returning true, describe X in BUILDER as a VNx16BI
3550 value, i.e. as a predicate in which all bits are significant. */
3551
3552static bool
3553aarch64_get_sve_pred_bits (rtx_vector_builder &builder, rtx x)
3554{
3555 if (GET_CODE (x) != CONST_VECTOR)
3556 return false;
3557
3558 unsigned int factor = vector_element_size (GET_MODE_NUNITS (VNx16BImode),
3559 GET_MODE_NUNITS (GET_MODE (x)));
3560 unsigned int npatterns = CONST_VECTOR_NPATTERNS (x) * factor;
3561 unsigned int nelts_per_pattern = CONST_VECTOR_NELTS_PER_PATTERN (x);
3562 builder.new_vector (VNx16BImode, npatterns, nelts_per_pattern);
3563
3564 unsigned int nelts = const_vector_encoded_nelts (x);
3565 for (unsigned int i = 0; i < nelts; ++i)
3566 {
3567 rtx elt = CONST_VECTOR_ENCODED_ELT (x, i);
3568 if (!CONST_INT_P (elt))
3569 return false;
3570
3571 builder.quick_push (elt);
3572 for (unsigned int j = 1; j < factor; ++j)
3573 builder.quick_push (const0_rtx);
3574 }
3575 builder.finalize ();
3576 return true;
3577}
3578
3579/* BUILDER contains a predicate constant of mode VNx16BI. Return the
3580 widest predicate element size it can have (that is, the largest size
3581 for which each element would still be 0 or 1). */
3582
3583unsigned int
3584aarch64_widest_sve_pred_elt_size (rtx_vector_builder &builder)
3585{
3586 /* Start with the most optimistic assumption: that we only need
3587 one bit per pattern. This is what we will use if only the first
3588 bit in each pattern is ever set. */
3589 unsigned int mask = GET_MODE_SIZE (DImode);
3590 mask |= builder.npatterns ();
3591
3592 /* Look for set bits. */
3593 unsigned int nelts = builder.encoded_nelts ();
3594 for (unsigned int i = 1; i < nelts; ++i)
3595 if (INTVAL (builder.elt (i)) != 0)
3596 {
3597 if (i & 1)
3598 return 1;
3599 mask |= i;
3600 }
3601 return mask & -mask;
3602}
3603
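/* Editorial sketch, not from the GCC sources (the helper name is made up
   for illustration): the same calculation on a plain array of 0/1 bits.
   ORing every set-bit index (and the pattern count) into MASK means that
   the lowest set bit of MASK is the largest power of two, capped at 8,
   that divides all of them -- i.e. the widest element size in bytes for
   which each element is still just 0 or 1.  */
#include <stddef.h>

static unsigned int
widest_pred_elt_size_sketch (const unsigned char *bits, size_t nbits,
                             unsigned int npatterns)
{
  unsigned int mask = 8 | npatterns;
  for (size_t i = 1; i < nbits; ++i)
    if (bits[i])
      {
        if (i & 1)
          return 1;   /* A set bit at an odd index forces byte elements.  */
        mask |= (unsigned int) i;
      }
  return mask & -mask;
}

/* For example, bits set at indices 0, 4, 8 and 12 with four patterns give
   MASK == 12, so the widest usable element size is 4 (.S elements).  */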
624d0f07
RS
3604/* If VNx16BImode rtx X is a canonical PTRUE for a predicate mode,
3605 return that predicate mode, otherwise return opt_machine_mode (). */
3606
3607opt_machine_mode
3608aarch64_ptrue_all_mode (rtx x)
3609{
3610 gcc_assert (GET_MODE (x) == VNx16BImode);
3611 if (GET_CODE (x) != CONST_VECTOR
3612 || !CONST_VECTOR_DUPLICATE_P (x)
3613 || !CONST_INT_P (CONST_VECTOR_ENCODED_ELT (x, 0))
3614 || INTVAL (CONST_VECTOR_ENCODED_ELT (x, 0)) == 0)
3615 return opt_machine_mode ();
3616
3617 unsigned int nelts = const_vector_encoded_nelts (x);
3618 for (unsigned int i = 1; i < nelts; ++i)
3619 if (CONST_VECTOR_ENCODED_ELT (x, i) != const0_rtx)
3620 return opt_machine_mode ();
3621
3622 return aarch64_sve_pred_mode (nelts);
3623}
3624
0b1fe8cf
RS
3625/* BUILDER is a predicate constant of mode VNx16BI. Consider the value
3626 that the constant would have with predicate element size ELT_SIZE
3627 (ignoring the upper bits in each element) and return:
3628
3629 * -1 if all bits are set
3630 * N if the predicate has N leading set bits followed by all clear bits
3631 * 0 if the predicate does not have any of these forms. */
3632
3633int
3634aarch64_partial_ptrue_length (rtx_vector_builder &builder,
3635 unsigned int elt_size)
3636{
3637 /* If nelts_per_pattern is 3, we have set bits followed by clear bits
3638 followed by set bits. */
3639 if (builder.nelts_per_pattern () == 3)
3640 return 0;
3641
3642 /* Skip over leading set bits. */
3643 unsigned int nelts = builder.encoded_nelts ();
3644 unsigned int i = 0;
3645 for (; i < nelts; i += elt_size)
3646 if (INTVAL (builder.elt (i)) == 0)
3647 break;
3648 unsigned int vl = i / elt_size;
3649
3650 /* Check for the all-true case. */
3651 if (i == nelts)
3652 return -1;
3653
3654 /* If nelts_per_pattern is 1, then either VL is zero, or we have a
3655 repeating pattern of set bits followed by clear bits. */
3656 if (builder.nelts_per_pattern () != 2)
3657 return 0;
3658
3659 /* We have a "foreground" value and a duplicated "background" value.
3660 If the background might repeat and the last set bit belongs to it,
3661 we might have set bits followed by clear bits followed by set bits. */
3662 if (i > builder.npatterns () && maybe_ne (nelts, builder.full_nelts ()))
3663 return 0;
3664
3665 /* Make sure that the rest are all clear. */
3666 for (; i < nelts; i += elt_size)
3667 if (INTVAL (builder.elt (i)) != 0)
3668 return 0;
3669
3670 return vl;
3671}
3672
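/* Editorial illustration, not from the GCC sources: for a .H constant
   (ELT_SIZE == 2) whose first three elements are set and whose remaining
   elements are clear, the loop above skips three set bits and the
   trailing checks confirm that everything else is zero, so the function
   returns 3.  An all-set constant returns -1, and any other shape
   returns 0.  */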
3673/* See if there is an svpattern that encodes an SVE predicate of mode
3674 PRED_MODE in which the first VL bits are set and the rest are clear.
3675 Return the pattern if so, otherwise return AARCH64_NUM_SVPATTERNS.
3676 A VL of -1 indicates an all-true vector. */
3677
3678aarch64_svpattern
3679aarch64_svpattern_for_vl (machine_mode pred_mode, int vl)
3680{
3681 if (vl < 0)
3682 return AARCH64_SV_ALL;
3683
3684 if (maybe_gt (vl, GET_MODE_NUNITS (pred_mode)))
3685 return AARCH64_NUM_SVPATTERNS;
3686
3687 if (vl >= 1 && vl <= 8)
3688 return aarch64_svpattern (AARCH64_SV_VL1 + (vl - 1));
3689
3690 if (vl >= 16 && vl <= 256 && pow2p_hwi (vl))
3691 return aarch64_svpattern (AARCH64_SV_VL16 + (exact_log2 (vl) - 4));
3692
3693 int max_vl;
3694 if (GET_MODE_NUNITS (pred_mode).is_constant (&max_vl))
3695 {
3696 if (vl == (max_vl / 3) * 3)
3697 return AARCH64_SV_MUL3;
3698 /* These would only trigger for non-power-of-2 lengths. */
3699 if (vl == (max_vl & -4))
3700 return AARCH64_SV_MUL4;
3701 if (vl == (1 << floor_log2 (max_vl)))
3702 return AARCH64_SV_POW2;
3703 if (vl == max_vl)
3704 return AARCH64_SV_ALL;
3705 }
3706 return AARCH64_NUM_SVPATTERNS;
3707}
3708
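/* Editorial illustration, not from the GCC sources: with
   -msve-vector-bits=256 and a .S predicate mode, GET_MODE_NUNITS is a
   constant 8, so VL -1 maps to ALL, VL 1..8 map to VL1..VL8 (VL 6 hits
   that check even though it also equals (8 / 3) * 3, i.e. MUL3), and any
   VL greater than 8 yields AARCH64_NUM_SVPATTERNS, meaning no single
   svpattern describes it.  */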
34467289
RS
3709/* Return a VNx16BImode constant in which every sequence of ELT_SIZE
3710 bits has the lowest bit set and the upper bits clear. This is the
3711 VNx16BImode equivalent of a PTRUE for controlling elements of
3712 ELT_SIZE bytes. However, because the constant is VNx16BImode,
3713 all bits are significant, even the upper zeros. */
3714
3715rtx
3716aarch64_ptrue_all (unsigned int elt_size)
3717{
3718 rtx_vector_builder builder (VNx16BImode, elt_size, 1);
3719 builder.quick_push (const1_rtx);
3720 for (unsigned int i = 1; i < elt_size; ++i)
3721 builder.quick_push (const0_rtx);
3722 return builder.build ();
3723}
3724
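/* Editorial illustration, not from the GCC sources: for ELT_SIZE == 4 the
   builder encodes the repeating byte pattern { 1, 0, 0, 0 }, i.e. the
   VNx16BI image of "ptrue pN.s, all", in which only the lowest bit of
   each 4-byte predicate slice is set.  */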
16de3637
RS
3725/* Return an all-true predicate register of mode MODE. */
3726
3727rtx
3728aarch64_ptrue_reg (machine_mode mode)
3729{
3730 gcc_assert (GET_MODE_CLASS (mode) == MODE_VECTOR_BOOL);
678faefc
RS
3731 rtx reg = force_reg (VNx16BImode, CONSTM1_RTX (VNx16BImode));
3732 return gen_lowpart (mode, reg);
16de3637
RS
3733}
3734
e7053b0c
RS
3735/* Return an all-false predicate register of mode MODE. */
3736
3737rtx
3738aarch64_pfalse_reg (machine_mode mode)
3739{
3740 gcc_assert (GET_MODE_CLASS (mode) == MODE_VECTOR_BOOL);
678faefc
RS
3741 rtx reg = force_reg (VNx16BImode, CONST0_RTX (VNx16BImode));
3742 return gen_lowpart (mode, reg);
3743}
3744
00fa90d9
RS
3745/* PRED1[0] is a PTEST predicate and PRED1[1] is an aarch64_sve_ptrue_flag
3746 for it. PRED2[0] is the predicate for the instruction whose result
3747 is tested by the PTEST and PRED2[1] is again an aarch64_sve_ptrue_flag
3748 for it. Return true if we can prove that the two predicates are
3749 equivalent for PTEST purposes; that is, if we can replace PRED2[0]
3750 with PRED1[0] without changing behavior. */
3751
3752bool
3753aarch64_sve_same_pred_for_ptest_p (rtx *pred1, rtx *pred2)
3754{
3755 machine_mode mode = GET_MODE (pred1[0]);
3756 gcc_assert (GET_MODE_CLASS (mode) == MODE_VECTOR_BOOL
3757 && mode == GET_MODE (pred2[0])
3758 && aarch64_sve_ptrue_flag (pred1[1], SImode)
3759 && aarch64_sve_ptrue_flag (pred2[1], SImode));
3760
3761 bool ptrue1_p = (pred1[0] == CONSTM1_RTX (mode)
3762 || INTVAL (pred1[1]) == SVE_KNOWN_PTRUE);
3763 bool ptrue2_p = (pred2[0] == CONSTM1_RTX (mode)
3764 || INTVAL (pred2[1]) == SVE_KNOWN_PTRUE);
3765 return (ptrue1_p && ptrue2_p) || rtx_equal_p (pred1[0], pred2[0]);
3766}
3767
3768/* Emit a comparison CMP between OP1 and OP2, both of which have mode
3769 DATA_MODE, and return the result in a predicate of mode PRED_MODE.
3770 Use TARGET as the target register if nonnull and convenient. */
3771
3772static rtx
3773aarch64_sve_emit_int_cmp (rtx target, machine_mode pred_mode, rtx_code cmp,
3774 machine_mode data_mode, rtx op1, rtx op2)
3775{
3776 insn_code icode = code_for_aarch64_pred_cmp (cmp, data_mode);
3777 expand_operand ops[5];
3778 create_output_operand (&ops[0], target, pred_mode);
3779 create_input_operand (&ops[1], CONSTM1_RTX (pred_mode), pred_mode);
3780 create_integer_operand (&ops[2], SVE_KNOWN_PTRUE);
3781 create_input_operand (&ops[3], op1, data_mode);
3782 create_input_operand (&ops[4], op2, data_mode);
3783 expand_insn (icode, 5, ops);
3784 return ops[0].value;
3785}
3786
678faefc
RS
3787/* Use a comparison to convert integer vector SRC into MODE, which is
3788 the corresponding SVE predicate mode. Use TARGET for the result
3789 if it's nonnull and convenient. */
3790
624d0f07 3791rtx
678faefc
RS
3792aarch64_convert_sve_data_to_pred (rtx target, machine_mode mode, rtx src)
3793{
3794 machine_mode src_mode = GET_MODE (src);
00fa90d9
RS
3795 return aarch64_sve_emit_int_cmp (target, mode, NE, src_mode,
3796 src, CONST0_RTX (src_mode));
e7053b0c
RS
3797}
3798
624d0f07
RS
3799/* Return the assembly token for svprfop value PRFOP. */
3800
3801static const char *
3802svprfop_token (enum aarch64_svprfop prfop)
3803{
3804 switch (prfop)
3805 {
3806#define CASE(UPPER, LOWER, VALUE) case AARCH64_SV_##UPPER: return #LOWER;
3807 AARCH64_FOR_SVPRFOP (CASE)
3808#undef CASE
3809 case AARCH64_NUM_SVPRFOPS:
3810 break;
3811 }
3812 gcc_unreachable ();
3813}
3814
3815/* Return the assembly string for an SVE prefetch operation with
3816 mnemonic MNEMONIC, given that PRFOP_RTX is the prefetch operation
3817 and that SUFFIX is the format for the remaining operands. */
3818
3819char *
3820aarch64_output_sve_prefetch (const char *mnemonic, rtx prfop_rtx,
3821 const char *suffix)
3822{
3823 static char buffer[128];
3824 aarch64_svprfop prfop = (aarch64_svprfop) INTVAL (prfop_rtx);
3825 unsigned int written = snprintf (buffer, sizeof (buffer), "%s\t%s, %s",
3826 mnemonic, svprfop_token (prfop), suffix);
3827 gcc_assert (written < sizeof (buffer));
3828 return buffer;
3829}
3830
3831/* Check whether we can calculate the number of elements in PATTERN
3832 at compile time, given that there are NELTS_PER_VQ elements per
3833 128-bit block. Return the value if so, otherwise return -1. */
3834
3835HOST_WIDE_INT
3836aarch64_fold_sve_cnt_pat (aarch64_svpattern pattern, unsigned int nelts_per_vq)
3837{
3838 unsigned int vl, const_vg;
3839 if (pattern >= AARCH64_SV_VL1 && pattern <= AARCH64_SV_VL8)
3840 vl = 1 + (pattern - AARCH64_SV_VL1);
3841 else if (pattern >= AARCH64_SV_VL16 && pattern <= AARCH64_SV_VL256)
3842 vl = 16 << (pattern - AARCH64_SV_VL16);
3843 else if (aarch64_sve_vg.is_constant (&const_vg))
3844 {
3845 /* There are two vector granules per quadword. */
3846 unsigned int nelts = (const_vg / 2) * nelts_per_vq;
3847 switch (pattern)
3848 {
3849 case AARCH64_SV_POW2: return 1 << floor_log2 (nelts);
3850 case AARCH64_SV_MUL4: return nelts & -4;
3851 case AARCH64_SV_MUL3: return (nelts / 3) * 3;
3852 case AARCH64_SV_ALL: return nelts;
3853 default: gcc_unreachable ();
3854 }
3855 }
3856 else
3857 return -1;
3858
3859 /* There are two vector granules per quadword. */
3860 poly_uint64 nelts_all = exact_div (aarch64_sve_vg, 2) * nelts_per_vq;
3861 if (known_le (vl, nelts_all))
3862 return vl;
3863
3864 /* Requesting more elements than are available results in a PFALSE. */
3865 if (known_gt (vl, nelts_all))
3866 return 0;
3867
3868 return -1;
3869}
3870
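/* Editorial illustration, not from the GCC sources: with
   -msve-vector-bits=512 we have aarch64_sve_vg == 8, so for .S elements
   (NELTS_PER_VQ == 4) there are (8 / 2) * 4 == 16 elements in total and
   the patterns fold to POW2 -> 16, MUL4 -> 16, MUL3 -> 15, ALL -> 16,
   VL16 -> 16 and VL32 -> 0 (more elements than are available, i.e. an
   all-false predicate).  */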
43cacb12
RS
3871/* Return true if we can move VALUE into a register using a single
3872 CNT[BHWD] instruction. */
3873
3874static bool
3875aarch64_sve_cnt_immediate_p (poly_int64 value)
3876{
3877 HOST_WIDE_INT factor = value.coeffs[0];
3878 /* The coefficient must be [1, 16] * {2, 4, 8, 16}. */
3879 return (value.coeffs[1] == factor
3880 && IN_RANGE (factor, 2, 16 * 16)
3881 && (factor & 1) == 0
3882 && factor <= 16 * (factor & -factor));
3883}
3884
3885/* Likewise for rtx X. */
3886
3887bool
3888aarch64_sve_cnt_immediate_p (rtx x)
3889{
3890 poly_int64 value;
3891 return poly_int_rtx_p (x, &value) && aarch64_sve_cnt_immediate_p (value);
3892}
3893
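/* Editorial sketch, not from the GCC sources (the helper name is made up
   for illustration): the same test on a raw pair of poly_int coefficients.
   A value is a single CNT[BHWD] immediate when both coefficients are equal
   and the common factor is NELTS_PER_VQ * MUL with NELTS_PER_VQ in
   {2, 4, 8, 16} and MUL in [1, 16].  */
#include <stdbool.h>

static bool
cnt_immediate_sketch (long long coeff0, long long coeff1)
{
  long long factor = coeff0;
  return (coeff1 == factor
          && factor >= 2 && factor <= 16 * 16
          && (factor & 1) == 0
          && factor <= 16 * (factor & -factor));
}

/* E.g. coefficients (6, 6) correspond to "cntd <operands>, all, mul #3",
   while (3, 3) fails the even-factor test and (2, 4) fails because the
   coefficients differ.  */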
3894/* Return the asm string for an instruction with a CNT-like vector size
3895 operand (a vector pattern followed by a multiplier in the range [1, 16]).
3896 PREFIX is the mnemonic without the size suffix and OPERANDS is the
3897 first part of the operands template (the part that comes before the
139df05a
RS
3898 vector size itself). PATTERN is the pattern to use. FACTOR is the
3899 number of quadwords. NELTS_PER_VQ, if nonzero, is the number of elements
3900 in each quadword. If it is zero, we can use any element size. */
43cacb12
RS
3901
3902static char *
3903aarch64_output_sve_cnt_immediate (const char *prefix, const char *operands,
139df05a 3904 aarch64_svpattern pattern,
43cacb12
RS
3905 unsigned int factor,
3906 unsigned int nelts_per_vq)
3907{
139df05a 3908 static char buffer[sizeof ("sqincd\t%x0, %w0, vl256, mul #16")];
43cacb12
RS
3909
3910 if (nelts_per_vq == 0)
3911 /* There is some overlap in the ranges of the four CNT instructions.
3912 Here we always use the smallest possible element size, so that the
3913 multiplier is 1 wherever possible. */
3914 nelts_per_vq = factor & -factor;
3915 int shift = std::min (exact_log2 (nelts_per_vq), 4);
3916 gcc_assert (IN_RANGE (shift, 1, 4));
3917 char suffix = "dwhb"[shift - 1];
3918
3919 factor >>= shift;
3920 unsigned int written;
139df05a 3921 if (pattern == AARCH64_SV_ALL && factor == 1)
43cacb12
RS
3922 written = snprintf (buffer, sizeof (buffer), "%s%c\t%s",
3923 prefix, suffix, operands);
139df05a
RS
3924 else if (factor == 1)
3925 written = snprintf (buffer, sizeof (buffer), "%s%c\t%s, %s",
3926 prefix, suffix, operands, svpattern_token (pattern));
43cacb12 3927 else
139df05a
RS
3928 written = snprintf (buffer, sizeof (buffer), "%s%c\t%s, %s, mul #%d",
3929 prefix, suffix, operands, svpattern_token (pattern),
3930 factor);
43cacb12
RS
3931 gcc_assert (written < sizeof (buffer));
3932 return buffer;
3933}
3934
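/* Editorial illustration, not from the GCC sources: for FACTOR == 32 with
   NELTS_PER_VQ == 0, the code picks 32 & -32 == 32 elements per quadword,
   clamps the shift to 4 (byte elements, suffix 'b') and is left with a
   multiplier of 32 >> 4 == 2, giving "cntb\t<operands>, all, mul #2".
   For FACTOR == 2 with NELTS_PER_VQ == 2 the result is simply
   "cntd\t<operands>".  */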
3935/* Return the asm string for an instruction with a CNT-like vector size
3936 operand (a vector pattern followed by a multiplier in the range [1, 16]).
3937 PREFIX is the mnemonic without the size suffix and OPERANDS is the
3938 first part of the operands template (the part that comes before the
3939 vector size itself). X is the value of the vector size operand,
139df05a
RS
3940 as a polynomial integer rtx; we need to convert this into an "all"
3941 pattern with a multiplier. */
43cacb12
RS
3942
3943char *
3944aarch64_output_sve_cnt_immediate (const char *prefix, const char *operands,
3945 rtx x)
3946{
3947 poly_int64 value = rtx_to_poly_int64 (x);
3948 gcc_assert (aarch64_sve_cnt_immediate_p (value));
139df05a 3949 return aarch64_output_sve_cnt_immediate (prefix, operands, AARCH64_SV_ALL,
43cacb12
RS
3950 value.coeffs[1], 0);
3951}
3952
624d0f07
RS
3953/* Return the asm string for an instruction with a CNT-like vector size
3954 operand (a vector pattern followed by a multiplier in the range [1, 16]).
3955 PREFIX is the mnemonic without the size suffix and OPERANDS is the
3956 first part of the operands template (the part that comes before the
3957 vector size itself). CNT_PAT[0..2] are the operands of the
3958 UNSPEC_SVE_CNT_PAT; see aarch64_sve_cnt_pat for details. */
3959
3960char *
3961aarch64_output_sve_cnt_pat_immediate (const char *prefix,
3962 const char *operands, rtx *cnt_pat)
3963{
3964 aarch64_svpattern pattern = (aarch64_svpattern) INTVAL (cnt_pat[0]);
3965 unsigned int nelts_per_vq = INTVAL (cnt_pat[1]);
3966 unsigned int factor = INTVAL (cnt_pat[2]) * nelts_per_vq;
3967 return aarch64_output_sve_cnt_immediate (prefix, operands, pattern,
3968 factor, nelts_per_vq);
3969}
3970
0fdc30bc
RS
3971/* Return true if we can add X using a single SVE INC or DEC instruction. */
3972
3973bool
3974aarch64_sve_scalar_inc_dec_immediate_p (rtx x)
3975{
3976 poly_int64 value;
3977 return (poly_int_rtx_p (x, &value)
3978 && (aarch64_sve_cnt_immediate_p (value)
3979 || aarch64_sve_cnt_immediate_p (-value)));
3980}
3981
3982/* Return the asm string for adding SVE INC/DEC immediate OFFSET to
3983 operand 0. */
3984
3985char *
3986aarch64_output_sve_scalar_inc_dec (rtx offset)
3987{
3988 poly_int64 offset_value = rtx_to_poly_int64 (offset);
3989 gcc_assert (offset_value.coeffs[0] == offset_value.coeffs[1]);
3990 if (offset_value.coeffs[1] > 0)
139df05a 3991 return aarch64_output_sve_cnt_immediate ("inc", "%x0", AARCH64_SV_ALL,
0fdc30bc
RS
3992 offset_value.coeffs[1], 0);
3993 else
139df05a 3994 return aarch64_output_sve_cnt_immediate ("dec", "%x0", AARCH64_SV_ALL,
0fdc30bc
RS
3995 -offset_value.coeffs[1], 0);
3996}
3997
43cacb12
RS
3998/* Return true if we can add VALUE to a register using a single ADDVL
3999 or ADDPL instruction. */
4000
4001static bool
4002aarch64_sve_addvl_addpl_immediate_p (poly_int64 value)
4003{
4004 HOST_WIDE_INT factor = value.coeffs[0];
4005 if (factor == 0 || value.coeffs[1] != factor)
4006 return false;
4007 /* FACTOR counts VG / 2, so a value of 2 is one predicate width
4008 and a value of 16 is one vector width. */
4009 return (((factor & 15) == 0 && IN_RANGE (factor, -32 * 16, 31 * 16))
4010 || ((factor & 1) == 0 && IN_RANGE (factor, -32 * 2, 31 * 2)));
4011}
4012
4013/* Likewise for rtx X. */
4014
4015bool
4016aarch64_sve_addvl_addpl_immediate_p (rtx x)
4017{
4018 poly_int64 value;
4019 return (poly_int_rtx_p (x, &value)
4020 && aarch64_sve_addvl_addpl_immediate_p (value));
4021}
4022
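/* Editorial sketch, not from the GCC sources (the helper name is made up
   for illustration): the same test on the raw factor, which counts
   multiples of VG / 2, so one predicate register is 2 and one vector
   register is 16.  ADDVL reaches [-32, 31] whole vectors and ADDPL
   reaches [-32, 31] whole predicates; both poly_int coefficients must
   also equal FACTOR for the value to be usable at all.  */
#include <stdbool.h>

static bool
addvl_addpl_sketch (long long factor)
{
  if (factor == 0)
    return false;
  return (((factor & 15) == 0 && factor >= -32 * 16 && factor <= 31 * 16)
          || ((factor & 1) == 0 && factor >= -32 * 2 && factor <= 31 * 2));
}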
0fdc30bc
RS
4023/* Return the asm string for adding ADDVL or ADDPL immediate OFFSET
4024 to operand 1 and storing the result in operand 0. */
43cacb12
RS
4025
4026char *
0fdc30bc 4027aarch64_output_sve_addvl_addpl (rtx offset)
43cacb12
RS
4028{
4029 static char buffer[sizeof ("addpl\t%x0, %x1, #-") + 3 * sizeof (int)];
4030 poly_int64 offset_value = rtx_to_poly_int64 (offset);
4031 gcc_assert (aarch64_sve_addvl_addpl_immediate_p (offset_value));
4032
43cacb12
RS
4033 int factor = offset_value.coeffs[1];
4034 if ((factor & 15) == 0)
4035 snprintf (buffer, sizeof (buffer), "addvl\t%%x0, %%x1, #%d", factor / 16);
4036 else
4037 snprintf (buffer, sizeof (buffer), "addpl\t%%x0, %%x1, #%d", factor / 2);
4038 return buffer;
4039}
4040
4041/* Return true if X is a valid immediate for an SVE vector INC or DEC
4042 instruction. If it is, store the number of elements in each vector
4043 quadword in *NELTS_PER_VQ_OUT (if nonnull) and store the multiplication
4044 factor in *FACTOR_OUT (if nonnull). */
4045
4046bool
0fdc30bc
RS
4047aarch64_sve_vector_inc_dec_immediate_p (rtx x, int *factor_out,
4048 unsigned int *nelts_per_vq_out)
43cacb12
RS
4049{
4050 rtx elt;
4051 poly_int64 value;
4052
4053 if (!const_vec_duplicate_p (x, &elt)
4054 || !poly_int_rtx_p (elt, &value))
4055 return false;
4056
4057 unsigned int nelts_per_vq = 128 / GET_MODE_UNIT_BITSIZE (GET_MODE (x));
4058 if (nelts_per_vq != 8 && nelts_per_vq != 4 && nelts_per_vq != 2)
4059 /* There's no vector INCB. */
4060 return false;
4061
4062 HOST_WIDE_INT factor = value.coeffs[0];
4063 if (value.coeffs[1] != factor)
4064 return false;
4065
4066 /* The coefficient must be [1, 16] * NELTS_PER_VQ. */
4067 if ((factor % nelts_per_vq) != 0
4068 || !IN_RANGE (abs (factor), nelts_per_vq, 16 * nelts_per_vq))
4069 return false;
4070
4071 if (factor_out)
4072 *factor_out = factor;
4073 if (nelts_per_vq_out)
4074 *nelts_per_vq_out = nelts_per_vq;
4075 return true;
4076}
4077
4078/* Return true if X is a valid immediate for an SVE vector INC or DEC
4079 instruction. */
4080
4081bool
0fdc30bc 4082aarch64_sve_vector_inc_dec_immediate_p (rtx x)
43cacb12 4083{
0fdc30bc 4084 return aarch64_sve_vector_inc_dec_immediate_p (x, NULL, NULL);
43cacb12
RS
4085}
4086
4087/* Return the asm template for an SVE vector INC or DEC instruction.
4088 OPERANDS gives the operands before the vector count and X is the
4089 value of the vector count operand itself. */
4090
4091char *
0fdc30bc 4092aarch64_output_sve_vector_inc_dec (const char *operands, rtx x)
43cacb12
RS
4093{
4094 int factor;
4095 unsigned int nelts_per_vq;
0fdc30bc 4096 if (!aarch64_sve_vector_inc_dec_immediate_p (x, &factor, &nelts_per_vq))
43cacb12
RS
4097 gcc_unreachable ();
4098 if (factor < 0)
139df05a
RS
4099 return aarch64_output_sve_cnt_immediate ("dec", operands, AARCH64_SV_ALL,
4100 -factor, nelts_per_vq);
43cacb12 4101 else
139df05a
RS
4102 return aarch64_output_sve_cnt_immediate ("inc", operands, AARCH64_SV_ALL,
4103 factor, nelts_per_vq);
43cacb12 4104}
43e9d192 4105
82614948
RR
4106static int
4107aarch64_internal_mov_immediate (rtx dest, rtx imm, bool generate,
77e994c9 4108 scalar_int_mode mode)
43e9d192 4109{
43e9d192 4110 int i;
9a4865db
WD
4111 unsigned HOST_WIDE_INT val, val2, mask;
4112 int one_match, zero_match;
4113 int num_insns;
43e9d192 4114
9a4865db
WD
4115 val = INTVAL (imm);
4116
4117 if (aarch64_move_imm (val, mode))
43e9d192 4118 {
82614948 4119 if (generate)
f7df4a84 4120 emit_insn (gen_rtx_SET (dest, imm));
9a4865db 4121 return 1;
43e9d192
IB
4122 }
4123
9de00935
TC
4124 /* Check to see if the low 32 bits are either 0xffffXXXX or 0xXXXXffff
4125 (with XXXX non-zero). In that case check to see if the move can be done in
4126 a smaller mode. */
4127 val2 = val & 0xffffffff;
4128 if (mode == DImode
4129 && aarch64_move_imm (val2, SImode)
4130 && (((val >> 32) & 0xffff) == 0 || (val >> 48) == 0))
4131 {
4132 if (generate)
4133 emit_insn (gen_rtx_SET (dest, GEN_INT (val2)));
4134
4135 /* Check if we have to emit a second instruction by checking to see
4136 if any of the upper 32 bits of the original DI mode value is set. */
4137 if (val == val2)
4138 return 1;
4139
4140 i = (val >> 48) ? 48 : 32;
4141
4142 if (generate)
4143 emit_insn (gen_insv_immdi (dest, GEN_INT (i),
4144 GEN_INT ((val >> i) & 0xffff)));
4145
4146 return 2;
4147 }
4148
9a4865db 4149 if ((val >> 32) == 0 || mode == SImode)
43e9d192 4150 {
82614948
RR
4151 if (generate)
4152 {
9a4865db
WD
4153 emit_insn (gen_rtx_SET (dest, GEN_INT (val & 0xffff)));
4154 if (mode == SImode)
4155 emit_insn (gen_insv_immsi (dest, GEN_INT (16),
4156 GEN_INT ((val >> 16) & 0xffff)));
4157 else
4158 emit_insn (gen_insv_immdi (dest, GEN_INT (16),
4159 GEN_INT ((val >> 16) & 0xffff)));
82614948 4160 }
9a4865db 4161 return 2;
43e9d192
IB
4162 }
4163
4164 /* Remaining cases are all for DImode. */
4165
43e9d192 4166 mask = 0xffff;
9a4865db
WD
4167 zero_match = ((val & mask) == 0) + ((val & (mask << 16)) == 0) +
4168 ((val & (mask << 32)) == 0) + ((val & (mask << 48)) == 0);
4169 one_match = ((~val & mask) == 0) + ((~val & (mask << 16)) == 0) +
4170 ((~val & (mask << 32)) == 0) + ((~val & (mask << 48)) == 0);
43e9d192 4171
62c8d76c 4172 if (zero_match != 2 && one_match != 2)
43e9d192 4173 {
62c8d76c
WD
4174 /* Try emitting a bitmask immediate with a movk replacing 16 bits.
4175 For a 64-bit bitmask try whether changing 16 bits to all ones or
4176 zeroes creates a valid bitmask. To check any repeated bitmask,
4177 try using 16 bits from the other 32-bit half of val. */
43e9d192 4178
62c8d76c 4179 for (i = 0; i < 64; i += 16, mask <<= 16)
43e9d192 4180 {
62c8d76c
WD
4181 val2 = val & ~mask;
4182 if (val2 != val && aarch64_bitmask_imm (val2, mode))
4183 break;
4184 val2 = val | mask;
4185 if (val2 != val && aarch64_bitmask_imm (val2, mode))
4186 break;
4187 val2 = val2 & ~mask;
4188 val2 = val2 | (((val2 >> 32) | (val2 << 32)) & mask);
4189 if (val2 != val && aarch64_bitmask_imm (val2, mode))
4190 break;
43e9d192 4191 }
62c8d76c 4192 if (i != 64)
43e9d192 4193 {
62c8d76c 4194 if (generate)
43e9d192 4195 {
62c8d76c
WD
4196 emit_insn (gen_rtx_SET (dest, GEN_INT (val2)));
4197 emit_insn (gen_insv_immdi (dest, GEN_INT (i),
9a4865db 4198 GEN_INT ((val >> i) & 0xffff)));
43e9d192 4199 }
1312b1ba 4200 return 2;
43e9d192
IB
4201 }
4202 }
4203
9a4865db
WD
4204 /* Generate 2-4 instructions, skipping 16 bits of all zeroes or ones which
4205 are emitted by the initial mov. If one_match > zero_match, skip set bits,
4206 otherwise skip zero bits. */
2c274197 4207
9a4865db 4208 num_insns = 1;
43e9d192 4209 mask = 0xffff;
9a4865db
WD
4210 val2 = one_match > zero_match ? ~val : val;
4211 i = (val2 & mask) != 0 ? 0 : (val2 & (mask << 16)) != 0 ? 16 : 32;
4212
4213 if (generate)
4214 emit_insn (gen_rtx_SET (dest, GEN_INT (one_match > zero_match
4215 ? (val | ~(mask << i))
4216 : (val & (mask << i)))));
4217 for (i += 16; i < 64; i += 16)
43e9d192 4218 {
9a4865db
WD
4219 if ((val2 & (mask << i)) == 0)
4220 continue;
4221 if (generate)
4222 emit_insn (gen_insv_immdi (dest, GEN_INT (i),
4223 GEN_INT ((val >> i) & 0xffff)));
4224 num_insns ++;
82614948
RR
4225 }
4226
4227 return num_insns;
4228}
4229
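/* Editorial sketch, not from the GCC sources (the helper name is made up
   for illustration): a standalone model of the fallback path above.  It
   picks all-zeros or all-ones as the background, whichever matches more
   16-bit chunks, then counts one MOVZ/MOVN for the first differing chunk
   plus one MOVK for every further differing chunk.  The real function
   first tries single MOV immediates, a 32-bit shortcut and
   bitmask-immediate-plus-MOVK combinations, so this is only an upper
   bound on what it emits.  */
#include <stdint.h>

static int
mov_imm_count_sketch (uint64_t val)
{
  const uint64_t mask = 0xffff;
  int zero_match = 0, one_match = 0;
  for (int i = 0; i < 64; i += 16)
    {
      zero_match += ((val & (mask << i)) == 0);
      one_match += ((~val & (mask << i)) == 0);
    }

  /* Chunks equal to the chosen background are covered by the initial
     MOVZ/MOVN and need no MOVK.  */
  uint64_t val2 = one_match > zero_match ? ~val : val;
  int i = (val2 & mask) != 0 ? 0 : (val2 & (mask << 16)) != 0 ? 16 : 32;
  int num_insns = 1;
  for (i += 16; i < 64; i += 16)
    if ((val2 & (mask << i)) != 0)
      num_insns++;
  return num_insns;
}

/* E.g. 0x1234000000005678 costs two instructions (MOVZ for the low chunk,
   MOVK for the top chunk) and 0xffffffffffff1234 costs one (a single
   MOVN).  */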
c0bb5bc5
WD
4230/* Return whether imm is a 128-bit immediate which is simple enough to
4231 expand inline. */
4232bool
4233aarch64_mov128_immediate (rtx imm)
4234{
4235 if (GET_CODE (imm) == CONST_INT)
4236 return true;
4237
4238 gcc_assert (CONST_WIDE_INT_NUNITS (imm) == 2);
4239
4240 rtx lo = GEN_INT (CONST_WIDE_INT_ELT (imm, 0));
4241 rtx hi = GEN_INT (CONST_WIDE_INT_ELT (imm, 1));
4242
4243 return aarch64_internal_mov_immediate (NULL_RTX, lo, false, DImode)
4244 + aarch64_internal_mov_immediate (NULL_RTX, hi, false, DImode) <= 4;
4245}
4246
4247
43cacb12
RS
4248/* Return the number of temporary registers that aarch64_add_offset_1
4249 would need to add OFFSET to a register. */
4250
4251static unsigned int
4252aarch64_add_offset_1_temporaries (HOST_WIDE_INT offset)
4253{
4254 return abs_hwi (offset) < 0x1000000 ? 0 : 1;
4255}
4256
f5470a77
RS
4257/* A subroutine of aarch64_add_offset. Set DEST to SRC + OFFSET for
4258 a non-polynomial OFFSET. MODE is the mode of the addition.
4259 FRAME_RELATED_P is true if the RTX_FRAME_RELATED flag should
4260 be set and CFA adjustments added to the generated instructions.
4261
4262 TEMP1, if nonnull, is a register of mode MODE that can be used as a
4263 temporary if register allocation is already complete. This temporary
4264 register may overlap DEST but must not overlap SRC. If TEMP1 is known
4265 to hold abs (OFFSET), EMIT_MOVE_IMM can be set to false to avoid emitting
4266 the immediate again.
0100c5f9
RS
4267
4268 Since this function may be used to adjust the stack pointer, we must
4269 ensure that it cannot cause transient stack deallocation (for example
4270 by first incrementing SP and then decrementing when adjusting by a
4271 large immediate). */
4272
4273static void
f5470a77
RS
4274aarch64_add_offset_1 (scalar_int_mode mode, rtx dest,
4275 rtx src, HOST_WIDE_INT offset, rtx temp1,
4276 bool frame_related_p, bool emit_move_imm)
0100c5f9 4277{
f5470a77
RS
4278 gcc_assert (emit_move_imm || temp1 != NULL_RTX);
4279 gcc_assert (temp1 == NULL_RTX || !reg_overlap_mentioned_p (temp1, src));
4280
42bc589e 4281 unsigned HOST_WIDE_INT moffset = absu_hwi (offset);
0100c5f9
RS
4282 rtx_insn *insn;
4283
f5470a77
RS
4284 if (!moffset)
4285 {
4286 if (!rtx_equal_p (dest, src))
4287 {
4288 insn = emit_insn (gen_rtx_SET (dest, src));
4289 RTX_FRAME_RELATED_P (insn) = frame_related_p;
4290 }
4291 return;
4292 }
0100c5f9
RS
4293
4294 /* Single instruction adjustment. */
f5470a77 4295 if (aarch64_uimm12_shift (moffset))
0100c5f9 4296 {
f5470a77 4297 insn = emit_insn (gen_add3_insn (dest, src, GEN_INT (offset)));
0100c5f9
RS
4298 RTX_FRAME_RELATED_P (insn) = frame_related_p;
4299 return;
4300 }
4301
f5470a77
RS
4302 /* Emit 2 additions/subtractions if the adjustment is less than 24 bits
4303 and either:
4304
4305 a) the offset cannot be loaded by a 16-bit move or
4306 b) there is no spare register into which we can move it. */
4307 if (moffset < 0x1000000
4308 && ((!temp1 && !can_create_pseudo_p ())
4309 || !aarch64_move_imm (moffset, mode)))
0100c5f9 4310 {
f5470a77 4311 HOST_WIDE_INT low_off = moffset & 0xfff;
0100c5f9 4312
f5470a77
RS
4313 low_off = offset < 0 ? -low_off : low_off;
4314 insn = emit_insn (gen_add3_insn (dest, src, GEN_INT (low_off)));
0100c5f9 4315 RTX_FRAME_RELATED_P (insn) = frame_related_p;
f5470a77 4316 insn = emit_insn (gen_add2_insn (dest, GEN_INT (offset - low_off)));
0100c5f9
RS
4317 RTX_FRAME_RELATED_P (insn) = frame_related_p;
4318 return;
4319 }
4320
4321 /* Emit a move immediate if required and an addition/subtraction. */
0100c5f9 4322 if (emit_move_imm)
f5470a77
RS
4323 {
4324 gcc_assert (temp1 != NULL_RTX || can_create_pseudo_p ());
7aa605c9
JJ
4325 temp1 = aarch64_force_temporary (mode, temp1,
4326 gen_int_mode (moffset, mode));
f5470a77
RS
4327 }
4328 insn = emit_insn (offset < 0
4329 ? gen_sub3_insn (dest, src, temp1)
4330 : gen_add3_insn (dest, src, temp1));
0100c5f9
RS
4331 if (frame_related_p)
4332 {
4333 RTX_FRAME_RELATED_P (insn) = frame_related_p;
f5470a77
RS
4334 rtx adj = plus_constant (mode, src, offset);
4335 add_reg_note (insn, REG_CFA_ADJUST_CFA, gen_rtx_SET (dest, adj));
0100c5f9
RS
4336 }
4337}
4338
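/* Editorial illustration, not from the GCC sources: for OFFSET == 0x123456
   with no usable move-immediate, the path above emits
   "add dst, src, #0x456" followed by "add dst, dst, #0x123000" (the second
   immediate fits the shifted-by-12 ADD form).  For OFFSET == -0x123456 the
   same split uses subtractions.  In both cases each step moves the value
   monotonically in one direction, so applying it to the stack pointer
   never causes a transient deallocation.  */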
43cacb12
RS
4339/* Return the number of temporary registers that aarch64_add_offset
4340 would need to move OFFSET into a register or add OFFSET to a register;
4341 ADD_P is true if we want the latter rather than the former. */
4342
4343static unsigned int
4344aarch64_offset_temporaries (bool add_p, poly_int64 offset)
4345{
4346 /* This follows the same structure as aarch64_add_offset. */
4347 if (add_p && aarch64_sve_addvl_addpl_immediate_p (offset))
4348 return 0;
4349
4350 unsigned int count = 0;
4351 HOST_WIDE_INT factor = offset.coeffs[1];
4352 HOST_WIDE_INT constant = offset.coeffs[0] - factor;
4353 poly_int64 poly_offset (factor, factor);
4354 if (add_p && aarch64_sve_addvl_addpl_immediate_p (poly_offset))
4355 /* Need one register for the ADDVL/ADDPL result. */
4356 count += 1;
4357 else if (factor != 0)
4358 {
4359 factor = abs (factor);
4360 if (factor > 16 * (factor & -factor))
4361 /* Need one register for the CNT result and one for the multiplication
4362 factor. If necessary, the second temporary can be reused for the
4363 constant part of the offset. */
4364 return 2;
4365 /* Need one register for the CNT result (which might then
4366 be shifted). */
4367 count += 1;
4368 }
4369 return count + aarch64_add_offset_1_temporaries (constant);
4370}
4371
4372/* If X can be represented as a poly_int64, return the number
4373 of temporaries that are required to add it to a register.
4374 Return -1 otherwise. */
4375
4376int
4377aarch64_add_offset_temporaries (rtx x)
4378{
4379 poly_int64 offset;
4380 if (!poly_int_rtx_p (x, &offset))
4381 return -1;
4382 return aarch64_offset_temporaries (true, offset);
4383}
4384
f5470a77
RS
4385/* Set DEST to SRC + OFFSET. MODE is the mode of the addition.
4386 FRAME_RELATED_P is true if the RTX_FRAME_RELATED flag should
4387 be set and CFA adjustments added to the generated instructions.
4388
4389 TEMP1, if nonnull, is a register of mode MODE that can be used as a
4390 temporary if register allocation is already complete. This temporary
43cacb12
RS
4391 register may overlap DEST if !FRAME_RELATED_P but must not overlap SRC.
4392 If TEMP1 is known to hold abs (OFFSET), EMIT_MOVE_IMM can be set to
4393 false to avoid emitting the immediate again.
4394
4395 TEMP2, if nonnull, is a second temporary register that doesn't
4396 overlap either DEST or SRC.
f5470a77
RS
4397
4398 Since this function may be used to adjust the stack pointer, we must
4399 ensure that it cannot cause transient stack deallocation (for example
4400 by first incrementing SP and then decrementing when adjusting by a
4401 large immediate). */
4402
4403static void
4404aarch64_add_offset (scalar_int_mode mode, rtx dest, rtx src,
43cacb12
RS
4405 poly_int64 offset, rtx temp1, rtx temp2,
4406 bool frame_related_p, bool emit_move_imm = true)
0100c5f9 4407{
f5470a77
RS
4408 gcc_assert (emit_move_imm || temp1 != NULL_RTX);
4409 gcc_assert (temp1 == NULL_RTX || !reg_overlap_mentioned_p (temp1, src));
43cacb12
RS
4410 gcc_assert (temp1 == NULL_RTX
4411 || !frame_related_p
4412 || !reg_overlap_mentioned_p (temp1, dest));
4413 gcc_assert (temp2 == NULL_RTX || !reg_overlap_mentioned_p (dest, temp2));
4414
4415 /* Try using ADDVL or ADDPL to add the whole value. */
4416 if (src != const0_rtx && aarch64_sve_addvl_addpl_immediate_p (offset))
4417 {
4418 rtx offset_rtx = gen_int_mode (offset, mode);
4419 rtx_insn *insn = emit_insn (gen_add3_insn (dest, src, offset_rtx));
4420 RTX_FRAME_RELATED_P (insn) = frame_related_p;
4421 return;
4422 }
4423
4424 /* Coefficient 1 is multiplied by the number of 128-bit blocks in an
4425 SVE vector register, over and above the minimum size of 128 bits.
4426 This is equivalent to half the value returned by CNTD with a
4427 vector shape of ALL. */
4428 HOST_WIDE_INT factor = offset.coeffs[1];
4429 HOST_WIDE_INT constant = offset.coeffs[0] - factor;
4430
4431 /* Try using ADDVL or ADDPL to add the VG-based part. */
4432 poly_int64 poly_offset (factor, factor);
4433 if (src != const0_rtx
4434 && aarch64_sve_addvl_addpl_immediate_p (poly_offset))
4435 {
4436 rtx offset_rtx = gen_int_mode (poly_offset, mode);
4437 if (frame_related_p)
4438 {
4439 rtx_insn *insn = emit_insn (gen_add3_insn (dest, src, offset_rtx));
4440 RTX_FRAME_RELATED_P (insn) = true;
4441 src = dest;
4442 }
4443 else
4444 {
4445 rtx addr = gen_rtx_PLUS (mode, src, offset_rtx);
4446 src = aarch64_force_temporary (mode, temp1, addr);
4447 temp1 = temp2;
4448 temp2 = NULL_RTX;
4449 }
4450 }
4451 /* Otherwise use a CNT-based sequence. */
4452 else if (factor != 0)
4453 {
4454 /* Use a subtraction if we have a negative factor. */
4455 rtx_code code = PLUS;
4456 if (factor < 0)
4457 {
4458 factor = -factor;
4459 code = MINUS;
4460 }
4461
4462 /* Calculate CNTD * FACTOR / 2. First try to fold the division
4463 into the multiplication. */
4464 rtx val;
4465 int shift = 0;
4466 if (factor & 1)
4467 /* Use a right shift by 1. */
4468 shift = -1;
4469 else
4470 factor /= 2;
4471 HOST_WIDE_INT low_bit = factor & -factor;
4472 if (factor <= 16 * low_bit)
4473 {
4474 if (factor > 16 * 8)
4475 {
4476 /* "CNTB Xn, ALL, MUL #FACTOR" is out of range, so calculate
4477 the value with the minimum multiplier and shift it into
4478 position. */
4479 int extra_shift = exact_log2 (low_bit);
4480 shift += extra_shift;
4481 factor >>= extra_shift;
4482 }
4483 val = gen_int_mode (poly_int64 (factor * 2, factor * 2), mode);
4484 }
4485 else
4486 {
7d8bdfa7
RS
4487 /* Base the factor on LOW_BIT if we can calculate LOW_BIT
4488 directly, since that should increase the chances of being
4489 able to use a shift and add sequence. If LOW_BIT itself
4490 is out of range, just use CNTD. */
4491 if (low_bit <= 16 * 8)
4492 factor /= low_bit;
4493 else
4494 low_bit = 1;
4495
4496 val = gen_int_mode (poly_int64 (low_bit * 2, low_bit * 2), mode);
43cacb12
RS
4497 val = aarch64_force_temporary (mode, temp1, val);
4498
7d8bdfa7
RS
4499 if (can_create_pseudo_p ())
4500 {
4501 rtx coeff1 = gen_int_mode (factor, mode);
4502 val = expand_mult (mode, val, coeff1, NULL_RTX, false, true);
4503 }
4504 else
43cacb12 4505 {
7d8bdfa7
RS
4506 /* Go back to using a negative multiplication factor if we have
4507 no register from which to subtract. */
4508 if (code == MINUS && src == const0_rtx)
4509 {
4510 factor = -factor;
4511 code = PLUS;
4512 }
4513 rtx coeff1 = gen_int_mode (factor, mode);
4514 coeff1 = aarch64_force_temporary (mode, temp2, coeff1);
4515 val = gen_rtx_MULT (mode, val, coeff1);
43cacb12 4516 }
43cacb12
RS
4517 }
4518
4519 if (shift > 0)
4520 {
4521 /* Multiply by 1 << SHIFT. */
4522 val = aarch64_force_temporary (mode, temp1, val);
4523 val = gen_rtx_ASHIFT (mode, val, GEN_INT (shift));
4524 }
4525 else if (shift == -1)
4526 {
4527 /* Divide by 2. */
4528 val = aarch64_force_temporary (mode, temp1, val);
4529 val = gen_rtx_ASHIFTRT (mode, val, const1_rtx);
4530 }
4531
4532 /* Calculate SRC +/- CNTD * FACTOR / 2. */
4533 if (src != const0_rtx)
4534 {
4535 val = aarch64_force_temporary (mode, temp1, val);
4536 val = gen_rtx_fmt_ee (code, mode, src, val);
4537 }
4538 else if (code == MINUS)
4539 {
4540 val = aarch64_force_temporary (mode, temp1, val);
4541 val = gen_rtx_NEG (mode, val);
4542 }
4543
4544 if (constant == 0 || frame_related_p)
4545 {
4546 rtx_insn *insn = emit_insn (gen_rtx_SET (dest, val));
4547 if (frame_related_p)
4548 {
4549 RTX_FRAME_RELATED_P (insn) = true;
4550 add_reg_note (insn, REG_CFA_ADJUST_CFA,
4551 gen_rtx_SET (dest, plus_constant (Pmode, src,
4552 poly_offset)));
4553 }
4554 src = dest;
4555 if (constant == 0)
4556 return;
4557 }
4558 else
4559 {
4560 src = aarch64_force_temporary (mode, temp1, val);
4561 temp1 = temp2;
4562 temp2 = NULL_RTX;
4563 }
4564
4565 emit_move_imm = true;
4566 }
f5470a77 4567
f5470a77
RS
4568 aarch64_add_offset_1 (mode, dest, src, constant, temp1,
4569 frame_related_p, emit_move_imm);
0100c5f9
RS
4570}
4571
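/* Editorial illustration, not from the GCC sources: an offset of one whole
   SVE vector is the poly_int 16 + 16x (16 bytes per extra 128-bit block)
   and is handled by the ADDVL path above as "addvl dst, src, #1"; one
   predicate register (2 + 2x bytes) becomes "addpl dst, src, #1".  Offsets
   that neither form can reach -- for example more than 31 whole vectors,
   or any VG-based part when SRC is the constant zero -- fall through to
   the CNT-based multiply/shift sequence.  */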
43cacb12
RS
4572/* Like aarch64_add_offset, but the offset is given as an rtx rather
4573 than a poly_int64. */
4574
4575void
4576aarch64_split_add_offset (scalar_int_mode mode, rtx dest, rtx src,
4577 rtx offset_rtx, rtx temp1, rtx temp2)
4578{
4579 aarch64_add_offset (mode, dest, src, rtx_to_poly_int64 (offset_rtx),
4580 temp1, temp2, false);
4581}
4582
f5470a77
RS
4583/* Add DELTA to the stack pointer, marking the instructions frame-related.
4584 TEMP1 is available as a temporary if nonnull. EMIT_MOVE_IMM is false
4585 if TEMP1 already contains abs (DELTA). */
4586
0100c5f9 4587static inline void
43cacb12 4588aarch64_add_sp (rtx temp1, rtx temp2, poly_int64 delta, bool emit_move_imm)
0100c5f9 4589{
f5470a77 4590 aarch64_add_offset (Pmode, stack_pointer_rtx, stack_pointer_rtx, delta,
43cacb12 4591 temp1, temp2, true, emit_move_imm);
0100c5f9
RS
4592}
4593
f5470a77
RS
4594/* Subtract DELTA from the stack pointer, marking the instructions
4595 frame-related if FRAME_RELATED_P. TEMP1 is available as a temporary
4596 if nonnull. */
4597
0100c5f9 4598static inline void
cd1bef27
JL
4599aarch64_sub_sp (rtx temp1, rtx temp2, poly_int64 delta, bool frame_related_p,
4600 bool emit_move_imm = true)
0100c5f9 4601{
f5470a77 4602 aarch64_add_offset (Pmode, stack_pointer_rtx, stack_pointer_rtx, -delta,
cd1bef27 4603 temp1, temp2, frame_related_p, emit_move_imm);
0100c5f9 4604}
82614948 4605
43cacb12
RS
4606/* Set DEST to (vec_series BASE STEP). */
4607
4608static void
4609aarch64_expand_vec_series (rtx dest, rtx base, rtx step)
82614948
RR
4610{
4611 machine_mode mode = GET_MODE (dest);
43cacb12
RS
4612 scalar_mode inner = GET_MODE_INNER (mode);
4613
4614 /* Each operand can be a register or an immediate in the range [-16, 15]. */
4615 if (!aarch64_sve_index_immediate_p (base))
4616 base = force_reg (inner, base);
4617 if (!aarch64_sve_index_immediate_p (step))
4618 step = force_reg (inner, step);
4619
4620 emit_set_insn (dest, gen_rtx_VEC_SERIES (mode, base, step));
4621}
82614948 4622
4aeb1ba7
RS
4623/* Duplicate 128-bit Advanced SIMD vector SRC so that it fills an SVE
4624 register of mode MODE. Use TARGET for the result if it's nonnull
4625 and convenient.
4626
4627 The two vector modes must have the same element mode. The behavior
4628 is to duplicate architectural lane N of SRC into architectural lanes
4629 N + I * STEP of the result. On big-endian targets, architectural
4630 lane 0 of an Advanced SIMD vector is the last element of the vector
4631 in memory layout, so for big-endian targets this operation has the
4632 effect of reversing SRC before duplicating it. Callers need to
4633 account for this. */
43cacb12 4634
4aeb1ba7
RS
4635rtx
4636aarch64_expand_sve_dupq (rtx target, machine_mode mode, rtx src)
4637{
4638 machine_mode src_mode = GET_MODE (src);
4639 gcc_assert (GET_MODE_INNER (mode) == GET_MODE_INNER (src_mode));
4640 insn_code icode = (BYTES_BIG_ENDIAN
4641 ? code_for_aarch64_vec_duplicate_vq_be (mode)
4642 : code_for_aarch64_vec_duplicate_vq_le (mode));
4643
4644 unsigned int i = 0;
4645 expand_operand ops[3];
4646 create_output_operand (&ops[i++], target, mode);
4647 create_output_operand (&ops[i++], src, src_mode);
4648 if (BYTES_BIG_ENDIAN)
4649 {
4650 /* Create a PARALLEL describing the reversal of SRC. */
4651 unsigned int nelts_per_vq = 128 / GET_MODE_UNIT_BITSIZE (mode);
4652 rtx sel = aarch64_gen_stepped_int_parallel (nelts_per_vq,
4653 nelts_per_vq - 1, -1);
4654 create_fixed_operand (&ops[i++], sel);
43cacb12 4655 }
4aeb1ba7
RS
4656 expand_insn (icode, i, ops);
4657 return ops[0].value;
4658}
4659
4660/* Try to force 128-bit vector value SRC into memory and use LD1RQ to fetch
4661 the memory image into DEST. Return true on success. */
43cacb12 4662
4aeb1ba7
RS
4663static bool
4664aarch64_expand_sve_ld1rq (rtx dest, rtx src)
4665{
4666 src = force_const_mem (GET_MODE (src), src);
43cacb12
RS
4667 if (!src)
4668 return false;
4669
4670 /* Make sure that the address is legitimate. */
4aeb1ba7 4671 if (!aarch64_sve_ld1rq_operand_p (src))
43cacb12
RS
4672 {
4673 rtx addr = force_reg (Pmode, XEXP (src, 0));
4674 src = replace_equiv_address (src, addr);
4675 }
4676
947b1372 4677 machine_mode mode = GET_MODE (dest);
cc68f7c2 4678 machine_mode pred_mode = aarch64_sve_pred_mode (mode);
16de3637 4679 rtx ptrue = aarch64_ptrue_reg (pred_mode);
4aeb1ba7 4680 emit_insn (gen_aarch64_sve_ld1rq (mode, dest, src, ptrue));
43cacb12
RS
4681 return true;
4682}
4683
4aeb1ba7
RS
4684/* Return a register containing CONST_VECTOR SRC, given that SRC has an
4685 SVE data mode and isn't a legitimate constant. Use TARGET for the
4686 result if convenient.
43cacb12 4687
4aeb1ba7
RS
4688 The returned register can have whatever mode seems most natural
4689 given the contents of SRC. */
4690
4691static rtx
4692aarch64_expand_sve_const_vector (rtx target, rtx src)
43cacb12
RS
4693{
4694 machine_mode mode = GET_MODE (src);
4695 unsigned int npatterns = CONST_VECTOR_NPATTERNS (src);
4696 unsigned int nelts_per_pattern = CONST_VECTOR_NELTS_PER_PATTERN (src);
4aeb1ba7
RS
4697 scalar_mode elt_mode = GET_MODE_INNER (mode);
4698 unsigned int elt_bits = GET_MODE_BITSIZE (elt_mode);
cc68f7c2
RS
4699 unsigned int container_bits = aarch64_sve_container_bits (mode);
4700 unsigned int encoded_bits = npatterns * nelts_per_pattern * container_bits;
4701
4702 if (nelts_per_pattern == 1
4703 && encoded_bits <= 128
4704 && container_bits != elt_bits)
4705 {
4706 /* We have a partial vector mode and a constant whose full-vector
4707 equivalent would occupy a repeating 128-bit sequence. Build that
4708 full-vector equivalent instead, so that we have the option of
4709 using LD1RQ and Advanced SIMD operations. */
4710 unsigned int repeat = container_bits / elt_bits;
4711 machine_mode full_mode = aarch64_full_sve_mode (elt_mode).require ();
4712 rtx_vector_builder builder (full_mode, npatterns * repeat, 1);
4713 for (unsigned int i = 0; i < npatterns; ++i)
4714 for (unsigned int j = 0; j < repeat; ++j)
4715 builder.quick_push (CONST_VECTOR_ENCODED_ELT (src, i));
4716 target = aarch64_target_reg (target, full_mode);
4717 return aarch64_expand_sve_const_vector (target, builder.build ());
4718 }
4aeb1ba7
RS
4719
4720 if (nelts_per_pattern == 1 && encoded_bits == 128)
4721 {
4722 /* The constant is a duplicated quadword but can't be narrowed
4723 beyond a quadword. Get the memory image of the first quadword
4724 as a 128-bit vector and try using LD1RQ to load it from memory.
4725
4726 The effect for both endiannesses is to load memory lane N into
4727 architectural lanes N + I * STEP of the result. On big-endian
4728 targets, the layout of the 128-bit vector in an Advanced SIMD
4729 register would be different from its layout in an SVE register,
4730 but this 128-bit vector is a memory value only. */
4731 machine_mode vq_mode = aarch64_vq_mode (elt_mode).require ();
4732 rtx vq_value = simplify_gen_subreg (vq_mode, src, mode, 0);
4733 if (vq_value && aarch64_expand_sve_ld1rq (target, vq_value))
4734 return target;
4735 }
4736
4737 if (nelts_per_pattern == 1 && encoded_bits < 128)
4738 {
4739 /* The vector is a repeating sequence of 64 bits or fewer.
4740 See if we can load them using an Advanced SIMD move and then
4741 duplicate it to fill a vector. This is better than using a GPR
4742 move because it keeps everything in the same register file. */
4743 machine_mode vq_mode = aarch64_vq_mode (elt_mode).require ();
4744 rtx_vector_builder builder (vq_mode, npatterns, 1);
4745 for (unsigned int i = 0; i < npatterns; ++i)
4746 {
4747 /* We want memory lane N to go into architectural lane N,
4748 so reverse for big-endian targets. The DUP .Q pattern
4749 has a compensating reverse built-in. */
4750 unsigned int srci = BYTES_BIG_ENDIAN ? npatterns - i - 1 : i;
4751 builder.quick_push (CONST_VECTOR_ENCODED_ELT (src, srci));
4752 }
4753 rtx vq_src = builder.build ();
4754 if (aarch64_simd_valid_immediate (vq_src, NULL))
4755 {
4756 vq_src = force_reg (vq_mode, vq_src);
4757 return aarch64_expand_sve_dupq (target, mode, vq_src);
4758 }
4759
4760 /* Get an integer representation of the repeating part of Advanced
4761 SIMD vector VQ_SRC. This preserves the endianness of VQ_SRC,
4762 which for big-endian targets is lane-swapped wrt a normal
4763 Advanced SIMD vector. This means that for both endiannesses,
4764 memory lane N of SVE vector SRC corresponds to architectural
4765 lane N of a register holding VQ_SRC. This in turn means that
4766 memory lane 0 of SVE vector SRC is in the lsb of VQ_SRC (viewed
4767 as a single 128-bit value) and thus that memory lane 0 of SRC is
4768 in the lsb of the integer. Duplicating the integer therefore
4769 ensures that memory lane N of SRC goes into architectural lane
4770 N + I * INDEX of the SVE register. */
4771 scalar_mode int_mode = int_mode_for_size (encoded_bits, 0).require ();
4772 rtx elt_value = simplify_gen_subreg (int_mode, vq_src, vq_mode, 0);
4773 if (elt_value)
4774 {
4775 /* Pretend that we had a vector of INT_MODE to start with. */
4776 elt_mode = int_mode;
4777 mode = aarch64_full_sve_mode (int_mode).require ();
4778
4779 /* If the integer can be moved into a general register by a
4780 single instruction, do that and duplicate the result. */
4781 if (CONST_INT_P (elt_value)
4782 && aarch64_move_imm (INTVAL (elt_value), elt_mode))
4783 {
4784 elt_value = force_reg (elt_mode, elt_value);
4785 return expand_vector_broadcast (mode, elt_value);
4786 }
4787 }
4788 else if (npatterns == 1)
4789 /* We're duplicating a single value, but can't do better than
4790 force it to memory and load from there. This handles things
4791 like symbolic constants. */
4792 elt_value = CONST_VECTOR_ENCODED_ELT (src, 0);
43cacb12 4793
4aeb1ba7 4794 if (elt_value)
8179efe0 4795 {
4aeb1ba7
RS
4796 /* Load the element from memory if we can, otherwise move it into
4797 a register and use a DUP. */
4798 rtx op = force_const_mem (elt_mode, elt_value);
4799 if (!op)
4800 op = force_reg (elt_mode, elt_value);
4801 return expand_vector_broadcast (mode, op);
8179efe0 4802 }
43cacb12
RS
4803 }
4804
4aeb1ba7
RS
4805 /* Try using INDEX. */
4806 rtx base, step;
4807 if (const_vec_series_p (src, &base, &step))
4808 {
4809 aarch64_expand_vec_series (target, base, step);
4810 return target;
4811 }
4812
4813 /* From here on, it's better to force the whole constant to memory
4814 if we can. */
4815 if (GET_MODE_NUNITS (mode).is_constant ())
4816 return NULL_RTX;
4817
43cacb12 4818 /* Expand each pattern individually. */
4aeb1ba7 4819 gcc_assert (npatterns > 1);
43cacb12
RS
4820 rtx_vector_builder builder;
4821 auto_vec<rtx, 16> vectors (npatterns);
4822 for (unsigned int i = 0; i < npatterns; ++i)
4823 {
4824 builder.new_vector (mode, 1, nelts_per_pattern);
4825 for (unsigned int j = 0; j < nelts_per_pattern; ++j)
4826 builder.quick_push (CONST_VECTOR_ELT (src, i + j * npatterns));
4827 vectors.quick_push (force_reg (mode, builder.build ()));
4828 }
4829
4830 /* Use permutes to interleave the separate vectors. */
4831 while (npatterns > 1)
4832 {
4833 npatterns /= 2;
4834 for (unsigned int i = 0; i < npatterns; ++i)
4835 {
4aeb1ba7 4836 rtx tmp = (npatterns == 1 ? target : gen_reg_rtx (mode));
43cacb12
RS
4837 rtvec v = gen_rtvec (2, vectors[i], vectors[i + npatterns]);
4838 emit_set_insn (tmp, gen_rtx_UNSPEC (mode, v, UNSPEC_ZIP1));
4839 vectors[i] = tmp;
4840 }
4841 }
4aeb1ba7
RS
4842 gcc_assert (vectors[0] == target);
4843 return target;
43cacb12
RS
4844}
4845
678faefc
RS
4846/* Use WHILE to set a predicate register of mode MODE in which the first
4847 VL bits are set and the rest are clear. Use TARGET for the register
4848 if it's nonnull and convenient. */
0b1fe8cf 4849
678faefc
RS
4850static rtx
4851aarch64_sve_move_pred_via_while (rtx target, machine_mode mode,
4852 unsigned int vl)
0b1fe8cf
RS
4853{
4854 rtx limit = force_reg (DImode, gen_int_mode (vl, DImode));
678faefc 4855 target = aarch64_target_reg (target, mode);
6ad9571b 4856 emit_insn (gen_while (UNSPEC_WHILELO, DImode, mode,
624d0f07 4857 target, const0_rtx, limit));
678faefc
RS
4858 return target;
4859}
4860
2803bc3b
RS
4861static rtx
4862aarch64_expand_sve_const_pred_1 (rtx, rtx_vector_builder &, bool);
4863
4864/* BUILDER is a constant predicate in which the index of every set bit
4865 is a multiple of ELT_SIZE (which is <= 8). Try to load the constant
4866 by inverting every element at a multiple of ELT_SIZE and EORing the
4867 result with an ELT_SIZE PTRUE.
4868
4869 Return a register that contains the constant on success, otherwise
4870 return null. Use TARGET as the register if it is nonnull and
4871 convenient. */
4872
4873static rtx
4874aarch64_expand_sve_const_pred_eor (rtx target, rtx_vector_builder &builder,
4875 unsigned int elt_size)
4876{
4877 /* Invert every element at a multiple of ELT_SIZE, keeping the
4878 other bits zero. */
4879 rtx_vector_builder inv_builder (VNx16BImode, builder.npatterns (),
4880 builder.nelts_per_pattern ());
4881 for (unsigned int i = 0; i < builder.encoded_nelts (); ++i)
4882 if ((i & (elt_size - 1)) == 0 && INTVAL (builder.elt (i)) == 0)
4883 inv_builder.quick_push (const1_rtx);
4884 else
4885 inv_builder.quick_push (const0_rtx);
4886 inv_builder.finalize ();
4887
4888 /* See if we can load the constant cheaply. */
4889 rtx inv = aarch64_expand_sve_const_pred_1 (NULL_RTX, inv_builder, false);
4890 if (!inv)
4891 return NULL_RTX;
4892
4893 /* EOR the result with an ELT_SIZE PTRUE. */
4894 rtx mask = aarch64_ptrue_all (elt_size);
4895 mask = force_reg (VNx16BImode, mask);
26bebf57 4896 inv = gen_lowpart (VNx16BImode, inv);
2803bc3b
RS
4897 target = aarch64_target_reg (target, VNx16BImode);
4898 emit_insn (gen_aarch64_pred_z (XOR, VNx16BImode, target, mask, inv, mask));
4899 return target;
4900}
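/* Worked example (an illustrative sketch, not from the sources): with
   ELT_SIZE == 1 and BUILDER == { 0, 1, 1, 1, ... } (only the first lane
   clear), the inverted constant is { 1, 0, 0, 0, ... }, which is cheap to
   load because it is a "first VL lanes set" pattern with VL == 1.  EORing
   that value with an all-true .B PTRUE then reproduces the original
   { 0, 1, 1, 1, ... }.  */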
4901
4902/* BUILDER is a constant predicate in which the index of every set bit
4903 is a multiple of ELT_SIZE (which is <= 8). Try to load the constant
4904 using a TRN1 of size PERMUTE_SIZE, which is >= ELT_SIZE. Return the
4905 register on success, otherwise return null. Use TARGET as the register
4906 if nonnull and convenient. */
4907
4908static rtx
4909aarch64_expand_sve_const_pred_trn (rtx target, rtx_vector_builder &builder,
4910 unsigned int elt_size,
4911 unsigned int permute_size)
4912{
4913 /* We're going to split the constant into two new constants A and B,
4914 with element I of BUILDER going into A if (I & PERMUTE_SIZE) == 0
4915 and into B otherwise. E.g. for PERMUTE_SIZE == 4 && ELT_SIZE == 1:
4916
4917 A: { 0, 1, 2, 3, _, _, _, _, 8, 9, 10, 11, _, _, _, _ }
4918 B: { 4, 5, 6, 7, _, _, _, _, 12, 13, 14, 15, _, _, _, _ }
4919
4920 where _ indicates elements that will be discarded by the permute.
4921
4922 First calculate the ELT_SIZEs for A and B. */
4923 unsigned int a_elt_size = GET_MODE_SIZE (DImode);
4924 unsigned int b_elt_size = GET_MODE_SIZE (DImode);
4925 for (unsigned int i = 0; i < builder.encoded_nelts (); i += elt_size)
4926 if (INTVAL (builder.elt (i)) != 0)
4927 {
4928 if (i & permute_size)
4929 b_elt_size |= i - permute_size;
4930 else
4931 a_elt_size |= i;
4932 }
4933 a_elt_size &= -a_elt_size;
4934 b_elt_size &= -b_elt_size;
4935
4936 /* Now construct the vectors themselves. */
4937 rtx_vector_builder a_builder (VNx16BImode, builder.npatterns (),
4938 builder.nelts_per_pattern ());
4939 rtx_vector_builder b_builder (VNx16BImode, builder.npatterns (),
4940 builder.nelts_per_pattern ());
4941 unsigned int nelts = builder.encoded_nelts ();
4942 for (unsigned int i = 0; i < nelts; ++i)
4943 if (i & (elt_size - 1))
4944 {
4945 a_builder.quick_push (const0_rtx);
4946 b_builder.quick_push (const0_rtx);
4947 }
4948 else if ((i & permute_size) == 0)
4949 {
4950 /* The A and B elements are significant. */
4951 a_builder.quick_push (builder.elt (i));
4952 b_builder.quick_push (builder.elt (i + permute_size));
4953 }
4954 else
4955 {
4956 /* The A and B elements are going to be discarded, so pick whatever
4957 is likely to give a nice constant. We are targeting element
4958 sizes A_ELT_SIZE and B_ELT_SIZE for A and B respectively,
4959 with the aim of each being a sequence of ones followed by
4960 a sequence of zeros. So:
4961
4962 * if X_ELT_SIZE <= PERMUTE_SIZE, the best approach is to
4963 duplicate the last X_ELT_SIZE element, to extend the
4964 current sequence of ones or zeros.
4965
4966 * if X_ELT_SIZE > PERMUTE_SIZE, the best approach is to add a
4967 zero, so that the constant really does have X_ELT_SIZE and
4968 not a smaller size. */
4969 if (a_elt_size > permute_size)
4970 a_builder.quick_push (const0_rtx);
4971 else
4972 a_builder.quick_push (a_builder.elt (i - a_elt_size));
4973 if (b_elt_size > permute_size)
4974 b_builder.quick_push (const0_rtx);
4975 else
4976 b_builder.quick_push (b_builder.elt (i - b_elt_size));
4977 }
4978 a_builder.finalize ();
4979 b_builder.finalize ();
4980
4981 /* Try loading A into a register. */
4982 rtx_insn *last = get_last_insn ();
4983 rtx a = aarch64_expand_sve_const_pred_1 (NULL_RTX, a_builder, false);
4984 if (!a)
4985 return NULL_RTX;
4986
4987 /* Try loading B into a register. */
4988 rtx b = a;
4989 if (a_builder != b_builder)
4990 {
4991 b = aarch64_expand_sve_const_pred_1 (NULL_RTX, b_builder, false);
4992 if (!b)
4993 {
4994 delete_insns_since (last);
4995 return NULL_RTX;
4996 }
4997 }
4998
4999 /* Emit the TRN1 itself. */
5000 machine_mode mode = aarch64_sve_pred_mode (permute_size).require ();
5001 target = aarch64_target_reg (target, mode);
5002 emit_insn (gen_aarch64_sve (UNSPEC_TRN1, mode, target,
5003 gen_lowpart (mode, a),
5004 gen_lowpart (mode, b)));
5005 return target;
5006}
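/* Continuing the example in the comment above (illustrative only): with
   PERMUTE_SIZE == 4 the TRN1 operates on 4-lane containers and takes the
   even-numbered containers of A and B alternately, giving

	{ A[0..3], B[0..3], A[8..11], B[8..11] }
      = { 0, 1, 2, ..., 15 }

   i.e. the original constant is recovered from the two simpler halves.  */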
5007
678faefc
RS
5008/* Subroutine of aarch64_expand_sve_const_pred. Try to load the VNx16BI
5009 constant in BUILDER into an SVE predicate register. Return the register
5010 on success, otherwise return null. Use TARGET for the register if
2803bc3b
RS
5011 nonnull and convenient.
5012
5013 ALLOW_RECURSE_P is true if we can use methods that would call this
5014 function recursively. */
678faefc
RS
5015
5016static rtx
2803bc3b
RS
5017aarch64_expand_sve_const_pred_1 (rtx target, rtx_vector_builder &builder,
5018 bool allow_recurse_p)
678faefc
RS
5019{
5020 if (builder.encoded_nelts () == 1)
5021 /* A PFALSE or a PTRUE .B ALL. */
5022 return aarch64_emit_set_immediate (target, builder);
5023
5024 unsigned int elt_size = aarch64_widest_sve_pred_elt_size (builder);
5025 if (int vl = aarch64_partial_ptrue_length (builder, elt_size))
5026 {
5027 /* If we can load the constant using PTRUE, use it as-is. */
5028 machine_mode mode = aarch64_sve_pred_mode (elt_size).require ();
5029 if (aarch64_svpattern_for_vl (mode, vl) != AARCH64_NUM_SVPATTERNS)
5030 return aarch64_emit_set_immediate (target, builder);
5031
5032 /* Otherwise use WHILE to set the first VL bits. */
5033 return aarch64_sve_move_pred_via_while (target, mode, vl);
5034 }
5035
2803bc3b
RS
5036 if (!allow_recurse_p)
5037 return NULL_RTX;
5038
5039 /* Try inverting the vector in element size ELT_SIZE and then EORing
5040 the result with an ELT_SIZE PTRUE. */
5041 if (INTVAL (builder.elt (0)) == 0)
5042 if (rtx res = aarch64_expand_sve_const_pred_eor (target, builder,
5043 elt_size))
5044 return res;
5045
5046 /* Try using TRN1 to permute two simpler constants. */
5047 for (unsigned int i = elt_size; i <= 8; i *= 2)
5048 if (rtx res = aarch64_expand_sve_const_pred_trn (target, builder,
5049 elt_size, i))
5050 return res;
5051
678faefc
RS
5052 return NULL_RTX;
5053}
5054
5055/* Return an SVE predicate register that contains the VNx16BImode
5056 constant in BUILDER, without going through the move expanders.
5057
5058 The returned register can have whatever mode seems most natural
5059 given the contents of BUILDER. Use TARGET for the result if
5060 convenient. */
5061
5062static rtx
5063aarch64_expand_sve_const_pred (rtx target, rtx_vector_builder &builder)
5064{
5065 /* Try loading the constant using pure predicate operations. */
2803bc3b 5066 if (rtx res = aarch64_expand_sve_const_pred_1 (target, builder, true))
678faefc
RS
5067 return res;
5068
5069 /* Try forcing the constant to memory. */
5070 if (builder.full_nelts ().is_constant ())
5071 if (rtx mem = force_const_mem (VNx16BImode, builder.build ()))
5072 {
5073 target = aarch64_target_reg (target, VNx16BImode);
5074 emit_move_insn (target, mem);
5075 return target;
5076 }
5077
5078 /* The last resort is to load the constant as an integer and then
5079 compare it against zero. Use -1 for set bits in order to increase
 5080 the chances of using SVE DUPM or an Advanced SIMD byte mask. */
5081 rtx_vector_builder int_builder (VNx16QImode, builder.npatterns (),
5082 builder.nelts_per_pattern ());
5083 for (unsigned int i = 0; i < builder.encoded_nelts (); ++i)
5084 int_builder.quick_push (INTVAL (builder.elt (i))
5085 ? constm1_rtx : const0_rtx);
5086 return aarch64_convert_sve_data_to_pred (target, VNx16BImode,
5087 int_builder.build ());
0b1fe8cf
RS
5088}
5089
4aeb1ba7 5090/* Set DEST to immediate IMM. */
43cacb12
RS
5091
5092void
4aeb1ba7 5093aarch64_expand_mov_immediate (rtx dest, rtx imm)
43cacb12
RS
5094{
5095 machine_mode mode = GET_MODE (dest);
82614948
RR
5096
5097 /* Check on what type of symbol it is. */
77e994c9
RS
5098 scalar_int_mode int_mode;
5099 if ((GET_CODE (imm) == SYMBOL_REF
5100 || GET_CODE (imm) == LABEL_REF
43cacb12
RS
5101 || GET_CODE (imm) == CONST
5102 || GET_CODE (imm) == CONST_POLY_INT)
77e994c9 5103 && is_a <scalar_int_mode> (mode, &int_mode))
82614948 5104 {
43cacb12
RS
5105 rtx mem;
5106 poly_int64 offset;
5107 HOST_WIDE_INT const_offset;
82614948
RR
5108 enum aarch64_symbol_type sty;
5109
5110 /* If we have (const (plus symbol offset)), separate out the offset
5111 before we start classifying the symbol. */
43cacb12 5112 rtx base = strip_offset (imm, &offset);
82614948 5113
43cacb12
RS
5114 /* We must always add an offset involving VL separately, rather than
5115 folding it into the relocation. */
5116 if (!offset.is_constant (&const_offset))
5117 {
c0e0174b
RS
5118 if (!TARGET_SVE)
5119 {
5120 aarch64_report_sve_required ();
5121 return;
5122 }
43cacb12
RS
5123 if (base == const0_rtx && aarch64_sve_cnt_immediate_p (offset))
5124 emit_insn (gen_rtx_SET (dest, imm));
5125 else
5126 {
5127 /* Do arithmetic on 32-bit values if the result is smaller
5128 than that. */
5129 if (partial_subreg_p (int_mode, SImode))
5130 {
5131 /* It is invalid to do symbol calculations in modes
5132 narrower than SImode. */
5133 gcc_assert (base == const0_rtx);
5134 dest = gen_lowpart (SImode, dest);
5135 int_mode = SImode;
5136 }
5137 if (base != const0_rtx)
5138 {
5139 base = aarch64_force_temporary (int_mode, dest, base);
5140 aarch64_add_offset (int_mode, dest, base, offset,
5141 NULL_RTX, NULL_RTX, false);
5142 }
5143 else
5144 aarch64_add_offset (int_mode, dest, base, offset,
5145 dest, NULL_RTX, false);
5146 }
5147 return;
5148 }
5149
5150 sty = aarch64_classify_symbol (base, const_offset);
82614948
RR
5151 switch (sty)
5152 {
5153 case SYMBOL_FORCE_TO_MEM:
43cacb12 5154 if (const_offset != 0
77e994c9 5155 && targetm.cannot_force_const_mem (int_mode, imm))
82614948
RR
5156 {
5157 gcc_assert (can_create_pseudo_p ());
77e994c9 5158 base = aarch64_force_temporary (int_mode, dest, base);
43cacb12
RS
5159 aarch64_add_offset (int_mode, dest, base, const_offset,
5160 NULL_RTX, NULL_RTX, false);
82614948
RR
5161 return;
5162 }
b4f50fd4 5163
82614948
RR
5164 mem = force_const_mem (ptr_mode, imm);
5165 gcc_assert (mem);
b4f50fd4
RR
5166
5167 /* If we aren't generating PC relative literals, then
5168 we need to expand the literal pool access carefully.
5169 This is something that needs to be done in a number
5170 of places, so could well live as a separate function. */
9ee6540a 5171 if (!aarch64_pcrelative_literal_loads)
b4f50fd4
RR
5172 {
5173 gcc_assert (can_create_pseudo_p ());
5174 base = gen_reg_rtx (ptr_mode);
5175 aarch64_expand_mov_immediate (base, XEXP (mem, 0));
00eee3fa
WD
5176 if (ptr_mode != Pmode)
5177 base = convert_memory_address (Pmode, base);
b4f50fd4
RR
5178 mem = gen_rtx_MEM (ptr_mode, base);
5179 }
5180
77e994c9
RS
5181 if (int_mode != ptr_mode)
5182 mem = gen_rtx_ZERO_EXTEND (int_mode, mem);
b4f50fd4 5183
f7df4a84 5184 emit_insn (gen_rtx_SET (dest, mem));
b4f50fd4 5185
82614948
RR
5186 return;
5187
5188 case SYMBOL_SMALL_TLSGD:
5189 case SYMBOL_SMALL_TLSDESC:
79496620 5190 case SYMBOL_SMALL_TLSIE:
1b1e81f8 5191 case SYMBOL_SMALL_GOT_28K:
6642bdb4 5192 case SYMBOL_SMALL_GOT_4G:
82614948 5193 case SYMBOL_TINY_GOT:
5ae7caad 5194 case SYMBOL_TINY_TLSIE:
43cacb12 5195 if (const_offset != 0)
82614948
RR
5196 {
 5197 gcc_assert (can_create_pseudo_p ());
77e994c9 5198 base = aarch64_force_temporary (int_mode, dest, base);
43cacb12
RS
5199 aarch64_add_offset (int_mode, dest, base, const_offset,
5200 NULL_RTX, NULL_RTX, false);
82614948
RR
5201 return;
5202 }
5203 /* FALLTHRU */
5204
82614948
RR
5205 case SYMBOL_SMALL_ABSOLUTE:
5206 case SYMBOL_TINY_ABSOLUTE:
cbf5629e 5207 case SYMBOL_TLSLE12:
d18ba284 5208 case SYMBOL_TLSLE24:
cbf5629e
JW
5209 case SYMBOL_TLSLE32:
5210 case SYMBOL_TLSLE48:
82614948
RR
5211 aarch64_load_symref_appropriately (dest, imm, sty);
5212 return;
5213
5214 default:
5215 gcc_unreachable ();
5216 }
5217 }
5218
5219 if (!CONST_INT_P (imm))
5220 {
678faefc
RS
5221 if (GET_MODE_CLASS (mode) == MODE_VECTOR_BOOL)
5222 {
5223 /* Only the low bit of each .H, .S and .D element is defined,
5224 so we can set the upper bits to whatever we like. If the
5225 predicate is all-true in MODE, prefer to set all the undefined
5226 bits as well, so that we can share a single .B predicate for
5227 all modes. */
5228 if (imm == CONSTM1_RTX (mode))
5229 imm = CONSTM1_RTX (VNx16BImode);
5230
5231 /* All methods for constructing predicate modes wider than VNx16BI
5232 will set the upper bits of each element to zero. Expose this
5233 by moving such constants as a VNx16BI, so that all bits are
5234 significant and so that constants for different modes can be
5235 shared. The wider constant will still be available as a
5236 REG_EQUAL note. */
5237 rtx_vector_builder builder;
5238 if (aarch64_get_sve_pred_bits (builder, imm))
5239 {
5240 rtx res = aarch64_expand_sve_const_pred (dest, builder);
5241 if (dest != res)
5242 emit_move_insn (dest, gen_lowpart (mode, res));
5243 return;
5244 }
5245 }
5246
43cacb12
RS
5247 if (GET_CODE (imm) == HIGH
5248 || aarch64_simd_valid_immediate (imm, NULL))
43cacb12 5249 {
4aeb1ba7
RS
5250 emit_insn (gen_rtx_SET (dest, imm));
5251 return;
43e9d192 5252 }
82614948 5253
4aeb1ba7
RS
5254 if (GET_CODE (imm) == CONST_VECTOR && aarch64_sve_data_mode_p (mode))
5255 if (rtx res = aarch64_expand_sve_const_vector (dest, imm))
5256 {
5257 if (dest != res)
5258 emit_insn (gen_aarch64_sve_reinterpret (mode, dest, res));
5259 return;
5260 }
5261
5262 rtx mem = force_const_mem (mode, imm);
5263 gcc_assert (mem);
5264 emit_move_insn (dest, mem);
82614948 5265 return;
43e9d192 5266 }
82614948 5267
77e994c9
RS
5268 aarch64_internal_mov_immediate (dest, imm, true,
5269 as_a <scalar_int_mode> (mode));
43e9d192
IB
5270}
5271
74b27d8e
RS
5272/* Return the MEM rtx that provides the canary value that should be used
5273 for stack-smashing protection. MODE is the mode of the memory.
5274 For SSP_GLOBAL, DECL_RTL is the MEM rtx for the canary variable
5275 (__stack_chk_guard), otherwise it has no useful value. SALT_TYPE
5276 indicates whether the caller is performing a SET or a TEST operation. */
5277
5278rtx
5279aarch64_stack_protect_canary_mem (machine_mode mode, rtx decl_rtl,
5280 aarch64_salt_type salt_type)
5281{
5282 rtx addr;
5283 if (aarch64_stack_protector_guard == SSP_GLOBAL)
5284 {
5285 gcc_assert (MEM_P (decl_rtl));
5286 addr = XEXP (decl_rtl, 0);
5287 poly_int64 offset;
5288 rtx base = strip_offset_and_salt (addr, &offset);
5289 if (!SYMBOL_REF_P (base))
5290 return decl_rtl;
5291
5292 rtvec v = gen_rtvec (2, base, GEN_INT (salt_type));
5293 addr = gen_rtx_UNSPEC (Pmode, v, UNSPEC_SALT_ADDR);
5294 addr = gen_rtx_CONST (Pmode, addr);
5295 addr = plus_constant (Pmode, addr, offset);
5296 }
5297 else
5298 {
5299 /* Calculate the address from the system register. */
5300 rtx salt = GEN_INT (salt_type);
5301 addr = gen_reg_rtx (mode);
5302 if (mode == DImode)
5303 emit_insn (gen_reg_stack_protect_address_di (addr, salt));
5304 else
5305 {
5306 emit_insn (gen_reg_stack_protect_address_si (addr, salt));
5307 addr = convert_memory_address (Pmode, addr);
5308 }
5309 addr = plus_constant (Pmode, addr, aarch64_stack_protector_guard_offset);
5310 }
5311 return gen_rtx_MEM (mode, force_reg (Pmode, addr));
5312}
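/* Usage sketch; the option spellings below are assumptions made for
   illustration rather than something taken from this file.  The
   SSP_GLOBAL path corresponds to the usual __stack_chk_guard variable,
   while the system-register path is selected with options along the
   lines of

	-mstack-protector-guard=sysreg
	-mstack-protector-guard-reg=sp_el0
	-mstack-protector-guard-offset=8

   in which case the canary is loaded from *(sp_el0 + 8) instead of from
   a global symbol.  */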
5313
43cacb12
RS
5314/* Emit an SVE predicated move from SRC to DEST. PRED is a predicate
5315 that is known to contain PTRUE. */
5316
5317void
5318aarch64_emit_sve_pred_move (rtx dest, rtx pred, rtx src)
5319{
0c63a8ee
TC
5320 expand_operand ops[3];
5321 machine_mode mode = GET_MODE (dest);
5322 create_output_operand (&ops[0], dest, mode);
5323 create_input_operand (&ops[1], pred, GET_MODE(pred));
5324 create_input_operand (&ops[2], src, mode);
f2b29269 5325 temporary_volatile_ok v (true);
0c63a8ee 5326 expand_insn (code_for_aarch64_pred_mov (mode), 3, ops);
43cacb12
RS
5327}
5328
5329/* Expand a pre-RA SVE data move from SRC to DEST in which at least one
5330 operand is in memory. In this case we need to use the predicated LD1
5331 and ST1 instead of LDR and STR, both for correctness on big-endian
5332 targets and because LD1 and ST1 support a wider range of addressing modes.
5333 PRED_MODE is the mode of the predicate.
5334
5335 See the comment at the head of aarch64-sve.md for details about the
5336 big-endian handling. */
5337
5338void
5339aarch64_expand_sve_mem_move (rtx dest, rtx src, machine_mode pred_mode)
5340{
5341 machine_mode mode = GET_MODE (dest);
16de3637 5342 rtx ptrue = aarch64_ptrue_reg (pred_mode);
43cacb12
RS
5343 if (!register_operand (src, mode)
5344 && !register_operand (dest, mode))
5345 {
5346 rtx tmp = gen_reg_rtx (mode);
5347 if (MEM_P (src))
5348 aarch64_emit_sve_pred_move (tmp, ptrue, src);
5349 else
5350 emit_move_insn (tmp, src);
5351 src = tmp;
5352 }
5353 aarch64_emit_sve_pred_move (dest, ptrue, src);
5354}
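/* Minimal sketch of the effect; the mode and register choices are
   assumptions for illustration.  A pre-RA memory-to-memory SVE copy such
   as

	(set (mem:VNx4SI A) (mem:VNx4SI B))

   becomes a predicated load into a fresh register followed by a
   predicated store, roughly

	ld1w	{ z0.s }, p0/z, [x1]
	st1w	{ z0.s }, p0, [x0]

   with p0 holding a PTRUE of the corresponding predicate mode.  */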
5355
002092be
RS
5356/* Called only on big-endian targets. See whether an SVE vector move
5357 from SRC to DEST is effectively a REV[BHW] instruction, because at
5358 least one operand is a subreg of an SVE vector that has wider or
5359 narrower elements. Return true and emit the instruction if so.
5360
5361 For example:
5362
5363 (set (reg:VNx8HI R1) (subreg:VNx8HI (reg:VNx16QI R2) 0))
5364
5365 represents a VIEW_CONVERT between the following vectors, viewed
5366 in memory order:
5367
5368 R2: { [0].high, [0].low, [1].high, [1].low, ... }
5369 R1: { [0], [1], [2], [3], ... }
5370
5371 The high part of lane X in R2 should therefore correspond to lane X*2
5372 of R1, but the register representations are:
5373
5374 msb lsb
5375 R2: ...... [1].high [1].low [0].high [0].low
5376 R1: ...... [3] [2] [1] [0]
5377
5378 where the low part of lane X in R2 corresponds to lane X*2 in R1.
5379 We therefore need a reverse operation to swap the high and low values
5380 around.
5381
5382 This is purely an optimization. Without it we would spill the
5383 subreg operand to the stack in one mode and reload it in the
5384 other mode, which has the same effect as the REV. */
5385
5386bool
5387aarch64_maybe_expand_sve_subreg_move (rtx dest, rtx src)
5388{
5389 gcc_assert (BYTES_BIG_ENDIAN);
5390 if (GET_CODE (dest) == SUBREG)
5391 dest = SUBREG_REG (dest);
5392 if (GET_CODE (src) == SUBREG)
5393 src = SUBREG_REG (src);
5394
5395 /* The optimization handles two single SVE REGs with different element
5396 sizes. */
5397 if (!REG_P (dest)
5398 || !REG_P (src)
5399 || aarch64_classify_vector_mode (GET_MODE (dest)) != VEC_SVE_DATA
5400 || aarch64_classify_vector_mode (GET_MODE (src)) != VEC_SVE_DATA
5401 || (GET_MODE_UNIT_SIZE (GET_MODE (dest))
5402 == GET_MODE_UNIT_SIZE (GET_MODE (src))))
5403 return false;
5404
5405 /* Generate *aarch64_sve_mov<mode>_subreg_be. */
16de3637 5406 rtx ptrue = aarch64_ptrue_reg (VNx16BImode);
002092be
RS
5407 rtx unspec = gen_rtx_UNSPEC (GET_MODE (dest), gen_rtvec (2, ptrue, src),
5408 UNSPEC_REV_SUBREG);
5409 emit_insn (gen_rtx_SET (dest, unspec));
5410 return true;
5411}
5412
5413/* Return a copy of X with mode MODE, without changing its other
5414 attributes. Unlike gen_lowpart, this doesn't care whether the
5415 mode change is valid. */
5416
624d0f07 5417rtx
002092be
RS
5418aarch64_replace_reg_mode (rtx x, machine_mode mode)
5419{
5420 if (GET_MODE (x) == mode)
5421 return x;
5422
5423 x = shallow_copy_rtx (x);
5424 set_mode_and_regno (x, mode, REGNO (x));
5425 return x;
5426}
5427
d7a09c44
RS
 5428/* Return the SVE REV[BHW] unspec for reversing quantities of mode MODE
5429 stored in wider integer containers. */
5430
5431static unsigned int
5432aarch64_sve_rev_unspec (machine_mode mode)
5433{
5434 switch (GET_MODE_UNIT_SIZE (mode))
5435 {
5436 case 1: return UNSPEC_REVB;
5437 case 2: return UNSPEC_REVH;
5438 case 4: return UNSPEC_REVW;
5439 }
5440 gcc_unreachable ();
5441}
5442
002092be
RS
5443/* Split a *aarch64_sve_mov<mode>_subreg_be pattern with the given
5444 operands. */
5445
5446void
5447aarch64_split_sve_subreg_move (rtx dest, rtx ptrue, rtx src)
5448{
d7a09c44
RS
5449 /* Decide which REV operation we need. The mode with wider elements
5450 determines the mode of the operands and the mode with the narrower
002092be 5451 elements determines the reverse width. */
5c06093c
RS
5452 machine_mode mode_with_wider_elts = aarch64_sve_int_mode (GET_MODE (dest));
5453 machine_mode mode_with_narrower_elts = aarch64_sve_int_mode (GET_MODE (src));
002092be
RS
5454 if (GET_MODE_UNIT_SIZE (mode_with_wider_elts)
5455 < GET_MODE_UNIT_SIZE (mode_with_narrower_elts))
5456 std::swap (mode_with_wider_elts, mode_with_narrower_elts);
5457
d7a09c44 5458 unsigned int unspec = aarch64_sve_rev_unspec (mode_with_narrower_elts);
cc68f7c2 5459 machine_mode pred_mode = aarch64_sve_pred_mode (mode_with_wider_elts);
002092be 5460
d7a09c44 5461 /* Get the operands in the appropriate modes and emit the instruction. */
002092be 5462 ptrue = gen_lowpart (pred_mode, ptrue);
d7a09c44
RS
5463 dest = aarch64_replace_reg_mode (dest, mode_with_wider_elts);
5464 src = aarch64_replace_reg_mode (src, mode_with_wider_elts);
5465 emit_insn (gen_aarch64_pred (unspec, mode_with_wider_elts,
5466 dest, ptrue, src));
002092be
RS
5467}
5468
43e9d192 5469static bool
c600df9a 5470aarch64_function_ok_for_sibcall (tree, tree exp)
43e9d192 5471{
c600df9a 5472 if (crtl->abi->id () != expr_callee_abi (exp).id ())
a0d0b980
SE
5473 return false;
5474
43e9d192
IB
5475 return true;
5476}
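/* Illustrative example; the use of the aarch64_vector_pcs attribute is an
   assumption made for the example:

	void callee (void) __attribute__ ((aarch64_vector_pcs));
	void caller (void) { callee (); }

   The call in caller is not turned into a sibcall because caller uses the
   base PCS while callee uses the vector PCS, so the ABI identifiers
   compared above differ.  */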
5477
38e62001
RS
5478/* Subroutine of aarch64_pass_by_reference for arguments that are not
5479 passed in SVE registers. */
43e9d192
IB
5480
5481static bool
56fe3ca3
RS
5482aarch64_pass_by_reference_1 (CUMULATIVE_ARGS *pcum,
5483 const function_arg_info &arg)
43e9d192
IB
5484{
5485 HOST_WIDE_INT size;
ef4bddc2 5486 machine_mode dummymode;
43e9d192
IB
5487 int nregs;
5488
5489 /* GET_MODE_SIZE (BLKmode) is useless since it is 0. */
52090e4d
RS
5490 if (arg.mode == BLKmode && arg.type)
5491 size = int_size_in_bytes (arg.type);
6a70badb
RS
5492 else
5493 /* No frontends can create types with variable-sized modes, so we
5494 shouldn't be asked to pass or return them. */
52090e4d 5495 size = GET_MODE_SIZE (arg.mode).to_constant ();
43e9d192 5496
aadc1c43 5497 /* Aggregates are passed by reference based on their size. */
52090e4d
RS
5498 if (arg.aggregate_type_p ())
5499 size = int_size_in_bytes (arg.type);
43e9d192
IB
5500
 5501 /* Variable-sized arguments are always passed by reference. */
5502 if (size < 0)
5503 return true;
5504
5505 /* Can this be a candidate to be passed in fp/simd register(s)? */
52090e4d 5506 if (aarch64_vfp_is_call_or_return_candidate (arg.mode, arg.type,
56fe3ca3
RS
5507 &dummymode, &nregs, NULL,
5508 !pcum || pcum->silent_p))
43e9d192
IB
5509 return false;
5510
5511 /* Arguments which are variable sized or larger than 2 registers are
 5512 passed by reference unless they are homogeneous floating-point
 5513 aggregates. */
5514 return size > 2 * UNITS_PER_WORD;
5515}
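/* Worked examples of the rules applied above; the struct names are
   invented for illustration:

	struct big { long x, y, z; };	     (24 bytes, not an HFA)
	struct hfa { double a, b, c, d; };   (32 bytes, but an HFA)

   "big" is larger than 2 * UNITS_PER_WORD and is not a fp/simd candidate,
   so it is passed by reference; "hfa" is a homogeneous floating-point
   aggregate and is passed in V registers despite its size.  */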
5516
38e62001
RS
5517/* Implement TARGET_PASS_BY_REFERENCE. */
5518
5519static bool
5520aarch64_pass_by_reference (cumulative_args_t pcum_v,
5521 const function_arg_info &arg)
5522{
5523 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
5524
5525 if (!arg.type)
56fe3ca3 5526 return aarch64_pass_by_reference_1 (pcum, arg);
38e62001
RS
5527
5528 pure_scalable_type_info pst_info;
5529 switch (pst_info.analyze (arg.type))
5530 {
5531 case pure_scalable_type_info::IS_PST:
5532 if (pcum && !pcum->silent_p && !TARGET_SVE)
5533 /* We can't gracefully recover at this point, so make this a
5534 fatal error. */
5535 fatal_error (input_location, "arguments of type %qT require"
5536 " the SVE ISA extension", arg.type);
5537
5538 /* Variadic SVE types are passed by reference. Normal non-variadic
5539 arguments are too if we've run out of registers. */
5540 return (!arg.named
5541 || pcum->aapcs_nvrn + pst_info.num_zr () > NUM_FP_ARG_REGS
5542 || pcum->aapcs_nprn + pst_info.num_pr () > NUM_PR_ARG_REGS);
5543
5544 case pure_scalable_type_info::DOESNT_MATTER:
56fe3ca3 5545 gcc_assert (aarch64_pass_by_reference_1 (pcum, arg));
38e62001
RS
5546 return true;
5547
5548 case pure_scalable_type_info::NO_ABI_IDENTITY:
5549 case pure_scalable_type_info::ISNT_PST:
56fe3ca3 5550 return aarch64_pass_by_reference_1 (pcum, arg);
38e62001
RS
5551 }
5552 gcc_unreachable ();
5553}
5554
43e9d192
IB
5555/* Return TRUE if VALTYPE is padded to its least significant bits. */
5556static bool
5557aarch64_return_in_msb (const_tree valtype)
5558{
ef4bddc2 5559 machine_mode dummy_mode;
43e9d192
IB
5560 int dummy_int;
5561
5562 /* Never happens in little-endian mode. */
5563 if (!BYTES_BIG_ENDIAN)
5564 return false;
5565
5566 /* Only composite types smaller than or equal to 16 bytes can
5567 be potentially returned in registers. */
5568 if (!aarch64_composite_type_p (valtype, TYPE_MODE (valtype))
5569 || int_size_in_bytes (valtype) <= 0
5570 || int_size_in_bytes (valtype) > 16)
5571 return false;
5572
5573 /* But not a composite that is an HFA (Homogeneous Floating-point Aggregate)
5574 or an HVA (Homogeneous Short-Vector Aggregate); such a special composite
5575 is always passed/returned in the least significant bits of fp/simd
5576 register(s). */
5577 if (aarch64_vfp_is_call_or_return_candidate (TYPE_MODE (valtype), valtype,
56fe3ca3
RS
5578 &dummy_mode, &dummy_int, NULL,
5579 false))
43e9d192
IB
5580 return false;
5581
38e62001
RS
5582 /* Likewise pure scalable types for SVE vector and predicate registers. */
5583 pure_scalable_type_info pst_info;
5584 if (pst_info.analyze_registers (valtype))
5585 return false;
5586
43e9d192
IB
5587 return true;
5588}
5589
38e62001
RS
5590/* Implement TARGET_FUNCTION_VALUE.
5591 Define how to find the value returned by a function. */
5592
43e9d192 5593static rtx
38e62001
RS
5594aarch64_function_value (const_tree type, const_tree func,
5595 bool outgoing ATTRIBUTE_UNUSED)
43e9d192 5596{
38e62001
RS
5597 machine_mode mode;
5598 int unsignedp;
c600df9a 5599
38e62001
RS
5600 mode = TYPE_MODE (type);
5601 if (INTEGRAL_TYPE_P (type))
5602 mode = promote_function_mode (type, mode, &unsignedp, func, 1);
c600df9a 5603
38e62001
RS
5604 pure_scalable_type_info pst_info;
5605 if (type && pst_info.analyze_registers (type))
5606 return pst_info.get_rtx (mode, V0_REGNUM, P0_REGNUM);
c600df9a 5607
38e62001
RS
5608 /* Generic vectors that map to full SVE modes with -msve-vector-bits=N
5609 are returned in memory, not by value. */
5610 unsigned int vec_flags = aarch64_classify_vector_mode (mode);
5611 bool sve_p = (vec_flags & VEC_ANY_SVE);
c600df9a 5612
43e9d192
IB
5613 if (aarch64_return_in_msb (type))
5614 {
5615 HOST_WIDE_INT size = int_size_in_bytes (type);
5616
5617 if (size % UNITS_PER_WORD != 0)
5618 {
5619 size += UNITS_PER_WORD - size % UNITS_PER_WORD;
f4b31647 5620 mode = int_mode_for_size (size * BITS_PER_UNIT, 0).require ();
43e9d192
IB
5621 }
5622 }
5623
6aa5370c
RS
5624 int count;
5625 machine_mode ag_mode;
56fe3ca3
RS
5626 if (aarch64_vfp_is_call_or_return_candidate (mode, type, &ag_mode, &count,
5627 NULL, false))
43e9d192 5628 {
38e62001 5629 gcc_assert (!sve_p);
43e9d192
IB
5630 if (!aarch64_composite_type_p (type, mode))
5631 {
5632 gcc_assert (count == 1 && mode == ag_mode);
5633 return gen_rtx_REG (mode, V0_REGNUM);
5634 }
5635 else
5636 {
5637 int i;
5638 rtx par;
5639
5640 par = gen_rtx_PARALLEL (mode, rtvec_alloc (count));
5641 for (i = 0; i < count; i++)
5642 {
5643 rtx tmp = gen_rtx_REG (ag_mode, V0_REGNUM + i);
6a70badb
RS
5644 rtx offset = gen_int_mode (i * GET_MODE_SIZE (ag_mode), Pmode);
5645 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp, offset);
43e9d192
IB
5646 XVECEXP (par, 0, i) = tmp;
5647 }
5648 return par;
5649 }
5650 }
5651 else
6aa5370c 5652 {
38e62001
RS
5653 if (sve_p)
5654 {
5655 /* Vector types can acquire a partial SVE mode using things like
5656 __attribute__((vector_size(N))), and this is potentially useful.
5657 However, the choice of mode doesn't affect the type's ABI
5658 identity, so we should treat the types as though they had
5659 the associated integer mode, just like they did before SVE
5660 was introduced.
5661
5662 We know that the vector must be 128 bits or smaller,
5663 otherwise we'd have returned it in memory instead. */
5664 gcc_assert (type
5665 && (aarch64_some_values_include_pst_objects_p (type)
5666 || (vec_flags & VEC_PARTIAL)));
5667
5668 scalar_int_mode int_mode = int_mode_for_mode (mode).require ();
5669 rtx reg = gen_rtx_REG (int_mode, R0_REGNUM);
5670 rtx pair = gen_rtx_EXPR_LIST (VOIDmode, reg, const0_rtx);
5671 return gen_rtx_PARALLEL (mode, gen_rtvec (1, pair));
5672 }
5673 return gen_rtx_REG (mode, R0_REGNUM);
6aa5370c 5674 }
6aa5370c
RS
5675}
5676
43e9d192
IB
5677/* Implements TARGET_FUNCTION_VALUE_REGNO_P.
5678 Return true if REGNO is the number of a hard register in which the values
5679 of called function may come back. */
5680
5681static bool
5682aarch64_function_value_regno_p (const unsigned int regno)
5683{
5684 /* Maximum of 16 bytes can be returned in the general registers. Examples
5685 of 16-byte return values are: 128-bit integers and 16-byte small
5686 structures (excluding homogeneous floating-point aggregates). */
5687 if (regno == R0_REGNUM || regno == R1_REGNUM)
5688 return true;
5689
5690 /* Up to four fp/simd registers can return a function value, e.g. a
5691 homogeneous floating-point aggregate having four members. */
5692 if (regno >= V0_REGNUM && regno < V0_REGNUM + HA_MAX_NUM_FLDS)
d5726973 5693 return TARGET_FLOAT;
43e9d192
IB
5694
5695 return false;
5696}
5697
38e62001
RS
5698/* Subroutine for aarch64_return_in_memory for types that are not returned
5699 in SVE registers. */
43e9d192
IB
5700
5701static bool
38e62001 5702aarch64_return_in_memory_1 (const_tree type)
43e9d192
IB
5703{
5704 HOST_WIDE_INT size;
ef4bddc2 5705 machine_mode ag_mode;
43e9d192
IB
5706 int count;
5707
5708 if (!AGGREGATE_TYPE_P (type)
5709 && TREE_CODE (type) != COMPLEX_TYPE
5710 && TREE_CODE (type) != VECTOR_TYPE)
 5711 /* Simple scalar types are always returned in registers. */
5712 return false;
5713
56fe3ca3
RS
5714 if (aarch64_vfp_is_call_or_return_candidate (TYPE_MODE (type), type,
5715 &ag_mode, &count, NULL, false))
43e9d192
IB
5716 return false;
5717
 5718 /* Types larger than 2 registers are returned in memory. */
5719 size = int_size_in_bytes (type);
5720 return (size < 0 || size > 2 * UNITS_PER_WORD);
5721}
5722
38e62001
RS
5723/* Implement TARGET_RETURN_IN_MEMORY.
5724
5725 If the type T of the result of a function is such that
5726 void func (T arg)
5727 would require that arg be passed as a value in a register (or set of
5728 registers) according to the parameter passing rules, then the result
5729 is returned in the same registers as would be used for such an
5730 argument. */
5731
5732static bool
5733aarch64_return_in_memory (const_tree type, const_tree fndecl ATTRIBUTE_UNUSED)
5734{
5735 pure_scalable_type_info pst_info;
5736 switch (pst_info.analyze (type))
5737 {
5738 case pure_scalable_type_info::IS_PST:
5739 return (pst_info.num_zr () > NUM_FP_ARG_REGS
5740 || pst_info.num_pr () > NUM_PR_ARG_REGS);
5741
5742 case pure_scalable_type_info::DOESNT_MATTER:
5743 gcc_assert (aarch64_return_in_memory_1 (type));
5744 return true;
5745
5746 case pure_scalable_type_info::NO_ABI_IDENTITY:
5747 case pure_scalable_type_info::ISNT_PST:
5748 return aarch64_return_in_memory_1 (type);
5749 }
5750 gcc_unreachable ();
5751}
5752
43e9d192 5753static bool
ef4bddc2 5754aarch64_vfp_is_call_candidate (cumulative_args_t pcum_v, machine_mode mode,
43e9d192
IB
5755 const_tree type, int *nregs)
5756{
5757 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
56fe3ca3 5758 return aarch64_vfp_is_call_or_return_candidate (mode, type,
43e9d192 5759 &pcum->aapcs_vfp_rmode,
56fe3ca3 5760 nregs, NULL, pcum->silent_p);
43e9d192
IB
5761}
5762
985b8393 5763/* Given MODE and TYPE of a function argument, return the alignment in
43e9d192 5764 bits. The idea is to suppress any stronger alignment requested by
c590597c
RE
5765 the user and opt for the natural alignment (specified in AAPCS64 \S
5766 4.1). ABI_BREAK is set to true if the alignment was incorrectly
5767 calculated in versions of GCC prior to GCC-9. This is a helper
5768 function for local use only. */
43e9d192 5769
985b8393 5770static unsigned int
c590597c
RE
5771aarch64_function_arg_alignment (machine_mode mode, const_tree type,
5772 bool *abi_break)
43e9d192 5773{
c590597c 5774 *abi_break = false;
75d6cc81 5775 if (!type)
985b8393 5776 return GET_MODE_ALIGNMENT (mode);
2ec07fa6 5777
75d6cc81 5778 if (integer_zerop (TYPE_SIZE (type)))
985b8393 5779 return 0;
43e9d192 5780
75d6cc81
AL
5781 gcc_assert (TYPE_MODE (type) == mode);
5782
5783 if (!AGGREGATE_TYPE_P (type))
985b8393 5784 return TYPE_ALIGN (TYPE_MAIN_VARIANT (type));
75d6cc81
AL
5785
5786 if (TREE_CODE (type) == ARRAY_TYPE)
985b8393 5787 return TYPE_ALIGN (TREE_TYPE (type));
75d6cc81 5788
985b8393 5789 unsigned int alignment = 0;
c590597c 5790 unsigned int bitfield_alignment = 0;
75d6cc81 5791 for (tree field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
985b8393 5792 if (TREE_CODE (field) == FIELD_DECL)
c590597c 5793 {
56fe3ca3
RS
5794 /* Note that we explicitly consider zero-sized fields here,
5795 even though they don't map to AAPCS64 machine types.
5796 For example, in:
5797
5798 struct __attribute__((aligned(8))) empty {};
5799
5800 struct s {
5801 [[no_unique_address]] empty e;
5802 int x;
5803 };
5804
5805 "s" contains only one Fundamental Data Type (the int field)
5806 but gains 8-byte alignment and size thanks to "e". */
c590597c
RE
5807 alignment = std::max (alignment, DECL_ALIGN (field));
5808 if (DECL_BIT_FIELD_TYPE (field))
5809 bitfield_alignment
5810 = std::max (bitfield_alignment,
5811 TYPE_ALIGN (DECL_BIT_FIELD_TYPE (field)));
5812 }
5813
5814 if (bitfield_alignment > alignment)
5815 {
5816 *abi_break = true;
5817 return bitfield_alignment;
5818 }
43e9d192 5819
985b8393 5820 return alignment;
43e9d192
IB
5821}
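/* Plausible example of the bit-field case handled above; the exact type
   is an assumption made for illustration:

	struct s { __int128 x : 64; };

   Here the bit-field's declared type is 16-byte aligned even though the
   field itself only occupies 64 bits, so BITFIELD_ALIGNMENT exceeds
   ALIGNMENT, *ABI_BREAK is set and the 128-bit alignment is returned.
   Callers use the flag to emit the -Wpsabi note that parameter passing
   changed in GCC 9.1.  */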
5822
5823/* Layout a function argument according to the AAPCS64 rules. The rule
6aa5370c
RS
5824 numbers refer to the rule numbers in the AAPCS64. ORIG_MODE is the
5825 mode that was originally given to us by the target hook, whereas the
5826 mode in ARG might be the result of replacing partial SVE modes with
5827 the equivalent integer mode. */
43e9d192
IB
5828
5829static void
38e62001 5830aarch64_layout_arg (cumulative_args_t pcum_v, const function_arg_info &arg)
43e9d192
IB
5831{
5832 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
c600df9a
RS
5833 tree type = arg.type;
5834 machine_mode mode = arg.mode;
43e9d192
IB
5835 int ncrn, nvrn, nregs;
5836 bool allocate_ncrn, allocate_nvrn;
3abf17cf 5837 HOST_WIDE_INT size;
c590597c 5838 bool abi_break;
43e9d192
IB
5839
5840 /* We need to do this once per argument. */
5841 if (pcum->aapcs_arg_processed)
5842 return;
5843
5844 pcum->aapcs_arg_processed = true;
5845
38e62001
RS
5846 pure_scalable_type_info pst_info;
5847 if (type && pst_info.analyze_registers (type))
c600df9a
RS
5848 {
5849 /* The PCS says that it is invalid to pass an SVE value to an
5850 unprototyped function. There is no ABI-defined location we
5851 can return in this case, so we have no real choice but to raise
5852 an error immediately, even though this is only a query function. */
5853 if (arg.named && pcum->pcs_variant != ARM_PCS_SVE)
5854 {
5855 gcc_assert (!pcum->silent_p);
5856 error ("SVE type %qT cannot be passed to an unprototyped function",
5857 arg.type);
5858 /* Avoid repeating the message, and avoid tripping the assert
5859 below. */
5860 pcum->pcs_variant = ARM_PCS_SVE;
5861 }
5862
5863 /* We would have converted the argument into pass-by-reference
5864 form if it didn't fit in registers. */
38e62001
RS
5865 pcum->aapcs_nextnvrn = pcum->aapcs_nvrn + pst_info.num_zr ();
5866 pcum->aapcs_nextnprn = pcum->aapcs_nprn + pst_info.num_pr ();
c600df9a
RS
5867 gcc_assert (arg.named
5868 && pcum->pcs_variant == ARM_PCS_SVE
c600df9a
RS
5869 && pcum->aapcs_nextnvrn <= NUM_FP_ARG_REGS
5870 && pcum->aapcs_nextnprn <= NUM_PR_ARG_REGS);
38e62001
RS
5871 pcum->aapcs_reg = pst_info.get_rtx (mode, V0_REGNUM + pcum->aapcs_nvrn,
5872 P0_REGNUM + pcum->aapcs_nprn);
c600df9a
RS
5873 return;
5874 }
5875
38e62001
RS
5876 /* Generic vectors that map to full SVE modes with -msve-vector-bits=N
5877 are passed by reference, not by value. */
5878 unsigned int vec_flags = aarch64_classify_vector_mode (mode);
5879 bool sve_p = (vec_flags & VEC_ANY_SVE);
5880 if (sve_p)
5881 /* Vector types can acquire a partial SVE mode using things like
5882 __attribute__((vector_size(N))), and this is potentially useful.
5883 However, the choice of mode doesn't affect the type's ABI
5884 identity, so we should treat the types as though they had
5885 the associated integer mode, just like they did before SVE
5886 was introduced.
5887
5888 We know that the vector must be 128 bits or smaller,
5889 otherwise we'd have passed it in memory instead. */
5890 gcc_assert (type
5891 && (aarch64_some_values_include_pst_objects_p (type)
5892 || (vec_flags & VEC_PARTIAL)));
c600df9a 5893
3abf17cf 5894 /* Size in bytes, rounded to the nearest multiple of 8 bytes. */
6a70badb
RS
5895 if (type)
5896 size = int_size_in_bytes (type);
5897 else
5898 /* No frontends can create types with variable-sized modes, so we
5899 shouldn't be asked to pass or return them. */
5900 size = GET_MODE_SIZE (mode).to_constant ();
5901 size = ROUND_UP (size, UNITS_PER_WORD);
3abf17cf 5902
43e9d192
IB
5903 allocate_ncrn = (type) ? !(FLOAT_TYPE_P (type)) : !FLOAT_MODE_P (mode);
5904 allocate_nvrn = aarch64_vfp_is_call_candidate (pcum_v,
5905 mode,
5906 type,
5907 &nregs);
38e62001 5908 gcc_assert (!sve_p || !allocate_nvrn);
43e9d192
IB
5909
5910 /* allocate_ncrn may be false-positive, but allocate_nvrn is quite reliable.
5911 The following code thus handles passing by SIMD/FP registers first. */
5912
5913 nvrn = pcum->aapcs_nvrn;
5914
 5915 /* C1 - C5 for floating point, homogeneous floating-point aggregates (HFA)
 5916 and homogeneous short-vector aggregates (HVA). */
5917 if (allocate_nvrn)
5918 {
c600df9a 5919 if (!pcum->silent_p && !TARGET_FLOAT)
fc29dfc9 5920 aarch64_err_no_fpadvsimd (mode);
261fb553 5921
43e9d192
IB
5922 if (nvrn + nregs <= NUM_FP_ARG_REGS)
5923 {
5924 pcum->aapcs_nextnvrn = nvrn + nregs;
5925 if (!aarch64_composite_type_p (type, mode))
5926 {
5927 gcc_assert (nregs == 1);
5928 pcum->aapcs_reg = gen_rtx_REG (mode, V0_REGNUM + nvrn);
5929 }
5930 else
5931 {
5932 rtx par;
5933 int i;
5934 par = gen_rtx_PARALLEL (mode, rtvec_alloc (nregs));
5935 for (i = 0; i < nregs; i++)
5936 {
5937 rtx tmp = gen_rtx_REG (pcum->aapcs_vfp_rmode,
5938 V0_REGNUM + nvrn + i);
6a70badb
RS
5939 rtx offset = gen_int_mode
5940 (i * GET_MODE_SIZE (pcum->aapcs_vfp_rmode), Pmode);
5941 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp, offset);
43e9d192
IB
5942 XVECEXP (par, 0, i) = tmp;
5943 }
5944 pcum->aapcs_reg = par;
5945 }
5946 return;
5947 }
5948 else
5949 {
5950 /* C.3 NSRN is set to 8. */
5951 pcum->aapcs_nextnvrn = NUM_FP_ARG_REGS;
5952 goto on_stack;
5953 }
5954 }
5955
5956 ncrn = pcum->aapcs_ncrn;
3abf17cf 5957 nregs = size / UNITS_PER_WORD;
43e9d192
IB
5958
 5959 /* C6 - C9, though the sign and zero extension semantics are
 5960 handled elsewhere. This is the case where the argument fits
 5961 entirely in general registers. */
5962 if (allocate_ncrn && (ncrn + nregs <= NUM_ARG_REGS))
5963 {
43e9d192
IB
5964 gcc_assert (nregs == 0 || nregs == 1 || nregs == 2);
5965
5966 /* C.8 if the argument has an alignment of 16 then the NGRN is
c590597c 5967 rounded up to the next even number. */
985b8393
JJ
5968 if (nregs == 2
5969 && ncrn % 2
2ec07fa6 5970 /* The == 16 * BITS_PER_UNIT instead of >= 16 * BITS_PER_UNIT
985b8393 5971 comparison is there because for > 16 * BITS_PER_UNIT
2ec07fa6
RR
5972 alignment nregs should be > 2 and therefore it should be
5973 passed by reference rather than value. */
38e62001 5974 && (aarch64_function_arg_alignment (mode, type, &abi_break)
c590597c 5975 == 16 * BITS_PER_UNIT))
985b8393 5976 {
c590597c
RE
5977 if (abi_break && warn_psabi && currently_expanding_gimple_stmt)
5978 inform (input_location, "parameter passing for argument of type "
5979 "%qT changed in GCC 9.1", type);
985b8393
JJ
5980 ++ncrn;
5981 gcc_assert (ncrn + nregs <= NUM_ARG_REGS);
43e9d192 5982 }
2ec07fa6 5983
38e62001
RS
5984 /* If an argument with an SVE mode needs to be shifted up to the
5985 high part of the register, treat it as though it had an integer mode.
5986 Using the normal (parallel [...]) would suppress the shifting. */
5987 if (sve_p
5988 && BYTES_BIG_ENDIAN
5989 && maybe_ne (GET_MODE_SIZE (mode), nregs * UNITS_PER_WORD)
5990 && aarch64_pad_reg_upward (mode, type, false))
5991 {
5992 mode = int_mode_for_mode (mode).require ();
5993 sve_p = false;
5994 }
5995
43e9d192 5996 /* NREGS can be 0 when e.g. an empty structure is to be passed.
c590597c 5997 A reg is still generated for it, but the caller should be smart
43e9d192 5998 enough not to use it. */
38e62001
RS
5999 if (nregs == 0
6000 || (nregs == 1 && !sve_p)
6001 || GET_MODE_CLASS (mode) == MODE_INT)
2ec07fa6 6002 pcum->aapcs_reg = gen_rtx_REG (mode, R0_REGNUM + ncrn);
43e9d192
IB
6003 else
6004 {
6005 rtx par;
6006 int i;
6007
6008 par = gen_rtx_PARALLEL (mode, rtvec_alloc (nregs));
6009 for (i = 0; i < nregs; i++)
6010 {
38e62001
RS
6011 scalar_int_mode reg_mode = word_mode;
6012 if (nregs == 1)
6013 reg_mode = int_mode_for_mode (mode).require ();
6014 rtx tmp = gen_rtx_REG (reg_mode, R0_REGNUM + ncrn + i);
43e9d192
IB
6015 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp,
6016 GEN_INT (i * UNITS_PER_WORD));
6017 XVECEXP (par, 0, i) = tmp;
6018 }
6019 pcum->aapcs_reg = par;
6020 }
6021
6022 pcum->aapcs_nextncrn = ncrn + nregs;
6023 return;
6024 }
6025
6026 /* C.11 */
6027 pcum->aapcs_nextncrn = NUM_ARG_REGS;
6028
6029 /* The argument is passed on stack; record the needed number of words for
3abf17cf 6030 this argument and align the total size if necessary. */
43e9d192 6031on_stack:
3abf17cf 6032 pcum->aapcs_stack_words = size / UNITS_PER_WORD;
2ec07fa6 6033
38e62001 6034 if (aarch64_function_arg_alignment (mode, type, &abi_break)
c590597c
RE
6035 == 16 * BITS_PER_UNIT)
6036 {
6037 int new_size = ROUND_UP (pcum->aapcs_stack_size, 16 / UNITS_PER_WORD);
6038 if (pcum->aapcs_stack_size != new_size)
6039 {
6040 if (abi_break && warn_psabi && currently_expanding_gimple_stmt)
6041 inform (input_location, "parameter passing for argument of type "
6042 "%qT changed in GCC 9.1", type);
6043 pcum->aapcs_stack_size = new_size;
6044 }
6045 }
43e9d192
IB
6046 return;
6047}
6048
6049/* Implement TARGET_FUNCTION_ARG. */
6050
6051static rtx
6783fdb7 6052aarch64_function_arg (cumulative_args_t pcum_v, const function_arg_info &arg)
43e9d192
IB
6053{
6054 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
08cc4d92 6055 gcc_assert (pcum->pcs_variant == ARM_PCS_AAPCS64
c600df9a
RS
6056 || pcum->pcs_variant == ARM_PCS_SIMD
6057 || pcum->pcs_variant == ARM_PCS_SVE);
43e9d192 6058
6783fdb7 6059 if (arg.end_marker_p ())
08cc4d92 6060 return gen_int_mode (pcum->pcs_variant, DImode);
43e9d192 6061
38e62001 6062 aarch64_layout_arg (pcum_v, arg);
43e9d192
IB
6063 return pcum->aapcs_reg;
6064}
6065
6066void
6067aarch64_init_cumulative_args (CUMULATIVE_ARGS *pcum,
08cc4d92
RS
6068 const_tree fntype,
6069 rtx libname ATTRIBUTE_UNUSED,
6070 const_tree fndecl ATTRIBUTE_UNUSED,
c600df9a
RS
6071 unsigned n_named ATTRIBUTE_UNUSED,
6072 bool silent_p)
43e9d192
IB
6073{
6074 pcum->aapcs_ncrn = 0;
6075 pcum->aapcs_nvrn = 0;
c600df9a 6076 pcum->aapcs_nprn = 0;
43e9d192
IB
6077 pcum->aapcs_nextncrn = 0;
6078 pcum->aapcs_nextnvrn = 0;
c600df9a 6079 pcum->aapcs_nextnprn = 0;
08cc4d92
RS
6080 if (fntype)
6081 pcum->pcs_variant = (arm_pcs) fntype_abi (fntype).id ();
6082 else
6083 pcum->pcs_variant = ARM_PCS_AAPCS64;
43e9d192
IB
6084 pcum->aapcs_reg = NULL_RTX;
6085 pcum->aapcs_arg_processed = false;
6086 pcum->aapcs_stack_words = 0;
6087 pcum->aapcs_stack_size = 0;
c600df9a 6088 pcum->silent_p = silent_p;
43e9d192 6089
c600df9a
RS
6090 if (!silent_p
6091 && !TARGET_FLOAT
261fb553
AL
6092 && fntype && fntype != error_mark_node)
6093 {
6094 const_tree type = TREE_TYPE (fntype);
6095 machine_mode mode ATTRIBUTE_UNUSED; /* To pass pointer as argument. */
6096 int nregs ATTRIBUTE_UNUSED; /* Likewise. */
6097 if (aarch64_vfp_is_call_or_return_candidate (TYPE_MODE (type), type,
56fe3ca3 6098 &mode, &nregs, NULL, false))
fc29dfc9 6099 aarch64_err_no_fpadvsimd (TYPE_MODE (type));
261fb553 6100 }
c600df9a
RS
6101
6102 if (!silent_p
6103 && !TARGET_SVE
6104 && pcum->pcs_variant == ARM_PCS_SVE)
6105 {
6106 /* We can't gracefully recover at this point, so make this a
6107 fatal error. */
6108 if (fndecl)
6109 fatal_error (input_location, "%qE requires the SVE ISA extension",
6110 fndecl);
6111 else
6112 fatal_error (input_location, "calls to functions of type %qT require"
6113 " the SVE ISA extension", fntype);
6114 }
43e9d192
IB
6115}
6116
6117static void
6118aarch64_function_arg_advance (cumulative_args_t pcum_v,
6930c98c 6119 const function_arg_info &arg)
43e9d192
IB
6120{
6121 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
08cc4d92 6122 if (pcum->pcs_variant == ARM_PCS_AAPCS64
c600df9a
RS
6123 || pcum->pcs_variant == ARM_PCS_SIMD
6124 || pcum->pcs_variant == ARM_PCS_SVE)
43e9d192 6125 {
38e62001 6126 aarch64_layout_arg (pcum_v, arg);
43e9d192
IB
6127 gcc_assert ((pcum->aapcs_reg != NULL_RTX)
6128 != (pcum->aapcs_stack_words != 0));
6129 pcum->aapcs_arg_processed = false;
6130 pcum->aapcs_ncrn = pcum->aapcs_nextncrn;
6131 pcum->aapcs_nvrn = pcum->aapcs_nextnvrn;
c600df9a 6132 pcum->aapcs_nprn = pcum->aapcs_nextnprn;
43e9d192
IB
6133 pcum->aapcs_stack_size += pcum->aapcs_stack_words;
6134 pcum->aapcs_stack_words = 0;
6135 pcum->aapcs_reg = NULL_RTX;
6136 }
6137}
6138
6139bool
6140aarch64_function_arg_regno_p (unsigned regno)
6141{
6142 return ((GP_REGNUM_P (regno) && regno < R0_REGNUM + NUM_ARG_REGS)
6143 || (FP_REGNUM_P (regno) && regno < V0_REGNUM + NUM_FP_ARG_REGS));
6144}
6145
6146/* Implement FUNCTION_ARG_BOUNDARY. Every parameter gets at least
6147 PARM_BOUNDARY bits of alignment, but will be given anything up
6148 to STACK_BOUNDARY bits if the type requires it. This makes sure
6149 that both before and after the layout of each argument, the Next
6150 Stacked Argument Address (NSAA) will have a minimum alignment of
6151 8 bytes. */
6152
6153static unsigned int
ef4bddc2 6154aarch64_function_arg_boundary (machine_mode mode, const_tree type)
43e9d192 6155{
c590597c
RE
6156 bool abi_break;
6157 unsigned int alignment = aarch64_function_arg_alignment (mode, type,
6158 &abi_break);
 6159 if (abi_break && warn_psabi)
6160 inform (input_location, "parameter passing for argument of type "
6161 "%qT changed in GCC 9.1", type);
6162
985b8393 6163 return MIN (MAX (alignment, PARM_BOUNDARY), STACK_BOUNDARY);
43e9d192
IB
6164}
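/* Numerical sketch; the boundary values are assumptions about the
   aarch64 definitions rather than something stated here.  With
   PARM_BOUNDARY == 64 and STACK_BOUNDARY == 128 the value returned above
   is always 64 or 128 bits, i.e. every stack argument slot is at least
   8-byte and at most 16-byte aligned, which keeps the NSAA aligned as
   described in the comment before this function.  */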
6165
43cacb12
RS
6166/* Implement TARGET_GET_RAW_RESULT_MODE and TARGET_GET_RAW_ARG_MODE. */
6167
6168static fixed_size_mode
6169aarch64_get_reg_raw_mode (int regno)
6170{
6171 if (TARGET_SVE && FP_REGNUM_P (regno))
6172 /* Don't use the SVE part of the register for __builtin_apply and
6173 __builtin_return. The SVE registers aren't used by the normal PCS,
6174 so using them there would be a waste of time. The PCS extensions
6175 for SVE types are fundamentally incompatible with the
6176 __builtin_return/__builtin_apply interface. */
6177 return as_a <fixed_size_mode> (V16QImode);
6178 return default_get_reg_raw_mode (regno);
6179}
6180
76b0cbf8 6181/* Implement TARGET_FUNCTION_ARG_PADDING.
43e9d192
IB
6182
6183 Small aggregate types are placed in the lowest memory address.
6184
6185 The related parameter passing rules are B.4, C.3, C.5 and C.14. */
6186
76b0cbf8
RS
6187static pad_direction
6188aarch64_function_arg_padding (machine_mode mode, const_tree type)
43e9d192
IB
6189{
6190 /* On little-endian targets, the least significant byte of every stack
6191 argument is passed at the lowest byte address of the stack slot. */
6192 if (!BYTES_BIG_ENDIAN)
76b0cbf8 6193 return PAD_UPWARD;
43e9d192 6194
00edcfbe 6195 /* Otherwise, integral, floating-point and pointer types are padded downward:
43e9d192
IB
6196 the least significant byte of a stack argument is passed at the highest
6197 byte address of the stack slot. */
6198 if (type
00edcfbe
YZ
6199 ? (INTEGRAL_TYPE_P (type) || SCALAR_FLOAT_TYPE_P (type)
6200 || POINTER_TYPE_P (type))
43e9d192 6201 : (SCALAR_INT_MODE_P (mode) || SCALAR_FLOAT_MODE_P (mode)))
76b0cbf8 6202 return PAD_DOWNWARD;
43e9d192
IB
6203
6204 /* Everything else padded upward, i.e. data in first byte of stack slot. */
76b0cbf8 6205 return PAD_UPWARD;
43e9d192
IB
6206}
6207
6208/* Similarly, for use by BLOCK_REG_PADDING (MODE, TYPE, FIRST).
6209
6210 It specifies padding for the last (may also be the only)
 6211 element of a block move between registers and memory. Assuming
 6212 the block is in memory, padding upward means that the last
 6213 element is padded after its most significant byte, while in
 6214 downward padding the last element is padded at its least
 6215 significant byte side.
6216
6217 Small aggregates and small complex types are always padded
6218 upwards.
6219
6220 We don't need to worry about homogeneous floating-point or
6221 short-vector aggregates; their move is not affected by the
6222 padding direction determined here. Regardless of endianness,
6223 each element of such an aggregate is put in the least
6224 significant bits of a fp/simd register.
6225
6226 Return !BYTES_BIG_ENDIAN if the least significant byte of the
6227 register has useful data, and return the opposite if the most
6228 significant byte does. */
6229
6230bool
ef4bddc2 6231aarch64_pad_reg_upward (machine_mode mode, const_tree type,
43e9d192
IB
6232 bool first ATTRIBUTE_UNUSED)
6233{
6234
38e62001
RS
6235 /* Aside from pure scalable types, small composite types are always
6236 padded upward. */
43e9d192
IB
6237 if (BYTES_BIG_ENDIAN && aarch64_composite_type_p (type, mode))
6238 {
6a70badb
RS
6239 HOST_WIDE_INT size;
6240 if (type)
6241 size = int_size_in_bytes (type);
6242 else
6243 /* No frontends can create types with variable-sized modes, so we
6244 shouldn't be asked to pass or return them. */
6245 size = GET_MODE_SIZE (mode).to_constant ();
43e9d192 6246 if (size < 2 * UNITS_PER_WORD)
38e62001
RS
6247 {
6248 pure_scalable_type_info pst_info;
6249 if (pst_info.analyze_registers (type))
6250 return false;
6251 return true;
6252 }
43e9d192
IB
6253 }
6254
6255 /* Otherwise, use the default padding. */
6256 return !BYTES_BIG_ENDIAN;
6257}
6258
095a2d76 6259static scalar_int_mode
43e9d192
IB
6260aarch64_libgcc_cmp_return_mode (void)
6261{
6262 return SImode;
6263}
6264
a3eb8a52
EB
6265#define PROBE_INTERVAL (1 << STACK_CHECK_PROBE_INTERVAL_EXP)
6266
6267/* We use the 12-bit shifted immediate arithmetic instructions so values
 6268 must be a multiple of (1 << 12), i.e. 4096. */
6269#define ARITH_FACTOR 4096
6270
6271#if (PROBE_INTERVAL % ARITH_FACTOR) != 0
6272#error Cannot use simple address calculation for stack probing
6273#endif
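/* Arithmetic sketch; the exponent value is an assumption made for
   illustration.  With STACK_CHECK_PROBE_INTERVAL_EXP == 12,
   PROBE_INTERVAL is 1 << 12 == 4096 == ARITH_FACTOR, so probe offsets
   are exact multiples of 4096 and fit the 12-bit shifted immediate form
   "sub xN, xN, #imm, lsl #12".  */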
6274
6275/* The pair of scratch registers used for stack probing. */
8921ccbb
OH
6276#define PROBE_STACK_FIRST_REG R9_REGNUM
6277#define PROBE_STACK_SECOND_REG R10_REGNUM
a3eb8a52 6278
6a70badb 6279/* Emit code to probe a range of stack addresses from FIRST to FIRST+POLY_SIZE,
a3eb8a52
EB
6280 inclusive. These are offsets from the current stack pointer. */
6281
6282static void
6a70badb 6283aarch64_emit_probe_stack_range (HOST_WIDE_INT first, poly_int64 poly_size)
a3eb8a52 6284{
6a70badb
RS
6285 HOST_WIDE_INT size;
6286 if (!poly_size.is_constant (&size))
6287 {
6288 sorry ("stack probes for SVE frames");
6289 return;
6290 }
6291
5f5c5e0f 6292 rtx reg1 = gen_rtx_REG (Pmode, PROBE_STACK_FIRST_REG);
a3eb8a52
EB
6293
6294 /* See the same assertion on PROBE_INTERVAL above. */
6295 gcc_assert ((first % ARITH_FACTOR) == 0);
6296
6297 /* See if we have a constant small number of probes to generate. If so,
6298 that's the easy case. */
6299 if (size <= PROBE_INTERVAL)
6300 {
6301 const HOST_WIDE_INT base = ROUND_UP (size, ARITH_FACTOR);
6302
6303 emit_set_insn (reg1,
5f5c5e0f 6304 plus_constant (Pmode,
a3eb8a52 6305 stack_pointer_rtx, -(first + base)));
5f5c5e0f 6306 emit_stack_probe (plus_constant (Pmode, reg1, base - size));
a3eb8a52
EB
6307 }
6308
6309 /* The run-time loop is made up of 8 insns in the generic case while the
6310 compile-time loop is made up of 4+2*(n-2) insns for n # of intervals. */
6311 else if (size <= 4 * PROBE_INTERVAL)
6312 {
6313 HOST_WIDE_INT i, rem;
6314
6315 emit_set_insn (reg1,
5f5c5e0f 6316 plus_constant (Pmode,
a3eb8a52
EB
6317 stack_pointer_rtx,
6318 -(first + PROBE_INTERVAL)));
6319 emit_stack_probe (reg1);
6320
6321 /* Probe at FIRST + N * PROBE_INTERVAL for values of N from 2 until
6322 it exceeds SIZE. If only two probes are needed, this will not
6323 generate any code. Then probe at FIRST + SIZE. */
6324 for (i = 2 * PROBE_INTERVAL; i < size; i += PROBE_INTERVAL)
6325 {
6326 emit_set_insn (reg1,
5f5c5e0f 6327 plus_constant (Pmode, reg1, -PROBE_INTERVAL));
a3eb8a52
EB
6328 emit_stack_probe (reg1);
6329 }
6330
6331 rem = size - (i - PROBE_INTERVAL);
6332 if (rem > 256)
6333 {
6334 const HOST_WIDE_INT base = ROUND_UP (rem, ARITH_FACTOR);
6335
5f5c5e0f
EB
6336 emit_set_insn (reg1, plus_constant (Pmode, reg1, -base));
6337 emit_stack_probe (plus_constant (Pmode, reg1, base - rem));
a3eb8a52
EB
6338 }
6339 else
5f5c5e0f 6340 emit_stack_probe (plus_constant (Pmode, reg1, -rem));
a3eb8a52
EB
6341 }
6342
6343 /* Otherwise, do the same as above, but in a loop. Note that we must be
6344 extra careful with variables wrapping around because we might be at
6345 the very top (or the very bottom) of the address space and we have
6346 to be able to handle this case properly; in particular, we use an
6347 equality test for the loop condition. */
6348 else
6349 {
5f5c5e0f 6350 rtx reg2 = gen_rtx_REG (Pmode, PROBE_STACK_SECOND_REG);
a3eb8a52
EB
6351
6352 /* Step 1: round SIZE to the previous multiple of the interval. */
6353
6354 HOST_WIDE_INT rounded_size = size & -PROBE_INTERVAL;
6355
6356
6357 /* Step 2: compute initial and final value of the loop counter. */
6358
6359 /* TEST_ADDR = SP + FIRST. */
6360 emit_set_insn (reg1,
5f5c5e0f 6361 plus_constant (Pmode, stack_pointer_rtx, -first));
a3eb8a52
EB
6362
6363 /* LAST_ADDR = SP + FIRST + ROUNDED_SIZE. */
13f752b2
JL
6364 HOST_WIDE_INT adjustment = - (first + rounded_size);
6365 if (! aarch64_uimm12_shift (adjustment))
6366 {
6367 aarch64_internal_mov_immediate (reg2, GEN_INT (adjustment),
6368 true, Pmode);
6369 emit_set_insn (reg2, gen_rtx_PLUS (Pmode, stack_pointer_rtx, reg2));
6370 }
6371 else
8dd64cdf
EB
6372 emit_set_insn (reg2,
6373 plus_constant (Pmode, stack_pointer_rtx, adjustment));
6374
a3eb8a52
EB
6375 /* Step 3: the loop
6376
6377 do
6378 {
6379 TEST_ADDR = TEST_ADDR + PROBE_INTERVAL
6380 probe at TEST_ADDR
6381 }
6382 while (TEST_ADDR != LAST_ADDR)
6383
6384 probes at FIRST + N * PROBE_INTERVAL for values of N from 1
6385 until it is equal to ROUNDED_SIZE. */
6386
5f5c5e0f 6387 emit_insn (gen_probe_stack_range (reg1, reg1, reg2));
a3eb8a52
EB
6388
6389
6390 /* Step 4: probe at FIRST + SIZE if we cannot assert at compile-time
6391 that SIZE is equal to ROUNDED_SIZE. */
6392
6393 if (size != rounded_size)
6394 {
6395 HOST_WIDE_INT rem = size - rounded_size;
6396
6397 if (rem > 256)
6398 {
6399 const HOST_WIDE_INT base = ROUND_UP (rem, ARITH_FACTOR);
6400
5f5c5e0f
EB
6401 emit_set_insn (reg2, plus_constant (Pmode, reg2, -base));
6402 emit_stack_probe (plus_constant (Pmode, reg2, base - rem));
a3eb8a52
EB
6403 }
6404 else
5f5c5e0f 6405 emit_stack_probe (plus_constant (Pmode, reg2, -rem));
a3eb8a52
EB
6406 }
6407 }
6408
6409 /* Make sure nothing is scheduled before we are done. */
6410 emit_insn (gen_blockage ());
6411}
6412
6413/* Probe a range of stack addresses from REG1 to REG2 inclusive. These are
6414 absolute addresses. */
6415
6416const char *
6417aarch64_output_probe_stack_range (rtx reg1, rtx reg2)
6418{
6419 static int labelno = 0;
6420 char loop_lab[32];
6421 rtx xops[2];
6422
6423 ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno++);
6424
6425 /* Loop. */
6426 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);
6427
cd1bef27 6428 HOST_WIDE_INT stack_clash_probe_interval
028d4092 6429 = 1 << param_stack_clash_protection_guard_size;
cd1bef27 6430
a3eb8a52
EB
6431 /* TEST_ADDR = TEST_ADDR + PROBE_INTERVAL. */
6432 xops[0] = reg1;
cd1bef27
JL
6433 HOST_WIDE_INT interval;
6434 if (flag_stack_clash_protection)
6435 interval = stack_clash_probe_interval;
6436 else
6437 interval = PROBE_INTERVAL;
6438
6439 gcc_assert (aarch64_uimm12_shift (interval));
6440 xops[1] = GEN_INT (interval);
6441
a3eb8a52
EB
6442 output_asm_insn ("sub\t%0, %0, %1", xops);
6443
cd1bef27
JL
6444 /* If doing stack clash protection then we probe up by the ABI specified
6445 amount. We do this because we're dropping full pages at a time in the
6446 loop. But if we're doing non-stack clash probing, probe at SP 0. */
6447 if (flag_stack_clash_protection)
6448 xops[1] = GEN_INT (STACK_CLASH_CALLER_GUARD);
6449 else
6450 xops[1] = CONST0_RTX (GET_MODE (xops[1]));
6451
6452 /* Probe at TEST_ADDR. If we're inside the loop it is always safe to probe
6453 by this amount for each iteration. */
6454 output_asm_insn ("str\txzr, [%0, %1]", xops);
a3eb8a52
EB
6455
6456 /* Test if TEST_ADDR == LAST_ADDR. */
6457 xops[1] = reg2;
6458 output_asm_insn ("cmp\t%0, %1", xops);
6459
6460 /* Branch. */
6461 fputs ("\tb.ne\t", asm_out_file);
6462 assemble_name_raw (asm_out_file, loop_lab);
6463 fputc ('\n', asm_out_file);
6464
6465 return "";
6466}
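
/* For illustration only: without -fstack-clash-protection and with the
   default 4 KiB PROBE_INTERVAL, the routine above emits a loop of roughly
   this shape (register names depend on the operands passed in):

	.LPSRL0:
		sub	reg1, reg1, #4096
		str	xzr, [reg1, 0]
		cmp	reg1, reg2
		b.ne	.LPSRL0

   With stack clash protection enabled, the decrement becomes the guard
   size and the store probes at STACK_CLASH_CALLER_GUARD above reg1
   instead of at offset 0.  */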
6467
eb471ba3
TC
6468/* Emit the probe loop for doing stack clash probes and stack adjustments for
6469 SVE. This emits probes from BASE to BASE - ADJUSTMENT based on a guard size
6470 of GUARD_SIZE.  When a probe is emitted, it is done at most
6471 MIN_PROBE_THRESHOLD bytes from the current BASE, and successive probes
6472 are at most MIN_PROBE_THRESHOLD bytes apart.  By the end of this function
6473 BASE = BASE - ADJUSTMENT. */
6474
6475const char *
6476aarch64_output_probe_sve_stack_clash (rtx base, rtx adjustment,
6477 rtx min_probe_threshold, rtx guard_size)
6478{
6479 /* This function is not allowed to use any instruction generation function
6480 like gen_ and friends.  If you do, you'll likely ICE during CFG validation,
6481 so instead emit the code you want using output_asm_insn. */
6482 gcc_assert (flag_stack_clash_protection);
6483 gcc_assert (CONST_INT_P (min_probe_threshold) && CONST_INT_P (guard_size));
6484 gcc_assert (INTVAL (guard_size) > INTVAL (min_probe_threshold));
6485
6486 /* The minimum required allocation before the residual requires probing. */
6487 HOST_WIDE_INT residual_probe_guard = INTVAL (min_probe_threshold);
6488
6489 /* Clamp the value down to the nearest value that can be used with a cmp. */
6490 residual_probe_guard = aarch64_clamp_to_uimm12_shift (residual_probe_guard);
6491 rtx probe_offset_value_rtx = gen_int_mode (residual_probe_guard, Pmode);
6492
6493 gcc_assert (INTVAL (min_probe_threshold) >= residual_probe_guard);
6494 gcc_assert (aarch64_uimm12_shift (residual_probe_guard));
6495
6496 static int labelno = 0;
6497 char loop_start_lab[32];
6498 char loop_end_lab[32];
6499 rtx xops[2];
6500
6501 ASM_GENERATE_INTERNAL_LABEL (loop_start_lab, "SVLPSPL", labelno);
6502 ASM_GENERATE_INTERNAL_LABEL (loop_end_lab, "SVLPEND", labelno++);
6503
6504 /* Emit loop start label. */
6505 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_start_lab);
6506
6507 /* ADJUSTMENT < RESIDUAL_PROBE_GUARD. */
6508 xops[0] = adjustment;
6509 xops[1] = probe_offset_value_rtx;
6510 output_asm_insn ("cmp\t%0, %1", xops);
6511
6512 /* Branch to end if not enough adjustment to probe. */
6513 fputs ("\tb.lt\t", asm_out_file);
6514 assemble_name_raw (asm_out_file, loop_end_lab);
6515 fputc ('\n', asm_out_file);
6516
6517 /* BASE = BASE - RESIDUAL_PROBE_GUARD. */
6518 xops[0] = base;
6519 xops[1] = probe_offset_value_rtx;
6520 output_asm_insn ("sub\t%0, %0, %1", xops);
6521
6522 /* Probe at BASE. */
6523 xops[1] = const0_rtx;
6524 output_asm_insn ("str\txzr, [%0, %1]", xops);
6525
6526 /* ADJUSTMENT = ADJUSTMENT - RESIDUAL_PROBE_GUARD. */
6527 xops[0] = adjustment;
6528 xops[1] = probe_offset_value_rtx;
6529 output_asm_insn ("sub\t%0, %0, %1", xops);
6530
6531 /* Branch to start if still more bytes to allocate. */
6532 fputs ("\tb\t", asm_out_file);
6533 assemble_name_raw (asm_out_file, loop_start_lab);
6534 fputc ('\n', asm_out_file);
6535
6536 /* No probe needed; leave the loop. */
6537 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_end_lab);
6538
6539 /* BASE = BASE - ADJUSTMENT. */
6540 xops[0] = base;
6541 xops[1] = adjustment;
6542 output_asm_insn ("sub\t%0, %0, %1", xops);
6543 return "";
6544}
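
/* For illustration only: the routine above expands to a loop of roughly
   this shape, where GUARD is the clamped RESIDUAL_PROBE_GUARD value:

	.SVLPSPL0:
		cmp	adjustment, GUARD
		b.lt	.SVLPEND0
		sub	base, base, GUARD
		str	xzr, [base, 0]
		sub	adjustment, adjustment, GUARD
		b	.SVLPSPL0
	.SVLPEND0:
		sub	base, base, adjustment  */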
6545
d6cb6d6a
WD
6546/* Determine whether a frame chain needs to be generated. */
6547static bool
6548aarch64_needs_frame_chain (void)
6549{
6550 /* Force a frame chain for EH returns so the return address is at FP+8. */
6551 if (frame_pointer_needed || crtl->calls_eh_return)
6552 return true;
6553
6554 /* A leaf function cannot have calls or write LR. */
6555 bool is_leaf = crtl->is_leaf && !df_regs_ever_live_p (LR_REGNUM);
6556
6557 /* Don't use a frame chain in leaf functions if leaf frame pointers
6558 are disabled. */
6559 if (flag_omit_leaf_frame_pointer && is_leaf)
6560 return false;
6561
6562 return aarch64_use_frame_pointer;
6563}
6564
43e9d192
IB
6565/* Mark the registers that need to be saved by the callee and calculate
6566 the size of the callee-saved registers area and frame record (both FP
33a2e348 6567 and LR may be omitted). */
43e9d192
IB
6568static void
6569aarch64_layout_frame (void)
6570{
c600df9a 6571 poly_int64 offset = 0;
4b0685d9 6572 int regno, last_fp_reg = INVALID_REGNUM;
c600df9a
RS
6573 machine_mode vector_save_mode = aarch64_reg_save_mode (V8_REGNUM);
6574 poly_int64 vector_save_size = GET_MODE_SIZE (vector_save_mode);
6575 bool frame_related_fp_reg_p = false;
ab43763e 6576 aarch64_frame &frame = cfun->machine->frame;
43e9d192 6577
ab43763e 6578 frame.emit_frame_chain = aarch64_needs_frame_chain ();
7040939b 6579
8c6e3b23
TC
6580 /* Adjust the outgoing arguments size if required. Keep it in sync with what
6581 the mid-end is doing. */
6582 crtl->outgoing_args_size = STACK_DYNAMIC_OFFSET (cfun);
6583
97826595
MS
6584#define SLOT_NOT_REQUIRED (-2)
6585#define SLOT_REQUIRED (-1)
6586
ab43763e
RS
6587 frame.wb_candidate1 = INVALID_REGNUM;
6588 frame.wb_candidate2 = INVALID_REGNUM;
c600df9a 6589 frame.spare_pred_reg = INVALID_REGNUM;
363ffa50 6590
43e9d192 6591 /* First mark all the registers that really need to be saved... */
c600df9a 6592 for (regno = 0; regno <= LAST_SAVED_REGNUM; regno++)
ab43763e 6593 frame.reg_offset[regno] = SLOT_NOT_REQUIRED;
43e9d192
IB
6594
6595 /* ... that includes the eh data registers (if needed)... */
6596 if (crtl->calls_eh_return)
6597 for (regno = 0; EH_RETURN_DATA_REGNO (regno) != INVALID_REGNUM; regno++)
ab43763e 6598 frame.reg_offset[EH_RETURN_DATA_REGNO (regno)] = SLOT_REQUIRED;
43e9d192
IB
6599
6600 /* ... and any callee saved register that dataflow says is live. */
6601 for (regno = R0_REGNUM; regno <= R30_REGNUM; regno++)
6602 if (df_regs_ever_live_p (regno)
dcdd0f05 6603 && !fixed_regs[regno]
1c923b60 6604 && (regno == R30_REGNUM
dcdd0f05 6605 || !crtl->abi->clobbers_full_reg_p (regno)))
ab43763e 6606 frame.reg_offset[regno] = SLOT_REQUIRED;
43e9d192
IB
6607
6608 for (regno = V0_REGNUM; regno <= V31_REGNUM; regno++)
6609 if (df_regs_ever_live_p (regno)
dcdd0f05
RS
6610 && !fixed_regs[regno]
6611 && !crtl->abi->clobbers_full_reg_p (regno))
4b0685d9 6612 {
ab43763e 6613 frame.reg_offset[regno] = SLOT_REQUIRED;
4b0685d9 6614 last_fp_reg = regno;
c600df9a
RS
6615 if (aarch64_emit_cfi_for_reg_p (regno))
6616 frame_related_fp_reg_p = true;
4b0685d9 6617 }
43e9d192 6618
c600df9a
RS
6619 /* Big-endian SVE frames need a spare predicate register in order
6620 to save Z8-Z15. Decide which register they should use. Prefer
6621 an unused argument register if possible, so that we don't force P4
6622 to be saved unnecessarily. */
6623 if (frame_related_fp_reg_p
6624 && crtl->abi->id () == ARM_PCS_SVE
6625 && BYTES_BIG_ENDIAN)
6626 {
6627 bitmap live1 = df_get_live_out (ENTRY_BLOCK_PTR_FOR_FN (cfun));
6628 bitmap live2 = df_get_live_in (EXIT_BLOCK_PTR_FOR_FN (cfun));
6629 for (regno = P0_REGNUM; regno <= P7_REGNUM; regno++)
6630 if (!bitmap_bit_p (live1, regno) && !bitmap_bit_p (live2, regno))
6631 break;
6632 gcc_assert (regno <= P7_REGNUM);
6633 frame.spare_pred_reg = regno;
6634 df_set_regs_ever_live (regno, true);
6635 }
6636
6637 for (regno = P0_REGNUM; regno <= P15_REGNUM; regno++)
6638 if (df_regs_ever_live_p (regno)
6639 && !fixed_regs[regno]
6640 && !crtl->abi->clobbers_full_reg_p (regno))
6641 frame.reg_offset[regno] = SLOT_REQUIRED;
6642
d6430e3c
TC
6643 /* With stack-clash, LR must be saved in non-leaf functions. The saving of
6644 LR counts as an implicit probe which allows us to maintain the invariant
6645 described in the comment at expand_prologue. */
c600df9a
RS
6646 gcc_assert (crtl->is_leaf
6647 || maybe_ne (frame.reg_offset[R30_REGNUM], SLOT_NOT_REQUIRED));
6648
6649 /* Now assign stack slots for the registers. Start with the predicate
6650 registers, since predicate LDR and STR have a relatively small
6651 offset range. These saves happen below the hard frame pointer. */
6652 for (regno = P0_REGNUM; regno <= P15_REGNUM; regno++)
6653 if (known_eq (frame.reg_offset[regno], SLOT_REQUIRED))
6654 {
6655 frame.reg_offset[regno] = offset;
6656 offset += BYTES_PER_SVE_PRED;
6657 }
6658
c600df9a
RS
6659 if (maybe_ne (offset, 0))
6660 {
cb26919c
RS
6661 /* If we have any vector registers to save above the predicate registers,
6662 the offset of the vector register save slots needs to be a multiple
6663 of the vector size. This lets us use the immediate forms of LDR/STR
6664 (or LD1/ST1 for big-endian).
6665
6666 A vector register is 8 times the size of a predicate register,
6667 and we need to save a maximum of 12 predicate registers, so the
6668 first vector register will be at either #1, MUL VL or #2, MUL VL.
6669
6670 If we don't have any vector registers to save, and we know how
6671 big the predicate save area is, we can just round it up to the
6672 next 16-byte boundary. */
6673 if (last_fp_reg == (int) INVALID_REGNUM && offset.is_constant ())
6674 offset = aligned_upper_bound (offset, STACK_BOUNDARY / BITS_PER_UNIT);
6675 else
6676 {
6677 if (known_le (offset, vector_save_size))
6678 offset = vector_save_size;
6679 else if (known_le (offset, vector_save_size * 2))
6680 offset = vector_save_size * 2;
6681 else
6682 gcc_unreachable ();
6683 }
c600df9a
RS
6684 }
6685
6686 /* If we need to save any SVE vector registers, add them next. */
6687 if (last_fp_reg != (int) INVALID_REGNUM && crtl->abi->id () == ARM_PCS_SVE)
6688 for (regno = V0_REGNUM; regno <= V31_REGNUM; regno++)
6689 if (known_eq (frame.reg_offset[regno], SLOT_REQUIRED))
6690 {
6691 frame.reg_offset[regno] = offset;
6692 offset += vector_save_size;
6693 }
6694
6695 /* OFFSET is now the offset of the hard frame pointer from the bottom
6696 of the callee save area. */
6697 bool saves_below_hard_fp_p = maybe_ne (offset, 0);
6698 frame.below_hard_fp_saved_regs_size = offset;
ab43763e 6699 if (frame.emit_frame_chain)
43e9d192 6700 {
2e1cdae5 6701 /* FP and LR are placed in the linkage record. */
c600df9a 6702 frame.reg_offset[R29_REGNUM] = offset;
ab43763e 6703 frame.wb_candidate1 = R29_REGNUM;
c600df9a 6704 frame.reg_offset[R30_REGNUM] = offset + UNITS_PER_WORD;
ab43763e 6705 frame.wb_candidate2 = R30_REGNUM;
c600df9a 6706 offset += 2 * UNITS_PER_WORD;
1f7bffd0 6707 }
43e9d192 6708
2e1cdae5 6709 for (regno = R0_REGNUM; regno <= R30_REGNUM; regno++)
c600df9a 6710 if (known_eq (frame.reg_offset[regno], SLOT_REQUIRED))
43e9d192 6711 {
ab43763e
RS
6712 frame.reg_offset[regno] = offset;
6713 if (frame.wb_candidate1 == INVALID_REGNUM)
6714 frame.wb_candidate1 = regno;
6715 else if (frame.wb_candidate2 == INVALID_REGNUM)
6716 frame.wb_candidate2 = regno;
43e9d192
IB
6717 offset += UNITS_PER_WORD;
6718 }
6719
c600df9a
RS
6720 poly_int64 max_int_offset = offset;
6721 offset = aligned_upper_bound (offset, STACK_BOUNDARY / BITS_PER_UNIT);
6722 bool has_align_gap = maybe_ne (offset, max_int_offset);
4b0685d9 6723
43e9d192 6724 for (regno = V0_REGNUM; regno <= V31_REGNUM; regno++)
c600df9a 6725 if (known_eq (frame.reg_offset[regno], SLOT_REQUIRED))
43e9d192 6726 {
4b0685d9
WD
6727 /* If there is an alignment gap between integer and fp callee-saves,
6728 allocate the last fp register to it if possible. */
a0d0b980
SE
6729 if (regno == last_fp_reg
6730 && has_align_gap
c600df9a
RS
6731 && known_eq (vector_save_size, 8)
6732 && multiple_p (offset, 16))
4b0685d9 6733 {
ab43763e 6734 frame.reg_offset[regno] = max_int_offset;
4b0685d9
WD
6735 break;
6736 }
6737
ab43763e
RS
6738 frame.reg_offset[regno] = offset;
6739 if (frame.wb_candidate1 == INVALID_REGNUM)
6740 frame.wb_candidate1 = regno;
6741 else if (frame.wb_candidate2 == INVALID_REGNUM
6742 && frame.wb_candidate1 >= V0_REGNUM)
6743 frame.wb_candidate2 = regno;
c600df9a 6744 offset += vector_save_size;
43e9d192
IB
6745 }
6746
c600df9a 6747 offset = aligned_upper_bound (offset, STACK_BOUNDARY / BITS_PER_UNIT);
43e9d192 6748
ab43763e 6749 frame.saved_regs_size = offset;
1c960e02 6750
c600df9a 6751 poly_int64 varargs_and_saved_regs_size = offset + frame.saved_varargs_size;
71bfb77a 6752
c600df9a 6753 poly_int64 above_outgoing_args
6a70badb
RS
6754 = aligned_upper_bound (varargs_and_saved_regs_size
6755 + get_frame_size (),
6756 STACK_BOUNDARY / BITS_PER_UNIT);
1c960e02 6757
c600df9a
RS
6758 frame.hard_fp_offset
6759 = above_outgoing_args - frame.below_hard_fp_saved_regs_size;
6760
6a70badb
RS
6761 /* Both these values are already aligned. */
6762 gcc_assert (multiple_p (crtl->outgoing_args_size,
6763 STACK_BOUNDARY / BITS_PER_UNIT));
c600df9a 6764 frame.frame_size = above_outgoing_args + crtl->outgoing_args_size;
1c960e02 6765
ab43763e 6766 frame.locals_offset = frame.saved_varargs_size;
71bfb77a 6767
ab43763e
RS
6768 frame.initial_adjust = 0;
6769 frame.final_adjust = 0;
6770 frame.callee_adjust = 0;
c600df9a 6771 frame.sve_callee_adjust = 0;
ab43763e 6772 frame.callee_offset = 0;
71bfb77a
WD
6773
6774 HOST_WIDE_INT max_push_offset = 0;
ab43763e 6775 if (frame.wb_candidate2 != INVALID_REGNUM)
71bfb77a 6776 max_push_offset = 512;
ab43763e 6777 else if (frame.wb_candidate1 != INVALID_REGNUM)
71bfb77a
WD
6778 max_push_offset = 256;
6779
9b17a646 6780 HOST_WIDE_INT const_size, const_outgoing_args_size, const_fp_offset;
c600df9a 6781 HOST_WIDE_INT const_saved_regs_size;
ab43763e 6782 if (frame.frame_size.is_constant (&const_size)
6a70badb 6783 && const_size < max_push_offset
c600df9a 6784 && known_eq (frame.hard_fp_offset, const_size))
71bfb77a
WD
6785 {
6786 /* Simple, small frame with no outgoing arguments:
c600df9a 6787
71bfb77a
WD
6788 stp reg1, reg2, [sp, -frame_size]!
6789 stp reg3, reg4, [sp, 16] */
ab43763e 6790 frame.callee_adjust = const_size;
71bfb77a 6791 }
9b17a646 6792 else if (crtl->outgoing_args_size.is_constant (&const_outgoing_args_size)
c600df9a
RS
6793 && frame.saved_regs_size.is_constant (&const_saved_regs_size)
6794 && const_outgoing_args_size + const_saved_regs_size < 512
6795 /* We could handle this case even with outgoing args, provided
6796 that the number of args left us with valid offsets for all
6797 predicate and vector save slots. It's such a rare case that
6798 it hardly seems worth the effort though. */
6799 && (!saves_below_hard_fp_p || const_outgoing_args_size == 0)
71bfb77a 6800 && !(cfun->calls_alloca
9b17a646
RS
6801 && frame.hard_fp_offset.is_constant (&const_fp_offset)
6802 && const_fp_offset < max_push_offset))
71bfb77a
WD
6803 {
6804 /* Frame with small outgoing arguments:
c600df9a 6805
71bfb77a
WD
6806 sub sp, sp, frame_size
6807 stp reg1, reg2, [sp, outgoing_args_size]
6808 stp reg3, reg4, [sp, outgoing_args_size + 16] */
ab43763e 6809 frame.initial_adjust = frame.frame_size;
9b17a646 6810 frame.callee_offset = const_outgoing_args_size;
71bfb77a 6811 }
c600df9a
RS
6812 else if (saves_below_hard_fp_p
6813 && known_eq (frame.saved_regs_size,
6814 frame.below_hard_fp_saved_regs_size))
6815 {
6816 /* Frame in which all saves are SVE saves:
6817
6818 sub sp, sp, hard_fp_offset + below_hard_fp_saved_regs_size
6819 save SVE registers relative to SP
6820 sub sp, sp, outgoing_args_size */
6821 frame.initial_adjust = (frame.hard_fp_offset
6822 + frame.below_hard_fp_saved_regs_size);
6823 frame.final_adjust = crtl->outgoing_args_size;
6824 }
ab43763e 6825 else if (frame.hard_fp_offset.is_constant (&const_fp_offset)
6a70badb 6826 && const_fp_offset < max_push_offset)
71bfb77a 6827 {
c600df9a
RS
6828 /* Frame with large outgoing arguments or SVE saves, but with
6829 a small local area:
6830
71bfb77a
WD
6831 stp reg1, reg2, [sp, -hard_fp_offset]!
6832 stp reg3, reg4, [sp, 16]
c600df9a
RS
6833 [sub sp, sp, below_hard_fp_saved_regs_size]
6834 [save SVE registers relative to SP]
71bfb77a 6835 sub sp, sp, outgoing_args_size */
ab43763e 6836 frame.callee_adjust = const_fp_offset;
c600df9a 6837 frame.sve_callee_adjust = frame.below_hard_fp_saved_regs_size;
8e66b377 6838 frame.final_adjust = crtl->outgoing_args_size;
71bfb77a 6839 }
71bfb77a
WD
6840 else
6841 {
c600df9a
RS
6842 /* Frame with large local area and outgoing arguments or SVE saves,
6843 using frame pointer:
6844
71bfb77a
WD
6845 sub sp, sp, hard_fp_offset
6846 stp x29, x30, [sp, 0]
6847 add x29, sp, 0
6848 stp reg3, reg4, [sp, 16]
c600df9a
RS
6849 [sub sp, sp, below_hard_fp_saved_regs_size]
6850 [save SVE registers relative to SP]
71bfb77a 6851 sub sp, sp, outgoing_args_size */
ab43763e 6852 frame.initial_adjust = frame.hard_fp_offset;
c600df9a 6853 frame.sve_callee_adjust = frame.below_hard_fp_saved_regs_size;
8e66b377 6854 frame.final_adjust = crtl->outgoing_args_size;
71bfb77a
WD
6855 }
6856
8e66b377
RS
6857 /* Make sure the individual adjustments add up to the full frame size. */
6858 gcc_assert (known_eq (frame.initial_adjust
6859 + frame.callee_adjust
c600df9a 6860 + frame.sve_callee_adjust
8e66b377
RS
6861 + frame.final_adjust, frame.frame_size));
6862
59a3d73d
RS
6863 if (!frame.emit_frame_chain && frame.callee_adjust == 0)
6864 {
6865 /* We've decided not to associate any register saves with the initial
6866 stack allocation. */
6867 frame.wb_candidate1 = INVALID_REGNUM;
6868 frame.wb_candidate2 = INVALID_REGNUM;
6869 }
6870
ab43763e 6871 frame.laid_out = true;
43e9d192
IB
6872}
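
/* Worked example (hypothetical, not derived from any particular test):
   a function with no locals, no outgoing arguments and no varargs that
   saves x29, x30, x19 and x20 gets reg_offset[] = 0, 8, 16, 24, so
   saved_regs_size = 32 and hard_fp_offset = frame_size = 32.  That
   satisfies the "simple, small frame" case above, so callee_adjust = 32
   and the prologue becomes:

	stp	x29, x30, [sp, -32]!
	stp	x19, x20, [sp, 16]  */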
6873
04ddfe06
KT
6874/* Return true if the register REGNO is saved on entry to
6875 the current function. */
6876
43e9d192
IB
6877static bool
6878aarch64_register_saved_on_entry (int regno)
6879{
c600df9a 6880 return known_ge (cfun->machine->frame.reg_offset[regno], 0);
43e9d192
IB
6881}
6882
04ddfe06
KT
6883/* Return the next register, at or above REGNO and no greater than LIMIT,
6884 that the callee needs to save. */
6885
64dedd72
JW
6886static unsigned
6887aarch64_next_callee_save (unsigned regno, unsigned limit)
6888{
6889 while (regno <= limit && !aarch64_register_saved_on_entry (regno))
6890 regno ++;
6891 return regno;
6892}
43e9d192 6893
04ddfe06
KT
6894/* Push the register number REGNO of mode MODE to the stack with write-back
6895 adjusting the stack by ADJUSTMENT. */
6896
c5e1f66e 6897static void
ef4bddc2 6898aarch64_pushwb_single_reg (machine_mode mode, unsigned regno,
c5e1f66e
JW
6899 HOST_WIDE_INT adjustment)
6900 {
6901 rtx base_rtx = stack_pointer_rtx;
6902 rtx insn, reg, mem;
6903
6904 reg = gen_rtx_REG (mode, regno);
6905 mem = gen_rtx_PRE_MODIFY (Pmode, base_rtx,
6906 plus_constant (Pmode, base_rtx, -adjustment));
30079dde 6907 mem = gen_frame_mem (mode, mem);
c5e1f66e
JW
6908
6909 insn = emit_move_insn (mem, reg);
6910 RTX_FRAME_RELATED_P (insn) = 1;
6911}
6912
04ddfe06
KT
6913/* Generate and return an instruction to store the pair of registers
6914 REG and REG2 of mode MODE to location BASE with write-back adjusting
6915 the stack location BASE by ADJUSTMENT. */
6916
80c11907 6917static rtx
ef4bddc2 6918aarch64_gen_storewb_pair (machine_mode mode, rtx base, rtx reg, rtx reg2,
80c11907
JW
6919 HOST_WIDE_INT adjustment)
6920{
6921 switch (mode)
6922 {
4e10a5a7 6923 case E_DImode:
80c11907
JW
6924 return gen_storewb_pairdi_di (base, base, reg, reg2,
6925 GEN_INT (-adjustment),
6926 GEN_INT (UNITS_PER_WORD - adjustment));
4e10a5a7 6927 case E_DFmode:
80c11907
JW
6928 return gen_storewb_pairdf_di (base, base, reg, reg2,
6929 GEN_INT (-adjustment),
6930 GEN_INT (UNITS_PER_WORD - adjustment));
a0d0b980
SE
6931 case E_TFmode:
6932 return gen_storewb_pairtf_di (base, base, reg, reg2,
6933 GEN_INT (-adjustment),
6934 GEN_INT (UNITS_PER_VREG - adjustment));
80c11907
JW
6935 default:
6936 gcc_unreachable ();
6937 }
6938}
6939
04ddfe06
KT
6940/* Push registers numbered REGNO1 and REGNO2 to the stack, adjusting the
6941 stack pointer by ADJUSTMENT. */
6942
80c11907 6943static void
89ac681e 6944aarch64_push_regs (unsigned regno1, unsigned regno2, HOST_WIDE_INT adjustment)
80c11907 6945{
5d8a22a5 6946 rtx_insn *insn;
c600df9a 6947 machine_mode mode = aarch64_reg_save_mode (regno1);
89ac681e 6948
71bfb77a 6949 if (regno2 == INVALID_REGNUM)
89ac681e
WD
6950 return aarch64_pushwb_single_reg (mode, regno1, adjustment);
6951
80c11907
JW
6952 rtx reg1 = gen_rtx_REG (mode, regno1);
6953 rtx reg2 = gen_rtx_REG (mode, regno2);
6954
6955 insn = emit_insn (aarch64_gen_storewb_pair (mode, stack_pointer_rtx, reg1,
6956 reg2, adjustment));
6957 RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 2)) = 1;
80c11907
JW
6958 RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 1)) = 1;
6959 RTX_FRAME_RELATED_P (insn) = 1;
6960}
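
/* For example (illustrative only): pushing x29 and x30 with an adjustment
   of 32 uses the store-pair-with-writeback pattern above and assembles to
   roughly "stp x29, x30, [sp, -32]!", while a single-register push falls
   back to aarch64_pushwb_single_reg and a pre-modify store.  */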
6961
04ddfe06
KT
6962/* Load the pair of registers REG and REG2 of mode MODE from stack location BASE,
6963 adjusting it by ADJUSTMENT afterwards. */
6964
159313d9 6965static rtx
ef4bddc2 6966aarch64_gen_loadwb_pair (machine_mode mode, rtx base, rtx reg, rtx reg2,
159313d9
JW
6967 HOST_WIDE_INT adjustment)
6968{
6969 switch (mode)
6970 {
4e10a5a7 6971 case E_DImode:
159313d9 6972 return gen_loadwb_pairdi_di (base, base, reg, reg2, GEN_INT (adjustment),
3e322b3f 6973 GEN_INT (UNITS_PER_WORD));
4e10a5a7 6974 case E_DFmode:
159313d9 6975 return gen_loadwb_pairdf_di (base, base, reg, reg2, GEN_INT (adjustment),
3e322b3f 6976 GEN_INT (UNITS_PER_WORD));
a0d0b980
SE
6977 case E_TFmode:
6978 return gen_loadwb_pairtf_di (base, base, reg, reg2, GEN_INT (adjustment),
6979 GEN_INT (UNITS_PER_VREG));
159313d9
JW
6980 default:
6981 gcc_unreachable ();
6982 }
6983}
6984
04ddfe06
KT
6985/* Pop the two registers numbered REGNO1, REGNO2 from the stack, adjusting it
6986 afterwards by ADJUSTMENT and writing the appropriate REG_CFA_RESTORE notes
6987 into CFI_OPS. */
6988
89ac681e
WD
6989static void
6990aarch64_pop_regs (unsigned regno1, unsigned regno2, HOST_WIDE_INT adjustment,
6991 rtx *cfi_ops)
6992{
c600df9a 6993 machine_mode mode = aarch64_reg_save_mode (regno1);
89ac681e
WD
6994 rtx reg1 = gen_rtx_REG (mode, regno1);
6995
6996 *cfi_ops = alloc_reg_note (REG_CFA_RESTORE, reg1, *cfi_ops);
6997
71bfb77a 6998 if (regno2 == INVALID_REGNUM)
89ac681e
WD
6999 {
7000 rtx mem = plus_constant (Pmode, stack_pointer_rtx, adjustment);
7001 mem = gen_rtx_POST_MODIFY (Pmode, stack_pointer_rtx, mem);
30079dde 7002 emit_move_insn (reg1, gen_frame_mem (mode, mem));
89ac681e
WD
7003 }
7004 else
7005 {
7006 rtx reg2 = gen_rtx_REG (mode, regno2);
7007 *cfi_ops = alloc_reg_note (REG_CFA_RESTORE, reg2, *cfi_ops);
7008 emit_insn (aarch64_gen_loadwb_pair (mode, stack_pointer_rtx, reg1,
7009 reg2, adjustment));
7010 }
7011}
7012
04ddfe06
KT
7013/* Generate and return a store pair instruction of mode MODE to store
7014 register REG1 to MEM1 and register REG2 to MEM2. */
7015
72df5c1f 7016static rtx
ef4bddc2 7017aarch64_gen_store_pair (machine_mode mode, rtx mem1, rtx reg1, rtx mem2,
72df5c1f
JW
7018 rtx reg2)
7019{
7020 switch (mode)
7021 {
4e10a5a7 7022 case E_DImode:
dfe1da23 7023 return gen_store_pair_dw_didi (mem1, reg1, mem2, reg2);
72df5c1f 7024
4e10a5a7 7025 case E_DFmode:
dfe1da23 7026 return gen_store_pair_dw_dfdf (mem1, reg1, mem2, reg2);
72df5c1f 7027
a0d0b980
SE
7028 case E_TFmode:
7029 return gen_store_pair_dw_tftf (mem1, reg1, mem2, reg2);
7030
7cda9e08
SD
7031 case E_V4SImode:
7032 return gen_vec_store_pairv4siv4si (mem1, reg1, mem2, reg2);
7033
72df5c1f
JW
7034 default:
7035 gcc_unreachable ();
7036 }
7037}
7038
04ddfe06
KT
7039/* Generate and return a load pair instruction of mode MODE to load register
7040 REG1 from MEM1 and register REG2 from MEM2. */
7041
72df5c1f 7042static rtx
ef4bddc2 7043aarch64_gen_load_pair (machine_mode mode, rtx reg1, rtx mem1, rtx reg2,
72df5c1f
JW
7044 rtx mem2)
7045{
7046 switch (mode)
7047 {
4e10a5a7 7048 case E_DImode:
dfe1da23 7049 return gen_load_pair_dw_didi (reg1, mem1, reg2, mem2);
72df5c1f 7050
4e10a5a7 7051 case E_DFmode:
dfe1da23 7052 return gen_load_pair_dw_dfdf (reg1, mem1, reg2, mem2);
72df5c1f 7053
a0d0b980
SE
7054 case E_TFmode:
7055 return gen_load_pair_dw_tftf (reg1, mem1, reg2, mem2);
7056
7cda9e08
SD
7057 case E_V4SImode:
7058 return gen_load_pairv4siv4si (reg1, mem1, reg2, mem2);
7059
72df5c1f
JW
7060 default:
7061 gcc_unreachable ();
7062 }
7063}
7064
db58fd89
JW
7065/* Return TRUE if return address signing should be enabled for the current
7066 function, otherwise return FALSE. */
7067
7068bool
7069aarch64_return_address_signing_enabled (void)
7070{
7071 /* This function should only be called after frame laid out. */
7072 gcc_assert (cfun->machine->frame.laid_out);
7073
2bc95be3
SN
7074 /* Turn return address signing off in any function that uses
7075 __builtin_eh_return. The address passed to __builtin_eh_return
7076 is not signed so either it has to be signed (with original sp)
7077 or the code path that uses it has to avoid authenticating it.
7078 Currently eh return introduces a return to anywhere gadget, no
7079 matter what we do here since it uses ret with user provided
7080 address. An ideal fix for that is to use indirect branch which
7081 can be protected with BTI j (to some extent). */
7082 if (crtl->calls_eh_return)
7083 return false;
7084
db58fd89 7085 /* If signing scope is AARCH64_FUNCTION_NON_LEAF, we only sign a leaf function
8fc16d72 7086 if its LR is pushed onto stack. */
db58fd89
JW
7087 return (aarch64_ra_sign_scope == AARCH64_FUNCTION_ALL
7088 || (aarch64_ra_sign_scope == AARCH64_FUNCTION_NON_LEAF
c600df9a 7089 && known_ge (cfun->machine->frame.reg_offset[LR_REGNUM], 0)));
db58fd89
JW
7090}
7091
30afdf34
SD
7092/* Return TRUE if Branch Target Identification Mechanism is enabled. */
7093bool
7094aarch64_bti_enabled (void)
7095{
7096 return (aarch64_enable_bti == 1);
7097}
7098
c600df9a
RS
7099/* The caller is going to use ST1D or LD1D to save or restore an SVE
7100 register in mode MODE at BASE_RTX + OFFSET, where OFFSET is in
7101 the range [1, 16] * GET_MODE_SIZE (MODE). Prepare for this by:
7102
7103 (1) updating BASE_RTX + OFFSET so that it is a legitimate ST1D
7104 or LD1D address
7105
7106 (2) setting PRED to a valid predicate register for the ST1D or LD1D,
7107 if the variable isn't already nonnull
7108
7109 (1) is needed when OFFSET is in the range [8, 16] * GET_MODE_SIZE (MODE).
7110 Handle this case using a temporary base register that is suitable for
7111 all offsets in that range. Use ANCHOR_REG as this base register if it
7112 is nonnull, otherwise create a new register and store it in ANCHOR_REG. */
7113
7114static inline void
7115aarch64_adjust_sve_callee_save_base (machine_mode mode, rtx &base_rtx,
7116 rtx &anchor_reg, poly_int64 &offset,
7117 rtx &ptrue)
7118{
7119 if (maybe_ge (offset, 8 * GET_MODE_SIZE (mode)))
7120 {
7121 /* This is the maximum valid offset of the anchor from the base.
7122 Lower values would be valid too. */
7123 poly_int64 anchor_offset = 16 * GET_MODE_SIZE (mode);
7124 if (!anchor_reg)
7125 {
7126 anchor_reg = gen_rtx_REG (Pmode, STACK_CLASH_SVE_CFA_REGNUM);
7127 emit_insn (gen_add3_insn (anchor_reg, base_rtx,
7128 gen_int_mode (anchor_offset, Pmode)));
7129 }
7130 base_rtx = anchor_reg;
7131 offset -= anchor_offset;
7132 }
7133 if (!ptrue)
7134 {
7135 int pred_reg = cfun->machine->frame.spare_pred_reg;
7136 emit_move_insn (gen_rtx_REG (VNx16BImode, pred_reg),
7137 CONSTM1_RTX (VNx16BImode));
7138 ptrue = gen_rtx_REG (VNx2BImode, pred_reg);
7139 }
7140}
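
/* Illustrative example (hypothetical offsets): for a save slot at
   BASE + 9 * GET_MODE_SIZE (MODE), the code above materializes
   ANCHOR_REG = BASE + 16 * GET_MODE_SIZE (MODE) and rewrites the offset
   to -7 * GET_MODE_SIZE (MODE), which is within the signed 4-bit
   multiple-of-VL range used elsewhere in this file (see
   offset_4bit_signed_scaled_p) and accepted by ST1D/LD1D.  */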
7141
7142/* Add a REG_CFA_EXPRESSION note to INSN to say that register REG
7143 is saved at BASE + OFFSET. */
7144
7145static void
7146aarch64_add_cfa_expression (rtx_insn *insn, rtx reg,
7147 rtx base, poly_int64 offset)
7148{
7149 rtx mem = gen_frame_mem (GET_MODE (reg),
7150 plus_constant (Pmode, base, offset));
7151 add_reg_note (insn, REG_CFA_EXPRESSION, gen_rtx_SET (mem, reg));
7152}
7153
04ddfe06
KT
7154/* Emit code to save the callee-saved registers from register number START
7155 to LIMIT to the stack at the location starting at offset START_OFFSET,
c600df9a
RS
7156 skipping any write-back candidates if SKIP_WB is true. HARD_FP_VALID_P
7157 is true if the hard frame pointer has been set up. */
43e9d192 7158
43e9d192 7159static void
c600df9a
RS
7160aarch64_save_callee_saves (poly_int64 start_offset,
7161 unsigned start, unsigned limit, bool skip_wb,
7162 bool hard_fp_valid_p)
43e9d192 7163{
5d8a22a5 7164 rtx_insn *insn;
43e9d192
IB
7165 unsigned regno;
7166 unsigned regno2;
c600df9a 7167 rtx anchor_reg = NULL_RTX, ptrue = NULL_RTX;
43e9d192 7168
0ec74a1e 7169 for (regno = aarch64_next_callee_save (start, limit);
64dedd72
JW
7170 regno <= limit;
7171 regno = aarch64_next_callee_save (regno + 1, limit))
43e9d192 7172 {
ae13fce3 7173 rtx reg, mem;
6a70badb 7174 poly_int64 offset;
c600df9a 7175 bool frame_related_p = aarch64_emit_cfi_for_reg_p (regno);
64dedd72 7176
ae13fce3
JW
7177 if (skip_wb
7178 && (regno == cfun->machine->frame.wb_candidate1
7179 || regno == cfun->machine->frame.wb_candidate2))
7180 continue;
7181
827ab47a 7182 if (cfun->machine->reg_is_wrapped_separately[regno])
c600df9a 7183 continue;
827ab47a 7184
c600df9a 7185 machine_mode mode = aarch64_reg_save_mode (regno);
ae13fce3
JW
7186 reg = gen_rtx_REG (mode, regno);
7187 offset = start_offset + cfun->machine->frame.reg_offset[regno];
c600df9a
RS
7188 rtx base_rtx = stack_pointer_rtx;
7189 poly_int64 sp_offset = offset;
64dedd72 7190
c600df9a
RS
7191 HOST_WIDE_INT const_offset;
7192 if (mode == VNx2DImode && BYTES_BIG_ENDIAN)
7193 aarch64_adjust_sve_callee_save_base (mode, base_rtx, anchor_reg,
7194 offset, ptrue);
7195 else if (GP_REGNUM_P (regno)
7196 && (!offset.is_constant (&const_offset) || const_offset >= 512))
7197 {
7198 gcc_assert (known_eq (start_offset, 0));
7199 poly_int64 fp_offset
7200 = cfun->machine->frame.below_hard_fp_saved_regs_size;
7201 if (hard_fp_valid_p)
7202 base_rtx = hard_frame_pointer_rtx;
7203 else
7204 {
7205 if (!anchor_reg)
7206 {
7207 anchor_reg = gen_rtx_REG (Pmode, STACK_CLASH_SVE_CFA_REGNUM);
7208 emit_insn (gen_add3_insn (anchor_reg, base_rtx,
7209 gen_int_mode (fp_offset, Pmode)));
7210 }
7211 base_rtx = anchor_reg;
7212 }
7213 offset -= fp_offset;
7214 }
7215 mem = gen_frame_mem (mode, plus_constant (Pmode, base_rtx, offset));
7216 bool need_cfa_note_p = (base_rtx != stack_pointer_rtx);
64dedd72 7217
c600df9a
RS
7218 if (!aarch64_sve_mode_p (mode)
7219 && (regno2 = aarch64_next_callee_save (regno + 1, limit)) <= limit
827ab47a 7220 && !cfun->machine->reg_is_wrapped_separately[regno2]
c600df9a
RS
7221 && known_eq (GET_MODE_SIZE (mode),
7222 cfun->machine->frame.reg_offset[regno2]
7223 - cfun->machine->frame.reg_offset[regno]))
43e9d192 7224 {
0ec74a1e 7225 rtx reg2 = gen_rtx_REG (mode, regno2);
64dedd72
JW
7226 rtx mem2;
7227
c600df9a
RS
7228 offset += GET_MODE_SIZE (mode);
7229 mem2 = gen_frame_mem (mode, plus_constant (Pmode, base_rtx, offset));
8ed2fc62
JW
7230 insn = emit_insn (aarch64_gen_store_pair (mode, mem, reg, mem2,
7231 reg2));
0b4a9743 7232
64dedd72
JW
7233 /* The first part of a frame-related parallel insn is
7234 always assumed to be relevant to the frame
7235 calculations; subsequent parts, are only
7236 frame-related if explicitly marked. */
c600df9a
RS
7237 if (aarch64_emit_cfi_for_reg_p (regno2))
7238 {
7239 if (need_cfa_note_p)
7240 aarch64_add_cfa_expression (insn, reg2, stack_pointer_rtx,
7241 sp_offset + GET_MODE_SIZE (mode));
7242 else
7243 RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 1)) = 1;
7244 }
7245
64dedd72
JW
7246 regno = regno2;
7247 }
c600df9a
RS
7248 else if (mode == VNx2DImode && BYTES_BIG_ENDIAN)
7249 {
7250 insn = emit_insn (gen_aarch64_pred_mov (mode, mem, ptrue, reg));
7251 need_cfa_note_p = true;
7252 }
7253 else if (aarch64_sve_mode_p (mode))
7254 insn = emit_insn (gen_rtx_SET (mem, reg));
64dedd72 7255 else
8ed2fc62
JW
7256 insn = emit_move_insn (mem, reg);
7257
c600df9a
RS
7258 RTX_FRAME_RELATED_P (insn) = frame_related_p;
7259 if (frame_related_p && need_cfa_note_p)
7260 aarch64_add_cfa_expression (insn, reg, stack_pointer_rtx, sp_offset);
8ed2fc62
JW
7261 }
7262}
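
/* For example (illustrative only): if x19 and x20 both need saving and
   their reg_offset values are 8 bytes apart, the pairing check above
   merges the two saves into a single "stp x19, x20, [sp, #offset]";
   otherwise each register is stored individually.  */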
7263
c600df9a
RS
7264/* Emit code to restore the callee registers from register number START
7265 up to and including LIMIT. Restore from the stack offset START_OFFSET,
7266 skipping any write-back candidates if SKIP_WB is true. Write the
7267 appropriate REG_CFA_RESTORE notes into CFI_OPS. */
04ddfe06 7268
8ed2fc62 7269static void
c600df9a 7270aarch64_restore_callee_saves (poly_int64 start_offset, unsigned start,
dd991abb 7271 unsigned limit, bool skip_wb, rtx *cfi_ops)
8ed2fc62 7272{
8ed2fc62
JW
7273 unsigned regno;
7274 unsigned regno2;
6a70badb 7275 poly_int64 offset;
c600df9a 7276 rtx anchor_reg = NULL_RTX, ptrue = NULL_RTX;
8ed2fc62
JW
7277
7278 for (regno = aarch64_next_callee_save (start, limit);
7279 regno <= limit;
7280 regno = aarch64_next_callee_save (regno + 1, limit))
7281 {
c600df9a 7282 bool frame_related_p = aarch64_emit_cfi_for_reg_p (regno);
827ab47a 7283 if (cfun->machine->reg_is_wrapped_separately[regno])
c600df9a 7284 continue;
827ab47a 7285
ae13fce3 7286 rtx reg, mem;
8ed2fc62 7287
ae13fce3
JW
7288 if (skip_wb
7289 && (regno == cfun->machine->frame.wb_candidate1
7290 || regno == cfun->machine->frame.wb_candidate2))
7291 continue;
7292
c600df9a 7293 machine_mode mode = aarch64_reg_save_mode (regno);
ae13fce3 7294 reg = gen_rtx_REG (mode, regno);
8ed2fc62 7295 offset = start_offset + cfun->machine->frame.reg_offset[regno];
c600df9a
RS
7296 rtx base_rtx = stack_pointer_rtx;
7297 if (mode == VNx2DImode && BYTES_BIG_ENDIAN)
7298 aarch64_adjust_sve_callee_save_base (mode, base_rtx, anchor_reg,
7299 offset, ptrue);
30079dde 7300 mem = gen_frame_mem (mode, plus_constant (Pmode, base_rtx, offset));
8ed2fc62 7301
c600df9a
RS
7302 if (!aarch64_sve_mode_p (mode)
7303 && (regno2 = aarch64_next_callee_save (regno + 1, limit)) <= limit
827ab47a 7304 && !cfun->machine->reg_is_wrapped_separately[regno2]
c600df9a
RS
7305 && known_eq (GET_MODE_SIZE (mode),
7306 cfun->machine->frame.reg_offset[regno2]
7307 - cfun->machine->frame.reg_offset[regno]))
64dedd72 7308 {
8ed2fc62
JW
7309 rtx reg2 = gen_rtx_REG (mode, regno2);
7310 rtx mem2;
7311
c600df9a 7312 offset += GET_MODE_SIZE (mode);
30079dde 7313 mem2 = gen_frame_mem (mode, plus_constant (Pmode, base_rtx, offset));
dd991abb 7314 emit_insn (aarch64_gen_load_pair (mode, reg, mem, reg2, mem2));
8ed2fc62 7315
dd991abb 7316 *cfi_ops = alloc_reg_note (REG_CFA_RESTORE, reg2, *cfi_ops);
8ed2fc62 7317 regno = regno2;
43e9d192 7318 }
c600df9a
RS
7319 else if (mode == VNx2DImode && BYTES_BIG_ENDIAN)
7320 emit_insn (gen_aarch64_pred_mov (mode, reg, ptrue, mem));
7321 else if (aarch64_sve_mode_p (mode))
7322 emit_insn (gen_rtx_SET (reg, mem));
8ed2fc62 7323 else
dd991abb 7324 emit_move_insn (reg, mem);
c600df9a
RS
7325 if (frame_related_p)
7326 *cfi_ops = alloc_reg_note (REG_CFA_RESTORE, reg, *cfi_ops);
43e9d192 7327 }
43e9d192
IB
7328}
7329
43cacb12
RS
7330/* Return true if OFFSET is a signed 4-bit value multiplied by the size
7331 of MODE. */
7332
7333static inline bool
7334offset_4bit_signed_scaled_p (machine_mode mode, poly_int64 offset)
7335{
7336 HOST_WIDE_INT multiple;
7337 return (constant_multiple_p (offset, GET_MODE_SIZE (mode), &multiple)
7338 && IN_RANGE (multiple, -8, 7));
7339}
7340
7341/* Return true if OFFSET is an unsigned 6-bit value multiplied by the size
7342 of MODE. */
7343
7344static inline bool
7345offset_6bit_unsigned_scaled_p (machine_mode mode, poly_int64 offset)
7346{
7347 HOST_WIDE_INT multiple;
7348 return (constant_multiple_p (offset, GET_MODE_SIZE (mode), &multiple)
7349 && IN_RANGE (multiple, 0, 63));
7350}
7351
7352/* Return true if OFFSET is a signed 7-bit value multiplied by the size
7353 of MODE. */
7354
7355bool
7356aarch64_offset_7bit_signed_scaled_p (machine_mode mode, poly_int64 offset)
7357{
7358 HOST_WIDE_INT multiple;
7359 return (constant_multiple_p (offset, GET_MODE_SIZE (mode), &multiple)
7360 && IN_RANGE (multiple, -64, 63));
7361}
7362
7363/* Return true if OFFSET is a signed 9-bit value. */
7364
3c5af608
MM
7365bool
7366aarch64_offset_9bit_signed_unscaled_p (machine_mode mode ATTRIBUTE_UNUSED,
7367 poly_int64 offset)
827ab47a 7368{
6a70badb
RS
7369 HOST_WIDE_INT const_offset;
7370 return (offset.is_constant (&const_offset)
7371 && IN_RANGE (const_offset, -256, 255));
827ab47a
KT
7372}
7373
43cacb12
RS
7374/* Return true if OFFSET is a signed 9-bit value multiplied by the size
7375 of MODE. */
7376
827ab47a 7377static inline bool
43cacb12 7378offset_9bit_signed_scaled_p (machine_mode mode, poly_int64 offset)
827ab47a 7379{
6a70badb
RS
7380 HOST_WIDE_INT multiple;
7381 return (constant_multiple_p (offset, GET_MODE_SIZE (mode), &multiple)
43cacb12 7382 && IN_RANGE (multiple, -256, 255));
827ab47a
KT
7383}
7384
43cacb12
RS
7385/* Return true if OFFSET is an unsigned 12-bit value multiplied by the size
7386 of MODE. */
7387
7388static inline bool
7389offset_12bit_unsigned_scaled_p (machine_mode mode, poly_int64 offset)
827ab47a 7390{
6a70badb
RS
7391 HOST_WIDE_INT multiple;
7392 return (constant_multiple_p (offset, GET_MODE_SIZE (mode), &multiple)
43cacb12 7393 && IN_RANGE (multiple, 0, 4095));
827ab47a
KT
7394}
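
/* Concrete ranges, for illustration: in DImode (8-byte units),
   aarch64_offset_7bit_signed_scaled_p accepts byte offsets in
   [-512, 504] (the LDP/STP immediate range) and
   offset_12bit_unsigned_scaled_p accepts [0, 32760] (the unsigned
   LDR/STR immediate range).  For SVE modes the multiples are scaled
   by the vector or predicate length rather than a fixed byte size.  */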
7395
7396/* Implement TARGET_SHRINK_WRAP_GET_SEPARATE_COMPONENTS. */
7397
7398static sbitmap
7399aarch64_get_separate_components (void)
7400{
827ab47a
KT
7401 sbitmap components = sbitmap_alloc (LAST_SAVED_REGNUM + 1);
7402 bitmap_clear (components);
7403
7404 /* The registers we need saved to the frame. */
7405 for (unsigned regno = 0; regno <= LAST_SAVED_REGNUM; regno++)
7406 if (aarch64_register_saved_on_entry (regno))
7407 {
c600df9a
RS
7408 /* Punt on saves and restores that use ST1D and LD1D. We could
7409 try to be smarter, but it would involve making sure that the
7410 spare predicate register itself is safe to use at the save
7411 and restore points. Also, when a frame pointer is being used,
7412 the slots are often out of reach of ST1D and LD1D anyway. */
7413 machine_mode mode = aarch64_reg_save_mode (regno);
7414 if (mode == VNx2DImode && BYTES_BIG_ENDIAN)
7415 continue;
7416
6a70badb 7417 poly_int64 offset = cfun->machine->frame.reg_offset[regno];
c600df9a
RS
7418
7419 /* If the register is saved in the first SVE save slot, we use
7420 it as a stack probe for -fstack-clash-protection. */
7421 if (flag_stack_clash_protection
7422 && maybe_ne (cfun->machine->frame.below_hard_fp_saved_regs_size, 0)
7423 && known_eq (offset, 0))
7424 continue;
7425
7426 /* Get the offset relative to the register we'll use. */
7427 if (frame_pointer_needed)
7428 offset -= cfun->machine->frame.below_hard_fp_saved_regs_size;
7429 else
7430 offset += crtl->outgoing_args_size;
7431
827ab47a
KT
7432 /* Check that we can access the stack slot of the register with one
7433 direct load with no adjustments needed. */
c600df9a
RS
7434 if (aarch64_sve_mode_p (mode)
7435 ? offset_9bit_signed_scaled_p (mode, offset)
7436 : offset_12bit_unsigned_scaled_p (mode, offset))
827ab47a
KT
7437 bitmap_set_bit (components, regno);
7438 }
7439
7440 /* Don't mess with the hard frame pointer. */
7441 if (frame_pointer_needed)
7442 bitmap_clear_bit (components, HARD_FRAME_POINTER_REGNUM);
7443
c600df9a
RS
7444 /* If the spare predicate register used by big-endian SVE code
7445 is call-preserved, it must be saved in the main prologue
7446 before any saves that use it. */
7447 if (cfun->machine->frame.spare_pred_reg != INVALID_REGNUM)
7448 bitmap_clear_bit (components, cfun->machine->frame.spare_pred_reg);
7449
827ab47a
KT
7450 unsigned reg1 = cfun->machine->frame.wb_candidate1;
7451 unsigned reg2 = cfun->machine->frame.wb_candidate2;
0795f659 7452 /* If registers have been chosen to be stored/restored with
827ab47a
KT
7453 writeback, don't interfere with them, to avoid having to output explicit
7454 stack adjustment instructions. */
7455 if (reg2 != INVALID_REGNUM)
7456 bitmap_clear_bit (components, reg2);
7457 if (reg1 != INVALID_REGNUM)
7458 bitmap_clear_bit (components, reg1);
7459
7460 bitmap_clear_bit (components, LR_REGNUM);
7461 bitmap_clear_bit (components, SP_REGNUM);
7462
7463 return components;
7464}
7465
7466/* Implement TARGET_SHRINK_WRAP_COMPONENTS_FOR_BB. */
7467
7468static sbitmap
7469aarch64_components_for_bb (basic_block bb)
7470{
7471 bitmap in = DF_LIVE_IN (bb);
7472 bitmap gen = &DF_LIVE_BB_INFO (bb)->gen;
7473 bitmap kill = &DF_LIVE_BB_INFO (bb)->kill;
7474
7475 sbitmap components = sbitmap_alloc (LAST_SAVED_REGNUM + 1);
7476 bitmap_clear (components);
7477
ce9d2a37
RS
7478 /* Clobbered registers don't generate values in any meaningful sense,
7479 since nothing after the clobber can rely on their value. And we can't
7480 say that partially-clobbered registers are unconditionally killed,
7481 because whether they're killed or not depends on the mode of the
7482 value they're holding. Thus partially call-clobbered registers
7483 appear in neither the kill set nor the gen set.
7484
7485 Check manually for any calls that clobber more of a register than the
7486 current function can. */
7487 function_abi_aggregator callee_abis;
7488 rtx_insn *insn;
7489 FOR_BB_INSNS (bb, insn)
7490 if (CALL_P (insn))
7491 callee_abis.note_callee_abi (insn_callee_abi (insn));
7492 HARD_REG_SET extra_caller_saves = callee_abis.caller_save_regs (*crtl->abi);
7493
827ab47a
KT
7494 /* GPRs are used in a bb if they are in the IN, GEN, or KILL sets. */
7495 for (unsigned regno = 0; regno <= LAST_SAVED_REGNUM; regno++)
dcdd0f05
RS
7496 if (!fixed_regs[regno]
7497 && !crtl->abi->clobbers_full_reg_p (regno)
ce9d2a37
RS
7498 && (TEST_HARD_REG_BIT (extra_caller_saves, regno)
7499 || bitmap_bit_p (in, regno)
7500 || bitmap_bit_p (gen, regno)
7501 || bitmap_bit_p (kill, regno)))
3f26f054 7502 {
3f26f054
WD
7503 bitmap_set_bit (components, regno);
7504
7505 /* If there is a callee-save at an adjacent offset, add it too
7506 to increase the use of LDP/STP. */
c600df9a
RS
7507 poly_int64 offset = cfun->machine->frame.reg_offset[regno];
7508 unsigned regno2 = multiple_p (offset, 16) ? regno + 1 : regno - 1;
3f26f054
WD
7509
7510 if (regno2 <= LAST_SAVED_REGNUM)
7511 {
c600df9a
RS
7512 poly_int64 offset2 = cfun->machine->frame.reg_offset[regno2];
7513 if (regno < regno2
7514 ? known_eq (offset + 8, offset2)
7515 : multiple_p (offset2, 16) && known_eq (offset2 + 8, offset))
3f26f054
WD
7516 bitmap_set_bit (components, regno2);
7517 }
7518 }
827ab47a
KT
7519
7520 return components;
7521}
7522
7523/* Implement TARGET_SHRINK_WRAP_DISQUALIFY_COMPONENTS.
7524 Nothing to do for aarch64. */
7525
7526static void
7527aarch64_disqualify_components (sbitmap, edge, sbitmap, bool)
7528{
7529}
7530
7531/* Return the next set bit in BMP from START onwards. Return the total number
7532 of bits in BMP if no set bit is found at or after START. */
7533
7534static unsigned int
7535aarch64_get_next_set_bit (sbitmap bmp, unsigned int start)
7536{
7537 unsigned int nbits = SBITMAP_SIZE (bmp);
7538 if (start == nbits)
7539 return start;
7540
7541 gcc_assert (start < nbits);
7542 for (unsigned int i = start; i < nbits; i++)
7543 if (bitmap_bit_p (bmp, i))
7544 return i;
7545
7546 return nbits;
7547}
7548
7549/* Do the work for aarch64_emit_prologue_components and
7550 aarch64_emit_epilogue_components. COMPONENTS is the bitmap of registers
7551 to save/restore, PROLOGUE_P indicates whether to emit the prologue sequence
7552 for these components or the epilogue sequence. That is, it determines
7553 whether we should emit stores or loads and what kind of CFA notes to attach
7554 to the insns. Otherwise the logic for the two sequences is very
7555 similar. */
7556
7557static void
7558aarch64_process_components (sbitmap components, bool prologue_p)
7559{
7560 rtx ptr_reg = gen_rtx_REG (Pmode, frame_pointer_needed
7561 ? HARD_FRAME_POINTER_REGNUM
7562 : STACK_POINTER_REGNUM);
7563
7564 unsigned last_regno = SBITMAP_SIZE (components);
7565 unsigned regno = aarch64_get_next_set_bit (components, R0_REGNUM);
7566 rtx_insn *insn = NULL;
7567
7568 while (regno != last_regno)
7569 {
c600df9a
RS
7570 bool frame_related_p = aarch64_emit_cfi_for_reg_p (regno);
7571 machine_mode mode = aarch64_reg_save_mode (regno);
a0d0b980 7572
827ab47a 7573 rtx reg = gen_rtx_REG (mode, regno);
6a70badb 7574 poly_int64 offset = cfun->machine->frame.reg_offset[regno];
c600df9a
RS
7575 if (frame_pointer_needed)
7576 offset -= cfun->machine->frame.below_hard_fp_saved_regs_size;
7577 else
7578 offset += crtl->outgoing_args_size;
7579
827ab47a
KT
7580 rtx addr = plus_constant (Pmode, ptr_reg, offset);
7581 rtx mem = gen_frame_mem (mode, addr);
7582
7583 rtx set = prologue_p ? gen_rtx_SET (mem, reg) : gen_rtx_SET (reg, mem);
7584 unsigned regno2 = aarch64_get_next_set_bit (components, regno + 1);
7585 /* No more registers to handle after REGNO.
7586 Emit a single save/restore and exit. */
7587 if (regno2 == last_regno)
7588 {
7589 insn = emit_insn (set);
c600df9a
RS
7590 if (frame_related_p)
7591 {
7592 RTX_FRAME_RELATED_P (insn) = 1;
7593 if (prologue_p)
7594 add_reg_note (insn, REG_CFA_OFFSET, copy_rtx (set));
7595 else
7596 add_reg_note (insn, REG_CFA_RESTORE, reg);
7597 }
827ab47a
KT
7598 break;
7599 }
7600
6a70badb 7601 poly_int64 offset2 = cfun->machine->frame.reg_offset[regno2];
827ab47a
KT
7602 /* The next register is not of the same class or its offset is not
7603 mergeable with the current one into a pair. */
c600df9a
RS
7604 if (aarch64_sve_mode_p (mode)
7605 || !satisfies_constraint_Ump (mem)
827ab47a 7606 || GP_REGNUM_P (regno) != GP_REGNUM_P (regno2)
dcdd0f05 7607 || (crtl->abi->id () == ARM_PCS_SIMD && FP_REGNUM_P (regno))
6a70badb
RS
7608 || maybe_ne ((offset2 - cfun->machine->frame.reg_offset[regno]),
7609 GET_MODE_SIZE (mode)))
827ab47a
KT
7610 {
7611 insn = emit_insn (set);
c600df9a
RS
7612 if (frame_related_p)
7613 {
7614 RTX_FRAME_RELATED_P (insn) = 1;
7615 if (prologue_p)
7616 add_reg_note (insn, REG_CFA_OFFSET, copy_rtx (set));
7617 else
7618 add_reg_note (insn, REG_CFA_RESTORE, reg);
7619 }
827ab47a
KT
7620
7621 regno = regno2;
7622 continue;
7623 }
7624
c600df9a
RS
7625 bool frame_related2_p = aarch64_emit_cfi_for_reg_p (regno2);
7626
827ab47a
KT
7627 /* REGNO2 can be saved/restored in a pair with REGNO. */
7628 rtx reg2 = gen_rtx_REG (mode, regno2);
c600df9a
RS
7629 if (frame_pointer_needed)
7630 offset2 -= cfun->machine->frame.below_hard_fp_saved_regs_size;
7631 else
7632 offset2 += crtl->outgoing_args_size;
827ab47a
KT
7633 rtx addr2 = plus_constant (Pmode, ptr_reg, offset2);
7634 rtx mem2 = gen_frame_mem (mode, addr2);
7635 rtx set2 = prologue_p ? gen_rtx_SET (mem2, reg2)
7636 : gen_rtx_SET (reg2, mem2);
7637
7638 if (prologue_p)
7639 insn = emit_insn (aarch64_gen_store_pair (mode, mem, reg, mem2, reg2));
7640 else
7641 insn = emit_insn (aarch64_gen_load_pair (mode, reg, mem, reg2, mem2));
7642
c600df9a 7643 if (frame_related_p || frame_related2_p)
827ab47a 7644 {
c600df9a
RS
7645 RTX_FRAME_RELATED_P (insn) = 1;
7646 if (prologue_p)
7647 {
7648 if (frame_related_p)
7649 add_reg_note (insn, REG_CFA_OFFSET, set);
7650 if (frame_related2_p)
7651 add_reg_note (insn, REG_CFA_OFFSET, set2);
7652 }
7653 else
7654 {
7655 if (frame_related_p)
7656 add_reg_note (insn, REG_CFA_RESTORE, reg);
7657 if (frame_related2_p)
7658 add_reg_note (insn, REG_CFA_RESTORE, reg2);
7659 }
827ab47a
KT
7660 }
7661
7662 regno = aarch64_get_next_set_bit (components, regno2 + 1);
7663 }
7664}
7665
7666/* Implement TARGET_SHRINK_WRAP_EMIT_PROLOGUE_COMPONENTS. */
7667
7668static void
7669aarch64_emit_prologue_components (sbitmap components)
7670{
7671 aarch64_process_components (components, true);
7672}
7673
7674/* Implement TARGET_SHRINK_WRAP_EMIT_EPILOGUE_COMPONENTS. */
7675
7676static void
7677aarch64_emit_epilogue_components (sbitmap components)
7678{
7679 aarch64_process_components (components, false);
7680}
7681
7682/* Implement TARGET_SHRINK_WRAP_SET_HANDLED_COMPONENTS. */
7683
7684static void
7685aarch64_set_handled_components (sbitmap components)
7686{
7687 for (unsigned regno = 0; regno <= LAST_SAVED_REGNUM; regno++)
7688 if (bitmap_bit_p (components, regno))
7689 cfun->machine->reg_is_wrapped_separately[regno] = true;
7690}
7691
8c6e3b23
TC
7692/* On AArch64 we have an ABI-defined safe buffer. This constant is used to
7693 determine the probe offset for alloca. */
7694
7695static HOST_WIDE_INT
7696aarch64_stack_clash_protection_alloca_probe_range (void)
7697{
7698 return STACK_CLASH_CALLER_GUARD;
7699}
7700
7701
cd1bef27
JL
7702/* Allocate POLY_SIZE bytes of stack space using TEMP1 and TEMP2 as scratch
7703 registers. If POLY_SIZE is not large enough to require a probe this function
7704 will only adjust the stack. When allocating the stack space,
7705 FRAME_RELATED_P is used to indicate whether the allocation is frame-related.
7706 FINAL_ADJUSTMENT_P indicates whether we are allocating the outgoing
7707 arguments. If we are, we ensure that any allocation larger than the ABI
7708 defined buffer needs a probe so that the invariant of having a 1KB buffer is
7709 maintained.
7710
7711 We emit barriers after each stack adjustment to prevent optimizations from
7712 breaking the invariant that we never drop the stack more than a page. This
7713 invariant is needed to make it easier to correctly handle asynchronous
7714 events, e.g. if we were to allow the stack to be dropped by more than a page
7715 and then have multiple probes up and we take a signal somewhere in between
7716 then the signal handler doesn't know the state of the stack and can make no
7717 assumptions about which pages have been probed. */
7718
7719static void
7720aarch64_allocate_and_probe_stack_space (rtx temp1, rtx temp2,
7721 poly_int64 poly_size,
7722 bool frame_related_p,
7723 bool final_adjustment_p)
7724{
7725 HOST_WIDE_INT guard_size
028d4092 7726 = 1 << param_stack_clash_protection_guard_size;
cd1bef27 7727 HOST_WIDE_INT guard_used_by_caller = STACK_CLASH_CALLER_GUARD;
cd1bef27 7728 HOST_WIDE_INT min_probe_threshold
c600df9a
RS
7729 = (final_adjustment_p
7730 ? guard_used_by_caller
7731 : guard_size - guard_used_by_caller);
7732 /* When doing the final adjustment for the outgoing arguments, take into
7733 account any unprobed space there is above the current SP. There are
7734 two cases:
7735
7736 - When saving SVE registers below the hard frame pointer, we force
7737 the lowest save to take place in the prologue before doing the final
7738 adjustment (i.e. we don't allow the save to be shrink-wrapped).
7739 This acts as a probe at SP, so there is no unprobed space.
7740
7741 - When there are no SVE register saves, we use the store of the link
7742 register as a probe. We can't assume that LR was saved at position 0
7743 though, so treat any space below it as unprobed. */
7744 if (final_adjustment_p
7745 && known_eq (cfun->machine->frame.below_hard_fp_saved_regs_size, 0))
7746 {
7747 poly_int64 lr_offset = cfun->machine->frame.reg_offset[LR_REGNUM];
7748 if (known_ge (lr_offset, 0))
7749 min_probe_threshold -= lr_offset.to_constant ();
7750 else
7751 gcc_assert (!flag_stack_clash_protection || known_eq (poly_size, 0));
7752 }
cd1bef27
JL
7753
7754 poly_int64 frame_size = cfun->machine->frame.frame_size;
7755
7756 /* We should always have a positive probe threshold. */
7757 gcc_assert (min_probe_threshold > 0);
7758
7759 if (flag_stack_clash_protection && !final_adjustment_p)
7760 {
7761 poly_int64 initial_adjust = cfun->machine->frame.initial_adjust;
c600df9a 7762 poly_int64 sve_callee_adjust = cfun->machine->frame.sve_callee_adjust;
cd1bef27
JL
7763 poly_int64 final_adjust = cfun->machine->frame.final_adjust;
7764
7765 if (known_eq (frame_size, 0))
7766 {
7767 dump_stack_clash_frame_info (NO_PROBE_NO_FRAME, false);
7768 }
c600df9a
RS
7769 else if (known_lt (initial_adjust + sve_callee_adjust,
7770 guard_size - guard_used_by_caller)
cd1bef27
JL
7771 && known_lt (final_adjust, guard_used_by_caller))
7772 {
7773 dump_stack_clash_frame_info (NO_PROBE_SMALL_FRAME, true);
7774 }
7775 }
7776
cd1bef27
JL
7777 /* If SIZE is not large enough to require probing, just adjust the stack and
7778 exit. */
eb471ba3 7779 if (known_lt (poly_size, min_probe_threshold)
cd1bef27
JL
7780 || !flag_stack_clash_protection)
7781 {
7782 aarch64_sub_sp (temp1, temp2, poly_size, frame_related_p);
7783 return;
7784 }
7785
eb471ba3
TC
7786 HOST_WIDE_INT size;
7787 /* Handle the SVE non-constant case first. */
7788 if (!poly_size.is_constant (&size))
7789 {
7790 if (dump_file)
7791 {
7792 fprintf (dump_file, "Stack clash SVE prologue: ");
7793 print_dec (poly_size, dump_file);
7794 fprintf (dump_file, " bytes, dynamic probing will be required.\n");
7795 }
7796
7797 /* First calculate the amount of bytes we're actually spilling. */
7798 aarch64_add_offset (Pmode, temp1, CONST0_RTX (Pmode),
7799 poly_size, temp1, temp2, false, true);
7800
7801 rtx_insn *insn = get_last_insn ();
7802
7803 if (frame_related_p)
7804 {
7805 /* This is done to provide unwinding information for the stack
7806 adjustments we're about to do; however, to prevent the optimizers
143d3b15 7807 from removing the R11 move and leaving the CFA note (which would be
eb471ba3
TC
7808 very wrong), we tie the old and new stack pointers together.
7809 The tie will expand to nothing but the optimizers will not touch
7810 the instruction. */
143d3b15 7811 rtx stack_ptr_copy = gen_rtx_REG (Pmode, STACK_CLASH_SVE_CFA_REGNUM);
eb471ba3
TC
7812 emit_move_insn (stack_ptr_copy, stack_pointer_rtx);
7813 emit_insn (gen_stack_tie (stack_ptr_copy, stack_pointer_rtx));
7814
7815 /* We want the CFA independent of the stack pointer for the
7816 duration of the loop. */
7817 add_reg_note (insn, REG_CFA_DEF_CFA, stack_ptr_copy);
7818 RTX_FRAME_RELATED_P (insn) = 1;
7819 }
7820
7821 rtx probe_const = gen_int_mode (min_probe_threshold, Pmode);
7822 rtx guard_const = gen_int_mode (guard_size, Pmode);
7823
7824 insn = emit_insn (gen_probe_sve_stack_clash (Pmode, stack_pointer_rtx,
7825 stack_pointer_rtx, temp1,
7826 probe_const, guard_const));
7827
7828 /* Now reset the CFA register if needed. */
7829 if (frame_related_p)
7830 {
7831 add_reg_note (insn, REG_CFA_DEF_CFA,
7832 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
7833 gen_int_mode (poly_size, Pmode)));
7834 RTX_FRAME_RELATED_P (insn) = 1;
7835 }
7836
7837 return;
7838 }
7839
cd1bef27
JL
7840 if (dump_file)
7841 fprintf (dump_file,
eb471ba3
TC
7842 "Stack clash AArch64 prologue: " HOST_WIDE_INT_PRINT_DEC
7843 " bytes, probing will be required.\n", size);
cd1bef27
JL
7844
7845 /* Round size to the nearest multiple of guard_size, and calculate the
7846 residual as the difference between the original size and the rounded
7847 size. */
7848 HOST_WIDE_INT rounded_size = ROUND_DOWN (size, guard_size);
7849 HOST_WIDE_INT residual = size - rounded_size;
7850
7851 /* We can handle a small number of allocations/probes inline. Otherwise
7852 punt to a loop. */
7853 if (rounded_size <= STACK_CLASH_MAX_UNROLL_PAGES * guard_size)
7854 {
7855 for (HOST_WIDE_INT i = 0; i < rounded_size; i += guard_size)
7856 {
7857 aarch64_sub_sp (NULL, temp2, guard_size, true);
7858 emit_stack_probe (plus_constant (Pmode, stack_pointer_rtx,
7859 guard_used_by_caller));
7860 emit_insn (gen_blockage ());
7861 }
7862 dump_stack_clash_frame_info (PROBE_INLINE, size != rounded_size);
7863 }
7864 else
7865 {
7866 /* Compute the ending address. */
7867 aarch64_add_offset (Pmode, temp1, stack_pointer_rtx, -rounded_size,
7868 temp1, NULL, false, true);
7869 rtx_insn *insn = get_last_insn ();
7870
7871 /* For the initial allocation, we don't have a frame pointer
7872 set up, so we always need CFI notes. If we're doing the
7873 final allocation, then we may have a frame pointer, in which
7874 case it is the CFA, otherwise we need CFI notes.
7875
7876 We can determine which allocation we are doing by looking at
7877 the value of FRAME_RELATED_P since the final allocations are not
7878 frame related. */
7879 if (frame_related_p)
7880 {
7881 /* We want the CFA independent of the stack pointer for the
7882 duration of the loop. */
7883 add_reg_note (insn, REG_CFA_DEF_CFA,
7884 plus_constant (Pmode, temp1, rounded_size));
7885 RTX_FRAME_RELATED_P (insn) = 1;
7886 }
7887
7888 /* This allocates and probes the stack. Note that this re-uses some of
7889 the existing Ada stack protection code. However we are guaranteed not
7890 to enter the non loop or residual branches of that code.
7891
7892 The non-loop part won't be entered because if our allocation amount
7893 doesn't require a loop, the case above would handle it.
7894
7895 The residual amount won't be entered because TEMP1 is a multiple of
7896 the allocation size. The residual will always be 0. As such, the only
7897 part we are actually using from that code is the loop setup. The
7898 actual probing is done in aarch64_output_probe_stack_range. */
7899 insn = emit_insn (gen_probe_stack_range (stack_pointer_rtx,
7900 stack_pointer_rtx, temp1));
7901
7902 /* Now reset the CFA register if needed. */
7903 if (frame_related_p)
7904 {
7905 add_reg_note (insn, REG_CFA_DEF_CFA,
7906 plus_constant (Pmode, stack_pointer_rtx, rounded_size));
7907 RTX_FRAME_RELATED_P (insn) = 1;
7908 }
7909
7910 emit_insn (gen_blockage ());
7911 dump_stack_clash_frame_info (PROBE_LOOP, size != rounded_size);
7912 }
7913
7914 /* Handle any residuals. Residuals of at least MIN_PROBE_THRESHOLD have to
7915 be probed. This maintains the requirement that each page is probed at
7916 least once. For initial probing we probe only if the allocation is
7917 more than GUARD_SIZE - buffer, and for the outgoing arguments we probe
7918 if the amount is larger than buffer. GUARD_SIZE - buffer + buffer ==
7919 GUARD_SIZE. This ensures that for any allocation that is large enough to
7920 trigger a probe here, we'll have at least one, and if an allocation is not
7921 large enough for this code to emit anything for it, the page will already
7922 have been probed by the saving of FP/LR, either by this function or by a
7923 callee. If we don't have any callees then we won't have more stack
7924 adjustments and so are still safe. */
7925 if (residual)
7926 {
7927 HOST_WIDE_INT residual_probe_offset = guard_used_by_caller;
7928 /* If we're doing final adjustments, and we've done any full page
7929 allocations then any residual needs to be probed. */
7930 if (final_adjustment_p && rounded_size != 0)
7931 min_probe_threshold = 0;
7932 /* If doing a small final adjustment, we always probe at offset 0.
7933 This is done to avoid issues when LR is not at position 0 or when
7934 the final adjustment is smaller than the probing offset. */
7935 else if (final_adjustment_p && rounded_size == 0)
7936 residual_probe_offset = 0;
7937
7938 aarch64_sub_sp (temp1, temp2, residual, frame_related_p);
7939 if (residual >= min_probe_threshold)
7940 {
7941 if (dump_file)
7942 fprintf (dump_file,
7943 "Stack clash AArch64 prologue residuals: "
7944 HOST_WIDE_INT_PRINT_DEC " bytes, probing will be required."
7945 "\n", residual);
7946
7947 emit_stack_probe (plus_constant (Pmode, stack_pointer_rtx,
7948 residual_probe_offset));
7949 emit_insn (gen_blockage ());
7950 }
7951 }
7952}
7953
a0d0b980
SE
7954/* Return 1 if the register is used by the epilogue. We need to say the
7955 return register is used, but only after epilogue generation is complete.
7956 Note that in the case of sibcalls, the values "used by the epilogue" are
7957 considered live at the start of the called function.
7958
7959 For SIMD functions we need to return 1 for FP registers that are saved and
7960 restored by a function but are not zero in call_used_regs. If we do not
7961 do this, optimizations may remove the restore of the register. */
7962
7963int
7964aarch64_epilogue_uses (int regno)
7965{
7966 if (epilogue_completed)
7967 {
7968 if (regno == LR_REGNUM)
7969 return 1;
a0d0b980
SE
7970 }
7971 return 0;
7972}
7973
43e9d192
IB
7974/* AArch64 stack frames generated by this compiler look like:
7975
7976 +-------------------------------+
7977 | |
7978 | incoming stack arguments |
7979 | |
34834420
MS
7980 +-------------------------------+
7981 | | <-- incoming stack pointer (aligned)
43e9d192
IB
7982 | callee-allocated save area |
7983 | for register varargs |
7984 | |
34834420
MS
7985 +-------------------------------+
7986 | local variables | <-- frame_pointer_rtx
43e9d192
IB
7987 | |
7988 +-------------------------------+
cd1bef27 7989 | padding | \
454fdba9 7990 +-------------------------------+ |
454fdba9 7991 | callee-saved registers | | frame.saved_regs_size
454fdba9
RL
7992 +-------------------------------+ |
7993 | LR' | |
7994 +-------------------------------+ |
c600df9a
RS
7995 | FP' | |
7996 +-------------------------------+ |<- hard_frame_pointer_rtx (aligned)
7997 | SVE vector registers | | \
7998 +-------------------------------+ | | below_hard_fp_saved_regs_size
7999 | SVE predicate registers | / /
8000 +-------------------------------+
43e9d192
IB
8001 | dynamic allocation |
8002 +-------------------------------+
34834420
MS
8003 | padding |
8004 +-------------------------------+
8005 | outgoing stack arguments | <-- arg_pointer
8006 | |
8007 +-------------------------------+
8008 | | <-- stack_pointer_rtx (aligned)
43e9d192 8009
34834420
MS
8010 Dynamic stack allocations via alloca() decrease stack_pointer_rtx
8011 but leave frame_pointer_rtx and hard_frame_pointer_rtx
cd1bef27
JL
8012 unchanged.
8013
8014 By default for stack-clash we assume the guard is at least 64KB, but this
8015 value is configurable to either 4KB or 64KB. We also force the guard size to
8016 be the same as the probing interval and both values are kept in sync.
8017
8018 With those assumptions the callee can allocate up to 63KB (or 3KB depending
8019 on the guard size) of stack space without probing.
8020
8021 When probing is needed, we emit a probe at the start of the prologue
8022 and every PARAM_STACK_CLASH_PROTECTION_GUARD_SIZE bytes thereafter.
8023
8024 We have to track how much space has been allocated and the only stores
8025 to the stack we track as implicit probes are the FP/LR stores.
8026
8027 For outgoing arguments we probe if the size is larger than 1KB, such that
143d3b15
TC
8028 the ABI specified buffer is maintained for the next callee.
8029
8030 The following registers are reserved during frame layout and should not be
8031 used for any other purpose:
8032
c600df9a
RS
8033 - r11: Used by stack clash protection when SVE is enabled, and also
8034 as an anchor register when saving and restoring registers
143d3b15
TC
8035 - r12(EP0) and r13(EP1): Used as temporaries for stack adjustment.
8036 - r14 and r15: Used for speculation tracking.
8037 - r16(IP0), r17(IP1): Used by indirect tailcalls.
8038 - r30(LR), r29(FP): Used by standard frame layout.
8039
8040 These registers must be avoided in frame layout related code unless the
8041 explicit intention is to interact with one of the features listed above. */
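/* [Illustrative sketch, not part of aarch64.c] The arithmetic behind the
   "63KB (or 3KB)" figure above, assuming the 1KB caller-reserved buffer
   (STACK_CLASH_CALLER_GUARD) mentioned for outgoing arguments.  The helper
   name and plain C types below are invented for illustration only.  */
static inline long long
sketch_unprobed_alloc_limit (long long guard_size)
{
  /* The guard equals the probing interval and is either 4KB or 64KB.  */
  const long long caller_buffer = 1024;	/* 1KB reserved for the caller.  */
  return guard_size - caller_buffer;	/* 3KB or 63KB without probing.  */
}
/* e.g. sketch_unprobed_alloc_limit (64 * 1024) == 63 * 1024.  */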
43e9d192
IB
8042
8043/* Generate the prologue instructions for entry into a function.
8044 Establish the stack frame by decreasing the stack pointer with a
8045 properly calculated size and, if necessary, create a frame record
8046 filled with the values of LR and previous frame pointer. The
6991c977 8047 current FP is also set up if it is in use. */
43e9d192
IB
8048
8049void
8050aarch64_expand_prologue (void)
8051{
6a70badb
RS
8052 poly_int64 frame_size = cfun->machine->frame.frame_size;
8053 poly_int64 initial_adjust = cfun->machine->frame.initial_adjust;
71bfb77a 8054 HOST_WIDE_INT callee_adjust = cfun->machine->frame.callee_adjust;
6a70badb
RS
8055 poly_int64 final_adjust = cfun->machine->frame.final_adjust;
8056 poly_int64 callee_offset = cfun->machine->frame.callee_offset;
c600df9a
RS
8057 poly_int64 sve_callee_adjust = cfun->machine->frame.sve_callee_adjust;
8058 poly_int64 below_hard_fp_saved_regs_size
8059 = cfun->machine->frame.below_hard_fp_saved_regs_size;
71bfb77a
WD
8060 unsigned reg1 = cfun->machine->frame.wb_candidate1;
8061 unsigned reg2 = cfun->machine->frame.wb_candidate2;
204d2c03 8062 bool emit_frame_chain = cfun->machine->frame.emit_frame_chain;
71bfb77a 8063 rtx_insn *insn;
43e9d192 8064
c600df9a
RS
8065 if (flag_stack_clash_protection && known_eq (callee_adjust, 0))
8066 {
8067 /* Fold the SVE allocation into the initial allocation.
8068 We don't do this in aarch64_layout_frame to avoid pessimizing
8069 the epilogue code. */
8070 initial_adjust += sve_callee_adjust;
8071 sve_callee_adjust = 0;
8072 }
8073
db58fd89
JW
8074 /* Sign return address for functions. */
8075 if (aarch64_return_address_signing_enabled ())
27169e45 8076 {
8fc16d72
ST
8077 switch (aarch64_ra_sign_key)
8078 {
8079 case AARCH64_KEY_A:
8080 insn = emit_insn (gen_paciasp ());
8081 break;
8082 case AARCH64_KEY_B:
8083 insn = emit_insn (gen_pacibsp ());
8084 break;
8085 default:
8086 gcc_unreachable ();
8087 }
27169e45
JW
8088 add_reg_note (insn, REG_CFA_TOGGLE_RA_MANGLE, const0_rtx);
8089 RTX_FRAME_RELATED_P (insn) = 1;
8090 }
db58fd89 8091
dd991abb 8092 if (flag_stack_usage_info)
6a70badb 8093 current_function_static_stack_size = constant_lower_bound (frame_size);
43e9d192 8094
a3eb8a52
EB
8095 if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK)
8096 {
8097 if (crtl->is_leaf && !cfun->calls_alloca)
8098 {
6a70badb
RS
8099 if (maybe_gt (frame_size, PROBE_INTERVAL)
8100 && maybe_gt (frame_size, get_stack_check_protect ()))
8c1dd970
JL
8101 aarch64_emit_probe_stack_range (get_stack_check_protect (),
8102 (frame_size
8103 - get_stack_check_protect ()));
a3eb8a52 8104 }
6a70badb 8105 else if (maybe_gt (frame_size, 0))
8c1dd970 8106 aarch64_emit_probe_stack_range (get_stack_check_protect (), frame_size);
a3eb8a52
EB
8107 }
8108
901e66e0
SD
8109 rtx tmp0_rtx = gen_rtx_REG (Pmode, EP0_REGNUM);
8110 rtx tmp1_rtx = gen_rtx_REG (Pmode, EP1_REGNUM);
f5470a77 8111
cd1bef27
JL
8112 /* In theory we should never have both an initial adjustment
8113 and a callee save adjustment. Verify that is the case since the
8114 code below does not handle it for -fstack-clash-protection. */
8115 gcc_assert (known_eq (initial_adjust, 0) || callee_adjust == 0);
8116
8117 /* Will only probe if the initial adjustment is larger than the guard
8118 less the amount of the guard reserved for use by the caller's
8119 outgoing args. */
901e66e0 8120 aarch64_allocate_and_probe_stack_space (tmp0_rtx, tmp1_rtx, initial_adjust,
cd1bef27 8121 true, false);
43e9d192 8122
71bfb77a
WD
8123 if (callee_adjust != 0)
8124 aarch64_push_regs (reg1, reg2, callee_adjust);
43e9d192 8125
c600df9a
RS
8126 /* The offset of the frame chain record (if any) from the current SP. */
8127 poly_int64 chain_offset = (initial_adjust + callee_adjust
8128 - cfun->machine->frame.hard_fp_offset);
8129 gcc_assert (known_ge (chain_offset, 0));
8130
8131 /* The offset of the bottom of the save area from the current SP. */
8132 poly_int64 saved_regs_offset = chain_offset - below_hard_fp_saved_regs_size;
8133
204d2c03 8134 if (emit_frame_chain)
43e9d192 8135 {
71bfb77a 8136 if (callee_adjust == 0)
43cacb12
RS
8137 {
8138 reg1 = R29_REGNUM;
8139 reg2 = R30_REGNUM;
c600df9a
RS
8140 aarch64_save_callee_saves (saved_regs_offset, reg1, reg2,
8141 false, false);
43cacb12 8142 }
c600df9a
RS
8143 else
8144 gcc_assert (known_eq (chain_offset, 0));
f5470a77 8145 aarch64_add_offset (Pmode, hard_frame_pointer_rtx,
c600df9a 8146 stack_pointer_rtx, chain_offset,
901e66e0 8147 tmp1_rtx, tmp0_rtx, frame_pointer_needed);
43cacb12
RS
8148 if (frame_pointer_needed && !frame_size.is_constant ())
8149 {
8150 /* Variable-sized frames need to describe the save slot
8151 address using DW_CFA_expression rather than DW_CFA_offset.
8152 This means that, without taking further action, the
8153 locations of the registers that we've already saved would
8154 remain based on the stack pointer even after we redefine
8155 the CFA based on the frame pointer. We therefore need new
8156 DW_CFA_expressions to re-express the save slots with addresses
8157 based on the frame pointer. */
8158 rtx_insn *insn = get_last_insn ();
8159 gcc_assert (RTX_FRAME_RELATED_P (insn));
8160
8161 /* Add an explicit CFA definition if this was previously
8162 implicit. */
8163 if (!find_reg_note (insn, REG_CFA_ADJUST_CFA, NULL_RTX))
8164 {
8165 rtx src = plus_constant (Pmode, stack_pointer_rtx,
8166 callee_offset);
8167 add_reg_note (insn, REG_CFA_ADJUST_CFA,
8168 gen_rtx_SET (hard_frame_pointer_rtx, src));
8169 }
8170
8171 /* Change the save slot expressions for the registers that
8172 we've already saved. */
c600df9a
RS
8173 aarch64_add_cfa_expression (insn, regno_reg_rtx[reg2],
8174 hard_frame_pointer_rtx, UNITS_PER_WORD);
8175 aarch64_add_cfa_expression (insn, regno_reg_rtx[reg1],
8176 hard_frame_pointer_rtx, 0);
43cacb12 8177 }
71bfb77a 8178 emit_insn (gen_stack_tie (stack_pointer_rtx, hard_frame_pointer_rtx));
43e9d192 8179 }
71bfb77a 8180
c600df9a
RS
8181 aarch64_save_callee_saves (saved_regs_offset, R0_REGNUM, R30_REGNUM,
8182 callee_adjust != 0 || emit_frame_chain,
8183 emit_frame_chain);
8184 if (maybe_ne (sve_callee_adjust, 0))
8185 {
8186 gcc_assert (!flag_stack_clash_protection
8187 || known_eq (initial_adjust, 0));
8188 aarch64_allocate_and_probe_stack_space (tmp1_rtx, tmp0_rtx,
8189 sve_callee_adjust,
8190 !frame_pointer_needed, false);
8191 saved_regs_offset += sve_callee_adjust;
8192 }
8193 aarch64_save_callee_saves (saved_regs_offset, P0_REGNUM, P15_REGNUM,
8194 false, emit_frame_chain);
8195 aarch64_save_callee_saves (saved_regs_offset, V0_REGNUM, V31_REGNUM,
8196 callee_adjust != 0 || emit_frame_chain,
8197 emit_frame_chain);
cd1bef27
JL
8198
8199 /* We may need to probe the final adjustment if it is larger than the guard
8200 that is assumed by the callee. */
901e66e0 8201 aarch64_allocate_and_probe_stack_space (tmp1_rtx, tmp0_rtx, final_adjust,
cd1bef27 8202 !frame_pointer_needed, true);
43e9d192
IB
8203}
8204
4f942779
RL
8205/* Return TRUE if we can use a simple_return insn.
8206
8207 This function checks whether the callee saved stack is empty, which
8208 means no restore actions are needed. The pro_and_epilogue pass uses
8209 this to check whether the shrink-wrapping optimization is feasible. */
8210
8211bool
8212aarch64_use_return_insn_p (void)
8213{
8214 if (!reload_completed)
8215 return false;
8216
8217 if (crtl->profile)
8218 return false;
8219
6a70badb 8220 return known_eq (cfun->machine->frame.frame_size, 0);
4f942779
RL
8221}
8222
71bfb77a
WD
8223/* Generate the epilogue instructions for returning from a function.
8224 This is almost exactly the reverse of the prologue sequence, except
8225 that we need to insert barriers to avoid scheduling loads that read
8226 from a deallocated stack, and we optimize the unwind records by
8227 emitting them all together if possible. */
43e9d192
IB
8228void
8229aarch64_expand_epilogue (bool for_sibcall)
8230{
6a70badb 8231 poly_int64 initial_adjust = cfun->machine->frame.initial_adjust;
71bfb77a 8232 HOST_WIDE_INT callee_adjust = cfun->machine->frame.callee_adjust;
6a70badb
RS
8233 poly_int64 final_adjust = cfun->machine->frame.final_adjust;
8234 poly_int64 callee_offset = cfun->machine->frame.callee_offset;
c600df9a
RS
8235 poly_int64 sve_callee_adjust = cfun->machine->frame.sve_callee_adjust;
8236 poly_int64 below_hard_fp_saved_regs_size
8237 = cfun->machine->frame.below_hard_fp_saved_regs_size;
71bfb77a
WD
8238 unsigned reg1 = cfun->machine->frame.wb_candidate1;
8239 unsigned reg2 = cfun->machine->frame.wb_candidate2;
8240 rtx cfi_ops = NULL;
8241 rtx_insn *insn;
901e66e0
SD
8242 /* A stack clash protection prologue may not have left EP0_REGNUM or
8243 EP1_REGNUM in a usable state. The same is true for allocations
43cacb12 8244 with an SVE component, since we then need both temporary registers
cd1bef27
JL
8245 for each allocation. For stack clash we are in a usable state if
8246 the adjustment is less than GUARD_SIZE - GUARD_USED_BY_CALLER. */
8247 HOST_WIDE_INT guard_size
028d4092 8248 = 1 << param_stack_clash_protection_guard_size;
cd1bef27
JL
8249 HOST_WIDE_INT guard_used_by_caller = STACK_CLASH_CALLER_GUARD;
8250
c600df9a
RS
8251 /* We can re-use the registers when:
8252
8253 (a) the deallocation amount is the same as the corresponding
8254 allocation amount (which is false if we combine the initial
8255 and SVE callee save allocations in the prologue); and
8256
8257 (b) the allocation amount doesn't need a probe (which is false
8258 if the amount is guard_size - guard_used_by_caller or greater).
8259
8260 In such situations the register should remain live with the correct
cd1bef27 8261 value. */
43cacb12 8262 bool can_inherit_p = (initial_adjust.is_constant ()
c600df9a 8263 && final_adjust.is_constant ()
cd1bef27 8264 && (!flag_stack_clash_protection
c600df9a
RS
8265 || (known_lt (initial_adjust,
8266 guard_size - guard_used_by_caller)
8267 && known_eq (sve_callee_adjust, 0))));
44c0e7b9 8268
71bfb77a 8269 /* We need to add memory barrier to prevent read from deallocated stack. */
6a70badb
RS
8270 bool need_barrier_p
8271 = maybe_ne (get_frame_size ()
8272 + cfun->machine->frame.saved_varargs_size, 0);
43e9d192 8273
71bfb77a 8274 /* Emit a barrier to prevent loads from a deallocated stack. */
6a70badb
RS
8275 if (maybe_gt (final_adjust, crtl->outgoing_args_size)
8276 || cfun->calls_alloca
8144a493 8277 || crtl->calls_eh_return)
43e9d192 8278 {
71bfb77a
WD
8279 emit_insn (gen_stack_tie (stack_pointer_rtx, stack_pointer_rtx));
8280 need_barrier_p = false;
8281 }
7e8c2bd5 8282
71bfb77a
WD
8283 /* Restore the stack pointer from the frame pointer if it may not
8284 be the same as the stack pointer. */
901e66e0
SD
8285 rtx tmp0_rtx = gen_rtx_REG (Pmode, EP0_REGNUM);
8286 rtx tmp1_rtx = gen_rtx_REG (Pmode, EP1_REGNUM);
6a70badb
RS
8287 if (frame_pointer_needed
8288 && (maybe_ne (final_adjust, 0) || cfun->calls_alloca))
f5470a77
RS
8289 /* If writeback is used when restoring callee-saves, the CFA
8290 is restored on the instruction doing the writeback. */
8291 aarch64_add_offset (Pmode, stack_pointer_rtx,
c600df9a
RS
8292 hard_frame_pointer_rtx,
8293 -callee_offset - below_hard_fp_saved_regs_size,
901e66e0 8294 tmp1_rtx, tmp0_rtx, callee_adjust == 0);
71bfb77a 8295 else
cd1bef27
JL
8296 /* The case where we need to re-use the register here is very rare, so
8297 avoid the complicated condition and just always emit a move if the
8298 immediate doesn't fit. */
901e66e0 8299 aarch64_add_sp (tmp1_rtx, tmp0_rtx, final_adjust, true);
43e9d192 8300
c600df9a
RS
8301 /* Restore the vector registers before the predicate registers,
8302 so that we can use P4 as a temporary for big-endian SVE frames. */
8303 aarch64_restore_callee_saves (callee_offset, V0_REGNUM, V31_REGNUM,
8304 callee_adjust != 0, &cfi_ops);
8305 aarch64_restore_callee_saves (callee_offset, P0_REGNUM, P15_REGNUM,
8306 false, &cfi_ops);
8307 if (maybe_ne (sve_callee_adjust, 0))
8308 aarch64_add_sp (NULL_RTX, NULL_RTX, sve_callee_adjust, true);
8309 aarch64_restore_callee_saves (callee_offset - sve_callee_adjust,
8310 R0_REGNUM, R30_REGNUM,
71bfb77a 8311 callee_adjust != 0, &cfi_ops);
43e9d192 8312
71bfb77a
WD
8313 if (need_barrier_p)
8314 emit_insn (gen_stack_tie (stack_pointer_rtx, stack_pointer_rtx));
8315
8316 if (callee_adjust != 0)
8317 aarch64_pop_regs (reg1, reg2, callee_adjust, &cfi_ops);
8318
1ccbfffb
RS
8319 /* If we have no register restore information, the CFA must have been
8320 defined in terms of the stack pointer since the end of the prologue. */
8321 gcc_assert (cfi_ops || !frame_pointer_needed);
8322
8323 if (cfi_ops && (callee_adjust != 0 || maybe_gt (initial_adjust, 65536)))
71bfb77a
WD
8324 {
8325 /* Emit delayed restores and set the CFA to be SP + initial_adjust. */
89ac681e 8326 insn = get_last_insn ();
71bfb77a
WD
8327 rtx new_cfa = plus_constant (Pmode, stack_pointer_rtx, initial_adjust);
8328 REG_NOTES (insn) = alloc_reg_note (REG_CFA_DEF_CFA, new_cfa, cfi_ops);
43e9d192 8329 RTX_FRAME_RELATED_P (insn) = 1;
71bfb77a 8330 cfi_ops = NULL;
43e9d192
IB
8331 }
8332
901e66e0
SD
8333 /* Liveness of EP0_REGNUM cannot be trusted across function calls either,
8334 so restrict the emit_move optimization to leaf functions. */
8335 aarch64_add_sp (tmp0_rtx, tmp1_rtx, initial_adjust,
8336 (!can_inherit_p || !crtl->is_leaf
8337 || df_regs_ever_live_p (EP0_REGNUM)));
7e8c2bd5 8338
71bfb77a
WD
8339 if (cfi_ops)
8340 {
8341 /* Emit delayed restores and reset the CFA to be SP. */
8342 insn = get_last_insn ();
8343 cfi_ops = alloc_reg_note (REG_CFA_DEF_CFA, stack_pointer_rtx, cfi_ops);
8344 REG_NOTES (insn) = cfi_ops;
8345 RTX_FRAME_RELATED_P (insn) = 1;
dd991abb
RH
8346 }
8347
db58fd89
JW
8348 /* We prefer to emit the combined return/authenticate instruction RETAA,
8349 however there are three cases in which we must instead emit an explicit
8350 authentication instruction.
8351
8352 1) Sibcalls don't return in a normal way, so if we're about to call one
8353 we must authenticate.
8354
8355 2) The RETAA instruction is not available before ARMv8.3-A, so if we are
8356 generating code for !TARGET_ARMV8_3 we can't use it and must
8357 explicitly authenticate.
8358
8359 3) On an eh_return path we make extra stack adjustments to update the
8360 canonical frame address to be the exception handler's CFA. We want
8361 to authenticate using the CFA of the function which calls eh_return.
8362 */
8363 if (aarch64_return_address_signing_enabled ()
8364 && (for_sibcall || !TARGET_ARMV8_3 || crtl->calls_eh_return))
27169e45 8365 {
8fc16d72
ST
8366 switch (aarch64_ra_sign_key)
8367 {
8368 case AARCH64_KEY_A:
8369 insn = emit_insn (gen_autiasp ());
8370 break;
8371 case AARCH64_KEY_B:
8372 insn = emit_insn (gen_autibsp ());
8373 break;
8374 default:
8375 gcc_unreachable ();
8376 }
27169e45
JW
8377 add_reg_note (insn, REG_CFA_TOGGLE_RA_MANGLE, const0_rtx);
8378 RTX_FRAME_RELATED_P (insn) = 1;
8379 }
db58fd89 8380
dd991abb 8381 /* Stack adjustment for exception handler. */
b5b9147d 8382 if (crtl->calls_eh_return && !for_sibcall)
dd991abb
RH
8383 {
8384 /* We need to unwind the stack by the offset computed by
8385 EH_RETURN_STACKADJ_RTX. We have already reset the CFA
8386 to be SP; letting the CFA move during this adjustment
8387 is just as correct as retaining the CFA from the body
8388 of the function. Therefore, do nothing special. */
8389 emit_insn (gen_add2_insn (stack_pointer_rtx, EH_RETURN_STACKADJ_RTX));
43e9d192
IB
8390 }
8391
8392 emit_use (gen_rtx_REG (DImode, LR_REGNUM));
8393 if (!for_sibcall)
8394 emit_jump_insn (ret_rtx);
8395}
8396
8144a493
WD
8397/* Implement EH_RETURN_HANDLER_RTX. EH returns need to either return
8398 normally or return to a previous frame after unwinding.
1c960e02 8399
8144a493
WD
8400 An EH return uses a single shared return sequence. The epilogue is
8401 exactly like a normal epilogue except that it has an extra input
8402 register (EH_RETURN_STACKADJ_RTX) which contains the stack adjustment
8403 that must be applied after the frame has been destroyed. An extra label
8404 is inserted before the epilogue which initializes this register to zero,
8405 and this is the entry point for a normal return.
43e9d192 8406
8144a493
WD
8407 An actual EH return updates the return address, initializes the stack
8408 adjustment and jumps directly into the epilogue (bypassing the zeroing
8409 of the adjustment). Since the return address is typically saved on the
8410 stack when a function makes a call, the saved LR must be updated outside
8411 the epilogue.
43e9d192 8412
8144a493
WD
8413 This poses problems as the store is generated well before the epilogue,
8414 so the offset of LR is not known yet. Also optimizations will remove the
8415 store as it appears dead, even after the epilogue is generated (as the
8416 base or offset for loading LR is different in many cases).
43e9d192 8417
8144a493
WD
8418 To avoid these problems this implementation forces the frame pointer
8419 in eh_return functions so that the location of LR is fixed and known early.
8420 It also marks the store volatile, so no optimization is permitted to
8421 remove the store. */
8422rtx
8423aarch64_eh_return_handler_rtx (void)
8424{
8425 rtx tmp = gen_frame_mem (Pmode,
8426 plus_constant (Pmode, hard_frame_pointer_rtx, UNITS_PER_WORD));
43e9d192 8427
8144a493
WD
8428 /* Mark the store volatile, so no optimization is permitted to remove it. */
8429 MEM_VOLATILE_P (tmp) = true;
8430 return tmp;
43e9d192
IB
8431}
8432
43e9d192
IB
8433/* Output code to add DELTA to the first argument, and then jump
8434 to FUNCTION. Used for C++ multiple inheritance. */
8435static void
8436aarch64_output_mi_thunk (FILE *file, tree thunk ATTRIBUTE_UNUSED,
8437 HOST_WIDE_INT delta,
8438 HOST_WIDE_INT vcall_offset,
8439 tree function)
8440{
8441 /* The this pointer is always in x0. Note that this differs from
8442 Arm, where the this pointer may be bumped to r1 if r0 is required
8443 to return a pointer to an aggregate. On AArch64 a result value
8444 pointer will be in x8. */
8445 int this_regno = R0_REGNUM;
5d8a22a5
DM
8446 rtx this_rtx, temp0, temp1, addr, funexp;
8447 rtx_insn *insn;
6b5777c6 8448 const char *fnname = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (thunk));
43e9d192 8449
c904388d
SD
8450 if (aarch64_bti_enabled ())
8451 emit_insn (gen_bti_c());
8452
75f1d6fc
SN
8453 reload_completed = 1;
8454 emit_note (NOTE_INSN_PROLOGUE_END);
43e9d192 8455
f5470a77 8456 this_rtx = gen_rtx_REG (Pmode, this_regno);
901e66e0
SD
8457 temp0 = gen_rtx_REG (Pmode, EP0_REGNUM);
8458 temp1 = gen_rtx_REG (Pmode, EP1_REGNUM);
f5470a77 8459
43e9d192 8460 if (vcall_offset == 0)
43cacb12 8461 aarch64_add_offset (Pmode, this_rtx, this_rtx, delta, temp1, temp0, false);
43e9d192
IB
8462 else
8463 {
28514dda 8464 gcc_assert ((vcall_offset & (POINTER_BYTES - 1)) == 0);
43e9d192 8465
75f1d6fc
SN
8466 addr = this_rtx;
8467 if (delta != 0)
8468 {
8469 if (delta >= -256 && delta < 256)
8470 addr = gen_rtx_PRE_MODIFY (Pmode, this_rtx,
8471 plus_constant (Pmode, this_rtx, delta));
8472 else
43cacb12
RS
8473 aarch64_add_offset (Pmode, this_rtx, this_rtx, delta,
8474 temp1, temp0, false);
43e9d192
IB
8475 }
8476
28514dda
YZ
8477 if (Pmode == ptr_mode)
8478 aarch64_emit_move (temp0, gen_rtx_MEM (ptr_mode, addr));
8479 else
8480 aarch64_emit_move (temp0,
8481 gen_rtx_ZERO_EXTEND (Pmode,
8482 gen_rtx_MEM (ptr_mode, addr)));
75f1d6fc 8483
28514dda 8484 if (vcall_offset >= -256 && vcall_offset < 4096 * POINTER_BYTES)
75f1d6fc 8485 addr = plus_constant (Pmode, temp0, vcall_offset);
43e9d192
IB
8486 else
8487 {
f43657b4
JW
8488 aarch64_internal_mov_immediate (temp1, GEN_INT (vcall_offset), true,
8489 Pmode);
75f1d6fc 8490 addr = gen_rtx_PLUS (Pmode, temp0, temp1);
43e9d192
IB
8491 }
8492
28514dda
YZ
8493 if (Pmode == ptr_mode)
8494 aarch64_emit_move (temp1, gen_rtx_MEM (ptr_mode,addr));
8495 else
8496 aarch64_emit_move (temp1,
8497 gen_rtx_SIGN_EXTEND (Pmode,
8498 gen_rtx_MEM (ptr_mode, addr)));
8499
75f1d6fc 8500 emit_insn (gen_add2_insn (this_rtx, temp1));
43e9d192
IB
8501 }
8502
75f1d6fc
SN
8503 /* Generate a tail call to the target function. */
8504 if (!TREE_USED (function))
8505 {
8506 assemble_external (function);
8507 TREE_USED (function) = 1;
8508 }
8509 funexp = XEXP (DECL_RTL (function), 0);
8510 funexp = gen_rtx_MEM (FUNCTION_MODE, funexp);
08cc4d92
RS
8511 rtx callee_abi = gen_int_mode (fndecl_abi (function).id (), DImode);
8512 insn = emit_call_insn (gen_sibcall (funexp, const0_rtx, callee_abi));
75f1d6fc
SN
8513 SIBLING_CALL_P (insn) = 1;
8514
8515 insn = get_insns ();
8516 shorten_branches (insn);
6b5777c6
MF
8517
8518 assemble_start_function (thunk, fnname);
75f1d6fc
SN
8519 final_start_function (insn, file, 1);
8520 final (insn, file, 1);
43e9d192 8521 final_end_function ();
6b5777c6 8522 assemble_end_function (thunk, fnname);
75f1d6fc
SN
8523
8524 /* Stop pretending to be a post-reload pass. */
8525 reload_completed = 0;
43e9d192
IB
8526}
8527
43e9d192
IB
8528static bool
8529aarch64_tls_referenced_p (rtx x)
8530{
8531 if (!TARGET_HAVE_TLS)
8532 return false;
e7de8563
RS
8533 subrtx_iterator::array_type array;
8534 FOR_EACH_SUBRTX (iter, array, x, ALL)
8535 {
8536 const_rtx x = *iter;
8537 if (GET_CODE (x) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (x) != 0)
8538 return true;
8539 /* Don't recurse into UNSPEC_TLS looking for TLS symbols; these are
8540 TLS offsets, not real symbol references. */
8541 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
8542 iter.skip_subrtxes ();
8543 }
8544 return false;
43e9d192
IB
8545}
8546
8547
43e9d192
IB
8548/* Return true if val can be encoded as a 12-bit unsigned immediate with
8549 a left shift of 0 or 12 bits. */
8550bool
8551aarch64_uimm12_shift (HOST_WIDE_INT val)
8552{
8553 return ((val & (((HOST_WIDE_INT) 0xfff) << 0)) == val
8554 || (val & (((HOST_WIDE_INT) 0xfff) << 12)) == val
8555 );
8556}
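/* [Illustrative sketch, not part of aarch64.c] The same test as
   aarch64_uimm12_shift above, written with plain unsigned 64-bit arithmetic
   (HOST_WIDE_INT is assumed to be 64 bits here); the name is invented.  */
static inline int
sketch_uimm12_shift_p (unsigned long long val)
{
  /* All set bits must fall within a single 12-bit field, either at bit 0
     or at bit 12, i.e. the form used by ADD/SUB immediates.  */
  return (val & 0xfffULL) == val || (val & (0xfffULL << 12)) == val;
}
/* e.g. 0xabc and 0xabc000 are accepted, while 0x1abc is rejected because
   its set bits straddle both 12-bit fields.  */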
8557
eb471ba3
TC
8558/* Returns the nearest value to VAL that will fit as a 12-bit unsigned immediate
8559 that can be created with a left shift of 0 or 12. */
8560static HOST_WIDE_INT
8561aarch64_clamp_to_uimm12_shift (HOST_WIDE_INT val)
8562{
8563 /* Check to see if the value fits in 24 bits, as that is the maximum we can
8564 handle correctly. */
8565 gcc_assert ((val & 0xffffff) == val);
8566
8567 if (((val & 0xfff) << 0) == val)
8568 return val;
8569
8570 return val & (0xfff << 12);
8571}
43e9d192
IB
8572
8573/* Return true if val is an immediate that can be loaded into a
8574 register by a MOVZ instruction. */
8575static bool
77e994c9 8576aarch64_movw_imm (HOST_WIDE_INT val, scalar_int_mode mode)
43e9d192
IB
8577{
8578 if (GET_MODE_SIZE (mode) > 4)
8579 {
8580 if ((val & (((HOST_WIDE_INT) 0xffff) << 32)) == val
8581 || (val & (((HOST_WIDE_INT) 0xffff) << 48)) == val)
8582 return 1;
8583 }
8584 else
8585 {
43cacb12
RS
8586 /* Ignore sign extension. */
8587 val &= (HOST_WIDE_INT) 0xffffffff;
8588 }
8589 return ((val & (((HOST_WIDE_INT) 0xffff) << 0)) == val
8590 || (val & (((HOST_WIDE_INT) 0xffff) << 16)) == val);
8591}
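/* [Illustrative sketch, not part of aarch64.c] For the 64-bit (DImode) case,
   aarch64_movw_imm above accepts values whose set bits all fall in a single
   16-bit field; the generalised loop below is an invented stand-in using
   plain C types.  */
static inline int
sketch_movz_imm_p (unsigned long long val)
{
  for (int shift = 0; shift < 64; shift += 16)
    if ((val & (0xffffULL << shift)) == val)
      return 1;
  return 0;
}
/* e.g. 0x12340000 is accepted (MOVZ Xd, #0x1234, LSL #16), while
   0x0000123400005678 is not and needs a MOVZ/MOVK sequence.  */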
8592
bba0c624
RS
8593/* Test whether:
8594
8595 X = (X & AND_VAL) | IOR_VAL;
8596
8597 can be implemented using:
8598
8599 MOVK X, #(IOR_VAL >> shift), LSL #shift
8600
8601 Return the shift if so, otherwise return -1. */
8602int
8603aarch64_movk_shift (const wide_int_ref &and_val,
8604 const wide_int_ref &ior_val)
8605{
8606 unsigned int precision = and_val.get_precision ();
8607 unsigned HOST_WIDE_INT mask = 0xffff;
8608 for (unsigned int shift = 0; shift < precision; shift += 16)
8609 {
8610 if (and_val == ~mask && (ior_val & mask) == ior_val)
8611 return shift;
8612 mask <<= 16;
8613 }
8614 return -1;
8615}
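/* [Illustrative sketch, not part of aarch64.c] The same search as
   aarch64_movk_shift above, specialised to 64-bit values with plain C types;
   the name is invented.  */
static inline int
sketch_movk_shift (unsigned long long and_val, unsigned long long ior_val)
{
  unsigned long long mask = 0xffff;
  for (int shift = 0; shift < 64; shift += 16)
    {
      /* The AND must clear exactly one 16-bit field and the OR must only
	 set bits inside that same field.  */
      if (and_val == ~mask && (ior_val & mask) == ior_val)
	return shift;
      mask <<= 16;
    }
  return -1;
}
/* e.g. sketch_movk_shift (~0xffff0000ULL, 0x12340000ULL) == 16, i.e.
   x = (x & ~0xffff0000) | 0x12340000 maps to "movk x0, #0x1234, lsl #16".  */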
8616
43cacb12
RS
8617/* VAL is a value with the inner mode of MODE. Replicate it to fill a
8618 64-bit (DImode) integer. */
8619
8620static unsigned HOST_WIDE_INT
8621aarch64_replicate_bitmask_imm (unsigned HOST_WIDE_INT val, machine_mode mode)
8622{
8623 unsigned int size = GET_MODE_UNIT_PRECISION (mode);
8624 while (size < 64)
8625 {
8626 val &= (HOST_WIDE_INT_1U << size) - 1;
8627 val |= val << size;
8628 size *= 2;
43e9d192 8629 }
43cacb12 8630 return val;
43e9d192
IB
8631}
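/* [Illustrative sketch, not part of aarch64.c] The replication above on
   concrete element sizes, using plain C types; the name is invented.  */
static inline unsigned long long
sketch_replicate_bitmask (unsigned long long val, unsigned int elt_bits)
{
  while (elt_bits < 64)
    {
      val &= (1ULL << elt_bits) - 1;	/* Keep one element's worth of bits.  */
      val |= val << elt_bits;		/* Duplicate it into the next element.  */
      elt_bits *= 2;
    }
  return val;
}
/* e.g. an 8-bit element 0xa5 becomes 0xa5a5a5a5a5a5a5a5 and a 32-bit
   element 0xffff becomes 0x0000ffff0000ffff.  */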
8632
a64c73a2
WD
8633/* Multipliers for repeating bitmasks of width 32, 16, 8, 4, and 2. */
8634
8635static const unsigned HOST_WIDE_INT bitmask_imm_mul[] =
8636 {
8637 0x0000000100000001ull,
8638 0x0001000100010001ull,
8639 0x0101010101010101ull,
8640 0x1111111111111111ull,
8641 0x5555555555555555ull,
8642 };
8643
43e9d192
IB
8644
8645/* Return true if val is a valid bitmask immediate. */
a64c73a2 8646
43e9d192 8647bool
a64c73a2 8648aarch64_bitmask_imm (HOST_WIDE_INT val_in, machine_mode mode)
43e9d192 8649{
a64c73a2
WD
8650 unsigned HOST_WIDE_INT val, tmp, mask, first_one, next_one;
8651 int bits;
8652
8653 /* Check for a single sequence of one bits and return quickly if so.
8654 The special cases of all ones and all zeroes returns false. */
43cacb12 8655 val = aarch64_replicate_bitmask_imm (val_in, mode);
a64c73a2
WD
8656 tmp = val + (val & -val);
8657
8658 if (tmp == (tmp & -tmp))
8659 return (val + 1) > 1;
8660
8661 /* Replicate 32-bit immediates so we can treat them as 64-bit. */
8662 if (mode == SImode)
8663 val = (val << 32) | (val & 0xffffffff);
8664
8665 /* Invert if the immediate doesn't start with a zero bit - this means we
8666 only need to search for sequences of one bits. */
8667 if (val & 1)
8668 val = ~val;
8669
8670 /* Find the first set bit and set tmp to val with the first sequence of one
8671 bits removed. Return success if there is a single sequence of ones. */
8672 first_one = val & -val;
8673 tmp = val & (val + first_one);
8674
8675 if (tmp == 0)
8676 return true;
8677
8678 /* Find the next set bit and compute the difference in bit position. */
8679 next_one = tmp & -tmp;
8680 bits = clz_hwi (first_one) - clz_hwi (next_one);
8681 mask = val ^ tmp;
8682
8683 /* Check the bit position difference is a power of 2, and that the first
8684 sequence of one bits fits within 'bits' bits. */
8685 if ((mask >> bits) != 0 || bits != (bits & -bits))
8686 return false;
8687
8688 /* Check the sequence of one bits is repeated 64/bits times. */
8689 return val == mask * bitmask_imm_mul[__builtin_clz (bits) - 26];
43e9d192
IB
8690}
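/* [Illustrative sketch, not part of aarch64.c] The first early-exit in
   aarch64_bitmask_imm above, in isolation: a replicated value made of a
   single contiguous block of ones is accepted immediately.  Plain C types
   are used and the name is invented.  */
static inline int
sketch_single_run_of_ones_p (unsigned long long val)
{
  /* Adding the lowest set bit to VAL turns a single run of ones into a
     single bit (or into zero on overflow), i.e. a power of two.  */
  unsigned long long tmp = val + (val & -val);
  return (tmp & (tmp - 1)) == 0 && val + 1 > 1;	/* Reject 0 and ~0.  */
}
/* e.g. 0x0000000000ff0000 passes (0xff0000 + 0x10000 == 0x1000000), while
   0x00ff00ff00ff00ff does not and, in the function above, falls through to
   the general check.  */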
8691
43fd192f
MC
8692/* Create mask of ones, covering the lowest to highest bits set in VAL_IN.
8693 Assumed precondition: VAL_IN Is not zero. */
8694
8695unsigned HOST_WIDE_INT
8696aarch64_and_split_imm1 (HOST_WIDE_INT val_in)
8697{
8698 int lowest_bit_set = ctz_hwi (val_in);
8699 int highest_bit_set = floor_log2 (val_in);
8700 gcc_assert (val_in != 0);
8701
8702 return ((HOST_WIDE_INT_UC (2) << highest_bit_set) -
8703 (HOST_WIDE_INT_1U << lowest_bit_set));
8704}
8705
8706/* Create constant where bits outside of lowest bit set to highest bit set
8707 are set to 1. */
8708
8709unsigned HOST_WIDE_INT
8710aarch64_and_split_imm2 (HOST_WIDE_INT val_in)
8711{
8712 return val_in | ~aarch64_and_split_imm1 (val_in);
8713}
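/* [Illustrative sketch, not part of aarch64.c] The same computation as
   aarch64_and_split_imm1 above, using GCC builtins and plain 64-bit types;
   the name is invented and VAL is assumed non-zero.  */
static inline unsigned long long
sketch_and_split_imm1 (unsigned long long val)
{
  int lowest = __builtin_ctzll (val);		/* Position of lowest set bit.  */
  int highest = 63 - __builtin_clzll (val);	/* Position of highest set bit.  */
  return (2ULL << highest) - (1ULL << lowest);
}
/* e.g. for 0x00ff00ee the set bits span positions 1..23, so imm1 is
   (2 << 23) - (1 << 1) == 0x00fffffe, and aarch64_and_split_imm2 then
   gives 0x00ff00ee | ~0x00fffffe == 0xffffffffffff00ef.  */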
8714
8715/* Return true if VAL_IN is a valid 'and' bitmask immediate. */
8716
8717bool
8718aarch64_and_bitmask_imm (unsigned HOST_WIDE_INT val_in, machine_mode mode)
8719{
77e994c9
RS
8720 scalar_int_mode int_mode;
8721 if (!is_a <scalar_int_mode> (mode, &int_mode))
8722 return false;
8723
8724 if (aarch64_bitmask_imm (val_in, int_mode))
43fd192f
MC
8725 return false;
8726
77e994c9 8727 if (aarch64_move_imm (val_in, int_mode))
43fd192f
MC
8728 return false;
8729
8730 unsigned HOST_WIDE_INT imm2 = aarch64_and_split_imm2 (val_in);
8731
77e994c9 8732 return aarch64_bitmask_imm (imm2, int_mode);
43fd192f 8733}
43e9d192
IB
8734
8735/* Return true if val is an immediate that can be loaded into a
8736 register in a single instruction. */
8737bool
ef4bddc2 8738aarch64_move_imm (HOST_WIDE_INT val, machine_mode mode)
43e9d192 8739{
77e994c9
RS
8740 scalar_int_mode int_mode;
8741 if (!is_a <scalar_int_mode> (mode, &int_mode))
8742 return false;
8743
8744 if (aarch64_movw_imm (val, int_mode) || aarch64_movw_imm (~val, int_mode))
43e9d192 8745 return 1;
77e994c9 8746 return aarch64_bitmask_imm (val, int_mode);
43e9d192
IB
8747}
8748
8749static bool
ef4bddc2 8750aarch64_cannot_force_const_mem (machine_mode mode ATTRIBUTE_UNUSED, rtx x)
43e9d192 8751{
43e9d192
IB
8752 if (GET_CODE (x) == HIGH)
8753 return true;
8754
43cacb12
RS
8755 /* There's no way to calculate VL-based values using relocations. */
8756 subrtx_iterator::array_type array;
8757 FOR_EACH_SUBRTX (iter, array, x, ALL)
8758 if (GET_CODE (*iter) == CONST_POLY_INT)
8759 return true;
8760
74b27d8e
RS
8761 poly_int64 offset;
8762 rtx base = strip_offset_and_salt (x, &offset);
43e9d192 8763 if (GET_CODE (base) == SYMBOL_REF || GET_CODE (base) == LABEL_REF)
28514dda 8764 {
74b27d8e
RS
8765 /* We checked for POLY_INT_CST offsets above. */
8766 if (aarch64_classify_symbol (base, offset.to_constant ())
28514dda
YZ
8767 != SYMBOL_FORCE_TO_MEM)
8768 return true;
8769 else
8770 /* Avoid generating a 64-bit relocation in ILP32; leave
8771 to aarch64_expand_mov_immediate to handle it properly. */
8772 return mode != ptr_mode;
8773 }
43e9d192
IB
8774
8775 return aarch64_tls_referenced_p (x);
8776}
8777
e79136e4
WD
8778/* Implement TARGET_CASE_VALUES_THRESHOLD.
8779 The expansion for a table switch is quite expensive due to the number
8780 of instructions, the table lookup and the hard-to-predict indirect jump.
8781 When optimizing for speed at -O3 and above, use the per-core tuning if
8782 set; otherwise use tables for more than 16 cases as a tradeoff between size and
8783 performance. When optimizing for size, use the default setting. */
50487d79
EM
8784
8785static unsigned int
8786aarch64_case_values_threshold (void)
8787{
8788 /* Use the specified limit for the number of cases before using jump
8789 tables at higher optimization levels. */
8790 if (optimize > 2
8791 && selected_cpu->tune->max_case_values != 0)
8792 return selected_cpu->tune->max_case_values;
8793 else
e79136e4 8794 return optimize_size ? default_case_values_threshold () : 17;
50487d79
EM
8795}
8796
43e9d192
IB
8797/* Return true if register REGNO is a valid index register.
8798 STRICT_P is true if REG_OK_STRICT is in effect. */
8799
8800bool
8801aarch64_regno_ok_for_index_p (int regno, bool strict_p)
8802{
8803 if (!HARD_REGISTER_NUM_P (regno))
8804 {
8805 if (!strict_p)
8806 return true;
8807
8808 if (!reg_renumber)
8809 return false;
8810
8811 regno = reg_renumber[regno];
8812 }
8813 return GP_REGNUM_P (regno);
8814}
8815
8816/* Return true if register REGNO is a valid base register for mode MODE.
8817 STRICT_P is true if REG_OK_STRICT is in effect. */
8818
8819bool
8820aarch64_regno_ok_for_base_p (int regno, bool strict_p)
8821{
8822 if (!HARD_REGISTER_NUM_P (regno))
8823 {
8824 if (!strict_p)
8825 return true;
8826
8827 if (!reg_renumber)
8828 return false;
8829
8830 regno = reg_renumber[regno];
8831 }
8832
8833 /* The fake registers will be eliminated to either the stack or
8834 hard frame pointer, both of which are usually valid base registers.
8835 Reload deals with the cases where the eliminated form isn't valid. */
8836 return (GP_REGNUM_P (regno)
8837 || regno == SP_REGNUM
8838 || regno == FRAME_POINTER_REGNUM
8839 || regno == ARG_POINTER_REGNUM);
8840}
8841
8842/* Return true if X is a valid base register for mode MODE.
8843 STRICT_P is true if REG_OK_STRICT is in effect. */
8844
8845static bool
8846aarch64_base_register_rtx_p (rtx x, bool strict_p)
8847{
76160199
RS
8848 if (!strict_p
8849 && GET_CODE (x) == SUBREG
8850 && contains_reg_of_mode[GENERAL_REGS][GET_MODE (SUBREG_REG (x))])
43e9d192
IB
8851 x = SUBREG_REG (x);
8852
8853 return (REG_P (x) && aarch64_regno_ok_for_base_p (REGNO (x), strict_p));
8854}
8855
8856/* Return true if address offset is a valid index. If it is, fill in INFO
8857 appropriately. STRICT_P is true if REG_OK_STRICT is in effect. */
8858
8859static bool
8860aarch64_classify_index (struct aarch64_address_info *info, rtx x,
ef4bddc2 8861 machine_mode mode, bool strict_p)
43e9d192
IB
8862{
8863 enum aarch64_address_type type;
8864 rtx index;
8865 int shift;
8866
8867 /* (reg:P) */
8868 if ((REG_P (x) || GET_CODE (x) == SUBREG)
8869 && GET_MODE (x) == Pmode)
8870 {
8871 type = ADDRESS_REG_REG;
8872 index = x;
8873 shift = 0;
8874 }
8875 /* (sign_extend:DI (reg:SI)) */
8876 else if ((GET_CODE (x) == SIGN_EXTEND
8877 || GET_CODE (x) == ZERO_EXTEND)
8878 && GET_MODE (x) == DImode
8879 && GET_MODE (XEXP (x, 0)) == SImode)
8880 {
8881 type = (GET_CODE (x) == SIGN_EXTEND)
8882 ? ADDRESS_REG_SXTW : ADDRESS_REG_UXTW;
8883 index = XEXP (x, 0);
8884 shift = 0;
8885 }
8886 /* (mult:DI (sign_extend:DI (reg:SI)) (const_int scale)) */
8887 else if (GET_CODE (x) == MULT
8888 && (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
8889 || GET_CODE (XEXP (x, 0)) == ZERO_EXTEND)
8890 && GET_MODE (XEXP (x, 0)) == DImode
8891 && GET_MODE (XEXP (XEXP (x, 0), 0)) == SImode
8892 && CONST_INT_P (XEXP (x, 1)))
8893 {
8894 type = (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND)
8895 ? ADDRESS_REG_SXTW : ADDRESS_REG_UXTW;
8896 index = XEXP (XEXP (x, 0), 0);
8897 shift = exact_log2 (INTVAL (XEXP (x, 1)));
8898 }
8899 /* (ashift:DI (sign_extend:DI (reg:SI)) (const_int shift)) */
8900 else if (GET_CODE (x) == ASHIFT
8901 && (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
8902 || GET_CODE (XEXP (x, 0)) == ZERO_EXTEND)
8903 && GET_MODE (XEXP (x, 0)) == DImode
8904 && GET_MODE (XEXP (XEXP (x, 0), 0)) == SImode
8905 && CONST_INT_P (XEXP (x, 1)))
8906 {
8907 type = (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND)
8908 ? ADDRESS_REG_SXTW : ADDRESS_REG_UXTW;
8909 index = XEXP (XEXP (x, 0), 0);
8910 shift = INTVAL (XEXP (x, 1));
8911 }
43e9d192
IB
8912 /* (and:DI (mult:DI (reg:DI) (const_int scale))
8913 (const_int 0xffffffff<<shift)) */
8914 else if (GET_CODE (x) == AND
8915 && GET_MODE (x) == DImode
8916 && GET_CODE (XEXP (x, 0)) == MULT
8917 && GET_MODE (XEXP (XEXP (x, 0), 0)) == DImode
8918 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
8919 && CONST_INT_P (XEXP (x, 1)))
8920 {
8921 type = ADDRESS_REG_UXTW;
8922 index = XEXP (XEXP (x, 0), 0);
8923 shift = exact_log2 (INTVAL (XEXP (XEXP (x, 0), 1)));
8924 if (INTVAL (XEXP (x, 1)) != (HOST_WIDE_INT)0xffffffff << shift)
8925 shift = -1;
8926 }
43e9d192
IB
8927 /* (and:DI (ashift:DI (reg:DI) (const_int shift))
8928 (const_int 0xffffffff<<shift)) */
8929 else if (GET_CODE (x) == AND
8930 && GET_MODE (x) == DImode
8931 && GET_CODE (XEXP (x, 0)) == ASHIFT
8932 && GET_MODE (XEXP (XEXP (x, 0), 0)) == DImode
8933 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
8934 && CONST_INT_P (XEXP (x, 1)))
8935 {
8936 type = ADDRESS_REG_UXTW;
8937 index = XEXP (XEXP (x, 0), 0);
8938 shift = INTVAL (XEXP (XEXP (x, 0), 1));
8939 if (INTVAL (XEXP (x, 1)) != (HOST_WIDE_INT)0xffffffff << shift)
8940 shift = -1;
8941 }
8942 /* (mult:P (reg:P) (const_int scale)) */
8943 else if (GET_CODE (x) == MULT
8944 && GET_MODE (x) == Pmode
8945 && GET_MODE (XEXP (x, 0)) == Pmode
8946 && CONST_INT_P (XEXP (x, 1)))
8947 {
8948 type = ADDRESS_REG_REG;
8949 index = XEXP (x, 0);
8950 shift = exact_log2 (INTVAL (XEXP (x, 1)));
8951 }
8952 /* (ashift:P (reg:P) (const_int shift)) */
8953 else if (GET_CODE (x) == ASHIFT
8954 && GET_MODE (x) == Pmode
8955 && GET_MODE (XEXP (x, 0)) == Pmode
8956 && CONST_INT_P (XEXP (x, 1)))
8957 {
8958 type = ADDRESS_REG_REG;
8959 index = XEXP (x, 0);
8960 shift = INTVAL (XEXP (x, 1));
8961 }
8962 else
8963 return false;
8964
76160199
RS
8965 if (!strict_p
8966 && GET_CODE (index) == SUBREG
8967 && contains_reg_of_mode[GENERAL_REGS][GET_MODE (SUBREG_REG (index))])
43e9d192
IB
8968 index = SUBREG_REG (index);
8969
43cacb12
RS
8970 if (aarch64_sve_data_mode_p (mode))
8971 {
8972 if (type != ADDRESS_REG_REG
8973 || (1 << shift) != GET_MODE_UNIT_SIZE (mode))
8974 return false;
8975 }
8976 else
8977 {
8978 if (shift != 0
8979 && !(IN_RANGE (shift, 1, 3)
8980 && known_eq (1 << shift, GET_MODE_SIZE (mode))))
8981 return false;
8982 }
8983
8984 if (REG_P (index)
43e9d192
IB
8985 && aarch64_regno_ok_for_index_p (REGNO (index), strict_p))
8986 {
8987 info->type = type;
8988 info->offset = index;
8989 info->shift = shift;
8990 return true;
8991 }
8992
8993 return false;
8994}
8995
abc52318
KT
8996/* Return true if MODE is one of the modes for which we
8997 support LDP/STP operations. */
8998
8999static bool
9000aarch64_mode_valid_for_sched_fusion_p (machine_mode mode)
9001{
9002 return mode == SImode || mode == DImode
9003 || mode == SFmode || mode == DFmode
9004 || (aarch64_vector_mode_supported_p (mode)
9f5361c8
KT
9005 && (known_eq (GET_MODE_SIZE (mode), 8)
9006 || (known_eq (GET_MODE_SIZE (mode), 16)
9007 && (aarch64_tune_params.extra_tuning_flags
9008 & AARCH64_EXTRA_TUNE_NO_LDP_STP_QREGS) == 0)));
abc52318
KT
9009}
9010
9e0218fc
RH
9011/* Return true if REGNO is a virtual pointer register, or an eliminable
9012 "soft" frame register. Like REGNO_PTR_FRAME_P except that we don't
9013 include stack_pointer or hard_frame_pointer. */
9014static bool
9015virt_or_elim_regno_p (unsigned regno)
9016{
9017 return ((regno >= FIRST_VIRTUAL_REGISTER
9018 && regno <= LAST_VIRTUAL_POINTER_REGISTER)
9019 || regno == FRAME_POINTER_REGNUM
9020 || regno == ARG_POINTER_REGNUM);
9021}
9022
a97d8b98
RS
9023/* Return true if X is a valid address of type TYPE for machine mode MODE.
9024 If it is, fill in INFO appropriately. STRICT_P is true if
9025 REG_OK_STRICT is in effect. */
43e9d192 9026
a98824ac 9027bool
43e9d192 9028aarch64_classify_address (struct aarch64_address_info *info,
a97d8b98 9029 rtx x, machine_mode mode, bool strict_p,
a98824ac 9030 aarch64_addr_query_type type)
43e9d192
IB
9031{
9032 enum rtx_code code = GET_CODE (x);
9033 rtx op0, op1;
dc640181
RS
9034 poly_int64 offset;
9035
6a70badb 9036 HOST_WIDE_INT const_size;
2d8c6dc1 9037
550a3380
RS
9038 /* Whether a vector mode is partial doesn't affect address legitimacy.
9039 Partial vectors like VNx8QImode allow the same indexed addressing
9040 mode and MUL VL addressing mode as full vectors like VNx16QImode;
9041 in both cases, MUL VL counts multiples of GET_MODE_SIZE. */
9042 unsigned int vec_flags = aarch64_classify_vector_mode (mode);
9043 vec_flags &= ~VEC_PARTIAL;
9044
80d43579
WD
9045 /* On BE, we use load/store pair for all large int mode load/stores.
9046 TI/TFmode may also use a load/store pair. */
43cacb12 9047 bool advsimd_struct_p = (vec_flags == (VEC_ADVSIMD | VEC_STRUCT));
a97d8b98 9048 bool load_store_pair_p = (type == ADDR_QUERY_LDP_STP
a25831ac 9049 || type == ADDR_QUERY_LDP_STP_N
80d43579
WD
9050 || mode == TImode
9051 || mode == TFmode
43cacb12 9052 || (BYTES_BIG_ENDIAN && advsimd_struct_p));
2d8c6dc1 9053
a25831ac
AV
9054 /* If we are dealing with ADDR_QUERY_LDP_STP_N that means the incoming mode
9055 corresponds to the actual size of the memory being loaded/stored and the
9056 mode of the corresponding addressing mode is half of that. */
9057 if (type == ADDR_QUERY_LDP_STP_N
9058 && known_eq (GET_MODE_SIZE (mode), 16))
9059 mode = DFmode;
9060
6a70badb 9061 bool allow_reg_index_p = (!load_store_pair_p
43cacb12
RS
9062 && (known_lt (GET_MODE_SIZE (mode), 16)
9063 || vec_flags == VEC_ADVSIMD
fa9863e7 9064 || vec_flags & VEC_SVE_DATA));
43cacb12
RS
9065
9066 /* For SVE, only accept [Rn], [Rn, Rm, LSL #shift] and
9067 [Rn, #offset, MUL VL]. */
9068 if ((vec_flags & (VEC_SVE_DATA | VEC_SVE_PRED)) != 0
9069 && (code != REG && code != PLUS))
9070 return false;
2d8c6dc1
AH
9071
9072 /* On LE, for AdvSIMD, don't support anything other than POST_INC or
9073 REG addressing. */
43cacb12
RS
9074 if (advsimd_struct_p
9075 && !BYTES_BIG_ENDIAN
43e9d192
IB
9076 && (code != POST_INC && code != REG))
9077 return false;
9078
43cacb12
RS
9079 gcc_checking_assert (GET_MODE (x) == VOIDmode
9080 || SCALAR_INT_MODE_P (GET_MODE (x)));
9081
43e9d192
IB
9082 switch (code)
9083 {
9084 case REG:
9085 case SUBREG:
9086 info->type = ADDRESS_REG_IMM;
9087 info->base = x;
9088 info->offset = const0_rtx;
dc640181 9089 info->const_offset = 0;
43e9d192
IB
9090 return aarch64_base_register_rtx_p (x, strict_p);
9091
9092 case PLUS:
9093 op0 = XEXP (x, 0);
9094 op1 = XEXP (x, 1);
15c0c5c9
JW
9095
9096 if (! strict_p
4aa81c2e 9097 && REG_P (op0)
9e0218fc 9098 && virt_or_elim_regno_p (REGNO (op0))
dc640181 9099 && poly_int_rtx_p (op1, &offset))
15c0c5c9
JW
9100 {
9101 info->type = ADDRESS_REG_IMM;
9102 info->base = op0;
9103 info->offset = op1;
dc640181 9104 info->const_offset = offset;
15c0c5c9
JW
9105
9106 return true;
9107 }
9108
6a70badb 9109 if (maybe_ne (GET_MODE_SIZE (mode), 0)
dc640181
RS
9110 && aarch64_base_register_rtx_p (op0, strict_p)
9111 && poly_int_rtx_p (op1, &offset))
43e9d192 9112 {
43e9d192
IB
9113 info->type = ADDRESS_REG_IMM;
9114 info->base = op0;
9115 info->offset = op1;
dc640181 9116 info->const_offset = offset;
43e9d192
IB
9117
9118 /* TImode and TFmode values are allowed in both pairs of X
9119 registers and individual Q registers. The available
9120 address modes are:
9121 X,X: 7-bit signed scaled offset
9122 Q: 9-bit signed offset
9123 We conservatively require an offset representable in either mode.
8ed49fab
KT
9124 When performing the check for pairs of X registers i.e. LDP/STP
9125 pass down DImode since that is the natural size of the LDP/STP
9126 instruction memory accesses. */
43e9d192 9127 if (mode == TImode || mode == TFmode)
8ed49fab 9128 return (aarch64_offset_7bit_signed_scaled_p (DImode, offset)
3c5af608 9129 && (aarch64_offset_9bit_signed_unscaled_p (mode, offset)
8734dfac 9130 || offset_12bit_unsigned_scaled_p (mode, offset)));
43e9d192 9131
2d8c6dc1
AH
9132 /* A 7bit offset check because OImode will emit a ldp/stp
9133 instruction (only big endian will get here).
9134 For ldp/stp instructions, the offset is scaled for the size of a
9135 single element of the pair. */
9136 if (mode == OImode)
9137 return aarch64_offset_7bit_signed_scaled_p (TImode, offset);
9138
9139 /* Three 9/12 bit offsets checks because CImode will emit three
9140 ldr/str instructions (only big endian will get here). */
9141 if (mode == CImode)
9142 return (aarch64_offset_7bit_signed_scaled_p (TImode, offset)
3c5af608
MM
9143 && (aarch64_offset_9bit_signed_unscaled_p (V16QImode,
9144 offset + 32)
2d8c6dc1
AH
9145 || offset_12bit_unsigned_scaled_p (V16QImode,
9146 offset + 32)));
9147
9148 /* Two 7bit offsets checks because XImode will emit two ldp/stp
9149 instructions (only big endian will get here). */
9150 if (mode == XImode)
9151 return (aarch64_offset_7bit_signed_scaled_p (TImode, offset)
9152 && aarch64_offset_7bit_signed_scaled_p (TImode,
9153 offset + 32));
9154
43cacb12
RS
9155 /* Make "m" use the LD1 offset range for SVE data modes, so
9156 that pre-RTL optimizers like ivopts will work to that
9157 instead of the wider LDR/STR range. */
9158 if (vec_flags == VEC_SVE_DATA)
9159 return (type == ADDR_QUERY_M
9160 ? offset_4bit_signed_scaled_p (mode, offset)
9161 : offset_9bit_signed_scaled_p (mode, offset));
9162
9f4cbab8
RS
9163 if (vec_flags == (VEC_SVE_DATA | VEC_STRUCT))
9164 {
9165 poly_int64 end_offset = (offset
9166 + GET_MODE_SIZE (mode)
9167 - BYTES_PER_SVE_VECTOR);
9168 return (type == ADDR_QUERY_M
9169 ? offset_4bit_signed_scaled_p (mode, offset)
9170 : (offset_9bit_signed_scaled_p (SVE_BYTE_MODE, offset)
9171 && offset_9bit_signed_scaled_p (SVE_BYTE_MODE,
9172 end_offset)));
9173 }
9174
43cacb12
RS
9175 if (vec_flags == VEC_SVE_PRED)
9176 return offset_9bit_signed_scaled_p (mode, offset);
9177
2d8c6dc1 9178 if (load_store_pair_p)
6a70badb 9179 return ((known_eq (GET_MODE_SIZE (mode), 4)
9f5361c8
KT
9180 || known_eq (GET_MODE_SIZE (mode), 8)
9181 || known_eq (GET_MODE_SIZE (mode), 16))
44707478 9182 && aarch64_offset_7bit_signed_scaled_p (mode, offset));
43e9d192 9183 else
3c5af608 9184 return (aarch64_offset_9bit_signed_unscaled_p (mode, offset)
43e9d192
IB
9185 || offset_12bit_unsigned_scaled_p (mode, offset));
9186 }
9187
9188 if (allow_reg_index_p)
9189 {
9190 /* Look for base + (scaled/extended) index register. */
9191 if (aarch64_base_register_rtx_p (op0, strict_p)
9192 && aarch64_classify_index (info, op1, mode, strict_p))
9193 {
9194 info->base = op0;
9195 return true;
9196 }
9197 if (aarch64_base_register_rtx_p (op1, strict_p)
9198 && aarch64_classify_index (info, op0, mode, strict_p))
9199 {
9200 info->base = op1;
9201 return true;
9202 }
9203 }
9204
9205 return false;
9206
9207 case POST_INC:
9208 case POST_DEC:
9209 case PRE_INC:
9210 case PRE_DEC:
9211 info->type = ADDRESS_REG_WB;
9212 info->base = XEXP (x, 0);
9213 info->offset = NULL_RTX;
9214 return aarch64_base_register_rtx_p (info->base, strict_p);
9215
9216 case POST_MODIFY:
9217 case PRE_MODIFY:
9218 info->type = ADDRESS_REG_WB;
9219 info->base = XEXP (x, 0);
9220 if (GET_CODE (XEXP (x, 1)) == PLUS
dc640181 9221 && poly_int_rtx_p (XEXP (XEXP (x, 1), 1), &offset)
43e9d192
IB
9222 && rtx_equal_p (XEXP (XEXP (x, 1), 0), info->base)
9223 && aarch64_base_register_rtx_p (info->base, strict_p))
9224 {
43e9d192 9225 info->offset = XEXP (XEXP (x, 1), 1);
dc640181 9226 info->const_offset = offset;
43e9d192
IB
9227
9228 /* TImode and TFmode values are allowed in both pairs of X
9229 registers and individual Q registers. The available
9230 address modes are:
9231 X,X: 7-bit signed scaled offset
9232 Q: 9-bit signed offset
9233 We conservatively require an offset representable in either mode.
9234 */
9235 if (mode == TImode || mode == TFmode)
44707478 9236 return (aarch64_offset_7bit_signed_scaled_p (mode, offset)
3c5af608 9237 && aarch64_offset_9bit_signed_unscaled_p (mode, offset));
43e9d192 9238
2d8c6dc1 9239 if (load_store_pair_p)
6a70badb 9240 return ((known_eq (GET_MODE_SIZE (mode), 4)
9f5361c8
KT
9241 || known_eq (GET_MODE_SIZE (mode), 8)
9242 || known_eq (GET_MODE_SIZE (mode), 16))
44707478 9243 && aarch64_offset_7bit_signed_scaled_p (mode, offset));
43e9d192 9244 else
3c5af608 9245 return aarch64_offset_9bit_signed_unscaled_p (mode, offset);
43e9d192
IB
9246 }
9247 return false;
9248
9249 case CONST:
9250 case SYMBOL_REF:
9251 case LABEL_REF:
79517551
SN
9252 /* load literal: pc-relative constant pool entry. Only supported
9253 for SI mode or larger. */
43e9d192 9254 info->type = ADDRESS_SYMBOLIC;
2d8c6dc1 9255
6a70badb
RS
9256 if (!load_store_pair_p
9257 && GET_MODE_SIZE (mode).is_constant (&const_size)
9258 && const_size >= 4)
43e9d192 9259 {
74b27d8e
RS
9260 poly_int64 offset;
9261 rtx sym = strip_offset_and_salt (x, &offset);
b4f50fd4
RR
9262 return ((GET_CODE (sym) == LABEL_REF
9263 || (GET_CODE (sym) == SYMBOL_REF
9264 && CONSTANT_POOL_ADDRESS_P (sym)
9ee6540a 9265 && aarch64_pcrelative_literal_loads)));
43e9d192
IB
9266 }
9267 return false;
9268
9269 case LO_SUM:
9270 info->type = ADDRESS_LO_SUM;
9271 info->base = XEXP (x, 0);
9272 info->offset = XEXP (x, 1);
9273 if (allow_reg_index_p
9274 && aarch64_base_register_rtx_p (info->base, strict_p))
9275 {
74b27d8e
RS
9276 poly_int64 offset;
9277 HOST_WIDE_INT const_offset;
9278 rtx sym = strip_offset_and_salt (info->offset, &offset);
43e9d192 9279 if (GET_CODE (sym) == SYMBOL_REF
74b27d8e
RS
9280 && offset.is_constant (&const_offset)
9281 && (aarch64_classify_symbol (sym, const_offset)
43cacb12 9282 == SYMBOL_SMALL_ABSOLUTE))
43e9d192
IB
9283 {
9284 /* The symbol and offset must be aligned to the access size. */
9285 unsigned int align;
43e9d192
IB
9286
9287 if (CONSTANT_POOL_ADDRESS_P (sym))
9288 align = GET_MODE_ALIGNMENT (get_pool_mode (sym));
9289 else if (TREE_CONSTANT_POOL_ADDRESS_P (sym))
9290 {
9291 tree exp = SYMBOL_REF_DECL (sym);
9292 align = TYPE_ALIGN (TREE_TYPE (exp));
58e17cf8 9293 align = aarch64_constant_alignment (exp, align);
43e9d192
IB
9294 }
9295 else if (SYMBOL_REF_DECL (sym))
9296 align = DECL_ALIGN (SYMBOL_REF_DECL (sym));
6c031d8d
KV
9297 else if (SYMBOL_REF_HAS_BLOCK_INFO_P (sym)
9298 && SYMBOL_REF_BLOCK (sym) != NULL)
9299 align = SYMBOL_REF_BLOCK (sym)->alignment;
43e9d192
IB
9300 else
9301 align = BITS_PER_UNIT;
9302
6a70badb
RS
9303 poly_int64 ref_size = GET_MODE_SIZE (mode);
9304 if (known_eq (ref_size, 0))
43e9d192
IB
9305 ref_size = GET_MODE_SIZE (DImode);
9306
74b27d8e 9307 return (multiple_p (const_offset, ref_size)
6a70badb 9308 && multiple_p (align / BITS_PER_UNIT, ref_size));
43e9d192
IB
9309 }
9310 }
9311 return false;
9312
9313 default:
9314 return false;
9315 }
9316}
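/* [Illustrative example, not part of aarch64.c] A C access pattern that the
   classification above typically sees as base + sign-extended, scaled index
   (ADDRESS_REG_SXTW).  The generated code naturally depends on compiler
   options; the function name is invented.  */
static inline int
sketch_indexed_load (int *base, int i)
{
  /* Commonly compiled to something like "ldr w0, [x0, w1, sxtw 2]":
     base register plus the sign-extended 32-bit index scaled by the
     4-byte access size (shift of 2).  */
  return base[i];
}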
9317
9bf2f779
KT
9318/* Return true if the address X is valid for a PRFM instruction.
9319 STRICT_P is true if we should do strict checking with
9320 aarch64_classify_address. */
9321
9322bool
9323aarch64_address_valid_for_prefetch_p (rtx x, bool strict_p)
9324{
9325 struct aarch64_address_info addr;
9326
9327 /* PRFM accepts the same addresses as DImode... */
a97d8b98 9328 bool res = aarch64_classify_address (&addr, x, DImode, strict_p);
9bf2f779
KT
9329 if (!res)
9330 return false;
9331
9332 /* ... except writeback forms. */
9333 return addr.type != ADDRESS_REG_WB;
9334}
9335
43e9d192
IB
9336bool
9337aarch64_symbolic_address_p (rtx x)
9338{
74b27d8e
RS
9339 poly_int64 offset;
9340 x = strip_offset_and_salt (x, &offset);
43e9d192
IB
9341 return GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == LABEL_REF;
9342}
9343
a6e0bfa7 9344/* Classify the base of symbolic expression X. */
da4f13a4
MS
9345
9346enum aarch64_symbol_type
a6e0bfa7 9347aarch64_classify_symbolic_expression (rtx x)
43e9d192
IB
9348{
9349 rtx offset;
da4f13a4 9350
43e9d192 9351 split_const (x, &x, &offset);
43cacb12 9352 return aarch64_classify_symbol (x, INTVAL (offset));
43e9d192
IB
9353}
9354
9355
9356/* Return TRUE if X is a legitimate address for accessing memory in
9357 mode MODE. */
9358static bool
ef4bddc2 9359aarch64_legitimate_address_hook_p (machine_mode mode, rtx x, bool strict_p)
43e9d192
IB
9360{
9361 struct aarch64_address_info addr;
9362
a97d8b98 9363 return aarch64_classify_address (&addr, x, mode, strict_p);
43e9d192
IB
9364}
9365
a97d8b98
RS
9366/* Return TRUE if X is a legitimate address of type TYPE for accessing
9367 memory in mode MODE. STRICT_P is true if REG_OK_STRICT is in effect. */
43e9d192 9368bool
a97d8b98
RS
9369aarch64_legitimate_address_p (machine_mode mode, rtx x, bool strict_p,
9370 aarch64_addr_query_type type)
43e9d192
IB
9371{
9372 struct aarch64_address_info addr;
9373
a97d8b98 9374 return aarch64_classify_address (&addr, x, mode, strict_p, type);
43e9d192
IB
9375}
9376
9005477f
RS
9377/* Implement TARGET_LEGITIMIZE_ADDRESS_DISPLACEMENT. */
9378
491ec060 9379static bool
9005477f
RS
9380aarch64_legitimize_address_displacement (rtx *offset1, rtx *offset2,
9381 poly_int64 orig_offset,
9382 machine_mode mode)
491ec060 9383{
6a70badb
RS
9384 HOST_WIDE_INT size;
9385 if (GET_MODE_SIZE (mode).is_constant (&size))
9386 {
9005477f
RS
9387 HOST_WIDE_INT const_offset, second_offset;
9388
9389 /* A general SVE offset is A * VQ + B. Remove the A component from
9390 coefficient 0 in order to get the constant B. */
9391 const_offset = orig_offset.coeffs[0] - orig_offset.coeffs[1];
9392
9393 /* Split an out-of-range address displacement into a base and
9394 offset. Use 4KB range for 1- and 2-byte accesses and a 16KB
9395 range otherwise to increase opportunities for sharing the base
9396 address of different sizes. Unaligned accesses use the signed
9397 9-bit range, TImode/TFmode use the intersection of signed
9398 scaled 7-bit and signed 9-bit offset. */
6a70badb 9399 if (mode == TImode || mode == TFmode)
9005477f
RS
9400 second_offset = ((const_offset + 0x100) & 0x1f8) - 0x100;
9401 else if ((const_offset & (size - 1)) != 0)
9402 second_offset = ((const_offset + 0x100) & 0x1ff) - 0x100;
6a70badb 9403 else
9005477f 9404 second_offset = const_offset & (size < 4 ? 0xfff : 0x3ffc);
491ec060 9405
9005477f
RS
9406 if (second_offset == 0 || known_eq (orig_offset, second_offset))
9407 return false;
9408
9409 /* Split the offset into second_offset and the rest. */
9410 *offset1 = gen_int_mode (orig_offset - second_offset, Pmode);
9411 *offset2 = gen_int_mode (second_offset, Pmode);
9412 return true;
9413 }
9414 else
9415 {
9416 /* Get the mode we should use as the basis of the range. For structure
9417 modes this is the mode of one vector. */
9418 unsigned int vec_flags = aarch64_classify_vector_mode (mode);
9419 machine_mode step_mode
9420 = (vec_flags & VEC_STRUCT) != 0 ? SVE_BYTE_MODE : mode;
9421
9422 /* Get the "mul vl" multiplier we'd like to use. */
9423 HOST_WIDE_INT factor = GET_MODE_SIZE (step_mode).coeffs[1];
9424 HOST_WIDE_INT vnum = orig_offset.coeffs[1] / factor;
9425 if (vec_flags & VEC_SVE_DATA)
9426 /* LDR supports a 9-bit range, but the move patterns for
9427 structure modes require all vectors to be in range of the
 9428 same base. The simplest way of accommodating that while still
9429 promoting reuse of anchor points between different modes is
9430 to use an 8-bit range unconditionally. */
9431 vnum = ((vnum + 128) & 255) - 128;
9432 else
9433 /* Predicates are only handled singly, so we might as well use
9434 the full range. */
9435 vnum = ((vnum + 256) & 511) - 256;
9436 if (vnum == 0)
9437 return false;
9438
9439 /* Convert the "mul vl" multiplier into a byte offset. */
9440 poly_int64 second_offset = GET_MODE_SIZE (step_mode) * vnum;
9441 if (known_eq (second_offset, orig_offset))
9442 return false;
9443
9444 /* Split the offset into second_offset and the rest. */
9445 *offset1 = gen_int_mode (orig_offset - second_offset, Pmode);
9446 *offset2 = gen_int_mode (second_offset, Pmode);
6a70badb
RS
9447 return true;
9448 }
491ec060
WD
9449}
9450
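/* A worked illustration (not part of the original source) of the constant
   splitting done by aarch64_legitimize_address_displacement above: for an
   aligned 8-byte access at offset 0x4010, size >= 4 so
   second_offset = 0x4010 & 0x3ffc = 0x10, and the caller receives
   *offset1 = 0x4000 and *offset2 = 0x10, letting the 0x4000 anchor be
   shared with neighbouring accesses.  */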
a2170965
TC
9451/* Return the binary representation of floating point constant VALUE in INTVAL.
9452 If the value cannot be converted, return false without setting INTVAL.
9453 The conversion is done in the given MODE. */
9454bool
9455aarch64_reinterpret_float_as_int (rtx value, unsigned HOST_WIDE_INT *intval)
9456{
9457
9458 /* We make a general exception for 0. */
9459 if (aarch64_float_const_zero_rtx_p (value))
9460 {
9461 *intval = 0;
9462 return true;
9463 }
9464
0d0e0188 9465 scalar_float_mode mode;
a2170965 9466 if (GET_CODE (value) != CONST_DOUBLE
0d0e0188 9467 || !is_a <scalar_float_mode> (GET_MODE (value), &mode)
a2170965
TC
9468 || GET_MODE_BITSIZE (mode) > HOST_BITS_PER_WIDE_INT
9469 /* Only support up to DF mode. */
9470 || GET_MODE_BITSIZE (mode) > GET_MODE_BITSIZE (DFmode))
9471 return false;
9472
9473 unsigned HOST_WIDE_INT ival = 0;
9474
9475 long res[2];
9476 real_to_target (res,
9477 CONST_DOUBLE_REAL_VALUE (value),
9478 REAL_MODE_FORMAT (mode));
9479
5c22bb48
TC
9480 if (mode == DFmode)
9481 {
9482 int order = BYTES_BIG_ENDIAN ? 1 : 0;
9483 ival = zext_hwi (res[order], 32);
9484 ival |= (zext_hwi (res[1 - order], 32) << 32);
9485 }
9486 else
9487 ival = zext_hwi (res[0], 32);
a2170965
TC
9488
9489 *intval = ival;
9490 return true;
9491}
9492
9493/* Return TRUE if rtx X is an immediate constant that can be moved using a
9494 single MOV(+MOVK) followed by an FMOV. */
9495bool
9496aarch64_float_const_rtx_p (rtx x)
9497{
9498 machine_mode mode = GET_MODE (x);
9499 if (mode == VOIDmode)
9500 return false;
9501
9502 /* Determine whether it's cheaper to write float constants as
9503 mov/movk pairs over ldr/adrp pairs. */
9504 unsigned HOST_WIDE_INT ival;
9505
9506 if (GET_CODE (x) == CONST_DOUBLE
9507 && SCALAR_FLOAT_MODE_P (mode)
9508 && aarch64_reinterpret_float_as_int (x, &ival))
9509 {
77e994c9
RS
9510 scalar_int_mode imode = (mode == HFmode
9511 ? SImode
9512 : int_mode_for_mode (mode).require ());
a2170965
TC
9513 int num_instr = aarch64_internal_mov_immediate
9514 (NULL_RTX, gen_int_mode (ival, imode), false, imode);
9515 return num_instr < 3;
9516 }
9517
9518 return false;
9519}
9520
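/* Illustrative note (not from the original source): +1.0 in DFmode has the
   bit pattern 0x3ff0000000000000, which a single MOVZ (#0x3ff0, LSL #48)
   can materialize, so aarch64_internal_mov_immediate counts one instruction
   and aarch64_float_const_rtx_p above returns true: a MOV + FMOV sequence
   is preferred over an ADRP + LDR literal load in that case.  */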
43e9d192
IB
 9521/* Return TRUE if rtx X is the immediate constant 0.0. */
9522bool
3520f7cc 9523aarch64_float_const_zero_rtx_p (rtx x)
43e9d192 9524{
43e9d192
IB
9525 if (GET_MODE (x) == VOIDmode)
9526 return false;
9527
34a72c33 9528 if (REAL_VALUE_MINUS_ZERO (*CONST_DOUBLE_REAL_VALUE (x)))
43e9d192 9529 return !HONOR_SIGNED_ZEROS (GET_MODE (x));
34a72c33 9530 return real_equal (CONST_DOUBLE_REAL_VALUE (x), &dconst0);
43e9d192
IB
9531}
9532
a2170965
TC
 9533/* Return TRUE if rtx X is an immediate constant that fits in a single
9534 MOVI immediate operation. */
9535bool
9536aarch64_can_const_movi_rtx_p (rtx x, machine_mode mode)
9537{
9538 if (!TARGET_SIMD)
9539 return false;
9540
77e994c9
RS
9541 machine_mode vmode;
9542 scalar_int_mode imode;
a2170965
TC
9543 unsigned HOST_WIDE_INT ival;
9544
9545 if (GET_CODE (x) == CONST_DOUBLE
9546 && SCALAR_FLOAT_MODE_P (mode))
9547 {
9548 if (!aarch64_reinterpret_float_as_int (x, &ival))
9549 return false;
9550
35c38fa6
TC
9551 /* We make a general exception for 0. */
9552 if (aarch64_float_const_zero_rtx_p (x))
9553 return true;
9554
304b9962 9555 imode = int_mode_for_mode (mode).require ();
a2170965
TC
9556 }
9557 else if (GET_CODE (x) == CONST_INT
77e994c9
RS
9558 && is_a <scalar_int_mode> (mode, &imode))
9559 ival = INTVAL (x);
a2170965
TC
9560 else
9561 return false;
9562
 9563 /* Use a 64-bit mode for everything except for DI/DF mode, where we use
 9564 a 128-bit vector mode. */
77e994c9 9565 int width = GET_MODE_BITSIZE (imode) == 64 ? 128 : 64;
a2170965
TC
9566
9567 vmode = aarch64_simd_container_mode (imode, width);
9568 rtx v_op = aarch64_simd_gen_const_vector_dup (vmode, ival);
9569
b187677b 9570 return aarch64_simd_valid_immediate (v_op, NULL);
a2170965
TC
9571}
9572
9573
70f09188
AP
9574/* Return the fixed registers used for condition codes. */
9575
9576static bool
9577aarch64_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
9578{
9579 *p1 = CC_REGNUM;
9580 *p2 = INVALID_REGNUM;
9581 return true;
9582}
9583
47210a04
RL
9584/* This function is used by the call expanders of the machine description.
9585 RESULT is the register in which the result is returned. It's NULL for
9586 "call" and "sibcall".
9587 MEM is the location of the function call.
08cc4d92 9588 CALLEE_ABI is a const_int that gives the arm_pcs of the callee.
47210a04
RL
 9589 SIBCALL indicates whether this function call is a normal call or a sibling call;
 9590 a different pattern is generated accordingly. */
9591
9592void
08cc4d92 9593aarch64_expand_call (rtx result, rtx mem, rtx callee_abi, bool sibcall)
47210a04
RL
9594{
9595 rtx call, callee, tmp;
9596 rtvec vec;
9597 machine_mode mode;
9598
9599 gcc_assert (MEM_P (mem));
9600 callee = XEXP (mem, 0);
9601 mode = GET_MODE (callee);
9602 gcc_assert (mode == Pmode);
9603
9604 /* Decide if we should generate indirect calls by loading the
9605 address of the callee into a register before performing
9606 the branch-and-link. */
9607 if (SYMBOL_REF_P (callee)
9608 ? (aarch64_is_long_call_p (callee)
9609 || aarch64_is_noplt_call_p (callee))
9610 : !REG_P (callee))
9611 XEXP (mem, 0) = force_reg (mode, callee);
9612
9613 call = gen_rtx_CALL (VOIDmode, mem, const0_rtx);
9614
9615 if (result != NULL_RTX)
9616 call = gen_rtx_SET (result, call);
9617
9618 if (sibcall)
9619 tmp = ret_rtx;
9620 else
9621 tmp = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (Pmode, LR_REGNUM));
9622
08cc4d92
RS
9623 gcc_assert (CONST_INT_P (callee_abi));
9624 callee_abi = gen_rtx_UNSPEC (DImode, gen_rtvec (1, callee_abi),
9625 UNSPEC_CALLEE_ABI);
9626
9627 vec = gen_rtvec (3, call, callee_abi, tmp);
47210a04
RL
9628 call = gen_rtx_PARALLEL (VOIDmode, vec);
9629
9630 aarch64_emit_call_insn (call);
9631}
9632
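/* For illustration only (a rough sketch, not taken from the original
   source), a normal call emitted by aarch64_expand_call above has the shape
     (parallel [(call (mem:DI ...) (const_int 0))
		(unspec:DI [(const_int <arm_pcs>)] UNSPEC_CALLEE_ABI)
		(clobber (reg:DI LR_REGNUM))])
   while a sibling call replaces the clobber of LR with a (return).  */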
78607708
TV
9633/* Emit call insn with PAT and do aarch64-specific handling. */
9634
d07a3fed 9635void
78607708
TV
9636aarch64_emit_call_insn (rtx pat)
9637{
9638 rtx insn = emit_call_insn (pat);
9639
9640 rtx *fusage = &CALL_INSN_FUNCTION_USAGE (insn);
9641 clobber_reg (fusage, gen_rtx_REG (word_mode, IP0_REGNUM));
9642 clobber_reg (fusage, gen_rtx_REG (word_mode, IP1_REGNUM));
9643}
9644
ef4bddc2 9645machine_mode
43e9d192
IB
9646aarch64_select_cc_mode (RTX_CODE code, rtx x, rtx y)
9647{
f7343f20
RE
9648 machine_mode mode_x = GET_MODE (x);
9649 rtx_code code_x = GET_CODE (x);
9650
43e9d192
IB
9651 /* All floating point compares return CCFP if it is an equality
9652 comparison, and CCFPE otherwise. */
f7343f20 9653 if (GET_MODE_CLASS (mode_x) == MODE_FLOAT)
43e9d192
IB
9654 {
9655 switch (code)
9656 {
9657 case EQ:
9658 case NE:
9659 case UNORDERED:
9660 case ORDERED:
9661 case UNLT:
9662 case UNLE:
9663 case UNGT:
9664 case UNGE:
9665 case UNEQ:
43e9d192
IB
9666 return CCFPmode;
9667
9668 case LT:
9669 case LE:
9670 case GT:
9671 case GE:
8332c5ee 9672 case LTGT:
43e9d192
IB
9673 return CCFPEmode;
9674
9675 default:
9676 gcc_unreachable ();
9677 }
9678 }
9679
2b8568fe
KT
9680 /* Equality comparisons of short modes against zero can be performed
9681 using the TST instruction with the appropriate bitmask. */
f73dc006 9682 if (y == const0_rtx && (REG_P (x) || SUBREG_P (x))
2b8568fe 9683 && (code == EQ || code == NE)
f7343f20 9684 && (mode_x == HImode || mode_x == QImode))
2b8568fe
KT
9685 return CC_NZmode;
9686
b06335f9
KT
9687 /* Similarly, comparisons of zero_extends from shorter modes can
9688 be performed using an ANDS with an immediate mask. */
f7343f20
RE
9689 if (y == const0_rtx && code_x == ZERO_EXTEND
9690 && (mode_x == SImode || mode_x == DImode)
b06335f9
KT
9691 && (GET_MODE (XEXP (x, 0)) == HImode || GET_MODE (XEXP (x, 0)) == QImode)
9692 && (code == EQ || code == NE))
9693 return CC_NZmode;
9694
f7343f20 9695 if ((mode_x == SImode || mode_x == DImode)
43e9d192
IB
9696 && y == const0_rtx
9697 && (code == EQ || code == NE || code == LT || code == GE)
f7343f20
RE
9698 && (code_x == PLUS || code_x == MINUS || code_x == AND
9699 || code_x == NEG
9700 || (code_x == ZERO_EXTRACT && CONST_INT_P (XEXP (x, 1))
7325d85a 9701 && CONST_INT_P (XEXP (x, 2)))))
43e9d192
IB
9702 return CC_NZmode;
9703
1c992d1e 9704 /* A compare with a shifted operand. Because of canonicalization,
43e9d192
IB
9705 the comparison will have to be swapped when we emit the assembly
9706 code. */
f7343f20 9707 if ((mode_x == SImode || mode_x == DImode)
ffa8a921 9708 && (REG_P (y) || GET_CODE (y) == SUBREG || y == const0_rtx)
f7343f20
RE
9709 && (code_x == ASHIFT || code_x == ASHIFTRT
9710 || code_x == LSHIFTRT
9711 || code_x == ZERO_EXTEND || code_x == SIGN_EXTEND))
43e9d192
IB
9712 return CC_SWPmode;
9713
1c992d1e
RE
9714 /* Similarly for a negated operand, but we can only do this for
9715 equalities. */
f7343f20 9716 if ((mode_x == SImode || mode_x == DImode)
4aa81c2e 9717 && (REG_P (y) || GET_CODE (y) == SUBREG)
1c992d1e 9718 && (code == EQ || code == NE)
f7343f20 9719 && code_x == NEG)
1c992d1e
RE
9720 return CC_Zmode;
9721
f7343f20
RE
9722 /* A test for unsigned overflow from an addition. */
9723 if ((mode_x == DImode || mode_x == TImode)
9724 && (code == LTU || code == GEU)
9725 && code_x == PLUS
9726 && rtx_equal_p (XEXP (x, 0), y))
ef22810a
RH
9727 return CC_Cmode;
9728
f7343f20
RE
9729 /* A test for unsigned overflow from an add with carry. */
9730 if ((mode_x == DImode || mode_x == TImode)
9731 && (code == LTU || code == GEU)
9732 && code_x == PLUS
9733 && CONST_SCALAR_INT_P (y)
9734 && (rtx_mode_t (y, mode_x)
9735 == (wi::shwi (1, mode_x)
9736 << (GET_MODE_BITSIZE (mode_x).to_constant () / 2))))
9737 return CC_ADCmode;
9738
30c46053 9739 /* A test for signed overflow. */
f7343f20 9740 if ((mode_x == DImode || mode_x == TImode)
30c46053 9741 && code == NE
f7343f20 9742 && code_x == PLUS
30c46053
MC
9743 && GET_CODE (y) == SIGN_EXTEND)
9744 return CC_Vmode;
9745
43e9d192
IB
9746 /* For everything else, return CCmode. */
9747 return CCmode;
9748}
9749
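/* A hypothetical example of the selection above: for the comparison
   (compare (and:DI (reg x0) (const_int 7)) (const_int 0)) with code EQ,
   mode_x is DImode, y is const0_rtx and code_x is AND, so CC_NZmode is
   chosen and the comparison can later be emitted as a TST/ANDS.  */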
3dfa7055 9750static int
b8506a8a 9751aarch64_get_condition_code_1 (machine_mode, enum rtx_code);
3dfa7055 9752
cd5660ab 9753int
43e9d192
IB
9754aarch64_get_condition_code (rtx x)
9755{
ef4bddc2 9756 machine_mode mode = GET_MODE (XEXP (x, 0));
43e9d192
IB
9757 enum rtx_code comp_code = GET_CODE (x);
9758
9759 if (GET_MODE_CLASS (mode) != MODE_CC)
9760 mode = SELECT_CC_MODE (comp_code, XEXP (x, 0), XEXP (x, 1));
3dfa7055
ZC
9761 return aarch64_get_condition_code_1 (mode, comp_code);
9762}
43e9d192 9763
3dfa7055 9764static int
b8506a8a 9765aarch64_get_condition_code_1 (machine_mode mode, enum rtx_code comp_code)
3dfa7055 9766{
43e9d192
IB
9767 switch (mode)
9768 {
4e10a5a7
RS
9769 case E_CCFPmode:
9770 case E_CCFPEmode:
43e9d192
IB
9771 switch (comp_code)
9772 {
9773 case GE: return AARCH64_GE;
9774 case GT: return AARCH64_GT;
9775 case LE: return AARCH64_LS;
9776 case LT: return AARCH64_MI;
9777 case NE: return AARCH64_NE;
9778 case EQ: return AARCH64_EQ;
9779 case ORDERED: return AARCH64_VC;
9780 case UNORDERED: return AARCH64_VS;
9781 case UNLT: return AARCH64_LT;
9782 case UNLE: return AARCH64_LE;
9783 case UNGT: return AARCH64_HI;
9784 case UNGE: return AARCH64_PL;
cd5660ab 9785 default: return -1;
43e9d192
IB
9786 }
9787 break;
9788
4e10a5a7 9789 case E_CCmode:
43e9d192
IB
9790 switch (comp_code)
9791 {
9792 case NE: return AARCH64_NE;
9793 case EQ: return AARCH64_EQ;
9794 case GE: return AARCH64_GE;
9795 case GT: return AARCH64_GT;
9796 case LE: return AARCH64_LE;
9797 case LT: return AARCH64_LT;
9798 case GEU: return AARCH64_CS;
9799 case GTU: return AARCH64_HI;
9800 case LEU: return AARCH64_LS;
9801 case LTU: return AARCH64_CC;
cd5660ab 9802 default: return -1;
43e9d192
IB
9803 }
9804 break;
9805
4e10a5a7 9806 case E_CC_SWPmode:
43e9d192
IB
9807 switch (comp_code)
9808 {
9809 case NE: return AARCH64_NE;
9810 case EQ: return AARCH64_EQ;
9811 case GE: return AARCH64_LE;
9812 case GT: return AARCH64_LT;
9813 case LE: return AARCH64_GE;
9814 case LT: return AARCH64_GT;
9815 case GEU: return AARCH64_LS;
9816 case GTU: return AARCH64_CC;
9817 case LEU: return AARCH64_CS;
9818 case LTU: return AARCH64_HI;
cd5660ab 9819 default: return -1;
43e9d192
IB
9820 }
9821 break;
9822
57d6f4d0
RS
9823 case E_CC_NZCmode:
9824 switch (comp_code)
9825 {
9826 case NE: return AARCH64_NE; /* = any */
9827 case EQ: return AARCH64_EQ; /* = none */
9828 case GE: return AARCH64_PL; /* = nfrst */
9829 case LT: return AARCH64_MI; /* = first */
9830 case GEU: return AARCH64_CS; /* = nlast */
9831 case GTU: return AARCH64_HI; /* = pmore */
9832 case LEU: return AARCH64_LS; /* = plast */
9833 case LTU: return AARCH64_CC; /* = last */
9834 default: return -1;
9835 }
9836 break;
9837
4e10a5a7 9838 case E_CC_NZmode:
43e9d192
IB
9839 switch (comp_code)
9840 {
9841 case NE: return AARCH64_NE;
9842 case EQ: return AARCH64_EQ;
9843 case GE: return AARCH64_PL;
9844 case LT: return AARCH64_MI;
cd5660ab 9845 default: return -1;
43e9d192
IB
9846 }
9847 break;
9848
4e10a5a7 9849 case E_CC_Zmode:
1c992d1e
RE
9850 switch (comp_code)
9851 {
9852 case NE: return AARCH64_NE;
9853 case EQ: return AARCH64_EQ;
cd5660ab 9854 default: return -1;
1c992d1e
RE
9855 }
9856 break;
9857
4e10a5a7 9858 case E_CC_Cmode:
ef22810a
RH
9859 switch (comp_code)
9860 {
f7343f20
RE
9861 case LTU: return AARCH64_CS;
9862 case GEU: return AARCH64_CC;
9863 default: return -1;
9864 }
9865 break;
9866
9867 case E_CC_ADCmode:
9868 switch (comp_code)
9869 {
9870 case GEU: return AARCH64_CS;
9871 case LTU: return AARCH64_CC;
ef22810a
RH
9872 default: return -1;
9873 }
9874 break;
9875
30c46053
MC
9876 case E_CC_Vmode:
9877 switch (comp_code)
9878 {
9879 case NE: return AARCH64_VS;
9880 case EQ: return AARCH64_VC;
9881 default: return -1;
9882 }
9883 break;
9884
43e9d192 9885 default:
cd5660ab 9886 return -1;
43e9d192 9887 }
3dfa7055 9888
3dfa7055 9889 return -1;
43e9d192
IB
9890}
9891
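/* A brief illustration (not part of the original source): in CC_SWPmode the
   comparison operands were swapped when the flags were set, so the table
   above maps GT to AARCH64_LT, GE to AARCH64_LE and so on, while the
   symmetric codes EQ and NE are left unchanged.  */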
ddeabd3e
AL
9892bool
9893aarch64_const_vec_all_same_in_range_p (rtx x,
6a70badb
RS
9894 HOST_WIDE_INT minval,
9895 HOST_WIDE_INT maxval)
ddeabd3e 9896{
6a70badb
RS
9897 rtx elt;
9898 return (const_vec_duplicate_p (x, &elt)
9899 && CONST_INT_P (elt)
9900 && IN_RANGE (INTVAL (elt), minval, maxval));
ddeabd3e
AL
9901}
9902
9903bool
9904aarch64_const_vec_all_same_int_p (rtx x, HOST_WIDE_INT val)
9905{
9906 return aarch64_const_vec_all_same_in_range_p (x, val, val);
9907}
9908
43cacb12
RS
9909/* Return true if VEC is a constant in which every element is in the range
9910 [MINVAL, MAXVAL]. The elements do not need to have the same value. */
9911
9912static bool
9913aarch64_const_vec_all_in_range_p (rtx vec,
9914 HOST_WIDE_INT minval,
9915 HOST_WIDE_INT maxval)
9916{
9917 if (GET_CODE (vec) != CONST_VECTOR
9918 || GET_MODE_CLASS (GET_MODE (vec)) != MODE_VECTOR_INT)
9919 return false;
9920
9921 int nunits;
9922 if (!CONST_VECTOR_STEPPED_P (vec))
9923 nunits = const_vector_encoded_nelts (vec);
9924 else if (!CONST_VECTOR_NUNITS (vec).is_constant (&nunits))
9925 return false;
9926
9927 for (int i = 0; i < nunits; i++)
9928 {
9929 rtx vec_elem = CONST_VECTOR_ELT (vec, i);
9930 if (!CONST_INT_P (vec_elem)
9931 || !IN_RANGE (INTVAL (vec_elem), minval, maxval))
9932 return false;
9933 }
9934 return true;
9935}
43e9d192 9936
cf670503
ZC
9937/* N Z C V. */
9938#define AARCH64_CC_V 1
9939#define AARCH64_CC_C (1 << 1)
9940#define AARCH64_CC_Z (1 << 2)
9941#define AARCH64_CC_N (1 << 3)
9942
c8012fbc
WD
9943/* N Z C V flags for ccmp. Indexed by AARCH64_COND_CODE. */
9944static const int aarch64_nzcv_codes[] =
9945{
9946 0, /* EQ, Z == 1. */
9947 AARCH64_CC_Z, /* NE, Z == 0. */
9948 0, /* CS, C == 1. */
9949 AARCH64_CC_C, /* CC, C == 0. */
9950 0, /* MI, N == 1. */
9951 AARCH64_CC_N, /* PL, N == 0. */
9952 0, /* VS, V == 1. */
9953 AARCH64_CC_V, /* VC, V == 0. */
 9954 0, /* HI, C == 1 && Z == 0. */
9955 AARCH64_CC_C, /* LS, !(C == 1 && Z == 0). */
9956 AARCH64_CC_V, /* GE, N == V. */
9957 0, /* LT, N != V. */
9958 AARCH64_CC_Z, /* GT, Z == 0 && N == V. */
9959 0, /* LE, !(Z == 0 && N == V). */
9960 0, /* AL, Any. */
9961 0 /* NV, Any. */
cf670503
ZC
9962};
9963
43cacb12
RS
9964/* Print floating-point vector immediate operand X to F, negating it
9965 first if NEGATE is true. Return true on success, false if it isn't
9966 a constant we can handle. */
9967
9968static bool
9969aarch64_print_vector_float_operand (FILE *f, rtx x, bool negate)
9970{
9971 rtx elt;
9972
9973 if (!const_vec_duplicate_p (x, &elt))
9974 return false;
9975
9976 REAL_VALUE_TYPE r = *CONST_DOUBLE_REAL_VALUE (elt);
9977 if (negate)
9978 r = real_value_negate (&r);
9979
d29f7dd5
RS
9980 /* Handle the SVE single-bit immediates specially, since they have a
9981 fixed form in the assembly syntax. */
43cacb12
RS
9982 if (real_equal (&r, &dconst0))
9983 asm_fprintf (f, "0.0");
a19ba9e1
RS
9984 else if (real_equal (&r, &dconst2))
9985 asm_fprintf (f, "2.0");
43cacb12
RS
9986 else if (real_equal (&r, &dconst1))
9987 asm_fprintf (f, "1.0");
9988 else if (real_equal (&r, &dconsthalf))
9989 asm_fprintf (f, "0.5");
9990 else
d29f7dd5
RS
9991 {
9992 const int buf_size = 20;
9993 char float_buf[buf_size] = {'\0'};
9994 real_to_decimal_for_mode (float_buf, &r, buf_size, buf_size,
9995 1, GET_MODE (elt));
9996 asm_fprintf (f, "%s", float_buf);
9997 }
43cacb12
RS
9998
9999 return true;
10000}
10001
9f4cbab8
RS
10002/* Return the equivalent letter for size. */
10003static char
10004sizetochar (int size)
10005{
10006 switch (size)
10007 {
10008 case 64: return 'd';
10009 case 32: return 's';
10010 case 16: return 'h';
10011 case 8 : return 'b';
10012 default: gcc_unreachable ();
10013 }
10014}
10015
bcf19844
JW
10016/* Print operand X to file F in a target specific manner according to CODE.
10017 The acceptable formatting commands given by CODE are:
10018 'c': An integer or symbol address without a preceding #
10019 sign.
43cacb12
RS
10020 'C': Take the duplicated element in a vector constant
10021 and print it in hex.
10022 'D': Take the duplicated element in a vector constant
10023 and print it as an unsigned integer, in decimal.
bcf19844 10024 'e': Print the sign/zero-extend size as a character 8->b,
d113ece6
RS
10025 16->h, 32->w. Can also be used for masks:
10026 0xff->b, 0xffff->h, 0xffffffff->w.
d29f7dd5
RS
10027 'I': If the operand is a duplicated vector constant,
10028 replace it with the duplicated scalar. If the
10029 operand is then a floating-point constant, replace
10030 it with the integer bit representation. Print the
10031 transformed constant as a signed decimal number.
bcf19844
JW
10032 'p': Prints N such that 2^N == X (X must be power of 2 and
10033 const int).
10034 'P': Print the number of non-zero bits in X (a const_int).
10035 'H': Print the higher numbered register of a pair (TImode)
10036 of regs.
10037 'm': Print a condition (eq, ne, etc).
10038 'M': Same as 'm', but invert condition.
43cacb12
RS
10039 'N': Take the duplicated element in a vector constant
10040 and print the negative of it in decimal.
bcf19844
JW
10041 'b/h/s/d/q': Print a scalar FP/SIMD register name.
10042 'S/T/U/V': Print a FP/SIMD register name for a register list.
10043 The register printed is the FP/SIMD register name
10044 of X + 0/1/2/3 for S/T/U/V.
e3f15286 10045 'R': Print a scalar Integer/FP/SIMD register name + 1.
bcf19844
JW
10046 'X': Print bottom 16 bits of integer constant in hex.
10047 'w/x': Print a general register name or the zero register
10048 (32-bit or 64-bit).
10049 '0': Print a normal operand, if it's a general register,
10050 then we assume DImode.
10051 'k': Print NZCV for conditional compare instructions.
10052 'A': Output address constant representing the first
10053 argument of X, specifying a relocation offset
10054 if appropriate.
10055 'L': Output constant address specified by X
10056 with a relocation offset if appropriate.
10057 'G': Prints address of X, specifying a PC relative
e69a816d
WD
10058 relocation mode if appropriate.
10059 'y': Output address of LDP or STP - this is used for
10060 some LDP/STPs which don't use a PARALLEL in their
10061 pattern (so the mode needs to be adjusted).
10062 'z': Output address of a typical LDP or STP. */
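/* A few illustrative expansions of the codes above (examples added here,
   not part of the original comment), assuming operand 0 is register x3:
   "%w0" prints "w3", "%x0" prints "x3", "%H0" prints the pair's upper
   register "x4", and "%e" applied to the constant 0xffff prints "h".  */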
bcf19844 10063
cc8ca59e
JB
10064static void
10065aarch64_print_operand (FILE *f, rtx x, int code)
43e9d192 10066{
43cacb12 10067 rtx elt;
43e9d192
IB
10068 switch (code)
10069 {
f541a481 10070 case 'c':
74b27d8e
RS
10071 if (CONST_INT_P (x))
10072 fprintf (f, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
10073 else
f541a481 10074 {
74b27d8e
RS
10075 poly_int64 offset;
10076 rtx base = strip_offset_and_salt (x, &offset);
10077 if (SYMBOL_REF_P (base))
10078 output_addr_const (f, x);
10079 else
10080 output_operand_lossage ("unsupported operand for code '%c'", code);
f541a481
KT
10081 }
10082 break;
10083
43e9d192 10084 case 'e':
43e9d192 10085 {
d113ece6
RS
10086 x = unwrap_const_vec_duplicate (x);
10087 if (!CONST_INT_P (x))
43e9d192
IB
10088 {
10089 output_operand_lossage ("invalid operand for '%%%c'", code);
10090 return;
10091 }
10092
d113ece6
RS
10093 HOST_WIDE_INT val = INTVAL (x);
10094 if ((val & ~7) == 8 || val == 0xff)
10095 fputc ('b', f);
10096 else if ((val & ~7) == 16 || val == 0xffff)
10097 fputc ('h', f);
10098 else if ((val & ~7) == 32 || val == 0xffffffff)
10099 fputc ('w', f);
10100 else
43e9d192 10101 {
43e9d192
IB
10102 output_operand_lossage ("invalid operand for '%%%c'", code);
10103 return;
10104 }
10105 }
10106 break;
10107
10108 case 'p':
10109 {
10110 int n;
10111
4aa81c2e 10112 if (!CONST_INT_P (x) || (n = exact_log2 (INTVAL (x))) < 0)
43e9d192
IB
10113 {
10114 output_operand_lossage ("invalid operand for '%%%c'", code);
10115 return;
10116 }
10117
10118 asm_fprintf (f, "%d", n);
10119 }
10120 break;
10121
10122 case 'P':
4aa81c2e 10123 if (!CONST_INT_P (x))
43e9d192
IB
10124 {
10125 output_operand_lossage ("invalid operand for '%%%c'", code);
10126 return;
10127 }
10128
8d55c61b 10129 asm_fprintf (f, "%u", popcount_hwi (INTVAL (x)));
43e9d192
IB
10130 break;
10131
10132 case 'H':
c0111dc4
RE
10133 if (x == const0_rtx)
10134 {
10135 asm_fprintf (f, "xzr");
10136 break;
10137 }
10138
4aa81c2e 10139 if (!REG_P (x) || !GP_REGNUM_P (REGNO (x) + 1))
43e9d192
IB
10140 {
10141 output_operand_lossage ("invalid operand for '%%%c'", code);
10142 return;
10143 }
10144
01a3a324 10145 asm_fprintf (f, "%s", reg_names [REGNO (x) + 1]);
43e9d192
IB
10146 break;
10147
d29f7dd5
RS
10148 case 'I':
10149 {
10150 x = aarch64_bit_representation (unwrap_const_vec_duplicate (x));
10151 if (CONST_INT_P (x))
10152 asm_fprintf (f, "%wd", INTVAL (x));
10153 else
10154 {
10155 output_operand_lossage ("invalid operand for '%%%c'", code);
10156 return;
10157 }
10158 break;
10159 }
10160
43e9d192 10161 case 'M':
c8012fbc 10162 case 'm':
cd5660ab
KT
10163 {
10164 int cond_code;
c8012fbc
WD
10165 /* CONST_TRUE_RTX means al/nv (al is the default, don't print it). */
10166 if (x == const_true_rtx)
cd5660ab 10167 {
c8012fbc
WD
10168 if (code == 'M')
10169 fputs ("nv", f);
cd5660ab
KT
10170 return;
10171 }
43e9d192 10172
cd5660ab
KT
10173 if (!COMPARISON_P (x))
10174 {
10175 output_operand_lossage ("invalid operand for '%%%c'", code);
10176 return;
10177 }
c8012fbc 10178
cd5660ab
KT
10179 cond_code = aarch64_get_condition_code (x);
10180 gcc_assert (cond_code >= 0);
c8012fbc
WD
10181 if (code == 'M')
10182 cond_code = AARCH64_INVERSE_CONDITION_CODE (cond_code);
57d6f4d0
RS
10183 if (GET_MODE (XEXP (x, 0)) == CC_NZCmode)
10184 fputs (aarch64_sve_condition_codes[cond_code], f);
10185 else
10186 fputs (aarch64_condition_codes[cond_code], f);
cd5660ab 10187 }
43e9d192
IB
10188 break;
10189
43cacb12
RS
10190 case 'N':
10191 if (!const_vec_duplicate_p (x, &elt))
10192 {
10193 output_operand_lossage ("invalid vector constant");
10194 return;
10195 }
10196
10197 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_VECTOR_INT)
10198 asm_fprintf (f, "%wd", -INTVAL (elt));
10199 else if (GET_MODE_CLASS (GET_MODE (x)) == MODE_VECTOR_FLOAT
10200 && aarch64_print_vector_float_operand (f, x, true))
10201 ;
10202 else
10203 {
10204 output_operand_lossage ("invalid vector constant");
10205 return;
10206 }
10207 break;
10208
43e9d192
IB
10209 case 'b':
10210 case 'h':
10211 case 's':
10212 case 'd':
10213 case 'q':
43e9d192
IB
10214 if (!REG_P (x) || !FP_REGNUM_P (REGNO (x)))
10215 {
10216 output_operand_lossage ("incompatible floating point / vector register operand for '%%%c'", code);
10217 return;
10218 }
50ce6f88 10219 asm_fprintf (f, "%c%d", code, REGNO (x) - V0_REGNUM);
43e9d192
IB
10220 break;
10221
10222 case 'S':
10223 case 'T':
10224 case 'U':
10225 case 'V':
43e9d192
IB
10226 if (!REG_P (x) || !FP_REGNUM_P (REGNO (x)))
10227 {
10228 output_operand_lossage ("incompatible floating point / vector register operand for '%%%c'", code);
10229 return;
10230 }
43cacb12
RS
10231 asm_fprintf (f, "%c%d",
10232 aarch64_sve_data_mode_p (GET_MODE (x)) ? 'z' : 'v',
10233 REGNO (x) - V0_REGNUM + (code - 'S'));
43e9d192
IB
10234 break;
10235
2d8c6dc1 10236 case 'R':
e3f15286
RH
10237 if (REG_P (x) && FP_REGNUM_P (REGNO (x)))
10238 asm_fprintf (f, "q%d", REGNO (x) - V0_REGNUM + 1);
10239 else if (REG_P (x) && GP_REGNUM_P (REGNO (x)))
10240 asm_fprintf (f, "x%d", REGNO (x) - R0_REGNUM + 1);
10241 else
10242 output_operand_lossage ("incompatible register operand for '%%%c'",
10243 code);
2d8c6dc1
AH
10244 break;
10245
a05c0ddf 10246 case 'X':
4aa81c2e 10247 if (!CONST_INT_P (x))
a05c0ddf
IB
10248 {
10249 output_operand_lossage ("invalid operand for '%%%c'", code);
10250 return;
10251 }
50d38551 10252 asm_fprintf (f, "0x%wx", UINTVAL (x) & 0xffff);
a05c0ddf
IB
10253 break;
10254
43cacb12
RS
10255 case 'C':
10256 {
10257 /* Print a replicated constant in hex. */
10258 if (!const_vec_duplicate_p (x, &elt) || !CONST_INT_P (elt))
10259 {
10260 output_operand_lossage ("invalid operand for '%%%c'", code);
10261 return;
10262 }
10263 scalar_mode inner_mode = GET_MODE_INNER (GET_MODE (x));
10264 asm_fprintf (f, "0x%wx", UINTVAL (elt) & GET_MODE_MASK (inner_mode));
10265 }
10266 break;
10267
10268 case 'D':
10269 {
10270 /* Print a replicated constant in decimal, treating it as
10271 unsigned. */
10272 if (!const_vec_duplicate_p (x, &elt) || !CONST_INT_P (elt))
10273 {
10274 output_operand_lossage ("invalid operand for '%%%c'", code);
10275 return;
10276 }
10277 scalar_mode inner_mode = GET_MODE_INNER (GET_MODE (x));
10278 asm_fprintf (f, "%wd", UINTVAL (elt) & GET_MODE_MASK (inner_mode));
10279 }
10280 break;
10281
43e9d192
IB
10282 case 'w':
10283 case 'x':
3520f7cc
JG
10284 if (x == const0_rtx
10285 || (CONST_DOUBLE_P (x) && aarch64_float_const_zero_rtx_p (x)))
43e9d192 10286 {
50ce6f88 10287 asm_fprintf (f, "%czr", code);
43e9d192
IB
10288 break;
10289 }
10290
10291 if (REG_P (x) && GP_REGNUM_P (REGNO (x)))
10292 {
50ce6f88 10293 asm_fprintf (f, "%c%d", code, REGNO (x) - R0_REGNUM);
43e9d192
IB
10294 break;
10295 }
10296
10297 if (REG_P (x) && REGNO (x) == SP_REGNUM)
10298 {
50ce6f88 10299 asm_fprintf (f, "%ssp", code == 'w' ? "w" : "");
43e9d192
IB
10300 break;
10301 }
10302
10303 /* Fall through */
10304
10305 case 0:
43e9d192
IB
10306 if (x == NULL)
10307 {
10308 output_operand_lossage ("missing operand");
10309 return;
10310 }
10311
10312 switch (GET_CODE (x))
10313 {
10314 case REG:
43cacb12 10315 if (aarch64_sve_data_mode_p (GET_MODE (x)))
9f4cbab8
RS
10316 {
10317 if (REG_NREGS (x) == 1)
10318 asm_fprintf (f, "z%d", REGNO (x) - V0_REGNUM);
10319 else
10320 {
10321 char suffix
10322 = sizetochar (GET_MODE_UNIT_BITSIZE (GET_MODE (x)));
10323 asm_fprintf (f, "{z%d.%c - z%d.%c}",
10324 REGNO (x) - V0_REGNUM, suffix,
10325 END_REGNO (x) - V0_REGNUM - 1, suffix);
10326 }
10327 }
43cacb12
RS
10328 else
10329 asm_fprintf (f, "%s", reg_names [REGNO (x)]);
43e9d192
IB
10330 break;
10331
10332 case MEM:
cc8ca59e 10333 output_address (GET_MODE (x), XEXP (x, 0));
43e9d192
IB
10334 break;
10335
10336 case LABEL_REF:
10337 case SYMBOL_REF:
10338 output_addr_const (asm_out_file, x);
10339 break;
10340
10341 case CONST_INT:
10342 asm_fprintf (f, "%wd", INTVAL (x));
10343 break;
10344
43cacb12
RS
10345 case CONST:
10346 if (!VECTOR_MODE_P (GET_MODE (x)))
3520f7cc 10347 {
43cacb12
RS
10348 output_addr_const (asm_out_file, x);
10349 break;
3520f7cc 10350 }
43cacb12
RS
10351 /* fall through */
10352
10353 case CONST_VECTOR:
10354 if (!const_vec_duplicate_p (x, &elt))
3520f7cc 10355 {
43cacb12
RS
10356 output_operand_lossage ("invalid vector constant");
10357 return;
3520f7cc 10358 }
43cacb12
RS
10359
10360 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_VECTOR_INT)
10361 asm_fprintf (f, "%wd", INTVAL (elt));
10362 else if (GET_MODE_CLASS (GET_MODE (x)) == MODE_VECTOR_FLOAT
10363 && aarch64_print_vector_float_operand (f, x, false))
10364 ;
3520f7cc 10365 else
43cacb12
RS
10366 {
10367 output_operand_lossage ("invalid vector constant");
10368 return;
10369 }
43e9d192
IB
10370 break;
10371
3520f7cc 10372 case CONST_DOUBLE:
2ca5b430
KT
10373 /* Since we define TARGET_SUPPORTS_WIDE_INT we shouldn't ever
10374 be getting CONST_DOUBLEs holding integers. */
10375 gcc_assert (GET_MODE (x) != VOIDmode);
10376 if (aarch64_float_const_zero_rtx_p (x))
3520f7cc
JG
10377 {
10378 fputc ('0', f);
10379 break;
10380 }
10381 else if (aarch64_float_const_representable_p (x))
10382 {
10383#define buf_size 20
10384 char float_buf[buf_size] = {'\0'};
34a72c33
RS
10385 real_to_decimal_for_mode (float_buf,
10386 CONST_DOUBLE_REAL_VALUE (x),
3520f7cc
JG
10387 buf_size, buf_size,
10388 1, GET_MODE (x));
10389 asm_fprintf (asm_out_file, "%s", float_buf);
10390 break;
10391#undef buf_size
10392 }
10393 output_operand_lossage ("invalid constant");
10394 return;
43e9d192
IB
10395 default:
10396 output_operand_lossage ("invalid operand");
10397 return;
10398 }
10399 break;
10400
10401 case 'A':
10402 if (GET_CODE (x) == HIGH)
10403 x = XEXP (x, 0);
10404
a6e0bfa7 10405 switch (aarch64_classify_symbolic_expression (x))
43e9d192 10406 {
6642bdb4 10407 case SYMBOL_SMALL_GOT_4G:
43e9d192
IB
10408 asm_fprintf (asm_out_file, ":got:");
10409 break;
10410
10411 case SYMBOL_SMALL_TLSGD:
10412 asm_fprintf (asm_out_file, ":tlsgd:");
10413 break;
10414
10415 case SYMBOL_SMALL_TLSDESC:
10416 asm_fprintf (asm_out_file, ":tlsdesc:");
10417 break;
10418
79496620 10419 case SYMBOL_SMALL_TLSIE:
43e9d192
IB
10420 asm_fprintf (asm_out_file, ":gottprel:");
10421 break;
10422
d18ba284 10423 case SYMBOL_TLSLE24:
43e9d192
IB
10424 asm_fprintf (asm_out_file, ":tprel:");
10425 break;
10426
87dd8ab0
MS
10427 case SYMBOL_TINY_GOT:
10428 gcc_unreachable ();
10429 break;
10430
43e9d192
IB
10431 default:
10432 break;
10433 }
10434 output_addr_const (asm_out_file, x);
10435 break;
10436
10437 case 'L':
a6e0bfa7 10438 switch (aarch64_classify_symbolic_expression (x))
43e9d192 10439 {
6642bdb4 10440 case SYMBOL_SMALL_GOT_4G:
43e9d192
IB
10441 asm_fprintf (asm_out_file, ":lo12:");
10442 break;
10443
10444 case SYMBOL_SMALL_TLSGD:
10445 asm_fprintf (asm_out_file, ":tlsgd_lo12:");
10446 break;
10447
10448 case SYMBOL_SMALL_TLSDESC:
10449 asm_fprintf (asm_out_file, ":tlsdesc_lo12:");
10450 break;
10451
79496620 10452 case SYMBOL_SMALL_TLSIE:
43e9d192
IB
10453 asm_fprintf (asm_out_file, ":gottprel_lo12:");
10454 break;
10455
cbf5629e
JW
10456 case SYMBOL_TLSLE12:
10457 asm_fprintf (asm_out_file, ":tprel_lo12:");
10458 break;
10459
d18ba284 10460 case SYMBOL_TLSLE24:
43e9d192
IB
10461 asm_fprintf (asm_out_file, ":tprel_lo12_nc:");
10462 break;
10463
87dd8ab0
MS
10464 case SYMBOL_TINY_GOT:
10465 asm_fprintf (asm_out_file, ":got:");
10466 break;
10467
5ae7caad
JW
10468 case SYMBOL_TINY_TLSIE:
10469 asm_fprintf (asm_out_file, ":gottprel:");
10470 break;
10471
43e9d192
IB
10472 default:
10473 break;
10474 }
10475 output_addr_const (asm_out_file, x);
10476 break;
10477
10478 case 'G':
a6e0bfa7 10479 switch (aarch64_classify_symbolic_expression (x))
43e9d192 10480 {
d18ba284 10481 case SYMBOL_TLSLE24:
43e9d192
IB
10482 asm_fprintf (asm_out_file, ":tprel_hi12:");
10483 break;
10484 default:
10485 break;
10486 }
10487 output_addr_const (asm_out_file, x);
10488 break;
10489
cf670503
ZC
10490 case 'k':
10491 {
c8012fbc 10492 HOST_WIDE_INT cond_code;
cf670503 10493
c8012fbc 10494 if (!CONST_INT_P (x))
cf670503
ZC
10495 {
10496 output_operand_lossage ("invalid operand for '%%%c'", code);
10497 return;
10498 }
10499
c8012fbc
WD
10500 cond_code = INTVAL (x);
10501 gcc_assert (cond_code >= 0 && cond_code <= AARCH64_NV);
10502 asm_fprintf (f, "%d", aarch64_nzcv_codes[cond_code]);
cf670503
ZC
10503 }
10504 break;
10505
e69a816d
WD
10506 case 'y':
10507 case 'z':
10508 {
10509 machine_mode mode = GET_MODE (x);
10510
c348cab0 10511 if (GET_CODE (x) != MEM
6a70badb 10512 || (code == 'y' && maybe_ne (GET_MODE_SIZE (mode), 16)))
e69a816d
WD
10513 {
10514 output_operand_lossage ("invalid operand for '%%%c'", code);
10515 return;
10516 }
10517
a25831ac
AV
10518 if (!aarch64_print_address_internal (f, mode, XEXP (x, 0),
10519 code == 'y'
10520 ? ADDR_QUERY_LDP_STP_N
10521 : ADDR_QUERY_LDP_STP))
c348cab0 10522 output_operand_lossage ("invalid operand prefix '%%%c'", code);
e69a816d
WD
10523 }
10524 break;
10525
43e9d192
IB
10526 default:
10527 output_operand_lossage ("invalid operand prefix '%%%c'", code);
10528 return;
10529 }
10530}
10531
e69a816d
WD
10532/* Print address 'x' of a memory access with mode 'mode'.
 10533 'type' is the aarch64_addr_query_type context passed to aarch64_classify_address,
 10534 e.g. ADDR_QUERY_LDP_STP for an LDP/STP access. */
c348cab0 10535static bool
a97d8b98
RS
10536aarch64_print_address_internal (FILE *f, machine_mode mode, rtx x,
10537 aarch64_addr_query_type type)
43e9d192
IB
10538{
10539 struct aarch64_address_info addr;
550a3380 10540 unsigned int size, vec_flags;
43e9d192 10541
e69a816d 10542 /* Check all addresses are Pmode - including ILP32. */
31460ed2
JJ
10543 if (GET_MODE (x) != Pmode
10544 && (!CONST_INT_P (x)
10545 || trunc_int_for_mode (INTVAL (x), Pmode) != INTVAL (x)))
10546 {
10547 output_operand_lossage ("invalid address mode");
10548 return false;
10549 }
e69a816d 10550
a97d8b98 10551 if (aarch64_classify_address (&addr, x, mode, true, type))
43e9d192
IB
10552 switch (addr.type)
10553 {
10554 case ADDRESS_REG_IMM:
dc640181 10555 if (known_eq (addr.const_offset, 0))
43cacb12 10556 {
550a3380
RS
10557 asm_fprintf (f, "[%s]", reg_names[REGNO (addr.base)]);
10558 return true;
43cacb12 10559 }
550a3380
RS
10560
10561 vec_flags = aarch64_classify_vector_mode (mode);
10562 if (vec_flags & VEC_ANY_SVE)
43cacb12
RS
10563 {
10564 HOST_WIDE_INT vnum
10565 = exact_div (addr.const_offset,
550a3380 10566 aarch64_vl_bytes (mode, vec_flags)).to_constant ();
43cacb12
RS
10567 asm_fprintf (f, "[%s, #%wd, mul vl]",
10568 reg_names[REGNO (addr.base)], vnum);
550a3380 10569 return true;
43cacb12 10570 }
550a3380
RS
10571
10572 asm_fprintf (f, "[%s, %wd]", reg_names[REGNO (addr.base)],
10573 INTVAL (addr.offset));
c348cab0 10574 return true;
43e9d192
IB
10575
10576 case ADDRESS_REG_REG:
10577 if (addr.shift == 0)
16a3246f 10578 asm_fprintf (f, "[%s, %s]", reg_names [REGNO (addr.base)],
01a3a324 10579 reg_names [REGNO (addr.offset)]);
43e9d192 10580 else
16a3246f 10581 asm_fprintf (f, "[%s, %s, lsl %u]", reg_names [REGNO (addr.base)],
01a3a324 10582 reg_names [REGNO (addr.offset)], addr.shift);
c348cab0 10583 return true;
43e9d192
IB
10584
10585 case ADDRESS_REG_UXTW:
10586 if (addr.shift == 0)
16a3246f 10587 asm_fprintf (f, "[%s, w%d, uxtw]", reg_names [REGNO (addr.base)],
43e9d192
IB
10588 REGNO (addr.offset) - R0_REGNUM);
10589 else
16a3246f 10590 asm_fprintf (f, "[%s, w%d, uxtw %u]", reg_names [REGNO (addr.base)],
43e9d192 10591 REGNO (addr.offset) - R0_REGNUM, addr.shift);
c348cab0 10592 return true;
43e9d192
IB
10593
10594 case ADDRESS_REG_SXTW:
10595 if (addr.shift == 0)
16a3246f 10596 asm_fprintf (f, "[%s, w%d, sxtw]", reg_names [REGNO (addr.base)],
43e9d192
IB
10597 REGNO (addr.offset) - R0_REGNUM);
10598 else
16a3246f 10599 asm_fprintf (f, "[%s, w%d, sxtw %u]", reg_names [REGNO (addr.base)],
43e9d192 10600 REGNO (addr.offset) - R0_REGNUM, addr.shift);
c348cab0 10601 return true;
43e9d192
IB
10602
10603 case ADDRESS_REG_WB:
6a70badb
RS
10604 /* Writeback is only supported for fixed-width modes. */
10605 size = GET_MODE_SIZE (mode).to_constant ();
43e9d192
IB
10606 switch (GET_CODE (x))
10607 {
10608 case PRE_INC:
6a70badb 10609 asm_fprintf (f, "[%s, %d]!", reg_names [REGNO (addr.base)], size);
c348cab0 10610 return true;
43e9d192 10611 case POST_INC:
6a70badb 10612 asm_fprintf (f, "[%s], %d", reg_names [REGNO (addr.base)], size);
c348cab0 10613 return true;
43e9d192 10614 case PRE_DEC:
6a70badb 10615 asm_fprintf (f, "[%s, -%d]!", reg_names [REGNO (addr.base)], size);
c348cab0 10616 return true;
43e9d192 10617 case POST_DEC:
6a70badb 10618 asm_fprintf (f, "[%s], -%d", reg_names [REGNO (addr.base)], size);
c348cab0 10619 return true;
43e9d192 10620 case PRE_MODIFY:
6a70badb 10621 asm_fprintf (f, "[%s, %wd]!", reg_names[REGNO (addr.base)],
43e9d192 10622 INTVAL (addr.offset));
c348cab0 10623 return true;
43e9d192 10624 case POST_MODIFY:
6a70badb 10625 asm_fprintf (f, "[%s], %wd", reg_names[REGNO (addr.base)],
43e9d192 10626 INTVAL (addr.offset));
c348cab0 10627 return true;
43e9d192
IB
10628 default:
10629 break;
10630 }
10631 break;
10632
10633 case ADDRESS_LO_SUM:
16a3246f 10634 asm_fprintf (f, "[%s, #:lo12:", reg_names [REGNO (addr.base)]);
43e9d192
IB
10635 output_addr_const (f, addr.offset);
10636 asm_fprintf (f, "]");
c348cab0 10637 return true;
43e9d192
IB
10638
10639 case ADDRESS_SYMBOLIC:
d6591257 10640 output_addr_const (f, x);
c348cab0 10641 return true;
43e9d192
IB
10642 }
10643
c348cab0 10644 return false;
43e9d192
IB
10645}
10646
e69a816d
WD
10647/* Print address 'x' of a memory access with mode 'mode'. */
10648static void
10649aarch64_print_operand_address (FILE *f, machine_mode mode, rtx x)
10650{
43cacb12 10651 if (!aarch64_print_address_internal (f, mode, x, ADDR_QUERY_ANY))
c348cab0 10652 output_addr_const (f, x);
e69a816d
WD
10653}
10654
74b27d8e
RS
10655/* Implement TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA. */
10656
10657static bool
10658aarch64_output_addr_const_extra (FILE *file, rtx x)
10659{
10660 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_SALT_ADDR)
10661 {
10662 output_addr_const (file, XVECEXP (x, 0, 0));
10663 return true;
10664 }
10665 return false;
10666}
10667
43e9d192
IB
10668bool
10669aarch64_label_mentioned_p (rtx x)
10670{
10671 const char *fmt;
10672 int i;
10673
10674 if (GET_CODE (x) == LABEL_REF)
10675 return true;
10676
10677 /* UNSPEC_TLS entries for a symbol include a LABEL_REF for the
10678 referencing instruction, but they are constant offsets, not
10679 symbols. */
10680 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
10681 return false;
10682
10683 fmt = GET_RTX_FORMAT (GET_CODE (x));
10684 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
10685 {
10686 if (fmt[i] == 'E')
10687 {
10688 int j;
10689
10690 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
10691 if (aarch64_label_mentioned_p (XVECEXP (x, i, j)))
10692 return 1;
10693 }
10694 else if (fmt[i] == 'e' && aarch64_label_mentioned_p (XEXP (x, i)))
10695 return 1;
10696 }
10697
10698 return 0;
10699}
10700
10701/* Implement REGNO_REG_CLASS. */
10702
10703enum reg_class
10704aarch64_regno_regclass (unsigned regno)
10705{
96b7f495
MM
10706 if (STUB_REGNUM_P (regno))
10707 return STUB_REGS;
10708
43e9d192 10709 if (GP_REGNUM_P (regno))
a4a182c6 10710 return GENERAL_REGS;
43e9d192
IB
10711
10712 if (regno == SP_REGNUM)
10713 return STACK_REG;
10714
10715 if (regno == FRAME_POINTER_REGNUM
10716 || regno == ARG_POINTER_REGNUM)
f24bb080 10717 return POINTER_REGS;
43e9d192
IB
10718
10719 if (FP_REGNUM_P (regno))
163b1f6a
RS
10720 return (FP_LO8_REGNUM_P (regno) ? FP_LO8_REGS
10721 : FP_LO_REGNUM_P (regno) ? FP_LO_REGS : FP_REGS);
43e9d192 10722
43cacb12
RS
10723 if (PR_REGNUM_P (regno))
10724 return PR_LO_REGNUM_P (regno) ? PR_LO_REGS : PR_HI_REGS;
10725
183bfdaf
RS
10726 if (regno == FFR_REGNUM || regno == FFRT_REGNUM)
10727 return FFR_REGS;
10728
43e9d192
IB
10729 return NO_REGS;
10730}
10731
6a70badb
RS
10732/* OFFSET is an address offset for mode MODE, which has SIZE bytes.
10733 If OFFSET is out of range, return an offset of an anchor point
10734 that is in range. Return 0 otherwise. */
10735
10736static HOST_WIDE_INT
10737aarch64_anchor_offset (HOST_WIDE_INT offset, HOST_WIDE_INT size,
10738 machine_mode mode)
10739{
10740 /* Does it look like we'll need a 16-byte load/store-pair operation? */
10741 if (size > 16)
10742 return (offset + 0x400) & ~0x7f0;
10743
10744 /* For offsets that aren't a multiple of the access size, the limit is
10745 -256...255. */
10746 if (offset & (size - 1))
10747 {
10748 /* BLKmode typically uses LDP of X-registers. */
10749 if (mode == BLKmode)
10750 return (offset + 512) & ~0x3ff;
10751 return (offset + 0x100) & ~0x1ff;
10752 }
10753
10754 /* Small negative offsets are supported. */
10755 if (IN_RANGE (offset, -256, 0))
10756 return 0;
10757
10758 if (mode == TImode || mode == TFmode)
10759 return (offset + 0x100) & ~0x1ff;
10760
 10761 /* Use a 12-bit offset, scaled by the access size. */
10762 return offset & (~0xfff * size);
10763}
10764
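/* Worked example (illustration only): an unaligned 4-byte access at offset
   0x1003 takes the unaligned branch above, so the function returns
   (0x1003 + 0x100) & ~0x1ff = 0x1000 as the anchor; the remaining
   displacement of 3 then fits the signed 9-bit unscaled range.  */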
0c4ec427 10765static rtx
ef4bddc2 10766aarch64_legitimize_address (rtx x, rtx /* orig_x */, machine_mode mode)
0c4ec427
RE
10767{
10768 /* Try to split X+CONST into Y=X+(CONST & ~mask), Y+(CONST&mask),
10769 where mask is selected by alignment and size of the offset.
10770 We try to pick as large a range for the offset as possible to
10771 maximize the chance of a CSE. However, for aligned addresses
10772 we limit the range to 4k so that structures with different sized
e8426e0a
BC
10773 elements are likely to use the same base. We need to be careful
10774 not to split a CONST for some forms of address expression, otherwise
10775 it will generate sub-optimal code. */
0c4ec427
RE
10776
10777 if (GET_CODE (x) == PLUS && CONST_INT_P (XEXP (x, 1)))
10778 {
9e0218fc 10779 rtx base = XEXP (x, 0);
17d7bdd8 10780 rtx offset_rtx = XEXP (x, 1);
9e0218fc 10781 HOST_WIDE_INT offset = INTVAL (offset_rtx);
0c4ec427 10782
9e0218fc 10783 if (GET_CODE (base) == PLUS)
e8426e0a 10784 {
9e0218fc
RH
10785 rtx op0 = XEXP (base, 0);
10786 rtx op1 = XEXP (base, 1);
10787
10788 /* Force any scaling into a temp for CSE. */
10789 op0 = force_reg (Pmode, op0);
10790 op1 = force_reg (Pmode, op1);
10791
10792 /* Let the pointer register be in op0. */
10793 if (REG_POINTER (op1))
10794 std::swap (op0, op1);
10795
10796 /* If the pointer is virtual or frame related, then we know that
10797 virtual register instantiation or register elimination is going
10798 to apply a second constant. We want the two constants folded
10799 together easily. Therefore, emit as (OP0 + CONST) + OP1. */
10800 if (virt_or_elim_regno_p (REGNO (op0)))
e8426e0a 10801 {
9e0218fc
RH
10802 base = expand_binop (Pmode, add_optab, op0, offset_rtx,
10803 NULL_RTX, true, OPTAB_DIRECT);
10804 return gen_rtx_PLUS (Pmode, base, op1);
e8426e0a 10805 }
e8426e0a 10806
9e0218fc
RH
10807 /* Otherwise, in order to encourage CSE (and thence loop strength
10808 reduce) scaled addresses, emit as (OP0 + OP1) + CONST. */
10809 base = expand_binop (Pmode, add_optab, op0, op1,
10810 NULL_RTX, true, OPTAB_DIRECT);
10811 x = gen_rtx_PLUS (Pmode, base, offset_rtx);
e8426e0a
BC
10812 }
10813
6a70badb
RS
10814 HOST_WIDE_INT size;
10815 if (GET_MODE_SIZE (mode).is_constant (&size))
ff0f3f1c 10816 {
6a70badb
RS
10817 HOST_WIDE_INT base_offset = aarch64_anchor_offset (offset, size,
10818 mode);
10819 if (base_offset != 0)
10820 {
10821 base = plus_constant (Pmode, base, base_offset);
10822 base = force_operand (base, NULL_RTX);
10823 return plus_constant (Pmode, base, offset - base_offset);
10824 }
9e0218fc 10825 }
0c4ec427
RE
10826 }
10827
10828 return x;
10829}
10830
43e9d192
IB
10831static reg_class_t
10832aarch64_secondary_reload (bool in_p ATTRIBUTE_UNUSED, rtx x,
10833 reg_class_t rclass,
ef4bddc2 10834 machine_mode mode,
43e9d192
IB
10835 secondary_reload_info *sri)
10836{
cc68f7c2
RS
10837 /* Use aarch64_sve_reload_mem for SVE memory reloads that cannot use
10838 LDR and STR. See the comment at the head of aarch64-sve.md for
10839 more details about the big-endian handling. */
10840 if (reg_class_subset_p (rclass, FP_REGS)
9a1b9cb4
RS
10841 && !((REG_P (x) && HARD_REGISTER_P (x))
10842 || aarch64_simd_valid_immediate (x, NULL))
cc68f7c2 10843 && mode != VNx16QImode)
43cacb12 10844 {
cc68f7c2
RS
10845 unsigned int vec_flags = aarch64_classify_vector_mode (mode);
10846 if ((vec_flags & VEC_SVE_DATA)
10847 && ((vec_flags & VEC_PARTIAL) || BYTES_BIG_ENDIAN))
10848 {
10849 sri->icode = CODE_FOR_aarch64_sve_reload_mem;
10850 return NO_REGS;
10851 }
43cacb12 10852 }
b4f50fd4
RR
10853
10854 /* If we have to disable direct literal pool loads and stores because the
10855 function is too big, then we need a scratch register. */
10856 if (MEM_P (x) && GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x)
10857 && (SCALAR_FLOAT_MODE_P (GET_MODE (x))
10858 || targetm.vector_mode_supported_p (GET_MODE (x)))
9ee6540a 10859 && !aarch64_pcrelative_literal_loads)
b4f50fd4 10860 {
0016d8d9 10861 sri->icode = code_for_aarch64_reload_movcp (mode, DImode);
b4f50fd4
RR
10862 return NO_REGS;
10863 }
10864
43e9d192
IB
10865 /* Without the TARGET_SIMD instructions we cannot move a Q register
10866 to a Q register directly. We need a scratch. */
10867 if (REG_P (x) && (mode == TFmode || mode == TImode) && mode == GET_MODE (x)
10868 && FP_REGNUM_P (REGNO (x)) && !TARGET_SIMD
10869 && reg_class_subset_p (rclass, FP_REGS))
10870 {
0016d8d9 10871 sri->icode = code_for_aarch64_reload_mov (mode);
43e9d192
IB
10872 return NO_REGS;
10873 }
10874
10875 /* A TFmode or TImode memory access should be handled via an FP_REGS
10876 because AArch64 has richer addressing modes for LDR/STR instructions
10877 than LDP/STP instructions. */
d5726973 10878 if (TARGET_FLOAT && rclass == GENERAL_REGS
6a70badb 10879 && known_eq (GET_MODE_SIZE (mode), 16) && MEM_P (x))
43e9d192
IB
10880 return FP_REGS;
10881
10882 if (rclass == FP_REGS && (mode == TImode || mode == TFmode) && CONSTANT_P(x))
a4a182c6 10883 return GENERAL_REGS;
43e9d192
IB
10884
10885 return NO_REGS;
10886}
10887
10888static bool
6216fd90 10889aarch64_can_eliminate (const int from ATTRIBUTE_UNUSED, const int to)
43e9d192 10890{
6216fd90 10891 gcc_assert (from == ARG_POINTER_REGNUM || from == FRAME_POINTER_REGNUM);
43e9d192 10892
6216fd90
WD
10893 /* If we need a frame pointer, ARG_POINTER_REGNUM and FRAME_POINTER_REGNUM
10894 can only eliminate to HARD_FRAME_POINTER_REGNUM. */
43e9d192 10895 if (frame_pointer_needed)
6216fd90 10896 return to == HARD_FRAME_POINTER_REGNUM;
43e9d192
IB
10897 return true;
10898}
10899
6a70badb 10900poly_int64
43e9d192
IB
10901aarch64_initial_elimination_offset (unsigned from, unsigned to)
10902{
78c29983
MS
10903 if (to == HARD_FRAME_POINTER_REGNUM)
10904 {
10905 if (from == ARG_POINTER_REGNUM)
71bfb77a 10906 return cfun->machine->frame.hard_fp_offset;
78c29983
MS
10907
10908 if (from == FRAME_POINTER_REGNUM)
71bfb77a
WD
10909 return cfun->machine->frame.hard_fp_offset
10910 - cfun->machine->frame.locals_offset;
78c29983
MS
10911 }
10912
10913 if (to == STACK_POINTER_REGNUM)
10914 {
10915 if (from == FRAME_POINTER_REGNUM)
71bfb77a
WD
10916 return cfun->machine->frame.frame_size
10917 - cfun->machine->frame.locals_offset;
78c29983
MS
10918 }
10919
1c960e02 10920 return cfun->machine->frame.frame_size;
43e9d192
IB
10921}
10922
463a54e5
SN
10923
10924/* Get return address without mangling. */
10925
10926rtx
10927aarch64_return_addr_rtx (void)
10928{
10929 rtx val = get_hard_reg_initial_val (Pmode, LR_REGNUM);
10930 /* Note: aarch64_return_address_signing_enabled only
10931 works after cfun->machine->frame.laid_out is set,
10932 so here we don't know if the return address will
10933 be signed or not. */
10934 rtx lr = gen_rtx_REG (Pmode, LR_REGNUM);
10935 emit_move_insn (lr, val);
10936 emit_insn (GEN_FCN (CODE_FOR_xpaclri) ());
10937 return lr;
10938}
10939
10940
43e9d192
IB
10941/* Implement RETURN_ADDR_RTX. We do not support moving back to a
10942 previous frame. */
10943
10944rtx
10945aarch64_return_addr (int count, rtx frame ATTRIBUTE_UNUSED)
10946{
10947 if (count != 0)
10948 return const0_rtx;
463a54e5 10949 return aarch64_return_addr_rtx ();
43e9d192
IB
10950}
10951
43e9d192
IB
10952static void
10953aarch64_asm_trampoline_template (FILE *f)
10954{
be7c41a5
OT
10955 /* Even if the current function doesn't have branch protection, some
10956 later function might, so since this template is only generated once
10957 we have to add a BTI just in case. */
10958 asm_fprintf (f, "\thint\t34 // bti c\n");
b5f794b4 10959
28514dda
YZ
10960 if (TARGET_ILP32)
10961 {
be178ecd
MM
10962 asm_fprintf (f, "\tldr\tw%d, .+20\n", IP1_REGNUM - R0_REGNUM);
10963 asm_fprintf (f, "\tldr\tw%d, .+20\n", STATIC_CHAIN_REGNUM - R0_REGNUM);
28514dda
YZ
10964 }
10965 else
10966 {
be178ecd
MM
10967 asm_fprintf (f, "\tldr\t%s, .+20\n", reg_names [IP1_REGNUM]);
10968 asm_fprintf (f, "\tldr\t%s, .+24\n", reg_names [STATIC_CHAIN_REGNUM]);
28514dda 10969 }
01a3a324 10970 asm_fprintf (f, "\tbr\t%s\n", reg_names [IP1_REGNUM]);
b5f794b4 10971
be178ecd
MM
10972 /* We always emit a speculation barrier.
10973 This is because the same trampoline template is used for every nested
10974 function. Since nested functions are not particularly common or
10975 performant we don't worry too much about the extra instructions to copy
10976 around.
10977 This is not yet a problem, since we have not yet implemented function
10978 specific attributes to choose between hardening against straight line
10979 speculation or not, but such function specific attributes are likely to
10980 happen in the future. */
10981 asm_fprintf (f, "\tdsb\tsy\n\tisb\n");
10982
28514dda
YZ
10983 assemble_aligned_integer (POINTER_BYTES, const0_rtx);
10984 assemble_aligned_integer (POINTER_BYTES, const0_rtx);
43e9d192
IB
10985}
10986
10987static void
10988aarch64_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
10989{
10990 rtx fnaddr, mem, a_tramp;
be178ecd 10991 const int tramp_code_sz = 24;
43e9d192
IB
10992
10993 /* Don't need to copy the trailing D-words, we fill those in below. */
be178ecd
MM
10994 /* We create our own memory address in Pmode so that `emit_block_move` can
10995 use parts of the backend which expect Pmode addresses. */
10996 rtx temp = convert_memory_address (Pmode, XEXP (m_tramp, 0));
10997 emit_block_move (gen_rtx_MEM (BLKmode, temp),
10998 assemble_trampoline_template (),
28514dda
YZ
10999 GEN_INT (tramp_code_sz), BLOCK_OP_NORMAL);
11000 mem = adjust_address (m_tramp, ptr_mode, tramp_code_sz);
43e9d192 11001 fnaddr = XEXP (DECL_RTL (fndecl), 0);
28514dda
YZ
11002 if (GET_MODE (fnaddr) != ptr_mode)
11003 fnaddr = convert_memory_address (ptr_mode, fnaddr);
43e9d192
IB
11004 emit_move_insn (mem, fnaddr);
11005
28514dda 11006 mem = adjust_address (m_tramp, ptr_mode, tramp_code_sz + POINTER_BYTES);
43e9d192
IB
11007 emit_move_insn (mem, chain_value);
11008
11009 /* XXX We should really define a "clear_cache" pattern and use
11010 gen_clear_cache(). */
11011 a_tramp = XEXP (m_tramp, 0);
11012 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__clear_cache"),
db69559b 11013 LCT_NORMAL, VOIDmode, a_tramp, ptr_mode,
28514dda
YZ
11014 plus_constant (ptr_mode, a_tramp, TRAMPOLINE_SIZE),
11015 ptr_mode);
43e9d192
IB
11016}
11017
11018static unsigned char
ef4bddc2 11019aarch64_class_max_nregs (reg_class_t regclass, machine_mode mode)
43e9d192 11020{
6a70badb
RS
11021 /* ??? Logically we should only need to provide a value when
11022 HARD_REGNO_MODE_OK says that at least one register in REGCLASS
11023 can hold MODE, but at the moment we need to handle all modes.
11024 Just ignore any runtime parts for registers that can't store them. */
11025 HOST_WIDE_INT lowest_size = constant_lower_bound (GET_MODE_SIZE (mode));
550a3380 11026 unsigned int nregs, vec_flags;
43e9d192
IB
11027 switch (regclass)
11028 {
96b7f495 11029 case STUB_REGS:
d677263e 11030 case TAILCALL_ADDR_REGS:
43e9d192
IB
11031 case POINTER_REGS:
11032 case GENERAL_REGS:
11033 case ALL_REGS:
f25a140b 11034 case POINTER_AND_FP_REGS:
43e9d192
IB
11035 case FP_REGS:
11036 case FP_LO_REGS:
163b1f6a 11037 case FP_LO8_REGS:
550a3380
RS
11038 vec_flags = aarch64_classify_vector_mode (mode);
11039 if ((vec_flags & VEC_SVE_DATA)
43cacb12 11040 && constant_multiple_p (GET_MODE_SIZE (mode),
550a3380 11041 aarch64_vl_bytes (mode, vec_flags), &nregs))
43cacb12 11042 return nregs;
550a3380 11043 return (vec_flags & VEC_ADVSIMD
6a70badb
RS
11044 ? CEIL (lowest_size, UNITS_PER_VREG)
11045 : CEIL (lowest_size, UNITS_PER_WORD));
43e9d192 11046 case STACK_REG:
43cacb12
RS
11047 case PR_REGS:
11048 case PR_LO_REGS:
11049 case PR_HI_REGS:
183bfdaf
RS
11050 case FFR_REGS:
11051 case PR_AND_FFR_REGS:
43e9d192
IB
11052 return 1;
11053
11054 case NO_REGS:
11055 return 0;
11056
11057 default:
11058 break;
11059 }
11060 gcc_unreachable ();
11061}
11062
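/* Worked examples (illustrative only, assuming the usual LP64 register
   sizes): a TImode value in GENERAL_REGS needs CEIL (16, UNITS_PER_WORD)
   = 2 X-registers, while a V4SImode value in FP_REGS is a single Advanced
   SIMD vector, CEIL (16, UNITS_PER_VREG) = 1.  */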
11063static reg_class_t
78d8b9f0 11064aarch64_preferred_reload_class (rtx x, reg_class_t regclass)
43e9d192 11065{
51bb310d 11066 if (regclass == POINTER_REGS)
78d8b9f0
IB
11067 return GENERAL_REGS;
11068
51bb310d
MS
11069 if (regclass == STACK_REG)
11070 {
11071 if (REG_P(x)
11072 && reg_class_subset_p (REGNO_REG_CLASS (REGNO (x)), POINTER_REGS))
11073 return regclass;
11074
11075 return NO_REGS;
11076 }
11077
27bd251b
IB
 11078 /* Register elimination can result in a request for
11079 SP+constant->FP_REGS. We cannot support such operations which
 11080 use SP as the source and an FP_REG as the destination, so reject
 11081 such cases outright here. */
11082 if (! reg_class_subset_p (regclass, GENERAL_REGS) && GET_CODE (x) == PLUS)
11083 {
11084 rtx lhs = XEXP (x, 0);
11085
11086 /* Look through a possible SUBREG introduced by ILP32. */
11087 if (GET_CODE (lhs) == SUBREG)
11088 lhs = SUBREG_REG (lhs);
11089
11090 gcc_assert (REG_P (lhs));
11091 gcc_assert (reg_class_subset_p (REGNO_REG_CLASS (REGNO (lhs)),
11092 POINTER_REGS));
11093 return NO_REGS;
11094 }
11095
78d8b9f0 11096 return regclass;
43e9d192
IB
11097}
11098
11099void
11100aarch64_asm_output_labelref (FILE* f, const char *name)
11101{
11102 asm_fprintf (f, "%U%s", name);
11103}
11104
11105static void
11106aarch64_elf_asm_constructor (rtx symbol, int priority)
11107{
11108 if (priority == DEFAULT_INIT_PRIORITY)
11109 default_ctor_section_asm_out_constructor (symbol, priority);
11110 else
11111 {
11112 section *s;
53d190c1
AT
11113 /* While priority is known to be in range [0, 65535], so 18 bytes
11114 would be enough, the compiler might not know that. To avoid
11115 -Wformat-truncation false positive, use a larger size. */
11116 char buf[23];
43e9d192 11117 snprintf (buf, sizeof (buf), ".init_array.%.5u", priority);
fcef3abd 11118 s = get_section (buf, SECTION_WRITE | SECTION_NOTYPE, NULL);
43e9d192
IB
11119 switch_to_section (s);
11120 assemble_align (POINTER_SIZE);
28514dda 11121 assemble_aligned_integer (POINTER_BYTES, symbol);
43e9d192
IB
11122 }
11123}
11124
11125static void
11126aarch64_elf_asm_destructor (rtx symbol, int priority)
11127{
11128 if (priority == DEFAULT_INIT_PRIORITY)
11129 default_dtor_section_asm_out_destructor (symbol, priority);
11130 else
11131 {
11132 section *s;
53d190c1
AT
11133 /* While the priority is known to be in the range [0, 65535], so that
11134 18 bytes would be enough, the compiler might not know that. To avoid
11135 a -Wformat-truncation false positive, use a larger size. */
11136 char buf[23];
43e9d192 11137 snprintf (buf, sizeof (buf), ".fini_array.%.5u", priority);
fcef3abd 11138 s = get_section (buf, SECTION_WRITE | SECTION_NOTYPE, NULL);
43e9d192
IB
11139 switch_to_section (s);
11140 assemble_align (POINTER_SIZE);
28514dda 11141 assemble_aligned_integer (POINTER_BYTES, symbol);
43e9d192
IB
11142 }
11143}
11144
11145const char*
11146aarch64_output_casesi (rtx *operands)
11147{
11148 char buf[100];
11149 char label[100];
b32d5189 11150 rtx diff_vec = PATTERN (NEXT_INSN (as_a <rtx_insn *> (operands[2])));
43e9d192
IB
11151 int index;
11152 static const char *const patterns[4][2] =
11153 {
11154 {
11155 "ldrb\t%w3, [%0,%w1,uxtw]",
11156 "add\t%3, %4, %w3, sxtb #2"
11157 },
11158 {
11159 "ldrh\t%w3, [%0,%w1,uxtw #1]",
11160 "add\t%3, %4, %w3, sxth #2"
11161 },
11162 {
11163 "ldr\t%w3, [%0,%w1,uxtw #2]",
11164 "add\t%3, %4, %w3, sxtw #2"
11165 },
11166 /* We assume that DImode is only generated when not optimizing and
11167 that we don't really need 64-bit address offsets. That would
11168 imply an object file with 8GB of code in a single function! */
11169 {
11170 "ldr\t%w3, [%0,%w1,uxtw #2]",
11171 "add\t%3, %4, %w3, sxtw #2"
11172 }
11173 };
11174
11175 gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC);
11176
77e994c9
RS
11177 scalar_int_mode mode = as_a <scalar_int_mode> (GET_MODE (diff_vec));
11178 index = exact_log2 (GET_MODE_SIZE (mode));
43e9d192
IB
11179
11180 gcc_assert (index >= 0 && index <= 3);
11181
11182 /* Need to implement table size reduction, by changing the code below. */
11183 output_asm_insn (patterns[index][0], operands);
11184 ASM_GENERATE_INTERNAL_LABEL (label, "Lrtx", CODE_LABEL_NUMBER (operands[2]));
11185 snprintf (buf, sizeof (buf),
11186 "adr\t%%4, %s", targetm.strip_name_encoding (label));
11187 output_asm_insn (buf, operands);
11188 output_asm_insn (patterns[index][1], operands);
11189 output_asm_insn ("br\t%3", operands);
be178ecd
MM
11190 output_asm_insn (aarch64_sls_barrier (aarch64_harden_sls_retbr_p ()),
11191 operands);
43e9d192
IB
11192 assemble_label (asm_out_file, label);
11193 return "";
11194}
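/* For reference, with a halfword dispatch table (index == 1) the code
   emitted by the function above has the shape:

	ldrh	%w3, [%0, %w1, uxtw #1]	  // load table entry
	adr	%4, .LrtxN		  // address of the internal label
	add	%3, %4, %w3, sxth #2	  // scale entry and add the base
	br	%3			  // dispatch
	<SLS barrier, if enabled>
   .LrtxN:

   The byte and word cases differ only in the load and in the extension
   used by the add.  */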
11195
11196
11197/* Return size in bits of an arithmetic operand which is shifted/scaled and
11198 masked such that it is suitable for a UXTB, UXTH, or UXTW extend
11199 operator. */
11200
11201int
11202aarch64_uxt_size (int shift, HOST_WIDE_INT mask)
11203{
11204 if (shift >= 0 && shift <= 3)
11205 {
11206 int size;
11207 for (size = 8; size <= 32; size *= 2)
11208 {
11209 HOST_WIDE_INT bits = ((HOST_WIDE_INT)1U << size) - 1;
11210 if (mask == bits << shift)
11211 return size;
11212 }
11213 }
11214 return 0;
11215}
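/* A few worked examples of the mapping implemented above, together with
   a minimal stand-alone restatement (guarded out; plain unsigned long
   long stands in for HOST_WIDE_INT and the helper name is made up for
   the sketch):

     shift 0, mask 0xff        -> 8   UXTB
     shift 1, mask 0x1fe       -> 8   UXTB of an operand shifted by 1
     shift 2, mask 0x3fffc     -> 16  UXTH of an operand shifted by 2
     shift 0, mask 0xffffffff  -> 32  UXTW
     shift 4, mask 0xff        -> 0   shift out of range  */
#if 0
static int
uxt_size_sketch (int shift, unsigned long long mask)
{
  if (shift >= 0 && shift <= 3)
    for (int size = 8; size <= 32; size *= 2)
      if (mask == (((1ULL << size) - 1) << shift))
	return size;
  return 0;
}
#endif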
11216
e78d485e
RR
11217/* Constant pools are per function only when PC relative
11218 literal loads are true or we are in the large memory
11219 model. */
11220
11221static inline bool
11222aarch64_can_use_per_function_literal_pools_p (void)
11223{
9ee6540a 11224 return (aarch64_pcrelative_literal_loads
e78d485e
RR
11225 || aarch64_cmodel == AARCH64_CMODEL_LARGE);
11226}
11227
43e9d192 11228static bool
e78d485e 11229aarch64_use_blocks_for_constant_p (machine_mode, const_rtx)
43e9d192 11230{
74a9301d
VM
11231 /* We can't use blocks for constants when we're using a per-function
11232 constant pool. */
11233 return !aarch64_can_use_per_function_literal_pools_p ();
43e9d192
IB
11234}
11235
e78d485e
RR
11236/* Select appropriate section for constants depending
11237 on where we place literal pools. */
11238
43e9d192 11239static section *
e78d485e
RR
11240aarch64_select_rtx_section (machine_mode mode,
11241 rtx x,
11242 unsigned HOST_WIDE_INT align)
43e9d192 11243{
e78d485e
RR
11244 if (aarch64_can_use_per_function_literal_pools_p ())
11245 return function_section (current_function_decl);
43e9d192 11246
e78d485e
RR
11247 return default_elf_select_rtx_section (mode, x, align);
11248}
43e9d192 11249
5fca7b66
RH
11250/* Implement ASM_OUTPUT_POOL_EPILOGUE. */
11251void
11252aarch64_asm_output_pool_epilogue (FILE *f, const char *, tree,
11253 HOST_WIDE_INT offset)
11254{
11255 /* When using per-function literal pools, we must ensure that any code
11256 section is aligned to the minimal instruction length, lest we get
11257 errors from the assembler re "unaligned instructions". */
11258 if ((offset & 3) && aarch64_can_use_per_function_literal_pools_p ())
11259 ASM_OUTPUT_ALIGN (f, 2);
11260}
11261
43e9d192
IB
11262/* Costs. */
11263
11264/* Helper function for rtx cost calculation. Strip a shift expression
11265 from X. Returns the inner operand if successful, or the original
11266 expression on failure. */
11267static rtx
11268aarch64_strip_shift (rtx x)
11269{
11270 rtx op = x;
11271
57b77d46
RE
11272 /* We accept both ROTATERT and ROTATE: since the RHS must be a constant
11273 we can convert both to ROR during final output. */
43e9d192
IB
11274 if ((GET_CODE (op) == ASHIFT
11275 || GET_CODE (op) == ASHIFTRT
57b77d46
RE
11276 || GET_CODE (op) == LSHIFTRT
11277 || GET_CODE (op) == ROTATERT
11278 || GET_CODE (op) == ROTATE)
43e9d192
IB
11279 && CONST_INT_P (XEXP (op, 1)))
11280 return XEXP (op, 0);
11281
11282 if (GET_CODE (op) == MULT
11283 && CONST_INT_P (XEXP (op, 1))
11284 && ((unsigned) exact_log2 (INTVAL (XEXP (op, 1)))) < 64)
11285 return XEXP (op, 0);
11286
11287 return x;
11288}
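/* As examples of the stripping above:
     (ashift (reg X) (const_int 3))	-> (reg X)
     (rotatert (reg X) (const_int 8))	-> (reg X)
     (mult (reg X) (const_int 8))	-> (reg X)  -- power-of-two multiply,
						       i.e. a shift by 3
     (ashift (reg X) (reg Y))		-> unchanged  -- shift amount is not
							 a constant
     (mult (reg X) (reg Y))		-> unchanged  */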
11289
4745e701 11290/* Helper function for rtx cost calculation. Strip an extend
43e9d192
IB
11291 expression from X. Returns the inner operand if successful, or the
11292 original expression on failure. We deal with a number of possible
b10f1009
AP
11293 canonicalization variations here. If STRIP_SHIFT is true, then
11294 we can strip off a shift also. */
43e9d192 11295static rtx
b10f1009 11296aarch64_strip_extend (rtx x, bool strip_shift)
43e9d192 11297{
77e994c9 11298 scalar_int_mode mode;
43e9d192
IB
11299 rtx op = x;
11300
77e994c9
RS
11301 if (!is_a <scalar_int_mode> (GET_MODE (op), &mode))
11302 return op;
11303
43e9d192
IB
11304 if (GET_CODE (op) == AND
11305 && GET_CODE (XEXP (op, 0)) == MULT
11306 && CONST_INT_P (XEXP (XEXP (op, 0), 1))
11307 && CONST_INT_P (XEXP (op, 1))
11308 && aarch64_uxt_size (exact_log2 (INTVAL (XEXP (XEXP (op, 0), 1))),
11309 INTVAL (XEXP (op, 1))) != 0)
11310 return XEXP (XEXP (op, 0), 0);
11311
11312 /* Now handle the extended-register form, as this may also have an
11313 optional left shift by 1..4. */
b10f1009
AP
11314 if (strip_shift
11315 && GET_CODE (op) == ASHIFT
43e9d192
IB
11316 && CONST_INT_P (XEXP (op, 1))
11317 && ((unsigned HOST_WIDE_INT) INTVAL (XEXP (op, 1))) <= 4)
11318 op = XEXP (op, 0);
11319
11320 if (GET_CODE (op) == ZERO_EXTEND
11321 || GET_CODE (op) == SIGN_EXTEND)
11322 op = XEXP (op, 0);
11323
11324 if (op != x)
11325 return op;
11326
4745e701
JG
11327 return x;
11328}
11329
0a78ebe4
KT
11330/* Return true iff CODE is a shift supported in combination
11331 with arithmetic instructions. */
4d1919ed 11332
0a78ebe4
KT
11333static bool
11334aarch64_shift_p (enum rtx_code code)
11335{
11336 return code == ASHIFT || code == ASHIFTRT || code == LSHIFTRT;
11337}
11338
b10f1009
AP
11339
11340/* Return true iff X is a cheap shift without a sign extend. */
11341
11342static bool
11343aarch64_cheap_mult_shift_p (rtx x)
11344{
11345 rtx op0, op1;
11346
11347 op0 = XEXP (x, 0);
11348 op1 = XEXP (x, 1);
11349
11350 if (!(aarch64_tune_params.extra_tuning_flags
11351 & AARCH64_EXTRA_TUNE_CHEAP_SHIFT_EXTEND))
11352 return false;
11353
11354 if (GET_CODE (op0) == SIGN_EXTEND)
11355 return false;
11356
11357 if (GET_CODE (x) == ASHIFT && CONST_INT_P (op1)
11358 && UINTVAL (op1) <= 4)
11359 return true;
11360
11361 if (GET_CODE (x) != MULT || !CONST_INT_P (op1))
11362 return false;
11363
11364 HOST_WIDE_INT l2 = exact_log2 (INTVAL (op1));
11365
11366 if (l2 > 0 && l2 <= 4)
11367 return true;
11368
11369 return false;
11370}
11371
4745e701 11372/* Helper function for rtx cost calculation. Calculate the cost of
0a78ebe4
KT
11373 a MULT or ASHIFT, which may be part of a compound PLUS/MINUS rtx.
11374 Return the calculated cost of the expression, recursing manually in to
4745e701
JG
11375 operands where needed. */
11376
11377static int
e548c9df 11378aarch64_rtx_mult_cost (rtx x, enum rtx_code code, int outer, bool speed)
4745e701
JG
11379{
11380 rtx op0, op1;
11381 const struct cpu_cost_table *extra_cost
b175b679 11382 = aarch64_tune_params.insn_extra_cost;
4745e701 11383 int cost = 0;
0a78ebe4 11384 bool compound_p = (outer == PLUS || outer == MINUS);
ef4bddc2 11385 machine_mode mode = GET_MODE (x);
4745e701
JG
11386
11387 gcc_checking_assert (code == MULT);
11388
11389 op0 = XEXP (x, 0);
11390 op1 = XEXP (x, 1);
11391
11392 if (VECTOR_MODE_P (mode))
df81764b
TC
11393 {
11394 unsigned int vec_flags = aarch64_classify_vector_mode (mode);
11395 mode = GET_MODE_INNER (mode);
11396 if (vec_flags & VEC_ADVSIMD)
11397 {
11398 /* The by-element versions of the instruction have the same costs as
11399 the normal 3-vector version. So don't add the costs of the
11400 duplicate into the costs of the multiply. We make an assumption
11401 that the input to the VEC_DUPLICATE is already on the FP & SIMD
11402 side. This means costing of a MUL by element pre RA is a bit
11403 optimistic. */
11404 if (GET_CODE (op0) == VEC_DUPLICATE)
11405 op0 = XEXP (op0, 0);
11406 else if (GET_CODE (op1) == VEC_DUPLICATE)
11407 op1 = XEXP (op1, 0);
11408 }
11409 }
4745e701
JG
11410
11411 /* Integer multiply/fma. */
11412 if (GET_MODE_CLASS (mode) == MODE_INT)
11413 {
11414 /* The multiply will be canonicalized as a shift, cost it as such. */
0a78ebe4
KT
11415 if (aarch64_shift_p (GET_CODE (x))
11416 || (CONST_INT_P (op1)
11417 && exact_log2 (INTVAL (op1)) > 0))
4745e701 11418 {
0a78ebe4
KT
11419 bool is_extend = GET_CODE (op0) == ZERO_EXTEND
11420 || GET_CODE (op0) == SIGN_EXTEND;
4745e701
JG
11421 if (speed)
11422 {
0a78ebe4
KT
11423 if (compound_p)
11424 {
b10f1009
AP
11425 /* If the shift is considered cheap,
11426 then don't add any cost. */
11427 if (aarch64_cheap_mult_shift_p (x))
11428 ;
11429 else if (REG_P (op1))
0a78ebe4
KT
11430 /* ARITH + shift-by-register. */
11431 cost += extra_cost->alu.arith_shift_reg;
11432 else if (is_extend)
11433 /* ARITH + extended register. We don't have a cost field
11434 for ARITH+EXTEND+SHIFT, so use extend_arith here. */
11435 cost += extra_cost->alu.extend_arith;
11436 else
11437 /* ARITH + shift-by-immediate. */
11438 cost += extra_cost->alu.arith_shift;
11439 }
4745e701
JG
11440 else
11441 /* LSL (immediate). */
0a78ebe4
KT
11442 cost += extra_cost->alu.shift;
11443
4745e701 11444 }
0a78ebe4
KT
11445 /* Strip extends as we will have costed them in the case above. */
11446 if (is_extend)
b10f1009 11447 op0 = aarch64_strip_extend (op0, true);
4745e701 11448
e548c9df 11449 cost += rtx_cost (op0, VOIDmode, code, 0, speed);
4745e701
JG
11450
11451 return cost;
11452 }
11453
d2ac256b
KT
11454 /* MNEG or [US]MNEGL. Extract the NEG operand and indicate that it's a
11455 compound and let the below cases handle it. After all, MNEG is a
11456 special-case alias of MSUB. */
11457 if (GET_CODE (op0) == NEG)
11458 {
11459 op0 = XEXP (op0, 0);
11460 compound_p = true;
11461 }
11462
4745e701
JG
11463 /* Integer multiplies or FMAs have zero/sign extending variants. */
11464 if ((GET_CODE (op0) == ZERO_EXTEND
11465 && GET_CODE (op1) == ZERO_EXTEND)
11466 || (GET_CODE (op0) == SIGN_EXTEND
11467 && GET_CODE (op1) == SIGN_EXTEND))
11468 {
e548c9df
AM
11469 cost += rtx_cost (XEXP (op0, 0), VOIDmode, MULT, 0, speed);
11470 cost += rtx_cost (XEXP (op1, 0), VOIDmode, MULT, 1, speed);
4745e701
JG
11471
11472 if (speed)
11473 {
0a78ebe4 11474 if (compound_p)
d2ac256b 11475 /* SMADDL/UMADDL/UMSUBL/SMSUBL. */
4745e701
JG
11476 cost += extra_cost->mult[0].extend_add;
11477 else
11478 /* MUL/SMULL/UMULL. */
11479 cost += extra_cost->mult[0].extend;
11480 }
11481
11482 return cost;
11483 }
11484
d2ac256b 11485 /* This is either an integer multiply or a MADD. In both cases
4745e701 11486 we want to recurse and cost the operands. */
e548c9df
AM
11487 cost += rtx_cost (op0, mode, MULT, 0, speed);
11488 cost += rtx_cost (op1, mode, MULT, 1, speed);
4745e701
JG
11489
11490 if (speed)
11491 {
0a78ebe4 11492 if (compound_p)
d2ac256b 11493 /* MADD/MSUB. */
4745e701
JG
11494 cost += extra_cost->mult[mode == DImode].add;
11495 else
11496 /* MUL. */
11497 cost += extra_cost->mult[mode == DImode].simple;
11498 }
11499
11500 return cost;
11501 }
11502 else
11503 {
11504 if (speed)
11505 {
3d840f7d 11506 /* Floating-point FMA/FMUL can also support negations of the
d318517d
SN
11507 operands, unless the rounding mode is upward or downward in
11508 which case FNMUL is different than FMUL with operand negation. */
11509 bool neg0 = GET_CODE (op0) == NEG;
11510 bool neg1 = GET_CODE (op1) == NEG;
11511 if (compound_p || !flag_rounding_math || (neg0 && neg1))
11512 {
11513 if (neg0)
11514 op0 = XEXP (op0, 0);
11515 if (neg1)
11516 op1 = XEXP (op1, 0);
11517 }
4745e701 11518
0a78ebe4 11519 if (compound_p)
4745e701
JG
11520 /* FMADD/FNMADD/FNMSUB/FMSUB. */
11521 cost += extra_cost->fp[mode == DFmode].fma;
11522 else
3d840f7d 11523 /* FMUL/FNMUL. */
4745e701
JG
11524 cost += extra_cost->fp[mode == DFmode].mult;
11525 }
11526
e548c9df
AM
11527 cost += rtx_cost (op0, mode, MULT, 0, speed);
11528 cost += rtx_cost (op1, mode, MULT, 1, speed);
4745e701
JG
11529 return cost;
11530 }
43e9d192
IB
11531}
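/* In summary, for integer modes the function above charges (when
   optimizing for speed):
     - a power-of-two multiply or bare shift feeding a PLUS/MINUS:
       alu.arith_shift, alu.arith_shift_reg or alu.extend_arith,
       depending on the operand shape (or nothing at all when the shift
       counts as cheap for the current tuning);
     - a standalone power-of-two multiply or shift: alu.shift;
     - widening multiplies ([US]MULL, [US]MADDL, [US]MSUBL):
       mult[].extend or mult[].extend_add;
     - any other integer multiply: mult[].simple, or mult[].add when it
       feeds a PLUS/MINUS (MADD/MSUB, including the MNEG aliases).
   For floating-point modes it charges fp[].fma for FMA-style compounds
   and fp[].mult otherwise, stripping operand negations where the
   instruction set absorbs them.  */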
11532
67747367
JG
11533static int
11534aarch64_address_cost (rtx x,
ef4bddc2 11535 machine_mode mode,
67747367
JG
11536 addr_space_t as ATTRIBUTE_UNUSED,
11537 bool speed)
11538{
11539 enum rtx_code c = GET_CODE (x);
b175b679 11540 const struct cpu_addrcost_table *addr_cost = aarch64_tune_params.addr_cost;
67747367
JG
11541 struct aarch64_address_info info;
11542 int cost = 0;
11543 info.shift = 0;
11544
a97d8b98 11545 if (!aarch64_classify_address (&info, x, mode, false))
67747367
JG
11546 {
11547 if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF)
11548 {
11549 /* This is a CONST or SYMBOL ref which will be split
11550 in a different way depending on the code model in use.
11551 Cost it through the generic infrastructure. */
e548c9df 11552 int cost_symbol_ref = rtx_cost (x, Pmode, MEM, 1, speed);
67747367
JG
11553 /* Divide through by the cost of one instruction to
11554 bring it to the same units as the address costs. */
11555 cost_symbol_ref /= COSTS_N_INSNS (1);
11556 /* The cost is then the cost of preparing the address,
11557 followed by an immediate (possibly 0) offset. */
11558 return cost_symbol_ref + addr_cost->imm_offset;
11559 }
11560 else
11561 {
11562 /* This is most likely a jump table from a case
11563 statement. */
11564 return addr_cost->register_offset;
11565 }
11566 }
11567
11568 switch (info.type)
11569 {
11570 case ADDRESS_LO_SUM:
11571 case ADDRESS_SYMBOLIC:
11572 case ADDRESS_REG_IMM:
11573 cost += addr_cost->imm_offset;
11574 break;
11575
11576 case ADDRESS_REG_WB:
11577 if (c == PRE_INC || c == PRE_DEC || c == PRE_MODIFY)
11578 cost += addr_cost->pre_modify;
11579 else if (c == POST_INC || c == POST_DEC || c == POST_MODIFY)
11580 cost += addr_cost->post_modify;
11581 else
11582 gcc_unreachable ();
11583
11584 break;
11585
11586 case ADDRESS_REG_REG:
11587 cost += addr_cost->register_offset;
11588 break;
11589
67747367 11590 case ADDRESS_REG_SXTW:
783879e6
EM
11591 cost += addr_cost->register_sextend;
11592 break;
11593
11594 case ADDRESS_REG_UXTW:
11595 cost += addr_cost->register_zextend;
67747367
JG
11596 break;
11597
11598 default:
11599 gcc_unreachable ();
11600 }
11601
11602
11603 if (info.shift > 0)
11604 {
11605 /* For the sake of calculating the cost of the shifted register
11606 component, we can treat same sized modes in the same way. */
6a70badb
RS
11607 if (known_eq (GET_MODE_BITSIZE (mode), 16))
11608 cost += addr_cost->addr_scale_costs.hi;
11609 else if (known_eq (GET_MODE_BITSIZE (mode), 32))
11610 cost += addr_cost->addr_scale_costs.si;
11611 else if (known_eq (GET_MODE_BITSIZE (mode), 64))
11612 cost += addr_cost->addr_scale_costs.di;
11613 else
11614 /* We can't tell, or this is a 128-bit vector. */
11615 cost += addr_cost->addr_scale_costs.ti;
67747367
JG
11616 }
11617
11618 return cost;
11619}
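/* Some concrete address shapes and the cost fields chosen above:
     [Xn, #imm]			-> imm_offset
     [Xn, #imm]! and [Xn], #imm	-> pre_modify / post_modify
     [Xn, Xm]			-> register_offset
     [Xn, Wm, SXTW {#s}]	-> register_sextend
     [Xn, Wm, UXTW {#s}]	-> register_zextend
   A scaled index additionally pays addr_scale_costs.hi/si/di/ti
   according to the access size.  Addresses that do not classify
   (CONST or SYMBOL_REF) cost the materialisation of the symbol plus
   imm_offset; anything else is treated as a jump-table access and
   costs register_offset.  */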
11620
b9066f5a
MW
11621/* Return the cost of a branch. If SPEED_P is true then the compiler is
11622 optimizing for speed. If PREDICTABLE_P is true then the branch is predicted
11623 to be taken. */
11624
11625int
11626aarch64_branch_cost (bool speed_p, bool predictable_p)
11627{
11628 /* When optimizing for speed, use the cost of unpredictable branches. */
11629 const struct cpu_branch_cost *branch_costs =
b175b679 11630 aarch64_tune_params.branch_costs;
b9066f5a
MW
11631
11632 if (!speed_p || predictable_p)
11633 return branch_costs->predictable;
11634 else
11635 return branch_costs->unpredictable;
11636}
11637
7de23b8c 11638/* Return true if X is a zero or sign extract
7cc2145f
JG
11639 usable in an ADD or SUB (extended register) instruction. */
11640static bool
7de23b8c 11641aarch64_rtx_arith_op_extract_p (rtx x)
7cc2145f 11642{
e47c4031
KT
11643 /* The simple case <ARITH>, XD, XN, XM, [us]xt.
11644 No shift. */
7de23b8c
AC
11645 if (GET_CODE (x) == SIGN_EXTEND
11646 || GET_CODE (x) == ZERO_EXTEND)
e47c4031 11647 return REG_P (XEXP (x, 0));
7cc2145f
JG
11648
11649 return false;
11650}
11651
61263118
KT
11652static bool
11653aarch64_frint_unspec_p (unsigned int u)
11654{
11655 switch (u)
11656 {
11657 case UNSPEC_FRINTZ:
11658 case UNSPEC_FRINTP:
11659 case UNSPEC_FRINTM:
11660 case UNSPEC_FRINTA:
11661 case UNSPEC_FRINTN:
11662 case UNSPEC_FRINTX:
11663 case UNSPEC_FRINTI:
11664 return true;
11665
11666 default:
11667 return false;
11668 }
11669}
11670
fb0cb7fa
KT
11671/* Return true iff X is an rtx that will match an extr instruction
11672 i.e. as described in the *extr<mode>5_insn family of patterns.
11673 OP0 and OP1 will be set to the operands of the shifts involved
11674 on success and will be NULL_RTX otherwise. */
11675
11676static bool
11677aarch64_extr_rtx_p (rtx x, rtx *res_op0, rtx *res_op1)
11678{
11679 rtx op0, op1;
77e994c9
RS
11680 scalar_int_mode mode;
11681 if (!is_a <scalar_int_mode> (GET_MODE (x), &mode))
11682 return false;
fb0cb7fa
KT
11683
11684 *res_op0 = NULL_RTX;
11685 *res_op1 = NULL_RTX;
11686
11687 if (GET_CODE (x) != IOR)
11688 return false;
11689
11690 op0 = XEXP (x, 0);
11691 op1 = XEXP (x, 1);
11692
11693 if ((GET_CODE (op0) == ASHIFT && GET_CODE (op1) == LSHIFTRT)
11694 || (GET_CODE (op1) == ASHIFT && GET_CODE (op0) == LSHIFTRT))
11695 {
11696 /* Canonicalise locally to ashift in op0, lshiftrt in op1. */
11697 if (GET_CODE (op1) == ASHIFT)
11698 std::swap (op0, op1);
11699
11700 if (!CONST_INT_P (XEXP (op0, 1)) || !CONST_INT_P (XEXP (op1, 1)))
11701 return false;
11702
11703 unsigned HOST_WIDE_INT shft_amnt_0 = UINTVAL (XEXP (op0, 1));
11704 unsigned HOST_WIDE_INT shft_amnt_1 = UINTVAL (XEXP (op1, 1));
11705
11706 if (shft_amnt_0 < GET_MODE_BITSIZE (mode)
11707 && shft_amnt_0 + shft_amnt_1 == GET_MODE_BITSIZE (mode))
11708 {
11709 *res_op0 = XEXP (op0, 0);
11710 *res_op1 = XEXP (op1, 0);
11711 return true;
11712 }
11713 }
11714
11715 return false;
11716}
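/* For instance, in DImode

     (ior (ashift (reg A) (const_int 48))
	  (lshiftrt (reg B) (const_int 16)))

   is accepted, because the shift amounts are constants and
   48 + 16 == 64 == GET_MODE_BITSIZE (DImode); *RES_OP0 and *RES_OP1
   are set to A and B and the whole expression can be emitted as a
   single EXTR with an immediate of 16.  Non-constant shift amounts,
   or amounts that do not sum to the mode size, are rejected.  */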
11717
2d5ffe46
AP
11718/* Calculate the cost of calculating (if_then_else (OP0) (OP1) (OP2)),
11719 storing it in *COST. Result is true if the total cost of the operation
11720 has now been calculated. */
11721static bool
11722aarch64_if_then_else_costs (rtx op0, rtx op1, rtx op2, int *cost, bool speed)
11723{
b9e3afe9
AP
11724 rtx inner;
11725 rtx comparator;
11726 enum rtx_code cmpcode;
e2a14bec
RS
11727 const struct cpu_cost_table *extra_cost
11728 = aarch64_tune_params.insn_extra_cost;
b9e3afe9
AP
11729
11730 if (COMPARISON_P (op0))
11731 {
11732 inner = XEXP (op0, 0);
11733 comparator = XEXP (op0, 1);
11734 cmpcode = GET_CODE (op0);
11735 }
11736 else
11737 {
11738 inner = op0;
11739 comparator = const0_rtx;
11740 cmpcode = NE;
11741 }
11742
2d5ffe46
AP
11743 if (GET_CODE (op1) == PC || GET_CODE (op2) == PC)
11744 {
11745 /* Conditional branch. */
b9e3afe9 11746 if (GET_MODE_CLASS (GET_MODE (inner)) == MODE_CC)
2d5ffe46
AP
11747 return true;
11748 else
11749 {
b9e3afe9 11750 if (cmpcode == NE || cmpcode == EQ)
2d5ffe46 11751 {
2d5ffe46
AP
11752 if (comparator == const0_rtx)
11753 {
11754 /* TBZ/TBNZ/CBZ/CBNZ. */
11755 if (GET_CODE (inner) == ZERO_EXTRACT)
11756 /* TBZ/TBNZ. */
e548c9df
AM
11757 *cost += rtx_cost (XEXP (inner, 0), VOIDmode,
11758 ZERO_EXTRACT, 0, speed);
11759 else
11760 /* CBZ/CBNZ. */
11761 *cost += rtx_cost (inner, VOIDmode, cmpcode, 0, speed);
2d5ffe46 11762
e2a14bec
RS
11763 return true;
11764 }
11765 if (register_operand (inner, VOIDmode)
11766 && aarch64_imm24 (comparator, VOIDmode))
11767 {
11768 /* SUB and SUBS. */
11769 *cost += COSTS_N_INSNS (2);
11770 if (speed)
11771 *cost += extra_cost->alu.arith * 2;
11772 return true;
11773 }
2d5ffe46 11774 }
b9e3afe9 11775 else if (cmpcode == LT || cmpcode == GE)
2d5ffe46 11776 {
2d5ffe46
AP
11777 /* TBZ/TBNZ. */
11778 if (comparator == const0_rtx)
11779 return true;
11780 }
11781 }
11782 }
b9e3afe9 11783 else if (GET_MODE_CLASS (GET_MODE (inner)) == MODE_CC)
2d5ffe46 11784 {
786298dc 11785 /* CCMP. */
6dfeb7ce 11786 if (GET_CODE (op1) == COMPARE)
786298dc
WD
11787 {
11788 /* Increase cost of CCMP reg, 0, imm, CC to prefer CMP reg, 0. */
11789 if (XEXP (op1, 1) == const0_rtx)
11790 *cost += 1;
11791 if (speed)
11792 {
11793 machine_mode mode = GET_MODE (XEXP (op1, 0));
786298dc
WD
11794
11795 if (GET_MODE_CLASS (mode) == MODE_INT)
11796 *cost += extra_cost->alu.arith;
11797 else
11798 *cost += extra_cost->fp[mode == DFmode].compare;
11799 }
11800 return true;
11801 }
11802
2d5ffe46
AP
11803 /* It's a conditional operation based on the status flags,
11804 so it must be some flavor of CSEL. */
11805
11806 /* CSNEG, CSINV, and CSINC are handled for free as part of CSEL. */
11807 if (GET_CODE (op1) == NEG
11808 || GET_CODE (op1) == NOT
11809 || (GET_CODE (op1) == PLUS && XEXP (op1, 1) == const1_rtx))
11810 op1 = XEXP (op1, 0);
bad00732
KT
11811 else if (GET_CODE (op1) == ZERO_EXTEND && GET_CODE (op2) == ZERO_EXTEND)
11812 {
11813 /* CSEL with zero-extension (*cmovdi_insn_uxtw). */
11814 op1 = XEXP (op1, 0);
11815 op2 = XEXP (op2, 0);
11816 }
d572ad49
AC
11817 else if (GET_CODE (op1) == ZERO_EXTEND && op2 == const0_rtx)
11818 {
11819 inner = XEXP (op1, 0);
11820 if (GET_CODE (inner) == NEG || GET_CODE (inner) == NOT)
11821 /* CSINV/NEG with zero extend + const 0 (*csinv3_uxtw_insn3). */
11822 op1 = XEXP (inner, 0);
11823 }
2d5ffe46 11824
e548c9df
AM
11825 *cost += rtx_cost (op1, VOIDmode, IF_THEN_ELSE, 1, speed);
11826 *cost += rtx_cost (op2, VOIDmode, IF_THEN_ELSE, 2, speed);
2d5ffe46
AP
11827 return true;
11828 }
11829
11830 /* We don't know what this is, cost all operands. */
11831 return false;
11832}
11833
283b6c85
KT
11834/* Check whether X is a bitfield operation of the form shift + extend that
11835 maps down to a UBFIZ/SBFIZ/UBFX/SBFX instruction. If so, return the
11836 operand to which the bitfield operation is applied. Otherwise return
11837 NULL_RTX. */
11838
11839static rtx
11840aarch64_extend_bitfield_pattern_p (rtx x)
11841{
11842 rtx_code outer_code = GET_CODE (x);
11843 machine_mode outer_mode = GET_MODE (x);
11844
11845 if (outer_code != ZERO_EXTEND && outer_code != SIGN_EXTEND
11846 && outer_mode != SImode && outer_mode != DImode)
11847 return NULL_RTX;
11848
11849 rtx inner = XEXP (x, 0);
11850 rtx_code inner_code = GET_CODE (inner);
11851 machine_mode inner_mode = GET_MODE (inner);
11852 rtx op = NULL_RTX;
11853
11854 switch (inner_code)
11855 {
11856 case ASHIFT:
11857 if (CONST_INT_P (XEXP (inner, 1))
11858 && (inner_mode == QImode || inner_mode == HImode))
11859 op = XEXP (inner, 0);
11860 break;
11861 case LSHIFTRT:
11862 if (outer_code == ZERO_EXTEND && CONST_INT_P (XEXP (inner, 1))
11863 && (inner_mode == QImode || inner_mode == HImode))
11864 op = XEXP (inner, 0);
11865 break;
11866 case ASHIFTRT:
11867 if (outer_code == SIGN_EXTEND && CONST_INT_P (XEXP (inner, 1))
11868 && (inner_mode == QImode || inner_mode == HImode))
11869 op = XEXP (inner, 0);
11870 break;
11871 default:
11872 break;
11873 }
11874
11875 return op;
11876}
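/* Example inputs accepted above (the returned value is the inner
   register being shifted):
     (zero_extend:SI (lshiftrt:HI (reg:HI R) (const_int 3)))  -> R  UBFX
     (sign_extend:DI (ashiftrt:QI (reg:QI R) (const_int 2)))  -> R  SBFX
     (zero_extend:SI (ashift:HI (reg:HI R) (const_int 4)))    -> R  UBFIZ
   A sign/zero mismatch (e.g. a zero_extend of an ashiftrt) or an inner
   mode other than QImode/HImode yields NULL_RTX.  */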
11877
8c83f71d
KT
11878/* Return true if the mask and a shift amount from an RTX of the form
11879 (x << SHFT_AMNT) & MASK are valid to combine into a UBFIZ instruction of
11880 mode MODE. See the *andim_ashift<mode>_bfiz pattern. */
11881
11882bool
77e994c9
RS
11883aarch64_mask_and_shift_for_ubfiz_p (scalar_int_mode mode, rtx mask,
11884 rtx shft_amnt)
8c83f71d
KT
11885{
11886 return CONST_INT_P (mask) && CONST_INT_P (shft_amnt)
11887 && INTVAL (shft_amnt) < GET_MODE_BITSIZE (mode)
11888 && exact_log2 ((INTVAL (mask) >> INTVAL (shft_amnt)) + 1) >= 0
1b6acf23
WD
11889 && (INTVAL (mask)
11890 & ((HOST_WIDE_INT_1U << INTVAL (shft_amnt)) - 1)) == 0;
8c83f71d
KT
11891}
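/* Worked examples for the predicate above, with a minimal stand-alone
   restatement (guarded out; plain unsigned long long stands in for
   HOST_WIDE_INT, the mode size is passed explicitly and the helper name
   is made up for the sketch):

     SImode, mask 0x00000ff0, shift 4  -> true
       (0xff0 >> 4) + 1 == 0x100 is a power of two and the low four mask
       bits are clear, so (x << 4) & 0xff0 is UBFIZ w, w, #4, #8.
     SImode, mask 0x00000ff8, shift 4  -> false
       mask bit 3 overlaps the zero bits produced by the shift.  */
#if 0
static int
ubfiz_mask_shift_ok (unsigned bitsize, unsigned long long mask,
		     unsigned shift)
{
  unsigned long long low = (1ULL << shift) - 1;	  /* bits cleared by the shift */
  unsigned long long top = (mask >> shift) + 1;
  return (shift < bitsize
	  && (mask & low) == 0
	  && top != 0 && (top & (top - 1)) == 0); /* mask >> shift is 2^n - 1 */
}
#endif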
11892
6a0d3939
SE
11893/* Return true if the masks and a shift amount from an RTX of the form
11894 ((x & MASK1) | ((y << SHIFT_AMNT) & MASK2)) are valid to combine into
11895 a BFI instruction of mode MODE. See *aarch64_bfi patterns. */
11896
11897bool
11898aarch64_masks_and_shift_for_bfi_p (scalar_int_mode mode,
11899 unsigned HOST_WIDE_INT mask1,
11900 unsigned HOST_WIDE_INT shft_amnt,
11901 unsigned HOST_WIDE_INT mask2)
11902{
11903 unsigned HOST_WIDE_INT t;
11904
11905 /* Verify that there is no overlap in what bits are set in the two masks. */
11906 if (mask1 != ~mask2)
11907 return false;
11908
11909 /* Verify that mask2 is neither all zeros nor all ones. */
11910 if (mask2 == 0 || mask2 == HOST_WIDE_INT_M1U)
11911 return false;
11912
11913 /* The shift amount should always be less than the mode size. */
11914 gcc_assert (shft_amnt < GET_MODE_BITSIZE (mode));
11915
11916 /* Verify that the mask being shifted is contiguous and would be in the
11917 least significant bits after shifting by shft_amnt. */
11918 t = mask2 + (HOST_WIDE_INT_1U << shft_amnt);
11919 return (t == (t & -t));
11920}
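/* A concrete instance of the conditions above:

     mask2     = 0x0000000000000ff0
     shft_amnt = 4
     mask1     = ~mask2 = 0xfffffffffffff00f

   mask1 == ~mask2 holds, mask2 is neither zero nor all ones, and
   mask2 + (1 << 4) == 0x1000 is a power of two, i.e. mask2 is a
   contiguous run of bits starting at bit 4.  The expression
   (x & mask1) | ((y << 4) & mask2) is then a single BFI inserting an
   8-bit field at position 4.  */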
11921
43e9d192
IB
11922/* Calculate the cost of calculating X, storing it in *COST. Result
11923 is true if the total cost of the operation has now been calculated. */
11924static bool
e548c9df 11925aarch64_rtx_costs (rtx x, machine_mode mode, int outer ATTRIBUTE_UNUSED,
43e9d192
IB
11926 int param ATTRIBUTE_UNUSED, int *cost, bool speed)
11927{
a8eecd00 11928 rtx op0, op1, op2;
73250c4c 11929 const struct cpu_cost_table *extra_cost
b175b679 11930 = aarch64_tune_params.insn_extra_cost;
e548c9df 11931 int code = GET_CODE (x);
b4206259 11932 scalar_int_mode int_mode;
43e9d192 11933
7fc5ef02
JG
11934 /* By default, assume that everything has equivalent cost to the
11935 cheapest instruction. Any additional costs are applied as a delta
11936 above this default. */
11937 *cost = COSTS_N_INSNS (1);
11938
43e9d192
IB
11939 switch (code)
11940 {
11941 case SET:
ba123b0d
JG
11942 /* The cost depends entirely on the operands to SET. */
11943 *cost = 0;
43e9d192
IB
11944 op0 = SET_DEST (x);
11945 op1 = SET_SRC (x);
11946
11947 switch (GET_CODE (op0))
11948 {
11949 case MEM:
11950 if (speed)
2961177e
JG
11951 {
11952 rtx address = XEXP (op0, 0);
b6875aac
KV
11953 if (VECTOR_MODE_P (mode))
11954 *cost += extra_cost->ldst.storev;
11955 else if (GET_MODE_CLASS (mode) == MODE_INT)
2961177e
JG
11956 *cost += extra_cost->ldst.store;
11957 else if (mode == SFmode)
11958 *cost += extra_cost->ldst.storef;
11959 else if (mode == DFmode)
11960 *cost += extra_cost->ldst.stored;
11961
11962 *cost +=
11963 COSTS_N_INSNS (aarch64_address_cost (address, mode,
11964 0, speed));
11965 }
43e9d192 11966
e548c9df 11967 *cost += rtx_cost (op1, mode, SET, 1, speed);
43e9d192
IB
11968 return true;
11969
11970 case SUBREG:
11971 if (! REG_P (SUBREG_REG (op0)))
e548c9df 11972 *cost += rtx_cost (SUBREG_REG (op0), VOIDmode, SET, 0, speed);
ba123b0d 11973
43e9d192
IB
11974 /* Fall through. */
11975 case REG:
b6875aac
KV
11976 /* The cost is one per vector-register copied. */
11977 if (VECTOR_MODE_P (GET_MODE (op0)) && REG_P (op1))
11978 {
fe1447a1
RS
11979 int nregs = aarch64_hard_regno_nregs (V0_REGNUM, GET_MODE (op0));
11980 *cost = COSTS_N_INSNS (nregs);
b6875aac 11981 }
ba123b0d
JG
11982 /* const0_rtx is in general free, but we will use an
11983 instruction to set a register to 0. */
b6875aac
KV
11984 else if (REG_P (op1) || op1 == const0_rtx)
11985 {
11986 /* The cost is 1 per register copied. */
fe1447a1
RS
11987 int nregs = aarch64_hard_regno_nregs (R0_REGNUM, GET_MODE (op0));
11988 *cost = COSTS_N_INSNS (nregs);
b6875aac 11989 }
ba123b0d
JG
11990 else
11991 /* Cost is just the cost of the RHS of the set. */
e548c9df 11992 *cost += rtx_cost (op1, mode, SET, 1, speed);
43e9d192
IB
11993 return true;
11994
ba123b0d 11995 case ZERO_EXTRACT:
43e9d192 11996 case SIGN_EXTRACT:
ba123b0d
JG
11997 /* Bit-field insertion. Strip any redundant widening of
11998 the RHS to meet the width of the target. */
43e9d192
IB
11999 if (GET_CODE (op1) == SUBREG)
12000 op1 = SUBREG_REG (op1);
12001 if ((GET_CODE (op1) == ZERO_EXTEND
12002 || GET_CODE (op1) == SIGN_EXTEND)
4aa81c2e 12003 && CONST_INT_P (XEXP (op0, 1))
77e994c9
RS
12004 && is_a <scalar_int_mode> (GET_MODE (XEXP (op1, 0)), &int_mode)
12005 && GET_MODE_BITSIZE (int_mode) >= INTVAL (XEXP (op0, 1)))
43e9d192 12006 op1 = XEXP (op1, 0);
ba123b0d
JG
12007
12008 if (CONST_INT_P (op1))
12009 {
12010 /* MOV immediate is assumed to always be cheap. */
12011 *cost = COSTS_N_INSNS (1);
12012 }
12013 else
12014 {
12015 /* BFM. */
12016 if (speed)
12017 *cost += extra_cost->alu.bfi;
e548c9df 12018 *cost += rtx_cost (op1, VOIDmode, (enum rtx_code) code, 1, speed);
ba123b0d
JG
12019 }
12020
43e9d192
IB
12021 return true;
12022
12023 default:
ba123b0d
JG
12024 /* We can't make sense of this, assume default cost. */
12025 *cost = COSTS_N_INSNS (1);
61263118 12026 return false;
43e9d192
IB
12027 }
12028 return false;
12029
9dfc162c
JG
12030 case CONST_INT:
12031 /* If an instruction can incorporate a constant within the
12032 instruction, the instruction's expression avoids calling
12033 rtx_cost() on the constant. If rtx_cost() is called on a
12034 constant, then it is usually because the constant must be
12035 moved into a register by one or more instructions.
12036
12037 The exception is constant 0, which can be expressed
12038 as XZR/WZR and is therefore free. The exception to this is
12039 if we have (set (reg) (const0_rtx)) in which case we must cost
12040 the move. However, we can catch that when we cost the SET, so
12041 we don't need to consider that here. */
12042 if (x == const0_rtx)
12043 *cost = 0;
12044 else
12045 {
12046 /* To an approximation, building any other constant is
12047 proportionally expensive to the number of instructions
12048 required to build that constant. This is true whether we
12049 are compiling for SPEED or otherwise. */
77e994c9
RS
12050 if (!is_a <scalar_int_mode> (mode, &int_mode))
12051 int_mode = word_mode;
82614948 12052 *cost = COSTS_N_INSNS (aarch64_internal_mov_immediate
77e994c9 12053 (NULL_RTX, x, false, int_mode));
9dfc162c
JG
12054 }
12055 return true;
12056
12057 case CONST_DOUBLE:
a2170965
TC
12058
12059 /* First determine number of instructions to do the move
12060 as an integer constant. */
12061 if (!aarch64_float_const_representable_p (x)
12062 && !aarch64_can_const_movi_rtx_p (x, mode)
12063 && aarch64_float_const_rtx_p (x))
12064 {
12065 unsigned HOST_WIDE_INT ival;
12066 bool succeed = aarch64_reinterpret_float_as_int (x, &ival);
12067 gcc_assert (succeed);
12068
77e994c9
RS
12069 scalar_int_mode imode = (mode == HFmode
12070 ? SImode
12071 : int_mode_for_mode (mode).require ());
a2170965
TC
12072 int ncost = aarch64_internal_mov_immediate
12073 (NULL_RTX, gen_int_mode (ival, imode), false, imode);
12074 *cost += COSTS_N_INSNS (ncost);
12075 return true;
12076 }
12077
9dfc162c
JG
12078 if (speed)
12079 {
12080 /* mov[df,sf]_aarch64. */
12081 if (aarch64_float_const_representable_p (x))
12082 /* FMOV (scalar immediate). */
12083 *cost += extra_cost->fp[mode == DFmode].fpconst;
12084 else if (!aarch64_float_const_zero_rtx_p (x))
12085 {
12086 /* This will be a load from memory. */
12087 if (mode == DFmode)
12088 *cost += extra_cost->ldst.loadd;
12089 else
12090 *cost += extra_cost->ldst.loadf;
12091 }
12092 else
12093 /* Otherwise this is +0.0. We get this using MOVI d0, #0
12094 or MOV v0.s[0], wzr - neither of which are modeled by the
12095 cost tables. Just use the default cost. */
12096 {
12097 }
12098 }
12099
12100 return true;
12101
43e9d192
IB
12102 case MEM:
12103 if (speed)
2961177e
JG
12104 {
12105 /* For loads we want the base cost of a load, plus an
12106 approximation for the additional cost of the addressing
12107 mode. */
12108 rtx address = XEXP (x, 0);
b6875aac
KV
12109 if (VECTOR_MODE_P (mode))
12110 *cost += extra_cost->ldst.loadv;
12111 else if (GET_MODE_CLASS (mode) == MODE_INT)
2961177e
JG
12112 *cost += extra_cost->ldst.load;
12113 else if (mode == SFmode)
12114 *cost += extra_cost->ldst.loadf;
12115 else if (mode == DFmode)
12116 *cost += extra_cost->ldst.loadd;
12117
12118 *cost +=
12119 COSTS_N_INSNS (aarch64_address_cost (address, mode,
12120 0, speed));
12121 }
43e9d192
IB
12122
12123 return true;
12124
12125 case NEG:
4745e701
JG
12126 op0 = XEXP (x, 0);
12127
b6875aac
KV
12128 if (VECTOR_MODE_P (mode))
12129 {
12130 if (speed)
12131 {
12132 /* FNEG. */
12133 *cost += extra_cost->vect.alu;
12134 }
12135 return false;
12136 }
12137
e548c9df
AM
12138 if (GET_MODE_CLASS (mode) == MODE_INT)
12139 {
4745e701
JG
12140 if (GET_RTX_CLASS (GET_CODE (op0)) == RTX_COMPARE
12141 || GET_RTX_CLASS (GET_CODE (op0)) == RTX_COMM_COMPARE)
12142 {
12143 /* CSETM. */
e548c9df 12144 *cost += rtx_cost (XEXP (op0, 0), VOIDmode, NEG, 0, speed);
4745e701
JG
12145 return true;
12146 }
12147
12148 /* Cost this as SUB wzr, X. */
e548c9df 12149 op0 = CONST0_RTX (mode);
4745e701
JG
12150 op1 = XEXP (x, 0);
12151 goto cost_minus;
12152 }
12153
e548c9df 12154 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
4745e701
JG
12155 {
12156 /* Support (neg(fma...)) as a single instruction only if
12157 sign of zeros is unimportant. This matches the decision
12158 making in aarch64.md. */
12159 if (GET_CODE (op0) == FMA && !HONOR_SIGNED_ZEROS (GET_MODE (op0)))
12160 {
12161 /* FNMADD. */
e548c9df 12162 *cost = rtx_cost (op0, mode, NEG, 0, speed);
4745e701
JG
12163 return true;
12164 }
d318517d
SN
12165 if (GET_CODE (op0) == MULT)
12166 {
12167 /* FNMUL. */
12168 *cost = rtx_cost (op0, mode, NEG, 0, speed);
12169 return true;
12170 }
4745e701
JG
12171 if (speed)
12172 /* FNEG. */
12173 *cost += extra_cost->fp[mode == DFmode].neg;
12174 return false;
12175 }
12176
12177 return false;
43e9d192 12178
781aeb73
KT
12179 case CLRSB:
12180 case CLZ:
12181 if (speed)
b6875aac
KV
12182 {
12183 if (VECTOR_MODE_P (mode))
12184 *cost += extra_cost->vect.alu;
12185 else
12186 *cost += extra_cost->alu.clz;
12187 }
781aeb73
KT
12188
12189 return false;
12190
5bfc8303
WD
12191 case CTZ:
12192 *cost = COSTS_N_INSNS (2);
12193
12194 if (speed)
12195 *cost += extra_cost->alu.clz + extra_cost->alu.rev;
12196 return false;
12197
43e9d192
IB
12198 case COMPARE:
12199 op0 = XEXP (x, 0);
12200 op1 = XEXP (x, 1);
12201
12202 if (op1 == const0_rtx
12203 && GET_CODE (op0) == AND)
12204 {
12205 x = op0;
e548c9df 12206 mode = GET_MODE (op0);
43e9d192
IB
12207 goto cost_logic;
12208 }
12209
a8eecd00
JG
12210 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT)
12211 {
12212 /* TODO: A write to the CC flags possibly costs extra, this
12213 needs encoding in the cost tables. */
12214
e548c9df 12215 mode = GET_MODE (op0);
a8eecd00
JG
12216 /* ANDS. */
12217 if (GET_CODE (op0) == AND)
12218 {
12219 x = op0;
12220 goto cost_logic;
12221 }
12222
12223 if (GET_CODE (op0) == PLUS)
12224 {
12225 /* ADDS (and CMN alias). */
12226 x = op0;
12227 goto cost_plus;
12228 }
12229
12230 if (GET_CODE (op0) == MINUS)
12231 {
12232 /* SUBS. */
12233 x = op0;
12234 goto cost_minus;
12235 }
12236
345854d8
KT
12237 if (GET_CODE (op0) == ZERO_EXTRACT && op1 == const0_rtx
12238 && GET_MODE (x) == CC_NZmode && CONST_INT_P (XEXP (op0, 1))
12239 && CONST_INT_P (XEXP (op0, 2)))
12240 {
12241 /* COMPARE of ZERO_EXTRACT form of TST-immediate.
12242 Handle it here directly rather than going to cost_logic
12243 since we know the immediate generated for the TST is valid
12244 so we can avoid creating an intermediate rtx for it only
12245 for costing purposes. */
12246 if (speed)
12247 *cost += extra_cost->alu.logical;
12248
12249 *cost += rtx_cost (XEXP (op0, 0), GET_MODE (op0),
12250 ZERO_EXTRACT, 0, speed);
12251 return true;
12252 }
12253
a8eecd00
JG
12254 if (GET_CODE (op1) == NEG)
12255 {
12256 /* CMN. */
12257 if (speed)
12258 *cost += extra_cost->alu.arith;
12259
e548c9df
AM
12260 *cost += rtx_cost (op0, mode, COMPARE, 0, speed);
12261 *cost += rtx_cost (XEXP (op1, 0), mode, NEG, 1, speed);
a8eecd00
JG
12262 return true;
12263 }
12264
12265 /* CMP.
12266
12267 Compare can freely swap the order of operands, and
12268 canonicalization puts the more complex operation first.
12269 But the integer MINUS logic expects the shift/extend
12270 operation in op1. */
12271 if (! (REG_P (op0)
12272 || (GET_CODE (op0) == SUBREG && REG_P (SUBREG_REG (op0)))))
12273 {
12274 op0 = XEXP (x, 1);
12275 op1 = XEXP (x, 0);
12276 }
12277 goto cost_minus;
12278 }
12279
12280 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_FLOAT)
12281 {
12282 /* FCMP. */
12283 if (speed)
12284 *cost += extra_cost->fp[mode == DFmode].compare;
12285
12286 if (CONST_DOUBLE_P (op1) && aarch64_float_const_zero_rtx_p (op1))
12287 {
e548c9df 12288 *cost += rtx_cost (op0, VOIDmode, COMPARE, 0, speed);
a8eecd00
JG
12289 /* FCMP supports constant 0.0 for no extra cost. */
12290 return true;
12291 }
12292 return false;
12293 }
12294
b6875aac
KV
12295 if (VECTOR_MODE_P (mode))
12296 {
12297 /* Vector compare. */
12298 if (speed)
12299 *cost += extra_cost->vect.alu;
12300
12301 if (aarch64_float_const_zero_rtx_p (op1))
12302 {
12303 /* Vector cm (eq|ge|gt|lt|le) supports constant 0.0 for no extra
12304 cost. */
12305 return true;
12306 }
12307 return false;
12308 }
a8eecd00 12309 return false;
43e9d192
IB
12310
12311 case MINUS:
4745e701
JG
12312 {
12313 op0 = XEXP (x, 0);
12314 op1 = XEXP (x, 1);
12315
12316cost_minus:
e548c9df 12317 *cost += rtx_cost (op0, mode, MINUS, 0, speed);
23cb6618 12318
4745e701
JG
12319 /* Detect valid immediates. */
12320 if ((GET_MODE_CLASS (mode) == MODE_INT
12321 || (GET_MODE_CLASS (mode) == MODE_CC
12322 && GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT))
12323 && CONST_INT_P (op1)
12324 && aarch64_uimm12_shift (INTVAL (op1)))
12325 {
4745e701
JG
12326 if (speed)
12327 /* SUB(S) (immediate). */
12328 *cost += extra_cost->alu.arith;
12329 return true;
4745e701
JG
12330 }
12331
7cc2145f 12332 /* Look for SUB (extended register). */
7de23b8c
AC
12333 if (is_a <scalar_int_mode> (mode)
12334 && aarch64_rtx_arith_op_extract_p (op1))
7cc2145f
JG
12335 {
12336 if (speed)
2533c820 12337 *cost += extra_cost->alu.extend_arith;
7cc2145f 12338
b10f1009 12339 op1 = aarch64_strip_extend (op1, true);
e47c4031 12340 *cost += rtx_cost (op1, VOIDmode,
e548c9df 12341 (enum rtx_code) GET_CODE (op1), 0, speed);
7cc2145f
JG
12342 return true;
12343 }
12344
b10f1009 12345 rtx new_op1 = aarch64_strip_extend (op1, false);
4745e701
JG
12346
12347 /* Cost this as an FMA-alike operation. */
12348 if ((GET_CODE (new_op1) == MULT
0a78ebe4 12349 || aarch64_shift_p (GET_CODE (new_op1)))
4745e701
JG
12350 && code != COMPARE)
12351 {
12352 *cost += aarch64_rtx_mult_cost (new_op1, MULT,
12353 (enum rtx_code) code,
12354 speed);
4745e701
JG
12355 return true;
12356 }
43e9d192 12357
e548c9df 12358 *cost += rtx_cost (new_op1, VOIDmode, MINUS, 1, speed);
43e9d192 12359
4745e701
JG
12360 if (speed)
12361 {
b6875aac
KV
12362 if (VECTOR_MODE_P (mode))
12363 {
12364 /* Vector SUB. */
12365 *cost += extra_cost->vect.alu;
12366 }
12367 else if (GET_MODE_CLASS (mode) == MODE_INT)
12368 {
12369 /* SUB(S). */
12370 *cost += extra_cost->alu.arith;
12371 }
4745e701 12372 else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
b6875aac
KV
12373 {
12374 /* FSUB. */
12375 *cost += extra_cost->fp[mode == DFmode].addsub;
12376 }
4745e701
JG
12377 }
12378 return true;
12379 }
43e9d192
IB
12380
12381 case PLUS:
4745e701
JG
12382 {
12383 rtx new_op0;
43e9d192 12384
4745e701
JG
12385 op0 = XEXP (x, 0);
12386 op1 = XEXP (x, 1);
43e9d192 12387
a8eecd00 12388cost_plus:
4745e701
JG
12389 if (GET_RTX_CLASS (GET_CODE (op0)) == RTX_COMPARE
12390 || GET_RTX_CLASS (GET_CODE (op0)) == RTX_COMM_COMPARE)
12391 {
12392 /* CSINC. */
e548c9df
AM
12393 *cost += rtx_cost (XEXP (op0, 0), mode, PLUS, 0, speed);
12394 *cost += rtx_cost (op1, mode, PLUS, 1, speed);
4745e701
JG
12395 return true;
12396 }
43e9d192 12397
4745e701 12398 if (GET_MODE_CLASS (mode) == MODE_INT
835d50c6 12399 && (aarch64_plus_immediate (op1, mode)
43cacb12 12400 || aarch64_sve_addvl_addpl_immediate (op1, mode)))
4745e701 12401 {
e548c9df 12402 *cost += rtx_cost (op0, mode, PLUS, 0, speed);
43e9d192 12403
4745e701
JG
12404 if (speed)
12405 /* ADD (immediate). */
12406 *cost += extra_cost->alu.arith;
12407 return true;
12408 }
12409
e548c9df 12410 *cost += rtx_cost (op1, mode, PLUS, 1, speed);
23cb6618 12411
7cc2145f 12412 /* Look for ADD (extended register). */
7de23b8c
AC
12413 if (is_a <scalar_int_mode> (mode)
12414 && aarch64_rtx_arith_op_extract_p (op0))
7cc2145f
JG
12415 {
12416 if (speed)
2533c820 12417 *cost += extra_cost->alu.extend_arith;
7cc2145f 12418
b10f1009 12419 op0 = aarch64_strip_extend (op0, true);
e47c4031 12420 *cost += rtx_cost (op0, VOIDmode,
e548c9df 12421 (enum rtx_code) GET_CODE (op0), 0, speed);
7cc2145f
JG
12422 return true;
12423 }
12424
4745e701
JG
12425 /* Strip any extend, leave shifts behind as we will
12426 cost them through mult_cost. */
b10f1009 12427 new_op0 = aarch64_strip_extend (op0, false);
4745e701
JG
12428
12429 if (GET_CODE (new_op0) == MULT
0a78ebe4 12430 || aarch64_shift_p (GET_CODE (new_op0)))
4745e701
JG
12431 {
12432 *cost += aarch64_rtx_mult_cost (new_op0, MULT, PLUS,
12433 speed);
4745e701
JG
12434 return true;
12435 }
12436
e548c9df 12437 *cost += rtx_cost (new_op0, VOIDmode, PLUS, 0, speed);
4745e701
JG
12438
12439 if (speed)
12440 {
b6875aac
KV
12441 if (VECTOR_MODE_P (mode))
12442 {
12443 /* Vector ADD. */
12444 *cost += extra_cost->vect.alu;
12445 }
12446 else if (GET_MODE_CLASS (mode) == MODE_INT)
12447 {
12448 /* ADD. */
12449 *cost += extra_cost->alu.arith;
12450 }
4745e701 12451 else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
b6875aac
KV
12452 {
12453 /* FADD. */
12454 *cost += extra_cost->fp[mode == DFmode].addsub;
12455 }
4745e701
JG
12456 }
12457 return true;
12458 }
43e9d192 12459
18b42b2a
KT
12460 case BSWAP:
12461 *cost = COSTS_N_INSNS (1);
12462
12463 if (speed)
b6875aac
KV
12464 {
12465 if (VECTOR_MODE_P (mode))
12466 *cost += extra_cost->vect.alu;
12467 else
12468 *cost += extra_cost->alu.rev;
12469 }
18b42b2a
KT
12470 return false;
12471
43e9d192 12472 case IOR:
f7d5cf8d
KT
12473 if (aarch_rev16_p (x))
12474 {
12475 *cost = COSTS_N_INSNS (1);
12476
b6875aac
KV
12477 if (speed)
12478 {
12479 if (VECTOR_MODE_P (mode))
12480 *cost += extra_cost->vect.alu;
12481 else
12482 *cost += extra_cost->alu.rev;
12483 }
12484 return true;
f7d5cf8d 12485 }
fb0cb7fa
KT
12486
12487 if (aarch64_extr_rtx_p (x, &op0, &op1))
12488 {
e548c9df
AM
12489 *cost += rtx_cost (op0, mode, IOR, 0, speed);
12490 *cost += rtx_cost (op1, mode, IOR, 1, speed);
fb0cb7fa
KT
12491 if (speed)
12492 *cost += extra_cost->alu.shift;
12493
12494 return true;
12495 }
f7d5cf8d 12496 /* Fall through. */
43e9d192
IB
12497 case XOR:
12498 case AND:
12499 cost_logic:
12500 op0 = XEXP (x, 0);
12501 op1 = XEXP (x, 1);
12502
b6875aac
KV
12503 if (VECTOR_MODE_P (mode))
12504 {
12505 if (speed)
12506 *cost += extra_cost->vect.alu;
12507 return true;
12508 }
12509
268c3b47
JG
12510 if (code == AND
12511 && GET_CODE (op0) == MULT
12512 && CONST_INT_P (XEXP (op0, 1))
12513 && CONST_INT_P (op1)
12514 && aarch64_uxt_size (exact_log2 (INTVAL (XEXP (op0, 1))),
12515 INTVAL (op1)) != 0)
12516 {
12517 /* This is a UBFM/SBFM. */
e548c9df 12518 *cost += rtx_cost (XEXP (op0, 0), mode, ZERO_EXTRACT, 0, speed);
268c3b47
JG
12519 if (speed)
12520 *cost += extra_cost->alu.bfx;
12521 return true;
12522 }
12523
b4206259 12524 if (is_int_mode (mode, &int_mode))
43e9d192 12525 {
8c83f71d 12526 if (CONST_INT_P (op1))
43e9d192 12527 {
8c83f71d
KT
12528 /* We have a mask + shift version of a UBFIZ
12529 i.e. the *andim_ashift<mode>_bfiz pattern. */
12530 if (GET_CODE (op0) == ASHIFT
b4206259
RS
12531 && aarch64_mask_and_shift_for_ubfiz_p (int_mode, op1,
12532 XEXP (op0, 1)))
8c83f71d 12533 {
b4206259 12534 *cost += rtx_cost (XEXP (op0, 0), int_mode,
8c83f71d
KT
12535 (enum rtx_code) code, 0, speed);
12536 if (speed)
12537 *cost += extra_cost->alu.bfx;
268c3b47 12538
8c83f71d
KT
12539 return true;
12540 }
b4206259 12541 else if (aarch64_bitmask_imm (INTVAL (op1), int_mode))
8c83f71d
KT
12542 {
12543 /* We possibly get the immediate for free, this is not
12544 modelled. */
b4206259
RS
12545 *cost += rtx_cost (op0, int_mode,
12546 (enum rtx_code) code, 0, speed);
8c83f71d
KT
12547 if (speed)
12548 *cost += extra_cost->alu.logical;
268c3b47 12549
8c83f71d
KT
12550 return true;
12551 }
43e9d192
IB
12552 }
12553 else
12554 {
268c3b47
JG
12555 rtx new_op0 = op0;
12556
12557 /* Handle ORN, EON, or BIC. */
43e9d192
IB
12558 if (GET_CODE (op0) == NOT)
12559 op0 = XEXP (op0, 0);
268c3b47
JG
12560
12561 new_op0 = aarch64_strip_shift (op0);
12562
12563 /* If we had a shift on op0 then this is a logical-shift-
12564 by-register/immediate operation. Otherwise, this is just
12565 a logical operation. */
12566 if (speed)
12567 {
12568 if (new_op0 != op0)
12569 {
12570 /* Shift by immediate. */
12571 if (CONST_INT_P (XEXP (op0, 1)))
12572 *cost += extra_cost->alu.log_shift;
12573 else
12574 *cost += extra_cost->alu.log_shift_reg;
12575 }
12576 else
12577 *cost += extra_cost->alu.logical;
12578 }
12579
12580 /* In both cases we want to cost both operands. */
b4206259
RS
12581 *cost += rtx_cost (new_op0, int_mode, (enum rtx_code) code,
12582 0, speed);
12583 *cost += rtx_cost (op1, int_mode, (enum rtx_code) code,
12584 1, speed);
268c3b47
JG
12585
12586 return true;
43e9d192 12587 }
43e9d192
IB
12588 }
12589 return false;
12590
268c3b47 12591 case NOT:
6365da9e
KT
12592 x = XEXP (x, 0);
12593 op0 = aarch64_strip_shift (x);
12594
b6875aac
KV
12595 if (VECTOR_MODE_P (mode))
12596 {
12597 /* Vector NOT. */
12598 *cost += extra_cost->vect.alu;
12599 return false;
12600 }
12601
6365da9e
KT
12602 /* MVN-shifted-reg. */
12603 if (op0 != x)
12604 {
e548c9df 12605 *cost += rtx_cost (op0, mode, (enum rtx_code) code, 0, speed);
6365da9e
KT
12606
12607 if (speed)
12608 *cost += extra_cost->alu.log_shift;
12609
12610 return true;
12611 }
12612 /* EON can have two forms: (xor (not a) b) but also (not (xor a b)).
12613 Handle the second form here, taking care that 'a' in the above can
12614 be a shift. */
12615 else if (GET_CODE (op0) == XOR)
12616 {
12617 rtx newop0 = XEXP (op0, 0);
12618 rtx newop1 = XEXP (op0, 1);
12619 rtx op0_stripped = aarch64_strip_shift (newop0);
12620
e548c9df
AM
12621 *cost += rtx_cost (newop1, mode, (enum rtx_code) code, 1, speed);
12622 *cost += rtx_cost (op0_stripped, mode, XOR, 0, speed);
6365da9e
KT
12623
12624 if (speed)
12625 {
12626 if (op0_stripped != newop0)
12627 *cost += extra_cost->alu.log_shift;
12628 else
12629 *cost += extra_cost->alu.logical;
12630 }
12631
12632 return true;
12633 }
268c3b47
JG
12634 /* MVN. */
12635 if (speed)
12636 *cost += extra_cost->alu.logical;
12637
268c3b47
JG
12638 return false;
12639
43e9d192 12640 case ZERO_EXTEND:
b1685e62
JG
12641
12642 op0 = XEXP (x, 0);
12643 /* If a value is written in SI mode, then zero extended to DI
12644 mode, the operation will in general be free as a write to
12645 a 'w' register implicitly zeroes the upper bits of an 'x'
12646 register. However, if this is
12647
12648 (set (reg) (zero_extend (reg)))
12649
12650 we must cost the explicit register move. */
12651 if (mode == DImode
12652 && GET_MODE (op0) == SImode
12653 && outer == SET)
12654 {
e548c9df 12655 int op_cost = rtx_cost (op0, VOIDmode, ZERO_EXTEND, 0, speed);
b1685e62 12656
dde23f43
KM
12657 /* If OP_COST is non-zero, then the cost of the zero extend
12658 is effectively the cost of the inner operation. Otherwise
12659 we have a MOV instruction and we take the cost from the MOV
12660 itself. This is true independently of whether we are
12661 optimizing for space or time. */
12662 if (op_cost)
b1685e62
JG
12663 *cost = op_cost;
12664
12665 return true;
12666 }
e548c9df 12667 else if (MEM_P (op0))
43e9d192 12668 {
b1685e62 12669 /* All loads can zero extend to any size for free. */
e548c9df 12670 *cost = rtx_cost (op0, VOIDmode, ZERO_EXTEND, param, speed);
43e9d192
IB
12671 return true;
12672 }
b1685e62 12673
283b6c85
KT
12674 op0 = aarch64_extend_bitfield_pattern_p (x);
12675 if (op0)
12676 {
12677 *cost += rtx_cost (op0, mode, ZERO_EXTEND, 0, speed);
12678 if (speed)
12679 *cost += extra_cost->alu.bfx;
12680 return true;
12681 }
12682
b1685e62 12683 if (speed)
b6875aac
KV
12684 {
12685 if (VECTOR_MODE_P (mode))
12686 {
12687 /* UMOV. */
12688 *cost += extra_cost->vect.alu;
12689 }
12690 else
12691 {
63715e5e
WD
12692 /* We generate an AND instead of UXTB/UXTH. */
12693 *cost += extra_cost->alu.logical;
b6875aac
KV
12694 }
12695 }
43e9d192
IB
12696 return false;
12697
12698 case SIGN_EXTEND:
b1685e62 12699 if (MEM_P (XEXP (x, 0)))
43e9d192 12700 {
b1685e62
JG
12701 /* LDRSH. */
12702 if (speed)
12703 {
12704 rtx address = XEXP (XEXP (x, 0), 0);
12705 *cost += extra_cost->ldst.load_sign_extend;
12706
12707 *cost +=
12708 COSTS_N_INSNS (aarch64_address_cost (address, mode,
12709 0, speed));
12710 }
43e9d192
IB
12711 return true;
12712 }
b1685e62 12713
283b6c85
KT
12714 op0 = aarch64_extend_bitfield_pattern_p (x);
12715 if (op0)
12716 {
12717 *cost += rtx_cost (op0, mode, SIGN_EXTEND, 0, speed);
12718 if (speed)
12719 *cost += extra_cost->alu.bfx;
12720 return true;
12721 }
12722
b1685e62 12723 if (speed)
b6875aac
KV
12724 {
12725 if (VECTOR_MODE_P (mode))
12726 *cost += extra_cost->vect.alu;
12727 else
12728 *cost += extra_cost->alu.extend;
12729 }
43e9d192
IB
12730 return false;
12731
ba0cfa17
JG
12732 case ASHIFT:
12733 op0 = XEXP (x, 0);
12734 op1 = XEXP (x, 1);
12735
12736 if (CONST_INT_P (op1))
12737 {
ba0cfa17 12738 if (speed)
b6875aac
KV
12739 {
12740 if (VECTOR_MODE_P (mode))
12741 {
12742 /* Vector shift (immediate). */
12743 *cost += extra_cost->vect.alu;
12744 }
12745 else
12746 {
12747 /* LSL (immediate), UBMF, UBFIZ and friends. These are all
12748 aliases. */
12749 *cost += extra_cost->alu.shift;
12750 }
12751 }
ba0cfa17
JG
12752
12753 /* We can incorporate zero/sign extend for free. */
12754 if (GET_CODE (op0) == ZERO_EXTEND
12755 || GET_CODE (op0) == SIGN_EXTEND)
12756 op0 = XEXP (op0, 0);
12757
e548c9df 12758 *cost += rtx_cost (op0, VOIDmode, ASHIFT, 0, speed);
ba0cfa17
JG
12759 return true;
12760 }
12761 else
12762 {
7813b280 12763 if (VECTOR_MODE_P (mode))
b6875aac 12764 {
7813b280
KT
12765 if (speed)
12766 /* Vector shift (register). */
12767 *cost += extra_cost->vect.alu;
12768 }
12769 else
12770 {
12771 if (speed)
12772 /* LSLV. */
12773 *cost += extra_cost->alu.shift_reg;
12774
12775 if (GET_CODE (op1) == AND && REG_P (XEXP (op1, 0))
12776 && CONST_INT_P (XEXP (op1, 1))
6a70badb
RS
12777 && known_eq (INTVAL (XEXP (op1, 1)),
12778 GET_MODE_BITSIZE (mode) - 1))
b6875aac 12779 {
7813b280
KT
12780 *cost += rtx_cost (op0, mode, (rtx_code) code, 0, speed);
12781 /* We already demanded XEXP (op1, 0) to be REG_P, so
12782 don't recurse into it. */
12783 return true;
b6875aac
KV
12784 }
12785 }
ba0cfa17
JG
12786 return false; /* All arguments need to be in registers. */
12787 }
12788
43e9d192 12789 case ROTATE:
43e9d192
IB
12790 case ROTATERT:
12791 case LSHIFTRT:
43e9d192 12792 case ASHIFTRT:
ba0cfa17
JG
12793 op0 = XEXP (x, 0);
12794 op1 = XEXP (x, 1);
43e9d192 12795
ba0cfa17
JG
12796 if (CONST_INT_P (op1))
12797 {
12798 /* ASR (immediate) and friends. */
12799 if (speed)
b6875aac
KV
12800 {
12801 if (VECTOR_MODE_P (mode))
12802 *cost += extra_cost->vect.alu;
12803 else
12804 *cost += extra_cost->alu.shift;
12805 }
43e9d192 12806
e548c9df 12807 *cost += rtx_cost (op0, mode, (enum rtx_code) code, 0, speed);
ba0cfa17
JG
12808 return true;
12809 }
12810 else
12811 {
7813b280 12812 if (VECTOR_MODE_P (mode))
b6875aac 12813 {
7813b280
KT
12814 if (speed)
12815 /* Vector shift (register). */
b6875aac 12816 *cost += extra_cost->vect.alu;
7813b280
KT
12817 }
12818 else
12819 {
12820 if (speed)
12821 /* ASR (register) and friends. */
b6875aac 12822 *cost += extra_cost->alu.shift_reg;
7813b280
KT
12823
12824 if (GET_CODE (op1) == AND && REG_P (XEXP (op1, 0))
12825 && CONST_INT_P (XEXP (op1, 1))
6a70badb
RS
12826 && known_eq (INTVAL (XEXP (op1, 1)),
12827 GET_MODE_BITSIZE (mode) - 1))
7813b280
KT
12828 {
12829 *cost += rtx_cost (op0, mode, (rtx_code) code, 0, speed);
12830 /* We already demanded XEXP (op1, 0) to be REG_P, so
12831 don't recurse into it. */
12832 return true;
12833 }
b6875aac 12834 }
ba0cfa17
JG
12835 return false; /* All arguments need to be in registers. */
12836 }
43e9d192 12837
909734be
JG
12838 case SYMBOL_REF:
12839
1b1e81f8
JW
12840 if (aarch64_cmodel == AARCH64_CMODEL_LARGE
12841 || aarch64_cmodel == AARCH64_CMODEL_SMALL_SPIC)
909734be
JG
12842 {
12843 /* LDR. */
12844 if (speed)
12845 *cost += extra_cost->ldst.load;
12846 }
12847 else if (aarch64_cmodel == AARCH64_CMODEL_SMALL
12848 || aarch64_cmodel == AARCH64_CMODEL_SMALL_PIC)
12849 {
12850 /* ADRP, followed by ADD. */
12851 *cost += COSTS_N_INSNS (1);
12852 if (speed)
12853 *cost += 2 * extra_cost->alu.arith;
12854 }
12855 else if (aarch64_cmodel == AARCH64_CMODEL_TINY
12856 || aarch64_cmodel == AARCH64_CMODEL_TINY_PIC)
12857 {
12858 /* ADR. */
12859 if (speed)
12860 *cost += extra_cost->alu.arith;
12861 }
12862
12863 if (flag_pic)
12864 {
12865 /* One extra load instruction, after accessing the GOT. */
12866 *cost += COSTS_N_INSNS (1);
12867 if (speed)
12868 *cost += extra_cost->ldst.load;
12869 }
43e9d192
IB
12870 return true;
12871
909734be 12872 case HIGH:
43e9d192 12873 case LO_SUM:
909734be
JG
12874 /* ADRP/ADD (immediate). */
12875 if (speed)
12876 *cost += extra_cost->alu.arith;
43e9d192
IB
12877 return true;
12878
12879 case ZERO_EXTRACT:
12880 case SIGN_EXTRACT:
7cc2145f
JG
12881 /* UBFX/SBFX. */
12882 if (speed)
b6875aac
KV
12883 {
12884 if (VECTOR_MODE_P (mode))
12885 *cost += extra_cost->vect.alu;
12886 else
12887 *cost += extra_cost->alu.bfx;
12888 }
7cc2145f
JG
12889
12890 /* We can trust that the immediates used will be correct (there
12891 are no by-register forms), so we need only cost op0. */
e548c9df 12892 *cost += rtx_cost (XEXP (x, 0), VOIDmode, (enum rtx_code) code, 0, speed);
43e9d192
IB
12893 return true;
12894
12895 case MULT:
4745e701
JG
12896 *cost += aarch64_rtx_mult_cost (x, MULT, 0, speed);
12897 /* aarch64_rtx_mult_cost always handles recursion to its
12898 operands. */
12899 return true;
43e9d192
IB
12900
12901 case MOD:
4f58fe36
KT
12902 /* We can expand signed mod by a power of 2 using a NEGS, two parallel
12903 ANDs and a CSNEG. Assume here that the cost of a CSNEG is the same
12904 as that of an unconditional negate. This case should only ever be
12905 reached through the set_smod_pow2_cheap check in expmed.c. */
12906 if (CONST_INT_P (XEXP (x, 1))
12907 && exact_log2 (INTVAL (XEXP (x, 1))) > 0
12908 && (mode == SImode || mode == DImode))
12909 {
12910 /* We expand to 4 instructions. Reset the baseline. */
12911 *cost = COSTS_N_INSNS (4);
12912
12913 if (speed)
12914 *cost += 2 * extra_cost->alu.logical
12915 + 2 * extra_cost->alu.arith;
12916
12917 return true;
12918 }
12919
12920 /* Fall-through. */
43e9d192 12921 case UMOD:
43e9d192
IB
12922 if (speed)
12923 {
cb9ac430 12924 /* Slightly prefer UMOD over SMOD. */
b6875aac
KV
12925 if (VECTOR_MODE_P (mode))
12926 *cost += extra_cost->vect.alu;
e548c9df
AM
12927 else if (GET_MODE_CLASS (mode) == MODE_INT)
12928 *cost += (extra_cost->mult[mode == DImode].add
cb9ac430
TC
12929 + extra_cost->mult[mode == DImode].idiv
12930 + (code == MOD ? 1 : 0));
43e9d192
IB
12931 }
12932 return false; /* All arguments need to be in registers. */
12933
12934 case DIV:
12935 case UDIV:
4105fe38 12936 case SQRT:
43e9d192
IB
12937 if (speed)
12938 {
b6875aac
KV
12939 if (VECTOR_MODE_P (mode))
12940 *cost += extra_cost->vect.alu;
12941 else if (GET_MODE_CLASS (mode) == MODE_INT)
4105fe38
JG
12942 /* There is no integer SQRT, so only DIV and UDIV can get
12943 here. */
cb9ac430
TC
12944 *cost += (extra_cost->mult[mode == DImode].idiv
12945 /* Slightly prefer UDIV over SDIV. */
12946 + (code == DIV ? 1 : 0));
4105fe38
JG
12947 else
12948 *cost += extra_cost->fp[mode == DFmode].div;
43e9d192
IB
12949 }
12950 return false; /* All arguments need to be in registers. */
12951
a8eecd00 12952 case IF_THEN_ELSE:
2d5ffe46
AP
12953 return aarch64_if_then_else_costs (XEXP (x, 0), XEXP (x, 1),
12954 XEXP (x, 2), cost, speed);
a8eecd00
JG
12955
12956 case EQ:
12957 case NE:
12958 case GT:
12959 case GTU:
12960 case LT:
12961 case LTU:
12962 case GE:
12963 case GEU:
12964 case LE:
12965 case LEU:
12966
12967 return false; /* All arguments must be in registers. */
12968
b292109f
JG
12969 case FMA:
12970 op0 = XEXP (x, 0);
12971 op1 = XEXP (x, 1);
12972 op2 = XEXP (x, 2);
12973
12974 if (speed)
b6875aac
KV
12975 {
12976 if (VECTOR_MODE_P (mode))
12977 *cost += extra_cost->vect.alu;
12978 else
12979 *cost += extra_cost->fp[mode == DFmode].fma;
12980 }
b292109f
JG
12981
12982 /* FMSUB, FNMADD, and FNMSUB are free. */
12983 if (GET_CODE (op0) == NEG)
12984 op0 = XEXP (op0, 0);
12985
12986 if (GET_CODE (op2) == NEG)
12987 op2 = XEXP (op2, 0);
12988
12989 /* aarch64_fnma4_elt_to_64v2df has the NEG as operand 1,
12990 and the by-element operand as operand 0. */
12991 if (GET_CODE (op1) == NEG)
12992 op1 = XEXP (op1, 0);
12993
12994 /* Catch vector-by-element operations. The by-element operand can
12995 either be (vec_duplicate (vec_select (x))) or just
12996 (vec_select (x)), depending on whether we are multiplying by
12997 a vector or a scalar.
12998
 12999 Canonicalization is not very good in these cases: FMA4 will put the
 13000 by-element operand as operand 0, while FNMA4 will have it as operand 1. */
13001 if (GET_CODE (op0) == VEC_DUPLICATE)
13002 op0 = XEXP (op0, 0);
13003 else if (GET_CODE (op1) == VEC_DUPLICATE)
13004 op1 = XEXP (op1, 0);
13005
13006 if (GET_CODE (op0) == VEC_SELECT)
13007 op0 = XEXP (op0, 0);
13008 else if (GET_CODE (op1) == VEC_SELECT)
13009 op1 = XEXP (op1, 0);
13010
13011 /* If the remaining parameters are not registers,
13012 get the cost to put them into registers. */
e548c9df
AM
13013 *cost += rtx_cost (op0, mode, FMA, 0, speed);
13014 *cost += rtx_cost (op1, mode, FMA, 1, speed);
13015 *cost += rtx_cost (op2, mode, FMA, 2, speed);
b292109f
JG
13016 return true;
13017
5e2a765b
KT
13018 case FLOAT:
13019 case UNSIGNED_FLOAT:
13020 if (speed)
13021 *cost += extra_cost->fp[mode == DFmode].fromint;
13022 return false;
13023
b292109f
JG
13024 case FLOAT_EXTEND:
13025 if (speed)
b6875aac
KV
13026 {
13027 if (VECTOR_MODE_P (mode))
13028 {
 13029 /* Vector widen. */
13030 *cost += extra_cost->vect.alu;
13031 }
13032 else
13033 *cost += extra_cost->fp[mode == DFmode].widen;
13034 }
b292109f
JG
13035 return false;
13036
13037 case FLOAT_TRUNCATE:
13038 if (speed)
b6875aac
KV
13039 {
13040 if (VECTOR_MODE_P (mode))
13041 {
 13042 /* Vector narrow. */
13043 *cost += extra_cost->vect.alu;
13044 }
13045 else
13046 *cost += extra_cost->fp[mode == DFmode].narrow;
13047 }
b292109f
JG
13048 return false;
13049
61263118
KT
13050 case FIX:
13051 case UNSIGNED_FIX:
13052 x = XEXP (x, 0);
13053 /* Strip the rounding part. They will all be implemented
13054 by the fcvt* family of instructions anyway. */
13055 if (GET_CODE (x) == UNSPEC)
13056 {
13057 unsigned int uns_code = XINT (x, 1);
13058
13059 if (uns_code == UNSPEC_FRINTA
13060 || uns_code == UNSPEC_FRINTM
13061 || uns_code == UNSPEC_FRINTN
13062 || uns_code == UNSPEC_FRINTP
13063 || uns_code == UNSPEC_FRINTZ)
13064 x = XVECEXP (x, 0, 0);
13065 }
13066
13067 if (speed)
b6875aac
KV
13068 {
13069 if (VECTOR_MODE_P (mode))
13070 *cost += extra_cost->vect.alu;
13071 else
13072 *cost += extra_cost->fp[GET_MODE (x) == DFmode].toint;
13073 }
39252973
KT
13074
13075 /* We can combine fmul by a power of 2 followed by a fcvt into a single
13076 fixed-point fcvt. */
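 /* For example (a sketch): (int) (x * 16.0f) can become a single
      fcvtzs  w0, s0, #4
    i.e. a fixed-point convert with 4 fractional bits.  */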
13077 if (GET_CODE (x) == MULT
13078 && ((VECTOR_MODE_P (mode)
13079 && aarch64_vec_fpconst_pow_of_2 (XEXP (x, 1)) > 0)
13080 || aarch64_fpconst_pow_of_2 (XEXP (x, 1)) > 0))
13081 {
13082 *cost += rtx_cost (XEXP (x, 0), VOIDmode, (rtx_code) code,
13083 0, speed);
13084 return true;
13085 }
13086
e548c9df 13087 *cost += rtx_cost (x, VOIDmode, (enum rtx_code) code, 0, speed);
61263118
KT
13088 return true;
13089
b292109f 13090 case ABS:
b6875aac
KV
13091 if (VECTOR_MODE_P (mode))
13092 {
13093 /* ABS (vector). */
13094 if (speed)
13095 *cost += extra_cost->vect.alu;
13096 }
13097 else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
b292109f 13098 {
19261b99
KT
13099 op0 = XEXP (x, 0);
13100
13101 /* FABD, which is analogous to FADD. */
13102 if (GET_CODE (op0) == MINUS)
13103 {
e548c9df
AM
13104 *cost += rtx_cost (XEXP (op0, 0), mode, MINUS, 0, speed);
13105 *cost += rtx_cost (XEXP (op0, 1), mode, MINUS, 1, speed);
19261b99
KT
13106 if (speed)
13107 *cost += extra_cost->fp[mode == DFmode].addsub;
13108
13109 return true;
13110 }
13111 /* Simple FABS is analogous to FNEG. */
b292109f
JG
13112 if (speed)
13113 *cost += extra_cost->fp[mode == DFmode].neg;
13114 }
13115 else
13116 {
 13117 /* Integer ABS will either be split into
13118 two arithmetic instructions, or will be an ABS
13119 (scalar), which we don't model. */
13120 *cost = COSTS_N_INSNS (2);
13121 if (speed)
13122 *cost += 2 * extra_cost->alu.arith;
13123 }
13124 return false;
13125
13126 case SMAX:
13127 case SMIN:
13128 if (speed)
13129 {
b6875aac
KV
13130 if (VECTOR_MODE_P (mode))
13131 *cost += extra_cost->vect.alu;
13132 else
13133 {
13134 /* FMAXNM/FMINNM/FMAX/FMIN.
13135 TODO: This may not be accurate for all implementations, but
13136 we do not model this in the cost tables. */
13137 *cost += extra_cost->fp[mode == DFmode].addsub;
13138 }
b292109f
JG
13139 }
13140 return false;
13141
61263118
KT
13142 case UNSPEC:
13143 /* The floating point round to integer frint* instructions. */
13144 if (aarch64_frint_unspec_p (XINT (x, 1)))
13145 {
13146 if (speed)
13147 *cost += extra_cost->fp[mode == DFmode].roundint;
13148
13149 return false;
13150 }
781aeb73
KT
13151
13152 if (XINT (x, 1) == UNSPEC_RBIT)
13153 {
13154 if (speed)
13155 *cost += extra_cost->alu.rev;
13156
13157 return false;
13158 }
61263118
KT
13159 break;
13160
fb620c4a
JG
13161 case TRUNCATE:
13162
13163 /* Decompose <su>muldi3_highpart. */
13164 if (/* (truncate:DI */
13165 mode == DImode
13166 /* (lshiftrt:TI */
13167 && GET_MODE (XEXP (x, 0)) == TImode
13168 && GET_CODE (XEXP (x, 0)) == LSHIFTRT
13169 /* (mult:TI */
13170 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
13171 /* (ANY_EXTEND:TI (reg:DI))
13172 (ANY_EXTEND:TI (reg:DI))) */
13173 && ((GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == ZERO_EXTEND
13174 && GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)) == ZERO_EXTEND)
13175 || (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == SIGN_EXTEND
13176 && GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)) == SIGN_EXTEND))
13177 && GET_MODE (XEXP (XEXP (XEXP (XEXP (x, 0), 0), 0), 0)) == DImode
13178 && GET_MODE (XEXP (XEXP (XEXP (XEXP (x, 0), 0), 1), 0)) == DImode
13179 /* (const_int 64) */
13180 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
13181 && UINTVAL (XEXP (XEXP (x, 0), 1)) == 64)
13182 {
13183 /* UMULH/SMULH. */
13184 if (speed)
13185 *cost += extra_cost->mult[mode == DImode].extend;
e548c9df
AM
13186 *cost += rtx_cost (XEXP (XEXP (XEXP (XEXP (x, 0), 0), 0), 0),
13187 mode, MULT, 0, speed);
13188 *cost += rtx_cost (XEXP (XEXP (XEXP (XEXP (x, 0), 0), 1), 0),
13189 mode, MULT, 1, speed);
fb620c4a
JG
13190 return true;
13191 }
13192
13193 /* Fall through. */
43e9d192 13194 default:
61263118 13195 break;
43e9d192 13196 }
61263118 13197
c10e3d7f
AP
13198 if (dump_file
13199 && flag_aarch64_verbose_cost)
61263118
KT
13200 fprintf (dump_file,
13201 "\nFailed to cost RTX. Assuming default cost.\n");
13202
13203 return true;
43e9d192
IB
13204}
13205
0ee859b5
JG
13206/* Wrapper around aarch64_rtx_costs, dumps the partial, or total cost
13207 calculated for X. This cost is stored in *COST. Returns true
13208 if the total cost of X was calculated. */
13209static bool
e548c9df 13210aarch64_rtx_costs_wrapper (rtx x, machine_mode mode, int outer,
0ee859b5
JG
13211 int param, int *cost, bool speed)
13212{
e548c9df 13213 bool result = aarch64_rtx_costs (x, mode, outer, param, cost, speed);
0ee859b5 13214
c10e3d7f
AP
13215 if (dump_file
13216 && flag_aarch64_verbose_cost)
0ee859b5
JG
13217 {
13218 print_rtl_single (dump_file, x);
13219 fprintf (dump_file, "\n%s cost: %d (%s)\n",
13220 speed ? "Hot" : "Cold",
13221 *cost, result ? "final" : "partial");
13222 }
13223
13224 return result;
13225}
13226
43e9d192 13227static int
ef4bddc2 13228aarch64_register_move_cost (machine_mode mode,
8a3a7e67 13229 reg_class_t from_i, reg_class_t to_i)
43e9d192 13230{
8a3a7e67
RH
13231 enum reg_class from = (enum reg_class) from_i;
13232 enum reg_class to = (enum reg_class) to_i;
43e9d192 13233 const struct cpu_regmove_cost *regmove_cost
b175b679 13234 = aarch64_tune_params.regmove_cost;
43e9d192 13235
3be07662 13236 /* Caller save and pointer regs are equivalent to GENERAL_REGS. */
96b7f495
MM
13237 if (to == TAILCALL_ADDR_REGS || to == POINTER_REGS
13238 || to == STUB_REGS)
3be07662
WD
13239 to = GENERAL_REGS;
13240
96b7f495
MM
13241 if (from == TAILCALL_ADDR_REGS || from == POINTER_REGS
13242 || from == STUB_REGS)
3be07662
WD
13243 from = GENERAL_REGS;
13244
183bfdaf
RS
13245 /* Make RDFFR very expensive. In particular, if we know that the FFR
13246 contains a PTRUE (e.g. after a SETFFR), we must never use RDFFR
13247 as a way of obtaining a PTRUE. */
13248 if (GET_MODE_CLASS (mode) == MODE_VECTOR_BOOL
13249 && hard_reg_set_subset_p (reg_class_contents[from_i],
13250 reg_class_contents[FFR_REGS]))
13251 return 80;
13252
6ee70f81
AP
13253 /* Moving between GPR and stack cost is the same as GP2GP. */
13254 if ((from == GENERAL_REGS && to == STACK_REG)
13255 || (to == GENERAL_REGS && from == STACK_REG))
13256 return regmove_cost->GP2GP;
13257
13258 /* To/From the stack register, we move via the gprs. */
13259 if (to == STACK_REG || from == STACK_REG)
13260 return aarch64_register_move_cost (mode, from, GENERAL_REGS)
13261 + aarch64_register_move_cost (mode, GENERAL_REGS, to);
13262
6a70badb 13263 if (known_eq (GET_MODE_SIZE (mode), 16))
8919453c
WD
13264 {
13265 /* 128-bit operations on general registers require 2 instructions. */
13266 if (from == GENERAL_REGS && to == GENERAL_REGS)
13267 return regmove_cost->GP2GP * 2;
13268 else if (from == GENERAL_REGS)
13269 return regmove_cost->GP2FP * 2;
13270 else if (to == GENERAL_REGS)
13271 return regmove_cost->FP2GP * 2;
13272
13273 /* When AdvSIMD instructions are disabled it is not possible to move
13274 a 128-bit value directly between Q registers. This is handled in
13275 secondary reload. A general register is used as a scratch to move
13276 the upper DI value and the lower DI value is moved directly,
13277 hence the cost is the sum of three moves. */
13278 if (! TARGET_SIMD)
13279 return regmove_cost->GP2FP + regmove_cost->FP2GP + regmove_cost->FP2FP;
13280
13281 return regmove_cost->FP2FP;
13282 }
13283
43e9d192
IB
13284 if (from == GENERAL_REGS && to == GENERAL_REGS)
13285 return regmove_cost->GP2GP;
13286 else if (from == GENERAL_REGS)
13287 return regmove_cost->GP2FP;
13288 else if (to == GENERAL_REGS)
13289 return regmove_cost->FP2GP;
13290
43e9d192
IB
13291 return regmove_cost->FP2FP;
13292}
13293
13294static int
ef4bddc2 13295aarch64_memory_move_cost (machine_mode mode ATTRIBUTE_UNUSED,
43e9d192
IB
13296 reg_class_t rclass ATTRIBUTE_UNUSED,
13297 bool in ATTRIBUTE_UNUSED)
13298{
b175b679 13299 return aarch64_tune_params.memmov_cost;
43e9d192
IB
13300}
13301
6d4d616a
RS
13302/* Implement TARGET_INIT_BUILTINS. */
13303static void
13304aarch64_init_builtins ()
13305{
13306 aarch64_general_init_builtins ();
624d0f07 13307 aarch64_sve::init_builtins ();
6d4d616a
RS
13308}
13309
13310/* Implement TARGET_FOLD_BUILTIN. */
13311static tree
13312aarch64_fold_builtin (tree fndecl, int nargs, tree *args, bool)
13313{
13314 unsigned int code = DECL_MD_FUNCTION_CODE (fndecl);
13315 unsigned int subcode = code >> AARCH64_BUILTIN_SHIFT;
13316 tree type = TREE_TYPE (TREE_TYPE (fndecl));
13317 switch (code & AARCH64_BUILTIN_CLASS)
13318 {
13319 case AARCH64_BUILTIN_GENERAL:
13320 return aarch64_general_fold_builtin (subcode, type, nargs, args);
624d0f07
RS
13321
13322 case AARCH64_BUILTIN_SVE:
13323 return NULL_TREE;
6d4d616a
RS
13324 }
13325 gcc_unreachable ();
13326}
13327
13328/* Implement TARGET_GIMPLE_FOLD_BUILTIN. */
13329static bool
13330aarch64_gimple_fold_builtin (gimple_stmt_iterator *gsi)
13331{
13332 gcall *stmt = as_a <gcall *> (gsi_stmt (*gsi));
13333 tree fndecl = gimple_call_fndecl (stmt);
13334 unsigned int code = DECL_MD_FUNCTION_CODE (fndecl);
13335 unsigned int subcode = code >> AARCH64_BUILTIN_SHIFT;
13336 gimple *new_stmt = NULL;
13337 switch (code & AARCH64_BUILTIN_CLASS)
13338 {
13339 case AARCH64_BUILTIN_GENERAL:
13340 new_stmt = aarch64_general_gimple_fold_builtin (subcode, stmt);
13341 break;
624d0f07
RS
13342
13343 case AARCH64_BUILTIN_SVE:
13344 new_stmt = aarch64_sve::gimple_fold_builtin (subcode, gsi, stmt);
13345 break;
6d4d616a
RS
13346 }
13347
13348 if (!new_stmt)
13349 return false;
13350
13351 gsi_replace (gsi, new_stmt, true);
13352 return true;
13353}
13354
13355/* Implement TARGET_EXPAND_BUILTIN. */
13356static rtx
c5dc215d 13357aarch64_expand_builtin (tree exp, rtx target, rtx, machine_mode, int ignore)
6d4d616a
RS
13358{
13359 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
13360 unsigned int code = DECL_MD_FUNCTION_CODE (fndecl);
13361 unsigned int subcode = code >> AARCH64_BUILTIN_SHIFT;
13362 switch (code & AARCH64_BUILTIN_CLASS)
13363 {
13364 case AARCH64_BUILTIN_GENERAL:
c5dc215d 13365 return aarch64_general_expand_builtin (subcode, exp, target, ignore);
624d0f07
RS
13366
13367 case AARCH64_BUILTIN_SVE:
13368 return aarch64_sve::expand_builtin (subcode, exp, target);
6d4d616a
RS
13369 }
13370 gcc_unreachable ();
13371}
13372
13373/* Implement TARGET_BUILTIN_DECL. */
13374static tree
13375aarch64_builtin_decl (unsigned int code, bool initialize_p)
13376{
13377 unsigned int subcode = code >> AARCH64_BUILTIN_SHIFT;
13378 switch (code & AARCH64_BUILTIN_CLASS)
13379 {
13380 case AARCH64_BUILTIN_GENERAL:
13381 return aarch64_general_builtin_decl (subcode, initialize_p);
624d0f07
RS
13382
13383 case AARCH64_BUILTIN_SVE:
13384 return aarch64_sve::builtin_decl (subcode, initialize_p);
6d4d616a
RS
13385 }
13386 gcc_unreachable ();
13387}
13388
0c30e0f3
EM
13389/* Return true if it is safe and beneficial to use the approximate rsqrt optabs
13390 to optimize 1.0/sqrt. */
ee62a5a6
RS
13391
13392static bool
9acc9cbe 13393use_rsqrt_p (machine_mode mode)
ee62a5a6
RS
13394{
13395 return (!flag_trapping_math
13396 && flag_unsafe_math_optimizations
9acc9cbe
EM
13397 && ((aarch64_tune_params.approx_modes->recip_sqrt
13398 & AARCH64_APPROX_MODE (mode))
1a33079e 13399 || flag_mrecip_low_precision_sqrt));
ee62a5a6
RS
13400}
13401
0c30e0f3
EM
13402/* Function to decide when to use the approximate reciprocal square root
13403 builtin. */
a6fc00da
BH
13404
13405static tree
ee62a5a6 13406aarch64_builtin_reciprocal (tree fndecl)
a6fc00da 13407{
9acc9cbe
EM
13408 machine_mode mode = TYPE_MODE (TREE_TYPE (fndecl));
13409
13410 if (!use_rsqrt_p (mode))
a6fc00da 13411 return NULL_TREE;
6d4d616a
RS
13412 unsigned int code = DECL_MD_FUNCTION_CODE (fndecl);
13413 unsigned int subcode = code >> AARCH64_BUILTIN_SHIFT;
13414 switch (code & AARCH64_BUILTIN_CLASS)
13415 {
13416 case AARCH64_BUILTIN_GENERAL:
13417 return aarch64_general_builtin_rsqrt (subcode);
624d0f07
RS
13418
13419 case AARCH64_BUILTIN_SVE:
13420 return NULL_TREE;
6d4d616a
RS
13421 }
13422 gcc_unreachable ();
a6fc00da
BH
13423}
13424
04f307cb
RS
13425/* Emit code to perform the floating-point operation:
13426
13427 DST = SRC1 * SRC2
13428
13429 where all three operands are already known to be registers.
13430 If the operation is an SVE one, PTRUE is a suitable all-true
13431 predicate. */
13432
13433static void
13434aarch64_emit_mult (rtx dst, rtx ptrue, rtx src1, rtx src2)
13435{
13436 if (ptrue)
13437 emit_insn (gen_aarch64_pred (UNSPEC_COND_FMUL, GET_MODE (dst),
13438 dst, ptrue, src1, src2,
13439 gen_int_mode (SVE_RELAXED_GP, SImode)));
13440 else
13441 emit_set_insn (dst, gen_rtx_MULT (GET_MODE (dst), src1, src2));
13442}
13443
98daafa0
EM
13444/* Emit instruction sequence to compute either the approximate square root
13445 or its approximate reciprocal, depending on the flag RECP, and return
13446 whether the sequence was emitted or not. */
a6fc00da 13447
98daafa0
EM
13448bool
13449aarch64_emit_approx_sqrt (rtx dst, rtx src, bool recp)
a6fc00da 13450{
98daafa0 13451 machine_mode mode = GET_MODE (dst);
daef0a8c
JW
13452
13453 if (GET_MODE_INNER (mode) == HFmode)
2e19adc8
RE
13454 {
13455 gcc_assert (!recp);
13456 return false;
13457 }
13458
2e19adc8
RE
13459 if (!recp)
13460 {
13461 if (!(flag_mlow_precision_sqrt
13462 || (aarch64_tune_params.approx_modes->sqrt
13463 & AARCH64_APPROX_MODE (mode))))
13464 return false;
13465
902d28bd 13466 if (!flag_finite_math_only
2e19adc8
RE
13467 || flag_trapping_math
13468 || !flag_unsafe_math_optimizations
13469 || optimize_function_for_size_p (cfun))
13470 return false;
13471 }
13472 else
13473 /* Caller assumes we cannot fail. */
13474 gcc_assert (use_rsqrt_p (mode));
daef0a8c 13475
a0ee8352
RS
13476 rtx pg = NULL_RTX;
13477 if (aarch64_sve_mode_p (mode))
13478 pg = aarch64_ptrue_reg (aarch64_sve_pred_mode (mode));
d7814449 13479 machine_mode mmsk = (VECTOR_MODE_P (mode)
d083ee47 13480 ? related_int_vector_mode (mode).require ()
d7814449 13481 : int_mode_for_mode (mode).require ());
0df28e68 13482 rtx xmsk = NULL_RTX;
98daafa0 13483 if (!recp)
0df28e68
RS
13484 {
13485 /* When calculating the approximate square root, compare the
13486 argument with 0.0 and create a mask. */
a0ee8352
RS
13487 rtx zero = CONST0_RTX (mode);
13488 if (pg)
13489 {
13490 xmsk = gen_reg_rtx (GET_MODE (pg));
13491 rtx hint = gen_int_mode (SVE_KNOWN_PTRUE, SImode);
13492 emit_insn (gen_aarch64_pred_fcm (UNSPEC_COND_FCMNE, mode,
13493 xmsk, pg, hint, src, zero));
13494 }
13495 else
13496 {
13497 xmsk = gen_reg_rtx (mmsk);
13498 emit_insn (gen_rtx_SET (xmsk,
13499 gen_rtx_NEG (mmsk,
13500 gen_rtx_EQ (mmsk, src, zero))));
13501 }
0df28e68 13502 }
a6fc00da 13503
98daafa0
EM
13504 /* Estimate the approximate reciprocal square root. */
13505 rtx xdst = gen_reg_rtx (mode);
0016d8d9 13506 emit_insn (gen_aarch64_rsqrte (mode, xdst, src));
a6fc00da 13507
98daafa0
EM
13508 /* Iterate over the series twice for SF and thrice for DF. */
13509 int iterations = (GET_MODE_INNER (mode) == DFmode) ? 3 : 2;
a6fc00da 13510
98daafa0
EM
13511 /* Optionally iterate over the series once less for faster performance
 13512 while sacrificing some accuracy. */
13513 if ((recp && flag_mrecip_low_precision_sqrt)
13514 || (!recp && flag_mlow_precision_sqrt))
a6fc00da
BH
13515 iterations--;
13516
98daafa0
EM
13517 /* Iterate over the series to calculate the approximate reciprocal square
13518 root. */
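 /* Each pass below is one Newton-Raphson step.  A sketch of the maths,
    relying on FRSQRTS computing (3 - a * b) / 2:
      x2      = x_n * x_n
      x1      = (3 - src * x2) / 2
      x_{n+1} = x_n * x1
    which converges towards 1 / sqrt (src).  */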
13519 rtx x1 = gen_reg_rtx (mode);
13520 while (iterations--)
a6fc00da 13521 {
a6fc00da 13522 rtx x2 = gen_reg_rtx (mode);
a0ee8352 13523 aarch64_emit_mult (x2, pg, xdst, xdst);
98daafa0 13524
0016d8d9 13525 emit_insn (gen_aarch64_rsqrts (mode, x1, src, x2));
a6fc00da 13526
98daafa0 13527 if (iterations > 0)
a0ee8352 13528 aarch64_emit_mult (xdst, pg, xdst, x1);
98daafa0
EM
13529 }
13530
13531 if (!recp)
13532 {
a0ee8352
RS
13533 if (pg)
13534 /* Multiply nonzero source values by the corresponding intermediate
13535 result elements, so that the final calculation is the approximate
13536 square root rather than its reciprocal. Select a zero result for
13537 zero source values, to avoid the Inf * 0 -> NaN that we'd get
13538 otherwise. */
13539 emit_insn (gen_cond (UNSPEC_COND_FMUL, mode,
13540 xdst, xmsk, xdst, src, CONST0_RTX (mode)));
13541 else
13542 {
13543 /* Qualify the approximate reciprocal square root when the
 13544 argument is 0.0 by squashing the intermediate result to 0.0. */
13545 rtx xtmp = gen_reg_rtx (mmsk);
13546 emit_set_insn (xtmp, gen_rtx_AND (mmsk, gen_rtx_NOT (mmsk, xmsk),
13547 gen_rtx_SUBREG (mmsk, xdst, 0)));
13548 emit_move_insn (xdst, gen_rtx_SUBREG (mode, xtmp, 0));
a6fc00da 13549
a0ee8352
RS
13550 /* Calculate the approximate square root. */
13551 aarch64_emit_mult (xdst, pg, xdst, src);
13552 }
a6fc00da
BH
13553 }
13554
98daafa0 13555 /* Finalize the approximation. */
a0ee8352 13556 aarch64_emit_mult (dst, pg, xdst, x1);
98daafa0
EM
13557
13558 return true;
a6fc00da
BH
13559}
13560
79a2bc2d
EM
13561/* Emit the instruction sequence to compute the approximation for the division
13562 of NUM by DEN in QUO and return whether the sequence was emitted or not. */
13563
13564bool
13565aarch64_emit_approx_div (rtx quo, rtx num, rtx den)
13566{
13567 machine_mode mode = GET_MODE (quo);
33d72b63
JW
13568
13569 if (GET_MODE_INNER (mode) == HFmode)
13570 return false;
13571
79a2bc2d
EM
13572 bool use_approx_division_p = (flag_mlow_precision_div
13573 || (aarch64_tune_params.approx_modes->division
13574 & AARCH64_APPROX_MODE (mode)));
13575
13576 if (!flag_finite_math_only
13577 || flag_trapping_math
13578 || !flag_unsafe_math_optimizations
13579 || optimize_function_for_size_p (cfun)
13580 || !use_approx_division_p)
13581 return false;
13582
1be49a38
RR
13583 if (!TARGET_SIMD && VECTOR_MODE_P (mode))
13584 return false;
13585
04f307cb
RS
13586 rtx pg = NULL_RTX;
13587 if (aarch64_sve_mode_p (mode))
13588 pg = aarch64_ptrue_reg (aarch64_sve_pred_mode (mode));
13589
79a2bc2d
EM
13590 /* Estimate the approximate reciprocal. */
13591 rtx xrcp = gen_reg_rtx (mode);
0016d8d9 13592 emit_insn (gen_aarch64_frecpe (mode, xrcp, den));
79a2bc2d
EM
13593
13594 /* Iterate over the series twice for SF and thrice for DF. */
13595 int iterations = (GET_MODE_INNER (mode) == DFmode) ? 3 : 2;
13596
dbf3dc75
BL
13597 /* Optionally iterate over the series less for faster performance,
 13598 while sacrificing some accuracy. The default is 2 for DF and 1 for SF. */
79a2bc2d 13599 if (flag_mlow_precision_div)
dbf3dc75
BL
13600 iterations = (GET_MODE_INNER (mode) == DFmode
13601 ? aarch64_double_recp_precision
13602 : aarch64_float_recp_precision);
79a2bc2d
EM
13603
13604 /* Iterate over the series to calculate the approximate reciprocal. */
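 /* Each pass below is one Newton-Raphson step.  A sketch of the maths,
    relying on FRECPS computing 2 - a * b:
      x_{n+1} = x_n * (2 - den * x_n)
    which converges towards 1 / den.  */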
13605 rtx xtmp = gen_reg_rtx (mode);
13606 while (iterations--)
13607 {
0016d8d9 13608 emit_insn (gen_aarch64_frecps (mode, xtmp, xrcp, den));
79a2bc2d
EM
13609
13610 if (iterations > 0)
04f307cb 13611 aarch64_emit_mult (xrcp, pg, xrcp, xtmp);
79a2bc2d
EM
13612 }
13613
13614 if (num != CONST1_RTX (mode))
13615 {
13616 /* As the approximate reciprocal of DEN is already calculated, only
13617 calculate the approximate division when NUM is not 1.0. */
13618 rtx xnum = force_reg (mode, num);
04f307cb 13619 aarch64_emit_mult (xrcp, pg, xrcp, xnum);
79a2bc2d
EM
13620 }
13621
13622 /* Finalize the approximation. */
04f307cb 13623 aarch64_emit_mult (quo, pg, xrcp, xtmp);
79a2bc2d
EM
13624 return true;
13625}
13626
d126a4ae
AP
13627/* Return the number of instructions that can be issued per cycle. */
13628static int
13629aarch64_sched_issue_rate (void)
13630{
b175b679 13631 return aarch64_tune_params.issue_rate;
d126a4ae
AP
13632}
13633
d0bc0cb6
RS
13634/* Implement TARGET_SCHED_VARIABLE_ISSUE. */
13635static int
13636aarch64_sched_variable_issue (FILE *, int, rtx_insn *insn, int more)
13637{
13638 if (DEBUG_INSN_P (insn))
13639 return more;
13640
13641 rtx_code code = GET_CODE (PATTERN (insn));
13642 if (code == USE || code == CLOBBER)
13643 return more;
13644
13645 if (get_attr_type (insn) == TYPE_NO_INSN)
13646 return more;
13647
13648 return more - 1;
13649}
13650
d03f7e44
MK
13651static int
13652aarch64_sched_first_cycle_multipass_dfa_lookahead (void)
13653{
13654 int issue_rate = aarch64_sched_issue_rate ();
13655
13656 return issue_rate > 1 && !sched_fusion ? issue_rate : 0;
13657}
13658
2d6bc7fa
KT
13659
13660/* Implement TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD as
13661 autopref_multipass_dfa_lookahead_guard from haifa-sched.c. It only
13662 has an effect if PARAM_SCHED_AUTOPREF_QUEUE_DEPTH > 0. */
13663
13664static int
13665aarch64_first_cycle_multipass_dfa_lookahead_guard (rtx_insn *insn,
13666 int ready_index)
13667{
13668 return autopref_multipass_dfa_lookahead_guard (insn, ready_index);
13669}
13670
13671
8990e73a
TB
13672/* Vectorizer cost model target hooks. */
13673
13674/* Implement targetm.vectorize.builtin_vectorization_cost. */
13675static int
13676aarch64_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
13677 tree vectype,
13678 int misalign ATTRIBUTE_UNUSED)
13679{
13680 unsigned elements;
cd8ae5ed
AP
13681 const cpu_vector_cost *costs = aarch64_tune_params.vec_costs;
13682 bool fp = false;
13683
13684 if (vectype != NULL)
13685 fp = FLOAT_TYPE_P (vectype);
8990e73a
TB
13686
13687 switch (type_of_cost)
13688 {
13689 case scalar_stmt:
cd8ae5ed 13690 return fp ? costs->scalar_fp_stmt_cost : costs->scalar_int_stmt_cost;
8990e73a
TB
13691
13692 case scalar_load:
cd8ae5ed 13693 return costs->scalar_load_cost;
8990e73a
TB
13694
13695 case scalar_store:
cd8ae5ed 13696 return costs->scalar_store_cost;
8990e73a
TB
13697
13698 case vector_stmt:
cd8ae5ed 13699 return fp ? costs->vec_fp_stmt_cost : costs->vec_int_stmt_cost;
8990e73a
TB
13700
13701 case vector_load:
cd8ae5ed 13702 return costs->vec_align_load_cost;
8990e73a
TB
13703
13704 case vector_store:
cd8ae5ed 13705 return costs->vec_store_cost;
8990e73a
TB
13706
13707 case vec_to_scalar:
cd8ae5ed 13708 return costs->vec_to_scalar_cost;
8990e73a
TB
13709
13710 case scalar_to_vec:
cd8ae5ed 13711 return costs->scalar_to_vec_cost;
8990e73a
TB
13712
13713 case unaligned_load:
cc9fe6bb 13714 case vector_gather_load:
cd8ae5ed 13715 return costs->vec_unalign_load_cost;
8990e73a
TB
13716
13717 case unaligned_store:
cc9fe6bb 13718 case vector_scatter_store:
cd8ae5ed 13719 return costs->vec_unalign_store_cost;
8990e73a
TB
13720
13721 case cond_branch_taken:
cd8ae5ed 13722 return costs->cond_taken_branch_cost;
8990e73a
TB
13723
13724 case cond_branch_not_taken:
cd8ae5ed 13725 return costs->cond_not_taken_branch_cost;
8990e73a
TB
13726
13727 case vec_perm:
cd8ae5ed 13728 return costs->vec_permute_cost;
c428f91c 13729
8990e73a 13730 case vec_promote_demote:
cd8ae5ed 13731 return fp ? costs->vec_fp_stmt_cost : costs->vec_int_stmt_cost;
8990e73a
TB
13732
13733 case vec_construct:
6a70badb 13734 elements = estimated_poly_value (TYPE_VECTOR_SUBPARTS (vectype));
8990e73a
TB
13735 return elements / 2 + 1;
13736
13737 default:
13738 gcc_unreachable ();
13739 }
13740}
13741
8b50d7a4
RS
13742/* Return true if creating multiple copies of STMT_INFO for Advanced SIMD
13743 vectors would produce a series of LDP or STP operations. KIND is the
13744 kind of statement that STMT_INFO represents. */
13745static bool
13746aarch64_advsimd_ldp_stp_p (enum vect_cost_for_stmt kind,
13747 stmt_vec_info stmt_info)
13748{
13749 switch (kind)
13750 {
13751 case vector_load:
13752 case vector_store:
13753 case unaligned_load:
13754 case unaligned_store:
13755 break;
13756
13757 default:
13758 return false;
13759 }
13760
13761 if (aarch64_tune_params.extra_tuning_flags
13762 & AARCH64_EXTRA_TUNE_NO_LDP_STP_QREGS)
13763 return false;
13764
13765 return is_gimple_assign (stmt_info->stmt);
13766}
13767
217ccab8
RS
13768/* Return true if STMT_INFO extends the result of a load. */
13769static bool
308bc496 13770aarch64_extending_load_p (class vec_info *vinfo, stmt_vec_info stmt_info)
217ccab8
RS
13771{
13772 gassign *assign = dyn_cast <gassign *> (stmt_info->stmt);
13773 if (!assign || !CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (assign)))
13774 return false;
13775
13776 tree rhs = gimple_assign_rhs1 (stmt_info->stmt);
13777 tree lhs_type = TREE_TYPE (gimple_assign_lhs (assign));
13778 tree rhs_type = TREE_TYPE (rhs);
13779 if (!INTEGRAL_TYPE_P (lhs_type)
13780 || !INTEGRAL_TYPE_P (rhs_type)
13781 || TYPE_PRECISION (lhs_type) <= TYPE_PRECISION (rhs_type))
13782 return false;
13783
308bc496 13784 stmt_vec_info def_stmt_info = vinfo->lookup_def (rhs);
217ccab8
RS
13785 return (def_stmt_info
13786 && STMT_VINFO_DATA_REF (def_stmt_info)
13787 && DR_IS_READ (STMT_VINFO_DATA_REF (def_stmt_info)));
13788}
13789
2d56600c
RS
13790/* Return true if STMT_INFO is an integer truncation. */
13791static bool
13792aarch64_integer_truncation_p (stmt_vec_info stmt_info)
13793{
13794 gassign *assign = dyn_cast <gassign *> (stmt_info->stmt);
13795 if (!assign || !CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (assign)))
13796 return false;
13797
13798 tree lhs_type = TREE_TYPE (gimple_assign_lhs (assign));
13799 tree rhs_type = TREE_TYPE (gimple_assign_rhs1 (assign));
13800 return (INTEGRAL_TYPE_P (lhs_type)
13801 && INTEGRAL_TYPE_P (rhs_type)
13802 && TYPE_PRECISION (lhs_type) < TYPE_PRECISION (rhs_type));
13803}
13804
217ccab8 13805/* STMT_COST is the cost calculated by aarch64_builtin_vectorization_cost
8b50d7a4
RS
13806 for STMT_INFO, which has cost kind KIND and which when vectorized would
13807 operate on vector type VECTYPE. Adjust the cost as necessary for SVE
13808 targets. */
217ccab8 13809static unsigned int
308bc496 13810aarch64_sve_adjust_stmt_cost (class vec_info *vinfo, vect_cost_for_stmt kind,
8b50d7a4 13811 stmt_vec_info stmt_info, tree vectype,
217ccab8
RS
13812 unsigned int stmt_cost)
13813{
13814 /* Unlike vec_promote_demote, vector_stmt conversions do not change the
13815 vector register size or number of units. Integer promotions of this
13816 type therefore map to SXT[BHW] or UXT[BHW].
13817
13818 Most loads have extending forms that can do the sign or zero extension
13819 on the fly. Optimistically assume that a load followed by an extension
13820 will fold to this form during combine, and that the extension therefore
13821 comes for free. */
308bc496 13822 if (kind == vector_stmt && aarch64_extending_load_p (vinfo, stmt_info))
217ccab8
RS
13823 stmt_cost = 0;
13824
2d56600c
RS
13825 /* For similar reasons, vector_stmt integer truncations are a no-op,
13826 because we can just ignore the unused upper bits of the source. */
13827 if (kind == vector_stmt && aarch64_integer_truncation_p (stmt_info))
13828 stmt_cost = 0;
13829
8b50d7a4
RS
13830 /* Advanced SIMD can load and store pairs of registers using LDP and STP,
13831 but there are no equivalent instructions for SVE. This means that
13832 (all other things being equal) 128-bit SVE needs twice as many load
13833 and store instructions as Advanced SIMD in order to process vector pairs.
13834
13835 Also, scalar code can often use LDP and STP to access pairs of values,
13836 so it is too simplistic to say that one SVE load or store replaces
13837 VF scalar loads and stores.
13838
13839 Ideally we would account for this in the scalar and Advanced SIMD
13840 costs by making suitable load/store pairs as cheap as a single
13841 load/store. However, that would be a very invasive change and in
13842 practice it tends to stress other parts of the cost model too much.
13843 E.g. stores of scalar constants currently count just a store,
13844 whereas stores of vector constants count a store and a vec_init.
13845 This is an artificial distinction for AArch64, where stores of
13846 nonzero scalar constants need the same kind of register invariant
13847 as vector stores.
13848
13849 An alternative would be to double the cost of any SVE loads and stores
13850 that could be paired in Advanced SIMD (and possibly also paired in
13851 scalar code). But this tends to stress other parts of the cost model
13852 in the same way. It also means that we can fall back to Advanced SIMD
13853 even if full-loop predication would have been useful.
13854
13855 Here we go for a more conservative version: double the costs of SVE
13856 loads and stores if one iteration of the scalar loop processes enough
13857 elements for it to use a whole number of Advanced SIMD LDP or STP
13858 instructions. This makes it very likely that the VF would be 1 for
13859 Advanced SIMD, and so no epilogue should be needed. */
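 /* For example (an illustration of the check below): a grouped scalar
    access of eight 32-bit elements covers 8 * 32 = 256 bits, i.e. exactly
    one Advanced SIMD LDP/STP of two 128-bit Q registers, so the SVE
    load/store cost is doubled.  */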
13860 if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
13861 {
13862 stmt_vec_info first = DR_GROUP_FIRST_ELEMENT (stmt_info);
13863 unsigned int count = DR_GROUP_SIZE (first) - DR_GROUP_GAP (first);
13864 unsigned int elt_bits = GET_MODE_UNIT_BITSIZE (TYPE_MODE (vectype));
13865 if (multiple_p (count * elt_bits, 256)
13866 && aarch64_advsimd_ldp_stp_p (kind, stmt_info))
13867 stmt_cost *= 2;
13868 }
13869
217ccab8
RS
13870 return stmt_cost;
13871}
13872
8990e73a
TB
13873/* Implement targetm.vectorize.add_stmt_cost. */
13874static unsigned
308bc496
RB
13875aarch64_add_stmt_cost (class vec_info *vinfo, void *data, int count,
13876 enum vect_cost_for_stmt kind,
78db0e09
RB
13877 struct _stmt_vec_info *stmt_info, tree vectype,
13878 int misalign, enum vect_cost_model_location where)
8990e73a
TB
13879{
13880 unsigned *cost = (unsigned *) data;
13881 unsigned retval = 0;
13882
13883 if (flag_vect_cost_model)
13884 {
8990e73a
TB
13885 int stmt_cost =
13886 aarch64_builtin_vectorization_cost (kind, vectype, misalign);
13887
217ccab8 13888 if (stmt_info && vectype && aarch64_sve_mode_p (TYPE_MODE (vectype)))
308bc496
RB
13889 stmt_cost = aarch64_sve_adjust_stmt_cost (vinfo, kind, stmt_info,
13890 vectype, stmt_cost);
217ccab8 13891
8990e73a
TB
13892 /* Statements in an inner loop relative to the loop being
13893 vectorized are weighted more heavily. The value here is
058e4c71 13894 arbitrary and could potentially be improved with analysis. */
308bc496
RB
13895 if (where == vect_body && stmt_info
13896 && stmt_in_inner_loop_p (vinfo, stmt_info))
058e4c71 13897 count *= 50; /* FIXME */
8990e73a
TB
13898
13899 retval = (unsigned) (count * stmt_cost);
13900 cost[where] += retval;
13901 }
13902
13903 return retval;
13904}
13905
0cfff2a1 13906static void initialize_aarch64_code_model (struct gcc_options *);
43e9d192 13907
0cfff2a1
KT
13908/* Parse the TO_PARSE string and put the architecture struct that it
13909 selects into RES and the architectural features into ISA_FLAGS.
13910 Return an aarch64_parse_opt_result describing the parse result.
c7887347
ML
13911 If there is an error parsing, RES and ISA_FLAGS are left unchanged.
13912 When the TO_PARSE string contains an invalid extension,
13913 a copy of the string is created and stored to INVALID_EXTENSION. */
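/* For example (an illustration; the valid names come from aarch64-arches.def
   and aarch64-option-extensions.def): "armv8.2-a+sve" selects the Armv8.2-A
   entry and enables the SVE extension on top of its default ISA flags.  */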
43e9d192 13914
0cfff2a1
KT
13915static enum aarch64_parse_opt_result
13916aarch64_parse_arch (const char *to_parse, const struct processor **res,
28108a53 13917 uint64_t *isa_flags, std::string *invalid_extension)
43e9d192 13918{
ff150bc4 13919 const char *ext;
43e9d192 13920 const struct processor *arch;
43e9d192
IB
13921 size_t len;
13922
ff150bc4 13923 ext = strchr (to_parse, '+');
43e9d192
IB
13924
13925 if (ext != NULL)
ff150bc4 13926 len = ext - to_parse;
43e9d192 13927 else
ff150bc4 13928 len = strlen (to_parse);
43e9d192
IB
13929
13930 if (len == 0)
0cfff2a1
KT
13931 return AARCH64_PARSE_MISSING_ARG;
13932
43e9d192 13933
0cfff2a1 13934 /* Loop through the list of supported ARCHes to find a match. */
43e9d192
IB
13935 for (arch = all_architectures; arch->name != NULL; arch++)
13936 {
ff150bc4
ML
13937 if (strlen (arch->name) == len
13938 && strncmp (arch->name, to_parse, len) == 0)
43e9d192 13939 {
28108a53 13940 uint64_t isa_temp = arch->flags;
43e9d192
IB
13941
13942 if (ext != NULL)
13943 {
0cfff2a1
KT
13944 /* TO_PARSE string contains at least one extension. */
13945 enum aarch64_parse_opt_result ext_res
c7887347 13946 = aarch64_parse_extension (ext, &isa_temp, invalid_extension);
43e9d192 13947
0cfff2a1
KT
13948 if (ext_res != AARCH64_PARSE_OK)
13949 return ext_res;
ffee7aa9 13950 }
0cfff2a1
KT
13951 /* Extension parsing was successful. Confirm the result
13952 arch and ISA flags. */
13953 *res = arch;
13954 *isa_flags = isa_temp;
13955 return AARCH64_PARSE_OK;
43e9d192
IB
13956 }
13957 }
13958
13959 /* ARCH name not found in list. */
0cfff2a1 13960 return AARCH64_PARSE_INVALID_ARG;
43e9d192
IB
13961}
13962
0cfff2a1
KT
13963/* Parse the TO_PARSE string and put the result tuning in RES and the
13964 architecture flags in ISA_FLAGS. Return an aarch64_parse_opt_result
13965 describing the parse result. If there is an error parsing, RES and
c7887347
ML
13966 ISA_FLAGS are left unchanged.
13967 When the TO_PARSE string contains an invalid extension,
13968 a copy of the string is created and stored to INVALID_EXTENSION. */
43e9d192 13969
0cfff2a1
KT
13970static enum aarch64_parse_opt_result
13971aarch64_parse_cpu (const char *to_parse, const struct processor **res,
28108a53 13972 uint64_t *isa_flags, std::string *invalid_extension)
43e9d192 13973{
ff150bc4 13974 const char *ext;
43e9d192 13975 const struct processor *cpu;
43e9d192
IB
13976 size_t len;
13977
ff150bc4 13978 ext = strchr (to_parse, '+');
43e9d192
IB
13979
13980 if (ext != NULL)
ff150bc4 13981 len = ext - to_parse;
43e9d192 13982 else
ff150bc4 13983 len = strlen (to_parse);
43e9d192
IB
13984
13985 if (len == 0)
0cfff2a1
KT
13986 return AARCH64_PARSE_MISSING_ARG;
13987
43e9d192
IB
13988
13989 /* Loop through the list of supported CPUs to find a match. */
13990 for (cpu = all_cores; cpu->name != NULL; cpu++)
13991 {
ff150bc4 13992 if (strlen (cpu->name) == len && strncmp (cpu->name, to_parse, len) == 0)
43e9d192 13993 {
28108a53 13994 uint64_t isa_temp = cpu->flags;
0cfff2a1 13995
43e9d192
IB
13996
13997 if (ext != NULL)
13998 {
0cfff2a1
KT
13999 /* TO_PARSE string contains at least one extension. */
14000 enum aarch64_parse_opt_result ext_res
c7887347 14001 = aarch64_parse_extension (ext, &isa_temp, invalid_extension);
43e9d192 14002
0cfff2a1
KT
14003 if (ext_res != AARCH64_PARSE_OK)
14004 return ext_res;
14005 }
 14006 /* Extension parsing was successful. Confirm the result
14007 cpu and ISA flags. */
14008 *res = cpu;
14009 *isa_flags = isa_temp;
14010 return AARCH64_PARSE_OK;
43e9d192
IB
14011 }
14012 }
14013
14014 /* CPU name not found in list. */
0cfff2a1 14015 return AARCH64_PARSE_INVALID_ARG;
43e9d192
IB
14016}
14017
0cfff2a1
KT
14018/* Parse the TO_PARSE string and put the cpu it selects into RES.
14019 Return an aarch64_parse_opt_result describing the parse result.
14020 If the parsing fails the RES does not change. */
43e9d192 14021
0cfff2a1
KT
14022static enum aarch64_parse_opt_result
14023aarch64_parse_tune (const char *to_parse, const struct processor **res)
43e9d192
IB
14024{
14025 const struct processor *cpu;
43e9d192
IB
14026
14027 /* Loop through the list of supported CPUs to find a match. */
14028 for (cpu = all_cores; cpu->name != NULL; cpu++)
14029 {
ff150bc4 14030 if (strcmp (cpu->name, to_parse) == 0)
43e9d192 14031 {
0cfff2a1
KT
14032 *res = cpu;
14033 return AARCH64_PARSE_OK;
43e9d192
IB
14034 }
14035 }
14036
14037 /* CPU name not found in list. */
0cfff2a1 14038 return AARCH64_PARSE_INVALID_ARG;
43e9d192
IB
14039}
14040
8dec06f2
JG
14041/* Parse TOKEN, which has length LENGTH to see if it is an option
14042 described in FLAG. If it is, return the index bit for that fusion type.
14043 If not, error (printing OPTION_NAME) and return zero. */
14044
14045static unsigned int
14046aarch64_parse_one_option_token (const char *token,
14047 size_t length,
14048 const struct aarch64_flag_desc *flag,
14049 const char *option_name)
14050{
14051 for (; flag->name != NULL; flag++)
14052 {
14053 if (length == strlen (flag->name)
14054 && !strncmp (flag->name, token, length))
14055 return flag->flag;
14056 }
14057
a3f9f006 14058 error ("unknown flag passed in %<-moverride=%s%> (%s)", option_name, token);
8dec06f2
JG
14059 return 0;
14060}
14061
14062/* Parse OPTION which is a comma-separated list of flags to enable.
14063 FLAGS gives the list of flags we understand, INITIAL_STATE gives any
14064 default state we inherit from the CPU tuning structures. OPTION_NAME
14065 gives the top-level option we are parsing in the -moverride string,
14066 for use in error messages. */
14067
14068static unsigned int
14069aarch64_parse_boolean_options (const char *option,
14070 const struct aarch64_flag_desc *flags,
14071 unsigned int initial_state,
14072 const char *option_name)
14073{
14074 const char separator = '.';
14075 const char* specs = option;
14076 const char* ntoken = option;
14077 unsigned int found_flags = initial_state;
14078
14079 while ((ntoken = strchr (specs, separator)))
14080 {
14081 size_t token_length = ntoken - specs;
14082 unsigned token_ops = aarch64_parse_one_option_token (specs,
14083 token_length,
14084 flags,
14085 option_name);
14086 /* If we find "none" (or, for simplicity's sake, an error) anywhere
14087 in the token stream, reset the supported operations. So:
14088
14089 adrp+add.cmp+branch.none.adrp+add
14090
14091 would have the result of turning on only adrp+add fusion. */
14092 if (!token_ops)
14093 found_flags = 0;
14094
14095 found_flags |= token_ops;
14096 specs = ++ntoken;
14097 }
14098
 14099 /* We ended with a trailing separator; the string is ill-formed. */
14100 if (!(*specs))
14101 {
14102 error ("%s string ill-formed\n", option_name);
14103 return 0;
14104 }
14105
14106 /* We still have one more token to parse. */
14107 size_t token_length = strlen (specs);
14108 unsigned token_ops = aarch64_parse_one_option_token (specs,
14109 token_length,
14110 flags,
14111 option_name);
14112 if (!token_ops)
14113 found_flags = 0;
14114
14115 found_flags |= token_ops;
14116 return found_flags;
14117}
14118
14119/* Support for overriding instruction fusion. */
14120
14121static void
14122aarch64_parse_fuse_string (const char *fuse_string,
14123 struct tune_params *tune)
14124{
14125 tune->fusible_ops = aarch64_parse_boolean_options (fuse_string,
14126 aarch64_fusible_pairs,
14127 tune->fusible_ops,
14128 "fuse=");
14129}
14130
14131/* Support for overriding other tuning flags. */
14132
14133static void
14134aarch64_parse_tune_string (const char *tune_string,
14135 struct tune_params *tune)
14136{
14137 tune->extra_tuning_flags
14138 = aarch64_parse_boolean_options (tune_string,
14139 aarch64_tuning_flags,
14140 tune->extra_tuning_flags,
14141 "tune=");
14142}
14143
886f092f
KT
14144/* Parse the sve_width tuning moverride string in TUNE_STRING.
14145 Accept the valid SVE vector widths allowed by
14146 aarch64_sve_vector_bits_enum and use it to override sve_width
14147 in TUNE. */
14148
14149static void
14150aarch64_parse_sve_width_string (const char *tune_string,
14151 struct tune_params *tune)
14152{
14153 int width = -1;
14154
14155 int n = sscanf (tune_string, "%d", &width);
14156 if (n == EOF)
14157 {
14158 error ("invalid format for sve_width");
14159 return;
14160 }
14161 switch (width)
14162 {
14163 case SVE_128:
14164 case SVE_256:
14165 case SVE_512:
14166 case SVE_1024:
14167 case SVE_2048:
14168 break;
14169 default:
14170 error ("invalid sve_width value: %d", width);
14171 }
14172 tune->sve_width = (enum aarch64_sve_vector_bits_enum) width;
14173}
14174
8dec06f2
JG
14175/* Parse TOKEN, which has length LENGTH to see if it is a tuning option
14176 we understand. If it is, extract the option string and handoff to
14177 the appropriate function. */
14178
14179void
14180aarch64_parse_one_override_token (const char* token,
14181 size_t length,
14182 struct tune_params *tune)
14183{
14184 const struct aarch64_tuning_override_function *fn
14185 = aarch64_tuning_override_functions;
14186
14187 const char *option_part = strchr (token, '=');
14188 if (!option_part)
14189 {
14190 error ("tuning string missing in option (%s)", token);
14191 return;
14192 }
14193
14194 /* Get the length of the option name. */
14195 length = option_part - token;
14196 /* Skip the '=' to get to the option string. */
14197 option_part++;
14198
14199 for (; fn->name != NULL; fn++)
14200 {
14201 if (!strncmp (fn->name, token, length))
14202 {
14203 fn->parse_override (option_part, tune);
14204 return;
14205 }
14206 }
14207
 14208 error ("unknown tuning option (%s)", token);
14209 return;
14210}
14211
5eee3c34
JW
 14212 /* Choose the default TLS size and clamp it to what the code model allows. */
14213
14214static void
14215initialize_aarch64_tls_size (struct gcc_options *opts)
14216{
14217 if (aarch64_tls_size == 0)
14218 aarch64_tls_size = 24;
14219
14220 switch (opts->x_aarch64_cmodel_var)
14221 {
14222 case AARCH64_CMODEL_TINY:
 14223 /* Both the default and the maximum TLS size allowed under tiny are 1M,
 14224 which needs two instructions to address, so we clamp the size to 24. */
14225 if (aarch64_tls_size > 24)
14226 aarch64_tls_size = 24;
14227 break;
14228 case AARCH64_CMODEL_SMALL:
14229 /* The maximum TLS size allowed under small is 4G. */
14230 if (aarch64_tls_size > 32)
14231 aarch64_tls_size = 32;
14232 break;
14233 case AARCH64_CMODEL_LARGE:
14234 /* The maximum TLS size allowed under large is 16E.
 14235 FIXME: 16E needs a 64-bit offset, but we only support a 48-bit offset now. */
14236 if (aarch64_tls_size > 48)
14237 aarch64_tls_size = 48;
14238 break;
14239 default:
14240 gcc_unreachable ();
14241 }
14242
14243 return;
14244}
14245
8dec06f2
JG
14246/* Parse STRING looking for options in the format:
14247 string :: option:string
14248 option :: name=substring
14249 name :: {a-z}
14250 substring :: defined by option. */
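/* For example (an illustration built from option names registered above,
   such as "fuse=" and "sve_width"):
     -moverride=fuse=adrp+add.cmp+branch:sve_width=256.  */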
14251
14252static void
14253aarch64_parse_override_string (const char* input_string,
14254 struct tune_params* tune)
14255{
14256 const char separator = ':';
14257 size_t string_length = strlen (input_string) + 1;
14258 char *string_root = (char *) xmalloc (sizeof (*string_root) * string_length);
14259 char *string = string_root;
14260 strncpy (string, input_string, string_length);
14261 string[string_length - 1] = '\0';
14262
14263 char* ntoken = string;
14264
14265 while ((ntoken = strchr (string, separator)))
14266 {
14267 size_t token_length = ntoken - string;
14268 /* Make this substring look like a string. */
14269 *ntoken = '\0';
14270 aarch64_parse_one_override_token (string, token_length, tune);
14271 string = ++ntoken;
14272 }
14273
14274 /* One last option to parse. */
14275 aarch64_parse_one_override_token (string, strlen (string), tune);
14276 free (string_root);
14277}
43e9d192 14278
43e9d192
IB
14279
14280static void
0cfff2a1 14281aarch64_override_options_after_change_1 (struct gcc_options *opts)
43e9d192 14282{
efac62a3
ST
14283 if (accepted_branch_protection_string)
14284 {
14285 opts->x_aarch64_branch_protection_string
14286 = xstrdup (accepted_branch_protection_string);
14287 }
14288
acea40ac
WD
14289 /* PR 70044: We have to be careful about being called multiple times for the
14290 same function. This means all changes should be repeatable. */
14291
d6cb6d6a
WD
14292 /* Set aarch64_use_frame_pointer based on -fno-omit-frame-pointer.
14293 Disable the frame pointer flag so the mid-end will not use a frame
14294 pointer in leaf functions in order to support -fomit-leaf-frame-pointer.
14295 Set x_flag_omit_frame_pointer to the special value 2 to differentiate
14296 between -fomit-frame-pointer (1) and -fno-omit-frame-pointer (2). */
14297 aarch64_use_frame_pointer = opts->x_flag_omit_frame_pointer != 1;
acea40ac 14298 if (opts->x_flag_omit_frame_pointer == 0)
a3dc8760 14299 opts->x_flag_omit_frame_pointer = 2;
43e9d192 14300
1be34295 14301 /* If not optimizing for size, set the default
0cfff2a1
KT
14302 alignment to what the target wants. */
14303 if (!opts->x_optimize_size)
43e9d192 14304 {
c518c102
ML
14305 if (opts->x_flag_align_loops && !opts->x_str_align_loops)
14306 opts->x_str_align_loops = aarch64_tune_params.loop_align;
14307 if (opts->x_flag_align_jumps && !opts->x_str_align_jumps)
14308 opts->x_str_align_jumps = aarch64_tune_params.jump_align;
14309 if (opts->x_flag_align_functions && !opts->x_str_align_functions)
14310 opts->x_str_align_functions = aarch64_tune_params.function_align;
43e9d192 14311 }
b4f50fd4 14312
9ee6540a
WD
14313 /* We default to no pc-relative literal loads. */
14314
14315 aarch64_pcrelative_literal_loads = false;
14316
14317 /* If -mpc-relative-literal-loads is set on the command line, this
b4f50fd4 14318 implies that the user asked for PC relative literal loads. */
9ee6540a
WD
14319 if (opts->x_pcrelative_literal_loads == 1)
14320 aarch64_pcrelative_literal_loads = true;
b4f50fd4 14321
9ee6540a
WD
14322 /* In the tiny memory model it makes no sense to disallow PC relative
14323 literal pool loads. */
14324 if (aarch64_cmodel == AARCH64_CMODEL_TINY
14325 || aarch64_cmodel == AARCH64_CMODEL_TINY_PIC)
14326 aarch64_pcrelative_literal_loads = true;
98daafa0
EM
14327
14328 /* When enabling the lower precision Newton series for the square root, also
14329 enable it for the reciprocal square root, since the latter is an
14330 intermediary step for the former. */
14331 if (flag_mlow_precision_sqrt)
14332 flag_mrecip_low_precision_sqrt = true;
0cfff2a1 14333}
43e9d192 14334
0cfff2a1
KT
14335/* 'Unpack' up the internal tuning structs and update the options
14336 in OPTS. The caller must have set up selected_tune and selected_arch
14337 as all the other target-specific codegen decisions are
14338 derived from them. */
14339
e4ea20c8 14340void
0cfff2a1
KT
14341aarch64_override_options_internal (struct gcc_options *opts)
14342{
14343 aarch64_tune_flags = selected_tune->flags;
14344 aarch64_tune = selected_tune->sched_core;
14345 /* Make a copy of the tuning parameters attached to the core, which
14346 we may later overwrite. */
14347 aarch64_tune_params = *(selected_tune->tune);
14348 aarch64_architecture_version = selected_arch->architecture_version;
14349
14350 if (opts->x_aarch64_override_tune_string)
14351 aarch64_parse_override_string (opts->x_aarch64_override_tune_string,
14352 &aarch64_tune_params);
14353
14354 /* This target defaults to strict volatile bitfields. */
14355 if (opts->x_flag_strict_volatile_bitfields < 0 && abi_version_at_least (2))
14356 opts->x_flag_strict_volatile_bitfields = 1;
14357
cd0b2d36
RR
14358 if (aarch64_stack_protector_guard == SSP_GLOBAL
14359 && opts->x_aarch64_stack_protector_guard_offset_str)
14360 {
41804907 14361 error ("incompatible options %<-mstack-protector-guard=global%> and "
63d42e89 14362 "%<-mstack-protector-guard-offset=%s%>",
cd0b2d36
RR
14363 aarch64_stack_protector_guard_offset_str);
14364 }
14365
14366 if (aarch64_stack_protector_guard == SSP_SYSREG
14367 && !(opts->x_aarch64_stack_protector_guard_offset_str
14368 && opts->x_aarch64_stack_protector_guard_reg_str))
14369 {
a3f9f006
ML
14370 error ("both %<-mstack-protector-guard-offset%> and "
14371 "%<-mstack-protector-guard-reg%> must be used "
14372 "with %<-mstack-protector-guard=sysreg%>");
cd0b2d36
RR
14373 }
14374
14375 if (opts->x_aarch64_stack_protector_guard_reg_str)
14376 {
14377 if (strlen (opts->x_aarch64_stack_protector_guard_reg_str) > 100)
14378 error ("specify a system register with a small string length.");
14379 }
14380
14381 if (opts->x_aarch64_stack_protector_guard_offset_str)
14382 {
14383 char *end;
14384 const char *str = aarch64_stack_protector_guard_offset_str;
14385 errno = 0;
14386 long offs = strtol (aarch64_stack_protector_guard_offset_str, &end, 0);
14387 if (!*str || *end || errno)
14388 error ("%qs is not a valid offset in %qs", str,
63d42e89 14389 "-mstack-protector-guard-offset=");
cd0b2d36
RR
14390 aarch64_stack_protector_guard_offset = offs;
14391 }
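 /* For example (an illustration only; the register and offset are arbitrary):
    -mstack-protector-guard=sysreg -mstack-protector-guard-reg=sp_el0
    -mstack-protector-guard-offset=1024 makes the canary come from
    [sp_el0, #1024] instead of the default global guard variable.  */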
14392
0cfff2a1 14393 initialize_aarch64_code_model (opts);
5eee3c34 14394 initialize_aarch64_tls_size (opts);
63892fa2 14395
2d6bc7fa
KT
14396 int queue_depth = 0;
14397 switch (aarch64_tune_params.autoprefetcher_model)
14398 {
14399 case tune_params::AUTOPREFETCHER_OFF:
14400 queue_depth = -1;
14401 break;
14402 case tune_params::AUTOPREFETCHER_WEAK:
14403 queue_depth = 0;
14404 break;
14405 case tune_params::AUTOPREFETCHER_STRONG:
14406 queue_depth = max_insn_queue_index + 1;
14407 break;
14408 default:
14409 gcc_unreachable ();
14410 }
14411
14412 /* We don't mind passing in global_options_set here as we don't use
14413 the *options_set structs anyway. */
028d4092
ML
14414 SET_OPTION_IF_UNSET (opts, &global_options_set,
14415 param_sched_autopref_queue_depth, queue_depth);
2d6bc7fa 14416
9d2c6e2e
MK
14417 /* Set up parameters to be used in prefetching algorithm. Do not
14418 override the defaults unless we are tuning for a core we have
14419 researched values for. */
14420 if (aarch64_tune_params.prefetch->num_slots > 0)
028d4092
ML
14421 SET_OPTION_IF_UNSET (opts, &global_options_set,
14422 param_simultaneous_prefetches,
14423 aarch64_tune_params.prefetch->num_slots);
9d2c6e2e 14424 if (aarch64_tune_params.prefetch->l1_cache_size >= 0)
028d4092
ML
14425 SET_OPTION_IF_UNSET (opts, &global_options_set,
14426 param_l1_cache_size,
14427 aarch64_tune_params.prefetch->l1_cache_size);
9d2c6e2e 14428 if (aarch64_tune_params.prefetch->l1_cache_line_size >= 0)
028d4092
ML
14429 SET_OPTION_IF_UNSET (opts, &global_options_set,
14430 param_l1_cache_line_size,
14431 aarch64_tune_params.prefetch->l1_cache_line_size);
9d2c6e2e 14432 if (aarch64_tune_params.prefetch->l2_cache_size >= 0)
028d4092
ML
14433 SET_OPTION_IF_UNSET (opts, &global_options_set,
14434 param_l2_cache_size,
14435 aarch64_tune_params.prefetch->l2_cache_size);
d2ff35c0 14436 if (!aarch64_tune_params.prefetch->prefetch_dynamic_strides)
028d4092
ML
14437 SET_OPTION_IF_UNSET (opts, &global_options_set,
14438 param_prefetch_dynamic_strides, 0);
59100dfc 14439 if (aarch64_tune_params.prefetch->minimum_stride >= 0)
028d4092
ML
14440 SET_OPTION_IF_UNSET (opts, &global_options_set,
14441 param_prefetch_minimum_stride,
14442 aarch64_tune_params.prefetch->minimum_stride);
50487d79 14443
13494fcb 14444 /* Use the alternative scheduling-pressure algorithm by default. */
028d4092
ML
14445 SET_OPTION_IF_UNSET (opts, &global_options_set,
14446 param_sched_pressure_algorithm,
14447 SCHED_PRESSURE_MODEL);
13494fcb 14448
fbe9af50 14449 /* Validate the guard size. */
028d4092 14450 int guard_size = param_stack_clash_protection_guard_size;
fbe9af50 14451
8100e93b
ML
14452 if (guard_size != 12 && guard_size != 16)
14453 error ("only values 12 (4 KB) and 16 (64 KB) are supported for guard "
14454 "size. Given value %d (%llu KB) is out of range",
14455 guard_size, (1ULL << guard_size) / 1024ULL);
14456
fbe9af50
TC
14457 /* Enforce that interval is the same size as size so the mid-end does the
14458 right thing. */
028d4092
ML
14459 SET_OPTION_IF_UNSET (opts, &global_options_set,
14460 param_stack_clash_protection_probe_interval,
14461 guard_size);
fbe9af50
TC
14462
14463 /* The maybe_set calls won't update the value if the user has explicitly set
14464 one. Which means we need to validate that probing interval and guard size
14465 are equal. */
14466 int probe_interval
028d4092 14467 = param_stack_clash_protection_probe_interval;
fbe9af50 14468 if (guard_size != probe_interval)
904f3daa
ML
14469 error ("stack clash guard size %<%d%> must be equal to probing interval "
14470 "%<%d%>", guard_size, probe_interval);
fbe9af50 14471
16b2cafd
MK
 14472 /* Enable software prefetching at the specified optimization level for
 14473 CPUs that have prefetch. Lower the optimization level threshold by 1
 14474 when profiling is enabled. */
14475 if (opts->x_flag_prefetch_loop_arrays < 0
14476 && !opts->x_optimize_size
14477 && aarch64_tune_params.prefetch->default_opt_level >= 0
14478 && opts->x_optimize >= aarch64_tune_params.prefetch->default_opt_level)
14479 opts->x_flag_prefetch_loop_arrays = 1;
14480
266c2b54
ML
14481 if (opts->x_aarch64_arch_string == NULL)
14482 opts->x_aarch64_arch_string = selected_arch->name;
14483 if (opts->x_aarch64_cpu_string == NULL)
14484 opts->x_aarch64_cpu_string = selected_cpu->name;
14485 if (opts->x_aarch64_tune_string == NULL)
14486 opts->x_aarch64_tune_string = selected_tune->name;
14487
0cfff2a1
KT
14488 aarch64_override_options_after_change_1 (opts);
14489}
43e9d192 14490
01f44038
KT
14491/* Print a hint with a suggestion for a core or architecture name that
14492 most closely resembles what the user passed in STR. ARCH is true if
14493 the user is asking for an architecture name. ARCH is false if the user
14494 is asking for a core name. */
14495
14496static void
14497aarch64_print_hint_for_core_or_arch (const char *str, bool arch)
14498{
14499 auto_vec<const char *> candidates;
14500 const struct processor *entry = arch ? all_architectures : all_cores;
14501 for (; entry->name != NULL; entry++)
14502 candidates.safe_push (entry->name);
a08b5429
ML
14503
14504#ifdef HAVE_LOCAL_CPU_DETECT
14505 /* Add also "native" as possible value. */
14506 if (arch)
14507 candidates.safe_push ("native");
14508#endif
14509
01f44038
KT
14510 char *s;
14511 const char *hint = candidates_list_and_hint (str, s, candidates);
14512 if (hint)
14513 inform (input_location, "valid arguments are: %s;"
14514 " did you mean %qs?", s, hint);
6285e915
ML
14515 else
14516 inform (input_location, "valid arguments are: %s", s);
14517
01f44038
KT
14518 XDELETEVEC (s);
14519}
14520
14521/* Print a hint with a suggestion for a core name that most closely resembles
14522 what the user passed in STR. */
14523
14524inline static void
14525aarch64_print_hint_for_core (const char *str)
14526{
14527 aarch64_print_hint_for_core_or_arch (str, false);
14528}
14529
14530/* Print a hint with a suggestion for an architecture name that most closely
14531 resembles what the user passed in STR. */
14532
14533inline static void
14534aarch64_print_hint_for_arch (const char *str)
14535{
14536 aarch64_print_hint_for_core_or_arch (str, true);
14537}
14538
c7887347
ML
14539
14540/* Print a hint with a suggestion for an extension name
14541 that most closely resembles what the user passed in STR. */
14542
14543void
14544aarch64_print_hint_for_extensions (const std::string &str)
14545{
14546 auto_vec<const char *> candidates;
14547 aarch64_get_all_extension_candidates (&candidates);
14548 char *s;
14549 const char *hint = candidates_list_and_hint (str.c_str (), s, candidates);
14550 if (hint)
14551 inform (input_location, "valid arguments are: %s;"
14552 " did you mean %qs?", s, hint);
14553 else
14554 inform (input_location, "valid arguments are: %s", s);
14555
14556 XDELETEVEC (s);
14557}
14558
0cfff2a1
KT
14559/* Validate a command-line -mcpu option. Parse the cpu and extensions (if any)
14560 specified in STR and throw errors if appropriate. Put the results, if
361fb3ee
KT
14561 they are valid, in RES and ISA_FLAGS. Return whether the option is
14562 valid. */
43e9d192 14563
361fb3ee 14564static bool
0cfff2a1 14565aarch64_validate_mcpu (const char *str, const struct processor **res,
28108a53 14566 uint64_t *isa_flags)
0cfff2a1 14567{
c7887347 14568 std::string invalid_extension;
0cfff2a1 14569 enum aarch64_parse_opt_result parse_res
c7887347 14570 = aarch64_parse_cpu (str, res, isa_flags, &invalid_extension);
0cfff2a1
KT
14571
14572 if (parse_res == AARCH64_PARSE_OK)
361fb3ee 14573 return true;
0cfff2a1
KT
14574
14575 switch (parse_res)
14576 {
14577 case AARCH64_PARSE_MISSING_ARG:
fb241da2 14578 error ("missing cpu name in %<-mcpu=%s%>", str);
0cfff2a1
KT
14579 break;
14580 case AARCH64_PARSE_INVALID_ARG:
a3f9f006 14581 error ("unknown value %qs for %<-mcpu%>", str);
01f44038 14582 aarch64_print_hint_for_core (str);
0cfff2a1
KT
14583 break;
14584 case AARCH64_PARSE_INVALID_FEATURE:
c7887347
ML
14585 error ("invalid feature modifier %qs in %<-mcpu=%s%>",
14586 invalid_extension.c_str (), str);
14587 aarch64_print_hint_for_extensions (invalid_extension);
0cfff2a1
KT
14588 break;
14589 default:
14590 gcc_unreachable ();
14591 }
361fb3ee
KT
14592
14593 return false;
0cfff2a1
KT
14594}
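/* Illustrative usage sketch (assumed values, not part of the upstream
   source):

     uint64_t isa_flags = 0;
     const struct processor *cpu = NULL;
     if (aarch64_validate_mcpu ("cortex-a57+crypto", &cpu, &isa_flags))
       ... CPU now points at the cortex-a57 entry and ISA_FLAGS has the
	   crypto feature bits ORed in ...

   An unknown cpu name instead triggers the "unknown value" error above
   together with a spelling hint from aarch64_print_hint_for_core.  */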
14595
a9ba2a9b
MM
14596/* Straight line speculation indicators. */
14597enum aarch64_sls_hardening_type
14598{
14599 SLS_NONE = 0,
14600 SLS_RETBR = 1,
14601 SLS_BLR = 2,
14602 SLS_ALL = 3,
14603};
14604static enum aarch64_sls_hardening_type aarch64_sls_hardening;
14605
14606/* Return whether we should mitigate Straight Line Speculation for the RET
14607 and BR instructions. */
14608bool
14609aarch64_harden_sls_retbr_p (void)
14610{
14611 return aarch64_sls_hardening & SLS_RETBR;
14612}
14613
14614/* Return whether we should mitigate Straight Line Speculation for the BLR
14615 instruction. */
14616bool
14617aarch64_harden_sls_blr_p (void)
14618{
14619 return aarch64_sls_hardening & SLS_BLR;
14620}
14621
14622/* For now we only allow setting these options globally; in the future we may
14623 allow setting them per function. */
14624static void
14625aarch64_validate_sls_mitigation (const char *const_str)
14626{
14627 char *token_save = NULL;
14628 char *str = NULL;
14629
14630 if (strcmp (const_str, "none") == 0)
14631 {
14632 aarch64_sls_hardening = SLS_NONE;
14633 return;
14634 }
14635 if (strcmp (const_str, "all") == 0)
14636 {
14637 aarch64_sls_hardening = SLS_ALL;
14638 return;
14639 }
14640
14641 char *str_root = xstrdup (const_str);
14642 str = strtok_r (str_root, ",", &token_save);
14643 if (!str)
14644 error ("invalid argument given to %<-mharden-sls=%>");
14645
14646 int temp = SLS_NONE;
14647 while (str)
14648 {
14649 if (strcmp (str, "blr") == 0)
14650 temp |= SLS_BLR;
14651 else if (strcmp (str, "retbr") == 0)
14652 temp |= SLS_RETBR;
14653 else if (strcmp (str, "none") == 0 || strcmp (str, "all") == 0)
14654 {
14655 error ("%<%s%> must be by itself for %<-mharden-sls=%>", str);
14656 break;
14657 }
14658 else
14659 {
14660 error ("invalid argument %<%s%> for %<-mharden-sls=%>", str);
14661 break;
14662 }
14663 str = strtok_r (NULL, ",", &token_save);
14664 }
14665 aarch64_sls_hardening = (aarch64_sls_hardening_type) temp;
14666 free (str_root);
14667}
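/* Illustrative examples of strings accepted by the parser above (an
   addition, not part of the upstream source):
     -mharden-sls=none
     -mharden-sls=all
     -mharden-sls=retbr
     -mharden-sls=retbr,blr
   "none" and "all" must appear on their own; combining them with other
   tokens is rejected with an error.  */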
14668
efac62a3
ST
14669/* Parses CONST_STR for branch protection features specified in
14670 aarch64_branch_protect_types, and sets any global variables required. Returns
14671 the parsing result and assigns LAST_STR to the last processed token from
14672 CONST_STR so that it can be used for error reporting. */
14673
14674static enum
14675aarch64_parse_opt_result aarch64_parse_branch_protection (const char *const_str,
14676 char** last_str)
14677{
14678 char *str_root = xstrdup (const_str);
14679 char* token_save = NULL;
14680 char *str = strtok_r (str_root, "+", &token_save);
14681 enum aarch64_parse_opt_result res = AARCH64_PARSE_OK;
14682 if (!str)
14683 res = AARCH64_PARSE_MISSING_ARG;
14684 else
14685 {
14686 char *next_str = strtok_r (NULL, "+", &token_save);
14687 /* Reset the branch protection features to their defaults. */
14688 aarch64_handle_no_branch_protection (NULL, NULL);
14689
14690 while (str && res == AARCH64_PARSE_OK)
14691 {
14692 const aarch64_branch_protect_type* type = aarch64_branch_protect_types;
14693 bool found = false;
14694 /* Search for this type. */
14695 while (type && type->name && !found && res == AARCH64_PARSE_OK)
14696 {
14697 if (strcmp (str, type->name) == 0)
14698 {
14699 found = true;
14700 res = type->handler (str, next_str);
14701 str = next_str;
14702 next_str = strtok_r (NULL, "+", &token_save);
14703 }
14704 else
14705 type++;
14706 }
14707 if (found && res == AARCH64_PARSE_OK)
14708 {
14709 bool found_subtype = true;
14710 /* Loop through each token until we find one that isn't a
14711 subtype. */
14712 while (found_subtype)
14713 {
14714 found_subtype = false;
14715 const aarch64_branch_protect_type *subtype = type->subtypes;
14716 /* Search for the subtype. */
14717 while (str && subtype && subtype->name && !found_subtype
14718 && res == AARCH64_PARSE_OK)
14719 {
14720 if (strcmp (str, subtype->name) == 0)
14721 {
14722 found_subtype = true;
14723 res = subtype->handler (str, next_str);
14724 str = next_str;
14725 next_str = strtok_r (NULL, "+", &token_save);
14726 }
14727 else
14728 subtype++;
14729 }
14730 }
14731 }
14732 else if (!found)
14733 res = AARCH64_PARSE_INVALID_ARG;
14734 }
14735 }
14736 /* Copy the last processed token into the argument to pass it back.
14737 Used by option and attribute validation to print the offending token. */
14738 if (last_str)
14739 {
14740 if (str) strcpy (*last_str, str);
14741 else *last_str = NULL;
14742 }
14743 if (res == AARCH64_PARSE_OK)
14744 {
14745 /* If needed, alloc the accepted string then copy in const_str.
14746 Used by override_option_after_change_1. */
14747 if (!accepted_branch_protection_string)
14748 accepted_branch_protection_string = (char *) xmalloc (
14749 BRANCH_PROTECT_STR_MAX
14750 + 1);
14751 strncpy (accepted_branch_protection_string, const_str,
14752 BRANCH_PROTECT_STR_MAX + 1);
14753 /* Forcibly null-terminate. */
14754 accepted_branch_protection_string[BRANCH_PROTECT_STR_MAX] = '\0';
14755 }
14756 return res;
14757}
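/* Illustrative examples of the "+"-separated grammar handled above
   (token names come from aarch64_branch_protect_types; the examples are
   an addition, not part of the upstream source):
     -mbranch-protection=none
     -mbranch-protection=standard
     -mbranch-protection=pac-ret+leaf+bti
   A subtype such as "leaf" is only recognised immediately after its
   parent type ("pac-ret" here).  */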
14758
14759static bool
14760aarch64_validate_mbranch_protection (const char *const_str)
14761{
14762 char *str = (char *) xmalloc (strlen (const_str) + 1);
14763 enum aarch64_parse_opt_result res =
14764 aarch64_parse_branch_protection (const_str, &str);
14765 if (res == AARCH64_PARSE_INVALID_ARG)
a9c697b8 14766 error ("invalid argument %<%s%> for %<-mbranch-protection=%>", str);
efac62a3 14767 else if (res == AARCH64_PARSE_MISSING_ARG)
a9c697b8 14768 error ("missing argument for %<-mbranch-protection=%>");
efac62a3
ST
14769 free (str);
14770 return res == AARCH64_PARSE_OK;
14771}
14772
0cfff2a1
KT
14773/* Validate a command-line -march option. Parse the arch and extensions
14774 (if any) specified in STR and throw errors if appropriate. Put the
361fb3ee
KT
14775 results, if they are valid, in RES and ISA_FLAGS. Return whether the
14776 option is valid. */
0cfff2a1 14777
361fb3ee 14778static bool
0cfff2a1 14779aarch64_validate_march (const char *str, const struct processor **res,
28108a53 14780 uint64_t *isa_flags)
0cfff2a1 14781{
c7887347 14782 std::string invalid_extension;
0cfff2a1 14783 enum aarch64_parse_opt_result parse_res
c7887347 14784 = aarch64_parse_arch (str, res, isa_flags, &invalid_extension);
0cfff2a1
KT
14785
14786 if (parse_res == AARCH64_PARSE_OK)
361fb3ee 14787 return true;
0cfff2a1
KT
14788
14789 switch (parse_res)
14790 {
14791 case AARCH64_PARSE_MISSING_ARG:
fb241da2 14792 error ("missing arch name in %<-march=%s%>", str);
0cfff2a1
KT
14793 break;
14794 case AARCH64_PARSE_INVALID_ARG:
a3f9f006 14795 error ("unknown value %qs for %<-march%>", str);
01f44038 14796 aarch64_print_hint_for_arch (str);
0cfff2a1
KT
14797 break;
14798 case AARCH64_PARSE_INVALID_FEATURE:
c7887347
ML
14799 error ("invalid feature modifier %qs in %<-march=%s%>",
14800 invalid_extension.c_str (), str);
14801 aarch64_print_hint_for_extensions (invalid_extension);
0cfff2a1
KT
14802 break;
14803 default:
14804 gcc_unreachable ();
14805 }
361fb3ee
KT
14806
14807 return false;
0cfff2a1
KT
14808}
14809
14810/* Validate a command-line -mtune option. Parse the cpu
14811 specified in STR and throw errors if appropriate. Put the
361fb3ee
KT
14812 result, if it is valid, in RES. Return whether the option is
14813 valid. */
0cfff2a1 14814
361fb3ee 14815static bool
0cfff2a1
KT
14816aarch64_validate_mtune (const char *str, const struct processor **res)
14817{
14818 enum aarch64_parse_opt_result parse_res
14819 = aarch64_parse_tune (str, res);
14820
14821 if (parse_res == AARCH64_PARSE_OK)
361fb3ee 14822 return true;
0cfff2a1
KT
14823
14824 switch (parse_res)
14825 {
14826 case AARCH64_PARSE_MISSING_ARG:
fb241da2 14827 error ("missing cpu name in %<-mtune=%s%>", str);
0cfff2a1
KT
14828 break;
14829 case AARCH64_PARSE_INVALID_ARG:
a3f9f006 14830 error ("unknown value %qs for %<-mtune%>", str);
01f44038 14831 aarch64_print_hint_for_core (str);
0cfff2a1
KT
14832 break;
14833 default:
14834 gcc_unreachable ();
14835 }
361fb3ee
KT
14836 return false;
14837}
14838
14839/* Return the CPU corresponding to the enum CPU.
14840 If it doesn't specify a cpu, return the default. */
14841
14842static const struct processor *
14843aarch64_get_tune_cpu (enum aarch64_processor cpu)
14844{
14845 if (cpu != aarch64_none)
14846 return &all_cores[cpu];
14847
14848 /* The & 0x3f is to extract the bottom 6 bits that encode the
14849 default cpu as selected by the --with-cpu GCC configure option
14850 in config.gcc.
14851 ???: The whole TARGET_CPU_DEFAULT and AARCH64_CPU_DEFAULT_FLAGS
14852 flags mechanism should be reworked to make it more sane. */
14853 return &all_cores[TARGET_CPU_DEFAULT & 0x3f];
14854}
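/* Illustrative sketch of the encoding relied on above (an assumption
   about how config.gcc packs the value, not part of the upstream
   source):

     TARGET_CPU_DEFAULT == default_cpu_ident | (default_isa_flags << 6)

   so "& 0x3f" recovers the configure-time cpu here, while
   aarch64_override_options uses ">> 6" to recover its ISA flags.  */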
14855
14856/* Return the architecture corresponding to the enum ARCH.
14857 If it doesn't specify a valid architecture, return the default. */
14858
14859static const struct processor *
14860aarch64_get_arch (enum aarch64_arch arch)
14861{
14862 if (arch != aarch64_no_arch)
14863 return &all_architectures[arch];
14864
14865 const struct processor *cpu = &all_cores[TARGET_CPU_DEFAULT & 0x3f];
14866
14867 return &all_architectures[cpu->arch];
0cfff2a1
KT
14868}
14869
43cacb12
RS
14870/* Return the VG value associated with -msve-vector-bits= value VALUE. */
14871
14872static poly_uint16
14873aarch64_convert_sve_vector_bits (aarch64_sve_vector_bits_enum value)
14874{
9b070057
RS
14875 /* 128-bit SVE and Advanced SIMD modes use different register layouts
14876 on big-endian targets, so we would need to forbid subregs that convert
14877 from one to the other. By default a reinterpret sequence would then
14878 involve a store to memory in one mode and a load back in the other.
14879 Even if we optimize that sequence using reverse instructions,
14880 it would still be a significant potential overhead.
14881
14882 For now, it seems better to generate length-agnostic code for that
14883 case instead. */
14884 if (value == SVE_SCALABLE
14885 || (value == SVE_128 && BYTES_BIG_ENDIAN))
43cacb12
RS
14886 return poly_uint16 (2, 2);
14887 else
14888 return (int) value / 64;
14889}
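/* Illustrative worked example (an addition, not part of the upstream
   source): -msve-vector-bits=256 gives (int) SVE_256 == 256, so the VG
   count is 256 / 64 == 4 (four 64-bit granules).  SVE_SCALABLE, and
   SVE_128 on big-endian targets, instead yield poly_uint16 (2, 2),
   i.e. a length-agnostic 2 + 2 * x1 granules.  */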
14890
0cfff2a1
KT
14891/* Implement TARGET_OPTION_OVERRIDE. This is called once in the beginning
14892 and is used to parse the -m{cpu,tune,arch} strings and setup the initial
14893 tuning structs. In particular it must set selected_tune and
14894 aarch64_isa_flags that define the available ISA features and tuning
14895 decisions. It must also set selected_arch as this will be used to
14896 output the .arch asm tags for each function. */
14897
14898static void
14899aarch64_override_options (void)
14900{
28108a53
MM
14901 uint64_t cpu_isa = 0;
14902 uint64_t arch_isa = 0;
0cfff2a1
KT
14903 aarch64_isa_flags = 0;
14904
361fb3ee
KT
14905 bool valid_cpu = true;
14906 bool valid_tune = true;
14907 bool valid_arch = true;
14908
0cfff2a1
KT
14909 selected_cpu = NULL;
14910 selected_arch = NULL;
14911 selected_tune = NULL;
14912
a9ba2a9b
MM
14913 if (aarch64_harden_sls_string)
14914 aarch64_validate_sls_mitigation (aarch64_harden_sls_string);
14915
efac62a3
ST
14916 if (aarch64_branch_protection_string)
14917 aarch64_validate_mbranch_protection (aarch64_branch_protection_string);
14918
0cfff2a1
KT
14919 /* -mcpu=CPU is shorthand for -march=ARCH_FOR_CPU, -mtune=CPU.
14920 If either of -march or -mtune is given, they override their
14921 respective component of -mcpu. */
14922 if (aarch64_cpu_string)
361fb3ee
KT
14923 valid_cpu = aarch64_validate_mcpu (aarch64_cpu_string, &selected_cpu,
14924 &cpu_isa);
0cfff2a1
KT
14925
14926 if (aarch64_arch_string)
361fb3ee
KT
14927 valid_arch = aarch64_validate_march (aarch64_arch_string, &selected_arch,
14928 &arch_isa);
0cfff2a1
KT
14929
14930 if (aarch64_tune_string)
361fb3ee 14931 valid_tune = aarch64_validate_mtune (aarch64_tune_string, &selected_tune);
43e9d192 14932
6881e3c1
OH
14933#ifdef SUBTARGET_OVERRIDE_OPTIONS
14934 SUBTARGET_OVERRIDE_OPTIONS;
14935#endif
14936
43e9d192
IB
14937 /* If the user did not specify a processor, choose the default
14938 one for them. This will be the CPU set during configuration using
a3cd0246 14939 --with-cpu, otherwise it is "generic". */
43e9d192
IB
14940 if (!selected_cpu)
14941 {
0cfff2a1
KT
14942 if (selected_arch)
14943 {
14944 selected_cpu = &all_cores[selected_arch->ident];
14945 aarch64_isa_flags = arch_isa;
361fb3ee 14946 explicit_arch = selected_arch->arch;
0cfff2a1
KT
14947 }
14948 else
14949 {
361fb3ee
KT
14950 /* Get default configure-time CPU. */
14951 selected_cpu = aarch64_get_tune_cpu (aarch64_none);
0cfff2a1
KT
14952 aarch64_isa_flags = TARGET_CPU_DEFAULT >> 6;
14953 }
361fb3ee
KT
14954
14955 if (selected_tune)
14956 explicit_tune_core = selected_tune->ident;
0cfff2a1
KT
14957 }
14958 /* If both -mcpu and -march are specified check that they are architecturally
14959 compatible, warn if they're not and prefer the -march ISA flags. */
14960 else if (selected_arch)
14961 {
14962 if (selected_arch->arch != selected_cpu->arch)
14963 {
a3f9f006 14964 warning (0, "switch %<-mcpu=%s%> conflicts with %<-march=%s%> switch",
349297b6
JH
14965 aarch64_cpu_string,
14966 aarch64_arch_string);
0cfff2a1
KT
14967 }
14968 aarch64_isa_flags = arch_isa;
361fb3ee
KT
14969 explicit_arch = selected_arch->arch;
14970 explicit_tune_core = selected_tune ? selected_tune->ident
14971 : selected_cpu->ident;
0cfff2a1
KT
14972 }
14973 else
14974 {
14975 /* -mcpu but no -march. */
14976 aarch64_isa_flags = cpu_isa;
361fb3ee
KT
14977 explicit_tune_core = selected_tune ? selected_tune->ident
14978 : selected_cpu->ident;
14979 gcc_assert (selected_cpu);
14980 selected_arch = &all_architectures[selected_cpu->arch];
14981 explicit_arch = selected_arch->arch;
43e9d192
IB
14982 }
14983
0cfff2a1
KT
14984 /* Set the arch as well, as we will need it when outputting
14985 the .arch directive in assembly. */
14986 if (!selected_arch)
14987 {
14988 gcc_assert (selected_cpu);
14989 selected_arch = &all_architectures[selected_cpu->arch];
14990 }
43e9d192 14991
43e9d192 14992 if (!selected_tune)
3edaf26d 14993 selected_tune = selected_cpu;
43e9d192 14994
c7ff4f0f
SD
14995 if (aarch64_enable_bti == 2)
14996 {
14997#ifdef TARGET_ENABLE_BTI
14998 aarch64_enable_bti = 1;
14999#else
15000 aarch64_enable_bti = 0;
15001#endif
15002 }
15003
15004 /* Return address signing is currently not supported for ILP32 targets. For
15005 LP64 targets use the configured option in the absence of a command-line
15006 option for -mbranch-protection. */
15007 if (!TARGET_ILP32 && accepted_branch_protection_string == NULL)
15008 {
15009#ifdef TARGET_ENABLE_PAC_RET
15010 aarch64_ra_sign_scope = AARCH64_FUNCTION_NON_LEAF;
c7ff4f0f
SD
15011#else
15012 aarch64_ra_sign_scope = AARCH64_FUNCTION_NONE;
15013#endif
15014 }
15015
0cfff2a1
KT
15016#ifndef HAVE_AS_MABI_OPTION
15017 /* The compiler may have been configured with 2.23.* binutils, which does
15018 not have support for ILP32. */
15019 if (TARGET_ILP32)
a3f9f006 15020 error ("assembler does not support %<-mabi=ilp32%>");
0cfff2a1 15021#endif
43e9d192 15022
43cacb12
RS
15023 /* Convert -msve-vector-bits to a VG count. */
15024 aarch64_sve_vg = aarch64_convert_sve_vector_bits (aarch64_sve_vector_bits);
15025
db58fd89 15026 if (aarch64_ra_sign_scope != AARCH64_FUNCTION_NONE && TARGET_ILP32)
a3f9f006 15027 sorry ("return address signing is only supported for %<-mabi=lp64%>");
db58fd89 15028
361fb3ee
KT
15029 /* Make sure we properly set up the explicit options. */
15030 if ((aarch64_cpu_string && valid_cpu)
15031 || (aarch64_tune_string && valid_tune))
15032 gcc_assert (explicit_tune_core != aarch64_none);
15033
15034 if ((aarch64_cpu_string && valid_cpu)
15035 || (aarch64_arch_string && valid_arch))
15036 gcc_assert (explicit_arch != aarch64_no_arch);
15037
5f7dbaa0
RE
15038 /* The pass to insert speculation tracking runs before
15039 shrink-wrapping and the latter does not know how to update the
15040 tracking status. So disable shrink-wrapping in this case. */
15041 if (aarch64_track_speculation)
15042 flag_shrink_wrap = 0;
15043
0cfff2a1
KT
15044 aarch64_override_options_internal (&global_options);
15045
15046 /* Save these options as the default ones in case we push and pop them later
15047 while processing functions with potential target attributes. */
15048 target_option_default_node = target_option_current_node
ba948b37 15049 = build_target_option_node (&global_options, &global_options_set);
43e9d192
IB
15050}
15051
15052/* Implement targetm.override_options_after_change. */
15053
15054static void
15055aarch64_override_options_after_change (void)
15056{
0cfff2a1 15057 aarch64_override_options_after_change_1 (&global_options);
43e9d192
IB
15058}
15059
29a14a1a
MK
15060/* Implement the TARGET_OFFLOAD_OPTIONS hook. */
15061static char *
15062aarch64_offload_options (void)
15063{
15064 if (TARGET_ILP32)
15065 return xstrdup ("-foffload-abi=ilp32");
15066 else
15067 return xstrdup ("-foffload-abi=lp64");
15068}
15069
43e9d192
IB
15070static struct machine_function *
15071aarch64_init_machine_status (void)
15072{
15073 struct machine_function *machine;
766090c2 15074 machine = ggc_cleared_alloc<machine_function> ();
43e9d192
IB
15075 return machine;
15076}
15077
15078void
15079aarch64_init_expanders (void)
15080{
15081 init_machine_status = aarch64_init_machine_status;
15082}
15083
15084/* Validate and finalize the selected code model, taking PIC flags into account. */
15085static void
0cfff2a1 15086initialize_aarch64_code_model (struct gcc_options *opts)
43e9d192 15087{
6c0ab626
X
15088 aarch64_cmodel = opts->x_aarch64_cmodel_var;
15089 switch (opts->x_aarch64_cmodel_var)
15090 {
15091 case AARCH64_CMODEL_TINY:
15092 if (opts->x_flag_pic)
15093 aarch64_cmodel = AARCH64_CMODEL_TINY_PIC;
15094 break;
15095 case AARCH64_CMODEL_SMALL:
15096 if (opts->x_flag_pic)
15097 {
34ecdb0f 15098#ifdef HAVE_AS_SMALL_PIC_RELOCS
6c0ab626
X
15099 aarch64_cmodel = (flag_pic == 2
15100 ? AARCH64_CMODEL_SMALL_PIC
15101 : AARCH64_CMODEL_SMALL_SPIC);
34ecdb0f 15102#else
6c0ab626 15103 aarch64_cmodel = AARCH64_CMODEL_SMALL_PIC;
34ecdb0f 15104#endif
6c0ab626
X
15105 }
15106 break;
15107 case AARCH64_CMODEL_LARGE:
15108 if (opts->x_flag_pic)
15109 sorry ("code model %qs with %<-f%s%>", "large",
15110 opts->x_flag_pic > 1 ? "PIC" : "pic");
15111 if (opts->x_aarch64_abi == AARCH64_ABI_ILP32)
15112 sorry ("code model %qs not supported in ilp32 mode", "large");
15113 break;
15114 case AARCH64_CMODEL_TINY_PIC:
15115 case AARCH64_CMODEL_SMALL_PIC:
15116 case AARCH64_CMODEL_SMALL_SPIC:
15117 gcc_unreachable ();
15118 }
43e9d192
IB
15119}
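/* Illustrative mapping of the switch above (an addition, not part of
   the upstream source):
     -mcmodel=tiny  + -fpic/-fPIC            -> AARCH64_CMODEL_TINY_PIC
     -mcmodel=small + -fpic  (flag_pic == 1) -> AARCH64_CMODEL_SMALL_SPIC
					     (when HAVE_AS_SMALL_PIC_RELOCS)
     -mcmodel=small + -fPIC  (flag_pic == 2) -> AARCH64_CMODEL_SMALL_PIC
     -mcmodel=large + PIC or ILP32           -> sorry () diagnostic.  */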
15120
361fb3ee
KT
15121/* Implement TARGET_OPTION_SAVE. */
15122
15123static void
ba948b37
JJ
15124aarch64_option_save (struct cl_target_option *ptr, struct gcc_options *opts,
15125 struct gcc_options */* opts_set */)
361fb3ee
KT
15126{
15127 ptr->x_aarch64_override_tune_string = opts->x_aarch64_override_tune_string;
efac62a3
ST
15128 ptr->x_aarch64_branch_protection_string
15129 = opts->x_aarch64_branch_protection_string;
361fb3ee
KT
15130}
15131
15132/* Implements TARGET_OPTION_RESTORE. Restore the backend codegen decisions
15133 using the information saved in PTR. */
15134
15135static void
ba948b37
JJ
15136aarch64_option_restore (struct gcc_options *opts,
15137 struct gcc_options */* opts_set */,
15138 struct cl_target_option *ptr)
361fb3ee
KT
15139{
15140 opts->x_explicit_tune_core = ptr->x_explicit_tune_core;
15141 selected_tune = aarch64_get_tune_cpu (ptr->x_explicit_tune_core);
15142 opts->x_explicit_arch = ptr->x_explicit_arch;
15143 selected_arch = aarch64_get_arch (ptr->x_explicit_arch);
15144 opts->x_aarch64_override_tune_string = ptr->x_aarch64_override_tune_string;
efac62a3
ST
15145 opts->x_aarch64_branch_protection_string
15146 = ptr->x_aarch64_branch_protection_string;
15147 if (opts->x_aarch64_branch_protection_string)
15148 {
15149 aarch64_parse_branch_protection (opts->x_aarch64_branch_protection_string,
15150 NULL);
15151 }
361fb3ee
KT
15152
15153 aarch64_override_options_internal (opts);
15154}
15155
15156/* Implement TARGET_OPTION_PRINT. */
15157
15158static void
15159aarch64_option_print (FILE *file, int indent, struct cl_target_option *ptr)
15160{
15161 const struct processor *cpu
15162 = aarch64_get_tune_cpu (ptr->x_explicit_tune_core);
28108a53 15163 uint64_t isa_flags = ptr->x_aarch64_isa_flags;
361fb3ee 15164 const struct processor *arch = aarch64_get_arch (ptr->x_explicit_arch);
054b4005 15165 std::string extension
04a99ebe 15166 = aarch64_get_extension_string_for_isa_flags (isa_flags, arch->flags);
361fb3ee
KT
15167
15168 fprintf (file, "%*sselected tune = %s\n", indent, "", cpu->name);
054b4005
JG
15169 fprintf (file, "%*sselected arch = %s%s\n", indent, "",
15170 arch->name, extension.c_str ());
361fb3ee
KT
15171}
15172
d78006d9
KT
15173static GTY(()) tree aarch64_previous_fndecl;
15174
e4ea20c8
KT
15175void
15176aarch64_reset_previous_fndecl (void)
15177{
15178 aarch64_previous_fndecl = NULL;
15179}
15180
acfc1ac1
KT
15181/* Restore or save the TREE_TARGET_GLOBALS from or to NEW_TREE.
15182 Used by aarch64_set_current_function and aarch64_pragma_target_parse to
15183 make sure optab availability predicates are recomputed when necessary. */
15184
15185void
15186aarch64_save_restore_target_globals (tree new_tree)
15187{
15188 if (TREE_TARGET_GLOBALS (new_tree))
15189 restore_target_globals (TREE_TARGET_GLOBALS (new_tree));
15190 else if (new_tree == target_option_default_node)
15191 restore_target_globals (&default_target_globals);
15192 else
15193 TREE_TARGET_GLOBALS (new_tree) = save_target_globals_default_opts ();
15194}
15195
d78006d9
KT
15196/* Implement TARGET_SET_CURRENT_FUNCTION. Unpack the codegen decisions
15197 like tuning and ISA features from the DECL_FUNCTION_SPECIFIC_TARGET
15198 of the function, if such exists. This function may be called multiple
15199 times on a single function so use aarch64_previous_fndecl to avoid
15200 setting up identical state. */
15201
15202static void
15203aarch64_set_current_function (tree fndecl)
15204{
acfc1ac1
KT
15205 if (!fndecl || fndecl == aarch64_previous_fndecl)
15206 return;
15207
d78006d9
KT
15208 tree old_tree = (aarch64_previous_fndecl
15209 ? DECL_FUNCTION_SPECIFIC_TARGET (aarch64_previous_fndecl)
15210 : NULL_TREE);
15211
acfc1ac1 15212 tree new_tree = DECL_FUNCTION_SPECIFIC_TARGET (fndecl);
d78006d9 15213
acfc1ac1
KT
15214 /* If current function has no attributes but the previous one did,
15215 use the default node. */
15216 if (!new_tree && old_tree)
15217 new_tree = target_option_default_node;
d78006d9 15218
acfc1ac1
KT
15219 /* If nothing to do, return. #pragma GCC reset or #pragma GCC pop to
15220 the default have been handled by aarch64_save_restore_target_globals from
15221 aarch64_pragma_target_parse. */
15222 if (old_tree == new_tree)
15223 return;
d78006d9 15224
acfc1ac1 15225 aarch64_previous_fndecl = fndecl;
6e17a23b 15226
acfc1ac1 15227 /* First set the target options. */
ba948b37
JJ
15228 cl_target_option_restore (&global_options, &global_options_set,
15229 TREE_TARGET_OPTION (new_tree));
6e17a23b 15230
acfc1ac1 15231 aarch64_save_restore_target_globals (new_tree);
d78006d9 15232}
361fb3ee 15233
5a2c8331
KT
15234/* Enum describing the various ways we can handle attributes.
15235 In many cases we can reuse the generic option handling machinery. */
15236
15237enum aarch64_attr_opt_type
15238{
15239 aarch64_attr_mask, /* Attribute should set a bit in target_flags. */
15240 aarch64_attr_bool, /* Attribute sets or unsets a boolean variable. */
15241 aarch64_attr_enum, /* Attribute sets an enum variable. */
15242 aarch64_attr_custom /* Attribute requires a custom handling function. */
15243};
15244
15245/* All the information needed to handle a target attribute.
15246 NAME is the name of the attribute.
9c582551 15247 ATTR_TYPE specifies the type of behavior of the attribute as described
5a2c8331
KT
15248 in the definition of enum aarch64_attr_opt_type.
15249 ALLOW_NEG is true if the attribute supports a "no-" form.
ab93e9b7
SE
15250 HANDLER is the function that takes the attribute string as an argument.
15251 It is needed only when the ATTR_TYPE is aarch64_attr_custom.
5a2c8331 15252 OPT_NUM is the enum specifying the option that the attribute modifies.
9c582551 15253 This is needed for attributes that mirror the behavior of a command-line
5a2c8331
KT
15254 option, that is, it has ATTR_TYPE aarch64_attr_mask, aarch64_attr_bool or
15255 aarch64_attr_enum. */
15256
15257struct aarch64_attribute_info
15258{
15259 const char *name;
15260 enum aarch64_attr_opt_type attr_type;
15261 bool allow_neg;
ab93e9b7 15262 bool (*handler) (const char *);
5a2c8331
KT
15263 enum opt_code opt_num;
15264};
15265
ab93e9b7 15266/* Handle the ARCH_STR argument to the arch= target attribute. */
5a2c8331
KT
15267
15268static bool
ab93e9b7 15269aarch64_handle_attr_arch (const char *str)
5a2c8331
KT
15270{
15271 const struct processor *tmp_arch = NULL;
c7887347 15272 std::string invalid_extension;
5a2c8331 15273 enum aarch64_parse_opt_result parse_res
c7887347 15274 = aarch64_parse_arch (str, &tmp_arch, &aarch64_isa_flags, &invalid_extension);
5a2c8331
KT
15275
15276 if (parse_res == AARCH64_PARSE_OK)
15277 {
15278 gcc_assert (tmp_arch);
15279 selected_arch = tmp_arch;
15280 explicit_arch = selected_arch->arch;
15281 return true;
15282 }
15283
15284 switch (parse_res)
15285 {
15286 case AARCH64_PARSE_MISSING_ARG:
ab93e9b7 15287 error ("missing name in %<target(\"arch=\")%> pragma or attribute");
5a2c8331
KT
15288 break;
15289 case AARCH64_PARSE_INVALID_ARG:
ab93e9b7 15290 error ("invalid name (\"%s\") in %<target(\"arch=\")%> pragma or attribute", str);
01f44038 15291 aarch64_print_hint_for_arch (str);
5a2c8331
KT
15292 break;
15293 case AARCH64_PARSE_INVALID_FEATURE:
c7887347
ML
15294 error ("invalid feature modifier %s of value (\"%s\") in "
15295 "%<target()%> pragma or attribute", invalid_extension.c_str (), str);
15296 aarch64_print_hint_for_extensions (invalid_extension);
5a2c8331
KT
15297 break;
15298 default:
15299 gcc_unreachable ();
15300 }
15301
15302 return false;
15303}
15304
ab93e9b7 15305/* Handle the argument CPU_STR to the cpu= target attribute. */
5a2c8331
KT
15306
15307static bool
ab93e9b7 15308aarch64_handle_attr_cpu (const char *str)
5a2c8331
KT
15309{
15310 const struct processor *tmp_cpu = NULL;
c7887347 15311 std::string invalid_extension;
5a2c8331 15312 enum aarch64_parse_opt_result parse_res
c7887347 15313 = aarch64_parse_cpu (str, &tmp_cpu, &aarch64_isa_flags, &invalid_extension);
5a2c8331
KT
15314
15315 if (parse_res == AARCH64_PARSE_OK)
15316 {
15317 gcc_assert (tmp_cpu);
15318 selected_tune = tmp_cpu;
15319 explicit_tune_core = selected_tune->ident;
15320
15321 selected_arch = &all_architectures[tmp_cpu->arch];
15322 explicit_arch = selected_arch->arch;
15323 return true;
15324 }
15325
15326 switch (parse_res)
15327 {
15328 case AARCH64_PARSE_MISSING_ARG:
ab93e9b7 15329 error ("missing name in %<target(\"cpu=\")%> pragma or attribute");
5a2c8331
KT
15330 break;
15331 case AARCH64_PARSE_INVALID_ARG:
ab93e9b7 15332 error ("invalid name (\"%s\") in %<target(\"cpu=\")%> pragma or attribute", str);
01f44038 15333 aarch64_print_hint_for_core (str);
5a2c8331
KT
15334 break;
15335 case AARCH64_PARSE_INVALID_FEATURE:
c7887347
ML
15336 error ("invalid feature modifier %s of value (\"%s\") in "
15337 "%<target()%> pragma or attribute", invalid_extension.c_str (), str);
15338 aarch64_print_hint_for_extensions (invalid_extension);
5a2c8331
KT
15339 break;
15340 default:
15341 gcc_unreachable ();
15342 }
15343
15344 return false;
15345}
15346
efac62a3
ST
15347/* Handle the argument STR to the branch-protection= attribute. */
15348
15349 static bool
15350 aarch64_handle_attr_branch_protection (const char* str)
15351 {
81e40f3a 15352 char *err_str = (char *) xmalloc (strlen (str) + 1);
efac62a3
ST
15353 enum aarch64_parse_opt_result res = aarch64_parse_branch_protection (str,
15354 &err_str);
15355 bool success = false;
15356 switch (res)
15357 {
15358 case AARCH64_PARSE_MISSING_ARG:
15359 error ("missing argument to %<target(\"branch-protection=\")%> pragma or"
15360 " attribute");
15361 break;
15362 case AARCH64_PARSE_INVALID_ARG:
15363 error ("invalid protection type (\"%s\") in %<target(\"branch-protection"
15364 "=\")%> pragma or attribute", err_str);
15365 break;
15366 case AARCH64_PARSE_OK:
15367 success = true;
15368 /* Fall through. */
15369 case AARCH64_PARSE_INVALID_FEATURE:
15370 break;
15371 default:
15372 gcc_unreachable ();
15373 }
15374 free (err_str);
15375 return success;
15376 }
15377
ab93e9b7 15378/* Handle the argument STR to the tune= target attribute. */
5a2c8331
KT
15379
15380static bool
ab93e9b7 15381aarch64_handle_attr_tune (const char *str)
5a2c8331
KT
15382{
15383 const struct processor *tmp_tune = NULL;
15384 enum aarch64_parse_opt_result parse_res
15385 = aarch64_parse_tune (str, &tmp_tune);
15386
15387 if (parse_res == AARCH64_PARSE_OK)
15388 {
15389 gcc_assert (tmp_tune);
15390 selected_tune = tmp_tune;
15391 explicit_tune_core = selected_tune->ident;
15392 return true;
15393 }
15394
15395 switch (parse_res)
15396 {
15397 case AARCH64_PARSE_INVALID_ARG:
ab93e9b7 15398 error ("invalid name (\"%s\") in %<target(\"tune=\")%> pragma or attribute", str);
01f44038 15399 aarch64_print_hint_for_core (str);
5a2c8331
KT
15400 break;
15401 default:
15402 gcc_unreachable ();
15403 }
15404
15405 return false;
15406}
15407
15408/* Parse an architecture extensions target attribute string specified in STR.
15409 For example "+fp+nosimd". Show any errors if needed. Return TRUE
15410 if successful. Update aarch64_isa_flags to reflect the ISA features
ab93e9b7 15411 modified. */
5a2c8331
KT
15412
15413static bool
ab93e9b7 15414aarch64_handle_attr_isa_flags (char *str)
5a2c8331
KT
15415{
15416 enum aarch64_parse_opt_result parse_res;
28108a53 15417 uint64_t isa_flags = aarch64_isa_flags;
5a2c8331 15418
e4ea20c8
KT
15419 /* We allow "+nothing" in the beginning to clear out all architectural
15420 features if the user wants to handpick specific features. */
15421 if (strncmp ("+nothing", str, 8) == 0)
15422 {
15423 isa_flags = 0;
15424 str += 8;
15425 }
15426
c7887347
ML
15427 std::string invalid_extension;
15428 parse_res = aarch64_parse_extension (str, &isa_flags, &invalid_extension);
5a2c8331
KT
15429
15430 if (parse_res == AARCH64_PARSE_OK)
15431 {
15432 aarch64_isa_flags = isa_flags;
15433 return true;
15434 }
15435
15436 switch (parse_res)
15437 {
15438 case AARCH64_PARSE_MISSING_ARG:
ab93e9b7 15439 error ("missing value in %<target()%> pragma or attribute");
5a2c8331
KT
15440 break;
15441
15442 case AARCH64_PARSE_INVALID_FEATURE:
c7887347
ML
15443 error ("invalid feature modifier %s of value (\"%s\") in "
15444 "%<target()%> pragma or attribute", invalid_extension.c_str (), str);
5a2c8331
KT
15445 break;
15446
15447 default:
15448 gcc_unreachable ();
15449 }
15450
15451 return false;
15452}
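/* Illustrative attribute strings handled above (an addition, not part
   of the upstream source): "+crc" ORs the CRC feature bits into the
   current aarch64_isa_flags, while "+nothing+fp" first clears every
   architectural feature bit and then enables just the FP extension.  */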
15453
15454/* The target attributes that we support. On top of these we also support just
15455 ISA extensions, like __attribute__ ((target ("+crc"))), but that case is
15456 handled explicitly in aarch64_process_one_target_attr. */
15457
15458static const struct aarch64_attribute_info aarch64_attributes[] =
15459{
15460 { "general-regs-only", aarch64_attr_mask, false, NULL,
15461 OPT_mgeneral_regs_only },
15462 { "fix-cortex-a53-835769", aarch64_attr_bool, true, NULL,
15463 OPT_mfix_cortex_a53_835769 },
48bb1a55
CL
15464 { "fix-cortex-a53-843419", aarch64_attr_bool, true, NULL,
15465 OPT_mfix_cortex_a53_843419 },
5a2c8331 15466 { "cmodel", aarch64_attr_enum, false, NULL, OPT_mcmodel_ },
675d044c 15467 { "strict-align", aarch64_attr_mask, true, NULL, OPT_mstrict_align },
5a2c8331
KT
15468 { "omit-leaf-frame-pointer", aarch64_attr_bool, true, NULL,
15469 OPT_momit_leaf_frame_pointer },
15470 { "tls-dialect", aarch64_attr_enum, false, NULL, OPT_mtls_dialect_ },
15471 { "arch", aarch64_attr_custom, false, aarch64_handle_attr_arch,
15472 OPT_march_ },
15473 { "cpu", aarch64_attr_custom, false, aarch64_handle_attr_cpu, OPT_mcpu_ },
15474 { "tune", aarch64_attr_custom, false, aarch64_handle_attr_tune,
15475 OPT_mtune_ },
efac62a3
ST
15476 { "branch-protection", aarch64_attr_custom, false,
15477 aarch64_handle_attr_branch_protection, OPT_mbranch_protection_ },
db58fd89
JW
15478 { "sign-return-address", aarch64_attr_enum, false, NULL,
15479 OPT_msign_return_address_ },
9e02b45f
ML
15480 { "outline-atomics", aarch64_attr_bool, true, NULL,
15481 OPT_moutline_atomics},
5a2c8331
KT
15482 { NULL, aarch64_attr_custom, false, NULL, OPT____ }
15483};
15484
15485/* Parse ARG_STR which contains the definition of one target attribute.
ab93e9b7 15486 Show appropriate errors if any or return true if the attribute is valid. */
5a2c8331
KT
15487
15488static bool
ab93e9b7 15489aarch64_process_one_target_attr (char *arg_str)
5a2c8331
KT
15490{
15491 bool invert = false;
15492
15493 size_t len = strlen (arg_str);
15494
15495 if (len == 0)
15496 {
ab93e9b7 15497 error ("malformed %<target()%> pragma or attribute");
5a2c8331
KT
15498 return false;
15499 }
15500
15501 char *str_to_check = (char *) alloca (len + 1);
15502 strcpy (str_to_check, arg_str);
15503
5a2c8331
KT
15504 /* We have something like __attribute__ ((target ("+fp+nosimd"))).
15505 It is easier to detect and handle it explicitly here rather than going
15506 through the machinery for the rest of the target attributes in this
15507 function. */
15508 if (*str_to_check == '+')
ab93e9b7 15509 return aarch64_handle_attr_isa_flags (str_to_check);
5a2c8331
KT
15510
15511 if (len > 3 && strncmp (str_to_check, "no-", 3) == 0)
15512 {
15513 invert = true;
15514 str_to_check += 3;
15515 }
15516 char *arg = strchr (str_to_check, '=');
15517
15518 /* If we found opt=foo then terminate STR_TO_CHECK at the '='
15519 and point ARG to "foo". */
15520 if (arg)
15521 {
15522 *arg = '\0';
15523 arg++;
15524 }
15525 const struct aarch64_attribute_info *p_attr;
16d12992 15526 bool found = false;
5a2c8331
KT
15527 for (p_attr = aarch64_attributes; p_attr->name; p_attr++)
15528 {
15529 /* If the names don't match up, or the user has given an argument
15530 to an attribute that doesn't accept one, or didn't give an argument
15531 to an attribute that expects one, fail to match. */
15532 if (strcmp (str_to_check, p_attr->name) != 0)
15533 continue;
15534
16d12992 15535 found = true;
5a2c8331
KT
15536 bool attr_need_arg_p = p_attr->attr_type == aarch64_attr_custom
15537 || p_attr->attr_type == aarch64_attr_enum;
15538
15539 if (attr_need_arg_p ^ (arg != NULL))
15540 {
ab93e9b7 15541 error ("pragma or attribute %<target(\"%s\")%> does not accept an argument", str_to_check);
5a2c8331
KT
15542 return false;
15543 }
15544
15545 /* If the name matches but the attribute does not allow "no-" versions
15546 then we can't match. */
15547 if (invert && !p_attr->allow_neg)
15548 {
ab93e9b7 15549 error ("pragma or attribute %<target(\"%s\")%> does not allow a negated form", str_to_check);
5a2c8331
KT
15550 return false;
15551 }
15552
15553 switch (p_attr->attr_type)
15554 {
15555 /* Has a custom handler registered.
15556 For example, cpu=, arch=, tune=. */
15557 case aarch64_attr_custom:
15558 gcc_assert (p_attr->handler);
ab93e9b7 15559 if (!p_attr->handler (arg))
5a2c8331
KT
15560 return false;
15561 break;
15562
15563 /* Either set or unset a boolean option. */
15564 case aarch64_attr_bool:
15565 {
15566 struct cl_decoded_option decoded;
15567
15568 generate_option (p_attr->opt_num, NULL, !invert,
15569 CL_TARGET, &decoded);
15570 aarch64_handle_option (&global_options, &global_options_set,
15571 &decoded, input_location);
15572 break;
15573 }
15574 /* Set or unset a bit in the target_flags. aarch64_handle_option
15575 should know what mask to apply given the option number. */
15576 case aarch64_attr_mask:
15577 {
15578 struct cl_decoded_option decoded;
15579 /* We only need to specify the option number.
15580 aarch64_handle_option will know which mask to apply. */
15581 decoded.opt_index = p_attr->opt_num;
15582 decoded.value = !invert;
15583 aarch64_handle_option (&global_options, &global_options_set,
15584 &decoded, input_location);
15585 break;
15586 }
15587 /* Use the option setting machinery to set an option to an enum. */
15588 case aarch64_attr_enum:
15589 {
15590 gcc_assert (arg);
15591 bool valid;
15592 int value;
15593 valid = opt_enum_arg_to_value (p_attr->opt_num, arg,
15594 &value, CL_TARGET);
15595 if (valid)
15596 {
15597 set_option (&global_options, NULL, p_attr->opt_num, value,
15598 NULL, DK_UNSPECIFIED, input_location,
15599 global_dc);
15600 }
15601 else
15602 {
ab93e9b7 15603 error ("pragma or attribute %<target(\"%s=%s\")%> is not valid", str_to_check, arg);
5a2c8331
KT
15604 }
15605 break;
15606 }
15607 default:
15608 gcc_unreachable ();
15609 }
15610 }
15611
16d12992
KT
15612 /* If we reached here we have either found an attribute and validated
15613 it or didn't match any. If we matched an attribute but its arguments
15614 were malformed we will have returned false already. */
15615 return found;
5a2c8331
KT
15616}
15617
15618/* Count how many times the character C appears in
15619 NULL-terminated string STR. */
15620
15621static unsigned int
15622num_occurences_in_str (char c, char *str)
15623{
15624 unsigned int res = 0;
15625 while (*str != '\0')
15626 {
15627 if (*str == c)
15628 res++;
15629
15630 str++;
15631 }
15632
15633 return res;
15634}
15635
15636/* Parse the tree in ARGS that contains the target attribute information
ab93e9b7 15637 and update the global target options space. */
5a2c8331
KT
15638
15639bool
ab93e9b7 15640aarch64_process_target_attr (tree args)
5a2c8331
KT
15641{
15642 if (TREE_CODE (args) == TREE_LIST)
15643 {
15644 do
15645 {
15646 tree head = TREE_VALUE (args);
15647 if (head)
15648 {
ab93e9b7 15649 if (!aarch64_process_target_attr (head))
5a2c8331
KT
15650 return false;
15651 }
15652 args = TREE_CHAIN (args);
15653 } while (args);
15654
15655 return true;
15656 }
3b6cb9e3
ML
15657
15658 if (TREE_CODE (args) != STRING_CST)
15659 {
15660 error ("attribute %<target%> argument not a string");
15661 return false;
15662 }
5a2c8331
KT
15663
15664 size_t len = strlen (TREE_STRING_POINTER (args));
15665 char *str_to_check = (char *) alloca (len + 1);
15666 strcpy (str_to_check, TREE_STRING_POINTER (args));
15667
15668 if (len == 0)
15669 {
ab93e9b7 15670 error ("malformed %<target()%> pragma or attribute");
5a2c8331
KT
15671 return false;
15672 }
15673
15674 /* Used to catch empty spaces between commas i.e.
15675 attribute ((target ("attr1,,attr2"))). */
15676 unsigned int num_commas = num_occurences_in_str (',', str_to_check);
15677
15678 /* Handle multiple target attributes separated by ','. */
7185a4eb 15679 char *token = strtok_r (str_to_check, ",", &str_to_check);
5a2c8331
KT
15680
15681 unsigned int num_attrs = 0;
15682 while (token)
15683 {
15684 num_attrs++;
ab93e9b7 15685 if (!aarch64_process_one_target_attr (token))
5a2c8331 15686 {
ab93e9b7 15687 error ("pragma or attribute %<target(\"%s\")%> is not valid", token);
5a2c8331
KT
15688 return false;
15689 }
15690
7185a4eb 15691 token = strtok_r (NULL, ",", &str_to_check);
5a2c8331
KT
15692 }
15693
15694 if (num_attrs != num_commas + 1)
15695 {
ab93e9b7 15696 error ("malformed %<target(\"%s\")%> pragma or attribute", TREE_STRING_POINTER (args));
5a2c8331
KT
15697 return false;
15698 }
15699
15700 return true;
15701}
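/* Illustrative examples of strings accepted by the parser above (an
   addition, not part of the upstream source):
     __attribute__ ((target ("arch=armv8.2-a,tune=cortex-a72")))
     __attribute__ ((target ("no-omit-leaf-frame-pointer,+crc")))
   An empty token, as in "cpu=cortex-a53,,+crc", makes num_attrs differ
   from num_commas + 1 and is rejected as malformed.  */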
15702
15703/* Implement TARGET_OPTION_VALID_ATTRIBUTE_P. This is used to
15704 process attribute ((target ("..."))). */
15705
15706static bool
15707aarch64_option_valid_attribute_p (tree fndecl, tree, tree args, int)
15708{
15709 struct cl_target_option cur_target;
15710 bool ret;
15711 tree old_optimize;
15712 tree new_target, new_optimize;
15713 tree existing_target = DECL_FUNCTION_SPECIFIC_TARGET (fndecl);
91d0e8de
KT
15714
15715 /* If what we're processing is the current pragma string then the
15716 target option node is already stored in target_option_current_node
15717 by aarch64_pragma_target_parse in aarch64-c.c. Use that to avoid
15718 having to re-parse the string. This is especially useful to keep
15719 arm_neon.h compile times down since that header contains a lot
15720 of intrinsics enclosed in pragmas. */
15721 if (!existing_target && args == current_target_pragma)
15722 {
15723 DECL_FUNCTION_SPECIFIC_TARGET (fndecl) = target_option_current_node;
15724 return true;
15725 }
5a2c8331
KT
15726 tree func_optimize = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl);
15727
ba948b37
JJ
15728 old_optimize
15729 = build_optimization_node (&global_options, &global_options_set);
5a2c8331
KT
15730 func_optimize = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl);
15731
15732 /* If the function changed the optimization levels as well as setting
15733 target options, start with the optimizations specified. */
15734 if (func_optimize && func_optimize != old_optimize)
ba948b37 15735 cl_optimization_restore (&global_options, &global_options_set,
5a2c8331
KT
15736 TREE_OPTIMIZATION (func_optimize));
15737
15738 /* Save the current target options to restore at the end. */
ba948b37 15739 cl_target_option_save (&cur_target, &global_options, &global_options_set);
5a2c8331
KT
15740
15741 /* If fndecl already has some target attributes applied to it, unpack
15742 them so that we add this attribute on top of them, rather than
15743 overwriting them. */
15744 if (existing_target)
15745 {
15746 struct cl_target_option *existing_options
15747 = TREE_TARGET_OPTION (existing_target);
15748
15749 if (existing_options)
ba948b37
JJ
15750 cl_target_option_restore (&global_options, &global_options_set,
15751 existing_options);
5a2c8331
KT
15752 }
15753 else
ba948b37
JJ
15754 cl_target_option_restore (&global_options, &global_options_set,
15755 TREE_TARGET_OPTION (target_option_current_node));
5a2c8331 15756
ab93e9b7 15757 ret = aarch64_process_target_attr (args);
5a2c8331
KT
15758
15759 /* Set up any additional state. */
15760 if (ret)
15761 {
15762 aarch64_override_options_internal (&global_options);
e95a988a
KT
15763 /* Initialize SIMD builtins if we haven't already.
15764 Set current_target_pragma to NULL for the duration so that
15765 the builtin initialization code doesn't try to tag the functions
15766 being built with the attributes specified by any current pragma, thus
15767 going into an infinite recursion. */
15768 if (TARGET_SIMD)
15769 {
15770 tree saved_current_target_pragma = current_target_pragma;
15771 current_target_pragma = NULL;
15772 aarch64_init_simd_builtins ();
15773 current_target_pragma = saved_current_target_pragma;
15774 }
ba948b37
JJ
15775 new_target = build_target_option_node (&global_options,
15776 &global_options_set);
5a2c8331
KT
15777 }
15778 else
15779 new_target = NULL;
15780
ba948b37
JJ
15781 new_optimize = build_optimization_node (&global_options,
15782 &global_options_set);
5a2c8331
KT
15783
15784 if (fndecl && ret)
15785 {
15786 DECL_FUNCTION_SPECIFIC_TARGET (fndecl) = new_target;
15787
15788 if (old_optimize != new_optimize)
15789 DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl) = new_optimize;
15790 }
15791
ba948b37 15792 cl_target_option_restore (&global_options, &global_options_set, &cur_target);
5a2c8331
KT
15793
15794 if (old_optimize != new_optimize)
ba948b37 15795 cl_optimization_restore (&global_options, &global_options_set,
5a2c8331
KT
15796 TREE_OPTIMIZATION (old_optimize));
15797 return ret;
15798}
15799
1fd8d40c
KT
15800/* Helper for aarch64_can_inline_p. In the case where CALLER and CALLEE are
15801 tri-bool options (yes, no, don't care) and the default value is
15802 DEF, determine whether to reject inlining. */
15803
15804static bool
15805aarch64_tribools_ok_for_inlining_p (int caller, int callee,
15806 int dont_care, int def)
15807{
15808 /* If the callee doesn't care, always allow inlining. */
15809 if (callee == dont_care)
15810 return true;
15811
15812 /* If the caller doesn't care, always allow inlining. */
15813 if (caller == dont_care)
15814 return true;
15815
15816 /* Otherwise, allow inlining if either the callee and caller values
15817 agree, or if the callee is using the default value. */
15818 return (callee == caller || callee == def);
15819}
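/* Illustrative cases (an addition, not part of the upstream source),
   using DONT_CARE == 2 and DEF == 1 as in the
   -momit-leaf-frame-pointer check below:
     caller == 2 or callee == 2              -> inlining allowed
     caller == 0, callee == 1 (the default)  -> inlining allowed
     caller == 1, callee == 0                -> inlining rejected.  */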
15820
15821/* Implement TARGET_CAN_INLINE_P. Decide whether it is valid
15822 to inline CALLEE into CALLER based on target-specific info.
15823 Make sure that the caller and callee have compatible architectural
15824 features. Then go through the other possible target attributes
15825 and see if they can block inlining. Try not to reject always_inline
15826 callees unless they are incompatible architecturally. */
15827
15828static bool
15829aarch64_can_inline_p (tree caller, tree callee)
15830{
15831 tree caller_tree = DECL_FUNCTION_SPECIFIC_TARGET (caller);
15832 tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (callee);
15833
1fd8d40c
KT
15834 struct cl_target_option *caller_opts
15835 = TREE_TARGET_OPTION (caller_tree ? caller_tree
15836 : target_option_default_node);
15837
675d044c
SD
15838 struct cl_target_option *callee_opts
15839 = TREE_TARGET_OPTION (callee_tree ? callee_tree
15840 : target_option_default_node);
1fd8d40c
KT
15841
15842 /* Callee's ISA flags should be a subset of the caller's. */
15843 if ((caller_opts->x_aarch64_isa_flags & callee_opts->x_aarch64_isa_flags)
15844 != callee_opts->x_aarch64_isa_flags)
15845 return false;
15846
15847 /* Allow non-strict aligned functions inlining into strict
15848 aligned ones. */
15849 if ((TARGET_STRICT_ALIGN_P (caller_opts->x_target_flags)
15850 != TARGET_STRICT_ALIGN_P (callee_opts->x_target_flags))
15851 && !(!TARGET_STRICT_ALIGN_P (callee_opts->x_target_flags)
15852 && TARGET_STRICT_ALIGN_P (caller_opts->x_target_flags)))
15853 return false;
15854
15855 bool always_inline = lookup_attribute ("always_inline",
15856 DECL_ATTRIBUTES (callee));
15857
15858 /* If the architectural features match up and the callee is always_inline
15859 then the other attributes don't matter. */
15860 if (always_inline)
15861 return true;
15862
15863 if (caller_opts->x_aarch64_cmodel_var
15864 != callee_opts->x_aarch64_cmodel_var)
15865 return false;
15866
15867 if (caller_opts->x_aarch64_tls_dialect
15868 != callee_opts->x_aarch64_tls_dialect)
15869 return false;
15870
15871 /* Honour explicit requests to workaround errata. */
15872 if (!aarch64_tribools_ok_for_inlining_p (
15873 caller_opts->x_aarch64_fix_a53_err835769,
15874 callee_opts->x_aarch64_fix_a53_err835769,
15875 2, TARGET_FIX_ERR_A53_835769_DEFAULT))
15876 return false;
15877
48bb1a55
CL
15878 if (!aarch64_tribools_ok_for_inlining_p (
15879 caller_opts->x_aarch64_fix_a53_err843419,
15880 callee_opts->x_aarch64_fix_a53_err843419,
15881 2, TARGET_FIX_ERR_A53_843419))
15882 return false;
15883
1fd8d40c
KT
15884 /* If the user explicitly specified -momit-leaf-frame-pointer for the
15885 caller and callee and they don't match up, reject inlining. */
15886 if (!aarch64_tribools_ok_for_inlining_p (
15887 caller_opts->x_flag_omit_leaf_frame_pointer,
15888 callee_opts->x_flag_omit_leaf_frame_pointer,
15889 2, 1))
15890 return false;
15891
15892 /* If the callee has specific tuning overrides, respect them. */
15893 if (callee_opts->x_aarch64_override_tune_string != NULL
15894 && caller_opts->x_aarch64_override_tune_string == NULL)
15895 return false;
15896
15897 /* If the user specified tuning override strings for the
15898 caller and callee and they don't match up, reject inlining.
15899 We just do a string compare here, we don't analyze the meaning
15900 of the string, as it would be too costly for little gain. */
15901 if (callee_opts->x_aarch64_override_tune_string
15902 && caller_opts->x_aarch64_override_tune_string
15903 && (strcmp (callee_opts->x_aarch64_override_tune_string,
15904 caller_opts->x_aarch64_override_tune_string) != 0))
15905 return false;
15906
15907 return true;
15908}
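/* Illustrative example (an addition, not part of the upstream source):
   a callee compiled with __attribute__ ((target ("+sve"))) is not
   inlined into a caller built for plain -march=armv8-a, since the
   callee's ISA flags are not a subset of the caller's.  Once the ISA
   and strict-alignment checks pass, an always_inline callee is
   accepted even if e.g. its tuning overrides differ from the
   caller's.  */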
15909
bb6ce448
RS
15910/* Return the ID of the TLSDESC ABI, initializing the descriptor if it hasn't
15911 been already. */
15912
15913unsigned int
15914aarch64_tlsdesc_abi_id ()
15915{
15916 predefined_function_abi &tlsdesc_abi = function_abis[ARM_PCS_TLSDESC];
15917 if (!tlsdesc_abi.initialized_p ())
15918 {
15919 HARD_REG_SET full_reg_clobbers;
15920 CLEAR_HARD_REG_SET (full_reg_clobbers);
15921 SET_HARD_REG_BIT (full_reg_clobbers, R0_REGNUM);
15922 SET_HARD_REG_BIT (full_reg_clobbers, CC_REGNUM);
15923 for (int regno = P0_REGNUM; regno <= P15_REGNUM; ++regno)
15924 SET_HARD_REG_BIT (full_reg_clobbers, regno);
15925 tlsdesc_abi.initialize (ARM_PCS_TLSDESC, full_reg_clobbers);
15926 }
15927 return tlsdesc_abi.id ();
15928}
15929
43e9d192
IB
15930/* Return true if SYMBOL_REF X binds locally. */
15931
15932static bool
15933aarch64_symbol_binds_local_p (const_rtx x)
15934{
15935 return (SYMBOL_REF_DECL (x)
15936 ? targetm.binds_local_p (SYMBOL_REF_DECL (x))
15937 : SYMBOL_REF_LOCAL_P (x));
15938}
15939
15940/* Return true if SYMBOL_REF X is thread local. */
15941static bool
15942aarch64_tls_symbol_p (rtx x)
15943{
15944 if (! TARGET_HAVE_TLS)
15945 return false;
15946
74b27d8e 15947 x = strip_salt (x);
43e9d192
IB
15948 if (GET_CODE (x) != SYMBOL_REF)
15949 return false;
15950
15951 return SYMBOL_REF_TLS_MODEL (x) != 0;
15952}
15953
15954/* Classify a TLS symbol into one of the TLS kinds. */
15955enum aarch64_symbol_type
15956aarch64_classify_tls_symbol (rtx x)
15957{
15958 enum tls_model tls_kind = tls_symbolic_operand_type (x);
15959
15960 switch (tls_kind)
15961 {
15962 case TLS_MODEL_GLOBAL_DYNAMIC:
15963 case TLS_MODEL_LOCAL_DYNAMIC:
15964 return TARGET_TLS_DESC ? SYMBOL_SMALL_TLSDESC : SYMBOL_SMALL_TLSGD;
15965
15966 case TLS_MODEL_INITIAL_EXEC:
5ae7caad
JW
15967 switch (aarch64_cmodel)
15968 {
15969 case AARCH64_CMODEL_TINY:
15970 case AARCH64_CMODEL_TINY_PIC:
15971 return SYMBOL_TINY_TLSIE;
15972 default:
79496620 15973 return SYMBOL_SMALL_TLSIE;
5ae7caad 15974 }
43e9d192
IB
15975
15976 case TLS_MODEL_LOCAL_EXEC:
cbf5629e
JW
15977 if (aarch64_tls_size == 12)
15978 return SYMBOL_TLSLE12;
15979 else if (aarch64_tls_size == 24)
15980 return SYMBOL_TLSLE24;
15981 else if (aarch64_tls_size == 32)
15982 return SYMBOL_TLSLE32;
15983 else if (aarch64_tls_size == 48)
15984 return SYMBOL_TLSLE48;
15985 else
15986 gcc_unreachable ();
43e9d192
IB
15987
15988 case TLS_MODEL_EMULATED:
15989 case TLS_MODEL_NONE:
15990 return SYMBOL_FORCE_TO_MEM;
15991
15992 default:
15993 gcc_unreachable ();
15994 }
15995}
15996
43cacb12
RS
15997/* Return the correct method for accessing X + OFFSET, where X is either
15998 a SYMBOL_REF or LABEL_REF. */
17f4d4bf 15999
43e9d192 16000enum aarch64_symbol_type
43cacb12 16001aarch64_classify_symbol (rtx x, HOST_WIDE_INT offset)
43e9d192 16002{
74b27d8e
RS
16003 x = strip_salt (x);
16004
43e9d192
IB
16005 if (GET_CODE (x) == LABEL_REF)
16006 {
16007 switch (aarch64_cmodel)
16008 {
16009 case AARCH64_CMODEL_LARGE:
16010 return SYMBOL_FORCE_TO_MEM;
16011
16012 case AARCH64_CMODEL_TINY_PIC:
16013 case AARCH64_CMODEL_TINY:
a5350ddc
CSS
16014 return SYMBOL_TINY_ABSOLUTE;
16015
1b1e81f8 16016 case AARCH64_CMODEL_SMALL_SPIC:
43e9d192
IB
16017 case AARCH64_CMODEL_SMALL_PIC:
16018 case AARCH64_CMODEL_SMALL:
16019 return SYMBOL_SMALL_ABSOLUTE;
16020
16021 default:
16022 gcc_unreachable ();
16023 }
16024 }
16025
17f4d4bf 16026 if (GET_CODE (x) == SYMBOL_REF)
43e9d192 16027 {
43e9d192
IB
16028 if (aarch64_tls_symbol_p (x))
16029 return aarch64_classify_tls_symbol (x);
16030
17f4d4bf
CSS
16031 switch (aarch64_cmodel)
16032 {
16033 case AARCH64_CMODEL_TINY:
15f6e0da 16034 /* When we retrieve symbol + offset address, we have to make sure
f8b756b7
TB
16035 the offset does not cause overflow of the final address. But
16036 we have no way of knowing the address of symbol at compile time
16037 so we can't accurately say if the distance between the PC and
7d3b27ff
WD
16038 symbol + offset is outside the addressable range of +/-1MB in the
16039 TINY code model. So we limit the maximum offset to +/-64KB and
16040 assume the offset to the symbol is not larger than +/-(1MB - 64KB).
16041 If offset_within_block_p is true we allow larger offsets.
16042 Furthermore force to memory if the symbol is a weak reference to
16043 something that doesn't resolve to a symbol in this module. */
16044
16045 if (SYMBOL_REF_WEAK (x) && !aarch64_symbol_binds_local_p (x))
a5350ddc 16046 return SYMBOL_FORCE_TO_MEM;
7d3b27ff
WD
16047 if (!(IN_RANGE (offset, -0x10000, 0x10000)
16048 || offset_within_block_p (x, offset)))
16049 return SYMBOL_FORCE_TO_MEM;
16050
a5350ddc
CSS
16051 return SYMBOL_TINY_ABSOLUTE;
16052
17f4d4bf 16053 case AARCH64_CMODEL_SMALL:
f8b756b7 16054 /* Same reasoning as the tiny code model, but the offset cap here is
7d3b27ff
WD
16055 1MB, allowing +/-3.9GB for the offset to the symbol. */
16056
16057 if (SYMBOL_REF_WEAK (x) && !aarch64_symbol_binds_local_p (x))
17f4d4bf 16058 return SYMBOL_FORCE_TO_MEM;
7d3b27ff
WD
16059 if (!(IN_RANGE (offset, -0x100000, 0x100000)
16060 || offset_within_block_p (x, offset)))
16061 return SYMBOL_FORCE_TO_MEM;
16062
17f4d4bf 16063 return SYMBOL_SMALL_ABSOLUTE;
43e9d192 16064
17f4d4bf 16065 case AARCH64_CMODEL_TINY_PIC:
38e6c9a6 16066 if (!aarch64_symbol_binds_local_p (x))
87dd8ab0 16067 return SYMBOL_TINY_GOT;
38e6c9a6
MS
16068 return SYMBOL_TINY_ABSOLUTE;
16069
1b1e81f8 16070 case AARCH64_CMODEL_SMALL_SPIC:
17f4d4bf
CSS
16071 case AARCH64_CMODEL_SMALL_PIC:
16072 if (!aarch64_symbol_binds_local_p (x))
1b1e81f8
JW
16073 return (aarch64_cmodel == AARCH64_CMODEL_SMALL_SPIC
16074 ? SYMBOL_SMALL_GOT_28K : SYMBOL_SMALL_GOT_4G);
17f4d4bf 16075 return SYMBOL_SMALL_ABSOLUTE;
43e9d192 16076
9ee6540a
WD
16077 case AARCH64_CMODEL_LARGE:
16078 /* This is alright even in PIC code as the constant
16079 pool reference is always PC relative and within
16080 the same translation unit. */
d47d34bb 16081 if (!aarch64_pcrelative_literal_loads && CONSTANT_POOL_ADDRESS_P (x))
9ee6540a
WD
16082 return SYMBOL_SMALL_ABSOLUTE;
16083 else
16084 return SYMBOL_FORCE_TO_MEM;
16085
17f4d4bf
CSS
16086 default:
16087 gcc_unreachable ();
16088 }
43e9d192 16089 }
17f4d4bf 16090
43e9d192
IB
16091 /* By default push everything into the constant pool. */
16092 return SYMBOL_FORCE_TO_MEM;
16093}
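/* Illustrative arithmetic for the offset caps above (an addition, not
   part of the upstream source): in the tiny model an ADR reaches
   +/-1MB, so capping the explicit offset at +/-64KB leaves
   +/-(1MB - 64KB) for the distance to the symbol itself; the small
   model applies the same argument with the +/-4GB reach of ADRP/ADD
   and a +/-1MB offset cap, leaving roughly +/-3.9GB for the symbol.  */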
16094
43e9d192
IB
16095bool
16096aarch64_constant_address_p (rtx x)
16097{
16098 return (CONSTANT_P (x) && memory_address_p (DImode, x));
16099}
16100
16101bool
16102aarch64_legitimate_pic_operand_p (rtx x)
16103{
74b27d8e
RS
16104 poly_int64 offset;
16105 x = strip_offset_and_salt (x, &offset);
16106 if (GET_CODE (x) == SYMBOL_REF)
16107 return false;
43e9d192
IB
16108
16109 return true;
16110}
16111
26895c21
WD
16112/* Implement TARGET_LEGITIMATE_CONSTANT_P hook. Return true for constants
16113 that should be rematerialized rather than spilled. */
3520f7cc 16114
43e9d192 16115static bool
ef4bddc2 16116aarch64_legitimate_constant_p (machine_mode mode, rtx x)
43e9d192 16117{
26895c21 16118 /* Support CSE and rematerialization of common constants. */
c0bb5bc5 16119 if (CONST_INT_P (x)
9f7b87ca 16120 || (CONST_DOUBLE_P (x) && GET_MODE_CLASS (mode) == MODE_FLOAT)
c0bb5bc5 16121 || GET_CODE (x) == CONST_VECTOR)
26895c21
WD
16122 return true;
16123
43cacb12
RS
16124 /* Do not allow vector struct mode constants for Advanced SIMD.
16125 We could support 0 and -1 easily, but they need support in
16126 aarch64-simd.md. */
16127 unsigned int vec_flags = aarch64_classify_vector_mode (mode);
16128 if (vec_flags == (VEC_ADVSIMD | VEC_STRUCT))
43e9d192
IB
16129 return false;
16130
43cacb12
RS
16131 /* Only accept variable-length vector constants if they can be
16132 handled directly.
16133
16134 ??? It would be possible to handle rematerialization of other
16135 constants via secondary reloads. */
16136 if (vec_flags & VEC_ANY_SVE)
16137 return aarch64_simd_valid_immediate (x, NULL);
16138
509bb9b6
RS
16139 if (GET_CODE (x) == HIGH)
16140 x = XEXP (x, 0);
16141
43cacb12
RS
16142 /* Accept polynomial constants that can be calculated by using the
16143 destination of a move as the sole temporary. Constants that
16144 require a second temporary cannot be rematerialized (they can't be
16145 forced to memory and also aren't legitimate constants). */
16146 poly_int64 offset;
16147 if (poly_int_rtx_p (x, &offset))
16148 return aarch64_offset_temporaries (false, offset) <= 1;
16149
16150 /* If an offset is being added to something else, we need to allow the
16151 base to be moved into the destination register, meaning that there
16152 are no free temporaries for the offset. */
74b27d8e 16153 x = strip_offset_and_salt (x, &offset);
43cacb12
RS
16154 if (!offset.is_constant () && aarch64_offset_temporaries (true, offset) > 0)
16155 return false;
26895c21 16156
43cacb12
RS
16157 /* Do not allow const (plus (anchor_symbol, const_int)). */
16158 if (maybe_ne (offset, 0) && SYMBOL_REF_P (x) && SYMBOL_REF_ANCHOR_P (x))
16159 return false;
26895c21 16160
f28e54bd
WD
16161 /* Treat symbols as constants. Avoid TLS symbols as they are complex,
16162 so spilling them is better than rematerialization. */
16163 if (SYMBOL_REF_P (x) && !SYMBOL_REF_TLS_MODEL (x))
16164 return true;
16165
26895c21
WD
16166 /* Label references are always constant. */
16167 if (GET_CODE (x) == LABEL_REF)
16168 return true;
16169
16170 return false;
43e9d192
IB
16171}
16172
a5bc806c 16173rtx
43e9d192
IB
16174aarch64_load_tp (rtx target)
16175{
16176 if (!target
16177 || GET_MODE (target) != Pmode
16178 || !register_operand (target, Pmode))
16179 target = gen_reg_rtx (Pmode);
16180
16181 /* Can return in any reg. */
16182 emit_insn (gen_aarch64_load_tp_hard (target));
16183 return target;
16184}
16185
43e9d192
IB
16186/* On AAPCS systems, this is the "struct __va_list". */
16187static GTY(()) tree va_list_type;
16188
16189/* Implement TARGET_BUILD_BUILTIN_VA_LIST.
16190 Return the type to use as __builtin_va_list.
16191
16192 AAPCS64 \S 7.1.4 requires that va_list be a typedef for a type defined as:
16193
16194 struct __va_list
16195 {
16196 void *__stack;
16197 void *__gr_top;
16198 void *__vr_top;
16199 int __gr_offs;
16200 int __vr_offs;
16201 }; */
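/* A minimal illustration of how the record above is consumed (plain
 ISO C, not part of GCC itself):

 #include <stdarg.h>

 int
 sum (int n, ...)
 {
 va_list ap;
 int i, total = 0;
 va_start (ap, n);
 for (i = 0; i < n; i++)
 total += va_arg (ap, int);
 va_end (ap);
 return total;
 }

 For such a callee, __gr_offs starts out negative (minus the size of
 the GP register save area set up below) and each integer va_arg
 advances it by 8; once it reaches zero, the remaining arguments are
 fetched through __stack instead. */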
16202
16203static tree
16204aarch64_build_builtin_va_list (void)
16205{
16206 tree va_list_name;
16207 tree f_stack, f_grtop, f_vrtop, f_groff, f_vroff;
16208
16209 /* Create the type. */
16210 va_list_type = lang_hooks.types.make_type (RECORD_TYPE);
16211 /* Give it the required name. */
16212 va_list_name = build_decl (BUILTINS_LOCATION,
16213 TYPE_DECL,
16214 get_identifier ("__va_list"),
16215 va_list_type);
16216 DECL_ARTIFICIAL (va_list_name) = 1;
16217 TYPE_NAME (va_list_type) = va_list_name;
665c56c6 16218 TYPE_STUB_DECL (va_list_type) = va_list_name;
43e9d192
IB
16219
16220 /* Create the fields. */
16221 f_stack = build_decl (BUILTINS_LOCATION,
16222 FIELD_DECL, get_identifier ("__stack"),
16223 ptr_type_node);
16224 f_grtop = build_decl (BUILTINS_LOCATION,
16225 FIELD_DECL, get_identifier ("__gr_top"),
16226 ptr_type_node);
16227 f_vrtop = build_decl (BUILTINS_LOCATION,
16228 FIELD_DECL, get_identifier ("__vr_top"),
16229 ptr_type_node);
16230 f_groff = build_decl (BUILTINS_LOCATION,
16231 FIELD_DECL, get_identifier ("__gr_offs"),
16232 integer_type_node);
16233 f_vroff = build_decl (BUILTINS_LOCATION,
16234 FIELD_DECL, get_identifier ("__vr_offs"),
16235 integer_type_node);
16236
88e3bdd1 16237 /* Tell tree-stdarg pass about our internal offset fields.
3fd6b9cc
JW
16238 NOTE: va_list_gpr/fpr_counter_field are only used for tree comparison
16239 purposes, to identify whether the code is updating va_list internal
16240 offset fields in an irregular way. */
16241 va_list_gpr_counter_field = f_groff;
16242 va_list_fpr_counter_field = f_vroff;
16243
43e9d192
IB
16244 DECL_ARTIFICIAL (f_stack) = 1;
16245 DECL_ARTIFICIAL (f_grtop) = 1;
16246 DECL_ARTIFICIAL (f_vrtop) = 1;
16247 DECL_ARTIFICIAL (f_groff) = 1;
16248 DECL_ARTIFICIAL (f_vroff) = 1;
16249
16250 DECL_FIELD_CONTEXT (f_stack) = va_list_type;
16251 DECL_FIELD_CONTEXT (f_grtop) = va_list_type;
16252 DECL_FIELD_CONTEXT (f_vrtop) = va_list_type;
16253 DECL_FIELD_CONTEXT (f_groff) = va_list_type;
16254 DECL_FIELD_CONTEXT (f_vroff) = va_list_type;
16255
16256 TYPE_FIELDS (va_list_type) = f_stack;
16257 DECL_CHAIN (f_stack) = f_grtop;
16258 DECL_CHAIN (f_grtop) = f_vrtop;
16259 DECL_CHAIN (f_vrtop) = f_groff;
16260 DECL_CHAIN (f_groff) = f_vroff;
16261
16262 /* Compute its layout. */
16263 layout_type (va_list_type);
16264
16265 return va_list_type;
16266}
16267
16268/* Implement TARGET_EXPAND_BUILTIN_VA_START. */
16269static void
16270aarch64_expand_builtin_va_start (tree valist, rtx nextarg ATTRIBUTE_UNUSED)
16271{
16272 const CUMULATIVE_ARGS *cum;
16273 tree f_stack, f_grtop, f_vrtop, f_groff, f_vroff;
16274 tree stack, grtop, vrtop, groff, vroff;
16275 tree t;
88e3bdd1
JW
16276 int gr_save_area_size = cfun->va_list_gpr_size;
16277 int vr_save_area_size = cfun->va_list_fpr_size;
43e9d192
IB
16278 int vr_offset;
16279
16280 cum = &crtl->args.info;
88e3bdd1
JW
16281 if (cfun->va_list_gpr_size)
16282 gr_save_area_size = MIN ((NUM_ARG_REGS - cum->aapcs_ncrn) * UNITS_PER_WORD,
16283 cfun->va_list_gpr_size);
16284 if (cfun->va_list_fpr_size)
16285 vr_save_area_size = MIN ((NUM_FP_ARG_REGS - cum->aapcs_nvrn)
16286 * UNITS_PER_VREG, cfun->va_list_fpr_size);
43e9d192 16287
d5726973 16288 if (!TARGET_FLOAT)
43e9d192 16289 {
261fb553 16290 gcc_assert (cum->aapcs_nvrn == 0);
43e9d192
IB
16291 vr_save_area_size = 0;
16292 }
16293
16294 f_stack = TYPE_FIELDS (va_list_type_node);
16295 f_grtop = DECL_CHAIN (f_stack);
16296 f_vrtop = DECL_CHAIN (f_grtop);
16297 f_groff = DECL_CHAIN (f_vrtop);
16298 f_vroff = DECL_CHAIN (f_groff);
16299
16300 stack = build3 (COMPONENT_REF, TREE_TYPE (f_stack), valist, f_stack,
16301 NULL_TREE);
16302 grtop = build3 (COMPONENT_REF, TREE_TYPE (f_grtop), valist, f_grtop,
16303 NULL_TREE);
16304 vrtop = build3 (COMPONENT_REF, TREE_TYPE (f_vrtop), valist, f_vrtop,
16305 NULL_TREE);
16306 groff = build3 (COMPONENT_REF, TREE_TYPE (f_groff), valist, f_groff,
16307 NULL_TREE);
16308 vroff = build3 (COMPONENT_REF, TREE_TYPE (f_vroff), valist, f_vroff,
16309 NULL_TREE);
16310
16311 /* Emit code to initialize STACK, which points to the next varargs stack
16312 argument. CUM->AAPCS_STACK_SIZE gives the number of stack words used
16313 by named arguments. STACK is 8-byte aligned. */
16314 t = make_tree (TREE_TYPE (stack), virtual_incoming_args_rtx);
16315 if (cum->aapcs_stack_size > 0)
16316 t = fold_build_pointer_plus_hwi (t, cum->aapcs_stack_size * UNITS_PER_WORD);
16317 t = build2 (MODIFY_EXPR, TREE_TYPE (stack), stack, t);
16318 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
16319
16320 /* Emit code to initialize GRTOP, the top of the GR save area.
16321 virtual_incoming_args_rtx should have been 16 byte aligned. */
16322 t = make_tree (TREE_TYPE (grtop), virtual_incoming_args_rtx);
16323 t = build2 (MODIFY_EXPR, TREE_TYPE (grtop), grtop, t);
16324 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
16325
16326 /* Emit code to initialize VRTOP, the top of the VR save area.
16327 This address is gr_save_area_bytes below GRTOP, rounded
16328 down to the next 16-byte boundary. */
16329 t = make_tree (TREE_TYPE (vrtop), virtual_incoming_args_rtx);
4f59f9f2
UB
16330 vr_offset = ROUND_UP (gr_save_area_size,
16331 STACK_BOUNDARY / BITS_PER_UNIT);
43e9d192
IB
16332
16333 if (vr_offset)
16334 t = fold_build_pointer_plus_hwi (t, -vr_offset);
16335 t = build2 (MODIFY_EXPR, TREE_TYPE (vrtop), vrtop, t);
16336 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
16337
16338 /* Emit code to initialize GROFF, the offset from GRTOP of the
16339 next GPR argument. */
16340 t = build2 (MODIFY_EXPR, TREE_TYPE (groff), groff,
16341 build_int_cst (TREE_TYPE (groff), -gr_save_area_size));
16342 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
16343
16344 /* Likewise emit code to initialize VROFF, the offset from FTOP
16345 of the next VR argument. */
16346 t = build2 (MODIFY_EXPR, TREE_TYPE (vroff), vroff,
16347 build_int_cst (TREE_TYPE (vroff), -vr_save_area_size));
16348 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
16349}
16350
16351/* Implement TARGET_GIMPLIFY_VA_ARG_EXPR. */
16352
16353static tree
16354aarch64_gimplify_va_arg_expr (tree valist, tree type, gimple_seq *pre_p,
16355 gimple_seq *post_p ATTRIBUTE_UNUSED)
16356{
16357 tree addr;
16358 bool indirect_p;
16359 bool is_ha; /* is HFA or HVA. */
16360 bool dw_align; /* double-word align. */
ef4bddc2 16361 machine_mode ag_mode = VOIDmode;
43e9d192 16362 int nregs;
ef4bddc2 16363 machine_mode mode;
43e9d192
IB
16364
16365 tree f_stack, f_grtop, f_vrtop, f_groff, f_vroff;
16366 tree stack, f_top, f_off, off, arg, roundup, on_stack;
16367 HOST_WIDE_INT size, rsize, adjust, align;
16368 tree t, u, cond1, cond2;
16369
fde65a89 16370 indirect_p = pass_va_arg_by_reference (type);
43e9d192
IB
16371 if (indirect_p)
16372 type = build_pointer_type (type);
16373
16374 mode = TYPE_MODE (type);
16375
16376 f_stack = TYPE_FIELDS (va_list_type_node);
16377 f_grtop = DECL_CHAIN (f_stack);
16378 f_vrtop = DECL_CHAIN (f_grtop);
16379 f_groff = DECL_CHAIN (f_vrtop);
16380 f_vroff = DECL_CHAIN (f_groff);
16381
16382 stack = build3 (COMPONENT_REF, TREE_TYPE (f_stack), unshare_expr (valist),
16383 f_stack, NULL_TREE);
16384 size = int_size_in_bytes (type);
c590597c
RE
16385
16386 bool abi_break;
16387 align
16388 = aarch64_function_arg_alignment (mode, type, &abi_break) / BITS_PER_UNIT;
43e9d192
IB
16389
16390 dw_align = false;
16391 adjust = 0;
56fe3ca3
RS
16392 if (aarch64_vfp_is_call_or_return_candidate (mode, type, &ag_mode, &nregs,
16393 &is_ha, false))
43e9d192 16394 {
6a70badb
RS
16395 /* No frontends can create types with variable-sized modes, so we
16396 shouldn't be asked to pass or return them. */
16397 unsigned int ag_size = GET_MODE_SIZE (ag_mode).to_constant ();
16398
43e9d192 16399 /* TYPE passed in fp/simd registers. */
d5726973 16400 if (!TARGET_FLOAT)
fc29dfc9 16401 aarch64_err_no_fpadvsimd (mode);
43e9d192
IB
16402
16403 f_top = build3 (COMPONENT_REF, TREE_TYPE (f_vrtop),
16404 unshare_expr (valist), f_vrtop, NULL_TREE);
16405 f_off = build3 (COMPONENT_REF, TREE_TYPE (f_vroff),
16406 unshare_expr (valist), f_vroff, NULL_TREE);
16407
16408 rsize = nregs * UNITS_PER_VREG;
16409
16410 if (is_ha)
16411 {
6a70badb
RS
16412 if (BYTES_BIG_ENDIAN && ag_size < UNITS_PER_VREG)
16413 adjust = UNITS_PER_VREG - ag_size;
43e9d192 16414 }
76b0cbf8 16415 else if (BLOCK_REG_PADDING (mode, type, 1) == PAD_DOWNWARD
43e9d192
IB
16416 && size < UNITS_PER_VREG)
16417 {
16418 adjust = UNITS_PER_VREG - size;
16419 }
16420 }
16421 else
16422 {
16423 /* TYPE passed in general registers. */
16424 f_top = build3 (COMPONENT_REF, TREE_TYPE (f_grtop),
16425 unshare_expr (valist), f_grtop, NULL_TREE);
16426 f_off = build3 (COMPONENT_REF, TREE_TYPE (f_groff),
16427 unshare_expr (valist), f_groff, NULL_TREE);
4f59f9f2 16428 rsize = ROUND_UP (size, UNITS_PER_WORD);
43e9d192
IB
16429 nregs = rsize / UNITS_PER_WORD;
16430
16431 if (align > 8)
c590597c
RE
16432 {
16433 if (abi_break && warn_psabi)
16434 inform (input_location, "parameter passing for argument of type "
16435 "%qT changed in GCC 9.1", type);
16436 dw_align = true;
16437 }
43e9d192 16438
76b0cbf8 16439 if (BLOCK_REG_PADDING (mode, type, 1) == PAD_DOWNWARD
43e9d192
IB
16440 && size < UNITS_PER_WORD)
16441 {
16442 adjust = UNITS_PER_WORD - size;
16443 }
16444 }
16445
16446 /* Get a local temporary for the field value. */
16447 off = get_initialized_tmp_var (f_off, pre_p, NULL);
16448
16449 /* Emit code to branch if off >= 0. */
16450 t = build2 (GE_EXPR, boolean_type_node, off,
16451 build_int_cst (TREE_TYPE (off), 0));
16452 cond1 = build3 (COND_EXPR, ptr_type_node, t, NULL_TREE, NULL_TREE);
16453
16454 if (dw_align)
16455 {
16456 /* Emit: offs = (offs + 15) & -16. */
16457 t = build2 (PLUS_EXPR, TREE_TYPE (off), off,
16458 build_int_cst (TREE_TYPE (off), 15));
16459 t = build2 (BIT_AND_EXPR, TREE_TYPE (off), t,
16460 build_int_cst (TREE_TYPE (off), -16));
16461 roundup = build2 (MODIFY_EXPR, TREE_TYPE (off), off, t);
16462 }
16463 else
16464 roundup = NULL;
16465
16466 /* Update ap.__[g|v]r_offs */
16467 t = build2 (PLUS_EXPR, TREE_TYPE (off), off,
16468 build_int_cst (TREE_TYPE (off), rsize));
16469 t = build2 (MODIFY_EXPR, TREE_TYPE (f_off), unshare_expr (f_off), t);
16470
16471 /* String up. */
16472 if (roundup)
16473 t = build2 (COMPOUND_EXPR, TREE_TYPE (t), roundup, t);
16474
16475 /* [cond2] if (ap.__[g|v]r_offs > 0) */
16476 u = build2 (GT_EXPR, boolean_type_node, unshare_expr (f_off),
16477 build_int_cst (TREE_TYPE (f_off), 0));
16478 cond2 = build3 (COND_EXPR, ptr_type_node, u, NULL_TREE, NULL_TREE);
16479
16480 /* String up: make sure the assignment happens before the use. */
16481 t = build2 (COMPOUND_EXPR, TREE_TYPE (cond2), t, cond2);
16482 COND_EXPR_ELSE (cond1) = t;
16483
16484 /* Prepare the trees handling the argument that is passed on the stack;
16485 the top level node will store in ON_STACK. */
16486 arg = get_initialized_tmp_var (stack, pre_p, NULL);
16487 if (align > 8)
16488 {
16489 /* if (alignof(type) > 8) (arg = arg + 15) & -16; */
4bdc2738 16490 t = fold_build_pointer_plus_hwi (arg, 15);
43e9d192
IB
16491 t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
16492 build_int_cst (TREE_TYPE (t), -16));
43e9d192
IB
16493 roundup = build2 (MODIFY_EXPR, TREE_TYPE (arg), arg, t);
16494 }
16495 else
16496 roundup = NULL;
16497 /* Advance ap.__stack */
4bdc2738 16498 t = fold_build_pointer_plus_hwi (arg, size + 7);
43e9d192
IB
16499 t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
16500 build_int_cst (TREE_TYPE (t), -8));
43e9d192
IB
16501 t = build2 (MODIFY_EXPR, TREE_TYPE (stack), unshare_expr (stack), t);
16502 /* String up roundup and advance. */
16503 if (roundup)
16504 t = build2 (COMPOUND_EXPR, TREE_TYPE (t), roundup, t);
16505 /* String up with arg */
16506 on_stack = build2 (COMPOUND_EXPR, TREE_TYPE (arg), t, arg);
16507 /* Big-endianness related address adjustment. */
76b0cbf8 16508 if (BLOCK_REG_PADDING (mode, type, 1) == PAD_DOWNWARD
43e9d192
IB
16509 && size < UNITS_PER_WORD)
16510 {
16511 t = build2 (POINTER_PLUS_EXPR, TREE_TYPE (arg), arg,
16512 size_int (UNITS_PER_WORD - size));
16513 on_stack = build2 (COMPOUND_EXPR, TREE_TYPE (arg), on_stack, t);
16514 }
16515
16516 COND_EXPR_THEN (cond1) = unshare_expr (on_stack);
16517 COND_EXPR_THEN (cond2) = unshare_expr (on_stack);
16518
16519 /* Adjustment to OFFSET in the case of BIG_ENDIAN. */
16520 t = off;
16521 if (adjust)
16522 t = build2 (PREINCREMENT_EXPR, TREE_TYPE (off), off,
16523 build_int_cst (TREE_TYPE (off), adjust));
16524
16525 t = fold_convert (sizetype, t);
16526 t = build2 (POINTER_PLUS_EXPR, TREE_TYPE (f_top), f_top, t);
16527
16528 if (is_ha)
16529 {
16530 /* type ha; // treat as "struct {ftype field[n];}"
16531 ... [computing offs]
16532 for (i = 0; i <nregs; ++i, offs += 16)
16533 ha.field[i] = *((ftype *)(ap.__vr_top + offs));
16534 return ha; */
16535 int i;
16536 tree tmp_ha, field_t, field_ptr_t;
16537
16538 /* Declare a local variable. */
16539 tmp_ha = create_tmp_var_raw (type, "ha");
16540 gimple_add_tmp_var (tmp_ha);
16541
16542 /* Establish the base type. */
16543 switch (ag_mode)
16544 {
4e10a5a7 16545 case E_SFmode:
43e9d192
IB
16546 field_t = float_type_node;
16547 field_ptr_t = float_ptr_type_node;
16548 break;
4e10a5a7 16549 case E_DFmode:
43e9d192
IB
16550 field_t = double_type_node;
16551 field_ptr_t = double_ptr_type_node;
16552 break;
4e10a5a7 16553 case E_TFmode:
43e9d192
IB
16554 field_t = long_double_type_node;
16555 field_ptr_t = long_double_ptr_type_node;
16556 break;
4e10a5a7 16557 case E_HFmode:
1b62ed4f
JG
16558 field_t = aarch64_fp16_type_node;
16559 field_ptr_t = aarch64_fp16_ptr_type_node;
43e9d192 16560 break;
abbe1ed2
SMW
16561 case E_BFmode:
16562 field_t = aarch64_bf16_type_node;
16563 field_ptr_t = aarch64_bf16_ptr_type_node;
16564 break;
4e10a5a7
RS
16565 case E_V2SImode:
16566 case E_V4SImode:
43e9d192
IB
16567 {
16568 tree innertype = make_signed_type (GET_MODE_PRECISION (SImode));
16569 field_t = build_vector_type_for_mode (innertype, ag_mode);
16570 field_ptr_t = build_pointer_type (field_t);
16571 }
16572 break;
16573 default:
16574 gcc_assert (0);
16575 }
16576
16577 /* *(field_ptr_t)&ha = *((field_ptr_t)vr_saved_area) */
ab563903 16578 TREE_ADDRESSABLE (tmp_ha) = 1;
43e9d192
IB
16579 tmp_ha = build1 (ADDR_EXPR, field_ptr_t, tmp_ha);
16580 addr = t;
16581 t = fold_convert (field_ptr_t, addr);
16582 t = build2 (MODIFY_EXPR, field_t,
16583 build1 (INDIRECT_REF, field_t, tmp_ha),
16584 build1 (INDIRECT_REF, field_t, t));
16585
16586 /* ha.field[i] = *((field_ptr_t)vr_saved_area + i) */
16587 for (i = 1; i < nregs; ++i)
16588 {
16589 addr = fold_build_pointer_plus_hwi (addr, UNITS_PER_VREG);
16590 u = fold_convert (field_ptr_t, addr);
16591 u = build2 (MODIFY_EXPR, field_t,
16592 build2 (MEM_REF, field_t, tmp_ha,
16593 build_int_cst (field_ptr_t,
16594 (i *
16595 int_size_in_bytes (field_t)))),
16596 build1 (INDIRECT_REF, field_t, u));
16597 t = build2 (COMPOUND_EXPR, TREE_TYPE (t), t, u);
16598 }
16599
16600 u = fold_convert (TREE_TYPE (f_top), tmp_ha);
16601 t = build2 (COMPOUND_EXPR, TREE_TYPE (f_top), t, u);
16602 }
16603
16604 COND_EXPR_ELSE (cond2) = t;
16605 addr = fold_convert (build_pointer_type (type), cond1);
16606 addr = build_va_arg_indirect_ref (addr);
16607
16608 if (indirect_p)
16609 addr = build_va_arg_indirect_ref (addr);
16610
16611 return addr;
16612}
16613
16614/* Implement TARGET_SETUP_INCOMING_VARARGS. */
16615
16616static void
e7056ca4
RS
16617aarch64_setup_incoming_varargs (cumulative_args_t cum_v,
16618 const function_arg_info &arg,
16619 int *pretend_size ATTRIBUTE_UNUSED, int no_rtl)
43e9d192
IB
16620{
16621 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
16622 CUMULATIVE_ARGS local_cum;
88e3bdd1
JW
16623 int gr_saved = cfun->va_list_gpr_size;
16624 int vr_saved = cfun->va_list_fpr_size;
43e9d192
IB
16625
16626 /* The caller has advanced CUM up to, but not beyond, the last named
16627 argument. Advance a local copy of CUM past the last "real" named
16628 argument, to find out how many registers are left over. */
16629 local_cum = *cum;
6930c98c 16630 aarch64_function_arg_advance (pack_cumulative_args(&local_cum), arg);
43e9d192 16631
88e3bdd1
JW
16632 /* Find out how many registers we need to save.
16633 Honor the tree-stdarg analysis results. */
16634 if (cfun->va_list_gpr_size)
16635 gr_saved = MIN (NUM_ARG_REGS - local_cum.aapcs_ncrn,
16636 cfun->va_list_gpr_size / UNITS_PER_WORD);
16637 if (cfun->va_list_fpr_size)
16638 vr_saved = MIN (NUM_FP_ARG_REGS - local_cum.aapcs_nvrn,
16639 cfun->va_list_fpr_size / UNITS_PER_VREG);
43e9d192 16640
d5726973 16641 if (!TARGET_FLOAT)
43e9d192 16642 {
261fb553 16643 gcc_assert (local_cum.aapcs_nvrn == 0);
43e9d192
IB
16644 vr_saved = 0;
16645 }
16646
16647 if (!no_rtl)
16648 {
16649 if (gr_saved > 0)
16650 {
16651 rtx ptr, mem;
16652
16653 /* virtual_incoming_args_rtx should have been 16-byte aligned. */
16654 ptr = plus_constant (Pmode, virtual_incoming_args_rtx,
16655 - gr_saved * UNITS_PER_WORD);
16656 mem = gen_frame_mem (BLKmode, ptr);
16657 set_mem_alias_set (mem, get_varargs_alias_set ());
16658
16659 move_block_from_reg (local_cum.aapcs_ncrn + R0_REGNUM,
16660 mem, gr_saved);
16661 }
16662 if (vr_saved > 0)
16663 {
16664 /* We can't use move_block_from_reg, because it will use
16665 the wrong mode, storing D regs only. */
ef4bddc2 16666 machine_mode mode = TImode;
88e3bdd1 16667 int off, i, vr_start;
43e9d192
IB
16668
16669 /* Set OFF to the offset from virtual_incoming_args_rtx of
16670 the first vector register. The VR save area lies below
16671 the GR one, and is aligned to 16 bytes. */
4f59f9f2
UB
16672 off = -ROUND_UP (gr_saved * UNITS_PER_WORD,
16673 STACK_BOUNDARY / BITS_PER_UNIT);
43e9d192
IB
16674 off -= vr_saved * UNITS_PER_VREG;
16675
88e3bdd1
JW
16676 vr_start = V0_REGNUM + local_cum.aapcs_nvrn;
16677 for (i = 0; i < vr_saved; ++i)
43e9d192
IB
16678 {
16679 rtx ptr, mem;
16680
16681 ptr = plus_constant (Pmode, virtual_incoming_args_rtx, off);
16682 mem = gen_frame_mem (mode, ptr);
16683 set_mem_alias_set (mem, get_varargs_alias_set ());
88e3bdd1 16684 aarch64_emit_move (mem, gen_rtx_REG (mode, vr_start + i));
43e9d192
IB
16685 off += UNITS_PER_VREG;
16686 }
16687 }
16688 }
16689
16690 /* We don't save the size into *PRETEND_SIZE because we want to avoid
16691 any complication of having crtl->args.pretend_args_size changed. */
8799637a 16692 cfun->machine->frame.saved_varargs_size
4f59f9f2
UB
16693 = (ROUND_UP (gr_saved * UNITS_PER_WORD,
16694 STACK_BOUNDARY / BITS_PER_UNIT)
43e9d192
IB
16695 + vr_saved * UNITS_PER_VREG);
16696}
16697
16698static void
16699aarch64_conditional_register_usage (void)
16700{
16701 int i;
16702 if (!TARGET_FLOAT)
16703 {
16704 for (i = V0_REGNUM; i <= V31_REGNUM; i++)
16705 {
16706 fixed_regs[i] = 1;
16707 call_used_regs[i] = 1;
16708 }
16709 }
43cacb12
RS
16710 if (!TARGET_SVE)
16711 for (i = P0_REGNUM; i <= P15_REGNUM; i++)
16712 {
16713 fixed_regs[i] = 1;
16714 call_used_regs[i] = 1;
16715 }
3751345d 16716
183bfdaf
RS
16717 /* Only allow the FFR and FFRT to be accessed via special patterns. */
16718 CLEAR_HARD_REG_BIT (operand_reg_set, FFR_REGNUM);
16719 CLEAR_HARD_REG_BIT (operand_reg_set, FFRT_REGNUM);
16720
3751345d
RE
16721 /* When tracking speculation, we need a couple of call-clobbered registers
16722 to track the speculation state. It would be nice to just use
16723 IP0 and IP1, but currently there are numerous places that just
16724 assume these registers are free for other uses (eg pointer
16725 authentication). */
16726 if (aarch64_track_speculation)
16727 {
16728 fixed_regs[SPECULATION_TRACKER_REGNUM] = 1;
16729 call_used_regs[SPECULATION_TRACKER_REGNUM] = 1;
16730 fixed_regs[SPECULATION_SCRATCH_REGNUM] = 1;
16731 call_used_regs[SPECULATION_SCRATCH_REGNUM] = 1;
16732 }
43e9d192
IB
16733}
16734
38e62001
RS
16735/* Implement TARGET_MEMBER_TYPE_FORCES_BLK. */
16736
16737bool
16738aarch64_member_type_forces_blk (const_tree field_or_array, machine_mode mode)
16739{
16740 /* For records we're passed a FIELD_DECL, for arrays we're passed
16741 an ARRAY_TYPE. In both cases we're interested in the TREE_TYPE. */
16742 const_tree type = TREE_TYPE (field_or_array);
16743
16744 /* Assign BLKmode to anything that contains multiple SVE predicates.
16745 For structures, the "multiple" case is indicated by MODE being
16746 VOIDmode. */
16747 unsigned int num_zr, num_pr;
16748 if (aarch64_sve::builtin_type_p (type, &num_zr, &num_pr) && num_pr != 0)
16749 {
16750 if (TREE_CODE (field_or_array) == ARRAY_TYPE)
16751 return !simple_cst_equal (TYPE_SIZE (field_or_array),
16752 TYPE_SIZE (type));
16753 return mode == VOIDmode;
16754 }
16755
16756 return default_member_type_forces_blk (field_or_array, mode);
16757}
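/* Illustrative examples using the <arm_sve.h> ACLE types (not taken
 from the GCC sources): an array field declared as svbool_t p[2] is
 forced to BLKmode by the hook above, as is a structure that contains
 more than one SVE predicate member, matching the "multiple
 predicates" rule described in the comment. */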
16758
56fe3ca3
RS
16759/* Bitmasks that indicate whether earlier versions of GCC would have
16760 taken a different path through the ABI logic. This should result in
16761 a -Wpsabi warning if the earlier path led to a different ABI decision.
16762
16763 WARN_PSABI_EMPTY_CXX17_BASE
16764 Indicates that the type includes an artificial empty C++17 base field
16765 that, prior to GCC 10.1, would prevent the type from being treated as
16766 a HFA or HVA. See PR94383 for details.
16767
16768 WARN_PSABI_NO_UNIQUE_ADDRESS
16769 Indicates that the type includes an empty [[no_unique_address]] field
16770 that, prior to GCC 10.1, would prevent the type from being treated as
16771 a HFA or HVA. */
16772const unsigned int WARN_PSABI_EMPTY_CXX17_BASE = 1U << 0;
16773const unsigned int WARN_PSABI_NO_UNIQUE_ADDRESS = 1U << 1;
16774
43e9d192
IB
16775/* Walk down the type tree of TYPE counting consecutive base elements.
16776 If *MODEP is VOIDmode, then set it to the first valid floating point
16777 type. If a non-floating point type is found, or if a floating point
16778 type that doesn't match a non-VOIDmode *MODEP is found, then return -1,
e73a32d6
MM
16779 otherwise return the count in the sub-tree.
16780
56fe3ca3
RS
16781 The WARN_PSABI_FLAGS argument allows the caller to check whether this
16782 function has changed its behavior relative to earlier versions of GCC.
16783 Normally the argument should be nonnull and point to a zero-initialized
16784 variable. The function then records whether the ABI decision might
16785 be affected by a known fix to the ABI logic, setting the associated
16786 WARN_PSABI_* bits if so.
16787
16788 When the argument is instead a null pointer, the function tries to
16789 simulate the behavior of GCC before all such ABI fixes were made.
16790 This is useful to check whether the function returns something
16791 different after the ABI fixes. */
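/* Two illustrative inputs (plain C, not from the GCC sources):

 struct hfa { float x, y, z; };        returns 3, *MODEP == SFmode
 struct mixed { float f; double d; };  returns -1 (mismatched modes)

 The first is therefore a candidate homogeneous floating-point
 aggregate; the second is not, because the mode of its second field
 differs from the mode already recorded in *MODEP. */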
43e9d192 16792static int
e73a32d6 16793aapcs_vfp_sub_candidate (const_tree type, machine_mode *modep,
56fe3ca3 16794 unsigned int *warn_psabi_flags)
43e9d192 16795{
ef4bddc2 16796 machine_mode mode;
43e9d192
IB
16797 HOST_WIDE_INT size;
16798
38e62001
RS
16799 if (aarch64_sve::builtin_type_p (type))
16800 return -1;
c600df9a 16801
43e9d192
IB
16802 switch (TREE_CODE (type))
16803 {
16804 case REAL_TYPE:
16805 mode = TYPE_MODE (type);
1b62ed4f
JG
16806 if (mode != DFmode && mode != SFmode
16807 && mode != TFmode && mode != HFmode)
43e9d192
IB
16808 return -1;
16809
16810 if (*modep == VOIDmode)
16811 *modep = mode;
16812
16813 if (*modep == mode)
16814 return 1;
16815
16816 break;
16817
16818 case COMPLEX_TYPE:
16819 mode = TYPE_MODE (TREE_TYPE (type));
1b62ed4f
JG
16820 if (mode != DFmode && mode != SFmode
16821 && mode != TFmode && mode != HFmode)
43e9d192
IB
16822 return -1;
16823
16824 if (*modep == VOIDmode)
16825 *modep = mode;
16826
16827 if (*modep == mode)
16828 return 2;
16829
16830 break;
16831
16832 case VECTOR_TYPE:
16833 /* Use V2SImode and V4SImode as representatives of all 64-bit
16834 and 128-bit vector types. */
16835 size = int_size_in_bytes (type);
16836 switch (size)
16837 {
16838 case 8:
16839 mode = V2SImode;
16840 break;
16841 case 16:
16842 mode = V4SImode;
16843 break;
16844 default:
16845 return -1;
16846 }
16847
16848 if (*modep == VOIDmode)
16849 *modep = mode;
16850
16851 /* Vector modes are considered to be opaque: two vectors are
16852 equivalent for the purposes of being homogeneous aggregates
16853 if they are the same size. */
16854 if (*modep == mode)
16855 return 1;
16856
16857 break;
16858
16859 case ARRAY_TYPE:
16860 {
16861 int count;
16862 tree index = TYPE_DOMAIN (type);
16863
807e902e
KZ
16864 /* Can't handle incomplete types nor sizes that are not
16865 fixed. */
16866 if (!COMPLETE_TYPE_P (type)
16867 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
43e9d192
IB
16868 return -1;
16869
e73a32d6 16870 count = aapcs_vfp_sub_candidate (TREE_TYPE (type), modep,
56fe3ca3 16871 warn_psabi_flags);
43e9d192
IB
16872 if (count == -1
16873 || !index
16874 || !TYPE_MAX_VALUE (index)
cc269bb6 16875 || !tree_fits_uhwi_p (TYPE_MAX_VALUE (index))
43e9d192 16876 || !TYPE_MIN_VALUE (index)
cc269bb6 16877 || !tree_fits_uhwi_p (TYPE_MIN_VALUE (index))
43e9d192
IB
16878 || count < 0)
16879 return -1;
16880
ae7e9ddd
RS
16881 count *= (1 + tree_to_uhwi (TYPE_MAX_VALUE (index))
16882 - tree_to_uhwi (TYPE_MIN_VALUE (index)));
43e9d192
IB
16883
16884 /* There must be no padding. */
6a70badb
RS
16885 if (maybe_ne (wi::to_poly_wide (TYPE_SIZE (type)),
16886 count * GET_MODE_BITSIZE (*modep)))
43e9d192
IB
16887 return -1;
16888
16889 return count;
16890 }
16891
16892 case RECORD_TYPE:
16893 {
16894 int count = 0;
16895 int sub_count;
16896 tree field;
16897
807e902e
KZ
16898 /* Can't handle incomplete types nor sizes that are not
16899 fixed. */
16900 if (!COMPLETE_TYPE_P (type)
16901 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
43e9d192
IB
16902 return -1;
16903
16904 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
16905 {
16906 if (TREE_CODE (field) != FIELD_DECL)
16907 continue;
16908
56fe3ca3 16909 if (DECL_FIELD_ABI_IGNORED (field))
e73a32d6 16910 {
56fe3ca3
RS
16911 /* See whether this is something that earlier versions of
16912 GCC failed to ignore. */
16913 unsigned int flag;
16914 if (lookup_attribute ("no_unique_address",
16915 DECL_ATTRIBUTES (field)))
16916 flag = WARN_PSABI_NO_UNIQUE_ADDRESS;
16917 else if (cxx17_empty_base_field_p (field))
16918 flag = WARN_PSABI_EMPTY_CXX17_BASE;
16919 else
16920 /* No compatibility problem. */
16921 continue;
16922
16923 /* Simulate the old behavior when WARN_PSABI_FLAGS is null. */
16924 if (warn_psabi_flags)
16925 {
16926 *warn_psabi_flags |= flag;
16927 continue;
16928 }
e73a32d6
MM
16929 }
16930
16931 sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep,
56fe3ca3 16932 warn_psabi_flags);
43e9d192
IB
16933 if (sub_count < 0)
16934 return -1;
16935 count += sub_count;
16936 }
16937
16938 /* There must be no padding. */
6a70badb
RS
16939 if (maybe_ne (wi::to_poly_wide (TYPE_SIZE (type)),
16940 count * GET_MODE_BITSIZE (*modep)))
43e9d192
IB
16941 return -1;
16942
16943 return count;
16944 }
16945
16946 case UNION_TYPE:
16947 case QUAL_UNION_TYPE:
16948 {
16949 /* These aren't very interesting except in a degenerate case. */
16950 int count = 0;
16951 int sub_count;
16952 tree field;
16953
807e902e
KZ
16954 /* Can't handle incomplete types nor sizes that are not
16955 fixed. */
16956 if (!COMPLETE_TYPE_P (type)
16957 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
43e9d192
IB
16958 return -1;
16959
16960 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
16961 {
16962 if (TREE_CODE (field) != FIELD_DECL)
16963 continue;
16964
e73a32d6 16965 sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep,
56fe3ca3 16966 warn_psabi_flags);
43e9d192
IB
16967 if (sub_count < 0)
16968 return -1;
16969 count = count > sub_count ? count : sub_count;
16970 }
16971
16972 /* There must be no padding. */
6a70badb
RS
16973 if (maybe_ne (wi::to_poly_wide (TYPE_SIZE (type)),
16974 count * GET_MODE_BITSIZE (*modep)))
43e9d192
IB
16975 return -1;
16976
16977 return count;
16978 }
16979
16980 default:
16981 break;
16982 }
16983
16984 return -1;
16985}
16986
b6ec6215
KT
16987/* Return TRUE if the type, as described by TYPE and MODE, is a short vector
16988 type as described in AAPCS64 \S 4.1.2.
16989
16990 See the comment above aarch64_composite_type_p for the notes on MODE. */
16991
16992static bool
16993aarch64_short_vector_p (const_tree type,
16994 machine_mode mode)
16995{
6a70badb 16996 poly_int64 size = -1;
b6ec6215
KT
16997
16998 if (type && TREE_CODE (type) == VECTOR_TYPE)
38e62001
RS
16999 {
17000 if (aarch64_sve::builtin_type_p (type))
17001 return false;
17002 size = int_size_in_bytes (type);
17003 }
b6ec6215 17004 else if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
38e62001
RS
17005 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
17006 {
17007 /* Rely only on the type, not the mode, when processing SVE types. */
17008 if (type && aarch64_some_values_include_pst_objects_p (type))
b2672dd6
FY
17009 /* Leave later code to report an error if SVE is disabled. */
17010 gcc_assert (!TARGET_SVE || aarch64_sve_mode_p (mode));
38e62001
RS
17011 else
17012 size = GET_MODE_SIZE (mode);
17013 }
17014 if (known_eq (size, 8) || known_eq (size, 16))
17015 {
17016 /* 64-bit and 128-bit vectors should only acquire an SVE mode if
17017 they are being treated as scalable AAPCS64 types. */
17018 gcc_assert (!aarch64_sve_mode_p (mode));
17019 return true;
17020 }
17021 return false;
b6ec6215
KT
17022}
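/* For illustration (using <arm_neon.h> and <arm_sve.h> types, not from
 the GCC sources): int32x2_t (8 bytes) and float32x4_t (16 bytes) are
 short vectors in the sense above, whereas svint32_t is not, since SVE
 builtin types are excluded even when the vector length happens to be
 128 bits. */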
17023
43e9d192
IB
17024/* Return TRUE if the type, as described by TYPE and MODE, is a composite
17025 type as described in AAPCS64 \S 4.3. This includes aggregate, union and
17026 array types. The C99 floating-point complex types are also considered
17027 as composite types, according to AAPCS64 \S 7.1.1. The complex integer
17028 types, which are GCC extensions and out of the scope of AAPCS64, are
17029 treated as composite types here as well.
17030
17031 Note that MODE itself is not sufficient in determining whether a type
17032 is such a composite type or not. This is because
17033 stor-layout.c:compute_record_mode may have already changed the MODE
17034 (BLKmode) of a RECORD_TYPE TYPE to some other mode. For example, a
17035 structure with only one field may have its MODE set to the mode of the
17036 field. Also an integer mode whose size matches the size of the
17037 RECORD_TYPE type may be used to substitute the original mode
17038 (i.e. BLKmode) in certain circumstances. In other words, MODE cannot be
17039 solely relied on. */
17040
17041static bool
17042aarch64_composite_type_p (const_tree type,
ef4bddc2 17043 machine_mode mode)
43e9d192 17044{
b6ec6215
KT
17045 if (aarch64_short_vector_p (type, mode))
17046 return false;
17047
43e9d192
IB
17048 if (type && (AGGREGATE_TYPE_P (type) || TREE_CODE (type) == COMPLEX_TYPE))
17049 return true;
17050
17051 if (mode == BLKmode
17052 || GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT
17053 || GET_MODE_CLASS (mode) == MODE_COMPLEX_INT)
17054 return true;
17055
17056 return false;
17057}
17058
43e9d192
IB
17059/* Return TRUE if an argument, whose type is described by TYPE and MODE,
17060 shall be passed or returned in simd/fp register(s) (providing these
17061 parameter passing registers are available).
17062
17063 Upon successful return, *COUNT returns the number of needed registers,
17064 *BASE_MODE returns the mode of the individual register and when IS_HA
17065 is not NULL, *IS_HA indicates whether or not the argument is a homogeneous
56fe3ca3
RS
17066 floating-point aggregate or a homogeneous short-vector aggregate.
17067
17068 SILENT_P is true if the function should refrain from reporting any
17069 diagnostics. This should only be used if the caller is certain that
17070 any ABI decisions would eventually come through this function with
17071 SILENT_P set to false. */
43e9d192
IB
17072
17073static bool
ef4bddc2 17074aarch64_vfp_is_call_or_return_candidate (machine_mode mode,
43e9d192 17075 const_tree type,
ef4bddc2 17076 machine_mode *base_mode,
43e9d192 17077 int *count,
56fe3ca3
RS
17078 bool *is_ha,
17079 bool silent_p)
43e9d192 17080{
c600df9a
RS
17081 if (is_ha != NULL) *is_ha = false;
17082
ef4bddc2 17083 machine_mode new_mode = VOIDmode;
43e9d192
IB
17084 bool composite_p = aarch64_composite_type_p (type, mode);
17085
43e9d192
IB
17086 if ((!composite_p && GET_MODE_CLASS (mode) == MODE_FLOAT)
17087 || aarch64_short_vector_p (type, mode))
17088 {
17089 *count = 1;
17090 new_mode = mode;
17091 }
17092 else if (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT)
17093 {
17094 if (is_ha != NULL) *is_ha = true;
17095 *count = 2;
17096 new_mode = GET_MODE_INNER (mode);
17097 }
17098 else if (type && composite_p)
17099 {
56fe3ca3
RS
17100 unsigned int warn_psabi_flags = 0;
17101 int ag_count = aapcs_vfp_sub_candidate (type, &new_mode,
17102 &warn_psabi_flags);
43e9d192
IB
17103 if (ag_count > 0 && ag_count <= HA_MAX_NUM_FLDS)
17104 {
e73a32d6
MM
17105 static unsigned last_reported_type_uid;
17106 unsigned uid = TYPE_UID (TYPE_MAIN_VARIANT (type));
17107 int alt;
56fe3ca3
RS
17108 if (!silent_p
17109 && warn_psabi
17110 && warn_psabi_flags
e73a32d6
MM
17111 && uid != last_reported_type_uid
17112 && ((alt = aapcs_vfp_sub_candidate (type, &new_mode, NULL))
17113 != ag_count))
17114 {
e33a1eae
JJ
17115 const char *url
17116 = CHANGES_ROOT_URL "gcc-10/changes.html#empty_base";
e73a32d6
MM
17117 gcc_assert (alt == -1);
17118 last_reported_type_uid = uid;
56fe3ca3
RS
17119 /* Use TYPE_MAIN_VARIANT to strip any redundant const
17120 qualification. */
17121 if (warn_psabi_flags & WARN_PSABI_NO_UNIQUE_ADDRESS)
17122 inform (input_location, "parameter passing for argument of "
17123 "type %qT with %<[[no_unique_address]]%> members "
691eeb65
JJ
17124 "changed %{in GCC 10.1%}",
17125 TYPE_MAIN_VARIANT (type), url);
56fe3ca3
RS
17126 else if (warn_psabi_flags & WARN_PSABI_EMPTY_CXX17_BASE)
17127 inform (input_location, "parameter passing for argument of "
17128 "type %qT when C++17 is enabled changed to match "
691eeb65
JJ
17129 "C++14 %{in GCC 10.1%}",
17130 TYPE_MAIN_VARIANT (type), url);
e73a32d6
MM
17131 }
17132
43e9d192
IB
17133 if (is_ha != NULL) *is_ha = true;
17134 *count = ag_count;
17135 }
17136 else
17137 return false;
17138 }
17139 else
17140 return false;
17141
38e62001 17142 gcc_assert (!aarch64_sve_mode_p (new_mode));
43e9d192
IB
17143 *base_mode = new_mode;
17144 return true;
17145}
17146
17147/* Implement TARGET_STRUCT_VALUE_RTX. */
17148
17149static rtx
17150aarch64_struct_value_rtx (tree fndecl ATTRIBUTE_UNUSED,
17151 int incoming ATTRIBUTE_UNUSED)
17152{
17153 return gen_rtx_REG (Pmode, AARCH64_STRUCT_VALUE_REGNUM);
17154}
17155
17156/* Implement TARGET_VECTOR_MODE_SUPPORTED_P. */
17157static bool
ef4bddc2 17158aarch64_vector_mode_supported_p (machine_mode mode)
43e9d192 17159{
43cacb12 17160 unsigned int vec_flags = aarch64_classify_vector_mode (mode);
cc68f7c2 17161 return vec_flags != 0 && (vec_flags & VEC_STRUCT) == 0;
43e9d192
IB
17162}
17163
4aeb1ba7
RS
17164/* Return the full-width SVE vector mode for element mode MODE, if one
17165 exists. */
17166opt_machine_mode
17167aarch64_full_sve_mode (scalar_mode mode)
17168{
17169 switch (mode)
17170 {
17171 case E_DFmode:
17172 return VNx2DFmode;
17173 case E_SFmode:
17174 return VNx4SFmode;
17175 case E_HFmode:
17176 return VNx8HFmode;
02fcd8ac
RS
17177 case E_BFmode:
17178 return VNx8BFmode;
4aeb1ba7 17179 case E_DImode:
02fcd8ac 17180 return VNx2DImode;
4aeb1ba7
RS
17181 case E_SImode:
17182 return VNx4SImode;
17183 case E_HImode:
17184 return VNx8HImode;
17185 case E_QImode:
17186 return VNx16QImode;
17187 default:
17188 return opt_machine_mode ();
17189 }
17190}
17191
17192/* Return the 128-bit Advanced SIMD vector mode for element mode MODE,
17193 if it exists. */
17194opt_machine_mode
17195aarch64_vq_mode (scalar_mode mode)
17196{
17197 switch (mode)
17198 {
17199 case E_DFmode:
17200 return V2DFmode;
17201 case E_SFmode:
17202 return V4SFmode;
17203 case E_HFmode:
17204 return V8HFmode;
abbe1ed2
SMW
17205 case E_BFmode:
17206 return V8BFmode;
4aeb1ba7
RS
17207 case E_SImode:
17208 return V4SImode;
17209 case E_HImode:
17210 return V8HImode;
17211 case E_QImode:
17212 return V16QImode;
17213 case E_DImode:
17214 return V2DImode;
17215 default:
17216 return opt_machine_mode ();
17217 }
17218}
17219
b7342d25
IB
17220/* Return appropriate SIMD container
17221 for MODE within a vector of WIDTH bits. */
ef4bddc2 17222static machine_mode
43cacb12 17223aarch64_simd_container_mode (scalar_mode mode, poly_int64 width)
43e9d192 17224{
9b070057
RS
17225 if (TARGET_SVE
17226 && maybe_ne (width, 128)
17227 && known_eq (width, BITS_PER_SVE_VECTOR))
4aeb1ba7 17228 return aarch64_full_sve_mode (mode).else_mode (word_mode);
43cacb12
RS
17229
17230 gcc_assert (known_eq (width, 64) || known_eq (width, 128));
43e9d192 17231 if (TARGET_SIMD)
b7342d25 17232 {
43cacb12 17233 if (known_eq (width, 128))
4aeb1ba7 17234 return aarch64_vq_mode (mode).else_mode (word_mode);
b7342d25
IB
17235 else
17236 switch (mode)
17237 {
4e10a5a7 17238 case E_SFmode:
b7342d25 17239 return V2SFmode;
4e10a5a7 17240 case E_HFmode:
b719f884 17241 return V4HFmode;
abbe1ed2
SMW
17242 case E_BFmode:
17243 return V4BFmode;
4e10a5a7 17244 case E_SImode:
b7342d25 17245 return V2SImode;
4e10a5a7 17246 case E_HImode:
b7342d25 17247 return V4HImode;
4e10a5a7 17248 case E_QImode:
b7342d25
IB
17249 return V8QImode;
17250 default:
17251 break;
17252 }
17253 }
43e9d192
IB
17254 return word_mode;
17255}
17256
b7342d25 17257/* Return 128-bit container as the preferred SIMD mode for MODE. */
ef4bddc2 17258static machine_mode
005ba29c 17259aarch64_preferred_simd_mode (scalar_mode mode)
b7342d25 17260{
43cacb12
RS
17261 poly_int64 bits = TARGET_SVE ? BITS_PER_SVE_VECTOR : 128;
17262 return aarch64_simd_container_mode (mode, bits);
b7342d25
IB
17263}
17264
86e36728 17265/* Return a list of possible vector sizes for the vectorizer
3b357264 17266 to iterate over. */
bcc7e346 17267static unsigned int
e021fb86 17268aarch64_autovectorize_vector_modes (vector_modes *modes, bool)
3b357264 17269{
cc68f7c2
RS
17270 static const machine_mode sve_modes[] = {
17271 /* Try using full vectors for all element types. */
17272 VNx16QImode,
17273
17274 /* Try using 16-bit containers for 8-bit elements and full vectors
17275 for wider elements. */
17276 VNx8QImode,
17277
17278 /* Try using 32-bit containers for 8-bit and 16-bit elements and
17279 full vectors for wider elements. */
17280 VNx4QImode,
74166aab 17281
cc68f7c2
RS
17282 /* Try using 64-bit containers for all element types. */
17283 VNx2QImode
17284 };
17285
17286 static const machine_mode advsimd_modes[] = {
17287 /* Try using 128-bit vectors for all element types. */
17288 V16QImode,
17289
17290 /* Try using 64-bit vectors for 8-bit elements and 128-bit vectors
17291 for wider elements. */
17292 V8QImode,
17293
17294 /* Try using 64-bit vectors for 16-bit elements and 128-bit vectors
17295 for wider elements.
17296
17297 TODO: We could support a limited form of V4QImode too, so that
17298 we use 32-bit vectors for 8-bit elements. */
17299 V4HImode,
17300
17301 /* Try using 64-bit vectors for 32-bit elements and 128-bit vectors
17302 for 64-bit elements.
74166aab 17303
cc68f7c2
RS
17304 TODO: We could similarly support limited forms of V2QImode and V2HImode
17305 for this case. */
17306 V2SImode
17307 };
74166aab 17308
cc68f7c2
RS
17309 /* Try using N-byte SVE modes only after trying N-byte Advanced SIMD mode.
17310 This is because:
74166aab 17311
cc68f7c2
RS
17312 - If we can't use N-byte Advanced SIMD vectors then the placement
17313 doesn't matter; we'll just continue as though the Advanced SIMD
17314 entry didn't exist.
74166aab 17315
cc68f7c2
RS
17316 - If an SVE main loop with N bytes ends up being cheaper than an
17317 Advanced SIMD main loop with N bytes then by default we'll replace
17318 the Advanced SIMD version with the SVE one.
74166aab 17319
cc68f7c2
RS
17320 - If an Advanced SIMD main loop with N bytes ends up being cheaper
17321 than an SVE main loop with N bytes then by default we'll try to
17322 use the SVE loop to vectorize the epilogue instead. */
17323 unsigned int sve_i = TARGET_SVE ? 0 : ARRAY_SIZE (sve_modes);
17324 unsigned int advsimd_i = 0;
17325 while (advsimd_i < ARRAY_SIZE (advsimd_modes))
17326 {
17327 if (sve_i < ARRAY_SIZE (sve_modes)
17328 && maybe_gt (GET_MODE_NUNITS (sve_modes[sve_i]),
17329 GET_MODE_NUNITS (advsimd_modes[advsimd_i])))
17330 modes->safe_push (sve_modes[sve_i++]);
17331 else
17332 modes->safe_push (advsimd_modes[advsimd_i++]);
17333 }
17334 while (sve_i < ARRAY_SIZE (sve_modes))
17335 modes->safe_push (sve_modes[sve_i++]);
bcc7e346 17336
eb23241b
RS
17337 unsigned int flags = 0;
17338 /* Consider enabling VECT_COMPARE_COSTS for SVE, both so that we
17339 can compare SVE against Advanced SIMD and so that we can compare
17340 multiple SVE vectorization approaches against each other. There's
17341 not really any point doing this for Advanced SIMD only, since the
17342 first mode that works should always be the best. */
17343 if (TARGET_SVE && aarch64_sve_compare_costs)
17344 flags |= VECT_COMPARE_COSTS;
17345 return flags;
3b357264
JG
17346}
17347
ac2b960f
YZ
17348/* Implement TARGET_MANGLE_TYPE. */
17349
6f549691 17350static const char *
ac2b960f
YZ
17351aarch64_mangle_type (const_tree type)
17352{
17353 /* The AArch64 ABI documents say that "__va_list" has to be
17f8ace2 17354 mangled as if it is in the "std" namespace. */
ac2b960f
YZ
17355 if (lang_hooks.types_compatible_p (CONST_CAST_TREE (type), va_list_type))
17356 return "St9__va_list";
17357
abbe1ed2 17358 /* Half-precision floating point types. */
c2ec330c 17359 if (TREE_CODE (type) == REAL_TYPE && TYPE_PRECISION (type) == 16)
abbe1ed2
SMW
17360 {
17361 if (TYPE_MODE (type) == BFmode)
17362 return "u6__bf16";
17363 else
17364 return "Dh";
17365 }
c2ec330c 17366
f9d53c27
TB
17367 /* Mangle AArch64-specific internal types. TYPE_NAME is non-NULL_TREE for
17368 builtin types. */
17369 if (TYPE_NAME (type) != NULL)
624d0f07
RS
17370 {
17371 const char *res;
17372 if ((res = aarch64_general_mangle_builtin_type (type))
17373 || (res = aarch64_sve::mangle_builtin_type (type)))
17374 return res;
17375 }
c6fc9e43 17376
ac2b960f
YZ
17377 /* Use the default mangling. */
17378 return NULL;
17379}
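/* For example (Itanium C++ mangling, shown purely for illustration): a
 C++ function declared as

 void f (va_list, __fp16);

 would mangle as _Z1fSt9__va_listDh under the rules above, with
 va_list treated as std::__va_list and __fp16 mangled as Dh; __bf16
 likewise contributes u6__bf16 when it appears in a signature. */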
17380
65ef05d0
RS
17381/* Implement TARGET_VERIFY_TYPE_CONTEXT. */
17382
17383static bool
17384aarch64_verify_type_context (location_t loc, type_context_kind context,
17385 const_tree type, bool silent_p)
17386{
17387 return aarch64_sve::verify_type_context (loc, context, type, silent_p);
17388}
17389
75cf1494
KT
17390/* Find the first rtx_insn before insn that will generate an assembly
17391 instruction. */
17392
17393static rtx_insn *
17394aarch64_prev_real_insn (rtx_insn *insn)
17395{
17396 if (!insn)
17397 return NULL;
17398
17399 do
17400 {
17401 insn = prev_real_insn (insn);
17402 }
17403 while (insn && recog_memoized (insn) < 0);
17404
17405 return insn;
17406}
17407
17408static bool
17409is_madd_op (enum attr_type t1)
17410{
17411 unsigned int i;
17412 /* A number of these may be AArch32 only. */
17413 enum attr_type mlatypes[] = {
17414 TYPE_MLA, TYPE_MLAS, TYPE_SMLAD, TYPE_SMLADX, TYPE_SMLAL, TYPE_SMLALD,
17415 TYPE_SMLALS, TYPE_SMLALXY, TYPE_SMLAWX, TYPE_SMLAWY, TYPE_SMLAXY,
17416 TYPE_SMMLA, TYPE_UMLAL, TYPE_UMLALS, TYPE_SMLSD, TYPE_SMLSDX, TYPE_SMLSLD
17417 };
17418
17419 for (i = 0; i < sizeof (mlatypes) / sizeof (enum attr_type); i++)
17420 {
17421 if (t1 == mlatypes[i])
17422 return true;
17423 }
17424
17425 return false;
17426}
17427
17428/* Check if there is a register dependency between a load and the insn
17429 for which we hold recog_data. */
17430
17431static bool
17432dep_between_memop_and_curr (rtx memop)
17433{
17434 rtx load_reg;
17435 int opno;
17436
8baff86e 17437 gcc_assert (GET_CODE (memop) == SET);
75cf1494
KT
17438
17439 if (!REG_P (SET_DEST (memop)))
17440 return false;
17441
17442 load_reg = SET_DEST (memop);
8baff86e 17443 for (opno = 1; opno < recog_data.n_operands; opno++)
75cf1494
KT
17444 {
17445 rtx operand = recog_data.operand[opno];
17446 if (REG_P (operand)
17447 && reg_overlap_mentioned_p (load_reg, operand))
17448 return true;
17449
17450 }
17451 return false;
17452}
17453
8baff86e
KT
17454
17455/* When working around the Cortex-A53 erratum 835769,
17456 given rtx_insn INSN, return true if it is a 64-bit multiply-accumulate
17457 instruction and has a preceding memory instruction such that a NOP
17458 should be inserted between them. */
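/* Illustrative sequence (not taken from any particular code base): with
 -mfix-cortex-a53-835769, a pair such as

 ldr  x1, [x2]
 madd x0, x3, x4, x0

 gets a "nop // between mem op and mult-accumulate" emitted between the
 two by aarch64_final_prescan_insn below; if the multiply-accumulate
 instead used the freshly loaded register, the dependency check in
 dep_between_memop_and_curr above would suppress the nop. */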
17459
75cf1494
KT
17460bool
17461aarch64_madd_needs_nop (rtx_insn* insn)
17462{
17463 enum attr_type attr_type;
17464 rtx_insn *prev;
17465 rtx body;
17466
b32c1043 17467 if (!TARGET_FIX_ERR_A53_835769)
75cf1494
KT
17468 return false;
17469
e322d6e3 17470 if (!INSN_P (insn) || recog_memoized (insn) < 0)
75cf1494
KT
17471 return false;
17472
17473 attr_type = get_attr_type (insn);
17474 if (!is_madd_op (attr_type))
17475 return false;
17476
17477 prev = aarch64_prev_real_insn (insn);
3fea1a75
KT
17478 /* aarch64_prev_real_insn can call recog_memoized on insns other than INSN.
17479 Restore recog state to INSN to avoid state corruption. */
17480 extract_constrain_insn_cached (insn);
17481
550e2205 17482 if (!prev || !contains_mem_rtx_p (PATTERN (prev)))
75cf1494
KT
17483 return false;
17484
17485 body = single_set (prev);
17486
17487 /* If the previous insn is a memory op and there is no dependency between
8baff86e
KT
17488 it and the DImode madd, emit a NOP between them. If body is NULL then we
17489 have a complex memory operation, probably a load/store pair.
17490 Be conservative for now and emit a NOP. */
17491 if (GET_MODE (recog_data.operand[0]) == DImode
17492 && (!body || !dep_between_memop_and_curr (body)))
75cf1494
KT
17493 return true;
17494
17495 return false;
17496
17497}
17498
8baff86e
KT
17499
17500/* Implement FINAL_PRESCAN_INSN. */
17501
75cf1494
KT
17502void
17503aarch64_final_prescan_insn (rtx_insn *insn)
17504{
17505 if (aarch64_madd_needs_nop (insn))
17506 fprintf (asm_out_file, "\tnop // between mem op and mult-accumulate\n");
17507}
17508
17509
43cacb12
RS
17510/* Return true if BASE_OR_STEP is a valid immediate operand for an SVE INDEX
17511 instruction. */
17512
17513bool
17514aarch64_sve_index_immediate_p (rtx base_or_step)
17515{
17516 return (CONST_INT_P (base_or_step)
17517 && IN_RANGE (INTVAL (base_or_step), -16, 15));
17518}
17519
f3582fda
RS
17520/* Return true if X is a valid immediate for the SVE ADD and SUB instructions
17521 when applied to mode MODE. Negate X first if NEGATE_P is true. */
43cacb12
RS
17522
17523bool
f3582fda 17524aarch64_sve_arith_immediate_p (machine_mode mode, rtx x, bool negate_p)
43cacb12 17525{
f3582fda
RS
17526 rtx elt = unwrap_const_vec_duplicate (x);
17527 if (!CONST_INT_P (elt))
43cacb12
RS
17528 return false;
17529
17530 HOST_WIDE_INT val = INTVAL (elt);
17531 if (negate_p)
17532 val = -val;
f3582fda 17533 val &= GET_MODE_MASK (GET_MODE_INNER (mode));
43cacb12
RS
17534
17535 if (val & 0xff)
17536 return IN_RANGE (val, 0, 0xff);
17537 return IN_RANGE (val, 0, 0xff00);
17538}
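/* Worked examples (illustrative only): a duplicated 0x2a is accepted
 directly (low byte nonzero, value <= 0xff); a duplicated 0x1200 is
 accepted and maps to the instruction's LSL #8 form (low byte zero,
 value <= 0xff00); a duplicated 0x101 is rejected because it needs
 both byte positions at once. */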
17539
624d0f07 17540/* Return true if X is a valid immediate for the SVE SQADD and SQSUB
f3582fda
RS
17541 instructions when applied to mode MODE. Negate X first if NEGATE_P
17542 is true. */
624d0f07
RS
17543
17544bool
f3582fda 17545aarch64_sve_sqadd_sqsub_immediate_p (machine_mode mode, rtx x, bool negate_p)
624d0f07 17546{
f3582fda 17547 if (!aarch64_sve_arith_immediate_p (mode, x, negate_p))
624d0f07
RS
17548 return false;
17549
17550 /* After the optional negation, the immediate must be nonnegative.
17551 E.g. a saturating add of -127 must be done via SQSUB Zn.B, Zn.B, #127
17552 instead of SQADD Zn.B, Zn.B, #129. */
f3582fda 17553 rtx elt = unwrap_const_vec_duplicate (x);
624d0f07
RS
17554 return negate_p == (INTVAL (elt) < 0);
17555}
17556
43cacb12
RS
17557/* Return true if X is a valid immediate operand for an SVE logical
17558 instruction such as AND. */
17559
17560bool
17561aarch64_sve_bitmask_immediate_p (rtx x)
17562{
17563 rtx elt;
17564
17565 return (const_vec_duplicate_p (x, &elt)
17566 && CONST_INT_P (elt)
17567 && aarch64_bitmask_imm (INTVAL (elt),
17568 GET_MODE_INNER (GET_MODE (x))));
17569}
17570
17571/* Return true if X is a valid immediate for the SVE DUP and CPY
17572 instructions. */
17573
17574bool
17575aarch64_sve_dup_immediate_p (rtx x)
17576{
d29f7dd5
RS
17577 x = aarch64_bit_representation (unwrap_const_vec_duplicate (x));
17578 if (!CONST_INT_P (x))
43cacb12
RS
17579 return false;
17580
d29f7dd5 17581 HOST_WIDE_INT val = INTVAL (x);
43cacb12
RS
17582 if (val & 0xff)
17583 return IN_RANGE (val, -0x80, 0x7f);
17584 return IN_RANGE (val, -0x8000, 0x7f00);
17585}
17586
17587/* Return true if X is a valid immediate operand for an SVE CMP instruction.
17588 SIGNED_P says whether the operand is signed rather than unsigned. */
17589
17590bool
17591aarch64_sve_cmp_immediate_p (rtx x, bool signed_p)
17592{
6bc67182
RS
17593 x = unwrap_const_vec_duplicate (x);
17594 return (CONST_INT_P (x)
43cacb12 17595 && (signed_p
6bc67182
RS
17596 ? IN_RANGE (INTVAL (x), -16, 15)
17597 : IN_RANGE (INTVAL (x), 0, 127)));
43cacb12
RS
17598}
17599
17600/* Return true if X is a valid immediate operand for an SVE FADD or FSUB
17601 instruction. Negate X first if NEGATE_P is true. */
17602
17603bool
17604aarch64_sve_float_arith_immediate_p (rtx x, bool negate_p)
17605{
17606 rtx elt;
17607 REAL_VALUE_TYPE r;
17608
17609 if (!const_vec_duplicate_p (x, &elt)
17610 || GET_CODE (elt) != CONST_DOUBLE)
17611 return false;
17612
17613 r = *CONST_DOUBLE_REAL_VALUE (elt);
17614
17615 if (negate_p)
17616 r = real_value_negate (&r);
17617
17618 if (real_equal (&r, &dconst1))
17619 return true;
17620 if (real_equal (&r, &dconsthalf))
17621 return true;
17622 return false;
17623}
17624
17625/* Return true if X is a valid immediate operand for an SVE FMUL
17626 instruction. */
17627
17628bool
17629aarch64_sve_float_mul_immediate_p (rtx x)
17630{
17631 rtx elt;
17632
43cacb12
RS
17633 return (const_vec_duplicate_p (x, &elt)
17634 && GET_CODE (elt) == CONST_DOUBLE
a19ba9e1
RS
17635 && (real_equal (CONST_DOUBLE_REAL_VALUE (elt), &dconsthalf)
17636 || real_equal (CONST_DOUBLE_REAL_VALUE (elt), &dconst2)));
43cacb12
RS
17637}
17638
b187677b
RS
17639/* Return true if replicating VAL32 is a valid 2-byte or 4-byte immediate
17640 for the Advanced SIMD operation described by WHICH and INSN. If INFO
17641 is nonnull, use it to describe valid immediates. */
3520f7cc 17642static bool
b187677b
RS
17643aarch64_advsimd_valid_immediate_hs (unsigned int val32,
17644 simd_immediate_info *info,
17645 enum simd_immediate_check which,
17646 simd_immediate_info::insn_type insn)
17647{
17648 /* Try a 4-byte immediate with LSL. */
17649 for (unsigned int shift = 0; shift < 32; shift += 8)
17650 if ((val32 & (0xff << shift)) == val32)
17651 {
17652 if (info)
17653 *info = simd_immediate_info (SImode, val32 >> shift, insn,
17654 simd_immediate_info::LSL, shift);
17655 return true;
17656 }
3520f7cc 17657
b187677b
RS
17658 /* Try a 2-byte immediate with LSL. */
17659 unsigned int imm16 = val32 & 0xffff;
17660 if (imm16 == (val32 >> 16))
17661 for (unsigned int shift = 0; shift < 16; shift += 8)
17662 if ((imm16 & (0xff << shift)) == imm16)
48063b9d 17663 {
b187677b
RS
17664 if (info)
17665 *info = simd_immediate_info (HImode, imm16 >> shift, insn,
17666 simd_immediate_info::LSL, shift);
17667 return true;
48063b9d 17668 }
3520f7cc 17669
b187677b
RS
17670 /* Try a 4-byte immediate with MSL, except for cases that MVN
17671 can handle. */
17672 if (which == AARCH64_CHECK_MOV)
17673 for (unsigned int shift = 8; shift < 24; shift += 8)
17674 {
17675 unsigned int low = (1 << shift) - 1;
17676 if (((val32 & (0xff << shift)) | low) == val32)
17677 {
17678 if (info)
17679 *info = simd_immediate_info (SImode, val32 >> shift, insn,
17680 simd_immediate_info::MSL, shift);
17681 return true;
17682 }
17683 }
43e9d192 17684
b187677b
RS
17685 return false;
17686}
17687
17688/* Return true if replicating VAL64 is a valid immediate for the
17689 Advanced SIMD operation described by WHICH. If INFO is nonnull,
17690 use it to describe valid immediates. */
17691static bool
17692aarch64_advsimd_valid_immediate (unsigned HOST_WIDE_INT val64,
17693 simd_immediate_info *info,
17694 enum simd_immediate_check which)
17695{
17696 unsigned int val32 = val64 & 0xffffffff;
17697 unsigned int val16 = val64 & 0xffff;
17698 unsigned int val8 = val64 & 0xff;
17699
17700 if (val32 == (val64 >> 32))
43e9d192 17701 {
b187677b
RS
17702 if ((which & AARCH64_CHECK_ORR) != 0
17703 && aarch64_advsimd_valid_immediate_hs (val32, info, which,
17704 simd_immediate_info::MOV))
17705 return true;
43e9d192 17706
b187677b
RS
17707 if ((which & AARCH64_CHECK_BIC) != 0
17708 && aarch64_advsimd_valid_immediate_hs (~val32, info, which,
17709 simd_immediate_info::MVN))
17710 return true;
ee78df47 17711
b187677b
RS
17712 /* Try using a replicated byte. */
17713 if (which == AARCH64_CHECK_MOV
17714 && val16 == (val32 >> 16)
17715 && val8 == (val16 >> 8))
ee78df47 17716 {
b187677b
RS
17717 if (info)
17718 *info = simd_immediate_info (QImode, val8);
17719 return true;
ee78df47 17720 }
43e9d192
IB
17721 }
17722
b187677b
RS
17723 /* Try using a bit-to-bytemask. */
17724 if (which == AARCH64_CHECK_MOV)
43e9d192 17725 {
b187677b
RS
17726 unsigned int i;
17727 for (i = 0; i < 64; i += 8)
ab6501d7 17728 {
b187677b
RS
17729 unsigned char byte = (val64 >> i) & 0xff;
17730 if (byte != 0 && byte != 0xff)
17731 break;
ab6501d7 17732 }
b187677b 17733 if (i == 64)
ab6501d7 17734 {
b187677b
RS
17735 if (info)
17736 *info = simd_immediate_info (DImode, val64);
17737 return true;
ab6501d7 17738 }
43e9d192 17739 }
b187677b
RS
17740 return false;
17741}
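/* Worked examples (illustrative only): VAL64 == 0x000000ff000000ff
 replicates VAL32 == 0x000000ff and is accepted as a 4-byte immediate
 with LSL #0; VAL64 == 0x4141414141414141 is accepted as a replicated
 byte 0x41; VAL64 == 0xff000000000000ff is accepted by the final
 bit-to-bytemask test, since every byte is either 0x00 or 0xff; and
 VAL64 == 0x0000000000001234 fails all of the tests above. */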
43e9d192 17742
43cacb12
RS
17743/* Return true if replicating VAL64 gives a valid immediate for an SVE MOV
17744 instruction. If INFO is nonnull, use it to describe valid immediates. */
17745
17746static bool
17747aarch64_sve_valid_immediate (unsigned HOST_WIDE_INT val64,
17748 simd_immediate_info *info)
17749{
17750 scalar_int_mode mode = DImode;
17751 unsigned int val32 = val64 & 0xffffffff;
17752 if (val32 == (val64 >> 32))
17753 {
17754 mode = SImode;
17755 unsigned int val16 = val32 & 0xffff;
17756 if (val16 == (val32 >> 16))
17757 {
17758 mode = HImode;
17759 unsigned int val8 = val16 & 0xff;
17760 if (val8 == (val16 >> 8))
17761 mode = QImode;
17762 }
17763 }
17764 HOST_WIDE_INT val = trunc_int_for_mode (val64, mode);
17765 if (IN_RANGE (val, -0x80, 0x7f))
17766 {
17767 /* DUP with no shift. */
17768 if (info)
17769 *info = simd_immediate_info (mode, val);
17770 return true;
17771 }
17772 if ((val & 0xff) == 0 && IN_RANGE (val, -0x8000, 0x7f00))
17773 {
17774 /* DUP with LSL #8. */
17775 if (info)
17776 *info = simd_immediate_info (mode, val);
17777 return true;
17778 }
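 /* For example, a vector that replicates the halfword 0x2000 reaches this
    point with mode == HImode and val == 0x2000, which the check above
    accepts as DUP #0x20, LSL #8.  */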
17779 if (aarch64_bitmask_imm (val64, mode))
17780 {
17781 /* DUPM. */
17782 if (info)
17783 *info = simd_immediate_info (mode, val);
17784 return true;
17785 }
17786 return false;
17787}
17788
624d0f07
RS
17789/* Return true if X is an UNSPEC_PTRUE constant of the form:
17790
17791 (const (unspec [PATTERN ZERO] UNSPEC_PTRUE))
17792
17793 where PATTERN is the svpattern as a CONST_INT and where ZERO
17794 is a zero constant of the required PTRUE mode (which can have
17795 fewer elements than X's mode, if zero bits are significant).
17796
17797 If so, and if INFO is nonnull, describe the immediate in INFO. */
17798bool
17799aarch64_sve_ptrue_svpattern_p (rtx x, struct simd_immediate_info *info)
17800{
17801 if (GET_CODE (x) != CONST)
17802 return false;
17803
17804 x = XEXP (x, 0);
17805 if (GET_CODE (x) != UNSPEC || XINT (x, 1) != UNSPEC_PTRUE)
17806 return false;
17807
17808 if (info)
17809 {
17810 aarch64_svpattern pattern
17811 = (aarch64_svpattern) INTVAL (XVECEXP (x, 0, 0));
17812 machine_mode pred_mode = GET_MODE (XVECEXP (x, 0, 1));
17813 scalar_int_mode int_mode = aarch64_sve_element_int_mode (pred_mode);
17814 *info = simd_immediate_info (int_mode, pattern);
17815 }
17816 return true;
17817}
17818
0b1fe8cf
RS
17819/* Return true if X is a valid SVE predicate. If INFO is nonnull, use
17820 it to describe valid immediates. */
17821
17822static bool
17823aarch64_sve_pred_valid_immediate (rtx x, simd_immediate_info *info)
17824{
624d0f07
RS
17825 if (aarch64_sve_ptrue_svpattern_p (x, info))
17826 return true;
17827
0b1fe8cf
RS
17828 if (x == CONST0_RTX (GET_MODE (x)))
17829 {
17830 if (info)
17831 *info = simd_immediate_info (DImode, 0);
17832 return true;
17833 }
17834
17835 /* Analyze the value as a VNx16BImode. This should be relatively
17836 efficient, since rtx_vector_builder has enough built-in capacity
17837 to store all VLA predicate constants without needing the heap. */
17838 rtx_vector_builder builder;
17839 if (!aarch64_get_sve_pred_bits (builder, x))
17840 return false;
17841
17842 unsigned int elt_size = aarch64_widest_sve_pred_elt_size (builder);
17843 if (int vl = aarch64_partial_ptrue_length (builder, elt_size))
17844 {
17845 machine_mode mode = aarch64_sve_pred_mode (elt_size).require ();
17846 aarch64_svpattern pattern = aarch64_svpattern_for_vl (mode, vl);
17847 if (pattern != AARCH64_NUM_SVPATTERNS)
17848 {
17849 if (info)
17850 {
17851 scalar_int_mode int_mode = aarch64_sve_element_int_mode (mode);
17852 *info = simd_immediate_info (int_mode, pattern);
17853 }
17854 return true;
17855 }
17856 }
17857 return false;
17858}
17859
b187677b
RS
17860/* Return true if OP is a valid SIMD immediate for the operation
17861 described by WHICH. If INFO is nonnull, use it to describe valid
17862 immediates. */
17863bool
17864aarch64_simd_valid_immediate (rtx op, simd_immediate_info *info,
17865 enum simd_immediate_check which)
17866{
43cacb12
RS
17867 machine_mode mode = GET_MODE (op);
17868 unsigned int vec_flags = aarch64_classify_vector_mode (mode);
17869 if (vec_flags == 0 || vec_flags == (VEC_ADVSIMD | VEC_STRUCT))
17870 return false;
17871
0b1fe8cf
RS
17872 if (vec_flags & VEC_SVE_PRED)
17873 return aarch64_sve_pred_valid_immediate (op, info);
17874
43cacb12 17875 scalar_mode elt_mode = GET_MODE_INNER (mode);
f9093f23 17876 rtx base, step;
b187677b 17877 unsigned int n_elts;
f9093f23
RS
17878 if (GET_CODE (op) == CONST_VECTOR
17879 && CONST_VECTOR_DUPLICATE_P (op))
17880 n_elts = CONST_VECTOR_NPATTERNS (op);
43cacb12
RS
17881 else if ((vec_flags & VEC_SVE_DATA)
17882 && const_vec_series_p (op, &base, &step))
17883 {
17884 gcc_assert (GET_MODE_CLASS (mode) == MODE_VECTOR_INT);
17885 if (!aarch64_sve_index_immediate_p (base)
17886 || !aarch64_sve_index_immediate_p (step))
17887 return false;
17888
17889 if (info)
cc68f7c2
RS
17890 {
17891 /* Get the corresponding container mode. E.g. an INDEX on V2SI
17892 should yield two integer values per 128-bit block, meaning
17893 that we need to treat it in the same way as V2DI and then
17894 ignore the upper 32 bits of each element. */
17895 elt_mode = aarch64_sve_container_int_mode (mode);
17896 *info = simd_immediate_info (elt_mode, base, step);
17897 }
43cacb12
RS
17898 return true;
17899 }
6a70badb
RS
17900 else if (GET_CODE (op) == CONST_VECTOR
17901 && CONST_VECTOR_NUNITS (op).is_constant (&n_elts))
17902 /* N_ELTS set above. */;
b187677b 17903 else
d8edd899 17904 return false;
43e9d192 17905
b187677b 17906 scalar_float_mode elt_float_mode;
f9093f23
RS
17907 if (n_elts == 1
17908 && is_a <scalar_float_mode> (elt_mode, &elt_float_mode))
43e9d192 17909 {
f9093f23
RS
17910 rtx elt = CONST_VECTOR_ENCODED_ELT (op, 0);
17911 if (aarch64_float_const_zero_rtx_p (elt)
17912 || aarch64_float_const_representable_p (elt))
17913 {
17914 if (info)
17915 *info = simd_immediate_info (elt_float_mode, elt);
17916 return true;
17917 }
b187677b 17918 }
43e9d192 17919
b23c6a2c
RS
17920 /* If all elements in an SVE vector have the same value, we have a free
17921 choice between using the element mode and using the container mode.
17922 Using the element mode means that unused parts of the vector are
17923 duplicates of the used elements, while using the container mode means
17924 that the unused parts are an extension of the used elements. Using the
17925 element mode is better for (say) VNx4HI 0x101, since 0x01010101 is valid
17926 for its container mode VNx4SI while 0x00000101 isn't.
17927
17928 If not all elements in an SVE vector have the same value, we need the
17929 transition from one element to the next to occur at container boundaries.
17930 E.g. a fixed-length VNx4HI containing { 1, 2, 3, 4 } should be treated
17931 in the same way as a VNx4SI containing { 1, 2, 3, 4 }. */
17932 scalar_int_mode elt_int_mode;
17933 if ((vec_flags & VEC_SVE_DATA) && n_elts > 1)
17934 elt_int_mode = aarch64_sve_container_int_mode (mode);
17935 else
17936 elt_int_mode = int_mode_for_mode (elt_mode).require ();
17937
17938 unsigned int elt_size = GET_MODE_SIZE (elt_int_mode);
b187677b
RS
17939 if (elt_size > 8)
17940 return false;
e4f0f84d 17941
b187677b
RS
17942 /* Expand the vector constant out into a byte vector, with the least
17943 significant byte of the register first. */
17944 auto_vec<unsigned char, 16> bytes;
17945 bytes.reserve (n_elts * elt_size);
17946 for (unsigned int i = 0; i < n_elts; i++)
17947 {
f9093f23
RS
17948 /* The vector is provided in gcc endian-neutral fashion.
17949 For aarch64_be Advanced SIMD, it must be laid out in the vector
17950 register in reverse order. */
17951 bool swap_p = ((vec_flags & VEC_ADVSIMD) != 0 && BYTES_BIG_ENDIAN);
17952 rtx elt = CONST_VECTOR_ELT (op, swap_p ? (n_elts - 1 - i) : i);
43e9d192 17953
b187677b
RS
17954 if (elt_mode != elt_int_mode)
17955 elt = gen_lowpart (elt_int_mode, elt);
43e9d192 17956
b187677b
RS
17957 if (!CONST_INT_P (elt))
17958 return false;
43e9d192 17959
b187677b
RS
17960 unsigned HOST_WIDE_INT elt_val = INTVAL (elt);
17961 for (unsigned int byte = 0; byte < elt_size; byte++)
48063b9d 17962 {
b187677b
RS
17963 bytes.quick_push (elt_val & 0xff);
17964 elt_val >>= BITS_PER_UNIT;
48063b9d 17965 }
43e9d192
IB
17966 }
17967
b187677b
RS
17968 /* The immediate must repeat every eight bytes. */
17969 unsigned int nbytes = bytes.length ();
17970 for (unsigned i = 8; i < nbytes; ++i)
17971 if (bytes[i] != bytes[i - 8])
17972 return false;
17973
17974 /* Get the repeating 8-byte value as an integer. No endian correction
17975 is needed here because bytes is already in lsb-first order. */
17976 unsigned HOST_WIDE_INT val64 = 0;
17977 for (unsigned int i = 0; i < 8; i++)
17978 val64 |= ((unsigned HOST_WIDE_INT) bytes[i % nbytes]
17979 << (i * BITS_PER_UNIT));
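 /* For example, a V4HImode constant { 0x0102, 0x0102, 0x0102, 0x0102 }
    produces the byte sequence { 02, 01, 02, 01, ... } and therefore
    val64 == 0x0102010201020102.  */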
17980
43cacb12
RS
17981 if (vec_flags & VEC_SVE_DATA)
17982 return aarch64_sve_valid_immediate (val64, info);
17983 else
17984 return aarch64_advsimd_valid_immediate (val64, info, which);
17985}
17986
17987/* Check whether X is a VEC_SERIES-like constant that starts at 0 and
17988 has a step in the range accepted by the SVE INDEX instruction.
17989 Return the step if so, otherwise return null. */
17990rtx
17991aarch64_check_zero_based_sve_index_immediate (rtx x)
17992{
17993 rtx base, step;
17994 if (const_vec_series_p (x, &base, &step)
17995 && base == const0_rtx
17996 && aarch64_sve_index_immediate_p (step))
17997 return step;
17998 return NULL_RTX;
43e9d192
IB
17999}
18000
43e9d192
IB
18001/* Check whether immediate shift constants are within range. */
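/* For example, for a vector of 16-bit elements a left-shift count must be
   in [0, 15] and a right-shift count in [1, 16].  */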
18002bool
ef4bddc2 18003aarch64_simd_shift_imm_p (rtx x, machine_mode mode, bool left)
43e9d192 18004{
6bc67182
RS
18005 x = unwrap_const_vec_duplicate (x);
18006 if (!CONST_INT_P (x))
18007 return false;
43e9d192
IB
18008 int bit_width = GET_MODE_UNIT_SIZE (mode) * BITS_PER_UNIT;
18009 if (left)
6bc67182 18010 return IN_RANGE (INTVAL (x), 0, bit_width - 1);
43e9d192 18011 else
6bc67182 18012 return IN_RANGE (INTVAL (x), 1, bit_width);
43e9d192
IB
18013}
18014
7325d85a
KT
18015/* Return the bitmask CONST_INT to select the bits required by a zero extract
18016 operation of width WIDTH at bit position POS. */
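/* For example, WIDTH == 4 and POS == 8 give the CONST_INT 0xf00.  */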
18017
18018rtx
18019aarch64_mask_from_zextract_ops (rtx width, rtx pos)
18020{
18021 gcc_assert (CONST_INT_P (width));
18022 gcc_assert (CONST_INT_P (pos));
18023
18024 unsigned HOST_WIDE_INT mask
18025 = ((unsigned HOST_WIDE_INT) 1 << UINTVAL (width)) - 1;
18026 return GEN_INT (mask << UINTVAL (pos));
18027}
18028
83f8c414 18029bool
a6e0bfa7 18030aarch64_mov_operand_p (rtx x, machine_mode mode)
83f8c414 18031{
83f8c414
CSS
18032 if (GET_CODE (x) == HIGH
18033 && aarch64_valid_symref (XEXP (x, 0), GET_MODE (XEXP (x, 0))))
18034 return true;
18035
82614948 18036 if (CONST_INT_P (x))
83f8c414
CSS
18037 return true;
18038
43cacb12 18039 if (VECTOR_MODE_P (GET_MODE (x)))
678faefc
RS
18040 {
18041 /* Require predicate constants to be VNx16BI before RA, so that we
18042 force everything to have a canonical form. */
18043 if (!lra_in_progress
18044 && !reload_completed
18045 && GET_MODE_CLASS (GET_MODE (x)) == MODE_VECTOR_BOOL
18046 && GET_MODE (x) != VNx16BImode)
18047 return false;
18048
18049 return aarch64_simd_valid_immediate (x, NULL);
18050 }
43cacb12 18051
74b27d8e 18052 x = strip_salt (x);
83f8c414
CSS
18053 if (GET_CODE (x) == SYMBOL_REF && mode == DImode && CONSTANT_ADDRESS_P (x))
18054 return true;
18055
c0e0174b 18056 if (TARGET_SVE && aarch64_sve_cnt_immediate_p (x))
43cacb12
RS
18057 return true;
18058
a6e0bfa7 18059 return aarch64_classify_symbolic_expression (x)
a5350ddc 18060 == SYMBOL_TINY_ABSOLUTE;
83f8c414
CSS
18061}
18062
43e9d192
IB
18063/* Return a const_int vector of VAL. */
18064rtx
ab014eb3 18065aarch64_simd_gen_const_vector_dup (machine_mode mode, HOST_WIDE_INT val)
43e9d192 18066{
59d06c05
RS
18067 rtx c = gen_int_mode (val, GET_MODE_INNER (mode));
18068 return gen_const_vec_duplicate (mode, c);
43e9d192
IB
18069}
18070
051d0e2f
SN
18071/* Check OP is a legal scalar immediate for the MOVI instruction. */
18072
18073bool
77e994c9 18074aarch64_simd_scalar_immediate_valid_for_move (rtx op, scalar_int_mode mode)
051d0e2f 18075{
ef4bddc2 18076 machine_mode vmode;
051d0e2f 18077
43cacb12 18078 vmode = aarch64_simd_container_mode (mode, 64);
051d0e2f 18079 rtx op_v = aarch64_simd_gen_const_vector_dup (vmode, INTVAL (op));
b187677b 18080 return aarch64_simd_valid_immediate (op_v, NULL);
051d0e2f
SN
18081}
18082
988fa693
JG
18083/* Construct and return a PARALLEL RTX vector with elements numbering the
18084 lanes of either the high (HIGH == TRUE) or low (HIGH == FALSE) half of
18085 the vector - from the perspective of the architecture. This does not
18086 line up with GCC's perspective on lane numbers, so we end up with
18087 different masks depending on our target endian-ness. The diagram
18088 below may help. We must draw the distinction when building masks
18089 which select one half of the vector. An instruction selecting
18090 architectural low-lanes for a big-endian target must be described using
18091 a mask selecting GCC high-lanes.
18092
18093 Big-Endian Little-Endian
18094
18095GCC 0 1 2 3 3 2 1 0
18096 | x | x | x | x | | x | x | x | x |
18097Architecture 3 2 1 0 3 2 1 0
18098
18099Low Mask: { 2, 3 } { 0, 1 }
18100High Mask: { 0, 1 } { 2, 3 }
f5cbabc1
RS
18101
18102 MODE Is the mode of the vector and NUNITS is the number of units in it. */
988fa693 18103
43e9d192 18104rtx
f5cbabc1 18105aarch64_simd_vect_par_cnst_half (machine_mode mode, int nunits, bool high)
43e9d192 18106{
43e9d192 18107 rtvec v = rtvec_alloc (nunits / 2);
988fa693
JG
18108 int high_base = nunits / 2;
18109 int low_base = 0;
18110 int base;
43e9d192
IB
18111 rtx t1;
18112 int i;
18113
988fa693
JG
18114 if (BYTES_BIG_ENDIAN)
18115 base = high ? low_base : high_base;
18116 else
18117 base = high ? high_base : low_base;
18118
18119 for (i = 0; i < nunits / 2; i++)
43e9d192
IB
18120 RTVEC_ELT (v, i) = GEN_INT (base + i);
18121
18122 t1 = gen_rtx_PARALLEL (mode, v);
18123 return t1;
18124}
18125
988fa693
JG
18126/* Check OP for validity as a PARALLEL RTX vector with elements
18127 numbering the lanes of either the high (HIGH == TRUE) or low lanes,
18128 from the perspective of the architecture. See the diagram above
18129 aarch64_simd_vect_par_cnst_half for more details. */
18130
18131bool
ef4bddc2 18132aarch64_simd_check_vect_par_cnst_half (rtx op, machine_mode mode,
988fa693
JG
18133 bool high)
18134{
6a70badb
RS
18135 int nelts;
18136 if (!VECTOR_MODE_P (mode) || !GET_MODE_NUNITS (mode).is_constant (&nelts))
f5cbabc1
RS
18137 return false;
18138
6a70badb 18139 rtx ideal = aarch64_simd_vect_par_cnst_half (mode, nelts, high);
988fa693
JG
18140 HOST_WIDE_INT count_op = XVECLEN (op, 0);
18141 HOST_WIDE_INT count_ideal = XVECLEN (ideal, 0);
18142 int i = 0;
18143
988fa693
JG
18144 if (count_op != count_ideal)
18145 return false;
18146
18147 for (i = 0; i < count_ideal; i++)
18148 {
18149 rtx elt_op = XVECEXP (op, 0, i);
18150 rtx elt_ideal = XVECEXP (ideal, 0, i);
18151
4aa81c2e 18152 if (!CONST_INT_P (elt_op)
988fa693
JG
18153 || INTVAL (elt_ideal) != INTVAL (elt_op))
18154 return false;
18155 }
18156 return true;
18157}
18158
4aeb1ba7
RS
18159/* Return a PARALLEL containing NELTS elements, with element I equal
18160 to BASE + I * STEP. */
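/* For example, NELTS == 4, BASE == 1 and STEP == 2 produce
   (parallel [1 3 5 7]).  */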
18161
18162rtx
18163aarch64_gen_stepped_int_parallel (unsigned int nelts, int base, int step)
18164{
18165 rtvec vec = rtvec_alloc (nelts);
18166 for (unsigned int i = 0; i < nelts; ++i)
18167 RTVEC_ELT (vec, i) = gen_int_mode (base + i * step, DImode);
18168 return gen_rtx_PARALLEL (VOIDmode, vec);
18169}
18170
18171/* Return true if OP is a PARALLEL of CONST_INTs that form a linear
18172 series with step STEP. */
18173
18174bool
18175aarch64_stepped_int_parallel_p (rtx op, int step)
18176{
18177 if (GET_CODE (op) != PARALLEL || !CONST_INT_P (XVECEXP (op, 0, 0)))
18178 return false;
18179
18180 unsigned HOST_WIDE_INT base = UINTVAL (XVECEXP (op, 0, 0));
18181 for (int i = 1; i < XVECLEN (op, 0); ++i)
18182 if (!CONST_INT_P (XVECEXP (op, 0, i))
18183 || UINTVAL (XVECEXP (op, 0, i)) != base + i * step)
18184 return false;
18185
18186 return true;
18187}
18188
43e9d192
IB
18189/* Bounds-check lanes. Ensure OPERAND lies between LOW (inclusive) and
18190 HIGH (exclusive). */
18191void
46ed6024
CB
18192aarch64_simd_lane_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high,
18193 const_tree exp)
43e9d192
IB
18194{
18195 HOST_WIDE_INT lane;
4aa81c2e 18196 gcc_assert (CONST_INT_P (operand));
43e9d192
IB
18197 lane = INTVAL (operand);
18198
18199 if (lane < low || lane >= high)
46ed6024
CB
18200 {
18201 if (exp)
cf0c27ef 18202 error ("%Klane %wd out of range %wd - %wd", exp, lane, low, high - 1);
46ed6024 18203 else
cf0c27ef 18204 error ("lane %wd out of range %wd - %wd", lane, low, high - 1);
46ed6024 18205 }
43e9d192
IB
18206}
18207
7ac29c0f
RS
18208/* Perform endian correction on lane number N, which indexes a vector
18209 of mode MODE, and return the result as an SImode rtx. */
18210
18211rtx
18212aarch64_endian_lane_rtx (machine_mode mode, unsigned int n)
18213{
18214 return gen_int_mode (ENDIAN_LANE_N (GET_MODE_NUNITS (mode), n), SImode);
18215}
18216
43e9d192 18217/* Return TRUE if OP is a valid vector addressing mode. */
43cacb12 18218
43e9d192
IB
18219bool
18220aarch64_simd_mem_operand_p (rtx op)
18221{
18222 return MEM_P (op) && (GET_CODE (XEXP (op, 0)) == POST_INC
4aa81c2e 18223 || REG_P (XEXP (op, 0)));
43e9d192
IB
18224}
18225
43cacb12
RS
18226/* Return true if OP is a valid MEM operand for an SVE LD1R instruction. */
18227
18228bool
18229aarch64_sve_ld1r_operand_p (rtx op)
18230{
18231 struct aarch64_address_info addr;
18232 scalar_mode mode;
18233
18234 return (MEM_P (op)
18235 && is_a <scalar_mode> (GET_MODE (op), &mode)
18236 && aarch64_classify_address (&addr, XEXP (op, 0), mode, false)
18237 && addr.type == ADDRESS_REG_IMM
18238 && offset_6bit_unsigned_scaled_p (mode, addr.const_offset));
18239}
18240
9ceec73f
MM
18241/* Return true if OP is a valid MEM operand for an SVE LD1R{Q,O} instruction
18242 where the size of the read data is specified by `mode` and the size of the
18243 vector elements is specified by `elem_mode`. */
4aeb1ba7 18244bool
9ceec73f
MM
18245aarch64_sve_ld1rq_ld1ro_operand_p (rtx op, machine_mode mode,
18246 scalar_mode elem_mode)
4aeb1ba7
RS
18247{
18248 struct aarch64_address_info addr;
4aeb1ba7
RS
18249 if (!MEM_P (op)
18250 || !aarch64_classify_address (&addr, XEXP (op, 0), elem_mode, false))
18251 return false;
18252
18253 if (addr.type == ADDRESS_REG_IMM)
9ceec73f 18254 return offset_4bit_signed_scaled_p (mode, addr.const_offset);
4aeb1ba7
RS
18255
18256 if (addr.type == ADDRESS_REG_REG)
18257 return (1U << addr.shift) == GET_MODE_SIZE (elem_mode);
18258
18259 return false;
18260}
18261
9ceec73f
MM
18262/* Return true if OP is a valid MEM operand for an SVE LD1RQ instruction. */
18263bool
18264aarch64_sve_ld1rq_operand_p (rtx op)
18265{
18266 return aarch64_sve_ld1rq_ld1ro_operand_p (op, TImode,
18267 GET_MODE_INNER (GET_MODE (op)));
18268}
18269
18270/* Return true if OP is a valid MEM operand for an SVE LD1RO instruction for
18271 accessing a vector where the element size is specified by `elem_mode`. */
18272bool
18273aarch64_sve_ld1ro_operand_p (rtx op, scalar_mode elem_mode)
18274{
18275 return aarch64_sve_ld1rq_ld1ro_operand_p (op, OImode, elem_mode);
18276}
18277
624d0f07
RS
18278/* Return true if OP is a valid MEM operand for an SVE LDFF1 instruction. */
18279bool
18280aarch64_sve_ldff1_operand_p (rtx op)
18281{
18282 if (!MEM_P (op))
18283 return false;
18284
18285 struct aarch64_address_info addr;
18286 if (!aarch64_classify_address (&addr, XEXP (op, 0), GET_MODE (op), false))
18287 return false;
18288
18289 if (addr.type == ADDRESS_REG_IMM)
18290 return known_eq (addr.const_offset, 0);
18291
18292 return addr.type == ADDRESS_REG_REG;
18293}
18294
18295/* Return true if OP is a valid MEM operand for an SVE LDNF1 instruction. */
18296bool
18297aarch64_sve_ldnf1_operand_p (rtx op)
18298{
18299 struct aarch64_address_info addr;
18300
18301 return (MEM_P (op)
18302 && aarch64_classify_address (&addr, XEXP (op, 0),
18303 GET_MODE (op), false)
18304 && addr.type == ADDRESS_REG_IMM);
18305}
18306
43cacb12
RS
18307/* Return true if OP is a valid MEM operand for an SVE LDR instruction.
18308 The conditions for STR are the same. */
18309bool
18310aarch64_sve_ldr_operand_p (rtx op)
18311{
18312 struct aarch64_address_info addr;
18313
18314 return (MEM_P (op)
18315 && aarch64_classify_address (&addr, XEXP (op, 0), GET_MODE (op),
18316 false, ADDR_QUERY_ANY)
18317 && addr.type == ADDRESS_REG_IMM);
18318}
18319
624d0f07
RS
18320/* Return true if OP is a valid address for an SVE PRF[BHWD] instruction,
18321 addressing memory of mode MODE. */
18322bool
18323aarch64_sve_prefetch_operand_p (rtx op, machine_mode mode)
18324{
18325 struct aarch64_address_info addr;
18326 if (!aarch64_classify_address (&addr, op, mode, false))
18327 return false;
18328
18329 if (addr.type == ADDRESS_REG_IMM)
18330 return known_eq (addr.const_offset, 0);
18331
18332 return addr.type == ADDRESS_REG_REG;
18333}
18334
9f4cbab8
RS
18335/* Return true if OP is a valid MEM operand for an SVE_STRUCT mode.
18336 We need to be able to access the individual pieces, so the range
18337 is different from LD[234] and ST[234]. */
18338bool
18339aarch64_sve_struct_memory_operand_p (rtx op)
18340{
18341 if (!MEM_P (op))
18342 return false;
18343
18344 machine_mode mode = GET_MODE (op);
18345 struct aarch64_address_info addr;
18346 if (!aarch64_classify_address (&addr, XEXP (op, 0), SVE_BYTE_MODE, false,
18347 ADDR_QUERY_ANY)
18348 || addr.type != ADDRESS_REG_IMM)
18349 return false;
18350
18351 poly_int64 first = addr.const_offset;
18352 poly_int64 last = first + GET_MODE_SIZE (mode) - BYTES_PER_SVE_VECTOR;
18353 return (offset_4bit_signed_scaled_p (SVE_BYTE_MODE, first)
18354 && offset_4bit_signed_scaled_p (SVE_BYTE_MODE, last));
18355}
18356
2d8c6dc1
AH
18357/* Emit a register copy from operand to operand, taking care not to
18358 early-clobber source registers in the process.
43e9d192 18359
2d8c6dc1
AH
18360 COUNT is the number of components into which the copy needs to be
18361 decomposed. */
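 /* For example, copying a four-register group from V8-V11 to V10-V13
    overlaps with rdest > rsrc, so the second loop below copies the
    highest-numbered component (V11 to V13) first and works downwards.  */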
43e9d192 18362void
b8506a8a 18363aarch64_simd_emit_reg_reg_move (rtx *operands, machine_mode mode,
2d8c6dc1 18364 unsigned int count)
43e9d192
IB
18365{
18366 unsigned int i;
2d8c6dc1
AH
18367 int rdest = REGNO (operands[0]);
18368 int rsrc = REGNO (operands[1]);
43e9d192
IB
18369
18370 if (!reg_overlap_mentioned_p (operands[0], operands[1])
2d8c6dc1
AH
18371 || rdest < rsrc)
18372 for (i = 0; i < count; i++)
18373 emit_move_insn (gen_rtx_REG (mode, rdest + i),
18374 gen_rtx_REG (mode, rsrc + i));
43e9d192 18375 else
2d8c6dc1
AH
18376 for (i = 0; i < count; i++)
18377 emit_move_insn (gen_rtx_REG (mode, rdest + count - i - 1),
18378 gen_rtx_REG (mode, rsrc + count - i - 1));
43e9d192
IB
18379}
18380
668046d1 18381/* Compute and return the length of aarch64_simd_reglist<mode>, where <mode> is
6ec0e5b9 18382 one of the VSTRUCT modes: OI, CI, or XI. */
668046d1 18383int
b8506a8a 18384aarch64_simd_attr_length_rglist (machine_mode mode)
668046d1 18385{
6a70badb
RS
18386 /* This is only used (and only meaningful) for Advanced SIMD, not SVE. */
18387 return (GET_MODE_SIZE (mode).to_constant () / UNITS_PER_VREG) * 4;
668046d1
DS
18388}
18389
db0253a4 18390/* Implement target hook TARGET_VECTOR_ALIGNMENT. The AAPCS64 sets the maximum
43cacb12
RS
18391 alignment of a vector to 128 bits. SVE predicates have an alignment of
18392 16 bits. */
db0253a4
TB
18393static HOST_WIDE_INT
18394aarch64_simd_vector_alignment (const_tree type)
18395{
07108a9e
RS
18396 /* ??? Checking the mode isn't ideal, but VECTOR_BOOLEAN_TYPE_P can
18397 be set for non-predicate vectors of booleans. Modes are the most
18398 direct way we have of identifying real SVE predicate types. */
18399 if (GET_MODE_CLASS (TYPE_MODE (type)) == MODE_VECTOR_BOOL)
18400 return 16;
cc68f7c2
RS
18401 widest_int min_size
18402 = constant_lower_bound (wi::to_poly_widest (TYPE_SIZE (type)));
18403 return wi::umin (min_size, 128).to_uhwi ();
db0253a4
TB
18404}
18405
43cacb12 18406/* Implement target hook TARGET_VECTORIZE_PREFERRED_VECTOR_ALIGNMENT. */
ca31798e 18407static poly_uint64
43cacb12
RS
18408aarch64_vectorize_preferred_vector_alignment (const_tree type)
18409{
18410 if (aarch64_sve_data_mode_p (TYPE_MODE (type)))
18411 {
18412 /* If the length of the vector is fixed, try to align to that length,
18413 otherwise don't try to align at all. */
18414 HOST_WIDE_INT result;
18415 if (!BITS_PER_SVE_VECTOR.is_constant (&result))
18416 result = TYPE_ALIGN (TREE_TYPE (type));
18417 return result;
18418 }
18419 return TYPE_ALIGN (type);
18420}
18421
db0253a4
TB
18422/* Implement target hook TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE. */
18423static bool
18424aarch64_simd_vector_alignment_reachable (const_tree type, bool is_packed)
18425{
18426 if (is_packed)
18427 return false;
18428
43cacb12
RS
18429 /* For fixed-length vectors, check that the vectorizer will aim for
18430 full-vector alignment. This isn't true for generic GCC vectors
18431 that are wider than the ABI maximum of 128 bits. */
ca31798e
AV
18432 poly_uint64 preferred_alignment =
18433 aarch64_vectorize_preferred_vector_alignment (type);
43cacb12 18434 if (TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
ca31798e
AV
18435 && maybe_ne (wi::to_widest (TYPE_SIZE (type)),
18436 preferred_alignment))
db0253a4
TB
18437 return false;
18438
18439 /* Vectors whose size is <= BIGGEST_ALIGNMENT are naturally aligned. */
18440 return true;
18441}
18442
7df76747
N
18443/* Return true if the vector misalignment factor is supported by the
18444 target. */
18445static bool
18446aarch64_builtin_support_vector_misalignment (machine_mode mode,
18447 const_tree type, int misalignment,
18448 bool is_packed)
18449{
18450 if (TARGET_SIMD && STRICT_ALIGNMENT)
18451 {
18452 /* Return false if the movmisalign pattern is not supported for this mode. */
18453 if (optab_handler (movmisalign_optab, mode) == CODE_FOR_nothing)
18454 return false;
18455
a509c571 18456 /* Misalignment factor is unknown at compile time. */
7df76747 18457 if (misalignment == -1)
a509c571 18458 return false;
7df76747
N
18459 }
18460 return default_builtin_support_vector_misalignment (mode, type, misalignment,
18461 is_packed);
18462}
18463
4369c11e
TB
18464/* If VALS is a vector constant that can be loaded into a register
18465 using DUP, generate instructions to do so and return an RTX to
18466 assign to the register. Otherwise return NULL_RTX. */
18467static rtx
18468aarch64_simd_dup_constant (rtx vals)
18469{
ef4bddc2
RS
18470 machine_mode mode = GET_MODE (vals);
18471 machine_mode inner_mode = GET_MODE_INNER (mode);
4369c11e 18472 rtx x;
4369c11e 18473
92695fbb 18474 if (!const_vec_duplicate_p (vals, &x))
4369c11e
TB
18475 return NULL_RTX;
18476
18477 /* We can load this constant by using DUP and a constant in a
18478 single ARM register. This will be cheaper than a vector
18479 load. */
92695fbb 18480 x = copy_to_mode_reg (inner_mode, x);
59d06c05 18481 return gen_vec_duplicate (mode, x);
4369c11e
TB
18482}
18483
18484
18485/* Generate code to load VALS, which is a PARALLEL containing only
18486 constants (for vec_init) or CONST_VECTOR, efficiently into a
18487 register. Returns an RTX to copy into the register, or NULL_RTX
67914693 18488 for a PARALLEL that cannot be converted into a CONST_VECTOR. */
1df3f464 18489static rtx
4369c11e
TB
18490aarch64_simd_make_constant (rtx vals)
18491{
ef4bddc2 18492 machine_mode mode = GET_MODE (vals);
4369c11e
TB
18493 rtx const_dup;
18494 rtx const_vec = NULL_RTX;
4369c11e
TB
18495 int n_const = 0;
18496 int i;
18497
18498 if (GET_CODE (vals) == CONST_VECTOR)
18499 const_vec = vals;
18500 else if (GET_CODE (vals) == PARALLEL)
18501 {
18502 /* A CONST_VECTOR must contain only CONST_INTs and
18503 CONST_DOUBLEs, but CONSTANT_P allows more (e.g. SYMBOL_REF).
18504 Only store valid constants in a CONST_VECTOR. */
6a70badb 18505 int n_elts = XVECLEN (vals, 0);
4369c11e
TB
18506 for (i = 0; i < n_elts; ++i)
18507 {
18508 rtx x = XVECEXP (vals, 0, i);
18509 if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
18510 n_const++;
18511 }
18512 if (n_const == n_elts)
18513 const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0));
18514 }
18515 else
18516 gcc_unreachable ();
18517
18518 if (const_vec != NULL_RTX
b187677b 18519 && aarch64_simd_valid_immediate (const_vec, NULL))
4369c11e
TB
18520 /* Load using MOVI/MVNI. */
18521 return const_vec;
18522 else if ((const_dup = aarch64_simd_dup_constant (vals)) != NULL_RTX)
18523 /* Loaded using DUP. */
18524 return const_dup;
18525 else if (const_vec != NULL_RTX)
67914693 18526 /* Load from constant pool. We cannot take advantage of single-cycle
4369c11e
TB
18527 LD1 because we need a PC-relative addressing mode. */
18528 return const_vec;
18529 else
18530 /* A PARALLEL containing something not valid inside CONST_VECTOR.
67914693 18531 We cannot construct an initializer. */
4369c11e
TB
18532 return NULL_RTX;
18533}
18534
35a093b6
JG
18535/* Expand a vector initialisation sequence, such that TARGET is
18536 initialised to contain VALS. */
18537
4369c11e
TB
18538void
18539aarch64_expand_vector_init (rtx target, rtx vals)
18540{
ef4bddc2 18541 machine_mode mode = GET_MODE (target);
146c2e3a 18542 scalar_mode inner_mode = GET_MODE_INNER (mode);
35a093b6 18543 /* The number of vector elements. */
6a70badb 18544 int n_elts = XVECLEN (vals, 0);
35a093b6 18545 /* The number of vector elements which are not constant. */
8b66a2d4
AL
18546 int n_var = 0;
18547 rtx any_const = NULL_RTX;
35a093b6
JG
18548 /* The first element of vals. */
18549 rtx v0 = XVECEXP (vals, 0, 0);
4369c11e 18550 bool all_same = true;
4369c11e 18551
41dab855
KT
18552 /* This is a special vec_init<M><N> where N is not an element mode but a
18553 vector mode with half the elements of M. We expect to find two entries
18554 of mode N in VALS and we must put their concatenation into TARGET. */
18555 if (XVECLEN (vals, 0) == 2 && VECTOR_MODE_P (GET_MODE (XVECEXP (vals, 0, 0))))
18556 {
18557 gcc_assert (known_eq (GET_MODE_SIZE (mode),
18558 2 * GET_MODE_SIZE (GET_MODE (XVECEXP (vals, 0, 0)))));
18559 rtx lo = XVECEXP (vals, 0, 0);
18560 rtx hi = XVECEXP (vals, 0, 1);
18561 machine_mode narrow_mode = GET_MODE (lo);
18562 gcc_assert (GET_MODE_INNER (narrow_mode) == inner_mode);
18563 gcc_assert (narrow_mode == GET_MODE (hi));
18564
18565 /* When we want to concatenate a half-width vector with zeroes we can
18566 use the aarch64_combinez[_be] patterns. Just make sure that the
18567 zeroes are in the right half. */
18568 if (BYTES_BIG_ENDIAN
18569 && aarch64_simd_imm_zero (lo, narrow_mode)
18570 && general_operand (hi, narrow_mode))
18571 emit_insn (gen_aarch64_combinez_be (narrow_mode, target, hi, lo));
18572 else if (!BYTES_BIG_ENDIAN
18573 && aarch64_simd_imm_zero (hi, narrow_mode)
18574 && general_operand (lo, narrow_mode))
18575 emit_insn (gen_aarch64_combinez (narrow_mode, target, lo, hi));
18576 else
18577 {
18578 /* Else create the two half-width registers and combine them. */
18579 if (!REG_P (lo))
18580 lo = force_reg (GET_MODE (lo), lo);
18581 if (!REG_P (hi))
18582 hi = force_reg (GET_MODE (hi), hi);
18583
18584 if (BYTES_BIG_ENDIAN)
18585 std::swap (lo, hi);
18586 emit_insn (gen_aarch64_simd_combine (narrow_mode, target, lo, hi));
18587 }
18588 return;
18589 }
18590
35a093b6 18591 /* Count the number of variable elements to initialise. */
8b66a2d4 18592 for (int i = 0; i < n_elts; ++i)
4369c11e 18593 {
8b66a2d4 18594 rtx x = XVECEXP (vals, 0, i);
35a093b6 18595 if (!(CONST_INT_P (x) || CONST_DOUBLE_P (x)))
8b66a2d4
AL
18596 ++n_var;
18597 else
18598 any_const = x;
4369c11e 18599
35a093b6 18600 all_same &= rtx_equal_p (x, v0);
4369c11e
TB
18601 }
18602
35a093b6
JG
18603 /* No variable elements, hand off to aarch64_simd_make_constant which knows
18604 how best to handle this. */
4369c11e
TB
18605 if (n_var == 0)
18606 {
18607 rtx constant = aarch64_simd_make_constant (vals);
18608 if (constant != NULL_RTX)
18609 {
18610 emit_move_insn (target, constant);
18611 return;
18612 }
18613 }
18614
18615 /* Splat a single non-constant element if we can. */
18616 if (all_same)
18617 {
35a093b6 18618 rtx x = copy_to_mode_reg (inner_mode, v0);
59d06c05 18619 aarch64_emit_move (target, gen_vec_duplicate (mode, x));
4369c11e
TB
18620 return;
18621 }
18622
85c1b6d7
AP
18623 enum insn_code icode = optab_handler (vec_set_optab, mode);
18624 gcc_assert (icode != CODE_FOR_nothing);
18625
18626 /* If there are only variable elements, try to optimize
18627 the insertion using dup for the most common element
18628 followed by insertions. */
18629
18630 /* The algorithm will fill matches[*][0] with the earliest matching element,
18631 and matches[X][1] with the count of duplicate elements (if X is the
18632 earliest element which has duplicates). */
18633
18634 if (n_var == n_elts && n_elts <= 16)
18635 {
18636 int matches[16][2] = {0};
18637 for (int i = 0; i < n_elts; i++)
18638 {
18639 for (int j = 0; j <= i; j++)
18640 {
18641 if (rtx_equal_p (XVECEXP (vals, 0, i), XVECEXP (vals, 0, j)))
18642 {
18643 matches[i][0] = j;
18644 matches[j][1]++;
18645 break;
18646 }
18647 }
18648 }
18649 int maxelement = 0;
18650 int maxv = 0;
18651 for (int i = 0; i < n_elts; i++)
18652 if (matches[i][1] > maxv)
18653 {
18654 maxelement = i;
18655 maxv = matches[i][1];
18656 }
18657
b4e2cd5b
JG
18658 /* Create a duplicate of the most common element, unless all elements
18659 are equally useless to us, in which case just immediately set the
18660 vector register using the first element. */
18661
18662 if (maxv == 1)
18663 {
18664 /* For vectors of two 64-bit elements, we can do even better. */
18665 if (n_elts == 2
18666 && (inner_mode == E_DImode
18667 || inner_mode == E_DFmode))
18668
18669 {
18670 rtx x0 = XVECEXP (vals, 0, 0);
18671 rtx x1 = XVECEXP (vals, 0, 1);
18672 /* Combine can pick up this case, but handling it directly
18673 here leaves clearer RTL.
18674
18675 This is load_pair_lanes<mode>, and also gives us a clean-up
18676 for store_pair_lanes<mode>. */
18677 if (memory_operand (x0, inner_mode)
18678 && memory_operand (x1, inner_mode)
18679 && !STRICT_ALIGNMENT
18680 && rtx_equal_p (XEXP (x1, 0),
18681 plus_constant (Pmode,
18682 XEXP (x0, 0),
18683 GET_MODE_SIZE (inner_mode))))
18684 {
18685 rtx t;
18686 if (inner_mode == DFmode)
18687 t = gen_load_pair_lanesdf (target, x0, x1);
18688 else
18689 t = gen_load_pair_lanesdi (target, x0, x1);
18690 emit_insn (t);
18691 return;
18692 }
18693 }
18694 /* The subreg-move sequence below will move into lane zero of the
18695 vector register. For big-endian we want that position to hold
18696 the last element of VALS. */
18697 maxelement = BYTES_BIG_ENDIAN ? n_elts - 1 : 0;
18698 rtx x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, maxelement));
18699 aarch64_emit_move (target, lowpart_subreg (mode, x, inner_mode));
18700 }
18701 else
18702 {
18703 rtx x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, maxelement));
18704 aarch64_emit_move (target, gen_vec_duplicate (mode, x));
18705 }
85c1b6d7
AP
18706
18707 /* Insert the rest. */
18708 for (int i = 0; i < n_elts; i++)
18709 {
18710 rtx x = XVECEXP (vals, 0, i);
18711 if (matches[i][0] == maxelement)
18712 continue;
18713 x = copy_to_mode_reg (inner_mode, x);
18714 emit_insn (GEN_FCN (icode) (target, x, GEN_INT (i)));
18715 }
18716 return;
18717 }
18718
35a093b6
JG
18719 /* Initialise a vector which is part-variable. We want to first try
18720 to build those lanes which are constant in the most efficient way we
18721 can. */
18722 if (n_var != n_elts)
4369c11e
TB
18723 {
18724 rtx copy = copy_rtx (vals);
4369c11e 18725
8b66a2d4
AL
18726 /* Load constant part of vector. We really don't care what goes into the
18727 parts we will overwrite, but we're more likely to be able to load the
18728 constant efficiently if it has fewer, larger, repeating parts
18729 (see aarch64_simd_valid_immediate). */
18730 for (int i = 0; i < n_elts; i++)
18731 {
18732 rtx x = XVECEXP (vals, 0, i);
18733 if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
18734 continue;
18735 rtx subst = any_const;
18736 for (int bit = n_elts / 2; bit > 0; bit /= 2)
18737 {
18738 /* Look in the copied vector, as more elements are const. */
18739 rtx test = XVECEXP (copy, 0, i ^ bit);
18740 if (CONST_INT_P (test) || CONST_DOUBLE_P (test))
18741 {
18742 subst = test;
18743 break;
18744 }
18745 }
18746 XVECEXP (copy, 0, i) = subst;
18747 }
4369c11e 18748 aarch64_expand_vector_init (target, copy);
35a093b6 18749 }
4369c11e 18750
35a093b6 18751 /* Insert the variable lanes directly. */
8b66a2d4 18752 for (int i = 0; i < n_elts; i++)
35a093b6
JG
18753 {
18754 rtx x = XVECEXP (vals, 0, i);
18755 if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
18756 continue;
18757 x = copy_to_mode_reg (inner_mode, x);
18758 emit_insn (GEN_FCN (icode) (target, x, GEN_INT (i)));
18759 }
4369c11e
TB
18760}
18761
3a0afad0
PK
18762/* Emit RTL corresponding to:
18763 insr TARGET, ELEM. */
18764
18765static void
18766emit_insr (rtx target, rtx elem)
18767{
18768 machine_mode mode = GET_MODE (target);
18769 scalar_mode elem_mode = GET_MODE_INNER (mode);
18770 elem = force_reg (elem_mode, elem);
18771
18772 insn_code icode = optab_handler (vec_shl_insert_optab, mode);
18773 gcc_assert (icode != CODE_FOR_nothing);
18774 emit_insn (GEN_FCN (icode) (target, target, elem));
18775}
18776
18777/* Subroutine of aarch64_sve_expand_vector_init for handling
18778 trailing constants.
18779 This function works as follows:
18780 (a) Create a new vector consisting of trailing constants.
18781 (b) Initialize TARGET with the constant vector using emit_move_insn.
18782 (c) Insert remaining elements in TARGET using insr.
18783 NELTS is the total number of elements in the original vector, while
18784 NELTS_REQD is the number of elements that are actually
18785 significant.
18786
18787 ??? The heuristic used is to do above only if number of constants
18788 is at least half the total number of elements. May need fine tuning. */
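/* For example, for { x, y, 1, 2 } with NELTS_REQD == 4 there are two
   trailing constants, which meets the threshold: TARGET is first loaded
   with the constant vector { 1, 2, 0, 0 } and then y and x are shifted
   in with INSR, giving { x, y, 1, 2 }.  */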
18789
18790static bool
18791aarch64_sve_expand_vector_init_handle_trailing_constants
18792 (rtx target, const rtx_vector_builder &builder, int nelts, int nelts_reqd)
18793{
18794 machine_mode mode = GET_MODE (target);
18795 scalar_mode elem_mode = GET_MODE_INNER (mode);
18796 int n_trailing_constants = 0;
18797
18798 for (int i = nelts_reqd - 1;
5da301cb 18799 i >= 0 && valid_for_const_vector_p (elem_mode, builder.elt (i));
3a0afad0
PK
18800 i--)
18801 n_trailing_constants++;
18802
18803 if (n_trailing_constants >= nelts_reqd / 2)
18804 {
5da301cb
RS
18805 /* Try to use the natural pattern of BUILDER to extend the trailing
18806 constant elements to a full vector. Replace any variables in the
18807 extra elements with zeros.
18808
18809 ??? It would be better if the builders supported "don't care"
18810 elements, with the builder filling in whichever elements
18811 give the most compact encoding. */
18812 rtx_vector_builder v (mode, nelts, 1);
3a0afad0 18813 for (int i = 0; i < nelts; i++)
5da301cb
RS
18814 {
18815 rtx x = builder.elt (i + nelts_reqd - n_trailing_constants);
18816 if (!valid_for_const_vector_p (elem_mode, x))
18817 x = const0_rtx;
18818 v.quick_push (x);
18819 }
3a0afad0
PK
18820 rtx const_vec = v.build ();
18821 emit_move_insn (target, const_vec);
18822
18823 for (int i = nelts_reqd - n_trailing_constants - 1; i >= 0; i--)
18824 emit_insr (target, builder.elt (i));
18825
18826 return true;
18827 }
18828
18829 return false;
18830}
18831
18832/* Subroutine of aarch64_sve_expand_vector_init.
18833 Works as follows:
18834 (a) Initialize TARGET by broadcasting element NELTS_REQD - 1 of BUILDER.
18835 (b) Skip trailing elements from BUILDER, which are the same as
18836 element NELTS_REQD - 1.
18837 (c) Insert earlier elements in reverse order in TARGET using insr. */
18838
18839static void
18840aarch64_sve_expand_vector_init_insert_elems (rtx target,
18841 const rtx_vector_builder &builder,
18842 int nelts_reqd)
18843{
18844 machine_mode mode = GET_MODE (target);
18845 scalar_mode elem_mode = GET_MODE_INNER (mode);
18846
18847 struct expand_operand ops[2];
18848 enum insn_code icode = optab_handler (vec_duplicate_optab, mode);
18849 gcc_assert (icode != CODE_FOR_nothing);
18850
18851 create_output_operand (&ops[0], target, mode);
18852 create_input_operand (&ops[1], builder.elt (nelts_reqd - 1), elem_mode);
18853 expand_insn (icode, 2, ops);
18854
18855 int ndups = builder.count_dups (nelts_reqd - 1, -1, -1);
18856 for (int i = nelts_reqd - ndups - 1; i >= 0; i--)
18857 emit_insr (target, builder.elt (i));
18858}
18859
18860/* Subroutine of aarch64_sve_expand_vector_init to handle case
18861 when all trailing elements of builder are same.
18862 This works as follows:
18863 (a) Use expand_insn interface to broadcast last vector element in TARGET.
18864 (b) Insert remaining elements in TARGET using insr.
18865
18866 ??? The heuristic used is to do above if number of same trailing elements
18867 is at least 3/4 of total number of elements, loosely based on
18868 heuristic from mostly_zeros_p. May need fine-tuning. */
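/* For example, for { x, y, y, y } three of the four elements equal y,
   so TARGET is initialized with a broadcast of y and a single INSR of x
   then produces { x, y, y, y }.  */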
18869
18870static bool
18871aarch64_sve_expand_vector_init_handle_trailing_same_elem
18872 (rtx target, const rtx_vector_builder &builder, int nelts_reqd)
18873{
18874 int ndups = builder.count_dups (nelts_reqd - 1, -1, -1);
18875 if (ndups >= (3 * nelts_reqd) / 4)
18876 {
18877 aarch64_sve_expand_vector_init_insert_elems (target, builder,
18878 nelts_reqd - ndups + 1);
18879 return true;
18880 }
18881
18882 return false;
18883}
18884
18885/* Initialize register TARGET from BUILDER. NELTS is the constant number
18886 of elements in BUILDER.
18887
18888 The function tries to initialize TARGET from BUILDER if it fits one
18889 of the special cases outlined below.
18890
18891 Failing that, the function divides BUILDER into two sub-vectors:
18892 v_even = even elements of BUILDER;
18893 v_odd = odd elements of BUILDER;
18894
18895 and recursively calls itself with v_even and v_odd.
18896
18897 if (recursive call succeeded for v_even or v_odd)
18898 TARGET = zip (v_even, v_odd)
18899
18900 The function returns true if it managed to build TARGET from BUILDER
18901 with one of the special cases, false otherwise.
18902
18903 Example: {a, 1, b, 2, c, 3, d, 4}
18904
18905 The vector gets divided into:
18906 v_even = {a, b, c, d}
18907 v_odd = {1, 2, 3, 4}
18908
18909 aarch64_sve_expand_vector_init(v_odd) hits case 1 and
18910 initializes tmp2 from constant vector v_odd using emit_move_insn.
18911
18912 aarch64_sve_expand_vector_init(v_even) fails since v_even contains
18913 4 elements, so we construct tmp1 from v_even using insr:
18914 tmp1 = dup(d)
18915 insr tmp1, c
18916 insr tmp1, b
18917 insr tmp1, a
18918
18919 And finally:
18920 TARGET = zip (tmp1, tmp2)
18921 which sets TARGET to {a, 1, b, 2, c, 3, d, 4}. */
18922
18923static bool
18924aarch64_sve_expand_vector_init (rtx target, const rtx_vector_builder &builder,
18925 int nelts, int nelts_reqd)
18926{
18927 machine_mode mode = GET_MODE (target);
18928
18929 /* Case 1: Vector contains trailing constants. */
18930
18931 if (aarch64_sve_expand_vector_init_handle_trailing_constants
18932 (target, builder, nelts, nelts_reqd))
18933 return true;
18934
18935 /* Case 2: Vector contains leading constants. */
18936
5da301cb 18937 rtx_vector_builder rev_builder (mode, nelts_reqd, 1);
3a0afad0
PK
18938 for (int i = 0; i < nelts_reqd; i++)
18939 rev_builder.quick_push (builder.elt (nelts_reqd - i - 1));
18940 rev_builder.finalize ();
18941
18942 if (aarch64_sve_expand_vector_init_handle_trailing_constants
18943 (target, rev_builder, nelts, nelts_reqd))
18944 {
18945 emit_insn (gen_aarch64_sve_rev (mode, target, target));
18946 return true;
18947 }
18948
18949 /* Case 3: Vector contains trailing same element. */
18950
18951 if (aarch64_sve_expand_vector_init_handle_trailing_same_elem
18952 (target, builder, nelts_reqd))
18953 return true;
18954
18955 /* Case 4: Vector contains leading same element. */
18956
18957 if (aarch64_sve_expand_vector_init_handle_trailing_same_elem
18958 (target, rev_builder, nelts_reqd) && nelts_reqd == nelts)
18959 {
18960 emit_insn (gen_aarch64_sve_rev (mode, target, target));
18961 return true;
18962 }
18963
18964 /* Avoid recursing below 4-elements.
18965 ??? The threshold 4 may need fine-tuning. */
18966
18967 if (nelts_reqd <= 4)
18968 return false;
18969
5da301cb
RS
18970 rtx_vector_builder v_even (mode, nelts, 1);
18971 rtx_vector_builder v_odd (mode, nelts, 1);
3a0afad0
PK
18972
18973 for (int i = 0; i < nelts * 2; i += 2)
18974 {
18975 v_even.quick_push (builder.elt (i));
18976 v_odd.quick_push (builder.elt (i + 1));
18977 }
18978
18979 v_even.finalize ();
18980 v_odd.finalize ();
18981
18982 rtx tmp1 = gen_reg_rtx (mode);
18983 bool did_even_p = aarch64_sve_expand_vector_init (tmp1, v_even,
18984 nelts, nelts_reqd / 2);
18985
18986 rtx tmp2 = gen_reg_rtx (mode);
18987 bool did_odd_p = aarch64_sve_expand_vector_init (tmp2, v_odd,
18988 nelts, nelts_reqd / 2);
18989
18990 if (!did_even_p && !did_odd_p)
18991 return false;
18992
18993 /* Initialize v_even and v_odd using INSR if it didn't match any of the
18994 special cases and zip v_even, v_odd. */
18995
18996 if (!did_even_p)
18997 aarch64_sve_expand_vector_init_insert_elems (tmp1, v_even, nelts_reqd / 2);
18998
18999 if (!did_odd_p)
19000 aarch64_sve_expand_vector_init_insert_elems (tmp2, v_odd, nelts_reqd / 2);
19001
19002 rtvec v = gen_rtvec (2, tmp1, tmp2);
19003 emit_set_insn (target, gen_rtx_UNSPEC (mode, v, UNSPEC_ZIP1));
19004 return true;
19005}
19006
19007/* Initialize register TARGET from the elements in PARALLEL rtx VALS. */
19008
19009void
19010aarch64_sve_expand_vector_init (rtx target, rtx vals)
19011{
19012 machine_mode mode = GET_MODE (target);
19013 int nelts = XVECLEN (vals, 0);
19014
5da301cb 19015 rtx_vector_builder v (mode, nelts, 1);
3a0afad0
PK
19016 for (int i = 0; i < nelts; i++)
19017 v.quick_push (XVECEXP (vals, 0, i));
19018 v.finalize ();
19019
19020 /* If neither sub-vector of v could be initialized specially,
19021 then use INSR to insert all elements from v into TARGET.
19022 ??? This might not be optimal for vectors with large
19023 initializers like 16-element or above.
19024 For nelts < 4, it probably isn't useful to handle specially. */
19025
19026 if (nelts < 4
19027 || !aarch64_sve_expand_vector_init (target, v, nelts, nelts))
19028 aarch64_sve_expand_vector_init_insert_elems (target, v, nelts);
19029}
19030
b6c3aea1
RS
19031/* Check whether VALUE is a vector constant in which every element
19032 is either a power of 2 or a negated power of 2. If so, return
19033 a constant vector of log2s, and flip CODE between PLUS and MINUS
19034 if VALUE contains negated powers of 2. Return NULL_RTX otherwise. */
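/* For example, a V4SImode constant { -4, -4, -4, -4 } is converted to
   { 2, 2, 2, 2 } and CODE is flipped, so that (x * -4) + y can be
   emitted as y - (x << 2).  */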
19035
19036static rtx
19037aarch64_convert_mult_to_shift (rtx value, rtx_code &code)
19038{
19039 if (GET_CODE (value) != CONST_VECTOR)
19040 return NULL_RTX;
19041
19042 rtx_vector_builder builder;
19043 if (!builder.new_unary_operation (GET_MODE (value), value, false))
19044 return NULL_RTX;
19045
19046 scalar_mode int_mode = GET_MODE_INNER (GET_MODE (value));
19047 /* 1 if the result of the multiplication must be negated,
19048 0 if it mustn't, or -1 if we don't yet care. */
19049 int negate = -1;
19050 unsigned int encoded_nelts = const_vector_encoded_nelts (value);
19051 for (unsigned int i = 0; i < encoded_nelts; ++i)
19052 {
19053 rtx elt = CONST_VECTOR_ENCODED_ELT (value, i);
19054 if (!CONST_SCALAR_INT_P (elt))
19055 return NULL_RTX;
19056 rtx_mode_t val (elt, int_mode);
19057 wide_int pow2 = wi::neg (val);
19058 if (val != pow2)
19059 {
19060 /* It matters whether we negate or not. Make that choice,
19061 and make sure that it's consistent with previous elements. */
19062 if (negate == !wi::neg_p (val))
19063 return NULL_RTX;
19064 negate = wi::neg_p (val);
19065 if (!negate)
19066 pow2 = val;
19067 }
19068 /* POW2 is now the value that we want to be a power of 2. */
19069 int shift = wi::exact_log2 (pow2);
19070 if (shift < 0)
19071 return NULL_RTX;
19072 builder.quick_push (gen_int_mode (shift, int_mode));
19073 }
19074 if (negate == -1)
19075 /* PLUS and MINUS are equivalent; canonicalize on PLUS. */
19076 code = PLUS;
19077 else if (negate == 1)
19078 code = code == PLUS ? MINUS : PLUS;
19079 return builder.build ();
19080}
19081
19082/* Prepare for an integer SVE multiply-add or multiply-subtract pattern;
19083 CODE is PLUS for the former and MINUS for the latter. OPERANDS is the
19084 operands array, in the same order as for fma_optab. Return true if
19085 the function emitted all the necessary instructions, false if the caller
19086 should generate the pattern normally with the new OPERANDS array. */
19087
19088bool
19089aarch64_prepare_sve_int_fma (rtx *operands, rtx_code code)
19090{
19091 machine_mode mode = GET_MODE (operands[0]);
19092 if (rtx shifts = aarch64_convert_mult_to_shift (operands[2], code))
19093 {
19094 rtx product = expand_binop (mode, vashl_optab, operands[1], shifts,
19095 NULL_RTX, true, OPTAB_DIRECT);
19096 force_expand_binop (mode, code == PLUS ? add_optab : sub_optab,
19097 operands[3], product, operands[0], true,
19098 OPTAB_DIRECT);
19099 return true;
19100 }
19101 operands[2] = force_reg (mode, operands[2]);
19102 return false;
19103}
19104
19105/* Likewise, but for a conditional pattern. */
19106
19107bool
19108aarch64_prepare_sve_cond_int_fma (rtx *operands, rtx_code code)
19109{
19110 machine_mode mode = GET_MODE (operands[0]);
19111 if (rtx shifts = aarch64_convert_mult_to_shift (operands[3], code))
19112 {
19113 rtx product = expand_binop (mode, vashl_optab, operands[2], shifts,
19114 NULL_RTX, true, OPTAB_DIRECT);
19115 emit_insn (gen_cond (code, mode, operands[0], operands[1],
19116 operands[4], product, operands[5]));
19117 return true;
19118 }
19119 operands[3] = force_reg (mode, operands[3]);
19120 return false;
19121}
19122
43e9d192 19123static unsigned HOST_WIDE_INT
ef4bddc2 19124aarch64_shift_truncation_mask (machine_mode mode)
43e9d192 19125{
43cacb12
RS
19126 if (!SHIFT_COUNT_TRUNCATED || aarch64_vector_data_mode_p (mode))
19127 return 0;
19128 return GET_MODE_UNIT_BITSIZE (mode) - 1;
43e9d192
IB
19129}
19130
43e9d192
IB
19131/* Select a format to encode pointers in exception handling data. */
19132int
19133aarch64_asm_preferred_eh_data_format (int code ATTRIBUTE_UNUSED, int global)
19134{
19135 int type;
19136 switch (aarch64_cmodel)
19137 {
19138 case AARCH64_CMODEL_TINY:
19139 case AARCH64_CMODEL_TINY_PIC:
19140 case AARCH64_CMODEL_SMALL:
19141 case AARCH64_CMODEL_SMALL_PIC:
1b1e81f8 19142 case AARCH64_CMODEL_SMALL_SPIC:
43e9d192
IB
19143 /* text+got+data < 4Gb. 4-byte signed relocs are sufficient
19144 for everything. */
19145 type = DW_EH_PE_sdata4;
19146 break;
19147 default:
19148 /* No assumptions here. 8-byte relocs required. */
19149 type = DW_EH_PE_sdata8;
19150 break;
19151 }
19152 return (global ? DW_EH_PE_indirect : 0) | DW_EH_PE_pcrel | type;
19153}
19154
b07fc91c
SN
19155/* Output .variant_pcs for aarch64_vector_pcs function symbols. */
19156
19157static void
19158aarch64_asm_output_variant_pcs (FILE *stream, const tree decl, const char* name)
19159{
c600df9a 19160 if (TREE_CODE (decl) == FUNCTION_DECL)
b07fc91c 19161 {
c600df9a
RS
19162 arm_pcs pcs = (arm_pcs) fndecl_abi (decl).id ();
19163 if (pcs == ARM_PCS_SIMD || pcs == ARM_PCS_SVE)
19164 {
19165 fprintf (stream, "\t.variant_pcs\t");
19166 assemble_name (stream, name);
19167 fprintf (stream, "\n");
19168 }
b07fc91c
SN
19169 }
19170}
19171
e1c1ecb0
KT
19172/* The last .arch and .tune assembly strings that we printed. */
19173static std::string aarch64_last_printed_arch_string;
19174static std::string aarch64_last_printed_tune_string;
19175
361fb3ee
KT
19176/* Implement ASM_DECLARE_FUNCTION_NAME. Output the ISA features used
19177 by the function fndecl. */
19178
19179void
19180aarch64_declare_function_name (FILE *stream, const char* name,
19181 tree fndecl)
19182{
19183 tree target_parts = DECL_FUNCTION_SPECIFIC_TARGET (fndecl);
19184
19185 struct cl_target_option *targ_options;
19186 if (target_parts)
19187 targ_options = TREE_TARGET_OPTION (target_parts);
19188 else
19189 targ_options = TREE_TARGET_OPTION (target_option_current_node);
19190 gcc_assert (targ_options);
19191
19192 const struct processor *this_arch
19193 = aarch64_get_arch (targ_options->x_explicit_arch);
19194
28108a53 19195 uint64_t isa_flags = targ_options->x_aarch64_isa_flags;
054b4005 19196 std::string extension
04a99ebe
JG
19197 = aarch64_get_extension_string_for_isa_flags (isa_flags,
19198 this_arch->flags);
e1c1ecb0
KT
19199 /* Only update the assembler .arch string if it is distinct from the last
19200 such string we printed. */
19201 std::string to_print = this_arch->name + extension;
19202 if (to_print != aarch64_last_printed_arch_string)
19203 {
19204 asm_fprintf (asm_out_file, "\t.arch %s\n", to_print.c_str ());
19205 aarch64_last_printed_arch_string = to_print;
19206 }
361fb3ee
KT
19207
19208 /* Print the cpu name we're tuning for in the comments; it might be
e1c1ecb0
KT
19209 useful to readers of the generated asm. Do it only when it changes
19210 from function to function and verbose assembly is requested. */
361fb3ee
KT
19211 const struct processor *this_tune
19212 = aarch64_get_tune_cpu (targ_options->x_explicit_tune_core);
19213
e1c1ecb0
KT
19214 if (flag_debug_asm && aarch64_last_printed_tune_string != this_tune->name)
19215 {
19216 asm_fprintf (asm_out_file, "\t" ASM_COMMENT_START ".tune %s\n",
19217 this_tune->name);
19218 aarch64_last_printed_tune_string = this_tune->name;
19219 }
361fb3ee 19220
b07fc91c
SN
19221 aarch64_asm_output_variant_pcs (stream, fndecl, name);
19222
361fb3ee
KT
19223 /* Don't forget the type directive for ELF. */
19224 ASM_OUTPUT_TYPE_DIRECTIVE (stream, name, "function");
19225 ASM_OUTPUT_LABEL (stream, name);
c292cfe5
SN
19226
19227 cfun->machine->label_is_assembled = true;
19228}
19229
19230/* Implement PRINT_PATCHABLE_FUNCTION_ENTRY. Check if the patch area is after
19231 the function label and emit a BTI if necessary. */
19232
19233void
19234aarch64_print_patchable_function_entry (FILE *file,
19235 unsigned HOST_WIDE_INT patch_area_size,
19236 bool record_p)
19237{
19238 if (cfun->machine->label_is_assembled
19239 && aarch64_bti_enabled ()
19240 && !cgraph_node::get (cfun->decl)->only_called_directly_p ())
19241 {
19242 /* Remove the BTI that follows the patch area and insert a new BTI
19243 before the patch area right after the function label. */
19244 rtx_insn *insn = next_real_nondebug_insn (get_insns ());
19245 if (insn
19246 && INSN_P (insn)
19247 && GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
19248 && XINT (PATTERN (insn), 1) == UNSPECV_BTI_C)
19249 delete_insn (insn);
19250 asm_fprintf (file, "\thint\t34 // bti c\n");
19251 }
19252
19253 default_print_patchable_function_entry (file, patch_area_size, record_p);
361fb3ee
KT
19254}
19255
b07fc91c
SN
19256/* Implement ASM_OUTPUT_DEF_FROM_DECLS. Output .variant_pcs for aliases. */
19257
19258void
19259aarch64_asm_output_alias (FILE *stream, const tree decl, const tree target)
19260{
19261 const char *name = XSTR (XEXP (DECL_RTL (decl), 0), 0);
19262 const char *value = IDENTIFIER_POINTER (target);
19263 aarch64_asm_output_variant_pcs (stream, decl, name);
19264 ASM_OUTPUT_DEF (stream, name, value);
19265}
19266
19267/* Implement ASM_OUTPUT_EXTERNAL. Output .variant_pcs for undefined
19268 function symbol references. */
19269
19270void
e8c47069 19271aarch64_asm_output_external (FILE *stream, tree decl, const char* name)
b07fc91c 19272{
e8c47069 19273 default_elf_asm_output_external (stream, decl, name);
b07fc91c
SN
19274 aarch64_asm_output_variant_pcs (stream, decl, name);
19275}
19276
8fc16d72
ST
19277/* Triggered after a .cfi_startproc directive is emitted into the assembly file.
19278 Used to output the .cfi_b_key_frame directive when signing the current
19279 function with the B key. */
19280
19281void
19282aarch64_post_cfi_startproc (FILE *f, tree ignored ATTRIBUTE_UNUSED)
19283{
2bdc7dcb 19284 if (cfun->machine->frame.laid_out && aarch64_return_address_signing_enabled ()
8fc16d72
ST
19285 && aarch64_ra_sign_key == AARCH64_KEY_B)
19286 asm_fprintf (f, "\t.cfi_b_key_frame\n");
19287}
19288
e1c1ecb0
KT
19289/* Implements TARGET_ASM_FILE_START. Output the assembly header. */
19290
19291static void
19292aarch64_start_file (void)
19293{
19294 struct cl_target_option *default_options
19295 = TREE_TARGET_OPTION (target_option_default_node);
19296
19297 const struct processor *default_arch
19298 = aarch64_get_arch (default_options->x_explicit_arch);
28108a53 19299 uint64_t default_isa_flags = default_options->x_aarch64_isa_flags;
e1c1ecb0 19300 std::string extension
04a99ebe
JG
19301 = aarch64_get_extension_string_for_isa_flags (default_isa_flags,
19302 default_arch->flags);
e1c1ecb0
KT
19303
19304 aarch64_last_printed_arch_string = default_arch->name + extension;
19305 aarch64_last_printed_tune_string = "";
19306 asm_fprintf (asm_out_file, "\t.arch %s\n",
19307 aarch64_last_printed_arch_string.c_str ());
19308
19309 default_file_start ();
19310}
19311
0462169c
SN
19312/* Emit load exclusive. */
19313
19314static void
ef4bddc2 19315aarch64_emit_load_exclusive (machine_mode mode, rtx rval,
0462169c
SN
19316 rtx mem, rtx model_rtx)
19317{
4a2095eb
RH
19318 if (mode == TImode)
19319 emit_insn (gen_aarch64_load_exclusive_pair (gen_lowpart (DImode, rval),
19320 gen_highpart (DImode, rval),
19321 mem, model_rtx));
19322 else
19323 emit_insn (gen_aarch64_load_exclusive (mode, rval, mem, model_rtx));
0462169c
SN
19324}
19325
19326/* Emit store exclusive. */
19327
19328static void
ef4bddc2 19329aarch64_emit_store_exclusive (machine_mode mode, rtx bval,
4a2095eb 19330 rtx mem, rtx rval, rtx model_rtx)
0462169c 19331{
4a2095eb
RH
19332 if (mode == TImode)
19333 emit_insn (gen_aarch64_store_exclusive_pair
19334 (bval, mem, operand_subword (rval, 0, 0, TImode),
19335 operand_subword (rval, 1, 0, TImode), model_rtx));
19336 else
19337 emit_insn (gen_aarch64_store_exclusive (mode, bval, mem, rval, model_rtx));
0462169c
SN
19338}
19339
19340/* Mark the previous jump instruction as unlikely. */
19341
19342static void
19343aarch64_emit_unlikely_jump (rtx insn)
19344{
f370536c 19345 rtx_insn *jump = emit_jump_insn (insn);
5fa396ad 19346 add_reg_br_prob_note (jump, profile_probability::very_unlikely ());
0462169c
SN
19347}
19348
3950b229
RH
19349/* We store the names of the various atomic helpers in a 5x4 array.
19350 Return the libcall function given MODE, MODEL and NAMES. */
19351
19352rtx
19353aarch64_atomic_ool_func(machine_mode mode, rtx model_rtx,
19354 const atomic_ool_names *names)
19355{
19356 memmodel model = memmodel_base (INTVAL (model_rtx));
19357 int mode_idx, model_idx;
19358
19359 switch (mode)
19360 {
19361 case E_QImode:
19362 mode_idx = 0;
19363 break;
19364 case E_HImode:
19365 mode_idx = 1;
19366 break;
19367 case E_SImode:
19368 mode_idx = 2;
19369 break;
19370 case E_DImode:
19371 mode_idx = 3;
19372 break;
19373 case E_TImode:
19374 mode_idx = 4;
19375 break;
19376 default:
19377 gcc_unreachable ();
19378 }
19379
19380 switch (model)
19381 {
19382 case MEMMODEL_RELAXED:
19383 model_idx = 0;
19384 break;
19385 case MEMMODEL_CONSUME:
19386 case MEMMODEL_ACQUIRE:
19387 model_idx = 1;
19388 break;
19389 case MEMMODEL_RELEASE:
19390 model_idx = 2;
19391 break;
19392 case MEMMODEL_ACQ_REL:
19393 case MEMMODEL_SEQ_CST:
19394 model_idx = 3;
19395 break;
19396 default:
19397 gcc_unreachable ();
19398 }
19399
19400 return init_one_libfunc_visibility (names->str[mode_idx][model_idx],
19401 VISIBILITY_HIDDEN);
19402}
19403
19404#define DEF0(B, N) \
19405 { "__aarch64_" #B #N "_relax", \
19406 "__aarch64_" #B #N "_acq", \
19407 "__aarch64_" #B #N "_rel", \
19408 "__aarch64_" #B #N "_acq_rel" }
19409
19410#define DEF4(B) DEF0(B, 1), DEF0(B, 2), DEF0(B, 4), DEF0(B, 8), \
19411 { NULL, NULL, NULL, NULL }
19412#define DEF5(B) DEF0(B, 1), DEF0(B, 2), DEF0(B, 4), DEF0(B, 8), DEF0(B, 16)
19413
19414static const atomic_ool_names aarch64_ool_cas_names = { { DEF5(cas) } };
19415const atomic_ool_names aarch64_ool_swp_names = { { DEF4(swp) } };
19416const atomic_ool_names aarch64_ool_ldadd_names = { { DEF4(ldadd) } };
19417const atomic_ool_names aarch64_ool_ldset_names = { { DEF4(ldset) } };
19418const atomic_ool_names aarch64_ool_ldclr_names = { { DEF4(ldclr) } };
19419const atomic_ool_names aarch64_ool_ldeor_names = { { DEF4(ldeor) } };
19420
19421#undef DEF0
19422#undef DEF4
19423#undef DEF5
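/* As an illustration (derived from the macros above, not a comment in the
   original source): the 4-byte row produced by DEF5 (cas) is
     { "__aarch64_cas4_relax", "__aarch64_cas4_acq",
       "__aarch64_cas4_rel",   "__aarch64_cas4_acq_rel" }
   so aarch64_atomic_ool_func for SImode with a seq_cst model resolves to
   the "__aarch64_cas4_acq_rel" libcall.  */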
19424
0462169c
SN
19425/* Expand a compare and swap pattern. */
19426
19427void
19428aarch64_expand_compare_and_swap (rtx operands[])
19429{
d400fda3
RH
19430 rtx bval, rval, mem, oldval, newval, is_weak, mod_s, mod_f, x, cc_reg;
19431 machine_mode mode, r_mode;
0462169c
SN
19432
19433 bval = operands[0];
19434 rval = operands[1];
19435 mem = operands[2];
19436 oldval = operands[3];
19437 newval = operands[4];
19438 is_weak = operands[5];
19439 mod_s = operands[6];
19440 mod_f = operands[7];
19441 mode = GET_MODE (mem);
0462169c
SN
19442
19443 /* Normally the succ memory model must be stronger than fail, but in the
19444 unlikely event of fail being ACQUIRE and succ being RELEASE we need to
19445 promote succ to ACQ_REL so that we don't lose the acquire semantics. */
46b35980
AM
19446 if (is_mm_acquire (memmodel_from_int (INTVAL (mod_f)))
19447 && is_mm_release (memmodel_from_int (INTVAL (mod_s))))
0462169c
SN
19448 mod_s = GEN_INT (MEMMODEL_ACQ_REL);
19449
d400fda3
RH
19450 r_mode = mode;
19451 if (mode == QImode || mode == HImode)
0462169c 19452 {
d400fda3
RH
19453 r_mode = SImode;
19454 rval = gen_reg_rtx (r_mode);
0462169c
SN
19455 }
19456
b0770c0f 19457 if (TARGET_LSE)
77f33f44
RH
19458 {
19459 /* The CAS insn requires oldval and rval overlap, but we need to
19460 have a copy of oldval saved across the operation to tell if
19461 the operation is successful. */
d400fda3
RH
19462 if (reg_overlap_mentioned_p (rval, oldval))
19463 rval = copy_to_mode_reg (r_mode, oldval);
77f33f44 19464 else
d400fda3
RH
19465 emit_move_insn (rval, gen_lowpart (r_mode, oldval));
19466
77f33f44
RH
19467 emit_insn (gen_aarch64_compare_and_swap_lse (mode, rval, mem,
19468 newval, mod_s));
d400fda3 19469 cc_reg = aarch64_gen_compare_reg_maybe_ze (NE, rval, oldval, mode);
77f33f44 19470 }
3950b229
RH
19471 else if (TARGET_OUTLINE_ATOMICS)
19472 {
19473 /* Oldval must satisfy compare afterward. */
19474 if (!aarch64_plus_operand (oldval, mode))
19475 oldval = force_reg (mode, oldval);
19476 rtx func = aarch64_atomic_ool_func (mode, mod_s, &aarch64_ool_cas_names);
19477 rval = emit_library_call_value (func, NULL_RTX, LCT_NORMAL, r_mode,
19478 oldval, mode, newval, mode,
19479 XEXP (mem, 0), Pmode);
19480 cc_reg = aarch64_gen_compare_reg_maybe_ze (NE, rval, oldval, mode);
19481 }
b0770c0f 19482 else
d400fda3
RH
19483 {
19484 /* The oldval predicate varies by mode. Test it and force to reg. */
19485 insn_code code = code_for_aarch64_compare_and_swap (mode);
19486 if (!insn_data[code].operand[2].predicate (oldval, mode))
19487 oldval = force_reg (mode, oldval);
0462169c 19488
d400fda3
RH
19489 emit_insn (GEN_FCN (code) (rval, mem, oldval, newval,
19490 is_weak, mod_s, mod_f));
19491 cc_reg = gen_rtx_REG (CCmode, CC_REGNUM);
19492 }
19493
19494 if (r_mode != mode)
77f33f44
RH
19495 rval = gen_lowpart (mode, rval);
19496 emit_move_insn (operands[1], rval);
0462169c 19497
d400fda3 19498 x = gen_rtx_EQ (SImode, cc_reg, const0_rtx);
f7df4a84 19499 emit_insn (gen_rtx_SET (bval, x));
0462169c
SN
19500}
19501
f70fb3b6
MW
19502/* Emit a barrier, that is appropriate for memory model MODEL, at the end of a
19503 sequence implementing an atomic operation. */
19504
19505static void
19506aarch64_emit_post_barrier (enum memmodel model)
19507{
19508 const enum memmodel base_model = memmodel_base (model);
19509
19510 if (is_mm_sync (model)
19511 && (base_model == MEMMODEL_ACQUIRE
19512 || base_model == MEMMODEL_ACQ_REL
19513 || base_model == MEMMODEL_SEQ_CST))
19514 {
19515 emit_insn (gen_mem_thread_fence (GEN_INT (MEMMODEL_SEQ_CST)));
19516 }
19517}
19518
0462169c
SN
19519/* Split a compare and swap pattern. */
19520
19521void
19522aarch64_split_compare_and_swap (rtx operands[])
19523{
e5e07b68
WD
19524 /* Split after prolog/epilog to avoid interactions with shrinkwrapping. */
19525 gcc_assert (epilogue_completed);
19526
b7e560de 19527 rtx rval, mem, oldval, newval, scratch, x, model_rtx;
ef4bddc2 19528 machine_mode mode;
0462169c 19529 bool is_weak;
5d8a22a5 19530 rtx_code_label *label1, *label2;
ab876106 19531 enum memmodel model;
0462169c
SN
19532
19533 rval = operands[0];
19534 mem = operands[1];
19535 oldval = operands[2];
19536 newval = operands[3];
19537 is_weak = (operands[4] != const0_rtx);
ab876106 19538 model_rtx = operands[5];
0462169c
SN
19539 scratch = operands[7];
19540 mode = GET_MODE (mem);
ab876106 19541 model = memmodel_from_int (INTVAL (model_rtx));
0462169c 19542
17f47f86
KT
19543 /* When OLDVAL is zero and we want the strong version we can emit a tighter
19544 loop:
19545 .label1:
19546 LD[A]XR rval, [mem]
19547 CBNZ rval, .label2
19548 ST[L]XR scratch, newval, [mem]
19549 CBNZ scratch, .label1
19550 .label2:
19551 CMP rval, 0. */
b7e560de
RH
19552 bool strong_zero_p = (!is_weak && !aarch64_track_speculation &&
19553 oldval == const0_rtx && mode != TImode);
17f47f86 19554
5d8a22a5 19555 label1 = NULL;
0462169c
SN
19556 if (!is_weak)
19557 {
19558 label1 = gen_label_rtx ();
19559 emit_label (label1);
19560 }
19561 label2 = gen_label_rtx ();
19562
ab876106
MW
19563 /* The initial load can be relaxed for a __sync operation since a final
19564 barrier will be emitted to stop code hoisting. */
19565 if (is_mm_sync (model))
b7e560de 19566 aarch64_emit_load_exclusive (mode, rval, mem, GEN_INT (MEMMODEL_RELAXED));
ab876106
MW
19567 else
19568 aarch64_emit_load_exclusive (mode, rval, mem, model_rtx);
0462169c 19569
17f47f86 19570 if (strong_zero_p)
b7e560de 19571 x = gen_rtx_NE (VOIDmode, rval, const0_rtx);
17f47f86
KT
19572 else
19573 {
b7e560de
RH
19574 rtx cc_reg = aarch64_gen_compare_reg_maybe_ze (NE, rval, oldval, mode);
19575 x = gen_rtx_NE (VOIDmode, cc_reg, const0_rtx);
17f47f86 19576 }
b7e560de
RH
19577 x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
19578 gen_rtx_LABEL_REF (Pmode, label2), pc_rtx);
19579 aarch64_emit_unlikely_jump (gen_rtx_SET (pc_rtx, x));
0462169c 19580
ab876106 19581 aarch64_emit_store_exclusive (mode, scratch, mem, newval, model_rtx);
0462169c
SN
19582
19583 if (!is_weak)
19584 {
6e1eaca9
RE
19585 if (aarch64_track_speculation)
19586 {
19587 /* Emit an explicit compare instruction, so that we can correctly
19588 track the condition codes. */
19589 rtx cc_reg = aarch64_gen_compare_reg (NE, scratch, const0_rtx);
19590 x = gen_rtx_NE (GET_MODE (cc_reg), cc_reg, const0_rtx);
19591 }
19592 else
19593 x = gen_rtx_NE (VOIDmode, scratch, const0_rtx);
19594
0462169c
SN
19595 x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
19596 gen_rtx_LABEL_REF (Pmode, label1), pc_rtx);
f7df4a84 19597 aarch64_emit_unlikely_jump (gen_rtx_SET (pc_rtx, x));
0462169c
SN
19598 }
19599 else
b7e560de 19600 aarch64_gen_compare_reg (NE, scratch, const0_rtx);
0462169c
SN
19601
19602 emit_label (label2);
b7e560de 19603
17f47f86
KT
19604 /* If we used a CBNZ in the exchange loop emit an explicit compare with RVAL
19605 to set the condition flags. If this is not used it will be removed by
19606 later passes. */
19607 if (strong_zero_p)
b7e560de
RH
19608 aarch64_gen_compare_reg (NE, rval, const0_rtx);
19609
ab876106
MW
19610 /* Emit any final barrier needed for a __sync operation. */
19611 if (is_mm_sync (model))
19612 aarch64_emit_post_barrier (model);
0462169c 19613}
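/* A hedged illustration (not a comment from the original source): for the
   generic strong case the split above yields an LL/SC loop of roughly the
   form
     .label1:
        ld[a]xr  rval, [mem]
        cmp      rval, oldval
        b.ne     .label2
        st[l]xr  scratch, newval, [mem]
        cbnz     scratch, .label1
     .label2:
   where the acquire/release forms of the exclusives are chosen from the
   memory model, as in the strong_zero_p loop documented above.  */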
9cd7b720 19614
0462169c
SN
19615/* Split an atomic operation. */
19616
19617void
19618aarch64_split_atomic_op (enum rtx_code code, rtx old_out, rtx new_out, rtx mem,
9cd7b720 19619 rtx value, rtx model_rtx, rtx cond)
0462169c 19620{
e5e07b68
WD
19621 /* Split after prolog/epilog to avoid interactions with shrinkwrapping. */
19622 gcc_assert (epilogue_completed);
19623
ef4bddc2
RS
19624 machine_mode mode = GET_MODE (mem);
19625 machine_mode wmode = (mode == DImode ? DImode : SImode);
f70fb3b6
MW
19626 const enum memmodel model = memmodel_from_int (INTVAL (model_rtx));
19627 const bool is_sync = is_mm_sync (model);
5d8a22a5
DM
19628 rtx_code_label *label;
19629 rtx x;
0462169c 19630
9cd7b720 19631 /* Split the atomic operation into a sequence. */
0462169c
SN
19632 label = gen_label_rtx ();
19633 emit_label (label);
19634
19635 if (new_out)
19636 new_out = gen_lowpart (wmode, new_out);
19637 if (old_out)
19638 old_out = gen_lowpart (wmode, old_out);
19639 else
19640 old_out = new_out;
19641 value = simplify_gen_subreg (wmode, value, mode, 0);
19642
f70fb3b6
MW
19643 /* The initial load can be relaxed for a __sync operation since a final
19644 barrier will be emitted to stop code hoisting. */
19645 if (is_sync)
19646 aarch64_emit_load_exclusive (mode, old_out, mem,
19647 GEN_INT (MEMMODEL_RELAXED));
19648 else
19649 aarch64_emit_load_exclusive (mode, old_out, mem, model_rtx);
0462169c
SN
19650
19651 switch (code)
19652 {
19653 case SET:
19654 new_out = value;
19655 break;
19656
19657 case NOT:
19658 x = gen_rtx_AND (wmode, old_out, value);
f7df4a84 19659 emit_insn (gen_rtx_SET (new_out, x));
0462169c 19660 x = gen_rtx_NOT (wmode, new_out);
f7df4a84 19661 emit_insn (gen_rtx_SET (new_out, x));
0462169c
SN
19662 break;
19663
19664 case MINUS:
19665 if (CONST_INT_P (value))
19666 {
19667 value = GEN_INT (-INTVAL (value));
19668 code = PLUS;
19669 }
19670 /* Fall through. */
19671
19672 default:
19673 x = gen_rtx_fmt_ee (code, wmode, old_out, value);
f7df4a84 19674 emit_insn (gen_rtx_SET (new_out, x));
0462169c
SN
19675 break;
19676 }
19677
19678 aarch64_emit_store_exclusive (mode, cond, mem,
19679 gen_lowpart (mode, new_out), model_rtx);
19680
6e1eaca9
RE
19681 if (aarch64_track_speculation)
19682 {
19683 /* Emit an explicit compare instruction, so that we can correctly
19684 track the condition codes. */
19685 rtx cc_reg = aarch64_gen_compare_reg (NE, cond, const0_rtx);
19686 x = gen_rtx_NE (GET_MODE (cc_reg), cc_reg, const0_rtx);
19687 }
19688 else
19689 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
19690
0462169c
SN
19691 x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
19692 gen_rtx_LABEL_REF (Pmode, label), pc_rtx);
f7df4a84 19693 aarch64_emit_unlikely_jump (gen_rtx_SET (pc_rtx, x));
f70fb3b6
MW
19694
19695 /* Emit any final barrier needed for a __sync operation. */
19696 if (is_sync)
19697 aarch64_emit_post_barrier (model);
0462169c
SN
19698}
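/* A hedged illustration (not a comment from the original source): for a
   simple operation such as PLUS the split above yields roughly
     .label:
        ldxr  old, [mem]
        add   new, old, value
        stxr  cond, new, [mem]
        cbnz  cond, .label
   with acquire/release variants of the exclusives selected from the memory
   model, followed by any final __sync barrier.  */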
19699
c2ec330c
AL
19700static void
19701aarch64_init_libfuncs (void)
19702{
19703 /* Half-precision float operations. The compiler handles all operations
19704 with NULL libfuncs by converting to SFmode. */
19705
19706 /* Conversions. */
19707 set_conv_libfunc (trunc_optab, HFmode, SFmode, "__gnu_f2h_ieee");
19708 set_conv_libfunc (sext_optab, SFmode, HFmode, "__gnu_h2f_ieee");
19709
19710 /* Arithmetic. */
19711 set_optab_libfunc (add_optab, HFmode, NULL);
19712 set_optab_libfunc (sdiv_optab, HFmode, NULL);
19713 set_optab_libfunc (smul_optab, HFmode, NULL);
19714 set_optab_libfunc (neg_optab, HFmode, NULL);
19715 set_optab_libfunc (sub_optab, HFmode, NULL);
19716
19717 /* Comparisons. */
19718 set_optab_libfunc (eq_optab, HFmode, NULL);
19719 set_optab_libfunc (ne_optab, HFmode, NULL);
19720 set_optab_libfunc (lt_optab, HFmode, NULL);
19721 set_optab_libfunc (le_optab, HFmode, NULL);
19722 set_optab_libfunc (ge_optab, HFmode, NULL);
19723 set_optab_libfunc (gt_optab, HFmode, NULL);
19724 set_optab_libfunc (unord_optab, HFmode, NULL);
19725}
19726
43e9d192 19727/* Target hook for c_mode_for_suffix. */
ef4bddc2 19728static machine_mode
43e9d192
IB
19729aarch64_c_mode_for_suffix (char suffix)
19730{
19731 if (suffix == 'q')
19732 return TFmode;
19733
19734 return VOIDmode;
19735}
19736
3520f7cc
JG
19737/* We can only represent floating point constants which will fit in
19738 "quarter-precision" values. These values are characterised by
19739 a sign bit, a 4-bit mantissa and a 3-bit exponent, and are given
19740 by:
19741
19742 (-1)^s * (n/16) * 2^r
19743
19744 Where:
19745 's' is the sign bit.
19746 'n' is an integer in the range 16 <= n <= 31.
19747 'r' is an integer in the range -3 <= r <= 4. */
19748
19749/* Return true iff X can be represented by a quarter-precision
19750 floating point immediate operand. Note, we cannot represent 0.0. */
19751bool
19752aarch64_float_const_representable_p (rtx x)
19753{
19754 /* This represents our current view of how many bits
19755 make up the mantissa. */
19756 int point_pos = 2 * HOST_BITS_PER_WIDE_INT - 1;
ba96cdfb 19757 int exponent;
3520f7cc 19758 unsigned HOST_WIDE_INT mantissa, mask;
3520f7cc 19759 REAL_VALUE_TYPE r, m;
807e902e 19760 bool fail;
3520f7cc 19761
d29f7dd5 19762 x = unwrap_const_vec_duplicate (x);
3520f7cc
JG
19763 if (!CONST_DOUBLE_P (x))
19764 return false;
19765
a4518821
RS
19766 if (GET_MODE (x) == VOIDmode
19767 || (GET_MODE (x) == HFmode && !TARGET_FP_F16INST))
94bfa2da
TV
19768 return false;
19769
34a72c33 19770 r = *CONST_DOUBLE_REAL_VALUE (x);
3520f7cc
JG
19771
19772 /* We cannot represent infinities, NaNs or +/-zero. We won't
19773 know if we have +zero until we analyse the mantissa, but we
19774 can reject the other invalid values. */
19775 if (REAL_VALUE_ISINF (r) || REAL_VALUE_ISNAN (r)
19776 || REAL_VALUE_MINUS_ZERO (r))
19777 return false;
19778
ba96cdfb 19779 /* Extract exponent. */
3520f7cc
JG
19780 r = real_value_abs (&r);
19781 exponent = REAL_EXP (&r);
19782
19783 /* For the mantissa, we expand into two HOST_WIDE_INTS, apart from the
19784 highest (sign) bit, with a fixed binary point at bit point_pos.
19785 The low element of W holds the low part of the mantissa, the high element the high part.
19786 WARNING: If we ever have a representation using more than 2 * H_W_I - 1
19787 bits for the mantissa, this can fail (low bits will be lost). */
19788 real_ldexp (&m, &r, point_pos - exponent);
807e902e 19789 wide_int w = real_to_integer (&m, &fail, HOST_BITS_PER_WIDE_INT * 2);
3520f7cc
JG
19790
19791 /* If the low part of the mantissa has bits set we cannot represent
19792 the value. */
d9074b29 19793 if (w.ulow () != 0)
3520f7cc
JG
19794 return false;
19795 /* We have rejected the lower HOST_WIDE_INT, so update our
19796 understanding of how many bits lie in the mantissa and
19797 look only at the high HOST_WIDE_INT. */
807e902e 19798 mantissa = w.elt (1);
3520f7cc
JG
19799 point_pos -= HOST_BITS_PER_WIDE_INT;
19800
19801 /* We can only represent values with a mantissa of the form 1.xxxx. */
19802 mask = ((unsigned HOST_WIDE_INT)1 << (point_pos - 5)) - 1;
19803 if ((mantissa & mask) != 0)
19804 return false;
19805
19806 /* Having filtered unrepresentable values, we may now remove all
19807 but the highest 5 bits. */
19808 mantissa >>= point_pos - 5;
19809
19810 /* We cannot represent the value 0.0, so reject it. This is handled
19811 elsewhere. */
19812 if (mantissa == 0)
19813 return false;
19814
19815 /* Then, as bit 4 is always set, we can mask it off, leaving
19816 the mantissa in the range [0, 15]. */
19817 mantissa &= ~(1 << 4);
19818 gcc_assert (mantissa <= 15);
19819
19820 /* GCC internally does not use IEEE754-like encoding (where normalized
19821 significands are in the range [1, 2)). GCC uses [0.5, 1) (see real.c).
19822 Our mantissa values are shifted 4 places to the left relative to
19823 normalized IEEE754 so we must modify the exponent returned by REAL_EXP
19824 by 5 places to correct for GCC's representation. */
19825 exponent = 5 - exponent;
19826
19827 return (exponent >= 0 && exponent <= 7);
19828}
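/* A minimal standalone sketch (illustrative only, not part of this file):
   it enumerates every positive value of the quarter-precision form
   (n/16) * 2^r with 16 <= n <= 31 and -3 <= r <= 4 described above, i.e.
   the magnitudes accepted by aarch64_float_const_representable_p.  */
#include <stdio.h>

int
main (void)
{
  for (int r = -3; r <= 4; r++)
    for (int n = 16; n <= 31; n++)
      {
	/* 2^r as a double, handling negative exponents.  */
	double scale = r >= 0 ? (double) (1 << r) : 1.0 / (double) (1 << -r);
	printf ("n=%2d r=%+d -> %g\n", n, r, (double) n / 16.0 * scale);
      }
  return 0;
}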
19829
ab6501d7
SD
19830/* Returns the string with the instruction for AdvSIMD MOVI, MVNI, ORR or BIC
19831 immediate with a CONST_VECTOR of MODE and WIDTH. WHICH selects whether to
19832 output MOVI/MVNI, ORR or BIC immediate. */
3520f7cc 19833char*
b187677b 19834aarch64_output_simd_mov_immediate (rtx const_vector, unsigned width,
ab6501d7 19835 enum simd_immediate_check which)
3520f7cc 19836{
3ea63f60 19837 bool is_valid;
3520f7cc 19838 static char templ[40];
3520f7cc 19839 const char *mnemonic;
e4f0f84d 19840 const char *shift_op;
3520f7cc 19841 unsigned int lane_count = 0;
81c2dfb9 19842 char element_char;
3520f7cc 19843
b187677b 19844 struct simd_immediate_info info;
48063b9d
IB
19845
19846 /* This will return true to show const_vector is legal for use as either
ab6501d7
SD
19847 an AdvSIMD MOVI instruction (or, implicitly, MVNI), ORR or BIC immediate.
19848 It will also update INFO to show how the immediate should be generated.
19849 WHICH selects whether to check for MOVI/MVNI, ORR or BIC. */
b187677b 19850 is_valid = aarch64_simd_valid_immediate (const_vector, &info, which);
3520f7cc
JG
19851 gcc_assert (is_valid);
19852
b187677b
RS
19853 element_char = sizetochar (GET_MODE_BITSIZE (info.elt_mode));
19854 lane_count = width / GET_MODE_BITSIZE (info.elt_mode);
48063b9d 19855
b187677b 19856 if (GET_MODE_CLASS (info.elt_mode) == MODE_FLOAT)
3520f7cc 19857 {
1da83cce
RS
19858 gcc_assert (info.insn == simd_immediate_info::MOV
19859 && info.u.mov.shift == 0);
0d8e1702
KT
19860 /* For FP zero change it to a CONST_INT 0 and use the integer SIMD
19861 move immediate path. */
1da83cce
RS
19862 if (aarch64_float_const_zero_rtx_p (info.u.mov.value))
19863 info.u.mov.value = GEN_INT (0);
48063b9d
IB
19864 else
19865 {
83faf7d0 19866 const unsigned int buf_size = 20;
48063b9d 19867 char float_buf[buf_size] = {'\0'};
34a72c33 19868 real_to_decimal_for_mode (float_buf,
1da83cce 19869 CONST_DOUBLE_REAL_VALUE (info.u.mov.value),
b187677b 19870 buf_size, buf_size, 1, info.elt_mode);
48063b9d
IB
19871
19872 if (lane_count == 1)
19873 snprintf (templ, sizeof (templ), "fmov\t%%d0, %s", float_buf);
19874 else
19875 snprintf (templ, sizeof (templ), "fmov\t%%0.%d%c, %s",
81c2dfb9 19876 lane_count, element_char, float_buf);
48063b9d
IB
19877 return templ;
19878 }
3520f7cc 19879 }
3520f7cc 19880
1da83cce 19881 gcc_assert (CONST_INT_P (info.u.mov.value));
ab6501d7
SD
19882
19883 if (which == AARCH64_CHECK_MOV)
19884 {
b187677b 19885 mnemonic = info.insn == simd_immediate_info::MVN ? "mvni" : "movi";
1da83cce
RS
19886 shift_op = (info.u.mov.modifier == simd_immediate_info::MSL
19887 ? "msl" : "lsl");
ab6501d7
SD
19888 if (lane_count == 1)
19889 snprintf (templ, sizeof (templ), "%s\t%%d0, " HOST_WIDE_INT_PRINT_HEX,
1da83cce
RS
19890 mnemonic, UINTVAL (info.u.mov.value));
19891 else if (info.u.mov.shift)
ab6501d7
SD
19892 snprintf (templ, sizeof (templ), "%s\t%%0.%d%c, "
19893 HOST_WIDE_INT_PRINT_HEX ", %s %d", mnemonic, lane_count,
1da83cce
RS
19894 element_char, UINTVAL (info.u.mov.value), shift_op,
19895 info.u.mov.shift);
ab6501d7
SD
19896 else
19897 snprintf (templ, sizeof (templ), "%s\t%%0.%d%c, "
19898 HOST_WIDE_INT_PRINT_HEX, mnemonic, lane_count,
1da83cce 19899 element_char, UINTVAL (info.u.mov.value));
ab6501d7 19900 }
3520f7cc 19901 else
ab6501d7
SD
19902 {
19903 /* For AARCH64_CHECK_BIC and AARCH64_CHECK_ORR. */
b187677b 19904 mnemonic = info.insn == simd_immediate_info::MVN ? "bic" : "orr";
1da83cce 19905 if (info.u.mov.shift)
ab6501d7
SD
19906 snprintf (templ, sizeof (templ), "%s\t%%0.%d%c, #"
19907 HOST_WIDE_INT_PRINT_DEC ", %s #%d", mnemonic, lane_count,
1da83cce
RS
19908 element_char, UINTVAL (info.u.mov.value), "lsl",
19909 info.u.mov.shift);
ab6501d7
SD
19910 else
19911 snprintf (templ, sizeof (templ), "%s\t%%0.%d%c, #"
19912 HOST_WIDE_INT_PRINT_DEC, mnemonic, lane_count,
1da83cce 19913 element_char, UINTVAL (info.u.mov.value));
ab6501d7 19914 }
3520f7cc
JG
19915 return templ;
19916}
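/* For example (illustrative, not from the original source): a V4SImode
   constant with every element equal to 1 is printed as "movi\t%0.4s, 0x1";
   MVN-form immediates use "mvni", and shifted immediates append
   ", lsl N" or ", msl N" as selected by INFO.  */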
19917
b7342d25 19918char*
77e994c9 19919aarch64_output_scalar_simd_mov_immediate (rtx immediate, scalar_int_mode mode)
b7342d25 19920{
a2170965
TC
19921
19922 /* If a floating point number was passed and we desire to use it in an
19923 integer mode, do the conversion to integer. */
19924 if (CONST_DOUBLE_P (immediate) && GET_MODE_CLASS (mode) == MODE_INT)
19925 {
19926 unsigned HOST_WIDE_INT ival;
19927 if (!aarch64_reinterpret_float_as_int (immediate, &ival))
19928 gcc_unreachable ();
19929 immediate = gen_int_mode (ival, mode);
19930 }
19931
ef4bddc2 19932 machine_mode vmode;
a2170965
TC
19933 /* Use a 64-bit mode for everything except for DI/DF mode, where we use
19934 a 128-bit vector mode. */
19935 int width = GET_MODE_BITSIZE (mode) == 64 ? 128 : 64;
b7342d25 19936
a2170965 19937 vmode = aarch64_simd_container_mode (mode, width);
b7342d25 19938 rtx v_op = aarch64_simd_gen_const_vector_dup (vmode, INTVAL (immediate));
b187677b 19939 return aarch64_output_simd_mov_immediate (v_op, width);
b7342d25
IB
19940}
19941
43cacb12
RS
19942/* Return the output string to use for moving immediate CONST_VECTOR
19943 into an SVE register. */
19944
19945char *
19946aarch64_output_sve_mov_immediate (rtx const_vector)
19947{
19948 static char templ[40];
19949 struct simd_immediate_info info;
19950 char element_char;
19951
19952 bool is_valid = aarch64_simd_valid_immediate (const_vector, &info);
19953 gcc_assert (is_valid);
19954
19955 element_char = sizetochar (GET_MODE_BITSIZE (info.elt_mode));
19956
1044fa32
RS
19957 machine_mode vec_mode = GET_MODE (const_vector);
19958 if (aarch64_sve_pred_mode_p (vec_mode))
19959 {
19960 static char buf[sizeof ("ptrue\t%0.N, vlNNNNN")];
0b1fe8cf
RS
19961 if (info.insn == simd_immediate_info::MOV)
19962 {
19963 gcc_assert (info.u.mov.value == const0_rtx);
19964 snprintf (buf, sizeof (buf), "pfalse\t%%0.b");
19965 }
1044fa32 19966 else
0b1fe8cf
RS
19967 {
19968 gcc_assert (info.insn == simd_immediate_info::PTRUE);
19969 unsigned int total_bytes;
19970 if (info.u.pattern == AARCH64_SV_ALL
19971 && BYTES_PER_SVE_VECTOR.is_constant (&total_bytes))
19972 snprintf (buf, sizeof (buf), "ptrue\t%%0.%c, vl%d", element_char,
19973 total_bytes / GET_MODE_SIZE (info.elt_mode));
19974 else
19975 snprintf (buf, sizeof (buf), "ptrue\t%%0.%c, %s", element_char,
19976 svpattern_token (info.u.pattern));
19977 }
1044fa32
RS
19978 return buf;
19979 }
19980
1da83cce 19981 if (info.insn == simd_immediate_info::INDEX)
43cacb12
RS
19982 {
19983 snprintf (templ, sizeof (templ), "index\t%%0.%c, #"
19984 HOST_WIDE_INT_PRINT_DEC ", #" HOST_WIDE_INT_PRINT_DEC,
1da83cce
RS
19985 element_char, INTVAL (info.u.index.base),
19986 INTVAL (info.u.index.step));
43cacb12
RS
19987 return templ;
19988 }
19989
19990 if (GET_MODE_CLASS (info.elt_mode) == MODE_FLOAT)
19991 {
1da83cce
RS
19992 if (aarch64_float_const_zero_rtx_p (info.u.mov.value))
19993 info.u.mov.value = GEN_INT (0);
43cacb12
RS
19994 else
19995 {
19996 const int buf_size = 20;
19997 char float_buf[buf_size] = {};
19998 real_to_decimal_for_mode (float_buf,
1da83cce 19999 CONST_DOUBLE_REAL_VALUE (info.u.mov.value),
43cacb12
RS
20000 buf_size, buf_size, 1, info.elt_mode);
20001
20002 snprintf (templ, sizeof (templ), "fmov\t%%0.%c, #%s",
20003 element_char, float_buf);
20004 return templ;
20005 }
20006 }
20007
20008 snprintf (templ, sizeof (templ), "mov\t%%0.%c, #" HOST_WIDE_INT_PRINT_DEC,
1da83cce 20009 element_char, INTVAL (info.u.mov.value));
43cacb12
RS
20010 return templ;
20011}
20012
624d0f07
RS
20013/* Return the asm template for a PTRUES. CONST_UNSPEC is the
20014 aarch64_sve_ptrue_svpattern_immediate that describes the predicate
20015 pattern. */
20016
20017char *
20018aarch64_output_sve_ptrues (rtx const_unspec)
20019{
20020 static char templ[40];
20021
20022 struct simd_immediate_info info;
20023 bool is_valid = aarch64_simd_valid_immediate (const_unspec, &info);
20024 gcc_assert (is_valid && info.insn == simd_immediate_info::PTRUE);
20025
20026 char element_char = sizetochar (GET_MODE_BITSIZE (info.elt_mode));
20027 snprintf (templ, sizeof (templ), "ptrues\t%%0.%c, %s", element_char,
20028 svpattern_token (info.u.pattern));
20029 return templ;
20030}
20031
88b08073
JG
20032/* Split operands into moves from op[1] + op[2] into op[0]. */
20033
20034void
20035aarch64_split_combinev16qi (rtx operands[3])
20036{
20037 unsigned int dest = REGNO (operands[0]);
20038 unsigned int src1 = REGNO (operands[1]);
20039 unsigned int src2 = REGNO (operands[2]);
ef4bddc2 20040 machine_mode halfmode = GET_MODE (operands[1]);
462a99aa 20041 unsigned int halfregs = REG_NREGS (operands[1]);
88b08073
JG
20042 rtx destlo, desthi;
20043
20044 gcc_assert (halfmode == V16QImode);
20045
20046 if (src1 == dest && src2 == dest + halfregs)
20047 {
20048 /* No-op move. Can't split to nothing; emit something. */
20049 emit_note (NOTE_INSN_DELETED);
20050 return;
20051 }
20052
20053 /* Preserve register attributes for variable tracking. */
20054 destlo = gen_rtx_REG_offset (operands[0], halfmode, dest, 0);
20055 desthi = gen_rtx_REG_offset (operands[0], halfmode, dest + halfregs,
20056 GET_MODE_SIZE (halfmode));
20057
20058 /* Special case of reversed high/low parts. */
20059 if (reg_overlap_mentioned_p (operands[2], destlo)
20060 && reg_overlap_mentioned_p (operands[1], desthi))
20061 {
20062 emit_insn (gen_xorv16qi3 (operands[1], operands[1], operands[2]));
20063 emit_insn (gen_xorv16qi3 (operands[2], operands[1], operands[2]));
20064 emit_insn (gen_xorv16qi3 (operands[1], operands[1], operands[2]));
20065 }
20066 else if (!reg_overlap_mentioned_p (operands[2], destlo))
20067 {
20068 /* Try to avoid unnecessary moves if part of the result
20069 is in the right place already. */
20070 if (src1 != dest)
20071 emit_move_insn (destlo, operands[1]);
20072 if (src2 != dest + halfregs)
20073 emit_move_insn (desthi, operands[2]);
20074 }
20075 else
20076 {
20077 if (src2 != dest + halfregs)
20078 emit_move_insn (desthi, operands[2]);
20079 if (src1 != dest)
20080 emit_move_insn (destlo, operands[1]);
20081 }
20082}
20083
20084/* vec_perm support. */
20085
88b08073
JG
20086struct expand_vec_perm_d
20087{
20088 rtx target, op0, op1;
e3342de4 20089 vec_perm_indices perm;
ef4bddc2 20090 machine_mode vmode;
43cacb12 20091 unsigned int vec_flags;
88b08073
JG
20092 bool one_vector_p;
20093 bool testing_p;
20094};
20095
7efc03fd
DP
20096static bool aarch64_expand_vec_perm_const_1 (struct expand_vec_perm_d *d);
20097
88b08073
JG
20098/* Generate a variable permutation. */
20099
20100static void
20101aarch64_expand_vec_perm_1 (rtx target, rtx op0, rtx op1, rtx sel)
20102{
ef4bddc2 20103 machine_mode vmode = GET_MODE (target);
88b08073
JG
20104 bool one_vector_p = rtx_equal_p (op0, op1);
20105
20106 gcc_checking_assert (vmode == V8QImode || vmode == V16QImode);
20107 gcc_checking_assert (GET_MODE (op0) == vmode);
20108 gcc_checking_assert (GET_MODE (op1) == vmode);
20109 gcc_checking_assert (GET_MODE (sel) == vmode);
20110 gcc_checking_assert (TARGET_SIMD);
20111
20112 if (one_vector_p)
20113 {
20114 if (vmode == V8QImode)
20115 {
20116 /* Expand the argument to a V16QI mode by duplicating it. */
20117 rtx pair = gen_reg_rtx (V16QImode);
20118 emit_insn (gen_aarch64_combinev8qi (pair, op0, op0));
20119 emit_insn (gen_aarch64_tbl1v8qi (target, pair, sel));
20120 }
20121 else
20122 {
20123 emit_insn (gen_aarch64_tbl1v16qi (target, op0, sel));
20124 }
20125 }
20126 else
20127 {
20128 rtx pair;
20129
20130 if (vmode == V8QImode)
20131 {
20132 pair = gen_reg_rtx (V16QImode);
20133 emit_insn (gen_aarch64_combinev8qi (pair, op0, op1));
20134 emit_insn (gen_aarch64_tbl1v8qi (target, pair, sel));
20135 }
20136 else
20137 {
20138 pair = gen_reg_rtx (OImode);
20139 emit_insn (gen_aarch64_combinev16qi (pair, op0, op1));
20140 emit_insn (gen_aarch64_tbl2v16qi (target, pair, sel));
20141 }
20142 }
20143}
20144
80940017
RS
20145/* Expand a vec_perm with the operands given by TARGET, OP0, OP1 and SEL.
20146 NELT is the number of elements in the vector. */
20147
88b08073 20148void
80940017
RS
20149aarch64_expand_vec_perm (rtx target, rtx op0, rtx op1, rtx sel,
20150 unsigned int nelt)
88b08073 20151{
ef4bddc2 20152 machine_mode vmode = GET_MODE (target);
88b08073 20153 bool one_vector_p = rtx_equal_p (op0, op1);
f7c4e5b8 20154 rtx mask;
88b08073
JG
20155
20156 /* The TBL instruction does not use a modulo index, so we must take care
20157 of that ourselves. */
f7c4e5b8
AL
20158 mask = aarch64_simd_gen_const_vector_dup (vmode,
20159 one_vector_p ? nelt - 1 : 2 * nelt - 1);
88b08073
JG
20160 sel = expand_simple_binop (vmode, AND, sel, mask, NULL, 0, OPTAB_LIB_WIDEN);
20161
f7c4e5b8
AL
20162 /* For big-endian, we also need to reverse the index within the vector
20163 (but not which vector). */
20164 if (BYTES_BIG_ENDIAN)
20165 {
20166 /* If one_vector_p, mask is a vector of (nelt - 1)'s already. */
20167 if (!one_vector_p)
20168 mask = aarch64_simd_gen_const_vector_dup (vmode, nelt - 1);
20169 sel = expand_simple_binop (vmode, XOR, sel, mask,
20170 NULL, 0, OPTAB_LIB_WIDEN);
20171 }
88b08073
JG
20172 aarch64_expand_vec_perm_1 (target, op0, op1, sel);
20173}
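/* A minimal standalone sketch (illustrative only, not part of this file)
   of the selector mangling performed above for a single 16-byte
   (V16QImode) input: TBL has no modulo semantics, so the index is masked,
   and on big-endian it is additionally flipped within the vector by
   XORing with nelt - 1.  */
#include <stdio.h>

int
main (void)
{
  const unsigned nelt = 16;	/* One V16QImode input vector.  */
  for (unsigned sel = 0; sel < 32; sel += 5)
    {
      unsigned le_index = sel & (nelt - 1);	    /* Modulo step.      */
      unsigned be_index = le_index ^ (nelt - 1);    /* Big-endian flip.  */
      printf ("sel %2u -> little-endian %2u, big-endian %2u\n",
	      sel, le_index, be_index);
    }
  return 0;
}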
20174
43cacb12
RS
20175/* Generate (set TARGET (unspec [OP0 OP1] CODE)). */
20176
20177static void
20178emit_unspec2 (rtx target, int code, rtx op0, rtx op1)
20179{
20180 emit_insn (gen_rtx_SET (target,
20181 gen_rtx_UNSPEC (GET_MODE (target),
20182 gen_rtvec (2, op0, op1), code)));
20183}
20184
20185/* Expand an SVE vec_perm with the given operands. */
20186
20187void
20188aarch64_expand_sve_vec_perm (rtx target, rtx op0, rtx op1, rtx sel)
20189{
20190 machine_mode data_mode = GET_MODE (target);
20191 machine_mode sel_mode = GET_MODE (sel);
20192 /* Enforced by the pattern condition. */
20193 int nunits = GET_MODE_NUNITS (sel_mode).to_constant ();
20194
20195 /* Note: vec_perm indices are supposed to wrap when they go beyond the
20196 size of the two value vectors, i.e. the upper bits of the indices
20197 are effectively ignored. SVE TBL instead produces 0 for any
20198 out-of-range indices, so we need to modulo all the vec_perm indices
20199 to ensure they are all in range. */
20200 rtx sel_reg = force_reg (sel_mode, sel);
20201
20202 /* Check if the sel only references the first values vector. */
20203 if (GET_CODE (sel) == CONST_VECTOR
20204 && aarch64_const_vec_all_in_range_p (sel, 0, nunits - 1))
20205 {
20206 emit_unspec2 (target, UNSPEC_TBL, op0, sel_reg);
20207 return;
20208 }
20209
20210 /* Check if the two values vectors are the same. */
20211 if (rtx_equal_p (op0, op1))
20212 {
20213 rtx max_sel = aarch64_simd_gen_const_vector_dup (sel_mode, nunits - 1);
20214 rtx sel_mod = expand_simple_binop (sel_mode, AND, sel_reg, max_sel,
20215 NULL, 0, OPTAB_DIRECT);
20216 emit_unspec2 (target, UNSPEC_TBL, op0, sel_mod);
20217 return;
20218 }
20219
20220 /* Run TBL on each value vector and combine the results. */
20221
20222 rtx res0 = gen_reg_rtx (data_mode);
20223 rtx res1 = gen_reg_rtx (data_mode);
20224 rtx neg_num_elems = aarch64_simd_gen_const_vector_dup (sel_mode, -nunits);
20225 if (GET_CODE (sel) != CONST_VECTOR
20226 || !aarch64_const_vec_all_in_range_p (sel, 0, 2 * nunits - 1))
20227 {
20228 rtx max_sel = aarch64_simd_gen_const_vector_dup (sel_mode,
20229 2 * nunits - 1);
20230 sel_reg = expand_simple_binop (sel_mode, AND, sel_reg, max_sel,
20231 NULL, 0, OPTAB_DIRECT);
20232 }
20233 emit_unspec2 (res0, UNSPEC_TBL, op0, sel_reg);
20234 rtx sel_sub = expand_simple_binop (sel_mode, PLUS, sel_reg, neg_num_elems,
20235 NULL, 0, OPTAB_DIRECT);
20236 emit_unspec2 (res1, UNSPEC_TBL, op1, sel_sub);
20237 if (GET_MODE_CLASS (data_mode) == MODE_VECTOR_INT)
20238 emit_insn (gen_rtx_SET (target, gen_rtx_IOR (data_mode, res0, res1)));
20239 else
20240 emit_unspec2 (target, UNSPEC_IORF, res0, res1);
20241}
20242
cc4d934f
JG
20243/* Recognize patterns suitable for the TRN instructions. */
20244static bool
20245aarch64_evpc_trn (struct expand_vec_perm_d *d)
20246{
6a70badb
RS
20247 HOST_WIDE_INT odd;
20248 poly_uint64 nelt = d->perm.length ();
cc4d934f 20249 rtx out, in0, in1, x;
ef4bddc2 20250 machine_mode vmode = d->vmode;
cc4d934f
JG
20251
20252 if (GET_MODE_UNIT_SIZE (vmode) > 8)
20253 return false;
20254
20255 /* Note that these are little-endian tests.
20256 We correct for big-endian later. */
6a70badb
RS
20257 if (!d->perm[0].is_constant (&odd)
20258 || (odd != 0 && odd != 1)
326ac20e
RS
20259 || !d->perm.series_p (0, 2, odd, 2)
20260 || !d->perm.series_p (1, 2, nelt + odd, 2))
cc4d934f 20261 return false;
cc4d934f
JG
20262
20263 /* Success! */
20264 if (d->testing_p)
20265 return true;
20266
20267 in0 = d->op0;
20268 in1 = d->op1;
43cacb12
RS
20269 /* We don't need a big-endian lane correction for SVE; see the comment
20270 at the head of aarch64-sve.md for details. */
20271 if (BYTES_BIG_ENDIAN && d->vec_flags == VEC_ADVSIMD)
cc4d934f
JG
20272 {
20273 x = in0, in0 = in1, in1 = x;
20274 odd = !odd;
20275 }
20276 out = d->target;
20277
3f8334a5
RS
20278 emit_set_insn (out, gen_rtx_UNSPEC (vmode, gen_rtvec (2, in0, in1),
20279 odd ? UNSPEC_TRN2 : UNSPEC_TRN1));
cc4d934f
JG
20280 return true;
20281}
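/* For example (illustrative, not from the original source): on V4SImode,
   little-endian selector {0, 4, 2, 6} matches the TRN1 form and
   {1, 5, 3, 7} matches TRN2, with the second operand's lanes offset
   by nelt.  */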
20282
7efc03fd
DP
20283/* Try to re-encode the PERM constant so it combines odd and even elements.
20284 This rewrites constants such as {0, 1, 4, 5}/V4SF to {0, 2}/V2DI.
20285 We then retry this new constant against the full suite of patterns. */
20286static bool
20287aarch64_evpc_reencode (struct expand_vec_perm_d *d)
20288{
20289 expand_vec_perm_d newd;
20290 unsigned HOST_WIDE_INT nelt;
20291
20292 if (d->vec_flags != VEC_ADVSIMD)
20293 return false;
20294
20295 /* Get the new mode. Always twice the size of the inner mode
20296 and half the number of elements. */
20297 poly_uint64 vec_bits = GET_MODE_BITSIZE (d->vmode);
20298 unsigned int new_elt_bits = GET_MODE_UNIT_BITSIZE (d->vmode) * 2;
20299 auto new_elt_mode = int_mode_for_size (new_elt_bits, false).require ();
20300 machine_mode new_mode = aarch64_simd_container_mode (new_elt_mode, vec_bits);
20301
20302 if (new_mode == word_mode)
20303 return false;
20304
20305 /* to_constant is safe since this routine is specific to Advanced SIMD
20306 vectors. */
20307 nelt = d->perm.length ().to_constant ();
20308
20309 vec_perm_builder newpermconst;
20310 newpermconst.new_vector (nelt / 2, nelt / 2, 1);
20311
20312 /* Convert the perm constant if we can. Require even, odd as the pairs. */
20313 for (unsigned int i = 0; i < nelt; i += 2)
20314 {
20315 poly_int64 elt0 = d->perm[i];
20316 poly_int64 elt1 = d->perm[i + 1];
20317 poly_int64 newelt;
20318 if (!multiple_p (elt0, 2, &newelt) || maybe_ne (elt0 + 1, elt1))
20319 return false;
20320 newpermconst.quick_push (newelt.to_constant ());
20321 }
20322 newpermconst.finalize ();
20323
20324 newd.vmode = new_mode;
20325 newd.vec_flags = VEC_ADVSIMD;
20326 newd.target = d->target ? gen_lowpart (new_mode, d->target) : NULL;
20327 newd.op0 = d->op0 ? gen_lowpart (new_mode, d->op0) : NULL;
20328 newd.op1 = d->op1 ? gen_lowpart (new_mode, d->op1) : NULL;
20329 newd.testing_p = d->testing_p;
20330 newd.one_vector_p = d->one_vector_p;
20331
20332 newd.perm.new_vector (newpermconst, newd.one_vector_p ? 1 : 2, nelt / 2);
20333 return aarch64_expand_vec_perm_const_1 (&newd);
20334}
20335
cc4d934f
JG
20336/* Recognize patterns suitable for the UZP instructions. */
20337static bool
20338aarch64_evpc_uzp (struct expand_vec_perm_d *d)
20339{
6a70badb 20340 HOST_WIDE_INT odd;
cc4d934f 20341 rtx out, in0, in1, x;
ef4bddc2 20342 machine_mode vmode = d->vmode;
cc4d934f
JG
20343
20344 if (GET_MODE_UNIT_SIZE (vmode) > 8)
20345 return false;
20346
20347 /* Note that these are little-endian tests.
20348 We correct for big-endian later. */
6a70badb
RS
20349 if (!d->perm[0].is_constant (&odd)
20350 || (odd != 0 && odd != 1)
326ac20e 20351 || !d->perm.series_p (0, 1, odd, 2))
cc4d934f 20352 return false;
cc4d934f
JG
20353
20354 /* Success! */
20355 if (d->testing_p)
20356 return true;
20357
20358 in0 = d->op0;
20359 in1 = d->op1;
43cacb12
RS
20360 /* We don't need a big-endian lane correction for SVE; see the comment
20361 at the head of aarch64-sve.md for details. */
20362 if (BYTES_BIG_ENDIAN && d->vec_flags == VEC_ADVSIMD)
cc4d934f
JG
20363 {
20364 x = in0, in0 = in1, in1 = x;
20365 odd = !odd;
20366 }
20367 out = d->target;
20368
3f8334a5
RS
20369 emit_set_insn (out, gen_rtx_UNSPEC (vmode, gen_rtvec (2, in0, in1),
20370 odd ? UNSPEC_UZP2 : UNSPEC_UZP1));
cc4d934f
JG
20371 return true;
20372}
20373
20374/* Recognize patterns suitable for the ZIP instructions. */
20375static bool
20376aarch64_evpc_zip (struct expand_vec_perm_d *d)
20377{
6a70badb
RS
20378 unsigned int high;
20379 poly_uint64 nelt = d->perm.length ();
cc4d934f 20380 rtx out, in0, in1, x;
ef4bddc2 20381 machine_mode vmode = d->vmode;
cc4d934f
JG
20382
20383 if (GET_MODE_UNIT_SIZE (vmode) > 8)
20384 return false;
20385
20386 /* Note that these are little-endian tests.
20387 We correct for big-endian later. */
6a70badb
RS
20388 poly_uint64 first = d->perm[0];
20389 if ((maybe_ne (first, 0U) && maybe_ne (first * 2, nelt))
20390 || !d->perm.series_p (0, 2, first, 1)
20391 || !d->perm.series_p (1, 2, first + nelt, 1))
cc4d934f 20392 return false;
6a70badb 20393 high = maybe_ne (first, 0U);
cc4d934f
JG
20394
20395 /* Success! */
20396 if (d->testing_p)
20397 return true;
20398
20399 in0 = d->op0;
20400 in1 = d->op1;
43cacb12
RS
20401 /* We don't need a big-endian lane correction for SVE; see the comment
20402 at the head of aarch64-sve.md for details. */
20403 if (BYTES_BIG_ENDIAN && d->vec_flags == VEC_ADVSIMD)
cc4d934f
JG
20404 {
20405 x = in0, in0 = in1, in1 = x;
20406 high = !high;
20407 }
20408 out = d->target;
20409
3f8334a5
RS
20410 emit_set_insn (out, gen_rtx_UNSPEC (vmode, gen_rtvec (2, in0, in1),
20411 high ? UNSPEC_ZIP2 : UNSPEC_ZIP1));
cc4d934f
JG
20412 return true;
20413}
20414
ae0533da
AL
20415/* Recognize patterns for the EXT insn. */
20416
20417static bool
20418aarch64_evpc_ext (struct expand_vec_perm_d *d)
20419{
6a70badb 20420 HOST_WIDE_INT location;
ae0533da
AL
20421 rtx offset;
20422
6a70badb
RS
20423 /* The first element always refers to the first vector.
20424 Check if the extracted indices are increasing by one. */
43cacb12
RS
20425 if (d->vec_flags == VEC_SVE_PRED
20426 || !d->perm[0].is_constant (&location)
6a70badb 20427 || !d->perm.series_p (0, 1, location, 1))
326ac20e 20428 return false;
ae0533da 20429
ae0533da
AL
20430 /* Success! */
20431 if (d->testing_p)
20432 return true;
20433
b31e65bb 20434 /* The case where (location == 0) is a no-op for both big- and little-endian,
43cacb12 20435 and is removed by the mid-end at optimization levels -O1 and higher.
b31e65bb 20436
43cacb12
RS
20437 We don't need a big-endian lane correction for SVE; see the comment
20438 at the head of aarch64-sve.md for details. */
20439 if (BYTES_BIG_ENDIAN && location != 0 && d->vec_flags == VEC_ADVSIMD)
ae0533da
AL
20440 {
20441 /* After setup, we want the high elements of the first vector (stored
20442 at the LSB end of the register), and the low elements of the second
20443 vector (stored at the MSB end of the register). So swap. */
cb5c6c29 20444 std::swap (d->op0, d->op1);
6a70badb
RS
20445 /* location != 0 (above), so safe to assume (nelt - location) < nelt.
20446 to_constant () is safe since this is restricted to Advanced SIMD
20447 vectors. */
20448 location = d->perm.length ().to_constant () - location;
ae0533da
AL
20449 }
20450
20451 offset = GEN_INT (location);
3f8334a5
RS
20452 emit_set_insn (d->target,
20453 gen_rtx_UNSPEC (d->vmode,
20454 gen_rtvec (3, d->op0, d->op1, offset),
20455 UNSPEC_EXT));
ae0533da
AL
20456 return true;
20457}
20458
43cacb12
RS
20459/* Recognize patterns for the REV{64,32,16} insns, which reverse elements
20460 within each 64-bit, 32-bit or 16-bit granule. */
923fcec3
AL
20461
20462static bool
43cacb12 20463aarch64_evpc_rev_local (struct expand_vec_perm_d *d)
923fcec3 20464{
6a70badb
RS
20465 HOST_WIDE_INT diff;
20466 unsigned int i, size, unspec;
43cacb12 20467 machine_mode pred_mode;
923fcec3 20468
43cacb12
RS
20469 if (d->vec_flags == VEC_SVE_PRED
20470 || !d->one_vector_p
98452668
AC
20471 || !d->perm[0].is_constant (&diff)
20472 || !diff)
923fcec3
AL
20473 return false;
20474
3f8334a5
RS
20475 size = (diff + 1) * GET_MODE_UNIT_SIZE (d->vmode);
20476 if (size == 8)
43cacb12
RS
20477 {
20478 unspec = UNSPEC_REV64;
20479 pred_mode = VNx2BImode;
20480 }
3f8334a5 20481 else if (size == 4)
43cacb12
RS
20482 {
20483 unspec = UNSPEC_REV32;
20484 pred_mode = VNx4BImode;
20485 }
3f8334a5 20486 else if (size == 2)
43cacb12
RS
20487 {
20488 unspec = UNSPEC_REV16;
20489 pred_mode = VNx8BImode;
20490 }
3f8334a5
RS
20491 else
20492 return false;
923fcec3 20493
326ac20e
RS
20494 unsigned int step = diff + 1;
20495 for (i = 0; i < step; ++i)
20496 if (!d->perm.series_p (i, step, diff - i, step))
20497 return false;
923fcec3
AL
20498
20499 /* Success! */
20500 if (d->testing_p)
20501 return true;
20502
43cacb12
RS
20503 if (d->vec_flags == VEC_SVE_DATA)
20504 {
d7a09c44
RS
20505 machine_mode int_mode = aarch64_sve_int_mode (pred_mode);
20506 rtx target = gen_reg_rtx (int_mode);
20507 if (BYTES_BIG_ENDIAN)
20508 /* The act of taking a subreg between INT_MODE and d->vmode
20509 is itself a reversing operation on big-endian targets;
20510 see the comment at the head of aarch64-sve.md for details.
20511 First reinterpret OP0 as INT_MODE without using a subreg
20512 and without changing the contents. */
20513 emit_insn (gen_aarch64_sve_reinterpret (int_mode, target, d->op0));
20514 else
20515 {
20516 /* For SVE we use REV[BHW] unspecs derived from the element size
20517 of d->vmode and vector modes whose elements have SIZE bytes.
20518 This ensures that the vector modes match the predicate modes. */
20519 int unspec = aarch64_sve_rev_unspec (d->vmode);
20520 rtx pred = aarch64_ptrue_reg (pred_mode);
20521 emit_insn (gen_aarch64_pred (unspec, int_mode, target, pred,
20522 gen_lowpart (int_mode, d->op0)));
20523 }
20524 emit_move_insn (d->target, gen_lowpart (d->vmode, target));
20525 return true;
43cacb12 20526 }
d7a09c44 20527 rtx src = gen_rtx_UNSPEC (d->vmode, gen_rtvec (1, d->op0), unspec);
43cacb12
RS
20528 emit_set_insn (d->target, src);
20529 return true;
20530}
20531
20532/* Recognize patterns for the REV insn, which reverses elements within
20533 a full vector. */
20534
20535static bool
20536aarch64_evpc_rev_global (struct expand_vec_perm_d *d)
20537{
20538 poly_uint64 nelt = d->perm.length ();
20539
28350fd1 20540 if (!d->one_vector_p || d->vec_flags == VEC_ADVSIMD)
43cacb12
RS
20541 return false;
20542
20543 if (!d->perm.series_p (0, 1, nelt - 1, -1))
20544 return false;
20545
20546 /* Success! */
20547 if (d->testing_p)
20548 return true;
20549
20550 rtx src = gen_rtx_UNSPEC (d->vmode, gen_rtvec (1, d->op0), UNSPEC_REV);
20551 emit_set_insn (d->target, src);
923fcec3
AL
20552 return true;
20553}
20554
91bd4114
JG
20555static bool
20556aarch64_evpc_dup (struct expand_vec_perm_d *d)
20557{
91bd4114
JG
20558 rtx out = d->target;
20559 rtx in0;
6a70badb 20560 HOST_WIDE_INT elt;
ef4bddc2 20561 machine_mode vmode = d->vmode;
91bd4114
JG
20562 rtx lane;
20563
43cacb12
RS
20564 if (d->vec_flags == VEC_SVE_PRED
20565 || d->perm.encoding ().encoded_nelts () != 1
6a70badb 20566 || !d->perm[0].is_constant (&elt))
326ac20e
RS
20567 return false;
20568
43cacb12
RS
20569 if (d->vec_flags == VEC_SVE_DATA && elt >= 64 * GET_MODE_UNIT_SIZE (vmode))
20570 return false;
20571
326ac20e
RS
20572 /* Success! */
20573 if (d->testing_p)
20574 return true;
20575
91bd4114
JG
20576 /* The generic preparation in aarch64_expand_vec_perm_const_1
20577 swaps the operand order and the permute indices if it finds
20578 d->perm[0] to be in the second operand. Thus, we can always
20579 use d->op0 and need not do any extra arithmetic to get the
20580 correct lane number. */
20581 in0 = d->op0;
f901401e 20582 lane = GEN_INT (elt); /* The pattern corrects for big-endian. */
91bd4114 20583
3f8334a5
RS
20584 rtx parallel = gen_rtx_PARALLEL (vmode, gen_rtvec (1, lane));
20585 rtx select = gen_rtx_VEC_SELECT (GET_MODE_INNER (vmode), in0, parallel);
20586 emit_set_insn (out, gen_rtx_VEC_DUPLICATE (vmode, select));
91bd4114
JG
20587 return true;
20588}
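/* For example (illustrative, not from the original source): a V4SImode
   selector of {3, 3, 3, 3} is matched here and becomes a DUP of lane 3
   of the (possibly pre-swapped) first operand.  */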
20589
88b08073
JG
20590static bool
20591aarch64_evpc_tbl (struct expand_vec_perm_d *d)
20592{
43cacb12 20593 rtx rperm[MAX_COMPILE_TIME_VEC_BYTES], sel;
ef4bddc2 20594 machine_mode vmode = d->vmode;
6a70badb
RS
20595
20596 /* Make sure that the indices are constant. */
20597 unsigned int encoded_nelts = d->perm.encoding ().encoded_nelts ();
20598 for (unsigned int i = 0; i < encoded_nelts; ++i)
20599 if (!d->perm[i].is_constant ())
20600 return false;
88b08073 20601
88b08073
JG
20602 if (d->testing_p)
20603 return true;
20604
20605 /* Generic code will try constant permutation twice: once with the
20606 original mode and again with the elements lowered to QImode.
20607 So wait and don't do the selector expansion ourselves. */
20608 if (vmode != V8QImode && vmode != V16QImode)
20609 return false;
20610
6a70badb
RS
20611 /* to_constant is safe since this routine is specific to Advanced SIMD
20612 vectors. */
20613 unsigned int nelt = d->perm.length ().to_constant ();
20614 for (unsigned int i = 0; i < nelt; ++i)
20615 /* If big-endian and two vectors we end up with a weird mixed-endian
20616 mode on NEON. Reverse the index within each word but not the word
20617 itself. to_constant is safe because we checked is_constant above. */
20618 rperm[i] = GEN_INT (BYTES_BIG_ENDIAN
20619 ? d->perm[i].to_constant () ^ (nelt - 1)
20620 : d->perm[i].to_constant ());
bbcc9c00 20621
88b08073
JG
20622 sel = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, rperm));
20623 sel = force_reg (vmode, sel);
20624
20625 aarch64_expand_vec_perm_1 (d->target, d->op0, d->op1, sel);
20626 return true;
20627}
20628
43cacb12
RS
20629/* Try to implement D using an SVE TBL instruction. */
20630
20631static bool
20632aarch64_evpc_sve_tbl (struct expand_vec_perm_d *d)
20633{
20634 unsigned HOST_WIDE_INT nelt;
20635
20636 /* Permuting two variable-length vectors could overflow the
20637 index range. */
20638 if (!d->one_vector_p && !d->perm.length ().is_constant (&nelt))
20639 return false;
20640
20641 if (d->testing_p)
20642 return true;
20643
d083ee47 20644 machine_mode sel_mode = related_int_vector_mode (d->vmode).require ();
43cacb12 20645 rtx sel = vec_perm_indices_to_rtx (sel_mode, d->perm);
e25c95ef
RS
20646 if (d->one_vector_p)
20647 emit_unspec2 (d->target, UNSPEC_TBL, d->op0, force_reg (sel_mode, sel));
20648 else
20649 aarch64_expand_sve_vec_perm (d->target, d->op0, d->op1, sel);
43cacb12
RS
20650 return true;
20651}
20652
9556ef20
PK
20653/* Try to implement D using SVE SEL instruction. */
20654
20655static bool
20656aarch64_evpc_sel (struct expand_vec_perm_d *d)
20657{
20658 machine_mode vmode = d->vmode;
20659 int unit_size = GET_MODE_UNIT_SIZE (vmode);
20660
20661 if (d->vec_flags != VEC_SVE_DATA
20662 || unit_size > 8)
20663 return false;
20664
20665 int n_patterns = d->perm.encoding ().npatterns ();
20666 poly_int64 vec_len = d->perm.length ();
20667
20668 for (int i = 0; i < n_patterns; ++i)
20669 if (!known_eq (d->perm[i], i)
20670 && !known_eq (d->perm[i], vec_len + i))
20671 return false;
20672
20673 for (int i = n_patterns; i < n_patterns * 2; i++)
20674 if (!d->perm.series_p (i, n_patterns, i, n_patterns)
20675 && !d->perm.series_p (i, n_patterns, vec_len + i, n_patterns))
20676 return false;
20677
20678 if (d->testing_p)
20679 return true;
20680
cc68f7c2 20681 machine_mode pred_mode = aarch64_sve_pred_mode (vmode);
9556ef20 20682
b2f5b380 20683 /* Build a predicate that is true when op0 elements should be used. */
9556ef20
PK
20684 rtx_vector_builder builder (pred_mode, n_patterns, 2);
20685 for (int i = 0; i < n_patterns * 2; i++)
20686 {
20687 rtx elem = known_eq (d->perm[i], i) ? CONST1_RTX (BImode)
20688 : CONST0_RTX (BImode);
20689 builder.quick_push (elem);
20690 }
20691
20692 rtx const_vec = builder.build ();
20693 rtx pred = force_reg (pred_mode, const_vec);
b2f5b380
RS
20694 /* TARGET = PRED ? OP0 : OP1. */
20695 emit_insn (gen_vcond_mask (vmode, vmode, d->target, d->op0, d->op1, pred));
9556ef20
PK
20696 return true;
20697}
20698
c9c87e6f
DP
20699/* Recognize patterns suitable for the INS instructions. */
20700static bool
20701aarch64_evpc_ins (struct expand_vec_perm_d *d)
20702{
20703 machine_mode mode = d->vmode;
20704 unsigned HOST_WIDE_INT nelt;
20705
20706 if (d->vec_flags != VEC_ADVSIMD)
20707 return false;
20708
20709 /* to_constant is safe since this routine is specific to Advanced SIMD
20710 vectors. */
20711 nelt = d->perm.length ().to_constant ();
20712 rtx insv = d->op0;
20713
20714 HOST_WIDE_INT idx = -1;
20715
20716 for (unsigned HOST_WIDE_INT i = 0; i < nelt; i++)
20717 {
20718 HOST_WIDE_INT elt;
20719 if (!d->perm[i].is_constant (&elt))
20720 return false;
20721 if (elt == (HOST_WIDE_INT) i)
20722 continue;
20723 if (idx != -1)
20724 {
20725 idx = -1;
20726 break;
20727 }
20728 idx = i;
20729 }
20730
20731 if (idx == -1)
20732 {
20733 insv = d->op1;
20734 for (unsigned HOST_WIDE_INT i = 0; i < nelt; i++)
20735 {
20736 if (d->perm[i].to_constant () == (HOST_WIDE_INT) (i + nelt))
20737 continue;
20738 if (idx != -1)
20739 return false;
20740 idx = i;
20741 }
20742
20743 if (idx == -1)
20744 return false;
20745 }
20746
20747 if (d->testing_p)
20748 return true;
20749
20750 gcc_assert (idx != -1);
20751
20752 unsigned extractindex = d->perm[idx].to_constant ();
20753 rtx extractv = d->op0;
20754 if (extractindex >= nelt)
20755 {
20756 extractv = d->op1;
20757 extractindex -= nelt;
20758 }
20759 gcc_assert (extractindex < nelt);
20760
20761 emit_move_insn (d->target, insv);
20762 insn_code icode = code_for_aarch64_simd_vec_copy_lane (mode);
20763 expand_operand ops[5];
20764 create_output_operand (&ops[0], d->target, mode);
20765 create_input_operand (&ops[1], d->target, mode);
20766 create_integer_operand (&ops[2], 1 << idx);
20767 create_input_operand (&ops[3], extractv, mode);
20768 create_integer_operand (&ops[4], extractindex);
20769 expand_insn (icode, 5, ops);
20770
20771 return true;
20772}
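/* For example (illustrative, not from the original source): a V4SImode
   selector of {0, 1, 6, 3} differs from the identity in exactly one lane,
   so it is matched here as an INS of element 2 of the second vector into
   element 2 of the first.  */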
20773
88b08073
JG
20774static bool
20775aarch64_expand_vec_perm_const_1 (struct expand_vec_perm_d *d)
20776{
20777 /* The pattern matching functions above are written to look for a small
20778 number to begin the sequence (0, 1, N/2). If we begin with an index
20779 from the second operand, we can swap the operands. */
6a70badb
RS
20780 poly_int64 nelt = d->perm.length ();
20781 if (known_ge (d->perm[0], nelt))
88b08073 20782 {
e3342de4 20783 d->perm.rotate_inputs (1);
cb5c6c29 20784 std::swap (d->op0, d->op1);
88b08073
JG
20785 }
20786
43cacb12
RS
20787 if ((d->vec_flags == VEC_ADVSIMD
20788 || d->vec_flags == VEC_SVE_DATA
20789 || d->vec_flags == VEC_SVE_PRED)
20790 && known_gt (nelt, 1))
cc4d934f 20791 {
43cacb12
RS
20792 if (aarch64_evpc_rev_local (d))
20793 return true;
20794 else if (aarch64_evpc_rev_global (d))
923fcec3
AL
20795 return true;
20796 else if (aarch64_evpc_ext (d))
ae0533da 20797 return true;
f901401e
AL
20798 else if (aarch64_evpc_dup (d))
20799 return true;
ae0533da 20800 else if (aarch64_evpc_zip (d))
cc4d934f
JG
20801 return true;
20802 else if (aarch64_evpc_uzp (d))
20803 return true;
20804 else if (aarch64_evpc_trn (d))
20805 return true;
9556ef20
PK
20806 else if (aarch64_evpc_sel (d))
20807 return true;
c9c87e6f
DP
20808 else if (aarch64_evpc_ins (d))
20809 return true;
7efc03fd
DP
20810 else if (aarch64_evpc_reencode (d))
20811 return true;
43cacb12
RS
20812 if (d->vec_flags == VEC_SVE_DATA)
20813 return aarch64_evpc_sve_tbl (d);
4ec8bb67 20814 else if (d->vec_flags == VEC_ADVSIMD)
43cacb12 20815 return aarch64_evpc_tbl (d);
cc4d934f 20816 }
88b08073
JG
20817 return false;
20818}
20819
f151c9e1 20820/* Implement TARGET_VECTORIZE_VEC_PERM_CONST. */
88b08073 20821
f151c9e1
RS
20822static bool
20823aarch64_vectorize_vec_perm_const (machine_mode vmode, rtx target, rtx op0,
20824 rtx op1, const vec_perm_indices &sel)
88b08073
JG
20825{
20826 struct expand_vec_perm_d d;
88b08073 20827
326ac20e 20828 /* Check whether the mask can be applied to a single vector. */
e25c95ef
RS
20829 if (sel.ninputs () == 1
20830 || (op0 && rtx_equal_p (op0, op1)))
326ac20e
RS
20831 d.one_vector_p = true;
20832 else if (sel.all_from_input_p (0))
88b08073 20833 {
326ac20e
RS
20834 d.one_vector_p = true;
20835 op1 = op0;
88b08073 20836 }
326ac20e 20837 else if (sel.all_from_input_p (1))
88b08073 20838 {
88b08073 20839 d.one_vector_p = true;
326ac20e 20840 op0 = op1;
88b08073 20841 }
326ac20e
RS
20842 else
20843 d.one_vector_p = false;
88b08073 20844
326ac20e
RS
20845 d.perm.new_vector (sel.encoding (), d.one_vector_p ? 1 : 2,
20846 sel.nelts_per_input ());
20847 d.vmode = vmode;
43cacb12 20848 d.vec_flags = aarch64_classify_vector_mode (d.vmode);
326ac20e
RS
20849 d.target = target;
20850 d.op0 = op0;
20851 d.op1 = op1;
20852 d.testing_p = !target;
e3342de4 20853
f151c9e1
RS
20854 if (!d.testing_p)
20855 return aarch64_expand_vec_perm_const_1 (&d);
88b08073 20856
326ac20e 20857 rtx_insn *last = get_last_insn ();
f151c9e1 20858 bool ret = aarch64_expand_vec_perm_const_1 (&d);
326ac20e 20859 gcc_assert (last == get_last_insn ());
88b08073
JG
20860
20861 return ret;
20862}
20863
73e3da51
RS
20864/* Generate a byte permute mask for a register of mode MODE,
20865 which has NUNITS units. */
20866
668046d1 20867rtx
73e3da51 20868aarch64_reverse_mask (machine_mode mode, unsigned int nunits)
668046d1
DS
20869{
20870 /* We have to reverse each vector because we don't have
20871 a permuted load that can reverse-load according to ABI rules. */
20872 rtx mask;
20873 rtvec v = rtvec_alloc (16);
73e3da51
RS
20874 unsigned int i, j;
20875 unsigned int usize = GET_MODE_UNIT_SIZE (mode);
668046d1
DS
20876
20877 gcc_assert (BYTES_BIG_ENDIAN);
20878 gcc_assert (AARCH64_VALID_SIMD_QREG_MODE (mode));
20879
20880 for (i = 0; i < nunits; i++)
20881 for (j = 0; j < usize; j++)
20882 RTVEC_ELT (v, i * usize + j) = GEN_INT ((i + 1) * usize - 1 - j);
20883 mask = gen_rtx_CONST_VECTOR (V16QImode, v);
20884 return force_reg (V16QImode, mask);
20885}
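/* For example (illustrative, not from the original source): for a V4SImode
   register (NUNITS == 4, unit size 4) the byte permute mask built above is
     { 3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12 },
   i.e. the bytes of each 32-bit element are reversed in place.  */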
20886
4a942af6 20887/* Expand an SVE integer comparison using the SVE equivalent of:
f22d7973 20888
4a942af6
RS
20889 (set TARGET (CODE OP0 OP1)). */
20890
20891void
20892aarch64_expand_sve_vec_cmp_int (rtx target, rtx_code code, rtx op0, rtx op1)
f22d7973 20893{
4a942af6
RS
20894 machine_mode pred_mode = GET_MODE (target);
20895 machine_mode data_mode = GET_MODE (op0);
00fa90d9
RS
20896 rtx res = aarch64_sve_emit_int_cmp (target, pred_mode, code, data_mode,
20897 op0, op1);
20898 if (!rtx_equal_p (target, res))
20899 emit_move_insn (target, res);
f22d7973
RS
20900}
20901
43cacb12
RS
20902/* Return the UNSPEC_COND_* code for comparison CODE. */
20903
20904static unsigned int
20905aarch64_unspec_cond_code (rtx_code code)
20906{
20907 switch (code)
20908 {
20909 case NE:
cb18e86d 20910 return UNSPEC_COND_FCMNE;
43cacb12 20911 case EQ:
cb18e86d 20912 return UNSPEC_COND_FCMEQ;
43cacb12 20913 case LT:
cb18e86d 20914 return UNSPEC_COND_FCMLT;
43cacb12 20915 case GT:
cb18e86d 20916 return UNSPEC_COND_FCMGT;
43cacb12 20917 case LE:
cb18e86d 20918 return UNSPEC_COND_FCMLE;
43cacb12 20919 case GE:
cb18e86d 20920 return UNSPEC_COND_FCMGE;
4a942af6
RS
20921 case UNORDERED:
20922 return UNSPEC_COND_FCMUO;
43cacb12
RS
20923 default:
20924 gcc_unreachable ();
20925 }
20926}
20927
f22d7973 20928/* Emit:
43cacb12 20929
4a942af6 20930 (set TARGET (unspec [PRED KNOWN_PTRUE_P OP0 OP1] UNSPEC_COND_<X>))
f22d7973 20931
4a942af6
RS
20932 where <X> is the operation associated with comparison CODE.
20933 KNOWN_PTRUE_P is true if PRED is known to be a PTRUE. */
f22d7973
RS
20934
20935static void
4a942af6
RS
20936aarch64_emit_sve_fp_cond (rtx target, rtx_code code, rtx pred,
20937 bool known_ptrue_p, rtx op0, rtx op1)
43cacb12 20938{
4a942af6 20939 rtx flag = gen_int_mode (known_ptrue_p, SImode);
f22d7973 20940 rtx unspec = gen_rtx_UNSPEC (GET_MODE (pred),
4a942af6 20941 gen_rtvec (4, pred, flag, op0, op1),
f22d7973
RS
20942 aarch64_unspec_cond_code (code));
20943 emit_set_insn (target, unspec);
43cacb12
RS
20944}
20945
f22d7973 20946/* Emit the SVE equivalent of:
43cacb12 20947
4a942af6
RS
20948 (set TMP1 (unspec [PRED KNOWN_PTRUE_P OP0 OP1] UNSPEC_COND_<X1>))
20949 (set TMP2 (unspec [PRED KNOWN_PTRUE_P OP0 OP1] UNSPEC_COND_<X2>))
f22d7973 20950 (set TARGET (ior:PRED_MODE TMP1 TMP2))
43cacb12 20951
4a942af6
RS
20952 where <Xi> is the operation associated with comparison CODEi.
20953 KNOWN_PTRUE_P is true if PRED is known to be a PTRUE. */
43cacb12
RS
20954
20955static void
4a942af6
RS
20956aarch64_emit_sve_or_fp_conds (rtx target, rtx_code code1, rtx_code code2,
20957 rtx pred, bool known_ptrue_p, rtx op0, rtx op1)
43cacb12 20958{
4a942af6 20959 machine_mode pred_mode = GET_MODE (pred);
43cacb12 20960 rtx tmp1 = gen_reg_rtx (pred_mode);
4a942af6 20961 aarch64_emit_sve_fp_cond (tmp1, code1, pred, known_ptrue_p, op0, op1);
43cacb12 20962 rtx tmp2 = gen_reg_rtx (pred_mode);
4a942af6 20963 aarch64_emit_sve_fp_cond (tmp2, code2, pred, known_ptrue_p, op0, op1);
f22d7973 20964 aarch64_emit_binop (target, ior_optab, tmp1, tmp2);
43cacb12
RS
20965}
20966
f22d7973 20967/* Emit the SVE equivalent of:
43cacb12 20968
4a942af6 20969 (set TMP (unspec [PRED KNOWN_PTRUE_P OP0 OP1] UNSPEC_COND_<X>))
f22d7973 20970 (set TARGET (not TMP))
43cacb12 20971
4a942af6
RS
20972 where <X> is the operation associated with comparison CODE.
20973 KNOWN_PTRUE_P is true if PRED is known to be a PTRUE. */
43cacb12
RS
20974
20975static void
4a942af6
RS
20976aarch64_emit_sve_invert_fp_cond (rtx target, rtx_code code, rtx pred,
20977 bool known_ptrue_p, rtx op0, rtx op1)
43cacb12 20978{
4a942af6 20979 machine_mode pred_mode = GET_MODE (pred);
f22d7973 20980 rtx tmp = gen_reg_rtx (pred_mode);
4a942af6 20981 aarch64_emit_sve_fp_cond (tmp, code, pred, known_ptrue_p, op0, op1);
f22d7973 20982 aarch64_emit_unop (target, one_cmpl_optab, tmp);
43cacb12
RS
20983}
20984
f22d7973 20985/* Expand an SVE floating-point comparison using the SVE equivalent of:
43cacb12 20986
f22d7973 20987 (set TARGET (CODE OP0 OP1))
43cacb12
RS
20988
20989 If CAN_INVERT_P is true, the caller can also handle inverted results;
20990 return true if the result is in fact inverted. */
20991
20992bool
20993aarch64_expand_sve_vec_cmp_float (rtx target, rtx_code code,
20994 rtx op0, rtx op1, bool can_invert_p)
20995{
20996 machine_mode pred_mode = GET_MODE (target);
20997 machine_mode data_mode = GET_MODE (op0);
20998
16de3637 20999 rtx ptrue = aarch64_ptrue_reg (pred_mode);
43cacb12
RS
21000 switch (code)
21001 {
21002 case UNORDERED:
21003 /* UNORDERED has no immediate form. */
21004 op1 = force_reg (data_mode, op1);
f22d7973 21005 /* fall through */
43cacb12
RS
21006 case LT:
21007 case LE:
21008 case GT:
21009 case GE:
21010 case EQ:
21011 case NE:
f22d7973
RS
21012 {
21013 /* There is native support for the comparison. */
4a942af6 21014 aarch64_emit_sve_fp_cond (target, code, ptrue, true, op0, op1);
f22d7973
RS
21015 return false;
21016 }
43cacb12
RS
21017
21018 case LTGT:
21019 /* This is a trapping operation (LT or GT). */
4a942af6 21020 aarch64_emit_sve_or_fp_conds (target, LT, GT, ptrue, true, op0, op1);
43cacb12
RS
21021 return false;
21022
21023 case UNEQ:
21024 if (!flag_trapping_math)
21025 {
21026 /* This would trap for signaling NaNs. */
21027 op1 = force_reg (data_mode, op1);
4a942af6
RS
21028 aarch64_emit_sve_or_fp_conds (target, UNORDERED, EQ,
21029 ptrue, true, op0, op1);
43cacb12
RS
21030 return false;
21031 }
21032 /* fall through */
43cacb12
RS
21033 case UNLT:
21034 case UNLE:
21035 case UNGT:
21036 case UNGE:
f22d7973
RS
21037 if (flag_trapping_math)
21038 {
21039 /* Work out which elements are ordered. */
21040 rtx ordered = gen_reg_rtx (pred_mode);
21041 op1 = force_reg (data_mode, op1);
4a942af6
RS
21042 aarch64_emit_sve_invert_fp_cond (ordered, UNORDERED,
21043 ptrue, true, op0, op1);
f22d7973
RS
21044
21045 /* Test the opposite condition for the ordered elements,
21046 then invert the result. */
21047 if (code == UNEQ)
21048 code = NE;
21049 else
21050 code = reverse_condition_maybe_unordered (code);
21051 if (can_invert_p)
21052 {
4a942af6
RS
21053 aarch64_emit_sve_fp_cond (target, code,
21054 ordered, false, op0, op1);
f22d7973
RS
21055 return true;
21056 }
4a942af6
RS
21057 aarch64_emit_sve_invert_fp_cond (target, code,
21058 ordered, false, op0, op1);
f22d7973
RS
21059 return false;
21060 }
21061 break;
21062
21063 case ORDERED:
21064 /* ORDERED has no immediate form. */
21065 op1 = force_reg (data_mode, op1);
21066 break;
43cacb12
RS
21067
21068 default:
21069 gcc_unreachable ();
21070 }
f22d7973
RS
21071
21072 /* There is native support for the inverse comparison. */
21073 code = reverse_condition_maybe_unordered (code);
21074 if (can_invert_p)
21075 {
4a942af6 21076 aarch64_emit_sve_fp_cond (target, code, ptrue, true, op0, op1);
f22d7973
RS
21077 return true;
21078 }
4a942af6 21079 aarch64_emit_sve_invert_fp_cond (target, code, ptrue, true, op0, op1);
f22d7973 21080 return false;
43cacb12
RS
21081}
21082
21083/* Expand an SVE vcond pattern with operands OPS. DATA_MODE is the mode
21084 of the data being selected and CMP_MODE is the mode of the values being
21085 compared. */
21086
21087void
21088aarch64_expand_sve_vcond (machine_mode data_mode, machine_mode cmp_mode,
21089 rtx *ops)
21090{
10116ec1 21091 machine_mode pred_mode = aarch64_get_mask_mode (cmp_mode).require ();
43cacb12
RS
21092 rtx pred = gen_reg_rtx (pred_mode);
21093 if (FLOAT_MODE_P (cmp_mode))
21094 {
21095 if (aarch64_expand_sve_vec_cmp_float (pred, GET_CODE (ops[3]),
21096 ops[4], ops[5], true))
21097 std::swap (ops[1], ops[2]);
21098 }
21099 else
21100 aarch64_expand_sve_vec_cmp_int (pred, GET_CODE (ops[3]), ops[4], ops[5]);
21101
d29f7dd5
RS
21102 if (!aarch64_sve_reg_or_dup_imm (ops[1], data_mode))
21103 ops[1] = force_reg (data_mode, ops[1]);
21104 /* The "false" value can only be zero if the "true" value is a constant. */
21105 if (register_operand (ops[1], data_mode)
21106 || !aarch64_simd_reg_or_zero (ops[2], data_mode))
21107 ops[2] = force_reg (data_mode, ops[2]);
21108
43cacb12
RS
21109 rtvec vec = gen_rtvec (3, pred, ops[1], ops[2]);
21110 emit_set_insn (ops[0], gen_rtx_UNSPEC (data_mode, vec, UNSPEC_SEL));
21111}
21112
99e1629f
RS
21113/* Implement TARGET_MODES_TIEABLE_P. In principle we should always return
21114	   true.  However, due to issues with register allocation it is preferable
21115	   to avoid tying integer scalar and FP scalar modes.  Executing integer
21116 operations in general registers is better than treating them as scalar
21117 vector operations. This reduces latency and avoids redundant int<->FP
21118 moves. So tie modes if they are either the same class, or vector modes
21119 with other vector modes, vector structs or any scalar mode. */
97e1ad78 21120
99e1629f 21121static bool
ef4bddc2 21122aarch64_modes_tieable_p (machine_mode mode1, machine_mode mode2)
97e1ad78
JG
21123{
21124 if (GET_MODE_CLASS (mode1) == GET_MODE_CLASS (mode2))
21125 return true;
21126
21127 /* We specifically want to allow elements of "structure" modes to
21128 be tieable to the structure. This more general condition allows
43cacb12
RS
21129 other rarer situations too. The reason we don't extend this to
21130 predicate modes is that there are no predicate structure modes
21131 nor any specific instructions for extracting part of a predicate
21132 register. */
21133 if (aarch64_vector_data_mode_p (mode1)
21134 && aarch64_vector_data_mode_p (mode2))
61f17a5c
WD
21135 return true;
21136
21137 /* Also allow any scalar modes with vectors. */
21138 if (aarch64_vector_mode_supported_p (mode1)
21139 || aarch64_vector_mode_supported_p (mode2))
97e1ad78
JG
21140 return true;
21141
21142 return false;
21143}
21144
e2c75eea
JG
21145/* Return a new RTX holding the result of moving POINTER forward by
21146 AMOUNT bytes. */
21147
21148static rtx
6a70badb 21149aarch64_move_pointer (rtx pointer, poly_int64 amount)
e2c75eea
JG
21150{
21151 rtx next = plus_constant (Pmode, XEXP (pointer, 0), amount);
21152
21153 return adjust_automodify_address (pointer, GET_MODE (pointer),
21154 next, amount);
21155}
21156
21157/* Return a new RTX holding the result of moving POINTER forward by the
21158 size of the mode it points to. */
21159
21160static rtx
21161aarch64_progress_pointer (rtx pointer)
21162{
6a70badb 21163 return aarch64_move_pointer (pointer, GET_MODE_SIZE (GET_MODE (pointer)));
e2c75eea
JG
21164}
21165
21166/* Copy one MODE sized block from SRC to DST, then progress SRC and DST by
21167 MODE bytes. */
21168
21169static void
21170aarch64_copy_one_block_and_progress_pointers (rtx *src, rtx *dst,
ef4bddc2 21171 machine_mode mode)
e2c75eea 21172{
7cda9e08
SD
21173	  /* Handle a 256-bit copy separately: emit a V4SImode load pair and
21174	     store pair so that we can use Q registers.  */
21175 if (known_eq (GET_MODE_BITSIZE (mode), 256))
21176 {
21177 mode = V4SImode;
21178 rtx reg1 = gen_reg_rtx (mode);
21179 rtx reg2 = gen_reg_rtx (mode);
21180 /* "Cast" the pointers to the correct mode. */
21181 *src = adjust_address (*src, mode, 0);
21182 *dst = adjust_address (*dst, mode, 0);
21183 /* Emit the memcpy. */
21184 emit_insn (aarch64_gen_load_pair (mode, reg1, *src, reg2,
21185 aarch64_progress_pointer (*src)));
21186 emit_insn (aarch64_gen_store_pair (mode, *dst, reg1,
21187 aarch64_progress_pointer (*dst), reg2));
21188 /* Move the pointers forward. */
21189 *src = aarch64_move_pointer (*src, 32);
21190 *dst = aarch64_move_pointer (*dst, 32);
21191 return;
21192 }
21193
e2c75eea
JG
21194 rtx reg = gen_reg_rtx (mode);
21195
21196 /* "Cast" the pointers to the correct mode. */
21197 *src = adjust_address (*src, mode, 0);
21198 *dst = adjust_address (*dst, mode, 0);
21199 /* Emit the memcpy. */
21200 emit_move_insn (reg, *src);
21201 emit_move_insn (*dst, reg);
21202 /* Move the pointers forward. */
21203 *src = aarch64_progress_pointer (*src);
21204 *dst = aarch64_progress_pointer (*dst);
21205}
21206
76715c32 21207/* Expand cpymem, as if from a __builtin_memcpy. Return true if
e2c75eea
JG
21208 we succeed, otherwise return false. */
21209
21210bool
76715c32 21211aarch64_expand_cpymem (rtx *operands)
e2c75eea 21212{
0f801e0b
TC
21213	  /* These need to be signed, as we need to perform signed
21214	     arithmetic on n below.  */
89c52e5e 21215 int n, mode_bits;
e2c75eea
JG
21216 rtx dst = operands[0];
21217 rtx src = operands[1];
21218 rtx base;
89c52e5e 21219 machine_mode cur_mode = BLKmode, next_mode;
e2c75eea
JG
21220 bool speed_p = !optimize_function_for_size_p (cfun);
21221
21222 /* When optimizing for size, give a better estimate of the length of a
89c52e5e
TC
21223 memcpy call, but use the default otherwise. Moves larger than 8 bytes
21224	     will always require an even number of instructions.  And each
0f801e0b
TC
21225 operation requires both a load+store, so divide the max number by 2. */
21226 unsigned int max_num_moves = (speed_p ? 16 : AARCH64_CALL_RATIO) / 2;
e2c75eea
JG
21227
21228 /* We can't do anything smart if the amount to copy is not constant. */
21229 if (!CONST_INT_P (operands[2]))
21230 return false;
21231
0f801e0b 21232 unsigned HOST_WIDE_INT tmp = INTVAL (operands[2]);
e2c75eea 21233
89c52e5e
TC
21234 /* Try to keep the number of instructions low. For all cases we will do at
21235 most two moves for the residual amount, since we'll always overlap the
21236 remainder. */
0f801e0b 21237 if (((tmp / 16) + (tmp % 16 ? 2 : 0)) > max_num_moves)
e2c75eea
JG
21238 return false;
21239
0f801e0b
TC
21240	  /* At this point tmp is known to fit inside an int.  */
21241 n = tmp;
21242
e2c75eea
JG
21243 base = copy_to_mode_reg (Pmode, XEXP (dst, 0));
21244 dst = adjust_automodify_address (dst, VOIDmode, base, 0);
21245
21246 base = copy_to_mode_reg (Pmode, XEXP (src, 0));
21247 src = adjust_automodify_address (src, VOIDmode, base, 0);
21248
89c52e5e
TC
21249 /* Convert n to bits to make the rest of the code simpler. */
21250 n = n * BITS_PER_UNIT;
e2c75eea 21251
7cda9e08
SD
21252 /* Maximum amount to copy in one go. We allow 256-bit chunks based on the
21253 AARCH64_EXTRA_TUNE_NO_LDP_STP_QREGS tuning parameter and TARGET_SIMD. */
21254 const int copy_limit = ((aarch64_tune_params.extra_tuning_flags
21255 & AARCH64_EXTRA_TUNE_NO_LDP_STP_QREGS)
21256 || !TARGET_SIMD)
21257 ? GET_MODE_BITSIZE (TImode) : 256;
f7e1d19d 21258
89c52e5e 21259 while (n > 0)
e2c75eea 21260 {
89c52e5e
TC
21261	      /* Find the largest mode in which to do the copy without over-reading
21262		 or over-writing.  */
21263 opt_scalar_int_mode mode_iter;
21264 FOR_EACH_MODE_IN_CLASS (mode_iter, MODE_INT)
f7e1d19d 21265 if (GET_MODE_BITSIZE (mode_iter.require ()) <= MIN (n, copy_limit))
89c52e5e 21266 cur_mode = mode_iter.require ();
e2c75eea 21267
89c52e5e 21268 gcc_assert (cur_mode != BLKmode);
e2c75eea 21269
89c52e5e
TC
21270 mode_bits = GET_MODE_BITSIZE (cur_mode).to_constant ();
21271 aarch64_copy_one_block_and_progress_pointers (&src, &dst, cur_mode);
e2c75eea 21272
89c52e5e 21273 n -= mode_bits;
e2c75eea 21274
89c52e5e
TC
21275	      /* Do certain trailing copies as overlapping when that is cheaper,
21276		 i.e. takes fewer instructions.  For instance, for a 15-byte copy
21277		 it is more efficient to do two overlapping 8-byte copies than
21278		 non-overlapping copies of 8 + 4 + 2 + 1 bytes.  */
f7e1d19d 21279 if (n > 0 && n <= 8 * BITS_PER_UNIT)
89c52e5e 21280 {
f7e1d19d
TC
21281 next_mode = smallest_mode_for_size (n, MODE_INT);
21282 int n_bits = GET_MODE_BITSIZE (next_mode).to_constant ();
89c52e5e
TC
21283 src = aarch64_move_pointer (src, (n - n_bits) / BITS_PER_UNIT);
21284 dst = aarch64_move_pointer (dst, (n - n_bits) / BITS_PER_UNIT);
21285 n = n_bits;
e2c75eea
JG
21286 }
21287 }
21288
21289 return true;
21290}
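/* Minimal sketch of the "overlap the remainder" idea above, not part of the
   original source: a copy of 9..16 bytes can be done as two 8-byte moves
   whose destination ranges overlap, instead of separate 8/4/2/1-byte moves.
   SRC and DST are assumed not to overlap each other; the function name is
   made up for the example.  */

#include <string.h>

static void
copy_9_to_16_bytes_example (char *dst, const char *src, size_t n)
{
  memcpy (dst, src, 8);			/* leading 8-byte chunk */
  memcpy (dst + n - 8, src + n - 8, 8);	/* trailing chunk, may overlap it */
}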
21291
141a3ccf
KT
21292/* Split a DImode store of a CONST_INT SRC to MEM DST as two
21293 SImode stores. Handle the case when the constant has identical
21294 bottom and top halves. This is beneficial when the two stores can be
21295 merged into an STP and we avoid synthesising potentially expensive
21296 immediates twice. Return true if such a split is possible. */
21297
21298bool
21299aarch64_split_dimode_const_store (rtx dst, rtx src)
21300{
21301 rtx lo = gen_lowpart (SImode, src);
21302 rtx hi = gen_highpart_mode (SImode, DImode, src);
21303
21304 bool size_p = optimize_function_for_size_p (cfun);
21305
21306 if (!rtx_equal_p (lo, hi))
21307 return false;
21308
21309 unsigned int orig_cost
21310 = aarch64_internal_mov_immediate (NULL_RTX, src, false, DImode);
21311 unsigned int lo_cost
21312 = aarch64_internal_mov_immediate (NULL_RTX, lo, false, SImode);
21313
21314 /* We want to transform:
21315 MOV x1, 49370
21316 MOVK x1, 0x140, lsl 16
21317 MOVK x1, 0xc0da, lsl 32
21318 MOVK x1, 0x140, lsl 48
21319 STR x1, [x0]
21320 into:
21321 MOV w1, 49370
21322 MOVK w1, 0x140, lsl 16
21323 STP w1, w1, [x0]
21324 So we want to perform this only when we save two instructions
21325 or more. When optimizing for size, however, accept any code size
21326 savings we can. */
21327 if (size_p && orig_cost <= lo_cost)
21328 return false;
21329
21330 if (!size_p
21331 && (orig_cost <= lo_cost + 1))
21332 return false;
21333
21334 rtx mem_lo = adjust_address (dst, SImode, 0);
21335 if (!aarch64_mem_pair_operand (mem_lo, SImode))
21336 return false;
21337
21338 rtx tmp_reg = gen_reg_rtx (SImode);
21339 aarch64_expand_mov_immediate (tmp_reg, lo);
21340 rtx mem_hi = aarch64_move_pointer (mem_lo, GET_MODE_SIZE (SImode));
21341 /* Don't emit an explicit store pair as this may not be always profitable.
21342 Let the sched-fusion logic decide whether to merge them. */
21343 emit_move_insn (mem_lo, tmp_reg);
21344 emit_move_insn (mem_hi, tmp_reg);
21345
21346 return true;
21347}
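/* Illustrative helper, not part of the original source: the split above
   applies to 64-bit constants whose two 32-bit halves are identical, such
   as 0x0140c0da0140c0da from the example in the comment.  The helper name
   is made up.  */

#include <stdint.h>

static int
has_identical_si_halves_example (uint64_t x)
{
  return (uint32_t) x == (uint32_t) (x >> 32);
}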
21348
30c46053
MC
21349/* Generate RTL for a conditional branch with rtx comparison CODE in
21350 mode CC_MODE. The destination of the unlikely conditional branch
21351 is LABEL_REF. */
21352
21353void
21354aarch64_gen_unlikely_cbranch (enum rtx_code code, machine_mode cc_mode,
21355 rtx label_ref)
21356{
21357 rtx x;
21358 x = gen_rtx_fmt_ee (code, VOIDmode,
21359 gen_rtx_REG (cc_mode, CC_REGNUM),
21360 const0_rtx);
21361
21362 x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
21363 gen_rtx_LABEL_REF (VOIDmode, label_ref),
21364 pc_rtx);
21365 aarch64_emit_unlikely_jump (gen_rtx_SET (pc_rtx, x));
21366}
21367
21368/* Generate DImode scratch registers for 128-bit (TImode) addition.
21369
21370 OP1 represents the TImode destination operand 1
21371 OP2 represents the TImode destination operand 2
21372 LOW_DEST represents the low half (DImode) of TImode operand 0
21373 LOW_IN1 represents the low half (DImode) of TImode operand 1
21374 LOW_IN2 represents the low half (DImode) of TImode operand 2
21375 HIGH_DEST represents the high half (DImode) of TImode operand 0
21376 HIGH_IN1 represents the high half (DImode) of TImode operand 1
21377 HIGH_IN2 represents the high half (DImode) of TImode operand 2. */
21378
21379void
21380aarch64_addti_scratch_regs (rtx op1, rtx op2, rtx *low_dest,
21381 rtx *low_in1, rtx *low_in2,
21382 rtx *high_dest, rtx *high_in1,
21383 rtx *high_in2)
21384{
21385 *low_dest = gen_reg_rtx (DImode);
21386 *low_in1 = gen_lowpart (DImode, op1);
21387 *low_in2 = simplify_gen_subreg (DImode, op2, TImode,
21388 subreg_lowpart_offset (DImode, TImode));
21389 *high_dest = gen_reg_rtx (DImode);
21390 *high_in1 = gen_highpart (DImode, op1);
21391 *high_in2 = simplify_gen_subreg (DImode, op2, TImode,
21392 subreg_highpart_offset (DImode, TImode));
21393}
21394
21395/* Generate DImode scratch registers for 128-bit (TImode) subtraction.
21396
21397	   This function differs from 'aarch64_addti_scratch_regs' in that
21398	   OP1 can be an immediate constant (zero).  We must call
21399	   subreg_highpart_offset with DImode and TImode arguments, otherwise
21400	   VOIDmode will be used for the const_int, which generates an internal
21401	   error from subreg_size_highpart_offset, which does not expect a size of zero.
21402
21403 OP1 represents the TImode destination operand 1
21404 OP2 represents the TImode destination operand 2
21405 LOW_DEST represents the low half (DImode) of TImode operand 0
21406 LOW_IN1 represents the low half (DImode) of TImode operand 1
21407 LOW_IN2 represents the low half (DImode) of TImode operand 2
21408 HIGH_DEST represents the high half (DImode) of TImode operand 0
21409 HIGH_IN1 represents the high half (DImode) of TImode operand 1
21410 HIGH_IN2 represents the high half (DImode) of TImode operand 2. */
21411
21412
21413void
21414aarch64_subvti_scratch_regs (rtx op1, rtx op2, rtx *low_dest,
21415 rtx *low_in1, rtx *low_in2,
21416 rtx *high_dest, rtx *high_in1,
21417 rtx *high_in2)
21418{
21419 *low_dest = gen_reg_rtx (DImode);
21420 *low_in1 = simplify_gen_subreg (DImode, op1, TImode,
21421 subreg_lowpart_offset (DImode, TImode));
21422
21423 *low_in2 = simplify_gen_subreg (DImode, op2, TImode,
21424 subreg_lowpart_offset (DImode, TImode));
21425 *high_dest = gen_reg_rtx (DImode);
21426
21427 *high_in1 = simplify_gen_subreg (DImode, op1, TImode,
21428 subreg_highpart_offset (DImode, TImode));
21429 *high_in2 = simplify_gen_subreg (DImode, op2, TImode,
21430 subreg_highpart_offset (DImode, TImode));
21431}
21432
21433/* Generate RTL for 128-bit (TImode) subtraction with overflow.
21434
21435 OP0 represents the TImode destination operand 0
21436 LOW_DEST represents the low half (DImode) of TImode operand 0
21437 LOW_IN1 represents the low half (DImode) of TImode operand 1
21438 LOW_IN2 represents the low half (DImode) of TImode operand 2
21439 HIGH_DEST represents the high half (DImode) of TImode operand 0
21440 HIGH_IN1 represents the high half (DImode) of TImode operand 1
a58fe3c5
RE
21441 HIGH_IN2 represents the high half (DImode) of TImode operand 2
21442 UNSIGNED_P is true if the operation is being performed on unsigned
21443 values. */
30c46053
MC
21444void
21445aarch64_expand_subvti (rtx op0, rtx low_dest, rtx low_in1,
21446 rtx low_in2, rtx high_dest, rtx high_in1,
a58fe3c5 21447 rtx high_in2, bool unsigned_p)
30c46053
MC
21448{
21449 if (low_in2 == const0_rtx)
21450 {
21451 low_dest = low_in1;
a58fe3c5
RE
21452 high_in2 = force_reg (DImode, high_in2);
21453 if (unsigned_p)
21454 emit_insn (gen_subdi3_compare1 (high_dest, high_in1, high_in2));
21455 else
21456 emit_insn (gen_subvdi_insn (high_dest, high_in1, high_in2));
30c46053
MC
21457 }
21458 else
21459 {
d80f0a8d
JJ
21460 if (aarch64_plus_immediate (low_in2, DImode))
21461 emit_insn (gen_subdi3_compare1_imm (low_dest, low_in1, low_in2,
21462 GEN_INT (-INTVAL (low_in2))));
21463 else
30c46053 21464 {
d80f0a8d
JJ
21465 low_in2 = force_reg (DImode, low_in2);
21466 emit_insn (gen_subdi3_compare1 (low_dest, low_in1, low_in2));
30c46053 21467 }
d80f0a8d 21468 high_in2 = force_reg (DImode, high_in2);
a58fe3c5
RE
21469
21470 if (unsigned_p)
21471 emit_insn (gen_usubdi3_carryinC (high_dest, high_in1, high_in2));
21472 else
21473 emit_insn (gen_subdi3_carryinV (high_dest, high_in1, high_in2));
30c46053
MC
21474 }
21475
21476 emit_move_insn (gen_lowpart (DImode, op0), low_dest);
21477 emit_move_insn (gen_highpart (DImode, op0), high_dest);
21478
21479}
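/* Plain-C sketch of the decomposition above, not GCC code: a 128-bit
   subtraction split into 64-bit halves with a borrow, which is the shape of
   the SUBS/SBCS sequence emitted by aarch64_expand_subvti.  Overflow
   detection is omitted; the name is made up for the example.  */

#include <stdint.h>

static void
sub128_example (uint64_t a_lo, uint64_t a_hi, uint64_t b_lo, uint64_t b_hi,
		uint64_t *r_lo, uint64_t *r_hi)
{
  uint64_t borrow = a_lo < b_lo;	/* borrow produced by the low half */
  *r_lo = a_lo - b_lo;
  *r_hi = a_hi - b_hi - borrow;		/* consumed by the high half (SBCS) */
}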
21480
a3125fc2
CL
21481/* Implement the TARGET_ASAN_SHADOW_OFFSET hook. */
21482
21483static unsigned HOST_WIDE_INT
21484aarch64_asan_shadow_offset (void)
21485{
10078f3e
AP
21486 if (TARGET_ILP32)
21487 return (HOST_WIDE_INT_1 << 29);
21488 else
21489 return (HOST_WIDE_INT_1 << 36);
a3125fc2
CL
21490}
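/* Rough sketch of how the offset above is used, not GCC code:
   AddressSanitizer conventionally maps an application address to its
   shadow byte as (addr >> 3) + offset, so the LP64 value above places the
   shadow at 1 << 36.  The helper name is made up for the example.  */

#include <stdint.h>

static uintptr_t
asan_shadow_addr_example (uintptr_t addr, uintptr_t shadow_offset)
{
  return (addr >> 3) + shadow_offset;
}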
21491
5f3bc026 21492static rtx
cb4347e8 21493aarch64_gen_ccmp_first (rtx_insn **prep_seq, rtx_insn **gen_seq,
5f3bc026
ZC
21494 int code, tree treeop0, tree treeop1)
21495{
c8012fbc
WD
21496 machine_mode op_mode, cmp_mode, cc_mode = CCmode;
21497 rtx op0, op1;
5f3bc026 21498 int unsignedp = TYPE_UNSIGNED (TREE_TYPE (treeop0));
c8012fbc 21499 insn_code icode;
5f3bc026
ZC
21500 struct expand_operand ops[4];
21501
5f3bc026
ZC
21502 start_sequence ();
21503 expand_operands (treeop0, treeop1, NULL_RTX, &op0, &op1, EXPAND_NORMAL);
21504
21505 op_mode = GET_MODE (op0);
21506 if (op_mode == VOIDmode)
21507 op_mode = GET_MODE (op1);
21508
21509 switch (op_mode)
21510 {
4e10a5a7
RS
21511 case E_QImode:
21512 case E_HImode:
21513 case E_SImode:
5f3bc026
ZC
21514 cmp_mode = SImode;
21515 icode = CODE_FOR_cmpsi;
21516 break;
21517
4e10a5a7 21518 case E_DImode:
5f3bc026
ZC
21519 cmp_mode = DImode;
21520 icode = CODE_FOR_cmpdi;
21521 break;
21522
4e10a5a7 21523 case E_SFmode:
786e3c06
WD
21524 cmp_mode = SFmode;
21525 cc_mode = aarch64_select_cc_mode ((rtx_code) code, op0, op1);
21526 icode = cc_mode == CCFPEmode ? CODE_FOR_fcmpesf : CODE_FOR_fcmpsf;
21527 break;
21528
4e10a5a7 21529 case E_DFmode:
786e3c06
WD
21530 cmp_mode = DFmode;
21531 cc_mode = aarch64_select_cc_mode ((rtx_code) code, op0, op1);
21532 icode = cc_mode == CCFPEmode ? CODE_FOR_fcmpedf : CODE_FOR_fcmpdf;
21533 break;
21534
5f3bc026
ZC
21535 default:
21536 end_sequence ();
21537 return NULL_RTX;
21538 }
21539
c8012fbc
WD
21540 op0 = prepare_operand (icode, op0, 0, op_mode, cmp_mode, unsignedp);
21541 op1 = prepare_operand (icode, op1, 1, op_mode, cmp_mode, unsignedp);
5f3bc026
ZC
21542 if (!op0 || !op1)
21543 {
21544 end_sequence ();
21545 return NULL_RTX;
21546 }
21547 *prep_seq = get_insns ();
21548 end_sequence ();
21549
c8012fbc
WD
21550 create_fixed_operand (&ops[0], op0);
21551 create_fixed_operand (&ops[1], op1);
5f3bc026
ZC
21552
21553 start_sequence ();
c8012fbc 21554 if (!maybe_expand_insn (icode, 2, ops))
5f3bc026
ZC
21555 {
21556 end_sequence ();
21557 return NULL_RTX;
21558 }
21559 *gen_seq = get_insns ();
21560 end_sequence ();
21561
c8012fbc
WD
21562 return gen_rtx_fmt_ee ((rtx_code) code, cc_mode,
21563 gen_rtx_REG (cc_mode, CC_REGNUM), const0_rtx);
5f3bc026
ZC
21564}
21565
21566static rtx
cb4347e8
TS
21567aarch64_gen_ccmp_next (rtx_insn **prep_seq, rtx_insn **gen_seq, rtx prev,
21568 int cmp_code, tree treeop0, tree treeop1, int bit_code)
5f3bc026 21569{
c8012fbc
WD
21570 rtx op0, op1, target;
21571 machine_mode op_mode, cmp_mode, cc_mode = CCmode;
5f3bc026 21572 int unsignedp = TYPE_UNSIGNED (TREE_TYPE (treeop0));
c8012fbc 21573 insn_code icode;
5f3bc026 21574 struct expand_operand ops[6];
c8012fbc 21575 int aarch64_cond;
5f3bc026 21576
cb4347e8 21577 push_to_sequence (*prep_seq);
5f3bc026
ZC
21578 expand_operands (treeop0, treeop1, NULL_RTX, &op0, &op1, EXPAND_NORMAL);
21579
21580 op_mode = GET_MODE (op0);
21581 if (op_mode == VOIDmode)
21582 op_mode = GET_MODE (op1);
21583
21584 switch (op_mode)
21585 {
4e10a5a7
RS
21586 case E_QImode:
21587 case E_HImode:
21588 case E_SImode:
5f3bc026 21589 cmp_mode = SImode;
5f3bc026
ZC
21590 break;
21591
4e10a5a7 21592 case E_DImode:
5f3bc026 21593 cmp_mode = DImode;
5f3bc026
ZC
21594 break;
21595
4e10a5a7 21596 case E_SFmode:
786e3c06
WD
21597 cmp_mode = SFmode;
21598 cc_mode = aarch64_select_cc_mode ((rtx_code) cmp_code, op0, op1);
786e3c06
WD
21599 break;
21600
4e10a5a7 21601 case E_DFmode:
786e3c06
WD
21602 cmp_mode = DFmode;
21603 cc_mode = aarch64_select_cc_mode ((rtx_code) cmp_code, op0, op1);
786e3c06
WD
21604 break;
21605
5f3bc026
ZC
21606 default:
21607 end_sequence ();
21608 return NULL_RTX;
21609 }
21610
865257c4
RS
21611 icode = code_for_ccmp (cc_mode, cmp_mode);
21612
5f3bc026
ZC
21613 op0 = prepare_operand (icode, op0, 2, op_mode, cmp_mode, unsignedp);
21614 op1 = prepare_operand (icode, op1, 3, op_mode, cmp_mode, unsignedp);
21615 if (!op0 || !op1)
21616 {
21617 end_sequence ();
21618 return NULL_RTX;
21619 }
21620 *prep_seq = get_insns ();
21621 end_sequence ();
21622
21623 target = gen_rtx_REG (cc_mode, CC_REGNUM);
c8012fbc 21624 aarch64_cond = aarch64_get_condition_code_1 (cc_mode, (rtx_code) cmp_code);
5f3bc026 21625
c8012fbc
WD
21626 if (bit_code != AND)
21627 {
865257c4
RS
21628 /* Treat the ccmp patterns as canonical and use them where possible,
21629 but fall back to ccmp_rev patterns if there's no other option. */
21630 rtx_code prev_code = GET_CODE (prev);
21631 machine_mode prev_mode = GET_MODE (XEXP (prev, 0));
21632 if ((prev_mode == CCFPmode || prev_mode == CCFPEmode)
21633 && !(prev_code == EQ
21634 || prev_code == NE
21635 || prev_code == ORDERED
21636 || prev_code == UNORDERED))
21637 icode = code_for_ccmp_rev (cc_mode, cmp_mode);
21638 else
21639 {
21640 rtx_code code = reverse_condition (prev_code);
21641 prev = gen_rtx_fmt_ee (code, VOIDmode, XEXP (prev, 0), const0_rtx);
21642 }
c8012fbc
WD
21643 aarch64_cond = AARCH64_INVERSE_CONDITION_CODE (aarch64_cond);
21644 }
21645
21646 create_fixed_operand (&ops[0], XEXP (prev, 0));
5f3bc026
ZC
21647 create_fixed_operand (&ops[1], target);
21648 create_fixed_operand (&ops[2], op0);
21649 create_fixed_operand (&ops[3], op1);
c8012fbc
WD
21650 create_fixed_operand (&ops[4], prev);
21651 create_fixed_operand (&ops[5], GEN_INT (aarch64_cond));
5f3bc026 21652
cb4347e8 21653 push_to_sequence (*gen_seq);
5f3bc026
ZC
21654 if (!maybe_expand_insn (icode, 6, ops))
21655 {
21656 end_sequence ();
21657 return NULL_RTX;
21658 }
21659
21660 *gen_seq = get_insns ();
21661 end_sequence ();
21662
c8012fbc 21663 return gen_rtx_fmt_ee ((rtx_code) cmp_code, VOIDmode, target, const0_rtx);
5f3bc026
ZC
21664}
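/* Illustrative source-level example, not part of the original file: a
   chained condition like the one below is the kind of expression the two
   hooks above turn into a CMP followed by a CCMP (plus a CSET) instead of
   a short-circuit branch.  The exact code generated depends on costs and
   the optimization level.  */

static int
ccmp_shape_example (long a, long b)
{
  return a == 17 && b > 5;
}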
21665
21666#undef TARGET_GEN_CCMP_FIRST
21667#define TARGET_GEN_CCMP_FIRST aarch64_gen_ccmp_first
21668
21669#undef TARGET_GEN_CCMP_NEXT
21670#define TARGET_GEN_CCMP_NEXT aarch64_gen_ccmp_next
21671
6a569cdd
KT
21672/* Implement TARGET_SCHED_MACRO_FUSION_P. Return true if target supports
21673 instruction fusion of some sort. */
21674
21675static bool
21676aarch64_macro_fusion_p (void)
21677{
b175b679 21678 return aarch64_tune_params.fusible_ops != AARCH64_FUSE_NOTHING;
6a569cdd
KT
21679}
21680
21681
21682/* Implement TARGET_SCHED_MACRO_FUSION_PAIR_P. Return true if PREV and CURR
21683 should be kept together during scheduling. */
21684
21685static bool
21686aarch_macro_fusion_pair_p (rtx_insn *prev, rtx_insn *curr)
21687{
21688 rtx set_dest;
21689 rtx prev_set = single_set (prev);
21690 rtx curr_set = single_set (curr);
21691 /* prev and curr are simple SET insns i.e. no flag setting or branching. */
21692 bool simple_sets_p = prev_set && curr_set && !any_condjump_p (curr);
21693
21694 if (!aarch64_macro_fusion_p ())
21695 return false;
21696
d7b03373 21697 if (simple_sets_p && aarch64_fusion_enabled_p (AARCH64_FUSE_MOV_MOVK))
6a569cdd
KT
21698 {
21699 /* We are trying to match:
21700 prev (mov) == (set (reg r0) (const_int imm16))
21701 curr (movk) == (set (zero_extract (reg r0)
21702 (const_int 16)
21703 (const_int 16))
21704 (const_int imm16_1)) */
21705
21706 set_dest = SET_DEST (curr_set);
21707
21708 if (GET_CODE (set_dest) == ZERO_EXTRACT
21709 && CONST_INT_P (SET_SRC (curr_set))
21710 && CONST_INT_P (SET_SRC (prev_set))
21711 && CONST_INT_P (XEXP (set_dest, 2))
21712 && INTVAL (XEXP (set_dest, 2)) == 16
21713 && REG_P (XEXP (set_dest, 0))
21714 && REG_P (SET_DEST (prev_set))
21715 && REGNO (XEXP (set_dest, 0)) == REGNO (SET_DEST (prev_set)))
21716 {
21717 return true;
21718 }
21719 }
21720
d7b03373 21721 if (simple_sets_p && aarch64_fusion_enabled_p (AARCH64_FUSE_ADRP_ADD))
9bbe08fe
KT
21722 {
21723
21724 /* We're trying to match:
21725 prev (adrp) == (set (reg r1)
21726 (high (symbol_ref ("SYM"))))
21727 curr (add) == (set (reg r0)
21728 (lo_sum (reg r1)
21729 (symbol_ref ("SYM"))))
21730 Note that r0 need not necessarily be the same as r1, especially
21731 during pre-regalloc scheduling. */
21732
21733 if (satisfies_constraint_Ush (SET_SRC (prev_set))
21734 && REG_P (SET_DEST (prev_set)) && REG_P (SET_DEST (curr_set)))
21735 {
21736 if (GET_CODE (SET_SRC (curr_set)) == LO_SUM
21737 && REG_P (XEXP (SET_SRC (curr_set), 0))
21738 && REGNO (XEXP (SET_SRC (curr_set), 0))
21739 == REGNO (SET_DEST (prev_set))
21740 && rtx_equal_p (XEXP (SET_SRC (prev_set), 0),
21741 XEXP (SET_SRC (curr_set), 1)))
21742 return true;
21743 }
21744 }
21745
d7b03373 21746 if (simple_sets_p && aarch64_fusion_enabled_p (AARCH64_FUSE_MOVK_MOVK))
cd0cb232
KT
21747 {
21748
21749 /* We're trying to match:
21750 prev (movk) == (set (zero_extract (reg r0)
21751 (const_int 16)
21752 (const_int 32))
21753 (const_int imm16_1))
21754 curr (movk) == (set (zero_extract (reg r0)
21755 (const_int 16)
21756 (const_int 48))
21757 (const_int imm16_2)) */
21758
21759 if (GET_CODE (SET_DEST (prev_set)) == ZERO_EXTRACT
21760 && GET_CODE (SET_DEST (curr_set)) == ZERO_EXTRACT
21761 && REG_P (XEXP (SET_DEST (prev_set), 0))
21762 && REG_P (XEXP (SET_DEST (curr_set), 0))
21763 && REGNO (XEXP (SET_DEST (prev_set), 0))
21764 == REGNO (XEXP (SET_DEST (curr_set), 0))
21765 && CONST_INT_P (XEXP (SET_DEST (prev_set), 2))
21766 && CONST_INT_P (XEXP (SET_DEST (curr_set), 2))
21767 && INTVAL (XEXP (SET_DEST (prev_set), 2)) == 32
21768 && INTVAL (XEXP (SET_DEST (curr_set), 2)) == 48
21769 && CONST_INT_P (SET_SRC (prev_set))
21770 && CONST_INT_P (SET_SRC (curr_set)))
21771 return true;
21772
21773 }
d7b03373 21774 if (simple_sets_p && aarch64_fusion_enabled_p (AARCH64_FUSE_ADRP_LDR))
d8354ad7
KT
21775 {
21776 /* We're trying to match:
21777 prev (adrp) == (set (reg r0)
21778 (high (symbol_ref ("SYM"))))
21779 curr (ldr) == (set (reg r1)
21780 (mem (lo_sum (reg r0)
21781 (symbol_ref ("SYM")))))
21782 or
21783 curr (ldr) == (set (reg r1)
21784 (zero_extend (mem
21785 (lo_sum (reg r0)
21786 (symbol_ref ("SYM")))))) */
21787 if (satisfies_constraint_Ush (SET_SRC (prev_set))
21788 && REG_P (SET_DEST (prev_set)) && REG_P (SET_DEST (curr_set)))
21789 {
21790 rtx curr_src = SET_SRC (curr_set);
21791
21792 if (GET_CODE (curr_src) == ZERO_EXTEND)
21793 curr_src = XEXP (curr_src, 0);
21794
21795 if (MEM_P (curr_src) && GET_CODE (XEXP (curr_src, 0)) == LO_SUM
21796 && REG_P (XEXP (XEXP (curr_src, 0), 0))
21797 && REGNO (XEXP (XEXP (curr_src, 0), 0))
21798 == REGNO (SET_DEST (prev_set))
21799 && rtx_equal_p (XEXP (XEXP (curr_src, 0), 1),
21800 XEXP (SET_SRC (prev_set), 0)))
21801 return true;
21802 }
21803 }
cd0cb232 21804
a4f3fa71 21805 /* Fuse compare (CMP/CMN/TST/BICS) and conditional branch. */
d7b03373 21806 if (aarch64_fusion_enabled_p (AARCH64_FUSE_CMP_BRANCH)
a4f3fa71
WD
21807 && prev_set && curr_set && any_condjump_p (curr)
21808 && GET_CODE (SET_SRC (prev_set)) == COMPARE
21809 && SCALAR_INT_MODE_P (GET_MODE (XEXP (SET_SRC (prev_set), 0)))
21810 && reg_referenced_p (SET_DEST (prev_set), PATTERN (curr)))
21811 return true;
21812
21813 /* Fuse flag-setting ALU instructions and conditional branch. */
21814 if (aarch64_fusion_enabled_p (AARCH64_FUSE_ALU_BRANCH)
3759108f
AP
21815 && any_condjump_p (curr))
21816 {
509f819a
N
21817 unsigned int condreg1, condreg2;
21818 rtx cc_reg_1;
21819 aarch64_fixed_condition_code_regs (&condreg1, &condreg2);
21820 cc_reg_1 = gen_rtx_REG (CCmode, condreg1);
21821
21822 if (reg_referenced_p (cc_reg_1, PATTERN (curr))
21823 && prev
21824 && modified_in_p (cc_reg_1, prev))
21825 {
f8a27206
AP
21826 enum attr_type prev_type = get_attr_type (prev);
21827
509f819a
N
21828	  /* FIXME: this misses some instructions which are considered simple
21829	     arithmetic for ThunderX.  Simple shifts are missed here.  */
21830 if (prev_type == TYPE_ALUS_SREG
21831 || prev_type == TYPE_ALUS_IMM
21832 || prev_type == TYPE_LOGICS_REG
21833 || prev_type == TYPE_LOGICS_IMM)
21834 return true;
21835 }
3759108f
AP
21836 }
21837
a4f3fa71 21838 /* Fuse ALU instructions and CBZ/CBNZ. */
bee7e0fc
AP
21839 if (prev_set
21840 && curr_set
a4f3fa71 21841 && aarch64_fusion_enabled_p (AARCH64_FUSE_ALU_CBZ)
00c7c57f
JB
21842 && any_condjump_p (curr))
21843 {
21844 /* We're trying to match:
21845 prev (alu_insn) == (set (r0) plus ((r0) (r1/imm)))
21846 curr (cbz) == (set (pc) (if_then_else (eq/ne) (r0)
21847 (const_int 0))
21848 (label_ref ("SYM"))
21849 (pc)) */
21850 if (SET_DEST (curr_set) == (pc_rtx)
21851 && GET_CODE (SET_SRC (curr_set)) == IF_THEN_ELSE
21852 && REG_P (XEXP (XEXP (SET_SRC (curr_set), 0), 0))
21853 && REG_P (SET_DEST (prev_set))
21854 && REGNO (SET_DEST (prev_set))
21855 == REGNO (XEXP (XEXP (SET_SRC (curr_set), 0), 0)))
21856 {
21857 /* Fuse ALU operations followed by conditional branch instruction. */
21858 switch (get_attr_type (prev))
21859 {
21860 case TYPE_ALU_IMM:
21861 case TYPE_ALU_SREG:
21862 case TYPE_ADC_REG:
21863 case TYPE_ADC_IMM:
21864 case TYPE_ADCS_REG:
21865 case TYPE_ADCS_IMM:
21866 case TYPE_LOGIC_REG:
21867 case TYPE_LOGIC_IMM:
21868 case TYPE_CSEL:
21869 case TYPE_ADR:
21870 case TYPE_MOV_IMM:
21871 case TYPE_SHIFT_REG:
21872 case TYPE_SHIFT_IMM:
21873 case TYPE_BFM:
21874 case TYPE_RBIT:
21875 case TYPE_REV:
21876 case TYPE_EXTEND:
21877 return true;
21878
21879 default:;
21880 }
21881 }
21882 }
21883
6a569cdd
KT
21884 return false;
21885}
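/* Illustrative assembly-level examples, not part of the original source, of
   pairs the checks above try to keep adjacent for macro-fusion:

     mov  w0, #0xc0da                 adrp x1, sym
     movk w0, #0x140, lsl #16         add  x0, x1, :lo12:sym

     adrp x0, sym                     cmp  x0, #4
     ldr  x1, [x0, :lo12:sym]         b.ne 1f

     add  x0, x0, #1
     cbnz x0, 1f  */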
21886
f2879a90
KT
21887/* Return true iff the instruction fusion described by OP is enabled. */
21888
21889bool
21890aarch64_fusion_enabled_p (enum aarch64_fusion_pairs op)
21891{
21892 return (aarch64_tune_params.fusible_ops & op) != 0;
21893}
21894
350013bc
BC
21895/* If MEM is in the form of [base+offset], extract the two parts
21896   of the address into BASE and OFFSET; otherwise return false
21897   after clearing BASE and OFFSET.  */
21898
21899bool
21900extract_base_offset_in_addr (rtx mem, rtx *base, rtx *offset)
21901{
21902 rtx addr;
21903
21904 gcc_assert (MEM_P (mem));
21905
21906 addr = XEXP (mem, 0);
21907
21908 if (REG_P (addr))
21909 {
21910 *base = addr;
21911 *offset = const0_rtx;
21912 return true;
21913 }
21914
21915 if (GET_CODE (addr) == PLUS
21916 && REG_P (XEXP (addr, 0)) && CONST_INT_P (XEXP (addr, 1)))
21917 {
21918 *base = XEXP (addr, 0);
21919 *offset = XEXP (addr, 1);
21920 return true;
21921 }
21922
21923 *base = NULL_RTX;
21924 *offset = NULL_RTX;
21925
21926 return false;
21927}
21928
21929/* Types for scheduling fusion. */
21930enum sched_fusion_type
21931{
21932 SCHED_FUSION_NONE = 0,
21933 SCHED_FUSION_LD_SIGN_EXTEND,
21934 SCHED_FUSION_LD_ZERO_EXTEND,
21935 SCHED_FUSION_LD,
21936 SCHED_FUSION_ST,
21937 SCHED_FUSION_NUM
21938};
21939
21940/* If INSN is a load or store with an address in the form [base+offset],
21941   extract the two parts into BASE and OFFSET.  Return the scheduling
21942   fusion type of this INSN.  */
21943
21944static enum sched_fusion_type
21945fusion_load_store (rtx_insn *insn, rtx *base, rtx *offset)
21946{
21947 rtx x, dest, src;
21948 enum sched_fusion_type fusion = SCHED_FUSION_LD;
21949
21950 gcc_assert (INSN_P (insn));
21951 x = PATTERN (insn);
21952 if (GET_CODE (x) != SET)
21953 return SCHED_FUSION_NONE;
21954
21955 src = SET_SRC (x);
21956 dest = SET_DEST (x);
21957
abc52318
KT
21958 machine_mode dest_mode = GET_MODE (dest);
21959
21960 if (!aarch64_mode_valid_for_sched_fusion_p (dest_mode))
350013bc
BC
21961 return SCHED_FUSION_NONE;
21962
21963 if (GET_CODE (src) == SIGN_EXTEND)
21964 {
21965 fusion = SCHED_FUSION_LD_SIGN_EXTEND;
21966 src = XEXP (src, 0);
21967 if (GET_CODE (src) != MEM || GET_MODE (src) != SImode)
21968 return SCHED_FUSION_NONE;
21969 }
21970 else if (GET_CODE (src) == ZERO_EXTEND)
21971 {
21972 fusion = SCHED_FUSION_LD_ZERO_EXTEND;
21973 src = XEXP (src, 0);
21974 if (GET_CODE (src) != MEM || GET_MODE (src) != SImode)
21975 return SCHED_FUSION_NONE;
21976 }
21977
21978 if (GET_CODE (src) == MEM && REG_P (dest))
21979 extract_base_offset_in_addr (src, base, offset);
21980 else if (GET_CODE (dest) == MEM && (REG_P (src) || src == const0_rtx))
21981 {
21982 fusion = SCHED_FUSION_ST;
21983 extract_base_offset_in_addr (dest, base, offset);
21984 }
21985 else
21986 return SCHED_FUSION_NONE;
21987
21988 if (*base == NULL_RTX || *offset == NULL_RTX)
21989 fusion = SCHED_FUSION_NONE;
21990
21991 return fusion;
21992}
21993
21994/* Implement the TARGET_SCHED_FUSION_PRIORITY hook.
21995
21996   Currently we only support fusing ldr and str instructions, so FUSION_PRI
21997   and PRI are only calculated for these instructions.  For other instructions,
21998   FUSION_PRI and PRI are simply set to MAX_PRI - 1.  In the future, other
21999   types of instruction fusion can be added by returning different priorities.
22000
22001 It's important that irrelevant instructions get the largest FUSION_PRI. */
22002
22003static void
22004aarch64_sched_fusion_priority (rtx_insn *insn, int max_pri,
22005 int *fusion_pri, int *pri)
22006{
22007 int tmp, off_val;
22008 rtx base, offset;
22009 enum sched_fusion_type fusion;
22010
22011 gcc_assert (INSN_P (insn));
22012
22013 tmp = max_pri - 1;
22014 fusion = fusion_load_store (insn, &base, &offset);
22015 if (fusion == SCHED_FUSION_NONE)
22016 {
22017 *pri = tmp;
22018 *fusion_pri = tmp;
22019 return;
22020 }
22021
22022 /* Set FUSION_PRI according to fusion type and base register. */
22023 *fusion_pri = tmp - fusion * FIRST_PSEUDO_REGISTER - REGNO (base);
22024
22025 /* Calculate PRI. */
22026 tmp /= 2;
22027
22028 /* INSN with smaller offset goes first. */
22029 off_val = (int)(INTVAL (offset));
22030 if (off_val >= 0)
22031 tmp -= (off_val & 0xfffff);
22032 else
22033 tmp += ((- off_val) & 0xfffff);
22034
22035 *pri = tmp;
22036 return;
22037}
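/* Worked example, not part of the original source: for two loads
     ldr w0, [x1, #4]   and   ldr w2, [x1, #8]
   both insns get the same FUSION_PRI (same fusion type, same base X1),
   while the load with the smaller offset gets the larger PRI, so it is
   scheduled first and the two become an LDP candidate.  */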
22038
9bca63d4
WD
22039/* Implement the TARGET_SCHED_ADJUST_PRIORITY hook.
22040 Adjust priority of sha1h instructions so they are scheduled before
22041 other SHA1 instructions. */
22042
22043static int
22044aarch64_sched_adjust_priority (rtx_insn *insn, int priority)
22045{
22046 rtx x = PATTERN (insn);
22047
22048 if (GET_CODE (x) == SET)
22049 {
22050 x = SET_SRC (x);
22051
22052 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_SHA1H)
22053 return priority + 10;
22054 }
22055
22056 return priority;
22057}
22058
350013bc
BC
22059/* Given OPERANDS of consecutive load/store, check if we can merge
22060 them into ldp/stp. LOAD is true if they are load instructions.
22061 MODE is the mode of memory operands. */
22062
22063bool
22064aarch64_operands_ok_for_ldpstp (rtx *operands, bool load,
b8506a8a 22065 machine_mode mode)
350013bc
BC
22066{
22067 HOST_WIDE_INT offval_1, offval_2, msize;
22068 enum reg_class rclass_1, rclass_2;
22069 rtx mem_1, mem_2, reg_1, reg_2, base_1, base_2, offset_1, offset_2;
22070
22071 if (load)
22072 {
22073 mem_1 = operands[1];
22074 mem_2 = operands[3];
22075 reg_1 = operands[0];
22076 reg_2 = operands[2];
22077 gcc_assert (REG_P (reg_1) && REG_P (reg_2));
22078 if (REGNO (reg_1) == REGNO (reg_2))
22079 return false;
22080 }
22081 else
22082 {
22083 mem_1 = operands[0];
22084 mem_2 = operands[2];
22085 reg_1 = operands[1];
22086 reg_2 = operands[3];
22087 }
22088
bf84ac44
AP
22089 /* The mems cannot be volatile. */
22090 if (MEM_VOLATILE_P (mem_1) || MEM_VOLATILE_P (mem_2))
22091 return false;
22092
54700e2e
AP
22093  /* If we have SImode and slow unaligned ldp,
22094     require the alignment to be at least 8 bytes.  */
22095 if (mode == SImode
22096 && (aarch64_tune_params.extra_tuning_flags
22097 & AARCH64_EXTRA_TUNE_SLOW_UNALIGNED_LDPW)
22098 && !optimize_size
22099 && MEM_ALIGN (mem_1) < 8 * BITS_PER_UNIT)
22100 return false;
22101
350013bc
BC
22102 /* Check if the addresses are in the form of [base+offset]. */
22103 extract_base_offset_in_addr (mem_1, &base_1, &offset_1);
22104 if (base_1 == NULL_RTX || offset_1 == NULL_RTX)
22105 return false;
22106 extract_base_offset_in_addr (mem_2, &base_2, &offset_2);
22107 if (base_2 == NULL_RTX || offset_2 == NULL_RTX)
22108 return false;
22109
22110 /* Check if the bases are same. */
22111 if (!rtx_equal_p (base_1, base_2))
22112 return false;
22113
dfe1da23
JW
22114 /* The operands must be of the same size. */
22115 gcc_assert (known_eq (GET_MODE_SIZE (GET_MODE (mem_1)),
22116 GET_MODE_SIZE (GET_MODE (mem_2))));
22117
350013bc
BC
22118 offval_1 = INTVAL (offset_1);
22119 offval_2 = INTVAL (offset_2);
6a70badb
RS
22120 /* We should only be trying this for fixed-sized modes. There is no
22121 SVE LDP/STP instruction. */
22122 msize = GET_MODE_SIZE (mode).to_constant ();
350013bc
BC
22123 /* Check if the offsets are consecutive. */
22124 if (offval_1 != (offval_2 + msize) && offval_2 != (offval_1 + msize))
22125 return false;
22126
22127 /* Check if the addresses are clobbered by load. */
22128 if (load)
22129 {
22130 if (reg_mentioned_p (reg_1, mem_1))
22131 return false;
22132
22133 /* In increasing order, the last load can clobber the address. */
22134 if (offval_1 > offval_2 && reg_mentioned_p (reg_2, mem_2))
9b56ec11 22135 return false;
350013bc
BC
22136 }
22137
9b56ec11
JW
22138 /* One of the memory accesses must be a mempair operand.
22139 If it is not the first one, they need to be swapped by the
22140 peephole. */
22141 if (!aarch64_mem_pair_operand (mem_1, GET_MODE (mem_1))
22142 && !aarch64_mem_pair_operand (mem_2, GET_MODE (mem_2)))
22143 return false;
22144
350013bc
BC
22145 if (REG_P (reg_1) && FP_REGNUM_P (REGNO (reg_1)))
22146 rclass_1 = FP_REGS;
22147 else
22148 rclass_1 = GENERAL_REGS;
22149
22150 if (REG_P (reg_2) && FP_REGNUM_P (REGNO (reg_2)))
22151 rclass_2 = FP_REGS;
22152 else
22153 rclass_2 = GENERAL_REGS;
22154
22155 /* Check if the registers are of same class. */
22156 if (rclass_1 != rclass_2)
22157 return false;
22158
22159 return true;
22160}
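/* Illustrative example, not part of the original source: two loads that
   satisfy the checks above, such as

     ldr  x0, [x2]
     ldr  x1, [x2, #8]

   can be rewritten by the ldp/stp peepholes as

     ldp  x0, x1, [x2]  */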
22161
9b56ec11
JW
22162/* Given OPERANDS of consecutive load/store that can be merged,
22163 swap them if they are not in ascending order. */
22164void
22165aarch64_swap_ldrstr_operands (rtx* operands, bool load)
22166{
22167 rtx mem_1, mem_2, base_1, base_2, offset_1, offset_2;
22168 HOST_WIDE_INT offval_1, offval_2;
22169
22170 if (load)
22171 {
22172 mem_1 = operands[1];
22173 mem_2 = operands[3];
22174 }
22175 else
22176 {
22177 mem_1 = operands[0];
22178 mem_2 = operands[2];
22179 }
22180
22181 extract_base_offset_in_addr (mem_1, &base_1, &offset_1);
22182 extract_base_offset_in_addr (mem_2, &base_2, &offset_2);
22183
22184 offval_1 = INTVAL (offset_1);
22185 offval_2 = INTVAL (offset_2);
22186
22187 if (offval_1 > offval_2)
22188 {
22189 /* Irrespective of whether this is a load or a store,
22190 we do the same swap. */
22191 std::swap (operands[0], operands[2]);
22192 std::swap (operands[1], operands[3]);
22193 }
22194}
22195
d0b51297
JW
22196/* Taking X and Y to be HOST_WIDE_INT pointers, return the result of a
22197 comparison between the two. */
22198int
22199aarch64_host_wide_int_compare (const void *x, const void *y)
22200{
22201 return wi::cmps (* ((const HOST_WIDE_INT *) x),
22202 * ((const HOST_WIDE_INT *) y));
22203}
22204
22205/* Taking X and Y to be pairs of RTX, one pointing to a MEM rtx and the
22206 other pointing to a REG rtx containing an offset, compare the offsets
22207 of the two pairs.
22208
22209 Return:
22210
22211 1 iff offset (X) > offset (Y)
22212 0 iff offset (X) == offset (Y)
22213 -1 iff offset (X) < offset (Y) */
22214int
22215aarch64_ldrstr_offset_compare (const void *x, const void *y)
22216{
22217 const rtx * operands_1 = (const rtx *) x;
22218 const rtx * operands_2 = (const rtx *) y;
22219 rtx mem_1, mem_2, base, offset_1, offset_2;
22220
22221 if (MEM_P (operands_1[0]))
22222 mem_1 = operands_1[0];
22223 else
22224 mem_1 = operands_1[1];
22225
22226 if (MEM_P (operands_2[0]))
22227 mem_2 = operands_2[0];
22228 else
22229 mem_2 = operands_2[1];
22230
22231 /* Extract the offsets. */
22232 extract_base_offset_in_addr (mem_1, &base, &offset_1);
22233 extract_base_offset_in_addr (mem_2, &base, &offset_2);
22234
22235 gcc_assert (offset_1 != NULL_RTX && offset_2 != NULL_RTX);
22236
22237 return wi::cmps (INTVAL (offset_1), INTVAL (offset_2));
22238}
22239
350013bc
BC
22240/* Given OPERANDS of consecutive load/store, check if we can merge
22241 them into ldp/stp by adjusting the offset. LOAD is true if they
22242 are load instructions. MODE is the mode of memory operands.
22243
22244 Given below consecutive stores:
22245
22246 str w1, [xb, 0x100]
22247 str w1, [xb, 0x104]
22248 str w1, [xb, 0x108]
22249 str w1, [xb, 0x10c]
22250
22251 Though the offsets are out of the range supported by stp, we can
22252 still pair them after adjusting the offset, like:
22253
22254 add scratch, xb, 0x100
22255 stp w1, w1, [scratch]
22256 stp w1, w1, [scratch, 0x8]
22257
22258 The peephole patterns detecting this opportunity should guarantee
22259   the scratch register is available.  */
22260
22261bool
22262aarch64_operands_adjust_ok_for_ldpstp (rtx *operands, bool load,
cd91a084 22263 machine_mode mode)
350013bc 22264{
34d7854d
JW
22265 const int num_insns = 4;
22266 enum reg_class rclass;
22267 HOST_WIDE_INT offvals[num_insns], msize;
22268 rtx mem[num_insns], reg[num_insns], base[num_insns], offset[num_insns];
350013bc
BC
22269
22270 if (load)
22271 {
34d7854d
JW
22272 for (int i = 0; i < num_insns; i++)
22273 {
22274 reg[i] = operands[2 * i];
22275 mem[i] = operands[2 * i + 1];
22276
22277 gcc_assert (REG_P (reg[i]));
22278 }
d0b51297
JW
22279
22280 /* Do not attempt to merge the loads if the loads clobber each other. */
22281 for (int i = 0; i < 8; i += 2)
22282 for (int j = i + 2; j < 8; j += 2)
22283 if (reg_overlap_mentioned_p (operands[i], operands[j]))
22284 return false;
350013bc
BC
22285 }
22286 else
34d7854d
JW
22287 for (int i = 0; i < num_insns; i++)
22288 {
22289 mem[i] = operands[2 * i];
22290 reg[i] = operands[2 * i + 1];
22291 }
350013bc 22292
34d7854d
JW
22293 /* Skip if memory operand is by itself valid for ldp/stp. */
22294 if (!MEM_P (mem[0]) || aarch64_mem_pair_operand (mem[0], mode))
bf84ac44
AP
22295 return false;
22296
34d7854d
JW
22297 for (int i = 0; i < num_insns; i++)
22298 {
22299 /* The mems cannot be volatile. */
22300 if (MEM_VOLATILE_P (mem[i]))
22301 return false;
22302
22303 /* Check if the addresses are in the form of [base+offset]. */
22304 extract_base_offset_in_addr (mem[i], base + i, offset + i);
22305 if (base[i] == NULL_RTX || offset[i] == NULL_RTX)
22306 return false;
22307 }
22308
363b395b
JW
22309 /* Check if the registers are of same class. */
22310 rclass = REG_P (reg[0]) && FP_REGNUM_P (REGNO (reg[0]))
22311 ? FP_REGS : GENERAL_REGS;
22312
22313 for (int i = 1; i < num_insns; i++)
22314 if (REG_P (reg[i]) && FP_REGNUM_P (REGNO (reg[i])))
22315 {
22316 if (rclass != FP_REGS)
22317 return false;
22318 }
22319 else
22320 {
22321 if (rclass != GENERAL_REGS)
22322 return false;
22323 }
22324
22325 /* Only the last register in the order in which they occur
22326 may be clobbered by the load. */
22327 if (rclass == GENERAL_REGS && load)
22328 for (int i = 0; i < num_insns - 1; i++)
34d7854d
JW
22329 if (reg_mentioned_p (reg[i], mem[i]))
22330 return false;
350013bc
BC
22331
22332 /* Check if the bases are same. */
34d7854d
JW
22333 for (int i = 0; i < num_insns - 1; i++)
22334 if (!rtx_equal_p (base[i], base[i + 1]))
22335 return false;
22336
22337 for (int i = 0; i < num_insns; i++)
22338 offvals[i] = INTVAL (offset[i]);
350013bc 22339
cd91a084 22340 msize = GET_MODE_SIZE (mode).to_constant ();
d0b51297
JW
22341
22342 /* Check if the offsets can be put in the right order to do a ldp/stp. */
34d7854d
JW
22343 qsort (offvals, num_insns, sizeof (HOST_WIDE_INT),
22344 aarch64_host_wide_int_compare);
d0b51297
JW
22345
22346 if (!(offvals[1] == offvals[0] + msize
22347 && offvals[3] == offvals[2] + msize))
350013bc
BC
22348 return false;
22349
d0b51297
JW
22350 /* Check that offsets are within range of each other. The ldp/stp
22351 instructions have 7 bit immediate offsets, so use 0x80. */
22352 if (offvals[2] - offvals[0] >= msize * 0x80)
22353 return false;
350013bc 22354
d0b51297
JW
22355 /* The offsets must be aligned with respect to each other. */
22356 if (offvals[0] % msize != offvals[2] % msize)
22357 return false;
22358
54700e2e
AP
22359  /* If we have SImode and slow unaligned ldp,
22360     require the alignment to be at least 8 bytes.  */
22361 if (mode == SImode
22362 && (aarch64_tune_params.extra_tuning_flags
34d7854d 22363 & AARCH64_EXTRA_TUNE_SLOW_UNALIGNED_LDPW)
54700e2e 22364 && !optimize_size
34d7854d 22365 && MEM_ALIGN (mem[0]) < 8 * BITS_PER_UNIT)
54700e2e
AP
22366 return false;
22367
350013bc
BC
22368 return true;
22369}
22370
22371/* Given OPERANDS of consecutive load/store, this function pairs them
d0b51297
JW
22372 into LDP/STP after adjusting the offset. It depends on the fact
22373 that the operands can be sorted so the offsets are correct for STP.
350013bc
BC
22374 MODE is the mode of memory operands. CODE is the rtl operator
22375   which should be applied to all memory operands; it is SIGN_EXTEND,
22376 ZERO_EXTEND or UNKNOWN. */
22377
22378bool
22379aarch64_gen_adjusted_ldpstp (rtx *operands, bool load,
cd91a084 22380 machine_mode mode, RTX_CODE code)
350013bc 22381{
d0b51297 22382 rtx base, offset_1, offset_3, t1, t2;
350013bc 22383 rtx mem_1, mem_2, mem_3, mem_4;
d0b51297
JW
22384 rtx temp_operands[8];
22385 HOST_WIDE_INT off_val_1, off_val_3, base_off, new_off_1, new_off_3,
22386 stp_off_upper_limit, stp_off_lower_limit, msize;
9b56ec11 22387
d0b51297
JW
22388 /* We make changes on a copy as we may still bail out. */
22389 for (int i = 0; i < 8; i ++)
22390 temp_operands[i] = operands[i];
9b56ec11 22391
d0b51297
JW
22392 /* Sort the operands. */
22393 qsort (temp_operands, 4, 2 * sizeof (rtx *), aarch64_ldrstr_offset_compare);
9b56ec11 22394
f6af9c21
RE
22395 /* Copy the memory operands so that if we have to bail for some
22396 reason the original addresses are unchanged. */
350013bc
BC
22397 if (load)
22398 {
f6af9c21
RE
22399 mem_1 = copy_rtx (temp_operands[1]);
22400 mem_2 = copy_rtx (temp_operands[3]);
22401 mem_3 = copy_rtx (temp_operands[5]);
22402 mem_4 = copy_rtx (temp_operands[7]);
350013bc
BC
22403 }
22404 else
22405 {
f6af9c21
RE
22406 mem_1 = copy_rtx (temp_operands[0]);
22407 mem_2 = copy_rtx (temp_operands[2]);
22408 mem_3 = copy_rtx (temp_operands[4]);
22409 mem_4 = copy_rtx (temp_operands[6]);
350013bc
BC
22410 gcc_assert (code == UNKNOWN);
22411 }
22412
9b56ec11 22413 extract_base_offset_in_addr (mem_1, &base, &offset_1);
d0b51297
JW
22414 extract_base_offset_in_addr (mem_3, &base, &offset_3);
22415 gcc_assert (base != NULL_RTX && offset_1 != NULL_RTX
22416 && offset_3 != NULL_RTX);
350013bc 22417
d0b51297 22418 /* Adjust offset so it can fit in LDP/STP instruction. */
cd91a084 22419	  msize = GET_MODE_SIZE (mode).to_constant ();
d0b51297
JW
22420 stp_off_upper_limit = msize * (0x40 - 1);
22421 stp_off_lower_limit = - msize * 0x40;
350013bc 22422
d0b51297
JW
22423 off_val_1 = INTVAL (offset_1);
22424 off_val_3 = INTVAL (offset_3);
22425
22426 /* The base offset is optimally half way between the two STP/LDP offsets. */
22427 if (msize <= 4)
22428 base_off = (off_val_1 + off_val_3) / 2;
22429 else
22430 /* However, due to issues with negative LDP/STP offset generation for
22431	 larger modes (DF, DI and vector modes), we must not use negative
22432 addresses smaller than 9 signed unadjusted bits can store. This
22433 provides the most range in this case. */
22434 base_off = off_val_1;
22435
22436 /* Adjust the base so that it is aligned with the addresses but still
22437 optimal. */
22438 if (base_off % msize != off_val_1 % msize)
22439 /* Fix the offset, bearing in mind we want to make it bigger not
22440 smaller. */
22441 base_off += (((base_off % msize) - (off_val_1 % msize)) + msize) % msize;
22442 else if (msize <= 4)
22443 /* The negative range of LDP/STP is one larger than the positive range. */
22444 base_off += msize;
22445
22446 /* Check if base offset is too big or too small. We can attempt to resolve
22447 this issue by setting it to the maximum value and seeing if the offsets
22448 still fit. */
22449 if (base_off >= 0x1000)
350013bc 22450 {
d0b51297
JW
22451 base_off = 0x1000 - 1;
22452 /* We must still make sure that the base offset is aligned with respect
700d4cb0 22453 to the address. But it may not be made any bigger. */
d0b51297 22454 base_off -= (((base_off % msize) - (off_val_1 % msize)) + msize) % msize;
350013bc
BC
22455 }
22456
d0b51297
JW
22457 /* Likewise for the case where the base is too small. */
22458 if (base_off <= -0x1000)
350013bc 22459 {
d0b51297
JW
22460 base_off = -0x1000 + 1;
22461 base_off += (((base_off % msize) - (off_val_1 % msize)) + msize) % msize;
350013bc
BC
22462 }
22463
d0b51297
JW
22464 /* Offset of the first STP/LDP. */
22465 new_off_1 = off_val_1 - base_off;
22466
22467 /* Offset of the second STP/LDP. */
22468 new_off_3 = off_val_3 - base_off;
350013bc 22469
d0b51297
JW
22470 /* The offsets must be within the range of the LDP/STP instructions. */
22471 if (new_off_1 > stp_off_upper_limit || new_off_1 < stp_off_lower_limit
22472 || new_off_3 > stp_off_upper_limit || new_off_3 < stp_off_lower_limit)
350013bc
BC
22473 return false;
22474
d0b51297
JW
22475 replace_equiv_address_nv (mem_1, plus_constant (Pmode, operands[8],
22476 new_off_1), true);
22477 replace_equiv_address_nv (mem_2, plus_constant (Pmode, operands[8],
22478 new_off_1 + msize), true);
22479 replace_equiv_address_nv (mem_3, plus_constant (Pmode, operands[8],
22480 new_off_3), true);
22481 replace_equiv_address_nv (mem_4, plus_constant (Pmode, operands[8],
22482 new_off_3 + msize), true);
22483
22484 if (!aarch64_mem_pair_operand (mem_1, mode)
22485 || !aarch64_mem_pair_operand (mem_3, mode))
22486 return false;
350013bc
BC
22487
22488 if (code == ZERO_EXTEND)
22489 {
22490 mem_1 = gen_rtx_ZERO_EXTEND (DImode, mem_1);
22491 mem_2 = gen_rtx_ZERO_EXTEND (DImode, mem_2);
22492 mem_3 = gen_rtx_ZERO_EXTEND (DImode, mem_3);
22493 mem_4 = gen_rtx_ZERO_EXTEND (DImode, mem_4);
22494 }
22495 else if (code == SIGN_EXTEND)
22496 {
22497 mem_1 = gen_rtx_SIGN_EXTEND (DImode, mem_1);
22498 mem_2 = gen_rtx_SIGN_EXTEND (DImode, mem_2);
22499 mem_3 = gen_rtx_SIGN_EXTEND (DImode, mem_3);
22500 mem_4 = gen_rtx_SIGN_EXTEND (DImode, mem_4);
22501 }
22502
22503 if (load)
22504 {
d0b51297 22505 operands[0] = temp_operands[0];
350013bc 22506 operands[1] = mem_1;
d0b51297 22507 operands[2] = temp_operands[2];
350013bc 22508 operands[3] = mem_2;
d0b51297 22509 operands[4] = temp_operands[4];
350013bc 22510 operands[5] = mem_3;
d0b51297 22511 operands[6] = temp_operands[6];
350013bc
BC
22512 operands[7] = mem_4;
22513 }
22514 else
22515 {
22516 operands[0] = mem_1;
d0b51297 22517 operands[1] = temp_operands[1];
350013bc 22518 operands[2] = mem_2;
d0b51297 22519 operands[3] = temp_operands[3];
350013bc 22520 operands[4] = mem_3;
d0b51297 22521 operands[5] = temp_operands[5];
350013bc 22522 operands[6] = mem_4;
d0b51297 22523 operands[7] = temp_operands[7];
350013bc
BC
22524 }
22525
22526 /* Emit adjusting instruction. */
d0b51297 22527 emit_insn (gen_rtx_SET (operands[8], plus_constant (DImode, base, base_off)));
350013bc 22528 /* Emit ldp/stp instructions. */
f7df4a84
RS
22529 t1 = gen_rtx_SET (operands[0], operands[1]);
22530 t2 = gen_rtx_SET (operands[2], operands[3]);
350013bc 22531 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, t1, t2)));
f7df4a84
RS
22532 t1 = gen_rtx_SET (operands[4], operands[5]);
22533 t2 = gen_rtx_SET (operands[6], operands[7]);
350013bc
BC
22534 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, t1, t2)));
22535 return true;
22536}
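/* The following is a standalone, illustrative model of the base-offset
   arithmetic in the function above; the name model_adjusted_ldpstp and
   its parameters are hypothetical and not part of GCC.  It shows how a
   base is picked (clamped so the adjusting ADD/SUB immediate stays
   within +/-(0x1000 - 1)) and how the rebased offsets are checked
   against the LDP/STP range [-64 * msize, 63 * msize].  */

#include <stdbool.h>

static bool
model_adjusted_ldpstp (long off_val_1, long off_val_3, long msize,
		       long *new_off_1, long *new_off_3)
{
  long stp_off_upper_limit = msize * (0x40 - 1);
  long stp_off_lower_limit = -msize * 0x40;
  long base_off;

  /* Same choice of starting base as the code above.  */
  if (msize <= 4)
    base_off = (off_val_1 + off_val_3) / 2;
  else
    base_off = off_val_1;

  /* Re-align the base with the addresses, rounding upwards.  */
  if (base_off % msize != off_val_1 % msize)
    base_off += (((base_off % msize) - (off_val_1 % msize)) + msize) % msize;
  else if (msize <= 4)
    base_off += msize;

  /* Clamp the base, preserving alignment, as in the code above.  */
  if (base_off >= 0x1000)
    {
      base_off = 0x1000 - 1;
      base_off -= (((base_off % msize) - (off_val_1 % msize)) + msize) % msize;
    }
  if (base_off <= -0x1000)
    {
      base_off = -0x1000 + 1;
      base_off += (((base_off % msize) - (off_val_1 % msize)) + msize) % msize;
    }

  *new_off_1 = off_val_1 - base_off;
  *new_off_3 = off_val_3 - base_off;

  return (*new_off_1 <= stp_off_upper_limit && *new_off_1 >= stp_off_lower_limit
	  && *new_off_3 <= stp_off_upper_limit && *new_off_3 >= stp_off_lower_limit);
}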
22537
76a34e3f
RS
22538/* Implement TARGET_VECTORIZE_EMPTY_MASK_IS_EXPENSIVE. Assume for now that
22539 it isn't worth branching around empty masked ops (including masked
22540 stores). */
22541
22542static bool
22543aarch64_empty_mask_is_expensive (unsigned)
22544{
22545 return false;
22546}
22547
1b1e81f8
JW
22548/* Return 1 if pseudo register should be created and used to hold
22549 GOT address for PIC code. */
22550
22551bool
22552aarch64_use_pseudo_pic_reg (void)
22553{
22554 return aarch64_cmodel == AARCH64_CMODEL_SMALL_SPIC;
22555}
22556
7b841a12
JW
22557/* Implement TARGET_UNSPEC_MAY_TRAP_P. */
22558
22559static int
22560aarch64_unspec_may_trap_p (const_rtx x, unsigned flags)
22561{
22562 switch (XINT (x, 1))
22563 {
22564 case UNSPEC_GOTSMALLPIC:
22565 case UNSPEC_GOTSMALLPIC28K:
22566 case UNSPEC_GOTTINYPIC:
22567 return 0;
22568 default:
22569 break;
22570 }
22571
22572 return default_unspec_may_trap_p (x, flags);
22573}
22574
39252973
KT
22575
22576/* If X is a positive CONST_DOUBLE with a value that is a power of 2
22577 return the log2 of that value. Otherwise return -1. */
22578
22579int
22580aarch64_fpconst_pow_of_2 (rtx x)
22581{
22582 const REAL_VALUE_TYPE *r;
22583
22584 if (!CONST_DOUBLE_P (x))
22585 return -1;
22586
22587 r = CONST_DOUBLE_REAL_VALUE (x);
22588
22589 if (REAL_VALUE_NEGATIVE (*r)
22590 || REAL_VALUE_ISNAN (*r)
22591 || REAL_VALUE_ISINF (*r)
22592 || !real_isinteger (r, DFmode))
22593 return -1;
22594
22595 return exact_log2 (real_to_integer (r));
22596}
22597
188d0079
JH
22598/* If X is a positive CONST_DOUBLE with a value that is the reciprocal of a
22599 power of 2 (i.e. 1/2^n), return the number of float bits. E.g. for x == (1/2^n)
22600 return n. Otherwise return -1. */
22601
22602int
22603aarch64_fpconst_pow2_recip (rtx x)
22604{
22605 REAL_VALUE_TYPE r0;
22606
22607 if (!CONST_DOUBLE_P (x))
22608 return -1;
22609
22610 r0 = *CONST_DOUBLE_REAL_VALUE (x);
22611 if (exact_real_inverse (DFmode, &r0)
22612 && !REAL_VALUE_NEGATIVE (r0))
22613 {
22614 int ret = exact_log2 (real_to_integer (&r0));
22615 if (ret >= 1 && ret <= 32)
22616 return ret;
22617 }
22618 return -1;
22619}
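/* An illustrative standalone analogue of the two helpers above, using
   plain doubles instead of CONST_DOUBLE rtxes; the model_* names are
   hypothetical and not part of GCC.  The pow-of-2 check returns log2 (x)
   for positive integral powers of two (e.g. 4.0 -> 2), and the
   reciprocal variant returns n for x == 1/2^n with 1 <= n <= 32
   (e.g. 0.25 -> 2); both return -1 otherwise.  */

#include <math.h>

static int
model_fpconst_pow_of_2 (double x)
{
  int exp;
  if (!(x > 0) || frexp (x, &exp) != 0.5 || exp < 1)
    return -1;			/* Not a positive integral power of two.  */
  return exp - 1;		/* x == 2^(exp - 1).  */
}

static int
model_fpconst_pow2_recip (double x)
{
  if (!(x > 0))
    return -1;
  int n = model_fpconst_pow_of_2 (1.0 / x);
  return (n >= 1 && n <= 32) ? n : -1;
}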
22620
39252973
KT
22621/* If X is a vector of equal CONST_DOUBLE values and that value is
22622 Y, return the aarch64_fpconst_pow_of_2 of Y. Otherwise return -1. */
22623
22624int
22625aarch64_vec_fpconst_pow_of_2 (rtx x)
22626{
6a70badb
RS
22627 int nelts;
22628 if (GET_CODE (x) != CONST_VECTOR
22629 || !CONST_VECTOR_NUNITS (x).is_constant (&nelts))
39252973
KT
22630 return -1;
22631
22632 if (GET_MODE_CLASS (GET_MODE (x)) != MODE_VECTOR_FLOAT)
22633 return -1;
22634
22635 int firstval = aarch64_fpconst_pow_of_2 (CONST_VECTOR_ELT (x, 0));
22636 if (firstval <= 0)
22637 return -1;
22638
6a70badb 22639 for (int i = 1; i < nelts; i++)
39252973
KT
22640 if (aarch64_fpconst_pow_of_2 (CONST_VECTOR_ELT (x, i)) != firstval)
22641 return -1;
22642
22643 return firstval;
22644}
22645
11e554b3
JG
22646/* Implement TARGET_PROMOTED_TYPE to promote 16-bit floating point types
22647 to float.
22648
22649 __fp16 always promotes through this hook.
22650 _Float16 may promote if TARGET_FLT_EVAL_METHOD is 16, but we do that
22651 through the generic excess precision logic rather than here. */
22652
c2ec330c
AL
22653static tree
22654aarch64_promoted_type (const_tree t)
22655{
11e554b3
JG
22656 if (SCALAR_FLOAT_TYPE_P (t)
22657 && TYPE_MAIN_VARIANT (t) == aarch64_fp16_type_node)
c2ec330c 22658 return float_type_node;
11e554b3 22659
c2ec330c
AL
22660 return NULL_TREE;
22661}
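/* A user-level illustration (assuming an AArch64 compiler with __fp16
   support): because of the promotion above, the arithmetic below is
   performed in float and only the final result is narrowed back.  */
__fp16
add_fp16 (__fp16 a, __fp16 b)
{
  return a + b;		/* Evaluated as (float) a + (float) b.  */
}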
ee62a5a6
RS
22662
22663/* Implement the TARGET_OPTAB_SUPPORTED_P hook. */
22664
22665static bool
9acc9cbe 22666aarch64_optab_supported_p (int op, machine_mode mode1, machine_mode,
ee62a5a6
RS
22667 optimization_type opt_type)
22668{
22669 switch (op)
22670 {
22671 case rsqrt_optab:
9acc9cbe 22672 return opt_type == OPTIMIZE_FOR_SPEED && use_rsqrt_p (mode1);
ee62a5a6
RS
22673
22674 default:
22675 return true;
22676 }
22677}
22678
43cacb12
RS
22679/* Implement the TARGET_DWARF_POLY_INDETERMINATE_VALUE hook. */
22680
22681static unsigned int
22682aarch64_dwarf_poly_indeterminate_value (unsigned int i, unsigned int *factor,
22683 int *offset)
22684{
22685 /* Polynomial invariant 1 == (VG / 2) - 1. */
22686 gcc_assert (i == 1);
22687 *factor = 2;
22688 *offset = 1;
22689 return AARCH64_DWARF_VG;
22690}
22691
11e554b3
JG
22692/* Implement TARGET_LIBGCC_FLOATING_POINT_MODE_SUPPORTED_P - return TRUE
22693 if MODE is HFmode, and punt to the generic implementation otherwise. */
22694
22695static bool
7c5bd57a 22696aarch64_libgcc_floating_mode_supported_p (scalar_float_mode mode)
11e554b3
JG
22697{
22698 return (mode == HFmode
22699 ? true
22700 : default_libgcc_floating_mode_supported_p (mode));
22701}
22702
2e5f8203
JG
22703/* Implement TARGET_SCALAR_MODE_SUPPORTED_P - return TRUE
22704 if MODE is HFmode, and punt to the generic implementation otherwise. */
22705
22706static bool
18e2a8b8 22707aarch64_scalar_mode_supported_p (scalar_mode mode)
2e5f8203
JG
22708{
22709 return (mode == HFmode
22710 ? true
22711 : default_scalar_mode_supported_p (mode));
22712}
22713
11e554b3
JG
22714/* Set the value of FLT_EVAL_METHOD.
22715 ISO/IEC TS 18661-3 defines two values that we'd like to make use of:
22716
22717 0: evaluate all operations and constants, whose semantic type has at
22718 most the range and precision of type float, to the range and
22719 precision of float; evaluate all other operations and constants to
22720 the range and precision of the semantic type;
22721
22722 N, where _FloatN is a supported interchange floating type
22723 evaluate all operations and constants, whose semantic type has at
22724 most the range and precision of _FloatN type, to the range and
22725 precision of the _FloatN type; evaluate all other operations and
22726 constants to the range and precision of the semantic type;
22727
22728 If we have the ARMv8.2-A extensions then we support _Float16 in native
22729 precision, so we should set this to 16. Otherwise, we support the type,
22730 but want to evaluate expressions in float precision, so set this to
22731 0. */
22732
22733static enum flt_eval_method
22734aarch64_excess_precision (enum excess_precision_type type)
22735{
22736 switch (type)
22737 {
22738 case EXCESS_PRECISION_TYPE_FAST:
22739 case EXCESS_PRECISION_TYPE_STANDARD:
22740 /* We can calculate either in 16-bit range and precision or
22741 32-bit range and precision. Make that decision based on whether
22742 we have native support for the ARMv8.2-A 16-bit floating-point
22743 instructions or not. */
22744 return (TARGET_FP_F16INST
22745 ? FLT_EVAL_METHOD_PROMOTE_TO_FLOAT16
22746 : FLT_EVAL_METHOD_PROMOTE_TO_FLOAT);
22747 case EXCESS_PRECISION_TYPE_IMPLICIT:
22748 return FLT_EVAL_METHOD_PROMOTE_TO_FLOAT16;
22749 default:
22750 gcc_unreachable ();
22751 }
22752 return FLT_EVAL_METHOD_UNPREDICTABLE;
22753}
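/* Illustrative only: the value chosen above is what <float.h> reports as
   FLT_EVAL_METHOD, so user code can observe the decision directly.
   Expect 16 when the ARMv8.2-A FP16 instructions are enabled and 0
   otherwise, though the reported value also depends on the language
   standard and on -fpermitted-flt-eval-methods.  */
#include <float.h>
#include <stdio.h>

int
main (void)
{
  printf ("FLT_EVAL_METHOD = %d\n", (int) FLT_EVAL_METHOD);
  return 0;
}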
22754
b48d6421
KT
22755/* Implement TARGET_SCHED_CAN_SPECULATE_INSN. Return true if INSN can be
22756 scheduled for speculative execution. Reject the long-running division
22757 and square-root instructions. */
22758
22759static bool
22760aarch64_sched_can_speculate_insn (rtx_insn *insn)
22761{
22762 switch (get_attr_type (insn))
22763 {
22764 case TYPE_SDIV:
22765 case TYPE_UDIV:
22766 case TYPE_FDIVS:
22767 case TYPE_FDIVD:
22768 case TYPE_FSQRTS:
22769 case TYPE_FSQRTD:
22770 case TYPE_NEON_FP_SQRT_S:
22771 case TYPE_NEON_FP_SQRT_D:
22772 case TYPE_NEON_FP_SQRT_S_Q:
22773 case TYPE_NEON_FP_SQRT_D_Q:
22774 case TYPE_NEON_FP_DIV_S:
22775 case TYPE_NEON_FP_DIV_D:
22776 case TYPE_NEON_FP_DIV_S_Q:
22777 case TYPE_NEON_FP_DIV_D_Q:
22778 return false;
22779 default:
22780 return true;
22781 }
22782}
22783
43cacb12
RS
22784/* Implement TARGET_COMPUTE_PRESSURE_CLASSES. */
22785
22786static int
22787aarch64_compute_pressure_classes (reg_class *classes)
22788{
22789 int i = 0;
22790 classes[i++] = GENERAL_REGS;
22791 classes[i++] = FP_REGS;
22792 /* PR_REGS isn't a useful pressure class because many predicate pseudo
22793 registers need to go in PR_LO_REGS at some point during their
22794 lifetime. Splitting it into two halves has the effect of making
22795 all predicates count against PR_LO_REGS, so that we try whenever
22796 possible to restrict the number of live predicates to 8. This
22797 greatly reduces the amount of spilling in certain loops. */
22798 classes[i++] = PR_LO_REGS;
22799 classes[i++] = PR_HI_REGS;
22800 return i;
22801}
22802
22803/* Implement TARGET_CAN_CHANGE_MODE_CLASS. */
22804
22805static bool
22806aarch64_can_change_mode_class (machine_mode from,
22807 machine_mode to, reg_class_t)
22808{
76607e7e
RS
22809 unsigned int from_flags = aarch64_classify_vector_mode (from);
22810 unsigned int to_flags = aarch64_classify_vector_mode (to);
22811
22812 bool from_sve_p = (from_flags & VEC_ANY_SVE);
22813 bool to_sve_p = (to_flags & VEC_ANY_SVE);
22814
22815 bool from_partial_sve_p = from_sve_p && (from_flags & VEC_PARTIAL);
22816 bool to_partial_sve_p = to_sve_p && (to_flags & VEC_PARTIAL);
22817
38e62001
RS
22818 bool from_pred_p = (from_flags & VEC_SVE_PRED);
22819 bool to_pred_p = (to_flags & VEC_SVE_PRED);
22820
22821 /* Don't allow changes between predicate modes and other modes.
22822 Only predicate registers can hold predicate modes and only
22823 non-predicate registers can hold non-predicate modes, so any
22824 attempt to mix them would require a round trip through memory. */
22825 if (from_pred_p != to_pred_p)
22826 return false;
22827
76607e7e
RS
22828 /* Don't allow changes between partial SVE modes and other modes.
22829 The contents of partial SVE modes are distributed evenly across
22830 the register, whereas GCC expects them to be clustered together. */
22831 if (from_partial_sve_p != to_partial_sve_p)
22832 return false;
22833
22834 /* Similarly reject changes between partial SVE modes that have
22835 different patterns of significant and insignificant bits. */
22836 if (from_partial_sve_p
22837 && (aarch64_sve_container_bits (from) != aarch64_sve_container_bits (to)
22838 || GET_MODE_UNIT_SIZE (from) != GET_MODE_UNIT_SIZE (to)))
22839 return false;
22840
38e62001
RS
22841 if (maybe_ne (BITS_PER_SVE_VECTOR, 128u))
22842 {
22843 /* Don't allow changes between SVE modes and other modes that might
22844 be bigger than 128 bits. In particular, OImode, CImode and XImode
22845 divide into 128-bit quantities while SVE modes divide into
22846 BITS_PER_SVE_VECTOR quantities. */
22847 if (from_sve_p && !to_sve_p && maybe_gt (GET_MODE_BITSIZE (to), 128))
22848 return false;
22849 if (to_sve_p && !from_sve_p && maybe_gt (GET_MODE_BITSIZE (from), 128))
22850 return false;
22851 }
22852
002092be
RS
22853 if (BYTES_BIG_ENDIAN)
22854 {
002092be
RS
22855 /* Don't allow changes between SVE data modes and non-SVE modes.
22856 See the comment at the head of aarch64-sve.md for details. */
22857 if (from_sve_p != to_sve_p)
22858 return false;
22859
22860 /* Don't allow changes in element size: lane 0 of the new vector
22861 would not then be lane 0 of the old vector. See the comment
22862 above aarch64_maybe_expand_sve_subreg_move for a more detailed
22863 description.
22864
22865 In the worst case, this forces a register to be spilled in
22866 one mode and reloaded in the other, which handles the
22867 endianness correctly. */
22868 if (from_sve_p && GET_MODE_UNIT_SIZE (from) != GET_MODE_UNIT_SIZE (to))
22869 return false;
22870 }
43cacb12
RS
22871 return true;
22872}
22873
5cce8171
RS
22874/* Implement TARGET_EARLY_REMAT_MODES. */
22875
22876static void
22877aarch64_select_early_remat_modes (sbitmap modes)
22878{
22879 /* SVE values are not normally live across a call, so it should be
22880 worth doing early rematerialization even in VL-specific mode. */
22881 for (int i = 0; i < NUM_MACHINE_MODES; ++i)
5c38705d
RS
22882 if (aarch64_sve_mode_p ((machine_mode) i))
22883 bitmap_set_bit (modes, i);
5cce8171
RS
22884}
22885
c0111dc4
RE
22886/* Override the default target speculation_safe_value. */
22887static rtx
22888aarch64_speculation_safe_value (machine_mode mode,
22889 rtx result, rtx val, rtx failval)
22890{
22891 /* Maybe we should warn if falling back to hard barriers. They are
22892 likely to be noticeably more expensive than the alternative below. */
22893 if (!aarch64_track_speculation)
22894 return default_speculation_safe_value (mode, result, val, failval);
22895
22896 if (!REG_P (val))
22897 val = copy_to_mode_reg (mode, val);
22898
22899 if (!aarch64_reg_or_zero (failval, mode))
22900 failval = copy_to_mode_reg (mode, failval);
22901
21cebf90 22902 emit_insn (gen_despeculate_copy (mode, result, val, failval));
c0111dc4
RE
22903 return result;
22904}
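/* A user-level illustration: __builtin_speculation_safe_value expands
   through the hook above.  With -mtrack-speculation the cheaper
   despeculation sequence is used; otherwise the generic hard-barrier
   fallback applies.  The function name below is just an example.  */
#include <stddef.h>

int
load_guarded (const int *array, size_t idx, size_t bound)
{
  if (idx < bound)
    /* Yields idx on the architecturally executed path, 0 under
       mis-speculation of the bounds check.  */
    return array[__builtin_speculation_safe_value (idx, (size_t) 0)];
  return 0;
}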
22905
2d56d6ba
KT
22906/* Implement TARGET_ESTIMATED_POLY_VALUE.
22907 Look into the tuning structure for an estimate.
22908 VAL.coeffs[1] is multiplied by the number of VQ chunks over the initial
22909 Advanced SIMD 128 bits. */
22910
22911static HOST_WIDE_INT
22912aarch64_estimated_poly_value (poly_int64 val)
22913{
22914 enum aarch64_sve_vector_bits_enum width_source
22915 = aarch64_tune_params.sve_width;
22916
22917 /* If we still don't have an estimate, use the default. */
22918 if (width_source == SVE_SCALABLE)
22919 return default_estimated_poly_value (val);
22920
22921 HOST_WIDE_INT over_128 = width_source - 128;
22922 return val.coeffs[0] + val.coeffs[1] * over_128 / 128;
22923}
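/* A standalone model of the estimate above (illustrative; the name and
   parameters are hypothetical, not part of GCC).  COEFF0 and COEFF1 are
   the two poly_int64 coefficients and SVE_WIDTH_BITS the tuned width.
   For example, the number of bytes in an SVE vector is the poly_int
   16 + 16x, so with sve_width == 256 the estimate is
   16 + 16 * (256 - 128) / 128 == 32.  */
static long
model_estimated_poly_value (long coeff0, long coeff1, long sve_width_bits)
{
  long over_128 = sve_width_bits - 128;
  return coeff0 + coeff1 * over_128 / 128;
}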
22924
d9186814
SE
22925
22926/* Return true for types that could be supported as SIMD return or
22927 argument types. */
22928
22929static bool
22930supported_simd_type (tree t)
22931{
22932 if (SCALAR_FLOAT_TYPE_P (t) || INTEGRAL_TYPE_P (t) || POINTER_TYPE_P (t))
22933 {
22934 HOST_WIDE_INT s = tree_to_shwi (TYPE_SIZE_UNIT (t));
22935 return s == 1 || s == 2 || s == 4 || s == 8;
22936 }
22937 return false;
22938}
22939
22940/* Return true for types that currently are supported as SIMD return
22941 or argument types. */
22942
22943static bool
22944currently_supported_simd_type (tree t, tree b)
22945{
22946 if (COMPLEX_FLOAT_TYPE_P (t))
22947 return false;
22948
22949 if (TYPE_SIZE (t) != TYPE_SIZE (b))
22950 return false;
22951
22952 return supported_simd_type (t);
22953}
22954
22955/* Implement TARGET_SIMD_CLONE_COMPUTE_VECSIZE_AND_SIMDLEN. */
22956
22957static int
22958aarch64_simd_clone_compute_vecsize_and_simdlen (struct cgraph_node *node,
22959 struct cgraph_simd_clone *clonei,
22960 tree base_type, int num)
22961{
22962 tree t, ret_type, arg_type;
22963 unsigned int elt_bits, vec_bits, count;
22964
22965 if (!TARGET_SIMD)
22966 return 0;
22967
22968 if (clonei->simdlen
22969 && (clonei->simdlen < 2
22970 || clonei->simdlen > 1024
22971 || (clonei->simdlen & (clonei->simdlen - 1)) != 0))
22972 {
22973 warning_at (DECL_SOURCE_LOCATION (node->decl), 0,
22974 "unsupported simdlen %d", clonei->simdlen);
22975 return 0;
22976 }
22977
22978 ret_type = TREE_TYPE (TREE_TYPE (node->decl));
22979 if (TREE_CODE (ret_type) != VOID_TYPE
22980 && !currently_supported_simd_type (ret_type, base_type))
22981 {
22982 if (TYPE_SIZE (ret_type) != TYPE_SIZE (base_type))
22983 warning_at (DECL_SOURCE_LOCATION (node->decl), 0,
22984 "GCC does not currently support mixed size types "
22985 "for %<simd%> functions");
22986 else if (supported_simd_type (ret_type))
22987 warning_at (DECL_SOURCE_LOCATION (node->decl), 0,
22988 "GCC does not currently support return type %qT "
22989 "for %<simd%> functions", ret_type);
22990 else
22991 warning_at (DECL_SOURCE_LOCATION (node->decl), 0,
22992 "unsupported return type %qT for %<simd%> functions",
22993 ret_type);
22994 return 0;
22995 }
22996
22997 for (t = DECL_ARGUMENTS (node->decl); t; t = DECL_CHAIN (t))
22998 {
22999 arg_type = TREE_TYPE (t);
23000
23001 if (!currently_supported_simd_type (arg_type, base_type))
23002 {
23003 if (TYPE_SIZE (arg_type) != TYPE_SIZE (base_type))
23004 warning_at (DECL_SOURCE_LOCATION (node->decl), 0,
23005 "GCC does not currently support mixed size types "
23006 "for %<simd%> functions");
23007 else
23008 warning_at (DECL_SOURCE_LOCATION (node->decl), 0,
23009 "GCC does not currently support argument type %qT "
23010 "for %<simd%> functions", arg_type);
23011 return 0;
23012 }
23013 }
23014
23015 clonei->vecsize_mangle = 'n';
23016 clonei->mask_mode = VOIDmode;
23017 elt_bits = GET_MODE_BITSIZE (SCALAR_TYPE_MODE (base_type));
23018 if (clonei->simdlen == 0)
23019 {
23020 count = 2;
23021 vec_bits = (num == 0 ? 64 : 128);
23022 clonei->simdlen = vec_bits / elt_bits;
23023 }
23024 else
23025 {
23026 count = 1;
23027 vec_bits = clonei->simdlen * elt_bits;
23028 if (vec_bits != 64 && vec_bits != 128)
23029 {
23030 warning_at (DECL_SOURCE_LOCATION (node->decl), 0,
23031 "GCC does not currently support simdlen %d for type %qT",
23032 clonei->simdlen, base_type);
23033 return 0;
23034 }
23035 }
23036 clonei->vecsize_int = vec_bits;
23037 clonei->vecsize_float = vec_bits;
23038 return count;
23039}
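/* A user-level illustration (requires -fopenmp or -fopenmp-simd): for a
   32-bit element type with no explicit simdlen clause, the hook above
   requests two Advanced SIMD clones of the function below, a 64-bit one
   with simdlen 2 and a 128-bit one with simdlen 4, both using the 'n'
   (Advanced SIMD) mangling.  */
#pragma omp declare simd
float
scale (float x)
{
  return x * 2.0f;
}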
23040
23041/* Implement TARGET_SIMD_CLONE_ADJUST. */
23042
23043static void
23044aarch64_simd_clone_adjust (struct cgraph_node *node)
23045{
23046 /* Add aarch64_vector_pcs target attribute to SIMD clones so they
23047 use the correct ABI. */
23048
23049 tree t = TREE_TYPE (node->decl);
23050 TYPE_ATTRIBUTES (t) = make_attribute ("aarch64_vector_pcs", "default",
23051 TYPE_ATTRIBUTES (t));
23052}
23053
23054/* Implement TARGET_SIMD_CLONE_USABLE. */
23055
23056static int
23057aarch64_simd_clone_usable (struct cgraph_node *node)
23058{
23059 switch (node->simdclone->vecsize_mangle)
23060 {
23061 case 'n':
23062 if (!TARGET_SIMD)
23063 return -1;
23064 return 0;
23065 default:
23066 gcc_unreachable ();
23067 }
23068}
23069
497f281c
SE
23070/* Implement TARGET_COMP_TYPE_ATTRIBUTES */
23071
23072static int
23073aarch64_comp_type_attributes (const_tree type1, const_tree type2)
23074{
31427b97
RS
23075 auto check_attr = [&](const char *name) {
23076 tree attr1 = lookup_attribute (name, TYPE_ATTRIBUTES (type1));
23077 tree attr2 = lookup_attribute (name, TYPE_ATTRIBUTES (type2));
23078 if (!attr1 && !attr2)
23079 return true;
23080
23081 return attr1 && attr2 && attribute_value_equal (attr1, attr2);
23082 };
23083
23084 if (!check_attr ("aarch64_vector_pcs"))
23085 return 0;
23086 if (!check_attr ("Advanced SIMD type"))
497f281c
SE
23087 return 0;
23088 return 1;
23089}
23090
3bac1e20
SE
23091/* Implement TARGET_GET_MULTILIB_ABI_NAME */
23092
23093static const char *
23094aarch64_get_multilib_abi_name (void)
23095{
23096 if (TARGET_BIG_END)
23097 return TARGET_ILP32 ? "aarch64_be_ilp32" : "aarch64_be";
23098 return TARGET_ILP32 ? "aarch64_ilp32" : "aarch64";
23099}
23100
e76c8e56
JJ
23101/* Implement TARGET_STACK_PROTECT_GUARD. In case of a
23102 global variable based guard use the default else
23103 return a null tree. */
23104static tree
23105aarch64_stack_protect_guard (void)
23106{
23107 if (aarch64_stack_protector_guard == SSP_GLOBAL)
23108 return default_stack_protect_guard ();
23109
23110 return NULL_TREE;
23111}
23112
98698967
SMW
23113/* Return the diagnostic message string if conversion from FROMTYPE to
23114 TOTYPE is not allowed, NULL otherwise. */
23115
23116static const char *
23117aarch64_invalid_conversion (const_tree fromtype, const_tree totype)
23118{
23119 if (element_mode (fromtype) != element_mode (totype))
23120 {
23121 /* Do not allow conversions to/from BFmode scalar types. */
23122 if (TYPE_MODE (fromtype) == BFmode)
23123 return N_("invalid conversion from type %<bfloat16_t%>");
23124 if (TYPE_MODE (totype) == BFmode)
23125 return N_("invalid conversion to type %<bfloat16_t%>");
23126 }
23127
23128 /* Conversion allowed. */
23129 return NULL;
23130}
23131
23132/* Return the diagnostic message string if the unary operation OP is
23133 not permitted on TYPE, NULL otherwise. */
23134
23135static const char *
23136aarch64_invalid_unary_op (int op, const_tree type)
23137{
23138 /* Reject all single-operand operations on BFmode except for &. */
23139 if (element_mode (type) == BFmode && op != ADDR_EXPR)
23140 return N_("operation not permitted on type %<bfloat16_t%>");
23141
23142 /* Operation allowed. */
23143 return NULL;
23144}
23145
23146/* Return the diagnostic message string if the binary operation OP is
23147 not permitted on TYPE1 and TYPE2, NULL otherwise. */
23148
23149static const char *
23150aarch64_invalid_binary_op (int op ATTRIBUTE_UNUSED, const_tree type1,
23151 const_tree type2)
23152{
23153 /* Reject all 2-operand operations on BFmode. */
23154 if (element_mode (type1) == BFmode
23155 || element_mode (type2) == BFmode)
23156 return N_("operation not permitted on type %<bfloat16_t%>");
23157
38e62001
RS
23158 if (VECTOR_TYPE_P (type1)
23159 && VECTOR_TYPE_P (type2)
23160 && !TYPE_INDIVISIBLE_P (type1)
23161 && !TYPE_INDIVISIBLE_P (type2)
23162 && (aarch64_sve::builtin_type_p (type1)
23163 != aarch64_sve::builtin_type_p (type2)))
23164 return N_("cannot combine GNU and SVE vectors in a binary operation");
23165
98698967
SMW
23166 /* Operation allowed. */
23167 return NULL;
23168}
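/* A user-level illustration (assuming a toolchain and -march setting
   that provide the bf16 extension and arm_bf16.h): the checks above
   reject scalar conversions and arithmetic involving bfloat16_t, so
   statements such as
       float f = x;           // invalid conversion from type bfloat16_t
       bfloat16_t s = x + y;  // operation not permitted on type bfloat16_t
   are diagnosed, while taking the address of a bfloat16_t object is
   explicitly permitted by the unary-op hook above.  */
#include <arm_bf16.h>

bfloat16_t *
bf16_address (bfloat16_t x[])
{
  return &x[0];
}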
23169
32efff9f
SD
23170/* Implement TARGET_ASM_FILE_END for AArch64. This adds the AArch64 GNU NOTE
23171 section at the end if needed. */
23172#define GNU_PROPERTY_AARCH64_FEATURE_1_AND 0xc0000000
23173#define GNU_PROPERTY_AARCH64_FEATURE_1_BTI (1U << 0)
23174#define GNU_PROPERTY_AARCH64_FEATURE_1_PAC (1U << 1)
23175void
23176aarch64_file_end_indicate_exec_stack ()
23177{
23178 file_end_indicate_exec_stack ();
23179
23180 unsigned feature_1_and = 0;
23181 if (aarch64_bti_enabled ())
23182 feature_1_and |= GNU_PROPERTY_AARCH64_FEATURE_1_BTI;
23183
23184 if (aarch64_ra_sign_scope != AARCH64_FUNCTION_NONE)
23185 feature_1_and |= GNU_PROPERTY_AARCH64_FEATURE_1_PAC;
23186
23187 if (feature_1_and)
23188 {
23189 /* Generate .note.gnu.property section. */
23190 switch_to_section (get_section (".note.gnu.property",
23191 SECTION_NOTYPE, NULL));
23192
23193 /* PT_NOTE header: namesz, descsz, type.
23194 namesz = 4 ("GNU\0")
23195 descsz = 16 (Size of the program property array)
23196 [(12 + padding) * Number of array elements]
23197 type = 5 (NT_GNU_PROPERTY_TYPE_0). */
23198 assemble_align (POINTER_SIZE);
23199 assemble_integer (GEN_INT (4), 4, 32, 1);
23200 assemble_integer (GEN_INT (ROUND_UP (12, POINTER_BYTES)), 4, 32, 1);
23201 assemble_integer (GEN_INT (5), 4, 32, 1);
23202
23203 /* PT_NOTE name. */
23204 assemble_string ("GNU", 4);
23205
23206 /* PT_NOTE contents for NT_GNU_PROPERTY_TYPE_0:
23207 type = GNU_PROPERTY_AARCH64_FEATURE_1_AND
23208 datasz = 4
23209 data = feature_1_and. */
23210 assemble_integer (GEN_INT (GNU_PROPERTY_AARCH64_FEATURE_1_AND), 4, 32, 1);
23211 assemble_integer (GEN_INT (4), 4, 32, 1);
23212 assemble_integer (GEN_INT (feature_1_and), 4, 32, 1);
23213
23214 /* Pad the size of the note to the required alignment. */
23215 assemble_align (POINTER_SIZE);
23216 }
23217}
23218#undef GNU_PROPERTY_AARCH64_FEATURE_1_PAC
23219#undef GNU_PROPERTY_AARCH64_FEATURE_1_BTI
23220#undef GNU_PROPERTY_AARCH64_FEATURE_1_AND
e76c8e56 23221
be178ecd
MM
23222/* Helper function for straight line speculation.
23223 Return what barrier should be emitted for straight line speculation
23224 mitigation.
23225 When not mitigating against straight line speculation this function returns
23226 an empty string.
23227 When mitigating against straight line speculation, use:
23228 * SB when the v8.5-A SB extension is enabled.
23229 * DSB+ISB otherwise. */
23230const char *
23231aarch64_sls_barrier (int mitigation_required)
23232{
23233 return mitigation_required
23234 ? (TARGET_SB ? "sb" : "dsb\tsy\n\tisb")
23235 : "";
23236}
23237
96b7f495
MM
23238static GTY (()) tree aarch64_sls_shared_thunks[30];
23239static GTY (()) bool aarch64_sls_shared_thunks_needed = false;
23240const char *indirect_symbol_names[30] = {
23241 "__call_indirect_x0",
23242 "__call_indirect_x1",
23243 "__call_indirect_x2",
23244 "__call_indirect_x3",
23245 "__call_indirect_x4",
23246 "__call_indirect_x5",
23247 "__call_indirect_x6",
23248 "__call_indirect_x7",
23249 "__call_indirect_x8",
23250 "__call_indirect_x9",
23251 "__call_indirect_x10",
23252 "__call_indirect_x11",
23253 "__call_indirect_x12",
23254 "__call_indirect_x13",
23255 "__call_indirect_x14",
23256 "__call_indirect_x15",
23257 "", /* "__call_indirect_x16", */
23258 "", /* "__call_indirect_x17", */
23259 "__call_indirect_x18",
23260 "__call_indirect_x19",
23261 "__call_indirect_x20",
23262 "__call_indirect_x21",
23263 "__call_indirect_x22",
23264 "__call_indirect_x23",
23265 "__call_indirect_x24",
23266 "__call_indirect_x25",
23267 "__call_indirect_x26",
23268 "__call_indirect_x27",
23269 "__call_indirect_x28",
23270 "__call_indirect_x29",
23271};
23272
23273/* Function to create a BLR thunk. This thunk is used to mitigate straight
23274 line speculation. Instead of a simple BLR that can be speculated past,
23275 we emit a BL to this thunk, and this thunk contains a BR to the relevant
23276 register. These thunks have the relevant speculation barriers put after
23277 their indirect branch so that speculation is blocked.
23278
23279 We use such a thunk so the speculation barriers are kept off the
23280 architecturally executed path in order to reduce the performance overhead.
23281
23282 When optimizing for size we use stubs shared by the linked object.
23283 When optimizing for performance we emit stubs for each function in the hope
23284 that the branch predictor can better train on jumps specific for a given
23285 function. */
23286rtx
23287aarch64_sls_create_blr_label (int regnum)
23288{
23289 gcc_assert (STUB_REGNUM_P (regnum));
23290 if (optimize_function_for_size_p (cfun))
23291 {
23292 /* For the thunks shared between different functions in this compilation
23293 unit we use a named symbol -- this is just for users to more easily
23294 understand the generated assembly. */
23295 aarch64_sls_shared_thunks_needed = true;
23296 const char *thunk_name = indirect_symbol_names[regnum];
23297 if (aarch64_sls_shared_thunks[regnum] == NULL)
23298 {
23299 /* Build a decl representing this function stub and record it for
23300 later. We build a decl here so we can use the GCC machinery for
23301 handling sections automatically (through `get_named_section` and
23302 `make_decl_one_only`). That saves us a lot of trouble handling
23303 the specifics of different output file formats. */
23304 tree decl = build_decl (BUILTINS_LOCATION, FUNCTION_DECL,
23305 get_identifier (thunk_name),
23306 build_function_type_list (void_type_node,
23307 NULL_TREE));
23308 DECL_RESULT (decl) = build_decl (BUILTINS_LOCATION, RESULT_DECL,
23309 NULL_TREE, void_type_node);
23310 TREE_PUBLIC (decl) = 1;
23311 TREE_STATIC (decl) = 1;
23312 DECL_IGNORED_P (decl) = 1;
23313 DECL_ARTIFICIAL (decl) = 1;
23314 make_decl_one_only (decl, DECL_ASSEMBLER_NAME (decl));
23315 resolve_unique_section (decl, 0, false);
23316 aarch64_sls_shared_thunks[regnum] = decl;
23317 }
23318
23319 return gen_rtx_SYMBOL_REF (Pmode, thunk_name);
23320 }
23321
23322 if (cfun->machine->call_via[regnum] == NULL)
23323 cfun->machine->call_via[regnum]
23324 = gen_rtx_LABEL_REF (Pmode, gen_label_rtx ());
23325 return cfun->machine->call_via[regnum];
23326}
23327
23328/* Helper function for aarch64_sls_emit_blr_function_thunks and
23329 aarch64_sls_emit_shared_blr_thunks below. */
23330static void
23331aarch64_sls_emit_function_stub (FILE *out_file, int regnum)
23332{
23333 /* Save in x16 and branch to that function so this transformation does
23334 not prevent jumping to `BTI c` instructions. */
23335 asm_fprintf (out_file, "\tmov\tx16, x%d\n", regnum);
23336 asm_fprintf (out_file, "\tbr\tx16\n");
23337}
23338
23339/* Emit all BLR stubs for this particular function.
23340 Here we emit all the BLR stubs needed for the current function. Since we
23341 emit these stubs in a consecutive block we know there will be no speculation
23342 gadgets between each stub, and hence we only emit a speculation barrier at
23343 the end of the stub sequences.
23344
23345 This is called in the TARGET_ASM_FUNCTION_EPILOGUE hook. */
23346void
23347aarch64_sls_emit_blr_function_thunks (FILE *out_file)
23348{
23349 if (! aarch64_harden_sls_blr_p ())
23350 return;
23351
23352 bool any_functions_emitted = false;
23353 /* We must save and restore the current function section since this assembly
23354 is emitted at the end of the function. This means it can be emitted *just
23355 after* the cold section of a function. That cold part would be emitted in
23356 a different section. That switch would trigger a `.cfi_endproc` directive
23357 to be emitted in the original section and a `.cfi_startproc` directive to
23358 be emitted in the new section. Switching to the original section without
23359 restoring would mean that the `.cfi_endproc` emitted as a function ends
23360 would happen in a different section -- leaving an unmatched
23361 `.cfi_startproc` in the cold text section and an unmatched `.cfi_endproc`
23362 in the standard text section. */
23363 section *save_text_section = in_section;
23364 switch_to_section (function_section (current_function_decl));
23365 for (int regnum = 0; regnum < 30; ++regnum)
23366 {
23367 rtx specu_label = cfun->machine->call_via[regnum];
23368 if (specu_label == NULL)
23369 continue;
23370
23371 targetm.asm_out.print_operand (out_file, specu_label, 0);
23372 asm_fprintf (out_file, ":\n");
23373 aarch64_sls_emit_function_stub (out_file, regnum);
23374 any_functions_emitted = true;
23375 }
23376 if (any_functions_emitted)
23377 /* Can use the SB if needs be here, since this stub will only be used
23378 by the current function, and hence for the current target. */
23379 asm_fprintf (out_file, "\t%s\n", aarch64_sls_barrier (true));
23380 switch_to_section (save_text_section);
23381}
23382
23383/* Emit shared BLR stubs for the current compilation unit.
23384 Over the course of compiling this unit we may have converted some BLR
23385 instructions to a BL to a shared stub function. This is where we emit those
23386 stub functions.
23387 This function is for the stubs shared between different functions in this
23388 compilation unit. We share when optimizing for size instead of speed.
23389
23390 This function is called through the TARGET_ASM_FILE_END hook. */
23391void
23392aarch64_sls_emit_shared_blr_thunks (FILE *out_file)
23393{
23394 if (! aarch64_sls_shared_thunks_needed)
23395 return;
23396
23397 for (int regnum = 0; regnum < 30; ++regnum)
23398 {
23399 tree decl = aarch64_sls_shared_thunks[regnum];
23400 if (!decl)
23401 continue;
23402
23403 const char *name = indirect_symbol_names[regnum];
23404 switch_to_section (get_named_section (decl, NULL, 0));
23405 ASM_OUTPUT_ALIGN (out_file, 2);
23406 targetm.asm_out.globalize_label (out_file, name);
23407 /* Only emits if the compiler is configured for an assembler that can
23408 handle visibility directives. */
23409 targetm.asm_out.assemble_visibility (decl, VISIBILITY_HIDDEN);
23410 ASM_OUTPUT_TYPE_DIRECTIVE (out_file, name, "function");
23411 ASM_OUTPUT_LABEL (out_file, name);
23412 aarch64_sls_emit_function_stub (out_file, regnum);
23413 /* Use the most conservative target to ensure it can always be used by any
23414 function in the translation unit. */
23415 asm_fprintf (out_file, "\tdsb\tsy\n\tisb\n");
23416 ASM_DECLARE_FUNCTION_SIZE (out_file, name, decl);
23417 }
23418}
23419
23420/* Implement TARGET_ASM_FILE_END. */
23421void
23422aarch64_asm_file_end ()
23423{
23424 aarch64_sls_emit_shared_blr_thunks (asm_out_file);
23425 /* Since this function will be called for the ASM_FILE_END hook, we ensure
23426 that what would be called otherwise (e.g. `file_end_indicate_exec_stack`
23427 for FreeBSD) still gets called. */
23428#ifdef TARGET_ASM_FILE_END
23429 TARGET_ASM_FILE_END ();
23430#endif
23431}
23432
23433const char *
23434aarch64_indirect_call_asm (rtx addr)
23435{
23436 gcc_assert (REG_P (addr));
23437 if (aarch64_harden_sls_blr_p ())
23438 {
23439 rtx stub_label = aarch64_sls_create_blr_label (REGNO (addr));
23440 output_asm_insn ("bl\t%0", &stub_label);
23441 }
23442 else
23443 output_asm_insn ("blr\t%0", &addr);
23444 return "";
23445}
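/* A user-level illustration: when compiled with -mharden-sls=blr, the
   indirect call below is emitted as a BL to one of the
   __call_indirect_xN thunks described above (a shared named stub when
   optimizing for size, a per-function local label otherwise) rather
   than a bare BLR.  The function name is just an example.  */
int
call_through (int (*fn) (int), int arg)
{
  return fn (arg);
}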
23446
51b86113
DM
23447/* Target-specific selftests. */
23448
23449#if CHECKING_P
23450
23451namespace selftest {
23452
23453/* Selftest for the RTL loader.
23454 Verify that the RTL loader copes with a dump from
23455 print_rtx_function. This is essentially just a test that class
23456 function_reader can handle a real dump, but it also verifies
23457 that lookup_reg_by_dump_name correctly handles hard regs.
23458 The presence of hard reg names in the dump means that the test is
23459 target-specific, hence it is in this file. */
23460
23461static void
23462aarch64_test_loading_full_dump ()
23463{
23464 rtl_dump_test t (SELFTEST_LOCATION, locate_file ("aarch64/times-two.rtl"));
23465
23466 ASSERT_STREQ ("times_two", IDENTIFIER_POINTER (DECL_NAME (cfun->decl)));
23467
23468 rtx_insn *insn_1 = get_insn_by_uid (1);
23469 ASSERT_EQ (NOTE, GET_CODE (insn_1));
23470
23471 rtx_insn *insn_15 = get_insn_by_uid (15);
23472 ASSERT_EQ (INSN, GET_CODE (insn_15));
23473 ASSERT_EQ (USE, GET_CODE (PATTERN (insn_15)));
23474
23475 /* Verify crtl->return_rtx. */
23476 ASSERT_EQ (REG, GET_CODE (crtl->return_rtx));
23477 ASSERT_EQ (0, REGNO (crtl->return_rtx));
23478 ASSERT_EQ (SImode, GET_MODE (crtl->return_rtx));
23479}
23480
23481/* Run all target-specific selftests. */
23482
23483static void
23484aarch64_run_selftests (void)
23485{
23486 aarch64_test_loading_full_dump ();
23487}
23488
23489} // namespace selftest
23490
23491#endif /* #if CHECKING_P */
23492
cd0b2d36
RR
23493#undef TARGET_STACK_PROTECT_GUARD
23494#define TARGET_STACK_PROTECT_GUARD aarch64_stack_protect_guard
23495
43e9d192
IB
23496#undef TARGET_ADDRESS_COST
23497#define TARGET_ADDRESS_COST aarch64_address_cost
23498
23499/* This hook determines whether unnamed bitfields affect the alignment
23500 of the containing structure. The hook returns true if the structure
23501 should inherit the alignment requirements of an unnamed bitfield's
23502 type. */
23503#undef TARGET_ALIGN_ANON_BITFIELD
23504#define TARGET_ALIGN_ANON_BITFIELD hook_bool_void_true
23505
23506#undef TARGET_ASM_ALIGNED_DI_OP
23507#define TARGET_ASM_ALIGNED_DI_OP "\t.xword\t"
23508
23509#undef TARGET_ASM_ALIGNED_HI_OP
23510#define TARGET_ASM_ALIGNED_HI_OP "\t.hword\t"
23511
23512#undef TARGET_ASM_ALIGNED_SI_OP
23513#define TARGET_ASM_ALIGNED_SI_OP "\t.word\t"
23514
23515#undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
23516#define TARGET_ASM_CAN_OUTPUT_MI_THUNK \
23517 hook_bool_const_tree_hwi_hwi_const_tree_true
23518
e1c1ecb0
KT
23519#undef TARGET_ASM_FILE_START
23520#define TARGET_ASM_FILE_START aarch64_start_file
23521
43e9d192
IB
23522#undef TARGET_ASM_OUTPUT_MI_THUNK
23523#define TARGET_ASM_OUTPUT_MI_THUNK aarch64_output_mi_thunk
23524
23525#undef TARGET_ASM_SELECT_RTX_SECTION
23526#define TARGET_ASM_SELECT_RTX_SECTION aarch64_select_rtx_section
23527
23528#undef TARGET_ASM_TRAMPOLINE_TEMPLATE
23529#define TARGET_ASM_TRAMPOLINE_TEMPLATE aarch64_asm_trampoline_template
23530
c292cfe5
SN
23531#undef TARGET_ASM_PRINT_PATCHABLE_FUNCTION_ENTRY
23532#define TARGET_ASM_PRINT_PATCHABLE_FUNCTION_ENTRY aarch64_print_patchable_function_entry
23533
43e9d192
IB
23534#undef TARGET_BUILD_BUILTIN_VA_LIST
23535#define TARGET_BUILD_BUILTIN_VA_LIST aarch64_build_builtin_va_list
23536
23537#undef TARGET_CALLEE_COPIES
7256c719 23538#define TARGET_CALLEE_COPIES hook_bool_CUMULATIVE_ARGS_arg_info_false
43e9d192
IB
23539
23540#undef TARGET_CAN_ELIMINATE
23541#define TARGET_CAN_ELIMINATE aarch64_can_eliminate
23542
1fd8d40c
KT
23543#undef TARGET_CAN_INLINE_P
23544#define TARGET_CAN_INLINE_P aarch64_can_inline_p
23545
43e9d192
IB
23546#undef TARGET_CANNOT_FORCE_CONST_MEM
23547#define TARGET_CANNOT_FORCE_CONST_MEM aarch64_cannot_force_const_mem
23548
50487d79
EM
23549#undef TARGET_CASE_VALUES_THRESHOLD
23550#define TARGET_CASE_VALUES_THRESHOLD aarch64_case_values_threshold
23551
43e9d192
IB
23552#undef TARGET_CONDITIONAL_REGISTER_USAGE
23553#define TARGET_CONDITIONAL_REGISTER_USAGE aarch64_conditional_register_usage
23554
38e62001
RS
23555#undef TARGET_MEMBER_TYPE_FORCES_BLK
23556#define TARGET_MEMBER_TYPE_FORCES_BLK aarch64_member_type_forces_blk
23557
43e9d192
IB
23558/* Only the least significant bit is used for initialization guard
23559 variables. */
23560#undef TARGET_CXX_GUARD_MASK_BIT
23561#define TARGET_CXX_GUARD_MASK_BIT hook_bool_void_true
23562
23563#undef TARGET_C_MODE_FOR_SUFFIX
23564#define TARGET_C_MODE_FOR_SUFFIX aarch64_c_mode_for_suffix
23565
23566#ifdef TARGET_BIG_ENDIAN_DEFAULT
23567#undef TARGET_DEFAULT_TARGET_FLAGS
23568#define TARGET_DEFAULT_TARGET_FLAGS (MASK_BIG_END)
23569#endif
23570
23571#undef TARGET_CLASS_MAX_NREGS
23572#define TARGET_CLASS_MAX_NREGS aarch64_class_max_nregs
23573
119103ca
JG
23574#undef TARGET_BUILTIN_DECL
23575#define TARGET_BUILTIN_DECL aarch64_builtin_decl
23576
a6fc00da
BH
23577#undef TARGET_BUILTIN_RECIPROCAL
23578#define TARGET_BUILTIN_RECIPROCAL aarch64_builtin_reciprocal
23579
11e554b3
JG
23580#undef TARGET_C_EXCESS_PRECISION
23581#define TARGET_C_EXCESS_PRECISION aarch64_excess_precision
23582
43e9d192
IB
23583#undef TARGET_EXPAND_BUILTIN
23584#define TARGET_EXPAND_BUILTIN aarch64_expand_builtin
23585
23586#undef TARGET_EXPAND_BUILTIN_VA_START
23587#define TARGET_EXPAND_BUILTIN_VA_START aarch64_expand_builtin_va_start
23588
9697e620
JG
23589#undef TARGET_FOLD_BUILTIN
23590#define TARGET_FOLD_BUILTIN aarch64_fold_builtin
23591
43e9d192
IB
23592#undef TARGET_FUNCTION_ARG
23593#define TARGET_FUNCTION_ARG aarch64_function_arg
23594
23595#undef TARGET_FUNCTION_ARG_ADVANCE
23596#define TARGET_FUNCTION_ARG_ADVANCE aarch64_function_arg_advance
23597
23598#undef TARGET_FUNCTION_ARG_BOUNDARY
23599#define TARGET_FUNCTION_ARG_BOUNDARY aarch64_function_arg_boundary
23600
76b0cbf8
RS
23601#undef TARGET_FUNCTION_ARG_PADDING
23602#define TARGET_FUNCTION_ARG_PADDING aarch64_function_arg_padding
23603
43cacb12
RS
23604#undef TARGET_GET_RAW_RESULT_MODE
23605#define TARGET_GET_RAW_RESULT_MODE aarch64_get_reg_raw_mode
23606#undef TARGET_GET_RAW_ARG_MODE
23607#define TARGET_GET_RAW_ARG_MODE aarch64_get_reg_raw_mode
23608
43e9d192
IB
23609#undef TARGET_FUNCTION_OK_FOR_SIBCALL
23610#define TARGET_FUNCTION_OK_FOR_SIBCALL aarch64_function_ok_for_sibcall
23611
23612#undef TARGET_FUNCTION_VALUE
23613#define TARGET_FUNCTION_VALUE aarch64_function_value
23614
23615#undef TARGET_FUNCTION_VALUE_REGNO_P
23616#define TARGET_FUNCTION_VALUE_REGNO_P aarch64_function_value_regno_p
23617
fc72cba7
AL
23618#undef TARGET_GIMPLE_FOLD_BUILTIN
23619#define TARGET_GIMPLE_FOLD_BUILTIN aarch64_gimple_fold_builtin
0ac198d3 23620
43e9d192
IB
23621#undef TARGET_GIMPLIFY_VA_ARG_EXPR
23622#define TARGET_GIMPLIFY_VA_ARG_EXPR aarch64_gimplify_va_arg_expr
23623
23624#undef TARGET_INIT_BUILTINS
23625#define TARGET_INIT_BUILTINS aarch64_init_builtins
23626
c64f7d37
WD
23627#undef TARGET_IRA_CHANGE_PSEUDO_ALLOCNO_CLASS
23628#define TARGET_IRA_CHANGE_PSEUDO_ALLOCNO_CLASS \
23629 aarch64_ira_change_pseudo_allocno_class
23630
43e9d192
IB
23631#undef TARGET_LEGITIMATE_ADDRESS_P
23632#define TARGET_LEGITIMATE_ADDRESS_P aarch64_legitimate_address_hook_p
23633
23634#undef TARGET_LEGITIMATE_CONSTANT_P
23635#define TARGET_LEGITIMATE_CONSTANT_P aarch64_legitimate_constant_p
23636
491ec060
WD
23637#undef TARGET_LEGITIMIZE_ADDRESS_DISPLACEMENT
23638#define TARGET_LEGITIMIZE_ADDRESS_DISPLACEMENT \
23639 aarch64_legitimize_address_displacement
23640
43e9d192
IB
23641#undef TARGET_LIBGCC_CMP_RETURN_MODE
23642#define TARGET_LIBGCC_CMP_RETURN_MODE aarch64_libgcc_cmp_return_mode
23643
11e554b3
JG
23644#undef TARGET_LIBGCC_FLOATING_MODE_SUPPORTED_P
23645#define TARGET_LIBGCC_FLOATING_MODE_SUPPORTED_P \
23646aarch64_libgcc_floating_mode_supported_p
23647
ac2b960f
YZ
23648#undef TARGET_MANGLE_TYPE
23649#define TARGET_MANGLE_TYPE aarch64_mangle_type
23650
98698967
SMW
23651#undef TARGET_INVALID_CONVERSION
23652#define TARGET_INVALID_CONVERSION aarch64_invalid_conversion
23653
23654#undef TARGET_INVALID_UNARY_OP
23655#define TARGET_INVALID_UNARY_OP aarch64_invalid_unary_op
23656
23657#undef TARGET_INVALID_BINARY_OP
23658#define TARGET_INVALID_BINARY_OP aarch64_invalid_binary_op
23659
65ef05d0
RS
23660#undef TARGET_VERIFY_TYPE_CONTEXT
23661#define TARGET_VERIFY_TYPE_CONTEXT aarch64_verify_type_context
23662
43e9d192
IB
23663#undef TARGET_MEMORY_MOVE_COST
23664#define TARGET_MEMORY_MOVE_COST aarch64_memory_move_cost
23665
26e0ff94
WD
23666#undef TARGET_MIN_DIVISIONS_FOR_RECIP_MUL
23667#define TARGET_MIN_DIVISIONS_FOR_RECIP_MUL aarch64_min_divisions_for_recip_mul
23668
43e9d192
IB
23669#undef TARGET_MUST_PASS_IN_STACK
23670#define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size
23671
23672/* This target hook should return true if accesses to volatile bitfields
23673 should use the narrowest mode possible. It should return false if these
23674 accesses should use the bitfield container type. */
23675#undef TARGET_NARROW_VOLATILE_BITFIELD
23676#define TARGET_NARROW_VOLATILE_BITFIELD hook_bool_void_false
23677
23678#undef TARGET_OPTION_OVERRIDE
23679#define TARGET_OPTION_OVERRIDE aarch64_override_options
23680
23681#undef TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE
23682#define TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE \
23683 aarch64_override_options_after_change
23684
29a14a1a
MK
23685#undef TARGET_OFFLOAD_OPTIONS
23686#define TARGET_OFFLOAD_OPTIONS aarch64_offload_options
23687
361fb3ee
KT
23688#undef TARGET_OPTION_SAVE
23689#define TARGET_OPTION_SAVE aarch64_option_save
23690
23691#undef TARGET_OPTION_RESTORE
23692#define TARGET_OPTION_RESTORE aarch64_option_restore
23693
23694#undef TARGET_OPTION_PRINT
23695#define TARGET_OPTION_PRINT aarch64_option_print
23696
5a2c8331
KT
23697#undef TARGET_OPTION_VALID_ATTRIBUTE_P
23698#define TARGET_OPTION_VALID_ATTRIBUTE_P aarch64_option_valid_attribute_p
23699
d78006d9
KT
23700#undef TARGET_SET_CURRENT_FUNCTION
23701#define TARGET_SET_CURRENT_FUNCTION aarch64_set_current_function
23702
43e9d192
IB
23703#undef TARGET_PASS_BY_REFERENCE
23704#define TARGET_PASS_BY_REFERENCE aarch64_pass_by_reference
23705
23706#undef TARGET_PREFERRED_RELOAD_CLASS
23707#define TARGET_PREFERRED_RELOAD_CLASS aarch64_preferred_reload_class
23708
cee66c68
WD
23709#undef TARGET_SCHED_REASSOCIATION_WIDTH
23710#define TARGET_SCHED_REASSOCIATION_WIDTH aarch64_reassociation_width
23711
c2ec330c
AL
23712#undef TARGET_PROMOTED_TYPE
23713#define TARGET_PROMOTED_TYPE aarch64_promoted_type
23714
43e9d192
IB
23715#undef TARGET_SECONDARY_RELOAD
23716#define TARGET_SECONDARY_RELOAD aarch64_secondary_reload
23717
23718#undef TARGET_SHIFT_TRUNCATION_MASK
23719#define TARGET_SHIFT_TRUNCATION_MASK aarch64_shift_truncation_mask
23720
23721#undef TARGET_SETUP_INCOMING_VARARGS
23722#define TARGET_SETUP_INCOMING_VARARGS aarch64_setup_incoming_varargs
23723
23724#undef TARGET_STRUCT_VALUE_RTX
23725#define TARGET_STRUCT_VALUE_RTX aarch64_struct_value_rtx
23726
23727#undef TARGET_REGISTER_MOVE_COST
23728#define TARGET_REGISTER_MOVE_COST aarch64_register_move_cost
23729
23730#undef TARGET_RETURN_IN_MEMORY
23731#define TARGET_RETURN_IN_MEMORY aarch64_return_in_memory
23732
23733#undef TARGET_RETURN_IN_MSB
23734#define TARGET_RETURN_IN_MSB aarch64_return_in_msb
23735
23736#undef TARGET_RTX_COSTS
7cc2145f 23737#define TARGET_RTX_COSTS aarch64_rtx_costs_wrapper
43e9d192 23738
2e5f8203
JG
23739#undef TARGET_SCALAR_MODE_SUPPORTED_P
23740#define TARGET_SCALAR_MODE_SUPPORTED_P aarch64_scalar_mode_supported_p
23741
d126a4ae
AP
23742#undef TARGET_SCHED_ISSUE_RATE
23743#define TARGET_SCHED_ISSUE_RATE aarch64_sched_issue_rate
23744
d0bc0cb6
RS
23745#undef TARGET_SCHED_VARIABLE_ISSUE
23746#define TARGET_SCHED_VARIABLE_ISSUE aarch64_sched_variable_issue
23747
d03f7e44
MK
23748#undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
23749#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
23750 aarch64_sched_first_cycle_multipass_dfa_lookahead
23751
2d6bc7fa
KT
23752#undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD
23753#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD \
23754 aarch64_first_cycle_multipass_dfa_lookahead_guard
23755
827ab47a
KT
23756#undef TARGET_SHRINK_WRAP_GET_SEPARATE_COMPONENTS
23757#define TARGET_SHRINK_WRAP_GET_SEPARATE_COMPONENTS \
23758 aarch64_get_separate_components
23759
23760#undef TARGET_SHRINK_WRAP_COMPONENTS_FOR_BB
23761#define TARGET_SHRINK_WRAP_COMPONENTS_FOR_BB \
23762 aarch64_components_for_bb
23763
23764#undef TARGET_SHRINK_WRAP_DISQUALIFY_COMPONENTS
23765#define TARGET_SHRINK_WRAP_DISQUALIFY_COMPONENTS \
23766 aarch64_disqualify_components
23767
23768#undef TARGET_SHRINK_WRAP_EMIT_PROLOGUE_COMPONENTS
23769#define TARGET_SHRINK_WRAP_EMIT_PROLOGUE_COMPONENTS \
23770 aarch64_emit_prologue_components
23771
23772#undef TARGET_SHRINK_WRAP_EMIT_EPILOGUE_COMPONENTS
23773#define TARGET_SHRINK_WRAP_EMIT_EPILOGUE_COMPONENTS \
23774 aarch64_emit_epilogue_components
23775
23776#undef TARGET_SHRINK_WRAP_SET_HANDLED_COMPONENTS
23777#define TARGET_SHRINK_WRAP_SET_HANDLED_COMPONENTS \
23778 aarch64_set_handled_components
23779
43e9d192
IB
23780#undef TARGET_TRAMPOLINE_INIT
23781#define TARGET_TRAMPOLINE_INIT aarch64_trampoline_init
23782
23783#undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
23784#define TARGET_USE_BLOCKS_FOR_CONSTANT_P aarch64_use_blocks_for_constant_p
23785
23786#undef TARGET_VECTOR_MODE_SUPPORTED_P
23787#define TARGET_VECTOR_MODE_SUPPORTED_P aarch64_vector_mode_supported_p
23788
482b2b43
RS
23789#undef TARGET_COMPATIBLE_VECTOR_TYPES_P
23790#define TARGET_COMPATIBLE_VECTOR_TYPES_P aarch64_compatible_vector_types_p
23791
7df76747
N
23792#undef TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT
23793#define TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT \
23794 aarch64_builtin_support_vector_misalignment
23795
9f4cbab8
RS
23796#undef TARGET_ARRAY_MODE
23797#define TARGET_ARRAY_MODE aarch64_array_mode
23798
43e9d192
IB
23799#undef TARGET_ARRAY_MODE_SUPPORTED_P
23800#define TARGET_ARRAY_MODE_SUPPORTED_P aarch64_array_mode_supported_p
23801
8990e73a
TB
23802#undef TARGET_VECTORIZE_ADD_STMT_COST
23803#define TARGET_VECTORIZE_ADD_STMT_COST aarch64_add_stmt_cost
23804
23805#undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
23806#define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
23807 aarch64_builtin_vectorization_cost
23808
43e9d192
IB
23809#undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
23810#define TARGET_VECTORIZE_PREFERRED_SIMD_MODE aarch64_preferred_simd_mode
23811
42fc9a7f
JG
23812#undef TARGET_VECTORIZE_BUILTINS
23813#define TARGET_VECTORIZE_BUILTINS
23814
23815#undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
23816#define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
23817 aarch64_builtin_vectorized_function
23818
e021fb86
RS
23819#undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_MODES
23820#define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_MODES \
23821 aarch64_autovectorize_vector_modes
3b357264 23822
aa87aced
KV
23823#undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV
23824#define TARGET_ATOMIC_ASSIGN_EXPAND_FENV \
23825 aarch64_atomic_assign_expand_fenv
23826
43e9d192
IB
23827/* Section anchor support. */
23828
23829#undef TARGET_MIN_ANCHOR_OFFSET
23830#define TARGET_MIN_ANCHOR_OFFSET -256
23831
23832/* Limit the maximum anchor offset to 4k-1, since that's the limit for a
23833 byte offset; we can do much more for larger data types, but have no way
23834 to determine the size of the access. We assume accesses are aligned. */
23835#undef TARGET_MAX_ANCHOR_OFFSET
23836#define TARGET_MAX_ANCHOR_OFFSET 4095
23837
db0253a4
TB
23838#undef TARGET_VECTOR_ALIGNMENT
23839#define TARGET_VECTOR_ALIGNMENT aarch64_simd_vector_alignment
23840
43cacb12
RS
23841#undef TARGET_VECTORIZE_PREFERRED_VECTOR_ALIGNMENT
23842#define TARGET_VECTORIZE_PREFERRED_VECTOR_ALIGNMENT \
23843 aarch64_vectorize_preferred_vector_alignment
db0253a4
TB
23844#undef TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE
23845#define TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE \
23846 aarch64_simd_vector_alignment_reachable
23847
88b08073
JG
23848/* vec_perm support. */
23849
f151c9e1
RS
23850#undef TARGET_VECTORIZE_VEC_PERM_CONST
23851#define TARGET_VECTORIZE_VEC_PERM_CONST \
23852 aarch64_vectorize_vec_perm_const
88b08073 23853
74166aab
RS
23854#undef TARGET_VECTORIZE_RELATED_MODE
23855#define TARGET_VECTORIZE_RELATED_MODE aarch64_vectorize_related_mode
43cacb12
RS
23856#undef TARGET_VECTORIZE_GET_MASK_MODE
23857#define TARGET_VECTORIZE_GET_MASK_MODE aarch64_get_mask_mode
76a34e3f
RS
23858#undef TARGET_VECTORIZE_EMPTY_MASK_IS_EXPENSIVE
23859#define TARGET_VECTORIZE_EMPTY_MASK_IS_EXPENSIVE \
23860 aarch64_empty_mask_is_expensive
6a86928d
RS
23861#undef TARGET_PREFERRED_ELSE_VALUE
23862#define TARGET_PREFERRED_ELSE_VALUE \
23863 aarch64_preferred_else_value
43cacb12 23864
c2ec330c
AL
23865#undef TARGET_INIT_LIBFUNCS
23866#define TARGET_INIT_LIBFUNCS aarch64_init_libfuncs
70f09188 23867
706b2314 23868#undef TARGET_FIXED_CONDITION_CODE_REGS
70f09188
AP
23869#define TARGET_FIXED_CONDITION_CODE_REGS aarch64_fixed_condition_code_regs
23870
5cb74e90
RR
23871#undef TARGET_FLAGS_REGNUM
23872#define TARGET_FLAGS_REGNUM CC_REGNUM
23873
78607708
TV
23874#undef TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS
23875#define TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS true
23876
a3125fc2
CL
23877#undef TARGET_ASAN_SHADOW_OFFSET
23878#define TARGET_ASAN_SHADOW_OFFSET aarch64_asan_shadow_offset
23879
0c4ec427
RE
23880#undef TARGET_LEGITIMIZE_ADDRESS
23881#define TARGET_LEGITIMIZE_ADDRESS aarch64_legitimize_address
23882
b48d6421
KT
23883#undef TARGET_SCHED_CAN_SPECULATE_INSN
23884#define TARGET_SCHED_CAN_SPECULATE_INSN aarch64_sched_can_speculate_insn
23885
594bdd53
FY
23886#undef TARGET_CAN_USE_DOLOOP_P
23887#define TARGET_CAN_USE_DOLOOP_P can_use_doloop_if_innermost
23888
9bca63d4
WD
23889#undef TARGET_SCHED_ADJUST_PRIORITY
23890#define TARGET_SCHED_ADJUST_PRIORITY aarch64_sched_adjust_priority
23891
6a569cdd
KT
23892#undef TARGET_SCHED_MACRO_FUSION_P
23893#define TARGET_SCHED_MACRO_FUSION_P aarch64_macro_fusion_p
23894
23895#undef TARGET_SCHED_MACRO_FUSION_PAIR_P
23896#define TARGET_SCHED_MACRO_FUSION_PAIR_P aarch_macro_fusion_pair_p
23897
350013bc
BC
23898#undef TARGET_SCHED_FUSION_PRIORITY
23899#define TARGET_SCHED_FUSION_PRIORITY aarch64_sched_fusion_priority
23900
7b841a12
JW
23901#undef TARGET_UNSPEC_MAY_TRAP_P
23902#define TARGET_UNSPEC_MAY_TRAP_P aarch64_unspec_may_trap_p
23903
1b1e81f8
JW
23904#undef TARGET_USE_PSEUDO_PIC_REG
23905#define TARGET_USE_PSEUDO_PIC_REG aarch64_use_pseudo_pic_reg
23906
cc8ca59e
JB
23907#undef TARGET_PRINT_OPERAND
23908#define TARGET_PRINT_OPERAND aarch64_print_operand
23909
23910#undef TARGET_PRINT_OPERAND_ADDRESS
23911#define TARGET_PRINT_OPERAND_ADDRESS aarch64_print_operand_address
23912
74b27d8e
RS
23913#undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
23914#define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA aarch64_output_addr_const_extra
23915
ee62a5a6
RS
23916#undef TARGET_OPTAB_SUPPORTED_P
23917#define TARGET_OPTAB_SUPPORTED_P aarch64_optab_supported_p
23918
43203dea
RR
23919#undef TARGET_OMIT_STRUCT_RETURN_REG
23920#define TARGET_OMIT_STRUCT_RETURN_REG true
23921
43cacb12
RS
23922#undef TARGET_DWARF_POLY_INDETERMINATE_VALUE
23923#define TARGET_DWARF_POLY_INDETERMINATE_VALUE \
23924 aarch64_dwarf_poly_indeterminate_value
23925
f46fe37e
EB
23926/* The architecture reserves bits 0 and 1 so use bit 2 for descriptors. */
23927#undef TARGET_CUSTOM_FUNCTION_DESCRIPTORS
23928#define TARGET_CUSTOM_FUNCTION_DESCRIPTORS 4
23929
c43f4279
RS
23930#undef TARGET_HARD_REGNO_NREGS
23931#define TARGET_HARD_REGNO_NREGS aarch64_hard_regno_nregs
f939c3e6
RS
23932#undef TARGET_HARD_REGNO_MODE_OK
23933#define TARGET_HARD_REGNO_MODE_OK aarch64_hard_regno_mode_ok
23934
99e1629f
RS
23935#undef TARGET_MODES_TIEABLE_P
23936#define TARGET_MODES_TIEABLE_P aarch64_modes_tieable_p
23937
80ec73f4
RS
23938#undef TARGET_HARD_REGNO_CALL_PART_CLOBBERED
23939#define TARGET_HARD_REGNO_CALL_PART_CLOBBERED \
23940 aarch64_hard_regno_call_part_clobbered
23941
5a5a3bc5
RS
23942#undef TARGET_INSN_CALLEE_ABI
23943#define TARGET_INSN_CALLEE_ABI aarch64_insn_callee_abi
b3650d40 23944
58e17cf8
RS
23945#undef TARGET_CONSTANT_ALIGNMENT
23946#define TARGET_CONSTANT_ALIGNMENT aarch64_constant_alignment
23947
8c6e3b23
TC
23948#undef TARGET_STACK_CLASH_PROTECTION_ALLOCA_PROBE_RANGE
23949#define TARGET_STACK_CLASH_PROTECTION_ALLOCA_PROBE_RANGE \
23950 aarch64_stack_clash_protection_alloca_probe_range
23951
43cacb12
RS
23952#undef TARGET_COMPUTE_PRESSURE_CLASSES
23953#define TARGET_COMPUTE_PRESSURE_CLASSES aarch64_compute_pressure_classes
23954
23955#undef TARGET_CAN_CHANGE_MODE_CLASS
23956#define TARGET_CAN_CHANGE_MODE_CLASS aarch64_can_change_mode_class
23957
5cce8171
RS
23958#undef TARGET_SELECT_EARLY_REMAT_MODES
23959#define TARGET_SELECT_EARLY_REMAT_MODES aarch64_select_early_remat_modes
23960
c0111dc4
RE
23961#undef TARGET_SPECULATION_SAFE_VALUE
23962#define TARGET_SPECULATION_SAFE_VALUE aarch64_speculation_safe_value
23963
2d56d6ba
KT
23964#undef TARGET_ESTIMATED_POLY_VALUE
23965#define TARGET_ESTIMATED_POLY_VALUE aarch64_estimated_poly_value
23966
a0d0b980
SE
23967#undef TARGET_ATTRIBUTE_TABLE
23968#define TARGET_ATTRIBUTE_TABLE aarch64_attribute_table
23969
d9186814
SE
23970#undef TARGET_SIMD_CLONE_COMPUTE_VECSIZE_AND_SIMDLEN
23971#define TARGET_SIMD_CLONE_COMPUTE_VECSIZE_AND_SIMDLEN \
23972 aarch64_simd_clone_compute_vecsize_and_simdlen
23973
23974#undef TARGET_SIMD_CLONE_ADJUST
23975#define TARGET_SIMD_CLONE_ADJUST aarch64_simd_clone_adjust
23976
23977#undef TARGET_SIMD_CLONE_USABLE
23978#define TARGET_SIMD_CLONE_USABLE aarch64_simd_clone_usable
23979
497f281c
SE
23980#undef TARGET_COMP_TYPE_ATTRIBUTES
23981#define TARGET_COMP_TYPE_ATTRIBUTES aarch64_comp_type_attributes
23982
3bac1e20
SE
23983#undef TARGET_GET_MULTILIB_ABI_NAME
23984#define TARGET_GET_MULTILIB_ABI_NAME aarch64_get_multilib_abi_name
23985
002ffd3c
RS
23986#undef TARGET_FNTYPE_ABI
23987#define TARGET_FNTYPE_ABI aarch64_fntype_abi
23988
51b86113
DM
23989#if CHECKING_P
23990#undef TARGET_RUN_TARGET_SELFTESTS
23991#define TARGET_RUN_TARGET_SELFTESTS selftest::aarch64_run_selftests
23992#endif /* #if CHECKING_P */
23993
8fc16d72
ST
23994#undef TARGET_ASM_POST_CFI_STARTPROC
23995#define TARGET_ASM_POST_CFI_STARTPROC aarch64_post_cfi_startproc
23996
c600df9a
RS
23997#undef TARGET_STRICT_ARGUMENT_NAMING
23998#define TARGET_STRICT_ARGUMENT_NAMING hook_bool_CUMULATIVE_ARGS_true
23999
1a7a35c7
RH
24000#undef TARGET_MD_ASM_ADJUST
24001#define TARGET_MD_ASM_ADJUST arm_md_asm_adjust
24002
96b7f495
MM
24003#undef TARGET_ASM_FILE_END
24004#define TARGET_ASM_FILE_END aarch64_asm_file_end
24005
24006#undef TARGET_ASM_FUNCTION_EPILOGUE
24007#define TARGET_ASM_FUNCTION_EPILOGUE aarch64_sls_emit_blr_function_thunks
24008
43e9d192
IB
24009struct gcc_target targetm = TARGET_INITIALIZER;
24010
24011#include "gt-aarch64.h"