/* Machine description for AArch64 architecture.
   Copyright (C) 2009-2018 Free Software Foundation, Inc.
   Contributed by ARM Ltd.

   This file is part of GCC.

   GCC is free software; you can redistribute it and/or modify it
   under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 3, or (at your option)
   any later version.

   GCC is distributed in the hope that it will be useful, but
   WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with GCC; see the file COPYING3.  If not see
   <http://www.gnu.org/licenses/>.  */

#define IN_TARGET_CODE 1

#include "config.h"
#define INCLUDE_STRING
#include "system.h"
#include "coretypes.h"
#include "backend.h"
#include "target.h"
#include "rtl.h"
#include "tree.h"
#include "memmodel.h"
#include "gimple.h"
#include "cfghooks.h"
#include "cfgloop.h"
#include "df.h"
#include "tm_p.h"
#include "stringpool.h"
#include "attribs.h"
#include "optabs.h"
#include "regs.h"
#include "emit-rtl.h"
#include "recog.h"
#include "diagnostic.h"
#include "insn-attr.h"
#include "alias.h"
#include "fold-const.h"
#include "stor-layout.h"
#include "calls.h"
#include "varasm.h"
#include "output.h"
#include "flags.h"
#include "explow.h"
#include "expr.h"
#include "reload.h"
#include "langhooks.h"
#include "opts.h"
#include "params.h"
#include "gimplify.h"
#include "dwarf2.h"
#include "gimple-iterator.h"
#include "tree-vectorizer.h"
#include "aarch64-cost-tables.h"
#include "dumpfile.h"
#include "builtins.h"
#include "rtl-iter.h"
#include "tm-constrs.h"
#include "sched-int.h"
#include "target-globals.h"
#include "common/common-target.h"
#include "cfgrtl.h"
#include "selftest.h"
#include "selftest-rtl.h"
#include "rtx-vector-builder.h"

/* This file should be included last.  */
#include "target-def.h"

/* Defined for convenience.  */
#define POINTER_BYTES (POINTER_SIZE / BITS_PER_UNIT)

/* Information about a legitimate vector immediate operand.  */
struct simd_immediate_info
{
  enum insn_type { MOV, MVN };
  enum modifier_type { LSL, MSL };

  simd_immediate_info () {}
  simd_immediate_info (scalar_float_mode, rtx);
  simd_immediate_info (scalar_int_mode, unsigned HOST_WIDE_INT,
		       insn_type = MOV, modifier_type = LSL,
		       unsigned int = 0);
  simd_immediate_info (scalar_mode, rtx, rtx);

  /* The mode of the elements.  */
  scalar_mode elt_mode;

  /* The value of each element if all elements are the same, or the
     first value if the constant is a series.  */
  rtx value;

  /* The value of the step if the constant is a series, null otherwise.  */
  rtx step;

  /* The instruction to use to move the immediate into a vector.  */
  insn_type insn;

  /* The kind of shift modifier to use, and the number of bits to shift.
     This is (LSL, 0) if no shift is needed.  */
  modifier_type modifier;
  unsigned int shift;
};

/* Construct a floating-point immediate in which each element has mode
   ELT_MODE_IN and value VALUE_IN.  */
inline simd_immediate_info
::simd_immediate_info (scalar_float_mode elt_mode_in, rtx value_in)
  : elt_mode (elt_mode_in), value (value_in), step (NULL_RTX), insn (MOV),
    modifier (LSL), shift (0)
{}

/* Construct an integer immediate in which each element has mode ELT_MODE_IN
   and value VALUE_IN.  The other parameters are as for the structure
   fields.  */
inline simd_immediate_info
::simd_immediate_info (scalar_int_mode elt_mode_in,
		       unsigned HOST_WIDE_INT value_in,
		       insn_type insn_in, modifier_type modifier_in,
		       unsigned int shift_in)
  : elt_mode (elt_mode_in), value (gen_int_mode (value_in, elt_mode_in)),
    step (NULL_RTX), insn (insn_in), modifier (modifier_in), shift (shift_in)
{}

/* Construct an integer immediate in which each element has mode ELT_MODE_IN
   and where element I is equal to VALUE_IN + I * STEP_IN.  */
inline simd_immediate_info
::simd_immediate_info (scalar_mode elt_mode_in, rtx value_in, rtx step_in)
  : elt_mode (elt_mode_in), value (value_in), step (step_in), insn (MOV),
    modifier (LSL), shift (0)
{}

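/* Illustrative note (editorial addition, not part of the upstream source):
   an Advanced SIMD constant whose HImode elements are all 0x1200 could be
   summarized as simd_immediate_info (HImode, 0x12, MOV, LSL, 8), i.e.
   "move 0x12 shifted left by 8", while an SVE series constant
   { 1, 3, 5, ... } would use the (mode, value, step) form with value 1
   and step 2.  */
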
/* The current code model.  */
enum aarch64_code_model aarch64_cmodel;

/* The number of 64-bit elements in an SVE vector.  */
poly_uint16 aarch64_sve_vg;

#ifdef HAVE_AS_TLS
#undef TARGET_HAVE_TLS
#define TARGET_HAVE_TLS 1
#endif

static bool aarch64_composite_type_p (const_tree, machine_mode);
static bool aarch64_vfp_is_call_or_return_candidate (machine_mode,
						     const_tree,
						     machine_mode *, int *,
						     bool *);
static void aarch64_elf_asm_constructor (rtx, int) ATTRIBUTE_UNUSED;
static void aarch64_elf_asm_destructor (rtx, int) ATTRIBUTE_UNUSED;
static void aarch64_override_options_after_change (void);
static bool aarch64_vector_mode_supported_p (machine_mode);
static int aarch64_address_cost (rtx, machine_mode, addr_space_t, bool);
static bool aarch64_builtin_support_vector_misalignment (machine_mode mode,
							 const_tree type,
							 int misalignment,
							 bool is_packed);
static machine_mode aarch64_simd_container_mode (scalar_mode, poly_int64);
static bool aarch64_print_address_internal (FILE*, machine_mode, rtx,
					    aarch64_addr_query_type);
static HOST_WIDE_INT aarch64_clamp_to_uimm12_shift (HOST_WIDE_INT val);

/* Major revision number of the ARM Architecture implemented by the target.  */
unsigned aarch64_architecture_version;

/* The processor for which instructions should be scheduled.  */
enum aarch64_processor aarch64_tune = cortexa53;

/* Mask to specify which instruction scheduling options should be used.  */
unsigned long aarch64_tune_flags = 0;

/* Global flag for PC relative loads.  */
bool aarch64_pcrelative_literal_loads;

/* Global flag for whether frame pointer is enabled.  */
bool aarch64_use_frame_pointer;

/* Support for command line parsing of boolean flags in the tuning
   structures.  */
struct aarch64_flag_desc
{
  const char* name;
  unsigned int flag;
};

#define AARCH64_FUSION_PAIR(name, internal_name) \
  { name, AARCH64_FUSE_##internal_name },
static const struct aarch64_flag_desc aarch64_fusible_pairs[] =
{
  { "none", AARCH64_FUSE_NOTHING },
#include "aarch64-fusion-pairs.def"
  { "all", AARCH64_FUSE_ALL },
  { NULL, AARCH64_FUSE_NOTHING }
};

#define AARCH64_EXTRA_TUNING_OPTION(name, internal_name) \
  { name, AARCH64_EXTRA_TUNE_##internal_name },
static const struct aarch64_flag_desc aarch64_tuning_flags[] =
{
  { "none", AARCH64_EXTRA_TUNE_NONE },
#include "aarch64-tuning-flags.def"
  { "all", AARCH64_EXTRA_TUNE_ALL },
  { NULL, AARCH64_EXTRA_TUNE_NONE }
};

/* Tuning parameters.  */

static const struct cpu_addrcost_table generic_addrcost_table =
{
  {
    1, /* hi  */
    0, /* si  */
    0, /* di  */
    1, /* ti  */
  },
  0, /* pre_modify  */
  0, /* post_modify  */
  0, /* register_offset  */
  0, /* register_sextend  */
  0, /* register_zextend  */
  0 /* imm_offset  */
};

static const struct cpu_addrcost_table exynosm1_addrcost_table =
{
  {
    0, /* hi  */
    0, /* si  */
    0, /* di  */
    2, /* ti  */
  },
  0, /* pre_modify  */
  0, /* post_modify  */
  1, /* register_offset  */
  1, /* register_sextend  */
  2, /* register_zextend  */
  0, /* imm_offset  */
};

static const struct cpu_addrcost_table xgene1_addrcost_table =
{
  {
    1, /* hi  */
    0, /* si  */
    0, /* di  */
    1, /* ti  */
  },
  1, /* pre_modify  */
  1, /* post_modify  */
  0, /* register_offset  */
  1, /* register_sextend  */
  1, /* register_zextend  */
  0, /* imm_offset  */
};

static const struct cpu_addrcost_table thunderx2t99_addrcost_table =
{
  {
    1, /* hi  */
    1, /* si  */
    1, /* di  */
    2, /* ti  */
  },
  0, /* pre_modify  */
  0, /* post_modify  */
  2, /* register_offset  */
  3, /* register_sextend  */
  3, /* register_zextend  */
  0, /* imm_offset  */
};

static const struct cpu_addrcost_table tsv110_addrcost_table =
{
  {
    1, /* hi  */
    0, /* si  */
    0, /* di  */
    1, /* ti  */
  },
  0, /* pre_modify  */
  0, /* post_modify  */
  0, /* register_offset  */
  1, /* register_sextend  */
  1, /* register_zextend  */
  0, /* imm_offset  */
};

static const struct cpu_addrcost_table qdf24xx_addrcost_table =
{
  {
    1, /* hi  */
    1, /* si  */
    1, /* di  */
    2, /* ti  */
  },
  1, /* pre_modify  */
  1, /* post_modify  */
  3, /* register_offset  */
  3, /* register_sextend  */
  3, /* register_zextend  */
  2, /* imm_offset  */
};

static const struct cpu_regmove_cost generic_regmove_cost =
{
  1, /* GP2GP  */
  /* Avoid the use of slow int<->fp moves for spilling by setting
     their cost higher than memmov_cost.  */
  5, /* GP2FP  */
  5, /* FP2GP  */
  2 /* FP2FP  */
};

static const struct cpu_regmove_cost cortexa57_regmove_cost =
{
  1, /* GP2GP  */
  /* Avoid the use of slow int<->fp moves for spilling by setting
     their cost higher than memmov_cost.  */
  5, /* GP2FP  */
  5, /* FP2GP  */
  2 /* FP2FP  */
};

static const struct cpu_regmove_cost cortexa53_regmove_cost =
{
  1, /* GP2GP  */
  /* Avoid the use of slow int<->fp moves for spilling by setting
     their cost higher than memmov_cost.  */
  5, /* GP2FP  */
  5, /* FP2GP  */
  2 /* FP2FP  */
};

static const struct cpu_regmove_cost exynosm1_regmove_cost =
{
  1, /* GP2GP  */
  /* Avoid the use of slow int<->fp moves for spilling by setting
     their cost higher than memmov_cost (actual, 4 and 9).  */
  9, /* GP2FP  */
  9, /* FP2GP  */
  1 /* FP2FP  */
};

static const struct cpu_regmove_cost thunderx_regmove_cost =
{
  2, /* GP2GP  */
  2, /* GP2FP  */
  6, /* FP2GP  */
  4 /* FP2FP  */
};

static const struct cpu_regmove_cost xgene1_regmove_cost =
{
  1, /* GP2GP  */
  /* Avoid the use of slow int<->fp moves for spilling by setting
     their cost higher than memmov_cost.  */
  8, /* GP2FP  */
  8, /* FP2GP  */
  2 /* FP2FP  */
};

static const struct cpu_regmove_cost qdf24xx_regmove_cost =
{
  2, /* GP2GP  */
  /* Avoid the use of int<->fp moves for spilling.  */
  6, /* GP2FP  */
  6, /* FP2GP  */
  4 /* FP2FP  */
};

static const struct cpu_regmove_cost thunderx2t99_regmove_cost =
{
  1, /* GP2GP  */
  /* Avoid the use of int<->fp moves for spilling.  */
  8, /* GP2FP  */
  8, /* FP2GP  */
  4 /* FP2FP  */
};

static const struct cpu_regmove_cost tsv110_regmove_cost =
{
  1, /* GP2GP  */
  /* Avoid the use of slow int<->fp moves for spilling by setting
     their cost higher than memmov_cost.  */
  2, /* GP2FP  */
  3, /* FP2GP  */
  2 /* FP2FP  */
};

/* Generic costs for vector insn classes.  */
static const struct cpu_vector_cost generic_vector_cost =
{
  1, /* scalar_int_stmt_cost  */
  1, /* scalar_fp_stmt_cost  */
  1, /* scalar_load_cost  */
  1, /* scalar_store_cost  */
  1, /* vec_int_stmt_cost  */
  1, /* vec_fp_stmt_cost  */
  2, /* vec_permute_cost  */
  1, /* vec_to_scalar_cost  */
  1, /* scalar_to_vec_cost  */
  1, /* vec_align_load_cost  */
  1, /* vec_unalign_load_cost  */
  1, /* vec_unalign_store_cost  */
  1, /* vec_store_cost  */
  3, /* cond_taken_branch_cost  */
  1 /* cond_not_taken_branch_cost  */
};

/* QDF24XX costs for vector insn classes.  */
static const struct cpu_vector_cost qdf24xx_vector_cost =
{
  1, /* scalar_int_stmt_cost  */
  1, /* scalar_fp_stmt_cost  */
  1, /* scalar_load_cost  */
  1, /* scalar_store_cost  */
  1, /* vec_int_stmt_cost  */
  3, /* vec_fp_stmt_cost  */
  2, /* vec_permute_cost  */
  1, /* vec_to_scalar_cost  */
  1, /* scalar_to_vec_cost  */
  1, /* vec_align_load_cost  */
  1, /* vec_unalign_load_cost  */
  1, /* vec_unalign_store_cost  */
  1, /* vec_store_cost  */
  3, /* cond_taken_branch_cost  */
  1 /* cond_not_taken_branch_cost  */
};

/* ThunderX costs for vector insn classes.  */
static const struct cpu_vector_cost thunderx_vector_cost =
{
  1, /* scalar_int_stmt_cost  */
  1, /* scalar_fp_stmt_cost  */
  3, /* scalar_load_cost  */
  1, /* scalar_store_cost  */
  4, /* vec_int_stmt_cost  */
  1, /* vec_fp_stmt_cost  */
  4, /* vec_permute_cost  */
  2, /* vec_to_scalar_cost  */
  2, /* scalar_to_vec_cost  */
  3, /* vec_align_load_cost  */
  5, /* vec_unalign_load_cost  */
  5, /* vec_unalign_store_cost  */
  1, /* vec_store_cost  */
  3, /* cond_taken_branch_cost  */
  3 /* cond_not_taken_branch_cost  */
};

static const struct cpu_vector_cost tsv110_vector_cost =
{
  1, /* scalar_int_stmt_cost  */
  1, /* scalar_fp_stmt_cost  */
  5, /* scalar_load_cost  */
  1, /* scalar_store_cost  */
  2, /* vec_int_stmt_cost  */
  2, /* vec_fp_stmt_cost  */
  2, /* vec_permute_cost  */
  3, /* vec_to_scalar_cost  */
  2, /* scalar_to_vec_cost  */
  5, /* vec_align_load_cost  */
  5, /* vec_unalign_load_cost  */
  1, /* vec_unalign_store_cost  */
  1, /* vec_store_cost  */
  1, /* cond_taken_branch_cost  */
  1 /* cond_not_taken_branch_cost  */
};

/* Cortex-A57 costs for vector insn classes.  */
static const struct cpu_vector_cost cortexa57_vector_cost =
{
  1, /* scalar_int_stmt_cost  */
  1, /* scalar_fp_stmt_cost  */
  4, /* scalar_load_cost  */
  1, /* scalar_store_cost  */
  2, /* vec_int_stmt_cost  */
  2, /* vec_fp_stmt_cost  */
  3, /* vec_permute_cost  */
  8, /* vec_to_scalar_cost  */
  8, /* scalar_to_vec_cost  */
  4, /* vec_align_load_cost  */
  4, /* vec_unalign_load_cost  */
  1, /* vec_unalign_store_cost  */
  1, /* vec_store_cost  */
  1, /* cond_taken_branch_cost  */
  1 /* cond_not_taken_branch_cost  */
};

static const struct cpu_vector_cost exynosm1_vector_cost =
{
  1, /* scalar_int_stmt_cost  */
  1, /* scalar_fp_stmt_cost  */
  5, /* scalar_load_cost  */
  1, /* scalar_store_cost  */
  3, /* vec_int_stmt_cost  */
  3, /* vec_fp_stmt_cost  */
  3, /* vec_permute_cost  */
  3, /* vec_to_scalar_cost  */
  3, /* scalar_to_vec_cost  */
  5, /* vec_align_load_cost  */
  5, /* vec_unalign_load_cost  */
  1, /* vec_unalign_store_cost  */
  1, /* vec_store_cost  */
  1, /* cond_taken_branch_cost  */
  1 /* cond_not_taken_branch_cost  */
};

/* X-Gene 1 costs for vector insn classes.  */
static const struct cpu_vector_cost xgene1_vector_cost =
{
  1, /* scalar_int_stmt_cost  */
  1, /* scalar_fp_stmt_cost  */
  5, /* scalar_load_cost  */
  1, /* scalar_store_cost  */
  2, /* vec_int_stmt_cost  */
  2, /* vec_fp_stmt_cost  */
  2, /* vec_permute_cost  */
  4, /* vec_to_scalar_cost  */
  4, /* scalar_to_vec_cost  */
  10, /* vec_align_load_cost  */
  10, /* vec_unalign_load_cost  */
  2, /* vec_unalign_store_cost  */
  2, /* vec_store_cost  */
  2, /* cond_taken_branch_cost  */
  1 /* cond_not_taken_branch_cost  */
};

/* Costs for vector insn classes for Vulcan.  */
static const struct cpu_vector_cost thunderx2t99_vector_cost =
{
  1, /* scalar_int_stmt_cost  */
  6, /* scalar_fp_stmt_cost  */
  4, /* scalar_load_cost  */
  1, /* scalar_store_cost  */
  5, /* vec_int_stmt_cost  */
  6, /* vec_fp_stmt_cost  */
  3, /* vec_permute_cost  */
  6, /* vec_to_scalar_cost  */
  5, /* scalar_to_vec_cost  */
  8, /* vec_align_load_cost  */
  8, /* vec_unalign_load_cost  */
  4, /* vec_unalign_store_cost  */
  4, /* vec_store_cost  */
  2, /* cond_taken_branch_cost  */
  1 /* cond_not_taken_branch_cost  */
};

/* Generic costs for branch instructions.  */
static const struct cpu_branch_cost generic_branch_cost =
{
  1,  /* Predictable.  */
  3   /* Unpredictable.  */
};

/* Generic approximation modes.  */
static const cpu_approx_modes generic_approx_modes =
{
  AARCH64_APPROX_NONE, /* division  */
  AARCH64_APPROX_NONE, /* sqrt  */
  AARCH64_APPROX_NONE  /* recip_sqrt  */
};

/* Approximation modes for Exynos M1.  */
static const cpu_approx_modes exynosm1_approx_modes =
{
  AARCH64_APPROX_NONE, /* division  */
  AARCH64_APPROX_ALL,  /* sqrt  */
  AARCH64_APPROX_ALL   /* recip_sqrt  */
};

/* Approximation modes for X-Gene 1.  */
static const cpu_approx_modes xgene1_approx_modes =
{
  AARCH64_APPROX_NONE, /* division  */
  AARCH64_APPROX_NONE, /* sqrt  */
  AARCH64_APPROX_ALL   /* recip_sqrt  */
};

/* Generic prefetch settings (which disable prefetch).  */
static const cpu_prefetch_tune generic_prefetch_tune =
{
  0,     /* num_slots  */
  -1,    /* l1_cache_size  */
  -1,    /* l1_cache_line_size  */
  -1,    /* l2_cache_size  */
  true,  /* prefetch_dynamic_strides */
  -1,    /* minimum_stride */
  -1     /* default_opt_level  */
};

static const cpu_prefetch_tune exynosm1_prefetch_tune =
{
  0,     /* num_slots  */
  -1,    /* l1_cache_size  */
  64,    /* l1_cache_line_size  */
  -1,    /* l2_cache_size  */
  true,  /* prefetch_dynamic_strides */
  -1,    /* minimum_stride */
  -1     /* default_opt_level  */
};

static const cpu_prefetch_tune qdf24xx_prefetch_tune =
{
  4,     /* num_slots  */
  32,    /* l1_cache_size  */
  64,    /* l1_cache_line_size  */
  512,   /* l2_cache_size  */
  false, /* prefetch_dynamic_strides */
  2048,  /* minimum_stride */
  3      /* default_opt_level  */
};

static const cpu_prefetch_tune thunderxt88_prefetch_tune =
{
  8,       /* num_slots  */
  32,      /* l1_cache_size  */
  128,     /* l1_cache_line_size  */
  16*1024, /* l2_cache_size  */
  true,    /* prefetch_dynamic_strides */
  -1,      /* minimum_stride */
  3        /* default_opt_level  */
};

static const cpu_prefetch_tune thunderx_prefetch_tune =
{
  8,     /* num_slots  */
  32,    /* l1_cache_size  */
  128,   /* l1_cache_line_size  */
  -1,    /* l2_cache_size  */
  true,  /* prefetch_dynamic_strides */
  -1,    /* minimum_stride */
  -1     /* default_opt_level  */
};

static const cpu_prefetch_tune thunderx2t99_prefetch_tune =
{
  8,     /* num_slots  */
  32,    /* l1_cache_size  */
  64,    /* l1_cache_line_size  */
  256,   /* l2_cache_size  */
  true,  /* prefetch_dynamic_strides */
  -1,    /* minimum_stride */
  -1     /* default_opt_level  */
};

static const cpu_prefetch_tune tsv110_prefetch_tune =
{
  0,     /* num_slots  */
  64,    /* l1_cache_size  */
  64,    /* l1_cache_line_size  */
  512,   /* l2_cache_size  */
  true,  /* prefetch_dynamic_strides */
  -1,    /* minimum_stride */
  -1     /* default_opt_level  */
};

static const cpu_prefetch_tune xgene1_prefetch_tune =
{
  8,     /* num_slots  */
  32,    /* l1_cache_size  */
  64,    /* l1_cache_line_size  */
  256,   /* l2_cache_size  */
  true,  /* prefetch_dynamic_strides */
  -1,    /* minimum_stride */
  -1     /* default_opt_level  */
};

static const struct tune_params generic_tunings =
{
  &cortexa57_extra_costs,
  &generic_addrcost_table,
  &generic_regmove_cost,
  &generic_vector_cost,
  &generic_branch_cost,
  &generic_approx_modes,
  SVE_NOT_IMPLEMENTED, /* sve_width  */
  4, /* memmov_cost  */
  2, /* issue_rate  */
  (AARCH64_FUSE_AES_AESMC), /* fusible_ops  */
  "8",	/* function_align.  */
  "4",	/* jump_align.  */
  "8",	/* loop_align.  */
  2,	/* int_reassoc_width.  */
  4,	/* fp_reassoc_width.  */
  1,	/* vec_reassoc_width.  */
  2,	/* min_div_recip_mul_sf.  */
  2,	/* min_div_recip_mul_df.  */
  0,	/* max_case_values.  */
  tune_params::AUTOPREFETCHER_WEAK,	/* autoprefetcher_model.  */
  (AARCH64_EXTRA_TUNE_NONE),	/* tune_flags.  */
  &generic_prefetch_tune
};

static const struct tune_params cortexa35_tunings =
{
  &cortexa53_extra_costs,
  &generic_addrcost_table,
  &cortexa53_regmove_cost,
  &generic_vector_cost,
  &generic_branch_cost,
  &generic_approx_modes,
  SVE_NOT_IMPLEMENTED, /* sve_width  */
  4, /* memmov_cost  */
  1, /* issue_rate  */
  (AARCH64_FUSE_AES_AESMC | AARCH64_FUSE_MOV_MOVK | AARCH64_FUSE_ADRP_ADD
   | AARCH64_FUSE_MOVK_MOVK | AARCH64_FUSE_ADRP_LDR), /* fusible_ops  */
  "16",	/* function_align.  */
  "4",	/* jump_align.  */
  "8",	/* loop_align.  */
  2,	/* int_reassoc_width.  */
  4,	/* fp_reassoc_width.  */
  1,	/* vec_reassoc_width.  */
  2,	/* min_div_recip_mul_sf.  */
  2,	/* min_div_recip_mul_df.  */
  0,	/* max_case_values.  */
  tune_params::AUTOPREFETCHER_WEAK,	/* autoprefetcher_model.  */
  (AARCH64_EXTRA_TUNE_NONE),	/* tune_flags.  */
  &generic_prefetch_tune
};

static const struct tune_params cortexa53_tunings =
{
  &cortexa53_extra_costs,
  &generic_addrcost_table,
  &cortexa53_regmove_cost,
  &generic_vector_cost,
  &generic_branch_cost,
  &generic_approx_modes,
  SVE_NOT_IMPLEMENTED, /* sve_width  */
  4, /* memmov_cost  */
  2, /* issue_rate  */
  (AARCH64_FUSE_AES_AESMC | AARCH64_FUSE_MOV_MOVK | AARCH64_FUSE_ADRP_ADD
   | AARCH64_FUSE_MOVK_MOVK | AARCH64_FUSE_ADRP_LDR), /* fusible_ops  */
  "16",	/* function_align.  */
  "4",	/* jump_align.  */
  "8",	/* loop_align.  */
  2,	/* int_reassoc_width.  */
  4,	/* fp_reassoc_width.  */
  1,	/* vec_reassoc_width.  */
  2,	/* min_div_recip_mul_sf.  */
  2,	/* min_div_recip_mul_df.  */
  0,	/* max_case_values.  */
  tune_params::AUTOPREFETCHER_WEAK,	/* autoprefetcher_model.  */
  (AARCH64_EXTRA_TUNE_NONE),	/* tune_flags.  */
  &generic_prefetch_tune
};

static const struct tune_params cortexa57_tunings =
{
  &cortexa57_extra_costs,
  &generic_addrcost_table,
  &cortexa57_regmove_cost,
  &cortexa57_vector_cost,
  &generic_branch_cost,
  &generic_approx_modes,
  SVE_NOT_IMPLEMENTED, /* sve_width  */
  4, /* memmov_cost  */
  3, /* issue_rate  */
  (AARCH64_FUSE_AES_AESMC | AARCH64_FUSE_MOV_MOVK | AARCH64_FUSE_ADRP_ADD
   | AARCH64_FUSE_MOVK_MOVK), /* fusible_ops  */
  "16",	/* function_align.  */
  "4",	/* jump_align.  */
  "8",	/* loop_align.  */
  2,	/* int_reassoc_width.  */
  4,	/* fp_reassoc_width.  */
  1,	/* vec_reassoc_width.  */
  2,	/* min_div_recip_mul_sf.  */
  2,	/* min_div_recip_mul_df.  */
  0,	/* max_case_values.  */
  tune_params::AUTOPREFETCHER_WEAK,	/* autoprefetcher_model.  */
  (AARCH64_EXTRA_TUNE_RENAME_FMA_REGS),	/* tune_flags.  */
  &generic_prefetch_tune
};

static const struct tune_params cortexa72_tunings =
{
  &cortexa57_extra_costs,
  &generic_addrcost_table,
  &cortexa57_regmove_cost,
  &cortexa57_vector_cost,
  &generic_branch_cost,
  &generic_approx_modes,
  SVE_NOT_IMPLEMENTED, /* sve_width  */
  4, /* memmov_cost  */
  3, /* issue_rate  */
  (AARCH64_FUSE_AES_AESMC | AARCH64_FUSE_MOV_MOVK | AARCH64_FUSE_ADRP_ADD
   | AARCH64_FUSE_MOVK_MOVK), /* fusible_ops  */
  "16",	/* function_align.  */
  "4",	/* jump_align.  */
  "8",	/* loop_align.  */
  2,	/* int_reassoc_width.  */
  4,	/* fp_reassoc_width.  */
  1,	/* vec_reassoc_width.  */
  2,	/* min_div_recip_mul_sf.  */
  2,	/* min_div_recip_mul_df.  */
  0,	/* max_case_values.  */
  tune_params::AUTOPREFETCHER_WEAK,	/* autoprefetcher_model.  */
  (AARCH64_EXTRA_TUNE_NONE),	/* tune_flags.  */
  &generic_prefetch_tune
};

static const struct tune_params cortexa73_tunings =
{
  &cortexa57_extra_costs,
  &generic_addrcost_table,
  &cortexa57_regmove_cost,
  &cortexa57_vector_cost,
  &generic_branch_cost,
  &generic_approx_modes,
  SVE_NOT_IMPLEMENTED, /* sve_width  */
  4, /* memmov_cost.  */
  2, /* issue_rate.  */
  (AARCH64_FUSE_AES_AESMC | AARCH64_FUSE_MOV_MOVK | AARCH64_FUSE_ADRP_ADD
   | AARCH64_FUSE_MOVK_MOVK | AARCH64_FUSE_ADRP_LDR), /* fusible_ops  */
  "16",	/* function_align.  */
  "4",	/* jump_align.  */
  "8",	/* loop_align.  */
  2,	/* int_reassoc_width.  */
  4,	/* fp_reassoc_width.  */
  1,	/* vec_reassoc_width.  */
  2,	/* min_div_recip_mul_sf.  */
  2,	/* min_div_recip_mul_df.  */
  0,	/* max_case_values.  */
  tune_params::AUTOPREFETCHER_WEAK,	/* autoprefetcher_model.  */
  (AARCH64_EXTRA_TUNE_NONE),	/* tune_flags.  */
  &generic_prefetch_tune
};



static const struct tune_params exynosm1_tunings =
{
  &exynosm1_extra_costs,
  &exynosm1_addrcost_table,
  &exynosm1_regmove_cost,
  &exynosm1_vector_cost,
  &generic_branch_cost,
  &exynosm1_approx_modes,
  SVE_NOT_IMPLEMENTED, /* sve_width  */
  4,	/* memmov_cost  */
  3,	/* issue_rate  */
  (AARCH64_FUSE_AES_AESMC), /* fusible_ops  */
  "4",	/* function_align.  */
  "4",	/* jump_align.  */
  "4",	/* loop_align.  */
  2,	/* int_reassoc_width.  */
  4,	/* fp_reassoc_width.  */
  1,	/* vec_reassoc_width.  */
  2,	/* min_div_recip_mul_sf.  */
  2,	/* min_div_recip_mul_df.  */
  48,	/* max_case_values.  */
  tune_params::AUTOPREFETCHER_WEAK, /* autoprefetcher_model.  */
  (AARCH64_EXTRA_TUNE_NONE), /* tune_flags.  */
  &exynosm1_prefetch_tune
};

static const struct tune_params thunderxt88_tunings =
{
  &thunderx_extra_costs,
  &generic_addrcost_table,
  &thunderx_regmove_cost,
  &thunderx_vector_cost,
  &generic_branch_cost,
  &generic_approx_modes,
  SVE_NOT_IMPLEMENTED, /* sve_width  */
  6, /* memmov_cost  */
  2, /* issue_rate  */
  AARCH64_FUSE_CMP_BRANCH, /* fusible_ops  */
  "8",	/* function_align.  */
  "8",	/* jump_align.  */
  "8",	/* loop_align.  */
  2,	/* int_reassoc_width.  */
  4,	/* fp_reassoc_width.  */
  1,	/* vec_reassoc_width.  */
  2,	/* min_div_recip_mul_sf.  */
  2,	/* min_div_recip_mul_df.  */
  0,	/* max_case_values.  */
  tune_params::AUTOPREFETCHER_OFF,	/* autoprefetcher_model.  */
  (AARCH64_EXTRA_TUNE_SLOW_UNALIGNED_LDPW),	/* tune_flags.  */
  &thunderxt88_prefetch_tune
};

static const struct tune_params thunderx_tunings =
{
  &thunderx_extra_costs,
  &generic_addrcost_table,
  &thunderx_regmove_cost,
  &thunderx_vector_cost,
  &generic_branch_cost,
  &generic_approx_modes,
  SVE_NOT_IMPLEMENTED, /* sve_width  */
  6, /* memmov_cost  */
  2, /* issue_rate  */
  AARCH64_FUSE_CMP_BRANCH, /* fusible_ops  */
  "8",	/* function_align.  */
  "8",	/* jump_align.  */
  "8",	/* loop_align.  */
  2,	/* int_reassoc_width.  */
  4,	/* fp_reassoc_width.  */
  1,	/* vec_reassoc_width.  */
  2,	/* min_div_recip_mul_sf.  */
  2,	/* min_div_recip_mul_df.  */
  0,	/* max_case_values.  */
  tune_params::AUTOPREFETCHER_OFF,	/* autoprefetcher_model.  */
  (AARCH64_EXTRA_TUNE_SLOW_UNALIGNED_LDPW
   | AARCH64_EXTRA_TUNE_CHEAP_SHIFT_EXTEND),	/* tune_flags.  */
  &thunderx_prefetch_tune
};

static const struct tune_params tsv110_tunings =
{
  &tsv110_extra_costs,
  &tsv110_addrcost_table,
  &tsv110_regmove_cost,
  &tsv110_vector_cost,
  &generic_branch_cost,
  &generic_approx_modes,
  SVE_NOT_IMPLEMENTED, /* sve_width  */
  4,    /* memmov_cost  */
  4,    /* issue_rate  */
  (AARCH64_FUSE_AES_AESMC | AARCH64_FUSE_CMP_BRANCH
   | AARCH64_FUSE_ALU_BRANCH), /* fusible_ops  */
  "16", /* function_align.  */
  "4",  /* jump_align.  */
  "8",  /* loop_align.  */
  2,    /* int_reassoc_width.  */
  4,    /* fp_reassoc_width.  */
  1,    /* vec_reassoc_width.  */
  2,    /* min_div_recip_mul_sf.  */
  2,    /* min_div_recip_mul_df.  */
  0,    /* max_case_values.  */
  tune_params::AUTOPREFETCHER_WEAK,     /* autoprefetcher_model.  */
  (AARCH64_EXTRA_TUNE_NONE),     /* tune_flags.  */
  &tsv110_prefetch_tune
};

static const struct tune_params xgene1_tunings =
{
  &xgene1_extra_costs,
  &xgene1_addrcost_table,
  &xgene1_regmove_cost,
  &xgene1_vector_cost,
  &generic_branch_cost,
  &xgene1_approx_modes,
  SVE_NOT_IMPLEMENTED, /* sve_width  */
  6, /* memmov_cost  */
  4, /* issue_rate  */
  AARCH64_FUSE_NOTHING, /* fusible_ops  */
  "16",	/* function_align.  */
  "16",	/* jump_align.  */
  "16",	/* loop_align.  */
  2,	/* int_reassoc_width.  */
  4,	/* fp_reassoc_width.  */
  1,	/* vec_reassoc_width.  */
  2,	/* min_div_recip_mul_sf.  */
  2,	/* min_div_recip_mul_df.  */
  17,	/* max_case_values.  */
  tune_params::AUTOPREFETCHER_OFF,	/* autoprefetcher_model.  */
  (AARCH64_EXTRA_TUNE_NO_LDP_STP_QREGS),	/* tune_flags.  */
  &xgene1_prefetch_tune
};

static const struct tune_params emag_tunings =
{
  &xgene1_extra_costs,
  &xgene1_addrcost_table,
  &xgene1_regmove_cost,
  &xgene1_vector_cost,
  &generic_branch_cost,
  &xgene1_approx_modes,
  SVE_NOT_IMPLEMENTED,
  6, /* memmov_cost  */
  4, /* issue_rate  */
  AARCH64_FUSE_NOTHING, /* fusible_ops  */
  "16",	/* function_align.  */
  "16",	/* jump_align.  */
  "16",	/* loop_align.  */
  2,	/* int_reassoc_width.  */
  4,	/* fp_reassoc_width.  */
  1,	/* vec_reassoc_width.  */
  2,	/* min_div_recip_mul_sf.  */
  2,	/* min_div_recip_mul_df.  */
  17,	/* max_case_values.  */
  tune_params::AUTOPREFETCHER_OFF,	/* autoprefetcher_model.  */
  (AARCH64_EXTRA_TUNE_NO_LDP_STP_QREGS),	/* tune_flags.  */
  &xgene1_prefetch_tune
};

static const struct tune_params qdf24xx_tunings =
{
  &qdf24xx_extra_costs,
  &qdf24xx_addrcost_table,
  &qdf24xx_regmove_cost,
  &qdf24xx_vector_cost,
  &generic_branch_cost,
  &generic_approx_modes,
  SVE_NOT_IMPLEMENTED, /* sve_width  */
  4, /* memmov_cost  */
  4, /* issue_rate  */
  (AARCH64_FUSE_MOV_MOVK | AARCH64_FUSE_ADRP_ADD
   | AARCH64_FUSE_MOVK_MOVK), /* fuseable_ops  */
  "16",	/* function_align.  */
  "8",	/* jump_align.  */
  "16",	/* loop_align.  */
  2,	/* int_reassoc_width.  */
  4,	/* fp_reassoc_width.  */
  1,	/* vec_reassoc_width.  */
  2,	/* min_div_recip_mul_sf.  */
  2,	/* min_div_recip_mul_df.  */
  0,	/* max_case_values.  */
  tune_params::AUTOPREFETCHER_WEAK,	/* autoprefetcher_model.  */
  AARCH64_EXTRA_TUNE_RENAME_LOAD_REGS, /* tune_flags.  */
  &qdf24xx_prefetch_tune
};

/* Tuning structure for the Qualcomm Saphira core.  Default to falkor values
   for now.  */
static const struct tune_params saphira_tunings =
{
  &generic_extra_costs,
  &generic_addrcost_table,
  &generic_regmove_cost,
  &generic_vector_cost,
  &generic_branch_cost,
  &generic_approx_modes,
  SVE_NOT_IMPLEMENTED, /* sve_width  */
  4, /* memmov_cost  */
  4, /* issue_rate  */
  (AARCH64_FUSE_MOV_MOVK | AARCH64_FUSE_ADRP_ADD
   | AARCH64_FUSE_MOVK_MOVK), /* fuseable_ops  */
  "16",	/* function_align.  */
  "8",	/* jump_align.  */
  "16",	/* loop_align.  */
  2,	/* int_reassoc_width.  */
  4,	/* fp_reassoc_width.  */
  1,	/* vec_reassoc_width.  */
  2,	/* min_div_recip_mul_sf.  */
  2,	/* min_div_recip_mul_df.  */
  0,	/* max_case_values.  */
  tune_params::AUTOPREFETCHER_WEAK,	/* autoprefetcher_model.  */
  (AARCH64_EXTRA_TUNE_NONE),		/* tune_flags.  */
  &generic_prefetch_tune
};

static const struct tune_params thunderx2t99_tunings =
{
  &thunderx2t99_extra_costs,
  &thunderx2t99_addrcost_table,
  &thunderx2t99_regmove_cost,
  &thunderx2t99_vector_cost,
  &generic_branch_cost,
  &generic_approx_modes,
  SVE_NOT_IMPLEMENTED, /* sve_width  */
  4, /* memmov_cost.  */
  4, /* issue_rate.  */
  (AARCH64_FUSE_CMP_BRANCH | AARCH64_FUSE_AES_AESMC
   | AARCH64_FUSE_ALU_BRANCH), /* fusible_ops  */
  "16",	/* function_align.  */
  "8",	/* jump_align.  */
  "16",	/* loop_align.  */
  3,	/* int_reassoc_width.  */
  2,	/* fp_reassoc_width.  */
  2,	/* vec_reassoc_width.  */
  2,	/* min_div_recip_mul_sf.  */
  2,	/* min_div_recip_mul_df.  */
  0,	/* max_case_values.  */
  tune_params::AUTOPREFETCHER_WEAK,	/* autoprefetcher_model.  */
  (AARCH64_EXTRA_TUNE_NONE),	/* tune_flags.  */
  &thunderx2t99_prefetch_tune
};

/* Support for fine-grained override of the tuning structures.  */
struct aarch64_tuning_override_function
{
  const char* name;
  void (*parse_override)(const char*, struct tune_params*);
};

static void aarch64_parse_fuse_string (const char*, struct tune_params*);
static void aarch64_parse_tune_string (const char*, struct tune_params*);
static void aarch64_parse_sve_width_string (const char*, struct tune_params*);

static const struct aarch64_tuning_override_function
aarch64_tuning_override_functions[] =
{
  { "fuse", aarch64_parse_fuse_string },
  { "tune", aarch64_parse_tune_string },
  { "sve_width", aarch64_parse_sve_width_string },
  { NULL, NULL }
};

/* A processor implementing AArch64.  */
struct processor
{
  const char *const name;
  enum aarch64_processor ident;
  enum aarch64_processor sched_core;
  enum aarch64_arch arch;
  unsigned architecture_version;
  const unsigned long flags;
  const struct tune_params *const tune;
};

/* Architectures implementing AArch64.  */
static const struct processor all_architectures[] =
{
#define AARCH64_ARCH(NAME, CORE, ARCH_IDENT, ARCH_REV, FLAGS) \
  {NAME, CORE, CORE, AARCH64_ARCH_##ARCH_IDENT, ARCH_REV, FLAGS, NULL},
#include "aarch64-arches.def"
  {NULL, aarch64_none, aarch64_none, aarch64_no_arch, 0, 0, NULL}
};

/* Processor cores implementing AArch64.  */
static const struct processor all_cores[] =
{
#define AARCH64_CORE(NAME, IDENT, SCHED, ARCH, FLAGS, COSTS, IMP, PART, VARIANT) \
  {NAME, IDENT, SCHED, AARCH64_ARCH_##ARCH,				\
  all_architectures[AARCH64_ARCH_##ARCH].architecture_version,		\
  FLAGS, &COSTS##_tunings},
#include "aarch64-cores.def"
  {"generic", generic, cortexa53, AARCH64_ARCH_8A, 8,
   AARCH64_FL_FOR_ARCH8, &generic_tunings},
  {NULL, aarch64_none, aarch64_none, aarch64_no_arch, 0, 0, NULL}
};


/* Target specification.  These are populated by the -march, -mtune, -mcpu
   handling code or by target attributes.  */
static const struct processor *selected_arch;
static const struct processor *selected_cpu;
static const struct processor *selected_tune;

/* The current tuning set.  */
struct tune_params aarch64_tune_params = generic_tunings;

#define AARCH64_CPU_DEFAULT_FLAGS ((selected_cpu) ? selected_cpu->flags : 0)

/* An ISA extension in the co-processor and main instruction set space.  */
struct aarch64_option_extension
{
  const char *const name;
  const unsigned long flags_on;
  const unsigned long flags_off;
};

typedef enum aarch64_cond_code
{
  AARCH64_EQ = 0, AARCH64_NE, AARCH64_CS, AARCH64_CC, AARCH64_MI, AARCH64_PL,
  AARCH64_VS, AARCH64_VC, AARCH64_HI, AARCH64_LS, AARCH64_GE, AARCH64_LT,
  AARCH64_GT, AARCH64_LE, AARCH64_AL, AARCH64_NV
}
aarch64_cc;

#define AARCH64_INVERSE_CONDITION_CODE(X) ((aarch64_cc) (((int) X) ^ 1))

/* The condition codes of the processor, and the inverse function.  */
static const char * const aarch64_condition_codes[] =
{
  "eq", "ne", "cs", "cc", "mi", "pl", "vs", "vc",
  "hi", "ls", "ge", "lt", "gt", "le", "al", "nv"
};

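/* Illustrative note (editorial addition): because the condition codes are
   laid out above in inverse pairs, AARCH64_INVERSE_CONDITION_CODE simply
   flips the low bit -- e.g. AARCH64_EQ (0) becomes AARCH64_NE (1), and
   AARCH64_GE (10) becomes AARCH64_LT (11).  */
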
/* Generate code to enable conditional branches in functions over 1 MiB.  */
const char *
aarch64_gen_far_branch (rtx * operands, int pos_label, const char * dest,
			const char * branch_format)
{
  rtx_code_label * tmp_label = gen_label_rtx ();
  char label_buf[256];
  char buffer[128];
  ASM_GENERATE_INTERNAL_LABEL (label_buf, dest,
			       CODE_LABEL_NUMBER (tmp_label));
  const char *label_ptr = targetm.strip_name_encoding (label_buf);
  rtx dest_label = operands[pos_label];
  operands[pos_label] = tmp_label;

  snprintf (buffer, sizeof (buffer), "%s%s", branch_format, label_ptr);
  output_asm_insn (buffer, operands);

  snprintf (buffer, sizeof (buffer), "b\t%%l%d\n%s:", pos_label, label_ptr);
  operands[pos_label] = dest_label;
  output_asm_insn (buffer, operands);
  return "";
}

void
aarch64_err_no_fpadvsimd (machine_mode mode)
{
  if (TARGET_GENERAL_REGS_ONLY)
    if (FLOAT_MODE_P (mode))
      error ("%qs is incompatible with the use of floating-point types",
	     "-mgeneral-regs-only");
    else
      error ("%qs is incompatible with the use of vector types",
	     "-mgeneral-regs-only");
  else
    if (FLOAT_MODE_P (mode))
      error ("%qs feature modifier is incompatible with the use of"
	     " floating-point types", "+nofp");
    else
      error ("%qs feature modifier is incompatible with the use of"
	     " vector types", "+nofp");
}

/* Implement TARGET_IRA_CHANGE_PSEUDO_ALLOCNO_CLASS.
   The register allocator chooses POINTER_AND_FP_REGS if FP_REGS and
   GENERAL_REGS have the same cost - even if POINTER_AND_FP_REGS has a much
   higher cost.  POINTER_AND_FP_REGS is also used if the cost of both FP_REGS
   and GENERAL_REGS is lower than the memory cost (in this case the best class
   is the lowest cost one).  Using POINTER_AND_FP_REGS irrespective of its
   cost results in bad allocations with many redundant int<->FP moves which
   are expensive on various cores.
   To avoid this we don't allow POINTER_AND_FP_REGS as the allocno class, but
   force a decision between FP_REGS and GENERAL_REGS.  We use the allocno
   class if it isn't POINTER_AND_FP_REGS.  Similarly, use the best class if
   it isn't POINTER_AND_FP_REGS.  Otherwise set the allocno class depending
   on the mode.  The result of this is that it is no longer inefficient to
   have a higher memory move cost than the register move cost.  */

static reg_class_t
aarch64_ira_change_pseudo_allocno_class (int regno, reg_class_t allocno_class,
					 reg_class_t best_class)
{
  machine_mode mode;

  if (!reg_class_subset_p (GENERAL_REGS, allocno_class)
      || !reg_class_subset_p (FP_REGS, allocno_class))
    return allocno_class;

  if (!reg_class_subset_p (GENERAL_REGS, best_class)
      || !reg_class_subset_p (FP_REGS, best_class))
    return best_class;

  mode = PSEUDO_REGNO_MODE (regno);
  return FLOAT_MODE_P (mode) || VECTOR_MODE_P (mode) ? FP_REGS : GENERAL_REGS;
}

static unsigned int
aarch64_min_divisions_for_recip_mul (machine_mode mode)
{
  if (GET_MODE_UNIT_SIZE (mode) == 4)
    return aarch64_tune_params.min_div_recip_mul_sf;
  return aarch64_tune_params.min_div_recip_mul_df;
}

/* Return the reassociation width of treeop OPC with mode MODE.  */
static int
aarch64_reassociation_width (unsigned opc, machine_mode mode)
{
  if (VECTOR_MODE_P (mode))
    return aarch64_tune_params.vec_reassoc_width;
  if (INTEGRAL_MODE_P (mode))
    return aarch64_tune_params.int_reassoc_width;
  /* Avoid reassociating floating point addition so we emit more FMAs.  */
  if (FLOAT_MODE_P (mode) && opc != PLUS_EXPR)
    return aarch64_tune_params.fp_reassoc_width;
  return 1;
}

/* Provide a mapping from gcc register numbers to dwarf register numbers.  */
unsigned
aarch64_dbx_register_number (unsigned regno)
{
  if (GP_REGNUM_P (regno))
    return AARCH64_DWARF_R0 + regno - R0_REGNUM;
  else if (regno == SP_REGNUM)
    return AARCH64_DWARF_SP;
  else if (FP_REGNUM_P (regno))
    return AARCH64_DWARF_V0 + regno - V0_REGNUM;
  else if (PR_REGNUM_P (regno))
    return AARCH64_DWARF_P0 + regno - P0_REGNUM;
  else if (regno == VG_REGNUM)
    return AARCH64_DWARF_VG;

  /* Return values >= DWARF_FRAME_REGISTERS indicate that there is no
     equivalent DWARF register.  */
  return DWARF_FRAME_REGISTERS;
}

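/* Illustrative note (editorial addition): each register file is mapped onto
   a contiguous DWARF range, so x3 becomes AARCH64_DWARF_R0 + 3, v3 becomes
   AARCH64_DWARF_V0 + 3, and so on; registers such as the condition flags
   that have no DWARF equivalent fall through to the
   DWARF_FRAME_REGISTERS sentinel.  */
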
/* Return true if MODE is any of the Advanced SIMD structure modes.  */
static bool
aarch64_advsimd_struct_mode_p (machine_mode mode)
{
  return (TARGET_SIMD
	  && (mode == OImode || mode == CImode || mode == XImode));
}

/* Return true if MODE is an SVE predicate mode.  */
static bool
aarch64_sve_pred_mode_p (machine_mode mode)
{
  return (TARGET_SVE
	  && (mode == VNx16BImode
	      || mode == VNx8BImode
	      || mode == VNx4BImode
	      || mode == VNx2BImode));
}

/* Three mutually-exclusive flags describing a vector or predicate type.  */
const unsigned int VEC_ADVSIMD  = 1;
const unsigned int VEC_SVE_DATA = 2;
const unsigned int VEC_SVE_PRED = 4;
/* Can be used in combination with VEC_ADVSIMD or VEC_SVE_DATA to indicate
   a structure of 2, 3 or 4 vectors.  */
const unsigned int VEC_STRUCT   = 8;
/* Useful combinations of the above.  */
const unsigned int VEC_ANY_SVE  = VEC_SVE_DATA | VEC_SVE_PRED;
const unsigned int VEC_ANY_DATA = VEC_ADVSIMD | VEC_SVE_DATA;

/* Return a set of flags describing the vector properties of mode MODE.
   Ignore modes that are not supported by the current target.  */
static unsigned int
aarch64_classify_vector_mode (machine_mode mode)
{
  if (aarch64_advsimd_struct_mode_p (mode))
    return VEC_ADVSIMD | VEC_STRUCT;

  if (aarch64_sve_pred_mode_p (mode))
    return VEC_SVE_PRED;

  scalar_mode inner = GET_MODE_INNER (mode);
  if (VECTOR_MODE_P (mode)
      && (inner == QImode
	  || inner == HImode
	  || inner == HFmode
	  || inner == SImode
	  || inner == SFmode
	  || inner == DImode
	  || inner == DFmode))
    {
      if (TARGET_SVE)
	{
	  if (known_eq (GET_MODE_BITSIZE (mode), BITS_PER_SVE_VECTOR))
	    return VEC_SVE_DATA;
	  if (known_eq (GET_MODE_BITSIZE (mode), BITS_PER_SVE_VECTOR * 2)
	      || known_eq (GET_MODE_BITSIZE (mode), BITS_PER_SVE_VECTOR * 3)
	      || known_eq (GET_MODE_BITSIZE (mode), BITS_PER_SVE_VECTOR * 4))
	    return VEC_SVE_DATA | VEC_STRUCT;
	}

      /* This includes V1DF but not V1DI (which doesn't exist).  */
      if (TARGET_SIMD
	  && (known_eq (GET_MODE_BITSIZE (mode), 64)
	      || known_eq (GET_MODE_BITSIZE (mode), 128)))
	return VEC_ADVSIMD;
    }

  return 0;
}

/* Return true if MODE is any of the data vector modes, including
   structure modes.  */
static bool
aarch64_vector_data_mode_p (machine_mode mode)
{
  return aarch64_classify_vector_mode (mode) & VEC_ANY_DATA;
}

/* Return true if MODE is an SVE data vector mode; either a single vector
   or a structure of vectors.  */
static bool
aarch64_sve_data_mode_p (machine_mode mode)
{
  return aarch64_classify_vector_mode (mode) & VEC_SVE_DATA;
}

/* Implement target hook TARGET_ARRAY_MODE.  */
static opt_machine_mode
aarch64_array_mode (machine_mode mode, unsigned HOST_WIDE_INT nelems)
{
  if (aarch64_classify_vector_mode (mode) == VEC_SVE_DATA
      && IN_RANGE (nelems, 2, 4))
    return mode_for_vector (GET_MODE_INNER (mode),
			    GET_MODE_NUNITS (mode) * nelems);

  return opt_machine_mode ();
}

/* Implement target hook TARGET_ARRAY_MODE_SUPPORTED_P.  */
static bool
aarch64_array_mode_supported_p (machine_mode mode,
				unsigned HOST_WIDE_INT nelems)
{
  if (TARGET_SIMD
      && (AARCH64_VALID_SIMD_QREG_MODE (mode)
	  || AARCH64_VALID_SIMD_DREG_MODE (mode))
      && (nelems >= 2 && nelems <= 4))
    return true;

  return false;
}

/* Return the SVE predicate mode to use for elements that have
   ELEM_NBYTES bytes, if such a mode exists.  */

opt_machine_mode
aarch64_sve_pred_mode (unsigned int elem_nbytes)
{
  if (TARGET_SVE)
    {
      if (elem_nbytes == 1)
	return VNx16BImode;
      if (elem_nbytes == 2)
	return VNx8BImode;
      if (elem_nbytes == 4)
	return VNx4BImode;
      if (elem_nbytes == 8)
	return VNx2BImode;
    }
  return opt_machine_mode ();
}

/* Implement TARGET_VECTORIZE_GET_MASK_MODE.  */

static opt_machine_mode
aarch64_get_mask_mode (poly_uint64 nunits, poly_uint64 nbytes)
{
  if (TARGET_SVE && known_eq (nbytes, BYTES_PER_SVE_VECTOR))
    {
      unsigned int elem_nbytes = vector_element_size (nbytes, nunits);
      machine_mode pred_mode;
      if (aarch64_sve_pred_mode (elem_nbytes).exists (&pred_mode))
	return pred_mode;
    }

  return default_get_mask_mode (nunits, nbytes);
}

/* Implement TARGET_PREFERRED_ELSE_VALUE.  For binary operations,
   prefer to use the first arithmetic operand as the else value if
   the else value doesn't matter, since that exactly matches the SVE
   destructive merging form.  For ternary operations we could either
   pick the first operand and use FMAD-like instructions or the last
   operand and use FMLA-like instructions; the latter seems more
   natural.  */

static tree
aarch64_preferred_else_value (unsigned, tree, unsigned int nops, tree *ops)
{
  return nops == 3 ? ops[2] : ops[0];
}

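/* Illustrative note (editorial addition): for a conditional fused
   multiply-add a * b + c, this hook returns c (ops[2]) as the else value,
   which lines up with the FMLA form that overwrites its accumulator;
   returning a instead would correspond to the FMAD form described in the
   comment above.  For a binary operation such as a + b it returns a.  */
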
/* Implement TARGET_HARD_REGNO_NREGS.  */

static unsigned int
aarch64_hard_regno_nregs (unsigned regno, machine_mode mode)
{
  /* ??? Logically we should only need to provide a value when
     HARD_REGNO_MODE_OK says that the combination is valid,
     but at the moment we need to handle all modes.  Just ignore
     any runtime parts for registers that can't store them.  */
  HOST_WIDE_INT lowest_size = constant_lower_bound (GET_MODE_SIZE (mode));
  switch (aarch64_regno_regclass (regno))
    {
    case FP_REGS:
    case FP_LO_REGS:
      if (aarch64_sve_data_mode_p (mode))
	return exact_div (GET_MODE_SIZE (mode),
			  BYTES_PER_SVE_VECTOR).to_constant ();
      return CEIL (lowest_size, UNITS_PER_VREG);
    case PR_REGS:
    case PR_LO_REGS:
    case PR_HI_REGS:
      return 1;
    default:
      return CEIL (lowest_size, UNITS_PER_WORD);
    }
  gcc_unreachable ();
}

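/* Illustrative note (editorial addition): a 16-byte V4SImode value held in
   an FP/SIMD register occupies CEIL (16, UNITS_PER_VREG) == 1 register,
   while the same 16 bytes as a TImode value in general registers need
   CEIL (16, UNITS_PER_WORD) == 2 registers; predicate registers always
   report 1.  */
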
f939c3e6 1480/* Implement TARGET_HARD_REGNO_MODE_OK. */
43e9d192 1481
f939c3e6 1482static bool
ef4bddc2 1483aarch64_hard_regno_mode_ok (unsigned regno, machine_mode mode)
43e9d192
IB
1484{
1485 if (GET_MODE_CLASS (mode) == MODE_CC)
1486 return regno == CC_REGNUM;
1487
43cacb12
RS
1488 if (regno == VG_REGNUM)
1489 /* This must have the same size as _Unwind_Word. */
1490 return mode == DImode;
1491
1492 unsigned int vec_flags = aarch64_classify_vector_mode (mode);
1493 if (vec_flags & VEC_SVE_PRED)
1494 return PR_REGNUM_P (regno);
1495
1496 if (PR_REGNUM_P (regno))
1497 return 0;
1498
9259db42
YZ
1499 if (regno == SP_REGNUM)
1500 /* The purpose of comparing with ptr_mode is to support the
1501 global register variable associated with the stack pointer
1502 register via the syntax of asm ("wsp") in ILP32. */
1503 return mode == Pmode || mode == ptr_mode;
1504
1505 if (regno == FRAME_POINTER_REGNUM || regno == ARG_POINTER_REGNUM)
43e9d192
IB
1506 return mode == Pmode;
1507
563cc649
RH
1508 if (GP_REGNUM_P (regno))
1509 {
1510 if (known_le (GET_MODE_SIZE (mode), 8))
1511 return true;
1512 else if (known_le (GET_MODE_SIZE (mode), 16))
1513 return (regno & 1) == 0;
1514 }
1515 else if (FP_REGNUM_P (regno))
43e9d192 1516 {
43cacb12 1517 if (vec_flags & VEC_STRUCT)
4edd6298 1518 return end_hard_regno (mode, regno) - 1 <= V31_REGNUM;
43e9d192 1519 else
43cacb12 1520 return !VECTOR_MODE_P (mode) || vec_flags != 0;
43e9d192
IB
1521 }
1522
f939c3e6 1523 return false;
43e9d192
IB
1524}
1525
/* Implement TARGET_HARD_REGNO_CALL_PART_CLOBBERED.  The callee only saves
   the lower 64 bits of a 128-bit register.  Tell the compiler the callee
   clobbers the top 64 bits when restoring the bottom 64 bits.  */

static bool
aarch64_hard_regno_call_part_clobbered (unsigned int regno, machine_mode mode)
{
  return FP_REGNUM_P (regno) && maybe_gt (GET_MODE_SIZE (mode), 8);
}

/* Implement REGMODE_NATURAL_SIZE.  */
poly_uint64
aarch64_regmode_natural_size (machine_mode mode)
{
  /* The natural size for SVE data modes is one SVE data vector,
     and similarly for predicates.  We can't independently modify
     anything smaller than that.  */
  /* ??? For now, only do this for variable-width SVE registers.
     Doing it for constant-sized registers breaks lower-subreg.c.  */
  /* ??? And once that's fixed, we should probably have similar
     code for Advanced SIMD.  */
  if (!aarch64_sve_vg.is_constant ())
    {
      unsigned int vec_flags = aarch64_classify_vector_mode (mode);
      if (vec_flags & VEC_SVE_PRED)
	return BYTES_PER_SVE_PRED;
      if (vec_flags & VEC_SVE_DATA)
	return BYTES_PER_SVE_VECTOR;
    }
  return UNITS_PER_WORD;
}

/* Implement HARD_REGNO_CALLER_SAVE_MODE.  */
machine_mode
aarch64_hard_regno_caller_save_mode (unsigned regno, unsigned,
				     machine_mode mode)
{
  /* The predicate mode determines which bits are significant and
     which are "don't care".  Decreasing the number of lanes would
     lose data while increasing the number of lanes would make bits
     unnecessarily significant.  */
  if (PR_REGNUM_P (regno))
    return mode;
  if (known_ge (GET_MODE_SIZE (mode), 4))
    return mode;
  else
    return SImode;
}

/* Return true if I's bits are consecutive ones from the MSB.  */
bool
aarch64_high_bits_all_ones_p (HOST_WIDE_INT i)
{
  return exact_log2 (-i) != HOST_WIDE_INT_M1;
}

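/* Illustrative note (editorial addition): I satisfies the test exactly when
   it is the negation of a power of two.  For example, i == -4096 has the
   bit pattern 0xfff...f000, so -i == 4096 == 2^12 and exact_log2 returns 12
   (true); i == -4095 (0xfff...f001) is not a run of ones from the MSB and
   -i == 4095 is not a power of two, so exact_log2 returns -1 (false).  */
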
/* Implement TARGET_CONSTANT_ALIGNMENT.  Make strings word-aligned so
   that strcpy from constants will be faster.  */

static HOST_WIDE_INT
aarch64_constant_alignment (const_tree exp, HOST_WIDE_INT align)
{
  if (TREE_CODE (exp) == STRING_CST && !optimize_size)
    return MAX (align, BITS_PER_WORD);
  return align;
}

/* Return true if calls to DECL should be treated as
   long-calls (ie called via a register).  */
static bool
aarch64_decl_is_long_call_p (const_tree decl ATTRIBUTE_UNUSED)
{
  return false;
}

/* Return true if calls to symbol-ref SYM should be treated as
   long-calls (ie called via a register).  */
bool
aarch64_is_long_call_p (rtx sym)
{
  return aarch64_decl_is_long_call_p (SYMBOL_REF_DECL (sym));
}

/* Return true if calls to symbol-ref SYM should not go through
   plt stubs.  */

bool
aarch64_is_noplt_call_p (rtx sym)
{
  const_tree decl = SYMBOL_REF_DECL (sym);

  if (flag_pic
      && decl
      && (!flag_plt
	  || lookup_attribute ("noplt", DECL_ATTRIBUTES (decl)))
      && !targetm.binds_local_p (decl))
    return true;

  return false;
}

43e9d192
IB
1627/* Return true if the offsets to a zero/sign-extract operation
1628 represent an expression that matches an extend operation. The
1629 operands represent the parameters from
1630
4745e701 1631 (extract:MODE (mult (reg) (MULT_IMM)) (EXTRACT_IMM) (const_int 0)). */
43e9d192 1632bool
77e994c9 1633aarch64_is_extend_from_extract (scalar_int_mode mode, rtx mult_imm,
43e9d192
IB
1634 rtx extract_imm)
1635{
1636 HOST_WIDE_INT mult_val, extract_val;
1637
1638 if (! CONST_INT_P (mult_imm) || ! CONST_INT_P (extract_imm))
1639 return false;
1640
1641 mult_val = INTVAL (mult_imm);
1642 extract_val = INTVAL (extract_imm);
1643
1644 if (extract_val > 8
1645 && extract_val < GET_MODE_BITSIZE (mode)
1646 && exact_log2 (extract_val & ~7) > 0
1647 && (extract_val & 7) <= 4
1648 && mult_val == (1 << (extract_val & 7)))
1649 return true;
1650
1651 return false;
1652}
1653
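/* Illustrative standalone sketch (editor's example in plain C, not part
   of the original file): the acceptance test above, assuming a 64-bit
   mode.  EXTRACT_IMM encodes a power-of-two extend width plus a small
   left shift in its low three bits, and MULT_IMM must equal 1 << shift.
   The helper name is hypothetical.  */
#include <stdbool.h>
#include <stdint.h>

static bool
extend_from_extract_sketch (int64_t mult_val, int64_t extract_val)
{
  int64_t width = extract_val & ~7;   /* power-of-two extend width */
  int64_t shift = extract_val & 7;    /* left-shift amount, 0..4 */
  return (extract_val > 8
          && extract_val < 64
          && width > 1
          && (width & (width - 1)) == 0
          && shift <= 4
          && mult_val == (1 << shift));
}

/* extend_from_extract_sketch (4, 34) -> true  (32-bit extend, shift by 2)
   extend_from_extract_sketch (4, 33) -> false (mult does not match 1 << 1)  */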
1654/* Emit an insn that's a simple single-set. Both the operands must be
1655 known to be valid. */
827ab47a 1656inline static rtx_insn *
43e9d192
IB
1657emit_set_insn (rtx x, rtx y)
1658{
f7df4a84 1659 return emit_insn (gen_rtx_SET (x, y));
43e9d192
IB
1660}
1661
1662/* X and Y are two things to compare using CODE. Emit the compare insn and
1663 return the rtx for register 0 in the proper mode. */
1664rtx
1665aarch64_gen_compare_reg (RTX_CODE code, rtx x, rtx y)
1666{
ef4bddc2 1667 machine_mode mode = SELECT_CC_MODE (code, x, y);
43e9d192
IB
1668 rtx cc_reg = gen_rtx_REG (mode, CC_REGNUM);
1669
1670 emit_set_insn (cc_reg, gen_rtx_COMPARE (mode, x, y));
1671 return cc_reg;
1672}
1673
d400fda3
RH
1674/* Similarly, but maybe zero-extend Y if Y_MODE < SImode. */
1675
1676static rtx
1677aarch64_gen_compare_reg_maybe_ze (RTX_CODE code, rtx x, rtx y,
1678 machine_mode y_mode)
1679{
1680 if (y_mode == E_QImode || y_mode == E_HImode)
1681 {
1682 if (CONST_INT_P (y))
1683 y = GEN_INT (INTVAL (y) & GET_MODE_MASK (y_mode));
1684 else
1685 {
1686 rtx t, cc_reg;
1687 machine_mode cc_mode;
1688
1689 t = gen_rtx_ZERO_EXTEND (SImode, y);
1690 t = gen_rtx_COMPARE (CC_SWPmode, t, x);
1691 cc_mode = CC_SWPmode;
1692 cc_reg = gen_rtx_REG (cc_mode, CC_REGNUM);
1693 emit_set_insn (cc_reg, t);
1694 return cc_reg;
1695 }
1696 }
1697
1698 return aarch64_gen_compare_reg (code, x, y);
1699}
1700
43e9d192
IB
1701/* Build the SYMBOL_REF for __tls_get_addr. */
1702
1703static GTY(()) rtx tls_get_addr_libfunc;
1704
1705rtx
1706aarch64_tls_get_addr (void)
1707{
1708 if (!tls_get_addr_libfunc)
1709 tls_get_addr_libfunc = init_one_libfunc ("__tls_get_addr");
1710 return tls_get_addr_libfunc;
1711}
1712
1713/* Return the TLS model to use for ADDR. */
1714
1715static enum tls_model
1716tls_symbolic_operand_type (rtx addr)
1717{
1718 enum tls_model tls_kind = TLS_MODEL_NONE;
43e9d192
IB
1719 if (GET_CODE (addr) == CONST)
1720 {
6a70badb
RS
1721 poly_int64 addend;
1722 rtx sym = strip_offset (addr, &addend);
43e9d192
IB
1723 if (GET_CODE (sym) == SYMBOL_REF)
1724 tls_kind = SYMBOL_REF_TLS_MODEL (sym);
1725 }
1726 else if (GET_CODE (addr) == SYMBOL_REF)
1727 tls_kind = SYMBOL_REF_TLS_MODEL (addr);
1728
1729 return tls_kind;
1730}
1731
1732/* We'll allow lo_sum in our legitimate addresses so that combine
1733 can take care of combining addresses where necessary, but for
1734 generation purposes, we'll generate the address
1735 as:
1736 RTL Absolute
1737 tmp = hi (symbol_ref); adrp x1, foo
1738 dest = lo_sum (tmp, symbol_ref); add dest, x1, :lo_12:foo
1739 nop
1740
1741 PIC TLS
1742 adrp x1, :got:foo adrp tmp, :tlsgd:foo
1743 ldr x1, [:got_lo12:foo] add dest, tmp, :tlsgd_lo12:foo
1744 bl __tls_get_addr
1745 nop
1746
1747 Load TLS symbol, depending on TLS mechanism and TLS access model.
1748
1749 Global Dynamic - Traditional TLS:
1750 adrp tmp, :tlsgd:imm
1751 add dest, tmp, #:tlsgd_lo12:imm
1752 bl __tls_get_addr
1753
1754 Global Dynamic - TLS Descriptors:
1755 adrp dest, :tlsdesc:imm
1756 ldr tmp, [dest, #:tlsdesc_lo12:imm]
1757 add dest, dest, #:tlsdesc_lo12:imm
1758 blr tmp
1759 mrs tp, tpidr_el0
1760 add dest, dest, tp
1761
1762 Initial Exec:
1763 mrs tp, tpidr_el0
1764 adrp tmp, :gottprel:imm
1765 ldr dest, [tmp, #:gottprel_lo12:imm]
1766 add dest, dest, tp
1767
1768 Local Exec:
1769 mrs tp, tpidr_el0
0699caae
RL
1770 add t0, tp, #:tprel_hi12:imm, lsl #12
1771 add t0, t0, #:tprel_lo12_nc:imm
43e9d192
IB
1772*/
1773
1774static void
1775aarch64_load_symref_appropriately (rtx dest, rtx imm,
1776 enum aarch64_symbol_type type)
1777{
1778 switch (type)
1779 {
1780 case SYMBOL_SMALL_ABSOLUTE:
1781 {
28514dda 1782 /* In ILP32, the mode of dest can be either SImode or DImode. */
43e9d192 1783 rtx tmp_reg = dest;
ef4bddc2 1784 machine_mode mode = GET_MODE (dest);
28514dda
YZ
1785
1786 gcc_assert (mode == Pmode || mode == ptr_mode);
1787
43e9d192 1788 if (can_create_pseudo_p ())
28514dda 1789 tmp_reg = gen_reg_rtx (mode);
43e9d192 1790
28514dda 1791 emit_move_insn (tmp_reg, gen_rtx_HIGH (mode, imm));
43e9d192
IB
1792 emit_insn (gen_add_losym (dest, tmp_reg, imm));
1793 return;
1794 }
1795
a5350ddc 1796 case SYMBOL_TINY_ABSOLUTE:
f7df4a84 1797 emit_insn (gen_rtx_SET (dest, imm));
a5350ddc
CSS
1798 return;
1799
1b1e81f8
JW
1800 case SYMBOL_SMALL_GOT_28K:
1801 {
1802 machine_mode mode = GET_MODE (dest);
1803 rtx gp_rtx = pic_offset_table_rtx;
53021678
JW
1804 rtx insn;
1805 rtx mem;
1b1e81f8
JW
1806
1807 /* NOTE: pic_offset_table_rtx can be NULL_RTX, because we can reach
1808 here before RTL expansion. Tree IVOPTS will generate RTL patterns to
1809 decide rtx costs, in which case pic_offset_table_rtx is not
1810 initialized. In that case there is no need to generate the first adrp
026c3cfd 1811 instruction, as the final cost of a global variable access is
1b1e81f8
JW
1812 one instruction. */
1813 if (gp_rtx != NULL)
1814 {
1815 /* -fpic for -mcmodel=small allows a 32K GOT table size (but since we
1816 use the page base as the GOT base, the first page may be wasted; in
1817 the worst case there is only 28K of space for the GOT).
1818
1819 The generated instruction sequence for accessing a global variable
1820 is:
1821
a3957742 1822 ldr reg, [pic_offset_table_rtx, #:gotpage_lo15:sym]
1b1e81f8
JW
1823
1824 Only one instruction is needed. But we must initialize
1825 pic_offset_table_rtx properly. We generate an initialization insn for
1826 every global access, and let CSE remove all redundant copies.
1827
1828 The final instruction sequence will look like the following
1829 when accessing multiple global variables.
1830
a3957742 1831 adrp pic_offset_table_rtx, _GLOBAL_OFFSET_TABLE_
1b1e81f8 1832
a3957742
JW
1833 ldr reg, [pic_offset_table_rtx, #:gotpage_lo15:sym1]
1834 ldr reg, [pic_offset_table_rtx, #:gotpage_lo15:sym2]
1835 ldr reg, [pic_offset_table_rtx, #:gotpage_lo15:sym3]
1836 ... */
1b1e81f8
JW
1837
1838 rtx s = gen_rtx_SYMBOL_REF (Pmode, "_GLOBAL_OFFSET_TABLE_");
1839 crtl->uses_pic_offset_table = 1;
1840 emit_move_insn (gp_rtx, gen_rtx_HIGH (Pmode, s));
1841
1842 if (mode != GET_MODE (gp_rtx))
4ba8f0a3
AP
1843 gp_rtx = gen_lowpart (mode, gp_rtx);
1844
1b1e81f8
JW
1845 }
1846
1847 if (mode == ptr_mode)
1848 {
1849 if (mode == DImode)
53021678 1850 insn = gen_ldr_got_small_28k_di (dest, gp_rtx, imm);
1b1e81f8 1851 else
53021678
JW
1852 insn = gen_ldr_got_small_28k_si (dest, gp_rtx, imm);
1853
1854 mem = XVECEXP (SET_SRC (insn), 0, 0);
1b1e81f8
JW
1855 }
1856 else
1857 {
1858 gcc_assert (mode == Pmode);
53021678
JW
1859
1860 insn = gen_ldr_got_small_28k_sidi (dest, gp_rtx, imm);
1861 mem = XVECEXP (XEXP (SET_SRC (insn), 0), 0, 0);
1b1e81f8
JW
1862 }
1863
53021678
JW
1864 /* The operand is expected to be a MEM. Whenever the related insn
1865 pattern changes, the code above that computes MEM should be
1866 updated. */
1867 gcc_assert (GET_CODE (mem) == MEM);
1868 MEM_READONLY_P (mem) = 1;
1869 MEM_NOTRAP_P (mem) = 1;
1870 emit_insn (insn);
1b1e81f8
JW
1871 return;
1872 }
1873
6642bdb4 1874 case SYMBOL_SMALL_GOT_4G:
43e9d192 1875 {
28514dda
YZ
1876 /* In ILP32, the mode of dest can be either SImode or DImode,
1877 while the got entry is always of SImode size. The mode of
1878 dest depends on how dest is used: if dest is assigned to a
1879 pointer (e.g. stored in memory), it has SImode; it may have
1880 DImode if dest is dereferenced to access memory.
1881 This is why we have to handle three different ldr_got_small
1882 patterns here (two patterns for ILP32). */
53021678
JW
1883
1884 rtx insn;
1885 rtx mem;
43e9d192 1886 rtx tmp_reg = dest;
ef4bddc2 1887 machine_mode mode = GET_MODE (dest);
28514dda 1888
43e9d192 1889 if (can_create_pseudo_p ())
28514dda
YZ
1890 tmp_reg = gen_reg_rtx (mode);
1891
1892 emit_move_insn (tmp_reg, gen_rtx_HIGH (mode, imm));
1893 if (mode == ptr_mode)
1894 {
1895 if (mode == DImode)
53021678 1896 insn = gen_ldr_got_small_di (dest, tmp_reg, imm);
28514dda 1897 else
53021678
JW
1898 insn = gen_ldr_got_small_si (dest, tmp_reg, imm);
1899
1900 mem = XVECEXP (SET_SRC (insn), 0, 0);
28514dda
YZ
1901 }
1902 else
1903 {
1904 gcc_assert (mode == Pmode);
53021678
JW
1905
1906 insn = gen_ldr_got_small_sidi (dest, tmp_reg, imm);
1907 mem = XVECEXP (XEXP (SET_SRC (insn), 0), 0, 0);
28514dda
YZ
1908 }
1909
53021678
JW
1910 gcc_assert (GET_CODE (mem) == MEM);
1911 MEM_READONLY_P (mem) = 1;
1912 MEM_NOTRAP_P (mem) = 1;
1913 emit_insn (insn);
43e9d192
IB
1914 return;
1915 }
1916
1917 case SYMBOL_SMALL_TLSGD:
1918 {
5d8a22a5 1919 rtx_insn *insns;
23b88fda
N
1920 machine_mode mode = GET_MODE (dest);
1921 rtx result = gen_rtx_REG (mode, R0_REGNUM);
43e9d192
IB
1922
1923 start_sequence ();
23b88fda
N
1924 if (TARGET_ILP32)
1925 aarch64_emit_call_insn (gen_tlsgd_small_si (result, imm));
1926 else
1927 aarch64_emit_call_insn (gen_tlsgd_small_di (result, imm));
43e9d192
IB
1928 insns = get_insns ();
1929 end_sequence ();
1930
1931 RTL_CONST_CALL_P (insns) = 1;
1932 emit_libcall_block (insns, dest, result, imm);
1933 return;
1934 }
1935
1936 case SYMBOL_SMALL_TLSDESC:
1937 {
ef4bddc2 1938 machine_mode mode = GET_MODE (dest);
621ad2de 1939 rtx x0 = gen_rtx_REG (mode, R0_REGNUM);
43e9d192
IB
1940 rtx tp;
1941
621ad2de
AP
1942 gcc_assert (mode == Pmode || mode == ptr_mode);
1943
2876a13f
JW
1944 /* In ILP32, the got entry is always of SImode size. Unlike
1945 small GOT, the dest is fixed at reg 0. */
1946 if (TARGET_ILP32)
1947 emit_insn (gen_tlsdesc_small_si (imm));
621ad2de 1948 else
2876a13f 1949 emit_insn (gen_tlsdesc_small_di (imm));
43e9d192 1950 tp = aarch64_load_tp (NULL);
621ad2de
AP
1951
1952 if (mode != Pmode)
1953 tp = gen_lowpart (mode, tp);
1954
2876a13f 1955 emit_insn (gen_rtx_SET (dest, gen_rtx_PLUS (mode, tp, x0)));
241dbd9d
QZ
1956 if (REG_P (dest))
1957 set_unique_reg_note (get_last_insn (), REG_EQUIV, imm);
43e9d192
IB
1958 return;
1959 }
1960
79496620 1961 case SYMBOL_SMALL_TLSIE:
43e9d192 1962 {
621ad2de
AP
1963 /* In ILP32, the mode of dest can be either SImode or DImode,
1964 while the got entry is always of SImode size. The mode of
1965 dest depends on how dest is used: if dest is assigned to a
1966 pointer (e.g. stored in memory), it has SImode; it may have
1967 DImode if dest is dereferenced to access memory.
1968 This is why we have to handle three different tlsie_small
1969 patterns here (two patterns for ILP32). */
ef4bddc2 1970 machine_mode mode = GET_MODE (dest);
621ad2de 1971 rtx tmp_reg = gen_reg_rtx (mode);
43e9d192 1972 rtx tp = aarch64_load_tp (NULL);
621ad2de
AP
1973
1974 if (mode == ptr_mode)
1975 {
1976 if (mode == DImode)
1977 emit_insn (gen_tlsie_small_di (tmp_reg, imm));
1978 else
1979 {
1980 emit_insn (gen_tlsie_small_si (tmp_reg, imm));
1981 tp = gen_lowpart (mode, tp);
1982 }
1983 }
1984 else
1985 {
1986 gcc_assert (mode == Pmode);
1987 emit_insn (gen_tlsie_small_sidi (tmp_reg, imm));
1988 }
1989
f7df4a84 1990 emit_insn (gen_rtx_SET (dest, gen_rtx_PLUS (mode, tp, tmp_reg)));
241dbd9d
QZ
1991 if (REG_P (dest))
1992 set_unique_reg_note (get_last_insn (), REG_EQUIV, imm);
43e9d192
IB
1993 return;
1994 }
1995
cbf5629e 1996 case SYMBOL_TLSLE12:
d18ba284 1997 case SYMBOL_TLSLE24:
cbf5629e
JW
1998 case SYMBOL_TLSLE32:
1999 case SYMBOL_TLSLE48:
43e9d192 2000 {
cbf5629e 2001 machine_mode mode = GET_MODE (dest);
43e9d192 2002 rtx tp = aarch64_load_tp (NULL);
e6f7f0e9 2003
cbf5629e
JW
2004 if (mode != Pmode)
2005 tp = gen_lowpart (mode, tp);
2006
2007 switch (type)
2008 {
2009 case SYMBOL_TLSLE12:
2010 emit_insn ((mode == DImode ? gen_tlsle12_di : gen_tlsle12_si)
2011 (dest, tp, imm));
2012 break;
2013 case SYMBOL_TLSLE24:
2014 emit_insn ((mode == DImode ? gen_tlsle24_di : gen_tlsle24_si)
2015 (dest, tp, imm));
2016 break;
2017 case SYMBOL_TLSLE32:
2018 emit_insn ((mode == DImode ? gen_tlsle32_di : gen_tlsle32_si)
2019 (dest, imm));
2020 emit_insn ((mode == DImode ? gen_adddi3 : gen_addsi3)
2021 (dest, dest, tp));
2022 break;
2023 case SYMBOL_TLSLE48:
2024 emit_insn ((mode == DImode ? gen_tlsle48_di : gen_tlsle48_si)
2025 (dest, imm));
2026 emit_insn ((mode == DImode ? gen_adddi3 : gen_addsi3)
2027 (dest, dest, tp));
2028 break;
2029 default:
2030 gcc_unreachable ();
2031 }
e6f7f0e9 2032
241dbd9d
QZ
2033 if (REG_P (dest))
2034 set_unique_reg_note (get_last_insn (), REG_EQUIV, imm);
43e9d192
IB
2035 return;
2036 }
2037
87dd8ab0
MS
2038 case SYMBOL_TINY_GOT:
2039 emit_insn (gen_ldr_got_tiny (dest, imm));
2040 return;
2041
5ae7caad
JW
2042 case SYMBOL_TINY_TLSIE:
2043 {
2044 machine_mode mode = GET_MODE (dest);
2045 rtx tp = aarch64_load_tp (NULL);
2046
2047 if (mode == ptr_mode)
2048 {
2049 if (mode == DImode)
2050 emit_insn (gen_tlsie_tiny_di (dest, imm, tp));
2051 else
2052 {
2053 tp = gen_lowpart (mode, tp);
2054 emit_insn (gen_tlsie_tiny_si (dest, imm, tp));
2055 }
2056 }
2057 else
2058 {
2059 gcc_assert (mode == Pmode);
2060 emit_insn (gen_tlsie_tiny_sidi (dest, imm, tp));
2061 }
2062
241dbd9d
QZ
2063 if (REG_P (dest))
2064 set_unique_reg_note (get_last_insn (), REG_EQUIV, imm);
5ae7caad
JW
2065 return;
2066 }
2067
43e9d192
IB
2068 default:
2069 gcc_unreachable ();
2070 }
2071}
2072
2073/* Emit a move from SRC to DEST. Assume that the move expanders can
2074 handle all moves if !can_create_pseudo_p (). The distinction is
2075 important because, unlike emit_move_insn, the move expanders know
2076 how to force Pmode objects into the constant pool even when the
2077 constant pool address is not itself legitimate. */
2078static rtx
2079aarch64_emit_move (rtx dest, rtx src)
2080{
2081 return (can_create_pseudo_p ()
2082 ? emit_move_insn (dest, src)
2083 : emit_move_insn_1 (dest, src));
2084}
2085
f22d7973
RS
2086/* Apply UNOPTAB to OP and store the result in DEST. */
2087
2088static void
2089aarch64_emit_unop (rtx dest, optab unoptab, rtx op)
2090{
2091 rtx tmp = expand_unop (GET_MODE (dest), unoptab, op, dest, 0);
2092 if (dest != tmp)
2093 emit_move_insn (dest, tmp);
2094}
2095
2096/* Apply BINOPTAB to OP0 and OP1 and store the result in DEST. */
2097
2098static void
2099aarch64_emit_binop (rtx dest, optab binoptab, rtx op0, rtx op1)
2100{
2101 rtx tmp = expand_binop (GET_MODE (dest), binoptab, op0, op1, dest, 0,
2102 OPTAB_DIRECT);
2103 if (dest != tmp)
2104 emit_move_insn (dest, tmp);
2105}
2106
030d03b8
RE
2107/* Split a 128-bit move operation into two 64-bit move operations,
2108 taking care to handle partial overlap of register to register
2109 copies. Special cases are needed when moving between GP regs and
2110 FP regs. SRC can be a register, constant or memory; DST a register
2111 or memory. If either operand is memory it must not have any side
2112 effects. */
43e9d192
IB
2113void
2114aarch64_split_128bit_move (rtx dst, rtx src)
2115{
030d03b8
RE
2116 rtx dst_lo, dst_hi;
2117 rtx src_lo, src_hi;
43e9d192 2118
ef4bddc2 2119 machine_mode mode = GET_MODE (dst);
12dc6974 2120
030d03b8
RE
2121 gcc_assert (mode == TImode || mode == TFmode);
2122 gcc_assert (!(side_effects_p (src) || side_effects_p (dst)));
2123 gcc_assert (mode == GET_MODE (src) || GET_MODE (src) == VOIDmode);
43e9d192
IB
2124
2125 if (REG_P (dst) && REG_P (src))
2126 {
030d03b8
RE
2127 int src_regno = REGNO (src);
2128 int dst_regno = REGNO (dst);
43e9d192 2129
030d03b8 2130 /* Handle FP <-> GP regs. */
43e9d192
IB
2131 if (FP_REGNUM_P (dst_regno) && GP_REGNUM_P (src_regno))
2132 {
030d03b8
RE
2133 src_lo = gen_lowpart (word_mode, src);
2134 src_hi = gen_highpart (word_mode, src);
2135
0016d8d9
RS
2136 emit_insn (gen_aarch64_movlow_di (mode, dst, src_lo));
2137 emit_insn (gen_aarch64_movhigh_di (mode, dst, src_hi));
030d03b8 2138 return;
43e9d192
IB
2139 }
2140 else if (GP_REGNUM_P (dst_regno) && FP_REGNUM_P (src_regno))
2141 {
030d03b8
RE
2142 dst_lo = gen_lowpart (word_mode, dst);
2143 dst_hi = gen_highpart (word_mode, dst);
2144
0016d8d9
RS
2145 emit_insn (gen_aarch64_movdi_low (mode, dst_lo, src));
2146 emit_insn (gen_aarch64_movdi_high (mode, dst_hi, src));
030d03b8 2147 return;
43e9d192 2148 }
43e9d192
IB
2149 }
2150
030d03b8
RE
2151 dst_lo = gen_lowpart (word_mode, dst);
2152 dst_hi = gen_highpart (word_mode, dst);
2153 src_lo = gen_lowpart (word_mode, src);
2154 src_hi = gen_highpart_mode (word_mode, mode, src);
2155
2156 /* At most one pairing may overlap. */
2157 if (reg_overlap_mentioned_p (dst_lo, src_hi))
2158 {
2159 aarch64_emit_move (dst_hi, src_hi);
2160 aarch64_emit_move (dst_lo, src_lo);
2161 }
2162 else
2163 {
2164 aarch64_emit_move (dst_lo, src_lo);
2165 aarch64_emit_move (dst_hi, src_hi);
2166 }
43e9d192
IB
2167}
2168
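/* Illustrative standalone sketch (editor's example in plain C, not part
   of the original file): the overlap-ordering rule above, shown on a toy
   register file.  The copy that reads the register about to be
   overwritten must be emitted first.  Register indices and helper names
   are hypothetical.  */
#include <assert.h>
#include <stdint.h>

static void
split_move_sketch (uint64_t regs[], int dst_lo, int dst_hi,
                   int src_lo, int src_hi)
{
  if (dst_lo == src_hi)
    {
      /* The low destination overlaps the high source, so move the
         high half first.  */
      regs[dst_hi] = regs[src_hi];
      regs[dst_lo] = regs[src_lo];
    }
  else
    {
      regs[dst_lo] = regs[src_lo];
      regs[dst_hi] = regs[src_hi];
    }
}

/* Copy the pair {r0, r1} into {r1, r2}: r1 must be read before it is
   overwritten.  */
static void
split_move_sketch_example (void)
{
  uint64_t regs[4] = { 10, 11, 22, 33 };
  split_move_sketch (regs, 1, 2, 0, 1);
  assert (regs[1] == 10 && regs[2] == 11);
}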
2169bool
2170aarch64_split_128bit_move_p (rtx dst, rtx src)
2171{
2172 return (! REG_P (src)
2173 || ! (FP_REGNUM_P (REGNO (dst)) && FP_REGNUM_P (REGNO (src))));
2174}
2175
8b033a8a
SN
2176/* Split a complex SIMD combine. */
2177
2178void
2179aarch64_split_simd_combine (rtx dst, rtx src1, rtx src2)
2180{
ef4bddc2
RS
2181 machine_mode src_mode = GET_MODE (src1);
2182 machine_mode dst_mode = GET_MODE (dst);
8b033a8a
SN
2183
2184 gcc_assert (VECTOR_MODE_P (dst_mode));
a977dc0c
MC
2185 gcc_assert (register_operand (dst, dst_mode)
2186 && register_operand (src1, src_mode)
2187 && register_operand (src2, src_mode));
8b033a8a 2188
0016d8d9 2189 emit_insn (gen_aarch64_simd_combine (src_mode, dst, src1, src2));
a977dc0c 2190 return;
8b033a8a
SN
2191}
2192
fd4842cd
SN
2193/* Split a complex SIMD move. */
2194
2195void
2196aarch64_split_simd_move (rtx dst, rtx src)
2197{
ef4bddc2
RS
2198 machine_mode src_mode = GET_MODE (src);
2199 machine_mode dst_mode = GET_MODE (dst);
fd4842cd
SN
2200
2201 gcc_assert (VECTOR_MODE_P (dst_mode));
2202
2203 if (REG_P (dst) && REG_P (src))
2204 {
2205 gcc_assert (VECTOR_MODE_P (src_mode));
0016d8d9 2206 emit_insn (gen_aarch64_split_simd_mov (src_mode, dst, src));
fd4842cd
SN
2207 }
2208}
2209
ef22810a
RH
2210bool
2211aarch64_zero_extend_const_eq (machine_mode xmode, rtx x,
2212 machine_mode ymode, rtx y)
2213{
2214 rtx r = simplify_const_unary_operation (ZERO_EXTEND, xmode, y, ymode);
2215 gcc_assert (r != NULL);
2216 return rtx_equal_p (x, r);
2217}
2218
2219
43e9d192 2220static rtx
ef4bddc2 2221aarch64_force_temporary (machine_mode mode, rtx x, rtx value)
43e9d192
IB
2222{
2223 if (can_create_pseudo_p ())
e18b4a81 2224 return force_reg (mode, value);
43e9d192
IB
2225 else
2226 {
f5470a77
RS
2227 gcc_assert (x);
2228 aarch64_emit_move (x, value);
43e9d192
IB
2229 return x;
2230 }
2231}
2232
43cacb12
RS
2233/* Return true if we can move VALUE into a register using a single
2234 CNT[BHWD] instruction. */
2235
2236static bool
2237aarch64_sve_cnt_immediate_p (poly_int64 value)
2238{
2239 HOST_WIDE_INT factor = value.coeffs[0];
2240 /* The coefficient must be [1, 16] * {2, 4, 8, 16}. */
2241 return (value.coeffs[1] == factor
2242 && IN_RANGE (factor, 2, 16 * 16)
2243 && (factor & 1) == 0
2244 && factor <= 16 * (factor & -factor));
2245}
2246
2247/* Likewise for rtx X. */
2248
2249bool
2250aarch64_sve_cnt_immediate_p (rtx x)
2251{
2252 poly_int64 value;
2253 return poly_int_rtx_p (x, &value) && aarch64_sve_cnt_immediate_p (value);
2254}
2255
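/* Illustrative standalone sketch (editor's example in plain C, not part
   of the original file): the factor check above, assuming the value is
   already known to scale linearly with the vector length (coeffs[0] ==
   coeffs[1] == FACTOR).  The "factor <= 16 * lowest set bit" test is
   what restricts FACTOR to [1, 16] * {2, 4, 8, 16}.  The helper name is
   hypothetical.  */
#include <stdbool.h>

static bool
sve_cnt_factor_ok_sketch (long factor)
{
  return (factor >= 2
          && factor <= 16 * 16
          && (factor & 1) == 0
          && factor <= 16 * (factor & -factor));
}

/* sve_cnt_factor_ok_sketch (6)  -> true  (emitted as CNTD ..., MUL #3)
   sve_cnt_factor_ok_sketch (34) -> false (lowest set bit is 2, 34 > 32)  */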
2256/* Return the asm string for an instruction with a CNT-like vector size
2257 operand (a vector pattern followed by a multiplier in the range [1, 16]).
2258 PREFIX is the mnemonic without the size suffix and OPERANDS is the
2259 first part of the operands template (the part that comes before the
2260 vector size itself). FACTOR is the number of quadwords.
2261 NELTS_PER_VQ, if nonzero, is the number of elements in each quadword.
2262 If it is zero, we can use any element size. */
2263
2264static char *
2265aarch64_output_sve_cnt_immediate (const char *prefix, const char *operands,
2266 unsigned int factor,
2267 unsigned int nelts_per_vq)
2268{
2269 static char buffer[sizeof ("sqincd\t%x0, %w0, all, mul #16")];
2270
2271 if (nelts_per_vq == 0)
2272 /* There is some overlap in the ranges of the four CNT instructions.
2273 Here we always use the smallest possible element size, so that the
2274 multiplier is 1 wherever possible. */
2275 nelts_per_vq = factor & -factor;
2276 int shift = std::min (exact_log2 (nelts_per_vq), 4);
2277 gcc_assert (IN_RANGE (shift, 1, 4));
2278 char suffix = "dwhb"[shift - 1];
2279
2280 factor >>= shift;
2281 unsigned int written;
2282 if (factor == 1)
2283 written = snprintf (buffer, sizeof (buffer), "%s%c\t%s",
2284 prefix, suffix, operands);
2285 else
2286 written = snprintf (buffer, sizeof (buffer), "%s%c\t%s, all, mul #%d",
2287 prefix, suffix, operands, factor);
2288 gcc_assert (written < sizeof (buffer));
2289 return buffer;
2290}
2291
2292/* Return the asm string for an instruction with a CNT-like vector size
2293 operand (a vector pattern followed by a multiplier in the range [1, 16]).
2294 PREFIX is the mnemonic without the size suffix and OPERANDS is the
2295 first part of the operands template (the part that comes before the
2296 vector size itself). X is the value of the vector size operand,
2297 as a polynomial integer rtx. */
2298
2299char *
2300aarch64_output_sve_cnt_immediate (const char *prefix, const char *operands,
2301 rtx x)
2302{
2303 poly_int64 value = rtx_to_poly_int64 (x);
2304 gcc_assert (aarch64_sve_cnt_immediate_p (value));
2305 return aarch64_output_sve_cnt_immediate (prefix, operands,
2306 value.coeffs[1], 0);
2307}
2308
2309/* Return true if we can add VALUE to a register using a single ADDVL
2310 or ADDPL instruction. */
2311
2312static bool
2313aarch64_sve_addvl_addpl_immediate_p (poly_int64 value)
2314{
2315 HOST_WIDE_INT factor = value.coeffs[0];
2316 if (factor == 0 || value.coeffs[1] != factor)
2317 return false;
2318 /* FACTOR counts VG / 2, so a value of 2 is one predicate width
2319 and a value of 16 is one vector width. */
2320 return (((factor & 15) == 0 && IN_RANGE (factor, -32 * 16, 31 * 16))
2321 || ((factor & 1) == 0 && IN_RANGE (factor, -32 * 2, 31 * 2)));
2322}
2323
2324/* Likewise for rtx X. */
2325
2326bool
2327aarch64_sve_addvl_addpl_immediate_p (rtx x)
2328{
2329 poly_int64 value;
2330 return (poly_int_rtx_p (x, &value)
2331 && aarch64_sve_addvl_addpl_immediate_p (value));
2332}
2333
2334/* Return the asm string for adding ADDVL or ADDPL immediate OFFSET to operand 1
2335 and storing the result in operand 0. */
2336
2337char *
2338aarch64_output_sve_addvl_addpl (rtx dest, rtx base, rtx offset)
2339{
2340 static char buffer[sizeof ("addpl\t%x0, %x1, #-") + 3 * sizeof (int)];
2341 poly_int64 offset_value = rtx_to_poly_int64 (offset);
2342 gcc_assert (aarch64_sve_addvl_addpl_immediate_p (offset_value));
2343
2344 /* Use INC or DEC if possible. */
2345 if (rtx_equal_p (dest, base) && GP_REGNUM_P (REGNO (dest)))
2346 {
2347 if (aarch64_sve_cnt_immediate_p (offset_value))
2348 return aarch64_output_sve_cnt_immediate ("inc", "%x0",
2349 offset_value.coeffs[1], 0);
2350 if (aarch64_sve_cnt_immediate_p (-offset_value))
2351 return aarch64_output_sve_cnt_immediate ("dec", "%x0",
2352 -offset_value.coeffs[1], 0);
2353 }
2354
2355 int factor = offset_value.coeffs[1];
2356 if ((factor & 15) == 0)
2357 snprintf (buffer, sizeof (buffer), "addvl\t%%x0, %%x1, #%d", factor / 16);
2358 else
2359 snprintf (buffer, sizeof (buffer), "addpl\t%%x0, %%x1, #%d", factor / 2);
2360 return buffer;
2361}
2362
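/* Illustrative standalone sketch (editor's example in plain C, not part
   of the original file): choosing between ADDVL and ADDPL for a
   VG-scaled offset, where FACTOR is in units of half a 64-bit granule
   count (16 is one full vector, 2 is one predicate width).  The x0/x1
   operands and the helper name are hypothetical.  */
#include <stdbool.h>
#include <stdio.h>

static bool
addvl_addpl_sketch (int factor, char *buf, size_t len)
{
  if ((factor & 15) == 0 && factor >= -32 * 16 && factor <= 31 * 16)
    snprintf (buf, len, "addvl\tx0, x1, #%d", factor / 16);
  else if ((factor & 1) == 0 && factor >= -32 * 2 && factor <= 31 * 2)
    snprintf (buf, len, "addpl\tx0, x1, #%d", factor / 2);
  else
    return false;   /* Needs a CNT-based sequence instead.  */
  return true;
}

/* FACTOR  32 -> "addvl x0, x1, #2"
   FACTOR   6 -> "addpl x0, x1, #3"
   FACTOR 100 -> false  */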
2363/* Return true if X is a valid immediate for an SVE vector INC or DEC
2364 instruction. If it is, store the number of elements in each vector
2365 quadword in *NELTS_PER_VQ_OUT (if nonnull) and store the multiplication
2366 factor in *FACTOR_OUT (if nonnull). */
2367
2368bool
2369aarch64_sve_inc_dec_immediate_p (rtx x, int *factor_out,
2370 unsigned int *nelts_per_vq_out)
2371{
2372 rtx elt;
2373 poly_int64 value;
2374
2375 if (!const_vec_duplicate_p (x, &elt)
2376 || !poly_int_rtx_p (elt, &value))
2377 return false;
2378
2379 unsigned int nelts_per_vq = 128 / GET_MODE_UNIT_BITSIZE (GET_MODE (x));
2380 if (nelts_per_vq != 8 && nelts_per_vq != 4 && nelts_per_vq != 2)
2381 /* There's no vector INCB. */
2382 return false;
2383
2384 HOST_WIDE_INT factor = value.coeffs[0];
2385 if (value.coeffs[1] != factor)
2386 return false;
2387
2388 /* The coefficient must be [1, 16] * NELTS_PER_VQ. */
2389 if ((factor % nelts_per_vq) != 0
2390 || !IN_RANGE (abs (factor), nelts_per_vq, 16 * nelts_per_vq))
2391 return false;
2392
2393 if (factor_out)
2394 *factor_out = factor;
2395 if (nelts_per_vq_out)
2396 *nelts_per_vq_out = nelts_per_vq;
2397 return true;
2398}
2399
2400/* Return true if X is a valid immediate for an SVE vector INC or DEC
2401 instruction. */
2402
2403bool
2404aarch64_sve_inc_dec_immediate_p (rtx x)
2405{
2406 return aarch64_sve_inc_dec_immediate_p (x, NULL, NULL);
2407}
2408
2409/* Return the asm template for an SVE vector INC or DEC instruction.
2410 OPERANDS gives the operands before the vector count and X is the
2411 value of the vector count operand itself. */
2412
2413char *
2414aarch64_output_sve_inc_dec_immediate (const char *operands, rtx x)
2415{
2416 int factor;
2417 unsigned int nelts_per_vq;
2418 if (!aarch64_sve_inc_dec_immediate_p (x, &factor, &nelts_per_vq))
2419 gcc_unreachable ();
2420 if (factor < 0)
2421 return aarch64_output_sve_cnt_immediate ("dec", operands, -factor,
2422 nelts_per_vq);
2423 else
2424 return aarch64_output_sve_cnt_immediate ("inc", operands, factor,
2425 nelts_per_vq);
2426}
43e9d192 2427
82614948
RR
2428static int
2429aarch64_internal_mov_immediate (rtx dest, rtx imm, bool generate,
77e994c9 2430 scalar_int_mode mode)
43e9d192 2431{
43e9d192 2432 int i;
9a4865db
WD
2433 unsigned HOST_WIDE_INT val, val2, mask;
2434 int one_match, zero_match;
2435 int num_insns;
43e9d192 2436
9a4865db
WD
2437 val = INTVAL (imm);
2438
2439 if (aarch64_move_imm (val, mode))
43e9d192 2440 {
82614948 2441 if (generate)
f7df4a84 2442 emit_insn (gen_rtx_SET (dest, imm));
9a4865db 2443 return 1;
43e9d192
IB
2444 }
2445
9de00935
TC
2446 /* Check to see if the low 32 bits are either 0xffffXXXX or 0xXXXXffff
2447 (with XXXX non-zero). In that case check to see if the move can be done in
2448 a smaller mode. */
2449 val2 = val & 0xffffffff;
2450 if (mode == DImode
2451 && aarch64_move_imm (val2, SImode)
2452 && (((val >> 32) & 0xffff) == 0 || (val >> 48) == 0))
2453 {
2454 if (generate)
2455 emit_insn (gen_rtx_SET (dest, GEN_INT (val2)));
2456
2457 /* Check if we have to emit a second instruction by checking to see
2458 if any of the upper 32 bits of the original DI mode value is set. */
2459 if (val == val2)
2460 return 1;
2461
2462 i = (val >> 48) ? 48 : 32;
2463
2464 if (generate)
2465 emit_insn (gen_insv_immdi (dest, GEN_INT (i),
2466 GEN_INT ((val >> i) & 0xffff)));
2467
2468 return 2;
2469 }
2470
9a4865db 2471 if ((val >> 32) == 0 || mode == SImode)
43e9d192 2472 {
82614948
RR
2473 if (generate)
2474 {
9a4865db
WD
2475 emit_insn (gen_rtx_SET (dest, GEN_INT (val & 0xffff)));
2476 if (mode == SImode)
2477 emit_insn (gen_insv_immsi (dest, GEN_INT (16),
2478 GEN_INT ((val >> 16) & 0xffff)));
2479 else
2480 emit_insn (gen_insv_immdi (dest, GEN_INT (16),
2481 GEN_INT ((val >> 16) & 0xffff)));
82614948 2482 }
9a4865db 2483 return 2;
43e9d192
IB
2484 }
2485
2486 /* Remaining cases are all for DImode. */
2487
43e9d192 2488 mask = 0xffff;
9a4865db
WD
2489 zero_match = ((val & mask) == 0) + ((val & (mask << 16)) == 0) +
2490 ((val & (mask << 32)) == 0) + ((val & (mask << 48)) == 0);
2491 one_match = ((~val & mask) == 0) + ((~val & (mask << 16)) == 0) +
2492 ((~val & (mask << 32)) == 0) + ((~val & (mask << 48)) == 0);
43e9d192 2493
62c8d76c 2494 if (zero_match != 2 && one_match != 2)
43e9d192 2495 {
62c8d76c
WD
2496 /* Try emitting a bitmask immediate with a movk replacing 16 bits.
2497 For a 64-bit bitmask try whether changing 16 bits to all ones or
2498 zeroes creates a valid bitmask. To check any repeated bitmask,
2499 try using 16 bits from the other 32-bit half of val. */
43e9d192 2500
62c8d76c 2501 for (i = 0; i < 64; i += 16, mask <<= 16)
43e9d192 2502 {
62c8d76c
WD
2503 val2 = val & ~mask;
2504 if (val2 != val && aarch64_bitmask_imm (val2, mode))
2505 break;
2506 val2 = val | mask;
2507 if (val2 != val && aarch64_bitmask_imm (val2, mode))
2508 break;
2509 val2 = val2 & ~mask;
2510 val2 = val2 | (((val2 >> 32) | (val2 << 32)) & mask);
2511 if (val2 != val && aarch64_bitmask_imm (val2, mode))
2512 break;
43e9d192 2513 }
62c8d76c 2514 if (i != 64)
43e9d192 2515 {
62c8d76c 2516 if (generate)
43e9d192 2517 {
62c8d76c
WD
2518 emit_insn (gen_rtx_SET (dest, GEN_INT (val2)));
2519 emit_insn (gen_insv_immdi (dest, GEN_INT (i),
9a4865db 2520 GEN_INT ((val >> i) & 0xffff)));
43e9d192 2521 }
1312b1ba 2522 return 2;
43e9d192
IB
2523 }
2524 }
2525
9a4865db
WD
2526 /* Generate 2-4 instructions, skipping 16 bits of all zeroes or ones which
2527 are emitted by the initial mov. If one_match > zero_match, skip set bits,
2528 otherwise skip zero bits. */
2c274197 2529
9a4865db 2530 num_insns = 1;
43e9d192 2531 mask = 0xffff;
9a4865db
WD
2532 val2 = one_match > zero_match ? ~val : val;
2533 i = (val2 & mask) != 0 ? 0 : (val2 & (mask << 16)) != 0 ? 16 : 32;
2534
2535 if (generate)
2536 emit_insn (gen_rtx_SET (dest, GEN_INT (one_match > zero_match
2537 ? (val | ~(mask << i))
2538 : (val & (mask << i)))));
2539 for (i += 16; i < 64; i += 16)
43e9d192 2540 {
9a4865db
WD
2541 if ((val2 & (mask << i)) == 0)
2542 continue;
2543 if (generate)
2544 emit_insn (gen_insv_immdi (dest, GEN_INT (i),
2545 GEN_INT ((val >> i) & 0xffff)));
2546 num_insns ++;
82614948
RR
2547 }
2548
2549 return num_insns;
2550}
2551
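/* Illustrative standalone sketch (editor's example in plain C, not part
   of the original file): the MOV/MOVK counting idea used above, ignoring
   the bitmask-immediate and 32-bit shortcuts.  Pick the 16-bit
   background (all-zeros for MOVZ, all-ones for MOVN) that matches the
   most chunks, then budget one MOVK per remaining chunk.  The helper
   name is hypothetical.  */
#include <stdint.h>

static int
mov_imm_insns_sketch (uint64_t val)
{
  int zero_chunks = 0, one_chunks = 0;
  for (int i = 0; i < 64; i += 16)
    {
      uint64_t chunk = (val >> i) & 0xffff;
      if (chunk == 0)
        zero_chunks++;
      else if (chunk == 0xffff)
        one_chunks++;
    }
  int matched = zero_chunks > one_chunks ? zero_chunks : one_chunks;
  /* The initial MOVZ/MOVN also sets one non-background chunk, so the
     total is the number of non-background chunks, but at least 1.  */
  return matched == 4 ? 1 : 4 - matched;
}

/* mov_imm_insns_sketch (0x0000123400005678ull) -> 2  (MOVZ + MOVK)
   mov_imm_insns_sketch (0xffffffff12345678ull) -> 2  (MOVN + MOVK)  */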
c0bb5bc5
WD
2552/* Return whether imm is a 128-bit immediate which is simple enough to
2553 expand inline. */
2554bool
2555aarch64_mov128_immediate (rtx imm)
2556{
2557 if (GET_CODE (imm) == CONST_INT)
2558 return true;
2559
2560 gcc_assert (CONST_WIDE_INT_NUNITS (imm) == 2);
2561
2562 rtx lo = GEN_INT (CONST_WIDE_INT_ELT (imm, 0));
2563 rtx hi = GEN_INT (CONST_WIDE_INT_ELT (imm, 1));
2564
2565 return aarch64_internal_mov_immediate (NULL_RTX, lo, false, DImode)
2566 + aarch64_internal_mov_immediate (NULL_RTX, hi, false, DImode) <= 4;
2567}
2568
2569
43cacb12
RS
2570/* Return the number of temporary registers that aarch64_add_offset_1
2571 would need to add OFFSET to a register. */
2572
2573static unsigned int
2574aarch64_add_offset_1_temporaries (HOST_WIDE_INT offset)
2575{
2576 return abs_hwi (offset) < 0x1000000 ? 0 : 1;
2577}
2578
f5470a77
RS
2579/* A subroutine of aarch64_add_offset. Set DEST to SRC + OFFSET for
2580 a non-polynomial OFFSET. MODE is the mode of the addition.
2581 FRAME_RELATED_P is true if the RTX_FRAME_RELATED flag should
2582 be set and CFA adjustments added to the generated instructions.
2583
2584 TEMP1, if nonnull, is a register of mode MODE that can be used as a
2585 temporary if register allocation is already complete. This temporary
2586 register may overlap DEST but must not overlap SRC. If TEMP1 is known
2587 to hold abs (OFFSET), EMIT_MOVE_IMM can be set to false to avoid emitting
2588 the immediate again.
0100c5f9
RS
2589
2590 Since this function may be used to adjust the stack pointer, we must
2591 ensure that it cannot cause transient stack deallocation (for example
2592 by first incrementing SP and then decrementing when adjusting by a
2593 large immediate). */
2594
2595static void
f5470a77
RS
2596aarch64_add_offset_1 (scalar_int_mode mode, rtx dest,
2597 rtx src, HOST_WIDE_INT offset, rtx temp1,
2598 bool frame_related_p, bool emit_move_imm)
0100c5f9 2599{
f5470a77
RS
2600 gcc_assert (emit_move_imm || temp1 != NULL_RTX);
2601 gcc_assert (temp1 == NULL_RTX || !reg_overlap_mentioned_p (temp1, src));
2602
2603 HOST_WIDE_INT moffset = abs_hwi (offset);
0100c5f9
RS
2604 rtx_insn *insn;
2605
f5470a77
RS
2606 if (!moffset)
2607 {
2608 if (!rtx_equal_p (dest, src))
2609 {
2610 insn = emit_insn (gen_rtx_SET (dest, src));
2611 RTX_FRAME_RELATED_P (insn) = frame_related_p;
2612 }
2613 return;
2614 }
0100c5f9
RS
2615
2616 /* Single instruction adjustment. */
f5470a77 2617 if (aarch64_uimm12_shift (moffset))
0100c5f9 2618 {
f5470a77 2619 insn = emit_insn (gen_add3_insn (dest, src, GEN_INT (offset)));
0100c5f9
RS
2620 RTX_FRAME_RELATED_P (insn) = frame_related_p;
2621 return;
2622 }
2623
f5470a77
RS
2624 /* Emit 2 additions/subtractions if the adjustment is less than 24 bits
2625 and either:
2626
2627 a) the offset cannot be loaded by a 16-bit move or
2628 b) there is no spare register into which we can move it. */
2629 if (moffset < 0x1000000
2630 && ((!temp1 && !can_create_pseudo_p ())
2631 || !aarch64_move_imm (moffset, mode)))
0100c5f9 2632 {
f5470a77 2633 HOST_WIDE_INT low_off = moffset & 0xfff;
0100c5f9 2634
f5470a77
RS
2635 low_off = offset < 0 ? -low_off : low_off;
2636 insn = emit_insn (gen_add3_insn (dest, src, GEN_INT (low_off)));
0100c5f9 2637 RTX_FRAME_RELATED_P (insn) = frame_related_p;
f5470a77 2638 insn = emit_insn (gen_add2_insn (dest, GEN_INT (offset - low_off)));
0100c5f9
RS
2639 RTX_FRAME_RELATED_P (insn) = frame_related_p;
2640 return;
2641 }
2642
2643 /* Emit a move immediate if required and an addition/subtraction. */
0100c5f9 2644 if (emit_move_imm)
f5470a77
RS
2645 {
2646 gcc_assert (temp1 != NULL_RTX || can_create_pseudo_p ());
2647 temp1 = aarch64_force_temporary (mode, temp1, GEN_INT (moffset));
2648 }
2649 insn = emit_insn (offset < 0
2650 ? gen_sub3_insn (dest, src, temp1)
2651 : gen_add3_insn (dest, src, temp1));
0100c5f9
RS
2652 if (frame_related_p)
2653 {
2654 RTX_FRAME_RELATED_P (insn) = frame_related_p;
f5470a77
RS
2655 rtx adj = plus_constant (mode, src, offset);
2656 add_reg_note (insn, REG_CFA_ADJUST_CFA, gen_rtx_SET (dest, adj));
0100c5f9
RS
2657 }
2658}
2659
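/* Illustrative standalone sketch (editor's example in plain C, not part
   of the original file): the two-add split used above for adjustments
   below 1 << 24 when no scratch register is available.  The first add
   covers the low 12 bits; the remainder is a multiple of 4096 and so
   fits the shifted form "add ..., #imm, lsl #12".  The helper name is
   hypothetical.  */
#include <assert.h>
#include <stdint.h>

static void
split_add_sketch (int64_t offset, int64_t *first, int64_t *second)
{
  int64_t moffset = offset < 0 ? -offset : offset;
  assert (moffset < 0x1000000);
  int64_t low = moffset & 0xfff;
  *first = offset < 0 ? -low : low;
  *second = offset - *first;
}

/* For OFFSET 0x123456: first = 0x456, second = 0x123000
   (0x123000 >> 12 == 0x123, which fits in 12 bits).  */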
43cacb12
RS
2660/* Return the number of temporary registers that aarch64_add_offset
2661 would need to move OFFSET into a register or add OFFSET to a register;
2662 ADD_P is true if we want the latter rather than the former. */
2663
2664static unsigned int
2665aarch64_offset_temporaries (bool add_p, poly_int64 offset)
2666{
2667 /* This follows the same structure as aarch64_add_offset. */
2668 if (add_p && aarch64_sve_addvl_addpl_immediate_p (offset))
2669 return 0;
2670
2671 unsigned int count = 0;
2672 HOST_WIDE_INT factor = offset.coeffs[1];
2673 HOST_WIDE_INT constant = offset.coeffs[0] - factor;
2674 poly_int64 poly_offset (factor, factor);
2675 if (add_p && aarch64_sve_addvl_addpl_immediate_p (poly_offset))
2676 /* Need one register for the ADDVL/ADDPL result. */
2677 count += 1;
2678 else if (factor != 0)
2679 {
2680 factor = abs (factor);
2681 if (factor > 16 * (factor & -factor))
2682 /* Need one register for the CNT result and one for the multiplication
2683 factor. If necessary, the second temporary can be reused for the
2684 constant part of the offset. */
2685 return 2;
2686 /* Need one register for the CNT result (which might then
2687 be shifted). */
2688 count += 1;
2689 }
2690 return count + aarch64_add_offset_1_temporaries (constant);
2691}
2692
2693/* If X can be represented as a poly_int64, return the number
2694 of temporaries that are required to add it to a register.
2695 Return -1 otherwise. */
2696
2697int
2698aarch64_add_offset_temporaries (rtx x)
2699{
2700 poly_int64 offset;
2701 if (!poly_int_rtx_p (x, &offset))
2702 return -1;
2703 return aarch64_offset_temporaries (true, offset);
2704}
2705
f5470a77
RS
2706/* Set DEST to SRC + OFFSET. MODE is the mode of the addition.
2707 FRAME_RELATED_P is true if the RTX_FRAME_RELATED flag should
2708 be set and CFA adjustments added to the generated instructions.
2709
2710 TEMP1, if nonnull, is a register of mode MODE that can be used as a
2711 temporary if register allocation is already complete. This temporary
43cacb12
RS
2712 register may overlap DEST if !FRAME_RELATED_P but must not overlap SRC.
2713 If TEMP1 is known to hold abs (OFFSET), EMIT_MOVE_IMM can be set to
2714 false to avoid emitting the immediate again.
2715
2716 TEMP2, if nonnull, is a second temporary register that doesn't
2717 overlap either DEST or SRC.
f5470a77
RS
2718
2719 Since this function may be used to adjust the stack pointer, we must
2720 ensure that it cannot cause transient stack deallocation (for example
2721 by first incrementing SP and then decrementing when adjusting by a
2722 large immediate). */
2723
2724static void
2725aarch64_add_offset (scalar_int_mode mode, rtx dest, rtx src,
43cacb12
RS
2726 poly_int64 offset, rtx temp1, rtx temp2,
2727 bool frame_related_p, bool emit_move_imm = true)
0100c5f9 2728{
f5470a77
RS
2729 gcc_assert (emit_move_imm || temp1 != NULL_RTX);
2730 gcc_assert (temp1 == NULL_RTX || !reg_overlap_mentioned_p (temp1, src));
43cacb12
RS
2731 gcc_assert (temp1 == NULL_RTX
2732 || !frame_related_p
2733 || !reg_overlap_mentioned_p (temp1, dest));
2734 gcc_assert (temp2 == NULL_RTX || !reg_overlap_mentioned_p (dest, temp2));
2735
2736 /* Try using ADDVL or ADDPL to add the whole value. */
2737 if (src != const0_rtx && aarch64_sve_addvl_addpl_immediate_p (offset))
2738 {
2739 rtx offset_rtx = gen_int_mode (offset, mode);
2740 rtx_insn *insn = emit_insn (gen_add3_insn (dest, src, offset_rtx));
2741 RTX_FRAME_RELATED_P (insn) = frame_related_p;
2742 return;
2743 }
2744
2745 /* Coefficient 1 is multiplied by the number of 128-bit blocks in an
2746 SVE vector register, over and above the minimum size of 128 bits.
2747 This is equivalent to half the value returned by CNTD with a
2748 vector shape of ALL. */
2749 HOST_WIDE_INT factor = offset.coeffs[1];
2750 HOST_WIDE_INT constant = offset.coeffs[0] - factor;
2751
2752 /* Try using ADDVL or ADDPL to add the VG-based part. */
2753 poly_int64 poly_offset (factor, factor);
2754 if (src != const0_rtx
2755 && aarch64_sve_addvl_addpl_immediate_p (poly_offset))
2756 {
2757 rtx offset_rtx = gen_int_mode (poly_offset, mode);
2758 if (frame_related_p)
2759 {
2760 rtx_insn *insn = emit_insn (gen_add3_insn (dest, src, offset_rtx));
2761 RTX_FRAME_RELATED_P (insn) = true;
2762 src = dest;
2763 }
2764 else
2765 {
2766 rtx addr = gen_rtx_PLUS (mode, src, offset_rtx);
2767 src = aarch64_force_temporary (mode, temp1, addr);
2768 temp1 = temp2;
2769 temp2 = NULL_RTX;
2770 }
2771 }
2772 /* Otherwise use a CNT-based sequence. */
2773 else if (factor != 0)
2774 {
2775 /* Use a subtraction if we have a negative factor. */
2776 rtx_code code = PLUS;
2777 if (factor < 0)
2778 {
2779 factor = -factor;
2780 code = MINUS;
2781 }
2782
2783 /* Calculate CNTD * FACTOR / 2. First try to fold the division
2784 into the multiplication. */
2785 rtx val;
2786 int shift = 0;
2787 if (factor & 1)
2788 /* Use a right shift by 1. */
2789 shift = -1;
2790 else
2791 factor /= 2;
2792 HOST_WIDE_INT low_bit = factor & -factor;
2793 if (factor <= 16 * low_bit)
2794 {
2795 if (factor > 16 * 8)
2796 {
2797 /* "CNTB Xn, ALL, MUL #FACTOR" is out of range, so calculate
2798 the value with the minimum multiplier and shift it into
2799 position. */
2800 int extra_shift = exact_log2 (low_bit);
2801 shift += extra_shift;
2802 factor >>= extra_shift;
2803 }
2804 val = gen_int_mode (poly_int64 (factor * 2, factor * 2), mode);
2805 }
2806 else
2807 {
2808 /* Use CNTD, then multiply it by FACTOR. */
2809 val = gen_int_mode (poly_int64 (2, 2), mode);
2810 val = aarch64_force_temporary (mode, temp1, val);
2811
2812 /* Go back to using a negative multiplication factor if we have
2813 no register from which to subtract. */
2814 if (code == MINUS && src == const0_rtx)
2815 {
2816 factor = -factor;
2817 code = PLUS;
2818 }
2819 rtx coeff1 = gen_int_mode (factor, mode);
2820 coeff1 = aarch64_force_temporary (mode, temp2, coeff1);
2821 val = gen_rtx_MULT (mode, val, coeff1);
2822 }
2823
2824 if (shift > 0)
2825 {
2826 /* Multiply by 1 << SHIFT. */
2827 val = aarch64_force_temporary (mode, temp1, val);
2828 val = gen_rtx_ASHIFT (mode, val, GEN_INT (shift));
2829 }
2830 else if (shift == -1)
2831 {
2832 /* Divide by 2. */
2833 val = aarch64_force_temporary (mode, temp1, val);
2834 val = gen_rtx_ASHIFTRT (mode, val, const1_rtx);
2835 }
2836
2837 /* Calculate SRC +/- CNTD * FACTOR / 2. */
2838 if (src != const0_rtx)
2839 {
2840 val = aarch64_force_temporary (mode, temp1, val);
2841 val = gen_rtx_fmt_ee (code, mode, src, val);
2842 }
2843 else if (code == MINUS)
2844 {
2845 val = aarch64_force_temporary (mode, temp1, val);
2846 val = gen_rtx_NEG (mode, val);
2847 }
2848
2849 if (constant == 0 || frame_related_p)
2850 {
2851 rtx_insn *insn = emit_insn (gen_rtx_SET (dest, val));
2852 if (frame_related_p)
2853 {
2854 RTX_FRAME_RELATED_P (insn) = true;
2855 add_reg_note (insn, REG_CFA_ADJUST_CFA,
2856 gen_rtx_SET (dest, plus_constant (Pmode, src,
2857 poly_offset)));
2858 }
2859 src = dest;
2860 if (constant == 0)
2861 return;
2862 }
2863 else
2864 {
2865 src = aarch64_force_temporary (mode, temp1, val);
2866 temp1 = temp2;
2867 temp2 = NULL_RTX;
2868 }
2869
2870 emit_move_imm = true;
2871 }
f5470a77 2872
f5470a77
RS
2873 aarch64_add_offset_1 (mode, dest, src, constant, temp1,
2874 frame_related_p, emit_move_imm);
0100c5f9
RS
2875}
2876
43cacb12
RS
2877/* Like aarch64_add_offset, but the offset is given as an rtx rather
2878 than a poly_int64. */
2879
2880void
2881aarch64_split_add_offset (scalar_int_mode mode, rtx dest, rtx src,
2882 rtx offset_rtx, rtx temp1, rtx temp2)
2883{
2884 aarch64_add_offset (mode, dest, src, rtx_to_poly_int64 (offset_rtx),
2885 temp1, temp2, false);
2886}
2887
f5470a77
RS
2888/* Add DELTA to the stack pointer, marking the instructions frame-related.
2889 TEMP1 is available as a temporary if nonnull. EMIT_MOVE_IMM is false
2890 if TEMP1 already contains abs (DELTA). */
2891
0100c5f9 2892static inline void
43cacb12 2893aarch64_add_sp (rtx temp1, rtx temp2, poly_int64 delta, bool emit_move_imm)
0100c5f9 2894{
f5470a77 2895 aarch64_add_offset (Pmode, stack_pointer_rtx, stack_pointer_rtx, delta,
43cacb12 2896 temp1, temp2, true, emit_move_imm);
0100c5f9
RS
2897}
2898
f5470a77
RS
2899/* Subtract DELTA from the stack pointer, marking the instructions
2900 frame-related if FRAME_RELATED_P. TEMP1 is available as a temporary
2901 if nonnull. */
2902
0100c5f9 2903static inline void
cd1bef27
JL
2904aarch64_sub_sp (rtx temp1, rtx temp2, poly_int64 delta, bool frame_related_p,
2905 bool emit_move_imm = true)
0100c5f9 2906{
f5470a77 2907 aarch64_add_offset (Pmode, stack_pointer_rtx, stack_pointer_rtx, -delta,
cd1bef27 2908 temp1, temp2, frame_related_p, emit_move_imm);
0100c5f9 2909}
82614948 2910
43cacb12
RS
2911/* Set DEST to (vec_series BASE STEP). */
2912
2913static void
2914aarch64_expand_vec_series (rtx dest, rtx base, rtx step)
82614948
RR
2915{
2916 machine_mode mode = GET_MODE (dest);
43cacb12
RS
2917 scalar_mode inner = GET_MODE_INNER (mode);
2918
2919 /* Each operand can be a register or an immediate in the range [-16, 15]. */
2920 if (!aarch64_sve_index_immediate_p (base))
2921 base = force_reg (inner, base);
2922 if (!aarch64_sve_index_immediate_p (step))
2923 step = force_reg (inner, step);
2924
2925 emit_set_insn (dest, gen_rtx_VEC_SERIES (mode, base, step));
2926}
82614948 2927
43cacb12
RS
2928/* Try to duplicate SRC into SVE register DEST, given that SRC is an
2929 integer of mode INT_MODE. Return true on success. */
2930
2931static bool
2932aarch64_expand_sve_widened_duplicate (rtx dest, scalar_int_mode src_mode,
2933 rtx src)
2934{
2935 /* If the constant is smaller than 128 bits, we can do the move
2936 using a vector of SRC_MODEs. */
2937 if (src_mode != TImode)
2938 {
2939 poly_uint64 count = exact_div (GET_MODE_SIZE (GET_MODE (dest)),
2940 GET_MODE_SIZE (src_mode));
2941 machine_mode dup_mode = mode_for_vector (src_mode, count).require ();
2942 emit_move_insn (gen_lowpart (dup_mode, dest),
2943 gen_const_vec_duplicate (dup_mode, src));
2944 return true;
2945 }
2946
947b1372 2947 /* Use LD1RQ[BHWD] to load the 128 bits from memory. */
43cacb12
RS
2948 src = force_const_mem (src_mode, src);
2949 if (!src)
2950 return false;
2951
2952 /* Make sure that the address is legitimate. */
2953 if (!aarch64_sve_ld1r_operand_p (src))
2954 {
2955 rtx addr = force_reg (Pmode, XEXP (src, 0));
2956 src = replace_equiv_address (src, addr);
2957 }
2958
947b1372
RS
2959 machine_mode mode = GET_MODE (dest);
2960 unsigned int elem_bytes = GET_MODE_UNIT_SIZE (mode);
2961 machine_mode pred_mode = aarch64_sve_pred_mode (elem_bytes).require ();
2962 rtx ptrue = force_reg (pred_mode, CONSTM1_RTX (pred_mode));
2963 src = gen_rtx_UNSPEC (mode, gen_rtvec (2, ptrue, src), UNSPEC_LD1RQ);
2964 emit_insn (gen_rtx_SET (dest, src));
43cacb12
RS
2965 return true;
2966}
2967
2968/* Expand a move of general CONST_VECTOR SRC into DEST, given that it
2969 isn't a simple duplicate or series. */
2970
2971static void
2972aarch64_expand_sve_const_vector (rtx dest, rtx src)
2973{
2974 machine_mode mode = GET_MODE (src);
2975 unsigned int npatterns = CONST_VECTOR_NPATTERNS (src);
2976 unsigned int nelts_per_pattern = CONST_VECTOR_NELTS_PER_PATTERN (src);
2977 gcc_assert (npatterns > 1);
2978
2979 if (nelts_per_pattern == 1)
2980 {
2981 /* The constant is a repeating sequence of at least two elements,
2982 where the repeating elements occupy no more than 128 bits.
2983 Get an integer representation of the replicated value. */
8179efe0
RS
2984 scalar_int_mode int_mode;
2985 if (BYTES_BIG_ENDIAN)
2986 /* For now, always use LD1RQ to load the value on big-endian
2987 targets, since the handling of smaller integers includes a
2988 subreg that is semantically an element reverse. */
2989 int_mode = TImode;
2990 else
2991 {
2992 unsigned int int_bits = GET_MODE_UNIT_BITSIZE (mode) * npatterns;
2993 gcc_assert (int_bits <= 128);
2994 int_mode = int_mode_for_size (int_bits, 0).require ();
2995 }
43cacb12
RS
2996 rtx int_value = simplify_gen_subreg (int_mode, src, mode, 0);
2997 if (int_value
2998 && aarch64_expand_sve_widened_duplicate (dest, int_mode, int_value))
2999 return;
3000 }
3001
3002 /* Expand each pattern individually. */
3003 rtx_vector_builder builder;
3004 auto_vec<rtx, 16> vectors (npatterns);
3005 for (unsigned int i = 0; i < npatterns; ++i)
3006 {
3007 builder.new_vector (mode, 1, nelts_per_pattern);
3008 for (unsigned int j = 0; j < nelts_per_pattern; ++j)
3009 builder.quick_push (CONST_VECTOR_ELT (src, i + j * npatterns));
3010 vectors.quick_push (force_reg (mode, builder.build ()));
3011 }
3012
3013 /* Use permutes to interleave the separate vectors. */
3014 while (npatterns > 1)
3015 {
3016 npatterns /= 2;
3017 for (unsigned int i = 0; i < npatterns; ++i)
3018 {
3019 rtx tmp = (npatterns == 1 ? dest : gen_reg_rtx (mode));
3020 rtvec v = gen_rtvec (2, vectors[i], vectors[i + npatterns]);
3021 emit_set_insn (tmp, gen_rtx_UNSPEC (mode, v, UNSPEC_ZIP1));
3022 vectors[i] = tmp;
3023 }
3024 }
3025 gcc_assert (vectors[0] == dest);
3026}
3027
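/* Illustrative standalone sketch (editor's example in plain C, not part
   of the original file): the permute-based interleaving above, simulated
   on toy character vectors.  Each round halves NPATTERNS and zips vector
   I with vector I + NPATTERNS; ZIP1 here interleaves the low halves of
   its two inputs, as in the loop above.  Names and the vector length are
   hypothetical.  */
#include <assert.h>
#include <string.h>

#define SKETCH_N 8   /* toy vector length */

static void
zip1_sketch (const char *a, const char *b, char *out)
{
  for (int k = 0; k < SKETCH_N / 2; k++)
    {
      out[2 * k] = a[k];
      out[2 * k + 1] = b[k];
    }
}

static void
interleave_sketch (void)
{
  /* Four single-element patterns, each replicated across a vector.  */
  char v[4][SKETCH_N + 1] = { "aaaaaaaa", "bbbbbbbb", "cccccccc", "dddddddd" };
  char tmp[SKETCH_N + 1] = { 0 };
  int npatterns = 4;
  while (npatterns > 1)
    {
      npatterns /= 2;
      for (int i = 0; i < npatterns; i++)
        {
          zip1_sketch (v[i], v[i + npatterns], tmp);
          memcpy (v[i], tmp, SKETCH_N);
        }
    }
  /* The result is the original element order, repeated.  */
  assert (strcmp (v[0], "abcdabcd") == 0);
}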
3028/* Set DEST to immediate IMM. For SVE vector modes, GEN_VEC_DUPLICATE
3029 is a pattern that can be used to set DEST to a replicated scalar
3030 element. */
3031
3032void
3033aarch64_expand_mov_immediate (rtx dest, rtx imm,
3034 rtx (*gen_vec_duplicate) (rtx, rtx))
3035{
3036 machine_mode mode = GET_MODE (dest);
82614948
RR
3037
3038 /* Check on what type of symbol it is. */
77e994c9
RS
3039 scalar_int_mode int_mode;
3040 if ((GET_CODE (imm) == SYMBOL_REF
3041 || GET_CODE (imm) == LABEL_REF
43cacb12
RS
3042 || GET_CODE (imm) == CONST
3043 || GET_CODE (imm) == CONST_POLY_INT)
77e994c9 3044 && is_a <scalar_int_mode> (mode, &int_mode))
82614948 3045 {
43cacb12
RS
3046 rtx mem;
3047 poly_int64 offset;
3048 HOST_WIDE_INT const_offset;
82614948
RR
3049 enum aarch64_symbol_type sty;
3050
3051 /* If we have (const (plus symbol offset)), separate out the offset
3052 before we start classifying the symbol. */
43cacb12 3053 rtx base = strip_offset (imm, &offset);
82614948 3054
43cacb12
RS
3055 /* We must always add an offset involving VL separately, rather than
3056 folding it into the relocation. */
3057 if (!offset.is_constant (&const_offset))
3058 {
3059 if (base == const0_rtx && aarch64_sve_cnt_immediate_p (offset))
3060 emit_insn (gen_rtx_SET (dest, imm));
3061 else
3062 {
3063 /* Do arithmetic on 32-bit values if the result is smaller
3064 than that. */
3065 if (partial_subreg_p (int_mode, SImode))
3066 {
3067 /* It is invalid to do symbol calculations in modes
3068 narrower than SImode. */
3069 gcc_assert (base == const0_rtx);
3070 dest = gen_lowpart (SImode, dest);
3071 int_mode = SImode;
3072 }
3073 if (base != const0_rtx)
3074 {
3075 base = aarch64_force_temporary (int_mode, dest, base);
3076 aarch64_add_offset (int_mode, dest, base, offset,
3077 NULL_RTX, NULL_RTX, false);
3078 }
3079 else
3080 aarch64_add_offset (int_mode, dest, base, offset,
3081 dest, NULL_RTX, false);
3082 }
3083 return;
3084 }
3085
3086 sty = aarch64_classify_symbol (base, const_offset);
82614948
RR
3087 switch (sty)
3088 {
3089 case SYMBOL_FORCE_TO_MEM:
43cacb12 3090 if (const_offset != 0
77e994c9 3091 && targetm.cannot_force_const_mem (int_mode, imm))
82614948
RR
3092 {
3093 gcc_assert (can_create_pseudo_p ());
77e994c9 3094 base = aarch64_force_temporary (int_mode, dest, base);
43cacb12
RS
3095 aarch64_add_offset (int_mode, dest, base, const_offset,
3096 NULL_RTX, NULL_RTX, false);
82614948
RR
3097 return;
3098 }
b4f50fd4 3099
82614948
RR
3100 mem = force_const_mem (ptr_mode, imm);
3101 gcc_assert (mem);
b4f50fd4
RR
3102
3103 /* If we aren't generating PC relative literals, then
3104 we need to expand the literal pool access carefully.
3105 This is something that needs to be done in a number
3106 of places, so could well live as a separate function. */
9ee6540a 3107 if (!aarch64_pcrelative_literal_loads)
b4f50fd4
RR
3108 {
3109 gcc_assert (can_create_pseudo_p ());
3110 base = gen_reg_rtx (ptr_mode);
3111 aarch64_expand_mov_immediate (base, XEXP (mem, 0));
00eee3fa
WD
3112 if (ptr_mode != Pmode)
3113 base = convert_memory_address (Pmode, base);
b4f50fd4
RR
3114 mem = gen_rtx_MEM (ptr_mode, base);
3115 }
3116
77e994c9
RS
3117 if (int_mode != ptr_mode)
3118 mem = gen_rtx_ZERO_EXTEND (int_mode, mem);
b4f50fd4 3119
f7df4a84 3120 emit_insn (gen_rtx_SET (dest, mem));
b4f50fd4 3121
82614948
RR
3122 return;
3123
3124 case SYMBOL_SMALL_TLSGD:
3125 case SYMBOL_SMALL_TLSDESC:
79496620 3126 case SYMBOL_SMALL_TLSIE:
1b1e81f8 3127 case SYMBOL_SMALL_GOT_28K:
6642bdb4 3128 case SYMBOL_SMALL_GOT_4G:
82614948 3129 case SYMBOL_TINY_GOT:
5ae7caad 3130 case SYMBOL_TINY_TLSIE:
43cacb12 3131 if (const_offset != 0)
82614948
RR
3132 {
3133 gcc_assert(can_create_pseudo_p ());
77e994c9 3134 base = aarch64_force_temporary (int_mode, dest, base);
43cacb12
RS
3135 aarch64_add_offset (int_mode, dest, base, const_offset,
3136 NULL_RTX, NULL_RTX, false);
82614948
RR
3137 return;
3138 }
3139 /* FALLTHRU */
3140
82614948
RR
3141 case SYMBOL_SMALL_ABSOLUTE:
3142 case SYMBOL_TINY_ABSOLUTE:
cbf5629e 3143 case SYMBOL_TLSLE12:
d18ba284 3144 case SYMBOL_TLSLE24:
cbf5629e
JW
3145 case SYMBOL_TLSLE32:
3146 case SYMBOL_TLSLE48:
82614948
RR
3147 aarch64_load_symref_appropriately (dest, imm, sty);
3148 return;
3149
3150 default:
3151 gcc_unreachable ();
3152 }
3153 }
3154
3155 if (!CONST_INT_P (imm))
3156 {
43cacb12
RS
3157 rtx base, step, value;
3158 if (GET_CODE (imm) == HIGH
3159 || aarch64_simd_valid_immediate (imm, NULL))
f7df4a84 3160 emit_insn (gen_rtx_SET (dest, imm));
43cacb12
RS
3161 else if (const_vec_series_p (imm, &base, &step))
3162 aarch64_expand_vec_series (dest, base, step);
3163 else if (const_vec_duplicate_p (imm, &value))
3164 {
3165 /* If the constant is out of range of an SVE vector move,
3166 load it from memory if we can, otherwise move it into
3167 a register and use a DUP. */
3168 scalar_mode inner_mode = GET_MODE_INNER (mode);
3169 rtx op = force_const_mem (inner_mode, value);
3170 if (!op)
3171 op = force_reg (inner_mode, value);
3172 else if (!aarch64_sve_ld1r_operand_p (op))
3173 {
3174 rtx addr = force_reg (Pmode, XEXP (op, 0));
3175 op = replace_equiv_address (op, addr);
3176 }
3177 emit_insn (gen_vec_duplicate (dest, op));
3178 }
3179 else if (GET_CODE (imm) == CONST_VECTOR
3180 && !GET_MODE_NUNITS (GET_MODE (imm)).is_constant ())
3181 aarch64_expand_sve_const_vector (dest, imm);
82614948 3182 else
43cacb12 3183 {
82614948
RR
3184 rtx mem = force_const_mem (mode, imm);
3185 gcc_assert (mem);
43cacb12 3186 emit_move_insn (dest, mem);
43e9d192 3187 }
82614948
RR
3188
3189 return;
43e9d192 3190 }
82614948 3191
77e994c9
RS
3192 aarch64_internal_mov_immediate (dest, imm, true,
3193 as_a <scalar_int_mode> (mode));
43e9d192
IB
3194}
3195
43cacb12
RS
3196/* Emit an SVE predicated move from SRC to DEST. PRED is a predicate
3197 that is known to contain PTRUE. */
3198
3199void
3200aarch64_emit_sve_pred_move (rtx dest, rtx pred, rtx src)
3201{
3202 emit_insn (gen_rtx_SET (dest, gen_rtx_UNSPEC (GET_MODE (dest),
3203 gen_rtvec (2, pred, src),
3204 UNSPEC_MERGE_PTRUE)));
3205}
3206
3207/* Expand a pre-RA SVE data move from SRC to DEST in which at least one
3208 operand is in memory. In this case we need to use the predicated LD1
3209 and ST1 instead of LDR and STR, both for correctness on big-endian
3210 targets and because LD1 and ST1 support a wider range of addressing modes.
3211 PRED_MODE is the mode of the predicate.
3212
3213 See the comment at the head of aarch64-sve.md for details about the
3214 big-endian handling. */
3215
3216void
3217aarch64_expand_sve_mem_move (rtx dest, rtx src, machine_mode pred_mode)
3218{
3219 machine_mode mode = GET_MODE (dest);
3220 rtx ptrue = force_reg (pred_mode, CONSTM1_RTX (pred_mode));
3221 if (!register_operand (src, mode)
3222 && !register_operand (dest, mode))
3223 {
3224 rtx tmp = gen_reg_rtx (mode);
3225 if (MEM_P (src))
3226 aarch64_emit_sve_pred_move (tmp, ptrue, src);
3227 else
3228 emit_move_insn (tmp, src);
3229 src = tmp;
3230 }
3231 aarch64_emit_sve_pred_move (dest, ptrue, src);
3232}
3233
002092be
RS
3234/* Called only on big-endian targets. See whether an SVE vector move
3235 from SRC to DEST is effectively a REV[BHW] instruction, because at
3236 least one operand is a subreg of an SVE vector that has wider or
3237 narrower elements. Return true and emit the instruction if so.
3238
3239 For example:
3240
3241 (set (reg:VNx8HI R1) (subreg:VNx8HI (reg:VNx16QI R2) 0))
3242
3243 represents a VIEW_CONVERT between the following vectors, viewed
3244 in memory order:
3245
3246 R2: { [0].high, [0].low, [1].high, [1].low, ... }
3247 R1: { [0], [1], [2], [3], ... }
3248
3249 The high part of lane X in R2 should therefore correspond to lane X*2
3250 of R1, but the register representations are:
3251
3252 msb lsb
3253 R2: ...... [1].high [1].low [0].high [0].low
3254 R1: ...... [3] [2] [1] [0]
3255
3256 where the low part of lane X in R2 corresponds to lane X*2 in R1.
3257 We therefore need a reverse operation to swap the high and low values
3258 around.
3259
3260 This is purely an optimization. Without it we would spill the
3261 subreg operand to the stack in one mode and reload it in the
3262 other mode, which has the same effect as the REV. */
3263
3264bool
3265aarch64_maybe_expand_sve_subreg_move (rtx dest, rtx src)
3266{
3267 gcc_assert (BYTES_BIG_ENDIAN);
3268 if (GET_CODE (dest) == SUBREG)
3269 dest = SUBREG_REG (dest);
3270 if (GET_CODE (src) == SUBREG)
3271 src = SUBREG_REG (src);
3272
3273 /* The optimization handles two single SVE REGs with different element
3274 sizes. */
3275 if (!REG_P (dest)
3276 || !REG_P (src)
3277 || aarch64_classify_vector_mode (GET_MODE (dest)) != VEC_SVE_DATA
3278 || aarch64_classify_vector_mode (GET_MODE (src)) != VEC_SVE_DATA
3279 || (GET_MODE_UNIT_SIZE (GET_MODE (dest))
3280 == GET_MODE_UNIT_SIZE (GET_MODE (src))))
3281 return false;
3282
3283 /* Generate *aarch64_sve_mov<mode>_subreg_be. */
3284 rtx ptrue = force_reg (VNx16BImode, CONSTM1_RTX (VNx16BImode));
3285 rtx unspec = gen_rtx_UNSPEC (GET_MODE (dest), gen_rtvec (2, ptrue, src),
3286 UNSPEC_REV_SUBREG);
3287 emit_insn (gen_rtx_SET (dest, unspec));
3288 return true;
3289}
3290
3291/* Return a copy of X with mode MODE, without changing its other
3292 attributes. Unlike gen_lowpart, this doesn't care whether the
3293 mode change is valid. */
3294
3295static rtx
3296aarch64_replace_reg_mode (rtx x, machine_mode mode)
3297{
3298 if (GET_MODE (x) == mode)
3299 return x;
3300
3301 x = shallow_copy_rtx (x);
3302 set_mode_and_regno (x, mode, REGNO (x));
3303 return x;
3304}
3305
3306/* Split a *aarch64_sve_mov<mode>_subreg_be pattern with the given
3307 operands. */
3308
3309void
3310aarch64_split_sve_subreg_move (rtx dest, rtx ptrue, rtx src)
3311{
3312 /* Decide which REV operation we need. The mode with narrower elements
3313 determines the mode of the operands and the mode with the wider
3314 elements determines the reverse width. */
3315 machine_mode mode_with_wider_elts = GET_MODE (dest);
3316 machine_mode mode_with_narrower_elts = GET_MODE (src);
3317 if (GET_MODE_UNIT_SIZE (mode_with_wider_elts)
3318 < GET_MODE_UNIT_SIZE (mode_with_narrower_elts))
3319 std::swap (mode_with_wider_elts, mode_with_narrower_elts);
3320
3321 unsigned int wider_bytes = GET_MODE_UNIT_SIZE (mode_with_wider_elts);
3322 unsigned int unspec;
3323 if (wider_bytes == 8)
3324 unspec = UNSPEC_REV64;
3325 else if (wider_bytes == 4)
3326 unspec = UNSPEC_REV32;
3327 else if (wider_bytes == 2)
3328 unspec = UNSPEC_REV16;
3329 else
3330 gcc_unreachable ();
3331 machine_mode pred_mode = aarch64_sve_pred_mode (wider_bytes).require ();
3332
3333 /* Emit:
3334
3335 (set DEST (unspec [PTRUE (unspec [SRC] UNSPEC_REV<nn>)]
3336 UNSPEC_MERGE_PTRUE))
3337
3338 with the appropriate modes. */
3339 ptrue = gen_lowpart (pred_mode, ptrue);
3340 dest = aarch64_replace_reg_mode (dest, mode_with_narrower_elts);
3341 src = aarch64_replace_reg_mode (src, mode_with_narrower_elts);
3342 src = gen_rtx_UNSPEC (mode_with_narrower_elts, gen_rtvec (1, src), unspec);
3343 src = gen_rtx_UNSPEC (mode_with_narrower_elts, gen_rtvec (2, ptrue, src),
3344 UNSPEC_MERGE_PTRUE);
3345 emit_insn (gen_rtx_SET (dest, src));
3346}
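/* For instance (illustrative only), splitting the VNx8HI/VNx16QI example
   from the comment above uses UNSPEC_REV16 (the wider elements are
   2 bytes) with a VNx8BI predicate, and the resulting insn is output as
   something like

	revb	z0.h, p0/m, z1.h

   i.e. a byte swap within each 16-bit container.  */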
3347
43e9d192 3348static bool
fee9ba42
JW
3349aarch64_function_ok_for_sibcall (tree decl ATTRIBUTE_UNUSED,
3350 tree exp ATTRIBUTE_UNUSED)
43e9d192 3351{
fee9ba42 3352 /* Currently, always true. */
43e9d192
IB
3353 return true;
3354}
3355
3356/* Implement TARGET_PASS_BY_REFERENCE. */
3357
3358static bool
3359aarch64_pass_by_reference (cumulative_args_t pcum ATTRIBUTE_UNUSED,
ef4bddc2 3360 machine_mode mode,
43e9d192
IB
3361 const_tree type,
3362 bool named ATTRIBUTE_UNUSED)
3363{
3364 HOST_WIDE_INT size;
ef4bddc2 3365 machine_mode dummymode;
43e9d192
IB
3366 int nregs;
3367
3368 /* GET_MODE_SIZE (BLKmode) is useless since it is 0. */
6a70badb
RS
3369 if (mode == BLKmode && type)
3370 size = int_size_in_bytes (type);
3371 else
3372 /* No frontends can create types with variable-sized modes, so we
3373 shouldn't be asked to pass or return them. */
3374 size = GET_MODE_SIZE (mode).to_constant ();
43e9d192 3375
aadc1c43
MHD
3376 /* Aggregates are passed by reference based on their size. */
3377 if (type && AGGREGATE_TYPE_P (type))
43e9d192 3378 {
aadc1c43 3379 size = int_size_in_bytes (type);
43e9d192
IB
3380 }
3381
3382 /* Variable sized arguments are always returned by reference. */
3383 if (size < 0)
3384 return true;
3385
3386 /* Can this be a candidate to be passed in fp/simd register(s)? */
3387 if (aarch64_vfp_is_call_or_return_candidate (mode, type,
3388 &dummymode, &nregs,
3389 NULL))
3390 return false;
3391
3392 /* Arguments which are variable sized or larger than 2 registers are
3393 passed by reference unless they are a homogeneous floating-point
3394 aggregate. */
3395 return size > 2 * UNITS_PER_WORD;
3396}
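/* Illustrative examples (assumptions, not from the original source):

     struct big { long a, b, c; };	   24 bytes, not an HFA:
					   passed by reference.
     struct hfa { double a, b, c, d; };	   32-byte HFA: passed by value
					   in four SIMD/FP registers.
     __int128				   16 bytes: passed by value in
					   a pair of general registers.  */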
3397
3398/* Return TRUE if VALTYPE is padded to its least significant bits. */
3399static bool
3400aarch64_return_in_msb (const_tree valtype)
3401{
ef4bddc2 3402 machine_mode dummy_mode;
43e9d192
IB
3403 int dummy_int;
3404
3405 /* Never happens in little-endian mode. */
3406 if (!BYTES_BIG_ENDIAN)
3407 return false;
3408
3409 /* Only composite types smaller than or equal to 16 bytes can
3410 be potentially returned in registers. */
3411 if (!aarch64_composite_type_p (valtype, TYPE_MODE (valtype))
3412 || int_size_in_bytes (valtype) <= 0
3413 || int_size_in_bytes (valtype) > 16)
3414 return false;
3415
3416 /* But not a composite that is an HFA (Homogeneous Floating-point Aggregate)
3417 or an HVA (Homogeneous Short-Vector Aggregate); such a special composite
3418 is always passed/returned in the least significant bits of fp/simd
3419 register(s). */
3420 if (aarch64_vfp_is_call_or_return_candidate (TYPE_MODE (valtype), valtype,
3421 &dummy_mode, &dummy_int, NULL))
3422 return false;
3423
3424 return true;
3425}
3426
3427/* Implement TARGET_FUNCTION_VALUE.
3428 Define how to find the value returned by a function. */
3429
3430static rtx
3431aarch64_function_value (const_tree type, const_tree func,
3432 bool outgoing ATTRIBUTE_UNUSED)
3433{
ef4bddc2 3434 machine_mode mode;
43e9d192
IB
3435 int unsignedp;
3436 int count;
ef4bddc2 3437 machine_mode ag_mode;
43e9d192
IB
3438
3439 mode = TYPE_MODE (type);
3440 if (INTEGRAL_TYPE_P (type))
3441 mode = promote_function_mode (type, mode, &unsignedp, func, 1);
3442
3443 if (aarch64_return_in_msb (type))
3444 {
3445 HOST_WIDE_INT size = int_size_in_bytes (type);
3446
3447 if (size % UNITS_PER_WORD != 0)
3448 {
3449 size += UNITS_PER_WORD - size % UNITS_PER_WORD;
f4b31647 3450 mode = int_mode_for_size (size * BITS_PER_UNIT, 0).require ();
43e9d192
IB
3451 }
3452 }
3453
3454 if (aarch64_vfp_is_call_or_return_candidate (mode, type,
3455 &ag_mode, &count, NULL))
3456 {
3457 if (!aarch64_composite_type_p (type, mode))
3458 {
3459 gcc_assert (count == 1 && mode == ag_mode);
3460 return gen_rtx_REG (mode, V0_REGNUM);
3461 }
3462 else
3463 {
3464 int i;
3465 rtx par;
3466
3467 par = gen_rtx_PARALLEL (mode, rtvec_alloc (count));
3468 for (i = 0; i < count; i++)
3469 {
3470 rtx tmp = gen_rtx_REG (ag_mode, V0_REGNUM + i);
6a70badb
RS
3471 rtx offset = gen_int_mode (i * GET_MODE_SIZE (ag_mode), Pmode);
3472 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp, offset);
43e9d192
IB
3473 XVECEXP (par, 0, i) = tmp;
3474 }
3475 return par;
3476 }
3477 }
3478 else
3479 return gen_rtx_REG (mode, R0_REGNUM);
3480}
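/* For example (illustrative only), returning struct { float x, y; },
   an HFA with two members, yields a PARALLEL of the form

     (parallel [(expr_list (reg:SF v0) (const_int 0))
		(expr_list (reg:SF v1) (const_int 4))])

   while a plain int is returned in x0/w0 after any promotion.  */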
3481
3482/* Implements TARGET_FUNCTION_VALUE_REGNO_P.
3483 Return true if REGNO is the number of a hard register in which the values
3484 of called function may come back. */
3485
3486static bool
3487aarch64_function_value_regno_p (const unsigned int regno)
3488{
3489 /* Maximum of 16 bytes can be returned in the general registers. Examples
3490 of 16-byte return values are: 128-bit integers and 16-byte small
3491 structures (excluding homogeneous floating-point aggregates). */
3492 if (regno == R0_REGNUM || regno == R1_REGNUM)
3493 return true;
3494
3495 /* Up to four fp/simd registers can return a function value, e.g. a
3496 homogeneous floating-point aggregate having four members. */
3497 if (regno >= V0_REGNUM && regno < V0_REGNUM + HA_MAX_NUM_FLDS)
d5726973 3498 return TARGET_FLOAT;
43e9d192
IB
3499
3500 return false;
3501}
3502
3503/* Implement TARGET_RETURN_IN_MEMORY.
3504
3505 If the type T of the result of a function is such that
3506 void func (T arg)
3507 would require that arg be passed as a value in a register (or set of
3508 registers) according to the parameter passing rules, then the result
3509 is returned in the same registers as would be used for such an
3510 argument. */
3511
3512static bool
3513aarch64_return_in_memory (const_tree type, const_tree fndecl ATTRIBUTE_UNUSED)
3514{
3515 HOST_WIDE_INT size;
ef4bddc2 3516 machine_mode ag_mode;
43e9d192
IB
3517 int count;
3518
3519 if (!AGGREGATE_TYPE_P (type)
3520 && TREE_CODE (type) != COMPLEX_TYPE
3521 && TREE_CODE (type) != VECTOR_TYPE)
3522 /* Simple scalar types are always returned in registers. */
3523 return false;
3524
3525 if (aarch64_vfp_is_call_or_return_candidate (TYPE_MODE (type),
3526 type,
3527 &ag_mode,
3528 &count,
3529 NULL))
3530 return false;
3531
3532 /* Types larger than 2 registers are returned in memory. */
3533 size = int_size_in_bytes (type);
3534 return (size < 0 || size > 2 * UNITS_PER_WORD);
3535}
3536
3537static bool
ef4bddc2 3538aarch64_vfp_is_call_candidate (cumulative_args_t pcum_v, machine_mode mode,
43e9d192
IB
3539 const_tree type, int *nregs)
3540{
3541 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
3542 return aarch64_vfp_is_call_or_return_candidate (mode,
3543 type,
3544 &pcum->aapcs_vfp_rmode,
3545 nregs,
3546 NULL);
3547}
3548
985b8393 3549/* Given MODE and TYPE of a function argument, return the alignment in
43e9d192
IB
3550 bits. The idea is to suppress any stronger alignment requested by
3551 the user and opt for the natural alignment (specified in AAPCS64 \S 4.1).
3552 This is a helper function for local use only. */
3553
985b8393 3554static unsigned int
ef4bddc2 3555aarch64_function_arg_alignment (machine_mode mode, const_tree type)
43e9d192 3556{
75d6cc81 3557 if (!type)
985b8393 3558 return GET_MODE_ALIGNMENT (mode);
2ec07fa6 3559
75d6cc81 3560 if (integer_zerop (TYPE_SIZE (type)))
985b8393 3561 return 0;
43e9d192 3562
75d6cc81
AL
3563 gcc_assert (TYPE_MODE (type) == mode);
3564
3565 if (!AGGREGATE_TYPE_P (type))
985b8393 3566 return TYPE_ALIGN (TYPE_MAIN_VARIANT (type));
75d6cc81
AL
3567
3568 if (TREE_CODE (type) == ARRAY_TYPE)
985b8393 3569 return TYPE_ALIGN (TREE_TYPE (type));
75d6cc81 3570
985b8393 3571 unsigned int alignment = 0;
75d6cc81 3572 for (tree field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
985b8393
JJ
3573 if (TREE_CODE (field) == FIELD_DECL)
3574 alignment = std::max (alignment, DECL_ALIGN (field));
43e9d192 3575
985b8393 3576 return alignment;
43e9d192
IB
3577}
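/* Illustrative examples (assumptions, not from the original source):
   for struct { __int128 x; } the maximum field alignment is 128 bits,
   so 128 is returned and rule C.8 below rounds the NGRN up to an even
   number; for a scalar argument only the natural alignment of its mode
   or main type variant is used, so any stronger alignment requested by
   the user is ignored, as the comment above says.  */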
3578
3579/* Layout a function argument according to the AAPCS64 rules. The rule
3580 numbers refer to the rule numbers in the AAPCS64. */
3581
3582static void
ef4bddc2 3583aarch64_layout_arg (cumulative_args_t pcum_v, machine_mode mode,
43e9d192
IB
3584 const_tree type,
3585 bool named ATTRIBUTE_UNUSED)
3586{
3587 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
3588 int ncrn, nvrn, nregs;
3589 bool allocate_ncrn, allocate_nvrn;
3abf17cf 3590 HOST_WIDE_INT size;
43e9d192
IB
3591
3592 /* We need to do this once per argument. */
3593 if (pcum->aapcs_arg_processed)
3594 return;
3595
3596 pcum->aapcs_arg_processed = true;
3597
3abf17cf 3598 /* Size in bytes, rounded to the nearest multiple of 8 bytes. */
6a70badb
RS
3599 if (type)
3600 size = int_size_in_bytes (type);
3601 else
3602 /* No frontends can create types with variable-sized modes, so we
3603 shouldn't be asked to pass or return them. */
3604 size = GET_MODE_SIZE (mode).to_constant ();
3605 size = ROUND_UP (size, UNITS_PER_WORD);
3abf17cf 3606
43e9d192
IB
3607 allocate_ncrn = (type) ? !(FLOAT_TYPE_P (type)) : !FLOAT_MODE_P (mode);
3608 allocate_nvrn = aarch64_vfp_is_call_candidate (pcum_v,
3609 mode,
3610 type,
3611 &nregs);
3612
3613 /* allocate_ncrn may be a false positive, but allocate_nvrn is quite reliable.
3614 The following code thus handles passing by SIMD/FP registers first. */
3615
3616 nvrn = pcum->aapcs_nvrn;
3617
3618 /* C.1 - C.5 for floating point, homogeneous floating-point aggregates (HFA)
3619 and homogeneous short-vector aggregates (HVA). */
3620 if (allocate_nvrn)
3621 {
261fb553 3622 if (!TARGET_FLOAT)
fc29dfc9 3623 aarch64_err_no_fpadvsimd (mode);
261fb553 3624
43e9d192
IB
3625 if (nvrn + nregs <= NUM_FP_ARG_REGS)
3626 {
3627 pcum->aapcs_nextnvrn = nvrn + nregs;
3628 if (!aarch64_composite_type_p (type, mode))
3629 {
3630 gcc_assert (nregs == 1);
3631 pcum->aapcs_reg = gen_rtx_REG (mode, V0_REGNUM + nvrn);
3632 }
3633 else
3634 {
3635 rtx par;
3636 int i;
3637 par = gen_rtx_PARALLEL (mode, rtvec_alloc (nregs));
3638 for (i = 0; i < nregs; i++)
3639 {
3640 rtx tmp = gen_rtx_REG (pcum->aapcs_vfp_rmode,
3641 V0_REGNUM + nvrn + i);
6a70badb
RS
3642 rtx offset = gen_int_mode
3643 (i * GET_MODE_SIZE (pcum->aapcs_vfp_rmode), Pmode);
3644 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp, offset);
43e9d192
IB
3645 XVECEXP (par, 0, i) = tmp;
3646 }
3647 pcum->aapcs_reg = par;
3648 }
3649 return;
3650 }
3651 else
3652 {
3653 /* C.3 NSRN is set to 8. */
3654 pcum->aapcs_nextnvrn = NUM_FP_ARG_REGS;
3655 goto on_stack;
3656 }
3657 }
3658
3659 ncrn = pcum->aapcs_ncrn;
3abf17cf 3660 nregs = size / UNITS_PER_WORD;
43e9d192
IB
3661
3662 /* C.6 - C.9, though the sign and zero extension semantics are
3663 handled elsewhere. This is the case where the argument fits
3664 entirely in general registers. */
3665 if (allocate_ncrn && (ncrn + nregs <= NUM_ARG_REGS))
3666 {
43e9d192
IB
3667
3668 gcc_assert (nregs == 0 || nregs == 1 || nregs == 2);
3669
3670 /* C.8 if the argument has an alignment of 16 bytes then the NGRN is
3671 rounded up to the next even number. */
985b8393
JJ
3672 if (nregs == 2
3673 && ncrn % 2
2ec07fa6 3674 /* The == 16 * BITS_PER_UNIT instead of >= 16 * BITS_PER_UNIT
985b8393 3675 comparison is there because for > 16 * BITS_PER_UNIT
2ec07fa6
RR
3676 alignment nregs should be > 2 and therefore it should be
3677 passed by reference rather than value. */
985b8393
JJ
3678 && aarch64_function_arg_alignment (mode, type) == 16 * BITS_PER_UNIT)
3679 {
3680 ++ncrn;
3681 gcc_assert (ncrn + nregs <= NUM_ARG_REGS);
43e9d192 3682 }
2ec07fa6 3683
43e9d192
IB
3684 /* NREGS can be 0 when e.g. an empty structure is to be passed.
3685 A reg is still generated for it, but the caller should be smart
3686 enough not to use it. */
3687 if (nregs == 0 || nregs == 1 || GET_MODE_CLASS (mode) == MODE_INT)
2ec07fa6 3688 pcum->aapcs_reg = gen_rtx_REG (mode, R0_REGNUM + ncrn);
43e9d192
IB
3689 else
3690 {
3691 rtx par;
3692 int i;
3693
3694 par = gen_rtx_PARALLEL (mode, rtvec_alloc (nregs));
3695 for (i = 0; i < nregs; i++)
3696 {
3697 rtx tmp = gen_rtx_REG (word_mode, R0_REGNUM + ncrn + i);
3698 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp,
3699 GEN_INT (i * UNITS_PER_WORD));
3700 XVECEXP (par, 0, i) = tmp;
3701 }
3702 pcum->aapcs_reg = par;
3703 }
3704
3705 pcum->aapcs_nextncrn = ncrn + nregs;
3706 return;
3707 }
3708
3709 /* C.11 */
3710 pcum->aapcs_nextncrn = NUM_ARG_REGS;
3711
3712 /* The argument is passed on stack; record the needed number of words for
3abf17cf 3713 this argument and align the total size if necessary. */
43e9d192 3714on_stack:
3abf17cf 3715 pcum->aapcs_stack_words = size / UNITS_PER_WORD;
2ec07fa6 3716
985b8393 3717 if (aarch64_function_arg_alignment (mode, type) == 16 * BITS_PER_UNIT)
4f59f9f2
UB
3718 pcum->aapcs_stack_size = ROUND_UP (pcum->aapcs_stack_size,
3719 16 / UNITS_PER_WORD);
43e9d192
IB
3720 return;
3721}
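/* Worked example (illustrative only, assuming the standard AAPCS64
   variant and TARGET_FLOAT): for

     f (int a, double b, struct { double x, y; } c, __int128 d)

   successive calls to this function allocate roughly

     a -> w0		C.9, one general register
     b -> d0		C.1, one SIMD/FP register
     c -> {d1, d2}	C.2, an HFA using two SIMD/FP registers
     d -> x2/x3		C.8/C.9, NGRN rounded up to an even number.  */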
3722
3723/* Implement TARGET_FUNCTION_ARG. */
3724
3725static rtx
ef4bddc2 3726aarch64_function_arg (cumulative_args_t pcum_v, machine_mode mode,
43e9d192
IB
3727 const_tree type, bool named)
3728{
3729 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
3730 gcc_assert (pcum->pcs_variant == ARM_PCS_AAPCS64);
3731
3732 if (mode == VOIDmode)
3733 return NULL_RTX;
3734
3735 aarch64_layout_arg (pcum_v, mode, type, named);
3736 return pcum->aapcs_reg;
3737}
3738
3739void
3740aarch64_init_cumulative_args (CUMULATIVE_ARGS *pcum,
3741 const_tree fntype ATTRIBUTE_UNUSED,
3742 rtx libname ATTRIBUTE_UNUSED,
3743 const_tree fndecl ATTRIBUTE_UNUSED,
3744 unsigned n_named ATTRIBUTE_UNUSED)
3745{
3746 pcum->aapcs_ncrn = 0;
3747 pcum->aapcs_nvrn = 0;
3748 pcum->aapcs_nextncrn = 0;
3749 pcum->aapcs_nextnvrn = 0;
3750 pcum->pcs_variant = ARM_PCS_AAPCS64;
3751 pcum->aapcs_reg = NULL_RTX;
3752 pcum->aapcs_arg_processed = false;
3753 pcum->aapcs_stack_words = 0;
3754 pcum->aapcs_stack_size = 0;
3755
261fb553
AL
3756 if (!TARGET_FLOAT
3757 && fndecl && TREE_PUBLIC (fndecl)
3758 && fntype && fntype != error_mark_node)
3759 {
3760 const_tree type = TREE_TYPE (fntype);
3761 machine_mode mode ATTRIBUTE_UNUSED; /* To pass pointer as argument. */
3762 int nregs ATTRIBUTE_UNUSED; /* Likewise. */
3763 if (aarch64_vfp_is_call_or_return_candidate (TYPE_MODE (type), type,
3764 &mode, &nregs, NULL))
fc29dfc9 3765 aarch64_err_no_fpadvsimd (TYPE_MODE (type));
261fb553 3766 }
43e9d192
IB
3767 return;
3768}
3769
3770static void
3771aarch64_function_arg_advance (cumulative_args_t pcum_v,
ef4bddc2 3772 machine_mode mode,
43e9d192
IB
3773 const_tree type,
3774 bool named)
3775{
3776 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
3777 if (pcum->pcs_variant == ARM_PCS_AAPCS64)
3778 {
3779 aarch64_layout_arg (pcum_v, mode, type, named);
3780 gcc_assert ((pcum->aapcs_reg != NULL_RTX)
3781 != (pcum->aapcs_stack_words != 0));
3782 pcum->aapcs_arg_processed = false;
3783 pcum->aapcs_ncrn = pcum->aapcs_nextncrn;
3784 pcum->aapcs_nvrn = pcum->aapcs_nextnvrn;
3785 pcum->aapcs_stack_size += pcum->aapcs_stack_words;
3786 pcum->aapcs_stack_words = 0;
3787 pcum->aapcs_reg = NULL_RTX;
3788 }
3789}
3790
3791bool
3792aarch64_function_arg_regno_p (unsigned regno)
3793{
3794 return ((GP_REGNUM_P (regno) && regno < R0_REGNUM + NUM_ARG_REGS)
3795 || (FP_REGNUM_P (regno) && regno < V0_REGNUM + NUM_FP_ARG_REGS));
3796}
3797
3798/* Implement FUNCTION_ARG_BOUNDARY. Every parameter gets at least
3799 PARM_BOUNDARY bits of alignment, but will be given anything up
3800 to STACK_BOUNDARY bits if the type requires it. This makes sure
3801 that both before and after the layout of each argument, the Next
3802 Stacked Argument Address (NSAA) will have a minimum alignment of
3803 8 bytes. */
3804
3805static unsigned int
ef4bddc2 3806aarch64_function_arg_boundary (machine_mode mode, const_tree type)
43e9d192 3807{
985b8393
JJ
3808 unsigned int alignment = aarch64_function_arg_alignment (mode, type);
3809 return MIN (MAX (alignment, PARM_BOUNDARY), STACK_BOUNDARY);
43e9d192
IB
3810}
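/* For example (illustrative): a char argument is still given
   PARM_BOUNDARY (64-bit) alignment on the stack, while a 16-byte
   aligned aggregate is capped at STACK_BOUNDARY (128 bits), keeping
   the NSAA aligned to at least 8 bytes as described above.  */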
3811
43cacb12
RS
3812/* Implement TARGET_GET_RAW_RESULT_MODE and TARGET_GET_RAW_ARG_MODE. */
3813
3814static fixed_size_mode
3815aarch64_get_reg_raw_mode (int regno)
3816{
3817 if (TARGET_SVE && FP_REGNUM_P (regno))
3818 /* Don't use the SVE part of the register for __builtin_apply and
3819 __builtin_return. The SVE registers aren't used by the normal PCS,
3820 so using them there would be a waste of time. The PCS extensions
3821 for SVE types are fundamentally incompatible with the
3822 __builtin_return/__builtin_apply interface. */
3823 return as_a <fixed_size_mode> (V16QImode);
3824 return default_get_reg_raw_mode (regno);
3825}
3826
76b0cbf8 3827/* Implement TARGET_FUNCTION_ARG_PADDING.
43e9d192
IB
3828
3829 Small aggregate types are placed in the lowest memory address.
3830
3831 The related parameter passing rules are B.4, C.3, C.5 and C.14. */
3832
76b0cbf8
RS
3833static pad_direction
3834aarch64_function_arg_padding (machine_mode mode, const_tree type)
43e9d192
IB
3835{
3836 /* On little-endian targets, the least significant byte of every stack
3837 argument is passed at the lowest byte address of the stack slot. */
3838 if (!BYTES_BIG_ENDIAN)
76b0cbf8 3839 return PAD_UPWARD;
43e9d192 3840
00edcfbe 3841 /* Otherwise, integral, floating-point and pointer types are padded downward:
43e9d192
IB
3842 the least significant byte of a stack argument is passed at the highest
3843 byte address of the stack slot. */
3844 if (type
00edcfbe
YZ
3845 ? (INTEGRAL_TYPE_P (type) || SCALAR_FLOAT_TYPE_P (type)
3846 || POINTER_TYPE_P (type))
43e9d192 3847 : (SCALAR_INT_MODE_P (mode) || SCALAR_FLOAT_MODE_P (mode)))
76b0cbf8 3848 return PAD_DOWNWARD;
43e9d192
IB
3849
3850 /* Everything else padded upward, i.e. data in first byte of stack slot. */
76b0cbf8 3851 return PAD_UPWARD;
43e9d192
IB
3852}
3853
3854/* Similarly, for use by BLOCK_REG_PADDING (MODE, TYPE, FIRST).
3855
3856 It specifies padding for the last (possibly the only)
3857 element of a block move between registers and memory. Assuming
3858 the block is in memory, padding upward means that the last
3859 element is padded after its most significant byte, while in
3860 downward padding, the last element is padded at its least
3861 significant byte side.
3862
3863 Small aggregates and small complex types are always padded
3864 upwards.
3865
3866 We don't need to worry about homogeneous floating-point or
3867 short-vector aggregates; their move is not affected by the
3868 padding direction determined here. Regardless of endianness,
3869 each element of such an aggregate is put in the least
3870 significant bits of a fp/simd register.
3871
3872 Return !BYTES_BIG_ENDIAN if the least significant byte of the
3873 register has useful data, and return the opposite if the most
3874 significant byte does. */
3875
3876bool
ef4bddc2 3877aarch64_pad_reg_upward (machine_mode mode, const_tree type,
43e9d192
IB
3878 bool first ATTRIBUTE_UNUSED)
3879{
3880
3881 /* Small composite types are always padded upward. */
3882 if (BYTES_BIG_ENDIAN && aarch64_composite_type_p (type, mode))
3883 {
6a70badb
RS
3884 HOST_WIDE_INT size;
3885 if (type)
3886 size = int_size_in_bytes (type);
3887 else
3888 /* No frontends can create types with variable-sized modes, so we
3889 shouldn't be asked to pass or return them. */
3890 size = GET_MODE_SIZE (mode).to_constant ();
43e9d192
IB
3891 if (size < 2 * UNITS_PER_WORD)
3892 return true;
3893 }
3894
3895 /* Otherwise, use the default padding. */
3896 return !BYTES_BIG_ENDIAN;
3897}
3898
095a2d76 3899static scalar_int_mode
43e9d192
IB
3900aarch64_libgcc_cmp_return_mode (void)
3901{
3902 return SImode;
3903}
3904
a3eb8a52
EB
3905#define PROBE_INTERVAL (1 << STACK_CHECK_PROBE_INTERVAL_EXP)
3906
3907/* We use the 12-bit shifted immediate arithmetic instructions so values
3908 must be multiple of (1 << 12), i.e. 4096. */
3909#define ARITH_FACTOR 4096
3910
3911#if (PROBE_INTERVAL % ARITH_FACTOR) != 0
3912#error Cannot use simple address calculation for stack probing
3913#endif
3914
3915/* The pair of scratch registers used for stack probing. */
8921ccbb
OH
3916#define PROBE_STACK_FIRST_REG R9_REGNUM
3917#define PROBE_STACK_SECOND_REG R10_REGNUM
a3eb8a52 3918
6a70badb 3919/* Emit code to probe a range of stack addresses from FIRST to FIRST+POLY_SIZE,
a3eb8a52
EB
3920 inclusive. These are offsets from the current stack pointer. */
3921
3922static void
6a70badb 3923aarch64_emit_probe_stack_range (HOST_WIDE_INT first, poly_int64 poly_size)
a3eb8a52 3924{
6a70badb
RS
3925 HOST_WIDE_INT size;
3926 if (!poly_size.is_constant (&size))
3927 {
3928 sorry ("stack probes for SVE frames");
3929 return;
3930 }
3931
5f5c5e0f 3932 rtx reg1 = gen_rtx_REG (Pmode, PROBE_STACK_FIRST_REG);
a3eb8a52
EB
3933
3934 /* See the same assertion on PROBE_INTERVAL above. */
3935 gcc_assert ((first % ARITH_FACTOR) == 0);
3936
3937 /* See if we have a constant small number of probes to generate. If so,
3938 that's the easy case. */
3939 if (size <= PROBE_INTERVAL)
3940 {
3941 const HOST_WIDE_INT base = ROUND_UP (size, ARITH_FACTOR);
3942
3943 emit_set_insn (reg1,
5f5c5e0f 3944 plus_constant (Pmode,
a3eb8a52 3945 stack_pointer_rtx, -(first + base)));
5f5c5e0f 3946 emit_stack_probe (plus_constant (Pmode, reg1, base - size));
a3eb8a52
EB
3947 }
3948
3949 /* The run-time loop is made up of 8 insns in the generic case while the
3950 compile-time loop is made up of 4+2*(n-2) insns for n intervals. */
3951 else if (size <= 4 * PROBE_INTERVAL)
3952 {
3953 HOST_WIDE_INT i, rem;
3954
3955 emit_set_insn (reg1,
5f5c5e0f 3956 plus_constant (Pmode,
a3eb8a52
EB
3957 stack_pointer_rtx,
3958 -(first + PROBE_INTERVAL)));
3959 emit_stack_probe (reg1);
3960
3961 /* Probe at FIRST + N * PROBE_INTERVAL for values of N from 2 until
3962 it exceeds SIZE. If only two probes are needed, this will not
3963 generate any code. Then probe at FIRST + SIZE. */
3964 for (i = 2 * PROBE_INTERVAL; i < size; i += PROBE_INTERVAL)
3965 {
3966 emit_set_insn (reg1,
5f5c5e0f 3967 plus_constant (Pmode, reg1, -PROBE_INTERVAL));
a3eb8a52
EB
3968 emit_stack_probe (reg1);
3969 }
3970
3971 rem = size - (i - PROBE_INTERVAL);
3972 if (rem > 256)
3973 {
3974 const HOST_WIDE_INT base = ROUND_UP (rem, ARITH_FACTOR);
3975
5f5c5e0f
EB
3976 emit_set_insn (reg1, plus_constant (Pmode, reg1, -base));
3977 emit_stack_probe (plus_constant (Pmode, reg1, base - rem));
a3eb8a52
EB
3978 }
3979 else
5f5c5e0f 3980 emit_stack_probe (plus_constant (Pmode, reg1, -rem));
a3eb8a52
EB
3981 }
3982
3983 /* Otherwise, do the same as above, but in a loop. Note that we must be
3984 extra careful with variables wrapping around because we might be at
3985 the very top (or the very bottom) of the address space and we have
3986 to be able to handle this case properly; in particular, we use an
3987 equality test for the loop condition. */
3988 else
3989 {
5f5c5e0f 3990 rtx reg2 = gen_rtx_REG (Pmode, PROBE_STACK_SECOND_REG);
a3eb8a52
EB
3991
3992 /* Step 1: round SIZE to the previous multiple of the interval. */
3993
3994 HOST_WIDE_INT rounded_size = size & -PROBE_INTERVAL;
3995
3996
3997 /* Step 2: compute initial and final value of the loop counter. */
3998
3999 /* TEST_ADDR = SP + FIRST. */
4000 emit_set_insn (reg1,
5f5c5e0f 4001 plus_constant (Pmode, stack_pointer_rtx, -first));
a3eb8a52
EB
4002
4003 /* LAST_ADDR = SP + FIRST + ROUNDED_SIZE. */
13f752b2
JL
4004 HOST_WIDE_INT adjustment = - (first + rounded_size);
4005 if (! aarch64_uimm12_shift (adjustment))
4006 {
4007 aarch64_internal_mov_immediate (reg2, GEN_INT (adjustment),
4008 true, Pmode);
4009 emit_set_insn (reg2, gen_rtx_PLUS (Pmode, stack_pointer_rtx, reg2));
4010 }
4011 else
8dd64cdf
EB
4012 emit_set_insn (reg2,
4013 plus_constant (Pmode, stack_pointer_rtx, adjustment));
4014
a3eb8a52
EB
4015 /* Step 3: the loop
4016
4017 do
4018 {
4019 TEST_ADDR = TEST_ADDR + PROBE_INTERVAL
4020 probe at TEST_ADDR
4021 }
4022 while (TEST_ADDR != LAST_ADDR)
4023
4024 probes at FIRST + N * PROBE_INTERVAL for values of N from 1
4025 until it is equal to ROUNDED_SIZE. */
4026
5f5c5e0f 4027 emit_insn (gen_probe_stack_range (reg1, reg1, reg2));
a3eb8a52
EB
4028
4029
4030 /* Step 4: probe at FIRST + SIZE if we cannot assert at compile-time
4031 that SIZE is equal to ROUNDED_SIZE. */
4032
4033 if (size != rounded_size)
4034 {
4035 HOST_WIDE_INT rem = size - rounded_size;
4036
4037 if (rem > 256)
4038 {
4039 const HOST_WIDE_INT base = ROUND_UP (rem, ARITH_FACTOR);
4040
5f5c5e0f
EB
4041 emit_set_insn (reg2, plus_constant (Pmode, reg2, -base));
4042 emit_stack_probe (plus_constant (Pmode, reg2, base - rem));
a3eb8a52
EB
4043 }
4044 else
5f5c5e0f 4045 emit_stack_probe (plus_constant (Pmode, reg2, -rem));
a3eb8a52
EB
4046 }
4047 }
4048
4049 /* Make sure nothing is scheduled before we are done. */
4050 emit_insn (gen_blockage ());
4051}
4052
4053/* Probe a range of stack addresses from REG1 to REG2 inclusive. These are
4054 absolute addresses. */
4055
4056const char *
4057aarch64_output_probe_stack_range (rtx reg1, rtx reg2)
4058{
4059 static int labelno = 0;
4060 char loop_lab[32];
4061 rtx xops[2];
4062
4063 ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno++);
4064
4065 /* Loop. */
4066 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);
4067
cd1bef27
JL
4068 HOST_WIDE_INT stack_clash_probe_interval
4069 = 1 << PARAM_VALUE (PARAM_STACK_CLASH_PROTECTION_GUARD_SIZE);
4070
a3eb8a52
EB
4071 /* TEST_ADDR = TEST_ADDR + PROBE_INTERVAL. */
4072 xops[0] = reg1;
cd1bef27
JL
4073 HOST_WIDE_INT interval;
4074 if (flag_stack_clash_protection)
4075 interval = stack_clash_probe_interval;
4076 else
4077 interval = PROBE_INTERVAL;
4078
4079 gcc_assert (aarch64_uimm12_shift (interval));
4080 xops[1] = GEN_INT (interval);
4081
a3eb8a52
EB
4082 output_asm_insn ("sub\t%0, %0, %1", xops);
4083
cd1bef27
JL
4084 /* If doing stack clash protection then we probe up by the ABI specified
4085 amount. We do this because we're dropping full pages at a time in the
4086 loop. But if we're doing non-stack clash probing, probe at SP 0. */
4087 if (flag_stack_clash_protection)
4088 xops[1] = GEN_INT (STACK_CLASH_CALLER_GUARD);
4089 else
4090 xops[1] = CONST0_RTX (GET_MODE (xops[1]));
4091
4092 /* Probe at TEST_ADDR. If we're inside the loop it is always safe to probe
4093 by this amount for each iteration. */
4094 output_asm_insn ("str\txzr, [%0, %1]", xops);
a3eb8a52
EB
4095
4096 /* Test if TEST_ADDR == LAST_ADDR. */
4097 xops[1] = reg2;
4098 output_asm_insn ("cmp\t%0, %1", xops);
4099
4100 /* Branch. */
4101 fputs ("\tb.ne\t", asm_out_file);
4102 assemble_name_raw (asm_out_file, loop_lab);
4103 fputc ('\n', asm_out_file);
4104
4105 return "";
4106}
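/* Schematically (illustrative only, with x9/x10 standing for the
   PROBE_STACK_FIRST_REG/PROBE_STACK_SECOND_REG pair and the default
   4096-byte probe interval), the loop emitted above is

	.LPSRL0:
	sub	x9, x9, #4096
	str	xzr, [x9, #offset]
	cmp	x9, x10
	b.ne	.LPSRL0

   where #offset is 0 for ordinary probing and STACK_CLASH_CALLER_GUARD
   when stack clash protection is enabled.  */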
4107
eb471ba3
TC
4108/* Emit the probe loop for doing stack clash probes and stack adjustments for
4109 SVE. This emits probes from BASE to BASE - ADJUSTMENT based on a guard size
4110 of GUARD_SIZE. When a probe is emitted it is done at most
4111 MIN_PROBE_THRESHOLD bytes from the current BASE at an interval of
4112 at most MIN_PROBE_THRESHOLD. By the end of this function
4113 BASE = BASE - ADJUSTMENT. */
4114
4115const char *
4116aarch64_output_probe_sve_stack_clash (rtx base, rtx adjustment,
4117 rtx min_probe_threshold, rtx guard_size)
4118{
4119 /* This function is not allowed to use any instruction generation function
4120 like gen_ and friends. If you do you'll likely ICE during CFG validation,
4121 so instead emit the code you want using output_asm_insn. */
4122 gcc_assert (flag_stack_clash_protection);
4123 gcc_assert (CONST_INT_P (min_probe_threshold) && CONST_INT_P (guard_size));
4124 gcc_assert (INTVAL (guard_size) > INTVAL (min_probe_threshold));
4125
4126 /* The minimum required allocation before the residual requires probing. */
4127 HOST_WIDE_INT residual_probe_guard = INTVAL (min_probe_threshold);
4128
4129 /* Clamp the value down to the nearest value that can be used with a cmp. */
4130 residual_probe_guard = aarch64_clamp_to_uimm12_shift (residual_probe_guard);
4131 rtx probe_offset_value_rtx = gen_int_mode (residual_probe_guard, Pmode);
4132
4133 gcc_assert (INTVAL (min_probe_threshold) >= residual_probe_guard);
4134 gcc_assert (aarch64_uimm12_shift (residual_probe_guard));
4135
4136 static int labelno = 0;
4137 char loop_start_lab[32];
4138 char loop_end_lab[32];
4139 rtx xops[2];
4140
4141 ASM_GENERATE_INTERNAL_LABEL (loop_start_lab, "SVLPSPL", labelno);
4142 ASM_GENERATE_INTERNAL_LABEL (loop_end_lab, "SVLPEND", labelno++);
4143
4144 /* Emit loop start label. */
4145 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_start_lab);
4146
4147 /* ADJUSTMENT < RESIDUAL_PROBE_GUARD. */
4148 xops[0] = adjustment;
4149 xops[1] = probe_offset_value_rtx;
4150 output_asm_insn ("cmp\t%0, %1", xops);
4151
4152 /* Branch to end if not enough adjustment to probe. */
4153 fputs ("\tb.lt\t", asm_out_file);
4154 assemble_name_raw (asm_out_file, loop_end_lab);
4155 fputc ('\n', asm_out_file);
4156
4157 /* BASE = BASE - RESIDUAL_PROBE_GUARD. */
4158 xops[0] = base;
4159 xops[1] = probe_offset_value_rtx;
4160 output_asm_insn ("sub\t%0, %0, %1", xops);
4161
4162 /* Probe at BASE. */
4163 xops[1] = const0_rtx;
4164 output_asm_insn ("str\txzr, [%0, %1]", xops);
4165
4166 /* ADJUSTMENT = ADJUSTMENT - RESIDUAL_PROBE_GUARD. */
4167 xops[0] = adjustment;
4168 xops[1] = probe_offset_value_rtx;
4169 output_asm_insn ("sub\t%0, %0, %1", xops);
4170
4171 /* Branch to start if still more bytes to allocate. */
4172 fputs ("\tb\t", asm_out_file);
4173 assemble_name_raw (asm_out_file, loop_start_lab);
4174 fputc ('\n', asm_out_file);
4175
4176 /* No probe needed for the remaining adjustment; leave the loop. */
4177 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_end_lab);
4178
4179 /* BASE = BASE - ADJUSTMENT. */
4180 xops[0] = base;
4181 xops[1] = adjustment;
4182 output_asm_insn ("sub\t%0, %0, %1", xops);
4183 return "";
4184}
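/* Schematically (illustrative only), the sequence emitted above is

	.SVLPSPL0:
	cmp	adjustment, residual_probe_guard
	b.lt	.SVLPEND0
	sub	base, base, residual_probe_guard
	str	xzr, [base, 0]
	sub	adjustment, adjustment, residual_probe_guard
	b	.SVLPSPL0
	.SVLPEND0:
	sub	base, base, adjustment

   where residual_probe_guard is MIN_PROBE_THRESHOLD clamped to a value
   that a cmp immediate can encode.  */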
4185
d6cb6d6a
WD
4186/* Determine whether a frame chain needs to be generated. */
4187static bool
4188aarch64_needs_frame_chain (void)
4189{
4190 /* Force a frame chain for EH returns so the return address is at FP+8. */
4191 if (frame_pointer_needed || crtl->calls_eh_return)
4192 return true;
4193
4194 /* A leaf function cannot have calls or write LR. */
4195 bool is_leaf = crtl->is_leaf && !df_regs_ever_live_p (LR_REGNUM);
4196
4197 /* Don't use a frame chain in leaf functions if leaf frame pointers
4198 are disabled. */
4199 if (flag_omit_leaf_frame_pointer && is_leaf)
4200 return false;
4201
4202 return aarch64_use_frame_pointer;
4203}
4204
43e9d192
IB
4205/* Mark the registers that need to be saved by the callee and calculate
4206 the size of the callee-saved registers area and frame record (both FP
33a2e348 4207 and LR may be omitted). */
43e9d192
IB
4208static void
4209aarch64_layout_frame (void)
4210{
4211 HOST_WIDE_INT offset = 0;
4b0685d9 4212 int regno, last_fp_reg = INVALID_REGNUM;
43e9d192 4213
d6cb6d6a 4214 cfun->machine->frame.emit_frame_chain = aarch64_needs_frame_chain ();
7040939b 4215
8c6e3b23
TC
4216 /* Adjust the outgoing arguments size if required. Keep it in sync with what
4217 the mid-end is doing. */
4218 crtl->outgoing_args_size = STACK_DYNAMIC_OFFSET (cfun);
4219
97826595
MS
4220#define SLOT_NOT_REQUIRED (-2)
4221#define SLOT_REQUIRED (-1)
4222
71bfb77a
WD
4223 cfun->machine->frame.wb_candidate1 = INVALID_REGNUM;
4224 cfun->machine->frame.wb_candidate2 = INVALID_REGNUM;
363ffa50 4225
43e9d192
IB
4226 /* First mark all the registers that really need to be saved... */
4227 for (regno = R0_REGNUM; regno <= R30_REGNUM; regno++)
97826595 4228 cfun->machine->frame.reg_offset[regno] = SLOT_NOT_REQUIRED;
43e9d192
IB
4229
4230 for (regno = V0_REGNUM; regno <= V31_REGNUM; regno++)
97826595 4231 cfun->machine->frame.reg_offset[regno] = SLOT_NOT_REQUIRED;
43e9d192
IB
4232
4233 /* ... that includes the eh data registers (if needed)... */
4234 if (crtl->calls_eh_return)
4235 for (regno = 0; EH_RETURN_DATA_REGNO (regno) != INVALID_REGNUM; regno++)
97826595
MS
4236 cfun->machine->frame.reg_offset[EH_RETURN_DATA_REGNO (regno)]
4237 = SLOT_REQUIRED;
43e9d192
IB
4238
4239 /* ... and any callee saved register that dataflow says is live. */
4240 for (regno = R0_REGNUM; regno <= R30_REGNUM; regno++)
4241 if (df_regs_ever_live_p (regno)
1c923b60
JW
4242 && (regno == R30_REGNUM
4243 || !call_used_regs[regno]))
97826595 4244 cfun->machine->frame.reg_offset[regno] = SLOT_REQUIRED;
43e9d192
IB
4245
4246 for (regno = V0_REGNUM; regno <= V31_REGNUM; regno++)
4247 if (df_regs_ever_live_p (regno)
4248 && !call_used_regs[regno])
4b0685d9
WD
4249 {
4250 cfun->machine->frame.reg_offset[regno] = SLOT_REQUIRED;
4251 last_fp_reg = regno;
4252 }
43e9d192 4253
204d2c03 4254 if (cfun->machine->frame.emit_frame_chain)
43e9d192 4255 {
2e1cdae5 4256 /* FP and LR are placed in the linkage record. */
43e9d192 4257 cfun->machine->frame.reg_offset[R29_REGNUM] = 0;
363ffa50 4258 cfun->machine->frame.wb_candidate1 = R29_REGNUM;
2e1cdae5 4259 cfun->machine->frame.reg_offset[R30_REGNUM] = UNITS_PER_WORD;
363ffa50 4260 cfun->machine->frame.wb_candidate2 = R30_REGNUM;
1f7bffd0
WD
4261 offset = 2 * UNITS_PER_WORD;
4262 }
43e9d192 4263
db6b62a8
TC
4264 /* With stack-clash, LR must be saved in non-leaf functions. */
4265 gcc_assert (crtl->is_leaf
4266 || (cfun->machine->frame.reg_offset[R30_REGNUM]
4267 != SLOT_NOT_REQUIRED));
4268
43e9d192 4269 /* Now assign stack slots for them. */
2e1cdae5 4270 for (regno = R0_REGNUM; regno <= R30_REGNUM; regno++)
97826595 4271 if (cfun->machine->frame.reg_offset[regno] == SLOT_REQUIRED)
43e9d192
IB
4272 {
4273 cfun->machine->frame.reg_offset[regno] = offset;
71bfb77a 4274 if (cfun->machine->frame.wb_candidate1 == INVALID_REGNUM)
363ffa50 4275 cfun->machine->frame.wb_candidate1 = regno;
71bfb77a 4276 else if (cfun->machine->frame.wb_candidate2 == INVALID_REGNUM)
363ffa50 4277 cfun->machine->frame.wb_candidate2 = regno;
43e9d192
IB
4278 offset += UNITS_PER_WORD;
4279 }
4280
4b0685d9
WD
4281 HOST_WIDE_INT max_int_offset = offset;
4282 offset = ROUND_UP (offset, STACK_BOUNDARY / BITS_PER_UNIT);
4283 bool has_align_gap = offset != max_int_offset;
4284
43e9d192 4285 for (regno = V0_REGNUM; regno <= V31_REGNUM; regno++)
97826595 4286 if (cfun->machine->frame.reg_offset[regno] == SLOT_REQUIRED)
43e9d192 4287 {
4b0685d9
WD
4288 /* If there is an alignment gap between integer and fp callee-saves,
4289 allocate the last fp register to it if possible. */
4290 if (regno == last_fp_reg && has_align_gap && (offset & 8) == 0)
4291 {
4292 cfun->machine->frame.reg_offset[regno] = max_int_offset;
4293 break;
4294 }
4295
43e9d192 4296 cfun->machine->frame.reg_offset[regno] = offset;
71bfb77a 4297 if (cfun->machine->frame.wb_candidate1 == INVALID_REGNUM)
363ffa50 4298 cfun->machine->frame.wb_candidate1 = regno;
71bfb77a 4299 else if (cfun->machine->frame.wb_candidate2 == INVALID_REGNUM
363ffa50
JW
4300 && cfun->machine->frame.wb_candidate1 >= V0_REGNUM)
4301 cfun->machine->frame.wb_candidate2 = regno;
43e9d192
IB
4302 offset += UNITS_PER_WORD;
4303 }
4304
4f59f9f2 4305 offset = ROUND_UP (offset, STACK_BOUNDARY / BITS_PER_UNIT);
43e9d192
IB
4306
4307 cfun->machine->frame.saved_regs_size = offset;
1c960e02 4308
71bfb77a
WD
4309 HOST_WIDE_INT varargs_and_saved_regs_size
4310 = offset + cfun->machine->frame.saved_varargs_size;
4311
1c960e02 4312 cfun->machine->frame.hard_fp_offset
6a70badb
RS
4313 = aligned_upper_bound (varargs_and_saved_regs_size
4314 + get_frame_size (),
4315 STACK_BOUNDARY / BITS_PER_UNIT);
1c960e02 4316
6a70badb
RS
4317 /* Both these values are already aligned. */
4318 gcc_assert (multiple_p (crtl->outgoing_args_size,
4319 STACK_BOUNDARY / BITS_PER_UNIT));
1c960e02 4320 cfun->machine->frame.frame_size
6a70badb
RS
4321 = (cfun->machine->frame.hard_fp_offset
4322 + crtl->outgoing_args_size);
1c960e02 4323
71bfb77a
WD
4324 cfun->machine->frame.locals_offset = cfun->machine->frame.saved_varargs_size;
4325
4326 cfun->machine->frame.initial_adjust = 0;
4327 cfun->machine->frame.final_adjust = 0;
4328 cfun->machine->frame.callee_adjust = 0;
4329 cfun->machine->frame.callee_offset = 0;
4330
4331 HOST_WIDE_INT max_push_offset = 0;
4332 if (cfun->machine->frame.wb_candidate2 != INVALID_REGNUM)
4333 max_push_offset = 512;
4334 else if (cfun->machine->frame.wb_candidate1 != INVALID_REGNUM)
4335 max_push_offset = 256;
4336
6a70badb
RS
4337 HOST_WIDE_INT const_size, const_fp_offset;
4338 if (cfun->machine->frame.frame_size.is_constant (&const_size)
4339 && const_size < max_push_offset
4340 && known_eq (crtl->outgoing_args_size, 0))
71bfb77a
WD
4341 {
4342 /* Simple, small frame with no outgoing arguments:
4343 stp reg1, reg2, [sp, -frame_size]!
4344 stp reg3, reg4, [sp, 16] */
6a70badb 4345 cfun->machine->frame.callee_adjust = const_size;
71bfb77a 4346 }
6a70badb
RS
4347 else if (known_lt (crtl->outgoing_args_size
4348 + cfun->machine->frame.saved_regs_size, 512)
71bfb77a 4349 && !(cfun->calls_alloca
6a70badb
RS
4350 && known_lt (cfun->machine->frame.hard_fp_offset,
4351 max_push_offset)))
71bfb77a
WD
4352 {
4353 /* Frame with small outgoing arguments:
4354 sub sp, sp, frame_size
4355 stp reg1, reg2, [sp, outgoing_args_size]
4356 stp reg3, reg4, [sp, outgoing_args_size + 16] */
4357 cfun->machine->frame.initial_adjust = cfun->machine->frame.frame_size;
4358 cfun->machine->frame.callee_offset
4359 = cfun->machine->frame.frame_size - cfun->machine->frame.hard_fp_offset;
4360 }
6a70badb
RS
4361 else if (cfun->machine->frame.hard_fp_offset.is_constant (&const_fp_offset)
4362 && const_fp_offset < max_push_offset)
71bfb77a
WD
4363 {
4364 /* Frame with large outgoing arguments but a small local area:
4365 stp reg1, reg2, [sp, -hard_fp_offset]!
4366 stp reg3, reg4, [sp, 16]
4367 sub sp, sp, outgoing_args_size */
6a70badb 4368 cfun->machine->frame.callee_adjust = const_fp_offset;
71bfb77a
WD
4369 cfun->machine->frame.final_adjust
4370 = cfun->machine->frame.frame_size - cfun->machine->frame.callee_adjust;
4371 }
71bfb77a
WD
4372 else
4373 {
4374 /* Frame with large local area and outgoing arguments using frame pointer:
4375 sub sp, sp, hard_fp_offset
4376 stp x29, x30, [sp, 0]
4377 add x29, sp, 0
4378 stp reg3, reg4, [sp, 16]
4379 sub sp, sp, outgoing_args_size */
4380 cfun->machine->frame.initial_adjust = cfun->machine->frame.hard_fp_offset;
4381 cfun->machine->frame.final_adjust
4382 = cfun->machine->frame.frame_size - cfun->machine->frame.initial_adjust;
4383 }
4384
43e9d192
IB
4385 cfun->machine->frame.laid_out = true;
4386}
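/* Worked example (illustrative only): a function that needs a frame
   chain and also saves x19 and x20, with 16 bytes of locals and no
   outgoing arguments, gets saved_regs_size == 32 and frame_size == 48.
   That is below the 512-byte push limit, so the first case above
   applies, giving callee_adjust == 48 and a prologue of roughly

	stp	x29, x30, [sp, -48]!
	stp	x19, x20, [sp, 16]

   with initial_adjust, final_adjust and callee_offset all zero.  */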
4387
04ddfe06
KT
4388/* Return true if the register REGNO is saved on entry to
4389 the current function. */
4390
43e9d192
IB
4391static bool
4392aarch64_register_saved_on_entry (int regno)
4393{
97826595 4394 return cfun->machine->frame.reg_offset[regno] >= 0;
43e9d192
IB
4395}
4396
04ddfe06
KT
4397/* Return the next register up from REGNO up to LIMIT for the callee
4398 to save. */
4399
64dedd72
JW
4400static unsigned
4401aarch64_next_callee_save (unsigned regno, unsigned limit)
4402{
4403 while (regno <= limit && !aarch64_register_saved_on_entry (regno))
4404 regno ++;
4405 return regno;
4406}
43e9d192 4407
04ddfe06
KT
4408/* Push the register number REGNO of mode MODE to the stack with write-back
4409 adjusting the stack by ADJUSTMENT. */
4410
c5e1f66e 4411static void
ef4bddc2 4412aarch64_pushwb_single_reg (machine_mode mode, unsigned regno,
c5e1f66e
JW
4413 HOST_WIDE_INT adjustment)
4414 {
4415 rtx base_rtx = stack_pointer_rtx;
4416 rtx insn, reg, mem;
4417
4418 reg = gen_rtx_REG (mode, regno);
4419 mem = gen_rtx_PRE_MODIFY (Pmode, base_rtx,
4420 plus_constant (Pmode, base_rtx, -adjustment));
30079dde 4421 mem = gen_frame_mem (mode, mem);
c5e1f66e
JW
4422
4423 insn = emit_move_insn (mem, reg);
4424 RTX_FRAME_RELATED_P (insn) = 1;
4425}
4426
04ddfe06
KT
4427/* Generate and return an instruction to store the pair of registers
4428 REG and REG2 of mode MODE to location BASE with write-back adjusting
4429 the stack location BASE by ADJUSTMENT. */
4430
80c11907 4431static rtx
ef4bddc2 4432aarch64_gen_storewb_pair (machine_mode mode, rtx base, rtx reg, rtx reg2,
80c11907
JW
4433 HOST_WIDE_INT adjustment)
4434{
4435 switch (mode)
4436 {
4e10a5a7 4437 case E_DImode:
80c11907
JW
4438 return gen_storewb_pairdi_di (base, base, reg, reg2,
4439 GEN_INT (-adjustment),
4440 GEN_INT (UNITS_PER_WORD - adjustment));
4e10a5a7 4441 case E_DFmode:
80c11907
JW
4442 return gen_storewb_pairdf_di (base, base, reg, reg2,
4443 GEN_INT (-adjustment),
4444 GEN_INT (UNITS_PER_WORD - adjustment));
4445 default:
4446 gcc_unreachable ();
4447 }
4448}
4449
04ddfe06
KT
4450/* Push registers numbered REGNO1 and REGNO2 to the stack, adjusting the
4451 stack pointer by ADJUSTMENT. */
4452
80c11907 4453static void
89ac681e 4454aarch64_push_regs (unsigned regno1, unsigned regno2, HOST_WIDE_INT adjustment)
80c11907 4455{
5d8a22a5 4456 rtx_insn *insn;
0d4a1197 4457 machine_mode mode = (regno1 <= R30_REGNUM) ? E_DImode : E_DFmode;
89ac681e 4458
71bfb77a 4459 if (regno2 == INVALID_REGNUM)
89ac681e
WD
4460 return aarch64_pushwb_single_reg (mode, regno1, adjustment);
4461
80c11907
JW
4462 rtx reg1 = gen_rtx_REG (mode, regno1);
4463 rtx reg2 = gen_rtx_REG (mode, regno2);
4464
4465 insn = emit_insn (aarch64_gen_storewb_pair (mode, stack_pointer_rtx, reg1,
4466 reg2, adjustment));
4467 RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 2)) = 1;
80c11907
JW
4468 RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 1)) = 1;
4469 RTX_FRAME_RELATED_P (insn) = 1;
4470}
4471
04ddfe06
KT
4472/* Load the pair of register REG, REG2 of mode MODE from stack location BASE,
4473 adjusting it by ADJUSTMENT afterwards. */
4474
159313d9 4475static rtx
ef4bddc2 4476aarch64_gen_loadwb_pair (machine_mode mode, rtx base, rtx reg, rtx reg2,
159313d9
JW
4477 HOST_WIDE_INT adjustment)
4478{
4479 switch (mode)
4480 {
4e10a5a7 4481 case E_DImode:
159313d9 4482 return gen_loadwb_pairdi_di (base, base, reg, reg2, GEN_INT (adjustment),
3e322b3f 4483 GEN_INT (UNITS_PER_WORD));
4e10a5a7 4484 case E_DFmode:
159313d9 4485 return gen_loadwb_pairdf_di (base, base, reg, reg2, GEN_INT (adjustment),
3e322b3f 4486 GEN_INT (UNITS_PER_WORD));
159313d9
JW
4487 default:
4488 gcc_unreachable ();
4489 }
4490}
4491
04ddfe06
KT
4492/* Pop the two registers numbered REGNO1, REGNO2 from the stack, adjusting it
4493 afterwards by ADJUSTMENT and writing the appropriate REG_CFA_RESTORE notes
4494 into CFI_OPS. */
4495
89ac681e
WD
4496static void
4497aarch64_pop_regs (unsigned regno1, unsigned regno2, HOST_WIDE_INT adjustment,
4498 rtx *cfi_ops)
4499{
0d4a1197 4500 machine_mode mode = (regno1 <= R30_REGNUM) ? E_DImode : E_DFmode;
89ac681e
WD
4501 rtx reg1 = gen_rtx_REG (mode, regno1);
4502
4503 *cfi_ops = alloc_reg_note (REG_CFA_RESTORE, reg1, *cfi_ops);
4504
71bfb77a 4505 if (regno2 == INVALID_REGNUM)
89ac681e
WD
4506 {
4507 rtx mem = plus_constant (Pmode, stack_pointer_rtx, adjustment);
4508 mem = gen_rtx_POST_MODIFY (Pmode, stack_pointer_rtx, mem);
30079dde 4509 emit_move_insn (reg1, gen_frame_mem (mode, mem));
89ac681e
WD
4510 }
4511 else
4512 {
4513 rtx reg2 = gen_rtx_REG (mode, regno2);
4514 *cfi_ops = alloc_reg_note (REG_CFA_RESTORE, reg2, *cfi_ops);
4515 emit_insn (aarch64_gen_loadwb_pair (mode, stack_pointer_rtx, reg1,
4516 reg2, adjustment));
4517 }
4518}
4519
04ddfe06
KT
4520/* Generate and return a store pair instruction of mode MODE to store
4521 register REG1 to MEM1 and register REG2 to MEM2. */
4522
72df5c1f 4523static rtx
ef4bddc2 4524aarch64_gen_store_pair (machine_mode mode, rtx mem1, rtx reg1, rtx mem2,
72df5c1f
JW
4525 rtx reg2)
4526{
4527 switch (mode)
4528 {
4e10a5a7 4529 case E_DImode:
dfe1da23 4530 return gen_store_pair_dw_didi (mem1, reg1, mem2, reg2);
72df5c1f 4531
4e10a5a7 4532 case E_DFmode:
dfe1da23 4533 return gen_store_pair_dw_dfdf (mem1, reg1, mem2, reg2);
72df5c1f
JW
4534
4535 default:
4536 gcc_unreachable ();
4537 }
4538}
4539
04ddfe06
KT
4540/* Generate and return a load pair instruction of mode MODE to load register
4541 REG1 from MEM1 and register REG2 from MEM2. */
4542
72df5c1f 4543static rtx
ef4bddc2 4544aarch64_gen_load_pair (machine_mode mode, rtx reg1, rtx mem1, rtx reg2,
72df5c1f
JW
4545 rtx mem2)
4546{
4547 switch (mode)
4548 {
4e10a5a7 4549 case E_DImode:
dfe1da23 4550 return gen_load_pair_dw_didi (reg1, mem1, reg2, mem2);
72df5c1f 4551
4e10a5a7 4552 case E_DFmode:
dfe1da23 4553 return gen_load_pair_dw_dfdf (reg1, mem1, reg2, mem2);
72df5c1f
JW
4554
4555 default:
4556 gcc_unreachable ();
4557 }
4558}
4559
db58fd89
JW
4560/* Return TRUE if return address signing should be enabled for the current
4561 function, otherwise return FALSE. */
4562
4563bool
4564aarch64_return_address_signing_enabled (void)
4565{
4566 /* This function should only be called after the frame is laid out. */
4567 gcc_assert (cfun->machine->frame.laid_out);
4568
4569 /* If signing scope is AARCH64_FUNCTION_NON_LEAF, we only sign a leaf function
4570 if its LR is pushed onto the stack. */
4571 return (aarch64_ra_sign_scope == AARCH64_FUNCTION_ALL
4572 || (aarch64_ra_sign_scope == AARCH64_FUNCTION_NON_LEAF
4573 && cfun->machine->frame.reg_offset[LR_REGNUM] >= 0));
4574}
4575
04ddfe06
KT
4576/* Emit code to save the callee-saved registers from register number START
4577 to LIMIT to the stack at the location starting at offset START_OFFSET,
4578 skipping any write-back candidates if SKIP_WB is true. */
43e9d192 4579
43e9d192 4580static void
6a70badb 4581aarch64_save_callee_saves (machine_mode mode, poly_int64 start_offset,
ae13fce3 4582 unsigned start, unsigned limit, bool skip_wb)
43e9d192 4583{
5d8a22a5 4584 rtx_insn *insn;
43e9d192
IB
4585 unsigned regno;
4586 unsigned regno2;
4587
0ec74a1e 4588 for (regno = aarch64_next_callee_save (start, limit);
64dedd72
JW
4589 regno <= limit;
4590 regno = aarch64_next_callee_save (regno + 1, limit))
43e9d192 4591 {
ae13fce3 4592 rtx reg, mem;
6a70badb 4593 poly_int64 offset;
64dedd72 4594
ae13fce3
JW
4595 if (skip_wb
4596 && (regno == cfun->machine->frame.wb_candidate1
4597 || regno == cfun->machine->frame.wb_candidate2))
4598 continue;
4599
827ab47a
KT
4600 if (cfun->machine->reg_is_wrapped_separately[regno])
4601 continue;
4602
ae13fce3
JW
4603 reg = gen_rtx_REG (mode, regno);
4604 offset = start_offset + cfun->machine->frame.reg_offset[regno];
30079dde
WD
4605 mem = gen_frame_mem (mode, plus_constant (Pmode, stack_pointer_rtx,
4606 offset));
64dedd72
JW
4607
4608 regno2 = aarch64_next_callee_save (regno + 1, limit);
4609
4610 if (regno2 <= limit
827ab47a 4611 && !cfun->machine->reg_is_wrapped_separately[regno2]
64dedd72
JW
4612 && ((cfun->machine->frame.reg_offset[regno] + UNITS_PER_WORD)
4613 == cfun->machine->frame.reg_offset[regno2]))
4614
43e9d192 4615 {
0ec74a1e 4616 rtx reg2 = gen_rtx_REG (mode, regno2);
64dedd72
JW
4617 rtx mem2;
4618
4619 offset = start_offset + cfun->machine->frame.reg_offset[regno2];
30079dde
WD
4620 mem2 = gen_frame_mem (mode, plus_constant (Pmode, stack_pointer_rtx,
4621 offset));
8ed2fc62
JW
4622 insn = emit_insn (aarch64_gen_store_pair (mode, mem, reg, mem2,
4623 reg2));
0b4a9743 4624
64dedd72
JW
4625 /* The first part of a frame-related parallel insn is
4626 always assumed to be relevant to the frame
4627 calculations; subsequent parts, are only
4628 frame-related if explicitly marked. */
4629 RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 1)) = 1;
4630 regno = regno2;
4631 }
4632 else
8ed2fc62
JW
4633 insn = emit_move_insn (mem, reg);
4634
4635 RTX_FRAME_RELATED_P (insn) = 1;
4636 }
4637}
4638
04ddfe06
KT
4639/* Emit code to restore the callee registers of mode MODE from register
4640 number START up to and including LIMIT. Restore from the stack offset
4641 START_OFFSET, skipping any write-back candidates if SKIP_WB is true.
4642 Write the appropriate REG_CFA_RESTORE notes into CFI_OPS. */
4643
8ed2fc62 4644static void
ef4bddc2 4645aarch64_restore_callee_saves (machine_mode mode,
6a70badb 4646 poly_int64 start_offset, unsigned start,
dd991abb 4647 unsigned limit, bool skip_wb, rtx *cfi_ops)
8ed2fc62 4648{
8ed2fc62 4649 rtx base_rtx = stack_pointer_rtx;
8ed2fc62
JW
4650 unsigned regno;
4651 unsigned regno2;
6a70badb 4652 poly_int64 offset;
8ed2fc62
JW
4653
4654 for (regno = aarch64_next_callee_save (start, limit);
4655 regno <= limit;
4656 regno = aarch64_next_callee_save (regno + 1, limit))
4657 {
827ab47a
KT
4658 if (cfun->machine->reg_is_wrapped_separately[regno])
4659 continue;
4660
ae13fce3 4661 rtx reg, mem;
8ed2fc62 4662
ae13fce3
JW
4663 if (skip_wb
4664 && (regno == cfun->machine->frame.wb_candidate1
4665 || regno == cfun->machine->frame.wb_candidate2))
4666 continue;
4667
4668 reg = gen_rtx_REG (mode, regno);
8ed2fc62 4669 offset = start_offset + cfun->machine->frame.reg_offset[regno];
30079dde 4670 mem = gen_frame_mem (mode, plus_constant (Pmode, base_rtx, offset));
8ed2fc62
JW
4671
4672 regno2 = aarch64_next_callee_save (regno + 1, limit);
4673
4674 if (regno2 <= limit
827ab47a 4675 && !cfun->machine->reg_is_wrapped_separately[regno2]
8ed2fc62
JW
4676 && ((cfun->machine->frame.reg_offset[regno] + UNITS_PER_WORD)
4677 == cfun->machine->frame.reg_offset[regno2]))
64dedd72 4678 {
8ed2fc62
JW
4679 rtx reg2 = gen_rtx_REG (mode, regno2);
4680 rtx mem2;
4681
4682 offset = start_offset + cfun->machine->frame.reg_offset[regno2];
30079dde 4683 mem2 = gen_frame_mem (mode, plus_constant (Pmode, base_rtx, offset));
dd991abb 4684 emit_insn (aarch64_gen_load_pair (mode, reg, mem, reg2, mem2));
8ed2fc62 4685
dd991abb 4686 *cfi_ops = alloc_reg_note (REG_CFA_RESTORE, reg2, *cfi_ops);
8ed2fc62 4687 regno = regno2;
43e9d192 4688 }
8ed2fc62 4689 else
dd991abb
RH
4690 emit_move_insn (reg, mem);
4691 *cfi_ops = alloc_reg_note (REG_CFA_RESTORE, reg, *cfi_ops);
43e9d192 4692 }
43e9d192
IB
4693}
4694
43cacb12
RS
4695/* Return true if OFFSET is a signed 4-bit value multiplied by the size
4696 of MODE. */
4697
4698static inline bool
4699offset_4bit_signed_scaled_p (machine_mode mode, poly_int64 offset)
4700{
4701 HOST_WIDE_INT multiple;
4702 return (constant_multiple_p (offset, GET_MODE_SIZE (mode), &multiple)
4703 && IN_RANGE (multiple, -8, 7));
4704}
4705
4706/* Return true if OFFSET is an unsigned 6-bit value multiplied by the size
4707 of MODE. */
4708
4709static inline bool
4710offset_6bit_unsigned_scaled_p (machine_mode mode, poly_int64 offset)
4711{
4712 HOST_WIDE_INT multiple;
4713 return (constant_multiple_p (offset, GET_MODE_SIZE (mode), &multiple)
4714 && IN_RANGE (multiple, 0, 63));
4715}
4716
4717/* Return true if OFFSET is a signed 7-bit value multiplied by the size
4718 of MODE. */
4719
4720bool
4721aarch64_offset_7bit_signed_scaled_p (machine_mode mode, poly_int64 offset)
4722{
4723 HOST_WIDE_INT multiple;
4724 return (constant_multiple_p (offset, GET_MODE_SIZE (mode), &multiple)
4725 && IN_RANGE (multiple, -64, 63));
4726}
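/* As a concrete illustration (an assumption, not from the original
   source): for DImode, whose size is 8 bytes, the predicate above
   accepts byte offsets -512, -504, ..., 504, and the 12-bit unsigned
   scaled predicate further down accepts 0, 8, ..., 32760.  */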
4727
4728/* Return true if OFFSET is a signed 9-bit value. */
4729
3c5af608
MM
4730bool
4731aarch64_offset_9bit_signed_unscaled_p (machine_mode mode ATTRIBUTE_UNUSED,
4732 poly_int64 offset)
827ab47a 4733{
6a70badb
RS
4734 HOST_WIDE_INT const_offset;
4735 return (offset.is_constant (&const_offset)
4736 && IN_RANGE (const_offset, -256, 255));
827ab47a
KT
4737}
4738
43cacb12
RS
4739/* Return true if OFFSET is a signed 9-bit value multiplied by the size
4740 of MODE. */
4741
827ab47a 4742static inline bool
43cacb12 4743offset_9bit_signed_scaled_p (machine_mode mode, poly_int64 offset)
827ab47a 4744{
6a70badb
RS
4745 HOST_WIDE_INT multiple;
4746 return (constant_multiple_p (offset, GET_MODE_SIZE (mode), &multiple)
43cacb12 4747 && IN_RANGE (multiple, -256, 255));
827ab47a
KT
4748}
4749
43cacb12
RS
4750/* Return true if OFFSET is an unsigned 12-bit value multiplied by the size
4751 of MODE. */
4752
4753static inline bool
4754offset_12bit_unsigned_scaled_p (machine_mode mode, poly_int64 offset)
827ab47a 4755{
6a70badb
RS
4756 HOST_WIDE_INT multiple;
4757 return (constant_multiple_p (offset, GET_MODE_SIZE (mode), &multiple)
43cacb12 4758 && IN_RANGE (multiple, 0, 4095));
827ab47a
KT
4759}
4760
4761/* Implement TARGET_SHRINK_WRAP_GET_SEPARATE_COMPONENTS. */
4762
4763static sbitmap
4764aarch64_get_separate_components (void)
4765{
827ab47a
KT
4766 sbitmap components = sbitmap_alloc (LAST_SAVED_REGNUM + 1);
4767 bitmap_clear (components);
4768
4769 /* The registers that we need to save to the frame. */
4770 for (unsigned regno = 0; regno <= LAST_SAVED_REGNUM; regno++)
4771 if (aarch64_register_saved_on_entry (regno))
4772 {
6a70badb 4773 poly_int64 offset = cfun->machine->frame.reg_offset[regno];
827ab47a
KT
4774 if (!frame_pointer_needed)
4775 offset += cfun->machine->frame.frame_size
4776 - cfun->machine->frame.hard_fp_offset;
4777 /* Check that we can access the stack slot of the register with one
4778 direct load with no adjustments needed. */
4779 if (offset_12bit_unsigned_scaled_p (DImode, offset))
4780 bitmap_set_bit (components, regno);
4781 }
4782
4783 /* Don't mess with the hard frame pointer. */
4784 if (frame_pointer_needed)
4785 bitmap_clear_bit (components, HARD_FRAME_POINTER_REGNUM);
4786
4787 unsigned reg1 = cfun->machine->frame.wb_candidate1;
4788 unsigned reg2 = cfun->machine->frame.wb_candidate2;
0795f659 4789 /* If registers have been chosen to be stored/restored with
827ab47a
KT
4790 writeback don't interfere with them to avoid having to output explicit
4791 stack adjustment instructions. */
4792 if (reg2 != INVALID_REGNUM)
4793 bitmap_clear_bit (components, reg2);
4794 if (reg1 != INVALID_REGNUM)
4795 bitmap_clear_bit (components, reg1);
4796
4797 bitmap_clear_bit (components, LR_REGNUM);
4798 bitmap_clear_bit (components, SP_REGNUM);
4799
4800 return components;
4801}
4802
4803/* Implement TARGET_SHRINK_WRAP_COMPONENTS_FOR_BB. */
4804
4805static sbitmap
4806aarch64_components_for_bb (basic_block bb)
4807{
4808 bitmap in = DF_LIVE_IN (bb);
4809 bitmap gen = &DF_LIVE_BB_INFO (bb)->gen;
4810 bitmap kill = &DF_LIVE_BB_INFO (bb)->kill;
4811
4812 sbitmap components = sbitmap_alloc (LAST_SAVED_REGNUM + 1);
4813 bitmap_clear (components);
4814
4815 /* GPRs are used in a bb if they are in the IN, GEN, or KILL sets. */
4816 for (unsigned regno = 0; regno <= LAST_SAVED_REGNUM; regno++)
4817 if ((!call_used_regs[regno])
4818 && (bitmap_bit_p (in, regno)
4819 || bitmap_bit_p (gen, regno)
4820 || bitmap_bit_p (kill, regno)))
3f26f054
WD
4821 {
4822 unsigned regno2, offset, offset2;
4823 bitmap_set_bit (components, regno);
4824
4825 /* If there is a callee-save at an adjacent offset, add it too,
4826 to increase the use of LDP/STP. */
4827 offset = cfun->machine->frame.reg_offset[regno];
4828 regno2 = ((offset & 8) == 0) ? regno + 1 : regno - 1;
4829
4830 if (regno2 <= LAST_SAVED_REGNUM)
4831 {
4832 offset2 = cfun->machine->frame.reg_offset[regno2];
4833 if ((offset & ~8) == (offset2 & ~8))
4834 bitmap_set_bit (components, regno2);
4835 }
4836 }
827ab47a
KT
4837
4838 return components;
4839}
4840
4841/* Implement TARGET_SHRINK_WRAP_DISQUALIFY_COMPONENTS.
4842 Nothing to do for aarch64. */
4843
4844static void
4845aarch64_disqualify_components (sbitmap, edge, sbitmap, bool)
4846{
4847}
4848
4849/* Return the next set bit in BMP from START onwards. Return the total number
4850 of bits in BMP if no set bit is found at or after START. */
4851
4852static unsigned int
4853aarch64_get_next_set_bit (sbitmap bmp, unsigned int start)
4854{
4855 unsigned int nbits = SBITMAP_SIZE (bmp);
4856 if (start == nbits)
4857 return start;
4858
4859 gcc_assert (start < nbits);
4860 for (unsigned int i = start; i < nbits; i++)
4861 if (bitmap_bit_p (bmp, i))
4862 return i;
4863
4864 return nbits;
4865}
4866
4867/* Do the work for aarch64_emit_prologue_components and
4868 aarch64_emit_epilogue_components. COMPONENTS is the bitmap of registers
4869 to save/restore, PROLOGUE_P indicates whether to emit the prologue sequence
4870 for these components or the epilogue sequence. That is, it determines
4871 whether we should emit stores or loads and what kind of CFA notes to attach
4872 to the insns. Otherwise the logic for the two sequences is very
4873 similar. */
4874
4875static void
4876aarch64_process_components (sbitmap components, bool prologue_p)
4877{
4878 rtx ptr_reg = gen_rtx_REG (Pmode, frame_pointer_needed
4879 ? HARD_FRAME_POINTER_REGNUM
4880 : STACK_POINTER_REGNUM);
4881
4882 unsigned last_regno = SBITMAP_SIZE (components);
4883 unsigned regno = aarch64_get_next_set_bit (components, R0_REGNUM);
4884 rtx_insn *insn = NULL;
4885
4886 while (regno != last_regno)
4887 {
4888 /* AAPCS64 section 5.1.2 requires only the bottom 64 bits to be saved
4889 so DFmode for the vector registers is enough. */
0d4a1197 4890 machine_mode mode = GP_REGNUM_P (regno) ? E_DImode : E_DFmode;
827ab47a 4891 rtx reg = gen_rtx_REG (mode, regno);
6a70badb 4892 poly_int64 offset = cfun->machine->frame.reg_offset[regno];
827ab47a
KT
4893 if (!frame_pointer_needed)
4894 offset += cfun->machine->frame.frame_size
4895 - cfun->machine->frame.hard_fp_offset;
4896 rtx addr = plus_constant (Pmode, ptr_reg, offset);
4897 rtx mem = gen_frame_mem (mode, addr);
4898
4899 rtx set = prologue_p ? gen_rtx_SET (mem, reg) : gen_rtx_SET (reg, mem);
4900 unsigned regno2 = aarch64_get_next_set_bit (components, regno + 1);
4901 /* No more registers to handle after REGNO.
4902 Emit a single save/restore and exit. */
4903 if (regno2 == last_regno)
4904 {
4905 insn = emit_insn (set);
4906 RTX_FRAME_RELATED_P (insn) = 1;
4907 if (prologue_p)
4908 add_reg_note (insn, REG_CFA_OFFSET, copy_rtx (set));
4909 else
4910 add_reg_note (insn, REG_CFA_RESTORE, reg);
4911 break;
4912 }
4913
6a70badb 4914 poly_int64 offset2 = cfun->machine->frame.reg_offset[regno2];
827ab47a
KT
4915 /* The next register is not of the same class or its offset is not
4916 mergeable with the current one into a pair. */
4917 if (!satisfies_constraint_Ump (mem)
4918 || GP_REGNUM_P (regno) != GP_REGNUM_P (regno2)
6a70badb
RS
4919 || maybe_ne ((offset2 - cfun->machine->frame.reg_offset[regno]),
4920 GET_MODE_SIZE (mode)))
827ab47a
KT
4921 {
4922 insn = emit_insn (set);
4923 RTX_FRAME_RELATED_P (insn) = 1;
4924 if (prologue_p)
4925 add_reg_note (insn, REG_CFA_OFFSET, copy_rtx (set));
4926 else
4927 add_reg_note (insn, REG_CFA_RESTORE, reg);
4928
4929 regno = regno2;
4930 continue;
4931 }
4932
4933 /* REGNO2 can be saved/restored in a pair with REGNO. */
4934 rtx reg2 = gen_rtx_REG (mode, regno2);
4935 if (!frame_pointer_needed)
4936 offset2 += cfun->machine->frame.frame_size
4937 - cfun->machine->frame.hard_fp_offset;
4938 rtx addr2 = plus_constant (Pmode, ptr_reg, offset2);
4939 rtx mem2 = gen_frame_mem (mode, addr2);
4940 rtx set2 = prologue_p ? gen_rtx_SET (mem2, reg2)
4941 : gen_rtx_SET (reg2, mem2);
4942
4943 if (prologue_p)
4944 insn = emit_insn (aarch64_gen_store_pair (mode, mem, reg, mem2, reg2));
4945 else
4946 insn = emit_insn (aarch64_gen_load_pair (mode, reg, mem, reg2, mem2));
4947
4948 RTX_FRAME_RELATED_P (insn) = 1;
4949 if (prologue_p)
4950 {
4951 add_reg_note (insn, REG_CFA_OFFSET, set);
4952 add_reg_note (insn, REG_CFA_OFFSET, set2);
4953 }
4954 else
4955 {
4956 add_reg_note (insn, REG_CFA_RESTORE, reg);
4957 add_reg_note (insn, REG_CFA_RESTORE, reg2);
4958 }
4959
4960 regno = aarch64_get_next_set_bit (components, regno2 + 1);
4961 }
4962}
4963
4964/* Implement TARGET_SHRINK_WRAP_EMIT_PROLOGUE_COMPONENTS. */
4965
4966static void
4967aarch64_emit_prologue_components (sbitmap components)
4968{
4969 aarch64_process_components (components, true);
4970}
4971
4972/* Implement TARGET_SHRINK_WRAP_EMIT_EPILOGUE_COMPONENTS. */
4973
4974static void
4975aarch64_emit_epilogue_components (sbitmap components)
4976{
4977 aarch64_process_components (components, false);
4978}
4979
4980/* Implement TARGET_SHRINK_WRAP_SET_HANDLED_COMPONENTS. */
4981
4982static void
4983aarch64_set_handled_components (sbitmap components)
4984{
4985 for (unsigned regno = 0; regno <= LAST_SAVED_REGNUM; regno++)
4986 if (bitmap_bit_p (components, regno))
4987 cfun->machine->reg_is_wrapped_separately[regno] = true;
4988}
4989
8c6e3b23
TC
4990/* On AArch64 we have an ABI defined safe buffer. This constant is used to
4991 determine the probe offset for alloca. */
4992
4993static HOST_WIDE_INT
4994aarch64_stack_clash_protection_alloca_probe_range (void)
4995{
4996 return STACK_CLASH_CALLER_GUARD;
4997}
4998
4999
cd1bef27
JL
5000/* Allocate POLY_SIZE bytes of stack space using TEMP1 and TEMP2 as scratch
5001 registers. If POLY_SIZE is not large enough to require a probe this function
5002 will only adjust the stack. When allocating the stack space
5003 FRAME_RELATED_P is then used to indicate if the allocation is frame related.
5004 FINAL_ADJUSTMENT_P indicates whether we are allocating the outgoing
5005 arguments. If we are then we ensure that any allocation larger than the ABI
5006 defined buffer needs a probe so that the invariant of having a 1KB buffer is
5007 maintained.
5008
5009 We emit barriers after each stack adjustment to prevent optimizations from
5010 breaking the invariant that we never drop the stack more than a page. This
5011 invariant is needed to make it easier to correctly handle asynchronous
5012 events, e.g. if we were to allow the stack to be dropped by more than a page
5013 and then emit multiple probes for it, and a signal were taken somewhere in
5014 between, the signal handler would not know the state of the stack and could
5015 make no assumptions about which pages have been probed. */
5016
5017static void
5018aarch64_allocate_and_probe_stack_space (rtx temp1, rtx temp2,
5019 poly_int64 poly_size,
5020 bool frame_related_p,
5021 bool final_adjustment_p)
5022{
5023 HOST_WIDE_INT guard_size
5024 = 1 << PARAM_VALUE (PARAM_STACK_CLASH_PROTECTION_GUARD_SIZE);
5025 HOST_WIDE_INT guard_used_by_caller = STACK_CLASH_CALLER_GUARD;
5026 /* When doing the final adjustment for the outgoing argument size we can't
5027 assume that LR was saved at position 0, so subtract its offset from the
5028 ABI safe buffer so that we don't accidentally allow an adjustment that
5029 would result in an allocation larger than the ABI buffer without
5030 probing. */
5031 HOST_WIDE_INT min_probe_threshold
5032 = final_adjustment_p
5033 ? guard_used_by_caller - cfun->machine->frame.reg_offset[LR_REGNUM]
5034 : guard_size - guard_used_by_caller;
5035
5036 poly_int64 frame_size = cfun->machine->frame.frame_size;
5037
5038 /* We should always have a positive probe threshold. */
5039 gcc_assert (min_probe_threshold > 0);
5040
5041 if (flag_stack_clash_protection && !final_adjustment_p)
5042 {
5043 poly_int64 initial_adjust = cfun->machine->frame.initial_adjust;
5044 poly_int64 final_adjust = cfun->machine->frame.final_adjust;
5045
5046 if (known_eq (frame_size, 0))
5047 {
5048 dump_stack_clash_frame_info (NO_PROBE_NO_FRAME, false);
5049 }
5050 else if (known_lt (initial_adjust, guard_size - guard_used_by_caller)
5051 && known_lt (final_adjust, guard_used_by_caller))
5052 {
5053 dump_stack_clash_frame_info (NO_PROBE_SMALL_FRAME, true);
5054 }
5055 }
5056
cd1bef27
JL
5057 /* If SIZE is not large enough to require probing, just adjust the stack and
5058 exit. */
eb471ba3 5059 if (known_lt (poly_size, min_probe_threshold)
cd1bef27
JL
5060 || !flag_stack_clash_protection)
5061 {
5062 aarch64_sub_sp (temp1, temp2, poly_size, frame_related_p);
5063 return;
5064 }
5065
eb471ba3
TC
5066 HOST_WIDE_INT size;
5067 /* Handle the SVE non-constant case first. */
5068 if (!poly_size.is_constant (&size))
5069 {
5070 if (dump_file)
5071 {
5072 fprintf (dump_file, "Stack clash SVE prologue: ");
5073 print_dec (poly_size, dump_file);
5074 fprintf (dump_file, " bytes, dynamic probing will be required.\n");
5075 }
5076
5077 /* First calculate the amount of bytes we're actually spilling. */
5078 aarch64_add_offset (Pmode, temp1, CONST0_RTX (Pmode),
5079 poly_size, temp1, temp2, false, true);
5080
5081 rtx_insn *insn = get_last_insn ();
5082
5083 if (frame_related_p)
5084 {
5085 /* This is done to provide unwinding information for the stack
5086 adjustments we're about to do; however, to prevent the optimizers
5087 from removing the R15 move and leaving the CFA note (which would be
5088 very wrong) we tie the old and new stack pointer together.
5089 The tie will expand to nothing but the optimizers will not touch
5090 the instruction. */
5091 rtx stack_ptr_copy = gen_rtx_REG (Pmode, R15_REGNUM);
5092 emit_move_insn (stack_ptr_copy, stack_pointer_rtx);
5093 emit_insn (gen_stack_tie (stack_ptr_copy, stack_pointer_rtx));
5094
5095 /* We want the CFA independent of the stack pointer for the
5096 duration of the loop. */
5097 add_reg_note (insn, REG_CFA_DEF_CFA, stack_ptr_copy);
5098 RTX_FRAME_RELATED_P (insn) = 1;
5099 }
5100
5101 rtx probe_const = gen_int_mode (min_probe_threshold, Pmode);
5102 rtx guard_const = gen_int_mode (guard_size, Pmode);
5103
5104 insn = emit_insn (gen_probe_sve_stack_clash (Pmode, stack_pointer_rtx,
5105 stack_pointer_rtx, temp1,
5106 probe_const, guard_const));
5107
5108 /* Now reset the CFA register if needed. */
5109 if (frame_related_p)
5110 {
5111 add_reg_note (insn, REG_CFA_DEF_CFA,
5112 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
5113 gen_int_mode (poly_size, Pmode)));
5114 RTX_FRAME_RELATED_P (insn) = 1;
5115 }
5116
5117 return;
5118 }
5119
cd1bef27
JL
5120 if (dump_file)
5121 fprintf (dump_file,
eb471ba3
TC
5122 "Stack clash AArch64 prologue: " HOST_WIDE_INT_PRINT_DEC
5123 " bytes, probing will be required.\n", size);
cd1bef27
JL
5124
5125 /* Round size to the nearest multiple of guard_size, and calculate the
5126 residual as the difference between the original size and the rounded
5127 size. */
5128 HOST_WIDE_INT rounded_size = ROUND_DOWN (size, guard_size);
5129 HOST_WIDE_INT residual = size - rounded_size;
5130
5131 /* We can handle a small number of allocations/probes inline. Otherwise
5132 punt to a loop. */
5133 if (rounded_size <= STACK_CLASH_MAX_UNROLL_PAGES * guard_size)
5134 {
5135 for (HOST_WIDE_INT i = 0; i < rounded_size; i += guard_size)
5136 {
5137 aarch64_sub_sp (NULL, temp2, guard_size, true);
5138 emit_stack_probe (plus_constant (Pmode, stack_pointer_rtx,
5139 guard_used_by_caller));
5140 emit_insn (gen_blockage ());
5141 }
5142 dump_stack_clash_frame_info (PROBE_INLINE, size != rounded_size);
5143 }
5144 else
5145 {
5146 /* Compute the ending address. */
5147 aarch64_add_offset (Pmode, temp1, stack_pointer_rtx, -rounded_size,
5148 temp1, NULL, false, true);
5149 rtx_insn *insn = get_last_insn ();
5150
5151 /* For the initial allocation, we don't have a frame pointer
5152 set up, so we always need CFI notes. If we're doing the
5153 final allocation, then we may have a frame pointer, in which
5154 case it is the CFA, otherwise we need CFI notes.
5155
5156 We can determine which allocation we are doing by looking at
5157 the value of FRAME_RELATED_P since the final allocations are not
5158 frame related. */
5159 if (frame_related_p)
5160 {
5161 /* We want the CFA independent of the stack pointer for the
5162 duration of the loop. */
5163 add_reg_note (insn, REG_CFA_DEF_CFA,
5164 plus_constant (Pmode, temp1, rounded_size));
5165 RTX_FRAME_RELATED_P (insn) = 1;
5166 }
5167
5168 /* This allocates and probes the stack. Note that this re-uses some of
5169 the existing Ada stack protection code. However we are guaranteed not
5170 to enter the non-loop or residual branches of that code.
5171
5172 The non-loop part won't be entered because if our allocation amount
5173 doesn't require a loop, the case above would handle it.
5174
5175 The residual amount won't be entered because TEMP1 is a multiple of
5176 the allocation size. The residual will always be 0. As such, the only
5177 part we are actually using from that code is the loop setup. The
5178 actual probing is done in aarch64_output_probe_stack_range. */
5179 insn = emit_insn (gen_probe_stack_range (stack_pointer_rtx,
5180 stack_pointer_rtx, temp1));
5181
5182 /* Now reset the CFA register if needed. */
5183 if (frame_related_p)
5184 {
5185 add_reg_note (insn, REG_CFA_DEF_CFA,
5186 plus_constant (Pmode, stack_pointer_rtx, rounded_size));
5187 RTX_FRAME_RELATED_P (insn) = 1;
5188 }
5189
5190 emit_insn (gen_blockage ());
5191 dump_stack_clash_frame_info (PROBE_LOOP, size != rounded_size);
5192 }
5193
5194 /* Handle any residuals. Residuals of at least MIN_PROBE_THRESHOLD have to
5195 be probed. This maintains the requirement that each page is probed at
5196 least once. For initial probing we probe only if the allocation is
5197 more than GUARD_SIZE - buffer, and for the outgoing arguments we probe
5198 if the amount is larger than buffer. GUARD_SIZE - buffer + buffer ==
5199 GUARD_SIZE. This ensures that for any allocation that is large enough to
5200 trigger a probe here, we'll have at least one, and if it is not large
5201 enough for this code to emit anything for it, the page would have been
5202 probed by the saving of FP/LR either by this function or any callees. If
5203 we don't have any callees then we won't have more stack adjustments and so
5204 are still safe. */
5205 if (residual)
5206 {
5207 HOST_WIDE_INT residual_probe_offset = guard_used_by_caller;
5208 /* If we're doing final adjustments, and we've done any full page
5209 allocations then any residual needs to be probed. */
5210 if (final_adjustment_p && rounded_size != 0)
5211 min_probe_threshold = 0;
5212 /* If doing a small final adjustment, we always probe at offset 0.
5213 This is done to avoid issues when LR is not at position 0 or when
5214 the final adjustment is smaller than the probing offset. */
5215 else if (final_adjustment_p && rounded_size == 0)
5216 residual_probe_offset = 0;
5217
5218 aarch64_sub_sp (temp1, temp2, residual, frame_related_p);
5219 if (residual >= min_probe_threshold)
5220 {
5221 if (dump_file)
5222 fprintf (dump_file,
5223 "Stack clash AArch64 prologue residuals: "
5224 HOST_WIDE_INT_PRINT_DEC " bytes, probing will be required."
5225 "\n", residual);
5226
5227 emit_stack_probe (plus_constant (Pmode, stack_pointer_rtx,
5228 residual_probe_offset));
5229 emit_insn (gen_blockage ());
5230 }
5231 }
5232}
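/* Worked example (not part of GCC; numbers assume the default 64KB guard):
   a constant allocation is split into whole guard-sized pages plus a
   residual.  A 150000-byte request becomes two probed 65536-byte
   adjustments and an 18928-byte residual; whether the residual itself is
   probed then depends on the thresholds described above (roughly 63KB for
   the initial allocation, 1KB for the outgoing-argument adjustment).  */
#if 0
#include <assert.h>

int
main (void)
{
  const long guard_size = 64 * 1024;   /* Probing interval.  */
  long size = 150000;
  long rounded_size = (size / guard_size) * guard_size;   /* ROUND_DOWN.  */
  long residual = size - rounded_size;
  assert (rounded_size == 131072 && residual == 18928);
  return 0;
}
#endif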
5233
43cacb12
RS
5234/* Add a REG_CFA_EXPRESSION note to INSN to say that register REG
5235 is saved at BASE + OFFSET. */
5236
5237static void
5238aarch64_add_cfa_expression (rtx_insn *insn, unsigned int reg,
5239 rtx base, poly_int64 offset)
5240{
5241 rtx mem = gen_frame_mem (DImode, plus_constant (Pmode, base, offset));
5242 add_reg_note (insn, REG_CFA_EXPRESSION,
5243 gen_rtx_SET (mem, regno_reg_rtx[reg]));
5244}
5245
43e9d192
IB
5246/* AArch64 stack frames generated by this compiler look like:
5247
5248 +-------------------------------+
5249 | |
5250 | incoming stack arguments |
5251 | |
34834420
MS
5252 +-------------------------------+
5253 | | <-- incoming stack pointer (aligned)
43e9d192
IB
5254 | callee-allocated save area |
5255 | for register varargs |
5256 | |
34834420
MS
5257 +-------------------------------+
5258 | local variables | <-- frame_pointer_rtx
43e9d192
IB
5259 | |
5260 +-------------------------------+
cd1bef27 5261 | padding | \
454fdba9 5262 +-------------------------------+ |
454fdba9 5263 | callee-saved registers | | frame.saved_regs_size
454fdba9
RL
5264 +-------------------------------+ |
5265 | LR' | |
5266 +-------------------------------+ |
34834420
MS
5267 | FP' | / <- hard_frame_pointer_rtx (aligned)
5268 +-------------------------------+
43e9d192
IB
5269 | dynamic allocation |
5270 +-------------------------------+
34834420
MS
5271 | padding |
5272 +-------------------------------+
5273 | outgoing stack arguments | <-- arg_pointer
5274 | |
5275 +-------------------------------+
5276 | | <-- stack_pointer_rtx (aligned)
43e9d192 5277
34834420
MS
5278 Dynamic stack allocations via alloca() decrease stack_pointer_rtx
5279 but leave frame_pointer_rtx and hard_frame_pointer_rtx
cd1bef27
JL
5280 unchanged.
5281
5282 By default for stack-clash we assume the guard is at least 64KB, but this
5283 value is configurable to either 4KB or 64KB. We also force the guard size to
5284 be the same as the probing interval and both values are kept in sync.
5285
5286 With those assumptions the callee can allocate up to 63KB (or 3KB depending
5287 on the guard size) of stack space without probing.
5288
5289 When probing is needed, we emit a probe at the start of the prologue
5290 and every PARAM_STACK_CLASH_PROTECTION_GUARD_SIZE bytes thereafter.
5291
5292 We have to track how much space has been allocated and the only stores
5293 to the stack we track as implicit probes are the FP/LR stores.
5294
5295 For outgoing arguments we probe if the size is larger than 1KB, such that
5296 the ABI specified buffer is maintained for the next callee. */
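/* Worked example (not part of GCC; numbers assume the default 64KB guard):
   the unprobed allocation budget is the guard minus the 1KB reserved for
   the caller's outgoing arguments, i.e. 63KB (or 3KB with a 4KB guard).  */
#if 0
#include <assert.h>

int
main (void)
{
  const int guard_size = 64 * 1024;   /* PARAM_STACK_CLASH_PROTECTION_GUARD_SIZE.  */
  const int caller_buffer = 1024;     /* STACK_CLASH_CALLER_GUARD.  */
  assert (guard_size - caller_buffer == 63 * 1024);
  return 0;
}
#endif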
43e9d192
IB
5297
5298/* Generate the prologue instructions for entry into a function.
5299 Establish the stack frame by decreasing the stack pointer with a
5300 properly calculated size and, if necessary, create a frame record
5301 filled with the values of LR and previous frame pointer. The
6991c977 5302 current FP is also set up if it is in use. */
43e9d192
IB
5303
5304void
5305aarch64_expand_prologue (void)
5306{
6a70badb
RS
5307 poly_int64 frame_size = cfun->machine->frame.frame_size;
5308 poly_int64 initial_adjust = cfun->machine->frame.initial_adjust;
71bfb77a 5309 HOST_WIDE_INT callee_adjust = cfun->machine->frame.callee_adjust;
6a70badb
RS
5310 poly_int64 final_adjust = cfun->machine->frame.final_adjust;
5311 poly_int64 callee_offset = cfun->machine->frame.callee_offset;
71bfb77a
WD
5312 unsigned reg1 = cfun->machine->frame.wb_candidate1;
5313 unsigned reg2 = cfun->machine->frame.wb_candidate2;
204d2c03 5314 bool emit_frame_chain = cfun->machine->frame.emit_frame_chain;
71bfb77a 5315 rtx_insn *insn;
43e9d192 5316
db58fd89
JW
5317 /* Sign return address for functions. */
5318 if (aarch64_return_address_signing_enabled ())
27169e45
JW
5319 {
5320 insn = emit_insn (gen_pacisp ());
5321 add_reg_note (insn, REG_CFA_TOGGLE_RA_MANGLE, const0_rtx);
5322 RTX_FRAME_RELATED_P (insn) = 1;
5323 }
db58fd89 5324
dd991abb 5325 if (flag_stack_usage_info)
6a70badb 5326 current_function_static_stack_size = constant_lower_bound (frame_size);
43e9d192 5327
a3eb8a52
EB
5328 if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK)
5329 {
5330 if (crtl->is_leaf && !cfun->calls_alloca)
5331 {
6a70badb
RS
5332 if (maybe_gt (frame_size, PROBE_INTERVAL)
5333 && maybe_gt (frame_size, get_stack_check_protect ()))
8c1dd970
JL
5334 aarch64_emit_probe_stack_range (get_stack_check_protect (),
5335 (frame_size
5336 - get_stack_check_protect ()));
a3eb8a52 5337 }
6a70badb 5338 else if (maybe_gt (frame_size, 0))
8c1dd970 5339 aarch64_emit_probe_stack_range (get_stack_check_protect (), frame_size);
a3eb8a52
EB
5340 }
5341
f5470a77
RS
5342 rtx ip0_rtx = gen_rtx_REG (Pmode, IP0_REGNUM);
5343 rtx ip1_rtx = gen_rtx_REG (Pmode, IP1_REGNUM);
5344
cd1bef27
JL
5345 /* In theory we should never have both an initial adjustment
5346 and a callee save adjustment. Verify that is the case since the
5347 code below does not handle it for -fstack-clash-protection. */
5348 gcc_assert (known_eq (initial_adjust, 0) || callee_adjust == 0);
5349
5350 /* Will only probe if the initial adjustment is larger than the guard
5351 less the amount of the guard reserved for use by the caller's
5352 outgoing args. */
5353 aarch64_allocate_and_probe_stack_space (ip0_rtx, ip1_rtx, initial_adjust,
5354 true, false);
43e9d192 5355
71bfb77a
WD
5356 if (callee_adjust != 0)
5357 aarch64_push_regs (reg1, reg2, callee_adjust);
43e9d192 5358
204d2c03 5359 if (emit_frame_chain)
43e9d192 5360 {
43cacb12 5361 poly_int64 reg_offset = callee_adjust;
71bfb77a 5362 if (callee_adjust == 0)
43cacb12
RS
5363 {
5364 reg1 = R29_REGNUM;
5365 reg2 = R30_REGNUM;
5366 reg_offset = callee_offset;
5367 aarch64_save_callee_saves (DImode, reg_offset, reg1, reg2, false);
5368 }
f5470a77 5369 aarch64_add_offset (Pmode, hard_frame_pointer_rtx,
43cacb12
RS
5370 stack_pointer_rtx, callee_offset,
5371 ip1_rtx, ip0_rtx, frame_pointer_needed);
5372 if (frame_pointer_needed && !frame_size.is_constant ())
5373 {
5374 /* Variable-sized frames need to describe the save slot
5375 address using DW_CFA_expression rather than DW_CFA_offset.
5376 This means that, without taking further action, the
5377 locations of the registers that we've already saved would
5378 remain based on the stack pointer even after we redefine
5379 the CFA based on the frame pointer. We therefore need new
5380 DW_CFA_expressions to re-express the save slots with addresses
5381 based on the frame pointer. */
5382 rtx_insn *insn = get_last_insn ();
5383 gcc_assert (RTX_FRAME_RELATED_P (insn));
5384
5385 /* Add an explicit CFA definition if this was previously
5386 implicit. */
5387 if (!find_reg_note (insn, REG_CFA_ADJUST_CFA, NULL_RTX))
5388 {
5389 rtx src = plus_constant (Pmode, stack_pointer_rtx,
5390 callee_offset);
5391 add_reg_note (insn, REG_CFA_ADJUST_CFA,
5392 gen_rtx_SET (hard_frame_pointer_rtx, src));
5393 }
5394
5395 /* Change the save slot expressions for the registers that
5396 we've already saved. */
5397 reg_offset -= callee_offset;
5398 aarch64_add_cfa_expression (insn, reg2, hard_frame_pointer_rtx,
5399 reg_offset + UNITS_PER_WORD);
5400 aarch64_add_cfa_expression (insn, reg1, hard_frame_pointer_rtx,
5401 reg_offset);
5402 }
71bfb77a 5403 emit_insn (gen_stack_tie (stack_pointer_rtx, hard_frame_pointer_rtx));
43e9d192 5404 }
71bfb77a
WD
5405
5406 aarch64_save_callee_saves (DImode, callee_offset, R0_REGNUM, R30_REGNUM,
204d2c03 5407 callee_adjust != 0 || emit_frame_chain);
71bfb77a 5408 aarch64_save_callee_saves (DFmode, callee_offset, V0_REGNUM, V31_REGNUM,
204d2c03 5409 callee_adjust != 0 || emit_frame_chain);
cd1bef27
JL
5410
5411 /* We may need to probe the final adjustment if it is larger than the guard
5412 that is assumed by the callee. */
5413 aarch64_allocate_and_probe_stack_space (ip1_rtx, ip0_rtx, final_adjust,
5414 !frame_pointer_needed, true);
43e9d192
IB
5415}
5416
4f942779
RL
5417/* Return TRUE if we can use a simple_return insn.
5418
5419 This function checks whether the callee saved stack is empty, which
5420 means no restore actions are needed. The pro_and_epilogue pass will use
5421 this to check whether shrink-wrapping opt is feasible. */
5422
5423bool
5424aarch64_use_return_insn_p (void)
5425{
5426 if (!reload_completed)
5427 return false;
5428
5429 if (crtl->profile)
5430 return false;
5431
6a70badb 5432 return known_eq (cfun->machine->frame.frame_size, 0);
4f942779
RL
5433}
5434
71bfb77a
WD
5435/* Generate the epilogue instructions for returning from a function.
5436 This is almost exactly the reverse of the prolog sequence, except
5437 that we need to insert barriers to avoid scheduling loads that read
5438 from a deallocated stack, and we optimize the unwind records by
5439 emitting them all together if possible. */
43e9d192
IB
5440void
5441aarch64_expand_epilogue (bool for_sibcall)
5442{
6a70badb 5443 poly_int64 initial_adjust = cfun->machine->frame.initial_adjust;
71bfb77a 5444 HOST_WIDE_INT callee_adjust = cfun->machine->frame.callee_adjust;
6a70badb
RS
5445 poly_int64 final_adjust = cfun->machine->frame.final_adjust;
5446 poly_int64 callee_offset = cfun->machine->frame.callee_offset;
71bfb77a
WD
5447 unsigned reg1 = cfun->machine->frame.wb_candidate1;
5448 unsigned reg2 = cfun->machine->frame.wb_candidate2;
5449 rtx cfi_ops = NULL;
5450 rtx_insn *insn;
43cacb12
RS
5451 /* A stack clash protection prologue may not have left IP0_REGNUM or
5452 IP1_REGNUM in a usable state. The same is true for allocations
5453 with an SVE component, since we then need both temporary registers
cd1bef27
JL
5454 for each allocation. For stack clash we are in a usable state if
5455 the adjustment is less than GUARD_SIZE - GUARD_USED_BY_CALLER. */
5456 HOST_WIDE_INT guard_size
5457 = 1 << PARAM_VALUE (PARAM_STACK_CLASH_PROTECTION_GUARD_SIZE);
5458 HOST_WIDE_INT guard_used_by_caller = STACK_CLASH_CALLER_GUARD;
5459
5460 /* We can re-use the registers when the allocation amount is smaller than
5461 guard_size - guard_used_by_caller because we won't be doing any probes
5462 then. In such situations the register should remain live with the correct
5463 value. */
43cacb12 5464 bool can_inherit_p = (initial_adjust.is_constant ()
cd1bef27
JL
5465 && final_adjust.is_constant ())
5466 && (!flag_stack_clash_protection
5467 || known_lt (initial_adjust,
5468 guard_size - guard_used_by_caller));
44c0e7b9 5469
71bfb77a 5470 /* We need to add memory barrier to prevent read from deallocated stack. */
6a70badb
RS
5471 bool need_barrier_p
5472 = maybe_ne (get_frame_size ()
5473 + cfun->machine->frame.saved_varargs_size, 0);
43e9d192 5474
71bfb77a 5475 /* Emit a barrier to prevent loads from a deallocated stack. */
6a70badb
RS
5476 if (maybe_gt (final_adjust, crtl->outgoing_args_size)
5477 || cfun->calls_alloca
8144a493 5478 || crtl->calls_eh_return)
43e9d192 5479 {
71bfb77a
WD
5480 emit_insn (gen_stack_tie (stack_pointer_rtx, stack_pointer_rtx));
5481 need_barrier_p = false;
5482 }
7e8c2bd5 5483
71bfb77a
WD
5484 /* Restore the stack pointer from the frame pointer if it may not
5485 be the same as the stack pointer. */
f5470a77
RS
5486 rtx ip0_rtx = gen_rtx_REG (Pmode, IP0_REGNUM);
5487 rtx ip1_rtx = gen_rtx_REG (Pmode, IP1_REGNUM);
6a70badb
RS
5488 if (frame_pointer_needed
5489 && (maybe_ne (final_adjust, 0) || cfun->calls_alloca))
f5470a77
RS
5490 /* If writeback is used when restoring callee-saves, the CFA
5491 is restored on the instruction doing the writeback. */
5492 aarch64_add_offset (Pmode, stack_pointer_rtx,
5493 hard_frame_pointer_rtx, -callee_offset,
43cacb12 5494 ip1_rtx, ip0_rtx, callee_adjust == 0);
71bfb77a 5495 else
cd1bef27
JL
5496 /* The case where we need to re-use the register here is very rare, so
5497 avoid the complicated condition and just always emit a move if the
5498 immediate doesn't fit. */
5499 aarch64_add_sp (ip1_rtx, ip0_rtx, final_adjust, true);
43e9d192 5500
71bfb77a
WD
5501 aarch64_restore_callee_saves (DImode, callee_offset, R0_REGNUM, R30_REGNUM,
5502 callee_adjust != 0, &cfi_ops);
5503 aarch64_restore_callee_saves (DFmode, callee_offset, V0_REGNUM, V31_REGNUM,
5504 callee_adjust != 0, &cfi_ops);
43e9d192 5505
71bfb77a
WD
5506 if (need_barrier_p)
5507 emit_insn (gen_stack_tie (stack_pointer_rtx, stack_pointer_rtx));
5508
5509 if (callee_adjust != 0)
5510 aarch64_pop_regs (reg1, reg2, callee_adjust, &cfi_ops);
5511
6a70badb 5512 if (callee_adjust != 0 || maybe_gt (initial_adjust, 65536))
71bfb77a
WD
5513 {
5514 /* Emit delayed restores and set the CFA to be SP + initial_adjust. */
89ac681e 5515 insn = get_last_insn ();
71bfb77a
WD
5516 rtx new_cfa = plus_constant (Pmode, stack_pointer_rtx, initial_adjust);
5517 REG_NOTES (insn) = alloc_reg_note (REG_CFA_DEF_CFA, new_cfa, cfi_ops);
43e9d192 5518 RTX_FRAME_RELATED_P (insn) = 1;
71bfb77a 5519 cfi_ops = NULL;
43e9d192
IB
5520 }
5521
43cacb12
RS
5522 aarch64_add_sp (ip0_rtx, ip1_rtx, initial_adjust,
5523 !can_inherit_p || df_regs_ever_live_p (IP0_REGNUM));
7e8c2bd5 5524
71bfb77a
WD
5525 if (cfi_ops)
5526 {
5527 /* Emit delayed restores and reset the CFA to be SP. */
5528 insn = get_last_insn ();
5529 cfi_ops = alloc_reg_note (REG_CFA_DEF_CFA, stack_pointer_rtx, cfi_ops);
5530 REG_NOTES (insn) = cfi_ops;
5531 RTX_FRAME_RELATED_P (insn) = 1;
dd991abb
RH
5532 }
5533
db58fd89
JW
5534 /* We prefer to emit the combined return/authenticate instruction RETAA,
5535 however there are three cases in which we must instead emit an explicit
5536 authentication instruction.
5537
5538 1) Sibcalls don't return in a normal way, so if we're about to call one
5539 we must authenticate.
5540
5541 2) The RETAA instruction is not available before ARMv8.3-A, so if we are
5542 generating code for !TARGET_ARMV8_3 we can't use it and must
5543 explicitly authenticate.
5544
5545 3) On an eh_return path we make extra stack adjustments to update the
5546 canonical frame address to be the exception handler's CFA. We want
5547 to authenticate using the CFA of the function which calls eh_return.
5548 */
5549 if (aarch64_return_address_signing_enabled ()
5550 && (for_sibcall || !TARGET_ARMV8_3 || crtl->calls_eh_return))
27169e45
JW
5551 {
5552 insn = emit_insn (gen_autisp ());
5553 add_reg_note (insn, REG_CFA_TOGGLE_RA_MANGLE, const0_rtx);
5554 RTX_FRAME_RELATED_P (insn) = 1;
5555 }
db58fd89 5556
dd991abb
RH
5557 /* Stack adjustment for exception handler. */
5558 if (crtl->calls_eh_return)
5559 {
5560 /* We need to unwind the stack by the offset computed by
5561 EH_RETURN_STACKADJ_RTX. We have already reset the CFA
5562 to be SP; letting the CFA move during this adjustment
5563 is just as correct as retaining the CFA from the body
5564 of the function. Therefore, do nothing special. */
5565 emit_insn (gen_add2_insn (stack_pointer_rtx, EH_RETURN_STACKADJ_RTX));
43e9d192
IB
5566 }
5567
5568 emit_use (gen_rtx_REG (DImode, LR_REGNUM));
5569 if (!for_sibcall)
5570 emit_jump_insn (ret_rtx);
5571}
5572
8144a493
WD
5573/* Implement EH_RETURN_HANDLER_RTX. EH returns need to either return
5574 normally or return to a previous frame after unwinding.
1c960e02 5575
8144a493
WD
5576 An EH return uses a single shared return sequence. The epilogue is
5577 exactly like a normal epilogue except that it has an extra input
5578 register (EH_RETURN_STACKADJ_RTX) which contains the stack adjustment
5579 that must be applied after the frame has been destroyed. An extra label
5580 is inserted before the epilogue which initializes this register to zero,
5581 and this is the entry point for a normal return.
43e9d192 5582
8144a493
WD
5583 An actual EH return updates the return address, initializes the stack
5584 adjustment and jumps directly into the epilogue (bypassing the zeroing
5585 of the adjustment). Since the return address is typically saved on the
5586 stack when a function makes a call, the saved LR must be updated outside
5587 the epilogue.
43e9d192 5588
8144a493
WD
5589 This poses problems as the store is generated well before the epilogue,
5590 so the offset of LR is not known yet. Also optimizations will remove the
5591 store as it appears dead, even after the epilogue is generated (as the
5592 base or offset for loading LR is different in many cases).
43e9d192 5593
8144a493
WD
5594 To avoid these problems this implementation forces the frame pointer
5595 in eh_return functions so that the location of LR is fixed and known early.
5596 It also marks the store volatile, so no optimization is permitted to
5597 remove the store. */
5598rtx
5599aarch64_eh_return_handler_rtx (void)
5600{
5601 rtx tmp = gen_frame_mem (Pmode,
5602 plus_constant (Pmode, hard_frame_pointer_rtx, UNITS_PER_WORD));
43e9d192 5603
8144a493
WD
5604 /* Mark the store volatile, so no optimization is permitted to remove it. */
5605 MEM_VOLATILE_P (tmp) = true;
5606 return tmp;
43e9d192
IB
5607}
5608
43e9d192
IB
5609/* Output code to add DELTA to the first argument, and then jump
5610 to FUNCTION. Used for C++ multiple inheritance. */
5611static void
5612aarch64_output_mi_thunk (FILE *file, tree thunk ATTRIBUTE_UNUSED,
5613 HOST_WIDE_INT delta,
5614 HOST_WIDE_INT vcall_offset,
5615 tree function)
5616{
5617 /* The this pointer is always in x0. Note that this differs from
5618 Arm, where the this pointer may be bumped to r1 if r0 is required
5619 to return a pointer to an aggregate. On AArch64 a result value
5620 pointer will be in x8. */
5621 int this_regno = R0_REGNUM;
5d8a22a5
DM
5622 rtx this_rtx, temp0, temp1, addr, funexp;
5623 rtx_insn *insn;
43e9d192 5624
75f1d6fc
SN
5625 reload_completed = 1;
5626 emit_note (NOTE_INSN_PROLOGUE_END);
43e9d192 5627
f5470a77
RS
5628 this_rtx = gen_rtx_REG (Pmode, this_regno);
5629 temp0 = gen_rtx_REG (Pmode, IP0_REGNUM);
5630 temp1 = gen_rtx_REG (Pmode, IP1_REGNUM);
5631
43e9d192 5632 if (vcall_offset == 0)
43cacb12 5633 aarch64_add_offset (Pmode, this_rtx, this_rtx, delta, temp1, temp0, false);
43e9d192
IB
5634 else
5635 {
28514dda 5636 gcc_assert ((vcall_offset & (POINTER_BYTES - 1)) == 0);
43e9d192 5637
75f1d6fc
SN
5638 addr = this_rtx;
5639 if (delta != 0)
5640 {
5641 if (delta >= -256 && delta < 256)
5642 addr = gen_rtx_PRE_MODIFY (Pmode, this_rtx,
5643 plus_constant (Pmode, this_rtx, delta));
5644 else
43cacb12
RS
5645 aarch64_add_offset (Pmode, this_rtx, this_rtx, delta,
5646 temp1, temp0, false);
43e9d192
IB
5647 }
5648
28514dda
YZ
5649 if (Pmode == ptr_mode)
5650 aarch64_emit_move (temp0, gen_rtx_MEM (ptr_mode, addr));
5651 else
5652 aarch64_emit_move (temp0,
5653 gen_rtx_ZERO_EXTEND (Pmode,
5654 gen_rtx_MEM (ptr_mode, addr)));
75f1d6fc 5655
28514dda 5656 if (vcall_offset >= -256 && vcall_offset < 4096 * POINTER_BYTES)
75f1d6fc 5657 addr = plus_constant (Pmode, temp0, vcall_offset);
43e9d192
IB
5658 else
5659 {
f43657b4
JW
5660 aarch64_internal_mov_immediate (temp1, GEN_INT (vcall_offset), true,
5661 Pmode);
75f1d6fc 5662 addr = gen_rtx_PLUS (Pmode, temp0, temp1);
43e9d192
IB
5663 }
5664
28514dda
YZ
5665 if (Pmode == ptr_mode)
5666 aarch64_emit_move (temp1, gen_rtx_MEM (ptr_mode,addr));
5667 else
5668 aarch64_emit_move (temp1,
5669 gen_rtx_SIGN_EXTEND (Pmode,
5670 gen_rtx_MEM (ptr_mode, addr)));
5671
75f1d6fc 5672 emit_insn (gen_add2_insn (this_rtx, temp1));
43e9d192
IB
5673 }
5674
75f1d6fc
SN
5675 /* Generate a tail call to the target function. */
5676 if (!TREE_USED (function))
5677 {
5678 assemble_external (function);
5679 TREE_USED (function) = 1;
5680 }
5681 funexp = XEXP (DECL_RTL (function), 0);
5682 funexp = gen_rtx_MEM (FUNCTION_MODE, funexp);
5683 insn = emit_call_insn (gen_sibcall (funexp, const0_rtx, NULL_RTX));
5684 SIBLING_CALL_P (insn) = 1;
5685
5686 insn = get_insns ();
5687 shorten_branches (insn);
5688 final_start_function (insn, file, 1);
5689 final (insn, file, 1);
43e9d192 5690 final_end_function ();
75f1d6fc
SN
5691
5692 /* Stop pretending to be a post-reload pass. */
5693 reload_completed = 0;
43e9d192
IB
5694}
5695
43e9d192
IB
5696static bool
5697aarch64_tls_referenced_p (rtx x)
5698{
5699 if (!TARGET_HAVE_TLS)
5700 return false;
e7de8563
RS
5701 subrtx_iterator::array_type array;
5702 FOR_EACH_SUBRTX (iter, array, x, ALL)
5703 {
5704 const_rtx x = *iter;
5705 if (GET_CODE (x) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (x) != 0)
5706 return true;
5707 /* Don't recurse into UNSPEC_TLS looking for TLS symbols; these are
5708 TLS offsets, not real symbol references. */
5709 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
5710 iter.skip_subrtxes ();
5711 }
5712 return false;
43e9d192
IB
5713}
5714
5715
43e9d192
IB
5716/* Return true if val can be encoded as a 12-bit unsigned immediate with
5717 a left shift of 0 or 12 bits. */
5718bool
5719aarch64_uimm12_shift (HOST_WIDE_INT val)
5720{
5721 return ((val & (((HOST_WIDE_INT) 0xfff) << 0)) == val
5722 || (val & (((HOST_WIDE_INT) 0xfff) << 12)) == val
5723 );
5724}
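/* Illustrative sketch (not part of GCC; the helper below is invented for the
   example): the values accepted above are the 12-bit immediates, optionally
   shifted left by 12, that ADD/SUB accept.  */
#if 0
#include <assert.h>

static int
uimm12_shift_p (long long val)
{
  return ((val & 0xfffLL) == val || (val & (0xfffLL << 12)) == val);
}

int
main (void)
{
  assert (uimm12_shift_p (0xabc));     /* Fits in the low 12 bits.  */
  assert (uimm12_shift_p (0xabc000));  /* 12-bit value shifted by 12.  */
  assert (!uimm12_shift_p (0x1001));   /* Needs bits in both halves.  */
  return 0;
}
#endif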
5725
eb471ba3
TC
5726/* Returns the nearest value to VAL that will fit as a 12-bit unsigned immediate
5727 that can be created with a left shift of 0 or 12. */
5728static HOST_WIDE_INT
5729aarch64_clamp_to_uimm12_shift (HOST_WIDE_INT val)
5730{
5731 /* Check to see if the value fits in 24 bits, as that is the maximum we can
5732 handle correctly. */
5733 gcc_assert ((val & 0xffffff) == val);
5734
5735 if (((val & 0xfff) << 0) == val)
5736 return val;
5737
5738 return val & (0xfff << 12);
5739}
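/* Illustrative sketch (not part of GCC): the clamp above keeps a value that
   already fits in 12 bits and otherwise rounds it down to the nearest value
   expressible as a 12-bit immediate shifted left by 12.  */
#if 0
#include <assert.h>

static long long
clamp_to_uimm12_shift (long long val)
{
  if ((val & 0xfffLL) == val)
    return val;
  return val & (0xfffLL << 12);
}

int
main (void)
{
  assert (clamp_to_uimm12_shift (0xabc) == 0xabc);        /* Already encodable.  */
  assert (clamp_to_uimm12_shift (0x1abc) == 0x1000);      /* Low bits dropped.  */
  assert (clamp_to_uimm12_shift (0xfff001) == 0xfff000);
  return 0;
}
#endif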
43e9d192
IB
5740
5741/* Return true if val is an immediate that can be loaded into a
5742 register by a MOVZ instruction. */
5743static bool
77e994c9 5744aarch64_movw_imm (HOST_WIDE_INT val, scalar_int_mode mode)
43e9d192
IB
5745{
5746 if (GET_MODE_SIZE (mode) > 4)
5747 {
5748 if ((val & (((HOST_WIDE_INT) 0xffff) << 32)) == val
5749 || (val & (((HOST_WIDE_INT) 0xffff) << 48)) == val)
5750 return 1;
5751 }
5752 else
5753 {
43cacb12
RS
5754 /* Ignore sign extension. */
5755 val &= (HOST_WIDE_INT) 0xffffffff;
5756 }
5757 return ((val & (((HOST_WIDE_INT) 0xffff) << 0)) == val
5758 || (val & (((HOST_WIDE_INT) 0xffff) << 16)) == val);
5759}
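/* Illustrative sketch (not part of GCC; the helper below is invented for the
   example): a MOVZ-loadable value has all of its set bits inside one aligned
   16-bit chunk (bit positions 0, 16, 32 or 48); the function above
   additionally handles the 32-bit case by ignoring sign extension.  */
#if 0
#include <assert.h>
#include <stdint.h>

static int
movz_imm_p (uint64_t val)
{
  for (int shift = 0; shift < 64; shift += 16)
    if ((val & (0xffffull << shift)) == val)
      return 1;
  return 0;
}

int
main (void)
{
  assert (movz_imm_p (0x1234));           /* movz x0, #0x1234 */
  assert (movz_imm_p (0x1234ull << 32));  /* movz x0, #0x1234, lsl #32 */
  assert (!movz_imm_p (0x100010000ull));  /* Spans two 16-bit chunks.  */
  return 0;
}
#endif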
5760
5761/* VAL is a value with the inner mode of MODE. Replicate it to fill a
5762 64-bit (DImode) integer. */
5763
5764static unsigned HOST_WIDE_INT
5765aarch64_replicate_bitmask_imm (unsigned HOST_WIDE_INT val, machine_mode mode)
5766{
5767 unsigned int size = GET_MODE_UNIT_PRECISION (mode);
5768 while (size < 64)
5769 {
5770 val &= (HOST_WIDE_INT_1U << size) - 1;
5771 val |= val << size;
5772 size *= 2;
43e9d192 5773 }
43cacb12 5774 return val;
43e9d192
IB
5775}
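/* Illustrative sketch (not part of GCC): replicating a narrow value until
   it fills 64 bits, as the function above does for sub-DImode inner modes.  */
#if 0
#include <assert.h>
#include <stdint.h>

static uint64_t
replicate_to_64 (uint64_t val, unsigned int size)
{
  while (size < 64)
    {
      val &= (1ull << size) - 1;
      val |= val << size;
      size *= 2;
    }
  return val;
}

int
main (void)
{
  assert (replicate_to_64 (0x81, 8) == 0x8181818181818181ull);
  assert (replicate_to_64 (0xf00f, 16) == 0xf00ff00ff00ff00full);
  return 0;
}
#endif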
5776
a64c73a2
WD
5777/* Multipliers for repeating bitmasks of width 32, 16, 8, 4, and 2. */
5778
5779static const unsigned HOST_WIDE_INT bitmask_imm_mul[] =
5780 {
5781 0x0000000100000001ull,
5782 0x0001000100010001ull,
5783 0x0101010101010101ull,
5784 0x1111111111111111ull,
5785 0x5555555555555555ull,
5786 };
5787
43e9d192
IB
5788
5789/* Return true if val is a valid bitmask immediate. */
a64c73a2 5790
43e9d192 5791bool
a64c73a2 5792aarch64_bitmask_imm (HOST_WIDE_INT val_in, machine_mode mode)
43e9d192 5793{
a64c73a2
WD
5794 unsigned HOST_WIDE_INT val, tmp, mask, first_one, next_one;
5795 int bits;
5796
5797 /* Check for a single sequence of one bits and return quickly if so.
5798 The special cases of all ones and all zeroes return false. */
43cacb12 5799 val = aarch64_replicate_bitmask_imm (val_in, mode);
a64c73a2
WD
5800 tmp = val + (val & -val);
5801
5802 if (tmp == (tmp & -tmp))
5803 return (val + 1) > 1;
5804
5805 /* Replicate 32-bit immediates so we can treat them as 64-bit. */
5806 if (mode == SImode)
5807 val = (val << 32) | (val & 0xffffffff);
5808
5809 /* Invert if the immediate doesn't start with a zero bit - this means we
5810 only need to search for sequences of one bits. */
5811 if (val & 1)
5812 val = ~val;
5813
5814 /* Find the first set bit and set tmp to val with the first sequence of one
5815 bits removed. Return success if there is a single sequence of ones. */
5816 first_one = val & -val;
5817 tmp = val & (val + first_one);
5818
5819 if (tmp == 0)
5820 return true;
5821
5822 /* Find the next set bit and compute the difference in bit position. */
5823 next_one = tmp & -tmp;
5824 bits = clz_hwi (first_one) - clz_hwi (next_one);
5825 mask = val ^ tmp;
5826
5827 /* Check the bit position difference is a power of 2, and that the first
5828 sequence of one bits fits within 'bits' bits. */
5829 if ((mask >> bits) != 0 || bits != (bits & -bits))
5830 return false;
5831
5832 /* Check the sequence of one bits is repeated 64/bits times. */
5833 return val == mask * bitmask_imm_mul[__builtin_clz (bits) - 26];
43e9d192
IB
5834}
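/* Illustrative sketch (not part of GCC; the helper below is invented for the
   example): a brute-force definition of the "bitmask immediate" class that
   the function above recognises analytically.  A value qualifies if it is a
   contiguous run of ones, rotated within an element of 2, 4, 8, 16, 32 or
   64 bits and replicated to fill 64 bits; all-zeros and all-ones are
   excluded.  */
#if 0
#include <assert.h>
#include <stdint.h>

static int
naive_bitmask_imm_p (uint64_t val)
{
  for (int size = 2; size <= 64; size *= 2)
    for (int len = 1; len < size; len++)
      {
        uint64_t run = (1ull << len) - 1;
        for (int rot = 0; rot < size; rot++)
          {
            /* Rotate the run within a SIZE-bit element.  */
            uint64_t elt = run;
            if (rot != 0)
              elt = (run << rot) | (run >> (size - rot));
            if (size < 64)
              elt &= (1ull << size) - 1;
            /* Replicate the element across all 64 bits.  */
            uint64_t rep = 0;
            for (int i = 0; i < 64; i += size)
              rep |= elt << i;
            if (rep == val)
              return 1;
          }
      }
  return 0;
}

int
main (void)
{
  assert (naive_bitmask_imm_p (0x5555555555555555ull));  /* Period 2.  */
  assert (naive_bitmask_imm_p (0x00ff00ff00ff00ffull));  /* Period 16.  */
  assert (naive_bitmask_imm_p (0x7));                    /* Single run.  */
  assert (!naive_bitmask_imm_p (0x5));                   /* Two runs.  */
  assert (!naive_bitmask_imm_p (0));                     /* All zeros.  */
  assert (!naive_bitmask_imm_p (~0ull));                 /* All ones.  */
  return 0;
}
#endif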
5835
43fd192f
MC
5836/* Create mask of ones, covering the lowest to highest bits set in VAL_IN.
5837 Assumed precondition: VAL_IN is not zero. */
5838
5839unsigned HOST_WIDE_INT
5840aarch64_and_split_imm1 (HOST_WIDE_INT val_in)
5841{
5842 int lowest_bit_set = ctz_hwi (val_in);
5843 int highest_bit_set = floor_log2 (val_in);
5844 gcc_assert (val_in != 0);
5845
5846 return ((HOST_WIDE_INT_UC (2) << highest_bit_set) -
5847 (HOST_WIDE_INT_1U << lowest_bit_set));
5848}
5849
5850/* Create a constant in which every bit outside the range from the lowest set
5851 bit to the highest set bit of VAL_IN is set to 1. */
5852
5853unsigned HOST_WIDE_INT
5854aarch64_and_split_imm2 (HOST_WIDE_INT val_in)
5855{
5856 return val_in | ~aarch64_and_split_imm1 (val_in);
5857}
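/* Illustrative sketch (not part of GCC): the two-instruction AND split.
   IMM1 is a contiguous mask covering the span from the lowest to the
   highest set bit of VAL; IMM2 additionally sets every bit outside that
   span.  By construction (IMM1 & IMM2) == VAL, so "x & val" can be done as
   two AND instructions whenever both halves are valid bitmask immediates.  */
#if 0
#include <assert.h>
#include <stdint.h>

int
main (void)
{
  uint64_t val = 0x0000000000ff00f0ull;      /* Not itself a bitmask immediate.  */
  int low = __builtin_ctzll (val);           /* Lowest set bit.  */
  int high = 63 - __builtin_clzll (val);     /* Highest set bit.  */
  uint64_t imm1 = (2ull << high) - (1ull << low);
  uint64_t imm2 = val | ~imm1;
  assert ((imm1 & imm2) == val);
  return 0;
}
#endif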
5858
5859/* Return true if VAL_IN is a valid 'and' bitmask immediate. */
5860
5861bool
5862aarch64_and_bitmask_imm (unsigned HOST_WIDE_INT val_in, machine_mode mode)
5863{
77e994c9
RS
5864 scalar_int_mode int_mode;
5865 if (!is_a <scalar_int_mode> (mode, &int_mode))
5866 return false;
5867
5868 if (aarch64_bitmask_imm (val_in, int_mode))
43fd192f
MC
5869 return false;
5870
77e994c9 5871 if (aarch64_move_imm (val_in, int_mode))
43fd192f
MC
5872 return false;
5873
5874 unsigned HOST_WIDE_INT imm2 = aarch64_and_split_imm2 (val_in);
5875
77e994c9 5876 return aarch64_bitmask_imm (imm2, int_mode);
43fd192f 5877}
43e9d192
IB
5878
5879/* Return true if val is an immediate that can be loaded into a
5880 register in a single instruction. */
5881bool
ef4bddc2 5882aarch64_move_imm (HOST_WIDE_INT val, machine_mode mode)
43e9d192 5883{
77e994c9
RS
5884 scalar_int_mode int_mode;
5885 if (!is_a <scalar_int_mode> (mode, &int_mode))
5886 return false;
5887
5888 if (aarch64_movw_imm (val, int_mode) || aarch64_movw_imm (~val, int_mode))
43e9d192 5889 return 1;
77e994c9 5890 return aarch64_bitmask_imm (val, int_mode);
43e9d192
IB
5891}
5892
5893static bool
ef4bddc2 5894aarch64_cannot_force_const_mem (machine_mode mode ATTRIBUTE_UNUSED, rtx x)
43e9d192
IB
5895{
5896 rtx base, offset;
7eda14e1 5897
43e9d192
IB
5898 if (GET_CODE (x) == HIGH)
5899 return true;
5900
43cacb12
RS
5901 /* There's no way to calculate VL-based values using relocations. */
5902 subrtx_iterator::array_type array;
5903 FOR_EACH_SUBRTX (iter, array, x, ALL)
5904 if (GET_CODE (*iter) == CONST_POLY_INT)
5905 return true;
5906
43e9d192
IB
5907 split_const (x, &base, &offset);
5908 if (GET_CODE (base) == SYMBOL_REF || GET_CODE (base) == LABEL_REF)
28514dda 5909 {
43cacb12 5910 if (aarch64_classify_symbol (base, INTVAL (offset))
28514dda
YZ
5911 != SYMBOL_FORCE_TO_MEM)
5912 return true;
5913 else
5914 /* Avoid generating a 64-bit relocation in ILP32; leave
5915 to aarch64_expand_mov_immediate to handle it properly. */
5916 return mode != ptr_mode;
5917 }
43e9d192
IB
5918
5919 return aarch64_tls_referenced_p (x);
5920}
5921
e79136e4
WD
5922/* Implement TARGET_CASE_VALUES_THRESHOLD.
5923 The expansion for a table switch is quite expensive due to the number
5924 of instructions, the table lookup and the hard-to-predict indirect jump.
5925 When optimizing for speed at -O3 or higher, use the per-core tuning if
5926 set; otherwise use tables for more than 16 cases as a tradeoff between size and
5927 performance. When optimizing for size, use the default setting. */
50487d79
EM
5928
5929static unsigned int
5930aarch64_case_values_threshold (void)
5931{
5932 /* Use the specified limit for the number of cases before using jump
5933 tables at higher optimization levels. */
5934 if (optimize > 2
5935 && selected_cpu->tune->max_case_values != 0)
5936 return selected_cpu->tune->max_case_values;
5937 else
e79136e4 5938 return optimize_size ? default_case_values_threshold () : 17;
50487d79
EM
5939}
5940
43e9d192
IB
5941/* Return true if register REGNO is a valid index register.
5942 STRICT_P is true if REG_OK_STRICT is in effect. */
5943
5944bool
5945aarch64_regno_ok_for_index_p (int regno, bool strict_p)
5946{
5947 if (!HARD_REGISTER_NUM_P (regno))
5948 {
5949 if (!strict_p)
5950 return true;
5951
5952 if (!reg_renumber)
5953 return false;
5954
5955 regno = reg_renumber[regno];
5956 }
5957 return GP_REGNUM_P (regno);
5958}
5959
5960/* Return true if register REGNO is a valid base register for mode MODE.
5961 STRICT_P is true if REG_OK_STRICT is in effect. */
5962
5963bool
5964aarch64_regno_ok_for_base_p (int regno, bool strict_p)
5965{
5966 if (!HARD_REGISTER_NUM_P (regno))
5967 {
5968 if (!strict_p)
5969 return true;
5970
5971 if (!reg_renumber)
5972 return false;
5973
5974 regno = reg_renumber[regno];
5975 }
5976
5977 /* The fake registers will be eliminated to either the stack or
5978 hard frame pointer, both of which are usually valid base registers.
5979 Reload deals with the cases where the eliminated form isn't valid. */
5980 return (GP_REGNUM_P (regno)
5981 || regno == SP_REGNUM
5982 || regno == FRAME_POINTER_REGNUM
5983 || regno == ARG_POINTER_REGNUM);
5984}
5985
5986/* Return true if X is a valid base register for mode MODE.
5987 STRICT_P is true if REG_OK_STRICT is in effect. */
5988
5989static bool
5990aarch64_base_register_rtx_p (rtx x, bool strict_p)
5991{
76160199
RS
5992 if (!strict_p
5993 && GET_CODE (x) == SUBREG
5994 && contains_reg_of_mode[GENERAL_REGS][GET_MODE (SUBREG_REG (x))])
43e9d192
IB
5995 x = SUBREG_REG (x);
5996
5997 return (REG_P (x) && aarch64_regno_ok_for_base_p (REGNO (x), strict_p));
5998}
5999
6000/* Return true if address offset is a valid index. If it is, fill in INFO
6001 appropriately. STRICT_P is true if REG_OK_STRICT is in effect. */
6002
6003static bool
6004aarch64_classify_index (struct aarch64_address_info *info, rtx x,
ef4bddc2 6005 machine_mode mode, bool strict_p)
43e9d192
IB
6006{
6007 enum aarch64_address_type type;
6008 rtx index;
6009 int shift;
6010
6011 /* (reg:P) */
6012 if ((REG_P (x) || GET_CODE (x) == SUBREG)
6013 && GET_MODE (x) == Pmode)
6014 {
6015 type = ADDRESS_REG_REG;
6016 index = x;
6017 shift = 0;
6018 }
6019 /* (sign_extend:DI (reg:SI)) */
6020 else if ((GET_CODE (x) == SIGN_EXTEND
6021 || GET_CODE (x) == ZERO_EXTEND)
6022 && GET_MODE (x) == DImode
6023 && GET_MODE (XEXP (x, 0)) == SImode)
6024 {
6025 type = (GET_CODE (x) == SIGN_EXTEND)
6026 ? ADDRESS_REG_SXTW : ADDRESS_REG_UXTW;
6027 index = XEXP (x, 0);
6028 shift = 0;
6029 }
6030 /* (mult:DI (sign_extend:DI (reg:SI)) (const_int scale)) */
6031 else if (GET_CODE (x) == MULT
6032 && (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
6033 || GET_CODE (XEXP (x, 0)) == ZERO_EXTEND)
6034 && GET_MODE (XEXP (x, 0)) == DImode
6035 && GET_MODE (XEXP (XEXP (x, 0), 0)) == SImode
6036 && CONST_INT_P (XEXP (x, 1)))
6037 {
6038 type = (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND)
6039 ? ADDRESS_REG_SXTW : ADDRESS_REG_UXTW;
6040 index = XEXP (XEXP (x, 0), 0);
6041 shift = exact_log2 (INTVAL (XEXP (x, 1)));
6042 }
6043 /* (ashift:DI (sign_extend:DI (reg:SI)) (const_int shift)) */
6044 else if (GET_CODE (x) == ASHIFT
6045 && (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
6046 || GET_CODE (XEXP (x, 0)) == ZERO_EXTEND)
6047 && GET_MODE (XEXP (x, 0)) == DImode
6048 && GET_MODE (XEXP (XEXP (x, 0), 0)) == SImode
6049 && CONST_INT_P (XEXP (x, 1)))
6050 {
6051 type = (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND)
6052 ? ADDRESS_REG_SXTW : ADDRESS_REG_UXTW;
6053 index = XEXP (XEXP (x, 0), 0);
6054 shift = INTVAL (XEXP (x, 1));
6055 }
6056 /* (sign_extract:DI (mult:DI (reg:DI) (const_int scale)) 32+shift 0) */
6057 else if ((GET_CODE (x) == SIGN_EXTRACT
6058 || GET_CODE (x) == ZERO_EXTRACT)
6059 && GET_MODE (x) == DImode
6060 && GET_CODE (XEXP (x, 0)) == MULT
6061 && GET_MODE (XEXP (XEXP (x, 0), 0)) == DImode
6062 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))
6063 {
6064 type = (GET_CODE (x) == SIGN_EXTRACT)
6065 ? ADDRESS_REG_SXTW : ADDRESS_REG_UXTW;
6066 index = XEXP (XEXP (x, 0), 0);
6067 shift = exact_log2 (INTVAL (XEXP (XEXP (x, 0), 1)));
6068 if (INTVAL (XEXP (x, 1)) != 32 + shift
6069 || INTVAL (XEXP (x, 2)) != 0)
6070 shift = -1;
6071 }
6072 /* (and:DI (mult:DI (reg:DI) (const_int scale))
6073 (const_int 0xffffffff<<shift)) */
6074 else if (GET_CODE (x) == AND
6075 && GET_MODE (x) == DImode
6076 && GET_CODE (XEXP (x, 0)) == MULT
6077 && GET_MODE (XEXP (XEXP (x, 0), 0)) == DImode
6078 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
6079 && CONST_INT_P (XEXP (x, 1)))
6080 {
6081 type = ADDRESS_REG_UXTW;
6082 index = XEXP (XEXP (x, 0), 0);
6083 shift = exact_log2 (INTVAL (XEXP (XEXP (x, 0), 1)));
6084 if (INTVAL (XEXP (x, 1)) != (HOST_WIDE_INT)0xffffffff << shift)
6085 shift = -1;
6086 }
6087 /* (sign_extract:DI (ashift:DI (reg:DI) (const_int shift)) 32+shift 0) */
6088 else if ((GET_CODE (x) == SIGN_EXTRACT
6089 || GET_CODE (x) == ZERO_EXTRACT)
6090 && GET_MODE (x) == DImode
6091 && GET_CODE (XEXP (x, 0)) == ASHIFT
6092 && GET_MODE (XEXP (XEXP (x, 0), 0)) == DImode
6093 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))
6094 {
6095 type = (GET_CODE (x) == SIGN_EXTRACT)
6096 ? ADDRESS_REG_SXTW : ADDRESS_REG_UXTW;
6097 index = XEXP (XEXP (x, 0), 0);
6098 shift = INTVAL (XEXP (XEXP (x, 0), 1));
6099 if (INTVAL (XEXP (x, 1)) != 32 + shift
6100 || INTVAL (XEXP (x, 2)) != 0)
6101 shift = -1;
6102 }
6103 /* (and:DI (ashift:DI (reg:DI) (const_int shift))
6104 (const_int 0xffffffff<<shift)) */
6105 else if (GET_CODE (x) == AND
6106 && GET_MODE (x) == DImode
6107 && GET_CODE (XEXP (x, 0)) == ASHIFT
6108 && GET_MODE (XEXP (XEXP (x, 0), 0)) == DImode
6109 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
6110 && CONST_INT_P (XEXP (x, 1)))
6111 {
6112 type = ADDRESS_REG_UXTW;
6113 index = XEXP (XEXP (x, 0), 0);
6114 shift = INTVAL (XEXP (XEXP (x, 0), 1));
6115 if (INTVAL (XEXP (x, 1)) != (HOST_WIDE_INT)0xffffffff << shift)
6116 shift = -1;
6117 }
6118 /* (mult:P (reg:P) (const_int scale)) */
6119 else if (GET_CODE (x) == MULT
6120 && GET_MODE (x) == Pmode
6121 && GET_MODE (XEXP (x, 0)) == Pmode
6122 && CONST_INT_P (XEXP (x, 1)))
6123 {
6124 type = ADDRESS_REG_REG;
6125 index = XEXP (x, 0);
6126 shift = exact_log2 (INTVAL (XEXP (x, 1)));
6127 }
6128 /* (ashift:P (reg:P) (const_int shift)) */
6129 else if (GET_CODE (x) == ASHIFT
6130 && GET_MODE (x) == Pmode
6131 && GET_MODE (XEXP (x, 0)) == Pmode
6132 && CONST_INT_P (XEXP (x, 1)))
6133 {
6134 type = ADDRESS_REG_REG;
6135 index = XEXP (x, 0);
6136 shift = INTVAL (XEXP (x, 1));
6137 }
6138 else
6139 return false;
6140
76160199
RS
6141 if (!strict_p
6142 && GET_CODE (index) == SUBREG
6143 && contains_reg_of_mode[GENERAL_REGS][GET_MODE (SUBREG_REG (index))])
43e9d192
IB
6144 index = SUBREG_REG (index);
6145
43cacb12
RS
6146 if (aarch64_sve_data_mode_p (mode))
6147 {
6148 if (type != ADDRESS_REG_REG
6149 || (1 << shift) != GET_MODE_UNIT_SIZE (mode))
6150 return false;
6151 }
6152 else
6153 {
6154 if (shift != 0
6155 && !(IN_RANGE (shift, 1, 3)
6156 && known_eq (1 << shift, GET_MODE_SIZE (mode))))
6157 return false;
6158 }
6159
6160 if (REG_P (index)
43e9d192
IB
6161 && aarch64_regno_ok_for_index_p (REGNO (index), strict_p))
6162 {
6163 info->type = type;
6164 info->offset = index;
6165 info->shift = shift;
6166 return true;
6167 }
6168
6169 return false;
6170}
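/* Illustrative sketch (not part of GCC; the helper below is invented for the
   example): for a non-SVE base+index address the scale, if any, must equal
   the access size, with a shift of 1-3.  So a DImode access can use
   "[x0, x1, lsl #3]" but not a scale of 4.  */
#if 0
#include <assert.h>

static int
valid_index_shift_p (int shift, int mode_size)
{
  /* Mirrors the "shift != 0 && !(IN_RANGE (shift, 1, 3) && ...)" check.  */
  return shift == 0 || (shift >= 1 && shift <= 3 && (1 << shift) == mode_size);
}

int
main (void)
{
  assert (valid_index_shift_p (3, 8));   /* ldr x2, [x0, x1, lsl #3] */
  assert (!valid_index_shift_p (2, 8));  /* Scale 4 for an 8-byte access.  */
  assert (valid_index_shift_p (0, 8));   /* Unscaled register index.  */
  return 0;
}
#endif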
6171
abc52318
KT
6172/* Return true if MODE is one of the modes for which we
6173 support LDP/STP operations. */
6174
6175static bool
6176aarch64_mode_valid_for_sched_fusion_p (machine_mode mode)
6177{
6178 return mode == SImode || mode == DImode
6179 || mode == SFmode || mode == DFmode
6180 || (aarch64_vector_mode_supported_p (mode)
9f5361c8
KT
6181 && (known_eq (GET_MODE_SIZE (mode), 8)
6182 || (known_eq (GET_MODE_SIZE (mode), 16)
6183 && (aarch64_tune_params.extra_tuning_flags
6184 & AARCH64_EXTRA_TUNE_NO_LDP_STP_QREGS) == 0)));
abc52318
KT
6185}
6186
9e0218fc
RH
6187/* Return true if REGNO is a virtual pointer register, or an eliminable
6188 "soft" frame register. Like REGNO_PTR_FRAME_P except that we don't
6189 include stack_pointer or hard_frame_pointer. */
6190static bool
6191virt_or_elim_regno_p (unsigned regno)
6192{
6193 return ((regno >= FIRST_VIRTUAL_REGISTER
6194 && regno <= LAST_VIRTUAL_POINTER_REGISTER)
6195 || regno == FRAME_POINTER_REGNUM
6196 || regno == ARG_POINTER_REGNUM);
6197}
6198
a97d8b98
RS
6199/* Return true if X is a valid address of type TYPE for machine mode MODE.
6200 If it is, fill in INFO appropriately. STRICT_P is true if
6201 REG_OK_STRICT is in effect. */
43e9d192 6202
a98824ac 6203bool
43e9d192 6204aarch64_classify_address (struct aarch64_address_info *info,
a97d8b98 6205 rtx x, machine_mode mode, bool strict_p,
a98824ac 6206 aarch64_addr_query_type type)
43e9d192
IB
6207{
6208 enum rtx_code code = GET_CODE (x);
6209 rtx op0, op1;
dc640181
RS
6210 poly_int64 offset;
6211
6a70badb 6212 HOST_WIDE_INT const_size;
2d8c6dc1 6213
80d43579
WD
6214 /* On BE, we use load/store pair for all large int mode load/stores.
6215 TI/TFmode may also use a load/store pair. */
43cacb12
RS
6216 unsigned int vec_flags = aarch64_classify_vector_mode (mode);
6217 bool advsimd_struct_p = (vec_flags == (VEC_ADVSIMD | VEC_STRUCT));
a97d8b98 6218 bool load_store_pair_p = (type == ADDR_QUERY_LDP_STP
a25831ac 6219 || type == ADDR_QUERY_LDP_STP_N
80d43579
WD
6220 || mode == TImode
6221 || mode == TFmode
43cacb12 6222 || (BYTES_BIG_ENDIAN && advsimd_struct_p));
2d8c6dc1 6223
a25831ac
AV
6224 /* If we are dealing with ADDR_QUERY_LDP_STP_N that means the incoming mode
6225 corresponds to the actual size of the memory being loaded/stored and the
6226 mode used for the corresponding address check is half of that.
6227 if (type == ADDR_QUERY_LDP_STP_N
6228 && known_eq (GET_MODE_SIZE (mode), 16))
6229 mode = DFmode;
6230
6a70badb 6231 bool allow_reg_index_p = (!load_store_pair_p
43cacb12
RS
6232 && (known_lt (GET_MODE_SIZE (mode), 16)
6233 || vec_flags == VEC_ADVSIMD
6234 || vec_flags == VEC_SVE_DATA));
6235
6236 /* For SVE, only accept [Rn], [Rn, Rm, LSL #shift] and
6237 [Rn, #offset, MUL VL]. */
6238 if ((vec_flags & (VEC_SVE_DATA | VEC_SVE_PRED)) != 0
6239 && (code != REG && code != PLUS))
6240 return false;
2d8c6dc1
AH
6241
6242 /* On LE, for AdvSIMD, don't support anything other than POST_INC or
6243 REG addressing. */
43cacb12
RS
6244 if (advsimd_struct_p
6245 && !BYTES_BIG_ENDIAN
43e9d192
IB
6246 && (code != POST_INC && code != REG))
6247 return false;
6248
43cacb12
RS
6249 gcc_checking_assert (GET_MODE (x) == VOIDmode
6250 || SCALAR_INT_MODE_P (GET_MODE (x)));
6251
43e9d192
IB
6252 switch (code)
6253 {
6254 case REG:
6255 case SUBREG:
6256 info->type = ADDRESS_REG_IMM;
6257 info->base = x;
6258 info->offset = const0_rtx;
dc640181 6259 info->const_offset = 0;
43e9d192
IB
6260 return aarch64_base_register_rtx_p (x, strict_p);
6261
6262 case PLUS:
6263 op0 = XEXP (x, 0);
6264 op1 = XEXP (x, 1);
15c0c5c9
JW
6265
6266 if (! strict_p
4aa81c2e 6267 && REG_P (op0)
9e0218fc 6268 && virt_or_elim_regno_p (REGNO (op0))
dc640181 6269 && poly_int_rtx_p (op1, &offset))
15c0c5c9
JW
6270 {
6271 info->type = ADDRESS_REG_IMM;
6272 info->base = op0;
6273 info->offset = op1;
dc640181 6274 info->const_offset = offset;
15c0c5c9
JW
6275
6276 return true;
6277 }
6278
6a70badb 6279 if (maybe_ne (GET_MODE_SIZE (mode), 0)
dc640181
RS
6280 && aarch64_base_register_rtx_p (op0, strict_p)
6281 && poly_int_rtx_p (op1, &offset))
43e9d192 6282 {
43e9d192
IB
6283 info->type = ADDRESS_REG_IMM;
6284 info->base = op0;
6285 info->offset = op1;
dc640181 6286 info->const_offset = offset;
43e9d192
IB
6287
6288 /* TImode and TFmode values are allowed in both pairs of X
6289 registers and individual Q registers. The available
6290 address modes are:
6291 X,X: 7-bit signed scaled offset
6292 Q: 9-bit signed offset
6293 We conservatively require an offset representable in either mode.
8ed49fab
KT
6294 When performing the check for pairs of X registers i.e. LDP/STP
6295 pass down DImode since that is the natural size of the LDP/STP
6296 instruction memory accesses. */
43e9d192 6297 if (mode == TImode || mode == TFmode)
8ed49fab 6298 return (aarch64_offset_7bit_signed_scaled_p (DImode, offset)
3c5af608 6299 && (aarch64_offset_9bit_signed_unscaled_p (mode, offset)
8734dfac 6300 || offset_12bit_unsigned_scaled_p (mode, offset)));
43e9d192 6301
2d8c6dc1
AH
6302 /* A 7bit offset check because OImode will emit a ldp/stp
6303 instruction (only big endian will get here).
6304 For ldp/stp instructions, the offset is scaled for the size of a
6305 single element of the pair. */
6306 if (mode == OImode)
6307 return aarch64_offset_7bit_signed_scaled_p (TImode, offset);
6308
6309 /* Three 9/12 bit offsets checks because CImode will emit three
6310 ldr/str instructions (only big endian will get here). */
6311 if (mode == CImode)
6312 return (aarch64_offset_7bit_signed_scaled_p (TImode, offset)
3c5af608
MM
6313 && (aarch64_offset_9bit_signed_unscaled_p (V16QImode,
6314 offset + 32)
2d8c6dc1
AH
6315 || offset_12bit_unsigned_scaled_p (V16QImode,
6316 offset + 32)));
6317
6318 /* Two 7bit offsets checks because XImode will emit two ldp/stp
6319 instructions (only big endian will get here). */
6320 if (mode == XImode)
6321 return (aarch64_offset_7bit_signed_scaled_p (TImode, offset)
6322 && aarch64_offset_7bit_signed_scaled_p (TImode,
6323 offset + 32));
6324
43cacb12
RS
6325 /* Make "m" use the LD1 offset range for SVE data modes, so
6326 that pre-RTL optimizers like ivopts will work to that
6327 instead of the wider LDR/STR range. */
6328 if (vec_flags == VEC_SVE_DATA)
6329 return (type == ADDR_QUERY_M
6330 ? offset_4bit_signed_scaled_p (mode, offset)
6331 : offset_9bit_signed_scaled_p (mode, offset));
6332
9f4cbab8
RS
6333 if (vec_flags == (VEC_SVE_DATA | VEC_STRUCT))
6334 {
6335 poly_int64 end_offset = (offset
6336 + GET_MODE_SIZE (mode)
6337 - BYTES_PER_SVE_VECTOR);
6338 return (type == ADDR_QUERY_M
6339 ? offset_4bit_signed_scaled_p (mode, offset)
6340 : (offset_9bit_signed_scaled_p (SVE_BYTE_MODE, offset)
6341 && offset_9bit_signed_scaled_p (SVE_BYTE_MODE,
6342 end_offset)));
6343 }
6344
43cacb12
RS
6345 if (vec_flags == VEC_SVE_PRED)
6346 return offset_9bit_signed_scaled_p (mode, offset);
6347
2d8c6dc1 6348 if (load_store_pair_p)
6a70badb 6349 return ((known_eq (GET_MODE_SIZE (mode), 4)
9f5361c8
KT
6350 || known_eq (GET_MODE_SIZE (mode), 8)
6351 || known_eq (GET_MODE_SIZE (mode), 16))
44707478 6352 && aarch64_offset_7bit_signed_scaled_p (mode, offset));
43e9d192 6353 else
3c5af608 6354 return (aarch64_offset_9bit_signed_unscaled_p (mode, offset)
43e9d192
IB
6355 || offset_12bit_unsigned_scaled_p (mode, offset));
6356 }
6357
6358 if (allow_reg_index_p)
6359 {
6360 /* Look for base + (scaled/extended) index register. */
6361 if (aarch64_base_register_rtx_p (op0, strict_p)
6362 && aarch64_classify_index (info, op1, mode, strict_p))
6363 {
6364 info->base = op0;
6365 return true;
6366 }
6367 if (aarch64_base_register_rtx_p (op1, strict_p)
6368 && aarch64_classify_index (info, op0, mode, strict_p))
6369 {
6370 info->base = op1;
6371 return true;
6372 }
6373 }
6374
6375 return false;
6376
6377 case POST_INC:
6378 case POST_DEC:
6379 case PRE_INC:
6380 case PRE_DEC:
6381 info->type = ADDRESS_REG_WB;
6382 info->base = XEXP (x, 0);
6383 info->offset = NULL_RTX;
6384 return aarch64_base_register_rtx_p (info->base, strict_p);
6385
6386 case POST_MODIFY:
6387 case PRE_MODIFY:
6388 info->type = ADDRESS_REG_WB;
6389 info->base = XEXP (x, 0);
6390 if (GET_CODE (XEXP (x, 1)) == PLUS
dc640181 6391 && poly_int_rtx_p (XEXP (XEXP (x, 1), 1), &offset)
43e9d192
IB
6392 && rtx_equal_p (XEXP (XEXP (x, 1), 0), info->base)
6393 && aarch64_base_register_rtx_p (info->base, strict_p))
6394 {
43e9d192 6395 info->offset = XEXP (XEXP (x, 1), 1);
dc640181 6396 info->const_offset = offset;
43e9d192
IB
6397
6398 /* TImode and TFmode values are allowed in both pairs of X
6399 registers and individual Q registers. The available
6400 address modes are:
6401 X,X: 7-bit signed scaled offset
6402 Q: 9-bit signed offset
6403	     We conservatively require an offset representable in both address modes.
6404 */
6405 if (mode == TImode || mode == TFmode)
44707478 6406 return (aarch64_offset_7bit_signed_scaled_p (mode, offset)
3c5af608 6407 && aarch64_offset_9bit_signed_unscaled_p (mode, offset));
43e9d192 6408
2d8c6dc1 6409 if (load_store_pair_p)
6a70badb 6410 return ((known_eq (GET_MODE_SIZE (mode), 4)
9f5361c8
KT
6411 || known_eq (GET_MODE_SIZE (mode), 8)
6412 || known_eq (GET_MODE_SIZE (mode), 16))
44707478 6413 && aarch64_offset_7bit_signed_scaled_p (mode, offset));
43e9d192 6414 else
3c5af608 6415 return aarch64_offset_9bit_signed_unscaled_p (mode, offset);
43e9d192
IB
6416 }
6417 return false;
6418
6419 case CONST:
6420 case SYMBOL_REF:
6421 case LABEL_REF:
79517551
SN
6422	      /* Load literal: PC-relative constant pool entry.  Only supported
6423	         for SImode or larger.  */
43e9d192 6424 info->type = ADDRESS_SYMBOLIC;
2d8c6dc1 6425
6a70badb
RS
6426 if (!load_store_pair_p
6427 && GET_MODE_SIZE (mode).is_constant (&const_size)
6428 && const_size >= 4)
43e9d192
IB
6429 {
6430 rtx sym, addend;
6431
6432 split_const (x, &sym, &addend);
b4f50fd4
RR
6433 return ((GET_CODE (sym) == LABEL_REF
6434 || (GET_CODE (sym) == SYMBOL_REF
6435 && CONSTANT_POOL_ADDRESS_P (sym)
9ee6540a 6436 && aarch64_pcrelative_literal_loads)));
43e9d192
IB
6437 }
6438 return false;
6439
6440 case LO_SUM:
6441 info->type = ADDRESS_LO_SUM;
6442 info->base = XEXP (x, 0);
6443 info->offset = XEXP (x, 1);
6444 if (allow_reg_index_p
6445 && aarch64_base_register_rtx_p (info->base, strict_p))
6446 {
6447 rtx sym, offs;
6448 split_const (info->offset, &sym, &offs);
6449 if (GET_CODE (sym) == SYMBOL_REF
43cacb12
RS
6450 && (aarch64_classify_symbol (sym, INTVAL (offs))
6451 == SYMBOL_SMALL_ABSOLUTE))
43e9d192
IB
6452 {
6453 /* The symbol and offset must be aligned to the access size. */
6454 unsigned int align;
43e9d192
IB
6455
6456 if (CONSTANT_POOL_ADDRESS_P (sym))
6457 align = GET_MODE_ALIGNMENT (get_pool_mode (sym));
6458 else if (TREE_CONSTANT_POOL_ADDRESS_P (sym))
6459 {
6460 tree exp = SYMBOL_REF_DECL (sym);
6461 align = TYPE_ALIGN (TREE_TYPE (exp));
58e17cf8 6462 align = aarch64_constant_alignment (exp, align);
43e9d192
IB
6463 }
6464 else if (SYMBOL_REF_DECL (sym))
6465 align = DECL_ALIGN (SYMBOL_REF_DECL (sym));
6c031d8d
KV
6466 else if (SYMBOL_REF_HAS_BLOCK_INFO_P (sym)
6467 && SYMBOL_REF_BLOCK (sym) != NULL)
6468 align = SYMBOL_REF_BLOCK (sym)->alignment;
43e9d192
IB
6469 else
6470 align = BITS_PER_UNIT;
6471
6a70badb
RS
6472 poly_int64 ref_size = GET_MODE_SIZE (mode);
6473 if (known_eq (ref_size, 0))
43e9d192
IB
6474 ref_size = GET_MODE_SIZE (DImode);
6475
6a70badb
RS
6476 return (multiple_p (INTVAL (offs), ref_size)
6477 && multiple_p (align / BITS_PER_UNIT, ref_size));
43e9d192
IB
6478 }
6479 }
6480 return false;
6481
6482 default:
6483 return false;
6484 }
6485}
6486
9bf2f779
KT
6487/* Return true if the address X is valid for a PRFM instruction.
6488 STRICT_P is true if we should do strict checking with
6489 aarch64_classify_address. */
6490
6491bool
6492aarch64_address_valid_for_prefetch_p (rtx x, bool strict_p)
6493{
6494 struct aarch64_address_info addr;
6495
6496 /* PRFM accepts the same addresses as DImode... */
a97d8b98 6497 bool res = aarch64_classify_address (&addr, x, DImode, strict_p);
9bf2f779
KT
6498 if (!res)
6499 return false;
6500
6501 /* ... except writeback forms. */
6502 return addr.type != ADDRESS_REG_WB;
6503}
6504
43e9d192
IB
6505bool
6506aarch64_symbolic_address_p (rtx x)
6507{
6508 rtx offset;
6509
6510 split_const (x, &x, &offset);
6511 return GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == LABEL_REF;
6512}
6513
a6e0bfa7 6514/* Classify the base of symbolic expression X. */
da4f13a4
MS
6515
6516enum aarch64_symbol_type
a6e0bfa7 6517aarch64_classify_symbolic_expression (rtx x)
43e9d192
IB
6518{
6519 rtx offset;
da4f13a4 6520
43e9d192 6521 split_const (x, &x, &offset);
43cacb12 6522 return aarch64_classify_symbol (x, INTVAL (offset));
43e9d192
IB
6523}
6524
6525
6526/* Return TRUE if X is a legitimate address for accessing memory in
6527 mode MODE. */
6528static bool
ef4bddc2 6529aarch64_legitimate_address_hook_p (machine_mode mode, rtx x, bool strict_p)
43e9d192
IB
6530{
6531 struct aarch64_address_info addr;
6532
a97d8b98 6533 return aarch64_classify_address (&addr, x, mode, strict_p);
43e9d192
IB
6534}
6535
a97d8b98
RS
6536/* Return TRUE if X is a legitimate address of type TYPE for accessing
6537 memory in mode MODE. STRICT_P is true if REG_OK_STRICT is in effect. */
43e9d192 6538bool
a97d8b98
RS
6539aarch64_legitimate_address_p (machine_mode mode, rtx x, bool strict_p,
6540 aarch64_addr_query_type type)
43e9d192
IB
6541{
6542 struct aarch64_address_info addr;
6543
a97d8b98 6544 return aarch64_classify_address (&addr, x, mode, strict_p, type);
43e9d192
IB
6545}
6546
9005477f
RS
6547/* Implement TARGET_LEGITIMIZE_ADDRESS_DISPLACEMENT. */
6548
491ec060 6549static bool
9005477f
RS
6550aarch64_legitimize_address_displacement (rtx *offset1, rtx *offset2,
6551 poly_int64 orig_offset,
6552 machine_mode mode)
491ec060 6553{
6a70badb
RS
6554 HOST_WIDE_INT size;
6555 if (GET_MODE_SIZE (mode).is_constant (&size))
6556 {
9005477f
RS
6557 HOST_WIDE_INT const_offset, second_offset;
6558
6559 /* A general SVE offset is A * VQ + B. Remove the A component from
6560 coefficient 0 in order to get the constant B. */
6561 const_offset = orig_offset.coeffs[0] - orig_offset.coeffs[1];
6562
6563	      /* Split an out-of-range address displacement into a base and
6564		 offset.  Use a 4KB range for 1- and 2-byte accesses and a 16KB
6565		 range otherwise, to increase opportunities for sharing the base
6566		 address between accesses of different sizes.  Unaligned accesses
6567		 use the signed 9-bit range; TImode/TFmode use the intersection of
6568		 the signed scaled 7-bit and signed 9-bit ranges.  */
6a70badb 6569 if (mode == TImode || mode == TFmode)
9005477f
RS
6570 second_offset = ((const_offset + 0x100) & 0x1f8) - 0x100;
6571 else if ((const_offset & (size - 1)) != 0)
6572 second_offset = ((const_offset + 0x100) & 0x1ff) - 0x100;
6a70badb 6573 else
9005477f 6574 second_offset = const_offset & (size < 4 ? 0xfff : 0x3ffc);
491ec060 6575
9005477f
RS
6576 if (second_offset == 0 || known_eq (orig_offset, second_offset))
6577 return false;
6578
6579 /* Split the offset into second_offset and the rest. */
6580 *offset1 = gen_int_mode (orig_offset - second_offset, Pmode);
6581 *offset2 = gen_int_mode (second_offset, Pmode);
6582 return true;
6583 }
6584 else
6585 {
6586 /* Get the mode we should use as the basis of the range. For structure
6587 modes this is the mode of one vector. */
6588 unsigned int vec_flags = aarch64_classify_vector_mode (mode);
6589 machine_mode step_mode
6590 = (vec_flags & VEC_STRUCT) != 0 ? SVE_BYTE_MODE : mode;
6591
6592 /* Get the "mul vl" multiplier we'd like to use. */
6593 HOST_WIDE_INT factor = GET_MODE_SIZE (step_mode).coeffs[1];
6594 HOST_WIDE_INT vnum = orig_offset.coeffs[1] / factor;
6595 if (vec_flags & VEC_SVE_DATA)
6596 /* LDR supports a 9-bit range, but the move patterns for
6597 structure modes require all vectors to be in range of the
6598	   same base.  The simplest way of accommodating that while still
6599 promoting reuse of anchor points between different modes is
6600 to use an 8-bit range unconditionally. */
6601 vnum = ((vnum + 128) & 255) - 128;
6602 else
6603 /* Predicates are only handled singly, so we might as well use
6604 the full range. */
6605 vnum = ((vnum + 256) & 511) - 256;
6606 if (vnum == 0)
6607 return false;
6608
6609 /* Convert the "mul vl" multiplier into a byte offset. */
6610 poly_int64 second_offset = GET_MODE_SIZE (step_mode) * vnum;
6611 if (known_eq (second_offset, orig_offset))
6612 return false;
6613
6614 /* Split the offset into second_offset and the rest. */
6615 *offset1 = gen_int_mode (orig_offset - second_offset, Pmode);
6616 *offset2 = gen_int_mode (second_offset, Pmode);
6a70badb
RS
6617 return true;
6618 }
491ec060
WD
6619}
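/* Illustrative example (not part of the original source): for SImode
   (size 4) and orig_offset 0x2001, the offset is misaligned, so
   second_offset = ((0x2001 + 0x100) & 0x1ff) - 0x100 = 1.  The split is
   then OFFSET1 = 0x2000, which is folded into the base, and
   OFFSET2 = 1, which stays in the address.  */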
6620
a2170965
TC
6621/* Return the binary representation of the floating-point constant VALUE in INTVAL.
6622   If the value cannot be converted, return false without setting INTVAL.
6623   The conversion is done in the mode of VALUE.  */
6624bool
6625aarch64_reinterpret_float_as_int (rtx value, unsigned HOST_WIDE_INT *intval)
6626{
6627
6628 /* We make a general exception for 0. */
6629 if (aarch64_float_const_zero_rtx_p (value))
6630 {
6631 *intval = 0;
6632 return true;
6633 }
6634
0d0e0188 6635 scalar_float_mode mode;
a2170965 6636 if (GET_CODE (value) != CONST_DOUBLE
0d0e0188 6637 || !is_a <scalar_float_mode> (GET_MODE (value), &mode)
a2170965
TC
6638 || GET_MODE_BITSIZE (mode) > HOST_BITS_PER_WIDE_INT
6639 /* Only support up to DF mode. */
6640 || GET_MODE_BITSIZE (mode) > GET_MODE_BITSIZE (DFmode))
6641 return false;
6642
6643 unsigned HOST_WIDE_INT ival = 0;
6644
6645 long res[2];
6646 real_to_target (res,
6647 CONST_DOUBLE_REAL_VALUE (value),
6648 REAL_MODE_FORMAT (mode));
6649
5c22bb48
TC
6650 if (mode == DFmode)
6651 {
6652 int order = BYTES_BIG_ENDIAN ? 1 : 0;
6653 ival = zext_hwi (res[order], 32);
6654 ival |= (zext_hwi (res[1 - order], 32) << 32);
6655 }
6656 else
6657 ival = zext_hwi (res[0], 32);
a2170965
TC
6658
6659 *intval = ival;
6660 return true;
6661}
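/* Illustrative example (not part of the original source): for the DFmode
   constant 1.0 this stores the IEEE-754 bit pattern 0x3ff0000000000000 in
   *INTVAL; for the SFmode constant 1.0 it stores 0x3f800000.  */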
6662
6663/* Return TRUE if rtx X is an immediate constant that can be moved using a
6664 single MOV(+MOVK) followed by an FMOV. */
6665bool
6666aarch64_float_const_rtx_p (rtx x)
6667{
6668 machine_mode mode = GET_MODE (x);
6669 if (mode == VOIDmode)
6670 return false;
6671
6672 /* Determine whether it's cheaper to write float constants as
6673     mov/movk pairs rather than as ldr/adrp pairs.  */
6674 unsigned HOST_WIDE_INT ival;
6675
6676 if (GET_CODE (x) == CONST_DOUBLE
6677 && SCALAR_FLOAT_MODE_P (mode)
6678 && aarch64_reinterpret_float_as_int (x, &ival))
6679 {
77e994c9
RS
6680 scalar_int_mode imode = (mode == HFmode
6681 ? SImode
6682 : int_mode_for_mode (mode).require ());
a2170965
TC
6683 int num_instr = aarch64_internal_mov_immediate
6684 (NULL_RTX, gen_int_mode (ival, imode), false, imode);
6685 return num_instr < 3;
6686 }
6687
6688 return false;
6689}
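/* Illustrative example (not part of the original source, assuming the
   usual immediate-building strategy): the DFmode constant 1.0
   reinterprets as 0x3ff0000000000000, which should need only a single
   MOVZ (shifted left by 48), so the MOV+FMOV sequence is cheaper than an
   ADRP/LDR literal load and this function returns true.  */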
6690
43e9d192
IB
6691/* Return TRUE if rtx X is the immediate constant 0.0.  */
6692bool
3520f7cc 6693aarch64_float_const_zero_rtx_p (rtx x)
43e9d192 6694{
43e9d192
IB
6695 if (GET_MODE (x) == VOIDmode)
6696 return false;
6697
34a72c33 6698 if (REAL_VALUE_MINUS_ZERO (*CONST_DOUBLE_REAL_VALUE (x)))
43e9d192 6699 return !HONOR_SIGNED_ZEROS (GET_MODE (x));
34a72c33 6700 return real_equal (CONST_DOUBLE_REAL_VALUE (x), &dconst0);
43e9d192
IB
6701}
6702
a2170965
TC
6703/* Return TRUE if rtx X is an immediate constant that fits in a single
6704 MOVI immediate operation. */
6705bool
6706aarch64_can_const_movi_rtx_p (rtx x, machine_mode mode)
6707{
6708 if (!TARGET_SIMD)
6709 return false;
6710
77e994c9
RS
6711 machine_mode vmode;
6712 scalar_int_mode imode;
a2170965
TC
6713 unsigned HOST_WIDE_INT ival;
6714
6715 if (GET_CODE (x) == CONST_DOUBLE
6716 && SCALAR_FLOAT_MODE_P (mode))
6717 {
6718 if (!aarch64_reinterpret_float_as_int (x, &ival))
6719 return false;
6720
35c38fa6
TC
6721 /* We make a general exception for 0. */
6722 if (aarch64_float_const_zero_rtx_p (x))
6723 return true;
6724
304b9962 6725 imode = int_mode_for_mode (mode).require ();
a2170965
TC
6726 }
6727 else if (GET_CODE (x) == CONST_INT
77e994c9
RS
6728 && is_a <scalar_int_mode> (mode, &imode))
6729 ival = INTVAL (x);
a2170965
TC
6730 else
6731 return false;
6732
6733  /* Use a 64-bit vector mode for everything except DImode/DFmode, where we
6734     use a 128-bit vector mode.  */
77e994c9 6735 int width = GET_MODE_BITSIZE (imode) == 64 ? 128 : 64;
a2170965
TC
6736
6737 vmode = aarch64_simd_container_mode (imode, width);
6738 rtx v_op = aarch64_simd_gen_const_vector_dup (vmode, ival);
6739
b187677b 6740 return aarch64_simd_valid_immediate (v_op, NULL);
a2170965
TC
6741}
6742
6743
70f09188
AP
6744/* Return the fixed registers used for condition codes. */
6745
6746static bool
6747aarch64_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
6748{
6749 *p1 = CC_REGNUM;
6750 *p2 = INVALID_REGNUM;
6751 return true;
6752}
6753
47210a04
RL
6754/* This function is used by the call expanders of the machine description.
6755 RESULT is the register in which the result is returned. It's NULL for
6756 "call" and "sibcall".
6757 MEM is the location of the function call.
6758   SIBCALL indicates whether this function call is a normal call or a sibling call;
6759   a different pattern is generated accordingly.  */
6760
6761void
6762aarch64_expand_call (rtx result, rtx mem, bool sibcall)
6763{
6764 rtx call, callee, tmp;
6765 rtvec vec;
6766 machine_mode mode;
6767
6768 gcc_assert (MEM_P (mem));
6769 callee = XEXP (mem, 0);
6770 mode = GET_MODE (callee);
6771 gcc_assert (mode == Pmode);
6772
6773 /* Decide if we should generate indirect calls by loading the
6774 address of the callee into a register before performing
6775 the branch-and-link. */
6776 if (SYMBOL_REF_P (callee)
6777 ? (aarch64_is_long_call_p (callee)
6778 || aarch64_is_noplt_call_p (callee))
6779 : !REG_P (callee))
6780 XEXP (mem, 0) = force_reg (mode, callee);
6781
6782 call = gen_rtx_CALL (VOIDmode, mem, const0_rtx);
6783
6784 if (result != NULL_RTX)
6785 call = gen_rtx_SET (result, call);
6786
6787 if (sibcall)
6788 tmp = ret_rtx;
6789 else
6790 tmp = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (Pmode, LR_REGNUM));
6791
6792 vec = gen_rtvec (2, call, tmp);
6793 call = gen_rtx_PARALLEL (VOIDmode, vec);
6794
6795 aarch64_emit_call_insn (call);
6796}
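/* Illustrative note (not part of the original source): for a normal call
   the emitted pattern is (parallel [(call ...) (clobber (reg LR))]), while
   for a sibling call the clobber is replaced by (return), reflecting that
   a tail call does not clobber the link register.  */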
6797
78607708
TV
6798/* Emit call insn with PAT and do aarch64-specific handling. */
6799
d07a3fed 6800void
78607708
TV
6801aarch64_emit_call_insn (rtx pat)
6802{
6803 rtx insn = emit_call_insn (pat);
6804
6805 rtx *fusage = &CALL_INSN_FUNCTION_USAGE (insn);
6806 clobber_reg (fusage, gen_rtx_REG (word_mode, IP0_REGNUM));
6807 clobber_reg (fusage, gen_rtx_REG (word_mode, IP1_REGNUM));
6808}
6809
ef4bddc2 6810machine_mode
43e9d192
IB
6811aarch64_select_cc_mode (RTX_CODE code, rtx x, rtx y)
6812{
6813 /* All floating point compares return CCFP if it is an equality
6814 comparison, and CCFPE otherwise. */
6815 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
6816 {
6817 switch (code)
6818 {
6819 case EQ:
6820 case NE:
6821 case UNORDERED:
6822 case ORDERED:
6823 case UNLT:
6824 case UNLE:
6825 case UNGT:
6826 case UNGE:
6827 case UNEQ:
43e9d192
IB
6828 return CCFPmode;
6829
6830 case LT:
6831 case LE:
6832 case GT:
6833 case GE:
8332c5ee 6834 case LTGT:
43e9d192
IB
6835 return CCFPEmode;
6836
6837 default:
6838 gcc_unreachable ();
6839 }
6840 }
6841
2b8568fe
KT
6842 /* Equality comparisons of short modes against zero can be performed
6843 using the TST instruction with the appropriate bitmask. */
6844 if (y == const0_rtx && REG_P (x)
6845 && (code == EQ || code == NE)
6846 && (GET_MODE (x) == HImode || GET_MODE (x) == QImode))
6847 return CC_NZmode;
6848
b06335f9
KT
6849 /* Similarly, comparisons of zero_extends from shorter modes can
6850 be performed using an ANDS with an immediate mask. */
6851 if (y == const0_rtx && GET_CODE (x) == ZERO_EXTEND
6852 && (GET_MODE (x) == SImode || GET_MODE (x) == DImode)
6853 && (GET_MODE (XEXP (x, 0)) == HImode || GET_MODE (XEXP (x, 0)) == QImode)
6854 && (code == EQ || code == NE))
6855 return CC_NZmode;
6856
43e9d192
IB
6857 if ((GET_MODE (x) == SImode || GET_MODE (x) == DImode)
6858 && y == const0_rtx
6859 && (code == EQ || code == NE || code == LT || code == GE)
b056c910 6860 && (GET_CODE (x) == PLUS || GET_CODE (x) == MINUS || GET_CODE (x) == AND
7325d85a
KT
6861 || GET_CODE (x) == NEG
6862 || (GET_CODE (x) == ZERO_EXTRACT && CONST_INT_P (XEXP (x, 1))
6863 && CONST_INT_P (XEXP (x, 2)))))
43e9d192
IB
6864 return CC_NZmode;
6865
1c992d1e 6866 /* A compare with a shifted operand. Because of canonicalization,
43e9d192
IB
6867 the comparison will have to be swapped when we emit the assembly
6868 code. */
6869 if ((GET_MODE (x) == SImode || GET_MODE (x) == DImode)
ffa8a921 6870 && (REG_P (y) || GET_CODE (y) == SUBREG || y == const0_rtx)
43e9d192
IB
6871 && (GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT
6872 || GET_CODE (x) == LSHIFTRT
1c992d1e 6873 || GET_CODE (x) == ZERO_EXTEND || GET_CODE (x) == SIGN_EXTEND))
43e9d192
IB
6874 return CC_SWPmode;
6875
1c992d1e
RE
6876 /* Similarly for a negated operand, but we can only do this for
6877 equalities. */
6878 if ((GET_MODE (x) == SImode || GET_MODE (x) == DImode)
4aa81c2e 6879 && (REG_P (y) || GET_CODE (y) == SUBREG)
1c992d1e
RE
6880 && (code == EQ || code == NE)
6881 && GET_CODE (x) == NEG)
6882 return CC_Zmode;
6883
ef22810a
RH
6884 /* A test for unsigned overflow. */
6885 if ((GET_MODE (x) == DImode || GET_MODE (x) == TImode)
6886 && code == NE
6887 && GET_CODE (x) == PLUS
6888 && GET_CODE (y) == ZERO_EXTEND)
6889 return CC_Cmode;
6890
30c46053
MC
6891 /* A test for signed overflow. */
6892 if ((GET_MODE (x) == DImode || GET_MODE (x) == TImode)
6893 && code == NE
6894 && GET_CODE (x) == PLUS
6895 && GET_CODE (y) == SIGN_EXTEND)
6896 return CC_Vmode;
6897
43e9d192
IB
6898 /* For everything else, return CCmode. */
6899 return CCmode;
6900}
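/* Illustrative example (not part of the original source): comparing a
   shifted operand such as (x << 3) against a register Y selects
   CC_SWPmode; the assembly must be emitted with the operands swapped
   (e.g. "cmp y, x, lsl 3"), so the condition codes are read in swapped
   form, as in aarch64_get_condition_code_1 below (GT maps to LT, etc.).  */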
6901
3dfa7055 6902static int
b8506a8a 6903aarch64_get_condition_code_1 (machine_mode, enum rtx_code);
3dfa7055 6904
cd5660ab 6905int
43e9d192
IB
6906aarch64_get_condition_code (rtx x)
6907{
ef4bddc2 6908 machine_mode mode = GET_MODE (XEXP (x, 0));
43e9d192
IB
6909 enum rtx_code comp_code = GET_CODE (x);
6910
6911 if (GET_MODE_CLASS (mode) != MODE_CC)
6912 mode = SELECT_CC_MODE (comp_code, XEXP (x, 0), XEXP (x, 1));
3dfa7055
ZC
6913 return aarch64_get_condition_code_1 (mode, comp_code);
6914}
43e9d192 6915
3dfa7055 6916static int
b8506a8a 6917aarch64_get_condition_code_1 (machine_mode mode, enum rtx_code comp_code)
3dfa7055 6918{
43e9d192
IB
6919 switch (mode)
6920 {
4e10a5a7
RS
6921 case E_CCFPmode:
6922 case E_CCFPEmode:
43e9d192
IB
6923 switch (comp_code)
6924 {
6925 case GE: return AARCH64_GE;
6926 case GT: return AARCH64_GT;
6927 case LE: return AARCH64_LS;
6928 case LT: return AARCH64_MI;
6929 case NE: return AARCH64_NE;
6930 case EQ: return AARCH64_EQ;
6931 case ORDERED: return AARCH64_VC;
6932 case UNORDERED: return AARCH64_VS;
6933 case UNLT: return AARCH64_LT;
6934 case UNLE: return AARCH64_LE;
6935 case UNGT: return AARCH64_HI;
6936 case UNGE: return AARCH64_PL;
cd5660ab 6937 default: return -1;
43e9d192
IB
6938 }
6939 break;
6940
4e10a5a7 6941 case E_CCmode:
43e9d192
IB
6942 switch (comp_code)
6943 {
6944 case NE: return AARCH64_NE;
6945 case EQ: return AARCH64_EQ;
6946 case GE: return AARCH64_GE;
6947 case GT: return AARCH64_GT;
6948 case LE: return AARCH64_LE;
6949 case LT: return AARCH64_LT;
6950 case GEU: return AARCH64_CS;
6951 case GTU: return AARCH64_HI;
6952 case LEU: return AARCH64_LS;
6953 case LTU: return AARCH64_CC;
cd5660ab 6954 default: return -1;
43e9d192
IB
6955 }
6956 break;
6957
4e10a5a7 6958 case E_CC_SWPmode:
43e9d192
IB
6959 switch (comp_code)
6960 {
6961 case NE: return AARCH64_NE;
6962 case EQ: return AARCH64_EQ;
6963 case GE: return AARCH64_LE;
6964 case GT: return AARCH64_LT;
6965 case LE: return AARCH64_GE;
6966 case LT: return AARCH64_GT;
6967 case GEU: return AARCH64_LS;
6968 case GTU: return AARCH64_CC;
6969 case LEU: return AARCH64_CS;
6970 case LTU: return AARCH64_HI;
cd5660ab 6971 default: return -1;
43e9d192
IB
6972 }
6973 break;
6974
4e10a5a7 6975 case E_CC_NZmode:
43e9d192
IB
6976 switch (comp_code)
6977 {
6978 case NE: return AARCH64_NE;
6979 case EQ: return AARCH64_EQ;
6980 case GE: return AARCH64_PL;
6981 case LT: return AARCH64_MI;
cd5660ab 6982 default: return -1;
43e9d192
IB
6983 }
6984 break;
6985
4e10a5a7 6986 case E_CC_Zmode:
1c992d1e
RE
6987 switch (comp_code)
6988 {
6989 case NE: return AARCH64_NE;
6990 case EQ: return AARCH64_EQ;
cd5660ab 6991 default: return -1;
1c992d1e
RE
6992 }
6993 break;
6994
4e10a5a7 6995 case E_CC_Cmode:
ef22810a
RH
6996 switch (comp_code)
6997 {
6998 case NE: return AARCH64_CS;
6999 case EQ: return AARCH64_CC;
7000 default: return -1;
7001 }
7002 break;
7003
30c46053
MC
7004 case E_CC_Vmode:
7005 switch (comp_code)
7006 {
7007 case NE: return AARCH64_VS;
7008 case EQ: return AARCH64_VC;
7009 default: return -1;
7010 }
7011 break;
7012
43e9d192 7013 default:
cd5660ab 7014 return -1;
43e9d192 7015 }
3dfa7055 7016
3dfa7055 7017 return -1;
43e9d192
IB
7018}
7019
ddeabd3e
AL
7020bool
7021aarch64_const_vec_all_same_in_range_p (rtx x,
6a70badb
RS
7022 HOST_WIDE_INT minval,
7023 HOST_WIDE_INT maxval)
ddeabd3e 7024{
6a70badb
RS
7025 rtx elt;
7026 return (const_vec_duplicate_p (x, &elt)
7027 && CONST_INT_P (elt)
7028 && IN_RANGE (INTVAL (elt), minval, maxval));
ddeabd3e
AL
7029}
7030
7031bool
7032aarch64_const_vec_all_same_int_p (rtx x, HOST_WIDE_INT val)
7033{
7034 return aarch64_const_vec_all_same_in_range_p (x, val, val);
7035}
7036
43cacb12
RS
7037/* Return true if VEC is a constant in which every element is in the range
7038 [MINVAL, MAXVAL]. The elements do not need to have the same value. */
7039
7040static bool
7041aarch64_const_vec_all_in_range_p (rtx vec,
7042 HOST_WIDE_INT minval,
7043 HOST_WIDE_INT maxval)
7044{
7045 if (GET_CODE (vec) != CONST_VECTOR
7046 || GET_MODE_CLASS (GET_MODE (vec)) != MODE_VECTOR_INT)
7047 return false;
7048
7049 int nunits;
7050 if (!CONST_VECTOR_STEPPED_P (vec))
7051 nunits = const_vector_encoded_nelts (vec);
7052 else if (!CONST_VECTOR_NUNITS (vec).is_constant (&nunits))
7053 return false;
7054
7055 for (int i = 0; i < nunits; i++)
7056 {
7057 rtx vec_elem = CONST_VECTOR_ELT (vec, i);
7058 if (!CONST_INT_P (vec_elem)
7059 || !IN_RANGE (INTVAL (vec_elem), minval, maxval))
7060 return false;
7061 }
7062 return true;
7063}
43e9d192 7064
cf670503
ZC
7065/* N Z C V. */
7066#define AARCH64_CC_V 1
7067#define AARCH64_CC_C (1 << 1)
7068#define AARCH64_CC_Z (1 << 2)
7069#define AARCH64_CC_N (1 << 3)
7070
c8012fbc
WD
7071/* N Z C V flags for ccmp. Indexed by AARCH64_COND_CODE. */
7072static const int aarch64_nzcv_codes[] =
7073{
7074 0, /* EQ, Z == 1. */
7075 AARCH64_CC_Z, /* NE, Z == 0. */
7076 0, /* CS, C == 1. */
7077 AARCH64_CC_C, /* CC, C == 0. */
7078 0, /* MI, N == 1. */
7079 AARCH64_CC_N, /* PL, N == 0. */
7080 0, /* VS, V == 1. */
7081 AARCH64_CC_V, /* VC, V == 0. */
7082  0,		/* HI, C == 1 && Z == 0.  */
7083 AARCH64_CC_C, /* LS, !(C == 1 && Z == 0). */
7084 AARCH64_CC_V, /* GE, N == V. */
7085 0, /* LT, N != V. */
7086 AARCH64_CC_Z, /* GT, Z == 0 && N == V. */
7087 0, /* LE, !(Z == 0 && N == V). */
7088 0, /* AL, Any. */
7089 0 /* NV, Any. */
cf670503
ZC
7090};
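/* Illustrative example (not part of the original source): for a
   conditional compare whose following test is GE, index AARCH64_GE
   selects AARCH64_CC_V, so the "%k" output modifier prints 1
   (NZCV = 0001).  Setting only V when the ccmp guard fails gives
   N != V, which makes the subsequent GE condition evaluate to false.  */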
7091
43cacb12
RS
7092/* Print floating-point vector immediate operand X to F, negating it
7093 first if NEGATE is true. Return true on success, false if it isn't
7094 a constant we can handle. */
7095
7096static bool
7097aarch64_print_vector_float_operand (FILE *f, rtx x, bool negate)
7098{
7099 rtx elt;
7100
7101 if (!const_vec_duplicate_p (x, &elt))
7102 return false;
7103
7104 REAL_VALUE_TYPE r = *CONST_DOUBLE_REAL_VALUE (elt);
7105 if (negate)
7106 r = real_value_negate (&r);
7107
7108 /* We only handle the SVE single-bit immediates here. */
7109 if (real_equal (&r, &dconst0))
7110 asm_fprintf (f, "0.0");
7111 else if (real_equal (&r, &dconst1))
7112 asm_fprintf (f, "1.0");
7113 else if (real_equal (&r, &dconsthalf))
7114 asm_fprintf (f, "0.5");
7115 else
7116 return false;
7117
7118 return true;
7119}
7120
9f4cbab8
RS
7121/* Return the equivalent letter for size. */
7122static char
7123sizetochar (int size)
7124{
7125 switch (size)
7126 {
7127 case 64: return 'd';
7128 case 32: return 's';
7129 case 16: return 'h';
7130 case 8 : return 'b';
7131 default: gcc_unreachable ();
7132 }
7133}
7134
bcf19844
JW
7135/* Print operand X to file F in a target specific manner according to CODE.
7136 The acceptable formatting commands given by CODE are:
7137 'c': An integer or symbol address without a preceding #
7138 sign.
43cacb12
RS
7139 'C': Take the duplicated element in a vector constant
7140 and print it in hex.
7141 'D': Take the duplicated element in a vector constant
7142 and print it as an unsigned integer, in decimal.
bcf19844
JW
7143 'e': Print the sign/zero-extend size as a character 8->b,
7144 16->h, 32->w.
7145 'p': Prints N such that 2^N == X (X must be power of 2 and
7146 const int).
7147 'P': Print the number of non-zero bits in X (a const_int).
7148 'H': Print the higher numbered register of a pair (TImode)
7149 of regs.
7150 'm': Print a condition (eq, ne, etc).
7151 'M': Same as 'm', but invert condition.
43cacb12
RS
7152 'N': Take the duplicated element in a vector constant
7153 and print the negative of it in decimal.
bcf19844
JW
7154 'b/h/s/d/q': Print a scalar FP/SIMD register name.
7155 'S/T/U/V': Print a FP/SIMD register name for a register list.
7156 The register printed is the FP/SIMD register name
7157 of X + 0/1/2/3 for S/T/U/V.
7158 'R': Print a scalar FP/SIMD register name + 1.
7159 'X': Print bottom 16 bits of integer constant in hex.
7160 'w/x': Print a general register name or the zero register
7161 (32-bit or 64-bit).
7162     '0':		Print a normal operand; if it's a general register,
7163			then we assume DImode.
7164 'k': Print NZCV for conditional compare instructions.
7165 'A': Output address constant representing the first
7166 argument of X, specifying a relocation offset
7167 if appropriate.
7168 'L': Output constant address specified by X
7169 with a relocation offset if appropriate.
7170 'G': Prints address of X, specifying a PC relative
e69a816d
WD
7171 relocation mode if appropriate.
7172 'y': Output address of LDP or STP - this is used for
7173 some LDP/STPs which don't use a PARALLEL in their
7174 pattern (so the mode needs to be adjusted).
7175 'z': Output address of a typical LDP or STP. */
bcf19844 7176
cc8ca59e
JB
7177static void
7178aarch64_print_operand (FILE *f, rtx x, int code)
43e9d192 7179{
43cacb12 7180 rtx elt;
43e9d192
IB
7181 switch (code)
7182 {
f541a481
KT
7183 case 'c':
7184 switch (GET_CODE (x))
7185 {
7186 case CONST_INT:
7187 fprintf (f, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
7188 break;
7189
7190 case SYMBOL_REF:
7191 output_addr_const (f, x);
7192 break;
7193
7194 case CONST:
7195 if (GET_CODE (XEXP (x, 0)) == PLUS
7196 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF)
7197 {
7198 output_addr_const (f, x);
7199 break;
7200 }
7201 /* Fall through. */
7202
7203 default:
ee61f880 7204 output_operand_lossage ("unsupported operand for code '%c'", code);
f541a481
KT
7205 }
7206 break;
7207
43e9d192 7208 case 'e':
43e9d192
IB
7209 {
7210 int n;
7211
4aa81c2e 7212 if (!CONST_INT_P (x)
43e9d192
IB
7213 || (n = exact_log2 (INTVAL (x) & ~7)) <= 0)
7214 {
7215 output_operand_lossage ("invalid operand for '%%%c'", code);
7216 return;
7217 }
7218
7219 switch (n)
7220 {
7221 case 3:
7222 fputc ('b', f);
7223 break;
7224 case 4:
7225 fputc ('h', f);
7226 break;
7227 case 5:
7228 fputc ('w', f);
7229 break;
7230 default:
7231 output_operand_lossage ("invalid operand for '%%%c'", code);
7232 return;
7233 }
7234 }
7235 break;
7236
7237 case 'p':
7238 {
7239 int n;
7240
4aa81c2e 7241 if (!CONST_INT_P (x) || (n = exact_log2 (INTVAL (x))) < 0)
43e9d192
IB
7242 {
7243 output_operand_lossage ("invalid operand for '%%%c'", code);
7244 return;
7245 }
7246
7247 asm_fprintf (f, "%d", n);
7248 }
7249 break;
7250
7251 case 'P':
4aa81c2e 7252 if (!CONST_INT_P (x))
43e9d192
IB
7253 {
7254 output_operand_lossage ("invalid operand for '%%%c'", code);
7255 return;
7256 }
7257
8d55c61b 7258 asm_fprintf (f, "%u", popcount_hwi (INTVAL (x)));
43e9d192
IB
7259 break;
7260
7261 case 'H':
c0111dc4
RE
7262 if (x == const0_rtx)
7263 {
7264 asm_fprintf (f, "xzr");
7265 break;
7266 }
7267
4aa81c2e 7268 if (!REG_P (x) || !GP_REGNUM_P (REGNO (x) + 1))
43e9d192
IB
7269 {
7270 output_operand_lossage ("invalid operand for '%%%c'", code);
7271 return;
7272 }
7273
01a3a324 7274 asm_fprintf (f, "%s", reg_names [REGNO (x) + 1]);
43e9d192
IB
7275 break;
7276
43e9d192 7277 case 'M':
c8012fbc 7278 case 'm':
cd5660ab
KT
7279 {
7280 int cond_code;
c8012fbc
WD
7281 /* CONST_TRUE_RTX means al/nv (al is the default, don't print it). */
7282 if (x == const_true_rtx)
cd5660ab 7283 {
c8012fbc
WD
7284 if (code == 'M')
7285 fputs ("nv", f);
cd5660ab
KT
7286 return;
7287 }
43e9d192 7288
cd5660ab
KT
7289 if (!COMPARISON_P (x))
7290 {
7291 output_operand_lossage ("invalid operand for '%%%c'", code);
7292 return;
7293 }
c8012fbc 7294
cd5660ab
KT
7295 cond_code = aarch64_get_condition_code (x);
7296 gcc_assert (cond_code >= 0);
c8012fbc
WD
7297 if (code == 'M')
7298 cond_code = AARCH64_INVERSE_CONDITION_CODE (cond_code);
7299 fputs (aarch64_condition_codes[cond_code], f);
cd5660ab 7300 }
43e9d192
IB
7301 break;
7302
43cacb12
RS
7303 case 'N':
7304 if (!const_vec_duplicate_p (x, &elt))
7305 {
7306 output_operand_lossage ("invalid vector constant");
7307 return;
7308 }
7309
7310 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_VECTOR_INT)
7311 asm_fprintf (f, "%wd", -INTVAL (elt));
7312 else if (GET_MODE_CLASS (GET_MODE (x)) == MODE_VECTOR_FLOAT
7313 && aarch64_print_vector_float_operand (f, x, true))
7314 ;
7315 else
7316 {
7317 output_operand_lossage ("invalid vector constant");
7318 return;
7319 }
7320 break;
7321
43e9d192
IB
7322 case 'b':
7323 case 'h':
7324 case 's':
7325 case 'd':
7326 case 'q':
43e9d192
IB
7327 if (!REG_P (x) || !FP_REGNUM_P (REGNO (x)))
7328 {
7329 output_operand_lossage ("incompatible floating point / vector register operand for '%%%c'", code);
7330 return;
7331 }
50ce6f88 7332 asm_fprintf (f, "%c%d", code, REGNO (x) - V0_REGNUM);
43e9d192
IB
7333 break;
7334
7335 case 'S':
7336 case 'T':
7337 case 'U':
7338 case 'V':
43e9d192
IB
7339 if (!REG_P (x) || !FP_REGNUM_P (REGNO (x)))
7340 {
7341 output_operand_lossage ("incompatible floating point / vector register operand for '%%%c'", code);
7342 return;
7343 }
43cacb12
RS
7344 asm_fprintf (f, "%c%d",
7345 aarch64_sve_data_mode_p (GET_MODE (x)) ? 'z' : 'v',
7346 REGNO (x) - V0_REGNUM + (code - 'S'));
43e9d192
IB
7347 break;
7348
2d8c6dc1 7349 case 'R':
2d8c6dc1
AH
7350 if (!REG_P (x) || !FP_REGNUM_P (REGNO (x)))
7351 {
7352 output_operand_lossage ("incompatible floating point / vector register operand for '%%%c'", code);
7353 return;
7354 }
7355 asm_fprintf (f, "q%d", REGNO (x) - V0_REGNUM + 1);
7356 break;
7357
a05c0ddf 7358 case 'X':
4aa81c2e 7359 if (!CONST_INT_P (x))
a05c0ddf
IB
7360 {
7361 output_operand_lossage ("invalid operand for '%%%c'", code);
7362 return;
7363 }
50d38551 7364 asm_fprintf (f, "0x%wx", UINTVAL (x) & 0xffff);
a05c0ddf
IB
7365 break;
7366
43cacb12
RS
7367 case 'C':
7368 {
7369 /* Print a replicated constant in hex. */
7370 if (!const_vec_duplicate_p (x, &elt) || !CONST_INT_P (elt))
7371 {
7372 output_operand_lossage ("invalid operand for '%%%c'", code);
7373 return;
7374 }
7375 scalar_mode inner_mode = GET_MODE_INNER (GET_MODE (x));
7376 asm_fprintf (f, "0x%wx", UINTVAL (elt) & GET_MODE_MASK (inner_mode));
7377 }
7378 break;
7379
7380 case 'D':
7381 {
7382 /* Print a replicated constant in decimal, treating it as
7383 unsigned. */
7384 if (!const_vec_duplicate_p (x, &elt) || !CONST_INT_P (elt))
7385 {
7386 output_operand_lossage ("invalid operand for '%%%c'", code);
7387 return;
7388 }
7389 scalar_mode inner_mode = GET_MODE_INNER (GET_MODE (x));
7390 asm_fprintf (f, "%wd", UINTVAL (elt) & GET_MODE_MASK (inner_mode));
7391 }
7392 break;
7393
43e9d192
IB
7394 case 'w':
7395 case 'x':
3520f7cc
JG
7396 if (x == const0_rtx
7397 || (CONST_DOUBLE_P (x) && aarch64_float_const_zero_rtx_p (x)))
43e9d192 7398 {
50ce6f88 7399 asm_fprintf (f, "%czr", code);
43e9d192
IB
7400 break;
7401 }
7402
7403 if (REG_P (x) && GP_REGNUM_P (REGNO (x)))
7404 {
50ce6f88 7405 asm_fprintf (f, "%c%d", code, REGNO (x) - R0_REGNUM);
43e9d192
IB
7406 break;
7407 }
7408
7409 if (REG_P (x) && REGNO (x) == SP_REGNUM)
7410 {
50ce6f88 7411 asm_fprintf (f, "%ssp", code == 'w' ? "w" : "");
43e9d192
IB
7412 break;
7413 }
7414
7415 /* Fall through */
7416
7417 case 0:
43e9d192
IB
7418 if (x == NULL)
7419 {
7420 output_operand_lossage ("missing operand");
7421 return;
7422 }
7423
7424 switch (GET_CODE (x))
7425 {
7426 case REG:
43cacb12 7427 if (aarch64_sve_data_mode_p (GET_MODE (x)))
9f4cbab8
RS
7428 {
7429 if (REG_NREGS (x) == 1)
7430 asm_fprintf (f, "z%d", REGNO (x) - V0_REGNUM);
7431 else
7432 {
7433 char suffix
7434 = sizetochar (GET_MODE_UNIT_BITSIZE (GET_MODE (x)));
7435 asm_fprintf (f, "{z%d.%c - z%d.%c}",
7436 REGNO (x) - V0_REGNUM, suffix,
7437 END_REGNO (x) - V0_REGNUM - 1, suffix);
7438 }
7439 }
43cacb12
RS
7440 else
7441 asm_fprintf (f, "%s", reg_names [REGNO (x)]);
43e9d192
IB
7442 break;
7443
7444 case MEM:
cc8ca59e 7445 output_address (GET_MODE (x), XEXP (x, 0));
43e9d192
IB
7446 break;
7447
7448 case LABEL_REF:
7449 case SYMBOL_REF:
7450 output_addr_const (asm_out_file, x);
7451 break;
7452
7453 case CONST_INT:
7454 asm_fprintf (f, "%wd", INTVAL (x));
7455 break;
7456
43cacb12
RS
7457 case CONST:
7458 if (!VECTOR_MODE_P (GET_MODE (x)))
3520f7cc 7459 {
43cacb12
RS
7460 output_addr_const (asm_out_file, x);
7461 break;
3520f7cc 7462 }
43cacb12
RS
7463 /* fall through */
7464
7465 case CONST_VECTOR:
7466 if (!const_vec_duplicate_p (x, &elt))
3520f7cc 7467 {
43cacb12
RS
7468 output_operand_lossage ("invalid vector constant");
7469 return;
3520f7cc 7470 }
43cacb12
RS
7471
7472 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_VECTOR_INT)
7473 asm_fprintf (f, "%wd", INTVAL (elt));
7474 else if (GET_MODE_CLASS (GET_MODE (x)) == MODE_VECTOR_FLOAT
7475 && aarch64_print_vector_float_operand (f, x, false))
7476 ;
3520f7cc 7477 else
43cacb12
RS
7478 {
7479 output_operand_lossage ("invalid vector constant");
7480 return;
7481 }
43e9d192
IB
7482 break;
7483
3520f7cc 7484 case CONST_DOUBLE:
2ca5b430
KT
7485 /* Since we define TARGET_SUPPORTS_WIDE_INT we shouldn't ever
7486 be getting CONST_DOUBLEs holding integers. */
7487 gcc_assert (GET_MODE (x) != VOIDmode);
7488 if (aarch64_float_const_zero_rtx_p (x))
3520f7cc
JG
7489 {
7490 fputc ('0', f);
7491 break;
7492 }
7493 else if (aarch64_float_const_representable_p (x))
7494 {
7495#define buf_size 20
7496 char float_buf[buf_size] = {'\0'};
34a72c33
RS
7497 real_to_decimal_for_mode (float_buf,
7498 CONST_DOUBLE_REAL_VALUE (x),
3520f7cc
JG
7499 buf_size, buf_size,
7500 1, GET_MODE (x));
7501 asm_fprintf (asm_out_file, "%s", float_buf);
7502 break;
7503#undef buf_size
7504 }
7505 output_operand_lossage ("invalid constant");
7506 return;
43e9d192
IB
7507 default:
7508 output_operand_lossage ("invalid operand");
7509 return;
7510 }
7511 break;
7512
7513 case 'A':
7514 if (GET_CODE (x) == HIGH)
7515 x = XEXP (x, 0);
7516
a6e0bfa7 7517 switch (aarch64_classify_symbolic_expression (x))
43e9d192 7518 {
6642bdb4 7519 case SYMBOL_SMALL_GOT_4G:
43e9d192
IB
7520 asm_fprintf (asm_out_file, ":got:");
7521 break;
7522
7523 case SYMBOL_SMALL_TLSGD:
7524 asm_fprintf (asm_out_file, ":tlsgd:");
7525 break;
7526
7527 case SYMBOL_SMALL_TLSDESC:
7528 asm_fprintf (asm_out_file, ":tlsdesc:");
7529 break;
7530
79496620 7531 case SYMBOL_SMALL_TLSIE:
43e9d192
IB
7532 asm_fprintf (asm_out_file, ":gottprel:");
7533 break;
7534
d18ba284 7535 case SYMBOL_TLSLE24:
43e9d192
IB
7536 asm_fprintf (asm_out_file, ":tprel:");
7537 break;
7538
87dd8ab0
MS
7539 case SYMBOL_TINY_GOT:
7540 gcc_unreachable ();
7541 break;
7542
43e9d192
IB
7543 default:
7544 break;
7545 }
7546 output_addr_const (asm_out_file, x);
7547 break;
7548
7549 case 'L':
a6e0bfa7 7550 switch (aarch64_classify_symbolic_expression (x))
43e9d192 7551 {
6642bdb4 7552 case SYMBOL_SMALL_GOT_4G:
43e9d192
IB
7553 asm_fprintf (asm_out_file, ":lo12:");
7554 break;
7555
7556 case SYMBOL_SMALL_TLSGD:
7557 asm_fprintf (asm_out_file, ":tlsgd_lo12:");
7558 break;
7559
7560 case SYMBOL_SMALL_TLSDESC:
7561 asm_fprintf (asm_out_file, ":tlsdesc_lo12:");
7562 break;
7563
79496620 7564 case SYMBOL_SMALL_TLSIE:
43e9d192
IB
7565 asm_fprintf (asm_out_file, ":gottprel_lo12:");
7566 break;
7567
cbf5629e
JW
7568 case SYMBOL_TLSLE12:
7569 asm_fprintf (asm_out_file, ":tprel_lo12:");
7570 break;
7571
d18ba284 7572 case SYMBOL_TLSLE24:
43e9d192
IB
7573 asm_fprintf (asm_out_file, ":tprel_lo12_nc:");
7574 break;
7575
87dd8ab0
MS
7576 case SYMBOL_TINY_GOT:
7577 asm_fprintf (asm_out_file, ":got:");
7578 break;
7579
5ae7caad
JW
7580 case SYMBOL_TINY_TLSIE:
7581 asm_fprintf (asm_out_file, ":gottprel:");
7582 break;
7583
43e9d192
IB
7584 default:
7585 break;
7586 }
7587 output_addr_const (asm_out_file, x);
7588 break;
7589
7590 case 'G':
a6e0bfa7 7591 switch (aarch64_classify_symbolic_expression (x))
43e9d192 7592 {
d18ba284 7593 case SYMBOL_TLSLE24:
43e9d192
IB
7594 asm_fprintf (asm_out_file, ":tprel_hi12:");
7595 break;
7596 default:
7597 break;
7598 }
7599 output_addr_const (asm_out_file, x);
7600 break;
7601
cf670503
ZC
7602 case 'k':
7603 {
c8012fbc 7604 HOST_WIDE_INT cond_code;
cf670503 7605
c8012fbc 7606 if (!CONST_INT_P (x))
cf670503
ZC
7607 {
7608 output_operand_lossage ("invalid operand for '%%%c'", code);
7609 return;
7610 }
7611
c8012fbc
WD
7612 cond_code = INTVAL (x);
7613 gcc_assert (cond_code >= 0 && cond_code <= AARCH64_NV);
7614 asm_fprintf (f, "%d", aarch64_nzcv_codes[cond_code]);
cf670503
ZC
7615 }
7616 break;
7617
e69a816d
WD
7618 case 'y':
7619 case 'z':
7620 {
7621 machine_mode mode = GET_MODE (x);
7622
c348cab0 7623 if (GET_CODE (x) != MEM
6a70badb 7624 || (code == 'y' && maybe_ne (GET_MODE_SIZE (mode), 16)))
e69a816d
WD
7625 {
7626 output_operand_lossage ("invalid operand for '%%%c'", code);
7627 return;
7628 }
7629
a25831ac
AV
7630 if (!aarch64_print_address_internal (f, mode, XEXP (x, 0),
7631 code == 'y'
7632 ? ADDR_QUERY_LDP_STP_N
7633 : ADDR_QUERY_LDP_STP))
c348cab0 7634 output_operand_lossage ("invalid operand prefix '%%%c'", code);
e69a816d
WD
7635 }
7636 break;
7637
43e9d192
IB
7638 default:
7639 output_operand_lossage ("invalid operand prefix '%%%c'", code);
7640 return;
7641 }
7642}
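/* Illustrative examples (not part of the original source): with operand 0
   in register x3, "%w0" prints "w3" and "%x0" prints "x3"; "%e" on the
   constant 16 prints "h" (a 16-bit extend) and "%p" on the constant 8
   prints "3" (log2 of the value).  */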
7643
e69a816d
WD
7644/* Print address 'x' of a memory access with mode 'mode'.
7645   'type' is the aarch64_addr_query_type context passed to aarch64_classify_address;
7646   it distinguishes normal memory accesses from LDP/STP and other special forms.  */
c348cab0 7647static bool
a97d8b98
RS
7648aarch64_print_address_internal (FILE *f, machine_mode mode, rtx x,
7649 aarch64_addr_query_type type)
43e9d192
IB
7650{
7651 struct aarch64_address_info addr;
6a70badb 7652 unsigned int size;
43e9d192 7653
e69a816d 7654 /* Check all addresses are Pmode - including ILP32. */
31460ed2
JJ
7655 if (GET_MODE (x) != Pmode
7656 && (!CONST_INT_P (x)
7657 || trunc_int_for_mode (INTVAL (x), Pmode) != INTVAL (x)))
7658 {
7659 output_operand_lossage ("invalid address mode");
7660 return false;
7661 }
e69a816d 7662
a97d8b98 7663 if (aarch64_classify_address (&addr, x, mode, true, type))
43e9d192
IB
7664 switch (addr.type)
7665 {
7666 case ADDRESS_REG_IMM:
dc640181 7667 if (known_eq (addr.const_offset, 0))
01a3a324 7668 asm_fprintf (f, "[%s]", reg_names [REGNO (addr.base)]);
43cacb12
RS
7669 else if (aarch64_sve_data_mode_p (mode))
7670 {
7671 HOST_WIDE_INT vnum
7672 = exact_div (addr.const_offset,
7673 BYTES_PER_SVE_VECTOR).to_constant ();
7674 asm_fprintf (f, "[%s, #%wd, mul vl]",
7675 reg_names[REGNO (addr.base)], vnum);
7676 }
7677 else if (aarch64_sve_pred_mode_p (mode))
7678 {
7679 HOST_WIDE_INT vnum
7680 = exact_div (addr.const_offset,
7681 BYTES_PER_SVE_PRED).to_constant ();
7682 asm_fprintf (f, "[%s, #%wd, mul vl]",
7683 reg_names[REGNO (addr.base)], vnum);
7684 }
43e9d192 7685 else
16a3246f 7686 asm_fprintf (f, "[%s, %wd]", reg_names [REGNO (addr.base)],
43e9d192 7687 INTVAL (addr.offset));
c348cab0 7688 return true;
43e9d192
IB
7689
7690 case ADDRESS_REG_REG:
7691 if (addr.shift == 0)
16a3246f 7692 asm_fprintf (f, "[%s, %s]", reg_names [REGNO (addr.base)],
01a3a324 7693 reg_names [REGNO (addr.offset)]);
43e9d192 7694 else
16a3246f 7695 asm_fprintf (f, "[%s, %s, lsl %u]", reg_names [REGNO (addr.base)],
01a3a324 7696 reg_names [REGNO (addr.offset)], addr.shift);
c348cab0 7697 return true;
43e9d192
IB
7698
7699 case ADDRESS_REG_UXTW:
7700 if (addr.shift == 0)
16a3246f 7701 asm_fprintf (f, "[%s, w%d, uxtw]", reg_names [REGNO (addr.base)],
43e9d192
IB
7702 REGNO (addr.offset) - R0_REGNUM);
7703 else
16a3246f 7704 asm_fprintf (f, "[%s, w%d, uxtw %u]", reg_names [REGNO (addr.base)],
43e9d192 7705 REGNO (addr.offset) - R0_REGNUM, addr.shift);
c348cab0 7706 return true;
43e9d192
IB
7707
7708 case ADDRESS_REG_SXTW:
7709 if (addr.shift == 0)
16a3246f 7710 asm_fprintf (f, "[%s, w%d, sxtw]", reg_names [REGNO (addr.base)],
43e9d192
IB
7711 REGNO (addr.offset) - R0_REGNUM);
7712 else
16a3246f 7713 asm_fprintf (f, "[%s, w%d, sxtw %u]", reg_names [REGNO (addr.base)],
43e9d192 7714 REGNO (addr.offset) - R0_REGNUM, addr.shift);
c348cab0 7715 return true;
43e9d192
IB
7716
7717 case ADDRESS_REG_WB:
6a70badb
RS
7718 /* Writeback is only supported for fixed-width modes. */
7719 size = GET_MODE_SIZE (mode).to_constant ();
43e9d192
IB
7720 switch (GET_CODE (x))
7721 {
7722 case PRE_INC:
6a70badb 7723 asm_fprintf (f, "[%s, %d]!", reg_names [REGNO (addr.base)], size);
c348cab0 7724 return true;
43e9d192 7725 case POST_INC:
6a70badb 7726 asm_fprintf (f, "[%s], %d", reg_names [REGNO (addr.base)], size);
c348cab0 7727 return true;
43e9d192 7728 case PRE_DEC:
6a70badb 7729 asm_fprintf (f, "[%s, -%d]!", reg_names [REGNO (addr.base)], size);
c348cab0 7730 return true;
43e9d192 7731 case POST_DEC:
6a70badb 7732 asm_fprintf (f, "[%s], -%d", reg_names [REGNO (addr.base)], size);
c348cab0 7733 return true;
43e9d192 7734 case PRE_MODIFY:
6a70badb 7735 asm_fprintf (f, "[%s, %wd]!", reg_names[REGNO (addr.base)],
43e9d192 7736 INTVAL (addr.offset));
c348cab0 7737 return true;
43e9d192 7738 case POST_MODIFY:
6a70badb 7739 asm_fprintf (f, "[%s], %wd", reg_names[REGNO (addr.base)],
43e9d192 7740 INTVAL (addr.offset));
c348cab0 7741 return true;
43e9d192
IB
7742 default:
7743 break;
7744 }
7745 break;
7746
7747 case ADDRESS_LO_SUM:
16a3246f 7748 asm_fprintf (f, "[%s, #:lo12:", reg_names [REGNO (addr.base)]);
43e9d192
IB
7749 output_addr_const (f, addr.offset);
7750 asm_fprintf (f, "]");
c348cab0 7751 return true;
43e9d192
IB
7752
7753 case ADDRESS_SYMBOLIC:
d6591257 7754 output_addr_const (f, x);
c348cab0 7755 return true;
43e9d192
IB
7756 }
7757
c348cab0 7758 return false;
43e9d192
IB
7759}
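/* Illustrative examples (not part of the original source): a base plus
   immediate prints as "[x0, 16]", a sign-extended index as
   "[x1, w2, sxtw 2]", and a post-increment writeback as "[x3], 8"
   (the 8 being the size of the access mode).  */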
7760
e69a816d
WD
7761/* Print address 'x' of a memory access with mode 'mode'. */
7762static void
7763aarch64_print_operand_address (FILE *f, machine_mode mode, rtx x)
7764{
43cacb12 7765 if (!aarch64_print_address_internal (f, mode, x, ADDR_QUERY_ANY))
c348cab0 7766 output_addr_const (f, x);
e69a816d
WD
7767}
7768
43e9d192
IB
7769bool
7770aarch64_label_mentioned_p (rtx x)
7771{
7772 const char *fmt;
7773 int i;
7774
7775 if (GET_CODE (x) == LABEL_REF)
7776 return true;
7777
7778 /* UNSPEC_TLS entries for a symbol include a LABEL_REF for the
7779 referencing instruction, but they are constant offsets, not
7780 symbols. */
7781 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
7782 return false;
7783
7784 fmt = GET_RTX_FORMAT (GET_CODE (x));
7785 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
7786 {
7787 if (fmt[i] == 'E')
7788 {
7789 int j;
7790
7791 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
7792 if (aarch64_label_mentioned_p (XVECEXP (x, i, j)))
7793 return 1;
7794 }
7795 else if (fmt[i] == 'e' && aarch64_label_mentioned_p (XEXP (x, i)))
7796 return 1;
7797 }
7798
7799 return 0;
7800}
7801
7802/* Implement REGNO_REG_CLASS. */
7803
7804enum reg_class
7805aarch64_regno_regclass (unsigned regno)
7806{
7807 if (GP_REGNUM_P (regno))
a4a182c6 7808 return GENERAL_REGS;
43e9d192
IB
7809
7810 if (regno == SP_REGNUM)
7811 return STACK_REG;
7812
7813 if (regno == FRAME_POINTER_REGNUM
7814 || regno == ARG_POINTER_REGNUM)
f24bb080 7815 return POINTER_REGS;
43e9d192
IB
7816
7817 if (FP_REGNUM_P (regno))
7818 return FP_LO_REGNUM_P (regno) ? FP_LO_REGS : FP_REGS;
7819
43cacb12
RS
7820 if (PR_REGNUM_P (regno))
7821 return PR_LO_REGNUM_P (regno) ? PR_LO_REGS : PR_HI_REGS;
7822
43e9d192
IB
7823 return NO_REGS;
7824}
7825
6a70badb
RS
7826/* OFFSET is an address offset for mode MODE, which has SIZE bytes.
7827 If OFFSET is out of range, return an offset of an anchor point
7828 that is in range. Return 0 otherwise. */
7829
7830static HOST_WIDE_INT
7831aarch64_anchor_offset (HOST_WIDE_INT offset, HOST_WIDE_INT size,
7832 machine_mode mode)
7833{
7834 /* Does it look like we'll need a 16-byte load/store-pair operation? */
7835 if (size > 16)
7836 return (offset + 0x400) & ~0x7f0;
7837
7838 /* For offsets that aren't a multiple of the access size, the limit is
7839 -256...255. */
7840 if (offset & (size - 1))
7841 {
7842 /* BLKmode typically uses LDP of X-registers. */
7843 if (mode == BLKmode)
7844 return (offset + 512) & ~0x3ff;
7845 return (offset + 0x100) & ~0x1ff;
7846 }
7847
7848 /* Small negative offsets are supported. */
7849 if (IN_RANGE (offset, -256, 0))
7850 return 0;
7851
7852 if (mode == TImode || mode == TFmode)
7853 return (offset + 0x100) & ~0x1ff;
7854
7855  /* Use the 12-bit offset scaled by the access size.  */
7856 return offset & (~0xfff * size);
7857}
7858
0c4ec427 7859static rtx
ef4bddc2 7860aarch64_legitimize_address (rtx x, rtx /* orig_x */, machine_mode mode)
0c4ec427
RE
7861{
7862 /* Try to split X+CONST into Y=X+(CONST & ~mask), Y+(CONST&mask),
7863 where mask is selected by alignment and size of the offset.
7864 We try to pick as large a range for the offset as possible to
7865 maximize the chance of a CSE. However, for aligned addresses
7866 we limit the range to 4k so that structures with different sized
e8426e0a
BC
7867 elements are likely to use the same base. We need to be careful
7868 not to split a CONST for some forms of address expression, otherwise
7869 it will generate sub-optimal code. */
0c4ec427
RE
7870
7871 if (GET_CODE (x) == PLUS && CONST_INT_P (XEXP (x, 1)))
7872 {
9e0218fc 7873 rtx base = XEXP (x, 0);
17d7bdd8 7874 rtx offset_rtx = XEXP (x, 1);
9e0218fc 7875 HOST_WIDE_INT offset = INTVAL (offset_rtx);
0c4ec427 7876
9e0218fc 7877 if (GET_CODE (base) == PLUS)
e8426e0a 7878 {
9e0218fc
RH
7879 rtx op0 = XEXP (base, 0);
7880 rtx op1 = XEXP (base, 1);
7881
7882 /* Force any scaling into a temp for CSE. */
7883 op0 = force_reg (Pmode, op0);
7884 op1 = force_reg (Pmode, op1);
7885
7886 /* Let the pointer register be in op0. */
7887 if (REG_POINTER (op1))
7888 std::swap (op0, op1);
7889
7890 /* If the pointer is virtual or frame related, then we know that
7891 virtual register instantiation or register elimination is going
7892 to apply a second constant. We want the two constants folded
7893 together easily. Therefore, emit as (OP0 + CONST) + OP1. */
7894 if (virt_or_elim_regno_p (REGNO (op0)))
e8426e0a 7895 {
9e0218fc
RH
7896 base = expand_binop (Pmode, add_optab, op0, offset_rtx,
7897 NULL_RTX, true, OPTAB_DIRECT);
7898 return gen_rtx_PLUS (Pmode, base, op1);
e8426e0a 7899 }
e8426e0a 7900
9e0218fc
RH
7901 /* Otherwise, in order to encourage CSE (and thence loop strength
7902 reduce) scaled addresses, emit as (OP0 + OP1) + CONST. */
7903 base = expand_binop (Pmode, add_optab, op0, op1,
7904 NULL_RTX, true, OPTAB_DIRECT);
7905 x = gen_rtx_PLUS (Pmode, base, offset_rtx);
e8426e0a
BC
7906 }
7907
6a70badb
RS
7908 HOST_WIDE_INT size;
7909 if (GET_MODE_SIZE (mode).is_constant (&size))
ff0f3f1c 7910 {
6a70badb
RS
7911 HOST_WIDE_INT base_offset = aarch64_anchor_offset (offset, size,
7912 mode);
7913 if (base_offset != 0)
7914 {
7915 base = plus_constant (Pmode, base, base_offset);
7916 base = force_operand (base, NULL_RTX);
7917 return plus_constant (Pmode, base, offset - base_offset);
7918 }
9e0218fc 7919 }
0c4ec427
RE
7920 }
7921
7922 return x;
7923}
7924
43e9d192
IB
7925static reg_class_t
7926aarch64_secondary_reload (bool in_p ATTRIBUTE_UNUSED, rtx x,
7927 reg_class_t rclass,
ef4bddc2 7928 machine_mode mode,
43e9d192
IB
7929 secondary_reload_info *sri)
7930{
9a1b9cb4
RS
7931 /* Use aarch64_sve_reload_be for SVE reloads that cannot be handled
7932 directly by the *aarch64_sve_mov<mode>_be move pattern. See the
7933 comment at the head of aarch64-sve.md for more details about the
7934 big-endian handling. */
43cacb12
RS
7935 if (BYTES_BIG_ENDIAN
7936 && reg_class_subset_p (rclass, FP_REGS)
9a1b9cb4
RS
7937 && !((REG_P (x) && HARD_REGISTER_P (x))
7938 || aarch64_simd_valid_immediate (x, NULL))
43cacb12
RS
7939 && aarch64_sve_data_mode_p (mode))
7940 {
7941 sri->icode = CODE_FOR_aarch64_sve_reload_be;
7942 return NO_REGS;
7943 }
b4f50fd4
RR
7944
7945 /* If we have to disable direct literal pool loads and stores because the
7946 function is too big, then we need a scratch register. */
7947 if (MEM_P (x) && GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x)
7948 && (SCALAR_FLOAT_MODE_P (GET_MODE (x))
7949 || targetm.vector_mode_supported_p (GET_MODE (x)))
9ee6540a 7950 && !aarch64_pcrelative_literal_loads)
b4f50fd4 7951 {
0016d8d9 7952 sri->icode = code_for_aarch64_reload_movcp (mode, DImode);
b4f50fd4
RR
7953 return NO_REGS;
7954 }
7955
43e9d192
IB
7956 /* Without the TARGET_SIMD instructions we cannot move a Q register
7957 to a Q register directly. We need a scratch. */
7958 if (REG_P (x) && (mode == TFmode || mode == TImode) && mode == GET_MODE (x)
7959 && FP_REGNUM_P (REGNO (x)) && !TARGET_SIMD
7960 && reg_class_subset_p (rclass, FP_REGS))
7961 {
0016d8d9 7962 sri->icode = code_for_aarch64_reload_mov (mode);
43e9d192
IB
7963 return NO_REGS;
7964 }
7965
7966  /* A TFmode or TImode memory access should be handled via FP_REGS
7967 because AArch64 has richer addressing modes for LDR/STR instructions
7968 than LDP/STP instructions. */
d5726973 7969 if (TARGET_FLOAT && rclass == GENERAL_REGS
6a70badb 7970 && known_eq (GET_MODE_SIZE (mode), 16) && MEM_P (x))
43e9d192
IB
7971 return FP_REGS;
7972
7973 if (rclass == FP_REGS && (mode == TImode || mode == TFmode) && CONSTANT_P(x))
a4a182c6 7974 return GENERAL_REGS;
43e9d192
IB
7975
7976 return NO_REGS;
7977}
7978
7979static bool
6216fd90 7980aarch64_can_eliminate (const int from ATTRIBUTE_UNUSED, const int to)
43e9d192 7981{
6216fd90 7982 gcc_assert (from == ARG_POINTER_REGNUM || from == FRAME_POINTER_REGNUM);
43e9d192 7983
6216fd90
WD
7984 /* If we need a frame pointer, ARG_POINTER_REGNUM and FRAME_POINTER_REGNUM
7985 can only eliminate to HARD_FRAME_POINTER_REGNUM. */
43e9d192 7986 if (frame_pointer_needed)
6216fd90 7987 return to == HARD_FRAME_POINTER_REGNUM;
43e9d192
IB
7988 return true;
7989}
7990
6a70badb 7991poly_int64
43e9d192
IB
7992aarch64_initial_elimination_offset (unsigned from, unsigned to)
7993{
78c29983
MS
7994 if (to == HARD_FRAME_POINTER_REGNUM)
7995 {
7996 if (from == ARG_POINTER_REGNUM)
71bfb77a 7997 return cfun->machine->frame.hard_fp_offset;
78c29983
MS
7998
7999 if (from == FRAME_POINTER_REGNUM)
71bfb77a
WD
8000 return cfun->machine->frame.hard_fp_offset
8001 - cfun->machine->frame.locals_offset;
78c29983
MS
8002 }
8003
8004 if (to == STACK_POINTER_REGNUM)
8005 {
8006 if (from == FRAME_POINTER_REGNUM)
71bfb77a
WD
8007 return cfun->machine->frame.frame_size
8008 - cfun->machine->frame.locals_offset;
78c29983
MS
8009 }
8010
1c960e02 8011 return cfun->machine->frame.frame_size;
43e9d192
IB
8012}
8013
43e9d192
IB
8014/* Implement RETURN_ADDR_RTX. We do not support moving back to a
8015 previous frame. */
8016
8017rtx
8018aarch64_return_addr (int count, rtx frame ATTRIBUTE_UNUSED)
8019{
8020 if (count != 0)
8021 return const0_rtx;
8022 return get_hard_reg_initial_val (Pmode, LR_REGNUM);
8023}
8024
8025
8026static void
8027aarch64_asm_trampoline_template (FILE *f)
8028{
28514dda
YZ
8029 if (TARGET_ILP32)
8030 {
8031 asm_fprintf (f, "\tldr\tw%d, .+16\n", IP1_REGNUM - R0_REGNUM);
8032 asm_fprintf (f, "\tldr\tw%d, .+16\n", STATIC_CHAIN_REGNUM - R0_REGNUM);
8033 }
8034 else
8035 {
8036 asm_fprintf (f, "\tldr\t%s, .+16\n", reg_names [IP1_REGNUM]);
8037 asm_fprintf (f, "\tldr\t%s, .+20\n", reg_names [STATIC_CHAIN_REGNUM]);
8038 }
01a3a324 8039 asm_fprintf (f, "\tbr\t%s\n", reg_names [IP1_REGNUM]);
43e9d192 8040 assemble_aligned_integer (4, const0_rtx);
28514dda
YZ
8041 assemble_aligned_integer (POINTER_BYTES, const0_rtx);
8042 assemble_aligned_integer (POINTER_BYTES, const0_rtx);
43e9d192
IB
8043}
8044
8045static void
8046aarch64_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
8047{
8048 rtx fnaddr, mem, a_tramp;
28514dda 8049 const int tramp_code_sz = 16;
43e9d192
IB
8050
8051 /* Don't need to copy the trailing D-words, we fill those in below. */
8052 emit_block_move (m_tramp, assemble_trampoline_template (),
28514dda
YZ
8053 GEN_INT (tramp_code_sz), BLOCK_OP_NORMAL);
8054 mem = adjust_address (m_tramp, ptr_mode, tramp_code_sz);
43e9d192 8055 fnaddr = XEXP (DECL_RTL (fndecl), 0);
28514dda
YZ
8056 if (GET_MODE (fnaddr) != ptr_mode)
8057 fnaddr = convert_memory_address (ptr_mode, fnaddr);
43e9d192
IB
8058 emit_move_insn (mem, fnaddr);
8059
28514dda 8060 mem = adjust_address (m_tramp, ptr_mode, tramp_code_sz + POINTER_BYTES);
43e9d192
IB
8061 emit_move_insn (mem, chain_value);
8062
8063 /* XXX We should really define a "clear_cache" pattern and use
8064 gen_clear_cache(). */
8065 a_tramp = XEXP (m_tramp, 0);
8066 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__clear_cache"),
db69559b 8067 LCT_NORMAL, VOIDmode, a_tramp, ptr_mode,
28514dda
YZ
8068 plus_constant (ptr_mode, a_tramp, TRAMPOLINE_SIZE),
8069 ptr_mode);
43e9d192
IB
8070}
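/* Illustrative note (not part of the original source): the trampoline
   therefore consists of tramp_code_sz (16) bytes of code followed by two
   pointer-sized words, the target function address and the static chain
   value, which the template code loads PC-relatively before branching.  */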
8071
8072static unsigned char
ef4bddc2 8073aarch64_class_max_nregs (reg_class_t regclass, machine_mode mode)
43e9d192 8074{
6a70badb
RS
8075 /* ??? Logically we should only need to provide a value when
8076 HARD_REGNO_MODE_OK says that at least one register in REGCLASS
8077 can hold MODE, but at the moment we need to handle all modes.
8078 Just ignore any runtime parts for registers that can't store them. */
8079 HOST_WIDE_INT lowest_size = constant_lower_bound (GET_MODE_SIZE (mode));
43cacb12 8080 unsigned int nregs;
43e9d192
IB
8081 switch (regclass)
8082 {
d677263e 8083 case TAILCALL_ADDR_REGS:
43e9d192
IB
8084 case POINTER_REGS:
8085 case GENERAL_REGS:
8086 case ALL_REGS:
f25a140b 8087 case POINTER_AND_FP_REGS:
43e9d192
IB
8088 case FP_REGS:
8089 case FP_LO_REGS:
43cacb12
RS
8090 if (aarch64_sve_data_mode_p (mode)
8091 && constant_multiple_p (GET_MODE_SIZE (mode),
8092 BYTES_PER_SVE_VECTOR, &nregs))
8093 return nregs;
8094 return (aarch64_vector_data_mode_p (mode)
6a70badb
RS
8095 ? CEIL (lowest_size, UNITS_PER_VREG)
8096 : CEIL (lowest_size, UNITS_PER_WORD));
43e9d192 8097 case STACK_REG:
43cacb12
RS
8098 case PR_REGS:
8099 case PR_LO_REGS:
8100 case PR_HI_REGS:
43e9d192
IB
8101 return 1;
8102
8103 case NO_REGS:
8104 return 0;
8105
8106 default:
8107 break;
8108 }
8109 gcc_unreachable ();
8110}
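/* Illustrative examples (not part of the original source): TImode
   (16 bytes) in GENERAL_REGS needs CEIL (16, UNITS_PER_WORD) = 2
   X registers, while V4SImode (16 bytes) in FP_REGS needs
   CEIL (16, UNITS_PER_VREG) = 1 Q register.  */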
8111
8112static reg_class_t
78d8b9f0 8113aarch64_preferred_reload_class (rtx x, reg_class_t regclass)
43e9d192 8114{
51bb310d 8115 if (regclass == POINTER_REGS)
78d8b9f0
IB
8116 return GENERAL_REGS;
8117
51bb310d
MS
8118 if (regclass == STACK_REG)
8119 {
8120 if (REG_P(x)
8121 && reg_class_subset_p (REGNO_REG_CLASS (REGNO (x)), POINTER_REGS))
8122 return regclass;
8123
8124 return NO_REGS;
8125 }
8126
27bd251b
IB
8127  /* Register elimination can result in a request for
8128 SP+constant->FP_REGS. We cannot support such operations which
8129     use SP as source and an FP_REG as destination, so reject them
8130     outright.  */
8131 if (! reg_class_subset_p (regclass, GENERAL_REGS) && GET_CODE (x) == PLUS)
8132 {
8133 rtx lhs = XEXP (x, 0);
8134
8135 /* Look through a possible SUBREG introduced by ILP32. */
8136 if (GET_CODE (lhs) == SUBREG)
8137 lhs = SUBREG_REG (lhs);
8138
8139 gcc_assert (REG_P (lhs));
8140 gcc_assert (reg_class_subset_p (REGNO_REG_CLASS (REGNO (lhs)),
8141 POINTER_REGS));
8142 return NO_REGS;
8143 }
8144
78d8b9f0 8145 return regclass;
43e9d192
IB
8146}
8147
8148void
8149aarch64_asm_output_labelref (FILE* f, const char *name)
8150{
8151 asm_fprintf (f, "%U%s", name);
8152}
8153
8154static void
8155aarch64_elf_asm_constructor (rtx symbol, int priority)
8156{
8157 if (priority == DEFAULT_INIT_PRIORITY)
8158 default_ctor_section_asm_out_constructor (symbol, priority);
8159 else
8160 {
8161 section *s;
53d190c1
AT
8162      /* Although priority is known to be in the range [0, 65535], and so 18
8163	 bytes would be enough, the compiler might not know that.  To avoid
8164	 a -Wformat-truncation false positive, use a larger size.  */
8165 char buf[23];
43e9d192 8166 snprintf (buf, sizeof (buf), ".init_array.%.5u", priority);
fcef3abd 8167 s = get_section (buf, SECTION_WRITE | SECTION_NOTYPE, NULL);
43e9d192
IB
8168 switch_to_section (s);
8169 assemble_align (POINTER_SIZE);
28514dda 8170 assemble_aligned_integer (POINTER_BYTES, symbol);
43e9d192
IB
8171 }
8172}
8173
8174static void
8175aarch64_elf_asm_destructor (rtx symbol, int priority)
8176{
8177 if (priority == DEFAULT_INIT_PRIORITY)
8178 default_dtor_section_asm_out_destructor (symbol, priority);
8179 else
8180 {
8181 section *s;
53d190c1
AT
8182 /* While priority is known to be in the range [0, 65535], and so 18
8183 bytes would be enough, the compiler might not know that. To avoid
8184 a -Wformat-truncation false positive, use a larger size. */
8185 char buf[23];
43e9d192 8186 snprintf (buf, sizeof (buf), ".fini_array.%.5u", priority);
fcef3abd 8187 s = get_section (buf, SECTION_WRITE | SECTION_NOTYPE, NULL);
43e9d192
IB
8188 switch_to_section (s);
8189 assemble_align (POINTER_SIZE);
28514dda 8190 assemble_aligned_integer (POINTER_BYTES, symbol);
43e9d192
IB
8191 }
8192}
8193
8194const char*
8195aarch64_output_casesi (rtx *operands)
8196{
8197 char buf[100];
8198 char label[100];
b32d5189 8199 rtx diff_vec = PATTERN (NEXT_INSN (as_a <rtx_insn *> (operands[2])));
43e9d192
IB
8200 int index;
8201 static const char *const patterns[4][2] =
8202 {
8203 {
8204 "ldrb\t%w3, [%0,%w1,uxtw]",
8205 "add\t%3, %4, %w3, sxtb #2"
8206 },
8207 {
8208 "ldrh\t%w3, [%0,%w1,uxtw #1]",
8209 "add\t%3, %4, %w3, sxth #2"
8210 },
8211 {
8212 "ldr\t%w3, [%0,%w1,uxtw #2]",
8213 "add\t%3, %4, %w3, sxtw #2"
8214 },
8215 /* We assume that DImode is only generated when not optimizing and
8216 that we don't really need 64-bit address offsets. That would
8217 imply an object file with 8GB of code in a single function! */
8218 {
8219 "ldr\t%w3, [%0,%w1,uxtw #2]",
8220 "add\t%3, %4, %w3, sxtw #2"
8221 }
8222 };
8223
8224 gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC);
8225
77e994c9
RS
8226 scalar_int_mode mode = as_a <scalar_int_mode> (GET_MODE (diff_vec));
8227 index = exact_log2 (GET_MODE_SIZE (mode));
43e9d192
IB
8228
8229 gcc_assert (index >= 0 && index <= 3);
8230
8231 /* Need to implement table size reduction, by chaning the code below. */
8232 output_asm_insn (patterns[index][0], operands);
8233 ASM_GENERATE_INTERNAL_LABEL (label, "Lrtx", CODE_LABEL_NUMBER (operands[2]));
8234 snprintf (buf, sizeof (buf),
8235 "adr\t%%4, %s", targetm.strip_name_encoding (label));
8236 output_asm_insn (buf, operands);
8237 output_asm_insn (patterns[index][1], operands);
8238 output_asm_insn ("br\t%3", operands);
8239 assemble_label (asm_out_file, label);
8240 return "";
8241}
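/* For illustration, with a HImode dispatch table (index 1 above) the
   emitted sequence is roughly, using arbitrary register numbers:

	ldrh	w3, [x0, w1, uxtw #1]	// load 16-bit table entry
	adr	x4, .Lrtx<N>		// anchor label emitted below
	add	x3, x4, w3, sxth #2	// entry is scaled by 4
	br	x3
   .Lrtx<N>:

   i.e. each table entry effectively encodes the distance from the
   anchor label to the case target, in units of 4 bytes.  */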
8242
8243
8244/* Return size in bits of an arithmetic operand which is shifted/scaled and
8245 masked such that it is suitable for a UXTB, UXTH, or UXTW extend
8246 operator. */
8247
8248int
8249aarch64_uxt_size (int shift, HOST_WIDE_INT mask)
8250{
8251 if (shift >= 0 && shift <= 3)
8252 {
8253 int size;
8254 for (size = 8; size <= 32; size *= 2)
8255 {
8256 HOST_WIDE_INT bits = ((HOST_WIDE_INT)1U << size) - 1;
8257 if (mask == bits << shift)
8258 return size;
8259 }
8260 }
8261 return 0;
8262}
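/* For example, aarch64_uxt_size (2, 0x3fc) returns 8, because
   0x3fc == 0xff << 2, so the shift-and-mask corresponds to a UXTB
   operand with LSL #2; aarch64_uxt_size (1, 0xffff) returns 0, since
   0xffff is not an 8/16/32-bit contiguous mask shifted left by 1.  */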
8263
e78d485e
RR
8264/* Constant pools are per function only when PC relative
8265 literal loads are true or we are in the large memory
8266 model. */
8267
8268static inline bool
8269aarch64_can_use_per_function_literal_pools_p (void)
8270{
9ee6540a 8271 return (aarch64_pcrelative_literal_loads
e78d485e
RR
8272 || aarch64_cmodel == AARCH64_CMODEL_LARGE);
8273}
8274
43e9d192 8275static bool
e78d485e 8276aarch64_use_blocks_for_constant_p (machine_mode, const_rtx)
43e9d192 8277{
74a9301d
VM
8278 /* We can't use blocks for constants when we're using a per-function
8279 constant pool. */
8280 return !aarch64_can_use_per_function_literal_pools_p ();
43e9d192
IB
8281}
8282
e78d485e
RR
8283/* Select appropriate section for constants depending
8284 on where we place literal pools. */
8285
43e9d192 8286static section *
e78d485e
RR
8287aarch64_select_rtx_section (machine_mode mode,
8288 rtx x,
8289 unsigned HOST_WIDE_INT align)
43e9d192 8290{
e78d485e
RR
8291 if (aarch64_can_use_per_function_literal_pools_p ())
8292 return function_section (current_function_decl);
43e9d192 8293
e78d485e
RR
8294 return default_elf_select_rtx_section (mode, x, align);
8295}
43e9d192 8296
5fca7b66
RH
8297/* Implement ASM_OUTPUT_POOL_EPILOGUE. */
8298void
8299aarch64_asm_output_pool_epilogue (FILE *f, const char *, tree,
8300 HOST_WIDE_INT offset)
8301{
8302 /* When using per-function literal pools, we must ensure that any code
8303 section is aligned to the minimal instruction length, lest we get
8304 errors from the assembler re "unaligned instructions". */
8305 if ((offset & 3) && aarch64_can_use_per_function_literal_pools_p ())
8306 ASM_OUTPUT_ALIGN (f, 2);
8307}
8308
43e9d192
IB
8309/* Costs. */
8310
8311/* Helper function for rtx cost calculation. Strip a shift expression
8312 from X. Returns the inner operand if successful, or the original
8313 expression on failure. */
8314static rtx
8315aarch64_strip_shift (rtx x)
8316{
8317 rtx op = x;
8318
57b77d46
RE
8319 /* We accept both ROTATERT and ROTATE: since the RHS must be a constant
8320 we can convert both to ROR during final output. */
43e9d192
IB
8321 if ((GET_CODE (op) == ASHIFT
8322 || GET_CODE (op) == ASHIFTRT
57b77d46
RE
8323 || GET_CODE (op) == LSHIFTRT
8324 || GET_CODE (op) == ROTATERT
8325 || GET_CODE (op) == ROTATE)
43e9d192
IB
8326 && CONST_INT_P (XEXP (op, 1)))
8327 return XEXP (op, 0);
8328
8329 if (GET_CODE (op) == MULT
8330 && CONST_INT_P (XEXP (op, 1))
8331 && ((unsigned) exact_log2 (INTVAL (XEXP (op, 1)))) < 64)
8332 return XEXP (op, 0);
8333
8334 return x;
8335}
8336
4745e701 8337/* Helper function for rtx cost calculation. Strip an extend
43e9d192
IB
8338 expression from X. Returns the inner operand if successful, or the
8339 original expression on failure. We deal with a number of possible
b10f1009
AP
8340 canonicalization variations here. If STRIP_SHIFT is true, then
8341 we can strip off a shift also. */
43e9d192 8342static rtx
b10f1009 8343aarch64_strip_extend (rtx x, bool strip_shift)
43e9d192 8344{
77e994c9 8345 scalar_int_mode mode;
43e9d192
IB
8346 rtx op = x;
8347
77e994c9
RS
8348 if (!is_a <scalar_int_mode> (GET_MODE (op), &mode))
8349 return op;
8350
43e9d192
IB
8351 /* Zero and sign extraction of a widened value. */
8352 if ((GET_CODE (op) == ZERO_EXTRACT || GET_CODE (op) == SIGN_EXTRACT)
8353 && XEXP (op, 2) == const0_rtx
4745e701 8354 && GET_CODE (XEXP (op, 0)) == MULT
77e994c9 8355 && aarch64_is_extend_from_extract (mode, XEXP (XEXP (op, 0), 1),
43e9d192
IB
8356 XEXP (op, 1)))
8357 return XEXP (XEXP (op, 0), 0);
8358
8359 /* It can also be represented (for zero-extend) as an AND with an
8360 immediate. */
8361 if (GET_CODE (op) == AND
8362 && GET_CODE (XEXP (op, 0)) == MULT
8363 && CONST_INT_P (XEXP (XEXP (op, 0), 1))
8364 && CONST_INT_P (XEXP (op, 1))
8365 && aarch64_uxt_size (exact_log2 (INTVAL (XEXP (XEXP (op, 0), 1))),
8366 INTVAL (XEXP (op, 1))) != 0)
8367 return XEXP (XEXP (op, 0), 0);
8368
8369 /* Now handle extended register, as this may also have an optional
8370 left shift by 1..4. */
b10f1009
AP
8371 if (strip_shift
8372 && GET_CODE (op) == ASHIFT
43e9d192
IB
8373 && CONST_INT_P (XEXP (op, 1))
8374 && ((unsigned HOST_WIDE_INT) INTVAL (XEXP (op, 1))) <= 4)
8375 op = XEXP (op, 0);
8376
8377 if (GET_CODE (op) == ZERO_EXTEND
8378 || GET_CODE (op) == SIGN_EXTEND)
8379 op = XEXP (op, 0);
8380
8381 if (op != x)
8382 return op;
8383
4745e701
JG
8384 return x;
8385}
8386
0a78ebe4
KT
8387/* Return true iff CODE is a shift supported in combination
8388 with arithmetic instructions. */
4d1919ed 8389
0a78ebe4
KT
8390static bool
8391aarch64_shift_p (enum rtx_code code)
8392{
8393 return code == ASHIFT || code == ASHIFTRT || code == LSHIFTRT;
8394}
8395
b10f1009
AP
8396
8397/* Return true iff X is a cheap shift without a sign extend. */
8398
8399static bool
8400aarch64_cheap_mult_shift_p (rtx x)
8401{
8402 rtx op0, op1;
8403
8404 op0 = XEXP (x, 0);
8405 op1 = XEXP (x, 1);
8406
8407 if (!(aarch64_tune_params.extra_tuning_flags
8408 & AARCH64_EXTRA_TUNE_CHEAP_SHIFT_EXTEND))
8409 return false;
8410
8411 if (GET_CODE (op0) == SIGN_EXTEND)
8412 return false;
8413
8414 if (GET_CODE (x) == ASHIFT && CONST_INT_P (op1)
8415 && UINTVAL (op1) <= 4)
8416 return true;
8417
8418 if (GET_CODE (x) != MULT || !CONST_INT_P (op1))
8419 return false;
8420
8421 HOST_WIDE_INT l2 = exact_log2 (INTVAL (op1));
8422
8423 if (l2 > 0 && l2 <= 4)
8424 return true;
8425
8426 return false;
8427}
8428
4745e701 8429/* Helper function for rtx cost calculation. Calculate the cost of
0a78ebe4
KT
8430 a MULT or ASHIFT, which may be part of a compound PLUS/MINUS rtx.
8431 Return the calculated cost of the expression, recursing manually in to
4745e701
JG
8432 operands where needed. */
8433
8434static int
e548c9df 8435aarch64_rtx_mult_cost (rtx x, enum rtx_code code, int outer, bool speed)
4745e701
JG
8436{
8437 rtx op0, op1;
8438 const struct cpu_cost_table *extra_cost
b175b679 8439 = aarch64_tune_params.insn_extra_cost;
4745e701 8440 int cost = 0;
0a78ebe4 8441 bool compound_p = (outer == PLUS || outer == MINUS);
ef4bddc2 8442 machine_mode mode = GET_MODE (x);
4745e701
JG
8443
8444 gcc_checking_assert (code == MULT);
8445
8446 op0 = XEXP (x, 0);
8447 op1 = XEXP (x, 1);
8448
8449 if (VECTOR_MODE_P (mode))
8450 mode = GET_MODE_INNER (mode);
8451
8452 /* Integer multiply/fma. */
8453 if (GET_MODE_CLASS (mode) == MODE_INT)
8454 {
8455 /* The multiply will be canonicalized as a shift, so cost it as such. */
0a78ebe4
KT
8456 if (aarch64_shift_p (GET_CODE (x))
8457 || (CONST_INT_P (op1)
8458 && exact_log2 (INTVAL (op1)) > 0))
4745e701 8459 {
0a78ebe4
KT
8460 bool is_extend = GET_CODE (op0) == ZERO_EXTEND
8461 || GET_CODE (op0) == SIGN_EXTEND;
4745e701
JG
8462 if (speed)
8463 {
0a78ebe4
KT
8464 if (compound_p)
8465 {
b10f1009
AP
8466 /* If the shift is considered cheap,
8467 then don't add any cost. */
8468 if (aarch64_cheap_mult_shift_p (x))
8469 ;
8470 else if (REG_P (op1))
0a78ebe4
KT
8471 /* ARITH + shift-by-register. */
8472 cost += extra_cost->alu.arith_shift_reg;
8473 else if (is_extend)
8474 /* ARITH + extended register. We don't have a cost field
8475 for ARITH+EXTEND+SHIFT, so use extend_arith here. */
8476 cost += extra_cost->alu.extend_arith;
8477 else
8478 /* ARITH + shift-by-immediate. */
8479 cost += extra_cost->alu.arith_shift;
8480 }
4745e701
JG
8481 else
8482 /* LSL (immediate). */
0a78ebe4
KT
8483 cost += extra_cost->alu.shift;
8484
4745e701 8485 }
0a78ebe4
KT
8486 /* Strip extends as we will have costed them in the case above. */
8487 if (is_extend)
b10f1009 8488 op0 = aarch64_strip_extend (op0, true);
4745e701 8489
e548c9df 8490 cost += rtx_cost (op0, VOIDmode, code, 0, speed);
4745e701
JG
8491
8492 return cost;
8493 }
8494
d2ac256b
KT
8495 /* MNEG or [US]MNEGL. Extract the NEG operand and indicate that it's a
8496 compound and let the below cases handle it. After all, MNEG is a
8497 special-case alias of MSUB. */
8498 if (GET_CODE (op0) == NEG)
8499 {
8500 op0 = XEXP (op0, 0);
8501 compound_p = true;
8502 }
8503
4745e701
JG
8504 /* Integer multiplies or FMAs have zero/sign extending variants. */
8505 if ((GET_CODE (op0) == ZERO_EXTEND
8506 && GET_CODE (op1) == ZERO_EXTEND)
8507 || (GET_CODE (op0) == SIGN_EXTEND
8508 && GET_CODE (op1) == SIGN_EXTEND))
8509 {
e548c9df
AM
8510 cost += rtx_cost (XEXP (op0, 0), VOIDmode, MULT, 0, speed);
8511 cost += rtx_cost (XEXP (op1, 0), VOIDmode, MULT, 1, speed);
4745e701
JG
8512
8513 if (speed)
8514 {
0a78ebe4 8515 if (compound_p)
d2ac256b 8516 /* SMADDL/UMADDL/UMSUBL/SMSUBL. */
4745e701
JG
8517 cost += extra_cost->mult[0].extend_add;
8518 else
8519 /* MUL/SMULL/UMULL. */
8520 cost += extra_cost->mult[0].extend;
8521 }
8522
8523 return cost;
8524 }
8525
d2ac256b 8526 /* This is either an integer multiply or a MADD. In both cases
4745e701 8527 we want to recurse and cost the operands. */
e548c9df
AM
8528 cost += rtx_cost (op0, mode, MULT, 0, speed);
8529 cost += rtx_cost (op1, mode, MULT, 1, speed);
4745e701
JG
8530
8531 if (speed)
8532 {
0a78ebe4 8533 if (compound_p)
d2ac256b 8534 /* MADD/MSUB. */
4745e701
JG
8535 cost += extra_cost->mult[mode == DImode].add;
8536 else
8537 /* MUL. */
8538 cost += extra_cost->mult[mode == DImode].simple;
8539 }
8540
8541 return cost;
8542 }
8543 else
8544 {
8545 if (speed)
8546 {
3d840f7d 8547 /* Floating-point FMA/FMUL can also support negations of the
d318517d
SN
8548 operands, unless the rounding mode is upward or downward in
8549 which case FNMUL is different than FMUL with operand negation. */
8550 bool neg0 = GET_CODE (op0) == NEG;
8551 bool neg1 = GET_CODE (op1) == NEG;
8552 if (compound_p || !flag_rounding_math || (neg0 && neg1))
8553 {
8554 if (neg0)
8555 op0 = XEXP (op0, 0);
8556 if (neg1)
8557 op1 = XEXP (op1, 0);
8558 }
4745e701 8559
0a78ebe4 8560 if (compound_p)
4745e701
JG
8561 /* FMADD/FNMADD/FNMSUB/FMSUB. */
8562 cost += extra_cost->fp[mode == DFmode].fma;
8563 else
3d840f7d 8564 /* FMUL/FNMUL. */
4745e701
JG
8565 cost += extra_cost->fp[mode == DFmode].mult;
8566 }
8567
e548c9df
AM
8568 cost += rtx_cost (op0, mode, MULT, 0, speed);
8569 cost += rtx_cost (op1, mode, MULT, 1, speed);
4745e701
JG
8570 return cost;
8571 }
43e9d192
IB
8572}
8573
67747367
JG
8574static int
8575aarch64_address_cost (rtx x,
ef4bddc2 8576 machine_mode mode,
67747367
JG
8577 addr_space_t as ATTRIBUTE_UNUSED,
8578 bool speed)
8579{
8580 enum rtx_code c = GET_CODE (x);
b175b679 8581 const struct cpu_addrcost_table *addr_cost = aarch64_tune_params.addr_cost;
67747367
JG
8582 struct aarch64_address_info info;
8583 int cost = 0;
8584 info.shift = 0;
8585
a97d8b98 8586 if (!aarch64_classify_address (&info, x, mode, false))
67747367
JG
8587 {
8588 if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF)
8589 {
8590 /* This is a CONST or SYMBOL ref which will be split
8591 in a different way depending on the code model in use.
8592 Cost it through the generic infrastructure. */
e548c9df 8593 int cost_symbol_ref = rtx_cost (x, Pmode, MEM, 1, speed);
67747367
JG
8594 /* Divide through by the cost of one instruction to
8595 bring it to the same units as the address costs. */
8596 cost_symbol_ref /= COSTS_N_INSNS (1);
8597 /* The cost is then the cost of preparing the address,
8598 followed by an immediate (possibly 0) offset. */
8599 return cost_symbol_ref + addr_cost->imm_offset;
8600 }
8601 else
8602 {
8603 /* This is most likely a jump table from a case
8604 statement. */
8605 return addr_cost->register_offset;
8606 }
8607 }
8608
8609 switch (info.type)
8610 {
8611 case ADDRESS_LO_SUM:
8612 case ADDRESS_SYMBOLIC:
8613 case ADDRESS_REG_IMM:
8614 cost += addr_cost->imm_offset;
8615 break;
8616
8617 case ADDRESS_REG_WB:
8618 if (c == PRE_INC || c == PRE_DEC || c == PRE_MODIFY)
8619 cost += addr_cost->pre_modify;
8620 else if (c == POST_INC || c == POST_DEC || c == POST_MODIFY)
8621 cost += addr_cost->post_modify;
8622 else
8623 gcc_unreachable ();
8624
8625 break;
8626
8627 case ADDRESS_REG_REG:
8628 cost += addr_cost->register_offset;
8629 break;
8630
67747367 8631 case ADDRESS_REG_SXTW:
783879e6
EM
8632 cost += addr_cost->register_sextend;
8633 break;
8634
8635 case ADDRESS_REG_UXTW:
8636 cost += addr_cost->register_zextend;
67747367
JG
8637 break;
8638
8639 default:
8640 gcc_unreachable ();
8641 }
8642
8643
8644 if (info.shift > 0)
8645 {
8646 /* For the sake of calculating the cost of the shifted register
8647 component, we can treat same sized modes in the same way. */
6a70badb
RS
8648 if (known_eq (GET_MODE_BITSIZE (mode), 16))
8649 cost += addr_cost->addr_scale_costs.hi;
8650 else if (known_eq (GET_MODE_BITSIZE (mode), 32))
8651 cost += addr_cost->addr_scale_costs.si;
8652 else if (known_eq (GET_MODE_BITSIZE (mode), 64))
8653 cost += addr_cost->addr_scale_costs.di;
8654 else
8655 /* We can't tell, or this is a 128-bit vector. */
8656 cost += addr_cost->addr_scale_costs.ti;
67747367
JG
8657 }
8658
8659 return cost;
8660}
8661
b9066f5a
MW
8662/* Return the cost of a branch. If SPEED_P is true then the compiler is
8663 optimizing for speed. If PREDICTABLE_P is true then the branch is predicted
8664 to be taken. */
8665
8666int
8667aarch64_branch_cost (bool speed_p, bool predictable_p)
8668{
8669 /* When optimizing for speed, use the cost of unpredictable branches. */
8670 const struct cpu_branch_cost *branch_costs =
b175b679 8671 aarch64_tune_params.branch_costs;
b9066f5a
MW
8672
8673 if (!speed_p || predictable_p)
8674 return branch_costs->predictable;
8675 else
8676 return branch_costs->unpredictable;
8677}
8678
7cc2145f
JG
8679/* Return true if the RTX X in mode MODE is a zero or sign extract
8680 usable in an ADD or SUB (extended register) instruction. */
8681static bool
77e994c9 8682aarch64_rtx_arith_op_extract_p (rtx x, scalar_int_mode mode)
7cc2145f
JG
8683{
8684 /* Catch add with a sign extract.
8685 This is add_<optab><mode>_multp2. */
8686 if (GET_CODE (x) == SIGN_EXTRACT
8687 || GET_CODE (x) == ZERO_EXTRACT)
8688 {
8689 rtx op0 = XEXP (x, 0);
8690 rtx op1 = XEXP (x, 1);
8691 rtx op2 = XEXP (x, 2);
8692
8693 if (GET_CODE (op0) == MULT
8694 && CONST_INT_P (op1)
8695 && op2 == const0_rtx
8696 && CONST_INT_P (XEXP (op0, 1))
8697 && aarch64_is_extend_from_extract (mode,
8698 XEXP (op0, 1),
8699 op1))
8700 {
8701 return true;
8702 }
8703 }
e47c4031
KT
8704 /* The simple case <ARITH>, XD, XN, XM, [us]xt.
8705 No shift. */
8706 else if (GET_CODE (x) == SIGN_EXTEND
8707 || GET_CODE (x) == ZERO_EXTEND)
8708 return REG_P (XEXP (x, 0));
7cc2145f
JG
8709
8710 return false;
8711}
8712
61263118
KT
8713static bool
8714aarch64_frint_unspec_p (unsigned int u)
8715{
8716 switch (u)
8717 {
8718 case UNSPEC_FRINTZ:
8719 case UNSPEC_FRINTP:
8720 case UNSPEC_FRINTM:
8721 case UNSPEC_FRINTA:
8722 case UNSPEC_FRINTN:
8723 case UNSPEC_FRINTX:
8724 case UNSPEC_FRINTI:
8725 return true;
8726
8727 default:
8728 return false;
8729 }
8730}
8731
fb0cb7fa
KT
8732/* Return true iff X is an rtx that will match an extr instruction
8733 i.e. as described in the *extr<mode>5_insn family of patterns.
8734 OP0 and OP1 will be set to the operands of the shifts involved
8735 on success and will be NULL_RTX otherwise. */
8736
8737static bool
8738aarch64_extr_rtx_p (rtx x, rtx *res_op0, rtx *res_op1)
8739{
8740 rtx op0, op1;
77e994c9
RS
8741 scalar_int_mode mode;
8742 if (!is_a <scalar_int_mode> (GET_MODE (x), &mode))
8743 return false;
fb0cb7fa
KT
8744
8745 *res_op0 = NULL_RTX;
8746 *res_op1 = NULL_RTX;
8747
8748 if (GET_CODE (x) != IOR)
8749 return false;
8750
8751 op0 = XEXP (x, 0);
8752 op1 = XEXP (x, 1);
8753
8754 if ((GET_CODE (op0) == ASHIFT && GET_CODE (op1) == LSHIFTRT)
8755 || (GET_CODE (op1) == ASHIFT && GET_CODE (op0) == LSHIFTRT))
8756 {
8757 /* Canonicalise locally to ashift in op0, lshiftrt in op1. */
8758 if (GET_CODE (op1) == ASHIFT)
8759 std::swap (op0, op1);
8760
8761 if (!CONST_INT_P (XEXP (op0, 1)) || !CONST_INT_P (XEXP (op1, 1)))
8762 return false;
8763
8764 unsigned HOST_WIDE_INT shft_amnt_0 = UINTVAL (XEXP (op0, 1));
8765 unsigned HOST_WIDE_INT shft_amnt_1 = UINTVAL (XEXP (op1, 1));
8766
8767 if (shft_amnt_0 < GET_MODE_BITSIZE (mode)
8768 && shft_amnt_0 + shft_amnt_1 == GET_MODE_BITSIZE (mode))
8769 {
8770 *res_op0 = XEXP (op0, 0);
8771 *res_op1 = XEXP (op1, 0);
8772 return true;
8773 }
8774 }
8775
8776 return false;
8777}
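/* For illustration, (ior:DI (ashift:DI (reg:DI a) (const_int 48))
   (lshiftrt:DI (reg:DI b) (const_int 16))) matches: the shift amounts
   sum to GET_MODE_BITSIZE (DImode), so *res_op0 = a and *res_op1 = b,
   and the whole expression can be emitted as a single EXTR (or ROR
   when a == b) with an immediate of 16.  */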
8778
2d5ffe46
AP
8779/* Calculate the cost of calculating (if_then_else (OP0) (OP1) (OP2)),
8780 storing it in *COST. Result is true if the total cost of the operation
8781 has now been calculated. */
8782static bool
8783aarch64_if_then_else_costs (rtx op0, rtx op1, rtx op2, int *cost, bool speed)
8784{
b9e3afe9
AP
8785 rtx inner;
8786 rtx comparator;
8787 enum rtx_code cmpcode;
8788
8789 if (COMPARISON_P (op0))
8790 {
8791 inner = XEXP (op0, 0);
8792 comparator = XEXP (op0, 1);
8793 cmpcode = GET_CODE (op0);
8794 }
8795 else
8796 {
8797 inner = op0;
8798 comparator = const0_rtx;
8799 cmpcode = NE;
8800 }
8801
2d5ffe46
AP
8802 if (GET_CODE (op1) == PC || GET_CODE (op2) == PC)
8803 {
8804 /* Conditional branch. */
b9e3afe9 8805 if (GET_MODE_CLASS (GET_MODE (inner)) == MODE_CC)
2d5ffe46
AP
8806 return true;
8807 else
8808 {
b9e3afe9 8809 if (cmpcode == NE || cmpcode == EQ)
2d5ffe46 8810 {
2d5ffe46
AP
8811 if (comparator == const0_rtx)
8812 {
8813 /* TBZ/TBNZ/CBZ/CBNZ. */
8814 if (GET_CODE (inner) == ZERO_EXTRACT)
8815 /* TBZ/TBNZ. */
e548c9df
AM
8816 *cost += rtx_cost (XEXP (inner, 0), VOIDmode,
8817 ZERO_EXTRACT, 0, speed);
8818 else
8819 /* CBZ/CBNZ. */
8820 *cost += rtx_cost (inner, VOIDmode, cmpcode, 0, speed);
2d5ffe46
AP
8821
8822 return true;
8823 }
8824 }
b9e3afe9 8825 else if (cmpcode == LT || cmpcode == GE)
2d5ffe46 8826 {
2d5ffe46
AP
8827 /* TBZ/TBNZ. */
8828 if (comparator == const0_rtx)
8829 return true;
8830 }
8831 }
8832 }
b9e3afe9 8833 else if (GET_MODE_CLASS (GET_MODE (inner)) == MODE_CC)
2d5ffe46 8834 {
786298dc 8835 /* CCMP. */
6dfeb7ce 8836 if (GET_CODE (op1) == COMPARE)
786298dc
WD
8837 {
8838 /* Increase cost of CCMP reg, 0, imm, CC to prefer CMP reg, 0. */
8839 if (XEXP (op1, 1) == const0_rtx)
8840 *cost += 1;
8841 if (speed)
8842 {
8843 machine_mode mode = GET_MODE (XEXP (op1, 0));
8844 const struct cpu_cost_table *extra_cost
8845 = aarch64_tune_params.insn_extra_cost;
8846
8847 if (GET_MODE_CLASS (mode) == MODE_INT)
8848 *cost += extra_cost->alu.arith;
8849 else
8850 *cost += extra_cost->fp[mode == DFmode].compare;
8851 }
8852 return true;
8853 }
8854
2d5ffe46
AP
8855 /* It's a conditional operation based on the status flags,
8856 so it must be some flavor of CSEL. */
8857
8858 /* CSNEG, CSINV, and CSINC are handled for free as part of CSEL. */
8859 if (GET_CODE (op1) == NEG
8860 || GET_CODE (op1) == NOT
8861 || (GET_CODE (op1) == PLUS && XEXP (op1, 1) == const1_rtx))
8862 op1 = XEXP (op1, 0);
bad00732
KT
8863 else if (GET_CODE (op1) == ZERO_EXTEND && GET_CODE (op2) == ZERO_EXTEND)
8864 {
8865 /* CSEL with zero-extension (*cmovdi_insn_uxtw). */
8866 op1 = XEXP (op1, 0);
8867 op2 = XEXP (op2, 0);
8868 }
2d5ffe46 8869
e548c9df
AM
8870 *cost += rtx_cost (op1, VOIDmode, IF_THEN_ELSE, 1, speed);
8871 *cost += rtx_cost (op2, VOIDmode, IF_THEN_ELSE, 2, speed);
2d5ffe46
AP
8872 return true;
8873 }
8874
8875 /* We don't know what this is, cost all operands. */
8876 return false;
8877}
8878
283b6c85
KT
8879/* Check whether X is a bitfield operation of the form shift + extend that
8880 maps down to a UBFIZ/SBFIZ/UBFX/SBFX instruction. If so, return the
8881 operand to which the bitfield operation is applied. Otherwise return
8882 NULL_RTX. */
8883
8884static rtx
8885aarch64_extend_bitfield_pattern_p (rtx x)
8886{
8887 rtx_code outer_code = GET_CODE (x);
8888 machine_mode outer_mode = GET_MODE (x);
8889
8890 if (outer_code != ZERO_EXTEND && outer_code != SIGN_EXTEND
8891 && outer_mode != SImode && outer_mode != DImode)
8892 return NULL_RTX;
8893
8894 rtx inner = XEXP (x, 0);
8895 rtx_code inner_code = GET_CODE (inner);
8896 machine_mode inner_mode = GET_MODE (inner);
8897 rtx op = NULL_RTX;
8898
8899 switch (inner_code)
8900 {
8901 case ASHIFT:
8902 if (CONST_INT_P (XEXP (inner, 1))
8903 && (inner_mode == QImode || inner_mode == HImode))
8904 op = XEXP (inner, 0);
8905 break;
8906 case LSHIFTRT:
8907 if (outer_code == ZERO_EXTEND && CONST_INT_P (XEXP (inner, 1))
8908 && (inner_mode == QImode || inner_mode == HImode))
8909 op = XEXP (inner, 0);
8910 break;
8911 case ASHIFTRT:
8912 if (outer_code == SIGN_EXTEND && CONST_INT_P (XEXP (inner, 1))
8913 && (inner_mode == QImode || inner_mode == HImode))
8914 op = XEXP (inner, 0);
8915 break;
8916 default:
8917 break;
8918 }
8919
8920 return op;
8921}
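/* For example, (zero_extend:SI (lshiftrt:HI (reg:HI r) (const_int 3)))
   is accepted and r is returned, since the combination is just a UBFX
   of bits 3..15 of r; likewise the SIGN_EXTEND/ASHIFTRT form maps to
   SBFX and the ASHIFT forms map to UBFIZ/SBFIZ.  */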
8922
8c83f71d
KT
8923/* Return true if the mask and a shift amount from an RTX of the form
8924 (x << SHFT_AMNT) & MASK are valid to combine into a UBFIZ instruction of
8925 mode MODE. See the *andim_ashift<mode>_bfiz pattern. */
8926
8927bool
77e994c9
RS
8928aarch64_mask_and_shift_for_ubfiz_p (scalar_int_mode mode, rtx mask,
8929 rtx shft_amnt)
8c83f71d
KT
8930{
8931 return CONST_INT_P (mask) && CONST_INT_P (shft_amnt)
8932 && INTVAL (shft_amnt) < GET_MODE_BITSIZE (mode)
8933 && exact_log2 ((INTVAL (mask) >> INTVAL (shft_amnt)) + 1) >= 0
1b6acf23
WD
8934 && (INTVAL (mask)
8935 & ((HOST_WIDE_INT_1U << INTVAL (shft_amnt)) - 1)) == 0;
8c83f71d
KT
8936}
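/* A worked example: for SImode, mask == 0x1fe0 and shft_amnt == 5 pass
   all three tests (5 < 32, (0x1fe0 >> 5) + 1 is a power of two, and no
   mask bit lies below bit 5), so (x << 5) & 0x1fe0 can be a single
   UBFIZ w0, w1, #5, #8 rather than a shift followed by an AND.  */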
8937
43e9d192
IB
8938/* Calculate the cost of calculating X, storing it in *COST. Result
8939 is true if the total cost of the operation has now been calculated. */
8940static bool
e548c9df 8941aarch64_rtx_costs (rtx x, machine_mode mode, int outer ATTRIBUTE_UNUSED,
43e9d192
IB
8942 int param ATTRIBUTE_UNUSED, int *cost, bool speed)
8943{
a8eecd00 8944 rtx op0, op1, op2;
73250c4c 8945 const struct cpu_cost_table *extra_cost
b175b679 8946 = aarch64_tune_params.insn_extra_cost;
e548c9df 8947 int code = GET_CODE (x);
b4206259 8948 scalar_int_mode int_mode;
43e9d192 8949
7fc5ef02
JG
8950 /* By default, assume that everything has equivalent cost to the
8951 cheapest instruction. Any additional costs are applied as a delta
8952 above this default. */
8953 *cost = COSTS_N_INSNS (1);
8954
43e9d192
IB
8955 switch (code)
8956 {
8957 case SET:
ba123b0d
JG
8958 /* The cost depends entirely on the operands to SET. */
8959 *cost = 0;
43e9d192
IB
8960 op0 = SET_DEST (x);
8961 op1 = SET_SRC (x);
8962
8963 switch (GET_CODE (op0))
8964 {
8965 case MEM:
8966 if (speed)
2961177e
JG
8967 {
8968 rtx address = XEXP (op0, 0);
b6875aac
KV
8969 if (VECTOR_MODE_P (mode))
8970 *cost += extra_cost->ldst.storev;
8971 else if (GET_MODE_CLASS (mode) == MODE_INT)
2961177e
JG
8972 *cost += extra_cost->ldst.store;
8973 else if (mode == SFmode)
8974 *cost += extra_cost->ldst.storef;
8975 else if (mode == DFmode)
8976 *cost += extra_cost->ldst.stored;
8977
8978 *cost +=
8979 COSTS_N_INSNS (aarch64_address_cost (address, mode,
8980 0, speed));
8981 }
43e9d192 8982
e548c9df 8983 *cost += rtx_cost (op1, mode, SET, 1, speed);
43e9d192
IB
8984 return true;
8985
8986 case SUBREG:
8987 if (! REG_P (SUBREG_REG (op0)))
e548c9df 8988 *cost += rtx_cost (SUBREG_REG (op0), VOIDmode, SET, 0, speed);
ba123b0d 8989
43e9d192
IB
8990 /* Fall through. */
8991 case REG:
b6875aac
KV
8992 /* The cost is one per vector-register copied. */
8993 if (VECTOR_MODE_P (GET_MODE (op0)) && REG_P (op1))
8994 {
fe1447a1
RS
8995 int nregs = aarch64_hard_regno_nregs (V0_REGNUM, GET_MODE (op0));
8996 *cost = COSTS_N_INSNS (nregs);
b6875aac 8997 }
ba123b0d
JG
8998 /* const0_rtx is in general free, but we will use an
8999 instruction to set a register to 0. */
b6875aac
KV
9000 else if (REG_P (op1) || op1 == const0_rtx)
9001 {
9002 /* The cost is 1 per register copied. */
fe1447a1
RS
9003 int nregs = aarch64_hard_regno_nregs (R0_REGNUM, GET_MODE (op0));
9004 *cost = COSTS_N_INSNS (nregs);
b6875aac 9005 }
ba123b0d
JG
9006 else
9007 /* Cost is just the cost of the RHS of the set. */
e548c9df 9008 *cost += rtx_cost (op1, mode, SET, 1, speed);
43e9d192
IB
9009 return true;
9010
ba123b0d 9011 case ZERO_EXTRACT:
43e9d192 9012 case SIGN_EXTRACT:
ba123b0d
JG
9013 /* Bit-field insertion. Strip any redundant widening of
9014 the RHS to meet the width of the target. */
43e9d192
IB
9015 if (GET_CODE (op1) == SUBREG)
9016 op1 = SUBREG_REG (op1);
9017 if ((GET_CODE (op1) == ZERO_EXTEND
9018 || GET_CODE (op1) == SIGN_EXTEND)
4aa81c2e 9019 && CONST_INT_P (XEXP (op0, 1))
77e994c9
RS
9020 && is_a <scalar_int_mode> (GET_MODE (XEXP (op1, 0)), &int_mode)
9021 && GET_MODE_BITSIZE (int_mode) >= INTVAL (XEXP (op0, 1)))
43e9d192 9022 op1 = XEXP (op1, 0);
ba123b0d
JG
9023
9024 if (CONST_INT_P (op1))
9025 {
9026 /* MOV immediate is assumed to always be cheap. */
9027 *cost = COSTS_N_INSNS (1);
9028 }
9029 else
9030 {
9031 /* BFM. */
9032 if (speed)
9033 *cost += extra_cost->alu.bfi;
e548c9df 9034 *cost += rtx_cost (op1, VOIDmode, (enum rtx_code) code, 1, speed);
ba123b0d
JG
9035 }
9036
43e9d192
IB
9037 return true;
9038
9039 default:
ba123b0d
JG
9040 /* We can't make sense of this, assume default cost. */
9041 *cost = COSTS_N_INSNS (1);
61263118 9042 return false;
43e9d192
IB
9043 }
9044 return false;
9045
9dfc162c
JG
9046 case CONST_INT:
9047 /* If an instruction can incorporate a constant within the
9048 instruction, the instruction's expression avoids calling
9049 rtx_cost() on the constant. If rtx_cost() is called on a
9050 constant, then it is usually because the constant must be
9051 moved into a register by one or more instructions.
9052
9053 The exception is constant 0, which can be expressed
9054 as XZR/WZR and is therefore free. The exception to this is
9055 if we have (set (reg) (const0_rtx)) in which case we must cost
9056 the move. However, we can catch that when we cost the SET, so
9057 we don't need to consider that here. */
9058 if (x == const0_rtx)
9059 *cost = 0;
9060 else
9061 {
9062 /* To an approximation, building any other constant is
9063 proportionally expensive to the number of instructions
9064 required to build that constant. This is true whether we
9065 are compiling for SPEED or otherwise. */
77e994c9
RS
9066 if (!is_a <scalar_int_mode> (mode, &int_mode))
9067 int_mode = word_mode;
82614948 9068 *cost = COSTS_N_INSNS (aarch64_internal_mov_immediate
77e994c9 9069 (NULL_RTX, x, false, int_mode));
9dfc162c
JG
9070 }
9071 return true;
9072
9073 case CONST_DOUBLE:
a2170965
TC
9074
9075 /* First determine number of instructions to do the move
9076 as an integer constant. */
9077 if (!aarch64_float_const_representable_p (x)
9078 && !aarch64_can_const_movi_rtx_p (x, mode)
9079 && aarch64_float_const_rtx_p (x))
9080 {
9081 unsigned HOST_WIDE_INT ival;
9082 bool succeed = aarch64_reinterpret_float_as_int (x, &ival);
9083 gcc_assert (succeed);
9084
77e994c9
RS
9085 scalar_int_mode imode = (mode == HFmode
9086 ? SImode
9087 : int_mode_for_mode (mode).require ());
a2170965
TC
9088 int ncost = aarch64_internal_mov_immediate
9089 (NULL_RTX, gen_int_mode (ival, imode), false, imode);
9090 *cost += COSTS_N_INSNS (ncost);
9091 return true;
9092 }
9093
9dfc162c
JG
9094 if (speed)
9095 {
9096 /* mov[df,sf]_aarch64. */
9097 if (aarch64_float_const_representable_p (x))
9098 /* FMOV (scalar immediate). */
9099 *cost += extra_cost->fp[mode == DFmode].fpconst;
9100 else if (!aarch64_float_const_zero_rtx_p (x))
9101 {
9102 /* This will be a load from memory. */
9103 if (mode == DFmode)
9104 *cost += extra_cost->ldst.loadd;
9105 else
9106 *cost += extra_cost->ldst.loadf;
9107 }
9108 else
9109 /* Otherwise this is +0.0. We get this using MOVI d0, #0
9110 or MOV v0.s[0], wzr - neither of which are modeled by the
9111 cost tables. Just use the default cost. */
9112 {
9113 }
9114 }
9115
9116 return true;
9117
43e9d192
IB
9118 case MEM:
9119 if (speed)
2961177e
JG
9120 {
9121 /* For loads we want the base cost of a load, plus an
9122 approximation for the additional cost of the addressing
9123 mode. */
9124 rtx address = XEXP (x, 0);
b6875aac
KV
9125 if (VECTOR_MODE_P (mode))
9126 *cost += extra_cost->ldst.loadv;
9127 else if (GET_MODE_CLASS (mode) == MODE_INT)
2961177e
JG
9128 *cost += extra_cost->ldst.load;
9129 else if (mode == SFmode)
9130 *cost += extra_cost->ldst.loadf;
9131 else if (mode == DFmode)
9132 *cost += extra_cost->ldst.loadd;
9133
9134 *cost +=
9135 COSTS_N_INSNS (aarch64_address_cost (address, mode,
9136 0, speed));
9137 }
43e9d192
IB
9138
9139 return true;
9140
9141 case NEG:
4745e701
JG
9142 op0 = XEXP (x, 0);
9143
b6875aac
KV
9144 if (VECTOR_MODE_P (mode))
9145 {
9146 if (speed)
9147 {
9148 /* FNEG. */
9149 *cost += extra_cost->vect.alu;
9150 }
9151 return false;
9152 }
9153
e548c9df
AM
9154 if (GET_MODE_CLASS (mode) == MODE_INT)
9155 {
4745e701
JG
9156 if (GET_RTX_CLASS (GET_CODE (op0)) == RTX_COMPARE
9157 || GET_RTX_CLASS (GET_CODE (op0)) == RTX_COMM_COMPARE)
9158 {
9159 /* CSETM. */
e548c9df 9160 *cost += rtx_cost (XEXP (op0, 0), VOIDmode, NEG, 0, speed);
4745e701
JG
9161 return true;
9162 }
9163
9164 /* Cost this as SUB wzr, X. */
e548c9df 9165 op0 = CONST0_RTX (mode);
4745e701
JG
9166 op1 = XEXP (x, 0);
9167 goto cost_minus;
9168 }
9169
e548c9df 9170 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
4745e701
JG
9171 {
9172 /* Support (neg(fma...)) as a single instruction only if
9173 sign of zeros is unimportant. This matches the decision
9174 making in aarch64.md. */
9175 if (GET_CODE (op0) == FMA && !HONOR_SIGNED_ZEROS (GET_MODE (op0)))
9176 {
9177 /* FNMADD. */
e548c9df 9178 *cost = rtx_cost (op0, mode, NEG, 0, speed);
4745e701
JG
9179 return true;
9180 }
d318517d
SN
9181 if (GET_CODE (op0) == MULT)
9182 {
9183 /* FNMUL. */
9184 *cost = rtx_cost (op0, mode, NEG, 0, speed);
9185 return true;
9186 }
4745e701
JG
9187 if (speed)
9188 /* FNEG. */
9189 *cost += extra_cost->fp[mode == DFmode].neg;
9190 return false;
9191 }
9192
9193 return false;
43e9d192 9194
781aeb73
KT
9195 case CLRSB:
9196 case CLZ:
9197 if (speed)
b6875aac
KV
9198 {
9199 if (VECTOR_MODE_P (mode))
9200 *cost += extra_cost->vect.alu;
9201 else
9202 *cost += extra_cost->alu.clz;
9203 }
781aeb73
KT
9204
9205 return false;
9206
43e9d192
IB
9207 case COMPARE:
9208 op0 = XEXP (x, 0);
9209 op1 = XEXP (x, 1);
9210
9211 if (op1 == const0_rtx
9212 && GET_CODE (op0) == AND)
9213 {
9214 x = op0;
e548c9df 9215 mode = GET_MODE (op0);
43e9d192
IB
9216 goto cost_logic;
9217 }
9218
a8eecd00
JG
9219 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT)
9220 {
9221 /* TODO: A write to the CC flags possibly costs extra, this
9222 needs encoding in the cost tables. */
9223
e548c9df 9224 mode = GET_MODE (op0);
a8eecd00
JG
9225 /* ANDS. */
9226 if (GET_CODE (op0) == AND)
9227 {
9228 x = op0;
9229 goto cost_logic;
9230 }
9231
9232 if (GET_CODE (op0) == PLUS)
9233 {
9234 /* ADDS (and CMN alias). */
9235 x = op0;
9236 goto cost_plus;
9237 }
9238
9239 if (GET_CODE (op0) == MINUS)
9240 {
9241 /* SUBS. */
9242 x = op0;
9243 goto cost_minus;
9244 }
9245
345854d8
KT
9246 if (GET_CODE (op0) == ZERO_EXTRACT && op1 == const0_rtx
9247 && GET_MODE (x) == CC_NZmode && CONST_INT_P (XEXP (op0, 1))
9248 && CONST_INT_P (XEXP (op0, 2)))
9249 {
9250 /* COMPARE of ZERO_EXTRACT form of TST-immediate.
9251 Handle it here directly rather than going to cost_logic
9252 since we know the immediate generated for the TST is valid
9253 so we can avoid creating an intermediate rtx for it only
9254 for costing purposes. */
9255 if (speed)
9256 *cost += extra_cost->alu.logical;
9257
9258 *cost += rtx_cost (XEXP (op0, 0), GET_MODE (op0),
9259 ZERO_EXTRACT, 0, speed);
9260 return true;
9261 }
9262
a8eecd00
JG
9263 if (GET_CODE (op1) == NEG)
9264 {
9265 /* CMN. */
9266 if (speed)
9267 *cost += extra_cost->alu.arith;
9268
e548c9df
AM
9269 *cost += rtx_cost (op0, mode, COMPARE, 0, speed);
9270 *cost += rtx_cost (XEXP (op1, 0), mode, NEG, 1, speed);
a8eecd00
JG
9271 return true;
9272 }
9273
9274 /* CMP.
9275
9276 Compare can freely swap the order of operands, and
9277 canonicalization puts the more complex operation first.
9278 But the integer MINUS logic expects the shift/extend
9279 operation in op1. */
9280 if (! (REG_P (op0)
9281 || (GET_CODE (op0) == SUBREG && REG_P (SUBREG_REG (op0)))))
9282 {
9283 op0 = XEXP (x, 1);
9284 op1 = XEXP (x, 0);
9285 }
9286 goto cost_minus;
9287 }
9288
9289 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_FLOAT)
9290 {
9291 /* FCMP. */
9292 if (speed)
9293 *cost += extra_cost->fp[mode == DFmode].compare;
9294
9295 if (CONST_DOUBLE_P (op1) && aarch64_float_const_zero_rtx_p (op1))
9296 {
e548c9df 9297 *cost += rtx_cost (op0, VOIDmode, COMPARE, 0, speed);
a8eecd00
JG
9298 /* FCMP supports constant 0.0 for no extra cost. */
9299 return true;
9300 }
9301 return false;
9302 }
9303
b6875aac
KV
9304 if (VECTOR_MODE_P (mode))
9305 {
9306 /* Vector compare. */
9307 if (speed)
9308 *cost += extra_cost->vect.alu;
9309
9310 if (aarch64_float_const_zero_rtx_p (op1))
9311 {
9312 /* Vector cm (eq|ge|gt|lt|le) supports constant 0.0 for no extra
9313 cost. */
9314 return true;
9315 }
9316 return false;
9317 }
a8eecd00 9318 return false;
43e9d192
IB
9319
9320 case MINUS:
4745e701
JG
9321 {
9322 op0 = XEXP (x, 0);
9323 op1 = XEXP (x, 1);
9324
9325cost_minus:
e548c9df 9326 *cost += rtx_cost (op0, mode, MINUS, 0, speed);
23cb6618 9327
4745e701
JG
9328 /* Detect valid immediates. */
9329 if ((GET_MODE_CLASS (mode) == MODE_INT
9330 || (GET_MODE_CLASS (mode) == MODE_CC
9331 && GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT))
9332 && CONST_INT_P (op1)
9333 && aarch64_uimm12_shift (INTVAL (op1)))
9334 {
4745e701
JG
9335 if (speed)
9336 /* SUB(S) (immediate). */
9337 *cost += extra_cost->alu.arith;
9338 return true;
4745e701
JG
9339 }
9340
7cc2145f 9341 /* Look for SUB (extended register). */
77e994c9
RS
9342 if (is_a <scalar_int_mode> (mode, &int_mode)
9343 && aarch64_rtx_arith_op_extract_p (op1, int_mode))
7cc2145f
JG
9344 {
9345 if (speed)
2533c820 9346 *cost += extra_cost->alu.extend_arith;
7cc2145f 9347
b10f1009 9348 op1 = aarch64_strip_extend (op1, true);
e47c4031 9349 *cost += rtx_cost (op1, VOIDmode,
e548c9df 9350 (enum rtx_code) GET_CODE (op1), 0, speed);
7cc2145f
JG
9351 return true;
9352 }
9353
b10f1009 9354 rtx new_op1 = aarch64_strip_extend (op1, false);
4745e701
JG
9355
9356 /* Cost this as an FMA-alike operation. */
9357 if ((GET_CODE (new_op1) == MULT
0a78ebe4 9358 || aarch64_shift_p (GET_CODE (new_op1)))
4745e701
JG
9359 && code != COMPARE)
9360 {
9361 *cost += aarch64_rtx_mult_cost (new_op1, MULT,
9362 (enum rtx_code) code,
9363 speed);
4745e701
JG
9364 return true;
9365 }
43e9d192 9366
e548c9df 9367 *cost += rtx_cost (new_op1, VOIDmode, MINUS, 1, speed);
43e9d192 9368
4745e701
JG
9369 if (speed)
9370 {
b6875aac
KV
9371 if (VECTOR_MODE_P (mode))
9372 {
9373 /* Vector SUB. */
9374 *cost += extra_cost->vect.alu;
9375 }
9376 else if (GET_MODE_CLASS (mode) == MODE_INT)
9377 {
9378 /* SUB(S). */
9379 *cost += extra_cost->alu.arith;
9380 }
4745e701 9381 else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
b6875aac
KV
9382 {
9383 /* FSUB. */
9384 *cost += extra_cost->fp[mode == DFmode].addsub;
9385 }
4745e701
JG
9386 }
9387 return true;
9388 }
43e9d192
IB
9389
9390 case PLUS:
4745e701
JG
9391 {
9392 rtx new_op0;
43e9d192 9393
4745e701
JG
9394 op0 = XEXP (x, 0);
9395 op1 = XEXP (x, 1);
43e9d192 9396
a8eecd00 9397cost_plus:
4745e701
JG
9398 if (GET_RTX_CLASS (GET_CODE (op0)) == RTX_COMPARE
9399 || GET_RTX_CLASS (GET_CODE (op0)) == RTX_COMM_COMPARE)
9400 {
9401 /* CSINC. */
e548c9df
AM
9402 *cost += rtx_cost (XEXP (op0, 0), mode, PLUS, 0, speed);
9403 *cost += rtx_cost (op1, mode, PLUS, 1, speed);
4745e701
JG
9404 return true;
9405 }
43e9d192 9406
4745e701 9407 if (GET_MODE_CLASS (mode) == MODE_INT
43cacb12
RS
9408 && ((CONST_INT_P (op1) && aarch64_uimm12_shift (INTVAL (op1)))
9409 || aarch64_sve_addvl_addpl_immediate (op1, mode)))
4745e701 9410 {
e548c9df 9411 *cost += rtx_cost (op0, mode, PLUS, 0, speed);
43e9d192 9412
4745e701
JG
9413 if (speed)
9414 /* ADD (immediate). */
9415 *cost += extra_cost->alu.arith;
9416 return true;
9417 }
9418
e548c9df 9419 *cost += rtx_cost (op1, mode, PLUS, 1, speed);
23cb6618 9420
7cc2145f 9421 /* Look for ADD (extended register). */
77e994c9
RS
9422 if (is_a <scalar_int_mode> (mode, &int_mode)
9423 && aarch64_rtx_arith_op_extract_p (op0, int_mode))
7cc2145f
JG
9424 {
9425 if (speed)
2533c820 9426 *cost += extra_cost->alu.extend_arith;
7cc2145f 9427
b10f1009 9428 op0 = aarch64_strip_extend (op0, true);
e47c4031 9429 *cost += rtx_cost (op0, VOIDmode,
e548c9df 9430 (enum rtx_code) GET_CODE (op0), 0, speed);
7cc2145f
JG
9431 return true;
9432 }
9433
4745e701
JG
9434 /* Strip any extend, leave shifts behind as we will
9435 cost them through mult_cost. */
b10f1009 9436 new_op0 = aarch64_strip_extend (op0, false);
4745e701
JG
9437
9438 if (GET_CODE (new_op0) == MULT
0a78ebe4 9439 || aarch64_shift_p (GET_CODE (new_op0)))
4745e701
JG
9440 {
9441 *cost += aarch64_rtx_mult_cost (new_op0, MULT, PLUS,
9442 speed);
4745e701
JG
9443 return true;
9444 }
9445
e548c9df 9446 *cost += rtx_cost (new_op0, VOIDmode, PLUS, 0, speed);
4745e701
JG
9447
9448 if (speed)
9449 {
b6875aac
KV
9450 if (VECTOR_MODE_P (mode))
9451 {
9452 /* Vector ADD. */
9453 *cost += extra_cost->vect.alu;
9454 }
9455 else if (GET_MODE_CLASS (mode) == MODE_INT)
9456 {
9457 /* ADD. */
9458 *cost += extra_cost->alu.arith;
9459 }
4745e701 9460 else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
b6875aac
KV
9461 {
9462 /* FADD. */
9463 *cost += extra_cost->fp[mode == DFmode].addsub;
9464 }
4745e701
JG
9465 }
9466 return true;
9467 }
43e9d192 9468
18b42b2a
KT
9469 case BSWAP:
9470 *cost = COSTS_N_INSNS (1);
9471
9472 if (speed)
b6875aac
KV
9473 {
9474 if (VECTOR_MODE_P (mode))
9475 *cost += extra_cost->vect.alu;
9476 else
9477 *cost += extra_cost->alu.rev;
9478 }
18b42b2a
KT
9479 return false;
9480
43e9d192 9481 case IOR:
f7d5cf8d
KT
9482 if (aarch_rev16_p (x))
9483 {
9484 *cost = COSTS_N_INSNS (1);
9485
b6875aac
KV
9486 if (speed)
9487 {
9488 if (VECTOR_MODE_P (mode))
9489 *cost += extra_cost->vect.alu;
9490 else
9491 *cost += extra_cost->alu.rev;
9492 }
9493 return true;
f7d5cf8d 9494 }
fb0cb7fa
KT
9495
9496 if (aarch64_extr_rtx_p (x, &op0, &op1))
9497 {
e548c9df
AM
9498 *cost += rtx_cost (op0, mode, IOR, 0, speed);
9499 *cost += rtx_cost (op1, mode, IOR, 1, speed);
fb0cb7fa
KT
9500 if (speed)
9501 *cost += extra_cost->alu.shift;
9502
9503 return true;
9504 }
f7d5cf8d 9505 /* Fall through. */
43e9d192
IB
9506 case XOR:
9507 case AND:
9508 cost_logic:
9509 op0 = XEXP (x, 0);
9510 op1 = XEXP (x, 1);
9511
b6875aac
KV
9512 if (VECTOR_MODE_P (mode))
9513 {
9514 if (speed)
9515 *cost += extra_cost->vect.alu;
9516 return true;
9517 }
9518
268c3b47
JG
9519 if (code == AND
9520 && GET_CODE (op0) == MULT
9521 && CONST_INT_P (XEXP (op0, 1))
9522 && CONST_INT_P (op1)
9523 && aarch64_uxt_size (exact_log2 (INTVAL (XEXP (op0, 1))),
9524 INTVAL (op1)) != 0)
9525 {
9526 /* This is a UBFM/SBFM. */
e548c9df 9527 *cost += rtx_cost (XEXP (op0, 0), mode, ZERO_EXTRACT, 0, speed);
268c3b47
JG
9528 if (speed)
9529 *cost += extra_cost->alu.bfx;
9530 return true;
9531 }
9532
b4206259 9533 if (is_int_mode (mode, &int_mode))
43e9d192 9534 {
8c83f71d 9535 if (CONST_INT_P (op1))
43e9d192 9536 {
8c83f71d
KT
9537 /* We have a mask + shift version of a UBFIZ
9538 i.e. the *andim_ashift<mode>_bfiz pattern. */
9539 if (GET_CODE (op0) == ASHIFT
b4206259
RS
9540 && aarch64_mask_and_shift_for_ubfiz_p (int_mode, op1,
9541 XEXP (op0, 1)))
8c83f71d 9542 {
b4206259 9543 *cost += rtx_cost (XEXP (op0, 0), int_mode,
8c83f71d
KT
9544 (enum rtx_code) code, 0, speed);
9545 if (speed)
9546 *cost += extra_cost->alu.bfx;
268c3b47 9547
8c83f71d
KT
9548 return true;
9549 }
b4206259 9550 else if (aarch64_bitmask_imm (INTVAL (op1), int_mode))
8c83f71d
KT
9551 {
9552 /* We possibly get the immediate for free, this is not
9553 modelled. */
b4206259
RS
9554 *cost += rtx_cost (op0, int_mode,
9555 (enum rtx_code) code, 0, speed);
8c83f71d
KT
9556 if (speed)
9557 *cost += extra_cost->alu.logical;
268c3b47 9558
8c83f71d
KT
9559 return true;
9560 }
43e9d192
IB
9561 }
9562 else
9563 {
268c3b47
JG
9564 rtx new_op0 = op0;
9565
9566 /* Handle ORN, EON, or BIC. */
43e9d192
IB
9567 if (GET_CODE (op0) == NOT)
9568 op0 = XEXP (op0, 0);
268c3b47
JG
9569
9570 new_op0 = aarch64_strip_shift (op0);
9571
9572 /* If we had a shift on op0 then this is a logical-shift-
9573 by-register/immediate operation. Otherwise, this is just
9574 a logical operation. */
9575 if (speed)
9576 {
9577 if (new_op0 != op0)
9578 {
9579 /* Shift by immediate. */
9580 if (CONST_INT_P (XEXP (op0, 1)))
9581 *cost += extra_cost->alu.log_shift;
9582 else
9583 *cost += extra_cost->alu.log_shift_reg;
9584 }
9585 else
9586 *cost += extra_cost->alu.logical;
9587 }
9588
9589 /* In both cases we want to cost both operands. */
b4206259
RS
9590 *cost += rtx_cost (new_op0, int_mode, (enum rtx_code) code,
9591 0, speed);
9592 *cost += rtx_cost (op1, int_mode, (enum rtx_code) code,
9593 1, speed);
268c3b47
JG
9594
9595 return true;
43e9d192 9596 }
43e9d192
IB
9597 }
9598 return false;
9599
268c3b47 9600 case NOT:
6365da9e
KT
9601 x = XEXP (x, 0);
9602 op0 = aarch64_strip_shift (x);
9603
b6875aac
KV
9604 if (VECTOR_MODE_P (mode))
9605 {
9606 /* Vector NOT. */
9607 *cost += extra_cost->vect.alu;
9608 return false;
9609 }
9610
6365da9e
KT
9611 /* MVN-shifted-reg. */
9612 if (op0 != x)
9613 {
e548c9df 9614 *cost += rtx_cost (op0, mode, (enum rtx_code) code, 0, speed);
6365da9e
KT
9615
9616 if (speed)
9617 *cost += extra_cost->alu.log_shift;
9618
9619 return true;
9620 }
9621 /* EON can have two forms: (xor (not a) b) but also (not (xor a b)).
9622 Handle the second form here taking care that 'a' in the above can
9623 be a shift. */
9624 else if (GET_CODE (op0) == XOR)
9625 {
9626 rtx newop0 = XEXP (op0, 0);
9627 rtx newop1 = XEXP (op0, 1);
9628 rtx op0_stripped = aarch64_strip_shift (newop0);
9629
e548c9df
AM
9630 *cost += rtx_cost (newop1, mode, (enum rtx_code) code, 1, speed);
9631 *cost += rtx_cost (op0_stripped, mode, XOR, 0, speed);
6365da9e
KT
9632
9633 if (speed)
9634 {
9635 if (op0_stripped != newop0)
9636 *cost += extra_cost->alu.log_shift;
9637 else
9638 *cost += extra_cost->alu.logical;
9639 }
9640
9641 return true;
9642 }
268c3b47
JG
9643 /* MVN. */
9644 if (speed)
9645 *cost += extra_cost->alu.logical;
9646
268c3b47
JG
9647 return false;
9648
43e9d192 9649 case ZERO_EXTEND:
b1685e62
JG
9650
9651 op0 = XEXP (x, 0);
9652 /* If a value is written in SI mode, then zero extended to DI
9653 mode, the operation will in general be free as a write to
9654 a 'w' register implicitly zeroes the upper bits of an 'x'
9655 register. However, if this is
9656
9657 (set (reg) (zero_extend (reg)))
9658
9659 we must cost the explicit register move. */
9660 if (mode == DImode
9661 && GET_MODE (op0) == SImode
9662 && outer == SET)
9663 {
e548c9df 9664 int op_cost = rtx_cost (op0, VOIDmode, ZERO_EXTEND, 0, speed);
b1685e62 9665
dde23f43
KM
9666 /* If OP_COST is non-zero, then the cost of the zero extend
9667 is effectively the cost of the inner operation. Otherwise
9668 we have a MOV instruction and we take the cost from the MOV
9669 itself. This is true independently of whether we are
9670 optimizing for space or time. */
9671 if (op_cost)
b1685e62
JG
9672 *cost = op_cost;
9673
9674 return true;
9675 }
e548c9df 9676 else if (MEM_P (op0))
43e9d192 9677 {
b1685e62 9678 /* All loads can zero extend to any size for free. */
e548c9df 9679 *cost = rtx_cost (op0, VOIDmode, ZERO_EXTEND, param, speed);
43e9d192
IB
9680 return true;
9681 }
b1685e62 9682
283b6c85
KT
9683 op0 = aarch64_extend_bitfield_pattern_p (x);
9684 if (op0)
9685 {
9686 *cost += rtx_cost (op0, mode, ZERO_EXTEND, 0, speed);
9687 if (speed)
9688 *cost += extra_cost->alu.bfx;
9689 return true;
9690 }
9691
b1685e62 9692 if (speed)
b6875aac
KV
9693 {
9694 if (VECTOR_MODE_P (mode))
9695 {
9696 /* UMOV. */
9697 *cost += extra_cost->vect.alu;
9698 }
9699 else
9700 {
63715e5e
WD
9701 /* We generate an AND instead of UXTB/UXTH. */
9702 *cost += extra_cost->alu.logical;
b6875aac
KV
9703 }
9704 }
43e9d192
IB
9705 return false;
9706
9707 case SIGN_EXTEND:
b1685e62 9708 if (MEM_P (XEXP (x, 0)))
43e9d192 9709 {
b1685e62
JG
9710 /* LDRSH. */
9711 if (speed)
9712 {
9713 rtx address = XEXP (XEXP (x, 0), 0);
9714 *cost += extra_cost->ldst.load_sign_extend;
9715
9716 *cost +=
9717 COSTS_N_INSNS (aarch64_address_cost (address, mode,
9718 0, speed));
9719 }
43e9d192
IB
9720 return true;
9721 }
b1685e62 9722
283b6c85
KT
9723 op0 = aarch64_extend_bitfield_pattern_p (x);
9724 if (op0)
9725 {
9726 *cost += rtx_cost (op0, mode, SIGN_EXTEND, 0, speed);
9727 if (speed)
9728 *cost += extra_cost->alu.bfx;
9729 return true;
9730 }
9731
b1685e62 9732 if (speed)
b6875aac
KV
9733 {
9734 if (VECTOR_MODE_P (mode))
9735 *cost += extra_cost->vect.alu;
9736 else
9737 *cost += extra_cost->alu.extend;
9738 }
43e9d192
IB
9739 return false;
9740
ba0cfa17
JG
9741 case ASHIFT:
9742 op0 = XEXP (x, 0);
9743 op1 = XEXP (x, 1);
9744
9745 if (CONST_INT_P (op1))
9746 {
ba0cfa17 9747 if (speed)
b6875aac
KV
9748 {
9749 if (VECTOR_MODE_P (mode))
9750 {
9751 /* Vector shift (immediate). */
9752 *cost += extra_cost->vect.alu;
9753 }
9754 else
9755 {
9756 /* LSL (immediate), UBFM, UBFIZ and friends. These are all
9757 aliases. */
9758 *cost += extra_cost->alu.shift;
9759 }
9760 }
ba0cfa17
JG
9761
9762 /* We can incorporate zero/sign extend for free. */
9763 if (GET_CODE (op0) == ZERO_EXTEND
9764 || GET_CODE (op0) == SIGN_EXTEND)
9765 op0 = XEXP (op0, 0);
9766
e548c9df 9767 *cost += rtx_cost (op0, VOIDmode, ASHIFT, 0, speed);
ba0cfa17
JG
9768 return true;
9769 }
9770 else
9771 {
7813b280 9772 if (VECTOR_MODE_P (mode))
b6875aac 9773 {
7813b280
KT
9774 if (speed)
9775 /* Vector shift (register). */
9776 *cost += extra_cost->vect.alu;
9777 }
9778 else
9779 {
9780 if (speed)
9781 /* LSLV. */
9782 *cost += extra_cost->alu.shift_reg;
9783
9784 if (GET_CODE (op1) == AND && REG_P (XEXP (op1, 0))
9785 && CONST_INT_P (XEXP (op1, 1))
6a70badb
RS
9786 && known_eq (INTVAL (XEXP (op1, 1)),
9787 GET_MODE_BITSIZE (mode) - 1))
b6875aac 9788 {
7813b280
KT
9789 *cost += rtx_cost (op0, mode, (rtx_code) code, 0, speed);
9790 /* We already demanded XEXP (op1, 0) to be REG_P, so
9791 don't recurse into it. */
9792 return true;
b6875aac
KV
9793 }
9794 }
ba0cfa17
JG
9795 return false; /* All arguments need to be in registers. */
9796 }
9797
43e9d192 9798 case ROTATE:
43e9d192
IB
9799 case ROTATERT:
9800 case LSHIFTRT:
43e9d192 9801 case ASHIFTRT:
ba0cfa17
JG
9802 op0 = XEXP (x, 0);
9803 op1 = XEXP (x, 1);
43e9d192 9804
ba0cfa17
JG
9805 if (CONST_INT_P (op1))
9806 {
9807 /* ASR (immediate) and friends. */
9808 if (speed)
b6875aac
KV
9809 {
9810 if (VECTOR_MODE_P (mode))
9811 *cost += extra_cost->vect.alu;
9812 else
9813 *cost += extra_cost->alu.shift;
9814 }
43e9d192 9815
e548c9df 9816 *cost += rtx_cost (op0, mode, (enum rtx_code) code, 0, speed);
ba0cfa17
JG
9817 return true;
9818 }
9819 else
9820 {
7813b280 9821 if (VECTOR_MODE_P (mode))
b6875aac 9822 {
7813b280
KT
9823 if (speed)
9824 /* Vector shift (register). */
b6875aac 9825 *cost += extra_cost->vect.alu;
7813b280
KT
9826 }
9827 else
9828 {
9829 if (speed)
9830 /* ASR (register) and friends. */
b6875aac 9831 *cost += extra_cost->alu.shift_reg;
7813b280
KT
9832
9833 if (GET_CODE (op1) == AND && REG_P (XEXP (op1, 0))
9834 && CONST_INT_P (XEXP (op1, 1))
6a70badb
RS
9835 && known_eq (INTVAL (XEXP (op1, 1)),
9836 GET_MODE_BITSIZE (mode) - 1))
7813b280
KT
9837 {
9838 *cost += rtx_cost (op0, mode, (rtx_code) code, 0, speed);
9839 /* We already demanded XEXP (op1, 0) to be REG_P, so
9840 don't recurse into it. */
9841 return true;
9842 }
b6875aac 9843 }
ba0cfa17
JG
9844 return false; /* All arguments need to be in registers. */
9845 }
43e9d192 9846
909734be
JG
9847 case SYMBOL_REF:
9848
1b1e81f8
JW
9849 if (aarch64_cmodel == AARCH64_CMODEL_LARGE
9850 || aarch64_cmodel == AARCH64_CMODEL_SMALL_SPIC)
909734be
JG
9851 {
9852 /* LDR. */
9853 if (speed)
9854 *cost += extra_cost->ldst.load;
9855 }
9856 else if (aarch64_cmodel == AARCH64_CMODEL_SMALL
9857 || aarch64_cmodel == AARCH64_CMODEL_SMALL_PIC)
9858 {
9859 /* ADRP, followed by ADD. */
9860 *cost += COSTS_N_INSNS (1);
9861 if (speed)
9862 *cost += 2 * extra_cost->alu.arith;
9863 }
9864 else if (aarch64_cmodel == AARCH64_CMODEL_TINY
9865 || aarch64_cmodel == AARCH64_CMODEL_TINY_PIC)
9866 {
9867 /* ADR. */
9868 if (speed)
9869 *cost += extra_cost->alu.arith;
9870 }
9871
9872 if (flag_pic)
9873 {
9874 /* One extra load instruction, after accessing the GOT. */
9875 *cost += COSTS_N_INSNS (1);
9876 if (speed)
9877 *cost += extra_cost->ldst.load;
9878 }
43e9d192
IB
9879 return true;
9880
909734be 9881 case HIGH:
43e9d192 9882 case LO_SUM:
909734be
JG
9883 /* ADRP/ADD (immediate). */
9884 if (speed)
9885 *cost += extra_cost->alu.arith;
43e9d192
IB
9886 return true;
9887
9888 case ZERO_EXTRACT:
9889 case SIGN_EXTRACT:
7cc2145f
JG
9890 /* UBFX/SBFX. */
9891 if (speed)
b6875aac
KV
9892 {
9893 if (VECTOR_MODE_P (mode))
9894 *cost += extra_cost->vect.alu;
9895 else
9896 *cost += extra_cost->alu.bfx;
9897 }
7cc2145f
JG
9898
9899 /* We can trust that the immediates used will be correct (there
9900 are no by-register forms), so we need only cost op0. */
e548c9df 9901 *cost += rtx_cost (XEXP (x, 0), VOIDmode, (enum rtx_code) code, 0, speed);
43e9d192
IB
9902 return true;
9903
9904 case MULT:
4745e701
JG
9905 *cost += aarch64_rtx_mult_cost (x, MULT, 0, speed);
9906 /* aarch64_rtx_mult_cost always handles recursion to its
9907 operands. */
9908 return true;
43e9d192
IB
9909
9910 case MOD:
4f58fe36
KT
9911 /* We can expand signed mod by power of 2 using a NEGS, two parallel
9912 ANDs and a CSNEG. Assume here that CSNEG is the same as the cost of
9913 an unconditional negate. This case should only ever be reached through
9914 the set_smod_pow2_cheap check in expmed.c. */
9915 if (CONST_INT_P (XEXP (x, 1))
9916 && exact_log2 (INTVAL (XEXP (x, 1))) > 0
9917 && (mode == SImode || mode == DImode))
9918 {
9919 /* We expand to 4 instructions. Reset the baseline. */
9920 *cost = COSTS_N_INSNS (4);
9921
9922 if (speed)
9923 *cost += 2 * extra_cost->alu.logical
9924 + 2 * extra_cost->alu.arith;
9925
9926 return true;
9927 }
9928
9929 /* Fall-through. */
43e9d192 9930 case UMOD:
43e9d192
IB
9931 if (speed)
9932 {
cb9ac430 9933 /* Slightly prefer UMOD over SMOD. */
b6875aac
KV
9934 if (VECTOR_MODE_P (mode))
9935 *cost += extra_cost->vect.alu;
e548c9df
AM
9936 else if (GET_MODE_CLASS (mode) == MODE_INT)
9937 *cost += (extra_cost->mult[mode == DImode].add
cb9ac430
TC
9938 + extra_cost->mult[mode == DImode].idiv
9939 + (code == MOD ? 1 : 0));
43e9d192
IB
9940 }
9941 return false; /* All arguments need to be in registers. */
9942
9943 case DIV:
9944 case UDIV:
4105fe38 9945 case SQRT:
43e9d192
IB
9946 if (speed)
9947 {
b6875aac
KV
9948 if (VECTOR_MODE_P (mode))
9949 *cost += extra_cost->vect.alu;
9950 else if (GET_MODE_CLASS (mode) == MODE_INT)
4105fe38
JG
9951 /* There is no integer SQRT, so only DIV and UDIV can get
9952 here. */
cb9ac430
TC
9953 *cost += (extra_cost->mult[mode == DImode].idiv
9954 /* Slightly prefer UDIV over SDIV. */
9955 + (code == DIV ? 1 : 0));
4105fe38
JG
9956 else
9957 *cost += extra_cost->fp[mode == DFmode].div;
43e9d192
IB
9958 }
9959 return false; /* All arguments need to be in registers. */
9960
a8eecd00 9961 case IF_THEN_ELSE:
2d5ffe46
AP
9962 return aarch64_if_then_else_costs (XEXP (x, 0), XEXP (x, 1),
9963 XEXP (x, 2), cost, speed);
a8eecd00
JG
9964
9965 case EQ:
9966 case NE:
9967 case GT:
9968 case GTU:
9969 case LT:
9970 case LTU:
9971 case GE:
9972 case GEU:
9973 case LE:
9974 case LEU:
9975
9976 return false; /* All arguments must be in registers. */
9977
b292109f
JG
9978 case FMA:
9979 op0 = XEXP (x, 0);
9980 op1 = XEXP (x, 1);
9981 op2 = XEXP (x, 2);
9982
9983 if (speed)
b6875aac
KV
9984 {
9985 if (VECTOR_MODE_P (mode))
9986 *cost += extra_cost->vect.alu;
9987 else
9988 *cost += extra_cost->fp[mode == DFmode].fma;
9989 }
b292109f
JG
9990
9991 /* FMSUB, FNMADD, and FNMSUB are free. */
9992 if (GET_CODE (op0) == NEG)
9993 op0 = XEXP (op0, 0);
9994
9995 if (GET_CODE (op2) == NEG)
9996 op2 = XEXP (op2, 0);
9997
9998 /* aarch64_fnma4_elt_to_64v2df has the NEG as operand 1,
9999 and the by-element operand as operand 0. */
10000 if (GET_CODE (op1) == NEG)
10001 op1 = XEXP (op1, 0);
10002
10003 /* Catch vector-by-element operations. The by-element operand can
10004 either be (vec_duplicate (vec_select (x))) or just
10005 (vec_select (x)), depending on whether we are multiplying by
10006 a vector or a scalar.
10007
10008 Canonicalization is not very good in these cases: FMA4 will put the
10009 by-element operand as operand 0, while FNMA4 will have it as operand 1. */
10010 if (GET_CODE (op0) == VEC_DUPLICATE)
10011 op0 = XEXP (op0, 0);
10012 else if (GET_CODE (op1) == VEC_DUPLICATE)
10013 op1 = XEXP (op1, 0);
10014
10015 if (GET_CODE (op0) == VEC_SELECT)
10016 op0 = XEXP (op0, 0);
10017 else if (GET_CODE (op1) == VEC_SELECT)
10018 op1 = XEXP (op1, 0);
10019
10020 /* If the remaining parameters are not registers,
10021 get the cost to put them into registers. */
e548c9df
AM
10022 *cost += rtx_cost (op0, mode, FMA, 0, speed);
10023 *cost += rtx_cost (op1, mode, FMA, 1, speed);
10024 *cost += rtx_cost (op2, mode, FMA, 2, speed);
b292109f
JG
10025 return true;
10026
5e2a765b
KT
10027 case FLOAT:
10028 case UNSIGNED_FLOAT:
10029 if (speed)
10030 *cost += extra_cost->fp[mode == DFmode].fromint;
10031 return false;
10032
b292109f
JG
10033 case FLOAT_EXTEND:
10034 if (speed)
b6875aac
KV
10035 {
10036 if (VECTOR_MODE_P (mode))
10037 {
10038 /* Vector widening conversion. */
10039 *cost += extra_cost->vect.alu;
10040 }
10041 else
10042 *cost += extra_cost->fp[mode == DFmode].widen;
10043 }
b292109f
JG
10044 return false;
10045
10046 case FLOAT_TRUNCATE:
10047 if (speed)
b6875aac
KV
10048 {
10049 if (VECTOR_MODE_P (mode))
10050 {
10051 /* Vector conversion. */
10052 *cost += extra_cost->vect.alu;
10053 }
10054 else
10055 *cost += extra_cost->fp[mode == DFmode].narrow;
10056 }
b292109f
JG
10057 return false;
10058
61263118
KT
10059 case FIX:
10060 case UNSIGNED_FIX:
10061 x = XEXP (x, 0);
10062 /* Strip the rounding part. They will all be implemented
10063 by the fcvt* family of instructions anyway. */
10064 if (GET_CODE (x) == UNSPEC)
10065 {
10066 unsigned int uns_code = XINT (x, 1);
10067
10068 if (uns_code == UNSPEC_FRINTA
10069 || uns_code == UNSPEC_FRINTM
10070 || uns_code == UNSPEC_FRINTN
10071 || uns_code == UNSPEC_FRINTP
10072 || uns_code == UNSPEC_FRINTZ)
10073 x = XVECEXP (x, 0, 0);
10074 }
10075
10076 if (speed)
b6875aac
KV
10077 {
10078 if (VECTOR_MODE_P (mode))
10079 *cost += extra_cost->vect.alu;
10080 else
10081 *cost += extra_cost->fp[GET_MODE (x) == DFmode].toint;
10082 }
39252973
KT
10083
10084 /* We can combine fmul by a power of 2 followed by a fcvt into a single
10085 fixed-point fcvt. */
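	 /* Illustrative example (the asm below is an assumption for this note,
	    not taken from this file): a source-level (int) (f * 16.0f) reaches
	    this point as (fix:SI (mult:SF ...)) with a power-of-2 constant and
	    can be emitted as the single fixed-point convert
	      fcvtzs	w0, s0, #4
	    so only the inner operand of the multiply is costed below.  */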
10086 if (GET_CODE (x) == MULT
10087 && ((VECTOR_MODE_P (mode)
10088 && aarch64_vec_fpconst_pow_of_2 (XEXP (x, 1)) > 0)
10089 || aarch64_fpconst_pow_of_2 (XEXP (x, 1)) > 0))
10090 {
10091 *cost += rtx_cost (XEXP (x, 0), VOIDmode, (rtx_code) code,
10092 0, speed);
10093 return true;
10094 }
10095
e548c9df 10096 *cost += rtx_cost (x, VOIDmode, (enum rtx_code) code, 0, speed);
61263118
KT
10097 return true;
10098
b292109f 10099 case ABS:
b6875aac
KV
10100 if (VECTOR_MODE_P (mode))
10101 {
10102 /* ABS (vector). */
10103 if (speed)
10104 *cost += extra_cost->vect.alu;
10105 }
10106 else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
b292109f 10107 {
19261b99
KT
10108 op0 = XEXP (x, 0);
10109
10110 /* FABD, which is analogous to FADD. */
10111 if (GET_CODE (op0) == MINUS)
10112 {
e548c9df
AM
10113 *cost += rtx_cost (XEXP (op0, 0), mode, MINUS, 0, speed);
10114 *cost += rtx_cost (XEXP (op0, 1), mode, MINUS, 1, speed);
19261b99
KT
10115 if (speed)
10116 *cost += extra_cost->fp[mode == DFmode].addsub;
10117
10118 return true;
10119 }
10120 /* Simple FABS is analogous to FNEG. */
b292109f
JG
10121 if (speed)
10122 *cost += extra_cost->fp[mode == DFmode].neg;
10123 }
10124 else
10125 {
10126 /* Integer ABS will either be split into
10127 two arithmetic instructions, or will be an ABS
10128 (scalar), which we don't model. */
10129 *cost = COSTS_N_INSNS (2);
10130 if (speed)
10131 *cost += 2 * extra_cost->alu.arith;
10132 }
10133 return false;
10134
10135 case SMAX:
10136 case SMIN:
10137 if (speed)
10138 {
b6875aac
KV
10139 if (VECTOR_MODE_P (mode))
10140 *cost += extra_cost->vect.alu;
10141 else
10142 {
10143 /* FMAXNM/FMINNM/FMAX/FMIN.
10144 TODO: This may not be accurate for all implementations, but
10145 we do not model this in the cost tables. */
10146 *cost += extra_cost->fp[mode == DFmode].addsub;
10147 }
b292109f
JG
10148 }
10149 return false;
10150
61263118
KT
10151 case UNSPEC:
10152 /* The floating point round to integer frint* instructions. */
10153 if (aarch64_frint_unspec_p (XINT (x, 1)))
10154 {
10155 if (speed)
10156 *cost += extra_cost->fp[mode == DFmode].roundint;
10157
10158 return false;
10159 }
781aeb73
KT
10160
10161 if (XINT (x, 1) == UNSPEC_RBIT)
10162 {
10163 if (speed)
10164 *cost += extra_cost->alu.rev;
10165
10166 return false;
10167 }
61263118
KT
10168 break;
10169
fb620c4a
JG
10170 case TRUNCATE:
10171
10172 /* Decompose <su>muldi3_highpart. */
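      /* Illustrative source-level equivalent (not taken from this file):
	 (long) (((__int128) a * b) >> 64) has exactly this shape and is
	 emitted as a single SMULH (or UMULH for unsigned operands), which is
	 why the whole pattern is costed as one widening multiply below.  */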
10173 if (/* (truncate:DI */
10174 mode == DImode
10175 /* (lshiftrt:TI */
10176 && GET_MODE (XEXP (x, 0)) == TImode
10177 && GET_CODE (XEXP (x, 0)) == LSHIFTRT
10178 /* (mult:TI */
10179 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
10180 /* (ANY_EXTEND:TI (reg:DI))
10181 (ANY_EXTEND:TI (reg:DI))) */
10182 && ((GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == ZERO_EXTEND
10183 && GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)) == ZERO_EXTEND)
10184 || (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == SIGN_EXTEND
10185 && GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)) == SIGN_EXTEND))
10186 && GET_MODE (XEXP (XEXP (XEXP (XEXP (x, 0), 0), 0), 0)) == DImode
10187 && GET_MODE (XEXP (XEXP (XEXP (XEXP (x, 0), 0), 1), 0)) == DImode
10188 /* (const_int 64) */
10189 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
10190 && UINTVAL (XEXP (XEXP (x, 0), 1)) == 64)
10191 {
10192 /* UMULH/SMULH. */
10193 if (speed)
10194 *cost += extra_cost->mult[mode == DImode].extend;
e548c9df
AM
10195 *cost += rtx_cost (XEXP (XEXP (XEXP (XEXP (x, 0), 0), 0), 0),
10196 mode, MULT, 0, speed);
10197 *cost += rtx_cost (XEXP (XEXP (XEXP (XEXP (x, 0), 0), 1), 0),
10198 mode, MULT, 1, speed);
fb620c4a
JG
10199 return true;
10200 }
10201
10202 /* Fall through. */
43e9d192 10203 default:
61263118 10204 break;
43e9d192 10205 }
61263118 10206
c10e3d7f
AP
10207 if (dump_file
10208 && flag_aarch64_verbose_cost)
61263118
KT
10209 fprintf (dump_file,
10210 "\nFailed to cost RTX. Assuming default cost.\n");
10211
10212 return true;
43e9d192
IB
10213}
10214
0ee859b5
JG
10215/* Wrapper around aarch64_rtx_costs, dumps the partial or total cost
10216 calculated for X. This cost is stored in *COST. Returns true
10217 if the total cost of X was calculated. */
10218static bool
e548c9df 10219aarch64_rtx_costs_wrapper (rtx x, machine_mode mode, int outer,
0ee859b5
JG
10220 int param, int *cost, bool speed)
10221{
e548c9df 10222 bool result = aarch64_rtx_costs (x, mode, outer, param, cost, speed);
0ee859b5 10223
c10e3d7f
AP
10224 if (dump_file
10225 && flag_aarch64_verbose_cost)
0ee859b5
JG
10226 {
10227 print_rtl_single (dump_file, x);
10228 fprintf (dump_file, "\n%s cost: %d (%s)\n",
10229 speed ? "Hot" : "Cold",
10230 *cost, result ? "final" : "partial");
10231 }
10232
10233 return result;
10234}
10235
43e9d192 10236static int
ef4bddc2 10237aarch64_register_move_cost (machine_mode mode,
8a3a7e67 10238 reg_class_t from_i, reg_class_t to_i)
43e9d192 10239{
8a3a7e67
RH
10240 enum reg_class from = (enum reg_class) from_i;
10241 enum reg_class to = (enum reg_class) to_i;
43e9d192 10242 const struct cpu_regmove_cost *regmove_cost
b175b679 10243 = aarch64_tune_params.regmove_cost;
43e9d192 10244
3be07662 10245 /* Caller save and pointer regs are equivalent to GENERAL_REGS. */
d677263e 10246 if (to == TAILCALL_ADDR_REGS || to == POINTER_REGS)
3be07662
WD
10247 to = GENERAL_REGS;
10248
d677263e 10249 if (from == TAILCALL_ADDR_REGS || from == POINTER_REGS)
3be07662
WD
10250 from = GENERAL_REGS;
10251
6ee70f81
AP
10252 /* Moving between GPRs and the stack register costs the same as GP2GP. */
10253 if ((from == GENERAL_REGS && to == STACK_REG)
10254 || (to == GENERAL_REGS && from == STACK_REG))
10255 return regmove_cost->GP2GP;
10256
10257 /* To/From the stack register, we move via the gprs. */
10258 if (to == STACK_REG || from == STACK_REG)
10259 return aarch64_register_move_cost (mode, from, GENERAL_REGS)
10260 + aarch64_register_move_cost (mode, GENERAL_REGS, to);
10261
6a70badb 10262 if (known_eq (GET_MODE_SIZE (mode), 16))
8919453c
WD
10263 {
10264 /* 128-bit operations on general registers require 2 instructions. */
10265 if (from == GENERAL_REGS && to == GENERAL_REGS)
10266 return regmove_cost->GP2GP * 2;
10267 else if (from == GENERAL_REGS)
10268 return regmove_cost->GP2FP * 2;
10269 else if (to == GENERAL_REGS)
10270 return regmove_cost->FP2GP * 2;
10271
10272 /* When AdvSIMD instructions are disabled it is not possible to move
10273 a 128-bit value directly between Q registers. This is handled in
10274 secondary reload. A general register is used as a scratch to move
10275 the upper DI value and the lower DI value is moved directly,
10276 hence the cost is the sum of three moves. */
10277 if (! TARGET_SIMD)
10278 return regmove_cost->GP2FP + regmove_cost->FP2GP + regmove_cost->FP2FP;
10279
10280 return regmove_cost->FP2FP;
10281 }
10282
43e9d192
IB
10283 if (from == GENERAL_REGS && to == GENERAL_REGS)
10284 return regmove_cost->GP2GP;
10285 else if (from == GENERAL_REGS)
10286 return regmove_cost->GP2FP;
10287 else if (to == GENERAL_REGS)
10288 return regmove_cost->FP2GP;
10289
43e9d192
IB
10290 return regmove_cost->FP2FP;
10291}
10292
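As a worked example of the 16-byte handling above (the regmove numbers are purely illustrative, not taken from any tuning table): with GP2GP = 1, GP2FP = 2, FP2GP = 3 and FP2FP = 2, a TImode move between two general registers costs 2 * GP2GP = 2, a general-to-FP move costs 2 * GP2FP = 4, and without TARGET_SIMD an FP-to-FP move costs GP2FP + FP2GP + FP2FP = 7 because the upper half has to bounce through a general register.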
10293static int
ef4bddc2 10294aarch64_memory_move_cost (machine_mode mode ATTRIBUTE_UNUSED,
43e9d192
IB
10295 reg_class_t rclass ATTRIBUTE_UNUSED,
10296 bool in ATTRIBUTE_UNUSED)
10297{
b175b679 10298 return aarch64_tune_params.memmov_cost;
43e9d192
IB
10299}
10300
0c30e0f3
EM
10301/* Return true if it is safe and beneficial to use the approximate rsqrt optabs
10302 to optimize 1.0/sqrt. */
ee62a5a6
RS
10303
10304static bool
9acc9cbe 10305use_rsqrt_p (machine_mode mode)
ee62a5a6
RS
10306{
10307 return (!flag_trapping_math
10308 && flag_unsafe_math_optimizations
9acc9cbe
EM
10309 && ((aarch64_tune_params.approx_modes->recip_sqrt
10310 & AARCH64_APPROX_MODE (mode))
1a33079e 10311 || flag_mrecip_low_precision_sqrt));
ee62a5a6
RS
10312}
10313
0c30e0f3
EM
10314/* Function to decide when to use the approximate reciprocal square root
10315 builtin. */
a6fc00da
BH
10316
10317static tree
ee62a5a6 10318aarch64_builtin_reciprocal (tree fndecl)
a6fc00da 10319{
9acc9cbe
EM
10320 machine_mode mode = TYPE_MODE (TREE_TYPE (fndecl));
10321
10322 if (!use_rsqrt_p (mode))
a6fc00da 10323 return NULL_TREE;
ee62a5a6 10324 return aarch64_builtin_rsqrt (DECL_FUNCTION_CODE (fndecl));
a6fc00da
BH
10325}
10326
98daafa0
EM
10327/* Emit instruction sequence to compute either the approximate square root
10328 or its approximate reciprocal, depending on the flag RECP, and return
10329 whether the sequence was emitted or not. */
a6fc00da 10330
98daafa0
EM
10331bool
10332aarch64_emit_approx_sqrt (rtx dst, rtx src, bool recp)
a6fc00da 10333{
98daafa0 10334 machine_mode mode = GET_MODE (dst);
daef0a8c
JW
10335
10336 if (GET_MODE_INNER (mode) == HFmode)
2e19adc8
RE
10337 {
10338 gcc_assert (!recp);
10339 return false;
10340 }
10341
2e19adc8
RE
10342 if (!recp)
10343 {
10344 if (!(flag_mlow_precision_sqrt
10345 || (aarch64_tune_params.approx_modes->sqrt
10346 & AARCH64_APPROX_MODE (mode))))
10347 return false;
10348
10349 if (flag_finite_math_only
10350 || flag_trapping_math
10351 || !flag_unsafe_math_optimizations
10352 || optimize_function_for_size_p (cfun))
10353 return false;
10354 }
10355 else
10356 /* Caller assumes we cannot fail. */
10357 gcc_assert (use_rsqrt_p (mode));
daef0a8c 10358
ddc203a7 10359 machine_mode mmsk = mode_for_int_vector (mode).require ();
98daafa0
EM
10360 rtx xmsk = gen_reg_rtx (mmsk);
10361 if (!recp)
2e19adc8
RE
10362 /* When calculating the approximate square root, compare the
10363 argument with 0.0 and create a mask. */
10364 emit_insn (gen_rtx_SET (xmsk,
10365 gen_rtx_NEG (mmsk,
10366 gen_rtx_EQ (mmsk, src,
10367 CONST0_RTX (mode)))));
a6fc00da 10368
98daafa0
EM
10369 /* Estimate the approximate reciprocal square root. */
10370 rtx xdst = gen_reg_rtx (mode);
0016d8d9 10371 emit_insn (gen_aarch64_rsqrte (mode, xdst, src));
a6fc00da 10372
98daafa0
EM
10373 /* Iterate over the series twice for SF and thrice for DF. */
10374 int iterations = (GET_MODE_INNER (mode) == DFmode) ? 3 : 2;
a6fc00da 10375
98daafa0
EM
10376 /* Optionally iterate over the series once less for faster performance
10377 while sacrificing accuracy. */
10378 if ((recp && flag_mrecip_low_precision_sqrt)
10379 || (!recp && flag_mlow_precision_sqrt))
a6fc00da
BH
10380 iterations--;
10381
98daafa0
EM
10382 /* Iterate over the series to calculate the approximate reciprocal square
10383 root. */
10384 rtx x1 = gen_reg_rtx (mode);
10385 while (iterations--)
a6fc00da 10386 {
a6fc00da 10387 rtx x2 = gen_reg_rtx (mode);
98daafa0
EM
10388 emit_set_insn (x2, gen_rtx_MULT (mode, xdst, xdst));
10389
0016d8d9 10390 emit_insn (gen_aarch64_rsqrts (mode, x1, src, x2));
a6fc00da 10391
98daafa0
EM
10392 if (iterations > 0)
10393 emit_set_insn (xdst, gen_rtx_MULT (mode, xdst, x1));
10394 }
10395
10396 if (!recp)
10397 {
10398 /* Qualify the approximate reciprocal square root when the argument is
10399 0.0 by squashing the intermediary result to 0.0. */
10400 rtx xtmp = gen_reg_rtx (mmsk);
10401 emit_set_insn (xtmp, gen_rtx_AND (mmsk, gen_rtx_NOT (mmsk, xmsk),
10402 gen_rtx_SUBREG (mmsk, xdst, 0)));
10403 emit_move_insn (xdst, gen_rtx_SUBREG (mode, xtmp, 0));
a6fc00da 10404
98daafa0
EM
10405 /* Calculate the approximate square root. */
10406 emit_set_insn (xdst, gen_rtx_MULT (mode, xdst, src));
a6fc00da
BH
10407 }
10408
98daafa0
EM
10409 /* Finalize the approximation. */
10410 emit_set_insn (dst, gen_rtx_MULT (mode, xdst, x1));
10411
10412 return true;
a6fc00da
BH
10413}
10414
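The FRSQRTE/FRSQRTS sequence emitted above is plain Newton-Raphson refinement of 1/sqrt(d): FRSQRTS computes (3 - a*b)/2, so each pass updates the estimate as x' = x * (3 - d*x*x)/2. Below is a standalone C sketch of the same recurrence (not part of this file; the bit-level initial guess is merely a stand-in for what FRSQRTE computes in hardware):

#include <stdio.h>
#include <string.h>

/* Crude initial estimate of 1/sqrt(x), standing in for FRSQRTE.  */
static float
rsqrt_estimate (float x)
{
  unsigned int i;
  memcpy (&i, &x, sizeof i);
  i = 0x5f3759df - (i >> 1);	/* classic bit-level first guess */
  memcpy (&x, &i, sizeof x);
  return x;
}

/* One refinement pass of the sequence above: x' = x * (3 - d*x*x) / 2,
   where FRSQRTS supplies the (3 - d*x*x)/2 factor.  */
static float
rsqrt_step (float d, float x)
{
  return x * (3.0f - d * x * x) / 2.0f;
}

int
main (void)
{
  float d = 2.0f;
  float x = rsqrt_estimate (d);
  for (int i = 0; i < 2; i++)	/* two iterations for SFmode, three for DFmode */
    x = rsqrt_step (d, x);
  printf ("approx 1/sqrt(2) = %f\n", x);
  return 0;
}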
79a2bc2d
EM
10415/* Emit the instruction sequence to compute the approximation for the division
10416 of NUM by DEN in QUO and return whether the sequence was emitted or not. */
10417
10418bool
10419aarch64_emit_approx_div (rtx quo, rtx num, rtx den)
10420{
10421 machine_mode mode = GET_MODE (quo);
33d72b63
JW
10422
10423 if (GET_MODE_INNER (mode) == HFmode)
10424 return false;
10425
79a2bc2d
EM
10426 bool use_approx_division_p = (flag_mlow_precision_div
10427 || (aarch64_tune_params.approx_modes->division
10428 & AARCH64_APPROX_MODE (mode)));
10429
10430 if (!flag_finite_math_only
10431 || flag_trapping_math
10432 || !flag_unsafe_math_optimizations
10433 || optimize_function_for_size_p (cfun)
10434 || !use_approx_division_p)
10435 return false;
10436
1be49a38
RR
10437 if (!TARGET_SIMD && VECTOR_MODE_P (mode))
10438 return false;
10439
79a2bc2d
EM
10440 /* Estimate the approximate reciprocal. */
10441 rtx xrcp = gen_reg_rtx (mode);
0016d8d9 10442 emit_insn (gen_aarch64_frecpe (mode, xrcp, den));
79a2bc2d
EM
10443
10444 /* Iterate over the series twice for SF and thrice for DF. */
10445 int iterations = (GET_MODE_INNER (mode) == DFmode) ? 3 : 2;
10446
10447 /* Optionally iterate over the series once less for faster performance,
10448 while sacrificing accuracy. */
10449 if (flag_mlow_precision_div)
10450 iterations--;
10451
10452 /* Iterate over the series to calculate the approximate reciprocal. */
10453 rtx xtmp = gen_reg_rtx (mode);
10454 while (iterations--)
10455 {
0016d8d9 10456 emit_insn (gen_aarch64_frecps (mode, xtmp, xrcp, den));
79a2bc2d
EM
10457
10458 if (iterations > 0)
10459 emit_set_insn (xrcp, gen_rtx_MULT (mode, xrcp, xtmp));
10460 }
10461
10462 if (num != CONST1_RTX (mode))
10463 {
10464 /* As the approximate reciprocal of DEN is already calculated, only
10465 calculate the approximate division when NUM is not 1.0. */
10466 rtx xnum = force_reg (mode, num);
10467 emit_set_insn (xrcp, gen_rtx_MULT (mode, xrcp, xnum));
10468 }
10469
10470 /* Finalize the approximation. */
10471 emit_set_insn (quo, gen_rtx_MULT (mode, xrcp, xtmp));
10472 return true;
10473}
10474
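Similarly, the FRECPE/FRECPS sequence above refines a reciprocal estimate by Newton-Raphson: FRECPS computes (2 - a*b), giving the update x' = x * (2 - d*x). A standalone sketch (not part of this file; the starting value is an arbitrary stand-in for the FRECPE estimate):

#include <stdio.h>

/* One refinement pass of the sequence above: x' = x * (2 - d*x),
   where FRECPS supplies the (2 - d*x) factor.  */
static double
recip_step (double d, double x)
{
  return x * (2.0 - d * x);
}

int
main (void)
{
  double d = 3.0;
  double x = 0.3;			/* arbitrary stand-in for the FRECPE estimate */
  for (int i = 0; i < 3; i++)		/* three iterations for DFmode */
    x = recip_step (d, x);
  printf ("approx 1/3 = %.17g\n", x);
  return 0;
}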
d126a4ae
AP
10475/* Return the number of instructions that can be issued per cycle. */
10476static int
10477aarch64_sched_issue_rate (void)
10478{
b175b679 10479 return aarch64_tune_params.issue_rate;
d126a4ae
AP
10480}
10481
d03f7e44
MK
10482static int
10483aarch64_sched_first_cycle_multipass_dfa_lookahead (void)
10484{
10485 int issue_rate = aarch64_sched_issue_rate ();
10486
10487 return issue_rate > 1 && !sched_fusion ? issue_rate : 0;
10488}
10489
2d6bc7fa
KT
10490
10491/* Implement TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD as
10492 autopref_multipass_dfa_lookahead_guard from haifa-sched.c. It only
10493 has an effect if PARAM_SCHED_AUTOPREF_QUEUE_DEPTH > 0. */
10494
10495static int
10496aarch64_first_cycle_multipass_dfa_lookahead_guard (rtx_insn *insn,
10497 int ready_index)
10498{
10499 return autopref_multipass_dfa_lookahead_guard (insn, ready_index);
10500}
10501
10502
8990e73a
TB
10503/* Vectorizer cost model target hooks. */
10504
10505/* Implement targetm.vectorize.builtin_vectorization_cost. */
10506static int
10507aarch64_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
10508 tree vectype,
10509 int misalign ATTRIBUTE_UNUSED)
10510{
10511 unsigned elements;
cd8ae5ed
AP
10512 const cpu_vector_cost *costs = aarch64_tune_params.vec_costs;
10513 bool fp = false;
10514
10515 if (vectype != NULL)
10516 fp = FLOAT_TYPE_P (vectype);
8990e73a
TB
10517
10518 switch (type_of_cost)
10519 {
10520 case scalar_stmt:
cd8ae5ed 10521 return fp ? costs->scalar_fp_stmt_cost : costs->scalar_int_stmt_cost;
8990e73a
TB
10522
10523 case scalar_load:
cd8ae5ed 10524 return costs->scalar_load_cost;
8990e73a
TB
10525
10526 case scalar_store:
cd8ae5ed 10527 return costs->scalar_store_cost;
8990e73a
TB
10528
10529 case vector_stmt:
cd8ae5ed 10530 return fp ? costs->vec_fp_stmt_cost : costs->vec_int_stmt_cost;
8990e73a
TB
10531
10532 case vector_load:
cd8ae5ed 10533 return costs->vec_align_load_cost;
8990e73a
TB
10534
10535 case vector_store:
cd8ae5ed 10536 return costs->vec_store_cost;
8990e73a
TB
10537
10538 case vec_to_scalar:
cd8ae5ed 10539 return costs->vec_to_scalar_cost;
8990e73a
TB
10540
10541 case scalar_to_vec:
cd8ae5ed 10542 return costs->scalar_to_vec_cost;
8990e73a
TB
10543
10544 case unaligned_load:
cc9fe6bb 10545 case vector_gather_load:
cd8ae5ed 10546 return costs->vec_unalign_load_cost;
8990e73a
TB
10547
10548 case unaligned_store:
cc9fe6bb 10549 case vector_scatter_store:
cd8ae5ed 10550 return costs->vec_unalign_store_cost;
8990e73a
TB
10551
10552 case cond_branch_taken:
cd8ae5ed 10553 return costs->cond_taken_branch_cost;
8990e73a
TB
10554
10555 case cond_branch_not_taken:
cd8ae5ed 10556 return costs->cond_not_taken_branch_cost;
8990e73a
TB
10557
10558 case vec_perm:
cd8ae5ed 10559 return costs->vec_permute_cost;
c428f91c 10560
8990e73a 10561 case vec_promote_demote:
cd8ae5ed 10562 return fp ? costs->vec_fp_stmt_cost : costs->vec_int_stmt_cost;
8990e73a
TB
10563
10564 case vec_construct:
6a70badb 10565 elements = estimated_poly_value (TYPE_VECTOR_SUBPARTS (vectype));
8990e73a
TB
10566 return elements / 2 + 1;
10567
10568 default:
10569 gcc_unreachable ();
10570 }
10571}
10572
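As a worked example of the vec_construct case above: constructing a V4SF vector from scalars has TYPE_VECTOR_SUBPARTS equal to 4, so it is costed at 4/2 + 1 = 3 units, whereas the other cases simply return the corresponding field of the aarch64_tune_params.vec_costs table.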
10573/* Implement targetm.vectorize.add_stmt_cost. */
10574static unsigned
10575aarch64_add_stmt_cost (void *data, int count, enum vect_cost_for_stmt kind,
10576 struct _stmt_vec_info *stmt_info, int misalign,
10577 enum vect_cost_model_location where)
10578{
10579 unsigned *cost = (unsigned *) data;
10580 unsigned retval = 0;
10581
10582 if (flag_vect_cost_model)
10583 {
10584 tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
10585 int stmt_cost =
10586 aarch64_builtin_vectorization_cost (kind, vectype, misalign);
10587
10588 /* Statements in an inner loop relative to the loop being
10589 vectorized are weighted more heavily. The value here is
058e4c71 10590 arbitrary and could potentially be improved with analysis. */
8990e73a 10591 if (where == vect_body && stmt_info && stmt_in_inner_loop_p (stmt_info))
058e4c71 10592 count *= 50; /* FIXME */
8990e73a
TB
10593
10594 retval = (unsigned) (count * stmt_cost);
10595 cost[where] += retval;
10596 }
10597
10598 return retval;
10599}
10600
0cfff2a1 10601static void initialize_aarch64_code_model (struct gcc_options *);
43e9d192 10602
0cfff2a1
KT
10603/* Parse the TO_PARSE string and put the architecture struct that it
10604 selects into RES and the architectural features into ISA_FLAGS.
10605 Return an aarch64_parse_opt_result describing the parse result.
c7887347
ML
10606 If there is an error parsing, RES and ISA_FLAGS are left unchanged.
10607 When the TO_PARSE string contains an invalid extension,
10608 a copy of the string is created and stored to INVALID_EXTENSION. */
43e9d192 10609
0cfff2a1
KT
10610static enum aarch64_parse_opt_result
10611aarch64_parse_arch (const char *to_parse, const struct processor **res,
c7887347 10612 unsigned long *isa_flags, std::string *invalid_extension)
43e9d192 10613{
ff150bc4 10614 const char *ext;
43e9d192 10615 const struct processor *arch;
43e9d192
IB
10616 size_t len;
10617
ff150bc4 10618 ext = strchr (to_parse, '+');
43e9d192
IB
10619
10620 if (ext != NULL)
ff150bc4 10621 len = ext - to_parse;
43e9d192 10622 else
ff150bc4 10623 len = strlen (to_parse);
43e9d192
IB
10624
10625 if (len == 0)
0cfff2a1
KT
10626 return AARCH64_PARSE_MISSING_ARG;
10627
43e9d192 10628
0cfff2a1 10629 /* Loop through the list of supported ARCHes to find a match. */
43e9d192
IB
10630 for (arch = all_architectures; arch->name != NULL; arch++)
10631 {
ff150bc4
ML
10632 if (strlen (arch->name) == len
10633 && strncmp (arch->name, to_parse, len) == 0)
43e9d192 10634 {
0cfff2a1 10635 unsigned long isa_temp = arch->flags;
43e9d192
IB
10636
10637 if (ext != NULL)
10638 {
0cfff2a1
KT
10639 /* TO_PARSE string contains at least one extension. */
10640 enum aarch64_parse_opt_result ext_res
c7887347 10641 = aarch64_parse_extension (ext, &isa_temp, invalid_extension);
43e9d192 10642
0cfff2a1
KT
10643 if (ext_res != AARCH64_PARSE_OK)
10644 return ext_res;
ffee7aa9 10645 }
0cfff2a1
KT
10646 /* Extension parsing was successful. Confirm the result
10647 arch and ISA flags. */
10648 *res = arch;
10649 *isa_flags = isa_temp;
10650 return AARCH64_PARSE_OK;
43e9d192
IB
10651 }
10652 }
10653
10654 /* ARCH name not found in list. */
0cfff2a1 10655 return AARCH64_PARSE_INVALID_ARG;
43e9d192
IB
10656}
10657
0cfff2a1
KT
10658/* Parse the TO_PARSE string and put the result tuning in RES and the
10659 architecture flags in ISA_FLAGS. Return an aarch64_parse_opt_result
10660 describing the parse result. If there is an error parsing, RES and
c7887347
ML
10661 ISA_FLAGS are left unchanged.
10662 When the TO_PARSE string contains an invalid extension,
10663 a copy of the string is created and stored to INVALID_EXTENSION. */
43e9d192 10664
0cfff2a1
KT
10665static enum aarch64_parse_opt_result
10666aarch64_parse_cpu (const char *to_parse, const struct processor **res,
c7887347 10667 unsigned long *isa_flags, std::string *invalid_extension)
43e9d192 10668{
ff150bc4 10669 const char *ext;
43e9d192 10670 const struct processor *cpu;
43e9d192
IB
10671 size_t len;
10672
ff150bc4 10673 ext = strchr (to_parse, '+');
43e9d192
IB
10674
10675 if (ext != NULL)
ff150bc4 10676 len = ext - to_parse;
43e9d192 10677 else
ff150bc4 10678 len = strlen (to_parse);
43e9d192
IB
10679
10680 if (len == 0)
0cfff2a1
KT
10681 return AARCH64_PARSE_MISSING_ARG;
10682
43e9d192
IB
10683
10684 /* Loop through the list of supported CPUs to find a match. */
10685 for (cpu = all_cores; cpu->name != NULL; cpu++)
10686 {
ff150bc4 10687 if (strlen (cpu->name) == len && strncmp (cpu->name, to_parse, len) == 0)
43e9d192 10688 {
0cfff2a1
KT
10689 unsigned long isa_temp = cpu->flags;
10690
43e9d192
IB
10691
10692 if (ext != NULL)
10693 {
0cfff2a1
KT
10694 /* TO_PARSE string contains at least one extension. */
10695 enum aarch64_parse_opt_result ext_res
c7887347 10696 = aarch64_parse_extension (ext, &isa_temp, invalid_extension);
43e9d192 10697
0cfff2a1
KT
10698 if (ext_res != AARCH64_PARSE_OK)
10699 return ext_res;
10700 }
10701 /* Extension parsing was successful. Confirm the result
10702 cpu and ISA flags. */
10703 *res = cpu;
10704 *isa_flags = isa_temp;
10705 return AARCH64_PARSE_OK;
43e9d192
IB
10706 }
10707 }
10708
10709 /* CPU name not found in list. */
0cfff2a1 10710 return AARCH64_PARSE_INVALID_ARG;
43e9d192
IB
10711}
10712
0cfff2a1
KT
10713/* Parse the TO_PARSE string and put the cpu it selects into RES.
10714 Return an aarch64_parse_opt_result describing the parse result.
10715 If the parsing fails, RES does not change. */
43e9d192 10716
0cfff2a1
KT
10717static enum aarch64_parse_opt_result
10718aarch64_parse_tune (const char *to_parse, const struct processor **res)
43e9d192
IB
10719{
10720 const struct processor *cpu;
43e9d192
IB
10721
10722 /* Loop through the list of supported CPUs to find a match. */
10723 for (cpu = all_cores; cpu->name != NULL; cpu++)
10724 {
ff150bc4 10725 if (strcmp (cpu->name, to_parse) == 0)
43e9d192 10726 {
0cfff2a1
KT
10727 *res = cpu;
10728 return AARCH64_PARSE_OK;
43e9d192
IB
10729 }
10730 }
10731
10732 /* CPU name not found in list. */
0cfff2a1 10733 return AARCH64_PARSE_INVALID_ARG;
43e9d192
IB
10734}
10735
8dec06f2
JG
10736/* Parse TOKEN, which has length LENGTH, to see if it is an option
10737 described in FLAG. If it is, return the index bit for that fusion type.
10738 If not, error (printing OPTION_NAME) and return zero. */
10739
10740static unsigned int
10741aarch64_parse_one_option_token (const char *token,
10742 size_t length,
10743 const struct aarch64_flag_desc *flag,
10744 const char *option_name)
10745{
10746 for (; flag->name != NULL; flag++)
10747 {
10748 if (length == strlen (flag->name)
10749 && !strncmp (flag->name, token, length))
10750 return flag->flag;
10751 }
10752
10753 error ("unknown flag passed in -moverride=%s (%s)", option_name, token);
10754 return 0;
10755}
10756
10757/* Parse OPTION which is a comma-separated list of flags to enable.
10758 FLAGS gives the list of flags we understand, INITIAL_STATE gives any
10759 default state we inherit from the CPU tuning structures. OPTION_NAME
10760 gives the top-level option we are parsing in the -moverride string,
10761 for use in error messages. */
10762
10763static unsigned int
10764aarch64_parse_boolean_options (const char *option,
10765 const struct aarch64_flag_desc *flags,
10766 unsigned int initial_state,
10767 const char *option_name)
10768{
10769 const char separator = '.';
10770 const char* specs = option;
10771 const char* ntoken = option;
10772 unsigned int found_flags = initial_state;
10773
10774 while ((ntoken = strchr (specs, separator)))
10775 {
10776 size_t token_length = ntoken - specs;
10777 unsigned token_ops = aarch64_parse_one_option_token (specs,
10778 token_length,
10779 flags,
10780 option_name);
10781 /* If we find "none" (or, for simplicity's sake, an error) anywhere
10782 in the token stream, reset the supported operations. So:
10783
10784 adrp+add.cmp+branch.none.adrp+add
10785
10786 would have the result of turning on only adrp+add fusion. */
10787 if (!token_ops)
10788 found_flags = 0;
10789
10790 found_flags |= token_ops;
10791 specs = ++ntoken;
10792 }
10793
10794 /* The string ended with a trailing separator; complain. */
10795 if (!(*specs))
10796 {
10797 error ("%s string ill-formed\n", option_name);
10798 return 0;
10799 }
10800
10801 /* We still have one more token to parse. */
10802 size_t token_length = strlen (specs);
10803 unsigned token_ops = aarch64_parse_one_option_token (specs,
10804 token_length,
10805 flags,
10806 option_name);
10807 if (!token_ops)
10808 found_flags = 0;
10809
10810 found_flags |= token_ops;
10811 return found_flags;
10812}
10813
10814/* Support for overriding instruction fusion. */
10815
10816static void
10817aarch64_parse_fuse_string (const char *fuse_string,
10818 struct tune_params *tune)
10819{
10820 tune->fusible_ops = aarch64_parse_boolean_options (fuse_string,
10821 aarch64_fusible_pairs,
10822 tune->fusible_ops,
10823 "fuse=");
10824}
10825
10826/* Support for overriding other tuning flags. */
10827
10828static void
10829aarch64_parse_tune_string (const char *tune_string,
10830 struct tune_params *tune)
10831{
10832 tune->extra_tuning_flags
10833 = aarch64_parse_boolean_options (tune_string,
10834 aarch64_tuning_flags,
10835 tune->extra_tuning_flags,
10836 "tune=");
10837}
10838
886f092f
KT
10839/* Parse the sve_width tuning moverride string in TUNE_STRING.
10840 Accept the valid SVE vector widths allowed by
10841 aarch64_sve_vector_bits_enum and use the value to override sve_width
10842 in TUNE. */
10843
10844static void
10845aarch64_parse_sve_width_string (const char *tune_string,
10846 struct tune_params *tune)
10847{
10848 int width = -1;
10849
10850 int n = sscanf (tune_string, "%d", &width);
10851 if (n == EOF)
10852 {
10853 error ("invalid format for sve_width");
10854 return;
10855 }
10856 switch (width)
10857 {
10858 case SVE_128:
10859 case SVE_256:
10860 case SVE_512:
10861 case SVE_1024:
10862 case SVE_2048:
10863 break;
10864 default:
10865 error ("invalid sve_width value: %d", width);
10866 }
10867 tune->sve_width = (enum aarch64_sve_vector_bits_enum) width;
10868}
10869
8dec06f2
JG
10870/* Parse TOKEN, which has length LENGTH, to see if it is a tuning option
10871 we understand. If it is, extract the option string and handoff to
10872 the appropriate function. */
10873
10874void
10875aarch64_parse_one_override_token (const char* token,
10876 size_t length,
10877 struct tune_params *tune)
10878{
10879 const struct aarch64_tuning_override_function *fn
10880 = aarch64_tuning_override_functions;
10881
10882 const char *option_part = strchr (token, '=');
10883 if (!option_part)
10884 {
10885 error ("tuning string missing in option (%s)", token);
10886 return;
10887 }
10888
10889 /* Get the length of the option name. */
10890 length = option_part - token;
10891 /* Skip the '=' to get to the option string. */
10892 option_part++;
10893
10894 for (; fn->name != NULL; fn++)
10895 {
10896 if (!strncmp (fn->name, token, length))
10897 {
10898 fn->parse_override (option_part, tune);
10899 return;
10900 }
10901 }
10902
10903 error ("unknown tuning option (%s)", token);
10904 return;
10905}
10906
5eee3c34
JW
10907/* A checking mechanism for the implementation of the tls size. */
10908
10909static void
10910initialize_aarch64_tls_size (struct gcc_options *opts)
10911{
10912 if (aarch64_tls_size == 0)
10913 aarch64_tls_size = 24;
10914
10915 switch (opts->x_aarch64_cmodel_var)
10916 {
10917 case AARCH64_CMODEL_TINY:
10918 /* Both the default and maximum TLS size allowed under tiny are 1M, which
10919 needs two instructions to address, so we clamp the size to 24. */
10920 if (aarch64_tls_size > 24)
10921 aarch64_tls_size = 24;
10922 break;
10923 case AARCH64_CMODEL_SMALL:
10924 /* The maximum TLS size allowed under small is 4G. */
10925 if (aarch64_tls_size > 32)
10926 aarch64_tls_size = 32;
10927 break;
10928 case AARCH64_CMODEL_LARGE:
10929 /* The maximum TLS size allowed under large is 16E.
10930 FIXME: 16E should be 64bit, we only support 48bit offset now. */
10931 if (aarch64_tls_size > 48)
10932 aarch64_tls_size = 48;
10933 break;
10934 default:
10935 gcc_unreachable ();
10936 }
10937
10938 return;
10939}
10940
8dec06f2
JG
10941/* Parse STRING looking for options in the format:
10942 string :: option:string
10943 option :: name=substring
10944 name :: {a-z}
10945 substring :: defined by option. */
10946
10947static void
10948aarch64_parse_override_string (const char* input_string,
10949 struct tune_params* tune)
10950{
10951 const char separator = ':';
10952 size_t string_length = strlen (input_string) + 1;
10953 char *string_root = (char *) xmalloc (sizeof (*string_root) * string_length);
10954 char *string = string_root;
10955 strncpy (string, input_string, string_length);
10956 string[string_length - 1] = '\0';
10957
10958 char* ntoken = string;
10959
10960 while ((ntoken = strchr (string, separator)))
10961 {
10962 size_t token_length = ntoken - string;
10963 /* Make this substring look like a string. */
10964 *ntoken = '\0';
10965 aarch64_parse_one_override_token (string, token_length, tune);
10966 string = ++ntoken;
10967 }
10968
10969 /* One last option to parse. */
10970 aarch64_parse_one_override_token (string, strlen (string), tune);
10971 free (string_root);
10972}
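Putting the two layers together: the override string is split at ':' into name=value tokens, and a boolean value such as the fuse list is then split at '.'. An illustrative invocation (the fuse= names come from the example in the comment above; the sve_width spelling is assumed from the handler's error messages):

  gcc -O2 -moverride=fuse=adrp+add.cmp+branch:sve_width=256 foo.c

adds the adrp+add and cmp+branch fusion pairs to the tuning's default set and overrides the SVE width tuning to 256 bits.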
43e9d192 10973
43e9d192
IB
10974
10975static void
0cfff2a1 10976aarch64_override_options_after_change_1 (struct gcc_options *opts)
43e9d192 10977{
acea40ac
WD
10978 /* PR 70044: We have to be careful about being called multiple times for the
10979 same function. This means all changes should be repeatable. */
10980
d6cb6d6a
WD
10981 /* Set aarch64_use_frame_pointer based on -fno-omit-frame-pointer.
10982 Disable the frame pointer flag so the mid-end will not use a frame
10983 pointer in leaf functions in order to support -fomit-leaf-frame-pointer.
10984 Set x_flag_omit_frame_pointer to the special value 2 to differentiate
10985 between -fomit-frame-pointer (1) and -fno-omit-frame-pointer (2). */
10986 aarch64_use_frame_pointer = opts->x_flag_omit_frame_pointer != 1;
acea40ac 10987 if (opts->x_flag_omit_frame_pointer == 0)
a3dc8760 10988 opts->x_flag_omit_frame_pointer = 2;
43e9d192 10989
1be34295 10990 /* If not optimizing for size, set the default
0cfff2a1
KT
10991 alignment to what the target wants. */
10992 if (!opts->x_optimize_size)
43e9d192 10993 {
c518c102
ML
10994 if (opts->x_flag_align_loops && !opts->x_str_align_loops)
10995 opts->x_str_align_loops = aarch64_tune_params.loop_align;
10996 if (opts->x_flag_align_jumps && !opts->x_str_align_jumps)
10997 opts->x_str_align_jumps = aarch64_tune_params.jump_align;
10998 if (opts->x_flag_align_functions && !opts->x_str_align_functions)
10999 opts->x_str_align_functions = aarch64_tune_params.function_align;
43e9d192 11000 }
b4f50fd4 11001
9ee6540a
WD
11002 /* We default to no pc-relative literal loads. */
11003
11004 aarch64_pcrelative_literal_loads = false;
11005
11006 /* If -mpc-relative-literal-loads is set on the command line, this
b4f50fd4 11007 implies that the user asked for PC relative literal loads. */
9ee6540a
WD
11008 if (opts->x_pcrelative_literal_loads == 1)
11009 aarch64_pcrelative_literal_loads = true;
b4f50fd4 11010
9ee6540a
WD
11011 /* In the tiny memory model it makes no sense to disallow PC relative
11012 literal pool loads. */
11013 if (aarch64_cmodel == AARCH64_CMODEL_TINY
11014 || aarch64_cmodel == AARCH64_CMODEL_TINY_PIC)
11015 aarch64_pcrelative_literal_loads = true;
98daafa0
EM
11016
11017 /* When enabling the lower precision Newton series for the square root, also
11018 enable it for the reciprocal square root, since the latter is an
11019 intermediary step for the former. */
11020 if (flag_mlow_precision_sqrt)
11021 flag_mrecip_low_precision_sqrt = true;
0cfff2a1 11022}
43e9d192 11023
0cfff2a1
KT
11024/* 'Unpack' the internal tuning structs and update the options
11025 in OPTS. The caller must have set up selected_tune and selected_arch
11026 as all the other target-specific codegen decisions are
11027 derived from them. */
11028
e4ea20c8 11029void
0cfff2a1
KT
11030aarch64_override_options_internal (struct gcc_options *opts)
11031{
11032 aarch64_tune_flags = selected_tune->flags;
11033 aarch64_tune = selected_tune->sched_core;
11034 /* Make a copy of the tuning parameters attached to the core, which
11035 we may later overwrite. */
11036 aarch64_tune_params = *(selected_tune->tune);
11037 aarch64_architecture_version = selected_arch->architecture_version;
11038
11039 if (opts->x_aarch64_override_tune_string)
11040 aarch64_parse_override_string (opts->x_aarch64_override_tune_string,
11041 &aarch64_tune_params);
11042
11043 /* This target defaults to strict volatile bitfields. */
11044 if (opts->x_flag_strict_volatile_bitfields < 0 && abi_version_at_least (2))
11045 opts->x_flag_strict_volatile_bitfields = 1;
11046
0cfff2a1 11047 initialize_aarch64_code_model (opts);
5eee3c34 11048 initialize_aarch64_tls_size (opts);
63892fa2 11049
2d6bc7fa
KT
11050 int queue_depth = 0;
11051 switch (aarch64_tune_params.autoprefetcher_model)
11052 {
11053 case tune_params::AUTOPREFETCHER_OFF:
11054 queue_depth = -1;
11055 break;
11056 case tune_params::AUTOPREFETCHER_WEAK:
11057 queue_depth = 0;
11058 break;
11059 case tune_params::AUTOPREFETCHER_STRONG:
11060 queue_depth = max_insn_queue_index + 1;
11061 break;
11062 default:
11063 gcc_unreachable ();
11064 }
11065
11066 /* We don't mind passing in global_options_set here as we don't use
11067 the *options_set structs anyway. */
11068 maybe_set_param_value (PARAM_SCHED_AUTOPREF_QUEUE_DEPTH,
11069 queue_depth,
11070 opts->x_param_values,
11071 global_options_set.x_param_values);
11072
9d2c6e2e
MK
11073 /* Set up parameters to be used in prefetching algorithm. Do not
11074 override the defaults unless we are tuning for a core we have
11075 researched values for. */
11076 if (aarch64_tune_params.prefetch->num_slots > 0)
11077 maybe_set_param_value (PARAM_SIMULTANEOUS_PREFETCHES,
11078 aarch64_tune_params.prefetch->num_slots,
11079 opts->x_param_values,
11080 global_options_set.x_param_values);
11081 if (aarch64_tune_params.prefetch->l1_cache_size >= 0)
11082 maybe_set_param_value (PARAM_L1_CACHE_SIZE,
11083 aarch64_tune_params.prefetch->l1_cache_size,
11084 opts->x_param_values,
11085 global_options_set.x_param_values);
11086 if (aarch64_tune_params.prefetch->l1_cache_line_size >= 0)
50487d79 11087 maybe_set_param_value (PARAM_L1_CACHE_LINE_SIZE,
9d2c6e2e
MK
11088 aarch64_tune_params.prefetch->l1_cache_line_size,
11089 opts->x_param_values,
11090 global_options_set.x_param_values);
11091 if (aarch64_tune_params.prefetch->l2_cache_size >= 0)
11092 maybe_set_param_value (PARAM_L2_CACHE_SIZE,
11093 aarch64_tune_params.prefetch->l2_cache_size,
50487d79
EM
11094 opts->x_param_values,
11095 global_options_set.x_param_values);
d2ff35c0
LM
11096 if (!aarch64_tune_params.prefetch->prefetch_dynamic_strides)
11097 maybe_set_param_value (PARAM_PREFETCH_DYNAMIC_STRIDES,
11098 0,
11099 opts->x_param_values,
11100 global_options_set.x_param_values);
59100dfc
LM
11101 if (aarch64_tune_params.prefetch->minimum_stride >= 0)
11102 maybe_set_param_value (PARAM_PREFETCH_MINIMUM_STRIDE,
11103 aarch64_tune_params.prefetch->minimum_stride,
11104 opts->x_param_values,
11105 global_options_set.x_param_values);
50487d79 11106
13494fcb
WD
11107 /* Use the alternative scheduling-pressure algorithm by default. */
11108 maybe_set_param_value (PARAM_SCHED_PRESSURE_ALGORITHM, SCHED_PRESSURE_MODEL,
11109 opts->x_param_values,
11110 global_options_set.x_param_values);
11111
fbe9af50
TC
11112 /* If the user hasn't changed it via configure then set the default to 64 KB
11113 for the backend. */
11114 maybe_set_param_value (PARAM_STACK_CLASH_PROTECTION_GUARD_SIZE,
11115 DEFAULT_STK_CLASH_GUARD_SIZE == 0
11116 ? 16 : DEFAULT_STK_CLASH_GUARD_SIZE,
11117 opts->x_param_values,
11118 global_options_set.x_param_values);
11119
11120 /* Validate the guard size. */
11121 int guard_size = PARAM_VALUE (PARAM_STACK_CLASH_PROTECTION_GUARD_SIZE);
fbe9af50
TC
11122
11123 /* Enforce that the probing interval is the same as the guard size so the
11124 mid-end does the right thing. */
11125 maybe_set_param_value (PARAM_STACK_CLASH_PROTECTION_PROBE_INTERVAL,
11126 guard_size,
11127 opts->x_param_values,
11128 global_options_set.x_param_values);
11129
11130 /* The maybe_set calls won't update the value if the user has explicitly set
11131 one, which means we need to validate that the probing interval and guard size
11132 are equal. */
11133 int probe_interval
11134 = PARAM_VALUE (PARAM_STACK_CLASH_PROTECTION_PROBE_INTERVAL);
11135 if (guard_size != probe_interval)
11136 error ("stack clash guard size '%d' must be equal to probing interval "
11137 "'%d'", guard_size, probe_interval);
11138
16b2cafd
MK
11139 /* Enable software prefetching at the specified optimization level for
11140 CPUs that have prefetch. Lower the optimization level threshold by 1
11141 when profiling is enabled. */
11142 if (opts->x_flag_prefetch_loop_arrays < 0
11143 && !opts->x_optimize_size
11144 && aarch64_tune_params.prefetch->default_opt_level >= 0
11145 && opts->x_optimize >= aarch64_tune_params.prefetch->default_opt_level)
11146 opts->x_flag_prefetch_loop_arrays = 1;
11147
266c2b54
ML
11148 if (opts->x_aarch64_arch_string == NULL)
11149 opts->x_aarch64_arch_string = selected_arch->name;
11150 if (opts->x_aarch64_cpu_string == NULL)
11151 opts->x_aarch64_cpu_string = selected_cpu->name;
11152 if (opts->x_aarch64_tune_string == NULL)
11153 opts->x_aarch64_tune_string = selected_tune->name;
11154
0cfff2a1
KT
11155 aarch64_override_options_after_change_1 (opts);
11156}
43e9d192 11157
01f44038
KT
11158/* Print a hint with a suggestion for a core or architecture name that
11159 most closely resembles what the user passed in STR. ARCH is true if
11160 the user is asking for an architecture name. ARCH is false if the user
11161 is asking for a core name. */
11162
11163static void
11164aarch64_print_hint_for_core_or_arch (const char *str, bool arch)
11165{
11166 auto_vec<const char *> candidates;
11167 const struct processor *entry = arch ? all_architectures : all_cores;
11168 for (; entry->name != NULL; entry++)
11169 candidates.safe_push (entry->name);
a08b5429
ML
11170
11171#ifdef HAVE_LOCAL_CPU_DETECT
11172 /* Add also "native" as possible value. */
11173 if (arch)
11174 candidates.safe_push ("native");
11175#endif
11176
01f44038
KT
11177 char *s;
11178 const char *hint = candidates_list_and_hint (str, s, candidates);
11179 if (hint)
11180 inform (input_location, "valid arguments are: %s;"
11181 " did you mean %qs?", s, hint);
6285e915
ML
11182 else
11183 inform (input_location, "valid arguments are: %s", s);
11184
01f44038
KT
11185 XDELETEVEC (s);
11186}
11187
11188/* Print a hint with a suggestion for a core name that most closely resembles
11189 what the user passed in STR. */
11190
11191inline static void
11192aarch64_print_hint_for_core (const char *str)
11193{
11194 aarch64_print_hint_for_core_or_arch (str, false);
11195}
11196
11197/* Print a hint with a suggestion for an architecture name that most closely
11198 resembles what the user passed in STR. */
11199
11200inline static void
11201aarch64_print_hint_for_arch (const char *str)
11202{
11203 aarch64_print_hint_for_core_or_arch (str, true);
11204}
11205
c7887347
ML
11206
11207/* Print a hint with a suggestion for an extension name
11208 that most closely resembles what the user passed in STR. */
11209
11210void
11211aarch64_print_hint_for_extensions (const std::string &str)
11212{
11213 auto_vec<const char *> candidates;
11214 aarch64_get_all_extension_candidates (&candidates);
11215 char *s;
11216 const char *hint = candidates_list_and_hint (str.c_str (), s, candidates);
11217 if (hint)
11218 inform (input_location, "valid arguments are: %s;"
11219 " did you mean %qs?", s, hint);
11220 else
11221 inform (input_location, "valid arguments are: %s;", s);
11222
11223 XDELETEVEC (s);
11224}
11225
0cfff2a1
KT
11226/* Validate a command-line -mcpu option. Parse the cpu and extensions (if any)
11227 specified in STR and throw errors if appropriate. Put the results, if
361fb3ee
KT
11228 they are valid, in RES and ISA_FLAGS. Return whether the option is
11229 valid. */
43e9d192 11230
361fb3ee 11231static bool
0cfff2a1
KT
11232aarch64_validate_mcpu (const char *str, const struct processor **res,
11233 unsigned long *isa_flags)
11234{
c7887347 11235 std::string invalid_extension;
0cfff2a1 11236 enum aarch64_parse_opt_result parse_res
c7887347 11237 = aarch64_parse_cpu (str, res, isa_flags, &invalid_extension);
0cfff2a1
KT
11238
11239 if (parse_res == AARCH64_PARSE_OK)
361fb3ee 11240 return true;
0cfff2a1
KT
11241
11242 switch (parse_res)
11243 {
11244 case AARCH64_PARSE_MISSING_ARG:
fb241da2 11245 error ("missing cpu name in %<-mcpu=%s%>", str);
0cfff2a1
KT
11246 break;
11247 case AARCH64_PARSE_INVALID_ARG:
11248 error ("unknown value %qs for -mcpu", str);
01f44038 11249 aarch64_print_hint_for_core (str);
0cfff2a1
KT
11250 break;
11251 case AARCH64_PARSE_INVALID_FEATURE:
c7887347
ML
11252 error ("invalid feature modifier %qs in %<-mcpu=%s%>",
11253 invalid_extension.c_str (), str);
11254 aarch64_print_hint_for_extensions (invalid_extension);
0cfff2a1
KT
11255 break;
11256 default:
11257 gcc_unreachable ();
11258 }
361fb3ee
KT
11259
11260 return false;
0cfff2a1
KT
11261}
11262
11263/* Validate a command-line -march option. Parse the arch and extensions
11264 (if any) specified in STR and throw errors if appropriate. Put the
361fb3ee
KT
11265 results, if they are valid, in RES and ISA_FLAGS. Return whether the
11266 option is valid. */
0cfff2a1 11267
361fb3ee 11268static bool
0cfff2a1 11269aarch64_validate_march (const char *str, const struct processor **res,
01f44038 11270 unsigned long *isa_flags)
0cfff2a1 11271{
c7887347 11272 std::string invalid_extension;
0cfff2a1 11273 enum aarch64_parse_opt_result parse_res
c7887347 11274 = aarch64_parse_arch (str, res, isa_flags, &invalid_extension);
0cfff2a1
KT
11275
11276 if (parse_res == AARCH64_PARSE_OK)
361fb3ee 11277 return true;
0cfff2a1
KT
11278
11279 switch (parse_res)
11280 {
11281 case AARCH64_PARSE_MISSING_ARG:
fb241da2 11282 error ("missing arch name in %<-march=%s%>", str);
0cfff2a1
KT
11283 break;
11284 case AARCH64_PARSE_INVALID_ARG:
11285 error ("unknown value %qs for -march", str);
01f44038 11286 aarch64_print_hint_for_arch (str);
0cfff2a1
KT
11287 break;
11288 case AARCH64_PARSE_INVALID_FEATURE:
c7887347
ML
11289 error ("invalid feature modifier %qs in %<-march=%s%>",
11290 invalid_extension.c_str (), str);
11291 aarch64_print_hint_for_extensions (invalid_extension);
0cfff2a1
KT
11292 break;
11293 default:
11294 gcc_unreachable ();
11295 }
361fb3ee
KT
11296
11297 return false;
0cfff2a1
KT
11298}
11299
11300/* Validate a command-line -mtune option. Parse the cpu
11301 specified in STR and throw errors if appropriate. Put the
361fb3ee
KT
11302 result, if it is valid, in RES. Return whether the option is
11303 valid. */
0cfff2a1 11304
361fb3ee 11305static bool
0cfff2a1
KT
11306aarch64_validate_mtune (const char *str, const struct processor **res)
11307{
11308 enum aarch64_parse_opt_result parse_res
11309 = aarch64_parse_tune (str, res);
11310
11311 if (parse_res == AARCH64_PARSE_OK)
361fb3ee 11312 return true;
0cfff2a1
KT
11313
11314 switch (parse_res)
11315 {
11316 case AARCH64_PARSE_MISSING_ARG:
fb241da2 11317 error ("missing cpu name in %<-mtune=%s%>", str);
0cfff2a1
KT
11318 break;
11319 case AARCH64_PARSE_INVALID_ARG:
11320 error ("unknown value %qs for -mtune", str);
01f44038 11321 aarch64_print_hint_for_core (str);
0cfff2a1
KT
11322 break;
11323 default:
11324 gcc_unreachable ();
11325 }
361fb3ee
KT
11326 return false;
11327}
11328
11329/* Return the CPU corresponding to the enum CPU.
11330 If it doesn't specify a cpu, return the default. */
11331
11332static const struct processor *
11333aarch64_get_tune_cpu (enum aarch64_processor cpu)
11334{
11335 if (cpu != aarch64_none)
11336 return &all_cores[cpu];
11337
11338 /* The & 0x3f is to extract the bottom 6 bits that encode the
11339 default cpu as selected by the --with-cpu GCC configure option
11340 in config.gcc.
11341 ???: The whole TARGET_CPU_DEFAULT and AARCH64_CPU_DEFAULT_FLAGS
11342 flags mechanism should be reworked to make it more sane. */
11343 return &all_cores[TARGET_CPU_DEFAULT & 0x3f];
11344}
11345
11346/* Return the architecture corresponding to the enum ARCH.
11347 If it doesn't specify a valid architecture, return the default. */
11348
11349static const struct processor *
11350aarch64_get_arch (enum aarch64_arch arch)
11351{
11352 if (arch != aarch64_no_arch)
11353 return &all_architectures[arch];
11354
11355 const struct processor *cpu = &all_cores[TARGET_CPU_DEFAULT & 0x3f];
11356
11357 return &all_architectures[cpu->arch];
0cfff2a1
KT
11358}
11359
43cacb12
RS
11360/* Return the VG value associated with -msve-vector-bits= value VALUE. */
11361
11362static poly_uint16
11363aarch64_convert_sve_vector_bits (aarch64_sve_vector_bits_enum value)
11364{
11365 /* For now generate vector-length agnostic code for -msve-vector-bits=128.
11366 This ensures we can clearly distinguish SVE and Advanced SIMD modes when
11367 deciding which .md file patterns to use and when deciding whether
11368 something is a legitimate address or constant. */
11369 if (value == SVE_SCALABLE || value == SVE_128)
11370 return poly_uint16 (2, 2);
11371 else
11372 return (int) value / 64;
11373}
11374
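For example, -msve-vector-bits=256 yields a constant VG of 256/64 = 4 (four 64-bit granules), while both SVE_SCALABLE and SVE_128 map to the runtime-variable poly_uint16 (2, 2), so 128-bit SVE is still compiled as vector-length-agnostic code for the reason given in the comment above.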
0cfff2a1
KT
11375/* Implement TARGET_OPTION_OVERRIDE. This is called once in the beginning
11376 and is used to parse the -m{cpu,tune,arch} strings and setup the initial
11377 tuning structs. In particular it must set selected_tune and
11378 aarch64_isa_flags that define the available ISA features and tuning
11379 decisions. It must also set selected_arch as this will be used to
11380 output the .arch asm tags for each function. */
11381
11382static void
11383aarch64_override_options (void)
11384{
11385 unsigned long cpu_isa = 0;
11386 unsigned long arch_isa = 0;
11387 aarch64_isa_flags = 0;
11388
361fb3ee
KT
11389 bool valid_cpu = true;
11390 bool valid_tune = true;
11391 bool valid_arch = true;
11392
0cfff2a1
KT
11393 selected_cpu = NULL;
11394 selected_arch = NULL;
11395 selected_tune = NULL;
11396
11397 /* -mcpu=CPU is shorthand for -march=ARCH_FOR_CPU, -mtune=CPU.
11398 If either of -march or -mtune is given, they override their
11399 respective component of -mcpu. */
11400 if (aarch64_cpu_string)
361fb3ee
KT
11401 valid_cpu = aarch64_validate_mcpu (aarch64_cpu_string, &selected_cpu,
11402 &cpu_isa);
0cfff2a1
KT
11403
11404 if (aarch64_arch_string)
361fb3ee
KT
11405 valid_arch = aarch64_validate_march (aarch64_arch_string, &selected_arch,
11406 &arch_isa);
0cfff2a1
KT
11407
11408 if (aarch64_tune_string)
361fb3ee 11409 valid_tune = aarch64_validate_mtune (aarch64_tune_string, &selected_tune);
43e9d192
IB
11410
11411 /* If the user did not specify a processor, choose the default
11412 one for them. This will be the CPU set during configuration using
a3cd0246 11413 --with-cpu, otherwise it is "generic". */
43e9d192
IB
11414 if (!selected_cpu)
11415 {
0cfff2a1
KT
11416 if (selected_arch)
11417 {
11418 selected_cpu = &all_cores[selected_arch->ident];
11419 aarch64_isa_flags = arch_isa;
361fb3ee 11420 explicit_arch = selected_arch->arch;
0cfff2a1
KT
11421 }
11422 else
11423 {
361fb3ee
KT
11424 /* Get default configure-time CPU. */
11425 selected_cpu = aarch64_get_tune_cpu (aarch64_none);
0cfff2a1
KT
11426 aarch64_isa_flags = TARGET_CPU_DEFAULT >> 6;
11427 }
361fb3ee
KT
11428
11429 if (selected_tune)
11430 explicit_tune_core = selected_tune->ident;
0cfff2a1
KT
11431 }
11432 /* If both -mcpu and -march are specified, check that they are architecturally
11433 compatible, warn if they're not and prefer the -march ISA flags. */
11434 else if (selected_arch)
11435 {
11436 if (selected_arch->arch != selected_cpu->arch)
11437 {
11438 warning (0, "switch -mcpu=%s conflicts with -march=%s switch",
11439 all_architectures[selected_cpu->arch].name,
11440 selected_arch->name);
11441 }
11442 aarch64_isa_flags = arch_isa;
361fb3ee
KT
11443 explicit_arch = selected_arch->arch;
11444 explicit_tune_core = selected_tune ? selected_tune->ident
11445 : selected_cpu->ident;
0cfff2a1
KT
11446 }
11447 else
11448 {
11449 /* -mcpu but no -march. */
11450 aarch64_isa_flags = cpu_isa;
361fb3ee
KT
11451 explicit_tune_core = selected_tune ? selected_tune->ident
11452 : selected_cpu->ident;
11453 gcc_assert (selected_cpu);
11454 selected_arch = &all_architectures[selected_cpu->arch];
11455 explicit_arch = selected_arch->arch;
43e9d192
IB
11456 }
11457
0cfff2a1
KT
11458 /* Set the arch as well, as we will need it when outputting
11459 the .arch directive in assembly. */
11460 if (!selected_arch)
11461 {
11462 gcc_assert (selected_cpu);
11463 selected_arch = &all_architectures[selected_cpu->arch];
11464 }
43e9d192 11465
43e9d192 11466 if (!selected_tune)
3edaf26d 11467 selected_tune = selected_cpu;
43e9d192 11468
0cfff2a1
KT
11469#ifndef HAVE_AS_MABI_OPTION
11470 /* The compiler may have been configured with 2.23.* binutils, which does
11471 not have support for ILP32. */
11472 if (TARGET_ILP32)
ee61f880 11473 error ("assembler does not support -mabi=ilp32");
0cfff2a1 11474#endif
43e9d192 11475
43cacb12
RS
11476 /* Convert -msve-vector-bits to a VG count. */
11477 aarch64_sve_vg = aarch64_convert_sve_vector_bits (aarch64_sve_vector_bits);
11478
db58fd89 11479 if (aarch64_ra_sign_scope != AARCH64_FUNCTION_NONE && TARGET_ILP32)
ee61f880 11480 sorry ("return address signing is only supported for -mabi=lp64");
db58fd89 11481
361fb3ee
KT
11482 /* Make sure we properly set up the explicit options. */
11483 if ((aarch64_cpu_string && valid_cpu)
11484 || (aarch64_tune_string && valid_tune))
11485 gcc_assert (explicit_tune_core != aarch64_none);
11486
11487 if ((aarch64_cpu_string && valid_cpu)
11488 || (aarch64_arch_string && valid_arch))
11489 gcc_assert (explicit_arch != aarch64_no_arch);
11490
5f7dbaa0
RE
11491 /* The pass to insert speculation tracking runs before
11492 shrink-wrapping and the latter does not know how to update the
11493 tracking status. So disable it in this case. */
11494 if (aarch64_track_speculation)
11495 flag_shrink_wrap = 0;
11496
0cfff2a1
KT
11497 aarch64_override_options_internal (&global_options);
11498
11499 /* Save these options as the default ones in case we push and pop them later
11500 while processing functions with potential target attributes. */
11501 target_option_default_node = target_option_current_node
11502 = build_target_option_node (&global_options);
43e9d192
IB
11503}
11504
11505/* Implement targetm.override_options_after_change. */
11506
11507static void
11508aarch64_override_options_after_change (void)
11509{
0cfff2a1 11510 aarch64_override_options_after_change_1 (&global_options);
43e9d192
IB
11511}
11512
11513static struct machine_function *
11514aarch64_init_machine_status (void)
11515{
11516 struct machine_function *machine;
766090c2 11517 machine = ggc_cleared_alloc<machine_function> ();
43e9d192
IB
11518 return machine;
11519}
11520
11521void
11522aarch64_init_expanders (void)
11523{
11524 init_machine_status = aarch64_init_machine_status;
11525}
11526
11527/* A checking mechanism for the implementation of the various code models. */
11528static void
0cfff2a1 11529initialize_aarch64_code_model (struct gcc_options *opts)
43e9d192 11530{
0cfff2a1 11531 if (opts->x_flag_pic)
43e9d192 11532 {
0cfff2a1 11533 switch (opts->x_aarch64_cmodel_var)
43e9d192
IB
11534 {
11535 case AARCH64_CMODEL_TINY:
11536 aarch64_cmodel = AARCH64_CMODEL_TINY_PIC;
11537 break;
11538 case AARCH64_CMODEL_SMALL:
34ecdb0f 11539#ifdef HAVE_AS_SMALL_PIC_RELOCS
1b1e81f8
JW
11540 aarch64_cmodel = (flag_pic == 2
11541 ? AARCH64_CMODEL_SMALL_PIC
11542 : AARCH64_CMODEL_SMALL_SPIC);
34ecdb0f
JW
11543#else
11544 aarch64_cmodel = AARCH64_CMODEL_SMALL_PIC;
11545#endif
43e9d192
IB
11546 break;
11547 case AARCH64_CMODEL_LARGE:
11548 sorry ("code model %qs with -f%s", "large",
0cfff2a1 11549 opts->x_flag_pic > 1 ? "PIC" : "pic");
1c652781 11550 break;
43e9d192
IB
11551 default:
11552 gcc_unreachable ();
11553 }
11554 }
11555 else
0cfff2a1 11556 aarch64_cmodel = opts->x_aarch64_cmodel_var;
43e9d192
IB
11557}
11558
361fb3ee
KT
11559/* Implement TARGET_OPTION_SAVE. */
11560
11561static void
11562aarch64_option_save (struct cl_target_option *ptr, struct gcc_options *opts)
11563{
11564 ptr->x_aarch64_override_tune_string = opts->x_aarch64_override_tune_string;
11565}
11566
11567/* Implements TARGET_OPTION_RESTORE. Restore the backend codegen decisions
11568 using the information saved in PTR. */
11569
11570static void
11571aarch64_option_restore (struct gcc_options *opts, struct cl_target_option *ptr)
11572{
11573 opts->x_explicit_tune_core = ptr->x_explicit_tune_core;
11574 selected_tune = aarch64_get_tune_cpu (ptr->x_explicit_tune_core);
11575 opts->x_explicit_arch = ptr->x_explicit_arch;
11576 selected_arch = aarch64_get_arch (ptr->x_explicit_arch);
11577 opts->x_aarch64_override_tune_string = ptr->x_aarch64_override_tune_string;
11578
11579 aarch64_override_options_internal (opts);
11580}
11581
11582/* Implement TARGET_OPTION_PRINT. */
11583
11584static void
11585aarch64_option_print (FILE *file, int indent, struct cl_target_option *ptr)
11586{
11587 const struct processor *cpu
11588 = aarch64_get_tune_cpu (ptr->x_explicit_tune_core);
11589 unsigned long isa_flags = ptr->x_aarch64_isa_flags;
11590 const struct processor *arch = aarch64_get_arch (ptr->x_explicit_arch);
054b4005 11591 std::string extension
04a99ebe 11592 = aarch64_get_extension_string_for_isa_flags (isa_flags, arch->flags);
361fb3ee
KT
11593
11594 fprintf (file, "%*sselected tune = %s\n", indent, "", cpu->name);
054b4005
JG
11595 fprintf (file, "%*sselected arch = %s%s\n", indent, "",
11596 arch->name, extension.c_str ());
361fb3ee
KT
11597}
11598
d78006d9
KT
11599static GTY(()) tree aarch64_previous_fndecl;
11600
e4ea20c8
KT
11601void
11602aarch64_reset_previous_fndecl (void)
11603{
11604 aarch64_previous_fndecl = NULL;
11605}
11606
acfc1ac1
KT
11607/* Restore or save the TREE_TARGET_GLOBALS from or to NEW_TREE.
11608 Used by aarch64_set_current_function and aarch64_pragma_target_parse to
11609 make sure optab availability predicates are recomputed when necessary. */
11610
11611void
11612aarch64_save_restore_target_globals (tree new_tree)
11613{
11614 if (TREE_TARGET_GLOBALS (new_tree))
11615 restore_target_globals (TREE_TARGET_GLOBALS (new_tree));
11616 else if (new_tree == target_option_default_node)
11617 restore_target_globals (&default_target_globals);
11618 else
11619 TREE_TARGET_GLOBALS (new_tree) = save_target_globals_default_opts ();
11620}
11621
d78006d9
KT
11622/* Implement TARGET_SET_CURRENT_FUNCTION. Unpack the codegen decisions
11623 like tuning and ISA features from the DECL_FUNCTION_SPECIFIC_TARGET
11624 of the function, if such exists. This function may be called multiple
11625 times on a single function so use aarch64_previous_fndecl to avoid
11626 setting up identical state. */
11627
11628static void
11629aarch64_set_current_function (tree fndecl)
11630{
acfc1ac1
KT
11631 if (!fndecl || fndecl == aarch64_previous_fndecl)
11632 return;
11633
d78006d9
KT
11634 tree old_tree = (aarch64_previous_fndecl
11635 ? DECL_FUNCTION_SPECIFIC_TARGET (aarch64_previous_fndecl)
11636 : NULL_TREE);
11637
acfc1ac1 11638 tree new_tree = DECL_FUNCTION_SPECIFIC_TARGET (fndecl);
d78006d9 11639
acfc1ac1
KT
11640 /* If current function has no attributes but the previous one did,
11641 use the default node. */
11642 if (!new_tree && old_tree)
11643 new_tree = target_option_default_node;
d78006d9 11644
acfc1ac1
KT
11645 /* If nothing to do, return. #pragma GCC reset or #pragma GCC pop to
11646 the default have been handled by aarch64_save_restore_target_globals from
11647 aarch64_pragma_target_parse. */
11648 if (old_tree == new_tree)
11649 return;
d78006d9 11650
acfc1ac1 11651 aarch64_previous_fndecl = fndecl;
6e17a23b 11652
acfc1ac1
KT
11653 /* First set the target options. */
11654 cl_target_option_restore (&global_options, TREE_TARGET_OPTION (new_tree));
6e17a23b 11655
acfc1ac1 11656 aarch64_save_restore_target_globals (new_tree);
d78006d9 11657}
361fb3ee 11658
5a2c8331
KT
11659/* Enum describing the various ways we can handle attributes.
11660 In many cases we can reuse the generic option handling machinery. */
11661
11662enum aarch64_attr_opt_type
11663{
11664 aarch64_attr_mask, /* Attribute should set a bit in target_flags. */
11665 aarch64_attr_bool, /* Attribute sets or unsets a boolean variable. */
11666 aarch64_attr_enum, /* Attribute sets an enum variable. */
11667 aarch64_attr_custom /* Attribute requires a custom handling function. */
11668};
11669
11670/* All the information needed to handle a target attribute.
11671 NAME is the name of the attribute.
9c582551 11672 ATTR_TYPE specifies the type of behavior of the attribute as described
5a2c8331
KT
11673 in the definition of enum aarch64_attr_opt_type.
11674 ALLOW_NEG is true if the attribute supports a "no-" form.
ab93e9b7
SE
11675 HANDLER is the function that takes the attribute string as an argument.
11676 It is needed only when the ATTR_TYPE is aarch64_attr_custom.
5a2c8331 11677 OPT_NUM is the enum specifying the option that the attribute modifies.
9c582551 11678 This is needed for attributes that mirror the behavior of a command-line
5a2c8331
KT
11679 option, that is, it has ATTR_TYPE aarch64_attr_mask, aarch64_attr_bool or
11680 aarch64_attr_enum. */
11681
11682struct aarch64_attribute_info
11683{
11684 const char *name;
11685 enum aarch64_attr_opt_type attr_type;
11686 bool allow_neg;
ab93e9b7 11687 bool (*handler) (const char *);
5a2c8331
KT
11688 enum opt_code opt_num;
11689};
11690
ab93e9b7 11691/* Handle the ARCH_STR argument to the arch= target attribute. */
5a2c8331
KT
11692
11693static bool
ab93e9b7 11694aarch64_handle_attr_arch (const char *str)
5a2c8331
KT
11695{
11696 const struct processor *tmp_arch = NULL;
c7887347 11697 std::string invalid_extension;
5a2c8331 11698 enum aarch64_parse_opt_result parse_res
c7887347 11699 = aarch64_parse_arch (str, &tmp_arch, &aarch64_isa_flags, &invalid_extension);
5a2c8331
KT
11700
11701 if (parse_res == AARCH64_PARSE_OK)
11702 {
11703 gcc_assert (tmp_arch);
11704 selected_arch = tmp_arch;
11705 explicit_arch = selected_arch->arch;
11706 return true;
11707 }
11708
11709 switch (parse_res)
11710 {
11711 case AARCH64_PARSE_MISSING_ARG:
ab93e9b7 11712 error ("missing name in %<target(\"arch=\")%> pragma or attribute");
5a2c8331
KT
11713 break;
11714 case AARCH64_PARSE_INVALID_ARG:
ab93e9b7 11715 error ("invalid name (\"%s\") in %<target(\"arch=\")%> pragma or attribute", str);
01f44038 11716 aarch64_print_hint_for_arch (str);
5a2c8331
KT
11717 break;
11718 case AARCH64_PARSE_INVALID_FEATURE:
c7887347
ML
11719 error ("invalid feature modifier %s of value (\"%s\") in "
11720 "%<target()%> pragma or attribute", invalid_extension.c_str (), str);
11721 aarch64_print_hint_for_extensions (invalid_extension);
5a2c8331
KT
11722 break;
11723 default:
11724 gcc_unreachable ();
11725 }
11726
11727 return false;
11728}
11729
ab93e9b7 11730/* Handle the argument CPU_STR to the cpu= target attribute. */
5a2c8331
KT
11731
11732static bool
ab93e9b7 11733aarch64_handle_attr_cpu (const char *str)
5a2c8331
KT
11734{
11735 const struct processor *tmp_cpu = NULL;
c7887347 11736 std::string invalid_extension;
5a2c8331 11737 enum aarch64_parse_opt_result parse_res
c7887347 11738 = aarch64_parse_cpu (str, &tmp_cpu, &aarch64_isa_flags, &invalid_extension);
5a2c8331
KT
11739
11740 if (parse_res == AARCH64_PARSE_OK)
11741 {
11742 gcc_assert (tmp_cpu);
11743 selected_tune = tmp_cpu;
11744 explicit_tune_core = selected_tune->ident;
11745
11746 selected_arch = &all_architectures[tmp_cpu->arch];
11747 explicit_arch = selected_arch->arch;
11748 return true;
11749 }
11750
11751 switch (parse_res)
11752 {
11753 case AARCH64_PARSE_MISSING_ARG:
ab93e9b7 11754 error ("missing name in %<target(\"cpu=\")%> pragma or attribute");
5a2c8331
KT
11755 break;
11756 case AARCH64_PARSE_INVALID_ARG:
ab93e9b7 11757 error ("invalid name (\"%s\") in %<target(\"cpu=\")%> pragma or attribute", str);
01f44038 11758 aarch64_print_hint_for_core (str);
5a2c8331
KT
11759 break;
11760 case AARCH64_PARSE_INVALID_FEATURE:
c7887347
ML
11761 error ("invalid feature modifier %s of value (\"%s\") in "
11762 "%<target()%> pragma or attribute", invalid_extension.c_str (), str);
11763 aarch64_print_hint_for_extensions (invalid_extension);
5a2c8331
KT
11764 break;
11765 default:
11766 gcc_unreachable ();
11767 }
11768
11769 return false;
11770}
11771
ab93e9b7 11772/* Handle the argument STR to the tune= target attribute. */
5a2c8331
KT
11773
11774static bool
ab93e9b7 11775aarch64_handle_attr_tune (const char *str)
5a2c8331
KT
11776{
11777 const struct processor *tmp_tune = NULL;
11778 enum aarch64_parse_opt_result parse_res
11779 = aarch64_parse_tune (str, &tmp_tune);
11780
11781 if (parse_res == AARCH64_PARSE_OK)
11782 {
11783 gcc_assert (tmp_tune);
11784 selected_tune = tmp_tune;
11785 explicit_tune_core = selected_tune->ident;
11786 return true;
11787 }
11788
11789 switch (parse_res)
11790 {
11791 case AARCH64_PARSE_INVALID_ARG:
ab93e9b7 11792 error ("invalid name (\"%s\") in %<target(\"tune=\")%> pragma or attribute", str);
01f44038 11793 aarch64_print_hint_for_core (str);
5a2c8331
KT
11794 break;
11795 default:
11796 gcc_unreachable ();
11797 }
11798
11799 return false;
11800}
11801
11802/* Parse an architecture extensions target attribute string specified in STR.
11803 For example "+fp+nosimd". Show any errors if needed. Return TRUE
11804 if successful. Update aarch64_isa_flags to reflect the ISA features
ab93e9b7 11805 modified. */
5a2c8331
KT
11806
11807static bool
ab93e9b7 11808aarch64_handle_attr_isa_flags (char *str)
5a2c8331
KT
11809{
11810 enum aarch64_parse_opt_result parse_res;
11811 unsigned long isa_flags = aarch64_isa_flags;
11812
e4ea20c8
KT
11813 /* We allow "+nothing" in the beginning to clear out all architectural
11814 features if the user wants to handpick specific features. */
11815 if (strncmp ("+nothing", str, 8) == 0)
11816 {
11817 isa_flags = 0;
11818 str += 8;
11819 }
11820
c7887347
ML
11821 std::string invalid_extension;
11822 parse_res = aarch64_parse_extension (str, &isa_flags, &invalid_extension);
5a2c8331
KT
11823
11824 if (parse_res == AARCH64_PARSE_OK)
11825 {
11826 aarch64_isa_flags = isa_flags;
11827 return true;
11828 }
11829
11830 switch (parse_res)
11831 {
11832 case AARCH64_PARSE_MISSING_ARG:
ab93e9b7 11833 error ("missing value in %<target()%> pragma or attribute");
5a2c8331
KT
11834 break;
11835
11836 case AARCH64_PARSE_INVALID_FEATURE:
c7887347
ML
11837 error ("invalid feature modifier %s of value (\"%s\") in "
11838 "%<target()%> pragma or attribute", invalid_extension.c_str (), str);
5a2c8331
KT
11839 break;
11840
11841 default:
11842 gcc_unreachable ();
11843 }
11844
11845 return false;
11846}
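/* Illustrative example (not part of the GCC sources): with the handler
   above, a string such as

     __attribute__ ((target ("+nothing+fp")))

   first clears every architectural feature bit because of the leading
   "+nothing" and then re-enables only the FP extension, whereas
   "+fp+nosimd" starts from the current aarch64_isa_flags and merely
   toggles the named features on top of it.  */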
11847
11848/* The target attributes that we support. On top of these we also support just
11849 ISA extensions, like __attribute__ ((target ("+crc"))), but that case is
11850 handled explicitly in aarch64_process_one_target_attr. */
11851
11852static const struct aarch64_attribute_info aarch64_attributes[] =
11853{
11854 { "general-regs-only", aarch64_attr_mask, false, NULL,
11855 OPT_mgeneral_regs_only },
11856 { "fix-cortex-a53-835769", aarch64_attr_bool, true, NULL,
11857 OPT_mfix_cortex_a53_835769 },
48bb1a55
CL
11858 { "fix-cortex-a53-843419", aarch64_attr_bool, true, NULL,
11859 OPT_mfix_cortex_a53_843419 },
5a2c8331 11860 { "cmodel", aarch64_attr_enum, false, NULL, OPT_mcmodel_ },
675d044c 11861 { "strict-align", aarch64_attr_mask, true, NULL, OPT_mstrict_align },
5a2c8331
KT
11862 { "omit-leaf-frame-pointer", aarch64_attr_bool, true, NULL,
11863 OPT_momit_leaf_frame_pointer },
11864 { "tls-dialect", aarch64_attr_enum, false, NULL, OPT_mtls_dialect_ },
11865 { "arch", aarch64_attr_custom, false, aarch64_handle_attr_arch,
11866 OPT_march_ },
11867 { "cpu", aarch64_attr_custom, false, aarch64_handle_attr_cpu, OPT_mcpu_ },
11868 { "tune", aarch64_attr_custom, false, aarch64_handle_attr_tune,
11869 OPT_mtune_ },
db58fd89
JW
11870 { "sign-return-address", aarch64_attr_enum, false, NULL,
11871 OPT_msign_return_address_ },
5a2c8331
KT
11872 { NULL, aarch64_attr_custom, false, NULL, OPT____ }
11873};
11874
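/* Illustrative sketch (not part of the GCC sources) of how the table
   above is exercised from user code.  The function is made up; any
   attribute with ALLOW_NEG set also accepts a "no-" form:

     __attribute__ ((target ("arch=armv8-a,no-omit-leaf-frame-pointer")))
     int
     example_leaf (int x)
     {
       return x + 1;
     }

   The whole string is split on ',' by aarch64_process_target_attr and
   each token is handled by aarch64_process_one_target_attr below.  */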
11875/* Parse ARG_STR which contains the definition of one target attribute.
ab93e9b7 11876 Show appropriate errors if any or return true if the attribute is valid. */
5a2c8331
KT
11877
11878static bool
ab93e9b7 11879aarch64_process_one_target_attr (char *arg_str)
5a2c8331
KT
11880{
11881 bool invert = false;
11882
11883 size_t len = strlen (arg_str);
11884
11885 if (len == 0)
11886 {
ab93e9b7 11887 error ("malformed %<target()%> pragma or attribute");
5a2c8331
KT
11888 return false;
11889 }
11890
11891 char *str_to_check = (char *) alloca (len + 1);
11892 strcpy (str_to_check, arg_str);
11893
11894 /* Skip leading whitespace. */
11895 while (*str_to_check == ' ' || *str_to_check == '\t')
11896 str_to_check++;
11897
11898 /* We have something like __attribute__ ((target ("+fp+nosimd"))).
11899 It is easier to detect and handle it explicitly here rather than going
11900 through the machinery for the rest of the target attributes in this
11901 function. */
11902 if (*str_to_check == '+')
ab93e9b7 11903 return aarch64_handle_attr_isa_flags (str_to_check);
5a2c8331
KT
11904
11905 if (len > 3 && strncmp (str_to_check, "no-", 3) == 0)
11906 {
11907 invert = true;
11908 str_to_check += 3;
11909 }
11910 char *arg = strchr (str_to_check, '=');
11911
11912 /* If we found opt=foo then terminate STR_TO_CHECK at the '='
11913 and point ARG to "foo". */
11914 if (arg)
11915 {
11916 *arg = '\0';
11917 arg++;
11918 }
11919 const struct aarch64_attribute_info *p_attr;
16d12992 11920 bool found = false;
5a2c8331
KT
11921 for (p_attr = aarch64_attributes; p_attr->name; p_attr++)
11922 {
11923 /* If the names don't match up, or the user has given an argument
11924 to an attribute that doesn't accept one, or didn't give an argument
11925 to an attribute that expects one, fail to match. */
11926 if (strcmp (str_to_check, p_attr->name) != 0)
11927 continue;
11928
16d12992 11929 found = true;
5a2c8331
KT
11930 bool attr_need_arg_p = p_attr->attr_type == aarch64_attr_custom
11931 || p_attr->attr_type == aarch64_attr_enum;
11932
11933 if (attr_need_arg_p ^ (arg != NULL))
11934 {
ab93e9b7 11935 error ("pragma or attribute %<target(\"%s\")%> does not accept an argument", str_to_check);
5a2c8331
KT
11936 return false;
11937 }
11938
11939 /* If the name matches but the attribute does not allow "no-" versions
11940 then we can't match. */
11941 if (invert && !p_attr->allow_neg)
11942 {
ab93e9b7 11943 error ("pragma or attribute %<target(\"%s\")%> does not allow a negated form", str_to_check);
5a2c8331
KT
11944 return false;
11945 }
11946
11947 switch (p_attr->attr_type)
11948 {
11949 /* Has a custom handler registered.
11950 For example, cpu=, arch=, tune=. */
11951 case aarch64_attr_custom:
11952 gcc_assert (p_attr->handler);
ab93e9b7 11953 if (!p_attr->handler (arg))
5a2c8331
KT
11954 return false;
11955 break;
11956
11957 /* Either set or unset a boolean option. */
11958 case aarch64_attr_bool:
11959 {
11960 struct cl_decoded_option decoded;
11961
11962 generate_option (p_attr->opt_num, NULL, !invert,
11963 CL_TARGET, &decoded);
11964 aarch64_handle_option (&global_options, &global_options_set,
11965 &decoded, input_location);
11966 break;
11967 }
11968 /* Set or unset a bit in the target_flags. aarch64_handle_option
11969 should know what mask to apply given the option number. */
11970 case aarch64_attr_mask:
11971 {
11972 struct cl_decoded_option decoded;
11973 /* We only need to specify the option number.
11974 aarch64_handle_option will know which mask to apply. */
11975 decoded.opt_index = p_attr->opt_num;
11976 decoded.value = !invert;
11977 aarch64_handle_option (&global_options, &global_options_set,
11978 &decoded, input_location);
11979 break;
11980 }
11981 /* Use the option setting machinery to set an option to an enum. */
11982 case aarch64_attr_enum:
11983 {
11984 gcc_assert (arg);
11985 bool valid;
11986 int value;
11987 valid = opt_enum_arg_to_value (p_attr->opt_num, arg,
11988 &value, CL_TARGET);
11989 if (valid)
11990 {
11991 set_option (&global_options, NULL, p_attr->opt_num, value,
11992 NULL, DK_UNSPECIFIED, input_location,
11993 global_dc);
11994 }
11995 else
11996 {
ab93e9b7 11997 error ("pragma or attribute %<target(\"%s=%s\")%> is not valid", str_to_check, arg);
5a2c8331
KT
11998 }
11999 break;
12000 }
12001 default:
12002 gcc_unreachable ();
12003 }
12004 }
12005
16d12992
KT
12006 /* If we reached here we either have found an attribute and validated
12007 it or didn't match any. If we matched an attribute but its arguments
12008 were malformed we will have returned false already. */
12009 return found;
5a2c8331
KT
12010}
12011
12012/* Count how many times the character C appears in
12013 NULL-terminated string STR. */
12014
12015static unsigned int
12016num_occurences_in_str (char c, char *str)
12017{
12018 unsigned int res = 0;
12019 while (*str != '\0')
12020 {
12021 if (*str == c)
12022 res++;
12023
12024 str++;
12025 }
12026
12027 return res;
12028}
12029
12030/* Parse the tree in ARGS that contains the target attribute information
ab93e9b7 12031 and update the global target options space. */
5a2c8331
KT
12032
12033bool
ab93e9b7 12034aarch64_process_target_attr (tree args)
5a2c8331
KT
12035{
12036 if (TREE_CODE (args) == TREE_LIST)
12037 {
12038 do
12039 {
12040 tree head = TREE_VALUE (args);
12041 if (head)
12042 {
ab93e9b7 12043 if (!aarch64_process_target_attr (head))
5a2c8331
KT
12044 return false;
12045 }
12046 args = TREE_CHAIN (args);
12047 } while (args);
12048
12049 return true;
12050 }
3b6cb9e3
ML
12051
12052 if (TREE_CODE (args) != STRING_CST)
12053 {
12054 error ("attribute %<target%> argument not a string");
12055 return false;
12056 }
5a2c8331
KT
12057
12058 size_t len = strlen (TREE_STRING_POINTER (args));
12059 char *str_to_check = (char *) alloca (len + 1);
12060 strcpy (str_to_check, TREE_STRING_POINTER (args));
12061
12062 if (len == 0)
12063 {
ab93e9b7 12064 error ("malformed %<target()%> pragma or attribute");
5a2c8331
KT
12065 return false;
12066 }
12067
12068 /* Used to catch empty spaces between commas i.e.
12069 attribute ((target ("attr1,,attr2"))). */
12070 unsigned int num_commas = num_occurences_in_str (',', str_to_check);
12071
12072 /* Handle multiple target attributes separated by ','. */
7185a4eb 12073 char *token = strtok_r (str_to_check, ",", &str_to_check);
5a2c8331
KT
12074
12075 unsigned int num_attrs = 0;
12076 while (token)
12077 {
12078 num_attrs++;
ab93e9b7 12079 if (!aarch64_process_one_target_attr (token))
5a2c8331 12080 {
ab93e9b7 12081 error ("pragma or attribute %<target(\"%s\")%> is not valid", token);
5a2c8331
KT
12082 return false;
12083 }
12084
7185a4eb 12085 token = strtok_r (NULL, ",", &str_to_check);
5a2c8331
KT
12086 }
12087
12088 if (num_attrs != num_commas + 1)
12089 {
ab93e9b7 12090 error ("malformed %<target(\"%s\")%> pragma or attribute", TREE_STRING_POINTER (args));
5a2c8331
KT
12091 return false;
12092 }
12093
12094 return true;
12095}
12096
12097/* Implement TARGET_OPTION_VALID_ATTRIBUTE_P. This is used to
12098 process attribute ((target ("..."))). */
12099
12100static bool
12101aarch64_option_valid_attribute_p (tree fndecl, tree, tree args, int)
12102{
12103 struct cl_target_option cur_target;
12104 bool ret;
12105 tree old_optimize;
12106 tree new_target, new_optimize;
12107 tree existing_target = DECL_FUNCTION_SPECIFIC_TARGET (fndecl);
91d0e8de
KT
12108
12109 /* If what we're processing is the current pragma string then the
12110 target option node is already stored in target_option_current_node
12111 by aarch64_pragma_target_parse in aarch64-c.c. Use that to avoid
12112 having to re-parse the string. This is especially useful to keep
12113 arm_neon.h compile times down since that header contains a lot
12114 of intrinsics enclosed in pragmas. */
12115 if (!existing_target && args == current_target_pragma)
12116 {
12117 DECL_FUNCTION_SPECIFIC_TARGET (fndecl) = target_option_current_node;
12118 return true;
12119 }
5a2c8331
KT
12120 tree func_optimize = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl);
12121
12122 old_optimize = build_optimization_node (&global_options);
12123 func_optimize = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl);
12124
12125 /* If the function changed the optimization levels as well as setting
12126 target options, start with the optimizations specified. */
12127 if (func_optimize && func_optimize != old_optimize)
12128 cl_optimization_restore (&global_options,
12129 TREE_OPTIMIZATION (func_optimize));
12130
12131 /* Save the current target options to restore at the end. */
12132 cl_target_option_save (&cur_target, &global_options);
12133
12134 /* If fndecl already has some target attributes applied to it, unpack
12135 them so that we add this attribute on top of them, rather than
12136 overwriting them. */
12137 if (existing_target)
12138 {
12139 struct cl_target_option *existing_options
12140 = TREE_TARGET_OPTION (existing_target);
12141
12142 if (existing_options)
12143 cl_target_option_restore (&global_options, existing_options);
12144 }
12145 else
12146 cl_target_option_restore (&global_options,
12147 TREE_TARGET_OPTION (target_option_current_node));
12148
ab93e9b7 12149 ret = aarch64_process_target_attr (args);
5a2c8331
KT
12150
12151 /* Set up any additional state. */
12152 if (ret)
12153 {
12154 aarch64_override_options_internal (&global_options);
e95a988a
KT
12155 /* Initialize SIMD builtins if we haven't already.
12156 Set current_target_pragma to NULL for the duration so that
12157 the builtin initialization code doesn't try to tag the functions
12158 being built with the attributes specified by any current pragma, thus
12159 going into an infinite recursion. */
12160 if (TARGET_SIMD)
12161 {
12162 tree saved_current_target_pragma = current_target_pragma;
12163 current_target_pragma = NULL;
12164 aarch64_init_simd_builtins ();
12165 current_target_pragma = saved_current_target_pragma;
12166 }
5a2c8331
KT
12167 new_target = build_target_option_node (&global_options);
12168 }
12169 else
12170 new_target = NULL;
12171
12172 new_optimize = build_optimization_node (&global_options);
12173
12174 if (fndecl && ret)
12175 {
12176 DECL_FUNCTION_SPECIFIC_TARGET (fndecl) = new_target;
12177
12178 if (old_optimize != new_optimize)
12179 DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl) = new_optimize;
12180 }
12181
12182 cl_target_option_restore (&global_options, &cur_target);
12183
12184 if (old_optimize != new_optimize)
12185 cl_optimization_restore (&global_options,
12186 TREE_OPTIMIZATION (old_optimize));
12187 return ret;
12188}
12189
1fd8d40c
KT
12190/* Helper for aarch64_can_inline_p. In the case where CALLER and CALLEE are
12191 tri-bool options (yes, no, don't care) and the default value is
12192 DEF, determine whether to reject inlining. */
12193
12194static bool
12195aarch64_tribools_ok_for_inlining_p (int caller, int callee,
12196 int dont_care, int def)
12197{
12198 /* If the callee doesn't care, always allow inlining. */
12199 if (callee == dont_care)
12200 return true;
12201
12202 /* If the caller doesn't care, always allow inlining. */
12203 if (caller == dont_care)
12204 return true;
12205
12206 /* Otherwise, allow inlining if either the callee and caller values
12207 agree, or if the callee is using the default value. */
12208 return (callee == caller || callee == def);
12209}
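/* Worked example (illustrative only): for the -momit-leaf-frame-pointer
   check in aarch64_can_inline_p below, DONT_CARE is 2 and DEF is 1.
   A callee with value 2 (no explicit setting) always inlines, and so
   does any callee when the caller's value is 2.  Otherwise inlining is
   rejected only when the callee explicitly chose a value that differs
   from both the caller's explicit choice and the default, e.g. callee 0
   against caller 1.  */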
12210
12211/* Implement TARGET_CAN_INLINE_P. Decide whether it is valid
12212 to inline CALLEE into CALLER based on target-specific info.
12213 Make sure that the caller and callee have compatible architectural
12214 features. Then go through the other possible target attributes
12215 and see if they can block inlining. Try not to reject always_inline
12216 callees unless they are incompatible architecturally. */
12217
12218static bool
12219aarch64_can_inline_p (tree caller, tree callee)
12220{
12221 tree caller_tree = DECL_FUNCTION_SPECIFIC_TARGET (caller);
12222 tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (callee);
12223
1fd8d40c
KT
12224 struct cl_target_option *caller_opts
12225 = TREE_TARGET_OPTION (caller_tree ? caller_tree
12226 : target_option_default_node);
12227
675d044c
SD
12228 struct cl_target_option *callee_opts
12229 = TREE_TARGET_OPTION (callee_tree ? callee_tree
12230 : target_option_default_node);
1fd8d40c
KT
12231
12232 /* Callee's ISA flags should be a subset of the caller's. */
12233 if ((caller_opts->x_aarch64_isa_flags & callee_opts->x_aarch64_isa_flags)
12234 != callee_opts->x_aarch64_isa_flags)
12235 return false;
12236
12237 /* Allow non-strict aligned functions inlining into strict
12238 aligned ones. */
12239 if ((TARGET_STRICT_ALIGN_P (caller_opts->x_target_flags)
12240 != TARGET_STRICT_ALIGN_P (callee_opts->x_target_flags))
12241 && !(!TARGET_STRICT_ALIGN_P (callee_opts->x_target_flags)
12242 && TARGET_STRICT_ALIGN_P (caller_opts->x_target_flags)))
12243 return false;
12244
12245 bool always_inline = lookup_attribute ("always_inline",
12246 DECL_ATTRIBUTES (callee));
12247
12248 /* If the architectural features match up and the callee is always_inline
12249 then the other attributes don't matter. */
12250 if (always_inline)
12251 return true;
12252
12253 if (caller_opts->x_aarch64_cmodel_var
12254 != callee_opts->x_aarch64_cmodel_var)
12255 return false;
12256
12257 if (caller_opts->x_aarch64_tls_dialect
12258 != callee_opts->x_aarch64_tls_dialect)
12259 return false;
12260
12261 /* Honour explicit requests to workaround errata. */
12262 if (!aarch64_tribools_ok_for_inlining_p (
12263 caller_opts->x_aarch64_fix_a53_err835769,
12264 callee_opts->x_aarch64_fix_a53_err835769,
12265 2, TARGET_FIX_ERR_A53_835769_DEFAULT))
12266 return false;
12267
48bb1a55
CL
12268 if (!aarch64_tribools_ok_for_inlining_p (
12269 caller_opts->x_aarch64_fix_a53_err843419,
12270 callee_opts->x_aarch64_fix_a53_err843419,
12271 2, TARGET_FIX_ERR_A53_843419))
12272 return false;
12273
1fd8d40c
KT
12274 /* If the user explicitly specified -momit-leaf-frame-pointer for the
12275 caller and callee and they don't match up, reject inlining. */
12276 if (!aarch64_tribools_ok_for_inlining_p (
12277 caller_opts->x_flag_omit_leaf_frame_pointer,
12278 callee_opts->x_flag_omit_leaf_frame_pointer,
12279 2, 1))
12280 return false;
12281
12282 /* If the callee has specific tuning overrides, respect them. */
12283 if (callee_opts->x_aarch64_override_tune_string != NULL
12284 && caller_opts->x_aarch64_override_tune_string == NULL)
12285 return false;
12286
12287 /* If the user specified tuning override strings for the
12288 caller and callee and they don't match up, reject inlining.
12289 We just do a string compare here, we don't analyze the meaning
12290 of the string, as it would be too costly for little gain. */
12291 if (callee_opts->x_aarch64_override_tune_string
12292 && caller_opts->x_aarch64_override_tune_string
12293 && (strcmp (callee_opts->x_aarch64_override_tune_string,
12294 caller_opts->x_aarch64_override_tune_string) != 0))
12295 return false;
12296
12297 return true;
12298}
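/* Illustrative example (not part of the GCC sources): given the ISA
   subset check above, a callee declared with
   __attribute__ ((target ("+crc"))) cannot be inlined into a caller
   built without the CRC extension, because the callee's ISA flags would
   not be a subset of the caller's; inlining in the opposite direction
   (CRC-enabled caller, plain callee) is allowed.  */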
12299
43e9d192
IB
12300/* Return true if SYMBOL_REF X binds locally. */
12301
12302static bool
12303aarch64_symbol_binds_local_p (const_rtx x)
12304{
12305 return (SYMBOL_REF_DECL (x)
12306 ? targetm.binds_local_p (SYMBOL_REF_DECL (x))
12307 : SYMBOL_REF_LOCAL_P (x));
12308}
12309
12310/* Return true if SYMBOL_REF X is thread local */
12311static bool
12312aarch64_tls_symbol_p (rtx x)
12313{
12314 if (! TARGET_HAVE_TLS)
12315 return false;
12316
12317 if (GET_CODE (x) != SYMBOL_REF)
12318 return false;
12319
12320 return SYMBOL_REF_TLS_MODEL (x) != 0;
12321}
12322
12323/* Classify a TLS symbol into one of the TLS kinds. */
12324enum aarch64_symbol_type
12325aarch64_classify_tls_symbol (rtx x)
12326{
12327 enum tls_model tls_kind = tls_symbolic_operand_type (x);
12328
12329 switch (tls_kind)
12330 {
12331 case TLS_MODEL_GLOBAL_DYNAMIC:
12332 case TLS_MODEL_LOCAL_DYNAMIC:
12333 return TARGET_TLS_DESC ? SYMBOL_SMALL_TLSDESC : SYMBOL_SMALL_TLSGD;
12334
12335 case TLS_MODEL_INITIAL_EXEC:
5ae7caad
JW
12336 switch (aarch64_cmodel)
12337 {
12338 case AARCH64_CMODEL_TINY:
12339 case AARCH64_CMODEL_TINY_PIC:
12340 return SYMBOL_TINY_TLSIE;
12341 default:
79496620 12342 return SYMBOL_SMALL_TLSIE;
5ae7caad 12343 }
43e9d192
IB
12344
12345 case TLS_MODEL_LOCAL_EXEC:
cbf5629e
JW
12346 if (aarch64_tls_size == 12)
12347 return SYMBOL_TLSLE12;
12348 else if (aarch64_tls_size == 24)
12349 return SYMBOL_TLSLE24;
12350 else if (aarch64_tls_size == 32)
12351 return SYMBOL_TLSLE32;
12352 else if (aarch64_tls_size == 48)
12353 return SYMBOL_TLSLE48;
12354 else
12355 gcc_unreachable ();
43e9d192
IB
12356
12357 case TLS_MODEL_EMULATED:
12358 case TLS_MODEL_NONE:
12359 return SYMBOL_FORCE_TO_MEM;
12360
12361 default:
12362 gcc_unreachable ();
12363 }
12364}
12365
43cacb12
RS
12366/* Return the correct method for accessing X + OFFSET, where X is either
12367 a SYMBOL_REF or LABEL_REF. */
17f4d4bf 12368
43e9d192 12369enum aarch64_symbol_type
43cacb12 12370aarch64_classify_symbol (rtx x, HOST_WIDE_INT offset)
43e9d192
IB
12371{
12372 if (GET_CODE (x) == LABEL_REF)
12373 {
12374 switch (aarch64_cmodel)
12375 {
12376 case AARCH64_CMODEL_LARGE:
12377 return SYMBOL_FORCE_TO_MEM;
12378
12379 case AARCH64_CMODEL_TINY_PIC:
12380 case AARCH64_CMODEL_TINY:
a5350ddc
CSS
12381 return SYMBOL_TINY_ABSOLUTE;
12382
1b1e81f8 12383 case AARCH64_CMODEL_SMALL_SPIC:
43e9d192
IB
12384 case AARCH64_CMODEL_SMALL_PIC:
12385 case AARCH64_CMODEL_SMALL:
12386 return SYMBOL_SMALL_ABSOLUTE;
12387
12388 default:
12389 gcc_unreachable ();
12390 }
12391 }
12392
17f4d4bf 12393 if (GET_CODE (x) == SYMBOL_REF)
43e9d192 12394 {
43e9d192
IB
12395 if (aarch64_tls_symbol_p (x))
12396 return aarch64_classify_tls_symbol (x);
12397
17f4d4bf
CSS
12398 switch (aarch64_cmodel)
12399 {
12400 case AARCH64_CMODEL_TINY:
15f6e0da 12401 /* When we retrieve symbol + offset address, we have to make sure
f8b756b7
TB
12402 the offset does not cause overflow of the final address. But
12403 we have no way of knowing the address of the symbol at compile time,
12404 so we can't accurately say if the distance between the PC and
12405 symbol + offset is outside the addressable range of +/-1M in the
12406 TINY code model. So we rely on images not being greater than
12407 1M and cap the offset at 1M and anything beyond 1M will have to
15f6e0da
RR
12408 be loaded using an alternative mechanism. Furthermore if the
12409 symbol is a weak reference to something that isn't known to
12410 resolve to a symbol in this module, then force to memory. */
12411 if ((SYMBOL_REF_WEAK (x)
12412 && !aarch64_symbol_binds_local_p (x))
43cacb12 12413 || !IN_RANGE (offset, -1048575, 1048575))
a5350ddc
CSS
12414 return SYMBOL_FORCE_TO_MEM;
12415 return SYMBOL_TINY_ABSOLUTE;
12416
17f4d4bf 12417 case AARCH64_CMODEL_SMALL:
f8b756b7
TB
12418 /* Same reasoning as the tiny code model, but the offset cap here is
12419 4G. */
15f6e0da
RR
12420 if ((SYMBOL_REF_WEAK (x)
12421 && !aarch64_symbol_binds_local_p (x))
43cacb12 12422 || !IN_RANGE (offset, HOST_WIDE_INT_C (-4294967263),
3ff5d1f0 12423 HOST_WIDE_INT_C (4294967264)))
17f4d4bf
CSS
12424 return SYMBOL_FORCE_TO_MEM;
12425 return SYMBOL_SMALL_ABSOLUTE;
43e9d192 12426
17f4d4bf 12427 case AARCH64_CMODEL_TINY_PIC:
38e6c9a6 12428 if (!aarch64_symbol_binds_local_p (x))
87dd8ab0 12429 return SYMBOL_TINY_GOT;
38e6c9a6
MS
12430 return SYMBOL_TINY_ABSOLUTE;
12431
1b1e81f8 12432 case AARCH64_CMODEL_SMALL_SPIC:
17f4d4bf
CSS
12433 case AARCH64_CMODEL_SMALL_PIC:
12434 if (!aarch64_symbol_binds_local_p (x))
1b1e81f8
JW
12435 return (aarch64_cmodel == AARCH64_CMODEL_SMALL_SPIC
12436 ? SYMBOL_SMALL_GOT_28K : SYMBOL_SMALL_GOT_4G);
17f4d4bf 12437 return SYMBOL_SMALL_ABSOLUTE;
43e9d192 12438
9ee6540a
WD
12439 case AARCH64_CMODEL_LARGE:
12440 /* This is alright even in PIC code as the constant
12441 pool reference is always PC relative and within
12442 the same translation unit. */
d47d34bb 12443 if (!aarch64_pcrelative_literal_loads && CONSTANT_POOL_ADDRESS_P (x))
9ee6540a
WD
12444 return SYMBOL_SMALL_ABSOLUTE;
12445 else
12446 return SYMBOL_FORCE_TO_MEM;
12447
17f4d4bf
CSS
12448 default:
12449 gcc_unreachable ();
12450 }
43e9d192 12451 }
17f4d4bf 12452
43e9d192
IB
12453 /* By default push everything into the constant pool. */
12454 return SYMBOL_FORCE_TO_MEM;
12455}
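/* Illustrative example (not part of the GCC sources): under the tiny
   code model a reference such as sym + 0x80000 (512k) is within the
   +/-1M cap checked above and is classified as SYMBOL_TINY_ABSOLUTE,
   while sym + 0x200000 (2M) exceeds the cap and is forced to memory
   (SYMBOL_FORCE_TO_MEM).  The small code model applies the same
   reasoning with a cap of roughly 4G.  */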
12456
43e9d192
IB
12457bool
12458aarch64_constant_address_p (rtx x)
12459{
12460 return (CONSTANT_P (x) && memory_address_p (DImode, x));
12461}
12462
12463bool
12464aarch64_legitimate_pic_operand_p (rtx x)
12465{
12466 if (GET_CODE (x) == SYMBOL_REF
12467 || (GET_CODE (x) == CONST
12468 && GET_CODE (XEXP (x, 0)) == PLUS
12469 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF))
12470 return false;
12471
12472 return true;
12473}
12474
26895c21
WD
12475/* Implement TARGET_LEGITIMATE_CONSTANT_P hook. Return true for constants
12476 that should be rematerialized rather than spilled. */
3520f7cc 12477
43e9d192 12478static bool
ef4bddc2 12479aarch64_legitimate_constant_p (machine_mode mode, rtx x)
43e9d192 12480{
26895c21 12481 /* Support CSE and rematerialization of common constants. */
c0bb5bc5 12482 if (CONST_INT_P (x)
9f7b87ca 12483 || (CONST_DOUBLE_P (x) && GET_MODE_CLASS (mode) == MODE_FLOAT)
c0bb5bc5 12484 || GET_CODE (x) == CONST_VECTOR)
26895c21
WD
12485 return true;
12486
43cacb12
RS
12487 /* Do not allow vector struct mode constants for Advanced SIMD.
12488 We could support 0 and -1 easily, but they need support in
12489 aarch64-simd.md. */
12490 unsigned int vec_flags = aarch64_classify_vector_mode (mode);
12491 if (vec_flags == (VEC_ADVSIMD | VEC_STRUCT))
43e9d192
IB
12492 return false;
12493
43cacb12
RS
12494 /* Only accept variable-length vector constants if they can be
12495 handled directly.
12496
12497 ??? It would be possible to handle rematerialization of other
12498 constants via secondary reloads. */
12499 if (vec_flags & VEC_ANY_SVE)
12500 return aarch64_simd_valid_immediate (x, NULL);
12501
509bb9b6
RS
12502 if (GET_CODE (x) == HIGH)
12503 x = XEXP (x, 0);
12504
43cacb12
RS
12505 /* Accept polynomial constants that can be calculated by using the
12506 destination of a move as the sole temporary. Constants that
12507 require a second temporary cannot be rematerialized (they can't be
12508 forced to memory and also aren't legitimate constants). */
12509 poly_int64 offset;
12510 if (poly_int_rtx_p (x, &offset))
12511 return aarch64_offset_temporaries (false, offset) <= 1;
12512
12513 /* If an offset is being added to something else, we need to allow the
12514 base to be moved into the destination register, meaning that there
12515 are no free temporaries for the offset. */
12516 x = strip_offset (x, &offset);
12517 if (!offset.is_constant () && aarch64_offset_temporaries (true, offset) > 0)
12518 return false;
26895c21 12519
43cacb12
RS
12520 /* Do not allow const (plus (anchor_symbol, const_int)). */
12521 if (maybe_ne (offset, 0) && SYMBOL_REF_P (x) && SYMBOL_REF_ANCHOR_P (x))
12522 return false;
26895c21 12523
f28e54bd
WD
12524 /* Treat symbols as constants. Avoid TLS symbols as they are complex,
12525 so spilling them is better than rematerialization. */
12526 if (SYMBOL_REF_P (x) && !SYMBOL_REF_TLS_MODEL (x))
12527 return true;
12528
26895c21
WD
12529 /* Label references are always constant. */
12530 if (GET_CODE (x) == LABEL_REF)
12531 return true;
12532
12533 return false;
43e9d192
IB
12534}
12535
a5bc806c 12536rtx
43e9d192
IB
12537aarch64_load_tp (rtx target)
12538{
12539 if (!target
12540 || GET_MODE (target) != Pmode
12541 || !register_operand (target, Pmode))
12542 target = gen_reg_rtx (Pmode);
12543
12544 /* Can return in any reg. */
12545 emit_insn (gen_aarch64_load_tp_hard (target));
12546 return target;
12547}
12548
43e9d192
IB
12549/* On AAPCS systems, this is the "struct __va_list". */
12550static GTY(()) tree va_list_type;
12551
12552/* Implement TARGET_BUILD_BUILTIN_VA_LIST.
12553 Return the type to use as __builtin_va_list.
12554
12555 AAPCS64 \S 7.1.4 requires that va_list be a typedef for a type defined as:
12556
12557 struct __va_list
12558 {
12559 void *__stack;
12560 void *__gr_top;
12561 void *__vr_top;
12562 int __gr_offs;
12563 int __vr_offs;
12564 }; */
12565
12566static tree
12567aarch64_build_builtin_va_list (void)
12568{
12569 tree va_list_name;
12570 tree f_stack, f_grtop, f_vrtop, f_groff, f_vroff;
12571
12572 /* Create the type. */
12573 va_list_type = lang_hooks.types.make_type (RECORD_TYPE);
12574 /* Give it the required name. */
12575 va_list_name = build_decl (BUILTINS_LOCATION,
12576 TYPE_DECL,
12577 get_identifier ("__va_list"),
12578 va_list_type);
12579 DECL_ARTIFICIAL (va_list_name) = 1;
12580 TYPE_NAME (va_list_type) = va_list_name;
665c56c6 12581 TYPE_STUB_DECL (va_list_type) = va_list_name;
43e9d192
IB
12582
12583 /* Create the fields. */
12584 f_stack = build_decl (BUILTINS_LOCATION,
12585 FIELD_DECL, get_identifier ("__stack"),
12586 ptr_type_node);
12587 f_grtop = build_decl (BUILTINS_LOCATION,
12588 FIELD_DECL, get_identifier ("__gr_top"),
12589 ptr_type_node);
12590 f_vrtop = build_decl (BUILTINS_LOCATION,
12591 FIELD_DECL, get_identifier ("__vr_top"),
12592 ptr_type_node);
12593 f_groff = build_decl (BUILTINS_LOCATION,
12594 FIELD_DECL, get_identifier ("__gr_offs"),
12595 integer_type_node);
12596 f_vroff = build_decl (BUILTINS_LOCATION,
12597 FIELD_DECL, get_identifier ("__vr_offs"),
12598 integer_type_node);
12599
88e3bdd1 12600 /* Tell tree-stdarg pass about our internal offset fields.
3fd6b9cc
JW
12601 NOTE: va_list_gpr/fpr_counter_field are only used for tree comparison
12602 purposes, to identify whether the code is updating the va_list internal
12603 offset fields in an irregular way. */
12604 va_list_gpr_counter_field = f_groff;
12605 va_list_fpr_counter_field = f_vroff;
12606
43e9d192
IB
12607 DECL_ARTIFICIAL (f_stack) = 1;
12608 DECL_ARTIFICIAL (f_grtop) = 1;
12609 DECL_ARTIFICIAL (f_vrtop) = 1;
12610 DECL_ARTIFICIAL (f_groff) = 1;
12611 DECL_ARTIFICIAL (f_vroff) = 1;
12612
12613 DECL_FIELD_CONTEXT (f_stack) = va_list_type;
12614 DECL_FIELD_CONTEXT (f_grtop) = va_list_type;
12615 DECL_FIELD_CONTEXT (f_vrtop) = va_list_type;
12616 DECL_FIELD_CONTEXT (f_groff) = va_list_type;
12617 DECL_FIELD_CONTEXT (f_vroff) = va_list_type;
12618
12619 TYPE_FIELDS (va_list_type) = f_stack;
12620 DECL_CHAIN (f_stack) = f_grtop;
12621 DECL_CHAIN (f_grtop) = f_vrtop;
12622 DECL_CHAIN (f_vrtop) = f_groff;
12623 DECL_CHAIN (f_groff) = f_vroff;
12624
12625 /* Compute its layout. */
12626 layout_type (va_list_type);
12627
12628 return va_list_type;
12629}
12630
12631/* Implement TARGET_EXPAND_BUILTIN_VA_START. */
12632static void
12633aarch64_expand_builtin_va_start (tree valist, rtx nextarg ATTRIBUTE_UNUSED)
12634{
12635 const CUMULATIVE_ARGS *cum;
12636 tree f_stack, f_grtop, f_vrtop, f_groff, f_vroff;
12637 tree stack, grtop, vrtop, groff, vroff;
12638 tree t;
88e3bdd1
JW
12639 int gr_save_area_size = cfun->va_list_gpr_size;
12640 int vr_save_area_size = cfun->va_list_fpr_size;
43e9d192
IB
12641 int vr_offset;
12642
12643 cum = &crtl->args.info;
88e3bdd1
JW
12644 if (cfun->va_list_gpr_size)
12645 gr_save_area_size = MIN ((NUM_ARG_REGS - cum->aapcs_ncrn) * UNITS_PER_WORD,
12646 cfun->va_list_gpr_size);
12647 if (cfun->va_list_fpr_size)
12648 vr_save_area_size = MIN ((NUM_FP_ARG_REGS - cum->aapcs_nvrn)
12649 * UNITS_PER_VREG, cfun->va_list_fpr_size);
43e9d192 12650
d5726973 12651 if (!TARGET_FLOAT)
43e9d192 12652 {
261fb553 12653 gcc_assert (cum->aapcs_nvrn == 0);
43e9d192
IB
12654 vr_save_area_size = 0;
12655 }
12656
12657 f_stack = TYPE_FIELDS (va_list_type_node);
12658 f_grtop = DECL_CHAIN (f_stack);
12659 f_vrtop = DECL_CHAIN (f_grtop);
12660 f_groff = DECL_CHAIN (f_vrtop);
12661 f_vroff = DECL_CHAIN (f_groff);
12662
12663 stack = build3 (COMPONENT_REF, TREE_TYPE (f_stack), valist, f_stack,
12664 NULL_TREE);
12665 grtop = build3 (COMPONENT_REF, TREE_TYPE (f_grtop), valist, f_grtop,
12666 NULL_TREE);
12667 vrtop = build3 (COMPONENT_REF, TREE_TYPE (f_vrtop), valist, f_vrtop,
12668 NULL_TREE);
12669 groff = build3 (COMPONENT_REF, TREE_TYPE (f_groff), valist, f_groff,
12670 NULL_TREE);
12671 vroff = build3 (COMPONENT_REF, TREE_TYPE (f_vroff), valist, f_vroff,
12672 NULL_TREE);
12673
12674 /* Emit code to initialize STACK, which points to the next varargs stack
12675 argument. CUM->AAPCS_STACK_SIZE gives the number of stack words used
12676 by named arguments. STACK is 8-byte aligned. */
12677 t = make_tree (TREE_TYPE (stack), virtual_incoming_args_rtx);
12678 if (cum->aapcs_stack_size > 0)
12679 t = fold_build_pointer_plus_hwi (t, cum->aapcs_stack_size * UNITS_PER_WORD);
12680 t = build2 (MODIFY_EXPR, TREE_TYPE (stack), stack, t);
12681 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
12682
12683 /* Emit code to initialize GRTOP, the top of the GR save area.
12684 virtual_incoming_args_rtx should have been 16 byte aligned. */
12685 t = make_tree (TREE_TYPE (grtop), virtual_incoming_args_rtx);
12686 t = build2 (MODIFY_EXPR, TREE_TYPE (grtop), grtop, t);
12687 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
12688
12689 /* Emit code to initialize VRTOP, the top of the VR save area.
12690 This address is gr_save_area_bytes below GRTOP, rounded
12691 down to the next 16-byte boundary. */
12692 t = make_tree (TREE_TYPE (vrtop), virtual_incoming_args_rtx);
4f59f9f2
UB
12693 vr_offset = ROUND_UP (gr_save_area_size,
12694 STACK_BOUNDARY / BITS_PER_UNIT);
43e9d192
IB
12695
12696 if (vr_offset)
12697 t = fold_build_pointer_plus_hwi (t, -vr_offset);
12698 t = build2 (MODIFY_EXPR, TREE_TYPE (vrtop), vrtop, t);
12699 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
12700
12701 /* Emit code to initialize GROFF, the offset from GRTOP of the
12702 next GPR argument. */
12703 t = build2 (MODIFY_EXPR, TREE_TYPE (groff), groff,
12704 build_int_cst (TREE_TYPE (groff), -gr_save_area_size));
12705 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
12706
12707 /* Likewise emit code to initialize VROFF, the offset from FTOP
12708 of the next VR argument. */
12709 t = build2 (MODIFY_EXPR, TREE_TYPE (vroff), vroff,
12710 build_int_cst (TREE_TYPE (vroff), -vr_save_area_size));
12711 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
12712}
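/* Illustrative summary (not part of the GCC sources) of the state set
   up above: after va_start, __gr_top points just past the general
   register save area, __vr_top just past the FP/SIMD save area (which
   sits below the GR area, 16-byte aligned), __stack at the next
   anonymous argument passed on the stack, and __gr_offs/__vr_offs hold
   the negated sizes of the respective save areas.  va_arg (see
   aarch64_gimplify_va_arg_expr below) walks these offsets up towards
   zero and falls back to __stack once the relevant offset is no longer
   negative.  */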
12713
12714/* Implement TARGET_GIMPLIFY_VA_ARG_EXPR. */
12715
12716static tree
12717aarch64_gimplify_va_arg_expr (tree valist, tree type, gimple_seq *pre_p,
12718 gimple_seq *post_p ATTRIBUTE_UNUSED)
12719{
12720 tree addr;
12721 bool indirect_p;
12722 bool is_ha; /* is HFA or HVA. */
12723 bool dw_align; /* double-word align. */
ef4bddc2 12724 machine_mode ag_mode = VOIDmode;
43e9d192 12725 int nregs;
ef4bddc2 12726 machine_mode mode;
43e9d192
IB
12727
12728 tree f_stack, f_grtop, f_vrtop, f_groff, f_vroff;
12729 tree stack, f_top, f_off, off, arg, roundup, on_stack;
12730 HOST_WIDE_INT size, rsize, adjust, align;
12731 tree t, u, cond1, cond2;
12732
12733 indirect_p = pass_by_reference (NULL, TYPE_MODE (type), type, false);
12734 if (indirect_p)
12735 type = build_pointer_type (type);
12736
12737 mode = TYPE_MODE (type);
12738
12739 f_stack = TYPE_FIELDS (va_list_type_node);
12740 f_grtop = DECL_CHAIN (f_stack);
12741 f_vrtop = DECL_CHAIN (f_grtop);
12742 f_groff = DECL_CHAIN (f_vrtop);
12743 f_vroff = DECL_CHAIN (f_groff);
12744
12745 stack = build3 (COMPONENT_REF, TREE_TYPE (f_stack), unshare_expr (valist),
12746 f_stack, NULL_TREE);
12747 size = int_size_in_bytes (type);
985b8393 12748 align = aarch64_function_arg_alignment (mode, type) / BITS_PER_UNIT;
43e9d192
IB
12749
12750 dw_align = false;
12751 adjust = 0;
12752 if (aarch64_vfp_is_call_or_return_candidate (mode,
12753 type,
12754 &ag_mode,
12755 &nregs,
12756 &is_ha))
12757 {
6a70badb
RS
12758 /* No frontends can create types with variable-sized modes, so we
12759 shouldn't be asked to pass or return them. */
12760 unsigned int ag_size = GET_MODE_SIZE (ag_mode).to_constant ();
12761
43e9d192 12762 /* TYPE passed in fp/simd registers. */
d5726973 12763 if (!TARGET_FLOAT)
fc29dfc9 12764 aarch64_err_no_fpadvsimd (mode);
43e9d192
IB
12765
12766 f_top = build3 (COMPONENT_REF, TREE_TYPE (f_vrtop),
12767 unshare_expr (valist), f_vrtop, NULL_TREE);
12768 f_off = build3 (COMPONENT_REF, TREE_TYPE (f_vroff),
12769 unshare_expr (valist), f_vroff, NULL_TREE);
12770
12771 rsize = nregs * UNITS_PER_VREG;
12772
12773 if (is_ha)
12774 {
6a70badb
RS
12775 if (BYTES_BIG_ENDIAN && ag_size < UNITS_PER_VREG)
12776 adjust = UNITS_PER_VREG - ag_size;
43e9d192 12777 }
76b0cbf8 12778 else if (BLOCK_REG_PADDING (mode, type, 1) == PAD_DOWNWARD
43e9d192
IB
12779 && size < UNITS_PER_VREG)
12780 {
12781 adjust = UNITS_PER_VREG - size;
12782 }
12783 }
12784 else
12785 {
12786 /* TYPE passed in general registers. */
12787 f_top = build3 (COMPONENT_REF, TREE_TYPE (f_grtop),
12788 unshare_expr (valist), f_grtop, NULL_TREE);
12789 f_off = build3 (COMPONENT_REF, TREE_TYPE (f_groff),
12790 unshare_expr (valist), f_groff, NULL_TREE);
4f59f9f2 12791 rsize = ROUND_UP (size, UNITS_PER_WORD);
43e9d192
IB
12792 nregs = rsize / UNITS_PER_WORD;
12793
12794 if (align > 8)
12795 dw_align = true;
12796
76b0cbf8 12797 if (BLOCK_REG_PADDING (mode, type, 1) == PAD_DOWNWARD
43e9d192
IB
12798 && size < UNITS_PER_WORD)
12799 {
12800 adjust = UNITS_PER_WORD - size;
12801 }
12802 }
12803
12804 /* Get a local temporary for the field value. */
12805 off = get_initialized_tmp_var (f_off, pre_p, NULL);
12806
12807 /* Emit code to branch if off >= 0. */
12808 t = build2 (GE_EXPR, boolean_type_node, off,
12809 build_int_cst (TREE_TYPE (off), 0));
12810 cond1 = build3 (COND_EXPR, ptr_type_node, t, NULL_TREE, NULL_TREE);
12811
12812 if (dw_align)
12813 {
12814 /* Emit: offs = (offs + 15) & -16. */
12815 t = build2 (PLUS_EXPR, TREE_TYPE (off), off,
12816 build_int_cst (TREE_TYPE (off), 15));
12817 t = build2 (BIT_AND_EXPR, TREE_TYPE (off), t,
12818 build_int_cst (TREE_TYPE (off), -16));
12819 roundup = build2 (MODIFY_EXPR, TREE_TYPE (off), off, t);
12820 }
12821 else
12822 roundup = NULL;
12823
12824 /* Update ap.__[g|v]r_offs */
12825 t = build2 (PLUS_EXPR, TREE_TYPE (off), off,
12826 build_int_cst (TREE_TYPE (off), rsize));
12827 t = build2 (MODIFY_EXPR, TREE_TYPE (f_off), unshare_expr (f_off), t);
12828
12829 /* String up. */
12830 if (roundup)
12831 t = build2 (COMPOUND_EXPR, TREE_TYPE (t), roundup, t);
12832
12833 /* [cond2] if (ap.__[g|v]r_offs > 0) */
12834 u = build2 (GT_EXPR, boolean_type_node, unshare_expr (f_off),
12835 build_int_cst (TREE_TYPE (f_off), 0));
12836 cond2 = build3 (COND_EXPR, ptr_type_node, u, NULL_TREE, NULL_TREE);
12837
12838 /* String up: make sure the assignment happens before the use. */
12839 t = build2 (COMPOUND_EXPR, TREE_TYPE (cond2), t, cond2);
12840 COND_EXPR_ELSE (cond1) = t;
12841
12842 /* Prepare the trees handling the argument that is passed on the stack;
12843 the top-level node will be stored in ON_STACK. */
12844 arg = get_initialized_tmp_var (stack, pre_p, NULL);
12845 if (align > 8)
12846 {
12847 /* if (alignof(type) > 8) (arg = arg + 15) & -16; */
4bdc2738 12848 t = fold_build_pointer_plus_hwi (arg, 15);
43e9d192
IB
12849 t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
12850 build_int_cst (TREE_TYPE (t), -16));
43e9d192
IB
12851 roundup = build2 (MODIFY_EXPR, TREE_TYPE (arg), arg, t);
12852 }
12853 else
12854 roundup = NULL;
12855 /* Advance ap.__stack */
4bdc2738 12856 t = fold_build_pointer_plus_hwi (arg, size + 7);
43e9d192
IB
12857 t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
12858 build_int_cst (TREE_TYPE (t), -8));
43e9d192
IB
12859 t = build2 (MODIFY_EXPR, TREE_TYPE (stack), unshare_expr (stack), t);
12860 /* String up roundup and advance. */
12861 if (roundup)
12862 t = build2 (COMPOUND_EXPR, TREE_TYPE (t), roundup, t);
12863 /* String up with arg */
12864 on_stack = build2 (COMPOUND_EXPR, TREE_TYPE (arg), t, arg);
12865 /* Big-endianness related address adjustment. */
76b0cbf8 12866 if (BLOCK_REG_PADDING (mode, type, 1) == PAD_DOWNWARD
43e9d192
IB
12867 && size < UNITS_PER_WORD)
12868 {
12869 t = build2 (POINTER_PLUS_EXPR, TREE_TYPE (arg), arg,
12870 size_int (UNITS_PER_WORD - size));
12871 on_stack = build2 (COMPOUND_EXPR, TREE_TYPE (arg), on_stack, t);
12872 }
12873
12874 COND_EXPR_THEN (cond1) = unshare_expr (on_stack);
12875 COND_EXPR_THEN (cond2) = unshare_expr (on_stack);
12876
12877 /* Adjustment to OFFSET in the case of BIG_ENDIAN. */
12878 t = off;
12879 if (adjust)
12880 t = build2 (PREINCREMENT_EXPR, TREE_TYPE (off), off,
12881 build_int_cst (TREE_TYPE (off), adjust));
12882
12883 t = fold_convert (sizetype, t);
12884 t = build2 (POINTER_PLUS_EXPR, TREE_TYPE (f_top), f_top, t);
12885
12886 if (is_ha)
12887 {
12888 /* type ha; // treat as "struct {ftype field[n];}"
12889 ... [computing offs]
12890 for (i = 0; i < nregs; ++i, offs += 16)
12891 ha.field[i] = *((ftype *)(ap.__vr_top + offs));
12892 return ha; */
12893 int i;
12894 tree tmp_ha, field_t, field_ptr_t;
12895
12896 /* Declare a local variable. */
12897 tmp_ha = create_tmp_var_raw (type, "ha");
12898 gimple_add_tmp_var (tmp_ha);
12899
12900 /* Establish the base type. */
12901 switch (ag_mode)
12902 {
4e10a5a7 12903 case E_SFmode:
43e9d192
IB
12904 field_t = float_type_node;
12905 field_ptr_t = float_ptr_type_node;
12906 break;
4e10a5a7 12907 case E_DFmode:
43e9d192
IB
12908 field_t = double_type_node;
12909 field_ptr_t = double_ptr_type_node;
12910 break;
4e10a5a7 12911 case E_TFmode:
43e9d192
IB
12912 field_t = long_double_type_node;
12913 field_ptr_t = long_double_ptr_type_node;
12914 break;
4e10a5a7 12915 case E_HFmode:
1b62ed4f
JG
12916 field_t = aarch64_fp16_type_node;
12917 field_ptr_t = aarch64_fp16_ptr_type_node;
43e9d192 12918 break;
4e10a5a7
RS
12919 case E_V2SImode:
12920 case E_V4SImode:
43e9d192
IB
12921 {
12922 tree innertype = make_signed_type (GET_MODE_PRECISION (SImode));
12923 field_t = build_vector_type_for_mode (innertype, ag_mode);
12924 field_ptr_t = build_pointer_type (field_t);
12925 }
12926 break;
12927 default:
12928 gcc_assert (0);
12929 }
12930
12931 /* *(field_ptr_t)&ha = *((field_ptr_t)vr_saved_area) */
12932 tmp_ha = build1 (ADDR_EXPR, field_ptr_t, tmp_ha);
12933 addr = t;
12934 t = fold_convert (field_ptr_t, addr);
12935 t = build2 (MODIFY_EXPR, field_t,
12936 build1 (INDIRECT_REF, field_t, tmp_ha),
12937 build1 (INDIRECT_REF, field_t, t));
12938
12939 /* ha.field[i] = *((field_ptr_t)vr_saved_area + i) */
12940 for (i = 1; i < nregs; ++i)
12941 {
12942 addr = fold_build_pointer_plus_hwi (addr, UNITS_PER_VREG);
12943 u = fold_convert (field_ptr_t, addr);
12944 u = build2 (MODIFY_EXPR, field_t,
12945 build2 (MEM_REF, field_t, tmp_ha,
12946 build_int_cst (field_ptr_t,
12947 (i *
12948 int_size_in_bytes (field_t)))),
12949 build1 (INDIRECT_REF, field_t, u));
12950 t = build2 (COMPOUND_EXPR, TREE_TYPE (t), t, u);
12951 }
12952
12953 u = fold_convert (TREE_TYPE (f_top), tmp_ha);
12954 t = build2 (COMPOUND_EXPR, TREE_TYPE (f_top), t, u);
12955 }
12956
12957 COND_EXPR_ELSE (cond2) = t;
12958 addr = fold_convert (build_pointer_type (type), cond1);
12959 addr = build_va_arg_indirect_ref (addr);
12960
12961 if (indirect_p)
12962 addr = build_va_arg_indirect_ref (addr);
12963
12964 return addr;
12965}
12966
12967/* Implement TARGET_SETUP_INCOMING_VARARGS. */
12968
12969static void
ef4bddc2 12970aarch64_setup_incoming_varargs (cumulative_args_t cum_v, machine_mode mode,
43e9d192
IB
12971 tree type, int *pretend_size ATTRIBUTE_UNUSED,
12972 int no_rtl)
12973{
12974 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
12975 CUMULATIVE_ARGS local_cum;
88e3bdd1
JW
12976 int gr_saved = cfun->va_list_gpr_size;
12977 int vr_saved = cfun->va_list_fpr_size;
43e9d192
IB
12978
12979 /* The caller has advanced CUM up to, but not beyond, the last named
12980 argument. Advance a local copy of CUM past the last "real" named
12981 argument, to find out how many registers are left over. */
12982 local_cum = *cum;
12983 aarch64_function_arg_advance (pack_cumulative_args (&local_cum), mode, type, true);
12984
88e3bdd1
JW
12985 /* Find out how many registers we need to save.
12986 Honor tree-stdarg analysis results. */
12987 if (cfun->va_list_gpr_size)
12988 gr_saved = MIN (NUM_ARG_REGS - local_cum.aapcs_ncrn,
12989 cfun->va_list_gpr_size / UNITS_PER_WORD);
12990 if (cfun->va_list_fpr_size)
12991 vr_saved = MIN (NUM_FP_ARG_REGS - local_cum.aapcs_nvrn,
12992 cfun->va_list_fpr_size / UNITS_PER_VREG);
43e9d192 12993
d5726973 12994 if (!TARGET_FLOAT)
43e9d192 12995 {
261fb553 12996 gcc_assert (local_cum.aapcs_nvrn == 0);
43e9d192
IB
12997 vr_saved = 0;
12998 }
12999
13000 if (!no_rtl)
13001 {
13002 if (gr_saved > 0)
13003 {
13004 rtx ptr, mem;
13005
13006 /* virtual_incoming_args_rtx should have been 16-byte aligned. */
13007 ptr = plus_constant (Pmode, virtual_incoming_args_rtx,
13008 - gr_saved * UNITS_PER_WORD);
13009 mem = gen_frame_mem (BLKmode, ptr);
13010 set_mem_alias_set (mem, get_varargs_alias_set ());
13011
13012 move_block_from_reg (local_cum.aapcs_ncrn + R0_REGNUM,
13013 mem, gr_saved);
13014 }
13015 if (vr_saved > 0)
13016 {
13017 /* We can't use move_block_from_reg, because it will use
13018 the wrong mode, storing D regs only. */
ef4bddc2 13019 machine_mode mode = TImode;
88e3bdd1 13020 int off, i, vr_start;
43e9d192
IB
13021
13022 /* Set OFF to the offset from virtual_incoming_args_rtx of
13023 the first vector register. The VR save area lies below
13024 the GR one, and is aligned to 16 bytes. */
4f59f9f2
UB
13025 off = -ROUND_UP (gr_saved * UNITS_PER_WORD,
13026 STACK_BOUNDARY / BITS_PER_UNIT);
43e9d192
IB
13027 off -= vr_saved * UNITS_PER_VREG;
13028
88e3bdd1
JW
13029 vr_start = V0_REGNUM + local_cum.aapcs_nvrn;
13030 for (i = 0; i < vr_saved; ++i)
43e9d192
IB
13031 {
13032 rtx ptr, mem;
13033
13034 ptr = plus_constant (Pmode, virtual_incoming_args_rtx, off);
13035 mem = gen_frame_mem (mode, ptr);
13036 set_mem_alias_set (mem, get_varargs_alias_set ());
88e3bdd1 13037 aarch64_emit_move (mem, gen_rtx_REG (mode, vr_start + i));
43e9d192
IB
13038 off += UNITS_PER_VREG;
13039 }
13040 }
13041 }
13042
13043 /* We don't save the size into *PRETEND_SIZE because we want to avoid
13044 any complication of having crtl->args.pretend_args_size changed. */
8799637a 13045 cfun->machine->frame.saved_varargs_size
4f59f9f2
UB
13046 = (ROUND_UP (gr_saved * UNITS_PER_WORD,
13047 STACK_BOUNDARY / BITS_PER_UNIT)
43e9d192
IB
13048 + vr_saved * UNITS_PER_VREG);
13049}
13050
13051static void
13052aarch64_conditional_register_usage (void)
13053{
13054 int i;
13055 if (!TARGET_FLOAT)
13056 {
13057 for (i = V0_REGNUM; i <= V31_REGNUM; i++)
13058 {
13059 fixed_regs[i] = 1;
13060 call_used_regs[i] = 1;
13061 }
13062 }
43cacb12
RS
13063 if (!TARGET_SVE)
13064 for (i = P0_REGNUM; i <= P15_REGNUM; i++)
13065 {
13066 fixed_regs[i] = 1;
13067 call_used_regs[i] = 1;
13068 }
3751345d
RE
13069
13070 /* When tracking speculation, we need a couple of call-clobbered registers
13071 to track the speculation state. It would be nice to just use
13072 IP0 and IP1, but currently there are numerous places that just
13073 assume these registers are free for other uses (eg pointer
13074 authentication). */
13075 if (aarch64_track_speculation)
13076 {
13077 fixed_regs[SPECULATION_TRACKER_REGNUM] = 1;
13078 call_used_regs[SPECULATION_TRACKER_REGNUM] = 1;
13079 fixed_regs[SPECULATION_SCRATCH_REGNUM] = 1;
13080 call_used_regs[SPECULATION_SCRATCH_REGNUM] = 1;
13081 }
43e9d192
IB
13082}
13083
13084/* Walk down the type tree of TYPE counting consecutive base elements.
13085 If *MODEP is VOIDmode, then set it to the first valid floating point
13086 type. If a non-floating point type is found, or if a floating point
13087 type that doesn't match a non-VOIDmode *MODEP is found, then return -1,
13088 otherwise return the count in the sub-tree. */
13089static int
ef4bddc2 13090aapcs_vfp_sub_candidate (const_tree type, machine_mode *modep)
43e9d192 13091{
ef4bddc2 13092 machine_mode mode;
43e9d192
IB
13093 HOST_WIDE_INT size;
13094
13095 switch (TREE_CODE (type))
13096 {
13097 case REAL_TYPE:
13098 mode = TYPE_MODE (type);
1b62ed4f
JG
13099 if (mode != DFmode && mode != SFmode
13100 && mode != TFmode && mode != HFmode)
43e9d192
IB
13101 return -1;
13102
13103 if (*modep == VOIDmode)
13104 *modep = mode;
13105
13106 if (*modep == mode)
13107 return 1;
13108
13109 break;
13110
13111 case COMPLEX_TYPE:
13112 mode = TYPE_MODE (TREE_TYPE (type));
1b62ed4f
JG
13113 if (mode != DFmode && mode != SFmode
13114 && mode != TFmode && mode != HFmode)
43e9d192
IB
13115 return -1;
13116
13117 if (*modep == VOIDmode)
13118 *modep = mode;
13119
13120 if (*modep == mode)
13121 return 2;
13122
13123 break;
13124
13125 case VECTOR_TYPE:
13126 /* Use V2SImode and V4SImode as representatives of all 64-bit
13127 and 128-bit vector types. */
13128 size = int_size_in_bytes (type);
13129 switch (size)
13130 {
13131 case 8:
13132 mode = V2SImode;
13133 break;
13134 case 16:
13135 mode = V4SImode;
13136 break;
13137 default:
13138 return -1;
13139 }
13140
13141 if (*modep == VOIDmode)
13142 *modep = mode;
13143
13144 /* Vector modes are considered to be opaque: two vectors are
13145 equivalent for the purposes of being homogeneous aggregates
13146 if they are the same size. */
13147 if (*modep == mode)
13148 return 1;
13149
13150 break;
13151
13152 case ARRAY_TYPE:
13153 {
13154 int count;
13155 tree index = TYPE_DOMAIN (type);
13156
807e902e
KZ
13157 /* Can't handle incomplete types nor sizes that are not
13158 fixed. */
13159 if (!COMPLETE_TYPE_P (type)
13160 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
43e9d192
IB
13161 return -1;
13162
13163 count = aapcs_vfp_sub_candidate (TREE_TYPE (type), modep);
13164 if (count == -1
13165 || !index
13166 || !TYPE_MAX_VALUE (index)
cc269bb6 13167 || !tree_fits_uhwi_p (TYPE_MAX_VALUE (index))
43e9d192 13168 || !TYPE_MIN_VALUE (index)
cc269bb6 13169 || !tree_fits_uhwi_p (TYPE_MIN_VALUE (index))
43e9d192
IB
13170 || count < 0)
13171 return -1;
13172
ae7e9ddd
RS
13173 count *= (1 + tree_to_uhwi (TYPE_MAX_VALUE (index))
13174 - tree_to_uhwi (TYPE_MIN_VALUE (index)));
43e9d192
IB
13175
13176 /* There must be no padding. */
6a70badb
RS
13177 if (maybe_ne (wi::to_poly_wide (TYPE_SIZE (type)),
13178 count * GET_MODE_BITSIZE (*modep)))
43e9d192
IB
13179 return -1;
13180
13181 return count;
13182 }
13183
13184 case RECORD_TYPE:
13185 {
13186 int count = 0;
13187 int sub_count;
13188 tree field;
13189
807e902e
KZ
13190 /* Can't handle incomplete types nor sizes that are not
13191 fixed. */
13192 if (!COMPLETE_TYPE_P (type)
13193 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
43e9d192
IB
13194 return -1;
13195
13196 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
13197 {
13198 if (TREE_CODE (field) != FIELD_DECL)
13199 continue;
13200
13201 sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep);
13202 if (sub_count < 0)
13203 return -1;
13204 count += sub_count;
13205 }
13206
13207 /* There must be no padding. */
6a70badb
RS
13208 if (maybe_ne (wi::to_poly_wide (TYPE_SIZE (type)),
13209 count * GET_MODE_BITSIZE (*modep)))
43e9d192
IB
13210 return -1;
13211
13212 return count;
13213 }
13214
13215 case UNION_TYPE:
13216 case QUAL_UNION_TYPE:
13217 {
13218 /* These aren't very interesting except in a degenerate case. */
13219 int count = 0;
13220 int sub_count;
13221 tree field;
13222
807e902e
KZ
13223 /* Can't handle incomplete types nor sizes that are not
13224 fixed. */
13225 if (!COMPLETE_TYPE_P (type)
13226 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
43e9d192
IB
13227 return -1;
13228
13229 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
13230 {
13231 if (TREE_CODE (field) != FIELD_DECL)
13232 continue;
13233
13234 sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep);
13235 if (sub_count < 0)
13236 return -1;
13237 count = count > sub_count ? count : sub_count;
13238 }
13239
13240 /* There must be no padding. */
6a70badb
RS
13241 if (maybe_ne (wi::to_poly_wide (TYPE_SIZE (type)),
13242 count * GET_MODE_BITSIZE (*modep)))
43e9d192
IB
13243 return -1;
13244
13245 return count;
13246 }
13247
13248 default:
13249 break;
13250 }
13251
13252 return -1;
13253}
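/* Illustrative examples (not part of the GCC sources) of what the walk
   above computes:

     struct { double a; double b; }   counts as two DFmode elements;
     struct { float32x4_t v[3]; }     counts as three V4SImode elements;
     struct { double a; float b; }    returns -1 (mixed base types).

   A small count of elements with a single base mode is what allows
   aarch64_vfp_is_call_or_return_candidate below to treat the type as a
   homogeneous aggregate under AAPCS64.  */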
13254
b6ec6215
KT
13255/* Return TRUE if the type, as described by TYPE and MODE, is a short vector
13256 type as described in AAPCS64 \S 4.1.2.
13257
13258 See the comment above aarch64_composite_type_p for the notes on MODE. */
13259
13260static bool
13261aarch64_short_vector_p (const_tree type,
13262 machine_mode mode)
13263{
6a70badb 13264 poly_int64 size = -1;
b6ec6215
KT
13265
13266 if (type && TREE_CODE (type) == VECTOR_TYPE)
13267 size = int_size_in_bytes (type);
13268 else if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
13269 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
13270 size = GET_MODE_SIZE (mode);
13271
6a70badb 13272 return known_eq (size, 8) || known_eq (size, 16);
b6ec6215
KT
13273}
13274
43e9d192
IB
13275/* Return TRUE if the type, as described by TYPE and MODE, is a composite
13276 type as described in AAPCS64 \S 4.3. This includes aggregate, union and
13277 array types. The C99 floating-point complex types are also considered
13278 as composite types, according to AAPCS64 \S 7.1.1. The complex integer
13279 types, which are GCC extensions and out of the scope of AAPCS64, are
13280 treated as composite types here as well.
13281
 13282 Note that MODE itself is not sufficient for determining whether a type
13283 is such a composite type or not. This is because
13284 stor-layout.c:compute_record_mode may have already changed the MODE
13285 (BLKmode) of a RECORD_TYPE TYPE to some other mode. For example, a
13286 structure with only one field may have its MODE set to the mode of the
13287 field. Also an integer mode whose size matches the size of the
13288 RECORD_TYPE type may be used to substitute the original mode
13289 (i.e. BLKmode) in certain circumstances. In other words, MODE cannot be
13290 solely relied on. */
13291
13292static bool
13293aarch64_composite_type_p (const_tree type,
ef4bddc2 13294 machine_mode mode)
43e9d192 13295{
b6ec6215
KT
13296 if (aarch64_short_vector_p (type, mode))
13297 return false;
13298
43e9d192
IB
13299 if (type && (AGGREGATE_TYPE_P (type) || TREE_CODE (type) == COMPLEX_TYPE))
13300 return true;
13301
13302 if (mode == BLKmode
13303 || GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT
13304 || GET_MODE_CLASS (mode) == MODE_COMPLEX_INT)
13305 return true;
13306
13307 return false;
13308}
13309
43e9d192
IB
13310/* Return TRUE if an argument, whose type is described by TYPE and MODE,
13311 shall be passed or returned in simd/fp register(s) (providing these
13312 parameter passing registers are available).
13313
13314 Upon successful return, *COUNT returns the number of needed registers,
 13315 *BASE_MODE returns the mode of the individual register and when IS_HA
13316 is not NULL, *IS_HA indicates whether or not the argument is a homogeneous
13317 floating-point aggregate or a homogeneous short-vector aggregate. */
13318
13319static bool
ef4bddc2 13320aarch64_vfp_is_call_or_return_candidate (machine_mode mode,
43e9d192 13321 const_tree type,
ef4bddc2 13322 machine_mode *base_mode,
43e9d192
IB
13323 int *count,
13324 bool *is_ha)
13325{
ef4bddc2 13326 machine_mode new_mode = VOIDmode;
43e9d192
IB
13327 bool composite_p = aarch64_composite_type_p (type, mode);
13328
13329 if (is_ha != NULL) *is_ha = false;
13330
13331 if ((!composite_p && GET_MODE_CLASS (mode) == MODE_FLOAT)
13332 || aarch64_short_vector_p (type, mode))
13333 {
13334 *count = 1;
13335 new_mode = mode;
13336 }
13337 else if (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT)
13338 {
13339 if (is_ha != NULL) *is_ha = true;
13340 *count = 2;
13341 new_mode = GET_MODE_INNER (mode);
13342 }
13343 else if (type && composite_p)
13344 {
13345 int ag_count = aapcs_vfp_sub_candidate (type, &new_mode);
13346
13347 if (ag_count > 0 && ag_count <= HA_MAX_NUM_FLDS)
13348 {
13349 if (is_ha != NULL) *is_ha = true;
13350 *count = ag_count;
13351 }
13352 else
13353 return false;
13354 }
13355 else
13356 return false;
13357
13358 *base_mode = new_mode;
13359 return true;
13360}
13361
13362/* Implement TARGET_STRUCT_VALUE_RTX. */
13363
13364static rtx
13365aarch64_struct_value_rtx (tree fndecl ATTRIBUTE_UNUSED,
13366 int incoming ATTRIBUTE_UNUSED)
13367{
13368 return gen_rtx_REG (Pmode, AARCH64_STRUCT_VALUE_REGNUM);
13369}
13370
13371/* Implements target hook vector_mode_supported_p. */
13372static bool
ef4bddc2 13373aarch64_vector_mode_supported_p (machine_mode mode)
43e9d192 13374{
43cacb12
RS
13375 unsigned int vec_flags = aarch64_classify_vector_mode (mode);
13376 return vec_flags != 0 && (vec_flags & VEC_STRUCT) == 0;
43e9d192
IB
13377}
13378
b7342d25
IB
13379/* Return the appropriate SIMD container
13380 for MODE within a vector of WIDTH bits. */
ef4bddc2 13381static machine_mode
43cacb12 13382aarch64_simd_container_mode (scalar_mode mode, poly_int64 width)
43e9d192 13383{
43cacb12
RS
13384 if (TARGET_SVE && known_eq (width, BITS_PER_SVE_VECTOR))
13385 switch (mode)
13386 {
13387 case E_DFmode:
13388 return VNx2DFmode;
13389 case E_SFmode:
13390 return VNx4SFmode;
13391 case E_HFmode:
13392 return VNx8HFmode;
13393 case E_DImode:
13394 return VNx2DImode;
13395 case E_SImode:
13396 return VNx4SImode;
13397 case E_HImode:
13398 return VNx8HImode;
13399 case E_QImode:
13400 return VNx16QImode;
13401 default:
13402 return word_mode;
13403 }
13404
13405 gcc_assert (known_eq (width, 64) || known_eq (width, 128));
43e9d192 13406 if (TARGET_SIMD)
b7342d25 13407 {
43cacb12 13408 if (known_eq (width, 128))
b7342d25
IB
13409 switch (mode)
13410 {
4e10a5a7 13411 case E_DFmode:
b7342d25 13412 return V2DFmode;
4e10a5a7 13413 case E_SFmode:
b7342d25 13414 return V4SFmode;
4e10a5a7 13415 case E_HFmode:
b719f884 13416 return V8HFmode;
4e10a5a7 13417 case E_SImode:
b7342d25 13418 return V4SImode;
4e10a5a7 13419 case E_HImode:
b7342d25 13420 return V8HImode;
4e10a5a7 13421 case E_QImode:
b7342d25 13422 return V16QImode;
4e10a5a7 13423 case E_DImode:
b7342d25
IB
13424 return V2DImode;
13425 default:
13426 break;
13427 }
13428 else
13429 switch (mode)
13430 {
4e10a5a7 13431 case E_SFmode:
b7342d25 13432 return V2SFmode;
4e10a5a7 13433 case E_HFmode:
b719f884 13434 return V4HFmode;
4e10a5a7 13435 case E_SImode:
b7342d25 13436 return V2SImode;
4e10a5a7 13437 case E_HImode:
b7342d25 13438 return V4HImode;
4e10a5a7 13439 case E_QImode:
b7342d25
IB
13440 return V8QImode;
13441 default:
13442 break;
13443 }
13444 }
43e9d192
IB
13445 return word_mode;
13446}
13447
b7342d25 13448/* Return 128-bit container as the preferred SIMD mode for MODE. */
ef4bddc2 13449static machine_mode
005ba29c 13450aarch64_preferred_simd_mode (scalar_mode mode)
b7342d25 13451{
43cacb12
RS
13452 poly_int64 bits = TARGET_SVE ? BITS_PER_SVE_VECTOR : 128;
13453 return aarch64_simd_container_mode (mode, bits);
b7342d25
IB
13454}
13455
86e36728 13456/* Return a list of possible vector sizes for the vectorizer
3b357264 13457 to iterate over. */
86e36728
RS
13458static void
13459aarch64_autovectorize_vector_sizes (vector_sizes *sizes)
3b357264 13460{
43cacb12
RS
13461 if (TARGET_SVE)
13462 sizes->safe_push (BYTES_PER_SVE_VECTOR);
86e36728
RS
13463 sizes->safe_push (16);
13464 sizes->safe_push (8);
3b357264
JG
13465}
13466
ac2b960f
YZ
13467/* Implement TARGET_MANGLE_TYPE. */
13468
6f549691 13469static const char *
ac2b960f
YZ
13470aarch64_mangle_type (const_tree type)
13471{
13472 /* The AArch64 ABI documents say that "__va_list" has to be
17f8ace2 13473 mangled as if it is in the "std" namespace. */
ac2b960f
YZ
13474 if (lang_hooks.types_compatible_p (CONST_CAST_TREE (type), va_list_type))
13475 return "St9__va_list";
13476
c2ec330c
AL
13477 /* Half-precision float. */
13478 if (TREE_CODE (type) == REAL_TYPE && TYPE_PRECISION (type) == 16)
13479 return "Dh";
13480
f9d53c27
TB
13481 /* Mangle AArch64-specific internal types. TYPE_NAME is non-NULL_TREE for
13482 builtin types. */
13483 if (TYPE_NAME (type) != NULL)
13484 return aarch64_mangle_builtin_type (type);
c6fc9e43 13485
ac2b960f
YZ
13486 /* Use the default mangling. */
13487 return NULL;
13488}
13489
75cf1494
KT
 13490/* Find the first rtx_insn before INSN that will generate an assembly
13491 instruction. */
13492
13493static rtx_insn *
13494aarch64_prev_real_insn (rtx_insn *insn)
13495{
13496 if (!insn)
13497 return NULL;
13498
13499 do
13500 {
13501 insn = prev_real_insn (insn);
13502 }
13503 while (insn && recog_memoized (insn) < 0);
13504
13505 return insn;
13506}
13507
13508static bool
13509is_madd_op (enum attr_type t1)
13510{
13511 unsigned int i;
13512 /* A number of these may be AArch32 only. */
13513 enum attr_type mlatypes[] = {
13514 TYPE_MLA, TYPE_MLAS, TYPE_SMLAD, TYPE_SMLADX, TYPE_SMLAL, TYPE_SMLALD,
13515 TYPE_SMLALS, TYPE_SMLALXY, TYPE_SMLAWX, TYPE_SMLAWY, TYPE_SMLAXY,
 13516 TYPE_SMMLA, TYPE_UMLAL, TYPE_UMLALS, TYPE_SMLSD, TYPE_SMLSDX, TYPE_SMLSLD
13517 };
13518
13519 for (i = 0; i < sizeof (mlatypes) / sizeof (enum attr_type); i++)
13520 {
13521 if (t1 == mlatypes[i])
13522 return true;
13523 }
13524
13525 return false;
13526}
13527
13528/* Check if there is a register dependency between a load and the insn
13529 for which we hold recog_data. */
13530
13531static bool
13532dep_between_memop_and_curr (rtx memop)
13533{
13534 rtx load_reg;
13535 int opno;
13536
8baff86e 13537 gcc_assert (GET_CODE (memop) == SET);
75cf1494
KT
13538
13539 if (!REG_P (SET_DEST (memop)))
13540 return false;
13541
13542 load_reg = SET_DEST (memop);
8baff86e 13543 for (opno = 1; opno < recog_data.n_operands; opno++)
75cf1494
KT
13544 {
13545 rtx operand = recog_data.operand[opno];
13546 if (REG_P (operand)
13547 && reg_overlap_mentioned_p (load_reg, operand))
13548 return true;
13549
13550 }
13551 return false;
13552}
13553
8baff86e
KT
13554
13555/* When working around the Cortex-A53 erratum 835769,
13556 given rtx_insn INSN, return true if it is a 64-bit multiply-accumulate
13557 instruction and has a preceding memory instruction such that a NOP
13558 should be inserted between them. */
13559
75cf1494
KT
13560bool
13561aarch64_madd_needs_nop (rtx_insn* insn)
13562{
13563 enum attr_type attr_type;
13564 rtx_insn *prev;
13565 rtx body;
13566
b32c1043 13567 if (!TARGET_FIX_ERR_A53_835769)
75cf1494
KT
13568 return false;
13569
e322d6e3 13570 if (!INSN_P (insn) || recog_memoized (insn) < 0)
75cf1494
KT
13571 return false;
13572
13573 attr_type = get_attr_type (insn);
13574 if (!is_madd_op (attr_type))
13575 return false;
13576
13577 prev = aarch64_prev_real_insn (insn);
3fea1a75
KT
13578 /* aarch64_prev_real_insn can call recog_memoized on insns other than INSN.
13579 Restore recog state to INSN to avoid state corruption. */
13580 extract_constrain_insn_cached (insn);
13581
550e2205 13582 if (!prev || !contains_mem_rtx_p (PATTERN (prev)))
75cf1494
KT
13583 return false;
13584
13585 body = single_set (prev);
13586
13587 /* If the previous insn is a memory op and there is no dependency between
8baff86e
KT
13588 it and the DImode madd, emit a NOP between them. If body is NULL then we
13589 have a complex memory operation, probably a load/store pair.
13590 Be conservative for now and emit a NOP. */
13591 if (GET_MODE (recog_data.operand[0]) == DImode
13592 && (!body || !dep_between_memop_and_curr (body)))
75cf1494
KT
13593 return true;
13594
13595 return false;
13596
13597}
13598
8baff86e
KT
13599
13600/* Implement FINAL_PRESCAN_INSN. */
13601
75cf1494
KT
13602void
13603aarch64_final_prescan_insn (rtx_insn *insn)
13604{
13605 if (aarch64_madd_needs_nop (insn))
13606 fprintf (asm_out_file, "\tnop // between mem op and mult-accumulate\n");
13607}
13608
13609
43cacb12
RS
13610/* Return true if BASE_OR_STEP is a valid immediate operand for an SVE INDEX
13611 instruction. */
13612
13613bool
13614aarch64_sve_index_immediate_p (rtx base_or_step)
13615{
13616 return (CONST_INT_P (base_or_step)
13617 && IN_RANGE (INTVAL (base_or_step), -16, 15));
13618}
13619
13620/* Return true if X is a valid immediate for the SVE ADD and SUB
13621 instructions. Negate X first if NEGATE_P is true. */
13622
13623bool
13624aarch64_sve_arith_immediate_p (rtx x, bool negate_p)
13625{
13626 rtx elt;
13627
13628 if (!const_vec_duplicate_p (x, &elt)
13629 || !CONST_INT_P (elt))
13630 return false;
13631
13632 HOST_WIDE_INT val = INTVAL (elt);
13633 if (negate_p)
13634 val = -val;
13635 val &= GET_MODE_MASK (GET_MODE_INNER (GET_MODE (x)));
13636
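  /* The instruction takes an unsigned 8-bit immediate that may optionally
     be shifted left by 8, so VAL must either fit in the low byte or be a
     multiple of 256 no greater than 0xff00.  */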
13637 if (val & 0xff)
13638 return IN_RANGE (val, 0, 0xff);
13639 return IN_RANGE (val, 0, 0xff00);
13640}
13641
13642/* Return true if X is a valid immediate operand for an SVE logical
13643 instruction such as AND. */
13644
13645bool
13646aarch64_sve_bitmask_immediate_p (rtx x)
13647{
13648 rtx elt;
13649
13650 return (const_vec_duplicate_p (x, &elt)
13651 && CONST_INT_P (elt)
13652 && aarch64_bitmask_imm (INTVAL (elt),
13653 GET_MODE_INNER (GET_MODE (x))));
13654}
13655
13656/* Return true if X is a valid immediate for the SVE DUP and CPY
13657 instructions. */
13658
13659bool
13660aarch64_sve_dup_immediate_p (rtx x)
13661{
13662 rtx elt;
13663
13664 if (!const_vec_duplicate_p (x, &elt)
13665 || !CONST_INT_P (elt))
13666 return false;
13667
13668 HOST_WIDE_INT val = INTVAL (elt);
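  /* DUP and CPY take a signed 8-bit immediate, optionally shifted left
     by 8, hence the two ranges tested below.  */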
13669 if (val & 0xff)
13670 return IN_RANGE (val, -0x80, 0x7f);
13671 return IN_RANGE (val, -0x8000, 0x7f00);
13672}
13673
13674/* Return true if X is a valid immediate operand for an SVE CMP instruction.
13675 SIGNED_P says whether the operand is signed rather than unsigned. */
13676
13677bool
13678aarch64_sve_cmp_immediate_p (rtx x, bool signed_p)
13679{
13680 rtx elt;
13681
13682 return (const_vec_duplicate_p (x, &elt)
13683 && CONST_INT_P (elt)
13684 && (signed_p
13685 ? IN_RANGE (INTVAL (elt), -16, 15)
13686 : IN_RANGE (INTVAL (elt), 0, 127)));
13687}
13688
13689/* Return true if X is a valid immediate operand for an SVE FADD or FSUB
13690 instruction. Negate X first if NEGATE_P is true. */
13691
13692bool
13693aarch64_sve_float_arith_immediate_p (rtx x, bool negate_p)
13694{
13695 rtx elt;
13696 REAL_VALUE_TYPE r;
13697
13698 if (!const_vec_duplicate_p (x, &elt)
13699 || GET_CODE (elt) != CONST_DOUBLE)
13700 return false;
13701
13702 r = *CONST_DOUBLE_REAL_VALUE (elt);
13703
13704 if (negate_p)
13705 r = real_value_negate (&r);
13706
13707 if (real_equal (&r, &dconst1))
13708 return true;
13709 if (real_equal (&r, &dconsthalf))
13710 return true;
13711 return false;
13712}
13713
13714/* Return true if X is a valid immediate operand for an SVE FMUL
13715 instruction. */
13716
13717bool
13718aarch64_sve_float_mul_immediate_p (rtx x)
13719{
13720 rtx elt;
13721
13722 /* GCC will never generate a multiply with an immediate of 2, so there is no
13723 point testing for it (even though it is a valid constant). */
13724 return (const_vec_duplicate_p (x, &elt)
13725 && GET_CODE (elt) == CONST_DOUBLE
13726 && real_equal (CONST_DOUBLE_REAL_VALUE (elt), &dconsthalf));
13727}
13728
b187677b
RS
13729/* Return true if replicating VAL32 is a valid 2-byte or 4-byte immediate
13730 for the Advanced SIMD operation described by WHICH and INSN. If INFO
13731 is nonnull, use it to describe valid immediates. */
3520f7cc 13732static bool
b187677b
RS
13733aarch64_advsimd_valid_immediate_hs (unsigned int val32,
13734 simd_immediate_info *info,
13735 enum simd_immediate_check which,
13736 simd_immediate_info::insn_type insn)
13737{
13738 /* Try a 4-byte immediate with LSL. */
13739 for (unsigned int shift = 0; shift < 32; shift += 8)
13740 if ((val32 & (0xff << shift)) == val32)
13741 {
13742 if (info)
13743 *info = simd_immediate_info (SImode, val32 >> shift, insn,
13744 simd_immediate_info::LSL, shift);
13745 return true;
13746 }
3520f7cc 13747
b187677b
RS
13748 /* Try a 2-byte immediate with LSL. */
13749 unsigned int imm16 = val32 & 0xffff;
13750 if (imm16 == (val32 >> 16))
13751 for (unsigned int shift = 0; shift < 16; shift += 8)
13752 if ((imm16 & (0xff << shift)) == imm16)
48063b9d 13753 {
b187677b
RS
13754 if (info)
13755 *info = simd_immediate_info (HImode, imm16 >> shift, insn,
13756 simd_immediate_info::LSL, shift);
13757 return true;
48063b9d 13758 }
3520f7cc 13759
b187677b
RS
13760 /* Try a 4-byte immediate with MSL, except for cases that MVN
13761 can handle. */
13762 if (which == AARCH64_CHECK_MOV)
13763 for (unsigned int shift = 8; shift < 24; shift += 8)
13764 {
13765 unsigned int low = (1 << shift) - 1;
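	/* MSL shifts ones in from the right, so a match requires all bits
	   below the shifted byte to be set (e.g. 0x0000abff for a shift
	   of 8).  */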
13766 if (((val32 & (0xff << shift)) | low) == val32)
13767 {
13768 if (info)
13769 *info = simd_immediate_info (SImode, val32 >> shift, insn,
13770 simd_immediate_info::MSL, shift);
13771 return true;
13772 }
13773 }
43e9d192 13774
b187677b
RS
13775 return false;
13776}
13777
13778/* Return true if replicating VAL64 is a valid immediate for the
13779 Advanced SIMD operation described by WHICH. If INFO is nonnull,
13780 use it to describe valid immediates. */
13781static bool
13782aarch64_advsimd_valid_immediate (unsigned HOST_WIDE_INT val64,
13783 simd_immediate_info *info,
13784 enum simd_immediate_check which)
13785{
13786 unsigned int val32 = val64 & 0xffffffff;
13787 unsigned int val16 = val64 & 0xffff;
13788 unsigned int val8 = val64 & 0xff;
13789
13790 if (val32 == (val64 >> 32))
43e9d192 13791 {
b187677b
RS
13792 if ((which & AARCH64_CHECK_ORR) != 0
13793 && aarch64_advsimd_valid_immediate_hs (val32, info, which,
13794 simd_immediate_info::MOV))
13795 return true;
43e9d192 13796
b187677b
RS
13797 if ((which & AARCH64_CHECK_BIC) != 0
13798 && aarch64_advsimd_valid_immediate_hs (~val32, info, which,
13799 simd_immediate_info::MVN))
13800 return true;
ee78df47 13801
b187677b
RS
13802 /* Try using a replicated byte. */
13803 if (which == AARCH64_CHECK_MOV
13804 && val16 == (val32 >> 16)
13805 && val8 == (val16 >> 8))
ee78df47 13806 {
b187677b
RS
13807 if (info)
13808 *info = simd_immediate_info (QImode, val8);
13809 return true;
ee78df47 13810 }
43e9d192
IB
13811 }
13812
b187677b
RS
13813 /* Try using a bit-to-bytemask. */
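  /* For this 64-bit form of MOVI, each byte of the replicated value must
     be either 0x00 or 0xff.  */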
13814 if (which == AARCH64_CHECK_MOV)
43e9d192 13815 {
b187677b
RS
13816 unsigned int i;
13817 for (i = 0; i < 64; i += 8)
ab6501d7 13818 {
b187677b
RS
13819 unsigned char byte = (val64 >> i) & 0xff;
13820 if (byte != 0 && byte != 0xff)
13821 break;
ab6501d7 13822 }
b187677b 13823 if (i == 64)
ab6501d7 13824 {
b187677b
RS
13825 if (info)
13826 *info = simd_immediate_info (DImode, val64);
13827 return true;
ab6501d7 13828 }
43e9d192 13829 }
b187677b
RS
13830 return false;
13831}
43e9d192 13832
43cacb12
RS
13833/* Return true if replicating VAL64 gives a valid immediate for an SVE MOV
13834 instruction. If INFO is nonnull, use it to describe valid immediates. */
13835
13836static bool
13837aarch64_sve_valid_immediate (unsigned HOST_WIDE_INT val64,
13838 simd_immediate_info *info)
13839{
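  /* Find the narrowest element size at which VAL64 is still a repeating
     pattern; the DUP and DUPM checks below are then done at that width.  */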
13840 scalar_int_mode mode = DImode;
13841 unsigned int val32 = val64 & 0xffffffff;
13842 if (val32 == (val64 >> 32))
13843 {
13844 mode = SImode;
13845 unsigned int val16 = val32 & 0xffff;
13846 if (val16 == (val32 >> 16))
13847 {
13848 mode = HImode;
13849 unsigned int val8 = val16 & 0xff;
13850 if (val8 == (val16 >> 8))
13851 mode = QImode;
13852 }
13853 }
13854 HOST_WIDE_INT val = trunc_int_for_mode (val64, mode);
13855 if (IN_RANGE (val, -0x80, 0x7f))
13856 {
13857 /* DUP with no shift. */
13858 if (info)
13859 *info = simd_immediate_info (mode, val);
13860 return true;
13861 }
13862 if ((val & 0xff) == 0 && IN_RANGE (val, -0x8000, 0x7f00))
13863 {
13864 /* DUP with LSL #8. */
13865 if (info)
13866 *info = simd_immediate_info (mode, val);
13867 return true;
13868 }
13869 if (aarch64_bitmask_imm (val64, mode))
13870 {
13871 /* DUPM. */
13872 if (info)
13873 *info = simd_immediate_info (mode, val);
13874 return true;
13875 }
13876 return false;
13877}
13878
b187677b
RS
13879/* Return true if OP is a valid SIMD immediate for the operation
13880 described by WHICH. If INFO is nonnull, use it to describe valid
13881 immediates. */
13882bool
13883aarch64_simd_valid_immediate (rtx op, simd_immediate_info *info,
13884 enum simd_immediate_check which)
13885{
43cacb12
RS
13886 machine_mode mode = GET_MODE (op);
13887 unsigned int vec_flags = aarch64_classify_vector_mode (mode);
13888 if (vec_flags == 0 || vec_flags == (VEC_ADVSIMD | VEC_STRUCT))
13889 return false;
13890
13891 scalar_mode elt_mode = GET_MODE_INNER (mode);
f9093f23 13892 rtx base, step;
b187677b 13893 unsigned int n_elts;
f9093f23
RS
13894 if (GET_CODE (op) == CONST_VECTOR
13895 && CONST_VECTOR_DUPLICATE_P (op))
13896 n_elts = CONST_VECTOR_NPATTERNS (op);
43cacb12
RS
13897 else if ((vec_flags & VEC_SVE_DATA)
13898 && const_vec_series_p (op, &base, &step))
13899 {
13900 gcc_assert (GET_MODE_CLASS (mode) == MODE_VECTOR_INT);
13901 if (!aarch64_sve_index_immediate_p (base)
13902 || !aarch64_sve_index_immediate_p (step))
13903 return false;
13904
13905 if (info)
13906 *info = simd_immediate_info (elt_mode, base, step);
13907 return true;
13908 }
6a70badb
RS
13909 else if (GET_CODE (op) == CONST_VECTOR
13910 && CONST_VECTOR_NUNITS (op).is_constant (&n_elts))
13911 /* N_ELTS set above. */;
b187677b 13912 else
d8edd899 13913 return false;
43e9d192 13914
43cacb12
RS
13915 /* Handle PFALSE and PTRUE. */
13916 if (vec_flags & VEC_SVE_PRED)
13917 return (op == CONST0_RTX (mode)
13918 || op == CONSTM1_RTX (mode));
13919
b187677b 13920 scalar_float_mode elt_float_mode;
f9093f23
RS
13921 if (n_elts == 1
13922 && is_a <scalar_float_mode> (elt_mode, &elt_float_mode))
43e9d192 13923 {
f9093f23
RS
13924 rtx elt = CONST_VECTOR_ENCODED_ELT (op, 0);
13925 if (aarch64_float_const_zero_rtx_p (elt)
13926 || aarch64_float_const_representable_p (elt))
13927 {
13928 if (info)
13929 *info = simd_immediate_info (elt_float_mode, elt);
13930 return true;
13931 }
b187677b 13932 }
43e9d192 13933
b187677b
RS
13934 unsigned int elt_size = GET_MODE_SIZE (elt_mode);
13935 if (elt_size > 8)
13936 return false;
e4f0f84d 13937
b187677b 13938 scalar_int_mode elt_int_mode = int_mode_for_mode (elt_mode).require ();
43e9d192 13939
b187677b
RS
13940 /* Expand the vector constant out into a byte vector, with the least
13941 significant byte of the register first. */
13942 auto_vec<unsigned char, 16> bytes;
13943 bytes.reserve (n_elts * elt_size);
13944 for (unsigned int i = 0; i < n_elts; i++)
13945 {
f9093f23
RS
13946 /* The vector is provided in gcc endian-neutral fashion.
13947 For aarch64_be Advanced SIMD, it must be laid out in the vector
13948 register in reverse order. */
13949 bool swap_p = ((vec_flags & VEC_ADVSIMD) != 0 && BYTES_BIG_ENDIAN);
13950 rtx elt = CONST_VECTOR_ELT (op, swap_p ? (n_elts - 1 - i) : i);
43e9d192 13951
b187677b
RS
13952 if (elt_mode != elt_int_mode)
13953 elt = gen_lowpart (elt_int_mode, elt);
43e9d192 13954
b187677b
RS
13955 if (!CONST_INT_P (elt))
13956 return false;
43e9d192 13957
b187677b
RS
13958 unsigned HOST_WIDE_INT elt_val = INTVAL (elt);
13959 for (unsigned int byte = 0; byte < elt_size; byte++)
48063b9d 13960 {
b187677b
RS
13961 bytes.quick_push (elt_val & 0xff);
13962 elt_val >>= BITS_PER_UNIT;
48063b9d 13963 }
43e9d192
IB
13964 }
13965
b187677b
RS
13966 /* The immediate must repeat every eight bytes. */
13967 unsigned int nbytes = bytes.length ();
13968 for (unsigned i = 8; i < nbytes; ++i)
13969 if (bytes[i] != bytes[i - 8])
13970 return false;
13971
13972 /* Get the repeating 8-byte value as an integer. No endian correction
13973 is needed here because bytes is already in lsb-first order. */
13974 unsigned HOST_WIDE_INT val64 = 0;
13975 for (unsigned int i = 0; i < 8; i++)
13976 val64 |= ((unsigned HOST_WIDE_INT) bytes[i % nbytes]
13977 << (i * BITS_PER_UNIT));
13978
43cacb12
RS
13979 if (vec_flags & VEC_SVE_DATA)
13980 return aarch64_sve_valid_immediate (val64, info);
13981 else
13982 return aarch64_advsimd_valid_immediate (val64, info, which);
13983}
13984
13985/* Check whether X is a VEC_SERIES-like constant that starts at 0 and
 13986 has a step in the range accepted by the SVE INDEX instruction. Return the step if so,
13987 otherwise return null. */
13988rtx
13989aarch64_check_zero_based_sve_index_immediate (rtx x)
13990{
13991 rtx base, step;
13992 if (const_vec_series_p (x, &base, &step)
13993 && base == const0_rtx
13994 && aarch64_sve_index_immediate_p (step))
13995 return step;
13996 return NULL_RTX;
43e9d192
IB
13997}
13998
43e9d192
IB
13999/* Check if immediate shift constants are within range. */
14000bool
ef4bddc2 14001aarch64_simd_shift_imm_p (rtx x, machine_mode mode, bool left)
43e9d192
IB
14002{
14003 int bit_width = GET_MODE_UNIT_SIZE (mode) * BITS_PER_UNIT;
14004 if (left)
ddeabd3e 14005 return aarch64_const_vec_all_same_in_range_p (x, 0, bit_width - 1);
43e9d192 14006 else
ddeabd3e 14007 return aarch64_const_vec_all_same_in_range_p (x, 1, bit_width);
43e9d192
IB
14008}
14009
7325d85a
KT
14010/* Return the bitmask CONST_INT to select the bits required by a zero extract
14011 operation of width WIDTH at bit position POS. */
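/* For example, WIDTH = 4 and POS = 8 select bits 8..11, giving the mask
   0xf00.  */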
14012
14013rtx
14014aarch64_mask_from_zextract_ops (rtx width, rtx pos)
14015{
14016 gcc_assert (CONST_INT_P (width));
14017 gcc_assert (CONST_INT_P (pos));
14018
14019 unsigned HOST_WIDE_INT mask
14020 = ((unsigned HOST_WIDE_INT) 1 << UINTVAL (width)) - 1;
14021 return GEN_INT (mask << UINTVAL (pos));
14022}
14023
83f8c414 14024bool
a6e0bfa7 14025aarch64_mov_operand_p (rtx x, machine_mode mode)
83f8c414 14026{
83f8c414
CSS
14027 if (GET_CODE (x) == HIGH
14028 && aarch64_valid_symref (XEXP (x, 0), GET_MODE (XEXP (x, 0))))
14029 return true;
14030
82614948 14031 if (CONST_INT_P (x))
83f8c414
CSS
14032 return true;
14033
43cacb12
RS
14034 if (VECTOR_MODE_P (GET_MODE (x)))
14035 return aarch64_simd_valid_immediate (x, NULL);
14036
83f8c414
CSS
14037 if (GET_CODE (x) == SYMBOL_REF && mode == DImode && CONSTANT_ADDRESS_P (x))
14038 return true;
14039
43cacb12
RS
14040 if (aarch64_sve_cnt_immediate_p (x))
14041 return true;
14042
a6e0bfa7 14043 return aarch64_classify_symbolic_expression (x)
a5350ddc 14044 == SYMBOL_TINY_ABSOLUTE;
83f8c414
CSS
14045}
14046
43e9d192
IB
14047/* Return a const_int vector of VAL. */
14048rtx
ab014eb3 14049aarch64_simd_gen_const_vector_dup (machine_mode mode, HOST_WIDE_INT val)
43e9d192 14050{
59d06c05
RS
14051 rtx c = gen_int_mode (val, GET_MODE_INNER (mode));
14052 return gen_const_vec_duplicate (mode, c);
43e9d192
IB
14053}
14054
051d0e2f
SN
14055/* Check OP is a legal scalar immediate for the MOVI instruction. */
14056
14057bool
77e994c9 14058aarch64_simd_scalar_immediate_valid_for_move (rtx op, scalar_int_mode mode)
051d0e2f 14059{
ef4bddc2 14060 machine_mode vmode;
051d0e2f 14061
43cacb12 14062 vmode = aarch64_simd_container_mode (mode, 64);
051d0e2f 14063 rtx op_v = aarch64_simd_gen_const_vector_dup (vmode, INTVAL (op));
b187677b 14064 return aarch64_simd_valid_immediate (op_v, NULL);
051d0e2f
SN
14065}
14066
988fa693
JG
14067/* Construct and return a PARALLEL RTX vector with elements numbering the
14068 lanes of either the high (HIGH == TRUE) or low (HIGH == FALSE) half of
14069 the vector - from the perspective of the architecture. This does not
14070 line up with GCC's perspective on lane numbers, so we end up with
14071 different masks depending on our target endian-ness. The diagram
14072 below may help. We must draw the distinction when building masks
14073 which select one half of the vector. An instruction selecting
 14074 architectural low-lanes for a big-endian target must be described using
14075 a mask selecting GCC high-lanes.
14076
14077 Big-Endian Little-Endian
14078
14079GCC 0 1 2 3 3 2 1 0
14080 | x | x | x | x | | x | x | x | x |
14081Architecture 3 2 1 0 3 2 1 0
14082
14083Low Mask: { 2, 3 } { 0, 1 }
14084High Mask: { 0, 1 } { 2, 3 }
f5cbabc1
RS
14085
14086 MODE Is the mode of the vector and NUNITS is the number of units in it. */
988fa693 14087
43e9d192 14088rtx
f5cbabc1 14089aarch64_simd_vect_par_cnst_half (machine_mode mode, int nunits, bool high)
43e9d192 14090{
43e9d192 14091 rtvec v = rtvec_alloc (nunits / 2);
988fa693
JG
14092 int high_base = nunits / 2;
14093 int low_base = 0;
14094 int base;
43e9d192
IB
14095 rtx t1;
14096 int i;
14097
988fa693
JG
14098 if (BYTES_BIG_ENDIAN)
14099 base = high ? low_base : high_base;
14100 else
14101 base = high ? high_base : low_base;
14102
14103 for (i = 0; i < nunits / 2; i++)
43e9d192
IB
14104 RTVEC_ELT (v, i) = GEN_INT (base + i);
14105
14106 t1 = gen_rtx_PARALLEL (mode, v);
14107 return t1;
14108}
14109
988fa693
JG
14110/* Check OP for validity as a PARALLEL RTX vector with elements
14111 numbering the lanes of either the high (HIGH == TRUE) or low lanes,
14112 from the perspective of the architecture. See the diagram above
14113 aarch64_simd_vect_par_cnst_half for more details. */
14114
14115bool
ef4bddc2 14116aarch64_simd_check_vect_par_cnst_half (rtx op, machine_mode mode,
988fa693
JG
14117 bool high)
14118{
6a70badb
RS
14119 int nelts;
14120 if (!VECTOR_MODE_P (mode) || !GET_MODE_NUNITS (mode).is_constant (&nelts))
f5cbabc1
RS
14121 return false;
14122
6a70badb 14123 rtx ideal = aarch64_simd_vect_par_cnst_half (mode, nelts, high);
988fa693
JG
14124 HOST_WIDE_INT count_op = XVECLEN (op, 0);
14125 HOST_WIDE_INT count_ideal = XVECLEN (ideal, 0);
14126 int i = 0;
14127
988fa693
JG
14128 if (count_op != count_ideal)
14129 return false;
14130
14131 for (i = 0; i < count_ideal; i++)
14132 {
14133 rtx elt_op = XVECEXP (op, 0, i);
14134 rtx elt_ideal = XVECEXP (ideal, 0, i);
14135
4aa81c2e 14136 if (!CONST_INT_P (elt_op)
988fa693
JG
14137 || INTVAL (elt_ideal) != INTVAL (elt_op))
14138 return false;
14139 }
14140 return true;
14141}
14142
43e9d192
IB
14143/* Bounds-check lanes. Ensure OPERAND lies between LOW (inclusive) and
14144 HIGH (exclusive). */
14145void
46ed6024
CB
14146aarch64_simd_lane_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high,
14147 const_tree exp)
43e9d192
IB
14148{
14149 HOST_WIDE_INT lane;
4aa81c2e 14150 gcc_assert (CONST_INT_P (operand));
43e9d192
IB
14151 lane = INTVAL (operand);
14152
14153 if (lane < low || lane >= high)
46ed6024
CB
14154 {
14155 if (exp)
cf0c27ef 14156 error ("%Klane %wd out of range %wd - %wd", exp, lane, low, high - 1);
46ed6024 14157 else
cf0c27ef 14158 error ("lane %wd out of range %wd - %wd", lane, low, high - 1);
46ed6024 14159 }
43e9d192
IB
14160}
14161
7ac29c0f
RS
 14162/* Perform endian correction on lane number N, which indexes a vector
14163 of mode MODE, and return the result as an SImode rtx. */
14164
14165rtx
14166aarch64_endian_lane_rtx (machine_mode mode, unsigned int n)
14167{
14168 return gen_int_mode (ENDIAN_LANE_N (GET_MODE_NUNITS (mode), n), SImode);
14169}
14170
43e9d192 14171/* Return TRUE if OP is a MEM with a valid vector addressing mode. */
43cacb12 14172
43e9d192
IB
14173bool
14174aarch64_simd_mem_operand_p (rtx op)
14175{
14176 return MEM_P (op) && (GET_CODE (XEXP (op, 0)) == POST_INC
4aa81c2e 14177 || REG_P (XEXP (op, 0)));
43e9d192
IB
14178}
14179
43cacb12
RS
14180/* Return true if OP is a valid MEM operand for an SVE LD1R instruction. */
14181
14182bool
14183aarch64_sve_ld1r_operand_p (rtx op)
14184{
14185 struct aarch64_address_info addr;
14186 scalar_mode mode;
14187
14188 return (MEM_P (op)
14189 && is_a <scalar_mode> (GET_MODE (op), &mode)
14190 && aarch64_classify_address (&addr, XEXP (op, 0), mode, false)
14191 && addr.type == ADDRESS_REG_IMM
14192 && offset_6bit_unsigned_scaled_p (mode, addr.const_offset));
14193}
14194
14195/* Return true if OP is a valid MEM operand for an SVE LDR instruction.
14196 The conditions for STR are the same. */
14197bool
14198aarch64_sve_ldr_operand_p (rtx op)
14199{
14200 struct aarch64_address_info addr;
14201
14202 return (MEM_P (op)
14203 && aarch64_classify_address (&addr, XEXP (op, 0), GET_MODE (op),
14204 false, ADDR_QUERY_ANY)
14205 && addr.type == ADDRESS_REG_IMM);
14206}
14207
9f4cbab8
RS
14208/* Return true if OP is a valid MEM operand for an SVE_STRUCT mode.
14209 We need to be able to access the individual pieces, so the range
14210 is different from LD[234] and ST[234]. */
14211bool
14212aarch64_sve_struct_memory_operand_p (rtx op)
14213{
14214 if (!MEM_P (op))
14215 return false;
14216
14217 machine_mode mode = GET_MODE (op);
14218 struct aarch64_address_info addr;
14219 if (!aarch64_classify_address (&addr, XEXP (op, 0), SVE_BYTE_MODE, false,
14220 ADDR_QUERY_ANY)
14221 || addr.type != ADDRESS_REG_IMM)
14222 return false;
14223
14224 poly_int64 first = addr.const_offset;
14225 poly_int64 last = first + GET_MODE_SIZE (mode) - BYTES_PER_SVE_VECTOR;
14226 return (offset_4bit_signed_scaled_p (SVE_BYTE_MODE, first)
14227 && offset_4bit_signed_scaled_p (SVE_BYTE_MODE, last));
14228}
14229
2d8c6dc1
AH
14230/* Emit a register copy from operand to operand, taking care not to
14231 early-clobber source registers in the process.
43e9d192 14232
2d8c6dc1
AH
14233 COUNT is the number of components into which the copy needs to be
14234 decomposed. */
43e9d192 14235void
b8506a8a 14236aarch64_simd_emit_reg_reg_move (rtx *operands, machine_mode mode,
2d8c6dc1 14237 unsigned int count)
43e9d192
IB
14238{
14239 unsigned int i;
2d8c6dc1
AH
14240 int rdest = REGNO (operands[0]);
14241 int rsrc = REGNO (operands[1]);
43e9d192
IB
14242
14243 if (!reg_overlap_mentioned_p (operands[0], operands[1])
2d8c6dc1
AH
14244 || rdest < rsrc)
14245 for (i = 0; i < count; i++)
14246 emit_move_insn (gen_rtx_REG (mode, rdest + i),
14247 gen_rtx_REG (mode, rsrc + i));
43e9d192 14248 else
2d8c6dc1
AH
14249 for (i = 0; i < count; i++)
14250 emit_move_insn (gen_rtx_REG (mode, rdest + count - i - 1),
14251 gen_rtx_REG (mode, rsrc + count - i - 1));
43e9d192
IB
14252}
14253
668046d1 14254/* Compute and return the length of aarch64_simd_reglist<mode>, where <mode> is
6ec0e5b9 14255 one of the VSTRUCT modes: OI, CI, or XI. */
668046d1 14256int
b8506a8a 14257aarch64_simd_attr_length_rglist (machine_mode mode)
668046d1 14258{
6a70badb
RS
14259 /* This is only used (and only meaningful) for Advanced SIMD, not SVE. */
14260 return (GET_MODE_SIZE (mode).to_constant () / UNITS_PER_VREG) * 4;
668046d1
DS
14261}
14262
db0253a4 14263/* Implement target hook TARGET_VECTOR_ALIGNMENT. The AAPCS64 sets the maximum
43cacb12
RS
14264 alignment of a vector to 128 bits. SVE predicates have an alignment of
14265 16 bits. */
db0253a4
TB
14266static HOST_WIDE_INT
14267aarch64_simd_vector_alignment (const_tree type)
14268{
43cacb12
RS
14269 if (TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
14270 /* ??? Checking the mode isn't ideal, but VECTOR_BOOLEAN_TYPE_P can
14271 be set for non-predicate vectors of booleans. Modes are the most
14272 direct way we have of identifying real SVE predicate types. */
14273 return GET_MODE_CLASS (TYPE_MODE (type)) == MODE_VECTOR_BOOL ? 16 : 128;
9439e9a1 14274 HOST_WIDE_INT align = tree_to_shwi (TYPE_SIZE (type));
db0253a4
TB
14275 return MIN (align, 128);
14276}
14277
43cacb12 14278/* Implement target hook TARGET_VECTORIZE_PREFERRED_VECTOR_ALIGNMENT. */
ca31798e 14279static poly_uint64
43cacb12
RS
14280aarch64_vectorize_preferred_vector_alignment (const_tree type)
14281{
14282 if (aarch64_sve_data_mode_p (TYPE_MODE (type)))
14283 {
 14284 /* If the length of the vector is fixed, try to align to that length;
14285 otherwise don't try to align at all. */
14286 HOST_WIDE_INT result;
14287 if (!BITS_PER_SVE_VECTOR.is_constant (&result))
14288 result = TYPE_ALIGN (TREE_TYPE (type));
14289 return result;
14290 }
14291 return TYPE_ALIGN (type);
14292}
14293
db0253a4
TB
14294/* Implement target hook TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE. */
14295static bool
14296aarch64_simd_vector_alignment_reachable (const_tree type, bool is_packed)
14297{
14298 if (is_packed)
14299 return false;
14300
43cacb12
RS
14301 /* For fixed-length vectors, check that the vectorizer will aim for
14302 full-vector alignment. This isn't true for generic GCC vectors
14303 that are wider than the ABI maximum of 128 bits. */
ca31798e
AV
14304 poly_uint64 preferred_alignment =
14305 aarch64_vectorize_preferred_vector_alignment (type);
43cacb12 14306 if (TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
ca31798e
AV
14307 && maybe_ne (wi::to_widest (TYPE_SIZE (type)),
14308 preferred_alignment))
db0253a4
TB
14309 return false;
14310
14311 /* Vectors whose size is <= BIGGEST_ALIGNMENT are naturally aligned. */
14312 return true;
14313}
14314
7df76747
N
14315/* Return true if the vector misalignment factor is supported by the
14316 target. */
14317static bool
14318aarch64_builtin_support_vector_misalignment (machine_mode mode,
14319 const_tree type, int misalignment,
14320 bool is_packed)
14321{
14322 if (TARGET_SIMD && STRICT_ALIGNMENT)
14323 {
 14324 /* Return false if the movmisalign pattern is not supported for this mode. */
14325 if (optab_handler (movmisalign_optab, mode) == CODE_FOR_nothing)
14326 return false;
14327
a509c571 14328 /* Misalignment factor is unknown at compile time. */
7df76747 14329 if (misalignment == -1)
a509c571 14330 return false;
7df76747
N
14331 }
14332 return default_builtin_support_vector_misalignment (mode, type, misalignment,
14333 is_packed);
14334}
14335
4369c11e
TB
14336/* If VALS is a vector constant that can be loaded into a register
14337 using DUP, generate instructions to do so and return an RTX to
14338 assign to the register. Otherwise return NULL_RTX. */
14339static rtx
14340aarch64_simd_dup_constant (rtx vals)
14341{
ef4bddc2
RS
14342 machine_mode mode = GET_MODE (vals);
14343 machine_mode inner_mode = GET_MODE_INNER (mode);
4369c11e 14344 rtx x;
4369c11e 14345
92695fbb 14346 if (!const_vec_duplicate_p (vals, &x))
4369c11e
TB
14347 return NULL_RTX;
14348
14349 /* We can load this constant by using DUP and a constant in a
14350 single ARM register. This will be cheaper than a vector
14351 load. */
92695fbb 14352 x = copy_to_mode_reg (inner_mode, x);
59d06c05 14353 return gen_vec_duplicate (mode, x);
4369c11e
TB
14354}
14355
14356
14357/* Generate code to load VALS, which is a PARALLEL containing only
14358 constants (for vec_init) or CONST_VECTOR, efficiently into a
14359 register. Returns an RTX to copy into the register, or NULL_RTX
 14360 for a PARALLEL that cannot be converted into a CONST_VECTOR. */
1df3f464 14361static rtx
4369c11e
TB
14362aarch64_simd_make_constant (rtx vals)
14363{
ef4bddc2 14364 machine_mode mode = GET_MODE (vals);
4369c11e
TB
14365 rtx const_dup;
14366 rtx const_vec = NULL_RTX;
4369c11e
TB
14367 int n_const = 0;
14368 int i;
14369
14370 if (GET_CODE (vals) == CONST_VECTOR)
14371 const_vec = vals;
14372 else if (GET_CODE (vals) == PARALLEL)
14373 {
14374 /* A CONST_VECTOR must contain only CONST_INTs and
14375 CONST_DOUBLEs, but CONSTANT_P allows more (e.g. SYMBOL_REF).
14376 Only store valid constants in a CONST_VECTOR. */
6a70badb 14377 int n_elts = XVECLEN (vals, 0);
4369c11e
TB
14378 for (i = 0; i < n_elts; ++i)
14379 {
14380 rtx x = XVECEXP (vals, 0, i);
14381 if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
14382 n_const++;
14383 }
14384 if (n_const == n_elts)
14385 const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0));
14386 }
14387 else
14388 gcc_unreachable ();
14389
14390 if (const_vec != NULL_RTX
b187677b 14391 && aarch64_simd_valid_immediate (const_vec, NULL))
4369c11e
TB
14392 /* Load using MOVI/MVNI. */
14393 return const_vec;
14394 else if ((const_dup = aarch64_simd_dup_constant (vals)) != NULL_RTX)
14395 /* Loaded using DUP. */
14396 return const_dup;
14397 else if (const_vec != NULL_RTX)
 14398 /* Load from constant pool. We cannot take advantage of single-cycle
14399 LD1 because we need a PC-relative addressing mode. */
14400 return const_vec;
14401 else
14402 /* A PARALLEL containing something not valid inside CONST_VECTOR.
 14403 We cannot construct an initializer. */
14404 return NULL_RTX;
14405}
14406
35a093b6
JG
14407/* Expand a vector initialisation sequence, such that TARGET is
14408 initialised to contain VALS. */
14409
4369c11e
TB
14410void
14411aarch64_expand_vector_init (rtx target, rtx vals)
14412{
ef4bddc2 14413 machine_mode mode = GET_MODE (target);
146c2e3a 14414 scalar_mode inner_mode = GET_MODE_INNER (mode);
35a093b6 14415 /* The number of vector elements. */
6a70badb 14416 int n_elts = XVECLEN (vals, 0);
35a093b6 14417 /* The number of vector elements which are not constant. */
8b66a2d4
AL
14418 int n_var = 0;
14419 rtx any_const = NULL_RTX;
35a093b6
JG
14420 /* The first element of vals. */
14421 rtx v0 = XVECEXP (vals, 0, 0);
4369c11e 14422 bool all_same = true;
4369c11e 14423
35a093b6 14424 /* Count the number of variable elements to initialise. */
8b66a2d4 14425 for (int i = 0; i < n_elts; ++i)
4369c11e 14426 {
8b66a2d4 14427 rtx x = XVECEXP (vals, 0, i);
35a093b6 14428 if (!(CONST_INT_P (x) || CONST_DOUBLE_P (x)))
8b66a2d4
AL
14429 ++n_var;
14430 else
14431 any_const = x;
4369c11e 14432
35a093b6 14433 all_same &= rtx_equal_p (x, v0);
4369c11e
TB
14434 }
14435
35a093b6
JG
14436 /* No variable elements, hand off to aarch64_simd_make_constant which knows
14437 how best to handle this. */
4369c11e
TB
14438 if (n_var == 0)
14439 {
14440 rtx constant = aarch64_simd_make_constant (vals);
14441 if (constant != NULL_RTX)
14442 {
14443 emit_move_insn (target, constant);
14444 return;
14445 }
14446 }
14447
14448 /* Splat a single non-constant element if we can. */
14449 if (all_same)
14450 {
35a093b6 14451 rtx x = copy_to_mode_reg (inner_mode, v0);
59d06c05 14452 aarch64_emit_move (target, gen_vec_duplicate (mode, x));
4369c11e
TB
14453 return;
14454 }
14455
85c1b6d7
AP
14456 enum insn_code icode = optab_handler (vec_set_optab, mode);
14457 gcc_assert (icode != CODE_FOR_nothing);
14458
14459 /* If there are only variable elements, try to optimize
14460 the insertion using dup for the most common element
14461 followed by insertions. */
14462
14463 /* The algorithm will fill matches[*][0] with the earliest matching element,
14464 and matches[X][1] with the count of duplicate elements (if X is the
14465 earliest element which has duplicates). */
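  /* For example, for VALS = { x, y, x, x } the loop below leaves
     matches[0][1] == 3, so element 0 is the one we duplicate first.  */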
14466
14467 if (n_var == n_elts && n_elts <= 16)
14468 {
14469 int matches[16][2] = {0};
14470 for (int i = 0; i < n_elts; i++)
14471 {
14472 for (int j = 0; j <= i; j++)
14473 {
14474 if (rtx_equal_p (XVECEXP (vals, 0, i), XVECEXP (vals, 0, j)))
14475 {
14476 matches[i][0] = j;
14477 matches[j][1]++;
14478 break;
14479 }
14480 }
14481 }
14482 int maxelement = 0;
14483 int maxv = 0;
14484 for (int i = 0; i < n_elts; i++)
14485 if (matches[i][1] > maxv)
14486 {
14487 maxelement = i;
14488 maxv = matches[i][1];
14489 }
14490
b4e2cd5b
JG
14491 /* Create a duplicate of the most common element, unless all elements
14492 are equally useless to us, in which case just immediately set the
14493 vector register using the first element. */
14494
14495 if (maxv == 1)
14496 {
14497 /* For vectors of two 64-bit elements, we can do even better. */
14498 if (n_elts == 2
14499 && (inner_mode == E_DImode
14500 || inner_mode == E_DFmode))
14501
14502 {
14503 rtx x0 = XVECEXP (vals, 0, 0);
14504 rtx x1 = XVECEXP (vals, 0, 1);
14505 /* Combine can pick up this case, but handling it directly
14506 here leaves clearer RTL.
14507
14508 This is load_pair_lanes<mode>, and also gives us a clean-up
14509 for store_pair_lanes<mode>. */
14510 if (memory_operand (x0, inner_mode)
14511 && memory_operand (x1, inner_mode)
14512 && !STRICT_ALIGNMENT
14513 && rtx_equal_p (XEXP (x1, 0),
14514 plus_constant (Pmode,
14515 XEXP (x0, 0),
14516 GET_MODE_SIZE (inner_mode))))
14517 {
14518 rtx t;
14519 if (inner_mode == DFmode)
14520 t = gen_load_pair_lanesdf (target, x0, x1);
14521 else
14522 t = gen_load_pair_lanesdi (target, x0, x1);
14523 emit_insn (t);
14524 return;
14525 }
14526 }
14527 /* The subreg-move sequence below will move into lane zero of the
14528 vector register. For big-endian we want that position to hold
14529 the last element of VALS. */
14530 maxelement = BYTES_BIG_ENDIAN ? n_elts - 1 : 0;
14531 rtx x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, maxelement));
14532 aarch64_emit_move (target, lowpart_subreg (mode, x, inner_mode));
14533 }
14534 else
14535 {
14536 rtx x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, maxelement));
14537 aarch64_emit_move (target, gen_vec_duplicate (mode, x));
14538 }
85c1b6d7
AP
14539
14540 /* Insert the rest. */
14541 for (int i = 0; i < n_elts; i++)
14542 {
14543 rtx x = XVECEXP (vals, 0, i);
14544 if (matches[i][0] == maxelement)
14545 continue;
14546 x = copy_to_mode_reg (inner_mode, x);
14547 emit_insn (GEN_FCN (icode) (target, x, GEN_INT (i)));
14548 }
14549 return;
14550 }
14551
35a093b6
JG
14552 /* Initialise a vector which is part-variable. We want to first try
14553 to build those lanes which are constant in the most efficient way we
14554 can. */
14555 if (n_var != n_elts)
4369c11e
TB
14556 {
14557 rtx copy = copy_rtx (vals);
4369c11e 14558
8b66a2d4
AL
14559 /* Load constant part of vector. We really don't care what goes into the
14560 parts we will overwrite, but we're more likely to be able to load the
14561 constant efficiently if it has fewer, larger, repeating parts
14562 (see aarch64_simd_valid_immediate). */
14563 for (int i = 0; i < n_elts; i++)
14564 {
14565 rtx x = XVECEXP (vals, 0, i);
14566 if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
14567 continue;
14568 rtx subst = any_const;
14569 for (int bit = n_elts / 2; bit > 0; bit /= 2)
14570 {
14571 /* Look in the copied vector, as more elements are const. */
14572 rtx test = XVECEXP (copy, 0, i ^ bit);
14573 if (CONST_INT_P (test) || CONST_DOUBLE_P (test))
14574 {
14575 subst = test;
14576 break;
14577 }
14578 }
14579 XVECEXP (copy, 0, i) = subst;
14580 }
4369c11e 14581 aarch64_expand_vector_init (target, copy);
35a093b6 14582 }
4369c11e 14583
35a093b6 14584 /* Insert the variable lanes directly. */
8b66a2d4 14585 for (int i = 0; i < n_elts; i++)
35a093b6
JG
14586 {
14587 rtx x = XVECEXP (vals, 0, i);
14588 if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
14589 continue;
14590 x = copy_to_mode_reg (inner_mode, x);
14591 emit_insn (GEN_FCN (icode) (target, x, GEN_INT (i)));
14592 }
4369c11e
TB
14593}
14594
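/* Return the mask that is implicitly applied to a shift count of mode MODE,
   or 0 if no truncation of the count can be assumed (which is the case for
   vector modes, or when SHIFT_COUNT_TRUNCATED is zero).  */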
43e9d192 14595static unsigned HOST_WIDE_INT
ef4bddc2 14596aarch64_shift_truncation_mask (machine_mode mode)
43e9d192 14597{
43cacb12
RS
14598 if (!SHIFT_COUNT_TRUNCATED || aarch64_vector_data_mode_p (mode))
14599 return 0;
14600 return GET_MODE_UNIT_BITSIZE (mode) - 1;
43e9d192
IB
14601}
14602
43e9d192
IB
14603/* Select a format to encode pointers in exception handling data. */
14604int
14605aarch64_asm_preferred_eh_data_format (int code ATTRIBUTE_UNUSED, int global)
14606{
14607 int type;
14608 switch (aarch64_cmodel)
14609 {
14610 case AARCH64_CMODEL_TINY:
14611 case AARCH64_CMODEL_TINY_PIC:
14612 case AARCH64_CMODEL_SMALL:
14613 case AARCH64_CMODEL_SMALL_PIC:
1b1e81f8 14614 case AARCH64_CMODEL_SMALL_SPIC:
43e9d192
IB
14615 /* text+got+data < 4Gb. 4-byte signed relocs are sufficient
14616 for everything. */
14617 type = DW_EH_PE_sdata4;
14618 break;
14619 default:
14620 /* No assumptions here. 8-byte relocs required. */
14621 type = DW_EH_PE_sdata8;
14622 break;
14623 }
14624 return (global ? DW_EH_PE_indirect : 0) | DW_EH_PE_pcrel | type;
14625}
14626
e1c1ecb0
KT
14627/* The last .arch and .tune assembly strings that we printed. */
14628static std::string aarch64_last_printed_arch_string;
14629static std::string aarch64_last_printed_tune_string;
14630
361fb3ee
KT
14631/* Implement ASM_DECLARE_FUNCTION_NAME. Output the ISA features used
14632 by the function fndecl. */
14633
14634void
14635aarch64_declare_function_name (FILE *stream, const char* name,
14636 tree fndecl)
14637{
14638 tree target_parts = DECL_FUNCTION_SPECIFIC_TARGET (fndecl);
14639
14640 struct cl_target_option *targ_options;
14641 if (target_parts)
14642 targ_options = TREE_TARGET_OPTION (target_parts);
14643 else
14644 targ_options = TREE_TARGET_OPTION (target_option_current_node);
14645 gcc_assert (targ_options);
14646
14647 const struct processor *this_arch
14648 = aarch64_get_arch (targ_options->x_explicit_arch);
14649
054b4005
JG
14650 unsigned long isa_flags = targ_options->x_aarch64_isa_flags;
14651 std::string extension
04a99ebe
JG
14652 = aarch64_get_extension_string_for_isa_flags (isa_flags,
14653 this_arch->flags);
e1c1ecb0
KT
14654 /* Only update the assembler .arch string if it is distinct from the last
14655 such string we printed. */
14656 std::string to_print = this_arch->name + extension;
14657 if (to_print != aarch64_last_printed_arch_string)
14658 {
14659 asm_fprintf (asm_out_file, "\t.arch %s\n", to_print.c_str ());
14660 aarch64_last_printed_arch_string = to_print;
14661 }
361fb3ee
KT
14662
 14663 /* Print the cpu name we're tuning for in the comments; it might be
e1c1ecb0
KT
14664 useful to readers of the generated asm. Do it only when it changes
14665 from function to function and verbose assembly is requested. */
361fb3ee
KT
14666 const struct processor *this_tune
14667 = aarch64_get_tune_cpu (targ_options->x_explicit_tune_core);
14668
e1c1ecb0
KT
14669 if (flag_debug_asm && aarch64_last_printed_tune_string != this_tune->name)
14670 {
14671 asm_fprintf (asm_out_file, "\t" ASM_COMMENT_START ".tune %s\n",
14672 this_tune->name);
14673 aarch64_last_printed_tune_string = this_tune->name;
14674 }
361fb3ee
KT
14675
14676 /* Don't forget the type directive for ELF. */
14677 ASM_OUTPUT_TYPE_DIRECTIVE (stream, name, "function");
14678 ASM_OUTPUT_LABEL (stream, name);
14679}
14680
e1c1ecb0
KT
14681/* Implements TARGET_ASM_FILE_START. Output the assembly header. */
14682
14683static void
14684aarch64_start_file (void)
14685{
14686 struct cl_target_option *default_options
14687 = TREE_TARGET_OPTION (target_option_default_node);
14688
14689 const struct processor *default_arch
14690 = aarch64_get_arch (default_options->x_explicit_arch);
14691 unsigned long default_isa_flags = default_options->x_aarch64_isa_flags;
14692 std::string extension
04a99ebe
JG
14693 = aarch64_get_extension_string_for_isa_flags (default_isa_flags,
14694 default_arch->flags);
e1c1ecb0
KT
14695
14696 aarch64_last_printed_arch_string = default_arch->name + extension;
14697 aarch64_last_printed_tune_string = "";
14698 asm_fprintf (asm_out_file, "\t.arch %s\n",
14699 aarch64_last_printed_arch_string.c_str ());
14700
14701 default_file_start ();
14702}
14703
0462169c
SN
14704/* Emit load exclusive. */
14705
14706static void
ef4bddc2 14707aarch64_emit_load_exclusive (machine_mode mode, rtx rval,
0462169c
SN
14708 rtx mem, rtx model_rtx)
14709{
0016d8d9 14710 emit_insn (gen_aarch64_load_exclusive (mode, rval, mem, model_rtx));
0462169c
SN
14711}
14712
14713/* Emit store exclusive. */
14714
14715static void
ef4bddc2 14716aarch64_emit_store_exclusive (machine_mode mode, rtx bval,
0462169c
SN
14717 rtx rval, rtx mem, rtx model_rtx)
14718{
0016d8d9 14719 emit_insn (gen_aarch64_store_exclusive (mode, bval, rval, mem, model_rtx));
0462169c
SN
14720}
14721
 14722/* Emit INSN as a jump and mark it as very unlikely to be taken. */
14723
14724static void
14725aarch64_emit_unlikely_jump (rtx insn)
14726{
f370536c 14727 rtx_insn *jump = emit_jump_insn (insn);
5fa396ad 14728 add_reg_br_prob_note (jump, profile_probability::very_unlikely ());
0462169c
SN
14729}
14730
14731/* Expand a compare and swap pattern. */
14732
14733void
14734aarch64_expand_compare_and_swap (rtx operands[])
14735{
d400fda3
RH
14736 rtx bval, rval, mem, oldval, newval, is_weak, mod_s, mod_f, x, cc_reg;
14737 machine_mode mode, r_mode;
0462169c
SN
14738
14739 bval = operands[0];
14740 rval = operands[1];
14741 mem = operands[2];
14742 oldval = operands[3];
14743 newval = operands[4];
14744 is_weak = operands[5];
14745 mod_s = operands[6];
14746 mod_f = operands[7];
14747 mode = GET_MODE (mem);
0462169c
SN
14748
14749 /* Normally the succ memory model must be stronger than fail, but in the
14750 unlikely event of fail being ACQUIRE and succ being RELEASE we need to
14751 promote succ to ACQ_REL so that we don't lose the acquire semantics. */
46b35980
AM
14752 if (is_mm_acquire (memmodel_from_int (INTVAL (mod_f)))
14753 && is_mm_release (memmodel_from_int (INTVAL (mod_s))))
0462169c
SN
14754 mod_s = GEN_INT (MEMMODEL_ACQ_REL);
14755
d400fda3
RH
14756 r_mode = mode;
14757 if (mode == QImode || mode == HImode)
0462169c 14758 {
d400fda3
RH
14759 r_mode = SImode;
14760 rval = gen_reg_rtx (r_mode);
0462169c
SN
14761 }
14762
b0770c0f 14763 if (TARGET_LSE)
77f33f44
RH
14764 {
14765 /* The CAS insn requires oldval and rval overlap, but we need to
14766 have a copy of oldval saved across the operation to tell if
14767 the operation is successful. */
d400fda3
RH
14768 if (reg_overlap_mentioned_p (rval, oldval))
14769 rval = copy_to_mode_reg (r_mode, oldval);
77f33f44 14770 else
d400fda3
RH
14771 emit_move_insn (rval, gen_lowpart (r_mode, oldval));
14772
77f33f44
RH
14773 emit_insn (gen_aarch64_compare_and_swap_lse (mode, rval, mem,
14774 newval, mod_s));
d400fda3 14775 cc_reg = aarch64_gen_compare_reg_maybe_ze (NE, rval, oldval, mode);
77f33f44 14776 }
b0770c0f 14777 else
d400fda3
RH
14778 {
14779 /* The oldval predicate varies by mode. Test it and force to reg. */
14780 insn_code code = code_for_aarch64_compare_and_swap (mode);
14781 if (!insn_data[code].operand[2].predicate (oldval, mode))
14782 oldval = force_reg (mode, oldval);
0462169c 14783
d400fda3
RH
14784 emit_insn (GEN_FCN (code) (rval, mem, oldval, newval,
14785 is_weak, mod_s, mod_f));
14786 cc_reg = gen_rtx_REG (CCmode, CC_REGNUM);
14787 }
14788
14789 if (r_mode != mode)
77f33f44
RH
14790 rval = gen_lowpart (mode, rval);
14791 emit_move_insn (operands[1], rval);
0462169c 14792
d400fda3 14793 x = gen_rtx_EQ (SImode, cc_reg, const0_rtx);
f7df4a84 14794 emit_insn (gen_rtx_SET (bval, x));
0462169c
SN
14795}
14796
f70fb3b6
MW
14797/* Emit a barrier, that is appropriate for memory model MODEL, at the end of a
14798 sequence implementing an atomic operation. */
14799
14800static void
14801aarch64_emit_post_barrier (enum memmodel model)
14802{
14803 const enum memmodel base_model = memmodel_base (model);
14804
14805 if (is_mm_sync (model)
14806 && (base_model == MEMMODEL_ACQUIRE
14807 || base_model == MEMMODEL_ACQ_REL
14808 || base_model == MEMMODEL_SEQ_CST))
14809 {
14810 emit_insn (gen_mem_thread_fence (GEN_INT (MEMMODEL_SEQ_CST)));
14811 }
14812}
14813
0462169c
SN
14814/* Split a compare and swap pattern. */
14815
14816void
14817aarch64_split_compare_and_swap (rtx operands[])
14818{
14819 rtx rval, mem, oldval, newval, scratch;
ef4bddc2 14820 machine_mode mode;
0462169c 14821 bool is_weak;
5d8a22a5
DM
14822 rtx_code_label *label1, *label2;
14823 rtx x, cond;
ab876106
MW
14824 enum memmodel model;
14825 rtx model_rtx;
0462169c
SN
14826
14827 rval = operands[0];
14828 mem = operands[1];
14829 oldval = operands[2];
14830 newval = operands[3];
14831 is_weak = (operands[4] != const0_rtx);
ab876106 14832 model_rtx = operands[5];
0462169c
SN
14833 scratch = operands[7];
14834 mode = GET_MODE (mem);
ab876106 14835 model = memmodel_from_int (INTVAL (model_rtx));
0462169c 14836
17f47f86
KT
14837 /* When OLDVAL is zero and we want the strong version we can emit a tighter
14838 loop:
14839 .label1:
14840 LD[A]XR rval, [mem]
14841 CBNZ rval, .label2
14842 ST[L]XR scratch, newval, [mem]
14843 CBNZ scratch, .label1
14844 .label2:
14845 CMP rval, 0. */
14846 bool strong_zero_p = !is_weak && oldval == const0_rtx;
14847
5d8a22a5 14848 label1 = NULL;
0462169c
SN
14849 if (!is_weak)
14850 {
14851 label1 = gen_label_rtx ();
14852 emit_label (label1);
14853 }
14854 label2 = gen_label_rtx ();
14855
ab876106
MW
14856 /* The initial load can be relaxed for a __sync operation since a final
14857 barrier will be emitted to stop code hoisting. */
14858 if (is_mm_sync (model))
14859 aarch64_emit_load_exclusive (mode, rval, mem,
14860 GEN_INT (MEMMODEL_RELAXED));
14861 else
14862 aarch64_emit_load_exclusive (mode, rval, mem, model_rtx);
0462169c 14863
17f47f86
KT
14864 if (strong_zero_p)
14865 {
6e1eaca9
RE
14866 if (aarch64_track_speculation)
14867 {
14868 /* Emit an explicit compare instruction, so that we can correctly
14869 track the condition codes. */
14870 rtx cc_reg = aarch64_gen_compare_reg (NE, rval, const0_rtx);
14871 x = gen_rtx_NE (GET_MODE (cc_reg), cc_reg, const0_rtx);
14872 }
14873 else
14874 x = gen_rtx_NE (VOIDmode, rval, const0_rtx);
14875
17f47f86
KT
14876 x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
14877 gen_rtx_LABEL_REF (Pmode, label2), pc_rtx);
14878 aarch64_emit_unlikely_jump (gen_rtx_SET (pc_rtx, x));
14879 }
14880 else
14881 {
d400fda3 14882 cond = aarch64_gen_compare_reg_maybe_ze (NE, rval, oldval, mode);
17f47f86
KT
14883 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
14884 x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
d400fda3 14885 gen_rtx_LABEL_REF (Pmode, label2), pc_rtx);
17f47f86
KT
14886 aarch64_emit_unlikely_jump (gen_rtx_SET (pc_rtx, x));
14887 }
0462169c 14888
ab876106 14889 aarch64_emit_store_exclusive (mode, scratch, mem, newval, model_rtx);
0462169c
SN
14890
14891 if (!is_weak)
14892 {
6e1eaca9
RE
14893 if (aarch64_track_speculation)
14894 {
14895 /* Emit an explicit compare instruction, so that we can correctly
14896 track the condition codes. */
14897 rtx cc_reg = aarch64_gen_compare_reg (NE, scratch, const0_rtx);
14898 x = gen_rtx_NE (GET_MODE (cc_reg), cc_reg, const0_rtx);
14899 }
14900 else
14901 x = gen_rtx_NE (VOIDmode, scratch, const0_rtx);
14902
0462169c
SN
14903 x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
14904 gen_rtx_LABEL_REF (Pmode, label1), pc_rtx);
f7df4a84 14905 aarch64_emit_unlikely_jump (gen_rtx_SET (pc_rtx, x));
0462169c
SN
14906 }
14907 else
14908 {
14909 cond = gen_rtx_REG (CCmode, CC_REGNUM);
14910 x = gen_rtx_COMPARE (CCmode, scratch, const0_rtx);
f7df4a84 14911 emit_insn (gen_rtx_SET (cond, x));
0462169c
SN
14912 }
14913
14914 emit_label (label2);
17f47f86
KT
14915 /* If we used a CBNZ in the exchange loop emit an explicit compare with RVAL
14916 to set the condition flags. If this is not used it will be removed by
14917 later passes. */
14918 if (strong_zero_p)
14919 {
14920 cond = gen_rtx_REG (CCmode, CC_REGNUM);
14921 x = gen_rtx_COMPARE (CCmode, rval, const0_rtx);
14922 emit_insn (gen_rtx_SET (cond, x));
14923 }
ab876106
MW
14924 /* Emit any final barrier needed for a __sync operation. */
14925 if (is_mm_sync (model))
14926 aarch64_emit_post_barrier (model);
0462169c 14927}
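
/* As an illustrative sketch only (register names assumed, not taken from
   the code above): for a strong 32-bit compare-and-swap with SEQ_CST
   ordering the split above produces roughly:
     .label1:
       ldaxr   w_rval, [mem]
       cmp     w_rval, w_oldval
       b.ne    .label2
       stlxr   w_scratch, w_newval, [mem]
       cbnz    w_scratch, .label1
     .label2:  */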
9cd7b720 14928
0462169c
SN
14929/* Split an atomic operation. */
14930
14931void
14932aarch64_split_atomic_op (enum rtx_code code, rtx old_out, rtx new_out, rtx mem,
9cd7b720 14933 rtx value, rtx model_rtx, rtx cond)
0462169c 14934{
ef4bddc2
RS
14935 machine_mode mode = GET_MODE (mem);
14936 machine_mode wmode = (mode == DImode ? DImode : SImode);
f70fb3b6
MW
14937 const enum memmodel model = memmodel_from_int (INTVAL (model_rtx));
14938 const bool is_sync = is_mm_sync (model);
5d8a22a5
DM
14939 rtx_code_label *label;
14940 rtx x;
0462169c 14941
9cd7b720 14942 /* Split the atomic operation into a sequence. */
0462169c
SN
14943 label = gen_label_rtx ();
14944 emit_label (label);
14945
14946 if (new_out)
14947 new_out = gen_lowpart (wmode, new_out);
14948 if (old_out)
14949 old_out = gen_lowpart (wmode, old_out);
14950 else
14951 old_out = new_out;
14952 value = simplify_gen_subreg (wmode, value, mode, 0);
14953
f70fb3b6
MW
14954 /* The initial load can be relaxed for a __sync operation since a final
14955 barrier will be emitted to stop code hoisting. */
14956 if (is_sync)
14957 aarch64_emit_load_exclusive (mode, old_out, mem,
14958 GEN_INT (MEMMODEL_RELAXED));
14959 else
14960 aarch64_emit_load_exclusive (mode, old_out, mem, model_rtx);
0462169c
SN
14961
14962 switch (code)
14963 {
14964 case SET:
14965 new_out = value;
14966 break;
14967
14968 case NOT:
14969 x = gen_rtx_AND (wmode, old_out, value);
f7df4a84 14970 emit_insn (gen_rtx_SET (new_out, x));
0462169c 14971 x = gen_rtx_NOT (wmode, new_out);
f7df4a84 14972 emit_insn (gen_rtx_SET (new_out, x));
0462169c
SN
14973 break;
14974
14975 case MINUS:
14976 if (CONST_INT_P (value))
14977 {
14978 value = GEN_INT (-INTVAL (value));
14979 code = PLUS;
14980 }
14981 /* Fall through. */
14982
14983 default:
14984 x = gen_rtx_fmt_ee (code, wmode, old_out, value);
f7df4a84 14985 emit_insn (gen_rtx_SET (new_out, x));
0462169c
SN
14986 break;
14987 }
14988
14989 aarch64_emit_store_exclusive (mode, cond, mem,
14990 gen_lowpart (mode, new_out), model_rtx);
14991
6e1eaca9
RE
14992 if (aarch64_track_speculation)
14993 {
14994 /* Emit an explicit compare instruction, so that we can correctly
14995 track the condition codes. */
14996 rtx cc_reg = aarch64_gen_compare_reg (NE, cond, const0_rtx);
14997 x = gen_rtx_NE (GET_MODE (cc_reg), cc_reg, const0_rtx);
14998 }
14999 else
15000 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
15001
0462169c
SN
15002 x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
15003 gen_rtx_LABEL_REF (Pmode, label), pc_rtx);
f7df4a84 15004 aarch64_emit_unlikely_jump (gen_rtx_SET (pc_rtx, x));
f70fb3b6
MW
15005
15006 /* Emit any final barrier needed for a __sync operation. */
15007 if (is_sync)
15008 aarch64_emit_post_barrier (model);
0462169c
SN
15009}
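
/* A minimal sketch of the result (assumed, with illustrative register
   names): splitting a relaxed __atomic_fetch_add on an SImode value gives
   a loop along the lines of:
     .label:
       ldxr   w_old, [mem]
       add    w_new, w_old, w_value
       stxr   w_cond, w_new, [mem]
       cbnz   w_cond, .label  */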
15010
c2ec330c
AL
15011static void
15012aarch64_init_libfuncs (void)
15013{
15014 /* Half-precision float operations. The compiler handles all operations
15015 with NULL libfuncs by converting to SFmode. */
15016
15017 /* Conversions. */
15018 set_conv_libfunc (trunc_optab, HFmode, SFmode, "__gnu_f2h_ieee");
15019 set_conv_libfunc (sext_optab, SFmode, HFmode, "__gnu_h2f_ieee");
15020
15021 /* Arithmetic. */
15022 set_optab_libfunc (add_optab, HFmode, NULL);
15023 set_optab_libfunc (sdiv_optab, HFmode, NULL);
15024 set_optab_libfunc (smul_optab, HFmode, NULL);
15025 set_optab_libfunc (neg_optab, HFmode, NULL);
15026 set_optab_libfunc (sub_optab, HFmode, NULL);
15027
15028 /* Comparisons. */
15029 set_optab_libfunc (eq_optab, HFmode, NULL);
15030 set_optab_libfunc (ne_optab, HFmode, NULL);
15031 set_optab_libfunc (lt_optab, HFmode, NULL);
15032 set_optab_libfunc (le_optab, HFmode, NULL);
15033 set_optab_libfunc (ge_optab, HFmode, NULL);
15034 set_optab_libfunc (gt_optab, HFmode, NULL);
15035 set_optab_libfunc (unord_optab, HFmode, NULL);
15036}
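
/* The effect of the NULL libfuncs above, as a sketch (assumed): an HFmode
   addition without native FP16 support is performed by widening both
   operands to SFmode with __gnu_h2f_ieee, adding in SFmode, and narrowing
   the result back with __gnu_f2h_ieee.  */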
15037
43e9d192 15038/* Target hook for c_mode_for_suffix. */
ef4bddc2 15039static machine_mode
43e9d192
IB
15040aarch64_c_mode_for_suffix (char suffix)
15041{
15042 if (suffix == 'q')
15043 return TFmode;
15044
15045 return VOIDmode;
15046}
15047
3520f7cc
JG
15048/* We can only represent floating point constants which will fit in
15049 "quarter-precision" values. These values are characterised by
15050 a sign bit, a 4-bit mantissa and a 3-bit exponent, and are given
15051 by:
15052
15053 (-1)^s * (n/16) * 2^r
15054
15055 Where:
15056 's' is the sign bit.
15057 'n' is an integer in the range 16 <= n <= 31.
15058 'r' is an integer in the range -3 <= r <= 4. */
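
/* Worked examples of the format above (illustrative only): 1.0 is
   (16/16) * 2^0, the smallest representable magnitude is
   (16/16) * 2^-3 = 0.125 and the largest is (31/16) * 2^4 = 31.0,
   so valid immediates range in magnitude from 0.125 to 31.0.  */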
15059
15060/* Return true iff X can be represented as a quarter-precision
15061 floating point immediate operand. Note, we cannot represent 0.0. */
15062bool
15063aarch64_float_const_representable_p (rtx x)
15064{
15065 /* This represents our current view of how many bits
15066 make up the mantissa. */
15067 int point_pos = 2 * HOST_BITS_PER_WIDE_INT - 1;
ba96cdfb 15068 int exponent;
3520f7cc 15069 unsigned HOST_WIDE_INT mantissa, mask;
3520f7cc 15070 REAL_VALUE_TYPE r, m;
807e902e 15071 bool fail;
3520f7cc
JG
15072
15073 if (!CONST_DOUBLE_P (x))
15074 return false;
15075
a4518821
RS
15076 if (GET_MODE (x) == VOIDmode
15077 || (GET_MODE (x) == HFmode && !TARGET_FP_F16INST))
94bfa2da
TV
15078 return false;
15079
34a72c33 15080 r = *CONST_DOUBLE_REAL_VALUE (x);
3520f7cc
JG
15081
15082 /* We cannot represent infinities, NaNs or +/-zero. We won't
15083 know if we have +zero until we analyse the mantissa, but we
15084 can reject the other invalid values. */
15085 if (REAL_VALUE_ISINF (r) || REAL_VALUE_ISNAN (r)
15086 || REAL_VALUE_MINUS_ZERO (r))
15087 return false;
15088
ba96cdfb 15089 /* Extract exponent. */
3520f7cc
JG
15090 r = real_value_abs (&r);
15091 exponent = REAL_EXP (&r);
15092
15093 /* For the mantissa, we expand into two HOST_WIDE_INTS, apart from the
15094 highest (sign) bit, with a fixed binary point at bit point_pos.
15095 m1 holds the low part of the mantissa, m2 the high part.
15096 WARNING: If we ever have a representation using more than 2 * H_W_I - 1
15097 bits for the mantissa, this can fail (low bits will be lost). */
15098 real_ldexp (&m, &r, point_pos - exponent);
807e902e 15099 wide_int w = real_to_integer (&m, &fail, HOST_BITS_PER_WIDE_INT * 2);
3520f7cc
JG
15100
15101 /* If the low part of the mantissa has bits set we cannot represent
15102 the value. */
d9074b29 15103 if (w.ulow () != 0)
3520f7cc
JG
15104 return false;
15105 /* We have rejected the lower HOST_WIDE_INT, so update our
15106 understanding of how many bits lie in the mantissa and
15107 look only at the high HOST_WIDE_INT. */
807e902e 15108 mantissa = w.elt (1);
3520f7cc
JG
15109 point_pos -= HOST_BITS_PER_WIDE_INT;
15110
15111 /* We can only represent values with a mantissa of the form 1.xxxx. */
15112 mask = ((unsigned HOST_WIDE_INT)1 << (point_pos - 5)) - 1;
15113 if ((mantissa & mask) != 0)
15114 return false;
15115
15116 /* Having filtered unrepresentable values, we may now remove all
15117 but the highest 5 bits. */
15118 mantissa >>= point_pos - 5;
15119
15120 /* We cannot represent the value 0.0, so reject it. This is handled
15121 elsewhere. */
15122 if (mantissa == 0)
15123 return false;
15124
15125 /* Then, as bit 4 is always set, we can mask it off, leaving
15126 the mantissa in the range [0, 15]. */
15127 mantissa &= ~(1 << 4);
15128 gcc_assert (mantissa <= 15);
15129
15130 /* GCC internally does not use IEEE754-like encoding (where normalized
15131 significands are in the range [1, 2)). GCC uses [0.5, 1) (see real.c).
15132 Our mantissa values are shifted 4 places to the left relative to
15133 normalized IEEE754 so we must modify the exponent returned by REAL_EXP
15134 by 5 places to correct for GCC's representation. */
15135 exponent = 5 - exponent;
15136
15137 return (exponent >= 0 && exponent <= 7);
15138}
15139
ab6501d7
SD
15140/* Returns the string with the instruction for AdvSIMD MOVI, MVNI, ORR or BIC
15141 immediate with a CONST_VECTOR of MODE and WIDTH. WHICH selects whether to
15142 output MOVI/MVNI, ORR or BIC immediate. */
3520f7cc 15143char*
b187677b 15144aarch64_output_simd_mov_immediate (rtx const_vector, unsigned width,
ab6501d7 15145 enum simd_immediate_check which)
3520f7cc 15146{
3ea63f60 15147 bool is_valid;
3520f7cc 15148 static char templ[40];
3520f7cc 15149 const char *mnemonic;
e4f0f84d 15150 const char *shift_op;
3520f7cc 15151 unsigned int lane_count = 0;
81c2dfb9 15152 char element_char;
3520f7cc 15153
b187677b 15154 struct simd_immediate_info info;
48063b9d
IB
15155
15156 /* This will return true to show const_vector is legal for use as either
ab6501d7
SD
15157 an AdvSIMD MOVI instruction (or, implicitly, MVNI), ORR or BIC immediate.
15158 It will also update INFO to show how the immediate should be generated.
15159 WHICH selects whether to check for MOVI/MVNI, ORR or BIC. */
b187677b 15160 is_valid = aarch64_simd_valid_immediate (const_vector, &info, which);
3520f7cc
JG
15161 gcc_assert (is_valid);
15162
b187677b
RS
15163 element_char = sizetochar (GET_MODE_BITSIZE (info.elt_mode));
15164 lane_count = width / GET_MODE_BITSIZE (info.elt_mode);
48063b9d 15165
b187677b 15166 if (GET_MODE_CLASS (info.elt_mode) == MODE_FLOAT)
3520f7cc 15167 {
b187677b 15168 gcc_assert (info.shift == 0 && info.insn == simd_immediate_info::MOV);
0d8e1702
KT
15169 /* For FP zero change it to a CONST_INT 0 and use the integer SIMD
15170 move immediate path. */
48063b9d
IB
15171 if (aarch64_float_const_zero_rtx_p (info.value))
15172 info.value = GEN_INT (0);
15173 else
15174 {
83faf7d0 15175 const unsigned int buf_size = 20;
48063b9d 15176 char float_buf[buf_size] = {'\0'};
34a72c33
RS
15177 real_to_decimal_for_mode (float_buf,
15178 CONST_DOUBLE_REAL_VALUE (info.value),
b187677b 15179 buf_size, buf_size, 1, info.elt_mode);
48063b9d
IB
15180
15181 if (lane_count == 1)
15182 snprintf (templ, sizeof (templ), "fmov\t%%d0, %s", float_buf);
15183 else
15184 snprintf (templ, sizeof (templ), "fmov\t%%0.%d%c, %s",
81c2dfb9 15185 lane_count, element_char, float_buf);
48063b9d
IB
15186 return templ;
15187 }
3520f7cc 15188 }
3520f7cc 15189
0d8e1702 15190 gcc_assert (CONST_INT_P (info.value));
ab6501d7
SD
15191
15192 if (which == AARCH64_CHECK_MOV)
15193 {
b187677b
RS
15194 mnemonic = info.insn == simd_immediate_info::MVN ? "mvni" : "movi";
15195 shift_op = info.modifier == simd_immediate_info::MSL ? "msl" : "lsl";
ab6501d7
SD
15196 if (lane_count == 1)
15197 snprintf (templ, sizeof (templ), "%s\t%%d0, " HOST_WIDE_INT_PRINT_HEX,
15198 mnemonic, UINTVAL (info.value));
15199 else if (info.shift)
15200 snprintf (templ, sizeof (templ), "%s\t%%0.%d%c, "
15201 HOST_WIDE_INT_PRINT_HEX ", %s %d", mnemonic, lane_count,
15202 element_char, UINTVAL (info.value), shift_op, info.shift);
15203 else
15204 snprintf (templ, sizeof (templ), "%s\t%%0.%d%c, "
15205 HOST_WIDE_INT_PRINT_HEX, mnemonic, lane_count,
15206 element_char, UINTVAL (info.value));
15207 }
3520f7cc 15208 else
ab6501d7
SD
15209 {
15210 /* For AARCH64_CHECK_BIC and AARCH64_CHECK_ORR. */
b187677b 15211 mnemonic = info.insn == simd_immediate_info::MVN ? "bic" : "orr";
ab6501d7
SD
15212 if (info.shift)
15213 snprintf (templ, sizeof (templ), "%s\t%%0.%d%c, #"
15214 HOST_WIDE_INT_PRINT_DEC ", %s #%d", mnemonic, lane_count,
15215 element_char, UINTVAL (info.value), "lsl", info.shift);
15216 else
15217 snprintf (templ, sizeof (templ), "%s\t%%0.%d%c, #"
15218 HOST_WIDE_INT_PRINT_DEC, mnemonic, lane_count,
15219 element_char, UINTVAL (info.value));
15220 }
3520f7cc
JG
15221 return templ;
15222}
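
/* Example outputs of the routine above (assumed, operand 0 shown as v0):
   a V4SImode constant with every element 0x55 gives "movi v0.4s, 0x55",
   while every element equal to 0x5500 gives "movi v0.4s, 0x55, lsl 8";
   inverted patterns use "mvni" instead.  */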
15223
b7342d25 15224char*
77e994c9 15225aarch64_output_scalar_simd_mov_immediate (rtx immediate, scalar_int_mode mode)
b7342d25 15226{
a2170965
TC
15227
15228 /* If a floating point number was passed and we desire to use it in an
15229 integer mode do the conversion to integer. */
15230 if (CONST_DOUBLE_P (immediate) && GET_MODE_CLASS (mode) == MODE_INT)
15231 {
15232 unsigned HOST_WIDE_INT ival;
15233 if (!aarch64_reinterpret_float_as_int (immediate, &ival))
15234 gcc_unreachable ();
15235 immediate = gen_int_mode (ival, mode);
15236 }
15237
ef4bddc2 15238 machine_mode vmode;
a2170965
TC
15239 /* Use a 64-bit mode for everything except for DI/DF mode, where we use
15240 a 128-bit vector mode. */
15241 int width = GET_MODE_BITSIZE (mode) == 64 ? 128 : 64;
b7342d25 15242
a2170965 15243 vmode = aarch64_simd_container_mode (mode, width);
b7342d25 15244 rtx v_op = aarch64_simd_gen_const_vector_dup (vmode, INTVAL (immediate));
b187677b 15245 return aarch64_output_simd_mov_immediate (v_op, width);
b7342d25
IB
15246}
15247
43cacb12
RS
15248/* Return the output string to use for moving immediate CONST_VECTOR
15249 into an SVE register. */
15250
15251char *
15252aarch64_output_sve_mov_immediate (rtx const_vector)
15253{
15254 static char templ[40];
15255 struct simd_immediate_info info;
15256 char element_char;
15257
15258 bool is_valid = aarch64_simd_valid_immediate (const_vector, &info);
15259 gcc_assert (is_valid);
15260
15261 element_char = sizetochar (GET_MODE_BITSIZE (info.elt_mode));
15262
15263 if (info.step)
15264 {
15265 snprintf (templ, sizeof (templ), "index\t%%0.%c, #"
15266 HOST_WIDE_INT_PRINT_DEC ", #" HOST_WIDE_INT_PRINT_DEC,
15267 element_char, INTVAL (info.value), INTVAL (info.step));
15268 return templ;
15269 }
15270
15271 if (GET_MODE_CLASS (info.elt_mode) == MODE_FLOAT)
15272 {
15273 if (aarch64_float_const_zero_rtx_p (info.value))
15274 info.value = GEN_INT (0);
15275 else
15276 {
15277 const int buf_size = 20;
15278 char float_buf[buf_size] = {};
15279 real_to_decimal_for_mode (float_buf,
15280 CONST_DOUBLE_REAL_VALUE (info.value),
15281 buf_size, buf_size, 1, info.elt_mode);
15282
15283 snprintf (templ, sizeof (templ), "fmov\t%%0.%c, #%s",
15284 element_char, float_buf);
15285 return templ;
15286 }
15287 }
15288
15289 snprintf (templ, sizeof (templ), "mov\t%%0.%c, #" HOST_WIDE_INT_PRINT_DEC,
15290 element_char, INTVAL (info.value));
15291 return templ;
15292}
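
/* Illustrative outputs (assumed, operand 0 shown as z0): a splat of the
   integer 7 in a .s vector gives "mov z0.s, #7", while the series
   0, 1, 2, ... gives "index z0.s, #0, #1".  */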
15293
15294/* Return the asm format for a PTRUE instruction whose destination has
15295 mode MODE. SUFFIX is the element size suffix. */
15296
15297char *
15298aarch64_output_ptrue (machine_mode mode, char suffix)
15299{
15300 unsigned int nunits;
15301 static char buf[sizeof ("ptrue\t%0.N, vlNNNNN")];
15302 if (GET_MODE_NUNITS (mode).is_constant (&nunits))
15303 snprintf (buf, sizeof (buf), "ptrue\t%%0.%c, vl%d", suffix, nunits);
15304 else
15305 snprintf (buf, sizeof (buf), "ptrue\t%%0.%c, all", suffix);
15306 return buf;
15307}
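
/* For example (assumed, operand 0 shown as p0): with a known 16-unit
   predicate this prints "ptrue p0.b, vl16", and for a variable-length
   vector it prints "ptrue p0.b, all".  */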
15308
88b08073
JG
15309/* Split operands into moves from op[1] + op[2] into op[0]. */
15310
15311void
15312aarch64_split_combinev16qi (rtx operands[3])
15313{
15314 unsigned int dest = REGNO (operands[0]);
15315 unsigned int src1 = REGNO (operands[1]);
15316 unsigned int src2 = REGNO (operands[2]);
ef4bddc2 15317 machine_mode halfmode = GET_MODE (operands[1]);
462a99aa 15318 unsigned int halfregs = REG_NREGS (operands[1]);
88b08073
JG
15319 rtx destlo, desthi;
15320
15321 gcc_assert (halfmode == V16QImode);
15322
15323 if (src1 == dest && src2 == dest + halfregs)
15324 {
15325 /* No-op move. Can't split to nothing; emit something. */
15326 emit_note (NOTE_INSN_DELETED);
15327 return;
15328 }
15329
15330 /* Preserve register attributes for variable tracking. */
15331 destlo = gen_rtx_REG_offset (operands[0], halfmode, dest, 0);
15332 desthi = gen_rtx_REG_offset (operands[0], halfmode, dest + halfregs,
15333 GET_MODE_SIZE (halfmode));
15334
15335 /* Special case of reversed high/low parts. */
15336 if (reg_overlap_mentioned_p (operands[2], destlo)
15337 && reg_overlap_mentioned_p (operands[1], desthi))
15338 {
15339 emit_insn (gen_xorv16qi3 (operands[1], operands[1], operands[2]));
15340 emit_insn (gen_xorv16qi3 (operands[2], operands[1], operands[2]));
15341 emit_insn (gen_xorv16qi3 (operands[1], operands[1], operands[2]));
15342 }
15343 else if (!reg_overlap_mentioned_p (operands[2], destlo))
15344 {
15345 /* Try to avoid unnecessary moves if part of the result
15346 is in the right place already. */
15347 if (src1 != dest)
15348 emit_move_insn (destlo, operands[1]);
15349 if (src2 != dest + halfregs)
15350 emit_move_insn (desthi, operands[2]);
15351 }
15352 else
15353 {
15354 if (src2 != dest + halfregs)
15355 emit_move_insn (desthi, operands[2]);
15356 if (src1 != dest)
15357 emit_move_insn (destlo, operands[1]);
15358 }
15359}
15360
15361/* vec_perm support. */
15362
88b08073
JG
15363struct expand_vec_perm_d
15364{
15365 rtx target, op0, op1;
e3342de4 15366 vec_perm_indices perm;
ef4bddc2 15367 machine_mode vmode;
43cacb12 15368 unsigned int vec_flags;
88b08073
JG
15369 bool one_vector_p;
15370 bool testing_p;
15371};
15372
15373/* Generate a variable permutation. */
15374
15375static void
15376aarch64_expand_vec_perm_1 (rtx target, rtx op0, rtx op1, rtx sel)
15377{
ef4bddc2 15378 machine_mode vmode = GET_MODE (target);
88b08073
JG
15379 bool one_vector_p = rtx_equal_p (op0, op1);
15380
15381 gcc_checking_assert (vmode == V8QImode || vmode == V16QImode);
15382 gcc_checking_assert (GET_MODE (op0) == vmode);
15383 gcc_checking_assert (GET_MODE (op1) == vmode);
15384 gcc_checking_assert (GET_MODE (sel) == vmode);
15385 gcc_checking_assert (TARGET_SIMD);
15386
15387 if (one_vector_p)
15388 {
15389 if (vmode == V8QImode)
15390 {
15391 /* Expand the argument to a V16QI mode by duplicating it. */
15392 rtx pair = gen_reg_rtx (V16QImode);
15393 emit_insn (gen_aarch64_combinev8qi (pair, op0, op0));
15394 emit_insn (gen_aarch64_tbl1v8qi (target, pair, sel));
15395 }
15396 else
15397 {
15398 emit_insn (gen_aarch64_tbl1v16qi (target, op0, sel));
15399 }
15400 }
15401 else
15402 {
15403 rtx pair;
15404
15405 if (vmode == V8QImode)
15406 {
15407 pair = gen_reg_rtx (V16QImode);
15408 emit_insn (gen_aarch64_combinev8qi (pair, op0, op1));
15409 emit_insn (gen_aarch64_tbl1v8qi (target, pair, sel));
15410 }
15411 else
15412 {
15413 pair = gen_reg_rtx (OImode);
15414 emit_insn (gen_aarch64_combinev16qi (pair, op0, op1));
15415 emit_insn (gen_aarch64_tbl2v16qi (target, pair, sel));
15416 }
15417 }
15418}
15419
80940017
RS
15420/* Expand a vec_perm with the operands given by TARGET, OP0, OP1 and SEL.
15421 NELT is the number of elements in the vector. */
15422
88b08073 15423void
80940017
RS
15424aarch64_expand_vec_perm (rtx target, rtx op0, rtx op1, rtx sel,
15425 unsigned int nelt)
88b08073 15426{
ef4bddc2 15427 machine_mode vmode = GET_MODE (target);
88b08073 15428 bool one_vector_p = rtx_equal_p (op0, op1);
f7c4e5b8 15429 rtx mask;
88b08073
JG
15430
15431 /* The TBL instruction does not use a modulo index, so we must take care
15432 of that ourselves. */
f7c4e5b8
AL
15433 mask = aarch64_simd_gen_const_vector_dup (vmode,
15434 one_vector_p ? nelt - 1 : 2 * nelt - 1);
88b08073
JG
15435 sel = expand_simple_binop (vmode, AND, sel, mask, NULL, 0, OPTAB_LIB_WIDEN);
15436
f7c4e5b8
AL
15437 /* For big-endian, we also need to reverse the index within the vector
15438 (but not which vector). */
15439 if (BYTES_BIG_ENDIAN)
15440 {
15441 /* If one_vector_p, mask is a vector of (nelt - 1)'s already. */
15442 if (!one_vector_p)
15443 mask = aarch64_simd_gen_const_vector_dup (vmode, nelt - 1);
15444 sel = expand_simple_binop (vmode, XOR, sel, mask,
15445 NULL, 0, OPTAB_LIB_WIDEN);
15446 }
88b08073
JG
15447 aarch64_expand_vec_perm_1 (target, op0, op1, sel);
15448}
15449
43cacb12
RS
15450/* Generate (set TARGET (unspec [OP0 OP1] CODE)). */
15451
15452static void
15453emit_unspec2 (rtx target, int code, rtx op0, rtx op1)
15454{
15455 emit_insn (gen_rtx_SET (target,
15456 gen_rtx_UNSPEC (GET_MODE (target),
15457 gen_rtvec (2, op0, op1), code)));
15458}
15459
15460/* Expand an SVE vec_perm with the given operands. */
15461
15462void
15463aarch64_expand_sve_vec_perm (rtx target, rtx op0, rtx op1, rtx sel)
15464{
15465 machine_mode data_mode = GET_MODE (target);
15466 machine_mode sel_mode = GET_MODE (sel);
15467 /* Enforced by the pattern condition. */
15468 int nunits = GET_MODE_NUNITS (sel_mode).to_constant ();
15469
15470 /* Note: vec_perm indices are supposed to wrap when they go beyond the
15471 size of the two value vectors, i.e. the upper bits of the indices
15472 are effectively ignored. SVE TBL instead produces 0 for any
15473 out-of-range indices, so we need to modulo all the vec_perm indices
15474 to ensure they are all in range. */
15475 rtx sel_reg = force_reg (sel_mode, sel);
15476
15477 /* Check if the sel only references the first values vector. */
15478 if (GET_CODE (sel) == CONST_VECTOR
15479 && aarch64_const_vec_all_in_range_p (sel, 0, nunits - 1))
15480 {
15481 emit_unspec2 (target, UNSPEC_TBL, op0, sel_reg);
15482 return;
15483 }
15484
15485 /* Check if the two values vectors are the same. */
15486 if (rtx_equal_p (op0, op1))
15487 {
15488 rtx max_sel = aarch64_simd_gen_const_vector_dup (sel_mode, nunits - 1);
15489 rtx sel_mod = expand_simple_binop (sel_mode, AND, sel_reg, max_sel,
15490 NULL, 0, OPTAB_DIRECT);
15491 emit_unspec2 (target, UNSPEC_TBL, op0, sel_mod);
15492 return;
15493 }
15494
15495 /* Run TBL on each value vector and combine the results. */
15496
15497 rtx res0 = gen_reg_rtx (data_mode);
15498 rtx res1 = gen_reg_rtx (data_mode);
15499 rtx neg_num_elems = aarch64_simd_gen_const_vector_dup (sel_mode, -nunits);
15500 if (GET_CODE (sel) != CONST_VECTOR
15501 || !aarch64_const_vec_all_in_range_p (sel, 0, 2 * nunits - 1))
15502 {
15503 rtx max_sel = aarch64_simd_gen_const_vector_dup (sel_mode,
15504 2 * nunits - 1);
15505 sel_reg = expand_simple_binop (sel_mode, AND, sel_reg, max_sel,
15506 NULL, 0, OPTAB_DIRECT);
15507 }
15508 emit_unspec2 (res0, UNSPEC_TBL, op0, sel_reg);
15509 rtx sel_sub = expand_simple_binop (sel_mode, PLUS, sel_reg, neg_num_elems,
15510 NULL, 0, OPTAB_DIRECT);
15511 emit_unspec2 (res1, UNSPEC_TBL, op1, sel_sub);
15512 if (GET_MODE_CLASS (data_mode) == MODE_VECTOR_INT)
15513 emit_insn (gen_rtx_SET (target, gen_rtx_IOR (data_mode, res0, res1)));
15514 else
15515 emit_unspec2 (target, UNSPEC_IORF, res0, res1);
15516}
15517
cc4d934f
JG
15518/* Recognize patterns suitable for the TRN instructions. */
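/* Illustrative selectors (assumed): for a two-operand V4SImode permute,
   {0, 4, 2, 6} matches TRN1 and {1, 5, 3, 7} matches TRN2.  */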
15519static bool
15520aarch64_evpc_trn (struct expand_vec_perm_d *d)
15521{
6a70badb
RS
15522 HOST_WIDE_INT odd;
15523 poly_uint64 nelt = d->perm.length ();
cc4d934f 15524 rtx out, in0, in1, x;
ef4bddc2 15525 machine_mode vmode = d->vmode;
cc4d934f
JG
15526
15527 if (GET_MODE_UNIT_SIZE (vmode) > 8)
15528 return false;
15529
15530 /* Note that these are little-endian tests.
15531 We correct for big-endian later. */
6a70badb
RS
15532 if (!d->perm[0].is_constant (&odd)
15533 || (odd != 0 && odd != 1)
326ac20e
RS
15534 || !d->perm.series_p (0, 2, odd, 2)
15535 || !d->perm.series_p (1, 2, nelt + odd, 2))
cc4d934f 15536 return false;
cc4d934f
JG
15537
15538 /* Success! */
15539 if (d->testing_p)
15540 return true;
15541
15542 in0 = d->op0;
15543 in1 = d->op1;
43cacb12
RS
15544 /* We don't need a big-endian lane correction for SVE; see the comment
15545 at the head of aarch64-sve.md for details. */
15546 if (BYTES_BIG_ENDIAN && d->vec_flags == VEC_ADVSIMD)
cc4d934f
JG
15547 {
15548 x = in0, in0 = in1, in1 = x;
15549 odd = !odd;
15550 }
15551 out = d->target;
15552
3f8334a5
RS
15553 emit_set_insn (out, gen_rtx_UNSPEC (vmode, gen_rtvec (2, in0, in1),
15554 odd ? UNSPEC_TRN2 : UNSPEC_TRN1));
cc4d934f
JG
15555 return true;
15556}
15557
15558/* Recognize patterns suitable for the UZP instructions. */
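/* Illustrative selectors (assumed): for a two-operand V4SImode permute,
   {0, 2, 4, 6} matches UZP1 and {1, 3, 5, 7} matches UZP2.  */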
15559static bool
15560aarch64_evpc_uzp (struct expand_vec_perm_d *d)
15561{
6a70badb 15562 HOST_WIDE_INT odd;
cc4d934f 15563 rtx out, in0, in1, x;
ef4bddc2 15564 machine_mode vmode = d->vmode;
cc4d934f
JG
15565
15566 if (GET_MODE_UNIT_SIZE (vmode) > 8)
15567 return false;
15568
15569 /* Note that these are little-endian tests.
15570 We correct for big-endian later. */
6a70badb
RS
15571 if (!d->perm[0].is_constant (&odd)
15572 || (odd != 0 && odd != 1)
326ac20e 15573 || !d->perm.series_p (0, 1, odd, 2))
cc4d934f 15574 return false;
cc4d934f
JG
15575
15576 /* Success! */
15577 if (d->testing_p)
15578 return true;
15579
15580 in0 = d->op0;
15581 in1 = d->op1;
43cacb12
RS
15582 /* We don't need a big-endian lane correction for SVE; see the comment
15583 at the head of aarch64-sve.md for details. */
15584 if (BYTES_BIG_ENDIAN && d->vec_flags == VEC_ADVSIMD)
cc4d934f
JG
15585 {
15586 x = in0, in0 = in1, in1 = x;
15587 odd = !odd;
15588 }
15589 out = d->target;
15590
3f8334a5
RS
15591 emit_set_insn (out, gen_rtx_UNSPEC (vmode, gen_rtvec (2, in0, in1),
15592 odd ? UNSPEC_UZP2 : UNSPEC_UZP1));
cc4d934f
JG
15593 return true;
15594}
15595
15596/* Recognize patterns suitable for the ZIP instructions. */
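/* Illustrative selectors (assumed): for a two-operand V4SImode permute,
   {0, 4, 1, 5} matches ZIP1 and {2, 6, 3, 7} matches ZIP2.  */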
15597static bool
15598aarch64_evpc_zip (struct expand_vec_perm_d *d)
15599{
6a70badb
RS
15600 unsigned int high;
15601 poly_uint64 nelt = d->perm.length ();
cc4d934f 15602 rtx out, in0, in1, x;
ef4bddc2 15603 machine_mode vmode = d->vmode;
cc4d934f
JG
15604
15605 if (GET_MODE_UNIT_SIZE (vmode) > 8)
15606 return false;
15607
15608 /* Note that these are little-endian tests.
15609 We correct for big-endian later. */
6a70badb
RS
15610 poly_uint64 first = d->perm[0];
15611 if ((maybe_ne (first, 0U) && maybe_ne (first * 2, nelt))
15612 || !d->perm.series_p (0, 2, first, 1)
15613 || !d->perm.series_p (1, 2, first + nelt, 1))
cc4d934f 15614 return false;
6a70badb 15615 high = maybe_ne (first, 0U);
cc4d934f
JG
15616
15617 /* Success! */
15618 if (d->testing_p)
15619 return true;
15620
15621 in0 = d->op0;
15622 in1 = d->op1;
43cacb12
RS
15623 /* We don't need a big-endian lane correction for SVE; see the comment
15624 at the head of aarch64-sve.md for details. */
15625 if (BYTES_BIG_ENDIAN && d->vec_flags == VEC_ADVSIMD)
cc4d934f
JG
15626 {
15627 x = in0, in0 = in1, in1 = x;
15628 high = !high;
15629 }
15630 out = d->target;
15631
3f8334a5
RS
15632 emit_set_insn (out, gen_rtx_UNSPEC (vmode, gen_rtvec (2, in0, in1),
15633 high ? UNSPEC_ZIP2 : UNSPEC_ZIP1));
cc4d934f
JG
15634 return true;
15635}
15636
ae0533da
AL
15637/* Recognize patterns for the EXT insn. */
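/* Illustrative selector (assumed): for a two-operand V4SImode permute,
   {1, 2, 3, 4} matches EXT with an element offset of 1.  */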
15638
15639static bool
15640aarch64_evpc_ext (struct expand_vec_perm_d *d)
15641{
6a70badb 15642 HOST_WIDE_INT location;
ae0533da
AL
15643 rtx offset;
15644
6a70badb
RS
15645 /* The first element always refers to the first vector.
15646 Check if the extracted indices are increasing by one. */
43cacb12
RS
15647 if (d->vec_flags == VEC_SVE_PRED
15648 || !d->perm[0].is_constant (&location)
6a70badb 15649 || !d->perm.series_p (0, 1, location, 1))
326ac20e 15650 return false;
ae0533da 15651
ae0533da
AL
15652 /* Success! */
15653 if (d->testing_p)
15654 return true;
15655
b31e65bb 15656 /* The case where (location == 0) is a no-op for both big- and little-endian,
43cacb12 15657 and is removed by the mid-end at optimization levels -O1 and higher.
b31e65bb 15658
43cacb12
RS
15659 We don't need a big-endian lane correction for SVE; see the comment
15660 at the head of aarch64-sve.md for details. */
15661 if (BYTES_BIG_ENDIAN && location != 0 && d->vec_flags == VEC_ADVSIMD)
ae0533da
AL
15662 {
15663 /* After setup, we want the high elements of the first vector (stored
15664 at the LSB end of the register), and the low elements of the second
15665 vector (stored at the MSB end of the register). So swap. */
cb5c6c29 15666 std::swap (d->op0, d->op1);
6a70badb
RS
15667 /* location != 0 (above), so safe to assume (nelt - location) < nelt.
15668 to_constant () is safe since this is restricted to Advanced SIMD
15669 vectors. */
15670 location = d->perm.length ().to_constant () - location;
ae0533da
AL
15671 }
15672
15673 offset = GEN_INT (location);
3f8334a5
RS
15674 emit_set_insn (d->target,
15675 gen_rtx_UNSPEC (d->vmode,
15676 gen_rtvec (3, d->op0, d->op1, offset),
15677 UNSPEC_EXT));
ae0533da
AL
15678 return true;
15679}
15680
43cacb12
RS
15681/* Recognize patterns for the REV{64,32,16} insns, which reverse elements
15682 within each 64-bit, 32-bit or 16-bit granule. */
923fcec3
AL
15683
15684static bool
43cacb12 15685aarch64_evpc_rev_local (struct expand_vec_perm_d *d)
923fcec3 15686{
6a70badb
RS
15687 HOST_WIDE_INT diff;
15688 unsigned int i, size, unspec;
43cacb12 15689 machine_mode pred_mode;
923fcec3 15690
43cacb12
RS
15691 if (d->vec_flags == VEC_SVE_PRED
15692 || !d->one_vector_p
6a70badb 15693 || !d->perm[0].is_constant (&diff))
923fcec3
AL
15694 return false;
15695
3f8334a5
RS
15696 size = (diff + 1) * GET_MODE_UNIT_SIZE (d->vmode);
15697 if (size == 8)
43cacb12
RS
15698 {
15699 unspec = UNSPEC_REV64;
15700 pred_mode = VNx2BImode;
15701 }
3f8334a5 15702 else if (size == 4)
43cacb12
RS
15703 {
15704 unspec = UNSPEC_REV32;
15705 pred_mode = VNx4BImode;
15706 }
3f8334a5 15707 else if (size == 2)
43cacb12
RS
15708 {
15709 unspec = UNSPEC_REV16;
15710 pred_mode = VNx8BImode;
15711 }
3f8334a5
RS
15712 else
15713 return false;
923fcec3 15714
326ac20e
RS
15715 unsigned int step = diff + 1;
15716 for (i = 0; i < step; ++i)
15717 if (!d->perm.series_p (i, step, diff - i, step))
15718 return false;
923fcec3
AL
15719
15720 /* Success! */
15721 if (d->testing_p)
15722 return true;
15723
43cacb12
RS
15724 rtx src = gen_rtx_UNSPEC (d->vmode, gen_rtvec (1, d->op0), unspec);
15725 if (d->vec_flags == VEC_SVE_DATA)
15726 {
15727 rtx pred = force_reg (pred_mode, CONSTM1_RTX (pred_mode));
15728 src = gen_rtx_UNSPEC (d->vmode, gen_rtvec (2, pred, src),
15729 UNSPEC_MERGE_PTRUE);
15730 }
15731 emit_set_insn (d->target, src);
15732 return true;
15733}
15734
15735/* Recognize patterns for the REV insn, which reverses elements within
15736 a full vector. */
15737
15738static bool
15739aarch64_evpc_rev_global (struct expand_vec_perm_d *d)
15740{
15741 poly_uint64 nelt = d->perm.length ();
15742
15743 if (!d->one_vector_p || d->vec_flags != VEC_SVE_DATA)
15744 return false;
15745
15746 if (!d->perm.series_p (0, 1, nelt - 1, -1))
15747 return false;
15748
15749 /* Success! */
15750 if (d->testing_p)
15751 return true;
15752
15753 rtx src = gen_rtx_UNSPEC (d->vmode, gen_rtvec (1, d->op0), UNSPEC_REV);
15754 emit_set_insn (d->target, src);
923fcec3
AL
15755 return true;
15756}
15757
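/* For aarch64_evpc_dup below, an illustrative selector (assumed): a
   V4SImode selector of {2, 2, 2, 2} duplicates lane 2 of the first
   operand, i.e. "dup v0.4s, v1.s[2]".  */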
91bd4114
JG
15758static bool
15759aarch64_evpc_dup (struct expand_vec_perm_d *d)
15760{
91bd4114
JG
15761 rtx out = d->target;
15762 rtx in0;
6a70badb 15763 HOST_WIDE_INT elt;
ef4bddc2 15764 machine_mode vmode = d->vmode;
91bd4114
JG
15765 rtx lane;
15766
43cacb12
RS
15767 if (d->vec_flags == VEC_SVE_PRED
15768 || d->perm.encoding ().encoded_nelts () != 1
6a70badb 15769 || !d->perm[0].is_constant (&elt))
326ac20e
RS
15770 return false;
15771
43cacb12
RS
15772 if (d->vec_flags == VEC_SVE_DATA && elt >= 64 * GET_MODE_UNIT_SIZE (vmode))
15773 return false;
15774
326ac20e
RS
15775 /* Success! */
15776 if (d->testing_p)
15777 return true;
15778
91bd4114
JG
15779 /* The generic preparation in aarch64_expand_vec_perm_const_1
15780 swaps the operand order and the permute indices if it finds
15781 d->perm[0] to be in the second operand. Thus, we can always
15782 use d->op0 and need not do any extra arithmetic to get the
15783 correct lane number. */
15784 in0 = d->op0;
f901401e 15785 lane = GEN_INT (elt); /* The pattern corrects for big-endian. */
91bd4114 15786
3f8334a5
RS
15787 rtx parallel = gen_rtx_PARALLEL (vmode, gen_rtvec (1, lane));
15788 rtx select = gen_rtx_VEC_SELECT (GET_MODE_INNER (vmode), in0, parallel);
15789 emit_set_insn (out, gen_rtx_VEC_DUPLICATE (vmode, select));
91bd4114
JG
15790 return true;
15791}
15792
88b08073
JG
15793static bool
15794aarch64_evpc_tbl (struct expand_vec_perm_d *d)
15795{
43cacb12 15796 rtx rperm[MAX_COMPILE_TIME_VEC_BYTES], sel;
ef4bddc2 15797 machine_mode vmode = d->vmode;
6a70badb
RS
15798
15799 /* Make sure that the indices are constant. */
15800 unsigned int encoded_nelts = d->perm.encoding ().encoded_nelts ();
15801 for (unsigned int i = 0; i < encoded_nelts; ++i)
15802 if (!d->perm[i].is_constant ())
15803 return false;
88b08073 15804
88b08073
JG
15805 if (d->testing_p)
15806 return true;
15807
15808 /* Generic code will try constant permutation twice. Once with the
15809 original mode and again with the elements lowered to QImode.
15810 So wait and don't do the selector expansion ourselves. */
15811 if (vmode != V8QImode && vmode != V16QImode)
15812 return false;
15813
6a70badb
RS
15814 /* to_constant is safe since this routine is specific to Advanced SIMD
15815 vectors. */
15816 unsigned int nelt = d->perm.length ().to_constant ();
15817 for (unsigned int i = 0; i < nelt; ++i)
15818 /* If big-endian and two vectors we end up with a weird mixed-endian
15819 mode on NEON. Reverse the index within each word but not the word
15820 itself. to_constant is safe because we checked is_constant above. */
15821 rperm[i] = GEN_INT (BYTES_BIG_ENDIAN
15822 ? d->perm[i].to_constant () ^ (nelt - 1)
15823 : d->perm[i].to_constant ());
bbcc9c00 15824
88b08073
JG
15825 sel = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, rperm));
15826 sel = force_reg (vmode, sel);
15827
15828 aarch64_expand_vec_perm_1 (d->target, d->op0, d->op1, sel);
15829 return true;
15830}
15831
43cacb12
RS
15832/* Try to implement D using an SVE TBL instruction. */
15833
15834static bool
15835aarch64_evpc_sve_tbl (struct expand_vec_perm_d *d)
15836{
15837 unsigned HOST_WIDE_INT nelt;
15838
15839 /* Permuting two variable-length vectors could overflow the
15840 index range. */
15841 if (!d->one_vector_p && !d->perm.length ().is_constant (&nelt))
15842 return false;
15843
15844 if (d->testing_p)
15845 return true;
15846
15847 machine_mode sel_mode = mode_for_int_vector (d->vmode).require ();
15848 rtx sel = vec_perm_indices_to_rtx (sel_mode, d->perm);
e25c95ef
RS
15849 if (d->one_vector_p)
15850 emit_unspec2 (d->target, UNSPEC_TBL, d->op0, force_reg (sel_mode, sel));
15851 else
15852 aarch64_expand_sve_vec_perm (d->target, d->op0, d->op1, sel);
43cacb12
RS
15853 return true;
15854}
15855
88b08073
JG
15856static bool
15857aarch64_expand_vec_perm_const_1 (struct expand_vec_perm_d *d)
15858{
15859 /* The pattern matching functions above are written to look for a small
15860 number to begin the sequence (0, 1, N/2). If we begin with an index
15861 from the second operand, we can swap the operands. */
6a70badb
RS
15862 poly_int64 nelt = d->perm.length ();
15863 if (known_ge (d->perm[0], nelt))
88b08073 15864 {
e3342de4 15865 d->perm.rotate_inputs (1);
cb5c6c29 15866 std::swap (d->op0, d->op1);
88b08073
JG
15867 }
15868
43cacb12
RS
15869 if ((d->vec_flags == VEC_ADVSIMD
15870 || d->vec_flags == VEC_SVE_DATA
15871 || d->vec_flags == VEC_SVE_PRED)
15872 && known_gt (nelt, 1))
cc4d934f 15873 {
43cacb12
RS
15874 if (aarch64_evpc_rev_local (d))
15875 return true;
15876 else if (aarch64_evpc_rev_global (d))
923fcec3
AL
15877 return true;
15878 else if (aarch64_evpc_ext (d))
ae0533da 15879 return true;
f901401e
AL
15880 else if (aarch64_evpc_dup (d))
15881 return true;
ae0533da 15882 else if (aarch64_evpc_zip (d))
cc4d934f
JG
15883 return true;
15884 else if (aarch64_evpc_uzp (d))
15885 return true;
15886 else if (aarch64_evpc_trn (d))
15887 return true;
43cacb12
RS
15888 if (d->vec_flags == VEC_SVE_DATA)
15889 return aarch64_evpc_sve_tbl (d);
4ec8bb67 15890 else if (d->vec_flags == VEC_ADVSIMD)
43cacb12 15891 return aarch64_evpc_tbl (d);
cc4d934f 15892 }
88b08073
JG
15893 return false;
15894}
15895
f151c9e1 15896/* Implement TARGET_VECTORIZE_VEC_PERM_CONST. */
88b08073 15897
f151c9e1
RS
15898static bool
15899aarch64_vectorize_vec_perm_const (machine_mode vmode, rtx target, rtx op0,
15900 rtx op1, const vec_perm_indices &sel)
88b08073
JG
15901{
15902 struct expand_vec_perm_d d;
88b08073 15903
326ac20e 15904 /* Check whether the mask can be applied to a single vector. */
e25c95ef
RS
15905 if (sel.ninputs () == 1
15906 || (op0 && rtx_equal_p (op0, op1)))
326ac20e
RS
15907 d.one_vector_p = true;
15908 else if (sel.all_from_input_p (0))
88b08073 15909 {
326ac20e
RS
15910 d.one_vector_p = true;
15911 op1 = op0;
88b08073 15912 }
326ac20e 15913 else if (sel.all_from_input_p (1))
88b08073 15914 {
88b08073 15915 d.one_vector_p = true;
326ac20e 15916 op0 = op1;
88b08073 15917 }
326ac20e
RS
15918 else
15919 d.one_vector_p = false;
88b08073 15920
326ac20e
RS
15921 d.perm.new_vector (sel.encoding (), d.one_vector_p ? 1 : 2,
15922 sel.nelts_per_input ());
15923 d.vmode = vmode;
43cacb12 15924 d.vec_flags = aarch64_classify_vector_mode (d.vmode);
326ac20e
RS
15925 d.target = target;
15926 d.op0 = op0;
15927 d.op1 = op1;
15928 d.testing_p = !target;
e3342de4 15929
f151c9e1
RS
15930 if (!d.testing_p)
15931 return aarch64_expand_vec_perm_const_1 (&d);
88b08073 15932
326ac20e 15933 rtx_insn *last = get_last_insn ();
f151c9e1 15934 bool ret = aarch64_expand_vec_perm_const_1 (&d);
326ac20e 15935 gcc_assert (last == get_last_insn ());
88b08073
JG
15936
15937 return ret;
15938}
15939
73e3da51
RS
15940/* Generate a byte permute mask for a register of mode MODE,
15941 which has NUNITS units. */
15942
668046d1 15943rtx
73e3da51 15944aarch64_reverse_mask (machine_mode mode, unsigned int nunits)
668046d1
DS
15945{
15946 /* We have to reverse each vector because we don't have
15947 a permuted load that can reverse-load according to ABI rules. */
15948 rtx mask;
15949 rtvec v = rtvec_alloc (16);
73e3da51
RS
15950 unsigned int i, j;
15951 unsigned int usize = GET_MODE_UNIT_SIZE (mode);
668046d1
DS
15952
15953 gcc_assert (BYTES_BIG_ENDIAN);
15954 gcc_assert (AARCH64_VALID_SIMD_QREG_MODE (mode));
15955
15956 for (i = 0; i < nunits; i++)
15957 for (j = 0; j < usize; j++)
15958 RTVEC_ELT (v, i * usize + j) = GEN_INT ((i + 1) * usize - 1 - j);
15959 mask = gen_rtx_CONST_VECTOR (V16QImode, v);
15960 return force_reg (V16QImode, mask);
15961}
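
/* For illustration (assumed): for V4SImode, NUNITS is 4 and the unit size
   is 4, so the mask built above is the byte vector
   { 3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12 }.  */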
15962
43cacb12
RS
15963/* Return true if X is a valid second operand for the SVE instruction
15964 that implements integer comparison OP_CODE. */
15965
15966static bool
15967aarch64_sve_cmp_operand_p (rtx_code op_code, rtx x)
15968{
15969 if (register_operand (x, VOIDmode))
15970 return true;
15971
15972 switch (op_code)
15973 {
15974 case LTU:
15975 case LEU:
15976 case GEU:
15977 case GTU:
15978 return aarch64_sve_cmp_immediate_p (x, false);
15979 case LT:
15980 case LE:
15981 case GE:
15982 case GT:
15983 case NE:
15984 case EQ:
15985 return aarch64_sve_cmp_immediate_p (x, true);
15986 default:
15987 gcc_unreachable ();
15988 }
15989}
15990
f22d7973
RS
15991/* Use predicated SVE instructions to implement the equivalent of:
15992
15993 (set TARGET OP)
15994
15995 given that PTRUE is an all-true predicate of the appropriate mode. */
15996
15997static void
15998aarch64_emit_sve_ptrue_op (rtx target, rtx ptrue, rtx op)
15999{
16000 rtx unspec = gen_rtx_UNSPEC (GET_MODE (target),
16001 gen_rtvec (2, ptrue, op),
16002 UNSPEC_MERGE_PTRUE);
16003 rtx_insn *insn = emit_set_insn (target, unspec);
16004 set_unique_reg_note (insn, REG_EQUAL, copy_rtx (op));
16005}
16006
16007/* Likewise, but also clobber the condition codes. */
16008
16009static void
16010aarch64_emit_sve_ptrue_op_cc (rtx target, rtx ptrue, rtx op)
16011{
16012 rtx unspec = gen_rtx_UNSPEC (GET_MODE (target),
16013 gen_rtvec (2, ptrue, op),
16014 UNSPEC_MERGE_PTRUE);
16015 rtx_insn *insn = emit_insn (gen_set_clobber_cc (target, unspec));
16016 set_unique_reg_note (insn, REG_EQUAL, copy_rtx (op));
16017}
16018
43cacb12
RS
16019/* Return the UNSPEC_COND_* code for comparison CODE. */
16020
16021static unsigned int
16022aarch64_unspec_cond_code (rtx_code code)
16023{
16024 switch (code)
16025 {
16026 case NE:
16027 return UNSPEC_COND_NE;
16028 case EQ:
16029 return UNSPEC_COND_EQ;
16030 case LT:
16031 return UNSPEC_COND_LT;
16032 case GT:
16033 return UNSPEC_COND_GT;
16034 case LE:
16035 return UNSPEC_COND_LE;
16036 case GE:
16037 return UNSPEC_COND_GE;
43cacb12
RS
16038 default:
16039 gcc_unreachable ();
16040 }
16041}
16042
f22d7973 16043/* Emit:
43cacb12 16044
f22d7973
RS
16045 (set TARGET (unspec [PRED OP0 OP1] UNSPEC_COND_<X>))
16046
16047 where <X> is the operation associated with comparison CODE. This form
16048 of instruction is used when (and (CODE OP0 OP1) PRED) would have different
16049 semantics, such as when PRED might not be all-true and when comparing
16050 inactive lanes could have side effects. */
16051
16052static void
16053aarch64_emit_sve_predicated_cond (rtx target, rtx_code code,
16054 rtx pred, rtx op0, rtx op1)
43cacb12 16055{
f22d7973
RS
16056 rtx unspec = gen_rtx_UNSPEC (GET_MODE (pred),
16057 gen_rtvec (3, pred, op0, op1),
16058 aarch64_unspec_cond_code (code));
16059 emit_set_insn (target, unspec);
43cacb12
RS
16060}
16061
f22d7973 16062/* Expand an SVE integer comparison using the SVE equivalent of:
43cacb12 16063
f22d7973 16064 (set TARGET (CODE OP0 OP1)). */
43cacb12
RS
16065
16066void
16067aarch64_expand_sve_vec_cmp_int (rtx target, rtx_code code, rtx op0, rtx op1)
16068{
16069 machine_mode pred_mode = GET_MODE (target);
16070 machine_mode data_mode = GET_MODE (op0);
16071
16072 if (!aarch64_sve_cmp_operand_p (code, op1))
16073 op1 = force_reg (data_mode, op1);
16074
16075 rtx ptrue = force_reg (pred_mode, CONSTM1_RTX (pred_mode));
f22d7973
RS
16076 rtx cond = gen_rtx_fmt_ee (code, pred_mode, op0, op1);
16077 aarch64_emit_sve_ptrue_op_cc (target, ptrue, cond);
43cacb12
RS
16078}
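
/* A minimal illustration (assumed, register names invented): comparing two
   .s vectors for equality under an all-true predicate is emitted as
   something like "cmpeq p0.s, p1/z, z0.s, z1.s", with the condition codes
   clobbered as described above.  */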
16079
f22d7973 16080/* Emit the SVE equivalent of:
43cacb12 16081
f22d7973
RS
16082 (set TMP1 (CODE1 OP0 OP1))
16083 (set TMP2 (CODE2 OP0 OP1))
16084 (set TARGET (ior:PRED_MODE TMP1 TMP2))
43cacb12 16085
f22d7973 16086 PTRUE is an all-true predicate with the same mode as TARGET. */
43cacb12
RS
16087
16088static void
f22d7973
RS
16089aarch64_emit_sve_or_conds (rtx target, rtx_code code1, rtx_code code2,
16090 rtx ptrue, rtx op0, rtx op1)
43cacb12 16091{
f22d7973 16092 machine_mode pred_mode = GET_MODE (ptrue);
43cacb12 16093 rtx tmp1 = gen_reg_rtx (pred_mode);
f22d7973
RS
16094 aarch64_emit_sve_ptrue_op (tmp1, ptrue,
16095 gen_rtx_fmt_ee (code1, pred_mode, op0, op1));
43cacb12 16096 rtx tmp2 = gen_reg_rtx (pred_mode);
f22d7973
RS
16097 aarch64_emit_sve_ptrue_op (tmp2, ptrue,
16098 gen_rtx_fmt_ee (code2, pred_mode, op0, op1));
16099 aarch64_emit_binop (target, ior_optab, tmp1, tmp2);
43cacb12
RS
16100}
16101
f22d7973 16102/* Emit the SVE equivalent of:
43cacb12 16103
f22d7973
RS
16104 (set TMP (CODE OP0 OP1))
16105 (set TARGET (not TMP))
43cacb12 16106
f22d7973 16107 PTRUE is an all-true predicate with the same mode as TARGET. */
43cacb12
RS
16108
16109static void
f22d7973
RS
16110aarch64_emit_sve_inverted_cond (rtx target, rtx ptrue, rtx_code code,
16111 rtx op0, rtx op1)
43cacb12 16112{
f22d7973
RS
16113 machine_mode pred_mode = GET_MODE (ptrue);
16114 rtx tmp = gen_reg_rtx (pred_mode);
16115 aarch64_emit_sve_ptrue_op (tmp, ptrue,
16116 gen_rtx_fmt_ee (code, pred_mode, op0, op1));
16117 aarch64_emit_unop (target, one_cmpl_optab, tmp);
43cacb12
RS
16118}
16119
f22d7973 16120/* Expand an SVE floating-point comparison using the SVE equivalent of:
43cacb12 16121
f22d7973 16122 (set TARGET (CODE OP0 OP1))
43cacb12
RS
16123
16124 If CAN_INVERT_P is true, the caller can also handle inverted results;
16125 return true if the result is in fact inverted. */
16126
16127bool
16128aarch64_expand_sve_vec_cmp_float (rtx target, rtx_code code,
16129 rtx op0, rtx op1, bool can_invert_p)
16130{
16131 machine_mode pred_mode = GET_MODE (target);
16132 machine_mode data_mode = GET_MODE (op0);
16133
16134 rtx ptrue = force_reg (pred_mode, CONSTM1_RTX (pred_mode));
16135 switch (code)
16136 {
16137 case UNORDERED:
16138 /* UNORDERED has no immediate form. */
16139 op1 = force_reg (data_mode, op1);
f22d7973 16140 /* fall through */
43cacb12
RS
16141 case LT:
16142 case LE:
16143 case GT:
16144 case GE:
16145 case EQ:
16146 case NE:
f22d7973
RS
16147 {
16148 /* There is native support for the comparison. */
16149 rtx cond = gen_rtx_fmt_ee (code, pred_mode, op0, op1);
16150 aarch64_emit_sve_ptrue_op (target, ptrue, cond);
16151 return false;
16152 }
43cacb12
RS
16153
16154 case LTGT:
16155 /* This is a trapping operation (LT or GT). */
f22d7973 16156 aarch64_emit_sve_or_conds (target, LT, GT, ptrue, op0, op1);
43cacb12
RS
16157 return false;
16158
16159 case UNEQ:
16160 if (!flag_trapping_math)
16161 {
16162 /* This would trap for signaling NaNs. */
16163 op1 = force_reg (data_mode, op1);
f22d7973 16164 aarch64_emit_sve_or_conds (target, UNORDERED, EQ, ptrue, op0, op1);
43cacb12
RS
16165 return false;
16166 }
16167 /* fall through */
43cacb12
RS
16168 case UNLT:
16169 case UNLE:
16170 case UNGT:
16171 case UNGE:
f22d7973
RS
16172 if (flag_trapping_math)
16173 {
16174 /* Work out which elements are ordered. */
16175 rtx ordered = gen_reg_rtx (pred_mode);
16176 op1 = force_reg (data_mode, op1);
16177 aarch64_emit_sve_inverted_cond (ordered, ptrue, UNORDERED, op0, op1);
16178
16179 /* Test the opposite condition for the ordered elements,
16180 then invert the result. */
16181 if (code == UNEQ)
16182 code = NE;
16183 else
16184 code = reverse_condition_maybe_unordered (code);
16185 if (can_invert_p)
16186 {
16187 aarch64_emit_sve_predicated_cond (target, code,
16188 ordered, op0, op1);
16189 return true;
16190 }
16191 rtx tmp = gen_reg_rtx (pred_mode);
16192 aarch64_emit_sve_predicated_cond (tmp, code, ordered, op0, op1);
16193 aarch64_emit_unop (target, one_cmpl_optab, tmp);
16194 return false;
16195 }
16196 break;
16197
16198 case ORDERED:
16199 /* ORDERED has no immediate form. */
16200 op1 = force_reg (data_mode, op1);
16201 break;
43cacb12
RS
16202
16203 default:
16204 gcc_unreachable ();
16205 }
f22d7973
RS
16206
16207 /* There is native support for the inverse comparison. */
16208 code = reverse_condition_maybe_unordered (code);
16209 if (can_invert_p)
16210 {
16211 rtx cond = gen_rtx_fmt_ee (code, pred_mode, op0, op1);
16212 aarch64_emit_sve_ptrue_op (target, ptrue, cond);
16213 return true;
16214 }
16215 aarch64_emit_sve_inverted_cond (target, ptrue, code, op0, op1);
16216 return false;
43cacb12
RS
16217}
16218
16219/* Expand an SVE vcond pattern with operands OPS. DATA_MODE is the mode
16220 of the data being selected and CMP_MODE is the mode of the values being
16221 compared. */
16222
16223void
16224aarch64_expand_sve_vcond (machine_mode data_mode, machine_mode cmp_mode,
16225 rtx *ops)
16226{
16227 machine_mode pred_mode
16228 = aarch64_get_mask_mode (GET_MODE_NUNITS (cmp_mode),
16229 GET_MODE_SIZE (cmp_mode)).require ();
16230 rtx pred = gen_reg_rtx (pred_mode);
16231 if (FLOAT_MODE_P (cmp_mode))
16232 {
16233 if (aarch64_expand_sve_vec_cmp_float (pred, GET_CODE (ops[3]),
16234 ops[4], ops[5], true))
16235 std::swap (ops[1], ops[2]);
16236 }
16237 else
16238 aarch64_expand_sve_vec_cmp_int (pred, GET_CODE (ops[3]), ops[4], ops[5]);
16239
16240 rtvec vec = gen_rtvec (3, pred, ops[1], ops[2]);
16241 emit_set_insn (ops[0], gen_rtx_UNSPEC (data_mode, vec, UNSPEC_SEL));
16242}
16243
99e1629f
RS
16244/* Implement TARGET_MODES_TIEABLE_P. In principle we should always return
16245 true. However, due to issues with register allocation it is preferable
16246 to avoid tying integer scalar and FP scalar modes. Executing integer
16247 operations in general registers is better than treating them as scalar
16248 vector operations. This reduces latency and avoids redundant int<->FP
16249 moves. So tie modes if they are either the same class, or vector modes
16250 with other vector modes, vector structs or any scalar mode. */
97e1ad78 16251
99e1629f 16252static bool
ef4bddc2 16253aarch64_modes_tieable_p (machine_mode mode1, machine_mode mode2)
97e1ad78
JG
16254{
16255 if (GET_MODE_CLASS (mode1) == GET_MODE_CLASS (mode2))
16256 return true;
16257
16258 /* We specifically want to allow elements of "structure" modes to
16259 be tieable to the structure. This more general condition allows
43cacb12
RS
16260 other rarer situations too. The reason we don't extend this to
16261 predicate modes is that there are no predicate structure modes
16262 nor any specific instructions for extracting part of a predicate
16263 register. */
16264 if (aarch64_vector_data_mode_p (mode1)
16265 && aarch64_vector_data_mode_p (mode2))
61f17a5c
WD
16266 return true;
16267
16268 /* Also allow any scalar modes with vectors. */
16269 if (aarch64_vector_mode_supported_p (mode1)
16270 || aarch64_vector_mode_supported_p (mode2))
97e1ad78
JG
16271 return true;
16272
16273 return false;
16274}
16275
e2c75eea
JG
16276/* Return a new RTX holding the result of moving POINTER forward by
16277 AMOUNT bytes. */
16278
16279static rtx
6a70badb 16280aarch64_move_pointer (rtx pointer, poly_int64 amount)
e2c75eea
JG
16281{
16282 rtx next = plus_constant (Pmode, XEXP (pointer, 0), amount);
16283
16284 return adjust_automodify_address (pointer, GET_MODE (pointer),
16285 next, amount);
16286}
16287
16288/* Return a new RTX holding the result of moving POINTER forward by the
16289 size of the mode it points to. */
16290
16291static rtx
16292aarch64_progress_pointer (rtx pointer)
16293{
6a70badb 16294 return aarch64_move_pointer (pointer, GET_MODE_SIZE (GET_MODE (pointer)));
e2c75eea
JG
16295}
16296
16297/* Copy one MODE sized block from SRC to DST, then progress SRC and DST by
16298 MODE bytes. */
16299
16300static void
16301aarch64_copy_one_block_and_progress_pointers (rtx *src, rtx *dst,
ef4bddc2 16302 machine_mode mode)
e2c75eea
JG
16303{
16304 rtx reg = gen_reg_rtx (mode);
16305
16306 /* "Cast" the pointers to the correct mode. */
16307 *src = adjust_address (*src, mode, 0);
16308 *dst = adjust_address (*dst, mode, 0);
16309 /* Emit the memcpy. */
16310 emit_move_insn (reg, *src);
16311 emit_move_insn (*dst, reg);
16312 /* Move the pointers forward. */
16313 *src = aarch64_progress_pointer (*src);
16314 *dst = aarch64_progress_pointer (*dst);
16315}
16316
16317/* Expand movmem, as if from a __builtin_memcpy. Return true if
16318 we succeed, otherwise return false. */
16319
16320bool
16321aarch64_expand_movmem (rtx *operands)
16322{
89c52e5e 16323 int n, mode_bits;
e2c75eea
JG
16324 rtx dst = operands[0];
16325 rtx src = operands[1];
16326 rtx base;
89c52e5e 16327 machine_mode cur_mode = BLKmode, next_mode;
e2c75eea
JG
16328 bool speed_p = !optimize_function_for_size_p (cfun);
16329
16330 /* When optimizing for size, give a better estimate of the length of a
89c52e5e
TC
16331 memcpy call, but use the default otherwise. Moves larger than 8 bytes
16332 will always require an even number of instructions to do, and each
16333 operation requires both a load and a store, so divide the max number by 2. */
16334 int max_num_moves = (speed_p ? 16 : AARCH64_CALL_RATIO) / 2;
e2c75eea
JG
16335
16336 /* We can't do anything smart if the amount to copy is not constant. */
16337 if (!CONST_INT_P (operands[2]))
16338 return false;
16339
89c52e5e 16340 n = INTVAL (operands[2]);
e2c75eea 16341
89c52e5e
TC
16342 /* Try to keep the number of instructions low. For all cases we will do at
16343 most two moves for the residual amount, since we'll always overlap the
16344 remainder. */
16345 if (((n / 16) + (n % 16 ? 2 : 0)) > max_num_moves)
e2c75eea
JG
16346 return false;
16347
16348 base = copy_to_mode_reg (Pmode, XEXP (dst, 0));
16349 dst = adjust_automodify_address (dst, VOIDmode, base, 0);
16350
16351 base = copy_to_mode_reg (Pmode, XEXP (src, 0));
16352 src = adjust_automodify_address (src, VOIDmode, base, 0);
16353
89c52e5e
TC
16354 /* Convert n to bits to make the rest of the code simpler. */
16355 n = n * BITS_PER_UNIT;
e2c75eea 16356
f7e1d19d
TC
16357 /* Maximum amount to copy in one go. The AArch64 back-end has integer modes
16358 larger than TImode, but we should not use them for loads/stores here. */
16359 const int copy_limit = GET_MODE_BITSIZE (TImode);
16360
89c52e5e 16361 while (n > 0)
e2c75eea 16362 {
89c52e5e
TC
16363 /* Find the largest mode in which to do the copy without over-reading
16364 or over-writing. */
16365 opt_scalar_int_mode mode_iter;
16366 FOR_EACH_MODE_IN_CLASS (mode_iter, MODE_INT)
f7e1d19d 16367 if (GET_MODE_BITSIZE (mode_iter.require ()) <= MIN (n, copy_limit))
89c52e5e 16368 cur_mode = mode_iter.require ();
e2c75eea 16369
89c52e5e 16370 gcc_assert (cur_mode != BLKmode);
e2c75eea 16371
89c52e5e
TC
16372 mode_bits = GET_MODE_BITSIZE (cur_mode).to_constant ();
16373 aarch64_copy_one_block_and_progress_pointers (&src, &dst, cur_mode);
e2c75eea 16374
89c52e5e 16375 n -= mode_bits;
e2c75eea 16376
89c52e5e
TC
16377 /* Do certain trailing copies as overlapping if it's going to be
16378 cheaper, i.e. fewer instructions to do so. For instance, for a 15
16379 byte copy it is more efficient to do two overlapping 8 byte copies than
16380 8 + 6 + 1. */
f7e1d19d 16381 if (n > 0 && n <= 8 * BITS_PER_UNIT)
89c52e5e 16382 {
f7e1d19d
TC
16383 next_mode = smallest_mode_for_size (n, MODE_INT);
16384 int n_bits = GET_MODE_BITSIZE (next_mode).to_constant ();
89c52e5e
TC
16385 src = aarch64_move_pointer (src, (n - n_bits) / BITS_PER_UNIT);
16386 dst = aarch64_move_pointer (dst, (n - n_bits) / BITS_PER_UNIT);
16387 n = n_bits;
e2c75eea
JG
16388 }
16389 }
16390
16391 return true;
16392}
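
/* Worked example of the loop above (illustrative only): for a constant
   15-byte copy, the first iteration emits one 8-byte (DImode) load/store,
   leaving 56 bits; the trailing-copy code then rounds this up to another
   8-byte access whose pointers are moved back by one byte, so the whole
   copy is two overlapping 8-byte moves rather than separate 4-, 2- and
   1-byte tails.  */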
16393
141a3ccf
KT
16394/* Split a DImode store of a CONST_INT SRC to MEM DST as two
16395 SImode stores. Handle the case when the constant has identical
16396 bottom and top halves. This is beneficial when the two stores can be
16397 merged into an STP and we avoid synthesising potentially expensive
16398 immediates twice. Return true if such a split is possible. */
16399
16400bool
16401aarch64_split_dimode_const_store (rtx dst, rtx src)
16402{
16403 rtx lo = gen_lowpart (SImode, src);
16404 rtx hi = gen_highpart_mode (SImode, DImode, src);
16405
16406 bool size_p = optimize_function_for_size_p (cfun);
16407
16408 if (!rtx_equal_p (lo, hi))
16409 return false;
16410
16411 unsigned int orig_cost
16412 = aarch64_internal_mov_immediate (NULL_RTX, src, false, DImode);
16413 unsigned int lo_cost
16414 = aarch64_internal_mov_immediate (NULL_RTX, lo, false, SImode);
16415
16416 /* We want to transform:
16417 MOV x1, 49370
16418 MOVK x1, 0x140, lsl 16
16419 MOVK x1, 0xc0da, lsl 32
16420 MOVK x1, 0x140, lsl 48
16421 STR x1, [x0]
16422 into:
16423 MOV w1, 49370
16424 MOVK w1, 0x140, lsl 16
16425 STP w1, w1, [x0]
16426 So we want to perform this only when we save two instructions
16427 or more. When optimizing for size, however, accept any code size
16428 savings we can. */
16429 if (size_p && orig_cost <= lo_cost)
16430 return false;
16431
16432 if (!size_p
16433 && (orig_cost <= lo_cost + 1))
16434 return false;
16435
16436 rtx mem_lo = adjust_address (dst, SImode, 0);
16437 if (!aarch64_mem_pair_operand (mem_lo, SImode))
16438 return false;
16439
16440 rtx tmp_reg = gen_reg_rtx (SImode);
16441 aarch64_expand_mov_immediate (tmp_reg, lo);
16442 rtx mem_hi = aarch64_move_pointer (mem_lo, GET_MODE_SIZE (SImode));
16443 /* Don't emit an explicit store pair as this may not always be profitable.
16444 Let the sched-fusion logic decide whether to merge them. */
16445 emit_move_insn (mem_lo, tmp_reg);
16446 emit_move_insn (mem_hi, tmp_reg);
16447
16448 return true;
16449}
16450
30c46053
MC
16451/* Generate RTL for a conditional branch with rtx comparison CODE in
16452 mode CC_MODE. The destination of the unlikely conditional branch
16453 is LABEL_REF. */
16454
16455void
16456aarch64_gen_unlikely_cbranch (enum rtx_code code, machine_mode cc_mode,
16457 rtx label_ref)
16458{
16459 rtx x;
16460 x = gen_rtx_fmt_ee (code, VOIDmode,
16461 gen_rtx_REG (cc_mode, CC_REGNUM),
16462 const0_rtx);
16463
16464 x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
16465 gen_rtx_LABEL_REF (VOIDmode, label_ref),
16466 pc_rtx);
16467 aarch64_emit_unlikely_jump (gen_rtx_SET (pc_rtx, x));
16468}
16469
16470/* Generate DImode scratch registers for 128-bit (TImode) addition.
16471
16472 OP1 represents the TImode destination operand 1
16473 OP2 represents the TImode destination operand 2
16474 LOW_DEST represents the low half (DImode) of TImode operand 0
16475 LOW_IN1 represents the low half (DImode) of TImode operand 1
16476 LOW_IN2 represents the low half (DImode) of TImode operand 2
16477 HIGH_DEST represents the high half (DImode) of TImode operand 0
16478 HIGH_IN1 represents the high half (DImode) of TImode operand 1
16479 HIGH_IN2 represents the high half (DImode) of TImode operand 2. */
16480
16481void
16482aarch64_addti_scratch_regs (rtx op1, rtx op2, rtx *low_dest,
16483 rtx *low_in1, rtx *low_in2,
16484 rtx *high_dest, rtx *high_in1,
16485 rtx *high_in2)
16486{
16487 *low_dest = gen_reg_rtx (DImode);
16488 *low_in1 = gen_lowpart (DImode, op1);
16489 *low_in2 = simplify_gen_subreg (DImode, op2, TImode,
16490 subreg_lowpart_offset (DImode, TImode));
16491 *high_dest = gen_reg_rtx (DImode);
16492 *high_in1 = gen_highpart (DImode, op1);
16493 *high_in2 = simplify_gen_subreg (DImode, op2, TImode,
16494 subreg_highpart_offset (DImode, TImode));
16495}
16496
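/* Illustrative example (an addition to this text, not part of GCC): a
   128-bit addition of the kind these DImode scratch registers are set up
   for; the low halves are added first and the carry is propagated into
   the high halves.  The function name is hypothetical.  */

unsigned __int128
add_ti_example (unsigned __int128 a, unsigned __int128 b)
{
  return a + b;
}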
16497/* Generate DImode scratch registers for 128-bit (TImode) subtraction.
16498
 16499 This function differs from 'aarch64_addti_scratch_regs' in that
 16500 OP1 can be an immediate constant (zero). We must call
 16501 subreg_highpart_offset with DImode and TImode arguments, otherwise
 16502 VOIDmode would be used for the const_int and subreg_size_highpart_offset,
 16503 which does not expect a size of zero, would report an internal error.
16504
 16505 OP1 represents the TImode source operand 1
 16506 OP2 represents the TImode source operand 2
16507 LOW_DEST represents the low half (DImode) of TImode operand 0
16508 LOW_IN1 represents the low half (DImode) of TImode operand 1
16509 LOW_IN2 represents the low half (DImode) of TImode operand 2
16510 HIGH_DEST represents the high half (DImode) of TImode operand 0
16511 HIGH_IN1 represents the high half (DImode) of TImode operand 1
16512 HIGH_IN2 represents the high half (DImode) of TImode operand 2. */
16513
16514
16515void
16516aarch64_subvti_scratch_regs (rtx op1, rtx op2, rtx *low_dest,
16517 rtx *low_in1, rtx *low_in2,
16518 rtx *high_dest, rtx *high_in1,
16519 rtx *high_in2)
16520{
16521 *low_dest = gen_reg_rtx (DImode);
16522 *low_in1 = simplify_gen_subreg (DImode, op1, TImode,
16523 subreg_lowpart_offset (DImode, TImode));
16524
16525 *low_in2 = simplify_gen_subreg (DImode, op2, TImode,
16526 subreg_lowpart_offset (DImode, TImode));
16527 *high_dest = gen_reg_rtx (DImode);
16528
16529 *high_in1 = simplify_gen_subreg (DImode, op1, TImode,
16530 subreg_highpart_offset (DImode, TImode));
16531 *high_in2 = simplify_gen_subreg (DImode, op2, TImode,
16532 subreg_highpart_offset (DImode, TImode));
16533}
16534
16535/* Generate RTL for 128-bit (TImode) subtraction with overflow.
16536
16537 OP0 represents the TImode destination operand 0
16538 LOW_DEST represents the low half (DImode) of TImode operand 0
16539 LOW_IN1 represents the low half (DImode) of TImode operand 1
16540 LOW_IN2 represents the low half (DImode) of TImode operand 2
16541 HIGH_DEST represents the high half (DImode) of TImode operand 0
16542 HIGH_IN1 represents the high half (DImode) of TImode operand 1
16543 HIGH_IN2 represents the high half (DImode) of TImode operand 2. */
16544
16545void
16546aarch64_expand_subvti (rtx op0, rtx low_dest, rtx low_in1,
16547 rtx low_in2, rtx high_dest, rtx high_in1,
16548 rtx high_in2)
16549{
16550 if (low_in2 == const0_rtx)
16551 {
16552 low_dest = low_in1;
16553 emit_insn (gen_subdi3_compare1 (high_dest, high_in1,
16554 force_reg (DImode, high_in2)));
16555 }
16556 else
16557 {
16558 if (CONST_INT_P (low_in2))
16559 {
16560 low_in2 = force_reg (DImode, GEN_INT (-UINTVAL (low_in2)));
16561 high_in2 = force_reg (DImode, high_in2);
16562 emit_insn (gen_adddi3_compareC (low_dest, low_in1, low_in2));
16563 }
16564 else
16565 emit_insn (gen_subdi3_compare1 (low_dest, low_in1, low_in2));
16566 emit_insn (gen_subdi3_carryinCV (high_dest,
16567 force_reg (DImode, high_in1),
16568 high_in2));
16569 }
16570
16571 emit_move_insn (gen_lowpart (DImode, op0), low_dest);
16572 emit_move_insn (gen_highpart (DImode, op0), high_dest);
16573
16574}
16575
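/* Illustrative example (an addition to this text, not part of GCC): a
   checked 128-bit subtraction of the kind the overflow expansion above is
   used for.  The function name is hypothetical.  */

#include <stdbool.h>

bool
sub_ti_checked_example (__int128 a, __int128 b, __int128 *res)
{
  return __builtin_sub_overflow (a, b, res);
}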
a3125fc2
CL
16576/* Implement the TARGET_ASAN_SHADOW_OFFSET hook. */
16577
16578static unsigned HOST_WIDE_INT
16579aarch64_asan_shadow_offset (void)
16580{
16581 return (HOST_WIDE_INT_1 << 36);
16582}
16583
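/* Illustrative sketch (an addition to this text, not part of GCC): with the
   1 << 36 offset returned above, AddressSanitizer's usual 8:1 shadow mapping
   computes a shadow byte address roughly as below.  The helper name is
   hypothetical.  */

#include <stdint.h>

static inline uint64_t
asan_shadow_addr_example (uint64_t addr)
{
  return (addr >> 3) + (UINT64_C (1) << 36);
}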
5f3bc026 16584static rtx
cb4347e8 16585aarch64_gen_ccmp_first (rtx_insn **prep_seq, rtx_insn **gen_seq,
5f3bc026
ZC
16586 int code, tree treeop0, tree treeop1)
16587{
c8012fbc
WD
16588 machine_mode op_mode, cmp_mode, cc_mode = CCmode;
16589 rtx op0, op1;
5f3bc026 16590 int unsignedp = TYPE_UNSIGNED (TREE_TYPE (treeop0));
c8012fbc 16591 insn_code icode;
5f3bc026
ZC
16592 struct expand_operand ops[4];
16593
5f3bc026
ZC
16594 start_sequence ();
16595 expand_operands (treeop0, treeop1, NULL_RTX, &op0, &op1, EXPAND_NORMAL);
16596
16597 op_mode = GET_MODE (op0);
16598 if (op_mode == VOIDmode)
16599 op_mode = GET_MODE (op1);
16600
16601 switch (op_mode)
16602 {
4e10a5a7
RS
16603 case E_QImode:
16604 case E_HImode:
16605 case E_SImode:
5f3bc026
ZC
16606 cmp_mode = SImode;
16607 icode = CODE_FOR_cmpsi;
16608 break;
16609
4e10a5a7 16610 case E_DImode:
5f3bc026
ZC
16611 cmp_mode = DImode;
16612 icode = CODE_FOR_cmpdi;
16613 break;
16614
4e10a5a7 16615 case E_SFmode:
786e3c06
WD
16616 cmp_mode = SFmode;
16617 cc_mode = aarch64_select_cc_mode ((rtx_code) code, op0, op1);
16618 icode = cc_mode == CCFPEmode ? CODE_FOR_fcmpesf : CODE_FOR_fcmpsf;
16619 break;
16620
4e10a5a7 16621 case E_DFmode:
786e3c06
WD
16622 cmp_mode = DFmode;
16623 cc_mode = aarch64_select_cc_mode ((rtx_code) code, op0, op1);
16624 icode = cc_mode == CCFPEmode ? CODE_FOR_fcmpedf : CODE_FOR_fcmpdf;
16625 break;
16626
5f3bc026
ZC
16627 default:
16628 end_sequence ();
16629 return NULL_RTX;
16630 }
16631
c8012fbc
WD
16632 op0 = prepare_operand (icode, op0, 0, op_mode, cmp_mode, unsignedp);
16633 op1 = prepare_operand (icode, op1, 1, op_mode, cmp_mode, unsignedp);
5f3bc026
ZC
16634 if (!op0 || !op1)
16635 {
16636 end_sequence ();
16637 return NULL_RTX;
16638 }
16639 *prep_seq = get_insns ();
16640 end_sequence ();
16641
c8012fbc
WD
16642 create_fixed_operand (&ops[0], op0);
16643 create_fixed_operand (&ops[1], op1);
5f3bc026
ZC
16644
16645 start_sequence ();
c8012fbc 16646 if (!maybe_expand_insn (icode, 2, ops))
5f3bc026
ZC
16647 {
16648 end_sequence ();
16649 return NULL_RTX;
16650 }
16651 *gen_seq = get_insns ();
16652 end_sequence ();
16653
c8012fbc
WD
16654 return gen_rtx_fmt_ee ((rtx_code) code, cc_mode,
16655 gen_rtx_REG (cc_mode, CC_REGNUM), const0_rtx);
5f3bc026
ZC
16656}
16657
16658static rtx
cb4347e8
TS
16659aarch64_gen_ccmp_next (rtx_insn **prep_seq, rtx_insn **gen_seq, rtx prev,
16660 int cmp_code, tree treeop0, tree treeop1, int bit_code)
5f3bc026 16661{
c8012fbc
WD
16662 rtx op0, op1, target;
16663 machine_mode op_mode, cmp_mode, cc_mode = CCmode;
5f3bc026 16664 int unsignedp = TYPE_UNSIGNED (TREE_TYPE (treeop0));
c8012fbc 16665 insn_code icode;
5f3bc026 16666 struct expand_operand ops[6];
c8012fbc 16667 int aarch64_cond;
5f3bc026 16668
cb4347e8 16669 push_to_sequence (*prep_seq);
5f3bc026
ZC
16670 expand_operands (treeop0, treeop1, NULL_RTX, &op0, &op1, EXPAND_NORMAL);
16671
16672 op_mode = GET_MODE (op0);
16673 if (op_mode == VOIDmode)
16674 op_mode = GET_MODE (op1);
16675
16676 switch (op_mode)
16677 {
4e10a5a7
RS
16678 case E_QImode:
16679 case E_HImode:
16680 case E_SImode:
5f3bc026 16681 cmp_mode = SImode;
c8012fbc 16682 icode = CODE_FOR_ccmpsi;
5f3bc026
ZC
16683 break;
16684
4e10a5a7 16685 case E_DImode:
5f3bc026 16686 cmp_mode = DImode;
c8012fbc 16687 icode = CODE_FOR_ccmpdi;
5f3bc026
ZC
16688 break;
16689
4e10a5a7 16690 case E_SFmode:
786e3c06
WD
16691 cmp_mode = SFmode;
16692 cc_mode = aarch64_select_cc_mode ((rtx_code) cmp_code, op0, op1);
16693 icode = cc_mode == CCFPEmode ? CODE_FOR_fccmpesf : CODE_FOR_fccmpsf;
16694 break;
16695
4e10a5a7 16696 case E_DFmode:
786e3c06
WD
16697 cmp_mode = DFmode;
16698 cc_mode = aarch64_select_cc_mode ((rtx_code) cmp_code, op0, op1);
16699 icode = cc_mode == CCFPEmode ? CODE_FOR_fccmpedf : CODE_FOR_fccmpdf;
16700 break;
16701
5f3bc026
ZC
16702 default:
16703 end_sequence ();
16704 return NULL_RTX;
16705 }
16706
16707 op0 = prepare_operand (icode, op0, 2, op_mode, cmp_mode, unsignedp);
16708 op1 = prepare_operand (icode, op1, 3, op_mode, cmp_mode, unsignedp);
16709 if (!op0 || !op1)
16710 {
16711 end_sequence ();
16712 return NULL_RTX;
16713 }
16714 *prep_seq = get_insns ();
16715 end_sequence ();
16716
16717 target = gen_rtx_REG (cc_mode, CC_REGNUM);
c8012fbc 16718 aarch64_cond = aarch64_get_condition_code_1 (cc_mode, (rtx_code) cmp_code);
5f3bc026 16719
c8012fbc
WD
16720 if (bit_code != AND)
16721 {
16722 prev = gen_rtx_fmt_ee (REVERSE_CONDITION (GET_CODE (prev),
16723 GET_MODE (XEXP (prev, 0))),
16724 VOIDmode, XEXP (prev, 0), const0_rtx);
16725 aarch64_cond = AARCH64_INVERSE_CONDITION_CODE (aarch64_cond);
16726 }
16727
16728 create_fixed_operand (&ops[0], XEXP (prev, 0));
5f3bc026
ZC
16729 create_fixed_operand (&ops[1], target);
16730 create_fixed_operand (&ops[2], op0);
16731 create_fixed_operand (&ops[3], op1);
c8012fbc
WD
16732 create_fixed_operand (&ops[4], prev);
16733 create_fixed_operand (&ops[5], GEN_INT (aarch64_cond));
5f3bc026 16734
cb4347e8 16735 push_to_sequence (*gen_seq);
5f3bc026
ZC
16736 if (!maybe_expand_insn (icode, 6, ops))
16737 {
16738 end_sequence ();
16739 return NULL_RTX;
16740 }
16741
16742 *gen_seq = get_insns ();
16743 end_sequence ();
16744
c8012fbc 16745 return gen_rtx_fmt_ee ((rtx_code) cmp_code, VOIDmode, target, const0_rtx);
5f3bc026
ZC
16746}
16747
16748#undef TARGET_GEN_CCMP_FIRST
16749#define TARGET_GEN_CCMP_FIRST aarch64_gen_ccmp_first
16750
16751#undef TARGET_GEN_CCMP_NEXT
16752#define TARGET_GEN_CCMP_NEXT aarch64_gen_ccmp_next
16753
6a569cdd
KT
16754/* Implement TARGET_SCHED_MACRO_FUSION_P. Return true if target supports
16755 instruction fusion of some sort. */
16756
16757static bool
16758aarch64_macro_fusion_p (void)
16759{
b175b679 16760 return aarch64_tune_params.fusible_ops != AARCH64_FUSE_NOTHING;
6a569cdd
KT
16761}
16762
16763
16764/* Implement TARGET_SCHED_MACRO_FUSION_PAIR_P. Return true if PREV and CURR
16765 should be kept together during scheduling. */
16766
16767static bool
16768aarch_macro_fusion_pair_p (rtx_insn *prev, rtx_insn *curr)
16769{
16770 rtx set_dest;
16771 rtx prev_set = single_set (prev);
16772 rtx curr_set = single_set (curr);
16773 /* prev and curr are simple SET insns i.e. no flag setting or branching. */
16774 bool simple_sets_p = prev_set && curr_set && !any_condjump_p (curr);
16775
16776 if (!aarch64_macro_fusion_p ())
16777 return false;
16778
d7b03373 16779 if (simple_sets_p && aarch64_fusion_enabled_p (AARCH64_FUSE_MOV_MOVK))
6a569cdd
KT
16780 {
16781 /* We are trying to match:
16782 prev (mov) == (set (reg r0) (const_int imm16))
16783 curr (movk) == (set (zero_extract (reg r0)
16784 (const_int 16)
16785 (const_int 16))
16786 (const_int imm16_1)) */
16787
16788 set_dest = SET_DEST (curr_set);
16789
16790 if (GET_CODE (set_dest) == ZERO_EXTRACT
16791 && CONST_INT_P (SET_SRC (curr_set))
16792 && CONST_INT_P (SET_SRC (prev_set))
16793 && CONST_INT_P (XEXP (set_dest, 2))
16794 && INTVAL (XEXP (set_dest, 2)) == 16
16795 && REG_P (XEXP (set_dest, 0))
16796 && REG_P (SET_DEST (prev_set))
16797 && REGNO (XEXP (set_dest, 0)) == REGNO (SET_DEST (prev_set)))
16798 {
16799 return true;
16800 }
16801 }
16802
d7b03373 16803 if (simple_sets_p && aarch64_fusion_enabled_p (AARCH64_FUSE_ADRP_ADD))
9bbe08fe
KT
16804 {
16805
16806 /* We're trying to match:
16807 prev (adrp) == (set (reg r1)
16808 (high (symbol_ref ("SYM"))))
16809 curr (add) == (set (reg r0)
16810 (lo_sum (reg r1)
16811 (symbol_ref ("SYM"))))
16812 Note that r0 need not necessarily be the same as r1, especially
16813 during pre-regalloc scheduling. */
16814
16815 if (satisfies_constraint_Ush (SET_SRC (prev_set))
16816 && REG_P (SET_DEST (prev_set)) && REG_P (SET_DEST (curr_set)))
16817 {
16818 if (GET_CODE (SET_SRC (curr_set)) == LO_SUM
16819 && REG_P (XEXP (SET_SRC (curr_set), 0))
16820 && REGNO (XEXP (SET_SRC (curr_set), 0))
16821 == REGNO (SET_DEST (prev_set))
16822 && rtx_equal_p (XEXP (SET_SRC (prev_set), 0),
16823 XEXP (SET_SRC (curr_set), 1)))
16824 return true;
16825 }
16826 }
16827
d7b03373 16828 if (simple_sets_p && aarch64_fusion_enabled_p (AARCH64_FUSE_MOVK_MOVK))
cd0cb232
KT
16829 {
16830
16831 /* We're trying to match:
16832 prev (movk) == (set (zero_extract (reg r0)
16833 (const_int 16)
16834 (const_int 32))
16835 (const_int imm16_1))
16836 curr (movk) == (set (zero_extract (reg r0)
16837 (const_int 16)
16838 (const_int 48))
16839 (const_int imm16_2)) */
16840
16841 if (GET_CODE (SET_DEST (prev_set)) == ZERO_EXTRACT
16842 && GET_CODE (SET_DEST (curr_set)) == ZERO_EXTRACT
16843 && REG_P (XEXP (SET_DEST (prev_set), 0))
16844 && REG_P (XEXP (SET_DEST (curr_set), 0))
16845 && REGNO (XEXP (SET_DEST (prev_set), 0))
16846 == REGNO (XEXP (SET_DEST (curr_set), 0))
16847 && CONST_INT_P (XEXP (SET_DEST (prev_set), 2))
16848 && CONST_INT_P (XEXP (SET_DEST (curr_set), 2))
16849 && INTVAL (XEXP (SET_DEST (prev_set), 2)) == 32
16850 && INTVAL (XEXP (SET_DEST (curr_set), 2)) == 48
16851 && CONST_INT_P (SET_SRC (prev_set))
16852 && CONST_INT_P (SET_SRC (curr_set)))
16853 return true;
16854
16855 }
d7b03373 16856 if (simple_sets_p && aarch64_fusion_enabled_p (AARCH64_FUSE_ADRP_LDR))
d8354ad7
KT
16857 {
16858 /* We're trying to match:
16859 prev (adrp) == (set (reg r0)
16860 (high (symbol_ref ("SYM"))))
16861 curr (ldr) == (set (reg r1)
16862 (mem (lo_sum (reg r0)
16863 (symbol_ref ("SYM")))))
16864 or
16865 curr (ldr) == (set (reg r1)
16866 (zero_extend (mem
16867 (lo_sum (reg r0)
16868 (symbol_ref ("SYM")))))) */
16869 if (satisfies_constraint_Ush (SET_SRC (prev_set))
16870 && REG_P (SET_DEST (prev_set)) && REG_P (SET_DEST (curr_set)))
16871 {
16872 rtx curr_src = SET_SRC (curr_set);
16873
16874 if (GET_CODE (curr_src) == ZERO_EXTEND)
16875 curr_src = XEXP (curr_src, 0);
16876
16877 if (MEM_P (curr_src) && GET_CODE (XEXP (curr_src, 0)) == LO_SUM
16878 && REG_P (XEXP (XEXP (curr_src, 0), 0))
16879 && REGNO (XEXP (XEXP (curr_src, 0), 0))
16880 == REGNO (SET_DEST (prev_set))
16881 && rtx_equal_p (XEXP (XEXP (curr_src, 0), 1),
16882 XEXP (SET_SRC (prev_set), 0)))
16883 return true;
16884 }
16885 }
cd0cb232 16886
d7b03373 16887 if (aarch64_fusion_enabled_p (AARCH64_FUSE_AES_AESMC)
00a8574a
WD
16888 && aarch_crypto_can_dual_issue (prev, curr))
16889 return true;
16890
d7b03373 16891 if (aarch64_fusion_enabled_p (AARCH64_FUSE_CMP_BRANCH)
3759108f
AP
16892 && any_condjump_p (curr))
16893 {
509f819a
N
16894 unsigned int condreg1, condreg2;
16895 rtx cc_reg_1;
16896 aarch64_fixed_condition_code_regs (&condreg1, &condreg2);
16897 cc_reg_1 = gen_rtx_REG (CCmode, condreg1);
16898
16899 if (reg_referenced_p (cc_reg_1, PATTERN (curr))
16900 && prev
16901 && modified_in_p (cc_reg_1, prev))
16902 {
f8a27206
AP
16903 enum attr_type prev_type = get_attr_type (prev);
16904
509f819a
N
 16905 /* FIXME: this misses some instructions that are considered simple
 16906 arithmetic instructions for ThunderX. Simple shifts are missed here. */
16907 if (prev_type == TYPE_ALUS_SREG
16908 || prev_type == TYPE_ALUS_IMM
16909 || prev_type == TYPE_LOGICS_REG
16910 || prev_type == TYPE_LOGICS_IMM)
16911 return true;
16912 }
3759108f
AP
16913 }
16914
bee7e0fc
AP
16915 if (prev_set
16916 && curr_set
16917 && aarch64_fusion_enabled_p (AARCH64_FUSE_ALU_BRANCH)
00c7c57f
JB
16918 && any_condjump_p (curr))
16919 {
16920 /* We're trying to match:
16921 prev (alu_insn) == (set (r0) plus ((r0) (r1/imm)))
16922 curr (cbz) == (set (pc) (if_then_else (eq/ne) (r0)
16923 (const_int 0))
16924 (label_ref ("SYM"))
16925 (pc)) */
16926 if (SET_DEST (curr_set) == (pc_rtx)
16927 && GET_CODE (SET_SRC (curr_set)) == IF_THEN_ELSE
16928 && REG_P (XEXP (XEXP (SET_SRC (curr_set), 0), 0))
16929 && REG_P (SET_DEST (prev_set))
16930 && REGNO (SET_DEST (prev_set))
16931 == REGNO (XEXP (XEXP (SET_SRC (curr_set), 0), 0)))
16932 {
 16933 /* Fuse ALU operations followed by a conditional branch instruction. */
16934 switch (get_attr_type (prev))
16935 {
16936 case TYPE_ALU_IMM:
16937 case TYPE_ALU_SREG:
16938 case TYPE_ADC_REG:
16939 case TYPE_ADC_IMM:
16940 case TYPE_ADCS_REG:
16941 case TYPE_ADCS_IMM:
16942 case TYPE_LOGIC_REG:
16943 case TYPE_LOGIC_IMM:
16944 case TYPE_CSEL:
16945 case TYPE_ADR:
16946 case TYPE_MOV_IMM:
16947 case TYPE_SHIFT_REG:
16948 case TYPE_SHIFT_IMM:
16949 case TYPE_BFM:
16950 case TYPE_RBIT:
16951 case TYPE_REV:
16952 case TYPE_EXTEND:
16953 return true;
16954
16955 default:;
16956 }
16957 }
16958 }
16959
6a569cdd
KT
16960 return false;
16961}
16962
f2879a90
KT
16963/* Return true iff the instruction fusion described by OP is enabled. */
16964
16965bool
16966aarch64_fusion_enabled_p (enum aarch64_fusion_pairs op)
16967{
16968 return (aarch64_tune_params.fusible_ops & op) != 0;
16969}
16970
350013bc
BC
16971/* If MEM is in the form of [base+offset], extract the two parts
 16972 of the address and store them in BASE and OFFSET; otherwise return
 16973 false after clearing BASE and OFFSET. */
16974
16975bool
16976extract_base_offset_in_addr (rtx mem, rtx *base, rtx *offset)
16977{
16978 rtx addr;
16979
16980 gcc_assert (MEM_P (mem));
16981
16982 addr = XEXP (mem, 0);
16983
16984 if (REG_P (addr))
16985 {
16986 *base = addr;
16987 *offset = const0_rtx;
16988 return true;
16989 }
16990
16991 if (GET_CODE (addr) == PLUS
16992 && REG_P (XEXP (addr, 0)) && CONST_INT_P (XEXP (addr, 1)))
16993 {
16994 *base = XEXP (addr, 0);
16995 *offset = XEXP (addr, 1);
16996 return true;
16997 }
16998
16999 *base = NULL_RTX;
17000 *offset = NULL_RTX;
17001
17002 return false;
17003}
17004
17005/* Types for scheduling fusion. */
17006enum sched_fusion_type
17007{
17008 SCHED_FUSION_NONE = 0,
17009 SCHED_FUSION_LD_SIGN_EXTEND,
17010 SCHED_FUSION_LD_ZERO_EXTEND,
17011 SCHED_FUSION_LD,
17012 SCHED_FUSION_ST,
17013 SCHED_FUSION_NUM
17014};
17015
17016/* If INSN is a load or store whose address is in the form [base+offset],
 17017 extract the two parts and store them in BASE and OFFSET. Return the
 17018 scheduling fusion type of this INSN. */
17019
17020static enum sched_fusion_type
17021fusion_load_store (rtx_insn *insn, rtx *base, rtx *offset)
17022{
17023 rtx x, dest, src;
17024 enum sched_fusion_type fusion = SCHED_FUSION_LD;
17025
17026 gcc_assert (INSN_P (insn));
17027 x = PATTERN (insn);
17028 if (GET_CODE (x) != SET)
17029 return SCHED_FUSION_NONE;
17030
17031 src = SET_SRC (x);
17032 dest = SET_DEST (x);
17033
abc52318
KT
17034 machine_mode dest_mode = GET_MODE (dest);
17035
17036 if (!aarch64_mode_valid_for_sched_fusion_p (dest_mode))
350013bc
BC
17037 return SCHED_FUSION_NONE;
17038
17039 if (GET_CODE (src) == SIGN_EXTEND)
17040 {
17041 fusion = SCHED_FUSION_LD_SIGN_EXTEND;
17042 src = XEXP (src, 0);
17043 if (GET_CODE (src) != MEM || GET_MODE (src) != SImode)
17044 return SCHED_FUSION_NONE;
17045 }
17046 else if (GET_CODE (src) == ZERO_EXTEND)
17047 {
17048 fusion = SCHED_FUSION_LD_ZERO_EXTEND;
17049 src = XEXP (src, 0);
17050 if (GET_CODE (src) != MEM || GET_MODE (src) != SImode)
17051 return SCHED_FUSION_NONE;
17052 }
17053
17054 if (GET_CODE (src) == MEM && REG_P (dest))
17055 extract_base_offset_in_addr (src, base, offset);
17056 else if (GET_CODE (dest) == MEM && (REG_P (src) || src == const0_rtx))
17057 {
17058 fusion = SCHED_FUSION_ST;
17059 extract_base_offset_in_addr (dest, base, offset);
17060 }
17061 else
17062 return SCHED_FUSION_NONE;
17063
17064 if (*base == NULL_RTX || *offset == NULL_RTX)
17065 fusion = SCHED_FUSION_NONE;
17066
17067 return fusion;
17068}
17069
17070/* Implement the TARGET_SCHED_FUSION_PRIORITY hook.
17071
 17072 Currently we only support fusing ldr and str instructions, so FUSION_PRI
 17073 and PRI are only calculated for these instructions. For other instructions,
 17074 FUSION_PRI and PRI are simply set to MAX_PRI - 1. In the future, fusion of
 17075 other instruction types can be added by returning different priorities.
17076
17077 It's important that irrelevant instructions get the largest FUSION_PRI. */
17078
17079static void
17080aarch64_sched_fusion_priority (rtx_insn *insn, int max_pri,
17081 int *fusion_pri, int *pri)
17082{
17083 int tmp, off_val;
17084 rtx base, offset;
17085 enum sched_fusion_type fusion;
17086
17087 gcc_assert (INSN_P (insn));
17088
17089 tmp = max_pri - 1;
17090 fusion = fusion_load_store (insn, &base, &offset);
17091 if (fusion == SCHED_FUSION_NONE)
17092 {
17093 *pri = tmp;
17094 *fusion_pri = tmp;
17095 return;
17096 }
17097
17098 /* Set FUSION_PRI according to fusion type and base register. */
17099 *fusion_pri = tmp - fusion * FIRST_PSEUDO_REGISTER - REGNO (base);
17100
17101 /* Calculate PRI. */
17102 tmp /= 2;
17103
17104 /* INSN with smaller offset goes first. */
17105 off_val = (int)(INTVAL (offset));
17106 if (off_val >= 0)
17107 tmp -= (off_val & 0xfffff);
17108 else
17109 tmp += ((- off_val) & 0xfffff);
17110
17111 *pri = tmp;
17112 return;
17113}
17114
9bca63d4
WD
17115/* Implement the TARGET_SCHED_ADJUST_PRIORITY hook.
17116 Adjust priority of sha1h instructions so they are scheduled before
17117 other SHA1 instructions. */
17118
17119static int
17120aarch64_sched_adjust_priority (rtx_insn *insn, int priority)
17121{
17122 rtx x = PATTERN (insn);
17123
17124 if (GET_CODE (x) == SET)
17125 {
17126 x = SET_SRC (x);
17127
17128 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_SHA1H)
17129 return priority + 10;
17130 }
17131
17132 return priority;
17133}
17134
350013bc
BC
17135/* Given OPERANDS of consecutive load/store, check if we can merge
17136 them into ldp/stp. LOAD is true if they are load instructions.
17137 MODE is the mode of memory operands. */
17138
17139bool
17140aarch64_operands_ok_for_ldpstp (rtx *operands, bool load,
b8506a8a 17141 machine_mode mode)
350013bc
BC
17142{
17143 HOST_WIDE_INT offval_1, offval_2, msize;
17144 enum reg_class rclass_1, rclass_2;
17145 rtx mem_1, mem_2, reg_1, reg_2, base_1, base_2, offset_1, offset_2;
17146
17147 if (load)
17148 {
17149 mem_1 = operands[1];
17150 mem_2 = operands[3];
17151 reg_1 = operands[0];
17152 reg_2 = operands[2];
17153 gcc_assert (REG_P (reg_1) && REG_P (reg_2));
17154 if (REGNO (reg_1) == REGNO (reg_2))
17155 return false;
17156 }
17157 else
17158 {
17159 mem_1 = operands[0];
17160 mem_2 = operands[2];
17161 reg_1 = operands[1];
17162 reg_2 = operands[3];
17163 }
17164
bf84ac44
AP
17165 /* The mems cannot be volatile. */
17166 if (MEM_VOLATILE_P (mem_1) || MEM_VOLATILE_P (mem_2))
17167 return false;
17168
54700e2e
AP
 17169 /* If we have SImode and slow unaligned ldp,
 17170 check that the alignment is at least 8 bytes. */
17171 if (mode == SImode
17172 && (aarch64_tune_params.extra_tuning_flags
17173 & AARCH64_EXTRA_TUNE_SLOW_UNALIGNED_LDPW)
17174 && !optimize_size
17175 && MEM_ALIGN (mem_1) < 8 * BITS_PER_UNIT)
17176 return false;
17177
350013bc
BC
17178 /* Check if the addresses are in the form of [base+offset]. */
17179 extract_base_offset_in_addr (mem_1, &base_1, &offset_1);
17180 if (base_1 == NULL_RTX || offset_1 == NULL_RTX)
17181 return false;
17182 extract_base_offset_in_addr (mem_2, &base_2, &offset_2);
17183 if (base_2 == NULL_RTX || offset_2 == NULL_RTX)
17184 return false;
17185
17186 /* Check if the bases are same. */
17187 if (!rtx_equal_p (base_1, base_2))
17188 return false;
17189
dfe1da23
JW
17190 /* The operands must be of the same size. */
17191 gcc_assert (known_eq (GET_MODE_SIZE (GET_MODE (mem_1)),
17192 GET_MODE_SIZE (GET_MODE (mem_2))));
17193
350013bc
BC
17194 offval_1 = INTVAL (offset_1);
17195 offval_2 = INTVAL (offset_2);
6a70badb
RS
17196 /* We should only be trying this for fixed-sized modes. There is no
17197 SVE LDP/STP instruction. */
17198 msize = GET_MODE_SIZE (mode).to_constant ();
350013bc
BC
17199 /* Check if the offsets are consecutive. */
17200 if (offval_1 != (offval_2 + msize) && offval_2 != (offval_1 + msize))
17201 return false;
17202
17203 /* Check if the addresses are clobbered by load. */
17204 if (load)
17205 {
17206 if (reg_mentioned_p (reg_1, mem_1))
17207 return false;
17208
17209 /* In increasing order, the last load can clobber the address. */
17210 if (offval_1 > offval_2 && reg_mentioned_p (reg_2, mem_2))
9b56ec11 17211 return false;
350013bc
BC
17212 }
17213
9b56ec11
JW
17214 /* One of the memory accesses must be a mempair operand.
17215 If it is not the first one, they need to be swapped by the
17216 peephole. */
17217 if (!aarch64_mem_pair_operand (mem_1, GET_MODE (mem_1))
17218 && !aarch64_mem_pair_operand (mem_2, GET_MODE (mem_2)))
17219 return false;
17220
350013bc
BC
17221 if (REG_P (reg_1) && FP_REGNUM_P (REGNO (reg_1)))
17222 rclass_1 = FP_REGS;
17223 else
17224 rclass_1 = GENERAL_REGS;
17225
17226 if (REG_P (reg_2) && FP_REGNUM_P (REGNO (reg_2)))
17227 rclass_2 = FP_REGS;
17228 else
17229 rclass_2 = GENERAL_REGS;
17230
17231 /* Check if the registers are of same class. */
17232 if (rclass_1 != rclass_2)
17233 return false;
17234
17235 return true;
17236}
17237
9b56ec11
JW
17238/* Given OPERANDS of consecutive load/store that can be merged,
17239 swap them if they are not in ascending order. */
17240void
17241aarch64_swap_ldrstr_operands (rtx* operands, bool load)
17242{
17243 rtx mem_1, mem_2, base_1, base_2, offset_1, offset_2;
17244 HOST_WIDE_INT offval_1, offval_2;
17245
17246 if (load)
17247 {
17248 mem_1 = operands[1];
17249 mem_2 = operands[3];
17250 }
17251 else
17252 {
17253 mem_1 = operands[0];
17254 mem_2 = operands[2];
17255 }
17256
17257 extract_base_offset_in_addr (mem_1, &base_1, &offset_1);
17258 extract_base_offset_in_addr (mem_2, &base_2, &offset_2);
17259
17260 offval_1 = INTVAL (offset_1);
17261 offval_2 = INTVAL (offset_2);
17262
17263 if (offval_1 > offval_2)
17264 {
17265 /* Irrespective of whether this is a load or a store,
17266 we do the same swap. */
17267 std::swap (operands[0], operands[2]);
17268 std::swap (operands[1], operands[3]);
17269 }
17270}
17271
d0b51297
JW
17272/* Taking X and Y to be HOST_WIDE_INT pointers, return the result of a
17273 comparison between the two. */
17274int
17275aarch64_host_wide_int_compare (const void *x, const void *y)
17276{
17277 return wi::cmps (* ((const HOST_WIDE_INT *) x),
17278 * ((const HOST_WIDE_INT *) y));
17279}
17280
17281/* Taking X and Y to be pairs of RTX, one pointing to a MEM rtx and the
17282 other pointing to a REG rtx containing an offset, compare the offsets
17283 of the two pairs.
17284
17285 Return:
17286
17287 1 iff offset (X) > offset (Y)
17288 0 iff offset (X) == offset (Y)
17289 -1 iff offset (X) < offset (Y) */
17290int
17291aarch64_ldrstr_offset_compare (const void *x, const void *y)
17292{
17293 const rtx * operands_1 = (const rtx *) x;
17294 const rtx * operands_2 = (const rtx *) y;
17295 rtx mem_1, mem_2, base, offset_1, offset_2;
17296
17297 if (MEM_P (operands_1[0]))
17298 mem_1 = operands_1[0];
17299 else
17300 mem_1 = operands_1[1];
17301
17302 if (MEM_P (operands_2[0]))
17303 mem_2 = operands_2[0];
17304 else
17305 mem_2 = operands_2[1];
17306
17307 /* Extract the offsets. */
17308 extract_base_offset_in_addr (mem_1, &base, &offset_1);
17309 extract_base_offset_in_addr (mem_2, &base, &offset_2);
17310
17311 gcc_assert (offset_1 != NULL_RTX && offset_2 != NULL_RTX);
17312
17313 return wi::cmps (INTVAL (offset_1), INTVAL (offset_2));
17314}
17315
350013bc
BC
17316/* Given OPERANDS of consecutive load/store, check if we can merge
17317 them into ldp/stp by adjusting the offset. LOAD is true if they
17318 are load instructions. MODE is the mode of memory operands.
17319
17320 Given below consecutive stores:
17321
17322 str w1, [xb, 0x100]
17323 str w1, [xb, 0x104]
17324 str w1, [xb, 0x108]
17325 str w1, [xb, 0x10c]
17326
17327 Though the offsets are out of the range supported by stp, we can
17328 still pair them after adjusting the offset, like:
17329
17330 add scratch, xb, 0x100
17331 stp w1, w1, [scratch]
17332 stp w1, w1, [scratch, 0x8]
17333
17334 The peephole patterns detecting this opportunity should guarantee
 17335 the scratch register is available. */
17336
17337bool
17338aarch64_operands_adjust_ok_for_ldpstp (rtx *operands, bool load,
146c2e3a 17339 scalar_mode mode)
350013bc 17340{
34d7854d
JW
17341 const int num_insns = 4;
17342 enum reg_class rclass;
17343 HOST_WIDE_INT offvals[num_insns], msize;
17344 rtx mem[num_insns], reg[num_insns], base[num_insns], offset[num_insns];
350013bc
BC
17345
17346 if (load)
17347 {
34d7854d
JW
17348 for (int i = 0; i < num_insns; i++)
17349 {
17350 reg[i] = operands[2 * i];
17351 mem[i] = operands[2 * i + 1];
17352
17353 gcc_assert (REG_P (reg[i]));
17354 }
d0b51297
JW
17355
17356 /* Do not attempt to merge the loads if the loads clobber each other. */
17357 for (int i = 0; i < 8; i += 2)
17358 for (int j = i + 2; j < 8; j += 2)
17359 if (reg_overlap_mentioned_p (operands[i], operands[j]))
17360 return false;
350013bc
BC
17361 }
17362 else
34d7854d
JW
17363 for (int i = 0; i < num_insns; i++)
17364 {
17365 mem[i] = operands[2 * i];
17366 reg[i] = operands[2 * i + 1];
17367 }
350013bc 17368
34d7854d
JW
17369 /* Skip if memory operand is by itself valid for ldp/stp. */
17370 if (!MEM_P (mem[0]) || aarch64_mem_pair_operand (mem[0], mode))
bf84ac44
AP
17371 return false;
17372
34d7854d
JW
17373 for (int i = 0; i < num_insns; i++)
17374 {
17375 /* The mems cannot be volatile. */
17376 if (MEM_VOLATILE_P (mem[i]))
17377 return false;
17378
17379 /* Check if the addresses are in the form of [base+offset]. */
17380 extract_base_offset_in_addr (mem[i], base + i, offset + i);
17381 if (base[i] == NULL_RTX || offset[i] == NULL_RTX)
17382 return false;
17383 }
17384
363b395b
JW
17385 /* Check if the registers are of same class. */
17386 rclass = REG_P (reg[0]) && FP_REGNUM_P (REGNO (reg[0]))
17387 ? FP_REGS : GENERAL_REGS;
17388
17389 for (int i = 1; i < num_insns; i++)
17390 if (REG_P (reg[i]) && FP_REGNUM_P (REGNO (reg[i])))
17391 {
17392 if (rclass != FP_REGS)
17393 return false;
17394 }
17395 else
17396 {
17397 if (rclass != GENERAL_REGS)
17398 return false;
17399 }
17400
17401 /* Only the last register in the order in which they occur
17402 may be clobbered by the load. */
17403 if (rclass == GENERAL_REGS && load)
17404 for (int i = 0; i < num_insns - 1; i++)
34d7854d
JW
17405 if (reg_mentioned_p (reg[i], mem[i]))
17406 return false;
350013bc
BC
17407
17408 /* Check if the bases are same. */
34d7854d
JW
17409 for (int i = 0; i < num_insns - 1; i++)
17410 if (!rtx_equal_p (base[i], base[i + 1]))
17411 return false;
17412
17413 for (int i = 0; i < num_insns; i++)
17414 offvals[i] = INTVAL (offset[i]);
350013bc 17415
350013bc 17416 msize = GET_MODE_SIZE (mode);
d0b51297
JW
17417
17418 /* Check if the offsets can be put in the right order to do a ldp/stp. */
34d7854d
JW
17419 qsort (offvals, num_insns, sizeof (HOST_WIDE_INT),
17420 aarch64_host_wide_int_compare);
d0b51297
JW
17421
17422 if (!(offvals[1] == offvals[0] + msize
17423 && offvals[3] == offvals[2] + msize))
350013bc
BC
17424 return false;
17425
d0b51297
JW
17426 /* Check that offsets are within range of each other. The ldp/stp
17427 instructions have 7 bit immediate offsets, so use 0x80. */
17428 if (offvals[2] - offvals[0] >= msize * 0x80)
17429 return false;
350013bc 17430
d0b51297
JW
17431 /* The offsets must be aligned with respect to each other. */
17432 if (offvals[0] % msize != offvals[2] % msize)
17433 return false;
17434
54700e2e
AP
 17435 /* If we have SImode and slow unaligned ldp,
 17436 check that the alignment is at least 8 bytes. */
17437 if (mode == SImode
17438 && (aarch64_tune_params.extra_tuning_flags
34d7854d 17439 & AARCH64_EXTRA_TUNE_SLOW_UNALIGNED_LDPW)
54700e2e 17440 && !optimize_size
34d7854d 17441 && MEM_ALIGN (mem[0]) < 8 * BITS_PER_UNIT)
54700e2e
AP
17442 return false;
17443
350013bc
BC
17444 return true;
17445}
17446
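/* Illustrative example (an addition to this text, not part of GCC): four
   consecutive SImode stores whose offsets 0x100..0x10c fall outside the
   STP immediate range, matching the str/stp example in the comment of
   aarch64_operands_adjust_ok_for_ldpstp above.  The function name is
   hypothetical.  */

void
store_four_adjusted_example (int *p)
{
  p[64] = 1;   /* [p, 0x100] */
  p[65] = 1;   /* [p, 0x104] */
  p[66] = 1;   /* [p, 0x108] */
  p[67] = 1;   /* [p, 0x10c] */
}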
17447/* Given OPERANDS of consecutive load/store, this function pairs them
d0b51297
JW
17448 into LDP/STP after adjusting the offset. It depends on the fact
17449 that the operands can be sorted so the offsets are correct for STP.
350013bc
BC
17450 MODE is the mode of memory operands. CODE is the rtl operator
17451 which should be applied to all memory operands, it's SIGN_EXTEND,
17452 ZERO_EXTEND or UNKNOWN. */
17453
17454bool
17455aarch64_gen_adjusted_ldpstp (rtx *operands, bool load,
146c2e3a 17456 scalar_mode mode, RTX_CODE code)
350013bc 17457{
d0b51297 17458 rtx base, offset_1, offset_3, t1, t2;
350013bc 17459 rtx mem_1, mem_2, mem_3, mem_4;
d0b51297
JW
17460 rtx temp_operands[8];
17461 HOST_WIDE_INT off_val_1, off_val_3, base_off, new_off_1, new_off_3,
17462 stp_off_upper_limit, stp_off_lower_limit, msize;
9b56ec11 17463
d0b51297
JW
17464 /* We make changes on a copy as we may still bail out. */
17465 for (int i = 0; i < 8; i ++)
17466 temp_operands[i] = operands[i];
9b56ec11 17467
d0b51297
JW
17468 /* Sort the operands. */
17469 qsort (temp_operands, 4, 2 * sizeof (rtx *), aarch64_ldrstr_offset_compare);
9b56ec11 17470
350013bc
BC
17471 if (load)
17472 {
d0b51297
JW
17473 mem_1 = temp_operands[1];
17474 mem_2 = temp_operands[3];
17475 mem_3 = temp_operands[5];
17476 mem_4 = temp_operands[7];
350013bc
BC
17477 }
17478 else
17479 {
d0b51297
JW
17480 mem_1 = temp_operands[0];
17481 mem_2 = temp_operands[2];
17482 mem_3 = temp_operands[4];
17483 mem_4 = temp_operands[6];
350013bc
BC
17484 gcc_assert (code == UNKNOWN);
17485 }
17486
9b56ec11 17487 extract_base_offset_in_addr (mem_1, &base, &offset_1);
d0b51297
JW
17488 extract_base_offset_in_addr (mem_3, &base, &offset_3);
17489 gcc_assert (base != NULL_RTX && offset_1 != NULL_RTX
17490 && offset_3 != NULL_RTX);
350013bc 17491
d0b51297 17492 /* Adjust offset so it can fit in LDP/STP instruction. */
350013bc 17493 msize = GET_MODE_SIZE (mode);
d0b51297
JW
17494 stp_off_upper_limit = msize * (0x40 - 1);
17495 stp_off_lower_limit = - msize * 0x40;
350013bc 17496
d0b51297
JW
17497 off_val_1 = INTVAL (offset_1);
17498 off_val_3 = INTVAL (offset_3);
17499
17500 /* The base offset is optimally half way between the two STP/LDP offsets. */
17501 if (msize <= 4)
17502 base_off = (off_val_1 + off_val_3) / 2;
17503 else
 17504 /* However, due to issues with negative LDP/STP offset generation for
 17505 larger modes (DF, DI and vector modes), we must not use negative
 17506 addresses smaller than what 9 signed unadjusted bits can store. This
 17507 provides the most range in this case. */
17508 base_off = off_val_1;
17509
17510 /* Adjust the base so that it is aligned with the addresses but still
17511 optimal. */
17512 if (base_off % msize != off_val_1 % msize)
17513 /* Fix the offset, bearing in mind we want to make it bigger not
17514 smaller. */
17515 base_off += (((base_off % msize) - (off_val_1 % msize)) + msize) % msize;
17516 else if (msize <= 4)
17517 /* The negative range of LDP/STP is one larger than the positive range. */
17518 base_off += msize;
17519
17520 /* Check if base offset is too big or too small. We can attempt to resolve
17521 this issue by setting it to the maximum value and seeing if the offsets
17522 still fit. */
17523 if (base_off >= 0x1000)
350013bc 17524 {
d0b51297
JW
17525 base_off = 0x1000 - 1;
17526 /* We must still make sure that the base offset is aligned with respect
 17527 to the address. But it may not be made any bigger. */
17528 base_off -= (((base_off % msize) - (off_val_1 % msize)) + msize) % msize;
350013bc
BC
17529 }
17530
d0b51297
JW
17531 /* Likewise for the case where the base is too small. */
17532 if (base_off <= -0x1000)
350013bc 17533 {
d0b51297
JW
17534 base_off = -0x1000 + 1;
17535 base_off += (((base_off % msize) - (off_val_1 % msize)) + msize) % msize;
350013bc
BC
17536 }
17537
d0b51297
JW
17538 /* Offset of the first STP/LDP. */
17539 new_off_1 = off_val_1 - base_off;
17540
17541 /* Offset of the second STP/LDP. */
17542 new_off_3 = off_val_3 - base_off;
350013bc 17543
d0b51297
JW
17544 /* The offsets must be within the range of the LDP/STP instructions. */
17545 if (new_off_1 > stp_off_upper_limit || new_off_1 < stp_off_lower_limit
17546 || new_off_3 > stp_off_upper_limit || new_off_3 < stp_off_lower_limit)
350013bc
BC
17547 return false;
17548
d0b51297
JW
17549 replace_equiv_address_nv (mem_1, plus_constant (Pmode, operands[8],
17550 new_off_1), true);
17551 replace_equiv_address_nv (mem_2, plus_constant (Pmode, operands[8],
17552 new_off_1 + msize), true);
17553 replace_equiv_address_nv (mem_3, plus_constant (Pmode, operands[8],
17554 new_off_3), true);
17555 replace_equiv_address_nv (mem_4, plus_constant (Pmode, operands[8],
17556 new_off_3 + msize), true);
17557
17558 if (!aarch64_mem_pair_operand (mem_1, mode)
17559 || !aarch64_mem_pair_operand (mem_3, mode))
17560 return false;
350013bc
BC
17561
17562 if (code == ZERO_EXTEND)
17563 {
17564 mem_1 = gen_rtx_ZERO_EXTEND (DImode, mem_1);
17565 mem_2 = gen_rtx_ZERO_EXTEND (DImode, mem_2);
17566 mem_3 = gen_rtx_ZERO_EXTEND (DImode, mem_3);
17567 mem_4 = gen_rtx_ZERO_EXTEND (DImode, mem_4);
17568 }
17569 else if (code == SIGN_EXTEND)
17570 {
17571 mem_1 = gen_rtx_SIGN_EXTEND (DImode, mem_1);
17572 mem_2 = gen_rtx_SIGN_EXTEND (DImode, mem_2);
17573 mem_3 = gen_rtx_SIGN_EXTEND (DImode, mem_3);
17574 mem_4 = gen_rtx_SIGN_EXTEND (DImode, mem_4);
17575 }
17576
17577 if (load)
17578 {
d0b51297 17579 operands[0] = temp_operands[0];
350013bc 17580 operands[1] = mem_1;
d0b51297 17581 operands[2] = temp_operands[2];
350013bc 17582 operands[3] = mem_2;
d0b51297 17583 operands[4] = temp_operands[4];
350013bc 17584 operands[5] = mem_3;
d0b51297 17585 operands[6] = temp_operands[6];
350013bc
BC
17586 operands[7] = mem_4;
17587 }
17588 else
17589 {
17590 operands[0] = mem_1;
d0b51297 17591 operands[1] = temp_operands[1];
350013bc 17592 operands[2] = mem_2;
d0b51297 17593 operands[3] = temp_operands[3];
350013bc 17594 operands[4] = mem_3;
d0b51297 17595 operands[5] = temp_operands[5];
350013bc 17596 operands[6] = mem_4;
d0b51297 17597 operands[7] = temp_operands[7];
350013bc
BC
17598 }
17599
17600 /* Emit adjusting instruction. */
d0b51297 17601 emit_insn (gen_rtx_SET (operands[8], plus_constant (DImode, base, base_off)));
350013bc 17602 /* Emit ldp/stp instructions. */
f7df4a84
RS
17603 t1 = gen_rtx_SET (operands[0], operands[1]);
17604 t2 = gen_rtx_SET (operands[2], operands[3]);
350013bc 17605 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, t1, t2)));
f7df4a84
RS
17606 t1 = gen_rtx_SET (operands[4], operands[5]);
17607 t2 = gen_rtx_SET (operands[6], operands[7]);
350013bc
BC
17608 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, t1, t2)));
17609 return true;
17610}
17611
76a34e3f
RS
17612/* Implement TARGET_VECTORIZE_EMPTY_MASK_IS_EXPENSIVE. Assume for now that
17613 it isn't worth branching around empty masked ops (including masked
17614 stores). */
17615
17616static bool
17617aarch64_empty_mask_is_expensive (unsigned)
17618{
17619 return false;
17620}
17621
1b1e81f8
JW
17622/* Return true if a pseudo register should be created and used to hold
 17623 the GOT address for PIC code. */
17624
17625bool
17626aarch64_use_pseudo_pic_reg (void)
17627{
17628 return aarch64_cmodel == AARCH64_CMODEL_SMALL_SPIC;
17629}
17630
7b841a12
JW
17631/* Implement TARGET_UNSPEC_MAY_TRAP_P. */
17632
17633static int
17634aarch64_unspec_may_trap_p (const_rtx x, unsigned flags)
17635{
17636 switch (XINT (x, 1))
17637 {
17638 case UNSPEC_GOTSMALLPIC:
17639 case UNSPEC_GOTSMALLPIC28K:
17640 case UNSPEC_GOTTINYPIC:
17641 return 0;
17642 default:
17643 break;
17644 }
17645
17646 return default_unspec_may_trap_p (x, flags);
17647}
17648
39252973
KT
17649
17650/* If X is a positive CONST_DOUBLE with a value that is a power of 2,
17651 return the log2 of that value. Otherwise return -1. */
17652
17653int
17654aarch64_fpconst_pow_of_2 (rtx x)
17655{
17656 const REAL_VALUE_TYPE *r;
17657
17658 if (!CONST_DOUBLE_P (x))
17659 return -1;
17660
17661 r = CONST_DOUBLE_REAL_VALUE (x);
17662
17663 if (REAL_VALUE_NEGATIVE (*r)
17664 || REAL_VALUE_ISNAN (*r)
17665 || REAL_VALUE_ISINF (*r)
17666 || !real_isinteger (r, DFmode))
17667 return -1;
17668
17669 return exact_log2 (real_to_integer (r));
17670}
17671
17672/* If X is a vector of equal CONST_DOUBLE values and that value is
17673 Y, return the aarch64_fpconst_pow_of_2 of Y. Otherwise return -1. */
17674
17675int
17676aarch64_vec_fpconst_pow_of_2 (rtx x)
17677{
6a70badb
RS
17678 int nelts;
17679 if (GET_CODE (x) != CONST_VECTOR
17680 || !CONST_VECTOR_NUNITS (x).is_constant (&nelts))
39252973
KT
17681 return -1;
17682
17683 if (GET_MODE_CLASS (GET_MODE (x)) != MODE_VECTOR_FLOAT)
17684 return -1;
17685
17686 int firstval = aarch64_fpconst_pow_of_2 (CONST_VECTOR_ELT (x, 0));
17687 if (firstval <= 0)
17688 return -1;
17689
6a70badb 17690 for (int i = 1; i < nelts; i++)
39252973
KT
17691 if (aarch64_fpconst_pow_of_2 (CONST_VECTOR_ELT (x, i)) != firstval)
17692 return -1;
17693
17694 return firstval;
17695}
17696
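/* Illustrative sketch (an addition to this text, not part of GCC): a rough
   standalone analogue of aarch64_fpconst_pow_of_2 for a plain double,
   ignoring the CONST_DOUBLE plumbing.  The helper name is hypothetical.  */

#include <math.h>

static int
fpconst_pow_of_2_example (double x)
{
  int e;
  double m = frexp (x, &e);    /* x == m * 2^e, with 0.5 <= |m| < 1.  */
  if (x < 1 || m != 0.5)       /* Reject non-powers-of-2, x < 1, NaN, Inf.  */
    return -1;
  return e - 1;                /* x == 2^(e - 1), e.g. 8.0 -> 3.  */
}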
11e554b3
JG
17697/* Implement TARGET_PROMOTED_TYPE to promote 16-bit floating point types
17698 to float.
17699
17700 __fp16 always promotes through this hook.
17701 _Float16 may promote if TARGET_FLT_EVAL_METHOD is 16, but we do that
17702 through the generic excess precision logic rather than here. */
17703
c2ec330c
AL
17704static tree
17705aarch64_promoted_type (const_tree t)
17706{
11e554b3
JG
17707 if (SCALAR_FLOAT_TYPE_P (t)
17708 && TYPE_MAIN_VARIANT (t) == aarch64_fp16_type_node)
c2ec330c 17709 return float_type_node;
11e554b3 17710
c2ec330c
AL
17711 return NULL_TREE;
17712}
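/* Illustrative example (an addition to this text, not part of GCC): because
   of the promotion above, arithmetic on __fp16 operands is carried out in
   float and only the final result is narrowed back.  The function name is
   hypothetical.  */

__fp16
fp16_add_example (__fp16 a, __fp16 b)
{
  return a + b;   /* Evaluated as (float) a + (float) b, then converted.  */
}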
ee62a5a6
RS
17713
17714/* Implement the TARGET_OPTAB_SUPPORTED_P hook. */
17715
17716static bool
9acc9cbe 17717aarch64_optab_supported_p (int op, machine_mode mode1, machine_mode,
ee62a5a6
RS
17718 optimization_type opt_type)
17719{
17720 switch (op)
17721 {
17722 case rsqrt_optab:
9acc9cbe 17723 return opt_type == OPTIMIZE_FOR_SPEED && use_rsqrt_p (mode1);
ee62a5a6
RS
17724
17725 default:
17726 return true;
17727 }
17728}
17729
43cacb12
RS
17730/* Implement the TARGET_DWARF_POLY_INDETERMINATE_VALUE hook. */
17731
17732static unsigned int
17733aarch64_dwarf_poly_indeterminate_value (unsigned int i, unsigned int *factor,
17734 int *offset)
17735{
17736 /* Polynomial invariant 1 == (VG / 2) - 1. */
17737 gcc_assert (i == 1);
17738 *factor = 2;
17739 *offset = 1;
17740 return AARCH64_DWARF_VG;
17741}
17742
11e554b3
JG
17743/* Implement TARGET_LIBGCC_FLOATING_MODE_SUPPORTED_P - return TRUE
17744 if MODE is HFmode, and punt to the generic implementation otherwise. */
17745
17746static bool
7c5bd57a 17747aarch64_libgcc_floating_mode_supported_p (scalar_float_mode mode)
11e554b3
JG
17748{
17749 return (mode == HFmode
17750 ? true
17751 : default_libgcc_floating_mode_supported_p (mode));
17752}
17753
2e5f8203
JG
17754/* Implement TARGET_SCALAR_MODE_SUPPORTED_P - return TRUE
17755 if MODE is HFmode, and punt to the generic implementation otherwise. */
17756
17757static bool
18e2a8b8 17758aarch64_scalar_mode_supported_p (scalar_mode mode)
2e5f8203
JG
17759{
17760 return (mode == HFmode
17761 ? true
17762 : default_scalar_mode_supported_p (mode));
17763}
17764
11e554b3
JG
17765/* Set the value of FLT_EVAL_METHOD.
17766 ISO/IEC TS 18661-3 defines two values that we'd like to make use of:
17767
17768 0: evaluate all operations and constants, whose semantic type has at
17769 most the range and precision of type float, to the range and
17770 precision of float; evaluate all other operations and constants to
17771 the range and precision of the semantic type;
17772
 17773 N, where _FloatN is a supported interchange floating type:
17774 evaluate all operations and constants, whose semantic type has at
17775 most the range and precision of _FloatN type, to the range and
17776 precision of the _FloatN type; evaluate all other operations and
17777 constants to the range and precision of the semantic type;
17778
17779 If we have the ARMv8.2-A extensions then we support _Float16 in native
17780 precision, so we should set this to 16. Otherwise, we support the type,
17781 but want to evaluate expressions in float precision, so set this to
17782 0. */
17783
17784static enum flt_eval_method
17785aarch64_excess_precision (enum excess_precision_type type)
17786{
17787 switch (type)
17788 {
17789 case EXCESS_PRECISION_TYPE_FAST:
17790 case EXCESS_PRECISION_TYPE_STANDARD:
17791 /* We can calculate either in 16-bit range and precision or
17792 32-bit range and precision. Make that decision based on whether
17793 we have native support for the ARMv8.2-A 16-bit floating-point
17794 instructions or not. */
17795 return (TARGET_FP_F16INST
17796 ? FLT_EVAL_METHOD_PROMOTE_TO_FLOAT16
17797 : FLT_EVAL_METHOD_PROMOTE_TO_FLOAT);
17798 case EXCESS_PRECISION_TYPE_IMPLICIT:
17799 return FLT_EVAL_METHOD_PROMOTE_TO_FLOAT16;
17800 default:
17801 gcc_unreachable ();
17802 }
17803 return FLT_EVAL_METHOD_UNPREDICTABLE;
17804}
17805
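/* Illustrative example (an addition to this text, not part of GCC): under
   FLT_EVAL_METHOD_PROMOTE_TO_FLOAT (no ARMv8.2-A FP16 instructions) the
   intermediate product below is evaluated in float; with TARGET_FP_F16INST
   it stays in _Float16 precision.  The function name is hypothetical.  */

_Float16
f16_madd_example (_Float16 a, _Float16 b, _Float16 c)
{
  return a * b + c;
}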
b48d6421
KT
17806/* Implement TARGET_SCHED_CAN_SPECULATE_INSN. Return true if INSN can be
17807 scheduled for speculative execution. Reject the long-running division
17808 and square-root instructions. */
17809
17810static bool
17811aarch64_sched_can_speculate_insn (rtx_insn *insn)
17812{
17813 switch (get_attr_type (insn))
17814 {
17815 case TYPE_SDIV:
17816 case TYPE_UDIV:
17817 case TYPE_FDIVS:
17818 case TYPE_FDIVD:
17819 case TYPE_FSQRTS:
17820 case TYPE_FSQRTD:
17821 case TYPE_NEON_FP_SQRT_S:
17822 case TYPE_NEON_FP_SQRT_D:
17823 case TYPE_NEON_FP_SQRT_S_Q:
17824 case TYPE_NEON_FP_SQRT_D_Q:
17825 case TYPE_NEON_FP_DIV_S:
17826 case TYPE_NEON_FP_DIV_D:
17827 case TYPE_NEON_FP_DIV_S_Q:
17828 case TYPE_NEON_FP_DIV_D_Q:
17829 return false;
17830 default:
17831 return true;
17832 }
17833}
17834
43cacb12
RS
17835/* Implement TARGET_COMPUTE_PRESSURE_CLASSES. */
17836
17837static int
17838aarch64_compute_pressure_classes (reg_class *classes)
17839{
17840 int i = 0;
17841 classes[i++] = GENERAL_REGS;
17842 classes[i++] = FP_REGS;
17843 /* PR_REGS isn't a useful pressure class because many predicate pseudo
17844 registers need to go in PR_LO_REGS at some point during their
17845 lifetime. Splitting it into two halves has the effect of making
17846 all predicates count against PR_LO_REGS, so that we try whenever
17847 possible to restrict the number of live predicates to 8. This
17848 greatly reduces the amount of spilling in certain loops. */
17849 classes[i++] = PR_LO_REGS;
17850 classes[i++] = PR_HI_REGS;
17851 return i;
17852}
17853
17854/* Implement TARGET_CAN_CHANGE_MODE_CLASS. */
17855
17856static bool
17857aarch64_can_change_mode_class (machine_mode from,
17858 machine_mode to, reg_class_t)
17859{
002092be
RS
17860 if (BYTES_BIG_ENDIAN)
17861 {
17862 bool from_sve_p = aarch64_sve_data_mode_p (from);
17863 bool to_sve_p = aarch64_sve_data_mode_p (to);
17864
17865 /* Don't allow changes between SVE data modes and non-SVE modes.
17866 See the comment at the head of aarch64-sve.md for details. */
17867 if (from_sve_p != to_sve_p)
17868 return false;
17869
17870 /* Don't allow changes in element size: lane 0 of the new vector
17871 would not then be lane 0 of the old vector. See the comment
17872 above aarch64_maybe_expand_sve_subreg_move for a more detailed
17873 description.
17874
17875 In the worst case, this forces a register to be spilled in
17876 one mode and reloaded in the other, which handles the
17877 endianness correctly. */
17878 if (from_sve_p && GET_MODE_UNIT_SIZE (from) != GET_MODE_UNIT_SIZE (to))
17879 return false;
17880 }
43cacb12
RS
17881 return true;
17882}
17883
5cce8171
RS
17884/* Implement TARGET_EARLY_REMAT_MODES. */
17885
17886static void
17887aarch64_select_early_remat_modes (sbitmap modes)
17888{
17889 /* SVE values are not normally live across a call, so it should be
17890 worth doing early rematerialization even in VL-specific mode. */
17891 for (int i = 0; i < NUM_MACHINE_MODES; ++i)
17892 {
17893 machine_mode mode = (machine_mode) i;
17894 unsigned int vec_flags = aarch64_classify_vector_mode (mode);
17895 if (vec_flags & VEC_ANY_SVE)
17896 bitmap_set_bit (modes, i);
17897 }
17898}
17899
c0111dc4
RE
17900/* Override the default target speculation_safe_value. */
17901static rtx
17902aarch64_speculation_safe_value (machine_mode mode,
17903 rtx result, rtx val, rtx failval)
17904{
17905 /* Maybe we should warn if falling back to hard barriers. They are
 17906 likely to be noticeably more expensive than the alternative below. */
17907 if (!aarch64_track_speculation)
17908 return default_speculation_safe_value (mode, result, val, failval);
17909
17910 if (!REG_P (val))
17911 val = copy_to_mode_reg (mode, val);
17912
17913 if (!aarch64_reg_or_zero (failval, mode))
17914 failval = copy_to_mode_reg (mode, failval);
17915
21cebf90 17916 emit_insn (gen_despeculate_copy (mode, result, val, failval));
c0111dc4
RE
17917 return result;
17918}
17919
2d56d6ba
KT
17920/* Implement TARGET_ESTIMATED_POLY_VALUE.
17921 Look into the tuning structure for an estimate.
17922 VAL.coeffs[1] is multiplied by the number of VQ chunks over the initial
17923 Advanced SIMD 128 bits. */
17924
17925static HOST_WIDE_INT
17926aarch64_estimated_poly_value (poly_int64 val)
17927{
17928 enum aarch64_sve_vector_bits_enum width_source
17929 = aarch64_tune_params.sve_width;
17930
17931 /* If we still don't have an estimate, use the default. */
17932 if (width_source == SVE_SCALABLE)
17933 return default_estimated_poly_value (val);
17934
17935 HOST_WIDE_INT over_128 = width_source - 128;
17936 return val.coeffs[0] + val.coeffs[1] * over_128 / 128;
17937}
17938
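/* Illustrative sketch (an addition to this text, not part of GCC): the
   arithmetic above, mirrored for concrete numbers.  With an SVE width
   estimate of 256 bits, a VL-dependent size of 16 + 16x bytes (coeffs
   {16, 16}) is estimated as 16 + 16 * (256 - 128) / 128 = 32.  The helper
   name is hypothetical.  */

static long
estimated_poly_value_example (long coeff0, long coeff1, long sve_width_bits)
{
  long over_128 = sve_width_bits - 128;
  return coeff0 + coeff1 * over_128 / 128;
}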
51b86113
DM
17939/* Target-specific selftests. */
17940
17941#if CHECKING_P
17942
17943namespace selftest {
17944
17945/* Selftest for the RTL loader.
17946 Verify that the RTL loader copes with a dump from
17947 print_rtx_function. This is essentially just a test that class
17948 function_reader can handle a real dump, but it also verifies
17949 that lookup_reg_by_dump_name correctly handles hard regs.
17950 The presence of hard reg names in the dump means that the test is
17951 target-specific, hence it is in this file. */
17952
17953static void
17954aarch64_test_loading_full_dump ()
17955{
17956 rtl_dump_test t (SELFTEST_LOCATION, locate_file ("aarch64/times-two.rtl"));
17957
17958 ASSERT_STREQ ("times_two", IDENTIFIER_POINTER (DECL_NAME (cfun->decl)));
17959
17960 rtx_insn *insn_1 = get_insn_by_uid (1);
17961 ASSERT_EQ (NOTE, GET_CODE (insn_1));
17962
17963 rtx_insn *insn_15 = get_insn_by_uid (15);
17964 ASSERT_EQ (INSN, GET_CODE (insn_15));
17965 ASSERT_EQ (USE, GET_CODE (PATTERN (insn_15)));
17966
17967 /* Verify crtl->return_rtx. */
17968 ASSERT_EQ (REG, GET_CODE (crtl->return_rtx));
17969 ASSERT_EQ (0, REGNO (crtl->return_rtx));
17970 ASSERT_EQ (SImode, GET_MODE (crtl->return_rtx));
17971}
17972
17973/* Run all target-specific selftests. */
17974
17975static void
17976aarch64_run_selftests (void)
17977{
17978 aarch64_test_loading_full_dump ();
17979}
17980
17981} // namespace selftest
17982
17983#endif /* #if CHECKING_P */
17984
43e9d192
IB
17985#undef TARGET_ADDRESS_COST
17986#define TARGET_ADDRESS_COST aarch64_address_cost
17987
17988/* This hook determines whether unnamed bitfields affect the alignment
17989 of the containing structure. The hook returns true if the structure
17990 should inherit the alignment requirements of an unnamed bitfield's
17991 type. */
17992#undef TARGET_ALIGN_ANON_BITFIELD
17993#define TARGET_ALIGN_ANON_BITFIELD hook_bool_void_true
17994
17995#undef TARGET_ASM_ALIGNED_DI_OP
17996#define TARGET_ASM_ALIGNED_DI_OP "\t.xword\t"
17997
17998#undef TARGET_ASM_ALIGNED_HI_OP
17999#define TARGET_ASM_ALIGNED_HI_OP "\t.hword\t"
18000
18001#undef TARGET_ASM_ALIGNED_SI_OP
18002#define TARGET_ASM_ALIGNED_SI_OP "\t.word\t"
18003
18004#undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
18005#define TARGET_ASM_CAN_OUTPUT_MI_THUNK \
18006 hook_bool_const_tree_hwi_hwi_const_tree_true
18007
e1c1ecb0
KT
18008#undef TARGET_ASM_FILE_START
18009#define TARGET_ASM_FILE_START aarch64_start_file
18010
43e9d192
IB
18011#undef TARGET_ASM_OUTPUT_MI_THUNK
18012#define TARGET_ASM_OUTPUT_MI_THUNK aarch64_output_mi_thunk
18013
18014#undef TARGET_ASM_SELECT_RTX_SECTION
18015#define TARGET_ASM_SELECT_RTX_SECTION aarch64_select_rtx_section
18016
18017#undef TARGET_ASM_TRAMPOLINE_TEMPLATE
18018#define TARGET_ASM_TRAMPOLINE_TEMPLATE aarch64_asm_trampoline_template
18019
18020#undef TARGET_BUILD_BUILTIN_VA_LIST
18021#define TARGET_BUILD_BUILTIN_VA_LIST aarch64_build_builtin_va_list
18022
18023#undef TARGET_CALLEE_COPIES
18024#define TARGET_CALLEE_COPIES hook_bool_CUMULATIVE_ARGS_mode_tree_bool_false
18025
18026#undef TARGET_CAN_ELIMINATE
18027#define TARGET_CAN_ELIMINATE aarch64_can_eliminate
18028
1fd8d40c
KT
18029#undef TARGET_CAN_INLINE_P
18030#define TARGET_CAN_INLINE_P aarch64_can_inline_p
18031
43e9d192
IB
18032#undef TARGET_CANNOT_FORCE_CONST_MEM
18033#define TARGET_CANNOT_FORCE_CONST_MEM aarch64_cannot_force_const_mem
18034
50487d79
EM
18035#undef TARGET_CASE_VALUES_THRESHOLD
18036#define TARGET_CASE_VALUES_THRESHOLD aarch64_case_values_threshold
18037
43e9d192
IB
18038#undef TARGET_CONDITIONAL_REGISTER_USAGE
18039#define TARGET_CONDITIONAL_REGISTER_USAGE aarch64_conditional_register_usage
18040
18041/* Only the least significant bit is used for initialization guard
18042 variables. */
18043#undef TARGET_CXX_GUARD_MASK_BIT
18044#define TARGET_CXX_GUARD_MASK_BIT hook_bool_void_true
18045
18046#undef TARGET_C_MODE_FOR_SUFFIX
18047#define TARGET_C_MODE_FOR_SUFFIX aarch64_c_mode_for_suffix
18048
18049#ifdef TARGET_BIG_ENDIAN_DEFAULT
18050#undef TARGET_DEFAULT_TARGET_FLAGS
18051#define TARGET_DEFAULT_TARGET_FLAGS (MASK_BIG_END)
18052#endif
18053
18054#undef TARGET_CLASS_MAX_NREGS
18055#define TARGET_CLASS_MAX_NREGS aarch64_class_max_nregs
18056
119103ca
JG
18057#undef TARGET_BUILTIN_DECL
18058#define TARGET_BUILTIN_DECL aarch64_builtin_decl
18059
a6fc00da
BH
18060#undef TARGET_BUILTIN_RECIPROCAL
18061#define TARGET_BUILTIN_RECIPROCAL aarch64_builtin_reciprocal
18062
11e554b3
JG
18063#undef TARGET_C_EXCESS_PRECISION
18064#define TARGET_C_EXCESS_PRECISION aarch64_excess_precision
18065
43e9d192
IB
18066#undef TARGET_EXPAND_BUILTIN
18067#define TARGET_EXPAND_BUILTIN aarch64_expand_builtin
18068
18069#undef TARGET_EXPAND_BUILTIN_VA_START
18070#define TARGET_EXPAND_BUILTIN_VA_START aarch64_expand_builtin_va_start
18071
9697e620
JG
18072#undef TARGET_FOLD_BUILTIN
18073#define TARGET_FOLD_BUILTIN aarch64_fold_builtin
18074
43e9d192
IB
18075#undef TARGET_FUNCTION_ARG
18076#define TARGET_FUNCTION_ARG aarch64_function_arg
18077
18078#undef TARGET_FUNCTION_ARG_ADVANCE
18079#define TARGET_FUNCTION_ARG_ADVANCE aarch64_function_arg_advance
18080
18081#undef TARGET_FUNCTION_ARG_BOUNDARY
18082#define TARGET_FUNCTION_ARG_BOUNDARY aarch64_function_arg_boundary
18083
76b0cbf8
RS
18084#undef TARGET_FUNCTION_ARG_PADDING
18085#define TARGET_FUNCTION_ARG_PADDING aarch64_function_arg_padding
18086
43cacb12
RS
18087#undef TARGET_GET_RAW_RESULT_MODE
18088#define TARGET_GET_RAW_RESULT_MODE aarch64_get_reg_raw_mode
18089#undef TARGET_GET_RAW_ARG_MODE
18090#define TARGET_GET_RAW_ARG_MODE aarch64_get_reg_raw_mode
18091
43e9d192
IB
18092#undef TARGET_FUNCTION_OK_FOR_SIBCALL
18093#define TARGET_FUNCTION_OK_FOR_SIBCALL aarch64_function_ok_for_sibcall
18094
18095#undef TARGET_FUNCTION_VALUE
18096#define TARGET_FUNCTION_VALUE aarch64_function_value
18097
18098#undef TARGET_FUNCTION_VALUE_REGNO_P
18099#define TARGET_FUNCTION_VALUE_REGNO_P aarch64_function_value_regno_p
18100
fc72cba7
AL
18101#undef TARGET_GIMPLE_FOLD_BUILTIN
18102#define TARGET_GIMPLE_FOLD_BUILTIN aarch64_gimple_fold_builtin
0ac198d3 18103
43e9d192
IB
18104#undef TARGET_GIMPLIFY_VA_ARG_EXPR
18105#define TARGET_GIMPLIFY_VA_ARG_EXPR aarch64_gimplify_va_arg_expr
18106
18107#undef TARGET_INIT_BUILTINS
18108#define TARGET_INIT_BUILTINS aarch64_init_builtins
18109
c64f7d37
WD
18110#undef TARGET_IRA_CHANGE_PSEUDO_ALLOCNO_CLASS
18111#define TARGET_IRA_CHANGE_PSEUDO_ALLOCNO_CLASS \
18112 aarch64_ira_change_pseudo_allocno_class
18113
43e9d192
IB
18114#undef TARGET_LEGITIMATE_ADDRESS_P
18115#define TARGET_LEGITIMATE_ADDRESS_P aarch64_legitimate_address_hook_p
18116
18117#undef TARGET_LEGITIMATE_CONSTANT_P
18118#define TARGET_LEGITIMATE_CONSTANT_P aarch64_legitimate_constant_p
18119
491ec060
WD
18120#undef TARGET_LEGITIMIZE_ADDRESS_DISPLACEMENT
18121#define TARGET_LEGITIMIZE_ADDRESS_DISPLACEMENT \
18122 aarch64_legitimize_address_displacement
18123
43e9d192
IB
18124#undef TARGET_LIBGCC_CMP_RETURN_MODE
18125#define TARGET_LIBGCC_CMP_RETURN_MODE aarch64_libgcc_cmp_return_mode
18126
11e554b3
JG
18127#undef TARGET_LIBGCC_FLOATING_MODE_SUPPORTED_P
18128#define TARGET_LIBGCC_FLOATING_MODE_SUPPORTED_P \
18129aarch64_libgcc_floating_mode_supported_p
18130
ac2b960f
YZ
18131#undef TARGET_MANGLE_TYPE
18132#define TARGET_MANGLE_TYPE aarch64_mangle_type
18133
43e9d192
IB
18134#undef TARGET_MEMORY_MOVE_COST
18135#define TARGET_MEMORY_MOVE_COST aarch64_memory_move_cost
18136
26e0ff94
WD
18137#undef TARGET_MIN_DIVISIONS_FOR_RECIP_MUL
18138#define TARGET_MIN_DIVISIONS_FOR_RECIP_MUL aarch64_min_divisions_for_recip_mul
18139
43e9d192
IB
18140#undef TARGET_MUST_PASS_IN_STACK
18141#define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size
18142
18143/* This target hook should return true if accesses to volatile bitfields
18144 should use the narrowest mode possible. It should return false if these
18145 accesses should use the bitfield container type. */
18146#undef TARGET_NARROW_VOLATILE_BITFIELD
18147#define TARGET_NARROW_VOLATILE_BITFIELD hook_bool_void_false
18148
#undef TARGET_OPTION_OVERRIDE
#define TARGET_OPTION_OVERRIDE aarch64_override_options

#undef TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE
#define TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE \
  aarch64_override_options_after_change

#undef TARGET_OPTION_SAVE
#define TARGET_OPTION_SAVE aarch64_option_save

#undef TARGET_OPTION_RESTORE
#define TARGET_OPTION_RESTORE aarch64_option_restore

#undef TARGET_OPTION_PRINT
#define TARGET_OPTION_PRINT aarch64_option_print

#undef TARGET_OPTION_VALID_ATTRIBUTE_P
#define TARGET_OPTION_VALID_ATTRIBUTE_P aarch64_option_valid_attribute_p

#undef TARGET_SET_CURRENT_FUNCTION
#define TARGET_SET_CURRENT_FUNCTION aarch64_set_current_function

#undef TARGET_PASS_BY_REFERENCE
#define TARGET_PASS_BY_REFERENCE aarch64_pass_by_reference

#undef TARGET_PREFERRED_RELOAD_CLASS
#define TARGET_PREFERRED_RELOAD_CLASS aarch64_preferred_reload_class

#undef TARGET_SCHED_REASSOCIATION_WIDTH
#define TARGET_SCHED_REASSOCIATION_WIDTH aarch64_reassociation_width

#undef TARGET_PROMOTED_TYPE
#define TARGET_PROMOTED_TYPE aarch64_promoted_type

#undef TARGET_SECONDARY_RELOAD
#define TARGET_SECONDARY_RELOAD aarch64_secondary_reload

#undef TARGET_SHIFT_TRUNCATION_MASK
#define TARGET_SHIFT_TRUNCATION_MASK aarch64_shift_truncation_mask

#undef TARGET_SETUP_INCOMING_VARARGS
#define TARGET_SETUP_INCOMING_VARARGS aarch64_setup_incoming_varargs

#undef TARGET_STRUCT_VALUE_RTX
#define TARGET_STRUCT_VALUE_RTX aarch64_struct_value_rtx

#undef TARGET_REGISTER_MOVE_COST
#define TARGET_REGISTER_MOVE_COST aarch64_register_move_cost

#undef TARGET_RETURN_IN_MEMORY
#define TARGET_RETURN_IN_MEMORY aarch64_return_in_memory

#undef TARGET_RETURN_IN_MSB
#define TARGET_RETURN_IN_MSB aarch64_return_in_msb

#undef TARGET_RTX_COSTS
#define TARGET_RTX_COSTS aarch64_rtx_costs_wrapper

#undef TARGET_SCALAR_MODE_SUPPORTED_P
#define TARGET_SCALAR_MODE_SUPPORTED_P aarch64_scalar_mode_supported_p

#undef TARGET_SCHED_ISSUE_RATE
#define TARGET_SCHED_ISSUE_RATE aarch64_sched_issue_rate

#undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
  aarch64_sched_first_cycle_multipass_dfa_lookahead

#undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD
#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD \
  aarch64_first_cycle_multipass_dfa_lookahead_guard

#undef TARGET_SHRINK_WRAP_GET_SEPARATE_COMPONENTS
#define TARGET_SHRINK_WRAP_GET_SEPARATE_COMPONENTS \
  aarch64_get_separate_components

#undef TARGET_SHRINK_WRAP_COMPONENTS_FOR_BB
#define TARGET_SHRINK_WRAP_COMPONENTS_FOR_BB \
  aarch64_components_for_bb

#undef TARGET_SHRINK_WRAP_DISQUALIFY_COMPONENTS
#define TARGET_SHRINK_WRAP_DISQUALIFY_COMPONENTS \
  aarch64_disqualify_components

#undef TARGET_SHRINK_WRAP_EMIT_PROLOGUE_COMPONENTS
#define TARGET_SHRINK_WRAP_EMIT_PROLOGUE_COMPONENTS \
  aarch64_emit_prologue_components

#undef TARGET_SHRINK_WRAP_EMIT_EPILOGUE_COMPONENTS
#define TARGET_SHRINK_WRAP_EMIT_EPILOGUE_COMPONENTS \
  aarch64_emit_epilogue_components

#undef TARGET_SHRINK_WRAP_SET_HANDLED_COMPONENTS
#define TARGET_SHRINK_WRAP_SET_HANDLED_COMPONENTS \
  aarch64_set_handled_components

#undef TARGET_TRAMPOLINE_INIT
#define TARGET_TRAMPOLINE_INIT aarch64_trampoline_init

#undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
#define TARGET_USE_BLOCKS_FOR_CONSTANT_P aarch64_use_blocks_for_constant_p

#undef TARGET_VECTOR_MODE_SUPPORTED_P
#define TARGET_VECTOR_MODE_SUPPORTED_P aarch64_vector_mode_supported_p

#undef TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT
#define TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT \
  aarch64_builtin_support_vector_misalignment

#undef TARGET_ARRAY_MODE
#define TARGET_ARRAY_MODE aarch64_array_mode

#undef TARGET_ARRAY_MODE_SUPPORTED_P
#define TARGET_ARRAY_MODE_SUPPORTED_P aarch64_array_mode_supported_p

#undef TARGET_VECTORIZE_ADD_STMT_COST
#define TARGET_VECTORIZE_ADD_STMT_COST aarch64_add_stmt_cost

#undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
#define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
  aarch64_builtin_vectorization_cost

#undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
#define TARGET_VECTORIZE_PREFERRED_SIMD_MODE aarch64_preferred_simd_mode

#undef TARGET_VECTORIZE_BUILTINS
#define TARGET_VECTORIZE_BUILTINS

#undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
#define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
  aarch64_builtin_vectorized_function

#undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES
#define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES \
  aarch64_autovectorize_vector_sizes

#undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV
#define TARGET_ATOMIC_ASSIGN_EXPAND_FENV \
  aarch64_atomic_assign_expand_fenv

/* Section anchor support.  */

#undef TARGET_MIN_ANCHOR_OFFSET
#define TARGET_MIN_ANCHOR_OFFSET -256

/* Limit the maximum anchor offset to 4k-1, since that's the limit for a
   byte offset; we can do much more for larger data types, but have no way
   to determine the size of the access.  We assume accesses are aligned.  */
#undef TARGET_MAX_ANCHOR_OFFSET
#define TARGET_MAX_ANCHOR_OFFSET 4095

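/* For reference: -256 is the most negative offset accepted by the signed
   9-bit unscaled addressing mode (LDUR/STUR), and 4095 is the largest
   12-bit unsigned immediate offset available for byte-sized accesses
   (LDRB/STRB), which is why the byte offset is the limiting case.  */
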
#undef TARGET_VECTOR_ALIGNMENT
#define TARGET_VECTOR_ALIGNMENT aarch64_simd_vector_alignment

#undef TARGET_VECTORIZE_PREFERRED_VECTOR_ALIGNMENT
#define TARGET_VECTORIZE_PREFERRED_VECTOR_ALIGNMENT \
  aarch64_vectorize_preferred_vector_alignment
#undef TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE
#define TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE \
  aarch64_simd_vector_alignment_reachable

/* vec_perm support.  */

#undef TARGET_VECTORIZE_VEC_PERM_CONST
#define TARGET_VECTORIZE_VEC_PERM_CONST \
  aarch64_vectorize_vec_perm_const

#undef TARGET_VECTORIZE_GET_MASK_MODE
#define TARGET_VECTORIZE_GET_MASK_MODE aarch64_get_mask_mode
#undef TARGET_VECTORIZE_EMPTY_MASK_IS_EXPENSIVE
#define TARGET_VECTORIZE_EMPTY_MASK_IS_EXPENSIVE \
  aarch64_empty_mask_is_expensive
#undef TARGET_PREFERRED_ELSE_VALUE
#define TARGET_PREFERRED_ELSE_VALUE \
  aarch64_preferred_else_value

#undef TARGET_INIT_LIBFUNCS
#define TARGET_INIT_LIBFUNCS aarch64_init_libfuncs

#undef TARGET_FIXED_CONDITION_CODE_REGS
#define TARGET_FIXED_CONDITION_CODE_REGS aarch64_fixed_condition_code_regs

#undef TARGET_FLAGS_REGNUM
#define TARGET_FLAGS_REGNUM CC_REGNUM

#undef TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS
#define TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS true

#undef TARGET_ASAN_SHADOW_OFFSET
#define TARGET_ASAN_SHADOW_OFFSET aarch64_asan_shadow_offset

#undef TARGET_LEGITIMIZE_ADDRESS
#define TARGET_LEGITIMIZE_ADDRESS aarch64_legitimize_address

#undef TARGET_SCHED_CAN_SPECULATE_INSN
#define TARGET_SCHED_CAN_SPECULATE_INSN aarch64_sched_can_speculate_insn

#undef TARGET_CAN_USE_DOLOOP_P
#define TARGET_CAN_USE_DOLOOP_P can_use_doloop_if_innermost

#undef TARGET_SCHED_ADJUST_PRIORITY
#define TARGET_SCHED_ADJUST_PRIORITY aarch64_sched_adjust_priority

#undef TARGET_SCHED_MACRO_FUSION_P
#define TARGET_SCHED_MACRO_FUSION_P aarch64_macro_fusion_p

#undef TARGET_SCHED_MACRO_FUSION_PAIR_P
#define TARGET_SCHED_MACRO_FUSION_PAIR_P aarch_macro_fusion_pair_p

#undef TARGET_SCHED_FUSION_PRIORITY
#define TARGET_SCHED_FUSION_PRIORITY aarch64_sched_fusion_priority

#undef TARGET_UNSPEC_MAY_TRAP_P
#define TARGET_UNSPEC_MAY_TRAP_P aarch64_unspec_may_trap_p

#undef TARGET_USE_PSEUDO_PIC_REG
#define TARGET_USE_PSEUDO_PIC_REG aarch64_use_pseudo_pic_reg

#undef TARGET_PRINT_OPERAND
#define TARGET_PRINT_OPERAND aarch64_print_operand

#undef TARGET_PRINT_OPERAND_ADDRESS
#define TARGET_PRINT_OPERAND_ADDRESS aarch64_print_operand_address

#undef TARGET_OPTAB_SUPPORTED_P
#define TARGET_OPTAB_SUPPORTED_P aarch64_optab_supported_p

#undef TARGET_OMIT_STRUCT_RETURN_REG
#define TARGET_OMIT_STRUCT_RETURN_REG true

#undef TARGET_DWARF_POLY_INDETERMINATE_VALUE
#define TARGET_DWARF_POLY_INDETERMINATE_VALUE \
  aarch64_dwarf_poly_indeterminate_value

/* The architecture reserves bits 0 and 1 so use bit 2 for descriptors.  */
#undef TARGET_CUSTOM_FUNCTION_DESCRIPTORS
#define TARGET_CUSTOM_FUNCTION_DESCRIPTORS 4

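/* The value 4 is 1 << 2, i.e. the descriptor flag occupies bit 2 of a
   function pointer, consistent with the comment above.  */
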
#undef TARGET_HARD_REGNO_NREGS
#define TARGET_HARD_REGNO_NREGS aarch64_hard_regno_nregs
#undef TARGET_HARD_REGNO_MODE_OK
#define TARGET_HARD_REGNO_MODE_OK aarch64_hard_regno_mode_ok

#undef TARGET_MODES_TIEABLE_P
#define TARGET_MODES_TIEABLE_P aarch64_modes_tieable_p

#undef TARGET_HARD_REGNO_CALL_PART_CLOBBERED
#define TARGET_HARD_REGNO_CALL_PART_CLOBBERED \
  aarch64_hard_regno_call_part_clobbered

#undef TARGET_CONSTANT_ALIGNMENT
#define TARGET_CONSTANT_ALIGNMENT aarch64_constant_alignment

#undef TARGET_STACK_CLASH_PROTECTION_ALLOCA_PROBE_RANGE
#define TARGET_STACK_CLASH_PROTECTION_ALLOCA_PROBE_RANGE \
  aarch64_stack_clash_protection_alloca_probe_range

#undef TARGET_COMPUTE_PRESSURE_CLASSES
#define TARGET_COMPUTE_PRESSURE_CLASSES aarch64_compute_pressure_classes

#undef TARGET_CAN_CHANGE_MODE_CLASS
#define TARGET_CAN_CHANGE_MODE_CLASS aarch64_can_change_mode_class

#undef TARGET_SELECT_EARLY_REMAT_MODES
#define TARGET_SELECT_EARLY_REMAT_MODES aarch64_select_early_remat_modes

#undef TARGET_SPECULATION_SAFE_VALUE
#define TARGET_SPECULATION_SAFE_VALUE aarch64_speculation_safe_value

#undef TARGET_ESTIMATED_POLY_VALUE
#define TARGET_ESTIMATED_POLY_VALUE aarch64_estimated_poly_value

#if CHECKING_P
#undef TARGET_RUN_TARGET_SELFTESTS
#define TARGET_RUN_TARGET_SELFTESTS selftest::aarch64_run_selftests
#endif /* #if CHECKING_P */

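/* A sketch of how the definitions above take effect, assuming GCC's
   standard target-hook mechanism: target-def.h (included earlier in this
   file) supplies a default value for every TARGET_* hook macro, the
   #undef/#define pairs above replace the defaults that AArch64 overrides,
   and TARGET_INITIALIZER expands to an aggregate initializer that places
   each macro's value in the corresponding field of the structure below.
   Generic code then reaches the back end only through that structure,
   e.g. a call such as

     targetm.legitimate_constant_p (mode, x)

   dispatches to aarch64_legitimate_constant_p.  */
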
struct gcc_target targetm = TARGET_INITIALIZER;

#include "gt-aarch64.h"