/* Machine description for AArch64 architecture.
   Copyright (C) 2009-2018 Free Software Foundation, Inc.
   Contributed by ARM Ltd.

   This file is part of GCC.

   GCC is free software; you can redistribute it and/or modify it
   under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 3, or (at your option)
   any later version.

   GCC is distributed in the hope that it will be useful, but
   WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with GCC; see the file COPYING3.  If not see
   <http://www.gnu.org/licenses/>.  */

#define IN_TARGET_CODE 1

#include "config.h"
#define INCLUDE_STRING
#include "system.h"
#include "coretypes.h"
#include "backend.h"
#include "target.h"
#include "rtl.h"
#include "tree.h"
#include "memmodel.h"
#include "gimple.h"
#include "cfghooks.h"
#include "cfgloop.h"
#include "df.h"
#include "tm_p.h"
#include "stringpool.h"
#include "attribs.h"
#include "optabs.h"
#include "regs.h"
#include "emit-rtl.h"
#include "recog.h"
#include "diagnostic.h"
#include "insn-attr.h"
#include "alias.h"
#include "fold-const.h"
#include "stor-layout.h"
#include "calls.h"
#include "varasm.h"
#include "output.h"
#include "flags.h"
#include "explow.h"
#include "expr.h"
#include "reload.h"
#include "langhooks.h"
#include "opts.h"
#include "params.h"
#include "gimplify.h"
#include "dwarf2.h"
#include "gimple-iterator.h"
#include "tree-vectorizer.h"
#include "aarch64-cost-tables.h"
#include "dumpfile.h"
#include "builtins.h"
#include "rtl-iter.h"
#include "tm-constrs.h"
#include "sched-int.h"
#include "target-globals.h"
#include "common/common-target.h"
#include "cfgrtl.h"
#include "selftest.h"
#include "selftest-rtl.h"
#include "rtx-vector-builder.h"

/* This file should be included last.  */
#include "target-def.h"

/* Defined for convenience.  */
#define POINTER_BYTES (POINTER_SIZE / BITS_PER_UNIT)
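/* For instance, under the default LP64 ABI POINTER_SIZE is 64 and
   BITS_PER_UNIT is 8, so POINTER_BYTES evaluates to 8; under -mabi=ilp32
   it evaluates to 4.  */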

/* Information about a legitimate vector immediate operand.  */
struct simd_immediate_info
{
  enum insn_type { MOV, MVN };
  enum modifier_type { LSL, MSL };

  simd_immediate_info () {}
  simd_immediate_info (scalar_float_mode, rtx);
  simd_immediate_info (scalar_int_mode, unsigned HOST_WIDE_INT,
		       insn_type = MOV, modifier_type = LSL,
		       unsigned int = 0);
  simd_immediate_info (scalar_mode, rtx, rtx);

  /* The mode of the elements.  */
  scalar_mode elt_mode;

  /* The value of each element if all elements are the same, or the
     first value if the constant is a series.  */
  rtx value;

  /* The value of the step if the constant is a series, null otherwise.  */
  rtx step;

  /* The instruction to use to move the immediate into a vector.  */
  insn_type insn;

  /* The kind of shift modifier to use, and the number of bits to shift.
     This is (LSL, 0) if no shift is needed.  */
  modifier_type modifier;
  unsigned int shift;
};

/* Construct a floating-point immediate in which each element has mode
   ELT_MODE_IN and value VALUE_IN.  */
inline simd_immediate_info
::simd_immediate_info (scalar_float_mode elt_mode_in, rtx value_in)
  : elt_mode (elt_mode_in), value (value_in), step (NULL_RTX), insn (MOV),
    modifier (LSL), shift (0)
{}

/* Construct an integer immediate in which each element has mode ELT_MODE_IN
   and value VALUE_IN.  The other parameters are as for the structure
   fields.  */
inline simd_immediate_info
::simd_immediate_info (scalar_int_mode elt_mode_in,
		       unsigned HOST_WIDE_INT value_in,
		       insn_type insn_in, modifier_type modifier_in,
		       unsigned int shift_in)
  : elt_mode (elt_mode_in), value (gen_int_mode (value_in, elt_mode_in)),
    step (NULL_RTX), insn (insn_in), modifier (modifier_in), shift (shift_in)
{}

/* Construct an integer immediate in which each element has mode ELT_MODE_IN
   and where element I is equal to VALUE_IN + I * STEP_IN.  */
inline simd_immediate_info
::simd_immediate_info (scalar_mode elt_mode_in, rtx value_in, rtx step_in)
  : elt_mode (elt_mode_in), value (value_in), step (step_in), insn (MOV),
    modifier (LSL), shift (0)
{}
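/* As a rough illustration of how the fields combine: a V4SI constant with
   every element equal to 0x00AB0000 could be described by
   simd_immediate_info (SImode, 0xab, MOV, LSL, 16), i.e. a
   "movi Vd.4s, #0xab, lsl #16", while the same description with insn MVN
   would instead stand for the bit-inverted immediate used by MVNI.  */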

/* The current code model.  */
enum aarch64_code_model aarch64_cmodel;

/* The number of 64-bit elements in an SVE vector.  */
poly_uint16 aarch64_sve_vg;

#ifdef HAVE_AS_TLS
#undef TARGET_HAVE_TLS
#define TARGET_HAVE_TLS 1
#endif

static bool aarch64_composite_type_p (const_tree, machine_mode);
static bool aarch64_vfp_is_call_or_return_candidate (machine_mode,
						     const_tree,
						     machine_mode *, int *,
						     bool *);
static void aarch64_elf_asm_constructor (rtx, int) ATTRIBUTE_UNUSED;
static void aarch64_elf_asm_destructor (rtx, int) ATTRIBUTE_UNUSED;
static void aarch64_override_options_after_change (void);
static bool aarch64_vector_mode_supported_p (machine_mode);
static int aarch64_address_cost (rtx, machine_mode, addr_space_t, bool);
static bool aarch64_builtin_support_vector_misalignment (machine_mode mode,
							 const_tree type,
							 int misalignment,
							 bool is_packed);
static machine_mode aarch64_simd_container_mode (scalar_mode, poly_int64);
static bool aarch64_print_address_internal (FILE*, machine_mode, rtx,
					    aarch64_addr_query_type);
static HOST_WIDE_INT aarch64_clamp_to_uimm12_shift (HOST_WIDE_INT val);

/* Major revision number of the ARM Architecture implemented by the target.  */
unsigned aarch64_architecture_version;

/* The processor for which instructions should be scheduled.  */
enum aarch64_processor aarch64_tune = cortexa53;

/* Mask to specify which instruction scheduling options should be used.  */
unsigned long aarch64_tune_flags = 0;

/* Global flag for PC relative loads.  */
bool aarch64_pcrelative_literal_loads;

/* Global flag for whether frame pointer is enabled.  */
bool aarch64_use_frame_pointer;

/* Support for command line parsing of boolean flags in the tuning
   structures.  */
struct aarch64_flag_desc
{
  const char* name;
  unsigned int flag;
};

#define AARCH64_FUSION_PAIR(name, internal_name) \
  { name, AARCH64_FUSE_##internal_name },
static const struct aarch64_flag_desc aarch64_fusible_pairs[] =
{
  { "none", AARCH64_FUSE_NOTHING },
#include "aarch64-fusion-pairs.def"
  { "all", AARCH64_FUSE_ALL },
  { NULL, AARCH64_FUSE_NOTHING }
};

#define AARCH64_EXTRA_TUNING_OPTION(name, internal_name) \
  { name, AARCH64_EXTRA_TUNE_##internal_name },
static const struct aarch64_flag_desc aarch64_tuning_flags[] =
{
  { "none", AARCH64_EXTRA_TUNE_NONE },
#include "aarch64-tuning-flags.def"
  { "all", AARCH64_EXTRA_TUNE_ALL },
  { NULL, AARCH64_EXTRA_TUNE_NONE }
};

/* Tuning parameters.  */

static const struct cpu_addrcost_table generic_addrcost_table =
{
    {
      1, /* hi  */
      0, /* si  */
      0, /* di  */
      1, /* ti  */
    },
  0, /* pre_modify  */
  0, /* post_modify  */
  0, /* register_offset  */
  0, /* register_sextend  */
  0, /* register_zextend  */
  0 /* imm_offset  */
};

static const struct cpu_addrcost_table exynosm1_addrcost_table =
{
    {
      0, /* hi  */
      0, /* si  */
      0, /* di  */
      2, /* ti  */
    },
  0, /* pre_modify  */
  0, /* post_modify  */
  1, /* register_offset  */
  1, /* register_sextend  */
  2, /* register_zextend  */
  0, /* imm_offset  */
};

static const struct cpu_addrcost_table xgene1_addrcost_table =
{
    {
      1, /* hi  */
      0, /* si  */
      0, /* di  */
      1, /* ti  */
    },
  1, /* pre_modify  */
  1, /* post_modify  */
  0, /* register_offset  */
  1, /* register_sextend  */
  1, /* register_zextend  */
  0, /* imm_offset  */
};

static const struct cpu_addrcost_table thunderx2t99_addrcost_table =
{
    {
      1, /* hi  */
      1, /* si  */
      1, /* di  */
      2, /* ti  */
    },
  0, /* pre_modify  */
  0, /* post_modify  */
  2, /* register_offset  */
  3, /* register_sextend  */
  3, /* register_zextend  */
  0, /* imm_offset  */
};

static const struct cpu_addrcost_table tsv110_addrcost_table =
{
    {
      1, /* hi  */
      0, /* si  */
      0, /* di  */
      1, /* ti  */
    },
  0, /* pre_modify  */
  0, /* post_modify  */
  0, /* register_offset  */
  1, /* register_sextend  */
  1, /* register_zextend  */
  0, /* imm_offset  */
};

static const struct cpu_addrcost_table qdf24xx_addrcost_table =
{
    {
      1, /* hi  */
      1, /* si  */
      1, /* di  */
      2, /* ti  */
    },
  1, /* pre_modify  */
  1, /* post_modify  */
  3, /* register_offset  */
  3, /* register_sextend  */
  3, /* register_zextend  */
  2, /* imm_offset  */
};

static const struct cpu_regmove_cost generic_regmove_cost =
{
  1, /* GP2GP  */
  /* Avoid the use of slow int<->fp moves for spilling by setting
     their cost higher than memmov_cost.  */
  5, /* GP2FP  */
  5, /* FP2GP  */
  2 /* FP2FP  */
};

static const struct cpu_regmove_cost cortexa57_regmove_cost =
{
  1, /* GP2GP  */
  /* Avoid the use of slow int<->fp moves for spilling by setting
     their cost higher than memmov_cost.  */
  5, /* GP2FP  */
  5, /* FP2GP  */
  2 /* FP2FP  */
};

static const struct cpu_regmove_cost cortexa53_regmove_cost =
{
  1, /* GP2GP  */
  /* Avoid the use of slow int<->fp moves for spilling by setting
     their cost higher than memmov_cost.  */
  5, /* GP2FP  */
  5, /* FP2GP  */
  2 /* FP2FP  */
};

static const struct cpu_regmove_cost exynosm1_regmove_cost =
{
  1, /* GP2GP  */
  /* Avoid the use of slow int<->fp moves for spilling by setting
     their cost higher than memmov_cost (actual, 4 and 9).  */
  9, /* GP2FP  */
  9, /* FP2GP  */
  1 /* FP2FP  */
};

static const struct cpu_regmove_cost thunderx_regmove_cost =
{
  2, /* GP2GP  */
  2, /* GP2FP  */
  6, /* FP2GP  */
  4 /* FP2FP  */
};

static const struct cpu_regmove_cost xgene1_regmove_cost =
{
  1, /* GP2GP  */
  /* Avoid the use of slow int<->fp moves for spilling by setting
     their cost higher than memmov_cost.  */
  8, /* GP2FP  */
  8, /* FP2GP  */
  2 /* FP2FP  */
};

static const struct cpu_regmove_cost qdf24xx_regmove_cost =
{
  2, /* GP2GP  */
  /* Avoid the use of int<->fp moves for spilling.  */
  6, /* GP2FP  */
  6, /* FP2GP  */
  4 /* FP2FP  */
};

static const struct cpu_regmove_cost thunderx2t99_regmove_cost =
{
  1, /* GP2GP  */
  /* Avoid the use of int<->fp moves for spilling.  */
  8, /* GP2FP  */
  8, /* FP2GP  */
  4 /* FP2FP  */
};

static const struct cpu_regmove_cost tsv110_regmove_cost =
{
  1, /* GP2GP  */
  /* Avoid the use of slow int<->fp moves for spilling by setting
     their cost higher than memmov_cost.  */
  2, /* GP2FP  */
  3, /* FP2GP  */
  2 /* FP2FP  */
};

/* Generic costs for vector insn classes.  */
static const struct cpu_vector_cost generic_vector_cost =
{
  1, /* scalar_int_stmt_cost  */
  1, /* scalar_fp_stmt_cost  */
  1, /* scalar_load_cost  */
  1, /* scalar_store_cost  */
  1, /* vec_int_stmt_cost  */
  1, /* vec_fp_stmt_cost  */
  2, /* vec_permute_cost  */
  1, /* vec_to_scalar_cost  */
  1, /* scalar_to_vec_cost  */
  1, /* vec_align_load_cost  */
  1, /* vec_unalign_load_cost  */
  1, /* vec_unalign_store_cost  */
  1, /* vec_store_cost  */
  3, /* cond_taken_branch_cost  */
  1 /* cond_not_taken_branch_cost  */
};

/* QDF24XX costs for vector insn classes.  */
static const struct cpu_vector_cost qdf24xx_vector_cost =
{
  1, /* scalar_int_stmt_cost  */
  1, /* scalar_fp_stmt_cost  */
  1, /* scalar_load_cost  */
  1, /* scalar_store_cost  */
  1, /* vec_int_stmt_cost  */
  3, /* vec_fp_stmt_cost  */
  2, /* vec_permute_cost  */
  1, /* vec_to_scalar_cost  */
  1, /* scalar_to_vec_cost  */
  1, /* vec_align_load_cost  */
  1, /* vec_unalign_load_cost  */
  1, /* vec_unalign_store_cost  */
  1, /* vec_store_cost  */
  3, /* cond_taken_branch_cost  */
  1 /* cond_not_taken_branch_cost  */
};

/* ThunderX costs for vector insn classes.  */
static const struct cpu_vector_cost thunderx_vector_cost =
{
  1, /* scalar_int_stmt_cost  */
  1, /* scalar_fp_stmt_cost  */
  3, /* scalar_load_cost  */
  1, /* scalar_store_cost  */
  4, /* vec_int_stmt_cost  */
  1, /* vec_fp_stmt_cost  */
  4, /* vec_permute_cost  */
  2, /* vec_to_scalar_cost  */
  2, /* scalar_to_vec_cost  */
  3, /* vec_align_load_cost  */
  5, /* vec_unalign_load_cost  */
  5, /* vec_unalign_store_cost  */
  1, /* vec_store_cost  */
  3, /* cond_taken_branch_cost  */
  3 /* cond_not_taken_branch_cost  */
};

static const struct cpu_vector_cost tsv110_vector_cost =
{
  1, /* scalar_int_stmt_cost  */
  1, /* scalar_fp_stmt_cost  */
  5, /* scalar_load_cost  */
  1, /* scalar_store_cost  */
  2, /* vec_int_stmt_cost  */
  2, /* vec_fp_stmt_cost  */
  2, /* vec_permute_cost  */
  3, /* vec_to_scalar_cost  */
  2, /* scalar_to_vec_cost  */
  5, /* vec_align_load_cost  */
  5, /* vec_unalign_load_cost  */
  1, /* vec_unalign_store_cost  */
  1, /* vec_store_cost  */
  1, /* cond_taken_branch_cost  */
  1 /* cond_not_taken_branch_cost  */
};

/* Cortex-A57 costs for vector insn classes.  */
static const struct cpu_vector_cost cortexa57_vector_cost =
{
  1, /* scalar_int_stmt_cost  */
  1, /* scalar_fp_stmt_cost  */
  4, /* scalar_load_cost  */
  1, /* scalar_store_cost  */
  2, /* vec_int_stmt_cost  */
  2, /* vec_fp_stmt_cost  */
  3, /* vec_permute_cost  */
  8, /* vec_to_scalar_cost  */
  8, /* scalar_to_vec_cost  */
  4, /* vec_align_load_cost  */
  4, /* vec_unalign_load_cost  */
  1, /* vec_unalign_store_cost  */
  1, /* vec_store_cost  */
  1, /* cond_taken_branch_cost  */
  1 /* cond_not_taken_branch_cost  */
};

static const struct cpu_vector_cost exynosm1_vector_cost =
{
  1, /* scalar_int_stmt_cost  */
  1, /* scalar_fp_stmt_cost  */
  5, /* scalar_load_cost  */
  1, /* scalar_store_cost  */
  3, /* vec_int_stmt_cost  */
  3, /* vec_fp_stmt_cost  */
  3, /* vec_permute_cost  */
  3, /* vec_to_scalar_cost  */
  3, /* scalar_to_vec_cost  */
  5, /* vec_align_load_cost  */
  5, /* vec_unalign_load_cost  */
  1, /* vec_unalign_store_cost  */
  1, /* vec_store_cost  */
  1, /* cond_taken_branch_cost  */
  1 /* cond_not_taken_branch_cost  */
};

/* X-Gene 1 costs for vector insn classes.  */
static const struct cpu_vector_cost xgene1_vector_cost =
{
  1, /* scalar_int_stmt_cost  */
  1, /* scalar_fp_stmt_cost  */
  5, /* scalar_load_cost  */
  1, /* scalar_store_cost  */
  2, /* vec_int_stmt_cost  */
  2, /* vec_fp_stmt_cost  */
  2, /* vec_permute_cost  */
  4, /* vec_to_scalar_cost  */
  4, /* scalar_to_vec_cost  */
  10, /* vec_align_load_cost  */
  10, /* vec_unalign_load_cost  */
  2, /* vec_unalign_store_cost  */
  2, /* vec_store_cost  */
  2, /* cond_taken_branch_cost  */
  1 /* cond_not_taken_branch_cost  */
};

/* Costs for vector insn classes for Vulcan.  */
static const struct cpu_vector_cost thunderx2t99_vector_cost =
{
  1, /* scalar_int_stmt_cost  */
  6, /* scalar_fp_stmt_cost  */
  4, /* scalar_load_cost  */
  1, /* scalar_store_cost  */
  5, /* vec_int_stmt_cost  */
  6, /* vec_fp_stmt_cost  */
  3, /* vec_permute_cost  */
  6, /* vec_to_scalar_cost  */
  5, /* scalar_to_vec_cost  */
  8, /* vec_align_load_cost  */
  8, /* vec_unalign_load_cost  */
  4, /* vec_unalign_store_cost  */
  4, /* vec_store_cost  */
  2, /* cond_taken_branch_cost  */
  1 /* cond_not_taken_branch_cost  */
};

/* Generic costs for branch instructions.  */
static const struct cpu_branch_cost generic_branch_cost =
{
  1,  /* Predictable.  */
  3   /* Unpredictable.  */
};

/* Generic approximation modes.  */
static const cpu_approx_modes generic_approx_modes =
{
  AARCH64_APPROX_NONE, /* division  */
  AARCH64_APPROX_NONE, /* sqrt  */
  AARCH64_APPROX_NONE /* recip_sqrt  */
};

/* Approximation modes for Exynos M1.  */
static const cpu_approx_modes exynosm1_approx_modes =
{
  AARCH64_APPROX_NONE, /* division  */
  AARCH64_APPROX_ALL, /* sqrt  */
  AARCH64_APPROX_ALL /* recip_sqrt  */
};

/* Approximation modes for X-Gene 1.  */
static const cpu_approx_modes xgene1_approx_modes =
{
  AARCH64_APPROX_NONE, /* division  */
  AARCH64_APPROX_NONE, /* sqrt  */
  AARCH64_APPROX_ALL /* recip_sqrt  */
};

/* Generic prefetch settings (which disable prefetch).  */
static const cpu_prefetch_tune generic_prefetch_tune =
{
  0,    /* num_slots  */
  -1,   /* l1_cache_size  */
  -1,   /* l1_cache_line_size  */
  -1,   /* l2_cache_size  */
  true, /* prefetch_dynamic_strides  */
  -1,   /* minimum_stride  */
  -1    /* default_opt_level  */
};

static const cpu_prefetch_tune exynosm1_prefetch_tune =
{
  0,    /* num_slots  */
  -1,   /* l1_cache_size  */
  64,   /* l1_cache_line_size  */
  -1,   /* l2_cache_size  */
  true, /* prefetch_dynamic_strides  */
  -1,   /* minimum_stride  */
  -1    /* default_opt_level  */
};

static const cpu_prefetch_tune qdf24xx_prefetch_tune =
{
  4,     /* num_slots  */
  32,    /* l1_cache_size  */
  64,    /* l1_cache_line_size  */
  512,   /* l2_cache_size  */
  false, /* prefetch_dynamic_strides  */
  2048,  /* minimum_stride  */
  3      /* default_opt_level  */
};

static const cpu_prefetch_tune thunderxt88_prefetch_tune =
{
  8,       /* num_slots  */
  32,      /* l1_cache_size  */
  128,     /* l1_cache_line_size  */
  16*1024, /* l2_cache_size  */
  true,    /* prefetch_dynamic_strides  */
  -1,      /* minimum_stride  */
  3        /* default_opt_level  */
};

static const cpu_prefetch_tune thunderx_prefetch_tune =
{
  8,    /* num_slots  */
  32,   /* l1_cache_size  */
  128,  /* l1_cache_line_size  */
  -1,   /* l2_cache_size  */
  true, /* prefetch_dynamic_strides  */
  -1,   /* minimum_stride  */
  -1    /* default_opt_level  */
};

static const cpu_prefetch_tune thunderx2t99_prefetch_tune =
{
  8,    /* num_slots  */
  32,   /* l1_cache_size  */
  64,   /* l1_cache_line_size  */
  256,  /* l2_cache_size  */
  true, /* prefetch_dynamic_strides  */
  -1,   /* minimum_stride  */
  -1    /* default_opt_level  */
};

static const cpu_prefetch_tune tsv110_prefetch_tune =
{
  0,    /* num_slots  */
  64,   /* l1_cache_size  */
  64,   /* l1_cache_line_size  */
  512,  /* l2_cache_size  */
  true, /* prefetch_dynamic_strides  */
  -1,   /* minimum_stride  */
  -1    /* default_opt_level  */
};

static const cpu_prefetch_tune xgene1_prefetch_tune =
{
  8,    /* num_slots  */
  32,   /* l1_cache_size  */
  64,   /* l1_cache_line_size  */
  256,  /* l2_cache_size  */
  true, /* prefetch_dynamic_strides  */
  -1,   /* minimum_stride  */
  -1    /* default_opt_level  */
};

static const struct tune_params generic_tunings =
{
  &cortexa57_extra_costs,
  &generic_addrcost_table,
  &generic_regmove_cost,
  &generic_vector_cost,
  &generic_branch_cost,
  &generic_approx_modes,
  SVE_NOT_IMPLEMENTED, /* sve_width  */
  4, /* memmov_cost  */
  2, /* issue_rate  */
  (AARCH64_FUSE_AES_AESMC), /* fusible_ops  */
  "8", /* function_align.  */
  "4", /* jump_align.  */
  "8", /* loop_align.  */
  2, /* int_reassoc_width.  */
  4, /* fp_reassoc_width.  */
  1, /* vec_reassoc_width.  */
  2, /* min_div_recip_mul_sf.  */
  2, /* min_div_recip_mul_df.  */
  0, /* max_case_values.  */
  tune_params::AUTOPREFETCHER_WEAK, /* autoprefetcher_model.  */
  (AARCH64_EXTRA_TUNE_NONE), /* tune_flags.  */
  &generic_prefetch_tune
};

static const struct tune_params cortexa35_tunings =
{
  &cortexa53_extra_costs,
  &generic_addrcost_table,
  &cortexa53_regmove_cost,
  &generic_vector_cost,
  &generic_branch_cost,
  &generic_approx_modes,
  SVE_NOT_IMPLEMENTED, /* sve_width  */
  4, /* memmov_cost  */
  1, /* issue_rate  */
  (AARCH64_FUSE_AES_AESMC | AARCH64_FUSE_MOV_MOVK | AARCH64_FUSE_ADRP_ADD
   | AARCH64_FUSE_MOVK_MOVK | AARCH64_FUSE_ADRP_LDR), /* fusible_ops  */
  "16", /* function_align.  */
  "4", /* jump_align.  */
  "8", /* loop_align.  */
  2, /* int_reassoc_width.  */
  4, /* fp_reassoc_width.  */
  1, /* vec_reassoc_width.  */
  2, /* min_div_recip_mul_sf.  */
  2, /* min_div_recip_mul_df.  */
  0, /* max_case_values.  */
  tune_params::AUTOPREFETCHER_WEAK, /* autoprefetcher_model.  */
  (AARCH64_EXTRA_TUNE_NONE), /* tune_flags.  */
  &generic_prefetch_tune
};

static const struct tune_params cortexa53_tunings =
{
  &cortexa53_extra_costs,
  &generic_addrcost_table,
  &cortexa53_regmove_cost,
  &generic_vector_cost,
  &generic_branch_cost,
  &generic_approx_modes,
  SVE_NOT_IMPLEMENTED, /* sve_width  */
  4, /* memmov_cost  */
  2, /* issue_rate  */
  (AARCH64_FUSE_AES_AESMC | AARCH64_FUSE_MOV_MOVK | AARCH64_FUSE_ADRP_ADD
   | AARCH64_FUSE_MOVK_MOVK | AARCH64_FUSE_ADRP_LDR), /* fusible_ops  */
  "16", /* function_align.  */
  "4", /* jump_align.  */
  "8", /* loop_align.  */
  2, /* int_reassoc_width.  */
  4, /* fp_reassoc_width.  */
  1, /* vec_reassoc_width.  */
  2, /* min_div_recip_mul_sf.  */
  2, /* min_div_recip_mul_df.  */
  0, /* max_case_values.  */
  tune_params::AUTOPREFETCHER_WEAK, /* autoprefetcher_model.  */
  (AARCH64_EXTRA_TUNE_NONE), /* tune_flags.  */
  &generic_prefetch_tune
};

static const struct tune_params cortexa57_tunings =
{
  &cortexa57_extra_costs,
  &generic_addrcost_table,
  &cortexa57_regmove_cost,
  &cortexa57_vector_cost,
  &generic_branch_cost,
  &generic_approx_modes,
  SVE_NOT_IMPLEMENTED, /* sve_width  */
  4, /* memmov_cost  */
  3, /* issue_rate  */
  (AARCH64_FUSE_AES_AESMC | AARCH64_FUSE_MOV_MOVK | AARCH64_FUSE_ADRP_ADD
   | AARCH64_FUSE_MOVK_MOVK), /* fusible_ops  */
  "16", /* function_align.  */
  "4", /* jump_align.  */
  "8", /* loop_align.  */
  2, /* int_reassoc_width.  */
  4, /* fp_reassoc_width.  */
  1, /* vec_reassoc_width.  */
  2, /* min_div_recip_mul_sf.  */
  2, /* min_div_recip_mul_df.  */
  0, /* max_case_values.  */
  tune_params::AUTOPREFETCHER_WEAK, /* autoprefetcher_model.  */
  (AARCH64_EXTRA_TUNE_RENAME_FMA_REGS), /* tune_flags.  */
  &generic_prefetch_tune
};

static const struct tune_params cortexa72_tunings =
{
  &cortexa57_extra_costs,
  &generic_addrcost_table,
  &cortexa57_regmove_cost,
  &cortexa57_vector_cost,
  &generic_branch_cost,
  &generic_approx_modes,
  SVE_NOT_IMPLEMENTED, /* sve_width  */
  4, /* memmov_cost  */
  3, /* issue_rate  */
  (AARCH64_FUSE_AES_AESMC | AARCH64_FUSE_MOV_MOVK | AARCH64_FUSE_ADRP_ADD
   | AARCH64_FUSE_MOVK_MOVK), /* fusible_ops  */
  "16", /* function_align.  */
  "4", /* jump_align.  */
  "8", /* loop_align.  */
  2, /* int_reassoc_width.  */
  4, /* fp_reassoc_width.  */
  1, /* vec_reassoc_width.  */
  2, /* min_div_recip_mul_sf.  */
  2, /* min_div_recip_mul_df.  */
  0, /* max_case_values.  */
  tune_params::AUTOPREFETCHER_WEAK, /* autoprefetcher_model.  */
  (AARCH64_EXTRA_TUNE_NONE), /* tune_flags.  */
  &generic_prefetch_tune
};

static const struct tune_params cortexa73_tunings =
{
  &cortexa57_extra_costs,
  &generic_addrcost_table,
  &cortexa57_regmove_cost,
  &cortexa57_vector_cost,
  &generic_branch_cost,
  &generic_approx_modes,
  SVE_NOT_IMPLEMENTED, /* sve_width  */
  4, /* memmov_cost.  */
  2, /* issue_rate.  */
  (AARCH64_FUSE_AES_AESMC | AARCH64_FUSE_MOV_MOVK | AARCH64_FUSE_ADRP_ADD
   | AARCH64_FUSE_MOVK_MOVK | AARCH64_FUSE_ADRP_LDR), /* fusible_ops  */
  "16", /* function_align.  */
  "4", /* jump_align.  */
  "8", /* loop_align.  */
  2, /* int_reassoc_width.  */
  4, /* fp_reassoc_width.  */
  1, /* vec_reassoc_width.  */
  2, /* min_div_recip_mul_sf.  */
  2, /* min_div_recip_mul_df.  */
  0, /* max_case_values.  */
  tune_params::AUTOPREFETCHER_WEAK, /* autoprefetcher_model.  */
  (AARCH64_EXTRA_TUNE_NONE), /* tune_flags.  */
  &generic_prefetch_tune
};



static const struct tune_params exynosm1_tunings =
{
  &exynosm1_extra_costs,
  &exynosm1_addrcost_table,
  &exynosm1_regmove_cost,
  &exynosm1_vector_cost,
  &generic_branch_cost,
  &exynosm1_approx_modes,
  SVE_NOT_IMPLEMENTED, /* sve_width  */
  4, /* memmov_cost  */
  3, /* issue_rate  */
  (AARCH64_FUSE_AES_AESMC), /* fusible_ops  */
  "4", /* function_align.  */
  "4", /* jump_align.  */
  "4", /* loop_align.  */
  2, /* int_reassoc_width.  */
  4, /* fp_reassoc_width.  */
  1, /* vec_reassoc_width.  */
  2, /* min_div_recip_mul_sf.  */
  2, /* min_div_recip_mul_df.  */
  48, /* max_case_values.  */
  tune_params::AUTOPREFETCHER_WEAK, /* autoprefetcher_model.  */
  (AARCH64_EXTRA_TUNE_NONE), /* tune_flags.  */
  &exynosm1_prefetch_tune
};

static const struct tune_params thunderxt88_tunings =
{
  &thunderx_extra_costs,
  &generic_addrcost_table,
  &thunderx_regmove_cost,
  &thunderx_vector_cost,
  &generic_branch_cost,
  &generic_approx_modes,
  SVE_NOT_IMPLEMENTED, /* sve_width  */
  6, /* memmov_cost  */
  2, /* issue_rate  */
  AARCH64_FUSE_CMP_BRANCH, /* fusible_ops  */
  "8", /* function_align.  */
  "8", /* jump_align.  */
  "8", /* loop_align.  */
  2, /* int_reassoc_width.  */
  4, /* fp_reassoc_width.  */
  1, /* vec_reassoc_width.  */
  2, /* min_div_recip_mul_sf.  */
  2, /* min_div_recip_mul_df.  */
  0, /* max_case_values.  */
  tune_params::AUTOPREFETCHER_OFF, /* autoprefetcher_model.  */
  (AARCH64_EXTRA_TUNE_SLOW_UNALIGNED_LDPW), /* tune_flags.  */
  &thunderxt88_prefetch_tune
};

static const struct tune_params thunderx_tunings =
{
  &thunderx_extra_costs,
  &generic_addrcost_table,
  &thunderx_regmove_cost,
  &thunderx_vector_cost,
  &generic_branch_cost,
  &generic_approx_modes,
  SVE_NOT_IMPLEMENTED, /* sve_width  */
  6, /* memmov_cost  */
  2, /* issue_rate  */
  AARCH64_FUSE_CMP_BRANCH, /* fusible_ops  */
  "8", /* function_align.  */
  "8", /* jump_align.  */
  "8", /* loop_align.  */
  2, /* int_reassoc_width.  */
  4, /* fp_reassoc_width.  */
  1, /* vec_reassoc_width.  */
  2, /* min_div_recip_mul_sf.  */
  2, /* min_div_recip_mul_df.  */
  0, /* max_case_values.  */
  tune_params::AUTOPREFETCHER_OFF, /* autoprefetcher_model.  */
  (AARCH64_EXTRA_TUNE_SLOW_UNALIGNED_LDPW
   | AARCH64_EXTRA_TUNE_CHEAP_SHIFT_EXTEND), /* tune_flags.  */
  &thunderx_prefetch_tune
};

static const struct tune_params tsv110_tunings =
{
  &tsv110_extra_costs,
  &tsv110_addrcost_table,
  &tsv110_regmove_cost,
  &tsv110_vector_cost,
  &generic_branch_cost,
  &generic_approx_modes,
  SVE_NOT_IMPLEMENTED, /* sve_width  */
  4, /* memmov_cost  */
  4, /* issue_rate  */
  (AARCH64_FUSE_AES_AESMC | AARCH64_FUSE_CMP_BRANCH
   | AARCH64_FUSE_ALU_BRANCH), /* fusible_ops  */
  "16", /* function_align.  */
  "4", /* jump_align.  */
  "8", /* loop_align.  */
  2, /* int_reassoc_width.  */
  4, /* fp_reassoc_width.  */
  1, /* vec_reassoc_width.  */
  2, /* min_div_recip_mul_sf.  */
  2, /* min_div_recip_mul_df.  */
  0, /* max_case_values.  */
  tune_params::AUTOPREFETCHER_WEAK, /* autoprefetcher_model.  */
  (AARCH64_EXTRA_TUNE_NONE), /* tune_flags.  */
  &tsv110_prefetch_tune
};

static const struct tune_params xgene1_tunings =
{
  &xgene1_extra_costs,
  &xgene1_addrcost_table,
  &xgene1_regmove_cost,
  &xgene1_vector_cost,
  &generic_branch_cost,
  &xgene1_approx_modes,
  SVE_NOT_IMPLEMENTED, /* sve_width  */
  6, /* memmov_cost  */
  4, /* issue_rate  */
  AARCH64_FUSE_NOTHING, /* fusible_ops  */
  "16", /* function_align.  */
  "16", /* jump_align.  */
  "16", /* loop_align.  */
  2, /* int_reassoc_width.  */
  4, /* fp_reassoc_width.  */
  1, /* vec_reassoc_width.  */
  2, /* min_div_recip_mul_sf.  */
  2, /* min_div_recip_mul_df.  */
  17, /* max_case_values.  */
  tune_params::AUTOPREFETCHER_OFF, /* autoprefetcher_model.  */
  (AARCH64_EXTRA_TUNE_NO_LDP_STP_QREGS), /* tune_flags.  */
  &xgene1_prefetch_tune
};

static const struct tune_params emag_tunings =
{
  &xgene1_extra_costs,
  &xgene1_addrcost_table,
  &xgene1_regmove_cost,
  &xgene1_vector_cost,
  &generic_branch_cost,
  &xgene1_approx_modes,
  SVE_NOT_IMPLEMENTED, /* sve_width  */
  6, /* memmov_cost  */
  4, /* issue_rate  */
  AARCH64_FUSE_NOTHING, /* fusible_ops  */
  "16", /* function_align.  */
  "16", /* jump_align.  */
  "16", /* loop_align.  */
  2, /* int_reassoc_width.  */
  4, /* fp_reassoc_width.  */
  1, /* vec_reassoc_width.  */
  2, /* min_div_recip_mul_sf.  */
  2, /* min_div_recip_mul_df.  */
  17, /* max_case_values.  */
  tune_params::AUTOPREFETCHER_OFF, /* autoprefetcher_model.  */
  (AARCH64_EXTRA_TUNE_NO_LDP_STP_QREGS), /* tune_flags.  */
  &xgene1_prefetch_tune
};

static const struct tune_params qdf24xx_tunings =
{
  &qdf24xx_extra_costs,
  &qdf24xx_addrcost_table,
  &qdf24xx_regmove_cost,
  &qdf24xx_vector_cost,
  &generic_branch_cost,
  &generic_approx_modes,
  SVE_NOT_IMPLEMENTED, /* sve_width  */
  4, /* memmov_cost  */
  4, /* issue_rate  */
  (AARCH64_FUSE_MOV_MOVK | AARCH64_FUSE_ADRP_ADD
   | AARCH64_FUSE_MOVK_MOVK), /* fusible_ops  */
  "16", /* function_align.  */
  "8", /* jump_align.  */
  "16", /* loop_align.  */
  2, /* int_reassoc_width.  */
  4, /* fp_reassoc_width.  */
  1, /* vec_reassoc_width.  */
  2, /* min_div_recip_mul_sf.  */
  2, /* min_div_recip_mul_df.  */
  0, /* max_case_values.  */
  tune_params::AUTOPREFETCHER_WEAK, /* autoprefetcher_model.  */
  AARCH64_EXTRA_TUNE_RENAME_LOAD_REGS, /* tune_flags.  */
  &qdf24xx_prefetch_tune
};

/* Tuning structure for the Qualcomm Saphira core.  Default to falkor values
   for now.  */
static const struct tune_params saphira_tunings =
{
  &generic_extra_costs,
  &generic_addrcost_table,
  &generic_regmove_cost,
  &generic_vector_cost,
  &generic_branch_cost,
  &generic_approx_modes,
  SVE_NOT_IMPLEMENTED, /* sve_width  */
  4, /* memmov_cost  */
  4, /* issue_rate  */
  (AARCH64_FUSE_MOV_MOVK | AARCH64_FUSE_ADRP_ADD
   | AARCH64_FUSE_MOVK_MOVK), /* fusible_ops  */
  "16", /* function_align.  */
  "8", /* jump_align.  */
  "16", /* loop_align.  */
  2, /* int_reassoc_width.  */
  4, /* fp_reassoc_width.  */
  1, /* vec_reassoc_width.  */
  2, /* min_div_recip_mul_sf.  */
  2, /* min_div_recip_mul_df.  */
  0, /* max_case_values.  */
  tune_params::AUTOPREFETCHER_WEAK, /* autoprefetcher_model.  */
  (AARCH64_EXTRA_TUNE_NONE), /* tune_flags.  */
  &generic_prefetch_tune
};

static const struct tune_params thunderx2t99_tunings =
{
  &thunderx2t99_extra_costs,
  &thunderx2t99_addrcost_table,
  &thunderx2t99_regmove_cost,
  &thunderx2t99_vector_cost,
  &generic_branch_cost,
  &generic_approx_modes,
  SVE_NOT_IMPLEMENTED, /* sve_width  */
  4, /* memmov_cost.  */
  4, /* issue_rate.  */
  (AARCH64_FUSE_CMP_BRANCH | AARCH64_FUSE_AES_AESMC
   | AARCH64_FUSE_ALU_BRANCH), /* fusible_ops  */
  "16", /* function_align.  */
  "8", /* jump_align.  */
  "16", /* loop_align.  */
  3, /* int_reassoc_width.  */
  2, /* fp_reassoc_width.  */
  2, /* vec_reassoc_width.  */
  2, /* min_div_recip_mul_sf.  */
  2, /* min_div_recip_mul_df.  */
  0, /* max_case_values.  */
  tune_params::AUTOPREFETCHER_WEAK, /* autoprefetcher_model.  */
  (AARCH64_EXTRA_TUNE_NONE), /* tune_flags.  */
  &thunderx2t99_prefetch_tune
};

/* Support for fine-grained override of the tuning structures.  */
struct aarch64_tuning_override_function
{
  const char* name;
  void (*parse_override)(const char*, struct tune_params*);
};

static void aarch64_parse_fuse_string (const char*, struct tune_params*);
static void aarch64_parse_tune_string (const char*, struct tune_params*);
static void aarch64_parse_sve_width_string (const char*, struct tune_params*);

static const struct aarch64_tuning_override_function
aarch64_tuning_override_functions[] =
{
  { "fuse", aarch64_parse_fuse_string },
  { "tune", aarch64_parse_tune_string },
  { "sve_width", aarch64_parse_sve_width_string },
  { NULL, NULL }
};
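/* Roughly speaking, each element of an -moverride string has the form
   <name>=<value>; <name> is looked up in the table above to pick the
   parser, so for example a "tune=..." element is handed to
   aarch64_parse_tune_string and a "sve_width=..." element to
   aarch64_parse_sve_width_string.  (Illustrative sketch; the actual
   splitting of the option string is done later in this file.)  */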

/* A processor implementing AArch64.  */
struct processor
{
  const char *const name;
  enum aarch64_processor ident;
  enum aarch64_processor sched_core;
  enum aarch64_arch arch;
  unsigned architecture_version;
  const unsigned long flags;
  const struct tune_params *const tune;
};

/* Architectures implementing AArch64.  */
static const struct processor all_architectures[] =
{
#define AARCH64_ARCH(NAME, CORE, ARCH_IDENT, ARCH_REV, FLAGS) \
  {NAME, CORE, CORE, AARCH64_ARCH_##ARCH_IDENT, ARCH_REV, FLAGS, NULL},
#include "aarch64-arches.def"
  {NULL, aarch64_none, aarch64_none, aarch64_no_arch, 0, 0, NULL}
};

/* Processor cores implementing AArch64.  */
static const struct processor all_cores[] =
{
#define AARCH64_CORE(NAME, IDENT, SCHED, ARCH, FLAGS, COSTS, IMP, PART, VARIANT) \
  {NAME, IDENT, SCHED, AARCH64_ARCH_##ARCH, \
   all_architectures[AARCH64_ARCH_##ARCH].architecture_version, \
   FLAGS, &COSTS##_tunings},
#include "aarch64-cores.def"
  {"generic", generic, cortexa53, AARCH64_ARCH_8A, 8,
   AARCH64_FL_FOR_ARCH8, &generic_tunings},
  {NULL, aarch64_none, aarch64_none, aarch64_no_arch, 0, 0, NULL}
};


/* Target specification.  These are populated by the -march, -mtune, -mcpu
   handling code or by target attributes.  */
static const struct processor *selected_arch;
static const struct processor *selected_cpu;
static const struct processor *selected_tune;

/* The current tuning set.  */
struct tune_params aarch64_tune_params = generic_tunings;

/* Table of machine attributes.  */
static const struct attribute_spec aarch64_attribute_table[] =
{
  /* { name, min_len, max_len, decl_req, type_req, fn_type_req,
       affects_type_identity, handler, exclude } */
  { "aarch64_vector_pcs", 0, 0, false, true,  true,  false, NULL, NULL },
  { NULL,                 0, 0, false, false, false, false, NULL, NULL }
};

#define AARCH64_CPU_DEFAULT_FLAGS ((selected_cpu) ? selected_cpu->flags : 0)

/* An ISA extension in the co-processor and main instruction set space.  */
struct aarch64_option_extension
{
  const char *const name;
  const unsigned long flags_on;
  const unsigned long flags_off;
};

typedef enum aarch64_cond_code
{
  AARCH64_EQ = 0, AARCH64_NE, AARCH64_CS, AARCH64_CC, AARCH64_MI, AARCH64_PL,
  AARCH64_VS, AARCH64_VC, AARCH64_HI, AARCH64_LS, AARCH64_GE, AARCH64_LT,
  AARCH64_GT, AARCH64_LE, AARCH64_AL, AARCH64_NV
}
aarch64_cc;

#define AARCH64_INVERSE_CONDITION_CODE(X) ((aarch64_cc) (((int) X) ^ 1))

/* The condition codes of the processor, and the inverse function.  */
static const char * const aarch64_condition_codes[] =
{
  "eq", "ne", "cs", "cc", "mi", "pl", "vs", "vc",
  "hi", "ls", "ge", "lt", "gt", "le", "al", "nv"
};
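/* The encoding above places each condition next to its inverse, which is
   what makes the XOR-with-1 trick in AARCH64_INVERSE_CONDITION_CODE work:
   for example AARCH64_EQ (0) inverts to AARCH64_NE (1), and AARCH64_GE (10)
   inverts to AARCH64_LT (11).  */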

/* Generate code to enable conditional branches in functions over 1 MiB.  */
const char *
aarch64_gen_far_branch (rtx * operands, int pos_label, const char * dest,
			const char * branch_format)
{
  rtx_code_label * tmp_label = gen_label_rtx ();
  char label_buf[256];
  char buffer[128];
  ASM_GENERATE_INTERNAL_LABEL (label_buf, dest,
			       CODE_LABEL_NUMBER (tmp_label));
  const char *label_ptr = targetm.strip_name_encoding (label_buf);
  rtx dest_label = operands[pos_label];
  operands[pos_label] = tmp_label;

  snprintf (buffer, sizeof (buffer), "%s%s", branch_format, label_ptr);
  output_asm_insn (buffer, operands);

  snprintf (buffer, sizeof (buffer), "b\t%%l%d\n%s:", pos_label, label_ptr);
  operands[pos_label] = dest_label;
  output_asm_insn (buffer, operands);
  return "";
}

void
aarch64_err_no_fpadvsimd (machine_mode mode)
{
  if (TARGET_GENERAL_REGS_ONLY)
    if (FLOAT_MODE_P (mode))
      error ("%qs is incompatible with the use of floating-point types",
	     "-mgeneral-regs-only");
    else
      error ("%qs is incompatible with the use of vector types",
	     "-mgeneral-regs-only");
  else
    if (FLOAT_MODE_P (mode))
      error ("%qs feature modifier is incompatible with the use of"
	     " floating-point types", "+nofp");
    else
      error ("%qs feature modifier is incompatible with the use of"
	     " vector types", "+nofp");
}

/* Implement TARGET_IRA_CHANGE_PSEUDO_ALLOCNO_CLASS.
   The register allocator chooses POINTER_AND_FP_REGS if FP_REGS and
   GENERAL_REGS have the same cost - even if POINTER_AND_FP_REGS has a much
   higher cost.  POINTER_AND_FP_REGS is also used if the cost of both FP_REGS
   and GENERAL_REGS is lower than the memory cost (in this case the best class
   is the lowest cost one).  Using POINTER_AND_FP_REGS irrespectively of its
   cost results in bad allocations with many redundant int<->FP moves which
   are expensive on various cores.
   To avoid this we don't allow POINTER_AND_FP_REGS as the allocno class, but
   force a decision between FP_REGS and GENERAL_REGS.  We use the allocno
   class if it isn't POINTER_AND_FP_REGS.  Similarly, use the best class if
   it isn't POINTER_AND_FP_REGS.  Otherwise set the allocno class depending
   on the mode.  The result of this is that it is no longer inefficient to
   have a higher memory move cost than the register move cost.  */

static reg_class_t
aarch64_ira_change_pseudo_allocno_class (int regno, reg_class_t allocno_class,
					 reg_class_t best_class)
{
  machine_mode mode;

  if (!reg_class_subset_p (GENERAL_REGS, allocno_class)
      || !reg_class_subset_p (FP_REGS, allocno_class))
    return allocno_class;

  if (!reg_class_subset_p (GENERAL_REGS, best_class)
      || !reg_class_subset_p (FP_REGS, best_class))
    return best_class;

  mode = PSEUDO_REGNO_MODE (regno);
  return FLOAT_MODE_P (mode) || VECTOR_MODE_P (mode) ? FP_REGS : GENERAL_REGS;
}

static unsigned int
aarch64_min_divisions_for_recip_mul (machine_mode mode)
{
  if (GET_MODE_UNIT_SIZE (mode) == 4)
    return aarch64_tune_params.min_div_recip_mul_sf;
  return aarch64_tune_params.min_div_recip_mul_df;
}

/* Return the reassociation width of treeop OPC with mode MODE.  */
static int
aarch64_reassociation_width (unsigned opc, machine_mode mode)
{
  if (VECTOR_MODE_P (mode))
    return aarch64_tune_params.vec_reassoc_width;
  if (INTEGRAL_MODE_P (mode))
    return aarch64_tune_params.int_reassoc_width;
  /* Avoid reassociating floating point addition so we emit more FMAs.  */
  if (FLOAT_MODE_P (mode) && opc != PLUS_EXPR)
    return aarch64_tune_params.fp_reassoc_width;
  return 1;
}

/* Provide a mapping from gcc register numbers to dwarf register numbers.  */
unsigned
aarch64_dbx_register_number (unsigned regno)
{
  if (GP_REGNUM_P (regno))
    return AARCH64_DWARF_R0 + regno - R0_REGNUM;
  else if (regno == SP_REGNUM)
    return AARCH64_DWARF_SP;
  else if (FP_REGNUM_P (regno))
    return AARCH64_DWARF_V0 + regno - V0_REGNUM;
  else if (PR_REGNUM_P (regno))
    return AARCH64_DWARF_P0 + regno - P0_REGNUM;
  else if (regno == VG_REGNUM)
    return AARCH64_DWARF_VG;

  /* Return values >= DWARF_FRAME_REGISTERS indicate that there is no
     equivalent DWARF register.  */
  return DWARF_FRAME_REGISTERS;
}

/* Return true if MODE is any of the Advanced SIMD structure modes.  */
static bool
aarch64_advsimd_struct_mode_p (machine_mode mode)
{
  return (TARGET_SIMD
	  && (mode == OImode || mode == CImode || mode == XImode));
}

/* Return true if MODE is an SVE predicate mode.  */
static bool
aarch64_sve_pred_mode_p (machine_mode mode)
{
  return (TARGET_SVE
	  && (mode == VNx16BImode
	      || mode == VNx8BImode
	      || mode == VNx4BImode
	      || mode == VNx2BImode));
}

/* Three mutually-exclusive flags describing a vector or predicate type.  */
const unsigned int VEC_ADVSIMD  = 1;
const unsigned int VEC_SVE_DATA = 2;
const unsigned int VEC_SVE_PRED = 4;
/* Can be used in combination with VEC_ADVSIMD or VEC_SVE_DATA to indicate
   a structure of 2, 3 or 4 vectors.  */
const unsigned int VEC_STRUCT   = 8;
/* Useful combinations of the above.  */
const unsigned int VEC_ANY_SVE  = VEC_SVE_DATA | VEC_SVE_PRED;
const unsigned int VEC_ANY_DATA = VEC_ADVSIMD | VEC_SVE_DATA;

/* Return a set of flags describing the vector properties of mode MODE.
   Ignore modes that are not supported by the current target.  */
static unsigned int
aarch64_classify_vector_mode (machine_mode mode)
{
  if (aarch64_advsimd_struct_mode_p (mode))
    return VEC_ADVSIMD | VEC_STRUCT;

  if (aarch64_sve_pred_mode_p (mode))
    return VEC_SVE_PRED;

  scalar_mode inner = GET_MODE_INNER (mode);
  if (VECTOR_MODE_P (mode)
      && (inner == QImode
	  || inner == HImode
	  || inner == HFmode
	  || inner == SImode
	  || inner == SFmode
	  || inner == DImode
	  || inner == DFmode))
    {
      if (TARGET_SVE)
	{
	  if (known_eq (GET_MODE_BITSIZE (mode), BITS_PER_SVE_VECTOR))
	    return VEC_SVE_DATA;
	  if (known_eq (GET_MODE_BITSIZE (mode), BITS_PER_SVE_VECTOR * 2)
	      || known_eq (GET_MODE_BITSIZE (mode), BITS_PER_SVE_VECTOR * 3)
	      || known_eq (GET_MODE_BITSIZE (mode), BITS_PER_SVE_VECTOR * 4))
	    return VEC_SVE_DATA | VEC_STRUCT;
	}

      /* This includes V1DF but not V1DI (which doesn't exist).  */
      if (TARGET_SIMD
	  && (known_eq (GET_MODE_BITSIZE (mode), 64)
	      || known_eq (GET_MODE_BITSIZE (mode), 128)))
	return VEC_ADVSIMD;
    }

  return 0;
}
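/* A few illustrative classifications: with TARGET_SIMD enabled and SVE's
   vector length left variable (the default), V16QImode and V2DFmode
   classify as VEC_ADVSIMD and OImode/CImode/XImode as
   VEC_ADVSIMD | VEC_STRUCT; with TARGET_SVE, VNx4SImode classifies as
   VEC_SVE_DATA and VNx4BImode as VEC_SVE_PRED; scalar modes such as
   DImode classify as 0.  */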

/* Return true if MODE is any of the data vector modes, including
   structure modes.  */
static bool
aarch64_vector_data_mode_p (machine_mode mode)
{
  return aarch64_classify_vector_mode (mode) & VEC_ANY_DATA;
}

/* Return true if MODE is an SVE data vector mode; either a single vector
   or a structure of vectors.  */
static bool
aarch64_sve_data_mode_p (machine_mode mode)
{
  return aarch64_classify_vector_mode (mode) & VEC_SVE_DATA;
}

/* Implement target hook TARGET_ARRAY_MODE.  */
static opt_machine_mode
aarch64_array_mode (machine_mode mode, unsigned HOST_WIDE_INT nelems)
{
  if (aarch64_classify_vector_mode (mode) == VEC_SVE_DATA
      && IN_RANGE (nelems, 2, 4))
    return mode_for_vector (GET_MODE_INNER (mode),
			    GET_MODE_NUNITS (mode) * nelems);

  return opt_machine_mode ();
}
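/* In other words, an array of 2, 3 or 4 SVE data vectors is represented as
   a single wider vector mode with correspondingly more lanes (for example,
   an array of three VNx4SImode vectors becomes the SVE mode with three
   times as many SImode lanes), while any other request falls back to the
   generic handling by returning no mode.  */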

/* Implement target hook TARGET_ARRAY_MODE_SUPPORTED_P.  */
static bool
aarch64_array_mode_supported_p (machine_mode mode,
				unsigned HOST_WIDE_INT nelems)
{
  if (TARGET_SIMD
      && (AARCH64_VALID_SIMD_QREG_MODE (mode)
	  || AARCH64_VALID_SIMD_DREG_MODE (mode))
      && (nelems >= 2 && nelems <= 4))
    return true;

  return false;
}

/* Return the SVE predicate mode to use for elements that have
   ELEM_NBYTES bytes, if such a mode exists.  */

opt_machine_mode
aarch64_sve_pred_mode (unsigned int elem_nbytes)
{
  if (TARGET_SVE)
    {
      if (elem_nbytes == 1)
	return VNx16BImode;
      if (elem_nbytes == 2)
	return VNx8BImode;
      if (elem_nbytes == 4)
	return VNx4BImode;
      if (elem_nbytes == 8)
	return VNx2BImode;
    }
  return opt_machine_mode ();
}

/* Implement TARGET_VECTORIZE_GET_MASK_MODE.  */

static opt_machine_mode
aarch64_get_mask_mode (poly_uint64 nunits, poly_uint64 nbytes)
{
  if (TARGET_SVE && known_eq (nbytes, BYTES_PER_SVE_VECTOR))
    {
      unsigned int elem_nbytes = vector_element_size (nbytes, nunits);
      machine_mode pred_mode;
      if (aarch64_sve_pred_mode (elem_nbytes).exists (&pred_mode))
	return pred_mode;
    }

  return default_get_mask_mode (nunits, nbytes);
}

/* Implement TARGET_PREFERRED_ELSE_VALUE.  For binary operations,
   prefer to use the first arithmetic operand as the else value if
   the else value doesn't matter, since that exactly matches the SVE
   destructive merging form.  For ternary operations we could either
   pick the first operand and use FMAD-like instructions or the last
   operand and use FMLA-like instructions; the latter seems more
   natural.  */

static tree
aarch64_preferred_else_value (unsigned, tree, unsigned int nops, tree *ops)
{
  return nops == 3 ? ops[2] : ops[0];
}

/* Implement TARGET_HARD_REGNO_NREGS.  */

static unsigned int
aarch64_hard_regno_nregs (unsigned regno, machine_mode mode)
{
  /* ??? Logically we should only need to provide a value when
     HARD_REGNO_MODE_OK says that the combination is valid,
     but at the moment we need to handle all modes.  Just ignore
     any runtime parts for registers that can't store them.  */
  HOST_WIDE_INT lowest_size = constant_lower_bound (GET_MODE_SIZE (mode));
  switch (aarch64_regno_regclass (regno))
    {
    case FP_REGS:
    case FP_LO_REGS:
      if (aarch64_sve_data_mode_p (mode))
	return exact_div (GET_MODE_SIZE (mode),
			  BYTES_PER_SVE_VECTOR).to_constant ();
      return CEIL (lowest_size, UNITS_PER_VREG);
    case PR_REGS:
    case PR_LO_REGS:
    case PR_HI_REGS:
      return 1;
    default:
      return CEIL (lowest_size, UNITS_PER_WORD);
    }
  gcc_unreachable ();
}
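/* For instance, a TImode value (16 bytes) held in general registers
   occupies CEIL (16, UNITS_PER_WORD) = 2 registers, a V4SImode value in an
   FP/SIMD register occupies CEIL (16, UNITS_PER_VREG) = 1 register, and an
   SVE predicate mode always reports a single predicate register.  */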

/* Implement TARGET_HARD_REGNO_MODE_OK.  */

static bool
aarch64_hard_regno_mode_ok (unsigned regno, machine_mode mode)
{
  if (GET_MODE_CLASS (mode) == MODE_CC)
    return regno == CC_REGNUM;

  if (regno == VG_REGNUM)
    /* This must have the same size as _Unwind_Word.  */
    return mode == DImode;

  unsigned int vec_flags = aarch64_classify_vector_mode (mode);
  if (vec_flags & VEC_SVE_PRED)
    return PR_REGNUM_P (regno);

  if (PR_REGNUM_P (regno))
    return 0;

  if (regno == SP_REGNUM)
    /* The purpose of comparing with ptr_mode is to support the
       global register variable associated with the stack pointer
       register via the syntax of asm ("wsp") in ILP32.  */
    return mode == Pmode || mode == ptr_mode;

  if (regno == FRAME_POINTER_REGNUM || regno == ARG_POINTER_REGNUM)
    return mode == Pmode;

  if (GP_REGNUM_P (regno))
    {
      if (known_le (GET_MODE_SIZE (mode), 8))
	return true;
      else if (known_le (GET_MODE_SIZE (mode), 16))
	return (regno & 1) == 0;
    }
  else if (FP_REGNUM_P (regno))
    {
      if (vec_flags & VEC_STRUCT)
	return end_hard_regno (mode, regno) - 1 <= V31_REGNUM;
      else
	return !VECTOR_MODE_P (mode) || vec_flags != 0;
    }

  return false;
}

/* Return true if this is a definition of a vectorized simd function.  */

static bool
aarch64_simd_decl_p (tree fndecl)
{
  tree fntype;

  if (fndecl == NULL)
    return false;
  fntype = TREE_TYPE (fndecl);
  if (fntype == NULL)
    return false;

  /* Functions with the aarch64_vector_pcs attribute use the simd ABI.  */
  if (lookup_attribute ("aarch64_vector_pcs", TYPE_ATTRIBUTES (fntype)) != NULL)
    return true;

  return false;
}

/* Return the mode a register save/restore should use.  DImode for integer
   registers, DFmode for FP registers in non-SIMD functions (they only save
   the bottom half of a 128 bit register), or TFmode for FP registers in
   SIMD functions.  */

static machine_mode
aarch64_reg_save_mode (tree fndecl, unsigned regno)
{
  return GP_REGNUM_P (regno)
	   ? E_DImode
	   : (aarch64_simd_decl_p (fndecl) ? E_TFmode : E_DFmode);
}

/* Implement TARGET_HARD_REGNO_CALL_PART_CLOBBERED.  The callee only saves
   the lower 64 bits of a 128-bit register.  Tell the compiler the callee
   clobbers the top 64 bits when restoring the bottom 64 bits.  */

static bool
aarch64_hard_regno_call_part_clobbered (unsigned int regno, machine_mode mode)
{
  return FP_REGNUM_P (regno) && maybe_gt (GET_MODE_SIZE (mode), 8);
}

/* Implement REGMODE_NATURAL_SIZE.  */
poly_uint64
aarch64_regmode_natural_size (machine_mode mode)
{
  /* The natural size for SVE data modes is one SVE data vector,
     and similarly for predicates.  We can't independently modify
     anything smaller than that.  */
  /* ??? For now, only do this for variable-width SVE registers.
     Doing it for constant-sized registers breaks lower-subreg.c.  */
  /* ??? And once that's fixed, we should probably have similar
     code for Advanced SIMD.  */
  if (!aarch64_sve_vg.is_constant ())
    {
      unsigned int vec_flags = aarch64_classify_vector_mode (mode);
      if (vec_flags & VEC_SVE_PRED)
	return BYTES_PER_SVE_PRED;
      if (vec_flags & VEC_SVE_DATA)
	return BYTES_PER_SVE_VECTOR;
    }
  return UNITS_PER_WORD;
}

/* Implement HARD_REGNO_CALLER_SAVE_MODE.  */
machine_mode
aarch64_hard_regno_caller_save_mode (unsigned regno, unsigned,
				     machine_mode mode)
{
  /* The predicate mode determines which bits are significant and
     which are "don't care".  Decreasing the number of lanes would
     lose data while increasing the number of lanes would make bits
     unnecessarily significant.  */
  if (PR_REGNUM_P (regno))
    return mode;
  if (known_ge (GET_MODE_SIZE (mode), 4))
    return mode;
  else
    return SImode;
}

/* Return true if I's bits are consecutive ones from the MSB.  */
bool
aarch64_high_bits_all_ones_p (HOST_WIDE_INT i)
{
  return exact_log2 (-i) != HOST_WIDE_INT_M1;
}
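/* For example, 0xffffffffffff0000 satisfies this test because its negation,
   0x10000, is an exact power of two, whereas a value such as 0x00ff0000
   does not (nor does 0, since exact_log2 (0) is -1).  */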

/* Implement TARGET_CONSTANT_ALIGNMENT.  Make strings word-aligned so
   that strcpy from constants will be faster.  */

static HOST_WIDE_INT
aarch64_constant_alignment (const_tree exp, HOST_WIDE_INT align)
{
  if (TREE_CODE (exp) == STRING_CST && !optimize_size)
    return MAX (align, BITS_PER_WORD);
  return align;
}
1634
43e9d192
IB
1635/* Return true if calls to DECL should be treated as
1636 long-calls (ie called via a register). */
1637static bool
1638aarch64_decl_is_long_call_p (const_tree decl ATTRIBUTE_UNUSED)
1639{
1640 return false;
1641}
1642
1643/* Return true if calls to symbol-ref SYM should be treated as
1644 long-calls (ie called via a register). */
1645bool
1646aarch64_is_long_call_p (rtx sym)
1647{
1648 return aarch64_decl_is_long_call_p (SYMBOL_REF_DECL (sym));
1649}
1650
b60d63cb
JW
1651/* Return true if calls to symbol-ref SYM should not go through
1652 plt stubs. */
1653
1654bool
1655aarch64_is_noplt_call_p (rtx sym)
1656{
1657 const_tree decl = SYMBOL_REF_DECL (sym);
1658
1659 if (flag_pic
1660 && decl
1661 && (!flag_plt
1662 || lookup_attribute ("noplt", DECL_ATTRIBUTES (decl)))
1663 && !targetm.binds_local_p (decl))
1664 return true;
1665
1666 return false;
1667}
1668
43e9d192
IB
1669/* Return true if the offsets to a zero/sign-extract operation
1670 represent an expression that matches an extend operation. The
1671 operands represent the parameters from
1672
4745e701 1673 (extract:MODE (mult (reg) (MULT_IMM)) (EXTRACT_IMM) (const_int 0)). */
43e9d192 1674bool
77e994c9 1675aarch64_is_extend_from_extract (scalar_int_mode mode, rtx mult_imm,
43e9d192
IB
1676 rtx extract_imm)
1677{
1678 HOST_WIDE_INT mult_val, extract_val;
1679
1680 if (! CONST_INT_P (mult_imm) || ! CONST_INT_P (extract_imm))
1681 return false;
1682
1683 mult_val = INTVAL (mult_imm);
1684 extract_val = INTVAL (extract_imm);
1685
1686 if (extract_val > 8
1687 && extract_val < GET_MODE_BITSIZE (mode)
1688 && exact_log2 (extract_val & ~7) > 0
1689 && (extract_val & 7) <= 4
1690 && mult_val == (1 << (extract_val & 7)))
1691 return true;
1692
1693 return false;
1694}
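A small numeric illustration may help; this is a standalone sketch of the checks above for a fixed 64-bit mode, not GCC code, with a hypothetical helper name:

/* For example, extract_val = 34 with mult_val = 4 is accepted: 34 & ~7
   is 32, a power of two; the low bits give a shift of 2; and 4 == 1 << 2.
   (Since extract_val > 8, the masked value is nonzero, so the
   power-of-two test below matches exact_log2 (...) > 0.)  */
static int extend_from_extract_64_p (long mult_val, long extract_val)
{
  long high = extract_val & ~7L;
  return extract_val > 8
         && extract_val < 64
         && (high & (high - 1)) == 0
         && (extract_val & 7) <= 4
         && mult_val == (1L << (extract_val & 7));
}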
1695
1696/* Emit an insn that's a simple single-set. Both the operands must be
1697 known to be valid. */
827ab47a 1698inline static rtx_insn *
43e9d192
IB
1699emit_set_insn (rtx x, rtx y)
1700{
f7df4a84 1701 return emit_insn (gen_rtx_SET (x, y));
43e9d192
IB
1702}
1703
1704/* X and Y are two things to compare using CODE. Emit the compare insn and
1705 return the rtx for register 0 in the proper mode. */
1706rtx
1707aarch64_gen_compare_reg (RTX_CODE code, rtx x, rtx y)
1708{
ef4bddc2 1709 machine_mode mode = SELECT_CC_MODE (code, x, y);
43e9d192
IB
1710 rtx cc_reg = gen_rtx_REG (mode, CC_REGNUM);
1711
1712 emit_set_insn (cc_reg, gen_rtx_COMPARE (mode, x, y));
1713 return cc_reg;
1714}
1715
d400fda3
RH
1716/* Similarly, but maybe zero-extend Y if Y_MODE < SImode. */
1717
1718static rtx
1719aarch64_gen_compare_reg_maybe_ze (RTX_CODE code, rtx x, rtx y,
1720 machine_mode y_mode)
1721{
1722 if (y_mode == E_QImode || y_mode == E_HImode)
1723 {
1724 if (CONST_INT_P (y))
1725 y = GEN_INT (INTVAL (y) & GET_MODE_MASK (y_mode));
1726 else
1727 {
1728 rtx t, cc_reg;
1729 machine_mode cc_mode;
1730
1731 t = gen_rtx_ZERO_EXTEND (SImode, y);
1732 t = gen_rtx_COMPARE (CC_SWPmode, t, x);
1733 cc_mode = CC_SWPmode;
1734 cc_reg = gen_rtx_REG (cc_mode, CC_REGNUM);
1735 emit_set_insn (cc_reg, t);
1736 return cc_reg;
1737 }
1738 }
1739
1740 return aarch64_gen_compare_reg (code, x, y);
1741}
1742
43e9d192
IB
1743/* Build the SYMBOL_REF for __tls_get_addr. */
1744
1745static GTY(()) rtx tls_get_addr_libfunc;
1746
1747rtx
1748aarch64_tls_get_addr (void)
1749{
1750 if (!tls_get_addr_libfunc)
1751 tls_get_addr_libfunc = init_one_libfunc ("__tls_get_addr");
1752 return tls_get_addr_libfunc;
1753}
1754
1755/* Return the TLS model to use for ADDR. */
1756
1757static enum tls_model
1758tls_symbolic_operand_type (rtx addr)
1759{
1760 enum tls_model tls_kind = TLS_MODEL_NONE;
43e9d192
IB
1761 if (GET_CODE (addr) == CONST)
1762 {
6a70badb
RS
1763 poly_int64 addend;
1764 rtx sym = strip_offset (addr, &addend);
43e9d192
IB
1765 if (GET_CODE (sym) == SYMBOL_REF)
1766 tls_kind = SYMBOL_REF_TLS_MODEL (sym);
1767 }
1768 else if (GET_CODE (addr) == SYMBOL_REF)
1769 tls_kind = SYMBOL_REF_TLS_MODEL (addr);
1770
1771 return tls_kind;
1772}
1773
1774/* We allow lo_sum expressions in our legitimate addresses
1775 so that combine can take care of combining addresses where
1776 necessary, but for generation purposes, we generate the address
1777 as:
1778 RTL Absolute
1779 tmp = hi (symbol_ref); adrp x1, foo
1780 dest = lo_sum (tmp, symbol_ref); add dest, x1, :lo_12:foo
1781 nop
1782
1783 PIC TLS
1784 adrp x1, :got:foo adrp tmp, :tlsgd:foo
1785 ldr x1, [:got_lo12:foo] add dest, tmp, :tlsgd_lo12:foo
1786 bl __tls_get_addr
1787 nop
1788
1789 Load TLS symbol, depending on TLS mechanism and TLS access model.
1790
1791 Global Dynamic - Traditional TLS:
1792 adrp tmp, :tlsgd:imm
1793 add dest, tmp, #:tlsgd_lo12:imm
1794 bl __tls_get_addr
1795
1796 Global Dynamic - TLS Descriptors:
1797 adrp dest, :tlsdesc:imm
1798 ldr tmp, [dest, #:tlsdesc_lo12:imm]
1799 add dest, dest, #:tlsdesc_lo12:imm
1800 blr tmp
1801 mrs tp, tpidr_el0
1802 add dest, dest, tp
1803
1804 Initial Exec:
1805 mrs tp, tpidr_el0
1806 adrp tmp, :gottprel:imm
1807 ldr dest, [tmp, #:gottprel_lo12:imm]
1808 add dest, dest, tp
1809
1810 Local Exec:
1811 mrs tp, tpidr_el0
0699caae
RL
1812 add t0, tp, #:tprel_hi12:imm, lsl #12
1813 add t0, t0, #:tprel_lo12_nc:imm
43e9d192
IB
1814*/
1815
1816static void
1817aarch64_load_symref_appropriately (rtx dest, rtx imm,
1818 enum aarch64_symbol_type type)
1819{
1820 switch (type)
1821 {
1822 case SYMBOL_SMALL_ABSOLUTE:
1823 {
28514dda 1824 /* In ILP32, the mode of dest can be either SImode or DImode. */
43e9d192 1825 rtx tmp_reg = dest;
ef4bddc2 1826 machine_mode mode = GET_MODE (dest);
28514dda
YZ
1827
1828 gcc_assert (mode == Pmode || mode == ptr_mode);
1829
43e9d192 1830 if (can_create_pseudo_p ())
28514dda 1831 tmp_reg = gen_reg_rtx (mode);
43e9d192 1832
28514dda 1833 emit_move_insn (tmp_reg, gen_rtx_HIGH (mode, imm));
43e9d192
IB
1834 emit_insn (gen_add_losym (dest, tmp_reg, imm));
1835 return;
1836 }
1837
a5350ddc 1838 case SYMBOL_TINY_ABSOLUTE:
f7df4a84 1839 emit_insn (gen_rtx_SET (dest, imm));
a5350ddc
CSS
1840 return;
1841
1b1e81f8
JW
1842 case SYMBOL_SMALL_GOT_28K:
1843 {
1844 machine_mode mode = GET_MODE (dest);
1845 rtx gp_rtx = pic_offset_table_rtx;
53021678
JW
1846 rtx insn;
1847 rtx mem;
1b1e81f8
JW
1848
1849 /* NOTE: pic_offset_table_rtx can be NULL_RTX, because we can reach
1850 here before rtl expansion. Tree IVOPTS will generate rtl patterns to
1851 decide rtx costs, in which case pic_offset_table_rtx is not
1852 initialized. In that case there is no need to generate the first adrp
026c3cfd 1853 instruction, as the final cost for global variable access is
1b1e81f8
JW
1854 one instruction. */
1855 if (gp_rtx != NULL)
1856 {
1857 /* -fpic for -mcmodel=small allows a 32K GOT table size (but since we
1858 are using the page base as the GOT base, the first page may be wasted;
1859 in the worst scenario there is only 28K of space for the GOT).
1860
1861 The generated instruction sequence for accessing a global variable
1862 is:
1863
a3957742 1864 ldr reg, [pic_offset_table_rtx, #:gotpage_lo15:sym]
1b1e81f8
JW
1865
1866 Only one instruction is needed. But we must initialize
1867 pic_offset_table_rtx properly. We generate an initialization insn for
1868 every global access, and allow CSE to remove all redundant ones.
1869
1870 The final instruction sequence will look like the following
1871 for multiple global variable accesses.
1872
a3957742 1873 adrp pic_offset_table_rtx, _GLOBAL_OFFSET_TABLE_
1b1e81f8 1874
a3957742
JW
1875 ldr reg, [pic_offset_table_rtx, #:gotpage_lo15:sym1]
1876 ldr reg, [pic_offset_table_rtx, #:gotpage_lo15:sym2]
1877 ldr reg, [pic_offset_table_rtx, #:gotpage_lo15:sym3]
1878 ... */
1b1e81f8
JW
1879
1880 rtx s = gen_rtx_SYMBOL_REF (Pmode, "_GLOBAL_OFFSET_TABLE_");
1881 crtl->uses_pic_offset_table = 1;
1882 emit_move_insn (gp_rtx, gen_rtx_HIGH (Pmode, s));
1883
1884 if (mode != GET_MODE (gp_rtx))
4ba8f0a3
AP
1885 gp_rtx = gen_lowpart (mode, gp_rtx);
1886
1b1e81f8
JW
1887 }
1888
1889 if (mode == ptr_mode)
1890 {
1891 if (mode == DImode)
53021678 1892 insn = gen_ldr_got_small_28k_di (dest, gp_rtx, imm);
1b1e81f8 1893 else
53021678
JW
1894 insn = gen_ldr_got_small_28k_si (dest, gp_rtx, imm);
1895
1896 mem = XVECEXP (SET_SRC (insn), 0, 0);
1b1e81f8
JW
1897 }
1898 else
1899 {
1900 gcc_assert (mode == Pmode);
53021678
JW
1901
1902 insn = gen_ldr_got_small_28k_sidi (dest, gp_rtx, imm);
1903 mem = XVECEXP (XEXP (SET_SRC (insn), 0), 0, 0);
1b1e81f8
JW
1904 }
1905
53021678
JW
1906 /* The operand is expected to be a MEM. Whenever the related insn
1907 pattern changes, the above code which calculates MEM should be
1908 updated. */
1909 gcc_assert (GET_CODE (mem) == MEM);
1910 MEM_READONLY_P (mem) = 1;
1911 MEM_NOTRAP_P (mem) = 1;
1912 emit_insn (insn);
1b1e81f8
JW
1913 return;
1914 }
1915
6642bdb4 1916 case SYMBOL_SMALL_GOT_4G:
43e9d192 1917 {
28514dda
YZ
1918 /* In ILP32, the mode of dest can be either SImode or DImode,
1919 while the got entry is always of SImode size. The mode of
1920 dest depends on how dest is used: if dest is assigned to a
1921 pointer (e.g. stored in memory), it has SImode; it may have
1922 DImode if dest is dereferenced to access the memory.
1923 This is why we have to handle three different ldr_got_small
1924 patterns here (two patterns for ILP32). */
53021678
JW
1925
1926 rtx insn;
1927 rtx mem;
43e9d192 1928 rtx tmp_reg = dest;
ef4bddc2 1929 machine_mode mode = GET_MODE (dest);
28514dda 1930
43e9d192 1931 if (can_create_pseudo_p ())
28514dda
YZ
1932 tmp_reg = gen_reg_rtx (mode);
1933
1934 emit_move_insn (tmp_reg, gen_rtx_HIGH (mode, imm));
1935 if (mode == ptr_mode)
1936 {
1937 if (mode == DImode)
53021678 1938 insn = gen_ldr_got_small_di (dest, tmp_reg, imm);
28514dda 1939 else
53021678
JW
1940 insn = gen_ldr_got_small_si (dest, tmp_reg, imm);
1941
1942 mem = XVECEXP (SET_SRC (insn), 0, 0);
28514dda
YZ
1943 }
1944 else
1945 {
1946 gcc_assert (mode == Pmode);
53021678
JW
1947
1948 insn = gen_ldr_got_small_sidi (dest, tmp_reg, imm);
1949 mem = XVECEXP (XEXP (SET_SRC (insn), 0), 0, 0);
28514dda
YZ
1950 }
1951
53021678
JW
1952 gcc_assert (GET_CODE (mem) == MEM);
1953 MEM_READONLY_P (mem) = 1;
1954 MEM_NOTRAP_P (mem) = 1;
1955 emit_insn (insn);
43e9d192
IB
1956 return;
1957 }
1958
1959 case SYMBOL_SMALL_TLSGD:
1960 {
5d8a22a5 1961 rtx_insn *insns;
23b88fda
N
1962 machine_mode mode = GET_MODE (dest);
1963 rtx result = gen_rtx_REG (mode, R0_REGNUM);
43e9d192
IB
1964
1965 start_sequence ();
23b88fda
N
1966 if (TARGET_ILP32)
1967 aarch64_emit_call_insn (gen_tlsgd_small_si (result, imm));
1968 else
1969 aarch64_emit_call_insn (gen_tlsgd_small_di (result, imm));
43e9d192
IB
1970 insns = get_insns ();
1971 end_sequence ();
1972
1973 RTL_CONST_CALL_P (insns) = 1;
1974 emit_libcall_block (insns, dest, result, imm);
1975 return;
1976 }
1977
1978 case SYMBOL_SMALL_TLSDESC:
1979 {
ef4bddc2 1980 machine_mode mode = GET_MODE (dest);
621ad2de 1981 rtx x0 = gen_rtx_REG (mode, R0_REGNUM);
43e9d192
IB
1982 rtx tp;
1983
621ad2de
AP
1984 gcc_assert (mode == Pmode || mode == ptr_mode);
1985
2876a13f
JW
1986 /* In ILP32, the got entry is always of SImode size. Unlike
1987 small GOT, the dest is fixed at reg 0. */
1988 if (TARGET_ILP32)
1989 emit_insn (gen_tlsdesc_small_si (imm));
621ad2de 1990 else
2876a13f 1991 emit_insn (gen_tlsdesc_small_di (imm));
43e9d192 1992 tp = aarch64_load_tp (NULL);
621ad2de
AP
1993
1994 if (mode != Pmode)
1995 tp = gen_lowpart (mode, tp);
1996
2876a13f 1997 emit_insn (gen_rtx_SET (dest, gen_rtx_PLUS (mode, tp, x0)));
241dbd9d
QZ
1998 if (REG_P (dest))
1999 set_unique_reg_note (get_last_insn (), REG_EQUIV, imm);
43e9d192
IB
2000 return;
2001 }
2002
79496620 2003 case SYMBOL_SMALL_TLSIE:
43e9d192 2004 {
621ad2de
AP
2005 /* In ILP32, the mode of dest can be either SImode or DImode,
2006 while the got entry is always of SImode size. The mode of
2007 dest depends on how dest is used: if dest is assigned to a
2008 pointer (e.g. stored in memory), it has SImode; it may have
2009 DImode if dest is dereferenced to access the memory.
2010 This is why we have to handle three different tlsie_small
2011 patterns here (two patterns for ILP32). */
ef4bddc2 2012 machine_mode mode = GET_MODE (dest);
621ad2de 2013 rtx tmp_reg = gen_reg_rtx (mode);
43e9d192 2014 rtx tp = aarch64_load_tp (NULL);
621ad2de
AP
2015
2016 if (mode == ptr_mode)
2017 {
2018 if (mode == DImode)
2019 emit_insn (gen_tlsie_small_di (tmp_reg, imm));
2020 else
2021 {
2022 emit_insn (gen_tlsie_small_si (tmp_reg, imm));
2023 tp = gen_lowpart (mode, tp);
2024 }
2025 }
2026 else
2027 {
2028 gcc_assert (mode == Pmode);
2029 emit_insn (gen_tlsie_small_sidi (tmp_reg, imm));
2030 }
2031
f7df4a84 2032 emit_insn (gen_rtx_SET (dest, gen_rtx_PLUS (mode, tp, tmp_reg)));
241dbd9d
QZ
2033 if (REG_P (dest))
2034 set_unique_reg_note (get_last_insn (), REG_EQUIV, imm);
43e9d192
IB
2035 return;
2036 }
2037
cbf5629e 2038 case SYMBOL_TLSLE12:
d18ba284 2039 case SYMBOL_TLSLE24:
cbf5629e
JW
2040 case SYMBOL_TLSLE32:
2041 case SYMBOL_TLSLE48:
43e9d192 2042 {
cbf5629e 2043 machine_mode mode = GET_MODE (dest);
43e9d192 2044 rtx tp = aarch64_load_tp (NULL);
e6f7f0e9 2045
cbf5629e
JW
2046 if (mode != Pmode)
2047 tp = gen_lowpart (mode, tp);
2048
2049 switch (type)
2050 {
2051 case SYMBOL_TLSLE12:
2052 emit_insn ((mode == DImode ? gen_tlsle12_di : gen_tlsle12_si)
2053 (dest, tp, imm));
2054 break;
2055 case SYMBOL_TLSLE24:
2056 emit_insn ((mode == DImode ? gen_tlsle24_di : gen_tlsle24_si)
2057 (dest, tp, imm));
2058 break;
2059 case SYMBOL_TLSLE32:
2060 emit_insn ((mode == DImode ? gen_tlsle32_di : gen_tlsle32_si)
2061 (dest, imm));
2062 emit_insn ((mode == DImode ? gen_adddi3 : gen_addsi3)
2063 (dest, dest, tp));
2064 break;
2065 case SYMBOL_TLSLE48:
2066 emit_insn ((mode == DImode ? gen_tlsle48_di : gen_tlsle48_si)
2067 (dest, imm));
2068 emit_insn ((mode == DImode ? gen_adddi3 : gen_addsi3)
2069 (dest, dest, tp));
2070 break;
2071 default:
2072 gcc_unreachable ();
2073 }
e6f7f0e9 2074
241dbd9d
QZ
2075 if (REG_P (dest))
2076 set_unique_reg_note (get_last_insn (), REG_EQUIV, imm);
43e9d192
IB
2077 return;
2078 }
2079
87dd8ab0
MS
2080 case SYMBOL_TINY_GOT:
2081 emit_insn (gen_ldr_got_tiny (dest, imm));
2082 return;
2083
5ae7caad
JW
2084 case SYMBOL_TINY_TLSIE:
2085 {
2086 machine_mode mode = GET_MODE (dest);
2087 rtx tp = aarch64_load_tp (NULL);
2088
2089 if (mode == ptr_mode)
2090 {
2091 if (mode == DImode)
2092 emit_insn (gen_tlsie_tiny_di (dest, imm, tp));
2093 else
2094 {
2095 tp = gen_lowpart (mode, tp);
2096 emit_insn (gen_tlsie_tiny_si (dest, imm, tp));
2097 }
2098 }
2099 else
2100 {
2101 gcc_assert (mode == Pmode);
2102 emit_insn (gen_tlsie_tiny_sidi (dest, imm, tp));
2103 }
2104
241dbd9d
QZ
2105 if (REG_P (dest))
2106 set_unique_reg_note (get_last_insn (), REG_EQUIV, imm);
5ae7caad
JW
2107 return;
2108 }
2109
43e9d192
IB
2110 default:
2111 gcc_unreachable ();
2112 }
2113}
2114
2115/* Emit a move from SRC to DEST. Assume that the move expanders can
2116 handle all moves if !can_create_pseudo_p (). The distinction is
2117 important because, unlike emit_move_insn, the move expanders know
2118 how to force Pmode objects into the constant pool even when the
2119 constant pool address is not itself legitimate. */
2120static rtx
2121aarch64_emit_move (rtx dest, rtx src)
2122{
2123 return (can_create_pseudo_p ()
2124 ? emit_move_insn (dest, src)
2125 : emit_move_insn_1 (dest, src));
2126}
2127
f22d7973
RS
2128/* Apply UNOPTAB to OP and store the result in DEST. */
2129
2130static void
2131aarch64_emit_unop (rtx dest, optab unoptab, rtx op)
2132{
2133 rtx tmp = expand_unop (GET_MODE (dest), unoptab, op, dest, 0);
2134 if (dest != tmp)
2135 emit_move_insn (dest, tmp);
2136}
2137
2138/* Apply BINOPTAB to OP0 and OP1 and store the result in DEST. */
2139
2140static void
2141aarch64_emit_binop (rtx dest, optab binoptab, rtx op0, rtx op1)
2142{
2143 rtx tmp = expand_binop (GET_MODE (dest), binoptab, op0, op1, dest, 0,
2144 OPTAB_DIRECT);
2145 if (dest != tmp)
2146 emit_move_insn (dest, tmp);
2147}
2148
030d03b8
RE
2149/* Split a 128-bit move operation into two 64-bit move operations,
2150 taking care to handle partial overlap of register to register
2151 copies. Special cases are needed when moving between GP regs and
2152 FP regs. SRC can be a register, constant or memory; DST a register
2153 or memory. If either operand is memory it must not have any side
2154 effects. */
43e9d192
IB
2155void
2156aarch64_split_128bit_move (rtx dst, rtx src)
2157{
030d03b8
RE
2158 rtx dst_lo, dst_hi;
2159 rtx src_lo, src_hi;
43e9d192 2160
ef4bddc2 2161 machine_mode mode = GET_MODE (dst);
12dc6974 2162
030d03b8
RE
2163 gcc_assert (mode == TImode || mode == TFmode);
2164 gcc_assert (!(side_effects_p (src) || side_effects_p (dst)));
2165 gcc_assert (mode == GET_MODE (src) || GET_MODE (src) == VOIDmode);
43e9d192
IB
2166
2167 if (REG_P (dst) && REG_P (src))
2168 {
030d03b8
RE
2169 int src_regno = REGNO (src);
2170 int dst_regno = REGNO (dst);
43e9d192 2171
030d03b8 2172 /* Handle FP <-> GP regs. */
43e9d192
IB
2173 if (FP_REGNUM_P (dst_regno) && GP_REGNUM_P (src_regno))
2174 {
030d03b8
RE
2175 src_lo = gen_lowpart (word_mode, src);
2176 src_hi = gen_highpart (word_mode, src);
2177
0016d8d9
RS
2178 emit_insn (gen_aarch64_movlow_di (mode, dst, src_lo));
2179 emit_insn (gen_aarch64_movhigh_di (mode, dst, src_hi));
030d03b8 2180 return;
43e9d192
IB
2181 }
2182 else if (GP_REGNUM_P (dst_regno) && FP_REGNUM_P (src_regno))
2183 {
030d03b8
RE
2184 dst_lo = gen_lowpart (word_mode, dst);
2185 dst_hi = gen_highpart (word_mode, dst);
2186
0016d8d9
RS
2187 emit_insn (gen_aarch64_movdi_low (mode, dst_lo, src));
2188 emit_insn (gen_aarch64_movdi_high (mode, dst_hi, src));
030d03b8 2189 return;
43e9d192 2190 }
43e9d192
IB
2191 }
2192
030d03b8
RE
2193 dst_lo = gen_lowpart (word_mode, dst);
2194 dst_hi = gen_highpart (word_mode, dst);
2195 src_lo = gen_lowpart (word_mode, src);
2196 src_hi = gen_highpart_mode (word_mode, mode, src);
2197
2198 /* At most one pairing may overlap. */
2199 if (reg_overlap_mentioned_p (dst_lo, src_hi))
2200 {
2201 aarch64_emit_move (dst_hi, src_hi);
2202 aarch64_emit_move (dst_lo, src_lo);
2203 }
2204 else
2205 {
2206 aarch64_emit_move (dst_lo, src_lo);
2207 aarch64_emit_move (dst_hi, src_hi);
2208 }
43e9d192
IB
2209}
2210
2211bool
2212aarch64_split_128bit_move_p (rtx dst, rtx src)
2213{
2214 return (! REG_P (src)
2215 || ! (FP_REGNUM_P (REGNO (dst)) && FP_REGNUM_P (REGNO (src))));
2216}
2217
8b033a8a
SN
2218/* Split a complex SIMD combine. */
2219
2220void
2221aarch64_split_simd_combine (rtx dst, rtx src1, rtx src2)
2222{
ef4bddc2
RS
2223 machine_mode src_mode = GET_MODE (src1);
2224 machine_mode dst_mode = GET_MODE (dst);
8b033a8a
SN
2225
2226 gcc_assert (VECTOR_MODE_P (dst_mode));
a977dc0c
MC
2227 gcc_assert (register_operand (dst, dst_mode)
2228 && register_operand (src1, src_mode)
2229 && register_operand (src2, src_mode));
8b033a8a 2230
0016d8d9 2231 emit_insn (gen_aarch64_simd_combine (src_mode, dst, src1, src2));
a977dc0c 2232 return;
8b033a8a
SN
2233}
2234
fd4842cd
SN
2235/* Split a complex SIMD move. */
2236
2237void
2238aarch64_split_simd_move (rtx dst, rtx src)
2239{
ef4bddc2
RS
2240 machine_mode src_mode = GET_MODE (src);
2241 machine_mode dst_mode = GET_MODE (dst);
fd4842cd
SN
2242
2243 gcc_assert (VECTOR_MODE_P (dst_mode));
2244
2245 if (REG_P (dst) && REG_P (src))
2246 {
2247 gcc_assert (VECTOR_MODE_P (src_mode));
0016d8d9 2248 emit_insn (gen_aarch64_split_simd_mov (src_mode, dst, src));
fd4842cd
SN
2249 }
2250}
2251
ef22810a
RH
2252bool
2253aarch64_zero_extend_const_eq (machine_mode xmode, rtx x,
2254 machine_mode ymode, rtx y)
2255{
2256 rtx r = simplify_const_unary_operation (ZERO_EXTEND, xmode, y, ymode);
2257 gcc_assert (r != NULL);
2258 return rtx_equal_p (x, r);
2259}
2260
2261
43e9d192 2262static rtx
ef4bddc2 2263aarch64_force_temporary (machine_mode mode, rtx x, rtx value)
43e9d192
IB
2264{
2265 if (can_create_pseudo_p ())
e18b4a81 2266 return force_reg (mode, value);
43e9d192
IB
2267 else
2268 {
f5470a77
RS
2269 gcc_assert (x);
2270 aarch64_emit_move (x, value);
43e9d192
IB
2271 return x;
2272 }
2273}
2274
43cacb12
RS
2275/* Return true if we can move VALUE into a register using a single
2276 CNT[BHWD] instruction. */
2277
2278static bool
2279aarch64_sve_cnt_immediate_p (poly_int64 value)
2280{
2281 HOST_WIDE_INT factor = value.coeffs[0];
2282 /* The coefficient must be [1, 16] * {2, 4, 8, 16}. */
2283 return (value.coeffs[1] == factor
2284 && IN_RANGE (factor, 2, 16 * 16)
2285 && (factor & 1) == 0
2286 && factor <= 16 * (factor & -factor));
2287}
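For clarity, a standalone sketch of the same factor test in plain C (not GCC code), plus a few accepted and rejected values:

/* A factor passes iff it equals multiplier * nelts_per_quadword with
   multiplier in [1, 16] and nelts_per_quadword in {2, 4, 8, 16};
   that is what the range, evenness and low-set-bit checks encode.  */
static int sve_cnt_factor_p (long factor)
{
  return factor >= 2 && factor <= 16 * 16
         && (factor & 1) == 0
         && factor <= 16 * (factor & -factor);
}
/* Examples: 2 (CNTD), 16 (CNTB) and 48 (CNTH with MUL #6) are accepted;
   34 is rejected because 34 > 16 * (34 & -34) = 32.  */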
2288
2289/* Likewise for rtx X. */
2290
2291bool
2292aarch64_sve_cnt_immediate_p (rtx x)
2293{
2294 poly_int64 value;
2295 return poly_int_rtx_p (x, &value) && aarch64_sve_cnt_immediate_p (value);
2296}
2297
2298/* Return the asm string for an instruction with a CNT-like vector size
2299 operand (a vector pattern followed by a multiplier in the range [1, 16]).
2300 PREFIX is the mnemonic without the size suffix and OPERANDS is the
2301 first part of the operands template (the part that comes before the
2302 vector size itself). FACTOR is the number of quadwords.
2303 NELTS_PER_VQ, if nonzero, is the number of elements in each quadword.
2304 If it is zero, we can use any element size. */
2305
2306static char *
2307aarch64_output_sve_cnt_immediate (const char *prefix, const char *operands,
2308 unsigned int factor,
2309 unsigned int nelts_per_vq)
2310{
2311 static char buffer[sizeof ("sqincd\t%x0, %w0, all, mul #16")];
2312
2313 if (nelts_per_vq == 0)
2314 /* There is some overlap in the ranges of the four CNT instructions.
2315 Here we always use the smallest possible element size, so that the
2316 multiplier is 1 wherever possible. */
2317 nelts_per_vq = factor & -factor;
2318 int shift = std::min (exact_log2 (nelts_per_vq), 4);
2319 gcc_assert (IN_RANGE (shift, 1, 4));
2320 char suffix = "dwhb"[shift - 1];
2321
2322 factor >>= shift;
2323 unsigned int written;
2324 if (factor == 1)
2325 written = snprintf (buffer, sizeof (buffer), "%s%c\t%s",
2326 prefix, suffix, operands);
2327 else
2328 written = snprintf (buffer, sizeof (buffer), "%s%c\t%s, all, mul #%d",
2329 prefix, suffix, operands, factor);
2330 gcc_assert (written < sizeof (buffer));
2331 return buffer;
2332}
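A few worked examples of this mapping (illustrative only, assuming the "cnt" prefix and a "%x0" operand template):

/* factor 2,  nelts_per_vq 0  ->  shift 1, suffix 'd'  ->  "cntd\t%x0"
   factor 16, nelts_per_vq 0  ->  shift 4, suffix 'b'  ->  "cntb\t%x0"
   factor 48, nelts_per_vq 0  ->  shift 4, suffix 'b'  ->  "cntb\t%x0, all, mul #3"  */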
2333
2334/* Return the asm string for an instruction with a CNT-like vector size
2335 operand (a vector pattern followed by a multiplier in the range [1, 16]).
2336 PREFIX is the mnemonic without the size suffix and OPERANDS is the
2337 first part of the operands template (the part that comes before the
2338 vector size itself). X is the value of the vector size operand,
2339 as a polynomial integer rtx. */
2340
2341char *
2342aarch64_output_sve_cnt_immediate (const char *prefix, const char *operands,
2343 rtx x)
2344{
2345 poly_int64 value = rtx_to_poly_int64 (x);
2346 gcc_assert (aarch64_sve_cnt_immediate_p (value));
2347 return aarch64_output_sve_cnt_immediate (prefix, operands,
2348 value.coeffs[1], 0);
2349}
2350
2351/* Return true if we can add VALUE to a register using a single ADDVL
2352 or ADDPL instruction. */
2353
2354static bool
2355aarch64_sve_addvl_addpl_immediate_p (poly_int64 value)
2356{
2357 HOST_WIDE_INT factor = value.coeffs[0];
2358 if (factor == 0 || value.coeffs[1] != factor)
2359 return false;
2360 /* FACTOR counts VG / 2, so a value of 2 is one predicate width
2361 and a value of 16 is one vector width. */
2362 return (((factor & 15) == 0 && IN_RANGE (factor, -32 * 16, 31 * 16))
2363 || ((factor & 1) == 0 && IN_RANGE (factor, -32 * 2, 31 * 2)));
2364}
2365
2366/* Likewise for rtx X. */
2367
2368bool
2369aarch64_sve_addvl_addpl_immediate_p (rtx x)
2370{
2371 poly_int64 value;
2372 return (poly_int_rtx_p (x, &value)
2373 && aarch64_sve_addvl_addpl_immediate_p (value));
2374}
2375
2376/* Return the asm string for adding ADDVL or ADDPL immediate X to operand 1
2377 and storing the result in operand 0. */
2378
2379char *
2380aarch64_output_sve_addvl_addpl (rtx dest, rtx base, rtx offset)
2381{
2382 static char buffer[sizeof ("addpl\t%x0, %x1, #-") + 3 * sizeof (int)];
2383 poly_int64 offset_value = rtx_to_poly_int64 (offset);
2384 gcc_assert (aarch64_sve_addvl_addpl_immediate_p (offset_value));
2385
2386 /* Use INC or DEC if possible. */
2387 if (rtx_equal_p (dest, base) && GP_REGNUM_P (REGNO (dest)))
2388 {
2389 if (aarch64_sve_cnt_immediate_p (offset_value))
2390 return aarch64_output_sve_cnt_immediate ("inc", "%x0",
2391 offset_value.coeffs[1], 0);
2392 if (aarch64_sve_cnt_immediate_p (-offset_value))
2393 return aarch64_output_sve_cnt_immediate ("dec", "%x0",
2394 -offset_value.coeffs[1], 0);
2395 }
2396
2397 int factor = offset_value.coeffs[1];
2398 if ((factor & 15) == 0)
2399 snprintf (buffer, sizeof (buffer), "addvl\t%%x0, %%x1, #%d", factor / 16);
2400 else
2401 snprintf (buffer, sizeof (buffer), "addpl\t%%x0, %%x1, #%d", factor / 2);
2402 return buffer;
2403}
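A few illustrative input/output pairs for this routine (a sketch of the expected behaviour, using the %x0/%x1 operand templates above):

/* When DEST differs from BASE, an offset with coefficients (16, 16)
   gives "addvl\t%x0, %x1, #1" and (-2, -2) gives "addpl\t%x0, %x1, #-1".
   When DEST equals BASE and is a GP register, (2, 2) takes the INC/DEC
   shortcut above and comes out as "incd\t%x0".  */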
2404
2405/* Return true if X is a valid immediate for an SVE vector INC or DEC
2406 instruction. If it is, store the number of elements in each vector
2407 quadword in *NELTS_PER_VQ_OUT (if nonnull) and store the multiplication
2408 factor in *FACTOR_OUT (if nonnull). */
2409
2410bool
2411aarch64_sve_inc_dec_immediate_p (rtx x, int *factor_out,
2412 unsigned int *nelts_per_vq_out)
2413{
2414 rtx elt;
2415 poly_int64 value;
2416
2417 if (!const_vec_duplicate_p (x, &elt)
2418 || !poly_int_rtx_p (elt, &value))
2419 return false;
2420
2421 unsigned int nelts_per_vq = 128 / GET_MODE_UNIT_BITSIZE (GET_MODE (x));
2422 if (nelts_per_vq != 8 && nelts_per_vq != 4 && nelts_per_vq != 2)
2423 /* There's no vector INCB. */
2424 return false;
2425
2426 HOST_WIDE_INT factor = value.coeffs[0];
2427 if (value.coeffs[1] != factor)
2428 return false;
2429
2430 /* The coefficient must be [1, 16] * NELTS_PER_VQ. */
2431 if ((factor % nelts_per_vq) != 0
2432 || !IN_RANGE (abs (factor), nelts_per_vq, 16 * nelts_per_vq))
2433 return false;
2434
2435 if (factor_out)
2436 *factor_out = factor;
2437 if (nelts_per_vq_out)
2438 *nelts_per_vq_out = nelts_per_vq;
2439 return true;
2440}
2441
2442/* Return true if X is a valid immediate for an SVE vector INC or DEC
2443 instruction. */
2444
2445bool
2446aarch64_sve_inc_dec_immediate_p (rtx x)
2447{
2448 return aarch64_sve_inc_dec_immediate_p (x, NULL, NULL);
2449}
2450
2451/* Return the asm template for an SVE vector INC or DEC instruction.
2452 OPERANDS gives the operands before the vector count and X is the
2453 value of the vector count operand itself. */
2454
2455char *
2456aarch64_output_sve_inc_dec_immediate (const char *operands, rtx x)
2457{
2458 int factor;
2459 unsigned int nelts_per_vq;
2460 if (!aarch64_sve_inc_dec_immediate_p (x, &factor, &nelts_per_vq))
2461 gcc_unreachable ();
2462 if (factor < 0)
2463 return aarch64_output_sve_cnt_immediate ("dec", operands, -factor,
2464 nelts_per_vq);
2465 else
2466 return aarch64_output_sve_cnt_immediate ("inc", operands, factor,
2467 nelts_per_vq);
2468}
43e9d192 2469
82614948
RR
2470static int
2471aarch64_internal_mov_immediate (rtx dest, rtx imm, bool generate,
77e994c9 2472 scalar_int_mode mode)
43e9d192 2473{
43e9d192 2474 int i;
9a4865db
WD
2475 unsigned HOST_WIDE_INT val, val2, mask;
2476 int one_match, zero_match;
2477 int num_insns;
43e9d192 2478
9a4865db
WD
2479 val = INTVAL (imm);
2480
2481 if (aarch64_move_imm (val, mode))
43e9d192 2482 {
82614948 2483 if (generate)
f7df4a84 2484 emit_insn (gen_rtx_SET (dest, imm));
9a4865db 2485 return 1;
43e9d192
IB
2486 }
2487
9de00935
TC
2488 /* Check to see if the low 32 bits are either 0xffffXXXX or 0xXXXXffff
2489 (with XXXX non-zero). In that case check to see if the move can be done in
2490 a smaller mode. */
2491 val2 = val & 0xffffffff;
2492 if (mode == DImode
2493 && aarch64_move_imm (val2, SImode)
2494 && (((val >> 32) & 0xffff) == 0 || (val >> 48) == 0))
2495 {
2496 if (generate)
2497 emit_insn (gen_rtx_SET (dest, GEN_INT (val2)));
2498
2499 /* Check if we have to emit a second instruction by checking to see
2500 if any of the upper 32 bits of the original DI mode value is set. */
2501 if (val == val2)
2502 return 1;
2503
2504 i = (val >> 48) ? 48 : 32;
2505
2506 if (generate)
2507 emit_insn (gen_insv_immdi (dest, GEN_INT (i),
2508 GEN_INT ((val >> i) & 0xffff)));
2509
2510 return 2;
2511 }
2512
9a4865db 2513 if ((val >> 32) == 0 || mode == SImode)
43e9d192 2514 {
82614948
RR
2515 if (generate)
2516 {
9a4865db
WD
2517 emit_insn (gen_rtx_SET (dest, GEN_INT (val & 0xffff)));
2518 if (mode == SImode)
2519 emit_insn (gen_insv_immsi (dest, GEN_INT (16),
2520 GEN_INT ((val >> 16) & 0xffff)));
2521 else
2522 emit_insn (gen_insv_immdi (dest, GEN_INT (16),
2523 GEN_INT ((val >> 16) & 0xffff)));
82614948 2524 }
9a4865db 2525 return 2;
43e9d192
IB
2526 }
2527
2528 /* Remaining cases are all for DImode. */
2529
43e9d192 2530 mask = 0xffff;
9a4865db
WD
2531 zero_match = ((val & mask) == 0) + ((val & (mask << 16)) == 0) +
2532 ((val & (mask << 32)) == 0) + ((val & (mask << 48)) == 0);
2533 one_match = ((~val & mask) == 0) + ((~val & (mask << 16)) == 0) +
2534 ((~val & (mask << 32)) == 0) + ((~val & (mask << 48)) == 0);
43e9d192 2535
62c8d76c 2536 if (zero_match != 2 && one_match != 2)
43e9d192 2537 {
62c8d76c
WD
2538 /* Try emitting a bitmask immediate with a movk replacing 16 bits.
2539 For a 64-bit bitmask try whether changing 16 bits to all ones or
2540 zeroes creates a valid bitmask. To check any repeated bitmask,
2541 try using 16 bits from the other 32-bit half of val. */
43e9d192 2542
62c8d76c 2543 for (i = 0; i < 64; i += 16, mask <<= 16)
43e9d192 2544 {
62c8d76c
WD
2545 val2 = val & ~mask;
2546 if (val2 != val && aarch64_bitmask_imm (val2, mode))
2547 break;
2548 val2 = val | mask;
2549 if (val2 != val && aarch64_bitmask_imm (val2, mode))
2550 break;
2551 val2 = val2 & ~mask;
2552 val2 = val2 | (((val2 >> 32) | (val2 << 32)) & mask);
2553 if (val2 != val && aarch64_bitmask_imm (val2, mode))
2554 break;
43e9d192 2555 }
62c8d76c 2556 if (i != 64)
43e9d192 2557 {
62c8d76c 2558 if (generate)
43e9d192 2559 {
62c8d76c
WD
2560 emit_insn (gen_rtx_SET (dest, GEN_INT (val2)));
2561 emit_insn (gen_insv_immdi (dest, GEN_INT (i),
9a4865db 2562 GEN_INT ((val >> i) & 0xffff)));
43e9d192 2563 }
1312b1ba 2564 return 2;
43e9d192
IB
2565 }
2566 }
2567
9a4865db
WD
2568 /* Generate 2-4 instructions, skipping 16 bits of all zeroes or ones which
2569 are emitted by the initial mov. If one_match > zero_match, skip set bits,
2570 otherwise skip zero bits. */
2c274197 2571
9a4865db 2572 num_insns = 1;
43e9d192 2573 mask = 0xffff;
9a4865db
WD
2574 val2 = one_match > zero_match ? ~val : val;
2575 i = (val2 & mask) != 0 ? 0 : (val2 & (mask << 16)) != 0 ? 16 : 32;
2576
2577 if (generate)
2578 emit_insn (gen_rtx_SET (dest, GEN_INT (one_match > zero_match
2579 ? (val | ~(mask << i))
2580 : (val & (mask << i)))));
2581 for (i += 16; i < 64; i += 16)
43e9d192 2582 {
9a4865db
WD
2583 if ((val2 & (mask << i)) == 0)
2584 continue;
2585 if (generate)
2586 emit_insn (gen_insv_immdi (dest, GEN_INT (i),
2587 GEN_INT ((val >> i) & 0xffff)));
2588 num_insns ++;
82614948
RR
2589 }
2590
2591 return num_insns;
2592}
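To illustrate the chunk-counting heuristic, here is a minimal standalone sketch in plain C (not GCC code) that estimates the MOVZ/MOVN/MOVK count for the fallback path only, ignoring the single-instruction and bitmask shortcuts above:

#include <stdint.h>

static int mov_imm_fallback_count (uint64_t val)
{
  int zero_match = 0, one_match = 0, count = 1, i;
  for (i = 0; i < 64; i += 16)
    {
      zero_match += ((val >> i) & 0xffff) == 0;
      one_match += ((val >> i) & 0xffff) == 0xffff;
    }
  /* Start from an inverted (MOVN-style) value if there are more 0xffff
     chunks than zero chunks, exactly as in the code above.  */
  uint64_t val2 = one_match > zero_match ? ~val : val;
  i = (val2 & 0xffff) != 0 ? 0 : (val2 & 0xffff0000ULL) != 0 ? 16 : 32;
  for (i += 16; i < 64; i += 16)
    if ((val2 >> i) & 0xffff)
      count++;		/* One MOVK per remaining non-skipped chunk.  */
  return count;
}
/* Examples: 0x1234000000005678 comes out as 2 (one MOV plus one MOVK);
   0x123456789abcdef0 comes out as 4 on this fallback path.  */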
2593
c0bb5bc5
WD
2594/* Return whether imm is a 128-bit immediate which is simple enough to
2595 expand inline. */
2596bool
2597aarch64_mov128_immediate (rtx imm)
2598{
2599 if (GET_CODE (imm) == CONST_INT)
2600 return true;
2601
2602 gcc_assert (CONST_WIDE_INT_NUNITS (imm) == 2);
2603
2604 rtx lo = GEN_INT (CONST_WIDE_INT_ELT (imm, 0));
2605 rtx hi = GEN_INT (CONST_WIDE_INT_ELT (imm, 1));
2606
2607 return aarch64_internal_mov_immediate (NULL_RTX, lo, false, DImode)
2608 + aarch64_internal_mov_immediate (NULL_RTX, hi, false, DImode) <= 4;
2609}
2610
2611
43cacb12
RS
2612/* Return the number of temporary registers that aarch64_add_offset_1
2613 would need to add OFFSET to a register. */
2614
2615static unsigned int
2616aarch64_add_offset_1_temporaries (HOST_WIDE_INT offset)
2617{
2618 return abs_hwi (offset) < 0x1000000 ? 0 : 1;
2619}
2620
f5470a77
RS
2621/* A subroutine of aarch64_add_offset. Set DEST to SRC + OFFSET for
2622 a non-polynomial OFFSET. MODE is the mode of the addition.
2623 FRAME_RELATED_P is true if the RTX_FRAME_RELATED flag should
2624 be set and CFA adjustments added to the generated instructions.
2625
2626 TEMP1, if nonnull, is a register of mode MODE that can be used as a
2627 temporary if register allocation is already complete. This temporary
2628 register may overlap DEST but must not overlap SRC. If TEMP1 is known
2629 to hold abs (OFFSET), EMIT_MOVE_IMM can be set to false to avoid emitting
2630 the immediate again.
0100c5f9
RS
2631
2632 Since this function may be used to adjust the stack pointer, we must
2633 ensure that it cannot cause transient stack deallocation (for example
2634 by first incrementing SP and then decrementing when adjusting by a
2635 large immediate). */
2636
2637static void
f5470a77
RS
2638aarch64_add_offset_1 (scalar_int_mode mode, rtx dest,
2639 rtx src, HOST_WIDE_INT offset, rtx temp1,
2640 bool frame_related_p, bool emit_move_imm)
0100c5f9 2641{
f5470a77
RS
2642 gcc_assert (emit_move_imm || temp1 != NULL_RTX);
2643 gcc_assert (temp1 == NULL_RTX || !reg_overlap_mentioned_p (temp1, src));
2644
2645 HOST_WIDE_INT moffset = abs_hwi (offset);
0100c5f9
RS
2646 rtx_insn *insn;
2647
f5470a77
RS
2648 if (!moffset)
2649 {
2650 if (!rtx_equal_p (dest, src))
2651 {
2652 insn = emit_insn (gen_rtx_SET (dest, src));
2653 RTX_FRAME_RELATED_P (insn) = frame_related_p;
2654 }
2655 return;
2656 }
0100c5f9
RS
2657
2658 /* Single instruction adjustment. */
f5470a77 2659 if (aarch64_uimm12_shift (moffset))
0100c5f9 2660 {
f5470a77 2661 insn = emit_insn (gen_add3_insn (dest, src, GEN_INT (offset)));
0100c5f9
RS
2662 RTX_FRAME_RELATED_P (insn) = frame_related_p;
2663 return;
2664 }
2665
f5470a77
RS
2666 /* Emit 2 additions/subtractions if the adjustment is less than 24 bits
2667 and either:
2668
2669 a) the offset cannot be loaded by a 16-bit move or
2670 b) there is no spare register into which we can move it. */
2671 if (moffset < 0x1000000
2672 && ((!temp1 && !can_create_pseudo_p ())
2673 || !aarch64_move_imm (moffset, mode)))
0100c5f9 2674 {
f5470a77 2675 HOST_WIDE_INT low_off = moffset & 0xfff;
0100c5f9 2676
f5470a77
RS
2677 low_off = offset < 0 ? -low_off : low_off;
2678 insn = emit_insn (gen_add3_insn (dest, src, GEN_INT (low_off)));
0100c5f9 2679 RTX_FRAME_RELATED_P (insn) = frame_related_p;
f5470a77 2680 insn = emit_insn (gen_add2_insn (dest, GEN_INT (offset - low_off)));
0100c5f9
RS
2681 RTX_FRAME_RELATED_P (insn) = frame_related_p;
2682 return;
2683 }
2684
2685 /* Emit a move immediate if required and an addition/subtraction. */
0100c5f9 2686 if (emit_move_imm)
f5470a77
RS
2687 {
2688 gcc_assert (temp1 != NULL_RTX || can_create_pseudo_p ());
2689 temp1 = aarch64_force_temporary (mode, temp1, GEN_INT (moffset));
2690 }
2691 insn = emit_insn (offset < 0
2692 ? gen_sub3_insn (dest, src, temp1)
2693 : gen_add3_insn (dest, src, temp1));
0100c5f9
RS
2694 if (frame_related_p)
2695 {
2696 RTX_FRAME_RELATED_P (insn) = frame_related_p;
f5470a77
RS
2697 rtx adj = plus_constant (mode, src, offset);
2698 add_reg_note (insn, REG_CFA_ADJUST_CFA, gen_rtx_SET (dest, adj));
0100c5f9
RS
2699 }
2700}
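As a worked illustration of the two-addition path, here is a small standalone sketch (plain C, hypothetical helper name) of how a sub-2^24 offset is split so that both parts fit ADD/SUB immediates:

/* Standalone sketch, not GCC code: split OFFSET (|OFFSET| < 2^24) into
   a low 12-bit part and a 12-bit-shifted remainder, mirroring the
   low_off computation above.  Example: 0x123456 splits into 0x456 and
   0x123000; -0x123456 splits into -0x456 and -0x123000.  */
static void split_24bit_offset (long long offset, long long *first,
				long long *second)
{
  long long moffset = offset < 0 ? -offset : offset;
  long long low = moffset & 0xfff;
  *first = offset < 0 ? -low : low;
  *second = offset - *first;
}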
2701
43cacb12
RS
2702/* Return the number of temporary registers that aarch64_add_offset
2703 would need to move OFFSET into a register or add OFFSET to a register;
2704 ADD_P is true if we want the latter rather than the former. */
2705
2706static unsigned int
2707aarch64_offset_temporaries (bool add_p, poly_int64 offset)
2708{
2709 /* This follows the same structure as aarch64_add_offset. */
2710 if (add_p && aarch64_sve_addvl_addpl_immediate_p (offset))
2711 return 0;
2712
2713 unsigned int count = 0;
2714 HOST_WIDE_INT factor = offset.coeffs[1];
2715 HOST_WIDE_INT constant = offset.coeffs[0] - factor;
2716 poly_int64 poly_offset (factor, factor);
2717 if (add_p && aarch64_sve_addvl_addpl_immediate_p (poly_offset))
2718 /* Need one register for the ADDVL/ADDPL result. */
2719 count += 1;
2720 else if (factor != 0)
2721 {
2722 factor = abs (factor);
2723 if (factor > 16 * (factor & -factor))
2724 /* Need one register for the CNT result and one for the multiplication
2725 factor. If necessary, the second temporary can be reused for the
2726 constant part of the offset. */
2727 return 2;
2728 /* Need one register for the CNT result (which might then
2729 be shifted). */
2730 count += 1;
2731 }
2732 return count + aarch64_add_offset_1_temporaries (constant);
2733}
2734
2735/* If X can be represented as a poly_int64, return the number
2736 of temporaries that are required to add it to a register.
2737 Return -1 otherwise. */
2738
2739int
2740aarch64_add_offset_temporaries (rtx x)
2741{
2742 poly_int64 offset;
2743 if (!poly_int_rtx_p (x, &offset))
2744 return -1;
2745 return aarch64_offset_temporaries (true, offset);
2746}
2747
f5470a77
RS
2748/* Set DEST to SRC + OFFSET. MODE is the mode of the addition.
2749 FRAME_RELATED_P is true if the RTX_FRAME_RELATED flag should
2750 be set and CFA adjustments added to the generated instructions.
2751
2752 TEMP1, if nonnull, is a register of mode MODE that can be used as a
2753 temporary if register allocation is already complete. This temporary
43cacb12
RS
2754 register may overlap DEST if !FRAME_RELATED_P but must not overlap SRC.
2755 If TEMP1 is known to hold abs (OFFSET), EMIT_MOVE_IMM can be set to
2756 false to avoid emitting the immediate again.
2757
2758 TEMP2, if nonnull, is a second temporary register that doesn't
2759 overlap either DEST or REG.
f5470a77
RS
2760
2761 Since this function may be used to adjust the stack pointer, we must
2762 ensure that it cannot cause transient stack deallocation (for example
2763 by first incrementing SP and then decrementing when adjusting by a
2764 large immediate). */
2765
2766static void
2767aarch64_add_offset (scalar_int_mode mode, rtx dest, rtx src,
43cacb12
RS
2768 poly_int64 offset, rtx temp1, rtx temp2,
2769 bool frame_related_p, bool emit_move_imm = true)
0100c5f9 2770{
f5470a77
RS
2771 gcc_assert (emit_move_imm || temp1 != NULL_RTX);
2772 gcc_assert (temp1 == NULL_RTX || !reg_overlap_mentioned_p (temp1, src));
43cacb12
RS
2773 gcc_assert (temp1 == NULL_RTX
2774 || !frame_related_p
2775 || !reg_overlap_mentioned_p (temp1, dest));
2776 gcc_assert (temp2 == NULL_RTX || !reg_overlap_mentioned_p (dest, temp2));
2777
2778 /* Try using ADDVL or ADDPL to add the whole value. */
2779 if (src != const0_rtx && aarch64_sve_addvl_addpl_immediate_p (offset))
2780 {
2781 rtx offset_rtx = gen_int_mode (offset, mode);
2782 rtx_insn *insn = emit_insn (gen_add3_insn (dest, src, offset_rtx));
2783 RTX_FRAME_RELATED_P (insn) = frame_related_p;
2784 return;
2785 }
2786
2787 /* Coefficient 1 is multiplied by the number of 128-bit blocks in an
2788 SVE vector register, over and above the minimum size of 128 bits.
2789 This is equivalent to half the value returned by CNTD with a
2790 vector shape of ALL. */
2791 HOST_WIDE_INT factor = offset.coeffs[1];
2792 HOST_WIDE_INT constant = offset.coeffs[0] - factor;
2793
2794 /* Try using ADDVL or ADDPL to add the VG-based part. */
2795 poly_int64 poly_offset (factor, factor);
2796 if (src != const0_rtx
2797 && aarch64_sve_addvl_addpl_immediate_p (poly_offset))
2798 {
2799 rtx offset_rtx = gen_int_mode (poly_offset, mode);
2800 if (frame_related_p)
2801 {
2802 rtx_insn *insn = emit_insn (gen_add3_insn (dest, src, offset_rtx));
2803 RTX_FRAME_RELATED_P (insn) = true;
2804 src = dest;
2805 }
2806 else
2807 {
2808 rtx addr = gen_rtx_PLUS (mode, src, offset_rtx);
2809 src = aarch64_force_temporary (mode, temp1, addr);
2810 temp1 = temp2;
2811 temp2 = NULL_RTX;
2812 }
2813 }
2814 /* Otherwise use a CNT-based sequence. */
2815 else if (factor != 0)
2816 {
2817 /* Use a subtraction if we have a negative factor. */
2818 rtx_code code = PLUS;
2819 if (factor < 0)
2820 {
2821 factor = -factor;
2822 code = MINUS;
2823 }
2824
2825 /* Calculate CNTD * FACTOR / 2. First try to fold the division
2826 into the multiplication. */
2827 rtx val;
2828 int shift = 0;
2829 if (factor & 1)
2830 /* Use a right shift by 1. */
2831 shift = -1;
2832 else
2833 factor /= 2;
2834 HOST_WIDE_INT low_bit = factor & -factor;
2835 if (factor <= 16 * low_bit)
2836 {
2837 if (factor > 16 * 8)
2838 {
2839 /* "CNTB Xn, ALL, MUL #FACTOR" is out of range, so calculate
2840 the value with the minimum multiplier and shift it into
2841 position. */
2842 int extra_shift = exact_log2 (low_bit);
2843 shift += extra_shift;
2844 factor >>= extra_shift;
2845 }
2846 val = gen_int_mode (poly_int64 (factor * 2, factor * 2), mode);
2847 }
2848 else
2849 {
2850 /* Use CNTD, then multiply it by FACTOR. */
2851 val = gen_int_mode (poly_int64 (2, 2), mode);
2852 val = aarch64_force_temporary (mode, temp1, val);
2853
2854 /* Go back to using a negative multiplication factor if we have
2855 no register from which to subtract. */
2856 if (code == MINUS && src == const0_rtx)
2857 {
2858 factor = -factor;
2859 code = PLUS;
2860 }
2861 rtx coeff1 = gen_int_mode (factor, mode);
2862 coeff1 = aarch64_force_temporary (mode, temp2, coeff1);
2863 val = gen_rtx_MULT (mode, val, coeff1);
2864 }
2865
2866 if (shift > 0)
2867 {
2868 /* Multiply by 1 << SHIFT. */
2869 val = aarch64_force_temporary (mode, temp1, val);
2870 val = gen_rtx_ASHIFT (mode, val, GEN_INT (shift));
2871 }
2872 else if (shift == -1)
2873 {
2874 /* Divide by 2. */
2875 val = aarch64_force_temporary (mode, temp1, val);
2876 val = gen_rtx_ASHIFTRT (mode, val, const1_rtx);
2877 }
2878
2879 /* Calculate SRC +/- CNTD * FACTOR / 2. */
2880 if (src != const0_rtx)
2881 {
2882 val = aarch64_force_temporary (mode, temp1, val);
2883 val = gen_rtx_fmt_ee (code, mode, src, val);
2884 }
2885 else if (code == MINUS)
2886 {
2887 val = aarch64_force_temporary (mode, temp1, val);
2888 val = gen_rtx_NEG (mode, val);
2889 }
2890
2891 if (constant == 0 || frame_related_p)
2892 {
2893 rtx_insn *insn = emit_insn (gen_rtx_SET (dest, val));
2894 if (frame_related_p)
2895 {
2896 RTX_FRAME_RELATED_P (insn) = true;
2897 add_reg_note (insn, REG_CFA_ADJUST_CFA,
2898 gen_rtx_SET (dest, plus_constant (Pmode, src,
2899 poly_offset)));
2900 }
2901 src = dest;
2902 if (constant == 0)
2903 return;
2904 }
2905 else
2906 {
2907 src = aarch64_force_temporary (mode, temp1, val);
2908 temp1 = temp2;
2909 temp2 = NULL_RTX;
2910 }
2911
2912 emit_move_imm = true;
2913 }
f5470a77 2914
f5470a77
RS
2915 aarch64_add_offset_1 (mode, dest, src, constant, temp1,
2916 frame_related_p, emit_move_imm);
0100c5f9
RS
2917}
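An illustrative decomposition, following the FACTOR/CONSTANT split above: an offset with coefficients (48, 16), meaning 48 bytes plus 16 bytes for every additional 128 bits of SVE vector length, has FACTOR = 16 and CONSTANT = 48 - 16 = 32. The (16, 16) part is typically added with a single ADDVL #1 (when SRC is not const0_rtx), and the remaining constant 32 is handled by aarch64_add_offset_1.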
2918
43cacb12
RS
2919/* Like aarch64_add_offset, but the offset is given as an rtx rather
2920 than a poly_int64. */
2921
2922void
2923aarch64_split_add_offset (scalar_int_mode mode, rtx dest, rtx src,
2924 rtx offset_rtx, rtx temp1, rtx temp2)
2925{
2926 aarch64_add_offset (mode, dest, src, rtx_to_poly_int64 (offset_rtx),
2927 temp1, temp2, false);
2928}
2929
f5470a77
RS
2930/* Add DELTA to the stack pointer, marking the instructions frame-related.
2931 TEMP1 is available as a temporary if nonnull. EMIT_MOVE_IMM is false
2932 if TEMP1 already contains abs (DELTA). */
2933
0100c5f9 2934static inline void
43cacb12 2935aarch64_add_sp (rtx temp1, rtx temp2, poly_int64 delta, bool emit_move_imm)
0100c5f9 2936{
f5470a77 2937 aarch64_add_offset (Pmode, stack_pointer_rtx, stack_pointer_rtx, delta,
43cacb12 2938 temp1, temp2, true, emit_move_imm);
0100c5f9
RS
2939}
2940
f5470a77
RS
2941/* Subtract DELTA from the stack pointer, marking the instructions
2942 frame-related if FRAME_RELATED_P. TEMP1 is available as a temporary
2943 if nonnull. */
2944
0100c5f9 2945static inline void
cd1bef27
JL
2946aarch64_sub_sp (rtx temp1, rtx temp2, poly_int64 delta, bool frame_related_p,
2947 bool emit_move_imm = true)
0100c5f9 2948{
f5470a77 2949 aarch64_add_offset (Pmode, stack_pointer_rtx, stack_pointer_rtx, -delta,
cd1bef27 2950 temp1, temp2, frame_related_p, emit_move_imm);
0100c5f9 2951}
82614948 2952
43cacb12
RS
2953/* Set DEST to (vec_series BASE STEP). */
2954
2955static void
2956aarch64_expand_vec_series (rtx dest, rtx base, rtx step)
82614948
RR
2957{
2958 machine_mode mode = GET_MODE (dest);
43cacb12
RS
2959 scalar_mode inner = GET_MODE_INNER (mode);
2960
2961 /* Each operand can be a register or an immediate in the range [-16, 15]. */
2962 if (!aarch64_sve_index_immediate_p (base))
2963 base = force_reg (inner, base);
2964 if (!aarch64_sve_index_immediate_p (step))
2965 step = force_reg (inner, step);
2966
2967 emit_set_insn (dest, gen_rtx_VEC_SERIES (mode, base, step));
2968}
82614948 2969
43cacb12
RS
2970/* Try to duplicate SRC into SVE register DEST, given that SRC is an
2971 integer of mode SRC_MODE. Return true on success. */
2972
2973static bool
2974aarch64_expand_sve_widened_duplicate (rtx dest, scalar_int_mode src_mode,
2975 rtx src)
2976{
2977 /* If the constant is smaller than 128 bits, we can do the move
2978 using a vector of SRC_MODEs. */
2979 if (src_mode != TImode)
2980 {
2981 poly_uint64 count = exact_div (GET_MODE_SIZE (GET_MODE (dest)),
2982 GET_MODE_SIZE (src_mode));
2983 machine_mode dup_mode = mode_for_vector (src_mode, count).require ();
2984 emit_move_insn (gen_lowpart (dup_mode, dest),
2985 gen_const_vec_duplicate (dup_mode, src));
2986 return true;
2987 }
2988
947b1372 2989 /* Use LD1RQ[BHWD] to load the 128 bits from memory. */
43cacb12
RS
2990 src = force_const_mem (src_mode, src);
2991 if (!src)
2992 return false;
2993
2994 /* Make sure that the address is legitimate. */
2995 if (!aarch64_sve_ld1r_operand_p (src))
2996 {
2997 rtx addr = force_reg (Pmode, XEXP (src, 0));
2998 src = replace_equiv_address (src, addr);
2999 }
3000
947b1372
RS
3001 machine_mode mode = GET_MODE (dest);
3002 unsigned int elem_bytes = GET_MODE_UNIT_SIZE (mode);
3003 machine_mode pred_mode = aarch64_sve_pred_mode (elem_bytes).require ();
3004 rtx ptrue = force_reg (pred_mode, CONSTM1_RTX (pred_mode));
3005 src = gen_rtx_UNSPEC (mode, gen_rtvec (2, ptrue, src), UNSPEC_LD1RQ);
3006 emit_insn (gen_rtx_SET (dest, src));
43cacb12
RS
3007 return true;
3008}
3009
3010/* Expand a move of general CONST_VECTOR SRC into DEST, given that it
3011 isn't a simple duplicate or series. */
3012
3013static void
3014aarch64_expand_sve_const_vector (rtx dest, rtx src)
3015{
3016 machine_mode mode = GET_MODE (src);
3017 unsigned int npatterns = CONST_VECTOR_NPATTERNS (src);
3018 unsigned int nelts_per_pattern = CONST_VECTOR_NELTS_PER_PATTERN (src);
3019 gcc_assert (npatterns > 1);
3020
3021 if (nelts_per_pattern == 1)
3022 {
3023 /* The constant is a repeating sequence of at least two elements,
3024 where the repeating elements occupy no more than 128 bits.
3025 Get an integer representation of the replicated value. */
8179efe0
RS
3026 scalar_int_mode int_mode;
3027 if (BYTES_BIG_ENDIAN)
3028 /* For now, always use LD1RQ to load the value on big-endian
3029 targets, since the handling of smaller integers includes a
3030 subreg that is semantically an element reverse. */
3031 int_mode = TImode;
3032 else
3033 {
3034 unsigned int int_bits = GET_MODE_UNIT_BITSIZE (mode) * npatterns;
3035 gcc_assert (int_bits <= 128);
3036 int_mode = int_mode_for_size (int_bits, 0).require ();
3037 }
43cacb12
RS
3038 rtx int_value = simplify_gen_subreg (int_mode, src, mode, 0);
3039 if (int_value
3040 && aarch64_expand_sve_widened_duplicate (dest, int_mode, int_value))
3041 return;
3042 }
3043
3044 /* Expand each pattern individually. */
3045 rtx_vector_builder builder;
3046 auto_vec<rtx, 16> vectors (npatterns);
3047 for (unsigned int i = 0; i < npatterns; ++i)
3048 {
3049 builder.new_vector (mode, 1, nelts_per_pattern);
3050 for (unsigned int j = 0; j < nelts_per_pattern; ++j)
3051 builder.quick_push (CONST_VECTOR_ELT (src, i + j * npatterns));
3052 vectors.quick_push (force_reg (mode, builder.build ()));
3053 }
3054
3055 /* Use permutes to interleave the separate vectors. */
3056 while (npatterns > 1)
3057 {
3058 npatterns /= 2;
3059 for (unsigned int i = 0; i < npatterns; ++i)
3060 {
3061 rtx tmp = (npatterns == 1 ? dest : gen_reg_rtx (mode));
3062 rtvec v = gen_rtvec (2, vectors[i], vectors[i + npatterns]);
3063 emit_set_insn (tmp, gen_rtx_UNSPEC (mode, v, UNSPEC_ZIP1));
3064 vectors[i] = tmp;
3065 }
3066 }
3067 gcc_assert (vectors[0] == dest);
3068}
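A worked example of the interleaving loop (illustrative, for the simple case where each pattern is a single repeating element): with NPATTERNS == 4 and patterns A, B, C, D, the first round forms ZIP1 (A..., C...) = A C A C ... and ZIP1 (B..., D...) = B D B D ..., and the second round zips those two intermediate vectors into the destination, giving the required element order A B C D A B C D ...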
3069
3070/* Set DEST to immediate IMM. For SVE vector modes, GEN_VEC_DUPLICATE
3071 is a pattern that can be used to set DEST to a replicated scalar
3072 element. */
3073
3074void
3075aarch64_expand_mov_immediate (rtx dest, rtx imm,
3076 rtx (*gen_vec_duplicate) (rtx, rtx))
3077{
3078 machine_mode mode = GET_MODE (dest);
82614948
RR
3079
3080 /* Check on what type of symbol it is. */
77e994c9
RS
3081 scalar_int_mode int_mode;
3082 if ((GET_CODE (imm) == SYMBOL_REF
3083 || GET_CODE (imm) == LABEL_REF
43cacb12
RS
3084 || GET_CODE (imm) == CONST
3085 || GET_CODE (imm) == CONST_POLY_INT)
77e994c9 3086 && is_a <scalar_int_mode> (mode, &int_mode))
82614948 3087 {
43cacb12
RS
3088 rtx mem;
3089 poly_int64 offset;
3090 HOST_WIDE_INT const_offset;
82614948
RR
3091 enum aarch64_symbol_type sty;
3092
3093 /* If we have (const (plus symbol offset)), separate out the offset
3094 before we start classifying the symbol. */
43cacb12 3095 rtx base = strip_offset (imm, &offset);
82614948 3096
43cacb12
RS
3097 /* We must always add an offset involving VL separately, rather than
3098 folding it into the relocation. */
3099 if (!offset.is_constant (&const_offset))
3100 {
3101 if (base == const0_rtx && aarch64_sve_cnt_immediate_p (offset))
3102 emit_insn (gen_rtx_SET (dest, imm));
3103 else
3104 {
3105 /* Do arithmetic on 32-bit values if the result is smaller
3106 than that. */
3107 if (partial_subreg_p (int_mode, SImode))
3108 {
3109 /* It is invalid to do symbol calculations in modes
3110 narrower than SImode. */
3111 gcc_assert (base == const0_rtx);
3112 dest = gen_lowpart (SImode, dest);
3113 int_mode = SImode;
3114 }
3115 if (base != const0_rtx)
3116 {
3117 base = aarch64_force_temporary (int_mode, dest, base);
3118 aarch64_add_offset (int_mode, dest, base, offset,
3119 NULL_RTX, NULL_RTX, false);
3120 }
3121 else
3122 aarch64_add_offset (int_mode, dest, base, offset,
3123 dest, NULL_RTX, false);
3124 }
3125 return;
3126 }
3127
3128 sty = aarch64_classify_symbol (base, const_offset);
82614948
RR
3129 switch (sty)
3130 {
3131 case SYMBOL_FORCE_TO_MEM:
43cacb12 3132 if (const_offset != 0
77e994c9 3133 && targetm.cannot_force_const_mem (int_mode, imm))
82614948
RR
3134 {
3135 gcc_assert (can_create_pseudo_p ());
77e994c9 3136 base = aarch64_force_temporary (int_mode, dest, base);
43cacb12
RS
3137 aarch64_add_offset (int_mode, dest, base, const_offset,
3138 NULL_RTX, NULL_RTX, false);
82614948
RR
3139 return;
3140 }
b4f50fd4 3141
82614948
RR
3142 mem = force_const_mem (ptr_mode, imm);
3143 gcc_assert (mem);
b4f50fd4
RR
3144
3145 /* If we aren't generating PC relative literals, then
3146 we need to expand the literal pool access carefully.
3147 This is something that needs to be done in a number
3148 of places, so could well live as a separate function. */
9ee6540a 3149 if (!aarch64_pcrelative_literal_loads)
b4f50fd4
RR
3150 {
3151 gcc_assert (can_create_pseudo_p ());
3152 base = gen_reg_rtx (ptr_mode);
3153 aarch64_expand_mov_immediate (base, XEXP (mem, 0));
00eee3fa
WD
3154 if (ptr_mode != Pmode)
3155 base = convert_memory_address (Pmode, base);
b4f50fd4
RR
3156 mem = gen_rtx_MEM (ptr_mode, base);
3157 }
3158
77e994c9
RS
3159 if (int_mode != ptr_mode)
3160 mem = gen_rtx_ZERO_EXTEND (int_mode, mem);
b4f50fd4 3161
f7df4a84 3162 emit_insn (gen_rtx_SET (dest, mem));
b4f50fd4 3163
82614948
RR
3164 return;
3165
3166 case SYMBOL_SMALL_TLSGD:
3167 case SYMBOL_SMALL_TLSDESC:
79496620 3168 case SYMBOL_SMALL_TLSIE:
1b1e81f8 3169 case SYMBOL_SMALL_GOT_28K:
6642bdb4 3170 case SYMBOL_SMALL_GOT_4G:
82614948 3171 case SYMBOL_TINY_GOT:
5ae7caad 3172 case SYMBOL_TINY_TLSIE:
43cacb12 3173 if (const_offset != 0)
82614948
RR
3174 {
3175 gcc_assert(can_create_pseudo_p ());
77e994c9 3176 base = aarch64_force_temporary (int_mode, dest, base);
43cacb12
RS
3177 aarch64_add_offset (int_mode, dest, base, const_offset,
3178 NULL_RTX, NULL_RTX, false);
82614948
RR
3179 return;
3180 }
3181 /* FALLTHRU */
3182
82614948
RR
3183 case SYMBOL_SMALL_ABSOLUTE:
3184 case SYMBOL_TINY_ABSOLUTE:
cbf5629e 3185 case SYMBOL_TLSLE12:
d18ba284 3186 case SYMBOL_TLSLE24:
cbf5629e
JW
3187 case SYMBOL_TLSLE32:
3188 case SYMBOL_TLSLE48:
82614948
RR
3189 aarch64_load_symref_appropriately (dest, imm, sty);
3190 return;
3191
3192 default:
3193 gcc_unreachable ();
3194 }
3195 }
3196
3197 if (!CONST_INT_P (imm))
3198 {
43cacb12
RS
3199 rtx base, step, value;
3200 if (GET_CODE (imm) == HIGH
3201 || aarch64_simd_valid_immediate (imm, NULL))
f7df4a84 3202 emit_insn (gen_rtx_SET (dest, imm));
43cacb12
RS
3203 else if (const_vec_series_p (imm, &base, &step))
3204 aarch64_expand_vec_series (dest, base, step);
3205 else if (const_vec_duplicate_p (imm, &value))
3206 {
3207 /* If the constant is out of range of an SVE vector move,
3208 load it from memory if we can, otherwise move it into
3209 a register and use a DUP. */
3210 scalar_mode inner_mode = GET_MODE_INNER (mode);
3211 rtx op = force_const_mem (inner_mode, value);
3212 if (!op)
3213 op = force_reg (inner_mode, value);
3214 else if (!aarch64_sve_ld1r_operand_p (op))
3215 {
3216 rtx addr = force_reg (Pmode, XEXP (op, 0));
3217 op = replace_equiv_address (op, addr);
3218 }
3219 emit_insn (gen_vec_duplicate (dest, op));
3220 }
3221 else if (GET_CODE (imm) == CONST_VECTOR
3222 && !GET_MODE_NUNITS (GET_MODE (imm)).is_constant ())
3223 aarch64_expand_sve_const_vector (dest, imm);
82614948 3224 else
43cacb12 3225 {
82614948
RR
3226 rtx mem = force_const_mem (mode, imm);
3227 gcc_assert (mem);
43cacb12 3228 emit_move_insn (dest, mem);
43e9d192 3229 }
82614948
RR
3230
3231 return;
43e9d192 3232 }
82614948 3233
77e994c9
RS
3234 aarch64_internal_mov_immediate (dest, imm, true,
3235 as_a <scalar_int_mode> (mode));
43e9d192
IB
3236}
3237
43cacb12
RS
3238/* Emit an SVE predicated move from SRC to DEST. PRED is a predicate
3239 that is known to contain PTRUE. */
3240
3241void
3242aarch64_emit_sve_pred_move (rtx dest, rtx pred, rtx src)
3243{
3244 emit_insn (gen_rtx_SET (dest, gen_rtx_UNSPEC (GET_MODE (dest),
3245 gen_rtvec (2, pred, src),
3246 UNSPEC_MERGE_PTRUE)));
3247}
3248
3249/* Expand a pre-RA SVE data move from SRC to DEST in which at least one
3250 operand is in memory. In this case we need to use the predicated LD1
3251 and ST1 instead of LDR and STR, both for correctness on big-endian
3252 targets and because LD1 and ST1 support a wider range of addressing modes.
3253 PRED_MODE is the mode of the predicate.
3254
3255 See the comment at the head of aarch64-sve.md for details about the
3256 big-endian handling. */
3257
3258void
3259aarch64_expand_sve_mem_move (rtx dest, rtx src, machine_mode pred_mode)
3260{
3261 machine_mode mode = GET_MODE (dest);
3262 rtx ptrue = force_reg (pred_mode, CONSTM1_RTX (pred_mode));
3263 if (!register_operand (src, mode)
3264 && !register_operand (dest, mode))
3265 {
3266 rtx tmp = gen_reg_rtx (mode);
3267 if (MEM_P (src))
3268 aarch64_emit_sve_pred_move (tmp, ptrue, src);
3269 else
3270 emit_move_insn (tmp, src);
3271 src = tmp;
3272 }
3273 aarch64_emit_sve_pred_move (dest, ptrue, src);
3274}
3275
002092be
RS
3276/* Called only on big-endian targets. See whether an SVE vector move
3277 from SRC to DEST is effectively a REV[BHW] instruction, because at
3278 least one operand is a subreg of an SVE vector that has wider or
3279 narrower elements. Return true and emit the instruction if so.
3280
3281 For example:
3282
3283 (set (reg:VNx8HI R1) (subreg:VNx8HI (reg:VNx16QI R2) 0))
3284
3285 represents a VIEW_CONVERT between the following vectors, viewed
3286 in memory order:
3287
3288 R2: { [0].high, [0].low, [1].high, [1].low, ... }
3289 R1: { [0], [1], [2], [3], ... }
3290
3291 The high part of lane X in R2 should therefore correspond to lane X*2
3292 of R1, but the register representations are:
3293
3294 msb lsb
3295 R2: ...... [1].high [1].low [0].high [0].low
3296 R1: ...... [3] [2] [1] [0]
3297
3298 where the low part of lane X in R2 corresponds to lane X*2 in R1.
3299 We therefore need a reverse operation to swap the high and low values
3300 around.
3301
3302 This is purely an optimization. Without it we would spill the
3303 subreg operand to the stack in one mode and reload it in the
3304 other mode, which has the same effect as the REV. */
3305
3306bool
3307aarch64_maybe_expand_sve_subreg_move (rtx dest, rtx src)
3308{
3309 gcc_assert (BYTES_BIG_ENDIAN);
3310 if (GET_CODE (dest) == SUBREG)
3311 dest = SUBREG_REG (dest);
3312 if (GET_CODE (src) == SUBREG)
3313 src = SUBREG_REG (src);
3314
3315 /* The optimization handles two single SVE REGs with different element
3316 sizes. */
3317 if (!REG_P (dest)
3318 || !REG_P (src)
3319 || aarch64_classify_vector_mode (GET_MODE (dest)) != VEC_SVE_DATA
3320 || aarch64_classify_vector_mode (GET_MODE (src)) != VEC_SVE_DATA
3321 || (GET_MODE_UNIT_SIZE (GET_MODE (dest))
3322 == GET_MODE_UNIT_SIZE (GET_MODE (src))))
3323 return false;
3324
3325 /* Generate *aarch64_sve_mov<mode>_subreg_be. */
3326 rtx ptrue = force_reg (VNx16BImode, CONSTM1_RTX (VNx16BImode));
3327 rtx unspec = gen_rtx_UNSPEC (GET_MODE (dest), gen_rtvec (2, ptrue, src),
3328 UNSPEC_REV_SUBREG);
3329 emit_insn (gen_rtx_SET (dest, unspec));
3330 return true;
3331}
3332
3333/* Return a copy of X with mode MODE, without changing its other
3334 attributes. Unlike gen_lowpart, this doesn't care whether the
3335 mode change is valid. */
3336
3337static rtx
3338aarch64_replace_reg_mode (rtx x, machine_mode mode)
3339{
3340 if (GET_MODE (x) == mode)
3341 return x;
3342
3343 x = shallow_copy_rtx (x);
3344 set_mode_and_regno (x, mode, REGNO (x));
3345 return x;
3346}
3347
3348/* Split a *aarch64_sve_mov<mode>_subreg_be pattern with the given
3349 operands. */
3350
3351void
3352aarch64_split_sve_subreg_move (rtx dest, rtx ptrue, rtx src)
3353{
3354 /* Decide which REV operation we need. The mode with narrower elements
3355 determines the mode of the operands and the mode with the wider
3356 elements determines the reverse width. */
3357 machine_mode mode_with_wider_elts = GET_MODE (dest);
3358 machine_mode mode_with_narrower_elts = GET_MODE (src);
3359 if (GET_MODE_UNIT_SIZE (mode_with_wider_elts)
3360 < GET_MODE_UNIT_SIZE (mode_with_narrower_elts))
3361 std::swap (mode_with_wider_elts, mode_with_narrower_elts);
3362
3363 unsigned int wider_bytes = GET_MODE_UNIT_SIZE (mode_with_wider_elts);
3364 unsigned int unspec;
3365 if (wider_bytes == 8)
3366 unspec = UNSPEC_REV64;
3367 else if (wider_bytes == 4)
3368 unspec = UNSPEC_REV32;
3369 else if (wider_bytes == 2)
3370 unspec = UNSPEC_REV16;
3371 else
3372 gcc_unreachable ();
3373 machine_mode pred_mode = aarch64_sve_pred_mode (wider_bytes).require ();
3374
3375 /* Emit:
3376
3377 (set DEST (unspec [PTRUE (unspec [SRC] UNSPEC_REV<nn>)]
3378 UNSPEC_MERGE_PTRUE))
3379
3380 with the appropriate modes. */
3381 ptrue = gen_lowpart (pred_mode, ptrue);
3382 dest = aarch64_replace_reg_mode (dest, mode_with_narrower_elts);
3383 src = aarch64_replace_reg_mode (src, mode_with_narrower_elts);
3384 src = gen_rtx_UNSPEC (mode_with_narrower_elts, gen_rtvec (1, src), unspec);
3385 src = gen_rtx_UNSPEC (mode_with_narrower_elts, gen_rtvec (2, ptrue, src),
3386 UNSPEC_MERGE_PTRUE);
3387 emit_insn (gen_rtx_SET (dest, src));
3388}
3389
43e9d192 3390static bool
fee9ba42
JW
3391aarch64_function_ok_for_sibcall (tree decl ATTRIBUTE_UNUSED,
3392 tree exp ATTRIBUTE_UNUSED)
43e9d192 3393{
a0d0b980
SE
3394 if (aarch64_simd_decl_p (cfun->decl) != aarch64_simd_decl_p (decl))
3395 return false;
3396
43e9d192
IB
3397 return true;
3398}
3399
3400/* Implement TARGET_PASS_BY_REFERENCE. */
3401
3402static bool
3403aarch64_pass_by_reference (cumulative_args_t pcum ATTRIBUTE_UNUSED,
ef4bddc2 3404 machine_mode mode,
43e9d192
IB
3405 const_tree type,
3406 bool named ATTRIBUTE_UNUSED)
3407{
3408 HOST_WIDE_INT size;
ef4bddc2 3409 machine_mode dummymode;
43e9d192
IB
3410 int nregs;
3411
3412 /* GET_MODE_SIZE (BLKmode) is useless since it is 0. */
6a70badb
RS
3413 if (mode == BLKmode && type)
3414 size = int_size_in_bytes (type);
3415 else
3416 /* No frontends can create types with variable-sized modes, so we
3417 shouldn't be asked to pass or return them. */
3418 size = GET_MODE_SIZE (mode).to_constant ();
43e9d192 3419
aadc1c43
MHD
3420 /* Aggregates are passed by reference based on their size. */
3421 if (type && AGGREGATE_TYPE_P (type))
43e9d192 3422 {
aadc1c43 3423 size = int_size_in_bytes (type);
43e9d192
IB
3424 }
3425
 3426  /* Variable sized arguments are always passed by reference.  */
3427 if (size < 0)
3428 return true;
3429
3430 /* Can this be a candidate to be passed in fp/simd register(s)? */
3431 if (aarch64_vfp_is_call_or_return_candidate (mode, type,
3432 &dummymode, &nregs,
3433 NULL))
3434 return false;
3435
3436 /* Arguments which are variable sized or larger than 2 registers are
 3437     passed by reference unless they are a homogeneous floating point
3438 aggregate. */
3439 return size > 2 * UNITS_PER_WORD;
3440}
3441
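/* Illustrative examples (not from the sources) of the rules above:

     struct hfa { double a, b, c; };   24 bytes, but a homogeneous
					floating-point aggregate, so it is
					still passed in SIMD/FP registers
					rather than by reference.
     struct big { long x, y, z; };     24 bytes and not an HFA, so it is
					larger than 2 registers and is
					passed by reference.  */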
3442/* Return TRUE if VALTYPE is padded to its least significant bits. */
3443static bool
3444aarch64_return_in_msb (const_tree valtype)
3445{
ef4bddc2 3446 machine_mode dummy_mode;
43e9d192
IB
3447 int dummy_int;
3448
3449 /* Never happens in little-endian mode. */
3450 if (!BYTES_BIG_ENDIAN)
3451 return false;
3452
3453 /* Only composite types smaller than or equal to 16 bytes can
3454 be potentially returned in registers. */
3455 if (!aarch64_composite_type_p (valtype, TYPE_MODE (valtype))
3456 || int_size_in_bytes (valtype) <= 0
3457 || int_size_in_bytes (valtype) > 16)
3458 return false;
3459
3460 /* But not a composite that is an HFA (Homogeneous Floating-point Aggregate)
3461 or an HVA (Homogeneous Short-Vector Aggregate); such a special composite
3462 is always passed/returned in the least significant bits of fp/simd
3463 register(s). */
3464 if (aarch64_vfp_is_call_or_return_candidate (TYPE_MODE (valtype), valtype,
3465 &dummy_mode, &dummy_int, NULL))
3466 return false;
3467
3468 return true;
3469}
3470
3471/* Implement TARGET_FUNCTION_VALUE.
3472 Define how to find the value returned by a function. */
3473
3474static rtx
3475aarch64_function_value (const_tree type, const_tree func,
3476 bool outgoing ATTRIBUTE_UNUSED)
3477{
ef4bddc2 3478 machine_mode mode;
43e9d192
IB
3479 int unsignedp;
3480 int count;
ef4bddc2 3481 machine_mode ag_mode;
43e9d192
IB
3482
3483 mode = TYPE_MODE (type);
3484 if (INTEGRAL_TYPE_P (type))
3485 mode = promote_function_mode (type, mode, &unsignedp, func, 1);
3486
3487 if (aarch64_return_in_msb (type))
3488 {
3489 HOST_WIDE_INT size = int_size_in_bytes (type);
3490
3491 if (size % UNITS_PER_WORD != 0)
3492 {
3493 size += UNITS_PER_WORD - size % UNITS_PER_WORD;
f4b31647 3494 mode = int_mode_for_size (size * BITS_PER_UNIT, 0).require ();
43e9d192
IB
3495 }
3496 }
3497
3498 if (aarch64_vfp_is_call_or_return_candidate (mode, type,
3499 &ag_mode, &count, NULL))
3500 {
3501 if (!aarch64_composite_type_p (type, mode))
3502 {
3503 gcc_assert (count == 1 && mode == ag_mode);
3504 return gen_rtx_REG (mode, V0_REGNUM);
3505 }
3506 else
3507 {
3508 int i;
3509 rtx par;
3510
3511 par = gen_rtx_PARALLEL (mode, rtvec_alloc (count));
3512 for (i = 0; i < count; i++)
3513 {
3514 rtx tmp = gen_rtx_REG (ag_mode, V0_REGNUM + i);
6a70badb
RS
3515 rtx offset = gen_int_mode (i * GET_MODE_SIZE (ag_mode), Pmode);
3516 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp, offset);
43e9d192
IB
3517 XVECEXP (par, 0, i) = tmp;
3518 }
3519 return par;
3520 }
3521 }
3522 else
3523 return gen_rtx_REG (mode, R0_REGNUM);
3524}
3525
 3526/* Implement TARGET_FUNCTION_VALUE_REGNO_P.
 3527   Return true if REGNO is the number of a hard register in which the values
 3528   of a called function may come back.  */
3529
3530static bool
3531aarch64_function_value_regno_p (const unsigned int regno)
3532{
3533 /* Maximum of 16 bytes can be returned in the general registers. Examples
3534 of 16-byte return values are: 128-bit integers and 16-byte small
3535 structures (excluding homogeneous floating-point aggregates). */
3536 if (regno == R0_REGNUM || regno == R1_REGNUM)
3537 return true;
3538
3539 /* Up to four fp/simd registers can return a function value, e.g. a
3540 homogeneous floating-point aggregate having four members. */
3541 if (regno >= V0_REGNUM && regno < V0_REGNUM + HA_MAX_NUM_FLDS)
d5726973 3542 return TARGET_FLOAT;
43e9d192
IB
3543
3544 return false;
3545}
3546
3547/* Implement TARGET_RETURN_IN_MEMORY.
3548
3549 If the type T of the result of a function is such that
3550 void func (T arg)
3551 would require that arg be passed as a value in a register (or set of
3552 registers) according to the parameter passing rules, then the result
3553 is returned in the same registers as would be used for such an
3554 argument. */
3555
3556static bool
3557aarch64_return_in_memory (const_tree type, const_tree fndecl ATTRIBUTE_UNUSED)
3558{
3559 HOST_WIDE_INT size;
ef4bddc2 3560 machine_mode ag_mode;
43e9d192
IB
3561 int count;
3562
3563 if (!AGGREGATE_TYPE_P (type)
3564 && TREE_CODE (type) != COMPLEX_TYPE
3565 && TREE_CODE (type) != VECTOR_TYPE)
 3566    /* Simple scalar types are always returned in registers.  */
3567 return false;
3568
3569 if (aarch64_vfp_is_call_or_return_candidate (TYPE_MODE (type),
3570 type,
3571 &ag_mode,
3572 &count,
3573 NULL))
3574 return false;
3575
 3576  /* Types larger than 2 registers are returned in memory.  */
3577 size = int_size_in_bytes (type);
3578 return (size < 0 || size > 2 * UNITS_PER_WORD);
3579}
3580
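/* Illustrative example (not from the sources): a hypothetical return type

     struct big { long w, x, y, z; };

   is 32 bytes and not an HFA/HVA, so it is returned in memory; per AAPCS64
   the caller passes the address of the result location in x8.  A 16-byte
   struct of two longs, by contrast, comes back in x0/x1.  */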
3581static bool
ef4bddc2 3582aarch64_vfp_is_call_candidate (cumulative_args_t pcum_v, machine_mode mode,
43e9d192
IB
3583 const_tree type, int *nregs)
3584{
3585 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
3586 return aarch64_vfp_is_call_or_return_candidate (mode,
3587 type,
3588 &pcum->aapcs_vfp_rmode,
3589 nregs,
3590 NULL);
3591}
3592
985b8393 3593/* Given MODE and TYPE of a function argument, return the alignment in
43e9d192
IB
3594 bits. The idea is to suppress any stronger alignment requested by
3595 the user and opt for the natural alignment (specified in AAPCS64 \S 4.1).
3596 This is a helper function for local use only. */
3597
985b8393 3598static unsigned int
ef4bddc2 3599aarch64_function_arg_alignment (machine_mode mode, const_tree type)
43e9d192 3600{
75d6cc81 3601 if (!type)
985b8393 3602 return GET_MODE_ALIGNMENT (mode);
2ec07fa6 3603
75d6cc81 3604 if (integer_zerop (TYPE_SIZE (type)))
985b8393 3605 return 0;
43e9d192 3606
75d6cc81
AL
3607 gcc_assert (TYPE_MODE (type) == mode);
3608
3609 if (!AGGREGATE_TYPE_P (type))
985b8393 3610 return TYPE_ALIGN (TYPE_MAIN_VARIANT (type));
75d6cc81
AL
3611
3612 if (TREE_CODE (type) == ARRAY_TYPE)
985b8393 3613 return TYPE_ALIGN (TREE_TYPE (type));
75d6cc81 3614
985b8393 3615 unsigned int alignment = 0;
75d6cc81 3616 for (tree field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
985b8393
JJ
3617 if (TREE_CODE (field) == FIELD_DECL)
3618 alignment = std::max (alignment, DECL_ALIGN (field));
43e9d192 3619
985b8393 3620 return alignment;
43e9d192
IB
3621}
3622
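/* Illustrative example (not from the sources): a hypothetical argument type

     struct u { __int128 v; };

   has a natural alignment of 16 bytes, so aarch64_function_arg_alignment
   returns 128 bits.  Rule C.8 in aarch64_layout_arg below then rounds the
   next general register number up to an even number before allocating the
   two registers, and aarch64_function_arg_boundary below gives the
   argument a 16-byte-aligned stack slot if it ends up on the stack.  */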
 3623/* Lay out a function argument according to the AAPCS64 rules.  The rule
 3624   numbers refer to those in the AAPCS64.  */
3625
3626static void
ef4bddc2 3627aarch64_layout_arg (cumulative_args_t pcum_v, machine_mode mode,
43e9d192
IB
3628 const_tree type,
3629 bool named ATTRIBUTE_UNUSED)
3630{
3631 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
3632 int ncrn, nvrn, nregs;
3633 bool allocate_ncrn, allocate_nvrn;
3abf17cf 3634 HOST_WIDE_INT size;
43e9d192
IB
3635
3636 /* We need to do this once per argument. */
3637 if (pcum->aapcs_arg_processed)
3638 return;
3639
3640 pcum->aapcs_arg_processed = true;
3641
3abf17cf 3642  /* Size in bytes, rounded up to the nearest multiple of 8 bytes.  */
6a70badb
RS
3643 if (type)
3644 size = int_size_in_bytes (type);
3645 else
3646 /* No frontends can create types with variable-sized modes, so we
3647 shouldn't be asked to pass or return them. */
3648 size = GET_MODE_SIZE (mode).to_constant ();
3649 size = ROUND_UP (size, UNITS_PER_WORD);
3abf17cf 3650
43e9d192
IB
3651 allocate_ncrn = (type) ? !(FLOAT_TYPE_P (type)) : !FLOAT_MODE_P (mode);
3652 allocate_nvrn = aarch64_vfp_is_call_candidate (pcum_v,
3653 mode,
3654 type,
3655 &nregs);
3656
 3657  /* allocate_ncrn may be a false positive, but allocate_nvrn is quite reliable.
3658 The following code thus handles passing by SIMD/FP registers first. */
3659
3660 nvrn = pcum->aapcs_nvrn;
3661
 3662  /* C.1 - C.5 for floating point, homogeneous floating-point aggregates (HFA)
 3663     and homogeneous short-vector aggregates (HVA).  */
3664 if (allocate_nvrn)
3665 {
261fb553 3666 if (!TARGET_FLOAT)
fc29dfc9 3667 aarch64_err_no_fpadvsimd (mode);
261fb553 3668
43e9d192
IB
3669 if (nvrn + nregs <= NUM_FP_ARG_REGS)
3670 {
3671 pcum->aapcs_nextnvrn = nvrn + nregs;
3672 if (!aarch64_composite_type_p (type, mode))
3673 {
3674 gcc_assert (nregs == 1);
3675 pcum->aapcs_reg = gen_rtx_REG (mode, V0_REGNUM + nvrn);
3676 }
3677 else
3678 {
3679 rtx par;
3680 int i;
3681 par = gen_rtx_PARALLEL (mode, rtvec_alloc (nregs));
3682 for (i = 0; i < nregs; i++)
3683 {
3684 rtx tmp = gen_rtx_REG (pcum->aapcs_vfp_rmode,
3685 V0_REGNUM + nvrn + i);
6a70badb
RS
3686 rtx offset = gen_int_mode
3687 (i * GET_MODE_SIZE (pcum->aapcs_vfp_rmode), Pmode);
3688 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp, offset);
43e9d192
IB
3689 XVECEXP (par, 0, i) = tmp;
3690 }
3691 pcum->aapcs_reg = par;
3692 }
3693 return;
3694 }
3695 else
3696 {
3697 /* C.3 NSRN is set to 8. */
3698 pcum->aapcs_nextnvrn = NUM_FP_ARG_REGS;
3699 goto on_stack;
3700 }
3701 }
3702
3703 ncrn = pcum->aapcs_ncrn;
3abf17cf 3704 nregs = size / UNITS_PER_WORD;
43e9d192
IB
3705
 3706  /* C.6 - C.9, though the sign and zero extension semantics are
 3707     handled elsewhere.  This is the case where the argument fits
 3708     entirely in general registers.  */
3709 if (allocate_ncrn && (ncrn + nregs <= NUM_ARG_REGS))
3710 {
43e9d192
IB
3711
3712 gcc_assert (nregs == 0 || nregs == 1 || nregs == 2);
3713
3714 /* C.8 if the argument has an alignment of 16 then the NGRN is
3715 rounded up to the next even number. */
985b8393
JJ
3716 if (nregs == 2
3717 && ncrn % 2
2ec07fa6 3718 /* The == 16 * BITS_PER_UNIT instead of >= 16 * BITS_PER_UNIT
985b8393 3719 comparison is there because for > 16 * BITS_PER_UNIT
2ec07fa6
RR
3720 alignment nregs should be > 2 and therefore it should be
3721 passed by reference rather than value. */
985b8393
JJ
3722 && aarch64_function_arg_alignment (mode, type) == 16 * BITS_PER_UNIT)
3723 {
3724 ++ncrn;
3725 gcc_assert (ncrn + nregs <= NUM_ARG_REGS);
43e9d192 3726 }
2ec07fa6 3727
43e9d192
IB
3728 /* NREGS can be 0 when e.g. an empty structure is to be passed.
3729 A reg is still generated for it, but the caller should be smart
3730 enough not to use it. */
3731 if (nregs == 0 || nregs == 1 || GET_MODE_CLASS (mode) == MODE_INT)
2ec07fa6 3732 pcum->aapcs_reg = gen_rtx_REG (mode, R0_REGNUM + ncrn);
43e9d192
IB
3733 else
3734 {
3735 rtx par;
3736 int i;
3737
3738 par = gen_rtx_PARALLEL (mode, rtvec_alloc (nregs));
3739 for (i = 0; i < nregs; i++)
3740 {
3741 rtx tmp = gen_rtx_REG (word_mode, R0_REGNUM + ncrn + i);
3742 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp,
3743 GEN_INT (i * UNITS_PER_WORD));
3744 XVECEXP (par, 0, i) = tmp;
3745 }
3746 pcum->aapcs_reg = par;
3747 }
3748
3749 pcum->aapcs_nextncrn = ncrn + nregs;
3750 return;
3751 }
3752
3753 /* C.11 */
3754 pcum->aapcs_nextncrn = NUM_ARG_REGS;
3755
 3756  /* The argument is passed on the stack; record the needed number of words for
3abf17cf 3757 this argument and align the total size if necessary. */
43e9d192 3758on_stack:
3abf17cf 3759 pcum->aapcs_stack_words = size / UNITS_PER_WORD;
2ec07fa6 3760
985b8393 3761 if (aarch64_function_arg_alignment (mode, type) == 16 * BITS_PER_UNIT)
4f59f9f2
UB
3762 pcum->aapcs_stack_size = ROUND_UP (pcum->aapcs_stack_size,
3763 16 / UNITS_PER_WORD);
43e9d192
IB
3764 return;
3765}
3766
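/* Illustrative example (not from the sources): for a hypothetical call

     struct hfa { float a, b, c, d; };
     void f (struct hfa x, int y);

   X is an HFA with four members, so rules C.1 - C.5 above pass it in
   s0-s3, one SIMD/FP register per member, while Y still goes in w0
   because the general register count is unaffected.  If fewer than four
   SIMD/FP argument registers were left, C.3 would set the NSRN to 8 and
   the whole of X would be passed on the stack instead.  */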
3767/* Implement TARGET_FUNCTION_ARG. */
3768
3769static rtx
ef4bddc2 3770aarch64_function_arg (cumulative_args_t pcum_v, machine_mode mode,
43e9d192
IB
3771 const_tree type, bool named)
3772{
3773 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
3774 gcc_assert (pcum->pcs_variant == ARM_PCS_AAPCS64);
3775
3776 if (mode == VOIDmode)
3777 return NULL_RTX;
3778
3779 aarch64_layout_arg (pcum_v, mode, type, named);
3780 return pcum->aapcs_reg;
3781}
3782
3783void
3784aarch64_init_cumulative_args (CUMULATIVE_ARGS *pcum,
3785 const_tree fntype ATTRIBUTE_UNUSED,
3786 rtx libname ATTRIBUTE_UNUSED,
3787 const_tree fndecl ATTRIBUTE_UNUSED,
3788 unsigned n_named ATTRIBUTE_UNUSED)
3789{
3790 pcum->aapcs_ncrn = 0;
3791 pcum->aapcs_nvrn = 0;
3792 pcum->aapcs_nextncrn = 0;
3793 pcum->aapcs_nextnvrn = 0;
3794 pcum->pcs_variant = ARM_PCS_AAPCS64;
3795 pcum->aapcs_reg = NULL_RTX;
3796 pcum->aapcs_arg_processed = false;
3797 pcum->aapcs_stack_words = 0;
3798 pcum->aapcs_stack_size = 0;
3799
261fb553
AL
3800 if (!TARGET_FLOAT
3801 && fndecl && TREE_PUBLIC (fndecl)
3802 && fntype && fntype != error_mark_node)
3803 {
3804 const_tree type = TREE_TYPE (fntype);
3805 machine_mode mode ATTRIBUTE_UNUSED; /* To pass pointer as argument. */
3806 int nregs ATTRIBUTE_UNUSED; /* Likewise. */
3807 if (aarch64_vfp_is_call_or_return_candidate (TYPE_MODE (type), type,
3808 &mode, &nregs, NULL))
fc29dfc9 3809 aarch64_err_no_fpadvsimd (TYPE_MODE (type));
261fb553 3810 }
43e9d192
IB
3811 return;
3812}
3813
3814static void
3815aarch64_function_arg_advance (cumulative_args_t pcum_v,
ef4bddc2 3816 machine_mode mode,
43e9d192
IB
3817 const_tree type,
3818 bool named)
3819{
3820 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
3821 if (pcum->pcs_variant == ARM_PCS_AAPCS64)
3822 {
3823 aarch64_layout_arg (pcum_v, mode, type, named);
3824 gcc_assert ((pcum->aapcs_reg != NULL_RTX)
3825 != (pcum->aapcs_stack_words != 0));
3826 pcum->aapcs_arg_processed = false;
3827 pcum->aapcs_ncrn = pcum->aapcs_nextncrn;
3828 pcum->aapcs_nvrn = pcum->aapcs_nextnvrn;
3829 pcum->aapcs_stack_size += pcum->aapcs_stack_words;
3830 pcum->aapcs_stack_words = 0;
3831 pcum->aapcs_reg = NULL_RTX;
3832 }
3833}
3834
3835bool
3836aarch64_function_arg_regno_p (unsigned regno)
3837{
3838 return ((GP_REGNUM_P (regno) && regno < R0_REGNUM + NUM_ARG_REGS)
3839 || (FP_REGNUM_P (regno) && regno < V0_REGNUM + NUM_FP_ARG_REGS));
3840}
3841
3842/* Implement FUNCTION_ARG_BOUNDARY. Every parameter gets at least
3843 PARM_BOUNDARY bits of alignment, but will be given anything up
3844 to STACK_BOUNDARY bits if the type requires it. This makes sure
3845 that both before and after the layout of each argument, the Next
3846 Stacked Argument Address (NSAA) will have a minimum alignment of
3847 8 bytes. */
3848
3849static unsigned int
ef4bddc2 3850aarch64_function_arg_boundary (machine_mode mode, const_tree type)
43e9d192 3851{
985b8393
JJ
3852 unsigned int alignment = aarch64_function_arg_alignment (mode, type);
3853 return MIN (MAX (alignment, PARM_BOUNDARY), STACK_BOUNDARY);
43e9d192
IB
3854}
3855
43cacb12
RS
3856/* Implement TARGET_GET_RAW_RESULT_MODE and TARGET_GET_RAW_ARG_MODE. */
3857
3858static fixed_size_mode
3859aarch64_get_reg_raw_mode (int regno)
3860{
3861 if (TARGET_SVE && FP_REGNUM_P (regno))
3862 /* Don't use the SVE part of the register for __builtin_apply and
3863 __builtin_return. The SVE registers aren't used by the normal PCS,
3864 so using them there would be a waste of time. The PCS extensions
3865 for SVE types are fundamentally incompatible with the
3866 __builtin_return/__builtin_apply interface. */
3867 return as_a <fixed_size_mode> (V16QImode);
3868 return default_get_reg_raw_mode (regno);
3869}
3870
76b0cbf8 3871/* Implement TARGET_FUNCTION_ARG_PADDING.
43e9d192
IB
3872
3873 Small aggregate types are placed in the lowest memory address.
3874
3875 The related parameter passing rules are B.4, C.3, C.5 and C.14. */
3876
76b0cbf8
RS
3877static pad_direction
3878aarch64_function_arg_padding (machine_mode mode, const_tree type)
43e9d192
IB
3879{
3880 /* On little-endian targets, the least significant byte of every stack
3881 argument is passed at the lowest byte address of the stack slot. */
3882 if (!BYTES_BIG_ENDIAN)
76b0cbf8 3883 return PAD_UPWARD;
43e9d192 3884
00edcfbe 3885 /* Otherwise, integral, floating-point and pointer types are padded downward:
43e9d192
IB
3886 the least significant byte of a stack argument is passed at the highest
3887 byte address of the stack slot. */
3888 if (type
00edcfbe
YZ
3889 ? (INTEGRAL_TYPE_P (type) || SCALAR_FLOAT_TYPE_P (type)
3890 || POINTER_TYPE_P (type))
43e9d192 3891 : (SCALAR_INT_MODE_P (mode) || SCALAR_FLOAT_MODE_P (mode)))
76b0cbf8 3892 return PAD_DOWNWARD;
43e9d192
IB
3893
3894 /* Everything else padded upward, i.e. data in first byte of stack slot. */
76b0cbf8 3895 return PAD_UPWARD;
43e9d192
IB
3896}
3897
3898/* Similarly, for use by BLOCK_REG_PADDING (MODE, TYPE, FIRST).
3899
3900 It specifies padding for the last (may also be the only)
 3901   element of a block move between registers and memory.  Assuming
 3902   the block is in memory, padding upward means that the last
 3903   element is padded after its most significant byte, while with
 3904   downward padding the last element is padded on its least
 3905   significant byte side.
3906
3907 Small aggregates and small complex types are always padded
3908 upwards.
3909
3910 We don't need to worry about homogeneous floating-point or
3911 short-vector aggregates; their move is not affected by the
3912 padding direction determined here. Regardless of endianness,
3913 each element of such an aggregate is put in the least
3914 significant bits of a fp/simd register.
3915
3916 Return !BYTES_BIG_ENDIAN if the least significant byte of the
3917 register has useful data, and return the opposite if the most
3918 significant byte does. */
3919
3920bool
ef4bddc2 3921aarch64_pad_reg_upward (machine_mode mode, const_tree type,
43e9d192
IB
3922 bool first ATTRIBUTE_UNUSED)
3923{
3924
3925 /* Small composite types are always padded upward. */
3926 if (BYTES_BIG_ENDIAN && aarch64_composite_type_p (type, mode))
3927 {
6a70badb
RS
3928 HOST_WIDE_INT size;
3929 if (type)
3930 size = int_size_in_bytes (type);
3931 else
3932 /* No frontends can create types with variable-sized modes, so we
3933 shouldn't be asked to pass or return them. */
3934 size = GET_MODE_SIZE (mode).to_constant ();
43e9d192
IB
3935 if (size < 2 * UNITS_PER_WORD)
3936 return true;
3937 }
3938
3939 /* Otherwise, use the default padding. */
3940 return !BYTES_BIG_ENDIAN;
3941}
3942
095a2d76 3943static scalar_int_mode
43e9d192
IB
3944aarch64_libgcc_cmp_return_mode (void)
3945{
3946 return SImode;
3947}
3948
a3eb8a52
EB
3949#define PROBE_INTERVAL (1 << STACK_CHECK_PROBE_INTERVAL_EXP)
3950
3951/* We use the 12-bit shifted immediate arithmetic instructions so values
 3952   must be a multiple of (1 << 12), i.e. 4096.  */
3953#define ARITH_FACTOR 4096
3954
3955#if (PROBE_INTERVAL % ARITH_FACTOR) != 0
3956#error Cannot use simple address calculation for stack probing
3957#endif
3958
3959/* The pair of scratch registers used for stack probing. */
8921ccbb
OH
3960#define PROBE_STACK_FIRST_REG R9_REGNUM
3961#define PROBE_STACK_SECOND_REG R10_REGNUM
a3eb8a52 3962
6a70badb 3963/* Emit code to probe a range of stack addresses from FIRST to FIRST+POLY_SIZE,
a3eb8a52
EB
3964 inclusive. These are offsets from the current stack pointer. */
3965
3966static void
6a70badb 3967aarch64_emit_probe_stack_range (HOST_WIDE_INT first, poly_int64 poly_size)
a3eb8a52 3968{
6a70badb
RS
3969 HOST_WIDE_INT size;
3970 if (!poly_size.is_constant (&size))
3971 {
3972 sorry ("stack probes for SVE frames");
3973 return;
3974 }
3975
5f5c5e0f 3976 rtx reg1 = gen_rtx_REG (Pmode, PROBE_STACK_FIRST_REG);
a3eb8a52
EB
3977
3978 /* See the same assertion on PROBE_INTERVAL above. */
3979 gcc_assert ((first % ARITH_FACTOR) == 0);
3980
3981 /* See if we have a constant small number of probes to generate. If so,
3982 that's the easy case. */
3983 if (size <= PROBE_INTERVAL)
3984 {
3985 const HOST_WIDE_INT base = ROUND_UP (size, ARITH_FACTOR);
3986
3987 emit_set_insn (reg1,
5f5c5e0f 3988 plus_constant (Pmode,
a3eb8a52 3989 stack_pointer_rtx, -(first + base)));
5f5c5e0f 3990 emit_stack_probe (plus_constant (Pmode, reg1, base - size));
a3eb8a52
EB
3991 }
3992
3993 /* The run-time loop is made up of 8 insns in the generic case while the
3994 compile-time loop is made up of 4+2*(n-2) insns for n # of intervals. */
3995 else if (size <= 4 * PROBE_INTERVAL)
3996 {
3997 HOST_WIDE_INT i, rem;
3998
3999 emit_set_insn (reg1,
5f5c5e0f 4000 plus_constant (Pmode,
a3eb8a52
EB
4001 stack_pointer_rtx,
4002 -(first + PROBE_INTERVAL)));
4003 emit_stack_probe (reg1);
4004
4005 /* Probe at FIRST + N * PROBE_INTERVAL for values of N from 2 until
4006 it exceeds SIZE. If only two probes are needed, this will not
4007 generate any code. Then probe at FIRST + SIZE. */
4008 for (i = 2 * PROBE_INTERVAL; i < size; i += PROBE_INTERVAL)
4009 {
4010 emit_set_insn (reg1,
5f5c5e0f 4011 plus_constant (Pmode, reg1, -PROBE_INTERVAL));
a3eb8a52
EB
4012 emit_stack_probe (reg1);
4013 }
4014
4015 rem = size - (i - PROBE_INTERVAL);
4016 if (rem > 256)
4017 {
4018 const HOST_WIDE_INT base = ROUND_UP (rem, ARITH_FACTOR);
4019
5f5c5e0f
EB
4020 emit_set_insn (reg1, plus_constant (Pmode, reg1, -base));
4021 emit_stack_probe (plus_constant (Pmode, reg1, base - rem));
a3eb8a52
EB
4022 }
4023 else
5f5c5e0f 4024 emit_stack_probe (plus_constant (Pmode, reg1, -rem));
a3eb8a52
EB
4025 }
4026
4027 /* Otherwise, do the same as above, but in a loop. Note that we must be
4028 extra careful with variables wrapping around because we might be at
4029 the very top (or the very bottom) of the address space and we have
4030 to be able to handle this case properly; in particular, we use an
4031 equality test for the loop condition. */
4032 else
4033 {
5f5c5e0f 4034 rtx reg2 = gen_rtx_REG (Pmode, PROBE_STACK_SECOND_REG);
a3eb8a52
EB
4035
4036 /* Step 1: round SIZE to the previous multiple of the interval. */
4037
4038 HOST_WIDE_INT rounded_size = size & -PROBE_INTERVAL;
4039
4040
4041 /* Step 2: compute initial and final value of the loop counter. */
4042
4043 /* TEST_ADDR = SP + FIRST. */
4044 emit_set_insn (reg1,
5f5c5e0f 4045 plus_constant (Pmode, stack_pointer_rtx, -first));
a3eb8a52
EB
4046
4047 /* LAST_ADDR = SP + FIRST + ROUNDED_SIZE. */
13f752b2
JL
4048 HOST_WIDE_INT adjustment = - (first + rounded_size);
4049 if (! aarch64_uimm12_shift (adjustment))
4050 {
4051 aarch64_internal_mov_immediate (reg2, GEN_INT (adjustment),
4052 true, Pmode);
4053 emit_set_insn (reg2, gen_rtx_PLUS (Pmode, stack_pointer_rtx, reg2));
4054 }
4055 else
8dd64cdf
EB
4056 emit_set_insn (reg2,
4057 plus_constant (Pmode, stack_pointer_rtx, adjustment));
4058
a3eb8a52
EB
4059 /* Step 3: the loop
4060
4061 do
4062 {
4063 TEST_ADDR = TEST_ADDR + PROBE_INTERVAL
4064 probe at TEST_ADDR
4065 }
4066 while (TEST_ADDR != LAST_ADDR)
4067
4068 probes at FIRST + N * PROBE_INTERVAL for values of N from 1
4069 until it is equal to ROUNDED_SIZE. */
4070
5f5c5e0f 4071 emit_insn (gen_probe_stack_range (reg1, reg1, reg2));
a3eb8a52
EB
4072
4073
4074 /* Step 4: probe at FIRST + SIZE if we cannot assert at compile-time
4075 that SIZE is equal to ROUNDED_SIZE. */
4076
4077 if (size != rounded_size)
4078 {
4079 HOST_WIDE_INT rem = size - rounded_size;
4080
4081 if (rem > 256)
4082 {
4083 const HOST_WIDE_INT base = ROUND_UP (rem, ARITH_FACTOR);
4084
5f5c5e0f
EB
4085 emit_set_insn (reg2, plus_constant (Pmode, reg2, -base));
4086 emit_stack_probe (plus_constant (Pmode, reg2, base - rem));
a3eb8a52
EB
4087 }
4088 else
5f5c5e0f 4089 emit_stack_probe (plus_constant (Pmode, reg2, -rem));
a3eb8a52
EB
4090 }
4091 }
4092
4093 /* Make sure nothing is scheduled before we are done. */
4094 emit_insn (gen_blockage ());
4095}
4096
4097/* Probe a range of stack addresses from REG1 to REG2 inclusive. These are
4098 absolute addresses. */
4099
4100const char *
4101aarch64_output_probe_stack_range (rtx reg1, rtx reg2)
4102{
4103 static int labelno = 0;
4104 char loop_lab[32];
4105 rtx xops[2];
4106
4107 ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno++);
4108
4109 /* Loop. */
4110 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);
4111
cd1bef27
JL
4112 HOST_WIDE_INT stack_clash_probe_interval
4113 = 1 << PARAM_VALUE (PARAM_STACK_CLASH_PROTECTION_GUARD_SIZE);
4114
a3eb8a52
EB
4115 /* TEST_ADDR = TEST_ADDR + PROBE_INTERVAL. */
4116 xops[0] = reg1;
cd1bef27
JL
4117 HOST_WIDE_INT interval;
4118 if (flag_stack_clash_protection)
4119 interval = stack_clash_probe_interval;
4120 else
4121 interval = PROBE_INTERVAL;
4122
4123 gcc_assert (aarch64_uimm12_shift (interval));
4124 xops[1] = GEN_INT (interval);
4125
a3eb8a52
EB
4126 output_asm_insn ("sub\t%0, %0, %1", xops);
4127
cd1bef27
JL
4128 /* If doing stack clash protection then we probe up by the ABI specified
4129 amount. We do this because we're dropping full pages at a time in the
4130 loop. But if we're doing non-stack clash probing, probe at SP 0. */
4131 if (flag_stack_clash_protection)
4132 xops[1] = GEN_INT (STACK_CLASH_CALLER_GUARD);
4133 else
4134 xops[1] = CONST0_RTX (GET_MODE (xops[1]));
4135
4136 /* Probe at TEST_ADDR. If we're inside the loop it is always safe to probe
4137 by this amount for each iteration. */
4138 output_asm_insn ("str\txzr, [%0, %1]", xops);
a3eb8a52
EB
4139
4140 /* Test if TEST_ADDR == LAST_ADDR. */
4141 xops[1] = reg2;
4142 output_asm_insn ("cmp\t%0, %1", xops);
4143
4144 /* Branch. */
4145 fputs ("\tb.ne\t", asm_out_file);
4146 assemble_name_raw (asm_out_file, loop_lab);
4147 fputc ('\n', asm_out_file);
4148
4149 return "";
4150}
4151
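/* Illustrative sketch (not from the sources): without stack clash
   protection, and taking a 4096-byte probe interval purely for
   illustration, the loop emitted by aarch64_output_probe_stack_range
   looks something like

	.LPSRL0:
	sub	x9, x9, 4096
	str	xzr, [x9, 0]
	cmp	x9, x10
	b.ne	.LPSRL0

   with x9 and x10 being PROBE_STACK_FIRST_REG and PROBE_STACK_SECOND_REG,
   holding TEST_ADDR and LAST_ADDR respectively.  */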
eb471ba3
TC
4152/* Emit the probe loop for doing stack clash probes and stack adjustments for
4153 SVE. This emits probes from BASE to BASE - ADJUSTMENT based on a guard size
4154 of GUARD_SIZE. When a probe is emitted it is done at most
4155 MIN_PROBE_THRESHOLD bytes from the current BASE at an interval of
4156 at most MIN_PROBE_THRESHOLD. By the end of this function
4157 BASE = BASE - ADJUSTMENT. */
4158
4159const char *
4160aarch64_output_probe_sve_stack_clash (rtx base, rtx adjustment,
4161 rtx min_probe_threshold, rtx guard_size)
4162{
4163 /* This function is not allowed to use any instruction generation function
4164 like gen_ and friends. If you do you'll likely ICE during CFG validation,
4165 so instead emit the code you want using output_asm_insn. */
4166 gcc_assert (flag_stack_clash_protection);
4167 gcc_assert (CONST_INT_P (min_probe_threshold) && CONST_INT_P (guard_size));
4168 gcc_assert (INTVAL (guard_size) > INTVAL (min_probe_threshold));
4169
4170 /* The minimum required allocation before the residual requires probing. */
4171 HOST_WIDE_INT residual_probe_guard = INTVAL (min_probe_threshold);
4172
4173 /* Clamp the value down to the nearest value that can be used with a cmp. */
4174 residual_probe_guard = aarch64_clamp_to_uimm12_shift (residual_probe_guard);
4175 rtx probe_offset_value_rtx = gen_int_mode (residual_probe_guard, Pmode);
4176
4177 gcc_assert (INTVAL (min_probe_threshold) >= residual_probe_guard);
4178 gcc_assert (aarch64_uimm12_shift (residual_probe_guard));
4179
4180 static int labelno = 0;
4181 char loop_start_lab[32];
4182 char loop_end_lab[32];
4183 rtx xops[2];
4184
4185 ASM_GENERATE_INTERNAL_LABEL (loop_start_lab, "SVLPSPL", labelno);
4186 ASM_GENERATE_INTERNAL_LABEL (loop_end_lab, "SVLPEND", labelno++);
4187
4188 /* Emit loop start label. */
4189 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_start_lab);
4190
4191 /* ADJUSTMENT < RESIDUAL_PROBE_GUARD. */
4192 xops[0] = adjustment;
4193 xops[1] = probe_offset_value_rtx;
4194 output_asm_insn ("cmp\t%0, %1", xops);
4195
4196 /* Branch to end if not enough adjustment to probe. */
4197 fputs ("\tb.lt\t", asm_out_file);
4198 assemble_name_raw (asm_out_file, loop_end_lab);
4199 fputc ('\n', asm_out_file);
4200
4201 /* BASE = BASE - RESIDUAL_PROBE_GUARD. */
4202 xops[0] = base;
4203 xops[1] = probe_offset_value_rtx;
4204 output_asm_insn ("sub\t%0, %0, %1", xops);
4205
4206 /* Probe at BASE. */
4207 xops[1] = const0_rtx;
4208 output_asm_insn ("str\txzr, [%0, %1]", xops);
4209
4210 /* ADJUSTMENT = ADJUSTMENT - RESIDUAL_PROBE_GUARD. */
4211 xops[0] = adjustment;
4212 xops[1] = probe_offset_value_rtx;
4213 output_asm_insn ("sub\t%0, %0, %1", xops);
4214
4215 /* Branch to start if still more bytes to allocate. */
4216 fputs ("\tb\t", asm_out_file);
4217 assemble_name_raw (asm_out_file, loop_start_lab);
4218 fputc ('\n', asm_out_file);
4219
4220 /* No probe leave. */
4221 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_end_lab);
4222
4223 /* BASE = BASE - ADJUSTMENT. */
4224 xops[0] = base;
4225 xops[1] = adjustment;
4226 output_asm_insn ("sub\t%0, %0, %1", xops);
4227 return "";
4228}
4229
d6cb6d6a
WD
4230/* Determine whether a frame chain needs to be generated. */
4231static bool
4232aarch64_needs_frame_chain (void)
4233{
4234 /* Force a frame chain for EH returns so the return address is at FP+8. */
4235 if (frame_pointer_needed || crtl->calls_eh_return)
4236 return true;
4237
4238 /* A leaf function cannot have calls or write LR. */
4239 bool is_leaf = crtl->is_leaf && !df_regs_ever_live_p (LR_REGNUM);
4240
4241 /* Don't use a frame chain in leaf functions if leaf frame pointers
4242 are disabled. */
4243 if (flag_omit_leaf_frame_pointer && is_leaf)
4244 return false;
4245
4246 return aarch64_use_frame_pointer;
4247}
4248
43e9d192
IB
4249/* Mark the registers that need to be saved by the callee and calculate
4250 the size of the callee-saved registers area and frame record (both FP
33a2e348 4251 and LR may be omitted). */
43e9d192
IB
4252static void
4253aarch64_layout_frame (void)
4254{
4255 HOST_WIDE_INT offset = 0;
4b0685d9 4256 int regno, last_fp_reg = INVALID_REGNUM;
a0d0b980 4257 bool simd_function = aarch64_simd_decl_p (cfun->decl);
43e9d192 4258
d6cb6d6a 4259 cfun->machine->frame.emit_frame_chain = aarch64_needs_frame_chain ();
7040939b 4260
8c6e3b23
TC
4261 /* Adjust the outgoing arguments size if required. Keep it in sync with what
4262 the mid-end is doing. */
4263 crtl->outgoing_args_size = STACK_DYNAMIC_OFFSET (cfun);
4264
97826595
MS
4265#define SLOT_NOT_REQUIRED (-2)
4266#define SLOT_REQUIRED (-1)
4267
71bfb77a
WD
4268 cfun->machine->frame.wb_candidate1 = INVALID_REGNUM;
4269 cfun->machine->frame.wb_candidate2 = INVALID_REGNUM;
363ffa50 4270
a0d0b980
SE
4271 /* If this is a non-leaf simd function with calls we assume that
4272 at least one of those calls is to a non-simd function and thus
4273 we must save V8 to V23 in the prologue. */
4274
4275 if (simd_function && !crtl->is_leaf)
4276 {
4277 for (regno = V0_REGNUM; regno <= V31_REGNUM; regno++)
4278 if (FP_SIMD_SAVED_REGNUM_P (regno))
4279 df_set_regs_ever_live (regno, true);
4280 }
4281
43e9d192
IB
4282 /* First mark all the registers that really need to be saved... */
4283 for (regno = R0_REGNUM; regno <= R30_REGNUM; regno++)
97826595 4284 cfun->machine->frame.reg_offset[regno] = SLOT_NOT_REQUIRED;
43e9d192
IB
4285
4286 for (regno = V0_REGNUM; regno <= V31_REGNUM; regno++)
97826595 4287 cfun->machine->frame.reg_offset[regno] = SLOT_NOT_REQUIRED;
43e9d192
IB
4288
4289 /* ... that includes the eh data registers (if needed)... */
4290 if (crtl->calls_eh_return)
4291 for (regno = 0; EH_RETURN_DATA_REGNO (regno) != INVALID_REGNUM; regno++)
97826595
MS
4292 cfun->machine->frame.reg_offset[EH_RETURN_DATA_REGNO (regno)]
4293 = SLOT_REQUIRED;
43e9d192
IB
4294
4295 /* ... and any callee saved register that dataflow says is live. */
4296 for (regno = R0_REGNUM; regno <= R30_REGNUM; regno++)
4297 if (df_regs_ever_live_p (regno)
1c923b60
JW
4298 && (regno == R30_REGNUM
4299 || !call_used_regs[regno]))
97826595 4300 cfun->machine->frame.reg_offset[regno] = SLOT_REQUIRED;
43e9d192
IB
4301
4302 for (regno = V0_REGNUM; regno <= V31_REGNUM; regno++)
4303 if (df_regs_ever_live_p (regno)
a0d0b980
SE
4304 && (!call_used_regs[regno]
4305 || (simd_function && FP_SIMD_SAVED_REGNUM_P (regno))))
4b0685d9
WD
4306 {
4307 cfun->machine->frame.reg_offset[regno] = SLOT_REQUIRED;
4308 last_fp_reg = regno;
4309 }
43e9d192 4310
204d2c03 4311 if (cfun->machine->frame.emit_frame_chain)
43e9d192 4312 {
2e1cdae5 4313 /* FP and LR are placed in the linkage record. */
43e9d192 4314 cfun->machine->frame.reg_offset[R29_REGNUM] = 0;
363ffa50 4315 cfun->machine->frame.wb_candidate1 = R29_REGNUM;
2e1cdae5 4316 cfun->machine->frame.reg_offset[R30_REGNUM] = UNITS_PER_WORD;
363ffa50 4317 cfun->machine->frame.wb_candidate2 = R30_REGNUM;
1f7bffd0
WD
4318 offset = 2 * UNITS_PER_WORD;
4319 }
43e9d192 4320
db6b62a8
TC
4321 /* With stack-clash, LR must be saved in non-leaf functions. */
4322 gcc_assert (crtl->is_leaf
4323 || (cfun->machine->frame.reg_offset[R30_REGNUM]
4324 != SLOT_NOT_REQUIRED));
4325
43e9d192 4326 /* Now assign stack slots for them. */
2e1cdae5 4327 for (regno = R0_REGNUM; regno <= R30_REGNUM; regno++)
97826595 4328 if (cfun->machine->frame.reg_offset[regno] == SLOT_REQUIRED)
43e9d192
IB
4329 {
4330 cfun->machine->frame.reg_offset[regno] = offset;
71bfb77a 4331 if (cfun->machine->frame.wb_candidate1 == INVALID_REGNUM)
363ffa50 4332 cfun->machine->frame.wb_candidate1 = regno;
71bfb77a 4333 else if (cfun->machine->frame.wb_candidate2 == INVALID_REGNUM)
363ffa50 4334 cfun->machine->frame.wb_candidate2 = regno;
43e9d192
IB
4335 offset += UNITS_PER_WORD;
4336 }
4337
4b0685d9
WD
4338 HOST_WIDE_INT max_int_offset = offset;
4339 offset = ROUND_UP (offset, STACK_BOUNDARY / BITS_PER_UNIT);
4340 bool has_align_gap = offset != max_int_offset;
4341
43e9d192 4342 for (regno = V0_REGNUM; regno <= V31_REGNUM; regno++)
97826595 4343 if (cfun->machine->frame.reg_offset[regno] == SLOT_REQUIRED)
43e9d192 4344 {
4b0685d9
WD
4345 /* If there is an alignment gap between integer and fp callee-saves,
4346 allocate the last fp register to it if possible. */
a0d0b980
SE
4347 if (regno == last_fp_reg
4348 && has_align_gap
4349 && !simd_function
4350 && (offset & 8) == 0)
4b0685d9
WD
4351 {
4352 cfun->machine->frame.reg_offset[regno] = max_int_offset;
4353 break;
4354 }
4355
43e9d192 4356 cfun->machine->frame.reg_offset[regno] = offset;
71bfb77a 4357 if (cfun->machine->frame.wb_candidate1 == INVALID_REGNUM)
363ffa50 4358 cfun->machine->frame.wb_candidate1 = regno;
71bfb77a 4359 else if (cfun->machine->frame.wb_candidate2 == INVALID_REGNUM
363ffa50
JW
4360 && cfun->machine->frame.wb_candidate1 >= V0_REGNUM)
4361 cfun->machine->frame.wb_candidate2 = regno;
a0d0b980 4362 offset += simd_function ? UNITS_PER_VREG : UNITS_PER_WORD;
43e9d192
IB
4363 }
4364
4f59f9f2 4365 offset = ROUND_UP (offset, STACK_BOUNDARY / BITS_PER_UNIT);
43e9d192
IB
4366
4367 cfun->machine->frame.saved_regs_size = offset;
1c960e02 4368
71bfb77a
WD
4369 HOST_WIDE_INT varargs_and_saved_regs_size
4370 = offset + cfun->machine->frame.saved_varargs_size;
4371
1c960e02 4372 cfun->machine->frame.hard_fp_offset
6a70badb
RS
4373 = aligned_upper_bound (varargs_and_saved_regs_size
4374 + get_frame_size (),
4375 STACK_BOUNDARY / BITS_PER_UNIT);
1c960e02 4376
6a70badb
RS
4377 /* Both these values are already aligned. */
4378 gcc_assert (multiple_p (crtl->outgoing_args_size,
4379 STACK_BOUNDARY / BITS_PER_UNIT));
1c960e02 4380 cfun->machine->frame.frame_size
6a70badb
RS
4381 = (cfun->machine->frame.hard_fp_offset
4382 + crtl->outgoing_args_size);
1c960e02 4383
71bfb77a
WD
4384 cfun->machine->frame.locals_offset = cfun->machine->frame.saved_varargs_size;
4385
4386 cfun->machine->frame.initial_adjust = 0;
4387 cfun->machine->frame.final_adjust = 0;
4388 cfun->machine->frame.callee_adjust = 0;
4389 cfun->machine->frame.callee_offset = 0;
4390
4391 HOST_WIDE_INT max_push_offset = 0;
4392 if (cfun->machine->frame.wb_candidate2 != INVALID_REGNUM)
4393 max_push_offset = 512;
4394 else if (cfun->machine->frame.wb_candidate1 != INVALID_REGNUM)
4395 max_push_offset = 256;
4396
6a70badb
RS
4397 HOST_WIDE_INT const_size, const_fp_offset;
4398 if (cfun->machine->frame.frame_size.is_constant (&const_size)
4399 && const_size < max_push_offset
4400 && known_eq (crtl->outgoing_args_size, 0))
71bfb77a
WD
4401 {
4402 /* Simple, small frame with no outgoing arguments:
4403 stp reg1, reg2, [sp, -frame_size]!
4404 stp reg3, reg4, [sp, 16] */
6a70badb 4405 cfun->machine->frame.callee_adjust = const_size;
71bfb77a 4406 }
6a70badb
RS
4407 else if (known_lt (crtl->outgoing_args_size
4408 + cfun->machine->frame.saved_regs_size, 512)
71bfb77a 4409 && !(cfun->calls_alloca
6a70badb
RS
4410 && known_lt (cfun->machine->frame.hard_fp_offset,
4411 max_push_offset)))
71bfb77a
WD
4412 {
4413 /* Frame with small outgoing arguments:
4414 sub sp, sp, frame_size
4415 stp reg1, reg2, [sp, outgoing_args_size]
4416 stp reg3, reg4, [sp, outgoing_args_size + 16] */
4417 cfun->machine->frame.initial_adjust = cfun->machine->frame.frame_size;
4418 cfun->machine->frame.callee_offset
4419 = cfun->machine->frame.frame_size - cfun->machine->frame.hard_fp_offset;
4420 }
6a70badb
RS
4421 else if (cfun->machine->frame.hard_fp_offset.is_constant (&const_fp_offset)
4422 && const_fp_offset < max_push_offset)
71bfb77a
WD
4423 {
4424 /* Frame with large outgoing arguments but a small local area:
4425 stp reg1, reg2, [sp, -hard_fp_offset]!
4426 stp reg3, reg4, [sp, 16]
4427 sub sp, sp, outgoing_args_size */
6a70badb 4428 cfun->machine->frame.callee_adjust = const_fp_offset;
71bfb77a
WD
4429 cfun->machine->frame.final_adjust
4430 = cfun->machine->frame.frame_size - cfun->machine->frame.callee_adjust;
4431 }
71bfb77a
WD
4432 else
4433 {
4434 /* Frame with large local area and outgoing arguments using frame pointer:
4435 sub sp, sp, hard_fp_offset
4436 stp x29, x30, [sp, 0]
4437 add x29, sp, 0
4438 stp reg3, reg4, [sp, 16]
4439 sub sp, sp, outgoing_args_size */
4440 cfun->machine->frame.initial_adjust = cfun->machine->frame.hard_fp_offset;
4441 cfun->machine->frame.final_adjust
4442 = cfun->machine->frame.frame_size - cfun->machine->frame.initial_adjust;
4443 }
4444
43e9d192
IB
4445 cfun->machine->frame.laid_out = true;
4446}
4447
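/* Illustrative example (not from the sources): a hypothetical function
   that saves only x29/x30, has 32 bytes of locals and no outgoing
   arguments ends up with frame_size == 48, which is below the push
   limit, so the first case above applies and the whole allocation
   collapses into

	stp	x29, x30, [sp, -48]!

   i.e. callee_adjust == 48 with no separate initial or final
   adjustment.  */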
04ddfe06
KT
4448/* Return true if the register REGNO is saved on entry to
4449 the current function. */
4450
43e9d192
IB
4451static bool
4452aarch64_register_saved_on_entry (int regno)
4453{
97826595 4454 return cfun->machine->frame.reg_offset[regno] >= 0;
43e9d192
IB
4455}
4456
04ddfe06
KT
 4457/* Return the next register, from REGNO up to LIMIT, that the callee
 4458   needs to save.  */
4459
64dedd72
JW
4460static unsigned
4461aarch64_next_callee_save (unsigned regno, unsigned limit)
4462{
4463 while (regno <= limit && !aarch64_register_saved_on_entry (regno))
4464 regno ++;
4465 return regno;
4466}
43e9d192 4467
04ddfe06
KT
4468/* Push the register number REGNO of mode MODE to the stack with write-back
4469 adjusting the stack by ADJUSTMENT. */
4470
c5e1f66e 4471static void
ef4bddc2 4472aarch64_pushwb_single_reg (machine_mode mode, unsigned regno,
c5e1f66e
JW
4473 HOST_WIDE_INT adjustment)
4474 {
4475 rtx base_rtx = stack_pointer_rtx;
4476 rtx insn, reg, mem;
4477
4478 reg = gen_rtx_REG (mode, regno);
4479 mem = gen_rtx_PRE_MODIFY (Pmode, base_rtx,
4480 plus_constant (Pmode, base_rtx, -adjustment));
30079dde 4481 mem = gen_frame_mem (mode, mem);
c5e1f66e
JW
4482
4483 insn = emit_move_insn (mem, reg);
4484 RTX_FRAME_RELATED_P (insn) = 1;
4485}
4486
04ddfe06
KT
4487/* Generate and return an instruction to store the pair of registers
4488 REG and REG2 of mode MODE to location BASE with write-back adjusting
4489 the stack location BASE by ADJUSTMENT. */
4490
80c11907 4491static rtx
ef4bddc2 4492aarch64_gen_storewb_pair (machine_mode mode, rtx base, rtx reg, rtx reg2,
80c11907
JW
4493 HOST_WIDE_INT adjustment)
4494{
4495 switch (mode)
4496 {
4e10a5a7 4497 case E_DImode:
80c11907
JW
4498 return gen_storewb_pairdi_di (base, base, reg, reg2,
4499 GEN_INT (-adjustment),
4500 GEN_INT (UNITS_PER_WORD - adjustment));
4e10a5a7 4501 case E_DFmode:
80c11907
JW
4502 return gen_storewb_pairdf_di (base, base, reg, reg2,
4503 GEN_INT (-adjustment),
4504 GEN_INT (UNITS_PER_WORD - adjustment));
a0d0b980
SE
4505 case E_TFmode:
4506 return gen_storewb_pairtf_di (base, base, reg, reg2,
4507 GEN_INT (-adjustment),
4508 GEN_INT (UNITS_PER_VREG - adjustment));
80c11907
JW
4509 default:
4510 gcc_unreachable ();
4511 }
4512}
4513
04ddfe06
KT
4514/* Push registers numbered REGNO1 and REGNO2 to the stack, adjusting the
4515 stack pointer by ADJUSTMENT. */
4516
80c11907 4517static void
89ac681e 4518aarch64_push_regs (unsigned regno1, unsigned regno2, HOST_WIDE_INT adjustment)
80c11907 4519{
5d8a22a5 4520 rtx_insn *insn;
a0d0b980 4521 machine_mode mode = aarch64_reg_save_mode (cfun->decl, regno1);
89ac681e 4522
71bfb77a 4523 if (regno2 == INVALID_REGNUM)
89ac681e
WD
4524 return aarch64_pushwb_single_reg (mode, regno1, adjustment);
4525
80c11907
JW
4526 rtx reg1 = gen_rtx_REG (mode, regno1);
4527 rtx reg2 = gen_rtx_REG (mode, regno2);
4528
4529 insn = emit_insn (aarch64_gen_storewb_pair (mode, stack_pointer_rtx, reg1,
4530 reg2, adjustment));
4531 RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 2)) = 1;
80c11907
JW
4532 RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 1)) = 1;
4533 RTX_FRAME_RELATED_P (insn) = 1;
4534}
4535
04ddfe06
KT
 4536/* Load the pair of registers REG, REG2 of mode MODE from stack location BASE,
4537 adjusting it by ADJUSTMENT afterwards. */
4538
159313d9 4539static rtx
ef4bddc2 4540aarch64_gen_loadwb_pair (machine_mode mode, rtx base, rtx reg, rtx reg2,
159313d9
JW
4541 HOST_WIDE_INT adjustment)
4542{
4543 switch (mode)
4544 {
4e10a5a7 4545 case E_DImode:
159313d9 4546 return gen_loadwb_pairdi_di (base, base, reg, reg2, GEN_INT (adjustment),
3e322b3f 4547 GEN_INT (UNITS_PER_WORD));
4e10a5a7 4548 case E_DFmode:
159313d9 4549 return gen_loadwb_pairdf_di (base, base, reg, reg2, GEN_INT (adjustment),
3e322b3f 4550 GEN_INT (UNITS_PER_WORD));
a0d0b980
SE
4551 case E_TFmode:
4552 return gen_loadwb_pairtf_di (base, base, reg, reg2, GEN_INT (adjustment),
4553 GEN_INT (UNITS_PER_VREG));
159313d9
JW
4554 default:
4555 gcc_unreachable ();
4556 }
4557}
4558
04ddfe06
KT
4559/* Pop the two registers numbered REGNO1, REGNO2 from the stack, adjusting it
4560 afterwards by ADJUSTMENT and writing the appropriate REG_CFA_RESTORE notes
4561 into CFI_OPS. */
4562
89ac681e
WD
4563static void
4564aarch64_pop_regs (unsigned regno1, unsigned regno2, HOST_WIDE_INT adjustment,
4565 rtx *cfi_ops)
4566{
a0d0b980 4567 machine_mode mode = aarch64_reg_save_mode (cfun->decl, regno1);
89ac681e
WD
4568 rtx reg1 = gen_rtx_REG (mode, regno1);
4569
4570 *cfi_ops = alloc_reg_note (REG_CFA_RESTORE, reg1, *cfi_ops);
4571
71bfb77a 4572 if (regno2 == INVALID_REGNUM)
89ac681e
WD
4573 {
4574 rtx mem = plus_constant (Pmode, stack_pointer_rtx, adjustment);
4575 mem = gen_rtx_POST_MODIFY (Pmode, stack_pointer_rtx, mem);
30079dde 4576 emit_move_insn (reg1, gen_frame_mem (mode, mem));
89ac681e
WD
4577 }
4578 else
4579 {
4580 rtx reg2 = gen_rtx_REG (mode, regno2);
4581 *cfi_ops = alloc_reg_note (REG_CFA_RESTORE, reg2, *cfi_ops);
4582 emit_insn (aarch64_gen_loadwb_pair (mode, stack_pointer_rtx, reg1,
4583 reg2, adjustment));
4584 }
4585}
4586
04ddfe06
KT
4587/* Generate and return a store pair instruction of mode MODE to store
4588 register REG1 to MEM1 and register REG2 to MEM2. */
4589
72df5c1f 4590static rtx
ef4bddc2 4591aarch64_gen_store_pair (machine_mode mode, rtx mem1, rtx reg1, rtx mem2,
72df5c1f
JW
4592 rtx reg2)
4593{
4594 switch (mode)
4595 {
4e10a5a7 4596 case E_DImode:
dfe1da23 4597 return gen_store_pair_dw_didi (mem1, reg1, mem2, reg2);
72df5c1f 4598
4e10a5a7 4599 case E_DFmode:
dfe1da23 4600 return gen_store_pair_dw_dfdf (mem1, reg1, mem2, reg2);
72df5c1f 4601
a0d0b980
SE
4602 case E_TFmode:
4603 return gen_store_pair_dw_tftf (mem1, reg1, mem2, reg2);
4604
72df5c1f
JW
4605 default:
4606 gcc_unreachable ();
4607 }
4608}
4609
04ddfe06
KT
 4610/* Generate and return a load pair instruction of mode MODE to load register
4611 REG1 from MEM1 and register REG2 from MEM2. */
4612
72df5c1f 4613static rtx
ef4bddc2 4614aarch64_gen_load_pair (machine_mode mode, rtx reg1, rtx mem1, rtx reg2,
72df5c1f
JW
4615 rtx mem2)
4616{
4617 switch (mode)
4618 {
4e10a5a7 4619 case E_DImode:
dfe1da23 4620 return gen_load_pair_dw_didi (reg1, mem1, reg2, mem2);
72df5c1f 4621
4e10a5a7 4622 case E_DFmode:
dfe1da23 4623 return gen_load_pair_dw_dfdf (reg1, mem1, reg2, mem2);
72df5c1f 4624
a0d0b980
SE
4625 case E_TFmode:
4626 return gen_load_pair_dw_tftf (reg1, mem1, reg2, mem2);
4627
72df5c1f
JW
4628 default:
4629 gcc_unreachable ();
4630 }
4631}
4632
db58fd89
JW
4633/* Return TRUE if return address signing should be enabled for the current
4634 function, otherwise return FALSE. */
4635
4636bool
4637aarch64_return_address_signing_enabled (void)
4638{
 4639  /* This function should only be called after the frame is laid out.  */
4640 gcc_assert (cfun->machine->frame.laid_out);
4641
 4642  /* If signing scope is AARCH64_FUNCTION_NON_LEAF, we only sign a leaf function
 4643     if its LR is pushed onto the stack.  */
4644 return (aarch64_ra_sign_scope == AARCH64_FUNCTION_ALL
4645 || (aarch64_ra_sign_scope == AARCH64_FUNCTION_NON_LEAF
4646 && cfun->machine->frame.reg_offset[LR_REGNUM] >= 0));
4647}
4648
04ddfe06
KT
4649/* Emit code to save the callee-saved registers from register number START
4650 to LIMIT to the stack at the location starting at offset START_OFFSET,
4651 skipping any write-back candidates if SKIP_WB is true. */
43e9d192 4652
43e9d192 4653static void
6a70badb 4654aarch64_save_callee_saves (machine_mode mode, poly_int64 start_offset,
ae13fce3 4655 unsigned start, unsigned limit, bool skip_wb)
43e9d192 4656{
5d8a22a5 4657 rtx_insn *insn;
43e9d192
IB
4658 unsigned regno;
4659 unsigned regno2;
4660
0ec74a1e 4661 for (regno = aarch64_next_callee_save (start, limit);
64dedd72
JW
4662 regno <= limit;
4663 regno = aarch64_next_callee_save (regno + 1, limit))
43e9d192 4664 {
ae13fce3 4665 rtx reg, mem;
6a70badb 4666 poly_int64 offset;
a0d0b980 4667 int offset_diff;
64dedd72 4668
ae13fce3
JW
4669 if (skip_wb
4670 && (regno == cfun->machine->frame.wb_candidate1
4671 || regno == cfun->machine->frame.wb_candidate2))
4672 continue;
4673
827ab47a
KT
4674 if (cfun->machine->reg_is_wrapped_separately[regno])
4675 continue;
4676
ae13fce3
JW
4677 reg = gen_rtx_REG (mode, regno);
4678 offset = start_offset + cfun->machine->frame.reg_offset[regno];
30079dde
WD
4679 mem = gen_frame_mem (mode, plus_constant (Pmode, stack_pointer_rtx,
4680 offset));
64dedd72
JW
4681
4682 regno2 = aarch64_next_callee_save (regno + 1, limit);
a0d0b980
SE
4683 offset_diff = cfun->machine->frame.reg_offset[regno2]
4684 - cfun->machine->frame.reg_offset[regno];
64dedd72
JW
4685
4686 if (regno2 <= limit
827ab47a 4687 && !cfun->machine->reg_is_wrapped_separately[regno2]
a0d0b980 4688 && known_eq (GET_MODE_SIZE (mode), offset_diff))
43e9d192 4689 {
0ec74a1e 4690 rtx reg2 = gen_rtx_REG (mode, regno2);
64dedd72
JW
4691 rtx mem2;
4692
4693 offset = start_offset + cfun->machine->frame.reg_offset[regno2];
30079dde
WD
4694 mem2 = gen_frame_mem (mode, plus_constant (Pmode, stack_pointer_rtx,
4695 offset));
8ed2fc62
JW
4696 insn = emit_insn (aarch64_gen_store_pair (mode, mem, reg, mem2,
4697 reg2));
0b4a9743 4698
64dedd72
JW
4699 /* The first part of a frame-related parallel insn is
4700 always assumed to be relevant to the frame
4701 calculations; subsequent parts, are only
4702 frame-related if explicitly marked. */
4703 RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 1)) = 1;
4704 regno = regno2;
4705 }
4706 else
8ed2fc62
JW
4707 insn = emit_move_insn (mem, reg);
4708
4709 RTX_FRAME_RELATED_P (insn) = 1;
4710 }
4711}
4712
04ddfe06
KT
4713/* Emit code to restore the callee registers of mode MODE from register
4714 number START up to and including LIMIT. Restore from the stack offset
4715 START_OFFSET, skipping any write-back candidates if SKIP_WB is true.
4716 Write the appropriate REG_CFA_RESTORE notes into CFI_OPS. */
4717
8ed2fc62 4718static void
ef4bddc2 4719aarch64_restore_callee_saves (machine_mode mode,
6a70badb 4720 poly_int64 start_offset, unsigned start,
dd991abb 4721 unsigned limit, bool skip_wb, rtx *cfi_ops)
8ed2fc62 4722{
8ed2fc62 4723 rtx base_rtx = stack_pointer_rtx;
8ed2fc62
JW
4724 unsigned regno;
4725 unsigned regno2;
6a70badb 4726 poly_int64 offset;
8ed2fc62
JW
4727
4728 for (regno = aarch64_next_callee_save (start, limit);
4729 regno <= limit;
4730 regno = aarch64_next_callee_save (regno + 1, limit))
4731 {
827ab47a
KT
4732 if (cfun->machine->reg_is_wrapped_separately[regno])
4733 continue;
4734
ae13fce3 4735 rtx reg, mem;
a0d0b980 4736 int offset_diff;
8ed2fc62 4737
ae13fce3
JW
4738 if (skip_wb
4739 && (regno == cfun->machine->frame.wb_candidate1
4740 || regno == cfun->machine->frame.wb_candidate2))
4741 continue;
4742
4743 reg = gen_rtx_REG (mode, regno);
8ed2fc62 4744 offset = start_offset + cfun->machine->frame.reg_offset[regno];
30079dde 4745 mem = gen_frame_mem (mode, plus_constant (Pmode, base_rtx, offset));
8ed2fc62
JW
4746
4747 regno2 = aarch64_next_callee_save (regno + 1, limit);
a0d0b980
SE
4748 offset_diff = cfun->machine->frame.reg_offset[regno2]
4749 - cfun->machine->frame.reg_offset[regno];
8ed2fc62
JW
4750
4751 if (regno2 <= limit
827ab47a 4752 && !cfun->machine->reg_is_wrapped_separately[regno2]
a0d0b980 4753 && known_eq (GET_MODE_SIZE (mode), offset_diff))
64dedd72 4754 {
8ed2fc62
JW
4755 rtx reg2 = gen_rtx_REG (mode, regno2);
4756 rtx mem2;
4757
4758 offset = start_offset + cfun->machine->frame.reg_offset[regno2];
30079dde 4759 mem2 = gen_frame_mem (mode, plus_constant (Pmode, base_rtx, offset));
dd991abb 4760 emit_insn (aarch64_gen_load_pair (mode, reg, mem, reg2, mem2));
8ed2fc62 4761
dd991abb 4762 *cfi_ops = alloc_reg_note (REG_CFA_RESTORE, reg2, *cfi_ops);
8ed2fc62 4763 regno = regno2;
43e9d192 4764 }
8ed2fc62 4765 else
dd991abb
RH
4766 emit_move_insn (reg, mem);
4767 *cfi_ops = alloc_reg_note (REG_CFA_RESTORE, reg, *cfi_ops);
43e9d192 4768 }
43e9d192
IB
4769}
4770
43cacb12
RS
4771/* Return true if OFFSET is a signed 4-bit value multiplied by the size
4772 of MODE. */
4773
4774static inline bool
4775offset_4bit_signed_scaled_p (machine_mode mode, poly_int64 offset)
4776{
4777 HOST_WIDE_INT multiple;
4778 return (constant_multiple_p (offset, GET_MODE_SIZE (mode), &multiple)
4779 && IN_RANGE (multiple, -8, 7));
4780}
4781
 4782/* Return true if OFFSET is an unsigned 6-bit value multiplied by the size
4783 of MODE. */
4784
4785static inline bool
4786offset_6bit_unsigned_scaled_p (machine_mode mode, poly_int64 offset)
4787{
4788 HOST_WIDE_INT multiple;
4789 return (constant_multiple_p (offset, GET_MODE_SIZE (mode), &multiple)
4790 && IN_RANGE (multiple, 0, 63));
4791}
4792
4793/* Return true if OFFSET is a signed 7-bit value multiplied by the size
4794 of MODE. */
4795
4796bool
4797aarch64_offset_7bit_signed_scaled_p (machine_mode mode, poly_int64 offset)
4798{
4799 HOST_WIDE_INT multiple;
4800 return (constant_multiple_p (offset, GET_MODE_SIZE (mode), &multiple)
4801 && IN_RANGE (multiple, -64, 63));
4802}
4803
4804/* Return true if OFFSET is a signed 9-bit value. */
4805
3c5af608
MM
4806bool
4807aarch64_offset_9bit_signed_unscaled_p (machine_mode mode ATTRIBUTE_UNUSED,
4808 poly_int64 offset)
827ab47a 4809{
6a70badb
RS
4810 HOST_WIDE_INT const_offset;
4811 return (offset.is_constant (&const_offset)
4812 && IN_RANGE (const_offset, -256, 255));
827ab47a
KT
4813}
4814
43cacb12
RS
4815/* Return true if OFFSET is a signed 9-bit value multiplied by the size
4816 of MODE. */
4817
827ab47a 4818static inline bool
43cacb12 4819offset_9bit_signed_scaled_p (machine_mode mode, poly_int64 offset)
827ab47a 4820{
6a70badb
RS
4821 HOST_WIDE_INT multiple;
4822 return (constant_multiple_p (offset, GET_MODE_SIZE (mode), &multiple)
43cacb12 4823 && IN_RANGE (multiple, -256, 255));
827ab47a
KT
4824}
4825
43cacb12
RS
4826/* Return true if OFFSET is an unsigned 12-bit value multiplied by the size
4827 of MODE. */
4828
4829static inline bool
4830offset_12bit_unsigned_scaled_p (machine_mode mode, poly_int64 offset)
827ab47a 4831{
6a70badb
RS
4832 HOST_WIDE_INT multiple;
4833 return (constant_multiple_p (offset, GET_MODE_SIZE (mode), &multiple)
43cacb12 4834 && IN_RANGE (multiple, 0, 4095));
827ab47a
KT
4835}
4836
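/* Illustrative values (not from the sources): with 8-byte DImode the
   predicates above behave as follows, matching the LDP/STP and
   unsigned-offset LDR/STR immediate ranges for X registers:

     aarch64_offset_7bit_signed_scaled_p (DImode, -512)    true  (-64 * 8)
     aarch64_offset_7bit_signed_scaled_p (DImode, 504)     true  (63 * 8)
     aarch64_offset_7bit_signed_scaled_p (DImode, 508)     false (not a multiple of 8)
     offset_12bit_unsigned_scaled_p (DImode, 32760)        true  (4095 * 8)
     offset_12bit_unsigned_scaled_p (DImode, 32768)        false (multiple out of range)  */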
4837/* Implement TARGET_SHRINK_WRAP_GET_SEPARATE_COMPONENTS. */
4838
4839static sbitmap
4840aarch64_get_separate_components (void)
4841{
827ab47a
KT
4842 sbitmap components = sbitmap_alloc (LAST_SAVED_REGNUM + 1);
4843 bitmap_clear (components);
4844
4845 /* The registers we need saved to the frame. */
4846 for (unsigned regno = 0; regno <= LAST_SAVED_REGNUM; regno++)
4847 if (aarch64_register_saved_on_entry (regno))
4848 {
6a70badb 4849 poly_int64 offset = cfun->machine->frame.reg_offset[regno];
827ab47a
KT
4850 if (!frame_pointer_needed)
4851 offset += cfun->machine->frame.frame_size
4852 - cfun->machine->frame.hard_fp_offset;
4853 /* Check that we can access the stack slot of the register with one
4854 direct load with no adjustments needed. */
4855 if (offset_12bit_unsigned_scaled_p (DImode, offset))
4856 bitmap_set_bit (components, regno);
4857 }
4858
4859 /* Don't mess with the hard frame pointer. */
4860 if (frame_pointer_needed)
4861 bitmap_clear_bit (components, HARD_FRAME_POINTER_REGNUM);
4862
4863 unsigned reg1 = cfun->machine->frame.wb_candidate1;
4864 unsigned reg2 = cfun->machine->frame.wb_candidate2;
0795f659 4865 /* If registers have been chosen to be stored/restored with
827ab47a
KT
4866 writeback don't interfere with them to avoid having to output explicit
4867 stack adjustment instructions. */
4868 if (reg2 != INVALID_REGNUM)
4869 bitmap_clear_bit (components, reg2);
4870 if (reg1 != INVALID_REGNUM)
4871 bitmap_clear_bit (components, reg1);
4872
4873 bitmap_clear_bit (components, LR_REGNUM);
4874 bitmap_clear_bit (components, SP_REGNUM);
4875
4876 return components;
4877}
4878
4879/* Implement TARGET_SHRINK_WRAP_COMPONENTS_FOR_BB. */
4880
4881static sbitmap
4882aarch64_components_for_bb (basic_block bb)
4883{
4884 bitmap in = DF_LIVE_IN (bb);
4885 bitmap gen = &DF_LIVE_BB_INFO (bb)->gen;
4886 bitmap kill = &DF_LIVE_BB_INFO (bb)->kill;
a0d0b980 4887 bool simd_function = aarch64_simd_decl_p (cfun->decl);
827ab47a
KT
4888
4889 sbitmap components = sbitmap_alloc (LAST_SAVED_REGNUM + 1);
4890 bitmap_clear (components);
4891
4892 /* GPRs are used in a bb if they are in the IN, GEN, or KILL sets. */
4893 for (unsigned regno = 0; regno <= LAST_SAVED_REGNUM; regno++)
a0d0b980
SE
4894 if ((!call_used_regs[regno]
4895 || (simd_function && FP_SIMD_SAVED_REGNUM_P (regno)))
827ab47a
KT
4896 && (bitmap_bit_p (in, regno)
4897 || bitmap_bit_p (gen, regno)
4898 || bitmap_bit_p (kill, regno)))
3f26f054
WD
4899 {
4900 unsigned regno2, offset, offset2;
4901 bitmap_set_bit (components, regno);
4902
4903	 /* If there is a callee-save at an adjacent offset, add it as well
4904	 to increase the chance of using LDP/STP. */
4905 offset = cfun->machine->frame.reg_offset[regno];
4906 regno2 = ((offset & 8) == 0) ? regno + 1 : regno - 1;
4907
4908 if (regno2 <= LAST_SAVED_REGNUM)
4909 {
4910 offset2 = cfun->machine->frame.reg_offset[regno2];
4911 if ((offset & ~8) == (offset2 & ~8))
4912 bitmap_set_bit (components, regno2);
4913 }
4914 }
827ab47a
KT
4915
4916 return components;
4917}
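/* Worked example of the pairing arithmetic above, with hypothetical offsets:
   if x23 is saved at reg_offset 16, then (16 & 8) == 0, so the candidate
   partner is x24.  If x24 is saved at offset 24, then (16 & ~8) == 16 and
   (24 & ~8) == 16 as well, so both registers fall in the same 16-byte
   aligned slot pair and both are marked, increasing the chance that the
   later save/restore code can use a single LDP/STP for them.  */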
4918
4919/* Implement TARGET_SHRINK_WRAP_DISQUALIFY_COMPONENTS.
4920 Nothing to do for aarch64. */
4921
4922static void
4923aarch64_disqualify_components (sbitmap, edge, sbitmap, bool)
4924{
4925}
4926
4927/* Return the next set bit in BMP from START onwards. Return the total number
4928 of bits in BMP if no set bit is found at or after START. */
4929
4930static unsigned int
4931aarch64_get_next_set_bit (sbitmap bmp, unsigned int start)
4932{
4933 unsigned int nbits = SBITMAP_SIZE (bmp);
4934 if (start == nbits)
4935 return start;
4936
4937 gcc_assert (start < nbits);
4938 for (unsigned int i = start; i < nbits; i++)
4939 if (bitmap_bit_p (bmp, i))
4940 return i;
4941
4942 return nbits;
4943}
4944
4945/* Do the work for aarch64_emit_prologue_components and
4946 aarch64_emit_epilogue_components. COMPONENTS is the bitmap of registers
4947 to save/restore, PROLOGUE_P indicates whether to emit the prologue sequence
4948 for these components or the epilogue sequence. That is, it determines
4949 whether we should emit stores or loads and what kind of CFA notes to attach
4950 to the insns. Otherwise the logic for the two sequences is very
4951 similar. */
4952
4953static void
4954aarch64_process_components (sbitmap components, bool prologue_p)
4955{
4956 rtx ptr_reg = gen_rtx_REG (Pmode, frame_pointer_needed
4957 ? HARD_FRAME_POINTER_REGNUM
4958 : STACK_POINTER_REGNUM);
4959
4960 unsigned last_regno = SBITMAP_SIZE (components);
4961 unsigned regno = aarch64_get_next_set_bit (components, R0_REGNUM);
4962 rtx_insn *insn = NULL;
4963
4964 while (regno != last_regno)
4965 {
a0d0b980
SE
4966	 /* AAPCS64 section 5.1.2 requires only the low 64 bits to be saved,
4967 so DFmode for the vector registers is enough. For simd functions
4968 we want to save the low 128 bits. */
4969 machine_mode mode = aarch64_reg_save_mode (cfun->decl, regno);
4970
827ab47a 4971 rtx reg = gen_rtx_REG (mode, regno);
6a70badb 4972 poly_int64 offset = cfun->machine->frame.reg_offset[regno];
827ab47a
KT
4973 if (!frame_pointer_needed)
4974 offset += cfun->machine->frame.frame_size
4975 - cfun->machine->frame.hard_fp_offset;
4976 rtx addr = plus_constant (Pmode, ptr_reg, offset);
4977 rtx mem = gen_frame_mem (mode, addr);
4978
4979 rtx set = prologue_p ? gen_rtx_SET (mem, reg) : gen_rtx_SET (reg, mem);
4980 unsigned regno2 = aarch64_get_next_set_bit (components, regno + 1);
4981 /* No more registers to handle after REGNO.
4982 Emit a single save/restore and exit. */
4983 if (regno2 == last_regno)
4984 {
4985 insn = emit_insn (set);
4986 RTX_FRAME_RELATED_P (insn) = 1;
4987 if (prologue_p)
4988 add_reg_note (insn, REG_CFA_OFFSET, copy_rtx (set));
4989 else
4990 add_reg_note (insn, REG_CFA_RESTORE, reg);
4991 break;
4992 }
4993
6a70badb 4994 poly_int64 offset2 = cfun->machine->frame.reg_offset[regno2];
827ab47a
KT
4995 /* The next register is not of the same class or its offset is not
4996 mergeable with the current one into a pair. */
4997 if (!satisfies_constraint_Ump (mem)
4998 || GP_REGNUM_P (regno) != GP_REGNUM_P (regno2)
a0d0b980 4999 || (aarch64_simd_decl_p (cfun->decl) && FP_REGNUM_P (regno))
6a70badb
RS
5000 || maybe_ne ((offset2 - cfun->machine->frame.reg_offset[regno]),
5001 GET_MODE_SIZE (mode)))
827ab47a
KT
5002 {
5003 insn = emit_insn (set);
5004 RTX_FRAME_RELATED_P (insn) = 1;
5005 if (prologue_p)
5006 add_reg_note (insn, REG_CFA_OFFSET, copy_rtx (set));
5007 else
5008 add_reg_note (insn, REG_CFA_RESTORE, reg);
5009
5010 regno = regno2;
5011 continue;
5012 }
5013
5014 /* REGNO2 can be saved/restored in a pair with REGNO. */
5015 rtx reg2 = gen_rtx_REG (mode, regno2);
5016 if (!frame_pointer_needed)
5017 offset2 += cfun->machine->frame.frame_size
5018 - cfun->machine->frame.hard_fp_offset;
5019 rtx addr2 = plus_constant (Pmode, ptr_reg, offset2);
5020 rtx mem2 = gen_frame_mem (mode, addr2);
5021 rtx set2 = prologue_p ? gen_rtx_SET (mem2, reg2)
5022 : gen_rtx_SET (reg2, mem2);
5023
5024 if (prologue_p)
5025 insn = emit_insn (aarch64_gen_store_pair (mode, mem, reg, mem2, reg2));
5026 else
5027 insn = emit_insn (aarch64_gen_load_pair (mode, reg, mem, reg2, mem2));
5028
5029 RTX_FRAME_RELATED_P (insn) = 1;
5030 if (prologue_p)
5031 {
5032 add_reg_note (insn, REG_CFA_OFFSET, set);
5033 add_reg_note (insn, REG_CFA_OFFSET, set2);
5034 }
5035 else
5036 {
5037 add_reg_note (insn, REG_CFA_RESTORE, reg);
5038 add_reg_note (insn, REG_CFA_RESTORE, reg2);
5039 }
5040
5041 regno = aarch64_get_next_set_bit (components, regno2 + 1);
5042 }
5043}
5044
5045/* Implement TARGET_SHRINK_WRAP_EMIT_PROLOGUE_COMPONENTS. */
5046
5047static void
5048aarch64_emit_prologue_components (sbitmap components)
5049{
5050 aarch64_process_components (components, true);
5051}
5052
5053/* Implement TARGET_SHRINK_WRAP_EMIT_EPILOGUE_COMPONENTS. */
5054
5055static void
5056aarch64_emit_epilogue_components (sbitmap components)
5057{
5058 aarch64_process_components (components, false);
5059}
5060
5061/* Implement TARGET_SHRINK_WRAP_SET_HANDLED_COMPONENTS. */
5062
5063static void
5064aarch64_set_handled_components (sbitmap components)
5065{
5066 for (unsigned regno = 0; regno <= LAST_SAVED_REGNUM; regno++)
5067 if (bitmap_bit_p (components, regno))
5068 cfun->machine->reg_is_wrapped_separately[regno] = true;
5069}
5070
8c6e3b23
TC
5071/* On AArch64 we have an ABI-defined safe buffer. This constant is used to
5072 determine the probe offset for alloca. */
5073
5074static HOST_WIDE_INT
5075aarch64_stack_clash_protection_alloca_probe_range (void)
5076{
5077 return STACK_CLASH_CALLER_GUARD;
5078}
5079
5080
cd1bef27
JL
5081/* Allocate POLY_SIZE bytes of stack space using TEMP1 and TEMP2 as scratch
5082 registers. If POLY_SIZE is not large enough to require a probe this function
5083 will only adjust the stack. When allocating the stack space
5084 FRAME_RELATED_P is then used to indicate if the allocation is frame related.
5085 FINAL_ADJUSTMENT_P indicates whether we are allocating the outgoing
5086 arguments. If we are, then we ensure that any allocation larger than the
5087 ABI-defined buffer needs a probe so that the invariant of having a 1KB buffer is
5088 maintained.
5089
5090 We emit barriers after each stack adjustment to prevent optimizations from
5091 breaking the invariant that we never drop the stack more than a page. This
5092 invariant is needed to make it easier to correctly handle asynchronous
5093 events, e.g. if we were to allow the stack to be dropped by more than a page
5094 and then probe it with multiple probes, a signal taken somewhere in between
5095 would leave the signal handler not knowing the state of the stack, so it
5096 could make no assumptions about which pages have been probed. */
5097
5098static void
5099aarch64_allocate_and_probe_stack_space (rtx temp1, rtx temp2,
5100 poly_int64 poly_size,
5101 bool frame_related_p,
5102 bool final_adjustment_p)
5103{
5104 HOST_WIDE_INT guard_size
5105 = 1 << PARAM_VALUE (PARAM_STACK_CLASH_PROTECTION_GUARD_SIZE);
5106 HOST_WIDE_INT guard_used_by_caller = STACK_CLASH_CALLER_GUARD;
5107 /* When doing the final adjustment for the outgoing argument size we can't
5108 assume that LR was saved at position 0. So subtract its offset from the
5109 ABI safe buffer so that we don't accidentally allow an adjustment that
5110 would result in an allocation larger than the ABI buffer without
5111 probing. */
5112 HOST_WIDE_INT min_probe_threshold
5113 = final_adjustment_p
5114 ? guard_used_by_caller - cfun->machine->frame.reg_offset[LR_REGNUM]
5115 : guard_size - guard_used_by_caller;
5116
5117 poly_int64 frame_size = cfun->machine->frame.frame_size;
5118
5119 /* We should always have a positive probe threshold. */
5120 gcc_assert (min_probe_threshold > 0);
5121
5122 if (flag_stack_clash_protection && !final_adjustment_p)
5123 {
5124 poly_int64 initial_adjust = cfun->machine->frame.initial_adjust;
5125 poly_int64 final_adjust = cfun->machine->frame.final_adjust;
5126
5127 if (known_eq (frame_size, 0))
5128 {
5129 dump_stack_clash_frame_info (NO_PROBE_NO_FRAME, false);
5130 }
5131 else if (known_lt (initial_adjust, guard_size - guard_used_by_caller)
5132 && known_lt (final_adjust, guard_used_by_caller))
5133 {
5134 dump_stack_clash_frame_info (NO_PROBE_SMALL_FRAME, true);
5135 }
5136 }
5137
cd1bef27
JL
5138 /* If SIZE is not large enough to require probing, just adjust the stack and
5139 exit. */
eb471ba3 5140 if (known_lt (poly_size, min_probe_threshold)
cd1bef27
JL
5141 || !flag_stack_clash_protection)
5142 {
5143 aarch64_sub_sp (temp1, temp2, poly_size, frame_related_p);
5144 return;
5145 }
5146
eb471ba3
TC
5147 HOST_WIDE_INT size;
5148 /* Handle the SVE non-constant case first. */
5149 if (!poly_size.is_constant (&size))
5150 {
5151 if (dump_file)
5152 {
5153 fprintf (dump_file, "Stack clash SVE prologue: ");
5154 print_dec (poly_size, dump_file);
5155 fprintf (dump_file, " bytes, dynamic probing will be required.\n");
5156 }
5157
5158	 /* First calculate the number of bytes we're actually spilling. */
5159 aarch64_add_offset (Pmode, temp1, CONST0_RTX (Pmode),
5160 poly_size, temp1, temp2, false, true);
5161
5162 rtx_insn *insn = get_last_insn ();
5163
5164 if (frame_related_p)
5165 {
5166 /* This is done to provide unwinding information for the stack
5167	 adjustments we're about to do; however, to prevent the optimizers
5168 from removing the R15 move and leaving the CFA note (which would be
5169 very wrong) we tie the old and new stack pointer together.
5170 The tie will expand to nothing but the optimizers will not touch
5171 the instruction. */
5172 rtx stack_ptr_copy = gen_rtx_REG (Pmode, R15_REGNUM);
5173 emit_move_insn (stack_ptr_copy, stack_pointer_rtx);
5174 emit_insn (gen_stack_tie (stack_ptr_copy, stack_pointer_rtx));
5175
5176 /* We want the CFA independent of the stack pointer for the
5177 duration of the loop. */
5178 add_reg_note (insn, REG_CFA_DEF_CFA, stack_ptr_copy);
5179 RTX_FRAME_RELATED_P (insn) = 1;
5180 }
5181
5182 rtx probe_const = gen_int_mode (min_probe_threshold, Pmode);
5183 rtx guard_const = gen_int_mode (guard_size, Pmode);
5184
5185 insn = emit_insn (gen_probe_sve_stack_clash (Pmode, stack_pointer_rtx,
5186 stack_pointer_rtx, temp1,
5187 probe_const, guard_const));
5188
5189 /* Now reset the CFA register if needed. */
5190 if (frame_related_p)
5191 {
5192 add_reg_note (insn, REG_CFA_DEF_CFA,
5193 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
5194 gen_int_mode (poly_size, Pmode)));
5195 RTX_FRAME_RELATED_P (insn) = 1;
5196 }
5197
5198 return;
5199 }
5200
cd1bef27
JL
5201 if (dump_file)
5202 fprintf (dump_file,
eb471ba3
TC
5203 "Stack clash AArch64 prologue: " HOST_WIDE_INT_PRINT_DEC
5204 " bytes, probing will be required.\n", size);
cd1bef27
JL
5205
5206 /* Round size to the nearest multiple of guard_size, and calculate the
5207 residual as the difference between the original size and the rounded
5208 size. */
5209 HOST_WIDE_INT rounded_size = ROUND_DOWN (size, guard_size);
5210 HOST_WIDE_INT residual = size - rounded_size;
5211
5212 /* We can handle a small number of allocations/probes inline. Otherwise
5213 punt to a loop. */
5214 if (rounded_size <= STACK_CLASH_MAX_UNROLL_PAGES * guard_size)
5215 {
5216 for (HOST_WIDE_INT i = 0; i < rounded_size; i += guard_size)
5217 {
5218 aarch64_sub_sp (NULL, temp2, guard_size, true);
5219 emit_stack_probe (plus_constant (Pmode, stack_pointer_rtx,
5220 guard_used_by_caller));
5221 emit_insn (gen_blockage ());
5222 }
5223 dump_stack_clash_frame_info (PROBE_INLINE, size != rounded_size);
5224 }
5225 else
5226 {
5227 /* Compute the ending address. */
5228 aarch64_add_offset (Pmode, temp1, stack_pointer_rtx, -rounded_size,
5229 temp1, NULL, false, true);
5230 rtx_insn *insn = get_last_insn ();
5231
5232 /* For the initial allocation, we don't have a frame pointer
5233 set up, so we always need CFI notes. If we're doing the
5234 final allocation, then we may have a frame pointer, in which
5235 case it is the CFA, otherwise we need CFI notes.
5236
5237 We can determine which allocation we are doing by looking at
5238 the value of FRAME_RELATED_P since the final allocations are not
5239 frame related. */
5240 if (frame_related_p)
5241 {
5242 /* We want the CFA independent of the stack pointer for the
5243 duration of the loop. */
5244 add_reg_note (insn, REG_CFA_DEF_CFA,
5245 plus_constant (Pmode, temp1, rounded_size));
5246 RTX_FRAME_RELATED_P (insn) = 1;
5247 }
5248
5249 /* This allocates and probes the stack. Note that this re-uses some of
5250 the existing Ada stack protection code. However we are guaranteed not
5251 to enter the non loop or residual branches of that code.
5252
5253 The non-loop part won't be entered because if our allocation amount
5254 doesn't require a loop, the case above would handle it.
5255
5256	 The residual amount won't be entered because TEMP1 is a multiple of
5257 the allocation size. The residual will always be 0. As such, the only
5258 part we are actually using from that code is the loop setup. The
5259 actual probing is done in aarch64_output_probe_stack_range. */
5260 insn = emit_insn (gen_probe_stack_range (stack_pointer_rtx,
5261 stack_pointer_rtx, temp1));
5262
5263 /* Now reset the CFA register if needed. */
5264 if (frame_related_p)
5265 {
5266 add_reg_note (insn, REG_CFA_DEF_CFA,
5267 plus_constant (Pmode, stack_pointer_rtx, rounded_size));
5268 RTX_FRAME_RELATED_P (insn) = 1;
5269 }
5270
5271 emit_insn (gen_blockage ());
5272 dump_stack_clash_frame_info (PROBE_LOOP, size != rounded_size);
5273 }
5274
5275 /* Handle any residuals. Residuals of at least MIN_PROBE_THRESHOLD have to
5276 be probed. This maintains the requirement that each page is probed at
5277 least once. For initial probing we probe only if the allocation is
5278 more than GUARD_SIZE - buffer, and for the outgoing arguments we probe
5279 if the amount is larger than buffer. GUARD_SIZE - buffer + buffer ==
5280 GUARD_SIZE. This ensures that for any allocation that is large enough to
5281 trigger a probe here, we'll have at least one, and if they're not large
5282 enough for this code to emit anything for them, the page would have been
5283 probed by the saving of FP/LR either by this function or any callees. If
5284 we don't have any callees then we won't have more stack adjustments and so
5285 are still safe. */
5286 if (residual)
5287 {
5288 HOST_WIDE_INT residual_probe_offset = guard_used_by_caller;
5289 /* If we're doing final adjustments, and we've done any full page
5290 allocations then any residual needs to be probed. */
5291 if (final_adjustment_p && rounded_size != 0)
5292 min_probe_threshold = 0;
5293 /* If doing a small final adjustment, we always probe at offset 0.
5294 This is done to avoid issues when LR is not at position 0 or when
5295 the final adjustment is smaller than the probing offset. */
5296 else if (final_adjustment_p && rounded_size == 0)
5297 residual_probe_offset = 0;
5298
5299 aarch64_sub_sp (temp1, temp2, residual, frame_related_p);
5300 if (residual >= min_probe_threshold)
5301 {
5302 if (dump_file)
5303 fprintf (dump_file,
5304 "Stack clash AArch64 prologue residuals: "
5305 HOST_WIDE_INT_PRINT_DEC " bytes, probing will be required."
5306 "\n", residual);
5307
5308 emit_stack_probe (plus_constant (Pmode, stack_pointer_rtx,
5309 residual_probe_offset));
5310 emit_insn (gen_blockage ());
5311 }
5312 }
5313}
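/* Worked example with purely illustrative numbers: take a 64KiB guard
   (guard_size == 65536), the 1KiB caller guard and a constant allocation of
   204800 bytes (200KiB).  Then rounded_size == ROUND_DOWN (204800, 65536)
   == 196608, giving three guard-sized allocate-and-probe steps (inline if
   that count is within STACK_CLASH_MAX_UNROLL_PAGES, otherwise via the
   loop), and residual == 8192.  For this initial allocation the residual is
   below min_probe_threshold (65536 - 1024), so it is not probed here; the
   subsequent FP/LR saves act as the implicit probe of that page.  */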
5314
a0d0b980
SE
5315/* Return 1 if the register is used by the epilogue. We need to say the
5316 return register is used, but only after epilogue generation is complete.
5317 Note that in the case of sibcalls, the values "used by the epilogue" are
5318 considered live at the start of the called function.
5319
5320 For SIMD functions we need to return 1 for FP registers that are saved and
5321 restored by a function but are not zero in call_used_regs. If we do not do
5322 this, optimizations may remove the restore of the register. */
5323
5324int
5325aarch64_epilogue_uses (int regno)
5326{
5327 if (epilogue_completed)
5328 {
5329 if (regno == LR_REGNUM)
5330 return 1;
5331 if (aarch64_simd_decl_p (cfun->decl) && FP_SIMD_SAVED_REGNUM_P (regno))
5332 return 1;
5333 }
5334 return 0;
5335}
5336
43cacb12
RS
5337/* Add a REG_CFA_EXPRESSION note to INSN to say that register REG
5338 is saved at BASE + OFFSET. */
5339
5340static void
5341aarch64_add_cfa_expression (rtx_insn *insn, unsigned int reg,
5342 rtx base, poly_int64 offset)
5343{
5344 rtx mem = gen_frame_mem (DImode, plus_constant (Pmode, base, offset));
5345 add_reg_note (insn, REG_CFA_EXPRESSION,
5346 gen_rtx_SET (mem, regno_reg_rtx[reg]));
5347}
5348
43e9d192
IB
5349/* AArch64 stack frames generated by this compiler look like:
5350
5351 +-------------------------------+
5352 | |
5353 | incoming stack arguments |
5354 | |
34834420
MS
5355 +-------------------------------+
5356 | | <-- incoming stack pointer (aligned)
43e9d192
IB
5357 | callee-allocated save area |
5358 | for register varargs |
5359 | |
34834420
MS
5360 +-------------------------------+
5361 | local variables | <-- frame_pointer_rtx
43e9d192
IB
5362 | |
5363 +-------------------------------+
cd1bef27 5364 | padding | \
454fdba9 5365 +-------------------------------+ |
454fdba9 5366 | callee-saved registers | | frame.saved_regs_size
454fdba9
RL
5367 +-------------------------------+ |
5368 | LR' | |
5369 +-------------------------------+ |
34834420
MS
5370 | FP' | / <- hard_frame_pointer_rtx (aligned)
5371 +-------------------------------+
43e9d192
IB
5372 | dynamic allocation |
5373 +-------------------------------+
34834420
MS
5374 | padding |
5375 +-------------------------------+
5376 | outgoing stack arguments | <-- arg_pointer
5377 | |
5378 +-------------------------------+
5379 | | <-- stack_pointer_rtx (aligned)
43e9d192 5380
34834420
MS
5381 Dynamic stack allocations via alloca() decrease stack_pointer_rtx
5382 but leave frame_pointer_rtx and hard_frame_pointer_rtx
cd1bef27
JL
5383 unchanged.
5384
5385 By default for stack-clash we assume the guard is at least 64KB, but this
5386 value is configurable to either 4KB or 64KB. We also force the guard size to
5387 be the same as the probing interval and both values are kept in sync.
5388
5389 With those assumptions the callee can allocate up to 63KB (or 3KB depending
5390 on the guard size) of stack space without probing.
5391
5392 When probing is needed, we emit a probe at the start of the prologue
5393 and every PARAM_STACK_CLASH_PROTECTION_GUARD_SIZE bytes thereafter.
5394
5395 We have to track how much space has been allocated and the only stores
5396 to the stack we track as implicit probes are the FP/LR stores.
5397
5398 For outgoing arguments we probe if the size is larger than 1KB, such that
5399 the ABI specified buffer is maintained for the next callee. */
43e9d192
IB
5400
5401/* Generate the prologue instructions for entry into a function.
5402 Establish the stack frame by decreasing the stack pointer with a
5403 properly calculated size and, if necessary, create a frame record
5404 filled with the values of LR and previous frame pointer. The
6991c977 5405 current FP is also set up if it is in use. */
43e9d192
IB
5406
5407void
5408aarch64_expand_prologue (void)
5409{
6a70badb
RS
5410 poly_int64 frame_size = cfun->machine->frame.frame_size;
5411 poly_int64 initial_adjust = cfun->machine->frame.initial_adjust;
71bfb77a 5412 HOST_WIDE_INT callee_adjust = cfun->machine->frame.callee_adjust;
6a70badb
RS
5413 poly_int64 final_adjust = cfun->machine->frame.final_adjust;
5414 poly_int64 callee_offset = cfun->machine->frame.callee_offset;
71bfb77a
WD
5415 unsigned reg1 = cfun->machine->frame.wb_candidate1;
5416 unsigned reg2 = cfun->machine->frame.wb_candidate2;
204d2c03 5417 bool emit_frame_chain = cfun->machine->frame.emit_frame_chain;
71bfb77a 5418 rtx_insn *insn;
43e9d192 5419
db58fd89
JW
5420 /* Sign return address for functions. */
5421 if (aarch64_return_address_signing_enabled ())
27169e45
JW
5422 {
5423 insn = emit_insn (gen_pacisp ());
5424 add_reg_note (insn, REG_CFA_TOGGLE_RA_MANGLE, const0_rtx);
5425 RTX_FRAME_RELATED_P (insn) = 1;
5426 }
db58fd89 5427
dd991abb 5428 if (flag_stack_usage_info)
6a70badb 5429 current_function_static_stack_size = constant_lower_bound (frame_size);
43e9d192 5430
a3eb8a52
EB
5431 if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK)
5432 {
5433 if (crtl->is_leaf && !cfun->calls_alloca)
5434 {
6a70badb
RS
5435 if (maybe_gt (frame_size, PROBE_INTERVAL)
5436 && maybe_gt (frame_size, get_stack_check_protect ()))
8c1dd970
JL
5437 aarch64_emit_probe_stack_range (get_stack_check_protect (),
5438 (frame_size
5439 - get_stack_check_protect ()));
a3eb8a52 5440 }
6a70badb 5441 else if (maybe_gt (frame_size, 0))
8c1dd970 5442 aarch64_emit_probe_stack_range (get_stack_check_protect (), frame_size);
a3eb8a52
EB
5443 }
5444
f5470a77
RS
5445 rtx ip0_rtx = gen_rtx_REG (Pmode, IP0_REGNUM);
5446 rtx ip1_rtx = gen_rtx_REG (Pmode, IP1_REGNUM);
5447
cd1bef27
JL
5448 /* In theory we should never have both an initial adjustment
5449 and a callee save adjustment. Verify that is the case since the
5450 code below does not handle it for -fstack-clash-protection. */
5451 gcc_assert (known_eq (initial_adjust, 0) || callee_adjust == 0);
5452
5453 /* Will only probe if the initial adjustment is larger than the guard
5454 less the amount of the guard reserved for use by the caller's
5455 outgoing args. */
5456 aarch64_allocate_and_probe_stack_space (ip0_rtx, ip1_rtx, initial_adjust,
5457 true, false);
43e9d192 5458
71bfb77a
WD
5459 if (callee_adjust != 0)
5460 aarch64_push_regs (reg1, reg2, callee_adjust);
43e9d192 5461
204d2c03 5462 if (emit_frame_chain)
43e9d192 5463 {
43cacb12 5464 poly_int64 reg_offset = callee_adjust;
71bfb77a 5465 if (callee_adjust == 0)
43cacb12
RS
5466 {
5467 reg1 = R29_REGNUM;
5468 reg2 = R30_REGNUM;
5469 reg_offset = callee_offset;
5470 aarch64_save_callee_saves (DImode, reg_offset, reg1, reg2, false);
5471 }
f5470a77 5472 aarch64_add_offset (Pmode, hard_frame_pointer_rtx,
43cacb12
RS
5473 stack_pointer_rtx, callee_offset,
5474 ip1_rtx, ip0_rtx, frame_pointer_needed);
5475 if (frame_pointer_needed && !frame_size.is_constant ())
5476 {
5477 /* Variable-sized frames need to describe the save slot
5478 address using DW_CFA_expression rather than DW_CFA_offset.
5479 This means that, without taking further action, the
5480 locations of the registers that we've already saved would
5481 remain based on the stack pointer even after we redefine
5482 the CFA based on the frame pointer. We therefore need new
5483 DW_CFA_expressions to re-express the save slots with addresses
5484 based on the frame pointer. */
5485 rtx_insn *insn = get_last_insn ();
5486 gcc_assert (RTX_FRAME_RELATED_P (insn));
5487
5488 /* Add an explicit CFA definition if this was previously
5489 implicit. */
5490 if (!find_reg_note (insn, REG_CFA_ADJUST_CFA, NULL_RTX))
5491 {
5492 rtx src = plus_constant (Pmode, stack_pointer_rtx,
5493 callee_offset);
5494 add_reg_note (insn, REG_CFA_ADJUST_CFA,
5495 gen_rtx_SET (hard_frame_pointer_rtx, src));
5496 }
5497
5498 /* Change the save slot expressions for the registers that
5499 we've already saved. */
5500 reg_offset -= callee_offset;
5501 aarch64_add_cfa_expression (insn, reg2, hard_frame_pointer_rtx,
5502 reg_offset + UNITS_PER_WORD);
5503 aarch64_add_cfa_expression (insn, reg1, hard_frame_pointer_rtx,
5504 reg_offset);
5505 }
71bfb77a 5506 emit_insn (gen_stack_tie (stack_pointer_rtx, hard_frame_pointer_rtx));
43e9d192 5507 }
71bfb77a
WD
5508
5509 aarch64_save_callee_saves (DImode, callee_offset, R0_REGNUM, R30_REGNUM,
204d2c03 5510 callee_adjust != 0 || emit_frame_chain);
a0d0b980
SE
5511 if (aarch64_simd_decl_p (cfun->decl))
5512 aarch64_save_callee_saves (TFmode, callee_offset, V0_REGNUM, V31_REGNUM,
5513 callee_adjust != 0 || emit_frame_chain);
5514 else
5515 aarch64_save_callee_saves (DFmode, callee_offset, V0_REGNUM, V31_REGNUM,
5516 callee_adjust != 0 || emit_frame_chain);
cd1bef27
JL
5517
5518 /* We may need to probe the final adjustment if it is larger than the guard
5519 that is assumed by the callee. */
5520 aarch64_allocate_and_probe_stack_space (ip1_rtx, ip0_rtx, final_adjust,
5521 !frame_pointer_needed, true);
43e9d192
IB
5522}
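/* For orientation only, a sketch of a typical instance of the sequence
   built above, for a small constant-sized frame that establishes a frame
   chain and uses the push form of the callee save (callee_adjust != 0);
   the exact instructions depend on the frame layout computed earlier and
   the register names and offsets here are assumptions for the example:

     stp  x29, x30, [sp, -32]!   // callee_adjust: save FP/LR, drop SP
     mov  x29, sp                // emit_frame_chain: establish the new FP
     str  x19, [sp, 16]          // remaining callee saves, if any
     sub  sp, sp, #N             // final_adjust for outgoing arguments  */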
5523
4f942779
RL
5524/* Return TRUE if we can use a simple_return insn.
5525
5526 This function checks whether the callee saved stack is empty, which
5527 means no restore actions are needed. The pro_and_epilogue pass will use
5528 this to check whether the shrink-wrapping optimization is feasible. */
5529
5530bool
5531aarch64_use_return_insn_p (void)
5532{
5533 if (!reload_completed)
5534 return false;
5535
5536 if (crtl->profile)
5537 return false;
5538
6a70badb 5539 return known_eq (cfun->machine->frame.frame_size, 0);
4f942779
RL
5540}
5541
a0d0b980
SE
5542/* Return false for non-leaf SIMD functions in order to avoid
5543 shrink-wrapping them. Doing this will lose the necessary
5544 save/restore of FP registers. */
5545
5546bool
5547aarch64_use_simple_return_insn_p (void)
5548{
5549 if (aarch64_simd_decl_p (cfun->decl) && !crtl->is_leaf)
5550 return false;
5551
5552 return true;
5553}
5554
71bfb77a
WD
5555/* Generate the epilogue instructions for returning from a function.
5556 This is almost exactly the reverse of the prolog sequence, except
5557 that we need to insert barriers to avoid scheduling loads that read
5558 from a deallocated stack, and we optimize the unwind records by
5559 emitting them all together if possible. */
43e9d192
IB
5560void
5561aarch64_expand_epilogue (bool for_sibcall)
5562{
6a70badb 5563 poly_int64 initial_adjust = cfun->machine->frame.initial_adjust;
71bfb77a 5564 HOST_WIDE_INT callee_adjust = cfun->machine->frame.callee_adjust;
6a70badb
RS
5565 poly_int64 final_adjust = cfun->machine->frame.final_adjust;
5566 poly_int64 callee_offset = cfun->machine->frame.callee_offset;
71bfb77a
WD
5567 unsigned reg1 = cfun->machine->frame.wb_candidate1;
5568 unsigned reg2 = cfun->machine->frame.wb_candidate2;
5569 rtx cfi_ops = NULL;
5570 rtx_insn *insn;
43cacb12
RS
5571 /* A stack clash protection prologue may not have left IP0_REGNUM or
5572 IP1_REGNUM in a usable state. The same is true for allocations
5573 with an SVE component, since we then need both temporary registers
cd1bef27
JL
5574 for each allocation. For stack clash we are in a usable state if
5575 the adjustment is less than GUARD_SIZE - GUARD_USED_BY_CALLER. */
5576 HOST_WIDE_INT guard_size
5577 = 1 << PARAM_VALUE (PARAM_STACK_CLASH_PROTECTION_GUARD_SIZE);
5578 HOST_WIDE_INT guard_used_by_caller = STACK_CLASH_CALLER_GUARD;
5579
5580 /* We can re-use the registers when the allocation amount is smaller than
5581 guard_size - guard_used_by_caller because we won't be doing any probes
5582 then. In such situations the register should remain live with the correct
5583 value. */
43cacb12 5584 bool can_inherit_p = (initial_adjust.is_constant ()
cd1bef27
JL
5585 && final_adjust.is_constant ())
5586 && (!flag_stack_clash_protection
5587 || known_lt (initial_adjust,
5588 guard_size - guard_used_by_caller));
44c0e7b9 5589
71bfb77a 5590 /* We need to add memory barrier to prevent read from deallocated stack. */
6a70badb
RS
5591 bool need_barrier_p
5592 = maybe_ne (get_frame_size ()
5593 + cfun->machine->frame.saved_varargs_size, 0);
43e9d192 5594
71bfb77a 5595 /* Emit a barrier to prevent loads from a deallocated stack. */
6a70badb
RS
5596 if (maybe_gt (final_adjust, crtl->outgoing_args_size)
5597 || cfun->calls_alloca
8144a493 5598 || crtl->calls_eh_return)
43e9d192 5599 {
71bfb77a
WD
5600 emit_insn (gen_stack_tie (stack_pointer_rtx, stack_pointer_rtx));
5601 need_barrier_p = false;
5602 }
7e8c2bd5 5603
71bfb77a
WD
5604 /* Restore the stack pointer from the frame pointer if it may not
5605 be the same as the stack pointer. */
f5470a77
RS
5606 rtx ip0_rtx = gen_rtx_REG (Pmode, IP0_REGNUM);
5607 rtx ip1_rtx = gen_rtx_REG (Pmode, IP1_REGNUM);
6a70badb
RS
5608 if (frame_pointer_needed
5609 && (maybe_ne (final_adjust, 0) || cfun->calls_alloca))
f5470a77
RS
5610 /* If writeback is used when restoring callee-saves, the CFA
5611 is restored on the instruction doing the writeback. */
5612 aarch64_add_offset (Pmode, stack_pointer_rtx,
5613 hard_frame_pointer_rtx, -callee_offset,
43cacb12 5614 ip1_rtx, ip0_rtx, callee_adjust == 0);
71bfb77a 5615 else
cd1bef27
JL
5616 /* The case where we need to re-use the register here is very rare, so
5617 avoid the complicated condition and just always emit a move if the
5618 immediate doesn't fit. */
5619 aarch64_add_sp (ip1_rtx, ip0_rtx, final_adjust, true);
43e9d192 5620
71bfb77a
WD
5621 aarch64_restore_callee_saves (DImode, callee_offset, R0_REGNUM, R30_REGNUM,
5622 callee_adjust != 0, &cfi_ops);
a0d0b980
SE
5623 if (aarch64_simd_decl_p (cfun->decl))
5624 aarch64_restore_callee_saves (TFmode, callee_offset, V0_REGNUM, V31_REGNUM,
5625 callee_adjust != 0, &cfi_ops);
5626 else
5627 aarch64_restore_callee_saves (DFmode, callee_offset, V0_REGNUM, V31_REGNUM,
5628 callee_adjust != 0, &cfi_ops);
43e9d192 5629
71bfb77a
WD
5630 if (need_barrier_p)
5631 emit_insn (gen_stack_tie (stack_pointer_rtx, stack_pointer_rtx));
5632
5633 if (callee_adjust != 0)
5634 aarch64_pop_regs (reg1, reg2, callee_adjust, &cfi_ops);
5635
6a70badb 5636 if (callee_adjust != 0 || maybe_gt (initial_adjust, 65536))
71bfb77a
WD
5637 {
5638 /* Emit delayed restores and set the CFA to be SP + initial_adjust. */
89ac681e 5639 insn = get_last_insn ();
71bfb77a
WD
5640 rtx new_cfa = plus_constant (Pmode, stack_pointer_rtx, initial_adjust);
5641 REG_NOTES (insn) = alloc_reg_note (REG_CFA_DEF_CFA, new_cfa, cfi_ops);
43e9d192 5642 RTX_FRAME_RELATED_P (insn) = 1;
71bfb77a 5643 cfi_ops = NULL;
43e9d192
IB
5644 }
5645
43cacb12
RS
5646 aarch64_add_sp (ip0_rtx, ip1_rtx, initial_adjust,
5647 !can_inherit_p || df_regs_ever_live_p (IP0_REGNUM));
7e8c2bd5 5648
71bfb77a
WD
5649 if (cfi_ops)
5650 {
5651 /* Emit delayed restores and reset the CFA to be SP. */
5652 insn = get_last_insn ();
5653 cfi_ops = alloc_reg_note (REG_CFA_DEF_CFA, stack_pointer_rtx, cfi_ops);
5654 REG_NOTES (insn) = cfi_ops;
5655 RTX_FRAME_RELATED_P (insn) = 1;
dd991abb
RH
5656 }
5657
db58fd89
JW
5658 /* We prefer to emit the combined return/authenticate instruction RETAA;
5659 however, there are three cases in which we must instead emit an explicit
5660 authentication instruction.
5661
5662 1) Sibcalls don't return in a normal way, so if we're about to call one
5663 we must authenticate.
5664
5665 2) The RETAA instruction is not available before ARMv8.3-A, so if we are
5666 generating code for !TARGET_ARMV8_3 we can't use it and must
5667 explicitly authenticate.
5668
5669 3) On an eh_return path we make extra stack adjustments to update the
5670 canonical frame address to be the exception handler's CFA. We want
5671 to authenticate using the CFA of the function which calls eh_return.
5672 */
5673 if (aarch64_return_address_signing_enabled ()
5674 && (for_sibcall || !TARGET_ARMV8_3 || crtl->calls_eh_return))
27169e45
JW
5675 {
5676 insn = emit_insn (gen_autisp ());
5677 add_reg_note (insn, REG_CFA_TOGGLE_RA_MANGLE, const0_rtx);
5678 RTX_FRAME_RELATED_P (insn) = 1;
5679 }
db58fd89 5680
dd991abb
RH
5681 /* Stack adjustment for exception handler. */
5682 if (crtl->calls_eh_return)
5683 {
5684 /* We need to unwind the stack by the offset computed by
5685 EH_RETURN_STACKADJ_RTX. We have already reset the CFA
5686 to be SP; letting the CFA move during this adjustment
5687 is just as correct as retaining the CFA from the body
5688 of the function. Therefore, do nothing special. */
5689 emit_insn (gen_add2_insn (stack_pointer_rtx, EH_RETURN_STACKADJ_RTX));
43e9d192
IB
5690 }
5691
5692 emit_use (gen_rtx_REG (DImode, LR_REGNUM));
5693 if (!for_sibcall)
5694 emit_jump_insn (ret_rtx);
5695}
5696
8144a493
WD
5697/* Implement EH_RETURN_HANDLER_RTX. EH returns need to either return
5698 normally or return to a previous frame after unwinding.
1c960e02 5699
8144a493
WD
5700 An EH return uses a single shared return sequence. The epilogue is
5701 exactly like a normal epilogue except that it has an extra input
5702 register (EH_RETURN_STACKADJ_RTX) which contains the stack adjustment
5703 that must be applied after the frame has been destroyed. An extra label
5704 is inserted before the epilogue which initializes this register to zero,
5705 and this is the entry point for a normal return.
43e9d192 5706
8144a493
WD
5707 An actual EH return updates the return address, initializes the stack
5708 adjustment and jumps directly into the epilogue (bypassing the zeroing
5709 of the adjustment). Since the return address is typically saved on the
5710 stack when a function makes a call, the saved LR must be updated outside
5711 the epilogue.
43e9d192 5712
8144a493
WD
5713 This poses problems as the store is generated well before the epilogue,
5714 so the offset of LR is not known yet. Also optimizations will remove the
5715 store as it appears dead, even after the epilogue is generated (as the
5716 base or offset for loading LR is different in many cases).
43e9d192 5717
8144a493
WD
5718 To avoid these problems this implementation forces the frame pointer
5719 in eh_return functions so that the location of LR is fixed and known early.
5720 It also marks the store volatile, so no optimization is permitted to
5721 remove the store. */
5722rtx
5723aarch64_eh_return_handler_rtx (void)
5724{
5725 rtx tmp = gen_frame_mem (Pmode,
5726 plus_constant (Pmode, hard_frame_pointer_rtx, UNITS_PER_WORD));
43e9d192 5727
8144a493
WD
5728 /* Mark the store volatile, so no optimization is permitted to remove it. */
5729 MEM_VOLATILE_P (tmp) = true;
5730 return tmp;
43e9d192
IB
5731}
5732
43e9d192
IB
5733/* Output code to add DELTA to the first argument, and then jump
5734 to FUNCTION. Used for C++ multiple inheritance. */
5735static void
5736aarch64_output_mi_thunk (FILE *file, tree thunk ATTRIBUTE_UNUSED,
5737 HOST_WIDE_INT delta,
5738 HOST_WIDE_INT vcall_offset,
5739 tree function)
5740{
5741 /* The this pointer is always in x0. Note that this differs from
5742 Arm where the this pointer may be bumped to r1 if r0 is required
5743 to return a pointer to an aggregate. On AArch64 a result value
5744 pointer will be in x8. */
5745 int this_regno = R0_REGNUM;
5d8a22a5
DM
5746 rtx this_rtx, temp0, temp1, addr, funexp;
5747 rtx_insn *insn;
43e9d192 5748
75f1d6fc
SN
5749 reload_completed = 1;
5750 emit_note (NOTE_INSN_PROLOGUE_END);
43e9d192 5751
f5470a77
RS
5752 this_rtx = gen_rtx_REG (Pmode, this_regno);
5753 temp0 = gen_rtx_REG (Pmode, IP0_REGNUM);
5754 temp1 = gen_rtx_REG (Pmode, IP1_REGNUM);
5755
43e9d192 5756 if (vcall_offset == 0)
43cacb12 5757 aarch64_add_offset (Pmode, this_rtx, this_rtx, delta, temp1, temp0, false);
43e9d192
IB
5758 else
5759 {
28514dda 5760 gcc_assert ((vcall_offset & (POINTER_BYTES - 1)) == 0);
43e9d192 5761
75f1d6fc
SN
5762 addr = this_rtx;
5763 if (delta != 0)
5764 {
5765 if (delta >= -256 && delta < 256)
5766 addr = gen_rtx_PRE_MODIFY (Pmode, this_rtx,
5767 plus_constant (Pmode, this_rtx, delta));
5768 else
43cacb12
RS
5769 aarch64_add_offset (Pmode, this_rtx, this_rtx, delta,
5770 temp1, temp0, false);
43e9d192
IB
5771 }
5772
28514dda
YZ
5773 if (Pmode == ptr_mode)
5774 aarch64_emit_move (temp0, gen_rtx_MEM (ptr_mode, addr));
5775 else
5776 aarch64_emit_move (temp0,
5777 gen_rtx_ZERO_EXTEND (Pmode,
5778 gen_rtx_MEM (ptr_mode, addr)));
75f1d6fc 5779
28514dda 5780 if (vcall_offset >= -256 && vcall_offset < 4096 * POINTER_BYTES)
75f1d6fc 5781 addr = plus_constant (Pmode, temp0, vcall_offset);
43e9d192
IB
5782 else
5783 {
f43657b4
JW
5784 aarch64_internal_mov_immediate (temp1, GEN_INT (vcall_offset), true,
5785 Pmode);
75f1d6fc 5786 addr = gen_rtx_PLUS (Pmode, temp0, temp1);
43e9d192
IB
5787 }
5788
28514dda
YZ
5789 if (Pmode == ptr_mode)
5790 aarch64_emit_move (temp1, gen_rtx_MEM (ptr_mode,addr));
5791 else
5792 aarch64_emit_move (temp1,
5793 gen_rtx_SIGN_EXTEND (Pmode,
5794 gen_rtx_MEM (ptr_mode, addr)));
5795
75f1d6fc 5796 emit_insn (gen_add2_insn (this_rtx, temp1));
43e9d192
IB
5797 }
5798
75f1d6fc
SN
5799 /* Generate a tail call to the target function. */
5800 if (!TREE_USED (function))
5801 {
5802 assemble_external (function);
5803 TREE_USED (function) = 1;
5804 }
5805 funexp = XEXP (DECL_RTL (function), 0);
5806 funexp = gen_rtx_MEM (FUNCTION_MODE, funexp);
5807 insn = emit_call_insn (gen_sibcall (funexp, const0_rtx, NULL_RTX));
5808 SIBLING_CALL_P (insn) = 1;
5809
5810 insn = get_insns ();
5811 shorten_branches (insn);
5812 final_start_function (insn, file, 1);
5813 final (insn, file, 1);
43e9d192 5814 final_end_function ();
75f1d6fc
SN
5815
5816 /* Stop pretending to be a post-reload pass. */
5817 reload_completed = 0;
43e9d192
IB
5818}
5819
43e9d192
IB
5820static bool
5821aarch64_tls_referenced_p (rtx x)
5822{
5823 if (!TARGET_HAVE_TLS)
5824 return false;
e7de8563
RS
5825 subrtx_iterator::array_type array;
5826 FOR_EACH_SUBRTX (iter, array, x, ALL)
5827 {
5828 const_rtx x = *iter;
5829 if (GET_CODE (x) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (x) != 0)
5830 return true;
5831 /* Don't recurse into UNSPEC_TLS looking for TLS symbols; these are
5832 TLS offsets, not real symbol references. */
5833 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
5834 iter.skip_subrtxes ();
5835 }
5836 return false;
43e9d192
IB
5837}
5838
5839
43e9d192
IB
5840/* Return true if val can be encoded as a 12-bit unsigned immediate with
5841 a left shift of 0 or 12 bits. */
5842bool
5843aarch64_uimm12_shift (HOST_WIDE_INT val)
5844{
5845 return ((val & (((HOST_WIDE_INT) 0xfff) << 0)) == val
5846 || (val & (((HOST_WIDE_INT) 0xfff) << 12)) == val
5847 );
5848}
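/* For example, 0xabc is accepted with a shift of 0 and 0xabc000 with a
   shift of 12, matching the immediate field of ADD/SUB (immediate) with its
   optional LSL #12, whereas 0xabc001 needs bits from both halves and is
   rejected.  */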
5849
eb471ba3
TC
5850/* Returns the nearest value to VAL that will fit as a 12-bit unsigned immediate
5851 that can be created with a left shift of 0 or 12. */
5852static HOST_WIDE_INT
5853aarch64_clamp_to_uimm12_shift (HOST_WIDE_INT val)
5854{
5855 /* Check to see if the value fits in 24 bits, as that is the maximum we can
5856 handle correctly. */
5857 gcc_assert ((val & 0xffffff) == val);
5858
5859 if (((val & 0xfff) << 0) == val)
5860 return val;
5861
5862 return val & (0xfff << 12);
5863}
43e9d192
IB
5864
5865/* Return true if val is an immediate that can be loaded into a
5866 register by a MOVZ instruction. */
5867static bool
77e994c9 5868aarch64_movw_imm (HOST_WIDE_INT val, scalar_int_mode mode)
43e9d192
IB
5869{
5870 if (GET_MODE_SIZE (mode) > 4)
5871 {
5872 if ((val & (((HOST_WIDE_INT) 0xffff) << 32)) == val
5873 || (val & (((HOST_WIDE_INT) 0xffff) << 48)) == val)
5874 return 1;
5875 }
5876 else
5877 {
43cacb12
RS
5878 /* Ignore sign extension. */
5879 val &= (HOST_WIDE_INT) 0xffffffff;
5880 }
5881 return ((val & (((HOST_WIDE_INT) 0xffff) << 0)) == val
5882 || (val & (((HOST_WIDE_INT) 0xffff) << 16)) == val);
5883}
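/* For example, 0x0000000012340000 occupies only half-word 1 and so can be
   loaded with a single MOVZ, whereas 0x0000000000012345 spans two
   half-words and is rejected here.  */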
5884
5885/* VAL is a value with the inner mode of MODE. Replicate it to fill a
5886 64-bit (DImode) integer. */
5887
5888static unsigned HOST_WIDE_INT
5889aarch64_replicate_bitmask_imm (unsigned HOST_WIDE_INT val, machine_mode mode)
5890{
5891 unsigned int size = GET_MODE_UNIT_PRECISION (mode);
5892 while (size < 64)
5893 {
5894 val &= (HOST_WIDE_INT_1U << size) - 1;
5895 val |= val << size;
5896 size *= 2;
43e9d192 5897 }
43cacb12 5898 return val;
43e9d192
IB
5899}
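/* For example, the HImode value 0x00a5 is widened here to the repeating
   64-bit pattern 0x00a500a500a500a5, so the bitmask test below only ever
   has to reason about full 64-bit values.  */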
5900
a64c73a2
WD
5901/* Multipliers for repeating bitmasks of width 32, 16, 8, 4, and 2. */
5902
5903static const unsigned HOST_WIDE_INT bitmask_imm_mul[] =
5904 {
5905 0x0000000100000001ull,
5906 0x0001000100010001ull,
5907 0x0101010101010101ull,
5908 0x1111111111111111ull,
5909 0x5555555555555555ull,
5910 };
5911
43e9d192
IB
5912
5913/* Return true if val is a valid bitmask immediate. */
a64c73a2 5914
43e9d192 5915bool
a64c73a2 5916aarch64_bitmask_imm (HOST_WIDE_INT val_in, machine_mode mode)
43e9d192 5917{
a64c73a2
WD
5918 unsigned HOST_WIDE_INT val, tmp, mask, first_one, next_one;
5919 int bits;
5920
5921 /* Check for a single sequence of one bits and return quickly if so.
5922 The special cases of all ones and all zeroes return false. */
43cacb12 5923 val = aarch64_replicate_bitmask_imm (val_in, mode);
a64c73a2
WD
5924 tmp = val + (val & -val);
5925
5926 if (tmp == (tmp & -tmp))
5927 return (val + 1) > 1;
5928
5929 /* Replicate 32-bit immediates so we can treat them as 64-bit. */
5930 if (mode == SImode)
5931 val = (val << 32) | (val & 0xffffffff);
5932
5933 /* Invert if the immediate doesn't start with a zero bit - this means we
5934 only need to search for sequences of one bits. */
5935 if (val & 1)
5936 val = ~val;
5937
5938 /* Find the first set bit and set tmp to val with the first sequence of one
5939 bits removed. Return success if there is a single sequence of ones. */
5940 first_one = val & -val;
5941 tmp = val & (val + first_one);
5942
5943 if (tmp == 0)
5944 return true;
5945
5946 /* Find the next set bit and compute the difference in bit position. */
5947 next_one = tmp & -tmp;
5948 bits = clz_hwi (first_one) - clz_hwi (next_one);
5949 mask = val ^ tmp;
5950
5951 /* Check the bit position difference is a power of 2, and that the first
5952 sequence of one bits fits within 'bits' bits. */
5953 if ((mask >> bits) != 0 || bits != (bits & -bits))
5954 return false;
5955
5956 /* Check the sequence of one bits is repeated 64/bits times. */
5957 return val == mask * bitmask_imm_mul[__builtin_clz (bits) - 26];
43e9d192
IB
5958}
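/* Worked example of the quick check above for a DImode value: with
   val == 0x0000000000ffff00 (a single run of 16 ones), val & -val is the
   lowest set bit 0x100, so tmp == val + 0x100 == 0x0000000001000000, a
   power of two.  tmp == (tmp & -tmp) therefore holds and the function
   returns (val + 1) > 1, which is true; the same expression rejects the
   all-zeros and all-ones special cases.  */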
5959
43fd192f
MC
5960/* Create mask of ones, covering the lowest to highest bits set in VAL_IN.
5961 Assumed precondition: VAL_IN is not zero. */
5962
5963unsigned HOST_WIDE_INT
5964aarch64_and_split_imm1 (HOST_WIDE_INT val_in)
5965{
5966 int lowest_bit_set = ctz_hwi (val_in);
5967 int highest_bit_set = floor_log2 (val_in);
5968 gcc_assert (val_in != 0);
5969
5970 return ((HOST_WIDE_INT_UC (2) << highest_bit_set) -
5971 (HOST_WIDE_INT_1U << lowest_bit_set));
5972}
5973
5974/* Create a constant in which all bits outside the range from the lowest set
5975 bit to the highest set bit of VAL_IN are set to 1. */
5976
5977unsigned HOST_WIDE_INT
5978aarch64_and_split_imm2 (HOST_WIDE_INT val_in)
5979{
5980 return val_in | ~aarch64_and_split_imm1 (val_in);
5981}
5982
5983/* Return true if VAL_IN is a valid 'and' bitmask immediate. */
5984
5985bool
5986aarch64_and_bitmask_imm (unsigned HOST_WIDE_INT val_in, machine_mode mode)
5987{
77e994c9
RS
5988 scalar_int_mode int_mode;
5989 if (!is_a <scalar_int_mode> (mode, &int_mode))
5990 return false;
5991
5992 if (aarch64_bitmask_imm (val_in, int_mode))
43fd192f
MC
5993 return false;
5994
77e994c9 5995 if (aarch64_move_imm (val_in, int_mode))
43fd192f
MC
5996 return false;
5997
5998 unsigned HOST_WIDE_INT imm2 = aarch64_and_split_imm2 (val_in);
5999
77e994c9 6000 return aarch64_bitmask_imm (imm2, int_mode);
43fd192f 6001}
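/* Worked example (hypothetical value): val_in == 0x0000ff000000f000 is
   neither a bitmask nor a MOV immediate, so a single instruction cannot
   use it.  imm1 covers bits 12..47 (0x0000fffffffff000) and
   imm2 == val_in | ~imm1 == 0xffffff000000ffff; both are valid bitmask
   immediates and imm1 & imm2 == val_in, so the AND can be split into two
   AND-immediate instructions.  */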
43e9d192
IB
6002
6003/* Return true if val is an immediate that can be loaded into a
6004 register in a single instruction. */
6005bool
ef4bddc2 6006aarch64_move_imm (HOST_WIDE_INT val, machine_mode mode)
43e9d192 6007{
77e994c9
RS
6008 scalar_int_mode int_mode;
6009 if (!is_a <scalar_int_mode> (mode, &int_mode))
6010 return false;
6011
6012 if (aarch64_movw_imm (val, int_mode) || aarch64_movw_imm (~val, int_mode))
43e9d192 6013 return 1;
77e994c9 6014 return aarch64_bitmask_imm (val, int_mode);
43e9d192
IB
6015}
6016
6017static bool
ef4bddc2 6018aarch64_cannot_force_const_mem (machine_mode mode ATTRIBUTE_UNUSED, rtx x)
43e9d192
IB
6019{
6020 rtx base, offset;
7eda14e1 6021
43e9d192
IB
6022 if (GET_CODE (x) == HIGH)
6023 return true;
6024
43cacb12
RS
6025 /* There's no way to calculate VL-based values using relocations. */
6026 subrtx_iterator::array_type array;
6027 FOR_EACH_SUBRTX (iter, array, x, ALL)
6028 if (GET_CODE (*iter) == CONST_POLY_INT)
6029 return true;
6030
43e9d192
IB
6031 split_const (x, &base, &offset);
6032 if (GET_CODE (base) == SYMBOL_REF || GET_CODE (base) == LABEL_REF)
28514dda 6033 {
43cacb12 6034 if (aarch64_classify_symbol (base, INTVAL (offset))
28514dda
YZ
6035 != SYMBOL_FORCE_TO_MEM)
6036 return true;
6037 else
6038 /* Avoid generating a 64-bit relocation in ILP32; leave
6039 to aarch64_expand_mov_immediate to handle it properly. */
6040 return mode != ptr_mode;
6041 }
43e9d192
IB
6042
6043 return aarch64_tls_referenced_p (x);
6044}
6045
e79136e4
WD
6046/* Implement TARGET_CASE_VALUES_THRESHOLD.
6047 The expansion for a table switch is quite expensive due to the number
6048 of instructions, the table lookup and the hard-to-predict indirect jump.
6049 When optimizing for speed with -O3 enabled, use the per-core tuning if
6050 set, otherwise use tables for > 16 cases as a tradeoff between size and
6051 performance. When optimizing for size, use the default setting. */
50487d79
EM
6052
6053static unsigned int
6054aarch64_case_values_threshold (void)
6055{
6056 /* Use the specified limit for the number of cases before using jump
6057 tables at higher optimization levels. */
6058 if (optimize > 2
6059 && selected_cpu->tune->max_case_values != 0)
6060 return selected_cpu->tune->max_case_values;
6061 else
e79136e4 6062 return optimize_size ? default_case_values_threshold () : 17;
50487d79
EM
6063}
6064
43e9d192
IB
6065/* Return true if register REGNO is a valid index register.
6066 STRICT_P is true if REG_OK_STRICT is in effect. */
6067
6068bool
6069aarch64_regno_ok_for_index_p (int regno, bool strict_p)
6070{
6071 if (!HARD_REGISTER_NUM_P (regno))
6072 {
6073 if (!strict_p)
6074 return true;
6075
6076 if (!reg_renumber)
6077 return false;
6078
6079 regno = reg_renumber[regno];
6080 }
6081 return GP_REGNUM_P (regno);
6082}
6083
6084/* Return true if register REGNO is a valid base register for mode MODE.
6085 STRICT_P is true if REG_OK_STRICT is in effect. */
6086
6087bool
6088aarch64_regno_ok_for_base_p (int regno, bool strict_p)
6089{
6090 if (!HARD_REGISTER_NUM_P (regno))
6091 {
6092 if (!strict_p)
6093 return true;
6094
6095 if (!reg_renumber)
6096 return false;
6097
6098 regno = reg_renumber[regno];
6099 }
6100
6101 /* The fake registers will be eliminated to either the stack or
6102 hard frame pointer, both of which are usually valid base registers.
6103 Reload deals with the cases where the eliminated form isn't valid. */
6104 return (GP_REGNUM_P (regno)
6105 || regno == SP_REGNUM
6106 || regno == FRAME_POINTER_REGNUM
6107 || regno == ARG_POINTER_REGNUM);
6108}
6109
6110/* Return true if X is a valid base register for mode MODE.
6111 STRICT_P is true if REG_OK_STRICT is in effect. */
6112
6113static bool
6114aarch64_base_register_rtx_p (rtx x, bool strict_p)
6115{
76160199
RS
6116 if (!strict_p
6117 && GET_CODE (x) == SUBREG
6118 && contains_reg_of_mode[GENERAL_REGS][GET_MODE (SUBREG_REG (x))])
43e9d192
IB
6119 x = SUBREG_REG (x);
6120
6121 return (REG_P (x) && aarch64_regno_ok_for_base_p (REGNO (x), strict_p));
6122}
6123
6124/* Return true if address offset is a valid index. If it is, fill in INFO
6125 appropriately. STRICT_P is true if REG_OK_STRICT is in effect. */
6126
6127static bool
6128aarch64_classify_index (struct aarch64_address_info *info, rtx x,
ef4bddc2 6129 machine_mode mode, bool strict_p)
43e9d192
IB
6130{
6131 enum aarch64_address_type type;
6132 rtx index;
6133 int shift;
6134
6135 /* (reg:P) */
6136 if ((REG_P (x) || GET_CODE (x) == SUBREG)
6137 && GET_MODE (x) == Pmode)
6138 {
6139 type = ADDRESS_REG_REG;
6140 index = x;
6141 shift = 0;
6142 }
6143 /* (sign_extend:DI (reg:SI)) */
6144 else if ((GET_CODE (x) == SIGN_EXTEND
6145 || GET_CODE (x) == ZERO_EXTEND)
6146 && GET_MODE (x) == DImode
6147 && GET_MODE (XEXP (x, 0)) == SImode)
6148 {
6149 type = (GET_CODE (x) == SIGN_EXTEND)
6150 ? ADDRESS_REG_SXTW : ADDRESS_REG_UXTW;
6151 index = XEXP (x, 0);
6152 shift = 0;
6153 }
6154 /* (mult:DI (sign_extend:DI (reg:SI)) (const_int scale)) */
6155 else if (GET_CODE (x) == MULT
6156 && (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
6157 || GET_CODE (XEXP (x, 0)) == ZERO_EXTEND)
6158 && GET_MODE (XEXP (x, 0)) == DImode
6159 && GET_MODE (XEXP (XEXP (x, 0), 0)) == SImode
6160 && CONST_INT_P (XEXP (x, 1)))
6161 {
6162 type = (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND)
6163 ? ADDRESS_REG_SXTW : ADDRESS_REG_UXTW;
6164 index = XEXP (XEXP (x, 0), 0);
6165 shift = exact_log2 (INTVAL (XEXP (x, 1)));
6166 }
6167 /* (ashift:DI (sign_extend:DI (reg:SI)) (const_int shift)) */
6168 else if (GET_CODE (x) == ASHIFT
6169 && (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
6170 || GET_CODE (XEXP (x, 0)) == ZERO_EXTEND)
6171 && GET_MODE (XEXP (x, 0)) == DImode
6172 && GET_MODE (XEXP (XEXP (x, 0), 0)) == SImode
6173 && CONST_INT_P (XEXP (x, 1)))
6174 {
6175 type = (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND)
6176 ? ADDRESS_REG_SXTW : ADDRESS_REG_UXTW;
6177 index = XEXP (XEXP (x, 0), 0);
6178 shift = INTVAL (XEXP (x, 1));
6179 }
6180 /* (sign_extract:DI (mult:DI (reg:DI) (const_int scale)) 32+shift 0) */
6181 else if ((GET_CODE (x) == SIGN_EXTRACT
6182 || GET_CODE (x) == ZERO_EXTRACT)
6183 && GET_MODE (x) == DImode
6184 && GET_CODE (XEXP (x, 0)) == MULT
6185 && GET_MODE (XEXP (XEXP (x, 0), 0)) == DImode
6186 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))
6187 {
6188 type = (GET_CODE (x) == SIGN_EXTRACT)
6189 ? ADDRESS_REG_SXTW : ADDRESS_REG_UXTW;
6190 index = XEXP (XEXP (x, 0), 0);
6191 shift = exact_log2 (INTVAL (XEXP (XEXP (x, 0), 1)));
6192 if (INTVAL (XEXP (x, 1)) != 32 + shift
6193 || INTVAL (XEXP (x, 2)) != 0)
6194 shift = -1;
6195 }
6196 /* (and:DI (mult:DI (reg:DI) (const_int scale))
6197 (const_int 0xffffffff<<shift)) */
6198 else if (GET_CODE (x) == AND
6199 && GET_MODE (x) == DImode
6200 && GET_CODE (XEXP (x, 0)) == MULT
6201 && GET_MODE (XEXP (XEXP (x, 0), 0)) == DImode
6202 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
6203 && CONST_INT_P (XEXP (x, 1)))
6204 {
6205 type = ADDRESS_REG_UXTW;
6206 index = XEXP (XEXP (x, 0), 0);
6207 shift = exact_log2 (INTVAL (XEXP (XEXP (x, 0), 1)));
6208 if (INTVAL (XEXP (x, 1)) != (HOST_WIDE_INT)0xffffffff << shift)
6209 shift = -1;
6210 }
6211 /* (sign_extract:DI (ashift:DI (reg:DI) (const_int shift)) 32+shift 0) */
6212 else if ((GET_CODE (x) == SIGN_EXTRACT
6213 || GET_CODE (x) == ZERO_EXTRACT)
6214 && GET_MODE (x) == DImode
6215 && GET_CODE (XEXP (x, 0)) == ASHIFT
6216 && GET_MODE (XEXP (XEXP (x, 0), 0)) == DImode
6217 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))
6218 {
6219 type = (GET_CODE (x) == SIGN_EXTRACT)
6220 ? ADDRESS_REG_SXTW : ADDRESS_REG_UXTW;
6221 index = XEXP (XEXP (x, 0), 0);
6222 shift = INTVAL (XEXP (XEXP (x, 0), 1));
6223 if (INTVAL (XEXP (x, 1)) != 32 + shift
6224 || INTVAL (XEXP (x, 2)) != 0)
6225 shift = -1;
6226 }
6227 /* (and:DI (ashift:DI (reg:DI) (const_int shift))
6228 (const_int 0xffffffff<<shift)) */
6229 else if (GET_CODE (x) == AND
6230 && GET_MODE (x) == DImode
6231 && GET_CODE (XEXP (x, 0)) == ASHIFT
6232 && GET_MODE (XEXP (XEXP (x, 0), 0)) == DImode
6233 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
6234 && CONST_INT_P (XEXP (x, 1)))
6235 {
6236 type = ADDRESS_REG_UXTW;
6237 index = XEXP (XEXP (x, 0), 0);
6238 shift = INTVAL (XEXP (XEXP (x, 0), 1));
6239 if (INTVAL (XEXP (x, 1)) != (HOST_WIDE_INT)0xffffffff << shift)
6240 shift = -1;
6241 }
6242 /* (mult:P (reg:P) (const_int scale)) */
6243 else if (GET_CODE (x) == MULT
6244 && GET_MODE (x) == Pmode
6245 && GET_MODE (XEXP (x, 0)) == Pmode
6246 && CONST_INT_P (XEXP (x, 1)))
6247 {
6248 type = ADDRESS_REG_REG;
6249 index = XEXP (x, 0);
6250 shift = exact_log2 (INTVAL (XEXP (x, 1)));
6251 }
6252 /* (ashift:P (reg:P) (const_int shift)) */
6253 else if (GET_CODE (x) == ASHIFT
6254 && GET_MODE (x) == Pmode
6255 && GET_MODE (XEXP (x, 0)) == Pmode
6256 && CONST_INT_P (XEXP (x, 1)))
6257 {
6258 type = ADDRESS_REG_REG;
6259 index = XEXP (x, 0);
6260 shift = INTVAL (XEXP (x, 1));
6261 }
6262 else
6263 return false;
6264
76160199
RS
6265 if (!strict_p
6266 && GET_CODE (index) == SUBREG
6267 && contains_reg_of_mode[GENERAL_REGS][GET_MODE (SUBREG_REG (index))])
43e9d192
IB
6268 index = SUBREG_REG (index);
6269
43cacb12
RS
6270 if (aarch64_sve_data_mode_p (mode))
6271 {
6272 if (type != ADDRESS_REG_REG
6273 || (1 << shift) != GET_MODE_UNIT_SIZE (mode))
6274 return false;
6275 }
6276 else
6277 {
6278 if (shift != 0
6279 && !(IN_RANGE (shift, 1, 3)
6280 && known_eq (1 << shift, GET_MODE_SIZE (mode))))
6281 return false;
6282 }
6283
6284 if (REG_P (index)
43e9d192
IB
6285 && aarch64_regno_ok_for_index_p (REGNO (index), strict_p))
6286 {
6287 info->type = type;
6288 info->offset = index;
6289 info->shift = shift;
6290 return true;
6291 }
6292
6293 return false;
6294}
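/* For example (illustrative register names), the index operand for a DImode
   access of the form [x1, x2, lsl #3] reaches this function as
   (ashift:DI (reg:DI x2) (const_int 3)), which is classified as
   ADDRESS_REG_REG with shift == 3; the shift is accepted because
   1 << 3 matches GET_MODE_SIZE (DImode).  */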
6295
abc52318
KT
6296/* Return true if MODE is one of the modes for which we
6297 support LDP/STP operations. */
6298
6299static bool
6300aarch64_mode_valid_for_sched_fusion_p (machine_mode mode)
6301{
6302 return mode == SImode || mode == DImode
6303 || mode == SFmode || mode == DFmode
6304 || (aarch64_vector_mode_supported_p (mode)
9f5361c8
KT
6305 && (known_eq (GET_MODE_SIZE (mode), 8)
6306 || (known_eq (GET_MODE_SIZE (mode), 16)
6307 && (aarch64_tune_params.extra_tuning_flags
6308 & AARCH64_EXTRA_TUNE_NO_LDP_STP_QREGS) == 0)));
abc52318
KT
6309}
6310
9e0218fc
RH
6311/* Return true if REGNO is a virtual pointer register, or an eliminable
6312 "soft" frame register. Like REGNO_PTR_FRAME_P except that we don't
6313 include stack_pointer or hard_frame_pointer. */
6314static bool
6315virt_or_elim_regno_p (unsigned regno)
6316{
6317 return ((regno >= FIRST_VIRTUAL_REGISTER
6318 && regno <= LAST_VIRTUAL_POINTER_REGISTER)
6319 || regno == FRAME_POINTER_REGNUM
6320 || regno == ARG_POINTER_REGNUM);
6321}
6322
a97d8b98
RS
6323/* Return true if X is a valid address of type TYPE for machine mode MODE.
6324 If it is, fill in INFO appropriately. STRICT_P is true if
6325 REG_OK_STRICT is in effect. */
43e9d192 6326
a98824ac 6327bool
43e9d192 6328aarch64_classify_address (struct aarch64_address_info *info,
a97d8b98 6329 rtx x, machine_mode mode, bool strict_p,
a98824ac 6330 aarch64_addr_query_type type)
43e9d192
IB
6331{
6332 enum rtx_code code = GET_CODE (x);
6333 rtx op0, op1;
dc640181
RS
6334 poly_int64 offset;
6335
6a70badb 6336 HOST_WIDE_INT const_size;
2d8c6dc1 6337
80d43579
WD
6338 /* On BE, we use load/store pair for all large int mode load/stores.
6339 TI/TFmode may also use a load/store pair. */
43cacb12
RS
6340 unsigned int vec_flags = aarch64_classify_vector_mode (mode);
6341 bool advsimd_struct_p = (vec_flags == (VEC_ADVSIMD | VEC_STRUCT));
a97d8b98 6342 bool load_store_pair_p = (type == ADDR_QUERY_LDP_STP
a25831ac 6343 || type == ADDR_QUERY_LDP_STP_N
80d43579
WD
6344 || mode == TImode
6345 || mode == TFmode
43cacb12 6346 || (BYTES_BIG_ENDIAN && advsimd_struct_p));
2d8c6dc1 6347
a25831ac
AV
6348 /* If we are dealing with ADDR_QUERY_LDP_STP_N that means the incoming mode
6349 corresponds to the actual size of the memory being loaded/stored and the
6350 mode of the corresponding addressing mode is half of that. */
6351 if (type == ADDR_QUERY_LDP_STP_N
6352 && known_eq (GET_MODE_SIZE (mode), 16))
6353 mode = DFmode;
6354
6a70badb 6355 bool allow_reg_index_p = (!load_store_pair_p
43cacb12
RS
6356 && (known_lt (GET_MODE_SIZE (mode), 16)
6357 || vec_flags == VEC_ADVSIMD
6358 || vec_flags == VEC_SVE_DATA));
6359
6360 /* For SVE, only accept [Rn], [Rn, Rm, LSL #shift] and
6361 [Rn, #offset, MUL VL]. */
6362 if ((vec_flags & (VEC_SVE_DATA | VEC_SVE_PRED)) != 0
6363 && (code != REG && code != PLUS))
6364 return false;
2d8c6dc1
AH
6365
6366 /* On LE, for AdvSIMD, don't support anything other than POST_INC or
6367 REG addressing. */
43cacb12
RS
6368 if (advsimd_struct_p
6369 && !BYTES_BIG_ENDIAN
43e9d192
IB
6370 && (code != POST_INC && code != REG))
6371 return false;
6372
43cacb12
RS
6373 gcc_checking_assert (GET_MODE (x) == VOIDmode
6374 || SCALAR_INT_MODE_P (GET_MODE (x)));
6375
43e9d192
IB
6376 switch (code)
6377 {
6378 case REG:
6379 case SUBREG:
6380 info->type = ADDRESS_REG_IMM;
6381 info->base = x;
6382 info->offset = const0_rtx;
dc640181 6383 info->const_offset = 0;
43e9d192
IB
6384 return aarch64_base_register_rtx_p (x, strict_p);
6385
6386 case PLUS:
6387 op0 = XEXP (x, 0);
6388 op1 = XEXP (x, 1);
15c0c5c9
JW
6389
6390 if (! strict_p
4aa81c2e 6391 && REG_P (op0)
9e0218fc 6392 && virt_or_elim_regno_p (REGNO (op0))
dc640181 6393 && poly_int_rtx_p (op1, &offset))
15c0c5c9
JW
6394 {
6395 info->type = ADDRESS_REG_IMM;
6396 info->base = op0;
6397 info->offset = op1;
dc640181 6398 info->const_offset = offset;
15c0c5c9
JW
6399
6400 return true;
6401 }
6402
6a70badb 6403 if (maybe_ne (GET_MODE_SIZE (mode), 0)
dc640181
RS
6404 && aarch64_base_register_rtx_p (op0, strict_p)
6405 && poly_int_rtx_p (op1, &offset))
43e9d192 6406 {
43e9d192
IB
6407 info->type = ADDRESS_REG_IMM;
6408 info->base = op0;
6409 info->offset = op1;
dc640181 6410 info->const_offset = offset;
43e9d192
IB
6411
6412 /* TImode and TFmode values are allowed in both pairs of X
6413 registers and individual Q registers. The available
6414 address modes are:
6415 X,X: 7-bit signed scaled offset
6416 Q: 9-bit signed offset
6417 We conservatively require an offset representable in either mode.
8ed49fab
KT
6418 When performing the check for pairs of X registers i.e. LDP/STP
6419 pass down DImode since that is the natural size of the LDP/STP
6420 instruction memory accesses. */
43e9d192 6421 if (mode == TImode || mode == TFmode)
8ed49fab 6422 return (aarch64_offset_7bit_signed_scaled_p (DImode, offset)
3c5af608 6423 && (aarch64_offset_9bit_signed_unscaled_p (mode, offset)
8734dfac 6424 || offset_12bit_unsigned_scaled_p (mode, offset)));
43e9d192 6425
2d8c6dc1
AH
6426 /* A 7-bit offset check because OImode will emit an ldp/stp
6427 instruction (only big endian will get here).
6428 For ldp/stp instructions, the offset is scaled for the size of a
6429 single element of the pair. */
6430 if (mode == OImode)
6431 return aarch64_offset_7bit_signed_scaled_p (TImode, offset);
6432
6433 /* Three 9/12-bit offset checks because CImode will emit three
6434 ldr/str instructions (only big endian will get here). */
6435 if (mode == CImode)
6436 return (aarch64_offset_7bit_signed_scaled_p (TImode, offset)
3c5af608
MM
6437 && (aarch64_offset_9bit_signed_unscaled_p (V16QImode,
6438 offset + 32)
2d8c6dc1
AH
6439 || offset_12bit_unsigned_scaled_p (V16QImode,
6440 offset + 32)));
6441
6442 /* Two 7-bit offset checks because XImode will emit two ldp/stp
6443 instructions (only big endian will get here). */
6444 if (mode == XImode)
6445 return (aarch64_offset_7bit_signed_scaled_p (TImode, offset)
6446 && aarch64_offset_7bit_signed_scaled_p (TImode,
6447 offset + 32));
6448
43cacb12
RS
6449 /* Make "m" use the LD1 offset range for SVE data modes, so
6450 that pre-RTL optimizers like ivopts will optimize for that range
6451 instead of the wider LDR/STR range. */
6452 if (vec_flags == VEC_SVE_DATA)
6453 return (type == ADDR_QUERY_M
6454 ? offset_4bit_signed_scaled_p (mode, offset)
6455 : offset_9bit_signed_scaled_p (mode, offset));
6456
9f4cbab8
RS
6457 if (vec_flags == (VEC_SVE_DATA | VEC_STRUCT))
6458 {
6459 poly_int64 end_offset = (offset
6460 + GET_MODE_SIZE (mode)
6461 - BYTES_PER_SVE_VECTOR);
6462 return (type == ADDR_QUERY_M
6463 ? offset_4bit_signed_scaled_p (mode, offset)
6464 : (offset_9bit_signed_scaled_p (SVE_BYTE_MODE, offset)
6465 && offset_9bit_signed_scaled_p (SVE_BYTE_MODE,
6466 end_offset)));
6467 }
6468
43cacb12
RS
6469 if (vec_flags == VEC_SVE_PRED)
6470 return offset_9bit_signed_scaled_p (mode, offset);
6471
2d8c6dc1 6472 if (load_store_pair_p)
6a70badb 6473 return ((known_eq (GET_MODE_SIZE (mode), 4)
9f5361c8
KT
6474 || known_eq (GET_MODE_SIZE (mode), 8)
6475 || known_eq (GET_MODE_SIZE (mode), 16))
44707478 6476 && aarch64_offset_7bit_signed_scaled_p (mode, offset));
43e9d192 6477 else
3c5af608 6478 return (aarch64_offset_9bit_signed_unscaled_p (mode, offset)
43e9d192
IB
6479 || offset_12bit_unsigned_scaled_p (mode, offset));
6480 }
6481
6482 if (allow_reg_index_p)
6483 {
6484 /* Look for base + (scaled/extended) index register. */
6485 if (aarch64_base_register_rtx_p (op0, strict_p)
6486 && aarch64_classify_index (info, op1, mode, strict_p))
6487 {
6488 info->base = op0;
6489 return true;
6490 }
6491 if (aarch64_base_register_rtx_p (op1, strict_p)
6492 && aarch64_classify_index (info, op0, mode, strict_p))
6493 {
6494 info->base = op1;
6495 return true;
6496 }
6497 }
6498
6499 return false;
6500
6501 case POST_INC:
6502 case POST_DEC:
6503 case PRE_INC:
6504 case PRE_DEC:
6505 info->type = ADDRESS_REG_WB;
6506 info->base = XEXP (x, 0);
6507 info->offset = NULL_RTX;
6508 return aarch64_base_register_rtx_p (info->base, strict_p);
6509
6510 case POST_MODIFY:
6511 case PRE_MODIFY:
6512 info->type = ADDRESS_REG_WB;
6513 info->base = XEXP (x, 0);
6514 if (GET_CODE (XEXP (x, 1)) == PLUS
dc640181 6515 && poly_int_rtx_p (XEXP (XEXP (x, 1), 1), &offset)
43e9d192
IB
6516 && rtx_equal_p (XEXP (XEXP (x, 1), 0), info->base)
6517 && aarch64_base_register_rtx_p (info->base, strict_p))
6518 {
43e9d192 6519 info->offset = XEXP (XEXP (x, 1), 1);
dc640181 6520 info->const_offset = offset;
43e9d192
IB
6521
6522 /* TImode and TFmode values are allowed in both pairs of X
6523 registers and individual Q registers. The available
6524 address modes are:
6525 X,X: 7-bit signed scaled offset
6526 Q: 9-bit signed offset
6527 We conservatively require an offset representable in either mode.
6528 */
6529 if (mode == TImode || mode == TFmode)
44707478 6530 return (aarch64_offset_7bit_signed_scaled_p (mode, offset)
3c5af608 6531 && aarch64_offset_9bit_signed_unscaled_p (mode, offset));
43e9d192 6532
2d8c6dc1 6533 if (load_store_pair_p)
6a70badb 6534 return ((known_eq (GET_MODE_SIZE (mode), 4)
9f5361c8
KT
6535 || known_eq (GET_MODE_SIZE (mode), 8)
6536 || known_eq (GET_MODE_SIZE (mode), 16))
44707478 6537 && aarch64_offset_7bit_signed_scaled_p (mode, offset));
43e9d192 6538 else
3c5af608 6539 return aarch64_offset_9bit_signed_unscaled_p (mode, offset);
43e9d192
IB
6540 }
6541 return false;
6542
6543 case CONST:
6544 case SYMBOL_REF:
6545 case LABEL_REF:
79517551
SN
6546 /* load literal: pc-relative constant pool entry. Only supported
6547 for SI mode or larger. */
43e9d192 6548 info->type = ADDRESS_SYMBOLIC;
2d8c6dc1 6549
6a70badb
RS
6550 if (!load_store_pair_p
6551 && GET_MODE_SIZE (mode).is_constant (&const_size)
6552 && const_size >= 4)
43e9d192
IB
6553 {
6554 rtx sym, addend;
6555
6556 split_const (x, &sym, &addend);
b4f50fd4
RR
6557 return ((GET_CODE (sym) == LABEL_REF
6558 || (GET_CODE (sym) == SYMBOL_REF
6559 && CONSTANT_POOL_ADDRESS_P (sym)
9ee6540a 6560 && aarch64_pcrelative_literal_loads)));
43e9d192
IB
6561 }
6562 return false;
6563
6564 case LO_SUM:
6565 info->type = ADDRESS_LO_SUM;
6566 info->base = XEXP (x, 0);
6567 info->offset = XEXP (x, 1);
6568 if (allow_reg_index_p
6569 && aarch64_base_register_rtx_p (info->base, strict_p))
6570 {
6571 rtx sym, offs;
6572 split_const (info->offset, &sym, &offs);
6573 if (GET_CODE (sym) == SYMBOL_REF
43cacb12
RS
6574 && (aarch64_classify_symbol (sym, INTVAL (offs))
6575 == SYMBOL_SMALL_ABSOLUTE))
43e9d192
IB
6576 {
6577 /* The symbol and offset must be aligned to the access size. */
6578 unsigned int align;
43e9d192
IB
6579
6580 if (CONSTANT_POOL_ADDRESS_P (sym))
6581 align = GET_MODE_ALIGNMENT (get_pool_mode (sym));
6582 else if (TREE_CONSTANT_POOL_ADDRESS_P (sym))
6583 {
6584 tree exp = SYMBOL_REF_DECL (sym);
6585 align = TYPE_ALIGN (TREE_TYPE (exp));
58e17cf8 6586 align = aarch64_constant_alignment (exp, align);
43e9d192
IB
6587 }
6588 else if (SYMBOL_REF_DECL (sym))
6589 align = DECL_ALIGN (SYMBOL_REF_DECL (sym));
6c031d8d
KV
6590 else if (SYMBOL_REF_HAS_BLOCK_INFO_P (sym)
6591 && SYMBOL_REF_BLOCK (sym) != NULL)
6592 align = SYMBOL_REF_BLOCK (sym)->alignment;
43e9d192
IB
6593 else
6594 align = BITS_PER_UNIT;
6595
6a70badb
RS
6596 poly_int64 ref_size = GET_MODE_SIZE (mode);
6597 if (known_eq (ref_size, 0))
43e9d192
IB
6598 ref_size = GET_MODE_SIZE (DImode);
6599
6a70badb
RS
6600 return (multiple_p (INTVAL (offs), ref_size)
6601 && multiple_p (align / BITS_PER_UNIT, ref_size));
43e9d192
IB
6602 }
6603 }
6604 return false;
6605
6606 default:
6607 return false;
6608 }
6609}
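As a side note on the TImode/TFmode case handled above: the offset has to satisfy two encodings at once, the signed 7-bit immediate scaled by 8 used by an LDP/STP of two X registers, and either the signed 9-bit unscaled or the unsigned 12-bit scaled-by-16 immediate used by a single Q-register access. Below is a minimal host-side sketch of that intersection with made-up helper names; it is not GCC code, just an illustration of the ranges named in the comments.

#include <stdbool.h>
#include <stdio.h>

/* Signed 7-bit offset, scaled by 8 (LDP/STP of two X registers).  */
static bool ofs7_scaled8 (long o)
{ return o % 8 == 0 && o / 8 >= -64 && o / 8 <= 63; }

/* Signed 9-bit unscaled offset (LDUR/STUR-style access).  */
static bool ofs9_unscaled (long o)
{ return o >= -256 && o <= 255; }

/* Unsigned 12-bit offset, scaled by 16 (LDR/STR of a Q register).  */
static bool ofs12_scaled16 (long o)
{ return o % 16 == 0 && o / 16 >= 0 && o / 16 <= 4095; }

static bool ti_offset_ok (long o)
{ return ofs7_scaled8 (o) && (ofs9_unscaled (o) || ofs12_scaled16 (o)); }

int main (void)
{
  printf ("%d %d %d\n",
          ti_offset_ok (256),   /* 1: fits the LDP form and the scaled LDR form */
          ti_offset_ok (504),   /* 0: fits the LDP form but neither LDR form */
          ti_offset_ok (255));  /* 0: not a multiple of 8, so no LDP form */
  return 0;
}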
6610
9bf2f779
KT
6611/* Return true if the address X is valid for a PRFM instruction.
6612 STRICT_P is true if we should do strict checking with
6613 aarch64_classify_address. */
6614
6615bool
6616aarch64_address_valid_for_prefetch_p (rtx x, bool strict_p)
6617{
6618 struct aarch64_address_info addr;
6619
6620 /* PRFM accepts the same addresses as DImode... */
a97d8b98 6621 bool res = aarch64_classify_address (&addr, x, DImode, strict_p);
9bf2f779
KT
6622 if (!res)
6623 return false;
6624
6625 /* ... except writeback forms. */
6626 return addr.type != ADDRESS_REG_WB;
6627}
6628
43e9d192
IB
6629bool
6630aarch64_symbolic_address_p (rtx x)
6631{
6632 rtx offset;
6633
6634 split_const (x, &x, &offset);
6635 return GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == LABEL_REF;
6636}
6637
a6e0bfa7 6638/* Classify the base of symbolic expression X. */
da4f13a4
MS
6639
6640enum aarch64_symbol_type
a6e0bfa7 6641aarch64_classify_symbolic_expression (rtx x)
43e9d192
IB
6642{
6643 rtx offset;
da4f13a4 6644
43e9d192 6645 split_const (x, &x, &offset);
43cacb12 6646 return aarch64_classify_symbol (x, INTVAL (offset));
43e9d192
IB
6647}
6648
6649
6650/* Return TRUE if X is a legitimate address for accessing memory in
6651 mode MODE. */
6652static bool
ef4bddc2 6653aarch64_legitimate_address_hook_p (machine_mode mode, rtx x, bool strict_p)
43e9d192
IB
6654{
6655 struct aarch64_address_info addr;
6656
a97d8b98 6657 return aarch64_classify_address (&addr, x, mode, strict_p);
43e9d192
IB
6658}
6659
a97d8b98
RS
6660/* Return TRUE if X is a legitimate address of type TYPE for accessing
6661 memory in mode MODE. STRICT_P is true if REG_OK_STRICT is in effect. */
43e9d192 6662bool
a97d8b98
RS
6663aarch64_legitimate_address_p (machine_mode mode, rtx x, bool strict_p,
6664 aarch64_addr_query_type type)
43e9d192
IB
6665{
6666 struct aarch64_address_info addr;
6667
a97d8b98 6668 return aarch64_classify_address (&addr, x, mode, strict_p, type);
43e9d192
IB
6669}
6670
9005477f
RS
6671/* Implement TARGET_LEGITIMIZE_ADDRESS_DISPLACEMENT. */
6672
491ec060 6673static bool
9005477f
RS
6674aarch64_legitimize_address_displacement (rtx *offset1, rtx *offset2,
6675 poly_int64 orig_offset,
6676 machine_mode mode)
491ec060 6677{
6a70badb
RS
6678 HOST_WIDE_INT size;
6679 if (GET_MODE_SIZE (mode).is_constant (&size))
6680 {
9005477f
RS
6681 HOST_WIDE_INT const_offset, second_offset;
6682
6683 /* A general SVE offset is A * VQ + B. Remove the A component from
6684 coefficient 0 in order to get the constant B. */
6685 const_offset = orig_offset.coeffs[0] - orig_offset.coeffs[1];
6686
6687 /* Split an out-of-range address displacement into a base and
6688 offset. Use 4KB range for 1- and 2-byte accesses and a 16KB
6689 range otherwise to increase opportunities for sharing the base
6690 address of different sizes. Unaligned accesses use the signed
6691 9-bit range, TImode/TFmode use the intersection of signed
6692 scaled 7-bit and signed 9-bit offset. */
6a70badb 6693 if (mode == TImode || mode == TFmode)
9005477f
RS
6694 second_offset = ((const_offset + 0x100) & 0x1f8) - 0x100;
6695 else if ((const_offset & (size - 1)) != 0)
6696 second_offset = ((const_offset + 0x100) & 0x1ff) - 0x100;
6a70badb 6697 else
9005477f 6698 second_offset = const_offset & (size < 4 ? 0xfff : 0x3ffc);
491ec060 6699
9005477f
RS
6700 if (second_offset == 0 || known_eq (orig_offset, second_offset))
6701 return false;
6702
6703 /* Split the offset into second_offset and the rest. */
6704 *offset1 = gen_int_mode (orig_offset - second_offset, Pmode);
6705 *offset2 = gen_int_mode (second_offset, Pmode);
6706 return true;
6707 }
6708 else
6709 {
6710 /* Get the mode we should use as the basis of the range. For structure
6711 modes this is the mode of one vector. */
6712 unsigned int vec_flags = aarch64_classify_vector_mode (mode);
6713 machine_mode step_mode
6714 = (vec_flags & VEC_STRUCT) != 0 ? SVE_BYTE_MODE : mode;
6715
6716 /* Get the "mul vl" multiplier we'd like to use. */
6717 HOST_WIDE_INT factor = GET_MODE_SIZE (step_mode).coeffs[1];
6718 HOST_WIDE_INT vnum = orig_offset.coeffs[1] / factor;
6719 if (vec_flags & VEC_SVE_DATA)
6720 /* LDR supports a 9-bit range, but the move patterns for
6721 structure modes require all vectors to be in range of the
6722 same base. The simplest way of accommodating that while still
6723 promoting reuse of anchor points between different modes is
6724 to use an 8-bit range unconditionally. */
6725 vnum = ((vnum + 128) & 255) - 128;
6726 else
6727 /* Predicates are only handled singly, so we might as well use
6728 the full range. */
6729 vnum = ((vnum + 256) & 511) - 256;
6730 if (vnum == 0)
6731 return false;
6732
6733 /* Convert the "mul vl" multiplier into a byte offset. */
6734 poly_int64 second_offset = GET_MODE_SIZE (step_mode) * vnum;
6735 if (known_eq (second_offset, orig_offset))
6736 return false;
6737
6738 /* Split the offset into second_offset and the rest. */
6739 *offset1 = gen_int_mode (orig_offset - second_offset, Pmode);
6740 *offset2 = gen_int_mode (second_offset, Pmode);
6a70badb
RS
6741 return true;
6742 }
491ec060
WD
6743}
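To make the constant-size branch above concrete, the sketch below (host-side C, not GCC code; the SVE, BLKmode and TImode/TFmode cases and the "no split needed" early return are omitted) applies the same masks to two sample displacements and shows how the "second" offset that stays with the access is peeled off the anchored base.

#include <stdio.h>

static void split_sketch (long offset, long size)
{
  long second;
  if (offset & (size - 1))                        /* unaligned: signed 9-bit range */
    second = ((offset + 0x100) & 0x1ff) - 0x100;
  else                                            /* aligned: 4KB or 16KB range */
    second = offset & (size < 4 ? 0xfff : 0x3ffc);
  printf ("base += %#lx, access uses %#lx\n", offset - second, second);
}

int main (void)
{
  split_sketch (0x10008, 8);   /* base += 0x10000, access uses 0x8 */
  split_sketch (0x10001, 4);   /* base += 0x10000, access uses 0x1 */
  return 0;
}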
6744
a2170965
TC
6745/* Return the binary representation of floating point constant VALUE in INTVAL.
6746 If the value cannot be converted, return false without setting INTVAL.
6747 The conversion is done in the mode of VALUE. */
6748bool
6749aarch64_reinterpret_float_as_int (rtx value, unsigned HOST_WIDE_INT *intval)
6750{
6751
6752 /* We make a general exception for 0. */
6753 if (aarch64_float_const_zero_rtx_p (value))
6754 {
6755 *intval = 0;
6756 return true;
6757 }
6758
0d0e0188 6759 scalar_float_mode mode;
a2170965 6760 if (GET_CODE (value) != CONST_DOUBLE
0d0e0188 6761 || !is_a <scalar_float_mode> (GET_MODE (value), &mode)
a2170965
TC
6762 || GET_MODE_BITSIZE (mode) > HOST_BITS_PER_WIDE_INT
6763 /* Only support up to DF mode. */
6764 || GET_MODE_BITSIZE (mode) > GET_MODE_BITSIZE (DFmode))
6765 return false;
6766
6767 unsigned HOST_WIDE_INT ival = 0;
6768
6769 long res[2];
6770 real_to_target (res,
6771 CONST_DOUBLE_REAL_VALUE (value),
6772 REAL_MODE_FORMAT (mode));
6773
5c22bb48
TC
6774 if (mode == DFmode)
6775 {
6776 int order = BYTES_BIG_ENDIAN ? 1 : 0;
6777 ival = zext_hwi (res[order], 32);
6778 ival |= (zext_hwi (res[1 - order], 32) << 32);
6779 }
6780 else
6781 ival = zext_hwi (res[0], 32);
a2170965
TC
6782
6783 *intval = ival;
6784 return true;
6785}
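The real_to_target call above is, conceptually, the usual reinterpretation of an IEEE value as its bit pattern. A minimal host-side sketch of the same idea for a DFmode-sized value follows; it is not GCC code, only an illustration.

#include <stdint.h>
#include <stdio.h>
#include <string.h>

static uint64_t double_bits (double d)
{
  uint64_t u;
  memcpy (&u, &d, sizeof u);   /* bitwise copy, avoids aliasing issues */
  return u;
}

int main (void)
{
  /* 1.0 -> 0x3ff0000000000000: only the top 16 bits are set, so a single
     shifted MOVZ can build it in a GP register before the FMOV.  */
  printf ("%llx\n", (unsigned long long) double_bits (1.0));
  return 0;
}

This is the quantity that aarch64_float_const_rtx_p below feeds to aarch64_internal_mov_immediate to decide whether a MOV/MOVK plus FMOV sequence beats a literal-pool load.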
6786
6787/* Return TRUE if rtx X is an immediate constant that can be moved using a
6788 single MOV(+MOVK) followed by an FMOV. */
6789bool
6790aarch64_float_const_rtx_p (rtx x)
6791{
6792 machine_mode mode = GET_MODE (x);
6793 if (mode == VOIDmode)
6794 return false;
6795
6796 /* Determine whether it's cheaper to materialize float constants as
6797 mov/movk pairs rather than ldr/adrp pairs. */
6798 unsigned HOST_WIDE_INT ival;
6799
6800 if (GET_CODE (x) == CONST_DOUBLE
6801 && SCALAR_FLOAT_MODE_P (mode)
6802 && aarch64_reinterpret_float_as_int (x, &ival))
6803 {
77e994c9
RS
6804 scalar_int_mode imode = (mode == HFmode
6805 ? SImode
6806 : int_mode_for_mode (mode).require ());
a2170965
TC
6807 int num_instr = aarch64_internal_mov_immediate
6808 (NULL_RTX, gen_int_mode (ival, imode), false, imode);
6809 return num_instr < 3;
6810 }
6811
6812 return false;
6813}
6814
43e9d192
IB
6815/* Return TRUE if rtx X is the immediate constant 0.0. */
6816bool
3520f7cc 6817aarch64_float_const_zero_rtx_p (rtx x)
43e9d192 6818{
43e9d192
IB
6819 if (GET_MODE (x) == VOIDmode)
6820 return false;
6821
34a72c33 6822 if (REAL_VALUE_MINUS_ZERO (*CONST_DOUBLE_REAL_VALUE (x)))
43e9d192 6823 return !HONOR_SIGNED_ZEROS (GET_MODE (x));
34a72c33 6824 return real_equal (CONST_DOUBLE_REAL_VALUE (x), &dconst0);
43e9d192
IB
6825}
6826
a2170965
TC
6827/* Return TRUE if rtx X is an immediate constant that fits in a single
6828 MOVI immediate operation. */
6829bool
6830aarch64_can_const_movi_rtx_p (rtx x, machine_mode mode)
6831{
6832 if (!TARGET_SIMD)
6833 return false;
6834
77e994c9
RS
6835 machine_mode vmode;
6836 scalar_int_mode imode;
a2170965
TC
6837 unsigned HOST_WIDE_INT ival;
6838
6839 if (GET_CODE (x) == CONST_DOUBLE
6840 && SCALAR_FLOAT_MODE_P (mode))
6841 {
6842 if (!aarch64_reinterpret_float_as_int (x, &ival))
6843 return false;
6844
35c38fa6
TC
6845 /* We make a general exception for 0. */
6846 if (aarch64_float_const_zero_rtx_p (x))
6847 return true;
6848
304b9962 6849 imode = int_mode_for_mode (mode).require ();
a2170965
TC
6850 }
6851 else if (GET_CODE (x) == CONST_INT
77e994c9
RS
6852 && is_a <scalar_int_mode> (mode, &imode))
6853 ival = INTVAL (x);
a2170965
TC
6854 else
6855 return false;
6856
6857 /* Use a 64-bit mode for everything except for DI/DF mode, where we use
6858 a 128-bit vector mode. */
77e994c9 6859 int width = GET_MODE_BITSIZE (imode) == 64 ? 128 : 64;
a2170965
TC
6860
6861 vmode = aarch64_simd_container_mode (imode, width);
6862 rtx v_op = aarch64_simd_gen_const_vector_dup (vmode, ival);
6863
b187677b 6864 return aarch64_simd_valid_immediate (v_op, NULL);
a2170965
TC
6865}
6866
6867
70f09188
AP
6868/* Return the fixed registers used for condition codes. */
6869
6870static bool
6871aarch64_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
6872{
6873 *p1 = CC_REGNUM;
6874 *p2 = INVALID_REGNUM;
6875 return true;
6876}
6877
47210a04
RL
6878/* This function is used by the call expanders of the machine description.
6879 RESULT is the register in which the result is returned. It's NULL for
6880 "call" and "sibcall".
6881 MEM is the location of the function call.
6882 SIBCALL indicates whether this call is a normal call or a sibling call;
6883 a different pattern is generated accordingly. */
6884
6885void
6886aarch64_expand_call (rtx result, rtx mem, bool sibcall)
6887{
6888 rtx call, callee, tmp;
6889 rtvec vec;
6890 machine_mode mode;
6891
6892 gcc_assert (MEM_P (mem));
6893 callee = XEXP (mem, 0);
6894 mode = GET_MODE (callee);
6895 gcc_assert (mode == Pmode);
6896
6897 /* Decide if we should generate indirect calls by loading the
6898 address of the callee into a register before performing
6899 the branch-and-link. */
6900 if (SYMBOL_REF_P (callee)
6901 ? (aarch64_is_long_call_p (callee)
6902 || aarch64_is_noplt_call_p (callee))
6903 : !REG_P (callee))
6904 XEXP (mem, 0) = force_reg (mode, callee);
6905
6906 call = gen_rtx_CALL (VOIDmode, mem, const0_rtx);
6907
6908 if (result != NULL_RTX)
6909 call = gen_rtx_SET (result, call);
6910
6911 if (sibcall)
6912 tmp = ret_rtx;
6913 else
6914 tmp = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (Pmode, LR_REGNUM));
6915
6916 vec = gen_rtvec (2, call, tmp);
6917 call = gen_rtx_PARALLEL (VOIDmode, vec);
6918
6919 aarch64_emit_call_insn (call);
6920}
6921
78607708
TV
6922/* Emit call insn with PAT and do aarch64-specific handling. */
6923
d07a3fed 6924void
78607708
TV
6925aarch64_emit_call_insn (rtx pat)
6926{
6927 rtx insn = emit_call_insn (pat);
6928
6929 rtx *fusage = &CALL_INSN_FUNCTION_USAGE (insn);
6930 clobber_reg (fusage, gen_rtx_REG (word_mode, IP0_REGNUM));
6931 clobber_reg (fusage, gen_rtx_REG (word_mode, IP1_REGNUM));
6932}
6933
ef4bddc2 6934machine_mode
43e9d192
IB
6935aarch64_select_cc_mode (RTX_CODE code, rtx x, rtx y)
6936{
6937 /* All floating point compares return CCFP if it is an equality
6938 comparison, and CCFPE otherwise. */
6939 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
6940 {
6941 switch (code)
6942 {
6943 case EQ:
6944 case NE:
6945 case UNORDERED:
6946 case ORDERED:
6947 case UNLT:
6948 case UNLE:
6949 case UNGT:
6950 case UNGE:
6951 case UNEQ:
43e9d192
IB
6952 return CCFPmode;
6953
6954 case LT:
6955 case LE:
6956 case GT:
6957 case GE:
8332c5ee 6958 case LTGT:
43e9d192
IB
6959 return CCFPEmode;
6960
6961 default:
6962 gcc_unreachable ();
6963 }
6964 }
6965
2b8568fe
KT
6966 /* Equality comparisons of short modes against zero can be performed
6967 using the TST instruction with the appropriate bitmask. */
6968 if (y == const0_rtx && REG_P (x)
6969 && (code == EQ || code == NE)
6970 && (GET_MODE (x) == HImode || GET_MODE (x) == QImode))
6971 return CC_NZmode;
6972
b06335f9
KT
6973 /* Similarly, comparisons of zero_extends from shorter modes can
6974 be performed using an ANDS with an immediate mask. */
6975 if (y == const0_rtx && GET_CODE (x) == ZERO_EXTEND
6976 && (GET_MODE (x) == SImode || GET_MODE (x) == DImode)
6977 && (GET_MODE (XEXP (x, 0)) == HImode || GET_MODE (XEXP (x, 0)) == QImode)
6978 && (code == EQ || code == NE))
6979 return CC_NZmode;
6980
43e9d192
IB
6981 if ((GET_MODE (x) == SImode || GET_MODE (x) == DImode)
6982 && y == const0_rtx
6983 && (code == EQ || code == NE || code == LT || code == GE)
b056c910 6984 && (GET_CODE (x) == PLUS || GET_CODE (x) == MINUS || GET_CODE (x) == AND
7325d85a
KT
6985 || GET_CODE (x) == NEG
6986 || (GET_CODE (x) == ZERO_EXTRACT && CONST_INT_P (XEXP (x, 1))
6987 && CONST_INT_P (XEXP (x, 2)))))
43e9d192
IB
6988 return CC_NZmode;
6989
1c992d1e 6990 /* A compare with a shifted operand. Because of canonicalization,
43e9d192
IB
6991 the comparison will have to be swapped when we emit the assembly
6992 code. */
6993 if ((GET_MODE (x) == SImode || GET_MODE (x) == DImode)
ffa8a921 6994 && (REG_P (y) || GET_CODE (y) == SUBREG || y == const0_rtx)
43e9d192
IB
6995 && (GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT
6996 || GET_CODE (x) == LSHIFTRT
1c992d1e 6997 || GET_CODE (x) == ZERO_EXTEND || GET_CODE (x) == SIGN_EXTEND))
43e9d192
IB
6998 return CC_SWPmode;
6999
1c992d1e
RE
7000 /* Similarly for a negated operand, but we can only do this for
7001 equalities. */
7002 if ((GET_MODE (x) == SImode || GET_MODE (x) == DImode)
4aa81c2e 7003 && (REG_P (y) || GET_CODE (y) == SUBREG)
1c992d1e
RE
7004 && (code == EQ || code == NE)
7005 && GET_CODE (x) == NEG)
7006 return CC_Zmode;
7007
ef22810a
RH
7008 /* A test for unsigned overflow. */
7009 if ((GET_MODE (x) == DImode || GET_MODE (x) == TImode)
7010 && code == NE
7011 && GET_CODE (x) == PLUS
7012 && GET_CODE (y) == ZERO_EXTEND)
7013 return CC_Cmode;
7014
30c46053
MC
7015 /* A test for signed overflow. */
7016 if ((GET_MODE (x) == DImode || GET_MODE (x) == TImode)
7017 && code == NE
7018 && GET_CODE (x) == PLUS
7019 && GET_CODE (y) == SIGN_EXTEND)
7020 return CC_Vmode;
7021
43e9d192
IB
7022 /* For everything else, return CCmode. */
7023 return CCmode;
7024}
7025
3dfa7055 7026static int
b8506a8a 7027aarch64_get_condition_code_1 (machine_mode, enum rtx_code);
3dfa7055 7028
cd5660ab 7029int
43e9d192
IB
7030aarch64_get_condition_code (rtx x)
7031{
ef4bddc2 7032 machine_mode mode = GET_MODE (XEXP (x, 0));
43e9d192
IB
7033 enum rtx_code comp_code = GET_CODE (x);
7034
7035 if (GET_MODE_CLASS (mode) != MODE_CC)
7036 mode = SELECT_CC_MODE (comp_code, XEXP (x, 0), XEXP (x, 1));
3dfa7055
ZC
7037 return aarch64_get_condition_code_1 (mode, comp_code);
7038}
43e9d192 7039
3dfa7055 7040static int
b8506a8a 7041aarch64_get_condition_code_1 (machine_mode mode, enum rtx_code comp_code)
3dfa7055 7042{
43e9d192
IB
7043 switch (mode)
7044 {
4e10a5a7
RS
7045 case E_CCFPmode:
7046 case E_CCFPEmode:
43e9d192
IB
7047 switch (comp_code)
7048 {
7049 case GE: return AARCH64_GE;
7050 case GT: return AARCH64_GT;
7051 case LE: return AARCH64_LS;
7052 case LT: return AARCH64_MI;
7053 case NE: return AARCH64_NE;
7054 case EQ: return AARCH64_EQ;
7055 case ORDERED: return AARCH64_VC;
7056 case UNORDERED: return AARCH64_VS;
7057 case UNLT: return AARCH64_LT;
7058 case UNLE: return AARCH64_LE;
7059 case UNGT: return AARCH64_HI;
7060 case UNGE: return AARCH64_PL;
cd5660ab 7061 default: return -1;
43e9d192
IB
7062 }
7063 break;
7064
4e10a5a7 7065 case E_CCmode:
43e9d192
IB
7066 switch (comp_code)
7067 {
7068 case NE: return AARCH64_NE;
7069 case EQ: return AARCH64_EQ;
7070 case GE: return AARCH64_GE;
7071 case GT: return AARCH64_GT;
7072 case LE: return AARCH64_LE;
7073 case LT: return AARCH64_LT;
7074 case GEU: return AARCH64_CS;
7075 case GTU: return AARCH64_HI;
7076 case LEU: return AARCH64_LS;
7077 case LTU: return AARCH64_CC;
cd5660ab 7078 default: return -1;
43e9d192
IB
7079 }
7080 break;
7081
4e10a5a7 7082 case E_CC_SWPmode:
43e9d192
IB
7083 switch (comp_code)
7084 {
7085 case NE: return AARCH64_NE;
7086 case EQ: return AARCH64_EQ;
7087 case GE: return AARCH64_LE;
7088 case GT: return AARCH64_LT;
7089 case LE: return AARCH64_GE;
7090 case LT: return AARCH64_GT;
7091 case GEU: return AARCH64_LS;
7092 case GTU: return AARCH64_CC;
7093 case LEU: return AARCH64_CS;
7094 case LTU: return AARCH64_HI;
cd5660ab 7095 default: return -1;
43e9d192
IB
7096 }
7097 break;
7098
4e10a5a7 7099 case E_CC_NZmode:
43e9d192
IB
7100 switch (comp_code)
7101 {
7102 case NE: return AARCH64_NE;
7103 case EQ: return AARCH64_EQ;
7104 case GE: return AARCH64_PL;
7105 case LT: return AARCH64_MI;
cd5660ab 7106 default: return -1;
43e9d192
IB
7107 }
7108 break;
7109
4e10a5a7 7110 case E_CC_Zmode:
1c992d1e
RE
7111 switch (comp_code)
7112 {
7113 case NE: return AARCH64_NE;
7114 case EQ: return AARCH64_EQ;
cd5660ab 7115 default: return -1;
1c992d1e
RE
7116 }
7117 break;
7118
4e10a5a7 7119 case E_CC_Cmode:
ef22810a
RH
7120 switch (comp_code)
7121 {
7122 case NE: return AARCH64_CS;
7123 case EQ: return AARCH64_CC;
7124 default: return -1;
7125 }
7126 break;
7127
30c46053
MC
7128 case E_CC_Vmode:
7129 switch (comp_code)
7130 {
7131 case NE: return AARCH64_VS;
7132 case EQ: return AARCH64_VC;
7133 default: return -1;
7134 }
7135 break;
7136
43e9d192 7137 default:
cd5660ab 7138 return -1;
43e9d192 7139 }
3dfa7055 7140
3dfa7055 7141 return -1;
43e9d192
IB
7142}
7143
ddeabd3e
AL
7144bool
7145aarch64_const_vec_all_same_in_range_p (rtx x,
6a70badb
RS
7146 HOST_WIDE_INT minval,
7147 HOST_WIDE_INT maxval)
ddeabd3e 7148{
6a70badb
RS
7149 rtx elt;
7150 return (const_vec_duplicate_p (x, &elt)
7151 && CONST_INT_P (elt)
7152 && IN_RANGE (INTVAL (elt), minval, maxval));
ddeabd3e
AL
7153}
7154
7155bool
7156aarch64_const_vec_all_same_int_p (rtx x, HOST_WIDE_INT val)
7157{
7158 return aarch64_const_vec_all_same_in_range_p (x, val, val);
7159}
7160
43cacb12
RS
7161/* Return true if VEC is a constant in which every element is in the range
7162 [MINVAL, MAXVAL]. The elements do not need to have the same value. */
7163
7164static bool
7165aarch64_const_vec_all_in_range_p (rtx vec,
7166 HOST_WIDE_INT minval,
7167 HOST_WIDE_INT maxval)
7168{
7169 if (GET_CODE (vec) != CONST_VECTOR
7170 || GET_MODE_CLASS (GET_MODE (vec)) != MODE_VECTOR_INT)
7171 return false;
7172
7173 int nunits;
7174 if (!CONST_VECTOR_STEPPED_P (vec))
7175 nunits = const_vector_encoded_nelts (vec);
7176 else if (!CONST_VECTOR_NUNITS (vec).is_constant (&nunits))
7177 return false;
7178
7179 for (int i = 0; i < nunits; i++)
7180 {
7181 rtx vec_elem = CONST_VECTOR_ELT (vec, i);
7182 if (!CONST_INT_P (vec_elem)
7183 || !IN_RANGE (INTVAL (vec_elem), minval, maxval))
7184 return false;
7185 }
7186 return true;
7187}
43e9d192 7188
cf670503
ZC
7189/* N Z C V. */
7190#define AARCH64_CC_V 1
7191#define AARCH64_CC_C (1 << 1)
7192#define AARCH64_CC_Z (1 << 2)
7193#define AARCH64_CC_N (1 << 3)
7194
c8012fbc
WD
7195/* N Z C V flags for ccmp. Indexed by AARCH64_COND_CODE. */
7196static const int aarch64_nzcv_codes[] =
7197{
7198 0, /* EQ, Z == 1. */
7199 AARCH64_CC_Z, /* NE, Z == 0. */
7200 0, /* CS, C == 1. */
7201 AARCH64_CC_C, /* CC, C == 0. */
7202 0, /* MI, N == 1. */
7203 AARCH64_CC_N, /* PL, N == 0. */
7204 0, /* VS, V == 1. */
7205 AARCH64_CC_V, /* VC, V == 0. */
7206 0, /* HI, C == 1 && Z == 0. */
7207 AARCH64_CC_C, /* LS, !(C == 1 && Z == 0). */
7208 AARCH64_CC_V, /* GE, N == V. */
7209 0, /* LT, N != V. */
7210 AARCH64_CC_Z, /* GT, Z == 0 && N == V. */
7211 0, /* LE, !(Z == 0 && N == V). */
7212 0, /* AL, Any. */
7213 0 /* NV, Any. */
cf670503
ZC
7214};
7215
43cacb12
RS
7216/* Print floating-point vector immediate operand X to F, negating it
7217 first if NEGATE is true. Return true on success, false if it isn't
7218 a constant we can handle. */
7219
7220static bool
7221aarch64_print_vector_float_operand (FILE *f, rtx x, bool negate)
7222{
7223 rtx elt;
7224
7225 if (!const_vec_duplicate_p (x, &elt))
7226 return false;
7227
7228 REAL_VALUE_TYPE r = *CONST_DOUBLE_REAL_VALUE (elt);
7229 if (negate)
7230 r = real_value_negate (&r);
7231
7232 /* We only handle the SVE single-bit immediates here. */
7233 if (real_equal (&r, &dconst0))
7234 asm_fprintf (f, "0.0");
7235 else if (real_equal (&r, &dconst1))
7236 asm_fprintf (f, "1.0");
7237 else if (real_equal (&r, &dconsthalf))
7238 asm_fprintf (f, "0.5");
7239 else
7240 return false;
7241
7242 return true;
7243}
7244
9f4cbab8
RS
7245/* Return the equivalent letter for size. */
7246static char
7247sizetochar (int size)
7248{
7249 switch (size)
7250 {
7251 case 64: return 'd';
7252 case 32: return 's';
7253 case 16: return 'h';
7254 case 8 : return 'b';
7255 default: gcc_unreachable ();
7256 }
7257}
7258
bcf19844
JW
7259/* Print operand X to file F in a target specific manner according to CODE.
7260 The acceptable formatting commands given by CODE are:
7261 'c': An integer or symbol address without a preceding #
7262 sign.
43cacb12
RS
7263 'C': Take the duplicated element in a vector constant
7264 and print it in hex.
7265 'D': Take the duplicated element in a vector constant
7266 and print it as an unsigned integer, in decimal.
bcf19844
JW
7267 'e': Print the sign/zero-extend size as a character 8->b,
7268 16->h, 32->w.
7269 'p': Prints N such that 2^N == X (X must be power of 2 and
7270 const int).
7271 'P': Print the number of non-zero bits in X (a const_int).
7272 'H': Print the higher numbered register of a pair (TImode)
7273 of regs.
7274 'm': Print a condition (eq, ne, etc).
7275 'M': Same as 'm', but invert condition.
43cacb12
RS
7276 'N': Take the duplicated element in a vector constant
7277 and print the negative of it in decimal.
bcf19844
JW
7278 'b/h/s/d/q': Print a scalar FP/SIMD register name.
7279 'S/T/U/V': Print a FP/SIMD register name for a register list.
7280 The register printed is the FP/SIMD register name
7281 of X + 0/1/2/3 for S/T/U/V.
7282 'R': Print a scalar FP/SIMD register name + 1.
7283 'X': Print bottom 16 bits of integer constant in hex.
7284 'w/x': Print a general register name or the zero register
7285 (32-bit or 64-bit).
7286 '0': Print a normal operand, if it's a general register,
7287 then we assume DImode.
7288 'k': Print NZCV for conditional compare instructions.
7289 'A': Output address constant representing the first
7290 argument of X, specifying a relocation offset
7291 if appropriate.
7292 'L': Output constant address specified by X
7293 with a relocation offset if appropriate.
7294 'G': Prints address of X, specifying a PC relative
e69a816d
WD
7295 relocation mode if appropriate.
7296 'y': Output address of LDP or STP - this is used for
7297 some LDP/STPs which don't use a PARALLEL in their
7298 pattern (so the mode needs to be adjusted).
7299 'z': Output address of a typical LDP or STP. */
bcf19844 7300
cc8ca59e
JB
7301static void
7302aarch64_print_operand (FILE *f, rtx x, int code)
43e9d192 7303{
43cacb12 7304 rtx elt;
43e9d192
IB
7305 switch (code)
7306 {
f541a481
KT
7307 case 'c':
7308 switch (GET_CODE (x))
7309 {
7310 case CONST_INT:
7311 fprintf (f, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
7312 break;
7313
7314 case SYMBOL_REF:
7315 output_addr_const (f, x);
7316 break;
7317
7318 case CONST:
7319 if (GET_CODE (XEXP (x, 0)) == PLUS
7320 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF)
7321 {
7322 output_addr_const (f, x);
7323 break;
7324 }
7325 /* Fall through. */
7326
7327 default:
ee61f880 7328 output_operand_lossage ("unsupported operand for code '%c'", code);
f541a481
KT
7329 }
7330 break;
7331
43e9d192 7332 case 'e':
43e9d192
IB
7333 {
7334 int n;
7335
4aa81c2e 7336 if (!CONST_INT_P (x)
43e9d192
IB
7337 || (n = exact_log2 (INTVAL (x) & ~7)) <= 0)
7338 {
7339 output_operand_lossage ("invalid operand for '%%%c'", code);
7340 return;
7341 }
7342
7343 switch (n)
7344 {
7345 case 3:
7346 fputc ('b', f);
7347 break;
7348 case 4:
7349 fputc ('h', f);
7350 break;
7351 case 5:
7352 fputc ('w', f);
7353 break;
7354 default:
7355 output_operand_lossage ("invalid operand for '%%%c'", code);
7356 return;
7357 }
7358 }
7359 break;
7360
7361 case 'p':
7362 {
7363 int n;
7364
4aa81c2e 7365 if (!CONST_INT_P (x) || (n = exact_log2 (INTVAL (x))) < 0)
43e9d192
IB
7366 {
7367 output_operand_lossage ("invalid operand for '%%%c'", code);
7368 return;
7369 }
7370
7371 asm_fprintf (f, "%d", n);
7372 }
7373 break;
7374
7375 case 'P':
4aa81c2e 7376 if (!CONST_INT_P (x))
43e9d192
IB
7377 {
7378 output_operand_lossage ("invalid operand for '%%%c'", code);
7379 return;
7380 }
7381
8d55c61b 7382 asm_fprintf (f, "%u", popcount_hwi (INTVAL (x)));
43e9d192
IB
7383 break;
7384
7385 case 'H':
c0111dc4
RE
7386 if (x == const0_rtx)
7387 {
7388 asm_fprintf (f, "xzr");
7389 break;
7390 }
7391
4aa81c2e 7392 if (!REG_P (x) || !GP_REGNUM_P (REGNO (x) + 1))
43e9d192
IB
7393 {
7394 output_operand_lossage ("invalid operand for '%%%c'", code);
7395 return;
7396 }
7397
01a3a324 7398 asm_fprintf (f, "%s", reg_names [REGNO (x) + 1]);
43e9d192
IB
7399 break;
7400
43e9d192 7401 case 'M':
c8012fbc 7402 case 'm':
cd5660ab
KT
7403 {
7404 int cond_code;
c8012fbc
WD
7405 /* CONST_TRUE_RTX means al/nv (al is the default, don't print it). */
7406 if (x == const_true_rtx)
cd5660ab 7407 {
c8012fbc
WD
7408 if (code == 'M')
7409 fputs ("nv", f);
cd5660ab
KT
7410 return;
7411 }
43e9d192 7412
cd5660ab
KT
7413 if (!COMPARISON_P (x))
7414 {
7415 output_operand_lossage ("invalid operand for '%%%c'", code);
7416 return;
7417 }
c8012fbc 7418
cd5660ab
KT
7419 cond_code = aarch64_get_condition_code (x);
7420 gcc_assert (cond_code >= 0);
c8012fbc
WD
7421 if (code == 'M')
7422 cond_code = AARCH64_INVERSE_CONDITION_CODE (cond_code);
7423 fputs (aarch64_condition_codes[cond_code], f);
cd5660ab 7424 }
43e9d192
IB
7425 break;
7426
43cacb12
RS
7427 case 'N':
7428 if (!const_vec_duplicate_p (x, &elt))
7429 {
7430 output_operand_lossage ("invalid vector constant");
7431 return;
7432 }
7433
7434 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_VECTOR_INT)
7435 asm_fprintf (f, "%wd", -INTVAL (elt));
7436 else if (GET_MODE_CLASS (GET_MODE (x)) == MODE_VECTOR_FLOAT
7437 && aarch64_print_vector_float_operand (f, x, true))
7438 ;
7439 else
7440 {
7441 output_operand_lossage ("invalid vector constant");
7442 return;
7443 }
7444 break;
7445
43e9d192
IB
7446 case 'b':
7447 case 'h':
7448 case 's':
7449 case 'd':
7450 case 'q':
43e9d192
IB
7451 if (!REG_P (x) || !FP_REGNUM_P (REGNO (x)))
7452 {
7453 output_operand_lossage ("incompatible floating point / vector register operand for '%%%c'", code);
7454 return;
7455 }
50ce6f88 7456 asm_fprintf (f, "%c%d", code, REGNO (x) - V0_REGNUM);
43e9d192
IB
7457 break;
7458
7459 case 'S':
7460 case 'T':
7461 case 'U':
7462 case 'V':
43e9d192
IB
7463 if (!REG_P (x) || !FP_REGNUM_P (REGNO (x)))
7464 {
7465 output_operand_lossage ("incompatible floating point / vector register operand for '%%%c'", code);
7466 return;
7467 }
43cacb12
RS
7468 asm_fprintf (f, "%c%d",
7469 aarch64_sve_data_mode_p (GET_MODE (x)) ? 'z' : 'v',
7470 REGNO (x) - V0_REGNUM + (code - 'S'));
43e9d192
IB
7471 break;
7472
2d8c6dc1 7473 case 'R':
2d8c6dc1
AH
7474 if (!REG_P (x) || !FP_REGNUM_P (REGNO (x)))
7475 {
7476 output_operand_lossage ("incompatible floating point / vector register operand for '%%%c'", code);
7477 return;
7478 }
7479 asm_fprintf (f, "q%d", REGNO (x) - V0_REGNUM + 1);
7480 break;
7481
a05c0ddf 7482 case 'X':
4aa81c2e 7483 if (!CONST_INT_P (x))
a05c0ddf
IB
7484 {
7485 output_operand_lossage ("invalid operand for '%%%c'", code);
7486 return;
7487 }
50d38551 7488 asm_fprintf (f, "0x%wx", UINTVAL (x) & 0xffff);
a05c0ddf
IB
7489 break;
7490
43cacb12
RS
7491 case 'C':
7492 {
7493 /* Print a replicated constant in hex. */
7494 if (!const_vec_duplicate_p (x, &elt) || !CONST_INT_P (elt))
7495 {
7496 output_operand_lossage ("invalid operand for '%%%c'", code);
7497 return;
7498 }
7499 scalar_mode inner_mode = GET_MODE_INNER (GET_MODE (x));
7500 asm_fprintf (f, "0x%wx", UINTVAL (elt) & GET_MODE_MASK (inner_mode));
7501 }
7502 break;
7503
7504 case 'D':
7505 {
7506 /* Print a replicated constant in decimal, treating it as
7507 unsigned. */
7508 if (!const_vec_duplicate_p (x, &elt) || !CONST_INT_P (elt))
7509 {
7510 output_operand_lossage ("invalid operand for '%%%c'", code);
7511 return;
7512 }
7513 scalar_mode inner_mode = GET_MODE_INNER (GET_MODE (x));
7514 asm_fprintf (f, "%wd", UINTVAL (elt) & GET_MODE_MASK (inner_mode));
7515 }
7516 break;
7517
43e9d192
IB
7518 case 'w':
7519 case 'x':
3520f7cc
JG
7520 if (x == const0_rtx
7521 || (CONST_DOUBLE_P (x) && aarch64_float_const_zero_rtx_p (x)))
43e9d192 7522 {
50ce6f88 7523 asm_fprintf (f, "%czr", code);
43e9d192
IB
7524 break;
7525 }
7526
7527 if (REG_P (x) && GP_REGNUM_P (REGNO (x)))
7528 {
50ce6f88 7529 asm_fprintf (f, "%c%d", code, REGNO (x) - R0_REGNUM);
43e9d192
IB
7530 break;
7531 }
7532
7533 if (REG_P (x) && REGNO (x) == SP_REGNUM)
7534 {
50ce6f88 7535 asm_fprintf (f, "%ssp", code == 'w' ? "w" : "");
43e9d192
IB
7536 break;
7537 }
7538
7539 /* Fall through */
7540
7541 case 0:
43e9d192
IB
7542 if (x == NULL)
7543 {
7544 output_operand_lossage ("missing operand");
7545 return;
7546 }
7547
7548 switch (GET_CODE (x))
7549 {
7550 case REG:
43cacb12 7551 if (aarch64_sve_data_mode_p (GET_MODE (x)))
9f4cbab8
RS
7552 {
7553 if (REG_NREGS (x) == 1)
7554 asm_fprintf (f, "z%d", REGNO (x) - V0_REGNUM);
7555 else
7556 {
7557 char suffix
7558 = sizetochar (GET_MODE_UNIT_BITSIZE (GET_MODE (x)));
7559 asm_fprintf (f, "{z%d.%c - z%d.%c}",
7560 REGNO (x) - V0_REGNUM, suffix,
7561 END_REGNO (x) - V0_REGNUM - 1, suffix);
7562 }
7563 }
43cacb12
RS
7564 else
7565 asm_fprintf (f, "%s", reg_names [REGNO (x)]);
43e9d192
IB
7566 break;
7567
7568 case MEM:
cc8ca59e 7569 output_address (GET_MODE (x), XEXP (x, 0));
43e9d192
IB
7570 break;
7571
7572 case LABEL_REF:
7573 case SYMBOL_REF:
7574 output_addr_const (asm_out_file, x);
7575 break;
7576
7577 case CONST_INT:
7578 asm_fprintf (f, "%wd", INTVAL (x));
7579 break;
7580
43cacb12
RS
7581 case CONST:
7582 if (!VECTOR_MODE_P (GET_MODE (x)))
3520f7cc 7583 {
43cacb12
RS
7584 output_addr_const (asm_out_file, x);
7585 break;
3520f7cc 7586 }
43cacb12
RS
7587 /* fall through */
7588
7589 case CONST_VECTOR:
7590 if (!const_vec_duplicate_p (x, &elt))
3520f7cc 7591 {
43cacb12
RS
7592 output_operand_lossage ("invalid vector constant");
7593 return;
3520f7cc 7594 }
43cacb12
RS
7595
7596 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_VECTOR_INT)
7597 asm_fprintf (f, "%wd", INTVAL (elt));
7598 else if (GET_MODE_CLASS (GET_MODE (x)) == MODE_VECTOR_FLOAT
7599 && aarch64_print_vector_float_operand (f, x, false))
7600 ;
3520f7cc 7601 else
43cacb12
RS
7602 {
7603 output_operand_lossage ("invalid vector constant");
7604 return;
7605 }
43e9d192
IB
7606 break;
7607
3520f7cc 7608 case CONST_DOUBLE:
2ca5b430
KT
7609 /* Since we define TARGET_SUPPORTS_WIDE_INT we shouldn't ever
7610 be getting CONST_DOUBLEs holding integers. */
7611 gcc_assert (GET_MODE (x) != VOIDmode);
7612 if (aarch64_float_const_zero_rtx_p (x))
3520f7cc
JG
7613 {
7614 fputc ('0', f);
7615 break;
7616 }
7617 else if (aarch64_float_const_representable_p (x))
7618 {
7619#define buf_size 20
7620 char float_buf[buf_size] = {'\0'};
34a72c33
RS
7621 real_to_decimal_for_mode (float_buf,
7622 CONST_DOUBLE_REAL_VALUE (x),
3520f7cc
JG
7623 buf_size, buf_size,
7624 1, GET_MODE (x));
7625 asm_fprintf (asm_out_file, "%s", float_buf);
7626 break;
7627#undef buf_size
7628 }
7629 output_operand_lossage ("invalid constant");
7630 return;
43e9d192
IB
7631 default:
7632 output_operand_lossage ("invalid operand");
7633 return;
7634 }
7635 break;
7636
7637 case 'A':
7638 if (GET_CODE (x) == HIGH)
7639 x = XEXP (x, 0);
7640
a6e0bfa7 7641 switch (aarch64_classify_symbolic_expression (x))
43e9d192 7642 {
6642bdb4 7643 case SYMBOL_SMALL_GOT_4G:
43e9d192
IB
7644 asm_fprintf (asm_out_file, ":got:");
7645 break;
7646
7647 case SYMBOL_SMALL_TLSGD:
7648 asm_fprintf (asm_out_file, ":tlsgd:");
7649 break;
7650
7651 case SYMBOL_SMALL_TLSDESC:
7652 asm_fprintf (asm_out_file, ":tlsdesc:");
7653 break;
7654
79496620 7655 case SYMBOL_SMALL_TLSIE:
43e9d192
IB
7656 asm_fprintf (asm_out_file, ":gottprel:");
7657 break;
7658
d18ba284 7659 case SYMBOL_TLSLE24:
43e9d192
IB
7660 asm_fprintf (asm_out_file, ":tprel:");
7661 break;
7662
87dd8ab0
MS
7663 case SYMBOL_TINY_GOT:
7664 gcc_unreachable ();
7665 break;
7666
43e9d192
IB
7667 default:
7668 break;
7669 }
7670 output_addr_const (asm_out_file, x);
7671 break;
7672
7673 case 'L':
a6e0bfa7 7674 switch (aarch64_classify_symbolic_expression (x))
43e9d192 7675 {
6642bdb4 7676 case SYMBOL_SMALL_GOT_4G:
43e9d192
IB
7677 asm_fprintf (asm_out_file, ":lo12:");
7678 break;
7679
7680 case SYMBOL_SMALL_TLSGD:
7681 asm_fprintf (asm_out_file, ":tlsgd_lo12:");
7682 break;
7683
7684 case SYMBOL_SMALL_TLSDESC:
7685 asm_fprintf (asm_out_file, ":tlsdesc_lo12:");
7686 break;
7687
79496620 7688 case SYMBOL_SMALL_TLSIE:
43e9d192
IB
7689 asm_fprintf (asm_out_file, ":gottprel_lo12:");
7690 break;
7691
cbf5629e
JW
7692 case SYMBOL_TLSLE12:
7693 asm_fprintf (asm_out_file, ":tprel_lo12:");
7694 break;
7695
d18ba284 7696 case SYMBOL_TLSLE24:
43e9d192
IB
7697 asm_fprintf (asm_out_file, ":tprel_lo12_nc:");
7698 break;
7699
87dd8ab0
MS
7700 case SYMBOL_TINY_GOT:
7701 asm_fprintf (asm_out_file, ":got:");
7702 break;
7703
5ae7caad
JW
7704 case SYMBOL_TINY_TLSIE:
7705 asm_fprintf (asm_out_file, ":gottprel:");
7706 break;
7707
43e9d192
IB
7708 default:
7709 break;
7710 }
7711 output_addr_const (asm_out_file, x);
7712 break;
7713
7714 case 'G':
a6e0bfa7 7715 switch (aarch64_classify_symbolic_expression (x))
43e9d192 7716 {
d18ba284 7717 case SYMBOL_TLSLE24:
43e9d192
IB
7718 asm_fprintf (asm_out_file, ":tprel_hi12:");
7719 break;
7720 default:
7721 break;
7722 }
7723 output_addr_const (asm_out_file, x);
7724 break;
7725
cf670503
ZC
7726 case 'k':
7727 {
c8012fbc 7728 HOST_WIDE_INT cond_code;
cf670503 7729
c8012fbc 7730 if (!CONST_INT_P (x))
cf670503
ZC
7731 {
7732 output_operand_lossage ("invalid operand for '%%%c'", code);
7733 return;
7734 }
7735
c8012fbc
WD
7736 cond_code = INTVAL (x);
7737 gcc_assert (cond_code >= 0 && cond_code <= AARCH64_NV);
7738 asm_fprintf (f, "%d", aarch64_nzcv_codes[cond_code]);
cf670503
ZC
7739 }
7740 break;
7741
e69a816d
WD
7742 case 'y':
7743 case 'z':
7744 {
7745 machine_mode mode = GET_MODE (x);
7746
c348cab0 7747 if (GET_CODE (x) != MEM
6a70badb 7748 || (code == 'y' && maybe_ne (GET_MODE_SIZE (mode), 16)))
e69a816d
WD
7749 {
7750 output_operand_lossage ("invalid operand for '%%%c'", code);
7751 return;
7752 }
7753
a25831ac
AV
7754 if (!aarch64_print_address_internal (f, mode, XEXP (x, 0),
7755 code == 'y'
7756 ? ADDR_QUERY_LDP_STP_N
7757 : ADDR_QUERY_LDP_STP))
c348cab0 7758 output_operand_lossage ("invalid operand prefix '%%%c'", code);
e69a816d
WD
7759 }
7760 break;
7761
43e9d192
IB
7762 default:
7763 output_operand_lossage ("invalid operand prefix '%%%c'", code);
7764 return;
7765 }
7766}
7767
e69a816d
WD
7768/* Print address 'x' of a memory access with mode 'mode'.
7769 TYPE is the query context required by aarch64_classify_address: for
7770 example ADDR_QUERY_M for a normal memory access or ADDR_QUERY_LDP_STP for LDP/STP. */
c348cab0 7771static bool
a97d8b98
RS
7772aarch64_print_address_internal (FILE *f, machine_mode mode, rtx x,
7773 aarch64_addr_query_type type)
43e9d192
IB
7774{
7775 struct aarch64_address_info addr;
6a70badb 7776 unsigned int size;
43e9d192 7777
e69a816d 7778 /* Check all addresses are Pmode - including ILP32. */
31460ed2
JJ
7779 if (GET_MODE (x) != Pmode
7780 && (!CONST_INT_P (x)
7781 || trunc_int_for_mode (INTVAL (x), Pmode) != INTVAL (x)))
7782 {
7783 output_operand_lossage ("invalid address mode");
7784 return false;
7785 }
e69a816d 7786
a97d8b98 7787 if (aarch64_classify_address (&addr, x, mode, true, type))
43e9d192
IB
7788 switch (addr.type)
7789 {
7790 case ADDRESS_REG_IMM:
dc640181 7791 if (known_eq (addr.const_offset, 0))
01a3a324 7792 asm_fprintf (f, "[%s]", reg_names [REGNO (addr.base)]);
43cacb12
RS
7793 else if (aarch64_sve_data_mode_p (mode))
7794 {
7795 HOST_WIDE_INT vnum
7796 = exact_div (addr.const_offset,
7797 BYTES_PER_SVE_VECTOR).to_constant ();
7798 asm_fprintf (f, "[%s, #%wd, mul vl]",
7799 reg_names[REGNO (addr.base)], vnum);
7800 }
7801 else if (aarch64_sve_pred_mode_p (mode))
7802 {
7803 HOST_WIDE_INT vnum
7804 = exact_div (addr.const_offset,
7805 BYTES_PER_SVE_PRED).to_constant ();
7806 asm_fprintf (f, "[%s, #%wd, mul vl]",
7807 reg_names[REGNO (addr.base)], vnum);
7808 }
43e9d192 7809 else
16a3246f 7810 asm_fprintf (f, "[%s, %wd]", reg_names [REGNO (addr.base)],
43e9d192 7811 INTVAL (addr.offset));
c348cab0 7812 return true;
43e9d192
IB
7813
7814 case ADDRESS_REG_REG:
7815 if (addr.shift == 0)
16a3246f 7816 asm_fprintf (f, "[%s, %s]", reg_names [REGNO (addr.base)],
01a3a324 7817 reg_names [REGNO (addr.offset)]);
43e9d192 7818 else
16a3246f 7819 asm_fprintf (f, "[%s, %s, lsl %u]", reg_names [REGNO (addr.base)],
01a3a324 7820 reg_names [REGNO (addr.offset)], addr.shift);
c348cab0 7821 return true;
43e9d192
IB
7822
7823 case ADDRESS_REG_UXTW:
7824 if (addr.shift == 0)
16a3246f 7825 asm_fprintf (f, "[%s, w%d, uxtw]", reg_names [REGNO (addr.base)],
43e9d192
IB
7826 REGNO (addr.offset) - R0_REGNUM);
7827 else
16a3246f 7828 asm_fprintf (f, "[%s, w%d, uxtw %u]", reg_names [REGNO (addr.base)],
43e9d192 7829 REGNO (addr.offset) - R0_REGNUM, addr.shift);
c348cab0 7830 return true;
43e9d192
IB
7831
7832 case ADDRESS_REG_SXTW:
7833 if (addr.shift == 0)
16a3246f 7834 asm_fprintf (f, "[%s, w%d, sxtw]", reg_names [REGNO (addr.base)],
43e9d192
IB
7835 REGNO (addr.offset) - R0_REGNUM);
7836 else
16a3246f 7837 asm_fprintf (f, "[%s, w%d, sxtw %u]", reg_names [REGNO (addr.base)],
43e9d192 7838 REGNO (addr.offset) - R0_REGNUM, addr.shift);
c348cab0 7839 return true;
43e9d192
IB
7840
7841 case ADDRESS_REG_WB:
6a70badb
RS
7842 /* Writeback is only supported for fixed-width modes. */
7843 size = GET_MODE_SIZE (mode).to_constant ();
43e9d192
IB
7844 switch (GET_CODE (x))
7845 {
7846 case PRE_INC:
6a70badb 7847 asm_fprintf (f, "[%s, %d]!", reg_names [REGNO (addr.base)], size);
c348cab0 7848 return true;
43e9d192 7849 case POST_INC:
6a70badb 7850 asm_fprintf (f, "[%s], %d", reg_names [REGNO (addr.base)], size);
c348cab0 7851 return true;
43e9d192 7852 case PRE_DEC:
6a70badb 7853 asm_fprintf (f, "[%s, -%d]!", reg_names [REGNO (addr.base)], size);
c348cab0 7854 return true;
43e9d192 7855 case POST_DEC:
6a70badb 7856 asm_fprintf (f, "[%s], -%d", reg_names [REGNO (addr.base)], size);
c348cab0 7857 return true;
43e9d192 7858 case PRE_MODIFY:
6a70badb 7859 asm_fprintf (f, "[%s, %wd]!", reg_names[REGNO (addr.base)],
43e9d192 7860 INTVAL (addr.offset));
c348cab0 7861 return true;
43e9d192 7862 case POST_MODIFY:
6a70badb 7863 asm_fprintf (f, "[%s], %wd", reg_names[REGNO (addr.base)],
43e9d192 7864 INTVAL (addr.offset));
c348cab0 7865 return true;
43e9d192
IB
7866 default:
7867 break;
7868 }
7869 break;
7870
7871 case ADDRESS_LO_SUM:
16a3246f 7872 asm_fprintf (f, "[%s, #:lo12:", reg_names [REGNO (addr.base)]);
43e9d192
IB
7873 output_addr_const (f, addr.offset);
7874 asm_fprintf (f, "]");
c348cab0 7875 return true;
43e9d192
IB
7876
7877 case ADDRESS_SYMBOLIC:
d6591257 7878 output_addr_const (f, x);
c348cab0 7879 return true;
43e9d192
IB
7880 }
7881
c348cab0 7882 return false;
43e9d192
IB
7883}
7884
e69a816d
WD
7885/* Print address 'x' of a memory access with mode 'mode'. */
7886static void
7887aarch64_print_operand_address (FILE *f, machine_mode mode, rtx x)
7888{
43cacb12 7889 if (!aarch64_print_address_internal (f, mode, x, ADDR_QUERY_ANY))
c348cab0 7890 output_addr_const (f, x);
e69a816d
WD
7891}
7892
43e9d192
IB
7893bool
7894aarch64_label_mentioned_p (rtx x)
7895{
7896 const char *fmt;
7897 int i;
7898
7899 if (GET_CODE (x) == LABEL_REF)
7900 return true;
7901
7902 /* UNSPEC_TLS entries for a symbol include a LABEL_REF for the
7903 referencing instruction, but they are constant offsets, not
7904 symbols. */
7905 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
7906 return false;
7907
7908 fmt = GET_RTX_FORMAT (GET_CODE (x));
7909 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
7910 {
7911 if (fmt[i] == 'E')
7912 {
7913 int j;
7914
7915 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
7916 if (aarch64_label_mentioned_p (XVECEXP (x, i, j)))
7917 return 1;
7918 }
7919 else if (fmt[i] == 'e' && aarch64_label_mentioned_p (XEXP (x, i)))
7920 return 1;
7921 }
7922
7923 return 0;
7924}
7925
7926/* Implement REGNO_REG_CLASS. */
7927
7928enum reg_class
7929aarch64_regno_regclass (unsigned regno)
7930{
7931 if (GP_REGNUM_P (regno))
a4a182c6 7932 return GENERAL_REGS;
43e9d192
IB
7933
7934 if (regno == SP_REGNUM)
7935 return STACK_REG;
7936
7937 if (regno == FRAME_POINTER_REGNUM
7938 || regno == ARG_POINTER_REGNUM)
f24bb080 7939 return POINTER_REGS;
43e9d192
IB
7940
7941 if (FP_REGNUM_P (regno))
7942 return FP_LO_REGNUM_P (regno) ? FP_LO_REGS : FP_REGS;
7943
43cacb12
RS
7944 if (PR_REGNUM_P (regno))
7945 return PR_LO_REGNUM_P (regno) ? PR_LO_REGS : PR_HI_REGS;
7946
43e9d192
IB
7947 return NO_REGS;
7948}
7949
6a70badb
RS
7950/* OFFSET is an address offset for mode MODE, which has SIZE bytes.
7951 If OFFSET is out of range, return an offset of an anchor point
7952 that is in range. Return 0 otherwise. */
7953
7954static HOST_WIDE_INT
7955aarch64_anchor_offset (HOST_WIDE_INT offset, HOST_WIDE_INT size,
7956 machine_mode mode)
7957{
7958 /* Does it look like we'll need a 16-byte load/store-pair operation? */
7959 if (size > 16)
7960 return (offset + 0x400) & ~0x7f0;
7961
7962 /* For offsets that aren't a multiple of the access size, the limit is
7963 -256...255. */
7964 if (offset & (size - 1))
7965 {
7966 /* BLKmode typically uses LDP of X-registers. */
7967 if (mode == BLKmode)
7968 return (offset + 512) & ~0x3ff;
7969 return (offset + 0x100) & ~0x1ff;
7970 }
7971
7972 /* Small negative offsets are supported. */
7973 if (IN_RANGE (offset, -256, 0))
7974 return 0;
7975
7976 if (mode == TImode || mode == TFmode)
7977 return (offset + 0x100) & ~0x1ff;
7978
7979 /* Use an unsigned 12-bit offset, scaled by the access size. */
7980 return offset & (~0xfff * size);
7981}
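A worked example of the masks above (host-side C sketch, not GCC code; the BLKmode and TImode/TFmode cases are omitted): an 8-byte access at a misaligned offset keeps only a small signed remainder, while an aligned one keeps the full 12-bit scaled range.

#include <stdio.h>

static long anchor_sketch (long offset, long size)
{
  if (size > 16)
    return (offset + 0x400) & ~0x7f0;        /* likely an LDP/STP of X regs */
  if (offset & (size - 1))
    return (offset + 0x100) & ~0x1ff;        /* unaligned: signed 9-bit left over */
  if (offset >= -256 && offset <= 0)
    return 0;                                /* small negative offsets are fine */
  return offset & (~0xfff * size);           /* unsigned 12-bit scaled left over */
}

int main (void)
{
  printf ("%lx\n", anchor_sketch (0x12345, 8));  /* 12400: remainder -0xbb */
  printf ("%lx\n", anchor_sketch (0x12340, 8));  /* 10000: remainder 0x2340 */
  return 0;
}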
7982
0c4ec427 7983static rtx
ef4bddc2 7984aarch64_legitimize_address (rtx x, rtx /* orig_x */, machine_mode mode)
0c4ec427
RE
7985{
7986 /* Try to split X+CONST into Y=X+(CONST & ~mask), Y+(CONST&mask),
7987 where mask is selected by alignment and size of the offset.
7988 We try to pick as large a range for the offset as possible to
7989 maximize the chance of a CSE. However, for aligned addresses
7990 we limit the range to 4k so that structures with different sized
e8426e0a
BC
7991 elements are likely to use the same base. We need to be careful
7992 not to split a CONST for some forms of address expression, otherwise
7993 it will generate sub-optimal code. */
0c4ec427
RE
7994
7995 if (GET_CODE (x) == PLUS && CONST_INT_P (XEXP (x, 1)))
7996 {
9e0218fc 7997 rtx base = XEXP (x, 0);
17d7bdd8 7998 rtx offset_rtx = XEXP (x, 1);
9e0218fc 7999 HOST_WIDE_INT offset = INTVAL (offset_rtx);
0c4ec427 8000
9e0218fc 8001 if (GET_CODE (base) == PLUS)
e8426e0a 8002 {
9e0218fc
RH
8003 rtx op0 = XEXP (base, 0);
8004 rtx op1 = XEXP (base, 1);
8005
8006 /* Force any scaling into a temp for CSE. */
8007 op0 = force_reg (Pmode, op0);
8008 op1 = force_reg (Pmode, op1);
8009
8010 /* Let the pointer register be in op0. */
8011 if (REG_POINTER (op1))
8012 std::swap (op0, op1);
8013
8014 /* If the pointer is virtual or frame related, then we know that
8015 virtual register instantiation or register elimination is going
8016 to apply a second constant. We want the two constants folded
8017 together easily. Therefore, emit as (OP0 + CONST) + OP1. */
8018 if (virt_or_elim_regno_p (REGNO (op0)))
e8426e0a 8019 {
9e0218fc
RH
8020 base = expand_binop (Pmode, add_optab, op0, offset_rtx,
8021 NULL_RTX, true, OPTAB_DIRECT);
8022 return gen_rtx_PLUS (Pmode, base, op1);
e8426e0a 8023 }
e8426e0a 8024
9e0218fc
RH
8025 /* Otherwise, in order to encourage CSE (and thence loop strength
8026 reduce) scaled addresses, emit as (OP0 + OP1) + CONST. */
8027 base = expand_binop (Pmode, add_optab, op0, op1,
8028 NULL_RTX, true, OPTAB_DIRECT);
8029 x = gen_rtx_PLUS (Pmode, base, offset_rtx);
e8426e0a
BC
8030 }
8031
6a70badb
RS
8032 HOST_WIDE_INT size;
8033 if (GET_MODE_SIZE (mode).is_constant (&size))
ff0f3f1c 8034 {
6a70badb
RS
8035 HOST_WIDE_INT base_offset = aarch64_anchor_offset (offset, size,
8036 mode);
8037 if (base_offset != 0)
8038 {
8039 base = plus_constant (Pmode, base, base_offset);
8040 base = force_operand (base, NULL_RTX);
8041 return plus_constant (Pmode, base, offset - base_offset);
8042 }
9e0218fc 8043 }
0c4ec427
RE
8044 }
8045
8046 return x;
8047}
8048
43e9d192
IB
8049static reg_class_t
8050aarch64_secondary_reload (bool in_p ATTRIBUTE_UNUSED, rtx x,
8051 reg_class_t rclass,
ef4bddc2 8052 machine_mode mode,
43e9d192
IB
8053 secondary_reload_info *sri)
8054{
9a1b9cb4
RS
8055 /* Use aarch64_sve_reload_be for SVE reloads that cannot be handled
8056 directly by the *aarch64_sve_mov<mode>_be move pattern. See the
8057 comment at the head of aarch64-sve.md for more details about the
8058 big-endian handling. */
43cacb12
RS
8059 if (BYTES_BIG_ENDIAN
8060 && reg_class_subset_p (rclass, FP_REGS)
9a1b9cb4
RS
8061 && !((REG_P (x) && HARD_REGISTER_P (x))
8062 || aarch64_simd_valid_immediate (x, NULL))
43cacb12
RS
8063 && aarch64_sve_data_mode_p (mode))
8064 {
8065 sri->icode = CODE_FOR_aarch64_sve_reload_be;
8066 return NO_REGS;
8067 }
b4f50fd4
RR
8068
8069 /* If we have to disable direct literal pool loads and stores because the
8070 function is too big, then we need a scratch register. */
8071 if (MEM_P (x) && GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x)
8072 && (SCALAR_FLOAT_MODE_P (GET_MODE (x))
8073 || targetm.vector_mode_supported_p (GET_MODE (x)))
9ee6540a 8074 && !aarch64_pcrelative_literal_loads)
b4f50fd4 8075 {
0016d8d9 8076 sri->icode = code_for_aarch64_reload_movcp (mode, DImode);
b4f50fd4
RR
8077 return NO_REGS;
8078 }
8079
43e9d192
IB
8080 /* Without the TARGET_SIMD instructions we cannot move a Q register
8081 to a Q register directly. We need a scratch. */
8082 if (REG_P (x) && (mode == TFmode || mode == TImode) && mode == GET_MODE (x)
8083 && FP_REGNUM_P (REGNO (x)) && !TARGET_SIMD
8084 && reg_class_subset_p (rclass, FP_REGS))
8085 {
0016d8d9 8086 sri->icode = code_for_aarch64_reload_mov (mode);
43e9d192
IB
8087 return NO_REGS;
8088 }
8089
8090 /* A TFmode or TImode memory access should be handled via an FP register
8091 because AArch64 has richer addressing modes for LDR/STR instructions
8092 than LDP/STP instructions. */
d5726973 8093 if (TARGET_FLOAT && rclass == GENERAL_REGS
6a70badb 8094 && known_eq (GET_MODE_SIZE (mode), 16) && MEM_P (x))
43e9d192
IB
8095 return FP_REGS;
8096
8097 if (rclass == FP_REGS && (mode == TImode || mode == TFmode) && CONSTANT_P(x))
a4a182c6 8098 return GENERAL_REGS;
43e9d192
IB
8099
8100 return NO_REGS;
8101}
8102
8103static bool
6216fd90 8104aarch64_can_eliminate (const int from ATTRIBUTE_UNUSED, const int to)
43e9d192 8105{
6216fd90 8106 gcc_assert (from == ARG_POINTER_REGNUM || from == FRAME_POINTER_REGNUM);
43e9d192 8107
6216fd90
WD
8108 /* If we need a frame pointer, ARG_POINTER_REGNUM and FRAME_POINTER_REGNUM
8109 can only eliminate to HARD_FRAME_POINTER_REGNUM. */
43e9d192 8110 if (frame_pointer_needed)
6216fd90 8111 return to == HARD_FRAME_POINTER_REGNUM;
43e9d192
IB
8112 return true;
8113}
8114
6a70badb 8115poly_int64
43e9d192
IB
8116aarch64_initial_elimination_offset (unsigned from, unsigned to)
8117{
78c29983
MS
8118 if (to == HARD_FRAME_POINTER_REGNUM)
8119 {
8120 if (from == ARG_POINTER_REGNUM)
71bfb77a 8121 return cfun->machine->frame.hard_fp_offset;
78c29983
MS
8122
8123 if (from == FRAME_POINTER_REGNUM)
71bfb77a
WD
8124 return cfun->machine->frame.hard_fp_offset
8125 - cfun->machine->frame.locals_offset;
78c29983
MS
8126 }
8127
8128 if (to == STACK_POINTER_REGNUM)
8129 {
8130 if (from == FRAME_POINTER_REGNUM)
71bfb77a
WD
8131 return cfun->machine->frame.frame_size
8132 - cfun->machine->frame.locals_offset;
78c29983
MS
8133 }
8134
1c960e02 8135 return cfun->machine->frame.frame_size;
43e9d192
IB
8136}
8137
43e9d192
IB
8138/* Implement RETURN_ADDR_RTX. We do not support moving back to a
8139 previous frame. */
8140
8141rtx
8142aarch64_return_addr (int count, rtx frame ATTRIBUTE_UNUSED)
8143{
8144 if (count != 0)
8145 return const0_rtx;
8146 return get_hard_reg_initial_val (Pmode, LR_REGNUM);
8147}
8148
8149
8150static void
8151aarch64_asm_trampoline_template (FILE *f)
8152{
28514dda
YZ
8153 if (TARGET_ILP32)
8154 {
8155 asm_fprintf (f, "\tldr\tw%d, .+16\n", IP1_REGNUM - R0_REGNUM);
8156 asm_fprintf (f, "\tldr\tw%d, .+16\n", STATIC_CHAIN_REGNUM - R0_REGNUM);
8157 }
8158 else
8159 {
8160 asm_fprintf (f, "\tldr\t%s, .+16\n", reg_names [IP1_REGNUM]);
8161 asm_fprintf (f, "\tldr\t%s, .+20\n", reg_names [STATIC_CHAIN_REGNUM]);
8162 }
01a3a324 8163 asm_fprintf (f, "\tbr\t%s\n", reg_names [IP1_REGNUM]);
43e9d192 8164 assemble_aligned_integer (4, const0_rtx);
28514dda
YZ
8165 assemble_aligned_integer (POINTER_BYTES, const0_rtx);
8166 assemble_aligned_integer (POINTER_BYTES, const0_rtx);
43e9d192
IB
8167}
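/* For reference, a sketch of the trampoline this template produces in the
   LP64 case (register names assume the usual assignment of IP1 to x17 and
   the static chain to x18; offsets follow from the 16-byte code size and
   POINTER_BYTES == 8):

	offset  0:  ldr	x17, .+16	// load the target function address
	offset  4:  ldr	x18, .+20	// load the static chain value
	offset  8:  br	x17
	offset 12:  .word 0		// padding to 16 bytes of code
	offset 16:  .dword <fnaddr>	// filled in by aarch64_trampoline_init
	offset 24:  .dword <chain>	// likewise

   aarch64_trampoline_init below copies the code part and then stores the
   function address and static chain at offsets 16 and 24.  */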
8168
8169static void
8170aarch64_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
8171{
8172 rtx fnaddr, mem, a_tramp;
28514dda 8173 const int tramp_code_sz = 16;
43e9d192
IB
8174
8175 /* Don't need to copy the trailing D-words; we fill those in below. */
8176 emit_block_move (m_tramp, assemble_trampoline_template (),
28514dda
YZ
8177 GEN_INT (tramp_code_sz), BLOCK_OP_NORMAL);
8178 mem = adjust_address (m_tramp, ptr_mode, tramp_code_sz);
43e9d192 8179 fnaddr = XEXP (DECL_RTL (fndecl), 0);
28514dda
YZ
8180 if (GET_MODE (fnaddr) != ptr_mode)
8181 fnaddr = convert_memory_address (ptr_mode, fnaddr);
43e9d192
IB
8182 emit_move_insn (mem, fnaddr);
8183
28514dda 8184 mem = adjust_address (m_tramp, ptr_mode, tramp_code_sz + POINTER_BYTES);
43e9d192
IB
8185 emit_move_insn (mem, chain_value);
8186
8187 /* XXX We should really define a "clear_cache" pattern and use
8188 gen_clear_cache(). */
8189 a_tramp = XEXP (m_tramp, 0);
8190 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__clear_cache"),
db69559b 8191 LCT_NORMAL, VOIDmode, a_tramp, ptr_mode,
28514dda
YZ
8192 plus_constant (ptr_mode, a_tramp, TRAMPOLINE_SIZE),
8193 ptr_mode);
43e9d192
IB
8194}
8195
8196static unsigned char
ef4bddc2 8197aarch64_class_max_nregs (reg_class_t regclass, machine_mode mode)
43e9d192 8198{
6a70badb
RS
8199 /* ??? Logically we should only need to provide a value when
8200 HARD_REGNO_MODE_OK says that at least one register in REGCLASS
8201 can hold MODE, but at the moment we need to handle all modes.
8202 Just ignore any runtime parts for registers that can't store them. */
8203 HOST_WIDE_INT lowest_size = constant_lower_bound (GET_MODE_SIZE (mode));
43cacb12 8204 unsigned int nregs;
43e9d192
IB
8205 switch (regclass)
8206 {
d677263e 8207 case TAILCALL_ADDR_REGS:
43e9d192
IB
8208 case POINTER_REGS:
8209 case GENERAL_REGS:
8210 case ALL_REGS:
f25a140b 8211 case POINTER_AND_FP_REGS:
43e9d192
IB
8212 case FP_REGS:
8213 case FP_LO_REGS:
43cacb12
RS
8214 if (aarch64_sve_data_mode_p (mode)
8215 && constant_multiple_p (GET_MODE_SIZE (mode),
8216 BYTES_PER_SVE_VECTOR, &nregs))
8217 return nregs;
8218 return (aarch64_vector_data_mode_p (mode)
6a70badb
RS
8219 ? CEIL (lowest_size, UNITS_PER_VREG)
8220 : CEIL (lowest_size, UNITS_PER_WORD));
43e9d192 8221 case STACK_REG:
43cacb12
RS
8222 case PR_REGS:
8223 case PR_LO_REGS:
8224 case PR_HI_REGS:
43e9d192
IB
8225 return 1;
8226
8227 case NO_REGS:
8228 return 0;
8229
8230 default:
8231 break;
8232 }
8233 gcc_unreachable ();
8234}
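/* A few illustrative results of the logic above under LP64
   (UNITS_PER_WORD == 8, UNITS_PER_VREG == 16); worked examples only,
   not an exhaustive specification:
     FP_REGS,      V4SImode   -> 1  (one 128-bit vector register)
     GENERAL_REGS, TImode     -> 2  (CEIL (16, 8))
     FP_REGS,      VNx4SImode -> 1  (one SVE data register)
     PR_REGS, any predicate mode -> 1  */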
8235
8236static reg_class_t
78d8b9f0 8237aarch64_preferred_reload_class (rtx x, reg_class_t regclass)
43e9d192 8238{
51bb310d 8239 if (regclass == POINTER_REGS)
78d8b9f0
IB
8240 return GENERAL_REGS;
8241
51bb310d
MS
8242 if (regclass == STACK_REG)
8243 {
8244 if (REG_P (x)
8245 && reg_class_subset_p (REGNO_REG_CLASS (REGNO (x)), POINTER_REGS))
8246 return regclass;
8247
8248 return NO_REGS;
8249 }
8250
27bd251b
IB
8251 /* Register elimination can result in a request for
8252 SP+constant->FP_REGS. We cannot support such operations, which
8253 use SP as source and an FP_REG as destination, so reject them
8254 outright now. */
8255 if (! reg_class_subset_p (regclass, GENERAL_REGS) && GET_CODE (x) == PLUS)
8256 {
8257 rtx lhs = XEXP (x, 0);
8258
8259 /* Look through a possible SUBREG introduced by ILP32. */
8260 if (GET_CODE (lhs) == SUBREG)
8261 lhs = SUBREG_REG (lhs);
8262
8263 gcc_assert (REG_P (lhs));
8264 gcc_assert (reg_class_subset_p (REGNO_REG_CLASS (REGNO (lhs)),
8265 POINTER_REGS));
8266 return NO_REGS;
8267 }
8268
78d8b9f0 8269 return regclass;
43e9d192
IB
8270}
8271
8272void
8273aarch64_asm_output_labelref (FILE* f, const char *name)
8274{
8275 asm_fprintf (f, "%U%s", name);
8276}
8277
8278static void
8279aarch64_elf_asm_constructor (rtx symbol, int priority)
8280{
8281 if (priority == DEFAULT_INIT_PRIORITY)
8282 default_ctor_section_asm_out_constructor (symbol, priority);
8283 else
8284 {
8285 section *s;
53d190c1
AT
8286 /* While priority is known to be in the range [0, 65535], so 18 bytes
8287 would be enough, the compiler might not know that. To avoid a
8288 -Wformat-truncation false positive, use a larger size. */
8289 char buf[23];
43e9d192 8290 snprintf (buf, sizeof (buf), ".init_array.%.5u", priority);
fcef3abd 8291 s = get_section (buf, SECTION_WRITE | SECTION_NOTYPE, NULL);
43e9d192
IB
8292 switch_to_section (s);
8293 assemble_align (POINTER_SIZE);
28514dda 8294 assemble_aligned_integer (POINTER_BYTES, symbol);
43e9d192
IB
8295 }
8296}
8297
8298static void
8299aarch64_elf_asm_destructor (rtx symbol, int priority)
8300{
8301 if (priority == DEFAULT_INIT_PRIORITY)
8302 default_dtor_section_asm_out_destructor (symbol, priority);
8303 else
8304 {
8305 section *s;
53d190c1
AT
8306 /* While priority is known to be in the range [0, 65535], so 18 bytes
8307 would be enough, the compiler might not know that. To avoid a
8308 -Wformat-truncation false positive, use a larger size. */
8309 char buf[23];
43e9d192 8310 snprintf (buf, sizeof (buf), ".fini_array.%.5u", priority);
fcef3abd 8311 s = get_section (buf, SECTION_WRITE | SECTION_NOTYPE, NULL);
43e9d192
IB
8312 switch_to_section (s);
8313 assemble_align (POINTER_SIZE);
28514dda 8314 assemble_aligned_integer (POINTER_BYTES, symbol);
43e9d192
IB
8315 }
8316}
8317
8318const char*
8319aarch64_output_casesi (rtx *operands)
8320{
8321 char buf[100];
8322 char label[100];
b32d5189 8323 rtx diff_vec = PATTERN (NEXT_INSN (as_a <rtx_insn *> (operands[2])));
43e9d192
IB
8324 int index;
8325 static const char *const patterns[4][2] =
8326 {
8327 {
8328 "ldrb\t%w3, [%0,%w1,uxtw]",
8329 "add\t%3, %4, %w3, sxtb #2"
8330 },
8331 {
8332 "ldrh\t%w3, [%0,%w1,uxtw #1]",
8333 "add\t%3, %4, %w3, sxth #2"
8334 },
8335 {
8336 "ldr\t%w3, [%0,%w1,uxtw #2]",
8337 "add\t%3, %4, %w3, sxtw #2"
8338 },
8339 /* We assume that DImode is only generated when not optimizing and
8340 that we don't really need 64-bit address offsets. That would
8341 imply an object file with 8GB of code in a single function! */
8342 {
8343 "ldr\t%w3, [%0,%w1,uxtw #2]",
8344 "add\t%3, %4, %w3, sxtw #2"
8345 }
8346 };
8347
8348 gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC);
8349
77e994c9
RS
8350 scalar_int_mode mode = as_a <scalar_int_mode> (GET_MODE (diff_vec));
8351 index = exact_log2 (GET_MODE_SIZE (mode));
43e9d192
IB
8352
8353 gcc_assert (index >= 0 && index <= 3);
8354
8355 /* Need to implement table size reduction, by changing the code below. */
8356 output_asm_insn (patterns[index][0], operands);
8357 ASM_GENERATE_INTERNAL_LABEL (label, "Lrtx", CODE_LABEL_NUMBER (operands[2]));
8358 snprintf (buf, sizeof (buf),
8359 "adr\t%%4, %s", targetm.strip_name_encoding (label));
8360 output_asm_insn (buf, operands);
8361 output_asm_insn (patterns[index][1], operands);
8362 output_asm_insn ("br\t%3", operands);
8363 assemble_label (asm_out_file, label);
8364 return "";
8365}
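/* As a sketch of the output for the HImode (index == 1) case, with %0 the
   table base, %w1 the index, %3 and %4 scratch registers and N the label
   number (the concrete registers shown here are only illustrative):

	ldrh	w3, [x0, w1, uxtw #1]
	adr	x4, .LrtxN
	add	x3, x4, w3, sxth #2
	br	x3
   .LrtxN:

   i.e. each table entry holds (target label - .LrtxN) / 4, which the
   "#2" scale in the add turns back into a byte offset.  */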
8366
8367
8368/* Return size in bits of an arithmetic operand which is shifted/scaled and
8369 masked such that it is suitable for a UXTB, UXTH, or UXTW extend
8370 operator. */
8371
8372int
8373aarch64_uxt_size (int shift, HOST_WIDE_INT mask)
8374{
8375 if (shift >= 0 && shift <= 3)
8376 {
8377 int size;
8378 for (size = 8; size <= 32; size *= 2)
8379 {
8380 HOST_WIDE_INT bits = ((HOST_WIDE_INT)1U << size) - 1;
8381 if (mask == bits << shift)
8382 return size;
8383 }
8384 }
8385 return 0;
8386}
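/* Worked examples of the check above (values chosen purely for
   illustration):
     aarch64_uxt_size (1, 0x1fe)   == 8   -- 0xff << 1, so a UXTB fits
     aarch64_uxt_size (2, 0x3fffc) == 16  -- 0xffff << 2, so a UXTH fits
     aarch64_uxt_size (0, 0x7f)    == 0   -- not a full 8/16/32-bit mask  */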
8387
e78d485e
RR
8388 /* Constant pools are per-function only when PC-relative
8389 literal loads are enabled or we are using the large
8390 memory model. */
8391
8392static inline bool
8393aarch64_can_use_per_function_literal_pools_p (void)
8394{
9ee6540a 8395 return (aarch64_pcrelative_literal_loads
e78d485e
RR
8396 || aarch64_cmodel == AARCH64_CMODEL_LARGE);
8397}
8398
43e9d192 8399static bool
e78d485e 8400aarch64_use_blocks_for_constant_p (machine_mode, const_rtx)
43e9d192 8401{
74a9301d
VM
8402 /* We can't use blocks for constants when we're using a per-function
8403 constant pool. */
8404 return !aarch64_can_use_per_function_literal_pools_p ();
43e9d192
IB
8405}
8406
e78d485e
RR
8407/* Select appropriate section for constants depending
8408 on where we place literal pools. */
8409
43e9d192 8410static section *
e78d485e
RR
8411aarch64_select_rtx_section (machine_mode mode,
8412 rtx x,
8413 unsigned HOST_WIDE_INT align)
43e9d192 8414{
e78d485e
RR
8415 if (aarch64_can_use_per_function_literal_pools_p ())
8416 return function_section (current_function_decl);
43e9d192 8417
e78d485e
RR
8418 return default_elf_select_rtx_section (mode, x, align);
8419}
43e9d192 8420
5fca7b66
RH
8421/* Implement ASM_OUTPUT_POOL_EPILOGUE. */
8422void
8423aarch64_asm_output_pool_epilogue (FILE *f, const char *, tree,
8424 HOST_WIDE_INT offset)
8425{
8426 /* When using per-function literal pools, we must ensure that any code
8427 section is aligned to the minimal instruction length, lest we get
8428 errors from the assembler re "unaligned instructions". */
8429 if ((offset & 3) && aarch64_can_use_per_function_literal_pools_p ())
8430 ASM_OUTPUT_ALIGN (f, 2);
8431}
8432
43e9d192
IB
8433/* Costs. */
8434
8435/* Helper function for rtx cost calculation. Strip a shift expression
8436 from X. Returns the inner operand if successful, or the original
8437 expression on failure. */
8438static rtx
8439aarch64_strip_shift (rtx x)
8440{
8441 rtx op = x;
8442
57b77d46
RE
8443 /* We accept both ROTATERT and ROTATE: since the RHS must be a constant
8444 we can convert both to ROR during final output. */
43e9d192
IB
8445 if ((GET_CODE (op) == ASHIFT
8446 || GET_CODE (op) == ASHIFTRT
57b77d46
RE
8447 || GET_CODE (op) == LSHIFTRT
8448 || GET_CODE (op) == ROTATERT
8449 || GET_CODE (op) == ROTATE)
43e9d192
IB
8450 && CONST_INT_P (XEXP (op, 1)))
8451 return XEXP (op, 0);
8452
8453 if (GET_CODE (op) == MULT
8454 && CONST_INT_P (XEXP (op, 1))
8455 && ((unsigned) exact_log2 (INTVAL (XEXP (op, 1)))) < 64)
8456 return XEXP (op, 0);
8457
8458 return x;
8459}
8460
4745e701 8461/* Helper function for rtx cost calculation. Strip an extend
43e9d192
IB
8462 expression from X. Returns the inner operand if successful, or the
8463 original expression on failure. We deal with a number of possible
b10f1009
AP
8464 canonicalization variations here. If STRIP_SHIFT is true, then
8465 we can strip off a shift also. */
43e9d192 8466static rtx
b10f1009 8467aarch64_strip_extend (rtx x, bool strip_shift)
43e9d192 8468{
77e994c9 8469 scalar_int_mode mode;
43e9d192
IB
8470 rtx op = x;
8471
77e994c9
RS
8472 if (!is_a <scalar_int_mode> (GET_MODE (op), &mode))
8473 return op;
8474
43e9d192
IB
8475 /* Zero and sign extraction of a widened value. */
8476 if ((GET_CODE (op) == ZERO_EXTRACT || GET_CODE (op) == SIGN_EXTRACT)
8477 && XEXP (op, 2) == const0_rtx
4745e701 8478 && GET_CODE (XEXP (op, 0)) == MULT
77e994c9 8479 && aarch64_is_extend_from_extract (mode, XEXP (XEXP (op, 0), 1),
43e9d192
IB
8480 XEXP (op, 1)))
8481 return XEXP (XEXP (op, 0), 0);
8482
8483 /* It can also be represented (for zero-extend) as an AND with an
8484 immediate. */
8485 if (GET_CODE (op) == AND
8486 && GET_CODE (XEXP (op, 0)) == MULT
8487 && CONST_INT_P (XEXP (XEXP (op, 0), 1))
8488 && CONST_INT_P (XEXP (op, 1))
8489 && aarch64_uxt_size (exact_log2 (INTVAL (XEXP (XEXP (op, 0), 1))),
8490 INTVAL (XEXP (op, 1))) != 0)
8491 return XEXP (XEXP (op, 0), 0);
8492
8493 /* Now handle extended register, as this may also have an optional
8494 left shift by 1..4. */
b10f1009
AP
8495 if (strip_shift
8496 && GET_CODE (op) == ASHIFT
43e9d192
IB
8497 && CONST_INT_P (XEXP (op, 1))
8498 && ((unsigned HOST_WIDE_INT) INTVAL (XEXP (op, 1))) <= 4)
8499 op = XEXP (op, 0);
8500
8501 if (GET_CODE (op) == ZERO_EXTEND
8502 || GET_CODE (op) == SIGN_EXTEND)
8503 op = XEXP (op, 0);
8504
8505 if (op != x)
8506 return op;
8507
4745e701
JG
8508 return x;
8509}
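/* For example (a hand-constructed RTX, purely illustrative): given
   X = (ashift:DI (zero_extend:DI (reg:SI x)) (const_int 2)),
   aarch64_strip_extend (X, true) strips both the shift and the extend and
   returns (reg:SI x), whereas aarch64_strip_extend (X, false) leaves the
   shift in place and therefore returns X unchanged.  */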
8510
0a78ebe4
KT
8511/* Return true iff CODE is a shift supported in combination
8512 with arithmetic instructions. */
4d1919ed 8513
0a78ebe4
KT
8514static bool
8515aarch64_shift_p (enum rtx_code code)
8516{
8517 return code == ASHIFT || code == ASHIFTRT || code == LSHIFTRT;
8518}
8519
b10f1009
AP
8520
8521/* Return true iff X is a cheap shift without a sign extend. */
8522
8523static bool
8524aarch64_cheap_mult_shift_p (rtx x)
8525{
8526 rtx op0, op1;
8527
8528 op0 = XEXP (x, 0);
8529 op1 = XEXP (x, 1);
8530
8531 if (!(aarch64_tune_params.extra_tuning_flags
8532 & AARCH64_EXTRA_TUNE_CHEAP_SHIFT_EXTEND))
8533 return false;
8534
8535 if (GET_CODE (op0) == SIGN_EXTEND)
8536 return false;
8537
8538 if (GET_CODE (x) == ASHIFT && CONST_INT_P (op1)
8539 && UINTVAL (op1) <= 4)
8540 return true;
8541
8542 if (GET_CODE (x) != MULT || !CONST_INT_P (op1))
8543 return false;
8544
8545 HOST_WIDE_INT l2 = exact_log2 (INTVAL (op1));
8546
8547 if (l2 > 0 && l2 <= 4)
8548 return true;
8549
8550 return false;
8551}
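/* Illustrative behaviour, assuming AARCH64_EXTRA_TUNE_CHEAP_SHIFT_EXTEND
   is set in the current tuning:
     (ashift (reg) (const_int 3))              -> true
     (mult (reg) (const_int 8))                -> true  (log2 (8) == 3)
     (mult (sign_extend (reg)) (const_int 8))  -> false (sign extend)
     (mult (reg) (const_int 64))               -> false (shift of 6 > 4)
   Without the tuning flag the function always returns false.  */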
8552
4745e701 8553/* Helper function for rtx cost calculation. Calculate the cost of
0a78ebe4
KT
8554 a MULT or ASHIFT, which may be part of a compound PLUS/MINUS rtx.
8555 Return the calculated cost of the expression, recursing manually in to
4745e701
JG
8556 operands where needed. */
8557
8558static int
e548c9df 8559aarch64_rtx_mult_cost (rtx x, enum rtx_code code, int outer, bool speed)
4745e701
JG
8560{
8561 rtx op0, op1;
8562 const struct cpu_cost_table *extra_cost
b175b679 8563 = aarch64_tune_params.insn_extra_cost;
4745e701 8564 int cost = 0;
0a78ebe4 8565 bool compound_p = (outer == PLUS || outer == MINUS);
ef4bddc2 8566 machine_mode mode = GET_MODE (x);
4745e701
JG
8567
8568 gcc_checking_assert (code == MULT);
8569
8570 op0 = XEXP (x, 0);
8571 op1 = XEXP (x, 1);
8572
8573 if (VECTOR_MODE_P (mode))
8574 mode = GET_MODE_INNER (mode);
8575
8576 /* Integer multiply/fma. */
8577 if (GET_MODE_CLASS (mode) == MODE_INT)
8578 {
8579 /* The multiply will be canonicalized as a shift, cost it as such. */
0a78ebe4
KT
8580 if (aarch64_shift_p (GET_CODE (x))
8581 || (CONST_INT_P (op1)
8582 && exact_log2 (INTVAL (op1)) > 0))
4745e701 8583 {
0a78ebe4
KT
8584 bool is_extend = GET_CODE (op0) == ZERO_EXTEND
8585 || GET_CODE (op0) == SIGN_EXTEND;
4745e701
JG
8586 if (speed)
8587 {
0a78ebe4
KT
8588 if (compound_p)
8589 {
b10f1009
AP
8590 /* If the shift is considered cheap,
8591 then don't add any cost. */
8592 if (aarch64_cheap_mult_shift_p (x))
8593 ;
8594 else if (REG_P (op1))
0a78ebe4
KT
8595 /* ARITH + shift-by-register. */
8596 cost += extra_cost->alu.arith_shift_reg;
8597 else if (is_extend)
8598 /* ARITH + extended register. We don't have a cost field
8599 for ARITH+EXTEND+SHIFT, so use extend_arith here. */
8600 cost += extra_cost->alu.extend_arith;
8601 else
8602 /* ARITH + shift-by-immediate. */
8603 cost += extra_cost->alu.arith_shift;
8604 }
4745e701
JG
8605 else
8606 /* LSL (immediate). */
0a78ebe4
KT
8607 cost += extra_cost->alu.shift;
8608
4745e701 8609 }
0a78ebe4
KT
8610 /* Strip extends as we will have costed them in the case above. */
8611 if (is_extend)
b10f1009 8612 op0 = aarch64_strip_extend (op0, true);
4745e701 8613
e548c9df 8614 cost += rtx_cost (op0, VOIDmode, code, 0, speed);
4745e701
JG
8615
8616 return cost;
8617 }
8618
d2ac256b
KT
8619 /* MNEG or [US]MNEGL. Extract the NEG operand and indicate that it's a
8620 compound and let the below cases handle it. After all, MNEG is a
8621 special-case alias of MSUB. */
8622 if (GET_CODE (op0) == NEG)
8623 {
8624 op0 = XEXP (op0, 0);
8625 compound_p = true;
8626 }
8627
4745e701
JG
8628 /* Integer multiplies or FMAs have zero/sign extending variants. */
8629 if ((GET_CODE (op0) == ZERO_EXTEND
8630 && GET_CODE (op1) == ZERO_EXTEND)
8631 || (GET_CODE (op0) == SIGN_EXTEND
8632 && GET_CODE (op1) == SIGN_EXTEND))
8633 {
e548c9df
AM
8634 cost += rtx_cost (XEXP (op0, 0), VOIDmode, MULT, 0, speed);
8635 cost += rtx_cost (XEXP (op1, 0), VOIDmode, MULT, 1, speed);
4745e701
JG
8636
8637 if (speed)
8638 {
0a78ebe4 8639 if (compound_p)
d2ac256b 8640 /* SMADDL/UMADDL/UMSUBL/SMSUBL. */
4745e701
JG
8641 cost += extra_cost->mult[0].extend_add;
8642 else
8643 /* MUL/SMULL/UMULL. */
8644 cost += extra_cost->mult[0].extend;
8645 }
8646
8647 return cost;
8648 }
8649
d2ac256b 8650 /* This is either an integer multiply or a MADD. In both cases
4745e701 8651 we want to recurse and cost the operands. */
e548c9df
AM
8652 cost += rtx_cost (op0, mode, MULT, 0, speed);
8653 cost += rtx_cost (op1, mode, MULT, 1, speed);
4745e701
JG
8654
8655 if (speed)
8656 {
0a78ebe4 8657 if (compound_p)
d2ac256b 8658 /* MADD/MSUB. */
4745e701
JG
8659 cost += extra_cost->mult[mode == DImode].add;
8660 else
8661 /* MUL. */
8662 cost += extra_cost->mult[mode == DImode].simple;
8663 }
8664
8665 return cost;
8666 }
8667 else
8668 {
8669 if (speed)
8670 {
3d840f7d 8671 /* Floating-point FMA/FMUL can also support negations of the
d318517d
SN
8672 operands, unless the rounding mode is upward or downward in
8673 which case FNMUL is different than FMUL with operand negation. */
8674 bool neg0 = GET_CODE (op0) == NEG;
8675 bool neg1 = GET_CODE (op1) == NEG;
8676 if (compound_p || !flag_rounding_math || (neg0 && neg1))
8677 {
8678 if (neg0)
8679 op0 = XEXP (op0, 0);
8680 if (neg1)
8681 op1 = XEXP (op1, 0);
8682 }
4745e701 8683
0a78ebe4 8684 if (compound_p)
4745e701
JG
8685 /* FMADD/FNMADD/FNMSUB/FMSUB. */
8686 cost += extra_cost->fp[mode == DFmode].fma;
8687 else
3d840f7d 8688 /* FMUL/FNMUL. */
4745e701
JG
8689 cost += extra_cost->fp[mode == DFmode].mult;
8690 }
8691
e548c9df
AM
8692 cost += rtx_cost (op0, mode, MULT, 0, speed);
8693 cost += rtx_cost (op1, mode, MULT, 1, speed);
4745e701
JG
8694 return cost;
8695 }
43e9d192
IB
8696}
8697
67747367
JG
8698static int
8699aarch64_address_cost (rtx x,
ef4bddc2 8700 machine_mode mode,
67747367
JG
8701 addr_space_t as ATTRIBUTE_UNUSED,
8702 bool speed)
8703{
8704 enum rtx_code c = GET_CODE (x);
b175b679 8705 const struct cpu_addrcost_table *addr_cost = aarch64_tune_params.addr_cost;
67747367
JG
8706 struct aarch64_address_info info;
8707 int cost = 0;
8708 info.shift = 0;
8709
a97d8b98 8710 if (!aarch64_classify_address (&info, x, mode, false))
67747367
JG
8711 {
8712 if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF)
8713 {
8714 /* This is a CONST or SYMBOL ref which will be split
8715 in a different way depending on the code model in use.
8716 Cost it through the generic infrastructure. */
e548c9df 8717 int cost_symbol_ref = rtx_cost (x, Pmode, MEM, 1, speed);
67747367
JG
8718 /* Divide through by the cost of one instruction to
8719 bring it to the same units as the address costs. */
8720 cost_symbol_ref /= COSTS_N_INSNS (1);
8721 /* The cost is then the cost of preparing the address,
8722 followed by an immediate (possibly 0) offset. */
8723 return cost_symbol_ref + addr_cost->imm_offset;
8724 }
8725 else
8726 {
8727 /* This is most likely a jump table from a case
8728 statement. */
8729 return addr_cost->register_offset;
8730 }
8731 }
8732
8733 switch (info.type)
8734 {
8735 case ADDRESS_LO_SUM:
8736 case ADDRESS_SYMBOLIC:
8737 case ADDRESS_REG_IMM:
8738 cost += addr_cost->imm_offset;
8739 break;
8740
8741 case ADDRESS_REG_WB:
8742 if (c == PRE_INC || c == PRE_DEC || c == PRE_MODIFY)
8743 cost += addr_cost->pre_modify;
8744 else if (c == POST_INC || c == POST_DEC || c == POST_MODIFY)
8745 cost += addr_cost->post_modify;
8746 else
8747 gcc_unreachable ();
8748
8749 break;
8750
8751 case ADDRESS_REG_REG:
8752 cost += addr_cost->register_offset;
8753 break;
8754
67747367 8755 case ADDRESS_REG_SXTW:
783879e6
EM
8756 cost += addr_cost->register_sextend;
8757 break;
8758
8759 case ADDRESS_REG_UXTW:
8760 cost += addr_cost->register_zextend;
67747367
JG
8761 break;
8762
8763 default:
8764 gcc_unreachable ();
8765 }
8766
8767
8768 if (info.shift > 0)
8769 {
8770 /* For the sake of calculating the cost of the shifted register
8771 component, we can treat same sized modes in the same way. */
6a70badb
RS
8772 if (known_eq (GET_MODE_BITSIZE (mode), 16))
8773 cost += addr_cost->addr_scale_costs.hi;
8774 else if (known_eq (GET_MODE_BITSIZE (mode), 32))
8775 cost += addr_cost->addr_scale_costs.si;
8776 else if (known_eq (GET_MODE_BITSIZE (mode), 64))
8777 cost += addr_cost->addr_scale_costs.di;
8778 else
8779 /* We can't tell, or this is a 128-bit vector. */
8780 cost += addr_cost->addr_scale_costs.ti;
67747367
JG
8781 }
8782
8783 return cost;
8784}
8785
b9066f5a
MW
8786/* Return the cost of a branch. If SPEED_P is true then the compiler is
8787 optimizing for speed. If PREDICTABLE_P is true then the branch is predicted
8788 to be taken. */
8789
8790int
8791aarch64_branch_cost (bool speed_p, bool predictable_p)
8792{
8793 /* When optimizing for speed, use the cost of unpredictable branches. */
8794 const struct cpu_branch_cost *branch_costs =
b175b679 8795 aarch64_tune_params.branch_costs;
b9066f5a
MW
8796
8797 if (!speed_p || predictable_p)
8798 return branch_costs->predictable;
8799 else
8800 return branch_costs->unpredictable;
8801}
8802
7cc2145f
JG
8803/* Return true if the RTX X in mode MODE is a zero or sign extract
8804 usable in an ADD or SUB (extended register) instruction. */
8805static bool
77e994c9 8806aarch64_rtx_arith_op_extract_p (rtx x, scalar_int_mode mode)
7cc2145f
JG
8807{
8808 /* Catch add with a sign extract.
8809 This is add_<optab><mode>_multp2. */
8810 if (GET_CODE (x) == SIGN_EXTRACT
8811 || GET_CODE (x) == ZERO_EXTRACT)
8812 {
8813 rtx op0 = XEXP (x, 0);
8814 rtx op1 = XEXP (x, 1);
8815 rtx op2 = XEXP (x, 2);
8816
8817 if (GET_CODE (op0) == MULT
8818 && CONST_INT_P (op1)
8819 && op2 == const0_rtx
8820 && CONST_INT_P (XEXP (op0, 1))
8821 && aarch64_is_extend_from_extract (mode,
8822 XEXP (op0, 1),
8823 op1))
8824 {
8825 return true;
8826 }
8827 }
e47c4031
KT
8828 /* The simple case <ARITH>, XD, XN, XM, [us]xt.
8829 No shift. */
8830 else if (GET_CODE (x) == SIGN_EXTEND
8831 || GET_CODE (x) == ZERO_EXTEND)
8832 return REG_P (XEXP (x, 0));
7cc2145f
JG
8833
8834 return false;
8835}
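/* The simplest accepted form is a plain extend of a register, e.g.
   (sign_extend:DI (reg:SI w1)), which corresponds to the extended-register
   operand of something like "add x0, x2, w1, sxtw" (the registers here are
   only illustrative).  */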
8836
61263118
KT
8837static bool
8838aarch64_frint_unspec_p (unsigned int u)
8839{
8840 switch (u)
8841 {
8842 case UNSPEC_FRINTZ:
8843 case UNSPEC_FRINTP:
8844 case UNSPEC_FRINTM:
8845 case UNSPEC_FRINTA:
8846 case UNSPEC_FRINTN:
8847 case UNSPEC_FRINTX:
8848 case UNSPEC_FRINTI:
8849 return true;
8850
8851 default:
8852 return false;
8853 }
8854}
8855
fb0cb7fa
KT
8856/* Return true iff X is an rtx that will match an extr instruction
8857 i.e. as described in the *extr<mode>5_insn family of patterns.
8858 OP0 and OP1 will be set to the operands of the shifts involved
8859 on success and will be NULL_RTX otherwise. */
8860
8861static bool
8862aarch64_extr_rtx_p (rtx x, rtx *res_op0, rtx *res_op1)
8863{
8864 rtx op0, op1;
77e994c9
RS
8865 scalar_int_mode mode;
8866 if (!is_a <scalar_int_mode> (GET_MODE (x), &mode))
8867 return false;
fb0cb7fa
KT
8868
8869 *res_op0 = NULL_RTX;
8870 *res_op1 = NULL_RTX;
8871
8872 if (GET_CODE (x) != IOR)
8873 return false;
8874
8875 op0 = XEXP (x, 0);
8876 op1 = XEXP (x, 1);
8877
8878 if ((GET_CODE (op0) == ASHIFT && GET_CODE (op1) == LSHIFTRT)
8879 || (GET_CODE (op1) == ASHIFT && GET_CODE (op0) == LSHIFTRT))
8880 {
8881 /* Canonicalise locally to ashift in op0, lshiftrt in op1. */
8882 if (GET_CODE (op1) == ASHIFT)
8883 std::swap (op0, op1);
8884
8885 if (!CONST_INT_P (XEXP (op0, 1)) || !CONST_INT_P (XEXP (op1, 1)))
8886 return false;
8887
8888 unsigned HOST_WIDE_INT shft_amnt_0 = UINTVAL (XEXP (op0, 1));
8889 unsigned HOST_WIDE_INT shft_amnt_1 = UINTVAL (XEXP (op1, 1));
8890
8891 if (shft_amnt_0 < GET_MODE_BITSIZE (mode)
8892 && shft_amnt_0 + shft_amnt_1 == GET_MODE_BITSIZE (mode))
8893 {
8894 *res_op0 = XEXP (op0, 0);
8895 *res_op1 = XEXP (op1, 0);
8896 return true;
8897 }
8898 }
8899
8900 return false;
8901}
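/* Example of a match (registers illustrative): in DImode,
   (ior (ashift (reg a) (const_int 48)) (lshiftrt (reg b) (const_int 16)))
   satisfies 48 + 16 == 64, so *RES_OP0 is set to a and *RES_OP1 to b, and
   the whole thing can be emitted roughly as "extr xd, xa, xb, #16".  */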
8902
2d5ffe46
AP
8903/* Calculate the cost of calculating (if_then_else (OP0) (OP1) (OP2)),
8904 storing it in *COST. Result is true if the total cost of the operation
8905 has now been calculated. */
8906static bool
8907aarch64_if_then_else_costs (rtx op0, rtx op1, rtx op2, int *cost, bool speed)
8908{
b9e3afe9
AP
8909 rtx inner;
8910 rtx comparator;
8911 enum rtx_code cmpcode;
8912
8913 if (COMPARISON_P (op0))
8914 {
8915 inner = XEXP (op0, 0);
8916 comparator = XEXP (op0, 1);
8917 cmpcode = GET_CODE (op0);
8918 }
8919 else
8920 {
8921 inner = op0;
8922 comparator = const0_rtx;
8923 cmpcode = NE;
8924 }
8925
2d5ffe46
AP
8926 if (GET_CODE (op1) == PC || GET_CODE (op2) == PC)
8927 {
8928 /* Conditional branch. */
b9e3afe9 8929 if (GET_MODE_CLASS (GET_MODE (inner)) == MODE_CC)
2d5ffe46
AP
8930 return true;
8931 else
8932 {
b9e3afe9 8933 if (cmpcode == NE || cmpcode == EQ)
2d5ffe46 8934 {
2d5ffe46
AP
8935 if (comparator == const0_rtx)
8936 {
8937 /* TBZ/TBNZ/CBZ/CBNZ. */
8938 if (GET_CODE (inner) == ZERO_EXTRACT)
8939 /* TBZ/TBNZ. */
e548c9df
AM
8940 *cost += rtx_cost (XEXP (inner, 0), VOIDmode,
8941 ZERO_EXTRACT, 0, speed);
8942 else
8943 /* CBZ/CBNZ. */
8944 *cost += rtx_cost (inner, VOIDmode, cmpcode, 0, speed);
2d5ffe46
AP
8945
8946 return true;
8947 }
8948 }
b9e3afe9 8949 else if (cmpcode == LT || cmpcode == GE)
2d5ffe46 8950 {
2d5ffe46
AP
8951 /* TBZ/TBNZ. */
8952 if (comparator == const0_rtx)
8953 return true;
8954 }
8955 }
8956 }
b9e3afe9 8957 else if (GET_MODE_CLASS (GET_MODE (inner)) == MODE_CC)
2d5ffe46 8958 {
786298dc 8959 /* CCMP. */
6dfeb7ce 8960 if (GET_CODE (op1) == COMPARE)
786298dc
WD
8961 {
8962 /* Increase cost of CCMP reg, 0, imm, CC to prefer CMP reg, 0. */
8963 if (XEXP (op1, 1) == const0_rtx)
8964 *cost += 1;
8965 if (speed)
8966 {
8967 machine_mode mode = GET_MODE (XEXP (op1, 0));
8968 const struct cpu_cost_table *extra_cost
8969 = aarch64_tune_params.insn_extra_cost;
8970
8971 if (GET_MODE_CLASS (mode) == MODE_INT)
8972 *cost += extra_cost->alu.arith;
8973 else
8974 *cost += extra_cost->fp[mode == DFmode].compare;
8975 }
8976 return true;
8977 }
8978
2d5ffe46
AP
8979 /* It's a conditional operation based on the status flags,
8980 so it must be some flavor of CSEL. */
8981
8982 /* CSNEG, CSINV, and CSINC are handled for free as part of CSEL. */
8983 if (GET_CODE (op1) == NEG
8984 || GET_CODE (op1) == NOT
8985 || (GET_CODE (op1) == PLUS && XEXP (op1, 1) == const1_rtx))
8986 op1 = XEXP (op1, 0);
bad00732
KT
8987 else if (GET_CODE (op1) == ZERO_EXTEND && GET_CODE (op2) == ZERO_EXTEND)
8988 {
8989 /* CSEL with zero-extension (*cmovdi_insn_uxtw). */
8990 op1 = XEXP (op1, 0);
8991 op2 = XEXP (op2, 0);
8992 }
2d5ffe46 8993
e548c9df
AM
8994 *cost += rtx_cost (op1, VOIDmode, IF_THEN_ELSE, 1, speed);
8995 *cost += rtx_cost (op2, VOIDmode, IF_THEN_ELSE, 2, speed);
2d5ffe46
AP
8996 return true;
8997 }
8998
8999 /* We don't know what this is, cost all operands. */
9000 return false;
9001}
9002
283b6c85
KT
9003/* Check whether X is a bitfield operation of the form shift + extend that
9004 maps down to a UBFIZ/SBFIZ/UBFX/SBFX instruction. If so, return the
9005 operand to which the bitfield operation is applied. Otherwise return
9006 NULL_RTX. */
9007
9008static rtx
9009aarch64_extend_bitfield_pattern_p (rtx x)
9010{
9011 rtx_code outer_code = GET_CODE (x);
9012 machine_mode outer_mode = GET_MODE (x);
9013
9014 if (outer_code != ZERO_EXTEND && outer_code != SIGN_EXTEND
9015 && outer_mode != SImode && outer_mode != DImode)
9016 return NULL_RTX;
9017
9018 rtx inner = XEXP (x, 0);
9019 rtx_code inner_code = GET_CODE (inner);
9020 machine_mode inner_mode = GET_MODE (inner);
9021 rtx op = NULL_RTX;
9022
9023 switch (inner_code)
9024 {
9025 case ASHIFT:
9026 if (CONST_INT_P (XEXP (inner, 1))
9027 && (inner_mode == QImode || inner_mode == HImode))
9028 op = XEXP (inner, 0);
9029 break;
9030 case LSHIFTRT:
9031 if (outer_code == ZERO_EXTEND && CONST_INT_P (XEXP (inner, 1))
9032 && (inner_mode == QImode || inner_mode == HImode))
9033 op = XEXP (inner, 0);
9034 break;
9035 case ASHIFTRT:
9036 if (outer_code == SIGN_EXTEND && CONST_INT_P (XEXP (inner, 1))
9037 && (inner_mode == QImode || inner_mode == HImode))
9038 op = XEXP (inner, 0);
9039 break;
9040 default:
9041 break;
9042 }
9043
9044 return op;
9045}
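/* Two illustrative matches (hand-constructed RTX, not from a real dump):
     (zero_extend:SI (lshiftrt:HI (reg:HI r) (const_int 3)))  -> returns r
       (shift + zero extend, i.e. a UBFX)
     (sign_extend:DI (ashift:QI (reg:QI r) (const_int 2)))    -> returns r
       (shift + sign extend, i.e. an SBFIZ)  */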
9046
8c83f71d
KT
9047/* Return true if the mask and a shift amount from an RTX of the form
9048 (x << SHFT_AMNT) & MASK are valid to combine into a UBFIZ instruction of
9049 mode MODE. See the *andim_ashift<mode>_bfiz pattern. */
9050
9051bool
77e994c9
RS
9052aarch64_mask_and_shift_for_ubfiz_p (scalar_int_mode mode, rtx mask,
9053 rtx shft_amnt)
8c83f71d
KT
9054{
9055 return CONST_INT_P (mask) && CONST_INT_P (shft_amnt)
9056 && INTVAL (shft_amnt) < GET_MODE_BITSIZE (mode)
9057 && exact_log2 ((INTVAL (mask) >> INTVAL (shft_amnt)) + 1) >= 0
1b6acf23
WD
9058 && (INTVAL (mask)
9059 & ((HOST_WIDE_INT_1U << INTVAL (shft_amnt)) - 1)) == 0;
8c83f71d
KT
9060}
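/* For instance, in SImode the combination mask == 0xff0, shft_amnt == 4 is
   accepted: 0xff0 >> 4 == 0xff is a contiguous mask and no mask bits lie
   below the shift, so (x << 4) & 0xff0 can become a single
   "ubfiz w0, w1, #4, #8" (register names illustrative).  By contrast,
   mask == 0xff1 with the same shift is rejected because bit 0 of the mask
   sits below the shift amount.  */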
9061
43e9d192
IB
9062/* Calculate the cost of calculating X, storing it in *COST. Result
9063 is true if the total cost of the operation has now been calculated. */
9064static bool
e548c9df 9065aarch64_rtx_costs (rtx x, machine_mode mode, int outer ATTRIBUTE_UNUSED,
43e9d192
IB
9066 int param ATTRIBUTE_UNUSED, int *cost, bool speed)
9067{
a8eecd00 9068 rtx op0, op1, op2;
73250c4c 9069 const struct cpu_cost_table *extra_cost
b175b679 9070 = aarch64_tune_params.insn_extra_cost;
e548c9df 9071 int code = GET_CODE (x);
b4206259 9072 scalar_int_mode int_mode;
43e9d192 9073
7fc5ef02
JG
9074 /* By default, assume that everything has equivalent cost to the
9075 cheapest instruction. Any additional costs are applied as a delta
9076 above this default. */
9077 *cost = COSTS_N_INSNS (1);
9078
43e9d192
IB
9079 switch (code)
9080 {
9081 case SET:
ba123b0d
JG
9082 /* The cost depends entirely on the operands to SET. */
9083 *cost = 0;
43e9d192
IB
9084 op0 = SET_DEST (x);
9085 op1 = SET_SRC (x);
9086
9087 switch (GET_CODE (op0))
9088 {
9089 case MEM:
9090 if (speed)
2961177e
JG
9091 {
9092 rtx address = XEXP (op0, 0);
b6875aac
KV
9093 if (VECTOR_MODE_P (mode))
9094 *cost += extra_cost->ldst.storev;
9095 else if (GET_MODE_CLASS (mode) == MODE_INT)
2961177e
JG
9096 *cost += extra_cost->ldst.store;
9097 else if (mode == SFmode)
9098 *cost += extra_cost->ldst.storef;
9099 else if (mode == DFmode)
9100 *cost += extra_cost->ldst.stored;
9101
9102 *cost +=
9103 COSTS_N_INSNS (aarch64_address_cost (address, mode,
9104 0, speed));
9105 }
43e9d192 9106
e548c9df 9107 *cost += rtx_cost (op1, mode, SET, 1, speed);
43e9d192
IB
9108 return true;
9109
9110 case SUBREG:
9111 if (! REG_P (SUBREG_REG (op0)))
e548c9df 9112 *cost += rtx_cost (SUBREG_REG (op0), VOIDmode, SET, 0, speed);
ba123b0d 9113
43e9d192
IB
9114 /* Fall through. */
9115 case REG:
b6875aac
KV
9116 /* The cost is one per vector-register copied. */
9117 if (VECTOR_MODE_P (GET_MODE (op0)) && REG_P (op1))
9118 {
fe1447a1
RS
9119 int nregs = aarch64_hard_regno_nregs (V0_REGNUM, GET_MODE (op0));
9120 *cost = COSTS_N_INSNS (nregs);
b6875aac 9121 }
ba123b0d
JG
9122 /* const0_rtx is in general free, but we will use an
9123 instruction to set a register to 0. */
b6875aac
KV
9124 else if (REG_P (op1) || op1 == const0_rtx)
9125 {
9126 /* The cost is 1 per register copied. */
fe1447a1
RS
9127 int nregs = aarch64_hard_regno_nregs (R0_REGNUM, GET_MODE (op0));
9128 *cost = COSTS_N_INSNS (nregs);
b6875aac 9129 }
ba123b0d
JG
9130 else
9131 /* Cost is just the cost of the RHS of the set. */
e548c9df 9132 *cost += rtx_cost (op1, mode, SET, 1, speed);
43e9d192
IB
9133 return true;
9134
ba123b0d 9135 case ZERO_EXTRACT:
43e9d192 9136 case SIGN_EXTRACT:
ba123b0d
JG
9137 /* Bit-field insertion. Strip any redundant widening of
9138 the RHS to meet the width of the target. */
43e9d192
IB
9139 if (GET_CODE (op1) == SUBREG)
9140 op1 = SUBREG_REG (op1);
9141 if ((GET_CODE (op1) == ZERO_EXTEND
9142 || GET_CODE (op1) == SIGN_EXTEND)
4aa81c2e 9143 && CONST_INT_P (XEXP (op0, 1))
77e994c9
RS
9144 && is_a <scalar_int_mode> (GET_MODE (XEXP (op1, 0)), &int_mode)
9145 && GET_MODE_BITSIZE (int_mode) >= INTVAL (XEXP (op0, 1)))
43e9d192 9146 op1 = XEXP (op1, 0);
ba123b0d
JG
9147
9148 if (CONST_INT_P (op1))
9149 {
9150 /* MOV immediate is assumed to always be cheap. */
9151 *cost = COSTS_N_INSNS (1);
9152 }
9153 else
9154 {
9155 /* BFM. */
9156 if (speed)
9157 *cost += extra_cost->alu.bfi;
e548c9df 9158 *cost += rtx_cost (op1, VOIDmode, (enum rtx_code) code, 1, speed);
ba123b0d
JG
9159 }
9160
43e9d192
IB
9161 return true;
9162
9163 default:
ba123b0d
JG
9164 /* We can't make sense of this, assume default cost. */
9165 *cost = COSTS_N_INSNS (1);
61263118 9166 return false;
43e9d192
IB
9167 }
9168 return false;
9169
9dfc162c
JG
9170 case CONST_INT:
9171 /* If an instruction can incorporate a constant within the
9172 instruction, the instruction's expression avoids calling
9173 rtx_cost() on the constant. If rtx_cost() is called on a
9174 constant, then it is usually because the constant must be
9175 moved into a register by one or more instructions.
9176
9177 The exception is constant 0, which can be expressed
9178 as XZR/WZR and is therefore free. The exception to this is
9179 if we have (set (reg) (const0_rtx)) in which case we must cost
9180 the move. However, we can catch that when we cost the SET, so
9181 we don't need to consider that here. */
9182 if (x == const0_rtx)
9183 *cost = 0;
9184 else
9185 {
9186 /* To an approximation, building any other constant is
9187 proportional in cost to the number of instructions
9188 required to build that constant. This is true whether we
9189 are compiling for SPEED or otherwise. */
77e994c9
RS
9190 if (!is_a <scalar_int_mode> (mode, &int_mode))
9191 int_mode = word_mode;
82614948 9192 *cost = COSTS_N_INSNS (aarch64_internal_mov_immediate
77e994c9 9193 (NULL_RTX, x, false, int_mode));
9dfc162c
JG
9194 }
9195 return true;
9196
9197 case CONST_DOUBLE:
a2170965
TC
9198
9199 /* First determine number of instructions to do the move
9200 as an integer constant. */
9201 if (!aarch64_float_const_representable_p (x)
9202 && !aarch64_can_const_movi_rtx_p (x, mode)
9203 && aarch64_float_const_rtx_p (x))
9204 {
9205 unsigned HOST_WIDE_INT ival;
9206 bool succeed = aarch64_reinterpret_float_as_int (x, &ival);
9207 gcc_assert (succeed);
9208
77e994c9
RS
9209 scalar_int_mode imode = (mode == HFmode
9210 ? SImode
9211 : int_mode_for_mode (mode).require ());
a2170965
TC
9212 int ncost = aarch64_internal_mov_immediate
9213 (NULL_RTX, gen_int_mode (ival, imode), false, imode);
9214 *cost += COSTS_N_INSNS (ncost);
9215 return true;
9216 }
9217
9dfc162c
JG
9218 if (speed)
9219 {
9220 /* mov[df,sf]_aarch64. */
9221 if (aarch64_float_const_representable_p (x))
9222 /* FMOV (scalar immediate). */
9223 *cost += extra_cost->fp[mode == DFmode].fpconst;
9224 else if (!aarch64_float_const_zero_rtx_p (x))
9225 {
9226 /* This will be a load from memory. */
9227 if (mode == DFmode)
9228 *cost += extra_cost->ldst.loadd;
9229 else
9230 *cost += extra_cost->ldst.loadf;
9231 }
9232 else
9233 /* Otherwise this is +0.0. We get this using MOVI d0, #0
9234 or MOV v0.s[0], wzr - neither of which are modeled by the
9235 cost tables. Just use the default cost. */
9236 {
9237 }
9238 }
9239
9240 return true;
9241
43e9d192
IB
9242 case MEM:
9243 if (speed)
2961177e
JG
9244 {
9245 /* For loads we want the base cost of a load, plus an
9246 approximation for the additional cost of the addressing
9247 mode. */
9248 rtx address = XEXP (x, 0);
b6875aac
KV
9249 if (VECTOR_MODE_P (mode))
9250 *cost += extra_cost->ldst.loadv;
9251 else if (GET_MODE_CLASS (mode) == MODE_INT)
2961177e
JG
9252 *cost += extra_cost->ldst.load;
9253 else if (mode == SFmode)
9254 *cost += extra_cost->ldst.loadf;
9255 else if (mode == DFmode)
9256 *cost += extra_cost->ldst.loadd;
9257
9258 *cost +=
9259 COSTS_N_INSNS (aarch64_address_cost (address, mode,
9260 0, speed));
9261 }
43e9d192
IB
9262
9263 return true;
9264
9265 case NEG:
4745e701
JG
9266 op0 = XEXP (x, 0);
9267
b6875aac
KV
9268 if (VECTOR_MODE_P (mode))
9269 {
9270 if (speed)
9271 {
9272 /* FNEG. */
9273 *cost += extra_cost->vect.alu;
9274 }
9275 return false;
9276 }
9277
e548c9df
AM
9278 if (GET_MODE_CLASS (mode) == MODE_INT)
9279 {
4745e701
JG
9280 if (GET_RTX_CLASS (GET_CODE (op0)) == RTX_COMPARE
9281 || GET_RTX_CLASS (GET_CODE (op0)) == RTX_COMM_COMPARE)
9282 {
9283 /* CSETM. */
e548c9df 9284 *cost += rtx_cost (XEXP (op0, 0), VOIDmode, NEG, 0, speed);
4745e701
JG
9285 return true;
9286 }
9287
9288 /* Cost this as SUB wzr, X. */
e548c9df 9289 op0 = CONST0_RTX (mode);
4745e701
JG
9290 op1 = XEXP (x, 0);
9291 goto cost_minus;
9292 }
9293
e548c9df 9294 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
4745e701
JG
9295 {
9296 /* Support (neg(fma...)) as a single instruction only if
9297 sign of zeros is unimportant. This matches the decision
9298 making in aarch64.md. */
9299 if (GET_CODE (op0) == FMA && !HONOR_SIGNED_ZEROS (GET_MODE (op0)))
9300 {
9301 /* FNMADD. */
e548c9df 9302 *cost = rtx_cost (op0, mode, NEG, 0, speed);
4745e701
JG
9303 return true;
9304 }
d318517d
SN
9305 if (GET_CODE (op0) == MULT)
9306 {
9307 /* FNMUL. */
9308 *cost = rtx_cost (op0, mode, NEG, 0, speed);
9309 return true;
9310 }
4745e701
JG
9311 if (speed)
9312 /* FNEG. */
9313 *cost += extra_cost->fp[mode == DFmode].neg;
9314 return false;
9315 }
9316
9317 return false;
43e9d192 9318
781aeb73
KT
9319 case CLRSB:
9320 case CLZ:
9321 if (speed)
b6875aac
KV
9322 {
9323 if (VECTOR_MODE_P (mode))
9324 *cost += extra_cost->vect.alu;
9325 else
9326 *cost += extra_cost->alu.clz;
9327 }
781aeb73
KT
9328
9329 return false;
9330
43e9d192
IB
9331 case COMPARE:
9332 op0 = XEXP (x, 0);
9333 op1 = XEXP (x, 1);
9334
9335 if (op1 == const0_rtx
9336 && GET_CODE (op0) == AND)
9337 {
9338 x = op0;
e548c9df 9339 mode = GET_MODE (op0);
43e9d192
IB
9340 goto cost_logic;
9341 }
9342
a8eecd00
JG
9343 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT)
9344 {
9345 /* TODO: A write to the CC flags possibly costs extra, this
9346 needs encoding in the cost tables. */
9347
e548c9df 9348 mode = GET_MODE (op0);
a8eecd00
JG
9349 /* ANDS. */
9350 if (GET_CODE (op0) == AND)
9351 {
9352 x = op0;
9353 goto cost_logic;
9354 }
9355
9356 if (GET_CODE (op0) == PLUS)
9357 {
9358 /* ADDS (and CMN alias). */
9359 x = op0;
9360 goto cost_plus;
9361 }
9362
9363 if (GET_CODE (op0) == MINUS)
9364 {
9365 /* SUBS. */
9366 x = op0;
9367 goto cost_minus;
9368 }
9369
345854d8
KT
9370 if (GET_CODE (op0) == ZERO_EXTRACT && op1 == const0_rtx
9371 && GET_MODE (x) == CC_NZmode && CONST_INT_P (XEXP (op0, 1))
9372 && CONST_INT_P (XEXP (op0, 2)))
9373 {
9374 /* COMPARE of ZERO_EXTRACT form of TST-immediate.
9375 Handle it here directly rather than going to cost_logic
9376 since we know the immediate generated for the TST is valid
9377 so we can avoid creating an intermediate rtx for it only
9378 for costing purposes. */
9379 if (speed)
9380 *cost += extra_cost->alu.logical;
9381
9382 *cost += rtx_cost (XEXP (op0, 0), GET_MODE (op0),
9383 ZERO_EXTRACT, 0, speed);
9384 return true;
9385 }
9386
a8eecd00
JG
9387 if (GET_CODE (op1) == NEG)
9388 {
9389 /* CMN. */
9390 if (speed)
9391 *cost += extra_cost->alu.arith;
9392
e548c9df
AM
9393 *cost += rtx_cost (op0, mode, COMPARE, 0, speed);
9394 *cost += rtx_cost (XEXP (op1, 0), mode, NEG, 1, speed);
a8eecd00
JG
9395 return true;
9396 }
9397
9398 /* CMP.
9399
9400 Compare can freely swap the order of operands, and
9401 canonicalization puts the more complex operation first.
9402 But the integer MINUS logic expects the shift/extend
9403 operation in op1. */
9404 if (! (REG_P (op0)
9405 || (GET_CODE (op0) == SUBREG && REG_P (SUBREG_REG (op0)))))
9406 {
9407 op0 = XEXP (x, 1);
9408 op1 = XEXP (x, 0);
9409 }
9410 goto cost_minus;
9411 }
9412
9413 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_FLOAT)
9414 {
9415 /* FCMP. */
9416 if (speed)
9417 *cost += extra_cost->fp[mode == DFmode].compare;
9418
9419 if (CONST_DOUBLE_P (op1) && aarch64_float_const_zero_rtx_p (op1))
9420 {
e548c9df 9421 *cost += rtx_cost (op0, VOIDmode, COMPARE, 0, speed);
a8eecd00
JG
9422 /* FCMP supports constant 0.0 for no extra cost. */
9423 return true;
9424 }
9425 return false;
9426 }
9427
b6875aac
KV
9428 if (VECTOR_MODE_P (mode))
9429 {
9430 /* Vector compare. */
9431 if (speed)
9432 *cost += extra_cost->vect.alu;
9433
9434 if (aarch64_float_const_zero_rtx_p (op1))
9435 {
9436 /* Vector cm (eq|ge|gt|lt|le) supports constant 0.0 for no extra
9437 cost. */
9438 return true;
9439 }
9440 return false;
9441 }
a8eecd00 9442 return false;
43e9d192
IB
9443
9444 case MINUS:
4745e701
JG
9445 {
9446 op0 = XEXP (x, 0);
9447 op1 = XEXP (x, 1);
9448
9449cost_minus:
e548c9df 9450 *cost += rtx_cost (op0, mode, MINUS, 0, speed);
23cb6618 9451
4745e701
JG
9452 /* Detect valid immediates. */
9453 if ((GET_MODE_CLASS (mode) == MODE_INT
9454 || (GET_MODE_CLASS (mode) == MODE_CC
9455 && GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT))
9456 && CONST_INT_P (op1)
9457 && aarch64_uimm12_shift (INTVAL (op1)))
9458 {
4745e701
JG
9459 if (speed)
9460 /* SUB(S) (immediate). */
9461 *cost += extra_cost->alu.arith;
9462 return true;
4745e701
JG
9463 }
9464
7cc2145f 9465 /* Look for SUB (extended register). */
77e994c9
RS
9466 if (is_a <scalar_int_mode> (mode, &int_mode)
9467 && aarch64_rtx_arith_op_extract_p (op1, int_mode))
7cc2145f
JG
9468 {
9469 if (speed)
2533c820 9470 *cost += extra_cost->alu.extend_arith;
7cc2145f 9471
b10f1009 9472 op1 = aarch64_strip_extend (op1, true);
e47c4031 9473 *cost += rtx_cost (op1, VOIDmode,
e548c9df 9474 (enum rtx_code) GET_CODE (op1), 0, speed);
7cc2145f
JG
9475 return true;
9476 }
9477
b10f1009 9478 rtx new_op1 = aarch64_strip_extend (op1, false);
4745e701
JG
9479
9480 /* Cost this as an FMA-alike operation. */
9481 if ((GET_CODE (new_op1) == MULT
0a78ebe4 9482 || aarch64_shift_p (GET_CODE (new_op1)))
4745e701
JG
9483 && code != COMPARE)
9484 {
9485 *cost += aarch64_rtx_mult_cost (new_op1, MULT,
9486 (enum rtx_code) code,
9487 speed);
4745e701
JG
9488 return true;
9489 }
43e9d192 9490
e548c9df 9491 *cost += rtx_cost (new_op1, VOIDmode, MINUS, 1, speed);
43e9d192 9492
4745e701
JG
9493 if (speed)
9494 {
b6875aac
KV
9495 if (VECTOR_MODE_P (mode))
9496 {
9497 /* Vector SUB. */
9498 *cost += extra_cost->vect.alu;
9499 }
9500 else if (GET_MODE_CLASS (mode) == MODE_INT)
9501 {
9502 /* SUB(S). */
9503 *cost += extra_cost->alu.arith;
9504 }
4745e701 9505 else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
b6875aac
KV
9506 {
9507 /* FSUB. */
9508 *cost += extra_cost->fp[mode == DFmode].addsub;
9509 }
4745e701
JG
9510 }
9511 return true;
9512 }
43e9d192
IB
9513
9514 case PLUS:
4745e701
JG
9515 {
9516 rtx new_op0;
43e9d192 9517
4745e701
JG
9518 op0 = XEXP (x, 0);
9519 op1 = XEXP (x, 1);
43e9d192 9520
a8eecd00 9521cost_plus:
4745e701
JG
9522 if (GET_RTX_CLASS (GET_CODE (op0)) == RTX_COMPARE
9523 || GET_RTX_CLASS (GET_CODE (op0)) == RTX_COMM_COMPARE)
9524 {
9525 /* CSINC. */
e548c9df
AM
9526 *cost += rtx_cost (XEXP (op0, 0), mode, PLUS, 0, speed);
9527 *cost += rtx_cost (op1, mode, PLUS, 1, speed);
4745e701
JG
9528 return true;
9529 }
43e9d192 9530
4745e701 9531 if (GET_MODE_CLASS (mode) == MODE_INT
43cacb12
RS
9532 && ((CONST_INT_P (op1) && aarch64_uimm12_shift (INTVAL (op1)))
9533 || aarch64_sve_addvl_addpl_immediate (op1, mode)))
4745e701 9534 {
e548c9df 9535 *cost += rtx_cost (op0, mode, PLUS, 0, speed);
43e9d192 9536
4745e701
JG
9537 if (speed)
9538 /* ADD (immediate). */
9539 *cost += extra_cost->alu.arith;
9540 return true;
9541 }
9542
e548c9df 9543 *cost += rtx_cost (op1, mode, PLUS, 1, speed);
23cb6618 9544
7cc2145f 9545 /* Look for ADD (extended register). */
77e994c9
RS
9546 if (is_a <scalar_int_mode> (mode, &int_mode)
9547 && aarch64_rtx_arith_op_extract_p (op0, int_mode))
7cc2145f
JG
9548 {
9549 if (speed)
2533c820 9550 *cost += extra_cost->alu.extend_arith;
7cc2145f 9551
b10f1009 9552 op0 = aarch64_strip_extend (op0, true);
e47c4031 9553 *cost += rtx_cost (op0, VOIDmode,
e548c9df 9554 (enum rtx_code) GET_CODE (op0), 0, speed);
7cc2145f
JG
9555 return true;
9556 }
9557
4745e701
JG
9558 /* Strip any extend, leave shifts behind as we will
9559 cost them through mult_cost. */
b10f1009 9560 new_op0 = aarch64_strip_extend (op0, false);
4745e701
JG
9561
9562 if (GET_CODE (new_op0) == MULT
0a78ebe4 9563 || aarch64_shift_p (GET_CODE (new_op0)))
4745e701
JG
9564 {
9565 *cost += aarch64_rtx_mult_cost (new_op0, MULT, PLUS,
9566 speed);
4745e701
JG
9567 return true;
9568 }
9569
e548c9df 9570 *cost += rtx_cost (new_op0, VOIDmode, PLUS, 0, speed);
4745e701
JG
9571
9572 if (speed)
9573 {
b6875aac
KV
9574 if (VECTOR_MODE_P (mode))
9575 {
9576 /* Vector ADD. */
9577 *cost += extra_cost->vect.alu;
9578 }
9579 else if (GET_MODE_CLASS (mode) == MODE_INT)
9580 {
9581 /* ADD. */
9582 *cost += extra_cost->alu.arith;
9583 }
4745e701 9584 else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
b6875aac
KV
9585 {
9586 /* FADD. */
9587 *cost += extra_cost->fp[mode == DFmode].addsub;
9588 }
4745e701
JG
9589 }
9590 return true;
9591 }
43e9d192 9592
18b42b2a
KT
9593 case BSWAP:
9594 *cost = COSTS_N_INSNS (1);
9595
9596 if (speed)
b6875aac
KV
9597 {
9598 if (VECTOR_MODE_P (mode))
9599 *cost += extra_cost->vect.alu;
9600 else
9601 *cost += extra_cost->alu.rev;
9602 }
18b42b2a
KT
9603 return false;
9604
43e9d192 9605 case IOR:
f7d5cf8d
KT
9606 if (aarch_rev16_p (x))
9607 {
9608 *cost = COSTS_N_INSNS (1);
9609
b6875aac
KV
9610 if (speed)
9611 {
9612 if (VECTOR_MODE_P (mode))
9613 *cost += extra_cost->vect.alu;
9614 else
9615 *cost += extra_cost->alu.rev;
9616 }
9617 return true;
f7d5cf8d 9618 }
fb0cb7fa
KT
9619
9620 if (aarch64_extr_rtx_p (x, &op0, &op1))
9621 {
e548c9df
AM
9622 *cost += rtx_cost (op0, mode, IOR, 0, speed);
9623 *cost += rtx_cost (op1, mode, IOR, 1, speed);
fb0cb7fa
KT
9624 if (speed)
9625 *cost += extra_cost->alu.shift;
9626
9627 return true;
9628 }
f7d5cf8d 9629 /* Fall through. */
43e9d192
IB
9630 case XOR:
9631 case AND:
9632 cost_logic:
9633 op0 = XEXP (x, 0);
9634 op1 = XEXP (x, 1);
9635
b6875aac
KV
9636 if (VECTOR_MODE_P (mode))
9637 {
9638 if (speed)
9639 *cost += extra_cost->vect.alu;
9640 return true;
9641 }
9642
268c3b47
JG
9643 if (code == AND
9644 && GET_CODE (op0) == MULT
9645 && CONST_INT_P (XEXP (op0, 1))
9646 && CONST_INT_P (op1)
9647 && aarch64_uxt_size (exact_log2 (INTVAL (XEXP (op0, 1))),
9648 INTVAL (op1)) != 0)
9649 {
9650 /* This is a UBFM/SBFM. */
e548c9df 9651 *cost += rtx_cost (XEXP (op0, 0), mode, ZERO_EXTRACT, 0, speed);
268c3b47
JG
9652 if (speed)
9653 *cost += extra_cost->alu.bfx;
9654 return true;
9655 }
9656
b4206259 9657 if (is_int_mode (mode, &int_mode))
43e9d192 9658 {
8c83f71d 9659 if (CONST_INT_P (op1))
43e9d192 9660 {
8c83f71d
KT
9661 /* We have a mask + shift version of a UBFIZ
9662 i.e. the *andim_ashift<mode>_bfiz pattern. */
9663 if (GET_CODE (op0) == ASHIFT
b4206259
RS
9664 && aarch64_mask_and_shift_for_ubfiz_p (int_mode, op1,
9665 XEXP (op0, 1)))
8c83f71d 9666 {
b4206259 9667 *cost += rtx_cost (XEXP (op0, 0), int_mode,
8c83f71d
KT
9668 (enum rtx_code) code, 0, speed);
9669 if (speed)
9670 *cost += extra_cost->alu.bfx;
268c3b47 9671
8c83f71d
KT
9672 return true;
9673 }
b4206259 9674 else if (aarch64_bitmask_imm (INTVAL (op1), int_mode))
8c83f71d
KT
9675 {
9676 /* We possibly get the immediate for free, this is not
9677 modelled. */
b4206259
RS
9678 *cost += rtx_cost (op0, int_mode,
9679 (enum rtx_code) code, 0, speed);
8c83f71d
KT
9680 if (speed)
9681 *cost += extra_cost->alu.logical;
268c3b47 9682
8c83f71d
KT
9683 return true;
9684 }
43e9d192
IB
9685 }
9686 else
9687 {
268c3b47
JG
9688 rtx new_op0 = op0;
9689
9690 /* Handle ORN, EON, or BIC. */
43e9d192
IB
9691 if (GET_CODE (op0) == NOT)
9692 op0 = XEXP (op0, 0);
268c3b47
JG
9693
9694 new_op0 = aarch64_strip_shift (op0);
9695
9696 /* If we had a shift on op0 then this is a logical-shift-
9697 by-register/immediate operation. Otherwise, this is just
9698 a logical operation. */
9699 if (speed)
9700 {
9701 if (new_op0 != op0)
9702 {
9703 /* Shift by immediate. */
9704 if (CONST_INT_P (XEXP (op0, 1)))
9705 *cost += extra_cost->alu.log_shift;
9706 else
9707 *cost += extra_cost->alu.log_shift_reg;
9708 }
9709 else
9710 *cost += extra_cost->alu.logical;
9711 }
9712
9713 /* In both cases we want to cost both operands. */
b4206259
RS
9714 *cost += rtx_cost (new_op0, int_mode, (enum rtx_code) code,
9715 0, speed);
9716 *cost += rtx_cost (op1, int_mode, (enum rtx_code) code,
9717 1, speed);
268c3b47
JG
9718
9719 return true;
43e9d192 9720 }
43e9d192
IB
9721 }
9722 return false;
9723
268c3b47 9724 case NOT:
6365da9e
KT
9725 x = XEXP (x, 0);
9726 op0 = aarch64_strip_shift (x);
9727
b6875aac
KV
9728 if (VECTOR_MODE_P (mode))
9729 {
9730 /* Vector NOT. */
9731 *cost += extra_cost->vect.alu;
9732 return false;
9733 }
9734
6365da9e
KT
9735 /* MVN-shifted-reg. */
9736 if (op0 != x)
9737 {
e548c9df 9738 *cost += rtx_cost (op0, mode, (enum rtx_code) code, 0, speed);
6365da9e
KT
9739
9740 if (speed)
9741 *cost += extra_cost->alu.log_shift;
9742
9743 return true;
9744 }
9745 /* EON can have two forms: (xor (not a) b) but also (not (xor a b)).
9746 Handle the second form here taking care that 'a' in the above can
9747 be a shift. */
9748 else if (GET_CODE (op0) == XOR)
9749 {
9750 rtx newop0 = XEXP (op0, 0);
9751 rtx newop1 = XEXP (op0, 1);
9752 rtx op0_stripped = aarch64_strip_shift (newop0);
9753
e548c9df
AM
9754 *cost += rtx_cost (newop1, mode, (enum rtx_code) code, 1, speed);
9755 *cost += rtx_cost (op0_stripped, mode, XOR, 0, speed);
6365da9e
KT
9756
9757 if (speed)
9758 {
9759 if (op0_stripped != newop0)
9760 *cost += extra_cost->alu.log_shift;
9761 else
9762 *cost += extra_cost->alu.logical;
9763 }
9764
9765 return true;
9766 }
268c3b47
JG
9767 /* MVN. */
9768 if (speed)
9769 *cost += extra_cost->alu.logical;
9770
268c3b47
JG
9771 return false;
9772
43e9d192 9773 case ZERO_EXTEND:
b1685e62
JG
9774
9775 op0 = XEXP (x, 0);
9776 /* If a value is written in SI mode, then zero extended to DI
9777 mode, the operation will in general be free as a write to
9778 a 'w' register implicitly zeroes the upper bits of an 'x'
9779 register. However, if this is
9780
9781 (set (reg) (zero_extend (reg)))
9782
9783 we must cost the explicit register move. */
9784 if (mode == DImode
9785 && GET_MODE (op0) == SImode
9786 && outer == SET)
9787 {
e548c9df 9788 int op_cost = rtx_cost (op0, VOIDmode, ZERO_EXTEND, 0, speed);
b1685e62 9789
dde23f43
KM
9790 /* If OP_COST is non-zero, then the cost of the zero extend
9791 is effectively the cost of the inner operation. Otherwise
9792 we have a MOV instruction and we take the cost from the MOV
9793 itself. This is true independently of whether we are
9794 optimizing for space or time. */
9795 if (op_cost)
b1685e62
JG
9796 *cost = op_cost;
9797
9798 return true;
9799 }
e548c9df 9800 else if (MEM_P (op0))
43e9d192 9801 {
b1685e62 9802 /* All loads can zero extend to any size for free. */
e548c9df 9803 *cost = rtx_cost (op0, VOIDmode, ZERO_EXTEND, param, speed);
43e9d192
IB
9804 return true;
9805 }
b1685e62 9806
283b6c85
KT
9807 op0 = aarch64_extend_bitfield_pattern_p (x);
9808 if (op0)
9809 {
9810 *cost += rtx_cost (op0, mode, ZERO_EXTEND, 0, speed);
9811 if (speed)
9812 *cost += extra_cost->alu.bfx;
9813 return true;
9814 }
9815
b1685e62 9816 if (speed)
b6875aac
KV
9817 {
9818 if (VECTOR_MODE_P (mode))
9819 {
9820 /* UMOV. */
9821 *cost += extra_cost->vect.alu;
9822 }
9823 else
9824 {
63715e5e
WD
9825 /* We generate an AND instead of UXTB/UXTH. */
9826 *cost += extra_cost->alu.logical;
b6875aac
KV
9827 }
9828 }
43e9d192
IB
9829 return false;
9830
9831 case SIGN_EXTEND:
b1685e62 9832 if (MEM_P (XEXP (x, 0)))
43e9d192 9833 {
b1685e62
JG
9834 /* LDRSH. */
9835 if (speed)
9836 {
9837 rtx address = XEXP (XEXP (x, 0), 0);
9838 *cost += extra_cost->ldst.load_sign_extend;
9839
9840 *cost +=
9841 COSTS_N_INSNS (aarch64_address_cost (address, mode,
9842 0, speed));
9843 }
43e9d192
IB
9844 return true;
9845 }
b1685e62 9846
283b6c85
KT
9847 op0 = aarch64_extend_bitfield_pattern_p (x);
9848 if (op0)
9849 {
9850 *cost += rtx_cost (op0, mode, SIGN_EXTEND, 0, speed);
9851 if (speed)
9852 *cost += extra_cost->alu.bfx;
9853 return true;
9854 }
9855
b1685e62 9856 if (speed)
b6875aac
KV
9857 {
9858 if (VECTOR_MODE_P (mode))
9859 *cost += extra_cost->vect.alu;
9860 else
9861 *cost += extra_cost->alu.extend;
9862 }
43e9d192
IB
9863 return false;
9864
ba0cfa17
JG
9865 case ASHIFT:
9866 op0 = XEXP (x, 0);
9867 op1 = XEXP (x, 1);
9868
9869 if (CONST_INT_P (op1))
9870 {
ba0cfa17 9871 if (speed)
b6875aac
KV
9872 {
9873 if (VECTOR_MODE_P (mode))
9874 {
9875 /* Vector shift (immediate). */
9876 *cost += extra_cost->vect.alu;
9877 }
9878 else
9879 {
9880 /* LSL (immediate), UBFM, UBFIZ and friends. These are all
9881 aliases. */
9882 *cost += extra_cost->alu.shift;
9883 }
9884 }
ba0cfa17
JG
9885
9886 /* We can incorporate zero/sign extend for free. */
9887 if (GET_CODE (op0) == ZERO_EXTEND
9888 || GET_CODE (op0) == SIGN_EXTEND)
9889 op0 = XEXP (op0, 0);
9890
e548c9df 9891 *cost += rtx_cost (op0, VOIDmode, ASHIFT, 0, speed);
ba0cfa17
JG
9892 return true;
9893 }
9894 else
9895 {
7813b280 9896 if (VECTOR_MODE_P (mode))
b6875aac 9897 {
7813b280
KT
9898 if (speed)
9899 /* Vector shift (register). */
9900 *cost += extra_cost->vect.alu;
9901 }
9902 else
9903 {
9904 if (speed)
9905 /* LSLV. */
9906 *cost += extra_cost->alu.shift_reg;
9907
9908 if (GET_CODE (op1) == AND && REG_P (XEXP (op1, 0))
9909 && CONST_INT_P (XEXP (op1, 1))
6a70badb
RS
9910 && known_eq (INTVAL (XEXP (op1, 1)),
9911 GET_MODE_BITSIZE (mode) - 1))
b6875aac 9912 {
7813b280
KT
9913 *cost += rtx_cost (op0, mode, (rtx_code) code, 0, speed);
9914 /* We already demanded XEXP (op1, 0) to be REG_P, so
9915 don't recurse into it. */
9916 return true;
b6875aac
KV
9917 }
9918 }
ba0cfa17
JG
9919 return false; /* All arguments need to be in registers. */
9920 }
9921
43e9d192 9922 case ROTATE:
43e9d192
IB
9923 case ROTATERT:
9924 case LSHIFTRT:
43e9d192 9925 case ASHIFTRT:
ba0cfa17
JG
9926 op0 = XEXP (x, 0);
9927 op1 = XEXP (x, 1);
43e9d192 9928
ba0cfa17
JG
9929 if (CONST_INT_P (op1))
9930 {
9931 /* ASR (immediate) and friends. */
9932 if (speed)
b6875aac
KV
9933 {
9934 if (VECTOR_MODE_P (mode))
9935 *cost += extra_cost->vect.alu;
9936 else
9937 *cost += extra_cost->alu.shift;
9938 }
43e9d192 9939
e548c9df 9940 *cost += rtx_cost (op0, mode, (enum rtx_code) code, 0, speed);
ba0cfa17
JG
9941 return true;
9942 }
9943 else
9944 {
7813b280 9945 if (VECTOR_MODE_P (mode))
b6875aac 9946 {
7813b280
KT
9947 if (speed)
9948 /* Vector shift (register). */
b6875aac 9949 *cost += extra_cost->vect.alu;
7813b280
KT
9950 }
9951 else
9952 {
9953 if (speed)
9954 /* ASR (register) and friends. */
b6875aac 9955 *cost += extra_cost->alu.shift_reg;
7813b280
KT
9956
9957 if (GET_CODE (op1) == AND && REG_P (XEXP (op1, 0))
9958 && CONST_INT_P (XEXP (op1, 1))
6a70badb
RS
9959 && known_eq (INTVAL (XEXP (op1, 1)),
9960 GET_MODE_BITSIZE (mode) - 1))
7813b280
KT
9961 {
9962 *cost += rtx_cost (op0, mode, (rtx_code) code, 0, speed);
9963 /* We already demanded XEXP (op1, 0) to be REG_P, so
9964 don't recurse into it. */
9965 return true;
9966 }
b6875aac 9967 }
ba0cfa17
JG
9968 return false; /* All arguments need to be in registers. */
9969 }
43e9d192 9970
909734be
JG
9971 case SYMBOL_REF:
9972
1b1e81f8
JW
9973 if (aarch64_cmodel == AARCH64_CMODEL_LARGE
9974 || aarch64_cmodel == AARCH64_CMODEL_SMALL_SPIC)
909734be
JG
9975 {
9976 /* LDR. */
9977 if (speed)
9978 *cost += extra_cost->ldst.load;
9979 }
9980 else if (aarch64_cmodel == AARCH64_CMODEL_SMALL
9981 || aarch64_cmodel == AARCH64_CMODEL_SMALL_PIC)
9982 {
9983 /* ADRP, followed by ADD. */
9984 *cost += COSTS_N_INSNS (1);
9985 if (speed)
9986 *cost += 2 * extra_cost->alu.arith;
9987 }
9988 else if (aarch64_cmodel == AARCH64_CMODEL_TINY
9989 || aarch64_cmodel == AARCH64_CMODEL_TINY_PIC)
9990 {
9991 /* ADR. */
9992 if (speed)
9993 *cost += extra_cost->alu.arith;
9994 }
9995
9996 if (flag_pic)
9997 {
9998 /* One extra load instruction, after accessing the GOT. */
9999 *cost += COSTS_N_INSNS (1);
10000 if (speed)
10001 *cost += extra_cost->ldst.load;
10002 }
43e9d192
IB
10003 return true;
10004
909734be 10005 case HIGH:
43e9d192 10006 case LO_SUM:
909734be
JG
10007 /* ADRP/ADD (immediate). */
10008 if (speed)
10009 *cost += extra_cost->alu.arith;
43e9d192
IB
10010 return true;
10011
10012 case ZERO_EXTRACT:
10013 case SIGN_EXTRACT:
7cc2145f
JG
10014 /* UBFX/SBFX. */
10015 if (speed)
b6875aac
KV
10016 {
10017 if (VECTOR_MODE_P (mode))
10018 *cost += extra_cost->vect.alu;
10019 else
10020 *cost += extra_cost->alu.bfx;
10021 }
7cc2145f
JG
10022
10023 /* We can trust that the immediates used will be correct (there
10024 are no by-register forms), so we need only cost op0. */
e548c9df 10025 *cost += rtx_cost (XEXP (x, 0), VOIDmode, (enum rtx_code) code, 0, speed);
43e9d192
IB
10026 return true;
10027
10028 case MULT:
4745e701
JG
10029 *cost += aarch64_rtx_mult_cost (x, MULT, 0, speed);
10030 /* aarch64_rtx_mult_cost always handles recursion to its
10031 operands. */
10032 return true;
43e9d192
IB
10033
10034 case MOD:
4f58fe36
KT
10035 /* We can expand signed mod by power of 2 using a NEGS, two parallel
10036	 ANDs and a CSNEG.  Assume here that a CSNEG costs the same as
10037 an unconditional negate. This case should only ever be reached through
10038 the set_smod_pow2_cheap check in expmed.c. */
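      /* For illustration (a sketch of the expected expansion shape, not the
	 exact RTL generated here): a signed 32-bit "x % 4" expands to roughly
	   negs  w1, w0
	   and   w0, w0, 3
	   and   w1, w1, 3
	   csneg w0, w0, w1, mi
	 i.e. the NEGS, two ANDs and CSNEG costed below.  */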
10039 if (CONST_INT_P (XEXP (x, 1))
10040 && exact_log2 (INTVAL (XEXP (x, 1))) > 0
10041 && (mode == SImode || mode == DImode))
10042 {
10043 /* We expand to 4 instructions. Reset the baseline. */
10044 *cost = COSTS_N_INSNS (4);
10045
10046 if (speed)
10047 *cost += 2 * extra_cost->alu.logical
10048 + 2 * extra_cost->alu.arith;
10049
10050 return true;
10051 }
10052
10053 /* Fall-through. */
43e9d192 10054 case UMOD:
43e9d192
IB
10055 if (speed)
10056 {
cb9ac430 10057	  /* Slightly prefer UMOD over SMOD.  */
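	  /* Note that AArch64 has no integer remainder instruction; MOD/UMOD
	     are synthesized as a division followed by a multiply-subtract
	     (MSUB), hence the add + idiv cost below.  */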
b6875aac
KV
10058 if (VECTOR_MODE_P (mode))
10059 *cost += extra_cost->vect.alu;
e548c9df
AM
10060 else if (GET_MODE_CLASS (mode) == MODE_INT)
10061 *cost += (extra_cost->mult[mode == DImode].add
cb9ac430
TC
10062 + extra_cost->mult[mode == DImode].idiv
10063 + (code == MOD ? 1 : 0));
43e9d192
IB
10064 }
10065 return false; /* All arguments need to be in registers. */
10066
10067 case DIV:
10068 case UDIV:
4105fe38 10069 case SQRT:
43e9d192
IB
10070 if (speed)
10071 {
b6875aac
KV
10072 if (VECTOR_MODE_P (mode))
10073 *cost += extra_cost->vect.alu;
10074 else if (GET_MODE_CLASS (mode) == MODE_INT)
4105fe38
JG
10075 /* There is no integer SQRT, so only DIV and UDIV can get
10076 here. */
cb9ac430
TC
10077 *cost += (extra_cost->mult[mode == DImode].idiv
10078	       /* Slightly prefer UDIV over SDIV.  */
10079 + (code == DIV ? 1 : 0));
4105fe38
JG
10080 else
10081 *cost += extra_cost->fp[mode == DFmode].div;
43e9d192
IB
10082 }
10083 return false; /* All arguments need to be in registers. */
10084
a8eecd00 10085 case IF_THEN_ELSE:
2d5ffe46
AP
10086 return aarch64_if_then_else_costs (XEXP (x, 0), XEXP (x, 1),
10087 XEXP (x, 2), cost, speed);
a8eecd00
JG
10088
10089 case EQ:
10090 case NE:
10091 case GT:
10092 case GTU:
10093 case LT:
10094 case LTU:
10095 case GE:
10096 case GEU:
10097 case LE:
10098 case LEU:
10099
10100 return false; /* All arguments must be in registers. */
10101
b292109f
JG
10102 case FMA:
10103 op0 = XEXP (x, 0);
10104 op1 = XEXP (x, 1);
10105 op2 = XEXP (x, 2);
10106
10107 if (speed)
b6875aac
KV
10108 {
10109 if (VECTOR_MODE_P (mode))
10110 *cost += extra_cost->vect.alu;
10111 else
10112 *cost += extra_cost->fp[mode == DFmode].fma;
10113 }
b292109f
JG
10114
10115 /* FMSUB, FNMADD, and FNMSUB are free. */
10116 if (GET_CODE (op0) == NEG)
10117 op0 = XEXP (op0, 0);
10118
10119 if (GET_CODE (op2) == NEG)
10120 op2 = XEXP (op2, 0);
10121
10122 /* aarch64_fnma4_elt_to_64v2df has the NEG as operand 1,
10123 and the by-element operand as operand 0. */
10124 if (GET_CODE (op1) == NEG)
10125 op1 = XEXP (op1, 0);
10126
10127 /* Catch vector-by-element operations. The by-element operand can
10128 either be (vec_duplicate (vec_select (x))) or just
10129 (vec_select (x)), depending on whether we are multiplying by
10130 a vector or a scalar.
10131
10132	 Canonicalization is not very good in these cases: FMA4 will put the
10133	 by-element operand as operand 0, while FNMA4 will have it as operand 1.  */
10134 if (GET_CODE (op0) == VEC_DUPLICATE)
10135 op0 = XEXP (op0, 0);
10136 else if (GET_CODE (op1) == VEC_DUPLICATE)
10137 op1 = XEXP (op1, 0);
10138
10139 if (GET_CODE (op0) == VEC_SELECT)
10140 op0 = XEXP (op0, 0);
10141 else if (GET_CODE (op1) == VEC_SELECT)
10142 op1 = XEXP (op1, 0);
10143
10144 /* If the remaining parameters are not registers,
10145 get the cost to put them into registers. */
e548c9df
AM
10146 *cost += rtx_cost (op0, mode, FMA, 0, speed);
10147 *cost += rtx_cost (op1, mode, FMA, 1, speed);
10148 *cost += rtx_cost (op2, mode, FMA, 2, speed);
b292109f
JG
10149 return true;
10150
5e2a765b
KT
10151 case FLOAT:
10152 case UNSIGNED_FLOAT:
10153 if (speed)
10154 *cost += extra_cost->fp[mode == DFmode].fromint;
10155 return false;
10156
b292109f
JG
10157 case FLOAT_EXTEND:
10158 if (speed)
b6875aac
KV
10159 {
10160 if (VECTOR_MODE_P (mode))
10161 {
10162	    /* Vector widening conversion.  */
10163 *cost += extra_cost->vect.alu;
10164 }
10165 else
10166 *cost += extra_cost->fp[mode == DFmode].widen;
10167 }
b292109f
JG
10168 return false;
10169
10170 case FLOAT_TRUNCATE:
10171 if (speed)
b6875aac
KV
10172 {
10173 if (VECTOR_MODE_P (mode))
10174 {
10175	    /* Vector narrowing conversion.  */
10176 *cost += extra_cost->vect.alu;
10177 }
10178 else
10179 *cost += extra_cost->fp[mode == DFmode].narrow;
10180 }
b292109f
JG
10181 return false;
10182
61263118
KT
10183 case FIX:
10184 case UNSIGNED_FIX:
10185 x = XEXP (x, 0);
10186 /* Strip the rounding part. They will all be implemented
10187 by the fcvt* family of instructions anyway. */
10188 if (GET_CODE (x) == UNSPEC)
10189 {
10190 unsigned int uns_code = XINT (x, 1);
10191
10192 if (uns_code == UNSPEC_FRINTA
10193 || uns_code == UNSPEC_FRINTM
10194 || uns_code == UNSPEC_FRINTN
10195 || uns_code == UNSPEC_FRINTP
10196 || uns_code == UNSPEC_FRINTZ)
10197 x = XVECEXP (x, 0, 0);
10198 }
10199
10200 if (speed)
b6875aac
KV
10201 {
10202 if (VECTOR_MODE_P (mode))
10203 *cost += extra_cost->vect.alu;
10204 else
10205 *cost += extra_cost->fp[GET_MODE (x) == DFmode].toint;
10206 }
39252973
KT
10207
10208 /* We can combine fmul by a power of 2 followed by a fcvt into a single
10209 fixed-point fcvt. */
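      /* For illustration (assuming the usual fixed-point forms of
	 FCVTZS/FCVTZU): "(int) (f * 16.0f)" can become a single
	 "fcvtzs w0, s0, #4", i.e. a convert with 4 fraction bits, instead
	 of an FMUL followed by a separate FCVTZS.  */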
10210 if (GET_CODE (x) == MULT
10211 && ((VECTOR_MODE_P (mode)
10212 && aarch64_vec_fpconst_pow_of_2 (XEXP (x, 1)) > 0)
10213 || aarch64_fpconst_pow_of_2 (XEXP (x, 1)) > 0))
10214 {
10215 *cost += rtx_cost (XEXP (x, 0), VOIDmode, (rtx_code) code,
10216 0, speed);
10217 return true;
10218 }
10219
e548c9df 10220 *cost += rtx_cost (x, VOIDmode, (enum rtx_code) code, 0, speed);
61263118
KT
10221 return true;
10222
b292109f 10223 case ABS:
b6875aac
KV
10224 if (VECTOR_MODE_P (mode))
10225 {
10226 /* ABS (vector). */
10227 if (speed)
10228 *cost += extra_cost->vect.alu;
10229 }
10230 else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
b292109f 10231 {
19261b99
KT
10232 op0 = XEXP (x, 0);
10233
10234 /* FABD, which is analogous to FADD. */
10235 if (GET_CODE (op0) == MINUS)
10236 {
e548c9df
AM
10237 *cost += rtx_cost (XEXP (op0, 0), mode, MINUS, 0, speed);
10238 *cost += rtx_cost (XEXP (op0, 1), mode, MINUS, 1, speed);
19261b99
KT
10239 if (speed)
10240 *cost += extra_cost->fp[mode == DFmode].addsub;
10241
10242 return true;
10243 }
10244 /* Simple FABS is analogous to FNEG. */
b292109f
JG
10245 if (speed)
10246 *cost += extra_cost->fp[mode == DFmode].neg;
10247 }
10248 else
10249 {
10250	  /* Integer ABS will either be split into
10251 two arithmetic instructions, or will be an ABS
10252 (scalar), which we don't model. */
10253 *cost = COSTS_N_INSNS (2);
10254 if (speed)
10255 *cost += 2 * extra_cost->alu.arith;
10256 }
10257 return false;
10258
10259 case SMAX:
10260 case SMIN:
10261 if (speed)
10262 {
b6875aac
KV
10263 if (VECTOR_MODE_P (mode))
10264 *cost += extra_cost->vect.alu;
10265 else
10266 {
10267 /* FMAXNM/FMINNM/FMAX/FMIN.
10268 TODO: This may not be accurate for all implementations, but
10269 we do not model this in the cost tables. */
10270 *cost += extra_cost->fp[mode == DFmode].addsub;
10271 }
b292109f
JG
10272 }
10273 return false;
10274
61263118
KT
10275 case UNSPEC:
10276 /* The floating point round to integer frint* instructions. */
10277 if (aarch64_frint_unspec_p (XINT (x, 1)))
10278 {
10279 if (speed)
10280 *cost += extra_cost->fp[mode == DFmode].roundint;
10281
10282 return false;
10283 }
781aeb73
KT
10284
10285 if (XINT (x, 1) == UNSPEC_RBIT)
10286 {
10287 if (speed)
10288 *cost += extra_cost->alu.rev;
10289
10290 return false;
10291 }
61263118
KT
10292 break;
10293
fb620c4a
JG
10294 case TRUNCATE:
10295
10296 /* Decompose <su>muldi3_highpart. */
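      /* In full, the pattern being matched below is
	   (truncate:DI (lshiftrt:TI (mult:TI (ANY_EXTEND:TI (reg:DI))
					      (ANY_EXTEND:TI (reg:DI)))
				     (const_int 64)))
	 i.e. the high half of a 64x64->128-bit multiply, which maps to a
	 single UMULH/SMULH instruction.  */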
10297 if (/* (truncate:DI */
10298 mode == DImode
10299 /* (lshiftrt:TI */
10300 && GET_MODE (XEXP (x, 0)) == TImode
10301 && GET_CODE (XEXP (x, 0)) == LSHIFTRT
10302 /* (mult:TI */
10303 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
10304 /* (ANY_EXTEND:TI (reg:DI))
10305 (ANY_EXTEND:TI (reg:DI))) */
10306 && ((GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == ZERO_EXTEND
10307 && GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)) == ZERO_EXTEND)
10308 || (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == SIGN_EXTEND
10309 && GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)) == SIGN_EXTEND))
10310 && GET_MODE (XEXP (XEXP (XEXP (XEXP (x, 0), 0), 0), 0)) == DImode
10311 && GET_MODE (XEXP (XEXP (XEXP (XEXP (x, 0), 0), 1), 0)) == DImode
10312 /* (const_int 64) */
10313 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
10314 && UINTVAL (XEXP (XEXP (x, 0), 1)) == 64)
10315 {
10316 /* UMULH/SMULH. */
10317 if (speed)
10318 *cost += extra_cost->mult[mode == DImode].extend;
e548c9df
AM
10319 *cost += rtx_cost (XEXP (XEXP (XEXP (XEXP (x, 0), 0), 0), 0),
10320 mode, MULT, 0, speed);
10321 *cost += rtx_cost (XEXP (XEXP (XEXP (XEXP (x, 0), 0), 1), 0),
10322 mode, MULT, 1, speed);
fb620c4a
JG
10323 return true;
10324 }
10325
10326 /* Fall through. */
43e9d192 10327 default:
61263118 10328 break;
43e9d192 10329 }
61263118 10330
c10e3d7f
AP
10331 if (dump_file
10332 && flag_aarch64_verbose_cost)
61263118
KT
10333 fprintf (dump_file,
10334 "\nFailed to cost RTX. Assuming default cost.\n");
10335
10336 return true;
43e9d192
IB
10337}
10338
0ee859b5
JG
10339/* Wrapper around aarch64_rtx_costs; dumps the partial or total cost
10340 calculated for X. This cost is stored in *COST. Returns true
10341 if the total cost of X was calculated. */
10342static bool
e548c9df 10343aarch64_rtx_costs_wrapper (rtx x, machine_mode mode, int outer,
0ee859b5
JG
10344 int param, int *cost, bool speed)
10345{
e548c9df 10346 bool result = aarch64_rtx_costs (x, mode, outer, param, cost, speed);
0ee859b5 10347
c10e3d7f
AP
10348 if (dump_file
10349 && flag_aarch64_verbose_cost)
0ee859b5
JG
10350 {
10351 print_rtl_single (dump_file, x);
10352 fprintf (dump_file, "\n%s cost: %d (%s)\n",
10353 speed ? "Hot" : "Cold",
10354 *cost, result ? "final" : "partial");
10355 }
10356
10357 return result;
10358}
10359
43e9d192 10360static int
ef4bddc2 10361aarch64_register_move_cost (machine_mode mode,
8a3a7e67 10362 reg_class_t from_i, reg_class_t to_i)
43e9d192 10363{
8a3a7e67
RH
10364 enum reg_class from = (enum reg_class) from_i;
10365 enum reg_class to = (enum reg_class) to_i;
43e9d192 10366 const struct cpu_regmove_cost *regmove_cost
b175b679 10367 = aarch64_tune_params.regmove_cost;
43e9d192 10368
3be07662 10369 /* Caller save and pointer regs are equivalent to GENERAL_REGS. */
d677263e 10370 if (to == TAILCALL_ADDR_REGS || to == POINTER_REGS)
3be07662
WD
10371 to = GENERAL_REGS;
10372
d677263e 10373 if (from == TAILCALL_ADDR_REGS || from == POINTER_REGS)
3be07662
WD
10374 from = GENERAL_REGS;
10375
6ee70f81
AP
10376  /* Moving between a GPR and the stack register costs the same as GP2GP.  */
10377 if ((from == GENERAL_REGS && to == STACK_REG)
10378 || (to == GENERAL_REGS && from == STACK_REG))
10379 return regmove_cost->GP2GP;
10380
10381 /* To/From the stack register, we move via the gprs. */
10382 if (to == STACK_REG || from == STACK_REG)
10383 return aarch64_register_move_cost (mode, from, GENERAL_REGS)
10384 + aarch64_register_move_cost (mode, GENERAL_REGS, to);
10385
6a70badb 10386 if (known_eq (GET_MODE_SIZE (mode), 16))
8919453c
WD
10387 {
10388 /* 128-bit operations on general registers require 2 instructions. */
10389 if (from == GENERAL_REGS && to == GENERAL_REGS)
10390 return regmove_cost->GP2GP * 2;
10391 else if (from == GENERAL_REGS)
10392 return regmove_cost->GP2FP * 2;
10393 else if (to == GENERAL_REGS)
10394 return regmove_cost->FP2GP * 2;
10395
10396 /* When AdvSIMD instructions are disabled it is not possible to move
10397 a 128-bit value directly between Q registers. This is handled in
10398 secondary reload. A general register is used as a scratch to move
10399 the upper DI value and the lower DI value is moved directly,
10400 hence the cost is the sum of three moves. */
10401 if (! TARGET_SIMD)
10402 return regmove_cost->GP2FP + regmove_cost->FP2GP + regmove_cost->FP2FP;
10403
10404 return regmove_cost->FP2FP;
10405 }
10406
43e9d192
IB
10407 if (from == GENERAL_REGS && to == GENERAL_REGS)
10408 return regmove_cost->GP2GP;
10409 else if (from == GENERAL_REGS)
10410 return regmove_cost->GP2FP;
10411 else if (to == GENERAL_REGS)
10412 return regmove_cost->FP2GP;
10413
43e9d192
IB
10414 return regmove_cost->FP2FP;
10415}
10416
10417static int
ef4bddc2 10418aarch64_memory_move_cost (machine_mode mode ATTRIBUTE_UNUSED,
43e9d192
IB
10419 reg_class_t rclass ATTRIBUTE_UNUSED,
10420 bool in ATTRIBUTE_UNUSED)
10421{
b175b679 10422 return aarch64_tune_params.memmov_cost;
43e9d192
IB
10423}
10424
0c30e0f3
EM
10425/* Return true if it is safe and beneficial to use the approximate rsqrt optabs
10426 to optimize 1.0/sqrt. */
ee62a5a6
RS
10427
10428static bool
9acc9cbe 10429use_rsqrt_p (machine_mode mode)
ee62a5a6
RS
10430{
10431 return (!flag_trapping_math
10432 && flag_unsafe_math_optimizations
9acc9cbe
EM
10433 && ((aarch64_tune_params.approx_modes->recip_sqrt
10434 & AARCH64_APPROX_MODE (mode))
1a33079e 10435 || flag_mrecip_low_precision_sqrt));
ee62a5a6
RS
10436}
10437
0c30e0f3
EM
10438/* Function to decide when to use the approximate reciprocal square root
10439 builtin. */
a6fc00da
BH
10440
10441static tree
ee62a5a6 10442aarch64_builtin_reciprocal (tree fndecl)
a6fc00da 10443{
9acc9cbe
EM
10444 machine_mode mode = TYPE_MODE (TREE_TYPE (fndecl));
10445
10446 if (!use_rsqrt_p (mode))
a6fc00da 10447 return NULL_TREE;
ee62a5a6 10448 return aarch64_builtin_rsqrt (DECL_FUNCTION_CODE (fndecl));
a6fc00da
BH
10449}
10450
98daafa0
EM
10451/* Emit instruction sequence to compute either the approximate square root
10452 or its approximate reciprocal, depending on the flag RECP, and return
10453 whether the sequence was emitted or not. */
a6fc00da 10454
98daafa0
EM
10455bool
10456aarch64_emit_approx_sqrt (rtx dst, rtx src, bool recp)
a6fc00da 10457{
98daafa0 10458 machine_mode mode = GET_MODE (dst);
daef0a8c
JW
10459
10460 if (GET_MODE_INNER (mode) == HFmode)
2e19adc8
RE
10461 {
10462 gcc_assert (!recp);
10463 return false;
10464 }
10465
2e19adc8
RE
10466 if (!recp)
10467 {
10468 if (!(flag_mlow_precision_sqrt
10469 || (aarch64_tune_params.approx_modes->sqrt
10470 & AARCH64_APPROX_MODE (mode))))
10471 return false;
10472
10473 if (flag_finite_math_only
10474 || flag_trapping_math
10475 || !flag_unsafe_math_optimizations
10476 || optimize_function_for_size_p (cfun))
10477 return false;
10478 }
10479 else
10480 /* Caller assumes we cannot fail. */
10481 gcc_assert (use_rsqrt_p (mode));
daef0a8c 10482
ddc203a7 10483 machine_mode mmsk = mode_for_int_vector (mode).require ();
98daafa0
EM
10484 rtx xmsk = gen_reg_rtx (mmsk);
10485 if (!recp)
2e19adc8
RE
10486 /* When calculating the approximate square root, compare the
10487 argument with 0.0 and create a mask. */
10488 emit_insn (gen_rtx_SET (xmsk,
10489 gen_rtx_NEG (mmsk,
10490 gen_rtx_EQ (mmsk, src,
10491 CONST0_RTX (mode)))));
a6fc00da 10492
98daafa0
EM
10493 /* Estimate the approximate reciprocal square root. */
10494 rtx xdst = gen_reg_rtx (mode);
0016d8d9 10495 emit_insn (gen_aarch64_rsqrte (mode, xdst, src));
a6fc00da 10496
98daafa0
EM
10497 /* Iterate over the series twice for SF and thrice for DF. */
10498 int iterations = (GET_MODE_INNER (mode) == DFmode) ? 3 : 2;
a6fc00da 10499
98daafa0
EM
10500 /* Optionally iterate over the series once less for faster performance
10501     while sacrificing accuracy.  */
10502 if ((recp && flag_mrecip_low_precision_sqrt)
10503 || (!recp && flag_mlow_precision_sqrt))
a6fc00da
BH
10504 iterations--;
10505
98daafa0
EM
10506 /* Iterate over the series to calculate the approximate reciprocal square
10507 root. */
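  /* Each pass of the loop below performs one Newton-Raphson step for
     1/sqrt(S):
	x_{n+1} = x_n * (3 - S * x_n^2) / 2
     where the FRSQRTS instruction supplies the (3 - S * x_n^2) / 2 factor
     (a sketch of the intent; see the Arm ARM for the precise semantics).  */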
10508 rtx x1 = gen_reg_rtx (mode);
10509 while (iterations--)
a6fc00da 10510 {
a6fc00da 10511 rtx x2 = gen_reg_rtx (mode);
98daafa0
EM
10512 emit_set_insn (x2, gen_rtx_MULT (mode, xdst, xdst));
10513
0016d8d9 10514 emit_insn (gen_aarch64_rsqrts (mode, x1, src, x2));
a6fc00da 10515
98daafa0
EM
10516 if (iterations > 0)
10517 emit_set_insn (xdst, gen_rtx_MULT (mode, xdst, x1));
10518 }
10519
10520 if (!recp)
10521 {
10522 /* Qualify the approximate reciprocal square root when the argument is
10523 0.0 by squashing the intermediary result to 0.0. */
10524 rtx xtmp = gen_reg_rtx (mmsk);
10525 emit_set_insn (xtmp, gen_rtx_AND (mmsk, gen_rtx_NOT (mmsk, xmsk),
10526 gen_rtx_SUBREG (mmsk, xdst, 0)));
10527 emit_move_insn (xdst, gen_rtx_SUBREG (mode, xtmp, 0));
a6fc00da 10528
98daafa0
EM
10529 /* Calculate the approximate square root. */
10530 emit_set_insn (xdst, gen_rtx_MULT (mode, xdst, src));
a6fc00da
BH
10531 }
10532
98daafa0
EM
10533 /* Finalize the approximation. */
10534 emit_set_insn (dst, gen_rtx_MULT (mode, xdst, x1));
10535
10536 return true;
a6fc00da
BH
10537}
10538
79a2bc2d
EM
10539/* Emit the instruction sequence to compute the approximation for the division
10540 of NUM by DEN in QUO and return whether the sequence was emitted or not. */
10541
10542bool
10543aarch64_emit_approx_div (rtx quo, rtx num, rtx den)
10544{
10545 machine_mode mode = GET_MODE (quo);
33d72b63
JW
10546
10547 if (GET_MODE_INNER (mode) == HFmode)
10548 return false;
10549
79a2bc2d
EM
10550 bool use_approx_division_p = (flag_mlow_precision_div
10551 || (aarch64_tune_params.approx_modes->division
10552 & AARCH64_APPROX_MODE (mode)));
10553
10554 if (!flag_finite_math_only
10555 || flag_trapping_math
10556 || !flag_unsafe_math_optimizations
10557 || optimize_function_for_size_p (cfun)
10558 || !use_approx_division_p)
10559 return false;
10560
1be49a38
RR
10561 if (!TARGET_SIMD && VECTOR_MODE_P (mode))
10562 return false;
10563
79a2bc2d
EM
10564 /* Estimate the approximate reciprocal. */
10565 rtx xrcp = gen_reg_rtx (mode);
0016d8d9 10566 emit_insn (gen_aarch64_frecpe (mode, xrcp, den));
79a2bc2d
EM
10567
10568 /* Iterate over the series twice for SF and thrice for DF. */
10569 int iterations = (GET_MODE_INNER (mode) == DFmode) ? 3 : 2;
10570
10571 /* Optionally iterate over the series once less for faster performance,
10572     while sacrificing accuracy.  */
10573 if (flag_mlow_precision_div)
10574 iterations--;
10575
10576 /* Iterate over the series to calculate the approximate reciprocal. */
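  /* Each pass of the loop below performs one Newton-Raphson step for 1/D:
	x_{n+1} = x_n * (2 - D * x_n)
     where the FRECPS instruction supplies the (2 - D * x_n) factor
     (again a sketch of the intent rather than the precise semantics).  */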
10577 rtx xtmp = gen_reg_rtx (mode);
10578 while (iterations--)
10579 {
0016d8d9 10580 emit_insn (gen_aarch64_frecps (mode, xtmp, xrcp, den));
79a2bc2d
EM
10581
10582 if (iterations > 0)
10583 emit_set_insn (xrcp, gen_rtx_MULT (mode, xrcp, xtmp));
10584 }
10585
10586 if (num != CONST1_RTX (mode))
10587 {
10588 /* As the approximate reciprocal of DEN is already calculated, only
10589 calculate the approximate division when NUM is not 1.0. */
10590 rtx xnum = force_reg (mode, num);
10591 emit_set_insn (xrcp, gen_rtx_MULT (mode, xrcp, xnum));
10592 }
10593
10594 /* Finalize the approximation. */
10595 emit_set_insn (quo, gen_rtx_MULT (mode, xrcp, xtmp));
10596 return true;
10597}
10598
d126a4ae
AP
10599/* Return the number of instructions that can be issued per cycle. */
10600static int
10601aarch64_sched_issue_rate (void)
10602{
b175b679 10603 return aarch64_tune_params.issue_rate;
d126a4ae
AP
10604}
10605
d03f7e44
MK
10606static int
10607aarch64_sched_first_cycle_multipass_dfa_lookahead (void)
10608{
10609 int issue_rate = aarch64_sched_issue_rate ();
10610
10611 return issue_rate > 1 && !sched_fusion ? issue_rate : 0;
10612}
10613
2d6bc7fa
KT
10614
10615/* Implement TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD as
10616 autopref_multipass_dfa_lookahead_guard from haifa-sched.c. It only
10617 has an effect if PARAM_SCHED_AUTOPREF_QUEUE_DEPTH > 0. */
10618
10619static int
10620aarch64_first_cycle_multipass_dfa_lookahead_guard (rtx_insn *insn,
10621 int ready_index)
10622{
10623 return autopref_multipass_dfa_lookahead_guard (insn, ready_index);
10624}
10625
10626
8990e73a
TB
10627/* Vectorizer cost model target hooks. */
10628
10629/* Implement targetm.vectorize.builtin_vectorization_cost. */
10630static int
10631aarch64_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
10632 tree vectype,
10633 int misalign ATTRIBUTE_UNUSED)
10634{
10635 unsigned elements;
cd8ae5ed
AP
10636 const cpu_vector_cost *costs = aarch64_tune_params.vec_costs;
10637 bool fp = false;
10638
10639 if (vectype != NULL)
10640 fp = FLOAT_TYPE_P (vectype);
8990e73a
TB
10641
10642 switch (type_of_cost)
10643 {
10644 case scalar_stmt:
cd8ae5ed 10645 return fp ? costs->scalar_fp_stmt_cost : costs->scalar_int_stmt_cost;
8990e73a
TB
10646
10647 case scalar_load:
cd8ae5ed 10648 return costs->scalar_load_cost;
8990e73a
TB
10649
10650 case scalar_store:
cd8ae5ed 10651 return costs->scalar_store_cost;
8990e73a
TB
10652
10653 case vector_stmt:
cd8ae5ed 10654 return fp ? costs->vec_fp_stmt_cost : costs->vec_int_stmt_cost;
8990e73a
TB
10655
10656 case vector_load:
cd8ae5ed 10657 return costs->vec_align_load_cost;
8990e73a
TB
10658
10659 case vector_store:
cd8ae5ed 10660 return costs->vec_store_cost;
8990e73a
TB
10661
10662 case vec_to_scalar:
cd8ae5ed 10663 return costs->vec_to_scalar_cost;
8990e73a
TB
10664
10665 case scalar_to_vec:
cd8ae5ed 10666 return costs->scalar_to_vec_cost;
8990e73a
TB
10667
10668 case unaligned_load:
cc9fe6bb 10669 case vector_gather_load:
cd8ae5ed 10670 return costs->vec_unalign_load_cost;
8990e73a
TB
10671
10672 case unaligned_store:
cc9fe6bb 10673 case vector_scatter_store:
cd8ae5ed 10674 return costs->vec_unalign_store_cost;
8990e73a
TB
10675
10676 case cond_branch_taken:
cd8ae5ed 10677 return costs->cond_taken_branch_cost;
8990e73a
TB
10678
10679 case cond_branch_not_taken:
cd8ae5ed 10680 return costs->cond_not_taken_branch_cost;
8990e73a
TB
10681
10682 case vec_perm:
cd8ae5ed 10683 return costs->vec_permute_cost;
c428f91c 10684
8990e73a 10685 case vec_promote_demote:
cd8ae5ed 10686 return fp ? costs->vec_fp_stmt_cost : costs->vec_int_stmt_cost;
8990e73a
TB
10687
10688 case vec_construct:
6a70badb 10689 elements = estimated_poly_value (TYPE_VECTOR_SUBPARTS (vectype));
8990e73a
TB
10690 return elements / 2 + 1;
10691
10692 default:
10693 gcc_unreachable ();
10694 }
10695}
10696
10697/* Implement targetm.vectorize.add_stmt_cost. */
10698static unsigned
10699aarch64_add_stmt_cost (void *data, int count, enum vect_cost_for_stmt kind,
10700 struct _stmt_vec_info *stmt_info, int misalign,
10701 enum vect_cost_model_location where)
10702{
10703 unsigned *cost = (unsigned *) data;
10704 unsigned retval = 0;
10705
10706 if (flag_vect_cost_model)
10707 {
10708 tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
10709 int stmt_cost =
10710 aarch64_builtin_vectorization_cost (kind, vectype, misalign);
10711
10712 /* Statements in an inner loop relative to the loop being
10713 vectorized are weighted more heavily. The value here is
058e4c71 10714 arbitrary and could potentially be improved with analysis. */
8990e73a 10715 if (where == vect_body && stmt_info && stmt_in_inner_loop_p (stmt_info))
058e4c71 10716 count *= 50; /* FIXME */
8990e73a
TB
10717
10718 retval = (unsigned) (count * stmt_cost);
10719 cost[where] += retval;
10720 }
10721
10722 return retval;
10723}
10724
0cfff2a1 10725static void initialize_aarch64_code_model (struct gcc_options *);
43e9d192 10726
0cfff2a1
KT
10727/* Parse the TO_PARSE string and put the architecture struct that it
10728 selects into RES and the architectural features into ISA_FLAGS.
10729 Return an aarch64_parse_opt_result describing the parse result.
c7887347
ML
10730 If there is an error parsing, RES and ISA_FLAGS are left unchanged.
10731 When the TO_PARSE string contains an invalid extension,
10732 a copy of the string is created and stored to INVALID_EXTENSION. */
43e9d192 10733
0cfff2a1
KT
10734static enum aarch64_parse_opt_result
10735aarch64_parse_arch (const char *to_parse, const struct processor **res,
c7887347 10736 unsigned long *isa_flags, std::string *invalid_extension)
43e9d192 10737{
ff150bc4 10738 const char *ext;
43e9d192 10739 const struct processor *arch;
43e9d192
IB
10740 size_t len;
10741
ff150bc4 10742 ext = strchr (to_parse, '+');
43e9d192
IB
10743
10744 if (ext != NULL)
ff150bc4 10745 len = ext - to_parse;
43e9d192 10746 else
ff150bc4 10747 len = strlen (to_parse);
43e9d192
IB
10748
10749 if (len == 0)
0cfff2a1
KT
10750 return AARCH64_PARSE_MISSING_ARG;
10751
43e9d192 10752
0cfff2a1 10753 /* Loop through the list of supported ARCHes to find a match. */
43e9d192
IB
10754 for (arch = all_architectures; arch->name != NULL; arch++)
10755 {
ff150bc4
ML
10756 if (strlen (arch->name) == len
10757 && strncmp (arch->name, to_parse, len) == 0)
43e9d192 10758 {
0cfff2a1 10759 unsigned long isa_temp = arch->flags;
43e9d192
IB
10760
10761 if (ext != NULL)
10762 {
0cfff2a1
KT
10763 /* TO_PARSE string contains at least one extension. */
10764 enum aarch64_parse_opt_result ext_res
c7887347 10765 = aarch64_parse_extension (ext, &isa_temp, invalid_extension);
43e9d192 10766
0cfff2a1
KT
10767 if (ext_res != AARCH64_PARSE_OK)
10768 return ext_res;
ffee7aa9 10769 }
0cfff2a1
KT
10770 /* Extension parsing was successful. Confirm the result
10771 arch and ISA flags. */
10772 *res = arch;
10773 *isa_flags = isa_temp;
10774 return AARCH64_PARSE_OK;
43e9d192
IB
10775 }
10776 }
10777
10778 /* ARCH name not found in list. */
0cfff2a1 10779 return AARCH64_PARSE_INVALID_ARG;
43e9d192
IB
10780}
10781
0cfff2a1
KT
10782/* Parse the TO_PARSE string and put the result tuning in RES and the
10783 architecture flags in ISA_FLAGS. Return an aarch64_parse_opt_result
10784 describing the parse result. If there is an error parsing, RES and
c7887347
ML
10785 ISA_FLAGS are left unchanged.
10786 When the TO_PARSE string contains an invalid extension,
10787 a copy of the string is created and stored to INVALID_EXTENSION. */
43e9d192 10788
0cfff2a1
KT
10789static enum aarch64_parse_opt_result
10790aarch64_parse_cpu (const char *to_parse, const struct processor **res,
c7887347 10791 unsigned long *isa_flags, std::string *invalid_extension)
43e9d192 10792{
ff150bc4 10793 const char *ext;
43e9d192 10794 const struct processor *cpu;
43e9d192
IB
10795 size_t len;
10796
ff150bc4 10797 ext = strchr (to_parse, '+');
43e9d192
IB
10798
10799 if (ext != NULL)
ff150bc4 10800 len = ext - to_parse;
43e9d192 10801 else
ff150bc4 10802 len = strlen (to_parse);
43e9d192
IB
10803
10804 if (len == 0)
0cfff2a1
KT
10805 return AARCH64_PARSE_MISSING_ARG;
10806
43e9d192
IB
10807
10808 /* Loop through the list of supported CPUs to find a match. */
10809 for (cpu = all_cores; cpu->name != NULL; cpu++)
10810 {
ff150bc4 10811 if (strlen (cpu->name) == len && strncmp (cpu->name, to_parse, len) == 0)
43e9d192 10812 {
0cfff2a1
KT
10813 unsigned long isa_temp = cpu->flags;
10814
43e9d192
IB
10815
10816 if (ext != NULL)
10817 {
0cfff2a1
KT
10818 /* TO_PARSE string contains at least one extension. */
10819 enum aarch64_parse_opt_result ext_res
c7887347 10820 = aarch64_parse_extension (ext, &isa_temp, invalid_extension);
43e9d192 10821
0cfff2a1
KT
10822 if (ext_res != AARCH64_PARSE_OK)
10823 return ext_res;
10824 }
10825	  /* Extension parsing was successful.  Confirm the result
10826 cpu and ISA flags. */
10827 *res = cpu;
10828 *isa_flags = isa_temp;
10829 return AARCH64_PARSE_OK;
43e9d192
IB
10830 }
10831 }
10832
10833 /* CPU name not found in list. */
0cfff2a1 10834 return AARCH64_PARSE_INVALID_ARG;
43e9d192
IB
10835}
10836
0cfff2a1
KT
10837/* Parse the TO_PARSE string and put the cpu it selects into RES.
10838 Return an aarch64_parse_opt_result describing the parse result.
10839   If the parsing fails, RES does not change.  */
43e9d192 10840
0cfff2a1
KT
10841static enum aarch64_parse_opt_result
10842aarch64_parse_tune (const char *to_parse, const struct processor **res)
43e9d192
IB
10843{
10844 const struct processor *cpu;
43e9d192
IB
10845
10846 /* Loop through the list of supported CPUs to find a match. */
10847 for (cpu = all_cores; cpu->name != NULL; cpu++)
10848 {
ff150bc4 10849 if (strcmp (cpu->name, to_parse) == 0)
43e9d192 10850 {
0cfff2a1
KT
10851 *res = cpu;
10852 return AARCH64_PARSE_OK;
43e9d192
IB
10853 }
10854 }
10855
10856 /* CPU name not found in list. */
0cfff2a1 10857 return AARCH64_PARSE_INVALID_ARG;
43e9d192
IB
10858}
10859
8dec06f2
JG
10860/* Parse TOKEN, which has length LENGTH, to see if it is an option
10861 described in FLAG. If it is, return the index bit for that fusion type.
10862 If not, error (printing OPTION_NAME) and return zero. */
10863
10864static unsigned int
10865aarch64_parse_one_option_token (const char *token,
10866 size_t length,
10867 const struct aarch64_flag_desc *flag,
10868 const char *option_name)
10869{
10870 for (; flag->name != NULL; flag++)
10871 {
10872 if (length == strlen (flag->name)
10873 && !strncmp (flag->name, token, length))
10874 return flag->flag;
10875 }
10876
10877 error ("unknown flag passed in -moverride=%s (%s)", option_name, token);
10878 return 0;
10879}
10880
10881/* Parse OPTION which is a comma-separated list of flags to enable.
10882 FLAGS gives the list of flags we understand, INITIAL_STATE gives any
10883 default state we inherit from the CPU tuning structures. OPTION_NAME
10884 gives the top-level option we are parsing in the -moverride string,
10885 for use in error messages. */
10886
10887static unsigned int
10888aarch64_parse_boolean_options (const char *option,
10889 const struct aarch64_flag_desc *flags,
10890 unsigned int initial_state,
10891 const char *option_name)
10892{
10893 const char separator = '.';
10894 const char* specs = option;
10895 const char* ntoken = option;
10896 unsigned int found_flags = initial_state;
10897
10898 while ((ntoken = strchr (specs, separator)))
10899 {
10900 size_t token_length = ntoken - specs;
10901 unsigned token_ops = aarch64_parse_one_option_token (specs,
10902 token_length,
10903 flags,
10904 option_name);
10905 /* If we find "none" (or, for simplicity's sake, an error) anywhere
10906 in the token stream, reset the supported operations. So:
10907
10908 adrp+add.cmp+branch.none.adrp+add
10909
10910 would have the result of turning on only adrp+add fusion. */
10911 if (!token_ops)
10912 found_flags = 0;
10913
10914 found_flags |= token_ops;
10915 specs = ++ntoken;
10916 }
10917
10918  /* The string ended with a separator, so it is ill-formed; report it.  */
10919 if (!(*specs))
10920 {
10921 error ("%s string ill-formed\n", option_name);
10922 return 0;
10923 }
10924
10925 /* We still have one more token to parse. */
10926 size_t token_length = strlen (specs);
10927 unsigned token_ops = aarch64_parse_one_option_token (specs,
10928 token_length,
10929 flags,
10930 option_name);
10931 if (!token_ops)
10932 found_flags = 0;
10933
10934 found_flags |= token_ops;
10935 return found_flags;
10936}
10937
10938/* Support for overriding instruction fusion. */
10939
10940static void
10941aarch64_parse_fuse_string (const char *fuse_string,
10942 struct tune_params *tune)
10943{
10944 tune->fusible_ops = aarch64_parse_boolean_options (fuse_string,
10945 aarch64_fusible_pairs,
10946 tune->fusible_ops,
10947 "fuse=");
10948}
10949
10950/* Support for overriding other tuning flags. */
10951
10952static void
10953aarch64_parse_tune_string (const char *tune_string,
10954 struct tune_params *tune)
10955{
10956 tune->extra_tuning_flags
10957 = aarch64_parse_boolean_options (tune_string,
10958 aarch64_tuning_flags,
10959 tune->extra_tuning_flags,
10960 "tune=");
10961}
10962
886f092f
KT
10963/* Parse the sve_width tuning moverride string in TUNE_STRING.
10964 Accept the valid SVE vector widths allowed by
10965 aarch64_sve_vector_bits_enum and use it to override sve_width
10966 in TUNE. */
10967
10968static void
10969aarch64_parse_sve_width_string (const char *tune_string,
10970 struct tune_params *tune)
10971{
10972 int width = -1;
10973
10974 int n = sscanf (tune_string, "%d", &width);
10975 if (n == EOF)
10976 {
10977 error ("invalid format for sve_width");
10978 return;
10979 }
10980 switch (width)
10981 {
10982 case SVE_128:
10983 case SVE_256:
10984 case SVE_512:
10985 case SVE_1024:
10986 case SVE_2048:
10987 break;
10988 default:
10989 error ("invalid sve_width value: %d", width);
10990 }
10991 tune->sve_width = (enum aarch64_sve_vector_bits_enum) width;
10992}
10993
8dec06f2
JG
10994/* Parse TOKEN, which has length LENGTH, to see if it is a tuning option
10995   we understand.  If it is, extract the option string and hand off to
10996 the appropriate function. */
10997
10998void
10999aarch64_parse_one_override_token (const char* token,
11000 size_t length,
11001 struct tune_params *tune)
11002{
11003 const struct aarch64_tuning_override_function *fn
11004 = aarch64_tuning_override_functions;
11005
11006 const char *option_part = strchr (token, '=');
11007 if (!option_part)
11008 {
11009 error ("tuning string missing in option (%s)", token);
11010 return;
11011 }
11012
11013 /* Get the length of the option name. */
11014 length = option_part - token;
11015 /* Skip the '=' to get to the option string. */
11016 option_part++;
11017
11018 for (; fn->name != NULL; fn++)
11019 {
11020 if (!strncmp (fn->name, token, length))
11021 {
11022 fn->parse_override (option_part, tune);
11023 return;
11024 }
11025 }
11026
11027  error ("unknown tuning option (%s)", token);
11028 return;
11029}
11030
5eee3c34
JW
11031/* A checking mechanism for the implementation of the tls size. */
11032
11033static void
11034initialize_aarch64_tls_size (struct gcc_options *opts)
11035{
11036 if (aarch64_tls_size == 0)
11037 aarch64_tls_size = 24;
11038
11039 switch (opts->x_aarch64_cmodel_var)
11040 {
11041 case AARCH64_CMODEL_TINY:
11042     /* Both the default and maximum TLS size allowed under tiny are 1M, which
11043 needs two instructions to address, so we clamp the size to 24. */
11044 if (aarch64_tls_size > 24)
11045 aarch64_tls_size = 24;
11046 break;
11047 case AARCH64_CMODEL_SMALL:
11048 /* The maximum TLS size allowed under small is 4G. */
11049 if (aarch64_tls_size > 32)
11050 aarch64_tls_size = 32;
11051 break;
11052 case AARCH64_CMODEL_LARGE:
11053 /* The maximum TLS size allowed under large is 16E.
11054	 FIXME: 16E should be 64-bit; we only support a 48-bit offset now.  */
11055 if (aarch64_tls_size > 48)
11056 aarch64_tls_size = 48;
11057 break;
11058 default:
11059 gcc_unreachable ();
11060 }
11061
11062 return;
11063}
11064
8dec06f2
JG
11065/* Parse STRING looking for options in the format:
11066 string :: option:string
11067 option :: name=substring
11068 name :: {a-z}
11069 substring :: defined by option. */
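/* For example (illustrative only), "-moverride=fuse=adrp+add.cmp+branch"
   selects just the adrp+add and cmp+branch fusion pairs; multiple options
   are separated by ':' as described above.  */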
11070
11071static void
11072aarch64_parse_override_string (const char* input_string,
11073 struct tune_params* tune)
11074{
11075 const char separator = ':';
11076 size_t string_length = strlen (input_string) + 1;
11077 char *string_root = (char *) xmalloc (sizeof (*string_root) * string_length);
11078 char *string = string_root;
11079 strncpy (string, input_string, string_length);
11080 string[string_length - 1] = '\0';
11081
11082 char* ntoken = string;
11083
11084 while ((ntoken = strchr (string, separator)))
11085 {
11086 size_t token_length = ntoken - string;
11087 /* Make this substring look like a string. */
11088 *ntoken = '\0';
11089 aarch64_parse_one_override_token (string, token_length, tune);
11090 string = ++ntoken;
11091 }
11092
11093 /* One last option to parse. */
11094 aarch64_parse_one_override_token (string, strlen (string), tune);
11095 free (string_root);
11096}
43e9d192 11097
43e9d192
IB
11098
11099static void
0cfff2a1 11100aarch64_override_options_after_change_1 (struct gcc_options *opts)
43e9d192 11101{
acea40ac
WD
11102 /* PR 70044: We have to be careful about being called multiple times for the
11103 same function. This means all changes should be repeatable. */
11104
d6cb6d6a
WD
11105 /* Set aarch64_use_frame_pointer based on -fno-omit-frame-pointer.
11106 Disable the frame pointer flag so the mid-end will not use a frame
11107 pointer in leaf functions in order to support -fomit-leaf-frame-pointer.
11108 Set x_flag_omit_frame_pointer to the special value 2 to differentiate
11109 between -fomit-frame-pointer (1) and -fno-omit-frame-pointer (2). */
11110 aarch64_use_frame_pointer = opts->x_flag_omit_frame_pointer != 1;
acea40ac 11111 if (opts->x_flag_omit_frame_pointer == 0)
a3dc8760 11112 opts->x_flag_omit_frame_pointer = 2;
43e9d192 11113
1be34295 11114 /* If not optimizing for size, set the default
0cfff2a1
KT
11115 alignment to what the target wants. */
11116 if (!opts->x_optimize_size)
43e9d192 11117 {
c518c102
ML
11118 if (opts->x_flag_align_loops && !opts->x_str_align_loops)
11119 opts->x_str_align_loops = aarch64_tune_params.loop_align;
11120 if (opts->x_flag_align_jumps && !opts->x_str_align_jumps)
11121 opts->x_str_align_jumps = aarch64_tune_params.jump_align;
11122 if (opts->x_flag_align_functions && !opts->x_str_align_functions)
11123 opts->x_str_align_functions = aarch64_tune_params.function_align;
43e9d192 11124 }
b4f50fd4 11125
9ee6540a
WD
11126 /* We default to no pc-relative literal loads. */
11127
11128 aarch64_pcrelative_literal_loads = false;
11129
11130 /* If -mpc-relative-literal-loads is set on the command line, this
b4f50fd4 11131 implies that the user asked for PC relative literal loads. */
9ee6540a
WD
11132 if (opts->x_pcrelative_literal_loads == 1)
11133 aarch64_pcrelative_literal_loads = true;
b4f50fd4 11134
9ee6540a
WD
11135 /* In the tiny memory model it makes no sense to disallow PC relative
11136 literal pool loads. */
11137 if (aarch64_cmodel == AARCH64_CMODEL_TINY
11138 || aarch64_cmodel == AARCH64_CMODEL_TINY_PIC)
11139 aarch64_pcrelative_literal_loads = true;
98daafa0
EM
11140
11141 /* When enabling the lower precision Newton series for the square root, also
11142 enable it for the reciprocal square root, since the latter is an
11143 intermediary step for the former. */
11144 if (flag_mlow_precision_sqrt)
11145 flag_mrecip_low_precision_sqrt = true;
0cfff2a1 11146}
43e9d192 11147
0cfff2a1
KT
11148/* 'Unpack' the internal tuning structs and update the options
11149 in OPTS. The caller must have set up selected_tune and selected_arch
11150 as all the other target-specific codegen decisions are
11151 derived from them. */
11152
e4ea20c8 11153void
0cfff2a1
KT
11154aarch64_override_options_internal (struct gcc_options *opts)
11155{
11156 aarch64_tune_flags = selected_tune->flags;
11157 aarch64_tune = selected_tune->sched_core;
11158 /* Make a copy of the tuning parameters attached to the core, which
11159 we may later overwrite. */
11160 aarch64_tune_params = *(selected_tune->tune);
11161 aarch64_architecture_version = selected_arch->architecture_version;
11162
11163 if (opts->x_aarch64_override_tune_string)
11164 aarch64_parse_override_string (opts->x_aarch64_override_tune_string,
11165 &aarch64_tune_params);
11166
11167 /* This target defaults to strict volatile bitfields. */
11168 if (opts->x_flag_strict_volatile_bitfields < 0 && abi_version_at_least (2))
11169 opts->x_flag_strict_volatile_bitfields = 1;
11170
0cfff2a1 11171 initialize_aarch64_code_model (opts);
5eee3c34 11172 initialize_aarch64_tls_size (opts);
63892fa2 11173
2d6bc7fa
KT
11174 int queue_depth = 0;
11175 switch (aarch64_tune_params.autoprefetcher_model)
11176 {
11177 case tune_params::AUTOPREFETCHER_OFF:
11178 queue_depth = -1;
11179 break;
11180 case tune_params::AUTOPREFETCHER_WEAK:
11181 queue_depth = 0;
11182 break;
11183 case tune_params::AUTOPREFETCHER_STRONG:
11184 queue_depth = max_insn_queue_index + 1;
11185 break;
11186 default:
11187 gcc_unreachable ();
11188 }
11189
11190 /* We don't mind passing in global_options_set here as we don't use
11191 the *options_set structs anyway. */
11192 maybe_set_param_value (PARAM_SCHED_AUTOPREF_QUEUE_DEPTH,
11193 queue_depth,
11194 opts->x_param_values,
11195 global_options_set.x_param_values);
11196
9d2c6e2e
MK
11197 /* Set up parameters to be used in prefetching algorithm. Do not
11198 override the defaults unless we are tuning for a core we have
11199 researched values for. */
11200 if (aarch64_tune_params.prefetch->num_slots > 0)
11201 maybe_set_param_value (PARAM_SIMULTANEOUS_PREFETCHES,
11202 aarch64_tune_params.prefetch->num_slots,
11203 opts->x_param_values,
11204 global_options_set.x_param_values);
11205 if (aarch64_tune_params.prefetch->l1_cache_size >= 0)
11206 maybe_set_param_value (PARAM_L1_CACHE_SIZE,
11207 aarch64_tune_params.prefetch->l1_cache_size,
11208 opts->x_param_values,
11209 global_options_set.x_param_values);
11210 if (aarch64_tune_params.prefetch->l1_cache_line_size >= 0)
50487d79 11211 maybe_set_param_value (PARAM_L1_CACHE_LINE_SIZE,
9d2c6e2e
MK
11212 aarch64_tune_params.prefetch->l1_cache_line_size,
11213 opts->x_param_values,
11214 global_options_set.x_param_values);
11215 if (aarch64_tune_params.prefetch->l2_cache_size >= 0)
11216 maybe_set_param_value (PARAM_L2_CACHE_SIZE,
11217 aarch64_tune_params.prefetch->l2_cache_size,
50487d79
EM
11218 opts->x_param_values,
11219 global_options_set.x_param_values);
d2ff35c0
LM
11220 if (!aarch64_tune_params.prefetch->prefetch_dynamic_strides)
11221 maybe_set_param_value (PARAM_PREFETCH_DYNAMIC_STRIDES,
11222 0,
11223 opts->x_param_values,
11224 global_options_set.x_param_values);
59100dfc
LM
11225 if (aarch64_tune_params.prefetch->minimum_stride >= 0)
11226 maybe_set_param_value (PARAM_PREFETCH_MINIMUM_STRIDE,
11227 aarch64_tune_params.prefetch->minimum_stride,
11228 opts->x_param_values,
11229 global_options_set.x_param_values);
50487d79 11230
13494fcb
WD
11231 /* Use the alternative scheduling-pressure algorithm by default. */
11232 maybe_set_param_value (PARAM_SCHED_PRESSURE_ALGORITHM, SCHED_PRESSURE_MODEL,
11233 opts->x_param_values,
11234 global_options_set.x_param_values);
11235
fbe9af50
TC
11236 /* If the user hasn't changed it via configure then set the default to 64 KB
11237 for the backend. */
11238 maybe_set_param_value (PARAM_STACK_CLASH_PROTECTION_GUARD_SIZE,
11239 DEFAULT_STK_CLASH_GUARD_SIZE == 0
11240 ? 16 : DEFAULT_STK_CLASH_GUARD_SIZE,
11241 opts->x_param_values,
11242 global_options_set.x_param_values);
11243
11244 /* Validate the guard size. */
11245 int guard_size = PARAM_VALUE (PARAM_STACK_CLASH_PROTECTION_GUARD_SIZE);
fbe9af50
TC
11246
11247  /* Enforce that the probing interval is the same as the guard size so the
11248 right thing. */
11249 maybe_set_param_value (PARAM_STACK_CLASH_PROTECTION_PROBE_INTERVAL,
11250 guard_size,
11251 opts->x_param_values,
11252 global_options_set.x_param_values);
11253
11254 /* The maybe_set calls won't update the value if the user has explicitly set
11255 one. Which means we need to validate that probing interval and guard size
11256 are equal. */
11257 int probe_interval
11258 = PARAM_VALUE (PARAM_STACK_CLASH_PROTECTION_PROBE_INTERVAL);
11259 if (guard_size != probe_interval)
11260 error ("stack clash guard size '%d' must be equal to probing interval "
11261 "'%d'", guard_size, probe_interval);
11262
16b2cafd
MK
11263 /* Enable sw prefetching at specified optimization level for
11264 CPUS that have prefetch. Lower optimization level threshold by 1
11265 when profiling is enabled. */
11266 if (opts->x_flag_prefetch_loop_arrays < 0
11267 && !opts->x_optimize_size
11268 && aarch64_tune_params.prefetch->default_opt_level >= 0
11269 && opts->x_optimize >= aarch64_tune_params.prefetch->default_opt_level)
11270 opts->x_flag_prefetch_loop_arrays = 1;
11271
266c2b54
ML
11272 if (opts->x_aarch64_arch_string == NULL)
11273 opts->x_aarch64_arch_string = selected_arch->name;
11274 if (opts->x_aarch64_cpu_string == NULL)
11275 opts->x_aarch64_cpu_string = selected_cpu->name;
11276 if (opts->x_aarch64_tune_string == NULL)
11277 opts->x_aarch64_tune_string = selected_tune->name;
11278
0cfff2a1
KT
11279 aarch64_override_options_after_change_1 (opts);
11280}
43e9d192 11281
01f44038
KT
11282/* Print a hint with a suggestion for a core or architecture name that
11283 most closely resembles what the user passed in STR. ARCH is true if
11284 the user is asking for an architecture name. ARCH is false if the user
11285 is asking for a core name. */
11286
11287static void
11288aarch64_print_hint_for_core_or_arch (const char *str, bool arch)
11289{
11290 auto_vec<const char *> candidates;
11291 const struct processor *entry = arch ? all_architectures : all_cores;
11292 for (; entry->name != NULL; entry++)
11293 candidates.safe_push (entry->name);
a08b5429
ML
11294
11295#ifdef HAVE_LOCAL_CPU_DETECT
11296 /* Add also "native" as possible value. */
11297 if (arch)
11298 candidates.safe_push ("native");
11299#endif
11300
01f44038
KT
11301 char *s;
11302 const char *hint = candidates_list_and_hint (str, s, candidates);
11303 if (hint)
11304 inform (input_location, "valid arguments are: %s;"
11305 " did you mean %qs?", s, hint);
6285e915
ML
11306 else
11307 inform (input_location, "valid arguments are: %s", s);
11308
01f44038
KT
11309 XDELETEVEC (s);
11310}
11311
11312/* Print a hint with a suggestion for a core name that most closely resembles
11313 what the user passed in STR. */
11314
11315inline static void
11316aarch64_print_hint_for_core (const char *str)
11317{
11318 aarch64_print_hint_for_core_or_arch (str, false);
11319}
11320
11321/* Print a hint with a suggestion for an architecture name that most closely
11322 resembles what the user passed in STR. */
11323
11324inline static void
11325aarch64_print_hint_for_arch (const char *str)
11326{
11327 aarch64_print_hint_for_core_or_arch (str, true);
11328}
11329
c7887347
ML
11330
11331/* Print a hint with a suggestion for an extension name
11332 that most closely resembles what the user passed in STR. */
11333
11334void
11335aarch64_print_hint_for_extensions (const std::string &str)
11336{
11337 auto_vec<const char *> candidates;
11338 aarch64_get_all_extension_candidates (&candidates);
11339 char *s;
11340 const char *hint = candidates_list_and_hint (str.c_str (), s, candidates);
11341 if (hint)
11342 inform (input_location, "valid arguments are: %s;"
11343 " did you mean %qs?", s, hint);
11344 else
11345 inform (input_location, "valid arguments are: %s;", s);
11346
11347 XDELETEVEC (s);
11348}
11349
0cfff2a1
KT
11350/* Validate a command-line -mcpu option. Parse the cpu and extensions (if any)
11351   specified in STR and throw errors if appropriate.  Put the results,
361fb3ee
KT
11352   if they are valid, in RES and ISA_FLAGS.  Return whether the option is
11353 valid. */
43e9d192 11354
361fb3ee 11355static bool
0cfff2a1
KT
11356aarch64_validate_mcpu (const char *str, const struct processor **res,
11357 unsigned long *isa_flags)
11358{
c7887347 11359 std::string invalid_extension;
0cfff2a1 11360 enum aarch64_parse_opt_result parse_res
c7887347 11361 = aarch64_parse_cpu (str, res, isa_flags, &invalid_extension);
0cfff2a1
KT
11362
11363 if (parse_res == AARCH64_PARSE_OK)
361fb3ee 11364 return true;
0cfff2a1
KT
11365
11366 switch (parse_res)
11367 {
11368 case AARCH64_PARSE_MISSING_ARG:
fb241da2 11369 error ("missing cpu name in %<-mcpu=%s%>", str);
0cfff2a1
KT
11370 break;
11371 case AARCH64_PARSE_INVALID_ARG:
11372 error ("unknown value %qs for -mcpu", str);
01f44038 11373 aarch64_print_hint_for_core (str);
0cfff2a1
KT
11374 break;
11375 case AARCH64_PARSE_INVALID_FEATURE:
c7887347
ML
11376 error ("invalid feature modifier %qs in %<-mcpu=%s%>",
11377 invalid_extension.c_str (), str);
11378 aarch64_print_hint_for_extensions (invalid_extension);
0cfff2a1
KT
11379 break;
11380 default:
11381 gcc_unreachable ();
11382 }
361fb3ee
KT
11383
11384 return false;
0cfff2a1
KT
11385}
11386
11387/* Validate a command-line -march option. Parse the arch and extensions
11388 (if any) specified in STR and throw errors if appropriate. Put the
361fb3ee
KT
11389 results, if they are valid, in RES and ISA_FLAGS. Return whether the
11390 option is valid. */
0cfff2a1 11391
361fb3ee 11392static bool
0cfff2a1 11393aarch64_validate_march (const char *str, const struct processor **res,
01f44038 11394 unsigned long *isa_flags)
0cfff2a1 11395{
c7887347 11396 std::string invalid_extension;
0cfff2a1 11397 enum aarch64_parse_opt_result parse_res
c7887347 11398 = aarch64_parse_arch (str, res, isa_flags, &invalid_extension);
0cfff2a1
KT
11399
11400 if (parse_res == AARCH64_PARSE_OK)
361fb3ee 11401 return true;
0cfff2a1
KT
11402
11403 switch (parse_res)
11404 {
11405 case AARCH64_PARSE_MISSING_ARG:
fb241da2 11406 error ("missing arch name in %<-march=%s%>", str);
0cfff2a1
KT
11407 break;
11408 case AARCH64_PARSE_INVALID_ARG:
11409 error ("unknown value %qs for -march", str);
01f44038 11410 aarch64_print_hint_for_arch (str);
0cfff2a1
KT
11411 break;
11412 case AARCH64_PARSE_INVALID_FEATURE:
c7887347
ML
11413 error ("invalid feature modifier %qs in %<-march=%s%>",
11414 invalid_extension.c_str (), str);
11415 aarch64_print_hint_for_extensions (invalid_extension);
0cfff2a1
KT
11416 break;
11417 default:
11418 gcc_unreachable ();
11419 }
361fb3ee
KT
11420
11421 return false;
0cfff2a1
KT
11422}
11423
11424/* Validate a command-line -mtune option. Parse the cpu
11425 specified in STR and throw errors if appropriate. Put the
361fb3ee
KT
11426 result, if it is valid, in RES. Return whether the option is
11427 valid. */
0cfff2a1 11428
361fb3ee 11429static bool
0cfff2a1
KT
11430aarch64_validate_mtune (const char *str, const struct processor **res)
11431{
11432 enum aarch64_parse_opt_result parse_res
11433 = aarch64_parse_tune (str, res);
11434
11435 if (parse_res == AARCH64_PARSE_OK)
361fb3ee 11436 return true;
0cfff2a1
KT
11437
11438 switch (parse_res)
11439 {
11440 case AARCH64_PARSE_MISSING_ARG:
fb241da2 11441 error ("missing cpu name in %<-mtune=%s%>", str);
0cfff2a1
KT
11442 break;
11443 case AARCH64_PARSE_INVALID_ARG:
11444 error ("unknown value %qs for -mtune", str);
01f44038 11445 aarch64_print_hint_for_core (str);
0cfff2a1
KT
11446 break;
11447 default:
11448 gcc_unreachable ();
11449 }
361fb3ee
KT
11450 return false;
11451}
11452
11453/* Return the CPU corresponding to the enum CPU.
11454 If it doesn't specify a cpu, return the default. */
11455
11456static const struct processor *
11457aarch64_get_tune_cpu (enum aarch64_processor cpu)
11458{
11459 if (cpu != aarch64_none)
11460 return &all_cores[cpu];
11461
11462 /* The & 0x3f is to extract the bottom 6 bits that encode the
11463 default cpu as selected by the --with-cpu GCC configure option
11464 in config.gcc.
11465 ???: The whole TARGET_CPU_DEFAULT and AARCH64_CPU_DEFAULT_FLAGS
11466 flags mechanism should be reworked to make it more sane. */
11467 return &all_cores[TARGET_CPU_DEFAULT & 0x3f];
11468}
11469
11470/* Return the architecture corresponding to the enum ARCH.
11471 If it doesn't specify a valid architecture, return the default. */
11472
11473static const struct processor *
11474aarch64_get_arch (enum aarch64_arch arch)
11475{
11476 if (arch != aarch64_no_arch)
11477 return &all_architectures[arch];
11478
11479 const struct processor *cpu = &all_cores[TARGET_CPU_DEFAULT & 0x3f];
11480
11481 return &all_architectures[cpu->arch];
0cfff2a1
KT
11482}
11483
43cacb12
RS
11484/* Return the VG value associated with -msve-vector-bits= value VALUE. */
11485
11486static poly_uint16
11487aarch64_convert_sve_vector_bits (aarch64_sve_vector_bits_enum value)
11488{
11489 /* For now generate vector-length agnostic code for -msve-vector-bits=128.
11490 This ensures we can clearly distinguish SVE and Advanced SIMD modes when
11491 deciding which .md file patterns to use and when deciding whether
11492 something is a legitimate address or constant. */
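  /* For example, -msve-vector-bits=256 yields the constant VG 256/64 = 4
     (four 64-bit granules), whereas SVE_SCALABLE and SVE_128 produce the
     length-agnostic poly_uint16 (2, 2) below.  */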
11493 if (value == SVE_SCALABLE || value == SVE_128)
11494 return poly_uint16 (2, 2);
11495 else
11496 return (int) value / 64;
11497}
11498
0cfff2a1
KT
11499/* Implement TARGET_OPTION_OVERRIDE. This is called once in the beginning
11500 and is used to parse the -m{cpu,tune,arch} strings and setup the initial
11501 tuning structs. In particular it must set selected_tune and
11502 aarch64_isa_flags that define the available ISA features and tuning
11503 decisions. It must also set selected_arch as this will be used to
11504 output the .arch asm tags for each function. */
11505
11506static void
11507aarch64_override_options (void)
11508{
11509 unsigned long cpu_isa = 0;
11510 unsigned long arch_isa = 0;
11511 aarch64_isa_flags = 0;
11512
361fb3ee
KT
11513 bool valid_cpu = true;
11514 bool valid_tune = true;
11515 bool valid_arch = true;
11516
0cfff2a1
KT
11517 selected_cpu = NULL;
11518 selected_arch = NULL;
11519 selected_tune = NULL;
11520
11521 /* -mcpu=CPU is shorthand for -march=ARCH_FOR_CPU, -mtune=CPU.
11522 If either of -march or -mtune is given, they override their
11523 respective component of -mcpu. */
11524 if (aarch64_cpu_string)
361fb3ee
KT
11525 valid_cpu = aarch64_validate_mcpu (aarch64_cpu_string, &selected_cpu,
11526 &cpu_isa);
0cfff2a1
KT
11527
11528 if (aarch64_arch_string)
361fb3ee
KT
11529 valid_arch = aarch64_validate_march (aarch64_arch_string, &selected_arch,
11530 &arch_isa);
0cfff2a1
KT
11531
11532 if (aarch64_tune_string)
361fb3ee 11533 valid_tune = aarch64_validate_mtune (aarch64_tune_string, &selected_tune);
43e9d192 11534
6881e3c1
OH
11535#ifdef SUBTARGET_OVERRIDE_OPTIONS
11536 SUBTARGET_OVERRIDE_OPTIONS;
11537#endif
11538
43e9d192
IB
11539 /* If the user did not specify a processor, choose the default
11540 one for them. This will be the CPU set during configuration using
a3cd0246 11541 --with-cpu, otherwise it is "generic". */
43e9d192
IB
11542 if (!selected_cpu)
11543 {
0cfff2a1
KT
11544 if (selected_arch)
11545 {
11546 selected_cpu = &all_cores[selected_arch->ident];
11547 aarch64_isa_flags = arch_isa;
361fb3ee 11548 explicit_arch = selected_arch->arch;
0cfff2a1
KT
11549 }
11550 else
11551 {
361fb3ee
KT
11552 /* Get default configure-time CPU. */
11553 selected_cpu = aarch64_get_tune_cpu (aarch64_none);
0cfff2a1
KT
11554 aarch64_isa_flags = TARGET_CPU_DEFAULT >> 6;
11555 }
361fb3ee
KT
11556
11557 if (selected_tune)
11558 explicit_tune_core = selected_tune->ident;
0cfff2a1
KT
11559 }
11560  /* If both -mcpu and -march are specified, check that they are architecturally
11561 compatible, warn if they're not and prefer the -march ISA flags. */
11562 else if (selected_arch)
11563 {
11564 if (selected_arch->arch != selected_cpu->arch)
11565 {
11566 warning (0, "switch -mcpu=%s conflicts with -march=%s switch",
11567 all_architectures[selected_cpu->arch].name,
11568 selected_arch->name);
11569 }
11570 aarch64_isa_flags = arch_isa;
361fb3ee
KT
11571 explicit_arch = selected_arch->arch;
11572 explicit_tune_core = selected_tune ? selected_tune->ident
11573 : selected_cpu->ident;
0cfff2a1
KT
11574 }
11575 else
11576 {
11577 /* -mcpu but no -march. */
11578 aarch64_isa_flags = cpu_isa;
361fb3ee
KT
11579 explicit_tune_core = selected_tune ? selected_tune->ident
11580 : selected_cpu->ident;
11581 gcc_assert (selected_cpu);
11582 selected_arch = &all_architectures[selected_cpu->arch];
11583 explicit_arch = selected_arch->arch;
43e9d192
IB
11584 }
11585
0cfff2a1
KT
11586  /* Set the arch as well, as we will need it when outputting
11587 the .arch directive in assembly. */
11588 if (!selected_arch)
11589 {
11590 gcc_assert (selected_cpu);
11591 selected_arch = &all_architectures[selected_cpu->arch];
11592 }
43e9d192 11593
43e9d192 11594 if (!selected_tune)
3edaf26d 11595 selected_tune = selected_cpu;
43e9d192 11596
0cfff2a1
KT
11597#ifndef HAVE_AS_MABI_OPTION
11598 /* The compiler may have been configured with 2.23.* binutils, which does
11599 not have support for ILP32. */
11600 if (TARGET_ILP32)
ee61f880 11601 error ("assembler does not support -mabi=ilp32");
0cfff2a1 11602#endif
43e9d192 11603
43cacb12
RS
11604 /* Convert -msve-vector-bits to a VG count. */
11605 aarch64_sve_vg = aarch64_convert_sve_vector_bits (aarch64_sve_vector_bits);
11606
db58fd89 11607 if (aarch64_ra_sign_scope != AARCH64_FUNCTION_NONE && TARGET_ILP32)
ee61f880 11608 sorry ("return address signing is only supported for -mabi=lp64");
db58fd89 11609
361fb3ee
KT
11610 /* Make sure we properly set up the explicit options. */
11611 if ((aarch64_cpu_string && valid_cpu)
11612 || (aarch64_tune_string && valid_tune))
11613 gcc_assert (explicit_tune_core != aarch64_none);
11614
11615 if ((aarch64_cpu_string && valid_cpu)
11616 || (aarch64_arch_string && valid_arch))
11617 gcc_assert (explicit_arch != aarch64_no_arch);
11618
5f7dbaa0
RE
11619 /* The pass to insert speculation tracking runs before
11620 shrink-wrapping and the latter does not know how to update the
11621 tracking status. So disable it in this case. */
11622 if (aarch64_track_speculation)
11623 flag_shrink_wrap = 0;
11624
0cfff2a1
KT
11625 aarch64_override_options_internal (&global_options);
11626
11627 /* Save these options as the default ones in case we push and pop them later
11628 while processing functions with potential target attributes. */
11629 target_option_default_node = target_option_current_node
11630 = build_target_option_node (&global_options);
43e9d192
IB
11631}
11632
11633/* Implement targetm.override_options_after_change. */
11634
11635static void
11636aarch64_override_options_after_change (void)
11637{
0cfff2a1 11638 aarch64_override_options_after_change_1 (&global_options);
43e9d192
IB
11639}
11640
11641static struct machine_function *
11642aarch64_init_machine_status (void)
11643{
11644 struct machine_function *machine;
766090c2 11645 machine = ggc_cleared_alloc<machine_function> ();
43e9d192
IB
11646 return machine;
11647}
11648
11649void
11650aarch64_init_expanders (void)
11651{
11652 init_machine_status = aarch64_init_machine_status;
11653}
11654
11655/* A checking mechanism for the implementation of the various code models. */
11656static void
0cfff2a1 11657initialize_aarch64_code_model (struct gcc_options *opts)
43e9d192 11658{
0cfff2a1 11659 if (opts->x_flag_pic)
43e9d192 11660 {
0cfff2a1 11661 switch (opts->x_aarch64_cmodel_var)
43e9d192
IB
11662 {
11663 case AARCH64_CMODEL_TINY:
11664 aarch64_cmodel = AARCH64_CMODEL_TINY_PIC;
11665 break;
11666 case AARCH64_CMODEL_SMALL:
34ecdb0f 11667#ifdef HAVE_AS_SMALL_PIC_RELOCS
1b1e81f8
JW
11668 aarch64_cmodel = (flag_pic == 2
11669 ? AARCH64_CMODEL_SMALL_PIC
11670 : AARCH64_CMODEL_SMALL_SPIC);
34ecdb0f
JW
11671#else
11672 aarch64_cmodel = AARCH64_CMODEL_SMALL_PIC;
11673#endif
43e9d192
IB
11674 break;
11675 case AARCH64_CMODEL_LARGE:
11676 sorry ("code model %qs with -f%s", "large",
0cfff2a1 11677 opts->x_flag_pic > 1 ? "PIC" : "pic");
1c652781 11678 break;
43e9d192
IB
11679 default:
11680 gcc_unreachable ();
11681 }
11682 }
11683 else
0cfff2a1 11684 aarch64_cmodel = opts->x_aarch64_cmodel_var;
43e9d192
IB
11685}
11686
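/* Illustrative only (editor's sketch, not part of GCC): how the switch above
   combines -mcmodel and -fpic/-fPIC, assuming an assembler with
   HAVE_AS_SMALL_PIC_RELOCS:

     -mcmodel=tiny  -fpic/-fPIC  ->  AARCH64_CMODEL_TINY_PIC
     -mcmodel=small -fpic        ->  AARCH64_CMODEL_SMALL_SPIC
     -mcmodel=small -fPIC        ->  AARCH64_CMODEL_SMALL_PIC
     -mcmodel=large -fpic/-fPIC  ->  rejected with sorry ()
     no -fpic/-fPIC              ->  aarch64_cmodel = opts->x_aarch64_cmodel_var  */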
361fb3ee
KT
11687/* Implement TARGET_OPTION_SAVE. */
11688
11689static void
11690aarch64_option_save (struct cl_target_option *ptr, struct gcc_options *opts)
11691{
11692 ptr->x_aarch64_override_tune_string = opts->x_aarch64_override_tune_string;
11693}
11694
11695/* Implements TARGET_OPTION_RESTORE. Restore the backend codegen decisions
11696 using the information saved in PTR. */
11697
11698static void
11699aarch64_option_restore (struct gcc_options *opts, struct cl_target_option *ptr)
11700{
11701 opts->x_explicit_tune_core = ptr->x_explicit_tune_core;
11702 selected_tune = aarch64_get_tune_cpu (ptr->x_explicit_tune_core);
11703 opts->x_explicit_arch = ptr->x_explicit_arch;
11704 selected_arch = aarch64_get_arch (ptr->x_explicit_arch);
11705 opts->x_aarch64_override_tune_string = ptr->x_aarch64_override_tune_string;
11706
11707 aarch64_override_options_internal (opts);
11708}
11709
11710/* Implement TARGET_OPTION_PRINT. */
11711
11712static void
11713aarch64_option_print (FILE *file, int indent, struct cl_target_option *ptr)
11714{
11715 const struct processor *cpu
11716 = aarch64_get_tune_cpu (ptr->x_explicit_tune_core);
11717 unsigned long isa_flags = ptr->x_aarch64_isa_flags;
11718 const struct processor *arch = aarch64_get_arch (ptr->x_explicit_arch);
054b4005 11719 std::string extension
04a99ebe 11720 = aarch64_get_extension_string_for_isa_flags (isa_flags, arch->flags);
361fb3ee
KT
11721
11722 fprintf (file, "%*sselected tune = %s\n", indent, "", cpu->name);
054b4005
JG
11723 fprintf (file, "%*sselected arch = %s%s\n", indent, "",
11724 arch->name, extension.c_str ());
361fb3ee
KT
11725}
11726
d78006d9
KT
11727static GTY(()) tree aarch64_previous_fndecl;
11728
e4ea20c8
KT
11729void
11730aarch64_reset_previous_fndecl (void)
11731{
11732 aarch64_previous_fndecl = NULL;
11733}
11734
acfc1ac1
KT
11735/* Restore or save the TREE_TARGET_GLOBALS from or to NEW_TREE.
11736 Used by aarch64_set_current_function and aarch64_pragma_target_parse to
11737 make sure optab availability predicates are recomputed when necessary. */
11738
11739void
11740aarch64_save_restore_target_globals (tree new_tree)
11741{
11742 if (TREE_TARGET_GLOBALS (new_tree))
11743 restore_target_globals (TREE_TARGET_GLOBALS (new_tree));
11744 else if (new_tree == target_option_default_node)
11745 restore_target_globals (&default_target_globals);
11746 else
11747 TREE_TARGET_GLOBALS (new_tree) = save_target_globals_default_opts ();
11748}
11749
d78006d9
KT
11750/* Implement TARGET_SET_CURRENT_FUNCTION. Unpack the codegen decisions
11751 like tuning and ISA features from the DECL_FUNCTION_SPECIFIC_TARGET
11752 of the function, if such exists. This function may be called multiple
11753 times on a single function so use aarch64_previous_fndecl to avoid
11754 setting up identical state. */
11755
11756static void
11757aarch64_set_current_function (tree fndecl)
11758{
acfc1ac1
KT
11759 if (!fndecl || fndecl == aarch64_previous_fndecl)
11760 return;
11761
d78006d9
KT
11762 tree old_tree = (aarch64_previous_fndecl
11763 ? DECL_FUNCTION_SPECIFIC_TARGET (aarch64_previous_fndecl)
11764 : NULL_TREE);
11765
acfc1ac1 11766 tree new_tree = DECL_FUNCTION_SPECIFIC_TARGET (fndecl);
d78006d9 11767
acfc1ac1
KT
11768 /* If current function has no attributes but the previous one did,
11769 use the default node. */
11770 if (!new_tree && old_tree)
11771 new_tree = target_option_default_node;
d78006d9 11772
acfc1ac1
KT
11773 /* If nothing to do, return. #pragma GCC reset or #pragma GCC pop to
11774 the default have been handled by aarch64_save_restore_target_globals from
11775 aarch64_pragma_target_parse. */
11776 if (old_tree == new_tree)
11777 return;
d78006d9 11778
acfc1ac1 11779 aarch64_previous_fndecl = fndecl;
6e17a23b 11780
acfc1ac1
KT
11781 /* First set the target options. */
11782 cl_target_option_restore (&global_options, TREE_TARGET_OPTION (new_tree));
6e17a23b 11783
acfc1ac1 11784 aarch64_save_restore_target_globals (new_tree);
d78006d9 11785}
361fb3ee 11786
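/* Illustrative only (hypothetical user code, not part of this file): the
   per-function switching above is what lets a single translation unit mix
   target options, e.g.

     #pragma GCC push_options
     #pragma GCC target ("arch=armv8-a+crc")
     unsigned int with_crc (unsigned int x);
     #pragma GCC pop_options

     unsigned int without_crc (unsigned int x);

   Each function body is compiled under the DECL_FUNCTION_SPECIFIC_TARGET
   recorded for it, which is restored here when that function becomes
   current.  */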
5a2c8331
KT
11787/* Enum describing the various ways we can handle attributes.
11788 In many cases we can reuse the generic option handling machinery. */
11789
11790enum aarch64_attr_opt_type
11791{
11792 aarch64_attr_mask, /* Attribute should set a bit in target_flags. */
11793 aarch64_attr_bool, /* Attribute sets or unsets a boolean variable. */
11794 aarch64_attr_enum, /* Attribute sets an enum variable. */
11795 aarch64_attr_custom /* Attribute requires a custom handling function. */
11796};
11797
11798/* All the information needed to handle a target attribute.
11799 NAME is the name of the attribute.
9c582551 11800 ATTR_TYPE specifies the type of behavior of the attribute as described
5a2c8331
KT
11801 in the definition of enum aarch64_attr_opt_type.
11802 ALLOW_NEG is true if the attribute supports a "no-" form.
ab93e9b7
SE
11803 HANDLER is the function that takes the attribute string as an argument.
11804 It is needed only when the ATTR_TYPE is aarch64_attr_custom.
5a2c8331 11805 OPT_NUM is the enum specifying the option that the attribute modifies.
9c582551 11806 This is needed for attributes that mirror the behavior of a command-line
5a2c8331
KT
11807 option, that is it has ATTR_TYPE aarch64_attr_mask, aarch64_attr_bool or
11808 aarch64_attr_enum. */
11809
11810struct aarch64_attribute_info
11811{
11812 const char *name;
11813 enum aarch64_attr_opt_type attr_type;
11814 bool allow_neg;
ab93e9b7 11815 bool (*handler) (const char *);
5a2c8331
KT
11816 enum opt_code opt_num;
11817};
11818
ab93e9b7 11819/* Handle the ARCH_STR argument to the arch= target attribute. */
5a2c8331
KT
11820
11821static bool
ab93e9b7 11822aarch64_handle_attr_arch (const char *str)
5a2c8331
KT
11823{
11824 const struct processor *tmp_arch = NULL;
c7887347 11825 std::string invalid_extension;
5a2c8331 11826 enum aarch64_parse_opt_result parse_res
c7887347 11827 = aarch64_parse_arch (str, &tmp_arch, &aarch64_isa_flags, &invalid_extension);
5a2c8331
KT
11828
11829 if (parse_res == AARCH64_PARSE_OK)
11830 {
11831 gcc_assert (tmp_arch);
11832 selected_arch = tmp_arch;
11833 explicit_arch = selected_arch->arch;
11834 return true;
11835 }
11836
11837 switch (parse_res)
11838 {
11839 case AARCH64_PARSE_MISSING_ARG:
ab93e9b7 11840 error ("missing name in %<target(\"arch=\")%> pragma or attribute");
5a2c8331
KT
11841 break;
11842 case AARCH64_PARSE_INVALID_ARG:
ab93e9b7 11843 error ("invalid name (\"%s\") in %<target(\"arch=\")%> pragma or attribute", str);
01f44038 11844 aarch64_print_hint_for_arch (str);
5a2c8331
KT
11845 break;
11846 case AARCH64_PARSE_INVALID_FEATURE:
c7887347
ML
11847 error ("invalid feature modifier %s of value (\"%s\") in "
11848 "%<target()%> pragma or attribute", invalid_extension.c_str (), str);
11849 aarch64_print_hint_for_extensions (invalid_extension);
5a2c8331
KT
11850 break;
11851 default:
11852 gcc_unreachable ();
11853 }
11854
11855 return false;
11856}
11857
ab93e9b7 11858/* Handle the argument CPU_STR to the cpu= target attribute. */
5a2c8331
KT
11859
11860static bool
ab93e9b7 11861aarch64_handle_attr_cpu (const char *str)
5a2c8331
KT
11862{
11863 const struct processor *tmp_cpu = NULL;
c7887347 11864 std::string invalid_extension;
5a2c8331 11865 enum aarch64_parse_opt_result parse_res
c7887347 11866 = aarch64_parse_cpu (str, &tmp_cpu, &aarch64_isa_flags, &invalid_extension);
5a2c8331
KT
11867
11868 if (parse_res == AARCH64_PARSE_OK)
11869 {
11870 gcc_assert (tmp_cpu);
11871 selected_tune = tmp_cpu;
11872 explicit_tune_core = selected_tune->ident;
11873
11874 selected_arch = &all_architectures[tmp_cpu->arch];
11875 explicit_arch = selected_arch->arch;
11876 return true;
11877 }
11878
11879 switch (parse_res)
11880 {
11881 case AARCH64_PARSE_MISSING_ARG:
ab93e9b7 11882 error ("missing name in %<target(\"cpu=\")%> pragma or attribute");
5a2c8331
KT
11883 break;
11884 case AARCH64_PARSE_INVALID_ARG:
ab93e9b7 11885 error ("invalid name (\"%s\") in %<target(\"cpu=\")%> pragma or attribute", str);
01f44038 11886 aarch64_print_hint_for_core (str);
5a2c8331
KT
11887 break;
11888 case AARCH64_PARSE_INVALID_FEATURE:
c7887347
ML
11889 error ("invalid feature modifier %s of value (\"%s\") in "
11890 "%<target()%> pragma or attribute", invalid_extension.c_str (), str);
11891 aarch64_print_hint_for_extensions (invalid_extension);
5a2c8331
KT
11892 break;
11893 default:
11894 gcc_unreachable ();
11895 }
11896
11897 return false;
11898}
11899
ab93e9b7 11900/* Handle the argument STR to the tune= target attribute. */
5a2c8331
KT
11901
11902static bool
ab93e9b7 11903aarch64_handle_attr_tune (const char *str)
5a2c8331
KT
11904{
11905 const struct processor *tmp_tune = NULL;
11906 enum aarch64_parse_opt_result parse_res
11907 = aarch64_parse_tune (str, &tmp_tune);
11908
11909 if (parse_res == AARCH64_PARSE_OK)
11910 {
11911 gcc_assert (tmp_tune);
11912 selected_tune = tmp_tune;
11913 explicit_tune_core = selected_tune->ident;
11914 return true;
11915 }
11916
11917 switch (parse_res)
11918 {
11919 case AARCH64_PARSE_INVALID_ARG:
ab93e9b7 11920 error ("invalid name (\"%s\") in %<target(\"tune=\")%> pragma or attribute", str);
01f44038 11921 aarch64_print_hint_for_core (str);
5a2c8331
KT
11922 break;
11923 default:
11924 gcc_unreachable ();
11925 }
11926
11927 return false;
11928}
11929
11930/* Parse an architecture extensions target attribute string specified in STR.
11931 For example "+fp+nosimd". Show any errors if needed. Return TRUE
11932 if successful. Update aarch64_isa_flags to reflect the ISA features
ab93e9b7 11933 modified. */
5a2c8331
KT
11934
11935static bool
ab93e9b7 11936aarch64_handle_attr_isa_flags (char *str)
5a2c8331
KT
11937{
11938 enum aarch64_parse_opt_result parse_res;
11939 unsigned long isa_flags = aarch64_isa_flags;
11940
e4ea20c8
KT
11941 /* We allow "+nothing" in the beginning to clear out all architectural
11942 features if the user wants to handpick specific features. */
11943 if (strncmp ("+nothing", str, 8) == 0)
11944 {
11945 isa_flags = 0;
11946 str += 8;
11947 }
11948
c7887347
ML
11949 std::string invalid_extension;
11950 parse_res = aarch64_parse_extension (str, &isa_flags, &invalid_extension);
5a2c8331
KT
11951
11952 if (parse_res == AARCH64_PARSE_OK)
11953 {
11954 aarch64_isa_flags = isa_flags;
11955 return true;
11956 }
11957
11958 switch (parse_res)
11959 {
11960 case AARCH64_PARSE_MISSING_ARG:
ab93e9b7 11961 error ("missing value in %<target()%> pragma or attribute");
5a2c8331
KT
11962 break;
11963
11964 case AARCH64_PARSE_INVALID_FEATURE:
c7887347
ML
11965 error ("invalid feature modifier %s of value (\"%s\") in "
11966 "%<target()%> pragma or attribute", invalid_extension.c_str (), str);
5a2c8331
KT
11967 break;
11968
11969 default:
11970 gcc_unreachable ();
11971 }
11972
11973 return false;
11974}
11975
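/* Illustrative only (hypothetical user code): the "+nothing" prefix handled
   above lets an attribute build the ISA feature set from scratch, e.g.

     __attribute__ ((target ("+nothing+fp")))
     double fp_only (double x) { return x * 2.0; }

   clears aarch64_isa_flags and then adds back only the "fp" extension for
   this one function.  */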
11976/* The target attributes that we support. On top of these we also support just
11977 ISA extensions, like __attribute__ ((target ("+crc"))), but that case is
11978 handled explicitly in aarch64_process_one_target_attr. */
11979
11980static const struct aarch64_attribute_info aarch64_attributes[] =
11981{
11982 { "general-regs-only", aarch64_attr_mask, false, NULL,
11983 OPT_mgeneral_regs_only },
11984 { "fix-cortex-a53-835769", aarch64_attr_bool, true, NULL,
11985 OPT_mfix_cortex_a53_835769 },
48bb1a55
CL
11986 { "fix-cortex-a53-843419", aarch64_attr_bool, true, NULL,
11987 OPT_mfix_cortex_a53_843419 },
5a2c8331 11988 { "cmodel", aarch64_attr_enum, false, NULL, OPT_mcmodel_ },
675d044c 11989 { "strict-align", aarch64_attr_mask, true, NULL, OPT_mstrict_align },
5a2c8331
KT
11990 { "omit-leaf-frame-pointer", aarch64_attr_bool, true, NULL,
11991 OPT_momit_leaf_frame_pointer },
11992 { "tls-dialect", aarch64_attr_enum, false, NULL, OPT_mtls_dialect_ },
11993 { "arch", aarch64_attr_custom, false, aarch64_handle_attr_arch,
11994 OPT_march_ },
11995 { "cpu", aarch64_attr_custom, false, aarch64_handle_attr_cpu, OPT_mcpu_ },
11996 { "tune", aarch64_attr_custom, false, aarch64_handle_attr_tune,
11997 OPT_mtune_ },
db58fd89
JW
11998 { "sign-return-address", aarch64_attr_enum, false, NULL,
11999 OPT_msign_return_address_ },
5a2c8331
KT
12000 { NULL, aarch64_attr_custom, false, NULL, OPT____ }
12001};
12002
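/* Illustrative only (hypothetical user code): one example of each attribute
   kind in the table above:

     aarch64_attr_mask    __attribute__ ((target ("general-regs-only")))
     aarch64_attr_bool    __attribute__ ((target ("no-omit-leaf-frame-pointer")))
     aarch64_attr_enum    __attribute__ ((target ("cmodel=small")))
     aarch64_attr_custom  __attribute__ ((target ("tune=cortex-a57")))

   The "no-" form is accepted only where allow_neg is true, as checked in
   aarch64_process_one_target_attr below.  */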
12003/* Parse ARG_STR which contains the definition of one target attribute.
ab93e9b7 12004 Show appropriate errors if any or return true if the attribute is valid. */
5a2c8331
KT
12005
12006static bool
ab93e9b7 12007aarch64_process_one_target_attr (char *arg_str)
5a2c8331
KT
12008{
12009 bool invert = false;
12010
12011 size_t len = strlen (arg_str);
12012
12013 if (len == 0)
12014 {
ab93e9b7 12015 error ("malformed %<target()%> pragma or attribute");
5a2c8331
KT
12016 return false;
12017 }
12018
12019 char *str_to_check = (char *) alloca (len + 1);
12020 strcpy (str_to_check, arg_str);
12021
12022 /* Skip leading whitespace. */
12023 while (*str_to_check == ' ' || *str_to_check == '\t')
12024 str_to_check++;
12025
12026 /* We have something like __attribute__ ((target ("+fp+nosimd"))).
12027 It is easier to detect and handle it explicitly here rather than going
12028 through the machinery for the rest of the target attributes in this
12029 function. */
12030 if (*str_to_check == '+')
ab93e9b7 12031 return aarch64_handle_attr_isa_flags (str_to_check);
5a2c8331
KT
12032
12033 if (len > 3 && strncmp (str_to_check, "no-", 3) == 0)
12034 {
12035 invert = true;
12036 str_to_check += 3;
12037 }
12038 char *arg = strchr (str_to_check, '=');
12039
12040 /* If we found opt=foo then terminate STR_TO_CHECK at the '='
12041 and point ARG to "foo". */
12042 if (arg)
12043 {
12044 *arg = '\0';
12045 arg++;
12046 }
12047 const struct aarch64_attribute_info *p_attr;
16d12992 12048 bool found = false;
5a2c8331
KT
12049 for (p_attr = aarch64_attributes; p_attr->name; p_attr++)
12050 {
12051 /* If the names don't match up, or the user has given an argument
12052 to an attribute that doesn't accept one, or didn't give an argument
12053 to an attribute that expects one, fail to match. */
12054 if (strcmp (str_to_check, p_attr->name) != 0)
12055 continue;
12056
16d12992 12057 found = true;
5a2c8331
KT
12058 bool attr_need_arg_p = p_attr->attr_type == aarch64_attr_custom
12059 || p_attr->attr_type == aarch64_attr_enum;
12060
12061 if (attr_need_arg_p ^ (arg != NULL))
12062 {
ab93e9b7 12063 error ("pragma or attribute %<target(\"%s\")%> does not accept an argument", str_to_check);
5a2c8331
KT
12064 return false;
12065 }
12066
12067 /* If the name matches but the attribute does not allow "no-" versions
12068 then we can't match. */
12069 if (invert && !p_attr->allow_neg)
12070 {
ab93e9b7 12071 error ("pragma or attribute %<target(\"%s\")%> does not allow a negated form", str_to_check);
5a2c8331
KT
12072 return false;
12073 }
12074
12075 switch (p_attr->attr_type)
12076 {
12077 /* Has a custom handler registered.
12078 For example, cpu=, arch=, tune=. */
12079 case aarch64_attr_custom:
12080 gcc_assert (p_attr->handler);
ab93e9b7 12081 if (!p_attr->handler (arg))
5a2c8331
KT
12082 return false;
12083 break;
12084
12085 /* Either set or unset a boolean option. */
12086 case aarch64_attr_bool:
12087 {
12088 struct cl_decoded_option decoded;
12089
12090 generate_option (p_attr->opt_num, NULL, !invert,
12091 CL_TARGET, &decoded);
12092 aarch64_handle_option (&global_options, &global_options_set,
12093 &decoded, input_location);
12094 break;
12095 }
12096 /* Set or unset a bit in the target_flags. aarch64_handle_option
12097 should know what mask to apply given the option number. */
12098 case aarch64_attr_mask:
12099 {
12100 struct cl_decoded_option decoded;
12101 /* We only need to specify the option number.
12102 aarch64_handle_option will know which mask to apply. */
12103 decoded.opt_index = p_attr->opt_num;
12104 decoded.value = !invert;
12105 aarch64_handle_option (&global_options, &global_options_set,
12106 &decoded, input_location);
12107 break;
12108 }
12109 /* Use the option setting machinery to set an option to an enum. */
12110 case aarch64_attr_enum:
12111 {
12112 gcc_assert (arg);
12113 bool valid;
12114 int value;
12115 valid = opt_enum_arg_to_value (p_attr->opt_num, arg,
12116 &value, CL_TARGET);
12117 if (valid)
12118 {
12119 set_option (&global_options, NULL, p_attr->opt_num, value,
12120 NULL, DK_UNSPECIFIED, input_location,
12121 global_dc);
12122 }
12123 else
12124 {
ab93e9b7 12125 error ("pragma or attribute %<target(\"%s=%s\")%> is not valid", str_to_check, arg);
5a2c8331
KT
12126 }
12127 break;
12128 }
12129 default:
12130 gcc_unreachable ();
12131 }
12132 }
12133
16d12992
KT
12134 /* If we reached here we either have found an attribute and validated
12135 it or didn't match any. If we matched an attribute but its arguments
12136 were malformed we will have returned false already. */
12137 return found;
5a2c8331
KT
12138}
12139
12140/* Count how many times the character C appears in
12141 NULL-terminated string STR. */
12142
12143static unsigned int
12144num_occurences_in_str (char c, char *str)
12145{
12146 unsigned int res = 0;
12147 while (*str != '\0')
12148 {
12149 if (*str == c)
12150 res++;
12151
12152 str++;
12153 }
12154
12155 return res;
12156}
12157
12158/* Parse the tree in ARGS that contains the target attribute information
ab93e9b7 12159 and update the global target options space. */
5a2c8331
KT
12160
12161bool
ab93e9b7 12162aarch64_process_target_attr (tree args)
5a2c8331
KT
12163{
12164 if (TREE_CODE (args) == TREE_LIST)
12165 {
12166 do
12167 {
12168 tree head = TREE_VALUE (args);
12169 if (head)
12170 {
ab93e9b7 12171 if (!aarch64_process_target_attr (head))
5a2c8331
KT
12172 return false;
12173 }
12174 args = TREE_CHAIN (args);
12175 } while (args);
12176
12177 return true;
12178 }
3b6cb9e3
ML
12179
12180 if (TREE_CODE (args) != STRING_CST)
12181 {
12182 error ("attribute %<target%> argument not a string");
12183 return false;
12184 }
5a2c8331
KT
12185
12186 size_t len = strlen (TREE_STRING_POINTER (args));
12187 char *str_to_check = (char *) alloca (len + 1);
12188 strcpy (str_to_check, TREE_STRING_POINTER (args));
12189
12190 if (len == 0)
12191 {
ab93e9b7 12192 error ("malformed %<target()%> pragma or attribute");
5a2c8331
KT
12193 return false;
12194 }
12195
12196 /* Used to catch empty tokens between commas, e.g.
12197 attribute ((target ("attr1,,attr2"))). */
12198 unsigned int num_commas = num_occurences_in_str (',', str_to_check);
12199
12200 /* Handle multiple target attributes separated by ','. */
7185a4eb 12201 char *token = strtok_r (str_to_check, ",", &str_to_check);
5a2c8331
KT
12202
12203 unsigned int num_attrs = 0;
12204 while (token)
12205 {
12206 num_attrs++;
ab93e9b7 12207 if (!aarch64_process_one_target_attr (token))
5a2c8331 12208 {
ab93e9b7 12209 error ("pragma or attribute %<target(\"%s\")%> is not valid", token);
5a2c8331
KT
12210 return false;
12211 }
12212
7185a4eb 12213 token = strtok_r (NULL, ",", &str_to_check);
5a2c8331
KT
12214 }
12215
12216 if (num_attrs != num_commas + 1)
12217 {
ab93e9b7 12218 error ("malformed %<target(\"%s\")%> pragma or attribute", TREE_STRING_POINTER (args));
5a2c8331
KT
12219 return false;
12220 }
12221
12222 return true;
12223}
12224
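/* Illustrative only (hypothetical attribute strings): the comma handling
   above accepts

     __attribute__ ((target ("arch=armv8-a+crc,no-fix-cortex-a53-835769")))

   as two attributes, while

     __attribute__ ((target ("arch=armv8-a,,tune=cortex-a57")))

   is rejected: strtok_r skips the empty token and yields two attributes,
   but num_commas + 1 is three, so the final check reports a malformed
   pragma or attribute.  */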
12225/* Implement TARGET_OPTION_VALID_ATTRIBUTE_P. This is used to
12226 process attribute ((target ("..."))). */
12227
12228static bool
12229aarch64_option_valid_attribute_p (tree fndecl, tree, tree args, int)
12230{
12231 struct cl_target_option cur_target;
12232 bool ret;
12233 tree old_optimize;
12234 tree new_target, new_optimize;
12235 tree existing_target = DECL_FUNCTION_SPECIFIC_TARGET (fndecl);
91d0e8de
KT
12236
12237 /* If what we're processing is the current pragma string then the
12238 target option node is already stored in target_option_current_node
12239 by aarch64_pragma_target_parse in aarch64-c.c. Use that to avoid
12240 having to re-parse the string. This is especially useful to keep
12241 arm_neon.h compile times down since that header contains a lot
12242 of intrinsics enclosed in pragmas. */
12243 if (!existing_target && args == current_target_pragma)
12244 {
12245 DECL_FUNCTION_SPECIFIC_TARGET (fndecl) = target_option_current_node;
12246 return true;
12247 }
5a2c8331
KT
12248 tree func_optimize = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl);
12249
12250 old_optimize = build_optimization_node (&global_options);
12251 func_optimize = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl);
12252
12253 /* If the function changed the optimization levels as well as setting
12254 target options, start with the optimizations specified. */
12255 if (func_optimize && func_optimize != old_optimize)
12256 cl_optimization_restore (&global_options,
12257 TREE_OPTIMIZATION (func_optimize));
12258
12259 /* Save the current target options to restore at the end. */
12260 cl_target_option_save (&cur_target, &global_options);
12261
12262 /* If fndecl already has some target attributes applied to it, unpack
12263 them so that we add this attribute on top of them, rather than
12264 overwriting them. */
12265 if (existing_target)
12266 {
12267 struct cl_target_option *existing_options
12268 = TREE_TARGET_OPTION (existing_target);
12269
12270 if (existing_options)
12271 cl_target_option_restore (&global_options, existing_options);
12272 }
12273 else
12274 cl_target_option_restore (&global_options,
12275 TREE_TARGET_OPTION (target_option_current_node));
12276
ab93e9b7 12277 ret = aarch64_process_target_attr (args);
5a2c8331
KT
12278
12279 /* Set up any additional state. */
12280 if (ret)
12281 {
12282 aarch64_override_options_internal (&global_options);
e95a988a
KT
12283 /* Initialize SIMD builtins if we haven't already.
12284 Set current_target_pragma to NULL for the duration so that
12285 the builtin initialization code doesn't try to tag the functions
12286 being built with the attributes specified by any current pragma, thus
12287 going into an infinite recursion. */
12288 if (TARGET_SIMD)
12289 {
12290 tree saved_current_target_pragma = current_target_pragma;
12291 current_target_pragma = NULL;
12292 aarch64_init_simd_builtins ();
12293 current_target_pragma = saved_current_target_pragma;
12294 }
5a2c8331
KT
12295 new_target = build_target_option_node (&global_options);
12296 }
12297 else
12298 new_target = NULL;
12299
12300 new_optimize = build_optimization_node (&global_options);
12301
12302 if (fndecl && ret)
12303 {
12304 DECL_FUNCTION_SPECIFIC_TARGET (fndecl) = new_target;
12305
12306 if (old_optimize != new_optimize)
12307 DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl) = new_optimize;
12308 }
12309
12310 cl_target_option_restore (&global_options, &cur_target);
12311
12312 if (old_optimize != new_optimize)
12313 cl_optimization_restore (&global_options,
12314 TREE_OPTIMIZATION (old_optimize));
12315 return ret;
12316}
12317
1fd8d40c
KT
12318/* Helper for aarch64_can_inline_p. In the case where CALLER and CALLEE are
12319 tri-bool options (yes, no, don't care) and the default value is
12320 DEF, determine whether to reject inlining. */
12321
12322static bool
12323aarch64_tribools_ok_for_inlining_p (int caller, int callee,
12324 int dont_care, int def)
12325{
12326 /* If the callee doesn't care, always allow inlining. */
12327 if (callee == dont_care)
12328 return true;
12329
12330 /* If the caller doesn't care, always allow inlining. */
12331 if (caller == dont_care)
12332 return true;
12333
12334 /* Otherwise, allow inlining if either the callee and caller values
12335 agree, or if the callee is using the default value. */
12336 return (callee == caller || callee == def);
12337}
12338
12339/* Implement TARGET_CAN_INLINE_P. Decide whether it is valid
12340 to inline CALLEE into CALLER based on target-specific info.
12341 Make sure that the caller and callee have compatible architectural
12342 features. Then go through the other possible target attributes
12343 and see if they can block inlining. Try not to reject always_inline
12344 callees unless they are incompatible architecturally. */
12345
12346static bool
12347aarch64_can_inline_p (tree caller, tree callee)
12348{
12349 tree caller_tree = DECL_FUNCTION_SPECIFIC_TARGET (caller);
12350 tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (callee);
12351
1fd8d40c
KT
12352 struct cl_target_option *caller_opts
12353 = TREE_TARGET_OPTION (caller_tree ? caller_tree
12354 : target_option_default_node);
12355
675d044c
SD
12356 struct cl_target_option *callee_opts
12357 = TREE_TARGET_OPTION (callee_tree ? callee_tree
12358 : target_option_default_node);
1fd8d40c
KT
12359
12360 /* Callee's ISA flags should be a subset of the caller's. */
12361 if ((caller_opts->x_aarch64_isa_flags & callee_opts->x_aarch64_isa_flags)
12362 != callee_opts->x_aarch64_isa_flags)
12363 return false;
12364
12365 /* Allow non-strict aligned functions inlining into strict
12366 aligned ones. */
12367 if ((TARGET_STRICT_ALIGN_P (caller_opts->x_target_flags)
12368 != TARGET_STRICT_ALIGN_P (callee_opts->x_target_flags))
12369 && !(!TARGET_STRICT_ALIGN_P (callee_opts->x_target_flags)
12370 && TARGET_STRICT_ALIGN_P (caller_opts->x_target_flags)))
12371 return false;
12372
12373 bool always_inline = lookup_attribute ("always_inline",
12374 DECL_ATTRIBUTES (callee));
12375
12376 /* If the architectural features match up and the callee is always_inline
12377 then the other attributes don't matter. */
12378 if (always_inline)
12379 return true;
12380
12381 if (caller_opts->x_aarch64_cmodel_var
12382 != callee_opts->x_aarch64_cmodel_var)
12383 return false;
12384
12385 if (caller_opts->x_aarch64_tls_dialect
12386 != callee_opts->x_aarch64_tls_dialect)
12387 return false;
12388
12389 /* Honour explicit requests to workaround errata. */
12390 if (!aarch64_tribools_ok_for_inlining_p (
12391 caller_opts->x_aarch64_fix_a53_err835769,
12392 callee_opts->x_aarch64_fix_a53_err835769,
12393 2, TARGET_FIX_ERR_A53_835769_DEFAULT))
12394 return false;
12395
48bb1a55
CL
12396 if (!aarch64_tribools_ok_for_inlining_p (
12397 caller_opts->x_aarch64_fix_a53_err843419,
12398 callee_opts->x_aarch64_fix_a53_err843419,
12399 2, TARGET_FIX_ERR_A53_843419))
12400 return false;
12401
1fd8d40c
KT
12402 /* If the user explicitly specified -momit-leaf-frame-pointer for the
12403 caller and callee and they don't match up, reject inlining. */
12404 if (!aarch64_tribools_ok_for_inlining_p (
12405 caller_opts->x_flag_omit_leaf_frame_pointer,
12406 callee_opts->x_flag_omit_leaf_frame_pointer,
12407 2, 1))
12408 return false;
12409
12410 /* If the callee has specific tuning overrides, respect them. */
12411 if (callee_opts->x_aarch64_override_tune_string != NULL
12412 && caller_opts->x_aarch64_override_tune_string == NULL)
12413 return false;
12414
12415 /* If the user specified tuning override strings for the
12416 caller and callee and they don't match up, reject inlining.
12417 We just do a string compare here, we don't analyze the meaning
12418 of the string, as it would be too costly for little gain. */
12419 if (callee_opts->x_aarch64_override_tune_string
12420 && caller_opts->x_aarch64_override_tune_string
12421 && (strcmp (callee_opts->x_aarch64_override_tune_string,
12422 caller_opts->x_aarch64_override_tune_string) != 0))
12423 return false;
12424
12425 return true;
12426}
12427
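/* Illustrative only (hypothetical user code): under the subset rule above, a
   callee that needs more ISA features than its caller is not inlined:

     __attribute__ ((target ("+crc"))) static inline
     unsigned int crc_step (unsigned int x) { return x; }

     unsigned int plain (unsigned int x) { return crc_step (x); }

   Here plain () lacks the CRC flag, so aarch64_can_inline_p returns false;
   the reverse direction (caller has +crc, callee does not) passes this
   check.  */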
43e9d192
IB
12428/* Return true if SYMBOL_REF X binds locally. */
12429
12430static bool
12431aarch64_symbol_binds_local_p (const_rtx x)
12432{
12433 return (SYMBOL_REF_DECL (x)
12434 ? targetm.binds_local_p (SYMBOL_REF_DECL (x))
12435 : SYMBOL_REF_LOCAL_P (x));
12436}
12437
12438/* Return true if SYMBOL_REF X is thread local */
12439static bool
12440aarch64_tls_symbol_p (rtx x)
12441{
12442 if (! TARGET_HAVE_TLS)
12443 return false;
12444
12445 if (GET_CODE (x) != SYMBOL_REF)
12446 return false;
12447
12448 return SYMBOL_REF_TLS_MODEL (x) != 0;
12449}
12450
12451/* Classify a TLS symbol into one of the TLS kinds. */
12452enum aarch64_symbol_type
12453aarch64_classify_tls_symbol (rtx x)
12454{
12455 enum tls_model tls_kind = tls_symbolic_operand_type (x);
12456
12457 switch (tls_kind)
12458 {
12459 case TLS_MODEL_GLOBAL_DYNAMIC:
12460 case TLS_MODEL_LOCAL_DYNAMIC:
12461 return TARGET_TLS_DESC ? SYMBOL_SMALL_TLSDESC : SYMBOL_SMALL_TLSGD;
12462
12463 case TLS_MODEL_INITIAL_EXEC:
5ae7caad
JW
12464 switch (aarch64_cmodel)
12465 {
12466 case AARCH64_CMODEL_TINY:
12467 case AARCH64_CMODEL_TINY_PIC:
12468 return SYMBOL_TINY_TLSIE;
12469 default:
79496620 12470 return SYMBOL_SMALL_TLSIE;
5ae7caad 12471 }
43e9d192
IB
12472
12473 case TLS_MODEL_LOCAL_EXEC:
cbf5629e
JW
12474 if (aarch64_tls_size == 12)
12475 return SYMBOL_TLSLE12;
12476 else if (aarch64_tls_size == 24)
12477 return SYMBOL_TLSLE24;
12478 else if (aarch64_tls_size == 32)
12479 return SYMBOL_TLSLE32;
12480 else if (aarch64_tls_size == 48)
12481 return SYMBOL_TLSLE48;
12482 else
12483 gcc_unreachable ();
43e9d192
IB
12484
12485 case TLS_MODEL_EMULATED:
12486 case TLS_MODEL_NONE:
12487 return SYMBOL_FORCE_TO_MEM;
12488
12489 default:
12490 gcc_unreachable ();
12491 }
12492}
12493
43cacb12
RS
12494/* Return the correct method for accessing X + OFFSET, where X is either
12495 a SYMBOL_REF or LABEL_REF. */
17f4d4bf 12496
43e9d192 12497enum aarch64_symbol_type
43cacb12 12498aarch64_classify_symbol (rtx x, HOST_WIDE_INT offset)
43e9d192
IB
12499{
12500 if (GET_CODE (x) == LABEL_REF)
12501 {
12502 switch (aarch64_cmodel)
12503 {
12504 case AARCH64_CMODEL_LARGE:
12505 return SYMBOL_FORCE_TO_MEM;
12506
12507 case AARCH64_CMODEL_TINY_PIC:
12508 case AARCH64_CMODEL_TINY:
a5350ddc
CSS
12509 return SYMBOL_TINY_ABSOLUTE;
12510
1b1e81f8 12511 case AARCH64_CMODEL_SMALL_SPIC:
43e9d192
IB
12512 case AARCH64_CMODEL_SMALL_PIC:
12513 case AARCH64_CMODEL_SMALL:
12514 return SYMBOL_SMALL_ABSOLUTE;
12515
12516 default:
12517 gcc_unreachable ();
12518 }
12519 }
12520
17f4d4bf 12521 if (GET_CODE (x) == SYMBOL_REF)
43e9d192 12522 {
43e9d192
IB
12523 if (aarch64_tls_symbol_p (x))
12524 return aarch64_classify_tls_symbol (x);
12525
17f4d4bf
CSS
12526 switch (aarch64_cmodel)
12527 {
12528 case AARCH64_CMODEL_TINY:
15f6e0da 12529 /* When we retrieve symbol + offset address, we have to make sure
f8b756b7
TB
12530 the offset does not cause overflow of the final address. But
12531 we have no way of knowing the address of the symbol at compile time,
12532 so we can't accurately say if the distance between the PC and
12533 symbol + offset is outside the addressable range of +/-1M in the
12534 TINY code model. So we rely on images not being greater than
12535 1M, cap the offset at 1M, and anything beyond 1M will have to
15f6e0da
RR
12536 be loaded using an alternative mechanism. Furthermore, if the
12537 symbol is a weak reference to something that isn't known to
12538 resolve to a symbol in this module, then force to memory. */
12539 if ((SYMBOL_REF_WEAK (x)
12540 && !aarch64_symbol_binds_local_p (x))
43cacb12 12541 || !IN_RANGE (offset, -1048575, 1048575))
a5350ddc
CSS
12542 return SYMBOL_FORCE_TO_MEM;
12543 return SYMBOL_TINY_ABSOLUTE;
12544
17f4d4bf 12545 case AARCH64_CMODEL_SMALL:
f8b756b7
TB
12546 /* Same reasoning as the tiny code model, but the offset cap here is
12547 4G. */
15f6e0da
RR
12548 if ((SYMBOL_REF_WEAK (x)
12549 && !aarch64_symbol_binds_local_p (x))
43cacb12 12550 || !IN_RANGE (offset, HOST_WIDE_INT_C (-4294967263),
3ff5d1f0 12551 HOST_WIDE_INT_C (4294967264)))
17f4d4bf
CSS
12552 return SYMBOL_FORCE_TO_MEM;
12553 return SYMBOL_SMALL_ABSOLUTE;
43e9d192 12554
17f4d4bf 12555 case AARCH64_CMODEL_TINY_PIC:
38e6c9a6 12556 if (!aarch64_symbol_binds_local_p (x))
87dd8ab0 12557 return SYMBOL_TINY_GOT;
38e6c9a6
MS
12558 return SYMBOL_TINY_ABSOLUTE;
12559
1b1e81f8 12560 case AARCH64_CMODEL_SMALL_SPIC:
17f4d4bf
CSS
12561 case AARCH64_CMODEL_SMALL_PIC:
12562 if (!aarch64_symbol_binds_local_p (x))
1b1e81f8
JW
12563 return (aarch64_cmodel == AARCH64_CMODEL_SMALL_SPIC
12564 ? SYMBOL_SMALL_GOT_28K : SYMBOL_SMALL_GOT_4G);
17f4d4bf 12565 return SYMBOL_SMALL_ABSOLUTE;
43e9d192 12566
9ee6540a
WD
12567 case AARCH64_CMODEL_LARGE:
12568 /* This is alright even in PIC code as the constant
12569 pool reference is always PC relative and within
12570 the same translation unit. */
d47d34bb 12571 if (!aarch64_pcrelative_literal_loads && CONSTANT_POOL_ADDRESS_P (x))
9ee6540a
WD
12572 return SYMBOL_SMALL_ABSOLUTE;
12573 else
12574 return SYMBOL_FORCE_TO_MEM;
12575
17f4d4bf
CSS
12576 default:
12577 gcc_unreachable ();
12578 }
43e9d192 12579 }
17f4d4bf 12580
43e9d192
IB
12581 /* By default push everything into the constant pool. */
12582 return SYMBOL_FORCE_TO_MEM;
12583}
12584
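/* Illustrative only (editor's sketch): the offset caps above mean that under
   -mcmodel=tiny a reference such as

     extern char table[];
     ... table + 0x200000 ...

   is classified SYMBOL_FORCE_TO_MEM (0x200000 lies outside +/-1M), while
   table + 4096 stays SYMBOL_TINY_ABSOLUTE, assuming the symbol is not a
   weak reference that might resolve outside the module.  */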
43e9d192
IB
12585bool
12586aarch64_constant_address_p (rtx x)
12587{
12588 return (CONSTANT_P (x) && memory_address_p (DImode, x));
12589}
12590
12591bool
12592aarch64_legitimate_pic_operand_p (rtx x)
12593{
12594 if (GET_CODE (x) == SYMBOL_REF
12595 || (GET_CODE (x) == CONST
12596 && GET_CODE (XEXP (x, 0)) == PLUS
12597 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF))
12598 return false;
12599
12600 return true;
12601}
12602
26895c21
WD
12603/* Implement TARGET_LEGITIMATE_CONSTANT_P hook. Return true for constants
12604 that should be rematerialized rather than spilled. */
3520f7cc 12605
43e9d192 12606static bool
ef4bddc2 12607aarch64_legitimate_constant_p (machine_mode mode, rtx x)
43e9d192 12608{
26895c21 12609 /* Support CSE and rematerialization of common constants. */
c0bb5bc5 12610 if (CONST_INT_P (x)
9f7b87ca 12611 || (CONST_DOUBLE_P (x) && GET_MODE_CLASS (mode) == MODE_FLOAT)
c0bb5bc5 12612 || GET_CODE (x) == CONST_VECTOR)
26895c21
WD
12613 return true;
12614
43cacb12
RS
12615 /* Do not allow vector struct mode constants for Advanced SIMD.
12616 We could support 0 and -1 easily, but they need support in
12617 aarch64-simd.md. */
12618 unsigned int vec_flags = aarch64_classify_vector_mode (mode);
12619 if (vec_flags == (VEC_ADVSIMD | VEC_STRUCT))
43e9d192
IB
12620 return false;
12621
43cacb12
RS
12622 /* Only accept variable-length vector constants if they can be
12623 handled directly.
12624
12625 ??? It would be possible to handle rematerialization of other
12626 constants via secondary reloads. */
12627 if (vec_flags & VEC_ANY_SVE)
12628 return aarch64_simd_valid_immediate (x, NULL);
12629
509bb9b6
RS
12630 if (GET_CODE (x) == HIGH)
12631 x = XEXP (x, 0);
12632
43cacb12
RS
12633 /* Accept polynomial constants that can be calculated by using the
12634 destination of a move as the sole temporary. Constants that
12635 require a second temporary cannot be rematerialized (they can't be
12636 forced to memory and also aren't legitimate constants). */
12637 poly_int64 offset;
12638 if (poly_int_rtx_p (x, &offset))
12639 return aarch64_offset_temporaries (false, offset) <= 1;
12640
12641 /* If an offset is being added to something else, we need to allow the
12642 base to be moved into the destination register, meaning that there
12643 are no free temporaries for the offset. */
12644 x = strip_offset (x, &offset);
12645 if (!offset.is_constant () && aarch64_offset_temporaries (true, offset) > 0)
12646 return false;
26895c21 12647
43cacb12
RS
12648 /* Do not allow const (plus (anchor_symbol, const_int)). */
12649 if (maybe_ne (offset, 0) && SYMBOL_REF_P (x) && SYMBOL_REF_ANCHOR_P (x))
12650 return false;
26895c21 12651
f28e54bd
WD
12652 /* Treat symbols as constants. Avoid TLS symbols as they are complex,
12653 so spilling them is better than rematerialization. */
12654 if (SYMBOL_REF_P (x) && !SYMBOL_REF_TLS_MODEL (x))
12655 return true;
12656
26895c21
WD
12657 /* Label references are always constant. */
12658 if (GET_CODE (x) == LABEL_REF)
12659 return true;
12660
12661 return false;
43e9d192
IB
12662}
12663
a5bc806c 12664rtx
43e9d192
IB
12665aarch64_load_tp (rtx target)
12666{
12667 if (!target
12668 || GET_MODE (target) != Pmode
12669 || !register_operand (target, Pmode))
12670 target = gen_reg_rtx (Pmode);
12671
12672 /* Can return in any reg. */
12673 emit_insn (gen_aarch64_load_tp_hard (target));
12674 return target;
12675}
12676
43e9d192
IB
12677/* On AAPCS systems, this is the "struct __va_list". */
12678static GTY(()) tree va_list_type;
12679
12680/* Implement TARGET_BUILD_BUILTIN_VA_LIST.
12681 Return the type to use as __builtin_va_list.
12682
12683 AAPCS64 \S 7.1.4 requires that va_list be a typedef for a type defined as:
12684
12685 struct __va_list
12686 {
12687 void *__stack;
12688 void *__gr_top;
12689 void *__vr_top;
12690 int __gr_offs;
12691 int __vr_offs;
12692 }; */
12693
12694static tree
12695aarch64_build_builtin_va_list (void)
12696{
12697 tree va_list_name;
12698 tree f_stack, f_grtop, f_vrtop, f_groff, f_vroff;
12699
12700 /* Create the type. */
12701 va_list_type = lang_hooks.types.make_type (RECORD_TYPE);
12702 /* Give it the required name. */
12703 va_list_name = build_decl (BUILTINS_LOCATION,
12704 TYPE_DECL,
12705 get_identifier ("__va_list"),
12706 va_list_type);
12707 DECL_ARTIFICIAL (va_list_name) = 1;
12708 TYPE_NAME (va_list_type) = va_list_name;
665c56c6 12709 TYPE_STUB_DECL (va_list_type) = va_list_name;
43e9d192
IB
12710
12711 /* Create the fields. */
12712 f_stack = build_decl (BUILTINS_LOCATION,
12713 FIELD_DECL, get_identifier ("__stack"),
12714 ptr_type_node);
12715 f_grtop = build_decl (BUILTINS_LOCATION,
12716 FIELD_DECL, get_identifier ("__gr_top"),
12717 ptr_type_node);
12718 f_vrtop = build_decl (BUILTINS_LOCATION,
12719 FIELD_DECL, get_identifier ("__vr_top"),
12720 ptr_type_node);
12721 f_groff = build_decl (BUILTINS_LOCATION,
12722 FIELD_DECL, get_identifier ("__gr_offs"),
12723 integer_type_node);
12724 f_vroff = build_decl (BUILTINS_LOCATION,
12725 FIELD_DECL, get_identifier ("__vr_offs"),
12726 integer_type_node);
12727
88e3bdd1 12728 /* Tell tree-stdarg pass about our internal offset fields.
3fd6b9cc
JW
12729 NOTE: va_list_gpr/fpr_counter_field are only used for tree comparison
12730 purposes, to identify whether the code is updating the va_list internal
12731 offset fields in an irregular way. */
12732 va_list_gpr_counter_field = f_groff;
12733 va_list_fpr_counter_field = f_vroff;
12734
43e9d192
IB
12735 DECL_ARTIFICIAL (f_stack) = 1;
12736 DECL_ARTIFICIAL (f_grtop) = 1;
12737 DECL_ARTIFICIAL (f_vrtop) = 1;
12738 DECL_ARTIFICIAL (f_groff) = 1;
12739 DECL_ARTIFICIAL (f_vroff) = 1;
12740
12741 DECL_FIELD_CONTEXT (f_stack) = va_list_type;
12742 DECL_FIELD_CONTEXT (f_grtop) = va_list_type;
12743 DECL_FIELD_CONTEXT (f_vrtop) = va_list_type;
12744 DECL_FIELD_CONTEXT (f_groff) = va_list_type;
12745 DECL_FIELD_CONTEXT (f_vroff) = va_list_type;
12746
12747 TYPE_FIELDS (va_list_type) = f_stack;
12748 DECL_CHAIN (f_stack) = f_grtop;
12749 DECL_CHAIN (f_grtop) = f_vrtop;
12750 DECL_CHAIN (f_vrtop) = f_groff;
12751 DECL_CHAIN (f_groff) = f_vroff;
12752
12753 /* Compute its layout. */
12754 layout_type (va_list_type);
12755
12756 return va_list_type;
12757}
12758
12759/* Implement TARGET_EXPAND_BUILTIN_VA_START. */
12760static void
12761aarch64_expand_builtin_va_start (tree valist, rtx nextarg ATTRIBUTE_UNUSED)
12762{
12763 const CUMULATIVE_ARGS *cum;
12764 tree f_stack, f_grtop, f_vrtop, f_groff, f_vroff;
12765 tree stack, grtop, vrtop, groff, vroff;
12766 tree t;
88e3bdd1
JW
12767 int gr_save_area_size = cfun->va_list_gpr_size;
12768 int vr_save_area_size = cfun->va_list_fpr_size;
43e9d192
IB
12769 int vr_offset;
12770
12771 cum = &crtl->args.info;
88e3bdd1
JW
12772 if (cfun->va_list_gpr_size)
12773 gr_save_area_size = MIN ((NUM_ARG_REGS - cum->aapcs_ncrn) * UNITS_PER_WORD,
12774 cfun->va_list_gpr_size);
12775 if (cfun->va_list_fpr_size)
12776 vr_save_area_size = MIN ((NUM_FP_ARG_REGS - cum->aapcs_nvrn)
12777 * UNITS_PER_VREG, cfun->va_list_fpr_size);
43e9d192 12778
d5726973 12779 if (!TARGET_FLOAT)
43e9d192 12780 {
261fb553 12781 gcc_assert (cum->aapcs_nvrn == 0);
43e9d192
IB
12782 vr_save_area_size = 0;
12783 }
12784
12785 f_stack = TYPE_FIELDS (va_list_type_node);
12786 f_grtop = DECL_CHAIN (f_stack);
12787 f_vrtop = DECL_CHAIN (f_grtop);
12788 f_groff = DECL_CHAIN (f_vrtop);
12789 f_vroff = DECL_CHAIN (f_groff);
12790
12791 stack = build3 (COMPONENT_REF, TREE_TYPE (f_stack), valist, f_stack,
12792 NULL_TREE);
12793 grtop = build3 (COMPONENT_REF, TREE_TYPE (f_grtop), valist, f_grtop,
12794 NULL_TREE);
12795 vrtop = build3 (COMPONENT_REF, TREE_TYPE (f_vrtop), valist, f_vrtop,
12796 NULL_TREE);
12797 groff = build3 (COMPONENT_REF, TREE_TYPE (f_groff), valist, f_groff,
12798 NULL_TREE);
12799 vroff = build3 (COMPONENT_REF, TREE_TYPE (f_vroff), valist, f_vroff,
12800 NULL_TREE);
12801
12802 /* Emit code to initialize STACK, which points to the next varargs stack
12803 argument. CUM->AAPCS_STACK_SIZE gives the number of stack words used
12804 by named arguments. STACK is 8-byte aligned. */
12805 t = make_tree (TREE_TYPE (stack), virtual_incoming_args_rtx);
12806 if (cum->aapcs_stack_size > 0)
12807 t = fold_build_pointer_plus_hwi (t, cum->aapcs_stack_size * UNITS_PER_WORD);
12808 t = build2 (MODIFY_EXPR, TREE_TYPE (stack), stack, t);
12809 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
12810
12811 /* Emit code to initialize GRTOP, the top of the GR save area.
12812 virtual_incoming_args_rtx should have been 16 byte aligned. */
12813 t = make_tree (TREE_TYPE (grtop), virtual_incoming_args_rtx);
12814 t = build2 (MODIFY_EXPR, TREE_TYPE (grtop), grtop, t);
12815 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
12816
12817 /* Emit code to initialize VRTOP, the top of the VR save area.
12818 This address is gr_save_area_bytes below GRTOP, rounded
12819 down to the next 16-byte boundary. */
12820 t = make_tree (TREE_TYPE (vrtop), virtual_incoming_args_rtx);
4f59f9f2
UB
12821 vr_offset = ROUND_UP (gr_save_area_size,
12822 STACK_BOUNDARY / BITS_PER_UNIT);
43e9d192
IB
12823
12824 if (vr_offset)
12825 t = fold_build_pointer_plus_hwi (t, -vr_offset);
12826 t = build2 (MODIFY_EXPR, TREE_TYPE (vrtop), vrtop, t);
12827 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
12828
12829 /* Emit code to initialize GROFF, the offset from GRTOP of the
12830 next GPR argument. */
12831 t = build2 (MODIFY_EXPR, TREE_TYPE (groff), groff,
12832 build_int_cst (TREE_TYPE (groff), -gr_save_area_size));
12833 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
12834
12835 /* Likewise emit code to initialize VROFF, the offset from FTOP
12836 of the next VR argument. */
12837 t = build2 (MODIFY_EXPR, TREE_TYPE (vroff), vroff,
12838 build_int_cst (TREE_TYPE (vroff), -vr_save_area_size));
12839 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
12840}
12841
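/* Illustrative arithmetic (hypothetical values, editor's sketch): for

     void f (int named, ...);

   the caller passes one named integer argument, so cum->aapcs_ncrn is 1 and
   cum->aapcs_nvrn is 0.  With FP/SIMD enabled and the default va_list sizes
   the code above therefore sets

     __gr_offs = -(8 - 1) * 8 = -56
     __vr_offs = -8 * 16      = -128

   and places __vr_top at ROUND_UP (56, 16) = 64 bytes below __gr_top.  */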
12842/* Implement TARGET_GIMPLIFY_VA_ARG_EXPR. */
12843
12844static tree
12845aarch64_gimplify_va_arg_expr (tree valist, tree type, gimple_seq *pre_p,
12846 gimple_seq *post_p ATTRIBUTE_UNUSED)
12847{
12848 tree addr;
12849 bool indirect_p;
12850 bool is_ha; /* is HFA or HVA. */
12851 bool dw_align; /* double-word align. */
ef4bddc2 12852 machine_mode ag_mode = VOIDmode;
43e9d192 12853 int nregs;
ef4bddc2 12854 machine_mode mode;
43e9d192
IB
12855
12856 tree f_stack, f_grtop, f_vrtop, f_groff, f_vroff;
12857 tree stack, f_top, f_off, off, arg, roundup, on_stack;
12858 HOST_WIDE_INT size, rsize, adjust, align;
12859 tree t, u, cond1, cond2;
12860
12861 indirect_p = pass_by_reference (NULL, TYPE_MODE (type), type, false);
12862 if (indirect_p)
12863 type = build_pointer_type (type);
12864
12865 mode = TYPE_MODE (type);
12866
12867 f_stack = TYPE_FIELDS (va_list_type_node);
12868 f_grtop = DECL_CHAIN (f_stack);
12869 f_vrtop = DECL_CHAIN (f_grtop);
12870 f_groff = DECL_CHAIN (f_vrtop);
12871 f_vroff = DECL_CHAIN (f_groff);
12872
12873 stack = build3 (COMPONENT_REF, TREE_TYPE (f_stack), unshare_expr (valist),
12874 f_stack, NULL_TREE);
12875 size = int_size_in_bytes (type);
985b8393 12876 align = aarch64_function_arg_alignment (mode, type) / BITS_PER_UNIT;
43e9d192
IB
12877
12878 dw_align = false;
12879 adjust = 0;
12880 if (aarch64_vfp_is_call_or_return_candidate (mode,
12881 type,
12882 &ag_mode,
12883 &nregs,
12884 &is_ha))
12885 {
6a70badb
RS
12886 /* No frontends can create types with variable-sized modes, so we
12887 shouldn't be asked to pass or return them. */
12888 unsigned int ag_size = GET_MODE_SIZE (ag_mode).to_constant ();
12889
43e9d192 12890 /* TYPE passed in fp/simd registers. */
d5726973 12891 if (!TARGET_FLOAT)
fc29dfc9 12892 aarch64_err_no_fpadvsimd (mode);
43e9d192
IB
12893
12894 f_top = build3 (COMPONENT_REF, TREE_TYPE (f_vrtop),
12895 unshare_expr (valist), f_vrtop, NULL_TREE);
12896 f_off = build3 (COMPONENT_REF, TREE_TYPE (f_vroff),
12897 unshare_expr (valist), f_vroff, NULL_TREE);
12898
12899 rsize = nregs * UNITS_PER_VREG;
12900
12901 if (is_ha)
12902 {
6a70badb
RS
12903 if (BYTES_BIG_ENDIAN && ag_size < UNITS_PER_VREG)
12904 adjust = UNITS_PER_VREG - ag_size;
43e9d192 12905 }
76b0cbf8 12906 else if (BLOCK_REG_PADDING (mode, type, 1) == PAD_DOWNWARD
43e9d192
IB
12907 && size < UNITS_PER_VREG)
12908 {
12909 adjust = UNITS_PER_VREG - size;
12910 }
12911 }
12912 else
12913 {
12914 /* TYPE passed in general registers. */
12915 f_top = build3 (COMPONENT_REF, TREE_TYPE (f_grtop),
12916 unshare_expr (valist), f_grtop, NULL_TREE);
12917 f_off = build3 (COMPONENT_REF, TREE_TYPE (f_groff),
12918 unshare_expr (valist), f_groff, NULL_TREE);
4f59f9f2 12919 rsize = ROUND_UP (size, UNITS_PER_WORD);
43e9d192
IB
12920 nregs = rsize / UNITS_PER_WORD;
12921
12922 if (align > 8)
12923 dw_align = true;
12924
76b0cbf8 12925 if (BLOCK_REG_PADDING (mode, type, 1) == PAD_DOWNWARD
43e9d192
IB
12926 && size < UNITS_PER_WORD)
12927 {
12928 adjust = UNITS_PER_WORD - size;
12929 }
12930 }
12931
12932 /* Get a local temporary for the field value. */
12933 off = get_initialized_tmp_var (f_off, pre_p, NULL);
12934
12935 /* Emit code to branch if off >= 0. */
12936 t = build2 (GE_EXPR, boolean_type_node, off,
12937 build_int_cst (TREE_TYPE (off), 0));
12938 cond1 = build3 (COND_EXPR, ptr_type_node, t, NULL_TREE, NULL_TREE);
12939
12940 if (dw_align)
12941 {
12942 /* Emit: offs = (offs + 15) & -16. */
12943 t = build2 (PLUS_EXPR, TREE_TYPE (off), off,
12944 build_int_cst (TREE_TYPE (off), 15));
12945 t = build2 (BIT_AND_EXPR, TREE_TYPE (off), t,
12946 build_int_cst (TREE_TYPE (off), -16));
12947 roundup = build2 (MODIFY_EXPR, TREE_TYPE (off), off, t);
12948 }
12949 else
12950 roundup = NULL;
12951
12952 /* Update ap.__[g|v]r_offs */
12953 t = build2 (PLUS_EXPR, TREE_TYPE (off), off,
12954 build_int_cst (TREE_TYPE (off), rsize));
12955 t = build2 (MODIFY_EXPR, TREE_TYPE (f_off), unshare_expr (f_off), t);
12956
12957 /* String up. */
12958 if (roundup)
12959 t = build2 (COMPOUND_EXPR, TREE_TYPE (t), roundup, t);
12960
12961 /* [cond2] if (ap.__[g|v]r_offs > 0) */
12962 u = build2 (GT_EXPR, boolean_type_node, unshare_expr (f_off),
12963 build_int_cst (TREE_TYPE (f_off), 0));
12964 cond2 = build3 (COND_EXPR, ptr_type_node, u, NULL_TREE, NULL_TREE);
12965
12966 /* String up: make sure the assignment happens before the use. */
12967 t = build2 (COMPOUND_EXPR, TREE_TYPE (cond2), t, cond2);
12968 COND_EXPR_ELSE (cond1) = t;
12969
12970 /* Prepare the trees handling the argument that is passed on the stack;
12971 the top level node will store in ON_STACK. */
12972 arg = get_initialized_tmp_var (stack, pre_p, NULL);
12973 if (align > 8)
12974 {
12975 /* if (alignof(type) > 8) (arg = arg + 15) & -16; */
4bdc2738 12976 t = fold_build_pointer_plus_hwi (arg, 15);
43e9d192
IB
12977 t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
12978 build_int_cst (TREE_TYPE (t), -16));
43e9d192
IB
12979 roundup = build2 (MODIFY_EXPR, TREE_TYPE (arg), arg, t);
12980 }
12981 else
12982 roundup = NULL;
12983 /* Advance ap.__stack */
4bdc2738 12984 t = fold_build_pointer_plus_hwi (arg, size + 7);
43e9d192
IB
12985 t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
12986 build_int_cst (TREE_TYPE (t), -8));
43e9d192
IB
12987 t = build2 (MODIFY_EXPR, TREE_TYPE (stack), unshare_expr (stack), t);
12988 /* String up roundup and advance. */
12989 if (roundup)
12990 t = build2 (COMPOUND_EXPR, TREE_TYPE (t), roundup, t);
12991 /* String up with arg */
12992 on_stack = build2 (COMPOUND_EXPR, TREE_TYPE (arg), t, arg);
12993 /* Big-endianness related address adjustment. */
76b0cbf8 12994 if (BLOCK_REG_PADDING (mode, type, 1) == PAD_DOWNWARD
43e9d192
IB
12995 && size < UNITS_PER_WORD)
12996 {
12997 t = build2 (POINTER_PLUS_EXPR, TREE_TYPE (arg), arg,
12998 size_int (UNITS_PER_WORD - size));
12999 on_stack = build2 (COMPOUND_EXPR, TREE_TYPE (arg), on_stack, t);
13000 }
13001
13002 COND_EXPR_THEN (cond1) = unshare_expr (on_stack);
13003 COND_EXPR_THEN (cond2) = unshare_expr (on_stack);
13004
13005 /* Adjustment to OFFSET in the case of BIG_ENDIAN. */
13006 t = off;
13007 if (adjust)
13008 t = build2 (PREINCREMENT_EXPR, TREE_TYPE (off), off,
13009 build_int_cst (TREE_TYPE (off), adjust));
13010
13011 t = fold_convert (sizetype, t);
13012 t = build2 (POINTER_PLUS_EXPR, TREE_TYPE (f_top), f_top, t);
13013
13014 if (is_ha)
13015 {
13016 /* type ha; // treat as "struct {ftype field[n];}"
13017 ... [computing offs]
13018 for (i = 0; i <nregs; ++i, offs += 16)
13019 ha.field[i] = *((ftype *)(ap.__vr_top + offs));
13020 return ha; */
13021 int i;
13022 tree tmp_ha, field_t, field_ptr_t;
13023
13024 /* Declare a local variable. */
13025 tmp_ha = create_tmp_var_raw (type, "ha");
13026 gimple_add_tmp_var (tmp_ha);
13027
13028 /* Establish the base type. */
13029 switch (ag_mode)
13030 {
4e10a5a7 13031 case E_SFmode:
43e9d192
IB
13032 field_t = float_type_node;
13033 field_ptr_t = float_ptr_type_node;
13034 break;
4e10a5a7 13035 case E_DFmode:
43e9d192
IB
13036 field_t = double_type_node;
13037 field_ptr_t = double_ptr_type_node;
13038 break;
4e10a5a7 13039 case E_TFmode:
43e9d192
IB
13040 field_t = long_double_type_node;
13041 field_ptr_t = long_double_ptr_type_node;
13042 break;
4e10a5a7 13043 case E_HFmode:
1b62ed4f
JG
13044 field_t = aarch64_fp16_type_node;
13045 field_ptr_t = aarch64_fp16_ptr_type_node;
43e9d192 13046 break;
4e10a5a7
RS
13047 case E_V2SImode:
13048 case E_V4SImode:
43e9d192
IB
13049 {
13050 tree innertype = make_signed_type (GET_MODE_PRECISION (SImode));
13051 field_t = build_vector_type_for_mode (innertype, ag_mode);
13052 field_ptr_t = build_pointer_type (field_t);
13053 }
13054 break;
13055 default:
13056 gcc_assert (0);
13057 }
13058
13059 /* *(field_ptr_t)&ha = *((field_ptr_t)vr_saved_area */
13060 tmp_ha = build1 (ADDR_EXPR, field_ptr_t, tmp_ha);
13061 addr = t;
13062 t = fold_convert (field_ptr_t, addr);
13063 t = build2 (MODIFY_EXPR, field_t,
13064 build1 (INDIRECT_REF, field_t, tmp_ha),
13065 build1 (INDIRECT_REF, field_t, t));
13066
13067 /* ha.field[i] = *((field_ptr_t)vr_saved_area + i) */
13068 for (i = 1; i < nregs; ++i)
13069 {
13070 addr = fold_build_pointer_plus_hwi (addr, UNITS_PER_VREG);
13071 u = fold_convert (field_ptr_t, addr);
13072 u = build2 (MODIFY_EXPR, field_t,
13073 build2 (MEM_REF, field_t, tmp_ha,
13074 build_int_cst (field_ptr_t,
13075 (i *
13076 int_size_in_bytes (field_t)))),
13077 build1 (INDIRECT_REF, field_t, u));
13078 t = build2 (COMPOUND_EXPR, TREE_TYPE (t), t, u);
13079 }
13080
13081 u = fold_convert (TREE_TYPE (f_top), tmp_ha);
13082 t = build2 (COMPOUND_EXPR, TREE_TYPE (f_top), t, u);
13083 }
13084
13085 COND_EXPR_ELSE (cond2) = t;
13086 addr = fold_convert (build_pointer_type (type), cond1);
13087 addr = build_va_arg_indirect_ref (addr);
13088
13089 if (indirect_p)
13090 addr = build_va_arg_indirect_ref (addr);
13091
13092 return addr;
13093}
13094
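/* Illustrative only (hypothetical user code): a homogeneous floating-point
   aggregate such as

     struct rgba { float r, g, b, a; };

   takes the is_ha path above with ag_mode == SFmode and nregs == 4, so
   va_arg copies one float from each of four consecutive 16-byte vector
   save slots below __vr_top into the local "ha" temporary.  */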
13095/* Implement TARGET_SETUP_INCOMING_VARARGS. */
13096
13097static void
ef4bddc2 13098aarch64_setup_incoming_varargs (cumulative_args_t cum_v, machine_mode mode,
43e9d192
IB
13099 tree type, int *pretend_size ATTRIBUTE_UNUSED,
13100 int no_rtl)
13101{
13102 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
13103 CUMULATIVE_ARGS local_cum;
88e3bdd1
JW
13104 int gr_saved = cfun->va_list_gpr_size;
13105 int vr_saved = cfun->va_list_fpr_size;
43e9d192
IB
13106
13107 /* The caller has advanced CUM up to, but not beyond, the last named
13108 argument. Advance a local copy of CUM past the last "real" named
13109 argument, to find out how many registers are left over. */
13110 local_cum = *cum;
13111 aarch64_function_arg_advance (pack_cumulative_args(&local_cum), mode, type, true);
13112
88e3bdd1
JW
13113 /* Find out how many registers we need to save.
13114 Honor tree-stdarg analysis results. */
13115 if (cfun->va_list_gpr_size)
13116 gr_saved = MIN (NUM_ARG_REGS - local_cum.aapcs_ncrn,
13117 cfun->va_list_gpr_size / UNITS_PER_WORD);
13118 if (cfun->va_list_fpr_size)
13119 vr_saved = MIN (NUM_FP_ARG_REGS - local_cum.aapcs_nvrn,
13120 cfun->va_list_fpr_size / UNITS_PER_VREG);
43e9d192 13121
d5726973 13122 if (!TARGET_FLOAT)
43e9d192 13123 {
261fb553 13124 gcc_assert (local_cum.aapcs_nvrn == 0);
43e9d192
IB
13125 vr_saved = 0;
13126 }
13127
13128 if (!no_rtl)
13129 {
13130 if (gr_saved > 0)
13131 {
13132 rtx ptr, mem;
13133
13134 /* virtual_incoming_args_rtx should have been 16-byte aligned. */
13135 ptr = plus_constant (Pmode, virtual_incoming_args_rtx,
13136 - gr_saved * UNITS_PER_WORD);
13137 mem = gen_frame_mem (BLKmode, ptr);
13138 set_mem_alias_set (mem, get_varargs_alias_set ());
13139
13140 move_block_from_reg (local_cum.aapcs_ncrn + R0_REGNUM,
13141 mem, gr_saved);
13142 }
13143 if (vr_saved > 0)
13144 {
13145 /* We can't use move_block_from_reg, because it will use
13146 the wrong mode, storing D regs only. */
ef4bddc2 13147 machine_mode mode = TImode;
88e3bdd1 13148 int off, i, vr_start;
43e9d192
IB
13149
13150 /* Set OFF to the offset from virtual_incoming_args_rtx of
13151 the first vector register. The VR save area lies below
13152 the GR one, and is aligned to 16 bytes. */
4f59f9f2
UB
13153 off = -ROUND_UP (gr_saved * UNITS_PER_WORD,
13154 STACK_BOUNDARY / BITS_PER_UNIT);
43e9d192
IB
13155 off -= vr_saved * UNITS_PER_VREG;
13156
88e3bdd1
JW
13157 vr_start = V0_REGNUM + local_cum.aapcs_nvrn;
13158 for (i = 0; i < vr_saved; ++i)
43e9d192
IB
13159 {
13160 rtx ptr, mem;
13161
13162 ptr = plus_constant (Pmode, virtual_incoming_args_rtx, off);
13163 mem = gen_frame_mem (mode, ptr);
13164 set_mem_alias_set (mem, get_varargs_alias_set ());
88e3bdd1 13165 aarch64_emit_move (mem, gen_rtx_REG (mode, vr_start + i));
43e9d192
IB
13166 off += UNITS_PER_VREG;
13167 }
13168 }
13169 }
13170
13171 /* We don't save the size into *PRETEND_SIZE because we want to avoid
13172 any complication of having crtl->args.pretend_args_size changed. */
8799637a 13173 cfun->machine->frame.saved_varargs_size
4f59f9f2
UB
13174 = (ROUND_UP (gr_saved * UNITS_PER_WORD,
13175 STACK_BOUNDARY / BITS_PER_UNIT)
43e9d192
IB
13176 + vr_saved * UNITS_PER_VREG);
13177}
13178
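/* Illustrative arithmetic (hypothetical values, editor's sketch): a variadic
   function with three named general-register arguments and no named FP
   arguments saves gr_saved = 5 and vr_saved = 8 registers here, so

     saved_varargs_size = ROUND_UP (5 * 8, 16) + 8 * 16 = 48 + 128 = 176

   bytes are reserved below virtual_incoming_args_rtx.  vr_saved drops to 0
   when !TARGET_FLOAT or when the stdarg pass proves no FP registers are
   needed.  */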
13179static void
13180aarch64_conditional_register_usage (void)
13181{
13182 int i;
13183 if (!TARGET_FLOAT)
13184 {
13185 for (i = V0_REGNUM; i <= V31_REGNUM; i++)
13186 {
13187 fixed_regs[i] = 1;
13188 call_used_regs[i] = 1;
13189 }
13190 }
43cacb12
RS
13191 if (!TARGET_SVE)
13192 for (i = P0_REGNUM; i <= P15_REGNUM; i++)
13193 {
13194 fixed_regs[i] = 1;
13195 call_used_regs[i] = 1;
13196 }
3751345d
RE
13197
13198 /* When tracking speculation, we need a couple of call-clobbered registers
13199 to track the speculation state. It would be nice to just use
13200 IP0 and IP1, but currently there are numerous places that simply
13201 assume these registers are free for other uses (e.g. pointer
13202 authentication). */
13203 if (aarch64_track_speculation)
13204 {
13205 fixed_regs[SPECULATION_TRACKER_REGNUM] = 1;
13206 call_used_regs[SPECULATION_TRACKER_REGNUM] = 1;
13207 fixed_regs[SPECULATION_SCRATCH_REGNUM] = 1;
13208 call_used_regs[SPECULATION_SCRATCH_REGNUM] = 1;
13209 }
43e9d192
IB
13210}
13211
13212/* Walk down the type tree of TYPE counting consecutive base elements.
13213 If *MODEP is VOIDmode, then set it to the first valid floating point
13214 type. If a non-floating point type is found, or if a floating point
13215 type that doesn't match a non-VOIDmode *MODEP is found, then return -1,
13216 otherwise return the count in the sub-tree. */
13217static int
ef4bddc2 13218aapcs_vfp_sub_candidate (const_tree type, machine_mode *modep)
43e9d192 13219{
ef4bddc2 13220 machine_mode mode;
43e9d192
IB
13221 HOST_WIDE_INT size;
13222
13223 switch (TREE_CODE (type))
13224 {
13225 case REAL_TYPE:
13226 mode = TYPE_MODE (type);
1b62ed4f
JG
13227 if (mode != DFmode && mode != SFmode
13228 && mode != TFmode && mode != HFmode)
43e9d192
IB
13229 return -1;
13230
13231 if (*modep == VOIDmode)
13232 *modep = mode;
13233
13234 if (*modep == mode)
13235 return 1;
13236
13237 break;
13238
13239 case COMPLEX_TYPE:
13240 mode = TYPE_MODE (TREE_TYPE (type));
1b62ed4f
JG
13241 if (mode != DFmode && mode != SFmode
13242 && mode != TFmode && mode != HFmode)
43e9d192
IB
13243 return -1;
13244
13245 if (*modep == VOIDmode)
13246 *modep = mode;
13247
13248 if (*modep == mode)
13249 return 2;
13250
13251 break;
13252
13253 case VECTOR_TYPE:
13254 /* Use V2SImode and V4SImode as representatives of all 64-bit
13255 and 128-bit vector types. */
13256 size = int_size_in_bytes (type);
13257 switch (size)
13258 {
13259 case 8:
13260 mode = V2SImode;
13261 break;
13262 case 16:
13263 mode = V4SImode;
13264 break;
13265 default:
13266 return -1;
13267 }
13268
13269 if (*modep == VOIDmode)
13270 *modep = mode;
13271
13272 /* Vector modes are considered to be opaque: two vectors are
13273 equivalent for the purposes of being homogeneous aggregates
13274 if they are the same size. */
13275 if (*modep == mode)
13276 return 1;
13277
13278 break;
13279
13280 case ARRAY_TYPE:
13281 {
13282 int count;
13283 tree index = TYPE_DOMAIN (type);
13284
807e902e
KZ
13285 /* Can't handle incomplete types nor sizes that are not
13286 fixed. */
13287 if (!COMPLETE_TYPE_P (type)
13288 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
43e9d192
IB
13289 return -1;
13290
13291 count = aapcs_vfp_sub_candidate (TREE_TYPE (type), modep);
13292 if (count == -1
13293 || !index
13294 || !TYPE_MAX_VALUE (index)
cc269bb6 13295 || !tree_fits_uhwi_p (TYPE_MAX_VALUE (index))
43e9d192 13296 || !TYPE_MIN_VALUE (index)
cc269bb6 13297 || !tree_fits_uhwi_p (TYPE_MIN_VALUE (index))
43e9d192
IB
13298 || count < 0)
13299 return -1;
13300
ae7e9ddd
RS
13301 count *= (1 + tree_to_uhwi (TYPE_MAX_VALUE (index))
13302 - tree_to_uhwi (TYPE_MIN_VALUE (index)));
43e9d192
IB
13303
13304 /* There must be no padding. */
6a70badb
RS
13305 if (maybe_ne (wi::to_poly_wide (TYPE_SIZE (type)),
13306 count * GET_MODE_BITSIZE (*modep)))
43e9d192
IB
13307 return -1;
13308
13309 return count;
13310 }
13311
13312 case RECORD_TYPE:
13313 {
13314 int count = 0;
13315 int sub_count;
13316 tree field;
13317
807e902e
KZ
13318 /* Can't handle incomplete types nor sizes that are not
13319 fixed. */
13320 if (!COMPLETE_TYPE_P (type)
13321 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
43e9d192
IB
13322 return -1;
13323
13324 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
13325 {
13326 if (TREE_CODE (field) != FIELD_DECL)
13327 continue;
13328
13329 sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep);
13330 if (sub_count < 0)
13331 return -1;
13332 count += sub_count;
13333 }
13334
13335 /* There must be no padding. */
6a70badb
RS
13336 if (maybe_ne (wi::to_poly_wide (TYPE_SIZE (type)),
13337 count * GET_MODE_BITSIZE (*modep)))
43e9d192
IB
13338 return -1;
13339
13340 return count;
13341 }
13342
13343 case UNION_TYPE:
13344 case QUAL_UNION_TYPE:
13345 {
13346 /* These aren't very interesting except in a degenerate case. */
13347 int count = 0;
13348 int sub_count;
13349 tree field;
13350
807e902e
KZ
13351 /* Can't handle incomplete types nor sizes that are not
13352 fixed. */
13353 if (!COMPLETE_TYPE_P (type)
13354 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
43e9d192
IB
13355 return -1;
13356
13357 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
13358 {
13359 if (TREE_CODE (field) != FIELD_DECL)
13360 continue;
13361
13362 sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep);
13363 if (sub_count < 0)
13364 return -1;
13365 count = count > sub_count ? count : sub_count;
13366 }
13367
13368 /* There must be no padding. */
6a70badb
RS
13369 if (maybe_ne (wi::to_poly_wide (TYPE_SIZE (type)),
13370 count * GET_MODE_BITSIZE (*modep)))
43e9d192
IB
13371 return -1;
13372
13373 return count;
13374 }
13375
13376 default:
13377 break;
13378 }
13379
13380 return -1;
13381}
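/* As a hedged illustration of the walk above (hypothetical types, not taken
   from this file), the classification would be roughly:

     struct hfa3  { double a, b, c; };    // returns 3, *modep == DFmode
     _Complex float cf;                   // returns 2, *modep == SFmode
     struct mixed { double a; int b; };   // returns -1 (non-FP member)

   i.e. only aggregates built entirely from a single floating-point or
   vector base type count as candidates.  */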
13382
b6ec6215
KT
13383/* Return TRUE if the type, as described by TYPE and MODE, is a short vector
13384 type as described in AAPCS64 \S 4.1.2.
13385
13386 See the comment above aarch64_composite_type_p for the notes on MODE. */
13387
13388static bool
13389aarch64_short_vector_p (const_tree type,
13390 machine_mode mode)
13391{
6a70badb 13392 poly_int64 size = -1;
b6ec6215
KT
13393
13394 if (type && TREE_CODE (type) == VECTOR_TYPE)
13395 size = int_size_in_bytes (type);
13396 else if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
13397 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
13398 size = GET_MODE_SIZE (mode);
13399
6a70badb 13400 return known_eq (size, 8) || known_eq (size, 16);
b6ec6215
KT
13401}
13402
43e9d192
IB
13403/* Return TRUE if the type, as described by TYPE and MODE, is a composite
13404 type as described in AAPCS64 \S 4.3. This includes aggregate, union and
13405 array types. The C99 floating-point complex types are also considered
13406 as composite types, according to AAPCS64 \S 7.1.1. The complex integer
13407 types, which are GCC extensions and out of the scope of AAPCS64, are
13408 treated as composite types here as well.
13409
13410 Note that MODE itself is not sufficient in determining whether a type
13411 is such a composite type or not. This is because
13412 stor-layout.c:compute_record_mode may have already changed the MODE
13413 (BLKmode) of a RECORD_TYPE TYPE to some other mode. For example, a
13414 structure with only one field may have its MODE set to the mode of the
13415 field. Also an integer mode whose size matches the size of the
13416 RECORD_TYPE type may be used to substitute the original mode
13417 (i.e. BLKmode) in certain circumstances. In other words, MODE cannot be
13418 solely relied on. */
13419
13420static bool
13421aarch64_composite_type_p (const_tree type,
ef4bddc2 13422 machine_mode mode)
43e9d192 13423{
b6ec6215
KT
13424 if (aarch64_short_vector_p (type, mode))
13425 return false;
13426
43e9d192
IB
13427 if (type && (AGGREGATE_TYPE_P (type) || TREE_CODE (type) == COMPLEX_TYPE))
13428 return true;
13429
13430 if (mode == BLKmode
13431 || GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT
13432 || GET_MODE_CLASS (mode) == MODE_COMPLEX_INT)
13433 return true;
13434
13435 return false;
13436}
13437
43e9d192
IB
13438/* Return TRUE if an argument, whose type is described by TYPE and MODE,
13439 shall be passed or returned in simd/fp register(s) (providing these
13440 parameter passing registers are available).
13441
13442 Upon successful return, *COUNT returns the number of needed registers,
13443 *BASE_MODE returns the mode of the individual register and when IS_HA
13444 is not NULL, *IS_HA indicates whether or not the argument is a homogeneous
13445 floating-point aggregate or a homogeneous short-vector aggregate. */
13446
13447static bool
ef4bddc2 13448aarch64_vfp_is_call_or_return_candidate (machine_mode mode,
43e9d192 13449 const_tree type,
ef4bddc2 13450 machine_mode *base_mode,
43e9d192
IB
13451 int *count,
13452 bool *is_ha)
13453{
ef4bddc2 13454 machine_mode new_mode = VOIDmode;
43e9d192
IB
13455 bool composite_p = aarch64_composite_type_p (type, mode);
13456
13457 if (is_ha != NULL) *is_ha = false;
13458
13459 if ((!composite_p && GET_MODE_CLASS (mode) == MODE_FLOAT)
13460 || aarch64_short_vector_p (type, mode))
13461 {
13462 *count = 1;
13463 new_mode = mode;
13464 }
13465 else if (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT)
13466 {
13467 if (is_ha != NULL) *is_ha = true;
13468 *count = 2;
13469 new_mode = GET_MODE_INNER (mode);
13470 }
13471 else if (type && composite_p)
13472 {
13473 int ag_count = aapcs_vfp_sub_candidate (type, &new_mode);
13474
13475 if (ag_count > 0 && ag_count <= HA_MAX_NUM_FLDS)
13476 {
13477 if (is_ha != NULL) *is_ha = true;
13478 *count = ag_count;
13479 }
13480 else
13481 return false;
13482 }
13483 else
13484 return false;
13485
13486 *base_mode = new_mode;
13487 return true;
13488}
13489
13490/* Implement TARGET_STRUCT_VALUE_RTX. */
13491
13492static rtx
13493aarch64_struct_value_rtx (tree fndecl ATTRIBUTE_UNUSED,
13494 int incoming ATTRIBUTE_UNUSED)
13495{
13496 return gen_rtx_REG (Pmode, AARCH64_STRUCT_VALUE_REGNUM);
13497}
13498
13499/* Implement target hook TARGET_VECTOR_MODE_SUPPORTED_P. */
13500static bool
ef4bddc2 13501aarch64_vector_mode_supported_p (machine_mode mode)
43e9d192 13502{
43cacb12
RS
13503 unsigned int vec_flags = aarch64_classify_vector_mode (mode);
13504 return vec_flags != 0 && (vec_flags & VEC_STRUCT) == 0;
43e9d192
IB
13505}
13506
b7342d25
IB
13507/* Return the appropriate SIMD container mode
13508 for MODE within a vector of WIDTH bits. */
ef4bddc2 13509static machine_mode
43cacb12 13510aarch64_simd_container_mode (scalar_mode mode, poly_int64 width)
43e9d192 13511{
43cacb12
RS
13512 if (TARGET_SVE && known_eq (width, BITS_PER_SVE_VECTOR))
13513 switch (mode)
13514 {
13515 case E_DFmode:
13516 return VNx2DFmode;
13517 case E_SFmode:
13518 return VNx4SFmode;
13519 case E_HFmode:
13520 return VNx8HFmode;
13521 case E_DImode:
13522 return VNx2DImode;
13523 case E_SImode:
13524 return VNx4SImode;
13525 case E_HImode:
13526 return VNx8HImode;
13527 case E_QImode:
13528 return VNx16QImode;
13529 default:
13530 return word_mode;
13531 }
13532
13533 gcc_assert (known_eq (width, 64) || known_eq (width, 128));
43e9d192 13534 if (TARGET_SIMD)
b7342d25 13535 {
43cacb12 13536 if (known_eq (width, 128))
b7342d25
IB
13537 switch (mode)
13538 {
4e10a5a7 13539 case E_DFmode:
b7342d25 13540 return V2DFmode;
4e10a5a7 13541 case E_SFmode:
b7342d25 13542 return V4SFmode;
4e10a5a7 13543 case E_HFmode:
b719f884 13544 return V8HFmode;
4e10a5a7 13545 case E_SImode:
b7342d25 13546 return V4SImode;
4e10a5a7 13547 case E_HImode:
b7342d25 13548 return V8HImode;
4e10a5a7 13549 case E_QImode:
b7342d25 13550 return V16QImode;
4e10a5a7 13551 case E_DImode:
b7342d25
IB
13552 return V2DImode;
13553 default:
13554 break;
13555 }
13556 else
13557 switch (mode)
13558 {
4e10a5a7 13559 case E_SFmode:
b7342d25 13560 return V2SFmode;
4e10a5a7 13561 case E_HFmode:
b719f884 13562 return V4HFmode;
4e10a5a7 13563 case E_SImode:
b7342d25 13564 return V2SImode;
4e10a5a7 13565 case E_HImode:
b7342d25 13566 return V4HImode;
4e10a5a7 13567 case E_QImode:
b7342d25
IB
13568 return V8QImode;
13569 default:
13570 break;
13571 }
13572 }
43e9d192
IB
13573 return word_mode;
13574}
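/* For illustration (values follow directly from the switches above):
   SFmode with a 128-bit width maps to V4SFmode, SFmode with a 64-bit width
   maps to V2SFmode, and with TARGET_SVE and a BITS_PER_SVE_VECTOR width it
   maps to VNx4SFmode; unsupported combinations fall back to word_mode.  */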
13575
b7342d25 13576/* Return 128-bit container as the preferred SIMD mode for MODE. */
ef4bddc2 13577static machine_mode
005ba29c 13578aarch64_preferred_simd_mode (scalar_mode mode)
b7342d25 13579{
43cacb12
RS
13580 poly_int64 bits = TARGET_SVE ? BITS_PER_SVE_VECTOR : 128;
13581 return aarch64_simd_container_mode (mode, bits);
b7342d25
IB
13582}
13583
86e36728 13584/* Return a list of possible vector sizes for the vectorizer
3b357264 13585 to iterate over. */
86e36728
RS
13586static void
13587aarch64_autovectorize_vector_sizes (vector_sizes *sizes)
3b357264 13588{
43cacb12
RS
13589 if (TARGET_SVE)
13590 sizes->safe_push (BYTES_PER_SVE_VECTOR);
86e36728
RS
13591 sizes->safe_push (16);
13592 sizes->safe_push (8);
3b357264
JG
13593}
13594
ac2b960f
YZ
13595/* Implement TARGET_MANGLE_TYPE. */
13596
6f549691 13597static const char *
ac2b960f
YZ
13598aarch64_mangle_type (const_tree type)
13599{
13600 /* The AArch64 ABI documents say that "__va_list" has to be
17f8ace2 13601 mangled as if it is in the "std" namespace. */
ac2b960f
YZ
13602 if (lang_hooks.types_compatible_p (CONST_CAST_TREE (type), va_list_type))
13603 return "St9__va_list";
13604
c2ec330c
AL
13605 /* Half-precision float. */
13606 if (TREE_CODE (type) == REAL_TYPE && TYPE_PRECISION (type) == 16)
13607 return "Dh";
13608
f9d53c27
TB
13609 /* Mangle AArch64-specific internal types. TYPE_NAME is non-NULL_TREE for
13610 builtin types. */
13611 if (TYPE_NAME (type) != NULL)
13612 return aarch64_mangle_builtin_type (type);
c6fc9e43 13613
ac2b960f
YZ
13614 /* Use the default mangling. */
13615 return NULL;
13616}
13617
75cf1494
KT
13618/* Find the first rtx_insn before INSN that will generate an assembly
13619 instruction. */
13620
13621static rtx_insn *
13622aarch64_prev_real_insn (rtx_insn *insn)
13623{
13624 if (!insn)
13625 return NULL;
13626
13627 do
13628 {
13629 insn = prev_real_insn (insn);
13630 }
13631 while (insn && recog_memoized (insn) < 0);
13632
13633 return insn;
13634}
13635
13636static bool
13637is_madd_op (enum attr_type t1)
13638{
13639 unsigned int i;
13640 /* A number of these may be AArch32 only. */
13641 enum attr_type mlatypes[] = {
13642 TYPE_MLA, TYPE_MLAS, TYPE_SMLAD, TYPE_SMLADX, TYPE_SMLAL, TYPE_SMLALD,
13643 TYPE_SMLALS, TYPE_SMLALXY, TYPE_SMLAWX, TYPE_SMLAWY, TYPE_SMLAXY,
13644 TYPE_SMMLA, TYPE_UMLAL, TYPE_UMLALS, TYPE_SMLSD, TYPE_SMLSDX, TYPE_SMLSLD
13645 };
13646
13647 for (i = 0; i < sizeof (mlatypes) / sizeof (enum attr_type); i++)
13648 {
13649 if (t1 == mlatypes[i])
13650 return true;
13651 }
13652
13653 return false;
13654}
13655
13656/* Check if there is a register dependency between a load and the insn
13657 for which we hold recog_data. */
13658
13659static bool
13660dep_between_memop_and_curr (rtx memop)
13661{
13662 rtx load_reg;
13663 int opno;
13664
8baff86e 13665 gcc_assert (GET_CODE (memop) == SET);
75cf1494
KT
13666
13667 if (!REG_P (SET_DEST (memop)))
13668 return false;
13669
13670 load_reg = SET_DEST (memop);
8baff86e 13671 for (opno = 1; opno < recog_data.n_operands; opno++)
75cf1494
KT
13672 {
13673 rtx operand = recog_data.operand[opno];
13674 if (REG_P (operand)
13675 && reg_overlap_mentioned_p (load_reg, operand))
13676 return true;
13677
13678 }
13679 return false;
13680}
13681
8baff86e
KT
13682
13683/* When working around the Cortex-A53 erratum 835769,
13684 given rtx_insn INSN, return true if it is a 64-bit multiply-accumulate
13685 instruction and has a preceding memory instruction such that a NOP
13686 should be inserted between them. */
13687
75cf1494
KT
13688bool
13689aarch64_madd_needs_nop (rtx_insn* insn)
13690{
13691 enum attr_type attr_type;
13692 rtx_insn *prev;
13693 rtx body;
13694
b32c1043 13695 if (!TARGET_FIX_ERR_A53_835769)
75cf1494
KT
13696 return false;
13697
e322d6e3 13698 if (!INSN_P (insn) || recog_memoized (insn) < 0)
75cf1494
KT
13699 return false;
13700
13701 attr_type = get_attr_type (insn);
13702 if (!is_madd_op (attr_type))
13703 return false;
13704
13705 prev = aarch64_prev_real_insn (insn);
3fea1a75
KT
13706 /* aarch64_prev_real_insn can call recog_memoized on insns other than INSN.
13707 Restore recog state to INSN to avoid state corruption. */
13708 extract_constrain_insn_cached (insn);
13709
550e2205 13710 if (!prev || !contains_mem_rtx_p (PATTERN (prev)))
75cf1494
KT
13711 return false;
13712
13713 body = single_set (prev);
13714
13715 /* If the previous insn is a memory op and there is no dependency between
8baff86e
KT
13716 it and the DImode madd, emit a NOP between them. If body is NULL then we
13717 have a complex memory operation, probably a load/store pair.
13718 Be conservative for now and emit a NOP. */
13719 if (GET_MODE (recog_data.operand[0]) == DImode
13720 && (!body || !dep_between_memop_and_curr (body)))
75cf1494
KT
13721 return true;
13722
13723 return false;
13724
13725}
13726
8baff86e
KT
13727
13728/* Implement FINAL_PRESCAN_INSN. */
13729
75cf1494
KT
13730void
13731aarch64_final_prescan_insn (rtx_insn *insn)
13732{
13733 if (aarch64_madd_needs_nop (insn))
13734 fprintf (asm_out_file, "\tnop // between mem op and mult-accumulate\n");
13735}
13736
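/* A hedged sketch of the situation the two hooks above guard against
   (register choices are hypothetical, not from a real test case):

     ldr   x1, [x2]           // preceding 64-bit memory operation
     nop                      // emitted by aarch64_final_prescan_insn
     madd  x0, x3, x4, x5     // DImode multiply-accumulate

   The NOP is only emitted when -mfix-cortex-a53-835769 is in effect and no
   register dependency already separates the two instructions.  */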
13737
43cacb12
RS
13738/* Return true if BASE_OR_STEP is a valid immediate operand for an SVE INDEX
13739 instruction. */
13740
13741bool
13742aarch64_sve_index_immediate_p (rtx base_or_step)
13743{
13744 return (CONST_INT_P (base_or_step)
13745 && IN_RANGE (INTVAL (base_or_step), -16, 15));
13746}
13747
13748/* Return true if X is a valid immediate for the SVE ADD and SUB
13749 instructions. Negate X first if NEGATE_P is true. */
13750
13751bool
13752aarch64_sve_arith_immediate_p (rtx x, bool negate_p)
13753{
13754 rtx elt;
13755
13756 if (!const_vec_duplicate_p (x, &elt)
13757 || !CONST_INT_P (elt))
13758 return false;
13759
13760 HOST_WIDE_INT val = INTVAL (elt);
13761 if (negate_p)
13762 val = -val;
13763 val &= GET_MODE_MASK (GET_MODE_INNER (GET_MODE (x)));
13764
13765 if (val & 0xff)
13766 return IN_RANGE (val, 0, 0xff);
13767 return IN_RANGE (val, 0, 0xff00);
13768}
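/* Worked examples for the test above (illustrative values only):
   1, 0x7f and 0xff are accepted as 8-bit immediates; 0x100 and 0xff00 are
   accepted because their low byte is zero (the LSL #8 form); 0x101 is
   rejected because it needs both a low and a high byte.  */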
13769
13770/* Return true if X is a valid immediate operand for an SVE logical
13771 instruction such as AND. */
13772
13773bool
13774aarch64_sve_bitmask_immediate_p (rtx x)
13775{
13776 rtx elt;
13777
13778 return (const_vec_duplicate_p (x, &elt)
13779 && CONST_INT_P (elt)
13780 && aarch64_bitmask_imm (INTVAL (elt),
13781 GET_MODE_INNER (GET_MODE (x))));
13782}
13783
13784/* Return true if X is a valid immediate for the SVE DUP and CPY
13785 instructions. */
13786
13787bool
13788aarch64_sve_dup_immediate_p (rtx x)
13789{
13790 rtx elt;
13791
13792 if (!const_vec_duplicate_p (x, &elt)
13793 || !CONST_INT_P (elt))
13794 return false;
13795
13796 HOST_WIDE_INT val = INTVAL (elt);
13797 if (val & 0xff)
13798 return IN_RANGE (val, -0x80, 0x7f);
13799 return IN_RANGE (val, -0x8000, 0x7f00);
13800}
13801
13802/* Return true if X is a valid immediate operand for an SVE CMP instruction.
13803 SIGNED_P says whether the operand is signed rather than unsigned. */
13804
13805bool
13806aarch64_sve_cmp_immediate_p (rtx x, bool signed_p)
13807{
13808 rtx elt;
13809
13810 return (const_vec_duplicate_p (x, &elt)
13811 && CONST_INT_P (elt)
13812 && (signed_p
13813 ? IN_RANGE (INTVAL (elt), -16, 15)
13814 : IN_RANGE (INTVAL (elt), 0, 127)));
13815}
13816
13817/* Return true if X is a valid immediate operand for an SVE FADD or FSUB
13818 instruction. Negate X first if NEGATE_P is true. */
13819
13820bool
13821aarch64_sve_float_arith_immediate_p (rtx x, bool negate_p)
13822{
13823 rtx elt;
13824 REAL_VALUE_TYPE r;
13825
13826 if (!const_vec_duplicate_p (x, &elt)
13827 || GET_CODE (elt) != CONST_DOUBLE)
13828 return false;
13829
13830 r = *CONST_DOUBLE_REAL_VALUE (elt);
13831
13832 if (negate_p)
13833 r = real_value_negate (&r);
13834
13835 if (real_equal (&r, &dconst1))
13836 return true;
13837 if (real_equal (&r, &dconsthalf))
13838 return true;
13839 return false;
13840}
13841
13842/* Return true if X is a valid immediate operand for an SVE FMUL
13843 instruction. */
13844
13845bool
13846aarch64_sve_float_mul_immediate_p (rtx x)
13847{
13848 rtx elt;
13849
13850 /* GCC will never generate a multiply with an immediate of 2, so there is no
13851 point testing for it (even though it is a valid constant). */
13852 return (const_vec_duplicate_p (x, &elt)
13853 && GET_CODE (elt) == CONST_DOUBLE
13854 && real_equal (CONST_DOUBLE_REAL_VALUE (elt), &dconsthalf));
13855}
13856
b187677b
RS
13857/* Return true if replicating VAL32 is a valid 2-byte or 4-byte immediate
13858 for the Advanced SIMD operation described by WHICH and INSN. If INFO
13859 is nonnull, use it to describe valid immediates. */
3520f7cc 13860static bool
b187677b
RS
13861aarch64_advsimd_valid_immediate_hs (unsigned int val32,
13862 simd_immediate_info *info,
13863 enum simd_immediate_check which,
13864 simd_immediate_info::insn_type insn)
13865{
13866 /* Try a 4-byte immediate with LSL. */
13867 for (unsigned int shift = 0; shift < 32; shift += 8)
13868 if ((val32 & (0xff << shift)) == val32)
13869 {
13870 if (info)
13871 *info = simd_immediate_info (SImode, val32 >> shift, insn,
13872 simd_immediate_info::LSL, shift);
13873 return true;
13874 }
3520f7cc 13875
b187677b
RS
13876 /* Try a 2-byte immediate with LSL. */
13877 unsigned int imm16 = val32 & 0xffff;
13878 if (imm16 == (val32 >> 16))
13879 for (unsigned int shift = 0; shift < 16; shift += 8)
13880 if ((imm16 & (0xff << shift)) == imm16)
48063b9d 13881 {
b187677b
RS
13882 if (info)
13883 *info = simd_immediate_info (HImode, imm16 >> shift, insn,
13884 simd_immediate_info::LSL, shift);
13885 return true;
48063b9d 13886 }
3520f7cc 13887
b187677b
RS
13888 /* Try a 4-byte immediate with MSL, except for cases that MVN
13889 can handle. */
13890 if (which == AARCH64_CHECK_MOV)
13891 for (unsigned int shift = 8; shift < 24; shift += 8)
13892 {
13893 unsigned int low = (1 << shift) - 1;
13894 if (((val32 & (0xff << shift)) | low) == val32)
13895 {
13896 if (info)
13897 *info = simd_immediate_info (SImode, val32 >> shift, insn,
13898 simd_immediate_info::MSL, shift);
13899 return true;
13900 }
13901 }
43e9d192 13902
b187677b
RS
13903 return false;
13904}
13905
13906/* Return true if replicating VAL64 is a valid immediate for the
13907 Advanced SIMD operation described by WHICH. If INFO is nonnull,
13908 use it to describe valid immediates. */
13909static bool
13910aarch64_advsimd_valid_immediate (unsigned HOST_WIDE_INT val64,
13911 simd_immediate_info *info,
13912 enum simd_immediate_check which)
13913{
13914 unsigned int val32 = val64 & 0xffffffff;
13915 unsigned int val16 = val64 & 0xffff;
13916 unsigned int val8 = val64 & 0xff;
13917
13918 if (val32 == (val64 >> 32))
43e9d192 13919 {
b187677b
RS
13920 if ((which & AARCH64_CHECK_ORR) != 0
13921 && aarch64_advsimd_valid_immediate_hs (val32, info, which,
13922 simd_immediate_info::MOV))
13923 return true;
43e9d192 13924
b187677b
RS
13925 if ((which & AARCH64_CHECK_BIC) != 0
13926 && aarch64_advsimd_valid_immediate_hs (~val32, info, which,
13927 simd_immediate_info::MVN))
13928 return true;
ee78df47 13929
b187677b
RS
13930 /* Try using a replicated byte. */
13931 if (which == AARCH64_CHECK_MOV
13932 && val16 == (val32 >> 16)
13933 && val8 == (val16 >> 8))
ee78df47 13934 {
b187677b
RS
13935 if (info)
13936 *info = simd_immediate_info (QImode, val8);
13937 return true;
ee78df47 13938 }
43e9d192
IB
13939 }
13940
b187677b
RS
13941 /* Try using a bit-to-bytemask. */
13942 if (which == AARCH64_CHECK_MOV)
43e9d192 13943 {
b187677b
RS
13944 unsigned int i;
13945 for (i = 0; i < 64; i += 8)
ab6501d7 13946 {
b187677b
RS
13947 unsigned char byte = (val64 >> i) & 0xff;
13948 if (byte != 0 && byte != 0xff)
13949 break;
ab6501d7 13950 }
b187677b 13951 if (i == 64)
ab6501d7 13952 {
b187677b
RS
13953 if (info)
13954 *info = simd_immediate_info (DImode, val64);
13955 return true;
ab6501d7 13956 }
43e9d192 13957 }
b187677b
RS
13958 return false;
13959}
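/* Hedged worked examples for the checks above (values chosen purely for
   illustration): 0x0000004300000043 is accepted as a replicated 32-bit MOVI
   of 0x43; 0x4300430043004300 is accepted as a replicated 16-bit MOVI of
   0x43 with LSL #8; 0xff00000000ffff00 is accepted only by the
   bit-to-bytemask test (every byte is 0x00 or 0xff); and
   0x1234123412341234 is rejected by all of them.  */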
43e9d192 13960
43cacb12
RS
13961/* Return true if replicating VAL64 gives a valid immediate for an SVE MOV
13962 instruction. If INFO is nonnull, use it to describe valid immediates. */
13963
13964static bool
13965aarch64_sve_valid_immediate (unsigned HOST_WIDE_INT val64,
13966 simd_immediate_info *info)
13967{
13968 scalar_int_mode mode = DImode;
13969 unsigned int val32 = val64 & 0xffffffff;
13970 if (val32 == (val64 >> 32))
13971 {
13972 mode = SImode;
13973 unsigned int val16 = val32 & 0xffff;
13974 if (val16 == (val32 >> 16))
13975 {
13976 mode = HImode;
13977 unsigned int val8 = val16 & 0xff;
13978 if (val8 == (val16 >> 8))
13979 mode = QImode;
13980 }
13981 }
13982 HOST_WIDE_INT val = trunc_int_for_mode (val64, mode);
13983 if (IN_RANGE (val, -0x80, 0x7f))
13984 {
13985 /* DUP with no shift. */
13986 if (info)
13987 *info = simd_immediate_info (mode, val);
13988 return true;
13989 }
13990 if ((val & 0xff) == 0 && IN_RANGE (val, -0x8000, 0x7f00))
13991 {
13992 /* DUP with LSL #8. */
13993 if (info)
13994 *info = simd_immediate_info (mode, val);
13995 return true;
13996 }
13997 if (aarch64_bitmask_imm (val64, mode))
13998 {
13999 /* DUPM. */
14000 if (info)
14001 *info = simd_immediate_info (mode, val);
14002 return true;
14003 }
14004 return false;
14005}
14006
b187677b
RS
14007/* Return true if OP is a valid SIMD immediate for the operation
14008 described by WHICH. If INFO is nonnull, use it to describe valid
14009 immediates. */
14010bool
14011aarch64_simd_valid_immediate (rtx op, simd_immediate_info *info,
14012 enum simd_immediate_check which)
14013{
43cacb12
RS
14014 machine_mode mode = GET_MODE (op);
14015 unsigned int vec_flags = aarch64_classify_vector_mode (mode);
14016 if (vec_flags == 0 || vec_flags == (VEC_ADVSIMD | VEC_STRUCT))
14017 return false;
14018
14019 scalar_mode elt_mode = GET_MODE_INNER (mode);
f9093f23 14020 rtx base, step;
b187677b 14021 unsigned int n_elts;
f9093f23
RS
14022 if (GET_CODE (op) == CONST_VECTOR
14023 && CONST_VECTOR_DUPLICATE_P (op))
14024 n_elts = CONST_VECTOR_NPATTERNS (op);
43cacb12
RS
14025 else if ((vec_flags & VEC_SVE_DATA)
14026 && const_vec_series_p (op, &base, &step))
14027 {
14028 gcc_assert (GET_MODE_CLASS (mode) == MODE_VECTOR_INT);
14029 if (!aarch64_sve_index_immediate_p (base)
14030 || !aarch64_sve_index_immediate_p (step))
14031 return false;
14032
14033 if (info)
14034 *info = simd_immediate_info (elt_mode, base, step);
14035 return true;
14036 }
6a70badb
RS
14037 else if (GET_CODE (op) == CONST_VECTOR
14038 && CONST_VECTOR_NUNITS (op).is_constant (&n_elts))
14039 /* N_ELTS set above. */;
b187677b 14040 else
d8edd899 14041 return false;
43e9d192 14042
43cacb12
RS
14043 /* Handle PFALSE and PTRUE. */
14044 if (vec_flags & VEC_SVE_PRED)
14045 return (op == CONST0_RTX (mode)
14046 || op == CONSTM1_RTX (mode));
14047
b187677b 14048 scalar_float_mode elt_float_mode;
f9093f23
RS
14049 if (n_elts == 1
14050 && is_a <scalar_float_mode> (elt_mode, &elt_float_mode))
43e9d192 14051 {
f9093f23
RS
14052 rtx elt = CONST_VECTOR_ENCODED_ELT (op, 0);
14053 if (aarch64_float_const_zero_rtx_p (elt)
14054 || aarch64_float_const_representable_p (elt))
14055 {
14056 if (info)
14057 *info = simd_immediate_info (elt_float_mode, elt);
14058 return true;
14059 }
b187677b 14060 }
43e9d192 14061
b187677b
RS
14062 unsigned int elt_size = GET_MODE_SIZE (elt_mode);
14063 if (elt_size > 8)
14064 return false;
e4f0f84d 14065
b187677b 14066 scalar_int_mode elt_int_mode = int_mode_for_mode (elt_mode).require ();
43e9d192 14067
b187677b
RS
14068 /* Expand the vector constant out into a byte vector, with the least
14069 significant byte of the register first. */
14070 auto_vec<unsigned char, 16> bytes;
14071 bytes.reserve (n_elts * elt_size);
14072 for (unsigned int i = 0; i < n_elts; i++)
14073 {
f9093f23
RS
14074 /* The vector is provided in gcc endian-neutral fashion.
14075 For aarch64_be Advanced SIMD, it must be laid out in the vector
14076 register in reverse order. */
14077 bool swap_p = ((vec_flags & VEC_ADVSIMD) != 0 && BYTES_BIG_ENDIAN);
14078 rtx elt = CONST_VECTOR_ELT (op, swap_p ? (n_elts - 1 - i) : i);
43e9d192 14079
b187677b
RS
14080 if (elt_mode != elt_int_mode)
14081 elt = gen_lowpart (elt_int_mode, elt);
43e9d192 14082
b187677b
RS
14083 if (!CONST_INT_P (elt))
14084 return false;
43e9d192 14085
b187677b
RS
14086 unsigned HOST_WIDE_INT elt_val = INTVAL (elt);
14087 for (unsigned int byte = 0; byte < elt_size; byte++)
48063b9d 14088 {
b187677b
RS
14089 bytes.quick_push (elt_val & 0xff);
14090 elt_val >>= BITS_PER_UNIT;
48063b9d 14091 }
43e9d192
IB
14092 }
14093
b187677b
RS
14094 /* The immediate must repeat every eight bytes. */
14095 unsigned int nbytes = bytes.length ();
14096 for (unsigned i = 8; i < nbytes; ++i)
14097 if (bytes[i] != bytes[i - 8])
14098 return false;
14099
14100 /* Get the repeating 8-byte value as an integer. No endian correction
14101 is needed here because bytes is already in lsb-first order. */
14102 unsigned HOST_WIDE_INT val64 = 0;
14103 for (unsigned int i = 0; i < 8; i++)
14104 val64 |= ((unsigned HOST_WIDE_INT) bytes[i % nbytes]
14105 << (i * BITS_PER_UNIT));
14106
43cacb12
RS
14107 if (vec_flags & VEC_SVE_DATA)
14108 return aarch64_sve_valid_immediate (val64, info);
14109 else
14110 return aarch64_advsimd_valid_immediate (val64, info, which);
14111}
14112
14113/* Check whether X is a VEC_SERIES-like constant that starts at 0 and
14114 has a step that is valid for the SVE INDEX instruction. Return the step
14115 if so, otherwise return null. */
14116rtx
14117aarch64_check_zero_based_sve_index_immediate (rtx x)
14118{
14119 rtx base, step;
14120 if (const_vec_series_p (x, &base, &step)
14121 && base == const0_rtx
14122 && aarch64_sve_index_immediate_p (step))
14123 return step;
14124 return NULL_RTX;
43e9d192
IB
14125}
14126
43e9d192
IB
14127/* Check that immediate shift constants are within range. */
14128bool
ef4bddc2 14129aarch64_simd_shift_imm_p (rtx x, machine_mode mode, bool left)
43e9d192
IB
14130{
14131 int bit_width = GET_MODE_UNIT_SIZE (mode) * BITS_PER_UNIT;
14132 if (left)
ddeabd3e 14133 return aarch64_const_vec_all_same_in_range_p (x, 0, bit_width - 1);
43e9d192 14134 else
ddeabd3e 14135 return aarch64_const_vec_all_same_in_range_p (x, 1, bit_width);
43e9d192
IB
14136}
14137
7325d85a
KT
14138/* Return the bitmask CONST_INT to select the bits required by a zero extract
14139 operation of width WIDTH at bit position POS. */
14140
14141rtx
14142aarch64_mask_from_zextract_ops (rtx width, rtx pos)
14143{
14144 gcc_assert (CONST_INT_P (width));
14145 gcc_assert (CONST_INT_P (pos));
14146
14147 unsigned HOST_WIDE_INT mask
14148 = ((unsigned HOST_WIDE_INT) 1 << UINTVAL (width)) - 1;
14149 return GEN_INT (mask << UINTVAL (pos));
14150}
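/* For example (a simple worked case): WIDTH == 4 and POS == 8 give
   ((1 << 4) - 1) << 8 == 0xf00, i.e. a mask covering bits 8..11.  */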
14151
83f8c414 14152bool
a6e0bfa7 14153aarch64_mov_operand_p (rtx x, machine_mode mode)
83f8c414 14154{
83f8c414
CSS
14155 if (GET_CODE (x) == HIGH
14156 && aarch64_valid_symref (XEXP (x, 0), GET_MODE (XEXP (x, 0))))
14157 return true;
14158
82614948 14159 if (CONST_INT_P (x))
83f8c414
CSS
14160 return true;
14161
43cacb12
RS
14162 if (VECTOR_MODE_P (GET_MODE (x)))
14163 return aarch64_simd_valid_immediate (x, NULL);
14164
83f8c414
CSS
14165 if (GET_CODE (x) == SYMBOL_REF && mode == DImode && CONSTANT_ADDRESS_P (x))
14166 return true;
14167
43cacb12
RS
14168 if (aarch64_sve_cnt_immediate_p (x))
14169 return true;
14170
a6e0bfa7 14171 return aarch64_classify_symbolic_expression (x)
a5350ddc 14172 == SYMBOL_TINY_ABSOLUTE;
83f8c414
CSS
14173}
14174
43e9d192
IB
14175/* Return a const_int vector of VAL. */
14176rtx
ab014eb3 14177aarch64_simd_gen_const_vector_dup (machine_mode mode, HOST_WIDE_INT val)
43e9d192 14178{
59d06c05
RS
14179 rtx c = gen_int_mode (val, GET_MODE_INNER (mode));
14180 return gen_const_vec_duplicate (mode, c);
43e9d192
IB
14181}
14182
051d0e2f
SN
14183/* Check OP is a legal scalar immediate for the MOVI instruction. */
14184
14185bool
77e994c9 14186aarch64_simd_scalar_immediate_valid_for_move (rtx op, scalar_int_mode mode)
051d0e2f 14187{
ef4bddc2 14188 machine_mode vmode;
051d0e2f 14189
43cacb12 14190 vmode = aarch64_simd_container_mode (mode, 64);
051d0e2f 14191 rtx op_v = aarch64_simd_gen_const_vector_dup (vmode, INTVAL (op));
b187677b 14192 return aarch64_simd_valid_immediate (op_v, NULL);
051d0e2f
SN
14193}
14194
988fa693
JG
14195/* Construct and return a PARALLEL RTX vector with elements numbering the
14196 lanes of either the high (HIGH == TRUE) or low (HIGH == FALSE) half of
14197 the vector - from the perspective of the architecture. This does not
14198 line up with GCC's perspective on lane numbers, so we end up with
14199 different masks depending on our target endian-ness. The diagram
14200 below may help. We must draw the distinction when building masks
14201 which select one half of the vector. An instruction selecting
14202 architectural low-lanes for a big-endian target must be described using
14203 a mask selecting GCC high-lanes.
14204
14205 Big-Endian Little-Endian
14206
14207GCC 0 1 2 3 3 2 1 0
14208 | x | x | x | x | | x | x | x | x |
14209Architecture 3 2 1 0 3 2 1 0
14210
14211Low Mask: { 2, 3 } { 0, 1 }
14212High Mask: { 0, 1 } { 2, 3 }
f5cbabc1
RS
14213
14214 MODE Is the mode of the vector and NUNITS is the number of units in it. */
988fa693 14215
43e9d192 14216rtx
f5cbabc1 14217aarch64_simd_vect_par_cnst_half (machine_mode mode, int nunits, bool high)
43e9d192 14218{
43e9d192 14219 rtvec v = rtvec_alloc (nunits / 2);
988fa693
JG
14220 int high_base = nunits / 2;
14221 int low_base = 0;
14222 int base;
43e9d192
IB
14223 rtx t1;
14224 int i;
14225
988fa693
JG
14226 if (BYTES_BIG_ENDIAN)
14227 base = high ? low_base : high_base;
14228 else
14229 base = high ? high_base : low_base;
14230
14231 for (i = 0; i < nunits / 2; i++)
43e9d192
IB
14232 RTVEC_ELT (v, i) = GEN_INT (base + i);
14233
14234 t1 = gen_rtx_PARALLEL (mode, v);
14235 return t1;
14236}
14237
988fa693
JG
14238/* Check OP for validity as a PARALLEL RTX vector with elements
14239 numbering the lanes of either the high (HIGH == TRUE) or low (HIGH == FALSE) lanes,
14240 from the perspective of the architecture. See the diagram above
14241 aarch64_simd_vect_par_cnst_half for more details. */
14242
14243bool
ef4bddc2 14244aarch64_simd_check_vect_par_cnst_half (rtx op, machine_mode mode,
988fa693
JG
14245 bool high)
14246{
6a70badb
RS
14247 int nelts;
14248 if (!VECTOR_MODE_P (mode) || !GET_MODE_NUNITS (mode).is_constant (&nelts))
f5cbabc1
RS
14249 return false;
14250
6a70badb 14251 rtx ideal = aarch64_simd_vect_par_cnst_half (mode, nelts, high);
988fa693
JG
14252 HOST_WIDE_INT count_op = XVECLEN (op, 0);
14253 HOST_WIDE_INT count_ideal = XVECLEN (ideal, 0);
14254 int i = 0;
14255
988fa693
JG
14256 if (count_op != count_ideal)
14257 return false;
14258
14259 for (i = 0; i < count_ideal; i++)
14260 {
14261 rtx elt_op = XVECEXP (op, 0, i);
14262 rtx elt_ideal = XVECEXP (ideal, 0, i);
14263
4aa81c2e 14264 if (!CONST_INT_P (elt_op)
988fa693
JG
14265 || INTVAL (elt_ideal) != INTVAL (elt_op))
14266 return false;
14267 }
14268 return true;
14269}
14270
43e9d192
IB
14271/* Bounds-check lanes. Ensure OPERAND lies between LOW (inclusive) and
14272 HIGH (exclusive). */
14273void
46ed6024
CB
14274aarch64_simd_lane_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high,
14275 const_tree exp)
43e9d192
IB
14276{
14277 HOST_WIDE_INT lane;
4aa81c2e 14278 gcc_assert (CONST_INT_P (operand));
43e9d192
IB
14279 lane = INTVAL (operand);
14280
14281 if (lane < low || lane >= high)
46ed6024
CB
14282 {
14283 if (exp)
cf0c27ef 14284 error ("%Klane %wd out of range %wd - %wd", exp, lane, low, high - 1);
46ed6024 14285 else
cf0c27ef 14286 error ("lane %wd out of range %wd - %wd", lane, low, high - 1);
46ed6024 14287 }
43e9d192
IB
14288}
14289
7ac29c0f
RS
14290/* Perform endian correction on lane number N, which indexes a vector
14291 of mode MODE, and return the result as an SImode rtx. */
14292
14293rtx
14294aarch64_endian_lane_rtx (machine_mode mode, unsigned int n)
14295{
14296 return gen_int_mode (ENDIAN_LANE_N (GET_MODE_NUNITS (mode), n), SImode);
14297}
14298
43e9d192 14299/* Return TRUE if OP is a valid vector addressing mode. */
43cacb12 14300
43e9d192
IB
14301bool
14302aarch64_simd_mem_operand_p (rtx op)
14303{
14304 return MEM_P (op) && (GET_CODE (XEXP (op, 0)) == POST_INC
4aa81c2e 14305 || REG_P (XEXP (op, 0)));
43e9d192
IB
14306}
14307
43cacb12
RS
14308/* Return true if OP is a valid MEM operand for an SVE LD1R instruction. */
14309
14310bool
14311aarch64_sve_ld1r_operand_p (rtx op)
14312{
14313 struct aarch64_address_info addr;
14314 scalar_mode mode;
14315
14316 return (MEM_P (op)
14317 && is_a <scalar_mode> (GET_MODE (op), &mode)
14318 && aarch64_classify_address (&addr, XEXP (op, 0), mode, false)
14319 && addr.type == ADDRESS_REG_IMM
14320 && offset_6bit_unsigned_scaled_p (mode, addr.const_offset));
14321}
14322
14323/* Return true if OP is a valid MEM operand for an SVE LDR instruction.
14324 The conditions for STR are the same. */
14325bool
14326aarch64_sve_ldr_operand_p (rtx op)
14327{
14328 struct aarch64_address_info addr;
14329
14330 return (MEM_P (op)
14331 && aarch64_classify_address (&addr, XEXP (op, 0), GET_MODE (op),
14332 false, ADDR_QUERY_ANY)
14333 && addr.type == ADDRESS_REG_IMM);
14334}
14335
9f4cbab8
RS
14336/* Return true if OP is a valid MEM operand for an SVE_STRUCT mode.
14337 We need to be able to access the individual pieces, so the range
14338 is different from LD[234] and ST[234]. */
14339bool
14340aarch64_sve_struct_memory_operand_p (rtx op)
14341{
14342 if (!MEM_P (op))
14343 return false;
14344
14345 machine_mode mode = GET_MODE (op);
14346 struct aarch64_address_info addr;
14347 if (!aarch64_classify_address (&addr, XEXP (op, 0), SVE_BYTE_MODE, false,
14348 ADDR_QUERY_ANY)
14349 || addr.type != ADDRESS_REG_IMM)
14350 return false;
14351
14352 poly_int64 first = addr.const_offset;
14353 poly_int64 last = first + GET_MODE_SIZE (mode) - BYTES_PER_SVE_VECTOR;
14354 return (offset_4bit_signed_scaled_p (SVE_BYTE_MODE, first)
14355 && offset_4bit_signed_scaled_p (SVE_BYTE_MODE, last));
14356}
14357
2d8c6dc1
AH
14358/* Emit a register copy from operand to operand, taking care not to
14359 early-clobber source registers in the process.
43e9d192 14360
2d8c6dc1
AH
14361 COUNT is the number of components into which the copy needs to be
14362 decomposed. */
43e9d192 14363void
b8506a8a 14364aarch64_simd_emit_reg_reg_move (rtx *operands, machine_mode mode,
2d8c6dc1 14365 unsigned int count)
43e9d192
IB
14366{
14367 unsigned int i;
2d8c6dc1
AH
14368 int rdest = REGNO (operands[0]);
14369 int rsrc = REGNO (operands[1]);
43e9d192
IB
14370
14371 if (!reg_overlap_mentioned_p (operands[0], operands[1])
2d8c6dc1
AH
14372 || rdest < rsrc)
14373 for (i = 0; i < count; i++)
14374 emit_move_insn (gen_rtx_REG (mode, rdest + i),
14375 gen_rtx_REG (mode, rsrc + i));
43e9d192 14376 else
2d8c6dc1
AH
14377 for (i = 0; i < count; i++)
14378 emit_move_insn (gen_rtx_REG (mode, rdest + count - i - 1),
14379 gen_rtx_REG (mode, rsrc + count - i - 1));
43e9d192
IB
14380}
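/* A hedged illustration of why the direction matters (register numbers are
   hypothetical): copying a 3-register list starting at v1 into one starting
   at v2 must be done from the highest register downwards
   (v4 <- v3, v3 <- v2, v2 <- v1), otherwise v2 and v3 would be overwritten
   before they are read.  */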
14381
668046d1 14382/* Compute and return the length of aarch64_simd_reglist<mode>, where <mode> is
6ec0e5b9 14383 one of the VSTRUCT modes: OI, CI, or XI. */
668046d1 14384int
b8506a8a 14385aarch64_simd_attr_length_rglist (machine_mode mode)
668046d1 14386{
6a70badb
RS
14387 /* This is only used (and only meaningful) for Advanced SIMD, not SVE. */
14388 return (GET_MODE_SIZE (mode).to_constant () / UNITS_PER_VREG) * 4;
668046d1
DS
14389}
14390
db0253a4 14391/* Implement target hook TARGET_VECTOR_ALIGNMENT. The AAPCS64 sets the maximum
43cacb12
RS
14392 alignment of a vector to 128 bits. SVE predicates have an alignment of
14393 16 bits. */
db0253a4
TB
14394static HOST_WIDE_INT
14395aarch64_simd_vector_alignment (const_tree type)
14396{
43cacb12
RS
14397 if (TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
14398 /* ??? Checking the mode isn't ideal, but VECTOR_BOOLEAN_TYPE_P can
14399 be set for non-predicate vectors of booleans. Modes are the most
14400 direct way we have of identifying real SVE predicate types. */
14401 return GET_MODE_CLASS (TYPE_MODE (type)) == MODE_VECTOR_BOOL ? 16 : 128;
9439e9a1 14402 HOST_WIDE_INT align = tree_to_shwi (TYPE_SIZE (type));
db0253a4
TB
14403 return MIN (align, 128);
14404}
14405
43cacb12 14406/* Implement target hook TARGET_VECTORIZE_PREFERRED_VECTOR_ALIGNMENT. */
ca31798e 14407static poly_uint64
43cacb12
RS
14408aarch64_vectorize_preferred_vector_alignment (const_tree type)
14409{
14410 if (aarch64_sve_data_mode_p (TYPE_MODE (type)))
14411 {
14412 /* If the length of the vector is fixed, try to align to that length,
14413 otherwise don't try to align at all. */
14414 HOST_WIDE_INT result;
14415 if (!BITS_PER_SVE_VECTOR.is_constant (&result))
14416 result = TYPE_ALIGN (TREE_TYPE (type));
14417 return result;
14418 }
14419 return TYPE_ALIGN (type);
14420}
14421
db0253a4
TB
14422/* Implement target hook TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE. */
14423static bool
14424aarch64_simd_vector_alignment_reachable (const_tree type, bool is_packed)
14425{
14426 if (is_packed)
14427 return false;
14428
43cacb12
RS
14429 /* For fixed-length vectors, check that the vectorizer will aim for
14430 full-vector alignment. This isn't true for generic GCC vectors
14431 that are wider than the ABI maximum of 128 bits. */
ca31798e
AV
14432 poly_uint64 preferred_alignment =
14433 aarch64_vectorize_preferred_vector_alignment (type);
43cacb12 14434 if (TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
ca31798e
AV
14435 && maybe_ne (wi::to_widest (TYPE_SIZE (type)),
14436 preferred_alignment))
db0253a4
TB
14437 return false;
14438
14439 /* Vectors whose size is <= BIGGEST_ALIGNMENT are naturally aligned. */
14440 return true;
14441}
14442
7df76747
N
14443/* Return true if the vector misalignment factor is supported by the
14444 target. */
14445static bool
14446aarch64_builtin_support_vector_misalignment (machine_mode mode,
14447 const_tree type, int misalignment,
14448 bool is_packed)
14449{
14450 if (TARGET_SIMD && STRICT_ALIGNMENT)
14451 {
14452 /* Return false if the movmisalign pattern is not supported for this mode. */
14453 if (optab_handler (movmisalign_optab, mode) == CODE_FOR_nothing)
14454 return false;
14455
a509c571 14456 /* Misalignment factor is unknown at compile time. */
7df76747 14457 if (misalignment == -1)
a509c571 14458 return false;
7df76747
N
14459 }
14460 return default_builtin_support_vector_misalignment (mode, type, misalignment,
14461 is_packed);
14462}
14463
4369c11e
TB
14464/* If VALS is a vector constant that can be loaded into a register
14465 using DUP, generate instructions to do so and return an RTX to
14466 assign to the register. Otherwise return NULL_RTX. */
14467static rtx
14468aarch64_simd_dup_constant (rtx vals)
14469{
ef4bddc2
RS
14470 machine_mode mode = GET_MODE (vals);
14471 machine_mode inner_mode = GET_MODE_INNER (mode);
4369c11e 14472 rtx x;
4369c11e 14473
92695fbb 14474 if (!const_vec_duplicate_p (vals, &x))
4369c11e
TB
14475 return NULL_RTX;
14476
14477 /* We can load this constant by using DUP and a constant in a
14478 single ARM register. This will be cheaper than a vector
14479 load. */
92695fbb 14480 x = copy_to_mode_reg (inner_mode, x);
59d06c05 14481 return gen_vec_duplicate (mode, x);
4369c11e
TB
14482}
14483
14484
14485/* Generate code to load VALS, which is a PARALLEL containing only
14486 constants (for vec_init) or CONST_VECTOR, efficiently into a
14487 register. Returns an RTX to copy into the register, or NULL_RTX
14488 for a PARALLEL that can not be converted into a CONST_VECTOR. */
1df3f464 14489static rtx
4369c11e
TB
14490aarch64_simd_make_constant (rtx vals)
14491{
ef4bddc2 14492 machine_mode mode = GET_MODE (vals);
4369c11e
TB
14493 rtx const_dup;
14494 rtx const_vec = NULL_RTX;
4369c11e
TB
14495 int n_const = 0;
14496 int i;
14497
14498 if (GET_CODE (vals) == CONST_VECTOR)
14499 const_vec = vals;
14500 else if (GET_CODE (vals) == PARALLEL)
14501 {
14502 /* A CONST_VECTOR must contain only CONST_INTs and
14503 CONST_DOUBLEs, but CONSTANT_P allows more (e.g. SYMBOL_REF).
14504 Only store valid constants in a CONST_VECTOR. */
6a70badb 14505 int n_elts = XVECLEN (vals, 0);
4369c11e
TB
14506 for (i = 0; i < n_elts; ++i)
14507 {
14508 rtx x = XVECEXP (vals, 0, i);
14509 if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
14510 n_const++;
14511 }
14512 if (n_const == n_elts)
14513 const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0));
14514 }
14515 else
14516 gcc_unreachable ();
14517
14518 if (const_vec != NULL_RTX
b187677b 14519 && aarch64_simd_valid_immediate (const_vec, NULL))
4369c11e
TB
14520 /* Load using MOVI/MVNI. */
14521 return const_vec;
14522 else if ((const_dup = aarch64_simd_dup_constant (vals)) != NULL_RTX)
14523 /* Loaded using DUP. */
14524 return const_dup;
14525 else if (const_vec != NULL_RTX)
14526 /* Load from constant pool. We can not take advantage of single-cycle
14527 LD1 because we need a PC-relative addressing mode. */
14528 return const_vec;
14529 else
14530 /* A PARALLEL containing something not valid inside CONST_VECTOR.
14531 We can not construct an initializer. */
14532 return NULL_RTX;
14533}
14534
35a093b6
JG
14535/* Expand a vector initialisation sequence, such that TARGET is
14536 initialised to contain VALS. */
14537
4369c11e
TB
14538void
14539aarch64_expand_vector_init (rtx target, rtx vals)
14540{
ef4bddc2 14541 machine_mode mode = GET_MODE (target);
146c2e3a 14542 scalar_mode inner_mode = GET_MODE_INNER (mode);
35a093b6 14543 /* The number of vector elements. */
6a70badb 14544 int n_elts = XVECLEN (vals, 0);
35a093b6 14545 /* The number of vector elements which are not constant. */
8b66a2d4
AL
14546 int n_var = 0;
14547 rtx any_const = NULL_RTX;
35a093b6
JG
14548 /* The first element of vals. */
14549 rtx v0 = XVECEXP (vals, 0, 0);
4369c11e 14550 bool all_same = true;
4369c11e 14551
35a093b6 14552 /* Count the number of variable elements to initialise. */
8b66a2d4 14553 for (int i = 0; i < n_elts; ++i)
4369c11e 14554 {
8b66a2d4 14555 rtx x = XVECEXP (vals, 0, i);
35a093b6 14556 if (!(CONST_INT_P (x) || CONST_DOUBLE_P (x)))
8b66a2d4
AL
14557 ++n_var;
14558 else
14559 any_const = x;
4369c11e 14560
35a093b6 14561 all_same &= rtx_equal_p (x, v0);
4369c11e
TB
14562 }
14563
35a093b6
JG
14564 /* No variable elements, hand off to aarch64_simd_make_constant which knows
14565 how best to handle this. */
4369c11e
TB
14566 if (n_var == 0)
14567 {
14568 rtx constant = aarch64_simd_make_constant (vals);
14569 if (constant != NULL_RTX)
14570 {
14571 emit_move_insn (target, constant);
14572 return;
14573 }
14574 }
14575
14576 /* Splat a single non-constant element if we can. */
14577 if (all_same)
14578 {
35a093b6 14579 rtx x = copy_to_mode_reg (inner_mode, v0);
59d06c05 14580 aarch64_emit_move (target, gen_vec_duplicate (mode, x));
4369c11e
TB
14581 return;
14582 }
14583
85c1b6d7
AP
14584 enum insn_code icode = optab_handler (vec_set_optab, mode);
14585 gcc_assert (icode != CODE_FOR_nothing);
14586
14587 /* If there are only variable elements, try to optimize
14588 the insertion using dup for the most common element
14589 followed by insertions. */
14590
14591 /* The algorithm will fill matches[*][0] with the earliest matching element,
14592 and matches[X][1] with the count of duplicate elements (if X is the
14593 earliest element which has duplicates). */
14594
14595 if (n_var == n_elts && n_elts <= 16)
14596 {
14597 int matches[16][2] = {0};
14598 for (int i = 0; i < n_elts; i++)
14599 {
14600 for (int j = 0; j <= i; j++)
14601 {
14602 if (rtx_equal_p (XVECEXP (vals, 0, i), XVECEXP (vals, 0, j)))
14603 {
14604 matches[i][0] = j;
14605 matches[j][1]++;
14606 break;
14607 }
14608 }
14609 }
14610 int maxelement = 0;
14611 int maxv = 0;
14612 for (int i = 0; i < n_elts; i++)
14613 if (matches[i][1] > maxv)
14614 {
14615 maxelement = i;
14616 maxv = matches[i][1];
14617 }
14618
b4e2cd5b
JG
14619 /* Create a duplicate of the most common element, unless all elements
14620 are equally useless to us, in which case just immediately set the
14621 vector register using the first element. */
14622
14623 if (maxv == 1)
14624 {
14625 /* For vectors of two 64-bit elements, we can do even better. */
14626 if (n_elts == 2
14627 && (inner_mode == E_DImode
14628 || inner_mode == E_DFmode))
14629
14630 {
14631 rtx x0 = XVECEXP (vals, 0, 0);
14632 rtx x1 = XVECEXP (vals, 0, 1);
14633 /* Combine can pick up this case, but handling it directly
14634 here leaves clearer RTL.
14635
14636 This is load_pair_lanes<mode>, and also gives us a clean-up
14637 for store_pair_lanes<mode>. */
14638 if (memory_operand (x0, inner_mode)
14639 && memory_operand (x1, inner_mode)
14640 && !STRICT_ALIGNMENT
14641 && rtx_equal_p (XEXP (x1, 0),
14642 plus_constant (Pmode,
14643 XEXP (x0, 0),
14644 GET_MODE_SIZE (inner_mode))))
14645 {
14646 rtx t;
14647 if (inner_mode == DFmode)
14648 t = gen_load_pair_lanesdf (target, x0, x1);
14649 else
14650 t = gen_load_pair_lanesdi (target, x0, x1);
14651 emit_insn (t);
14652 return;
14653 }
14654 }
14655 /* The subreg-move sequence below will move into lane zero of the
14656 vector register. For big-endian we want that position to hold
14657 the last element of VALS. */
14658 maxelement = BYTES_BIG_ENDIAN ? n_elts - 1 : 0;
14659 rtx x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, maxelement));
14660 aarch64_emit_move (target, lowpart_subreg (mode, x, inner_mode));
14661 }
14662 else
14663 {
14664 rtx x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, maxelement));
14665 aarch64_emit_move (target, gen_vec_duplicate (mode, x));
14666 }
85c1b6d7
AP
14667
14668 /* Insert the rest. */
14669 for (int i = 0; i < n_elts; i++)
14670 {
14671 rtx x = XVECEXP (vals, 0, i);
14672 if (matches[i][0] == maxelement)
14673 continue;
14674 x = copy_to_mode_reg (inner_mode, x);
14675 emit_insn (GEN_FCN (icode) (target, x, GEN_INT (i)));
14676 }
14677 return;
14678 }
14679
35a093b6
JG
14680 /* Initialise a vector which is part-variable. We want to first try
14681 to build those lanes which are constant in the most efficient way we
14682 can. */
14683 if (n_var != n_elts)
4369c11e
TB
14684 {
14685 rtx copy = copy_rtx (vals);
4369c11e 14686
8b66a2d4
AL
14687 /* Load constant part of vector. We really don't care what goes into the
14688 parts we will overwrite, but we're more likely to be able to load the
14689 constant efficiently if it has fewer, larger, repeating parts
14690 (see aarch64_simd_valid_immediate). */
14691 for (int i = 0; i < n_elts; i++)
14692 {
14693 rtx x = XVECEXP (vals, 0, i);
14694 if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
14695 continue;
14696 rtx subst = any_const;
14697 for (int bit = n_elts / 2; bit > 0; bit /= 2)
14698 {
14699 /* Look in the copied vector, as more elements are const. */
14700 rtx test = XVECEXP (copy, 0, i ^ bit);
14701 if (CONST_INT_P (test) || CONST_DOUBLE_P (test))
14702 {
14703 subst = test;
14704 break;
14705 }
14706 }
14707 XVECEXP (copy, 0, i) = subst;
14708 }
4369c11e 14709 aarch64_expand_vector_init (target, copy);
35a093b6 14710 }
4369c11e 14711
35a093b6 14712 /* Insert the variable lanes directly. */
8b66a2d4 14713 for (int i = 0; i < n_elts; i++)
35a093b6
JG
14714 {
14715 rtx x = XVECEXP (vals, 0, i);
14716 if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
14717 continue;
14718 x = copy_to_mode_reg (inner_mode, x);
14719 emit_insn (GEN_FCN (icode) (target, x, GEN_INT (i)));
14720 }
4369c11e
TB
14721}
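/* A hedged sketch of the strategy above for a hypothetical V4SI initialiser
   { x, y, x, x } in which every element is variable: x is the most common
   element, so the expansion is roughly

     dup  v0.4s, w0          // splat the most common value (x)
     ins  v0.s[1], w1        // patch the single differing lane (y)

   rather than one insert per lane; register choices are illustrative.  */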
14722
43e9d192 14723static unsigned HOST_WIDE_INT
ef4bddc2 14724aarch64_shift_truncation_mask (machine_mode mode)
43e9d192 14725{
43cacb12
RS
14726 if (!SHIFT_COUNT_TRUNCATED || aarch64_vector_data_mode_p (mode))
14727 return 0;
14728 return GET_MODE_UNIT_BITSIZE (mode) - 1;
43e9d192
IB
14729}
14730
43e9d192
IB
14731/* Select a format to encode pointers in exception handling data. */
14732int
14733aarch64_asm_preferred_eh_data_format (int code ATTRIBUTE_UNUSED, int global)
14734{
14735 int type;
14736 switch (aarch64_cmodel)
14737 {
14738 case AARCH64_CMODEL_TINY:
14739 case AARCH64_CMODEL_TINY_PIC:
14740 case AARCH64_CMODEL_SMALL:
14741 case AARCH64_CMODEL_SMALL_PIC:
1b1e81f8 14742 case AARCH64_CMODEL_SMALL_SPIC:
43e9d192
IB
14743 /* text+got+data < 4Gb. 4-byte signed relocs are sufficient
14744 for everything. */
14745 type = DW_EH_PE_sdata4;
14746 break;
14747 default:
14748 /* No assumptions here. 8-byte relocs required. */
14749 type = DW_EH_PE_sdata8;
14750 break;
14751 }
14752 return (global ? DW_EH_PE_indirect : 0) | DW_EH_PE_pcrel | type;
14753}
14754
e1c1ecb0
KT
14755/* The last .arch and .tune assembly strings that we printed. */
14756static std::string aarch64_last_printed_arch_string;
14757static std::string aarch64_last_printed_tune_string;
14758
361fb3ee
KT
14759/* Implement ASM_DECLARE_FUNCTION_NAME. Output the ISA features used
14760 by the function fndecl. */
14761
14762void
14763aarch64_declare_function_name (FILE *stream, const char* name,
14764 tree fndecl)
14765{
14766 tree target_parts = DECL_FUNCTION_SPECIFIC_TARGET (fndecl);
14767
14768 struct cl_target_option *targ_options;
14769 if (target_parts)
14770 targ_options = TREE_TARGET_OPTION (target_parts);
14771 else
14772 targ_options = TREE_TARGET_OPTION (target_option_current_node);
14773 gcc_assert (targ_options);
14774
14775 const struct processor *this_arch
14776 = aarch64_get_arch (targ_options->x_explicit_arch);
14777
054b4005
JG
14778 unsigned long isa_flags = targ_options->x_aarch64_isa_flags;
14779 std::string extension
04a99ebe
JG
14780 = aarch64_get_extension_string_for_isa_flags (isa_flags,
14781 this_arch->flags);
e1c1ecb0
KT
14782 /* Only update the assembler .arch string if it is distinct from the last
14783 such string we printed. */
14784 std::string to_print = this_arch->name + extension;
14785 if (to_print != aarch64_last_printed_arch_string)
14786 {
14787 asm_fprintf (asm_out_file, "\t.arch %s\n", to_print.c_str ());
14788 aarch64_last_printed_arch_string = to_print;
14789 }
361fb3ee
KT
14790
14791 /* Print the cpu name we're tuning for in the comments; it might be
e1c1ecb0
KT
14792 useful to readers of the generated asm. Do it only when it changes
14793 from function to function and verbose assembly is requested. */
361fb3ee
KT
14794 const struct processor *this_tune
14795 = aarch64_get_tune_cpu (targ_options->x_explicit_tune_core);
14796
e1c1ecb0
KT
14797 if (flag_debug_asm && aarch64_last_printed_tune_string != this_tune->name)
14798 {
14799 asm_fprintf (asm_out_file, "\t" ASM_COMMENT_START ".tune %s\n",
14800 this_tune->name);
14801 aarch64_last_printed_tune_string = this_tune->name;
14802 }
361fb3ee
KT
14803
14804 /* Don't forget the type directive for ELF. */
14805 ASM_OUTPUT_TYPE_DIRECTIVE (stream, name, "function");
14806 ASM_OUTPUT_LABEL (stream, name);
14807}
14808
e1c1ecb0
KT
14809/* Implements TARGET_ASM_FILE_START. Output the assembly header. */
14810
14811static void
14812aarch64_start_file (void)
14813{
14814 struct cl_target_option *default_options
14815 = TREE_TARGET_OPTION (target_option_default_node);
14816
14817 const struct processor *default_arch
14818 = aarch64_get_arch (default_options->x_explicit_arch);
14819 unsigned long default_isa_flags = default_options->x_aarch64_isa_flags;
14820 std::string extension
04a99ebe
JG
14821 = aarch64_get_extension_string_for_isa_flags (default_isa_flags,
14822 default_arch->flags);
e1c1ecb0
KT
14823
14824 aarch64_last_printed_arch_string = default_arch->name + extension;
14825 aarch64_last_printed_tune_string = "";
14826 asm_fprintf (asm_out_file, "\t.arch %s\n",
14827 aarch64_last_printed_arch_string.c_str ());
14828
14829 default_file_start ();
14830}
14831
0462169c
SN
14832/* Emit load exclusive. */
14833
14834static void
ef4bddc2 14835aarch64_emit_load_exclusive (machine_mode mode, rtx rval,
0462169c
SN
14836 rtx mem, rtx model_rtx)
14837{
0016d8d9 14838 emit_insn (gen_aarch64_load_exclusive (mode, rval, mem, model_rtx));
0462169c
SN
14839}
14840
14841/* Emit store exclusive. */
14842
14843static void
ef4bddc2 14844aarch64_emit_store_exclusive (machine_mode mode, rtx bval,
0462169c
SN
14845 rtx rval, rtx mem, rtx model_rtx)
14846{
0016d8d9 14847 emit_insn (gen_aarch64_store_exclusive (mode, bval, rval, mem, model_rtx));
0462169c
SN
14848}
14849
14850/* Mark the previous jump instruction as unlikely. */
14851
14852static void
14853aarch64_emit_unlikely_jump (rtx insn)
14854{
f370536c 14855 rtx_insn *jump = emit_jump_insn (insn);
5fa396ad 14856 add_reg_br_prob_note (jump, profile_probability::very_unlikely ());
0462169c
SN
14857}
14858
14859/* Expand a compare and swap pattern. */
14860
14861void
14862aarch64_expand_compare_and_swap (rtx operands[])
14863{
d400fda3
RH
14864 rtx bval, rval, mem, oldval, newval, is_weak, mod_s, mod_f, x, cc_reg;
14865 machine_mode mode, r_mode;
0462169c
SN
14866
14867 bval = operands[0];
14868 rval = operands[1];
14869 mem = operands[2];
14870 oldval = operands[3];
14871 newval = operands[4];
14872 is_weak = operands[5];
14873 mod_s = operands[6];
14874 mod_f = operands[7];
14875 mode = GET_MODE (mem);
0462169c
SN
14876
14877 /* Normally the succ memory model must be stronger than fail, but in the
14878 unlikely event of fail being ACQUIRE and succ being RELEASE we need to
14879 promote succ to ACQ_REL so that we don't lose the acquire semantics. */
46b35980
AM
14880 if (is_mm_acquire (memmodel_from_int (INTVAL (mod_f)))
14881 && is_mm_release (memmodel_from_int (INTVAL (mod_s))))
0462169c
SN
14882 mod_s = GEN_INT (MEMMODEL_ACQ_REL);
14883
d400fda3
RH
14884 r_mode = mode;
14885 if (mode == QImode || mode == HImode)
0462169c 14886 {
d400fda3
RH
14887 r_mode = SImode;
14888 rval = gen_reg_rtx (r_mode);
0462169c
SN
14889 }
14890
b0770c0f 14891 if (TARGET_LSE)
77f33f44
RH
14892 {
14893 /* The CAS insn requires oldval and rval overlap, but we need to
14894 have a copy of oldval saved across the operation to tell if
14895 the operation is successful. */
d400fda3
RH
14896 if (reg_overlap_mentioned_p (rval, oldval))
14897 rval = copy_to_mode_reg (r_mode, oldval);
77f33f44 14898 else
d400fda3
RH
14899 emit_move_insn (rval, gen_lowpart (r_mode, oldval));
14900
77f33f44
RH
14901 emit_insn (gen_aarch64_compare_and_swap_lse (mode, rval, mem,
14902 newval, mod_s));
d400fda3 14903 cc_reg = aarch64_gen_compare_reg_maybe_ze (NE, rval, oldval, mode);
77f33f44 14904 }
b0770c0f 14905 else
d400fda3
RH
14906 {
14907 /* The oldval predicate varies by mode. Test it and force to reg. */
14908 insn_code code = code_for_aarch64_compare_and_swap (mode);
14909 if (!insn_data[code].operand[2].predicate (oldval, mode))
14910 oldval = force_reg (mode, oldval);
0462169c 14911
d400fda3
RH
14912 emit_insn (GEN_FCN (code) (rval, mem, oldval, newval,
14913 is_weak, mod_s, mod_f));
14914 cc_reg = gen_rtx_REG (CCmode, CC_REGNUM);
14915 }
14916
14917 if (r_mode != mode)
77f33f44
RH
14918 rval = gen_lowpart (mode, rval);
14919 emit_move_insn (operands[1], rval);
0462169c 14920
d400fda3 14921 x = gen_rtx_EQ (SImode, cc_reg, const0_rtx);
f7df4a84 14922 emit_insn (gen_rtx_SET (bval, x));
0462169c
SN
14923}
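
/* Sketch of the two paths above for a word-sized strong
   __atomic_compare_exchange with SEQ_CST ordering (mnemonics indicative
   only, register names arbitrary):

     TARGET_LSE:	mov	rval, oldval
			casal	rval, newval, [mem]
			cmp	rval, oldval	; bval <- (rval == oldval)

     otherwise the generic compare_and_swap pattern is emitted here and is
     later split into a load/store-exclusive loop by
     aarch64_split_compare_and_swap below.  */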
14924
f70fb3b6
MW
14925/* Emit a barrier appropriate for memory model MODEL at the end of a
14926 sequence implementing an atomic operation. */
14927
14928static void
14929aarch64_emit_post_barrier (enum memmodel model)
14930{
14931 const enum memmodel base_model = memmodel_base (model);
14932
14933 if (is_mm_sync (model)
14934 && (base_model == MEMMODEL_ACQUIRE
14935 || base_model == MEMMODEL_ACQ_REL
14936 || base_model == MEMMODEL_SEQ_CST))
14937 {
14938 emit_insn (gen_mem_thread_fence (GEN_INT (MEMMODEL_SEQ_CST)));
14939 }
14940}
14941
0462169c
SN
14942/* Split a compare and swap pattern. */
14943
14944void
14945aarch64_split_compare_and_swap (rtx operands[])
14946{
14947 rtx rval, mem, oldval, newval, scratch;
ef4bddc2 14948 machine_mode mode;
0462169c 14949 bool is_weak;
5d8a22a5
DM
14950 rtx_code_label *label1, *label2;
14951 rtx x, cond;
ab876106
MW
14952 enum memmodel model;
14953 rtx model_rtx;
0462169c
SN
14954
14955 rval = operands[0];
14956 mem = operands[1];
14957 oldval = operands[2];
14958 newval = operands[3];
14959 is_weak = (operands[4] != const0_rtx);
ab876106 14960 model_rtx = operands[5];
0462169c
SN
14961 scratch = operands[7];
14962 mode = GET_MODE (mem);
ab876106 14963 model = memmodel_from_int (INTVAL (model_rtx));
0462169c 14964
17f47f86
KT
14965 /* When OLDVAL is zero and we want the strong version we can emit a tighter
14966 loop:
14967 .label1:
14968 LD[A]XR rval, [mem]
14969 CBNZ rval, .label2
14970 ST[L]XR scratch, newval, [mem]
14971 CBNZ scratch, .label1
14972 .label2:
14973 CMP rval, 0. */
14974 bool strong_zero_p = !is_weak && oldval == const0_rtx;
14975
5d8a22a5 14976 label1 = NULL;
0462169c
SN
14977 if (!is_weak)
14978 {
14979 label1 = gen_label_rtx ();
14980 emit_label (label1);
14981 }
14982 label2 = gen_label_rtx ();
14983
ab876106
MW
14984 /* The initial load can be relaxed for a __sync operation since a final
14985 barrier will be emitted to stop code hoisting. */
14986 if (is_mm_sync (model))
14987 aarch64_emit_load_exclusive (mode, rval, mem,
14988 GEN_INT (MEMMODEL_RELAXED));
14989 else
14990 aarch64_emit_load_exclusive (mode, rval, mem, model_rtx);
0462169c 14991
17f47f86
KT
14992 if (strong_zero_p)
14993 {
6e1eaca9
RE
14994 if (aarch64_track_speculation)
14995 {
14996 /* Emit an explicit compare instruction, so that we can correctly
14997 track the condition codes. */
14998 rtx cc_reg = aarch64_gen_compare_reg (NE, rval, const0_rtx);
14999 x = gen_rtx_NE (GET_MODE (cc_reg), cc_reg, const0_rtx);
15000 }
15001 else
15002 x = gen_rtx_NE (VOIDmode, rval, const0_rtx);
15003
17f47f86
KT
15004 x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
15005 gen_rtx_LABEL_REF (Pmode, label2), pc_rtx);
15006 aarch64_emit_unlikely_jump (gen_rtx_SET (pc_rtx, x));
15007 }
15008 else
15009 {
d400fda3 15010 cond = aarch64_gen_compare_reg_maybe_ze (NE, rval, oldval, mode);
17f47f86
KT
15011 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
15012 x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
d400fda3 15013 gen_rtx_LABEL_REF (Pmode, label2), pc_rtx);
17f47f86
KT
15014 aarch64_emit_unlikely_jump (gen_rtx_SET (pc_rtx, x));
15015 }
0462169c 15016
ab876106 15017 aarch64_emit_store_exclusive (mode, scratch, mem, newval, model_rtx);
0462169c
SN
15018
15019 if (!is_weak)
15020 {
6e1eaca9
RE
15021 if (aarch64_track_speculation)
15022 {
15023 /* Emit an explicit compare instruction, so that we can correctly
15024 track the condition codes. */
15025 rtx cc_reg = aarch64_gen_compare_reg (NE, scratch, const0_rtx);
15026 x = gen_rtx_NE (GET_MODE (cc_reg), cc_reg, const0_rtx);
15027 }
15028 else
15029 x = gen_rtx_NE (VOIDmode, scratch, const0_rtx);
15030
0462169c
SN
15031 x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
15032 gen_rtx_LABEL_REF (Pmode, label1), pc_rtx);
f7df4a84 15033 aarch64_emit_unlikely_jump (gen_rtx_SET (pc_rtx, x));
0462169c
SN
15034 }
15035 else
15036 {
15037 cond = gen_rtx_REG (CCmode, CC_REGNUM);
15038 x = gen_rtx_COMPARE (CCmode, scratch, const0_rtx);
f7df4a84 15039 emit_insn (gen_rtx_SET (cond, x));
0462169c
SN
15040 }
15041
15042 emit_label (label2);
17f47f86
KT
15043 /* If we used a CBNZ in the exchange loop emit an explicit compare with RVAL
15044 to set the condition flags. If this is not used it will be removed by
15045 later passes. */
15046 if (strong_zero_p)
15047 {
15048 cond = gen_rtx_REG (CCmode, CC_REGNUM);
15049 x = gen_rtx_COMPARE (CCmode, rval, const0_rtx);
15050 emit_insn (gen_rtx_SET (cond, x));
15051 }
ab876106
MW
15052 /* Emit any final barrier needed for a __sync operation. */
15053 if (is_mm_sync (model))
15054 aarch64_emit_post_barrier (model);
0462169c 15055}
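
/* For reference, the generic (non-zero OLDVAL) strong case splits into
   roughly this loop; exact registers and acquire/release variants depend
   on the operands and the memory model:
     .label1:
	LD[A]XR	rval, [mem]
	CMP	rval, oldval
	B.NE	.label2
	ST[L]XR	scratch, newval, [mem]
	CBNZ	scratch, .label1
     .label2:  */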
9cd7b720 15056
0462169c
SN
15057/* Split an atomic operation. */
15058
15059void
15060aarch64_split_atomic_op (enum rtx_code code, rtx old_out, rtx new_out, rtx mem,
9cd7b720 15061 rtx value, rtx model_rtx, rtx cond)
0462169c 15062{
ef4bddc2
RS
15063 machine_mode mode = GET_MODE (mem);
15064 machine_mode wmode = (mode == DImode ? DImode : SImode);
f70fb3b6
MW
15065 const enum memmodel model = memmodel_from_int (INTVAL (model_rtx));
15066 const bool is_sync = is_mm_sync (model);
5d8a22a5
DM
15067 rtx_code_label *label;
15068 rtx x;
0462169c 15069
9cd7b720 15070 /* Split the atomic operation into a sequence. */
0462169c
SN
15071 label = gen_label_rtx ();
15072 emit_label (label);
15073
15074 if (new_out)
15075 new_out = gen_lowpart (wmode, new_out);
15076 if (old_out)
15077 old_out = gen_lowpart (wmode, old_out);
15078 else
15079 old_out = new_out;
15080 value = simplify_gen_subreg (wmode, value, mode, 0);
15081
f70fb3b6
MW
15082 /* The initial load can be relaxed for a __sync operation since a final
15083 barrier will be emitted to stop code hoisting. */
15084 if (is_sync)
15085 aarch64_emit_load_exclusive (mode, old_out, mem,
15086 GEN_INT (MEMMODEL_RELAXED));
15087 else
15088 aarch64_emit_load_exclusive (mode, old_out, mem, model_rtx);
0462169c
SN
15089
15090 switch (code)
15091 {
15092 case SET:
15093 new_out = value;
15094 break;
15095
15096 case NOT:
15097 x = gen_rtx_AND (wmode, old_out, value);
f7df4a84 15098 emit_insn (gen_rtx_SET (new_out, x));
0462169c 15099 x = gen_rtx_NOT (wmode, new_out);
f7df4a84 15100 emit_insn (gen_rtx_SET (new_out, x));
0462169c
SN
15101 break;
15102
15103 case MINUS:
15104 if (CONST_INT_P (value))
15105 {
15106 value = GEN_INT (-INTVAL (value));
15107 code = PLUS;
15108 }
15109 /* Fall through. */
15110
15111 default:
15112 x = gen_rtx_fmt_ee (code, wmode, old_out, value);
f7df4a84 15113 emit_insn (gen_rtx_SET (new_out, x));
0462169c
SN
15114 break;
15115 }
15116
15117 aarch64_emit_store_exclusive (mode, cond, mem,
15118 gen_lowpart (mode, new_out), model_rtx);
15119
6e1eaca9
RE
15120 if (aarch64_track_speculation)
15121 {
15122 /* Emit an explicit compare instruction, so that we can correctly
15123 track the condition codes. */
15124 rtx cc_reg = aarch64_gen_compare_reg (NE, cond, const0_rtx);
15125 x = gen_rtx_NE (GET_MODE (cc_reg), cc_reg, const0_rtx);
15126 }
15127 else
15128 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
15129
0462169c
SN
15130 x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
15131 gen_rtx_LABEL_REF (Pmode, label), pc_rtx);
f7df4a84 15132 aarch64_emit_unlikely_jump (gen_rtx_SET (pc_rtx, x));
f70fb3b6
MW
15133
15134 /* Emit any final barrier needed for a __sync operation. */
15135 if (is_sync)
15136 aarch64_emit_post_barrier (model);
0462169c
SN
15137}
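
/* As an illustration, a 32-bit __atomic_fetch_add expanded through this
   splitter becomes a load/store-exclusive loop of roughly this shape
   (register names arbitrary):
     .label:
	LD[A]XR	old, [mem]
	ADD	new, old, value
	ST[L]XR	cond, new, [mem]
	CBNZ	cond, .label  */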
15138
c2ec330c
AL
15139static void
15140aarch64_init_libfuncs (void)
15141{
15142 /* Half-precision float operations. The compiler handles all operations
15143 with NULL libfuncs by converting to SFmode. */
15144
15145 /* Conversions. */
15146 set_conv_libfunc (trunc_optab, HFmode, SFmode, "__gnu_f2h_ieee");
15147 set_conv_libfunc (sext_optab, SFmode, HFmode, "__gnu_h2f_ieee");
15148
15149 /* Arithmetic. */
15150 set_optab_libfunc (add_optab, HFmode, NULL);
15151 set_optab_libfunc (sdiv_optab, HFmode, NULL);
15152 set_optab_libfunc (smul_optab, HFmode, NULL);
15153 set_optab_libfunc (neg_optab, HFmode, NULL);
15154 set_optab_libfunc (sub_optab, HFmode, NULL);
15155
15156 /* Comparisons. */
15157 set_optab_libfunc (eq_optab, HFmode, NULL);
15158 set_optab_libfunc (ne_optab, HFmode, NULL);
15159 set_optab_libfunc (lt_optab, HFmode, NULL);
15160 set_optab_libfunc (le_optab, HFmode, NULL);
15161 set_optab_libfunc (ge_optab, HFmode, NULL);
15162 set_optab_libfunc (gt_optab, HFmode, NULL);
15163 set_optab_libfunc (unord_optab, HFmode, NULL);
15164}
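
/* With the optabs above left as NULL, a plain __fp16 operation such as

     __fp16 f (__fp16 x, __fp16 y) { return x + y; }

   is handled by widening both operands to SFmode (via __gnu_h2f_ieee or an
   FCVT instruction, depending on the target), adding in single precision
   and narrowing the result back, rather than by calling an HFmode libfunc.  */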
15165
43e9d192 15166/* Target hook for c_mode_for_suffix. */
ef4bddc2 15167static machine_mode
43e9d192
IB
15168aarch64_c_mode_for_suffix (char suffix)
15169{
15170 if (suffix == 'q')
15171 return TFmode;
15172
15173 return VOIDmode;
15174}
15175
3520f7cc
JG
15176/* We can only represent floating-point constants which will fit in
15177   "quarter-precision" values.  These values are characterised by
15178   a sign bit, a 4-bit mantissa and a 3-bit exponent, and are given
15179   by:
15180
15181 (-1)^s * (n/16) * 2^r
15182
15183 Where:
15184 's' is the sign bit.
15185 'n' is an integer in the range 16 <= n <= 31.
15186 'r' is an integer in the range -3 <= r <= 4. */
15187
15188/* Return true iff X can be represented by a quarter-precision
15189 floating point immediate operand X. Note, we cannot represent 0.0. */
15190bool
15191aarch64_float_const_representable_p (rtx x)
15192{
15193 /* This represents our current view of how many bits
15194 make up the mantissa. */
15195 int point_pos = 2 * HOST_BITS_PER_WIDE_INT - 1;
ba96cdfb 15196 int exponent;
3520f7cc 15197 unsigned HOST_WIDE_INT mantissa, mask;
3520f7cc 15198 REAL_VALUE_TYPE r, m;
807e902e 15199 bool fail;
3520f7cc
JG
15200
15201 if (!CONST_DOUBLE_P (x))
15202 return false;
15203
a4518821
RS
15204 if (GET_MODE (x) == VOIDmode
15205 || (GET_MODE (x) == HFmode && !TARGET_FP_F16INST))
94bfa2da
TV
15206 return false;
15207
34a72c33 15208 r = *CONST_DOUBLE_REAL_VALUE (x);
3520f7cc
JG
15209
15210 /* We cannot represent infinities, NaNs or +/-zero. We won't
15211 know if we have +zero until we analyse the mantissa, but we
15212 can reject the other invalid values. */
15213 if (REAL_VALUE_ISINF (r) || REAL_VALUE_ISNAN (r)
15214 || REAL_VALUE_MINUS_ZERO (r))
15215 return false;
15216
ba96cdfb 15217 /* Extract exponent. */
3520f7cc
JG
15218 r = real_value_abs (&r);
15219 exponent = REAL_EXP (&r);
15220
15221 /* For the mantissa, we expand into two HOST_WIDE_INTS, apart from the
15222 highest (sign) bit, with a fixed binary point at bit point_pos.
15223 m1 holds the low part of the mantissa, m2 the high part.
15224 WARNING: If we ever have a representation using more than 2 * H_W_I - 1
15225 bits for the mantissa, this can fail (low bits will be lost). */
15226 real_ldexp (&m, &r, point_pos - exponent);
807e902e 15227 wide_int w = real_to_integer (&m, &fail, HOST_BITS_PER_WIDE_INT * 2);
3520f7cc
JG
15228
15229 /* If the low part of the mantissa has bits set we cannot represent
15230 the value. */
d9074b29 15231 if (w.ulow () != 0)
3520f7cc
JG
15232 return false;
15233 /* We have rejected the lower HOST_WIDE_INT, so update our
15234 understanding of how many bits lie in the mantissa and
15235 look only at the high HOST_WIDE_INT. */
807e902e 15236 mantissa = w.elt (1);
3520f7cc
JG
15237 point_pos -= HOST_BITS_PER_WIDE_INT;
15238
15239 /* We can only represent values with a mantissa of the form 1.xxxx. */
15240 mask = ((unsigned HOST_WIDE_INT)1 << (point_pos - 5)) - 1;
15241 if ((mantissa & mask) != 0)
15242 return false;
15243
15244 /* Having filtered unrepresentable values, we may now remove all
15245 but the highest 5 bits. */
15246 mantissa >>= point_pos - 5;
15247
15248 /* We cannot represent the value 0.0, so reject it. This is handled
15249 elsewhere. */
15250 if (mantissa == 0)
15251 return false;
15252
15253 /* Then, as bit 4 is always set, we can mask it off, leaving
15254 the mantissa in the range [0, 15]. */
15255 mantissa &= ~(1 << 4);
15256 gcc_assert (mantissa <= 15);
15257
15258 /* GCC internally does not use IEEE754-like encoding (where normalized
15259 significands are in the range [1, 2). GCC uses [0.5, 1) (see real.c).
15260 Our mantissa values are shifted 4 places to the left relative to
15261 normalized IEEE754 so we must modify the exponent returned by REAL_EXP
15262 by 5 places to correct for GCC's representation. */
15263 exponent = 5 - exponent;
15264
15265 return (exponent >= 0 && exponent <= 7);
15266}
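
/* Worked examples of the quarter-precision form above (sketch):
     1.75 = (-1)^0 * (28/16) * 2^0   -> representable (n = 28, r = 0)
     0.5  = (-1)^0 * (16/16) * 2^-1  -> representable (n = 16, r = -1)
   whereas 1.0/3.0 has no exact 4-bit mantissa and 0.0 is rejected
   explicitly, so neither can be used as an FMOV immediate.  */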
15267
ab6501d7
SD
15268/* Returns the string with the instruction for AdvSIMD MOVI, MVNI, ORR or BIC
15269 immediate with a CONST_VECTOR of MODE and WIDTH. WHICH selects whether to
15270 output MOVI/MVNI, ORR or BIC immediate. */
3520f7cc 15271char*
b187677b 15272aarch64_output_simd_mov_immediate (rtx const_vector, unsigned width,
ab6501d7 15273 enum simd_immediate_check which)
3520f7cc 15274{
3ea63f60 15275 bool is_valid;
3520f7cc 15276 static char templ[40];
3520f7cc 15277 const char *mnemonic;
e4f0f84d 15278 const char *shift_op;
3520f7cc 15279 unsigned int lane_count = 0;
81c2dfb9 15280 char element_char;
3520f7cc 15281
b187677b 15282 struct simd_immediate_info info;
48063b9d
IB
15283
15284 /* This will return true to show const_vector is legal for use as either
ab6501d7
SD
15285 an AdvSIMD MOVI instruction (or, implicitly, MVNI), ORR or BIC immediate.
15286 It will also update INFO to show how the immediate should be generated.
15287 WHICH selects whether to check for MOVI/MVNI, ORR or BIC. */
b187677b 15288 is_valid = aarch64_simd_valid_immediate (const_vector, &info, which);
3520f7cc
JG
15289 gcc_assert (is_valid);
15290
b187677b
RS
15291 element_char = sizetochar (GET_MODE_BITSIZE (info.elt_mode));
15292 lane_count = width / GET_MODE_BITSIZE (info.elt_mode);
48063b9d 15293
b187677b 15294 if (GET_MODE_CLASS (info.elt_mode) == MODE_FLOAT)
3520f7cc 15295 {
b187677b 15296 gcc_assert (info.shift == 0 && info.insn == simd_immediate_info::MOV);
0d8e1702
KT
15297 /* For FP zero change it to a CONST_INT 0 and use the integer SIMD
15298 move immediate path. */
48063b9d
IB
15299 if (aarch64_float_const_zero_rtx_p (info.value))
15300 info.value = GEN_INT (0);
15301 else
15302 {
83faf7d0 15303 const unsigned int buf_size = 20;
48063b9d 15304 char float_buf[buf_size] = {'\0'};
34a72c33
RS
15305 real_to_decimal_for_mode (float_buf,
15306 CONST_DOUBLE_REAL_VALUE (info.value),
b187677b 15307 buf_size, buf_size, 1, info.elt_mode);
48063b9d
IB
15308
15309 if (lane_count == 1)
15310 snprintf (templ, sizeof (templ), "fmov\t%%d0, %s", float_buf);
15311 else
15312 snprintf (templ, sizeof (templ), "fmov\t%%0.%d%c, %s",
81c2dfb9 15313 lane_count, element_char, float_buf);
48063b9d
IB
15314 return templ;
15315 }
3520f7cc 15316 }
3520f7cc 15317
0d8e1702 15318 gcc_assert (CONST_INT_P (info.value));
ab6501d7
SD
15319
15320 if (which == AARCH64_CHECK_MOV)
15321 {
b187677b
RS
15322 mnemonic = info.insn == simd_immediate_info::MVN ? "mvni" : "movi";
15323 shift_op = info.modifier == simd_immediate_info::MSL ? "msl" : "lsl";
ab6501d7
SD
15324 if (lane_count == 1)
15325 snprintf (templ, sizeof (templ), "%s\t%%d0, " HOST_WIDE_INT_PRINT_HEX,
15326 mnemonic, UINTVAL (info.value));
15327 else if (info.shift)
15328 snprintf (templ, sizeof (templ), "%s\t%%0.%d%c, "
15329 HOST_WIDE_INT_PRINT_HEX ", %s %d", mnemonic, lane_count,
15330 element_char, UINTVAL (info.value), shift_op, info.shift);
15331 else
15332 snprintf (templ, sizeof (templ), "%s\t%%0.%d%c, "
15333 HOST_WIDE_INT_PRINT_HEX, mnemonic, lane_count,
15334 element_char, UINTVAL (info.value));
15335 }
3520f7cc 15336 else
ab6501d7
SD
15337 {
15338 /* For AARCH64_CHECK_BIC and AARCH64_CHECK_ORR. */
b187677b 15339 mnemonic = info.insn == simd_immediate_info::MVN ? "bic" : "orr";
ab6501d7
SD
15340 if (info.shift)
15341 snprintf (templ, sizeof (templ), "%s\t%%0.%d%c, #"
15342 HOST_WIDE_INT_PRINT_DEC ", %s #%d", mnemonic, lane_count,
15343 element_char, UINTVAL (info.value), "lsl", info.shift);
15344 else
15345 snprintf (templ, sizeof (templ), "%s\t%%0.%d%c, #"
15346 HOST_WIDE_INT_PRINT_DEC, mnemonic, lane_count,
15347 element_char, UINTVAL (info.value));
15348 }
3520f7cc
JG
15349 return templ;
15350}
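
/* Illustrative templates this function can return (%0 is later replaced by
   the destination register; the values are examples only):
     movi	%0.4s, 0x1, lsl 8		-- AARCH64_CHECK_MOV, shifted
     mvni	%0.8h, 0xff			-- inverted move
     fmov	%0.2d, 1.0e+0			-- FP splat handled above
     orr	%0.4s, #255, lsl #8		-- AARCH64_CHECK_ORR form  */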
15351
b7342d25 15352char*
77e994c9 15353aarch64_output_scalar_simd_mov_immediate (rtx immediate, scalar_int_mode mode)
b7342d25 15354{
a2170965
TC
15355
15356  /* If a floating-point number was passed and we want to use it in an
15357     integer mode, do the conversion to integer.  */
15358 if (CONST_DOUBLE_P (immediate) && GET_MODE_CLASS (mode) == MODE_INT)
15359 {
15360 unsigned HOST_WIDE_INT ival;
15361 if (!aarch64_reinterpret_float_as_int (immediate, &ival))
15362 gcc_unreachable ();
15363 immediate = gen_int_mode (ival, mode);
15364 }
15365
ef4bddc2 15366 machine_mode vmode;
a2170965
TC
15367  /* Use a 64-bit mode for everything except DI/DF mode, where we use
15368     a 128-bit vector mode.  */
15369 int width = GET_MODE_BITSIZE (mode) == 64 ? 128 : 64;
b7342d25 15370
a2170965 15371 vmode = aarch64_simd_container_mode (mode, width);
b7342d25 15372 rtx v_op = aarch64_simd_gen_const_vector_dup (vmode, INTVAL (immediate));
b187677b 15373 return aarch64_output_simd_mov_immediate (v_op, width);
b7342d25
IB
15374}
15375
43cacb12
RS
15376/* Return the output string to use for moving immediate CONST_VECTOR
15377 into an SVE register. */
15378
15379char *
15380aarch64_output_sve_mov_immediate (rtx const_vector)
15381{
15382 static char templ[40];
15383 struct simd_immediate_info info;
15384 char element_char;
15385
15386 bool is_valid = aarch64_simd_valid_immediate (const_vector, &info);
15387 gcc_assert (is_valid);
15388
15389 element_char = sizetochar (GET_MODE_BITSIZE (info.elt_mode));
15390
15391 if (info.step)
15392 {
15393 snprintf (templ, sizeof (templ), "index\t%%0.%c, #"
15394 HOST_WIDE_INT_PRINT_DEC ", #" HOST_WIDE_INT_PRINT_DEC,
15395 element_char, INTVAL (info.value), INTVAL (info.step));
15396 return templ;
15397 }
15398
15399 if (GET_MODE_CLASS (info.elt_mode) == MODE_FLOAT)
15400 {
15401 if (aarch64_float_const_zero_rtx_p (info.value))
15402 info.value = GEN_INT (0);
15403 else
15404 {
15405 const int buf_size = 20;
15406 char float_buf[buf_size] = {};
15407 real_to_decimal_for_mode (float_buf,
15408 CONST_DOUBLE_REAL_VALUE (info.value),
15409 buf_size, buf_size, 1, info.elt_mode);
15410
15411 snprintf (templ, sizeof (templ), "fmov\t%%0.%c, #%s",
15412 element_char, float_buf);
15413 return templ;
15414 }
15415 }
15416
15417 snprintf (templ, sizeof (templ), "mov\t%%0.%c, #" HOST_WIDE_INT_PRINT_DEC,
15418 element_char, INTVAL (info.value));
15419 return templ;
15420}
15421
15422/* Return the asm format for a PTRUE instruction whose destination has
15423 mode MODE. SUFFIX is the element size suffix. */
15424
15425char *
15426aarch64_output_ptrue (machine_mode mode, char suffix)
15427{
15428 unsigned int nunits;
15429 static char buf[sizeof ("ptrue\t%0.N, vlNNNNN")];
15430 if (GET_MODE_NUNITS (mode).is_constant (&nunits))
15431 snprintf (buf, sizeof (buf), "ptrue\t%%0.%c, vl%d", suffix, nunits);
15432 else
15433 snprintf (buf, sizeof (buf), "ptrue\t%%0.%c, all", suffix);
15434 return buf;
15435}
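
/* For example, with -msve-vector-bits=128 a VNx4SI destination has a known
   four units and gives "ptrue\t%0.s, vl4", whereas with a scalable vector
   length the element count is not a compile-time constant and we fall back
   to "ptrue\t%0.s, all".  (Illustrative; the suffix letter is supplied by
   the caller.)  */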
15436
88b08073
JG
15437/* Split operands into moves from op[1] + op[2] into op[0]. */
15438
15439void
15440aarch64_split_combinev16qi (rtx operands[3])
15441{
15442 unsigned int dest = REGNO (operands[0]);
15443 unsigned int src1 = REGNO (operands[1]);
15444 unsigned int src2 = REGNO (operands[2]);
ef4bddc2 15445 machine_mode halfmode = GET_MODE (operands[1]);
462a99aa 15446 unsigned int halfregs = REG_NREGS (operands[1]);
88b08073
JG
15447 rtx destlo, desthi;
15448
15449 gcc_assert (halfmode == V16QImode);
15450
15451 if (src1 == dest && src2 == dest + halfregs)
15452 {
15453 /* No-op move. Can't split to nothing; emit something. */
15454 emit_note (NOTE_INSN_DELETED);
15455 return;
15456 }
15457
15458 /* Preserve register attributes for variable tracking. */
15459 destlo = gen_rtx_REG_offset (operands[0], halfmode, dest, 0);
15460 desthi = gen_rtx_REG_offset (operands[0], halfmode, dest + halfregs,
15461 GET_MODE_SIZE (halfmode));
15462
15463 /* Special case of reversed high/low parts. */
15464 if (reg_overlap_mentioned_p (operands[2], destlo)
15465 && reg_overlap_mentioned_p (operands[1], desthi))
15466 {
15467 emit_insn (gen_xorv16qi3 (operands[1], operands[1], operands[2]));
15468 emit_insn (gen_xorv16qi3 (operands[2], operands[1], operands[2]));
15469 emit_insn (gen_xorv16qi3 (operands[1], operands[1], operands[2]));
15470 }
15471 else if (!reg_overlap_mentioned_p (operands[2], destlo))
15472 {
15473 /* Try to avoid unnecessary moves if part of the result
15474 is in the right place already. */
15475 if (src1 != dest)
15476 emit_move_insn (destlo, operands[1]);
15477 if (src2 != dest + halfregs)
15478 emit_move_insn (desthi, operands[2]);
15479 }
15480 else
15481 {
15482 if (src2 != dest + halfregs)
15483 emit_move_insn (desthi, operands[2]);
15484 if (src1 != dest)
15485 emit_move_insn (destlo, operands[1]);
15486 }
15487}
15488
15489/* vec_perm support. */
15490
88b08073
JG
15491struct expand_vec_perm_d
15492{
15493 rtx target, op0, op1;
e3342de4 15494 vec_perm_indices perm;
ef4bddc2 15495 machine_mode vmode;
43cacb12 15496 unsigned int vec_flags;
88b08073
JG
15497 bool one_vector_p;
15498 bool testing_p;
15499};
15500
15501/* Generate a variable permutation. */
15502
15503static void
15504aarch64_expand_vec_perm_1 (rtx target, rtx op0, rtx op1, rtx sel)
15505{
ef4bddc2 15506 machine_mode vmode = GET_MODE (target);
88b08073
JG
15507 bool one_vector_p = rtx_equal_p (op0, op1);
15508
15509 gcc_checking_assert (vmode == V8QImode || vmode == V16QImode);
15510 gcc_checking_assert (GET_MODE (op0) == vmode);
15511 gcc_checking_assert (GET_MODE (op1) == vmode);
15512 gcc_checking_assert (GET_MODE (sel) == vmode);
15513 gcc_checking_assert (TARGET_SIMD);
15514
15515 if (one_vector_p)
15516 {
15517 if (vmode == V8QImode)
15518 {
15519 /* Expand the argument to a V16QI mode by duplicating it. */
15520 rtx pair = gen_reg_rtx (V16QImode);
15521 emit_insn (gen_aarch64_combinev8qi (pair, op0, op0));
15522 emit_insn (gen_aarch64_tbl1v8qi (target, pair, sel));
15523 }
15524 else
15525 {
15526 emit_insn (gen_aarch64_tbl1v16qi (target, op0, sel));
15527 }
15528 }
15529 else
15530 {
15531 rtx pair;
15532
15533 if (vmode == V8QImode)
15534 {
15535 pair = gen_reg_rtx (V16QImode);
15536 emit_insn (gen_aarch64_combinev8qi (pair, op0, op1));
15537 emit_insn (gen_aarch64_tbl1v8qi (target, pair, sel));
15538 }
15539 else
15540 {
15541 pair = gen_reg_rtx (OImode);
15542 emit_insn (gen_aarch64_combinev16qi (pair, op0, op1));
15543 emit_insn (gen_aarch64_tbl2v16qi (target, pair, sel));
15544 }
15545 }
15546}
15547
80940017
RS
15548/* Expand a vec_perm with the operands given by TARGET, OP0, OP1 and SEL.
15549 NELT is the number of elements in the vector. */
15550
88b08073 15551void
80940017
RS
15552aarch64_expand_vec_perm (rtx target, rtx op0, rtx op1, rtx sel,
15553 unsigned int nelt)
88b08073 15554{
ef4bddc2 15555 machine_mode vmode = GET_MODE (target);
88b08073 15556 bool one_vector_p = rtx_equal_p (op0, op1);
f7c4e5b8 15557 rtx mask;
88b08073
JG
15558
15559 /* The TBL instruction does not use a modulo index, so we must take care
15560 of that ourselves. */
f7c4e5b8
AL
15561 mask = aarch64_simd_gen_const_vector_dup (vmode,
15562 one_vector_p ? nelt - 1 : 2 * nelt - 1);
88b08073
JG
15563 sel = expand_simple_binop (vmode, AND, sel, mask, NULL, 0, OPTAB_LIB_WIDEN);
15564
f7c4e5b8
AL
15565 /* For big-endian, we also need to reverse the index within the vector
15566 (but not which vector). */
15567 if (BYTES_BIG_ENDIAN)
15568 {
15569 /* If one_vector_p, mask is a vector of (nelt - 1)'s already. */
15570 if (!one_vector_p)
15571 mask = aarch64_simd_gen_const_vector_dup (vmode, nelt - 1);
15572 sel = expand_simple_binop (vmode, XOR, sel, mask,
15573 NULL, 0, OPTAB_LIB_WIDEN);
15574 }
88b08073
JG
15575 aarch64_expand_vec_perm_1 (target, op0, op1, sel);
15576}
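
/* Sketch of the index adjustment above for a two-vector V16QI permute:
   each selector byte is ANDed with 31 (2 * nelt - 1) so the TBL lookup
   cannot read outside the concatenated 32-byte table, and on big-endian
   the byte index within each vector is additionally XORed with 15
   (nelt - 1) to undo the lane reversal.  */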
15577
43cacb12
RS
15578/* Generate (set TARGET (unspec [OP0 OP1] CODE)). */
15579
15580static void
15581emit_unspec2 (rtx target, int code, rtx op0, rtx op1)
15582{
15583 emit_insn (gen_rtx_SET (target,
15584 gen_rtx_UNSPEC (GET_MODE (target),
15585 gen_rtvec (2, op0, op1), code)));
15586}
15587
15588/* Expand an SVE vec_perm with the given operands. */
15589
15590void
15591aarch64_expand_sve_vec_perm (rtx target, rtx op0, rtx op1, rtx sel)
15592{
15593 machine_mode data_mode = GET_MODE (target);
15594 machine_mode sel_mode = GET_MODE (sel);
15595 /* Enforced by the pattern condition. */
15596 int nunits = GET_MODE_NUNITS (sel_mode).to_constant ();
15597
15598 /* Note: vec_perm indices are supposed to wrap when they go beyond the
15599 size of the two value vectors, i.e. the upper bits of the indices
15600 are effectively ignored. SVE TBL instead produces 0 for any
15601 out-of-range indices, so we need to modulo all the vec_perm indices
15602 to ensure they are all in range. */
15603 rtx sel_reg = force_reg (sel_mode, sel);
15604
15605 /* Check if the sel only references the first values vector. */
15606 if (GET_CODE (sel) == CONST_VECTOR
15607 && aarch64_const_vec_all_in_range_p (sel, 0, nunits - 1))
15608 {
15609 emit_unspec2 (target, UNSPEC_TBL, op0, sel_reg);
15610 return;
15611 }
15612
15613 /* Check if the two values vectors are the same. */
15614 if (rtx_equal_p (op0, op1))
15615 {
15616 rtx max_sel = aarch64_simd_gen_const_vector_dup (sel_mode, nunits - 1);
15617 rtx sel_mod = expand_simple_binop (sel_mode, AND, sel_reg, max_sel,
15618 NULL, 0, OPTAB_DIRECT);
15619 emit_unspec2 (target, UNSPEC_TBL, op0, sel_mod);
15620 return;
15621 }
15622
15623  /* Run TBL on each value vector and combine the results.  */
15624
15625 rtx res0 = gen_reg_rtx (data_mode);
15626 rtx res1 = gen_reg_rtx (data_mode);
15627 rtx neg_num_elems = aarch64_simd_gen_const_vector_dup (sel_mode, -nunits);
15628 if (GET_CODE (sel) != CONST_VECTOR
15629 || !aarch64_const_vec_all_in_range_p (sel, 0, 2 * nunits - 1))
15630 {
15631 rtx max_sel = aarch64_simd_gen_const_vector_dup (sel_mode,
15632 2 * nunits - 1);
15633 sel_reg = expand_simple_binop (sel_mode, AND, sel_reg, max_sel,
15634 NULL, 0, OPTAB_DIRECT);
15635 }
15636 emit_unspec2 (res0, UNSPEC_TBL, op0, sel_reg);
15637 rtx sel_sub = expand_simple_binop (sel_mode, PLUS, sel_reg, neg_num_elems,
15638 NULL, 0, OPTAB_DIRECT);
15639 emit_unspec2 (res1, UNSPEC_TBL, op1, sel_sub);
15640 if (GET_MODE_CLASS (data_mode) == MODE_VECTOR_INT)
15641 emit_insn (gen_rtx_SET (target, gen_rtx_IOR (data_mode, res0, res1)));
15642 else
15643 emit_unspec2 (target, UNSPEC_IORF, res0, res1);
15644}
15645
cc4d934f
JG
15646/* Recognize patterns suitable for the TRN instructions. */
15647static bool
15648aarch64_evpc_trn (struct expand_vec_perm_d *d)
15649{
6a70badb
RS
15650 HOST_WIDE_INT odd;
15651 poly_uint64 nelt = d->perm.length ();
cc4d934f 15652 rtx out, in0, in1, x;
ef4bddc2 15653 machine_mode vmode = d->vmode;
cc4d934f
JG
15654
15655 if (GET_MODE_UNIT_SIZE (vmode) > 8)
15656 return false;
15657
15658 /* Note that these are little-endian tests.
15659 We correct for big-endian later. */
6a70badb
RS
15660 if (!d->perm[0].is_constant (&odd)
15661 || (odd != 0 && odd != 1)
326ac20e
RS
15662 || !d->perm.series_p (0, 2, odd, 2)
15663 || !d->perm.series_p (1, 2, nelt + odd, 2))
cc4d934f 15664 return false;
cc4d934f
JG
15665
15666 /* Success! */
15667 if (d->testing_p)
15668 return true;
15669
15670 in0 = d->op0;
15671 in1 = d->op1;
43cacb12
RS
15672 /* We don't need a big-endian lane correction for SVE; see the comment
15673 at the head of aarch64-sve.md for details. */
15674 if (BYTES_BIG_ENDIAN && d->vec_flags == VEC_ADVSIMD)
cc4d934f
JG
15675 {
15676 x = in0, in0 = in1, in1 = x;
15677 odd = !odd;
15678 }
15679 out = d->target;
15680
3f8334a5
RS
15681 emit_set_insn (out, gen_rtx_UNSPEC (vmode, gen_rtvec (2, in0, in1),
15682 odd ? UNSPEC_TRN2 : UNSPEC_TRN1));
cc4d934f
JG
15683 return true;
15684}
15685
15686/* Recognize patterns suitable for the UZP instructions. */
15687static bool
15688aarch64_evpc_uzp (struct expand_vec_perm_d *d)
15689{
6a70badb 15690 HOST_WIDE_INT odd;
cc4d934f 15691 rtx out, in0, in1, x;
ef4bddc2 15692 machine_mode vmode = d->vmode;
cc4d934f
JG
15693
15694 if (GET_MODE_UNIT_SIZE (vmode) > 8)
15695 return false;
15696
15697 /* Note that these are little-endian tests.
15698 We correct for big-endian later. */
6a70badb
RS
15699 if (!d->perm[0].is_constant (&odd)
15700 || (odd != 0 && odd != 1)
326ac20e 15701 || !d->perm.series_p (0, 1, odd, 2))
cc4d934f 15702 return false;
cc4d934f
JG
15703
15704 /* Success! */
15705 if (d->testing_p)
15706 return true;
15707
15708 in0 = d->op0;
15709 in1 = d->op1;
43cacb12
RS
15710 /* We don't need a big-endian lane correction for SVE; see the comment
15711 at the head of aarch64-sve.md for details. */
15712 if (BYTES_BIG_ENDIAN && d->vec_flags == VEC_ADVSIMD)
cc4d934f
JG
15713 {
15714 x = in0, in0 = in1, in1 = x;
15715 odd = !odd;
15716 }
15717 out = d->target;
15718
3f8334a5
RS
15719 emit_set_insn (out, gen_rtx_UNSPEC (vmode, gen_rtvec (2, in0, in1),
15720 odd ? UNSPEC_UZP2 : UNSPEC_UZP1));
cc4d934f
JG
15721 return true;
15722}
15723
15724/* Recognize patterns suitable for the ZIP instructions. */
15725static bool
15726aarch64_evpc_zip (struct expand_vec_perm_d *d)
15727{
6a70badb
RS
15728 unsigned int high;
15729 poly_uint64 nelt = d->perm.length ();
cc4d934f 15730 rtx out, in0, in1, x;
ef4bddc2 15731 machine_mode vmode = d->vmode;
cc4d934f
JG
15732
15733 if (GET_MODE_UNIT_SIZE (vmode) > 8)
15734 return false;
15735
15736 /* Note that these are little-endian tests.
15737 We correct for big-endian later. */
6a70badb
RS
15738 poly_uint64 first = d->perm[0];
15739 if ((maybe_ne (first, 0U) && maybe_ne (first * 2, nelt))
15740 || !d->perm.series_p (0, 2, first, 1)
15741 || !d->perm.series_p (1, 2, first + nelt, 1))
cc4d934f 15742 return false;
6a70badb 15743 high = maybe_ne (first, 0U);
cc4d934f
JG
15744
15745 /* Success! */
15746 if (d->testing_p)
15747 return true;
15748
15749 in0 = d->op0;
15750 in1 = d->op1;
43cacb12
RS
15751 /* We don't need a big-endian lane correction for SVE; see the comment
15752 at the head of aarch64-sve.md for details. */
15753 if (BYTES_BIG_ENDIAN && d->vec_flags == VEC_ADVSIMD)
cc4d934f
JG
15754 {
15755 x = in0, in0 = in1, in1 = x;
15756 high = !high;
15757 }
15758 out = d->target;
15759
3f8334a5
RS
15760 emit_set_insn (out, gen_rtx_UNSPEC (vmode, gen_rtvec (2, in0, in1),
15761 high ? UNSPEC_ZIP2 : UNSPEC_ZIP1));
cc4d934f
JG
15762 return true;
15763}
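
/* Index patterns recognized by the three helpers above, shown for a
   two-operand V4SI permute (elements 0-3 from op0, 4-7 from op1),
   little-endian numbering:
     TRN1 {0, 4, 2, 6}	TRN2 {1, 5, 3, 7}
     UZP1 {0, 2, 4, 6}	UZP2 {1, 3, 5, 7}
     ZIP1 {0, 4, 1, 5}	ZIP2 {2, 6, 3, 7}
   The big-endian operand swap is applied inside each helper.  */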
15764
ae0533da
AL
15765/* Recognize patterns for the EXT insn. */
15766
15767static bool
15768aarch64_evpc_ext (struct expand_vec_perm_d *d)
15769{
6a70badb 15770 HOST_WIDE_INT location;
ae0533da
AL
15771 rtx offset;
15772
6a70badb
RS
15773 /* The first element always refers to the first vector.
15774 Check if the extracted indices are increasing by one. */
43cacb12
RS
15775 if (d->vec_flags == VEC_SVE_PRED
15776 || !d->perm[0].is_constant (&location)
6a70badb 15777 || !d->perm.series_p (0, 1, location, 1))
326ac20e 15778 return false;
ae0533da 15779
ae0533da
AL
15780 /* Success! */
15781 if (d->testing_p)
15782 return true;
15783
b31e65bb 15784 /* The case where (location == 0) is a no-op for both big- and little-endian,
43cacb12 15785 and is removed by the mid-end at optimization levels -O1 and higher.
b31e65bb 15786
43cacb12
RS
15787 We don't need a big-endian lane correction for SVE; see the comment
15788 at the head of aarch64-sve.md for details. */
15789 if (BYTES_BIG_ENDIAN && location != 0 && d->vec_flags == VEC_ADVSIMD)
ae0533da
AL
15790 {
15791 /* After setup, we want the high elements of the first vector (stored
15792 at the LSB end of the register), and the low elements of the second
15793 vector (stored at the MSB end of the register). So swap. */
cb5c6c29 15794 std::swap (d->op0, d->op1);
6a70badb
RS
15795 /* location != 0 (above), so safe to assume (nelt - location) < nelt.
15796 to_constant () is safe since this is restricted to Advanced SIMD
15797 vectors. */
15798 location = d->perm.length ().to_constant () - location;
ae0533da
AL
15799 }
15800
15801 offset = GEN_INT (location);
3f8334a5
RS
15802 emit_set_insn (d->target,
15803 gen_rtx_UNSPEC (d->vmode,
15804 gen_rtvec (3, d->op0, d->op1, offset),
15805 UNSPEC_EXT));
ae0533da
AL
15806 return true;
15807}
15808
43cacb12
RS
15809/* Recognize patterns for the REV{64,32,16} insns, which reverse elements
15810 within each 64-bit, 32-bit or 16-bit granule. */
923fcec3
AL
15811
15812static bool
43cacb12 15813aarch64_evpc_rev_local (struct expand_vec_perm_d *d)
923fcec3 15814{
6a70badb
RS
15815 HOST_WIDE_INT diff;
15816 unsigned int i, size, unspec;
43cacb12 15817 machine_mode pred_mode;
923fcec3 15818
43cacb12
RS
15819 if (d->vec_flags == VEC_SVE_PRED
15820 || !d->one_vector_p
6a70badb 15821 || !d->perm[0].is_constant (&diff))
923fcec3
AL
15822 return false;
15823
3f8334a5
RS
15824 size = (diff + 1) * GET_MODE_UNIT_SIZE (d->vmode);
15825 if (size == 8)
43cacb12
RS
15826 {
15827 unspec = UNSPEC_REV64;
15828 pred_mode = VNx2BImode;
15829 }
3f8334a5 15830 else if (size == 4)
43cacb12
RS
15831 {
15832 unspec = UNSPEC_REV32;
15833 pred_mode = VNx4BImode;
15834 }
3f8334a5 15835 else if (size == 2)
43cacb12
RS
15836 {
15837 unspec = UNSPEC_REV16;
15838 pred_mode = VNx8BImode;
15839 }
3f8334a5
RS
15840 else
15841 return false;
923fcec3 15842
326ac20e
RS
15843 unsigned int step = diff + 1;
15844 for (i = 0; i < step; ++i)
15845 if (!d->perm.series_p (i, step, diff - i, step))
15846 return false;
923fcec3
AL
15847
15848 /* Success! */
15849 if (d->testing_p)
15850 return true;
15851
43cacb12
RS
15852 rtx src = gen_rtx_UNSPEC (d->vmode, gen_rtvec (1, d->op0), unspec);
15853 if (d->vec_flags == VEC_SVE_DATA)
15854 {
15855 rtx pred = force_reg (pred_mode, CONSTM1_RTX (pred_mode));
15856 src = gen_rtx_UNSPEC (d->vmode, gen_rtvec (2, pred, src),
15857 UNSPEC_MERGE_PTRUE);
15858 }
15859 emit_set_insn (d->target, src);
15860 return true;
15861}
15862
15863/* Recognize patterns for the REV insn, which reverses elements within
15864 a full vector. */
15865
15866static bool
15867aarch64_evpc_rev_global (struct expand_vec_perm_d *d)
15868{
15869 poly_uint64 nelt = d->perm.length ();
15870
15871 if (!d->one_vector_p || d->vec_flags != VEC_SVE_DATA)
15872 return false;
15873
15874 if (!d->perm.series_p (0, 1, nelt - 1, -1))
15875 return false;
15876
15877 /* Success! */
15878 if (d->testing_p)
15879 return true;
15880
15881 rtx src = gen_rtx_UNSPEC (d->vmode, gen_rtvec (1, d->op0), UNSPEC_REV);
15882 emit_set_insn (d->target, src);
923fcec3
AL
15883 return true;
15884}
15885
91bd4114
JG
15886static bool
15887aarch64_evpc_dup (struct expand_vec_perm_d *d)
15888{
91bd4114
JG
15889 rtx out = d->target;
15890 rtx in0;
6a70badb 15891 HOST_WIDE_INT elt;
ef4bddc2 15892 machine_mode vmode = d->vmode;
91bd4114
JG
15893 rtx lane;
15894
43cacb12
RS
15895 if (d->vec_flags == VEC_SVE_PRED
15896 || d->perm.encoding ().encoded_nelts () != 1
6a70badb 15897 || !d->perm[0].is_constant (&elt))
326ac20e
RS
15898 return false;
15899
43cacb12
RS
15900 if (d->vec_flags == VEC_SVE_DATA && elt >= 64 * GET_MODE_UNIT_SIZE (vmode))
15901 return false;
15902
326ac20e
RS
15903 /* Success! */
15904 if (d->testing_p)
15905 return true;
15906
91bd4114
JG
15907 /* The generic preparation in aarch64_expand_vec_perm_const_1
15908 swaps the operand order and the permute indices if it finds
15909 d->perm[0] to be in the second operand. Thus, we can always
15910 use d->op0 and need not do any extra arithmetic to get the
15911 correct lane number. */
15912 in0 = d->op0;
f901401e 15913 lane = GEN_INT (elt); /* The pattern corrects for big-endian. */
91bd4114 15914
3f8334a5
RS
15915 rtx parallel = gen_rtx_PARALLEL (vmode, gen_rtvec (1, lane));
15916 rtx select = gen_rtx_VEC_SELECT (GET_MODE_INNER (vmode), in0, parallel);
15917 emit_set_insn (out, gen_rtx_VEC_DUPLICATE (vmode, select));
91bd4114
JG
15918 return true;
15919}
15920
88b08073
JG
15921static bool
15922aarch64_evpc_tbl (struct expand_vec_perm_d *d)
15923{
43cacb12 15924 rtx rperm[MAX_COMPILE_TIME_VEC_BYTES], sel;
ef4bddc2 15925 machine_mode vmode = d->vmode;
6a70badb
RS
15926
15927 /* Make sure that the indices are constant. */
15928 unsigned int encoded_nelts = d->perm.encoding ().encoded_nelts ();
15929 for (unsigned int i = 0; i < encoded_nelts; ++i)
15930 if (!d->perm[i].is_constant ())
15931 return false;
88b08073 15932
88b08073
JG
15933 if (d->testing_p)
15934 return true;
15935
15936  /* Generic code will try constant permutation twice: once with the
15937     original mode and again with the elements lowered to QImode.
15938     So wait and don't do the selector expansion ourselves.  */
15939 if (vmode != V8QImode && vmode != V16QImode)
15940 return false;
15941
6a70badb
RS
15942 /* to_constant is safe since this routine is specific to Advanced SIMD
15943 vectors. */
15944 unsigned int nelt = d->perm.length ().to_constant ();
15945 for (unsigned int i = 0; i < nelt; ++i)
15946 /* If big-endian and two vectors we end up with a weird mixed-endian
15947 mode on NEON. Reverse the index within each word but not the word
15948 itself. to_constant is safe because we checked is_constant above. */
15949 rperm[i] = GEN_INT (BYTES_BIG_ENDIAN
15950 ? d->perm[i].to_constant () ^ (nelt - 1)
15951 : d->perm[i].to_constant ());
bbcc9c00 15952
88b08073
JG
15953 sel = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, rperm));
15954 sel = force_reg (vmode, sel);
15955
15956 aarch64_expand_vec_perm_1 (d->target, d->op0, d->op1, sel);
15957 return true;
15958}
15959
43cacb12
RS
15960/* Try to implement D using an SVE TBL instruction. */
15961
15962static bool
15963aarch64_evpc_sve_tbl (struct expand_vec_perm_d *d)
15964{
15965 unsigned HOST_WIDE_INT nelt;
15966
15967 /* Permuting two variable-length vectors could overflow the
15968 index range. */
15969 if (!d->one_vector_p && !d->perm.length ().is_constant (&nelt))
15970 return false;
15971
15972 if (d->testing_p)
15973 return true;
15974
15975 machine_mode sel_mode = mode_for_int_vector (d->vmode).require ();
15976 rtx sel = vec_perm_indices_to_rtx (sel_mode, d->perm);
e25c95ef
RS
15977 if (d->one_vector_p)
15978 emit_unspec2 (d->target, UNSPEC_TBL, d->op0, force_reg (sel_mode, sel));
15979 else
15980 aarch64_expand_sve_vec_perm (d->target, d->op0, d->op1, sel);
43cacb12
RS
15981 return true;
15982}
15983
88b08073
JG
15984static bool
15985aarch64_expand_vec_perm_const_1 (struct expand_vec_perm_d *d)
15986{
15987 /* The pattern matching functions above are written to look for a small
15988 number to begin the sequence (0, 1, N/2). If we begin with an index
15989 from the second operand, we can swap the operands. */
6a70badb
RS
15990 poly_int64 nelt = d->perm.length ();
15991 if (known_ge (d->perm[0], nelt))
88b08073 15992 {
e3342de4 15993 d->perm.rotate_inputs (1);
cb5c6c29 15994 std::swap (d->op0, d->op1);
88b08073
JG
15995 }
15996
43cacb12
RS
15997 if ((d->vec_flags == VEC_ADVSIMD
15998 || d->vec_flags == VEC_SVE_DATA
15999 || d->vec_flags == VEC_SVE_PRED)
16000 && known_gt (nelt, 1))
cc4d934f 16001 {
43cacb12
RS
16002 if (aarch64_evpc_rev_local (d))
16003 return true;
16004 else if (aarch64_evpc_rev_global (d))
923fcec3
AL
16005 return true;
16006 else if (aarch64_evpc_ext (d))
ae0533da 16007 return true;
f901401e
AL
16008 else if (aarch64_evpc_dup (d))
16009 return true;
ae0533da 16010 else if (aarch64_evpc_zip (d))
cc4d934f
JG
16011 return true;
16012 else if (aarch64_evpc_uzp (d))
16013 return true;
16014 else if (aarch64_evpc_trn (d))
16015 return true;
43cacb12
RS
16016 if (d->vec_flags == VEC_SVE_DATA)
16017 return aarch64_evpc_sve_tbl (d);
4ec8bb67 16018 else if (d->vec_flags == VEC_ADVSIMD)
43cacb12 16019 return aarch64_evpc_tbl (d);
cc4d934f 16020 }
88b08073
JG
16021 return false;
16022}
16023
f151c9e1 16024/* Implement TARGET_VECTORIZE_VEC_PERM_CONST. */
88b08073 16025
f151c9e1
RS
16026static bool
16027aarch64_vectorize_vec_perm_const (machine_mode vmode, rtx target, rtx op0,
16028 rtx op1, const vec_perm_indices &sel)
88b08073
JG
16029{
16030 struct expand_vec_perm_d d;
88b08073 16031
326ac20e 16032 /* Check whether the mask can be applied to a single vector. */
e25c95ef
RS
16033 if (sel.ninputs () == 1
16034 || (op0 && rtx_equal_p (op0, op1)))
326ac20e
RS
16035 d.one_vector_p = true;
16036 else if (sel.all_from_input_p (0))
88b08073 16037 {
326ac20e
RS
16038 d.one_vector_p = true;
16039 op1 = op0;
88b08073 16040 }
326ac20e 16041 else if (sel.all_from_input_p (1))
88b08073 16042 {
88b08073 16043 d.one_vector_p = true;
326ac20e 16044 op0 = op1;
88b08073 16045 }
326ac20e
RS
16046 else
16047 d.one_vector_p = false;
88b08073 16048
326ac20e
RS
16049 d.perm.new_vector (sel.encoding (), d.one_vector_p ? 1 : 2,
16050 sel.nelts_per_input ());
16051 d.vmode = vmode;
43cacb12 16052 d.vec_flags = aarch64_classify_vector_mode (d.vmode);
326ac20e
RS
16053 d.target = target;
16054 d.op0 = op0;
16055 d.op1 = op1;
16056 d.testing_p = !target;
e3342de4 16057
f151c9e1
RS
16058 if (!d.testing_p)
16059 return aarch64_expand_vec_perm_const_1 (&d);
88b08073 16060
326ac20e 16061 rtx_insn *last = get_last_insn ();
f151c9e1 16062 bool ret = aarch64_expand_vec_perm_const_1 (&d);
326ac20e 16063 gcc_assert (last == get_last_insn ());
88b08073
JG
16064
16065 return ret;
16066}
16067
73e3da51
RS
16068/* Generate a byte permute mask for a register of mode MODE,
16069 which has NUNITS units. */
16070
668046d1 16071rtx
73e3da51 16072aarch64_reverse_mask (machine_mode mode, unsigned int nunits)
668046d1
DS
16073{
16074  /* We have to reverse each vector because we don't have
16075 a permuted load that can reverse-load according to ABI rules. */
16076 rtx mask;
16077 rtvec v = rtvec_alloc (16);
73e3da51
RS
16078 unsigned int i, j;
16079 unsigned int usize = GET_MODE_UNIT_SIZE (mode);
668046d1
DS
16080
16081 gcc_assert (BYTES_BIG_ENDIAN);
16082 gcc_assert (AARCH64_VALID_SIMD_QREG_MODE (mode));
16083
16084 for (i = 0; i < nunits; i++)
16085 for (j = 0; j < usize; j++)
16086 RTVEC_ELT (v, i * usize + j) = GEN_INT ((i + 1) * usize - 1 - j);
16087 mask = gen_rtx_CONST_VECTOR (V16QImode, v);
16088 return force_reg (V16QImode, mask);
16089}
16090
43cacb12
RS
16091/* Return true if X is a valid second operand for the SVE instruction
16092 that implements integer comparison OP_CODE. */
16093
16094static bool
16095aarch64_sve_cmp_operand_p (rtx_code op_code, rtx x)
16096{
16097 if (register_operand (x, VOIDmode))
16098 return true;
16099
16100 switch (op_code)
16101 {
16102 case LTU:
16103 case LEU:
16104 case GEU:
16105 case GTU:
16106 return aarch64_sve_cmp_immediate_p (x, false);
16107 case LT:
16108 case LE:
16109 case GE:
16110 case GT:
16111 case NE:
16112 case EQ:
16113 return aarch64_sve_cmp_immediate_p (x, true);
16114 default:
16115 gcc_unreachable ();
16116 }
16117}
16118
f22d7973
RS
16119/* Use predicated SVE instructions to implement the equivalent of:
16120
16121 (set TARGET OP)
16122
16123 given that PTRUE is an all-true predicate of the appropriate mode. */
16124
16125static void
16126aarch64_emit_sve_ptrue_op (rtx target, rtx ptrue, rtx op)
16127{
16128 rtx unspec = gen_rtx_UNSPEC (GET_MODE (target),
16129 gen_rtvec (2, ptrue, op),
16130 UNSPEC_MERGE_PTRUE);
16131 rtx_insn *insn = emit_set_insn (target, unspec);
16132 set_unique_reg_note (insn, REG_EQUAL, copy_rtx (op));
16133}
16134
16135/* Likewise, but also clobber the condition codes. */
16136
16137static void
16138aarch64_emit_sve_ptrue_op_cc (rtx target, rtx ptrue, rtx op)
16139{
16140 rtx unspec = gen_rtx_UNSPEC (GET_MODE (target),
16141 gen_rtvec (2, ptrue, op),
16142 UNSPEC_MERGE_PTRUE);
16143 rtx_insn *insn = emit_insn (gen_set_clobber_cc (target, unspec));
16144 set_unique_reg_note (insn, REG_EQUAL, copy_rtx (op));
16145}
16146
43cacb12
RS
16147/* Return the UNSPEC_COND_* code for comparison CODE. */
16148
16149static unsigned int
16150aarch64_unspec_cond_code (rtx_code code)
16151{
16152 switch (code)
16153 {
16154 case NE:
16155 return UNSPEC_COND_NE;
16156 case EQ:
16157 return UNSPEC_COND_EQ;
16158 case LT:
16159 return UNSPEC_COND_LT;
16160 case GT:
16161 return UNSPEC_COND_GT;
16162 case LE:
16163 return UNSPEC_COND_LE;
16164 case GE:
16165 return UNSPEC_COND_GE;
43cacb12
RS
16166 default:
16167 gcc_unreachable ();
16168 }
16169}
16170
f22d7973 16171/* Emit:
43cacb12 16172
f22d7973
RS
16173 (set TARGET (unspec [PRED OP0 OP1] UNSPEC_COND_<X>))
16174
16175 where <X> is the operation associated with comparison CODE. This form
16176 of instruction is used when (and (CODE OP0 OP1) PRED) would have different
16177 semantics, such as when PRED might not be all-true and when comparing
16178 inactive lanes could have side effects. */
16179
16180static void
16181aarch64_emit_sve_predicated_cond (rtx target, rtx_code code,
16182 rtx pred, rtx op0, rtx op1)
43cacb12 16183{
f22d7973
RS
16184 rtx unspec = gen_rtx_UNSPEC (GET_MODE (pred),
16185 gen_rtvec (3, pred, op0, op1),
16186 aarch64_unspec_cond_code (code));
16187 emit_set_insn (target, unspec);
43cacb12
RS
16188}
16189
f22d7973 16190/* Expand an SVE integer comparison using the SVE equivalent of:
43cacb12 16191
f22d7973 16192 (set TARGET (CODE OP0 OP1)). */
43cacb12
RS
16193
16194void
16195aarch64_expand_sve_vec_cmp_int (rtx target, rtx_code code, rtx op0, rtx op1)
16196{
16197 machine_mode pred_mode = GET_MODE (target);
16198 machine_mode data_mode = GET_MODE (op0);
16199
16200 if (!aarch64_sve_cmp_operand_p (code, op1))
16201 op1 = force_reg (data_mode, op1);
16202
16203 rtx ptrue = force_reg (pred_mode, CONSTM1_RTX (pred_mode));
f22d7973
RS
16204 rtx cond = gen_rtx_fmt_ee (code, pred_mode, op0, op1);
16205 aarch64_emit_sve_ptrue_op_cc (target, ptrue, cond);
43cacb12
RS
16206}
16207
f22d7973 16208/* Emit the SVE equivalent of:
43cacb12 16209
f22d7973
RS
16210 (set TMP1 (CODE1 OP0 OP1))
16211 (set TMP2 (CODE2 OP0 OP1))
16212 (set TARGET (ior:PRED_MODE TMP1 TMP2))
43cacb12 16213
f22d7973 16214 PTRUE is an all-true predicate with the same mode as TARGET. */
43cacb12
RS
16215
16216static void
f22d7973
RS
16217aarch64_emit_sve_or_conds (rtx target, rtx_code code1, rtx_code code2,
16218 rtx ptrue, rtx op0, rtx op1)
43cacb12 16219{
f22d7973 16220 machine_mode pred_mode = GET_MODE (ptrue);
43cacb12 16221 rtx tmp1 = gen_reg_rtx (pred_mode);
f22d7973
RS
16222 aarch64_emit_sve_ptrue_op (tmp1, ptrue,
16223 gen_rtx_fmt_ee (code1, pred_mode, op0, op1));
43cacb12 16224 rtx tmp2 = gen_reg_rtx (pred_mode);
f22d7973
RS
16225 aarch64_emit_sve_ptrue_op (tmp2, ptrue,
16226 gen_rtx_fmt_ee (code2, pred_mode, op0, op1));
16227 aarch64_emit_binop (target, ior_optab, tmp1, tmp2);
43cacb12
RS
16228}
16229
f22d7973 16230/* Emit the SVE equivalent of:
43cacb12 16231
f22d7973
RS
16232 (set TMP (CODE OP0 OP1))
16233 (set TARGET (not TMP))
43cacb12 16234
f22d7973 16235 PTRUE is an all-true predicate with the same mode as TARGET. */
43cacb12
RS
16236
16237static void
f22d7973
RS
16238aarch64_emit_sve_inverted_cond (rtx target, rtx ptrue, rtx_code code,
16239 rtx op0, rtx op1)
43cacb12 16240{
f22d7973
RS
16241 machine_mode pred_mode = GET_MODE (ptrue);
16242 rtx tmp = gen_reg_rtx (pred_mode);
16243 aarch64_emit_sve_ptrue_op (tmp, ptrue,
16244 gen_rtx_fmt_ee (code, pred_mode, op0, op1));
16245 aarch64_emit_unop (target, one_cmpl_optab, tmp);
43cacb12
RS
16246}
16247
f22d7973 16248/* Expand an SVE floating-point comparison using the SVE equivalent of:
43cacb12 16249
f22d7973 16250 (set TARGET (CODE OP0 OP1))
43cacb12
RS
16251
16252 If CAN_INVERT_P is true, the caller can also handle inverted results;
16253 return true if the result is in fact inverted. */
16254
16255bool
16256aarch64_expand_sve_vec_cmp_float (rtx target, rtx_code code,
16257 rtx op0, rtx op1, bool can_invert_p)
16258{
16259 machine_mode pred_mode = GET_MODE (target);
16260 machine_mode data_mode = GET_MODE (op0);
16261
16262 rtx ptrue = force_reg (pred_mode, CONSTM1_RTX (pred_mode));
16263 switch (code)
16264 {
16265 case UNORDERED:
16266 /* UNORDERED has no immediate form. */
16267 op1 = force_reg (data_mode, op1);
f22d7973 16268 /* fall through */
43cacb12
RS
16269 case LT:
16270 case LE:
16271 case GT:
16272 case GE:
16273 case EQ:
16274 case NE:
f22d7973
RS
16275 {
16276 /* There is native support for the comparison. */
16277 rtx cond = gen_rtx_fmt_ee (code, pred_mode, op0, op1);
16278 aarch64_emit_sve_ptrue_op (target, ptrue, cond);
16279 return false;
16280 }
43cacb12
RS
16281
16282 case LTGT:
16283 /* This is a trapping operation (LT or GT). */
f22d7973 16284 aarch64_emit_sve_or_conds (target, LT, GT, ptrue, op0, op1);
43cacb12
RS
16285 return false;
16286
16287 case UNEQ:
16288 if (!flag_trapping_math)
16289 {
16290 /* This would trap for signaling NaNs. */
16291 op1 = force_reg (data_mode, op1);
f22d7973 16292 aarch64_emit_sve_or_conds (target, UNORDERED, EQ, ptrue, op0, op1);
43cacb12
RS
16293 return false;
16294 }
16295 /* fall through */
43cacb12
RS
16296 case UNLT:
16297 case UNLE:
16298 case UNGT:
16299 case UNGE:
f22d7973
RS
16300 if (flag_trapping_math)
16301 {
16302 /* Work out which elements are ordered. */
16303 rtx ordered = gen_reg_rtx (pred_mode);
16304 op1 = force_reg (data_mode, op1);
16305 aarch64_emit_sve_inverted_cond (ordered, ptrue, UNORDERED, op0, op1);
16306
16307 /* Test the opposite condition for the ordered elements,
16308 then invert the result. */
16309 if (code == UNEQ)
16310 code = NE;
16311 else
16312 code = reverse_condition_maybe_unordered (code);
16313 if (can_invert_p)
16314 {
16315 aarch64_emit_sve_predicated_cond (target, code,
16316 ordered, op0, op1);
16317 return true;
16318 }
16319 rtx tmp = gen_reg_rtx (pred_mode);
16320 aarch64_emit_sve_predicated_cond (tmp, code, ordered, op0, op1);
16321 aarch64_emit_unop (target, one_cmpl_optab, tmp);
16322 return false;
16323 }
16324 break;
16325
16326 case ORDERED:
16327 /* ORDERED has no immediate form. */
16328 op1 = force_reg (data_mode, op1);
16329 break;
43cacb12
RS
16330
16331 default:
16332 gcc_unreachable ();
16333 }
f22d7973
RS
16334
16335 /* There is native support for the inverse comparison. */
16336 code = reverse_condition_maybe_unordered (code);
16337 if (can_invert_p)
16338 {
16339 rtx cond = gen_rtx_fmt_ee (code, pred_mode, op0, op1);
16340 aarch64_emit_sve_ptrue_op (target, ptrue, cond);
16341 return true;
16342 }
16343 aarch64_emit_sve_inverted_cond (target, ptrue, code, op0, op1);
16344 return false;
43cacb12
RS
16345}
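
/* Worked example of the trapping-math path above: for UNGE we first
   compute the predicate of ordered lanes (the inverse of UNORDERED),
   then evaluate the reversed condition LT on those lanes only, and
   finally invert the result (or report it as inverted when CAN_INVERT_P).
   Unordered lanes therefore end up selected without the comparison ever
   having been evaluated on them, so no spurious exception is raised.  */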
16346
16347/* Expand an SVE vcond pattern with operands OPS. DATA_MODE is the mode
16348 of the data being selected and CMP_MODE is the mode of the values being
16349 compared. */
16350
16351void
16352aarch64_expand_sve_vcond (machine_mode data_mode, machine_mode cmp_mode,
16353 rtx *ops)
16354{
16355 machine_mode pred_mode
16356 = aarch64_get_mask_mode (GET_MODE_NUNITS (cmp_mode),
16357 GET_MODE_SIZE (cmp_mode)).require ();
16358 rtx pred = gen_reg_rtx (pred_mode);
16359 if (FLOAT_MODE_P (cmp_mode))
16360 {
16361 if (aarch64_expand_sve_vec_cmp_float (pred, GET_CODE (ops[3]),
16362 ops[4], ops[5], true))
16363 std::swap (ops[1], ops[2]);
16364 }
16365 else
16366 aarch64_expand_sve_vec_cmp_int (pred, GET_CODE (ops[3]), ops[4], ops[5]);
16367
16368 rtvec vec = gen_rtvec (3, pred, ops[1], ops[2]);
16369 emit_set_insn (ops[0], gen_rtx_UNSPEC (data_mode, vec, UNSPEC_SEL));
16370}
16371
99e1629f
RS
16372/* Implement TARGET_MODES_TIEABLE_P. In principle we should always return
16373   true.  However, due to issues with register allocation it is preferable
16374   to avoid tying integer scalar and FP scalar modes.  Executing integer
16375 operations in general registers is better than treating them as scalar
16376 vector operations. This reduces latency and avoids redundant int<->FP
16377 moves. So tie modes if they are either the same class, or vector modes
16378 with other vector modes, vector structs or any scalar mode. */
97e1ad78 16379
99e1629f 16380static bool
ef4bddc2 16381aarch64_modes_tieable_p (machine_mode mode1, machine_mode mode2)
97e1ad78
JG
16382{
16383 if (GET_MODE_CLASS (mode1) == GET_MODE_CLASS (mode2))
16384 return true;
16385
16386 /* We specifically want to allow elements of "structure" modes to
16387 be tieable to the structure. This more general condition allows
43cacb12
RS
16388 other rarer situations too. The reason we don't extend this to
16389 predicate modes is that there are no predicate structure modes
16390 nor any specific instructions for extracting part of a predicate
16391 register. */
16392 if (aarch64_vector_data_mode_p (mode1)
16393 && aarch64_vector_data_mode_p (mode2))
61f17a5c
WD
16394 return true;
16395
16396 /* Also allow any scalar modes with vectors. */
16397 if (aarch64_vector_mode_supported_p (mode1)
16398 || aarch64_vector_mode_supported_p (mode2))
97e1ad78
JG
16399 return true;
16400
16401 return false;
16402}
16403
e2c75eea
JG
16404/* Return a new RTX holding the result of moving POINTER forward by
16405 AMOUNT bytes. */
16406
16407static rtx
6a70badb 16408aarch64_move_pointer (rtx pointer, poly_int64 amount)
e2c75eea
JG
16409{
16410 rtx next = plus_constant (Pmode, XEXP (pointer, 0), amount);
16411
16412 return adjust_automodify_address (pointer, GET_MODE (pointer),
16413 next, amount);
16414}
16415
16416/* Return a new RTX holding the result of moving POINTER forward by the
16417 size of the mode it points to. */
16418
16419static rtx
16420aarch64_progress_pointer (rtx pointer)
16421{
6a70badb 16422 return aarch64_move_pointer (pointer, GET_MODE_SIZE (GET_MODE (pointer)));
e2c75eea
JG
16423}
16424
16425/* Copy one MODE sized block from SRC to DST, then progress SRC and DST by
16426 MODE bytes. */
16427
16428static void
16429aarch64_copy_one_block_and_progress_pointers (rtx *src, rtx *dst,
ef4bddc2 16430 machine_mode mode)
e2c75eea
JG
16431{
16432 rtx reg = gen_reg_rtx (mode);
16433
16434 /* "Cast" the pointers to the correct mode. */
16435 *src = adjust_address (*src, mode, 0);
16436 *dst = adjust_address (*dst, mode, 0);
16437 /* Emit the memcpy. */
16438 emit_move_insn (reg, *src);
16439 emit_move_insn (*dst, reg);
16440 /* Move the pointers forward. */
16441 *src = aarch64_progress_pointer (*src);
16442 *dst = aarch64_progress_pointer (*dst);
16443}
16444
16445/* Expand movmem, as if from a __builtin_memcpy. Return true if
16446 we succeed, otherwise return false. */
16447
16448bool
16449aarch64_expand_movmem (rtx *operands)
16450{
89c52e5e 16451 int n, mode_bits;
e2c75eea
JG
16452 rtx dst = operands[0];
16453 rtx src = operands[1];
16454 rtx base;
89c52e5e 16455 machine_mode cur_mode = BLKmode, next_mode;
e2c75eea
JG
16456 bool speed_p = !optimize_function_for_size_p (cfun);
16457
16458 /* When optimizing for size, give a better estimate of the length of a
89c52e5e
TC
16459     memcpy call, but use the default otherwise.  Moves larger than 8 bytes
16460     always require an even number of instructions, and each operation needs
16461     both a load and a store, so divide the max number by 2.  */
16462 int max_num_moves = (speed_p ? 16 : AARCH64_CALL_RATIO) / 2;
e2c75eea
JG
16463
16464 /* We can't do anything smart if the amount to copy is not constant. */
16465 if (!CONST_INT_P (operands[2]))
16466 return false;
16467
89c52e5e 16468 n = INTVAL (operands[2]);
e2c75eea 16469
89c52e5e
TC
16470 /* Try to keep the number of instructions low. For all cases we will do at
16471 most two moves for the residual amount, since we'll always overlap the
16472 remainder. */
16473 if (((n / 16) + (n % 16 ? 2 : 0)) > max_num_moves)
e2c75eea
JG
16474 return false;
16475
16476 base = copy_to_mode_reg (Pmode, XEXP (dst, 0));
16477 dst = adjust_automodify_address (dst, VOIDmode, base, 0);
16478
16479 base = copy_to_mode_reg (Pmode, XEXP (src, 0));
16480 src = adjust_automodify_address (src, VOIDmode, base, 0);
16481
89c52e5e
TC
16482 /* Convert n to bits to make the rest of the code simpler. */
16483 n = n * BITS_PER_UNIT;
e2c75eea 16484
f7e1d19d
TC
16485 /* Maximum amount to copy in one go. The AArch64 back-end has integer modes
16486 larger than TImode, but we should not use them for loads/stores here. */
16487 const int copy_limit = GET_MODE_BITSIZE (TImode);
16488
89c52e5e 16489 while (n > 0)
e2c75eea 16490 {
89c52e5e
TC
16491 /* Find the largest mode in which to do the copy in without over reading
16492 or writing. */
16493 opt_scalar_int_mode mode_iter;
16494 FOR_EACH_MODE_IN_CLASS (mode_iter, MODE_INT)
f7e1d19d 16495 if (GET_MODE_BITSIZE (mode_iter.require ()) <= MIN (n, copy_limit))
89c52e5e 16496 cur_mode = mode_iter.require ();
e2c75eea 16497
89c52e5e 16498 gcc_assert (cur_mode != BLKmode);
e2c75eea 16499
89c52e5e
TC
16500 mode_bits = GET_MODE_BITSIZE (cur_mode).to_constant ();
16501 aarch64_copy_one_block_and_progress_pointers (&src, &dst, cur_mode);
e2c75eea 16502
89c52e5e 16503 n -= mode_bits;
e2c75eea 16504
89c52e5e
TC
 16505 /* Do certain trailing copies as overlapping if it's going to be
 16506 cheaper, i.e. fewer instructions to do so. For instance, for a 15
 16507 byte copy it is more efficient to do two overlapping 8 byte copies than
 16508 8 + 6 + 1. */
f7e1d19d 16509 if (n > 0 && n <= 8 * BITS_PER_UNIT)
89c52e5e 16510 {
f7e1d19d
TC
16511 next_mode = smallest_mode_for_size (n, MODE_INT);
16512 int n_bits = GET_MODE_BITSIZE (next_mode).to_constant ();
89c52e5e
TC
16513 src = aarch64_move_pointer (src, (n - n_bits) / BITS_PER_UNIT);
16514 dst = aarch64_move_pointer (dst, (n - n_bits) / BITS_PER_UNIT);
16515 n = n_bits;
e2c75eea
JG
16516 }
16517 }
16518
16519 return true;
16520}
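/* As a rough illustration of the overlapping tail handling above (register
   names and the reuse of a single temporary are assumptions, not what the
   expander literally allocates), a 15 byte copy becomes something like

	ldr	x1, [src]		// bytes 0-7
	str	x1, [dst]
	ldr	x1, [src, 7]		// bytes 7-14, overlapping byte 7
	str	x1, [dst, 7]

   instead of separate 8, 4, 2 and 1 byte copies.  */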
16521
141a3ccf
KT
16522/* Split a DImode store of a CONST_INT SRC to MEM DST as two
16523 SImode stores. Handle the case when the constant has identical
16524 bottom and top halves. This is beneficial when the two stores can be
16525 merged into an STP and we avoid synthesising potentially expensive
16526 immediates twice. Return true if such a split is possible. */
16527
16528bool
16529aarch64_split_dimode_const_store (rtx dst, rtx src)
16530{
16531 rtx lo = gen_lowpart (SImode, src);
16532 rtx hi = gen_highpart_mode (SImode, DImode, src);
16533
16534 bool size_p = optimize_function_for_size_p (cfun);
16535
16536 if (!rtx_equal_p (lo, hi))
16537 return false;
16538
16539 unsigned int orig_cost
16540 = aarch64_internal_mov_immediate (NULL_RTX, src, false, DImode);
16541 unsigned int lo_cost
16542 = aarch64_internal_mov_immediate (NULL_RTX, lo, false, SImode);
16543
16544 /* We want to transform:
16545 MOV x1, 49370
16546 MOVK x1, 0x140, lsl 16
16547 MOVK x1, 0xc0da, lsl 32
16548 MOVK x1, 0x140, lsl 48
16549 STR x1, [x0]
16550 into:
16551 MOV w1, 49370
16552 MOVK w1, 0x140, lsl 16
16553 STP w1, w1, [x0]
16554 So we want to perform this only when we save two instructions
16555 or more. When optimizing for size, however, accept any code size
16556 savings we can. */
16557 if (size_p && orig_cost <= lo_cost)
16558 return false;
16559
16560 if (!size_p
16561 && (orig_cost <= lo_cost + 1))
16562 return false;
16563
16564 rtx mem_lo = adjust_address (dst, SImode, 0);
16565 if (!aarch64_mem_pair_operand (mem_lo, SImode))
16566 return false;
16567
16568 rtx tmp_reg = gen_reg_rtx (SImode);
16569 aarch64_expand_mov_immediate (tmp_reg, lo);
16570 rtx mem_hi = aarch64_move_pointer (mem_lo, GET_MODE_SIZE (SImode));
16571 /* Don't emit an explicit store pair as this may not be always profitable.
16572 Let the sched-fusion logic decide whether to merge them. */
16573 emit_move_insn (mem_lo, tmp_reg);
16574 emit_move_insn (mem_hi, tmp_reg);
16575
16576 return true;
16577}
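/* Worked example for the cost checks above, using the constant from the
   comment: the DImode immediate needs MOV + 3 * MOVK (orig_cost == 4) while
   its low SImode half needs only MOV + MOVK (lo_cost == 2), so the speed
   check 4 > 2 + 1 passes and the store is split; under -Os any saving
   (orig_cost > lo_cost) is enough.  The numbers are only illustrative.  */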
16578
30c46053
MC
16579/* Generate RTL for a conditional branch with rtx comparison CODE in
16580 mode CC_MODE. The destination of the unlikely conditional branch
16581 is LABEL_REF. */
16582
16583void
16584aarch64_gen_unlikely_cbranch (enum rtx_code code, machine_mode cc_mode,
16585 rtx label_ref)
16586{
16587 rtx x;
16588 x = gen_rtx_fmt_ee (code, VOIDmode,
16589 gen_rtx_REG (cc_mode, CC_REGNUM),
16590 const0_rtx);
16591
16592 x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
16593 gen_rtx_LABEL_REF (VOIDmode, label_ref),
16594 pc_rtx);
16595 aarch64_emit_unlikely_jump (gen_rtx_SET (pc_rtx, x));
16596}
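/* The jump emitted above therefore has the shape

	(set (pc) (if_then_else (CODE (reg:CC_MODE cc) (const_int 0))
				(label_ref LABEL_REF)
				(pc)))

   and carries an "unlikely" branch probability; this is just a restatement
   of the RTL constructed by the function.  */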
16597
16598/* Generate DImode scratch registers for 128-bit (TImode) addition.
16599
 16600 OP1 represents TImode source operand 1
 16601 OP2 represents TImode source operand 2
16602 LOW_DEST represents the low half (DImode) of TImode operand 0
16603 LOW_IN1 represents the low half (DImode) of TImode operand 1
16604 LOW_IN2 represents the low half (DImode) of TImode operand 2
16605 HIGH_DEST represents the high half (DImode) of TImode operand 0
16606 HIGH_IN1 represents the high half (DImode) of TImode operand 1
16607 HIGH_IN2 represents the high half (DImode) of TImode operand 2. */
16608
16609void
16610aarch64_addti_scratch_regs (rtx op1, rtx op2, rtx *low_dest,
16611 rtx *low_in1, rtx *low_in2,
16612 rtx *high_dest, rtx *high_in1,
16613 rtx *high_in2)
16614{
16615 *low_dest = gen_reg_rtx (DImode);
16616 *low_in1 = gen_lowpart (DImode, op1);
16617 *low_in2 = simplify_gen_subreg (DImode, op2, TImode,
16618 subreg_lowpart_offset (DImode, TImode));
16619 *high_dest = gen_reg_rtx (DImode);
16620 *high_in1 = gen_highpart (DImode, op1);
16621 *high_in2 = simplify_gen_subreg (DImode, op2, TImode,
16622 subreg_highpart_offset (DImode, TImode));
16623}
16624
16625/* Generate DImode scratch registers for 128-bit (TImode) subtraction.
16626
 16627 This function differs from 'aarch64_addti_scratch_regs' in that
 16628 OP1 can be an immediate constant (zero). We must call
 16629 subreg_highpart_offset with DImode and TImode arguments, otherwise
 16630 VOIDmode will be used for the const_int, which generates an internal
 16631 error from subreg_size_highpart_offset, which does not expect a size of zero.
16632
 16633 OP1 represents TImode source operand 1
 16634 OP2 represents TImode source operand 2
16635 LOW_DEST represents the low half (DImode) of TImode operand 0
16636 LOW_IN1 represents the low half (DImode) of TImode operand 1
16637 LOW_IN2 represents the low half (DImode) of TImode operand 2
16638 HIGH_DEST represents the high half (DImode) of TImode operand 0
16639 HIGH_IN1 represents the high half (DImode) of TImode operand 1
16640 HIGH_IN2 represents the high half (DImode) of TImode operand 2. */
16641
16642
16643void
16644aarch64_subvti_scratch_regs (rtx op1, rtx op2, rtx *low_dest,
16645 rtx *low_in1, rtx *low_in2,
16646 rtx *high_dest, rtx *high_in1,
16647 rtx *high_in2)
16648{
16649 *low_dest = gen_reg_rtx (DImode);
16650 *low_in1 = simplify_gen_subreg (DImode, op1, TImode,
16651 subreg_lowpart_offset (DImode, TImode));
16652
16653 *low_in2 = simplify_gen_subreg (DImode, op2, TImode,
16654 subreg_lowpart_offset (DImode, TImode));
16655 *high_dest = gen_reg_rtx (DImode);
16656
16657 *high_in1 = simplify_gen_subreg (DImode, op1, TImode,
16658 subreg_highpart_offset (DImode, TImode));
16659 *high_in2 = simplify_gen_subreg (DImode, op2, TImode,
16660 subreg_highpart_offset (DImode, TImode));
16661}
16662
16663/* Generate RTL for 128-bit (TImode) subtraction with overflow.
16664
16665 OP0 represents the TImode destination operand 0
16666 LOW_DEST represents the low half (DImode) of TImode operand 0
16667 LOW_IN1 represents the low half (DImode) of TImode operand 1
16668 LOW_IN2 represents the low half (DImode) of TImode operand 2
16669 HIGH_DEST represents the high half (DImode) of TImode operand 0
16670 HIGH_IN1 represents the high half (DImode) of TImode operand 1
16671 HIGH_IN2 represents the high half (DImode) of TImode operand 2. */
16672
16673void
16674aarch64_expand_subvti (rtx op0, rtx low_dest, rtx low_in1,
16675 rtx low_in2, rtx high_dest, rtx high_in1,
16676 rtx high_in2)
16677{
16678 if (low_in2 == const0_rtx)
16679 {
16680 low_dest = low_in1;
16681 emit_insn (gen_subdi3_compare1 (high_dest, high_in1,
16682 force_reg (DImode, high_in2)));
16683 }
16684 else
16685 {
16686 if (CONST_INT_P (low_in2))
16687 {
16688 low_in2 = force_reg (DImode, GEN_INT (-UINTVAL (low_in2)));
16689 high_in2 = force_reg (DImode, high_in2);
16690 emit_insn (gen_adddi3_compareC (low_dest, low_in1, low_in2));
16691 }
16692 else
16693 emit_insn (gen_subdi3_compare1 (low_dest, low_in1, low_in2));
16694 emit_insn (gen_subdi3_carryinCV (high_dest,
16695 force_reg (DImode, high_in1),
16696 high_in2));
16697 }
16698
16699 emit_move_insn (gen_lowpart (DImode, op0), low_dest);
16700 emit_move_insn (gen_highpart (DImode, op0), high_dest);
16701
16702}
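/* For register operands the expansion above amounts to roughly

	subs	x4, x0, x2	// low halves, sets the carry/borrow flag
	sbcs	x5, x1, x3	// high halves, consumes the borrow, sets V

   with the V flag of the final SBCS feeding the overflow check; the
   register numbers are purely illustrative.  */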
16703
a3125fc2
CL
16704/* Implement the TARGET_ASAN_SHADOW_OFFSET hook. */
16705
16706static unsigned HOST_WIDE_INT
16707aarch64_asan_shadow_offset (void)
16708{
16709 return (HOST_WIDE_INT_1 << 36);
16710}
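/* Assuming the default ASan shadow scale of 3, this offset corresponds to
   the usual mapping shadow = (addr >> 3) + (1 << 36); the mapping itself is
   implemented in libsanitizer, this hook only supplies the offset.  */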
16711
5f3bc026 16712static rtx
cb4347e8 16713aarch64_gen_ccmp_first (rtx_insn **prep_seq, rtx_insn **gen_seq,
5f3bc026
ZC
16714 int code, tree treeop0, tree treeop1)
16715{
c8012fbc
WD
16716 machine_mode op_mode, cmp_mode, cc_mode = CCmode;
16717 rtx op0, op1;
5f3bc026 16718 int unsignedp = TYPE_UNSIGNED (TREE_TYPE (treeop0));
c8012fbc 16719 insn_code icode;
5f3bc026
ZC
16720 struct expand_operand ops[4];
16721
5f3bc026
ZC
16722 start_sequence ();
16723 expand_operands (treeop0, treeop1, NULL_RTX, &op0, &op1, EXPAND_NORMAL);
16724
16725 op_mode = GET_MODE (op0);
16726 if (op_mode == VOIDmode)
16727 op_mode = GET_MODE (op1);
16728
16729 switch (op_mode)
16730 {
4e10a5a7
RS
16731 case E_QImode:
16732 case E_HImode:
16733 case E_SImode:
5f3bc026
ZC
16734 cmp_mode = SImode;
16735 icode = CODE_FOR_cmpsi;
16736 break;
16737
4e10a5a7 16738 case E_DImode:
5f3bc026
ZC
16739 cmp_mode = DImode;
16740 icode = CODE_FOR_cmpdi;
16741 break;
16742
4e10a5a7 16743 case E_SFmode:
786e3c06
WD
16744 cmp_mode = SFmode;
16745 cc_mode = aarch64_select_cc_mode ((rtx_code) code, op0, op1);
16746 icode = cc_mode == CCFPEmode ? CODE_FOR_fcmpesf : CODE_FOR_fcmpsf;
16747 break;
16748
4e10a5a7 16749 case E_DFmode:
786e3c06
WD
16750 cmp_mode = DFmode;
16751 cc_mode = aarch64_select_cc_mode ((rtx_code) code, op0, op1);
16752 icode = cc_mode == CCFPEmode ? CODE_FOR_fcmpedf : CODE_FOR_fcmpdf;
16753 break;
16754
5f3bc026
ZC
16755 default:
16756 end_sequence ();
16757 return NULL_RTX;
16758 }
16759
c8012fbc
WD
16760 op0 = prepare_operand (icode, op0, 0, op_mode, cmp_mode, unsignedp);
16761 op1 = prepare_operand (icode, op1, 1, op_mode, cmp_mode, unsignedp);
5f3bc026
ZC
16762 if (!op0 || !op1)
16763 {
16764 end_sequence ();
16765 return NULL_RTX;
16766 }
16767 *prep_seq = get_insns ();
16768 end_sequence ();
16769
c8012fbc
WD
16770 create_fixed_operand (&ops[0], op0);
16771 create_fixed_operand (&ops[1], op1);
5f3bc026
ZC
16772
16773 start_sequence ();
c8012fbc 16774 if (!maybe_expand_insn (icode, 2, ops))
5f3bc026
ZC
16775 {
16776 end_sequence ();
16777 return NULL_RTX;
16778 }
16779 *gen_seq = get_insns ();
16780 end_sequence ();
16781
c8012fbc
WD
16782 return gen_rtx_fmt_ee ((rtx_code) code, cc_mode,
16783 gen_rtx_REG (cc_mode, CC_REGNUM), const0_rtx);
5f3bc026
ZC
16784}
16785
16786static rtx
cb4347e8
TS
16787aarch64_gen_ccmp_next (rtx_insn **prep_seq, rtx_insn **gen_seq, rtx prev,
16788 int cmp_code, tree treeop0, tree treeop1, int bit_code)
5f3bc026 16789{
c8012fbc
WD
16790 rtx op0, op1, target;
16791 machine_mode op_mode, cmp_mode, cc_mode = CCmode;
5f3bc026 16792 int unsignedp = TYPE_UNSIGNED (TREE_TYPE (treeop0));
c8012fbc 16793 insn_code icode;
5f3bc026 16794 struct expand_operand ops[6];
c8012fbc 16795 int aarch64_cond;
5f3bc026 16796
cb4347e8 16797 push_to_sequence (*prep_seq);
5f3bc026
ZC
16798 expand_operands (treeop0, treeop1, NULL_RTX, &op0, &op1, EXPAND_NORMAL);
16799
16800 op_mode = GET_MODE (op0);
16801 if (op_mode == VOIDmode)
16802 op_mode = GET_MODE (op1);
16803
16804 switch (op_mode)
16805 {
4e10a5a7
RS
16806 case E_QImode:
16807 case E_HImode:
16808 case E_SImode:
5f3bc026 16809 cmp_mode = SImode;
c8012fbc 16810 icode = CODE_FOR_ccmpsi;
5f3bc026
ZC
16811 break;
16812
4e10a5a7 16813 case E_DImode:
5f3bc026 16814 cmp_mode = DImode;
c8012fbc 16815 icode = CODE_FOR_ccmpdi;
5f3bc026
ZC
16816 break;
16817
4e10a5a7 16818 case E_SFmode:
786e3c06
WD
16819 cmp_mode = SFmode;
16820 cc_mode = aarch64_select_cc_mode ((rtx_code) cmp_code, op0, op1);
16821 icode = cc_mode == CCFPEmode ? CODE_FOR_fccmpesf : CODE_FOR_fccmpsf;
16822 break;
16823
4e10a5a7 16824 case E_DFmode:
786e3c06
WD
16825 cmp_mode = DFmode;
16826 cc_mode = aarch64_select_cc_mode ((rtx_code) cmp_code, op0, op1);
16827 icode = cc_mode == CCFPEmode ? CODE_FOR_fccmpedf : CODE_FOR_fccmpdf;
16828 break;
16829
5f3bc026
ZC
16830 default:
16831 end_sequence ();
16832 return NULL_RTX;
16833 }
16834
16835 op0 = prepare_operand (icode, op0, 2, op_mode, cmp_mode, unsignedp);
16836 op1 = prepare_operand (icode, op1, 3, op_mode, cmp_mode, unsignedp);
16837 if (!op0 || !op1)
16838 {
16839 end_sequence ();
16840 return NULL_RTX;
16841 }
16842 *prep_seq = get_insns ();
16843 end_sequence ();
16844
16845 target = gen_rtx_REG (cc_mode, CC_REGNUM);
c8012fbc 16846 aarch64_cond = aarch64_get_condition_code_1 (cc_mode, (rtx_code) cmp_code);
5f3bc026 16847
c8012fbc
WD
16848 if (bit_code != AND)
16849 {
16850 prev = gen_rtx_fmt_ee (REVERSE_CONDITION (GET_CODE (prev),
16851 GET_MODE (XEXP (prev, 0))),
16852 VOIDmode, XEXP (prev, 0), const0_rtx);
16853 aarch64_cond = AARCH64_INVERSE_CONDITION_CODE (aarch64_cond);
16854 }
16855
16856 create_fixed_operand (&ops[0], XEXP (prev, 0));
5f3bc026
ZC
16857 create_fixed_operand (&ops[1], target);
16858 create_fixed_operand (&ops[2], op0);
16859 create_fixed_operand (&ops[3], op1);
c8012fbc
WD
16860 create_fixed_operand (&ops[4], prev);
16861 create_fixed_operand (&ops[5], GEN_INT (aarch64_cond));
5f3bc026 16862
cb4347e8 16863 push_to_sequence (*gen_seq);
5f3bc026
ZC
16864 if (!maybe_expand_insn (icode, 6, ops))
16865 {
16866 end_sequence ();
16867 return NULL_RTX;
16868 }
16869
16870 *gen_seq = get_insns ();
16871 end_sequence ();
16872
c8012fbc 16873 return gen_rtx_fmt_ee ((rtx_code) cmp_code, VOIDmode, target, const0_rtx);
5f3bc026
ZC
16874}
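/* A sketch of what the two ccmp hooks achieve together: for a combined
   condition such as "x == 0 && y == 0", aarch64_gen_ccmp_first emits the
   initial comparison and aarch64_gen_ccmp_next chains each further term as
   a conditional comparison, giving roughly

	cmp	x0, #0
	ccmp	x1, #0, #<nzcv>, eq
	b.<cond>	<label>

   where <nzcv> is derived via aarch64_get_condition_code_1 so that a
   failing first test produces the right final flags.  The exact sequence
   depends on the conditions involved; this is only an illustration.  */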
16875
16876#undef TARGET_GEN_CCMP_FIRST
16877#define TARGET_GEN_CCMP_FIRST aarch64_gen_ccmp_first
16878
16879#undef TARGET_GEN_CCMP_NEXT
16880#define TARGET_GEN_CCMP_NEXT aarch64_gen_ccmp_next
16881
6a569cdd
KT
16882/* Implement TARGET_SCHED_MACRO_FUSION_P. Return true if target supports
16883 instruction fusion of some sort. */
16884
16885static bool
16886aarch64_macro_fusion_p (void)
16887{
b175b679 16888 return aarch64_tune_params.fusible_ops != AARCH64_FUSE_NOTHING;
6a569cdd
KT
16889}
16890
16891
16892/* Implement TARGET_SCHED_MACRO_FUSION_PAIR_P. Return true if PREV and CURR
16893 should be kept together during scheduling. */
16894
16895static bool
16896aarch_macro_fusion_pair_p (rtx_insn *prev, rtx_insn *curr)
16897{
16898 rtx set_dest;
16899 rtx prev_set = single_set (prev);
16900 rtx curr_set = single_set (curr);
16901 /* prev and curr are simple SET insns i.e. no flag setting or branching. */
16902 bool simple_sets_p = prev_set && curr_set && !any_condjump_p (curr);
16903
16904 if (!aarch64_macro_fusion_p ())
16905 return false;
16906
d7b03373 16907 if (simple_sets_p && aarch64_fusion_enabled_p (AARCH64_FUSE_MOV_MOVK))
6a569cdd
KT
16908 {
16909 /* We are trying to match:
16910 prev (mov) == (set (reg r0) (const_int imm16))
16911 curr (movk) == (set (zero_extract (reg r0)
16912 (const_int 16)
16913 (const_int 16))
16914 (const_int imm16_1)) */
16915
16916 set_dest = SET_DEST (curr_set);
16917
16918 if (GET_CODE (set_dest) == ZERO_EXTRACT
16919 && CONST_INT_P (SET_SRC (curr_set))
16920 && CONST_INT_P (SET_SRC (prev_set))
16921 && CONST_INT_P (XEXP (set_dest, 2))
16922 && INTVAL (XEXP (set_dest, 2)) == 16
16923 && REG_P (XEXP (set_dest, 0))
16924 && REG_P (SET_DEST (prev_set))
16925 && REGNO (XEXP (set_dest, 0)) == REGNO (SET_DEST (prev_set)))
16926 {
16927 return true;
16928 }
16929 }
16930
d7b03373 16931 if (simple_sets_p && aarch64_fusion_enabled_p (AARCH64_FUSE_ADRP_ADD))
9bbe08fe
KT
16932 {
16933
16934 /* We're trying to match:
16935 prev (adrp) == (set (reg r1)
16936 (high (symbol_ref ("SYM"))))
16937 curr (add) == (set (reg r0)
16938 (lo_sum (reg r1)
16939 (symbol_ref ("SYM"))))
16940 Note that r0 need not necessarily be the same as r1, especially
16941 during pre-regalloc scheduling. */
16942
16943 if (satisfies_constraint_Ush (SET_SRC (prev_set))
16944 && REG_P (SET_DEST (prev_set)) && REG_P (SET_DEST (curr_set)))
16945 {
16946 if (GET_CODE (SET_SRC (curr_set)) == LO_SUM
16947 && REG_P (XEXP (SET_SRC (curr_set), 0))
16948 && REGNO (XEXP (SET_SRC (curr_set), 0))
16949 == REGNO (SET_DEST (prev_set))
16950 && rtx_equal_p (XEXP (SET_SRC (prev_set), 0),
16951 XEXP (SET_SRC (curr_set), 1)))
16952 return true;
16953 }
16954 }
16955
d7b03373 16956 if (simple_sets_p && aarch64_fusion_enabled_p (AARCH64_FUSE_MOVK_MOVK))
cd0cb232
KT
16957 {
16958
16959 /* We're trying to match:
16960 prev (movk) == (set (zero_extract (reg r0)
16961 (const_int 16)
16962 (const_int 32))
16963 (const_int imm16_1))
16964 curr (movk) == (set (zero_extract (reg r0)
16965 (const_int 16)
16966 (const_int 48))
16967 (const_int imm16_2)) */
16968
16969 if (GET_CODE (SET_DEST (prev_set)) == ZERO_EXTRACT
16970 && GET_CODE (SET_DEST (curr_set)) == ZERO_EXTRACT
16971 && REG_P (XEXP (SET_DEST (prev_set), 0))
16972 && REG_P (XEXP (SET_DEST (curr_set), 0))
16973 && REGNO (XEXP (SET_DEST (prev_set), 0))
16974 == REGNO (XEXP (SET_DEST (curr_set), 0))
16975 && CONST_INT_P (XEXP (SET_DEST (prev_set), 2))
16976 && CONST_INT_P (XEXP (SET_DEST (curr_set), 2))
16977 && INTVAL (XEXP (SET_DEST (prev_set), 2)) == 32
16978 && INTVAL (XEXP (SET_DEST (curr_set), 2)) == 48
16979 && CONST_INT_P (SET_SRC (prev_set))
16980 && CONST_INT_P (SET_SRC (curr_set)))
16981 return true;
16982
16983 }
d7b03373 16984 if (simple_sets_p && aarch64_fusion_enabled_p (AARCH64_FUSE_ADRP_LDR))
d8354ad7
KT
16985 {
16986 /* We're trying to match:
16987 prev (adrp) == (set (reg r0)
16988 (high (symbol_ref ("SYM"))))
16989 curr (ldr) == (set (reg r1)
16990 (mem (lo_sum (reg r0)
16991 (symbol_ref ("SYM")))))
16992 or
16993 curr (ldr) == (set (reg r1)
16994 (zero_extend (mem
16995 (lo_sum (reg r0)
16996 (symbol_ref ("SYM")))))) */
16997 if (satisfies_constraint_Ush (SET_SRC (prev_set))
16998 && REG_P (SET_DEST (prev_set)) && REG_P (SET_DEST (curr_set)))
16999 {
17000 rtx curr_src = SET_SRC (curr_set);
17001
17002 if (GET_CODE (curr_src) == ZERO_EXTEND)
17003 curr_src = XEXP (curr_src, 0);
17004
17005 if (MEM_P (curr_src) && GET_CODE (XEXP (curr_src, 0)) == LO_SUM
17006 && REG_P (XEXP (XEXP (curr_src, 0), 0))
17007 && REGNO (XEXP (XEXP (curr_src, 0), 0))
17008 == REGNO (SET_DEST (prev_set))
17009 && rtx_equal_p (XEXP (XEXP (curr_src, 0), 1),
17010 XEXP (SET_SRC (prev_set), 0)))
17011 return true;
17012 }
17013 }
cd0cb232 17014
d7b03373 17015 if (aarch64_fusion_enabled_p (AARCH64_FUSE_AES_AESMC)
00a8574a
WD
17016 && aarch_crypto_can_dual_issue (prev, curr))
17017 return true;
17018
d7b03373 17019 if (aarch64_fusion_enabled_p (AARCH64_FUSE_CMP_BRANCH)
3759108f
AP
17020 && any_condjump_p (curr))
17021 {
509f819a
N
17022 unsigned int condreg1, condreg2;
17023 rtx cc_reg_1;
17024 aarch64_fixed_condition_code_regs (&condreg1, &condreg2);
17025 cc_reg_1 = gen_rtx_REG (CCmode, condreg1);
17026
17027 if (reg_referenced_p (cc_reg_1, PATTERN (curr))
17028 && prev
17029 && modified_in_p (cc_reg_1, prev))
17030 {
f8a27206
AP
17031 enum attr_type prev_type = get_attr_type (prev);
17032
509f819a
N
 17033 /* FIXME: this misses some instructions which are considered simple
 17034 arithmetic instructions for ThunderX. Simple shifts are missed here. */
17035 if (prev_type == TYPE_ALUS_SREG
17036 || prev_type == TYPE_ALUS_IMM
17037 || prev_type == TYPE_LOGICS_REG
17038 || prev_type == TYPE_LOGICS_IMM)
17039 return true;
17040 }
3759108f
AP
17041 }
17042
bee7e0fc
AP
17043 if (prev_set
17044 && curr_set
17045 && aarch64_fusion_enabled_p (AARCH64_FUSE_ALU_BRANCH)
00c7c57f
JB
17046 && any_condjump_p (curr))
17047 {
17048 /* We're trying to match:
17049 prev (alu_insn) == (set (r0) plus ((r0) (r1/imm)))
17050 curr (cbz) == (set (pc) (if_then_else (eq/ne) (r0)
17051 (const_int 0))
17052 (label_ref ("SYM"))
17053 (pc)) */
17054 if (SET_DEST (curr_set) == (pc_rtx)
17055 && GET_CODE (SET_SRC (curr_set)) == IF_THEN_ELSE
17056 && REG_P (XEXP (XEXP (SET_SRC (curr_set), 0), 0))
17057 && REG_P (SET_DEST (prev_set))
17058 && REGNO (SET_DEST (prev_set))
17059 == REGNO (XEXP (XEXP (SET_SRC (curr_set), 0), 0)))
17060 {
17061 /* Fuse ALU operations followed by conditional branch instruction. */
17062 switch (get_attr_type (prev))
17063 {
17064 case TYPE_ALU_IMM:
17065 case TYPE_ALU_SREG:
17066 case TYPE_ADC_REG:
17067 case TYPE_ADC_IMM:
17068 case TYPE_ADCS_REG:
17069 case TYPE_ADCS_IMM:
17070 case TYPE_LOGIC_REG:
17071 case TYPE_LOGIC_IMM:
17072 case TYPE_CSEL:
17073 case TYPE_ADR:
17074 case TYPE_MOV_IMM:
17075 case TYPE_SHIFT_REG:
17076 case TYPE_SHIFT_IMM:
17077 case TYPE_BFM:
17078 case TYPE_RBIT:
17079 case TYPE_REV:
17080 case TYPE_EXTEND:
17081 return true;
17082
17083 default:;
17084 }
17085 }
17086 }
17087
6a569cdd
KT
17088 return false;
17089}
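/* As a concrete illustration of the checks above (operands are examples
   only): AARCH64_FUSE_MOV_MOVK keeps pairs such as

	mov	w0, #0x1234
	movk	w0, #0x5678, lsl 16

   adjacent, and AARCH64_FUSE_ADRP_ADD does the same for

	adrp	x0, sym
	add	x0, x0, :lo12:sym

   so that the scheduler does not separate instructions the core can fuse.  */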
17090
f2879a90
KT
17091/* Return true iff the instruction fusion described by OP is enabled. */
17092
17093bool
17094aarch64_fusion_enabled_p (enum aarch64_fusion_pairs op)
17095{
17096 return (aarch64_tune_params.fusible_ops & op) != 0;
17097}
17098
350013bc
BC
17099/* If MEM is in the form [base+offset], extract the two parts of the
 17100 address and store them in BASE and OFFSET; otherwise return false
 17101 after clearing BASE and OFFSET. */
17102
17103bool
17104extract_base_offset_in_addr (rtx mem, rtx *base, rtx *offset)
17105{
17106 rtx addr;
17107
17108 gcc_assert (MEM_P (mem));
17109
17110 addr = XEXP (mem, 0);
17111
17112 if (REG_P (addr))
17113 {
17114 *base = addr;
17115 *offset = const0_rtx;
17116 return true;
17117 }
17118
17119 if (GET_CODE (addr) == PLUS
17120 && REG_P (XEXP (addr, 0)) && CONST_INT_P (XEXP (addr, 1)))
17121 {
17122 *base = XEXP (addr, 0);
17123 *offset = XEXP (addr, 1);
17124 return true;
17125 }
17126
17127 *base = NULL_RTX;
17128 *offset = NULL_RTX;
17129
17130 return false;
17131}
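/* For example, a MEM whose address is (plus (reg base) (const_int 16))
   yields *BASE = (reg base) and *OFFSET = (const_int 16), while a bare
   (reg base) address yields *OFFSET = const0_rtx, exactly as coded above.  */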
17132
17133/* Types for scheduling fusion. */
17134enum sched_fusion_type
17135{
17136 SCHED_FUSION_NONE = 0,
17137 SCHED_FUSION_LD_SIGN_EXTEND,
17138 SCHED_FUSION_LD_ZERO_EXTEND,
17139 SCHED_FUSION_LD,
17140 SCHED_FUSION_ST,
17141 SCHED_FUSION_NUM
17142};
17143
17144/* If INSN is a load or store with an address in the form [base+offset],
 17145 extract the two parts and store them in BASE and OFFSET. Return the
 17146 scheduling fusion type of this INSN. */
17147
17148static enum sched_fusion_type
17149fusion_load_store (rtx_insn *insn, rtx *base, rtx *offset)
17150{
17151 rtx x, dest, src;
17152 enum sched_fusion_type fusion = SCHED_FUSION_LD;
17153
17154 gcc_assert (INSN_P (insn));
17155 x = PATTERN (insn);
17156 if (GET_CODE (x) != SET)
17157 return SCHED_FUSION_NONE;
17158
17159 src = SET_SRC (x);
17160 dest = SET_DEST (x);
17161
abc52318
KT
17162 machine_mode dest_mode = GET_MODE (dest);
17163
17164 if (!aarch64_mode_valid_for_sched_fusion_p (dest_mode))
350013bc
BC
17165 return SCHED_FUSION_NONE;
17166
17167 if (GET_CODE (src) == SIGN_EXTEND)
17168 {
17169 fusion = SCHED_FUSION_LD_SIGN_EXTEND;
17170 src = XEXP (src, 0);
17171 if (GET_CODE (src) != MEM || GET_MODE (src) != SImode)
17172 return SCHED_FUSION_NONE;
17173 }
17174 else if (GET_CODE (src) == ZERO_EXTEND)
17175 {
17176 fusion = SCHED_FUSION_LD_ZERO_EXTEND;
17177 src = XEXP (src, 0);
17178 if (GET_CODE (src) != MEM || GET_MODE (src) != SImode)
17179 return SCHED_FUSION_NONE;
17180 }
17181
17182 if (GET_CODE (src) == MEM && REG_P (dest))
17183 extract_base_offset_in_addr (src, base, offset);
17184 else if (GET_CODE (dest) == MEM && (REG_P (src) || src == const0_rtx))
17185 {
17186 fusion = SCHED_FUSION_ST;
17187 extract_base_offset_in_addr (dest, base, offset);
17188 }
17189 else
17190 return SCHED_FUSION_NONE;
17191
17192 if (*base == NULL_RTX || *offset == NULL_RTX)
17193 fusion = SCHED_FUSION_NONE;
17194
17195 return fusion;
17196}
17197
17198/* Implement the TARGET_SCHED_FUSION_PRIORITY hook.
17199
 17200 Currently we only support fusing ldr or str instructions, so FUSION_PRI
 17201 and PRI are only calculated for these instructions. For other instructions,
 17202 FUSION_PRI and PRI are simply set to MAX_PRI - 1. In the future, other
 17203 types of instruction fusion can be added by returning different priorities.
17204
17205 It's important that irrelevant instructions get the largest FUSION_PRI. */
17206
17207static void
17208aarch64_sched_fusion_priority (rtx_insn *insn, int max_pri,
17209 int *fusion_pri, int *pri)
17210{
17211 int tmp, off_val;
17212 rtx base, offset;
17213 enum sched_fusion_type fusion;
17214
17215 gcc_assert (INSN_P (insn));
17216
17217 tmp = max_pri - 1;
17218 fusion = fusion_load_store (insn, &base, &offset);
17219 if (fusion == SCHED_FUSION_NONE)
17220 {
17221 *pri = tmp;
17222 *fusion_pri = tmp;
17223 return;
17224 }
17225
17226 /* Set FUSION_PRI according to fusion type and base register. */
17227 *fusion_pri = tmp - fusion * FIRST_PSEUDO_REGISTER - REGNO (base);
17228
17229 /* Calculate PRI. */
17230 tmp /= 2;
17231
17232 /* INSN with smaller offset goes first. */
17233 off_val = (int)(INTVAL (offset));
17234 if (off_val >= 0)
17235 tmp -= (off_val & 0xfffff);
17236 else
17237 tmp += ((- off_val) & 0xfffff);
17238
17239 *pri = tmp;
17240 return;
17241}
17242
9bca63d4
WD
17243/* Implement the TARGET_SCHED_ADJUST_PRIORITY hook.
17244 Adjust priority of sha1h instructions so they are scheduled before
17245 other SHA1 instructions. */
17246
17247static int
17248aarch64_sched_adjust_priority (rtx_insn *insn, int priority)
17249{
17250 rtx x = PATTERN (insn);
17251
17252 if (GET_CODE (x) == SET)
17253 {
17254 x = SET_SRC (x);
17255
17256 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_SHA1H)
17257 return priority + 10;
17258 }
17259
17260 return priority;
17261}
17262
350013bc
BC
17263/* Given OPERANDS of consecutive load/store, check if we can merge
17264 them into ldp/stp. LOAD is true if they are load instructions.
17265 MODE is the mode of memory operands. */
17266
17267bool
17268aarch64_operands_ok_for_ldpstp (rtx *operands, bool load,
b8506a8a 17269 machine_mode mode)
350013bc
BC
17270{
17271 HOST_WIDE_INT offval_1, offval_2, msize;
17272 enum reg_class rclass_1, rclass_2;
17273 rtx mem_1, mem_2, reg_1, reg_2, base_1, base_2, offset_1, offset_2;
17274
17275 if (load)
17276 {
17277 mem_1 = operands[1];
17278 mem_2 = operands[3];
17279 reg_1 = operands[0];
17280 reg_2 = operands[2];
17281 gcc_assert (REG_P (reg_1) && REG_P (reg_2));
17282 if (REGNO (reg_1) == REGNO (reg_2))
17283 return false;
17284 }
17285 else
17286 {
17287 mem_1 = operands[0];
17288 mem_2 = operands[2];
17289 reg_1 = operands[1];
17290 reg_2 = operands[3];
17291 }
17292
bf84ac44
AP
17293 /* The mems cannot be volatile. */
17294 if (MEM_VOLATILE_P (mem_1) || MEM_VOLATILE_P (mem_2))
17295 return false;
17296
54700e2e
AP
17297 /* If we have SImode and slow unaligned ldp,
 17298 check that the alignment is at least 8 bytes. */
17299 if (mode == SImode
17300 && (aarch64_tune_params.extra_tuning_flags
17301 & AARCH64_EXTRA_TUNE_SLOW_UNALIGNED_LDPW)
17302 && !optimize_size
17303 && MEM_ALIGN (mem_1) < 8 * BITS_PER_UNIT)
17304 return false;
17305
350013bc
BC
17306 /* Check if the addresses are in the form of [base+offset]. */
17307 extract_base_offset_in_addr (mem_1, &base_1, &offset_1);
17308 if (base_1 == NULL_RTX || offset_1 == NULL_RTX)
17309 return false;
17310 extract_base_offset_in_addr (mem_2, &base_2, &offset_2);
17311 if (base_2 == NULL_RTX || offset_2 == NULL_RTX)
17312 return false;
17313
 17314 /* Check if the bases are the same. */
17315 if (!rtx_equal_p (base_1, base_2))
17316 return false;
17317
dfe1da23
JW
17318 /* The operands must be of the same size. */
17319 gcc_assert (known_eq (GET_MODE_SIZE (GET_MODE (mem_1)),
17320 GET_MODE_SIZE (GET_MODE (mem_2))));
17321
350013bc
BC
17322 offval_1 = INTVAL (offset_1);
17323 offval_2 = INTVAL (offset_2);
6a70badb
RS
17324 /* We should only be trying this for fixed-sized modes. There is no
17325 SVE LDP/STP instruction. */
17326 msize = GET_MODE_SIZE (mode).to_constant ();
350013bc
BC
17327 /* Check if the offsets are consecutive. */
17328 if (offval_1 != (offval_2 + msize) && offval_2 != (offval_1 + msize))
17329 return false;
17330
17331 /* Check if the addresses are clobbered by load. */
17332 if (load)
17333 {
17334 if (reg_mentioned_p (reg_1, mem_1))
17335 return false;
17336
17337 /* In increasing order, the last load can clobber the address. */
17338 if (offval_1 > offval_2 && reg_mentioned_p (reg_2, mem_2))
9b56ec11 17339 return false;
350013bc
BC
17340 }
17341
9b56ec11
JW
17342 /* One of the memory accesses must be a mempair operand.
17343 If it is not the first one, they need to be swapped by the
17344 peephole. */
17345 if (!aarch64_mem_pair_operand (mem_1, GET_MODE (mem_1))
17346 && !aarch64_mem_pair_operand (mem_2, GET_MODE (mem_2)))
17347 return false;
17348
350013bc
BC
17349 if (REG_P (reg_1) && FP_REGNUM_P (REGNO (reg_1)))
17350 rclass_1 = FP_REGS;
17351 else
17352 rclass_1 = GENERAL_REGS;
17353
17354 if (REG_P (reg_2) && FP_REGNUM_P (REGNO (reg_2)))
17355 rclass_2 = FP_REGS;
17356 else
17357 rclass_2 = GENERAL_REGS;
17358
 17359 /* Check if the registers are of the same class. */
17360 if (rclass_1 != rclass_2)
17361 return false;
17362
17363 return true;
17364}
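/* For instance (registers and offsets are illustrative), when the checks
   above succeed a peephole can combine

	ldr	x0, [x2]
	ldr	x1, [x2, 8]

   into

	ldp	x0, x1, [x2]

   given matching bases, offsets that differ by the access size and no
   clobber of the address register by the loads.  */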
17365
9b56ec11
JW
17366/* Given OPERANDS of consecutive load/store that can be merged,
17367 swap them if they are not in ascending order. */
17368void
17369aarch64_swap_ldrstr_operands (rtx* operands, bool load)
17370{
17371 rtx mem_1, mem_2, base_1, base_2, offset_1, offset_2;
17372 HOST_WIDE_INT offval_1, offval_2;
17373
17374 if (load)
17375 {
17376 mem_1 = operands[1];
17377 mem_2 = operands[3];
17378 }
17379 else
17380 {
17381 mem_1 = operands[0];
17382 mem_2 = operands[2];
17383 }
17384
17385 extract_base_offset_in_addr (mem_1, &base_1, &offset_1);
17386 extract_base_offset_in_addr (mem_2, &base_2, &offset_2);
17387
17388 offval_1 = INTVAL (offset_1);
17389 offval_2 = INTVAL (offset_2);
17390
17391 if (offval_1 > offval_2)
17392 {
17393 /* Irrespective of whether this is a load or a store,
17394 we do the same swap. */
17395 std::swap (operands[0], operands[2]);
17396 std::swap (operands[1], operands[3]);
17397 }
17398}
17399
d0b51297
JW
17400/* Taking X and Y to be HOST_WIDE_INT pointers, return the result of a
17401 comparison between the two. */
17402int
17403aarch64_host_wide_int_compare (const void *x, const void *y)
17404{
17405 return wi::cmps (* ((const HOST_WIDE_INT *) x),
17406 * ((const HOST_WIDE_INT *) y));
17407}
17408
17409/* Taking X and Y to be pairs of RTX, one pointing to a MEM rtx and the
17410 other pointing to a REG rtx containing an offset, compare the offsets
17411 of the two pairs.
17412
17413 Return:
17414
17415 1 iff offset (X) > offset (Y)
17416 0 iff offset (X) == offset (Y)
17417 -1 iff offset (X) < offset (Y) */
17418int
17419aarch64_ldrstr_offset_compare (const void *x, const void *y)
17420{
17421 const rtx * operands_1 = (const rtx *) x;
17422 const rtx * operands_2 = (const rtx *) y;
17423 rtx mem_1, mem_2, base, offset_1, offset_2;
17424
17425 if (MEM_P (operands_1[0]))
17426 mem_1 = operands_1[0];
17427 else
17428 mem_1 = operands_1[1];
17429
17430 if (MEM_P (operands_2[0]))
17431 mem_2 = operands_2[0];
17432 else
17433 mem_2 = operands_2[1];
17434
17435 /* Extract the offsets. */
17436 extract_base_offset_in_addr (mem_1, &base, &offset_1);
17437 extract_base_offset_in_addr (mem_2, &base, &offset_2);
17438
17439 gcc_assert (offset_1 != NULL_RTX && offset_2 != NULL_RTX);
17440
17441 return wi::cmps (INTVAL (offset_1), INTVAL (offset_2));
17442}
17443
350013bc
BC
17444/* Given OPERANDS of consecutive load/store, check if we can merge
17445 them into ldp/stp by adjusting the offset. LOAD is true if they
17446 are load instructions. MODE is the mode of memory operands.
17447
 17448 Given the following consecutive stores:
17449
17450 str w1, [xb, 0x100]
17451 str w1, [xb, 0x104]
17452 str w1, [xb, 0x108]
17453 str w1, [xb, 0x10c]
17454
17455 Though the offsets are out of the range supported by stp, we can
17456 still pair them after adjusting the offset, like:
17457
17458 add scratch, xb, 0x100
17459 stp w1, w1, [scratch]
17460 stp w1, w1, [scratch, 0x8]
17461
17462 The peephole patterns detecting this opportunity should guarantee
 17463 the scratch register is available. */
17464
17465bool
17466aarch64_operands_adjust_ok_for_ldpstp (rtx *operands, bool load,
146c2e3a 17467 scalar_mode mode)
350013bc 17468{
34d7854d
JW
17469 const int num_insns = 4;
17470 enum reg_class rclass;
17471 HOST_WIDE_INT offvals[num_insns], msize;
17472 rtx mem[num_insns], reg[num_insns], base[num_insns], offset[num_insns];
350013bc
BC
17473
17474 if (load)
17475 {
34d7854d
JW
17476 for (int i = 0; i < num_insns; i++)
17477 {
17478 reg[i] = operands[2 * i];
17479 mem[i] = operands[2 * i + 1];
17480
17481 gcc_assert (REG_P (reg[i]));
17482 }
d0b51297
JW
17483
17484 /* Do not attempt to merge the loads if the loads clobber each other. */
17485 for (int i = 0; i < 8; i += 2)
17486 for (int j = i + 2; j < 8; j += 2)
17487 if (reg_overlap_mentioned_p (operands[i], operands[j]))
17488 return false;
350013bc
BC
17489 }
17490 else
34d7854d
JW
17491 for (int i = 0; i < num_insns; i++)
17492 {
17493 mem[i] = operands[2 * i];
17494 reg[i] = operands[2 * i + 1];
17495 }
350013bc 17496
34d7854d
JW
 17497 /* Skip if the memory operand is already valid for ldp/stp by itself. */
17498 if (!MEM_P (mem[0]) || aarch64_mem_pair_operand (mem[0], mode))
bf84ac44
AP
17499 return false;
17500
34d7854d
JW
17501 for (int i = 0; i < num_insns; i++)
17502 {
17503 /* The mems cannot be volatile. */
17504 if (MEM_VOLATILE_P (mem[i]))
17505 return false;
17506
17507 /* Check if the addresses are in the form of [base+offset]. */
17508 extract_base_offset_in_addr (mem[i], base + i, offset + i);
17509 if (base[i] == NULL_RTX || offset[i] == NULL_RTX)
17510 return false;
17511 }
17512
363b395b
JW
 17513 /* Check if the registers are of the same class. */
17514 rclass = REG_P (reg[0]) && FP_REGNUM_P (REGNO (reg[0]))
17515 ? FP_REGS : GENERAL_REGS;
17516
17517 for (int i = 1; i < num_insns; i++)
17518 if (REG_P (reg[i]) && FP_REGNUM_P (REGNO (reg[i])))
17519 {
17520 if (rclass != FP_REGS)
17521 return false;
17522 }
17523 else
17524 {
17525 if (rclass != GENERAL_REGS)
17526 return false;
17527 }
17528
17529 /* Only the last register in the order in which they occur
17530 may be clobbered by the load. */
17531 if (rclass == GENERAL_REGS && load)
17532 for (int i = 0; i < num_insns - 1; i++)
34d7854d
JW
17533 if (reg_mentioned_p (reg[i], mem[i]))
17534 return false;
350013bc
BC
17535
 17536 /* Check if the bases are the same. */
34d7854d
JW
17537 for (int i = 0; i < num_insns - 1; i++)
17538 if (!rtx_equal_p (base[i], base[i + 1]))
17539 return false;
17540
17541 for (int i = 0; i < num_insns; i++)
17542 offvals[i] = INTVAL (offset[i]);
350013bc 17543
350013bc 17544 msize = GET_MODE_SIZE (mode);
d0b51297
JW
17545
17546 /* Check if the offsets can be put in the right order to do a ldp/stp. */
34d7854d
JW
17547 qsort (offvals, num_insns, sizeof (HOST_WIDE_INT),
17548 aarch64_host_wide_int_compare);
d0b51297
JW
17549
17550 if (!(offvals[1] == offvals[0] + msize
17551 && offvals[3] == offvals[2] + msize))
350013bc
BC
17552 return false;
17553
d0b51297
JW
17554 /* Check that offsets are within range of each other. The ldp/stp
 17555 instructions have 7-bit immediate offsets, so use 0x80. */
17556 if (offvals[2] - offvals[0] >= msize * 0x80)
17557 return false;
350013bc 17558
d0b51297
JW
17559 /* The offsets must be aligned with respect to each other. */
17560 if (offvals[0] % msize != offvals[2] % msize)
17561 return false;
17562
54700e2e
AP
17563 /* If we have SImode and slow unaligned ldp,
 17564 check that the alignment is at least 8 bytes. */
17565 if (mode == SImode
17566 && (aarch64_tune_params.extra_tuning_flags
34d7854d 17567 & AARCH64_EXTRA_TUNE_SLOW_UNALIGNED_LDPW)
54700e2e 17568 && !optimize_size
34d7854d 17569 && MEM_ALIGN (mem[0]) < 8 * BITS_PER_UNIT)
54700e2e
AP
17570 return false;
17571
350013bc
BC
17572 return true;
17573}
17574
17575/* Given OPERANDS of consecutive load/store, this function pairs them
d0b51297
JW
17576 into LDP/STP after adjusting the offset. It depends on the fact
17577 that the operands can be sorted so the offsets are correct for STP.
350013bc
BC
17578 MODE is the mode of memory operands. CODE is the rtl operator
17579 which should be applied to all memory operands, it's SIGN_EXTEND,
17580 ZERO_EXTEND or UNKNOWN. */
17581
17582bool
17583aarch64_gen_adjusted_ldpstp (rtx *operands, bool load,
146c2e3a 17584 scalar_mode mode, RTX_CODE code)
350013bc 17585{
d0b51297 17586 rtx base, offset_1, offset_3, t1, t2;
350013bc 17587 rtx mem_1, mem_2, mem_3, mem_4;
d0b51297
JW
17588 rtx temp_operands[8];
17589 HOST_WIDE_INT off_val_1, off_val_3, base_off, new_off_1, new_off_3,
17590 stp_off_upper_limit, stp_off_lower_limit, msize;
9b56ec11 17591
d0b51297
JW
17592 /* We make changes on a copy as we may still bail out. */
17593 for (int i = 0; i < 8; i ++)
17594 temp_operands[i] = operands[i];
9b56ec11 17595
d0b51297
JW
17596 /* Sort the operands. */
17597 qsort (temp_operands, 4, 2 * sizeof (rtx *), aarch64_ldrstr_offset_compare);
9b56ec11 17598
350013bc
BC
17599 if (load)
17600 {
d0b51297
JW
17601 mem_1 = temp_operands[1];
17602 mem_2 = temp_operands[3];
17603 mem_3 = temp_operands[5];
17604 mem_4 = temp_operands[7];
350013bc
BC
17605 }
17606 else
17607 {
d0b51297
JW
17608 mem_1 = temp_operands[0];
17609 mem_2 = temp_operands[2];
17610 mem_3 = temp_operands[4];
17611 mem_4 = temp_operands[6];
350013bc
BC
17612 gcc_assert (code == UNKNOWN);
17613 }
17614
9b56ec11 17615 extract_base_offset_in_addr (mem_1, &base, &offset_1);
d0b51297
JW
17616 extract_base_offset_in_addr (mem_3, &base, &offset_3);
17617 gcc_assert (base != NULL_RTX && offset_1 != NULL_RTX
17618 && offset_3 != NULL_RTX);
350013bc 17619
d0b51297 17620 /* Adjust offset so it can fit in LDP/STP instruction. */
350013bc 17621 msize = GET_MODE_SIZE (mode);
d0b51297
JW
17622 stp_off_upper_limit = msize * (0x40 - 1);
17623 stp_off_lower_limit = - msize * 0x40;
350013bc 17624
d0b51297
JW
17625 off_val_1 = INTVAL (offset_1);
17626 off_val_3 = INTVAL (offset_3);
17627
 17628 /* The base offset is optimally halfway between the two STP/LDP offsets. */
17629 if (msize <= 4)
17630 base_off = (off_val_1 + off_val_3) / 2;
17631 else
 17632 /* However, due to issues with negative LDP/STP offset generation for
 17633 larger modes (DF, DI and vector modes), we must not use negative
 17634 addresses smaller than 9 signed unadjusted bits can store. This
 17635 provides the most range in this case. */
17636 base_off = off_val_1;
17637
17638 /* Adjust the base so that it is aligned with the addresses but still
17639 optimal. */
17640 if (base_off % msize != off_val_1 % msize)
17641 /* Fix the offset, bearing in mind we want to make it bigger not
17642 smaller. */
17643 base_off += (((base_off % msize) - (off_val_1 % msize)) + msize) % msize;
17644 else if (msize <= 4)
17645 /* The negative range of LDP/STP is one larger than the positive range. */
17646 base_off += msize;
17647
17648 /* Check if base offset is too big or too small. We can attempt to resolve
17649 this issue by setting it to the maximum value and seeing if the offsets
17650 still fit. */
17651 if (base_off >= 0x1000)
350013bc 17652 {
d0b51297
JW
17653 base_off = 0x1000 - 1;
17654 /* We must still make sure that the base offset is aligned with respect
 17655 to the address. But it may not be made any bigger. */
17656 base_off -= (((base_off % msize) - (off_val_1 % msize)) + msize) % msize;
350013bc
BC
17657 }
17658
d0b51297
JW
17659 /* Likewise for the case where the base is too small. */
17660 if (base_off <= -0x1000)
350013bc 17661 {
d0b51297
JW
17662 base_off = -0x1000 + 1;
17663 base_off += (((base_off % msize) - (off_val_1 % msize)) + msize) % msize;
350013bc
BC
17664 }
17665
d0b51297
JW
17666 /* Offset of the first STP/LDP. */
17667 new_off_1 = off_val_1 - base_off;
17668
17669 /* Offset of the second STP/LDP. */
17670 new_off_3 = off_val_3 - base_off;
350013bc 17671
d0b51297
JW
17672 /* The offsets must be within the range of the LDP/STP instructions. */
17673 if (new_off_1 > stp_off_upper_limit || new_off_1 < stp_off_lower_limit
17674 || new_off_3 > stp_off_upper_limit || new_off_3 < stp_off_lower_limit)
350013bc
BC
17675 return false;
17676
d0b51297
JW
17677 replace_equiv_address_nv (mem_1, plus_constant (Pmode, operands[8],
17678 new_off_1), true);
17679 replace_equiv_address_nv (mem_2, plus_constant (Pmode, operands[8],
17680 new_off_1 + msize), true);
17681 replace_equiv_address_nv (mem_3, plus_constant (Pmode, operands[8],
17682 new_off_3), true);
17683 replace_equiv_address_nv (mem_4, plus_constant (Pmode, operands[8],
17684 new_off_3 + msize), true);
17685
17686 if (!aarch64_mem_pair_operand (mem_1, mode)
17687 || !aarch64_mem_pair_operand (mem_3, mode))
17688 return false;
350013bc
BC
17689
17690 if (code == ZERO_EXTEND)
17691 {
17692 mem_1 = gen_rtx_ZERO_EXTEND (DImode, mem_1);
17693 mem_2 = gen_rtx_ZERO_EXTEND (DImode, mem_2);
17694 mem_3 = gen_rtx_ZERO_EXTEND (DImode, mem_3);
17695 mem_4 = gen_rtx_ZERO_EXTEND (DImode, mem_4);
17696 }
17697 else if (code == SIGN_EXTEND)
17698 {
17699 mem_1 = gen_rtx_SIGN_EXTEND (DImode, mem_1);
17700 mem_2 = gen_rtx_SIGN_EXTEND (DImode, mem_2);
17701 mem_3 = gen_rtx_SIGN_EXTEND (DImode, mem_3);
17702 mem_4 = gen_rtx_SIGN_EXTEND (DImode, mem_4);
17703 }
17704
17705 if (load)
17706 {
d0b51297 17707 operands[0] = temp_operands[0];
350013bc 17708 operands[1] = mem_1;
d0b51297 17709 operands[2] = temp_operands[2];
350013bc 17710 operands[3] = mem_2;
d0b51297 17711 operands[4] = temp_operands[4];
350013bc 17712 operands[5] = mem_3;
d0b51297 17713 operands[6] = temp_operands[6];
350013bc
BC
17714 operands[7] = mem_4;
17715 }
17716 else
17717 {
17718 operands[0] = mem_1;
d0b51297 17719 operands[1] = temp_operands[1];
350013bc 17720 operands[2] = mem_2;
d0b51297 17721 operands[3] = temp_operands[3];
350013bc 17722 operands[4] = mem_3;
d0b51297 17723 operands[5] = temp_operands[5];
350013bc 17724 operands[6] = mem_4;
d0b51297 17725 operands[7] = temp_operands[7];
350013bc
BC
17726 }
17727
17728 /* Emit adjusting instruction. */
d0b51297 17729 emit_insn (gen_rtx_SET (operands[8], plus_constant (DImode, base, base_off)));
350013bc 17730 /* Emit ldp/stp instructions. */
f7df4a84
RS
17731 t1 = gen_rtx_SET (operands[0], operands[1]);
17732 t2 = gen_rtx_SET (operands[2], operands[3]);
350013bc 17733 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, t1, t2)));
f7df4a84
RS
17734 t1 = gen_rtx_SET (operands[4], operands[5]);
17735 t2 = gen_rtx_SET (operands[6], operands[7]);
350013bc
BC
17736 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, t1, t2)));
17737 return true;
17738}
17739
76a34e3f
RS
17740/* Implement TARGET_VECTORIZE_EMPTY_MASK_IS_EXPENSIVE. Assume for now that
17741 it isn't worth branching around empty masked ops (including masked
17742 stores). */
17743
17744static bool
17745aarch64_empty_mask_is_expensive (unsigned)
17746{
17747 return false;
17748}
17749
1b1e81f8
JW
17750/* Return 1 if pseudo register should be created and used to hold
17751 GOT address for PIC code. */
17752
17753bool
17754aarch64_use_pseudo_pic_reg (void)
17755{
17756 return aarch64_cmodel == AARCH64_CMODEL_SMALL_SPIC;
17757}
17758
7b841a12
JW
17759/* Implement TARGET_UNSPEC_MAY_TRAP_P. */
17760
17761static int
17762aarch64_unspec_may_trap_p (const_rtx x, unsigned flags)
17763{
17764 switch (XINT (x, 1))
17765 {
17766 case UNSPEC_GOTSMALLPIC:
17767 case UNSPEC_GOTSMALLPIC28K:
17768 case UNSPEC_GOTTINYPIC:
17769 return 0;
17770 default:
17771 break;
17772 }
17773
17774 return default_unspec_may_trap_p (x, flags);
17775}
17776
39252973
KT
17777
17778/* If X is a positive CONST_DOUBLE with a value that is a power of 2
17779 return the log2 of that value. Otherwise return -1. */
17780
17781int
17782aarch64_fpconst_pow_of_2 (rtx x)
17783{
17784 const REAL_VALUE_TYPE *r;
17785
17786 if (!CONST_DOUBLE_P (x))
17787 return -1;
17788
17789 r = CONST_DOUBLE_REAL_VALUE (x);
17790
17791 if (REAL_VALUE_NEGATIVE (*r)
17792 || REAL_VALUE_ISNAN (*r)
17793 || REAL_VALUE_ISINF (*r)
17794 || !real_isinteger (r, DFmode))
17795 return -1;
17796
17797 return exact_log2 (real_to_integer (r));
17798}
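/* For example, a CONST_DOUBLE of 8.0 yields 3, while 0.5, -4.0 and 3.0 all
   yield -1 (non-integer, negative and not a power of 2 respectively).  */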
17799
17800/* If X is a vector of equal CONST_DOUBLE values and that value is
17801 Y, return the aarch64_fpconst_pow_of_2 of Y. Otherwise return -1. */
17802
17803int
17804aarch64_vec_fpconst_pow_of_2 (rtx x)
17805{
6a70badb
RS
17806 int nelts;
17807 if (GET_CODE (x) != CONST_VECTOR
17808 || !CONST_VECTOR_NUNITS (x).is_constant (&nelts))
39252973
KT
17809 return -1;
17810
17811 if (GET_MODE_CLASS (GET_MODE (x)) != MODE_VECTOR_FLOAT)
17812 return -1;
17813
17814 int firstval = aarch64_fpconst_pow_of_2 (CONST_VECTOR_ELT (x, 0));
17815 if (firstval <= 0)
17816 return -1;
17817
6a70badb 17818 for (int i = 1; i < nelts; i++)
39252973
KT
17819 if (aarch64_fpconst_pow_of_2 (CONST_VECTOR_ELT (x, i)) != firstval)
17820 return -1;
17821
17822 return firstval;
17823}
17824
11e554b3
JG
17825/* Implement TARGET_PROMOTED_TYPE to promote 16-bit floating point types
17826 to float.
17827
17828 __fp16 always promotes through this hook.
17829 _Float16 may promote if TARGET_FLT_EVAL_METHOD is 16, but we do that
17830 through the generic excess precision logic rather than here. */
17831
c2ec330c
AL
17832static tree
17833aarch64_promoted_type (const_tree t)
17834{
11e554b3
JG
17835 if (SCALAR_FLOAT_TYPE_P (t)
17836 && TYPE_MAIN_VARIANT (t) == aarch64_fp16_type_node)
c2ec330c 17837 return float_type_node;
11e554b3 17838
c2ec330c
AL
17839 return NULL_TREE;
17840}
ee62a5a6
RS
17841
17842/* Implement the TARGET_OPTAB_SUPPORTED_P hook. */
17843
17844static bool
9acc9cbe 17845aarch64_optab_supported_p (int op, machine_mode mode1, machine_mode,
ee62a5a6
RS
17846 optimization_type opt_type)
17847{
17848 switch (op)
17849 {
17850 case rsqrt_optab:
9acc9cbe 17851 return opt_type == OPTIMIZE_FOR_SPEED && use_rsqrt_p (mode1);
ee62a5a6
RS
17852
17853 default:
17854 return true;
17855 }
17856}
17857
43cacb12
RS
17858/* Implement the TARGET_DWARF_POLY_INDETERMINATE_VALUE hook. */
17859
17860static unsigned int
17861aarch64_dwarf_poly_indeterminate_value (unsigned int i, unsigned int *factor,
17862 int *offset)
17863{
17864 /* Polynomial invariant 1 == (VG / 2) - 1. */
17865 gcc_assert (i == 1);
17866 *factor = 2;
17867 *offset = 1;
17868 return AARCH64_DWARF_VG;
17869}
17870
11e554b3
JG
17871/* Implement TARGET_LIBGCC_FLOATING_POINT_MODE_SUPPORTED_P - return TRUE
17872 if MODE is HFmode, and punt to the generic implementation otherwise. */
17873
17874static bool
7c5bd57a 17875aarch64_libgcc_floating_mode_supported_p (scalar_float_mode mode)
11e554b3
JG
17876{
17877 return (mode == HFmode
17878 ? true
17879 : default_libgcc_floating_mode_supported_p (mode));
17880}
17881
2e5f8203
JG
17882/* Implement TARGET_SCALAR_MODE_SUPPORTED_P - return TRUE
17883 if MODE is HFmode, and punt to the generic implementation otherwise. */
17884
17885static bool
18e2a8b8 17886aarch64_scalar_mode_supported_p (scalar_mode mode)
2e5f8203
JG
17887{
17888 return (mode == HFmode
17889 ? true
17890 : default_scalar_mode_supported_p (mode));
17891}
17892
11e554b3
JG
17893/* Set the value of FLT_EVAL_METHOD.
17894 ISO/IEC TS 18661-3 defines two values that we'd like to make use of:
17895
17896 0: evaluate all operations and constants, whose semantic type has at
17897 most the range and precision of type float, to the range and
17898 precision of float; evaluate all other operations and constants to
17899 the range and precision of the semantic type;
17900
17901 N, where _FloatN is a supported interchange floating type
17902 evaluate all operations and constants, whose semantic type has at
17903 most the range and precision of _FloatN type, to the range and
17904 precision of the _FloatN type; evaluate all other operations and
17905 constants to the range and precision of the semantic type;
17906
17907 If we have the ARMv8.2-A extensions then we support _Float16 in native
17908 precision, so we should set this to 16. Otherwise, we support the type,
17909 but want to evaluate expressions in float precision, so set this to
17910 0. */
17911
17912static enum flt_eval_method
17913aarch64_excess_precision (enum excess_precision_type type)
17914{
17915 switch (type)
17916 {
17917 case EXCESS_PRECISION_TYPE_FAST:
17918 case EXCESS_PRECISION_TYPE_STANDARD:
17919 /* We can calculate either in 16-bit range and precision or
17920 32-bit range and precision. Make that decision based on whether
17921 we have native support for the ARMv8.2-A 16-bit floating-point
17922 instructions or not. */
17923 return (TARGET_FP_F16INST
17924 ? FLT_EVAL_METHOD_PROMOTE_TO_FLOAT16
17925 : FLT_EVAL_METHOD_PROMOTE_TO_FLOAT);
17926 case EXCESS_PRECISION_TYPE_IMPLICIT:
17927 return FLT_EVAL_METHOD_PROMOTE_TO_FLOAT16;
17928 default:
17929 gcc_unreachable ();
17930 }
17931 return FLT_EVAL_METHOD_UNPREDICTABLE;
17932}
17933
b48d6421
KT
17934/* Implement TARGET_SCHED_CAN_SPECULATE_INSN. Return true if INSN can be
17935 scheduled for speculative execution. Reject the long-running division
17936 and square-root instructions. */
17937
17938static bool
17939aarch64_sched_can_speculate_insn (rtx_insn *insn)
17940{
17941 switch (get_attr_type (insn))
17942 {
17943 case TYPE_SDIV:
17944 case TYPE_UDIV:
17945 case TYPE_FDIVS:
17946 case TYPE_FDIVD:
17947 case TYPE_FSQRTS:
17948 case TYPE_FSQRTD:
17949 case TYPE_NEON_FP_SQRT_S:
17950 case TYPE_NEON_FP_SQRT_D:
17951 case TYPE_NEON_FP_SQRT_S_Q:
17952 case TYPE_NEON_FP_SQRT_D_Q:
17953 case TYPE_NEON_FP_DIV_S:
17954 case TYPE_NEON_FP_DIV_D:
17955 case TYPE_NEON_FP_DIV_S_Q:
17956 case TYPE_NEON_FP_DIV_D_Q:
17957 return false;
17958 default:
17959 return true;
17960 }
17961}
17962
43cacb12
RS
17963/* Implement TARGET_COMPUTE_PRESSURE_CLASSES. */
17964
17965static int
17966aarch64_compute_pressure_classes (reg_class *classes)
17967{
17968 int i = 0;
17969 classes[i++] = GENERAL_REGS;
17970 classes[i++] = FP_REGS;
17971 /* PR_REGS isn't a useful pressure class because many predicate pseudo
17972 registers need to go in PR_LO_REGS at some point during their
17973 lifetime. Splitting it into two halves has the effect of making
17974 all predicates count against PR_LO_REGS, so that we try whenever
17975 possible to restrict the number of live predicates to 8. This
17976 greatly reduces the amount of spilling in certain loops. */
17977 classes[i++] = PR_LO_REGS;
17978 classes[i++] = PR_HI_REGS;
17979 return i;
17980}
17981
17982/* Implement TARGET_CAN_CHANGE_MODE_CLASS. */
17983
17984static bool
17985aarch64_can_change_mode_class (machine_mode from,
17986 machine_mode to, reg_class_t)
17987{
002092be
RS
17988 if (BYTES_BIG_ENDIAN)
17989 {
17990 bool from_sve_p = aarch64_sve_data_mode_p (from);
17991 bool to_sve_p = aarch64_sve_data_mode_p (to);
17992
17993 /* Don't allow changes between SVE data modes and non-SVE modes.
17994 See the comment at the head of aarch64-sve.md for details. */
17995 if (from_sve_p != to_sve_p)
17996 return false;
17997
17998 /* Don't allow changes in element size: lane 0 of the new vector
17999 would not then be lane 0 of the old vector. See the comment
18000 above aarch64_maybe_expand_sve_subreg_move for a more detailed
18001 description.
18002
18003 In the worst case, this forces a register to be spilled in
18004 one mode and reloaded in the other, which handles the
18005 endianness correctly. */
18006 if (from_sve_p && GET_MODE_UNIT_SIZE (from) != GET_MODE_UNIT_SIZE (to))
18007 return false;
18008 }
43cacb12
RS
18009 return true;
18010}
18011
5cce8171
RS
18012/* Implement TARGET_EARLY_REMAT_MODES. */
18013
18014static void
18015aarch64_select_early_remat_modes (sbitmap modes)
18016{
18017 /* SVE values are not normally live across a call, so it should be
18018 worth doing early rematerialization even in VL-specific mode. */
18019 for (int i = 0; i < NUM_MACHINE_MODES; ++i)
18020 {
18021 machine_mode mode = (machine_mode) i;
18022 unsigned int vec_flags = aarch64_classify_vector_mode (mode);
18023 if (vec_flags & VEC_ANY_SVE)
18024 bitmap_set_bit (modes, i);
18025 }
18026}
18027
c0111dc4
RE
18028/* Override the default target speculation_safe_value. */
18029static rtx
18030aarch64_speculation_safe_value (machine_mode mode,
18031 rtx result, rtx val, rtx failval)
18032{
18033 /* Maybe we should warn if falling back to hard barriers. They are
 18034 likely to be noticeably more expensive than the alternative below. */
18035 if (!aarch64_track_speculation)
18036 return default_speculation_safe_value (mode, result, val, failval);
18037
18038 if (!REG_P (val))
18039 val = copy_to_mode_reg (mode, val);
18040
18041 if (!aarch64_reg_or_zero (failval, mode))
18042 failval = copy_to_mode_reg (mode, failval);
18043
21cebf90 18044 emit_insn (gen_despeculate_copy (mode, result, val, failval));
c0111dc4
RE
18045 return result;
18046}
18047
2d56d6ba
KT
18048/* Implement TARGET_ESTIMATED_POLY_VALUE.
18049 Look into the tuning structure for an estimate.
18050 VAL.coeffs[1] is multiplied by the number of VQ chunks over the initial
18051 Advanced SIMD 128 bits. */
18052
18053static HOST_WIDE_INT
18054aarch64_estimated_poly_value (poly_int64 val)
18055{
18056 enum aarch64_sve_vector_bits_enum width_source
18057 = aarch64_tune_params.sve_width;
18058
18059 /* If we still don't have an estimate, use the default. */
18060 if (width_source == SVE_SCALABLE)
18061 return default_estimated_poly_value (val);
18062
18063 HOST_WIDE_INT over_128 = width_source - 128;
18064 return val.coeffs[0] + val.coeffs[1] * over_128 / 128;
18065}
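/* Worked example (numbers are illustrative): for a poly_int64 of 16 + 16x,
   such as the byte size of an SVE vector, and a tuning that sets sve_width
   to 256, over_128 is 128 and the estimate is 16 + 16 * 128 / 128 = 32
   bytes, i.e. a 256-bit vector is assumed.  */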
18066
51b86113
DM
18067/* Target-specific selftests. */
18068
18069#if CHECKING_P
18070
18071namespace selftest {
18072
18073/* Selftest for the RTL loader.
18074 Verify that the RTL loader copes with a dump from
18075 print_rtx_function. This is essentially just a test that class
18076 function_reader can handle a real dump, but it also verifies
18077 that lookup_reg_by_dump_name correctly handles hard regs.
18078 The presence of hard reg names in the dump means that the test is
18079 target-specific, hence it is in this file. */
18080
18081static void
18082aarch64_test_loading_full_dump ()
18083{
18084 rtl_dump_test t (SELFTEST_LOCATION, locate_file ("aarch64/times-two.rtl"));
18085
18086 ASSERT_STREQ ("times_two", IDENTIFIER_POINTER (DECL_NAME (cfun->decl)));
18087
18088 rtx_insn *insn_1 = get_insn_by_uid (1);
18089 ASSERT_EQ (NOTE, GET_CODE (insn_1));
18090
18091 rtx_insn *insn_15 = get_insn_by_uid (15);
18092 ASSERT_EQ (INSN, GET_CODE (insn_15));
18093 ASSERT_EQ (USE, GET_CODE (PATTERN (insn_15)));
18094
18095 /* Verify crtl->return_rtx. */
18096 ASSERT_EQ (REG, GET_CODE (crtl->return_rtx));
18097 ASSERT_EQ (0, REGNO (crtl->return_rtx));
18098 ASSERT_EQ (SImode, GET_MODE (crtl->return_rtx));
18099}
18100
18101/* Run all target-specific selftests. */
18102
18103static void
18104aarch64_run_selftests (void)
18105{
18106 aarch64_test_loading_full_dump ();
18107}
18108
18109} // namespace selftest
18110
18111#endif /* #if CHECKING_P */
18112
43e9d192
IB
18113#undef TARGET_ADDRESS_COST
18114#define TARGET_ADDRESS_COST aarch64_address_cost
18115
18116/* This hook determines whether unnamed bitfields affect the alignment
18117 of the containing structure. The hook returns true if the structure
18118 should inherit the alignment requirements of an unnamed bitfield's
18119 type. */
18120#undef TARGET_ALIGN_ANON_BITFIELD
18121#define TARGET_ALIGN_ANON_BITFIELD hook_bool_void_true

#undef TARGET_ASM_ALIGNED_DI_OP
#define TARGET_ASM_ALIGNED_DI_OP "\t.xword\t"

#undef TARGET_ASM_ALIGNED_HI_OP
#define TARGET_ASM_ALIGNED_HI_OP "\t.hword\t"

#undef TARGET_ASM_ALIGNED_SI_OP
#define TARGET_ASM_ALIGNED_SI_OP "\t.word\t"

#undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
#define TARGET_ASM_CAN_OUTPUT_MI_THUNK \
  hook_bool_const_tree_hwi_hwi_const_tree_true

#undef TARGET_ASM_FILE_START
#define TARGET_ASM_FILE_START aarch64_start_file

#undef TARGET_ASM_OUTPUT_MI_THUNK
#define TARGET_ASM_OUTPUT_MI_THUNK aarch64_output_mi_thunk

#undef TARGET_ASM_SELECT_RTX_SECTION
#define TARGET_ASM_SELECT_RTX_SECTION aarch64_select_rtx_section

#undef TARGET_ASM_TRAMPOLINE_TEMPLATE
#define TARGET_ASM_TRAMPOLINE_TEMPLATE aarch64_asm_trampoline_template

#undef TARGET_BUILD_BUILTIN_VA_LIST
#define TARGET_BUILD_BUILTIN_VA_LIST aarch64_build_builtin_va_list

#undef TARGET_CALLEE_COPIES
#define TARGET_CALLEE_COPIES hook_bool_CUMULATIVE_ARGS_mode_tree_bool_false

#undef TARGET_CAN_ELIMINATE
#define TARGET_CAN_ELIMINATE aarch64_can_eliminate

#undef TARGET_CAN_INLINE_P
#define TARGET_CAN_INLINE_P aarch64_can_inline_p

#undef TARGET_CANNOT_FORCE_CONST_MEM
#define TARGET_CANNOT_FORCE_CONST_MEM aarch64_cannot_force_const_mem

#undef TARGET_CASE_VALUES_THRESHOLD
#define TARGET_CASE_VALUES_THRESHOLD aarch64_case_values_threshold

#undef TARGET_CONDITIONAL_REGISTER_USAGE
#define TARGET_CONDITIONAL_REGISTER_USAGE aarch64_conditional_register_usage

/* Only the least significant bit is used for initialization guard
   variables. */
#undef TARGET_CXX_GUARD_MASK_BIT
#define TARGET_CXX_GUARD_MASK_BIT hook_bool_void_true
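
/* A rough sketch of the effect, assumed and simplified: guarded
   initialization of a local static tests only bit 0 of the guard word,
   along the lines of

     if ((guard & 1) == 0 && __cxa_guard_acquire (&guard))
       {
         ... run the initializer ...;
         __cxa_guard_release (&guard);
       }

   rather than comparing the whole guard word against zero.  */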

#undef TARGET_C_MODE_FOR_SUFFIX
#define TARGET_C_MODE_FOR_SUFFIX aarch64_c_mode_for_suffix

#ifdef TARGET_BIG_ENDIAN_DEFAULT
#undef TARGET_DEFAULT_TARGET_FLAGS
#define TARGET_DEFAULT_TARGET_FLAGS (MASK_BIG_END)
#endif

#undef TARGET_CLASS_MAX_NREGS
#define TARGET_CLASS_MAX_NREGS aarch64_class_max_nregs

#undef TARGET_BUILTIN_DECL
#define TARGET_BUILTIN_DECL aarch64_builtin_decl

#undef TARGET_BUILTIN_RECIPROCAL
#define TARGET_BUILTIN_RECIPROCAL aarch64_builtin_reciprocal

#undef TARGET_C_EXCESS_PRECISION
#define TARGET_C_EXCESS_PRECISION aarch64_excess_precision

#undef TARGET_EXPAND_BUILTIN
#define TARGET_EXPAND_BUILTIN aarch64_expand_builtin

#undef TARGET_EXPAND_BUILTIN_VA_START
#define TARGET_EXPAND_BUILTIN_VA_START aarch64_expand_builtin_va_start

#undef TARGET_FOLD_BUILTIN
#define TARGET_FOLD_BUILTIN aarch64_fold_builtin

#undef TARGET_FUNCTION_ARG
#define TARGET_FUNCTION_ARG aarch64_function_arg

#undef TARGET_FUNCTION_ARG_ADVANCE
#define TARGET_FUNCTION_ARG_ADVANCE aarch64_function_arg_advance

#undef TARGET_FUNCTION_ARG_BOUNDARY
#define TARGET_FUNCTION_ARG_BOUNDARY aarch64_function_arg_boundary

#undef TARGET_FUNCTION_ARG_PADDING
#define TARGET_FUNCTION_ARG_PADDING aarch64_function_arg_padding

#undef TARGET_GET_RAW_RESULT_MODE
#define TARGET_GET_RAW_RESULT_MODE aarch64_get_reg_raw_mode
#undef TARGET_GET_RAW_ARG_MODE
#define TARGET_GET_RAW_ARG_MODE aarch64_get_reg_raw_mode

#undef TARGET_FUNCTION_OK_FOR_SIBCALL
#define TARGET_FUNCTION_OK_FOR_SIBCALL aarch64_function_ok_for_sibcall

#undef TARGET_FUNCTION_VALUE
#define TARGET_FUNCTION_VALUE aarch64_function_value

#undef TARGET_FUNCTION_VALUE_REGNO_P
#define TARGET_FUNCTION_VALUE_REGNO_P aarch64_function_value_regno_p

#undef TARGET_GIMPLE_FOLD_BUILTIN
#define TARGET_GIMPLE_FOLD_BUILTIN aarch64_gimple_fold_builtin

#undef TARGET_GIMPLIFY_VA_ARG_EXPR
#define TARGET_GIMPLIFY_VA_ARG_EXPR aarch64_gimplify_va_arg_expr

#undef TARGET_INIT_BUILTINS
#define TARGET_INIT_BUILTINS aarch64_init_builtins

#undef TARGET_IRA_CHANGE_PSEUDO_ALLOCNO_CLASS
#define TARGET_IRA_CHANGE_PSEUDO_ALLOCNO_CLASS \
  aarch64_ira_change_pseudo_allocno_class

#undef TARGET_LEGITIMATE_ADDRESS_P
#define TARGET_LEGITIMATE_ADDRESS_P aarch64_legitimate_address_hook_p

#undef TARGET_LEGITIMATE_CONSTANT_P
#define TARGET_LEGITIMATE_CONSTANT_P aarch64_legitimate_constant_p

#undef TARGET_LEGITIMIZE_ADDRESS_DISPLACEMENT
#define TARGET_LEGITIMIZE_ADDRESS_DISPLACEMENT \
  aarch64_legitimize_address_displacement

#undef TARGET_LIBGCC_CMP_RETURN_MODE
#define TARGET_LIBGCC_CMP_RETURN_MODE aarch64_libgcc_cmp_return_mode

#undef TARGET_LIBGCC_FLOATING_MODE_SUPPORTED_P
#define TARGET_LIBGCC_FLOATING_MODE_SUPPORTED_P \
  aarch64_libgcc_floating_mode_supported_p

#undef TARGET_MANGLE_TYPE
#define TARGET_MANGLE_TYPE aarch64_mangle_type

#undef TARGET_MEMORY_MOVE_COST
#define TARGET_MEMORY_MOVE_COST aarch64_memory_move_cost

#undef TARGET_MIN_DIVISIONS_FOR_RECIP_MUL
#define TARGET_MIN_DIVISIONS_FOR_RECIP_MUL aarch64_min_divisions_for_recip_mul

#undef TARGET_MUST_PASS_IN_STACK
#define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size

/* This target hook should return true if accesses to volatile bitfields
   should use the narrowest mode possible. It should return false if these
   accesses should use the bitfield container type. */
#undef TARGET_NARROW_VOLATILE_BITFIELD
#define TARGET_NARROW_VOLATILE_BITFIELD hook_bool_void_false
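
/* For illustration, an assumed example: because this hook returns false,
   a read of the volatile bit-field 'f' in

     struct { volatile unsigned int f : 8; } x;

   is performed as a load of the whole 32-bit 'unsigned int' container
   rather than as a single-byte load.  */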

#undef TARGET_OPTION_OVERRIDE
#define TARGET_OPTION_OVERRIDE aarch64_override_options

#undef TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE
#define TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE \
  aarch64_override_options_after_change

#undef TARGET_OPTION_SAVE
#define TARGET_OPTION_SAVE aarch64_option_save

#undef TARGET_OPTION_RESTORE
#define TARGET_OPTION_RESTORE aarch64_option_restore

#undef TARGET_OPTION_PRINT
#define TARGET_OPTION_PRINT aarch64_option_print

#undef TARGET_OPTION_VALID_ATTRIBUTE_P
#define TARGET_OPTION_VALID_ATTRIBUTE_P aarch64_option_valid_attribute_p

#undef TARGET_SET_CURRENT_FUNCTION
#define TARGET_SET_CURRENT_FUNCTION aarch64_set_current_function

#undef TARGET_PASS_BY_REFERENCE
#define TARGET_PASS_BY_REFERENCE aarch64_pass_by_reference

#undef TARGET_PREFERRED_RELOAD_CLASS
#define TARGET_PREFERRED_RELOAD_CLASS aarch64_preferred_reload_class

#undef TARGET_SCHED_REASSOCIATION_WIDTH
#define TARGET_SCHED_REASSOCIATION_WIDTH aarch64_reassociation_width

#undef TARGET_PROMOTED_TYPE
#define TARGET_PROMOTED_TYPE aarch64_promoted_type

#undef TARGET_SECONDARY_RELOAD
#define TARGET_SECONDARY_RELOAD aarch64_secondary_reload

#undef TARGET_SHIFT_TRUNCATION_MASK
#define TARGET_SHIFT_TRUNCATION_MASK aarch64_shift_truncation_mask

#undef TARGET_SETUP_INCOMING_VARARGS
#define TARGET_SETUP_INCOMING_VARARGS aarch64_setup_incoming_varargs

#undef TARGET_STRUCT_VALUE_RTX
#define TARGET_STRUCT_VALUE_RTX aarch64_struct_value_rtx

#undef TARGET_REGISTER_MOVE_COST
#define TARGET_REGISTER_MOVE_COST aarch64_register_move_cost

#undef TARGET_RETURN_IN_MEMORY
#define TARGET_RETURN_IN_MEMORY aarch64_return_in_memory

#undef TARGET_RETURN_IN_MSB
#define TARGET_RETURN_IN_MSB aarch64_return_in_msb

#undef TARGET_RTX_COSTS
#define TARGET_RTX_COSTS aarch64_rtx_costs_wrapper

#undef TARGET_SCALAR_MODE_SUPPORTED_P
#define TARGET_SCALAR_MODE_SUPPORTED_P aarch64_scalar_mode_supported_p

#undef TARGET_SCHED_ISSUE_RATE
#define TARGET_SCHED_ISSUE_RATE aarch64_sched_issue_rate

#undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
  aarch64_sched_first_cycle_multipass_dfa_lookahead

#undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD
#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD \
  aarch64_first_cycle_multipass_dfa_lookahead_guard

#undef TARGET_SHRINK_WRAP_GET_SEPARATE_COMPONENTS
#define TARGET_SHRINK_WRAP_GET_SEPARATE_COMPONENTS \
  aarch64_get_separate_components

#undef TARGET_SHRINK_WRAP_COMPONENTS_FOR_BB
#define TARGET_SHRINK_WRAP_COMPONENTS_FOR_BB \
  aarch64_components_for_bb

#undef TARGET_SHRINK_WRAP_DISQUALIFY_COMPONENTS
#define TARGET_SHRINK_WRAP_DISQUALIFY_COMPONENTS \
  aarch64_disqualify_components

#undef TARGET_SHRINK_WRAP_EMIT_PROLOGUE_COMPONENTS
#define TARGET_SHRINK_WRAP_EMIT_PROLOGUE_COMPONENTS \
  aarch64_emit_prologue_components

#undef TARGET_SHRINK_WRAP_EMIT_EPILOGUE_COMPONENTS
#define TARGET_SHRINK_WRAP_EMIT_EPILOGUE_COMPONENTS \
  aarch64_emit_epilogue_components

#undef TARGET_SHRINK_WRAP_SET_HANDLED_COMPONENTS
#define TARGET_SHRINK_WRAP_SET_HANDLED_COMPONENTS \
  aarch64_set_handled_components

#undef TARGET_TRAMPOLINE_INIT
#define TARGET_TRAMPOLINE_INIT aarch64_trampoline_init

#undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
#define TARGET_USE_BLOCKS_FOR_CONSTANT_P aarch64_use_blocks_for_constant_p

#undef TARGET_VECTOR_MODE_SUPPORTED_P
#define TARGET_VECTOR_MODE_SUPPORTED_P aarch64_vector_mode_supported_p

#undef TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT
#define TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT \
  aarch64_builtin_support_vector_misalignment

#undef TARGET_ARRAY_MODE
#define TARGET_ARRAY_MODE aarch64_array_mode

#undef TARGET_ARRAY_MODE_SUPPORTED_P
#define TARGET_ARRAY_MODE_SUPPORTED_P aarch64_array_mode_supported_p

#undef TARGET_VECTORIZE_ADD_STMT_COST
#define TARGET_VECTORIZE_ADD_STMT_COST aarch64_add_stmt_cost

#undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
#define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
  aarch64_builtin_vectorization_cost

#undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
#define TARGET_VECTORIZE_PREFERRED_SIMD_MODE aarch64_preferred_simd_mode

#undef TARGET_VECTORIZE_BUILTINS
#define TARGET_VECTORIZE_BUILTINS

#undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
#define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
  aarch64_builtin_vectorized_function

#undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES
#define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES \
  aarch64_autovectorize_vector_sizes

#undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV
#define TARGET_ATOMIC_ASSIGN_EXPAND_FENV \
  aarch64_atomic_assign_expand_fenv

/* Section anchor support. */

#undef TARGET_MIN_ANCHOR_OFFSET
#define TARGET_MIN_ANCHOR_OFFSET -256

/* Limit the maximum anchor offset to 4k-1, since that's the limit for a
   byte offset; we can do much more for larger data types, but have no way
   to determine the size of the access. We assume accesses are aligned. */
#undef TARGET_MAX_ANCHOR_OFFSET
#define TARGET_MAX_ANCHOR_OFFSET 4095
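
/* A sketch of where these limits come from, assumed for illustration:
   objects grouped around a section anchor are addressed as

     adrp x0, anchor
     add  x0, x0, :lo12:anchor
     ldrb w1, [x0, #4095]

   and the byte-sized form of the [base, #imm] address only accepts
   immediates from -256 (signed 9-bit unscaled) up to 4095 (unsigned
   12-bit scaled), matching TARGET_MIN_ANCHOR_OFFSET and
   TARGET_MAX_ANCHOR_OFFSET above.  */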

#undef TARGET_VECTOR_ALIGNMENT
#define TARGET_VECTOR_ALIGNMENT aarch64_simd_vector_alignment

#undef TARGET_VECTORIZE_PREFERRED_VECTOR_ALIGNMENT
#define TARGET_VECTORIZE_PREFERRED_VECTOR_ALIGNMENT \
  aarch64_vectorize_preferred_vector_alignment
#undef TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE
#define TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE \
  aarch64_simd_vector_alignment_reachable

/* vec_perm support. */

#undef TARGET_VECTORIZE_VEC_PERM_CONST
#define TARGET_VECTORIZE_VEC_PERM_CONST \
  aarch64_vectorize_vec_perm_const

#undef TARGET_VECTORIZE_GET_MASK_MODE
#define TARGET_VECTORIZE_GET_MASK_MODE aarch64_get_mask_mode
#undef TARGET_VECTORIZE_EMPTY_MASK_IS_EXPENSIVE
#define TARGET_VECTORIZE_EMPTY_MASK_IS_EXPENSIVE \
  aarch64_empty_mask_is_expensive
#undef TARGET_PREFERRED_ELSE_VALUE
#define TARGET_PREFERRED_ELSE_VALUE \
  aarch64_preferred_else_value

#undef TARGET_INIT_LIBFUNCS
#define TARGET_INIT_LIBFUNCS aarch64_init_libfuncs

#undef TARGET_FIXED_CONDITION_CODE_REGS
#define TARGET_FIXED_CONDITION_CODE_REGS aarch64_fixed_condition_code_regs

#undef TARGET_FLAGS_REGNUM
#define TARGET_FLAGS_REGNUM CC_REGNUM

#undef TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS
#define TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS true

#undef TARGET_ASAN_SHADOW_OFFSET
#define TARGET_ASAN_SHADOW_OFFSET aarch64_asan_shadow_offset

#undef TARGET_LEGITIMIZE_ADDRESS
#define TARGET_LEGITIMIZE_ADDRESS aarch64_legitimize_address

#undef TARGET_SCHED_CAN_SPECULATE_INSN
#define TARGET_SCHED_CAN_SPECULATE_INSN aarch64_sched_can_speculate_insn

#undef TARGET_CAN_USE_DOLOOP_P
#define TARGET_CAN_USE_DOLOOP_P can_use_doloop_if_innermost

#undef TARGET_SCHED_ADJUST_PRIORITY
#define TARGET_SCHED_ADJUST_PRIORITY aarch64_sched_adjust_priority

#undef TARGET_SCHED_MACRO_FUSION_P
#define TARGET_SCHED_MACRO_FUSION_P aarch64_macro_fusion_p

#undef TARGET_SCHED_MACRO_FUSION_PAIR_P
#define TARGET_SCHED_MACRO_FUSION_PAIR_P aarch_macro_fusion_pair_p

#undef TARGET_SCHED_FUSION_PRIORITY
#define TARGET_SCHED_FUSION_PRIORITY aarch64_sched_fusion_priority

#undef TARGET_UNSPEC_MAY_TRAP_P
#define TARGET_UNSPEC_MAY_TRAP_P aarch64_unspec_may_trap_p

#undef TARGET_USE_PSEUDO_PIC_REG
#define TARGET_USE_PSEUDO_PIC_REG aarch64_use_pseudo_pic_reg

#undef TARGET_PRINT_OPERAND
#define TARGET_PRINT_OPERAND aarch64_print_operand

#undef TARGET_PRINT_OPERAND_ADDRESS
#define TARGET_PRINT_OPERAND_ADDRESS aarch64_print_operand_address

#undef TARGET_OPTAB_SUPPORTED_P
#define TARGET_OPTAB_SUPPORTED_P aarch64_optab_supported_p

#undef TARGET_OMIT_STRUCT_RETURN_REG
#define TARGET_OMIT_STRUCT_RETURN_REG true

#undef TARGET_DWARF_POLY_INDETERMINATE_VALUE
#define TARGET_DWARF_POLY_INDETERMINATE_VALUE \
  aarch64_dwarf_poly_indeterminate_value

/* The architecture reserves bits 0 and 1 so use bit 2 for descriptors. */
#undef TARGET_CUSTOM_FUNCTION_DESCRIPTORS
#define TARGET_CUSTOM_FUNCTION_DESCRIPTORS 4
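
/* For reference, the value 4 is the bit-2 mask (1 << 2), the lowest bit
   left available once bits 0 and 1 are set aside as described above.  */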

#undef TARGET_HARD_REGNO_NREGS
#define TARGET_HARD_REGNO_NREGS aarch64_hard_regno_nregs
#undef TARGET_HARD_REGNO_MODE_OK
#define TARGET_HARD_REGNO_MODE_OK aarch64_hard_regno_mode_ok

#undef TARGET_MODES_TIEABLE_P
#define TARGET_MODES_TIEABLE_P aarch64_modes_tieable_p

#undef TARGET_HARD_REGNO_CALL_PART_CLOBBERED
#define TARGET_HARD_REGNO_CALL_PART_CLOBBERED \
  aarch64_hard_regno_call_part_clobbered

#undef TARGET_CONSTANT_ALIGNMENT
#define TARGET_CONSTANT_ALIGNMENT aarch64_constant_alignment

#undef TARGET_STACK_CLASH_PROTECTION_ALLOCA_PROBE_RANGE
#define TARGET_STACK_CLASH_PROTECTION_ALLOCA_PROBE_RANGE \
  aarch64_stack_clash_protection_alloca_probe_range

#undef TARGET_COMPUTE_PRESSURE_CLASSES
#define TARGET_COMPUTE_PRESSURE_CLASSES aarch64_compute_pressure_classes

#undef TARGET_CAN_CHANGE_MODE_CLASS
#define TARGET_CAN_CHANGE_MODE_CLASS aarch64_can_change_mode_class

#undef TARGET_SELECT_EARLY_REMAT_MODES
#define TARGET_SELECT_EARLY_REMAT_MODES aarch64_select_early_remat_modes

#undef TARGET_SPECULATION_SAFE_VALUE
#define TARGET_SPECULATION_SAFE_VALUE aarch64_speculation_safe_value

#undef TARGET_ESTIMATED_POLY_VALUE
#define TARGET_ESTIMATED_POLY_VALUE aarch64_estimated_poly_value

#undef TARGET_ATTRIBUTE_TABLE
#define TARGET_ATTRIBUTE_TABLE aarch64_attribute_table

#if CHECKING_P
#undef TARGET_RUN_TARGET_SELFTESTS
#define TARGET_RUN_TARGET_SELFTESTS selftest::aarch64_run_selftests
#endif /* #if CHECKING_P */

struct gcc_target targetm = TARGET_INITIALIZER;

#include "gt-aarch64.h"