/* Machine description for AArch64 architecture.
   Copyright (C) 2009-2018 Free Software Foundation, Inc.
   Contributed by ARM Ltd.

   This file is part of GCC.

   GCC is free software; you can redistribute it and/or modify it
   under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 3, or (at your option)
   any later version.

   GCC is distributed in the hope that it will be useful, but
   WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with GCC; see the file COPYING3.  If not see
   <http://www.gnu.org/licenses/>.  */

#define IN_TARGET_CODE 1

#include "config.h"
#define INCLUDE_STRING
#include "system.h"
#include "coretypes.h"
#include "backend.h"
#include "target.h"
#include "rtl.h"
#include "tree.h"
#include "memmodel.h"
#include "gimple.h"
#include "cfghooks.h"
#include "cfgloop.h"
#include "df.h"
#include "tm_p.h"
#include "stringpool.h"
#include "attribs.h"
#include "optabs.h"
#include "regs.h"
#include "emit-rtl.h"
#include "recog.h"
#include "diagnostic.h"
#include "insn-attr.h"
#include "alias.h"
#include "fold-const.h"
#include "stor-layout.h"
#include "calls.h"
#include "varasm.h"
#include "output.h"
#include "flags.h"
#include "explow.h"
#include "expr.h"
#include "reload.h"
#include "langhooks.h"
#include "opts.h"
#include "params.h"
#include "gimplify.h"
#include "dwarf2.h"
#include "gimple-iterator.h"
#include "tree-vectorizer.h"
#include "aarch64-cost-tables.h"
#include "dumpfile.h"
#include "builtins.h"
#include "rtl-iter.h"
#include "tm-constrs.h"
#include "sched-int.h"
#include "target-globals.h"
#include "common/common-target.h"
#include "cfgrtl.h"
#include "selftest.h"
#include "selftest-rtl.h"
#include "rtx-vector-builder.h"

/* This file should be included last.  */
#include "target-def.h"

/* Defined for convenience.  */
#define POINTER_BYTES (POINTER_SIZE / BITS_PER_UNIT)

/* Information about a legitimate vector immediate operand.  */
struct simd_immediate_info
{
  enum insn_type { MOV, MVN };
  enum modifier_type { LSL, MSL };

  simd_immediate_info () {}
  simd_immediate_info (scalar_float_mode, rtx);
  simd_immediate_info (scalar_int_mode, unsigned HOST_WIDE_INT,
                       insn_type = MOV, modifier_type = LSL,
                       unsigned int = 0);
  simd_immediate_info (scalar_mode, rtx, rtx);

  /* The mode of the elements.  */
  scalar_mode elt_mode;

  /* The value of each element if all elements are the same, or the
     first value if the constant is a series.  */
  rtx value;

  /* The value of the step if the constant is a series, null otherwise.  */
  rtx step;

  /* The instruction to use to move the immediate into a vector.  */
  insn_type insn;

  /* The kind of shift modifier to use, and the number of bits to shift.
     This is (LSL, 0) if no shift is needed.  */
  modifier_type modifier;
  unsigned int shift;
};

/* Construct a floating-point immediate in which each element has mode
   ELT_MODE_IN and value VALUE_IN.  */
inline simd_immediate_info
::simd_immediate_info (scalar_float_mode elt_mode_in, rtx value_in)
  : elt_mode (elt_mode_in), value (value_in), step (NULL_RTX), insn (MOV),
    modifier (LSL), shift (0)
{}

/* Construct an integer immediate in which each element has mode ELT_MODE_IN
   and value VALUE_IN.  The other parameters are as for the structure
   fields.  */
inline simd_immediate_info
::simd_immediate_info (scalar_int_mode elt_mode_in,
                       unsigned HOST_WIDE_INT value_in,
                       insn_type insn_in, modifier_type modifier_in,
                       unsigned int shift_in)
  : elt_mode (elt_mode_in), value (gen_int_mode (value_in, elt_mode_in)),
    step (NULL_RTX), insn (insn_in), modifier (modifier_in), shift (shift_in)
{}

/* Construct an integer immediate in which each element has mode ELT_MODE_IN
   and where element I is equal to VALUE_IN + I * STEP_IN.  */
inline simd_immediate_info
::simd_immediate_info (scalar_mode elt_mode_in, rtx value_in, rtx step_in)
  : elt_mode (elt_mode_in), value (value_in), step (step_in), insn (MOV),
    modifier (LSL), shift (0)
{}

/* The current code model.  */
enum aarch64_code_model aarch64_cmodel;

/* The number of 64-bit elements in an SVE vector.  */
poly_uint16 aarch64_sve_vg;

#ifdef HAVE_AS_TLS
#undef TARGET_HAVE_TLS
#define TARGET_HAVE_TLS 1
#endif

static bool aarch64_composite_type_p (const_tree, machine_mode);
static bool aarch64_vfp_is_call_or_return_candidate (machine_mode,
                                                     const_tree,
                                                     machine_mode *, int *,
                                                     bool *);
static void aarch64_elf_asm_constructor (rtx, int) ATTRIBUTE_UNUSED;
static void aarch64_elf_asm_destructor (rtx, int) ATTRIBUTE_UNUSED;
static void aarch64_override_options_after_change (void);
static bool aarch64_vector_mode_supported_p (machine_mode);
static int aarch64_address_cost (rtx, machine_mode, addr_space_t, bool);
static bool aarch64_builtin_support_vector_misalignment (machine_mode mode,
                                                         const_tree type,
                                                         int misalignment,
                                                         bool is_packed);
static machine_mode aarch64_simd_container_mode (scalar_mode, poly_int64);
static bool aarch64_print_address_internal (FILE*, machine_mode, rtx,
                                            aarch64_addr_query_type);
static HOST_WIDE_INT aarch64_clamp_to_uimm12_shift (HOST_WIDE_INT val);

/* Major revision number of the ARM Architecture implemented by the target.  */
unsigned aarch64_architecture_version;

/* The processor for which instructions should be scheduled.  */
enum aarch64_processor aarch64_tune = cortexa53;

/* Mask to specify which instruction scheduling options should be used.  */
unsigned long aarch64_tune_flags = 0;

/* Global flag for PC relative loads.  */
bool aarch64_pcrelative_literal_loads;

/* Global flag for whether frame pointer is enabled.  */
bool aarch64_use_frame_pointer;

/* Support for command line parsing of boolean flags in the tuning
   structures.  */
struct aarch64_flag_desc
{
  const char* name;
  unsigned int flag;
};

#define AARCH64_FUSION_PAIR(name, internal_name) \
  { name, AARCH64_FUSE_##internal_name },
static const struct aarch64_flag_desc aarch64_fusible_pairs[] =
{
  { "none", AARCH64_FUSE_NOTHING },
#include "aarch64-fusion-pairs.def"
  { "all", AARCH64_FUSE_ALL },
  { NULL, AARCH64_FUSE_NOTHING }
};

#define AARCH64_EXTRA_TUNING_OPTION(name, internal_name) \
  { name, AARCH64_EXTRA_TUNE_##internal_name },
static const struct aarch64_flag_desc aarch64_tuning_flags[] =
{
  { "none", AARCH64_EXTRA_TUNE_NONE },
#include "aarch64-tuning-flags.def"
  { "all", AARCH64_EXTRA_TUNE_ALL },
  { NULL, AARCH64_EXTRA_TUNE_NONE }
};

/* Tuning parameters.  */

static const struct cpu_addrcost_table generic_addrcost_table =
{
    {
      1, /* hi */
      0, /* si */
      0, /* di */
      1, /* ti */
    },
  0, /* pre_modify */
  0, /* post_modify */
  0, /* register_offset */
  0, /* register_sextend */
  0, /* register_zextend */
  0 /* imm_offset */
};

static const struct cpu_addrcost_table exynosm1_addrcost_table =
{
    {
      0, /* hi */
      0, /* si */
      0, /* di */
      2, /* ti */
    },
  0, /* pre_modify */
  0, /* post_modify */
  1, /* register_offset */
  1, /* register_sextend */
  2, /* register_zextend */
  0, /* imm_offset */
};

static const struct cpu_addrcost_table xgene1_addrcost_table =
{
    {
      1, /* hi */
      0, /* si */
      0, /* di */
      1, /* ti */
    },
  1, /* pre_modify */
  1, /* post_modify */
  0, /* register_offset */
  1, /* register_sextend */
  1, /* register_zextend */
  0, /* imm_offset */
};

static const struct cpu_addrcost_table thunderx2t99_addrcost_table =
{
    {
      1, /* hi */
      1, /* si */
      1, /* di */
      2, /* ti */
    },
  0, /* pre_modify */
  0, /* post_modify */
  2, /* register_offset */
  3, /* register_sextend */
  3, /* register_zextend */
  0, /* imm_offset */
};

static const struct cpu_addrcost_table tsv110_addrcost_table =
{
    {
      1, /* hi */
      0, /* si */
      0, /* di */
      1, /* ti */
    },
  0, /* pre_modify */
  0, /* post_modify */
  0, /* register_offset */
  1, /* register_sextend */
  1, /* register_zextend */
  0, /* imm_offset */
};

static const struct cpu_addrcost_table qdf24xx_addrcost_table =
{
    {
      1, /* hi */
      1, /* si */
      1, /* di */
      2, /* ti */
    },
  1, /* pre_modify */
  1, /* post_modify */
  3, /* register_offset */
  3, /* register_sextend */
  3, /* register_zextend */
  2, /* imm_offset */
};

static const struct cpu_regmove_cost generic_regmove_cost =
{
  1, /* GP2GP */
  /* Avoid the use of slow int<->fp moves for spilling by setting
     their cost higher than memmov_cost.  */
  5, /* GP2FP */
  5, /* FP2GP */
  2 /* FP2FP */
};

static const struct cpu_regmove_cost cortexa57_regmove_cost =
{
  1, /* GP2GP */
  /* Avoid the use of slow int<->fp moves for spilling by setting
     their cost higher than memmov_cost.  */
  5, /* GP2FP */
  5, /* FP2GP */
  2 /* FP2FP */
};

static const struct cpu_regmove_cost cortexa53_regmove_cost =
{
  1, /* GP2GP */
  /* Avoid the use of slow int<->fp moves for spilling by setting
     their cost higher than memmov_cost.  */
  5, /* GP2FP */
  5, /* FP2GP */
  2 /* FP2FP */
};

static const struct cpu_regmove_cost exynosm1_regmove_cost =
{
  1, /* GP2GP */
  /* Avoid the use of slow int<->fp moves for spilling by setting
     their cost higher than memmov_cost (actual, 4 and 9).  */
  9, /* GP2FP */
  9, /* FP2GP */
  1 /* FP2FP */
};

static const struct cpu_regmove_cost thunderx_regmove_cost =
{
  2, /* GP2GP */
  2, /* GP2FP */
  6, /* FP2GP */
  4 /* FP2FP */
};

static const struct cpu_regmove_cost xgene1_regmove_cost =
{
  1, /* GP2GP */
  /* Avoid the use of slow int<->fp moves for spilling by setting
     their cost higher than memmov_cost.  */
  8, /* GP2FP */
  8, /* FP2GP */
  2 /* FP2FP */
};

static const struct cpu_regmove_cost qdf24xx_regmove_cost =
{
  2, /* GP2GP */
  /* Avoid the use of int<->fp moves for spilling.  */
  6, /* GP2FP */
  6, /* FP2GP */
  4 /* FP2FP */
};

static const struct cpu_regmove_cost thunderx2t99_regmove_cost =
{
  1, /* GP2GP */
  /* Avoid the use of int<->fp moves for spilling.  */
  8, /* GP2FP */
  8, /* FP2GP */
  4 /* FP2FP */
};

static const struct cpu_regmove_cost tsv110_regmove_cost =
{
  1, /* GP2GP */
  /* Avoid the use of slow int<->fp moves for spilling by setting
     their cost higher than memmov_cost.  */
  2, /* GP2FP */
  3, /* FP2GP */
  2 /* FP2FP */
};

/* Generic costs for vector insn classes.  */
static const struct cpu_vector_cost generic_vector_cost =
{
  1, /* scalar_int_stmt_cost */
  1, /* scalar_fp_stmt_cost */
  1, /* scalar_load_cost */
  1, /* scalar_store_cost */
  1, /* vec_int_stmt_cost */
  1, /* vec_fp_stmt_cost */
  2, /* vec_permute_cost */
  1, /* vec_to_scalar_cost */
  1, /* scalar_to_vec_cost */
  1, /* vec_align_load_cost */
  1, /* vec_unalign_load_cost */
  1, /* vec_unalign_store_cost */
  1, /* vec_store_cost */
  3, /* cond_taken_branch_cost */
  1 /* cond_not_taken_branch_cost */
};

/* QDF24XX costs for vector insn classes.  */
static const struct cpu_vector_cost qdf24xx_vector_cost =
{
  1, /* scalar_int_stmt_cost */
  1, /* scalar_fp_stmt_cost */
  1, /* scalar_load_cost */
  1, /* scalar_store_cost */
  1, /* vec_int_stmt_cost */
  3, /* vec_fp_stmt_cost */
  2, /* vec_permute_cost */
  1, /* vec_to_scalar_cost */
  1, /* scalar_to_vec_cost */
  1, /* vec_align_load_cost */
  1, /* vec_unalign_load_cost */
  1, /* vec_unalign_store_cost */
  1, /* vec_store_cost */
  3, /* cond_taken_branch_cost */
  1 /* cond_not_taken_branch_cost */
};

/* ThunderX costs for vector insn classes.  */
static const struct cpu_vector_cost thunderx_vector_cost =
{
  1, /* scalar_int_stmt_cost */
  1, /* scalar_fp_stmt_cost */
  3, /* scalar_load_cost */
  1, /* scalar_store_cost */
  4, /* vec_int_stmt_cost */
  1, /* vec_fp_stmt_cost */
  4, /* vec_permute_cost */
  2, /* vec_to_scalar_cost */
  2, /* scalar_to_vec_cost */
  3, /* vec_align_load_cost */
  5, /* vec_unalign_load_cost */
  5, /* vec_unalign_store_cost */
  1, /* vec_store_cost */
  3, /* cond_taken_branch_cost */
  3 /* cond_not_taken_branch_cost */
};

static const struct cpu_vector_cost tsv110_vector_cost =
{
  1, /* scalar_int_stmt_cost */
  1, /* scalar_fp_stmt_cost */
  5, /* scalar_load_cost */
  1, /* scalar_store_cost */
  2, /* vec_int_stmt_cost */
  2, /* vec_fp_stmt_cost */
  2, /* vec_permute_cost */
  3, /* vec_to_scalar_cost */
  2, /* scalar_to_vec_cost */
  5, /* vec_align_load_cost */
  5, /* vec_unalign_load_cost */
  1, /* vec_unalign_store_cost */
  1, /* vec_store_cost */
  1, /* cond_taken_branch_cost */
  1 /* cond_not_taken_branch_cost */
};

/* Cortex-A57 costs for vector insn classes.  */
static const struct cpu_vector_cost cortexa57_vector_cost =
{
  1, /* scalar_int_stmt_cost */
  1, /* scalar_fp_stmt_cost */
  4, /* scalar_load_cost */
  1, /* scalar_store_cost */
  2, /* vec_int_stmt_cost */
  2, /* vec_fp_stmt_cost */
  3, /* vec_permute_cost */
  8, /* vec_to_scalar_cost */
  8, /* scalar_to_vec_cost */
  4, /* vec_align_load_cost */
  4, /* vec_unalign_load_cost */
  1, /* vec_unalign_store_cost */
  1, /* vec_store_cost */
  1, /* cond_taken_branch_cost */
  1 /* cond_not_taken_branch_cost */
};

static const struct cpu_vector_cost exynosm1_vector_cost =
{
  1, /* scalar_int_stmt_cost */
  1, /* scalar_fp_stmt_cost */
  5, /* scalar_load_cost */
  1, /* scalar_store_cost */
  3, /* vec_int_stmt_cost */
  3, /* vec_fp_stmt_cost */
  3, /* vec_permute_cost */
  3, /* vec_to_scalar_cost */
  3, /* scalar_to_vec_cost */
  5, /* vec_align_load_cost */
  5, /* vec_unalign_load_cost */
  1, /* vec_unalign_store_cost */
  1, /* vec_store_cost */
  1, /* cond_taken_branch_cost */
  1 /* cond_not_taken_branch_cost */
};

/* X-Gene 1 costs for vector insn classes.  */
static const struct cpu_vector_cost xgene1_vector_cost =
{
  1, /* scalar_int_stmt_cost */
  1, /* scalar_fp_stmt_cost */
  5, /* scalar_load_cost */
  1, /* scalar_store_cost */
  2, /* vec_int_stmt_cost */
  2, /* vec_fp_stmt_cost */
  2, /* vec_permute_cost */
  4, /* vec_to_scalar_cost */
  4, /* scalar_to_vec_cost */
  10, /* vec_align_load_cost */
  10, /* vec_unalign_load_cost */
  2, /* vec_unalign_store_cost */
  2, /* vec_store_cost */
  2, /* cond_taken_branch_cost */
  1 /* cond_not_taken_branch_cost */
};

/* Costs for vector insn classes for Vulcan.  */
static const struct cpu_vector_cost thunderx2t99_vector_cost =
{
  1, /* scalar_int_stmt_cost */
  6, /* scalar_fp_stmt_cost */
  4, /* scalar_load_cost */
  1, /* scalar_store_cost */
  5, /* vec_int_stmt_cost */
  6, /* vec_fp_stmt_cost */
  3, /* vec_permute_cost */
  6, /* vec_to_scalar_cost */
  5, /* scalar_to_vec_cost */
  8, /* vec_align_load_cost */
  8, /* vec_unalign_load_cost */
  4, /* vec_unalign_store_cost */
  4, /* vec_store_cost */
  2, /* cond_taken_branch_cost */
  1 /* cond_not_taken_branch_cost */
};

/* Generic costs for branch instructions.  */
static const struct cpu_branch_cost generic_branch_cost =
{
  1,  /* Predictable.  */
  3   /* Unpredictable.  */
};

/* Generic approximation modes.  */
static const cpu_approx_modes generic_approx_modes =
{
  AARCH64_APPROX_NONE, /* division */
  AARCH64_APPROX_NONE, /* sqrt */
  AARCH64_APPROX_NONE  /* recip_sqrt */
};

/* Approximation modes for Exynos M1.  */
static const cpu_approx_modes exynosm1_approx_modes =
{
  AARCH64_APPROX_NONE, /* division */
  AARCH64_APPROX_ALL,  /* sqrt */
  AARCH64_APPROX_ALL   /* recip_sqrt */
};

/* Approximation modes for X-Gene 1.  */
static const cpu_approx_modes xgene1_approx_modes =
{
  AARCH64_APPROX_NONE, /* division */
  AARCH64_APPROX_NONE, /* sqrt */
  AARCH64_APPROX_ALL   /* recip_sqrt */
};

/* Generic prefetch settings (which disable prefetch).  */
static const cpu_prefetch_tune generic_prefetch_tune =
{
  0,    /* num_slots */
  -1,   /* l1_cache_size */
  -1,   /* l1_cache_line_size */
  -1,   /* l2_cache_size */
  true, /* prefetch_dynamic_strides */
  -1,   /* minimum_stride */
  -1    /* default_opt_level */
};

static const cpu_prefetch_tune exynosm1_prefetch_tune =
{
  0,    /* num_slots */
  -1,   /* l1_cache_size */
  64,   /* l1_cache_line_size */
  -1,   /* l2_cache_size */
  true, /* prefetch_dynamic_strides */
  -1,   /* minimum_stride */
  -1    /* default_opt_level */
};

static const cpu_prefetch_tune qdf24xx_prefetch_tune =
{
  4,     /* num_slots */
  32,    /* l1_cache_size */
  64,    /* l1_cache_line_size */
  512,   /* l2_cache_size */
  false, /* prefetch_dynamic_strides */
  2048,  /* minimum_stride */
  3      /* default_opt_level */
};

static const cpu_prefetch_tune thunderxt88_prefetch_tune =
{
  8,       /* num_slots */
  32,      /* l1_cache_size */
  128,     /* l1_cache_line_size */
  16*1024, /* l2_cache_size */
  true,    /* prefetch_dynamic_strides */
  -1,      /* minimum_stride */
  3        /* default_opt_level */
};

static const cpu_prefetch_tune thunderx_prefetch_tune =
{
  8,    /* num_slots */
  32,   /* l1_cache_size */
  128,  /* l1_cache_line_size */
  -1,   /* l2_cache_size */
  true, /* prefetch_dynamic_strides */
  -1,   /* minimum_stride */
  -1    /* default_opt_level */
};

static const cpu_prefetch_tune thunderx2t99_prefetch_tune =
{
  8,    /* num_slots */
  32,   /* l1_cache_size */
  64,   /* l1_cache_line_size */
  256,  /* l2_cache_size */
  true, /* prefetch_dynamic_strides */
  -1,   /* minimum_stride */
  -1    /* default_opt_level */
};

static const cpu_prefetch_tune tsv110_prefetch_tune =
{
  0,    /* num_slots */
  64,   /* l1_cache_size */
  64,   /* l1_cache_line_size */
  512,  /* l2_cache_size */
  true, /* prefetch_dynamic_strides */
  -1,   /* minimum_stride */
  -1    /* default_opt_level */
};

static const cpu_prefetch_tune xgene1_prefetch_tune =
{
  8,    /* num_slots */
  32,   /* l1_cache_size */
  64,   /* l1_cache_line_size */
  256,  /* l2_cache_size */
  true, /* prefetch_dynamic_strides */
  -1,   /* minimum_stride */
  -1    /* default_opt_level */
};

static const struct tune_params generic_tunings =
{
  &cortexa57_extra_costs,
  &generic_addrcost_table,
  &generic_regmove_cost,
  &generic_vector_cost,
  &generic_branch_cost,
  &generic_approx_modes,
  4, /* memmov_cost */
  2, /* issue_rate */
  (AARCH64_FUSE_AES_AESMC), /* fusible_ops */
  "8",  /* function_align.  */
  "4",  /* jump_align.  */
  "8",  /* loop_align.  */
  2, /* int_reassoc_width.  */
  4, /* fp_reassoc_width.  */
  1, /* vec_reassoc_width.  */
  2, /* min_div_recip_mul_sf.  */
  2, /* min_div_recip_mul_df.  */
  0, /* max_case_values.  */
  tune_params::AUTOPREFETCHER_WEAK, /* autoprefetcher_model.  */
  (AARCH64_EXTRA_TUNE_NONE), /* tune_flags.  */
  &generic_prefetch_tune
};

static const struct tune_params cortexa35_tunings =
{
  &cortexa53_extra_costs,
  &generic_addrcost_table,
  &cortexa53_regmove_cost,
  &generic_vector_cost,
  &generic_branch_cost,
  &generic_approx_modes,
  4, /* memmov_cost */
  1, /* issue_rate */
  (AARCH64_FUSE_AES_AESMC | AARCH64_FUSE_MOV_MOVK | AARCH64_FUSE_ADRP_ADD
   | AARCH64_FUSE_MOVK_MOVK | AARCH64_FUSE_ADRP_LDR), /* fusible_ops */
  "16", /* function_align.  */
  "4",  /* jump_align.  */
  "8",  /* loop_align.  */
  2, /* int_reassoc_width.  */
  4, /* fp_reassoc_width.  */
  1, /* vec_reassoc_width.  */
  2, /* min_div_recip_mul_sf.  */
  2, /* min_div_recip_mul_df.  */
  0, /* max_case_values.  */
  tune_params::AUTOPREFETCHER_WEAK, /* autoprefetcher_model.  */
  (AARCH64_EXTRA_TUNE_NONE), /* tune_flags.  */
  &generic_prefetch_tune
};

static const struct tune_params cortexa53_tunings =
{
  &cortexa53_extra_costs,
  &generic_addrcost_table,
  &cortexa53_regmove_cost,
  &generic_vector_cost,
  &generic_branch_cost,
  &generic_approx_modes,
  4, /* memmov_cost */
  2, /* issue_rate */
  (AARCH64_FUSE_AES_AESMC | AARCH64_FUSE_MOV_MOVK | AARCH64_FUSE_ADRP_ADD
   | AARCH64_FUSE_MOVK_MOVK | AARCH64_FUSE_ADRP_LDR), /* fusible_ops */
  "16", /* function_align.  */
  "4",  /* jump_align.  */
  "8",  /* loop_align.  */
  2, /* int_reassoc_width.  */
  4, /* fp_reassoc_width.  */
  1, /* vec_reassoc_width.  */
  2, /* min_div_recip_mul_sf.  */
  2, /* min_div_recip_mul_df.  */
  0, /* max_case_values.  */
  tune_params::AUTOPREFETCHER_WEAK, /* autoprefetcher_model.  */
  (AARCH64_EXTRA_TUNE_NONE), /* tune_flags.  */
  &generic_prefetch_tune
};

static const struct tune_params cortexa57_tunings =
{
  &cortexa57_extra_costs,
  &generic_addrcost_table,
  &cortexa57_regmove_cost,
  &cortexa57_vector_cost,
  &generic_branch_cost,
  &generic_approx_modes,
  4, /* memmov_cost */
  3, /* issue_rate */
  (AARCH64_FUSE_AES_AESMC | AARCH64_FUSE_MOV_MOVK | AARCH64_FUSE_ADRP_ADD
   | AARCH64_FUSE_MOVK_MOVK), /* fusible_ops */
  "16", /* function_align.  */
  "4",  /* jump_align.  */
  "8",  /* loop_align.  */
  2, /* int_reassoc_width.  */
  4, /* fp_reassoc_width.  */
  1, /* vec_reassoc_width.  */
  2, /* min_div_recip_mul_sf.  */
  2, /* min_div_recip_mul_df.  */
  0, /* max_case_values.  */
  tune_params::AUTOPREFETCHER_WEAK, /* autoprefetcher_model.  */
  (AARCH64_EXTRA_TUNE_RENAME_FMA_REGS), /* tune_flags.  */
  &generic_prefetch_tune
};

static const struct tune_params cortexa72_tunings =
{
  &cortexa57_extra_costs,
  &generic_addrcost_table,
  &cortexa57_regmove_cost,
  &cortexa57_vector_cost,
  &generic_branch_cost,
  &generic_approx_modes,
  4, /* memmov_cost */
  3, /* issue_rate */
  (AARCH64_FUSE_AES_AESMC | AARCH64_FUSE_MOV_MOVK | AARCH64_FUSE_ADRP_ADD
   | AARCH64_FUSE_MOVK_MOVK), /* fusible_ops */
  "16", /* function_align.  */
  "4",  /* jump_align.  */
  "8",  /* loop_align.  */
  2, /* int_reassoc_width.  */
  4, /* fp_reassoc_width.  */
  1, /* vec_reassoc_width.  */
  2, /* min_div_recip_mul_sf.  */
  2, /* min_div_recip_mul_df.  */
  0, /* max_case_values.  */
  tune_params::AUTOPREFETCHER_WEAK, /* autoprefetcher_model.  */
  (AARCH64_EXTRA_TUNE_NONE), /* tune_flags.  */
  &generic_prefetch_tune
};

static const struct tune_params cortexa73_tunings =
{
  &cortexa57_extra_costs,
  &generic_addrcost_table,
  &cortexa57_regmove_cost,
  &cortexa57_vector_cost,
  &generic_branch_cost,
  &generic_approx_modes,
  4, /* memmov_cost.  */
  2, /* issue_rate.  */
  (AARCH64_FUSE_AES_AESMC | AARCH64_FUSE_MOV_MOVK | AARCH64_FUSE_ADRP_ADD
   | AARCH64_FUSE_MOVK_MOVK | AARCH64_FUSE_ADRP_LDR), /* fusible_ops */
  "16", /* function_align.  */
  "4",  /* jump_align.  */
  "8",  /* loop_align.  */
  2, /* int_reassoc_width.  */
  4, /* fp_reassoc_width.  */
  1, /* vec_reassoc_width.  */
  2, /* min_div_recip_mul_sf.  */
  2, /* min_div_recip_mul_df.  */
  0, /* max_case_values.  */
  tune_params::AUTOPREFETCHER_WEAK, /* autoprefetcher_model.  */
  (AARCH64_EXTRA_TUNE_NONE), /* tune_flags.  */
  &generic_prefetch_tune
};



static const struct tune_params exynosm1_tunings =
{
  &exynosm1_extra_costs,
  &exynosm1_addrcost_table,
  &exynosm1_regmove_cost,
  &exynosm1_vector_cost,
  &generic_branch_cost,
  &exynosm1_approx_modes,
  4, /* memmov_cost */
  3, /* issue_rate */
  (AARCH64_FUSE_AES_AESMC), /* fusible_ops */
  "4",  /* function_align.  */
  "4",  /* jump_align.  */
  "4",  /* loop_align.  */
  2, /* int_reassoc_width.  */
  4, /* fp_reassoc_width.  */
  1, /* vec_reassoc_width.  */
  2, /* min_div_recip_mul_sf.  */
  2, /* min_div_recip_mul_df.  */
  48, /* max_case_values.  */
  tune_params::AUTOPREFETCHER_WEAK, /* autoprefetcher_model.  */
  (AARCH64_EXTRA_TUNE_NONE), /* tune_flags.  */
  &exynosm1_prefetch_tune
};

static const struct tune_params thunderxt88_tunings =
{
  &thunderx_extra_costs,
  &generic_addrcost_table,
  &thunderx_regmove_cost,
  &thunderx_vector_cost,
  &generic_branch_cost,
  &generic_approx_modes,
  6, /* memmov_cost */
  2, /* issue_rate */
  AARCH64_FUSE_CMP_BRANCH, /* fusible_ops */
  "8",  /* function_align.  */
  "8",  /* jump_align.  */
  "8",  /* loop_align.  */
  2, /* int_reassoc_width.  */
  4, /* fp_reassoc_width.  */
  1, /* vec_reassoc_width.  */
  2, /* min_div_recip_mul_sf.  */
  2, /* min_div_recip_mul_df.  */
  0, /* max_case_values.  */
  tune_params::AUTOPREFETCHER_OFF, /* autoprefetcher_model.  */
  (AARCH64_EXTRA_TUNE_SLOW_UNALIGNED_LDPW), /* tune_flags.  */
  &thunderxt88_prefetch_tune
};

static const struct tune_params thunderx_tunings =
{
  &thunderx_extra_costs,
  &generic_addrcost_table,
  &thunderx_regmove_cost,
  &thunderx_vector_cost,
  &generic_branch_cost,
  &generic_approx_modes,
  6, /* memmov_cost */
  2, /* issue_rate */
  AARCH64_FUSE_CMP_BRANCH, /* fusible_ops */
  "8",  /* function_align.  */
  "8",  /* jump_align.  */
  "8",  /* loop_align.  */
  2, /* int_reassoc_width.  */
  4, /* fp_reassoc_width.  */
  1, /* vec_reassoc_width.  */
  2, /* min_div_recip_mul_sf.  */
  2, /* min_div_recip_mul_df.  */
  0, /* max_case_values.  */
  tune_params::AUTOPREFETCHER_OFF, /* autoprefetcher_model.  */
  (AARCH64_EXTRA_TUNE_SLOW_UNALIGNED_LDPW
   | AARCH64_EXTRA_TUNE_CHEAP_SHIFT_EXTEND), /* tune_flags.  */
  &thunderx_prefetch_tune
};

static const struct tune_params tsv110_tunings =
{
  &tsv110_extra_costs,
  &tsv110_addrcost_table,
  &tsv110_regmove_cost,
  &tsv110_vector_cost,
  &generic_branch_cost,
  &generic_approx_modes,
  4, /* memmov_cost */
  4, /* issue_rate */
  (AARCH64_FUSE_AES_AESMC | AARCH64_FUSE_CMP_BRANCH
   | AARCH64_FUSE_ALU_BRANCH), /* fusible_ops */
  "16", /* function_align.  */
  "4",  /* jump_align.  */
  "8",  /* loop_align.  */
  2, /* int_reassoc_width.  */
  4, /* fp_reassoc_width.  */
  1, /* vec_reassoc_width.  */
  2, /* min_div_recip_mul_sf.  */
  2, /* min_div_recip_mul_df.  */
  0, /* max_case_values.  */
  tune_params::AUTOPREFETCHER_WEAK, /* autoprefetcher_model.  */
  (AARCH64_EXTRA_TUNE_NONE), /* tune_flags.  */
  &tsv110_prefetch_tune
};

static const struct tune_params xgene1_tunings =
{
  &xgene1_extra_costs,
  &xgene1_addrcost_table,
  &xgene1_regmove_cost,
  &xgene1_vector_cost,
  &generic_branch_cost,
  &xgene1_approx_modes,
  6, /* memmov_cost */
  4, /* issue_rate */
  AARCH64_FUSE_NOTHING, /* fusible_ops */
  "16", /* function_align.  */
  "8",  /* jump_align.  */
  "16", /* loop_align.  */
  2, /* int_reassoc_width.  */
  4, /* fp_reassoc_width.  */
  1, /* vec_reassoc_width.  */
  2, /* min_div_recip_mul_sf.  */
  2, /* min_div_recip_mul_df.  */
  0, /* max_case_values.  */
  tune_params::AUTOPREFETCHER_OFF, /* autoprefetcher_model.  */
  (AARCH64_EXTRA_TUNE_NO_LDP_STP_QREGS), /* tune_flags.  */
  &xgene1_prefetch_tune
};

static const struct tune_params qdf24xx_tunings =
{
  &qdf24xx_extra_costs,
  &qdf24xx_addrcost_table,
  &qdf24xx_regmove_cost,
  &qdf24xx_vector_cost,
  &generic_branch_cost,
  &generic_approx_modes,
  4, /* memmov_cost */
  4, /* issue_rate */
  (AARCH64_FUSE_MOV_MOVK | AARCH64_FUSE_ADRP_ADD
   | AARCH64_FUSE_MOVK_MOVK), /* fusible_ops */
  "16", /* function_align.  */
  "8",  /* jump_align.  */
  "16", /* loop_align.  */
  2, /* int_reassoc_width.  */
  4, /* fp_reassoc_width.  */
  1, /* vec_reassoc_width.  */
  2, /* min_div_recip_mul_sf.  */
  2, /* min_div_recip_mul_df.  */
  0, /* max_case_values.  */
  tune_params::AUTOPREFETCHER_WEAK, /* autoprefetcher_model.  */
  AARCH64_EXTRA_TUNE_RENAME_LOAD_REGS, /* tune_flags.  */
  &qdf24xx_prefetch_tune
};

/* Tuning structure for the Qualcomm Saphira core.  Default to falkor values
   for now.  */
static const struct tune_params saphira_tunings =
{
  &generic_extra_costs,
  &generic_addrcost_table,
  &generic_regmove_cost,
  &generic_vector_cost,
  &generic_branch_cost,
  &generic_approx_modes,
  4, /* memmov_cost */
  4, /* issue_rate */
  (AARCH64_FUSE_MOV_MOVK | AARCH64_FUSE_ADRP_ADD
   | AARCH64_FUSE_MOVK_MOVK), /* fusible_ops */
  "16", /* function_align.  */
  "8",  /* jump_align.  */
  "16", /* loop_align.  */
  2, /* int_reassoc_width.  */
  4, /* fp_reassoc_width.  */
  1, /* vec_reassoc_width.  */
  2, /* min_div_recip_mul_sf.  */
  2, /* min_div_recip_mul_df.  */
  0, /* max_case_values.  */
  tune_params::AUTOPREFETCHER_WEAK, /* autoprefetcher_model.  */
  (AARCH64_EXTRA_TUNE_NONE), /* tune_flags.  */
  &generic_prefetch_tune
};

static const struct tune_params thunderx2t99_tunings =
{
  &thunderx2t99_extra_costs,
  &thunderx2t99_addrcost_table,
  &thunderx2t99_regmove_cost,
  &thunderx2t99_vector_cost,
  &generic_branch_cost,
  &generic_approx_modes,
  4, /* memmov_cost.  */
  4, /* issue_rate.  */
  (AARCH64_FUSE_CMP_BRANCH | AARCH64_FUSE_AES_AESMC
   | AARCH64_FUSE_ALU_BRANCH), /* fusible_ops */
  "16", /* function_align.  */
  "8",  /* jump_align.  */
  "16", /* loop_align.  */
  3, /* int_reassoc_width.  */
  2, /* fp_reassoc_width.  */
  2, /* vec_reassoc_width.  */
  2, /* min_div_recip_mul_sf.  */
  2, /* min_div_recip_mul_df.  */
  0, /* max_case_values.  */
  tune_params::AUTOPREFETCHER_WEAK, /* autoprefetcher_model.  */
  (AARCH64_EXTRA_TUNE_NONE), /* tune_flags.  */
  &thunderx2t99_prefetch_tune
};

/* Support for fine-grained override of the tuning structures.  */
struct aarch64_tuning_override_function
{
  const char* name;
  void (*parse_override)(const char*, struct tune_params*);
};

static void aarch64_parse_fuse_string (const char*, struct tune_params*);
static void aarch64_parse_tune_string (const char*, struct tune_params*);

static const struct aarch64_tuning_override_function
aarch64_tuning_override_functions[] =
{
  { "fuse", aarch64_parse_fuse_string },
  { "tune", aarch64_parse_tune_string },
  { NULL, NULL }
};

/* A processor implementing AArch64.  */
struct processor
{
  const char *const name;
  enum aarch64_processor ident;
  enum aarch64_processor sched_core;
  enum aarch64_arch arch;
  unsigned architecture_version;
  const unsigned long flags;
  const struct tune_params *const tune;
};

/* Architectures implementing AArch64.  */
static const struct processor all_architectures[] =
{
#define AARCH64_ARCH(NAME, CORE, ARCH_IDENT, ARCH_REV, FLAGS) \
  {NAME, CORE, CORE, AARCH64_ARCH_##ARCH_IDENT, ARCH_REV, FLAGS, NULL},
#include "aarch64-arches.def"
  {NULL, aarch64_none, aarch64_none, aarch64_no_arch, 0, 0, NULL}
};

/* Processor cores implementing AArch64.  */
static const struct processor all_cores[] =
{
#define AARCH64_CORE(NAME, IDENT, SCHED, ARCH, FLAGS, COSTS, IMP, PART, VARIANT) \
  {NAME, IDENT, SCHED, AARCH64_ARCH_##ARCH, \
   all_architectures[AARCH64_ARCH_##ARCH].architecture_version, \
   FLAGS, &COSTS##_tunings},
#include "aarch64-cores.def"
  {"generic", generic, cortexa53, AARCH64_ARCH_8A, 8,
   AARCH64_FL_FOR_ARCH8, &generic_tunings},
  {NULL, aarch64_none, aarch64_none, aarch64_no_arch, 0, 0, NULL}
};


/* Target specification.  These are populated by the -march, -mtune, -mcpu
   handling code or by target attributes.  */
static const struct processor *selected_arch;
static const struct processor *selected_cpu;
static const struct processor *selected_tune;

/* The current tuning set.  */
struct tune_params aarch64_tune_params = generic_tunings;

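/* The ISA flags of the CPU chosen by the command-line handling code, or 0 if
   no CPU has been selected yet.  */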
#define AARCH64_CPU_DEFAULT_FLAGS ((selected_cpu) ? selected_cpu->flags : 0)

/* An ISA extension in the co-processor and main instruction set space.  */
struct aarch64_option_extension
{
  const char *const name;
  const unsigned long flags_on;
  const unsigned long flags_off;
};

typedef enum aarch64_cond_code
{
  AARCH64_EQ = 0, AARCH64_NE, AARCH64_CS, AARCH64_CC, AARCH64_MI, AARCH64_PL,
  AARCH64_VS, AARCH64_VC, AARCH64_HI, AARCH64_LS, AARCH64_GE, AARCH64_LT,
  AARCH64_GT, AARCH64_LE, AARCH64_AL, AARCH64_NV
}
aarch64_cc;

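/* Invert a condition code by flipping the low bit of its encoding
   (EQ <-> NE, CS <-> CC, and so on).  */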
#define AARCH64_INVERSE_CONDITION_CODE(X) ((aarch64_cc) (((int) X) ^ 1))

/* The condition codes of the processor, and the inverse function.  */
static const char * const aarch64_condition_codes[] =
{
  "eq", "ne", "cs", "cc", "mi", "pl", "vs", "vc",
  "hi", "ls", "ge", "lt", "gt", "le", "al", "nv"
};

/* Generate code to enable conditional branches in functions over 1 MiB.  */
const char *
aarch64_gen_far_branch (rtx * operands, int pos_label, const char * dest,
                        const char * branch_format)
{
  rtx_code_label * tmp_label = gen_label_rtx ();
  char label_buf[256];
  char buffer[128];
  ASM_GENERATE_INTERNAL_LABEL (label_buf, dest,
                               CODE_LABEL_NUMBER (tmp_label));
  const char *label_ptr = targetm.strip_name_encoding (label_buf);
  rtx dest_label = operands[pos_label];
  operands[pos_label] = tmp_label;

  snprintf (buffer, sizeof (buffer), "%s%s", branch_format, label_ptr);
  output_asm_insn (buffer, operands);

  snprintf (buffer, sizeof (buffer), "b\t%%l%d\n%s:", pos_label, label_ptr);
  operands[pos_label] = dest_label;
  output_asm_insn (buffer, operands);
  return "";
}

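/* Report an error for an attempt to use a floating-point or vector type of
   mode MODE when the FP/AdvSIMD register file is unavailable, either because
   of -mgeneral-regs-only or because of the +nofp feature modifier.  */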
void
aarch64_err_no_fpadvsimd (machine_mode mode)
{
  if (TARGET_GENERAL_REGS_ONLY)
    if (FLOAT_MODE_P (mode))
      error ("%qs is incompatible with the use of floating-point types",
             "-mgeneral-regs-only");
    else
      error ("%qs is incompatible with the use of vector types",
             "-mgeneral-regs-only");
  else
    if (FLOAT_MODE_P (mode))
      error ("%qs feature modifier is incompatible with the use of"
             " floating-point types", "+nofp");
    else
      error ("%qs feature modifier is incompatible with the use of"
             " vector types", "+nofp");
}

/* Implement TARGET_IRA_CHANGE_PSEUDO_ALLOCNO_CLASS.
   The register allocator chooses POINTER_AND_FP_REGS if FP_REGS and
   GENERAL_REGS have the same cost - even if POINTER_AND_FP_REGS has a much
   higher cost.  POINTER_AND_FP_REGS is also used if the cost of both FP_REGS
   and GENERAL_REGS is lower than the memory cost (in this case the best class
   is the lowest cost one).  Using POINTER_AND_FP_REGS irrespectively of its
   cost results in bad allocations with many redundant int<->FP moves which
   are expensive on various cores.
   To avoid this we don't allow POINTER_AND_FP_REGS as the allocno class, but
   force a decision between FP_REGS and GENERAL_REGS.  We use the allocno class
   if it isn't POINTER_AND_FP_REGS.  Similarly, use the best class if it isn't
   POINTER_AND_FP_REGS.  Otherwise set the allocno class depending on the mode.
   The result of this is that it is no longer inefficient to have a higher
   memory move cost than the register move cost.  */

static reg_class_t
aarch64_ira_change_pseudo_allocno_class (int regno, reg_class_t allocno_class,
                                         reg_class_t best_class)
{
  machine_mode mode;

  if (!reg_class_subset_p (GENERAL_REGS, allocno_class)
      || !reg_class_subset_p (FP_REGS, allocno_class))
    return allocno_class;

  if (!reg_class_subset_p (GENERAL_REGS, best_class)
      || !reg_class_subset_p (FP_REGS, best_class))
    return best_class;

  mode = PSEUDO_REGNO_MODE (regno);
  return FLOAT_MODE_P (mode) || VECTOR_MODE_P (mode) ? FP_REGS : GENERAL_REGS;
}

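/* Implement TARGET_MIN_DIVISIONS_FOR_RECIP_MUL.  */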
static unsigned int
aarch64_min_divisions_for_recip_mul (machine_mode mode)
{
  if (GET_MODE_UNIT_SIZE (mode) == 4)
    return aarch64_tune_params.min_div_recip_mul_sf;
  return aarch64_tune_params.min_div_recip_mul_df;
}

/* Return the reassociation width of treeop OPC with mode MODE.  */
static int
aarch64_reassociation_width (unsigned opc, machine_mode mode)
{
  if (VECTOR_MODE_P (mode))
    return aarch64_tune_params.vec_reassoc_width;
  if (INTEGRAL_MODE_P (mode))
    return aarch64_tune_params.int_reassoc_width;
  /* Avoid reassociating floating point addition so we emit more FMAs.  */
  if (FLOAT_MODE_P (mode) && opc != PLUS_EXPR)
    return aarch64_tune_params.fp_reassoc_width;
  return 1;
}

/* Provide a mapping from gcc register numbers to dwarf register numbers.  */
unsigned
aarch64_dbx_register_number (unsigned regno)
{
  if (GP_REGNUM_P (regno))
    return AARCH64_DWARF_R0 + regno - R0_REGNUM;
  else if (regno == SP_REGNUM)
    return AARCH64_DWARF_SP;
  else if (FP_REGNUM_P (regno))
    return AARCH64_DWARF_V0 + regno - V0_REGNUM;
  else if (PR_REGNUM_P (regno))
    return AARCH64_DWARF_P0 + regno - P0_REGNUM;
  else if (regno == VG_REGNUM)
    return AARCH64_DWARF_VG;

  /* Return values >= DWARF_FRAME_REGISTERS indicate that there is no
     equivalent DWARF register.  */
  return DWARF_FRAME_REGISTERS;
}

/* Return true if MODE is any of the Advanced SIMD structure modes.  */
static bool
aarch64_advsimd_struct_mode_p (machine_mode mode)
{
  return (TARGET_SIMD
          && (mode == OImode || mode == CImode || mode == XImode));
}

/* Return true if MODE is an SVE predicate mode.  */
static bool
aarch64_sve_pred_mode_p (machine_mode mode)
{
  return (TARGET_SVE
          && (mode == VNx16BImode
              || mode == VNx8BImode
              || mode == VNx4BImode
              || mode == VNx2BImode));
}

/* Three mutually-exclusive flags describing a vector or predicate type.  */
const unsigned int VEC_ADVSIMD  = 1;
const unsigned int VEC_SVE_DATA = 2;
const unsigned int VEC_SVE_PRED = 4;
/* Can be used in combination with VEC_ADVSIMD or VEC_SVE_DATA to indicate
   a structure of 2, 3 or 4 vectors.  */
const unsigned int VEC_STRUCT   = 8;
/* Useful combinations of the above.  */
const unsigned int VEC_ANY_SVE  = VEC_SVE_DATA | VEC_SVE_PRED;
const unsigned int VEC_ANY_DATA = VEC_ADVSIMD | VEC_SVE_DATA;

/* Return a set of flags describing the vector properties of mode MODE.
   Ignore modes that are not supported by the current target.  */
static unsigned int
aarch64_classify_vector_mode (machine_mode mode)
{
  if (aarch64_advsimd_struct_mode_p (mode))
    return VEC_ADVSIMD | VEC_STRUCT;

  if (aarch64_sve_pred_mode_p (mode))
    return VEC_SVE_PRED;

  scalar_mode inner = GET_MODE_INNER (mode);
  if (VECTOR_MODE_P (mode)
      && (inner == QImode
          || inner == HImode
          || inner == HFmode
          || inner == SImode
          || inner == SFmode
          || inner == DImode
          || inner == DFmode))
    {
      if (TARGET_SVE)
        {
          if (known_eq (GET_MODE_BITSIZE (mode), BITS_PER_SVE_VECTOR))
            return VEC_SVE_DATA;
          if (known_eq (GET_MODE_BITSIZE (mode), BITS_PER_SVE_VECTOR * 2)
              || known_eq (GET_MODE_BITSIZE (mode), BITS_PER_SVE_VECTOR * 3)
              || known_eq (GET_MODE_BITSIZE (mode), BITS_PER_SVE_VECTOR * 4))
            return VEC_SVE_DATA | VEC_STRUCT;
        }

      /* This includes V1DF but not V1DI (which doesn't exist).  */
      if (TARGET_SIMD
          && (known_eq (GET_MODE_BITSIZE (mode), 64)
              || known_eq (GET_MODE_BITSIZE (mode), 128)))
        return VEC_ADVSIMD;
    }

  return 0;
}

/* Return true if MODE is any of the data vector modes, including
   structure modes.  */
static bool
aarch64_vector_data_mode_p (machine_mode mode)
{
  return aarch64_classify_vector_mode (mode) & VEC_ANY_DATA;
}

/* Return true if MODE is an SVE data vector mode; either a single vector
   or a structure of vectors.  */
static bool
aarch64_sve_data_mode_p (machine_mode mode)
{
  return aarch64_classify_vector_mode (mode) & VEC_SVE_DATA;
}

/* Implement target hook TARGET_ARRAY_MODE.  */
static opt_machine_mode
aarch64_array_mode (machine_mode mode, unsigned HOST_WIDE_INT nelems)
{
  if (aarch64_classify_vector_mode (mode) == VEC_SVE_DATA
      && IN_RANGE (nelems, 2, 4))
    return mode_for_vector (GET_MODE_INNER (mode),
                            GET_MODE_NUNITS (mode) * nelems);

  return opt_machine_mode ();
}

/* Implement target hook TARGET_ARRAY_MODE_SUPPORTED_P.  */
static bool
aarch64_array_mode_supported_p (machine_mode mode,
                                unsigned HOST_WIDE_INT nelems)
{
  if (TARGET_SIMD
      && (AARCH64_VALID_SIMD_QREG_MODE (mode)
          || AARCH64_VALID_SIMD_DREG_MODE (mode))
      && (nelems >= 2 && nelems <= 4))
    return true;

  return false;
}

/* Return the SVE predicate mode to use for elements that have
   ELEM_NBYTES bytes, if such a mode exists.  */

opt_machine_mode
aarch64_sve_pred_mode (unsigned int elem_nbytes)
{
  if (TARGET_SVE)
    {
      if (elem_nbytes == 1)
        return VNx16BImode;
      if (elem_nbytes == 2)
        return VNx8BImode;
      if (elem_nbytes == 4)
        return VNx4BImode;
      if (elem_nbytes == 8)
        return VNx2BImode;
    }
  return opt_machine_mode ();
}

/* Implement TARGET_VECTORIZE_GET_MASK_MODE.  */

static opt_machine_mode
aarch64_get_mask_mode (poly_uint64 nunits, poly_uint64 nbytes)
{
  if (TARGET_SVE && known_eq (nbytes, BYTES_PER_SVE_VECTOR))
    {
      unsigned int elem_nbytes = vector_element_size (nbytes, nunits);
      machine_mode pred_mode;
      if (aarch64_sve_pred_mode (elem_nbytes).exists (&pred_mode))
        return pred_mode;
    }

  return default_get_mask_mode (nunits, nbytes);
}

/* Implement TARGET_PREFERRED_ELSE_VALUE.  For binary operations,
   prefer to use the first arithmetic operand as the else value if
   the else value doesn't matter, since that exactly matches the SVE
   destructive merging form.  For ternary operations we could either
   pick the first operand and use FMAD-like instructions or the last
   operand and use FMLA-like instructions; the latter seems more
   natural.  */

static tree
aarch64_preferred_else_value (unsigned, tree, unsigned int nops, tree *ops)
{
  return nops == 3 ? ops[2] : ops[0];
}

/* Implement TARGET_HARD_REGNO_NREGS.  */

static unsigned int
aarch64_hard_regno_nregs (unsigned regno, machine_mode mode)
{
  /* ??? Logically we should only need to provide a value when
     HARD_REGNO_MODE_OK says that the combination is valid,
     but at the moment we need to handle all modes.  Just ignore
     any runtime parts for registers that can't store them.  */
  HOST_WIDE_INT lowest_size = constant_lower_bound (GET_MODE_SIZE (mode));
  switch (aarch64_regno_regclass (regno))
    {
    case FP_REGS:
    case FP_LO_REGS:
      if (aarch64_sve_data_mode_p (mode))
        return exact_div (GET_MODE_SIZE (mode),
                          BYTES_PER_SVE_VECTOR).to_constant ();
      return CEIL (lowest_size, UNITS_PER_VREG);
    case PR_REGS:
    case PR_LO_REGS:
    case PR_HI_REGS:
      return 1;
    default:
      return CEIL (lowest_size, UNITS_PER_WORD);
    }
  gcc_unreachable ();
}

/* Implement TARGET_HARD_REGNO_MODE_OK.  */

static bool
aarch64_hard_regno_mode_ok (unsigned regno, machine_mode mode)
{
  if (GET_MODE_CLASS (mode) == MODE_CC)
    return regno == CC_REGNUM;

  if (regno == VG_REGNUM)
    /* This must have the same size as _Unwind_Word.  */
    return mode == DImode;

  unsigned int vec_flags = aarch64_classify_vector_mode (mode);
  if (vec_flags & VEC_SVE_PRED)
    return PR_REGNUM_P (regno);

  if (PR_REGNUM_P (regno))
    return 0;

  if (regno == SP_REGNUM)
    /* The purpose of comparing with ptr_mode is to support the
       global register variable associated with the stack pointer
       register via the syntax of asm ("wsp") in ILP32.  */
    return mode == Pmode || mode == ptr_mode;

  if (regno == FRAME_POINTER_REGNUM || regno == ARG_POINTER_REGNUM)
    return mode == Pmode;

  if (GP_REGNUM_P (regno))
    {
      if (known_le (GET_MODE_SIZE (mode), 8))
        return true;
      else if (known_le (GET_MODE_SIZE (mode), 16))
        return (regno & 1) == 0;
    }
  else if (FP_REGNUM_P (regno))
    {
      if (vec_flags & VEC_STRUCT)
        return end_hard_regno (mode, regno) - 1 <= V31_REGNUM;
      else
        return !VECTOR_MODE_P (mode) || vec_flags != 0;
    }

  return false;
}

/* Implement TARGET_HARD_REGNO_CALL_PART_CLOBBERED.  The callee only saves
   the lower 64 bits of a 128-bit register.  Tell the compiler the callee
   clobbers the top 64 bits when restoring the bottom 64 bits.  */

static bool
aarch64_hard_regno_call_part_clobbered (unsigned int regno, machine_mode mode)
{
  return FP_REGNUM_P (regno) && maybe_gt (GET_MODE_SIZE (mode), 8);
}

/* Implement REGMODE_NATURAL_SIZE.  */
poly_uint64
aarch64_regmode_natural_size (machine_mode mode)
{
  /* The natural size for SVE data modes is one SVE data vector,
     and similarly for predicates.  We can't independently modify
     anything smaller than that.  */
  /* ??? For now, only do this for variable-width SVE registers.
     Doing it for constant-sized registers breaks lower-subreg.c.  */
  /* ??? And once that's fixed, we should probably have similar
     code for Advanced SIMD.  */
  if (!aarch64_sve_vg.is_constant ())
    {
      unsigned int vec_flags = aarch64_classify_vector_mode (mode);
      if (vec_flags & VEC_SVE_PRED)
        return BYTES_PER_SVE_PRED;
      if (vec_flags & VEC_SVE_DATA)
        return BYTES_PER_SVE_VECTOR;
    }
  return UNITS_PER_WORD;
}

/* Implement HARD_REGNO_CALLER_SAVE_MODE.  */
machine_mode
aarch64_hard_regno_caller_save_mode (unsigned regno, unsigned,
                                     machine_mode mode)
{
  /* The predicate mode determines which bits are significant and
     which are "don't care".  Decreasing the number of lanes would
     lose data while increasing the number of lanes would make bits
     unnecessarily significant.  */
  if (PR_REGNUM_P (regno))
    return mode;
  if (known_ge (GET_MODE_SIZE (mode), 4))
    return mode;
  else
    return SImode;
}

/* Return true if I's bits are consecutive ones from the MSB.  */
bool
aarch64_high_bits_all_ones_p (HOST_WIDE_INT i)
{
  return exact_log2 (-i) != HOST_WIDE_INT_M1;
}

/* Implement TARGET_CONSTANT_ALIGNMENT.  Make strings word-aligned so
   that strcpy from constants will be faster.  */

static HOST_WIDE_INT
aarch64_constant_alignment (const_tree exp, HOST_WIDE_INT align)
{
  if (TREE_CODE (exp) == STRING_CST && !optimize_size)
    return MAX (align, BITS_PER_WORD);
  return align;
}

/* Return true if calls to DECL should be treated as
   long-calls (ie called via a register).  */
static bool
aarch64_decl_is_long_call_p (const_tree decl ATTRIBUTE_UNUSED)
{
  return false;
}

/* Return true if calls to symbol-ref SYM should be treated as
   long-calls (ie called via a register).  */
bool
aarch64_is_long_call_p (rtx sym)
{
  return aarch64_decl_is_long_call_p (SYMBOL_REF_DECL (sym));
}

/* Return true if calls to symbol-ref SYM should not go through
   plt stubs.  */

bool
aarch64_is_noplt_call_p (rtx sym)
{
  const_tree decl = SYMBOL_REF_DECL (sym);

  if (flag_pic
      && decl
      && (!flag_plt
          || lookup_attribute ("noplt", DECL_ATTRIBUTES (decl)))
      && !targetm.binds_local_p (decl))
    return true;

  return false;
}

/* Return true if the offsets to a zero/sign-extract operation
   represent an expression that matches an extend operation.  The
   operands represent the parameters from

   (extract:MODE (mult (reg) (MULT_IMM)) (EXTRACT_IMM) (const_int 0)).  */
bool
aarch64_is_extend_from_extract (scalar_int_mode mode, rtx mult_imm,
                                rtx extract_imm)
{
  HOST_WIDE_INT mult_val, extract_val;

  if (! CONST_INT_P (mult_imm) || ! CONST_INT_P (extract_imm))
    return false;

  mult_val = INTVAL (mult_imm);
  extract_val = INTVAL (extract_imm);

  if (extract_val > 8
      && extract_val < GET_MODE_BITSIZE (mode)
      && exact_log2 (extract_val & ~7) > 0
      && (extract_val & 7) <= 4
      && mult_val == (1 << (extract_val & 7)))
    return true;

  return false;
}

1612/* Emit an insn that's a simple single-set. Both the operands must be
1613 known to be valid. */
827ab47a 1614inline static rtx_insn *
43e9d192
IB
1615emit_set_insn (rtx x, rtx y)
1616{
f7df4a84 1617 return emit_insn (gen_rtx_SET (x, y));
43e9d192
IB
1618}
1619
1620/* X and Y are two things to compare using CODE. Emit the compare insn and
1621 return the rtx for register 0 in the proper mode. */
1622rtx
1623aarch64_gen_compare_reg (RTX_CODE code, rtx x, rtx y)
1624{
ef4bddc2 1625 machine_mode mode = SELECT_CC_MODE (code, x, y);
43e9d192
IB
1626 rtx cc_reg = gen_rtx_REG (mode, CC_REGNUM);
1627
1628 emit_set_insn (cc_reg, gen_rtx_COMPARE (mode, x, y));
1629 return cc_reg;
1630}
1631
d400fda3
RH
1632/* Similarly, but maybe zero-extend Y if Y_MODE < SImode. */
1633
1634static rtx
1635aarch64_gen_compare_reg_maybe_ze (RTX_CODE code, rtx x, rtx y,
1636 machine_mode y_mode)
1637{
1638 if (y_mode == E_QImode || y_mode == E_HImode)
1639 {
1640 if (CONST_INT_P (y))
1641 y = GEN_INT (INTVAL (y) & GET_MODE_MASK (y_mode));
1642 else
1643 {
1644 rtx t, cc_reg;
1645 machine_mode cc_mode;
1646
1647 t = gen_rtx_ZERO_EXTEND (SImode, y);
1648 t = gen_rtx_COMPARE (CC_SWPmode, t, x);
1649 cc_mode = CC_SWPmode;
1650 cc_reg = gen_rtx_REG (cc_mode, CC_REGNUM);
1651 emit_set_insn (cc_reg, t);
1652 return cc_reg;
1653 }
1654 }
1655
1656 return aarch64_gen_compare_reg (code, x, y);
1657}
1658
43e9d192
IB
1659/* Build the SYMBOL_REF for __tls_get_addr. */
1660
1661static GTY(()) rtx tls_get_addr_libfunc;
1662
1663rtx
1664aarch64_tls_get_addr (void)
1665{
1666 if (!tls_get_addr_libfunc)
1667 tls_get_addr_libfunc = init_one_libfunc ("__tls_get_addr");
1668 return tls_get_addr_libfunc;
1669}
1670
1671/* Return the TLS model to use for ADDR. */
1672
1673static enum tls_model
1674tls_symbolic_operand_type (rtx addr)
1675{
1676 enum tls_model tls_kind = TLS_MODEL_NONE;
43e9d192
IB
1677 if (GET_CODE (addr) == CONST)
1678 {
6a70badb
RS
1679 poly_int64 addend;
1680 rtx sym = strip_offset (addr, &addend);
43e9d192
IB
1681 if (GET_CODE (sym) == SYMBOL_REF)
1682 tls_kind = SYMBOL_REF_TLS_MODEL (sym);
1683 }
1684 else if (GET_CODE (addr) == SYMBOL_REF)
1685 tls_kind = SYMBOL_REF_TLS_MODEL (addr);
1686
1687 return tls_kind;
1688}
1689
 1690/* We allow lo_sum's in our legitimate addresses, so that combine can
 1691 take care of combining addresses where necessary, but for
 1692 generation purposes we generate the address
 1693 as:
1694 RTL Absolute
1695 tmp = hi (symbol_ref); adrp x1, foo
1696 dest = lo_sum (tmp, symbol_ref); add dest, x1, :lo_12:foo
1697 nop
1698
1699 PIC TLS
1700 adrp x1, :got:foo adrp tmp, :tlsgd:foo
1701 ldr x1, [:got_lo12:foo] add dest, tmp, :tlsgd_lo12:foo
1702 bl __tls_get_addr
1703 nop
1704
1705 Load TLS symbol, depending on TLS mechanism and TLS access model.
1706
1707 Global Dynamic - Traditional TLS:
1708 adrp tmp, :tlsgd:imm
1709 add dest, tmp, #:tlsgd_lo12:imm
1710 bl __tls_get_addr
1711
1712 Global Dynamic - TLS Descriptors:
1713 adrp dest, :tlsdesc:imm
1714 ldr tmp, [dest, #:tlsdesc_lo12:imm]
1715 add dest, dest, #:tlsdesc_lo12:imm
1716 blr tmp
1717 mrs tp, tpidr_el0
1718 add dest, dest, tp
1719
1720 Initial Exec:
1721 mrs tp, tpidr_el0
1722 adrp tmp, :gottprel:imm
1723 ldr dest, [tmp, #:gottprel_lo12:imm]
1724 add dest, dest, tp
1725
1726 Local Exec:
1727 mrs tp, tpidr_el0
0699caae
RL
1728 add t0, tp, #:tprel_hi12:imm, lsl #12
1729 add t0, t0, #:tprel_lo12_nc:imm
43e9d192
IB
1730*/
1731
1732static void
1733aarch64_load_symref_appropriately (rtx dest, rtx imm,
1734 enum aarch64_symbol_type type)
1735{
1736 switch (type)
1737 {
1738 case SYMBOL_SMALL_ABSOLUTE:
1739 {
28514dda 1740 /* In ILP32, the mode of dest can be either SImode or DImode. */
43e9d192 1741 rtx tmp_reg = dest;
ef4bddc2 1742 machine_mode mode = GET_MODE (dest);
28514dda
YZ
1743
1744 gcc_assert (mode == Pmode || mode == ptr_mode);
1745
43e9d192 1746 if (can_create_pseudo_p ())
28514dda 1747 tmp_reg = gen_reg_rtx (mode);
43e9d192 1748
28514dda 1749 emit_move_insn (tmp_reg, gen_rtx_HIGH (mode, imm));
43e9d192
IB
1750 emit_insn (gen_add_losym (dest, tmp_reg, imm));
1751 return;
1752 }
1753
a5350ddc 1754 case SYMBOL_TINY_ABSOLUTE:
f7df4a84 1755 emit_insn (gen_rtx_SET (dest, imm));
a5350ddc
CSS
1756 return;
1757
1b1e81f8
JW
1758 case SYMBOL_SMALL_GOT_28K:
1759 {
1760 machine_mode mode = GET_MODE (dest);
1761 rtx gp_rtx = pic_offset_table_rtx;
53021678
JW
1762 rtx insn;
1763 rtx mem;
1b1e81f8
JW
1764
1765 /* NOTE: pic_offset_table_rtx can be NULL_RTX, because we can reach
 1766 here before rtl expansion. Tree IVOPTs will generate rtl patterns to
 1767 decide rtx costs, in which case pic_offset_table_rtx is not
 1768 initialized. In that case there is no need to generate the first adrp
026c3cfd 1769 instruction, as the final cost for global variable access is
1b1e81f8
JW
1770 one instruction. */
1771 if (gp_rtx != NULL)
1772 {
 1773 /* -fpic for -mcmodel=small allows a 32K GOT table size (but since we
 1774 use the page base as the GOT base, the first page may be wasted;
 1775 in the worst case there is only 28K of space for the GOT).
1776
 1777 The generated instruction sequence for accessing a global variable
1778 is:
1779
a3957742 1780 ldr reg, [pic_offset_table_rtx, #:gotpage_lo15:sym]
1b1e81f8
JW
1781
 1782 Only one instruction is needed. But we must initialize
 1783 pic_offset_table_rtx properly. We generate an initialization insn
 1784 for every global access, and allow CSE to remove all redundant ones.
1785
 1786 The final instruction sequence will look like the following
 1787 when multiple global variables are accessed.
1788
a3957742 1789 adrp pic_offset_table_rtx, _GLOBAL_OFFSET_TABLE_
1b1e81f8 1790
a3957742
JW
1791 ldr reg, [pic_offset_table_rtx, #:gotpage_lo15:sym1]
1792 ldr reg, [pic_offset_table_rtx, #:gotpage_lo15:sym2]
1793 ldr reg, [pic_offset_table_rtx, #:gotpage_lo15:sym3]
1794 ... */
1b1e81f8
JW
1795
1796 rtx s = gen_rtx_SYMBOL_REF (Pmode, "_GLOBAL_OFFSET_TABLE_");
1797 crtl->uses_pic_offset_table = 1;
1798 emit_move_insn (gp_rtx, gen_rtx_HIGH (Pmode, s));
1799
1800 if (mode != GET_MODE (gp_rtx))
4ba8f0a3
AP
1801 gp_rtx = gen_lowpart (mode, gp_rtx);
1802
1b1e81f8
JW
1803 }
1804
1805 if (mode == ptr_mode)
1806 {
1807 if (mode == DImode)
53021678 1808 insn = gen_ldr_got_small_28k_di (dest, gp_rtx, imm);
1b1e81f8 1809 else
53021678
JW
1810 insn = gen_ldr_got_small_28k_si (dest, gp_rtx, imm);
1811
1812 mem = XVECEXP (SET_SRC (insn), 0, 0);
1b1e81f8
JW
1813 }
1814 else
1815 {
1816 gcc_assert (mode == Pmode);
53021678
JW
1817
1818 insn = gen_ldr_got_small_28k_sidi (dest, gp_rtx, imm);
1819 mem = XVECEXP (XEXP (SET_SRC (insn), 0), 0, 0);
1b1e81f8
JW
1820 }
1821
53021678
JW
 1822 /* The operand is expected to be a MEM. Whenever the related insn
 1823 pattern changes, the above code which calculates the MEM should be
 1824 updated. */
1825 gcc_assert (GET_CODE (mem) == MEM);
1826 MEM_READONLY_P (mem) = 1;
1827 MEM_NOTRAP_P (mem) = 1;
1828 emit_insn (insn);
1b1e81f8
JW
1829 return;
1830 }
1831
6642bdb4 1832 case SYMBOL_SMALL_GOT_4G:
43e9d192 1833 {
28514dda
YZ
1834 /* In ILP32, the mode of dest can be either SImode or DImode,
1835 while the got entry is always of SImode size. The mode of
1836 dest depends on how dest is used: if dest is assigned to a
1837 pointer (e.g. in the memory), it has SImode; it may have
 1838 DImode if dest is dereferenced to access the memory.
1839 This is why we have to handle three different ldr_got_small
1840 patterns here (two patterns for ILP32). */
53021678
JW
1841
1842 rtx insn;
1843 rtx mem;
43e9d192 1844 rtx tmp_reg = dest;
ef4bddc2 1845 machine_mode mode = GET_MODE (dest);
28514dda 1846
43e9d192 1847 if (can_create_pseudo_p ())
28514dda
YZ
1848 tmp_reg = gen_reg_rtx (mode);
1849
1850 emit_move_insn (tmp_reg, gen_rtx_HIGH (mode, imm));
1851 if (mode == ptr_mode)
1852 {
1853 if (mode == DImode)
53021678 1854 insn = gen_ldr_got_small_di (dest, tmp_reg, imm);
28514dda 1855 else
53021678
JW
1856 insn = gen_ldr_got_small_si (dest, tmp_reg, imm);
1857
1858 mem = XVECEXP (SET_SRC (insn), 0, 0);
28514dda
YZ
1859 }
1860 else
1861 {
1862 gcc_assert (mode == Pmode);
53021678
JW
1863
1864 insn = gen_ldr_got_small_sidi (dest, tmp_reg, imm);
1865 mem = XVECEXP (XEXP (SET_SRC (insn), 0), 0, 0);
28514dda
YZ
1866 }
1867
53021678
JW
1868 gcc_assert (GET_CODE (mem) == MEM);
1869 MEM_READONLY_P (mem) = 1;
1870 MEM_NOTRAP_P (mem) = 1;
1871 emit_insn (insn);
43e9d192
IB
1872 return;
1873 }
1874
1875 case SYMBOL_SMALL_TLSGD:
1876 {
5d8a22a5 1877 rtx_insn *insns;
23b88fda
N
1878 machine_mode mode = GET_MODE (dest);
1879 rtx result = gen_rtx_REG (mode, R0_REGNUM);
43e9d192
IB
1880
1881 start_sequence ();
23b88fda
N
1882 if (TARGET_ILP32)
1883 aarch64_emit_call_insn (gen_tlsgd_small_si (result, imm));
1884 else
1885 aarch64_emit_call_insn (gen_tlsgd_small_di (result, imm));
43e9d192
IB
1886 insns = get_insns ();
1887 end_sequence ();
1888
1889 RTL_CONST_CALL_P (insns) = 1;
1890 emit_libcall_block (insns, dest, result, imm);
1891 return;
1892 }
1893
1894 case SYMBOL_SMALL_TLSDESC:
1895 {
ef4bddc2 1896 machine_mode mode = GET_MODE (dest);
621ad2de 1897 rtx x0 = gen_rtx_REG (mode, R0_REGNUM);
43e9d192
IB
1898 rtx tp;
1899
621ad2de
AP
1900 gcc_assert (mode == Pmode || mode == ptr_mode);
1901
2876a13f
JW
1902 /* In ILP32, the got entry is always of SImode size. Unlike
1903 small GOT, the dest is fixed at reg 0. */
1904 if (TARGET_ILP32)
1905 emit_insn (gen_tlsdesc_small_si (imm));
621ad2de 1906 else
2876a13f 1907 emit_insn (gen_tlsdesc_small_di (imm));
43e9d192 1908 tp = aarch64_load_tp (NULL);
621ad2de
AP
1909
1910 if (mode != Pmode)
1911 tp = gen_lowpart (mode, tp);
1912
2876a13f 1913 emit_insn (gen_rtx_SET (dest, gen_rtx_PLUS (mode, tp, x0)));
241dbd9d
QZ
1914 if (REG_P (dest))
1915 set_unique_reg_note (get_last_insn (), REG_EQUIV, imm);
43e9d192
IB
1916 return;
1917 }
1918
79496620 1919 case SYMBOL_SMALL_TLSIE:
43e9d192 1920 {
621ad2de
AP
1921 /* In ILP32, the mode of dest can be either SImode or DImode,
1922 while the got entry is always of SImode size. The mode of
1923 dest depends on how dest is used: if dest is assigned to a
1924 pointer (e.g. in the memory), it has SImode; it may have
 1925 DImode if dest is dereferenced to access the memory.
1926 This is why we have to handle three different tlsie_small
1927 patterns here (two patterns for ILP32). */
ef4bddc2 1928 machine_mode mode = GET_MODE (dest);
621ad2de 1929 rtx tmp_reg = gen_reg_rtx (mode);
43e9d192 1930 rtx tp = aarch64_load_tp (NULL);
621ad2de
AP
1931
1932 if (mode == ptr_mode)
1933 {
1934 if (mode == DImode)
1935 emit_insn (gen_tlsie_small_di (tmp_reg, imm));
1936 else
1937 {
1938 emit_insn (gen_tlsie_small_si (tmp_reg, imm));
1939 tp = gen_lowpart (mode, tp);
1940 }
1941 }
1942 else
1943 {
1944 gcc_assert (mode == Pmode);
1945 emit_insn (gen_tlsie_small_sidi (tmp_reg, imm));
1946 }
1947
f7df4a84 1948 emit_insn (gen_rtx_SET (dest, gen_rtx_PLUS (mode, tp, tmp_reg)));
241dbd9d
QZ
1949 if (REG_P (dest))
1950 set_unique_reg_note (get_last_insn (), REG_EQUIV, imm);
43e9d192
IB
1951 return;
1952 }
1953
cbf5629e 1954 case SYMBOL_TLSLE12:
d18ba284 1955 case SYMBOL_TLSLE24:
cbf5629e
JW
1956 case SYMBOL_TLSLE32:
1957 case SYMBOL_TLSLE48:
43e9d192 1958 {
cbf5629e 1959 machine_mode mode = GET_MODE (dest);
43e9d192 1960 rtx tp = aarch64_load_tp (NULL);
e6f7f0e9 1961
cbf5629e
JW
1962 if (mode != Pmode)
1963 tp = gen_lowpart (mode, tp);
1964
1965 switch (type)
1966 {
1967 case SYMBOL_TLSLE12:
1968 emit_insn ((mode == DImode ? gen_tlsle12_di : gen_tlsle12_si)
1969 (dest, tp, imm));
1970 break;
1971 case SYMBOL_TLSLE24:
1972 emit_insn ((mode == DImode ? gen_tlsle24_di : gen_tlsle24_si)
1973 (dest, tp, imm));
1974 break;
1975 case SYMBOL_TLSLE32:
1976 emit_insn ((mode == DImode ? gen_tlsle32_di : gen_tlsle32_si)
1977 (dest, imm));
1978 emit_insn ((mode == DImode ? gen_adddi3 : gen_addsi3)
1979 (dest, dest, tp));
1980 break;
1981 case SYMBOL_TLSLE48:
1982 emit_insn ((mode == DImode ? gen_tlsle48_di : gen_tlsle48_si)
1983 (dest, imm));
1984 emit_insn ((mode == DImode ? gen_adddi3 : gen_addsi3)
1985 (dest, dest, tp));
1986 break;
1987 default:
1988 gcc_unreachable ();
1989 }
e6f7f0e9 1990
241dbd9d
QZ
1991 if (REG_P (dest))
1992 set_unique_reg_note (get_last_insn (), REG_EQUIV, imm);
43e9d192
IB
1993 return;
1994 }
1995
87dd8ab0
MS
1996 case SYMBOL_TINY_GOT:
1997 emit_insn (gen_ldr_got_tiny (dest, imm));
1998 return;
1999
5ae7caad
JW
2000 case SYMBOL_TINY_TLSIE:
2001 {
2002 machine_mode mode = GET_MODE (dest);
2003 rtx tp = aarch64_load_tp (NULL);
2004
2005 if (mode == ptr_mode)
2006 {
2007 if (mode == DImode)
2008 emit_insn (gen_tlsie_tiny_di (dest, imm, tp));
2009 else
2010 {
2011 tp = gen_lowpart (mode, tp);
2012 emit_insn (gen_tlsie_tiny_si (dest, imm, tp));
2013 }
2014 }
2015 else
2016 {
2017 gcc_assert (mode == Pmode);
2018 emit_insn (gen_tlsie_tiny_sidi (dest, imm, tp));
2019 }
2020
241dbd9d
QZ
2021 if (REG_P (dest))
2022 set_unique_reg_note (get_last_insn (), REG_EQUIV, imm);
5ae7caad
JW
2023 return;
2024 }
2025
43e9d192
IB
2026 default:
2027 gcc_unreachable ();
2028 }
2029}
2030
2031/* Emit a move from SRC to DEST. Assume that the move expanders can
2032 handle all moves if !can_create_pseudo_p (). The distinction is
2033 important because, unlike emit_move_insn, the move expanders know
2034 how to force Pmode objects into the constant pool even when the
2035 constant pool address is not itself legitimate. */
2036static rtx
2037aarch64_emit_move (rtx dest, rtx src)
2038{
2039 return (can_create_pseudo_p ()
2040 ? emit_move_insn (dest, src)
2041 : emit_move_insn_1 (dest, src));
2042}
2043
f22d7973
RS
2044/* Apply UNOPTAB to OP and store the result in DEST. */
2045
2046static void
2047aarch64_emit_unop (rtx dest, optab unoptab, rtx op)
2048{
2049 rtx tmp = expand_unop (GET_MODE (dest), unoptab, op, dest, 0);
2050 if (dest != tmp)
2051 emit_move_insn (dest, tmp);
2052}
2053
2054/* Apply BINOPTAB to OP0 and OP1 and store the result in DEST. */
2055
2056static void
2057aarch64_emit_binop (rtx dest, optab binoptab, rtx op0, rtx op1)
2058{
2059 rtx tmp = expand_binop (GET_MODE (dest), binoptab, op0, op1, dest, 0,
2060 OPTAB_DIRECT);
2061 if (dest != tmp)
2062 emit_move_insn (dest, tmp);
2063}
2064
030d03b8
RE
2065/* Split a 128-bit move operation into two 64-bit move operations,
2066 taking care to handle partial overlap of register to register
2067 copies. Special cases are needed when moving between GP regs and
2068 FP regs. SRC can be a register, constant or memory; DST a register
2069 or memory. If either operand is memory it must not have any side
2070 effects. */
43e9d192
IB
2071void
2072aarch64_split_128bit_move (rtx dst, rtx src)
2073{
030d03b8
RE
2074 rtx dst_lo, dst_hi;
2075 rtx src_lo, src_hi;
43e9d192 2076
ef4bddc2 2077 machine_mode mode = GET_MODE (dst);
12dc6974 2078
030d03b8
RE
2079 gcc_assert (mode == TImode || mode == TFmode);
2080 gcc_assert (!(side_effects_p (src) || side_effects_p (dst)));
2081 gcc_assert (mode == GET_MODE (src) || GET_MODE (src) == VOIDmode);
43e9d192
IB
2082
2083 if (REG_P (dst) && REG_P (src))
2084 {
030d03b8
RE
2085 int src_regno = REGNO (src);
2086 int dst_regno = REGNO (dst);
43e9d192 2087
030d03b8 2088 /* Handle FP <-> GP regs. */
43e9d192
IB
2089 if (FP_REGNUM_P (dst_regno) && GP_REGNUM_P (src_regno))
2090 {
030d03b8
RE
2091 src_lo = gen_lowpart (word_mode, src);
2092 src_hi = gen_highpart (word_mode, src);
2093
0016d8d9
RS
2094 emit_insn (gen_aarch64_movlow_di (mode, dst, src_lo));
2095 emit_insn (gen_aarch64_movhigh_di (mode, dst, src_hi));
030d03b8 2096 return;
43e9d192
IB
2097 }
2098 else if (GP_REGNUM_P (dst_regno) && FP_REGNUM_P (src_regno))
2099 {
030d03b8
RE
2100 dst_lo = gen_lowpart (word_mode, dst);
2101 dst_hi = gen_highpart (word_mode, dst);
2102
0016d8d9
RS
2103 emit_insn (gen_aarch64_movdi_low (mode, dst_lo, src));
2104 emit_insn (gen_aarch64_movdi_high (mode, dst_hi, src));
030d03b8 2105 return;
43e9d192 2106 }
43e9d192
IB
2107 }
2108
030d03b8
RE
2109 dst_lo = gen_lowpart (word_mode, dst);
2110 dst_hi = gen_highpart (word_mode, dst);
2111 src_lo = gen_lowpart (word_mode, src);
2112 src_hi = gen_highpart_mode (word_mode, mode, src);
2113
2114 /* At most one pairing may overlap. */
2115 if (reg_overlap_mentioned_p (dst_lo, src_hi))
2116 {
2117 aarch64_emit_move (dst_hi, src_hi);
2118 aarch64_emit_move (dst_lo, src_lo);
2119 }
2120 else
2121 {
2122 aarch64_emit_move (dst_lo, src_lo);
2123 aarch64_emit_move (dst_hi, src_hi);
2124 }
43e9d192
IB
2125}
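/* Worked example (illustrative, not from the GCC sources): for a
   little-endian TImode copy from the pair {x0,x1} to {x1,x2}, dst_lo
   is x1 and src_hi is x1, so the overlap test above moves the high
   word first (x2 <- x1, then x1 <- x0); emitting the low move first
   would clobber the high half of the source.  */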
2126
2127bool
2128aarch64_split_128bit_move_p (rtx dst, rtx src)
2129{
2130 return (! REG_P (src)
2131 || ! (FP_REGNUM_P (REGNO (dst)) && FP_REGNUM_P (REGNO (src))));
2132}
2133
8b033a8a
SN
2134/* Split a complex SIMD combine. */
2135
2136void
2137aarch64_split_simd_combine (rtx dst, rtx src1, rtx src2)
2138{
ef4bddc2
RS
2139 machine_mode src_mode = GET_MODE (src1);
2140 machine_mode dst_mode = GET_MODE (dst);
8b033a8a
SN
2141
2142 gcc_assert (VECTOR_MODE_P (dst_mode));
a977dc0c
MC
2143 gcc_assert (register_operand (dst, dst_mode)
2144 && register_operand (src1, src_mode)
2145 && register_operand (src2, src_mode));
8b033a8a 2146
0016d8d9 2147 emit_insn (gen_aarch64_simd_combine (src_mode, dst, src1, src2));
a977dc0c 2148 return;
8b033a8a
SN
2149}
2150
fd4842cd
SN
2151/* Split a complex SIMD move. */
2152
2153void
2154aarch64_split_simd_move (rtx dst, rtx src)
2155{
ef4bddc2
RS
2156 machine_mode src_mode = GET_MODE (src);
2157 machine_mode dst_mode = GET_MODE (dst);
fd4842cd
SN
2158
2159 gcc_assert (VECTOR_MODE_P (dst_mode));
2160
2161 if (REG_P (dst) && REG_P (src))
2162 {
2163 gcc_assert (VECTOR_MODE_P (src_mode));
0016d8d9 2164 emit_insn (gen_aarch64_split_simd_mov (src_mode, dst, src));
fd4842cd
SN
2165 }
2166}
2167
ef22810a
RH
2168bool
2169aarch64_zero_extend_const_eq (machine_mode xmode, rtx x,
2170 machine_mode ymode, rtx y)
2171{
2172 rtx r = simplify_const_unary_operation (ZERO_EXTEND, xmode, y, ymode);
2173 gcc_assert (r != NULL);
2174 return rtx_equal_p (x, r);
2175}
2176
2177
43e9d192 2178static rtx
ef4bddc2 2179aarch64_force_temporary (machine_mode mode, rtx x, rtx value)
43e9d192
IB
2180{
2181 if (can_create_pseudo_p ())
e18b4a81 2182 return force_reg (mode, value);
43e9d192
IB
2183 else
2184 {
f5470a77
RS
2185 gcc_assert (x);
2186 aarch64_emit_move (x, value);
43e9d192
IB
2187 return x;
2188 }
2189}
2190
43cacb12
RS
2191/* Return true if we can move VALUE into a register using a single
2192 CNT[BHWD] instruction. */
2193
2194static bool
2195aarch64_sve_cnt_immediate_p (poly_int64 value)
2196{
2197 HOST_WIDE_INT factor = value.coeffs[0];
2198 /* The coefficient must be [1, 16] * {2, 4, 8, 16}. */
2199 return (value.coeffs[1] == factor
2200 && IN_RANGE (factor, 2, 16 * 16)
2201 && (factor & 1) == 0
2202 && factor <= 16 * (factor & -factor));
2203}
2204
2205/* Likewise for rtx X. */
2206
2207bool
2208aarch64_sve_cnt_immediate_p (rtx x)
2209{
2210 poly_int64 value;
2211 return poly_int_rtx_p (x, &value) && aarch64_sve_cnt_immediate_p (value);
2212}
2213
2214/* Return the asm string for an instruction with a CNT-like vector size
2215 operand (a vector pattern followed by a multiplier in the range [1, 16]).
2216 PREFIX is the mnemonic without the size suffix and OPERANDS is the
2217 first part of the operands template (the part that comes before the
2218 vector size itself). FACTOR is the number of quadwords.
2219 NELTS_PER_VQ, if nonzero, is the number of elements in each quadword.
2220 If it is zero, we can use any element size. */
2221
2222static char *
2223aarch64_output_sve_cnt_immediate (const char *prefix, const char *operands,
2224 unsigned int factor,
2225 unsigned int nelts_per_vq)
2226{
2227 static char buffer[sizeof ("sqincd\t%x0, %w0, all, mul #16")];
2228
2229 if (nelts_per_vq == 0)
2230 /* There is some overlap in the ranges of the four CNT instructions.
2231 Here we always use the smallest possible element size, so that the
 2232 multiplier is 1 wherever possible. */
2233 nelts_per_vq = factor & -factor;
2234 int shift = std::min (exact_log2 (nelts_per_vq), 4);
2235 gcc_assert (IN_RANGE (shift, 1, 4));
2236 char suffix = "dwhb"[shift - 1];
2237
2238 factor >>= shift;
2239 unsigned int written;
2240 if (factor == 1)
2241 written = snprintf (buffer, sizeof (buffer), "%s%c\t%s",
2242 prefix, suffix, operands);
2243 else
2244 written = snprintf (buffer, sizeof (buffer), "%s%c\t%s, all, mul #%d",
2245 prefix, suffix, operands, factor);
2246 gcc_assert (written < sizeof (buffer));
2247 return buffer;
2248}
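/* Worked example (illustrative, not from the GCC sources): PREFIX
   "cnt", OPERANDS "%x0", FACTOR 32 and NELTS_PER_VQ 0 select the byte
   form (32 & -32 == 32, shift 4, suffix 'b') and print
   "cntb\t%x0, all, mul #2", while FACTOR 8 prints "cnth\t%x0".  */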
2249
2250/* Return the asm string for an instruction with a CNT-like vector size
2251 operand (a vector pattern followed by a multiplier in the range [1, 16]).
2252 PREFIX is the mnemonic without the size suffix and OPERANDS is the
2253 first part of the operands template (the part that comes before the
2254 vector size itself). X is the value of the vector size operand,
2255 as a polynomial integer rtx. */
2256
2257char *
2258aarch64_output_sve_cnt_immediate (const char *prefix, const char *operands,
2259 rtx x)
2260{
2261 poly_int64 value = rtx_to_poly_int64 (x);
2262 gcc_assert (aarch64_sve_cnt_immediate_p (value));
2263 return aarch64_output_sve_cnt_immediate (prefix, operands,
2264 value.coeffs[1], 0);
2265}
2266
2267/* Return true if we can add VALUE to a register using a single ADDVL
2268 or ADDPL instruction. */
2269
2270static bool
2271aarch64_sve_addvl_addpl_immediate_p (poly_int64 value)
2272{
2273 HOST_WIDE_INT factor = value.coeffs[0];
2274 if (factor == 0 || value.coeffs[1] != factor)
2275 return false;
2276 /* FACTOR counts VG / 2, so a value of 2 is one predicate width
2277 and a value of 16 is one vector width. */
2278 return (((factor & 15) == 0 && IN_RANGE (factor, -32 * 16, 31 * 16))
2279 || ((factor & 1) == 0 && IN_RANGE (factor, -32 * 2, 31 * 2)));
2280}
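/* Worked examples (illustrative, not from the GCC sources): the test
   above accepts multiples of 16 in [-512, 496] for ADDVL (whole
   vector lengths) and multiples of 2 in [-64, 62] for ADDPL (whole
   predicate lengths), so poly_int64 (32, 32) can be added with
   "addvl ..., #2" and poly_int64 (6, 6) with "addpl ..., #3".  */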
2281
2282/* Likewise for rtx X. */
2283
2284bool
2285aarch64_sve_addvl_addpl_immediate_p (rtx x)
2286{
2287 poly_int64 value;
2288 return (poly_int_rtx_p (x, &value)
2289 && aarch64_sve_addvl_addpl_immediate_p (value));
2290}
2291
2292/* Return the asm string for adding ADDVL or ADDPL immediate X to operand 1
2293 and storing the result in operand 0. */
2294
2295char *
2296aarch64_output_sve_addvl_addpl (rtx dest, rtx base, rtx offset)
2297{
2298 static char buffer[sizeof ("addpl\t%x0, %x1, #-") + 3 * sizeof (int)];
2299 poly_int64 offset_value = rtx_to_poly_int64 (offset);
2300 gcc_assert (aarch64_sve_addvl_addpl_immediate_p (offset_value));
2301
2302 /* Use INC or DEC if possible. */
2303 if (rtx_equal_p (dest, base) && GP_REGNUM_P (REGNO (dest)))
2304 {
2305 if (aarch64_sve_cnt_immediate_p (offset_value))
2306 return aarch64_output_sve_cnt_immediate ("inc", "%x0",
2307 offset_value.coeffs[1], 0);
2308 if (aarch64_sve_cnt_immediate_p (-offset_value))
2309 return aarch64_output_sve_cnt_immediate ("dec", "%x0",
2310 -offset_value.coeffs[1], 0);
2311 }
2312
2313 int factor = offset_value.coeffs[1];
2314 if ((factor & 15) == 0)
2315 snprintf (buffer, sizeof (buffer), "addvl\t%%x0, %%x1, #%d", factor / 16);
2316 else
2317 snprintf (buffer, sizeof (buffer), "addpl\t%%x0, %%x1, #%d", factor / 2);
2318 return buffer;
2319}
2320
2321/* Return true if X is a valid immediate for an SVE vector INC or DEC
2322 instruction. If it is, store the number of elements in each vector
2323 quadword in *NELTS_PER_VQ_OUT (if nonnull) and store the multiplication
2324 factor in *FACTOR_OUT (if nonnull). */
2325
2326bool
2327aarch64_sve_inc_dec_immediate_p (rtx x, int *factor_out,
2328 unsigned int *nelts_per_vq_out)
2329{
2330 rtx elt;
2331 poly_int64 value;
2332
2333 if (!const_vec_duplicate_p (x, &elt)
2334 || !poly_int_rtx_p (elt, &value))
2335 return false;
2336
2337 unsigned int nelts_per_vq = 128 / GET_MODE_UNIT_BITSIZE (GET_MODE (x));
2338 if (nelts_per_vq != 8 && nelts_per_vq != 4 && nelts_per_vq != 2)
2339 /* There's no vector INCB. */
2340 return false;
2341
2342 HOST_WIDE_INT factor = value.coeffs[0];
2343 if (value.coeffs[1] != factor)
2344 return false;
2345
2346 /* The coefficient must be [1, 16] * NELTS_PER_VQ. */
2347 if ((factor % nelts_per_vq) != 0
2348 || !IN_RANGE (abs (factor), nelts_per_vq, 16 * nelts_per_vq))
2349 return false;
2350
2351 if (factor_out)
2352 *factor_out = factor;
2353 if (nelts_per_vq_out)
2354 *nelts_per_vq_out = nelts_per_vq;
2355 return true;
2356}
2357
2358/* Return true if X is a valid immediate for an SVE vector INC or DEC
2359 instruction. */
2360
2361bool
2362aarch64_sve_inc_dec_immediate_p (rtx x)
2363{
2364 return aarch64_sve_inc_dec_immediate_p (x, NULL, NULL);
2365}
2366
2367/* Return the asm template for an SVE vector INC or DEC instruction.
2368 OPERANDS gives the operands before the vector count and X is the
2369 value of the vector count operand itself. */
2370
2371char *
2372aarch64_output_sve_inc_dec_immediate (const char *operands, rtx x)
2373{
2374 int factor;
2375 unsigned int nelts_per_vq;
2376 if (!aarch64_sve_inc_dec_immediate_p (x, &factor, &nelts_per_vq))
2377 gcc_unreachable ();
2378 if (factor < 0)
2379 return aarch64_output_sve_cnt_immediate ("dec", operands, -factor,
2380 nelts_per_vq);
2381 else
2382 return aarch64_output_sve_cnt_immediate ("inc", operands, factor,
2383 nelts_per_vq);
2384}
43e9d192 2385
82614948
RR
2386static int
2387aarch64_internal_mov_immediate (rtx dest, rtx imm, bool generate,
77e994c9 2388 scalar_int_mode mode)
43e9d192 2389{
43e9d192 2390 int i;
9a4865db
WD
2391 unsigned HOST_WIDE_INT val, val2, mask;
2392 int one_match, zero_match;
2393 int num_insns;
43e9d192 2394
9a4865db
WD
2395 val = INTVAL (imm);
2396
2397 if (aarch64_move_imm (val, mode))
43e9d192 2398 {
82614948 2399 if (generate)
f7df4a84 2400 emit_insn (gen_rtx_SET (dest, imm));
9a4865db 2401 return 1;
43e9d192
IB
2402 }
2403
9de00935
TC
2404 /* Check to see if the low 32 bits are either 0xffffXXXX or 0xXXXXffff
2405 (with XXXX non-zero). In that case check to see if the move can be done in
2406 a smaller mode. */
2407 val2 = val & 0xffffffff;
2408 if (mode == DImode
2409 && aarch64_move_imm (val2, SImode)
2410 && (((val >> 32) & 0xffff) == 0 || (val >> 48) == 0))
2411 {
2412 if (generate)
2413 emit_insn (gen_rtx_SET (dest, GEN_INT (val2)));
2414
2415 /* Check if we have to emit a second instruction by checking to see
2416 if any of the upper 32 bits of the original DI mode value is set. */
2417 if (val == val2)
2418 return 1;
2419
2420 i = (val >> 48) ? 48 : 32;
2421
2422 if (generate)
2423 emit_insn (gen_insv_immdi (dest, GEN_INT (i),
2424 GEN_INT ((val >> i) & 0xffff)));
2425
2426 return 2;
2427 }
2428
9a4865db 2429 if ((val >> 32) == 0 || mode == SImode)
43e9d192 2430 {
82614948
RR
2431 if (generate)
2432 {
9a4865db
WD
2433 emit_insn (gen_rtx_SET (dest, GEN_INT (val & 0xffff)));
2434 if (mode == SImode)
2435 emit_insn (gen_insv_immsi (dest, GEN_INT (16),
2436 GEN_INT ((val >> 16) & 0xffff)));
2437 else
2438 emit_insn (gen_insv_immdi (dest, GEN_INT (16),
2439 GEN_INT ((val >> 16) & 0xffff)));
82614948 2440 }
9a4865db 2441 return 2;
43e9d192
IB
2442 }
2443
2444 /* Remaining cases are all for DImode. */
2445
43e9d192 2446 mask = 0xffff;
9a4865db
WD
2447 zero_match = ((val & mask) == 0) + ((val & (mask << 16)) == 0) +
2448 ((val & (mask << 32)) == 0) + ((val & (mask << 48)) == 0);
2449 one_match = ((~val & mask) == 0) + ((~val & (mask << 16)) == 0) +
2450 ((~val & (mask << 32)) == 0) + ((~val & (mask << 48)) == 0);
43e9d192 2451
62c8d76c 2452 if (zero_match != 2 && one_match != 2)
43e9d192 2453 {
62c8d76c
WD
2454 /* Try emitting a bitmask immediate with a movk replacing 16 bits.
2455 For a 64-bit bitmask try whether changing 16 bits to all ones or
2456 zeroes creates a valid bitmask. To check any repeated bitmask,
2457 try using 16 bits from the other 32-bit half of val. */
43e9d192 2458
62c8d76c 2459 for (i = 0; i < 64; i += 16, mask <<= 16)
43e9d192 2460 {
62c8d76c
WD
2461 val2 = val & ~mask;
2462 if (val2 != val && aarch64_bitmask_imm (val2, mode))
2463 break;
2464 val2 = val | mask;
2465 if (val2 != val && aarch64_bitmask_imm (val2, mode))
2466 break;
2467 val2 = val2 & ~mask;
2468 val2 = val2 | (((val2 >> 32) | (val2 << 32)) & mask);
2469 if (val2 != val && aarch64_bitmask_imm (val2, mode))
2470 break;
43e9d192 2471 }
62c8d76c 2472 if (i != 64)
43e9d192 2473 {
62c8d76c 2474 if (generate)
43e9d192 2475 {
62c8d76c
WD
2476 emit_insn (gen_rtx_SET (dest, GEN_INT (val2)));
2477 emit_insn (gen_insv_immdi (dest, GEN_INT (i),
9a4865db 2478 GEN_INT ((val >> i) & 0xffff)));
43e9d192 2479 }
1312b1ba 2480 return 2;
43e9d192
IB
2481 }
2482 }
2483
9a4865db
WD
2484 /* Generate 2-4 instructions, skipping 16 bits of all zeroes or ones which
2485 are emitted by the initial mov. If one_match > zero_match, skip set bits,
2486 otherwise skip zero bits. */
2c274197 2487
9a4865db 2488 num_insns = 1;
43e9d192 2489 mask = 0xffff;
9a4865db
WD
2490 val2 = one_match > zero_match ? ~val : val;
2491 i = (val2 & mask) != 0 ? 0 : (val2 & (mask << 16)) != 0 ? 16 : 32;
2492
2493 if (generate)
2494 emit_insn (gen_rtx_SET (dest, GEN_INT (one_match > zero_match
2495 ? (val | ~(mask << i))
2496 : (val & (mask << i)))));
2497 for (i += 16; i < 64; i += 16)
43e9d192 2498 {
9a4865db
WD
2499 if ((val2 & (mask << i)) == 0)
2500 continue;
2501 if (generate)
2502 emit_insn (gen_insv_immdi (dest, GEN_INT (i),
2503 GEN_INT ((val >> i) & 0xffff)));
2504 num_insns ++;
82614948
RR
2505 }
2506
2507 return num_insns;
2508}
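/* Illustrative sketch (not from the GCC sources): a rough standalone
   count of the MOVZ/MOVN + MOVK sequence chosen by the final loop
   above, ignoring the earlier bitmask-immediate and 32-bit shortcuts.
   The initial move covers one 16-bit chunk and implicitly sets every
   chunk that is all-zeros (MOVZ) or all-ones (MOVN); each remaining
   chunk costs one MOVK.  */
static int
mov_imm_count_example (unsigned long long val)
{
  int zeros = 0, ones = 0;
  for (int i = 0; i < 64; i += 16)
    {
      unsigned long long chunk = (val >> i) & 0xffff;
      zeros += (chunk == 0);
      ones += (chunk == 0xffff);
    }
  /* Skip whichever kind of chunk the initial move handles for free.  */
  int skipped = ones > zeros ? ones : zeros;
  int insns = 4 - skipped;
  return insns < 1 ? 1 : insns;
}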
2509
c0bb5bc5
WD
2510/* Return whether imm is a 128-bit immediate which is simple enough to
2511 expand inline. */
2512bool
2513aarch64_mov128_immediate (rtx imm)
2514{
2515 if (GET_CODE (imm) == CONST_INT)
2516 return true;
2517
2518 gcc_assert (CONST_WIDE_INT_NUNITS (imm) == 2);
2519
2520 rtx lo = GEN_INT (CONST_WIDE_INT_ELT (imm, 0));
2521 rtx hi = GEN_INT (CONST_WIDE_INT_ELT (imm, 1));
2522
2523 return aarch64_internal_mov_immediate (NULL_RTX, lo, false, DImode)
2524 + aarch64_internal_mov_immediate (NULL_RTX, hi, false, DImode) <= 4;
2525}
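/* Worked example (illustrative, not from the GCC sources): a
   CONST_WIDE_INT whose halves are 0x1 and 0xffffffff00000000 needs
   1 + 1 instructions and is expanded inline, whereas one whose halves
   each need four MOV/MOVK instructions exceeds the limit of 4 and is
   not.  */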
2526
2527
43cacb12
RS
2528/* Return the number of temporary registers that aarch64_add_offset_1
2529 would need to add OFFSET to a register. */
2530
2531static unsigned int
2532aarch64_add_offset_1_temporaries (HOST_WIDE_INT offset)
2533{
2534 return abs_hwi (offset) < 0x1000000 ? 0 : 1;
2535}
2536
f5470a77
RS
2537/* A subroutine of aarch64_add_offset. Set DEST to SRC + OFFSET for
2538 a non-polynomial OFFSET. MODE is the mode of the addition.
2539 FRAME_RELATED_P is true if the RTX_FRAME_RELATED flag should
2540 be set and CFA adjustments added to the generated instructions.
2541
2542 TEMP1, if nonnull, is a register of mode MODE that can be used as a
2543 temporary if register allocation is already complete. This temporary
2544 register may overlap DEST but must not overlap SRC. If TEMP1 is known
2545 to hold abs (OFFSET), EMIT_MOVE_IMM can be set to false to avoid emitting
2546 the immediate again.
0100c5f9
RS
2547
2548 Since this function may be used to adjust the stack pointer, we must
2549 ensure that it cannot cause transient stack deallocation (for example
2550 by first incrementing SP and then decrementing when adjusting by a
2551 large immediate). */
2552
2553static void
f5470a77
RS
2554aarch64_add_offset_1 (scalar_int_mode mode, rtx dest,
2555 rtx src, HOST_WIDE_INT offset, rtx temp1,
2556 bool frame_related_p, bool emit_move_imm)
0100c5f9 2557{
f5470a77
RS
2558 gcc_assert (emit_move_imm || temp1 != NULL_RTX);
2559 gcc_assert (temp1 == NULL_RTX || !reg_overlap_mentioned_p (temp1, src));
2560
2561 HOST_WIDE_INT moffset = abs_hwi (offset);
0100c5f9
RS
2562 rtx_insn *insn;
2563
f5470a77
RS
2564 if (!moffset)
2565 {
2566 if (!rtx_equal_p (dest, src))
2567 {
2568 insn = emit_insn (gen_rtx_SET (dest, src));
2569 RTX_FRAME_RELATED_P (insn) = frame_related_p;
2570 }
2571 return;
2572 }
0100c5f9
RS
2573
2574 /* Single instruction adjustment. */
f5470a77 2575 if (aarch64_uimm12_shift (moffset))
0100c5f9 2576 {
f5470a77 2577 insn = emit_insn (gen_add3_insn (dest, src, GEN_INT (offset)));
0100c5f9
RS
2578 RTX_FRAME_RELATED_P (insn) = frame_related_p;
2579 return;
2580 }
2581
f5470a77
RS
2582 /* Emit 2 additions/subtractions if the adjustment is less than 24 bits
2583 and either:
2584
2585 a) the offset cannot be loaded by a 16-bit move or
2586 b) there is no spare register into which we can move it. */
2587 if (moffset < 0x1000000
2588 && ((!temp1 && !can_create_pseudo_p ())
2589 || !aarch64_move_imm (moffset, mode)))
0100c5f9 2590 {
f5470a77 2591 HOST_WIDE_INT low_off = moffset & 0xfff;
0100c5f9 2592
f5470a77
RS
2593 low_off = offset < 0 ? -low_off : low_off;
2594 insn = emit_insn (gen_add3_insn (dest, src, GEN_INT (low_off)));
0100c5f9 2595 RTX_FRAME_RELATED_P (insn) = frame_related_p;
f5470a77 2596 insn = emit_insn (gen_add2_insn (dest, GEN_INT (offset - low_off)));
0100c5f9
RS
2597 RTX_FRAME_RELATED_P (insn) = frame_related_p;
2598 return;
2599 }
2600
2601 /* Emit a move immediate if required and an addition/subtraction. */
0100c5f9 2602 if (emit_move_imm)
f5470a77
RS
2603 {
2604 gcc_assert (temp1 != NULL_RTX || can_create_pseudo_p ());
2605 temp1 = aarch64_force_temporary (mode, temp1, GEN_INT (moffset));
2606 }
2607 insn = emit_insn (offset < 0
2608 ? gen_sub3_insn (dest, src, temp1)
2609 : gen_add3_insn (dest, src, temp1));
0100c5f9
RS
2610 if (frame_related_p)
2611 {
2612 RTX_FRAME_RELATED_P (insn) = frame_related_p;
f5470a77
RS
2613 rtx adj = plus_constant (mode, src, offset);
2614 add_reg_note (insn, REG_CFA_ADJUST_CFA, gen_rtx_SET (dest, adj));
0100c5f9
RS
2615 }
2616}
2617
43cacb12
RS
2618/* Return the number of temporary registers that aarch64_add_offset
2619 would need to move OFFSET into a register or add OFFSET to a register;
2620 ADD_P is true if we want the latter rather than the former. */
2621
2622static unsigned int
2623aarch64_offset_temporaries (bool add_p, poly_int64 offset)
2624{
2625 /* This follows the same structure as aarch64_add_offset. */
2626 if (add_p && aarch64_sve_addvl_addpl_immediate_p (offset))
2627 return 0;
2628
2629 unsigned int count = 0;
2630 HOST_WIDE_INT factor = offset.coeffs[1];
2631 HOST_WIDE_INT constant = offset.coeffs[0] - factor;
2632 poly_int64 poly_offset (factor, factor);
2633 if (add_p && aarch64_sve_addvl_addpl_immediate_p (poly_offset))
2634 /* Need one register for the ADDVL/ADDPL result. */
2635 count += 1;
2636 else if (factor != 0)
2637 {
2638 factor = abs (factor);
2639 if (factor > 16 * (factor & -factor))
2640 /* Need one register for the CNT result and one for the multiplication
2641 factor. If necessary, the second temporary can be reused for the
2642 constant part of the offset. */
2643 return 2;
2644 /* Need one register for the CNT result (which might then
2645 be shifted). */
2646 count += 1;
2647 }
2648 return count + aarch64_add_offset_1_temporaries (constant);
2649}
2650
2651/* If X can be represented as a poly_int64, return the number
2652 of temporaries that are required to add it to a register.
2653 Return -1 otherwise. */
2654
2655int
2656aarch64_add_offset_temporaries (rtx x)
2657{
2658 poly_int64 offset;
2659 if (!poly_int_rtx_p (x, &offset))
2660 return -1;
2661 return aarch64_offset_temporaries (true, offset);
2662}
2663
f5470a77
RS
2664/* Set DEST to SRC + OFFSET. MODE is the mode of the addition.
2665 FRAME_RELATED_P is true if the RTX_FRAME_RELATED flag should
2666 be set and CFA adjustments added to the generated instructions.
2667
2668 TEMP1, if nonnull, is a register of mode MODE that can be used as a
2669 temporary if register allocation is already complete. This temporary
43cacb12
RS
2670 register may overlap DEST if !FRAME_RELATED_P but must not overlap SRC.
2671 If TEMP1 is known to hold abs (OFFSET), EMIT_MOVE_IMM can be set to
2672 false to avoid emitting the immediate again.
2673
2674 TEMP2, if nonnull, is a second temporary register that doesn't
2675 overlap either DEST or REG.
f5470a77
RS
2676
2677 Since this function may be used to adjust the stack pointer, we must
2678 ensure that it cannot cause transient stack deallocation (for example
2679 by first incrementing SP and then decrementing when adjusting by a
2680 large immediate). */
2681
2682static void
2683aarch64_add_offset (scalar_int_mode mode, rtx dest, rtx src,
43cacb12
RS
2684 poly_int64 offset, rtx temp1, rtx temp2,
2685 bool frame_related_p, bool emit_move_imm = true)
0100c5f9 2686{
f5470a77
RS
2687 gcc_assert (emit_move_imm || temp1 != NULL_RTX);
2688 gcc_assert (temp1 == NULL_RTX || !reg_overlap_mentioned_p (temp1, src));
43cacb12
RS
2689 gcc_assert (temp1 == NULL_RTX
2690 || !frame_related_p
2691 || !reg_overlap_mentioned_p (temp1, dest));
2692 gcc_assert (temp2 == NULL_RTX || !reg_overlap_mentioned_p (dest, temp2));
2693
2694 /* Try using ADDVL or ADDPL to add the whole value. */
2695 if (src != const0_rtx && aarch64_sve_addvl_addpl_immediate_p (offset))
2696 {
2697 rtx offset_rtx = gen_int_mode (offset, mode);
2698 rtx_insn *insn = emit_insn (gen_add3_insn (dest, src, offset_rtx));
2699 RTX_FRAME_RELATED_P (insn) = frame_related_p;
2700 return;
2701 }
2702
2703 /* Coefficient 1 is multiplied by the number of 128-bit blocks in an
2704 SVE vector register, over and above the minimum size of 128 bits.
2705 This is equivalent to half the value returned by CNTD with a
2706 vector shape of ALL. */
2707 HOST_WIDE_INT factor = offset.coeffs[1];
2708 HOST_WIDE_INT constant = offset.coeffs[0] - factor;
2709
2710 /* Try using ADDVL or ADDPL to add the VG-based part. */
2711 poly_int64 poly_offset (factor, factor);
2712 if (src != const0_rtx
2713 && aarch64_sve_addvl_addpl_immediate_p (poly_offset))
2714 {
2715 rtx offset_rtx = gen_int_mode (poly_offset, mode);
2716 if (frame_related_p)
2717 {
2718 rtx_insn *insn = emit_insn (gen_add3_insn (dest, src, offset_rtx));
2719 RTX_FRAME_RELATED_P (insn) = true;
2720 src = dest;
2721 }
2722 else
2723 {
2724 rtx addr = gen_rtx_PLUS (mode, src, offset_rtx);
2725 src = aarch64_force_temporary (mode, temp1, addr);
2726 temp1 = temp2;
2727 temp2 = NULL_RTX;
2728 }
2729 }
2730 /* Otherwise use a CNT-based sequence. */
2731 else if (factor != 0)
2732 {
2733 /* Use a subtraction if we have a negative factor. */
2734 rtx_code code = PLUS;
2735 if (factor < 0)
2736 {
2737 factor = -factor;
2738 code = MINUS;
2739 }
2740
2741 /* Calculate CNTD * FACTOR / 2. First try to fold the division
2742 into the multiplication. */
2743 rtx val;
2744 int shift = 0;
2745 if (factor & 1)
2746 /* Use a right shift by 1. */
2747 shift = -1;
2748 else
2749 factor /= 2;
2750 HOST_WIDE_INT low_bit = factor & -factor;
2751 if (factor <= 16 * low_bit)
2752 {
2753 if (factor > 16 * 8)
2754 {
2755 /* "CNTB Xn, ALL, MUL #FACTOR" is out of range, so calculate
2756 the value with the minimum multiplier and shift it into
2757 position. */
2758 int extra_shift = exact_log2 (low_bit);
2759 shift += extra_shift;
2760 factor >>= extra_shift;
2761 }
2762 val = gen_int_mode (poly_int64 (factor * 2, factor * 2), mode);
2763 }
2764 else
2765 {
2766 /* Use CNTD, then multiply it by FACTOR. */
2767 val = gen_int_mode (poly_int64 (2, 2), mode);
2768 val = aarch64_force_temporary (mode, temp1, val);
2769
2770 /* Go back to using a negative multiplication factor if we have
2771 no register from which to subtract. */
2772 if (code == MINUS && src == const0_rtx)
2773 {
2774 factor = -factor;
2775 code = PLUS;
2776 }
2777 rtx coeff1 = gen_int_mode (factor, mode);
2778 coeff1 = aarch64_force_temporary (mode, temp2, coeff1);
2779 val = gen_rtx_MULT (mode, val, coeff1);
2780 }
2781
2782 if (shift > 0)
2783 {
2784 /* Multiply by 1 << SHIFT. */
2785 val = aarch64_force_temporary (mode, temp1, val);
2786 val = gen_rtx_ASHIFT (mode, val, GEN_INT (shift));
2787 }
2788 else if (shift == -1)
2789 {
2790 /* Divide by 2. */
2791 val = aarch64_force_temporary (mode, temp1, val);
2792 val = gen_rtx_ASHIFTRT (mode, val, const1_rtx);
2793 }
2794
2795 /* Calculate SRC +/- CNTD * FACTOR / 2. */
2796 if (src != const0_rtx)
2797 {
2798 val = aarch64_force_temporary (mode, temp1, val);
2799 val = gen_rtx_fmt_ee (code, mode, src, val);
2800 }
2801 else if (code == MINUS)
2802 {
2803 val = aarch64_force_temporary (mode, temp1, val);
2804 val = gen_rtx_NEG (mode, val);
2805 }
2806
2807 if (constant == 0 || frame_related_p)
2808 {
2809 rtx_insn *insn = emit_insn (gen_rtx_SET (dest, val));
2810 if (frame_related_p)
2811 {
2812 RTX_FRAME_RELATED_P (insn) = true;
2813 add_reg_note (insn, REG_CFA_ADJUST_CFA,
2814 gen_rtx_SET (dest, plus_constant (Pmode, src,
2815 poly_offset)));
2816 }
2817 src = dest;
2818 if (constant == 0)
2819 return;
2820 }
2821 else
2822 {
2823 src = aarch64_force_temporary (mode, temp1, val);
2824 temp1 = temp2;
2825 temp2 = NULL_RTX;
2826 }
2827
2828 emit_move_imm = true;
2829 }
f5470a77 2830
f5470a77
RS
2831 aarch64_add_offset_1 (mode, dest, src, constant, temp1,
2832 frame_related_p, emit_move_imm);
0100c5f9
RS
2833}
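/* Worked example (illustrative, not from the GCC sources): with SRC a
   register and OFFSET == poly_int64 (24, 16), FACTOR is 16 and
   CONSTANT is 8, so the VG-based part is added with a single
   "addvl dest, src, #1" and the remaining 8 bytes are handled by
   aarch64_add_offset_1.  */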
2834
43cacb12
RS
2835/* Like aarch64_add_offset, but the offset is given as an rtx rather
2836 than a poly_int64. */
2837
2838void
2839aarch64_split_add_offset (scalar_int_mode mode, rtx dest, rtx src,
2840 rtx offset_rtx, rtx temp1, rtx temp2)
2841{
2842 aarch64_add_offset (mode, dest, src, rtx_to_poly_int64 (offset_rtx),
2843 temp1, temp2, false);
2844}
2845
f5470a77
RS
2846/* Add DELTA to the stack pointer, marking the instructions frame-related.
2847 TEMP1 is available as a temporary if nonnull. EMIT_MOVE_IMM is false
2848 if TEMP1 already contains abs (DELTA). */
2849
0100c5f9 2850static inline void
43cacb12 2851aarch64_add_sp (rtx temp1, rtx temp2, poly_int64 delta, bool emit_move_imm)
0100c5f9 2852{
f5470a77 2853 aarch64_add_offset (Pmode, stack_pointer_rtx, stack_pointer_rtx, delta,
43cacb12 2854 temp1, temp2, true, emit_move_imm);
0100c5f9
RS
2855}
2856
f5470a77
RS
2857/* Subtract DELTA from the stack pointer, marking the instructions
2858 frame-related if FRAME_RELATED_P. TEMP1 is available as a temporary
2859 if nonnull. */
2860
0100c5f9 2861static inline void
cd1bef27
JL
2862aarch64_sub_sp (rtx temp1, rtx temp2, poly_int64 delta, bool frame_related_p,
2863 bool emit_move_imm = true)
0100c5f9 2864{
f5470a77 2865 aarch64_add_offset (Pmode, stack_pointer_rtx, stack_pointer_rtx, -delta,
cd1bef27 2866 temp1, temp2, frame_related_p, emit_move_imm);
0100c5f9 2867}
82614948 2868
43cacb12
RS
2869/* Set DEST to (vec_series BASE STEP). */
2870
2871static void
2872aarch64_expand_vec_series (rtx dest, rtx base, rtx step)
82614948
RR
2873{
2874 machine_mode mode = GET_MODE (dest);
43cacb12
RS
2875 scalar_mode inner = GET_MODE_INNER (mode);
2876
2877 /* Each operand can be a register or an immediate in the range [-16, 15]. */
2878 if (!aarch64_sve_index_immediate_p (base))
2879 base = force_reg (inner, base);
2880 if (!aarch64_sve_index_immediate_p (step))
2881 step = force_reg (inner, step);
2882
2883 emit_set_insn (dest, gen_rtx_VEC_SERIES (mode, base, step));
2884}
82614948 2885
43cacb12
RS
2886/* Try to duplicate SRC into SVE register DEST, given that SRC is an
2887 integer of mode INT_MODE. Return true on success. */
2888
2889static bool
2890aarch64_expand_sve_widened_duplicate (rtx dest, scalar_int_mode src_mode,
2891 rtx src)
2892{
2893 /* If the constant is smaller than 128 bits, we can do the move
2894 using a vector of SRC_MODEs. */
2895 if (src_mode != TImode)
2896 {
2897 poly_uint64 count = exact_div (GET_MODE_SIZE (GET_MODE (dest)),
2898 GET_MODE_SIZE (src_mode));
2899 machine_mode dup_mode = mode_for_vector (src_mode, count).require ();
2900 emit_move_insn (gen_lowpart (dup_mode, dest),
2901 gen_const_vec_duplicate (dup_mode, src));
2902 return true;
2903 }
2904
947b1372 2905 /* Use LD1RQ[BHWD] to load the 128 bits from memory. */
43cacb12
RS
2906 src = force_const_mem (src_mode, src);
2907 if (!src)
2908 return false;
2909
2910 /* Make sure that the address is legitimate. */
2911 if (!aarch64_sve_ld1r_operand_p (src))
2912 {
2913 rtx addr = force_reg (Pmode, XEXP (src, 0));
2914 src = replace_equiv_address (src, addr);
2915 }
2916
947b1372
RS
2917 machine_mode mode = GET_MODE (dest);
2918 unsigned int elem_bytes = GET_MODE_UNIT_SIZE (mode);
2919 machine_mode pred_mode = aarch64_sve_pred_mode (elem_bytes).require ();
2920 rtx ptrue = force_reg (pred_mode, CONSTM1_RTX (pred_mode));
2921 src = gen_rtx_UNSPEC (mode, gen_rtvec (2, ptrue, src), UNSPEC_LD1RQ);
2922 emit_insn (gen_rtx_SET (dest, src));
43cacb12
RS
2923 return true;
2924}
2925
2926/* Expand a move of general CONST_VECTOR SRC into DEST, given that it
2927 isn't a simple duplicate or series. */
2928
2929static void
2930aarch64_expand_sve_const_vector (rtx dest, rtx src)
2931{
2932 machine_mode mode = GET_MODE (src);
2933 unsigned int npatterns = CONST_VECTOR_NPATTERNS (src);
2934 unsigned int nelts_per_pattern = CONST_VECTOR_NELTS_PER_PATTERN (src);
2935 gcc_assert (npatterns > 1);
2936
2937 if (nelts_per_pattern == 1)
2938 {
 2939 /* The constant is a repeating sequence of at least two elements,
2940 where the repeating elements occupy no more than 128 bits.
2941 Get an integer representation of the replicated value. */
8179efe0
RS
2942 scalar_int_mode int_mode;
2943 if (BYTES_BIG_ENDIAN)
2944 /* For now, always use LD1RQ to load the value on big-endian
2945 targets, since the handling of smaller integers includes a
2946 subreg that is semantically an element reverse. */
2947 int_mode = TImode;
2948 else
2949 {
2950 unsigned int int_bits = GET_MODE_UNIT_BITSIZE (mode) * npatterns;
2951 gcc_assert (int_bits <= 128);
2952 int_mode = int_mode_for_size (int_bits, 0).require ();
2953 }
43cacb12
RS
2954 rtx int_value = simplify_gen_subreg (int_mode, src, mode, 0);
2955 if (int_value
2956 && aarch64_expand_sve_widened_duplicate (dest, int_mode, int_value))
2957 return;
2958 }
2959
2960 /* Expand each pattern individually. */
2961 rtx_vector_builder builder;
2962 auto_vec<rtx, 16> vectors (npatterns);
2963 for (unsigned int i = 0; i < npatterns; ++i)
2964 {
2965 builder.new_vector (mode, 1, nelts_per_pattern);
2966 for (unsigned int j = 0; j < nelts_per_pattern; ++j)
2967 builder.quick_push (CONST_VECTOR_ELT (src, i + j * npatterns));
2968 vectors.quick_push (force_reg (mode, builder.build ()));
2969 }
2970
2971 /* Use permutes to interleave the separate vectors. */
2972 while (npatterns > 1)
2973 {
2974 npatterns /= 2;
2975 for (unsigned int i = 0; i < npatterns; ++i)
2976 {
2977 rtx tmp = (npatterns == 1 ? dest : gen_reg_rtx (mode));
2978 rtvec v = gen_rtvec (2, vectors[i], vectors[i + npatterns]);
2979 emit_set_insn (tmp, gen_rtx_UNSPEC (mode, v, UNSPEC_ZIP1));
2980 vectors[i] = tmp;
2981 }
2982 }
2983 gcc_assert (vectors[0] == dest);
2984}
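/* Worked example (illustrative, not from the GCC sources): with
   NPATTERNS == 4, the code above builds one register per pattern and
   interleaves them in two rounds of ZIP1 -- first patterns {0,2} and
   {1,3}, then the two intermediate results -- recreating the original
   element order 0,1,2,3,0,1,2,3,...  */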
2985
2986/* Set DEST to immediate IMM. For SVE vector modes, GEN_VEC_DUPLICATE
2987 is a pattern that can be used to set DEST to a replicated scalar
2988 element. */
2989
2990void
2991aarch64_expand_mov_immediate (rtx dest, rtx imm,
2992 rtx (*gen_vec_duplicate) (rtx, rtx))
2993{
2994 machine_mode mode = GET_MODE (dest);
82614948
RR
2995
2996 /* Check on what type of symbol it is. */
77e994c9
RS
2997 scalar_int_mode int_mode;
2998 if ((GET_CODE (imm) == SYMBOL_REF
2999 || GET_CODE (imm) == LABEL_REF
43cacb12
RS
3000 || GET_CODE (imm) == CONST
3001 || GET_CODE (imm) == CONST_POLY_INT)
77e994c9 3002 && is_a <scalar_int_mode> (mode, &int_mode))
82614948 3003 {
43cacb12
RS
3004 rtx mem;
3005 poly_int64 offset;
3006 HOST_WIDE_INT const_offset;
82614948
RR
3007 enum aarch64_symbol_type sty;
3008
3009 /* If we have (const (plus symbol offset)), separate out the offset
3010 before we start classifying the symbol. */
43cacb12 3011 rtx base = strip_offset (imm, &offset);
82614948 3012
43cacb12
RS
3013 /* We must always add an offset involving VL separately, rather than
3014 folding it into the relocation. */
3015 if (!offset.is_constant (&const_offset))
3016 {
3017 if (base == const0_rtx && aarch64_sve_cnt_immediate_p (offset))
3018 emit_insn (gen_rtx_SET (dest, imm));
3019 else
3020 {
3021 /* Do arithmetic on 32-bit values if the result is smaller
3022 than that. */
3023 if (partial_subreg_p (int_mode, SImode))
3024 {
3025 /* It is invalid to do symbol calculations in modes
3026 narrower than SImode. */
3027 gcc_assert (base == const0_rtx);
3028 dest = gen_lowpart (SImode, dest);
3029 int_mode = SImode;
3030 }
3031 if (base != const0_rtx)
3032 {
3033 base = aarch64_force_temporary (int_mode, dest, base);
3034 aarch64_add_offset (int_mode, dest, base, offset,
3035 NULL_RTX, NULL_RTX, false);
3036 }
3037 else
3038 aarch64_add_offset (int_mode, dest, base, offset,
3039 dest, NULL_RTX, false);
3040 }
3041 return;
3042 }
3043
3044 sty = aarch64_classify_symbol (base, const_offset);
82614948
RR
3045 switch (sty)
3046 {
3047 case SYMBOL_FORCE_TO_MEM:
43cacb12 3048 if (const_offset != 0
77e994c9 3049 && targetm.cannot_force_const_mem (int_mode, imm))
82614948
RR
3050 {
3051 gcc_assert (can_create_pseudo_p ());
77e994c9 3052 base = aarch64_force_temporary (int_mode, dest, base);
43cacb12
RS
3053 aarch64_add_offset (int_mode, dest, base, const_offset,
3054 NULL_RTX, NULL_RTX, false);
82614948
RR
3055 return;
3056 }
b4f50fd4 3057
82614948
RR
3058 mem = force_const_mem (ptr_mode, imm);
3059 gcc_assert (mem);
b4f50fd4
RR
3060
3061 /* If we aren't generating PC relative literals, then
3062 we need to expand the literal pool access carefully.
3063 This is something that needs to be done in a number
3064 of places, so could well live as a separate function. */
9ee6540a 3065 if (!aarch64_pcrelative_literal_loads)
b4f50fd4
RR
3066 {
3067 gcc_assert (can_create_pseudo_p ());
3068 base = gen_reg_rtx (ptr_mode);
3069 aarch64_expand_mov_immediate (base, XEXP (mem, 0));
00eee3fa
WD
3070 if (ptr_mode != Pmode)
3071 base = convert_memory_address (Pmode, base);
b4f50fd4
RR
3072 mem = gen_rtx_MEM (ptr_mode, base);
3073 }
3074
77e994c9
RS
3075 if (int_mode != ptr_mode)
3076 mem = gen_rtx_ZERO_EXTEND (int_mode, mem);
b4f50fd4 3077
f7df4a84 3078 emit_insn (gen_rtx_SET (dest, mem));
b4f50fd4 3079
82614948
RR
3080 return;
3081
3082 case SYMBOL_SMALL_TLSGD:
3083 case SYMBOL_SMALL_TLSDESC:
79496620 3084 case SYMBOL_SMALL_TLSIE:
1b1e81f8 3085 case SYMBOL_SMALL_GOT_28K:
6642bdb4 3086 case SYMBOL_SMALL_GOT_4G:
82614948 3087 case SYMBOL_TINY_GOT:
5ae7caad 3088 case SYMBOL_TINY_TLSIE:
43cacb12 3089 if (const_offset != 0)
82614948
RR
3090 {
3091 gcc_assert(can_create_pseudo_p ());
77e994c9 3092 base = aarch64_force_temporary (int_mode, dest, base);
43cacb12
RS
3093 aarch64_add_offset (int_mode, dest, base, const_offset,
3094 NULL_RTX, NULL_RTX, false);
82614948
RR
3095 return;
3096 }
3097 /* FALLTHRU */
3098
82614948
RR
3099 case SYMBOL_SMALL_ABSOLUTE:
3100 case SYMBOL_TINY_ABSOLUTE:
cbf5629e 3101 case SYMBOL_TLSLE12:
d18ba284 3102 case SYMBOL_TLSLE24:
cbf5629e
JW
3103 case SYMBOL_TLSLE32:
3104 case SYMBOL_TLSLE48:
82614948
RR
3105 aarch64_load_symref_appropriately (dest, imm, sty);
3106 return;
3107
3108 default:
3109 gcc_unreachable ();
3110 }
3111 }
3112
3113 if (!CONST_INT_P (imm))
3114 {
43cacb12
RS
3115 rtx base, step, value;
3116 if (GET_CODE (imm) == HIGH
3117 || aarch64_simd_valid_immediate (imm, NULL))
f7df4a84 3118 emit_insn (gen_rtx_SET (dest, imm));
43cacb12
RS
3119 else if (const_vec_series_p (imm, &base, &step))
3120 aarch64_expand_vec_series (dest, base, step);
3121 else if (const_vec_duplicate_p (imm, &value))
3122 {
3123 /* If the constant is out of range of an SVE vector move,
3124 load it from memory if we can, otherwise move it into
3125 a register and use a DUP. */
3126 scalar_mode inner_mode = GET_MODE_INNER (mode);
3127 rtx op = force_const_mem (inner_mode, value);
3128 if (!op)
3129 op = force_reg (inner_mode, value);
3130 else if (!aarch64_sve_ld1r_operand_p (op))
3131 {
3132 rtx addr = force_reg (Pmode, XEXP (op, 0));
3133 op = replace_equiv_address (op, addr);
3134 }
3135 emit_insn (gen_vec_duplicate (dest, op));
3136 }
3137 else if (GET_CODE (imm) == CONST_VECTOR
3138 && !GET_MODE_NUNITS (GET_MODE (imm)).is_constant ())
3139 aarch64_expand_sve_const_vector (dest, imm);
82614948 3140 else
43cacb12 3141 {
82614948
RR
3142 rtx mem = force_const_mem (mode, imm);
3143 gcc_assert (mem);
43cacb12 3144 emit_move_insn (dest, mem);
43e9d192 3145 }
82614948
RR
3146
3147 return;
43e9d192 3148 }
82614948 3149
77e994c9
RS
3150 aarch64_internal_mov_immediate (dest, imm, true,
3151 as_a <scalar_int_mode> (mode));
43e9d192
IB
3152}
3153
43cacb12
RS
3154/* Emit an SVE predicated move from SRC to DEST. PRED is a predicate
3155 that is known to contain PTRUE. */
3156
3157void
3158aarch64_emit_sve_pred_move (rtx dest, rtx pred, rtx src)
3159{
3160 emit_insn (gen_rtx_SET (dest, gen_rtx_UNSPEC (GET_MODE (dest),
3161 gen_rtvec (2, pred, src),
3162 UNSPEC_MERGE_PTRUE)));
3163}
3164
3165/* Expand a pre-RA SVE data move from SRC to DEST in which at least one
3166 operand is in memory. In this case we need to use the predicated LD1
3167 and ST1 instead of LDR and STR, both for correctness on big-endian
3168 targets and because LD1 and ST1 support a wider range of addressing modes.
3169 PRED_MODE is the mode of the predicate.
3170
3171 See the comment at the head of aarch64-sve.md for details about the
3172 big-endian handling. */
3173
3174void
3175aarch64_expand_sve_mem_move (rtx dest, rtx src, machine_mode pred_mode)
3176{
3177 machine_mode mode = GET_MODE (dest);
3178 rtx ptrue = force_reg (pred_mode, CONSTM1_RTX (pred_mode));
3179 if (!register_operand (src, mode)
3180 && !register_operand (dest, mode))
3181 {
3182 rtx tmp = gen_reg_rtx (mode);
3183 if (MEM_P (src))
3184 aarch64_emit_sve_pred_move (tmp, ptrue, src);
3185 else
3186 emit_move_insn (tmp, src);
3187 src = tmp;
3188 }
3189 aarch64_emit_sve_pred_move (dest, ptrue, src);
3190}
3191
002092be
RS
3192/* Called only on big-endian targets. See whether an SVE vector move
3193 from SRC to DEST is effectively a REV[BHW] instruction, because at
3194 least one operand is a subreg of an SVE vector that has wider or
3195 narrower elements. Return true and emit the instruction if so.
3196
3197 For example:
3198
3199 (set (reg:VNx8HI R1) (subreg:VNx8HI (reg:VNx16QI R2) 0))
3200
3201 represents a VIEW_CONVERT between the following vectors, viewed
3202 in memory order:
3203
3204 R2: { [0].high, [0].low, [1].high, [1].low, ... }
3205 R1: { [0], [1], [2], [3], ... }
3206
3207 The high part of lane X in R2 should therefore correspond to lane X*2
3208 of R1, but the register representations are:
3209
3210 msb lsb
3211 R2: ...... [1].high [1].low [0].high [0].low
3212 R1: ...... [3] [2] [1] [0]
3213
3214 where the low part of lane X in R2 corresponds to lane X*2 in R1.
3215 We therefore need a reverse operation to swap the high and low values
3216 around.
3217
3218 This is purely an optimization. Without it we would spill the
3219 subreg operand to the stack in one mode and reload it in the
3220 other mode, which has the same effect as the REV. */
3221
3222bool
3223aarch64_maybe_expand_sve_subreg_move (rtx dest, rtx src)
3224{
3225 gcc_assert (BYTES_BIG_ENDIAN);
3226 if (GET_CODE (dest) == SUBREG)
3227 dest = SUBREG_REG (dest);
3228 if (GET_CODE (src) == SUBREG)
3229 src = SUBREG_REG (src);
3230
3231 /* The optimization handles two single SVE REGs with different element
3232 sizes. */
3233 if (!REG_P (dest)
3234 || !REG_P (src)
3235 || aarch64_classify_vector_mode (GET_MODE (dest)) != VEC_SVE_DATA
3236 || aarch64_classify_vector_mode (GET_MODE (src)) != VEC_SVE_DATA
3237 || (GET_MODE_UNIT_SIZE (GET_MODE (dest))
3238 == GET_MODE_UNIT_SIZE (GET_MODE (src))))
3239 return false;
3240
3241 /* Generate *aarch64_sve_mov<mode>_subreg_be. */
3242 rtx ptrue = force_reg (VNx16BImode, CONSTM1_RTX (VNx16BImode));
3243 rtx unspec = gen_rtx_UNSPEC (GET_MODE (dest), gen_rtvec (2, ptrue, src),
3244 UNSPEC_REV_SUBREG);
3245 emit_insn (gen_rtx_SET (dest, unspec));
3246 return true;
3247}
3248
3249/* Return a copy of X with mode MODE, without changing its other
3250 attributes. Unlike gen_lowpart, this doesn't care whether the
3251 mode change is valid. */
3252
3253static rtx
3254aarch64_replace_reg_mode (rtx x, machine_mode mode)
3255{
3256 if (GET_MODE (x) == mode)
3257 return x;
3258
3259 x = shallow_copy_rtx (x);
3260 set_mode_and_regno (x, mode, REGNO (x));
3261 return x;
3262}
3263
3264/* Split a *aarch64_sve_mov<mode>_subreg_be pattern with the given
3265 operands. */
3266
3267void
3268aarch64_split_sve_subreg_move (rtx dest, rtx ptrue, rtx src)
3269{
3270 /* Decide which REV operation we need. The mode with narrower elements
3271 determines the mode of the operands and the mode with the wider
3272 elements determines the reverse width. */
3273 machine_mode mode_with_wider_elts = GET_MODE (dest);
3274 machine_mode mode_with_narrower_elts = GET_MODE (src);
3275 if (GET_MODE_UNIT_SIZE (mode_with_wider_elts)
3276 < GET_MODE_UNIT_SIZE (mode_with_narrower_elts))
3277 std::swap (mode_with_wider_elts, mode_with_narrower_elts);
3278
3279 unsigned int wider_bytes = GET_MODE_UNIT_SIZE (mode_with_wider_elts);
3280 unsigned int unspec;
3281 if (wider_bytes == 8)
3282 unspec = UNSPEC_REV64;
3283 else if (wider_bytes == 4)
3284 unspec = UNSPEC_REV32;
3285 else if (wider_bytes == 2)
3286 unspec = UNSPEC_REV16;
3287 else
3288 gcc_unreachable ();
3289 machine_mode pred_mode = aarch64_sve_pred_mode (wider_bytes).require ();
3290
3291 /* Emit:
3292
3293 (set DEST (unspec [PTRUE (unspec [SRC] UNSPEC_REV<nn>)]
3294 UNSPEC_MERGE_PTRUE))
3295
3296 with the appropriate modes. */
3297 ptrue = gen_lowpart (pred_mode, ptrue);
3298 dest = aarch64_replace_reg_mode (dest, mode_with_narrower_elts);
3299 src = aarch64_replace_reg_mode (src, mode_with_narrower_elts);
3300 src = gen_rtx_UNSPEC (mode_with_narrower_elts, gen_rtvec (1, src), unspec);
3301 src = gen_rtx_UNSPEC (mode_with_narrower_elts, gen_rtvec (2, ptrue, src),
3302 UNSPEC_MERGE_PTRUE);
3303 emit_insn (gen_rtx_SET (dest, src));
3304}
3305
43e9d192 3306static bool
fee9ba42
JW
3307aarch64_function_ok_for_sibcall (tree decl ATTRIBUTE_UNUSED,
3308 tree exp ATTRIBUTE_UNUSED)
43e9d192 3309{
fee9ba42 3310 /* Currently, always true. */
43e9d192
IB
3311 return true;
3312}
3313
3314/* Implement TARGET_PASS_BY_REFERENCE. */
3315
3316static bool
3317aarch64_pass_by_reference (cumulative_args_t pcum ATTRIBUTE_UNUSED,
ef4bddc2 3318 machine_mode mode,
43e9d192
IB
3319 const_tree type,
3320 bool named ATTRIBUTE_UNUSED)
3321{
3322 HOST_WIDE_INT size;
ef4bddc2 3323 machine_mode dummymode;
43e9d192
IB
3324 int nregs;
3325
3326 /* GET_MODE_SIZE (BLKmode) is useless since it is 0. */
6a70badb
RS
3327 if (mode == BLKmode && type)
3328 size = int_size_in_bytes (type);
3329 else
3330 /* No frontends can create types with variable-sized modes, so we
3331 shouldn't be asked to pass or return them. */
3332 size = GET_MODE_SIZE (mode).to_constant ();
43e9d192 3333
aadc1c43
MHD
3334 /* Aggregates are passed by reference based on their size. */
3335 if (type && AGGREGATE_TYPE_P (type))
43e9d192 3336 {
aadc1c43 3337 size = int_size_in_bytes (type);
43e9d192
IB
3338 }
3339
3340 /* Variable-sized arguments are always passed by reference. */
3341 if (size < 0)
3342 return true;
3343
3344 /* Can this be a candidate to be passed in fp/simd register(s)? */
3345 if (aarch64_vfp_is_call_or_return_candidate (mode, type,
3346 &dummymode, &nregs,
3347 NULL))
3348 return false;
3349
3350 /* Arguments which are variable sized or larger than 2 registers are
3351 passed by reference unless they are a homogeneous floating-point
3352 aggregate. */
3353 return size > 2 * UNITS_PER_WORD;
3354}
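/* Illustrative example, not part of the original source (type names are
   hypothetical):

     struct hfa { double a, b, c, d; };   -- an HFA of four doubles
     struct big { long x, y, z; };        -- 24 bytes, not an HFA

   aarch64_pass_by_reference returns false for struct hfa, which is a
   fp/simd candidate and travels in V0-V3, but true for struct big, which
   is larger than 2 * UNITS_PER_WORD, so the caller copies it and passes a
   pointer instead.  */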
3355
3356/* Return TRUE if VALTYPE is padded to its least significant bits. */
3357static bool
3358aarch64_return_in_msb (const_tree valtype)
3359{
ef4bddc2 3360 machine_mode dummy_mode;
43e9d192
IB
3361 int dummy_int;
3362
3363 /* Never happens in little-endian mode. */
3364 if (!BYTES_BIG_ENDIAN)
3365 return false;
3366
3367 /* Only composite types smaller than or equal to 16 bytes can
3368 be potentially returned in registers. */
3369 if (!aarch64_composite_type_p (valtype, TYPE_MODE (valtype))
3370 || int_size_in_bytes (valtype) <= 0
3371 || int_size_in_bytes (valtype) > 16)
3372 return false;
3373
3374 /* But not a composite that is an HFA (Homogeneous Floating-point Aggregate)
3375 or an HVA (Homogeneous Short-Vector Aggregate); such a special composite
3376 is always passed/returned in the least significant bits of fp/simd
3377 register(s). */
3378 if (aarch64_vfp_is_call_or_return_candidate (TYPE_MODE (valtype), valtype,
3379 &dummy_mode, &dummy_int, NULL))
3380 return false;
3381
3382 return true;
3383}
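/* Illustrative example, not part of the original source: on a big-endian
   target a hypothetical 3-byte "struct s { char a, b, c; }" is a composite
   of at most 16 bytes and not an HFA, so the hook above returns true and
   the value is returned at the most significant end of x0, with the
   padding in the least significant bits.  An HFA such as
   "struct v { float x, y; }" is excluded and stays in the low bits of its
   fp/simd registers.  */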
3384
3385/* Implement TARGET_FUNCTION_VALUE.
3386 Define how to find the value returned by a function. */
3387
3388static rtx
3389aarch64_function_value (const_tree type, const_tree func,
3390 bool outgoing ATTRIBUTE_UNUSED)
3391{
ef4bddc2 3392 machine_mode mode;
43e9d192
IB
3393 int unsignedp;
3394 int count;
ef4bddc2 3395 machine_mode ag_mode;
43e9d192
IB
3396
3397 mode = TYPE_MODE (type);
3398 if (INTEGRAL_TYPE_P (type))
3399 mode = promote_function_mode (type, mode, &unsignedp, func, 1);
3400
3401 if (aarch64_return_in_msb (type))
3402 {
3403 HOST_WIDE_INT size = int_size_in_bytes (type);
3404
3405 if (size % UNITS_PER_WORD != 0)
3406 {
3407 size += UNITS_PER_WORD - size % UNITS_PER_WORD;
f4b31647 3408 mode = int_mode_for_size (size * BITS_PER_UNIT, 0).require ();
43e9d192
IB
3409 }
3410 }
3411
3412 if (aarch64_vfp_is_call_or_return_candidate (mode, type,
3413 &ag_mode, &count, NULL))
3414 {
3415 if (!aarch64_composite_type_p (type, mode))
3416 {
3417 gcc_assert (count == 1 && mode == ag_mode);
3418 return gen_rtx_REG (mode, V0_REGNUM);
3419 }
3420 else
3421 {
3422 int i;
3423 rtx par;
3424
3425 par = gen_rtx_PARALLEL (mode, rtvec_alloc (count));
3426 for (i = 0; i < count; i++)
3427 {
3428 rtx tmp = gen_rtx_REG (ag_mode, V0_REGNUM + i);
6a70badb
RS
3429 rtx offset = gen_int_mode (i * GET_MODE_SIZE (ag_mode), Pmode);
3430 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp, offset);
43e9d192
IB
3431 XVECEXP (par, 0, i) = tmp;
3432 }
3433 return par;
3434 }
3435 }
3436 else
3437 return gen_rtx_REG (mode, R0_REGNUM);
3438}
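/* Illustrative sketch, not part of the original source: for a hypothetical
   return type

     struct hfa3 { float a, b, c; };

   aarch64_vfp_is_call_or_return_candidate reports count == 3 with SFmode
   elements, so the function above builds a PARALLEL of three EXPR_LISTs
   mapping s0, s1 and s2 to byte offsets 0, 4 and 8 of the returned
   value.  */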
3439
3440/* Implement TARGET_FUNCTION_VALUE_REGNO_P.
3441 Return true if REGNO is the number of a hard register in which the values
3442 of a called function may come back. */
3443
3444static bool
3445aarch64_function_value_regno_p (const unsigned int regno)
3446{
3447 /* Maximum of 16 bytes can be returned in the general registers. Examples
3448 of 16-byte return values are: 128-bit integers and 16-byte small
3449 structures (excluding homogeneous floating-point aggregates). */
3450 if (regno == R0_REGNUM || regno == R1_REGNUM)
3451 return true;
3452
3453 /* Up to four fp/simd registers can return a function value, e.g. a
3454 homogeneous floating-point aggregate having four members. */
3455 if (regno >= V0_REGNUM && regno < V0_REGNUM + HA_MAX_NUM_FLDS)
d5726973 3456 return TARGET_FLOAT;
43e9d192
IB
3457
3458 return false;
3459}
3460
3461/* Implement TARGET_RETURN_IN_MEMORY.
3462
3463 If the type T of the result of a function is such that
3464 void func (T arg)
3465 would require that arg be passed as a value in a register (or set of
3466 registers) according to the parameter passing rules, then the result
3467 is returned in the same registers as would be used for such an
3468 argument. */
3469
3470static bool
3471aarch64_return_in_memory (const_tree type, const_tree fndecl ATTRIBUTE_UNUSED)
3472{
3473 HOST_WIDE_INT size;
ef4bddc2 3474 machine_mode ag_mode;
43e9d192
IB
3475 int count;
3476
3477 if (!AGGREGATE_TYPE_P (type)
3478 && TREE_CODE (type) != COMPLEX_TYPE
3479 && TREE_CODE (type) != VECTOR_TYPE)
3480 /* Simple scalar types are always returned in registers. */
3481 return false;
3482
3483 if (aarch64_vfp_is_call_or_return_candidate (TYPE_MODE (type),
3484 type,
3485 &ag_mode,
3486 &count,
3487 NULL))
3488 return false;
3489
3490 /* Types larger than 2 registers are returned in memory. */
3491 size = int_size_in_bytes (type);
3492 return (size < 0 || size > 2 * UNITS_PER_WORD);
3493}
3494
3495static bool
ef4bddc2 3496aarch64_vfp_is_call_candidate (cumulative_args_t pcum_v, machine_mode mode,
43e9d192
IB
3497 const_tree type, int *nregs)
3498{
3499 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
3500 return aarch64_vfp_is_call_or_return_candidate (mode,
3501 type,
3502 &pcum->aapcs_vfp_rmode,
3503 nregs,
3504 NULL);
3505}
3506
985b8393 3507/* Given MODE and TYPE of a function argument, return the alignment in
43e9d192
IB
3508 bits. The idea is to suppress any stronger alignment requested by
3509 the user and opt for the natural alignment (specified in AAPCS64 \S 4.1).
3510 This is a helper function for local use only. */
3511
985b8393 3512static unsigned int
ef4bddc2 3513aarch64_function_arg_alignment (machine_mode mode, const_tree type)
43e9d192 3514{
75d6cc81 3515 if (!type)
985b8393 3516 return GET_MODE_ALIGNMENT (mode);
2ec07fa6 3517
75d6cc81 3518 if (integer_zerop (TYPE_SIZE (type)))
985b8393 3519 return 0;
43e9d192 3520
75d6cc81
AL
3521 gcc_assert (TYPE_MODE (type) == mode);
3522
3523 if (!AGGREGATE_TYPE_P (type))
985b8393 3524 return TYPE_ALIGN (TYPE_MAIN_VARIANT (type));
75d6cc81
AL
3525
3526 if (TREE_CODE (type) == ARRAY_TYPE)
985b8393 3527 return TYPE_ALIGN (TREE_TYPE (type));
75d6cc81 3528
985b8393 3529 unsigned int alignment = 0;
75d6cc81 3530 for (tree field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
985b8393
JJ
3531 if (TREE_CODE (field) == FIELD_DECL)
3532 alignment = std::max (alignment, DECL_ALIGN (field));
43e9d192 3533
985b8393 3534 return alignment;
43e9d192
IB
3535}
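/* Worked example, not part of the original source (the type is
   hypothetical):

     struct s { char c; } __attribute__ ((aligned (64)));

   The 64-byte alignment requested by the user lives on the type, not on
   the FIELD_DECL, so the loop above returns the field's own alignment
   (8 bits for the char) and the over-alignment is ignored for
   argument-passing purposes, which is the "suppress any stronger
   alignment" behaviour described above.  */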
3536
3537/* Layout a function argument according to the AAPCS64 rules. The rule
3538 numbers refer to the rule numbers in the AAPCS64. */
3539
3540static void
ef4bddc2 3541aarch64_layout_arg (cumulative_args_t pcum_v, machine_mode mode,
43e9d192
IB
3542 const_tree type,
3543 bool named ATTRIBUTE_UNUSED)
3544{
3545 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
3546 int ncrn, nvrn, nregs;
3547 bool allocate_ncrn, allocate_nvrn;
3abf17cf 3548 HOST_WIDE_INT size;
43e9d192
IB
3549
3550 /* We need to do this once per argument. */
3551 if (pcum->aapcs_arg_processed)
3552 return;
3553
3554 pcum->aapcs_arg_processed = true;
3555
3abf17cf 3556 /* Size in bytes, rounded up to the nearest multiple of 8 bytes. */
6a70badb
RS
3557 if (type)
3558 size = int_size_in_bytes (type);
3559 else
3560 /* No frontends can create types with variable-sized modes, so we
3561 shouldn't be asked to pass or return them. */
3562 size = GET_MODE_SIZE (mode).to_constant ();
3563 size = ROUND_UP (size, UNITS_PER_WORD);
3abf17cf 3564
43e9d192
IB
3565 allocate_ncrn = (type) ? !(FLOAT_TYPE_P (type)) : !FLOAT_MODE_P (mode);
3566 allocate_nvrn = aarch64_vfp_is_call_candidate (pcum_v,
3567 mode,
3568 type,
3569 &nregs);
3570
3571 /* allocate_ncrn may be a false positive, but allocate_nvrn is quite reliable.
3572 The following code thus handles passing by SIMD/FP registers first. */
3573
3574 nvrn = pcum->aapcs_nvrn;
3575
3576 /* C1 - C5 for floating point, homogeneous floating-point aggregates (HFA)
3577 and homogeneous short-vector aggregates (HVA). */
3578 if (allocate_nvrn)
3579 {
261fb553 3580 if (!TARGET_FLOAT)
fc29dfc9 3581 aarch64_err_no_fpadvsimd (mode);
261fb553 3582
43e9d192
IB
3583 if (nvrn + nregs <= NUM_FP_ARG_REGS)
3584 {
3585 pcum->aapcs_nextnvrn = nvrn + nregs;
3586 if (!aarch64_composite_type_p (type, mode))
3587 {
3588 gcc_assert (nregs == 1);
3589 pcum->aapcs_reg = gen_rtx_REG (mode, V0_REGNUM + nvrn);
3590 }
3591 else
3592 {
3593 rtx par;
3594 int i;
3595 par = gen_rtx_PARALLEL (mode, rtvec_alloc (nregs));
3596 for (i = 0; i < nregs; i++)
3597 {
3598 rtx tmp = gen_rtx_REG (pcum->aapcs_vfp_rmode,
3599 V0_REGNUM + nvrn + i);
6a70badb
RS
3600 rtx offset = gen_int_mode
3601 (i * GET_MODE_SIZE (pcum->aapcs_vfp_rmode), Pmode);
3602 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp, offset);
43e9d192
IB
3603 XVECEXP (par, 0, i) = tmp;
3604 }
3605 pcum->aapcs_reg = par;
3606 }
3607 return;
3608 }
3609 else
3610 {
3611 /* C.3 NSRN is set to 8. */
3612 pcum->aapcs_nextnvrn = NUM_FP_ARG_REGS;
3613 goto on_stack;
3614 }
3615 }
3616
3617 ncrn = pcum->aapcs_ncrn;
3abf17cf 3618 nregs = size / UNITS_PER_WORD;
43e9d192
IB
3619
3620 /* C6 - C9, though the sign and zero extension semantics are
3621 handled elsewhere. This is the case where the argument fits
3622 entirely in general registers. */
3623 if (allocate_ncrn && (ncrn + nregs <= NUM_ARG_REGS))
3624 {
43e9d192
IB
3625
3626 gcc_assert (nregs == 0 || nregs == 1 || nregs == 2);
3627
3628 /* C.8: if the argument has an alignment of 16 bytes then the NGRN is
3629 rounded up to the next even number. */
985b8393
JJ
3630 if (nregs == 2
3631 && ncrn % 2
2ec07fa6 3632 /* The == 16 * BITS_PER_UNIT instead of >= 16 * BITS_PER_UNIT
985b8393 3633 comparison is there because for > 16 * BITS_PER_UNIT
2ec07fa6
RR
3634 alignment nregs should be > 2 and therefore it should be
3635 passed by reference rather than value. */
985b8393
JJ
3636 && aarch64_function_arg_alignment (mode, type) == 16 * BITS_PER_UNIT)
3637 {
3638 ++ncrn;
3639 gcc_assert (ncrn + nregs <= NUM_ARG_REGS);
43e9d192 3640 }
2ec07fa6 3641
43e9d192
IB
3642 /* NREGS can be 0 when e.g. an empty structure is to be passed.
3643 A reg is still generated for it, but the caller should be smart
3644 enough not to use it. */
3645 if (nregs == 0 || nregs == 1 || GET_MODE_CLASS (mode) == MODE_INT)
2ec07fa6 3646 pcum->aapcs_reg = gen_rtx_REG (mode, R0_REGNUM + ncrn);
43e9d192
IB
3647 else
3648 {
3649 rtx par;
3650 int i;
3651
3652 par = gen_rtx_PARALLEL (mode, rtvec_alloc (nregs));
3653 for (i = 0; i < nregs; i++)
3654 {
3655 rtx tmp = gen_rtx_REG (word_mode, R0_REGNUM + ncrn + i);
3656 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp,
3657 GEN_INT (i * UNITS_PER_WORD));
3658 XVECEXP (par, 0, i) = tmp;
3659 }
3660 pcum->aapcs_reg = par;
3661 }
3662
3663 pcum->aapcs_nextncrn = ncrn + nregs;
3664 return;
3665 }
3666
3667 /* C.11 */
3668 pcum->aapcs_nextncrn = NUM_ARG_REGS;
3669
3670 /* The argument is passed on stack; record the needed number of words for
3abf17cf 3671 this argument and align the total size if necessary. */
43e9d192 3672on_stack:
3abf17cf 3673 pcum->aapcs_stack_words = size / UNITS_PER_WORD;
2ec07fa6 3674
985b8393 3675 if (aarch64_function_arg_alignment (mode, type) == 16 * BITS_PER_UNIT)
4f59f9f2
UB
3676 pcum->aapcs_stack_size = ROUND_UP (pcum->aapcs_stack_size,
3677 16 / UNITS_PER_WORD);
43e9d192
IB
3678 return;
3679}
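/* Illustrative example, not part of the original source: a 16-byte,
   16-byte-aligned argument such as __int128 exercises rule C.8 above.
   If the next general register number is odd (say the next free register
   is x1), ncrn is bumped so the value goes in an even/odd pair (x2/x3);
   if it ends up on the stack instead, the NSAA is rounded up to a 16-byte
   boundary before the slot is allocated.  */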
3680
3681/* Implement TARGET_FUNCTION_ARG. */
3682
3683static rtx
ef4bddc2 3684aarch64_function_arg (cumulative_args_t pcum_v, machine_mode mode,
43e9d192
IB
3685 const_tree type, bool named)
3686{
3687 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
3688 gcc_assert (pcum->pcs_variant == ARM_PCS_AAPCS64);
3689
3690 if (mode == VOIDmode)
3691 return NULL_RTX;
3692
3693 aarch64_layout_arg (pcum_v, mode, type, named);
3694 return pcum->aapcs_reg;
3695}
3696
3697void
3698aarch64_init_cumulative_args (CUMULATIVE_ARGS *pcum,
3699 const_tree fntype ATTRIBUTE_UNUSED,
3700 rtx libname ATTRIBUTE_UNUSED,
3701 const_tree fndecl ATTRIBUTE_UNUSED,
3702 unsigned n_named ATTRIBUTE_UNUSED)
3703{
3704 pcum->aapcs_ncrn = 0;
3705 pcum->aapcs_nvrn = 0;
3706 pcum->aapcs_nextncrn = 0;
3707 pcum->aapcs_nextnvrn = 0;
3708 pcum->pcs_variant = ARM_PCS_AAPCS64;
3709 pcum->aapcs_reg = NULL_RTX;
3710 pcum->aapcs_arg_processed = false;
3711 pcum->aapcs_stack_words = 0;
3712 pcum->aapcs_stack_size = 0;
3713
261fb553
AL
3714 if (!TARGET_FLOAT
3715 && fndecl && TREE_PUBLIC (fndecl)
3716 && fntype && fntype != error_mark_node)
3717 {
3718 const_tree type = TREE_TYPE (fntype);
3719 machine_mode mode ATTRIBUTE_UNUSED; /* To pass pointer as argument. */
3720 int nregs ATTRIBUTE_UNUSED; /* Likewise. */
3721 if (aarch64_vfp_is_call_or_return_candidate (TYPE_MODE (type), type,
3722 &mode, &nregs, NULL))
fc29dfc9 3723 aarch64_err_no_fpadvsimd (TYPE_MODE (type));
261fb553 3724 }
43e9d192
IB
3725 return;
3726}
3727
3728static void
3729aarch64_function_arg_advance (cumulative_args_t pcum_v,
ef4bddc2 3730 machine_mode mode,
43e9d192
IB
3731 const_tree type,
3732 bool named)
3733{
3734 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
3735 if (pcum->pcs_variant == ARM_PCS_AAPCS64)
3736 {
3737 aarch64_layout_arg (pcum_v, mode, type, named);
3738 gcc_assert ((pcum->aapcs_reg != NULL_RTX)
3739 != (pcum->aapcs_stack_words != 0));
3740 pcum->aapcs_arg_processed = false;
3741 pcum->aapcs_ncrn = pcum->aapcs_nextncrn;
3742 pcum->aapcs_nvrn = pcum->aapcs_nextnvrn;
3743 pcum->aapcs_stack_size += pcum->aapcs_stack_words;
3744 pcum->aapcs_stack_words = 0;
3745 pcum->aapcs_reg = NULL_RTX;
3746 }
3747}
3748
3749bool
3750aarch64_function_arg_regno_p (unsigned regno)
3751{
3752 return ((GP_REGNUM_P (regno) && regno < R0_REGNUM + NUM_ARG_REGS)
3753 || (FP_REGNUM_P (regno) && regno < V0_REGNUM + NUM_FP_ARG_REGS));
3754}
3755
3756/* Implement FUNCTION_ARG_BOUNDARY. Every parameter gets at least
3757 PARM_BOUNDARY bits of alignment, but will be given anything up
3758 to STACK_BOUNDARY bits if the type requires it. This makes sure
3759 that both before and after the layout of each argument, the Next
3760 Stacked Argument Address (NSAA) will have a minimum alignment of
3761 8 bytes. */
3762
3763static unsigned int
ef4bddc2 3764aarch64_function_arg_boundary (machine_mode mode, const_tree type)
43e9d192 3765{
985b8393
JJ
3766 unsigned int alignment = aarch64_function_arg_alignment (mode, type);
3767 return MIN (MAX (alignment, PARM_BOUNDARY), STACK_BOUNDARY);
43e9d192
IB
3768}
3769
43cacb12
RS
3770/* Implement TARGET_GET_RAW_RESULT_MODE and TARGET_GET_RAW_ARG_MODE. */
3771
3772static fixed_size_mode
3773aarch64_get_reg_raw_mode (int regno)
3774{
3775 if (TARGET_SVE && FP_REGNUM_P (regno))
3776 /* Don't use the SVE part of the register for __builtin_apply and
3777 __builtin_return. The SVE registers aren't used by the normal PCS,
3778 so using them there would be a waste of time. The PCS extensions
3779 for SVE types are fundamentally incompatible with the
3780 __builtin_return/__builtin_apply interface. */
3781 return as_a <fixed_size_mode> (V16QImode);
3782 return default_get_reg_raw_mode (regno);
3783}
3784
76b0cbf8 3785/* Implement TARGET_FUNCTION_ARG_PADDING.
43e9d192
IB
3786
3787 Small aggregate types are placed in the lowest memory address.
3788
3789 The related parameter passing rules are B.4, C.3, C.5 and C.14. */
3790
76b0cbf8
RS
3791static pad_direction
3792aarch64_function_arg_padding (machine_mode mode, const_tree type)
43e9d192
IB
3793{
3794 /* On little-endian targets, the least significant byte of every stack
3795 argument is passed at the lowest byte address of the stack slot. */
3796 if (!BYTES_BIG_ENDIAN)
76b0cbf8 3797 return PAD_UPWARD;
43e9d192 3798
00edcfbe 3799 /* Otherwise, integral, floating-point and pointer types are padded downward:
43e9d192
IB
3800 the least significant byte of a stack argument is passed at the highest
3801 byte address of the stack slot. */
3802 if (type
00edcfbe
YZ
3803 ? (INTEGRAL_TYPE_P (type) || SCALAR_FLOAT_TYPE_P (type)
3804 || POINTER_TYPE_P (type))
43e9d192 3805 : (SCALAR_INT_MODE_P (mode) || SCALAR_FLOAT_MODE_P (mode)))
76b0cbf8 3806 return PAD_DOWNWARD;
43e9d192
IB
3807
3808 /* Everything else padded upward, i.e. data in first byte of stack slot. */
76b0cbf8 3809 return PAD_UPWARD;
43e9d192
IB
3810}
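/* Illustrative example, not part of the original source: on a big-endian
   target a single char passed on the stack occupies an 8-byte slot and is
   padded downward, so the value byte sits at the highest address of the
   slot, whereas a small structure (a composite) falls through to
   PAD_UPWARD and its first byte lands at the lowest address of the
   slot.  */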
3811
3812/* Similarly, for use by BLOCK_REG_PADDING (MODE, TYPE, FIRST).
3813
3814 It specifies padding for the last (possibly also the only)
3815 element of a block move between registers and memory. Assuming
3816 the block is in memory, padding upward means that the last
3817 element is padded after its most significant byte, while in
3818 downward padding the last element is padded on its least
3819 significant byte side.
3820
3821 Small aggregates and small complex types are always padded
3822 upwards.
3823
3824 We don't need to worry about homogeneous floating-point or
3825 short-vector aggregates; their move is not affected by the
3826 padding direction determined here. Regardless of endianness,
3827 each element of such an aggregate is put in the least
3828 significant bits of a fp/simd register.
3829
3830 Return !BYTES_BIG_ENDIAN if the least significant byte of the
3831 register has useful data, and return the opposite if the most
3832 significant byte does. */
3833
3834bool
ef4bddc2 3835aarch64_pad_reg_upward (machine_mode mode, const_tree type,
43e9d192
IB
3836 bool first ATTRIBUTE_UNUSED)
3837{
3838
3839 /* Small composite types are always padded upward. */
3840 if (BYTES_BIG_ENDIAN && aarch64_composite_type_p (type, mode))
3841 {
6a70badb
RS
3842 HOST_WIDE_INT size;
3843 if (type)
3844 size = int_size_in_bytes (type);
3845 else
3846 /* No frontends can create types with variable-sized modes, so we
3847 shouldn't be asked to pass or return them. */
3848 size = GET_MODE_SIZE (mode).to_constant ();
43e9d192
IB
3849 if (size < 2 * UNITS_PER_WORD)
3850 return true;
3851 }
3852
3853 /* Otherwise, use the default padding. */
3854 return !BYTES_BIG_ENDIAN;
3855}
3856
095a2d76 3857static scalar_int_mode
43e9d192
IB
3858aarch64_libgcc_cmp_return_mode (void)
3859{
3860 return SImode;
3861}
3862
a3eb8a52
EB
3863#define PROBE_INTERVAL (1 << STACK_CHECK_PROBE_INTERVAL_EXP)
3864
3865/* We use the 12-bit shifted immediate arithmetic instructions so values
3866 must be multiple of (1 << 12), i.e. 4096. */
3867#define ARITH_FACTOR 4096
3868
3869#if (PROBE_INTERVAL % ARITH_FACTOR) != 0
3870#error Cannot use simple address calculation for stack probing
3871#endif
3872
3873/* The pair of scratch registers used for stack probing. */
3874#define PROBE_STACK_FIRST_REG 9
3875#define PROBE_STACK_SECOND_REG 10
3876
6a70badb 3877/* Emit code to probe a range of stack addresses from FIRST to FIRST+POLY_SIZE,
a3eb8a52
EB
3878 inclusive. These are offsets from the current stack pointer. */
3879
3880static void
6a70badb 3881aarch64_emit_probe_stack_range (HOST_WIDE_INT first, poly_int64 poly_size)
a3eb8a52 3882{
6a70badb
RS
3883 HOST_WIDE_INT size;
3884 if (!poly_size.is_constant (&size))
3885 {
3886 sorry ("stack probes for SVE frames");
3887 return;
3888 }
3889
5f5c5e0f 3890 rtx reg1 = gen_rtx_REG (Pmode, PROBE_STACK_FIRST_REG);
a3eb8a52
EB
3891
3892 /* See the same assertion on PROBE_INTERVAL above. */
3893 gcc_assert ((first % ARITH_FACTOR) == 0);
3894
3895 /* See if we have a constant small number of probes to generate. If so,
3896 that's the easy case. */
3897 if (size <= PROBE_INTERVAL)
3898 {
3899 const HOST_WIDE_INT base = ROUND_UP (size, ARITH_FACTOR);
3900
3901 emit_set_insn (reg1,
5f5c5e0f 3902 plus_constant (Pmode,
a3eb8a52 3903 stack_pointer_rtx, -(first + base)));
5f5c5e0f 3904 emit_stack_probe (plus_constant (Pmode, reg1, base - size));
a3eb8a52
EB
3905 }
3906
3907 /* The run-time loop is made up of 8 insns in the generic case while the
3908 compile-time loop is made up of 4+2*(n-2) insns for n # of intervals. */
3909 else if (size <= 4 * PROBE_INTERVAL)
3910 {
3911 HOST_WIDE_INT i, rem;
3912
3913 emit_set_insn (reg1,
5f5c5e0f 3914 plus_constant (Pmode,
a3eb8a52
EB
3915 stack_pointer_rtx,
3916 -(first + PROBE_INTERVAL)));
3917 emit_stack_probe (reg1);
3918
3919 /* Probe at FIRST + N * PROBE_INTERVAL for values of N from 2 until
3920 it exceeds SIZE. If only two probes are needed, this will not
3921 generate any code. Then probe at FIRST + SIZE. */
3922 for (i = 2 * PROBE_INTERVAL; i < size; i += PROBE_INTERVAL)
3923 {
3924 emit_set_insn (reg1,
5f5c5e0f 3925 plus_constant (Pmode, reg1, -PROBE_INTERVAL));
a3eb8a52
EB
3926 emit_stack_probe (reg1);
3927 }
3928
3929 rem = size - (i - PROBE_INTERVAL);
3930 if (rem > 256)
3931 {
3932 const HOST_WIDE_INT base = ROUND_UP (rem, ARITH_FACTOR);
3933
5f5c5e0f
EB
3934 emit_set_insn (reg1, plus_constant (Pmode, reg1, -base));
3935 emit_stack_probe (plus_constant (Pmode, reg1, base - rem));
a3eb8a52
EB
3936 }
3937 else
5f5c5e0f 3938 emit_stack_probe (plus_constant (Pmode, reg1, -rem));
a3eb8a52
EB
3939 }
3940
3941 /* Otherwise, do the same as above, but in a loop. Note that we must be
3942 extra careful with variables wrapping around because we might be at
3943 the very top (or the very bottom) of the address space and we have
3944 to be able to handle this case properly; in particular, we use an
3945 equality test for the loop condition. */
3946 else
3947 {
5f5c5e0f 3948 rtx reg2 = gen_rtx_REG (Pmode, PROBE_STACK_SECOND_REG);
a3eb8a52
EB
3949
3950 /* Step 1: round SIZE to the previous multiple of the interval. */
3951
3952 HOST_WIDE_INT rounded_size = size & -PROBE_INTERVAL;
3953
3954
3955 /* Step 2: compute initial and final value of the loop counter. */
3956
3957 /* TEST_ADDR = SP + FIRST. */
3958 emit_set_insn (reg1,
5f5c5e0f 3959 plus_constant (Pmode, stack_pointer_rtx, -first));
a3eb8a52
EB
3960
3961 /* LAST_ADDR = SP + FIRST + ROUNDED_SIZE. */
13f752b2
JL
3962 HOST_WIDE_INT adjustment = - (first + rounded_size);
3963 if (! aarch64_uimm12_shift (adjustment))
3964 {
3965 aarch64_internal_mov_immediate (reg2, GEN_INT (adjustment),
3966 true, Pmode);
3967 emit_set_insn (reg2, gen_rtx_PLUS (Pmode, stack_pointer_rtx, reg2));
3968 }
3969 else
8dd64cdf
EB
3970 emit_set_insn (reg2,
3971 plus_constant (Pmode, stack_pointer_rtx, adjustment));
3972
a3eb8a52
EB
3973 /* Step 3: the loop
3974
3975 do
3976 {
3977 TEST_ADDR = TEST_ADDR + PROBE_INTERVAL
3978 probe at TEST_ADDR
3979 }
3980 while (TEST_ADDR != LAST_ADDR)
3981
3982 probes at FIRST + N * PROBE_INTERVAL for values of N from 1
3983 until it is equal to ROUNDED_SIZE. */
3984
5f5c5e0f 3985 emit_insn (gen_probe_stack_range (reg1, reg1, reg2));
a3eb8a52
EB
3986
3987
3988 /* Step 4: probe at FIRST + SIZE if we cannot assert at compile-time
3989 that SIZE is equal to ROUNDED_SIZE. */
3990
3991 if (size != rounded_size)
3992 {
3993 HOST_WIDE_INT rem = size - rounded_size;
3994
3995 if (rem > 256)
3996 {
3997 const HOST_WIDE_INT base = ROUND_UP (rem, ARITH_FACTOR);
3998
5f5c5e0f
EB
3999 emit_set_insn (reg2, plus_constant (Pmode, reg2, -base));
4000 emit_stack_probe (plus_constant (Pmode, reg2, base - rem));
a3eb8a52
EB
4001 }
4002 else
5f5c5e0f 4003 emit_stack_probe (plus_constant (Pmode, reg2, -rem));
a3eb8a52
EB
4004 }
4005 }
4006
4007 /* Make sure nothing is scheduled before we are done. */
4008 emit_insn (gen_blockage ());
4009}
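/* Illustrative example, not part of the original source: with
   PROBE_INTERVAL at its usual 4096, a call such as
   aarch64_emit_probe_stack_range (0, 8192) takes the
   "size <= 4 * PROBE_INTERVAL" path above and emits roughly

     sub x9, sp, #4096
     str xzr, [x9]
     sub x9, x9, #4096
     str xzr, [x9]

   i.e. one probe per interval, with the final probe covering the
   remainder.  */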
4010
4011/* Probe a range of stack addresses from REG1 to REG2 inclusive. These are
4012 absolute addresses. */
4013
4014const char *
4015aarch64_output_probe_stack_range (rtx reg1, rtx reg2)
4016{
4017 static int labelno = 0;
4018 char loop_lab[32];
4019 rtx xops[2];
4020
4021 ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno++);
4022
4023 /* Loop. */
4024 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);
4025
cd1bef27
JL
4026 HOST_WIDE_INT stack_clash_probe_interval
4027 = 1 << PARAM_VALUE (PARAM_STACK_CLASH_PROTECTION_GUARD_SIZE);
4028
a3eb8a52
EB
4029 /* TEST_ADDR = TEST_ADDR + PROBE_INTERVAL. */
4030 xops[0] = reg1;
cd1bef27
JL
4031 HOST_WIDE_INT interval;
4032 if (flag_stack_clash_protection)
4033 interval = stack_clash_probe_interval;
4034 else
4035 interval = PROBE_INTERVAL;
4036
4037 gcc_assert (aarch64_uimm12_shift (interval));
4038 xops[1] = GEN_INT (interval);
4039
a3eb8a52
EB
4040 output_asm_insn ("sub\t%0, %0, %1", xops);
4041
cd1bef27
JL
4042 /* If doing stack clash protection then we probe up by the ABI specified
4043 amount. We do this because we're dropping full pages at a time in the
4044 loop. But if we're doing non-stack clash probing, probe at SP 0. */
4045 if (flag_stack_clash_protection)
4046 xops[1] = GEN_INT (STACK_CLASH_CALLER_GUARD);
4047 else
4048 xops[1] = CONST0_RTX (GET_MODE (xops[1]));
4049
4050 /* Probe at TEST_ADDR. If we're inside the loop it is always safe to probe
4051 by this amount for each iteration. */
4052 output_asm_insn ("str\txzr, [%0, %1]", xops);
a3eb8a52
EB
4053
4054 /* Test if TEST_ADDR == LAST_ADDR. */
4055 xops[1] = reg2;
4056 output_asm_insn ("cmp\t%0, %1", xops);
4057
4058 /* Branch. */
4059 fputs ("\tb.ne\t", asm_out_file);
4060 assemble_name_raw (asm_out_file, loop_lab);
4061 fputc ('\n', asm_out_file);
4062
4063 return "";
4064}
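/* Illustrative output, not part of the original source: without
   -fstack-clash-protection the function above prints a loop of the form

   .LPSRL0:
     sub x9, x9, #4096
     str xzr, [x9, 0]
     cmp x9, x10
     b.ne .LPSRL0

   probing once per page between the two addresses; with stack clash
   protection enabled the probe offset is STACK_CLASH_CALLER_GUARD rather
   than 0.  */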
4065
eb471ba3
TC
4066/* Emit the probe loop for doing stack clash probes and stack adjustments for
4067 SVE. This emits probes from BASE to BASE - ADJUSTMENT based on a guard size
4068 of GUARD_SIZE. Each probe is emitted at most MIN_PROBE_THRESHOLD bytes
4069 from the current BASE, and successive probes are spaced at intervals of
4070 at most MIN_PROBE_THRESHOLD bytes. By the end of this function
4071 BASE = BASE - ADJUSTMENT. */
4072
4073const char *
4074aarch64_output_probe_sve_stack_clash (rtx base, rtx adjustment,
4075 rtx min_probe_threshold, rtx guard_size)
4076{
4077 /* This function is not allowed to use any instruction generation function
4078 like gen_ and friends. If you do you'll likely ICE during CFG validation,
4079 so instead emit the code you want using output_asm_insn. */
4080 gcc_assert (flag_stack_clash_protection);
4081 gcc_assert (CONST_INT_P (min_probe_threshold) && CONST_INT_P (guard_size));
4082 gcc_assert (INTVAL (guard_size) > INTVAL (min_probe_threshold));
4083
4084 /* The minimum required allocation before the residual requires probing. */
4085 HOST_WIDE_INT residual_probe_guard = INTVAL (min_probe_threshold);
4086
4087 /* Clamp the value down to the nearest value that can be used with a cmp. */
4088 residual_probe_guard = aarch64_clamp_to_uimm12_shift (residual_probe_guard);
4089 rtx probe_offset_value_rtx = gen_int_mode (residual_probe_guard, Pmode);
4090
4091 gcc_assert (INTVAL (min_probe_threshold) >= residual_probe_guard);
4092 gcc_assert (aarch64_uimm12_shift (residual_probe_guard));
4093
4094 static int labelno = 0;
4095 char loop_start_lab[32];
4096 char loop_end_lab[32];
4097 rtx xops[2];
4098
4099 ASM_GENERATE_INTERNAL_LABEL (loop_start_lab, "SVLPSPL", labelno);
4100 ASM_GENERATE_INTERNAL_LABEL (loop_end_lab, "SVLPEND", labelno++);
4101
4102 /* Emit loop start label. */
4103 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_start_lab);
4104
4105 /* ADJUSTMENT < RESIDUAL_PROBE_GUARD. */
4106 xops[0] = adjustment;
4107 xops[1] = probe_offset_value_rtx;
4108 output_asm_insn ("cmp\t%0, %1", xops);
4109
4110 /* Branch to end if not enough adjustment to probe. */
4111 fputs ("\tb.lt\t", asm_out_file);
4112 assemble_name_raw (asm_out_file, loop_end_lab);
4113 fputc ('\n', asm_out_file);
4114
4115 /* BASE = BASE - RESIDUAL_PROBE_GUARD. */
4116 xops[0] = base;
4117 xops[1] = probe_offset_value_rtx;
4118 output_asm_insn ("sub\t%0, %0, %1", xops);
4119
4120 /* Probe at BASE. */
4121 xops[1] = const0_rtx;
4122 output_asm_insn ("str\txzr, [%0, %1]", xops);
4123
4124 /* ADJUSTMENT = ADJUSTMENT - RESIDUAL_PROBE_GUARD. */
4125 xops[0] = adjustment;
4126 xops[1] = probe_offset_value_rtx;
4127 output_asm_insn ("sub\t%0, %0, %1", xops);
4128
4129 /* Branch to start if still more bytes to allocate. */
4130 fputs ("\tb\t", asm_out_file);
4131 assemble_name_raw (asm_out_file, loop_start_lab);
4132 fputc ('\n', asm_out_file);
4133
4134 /* No probe leave. */
4135 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_end_lab);
4136
4137 /* BASE = BASE - ADJUSTMENT. */
4138 xops[0] = base;
4139 xops[1] = adjustment;
4140 output_asm_insn ("sub\t%0, %0, %1", xops);
4141 return "";
4142}
4143
d6cb6d6a
WD
4144/* Determine whether a frame chain needs to be generated. */
4145static bool
4146aarch64_needs_frame_chain (void)
4147{
4148 /* Force a frame chain for EH returns so the return address is at FP+8. */
4149 if (frame_pointer_needed || crtl->calls_eh_return)
4150 return true;
4151
4152 /* A leaf function cannot have calls or write LR. */
4153 bool is_leaf = crtl->is_leaf && !df_regs_ever_live_p (LR_REGNUM);
4154
4155 /* Don't use a frame chain in leaf functions if leaf frame pointers
4156 are disabled. */
4157 if (flag_omit_leaf_frame_pointer && is_leaf)
4158 return false;
4159
4160 return aarch64_use_frame_pointer;
4161}
4162
43e9d192
IB
4163/* Mark the registers that need to be saved by the callee and calculate
4164 the size of the callee-saved registers area and frame record (both FP
33a2e348 4165 and LR may be omitted). */
43e9d192
IB
4166static void
4167aarch64_layout_frame (void)
4168{
4169 HOST_WIDE_INT offset = 0;
4b0685d9 4170 int regno, last_fp_reg = INVALID_REGNUM;
43e9d192 4171
d6cb6d6a 4172 cfun->machine->frame.emit_frame_chain = aarch64_needs_frame_chain ();
7040939b 4173
8c6e3b23
TC
4174 /* Adjust the outgoing arguments size if required. Keep it in sync with what
4175 the mid-end is doing. */
4176 crtl->outgoing_args_size = STACK_DYNAMIC_OFFSET (cfun);
4177
97826595
MS
4178#define SLOT_NOT_REQUIRED (-2)
4179#define SLOT_REQUIRED (-1)
4180
71bfb77a
WD
4181 cfun->machine->frame.wb_candidate1 = INVALID_REGNUM;
4182 cfun->machine->frame.wb_candidate2 = INVALID_REGNUM;
363ffa50 4183
43e9d192
IB
4184 /* First mark all the registers that really need to be saved... */
4185 for (regno = R0_REGNUM; regno <= R30_REGNUM; regno++)
97826595 4186 cfun->machine->frame.reg_offset[regno] = SLOT_NOT_REQUIRED;
43e9d192
IB
4187
4188 for (regno = V0_REGNUM; regno <= V31_REGNUM; regno++)
97826595 4189 cfun->machine->frame.reg_offset[regno] = SLOT_NOT_REQUIRED;
43e9d192
IB
4190
4191 /* ... that includes the eh data registers (if needed)... */
4192 if (crtl->calls_eh_return)
4193 for (regno = 0; EH_RETURN_DATA_REGNO (regno) != INVALID_REGNUM; regno++)
97826595
MS
4194 cfun->machine->frame.reg_offset[EH_RETURN_DATA_REGNO (regno)]
4195 = SLOT_REQUIRED;
43e9d192
IB
4196
4197 /* ... and any callee saved register that dataflow says is live. */
4198 for (regno = R0_REGNUM; regno <= R30_REGNUM; regno++)
4199 if (df_regs_ever_live_p (regno)
1c923b60
JW
4200 && (regno == R30_REGNUM
4201 || !call_used_regs[regno]))
97826595 4202 cfun->machine->frame.reg_offset[regno] = SLOT_REQUIRED;
43e9d192
IB
4203
4204 for (regno = V0_REGNUM; regno <= V31_REGNUM; regno++)
4205 if (df_regs_ever_live_p (regno)
4206 && !call_used_regs[regno])
4b0685d9
WD
4207 {
4208 cfun->machine->frame.reg_offset[regno] = SLOT_REQUIRED;
4209 last_fp_reg = regno;
4210 }
43e9d192 4211
204d2c03 4212 if (cfun->machine->frame.emit_frame_chain)
43e9d192 4213 {
2e1cdae5 4214 /* FP and LR are placed in the linkage record. */
43e9d192 4215 cfun->machine->frame.reg_offset[R29_REGNUM] = 0;
363ffa50 4216 cfun->machine->frame.wb_candidate1 = R29_REGNUM;
2e1cdae5 4217 cfun->machine->frame.reg_offset[R30_REGNUM] = UNITS_PER_WORD;
363ffa50 4218 cfun->machine->frame.wb_candidate2 = R30_REGNUM;
1f7bffd0
WD
4219 offset = 2 * UNITS_PER_WORD;
4220 }
43e9d192 4221
db6b62a8
TC
4222 /* With stack-clash, LR must be saved in non-leaf functions. */
4223 gcc_assert (crtl->is_leaf
4224 || (cfun->machine->frame.reg_offset[R30_REGNUM]
4225 != SLOT_NOT_REQUIRED));
4226
43e9d192 4227 /* Now assign stack slots for them. */
2e1cdae5 4228 for (regno = R0_REGNUM; regno <= R30_REGNUM; regno++)
97826595 4229 if (cfun->machine->frame.reg_offset[regno] == SLOT_REQUIRED)
43e9d192
IB
4230 {
4231 cfun->machine->frame.reg_offset[regno] = offset;
71bfb77a 4232 if (cfun->machine->frame.wb_candidate1 == INVALID_REGNUM)
363ffa50 4233 cfun->machine->frame.wb_candidate1 = regno;
71bfb77a 4234 else if (cfun->machine->frame.wb_candidate2 == INVALID_REGNUM)
363ffa50 4235 cfun->machine->frame.wb_candidate2 = regno;
43e9d192
IB
4236 offset += UNITS_PER_WORD;
4237 }
4238
4b0685d9
WD
4239 HOST_WIDE_INT max_int_offset = offset;
4240 offset = ROUND_UP (offset, STACK_BOUNDARY / BITS_PER_UNIT);
4241 bool has_align_gap = offset != max_int_offset;
4242
43e9d192 4243 for (regno = V0_REGNUM; regno <= V31_REGNUM; regno++)
97826595 4244 if (cfun->machine->frame.reg_offset[regno] == SLOT_REQUIRED)
43e9d192 4245 {
4b0685d9
WD
4246 /* If there is an alignment gap between integer and fp callee-saves,
4247 allocate the last fp register to it if possible. */
4248 if (regno == last_fp_reg && has_align_gap && (offset & 8) == 0)
4249 {
4250 cfun->machine->frame.reg_offset[regno] = max_int_offset;
4251 break;
4252 }
4253
43e9d192 4254 cfun->machine->frame.reg_offset[regno] = offset;
71bfb77a 4255 if (cfun->machine->frame.wb_candidate1 == INVALID_REGNUM)
363ffa50 4256 cfun->machine->frame.wb_candidate1 = regno;
71bfb77a 4257 else if (cfun->machine->frame.wb_candidate2 == INVALID_REGNUM
363ffa50
JW
4258 && cfun->machine->frame.wb_candidate1 >= V0_REGNUM)
4259 cfun->machine->frame.wb_candidate2 = regno;
43e9d192
IB
4260 offset += UNITS_PER_WORD;
4261 }
4262
4f59f9f2 4263 offset = ROUND_UP (offset, STACK_BOUNDARY / BITS_PER_UNIT);
43e9d192
IB
4264
4265 cfun->machine->frame.saved_regs_size = offset;
1c960e02 4266
71bfb77a
WD
4267 HOST_WIDE_INT varargs_and_saved_regs_size
4268 = offset + cfun->machine->frame.saved_varargs_size;
4269
1c960e02 4270 cfun->machine->frame.hard_fp_offset
6a70badb
RS
4271 = aligned_upper_bound (varargs_and_saved_regs_size
4272 + get_frame_size (),
4273 STACK_BOUNDARY / BITS_PER_UNIT);
1c960e02 4274
6a70badb
RS
4275 /* Both these values are already aligned. */
4276 gcc_assert (multiple_p (crtl->outgoing_args_size,
4277 STACK_BOUNDARY / BITS_PER_UNIT));
1c960e02 4278 cfun->machine->frame.frame_size
6a70badb
RS
4279 = (cfun->machine->frame.hard_fp_offset
4280 + crtl->outgoing_args_size);
1c960e02 4281
71bfb77a
WD
4282 cfun->machine->frame.locals_offset = cfun->machine->frame.saved_varargs_size;
4283
4284 cfun->machine->frame.initial_adjust = 0;
4285 cfun->machine->frame.final_adjust = 0;
4286 cfun->machine->frame.callee_adjust = 0;
4287 cfun->machine->frame.callee_offset = 0;
4288
4289 HOST_WIDE_INT max_push_offset = 0;
4290 if (cfun->machine->frame.wb_candidate2 != INVALID_REGNUM)
4291 max_push_offset = 512;
4292 else if (cfun->machine->frame.wb_candidate1 != INVALID_REGNUM)
4293 max_push_offset = 256;
4294
6a70badb
RS
4295 HOST_WIDE_INT const_size, const_fp_offset;
4296 if (cfun->machine->frame.frame_size.is_constant (&const_size)
4297 && const_size < max_push_offset
4298 && known_eq (crtl->outgoing_args_size, 0))
71bfb77a
WD
4299 {
4300 /* Simple, small frame with no outgoing arguments:
4301 stp reg1, reg2, [sp, -frame_size]!
4302 stp reg3, reg4, [sp, 16] */
6a70badb 4303 cfun->machine->frame.callee_adjust = const_size;
71bfb77a 4304 }
6a70badb
RS
4305 else if (known_lt (crtl->outgoing_args_size
4306 + cfun->machine->frame.saved_regs_size, 512)
71bfb77a 4307 && !(cfun->calls_alloca
6a70badb
RS
4308 && known_lt (cfun->machine->frame.hard_fp_offset,
4309 max_push_offset)))
71bfb77a
WD
4310 {
4311 /* Frame with small outgoing arguments:
4312 sub sp, sp, frame_size
4313 stp reg1, reg2, [sp, outgoing_args_size]
4314 stp reg3, reg4, [sp, outgoing_args_size + 16] */
4315 cfun->machine->frame.initial_adjust = cfun->machine->frame.frame_size;
4316 cfun->machine->frame.callee_offset
4317 = cfun->machine->frame.frame_size - cfun->machine->frame.hard_fp_offset;
4318 }
6a70badb
RS
4319 else if (cfun->machine->frame.hard_fp_offset.is_constant (&const_fp_offset)
4320 && const_fp_offset < max_push_offset)
71bfb77a
WD
4321 {
4322 /* Frame with large outgoing arguments but a small local area:
4323 stp reg1, reg2, [sp, -hard_fp_offset]!
4324 stp reg3, reg4, [sp, 16]
4325 sub sp, sp, outgoing_args_size */
6a70badb 4326 cfun->machine->frame.callee_adjust = const_fp_offset;
71bfb77a
WD
4327 cfun->machine->frame.final_adjust
4328 = cfun->machine->frame.frame_size - cfun->machine->frame.callee_adjust;
4329 }
71bfb77a
WD
4330 else
4331 {
4332 /* Frame with large local area and outgoing arguments using frame pointer:
4333 sub sp, sp, hard_fp_offset
4334 stp x29, x30, [sp, 0]
4335 add x29, sp, 0
4336 stp reg3, reg4, [sp, 16]
4337 sub sp, sp, outgoing_args_size */
4338 cfun->machine->frame.initial_adjust = cfun->machine->frame.hard_fp_offset;
4339 cfun->machine->frame.final_adjust
4340 = cfun->machine->frame.frame_size - cfun->machine->frame.initial_adjust;
4341 }
4342
43e9d192
IB
4343 cfun->machine->frame.laid_out = true;
4344}
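/* Illustrative example, not part of the original source: for a function
   that saves only x29/x30, has (say) 32 bytes of locals and no outgoing
   arguments, frame_size is the constant 48, which is below the 512-byte
   push limit, so the first case above applies: callee_adjust == 48 and
   the whole stack allocation is folded into a single
   "stp x29, x30, [sp, -48]!" in the prologue.  */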
4345
04ddfe06
KT
4346/* Return true if the register REGNO is saved on entry to
4347 the current function. */
4348
43e9d192
IB
4349static bool
4350aarch64_register_saved_on_entry (int regno)
4351{
97826595 4352 return cfun->machine->frame.reg_offset[regno] >= 0;
43e9d192
IB
4353}
4354
04ddfe06
KT
4355/* Return the next register up from REGNO up to LIMIT for the callee
4356 to save. */
4357
64dedd72
JW
4358static unsigned
4359aarch64_next_callee_save (unsigned regno, unsigned limit)
4360{
4361 while (regno <= limit && !aarch64_register_saved_on_entry (regno))
4362 regno ++;
4363 return regno;
4364}
43e9d192 4365
04ddfe06
KT
4366/* Push the register number REGNO of mode MODE to the stack with write-back
4367 adjusting the stack by ADJUSTMENT. */
4368
c5e1f66e 4369static void
ef4bddc2 4370aarch64_pushwb_single_reg (machine_mode mode, unsigned regno,
c5e1f66e
JW
4371 HOST_WIDE_INT adjustment)
4372 {
4373 rtx base_rtx = stack_pointer_rtx;
4374 rtx insn, reg, mem;
4375
4376 reg = gen_rtx_REG (mode, regno);
4377 mem = gen_rtx_PRE_MODIFY (Pmode, base_rtx,
4378 plus_constant (Pmode, base_rtx, -adjustment));
30079dde 4379 mem = gen_frame_mem (mode, mem);
c5e1f66e
JW
4380
4381 insn = emit_move_insn (mem, reg);
4382 RTX_FRAME_RELATED_P (insn) = 1;
4383}
4384
04ddfe06
KT
4385/* Generate and return an instruction to store the pair of registers
4386 REG and REG2 of mode MODE to location BASE with write-back adjusting
4387 the stack location BASE by ADJUSTMENT. */
4388
80c11907 4389static rtx
ef4bddc2 4390aarch64_gen_storewb_pair (machine_mode mode, rtx base, rtx reg, rtx reg2,
80c11907
JW
4391 HOST_WIDE_INT adjustment)
4392{
4393 switch (mode)
4394 {
4e10a5a7 4395 case E_DImode:
80c11907
JW
4396 return gen_storewb_pairdi_di (base, base, reg, reg2,
4397 GEN_INT (-adjustment),
4398 GEN_INT (UNITS_PER_WORD - adjustment));
4e10a5a7 4399 case E_DFmode:
80c11907
JW
4400 return gen_storewb_pairdf_di (base, base, reg, reg2,
4401 GEN_INT (-adjustment),
4402 GEN_INT (UNITS_PER_WORD - adjustment));
4403 default:
4404 gcc_unreachable ();
4405 }
4406}
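/* Illustrative example, not part of the original source: for E_DImode with
   BASE == stack_pointer_rtx and ADJUSTMENT == 32 the insn returned above
   corresponds to "stp reg, reg2, [sp, -32]!", i.e. the pre-decrement of SP
   and the two stores (at offsets 0 and 8 from the new SP) are combined
   into one instruction.  */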
4407
04ddfe06
KT
4408/* Push registers numbered REGNO1 and REGNO2 to the stack, adjusting the
4409 stack pointer by ADJUSTMENT. */
4410
80c11907 4411static void
89ac681e 4412aarch64_push_regs (unsigned regno1, unsigned regno2, HOST_WIDE_INT adjustment)
80c11907 4413{
5d8a22a5 4414 rtx_insn *insn;
0d4a1197 4415 machine_mode mode = (regno1 <= R30_REGNUM) ? E_DImode : E_DFmode;
89ac681e 4416
71bfb77a 4417 if (regno2 == INVALID_REGNUM)
89ac681e
WD
4418 return aarch64_pushwb_single_reg (mode, regno1, adjustment);
4419
80c11907
JW
4420 rtx reg1 = gen_rtx_REG (mode, regno1);
4421 rtx reg2 = gen_rtx_REG (mode, regno2);
4422
4423 insn = emit_insn (aarch64_gen_storewb_pair (mode, stack_pointer_rtx, reg1,
4424 reg2, adjustment));
4425 RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 2)) = 1;
80c11907
JW
4426 RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 1)) = 1;
4427 RTX_FRAME_RELATED_P (insn) = 1;
4428}
4429
04ddfe06
KT
4430/* Load the pair of registers REG, REG2 of mode MODE from stack location BASE,
4431 adjusting it by ADJUSTMENT afterwards. */
4432
159313d9 4433static rtx
ef4bddc2 4434aarch64_gen_loadwb_pair (machine_mode mode, rtx base, rtx reg, rtx reg2,
159313d9
JW
4435 HOST_WIDE_INT adjustment)
4436{
4437 switch (mode)
4438 {
4e10a5a7 4439 case E_DImode:
159313d9 4440 return gen_loadwb_pairdi_di (base, base, reg, reg2, GEN_INT (adjustment),
3e322b3f 4441 GEN_INT (UNITS_PER_WORD));
4e10a5a7 4442 case E_DFmode:
159313d9 4443 return gen_loadwb_pairdf_di (base, base, reg, reg2, GEN_INT (adjustment),
3e322b3f 4444 GEN_INT (UNITS_PER_WORD));
159313d9
JW
4445 default:
4446 gcc_unreachable ();
4447 }
4448}
4449
04ddfe06
KT
4450/* Pop the two registers numbered REGNO1, REGNO2 from the stack, adjusting it
4451 afterwards by ADJUSTMENT and writing the appropriate REG_CFA_RESTORE notes
4452 into CFI_OPS. */
4453
89ac681e
WD
4454static void
4455aarch64_pop_regs (unsigned regno1, unsigned regno2, HOST_WIDE_INT adjustment,
4456 rtx *cfi_ops)
4457{
0d4a1197 4458 machine_mode mode = (regno1 <= R30_REGNUM) ? E_DImode : E_DFmode;
89ac681e
WD
4459 rtx reg1 = gen_rtx_REG (mode, regno1);
4460
4461 *cfi_ops = alloc_reg_note (REG_CFA_RESTORE, reg1, *cfi_ops);
4462
71bfb77a 4463 if (regno2 == INVALID_REGNUM)
89ac681e
WD
4464 {
4465 rtx mem = plus_constant (Pmode, stack_pointer_rtx, adjustment);
4466 mem = gen_rtx_POST_MODIFY (Pmode, stack_pointer_rtx, mem);
30079dde 4467 emit_move_insn (reg1, gen_frame_mem (mode, mem));
89ac681e
WD
4468 }
4469 else
4470 {
4471 rtx reg2 = gen_rtx_REG (mode, regno2);
4472 *cfi_ops = alloc_reg_note (REG_CFA_RESTORE, reg2, *cfi_ops);
4473 emit_insn (aarch64_gen_loadwb_pair (mode, stack_pointer_rtx, reg1,
4474 reg2, adjustment));
4475 }
4476}
4477
04ddfe06
KT
4478/* Generate and return a store pair instruction of mode MODE to store
4479 register REG1 to MEM1 and register REG2 to MEM2. */
4480
72df5c1f 4481static rtx
ef4bddc2 4482aarch64_gen_store_pair (machine_mode mode, rtx mem1, rtx reg1, rtx mem2,
72df5c1f
JW
4483 rtx reg2)
4484{
4485 switch (mode)
4486 {
4e10a5a7 4487 case E_DImode:
dfe1da23 4488 return gen_store_pair_dw_didi (mem1, reg1, mem2, reg2);
72df5c1f 4489
4e10a5a7 4490 case E_DFmode:
dfe1da23 4491 return gen_store_pair_dw_dfdf (mem1, reg1, mem2, reg2);
72df5c1f
JW
4492
4493 default:
4494 gcc_unreachable ();
4495 }
4496}
4497
04ddfe06
KT
4498/* Generate and return a load pair instruction of mode MODE to load register
4499 REG1 from MEM1 and register REG2 from MEM2. */
4500
72df5c1f 4501static rtx
ef4bddc2 4502aarch64_gen_load_pair (machine_mode mode, rtx reg1, rtx mem1, rtx reg2,
72df5c1f
JW
4503 rtx mem2)
4504{
4505 switch (mode)
4506 {
4e10a5a7 4507 case E_DImode:
dfe1da23 4508 return gen_load_pair_dw_didi (reg1, mem1, reg2, mem2);
72df5c1f 4509
4e10a5a7 4510 case E_DFmode:
dfe1da23 4511 return gen_load_pair_dw_dfdf (reg1, mem1, reg2, mem2);
72df5c1f
JW
4512
4513 default:
4514 gcc_unreachable ();
4515 }
4516}
4517
db58fd89
JW
4518/* Return TRUE if return address signing should be enabled for the current
4519 function, otherwise return FALSE. */
4520
4521bool
4522aarch64_return_address_signing_enabled (void)
4523{
4524 /* This function should only be called after the frame is laid out. */
4525 gcc_assert (cfun->machine->frame.laid_out);
4526
4527 /* If the signing scope is AARCH64_FUNCTION_NON_LEAF, we sign a leaf function
4528 only if its LR is pushed onto the stack. */
4529 return (aarch64_ra_sign_scope == AARCH64_FUNCTION_ALL
4530 || (aarch64_ra_sign_scope == AARCH64_FUNCTION_NON_LEAF
4531 && cfun->machine->frame.reg_offset[LR_REGNUM] >= 0));
4532}
4533
04ddfe06
KT
4534/* Emit code to save the callee-saved registers from register number START
4535 to LIMIT to the stack at the location starting at offset START_OFFSET,
4536 skipping any write-back candidates if SKIP_WB is true. */
43e9d192 4537
43e9d192 4538static void
6a70badb 4539aarch64_save_callee_saves (machine_mode mode, poly_int64 start_offset,
ae13fce3 4540 unsigned start, unsigned limit, bool skip_wb)
43e9d192 4541{
5d8a22a5 4542 rtx_insn *insn;
43e9d192
IB
4543 unsigned regno;
4544 unsigned regno2;
4545
0ec74a1e 4546 for (regno = aarch64_next_callee_save (start, limit);
64dedd72
JW
4547 regno <= limit;
4548 regno = aarch64_next_callee_save (regno + 1, limit))
43e9d192 4549 {
ae13fce3 4550 rtx reg, mem;
6a70badb 4551 poly_int64 offset;
64dedd72 4552
ae13fce3
JW
4553 if (skip_wb
4554 && (regno == cfun->machine->frame.wb_candidate1
4555 || regno == cfun->machine->frame.wb_candidate2))
4556 continue;
4557
827ab47a
KT
4558 if (cfun->machine->reg_is_wrapped_separately[regno])
4559 continue;
4560
ae13fce3
JW
4561 reg = gen_rtx_REG (mode, regno);
4562 offset = start_offset + cfun->machine->frame.reg_offset[regno];
30079dde
WD
4563 mem = gen_frame_mem (mode, plus_constant (Pmode, stack_pointer_rtx,
4564 offset));
64dedd72
JW
4565
4566 regno2 = aarch64_next_callee_save (regno + 1, limit);
4567
4568 if (regno2 <= limit
827ab47a 4569 && !cfun->machine->reg_is_wrapped_separately[regno2]
64dedd72
JW
4570 && ((cfun->machine->frame.reg_offset[regno] + UNITS_PER_WORD)
4571 == cfun->machine->frame.reg_offset[regno2]))
4572
43e9d192 4573 {
0ec74a1e 4574 rtx reg2 = gen_rtx_REG (mode, regno2);
64dedd72
JW
4575 rtx mem2;
4576
4577 offset = start_offset + cfun->machine->frame.reg_offset[regno2];
30079dde
WD
4578 mem2 = gen_frame_mem (mode, plus_constant (Pmode, stack_pointer_rtx,
4579 offset));
8ed2fc62
JW
4580 insn = emit_insn (aarch64_gen_store_pair (mode, mem, reg, mem2,
4581 reg2));
0b4a9743 4582
64dedd72
JW
4583 /* The first part of a frame-related parallel insn is
4584 always assumed to be relevant to the frame
4585 calculations; subsequent parts, are only
4586 frame-related if explicitly marked. */
4587 RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 1)) = 1;
4588 regno = regno2;
4589 }
4590 else
8ed2fc62
JW
4591 insn = emit_move_insn (mem, reg);
4592
4593 RTX_FRAME_RELATED_P (insn) = 1;
4594 }
4595}
4596
04ddfe06
KT
4597/* Emit code to restore the callee registers of mode MODE from register
4598 number START up to and including LIMIT. Restore from the stack offset
4599 START_OFFSET, skipping any write-back candidates if SKIP_WB is true.
4600 Write the appropriate REG_CFA_RESTORE notes into CFI_OPS. */
4601
8ed2fc62 4602static void
ef4bddc2 4603aarch64_restore_callee_saves (machine_mode mode,
6a70badb 4604 poly_int64 start_offset, unsigned start,
dd991abb 4605 unsigned limit, bool skip_wb, rtx *cfi_ops)
8ed2fc62 4606{
8ed2fc62 4607 rtx base_rtx = stack_pointer_rtx;
8ed2fc62
JW
4608 unsigned regno;
4609 unsigned regno2;
6a70badb 4610 poly_int64 offset;
8ed2fc62
JW
4611
4612 for (regno = aarch64_next_callee_save (start, limit);
4613 regno <= limit;
4614 regno = aarch64_next_callee_save (regno + 1, limit))
4615 {
827ab47a
KT
4616 if (cfun->machine->reg_is_wrapped_separately[regno])
4617 continue;
4618
ae13fce3 4619 rtx reg, mem;
8ed2fc62 4620
ae13fce3
JW
4621 if (skip_wb
4622 && (regno == cfun->machine->frame.wb_candidate1
4623 || regno == cfun->machine->frame.wb_candidate2))
4624 continue;
4625
4626 reg = gen_rtx_REG (mode, regno);
8ed2fc62 4627 offset = start_offset + cfun->machine->frame.reg_offset[regno];
30079dde 4628 mem = gen_frame_mem (mode, plus_constant (Pmode, base_rtx, offset));
8ed2fc62
JW
4629
4630 regno2 = aarch64_next_callee_save (regno + 1, limit);
4631
4632 if (regno2 <= limit
827ab47a 4633 && !cfun->machine->reg_is_wrapped_separately[regno2]
8ed2fc62
JW
4634 && ((cfun->machine->frame.reg_offset[regno] + UNITS_PER_WORD)
4635 == cfun->machine->frame.reg_offset[regno2]))
64dedd72 4636 {
8ed2fc62
JW
4637 rtx reg2 = gen_rtx_REG (mode, regno2);
4638 rtx mem2;
4639
4640 offset = start_offset + cfun->machine->frame.reg_offset[regno2];
30079dde 4641 mem2 = gen_frame_mem (mode, plus_constant (Pmode, base_rtx, offset));
dd991abb 4642 emit_insn (aarch64_gen_load_pair (mode, reg, mem, reg2, mem2));
8ed2fc62 4643
dd991abb 4644 *cfi_ops = alloc_reg_note (REG_CFA_RESTORE, reg2, *cfi_ops);
8ed2fc62 4645 regno = regno2;
43e9d192 4646 }
8ed2fc62 4647 else
dd991abb
RH
4648 emit_move_insn (reg, mem);
4649 *cfi_ops = alloc_reg_note (REG_CFA_RESTORE, reg, *cfi_ops);
43e9d192 4650 }
43e9d192
IB
4651}
4652
43cacb12
RS
4653/* Return true if OFFSET is a signed 4-bit value multiplied by the size
4654 of MODE. */
4655
4656static inline bool
4657offset_4bit_signed_scaled_p (machine_mode mode, poly_int64 offset)
4658{
4659 HOST_WIDE_INT multiple;
4660 return (constant_multiple_p (offset, GET_MODE_SIZE (mode), &multiple)
4661 && IN_RANGE (multiple, -8, 7));
4662}
4663
4664/* Return true if OFFSET is an unsigned 6-bit value multiplied by the size
4665 of MODE. */
4666
4667static inline bool
4668offset_6bit_unsigned_scaled_p (machine_mode mode, poly_int64 offset)
4669{
4670 HOST_WIDE_INT multiple;
4671 return (constant_multiple_p (offset, GET_MODE_SIZE (mode), &multiple)
4672 && IN_RANGE (multiple, 0, 63));
4673}
4674
4675/* Return true if OFFSET is a signed 7-bit value multiplied by the size
4676 of MODE. */
4677
4678bool
4679aarch64_offset_7bit_signed_scaled_p (machine_mode mode, poly_int64 offset)
4680{
4681 HOST_WIDE_INT multiple;
4682 return (constant_multiple_p (offset, GET_MODE_SIZE (mode), &multiple)
4683 && IN_RANGE (multiple, -64, 63));
4684}
4685
4686/* Return true if OFFSET is a signed 9-bit value. */
4687
3c5af608
MM
4688bool
4689aarch64_offset_9bit_signed_unscaled_p (machine_mode mode ATTRIBUTE_UNUSED,
4690 poly_int64 offset)
827ab47a 4691{
6a70badb
RS
4692 HOST_WIDE_INT const_offset;
4693 return (offset.is_constant (&const_offset)
4694 && IN_RANGE (const_offset, -256, 255));
827ab47a
KT
4695}
4696
43cacb12
RS
4697/* Return true if OFFSET is a signed 9-bit value multiplied by the size
4698 of MODE. */
4699
827ab47a 4700static inline bool
43cacb12 4701offset_9bit_signed_scaled_p (machine_mode mode, poly_int64 offset)
827ab47a 4702{
6a70badb
RS
4703 HOST_WIDE_INT multiple;
4704 return (constant_multiple_p (offset, GET_MODE_SIZE (mode), &multiple)
43cacb12 4705 && IN_RANGE (multiple, -256, 255));
827ab47a
KT
4706}
4707
43cacb12
RS
4708/* Return true if OFFSET is an unsigned 12-bit value multiplied by the size
4709 of MODE. */
4710
4711static inline bool
4712offset_12bit_unsigned_scaled_p (machine_mode mode, poly_int64 offset)
827ab47a 4713{
6a70badb
RS
4714 HOST_WIDE_INT multiple;
4715 return (constant_multiple_p (offset, GET_MODE_SIZE (mode), &multiple)
43cacb12 4716 && IN_RANGE (multiple, 0, 4095));
827ab47a
KT
4717}
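/* Illustrative example, not part of the original source: for DImode
   (8-byte units) offset_12bit_unsigned_scaled_p accepts byte offsets
   0, 8, 16, ... up to 8 * 4095 == 32760, matching the scaled unsigned
   12-bit immediate of an LDR/STR of an X register.  The shrink-wrapping
   code below uses this predicate to decide whether a callee-save slot can
   be addressed directly from the stack or frame pointer.  */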
4718
4719/* Implement TARGET_SHRINK_WRAP_GET_SEPARATE_COMPONENTS. */
4720
4721static sbitmap
4722aarch64_get_separate_components (void)
4723{
827ab47a
KT
4724 sbitmap components = sbitmap_alloc (LAST_SAVED_REGNUM + 1);
4725 bitmap_clear (components);
4726
4727 /* The registers we need saved to the frame. */
4728 for (unsigned regno = 0; regno <= LAST_SAVED_REGNUM; regno++)
4729 if (aarch64_register_saved_on_entry (regno))
4730 {
6a70badb 4731 poly_int64 offset = cfun->machine->frame.reg_offset[regno];
827ab47a
KT
4732 if (!frame_pointer_needed)
4733 offset += cfun->machine->frame.frame_size
4734 - cfun->machine->frame.hard_fp_offset;
4735 /* Check that we can access the stack slot of the register with one
4736 direct load with no adjustments needed. */
4737 if (offset_12bit_unsigned_scaled_p (DImode, offset))
4738 bitmap_set_bit (components, regno);
4739 }
4740
4741 /* Don't mess with the hard frame pointer. */
4742 if (frame_pointer_needed)
4743 bitmap_clear_bit (components, HARD_FRAME_POINTER_REGNUM);
4744
4745 unsigned reg1 = cfun->machine->frame.wb_candidate1;
4746 unsigned reg2 = cfun->machine->frame.wb_candidate2;
0795f659 4747 /* If registers have been chosen to be stored/restored with
827ab47a
KT
4748 writeback don't interfere with them to avoid having to output explicit
4749 stack adjustment instructions. */
4750 if (reg2 != INVALID_REGNUM)
4751 bitmap_clear_bit (components, reg2);
4752 if (reg1 != INVALID_REGNUM)
4753 bitmap_clear_bit (components, reg1);
4754
4755 bitmap_clear_bit (components, LR_REGNUM);
4756 bitmap_clear_bit (components, SP_REGNUM);
4757
4758 return components;
4759}
4760
4761/* Implement TARGET_SHRINK_WRAP_COMPONENTS_FOR_BB. */
4762
4763static sbitmap
4764aarch64_components_for_bb (basic_block bb)
4765{
4766 bitmap in = DF_LIVE_IN (bb);
4767 bitmap gen = &DF_LIVE_BB_INFO (bb)->gen;
4768 bitmap kill = &DF_LIVE_BB_INFO (bb)->kill;
4769
4770 sbitmap components = sbitmap_alloc (LAST_SAVED_REGNUM + 1);
4771 bitmap_clear (components);
4772
4773 /* GPRs are used in a bb if they are in the IN, GEN, or KILL sets. */
4774 for (unsigned regno = 0; regno <= LAST_SAVED_REGNUM; regno++)
4775 if ((!call_used_regs[regno])
4776 && (bitmap_bit_p (in, regno)
4777 || bitmap_bit_p (gen, regno)
4778 || bitmap_bit_p (kill, regno)))
3f26f054
WD
4779 {
4780 unsigned regno2, offset, offset2;
4781 bitmap_set_bit (components, regno);
4782
4783 /* If there is a callee-save at an adjacent offset, add it too
4784 to increase the use of LDP/STP. */
4785 offset = cfun->machine->frame.reg_offset[regno];
4786 regno2 = ((offset & 8) == 0) ? regno + 1 : regno - 1;
4787
4788 if (regno2 <= LAST_SAVED_REGNUM)
4789 {
4790 offset2 = cfun->machine->frame.reg_offset[regno2];
4791 if ((offset & ~8) == (offset2 & ~8))
4792 bitmap_set_bit (components, regno2);
4793 }
4794 }
827ab47a
KT
4795
4796 return components;
4797}
4798
4799/* Implement TARGET_SHRINK_WRAP_DISQUALIFY_COMPONENTS.
4800 Nothing to do for aarch64. */
4801
4802static void
4803aarch64_disqualify_components (sbitmap, edge, sbitmap, bool)
4804{
4805}
4806
4807/* Return the next set bit in BMP from START onwards. Return the total number
4808 of bits in BMP if no set bit is found at or after START. */
4809
4810static unsigned int
4811aarch64_get_next_set_bit (sbitmap bmp, unsigned int start)
4812{
4813 unsigned int nbits = SBITMAP_SIZE (bmp);
4814 if (start == nbits)
4815 return start;
4816
4817 gcc_assert (start < nbits);
4818 for (unsigned int i = start; i < nbits; i++)
4819 if (bitmap_bit_p (bmp, i))
4820 return i;
4821
4822 return nbits;
4823}
4824
4825/* Do the work for aarch64_emit_prologue_components and
4826 aarch64_emit_epilogue_components. COMPONENTS is the bitmap of registers
4827 to save/restore, PROLOGUE_P indicates whether to emit the prologue sequence
4828 for these components or the epilogue sequence. That is, it determines
4829 whether we should emit stores or loads and what kind of CFA notes to attach
4830 to the insns. Otherwise the logic for the two sequences is very
4831 similar. */
4832
4833static void
4834aarch64_process_components (sbitmap components, bool prologue_p)
4835{
4836 rtx ptr_reg = gen_rtx_REG (Pmode, frame_pointer_needed
4837 ? HARD_FRAME_POINTER_REGNUM
4838 : STACK_POINTER_REGNUM);
4839
4840 unsigned last_regno = SBITMAP_SIZE (components);
4841 unsigned regno = aarch64_get_next_set_bit (components, R0_REGNUM);
4842 rtx_insn *insn = NULL;
4843
4844 while (regno != last_regno)
4845 {
4846 /* AAPCS64 section 5.1.2 requires only the bottom 64 bits to be saved
4847 so DFmode for the vector registers is enough. */
0d4a1197 4848 machine_mode mode = GP_REGNUM_P (regno) ? E_DImode : E_DFmode;
827ab47a 4849 rtx reg = gen_rtx_REG (mode, regno);
6a70badb 4850 poly_int64 offset = cfun->machine->frame.reg_offset[regno];
827ab47a
KT
4851 if (!frame_pointer_needed)
4852 offset += cfun->machine->frame.frame_size
4853 - cfun->machine->frame.hard_fp_offset;
4854 rtx addr = plus_constant (Pmode, ptr_reg, offset);
4855 rtx mem = gen_frame_mem (mode, addr);
4856
4857 rtx set = prologue_p ? gen_rtx_SET (mem, reg) : gen_rtx_SET (reg, mem);
4858 unsigned regno2 = aarch64_get_next_set_bit (components, regno + 1);
4859 /* No more registers to handle after REGNO.
4860 Emit a single save/restore and exit. */
4861 if (regno2 == last_regno)
4862 {
4863 insn = emit_insn (set);
4864 RTX_FRAME_RELATED_P (insn) = 1;
4865 if (prologue_p)
4866 add_reg_note (insn, REG_CFA_OFFSET, copy_rtx (set));
4867 else
4868 add_reg_note (insn, REG_CFA_RESTORE, reg);
4869 break;
4870 }
4871
6a70badb 4872 poly_int64 offset2 = cfun->machine->frame.reg_offset[regno2];
827ab47a
KT
4873 /* The next register is not of the same class or its offset is not
4874 mergeable with the current one into a pair. */
4875 if (!satisfies_constraint_Ump (mem)
4876 || GP_REGNUM_P (regno) != GP_REGNUM_P (regno2)
6a70badb
RS
4877 || maybe_ne ((offset2 - cfun->machine->frame.reg_offset[regno]),
4878 GET_MODE_SIZE (mode)))
827ab47a
KT
4879 {
4880 insn = emit_insn (set);
4881 RTX_FRAME_RELATED_P (insn) = 1;
4882 if (prologue_p)
4883 add_reg_note (insn, REG_CFA_OFFSET, copy_rtx (set));
4884 else
4885 add_reg_note (insn, REG_CFA_RESTORE, reg);
4886
4887 regno = regno2;
4888 continue;
4889 }
4890
4891 /* REGNO2 can be saved/restored in a pair with REGNO. */
4892 rtx reg2 = gen_rtx_REG (mode, regno2);
4893 if (!frame_pointer_needed)
4894 offset2 += cfun->machine->frame.frame_size
4895 - cfun->machine->frame.hard_fp_offset;
4896 rtx addr2 = plus_constant (Pmode, ptr_reg, offset2);
4897 rtx mem2 = gen_frame_mem (mode, addr2);
4898 rtx set2 = prologue_p ? gen_rtx_SET (mem2, reg2)
4899 : gen_rtx_SET (reg2, mem2);
4900
4901 if (prologue_p)
4902 insn = emit_insn (aarch64_gen_store_pair (mode, mem, reg, mem2, reg2));
4903 else
4904 insn = emit_insn (aarch64_gen_load_pair (mode, reg, mem, reg2, mem2));
4905
4906 RTX_FRAME_RELATED_P (insn) = 1;
4907 if (prologue_p)
4908 {
4909 add_reg_note (insn, REG_CFA_OFFSET, set);
4910 add_reg_note (insn, REG_CFA_OFFSET, set2);
4911 }
4912 else
4913 {
4914 add_reg_note (insn, REG_CFA_RESTORE, reg);
4915 add_reg_note (insn, REG_CFA_RESTORE, reg2);
4916 }
4917
4918 regno = aarch64_get_next_set_bit (components, regno2 + 1);
4919 }
4920}
4921
4922/* Implement TARGET_SHRINK_WRAP_EMIT_PROLOGUE_COMPONENTS. */
4923
4924static void
4925aarch64_emit_prologue_components (sbitmap components)
4926{
4927 aarch64_process_components (components, true);
4928}
4929
4930/* Implement TARGET_SHRINK_WRAP_EMIT_EPILOGUE_COMPONENTS. */
4931
4932static void
4933aarch64_emit_epilogue_components (sbitmap components)
4934{
4935 aarch64_process_components (components, false);
4936}
4937
4938/* Implement TARGET_SHRINK_WRAP_SET_HANDLED_COMPONENTS. */
4939
4940static void
4941aarch64_set_handled_components (sbitmap components)
4942{
4943 for (unsigned regno = 0; regno <= LAST_SAVED_REGNUM; regno++)
4944 if (bitmap_bit_p (components, regno))
4945 cfun->machine->reg_is_wrapped_separately[regno] = true;
4946}
4947
8c6e3b23
TC
4948/* On AArch64 we have an ABI defined safe buffer. This constant is used to
4949   determine the probe offset for alloca.  */
4950
4951static HOST_WIDE_INT
4952aarch64_stack_clash_protection_alloca_probe_range (void)
4953{
4954 return STACK_CLASH_CALLER_GUARD;
4955}
4956
4957
cd1bef27
JL
4958/* Allocate POLY_SIZE bytes of stack space using TEMP1 and TEMP2 as scratch
4959   registers.  If POLY_SIZE is not large enough to require a probe, this function
4960   will only adjust the stack.  When allocating the stack space,
4961   FRAME_RELATED_P is then used to indicate if the allocation is frame related.
4962 FINAL_ADJUSTMENT_P indicates whether we are allocating the outgoing
4963   arguments.  If we are, then we ensure that any allocation larger than the ABI
4964 defined buffer needs a probe so that the invariant of having a 1KB buffer is
4965 maintained.
4966
4967 We emit barriers after each stack adjustment to prevent optimizations from
4968 breaking the invariant that we never drop the stack more than a page. This
4969 invariant is needed to make it easier to correctly handle asynchronous
4970 events, e.g. if we were to allow the stack to be dropped by more than a page
4971   and then have multiple probes queued up, and we take a signal somewhere in
4972   between, then the signal handler doesn't know the state of the stack and can
4973   make no assumptions about which pages have been probed.
4974
4975static void
4976aarch64_allocate_and_probe_stack_space (rtx temp1, rtx temp2,
4977 poly_int64 poly_size,
4978 bool frame_related_p,
4979 bool final_adjustment_p)
4980{
4981 HOST_WIDE_INT guard_size
4982 = 1 << PARAM_VALUE (PARAM_STACK_CLASH_PROTECTION_GUARD_SIZE);
4983 HOST_WIDE_INT guard_used_by_caller = STACK_CLASH_CALLER_GUARD;
4984 /* When doing the final adjustment for the outgoing argument size we can't
4985   assume that LR was saved at position 0.  So subtract its offset from the
4986 ABI safe buffer so that we don't accidentally allow an adjustment that
4987 would result in an allocation larger than the ABI buffer without
4988 probing. */
4989 HOST_WIDE_INT min_probe_threshold
4990 = final_adjustment_p
4991 ? guard_used_by_caller - cfun->machine->frame.reg_offset[LR_REGNUM]
4992 : guard_size - guard_used_by_caller;
4993
4994 poly_int64 frame_size = cfun->machine->frame.frame_size;
4995
4996 /* We should always have a positive probe threshold. */
4997 gcc_assert (min_probe_threshold > 0);
4998
4999 if (flag_stack_clash_protection && !final_adjustment_p)
5000 {
5001 poly_int64 initial_adjust = cfun->machine->frame.initial_adjust;
5002 poly_int64 final_adjust = cfun->machine->frame.final_adjust;
5003
5004 if (known_eq (frame_size, 0))
5005 {
5006 dump_stack_clash_frame_info (NO_PROBE_NO_FRAME, false);
5007 }
5008 else if (known_lt (initial_adjust, guard_size - guard_used_by_caller)
5009 && known_lt (final_adjust, guard_used_by_caller))
5010 {
5011 dump_stack_clash_frame_info (NO_PROBE_SMALL_FRAME, true);
5012 }
5013 }
5014
cd1bef27
JL
5015 /* If SIZE is not large enough to require probing, just adjust the stack and
5016 exit. */
eb471ba3 5017 if (known_lt (poly_size, min_probe_threshold)
cd1bef27
JL
5018 || !flag_stack_clash_protection)
5019 {
5020 aarch64_sub_sp (temp1, temp2, poly_size, frame_related_p);
5021 return;
5022 }
5023
eb471ba3
TC
5024 HOST_WIDE_INT size;
5025 /* Handle the SVE non-constant case first. */
5026 if (!poly_size.is_constant (&size))
5027 {
5028 if (dump_file)
5029 {
5030 fprintf (dump_file, "Stack clash SVE prologue: ");
5031 print_dec (poly_size, dump_file);
5032 fprintf (dump_file, " bytes, dynamic probing will be required.\n");
5033 }
5034
5035   /* First calculate the number of bytes we're actually spilling.  */
5036 aarch64_add_offset (Pmode, temp1, CONST0_RTX (Pmode),
5037 poly_size, temp1, temp2, false, true);
5038
5039 rtx_insn *insn = get_last_insn ();
5040
5041 if (frame_related_p)
5042 {
5043 /* This is done to provide unwinding information for the stack
5044      adjustments we're about to do; however, to prevent the optimizers
5045 from removing the R15 move and leaving the CFA note (which would be
5046 very wrong) we tie the old and new stack pointer together.
5047 The tie will expand to nothing but the optimizers will not touch
5048 the instruction. */
5049 rtx stack_ptr_copy = gen_rtx_REG (Pmode, R15_REGNUM);
5050 emit_move_insn (stack_ptr_copy, stack_pointer_rtx);
5051 emit_insn (gen_stack_tie (stack_ptr_copy, stack_pointer_rtx));
5052
5053 /* We want the CFA independent of the stack pointer for the
5054 duration of the loop. */
5055 add_reg_note (insn, REG_CFA_DEF_CFA, stack_ptr_copy);
5056 RTX_FRAME_RELATED_P (insn) = 1;
5057 }
5058
5059 rtx probe_const = gen_int_mode (min_probe_threshold, Pmode);
5060 rtx guard_const = gen_int_mode (guard_size, Pmode);
5061
5062 insn = emit_insn (gen_probe_sve_stack_clash (Pmode, stack_pointer_rtx,
5063 stack_pointer_rtx, temp1,
5064 probe_const, guard_const));
5065
5066 /* Now reset the CFA register if needed. */
5067 if (frame_related_p)
5068 {
5069 add_reg_note (insn, REG_CFA_DEF_CFA,
5070 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
5071 gen_int_mode (poly_size, Pmode)));
5072 RTX_FRAME_RELATED_P (insn) = 1;
5073 }
5074
5075 return;
5076 }
5077
cd1bef27
JL
5078 if (dump_file)
5079 fprintf (dump_file,
eb471ba3
TC
5080 "Stack clash AArch64 prologue: " HOST_WIDE_INT_PRINT_DEC
5081 " bytes, probing will be required.\n", size);
cd1bef27
JL
5082
5083 /* Round size to the nearest multiple of guard_size, and calculate the
5084 residual as the difference between the original size and the rounded
5085 size. */
5086 HOST_WIDE_INT rounded_size = ROUND_DOWN (size, guard_size);
5087 HOST_WIDE_INT residual = size - rounded_size;
5088
5089 /* We can handle a small number of allocations/probes inline. Otherwise
5090 punt to a loop. */
5091 if (rounded_size <= STACK_CLASH_MAX_UNROLL_PAGES * guard_size)
5092 {
5093 for (HOST_WIDE_INT i = 0; i < rounded_size; i += guard_size)
5094 {
5095 aarch64_sub_sp (NULL, temp2, guard_size, true);
5096 emit_stack_probe (plus_constant (Pmode, stack_pointer_rtx,
5097 guard_used_by_caller));
5098 emit_insn (gen_blockage ());
5099 }
5100 dump_stack_clash_frame_info (PROBE_INLINE, size != rounded_size);
5101 }
5102 else
5103 {
5104 /* Compute the ending address. */
5105 aarch64_add_offset (Pmode, temp1, stack_pointer_rtx, -rounded_size,
5106 temp1, NULL, false, true);
5107 rtx_insn *insn = get_last_insn ();
5108
5109 /* For the initial allocation, we don't have a frame pointer
5110 set up, so we always need CFI notes. If we're doing the
5111 final allocation, then we may have a frame pointer, in which
5112 case it is the CFA, otherwise we need CFI notes.
5113
5114 We can determine which allocation we are doing by looking at
5115 the value of FRAME_RELATED_P since the final allocations are not
5116 frame related. */
5117 if (frame_related_p)
5118 {
5119 /* We want the CFA independent of the stack pointer for the
5120 duration of the loop. */
5121 add_reg_note (insn, REG_CFA_DEF_CFA,
5122 plus_constant (Pmode, temp1, rounded_size));
5123 RTX_FRAME_RELATED_P (insn) = 1;
5124 }
5125
5126 /* This allocates and probes the stack. Note that this re-uses some of
5127      the existing Ada stack protection code.  However, we are guaranteed not
5128      to enter the non-loop or residual branches of that code.
5129
5130 The non-loop part won't be entered because if our allocation amount
5131 doesn't require a loop, the case above would handle it.
5132
5133      The residual amount won't be entered because TEMP1 is a multiple of
5134 the allocation size. The residual will always be 0. As such, the only
5135 part we are actually using from that code is the loop setup. The
5136 actual probing is done in aarch64_output_probe_stack_range. */
5137 insn = emit_insn (gen_probe_stack_range (stack_pointer_rtx,
5138 stack_pointer_rtx, temp1));
5139
5140 /* Now reset the CFA register if needed. */
5141 if (frame_related_p)
5142 {
5143 add_reg_note (insn, REG_CFA_DEF_CFA,
5144 plus_constant (Pmode, stack_pointer_rtx, rounded_size));
5145 RTX_FRAME_RELATED_P (insn) = 1;
5146 }
5147
5148 emit_insn (gen_blockage ());
5149 dump_stack_clash_frame_info (PROBE_LOOP, size != rounded_size);
5150 }
5151
5152 /* Handle any residuals. Residuals of at least MIN_PROBE_THRESHOLD have to
5153 be probed. This maintains the requirement that each page is probed at
5154 least once. For initial probing we probe only if the allocation is
5155 more than GUARD_SIZE - buffer, and for the outgoing arguments we probe
5156 if the amount is larger than buffer. GUARD_SIZE - buffer + buffer ==
5157      GUARD_SIZE.  This means that for any allocation that is large enough to
5158      trigger a probe here, we'll have at least one, and if they're not large
5159      enough for this code to emit anything for them, the page would have been
5160 probed by the saving of FP/LR either by this function or any callees. If
5161 we don't have any callees then we won't have more stack adjustments and so
5162 are still safe. */
5163 if (residual)
5164 {
5165 HOST_WIDE_INT residual_probe_offset = guard_used_by_caller;
5166 /* If we're doing final adjustments, and we've done any full page
5167 allocations then any residual needs to be probed. */
5168 if (final_adjustment_p && rounded_size != 0)
5169 min_probe_threshold = 0;
5170 /* If doing a small final adjustment, we always probe at offset 0.
5171 This is done to avoid issues when LR is not at position 0 or when
5172 the final adjustment is smaller than the probing offset. */
5173 else if (final_adjustment_p && rounded_size == 0)
5174 residual_probe_offset = 0;
5175
5176 aarch64_sub_sp (temp1, temp2, residual, frame_related_p);
5177 if (residual >= min_probe_threshold)
5178 {
5179 if (dump_file)
5180 fprintf (dump_file,
5181 "Stack clash AArch64 prologue residuals: "
5182 HOST_WIDE_INT_PRINT_DEC " bytes, probing will be required."
5183 "\n", residual);
5184
5185 emit_stack_probe (plus_constant (Pmode, stack_pointer_rtx,
5186 residual_probe_offset));
5187 emit_insn (gen_blockage ());
5188 }
5189 }
5190}
5191
43cacb12
RS
5192/* Add a REG_CFA_EXPRESSION note to INSN to say that register REG
5193 is saved at BASE + OFFSET. */
5194
5195static void
5196aarch64_add_cfa_expression (rtx_insn *insn, unsigned int reg,
5197 rtx base, poly_int64 offset)
5198{
5199 rtx mem = gen_frame_mem (DImode, plus_constant (Pmode, base, offset));
5200 add_reg_note (insn, REG_CFA_EXPRESSION,
5201 gen_rtx_SET (mem, regno_reg_rtx[reg]));
5202}
5203
43e9d192
IB
5204/* AArch64 stack frames generated by this compiler look like:
5205
5206 +-------------------------------+
5207 | |
5208 | incoming stack arguments |
5209 | |
34834420
MS
5210 +-------------------------------+
5211 | | <-- incoming stack pointer (aligned)
43e9d192
IB
5212 | callee-allocated save area |
5213 | for register varargs |
5214 | |
34834420
MS
5215 +-------------------------------+
5216 | local variables | <-- frame_pointer_rtx
43e9d192
IB
5217 | |
5218 +-------------------------------+
cd1bef27 5219 | padding | \
454fdba9 5220 +-------------------------------+ |
454fdba9 5221 | callee-saved registers | | frame.saved_regs_size
454fdba9
RL
5222 +-------------------------------+ |
5223 | LR' | |
5224 +-------------------------------+ |
34834420
MS
5225 | FP' | / <- hard_frame_pointer_rtx (aligned)
5226 +-------------------------------+
43e9d192
IB
5227 | dynamic allocation |
5228 +-------------------------------+
34834420
MS
5229 | padding |
5230 +-------------------------------+
5231 | outgoing stack arguments | <-- arg_pointer
5232 | |
5233 +-------------------------------+
5234 | | <-- stack_pointer_rtx (aligned)
43e9d192 5235
34834420
MS
5236 Dynamic stack allocations via alloca() decrease stack_pointer_rtx
5237 but leave frame_pointer_rtx and hard_frame_pointer_rtx
cd1bef27
JL
5238 unchanged.
5239
5240 By default for stack-clash we assume the guard is at least 64KB, but this
5241 value is configurable to either 4KB or 64KB. We also force the guard size to
5242 be the same as the probing interval and both values are kept in sync.
5243
5244 With those assumptions the callee can allocate up to 63KB (or 3KB depending
5245 on the guard size) of stack space without probing.
5246
5247 When probing is needed, we emit a probe at the start of the prologue
5248 and every PARAM_STACK_CLASH_PROTECTION_GUARD_SIZE bytes thereafter.
5249
5250   We have to track how much space has been allocated; the only stores
5251 to the stack we track as implicit probes are the FP/LR stores.
5252
5253 For outgoing arguments we probe if the size is larger than 1KB, such that
5254 the ABI specified buffer is maintained for the next callee. */
43e9d192
IB
5255
5256/* Generate the prologue instructions for entry into a function.
5257 Establish the stack frame by decreasing the stack pointer with a
5258 properly calculated size and, if necessary, create a frame record
5259 filled with the values of LR and previous frame pointer. The
6991c977 5260 current FP is also set up if it is in use. */
43e9d192
IB
5261
5262void
5263aarch64_expand_prologue (void)
5264{
6a70badb
RS
5265 poly_int64 frame_size = cfun->machine->frame.frame_size;
5266 poly_int64 initial_adjust = cfun->machine->frame.initial_adjust;
71bfb77a 5267 HOST_WIDE_INT callee_adjust = cfun->machine->frame.callee_adjust;
6a70badb
RS
5268 poly_int64 final_adjust = cfun->machine->frame.final_adjust;
5269 poly_int64 callee_offset = cfun->machine->frame.callee_offset;
71bfb77a
WD
5270 unsigned reg1 = cfun->machine->frame.wb_candidate1;
5271 unsigned reg2 = cfun->machine->frame.wb_candidate2;
204d2c03 5272 bool emit_frame_chain = cfun->machine->frame.emit_frame_chain;
71bfb77a 5273 rtx_insn *insn;
43e9d192 5274
db58fd89
JW
5275 /* Sign return address for functions. */
5276 if (aarch64_return_address_signing_enabled ())
27169e45
JW
5277 {
5278 insn = emit_insn (gen_pacisp ());
5279 add_reg_note (insn, REG_CFA_TOGGLE_RA_MANGLE, const0_rtx);
5280 RTX_FRAME_RELATED_P (insn) = 1;
5281 }
db58fd89 5282
dd991abb 5283 if (flag_stack_usage_info)
6a70badb 5284 current_function_static_stack_size = constant_lower_bound (frame_size);
43e9d192 5285
a3eb8a52
EB
5286 if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK)
5287 {
5288 if (crtl->is_leaf && !cfun->calls_alloca)
5289 {
6a70badb
RS
5290 if (maybe_gt (frame_size, PROBE_INTERVAL)
5291 && maybe_gt (frame_size, get_stack_check_protect ()))
8c1dd970
JL
5292 aarch64_emit_probe_stack_range (get_stack_check_protect (),
5293 (frame_size
5294 - get_stack_check_protect ()));
a3eb8a52 5295 }
6a70badb 5296 else if (maybe_gt (frame_size, 0))
8c1dd970 5297 aarch64_emit_probe_stack_range (get_stack_check_protect (), frame_size);
a3eb8a52
EB
5298 }
5299
f5470a77
RS
5300 rtx ip0_rtx = gen_rtx_REG (Pmode, IP0_REGNUM);
5301 rtx ip1_rtx = gen_rtx_REG (Pmode, IP1_REGNUM);
5302
cd1bef27
JL
5303 /* In theory we should never have both an initial adjustment
5304 and a callee save adjustment. Verify that is the case since the
5305 code below does not handle it for -fstack-clash-protection. */
5306 gcc_assert (known_eq (initial_adjust, 0) || callee_adjust == 0);
5307
5308 /* Will only probe if the initial adjustment is larger than the guard
5309 less the amount of the guard reserved for use by the caller's
5310 outgoing args. */
5311 aarch64_allocate_and_probe_stack_space (ip0_rtx, ip1_rtx, initial_adjust,
5312 true, false);
43e9d192 5313
71bfb77a
WD
5314 if (callee_adjust != 0)
5315 aarch64_push_regs (reg1, reg2, callee_adjust);
43e9d192 5316
204d2c03 5317 if (emit_frame_chain)
43e9d192 5318 {
43cacb12 5319 poly_int64 reg_offset = callee_adjust;
71bfb77a 5320 if (callee_adjust == 0)
43cacb12
RS
5321 {
5322 reg1 = R29_REGNUM;
5323 reg2 = R30_REGNUM;
5324 reg_offset = callee_offset;
5325 aarch64_save_callee_saves (DImode, reg_offset, reg1, reg2, false);
5326 }
f5470a77 5327 aarch64_add_offset (Pmode, hard_frame_pointer_rtx,
43cacb12
RS
5328 stack_pointer_rtx, callee_offset,
5329 ip1_rtx, ip0_rtx, frame_pointer_needed);
5330 if (frame_pointer_needed && !frame_size.is_constant ())
5331 {
5332 /* Variable-sized frames need to describe the save slot
5333 address using DW_CFA_expression rather than DW_CFA_offset.
5334 This means that, without taking further action, the
5335 locations of the registers that we've already saved would
5336 remain based on the stack pointer even after we redefine
5337 the CFA based on the frame pointer. We therefore need new
5338 DW_CFA_expressions to re-express the save slots with addresses
5339 based on the frame pointer. */
5340 rtx_insn *insn = get_last_insn ();
5341 gcc_assert (RTX_FRAME_RELATED_P (insn));
5342
5343 /* Add an explicit CFA definition if this was previously
5344 implicit. */
5345 if (!find_reg_note (insn, REG_CFA_ADJUST_CFA, NULL_RTX))
5346 {
5347 rtx src = plus_constant (Pmode, stack_pointer_rtx,
5348 callee_offset);
5349 add_reg_note (insn, REG_CFA_ADJUST_CFA,
5350 gen_rtx_SET (hard_frame_pointer_rtx, src));
5351 }
5352
5353 /* Change the save slot expressions for the registers that
5354 we've already saved. */
5355 reg_offset -= callee_offset;
5356 aarch64_add_cfa_expression (insn, reg2, hard_frame_pointer_rtx,
5357 reg_offset + UNITS_PER_WORD);
5358 aarch64_add_cfa_expression (insn, reg1, hard_frame_pointer_rtx,
5359 reg_offset);
5360 }
71bfb77a 5361 emit_insn (gen_stack_tie (stack_pointer_rtx, hard_frame_pointer_rtx));
43e9d192 5362 }
71bfb77a
WD
5363
5364 aarch64_save_callee_saves (DImode, callee_offset, R0_REGNUM, R30_REGNUM,
204d2c03 5365 callee_adjust != 0 || emit_frame_chain);
71bfb77a 5366 aarch64_save_callee_saves (DFmode, callee_offset, V0_REGNUM, V31_REGNUM,
204d2c03 5367 callee_adjust != 0 || emit_frame_chain);
cd1bef27
JL
5368
5369 /* We may need to probe the final adjustment if it is larger than the guard
5370      that is assumed by the called function.  */
5371 aarch64_allocate_and_probe_stack_space (ip1_rtx, ip0_rtx, final_adjust,
5372 !frame_pointer_needed, true);
43e9d192
IB
5373}
5374
4f942779
RL
5375/* Return TRUE if we can use a simple_return insn.
5376
5377 This function checks whether the callee saved stack is empty, which
5378   means no restore actions are needed.  The pro_and_epilogue will use
5379 this to check whether shrink-wrapping opt is feasible. */
5380
5381bool
5382aarch64_use_return_insn_p (void)
5383{
5384 if (!reload_completed)
5385 return false;
5386
5387 if (crtl->profile)
5388 return false;
5389
6a70badb 5390 return known_eq (cfun->machine->frame.frame_size, 0);
4f942779
RL
5391}
5392
71bfb77a
WD
5393/* Generate the epilogue instructions for returning from a function.
5394   This is almost exactly the reverse of the prologue sequence, except
5395 that we need to insert barriers to avoid scheduling loads that read
5396 from a deallocated stack, and we optimize the unwind records by
5397 emitting them all together if possible. */
43e9d192
IB
5398void
5399aarch64_expand_epilogue (bool for_sibcall)
5400{
6a70badb 5401 poly_int64 initial_adjust = cfun->machine->frame.initial_adjust;
71bfb77a 5402 HOST_WIDE_INT callee_adjust = cfun->machine->frame.callee_adjust;
6a70badb
RS
5403 poly_int64 final_adjust = cfun->machine->frame.final_adjust;
5404 poly_int64 callee_offset = cfun->machine->frame.callee_offset;
71bfb77a
WD
5405 unsigned reg1 = cfun->machine->frame.wb_candidate1;
5406 unsigned reg2 = cfun->machine->frame.wb_candidate2;
5407 rtx cfi_ops = NULL;
5408 rtx_insn *insn;
43cacb12
RS
5409 /* A stack clash protection prologue may not have left IP0_REGNUM or
5410 IP1_REGNUM in a usable state. The same is true for allocations
5411 with an SVE component, since we then need both temporary registers
cd1bef27
JL
5412 for each allocation. For stack clash we are in a usable state if
5413 the adjustment is less than GUARD_SIZE - GUARD_USED_BY_CALLER. */
5414 HOST_WIDE_INT guard_size
5415 = 1 << PARAM_VALUE (PARAM_STACK_CLASH_PROTECTION_GUARD_SIZE);
5416 HOST_WIDE_INT guard_used_by_caller = STACK_CLASH_CALLER_GUARD;
5417
5418 /* We can re-use the registers when the allocation amount is smaller than
5419 guard_size - guard_used_by_caller because we won't be doing any probes
5420 then. In such situations the register should remain live with the correct
5421 value. */
43cacb12 5422 bool can_inherit_p = (initial_adjust.is_constant ()
cd1bef27
JL
5423 && final_adjust.is_constant ())
5424 && (!flag_stack_clash_protection
5425 || known_lt (initial_adjust,
5426 guard_size - guard_used_by_caller));
44c0e7b9 5427
71bfb77a 5428 /* We need to add memory barrier to prevent read from deallocated stack. */
6a70badb
RS
5429 bool need_barrier_p
5430 = maybe_ne (get_frame_size ()
5431 + cfun->machine->frame.saved_varargs_size, 0);
43e9d192 5432
71bfb77a 5433 /* Emit a barrier to prevent loads from a deallocated stack. */
6a70badb
RS
5434 if (maybe_gt (final_adjust, crtl->outgoing_args_size)
5435 || cfun->calls_alloca
8144a493 5436 || crtl->calls_eh_return)
43e9d192 5437 {
71bfb77a
WD
5438 emit_insn (gen_stack_tie (stack_pointer_rtx, stack_pointer_rtx));
5439 need_barrier_p = false;
5440 }
7e8c2bd5 5441
71bfb77a
WD
5442 /* Restore the stack pointer from the frame pointer if it may not
5443 be the same as the stack pointer. */
f5470a77
RS
5444 rtx ip0_rtx = gen_rtx_REG (Pmode, IP0_REGNUM);
5445 rtx ip1_rtx = gen_rtx_REG (Pmode, IP1_REGNUM);
6a70badb
RS
5446 if (frame_pointer_needed
5447 && (maybe_ne (final_adjust, 0) || cfun->calls_alloca))
f5470a77
RS
5448 /* If writeback is used when restoring callee-saves, the CFA
5449 is restored on the instruction doing the writeback. */
5450 aarch64_add_offset (Pmode, stack_pointer_rtx,
5451 hard_frame_pointer_rtx, -callee_offset,
43cacb12 5452 ip1_rtx, ip0_rtx, callee_adjust == 0);
71bfb77a 5453 else
cd1bef27
JL
5454 /* The case where we need to re-use the register here is very rare, so
5455 avoid the complicated condition and just always emit a move if the
5456 immediate doesn't fit. */
5457 aarch64_add_sp (ip1_rtx, ip0_rtx, final_adjust, true);
43e9d192 5458
71bfb77a
WD
5459 aarch64_restore_callee_saves (DImode, callee_offset, R0_REGNUM, R30_REGNUM,
5460 callee_adjust != 0, &cfi_ops);
5461 aarch64_restore_callee_saves (DFmode, callee_offset, V0_REGNUM, V31_REGNUM,
5462 callee_adjust != 0, &cfi_ops);
43e9d192 5463
71bfb77a
WD
5464 if (need_barrier_p)
5465 emit_insn (gen_stack_tie (stack_pointer_rtx, stack_pointer_rtx));
5466
5467 if (callee_adjust != 0)
5468 aarch64_pop_regs (reg1, reg2, callee_adjust, &cfi_ops);
5469
6a70badb 5470 if (callee_adjust != 0 || maybe_gt (initial_adjust, 65536))
71bfb77a
WD
5471 {
5472 /* Emit delayed restores and set the CFA to be SP + initial_adjust. */
89ac681e 5473 insn = get_last_insn ();
71bfb77a
WD
5474 rtx new_cfa = plus_constant (Pmode, stack_pointer_rtx, initial_adjust);
5475 REG_NOTES (insn) = alloc_reg_note (REG_CFA_DEF_CFA, new_cfa, cfi_ops);
43e9d192 5476 RTX_FRAME_RELATED_P (insn) = 1;
71bfb77a 5477 cfi_ops = NULL;
43e9d192
IB
5478 }
5479
43cacb12
RS
5480 aarch64_add_sp (ip0_rtx, ip1_rtx, initial_adjust,
5481 !can_inherit_p || df_regs_ever_live_p (IP0_REGNUM));
7e8c2bd5 5482
71bfb77a
WD
5483 if (cfi_ops)
5484 {
5485 /* Emit delayed restores and reset the CFA to be SP. */
5486 insn = get_last_insn ();
5487 cfi_ops = alloc_reg_note (REG_CFA_DEF_CFA, stack_pointer_rtx, cfi_ops);
5488 REG_NOTES (insn) = cfi_ops;
5489 RTX_FRAME_RELATED_P (insn) = 1;
dd991abb
RH
5490 }
5491
db58fd89
JW
5492 /* We prefer to emit the combined return/authenticate instruction RETAA,
5493 however there are three cases in which we must instead emit an explicit
5494 authentication instruction.
5495
5496 1) Sibcalls don't return in a normal way, so if we're about to call one
5497 we must authenticate.
5498
5499 2) The RETAA instruction is not available before ARMv8.3-A, so if we are
5500 generating code for !TARGET_ARMV8_3 we can't use it and must
5501 explicitly authenticate.
5502
5503 3) On an eh_return path we make extra stack adjustments to update the
5504 canonical frame address to be the exception handler's CFA. We want
5505 to authenticate using the CFA of the function which calls eh_return.
5506 */
5507 if (aarch64_return_address_signing_enabled ()
5508 && (for_sibcall || !TARGET_ARMV8_3 || crtl->calls_eh_return))
27169e45
JW
5509 {
5510 insn = emit_insn (gen_autisp ());
5511 add_reg_note (insn, REG_CFA_TOGGLE_RA_MANGLE, const0_rtx);
5512 RTX_FRAME_RELATED_P (insn) = 1;
5513 }
db58fd89 5514
dd991abb
RH
5515 /* Stack adjustment for exception handler. */
5516 if (crtl->calls_eh_return)
5517 {
5518 /* We need to unwind the stack by the offset computed by
5519 EH_RETURN_STACKADJ_RTX. We have already reset the CFA
5520 to be SP; letting the CFA move during this adjustment
5521 is just as correct as retaining the CFA from the body
5522 of the function. Therefore, do nothing special. */
5523 emit_insn (gen_add2_insn (stack_pointer_rtx, EH_RETURN_STACKADJ_RTX));
43e9d192
IB
5524 }
5525
5526 emit_use (gen_rtx_REG (DImode, LR_REGNUM));
5527 if (!for_sibcall)
5528 emit_jump_insn (ret_rtx);
5529}
5530
8144a493
WD
5531/* Implement EH_RETURN_HANDLER_RTX. EH returns need to either return
5532 normally or return to a previous frame after unwinding.
1c960e02 5533
8144a493
WD
5534 An EH return uses a single shared return sequence. The epilogue is
5535 exactly like a normal epilogue except that it has an extra input
5536 register (EH_RETURN_STACKADJ_RTX) which contains the stack adjustment
5537 that must be applied after the frame has been destroyed. An extra label
5538 is inserted before the epilogue which initializes this register to zero,
5539 and this is the entry point for a normal return.
43e9d192 5540
8144a493
WD
5541 An actual EH return updates the return address, initializes the stack
5542 adjustment and jumps directly into the epilogue (bypassing the zeroing
5543 of the adjustment). Since the return address is typically saved on the
5544 stack when a function makes a call, the saved LR must be updated outside
5545 the epilogue.
43e9d192 5546
8144a493
WD
5547 This poses problems as the store is generated well before the epilogue,
5548 so the offset of LR is not known yet. Also optimizations will remove the
5549 store as it appears dead, even after the epilogue is generated (as the
5550 base or offset for loading LR is different in many cases).
43e9d192 5551
8144a493
WD
5552 To avoid these problems this implementation forces the frame pointer
5553 in eh_return functions so that the location of LR is fixed and known early.
5554 It also marks the store volatile, so no optimization is permitted to
5555 remove the store. */
5556rtx
5557aarch64_eh_return_handler_rtx (void)
5558{
5559 rtx tmp = gen_frame_mem (Pmode,
5560 plus_constant (Pmode, hard_frame_pointer_rtx, UNITS_PER_WORD));
43e9d192 5561
8144a493
WD
5562 /* Mark the store volatile, so no optimization is permitted to remove it. */
5563 MEM_VOLATILE_P (tmp) = true;
5564 return tmp;
43e9d192
IB
5565}
5566
43e9d192
IB
5567/* Output code to add DELTA to the first argument, and then jump
5568 to FUNCTION. Used for C++ multiple inheritance. */
5569static void
5570aarch64_output_mi_thunk (FILE *file, tree thunk ATTRIBUTE_UNUSED,
5571 HOST_WIDE_INT delta,
5572 HOST_WIDE_INT vcall_offset,
5573 tree function)
5574{
5575 /* The this pointer is always in x0. Note that this differs from
5576   Arm where the this pointer may be bumped to r1 if r0 is required
5577 to return a pointer to an aggregate. On AArch64 a result value
5578 pointer will be in x8. */
5579 int this_regno = R0_REGNUM;
5d8a22a5
DM
5580 rtx this_rtx, temp0, temp1, addr, funexp;
5581 rtx_insn *insn;
43e9d192 5582
75f1d6fc
SN
5583 reload_completed = 1;
5584 emit_note (NOTE_INSN_PROLOGUE_END);
43e9d192 5585
f5470a77
RS
5586 this_rtx = gen_rtx_REG (Pmode, this_regno);
5587 temp0 = gen_rtx_REG (Pmode, IP0_REGNUM);
5588 temp1 = gen_rtx_REG (Pmode, IP1_REGNUM);
5589
43e9d192 5590 if (vcall_offset == 0)
43cacb12 5591 aarch64_add_offset (Pmode, this_rtx, this_rtx, delta, temp1, temp0, false);
43e9d192
IB
5592 else
5593 {
28514dda 5594 gcc_assert ((vcall_offset & (POINTER_BYTES - 1)) == 0);
43e9d192 5595
75f1d6fc
SN
5596 addr = this_rtx;
5597 if (delta != 0)
5598 {
5599 if (delta >= -256 && delta < 256)
5600 addr = gen_rtx_PRE_MODIFY (Pmode, this_rtx,
5601 plus_constant (Pmode, this_rtx, delta));
5602 else
43cacb12
RS
5603 aarch64_add_offset (Pmode, this_rtx, this_rtx, delta,
5604 temp1, temp0, false);
43e9d192
IB
5605 }
5606
28514dda
YZ
5607 if (Pmode == ptr_mode)
5608 aarch64_emit_move (temp0, gen_rtx_MEM (ptr_mode, addr));
5609 else
5610 aarch64_emit_move (temp0,
5611 gen_rtx_ZERO_EXTEND (Pmode,
5612 gen_rtx_MEM (ptr_mode, addr)));
75f1d6fc 5613
28514dda 5614 if (vcall_offset >= -256 && vcall_offset < 4096 * POINTER_BYTES)
75f1d6fc 5615 addr = plus_constant (Pmode, temp0, vcall_offset);
43e9d192
IB
5616 else
5617 {
f43657b4
JW
5618 aarch64_internal_mov_immediate (temp1, GEN_INT (vcall_offset), true,
5619 Pmode);
75f1d6fc 5620 addr = gen_rtx_PLUS (Pmode, temp0, temp1);
43e9d192
IB
5621 }
5622
28514dda
YZ
5623 if (Pmode == ptr_mode)
5624 aarch64_emit_move (temp1, gen_rtx_MEM (ptr_mode,addr));
5625 else
5626 aarch64_emit_move (temp1,
5627 gen_rtx_SIGN_EXTEND (Pmode,
5628 gen_rtx_MEM (ptr_mode, addr)));
5629
75f1d6fc 5630 emit_insn (gen_add2_insn (this_rtx, temp1));
43e9d192
IB
5631 }
5632
75f1d6fc
SN
5633 /* Generate a tail call to the target function. */
5634 if (!TREE_USED (function))
5635 {
5636 assemble_external (function);
5637 TREE_USED (function) = 1;
5638 }
5639 funexp = XEXP (DECL_RTL (function), 0);
5640 funexp = gen_rtx_MEM (FUNCTION_MODE, funexp);
5641 insn = emit_call_insn (gen_sibcall (funexp, const0_rtx, NULL_RTX));
5642 SIBLING_CALL_P (insn) = 1;
5643
5644 insn = get_insns ();
5645 shorten_branches (insn);
5646 final_start_function (insn, file, 1);
5647 final (insn, file, 1);
43e9d192 5648 final_end_function ();
75f1d6fc
SN
5649
5650 /* Stop pretending to be a post-reload pass. */
5651 reload_completed = 0;
43e9d192
IB
5652}
5653
43e9d192
IB
5654static bool
5655aarch64_tls_referenced_p (rtx x)
5656{
5657 if (!TARGET_HAVE_TLS)
5658 return false;
e7de8563
RS
5659 subrtx_iterator::array_type array;
5660 FOR_EACH_SUBRTX (iter, array, x, ALL)
5661 {
5662 const_rtx x = *iter;
5663 if (GET_CODE (x) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (x) != 0)
5664 return true;
5665 /* Don't recurse into UNSPEC_TLS looking for TLS symbols; these are
5666 TLS offsets, not real symbol references. */
5667 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
5668 iter.skip_subrtxes ();
5669 }
5670 return false;
43e9d192
IB
5671}
5672
5673
43e9d192
IB
5674/* Return true if val can be encoded as a 12-bit unsigned immediate with
5675 a left shift of 0 or 12 bits. */
5676bool
5677aarch64_uimm12_shift (HOST_WIDE_INT val)
5678{
5679 return ((val & (((HOST_WIDE_INT) 0xfff) << 0)) == val
5680 || (val & (((HOST_WIDE_INT) 0xfff) << 12)) == val
5681 );
5682}
5683
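A couple of illustrative values (chosen here, not from the source): 0xabc is accepted because it fits entirely in the low 12 bits, 0xabc000 is accepted as the same field shifted left by 12, while 0xabc001 is rejected because its set bits straddle both halves, so no single 12-bit field shifted by 0 or 12 can cover them.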
eb471ba3
TC
5684/* Returns the nearest value to VAL that will fit as a 12-bit unsigned immediate
5685 that can be created with a left shift of 0 or 12. */
5686static HOST_WIDE_INT
5687aarch64_clamp_to_uimm12_shift (HOST_WIDE_INT val)
5688{
5689 /* Check to see if the value fits in 24 bits, as that is the maximum we can
5690 handle correctly. */
5691 gcc_assert ((val & 0xffffff) == val);
5692
5693 if (((val & 0xfff) << 0) == val)
5694 return val;
5695
5696 return val & (0xfff << 12);
5697}
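For instance (an illustrative value, not from the source): 0x123456 fails the low-12-bit test, so the function returns 0x123456 & (0xfff << 12) = 0x123000, the closest smaller value encodable as a 12-bit immediate with LSL #12; the 0x456 remainder is left for the caller to handle with a separate instruction.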
43e9d192
IB
5698
5699/* Return true if val is an immediate that can be loaded into a
5700 register by a MOVZ instruction. */
5701static bool
77e994c9 5702aarch64_movw_imm (HOST_WIDE_INT val, scalar_int_mode mode)
43e9d192
IB
5703{
5704 if (GET_MODE_SIZE (mode) > 4)
5705 {
5706 if ((val & (((HOST_WIDE_INT) 0xffff) << 32)) == val
5707 || (val & (((HOST_WIDE_INT) 0xffff) << 48)) == val)
5708 return 1;
5709 }
5710 else
5711 {
43cacb12
RS
5712 /* Ignore sign extension. */
5713 val &= (HOST_WIDE_INT) 0xffffffff;
5714 }
5715 return ((val & (((HOST_WIDE_INT) 0xffff) << 0)) == val
5716 || (val & (((HOST_WIDE_INT) 0xffff) << 16)) == val);
5717}
5718
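Illustrative values (not from the source): in DImode, 0xabcd0000 is accepted because all of its set bits lie in the single 16-bit chunk at bit 16 (a MOVZ with LSL #16), and 0xffff00000000 is accepted for the chunk at bit 32, whereas 0x1ffff is rejected because its set bits span two adjacent 16-bit chunks.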
5719/* VAL is a value with the inner mode of MODE. Replicate it to fill a
5720 64-bit (DImode) integer. */
5721
5722static unsigned HOST_WIDE_INT
5723aarch64_replicate_bitmask_imm (unsigned HOST_WIDE_INT val, machine_mode mode)
5724{
5725 unsigned int size = GET_MODE_UNIT_PRECISION (mode);
5726 while (size < 64)
5727 {
5728 val &= (HOST_WIDE_INT_1U << size) - 1;
5729 val |= val << size;
5730 size *= 2;
43e9d192 5731 }
43cacb12 5732 return val;
43e9d192
IB
5733}
5734
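For example (illustrative inputs): replicating the QImode value 0xf0 yields 0xf0f0f0f0f0f0f0f0 and the HImode value 0x00ff yields 0x00ff00ff00ff00ff, while a DImode input is returned unchanged because the loop body never runs.  A minimal standalone sketch of the same doubling loop, specialised to an 8-bit element and written without the GCC mode machinery, looks like this:

    #include <stdint.h>

    /* Replicate the low 8 bits of VAL across all 64 bits, doubling the
       filled width on each iteration, as the function above does.  */
    uint64_t
    replicate8 (uint64_t val)
    {
      for (unsigned int size = 8; size < 64; size *= 2)
        {
          val &= (UINT64_C (1) << size) - 1;   /* Keep only the low SIZE bits.  */
          val |= val << size;                  /* Copy them into the next SIZE bits.  */
        }
      return val;
    }

    /* replicate8 (0xf0) == 0xf0f0f0f0f0f0f0f0.  */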
a64c73a2
WD
5735/* Multipliers for repeating bitmasks of width 32, 16, 8, 4, and 2. */
5736
5737static const unsigned HOST_WIDE_INT bitmask_imm_mul[] =
5738 {
5739 0x0000000100000001ull,
5740 0x0001000100010001ull,
5741 0x0101010101010101ull,
5742 0x1111111111111111ull,
5743 0x5555555555555555ull,
5744 };
5745
43e9d192
IB
5746
5747/* Return true if val is a valid bitmask immediate. */
a64c73a2 5748
43e9d192 5749bool
a64c73a2 5750aarch64_bitmask_imm (HOST_WIDE_INT val_in, machine_mode mode)
43e9d192 5751{
a64c73a2
WD
5752 unsigned HOST_WIDE_INT val, tmp, mask, first_one, next_one;
5753 int bits;
5754
5755 /* Check for a single sequence of one bits and return quickly if so.
5756      The special cases of all ones and all zeroes return false.  */
43cacb12 5757 val = aarch64_replicate_bitmask_imm (val_in, mode);
a64c73a2
WD
5758 tmp = val + (val & -val);
5759
5760 if (tmp == (tmp & -tmp))
5761 return (val + 1) > 1;
5762
5763 /* Replicate 32-bit immediates so we can treat them as 64-bit. */
5764 if (mode == SImode)
5765 val = (val << 32) | (val & 0xffffffff);
5766
5767 /* Invert if the immediate doesn't start with a zero bit - this means we
5768 only need to search for sequences of one bits. */
5769 if (val & 1)
5770 val = ~val;
5771
5772 /* Find the first set bit and set tmp to val with the first sequence of one
5773 bits removed. Return success if there is a single sequence of ones. */
5774 first_one = val & -val;
5775 tmp = val & (val + first_one);
5776
5777 if (tmp == 0)
5778 return true;
5779
5780 /* Find the next set bit and compute the difference in bit position. */
5781 next_one = tmp & -tmp;
5782 bits = clz_hwi (first_one) - clz_hwi (next_one);
5783 mask = val ^ tmp;
5784
5785 /* Check the bit position difference is a power of 2, and that the first
5786 sequence of one bits fits within 'bits' bits. */
5787 if ((mask >> bits) != 0 || bits != (bits & -bits))
5788 return false;
5789
5790 /* Check the sequence of one bits is repeated 64/bits times. */
5791 return val == mask * bitmask_imm_mul[__builtin_clz (bits) - 26];
43e9d192
IB
5792}
5793
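As a worked example (values picked for illustration): 0x0000ffff0000ffff in DImode is accepted; the quick single-run test fails, the value is inverted because bit 0 is set, the remaining runs of ones turn out to repeat every 32 bits (a power of two), and the final multiply against 0x0000000100000001 reproduces the inverted value.  By contrast, 0x0000fffe0000ffff is rejected because its two runs of ones are not identical.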
43fd192f
MC
5794/* Create mask of ones, covering the lowest to highest bits set in VAL_IN.
5795   Assumed precondition: VAL_IN is not zero.  */
5796
5797unsigned HOST_WIDE_INT
5798aarch64_and_split_imm1 (HOST_WIDE_INT val_in)
5799{
5800 int lowest_bit_set = ctz_hwi (val_in);
5801 int highest_bit_set = floor_log2 (val_in);
5802 gcc_assert (val_in != 0);
5803
5804 return ((HOST_WIDE_INT_UC (2) << highest_bit_set) -
5805 (HOST_WIDE_INT_1U << lowest_bit_set));
5806}
5807
5808/* Create a constant where all bits outside of the range from the lowest set
5809   bit to the highest set bit of VAL_IN are set to 1.  */
5810
5811unsigned HOST_WIDE_INT
5812aarch64_and_split_imm2 (HOST_WIDE_INT val_in)
5813{
5814 return val_in | ~aarch64_and_split_imm1 (val_in);
5815}
5816
5817/* Return true if VAL_IN is a valid 'and' bitmask immediate. */
5818
5819bool
5820aarch64_and_bitmask_imm (unsigned HOST_WIDE_INT val_in, machine_mode mode)
5821{
77e994c9
RS
5822 scalar_int_mode int_mode;
5823 if (!is_a <scalar_int_mode> (mode, &int_mode))
5824 return false;
5825
5826 if (aarch64_bitmask_imm (val_in, int_mode))
43fd192f
MC
5827 return false;
5828
77e994c9 5829 if (aarch64_move_imm (val_in, int_mode))
43fd192f
MC
5830 return false;
5831
5832 unsigned HOST_WIDE_INT imm2 = aarch64_and_split_imm2 (val_in);
5833
77e994c9 5834 return aarch64_bitmask_imm (imm2, int_mode);
43fd192f 5835}
43e9d192
IB
5836
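One illustrative case (values chosen here, not from the source): 0x000f000f in DImode is neither a MOV immediate nor a bitmask immediate, but aarch64_and_split_imm1 yields 0x000fffff (a single run covering bits 0-19) and aarch64_and_split_imm2 yields 0xffffffffffff000f, both of which are valid bitmask immediates.  Their intersection is 0x000f000f again, so an AND with the original constant can be expanded into two AND-immediate instructions, and aarch64_and_bitmask_imm returns true for it.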
5837/* Return true if val is an immediate that can be loaded into a
5838 register in a single instruction. */
5839bool
ef4bddc2 5840aarch64_move_imm (HOST_WIDE_INT val, machine_mode mode)
43e9d192 5841{
77e994c9
RS
5842 scalar_int_mode int_mode;
5843 if (!is_a <scalar_int_mode> (mode, &int_mode))
5844 return false;
5845
5846 if (aarch64_movw_imm (val, int_mode) || aarch64_movw_imm (~val, int_mode))
43e9d192 5847 return 1;
77e994c9 5848 return aarch64_bitmask_imm (val, int_mode);
43e9d192
IB
5849}
5850
5851static bool
ef4bddc2 5852aarch64_cannot_force_const_mem (machine_mode mode ATTRIBUTE_UNUSED, rtx x)
43e9d192
IB
5853{
5854 rtx base, offset;
7eda14e1 5855
43e9d192
IB
5856 if (GET_CODE (x) == HIGH)
5857 return true;
5858
43cacb12
RS
5859 /* There's no way to calculate VL-based values using relocations. */
5860 subrtx_iterator::array_type array;
5861 FOR_EACH_SUBRTX (iter, array, x, ALL)
5862 if (GET_CODE (*iter) == CONST_POLY_INT)
5863 return true;
5864
43e9d192
IB
5865 split_const (x, &base, &offset);
5866 if (GET_CODE (base) == SYMBOL_REF || GET_CODE (base) == LABEL_REF)
28514dda 5867 {
43cacb12 5868 if (aarch64_classify_symbol (base, INTVAL (offset))
28514dda
YZ
5869 != SYMBOL_FORCE_TO_MEM)
5870 return true;
5871 else
5872 /* Avoid generating a 64-bit relocation in ILP32; leave
5873 to aarch64_expand_mov_immediate to handle it properly. */
5874 return mode != ptr_mode;
5875 }
43e9d192
IB
5876
5877 return aarch64_tls_referenced_p (x);
5878}
5879
e79136e4
WD
5880/* Implement TARGET_CASE_VALUES_THRESHOLD.
5881 The expansion for a table switch is quite expensive due to the number
5882   of instructions, the table lookup and the hard-to-predict indirect jump.
5883   When optimizing for speed and -O3 is enabled, use the per-core tuning if
5884 set, otherwise use tables for > 16 cases as a tradeoff between size and
5885 performance. When optimizing for size, use the default setting. */
50487d79
EM
5886
5887static unsigned int
5888aarch64_case_values_threshold (void)
5889{
5890 /* Use the specified limit for the number of cases before using jump
5891 tables at higher optimization levels. */
5892 if (optimize > 2
5893 && selected_cpu->tune->max_case_values != 0)
5894 return selected_cpu->tune->max_case_values;
5895 else
e79136e4 5896 return optimize_size ? default_case_values_threshold () : 17;
50487d79
EM
5897}
5898
43e9d192
IB
5899/* Return true if register REGNO is a valid index register.
5900 STRICT_P is true if REG_OK_STRICT is in effect. */
5901
5902bool
5903aarch64_regno_ok_for_index_p (int regno, bool strict_p)
5904{
5905 if (!HARD_REGISTER_NUM_P (regno))
5906 {
5907 if (!strict_p)
5908 return true;
5909
5910 if (!reg_renumber)
5911 return false;
5912
5913 regno = reg_renumber[regno];
5914 }
5915 return GP_REGNUM_P (regno);
5916}
5917
5918/* Return true if register REGNO is a valid base register for mode MODE.
5919 STRICT_P is true if REG_OK_STRICT is in effect. */
5920
5921bool
5922aarch64_regno_ok_for_base_p (int regno, bool strict_p)
5923{
5924 if (!HARD_REGISTER_NUM_P (regno))
5925 {
5926 if (!strict_p)
5927 return true;
5928
5929 if (!reg_renumber)
5930 return false;
5931
5932 regno = reg_renumber[regno];
5933 }
5934
5935 /* The fake registers will be eliminated to either the stack or
5936 hard frame pointer, both of which are usually valid base registers.
5937 Reload deals with the cases where the eliminated form isn't valid. */
5938 return (GP_REGNUM_P (regno)
5939 || regno == SP_REGNUM
5940 || regno == FRAME_POINTER_REGNUM
5941 || regno == ARG_POINTER_REGNUM);
5942}
5943
5944/* Return true if X is a valid base register for mode MODE.
5945 STRICT_P is true if REG_OK_STRICT is in effect. */
5946
5947static bool
5948aarch64_base_register_rtx_p (rtx x, bool strict_p)
5949{
76160199
RS
5950 if (!strict_p
5951 && GET_CODE (x) == SUBREG
5952 && contains_reg_of_mode[GENERAL_REGS][GET_MODE (SUBREG_REG (x))])
43e9d192
IB
5953 x = SUBREG_REG (x);
5954
5955 return (REG_P (x) && aarch64_regno_ok_for_base_p (REGNO (x), strict_p));
5956}
5957
5958/* Return true if address offset is a valid index. If it is, fill in INFO
5959 appropriately. STRICT_P is true if REG_OK_STRICT is in effect. */
5960
5961static bool
5962aarch64_classify_index (struct aarch64_address_info *info, rtx x,
ef4bddc2 5963 machine_mode mode, bool strict_p)
43e9d192
IB
5964{
5965 enum aarch64_address_type type;
5966 rtx index;
5967 int shift;
5968
5969 /* (reg:P) */
5970 if ((REG_P (x) || GET_CODE (x) == SUBREG)
5971 && GET_MODE (x) == Pmode)
5972 {
5973 type = ADDRESS_REG_REG;
5974 index = x;
5975 shift = 0;
5976 }
5977 /* (sign_extend:DI (reg:SI)) */
5978 else if ((GET_CODE (x) == SIGN_EXTEND
5979 || GET_CODE (x) == ZERO_EXTEND)
5980 && GET_MODE (x) == DImode
5981 && GET_MODE (XEXP (x, 0)) == SImode)
5982 {
5983 type = (GET_CODE (x) == SIGN_EXTEND)
5984 ? ADDRESS_REG_SXTW : ADDRESS_REG_UXTW;
5985 index = XEXP (x, 0);
5986 shift = 0;
5987 }
5988 /* (mult:DI (sign_extend:DI (reg:SI)) (const_int scale)) */
5989 else if (GET_CODE (x) == MULT
5990 && (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
5991 || GET_CODE (XEXP (x, 0)) == ZERO_EXTEND)
5992 && GET_MODE (XEXP (x, 0)) == DImode
5993 && GET_MODE (XEXP (XEXP (x, 0), 0)) == SImode
5994 && CONST_INT_P (XEXP (x, 1)))
5995 {
5996 type = (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND)
5997 ? ADDRESS_REG_SXTW : ADDRESS_REG_UXTW;
5998 index = XEXP (XEXP (x, 0), 0);
5999 shift = exact_log2 (INTVAL (XEXP (x, 1)));
6000 }
6001 /* (ashift:DI (sign_extend:DI (reg:SI)) (const_int shift)) */
6002 else if (GET_CODE (x) == ASHIFT
6003 && (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
6004 || GET_CODE (XEXP (x, 0)) == ZERO_EXTEND)
6005 && GET_MODE (XEXP (x, 0)) == DImode
6006 && GET_MODE (XEXP (XEXP (x, 0), 0)) == SImode
6007 && CONST_INT_P (XEXP (x, 1)))
6008 {
6009 type = (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND)
6010 ? ADDRESS_REG_SXTW : ADDRESS_REG_UXTW;
6011 index = XEXP (XEXP (x, 0), 0);
6012 shift = INTVAL (XEXP (x, 1));
6013 }
6014 /* (sign_extract:DI (mult:DI (reg:DI) (const_int scale)) 32+shift 0) */
6015 else if ((GET_CODE (x) == SIGN_EXTRACT
6016 || GET_CODE (x) == ZERO_EXTRACT)
6017 && GET_MODE (x) == DImode
6018 && GET_CODE (XEXP (x, 0)) == MULT
6019 && GET_MODE (XEXP (XEXP (x, 0), 0)) == DImode
6020 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))
6021 {
6022 type = (GET_CODE (x) == SIGN_EXTRACT)
6023 ? ADDRESS_REG_SXTW : ADDRESS_REG_UXTW;
6024 index = XEXP (XEXP (x, 0), 0);
6025 shift = exact_log2 (INTVAL (XEXP (XEXP (x, 0), 1)));
6026 if (INTVAL (XEXP (x, 1)) != 32 + shift
6027 || INTVAL (XEXP (x, 2)) != 0)
6028 shift = -1;
6029 }
6030 /* (and:DI (mult:DI (reg:DI) (const_int scale))
6031 (const_int 0xffffffff<<shift)) */
6032 else if (GET_CODE (x) == AND
6033 && GET_MODE (x) == DImode
6034 && GET_CODE (XEXP (x, 0)) == MULT
6035 && GET_MODE (XEXP (XEXP (x, 0), 0)) == DImode
6036 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
6037 && CONST_INT_P (XEXP (x, 1)))
6038 {
6039 type = ADDRESS_REG_UXTW;
6040 index = XEXP (XEXP (x, 0), 0);
6041 shift = exact_log2 (INTVAL (XEXP (XEXP (x, 0), 1)));
6042 if (INTVAL (XEXP (x, 1)) != (HOST_WIDE_INT)0xffffffff << shift)
6043 shift = -1;
6044 }
6045 /* (sign_extract:DI (ashift:DI (reg:DI) (const_int shift)) 32+shift 0) */
6046 else if ((GET_CODE (x) == SIGN_EXTRACT
6047 || GET_CODE (x) == ZERO_EXTRACT)
6048 && GET_MODE (x) == DImode
6049 && GET_CODE (XEXP (x, 0)) == ASHIFT
6050 && GET_MODE (XEXP (XEXP (x, 0), 0)) == DImode
6051 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))
6052 {
6053 type = (GET_CODE (x) == SIGN_EXTRACT)
6054 ? ADDRESS_REG_SXTW : ADDRESS_REG_UXTW;
6055 index = XEXP (XEXP (x, 0), 0);
6056 shift = INTVAL (XEXP (XEXP (x, 0), 1));
6057 if (INTVAL (XEXP (x, 1)) != 32 + shift
6058 || INTVAL (XEXP (x, 2)) != 0)
6059 shift = -1;
6060 }
6061 /* (and:DI (ashift:DI (reg:DI) (const_int shift))
6062 (const_int 0xffffffff<<shift)) */
6063 else if (GET_CODE (x) == AND
6064 && GET_MODE (x) == DImode
6065 && GET_CODE (XEXP (x, 0)) == ASHIFT
6066 && GET_MODE (XEXP (XEXP (x, 0), 0)) == DImode
6067 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
6068 && CONST_INT_P (XEXP (x, 1)))
6069 {
6070 type = ADDRESS_REG_UXTW;
6071 index = XEXP (XEXP (x, 0), 0);
6072 shift = INTVAL (XEXP (XEXP (x, 0), 1));
6073 if (INTVAL (XEXP (x, 1)) != (HOST_WIDE_INT)0xffffffff << shift)
6074 shift = -1;
6075 }
6076 /* (mult:P (reg:P) (const_int scale)) */
6077 else if (GET_CODE (x) == MULT
6078 && GET_MODE (x) == Pmode
6079 && GET_MODE (XEXP (x, 0)) == Pmode
6080 && CONST_INT_P (XEXP (x, 1)))
6081 {
6082 type = ADDRESS_REG_REG;
6083 index = XEXP (x, 0);
6084 shift = exact_log2 (INTVAL (XEXP (x, 1)));
6085 }
6086 /* (ashift:P (reg:P) (const_int shift)) */
6087 else if (GET_CODE (x) == ASHIFT
6088 && GET_MODE (x) == Pmode
6089 && GET_MODE (XEXP (x, 0)) == Pmode
6090 && CONST_INT_P (XEXP (x, 1)))
6091 {
6092 type = ADDRESS_REG_REG;
6093 index = XEXP (x, 0);
6094 shift = INTVAL (XEXP (x, 1));
6095 }
6096 else
6097 return false;
6098
76160199
RS
6099 if (!strict_p
6100 && GET_CODE (index) == SUBREG
6101 && contains_reg_of_mode[GENERAL_REGS][GET_MODE (SUBREG_REG (index))])
43e9d192
IB
6102 index = SUBREG_REG (index);
6103
43cacb12
RS
6104 if (aarch64_sve_data_mode_p (mode))
6105 {
6106 if (type != ADDRESS_REG_REG
6107 || (1 << shift) != GET_MODE_UNIT_SIZE (mode))
6108 return false;
6109 }
6110 else
6111 {
6112 if (shift != 0
6113 && !(IN_RANGE (shift, 1, 3)
6114 && known_eq (1 << shift, GET_MODE_SIZE (mode))))
6115 return false;
6116 }
6117
6118 if (REG_P (index)
43e9d192
IB
6119 && aarch64_regno_ok_for_index_p (REGNO (index), strict_p))
6120 {
6121 info->type = type;
6122 info->offset = index;
6123 info->shift = shift;
6124 return true;
6125 }
6126
6127 return false;
6128}
6129
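As one concrete case, the scaled-index addressing used by an instruction such as ldr x2, [x0, x1, lsl #3] reaches this function with the index expression (ashift:DI (reg:DI x1) (const_int 3)); it is classified as ADDRESS_REG_REG with shift 3, and for DImode the shift is accepted because 1 << 3 matches the 8-byte mode size.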
abc52318
KT
6130/* Return true if MODE is one of the modes for which we
6131 support LDP/STP operations. */
6132
6133static bool
6134aarch64_mode_valid_for_sched_fusion_p (machine_mode mode)
6135{
6136 return mode == SImode || mode == DImode
6137 || mode == SFmode || mode == DFmode
6138 || (aarch64_vector_mode_supported_p (mode)
9f5361c8
KT
6139 && (known_eq (GET_MODE_SIZE (mode), 8)
6140 || (known_eq (GET_MODE_SIZE (mode), 16)
6141 && (aarch64_tune_params.extra_tuning_flags
6142 & AARCH64_EXTRA_TUNE_NO_LDP_STP_QREGS) == 0)));
abc52318
KT
6143}
6144
9e0218fc
RH
6145/* Return true if REGNO is a virtual pointer register, or an eliminable
6146 "soft" frame register. Like REGNO_PTR_FRAME_P except that we don't
6147 include stack_pointer or hard_frame_pointer. */
6148static bool
6149virt_or_elim_regno_p (unsigned regno)
6150{
6151 return ((regno >= FIRST_VIRTUAL_REGISTER
6152 && regno <= LAST_VIRTUAL_POINTER_REGISTER)
6153 || regno == FRAME_POINTER_REGNUM
6154 || regno == ARG_POINTER_REGNUM);
6155}
6156
a97d8b98
RS
6157/* Return true if X is a valid address of type TYPE for machine mode MODE.
6158 If it is, fill in INFO appropriately. STRICT_P is true if
6159 REG_OK_STRICT is in effect. */
43e9d192 6160
a98824ac 6161bool
43e9d192 6162aarch64_classify_address (struct aarch64_address_info *info,
a97d8b98 6163 rtx x, machine_mode mode, bool strict_p,
a98824ac 6164 aarch64_addr_query_type type)
43e9d192
IB
6165{
6166 enum rtx_code code = GET_CODE (x);
6167 rtx op0, op1;
dc640181
RS
6168 poly_int64 offset;
6169
6a70badb 6170 HOST_WIDE_INT const_size;
2d8c6dc1 6171
80d43579
WD
6172 /* On BE, we use load/store pair for all large int mode load/stores.
6173 TI/TFmode may also use a load/store pair. */
43cacb12
RS
6174 unsigned int vec_flags = aarch64_classify_vector_mode (mode);
6175 bool advsimd_struct_p = (vec_flags == (VEC_ADVSIMD | VEC_STRUCT));
a97d8b98 6176 bool load_store_pair_p = (type == ADDR_QUERY_LDP_STP
a25831ac 6177 || type == ADDR_QUERY_LDP_STP_N
80d43579
WD
6178 || mode == TImode
6179 || mode == TFmode
43cacb12 6180 || (BYTES_BIG_ENDIAN && advsimd_struct_p));
2d8c6dc1 6181
a25831ac
AV
6182 /* If we are dealing with ADDR_QUERY_LDP_STP_N that means the incoming mode
6183 corresponds to the actual size of the memory being loaded/stored and the
6184 mode of the corresponding addressing mode is half of that. */
6185 if (type == ADDR_QUERY_LDP_STP_N
6186 && known_eq (GET_MODE_SIZE (mode), 16))
6187 mode = DFmode;
6188
6a70badb 6189 bool allow_reg_index_p = (!load_store_pair_p
43cacb12
RS
6190 && (known_lt (GET_MODE_SIZE (mode), 16)
6191 || vec_flags == VEC_ADVSIMD
6192 || vec_flags == VEC_SVE_DATA));
6193
6194 /* For SVE, only accept [Rn], [Rn, Rm, LSL #shift] and
6195 [Rn, #offset, MUL VL]. */
6196 if ((vec_flags & (VEC_SVE_DATA | VEC_SVE_PRED)) != 0
6197 && (code != REG && code != PLUS))
6198 return false;
2d8c6dc1
AH
6199
6200 /* On LE, for AdvSIMD, don't support anything other than POST_INC or
6201 REG addressing. */
43cacb12
RS
6202 if (advsimd_struct_p
6203 && !BYTES_BIG_ENDIAN
43e9d192
IB
6204 && (code != POST_INC && code != REG))
6205 return false;
6206
43cacb12
RS
6207 gcc_checking_assert (GET_MODE (x) == VOIDmode
6208 || SCALAR_INT_MODE_P (GET_MODE (x)));
6209
43e9d192
IB
6210 switch (code)
6211 {
6212 case REG:
6213 case SUBREG:
6214 info->type = ADDRESS_REG_IMM;
6215 info->base = x;
6216 info->offset = const0_rtx;
dc640181 6217 info->const_offset = 0;
43e9d192
IB
6218 return aarch64_base_register_rtx_p (x, strict_p);
6219
6220 case PLUS:
6221 op0 = XEXP (x, 0);
6222 op1 = XEXP (x, 1);
15c0c5c9
JW
6223
6224 if (! strict_p
4aa81c2e 6225 && REG_P (op0)
9e0218fc 6226 && virt_or_elim_regno_p (REGNO (op0))
dc640181 6227 && poly_int_rtx_p (op1, &offset))
15c0c5c9
JW
6228 {
6229 info->type = ADDRESS_REG_IMM;
6230 info->base = op0;
6231 info->offset = op1;
dc640181 6232 info->const_offset = offset;
15c0c5c9
JW
6233
6234 return true;
6235 }
6236
6a70badb 6237 if (maybe_ne (GET_MODE_SIZE (mode), 0)
dc640181
RS
6238 && aarch64_base_register_rtx_p (op0, strict_p)
6239 && poly_int_rtx_p (op1, &offset))
43e9d192 6240 {
43e9d192
IB
6241 info->type = ADDRESS_REG_IMM;
6242 info->base = op0;
6243 info->offset = op1;
dc640181 6244 info->const_offset = offset;
43e9d192
IB
6245
6246 /* TImode and TFmode values are allowed in both pairs of X
6247 registers and individual Q registers. The available
6248 address modes are:
6249 X,X: 7-bit signed scaled offset
6250 Q: 9-bit signed offset
6251 We conservatively require an offset representable in either mode.
8ed49fab
KT
6252 When performing the check for pairs of X registers i.e. LDP/STP
6253 pass down DImode since that is the natural size of the LDP/STP
6254 instruction memory accesses. */
43e9d192 6255 if (mode == TImode || mode == TFmode)
8ed49fab 6256 return (aarch64_offset_7bit_signed_scaled_p (DImode, offset)
3c5af608 6257 && (aarch64_offset_9bit_signed_unscaled_p (mode, offset)
8734dfac 6258 || offset_12bit_unsigned_scaled_p (mode, offset)));
43e9d192 6259
2d8c6dc1
AH
6260 /* A 7bit offset check because OImode will emit a ldp/stp
6261 instruction (only big endian will get here).
6262 For ldp/stp instructions, the offset is scaled for the size of a
6263 single element of the pair. */
6264 if (mode == OImode)
6265 return aarch64_offset_7bit_signed_scaled_p (TImode, offset);
6266
6267 /* Three 9/12 bit offsets checks because CImode will emit three
6268 ldr/str instructions (only big endian will get here). */
6269 if (mode == CImode)
6270 return (aarch64_offset_7bit_signed_scaled_p (TImode, offset)
3c5af608
MM
6271 && (aarch64_offset_9bit_signed_unscaled_p (V16QImode,
6272 offset + 32)
2d8c6dc1
AH
6273 || offset_12bit_unsigned_scaled_p (V16QImode,
6274 offset + 32)));
6275
6276 /* Two 7bit offsets checks because XImode will emit two ldp/stp
6277 instructions (only big endian will get here). */
6278 if (mode == XImode)
6279 return (aarch64_offset_7bit_signed_scaled_p (TImode, offset)
6280 && aarch64_offset_7bit_signed_scaled_p (TImode,
6281 offset + 32));
6282
43cacb12
RS
6283 /* Make "m" use the LD1 offset range for SVE data modes, so
6284 that pre-RTL optimizers like ivopts will work to that
6285 instead of the wider LDR/STR range. */
6286 if (vec_flags == VEC_SVE_DATA)
6287 return (type == ADDR_QUERY_M
6288 ? offset_4bit_signed_scaled_p (mode, offset)
6289 : offset_9bit_signed_scaled_p (mode, offset));
6290
9f4cbab8
RS
6291 if (vec_flags == (VEC_SVE_DATA | VEC_STRUCT))
6292 {
6293 poly_int64 end_offset = (offset
6294 + GET_MODE_SIZE (mode)
6295 - BYTES_PER_SVE_VECTOR);
6296 return (type == ADDR_QUERY_M
6297 ? offset_4bit_signed_scaled_p (mode, offset)
6298 : (offset_9bit_signed_scaled_p (SVE_BYTE_MODE, offset)
6299 && offset_9bit_signed_scaled_p (SVE_BYTE_MODE,
6300 end_offset)));
6301 }
6302
43cacb12
RS
6303 if (vec_flags == VEC_SVE_PRED)
6304 return offset_9bit_signed_scaled_p (mode, offset);
6305
2d8c6dc1 6306 if (load_store_pair_p)
6a70badb 6307 return ((known_eq (GET_MODE_SIZE (mode), 4)
9f5361c8
KT
6308 || known_eq (GET_MODE_SIZE (mode), 8)
6309 || known_eq (GET_MODE_SIZE (mode), 16))
44707478 6310 && aarch64_offset_7bit_signed_scaled_p (mode, offset));
43e9d192 6311 else
3c5af608 6312 return (aarch64_offset_9bit_signed_unscaled_p (mode, offset)
43e9d192
IB
6313 || offset_12bit_unsigned_scaled_p (mode, offset));
6314 }
6315
6316 if (allow_reg_index_p)
6317 {
6318 /* Look for base + (scaled/extended) index register. */
6319 if (aarch64_base_register_rtx_p (op0, strict_p)
6320 && aarch64_classify_index (info, op1, mode, strict_p))
6321 {
6322 info->base = op0;
6323 return true;
6324 }
6325 if (aarch64_base_register_rtx_p (op1, strict_p)
6326 && aarch64_classify_index (info, op0, mode, strict_p))
6327 {
6328 info->base = op1;
6329 return true;
6330 }
6331 }
6332
6333 return false;
6334
6335 case POST_INC:
6336 case POST_DEC:
6337 case PRE_INC:
6338 case PRE_DEC:
6339 info->type = ADDRESS_REG_WB;
6340 info->base = XEXP (x, 0);
6341 info->offset = NULL_RTX;
6342 return aarch64_base_register_rtx_p (info->base, strict_p);
6343
6344 case POST_MODIFY:
6345 case PRE_MODIFY:
6346 info->type = ADDRESS_REG_WB;
6347 info->base = XEXP (x, 0);
6348 if (GET_CODE (XEXP (x, 1)) == PLUS
dc640181 6349 && poly_int_rtx_p (XEXP (XEXP (x, 1), 1), &offset)
43e9d192
IB
6350 && rtx_equal_p (XEXP (XEXP (x, 1), 0), info->base)
6351 && aarch64_base_register_rtx_p (info->base, strict_p))
6352 {
43e9d192 6353 info->offset = XEXP (XEXP (x, 1), 1);
dc640181 6354 info->const_offset = offset;
43e9d192
IB
6355
6356 /* TImode and TFmode values are allowed in both pairs of X
6357 registers and individual Q registers. The available
6358 address modes are:
6359 X,X: 7-bit signed scaled offset
6360 Q: 9-bit signed offset
6361 We conservatively require an offset representable in either mode.
6362 */
6363 if (mode == TImode || mode == TFmode)
44707478 6364 return (aarch64_offset_7bit_signed_scaled_p (mode, offset)
3c5af608 6365 && aarch64_offset_9bit_signed_unscaled_p (mode, offset));
43e9d192 6366
2d8c6dc1 6367 if (load_store_pair_p)
6a70badb 6368 return ((known_eq (GET_MODE_SIZE (mode), 4)
9f5361c8
KT
6369 || known_eq (GET_MODE_SIZE (mode), 8)
6370 || known_eq (GET_MODE_SIZE (mode), 16))
44707478 6371 && aarch64_offset_7bit_signed_scaled_p (mode, offset));
43e9d192 6372 else
3c5af608 6373 return aarch64_offset_9bit_signed_unscaled_p (mode, offset);
43e9d192
IB
6374 }
6375 return false;
6376
6377 case CONST:
6378 case SYMBOL_REF:
6379 case LABEL_REF:
79517551
SN
6380 /* load literal: pc-relative constant pool entry. Only supported
6381 for SI mode or larger. */
43e9d192 6382 info->type = ADDRESS_SYMBOLIC;
2d8c6dc1 6383
6a70badb
RS
6384 if (!load_store_pair_p
6385 && GET_MODE_SIZE (mode).is_constant (&const_size)
6386 && const_size >= 4)
43e9d192
IB
6387 {
6388 rtx sym, addend;
6389
6390 split_const (x, &sym, &addend);
b4f50fd4
RR
6391 return ((GET_CODE (sym) == LABEL_REF
6392 || (GET_CODE (sym) == SYMBOL_REF
6393 && CONSTANT_POOL_ADDRESS_P (sym)
9ee6540a 6394 && aarch64_pcrelative_literal_loads)));
43e9d192
IB
6395 }
6396 return false;
6397
6398 case LO_SUM:
6399 info->type = ADDRESS_LO_SUM;
6400 info->base = XEXP (x, 0);
6401 info->offset = XEXP (x, 1);
6402 if (allow_reg_index_p
6403 && aarch64_base_register_rtx_p (info->base, strict_p))
6404 {
6405 rtx sym, offs;
6406 split_const (info->offset, &sym, &offs);
6407 if (GET_CODE (sym) == SYMBOL_REF
43cacb12
RS
6408 && (aarch64_classify_symbol (sym, INTVAL (offs))
6409 == SYMBOL_SMALL_ABSOLUTE))
43e9d192
IB
6410 {
6411 /* The symbol and offset must be aligned to the access size. */
6412 unsigned int align;
43e9d192
IB
6413
6414 if (CONSTANT_POOL_ADDRESS_P (sym))
6415 align = GET_MODE_ALIGNMENT (get_pool_mode (sym));
6416 else if (TREE_CONSTANT_POOL_ADDRESS_P (sym))
6417 {
6418 tree exp = SYMBOL_REF_DECL (sym);
6419 align = TYPE_ALIGN (TREE_TYPE (exp));
58e17cf8 6420 align = aarch64_constant_alignment (exp, align);
43e9d192
IB
6421 }
6422 else if (SYMBOL_REF_DECL (sym))
6423 align = DECL_ALIGN (SYMBOL_REF_DECL (sym));
6c031d8d
KV
6424 else if (SYMBOL_REF_HAS_BLOCK_INFO_P (sym)
6425 && SYMBOL_REF_BLOCK (sym) != NULL)
6426 align = SYMBOL_REF_BLOCK (sym)->alignment;
43e9d192
IB
6427 else
6428 align = BITS_PER_UNIT;
6429
6a70badb
RS
6430 poly_int64 ref_size = GET_MODE_SIZE (mode);
6431 if (known_eq (ref_size, 0))
43e9d192
IB
6432 ref_size = GET_MODE_SIZE (DImode);
6433
6a70badb
RS
6434 return (multiple_p (INTVAL (offs), ref_size)
6435 && multiple_p (align / BITS_PER_UNIT, ref_size));
43e9d192
IB
6436 }
6437 }
6438 return false;
6439
6440 default:
6441 return false;
6442 }
6443}
6444
9bf2f779
KT
6445/* Return true if the address X is valid for a PRFM instruction.
6446 STRICT_P is true if we should do strict checking with
6447 aarch64_classify_address. */
6448
6449bool
6450aarch64_address_valid_for_prefetch_p (rtx x, bool strict_p)
6451{
6452 struct aarch64_address_info addr;
6453
6454 /* PRFM accepts the same addresses as DImode... */
a97d8b98 6455 bool res = aarch64_classify_address (&addr, x, DImode, strict_p);
9bf2f779
KT
6456 if (!res)
6457 return false;
6458
6459 /* ... except writeback forms. */
6460 return addr.type != ADDRESS_REG_WB;
6461}
6462
43e9d192
IB
6463bool
6464aarch64_symbolic_address_p (rtx x)
6465{
6466 rtx offset;
6467
6468 split_const (x, &x, &offset);
6469 return GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == LABEL_REF;
6470}
6471
a6e0bfa7 6472/* Classify the base of symbolic expression X. */
da4f13a4
MS
6473
6474enum aarch64_symbol_type
a6e0bfa7 6475aarch64_classify_symbolic_expression (rtx x)
43e9d192
IB
6476{
6477 rtx offset;
da4f13a4 6478
43e9d192 6479 split_const (x, &x, &offset);
43cacb12 6480 return aarch64_classify_symbol (x, INTVAL (offset));
43e9d192
IB
6481}
6482
6483
6484/* Return TRUE if X is a legitimate address for accessing memory in
6485 mode MODE. */
6486static bool
ef4bddc2 6487aarch64_legitimate_address_hook_p (machine_mode mode, rtx x, bool strict_p)
43e9d192
IB
6488{
6489 struct aarch64_address_info addr;
6490
a97d8b98 6491 return aarch64_classify_address (&addr, x, mode, strict_p);
43e9d192
IB
6492}
6493
a97d8b98
RS
6494/* Return TRUE if X is a legitimate address of type TYPE for accessing
6495 memory in mode MODE. STRICT_P is true if REG_OK_STRICT is in effect. */
43e9d192 6496bool
a97d8b98
RS
6497aarch64_legitimate_address_p (machine_mode mode, rtx x, bool strict_p,
6498 aarch64_addr_query_type type)
43e9d192
IB
6499{
6500 struct aarch64_address_info addr;
6501
a97d8b98 6502 return aarch64_classify_address (&addr, x, mode, strict_p, type);
43e9d192
IB
6503}
6504
9005477f
RS
6505/* Implement TARGET_LEGITIMIZE_ADDRESS_DISPLACEMENT. */
6506
491ec060 6507static bool
9005477f
RS
6508aarch64_legitimize_address_displacement (rtx *offset1, rtx *offset2,
6509 poly_int64 orig_offset,
6510 machine_mode mode)
491ec060 6511{
6a70badb
RS
6512 HOST_WIDE_INT size;
6513 if (GET_MODE_SIZE (mode).is_constant (&size))
6514 {
9005477f
RS
6515 HOST_WIDE_INT const_offset, second_offset;
6516
6517 /* A general SVE offset is A * VQ + B. Remove the A component from
6518 coefficient 0 in order to get the constant B. */
6519 const_offset = orig_offset.coeffs[0] - orig_offset.coeffs[1];
6520
6521 /* Split an out-of-range address displacement into a base and
6522 offset. Use 4KB range for 1- and 2-byte accesses and a 16KB
6523 range otherwise to increase opportunities for sharing the base
6524 address of different sizes. Unaligned accesses use the signed
6525 9-bit range, TImode/TFmode use the intersection of signed
6526 scaled 7-bit and signed 9-bit offset. */
6a70badb 6527 if (mode == TImode || mode == TFmode)
9005477f
RS
6528 second_offset = ((const_offset + 0x100) & 0x1f8) - 0x100;
6529 else if ((const_offset & (size - 1)) != 0)
6530 second_offset = ((const_offset + 0x100) & 0x1ff) - 0x100;
6a70badb 6531 else
9005477f 6532 second_offset = const_offset & (size < 4 ? 0xfff : 0x3ffc);
491ec060 6533
9005477f
RS
6534 if (second_offset == 0 || known_eq (orig_offset, second_offset))
6535 return false;
6536
6537 /* Split the offset into second_offset and the rest. */
6538 *offset1 = gen_int_mode (orig_offset - second_offset, Pmode);
6539 *offset2 = gen_int_mode (second_offset, Pmode);
6540 return true;
6541 }
6542 else
6543 {
6544 /* Get the mode we should use as the basis of the range. For structure
6545 modes this is the mode of one vector. */
6546 unsigned int vec_flags = aarch64_classify_vector_mode (mode);
6547 machine_mode step_mode
6548 = (vec_flags & VEC_STRUCT) != 0 ? SVE_BYTE_MODE : mode;
6549
6550 /* Get the "mul vl" multiplier we'd like to use. */
6551 HOST_WIDE_INT factor = GET_MODE_SIZE (step_mode).coeffs[1];
6552 HOST_WIDE_INT vnum = orig_offset.coeffs[1] / factor;
6553 if (vec_flags & VEC_SVE_DATA)
6554 /* LDR supports a 9-bit range, but the move patterns for
6555 structure modes require all vectors to be in range of the
 6556	     same base.  The simplest way of accommodating that while still
6557 promoting reuse of anchor points between different modes is
6558 to use an 8-bit range unconditionally. */
6559 vnum = ((vnum + 128) & 255) - 128;
6560 else
6561 /* Predicates are only handled singly, so we might as well use
6562 the full range. */
6563 vnum = ((vnum + 256) & 511) - 256;
6564 if (vnum == 0)
6565 return false;
6566
6567 /* Convert the "mul vl" multiplier into a byte offset. */
6568 poly_int64 second_offset = GET_MODE_SIZE (step_mode) * vnum;
6569 if (known_eq (second_offset, orig_offset))
6570 return false;
6571
6572 /* Split the offset into second_offset and the rest. */
6573 *offset1 = gen_int_mode (orig_offset - second_offset, Pmode);
6574 *offset2 = gen_int_mode (second_offset, Pmode);
6a70badb
RS
6575 return true;
6576 }
491ec060
WD
6577}
6578
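Illustration only, not part of aarch64.c: the constant-size splitting above reduces to plain arithmetic. In this standalone sketch the names second_offset and ti_tf are made up, and size stands in for GET_MODE_SIZE.

#include <stdio.h>

/* Mirror of the second_offset computation above (sketch, not GCC code).  */
static long long
second_offset (long long const_offset, long long size, int ti_tf)
{
  if (ti_tf)
    return ((const_offset + 0x100) & 0x1f8) - 0x100;
  if ((const_offset & (size - 1)) != 0)
    return ((const_offset + 0x100) & 0x1ff) - 0x100;
  return const_offset & (size < 4 ? 0xfff : 0x3ffc);
}

int
main (void)
{
  /* TFmode access at offset 0x234: keep 0x30 in the access (it fits both
     the scaled 7-bit and the signed 9-bit range) and add 0x204 to the base.  */
  long long off = 0x234, second = second_offset (off, 16, 1);
  printf ("base += %#llx, access offset %#llx\n", off - second, second);
  return 0;
}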
a2170965
TC
6579/* Return the binary representation of floating point constant VALUE in INTVAL.
6580 If the value cannot be converted, return false without setting INTVAL.
6581 The conversion is done in the given MODE. */
6582bool
6583aarch64_reinterpret_float_as_int (rtx value, unsigned HOST_WIDE_INT *intval)
6584{
6585
6586 /* We make a general exception for 0. */
6587 if (aarch64_float_const_zero_rtx_p (value))
6588 {
6589 *intval = 0;
6590 return true;
6591 }
6592
0d0e0188 6593 scalar_float_mode mode;
a2170965 6594 if (GET_CODE (value) != CONST_DOUBLE
0d0e0188 6595 || !is_a <scalar_float_mode> (GET_MODE (value), &mode)
a2170965
TC
6596 || GET_MODE_BITSIZE (mode) > HOST_BITS_PER_WIDE_INT
6597 /* Only support up to DF mode. */
6598 || GET_MODE_BITSIZE (mode) > GET_MODE_BITSIZE (DFmode))
6599 return false;
6600
6601 unsigned HOST_WIDE_INT ival = 0;
6602
6603 long res[2];
6604 real_to_target (res,
6605 CONST_DOUBLE_REAL_VALUE (value),
6606 REAL_MODE_FORMAT (mode));
6607
5c22bb48
TC
6608 if (mode == DFmode)
6609 {
6610 int order = BYTES_BIG_ENDIAN ? 1 : 0;
6611 ival = zext_hwi (res[order], 32);
6612 ival |= (zext_hwi (res[1 - order], 32) << 32);
6613 }
6614 else
6615 ival = zext_hwi (res[0], 32);
a2170965
TC
6616
6617 *intval = ival;
6618 return true;
6619}
6620
6621/* Return TRUE if rtx X is an immediate constant that can be moved using a
6622 single MOV(+MOVK) followed by an FMOV. */
6623bool
6624aarch64_float_const_rtx_p (rtx x)
6625{
6626 machine_mode mode = GET_MODE (x);
6627 if (mode == VOIDmode)
6628 return false;
6629
6630 /* Determine whether it's cheaper to write float constants as
6631 mov/movk pairs over ldr/adrp pairs. */
6632 unsigned HOST_WIDE_INT ival;
6633
6634 if (GET_CODE (x) == CONST_DOUBLE
6635 && SCALAR_FLOAT_MODE_P (mode)
6636 && aarch64_reinterpret_float_as_int (x, &ival))
6637 {
77e994c9
RS
6638 scalar_int_mode imode = (mode == HFmode
6639 ? SImode
6640 : int_mode_for_mode (mode).require ());
a2170965
TC
6641 int num_instr = aarch64_internal_mov_immediate
6642 (NULL_RTX, gen_int_mode (ival, imode), false, imode);
6643 return num_instr < 3;
6644 }
6645
6646 return false;
6647}
6648
43e9d192
IB
6649/* Return TRUE if rtx X is immediate constant 0.0 */
6650bool
3520f7cc 6651aarch64_float_const_zero_rtx_p (rtx x)
43e9d192 6652{
43e9d192
IB
6653 if (GET_MODE (x) == VOIDmode)
6654 return false;
6655
34a72c33 6656 if (REAL_VALUE_MINUS_ZERO (*CONST_DOUBLE_REAL_VALUE (x)))
43e9d192 6657 return !HONOR_SIGNED_ZEROS (GET_MODE (x));
34a72c33 6658 return real_equal (CONST_DOUBLE_REAL_VALUE (x), &dconst0);
43e9d192
IB
6659}
6660
a2170965
TC
 6661/* Return TRUE if rtx X is an immediate constant that fits in a single
6662 MOVI immediate operation. */
6663bool
6664aarch64_can_const_movi_rtx_p (rtx x, machine_mode mode)
6665{
6666 if (!TARGET_SIMD)
6667 return false;
6668
77e994c9
RS
6669 machine_mode vmode;
6670 scalar_int_mode imode;
a2170965
TC
6671 unsigned HOST_WIDE_INT ival;
6672
6673 if (GET_CODE (x) == CONST_DOUBLE
6674 && SCALAR_FLOAT_MODE_P (mode))
6675 {
6676 if (!aarch64_reinterpret_float_as_int (x, &ival))
6677 return false;
6678
35c38fa6
TC
6679 /* We make a general exception for 0. */
6680 if (aarch64_float_const_zero_rtx_p (x))
6681 return true;
6682
304b9962 6683 imode = int_mode_for_mode (mode).require ();
a2170965
TC
6684 }
6685 else if (GET_CODE (x) == CONST_INT
77e994c9
RS
6686 && is_a <scalar_int_mode> (mode, &imode))
6687 ival = INTVAL (x);
a2170965
TC
6688 else
6689 return false;
6690
 6691  /* Use a 64-bit mode for everything except DI/DF mode, where we use
 6692     a 128-bit vector mode.  */
77e994c9 6693 int width = GET_MODE_BITSIZE (imode) == 64 ? 128 : 64;
a2170965
TC
6694
6695 vmode = aarch64_simd_container_mode (imode, width);
6696 rtx v_op = aarch64_simd_gen_const_vector_dup (vmode, ival);
6697
b187677b 6698 return aarch64_simd_valid_immediate (v_op, NULL);
a2170965
TC
6699}
6700
6701
70f09188
AP
6702/* Return the fixed registers used for condition codes. */
6703
6704static bool
6705aarch64_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
6706{
6707 *p1 = CC_REGNUM;
6708 *p2 = INVALID_REGNUM;
6709 return true;
6710}
6711
47210a04
RL
6712/* This function is used by the call expanders of the machine description.
6713 RESULT is the register in which the result is returned. It's NULL for
6714 "call" and "sibcall".
6715 MEM is the location of the function call.
6716 SIBCALL indicates whether this function call is normal call or sibling call.
6717 It will generate different pattern accordingly. */
6718
6719void
6720aarch64_expand_call (rtx result, rtx mem, bool sibcall)
6721{
6722 rtx call, callee, tmp;
6723 rtvec vec;
6724 machine_mode mode;
6725
6726 gcc_assert (MEM_P (mem));
6727 callee = XEXP (mem, 0);
6728 mode = GET_MODE (callee);
6729 gcc_assert (mode == Pmode);
6730
6731 /* Decide if we should generate indirect calls by loading the
6732 address of the callee into a register before performing
6733 the branch-and-link. */
6734 if (SYMBOL_REF_P (callee)
6735 ? (aarch64_is_long_call_p (callee)
6736 || aarch64_is_noplt_call_p (callee))
6737 : !REG_P (callee))
6738 XEXP (mem, 0) = force_reg (mode, callee);
6739
6740 call = gen_rtx_CALL (VOIDmode, mem, const0_rtx);
6741
6742 if (result != NULL_RTX)
6743 call = gen_rtx_SET (result, call);
6744
6745 if (sibcall)
6746 tmp = ret_rtx;
6747 else
6748 tmp = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (Pmode, LR_REGNUM));
6749
6750 vec = gen_rtvec (2, call, tmp);
6751 call = gen_rtx_PARALLEL (VOIDmode, vec);
6752
6753 aarch64_emit_call_insn (call);
6754}
6755
78607708
TV
6756/* Emit call insn with PAT and do aarch64-specific handling. */
6757
d07a3fed 6758void
78607708
TV
6759aarch64_emit_call_insn (rtx pat)
6760{
6761 rtx insn = emit_call_insn (pat);
6762
6763 rtx *fusage = &CALL_INSN_FUNCTION_USAGE (insn);
6764 clobber_reg (fusage, gen_rtx_REG (word_mode, IP0_REGNUM));
6765 clobber_reg (fusage, gen_rtx_REG (word_mode, IP1_REGNUM));
6766}
6767
ef4bddc2 6768machine_mode
43e9d192
IB
6769aarch64_select_cc_mode (RTX_CODE code, rtx x, rtx y)
6770{
6771 /* All floating point compares return CCFP if it is an equality
6772 comparison, and CCFPE otherwise. */
6773 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
6774 {
6775 switch (code)
6776 {
6777 case EQ:
6778 case NE:
6779 case UNORDERED:
6780 case ORDERED:
6781 case UNLT:
6782 case UNLE:
6783 case UNGT:
6784 case UNGE:
6785 case UNEQ:
43e9d192
IB
6786 return CCFPmode;
6787
6788 case LT:
6789 case LE:
6790 case GT:
6791 case GE:
8332c5ee 6792 case LTGT:
43e9d192
IB
6793 return CCFPEmode;
6794
6795 default:
6796 gcc_unreachable ();
6797 }
6798 }
6799
2b8568fe
KT
6800 /* Equality comparisons of short modes against zero can be performed
6801 using the TST instruction with the appropriate bitmask. */
6802 if (y == const0_rtx && REG_P (x)
6803 && (code == EQ || code == NE)
6804 && (GET_MODE (x) == HImode || GET_MODE (x) == QImode))
6805 return CC_NZmode;
6806
b06335f9
KT
6807 /* Similarly, comparisons of zero_extends from shorter modes can
6808 be performed using an ANDS with an immediate mask. */
6809 if (y == const0_rtx && GET_CODE (x) == ZERO_EXTEND
6810 && (GET_MODE (x) == SImode || GET_MODE (x) == DImode)
6811 && (GET_MODE (XEXP (x, 0)) == HImode || GET_MODE (XEXP (x, 0)) == QImode)
6812 && (code == EQ || code == NE))
6813 return CC_NZmode;
6814
43e9d192
IB
6815 if ((GET_MODE (x) == SImode || GET_MODE (x) == DImode)
6816 && y == const0_rtx
6817 && (code == EQ || code == NE || code == LT || code == GE)
b056c910 6818 && (GET_CODE (x) == PLUS || GET_CODE (x) == MINUS || GET_CODE (x) == AND
7325d85a
KT
6819 || GET_CODE (x) == NEG
6820 || (GET_CODE (x) == ZERO_EXTRACT && CONST_INT_P (XEXP (x, 1))
6821 && CONST_INT_P (XEXP (x, 2)))))
43e9d192
IB
6822 return CC_NZmode;
6823
1c992d1e 6824 /* A compare with a shifted operand. Because of canonicalization,
43e9d192
IB
6825 the comparison will have to be swapped when we emit the assembly
6826 code. */
6827 if ((GET_MODE (x) == SImode || GET_MODE (x) == DImode)
ffa8a921 6828 && (REG_P (y) || GET_CODE (y) == SUBREG || y == const0_rtx)
43e9d192
IB
6829 && (GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT
6830 || GET_CODE (x) == LSHIFTRT
1c992d1e 6831 || GET_CODE (x) == ZERO_EXTEND || GET_CODE (x) == SIGN_EXTEND))
43e9d192
IB
6832 return CC_SWPmode;
6833
1c992d1e
RE
6834 /* Similarly for a negated operand, but we can only do this for
6835 equalities. */
6836 if ((GET_MODE (x) == SImode || GET_MODE (x) == DImode)
4aa81c2e 6837 && (REG_P (y) || GET_CODE (y) == SUBREG)
1c992d1e
RE
6838 && (code == EQ || code == NE)
6839 && GET_CODE (x) == NEG)
6840 return CC_Zmode;
6841
ef22810a
RH
6842 /* A test for unsigned overflow. */
6843 if ((GET_MODE (x) == DImode || GET_MODE (x) == TImode)
6844 && code == NE
6845 && GET_CODE (x) == PLUS
6846 && GET_CODE (y) == ZERO_EXTEND)
6847 return CC_Cmode;
6848
30c46053
MC
6849 /* A test for signed overflow. */
6850 if ((GET_MODE (x) == DImode || GET_MODE (x) == TImode)
6851 && code == NE
6852 && GET_CODE (x) == PLUS
6853 && GET_CODE (y) == SIGN_EXTEND)
6854 return CC_Vmode;
6855
43e9d192
IB
6856 /* For everything else, return CCmode. */
6857 return CCmode;
6858}
6859
3dfa7055 6860static int
b8506a8a 6861aarch64_get_condition_code_1 (machine_mode, enum rtx_code);
3dfa7055 6862
cd5660ab 6863int
43e9d192
IB
6864aarch64_get_condition_code (rtx x)
6865{
ef4bddc2 6866 machine_mode mode = GET_MODE (XEXP (x, 0));
43e9d192
IB
6867 enum rtx_code comp_code = GET_CODE (x);
6868
6869 if (GET_MODE_CLASS (mode) != MODE_CC)
6870 mode = SELECT_CC_MODE (comp_code, XEXP (x, 0), XEXP (x, 1));
3dfa7055
ZC
6871 return aarch64_get_condition_code_1 (mode, comp_code);
6872}
43e9d192 6873
3dfa7055 6874static int
b8506a8a 6875aarch64_get_condition_code_1 (machine_mode mode, enum rtx_code comp_code)
3dfa7055 6876{
43e9d192
IB
6877 switch (mode)
6878 {
4e10a5a7
RS
6879 case E_CCFPmode:
6880 case E_CCFPEmode:
43e9d192
IB
6881 switch (comp_code)
6882 {
6883 case GE: return AARCH64_GE;
6884 case GT: return AARCH64_GT;
6885 case LE: return AARCH64_LS;
6886 case LT: return AARCH64_MI;
6887 case NE: return AARCH64_NE;
6888 case EQ: return AARCH64_EQ;
6889 case ORDERED: return AARCH64_VC;
6890 case UNORDERED: return AARCH64_VS;
6891 case UNLT: return AARCH64_LT;
6892 case UNLE: return AARCH64_LE;
6893 case UNGT: return AARCH64_HI;
6894 case UNGE: return AARCH64_PL;
cd5660ab 6895 default: return -1;
43e9d192
IB
6896 }
6897 break;
6898
4e10a5a7 6899 case E_CCmode:
43e9d192
IB
6900 switch (comp_code)
6901 {
6902 case NE: return AARCH64_NE;
6903 case EQ: return AARCH64_EQ;
6904 case GE: return AARCH64_GE;
6905 case GT: return AARCH64_GT;
6906 case LE: return AARCH64_LE;
6907 case LT: return AARCH64_LT;
6908 case GEU: return AARCH64_CS;
6909 case GTU: return AARCH64_HI;
6910 case LEU: return AARCH64_LS;
6911 case LTU: return AARCH64_CC;
cd5660ab 6912 default: return -1;
43e9d192
IB
6913 }
6914 break;
6915
4e10a5a7 6916 case E_CC_SWPmode:
43e9d192
IB
6917 switch (comp_code)
6918 {
6919 case NE: return AARCH64_NE;
6920 case EQ: return AARCH64_EQ;
6921 case GE: return AARCH64_LE;
6922 case GT: return AARCH64_LT;
6923 case LE: return AARCH64_GE;
6924 case LT: return AARCH64_GT;
6925 case GEU: return AARCH64_LS;
6926 case GTU: return AARCH64_CC;
6927 case LEU: return AARCH64_CS;
6928 case LTU: return AARCH64_HI;
cd5660ab 6929 default: return -1;
43e9d192
IB
6930 }
6931 break;
6932
4e10a5a7 6933 case E_CC_NZmode:
43e9d192
IB
6934 switch (comp_code)
6935 {
6936 case NE: return AARCH64_NE;
6937 case EQ: return AARCH64_EQ;
6938 case GE: return AARCH64_PL;
6939 case LT: return AARCH64_MI;
cd5660ab 6940 default: return -1;
43e9d192
IB
6941 }
6942 break;
6943
4e10a5a7 6944 case E_CC_Zmode:
1c992d1e
RE
6945 switch (comp_code)
6946 {
6947 case NE: return AARCH64_NE;
6948 case EQ: return AARCH64_EQ;
cd5660ab 6949 default: return -1;
1c992d1e
RE
6950 }
6951 break;
6952
4e10a5a7 6953 case E_CC_Cmode:
ef22810a
RH
6954 switch (comp_code)
6955 {
6956 case NE: return AARCH64_CS;
6957 case EQ: return AARCH64_CC;
6958 default: return -1;
6959 }
6960 break;
6961
30c46053
MC
6962 case E_CC_Vmode:
6963 switch (comp_code)
6964 {
6965 case NE: return AARCH64_VS;
6966 case EQ: return AARCH64_VC;
6967 default: return -1;
6968 }
6969 break;
6970
43e9d192 6971 default:
cd5660ab 6972 return -1;
43e9d192 6973 }
3dfa7055 6974
3dfa7055 6975 return -1;
43e9d192
IB
6976}
6977
ddeabd3e
AL
6978bool
6979aarch64_const_vec_all_same_in_range_p (rtx x,
6a70badb
RS
6980 HOST_WIDE_INT minval,
6981 HOST_WIDE_INT maxval)
ddeabd3e 6982{
6a70badb
RS
6983 rtx elt;
6984 return (const_vec_duplicate_p (x, &elt)
6985 && CONST_INT_P (elt)
6986 && IN_RANGE (INTVAL (elt), minval, maxval));
ddeabd3e
AL
6987}
6988
6989bool
6990aarch64_const_vec_all_same_int_p (rtx x, HOST_WIDE_INT val)
6991{
6992 return aarch64_const_vec_all_same_in_range_p (x, val, val);
6993}
6994
43cacb12
RS
6995/* Return true if VEC is a constant in which every element is in the range
6996 [MINVAL, MAXVAL]. The elements do not need to have the same value. */
6997
6998static bool
6999aarch64_const_vec_all_in_range_p (rtx vec,
7000 HOST_WIDE_INT minval,
7001 HOST_WIDE_INT maxval)
7002{
7003 if (GET_CODE (vec) != CONST_VECTOR
7004 || GET_MODE_CLASS (GET_MODE (vec)) != MODE_VECTOR_INT)
7005 return false;
7006
7007 int nunits;
7008 if (!CONST_VECTOR_STEPPED_P (vec))
7009 nunits = const_vector_encoded_nelts (vec);
7010 else if (!CONST_VECTOR_NUNITS (vec).is_constant (&nunits))
7011 return false;
7012
7013 for (int i = 0; i < nunits; i++)
7014 {
7015 rtx vec_elem = CONST_VECTOR_ELT (vec, i);
7016 if (!CONST_INT_P (vec_elem)
7017 || !IN_RANGE (INTVAL (vec_elem), minval, maxval))
7018 return false;
7019 }
7020 return true;
7021}
43e9d192 7022
cf670503
ZC
7023/* N Z C V. */
7024#define AARCH64_CC_V 1
7025#define AARCH64_CC_C (1 << 1)
7026#define AARCH64_CC_Z (1 << 2)
7027#define AARCH64_CC_N (1 << 3)
7028
c8012fbc
WD
7029/* N Z C V flags for ccmp. Indexed by AARCH64_COND_CODE. */
7030static const int aarch64_nzcv_codes[] =
7031{
7032 0, /* EQ, Z == 1. */
7033 AARCH64_CC_Z, /* NE, Z == 0. */
7034 0, /* CS, C == 1. */
7035 AARCH64_CC_C, /* CC, C == 0. */
7036 0, /* MI, N == 1. */
7037 AARCH64_CC_N, /* PL, N == 0. */
7038 0, /* VS, V == 1. */
7039 AARCH64_CC_V, /* VC, V == 0. */
 7040  0,		/* HI, C == 1 && Z == 0.  */
7041 AARCH64_CC_C, /* LS, !(C == 1 && Z == 0). */
7042 AARCH64_CC_V, /* GE, N == V. */
7043 0, /* LT, N != V. */
7044 AARCH64_CC_Z, /* GT, Z == 0 && N == V. */
7045 0, /* LE, !(Z == 0 && N == V). */
7046 0, /* AL, Any. */
7047 0 /* NV, Any. */
cf670503
ZC
7048};
7049
43cacb12
RS
7050/* Print floating-point vector immediate operand X to F, negating it
7051 first if NEGATE is true. Return true on success, false if it isn't
7052 a constant we can handle. */
7053
7054static bool
7055aarch64_print_vector_float_operand (FILE *f, rtx x, bool negate)
7056{
7057 rtx elt;
7058
7059 if (!const_vec_duplicate_p (x, &elt))
7060 return false;
7061
7062 REAL_VALUE_TYPE r = *CONST_DOUBLE_REAL_VALUE (elt);
7063 if (negate)
7064 r = real_value_negate (&r);
7065
7066 /* We only handle the SVE single-bit immediates here. */
7067 if (real_equal (&r, &dconst0))
7068 asm_fprintf (f, "0.0");
7069 else if (real_equal (&r, &dconst1))
7070 asm_fprintf (f, "1.0");
7071 else if (real_equal (&r, &dconsthalf))
7072 asm_fprintf (f, "0.5");
7073 else
7074 return false;
7075
7076 return true;
7077}
7078
9f4cbab8
RS
7079/* Return the equivalent letter for size. */
7080static char
7081sizetochar (int size)
7082{
7083 switch (size)
7084 {
7085 case 64: return 'd';
7086 case 32: return 's';
7087 case 16: return 'h';
7088 case 8 : return 'b';
7089 default: gcc_unreachable ();
7090 }
7091}
7092
bcf19844
JW
7093/* Print operand X to file F in a target specific manner according to CODE.
7094 The acceptable formatting commands given by CODE are:
7095 'c': An integer or symbol address without a preceding #
7096 sign.
43cacb12
RS
7097 'C': Take the duplicated element in a vector constant
7098 and print it in hex.
7099 'D': Take the duplicated element in a vector constant
7100 and print it as an unsigned integer, in decimal.
bcf19844
JW
7101 'e': Print the sign/zero-extend size as a character 8->b,
7102 16->h, 32->w.
7103 'p': Prints N such that 2^N == X (X must be power of 2 and
7104 const int).
7105 'P': Print the number of non-zero bits in X (a const_int).
7106 'H': Print the higher numbered register of a pair (TImode)
7107 of regs.
7108 'm': Print a condition (eq, ne, etc).
7109 'M': Same as 'm', but invert condition.
43cacb12
RS
7110 'N': Take the duplicated element in a vector constant
7111 and print the negative of it in decimal.
bcf19844
JW
7112 'b/h/s/d/q': Print a scalar FP/SIMD register name.
7113 'S/T/U/V': Print a FP/SIMD register name for a register list.
7114 The register printed is the FP/SIMD register name
7115 of X + 0/1/2/3 for S/T/U/V.
7116 'R': Print a scalar FP/SIMD register name + 1.
7117 'X': Print bottom 16 bits of integer constant in hex.
7118 'w/x': Print a general register name or the zero register
7119 (32-bit or 64-bit).
7120 '0': Print a normal operand, if it's a general register,
7121 then we assume DImode.
7122 'k': Print NZCV for conditional compare instructions.
7123 'A': Output address constant representing the first
7124 argument of X, specifying a relocation offset
7125 if appropriate.
7126 'L': Output constant address specified by X
7127 with a relocation offset if appropriate.
7128 'G': Prints address of X, specifying a PC relative
e69a816d
WD
7129 relocation mode if appropriate.
7130 'y': Output address of LDP or STP - this is used for
7131 some LDP/STPs which don't use a PARALLEL in their
7132 pattern (so the mode needs to be adjusted).
7133 'z': Output address of a typical LDP or STP. */
bcf19844 7134
cc8ca59e
JB
7135static void
7136aarch64_print_operand (FILE *f, rtx x, int code)
43e9d192 7137{
43cacb12 7138 rtx elt;
43e9d192
IB
7139 switch (code)
7140 {
f541a481
KT
7141 case 'c':
7142 switch (GET_CODE (x))
7143 {
7144 case CONST_INT:
7145 fprintf (f, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
7146 break;
7147
7148 case SYMBOL_REF:
7149 output_addr_const (f, x);
7150 break;
7151
7152 case CONST:
7153 if (GET_CODE (XEXP (x, 0)) == PLUS
7154 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF)
7155 {
7156 output_addr_const (f, x);
7157 break;
7158 }
7159 /* Fall through. */
7160
7161 default:
ee61f880 7162 output_operand_lossage ("unsupported operand for code '%c'", code);
f541a481
KT
7163 }
7164 break;
7165
43e9d192 7166 case 'e':
43e9d192
IB
7167 {
7168 int n;
7169
4aa81c2e 7170 if (!CONST_INT_P (x)
43e9d192
IB
7171 || (n = exact_log2 (INTVAL (x) & ~7)) <= 0)
7172 {
7173 output_operand_lossage ("invalid operand for '%%%c'", code);
7174 return;
7175 }
7176
7177 switch (n)
7178 {
7179 case 3:
7180 fputc ('b', f);
7181 break;
7182 case 4:
7183 fputc ('h', f);
7184 break;
7185 case 5:
7186 fputc ('w', f);
7187 break;
7188 default:
7189 output_operand_lossage ("invalid operand for '%%%c'", code);
7190 return;
7191 }
7192 }
7193 break;
7194
7195 case 'p':
7196 {
7197 int n;
7198
4aa81c2e 7199 if (!CONST_INT_P (x) || (n = exact_log2 (INTVAL (x))) < 0)
43e9d192
IB
7200 {
7201 output_operand_lossage ("invalid operand for '%%%c'", code);
7202 return;
7203 }
7204
7205 asm_fprintf (f, "%d", n);
7206 }
7207 break;
7208
7209 case 'P':
4aa81c2e 7210 if (!CONST_INT_P (x))
43e9d192
IB
7211 {
7212 output_operand_lossage ("invalid operand for '%%%c'", code);
7213 return;
7214 }
7215
8d55c61b 7216 asm_fprintf (f, "%u", popcount_hwi (INTVAL (x)));
43e9d192
IB
7217 break;
7218
7219 case 'H':
c0111dc4
RE
7220 if (x == const0_rtx)
7221 {
7222 asm_fprintf (f, "xzr");
7223 break;
7224 }
7225
4aa81c2e 7226 if (!REG_P (x) || !GP_REGNUM_P (REGNO (x) + 1))
43e9d192
IB
7227 {
7228 output_operand_lossage ("invalid operand for '%%%c'", code);
7229 return;
7230 }
7231
01a3a324 7232 asm_fprintf (f, "%s", reg_names [REGNO (x) + 1]);
43e9d192
IB
7233 break;
7234
43e9d192 7235 case 'M':
c8012fbc 7236 case 'm':
cd5660ab
KT
7237 {
7238 int cond_code;
c8012fbc
WD
7239 /* CONST_TRUE_RTX means al/nv (al is the default, don't print it). */
7240 if (x == const_true_rtx)
cd5660ab 7241 {
c8012fbc
WD
7242 if (code == 'M')
7243 fputs ("nv", f);
cd5660ab
KT
7244 return;
7245 }
43e9d192 7246
cd5660ab
KT
7247 if (!COMPARISON_P (x))
7248 {
7249 output_operand_lossage ("invalid operand for '%%%c'", code);
7250 return;
7251 }
c8012fbc 7252
cd5660ab
KT
7253 cond_code = aarch64_get_condition_code (x);
7254 gcc_assert (cond_code >= 0);
c8012fbc
WD
7255 if (code == 'M')
7256 cond_code = AARCH64_INVERSE_CONDITION_CODE (cond_code);
7257 fputs (aarch64_condition_codes[cond_code], f);
cd5660ab 7258 }
43e9d192
IB
7259 break;
7260
43cacb12
RS
7261 case 'N':
7262 if (!const_vec_duplicate_p (x, &elt))
7263 {
7264 output_operand_lossage ("invalid vector constant");
7265 return;
7266 }
7267
7268 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_VECTOR_INT)
7269 asm_fprintf (f, "%wd", -INTVAL (elt));
7270 else if (GET_MODE_CLASS (GET_MODE (x)) == MODE_VECTOR_FLOAT
7271 && aarch64_print_vector_float_operand (f, x, true))
7272 ;
7273 else
7274 {
7275 output_operand_lossage ("invalid vector constant");
7276 return;
7277 }
7278 break;
7279
43e9d192
IB
7280 case 'b':
7281 case 'h':
7282 case 's':
7283 case 'd':
7284 case 'q':
43e9d192
IB
7285 if (!REG_P (x) || !FP_REGNUM_P (REGNO (x)))
7286 {
7287 output_operand_lossage ("incompatible floating point / vector register operand for '%%%c'", code);
7288 return;
7289 }
50ce6f88 7290 asm_fprintf (f, "%c%d", code, REGNO (x) - V0_REGNUM);
43e9d192
IB
7291 break;
7292
7293 case 'S':
7294 case 'T':
7295 case 'U':
7296 case 'V':
43e9d192
IB
7297 if (!REG_P (x) || !FP_REGNUM_P (REGNO (x)))
7298 {
7299 output_operand_lossage ("incompatible floating point / vector register operand for '%%%c'", code);
7300 return;
7301 }
43cacb12
RS
7302 asm_fprintf (f, "%c%d",
7303 aarch64_sve_data_mode_p (GET_MODE (x)) ? 'z' : 'v',
7304 REGNO (x) - V0_REGNUM + (code - 'S'));
43e9d192
IB
7305 break;
7306
2d8c6dc1 7307 case 'R':
2d8c6dc1
AH
7308 if (!REG_P (x) || !FP_REGNUM_P (REGNO (x)))
7309 {
7310 output_operand_lossage ("incompatible floating point / vector register operand for '%%%c'", code);
7311 return;
7312 }
7313 asm_fprintf (f, "q%d", REGNO (x) - V0_REGNUM + 1);
7314 break;
7315
a05c0ddf 7316 case 'X':
4aa81c2e 7317 if (!CONST_INT_P (x))
a05c0ddf
IB
7318 {
7319 output_operand_lossage ("invalid operand for '%%%c'", code);
7320 return;
7321 }
50d38551 7322 asm_fprintf (f, "0x%wx", UINTVAL (x) & 0xffff);
a05c0ddf
IB
7323 break;
7324
43cacb12
RS
7325 case 'C':
7326 {
7327 /* Print a replicated constant in hex. */
7328 if (!const_vec_duplicate_p (x, &elt) || !CONST_INT_P (elt))
7329 {
7330 output_operand_lossage ("invalid operand for '%%%c'", code);
7331 return;
7332 }
7333 scalar_mode inner_mode = GET_MODE_INNER (GET_MODE (x));
7334 asm_fprintf (f, "0x%wx", UINTVAL (elt) & GET_MODE_MASK (inner_mode));
7335 }
7336 break;
7337
7338 case 'D':
7339 {
7340 /* Print a replicated constant in decimal, treating it as
7341 unsigned. */
7342 if (!const_vec_duplicate_p (x, &elt) || !CONST_INT_P (elt))
7343 {
7344 output_operand_lossage ("invalid operand for '%%%c'", code);
7345 return;
7346 }
7347 scalar_mode inner_mode = GET_MODE_INNER (GET_MODE (x));
7348 asm_fprintf (f, "%wd", UINTVAL (elt) & GET_MODE_MASK (inner_mode));
7349 }
7350 break;
7351
43e9d192
IB
7352 case 'w':
7353 case 'x':
3520f7cc
JG
7354 if (x == const0_rtx
7355 || (CONST_DOUBLE_P (x) && aarch64_float_const_zero_rtx_p (x)))
43e9d192 7356 {
50ce6f88 7357 asm_fprintf (f, "%czr", code);
43e9d192
IB
7358 break;
7359 }
7360
7361 if (REG_P (x) && GP_REGNUM_P (REGNO (x)))
7362 {
50ce6f88 7363 asm_fprintf (f, "%c%d", code, REGNO (x) - R0_REGNUM);
43e9d192
IB
7364 break;
7365 }
7366
7367 if (REG_P (x) && REGNO (x) == SP_REGNUM)
7368 {
50ce6f88 7369 asm_fprintf (f, "%ssp", code == 'w' ? "w" : "");
43e9d192
IB
7370 break;
7371 }
7372
7373 /* Fall through */
7374
7375 case 0:
43e9d192
IB
7376 if (x == NULL)
7377 {
7378 output_operand_lossage ("missing operand");
7379 return;
7380 }
7381
7382 switch (GET_CODE (x))
7383 {
7384 case REG:
43cacb12 7385 if (aarch64_sve_data_mode_p (GET_MODE (x)))
9f4cbab8
RS
7386 {
7387 if (REG_NREGS (x) == 1)
7388 asm_fprintf (f, "z%d", REGNO (x) - V0_REGNUM);
7389 else
7390 {
7391 char suffix
7392 = sizetochar (GET_MODE_UNIT_BITSIZE (GET_MODE (x)));
7393 asm_fprintf (f, "{z%d.%c - z%d.%c}",
7394 REGNO (x) - V0_REGNUM, suffix,
7395 END_REGNO (x) - V0_REGNUM - 1, suffix);
7396 }
7397 }
43cacb12
RS
7398 else
7399 asm_fprintf (f, "%s", reg_names [REGNO (x)]);
43e9d192
IB
7400 break;
7401
7402 case MEM:
cc8ca59e 7403 output_address (GET_MODE (x), XEXP (x, 0));
43e9d192
IB
7404 break;
7405
7406 case LABEL_REF:
7407 case SYMBOL_REF:
7408 output_addr_const (asm_out_file, x);
7409 break;
7410
7411 case CONST_INT:
7412 asm_fprintf (f, "%wd", INTVAL (x));
7413 break;
7414
43cacb12
RS
7415 case CONST:
7416 if (!VECTOR_MODE_P (GET_MODE (x)))
3520f7cc 7417 {
43cacb12
RS
7418 output_addr_const (asm_out_file, x);
7419 break;
3520f7cc 7420 }
43cacb12
RS
7421 /* fall through */
7422
7423 case CONST_VECTOR:
7424 if (!const_vec_duplicate_p (x, &elt))
3520f7cc 7425 {
43cacb12
RS
7426 output_operand_lossage ("invalid vector constant");
7427 return;
3520f7cc 7428 }
43cacb12
RS
7429
7430 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_VECTOR_INT)
7431 asm_fprintf (f, "%wd", INTVAL (elt));
7432 else if (GET_MODE_CLASS (GET_MODE (x)) == MODE_VECTOR_FLOAT
7433 && aarch64_print_vector_float_operand (f, x, false))
7434 ;
3520f7cc 7435 else
43cacb12
RS
7436 {
7437 output_operand_lossage ("invalid vector constant");
7438 return;
7439 }
43e9d192
IB
7440 break;
7441
3520f7cc 7442 case CONST_DOUBLE:
2ca5b430
KT
7443 /* Since we define TARGET_SUPPORTS_WIDE_INT we shouldn't ever
7444 be getting CONST_DOUBLEs holding integers. */
7445 gcc_assert (GET_MODE (x) != VOIDmode);
7446 if (aarch64_float_const_zero_rtx_p (x))
3520f7cc
JG
7447 {
7448 fputc ('0', f);
7449 break;
7450 }
7451 else if (aarch64_float_const_representable_p (x))
7452 {
7453#define buf_size 20
7454 char float_buf[buf_size] = {'\0'};
34a72c33
RS
7455 real_to_decimal_for_mode (float_buf,
7456 CONST_DOUBLE_REAL_VALUE (x),
3520f7cc
JG
7457 buf_size, buf_size,
7458 1, GET_MODE (x));
7459 asm_fprintf (asm_out_file, "%s", float_buf);
7460 break;
7461#undef buf_size
7462 }
7463 output_operand_lossage ("invalid constant");
7464 return;
43e9d192
IB
7465 default:
7466 output_operand_lossage ("invalid operand");
7467 return;
7468 }
7469 break;
7470
7471 case 'A':
7472 if (GET_CODE (x) == HIGH)
7473 x = XEXP (x, 0);
7474
a6e0bfa7 7475 switch (aarch64_classify_symbolic_expression (x))
43e9d192 7476 {
6642bdb4 7477 case SYMBOL_SMALL_GOT_4G:
43e9d192
IB
7478 asm_fprintf (asm_out_file, ":got:");
7479 break;
7480
7481 case SYMBOL_SMALL_TLSGD:
7482 asm_fprintf (asm_out_file, ":tlsgd:");
7483 break;
7484
7485 case SYMBOL_SMALL_TLSDESC:
7486 asm_fprintf (asm_out_file, ":tlsdesc:");
7487 break;
7488
79496620 7489 case SYMBOL_SMALL_TLSIE:
43e9d192
IB
7490 asm_fprintf (asm_out_file, ":gottprel:");
7491 break;
7492
d18ba284 7493 case SYMBOL_TLSLE24:
43e9d192
IB
7494 asm_fprintf (asm_out_file, ":tprel:");
7495 break;
7496
87dd8ab0
MS
7497 case SYMBOL_TINY_GOT:
7498 gcc_unreachable ();
7499 break;
7500
43e9d192
IB
7501 default:
7502 break;
7503 }
7504 output_addr_const (asm_out_file, x);
7505 break;
7506
7507 case 'L':
a6e0bfa7 7508 switch (aarch64_classify_symbolic_expression (x))
43e9d192 7509 {
6642bdb4 7510 case SYMBOL_SMALL_GOT_4G:
43e9d192
IB
7511 asm_fprintf (asm_out_file, ":lo12:");
7512 break;
7513
7514 case SYMBOL_SMALL_TLSGD:
7515 asm_fprintf (asm_out_file, ":tlsgd_lo12:");
7516 break;
7517
7518 case SYMBOL_SMALL_TLSDESC:
7519 asm_fprintf (asm_out_file, ":tlsdesc_lo12:");
7520 break;
7521
79496620 7522 case SYMBOL_SMALL_TLSIE:
43e9d192
IB
7523 asm_fprintf (asm_out_file, ":gottprel_lo12:");
7524 break;
7525
cbf5629e
JW
7526 case SYMBOL_TLSLE12:
7527 asm_fprintf (asm_out_file, ":tprel_lo12:");
7528 break;
7529
d18ba284 7530 case SYMBOL_TLSLE24:
43e9d192
IB
7531 asm_fprintf (asm_out_file, ":tprel_lo12_nc:");
7532 break;
7533
87dd8ab0
MS
7534 case SYMBOL_TINY_GOT:
7535 asm_fprintf (asm_out_file, ":got:");
7536 break;
7537
5ae7caad
JW
7538 case SYMBOL_TINY_TLSIE:
7539 asm_fprintf (asm_out_file, ":gottprel:");
7540 break;
7541
43e9d192
IB
7542 default:
7543 break;
7544 }
7545 output_addr_const (asm_out_file, x);
7546 break;
7547
7548 case 'G':
a6e0bfa7 7549 switch (aarch64_classify_symbolic_expression (x))
43e9d192 7550 {
d18ba284 7551 case SYMBOL_TLSLE24:
43e9d192
IB
7552 asm_fprintf (asm_out_file, ":tprel_hi12:");
7553 break;
7554 default:
7555 break;
7556 }
7557 output_addr_const (asm_out_file, x);
7558 break;
7559
cf670503
ZC
7560 case 'k':
7561 {
c8012fbc 7562 HOST_WIDE_INT cond_code;
cf670503 7563
c8012fbc 7564 if (!CONST_INT_P (x))
cf670503
ZC
7565 {
7566 output_operand_lossage ("invalid operand for '%%%c'", code);
7567 return;
7568 }
7569
c8012fbc
WD
7570 cond_code = INTVAL (x);
7571 gcc_assert (cond_code >= 0 && cond_code <= AARCH64_NV);
7572 asm_fprintf (f, "%d", aarch64_nzcv_codes[cond_code]);
cf670503
ZC
7573 }
7574 break;
7575
e69a816d
WD
7576 case 'y':
7577 case 'z':
7578 {
7579 machine_mode mode = GET_MODE (x);
7580
c348cab0 7581 if (GET_CODE (x) != MEM
6a70badb 7582 || (code == 'y' && maybe_ne (GET_MODE_SIZE (mode), 16)))
e69a816d
WD
7583 {
7584 output_operand_lossage ("invalid operand for '%%%c'", code);
7585 return;
7586 }
7587
a25831ac
AV
7588 if (!aarch64_print_address_internal (f, mode, XEXP (x, 0),
7589 code == 'y'
7590 ? ADDR_QUERY_LDP_STP_N
7591 : ADDR_QUERY_LDP_STP))
c348cab0 7592 output_operand_lossage ("invalid operand prefix '%%%c'", code);
e69a816d
WD
7593 }
7594 break;
7595
43e9d192
IB
7596 default:
7597 output_operand_lossage ("invalid operand prefix '%%%c'", code);
7598 return;
7599 }
7600}
7601
e69a816d
WD
7602/* Print address 'x' of a memory access with mode 'mode'.
7603 'op' is the context required by aarch64_classify_address. It can either be
7604 MEM for a normal memory access or PARALLEL for LDP/STP. */
c348cab0 7605static bool
a97d8b98
RS
7606aarch64_print_address_internal (FILE *f, machine_mode mode, rtx x,
7607 aarch64_addr_query_type type)
43e9d192
IB
7608{
7609 struct aarch64_address_info addr;
6a70badb 7610 unsigned int size;
43e9d192 7611
e69a816d 7612 /* Check all addresses are Pmode - including ILP32. */
67c58c8f
SE
7613 if (GET_MODE (x) != Pmode)
7614 output_operand_lossage ("invalid address mode");
e69a816d 7615
a97d8b98 7616 if (aarch64_classify_address (&addr, x, mode, true, type))
43e9d192
IB
7617 switch (addr.type)
7618 {
7619 case ADDRESS_REG_IMM:
dc640181 7620 if (known_eq (addr.const_offset, 0))
01a3a324 7621 asm_fprintf (f, "[%s]", reg_names [REGNO (addr.base)]);
43cacb12
RS
7622 else if (aarch64_sve_data_mode_p (mode))
7623 {
7624 HOST_WIDE_INT vnum
7625 = exact_div (addr.const_offset,
7626 BYTES_PER_SVE_VECTOR).to_constant ();
7627 asm_fprintf (f, "[%s, #%wd, mul vl]",
7628 reg_names[REGNO (addr.base)], vnum);
7629 }
7630 else if (aarch64_sve_pred_mode_p (mode))
7631 {
7632 HOST_WIDE_INT vnum
7633 = exact_div (addr.const_offset,
7634 BYTES_PER_SVE_PRED).to_constant ();
7635 asm_fprintf (f, "[%s, #%wd, mul vl]",
7636 reg_names[REGNO (addr.base)], vnum);
7637 }
43e9d192 7638 else
16a3246f 7639 asm_fprintf (f, "[%s, %wd]", reg_names [REGNO (addr.base)],
43e9d192 7640 INTVAL (addr.offset));
c348cab0 7641 return true;
43e9d192
IB
7642
7643 case ADDRESS_REG_REG:
7644 if (addr.shift == 0)
16a3246f 7645 asm_fprintf (f, "[%s, %s]", reg_names [REGNO (addr.base)],
01a3a324 7646 reg_names [REGNO (addr.offset)]);
43e9d192 7647 else
16a3246f 7648 asm_fprintf (f, "[%s, %s, lsl %u]", reg_names [REGNO (addr.base)],
01a3a324 7649 reg_names [REGNO (addr.offset)], addr.shift);
c348cab0 7650 return true;
43e9d192
IB
7651
7652 case ADDRESS_REG_UXTW:
7653 if (addr.shift == 0)
16a3246f 7654 asm_fprintf (f, "[%s, w%d, uxtw]", reg_names [REGNO (addr.base)],
43e9d192
IB
7655 REGNO (addr.offset) - R0_REGNUM);
7656 else
16a3246f 7657 asm_fprintf (f, "[%s, w%d, uxtw %u]", reg_names [REGNO (addr.base)],
43e9d192 7658 REGNO (addr.offset) - R0_REGNUM, addr.shift);
c348cab0 7659 return true;
43e9d192
IB
7660
7661 case ADDRESS_REG_SXTW:
7662 if (addr.shift == 0)
16a3246f 7663 asm_fprintf (f, "[%s, w%d, sxtw]", reg_names [REGNO (addr.base)],
43e9d192
IB
7664 REGNO (addr.offset) - R0_REGNUM);
7665 else
16a3246f 7666 asm_fprintf (f, "[%s, w%d, sxtw %u]", reg_names [REGNO (addr.base)],
43e9d192 7667 REGNO (addr.offset) - R0_REGNUM, addr.shift);
c348cab0 7668 return true;
43e9d192
IB
7669
7670 case ADDRESS_REG_WB:
6a70badb
RS
7671 /* Writeback is only supported for fixed-width modes. */
7672 size = GET_MODE_SIZE (mode).to_constant ();
43e9d192
IB
7673 switch (GET_CODE (x))
7674 {
7675 case PRE_INC:
6a70badb 7676 asm_fprintf (f, "[%s, %d]!", reg_names [REGNO (addr.base)], size);
c348cab0 7677 return true;
43e9d192 7678 case POST_INC:
6a70badb 7679 asm_fprintf (f, "[%s], %d", reg_names [REGNO (addr.base)], size);
c348cab0 7680 return true;
43e9d192 7681 case PRE_DEC:
6a70badb 7682 asm_fprintf (f, "[%s, -%d]!", reg_names [REGNO (addr.base)], size);
c348cab0 7683 return true;
43e9d192 7684 case POST_DEC:
6a70badb 7685 asm_fprintf (f, "[%s], -%d", reg_names [REGNO (addr.base)], size);
c348cab0 7686 return true;
43e9d192 7687 case PRE_MODIFY:
6a70badb 7688 asm_fprintf (f, "[%s, %wd]!", reg_names[REGNO (addr.base)],
43e9d192 7689 INTVAL (addr.offset));
c348cab0 7690 return true;
43e9d192 7691 case POST_MODIFY:
6a70badb 7692 asm_fprintf (f, "[%s], %wd", reg_names[REGNO (addr.base)],
43e9d192 7693 INTVAL (addr.offset));
c348cab0 7694 return true;
43e9d192
IB
7695 default:
7696 break;
7697 }
7698 break;
7699
7700 case ADDRESS_LO_SUM:
16a3246f 7701 asm_fprintf (f, "[%s, #:lo12:", reg_names [REGNO (addr.base)]);
43e9d192
IB
7702 output_addr_const (f, addr.offset);
7703 asm_fprintf (f, "]");
c348cab0 7704 return true;
43e9d192
IB
7705
7706 case ADDRESS_SYMBOLIC:
d6591257 7707 output_addr_const (f, x);
c348cab0 7708 return true;
43e9d192
IB
7709 }
7710
c348cab0 7711 return false;
43e9d192
IB
7712}
7713
e69a816d
WD
7714/* Print address 'x' of a memory access with mode 'mode'. */
7715static void
7716aarch64_print_operand_address (FILE *f, machine_mode mode, rtx x)
7717{
43cacb12 7718 if (!aarch64_print_address_internal (f, mode, x, ADDR_QUERY_ANY))
c348cab0 7719 output_addr_const (f, x);
e69a816d
WD
7720}
7721
43e9d192
IB
7722bool
7723aarch64_label_mentioned_p (rtx x)
7724{
7725 const char *fmt;
7726 int i;
7727
7728 if (GET_CODE (x) == LABEL_REF)
7729 return true;
7730
7731 /* UNSPEC_TLS entries for a symbol include a LABEL_REF for the
7732 referencing instruction, but they are constant offsets, not
7733 symbols. */
7734 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
7735 return false;
7736
7737 fmt = GET_RTX_FORMAT (GET_CODE (x));
7738 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
7739 {
7740 if (fmt[i] == 'E')
7741 {
7742 int j;
7743
7744 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
7745 if (aarch64_label_mentioned_p (XVECEXP (x, i, j)))
7746 return 1;
7747 }
7748 else if (fmt[i] == 'e' && aarch64_label_mentioned_p (XEXP (x, i)))
7749 return 1;
7750 }
7751
7752 return 0;
7753}
7754
7755/* Implement REGNO_REG_CLASS. */
7756
7757enum reg_class
7758aarch64_regno_regclass (unsigned regno)
7759{
7760 if (GP_REGNUM_P (regno))
a4a182c6 7761 return GENERAL_REGS;
43e9d192
IB
7762
7763 if (regno == SP_REGNUM)
7764 return STACK_REG;
7765
7766 if (regno == FRAME_POINTER_REGNUM
7767 || regno == ARG_POINTER_REGNUM)
f24bb080 7768 return POINTER_REGS;
43e9d192
IB
7769
7770 if (FP_REGNUM_P (regno))
7771 return FP_LO_REGNUM_P (regno) ? FP_LO_REGS : FP_REGS;
7772
43cacb12
RS
7773 if (PR_REGNUM_P (regno))
7774 return PR_LO_REGNUM_P (regno) ? PR_LO_REGS : PR_HI_REGS;
7775
43e9d192
IB
7776 return NO_REGS;
7777}
7778
6a70badb
RS
7779/* OFFSET is an address offset for mode MODE, which has SIZE bytes.
7780 If OFFSET is out of range, return an offset of an anchor point
7781 that is in range. Return 0 otherwise. */
7782
7783static HOST_WIDE_INT
7784aarch64_anchor_offset (HOST_WIDE_INT offset, HOST_WIDE_INT size,
7785 machine_mode mode)
7786{
7787 /* Does it look like we'll need a 16-byte load/store-pair operation? */
7788 if (size > 16)
7789 return (offset + 0x400) & ~0x7f0;
7790
7791 /* For offsets that aren't a multiple of the access size, the limit is
7792 -256...255. */
7793 if (offset & (size - 1))
7794 {
7795 /* BLKmode typically uses LDP of X-registers. */
7796 if (mode == BLKmode)
7797 return (offset + 512) & ~0x3ff;
7798 return (offset + 0x100) & ~0x1ff;
7799 }
7800
7801 /* Small negative offsets are supported. */
7802 if (IN_RANGE (offset, -256, 0))
7803 return 0;
7804
7805 if (mode == TImode || mode == TFmode)
7806 return (offset + 0x100) & ~0x1ff;
7807
 7808  /* Otherwise use an unsigned 12-bit offset, scaled by the access size.  */
7809 return offset & (~0xfff * size);
7810}
7811
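A standalone sketch (not GCC code) of the anchoring rules above, restricted to the non-BLKmode, non-TImode/TFmode cases; the sample offsets are arbitrary.

#include <stdio.h>

/* Mirror of aarch64_anchor_offset above for the non-BLK, non-TI/TF cases
   (sketch only).  */
static long long
anchor (long long offset, long long size)
{
  if (size > 16)
    return (offset + 0x400) & ~0x7f0;
  if (offset & (size - 1))
    return (offset + 0x100) & ~0x1ff;
  if (offset >= -256 && offset <= 0)
    return 0;
  return offset & (~0xfffLL * size);
}

int
main (void)
{
  printf ("%#llx\n", anchor (0x10001, 8));  /* 0x10000: leaves +1 for the access.  */
  printf ("%#llx\n", anchor (0x4008, 4));   /* 0x4000: leaves +8 for the access.  */
  return 0;
}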
0c4ec427 7812static rtx
ef4bddc2 7813aarch64_legitimize_address (rtx x, rtx /* orig_x */, machine_mode mode)
0c4ec427
RE
7814{
7815 /* Try to split X+CONST into Y=X+(CONST & ~mask), Y+(CONST&mask),
7816 where mask is selected by alignment and size of the offset.
7817 We try to pick as large a range for the offset as possible to
7818 maximize the chance of a CSE. However, for aligned addresses
7819 we limit the range to 4k so that structures with different sized
e8426e0a
BC
7820 elements are likely to use the same base. We need to be careful
7821 not to split a CONST for some forms of address expression, otherwise
7822 it will generate sub-optimal code. */
0c4ec427
RE
7823
7824 if (GET_CODE (x) == PLUS && CONST_INT_P (XEXP (x, 1)))
7825 {
9e0218fc 7826 rtx base = XEXP (x, 0);
17d7bdd8 7827 rtx offset_rtx = XEXP (x, 1);
9e0218fc 7828 HOST_WIDE_INT offset = INTVAL (offset_rtx);
0c4ec427 7829
9e0218fc 7830 if (GET_CODE (base) == PLUS)
e8426e0a 7831 {
9e0218fc
RH
7832 rtx op0 = XEXP (base, 0);
7833 rtx op1 = XEXP (base, 1);
7834
7835 /* Force any scaling into a temp for CSE. */
7836 op0 = force_reg (Pmode, op0);
7837 op1 = force_reg (Pmode, op1);
7838
7839 /* Let the pointer register be in op0. */
7840 if (REG_POINTER (op1))
7841 std::swap (op0, op1);
7842
7843 /* If the pointer is virtual or frame related, then we know that
7844 virtual register instantiation or register elimination is going
7845 to apply a second constant. We want the two constants folded
7846 together easily. Therefore, emit as (OP0 + CONST) + OP1. */
7847 if (virt_or_elim_regno_p (REGNO (op0)))
e8426e0a 7848 {
9e0218fc
RH
7849 base = expand_binop (Pmode, add_optab, op0, offset_rtx,
7850 NULL_RTX, true, OPTAB_DIRECT);
7851 return gen_rtx_PLUS (Pmode, base, op1);
e8426e0a 7852 }
e8426e0a 7853
9e0218fc
RH
7854 /* Otherwise, in order to encourage CSE (and thence loop strength
7855 reduce) scaled addresses, emit as (OP0 + OP1) + CONST. */
7856 base = expand_binop (Pmode, add_optab, op0, op1,
7857 NULL_RTX, true, OPTAB_DIRECT);
7858 x = gen_rtx_PLUS (Pmode, base, offset_rtx);
e8426e0a
BC
7859 }
7860
6a70badb
RS
7861 HOST_WIDE_INT size;
7862 if (GET_MODE_SIZE (mode).is_constant (&size))
ff0f3f1c 7863 {
6a70badb
RS
7864 HOST_WIDE_INT base_offset = aarch64_anchor_offset (offset, size,
7865 mode);
7866 if (base_offset != 0)
7867 {
7868 base = plus_constant (Pmode, base, base_offset);
7869 base = force_operand (base, NULL_RTX);
7870 return plus_constant (Pmode, base, offset - base_offset);
7871 }
9e0218fc 7872 }
0c4ec427
RE
7873 }
7874
7875 return x;
7876}
7877
43e9d192
IB
7878static reg_class_t
7879aarch64_secondary_reload (bool in_p ATTRIBUTE_UNUSED, rtx x,
7880 reg_class_t rclass,
ef4bddc2 7881 machine_mode mode,
43e9d192
IB
7882 secondary_reload_info *sri)
7883{
9a1b9cb4
RS
7884 /* Use aarch64_sve_reload_be for SVE reloads that cannot be handled
7885 directly by the *aarch64_sve_mov<mode>_be move pattern. See the
7886 comment at the head of aarch64-sve.md for more details about the
7887 big-endian handling. */
43cacb12
RS
7888 if (BYTES_BIG_ENDIAN
7889 && reg_class_subset_p (rclass, FP_REGS)
9a1b9cb4
RS
7890 && !((REG_P (x) && HARD_REGISTER_P (x))
7891 || aarch64_simd_valid_immediate (x, NULL))
43cacb12
RS
7892 && aarch64_sve_data_mode_p (mode))
7893 {
7894 sri->icode = CODE_FOR_aarch64_sve_reload_be;
7895 return NO_REGS;
7896 }
b4f50fd4
RR
7897
7898 /* If we have to disable direct literal pool loads and stores because the
7899 function is too big, then we need a scratch register. */
7900 if (MEM_P (x) && GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x)
7901 && (SCALAR_FLOAT_MODE_P (GET_MODE (x))
7902 || targetm.vector_mode_supported_p (GET_MODE (x)))
9ee6540a 7903 && !aarch64_pcrelative_literal_loads)
b4f50fd4 7904 {
0016d8d9 7905 sri->icode = code_for_aarch64_reload_movcp (mode, DImode);
b4f50fd4
RR
7906 return NO_REGS;
7907 }
7908
43e9d192
IB
7909 /* Without the TARGET_SIMD instructions we cannot move a Q register
7910 to a Q register directly. We need a scratch. */
7911 if (REG_P (x) && (mode == TFmode || mode == TImode) && mode == GET_MODE (x)
7912 && FP_REGNUM_P (REGNO (x)) && !TARGET_SIMD
7913 && reg_class_subset_p (rclass, FP_REGS))
7914 {
0016d8d9 7915 sri->icode = code_for_aarch64_reload_mov (mode);
43e9d192
IB
7916 return NO_REGS;
7917 }
7918
7919 /* A TFmode or TImode memory access should be handled via an FP_REGS
7920 because AArch64 has richer addressing modes for LDR/STR instructions
7921 than LDP/STP instructions. */
d5726973 7922 if (TARGET_FLOAT && rclass == GENERAL_REGS
6a70badb 7923 && known_eq (GET_MODE_SIZE (mode), 16) && MEM_P (x))
43e9d192
IB
7924 return FP_REGS;
7925
7926 if (rclass == FP_REGS && (mode == TImode || mode == TFmode) && CONSTANT_P(x))
a4a182c6 7927 return GENERAL_REGS;
43e9d192
IB
7928
7929 return NO_REGS;
7930}
7931
7932static bool
6216fd90 7933aarch64_can_eliminate (const int from ATTRIBUTE_UNUSED, const int to)
43e9d192 7934{
6216fd90 7935 gcc_assert (from == ARG_POINTER_REGNUM || from == FRAME_POINTER_REGNUM);
43e9d192 7936
6216fd90
WD
7937 /* If we need a frame pointer, ARG_POINTER_REGNUM and FRAME_POINTER_REGNUM
7938 can only eliminate to HARD_FRAME_POINTER_REGNUM. */
43e9d192 7939 if (frame_pointer_needed)
6216fd90 7940 return to == HARD_FRAME_POINTER_REGNUM;
43e9d192
IB
7941 return true;
7942}
7943
6a70badb 7944poly_int64
43e9d192
IB
7945aarch64_initial_elimination_offset (unsigned from, unsigned to)
7946{
78c29983
MS
7947 if (to == HARD_FRAME_POINTER_REGNUM)
7948 {
7949 if (from == ARG_POINTER_REGNUM)
71bfb77a 7950 return cfun->machine->frame.hard_fp_offset;
78c29983
MS
7951
7952 if (from == FRAME_POINTER_REGNUM)
71bfb77a
WD
7953 return cfun->machine->frame.hard_fp_offset
7954 - cfun->machine->frame.locals_offset;
78c29983
MS
7955 }
7956
7957 if (to == STACK_POINTER_REGNUM)
7958 {
7959 if (from == FRAME_POINTER_REGNUM)
71bfb77a
WD
7960 return cfun->machine->frame.frame_size
7961 - cfun->machine->frame.locals_offset;
78c29983
MS
7962 }
7963
1c960e02 7964 return cfun->machine->frame.frame_size;
43e9d192
IB
7965}
7966
43e9d192
IB
7967/* Implement RETURN_ADDR_RTX. We do not support moving back to a
7968 previous frame. */
7969
7970rtx
7971aarch64_return_addr (int count, rtx frame ATTRIBUTE_UNUSED)
7972{
7973 if (count != 0)
7974 return const0_rtx;
7975 return get_hard_reg_initial_val (Pmode, LR_REGNUM);
7976}
7977
7978
7979static void
7980aarch64_asm_trampoline_template (FILE *f)
7981{
28514dda
YZ
7982 if (TARGET_ILP32)
7983 {
7984 asm_fprintf (f, "\tldr\tw%d, .+16\n", IP1_REGNUM - R0_REGNUM);
7985 asm_fprintf (f, "\tldr\tw%d, .+16\n", STATIC_CHAIN_REGNUM - R0_REGNUM);
7986 }
7987 else
7988 {
7989 asm_fprintf (f, "\tldr\t%s, .+16\n", reg_names [IP1_REGNUM]);
7990 asm_fprintf (f, "\tldr\t%s, .+20\n", reg_names [STATIC_CHAIN_REGNUM]);
7991 }
01a3a324 7992 asm_fprintf (f, "\tbr\t%s\n", reg_names [IP1_REGNUM]);
43e9d192 7993 assemble_aligned_integer (4, const0_rtx);
28514dda
YZ
7994 assemble_aligned_integer (POINTER_BYTES, const0_rtx);
7995 assemble_aligned_integer (POINTER_BYTES, const0_rtx);
43e9d192
IB
7996}
7997
7998static void
7999aarch64_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
8000{
8001 rtx fnaddr, mem, a_tramp;
28514dda 8002 const int tramp_code_sz = 16;
43e9d192
IB
8003
8004 /* Don't need to copy the trailing D-words, we fill those in below. */
8005 emit_block_move (m_tramp, assemble_trampoline_template (),
28514dda
YZ
8006 GEN_INT (tramp_code_sz), BLOCK_OP_NORMAL);
8007 mem = adjust_address (m_tramp, ptr_mode, tramp_code_sz);
43e9d192 8008 fnaddr = XEXP (DECL_RTL (fndecl), 0);
28514dda
YZ
8009 if (GET_MODE (fnaddr) != ptr_mode)
8010 fnaddr = convert_memory_address (ptr_mode, fnaddr);
43e9d192
IB
8011 emit_move_insn (mem, fnaddr);
8012
28514dda 8013 mem = adjust_address (m_tramp, ptr_mode, tramp_code_sz + POINTER_BYTES);
43e9d192
IB
8014 emit_move_insn (mem, chain_value);
8015
8016 /* XXX We should really define a "clear_cache" pattern and use
8017 gen_clear_cache(). */
8018 a_tramp = XEXP (m_tramp, 0);
8019 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__clear_cache"),
db69559b 8020 LCT_NORMAL, VOIDmode, a_tramp, ptr_mode,
28514dda
YZ
8021 plus_constant (ptr_mode, a_tramp, TRAMPOLINE_SIZE),
8022 ptr_mode);
43e9d192
IB
8023}
8024
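Illustration only, assuming LP64 (POINTER_BYTES == 8): the trampoline initialised above has the layout sketched below, 16 bytes of code from the template followed by the two data words.

#include <stdint.h>

/* Sketch of the emitted trampoline layout, not a GCC structure.  */
struct trampoline_lp64
{
  uint32_t code[4];       /* Two PC-relative loads, a br, and a zero word
                             from the asm template (tramp_code_sz == 16).  */
  uint64_t target_fn;     /* fnaddr, written at offset 16.  */
  uint64_t static_chain;  /* chain_value, at offset 16 + POINTER_BYTES.  */
};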
8025static unsigned char
ef4bddc2 8026aarch64_class_max_nregs (reg_class_t regclass, machine_mode mode)
43e9d192 8027{
6a70badb
RS
8028 /* ??? Logically we should only need to provide a value when
8029 HARD_REGNO_MODE_OK says that at least one register in REGCLASS
8030 can hold MODE, but at the moment we need to handle all modes.
8031 Just ignore any runtime parts for registers that can't store them. */
8032 HOST_WIDE_INT lowest_size = constant_lower_bound (GET_MODE_SIZE (mode));
43cacb12 8033 unsigned int nregs;
43e9d192
IB
8034 switch (regclass)
8035 {
d677263e 8036 case TAILCALL_ADDR_REGS:
43e9d192
IB
8037 case POINTER_REGS:
8038 case GENERAL_REGS:
8039 case ALL_REGS:
f25a140b 8040 case POINTER_AND_FP_REGS:
43e9d192
IB
8041 case FP_REGS:
8042 case FP_LO_REGS:
43cacb12
RS
8043 if (aarch64_sve_data_mode_p (mode)
8044 && constant_multiple_p (GET_MODE_SIZE (mode),
8045 BYTES_PER_SVE_VECTOR, &nregs))
8046 return nregs;
8047 return (aarch64_vector_data_mode_p (mode)
6a70badb
RS
8048 ? CEIL (lowest_size, UNITS_PER_VREG)
8049 : CEIL (lowest_size, UNITS_PER_WORD));
43e9d192 8050 case STACK_REG:
43cacb12
RS
8051 case PR_REGS:
8052 case PR_LO_REGS:
8053 case PR_HI_REGS:
43e9d192
IB
8054 return 1;
8055
8056 case NO_REGS:
8057 return 0;
8058
8059 default:
8060 break;
8061 }
8062 gcc_unreachable ();
8063}
8064
8065static reg_class_t
78d8b9f0 8066aarch64_preferred_reload_class (rtx x, reg_class_t regclass)
43e9d192 8067{
51bb310d 8068 if (regclass == POINTER_REGS)
78d8b9f0
IB
8069 return GENERAL_REGS;
8070
51bb310d
MS
8071 if (regclass == STACK_REG)
8072 {
8073 if (REG_P(x)
8074 && reg_class_subset_p (REGNO_REG_CLASS (REGNO (x)), POINTER_REGS))
8075 return regclass;
8076
8077 return NO_REGS;
8078 }
8079
27bd251b
IB
 8080  /* Register elimination can result in a request for
 8081     SP+constant->FP_REGS.  We cannot support such operations, which
 8082     use SP as source and an FP_REG as destination, so reject such
 8083     requests right now.  */
8084 if (! reg_class_subset_p (regclass, GENERAL_REGS) && GET_CODE (x) == PLUS)
8085 {
8086 rtx lhs = XEXP (x, 0);
8087
8088 /* Look through a possible SUBREG introduced by ILP32. */
8089 if (GET_CODE (lhs) == SUBREG)
8090 lhs = SUBREG_REG (lhs);
8091
8092 gcc_assert (REG_P (lhs));
8093 gcc_assert (reg_class_subset_p (REGNO_REG_CLASS (REGNO (lhs)),
8094 POINTER_REGS));
8095 return NO_REGS;
8096 }
8097
78d8b9f0 8098 return regclass;
43e9d192
IB
8099}
8100
8101void
8102aarch64_asm_output_labelref (FILE* f, const char *name)
8103{
8104 asm_fprintf (f, "%U%s", name);
8105}
8106
8107static void
8108aarch64_elf_asm_constructor (rtx symbol, int priority)
8109{
8110 if (priority == DEFAULT_INIT_PRIORITY)
8111 default_ctor_section_asm_out_constructor (symbol, priority);
8112 else
8113 {
8114 section *s;
53d190c1
AT
8115 /* While priority is known to be in range [0, 65535], so 18 bytes
8116 would be enough, the compiler might not know that. To avoid
8117 -Wformat-truncation false positive, use a larger size. */
8118 char buf[23];
43e9d192 8119 snprintf (buf, sizeof (buf), ".init_array.%.5u", priority);
fcef3abd 8120 s = get_section (buf, SECTION_WRITE | SECTION_NOTYPE, NULL);
43e9d192
IB
8121 switch_to_section (s);
8122 assemble_align (POINTER_SIZE);
28514dda 8123 assemble_aligned_integer (POINTER_BYTES, symbol);
43e9d192
IB
8124 }
8125}
8126
8127static void
8128aarch64_elf_asm_destructor (rtx symbol, int priority)
8129{
8130 if (priority == DEFAULT_INIT_PRIORITY)
8131 default_dtor_section_asm_out_destructor (symbol, priority);
8132 else
8133 {
8134 section *s;
53d190c1
AT
8135 /* While priority is known to be in range [0, 65535], so 18 bytes
8136 would be enough, the compiler might not know that. To avoid
8137 -Wformat-truncation false positive, use a larger size. */
8138 char buf[23];
43e9d192 8139 snprintf (buf, sizeof (buf), ".fini_array.%.5u", priority);
fcef3abd 8140 s = get_section (buf, SECTION_WRITE | SECTION_NOTYPE, NULL);
43e9d192
IB
8141 switch_to_section (s);
8142 assemble_align (POINTER_SIZE);
28514dda 8143 assemble_aligned_integer (POINTER_BYTES, symbol);
43e9d192
IB
8144 }
8145}
8146
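A standalone sketch of the section naming used by the two functions above; the priority value 101 is an arbitrary example.

#include <stdio.h>

int
main (void)
{
  char buf[23];
  /* "%.5u" zero-pads the priority to five digits.  */
  snprintf (buf, sizeof (buf), ".init_array.%.5u", 101u);
  puts (buf);   /* .init_array.00101 */
  return 0;
}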
8147const char*
8148aarch64_output_casesi (rtx *operands)
8149{
8150 char buf[100];
8151 char label[100];
b32d5189 8152 rtx diff_vec = PATTERN (NEXT_INSN (as_a <rtx_insn *> (operands[2])));
43e9d192
IB
8153 int index;
8154 static const char *const patterns[4][2] =
8155 {
8156 {
8157 "ldrb\t%w3, [%0,%w1,uxtw]",
8158 "add\t%3, %4, %w3, sxtb #2"
8159 },
8160 {
8161 "ldrh\t%w3, [%0,%w1,uxtw #1]",
8162 "add\t%3, %4, %w3, sxth #2"
8163 },
8164 {
8165 "ldr\t%w3, [%0,%w1,uxtw #2]",
8166 "add\t%3, %4, %w3, sxtw #2"
8167 },
8168 /* We assume that DImode is only generated when not optimizing and
8169 that we don't really need 64-bit address offsets. That would
8170 imply an object file with 8GB of code in a single function! */
8171 {
8172 "ldr\t%w3, [%0,%w1,uxtw #2]",
8173 "add\t%3, %4, %w3, sxtw #2"
8174 }
8175 };
8176
8177 gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC);
8178
77e994c9
RS
8179 scalar_int_mode mode = as_a <scalar_int_mode> (GET_MODE (diff_vec));
8180 index = exact_log2 (GET_MODE_SIZE (mode));
43e9d192
IB
8181
8182 gcc_assert (index >= 0 && index <= 3);
8183
8184 /* Need to implement table size reduction, by changing the code below. */
8185 output_asm_insn (patterns[index][0], operands);
8186 ASM_GENERATE_INTERNAL_LABEL (label, "Lrtx", CODE_LABEL_NUMBER (operands[2]));
8187 snprintf (buf, sizeof (buf),
8188 "adr\t%%4, %s", targetm.strip_name_encoding (label));
8189 output_asm_insn (buf, operands);
8190 output_asm_insn (patterns[index][1], operands);
8191 output_asm_insn ("br\t%3", operands);
8192 assemble_label (asm_out_file, label);
8193 return "";
8194}
8195
8196
8197/* Return size in bits of an arithmetic operand which is shifted/scaled and
8198 masked such that it is suitable for a UXTB, UXTH, or UXTW extend
8199 operator. */
8200
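/* Worked example for the function below: aarch64_uxt_size (1, 0x1fe) == 8
   because 0x1fe == 0xff << 1, whereas aarch64_uxt_size (0, 0x12345) == 0
   because no UXT mask matches.  */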
8201int
8202aarch64_uxt_size (int shift, HOST_WIDE_INT mask)
8203{
8204 if (shift >= 0 && shift <= 3)
8205 {
8206 int size;
8207 for (size = 8; size <= 32; size *= 2)
8208 {
8209 HOST_WIDE_INT bits = ((HOST_WIDE_INT)1U << size) - 1;
8210 if (mask == bits << shift)
8211 return size;
8212 }
8213 }
8214 return 0;
8215}
8216
e78d485e
RR
8217/* Constant pools are per-function only when PC-relative
8218 literal loads are enabled or we are using the large memory
8219 model. */
8220
8221static inline bool
8222aarch64_can_use_per_function_literal_pools_p (void)
8223{
9ee6540a 8224 return (aarch64_pcrelative_literal_loads
e78d485e
RR
8225 || aarch64_cmodel == AARCH64_CMODEL_LARGE);
8226}
8227
43e9d192 8228static bool
e78d485e 8229aarch64_use_blocks_for_constant_p (machine_mode, const_rtx)
43e9d192 8230{
74a9301d
VM
8231 /* We can't use blocks for constants when we're using a per-function
8232 constant pool. */
8233 return !aarch64_can_use_per_function_literal_pools_p ();
43e9d192
IB
8234}
8235
e78d485e
RR
8236/* Select appropriate section for constants depending
8237 on where we place literal pools. */
8238
43e9d192 8239static section *
e78d485e
RR
8240aarch64_select_rtx_section (machine_mode mode,
8241 rtx x,
8242 unsigned HOST_WIDE_INT align)
43e9d192 8243{
e78d485e
RR
8244 if (aarch64_can_use_per_function_literal_pools_p ())
8245 return function_section (current_function_decl);
43e9d192 8246
e78d485e
RR
8247 return default_elf_select_rtx_section (mode, x, align);
8248}
43e9d192 8249
5fca7b66
RH
8250/* Implement ASM_OUTPUT_POOL_EPILOGUE. */
8251void
8252aarch64_asm_output_pool_epilogue (FILE *f, const char *, tree,
8253 HOST_WIDE_INT offset)
8254{
8255 /* When using per-function literal pools, we must ensure that any code
8256 section is aligned to the minimal instruction length, lest we get
8257 errors from the assembler re "unaligned instructions". */
8258 if ((offset & 3) && aarch64_can_use_per_function_literal_pools_p ())
8259 ASM_OUTPUT_ALIGN (f, 2);
8260}
8261
43e9d192
IB
8262/* Costs. */
8263
8264/* Helper function for rtx cost calculation. Strip a shift expression
8265 from X. Returns the inner operand if successful, or the original
8266 expression on failure. */
8267static rtx
8268aarch64_strip_shift (rtx x)
8269{
8270 rtx op = x;
8271
57b77d46
RE
8272 /* We accept both ROTATERT and ROTATE: since the RHS must be a constant
8273 we can convert both to ROR during final output. */
43e9d192
IB
8274 if ((GET_CODE (op) == ASHIFT
8275 || GET_CODE (op) == ASHIFTRT
57b77d46
RE
8276 || GET_CODE (op) == LSHIFTRT
8277 || GET_CODE (op) == ROTATERT
8278 || GET_CODE (op) == ROTATE)
43e9d192
IB
8279 && CONST_INT_P (XEXP (op, 1)))
8280 return XEXP (op, 0);
8281
8282 if (GET_CODE (op) == MULT
8283 && CONST_INT_P (XEXP (op, 1))
8284 && ((unsigned) exact_log2 (INTVAL (XEXP (op, 1)))) < 64)
8285 return XEXP (op, 0);
8286
8287 return x;
8288}
8289
4745e701 8290/* Helper function for rtx cost calculation. Strip an extend
43e9d192
IB
8291 expression from X. Returns the inner operand if successful, or the
8292 original expression on failure. We deal with a number of possible
b10f1009
AP
8293 canonicalization variations here. If STRIP_SHIFT is true, then
8294 we can strip off a shift also. */
43e9d192 8295static rtx
b10f1009 8296aarch64_strip_extend (rtx x, bool strip_shift)
43e9d192 8297{
77e994c9 8298 scalar_int_mode mode;
43e9d192
IB
8299 rtx op = x;
8300
77e994c9
RS
8301 if (!is_a <scalar_int_mode> (GET_MODE (op), &mode))
8302 return op;
8303
43e9d192
IB
8304 /* Zero and sign extraction of a widened value. */
8305 if ((GET_CODE (op) == ZERO_EXTRACT || GET_CODE (op) == SIGN_EXTRACT)
8306 && XEXP (op, 2) == const0_rtx
4745e701 8307 && GET_CODE (XEXP (op, 0)) == MULT
77e994c9 8308 && aarch64_is_extend_from_extract (mode, XEXP (XEXP (op, 0), 1),
43e9d192
IB
8309 XEXP (op, 1)))
8310 return XEXP (XEXP (op, 0), 0);
8311
8312 /* It can also be represented (for zero-extend) as an AND with an
8313 immediate. */
8314 if (GET_CODE (op) == AND
8315 && GET_CODE (XEXP (op, 0)) == MULT
8316 && CONST_INT_P (XEXP (XEXP (op, 0), 1))
8317 && CONST_INT_P (XEXP (op, 1))
8318 && aarch64_uxt_size (exact_log2 (INTVAL (XEXP (XEXP (op, 0), 1))),
8319 INTVAL (XEXP (op, 1))) != 0)
8320 return XEXP (XEXP (op, 0), 0);
8321
8322 /* Now handle extended register, as this may also have an optional
8323 left shift by 1..4. */
b10f1009
AP
8324 if (strip_shift
8325 && GET_CODE (op) == ASHIFT
43e9d192
IB
8326 && CONST_INT_P (XEXP (op, 1))
8327 && ((unsigned HOST_WIDE_INT) INTVAL (XEXP (op, 1))) <= 4)
8328 op = XEXP (op, 0);
8329
8330 if (GET_CODE (op) == ZERO_EXTEND
8331 || GET_CODE (op) == SIGN_EXTEND)
8332 op = XEXP (op, 0);
8333
8334 if (op != x)
8335 return op;
8336
4745e701
JG
8337 return x;
8338}
8339
0a78ebe4
KT
8340/* Return true iff CODE is a shift supported in combination
8341 with arithmetic instructions. */
4d1919ed 8342
0a78ebe4
KT
8343static bool
8344aarch64_shift_p (enum rtx_code code)
8345{
8346 return code == ASHIFT || code == ASHIFTRT || code == LSHIFTRT;
8347}
8348
b10f1009
AP
8349
8350/* Return true iff X is a cheap shift without a sign extend. */
8351
8352static bool
8353aarch64_cheap_mult_shift_p (rtx x)
8354{
8355 rtx op0, op1;
8356
8357 op0 = XEXP (x, 0);
8358 op1 = XEXP (x, 1);
8359
8360 if (!(aarch64_tune_params.extra_tuning_flags
8361 & AARCH64_EXTRA_TUNE_CHEAP_SHIFT_EXTEND))
8362 return false;
8363
8364 if (GET_CODE (op0) == SIGN_EXTEND)
8365 return false;
8366
8367 if (GET_CODE (x) == ASHIFT && CONST_INT_P (op1)
8368 && UINTVAL (op1) <= 4)
8369 return true;
8370
8371 if (GET_CODE (x) != MULT || !CONST_INT_P (op1))
8372 return false;
8373
8374 HOST_WIDE_INT l2 = exact_log2 (INTVAL (op1));
8375
8376 if (l2 > 0 && l2 <= 4)
8377 return true;
8378
8379 return false;
8380}
8381
4745e701 8382/* Helper function for rtx cost calculation. Calculate the cost of
0a78ebe4
KT
8383 a MULT or ASHIFT, which may be part of a compound PLUS/MINUS rtx.
8384 Return the calculated cost of the expression, recursing manually in to
4745e701
JG
8385 operands where needed. */
8386
8387static int
e548c9df 8388aarch64_rtx_mult_cost (rtx x, enum rtx_code code, int outer, bool speed)
4745e701
JG
8389{
8390 rtx op0, op1;
8391 const struct cpu_cost_table *extra_cost
b175b679 8392 = aarch64_tune_params.insn_extra_cost;
4745e701 8393 int cost = 0;
0a78ebe4 8394 bool compound_p = (outer == PLUS || outer == MINUS);
ef4bddc2 8395 machine_mode mode = GET_MODE (x);
4745e701
JG
8396
8397 gcc_checking_assert (code == MULT);
8398
8399 op0 = XEXP (x, 0);
8400 op1 = XEXP (x, 1);
8401
8402 if (VECTOR_MODE_P (mode))
8403 mode = GET_MODE_INNER (mode);
8404
8405 /* Integer multiply/fma. */
8406 if (GET_MODE_CLASS (mode) == MODE_INT)
8407 {
8408 /* The multiply will be canonicalized as a shift, cost it as such. */
0a78ebe4
KT
8409 if (aarch64_shift_p (GET_CODE (x))
8410 || (CONST_INT_P (op1)
8411 && exact_log2 (INTVAL (op1)) > 0))
4745e701 8412 {
0a78ebe4
KT
8413 bool is_extend = GET_CODE (op0) == ZERO_EXTEND
8414 || GET_CODE (op0) == SIGN_EXTEND;
4745e701
JG
8415 if (speed)
8416 {
0a78ebe4
KT
8417 if (compound_p)
8418 {
b10f1009
AP
8419 /* If the shift is considered cheap,
8420 then don't add any cost. */
8421 if (aarch64_cheap_mult_shift_p (x))
8422 ;
8423 else if (REG_P (op1))
0a78ebe4
KT
8424 /* ARITH + shift-by-register. */
8425 cost += extra_cost->alu.arith_shift_reg;
8426 else if (is_extend)
8427 /* ARITH + extended register. We don't have a cost field
8428 for ARITH+EXTEND+SHIFT, so use extend_arith here. */
8429 cost += extra_cost->alu.extend_arith;
8430 else
8431 /* ARITH + shift-by-immediate. */
8432 cost += extra_cost->alu.arith_shift;
8433 }
4745e701
JG
8434 else
8435 /* LSL (immediate). */
0a78ebe4
KT
8436 cost += extra_cost->alu.shift;
8437
4745e701 8438 }
0a78ebe4
KT
8439 /* Strip extends as we will have costed them in the case above. */
8440 if (is_extend)
b10f1009 8441 op0 = aarch64_strip_extend (op0, true);
4745e701 8442
e548c9df 8443 cost += rtx_cost (op0, VOIDmode, code, 0, speed);
4745e701
JG
8444
8445 return cost;
8446 }
8447
d2ac256b
KT
8448 /* MNEG or [US]MNEGL. Extract the NEG operand and indicate that it's a
8449 compound and let the below cases handle it. After all, MNEG is a
8450 special-case alias of MSUB. */
8451 if (GET_CODE (op0) == NEG)
8452 {
8453 op0 = XEXP (op0, 0);
8454 compound_p = true;
8455 }
8456
4745e701
JG
8457 /* Integer multiplies or FMAs have zero/sign extending variants. */
8458 if ((GET_CODE (op0) == ZERO_EXTEND
8459 && GET_CODE (op1) == ZERO_EXTEND)
8460 || (GET_CODE (op0) == SIGN_EXTEND
8461 && GET_CODE (op1) == SIGN_EXTEND))
8462 {
e548c9df
AM
8463 cost += rtx_cost (XEXP (op0, 0), VOIDmode, MULT, 0, speed);
8464 cost += rtx_cost (XEXP (op1, 0), VOIDmode, MULT, 1, speed);
4745e701
JG
8465
8466 if (speed)
8467 {
0a78ebe4 8468 if (compound_p)
d2ac256b 8469 /* SMADDL/UMADDL/UMSUBL/SMSUBL. */
4745e701
JG
8470 cost += extra_cost->mult[0].extend_add;
8471 else
8472 /* MUL/SMULL/UMULL. */
8473 cost += extra_cost->mult[0].extend;
8474 }
8475
8476 return cost;
8477 }
8478
d2ac256b 8479 /* This is either an integer multiply or a MADD. In both cases
4745e701 8480 we want to recurse and cost the operands. */
e548c9df
AM
8481 cost += rtx_cost (op0, mode, MULT, 0, speed);
8482 cost += rtx_cost (op1, mode, MULT, 1, speed);
4745e701
JG
8483
8484 if (speed)
8485 {
0a78ebe4 8486 if (compound_p)
d2ac256b 8487 /* MADD/MSUB. */
4745e701
JG
8488 cost += extra_cost->mult[mode == DImode].add;
8489 else
8490 /* MUL. */
8491 cost += extra_cost->mult[mode == DImode].simple;
8492 }
8493
8494 return cost;
8495 }
8496 else
8497 {
8498 if (speed)
8499 {
3d840f7d 8500 /* Floating-point FMA/FMUL can also support negations of the
d318517d
SN
8501 operands, unless the rounding mode is upward or downward in
8502 which case FNMUL is different than FMUL with operand negation. */
8503 bool neg0 = GET_CODE (op0) == NEG;
8504 bool neg1 = GET_CODE (op1) == NEG;
8505 if (compound_p || !flag_rounding_math || (neg0 && neg1))
8506 {
8507 if (neg0)
8508 op0 = XEXP (op0, 0);
8509 if (neg1)
8510 op1 = XEXP (op1, 0);
8511 }
4745e701 8512
0a78ebe4 8513 if (compound_p)
4745e701
JG
8514 /* FMADD/FNMADD/FNMSUB/FMSUB. */
8515 cost += extra_cost->fp[mode == DFmode].fma;
8516 else
3d840f7d 8517 /* FMUL/FNMUL. */
4745e701
JG
8518 cost += extra_cost->fp[mode == DFmode].mult;
8519 }
8520
e548c9df
AM
8521 cost += rtx_cost (op0, mode, MULT, 0, speed);
8522 cost += rtx_cost (op1, mode, MULT, 1, speed);
4745e701
JG
8523 return cost;
8524 }
43e9d192
IB
8525}
8526
67747367
JG
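/* Descriptive comment for the function below: implement TARGET_ADDRESS_COST.
   Return an estimate, in the same units as the insn cost tables, of the cost
   of using X as a memory address in mode MODE.  */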
8527static int
8528aarch64_address_cost (rtx x,
ef4bddc2 8529 machine_mode mode,
67747367
JG
8530 addr_space_t as ATTRIBUTE_UNUSED,
8531 bool speed)
8532{
8533 enum rtx_code c = GET_CODE (x);
b175b679 8534 const struct cpu_addrcost_table *addr_cost = aarch64_tune_params.addr_cost;
67747367
JG
8535 struct aarch64_address_info info;
8536 int cost = 0;
8537 info.shift = 0;
8538
a97d8b98 8539 if (!aarch64_classify_address (&info, x, mode, false))
67747367
JG
8540 {
8541 if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF)
8542 {
8543 /* This is a CONST or SYMBOL ref which will be split
8544 in a different way depending on the code model in use.
8545 Cost it through the generic infrastructure. */
e548c9df 8546 int cost_symbol_ref = rtx_cost (x, Pmode, MEM, 1, speed);
67747367
JG
8547 /* Divide through by the cost of one instruction to
8548 bring it to the same units as the address costs. */
8549 cost_symbol_ref /= COSTS_N_INSNS (1);
8550 /* The cost is then the cost of preparing the address,
8551 followed by an immediate (possibly 0) offset. */
8552 return cost_symbol_ref + addr_cost->imm_offset;
8553 }
8554 else
8555 {
8556 /* This is most likely a jump table from a case
8557 statement. */
8558 return addr_cost->register_offset;
8559 }
8560 }
8561
8562 switch (info.type)
8563 {
8564 case ADDRESS_LO_SUM:
8565 case ADDRESS_SYMBOLIC:
8566 case ADDRESS_REG_IMM:
8567 cost += addr_cost->imm_offset;
8568 break;
8569
8570 case ADDRESS_REG_WB:
8571 if (c == PRE_INC || c == PRE_DEC || c == PRE_MODIFY)
8572 cost += addr_cost->pre_modify;
8573 else if (c == POST_INC || c == POST_DEC || c == POST_MODIFY)
8574 cost += addr_cost->post_modify;
8575 else
8576 gcc_unreachable ();
8577
8578 break;
8579
8580 case ADDRESS_REG_REG:
8581 cost += addr_cost->register_offset;
8582 break;
8583
67747367 8584 case ADDRESS_REG_SXTW:
783879e6
EM
8585 cost += addr_cost->register_sextend;
8586 break;
8587
8588 case ADDRESS_REG_UXTW:
8589 cost += addr_cost->register_zextend;
67747367
JG
8590 break;
8591
8592 default:
8593 gcc_unreachable ();
8594 }
8595
8596
8597 if (info.shift > 0)
8598 {
8599 /* For the sake of calculating the cost of the shifted register
8600 component, we can treat same sized modes in the same way. */
6a70badb
RS
8601 if (known_eq (GET_MODE_BITSIZE (mode), 16))
8602 cost += addr_cost->addr_scale_costs.hi;
8603 else if (known_eq (GET_MODE_BITSIZE (mode), 32))
8604 cost += addr_cost->addr_scale_costs.si;
8605 else if (known_eq (GET_MODE_BITSIZE (mode), 64))
8606 cost += addr_cost->addr_scale_costs.di;
8607 else
8608 /* We can't tell, or this is a 128-bit vector. */
8609 cost += addr_cost->addr_scale_costs.ti;
67747367
JG
8610 }
8611
8612 return cost;
8613}
8614
b9066f5a
MW
8615/* Return the cost of a branch. If SPEED_P is true then the compiler is
8616 optimizing for speed. If PREDICTABLE_P is true then the branch is predicted
8617 to be taken. */
8618
8619int
8620aarch64_branch_cost (bool speed_p, bool predictable_p)
8621{
8622 /* When optimizing for speed, use the cost of unpredictable branches. */
8623 const struct cpu_branch_cost *branch_costs =
b175b679 8624 aarch64_tune_params.branch_costs;
b9066f5a
MW
8625
8626 if (!speed_p || predictable_p)
8627 return branch_costs->predictable;
8628 else
8629 return branch_costs->unpredictable;
8630}
8631
7cc2145f
JG
8632/* Return true if the RTX X in mode MODE is a zero or sign extract
8633 usable in an ADD or SUB (extended register) instruction. */
8634static bool
77e994c9 8635aarch64_rtx_arith_op_extract_p (rtx x, scalar_int_mode mode)
7cc2145f
JG
8636{
8637 /* Catch add with a sign extract.
8638 This is add_<optab><mode>_multp2. */
8639 if (GET_CODE (x) == SIGN_EXTRACT
8640 || GET_CODE (x) == ZERO_EXTRACT)
8641 {
8642 rtx op0 = XEXP (x, 0);
8643 rtx op1 = XEXP (x, 1);
8644 rtx op2 = XEXP (x, 2);
8645
8646 if (GET_CODE (op0) == MULT
8647 && CONST_INT_P (op1)
8648 && op2 == const0_rtx
8649 && CONST_INT_P (XEXP (op0, 1))
8650 && aarch64_is_extend_from_extract (mode,
8651 XEXP (op0, 1),
8652 op1))
8653 {
8654 return true;
8655 }
8656 }
e47c4031
KT
8657 /* The simple case <ARITH>, XD, XN, XM, [us]xt.
8658 No shift. */
8659 else if (GET_CODE (x) == SIGN_EXTEND
8660 || GET_CODE (x) == ZERO_EXTEND)
8661 return REG_P (XEXP (x, 0));
7cc2145f
JG
8662
8663 return false;
8664}
8665
61263118
KT
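/* Descriptive comment for the function below: return true iff U is the
   UNSPEC number of one of the FRINT* floating-point rounding instructions
   (frintz/frintp/frintm/frinta/frintn/frintx/frinti).  */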
8666static bool
8667aarch64_frint_unspec_p (unsigned int u)
8668{
8669 switch (u)
8670 {
8671 case UNSPEC_FRINTZ:
8672 case UNSPEC_FRINTP:
8673 case UNSPEC_FRINTM:
8674 case UNSPEC_FRINTA:
8675 case UNSPEC_FRINTN:
8676 case UNSPEC_FRINTX:
8677 case UNSPEC_FRINTI:
8678 return true;
8679
8680 default:
8681 return false;
8682 }
8683}
8684
fb0cb7fa
KT
8685/* Return true iff X is an rtx that will match an extr instruction
8686 i.e. as described in the *extr<mode>5_insn family of patterns.
8687 OP0 and OP1 will be set to the operands of the shifts involved
8688 on success and will be NULL_RTX otherwise. */
8689
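/* Worked example for the function below: in SImode,
   (ior (ashift X (const_int 3)) (lshiftrt Y (const_int 29)))
   matches, because the two shift amounts sum to the mode bitsize.  */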
8690static bool
8691aarch64_extr_rtx_p (rtx x, rtx *res_op0, rtx *res_op1)
8692{
8693 rtx op0, op1;
77e994c9
RS
8694 scalar_int_mode mode;
8695 if (!is_a <scalar_int_mode> (GET_MODE (x), &mode))
8696 return false;
fb0cb7fa
KT
8697
8698 *res_op0 = NULL_RTX;
8699 *res_op1 = NULL_RTX;
8700
8701 if (GET_CODE (x) != IOR)
8702 return false;
8703
8704 op0 = XEXP (x, 0);
8705 op1 = XEXP (x, 1);
8706
8707 if ((GET_CODE (op0) == ASHIFT && GET_CODE (op1) == LSHIFTRT)
8708 || (GET_CODE (op1) == ASHIFT && GET_CODE (op0) == LSHIFTRT))
8709 {
8710 /* Canonicalise locally to ashift in op0, lshiftrt in op1. */
8711 if (GET_CODE (op1) == ASHIFT)
8712 std::swap (op0, op1);
8713
8714 if (!CONST_INT_P (XEXP (op0, 1)) || !CONST_INT_P (XEXP (op1, 1)))
8715 return false;
8716
8717 unsigned HOST_WIDE_INT shft_amnt_0 = UINTVAL (XEXP (op0, 1));
8718 unsigned HOST_WIDE_INT shft_amnt_1 = UINTVAL (XEXP (op1, 1));
8719
8720 if (shft_amnt_0 < GET_MODE_BITSIZE (mode)
8721 && shft_amnt_0 + shft_amnt_1 == GET_MODE_BITSIZE (mode))
8722 {
8723 *res_op0 = XEXP (op0, 0);
8724 *res_op1 = XEXP (op1, 0);
8725 return true;
8726 }
8727 }
8728
8729 return false;
8730}
8731
2d5ffe46
AP
8732/* Calculate the cost of calculating (if_then_else (OP0) (OP1) (OP2)),
8733 storing it in *COST. Result is true if the total cost of the operation
8734 has now been calculated. */
8735static bool
8736aarch64_if_then_else_costs (rtx op0, rtx op1, rtx op2, int *cost, bool speed)
8737{
b9e3afe9
AP
8738 rtx inner;
8739 rtx comparator;
8740 enum rtx_code cmpcode;
8741
8742 if (COMPARISON_P (op0))
8743 {
8744 inner = XEXP (op0, 0);
8745 comparator = XEXP (op0, 1);
8746 cmpcode = GET_CODE (op0);
8747 }
8748 else
8749 {
8750 inner = op0;
8751 comparator = const0_rtx;
8752 cmpcode = NE;
8753 }
8754
2d5ffe46
AP
8755 if (GET_CODE (op1) == PC || GET_CODE (op2) == PC)
8756 {
8757 /* Conditional branch. */
b9e3afe9 8758 if (GET_MODE_CLASS (GET_MODE (inner)) == MODE_CC)
2d5ffe46
AP
8759 return true;
8760 else
8761 {
b9e3afe9 8762 if (cmpcode == NE || cmpcode == EQ)
2d5ffe46 8763 {
2d5ffe46
AP
8764 if (comparator == const0_rtx)
8765 {
8766 /* TBZ/TBNZ/CBZ/CBNZ. */
8767 if (GET_CODE (inner) == ZERO_EXTRACT)
8768 /* TBZ/TBNZ. */
e548c9df
AM
8769 *cost += rtx_cost (XEXP (inner, 0), VOIDmode,
8770 ZERO_EXTRACT, 0, speed);
8771 else
8772 /* CBZ/CBNZ. */
8773 *cost += rtx_cost (inner, VOIDmode, cmpcode, 0, speed);
2d5ffe46
AP
8774
8775 return true;
8776 }
8777 }
b9e3afe9 8778 else if (cmpcode == LT || cmpcode == GE)
2d5ffe46 8779 {
2d5ffe46
AP
8780 /* TBZ/TBNZ. */
8781 if (comparator == const0_rtx)
8782 return true;
8783 }
8784 }
8785 }
b9e3afe9 8786 else if (GET_MODE_CLASS (GET_MODE (inner)) == MODE_CC)
2d5ffe46 8787 {
786298dc 8788 /* CCMP. */
6dfeb7ce 8789 if (GET_CODE (op1) == COMPARE)
786298dc
WD
8790 {
8791 /* Increase cost of CCMP reg, 0, imm, CC to prefer CMP reg, 0. */
8792 if (XEXP (op1, 1) == const0_rtx)
8793 *cost += 1;
8794 if (speed)
8795 {
8796 machine_mode mode = GET_MODE (XEXP (op1, 0));
8797 const struct cpu_cost_table *extra_cost
8798 = aarch64_tune_params.insn_extra_cost;
8799
8800 if (GET_MODE_CLASS (mode) == MODE_INT)
8801 *cost += extra_cost->alu.arith;
8802 else
8803 *cost += extra_cost->fp[mode == DFmode].compare;
8804 }
8805 return true;
8806 }
8807
2d5ffe46
AP
8808 /* It's a conditional operation based on the status flags,
8809 so it must be some flavor of CSEL. */
8810
8811 /* CSNEG, CSINV, and CSINC are handled for free as part of CSEL. */
8812 if (GET_CODE (op1) == NEG
8813 || GET_CODE (op1) == NOT
8814 || (GET_CODE (op1) == PLUS && XEXP (op1, 1) == const1_rtx))
8815 op1 = XEXP (op1, 0);
bad00732
KT
8816 else if (GET_CODE (op1) == ZERO_EXTEND && GET_CODE (op2) == ZERO_EXTEND)
8817 {
8818 /* CSEL with zero-extension (*cmovdi_insn_uxtw). */
8819 op1 = XEXP (op1, 0);
8820 op2 = XEXP (op2, 0);
8821 }
2d5ffe46 8822
e548c9df
AM
8823 *cost += rtx_cost (op1, VOIDmode, IF_THEN_ELSE, 1, speed);
8824 *cost += rtx_cost (op2, VOIDmode, IF_THEN_ELSE, 2, speed);
2d5ffe46
AP
8825 return true;
8826 }
8827
8828 /* We don't know what this is, cost all operands. */
8829 return false;
8830}
8831
283b6c85
KT
8832/* Check whether X is a bitfield operation of the form shift + extend that
8833 maps down to a UBFIZ/SBFIZ/UBFX/SBFX instruction. If so, return the
8834 operand to which the bitfield operation is applied. Otherwise return
8835 NULL_RTX. */
8836
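/* Worked example for the function below:
   (zero_extend:SI (lshiftrt:HI R (const_int 3))) is such a pattern
   (it maps to a UBFX of R), and R is returned.  */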
8837static rtx
8838aarch64_extend_bitfield_pattern_p (rtx x)
8839{
8840 rtx_code outer_code = GET_CODE (x);
8841 machine_mode outer_mode = GET_MODE (x);
8842
8843 if (outer_code != ZERO_EXTEND && outer_code != SIGN_EXTEND
8844 && outer_mode != SImode && outer_mode != DImode)
8845 return NULL_RTX;
8846
8847 rtx inner = XEXP (x, 0);
8848 rtx_code inner_code = GET_CODE (inner);
8849 machine_mode inner_mode = GET_MODE (inner);
8850 rtx op = NULL_RTX;
8851
8852 switch (inner_code)
8853 {
8854 case ASHIFT:
8855 if (CONST_INT_P (XEXP (inner, 1))
8856 && (inner_mode == QImode || inner_mode == HImode))
8857 op = XEXP (inner, 0);
8858 break;
8859 case LSHIFTRT:
8860 if (outer_code == ZERO_EXTEND && CONST_INT_P (XEXP (inner, 1))
8861 && (inner_mode == QImode || inner_mode == HImode))
8862 op = XEXP (inner, 0);
8863 break;
8864 case ASHIFTRT:
8865 if (outer_code == SIGN_EXTEND && CONST_INT_P (XEXP (inner, 1))
8866 && (inner_mode == QImode || inner_mode == HImode))
8867 op = XEXP (inner, 0);
8868 break;
8869 default:
8870 break;
8871 }
8872
8873 return op;
8874}
8875
8c83f71d
KT
8876/* Return true if the mask and a shift amount from an RTX of the form
8877 (x << SHFT_AMNT) & MASK are valid to combine into a UBFIZ instruction of
8878 mode MODE. See the *andim_ashift<mode>_bfiz pattern. */
8879
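/* Worked example for the function below: MASK == 0xff0 with SHFT_AMNT == 4
   is accepted, since 0xff0 >> 4 == 0xff, 0xff + 1 is a power of two, and the
   low four bits of the mask are clear.  */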
8880bool
77e994c9
RS
8881aarch64_mask_and_shift_for_ubfiz_p (scalar_int_mode mode, rtx mask,
8882 rtx shft_amnt)
8c83f71d
KT
8883{
8884 return CONST_INT_P (mask) && CONST_INT_P (shft_amnt)
8885 && INTVAL (shft_amnt) < GET_MODE_BITSIZE (mode)
8886 && exact_log2 ((INTVAL (mask) >> INTVAL (shft_amnt)) + 1) >= 0
1b6acf23
WD
8887 && (INTVAL (mask)
8888 & ((HOST_WIDE_INT_1U << INTVAL (shft_amnt)) - 1)) == 0;
8c83f71d
KT
8889}
8890
43e9d192
IB
8891/* Calculate the cost of calculating X, storing it in *COST. Result
8892 is true if the total cost of the operation has now been calculated. */
8893static bool
e548c9df 8894aarch64_rtx_costs (rtx x, machine_mode mode, int outer ATTRIBUTE_UNUSED,
43e9d192
IB
8895 int param ATTRIBUTE_UNUSED, int *cost, bool speed)
8896{
a8eecd00 8897 rtx op0, op1, op2;
73250c4c 8898 const struct cpu_cost_table *extra_cost
b175b679 8899 = aarch64_tune_params.insn_extra_cost;
e548c9df 8900 int code = GET_CODE (x);
b4206259 8901 scalar_int_mode int_mode;
43e9d192 8902
7fc5ef02
JG
8903 /* By default, assume that everything has equivalent cost to the
8904 cheapest instruction. Any additional costs are applied as a delta
8905 above this default. */
8906 *cost = COSTS_N_INSNS (1);
8907
43e9d192
IB
8908 switch (code)
8909 {
8910 case SET:
ba123b0d
JG
8911 /* The cost depends entirely on the operands to SET. */
8912 *cost = 0;
43e9d192
IB
8913 op0 = SET_DEST (x);
8914 op1 = SET_SRC (x);
8915
8916 switch (GET_CODE (op0))
8917 {
8918 case MEM:
8919 if (speed)
2961177e
JG
8920 {
8921 rtx address = XEXP (op0, 0);
b6875aac
KV
8922 if (VECTOR_MODE_P (mode))
8923 *cost += extra_cost->ldst.storev;
8924 else if (GET_MODE_CLASS (mode) == MODE_INT)
2961177e
JG
8925 *cost += extra_cost->ldst.store;
8926 else if (mode == SFmode)
8927 *cost += extra_cost->ldst.storef;
8928 else if (mode == DFmode)
8929 *cost += extra_cost->ldst.stored;
8930
8931 *cost +=
8932 COSTS_N_INSNS (aarch64_address_cost (address, mode,
8933 0, speed));
8934 }
43e9d192 8935
e548c9df 8936 *cost += rtx_cost (op1, mode, SET, 1, speed);
43e9d192
IB
8937 return true;
8938
8939 case SUBREG:
8940 if (! REG_P (SUBREG_REG (op0)))
e548c9df 8941 *cost += rtx_cost (SUBREG_REG (op0), VOIDmode, SET, 0, speed);
ba123b0d 8942
43e9d192
IB
8943 /* Fall through. */
8944 case REG:
b6875aac
KV
8945 /* The cost is one per vector-register copied. */
8946 if (VECTOR_MODE_P (GET_MODE (op0)) && REG_P (op1))
8947 {
fe1447a1
RS
8948 int nregs = aarch64_hard_regno_nregs (V0_REGNUM, GET_MODE (op0));
8949 *cost = COSTS_N_INSNS (nregs);
b6875aac 8950 }
ba123b0d
JG
8951 /* const0_rtx is in general free, but we will use an
8952 instruction to set a register to 0. */
b6875aac
KV
8953 else if (REG_P (op1) || op1 == const0_rtx)
8954 {
8955 /* The cost is 1 per register copied. */
fe1447a1
RS
8956 int nregs = aarch64_hard_regno_nregs (R0_REGNUM, GET_MODE (op0));
8957 *cost = COSTS_N_INSNS (nregs);
b6875aac 8958 }
ba123b0d
JG
8959 else
8960 /* Cost is just the cost of the RHS of the set. */
e548c9df 8961 *cost += rtx_cost (op1, mode, SET, 1, speed);
43e9d192
IB
8962 return true;
8963
ba123b0d 8964 case ZERO_EXTRACT:
43e9d192 8965 case SIGN_EXTRACT:
ba123b0d
JG
8966 /* Bit-field insertion. Strip any redundant widening of
8967 the RHS to meet the width of the target. */
43e9d192
IB
8968 if (GET_CODE (op1) == SUBREG)
8969 op1 = SUBREG_REG (op1);
8970 if ((GET_CODE (op1) == ZERO_EXTEND
8971 || GET_CODE (op1) == SIGN_EXTEND)
4aa81c2e 8972 && CONST_INT_P (XEXP (op0, 1))
77e994c9
RS
8973 && is_a <scalar_int_mode> (GET_MODE (XEXP (op1, 0)), &int_mode)
8974 && GET_MODE_BITSIZE (int_mode) >= INTVAL (XEXP (op0, 1)))
43e9d192 8975 op1 = XEXP (op1, 0);
ba123b0d
JG
8976
8977 if (CONST_INT_P (op1))
8978 {
8979 /* MOV immediate is assumed to always be cheap. */
8980 *cost = COSTS_N_INSNS (1);
8981 }
8982 else
8983 {
8984 /* BFM. */
8985 if (speed)
8986 *cost += extra_cost->alu.bfi;
e548c9df 8987 *cost += rtx_cost (op1, VOIDmode, (enum rtx_code) code, 1, speed);
ba123b0d
JG
8988 }
8989
43e9d192
IB
8990 return true;
8991
8992 default:
ba123b0d
JG
8993 /* We can't make sense of this, assume default cost. */
8994 *cost = COSTS_N_INSNS (1);
61263118 8995 return false;
43e9d192
IB
8996 }
8997 return false;
8998
9dfc162c
JG
8999 case CONST_INT:
9000 /* If an instruction can incorporate a constant within the
9001 instruction, the instruction's expression avoids calling
9002 rtx_cost() on the constant. If rtx_cost() is called on a
9003 constant, then it is usually because the constant must be
9004 moved into a register by one or more instructions.
9005
9006 The exception is constant 0, which can be expressed
9007 as XZR/WZR and is therefore free. The exception to this is
9008 if we have (set (reg) (const0_rtx)) in which case we must cost
9009 the move. However, we can catch that when we cost the SET, so
9010 we don't need to consider that here. */
9011 if (x == const0_rtx)
9012 *cost = 0;
9013 else
9014 {
9015 /* To an approximation, building any other constant is
9016 proportionally expensive to the number of instructions
9017 required to build that constant. This is true whether we
9018 are compiling for SPEED or otherwise. */
77e994c9
RS
9019 if (!is_a <scalar_int_mode> (mode, &int_mode))
9020 int_mode = word_mode;
82614948 9021 *cost = COSTS_N_INSNS (aarch64_internal_mov_immediate
77e994c9 9022 (NULL_RTX, x, false, int_mode));
9dfc162c
JG
9023 }
9024 return true;
9025
9026 case CONST_DOUBLE:
a2170965
TC
9027
9028 /* First determine number of instructions to do the move
9029 as an integer constant. */
9030 if (!aarch64_float_const_representable_p (x)
9031 && !aarch64_can_const_movi_rtx_p (x, mode)
9032 && aarch64_float_const_rtx_p (x))
9033 {
9034 unsigned HOST_WIDE_INT ival;
9035 bool succeed = aarch64_reinterpret_float_as_int (x, &ival);
9036 gcc_assert (succeed);
9037
77e994c9
RS
9038 scalar_int_mode imode = (mode == HFmode
9039 ? SImode
9040 : int_mode_for_mode (mode).require ());
a2170965
TC
9041 int ncost = aarch64_internal_mov_immediate
9042 (NULL_RTX, gen_int_mode (ival, imode), false, imode);
9043 *cost += COSTS_N_INSNS (ncost);
9044 return true;
9045 }
9046
9dfc162c
JG
9047 if (speed)
9048 {
9049 /* mov[df,sf]_aarch64. */
9050 if (aarch64_float_const_representable_p (x))
9051 /* FMOV (scalar immediate). */
9052 *cost += extra_cost->fp[mode == DFmode].fpconst;
9053 else if (!aarch64_float_const_zero_rtx_p (x))
9054 {
9055 /* This will be a load from memory. */
9056 if (mode == DFmode)
9057 *cost += extra_cost->ldst.loadd;
9058 else
9059 *cost += extra_cost->ldst.loadf;
9060 }
9061 else
9062 /* Otherwise this is +0.0. We get this using MOVI d0, #0
9063 or MOV v0.s[0], wzr - neither of which are modeled by the
9064 cost tables. Just use the default cost. */
9065 {
9066 }
9067 }
9068
9069 return true;
9070
43e9d192
IB
9071 case MEM:
9072 if (speed)
2961177e
JG
9073 {
9074 /* For loads we want the base cost of a load, plus an
9075 approximation for the additional cost of the addressing
9076 mode. */
9077 rtx address = XEXP (x, 0);
b6875aac
KV
9078 if (VECTOR_MODE_P (mode))
9079 *cost += extra_cost->ldst.loadv;
9080 else if (GET_MODE_CLASS (mode) == MODE_INT)
2961177e
JG
9081 *cost += extra_cost->ldst.load;
9082 else if (mode == SFmode)
9083 *cost += extra_cost->ldst.loadf;
9084 else if (mode == DFmode)
9085 *cost += extra_cost->ldst.loadd;
9086
9087 *cost +=
9088 COSTS_N_INSNS (aarch64_address_cost (address, mode,
9089 0, speed));
9090 }
43e9d192
IB
9091
9092 return true;
9093
9094 case NEG:
4745e701
JG
9095 op0 = XEXP (x, 0);
9096
b6875aac
KV
9097 if (VECTOR_MODE_P (mode))
9098 {
9099 if (speed)
9100 {
9101 /* FNEG. */
9102 *cost += extra_cost->vect.alu;
9103 }
9104 return false;
9105 }
9106
e548c9df
AM
9107 if (GET_MODE_CLASS (mode) == MODE_INT)
9108 {
4745e701
JG
9109 if (GET_RTX_CLASS (GET_CODE (op0)) == RTX_COMPARE
9110 || GET_RTX_CLASS (GET_CODE (op0)) == RTX_COMM_COMPARE)
9111 {
9112 /* CSETM. */
e548c9df 9113 *cost += rtx_cost (XEXP (op0, 0), VOIDmode, NEG, 0, speed);
4745e701
JG
9114 return true;
9115 }
9116
9117 /* Cost this as SUB wzr, X. */
e548c9df 9118 op0 = CONST0_RTX (mode);
4745e701
JG
9119 op1 = XEXP (x, 0);
9120 goto cost_minus;
9121 }
9122
e548c9df 9123 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
4745e701
JG
9124 {
9125 /* Support (neg(fma...)) as a single instruction only if
9126 sign of zeros is unimportant. This matches the decision
9127 making in aarch64.md. */
9128 if (GET_CODE (op0) == FMA && !HONOR_SIGNED_ZEROS (GET_MODE (op0)))
9129 {
9130 /* FNMADD. */
e548c9df 9131 *cost = rtx_cost (op0, mode, NEG, 0, speed);
4745e701
JG
9132 return true;
9133 }
d318517d
SN
9134 if (GET_CODE (op0) == MULT)
9135 {
9136 /* FNMUL. */
9137 *cost = rtx_cost (op0, mode, NEG, 0, speed);
9138 return true;
9139 }
4745e701
JG
9140 if (speed)
9141 /* FNEG. */
9142 *cost += extra_cost->fp[mode == DFmode].neg;
9143 return false;
9144 }
9145
9146 return false;
43e9d192 9147
781aeb73
KT
9148 case CLRSB:
9149 case CLZ:
9150 if (speed)
b6875aac
KV
9151 {
9152 if (VECTOR_MODE_P (mode))
9153 *cost += extra_cost->vect.alu;
9154 else
9155 *cost += extra_cost->alu.clz;
9156 }
781aeb73
KT
9157
9158 return false;
9159
43e9d192
IB
9160 case COMPARE:
9161 op0 = XEXP (x, 0);
9162 op1 = XEXP (x, 1);
9163
9164 if (op1 == const0_rtx
9165 && GET_CODE (op0) == AND)
9166 {
9167 x = op0;
e548c9df 9168 mode = GET_MODE (op0);
43e9d192
IB
9169 goto cost_logic;
9170 }
9171
a8eecd00
JG
9172 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT)
9173 {
9174 /* TODO: A write to the CC flags possibly costs extra, this
9175 needs encoding in the cost tables. */
9176
e548c9df 9177 mode = GET_MODE (op0);
a8eecd00
JG
9178 /* ANDS. */
9179 if (GET_CODE (op0) == AND)
9180 {
9181 x = op0;
9182 goto cost_logic;
9183 }
9184
9185 if (GET_CODE (op0) == PLUS)
9186 {
9187 /* ADDS (and CMN alias). */
9188 x = op0;
9189 goto cost_plus;
9190 }
9191
9192 if (GET_CODE (op0) == MINUS)
9193 {
9194 /* SUBS. */
9195 x = op0;
9196 goto cost_minus;
9197 }
9198
345854d8
KT
9199 if (GET_CODE (op0) == ZERO_EXTRACT && op1 == const0_rtx
9200 && GET_MODE (x) == CC_NZmode && CONST_INT_P (XEXP (op0, 1))
9201 && CONST_INT_P (XEXP (op0, 2)))
9202 {
9203 /* COMPARE of ZERO_EXTRACT form of TST-immediate.
9204 Handle it here directly rather than going to cost_logic
9205 since we know the immediate generated for the TST is valid
9206 so we can avoid creating an intermediate rtx for it only
9207 for costing purposes. */
9208 if (speed)
9209 *cost += extra_cost->alu.logical;
9210
9211 *cost += rtx_cost (XEXP (op0, 0), GET_MODE (op0),
9212 ZERO_EXTRACT, 0, speed);
9213 return true;
9214 }
9215
a8eecd00
JG
9216 if (GET_CODE (op1) == NEG)
9217 {
9218 /* CMN. */
9219 if (speed)
9220 *cost += extra_cost->alu.arith;
9221
e548c9df
AM
9222 *cost += rtx_cost (op0, mode, COMPARE, 0, speed);
9223 *cost += rtx_cost (XEXP (op1, 0), mode, NEG, 1, speed);
a8eecd00
JG
9224 return true;
9225 }
9226
9227 /* CMP.
9228
9229 Compare can freely swap the order of operands, and
9230 canonicalization puts the more complex operation first.
9231 But the integer MINUS logic expects the shift/extend
9232 operation in op1. */
9233 if (! (REG_P (op0)
9234 || (GET_CODE (op0) == SUBREG && REG_P (SUBREG_REG (op0)))))
9235 {
9236 op0 = XEXP (x, 1);
9237 op1 = XEXP (x, 0);
9238 }
9239 goto cost_minus;
9240 }
9241
9242 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_FLOAT)
9243 {
9244 /* FCMP. */
9245 if (speed)
9246 *cost += extra_cost->fp[mode == DFmode].compare;
9247
9248 if (CONST_DOUBLE_P (op1) && aarch64_float_const_zero_rtx_p (op1))
9249 {
e548c9df 9250 *cost += rtx_cost (op0, VOIDmode, COMPARE, 0, speed);
a8eecd00
JG
9251 /* FCMP supports constant 0.0 for no extra cost. */
9252 return true;
9253 }
9254 return false;
9255 }
9256
b6875aac
KV
9257 if (VECTOR_MODE_P (mode))
9258 {
9259 /* Vector compare. */
9260 if (speed)
9261 *cost += extra_cost->vect.alu;
9262
9263 if (aarch64_float_const_zero_rtx_p (op1))
9264 {
9265 /* Vector cm (eq|ge|gt|lt|le) supports constant 0.0 for no extra
9266 cost. */
9267 return true;
9268 }
9269 return false;
9270 }
a8eecd00 9271 return false;
43e9d192
IB
9272
9273 case MINUS:
4745e701
JG
9274 {
9275 op0 = XEXP (x, 0);
9276 op1 = XEXP (x, 1);
9277
9278cost_minus:
e548c9df 9279 *cost += rtx_cost (op0, mode, MINUS, 0, speed);
23cb6618 9280
4745e701
JG
9281 /* Detect valid immediates. */
9282 if ((GET_MODE_CLASS (mode) == MODE_INT
9283 || (GET_MODE_CLASS (mode) == MODE_CC
9284 && GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT))
9285 && CONST_INT_P (op1)
9286 && aarch64_uimm12_shift (INTVAL (op1)))
9287 {
4745e701
JG
9288 if (speed)
9289 /* SUB(S) (immediate). */
9290 *cost += extra_cost->alu.arith;
9291 return true;
4745e701
JG
9292 }
9293
7cc2145f 9294 /* Look for SUB (extended register). */
77e994c9
RS
9295 if (is_a <scalar_int_mode> (mode, &int_mode)
9296 && aarch64_rtx_arith_op_extract_p (op1, int_mode))
7cc2145f
JG
9297 {
9298 if (speed)
2533c820 9299 *cost += extra_cost->alu.extend_arith;
7cc2145f 9300
b10f1009 9301 op1 = aarch64_strip_extend (op1, true);
e47c4031 9302 *cost += rtx_cost (op1, VOIDmode,
e548c9df 9303 (enum rtx_code) GET_CODE (op1), 0, speed);
7cc2145f
JG
9304 return true;
9305 }
9306
b10f1009 9307 rtx new_op1 = aarch64_strip_extend (op1, false);
4745e701
JG
9308
9309 /* Cost this as an FMA-alike operation. */
9310 if ((GET_CODE (new_op1) == MULT
0a78ebe4 9311 || aarch64_shift_p (GET_CODE (new_op1)))
4745e701
JG
9312 && code != COMPARE)
9313 {
9314 *cost += aarch64_rtx_mult_cost (new_op1, MULT,
9315 (enum rtx_code) code,
9316 speed);
4745e701
JG
9317 return true;
9318 }
43e9d192 9319
e548c9df 9320 *cost += rtx_cost (new_op1, VOIDmode, MINUS, 1, speed);
43e9d192 9321
4745e701
JG
9322 if (speed)
9323 {
b6875aac
KV
9324 if (VECTOR_MODE_P (mode))
9325 {
9326 /* Vector SUB. */
9327 *cost += extra_cost->vect.alu;
9328 }
9329 else if (GET_MODE_CLASS (mode) == MODE_INT)
9330 {
9331 /* SUB(S). */
9332 *cost += extra_cost->alu.arith;
9333 }
4745e701 9334 else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
b6875aac
KV
9335 {
9336 /* FSUB. */
9337 *cost += extra_cost->fp[mode == DFmode].addsub;
9338 }
4745e701
JG
9339 }
9340 return true;
9341 }
43e9d192
IB
9342
9343 case PLUS:
4745e701
JG
9344 {
9345 rtx new_op0;
43e9d192 9346
4745e701
JG
9347 op0 = XEXP (x, 0);
9348 op1 = XEXP (x, 1);
43e9d192 9349
a8eecd00 9350cost_plus:
4745e701
JG
9351 if (GET_RTX_CLASS (GET_CODE (op0)) == RTX_COMPARE
9352 || GET_RTX_CLASS (GET_CODE (op0)) == RTX_COMM_COMPARE)
9353 {
9354 /* CSINC. */
e548c9df
AM
9355 *cost += rtx_cost (XEXP (op0, 0), mode, PLUS, 0, speed);
9356 *cost += rtx_cost (op1, mode, PLUS, 1, speed);
4745e701
JG
9357 return true;
9358 }
43e9d192 9359
4745e701 9360 if (GET_MODE_CLASS (mode) == MODE_INT
43cacb12
RS
9361 && ((CONST_INT_P (op1) && aarch64_uimm12_shift (INTVAL (op1)))
9362 || aarch64_sve_addvl_addpl_immediate (op1, mode)))
4745e701 9363 {
e548c9df 9364 *cost += rtx_cost (op0, mode, PLUS, 0, speed);
43e9d192 9365
4745e701
JG
9366 if (speed)
9367 /* ADD (immediate). */
9368 *cost += extra_cost->alu.arith;
9369 return true;
9370 }
9371
e548c9df 9372 *cost += rtx_cost (op1, mode, PLUS, 1, speed);
23cb6618 9373
7cc2145f 9374 /* Look for ADD (extended register). */
77e994c9
RS
9375 if (is_a <scalar_int_mode> (mode, &int_mode)
9376 && aarch64_rtx_arith_op_extract_p (op0, int_mode))
7cc2145f
JG
9377 {
9378 if (speed)
2533c820 9379 *cost += extra_cost->alu.extend_arith;
7cc2145f 9380
b10f1009 9381 op0 = aarch64_strip_extend (op0, true);
e47c4031 9382 *cost += rtx_cost (op0, VOIDmode,
e548c9df 9383 (enum rtx_code) GET_CODE (op0), 0, speed);
7cc2145f
JG
9384 return true;
9385 }
9386
4745e701
JG
9387 /* Strip any extend, leave shifts behind as we will
9388 cost them through mult_cost. */
b10f1009 9389 new_op0 = aarch64_strip_extend (op0, false);
4745e701
JG
9390
9391 if (GET_CODE (new_op0) == MULT
0a78ebe4 9392 || aarch64_shift_p (GET_CODE (new_op0)))
4745e701
JG
9393 {
9394 *cost += aarch64_rtx_mult_cost (new_op0, MULT, PLUS,
9395 speed);
4745e701
JG
9396 return true;
9397 }
9398
e548c9df 9399 *cost += rtx_cost (new_op0, VOIDmode, PLUS, 0, speed);
4745e701
JG
9400
9401 if (speed)
9402 {
b6875aac
KV
9403 if (VECTOR_MODE_P (mode))
9404 {
9405 /* Vector ADD. */
9406 *cost += extra_cost->vect.alu;
9407 }
9408 else if (GET_MODE_CLASS (mode) == MODE_INT)
9409 {
9410 /* ADD. */
9411 *cost += extra_cost->alu.arith;
9412 }
4745e701 9413 else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
b6875aac
KV
9414 {
9415 /* FADD. */
9416 *cost += extra_cost->fp[mode == DFmode].addsub;
9417 }
4745e701
JG
9418 }
9419 return true;
9420 }
43e9d192 9421
18b42b2a
KT
9422 case BSWAP:
9423 *cost = COSTS_N_INSNS (1);
9424
9425 if (speed)
b6875aac
KV
9426 {
9427 if (VECTOR_MODE_P (mode))
9428 *cost += extra_cost->vect.alu;
9429 else
9430 *cost += extra_cost->alu.rev;
9431 }
18b42b2a
KT
9432 return false;
9433
43e9d192 9434 case IOR:
f7d5cf8d
KT
9435 if (aarch_rev16_p (x))
9436 {
9437 *cost = COSTS_N_INSNS (1);
9438
b6875aac
KV
9439 if (speed)
9440 {
9441 if (VECTOR_MODE_P (mode))
9442 *cost += extra_cost->vect.alu;
9443 else
9444 *cost += extra_cost->alu.rev;
9445 }
9446 return true;
f7d5cf8d 9447 }
fb0cb7fa
KT
9448
9449 if (aarch64_extr_rtx_p (x, &op0, &op1))
9450 {
e548c9df
AM
9451 *cost += rtx_cost (op0, mode, IOR, 0, speed);
9452 *cost += rtx_cost (op1, mode, IOR, 1, speed);
fb0cb7fa
KT
9453 if (speed)
9454 *cost += extra_cost->alu.shift;
9455
9456 return true;
9457 }
f7d5cf8d 9458 /* Fall through. */
43e9d192
IB
9459 case XOR:
9460 case AND:
9461 cost_logic:
9462 op0 = XEXP (x, 0);
9463 op1 = XEXP (x, 1);
9464
b6875aac
KV
9465 if (VECTOR_MODE_P (mode))
9466 {
9467 if (speed)
9468 *cost += extra_cost->vect.alu;
9469 return true;
9470 }
9471
268c3b47
JG
9472 if (code == AND
9473 && GET_CODE (op0) == MULT
9474 && CONST_INT_P (XEXP (op0, 1))
9475 && CONST_INT_P (op1)
9476 && aarch64_uxt_size (exact_log2 (INTVAL (XEXP (op0, 1))),
9477 INTVAL (op1)) != 0)
9478 {
9479 /* This is a UBFM/SBFM. */
e548c9df 9480 *cost += rtx_cost (XEXP (op0, 0), mode, ZERO_EXTRACT, 0, speed);
268c3b47
JG
9481 if (speed)
9482 *cost += extra_cost->alu.bfx;
9483 return true;
9484 }
9485
b4206259 9486 if (is_int_mode (mode, &int_mode))
43e9d192 9487 {
8c83f71d 9488 if (CONST_INT_P (op1))
43e9d192 9489 {
8c83f71d
KT
9490 /* We have a mask + shift version of a UBFIZ
9491 i.e. the *andim_ashift<mode>_bfiz pattern. */
9492 if (GET_CODE (op0) == ASHIFT
b4206259
RS
9493 && aarch64_mask_and_shift_for_ubfiz_p (int_mode, op1,
9494 XEXP (op0, 1)))
8c83f71d 9495 {
b4206259 9496 *cost += rtx_cost (XEXP (op0, 0), int_mode,
8c83f71d
KT
9497 (enum rtx_code) code, 0, speed);
9498 if (speed)
9499 *cost += extra_cost->alu.bfx;
268c3b47 9500
8c83f71d
KT
9501 return true;
9502 }
b4206259 9503 else if (aarch64_bitmask_imm (INTVAL (op1), int_mode))
8c83f71d
KT
9504 {
9505 /* We possibly get the immediate for free; this is not
9506 modelled. */
b4206259
RS
9507 *cost += rtx_cost (op0, int_mode,
9508 (enum rtx_code) code, 0, speed);
8c83f71d
KT
9509 if (speed)
9510 *cost += extra_cost->alu.logical;
268c3b47 9511
8c83f71d
KT
9512 return true;
9513 }
43e9d192
IB
9514 }
9515 else
9516 {
268c3b47
JG
9517 rtx new_op0 = op0;
9518
9519 /* Handle ORN, EON, or BIC. */
43e9d192
IB
9520 if (GET_CODE (op0) == NOT)
9521 op0 = XEXP (op0, 0);
268c3b47
JG
9522
9523 new_op0 = aarch64_strip_shift (op0);
9524
9525 /* If we had a shift on op0 then this is a logical-shift-
9526 by-register/immediate operation. Otherwise, this is just
9527 a logical operation. */
9528 if (speed)
9529 {
9530 if (new_op0 != op0)
9531 {
9532 /* Shift by immediate. */
9533 if (CONST_INT_P (XEXP (op0, 1)))
9534 *cost += extra_cost->alu.log_shift;
9535 else
9536 *cost += extra_cost->alu.log_shift_reg;
9537 }
9538 else
9539 *cost += extra_cost->alu.logical;
9540 }
9541
9542 /* In both cases we want to cost both operands. */
b4206259
RS
9543 *cost += rtx_cost (new_op0, int_mode, (enum rtx_code) code,
9544 0, speed);
9545 *cost += rtx_cost (op1, int_mode, (enum rtx_code) code,
9546 1, speed);
268c3b47
JG
9547
9548 return true;
43e9d192 9549 }
43e9d192
IB
9550 }
9551 return false;
9552
268c3b47 9553 case NOT:
6365da9e
KT
9554 x = XEXP (x, 0);
9555 op0 = aarch64_strip_shift (x);
9556
b6875aac
KV
9557 if (VECTOR_MODE_P (mode))
9558 {
9559 /* Vector NOT. */
9560 *cost += extra_cost->vect.alu;
9561 return false;
9562 }
9563
6365da9e
KT
9564 /* MVN-shifted-reg. */
9565 if (op0 != x)
9566 {
e548c9df 9567 *cost += rtx_cost (op0, mode, (enum rtx_code) code, 0, speed);
6365da9e
KT
9568
9569 if (speed)
9570 *cost += extra_cost->alu.log_shift;
9571
9572 return true;
9573 }
9574 /* EON can have two forms: (xor (not a) b) but also (not (xor a b)).
9575 Handle the second form here taking care that 'a' in the above can
9576 be a shift. */
9577 else if (GET_CODE (op0) == XOR)
9578 {
9579 rtx newop0 = XEXP (op0, 0);
9580 rtx newop1 = XEXP (op0, 1);
9581 rtx op0_stripped = aarch64_strip_shift (newop0);
9582
e548c9df
AM
9583 *cost += rtx_cost (newop1, mode, (enum rtx_code) code, 1, speed);
9584 *cost += rtx_cost (op0_stripped, mode, XOR, 0, speed);
6365da9e
KT
9585
9586 if (speed)
9587 {
9588 if (op0_stripped != newop0)
9589 *cost += extra_cost->alu.log_shift;
9590 else
9591 *cost += extra_cost->alu.logical;
9592 }
9593
9594 return true;
9595 }
268c3b47
JG
9596 /* MVN. */
9597 if (speed)
9598 *cost += extra_cost->alu.logical;
9599
268c3b47
JG
9600 return false;
9601
43e9d192 9602 case ZERO_EXTEND:
b1685e62
JG
9603
9604 op0 = XEXP (x, 0);
9605 /* If a value is written in SI mode, then zero extended to DI
9606 mode, the operation will in general be free as a write to
9607 a 'w' register implicitly zeroes the upper bits of an 'x'
9608 register. However, if this is
9609
9610 (set (reg) (zero_extend (reg)))
9611
9612 we must cost the explicit register move. */
9613 if (mode == DImode
9614 && GET_MODE (op0) == SImode
9615 && outer == SET)
9616 {
e548c9df 9617 int op_cost = rtx_cost (op0, VOIDmode, ZERO_EXTEND, 0, speed);
b1685e62 9618
dde23f43
KM
9619 /* If OP_COST is non-zero, then the cost of the zero extend
9620 is effectively the cost of the inner operation. Otherwise
9621 we have a MOV instruction and we take the cost from the MOV
9622 itself. This is true independently of whether we are
9623 optimizing for space or time. */
9624 if (op_cost)
b1685e62
JG
9625 *cost = op_cost;
9626
9627 return true;
9628 }
e548c9df 9629 else if (MEM_P (op0))
43e9d192 9630 {
b1685e62 9631 /* All loads can zero extend to any size for free. */
e548c9df 9632 *cost = rtx_cost (op0, VOIDmode, ZERO_EXTEND, param, speed);
43e9d192
IB
9633 return true;
9634 }
b1685e62 9635
283b6c85
KT
9636 op0 = aarch64_extend_bitfield_pattern_p (x);
9637 if (op0)
9638 {
9639 *cost += rtx_cost (op0, mode, ZERO_EXTEND, 0, speed);
9640 if (speed)
9641 *cost += extra_cost->alu.bfx;
9642 return true;
9643 }
9644
b1685e62 9645 if (speed)
b6875aac
KV
9646 {
9647 if (VECTOR_MODE_P (mode))
9648 {
9649 /* UMOV. */
9650 *cost += extra_cost->vect.alu;
9651 }
9652 else
9653 {
63715e5e
WD
9654 /* We generate an AND instead of UXTB/UXTH. */
9655 *cost += extra_cost->alu.logical;
b6875aac
KV
9656 }
9657 }
43e9d192
IB
9658 return false;
9659
9660 case SIGN_EXTEND:
b1685e62 9661 if (MEM_P (XEXP (x, 0)))
43e9d192 9662 {
b1685e62
JG
9663 /* LDRSH. */
9664 if (speed)
9665 {
9666 rtx address = XEXP (XEXP (x, 0), 0);
9667 *cost += extra_cost->ldst.load_sign_extend;
9668
9669 *cost +=
9670 COSTS_N_INSNS (aarch64_address_cost (address, mode,
9671 0, speed));
9672 }
43e9d192
IB
9673 return true;
9674 }
b1685e62 9675
283b6c85
KT
9676 op0 = aarch64_extend_bitfield_pattern_p (x);
9677 if (op0)
9678 {
9679 *cost += rtx_cost (op0, mode, SIGN_EXTEND, 0, speed);
9680 if (speed)
9681 *cost += extra_cost->alu.bfx;
9682 return true;
9683 }
9684
b1685e62 9685 if (speed)
b6875aac
KV
9686 {
9687 if (VECTOR_MODE_P (mode))
9688 *cost += extra_cost->vect.alu;
9689 else
9690 *cost += extra_cost->alu.extend;
9691 }
43e9d192
IB
9692 return false;
9693
ba0cfa17
JG
9694 case ASHIFT:
9695 op0 = XEXP (x, 0);
9696 op1 = XEXP (x, 1);
9697
9698 if (CONST_INT_P (op1))
9699 {
ba0cfa17 9700 if (speed)
b6875aac
KV
9701 {
9702 if (VECTOR_MODE_P (mode))
9703 {
9704 /* Vector shift (immediate). */
9705 *cost += extra_cost->vect.alu;
9706 }
9707 else
9708 {
9709 /* LSL (immediate), UBFM, UBFIZ and friends. These are all
9710 aliases. */
9711 *cost += extra_cost->alu.shift;
9712 }
9713 }
ba0cfa17
JG
9714
9715 /* We can incorporate zero/sign extend for free. */
9716 if (GET_CODE (op0) == ZERO_EXTEND
9717 || GET_CODE (op0) == SIGN_EXTEND)
9718 op0 = XEXP (op0, 0);
9719
e548c9df 9720 *cost += rtx_cost (op0, VOIDmode, ASHIFT, 0, speed);
ba0cfa17
JG
9721 return true;
9722 }
9723 else
9724 {
7813b280 9725 if (VECTOR_MODE_P (mode))
b6875aac 9726 {
7813b280
KT
9727 if (speed)
9728 /* Vector shift (register). */
9729 *cost += extra_cost->vect.alu;
9730 }
9731 else
9732 {
9733 if (speed)
9734 /* LSLV. */
9735 *cost += extra_cost->alu.shift_reg;
9736
9737 if (GET_CODE (op1) == AND && REG_P (XEXP (op1, 0))
9738 && CONST_INT_P (XEXP (op1, 1))
6a70badb
RS
9739 && known_eq (INTVAL (XEXP (op1, 1)),
9740 GET_MODE_BITSIZE (mode) - 1))
b6875aac 9741 {
7813b280
KT
9742 *cost += rtx_cost (op0, mode, (rtx_code) code, 0, speed);
9743 /* We already demanded XEXP (op1, 0) to be REG_P, so
9744 don't recurse into it. */
9745 return true;
b6875aac
KV
9746 }
9747 }
ba0cfa17
JG
9748 return false; /* All arguments need to be in registers. */
9749 }
9750
43e9d192 9751 case ROTATE:
43e9d192
IB
9752 case ROTATERT:
9753 case LSHIFTRT:
43e9d192 9754 case ASHIFTRT:
ba0cfa17
JG
9755 op0 = XEXP (x, 0);
9756 op1 = XEXP (x, 1);
43e9d192 9757
ba0cfa17
JG
9758 if (CONST_INT_P (op1))
9759 {
9760 /* ASR (immediate) and friends. */
9761 if (speed)
b6875aac
KV
9762 {
9763 if (VECTOR_MODE_P (mode))
9764 *cost += extra_cost->vect.alu;
9765 else
9766 *cost += extra_cost->alu.shift;
9767 }
43e9d192 9768
e548c9df 9769 *cost += rtx_cost (op0, mode, (enum rtx_code) code, 0, speed);
ba0cfa17
JG
9770 return true;
9771 }
9772 else
9773 {
7813b280 9774 if (VECTOR_MODE_P (mode))
b6875aac 9775 {
7813b280
KT
9776 if (speed)
9777 /* Vector shift (register). */
b6875aac 9778 *cost += extra_cost->vect.alu;
7813b280
KT
9779 }
9780 else
9781 {
9782 if (speed)
9783 /* ASR (register) and friends. */
b6875aac 9784 *cost += extra_cost->alu.shift_reg;
7813b280
KT
9785
9786 if (GET_CODE (op1) == AND && REG_P (XEXP (op1, 0))
9787 && CONST_INT_P (XEXP (op1, 1))
6a70badb
RS
9788 && known_eq (INTVAL (XEXP (op1, 1)),
9789 GET_MODE_BITSIZE (mode) - 1))
7813b280
KT
9790 {
9791 *cost += rtx_cost (op0, mode, (rtx_code) code, 0, speed);
9792 /* We already demanded XEXP (op1, 0) to be REG_P, so
9793 don't recurse into it. */
9794 return true;
9795 }
b6875aac 9796 }
ba0cfa17
JG
9797 return false; /* All arguments need to be in registers. */
9798 }
43e9d192 9799
909734be
JG
9800 case SYMBOL_REF:
9801
1b1e81f8
JW
9802 if (aarch64_cmodel == AARCH64_CMODEL_LARGE
9803 || aarch64_cmodel == AARCH64_CMODEL_SMALL_SPIC)
909734be
JG
9804 {
9805 /* LDR. */
9806 if (speed)
9807 *cost += extra_cost->ldst.load;
9808 }
9809 else if (aarch64_cmodel == AARCH64_CMODEL_SMALL
9810 || aarch64_cmodel == AARCH64_CMODEL_SMALL_PIC)
9811 {
9812 /* ADRP, followed by ADD. */
9813 *cost += COSTS_N_INSNS (1);
9814 if (speed)
9815 *cost += 2 * extra_cost->alu.arith;
9816 }
9817 else if (aarch64_cmodel == AARCH64_CMODEL_TINY
9818 || aarch64_cmodel == AARCH64_CMODEL_TINY_PIC)
9819 {
9820 /* ADR. */
9821 if (speed)
9822 *cost += extra_cost->alu.arith;
9823 }
9824
9825 if (flag_pic)
9826 {
9827 /* One extra load instruction, after accessing the GOT. */
9828 *cost += COSTS_N_INSNS (1);
9829 if (speed)
9830 *cost += extra_cost->ldst.load;
9831 }
43e9d192
IB
9832 return true;
9833
909734be 9834 case HIGH:
43e9d192 9835 case LO_SUM:
909734be
JG
9836 /* ADRP/ADD (immediate). */
9837 if (speed)
9838 *cost += extra_cost->alu.arith;
43e9d192
IB
9839 return true;
9840
9841 case ZERO_EXTRACT:
9842 case SIGN_EXTRACT:
7cc2145f
JG
9843 /* UBFX/SBFX. */
9844 if (speed)
b6875aac
KV
9845 {
9846 if (VECTOR_MODE_P (mode))
9847 *cost += extra_cost->vect.alu;
9848 else
9849 *cost += extra_cost->alu.bfx;
9850 }
7cc2145f
JG
9851
9852 /* We can trust that the immediates used will be correct (there
9853 are no by-register forms), so we need only cost op0. */
e548c9df 9854 *cost += rtx_cost (XEXP (x, 0), VOIDmode, (enum rtx_code) code, 0, speed);
43e9d192
IB
9855 return true;
9856
9857 case MULT:
4745e701
JG
9858 *cost += aarch64_rtx_mult_cost (x, MULT, 0, speed);
9859 /* aarch64_rtx_mult_cost always handles recursion to its
9860 operands. */
9861 return true;
43e9d192
IB
9862
9863 case MOD:
4f58fe36
KT
9864 /* We can expand signed mod by power of 2 using a NEGS, two parallel
9865 ANDs and a CSNEG. Assume here that a CSNEG costs the same as
9866 an unconditional negate. This case should only ever be reached through
9867 the set_smod_pow2_cheap check in expmed.c. */
9868 if (CONST_INT_P (XEXP (x, 1))
9869 && exact_log2 (INTVAL (XEXP (x, 1))) > 0
9870 && (mode == SImode || mode == DImode))
9871 {
9872 /* We expand to 4 instructions. Reset the baseline. */
9873 *cost = COSTS_N_INSNS (4);
9874
9875 if (speed)
9876 *cost += 2 * extra_cost->alu.logical
9877 + 2 * extra_cost->alu.arith;
9878
9879 return true;
9880 }
9881
9882 /* Fall-through. */
43e9d192 9883 case UMOD:
43e9d192
IB
9884 if (speed)
9885 {
cb9ac430 9886 /* Slightly prefer UMOD over SMOD. */
b6875aac
KV
9887 if (VECTOR_MODE_P (mode))
9888 *cost += extra_cost->vect.alu;
e548c9df
AM
9889 else if (GET_MODE_CLASS (mode) == MODE_INT)
9890 *cost += (extra_cost->mult[mode == DImode].add
cb9ac430
TC
9891 + extra_cost->mult[mode == DImode].idiv
9892 + (code == MOD ? 1 : 0));
43e9d192
IB
9893 }
9894 return false; /* All arguments need to be in registers. */
9895
9896 case DIV:
9897 case UDIV:
4105fe38 9898 case SQRT:
43e9d192
IB
9899 if (speed)
9900 {
b6875aac
KV
9901 if (VECTOR_MODE_P (mode))
9902 *cost += extra_cost->vect.alu;
9903 else if (GET_MODE_CLASS (mode) == MODE_INT)
4105fe38
JG
9904 /* There is no integer SQRT, so only DIV and UDIV can get
9905 here. */
cb9ac430
TC
9906 *cost += (extra_cost->mult[mode == DImode].idiv
9907 /* Slightly prefer UDIV over SDIV. */
9908 + (code == DIV ? 1 : 0));
4105fe38
JG
9909 else
9910 *cost += extra_cost->fp[mode == DFmode].div;
43e9d192
IB
9911 }
9912 return false; /* All arguments need to be in registers. */
9913
a8eecd00 9914 case IF_THEN_ELSE:
2d5ffe46
AP
9915 return aarch64_if_then_else_costs (XEXP (x, 0), XEXP (x, 1),
9916 XEXP (x, 2), cost, speed);
a8eecd00
JG
9917
9918 case EQ:
9919 case NE:
9920 case GT:
9921 case GTU:
9922 case LT:
9923 case LTU:
9924 case GE:
9925 case GEU:
9926 case LE:
9927 case LEU:
9928
9929 return false; /* All arguments must be in registers. */
9930
b292109f
JG
9931 case FMA:
9932 op0 = XEXP (x, 0);
9933 op1 = XEXP (x, 1);
9934 op2 = XEXP (x, 2);
9935
9936 if (speed)
b6875aac
KV
9937 {
9938 if (VECTOR_MODE_P (mode))
9939 *cost += extra_cost->vect.alu;
9940 else
9941 *cost += extra_cost->fp[mode == DFmode].fma;
9942 }
b292109f
JG
9943
9944 /* FMSUB, FNMADD, and FNMSUB are free. */
9945 if (GET_CODE (op0) == NEG)
9946 op0 = XEXP (op0, 0);
9947
9948 if (GET_CODE (op2) == NEG)
9949 op2 = XEXP (op2, 0);
9950
9951 /* aarch64_fnma4_elt_to_64v2df has the NEG as operand 1,
9952 and the by-element operand as operand 0. */
9953 if (GET_CODE (op1) == NEG)
9954 op1 = XEXP (op1, 0);
9955
9956 /* Catch vector-by-element operations. The by-element operand can
9957 either be (vec_duplicate (vec_select (x))) or just
9958 (vec_select (x)), depending on whether we are multiplying by
9959 a vector or a scalar.
9960
9961 Canonicalization is not very good in these cases, FMA4 will put the
9962 by-element operand as operand 0, FNMA4 will have it as operand 1. */
9963 if (GET_CODE (op0) == VEC_DUPLICATE)
9964 op0 = XEXP (op0, 0);
9965 else if (GET_CODE (op1) == VEC_DUPLICATE)
9966 op1 = XEXP (op1, 0);
9967
9968 if (GET_CODE (op0) == VEC_SELECT)
9969 op0 = XEXP (op0, 0);
9970 else if (GET_CODE (op1) == VEC_SELECT)
9971 op1 = XEXP (op1, 0);
9972
9973 /* If the remaining parameters are not registers,
9974 get the cost to put them into registers. */
e548c9df
AM
9975 *cost += rtx_cost (op0, mode, FMA, 0, speed);
9976 *cost += rtx_cost (op1, mode, FMA, 1, speed);
9977 *cost += rtx_cost (op2, mode, FMA, 2, speed);
b292109f
JG
9978 return true;
9979
5e2a765b
KT
9980 case FLOAT:
9981 case UNSIGNED_FLOAT:
9982 if (speed)
9983 *cost += extra_cost->fp[mode == DFmode].fromint;
9984 return false;
9985
b292109f
JG
9986 case FLOAT_EXTEND:
9987 if (speed)
b6875aac
KV
9988 {
9989 if (VECTOR_MODE_P (mode))
9990 {
9991 /* Vector extend. */
9992 *cost += extra_cost->vect.alu;
9993 }
9994 else
9995 *cost += extra_cost->fp[mode == DFmode].widen;
9996 }
b292109f
JG
9997 return false;
9998
9999 case FLOAT_TRUNCATE:
10000 if (speed)
b6875aac
KV
10001 {
10002 if (VECTOR_MODE_P (mode))
10003 {
10004 /* Vector conversion. */
10005 *cost += extra_cost->vect.alu;
10006 }
10007 else
10008 *cost += extra_cost->fp[mode == DFmode].narrow;
10009 }
b292109f
JG
10010 return false;
10011
61263118
KT
10012 case FIX:
10013 case UNSIGNED_FIX:
10014 x = XEXP (x, 0);
10015 /* Strip the rounding part. They will all be implemented
10016 by the fcvt* family of instructions anyway. */
10017 if (GET_CODE (x) == UNSPEC)
10018 {
10019 unsigned int uns_code = XINT (x, 1);
10020
10021 if (uns_code == UNSPEC_FRINTA
10022 || uns_code == UNSPEC_FRINTM
10023 || uns_code == UNSPEC_FRINTN
10024 || uns_code == UNSPEC_FRINTP
10025 || uns_code == UNSPEC_FRINTZ)
10026 x = XVECEXP (x, 0, 0);
10027 }
10028
10029 if (speed)
b6875aac
KV
10030 {
10031 if (VECTOR_MODE_P (mode))
10032 *cost += extra_cost->vect.alu;
10033 else
10034 *cost += extra_cost->fp[GET_MODE (x) == DFmode].toint;
10035 }
39252973
KT
10036
10037 /* We can combine fmul by a power of 2 followed by a fcvt into a single
10038 fixed-point fcvt. */
10039 if (GET_CODE (x) == MULT
10040 && ((VECTOR_MODE_P (mode)
10041 && aarch64_vec_fpconst_pow_of_2 (XEXP (x, 1)) > 0)
10042 || aarch64_fpconst_pow_of_2 (XEXP (x, 1)) > 0))
10043 {
10044 *cost += rtx_cost (XEXP (x, 0), VOIDmode, (rtx_code) code,
10045 0, speed);
10046 return true;
10047 }
10048
e548c9df 10049 *cost += rtx_cost (x, VOIDmode, (enum rtx_code) code, 0, speed);
61263118
KT
10050 return true;
10051
b292109f 10052 case ABS:
b6875aac
KV
10053 if (VECTOR_MODE_P (mode))
10054 {
10055 /* ABS (vector). */
10056 if (speed)
10057 *cost += extra_cost->vect.alu;
10058 }
10059 else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
b292109f 10060 {
19261b99
KT
10061 op0 = XEXP (x, 0);
10062
10063 /* FABD, which is analogous to FADD. */
10064 if (GET_CODE (op0) == MINUS)
10065 {
e548c9df
AM
10066 *cost += rtx_cost (XEXP (op0, 0), mode, MINUS, 0, speed);
10067 *cost += rtx_cost (XEXP (op0, 1), mode, MINUS, 1, speed);
19261b99
KT
10068 if (speed)
10069 *cost += extra_cost->fp[mode == DFmode].addsub;
10070
10071 return true;
10072 }
10073 /* Simple FABS is analogous to FNEG. */
b292109f
JG
10074 if (speed)
10075 *cost += extra_cost->fp[mode == DFmode].neg;
10076 }
10077 else
10078 {
10079 /* Integer ABS will either be split into
10080 two arithmetic instructions, or will be an ABS
10081 (scalar), which we don't model. */
10082 *cost = COSTS_N_INSNS (2);
10083 if (speed)
10084 *cost += 2 * extra_cost->alu.arith;
10085 }
10086 return false;
10087
10088 case SMAX:
10089 case SMIN:
10090 if (speed)
10091 {
b6875aac
KV
10092 if (VECTOR_MODE_P (mode))
10093 *cost += extra_cost->vect.alu;
10094 else
10095 {
10096 /* FMAXNM/FMINNM/FMAX/FMIN.
10097 TODO: This may not be accurate for all implementations, but
10098 we do not model this in the cost tables. */
10099 *cost += extra_cost->fp[mode == DFmode].addsub;
10100 }
b292109f
JG
10101 }
10102 return false;
10103
61263118
KT
10104 case UNSPEC:
10105 /* The floating point round to integer frint* instructions. */
10106 if (aarch64_frint_unspec_p (XINT (x, 1)))
10107 {
10108 if (speed)
10109 *cost += extra_cost->fp[mode == DFmode].roundint;
10110
10111 return false;
10112 }
781aeb73
KT
10113
10114 if (XINT (x, 1) == UNSPEC_RBIT)
10115 {
10116 if (speed)
10117 *cost += extra_cost->alu.rev;
10118
10119 return false;
10120 }
61263118
KT
10121 break;
10122
fb620c4a
JG
10123 case TRUNCATE:
10124
10125 /* Decompose <su>muldi3_highpart. */
10126 if (/* (truncate:DI */
10127 mode == DImode
10128 /* (lshiftrt:TI */
10129 && GET_MODE (XEXP (x, 0)) == TImode
10130 && GET_CODE (XEXP (x, 0)) == LSHIFTRT
10131 /* (mult:TI */
10132 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
10133 /* (ANY_EXTEND:TI (reg:DI))
10134 (ANY_EXTEND:TI (reg:DI))) */
10135 && ((GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == ZERO_EXTEND
10136 && GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)) == ZERO_EXTEND)
10137 || (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == SIGN_EXTEND
10138 && GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)) == SIGN_EXTEND))
10139 && GET_MODE (XEXP (XEXP (XEXP (XEXP (x, 0), 0), 0), 0)) == DImode
10140 && GET_MODE (XEXP (XEXP (XEXP (XEXP (x, 0), 0), 1), 0)) == DImode
10141 /* (const_int 64) */
10142 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
10143 && UINTVAL (XEXP (XEXP (x, 0), 1)) == 64)
10144 {
10145 /* UMULH/SMULH. */
10146 if (speed)
10147 *cost += extra_cost->mult[mode == DImode].extend;
e548c9df
AM
10148 *cost += rtx_cost (XEXP (XEXP (XEXP (XEXP (x, 0), 0), 0), 0),
10149 mode, MULT, 0, speed);
10150 *cost += rtx_cost (XEXP (XEXP (XEXP (XEXP (x, 0), 0), 1), 0),
10151 mode, MULT, 1, speed);
fb620c4a
JG
10152 return true;
10153 }
10154
10155 /* Fall through. */
43e9d192 10156 default:
61263118 10157 break;
43e9d192 10158 }
61263118 10159
c10e3d7f
AP
10160 if (dump_file
10161 && flag_aarch64_verbose_cost)
61263118
KT
10162 fprintf (dump_file,
10163 "\nFailed to cost RTX. Assuming default cost.\n");
10164
10165 return true;
43e9d192
IB
10166}
10167
0ee859b5
JG
10168/* Wrapper around aarch64_rtx_costs, dumps the partial, or total cost
10169 calculated for X. This cost is stored in *COST. Returns true
10170 if the total cost of X was calculated. */
10171static bool
e548c9df 10172aarch64_rtx_costs_wrapper (rtx x, machine_mode mode, int outer,
0ee859b5
JG
10173 int param, int *cost, bool speed)
10174{
e548c9df 10175 bool result = aarch64_rtx_costs (x, mode, outer, param, cost, speed);
0ee859b5 10176
c10e3d7f
AP
10177 if (dump_file
10178 && flag_aarch64_verbose_cost)
0ee859b5
JG
10179 {
10180 print_rtl_single (dump_file, x);
10181 fprintf (dump_file, "\n%s cost: %d (%s)\n",
10182 speed ? "Hot" : "Cold",
10183 *cost, result ? "final" : "partial");
10184 }
10185
10186 return result;
10187}
10188
43e9d192 10189static int
ef4bddc2 10190aarch64_register_move_cost (machine_mode mode,
8a3a7e67 10191 reg_class_t from_i, reg_class_t to_i)
43e9d192 10192{
8a3a7e67
RH
10193 enum reg_class from = (enum reg_class) from_i;
10194 enum reg_class to = (enum reg_class) to_i;
43e9d192 10195 const struct cpu_regmove_cost *regmove_cost
b175b679 10196 = aarch64_tune_params.regmove_cost;
43e9d192 10197
3be07662 10198 /* Caller save and pointer regs are equivalent to GENERAL_REGS. */
d677263e 10199 if (to == TAILCALL_ADDR_REGS || to == POINTER_REGS)
3be07662
WD
10200 to = GENERAL_REGS;
10201
d677263e 10202 if (from == TAILCALL_ADDR_REGS || from == POINTER_REGS)
3be07662
WD
10203 from = GENERAL_REGS;
10204
6ee70f81
AP
10205 /* Moving between GPR and stack cost is the same as GP2GP. */
10206 if ((from == GENERAL_REGS && to == STACK_REG)
10207 || (to == GENERAL_REGS && from == STACK_REG))
10208 return regmove_cost->GP2GP;
10209
10210 /* To/From the stack register, we move via the gprs. */
10211 if (to == STACK_REG || from == STACK_REG)
10212 return aarch64_register_move_cost (mode, from, GENERAL_REGS)
10213 + aarch64_register_move_cost (mode, GENERAL_REGS, to);
10214
6a70badb 10215 if (known_eq (GET_MODE_SIZE (mode), 16))
8919453c
WD
10216 {
10217 /* 128-bit operations on general registers require 2 instructions. */
10218 if (from == GENERAL_REGS && to == GENERAL_REGS)
10219 return regmove_cost->GP2GP * 2;
10220 else if (from == GENERAL_REGS)
10221 return regmove_cost->GP2FP * 2;
10222 else if (to == GENERAL_REGS)
10223 return regmove_cost->FP2GP * 2;
10224
10225 /* When AdvSIMD instructions are disabled it is not possible to move
10226 a 128-bit value directly between Q registers. This is handled in
10227 secondary reload. A general register is used as a scratch to move
10228 the upper DI value and the lower DI value is moved directly,
10229 hence the cost is the sum of three moves. */
10230 if (! TARGET_SIMD)
10231 return regmove_cost->GP2FP + regmove_cost->FP2GP + regmove_cost->FP2FP;
10232
10233 return regmove_cost->FP2FP;
10234 }
10235
43e9d192
IB
10236 if (from == GENERAL_REGS && to == GENERAL_REGS)
10237 return regmove_cost->GP2GP;
10238 else if (from == GENERAL_REGS)
10239 return regmove_cost->GP2FP;
10240 else if (to == GENERAL_REGS)
10241 return regmove_cost->FP2GP;
10242
43e9d192
IB
10243 return regmove_cost->FP2FP;
10244}
10245
10246static int
ef4bddc2 10247aarch64_memory_move_cost (machine_mode mode ATTRIBUTE_UNUSED,
43e9d192
IB
10248 reg_class_t rclass ATTRIBUTE_UNUSED,
10249 bool in ATTRIBUTE_UNUSED)
10250{
b175b679 10251 return aarch64_tune_params.memmov_cost;
43e9d192
IB
10252}
10253
0c30e0f3
EM
10254/* Return true if it is safe and beneficial to use the approximate rsqrt optabs
10255 to optimize 1.0/sqrt. */
ee62a5a6
RS
10256
10257static bool
9acc9cbe 10258use_rsqrt_p (machine_mode mode)
ee62a5a6
RS
10259{
10260 return (!flag_trapping_math
10261 && flag_unsafe_math_optimizations
9acc9cbe
EM
10262 && ((aarch64_tune_params.approx_modes->recip_sqrt
10263 & AARCH64_APPROX_MODE (mode))
1a33079e 10264 || flag_mrecip_low_precision_sqrt));
ee62a5a6
RS
10265}
10266
0c30e0f3
EM
10267/* Function to decide when to use the approximate reciprocal square root
10268 builtin. */
a6fc00da
BH
10269
10270static tree
ee62a5a6 10271aarch64_builtin_reciprocal (tree fndecl)
a6fc00da 10272{
9acc9cbe
EM
10273 machine_mode mode = TYPE_MODE (TREE_TYPE (fndecl));
10274
10275 if (!use_rsqrt_p (mode))
a6fc00da 10276 return NULL_TREE;
ee62a5a6 10277 return aarch64_builtin_rsqrt (DECL_FUNCTION_CODE (fndecl));
a6fc00da
BH
10278}
10279
98daafa0
EM
10280/* Emit instruction sequence to compute either the approximate square root
10281 or its approximate reciprocal, depending on the flag RECP, and return
10282 whether the sequence was emitted or not. */
a6fc00da 10283
98daafa0
EM
10284bool
10285aarch64_emit_approx_sqrt (rtx dst, rtx src, bool recp)
a6fc00da 10286{
98daafa0 10287 machine_mode mode = GET_MODE (dst);
daef0a8c
JW
10288
10289 if (GET_MODE_INNER (mode) == HFmode)
2e19adc8
RE
10290 {
10291 gcc_assert (!recp);
10292 return false;
10293 }
10294
2e19adc8
RE
10295 if (!recp)
10296 {
10297 if (!(flag_mlow_precision_sqrt
10298 || (aarch64_tune_params.approx_modes->sqrt
10299 & AARCH64_APPROX_MODE (mode))))
10300 return false;
10301
10302 if (flag_finite_math_only
10303 || flag_trapping_math
10304 || !flag_unsafe_math_optimizations
10305 || optimize_function_for_size_p (cfun))
10306 return false;
10307 }
10308 else
10309 /* Caller assumes we cannot fail. */
10310 gcc_assert (use_rsqrt_p (mode));
daef0a8c 10311
ddc203a7 10312 machine_mode mmsk = mode_for_int_vector (mode).require ();
98daafa0
EM
10313 rtx xmsk = gen_reg_rtx (mmsk);
10314 if (!recp)
2e19adc8
RE
10315 /* When calculating the approximate square root, compare the
10316 argument with 0.0 and create a mask. */
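	/* (The mask is needed because the reciprocal square root estimate of
	   0.0 is +Inf, and the final multiply by SRC would turn that into a
	   NaN instead of sqrt (0.0) == 0.0, hence this mask and the squashing
	   step below.)  */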
10317 emit_insn (gen_rtx_SET (xmsk,
10318 gen_rtx_NEG (mmsk,
10319 gen_rtx_EQ (mmsk, src,
10320 CONST0_RTX (mode)))));
a6fc00da 10321
98daafa0
EM
10322 /* Estimate the approximate reciprocal square root. */
10323 rtx xdst = gen_reg_rtx (mode);
0016d8d9 10324 emit_insn (gen_aarch64_rsqrte (mode, xdst, src));
a6fc00da 10325
98daafa0
EM
10326 /* Iterate over the series twice for SF and thrice for DF. */
10327 int iterations = (GET_MODE_INNER (mode) == DFmode) ? 3 : 2;
a6fc00da 10328
98daafa0
EM
10329 /* Optionally run one iteration fewer for faster performance,
10330 at the cost of some accuracy. */
10331 if ((recp && flag_mrecip_low_precision_sqrt)
10332 || (!recp && flag_mlow_precision_sqrt))
a6fc00da
BH
10333 iterations--;
10334
98daafa0
EM
10335 /* Iterate over the series to calculate the approximate reciprocal square
10336 root. */
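 /* A sketch of what one pass of the loop emits, assuming the usual FRSQRTS
    semantics of (3 - a * b) / 2:
      x2   = xdst * xdst
      x1   = (3 - src * x2) / 2
      xdst = xdst * x1
    so XDST converges towards 1 / sqrt (SRC); on the last pass the final
    multiply is deferred to the finalization below.  */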
10337 rtx x1 = gen_reg_rtx (mode);
10338 while (iterations--)
a6fc00da 10339 {
a6fc00da 10340 rtx x2 = gen_reg_rtx (mode);
98daafa0
EM
10341 emit_set_insn (x2, gen_rtx_MULT (mode, xdst, xdst));
10342
0016d8d9 10343 emit_insn (gen_aarch64_rsqrts (mode, x1, src, x2));
a6fc00da 10344
98daafa0
EM
10345 if (iterations > 0)
10346 emit_set_insn (xdst, gen_rtx_MULT (mode, xdst, x1));
10347 }
10348
10349 if (!recp)
10350 {
10351 /* Qualify the approximate reciprocal square root when the argument is
10352 0.0 by squashing the intermediary result to 0.0. */
10353 rtx xtmp = gen_reg_rtx (mmsk);
10354 emit_set_insn (xtmp, gen_rtx_AND (mmsk, gen_rtx_NOT (mmsk, xmsk),
10355 gen_rtx_SUBREG (mmsk, xdst, 0)));
10356 emit_move_insn (xdst, gen_rtx_SUBREG (mode, xtmp, 0));
a6fc00da 10357
98daafa0
EM
10358 /* Calculate the approximate square root. */
10359 emit_set_insn (xdst, gen_rtx_MULT (mode, xdst, src));
a6fc00da
BH
10360 }
10361
98daafa0
EM
10362 /* Finalize the approximation. */
10363 emit_set_insn (dst, gen_rtx_MULT (mode, xdst, x1));
10364
10365 return true;
a6fc00da
BH
10366}
10367
79a2bc2d
EM
10368/* Emit the instruction sequence to compute the approximation for the division
10369 of NUM by DEN in QUO and return whether the sequence was emitted or not. */
10370
10371bool
10372aarch64_emit_approx_div (rtx quo, rtx num, rtx den)
10373{
10374 machine_mode mode = GET_MODE (quo);
33d72b63
JW
10375
10376 if (GET_MODE_INNER (mode) == HFmode)
10377 return false;
10378
79a2bc2d
EM
10379 bool use_approx_division_p = (flag_mlow_precision_div
10380 || (aarch64_tune_params.approx_modes->division
10381 & AARCH64_APPROX_MODE (mode)));
10382
10383 if (!flag_finite_math_only
10384 || flag_trapping_math
10385 || !flag_unsafe_math_optimizations
10386 || optimize_function_for_size_p (cfun)
10387 || !use_approx_division_p)
10388 return false;
10389
1be49a38
RR
10390 if (!TARGET_SIMD && VECTOR_MODE_P (mode))
10391 return false;
10392
79a2bc2d
EM
10393 /* Estimate the approximate reciprocal. */
10394 rtx xrcp = gen_reg_rtx (mode);
0016d8d9 10395 emit_insn (gen_aarch64_frecpe (mode, xrcp, den));
79a2bc2d
EM
10396
10397 /* Iterate over the series twice for SF and thrice for DF. */
10398 int iterations = (GET_MODE_INNER (mode) == DFmode) ? 3 : 2;
10399
10400 /* Optionally run one iteration fewer for faster performance,
10401 at the cost of some accuracy. */
10402 if (flag_mlow_precision_div)
10403 iterations--;
10404
10405 /* Iterate over the series to calculate the approximate reciprocal. */
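 /* A sketch of each Newton-Raphson pass below, assuming the usual FRECPS
    semantics of 2 - a * b:
      xtmp = 2 - den * xrcp
      xrcp = xrcp * xtmp
    so XRCP converges towards 1 / DEN; as with the square root above, the
    last multiply is deferred to the finalization step.  */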
10406 rtx xtmp = gen_reg_rtx (mode);
10407 while (iterations--)
10408 {
0016d8d9 10409 emit_insn (gen_aarch64_frecps (mode, xtmp, xrcp, den));
79a2bc2d
EM
10410
10411 if (iterations > 0)
10412 emit_set_insn (xrcp, gen_rtx_MULT (mode, xrcp, xtmp));
10413 }
10414
10415 if (num != CONST1_RTX (mode))
10416 {
10417 /* As the approximate reciprocal of DEN is already calculated, only
10418 calculate the approximate division when NUM is not 1.0. */
10419 rtx xnum = force_reg (mode, num);
10420 emit_set_insn (xrcp, gen_rtx_MULT (mode, xrcp, xnum));
10421 }
10422
10423 /* Finalize the approximation. */
10424 emit_set_insn (quo, gen_rtx_MULT (mode, xrcp, xtmp));
10425 return true;
10426}
10427
d126a4ae
AP
10428/* Return the number of instructions that can be issued per cycle. */
10429static int
10430aarch64_sched_issue_rate (void)
10431{
b175b679 10432 return aarch64_tune_params.issue_rate;
d126a4ae
AP
10433}
10434
d03f7e44
MK
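/* Presumably implements TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
   (the hook registration lives elsewhere in this file): use the issue rate
   as the lookahead depth, unless scheduler fusion is active.  */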
10435static int
10436aarch64_sched_first_cycle_multipass_dfa_lookahead (void)
10437{
10438 int issue_rate = aarch64_sched_issue_rate ();
10439
10440 return issue_rate > 1 && !sched_fusion ? issue_rate : 0;
10441}
10442
2d6bc7fa
KT
10443
10444/* Implement TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD as
10445 autopref_multipass_dfa_lookahead_guard from haifa-sched.c. It only
10446 has an effect if PARAM_SCHED_AUTOPREF_QUEUE_DEPTH > 0. */
10447
10448static int
10449aarch64_first_cycle_multipass_dfa_lookahead_guard (rtx_insn *insn,
10450 int ready_index)
10451{
10452 return autopref_multipass_dfa_lookahead_guard (insn, ready_index);
10453}
10454
10455
8990e73a
TB
10456/* Vectorizer cost model target hooks. */
10457
10458/* Implement targetm.vectorize.builtin_vectorization_cost. */
10459static int
10460aarch64_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
10461 tree vectype,
10462 int misalign ATTRIBUTE_UNUSED)
10463{
10464 unsigned elements;
cd8ae5ed
AP
10465 const cpu_vector_cost *costs = aarch64_tune_params.vec_costs;
10466 bool fp = false;
10467
10468 if (vectype != NULL)
10469 fp = FLOAT_TYPE_P (vectype);
8990e73a
TB
10470
10471 switch (type_of_cost)
10472 {
10473 case scalar_stmt:
cd8ae5ed 10474 return fp ? costs->scalar_fp_stmt_cost : costs->scalar_int_stmt_cost;
8990e73a
TB
10475
10476 case scalar_load:
cd8ae5ed 10477 return costs->scalar_load_cost;
8990e73a
TB
10478
10479 case scalar_store:
cd8ae5ed 10480 return costs->scalar_store_cost;
8990e73a
TB
10481
10482 case vector_stmt:
cd8ae5ed 10483 return fp ? costs->vec_fp_stmt_cost : costs->vec_int_stmt_cost;
8990e73a
TB
10484
10485 case vector_load:
cd8ae5ed 10486 return costs->vec_align_load_cost;
8990e73a
TB
10487
10488 case vector_store:
cd8ae5ed 10489 return costs->vec_store_cost;
8990e73a
TB
10490
10491 case vec_to_scalar:
cd8ae5ed 10492 return costs->vec_to_scalar_cost;
8990e73a
TB
10493
10494 case scalar_to_vec:
cd8ae5ed 10495 return costs->scalar_to_vec_cost;
8990e73a
TB
10496
10497 case unaligned_load:
cc9fe6bb 10498 case vector_gather_load:
cd8ae5ed 10499 return costs->vec_unalign_load_cost;
8990e73a
TB
10500
10501 case unaligned_store:
cc9fe6bb 10502 case vector_scatter_store:
cd8ae5ed 10503 return costs->vec_unalign_store_cost;
8990e73a
TB
10504
10505 case cond_branch_taken:
cd8ae5ed 10506 return costs->cond_taken_branch_cost;
8990e73a
TB
10507
10508 case cond_branch_not_taken:
cd8ae5ed 10509 return costs->cond_not_taken_branch_cost;
8990e73a
TB
10510
10511 case vec_perm:
cd8ae5ed 10512 return costs->vec_permute_cost;
c428f91c 10513
8990e73a 10514 case vec_promote_demote:
cd8ae5ed 10515 return fp ? costs->vec_fp_stmt_cost : costs->vec_int_stmt_cost;
8990e73a
TB
10516
10517 case vec_construct:
6a70badb 10518 elements = estimated_poly_value (TYPE_VECTOR_SUBPARTS (vectype));
8990e73a
TB
10519 return elements / 2 + 1;
10520
10521 default:
10522 gcc_unreachable ();
10523 }
10524}
10525
10526/* Implement targetm.vectorize.add_stmt_cost. */
10527static unsigned
10528aarch64_add_stmt_cost (void *data, int count, enum vect_cost_for_stmt kind,
10529 struct _stmt_vec_info *stmt_info, int misalign,
10530 enum vect_cost_model_location where)
10531{
10532 unsigned *cost = (unsigned *) data;
10533 unsigned retval = 0;
10534
10535 if (flag_vect_cost_model)
10536 {
10537 tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
10538 int stmt_cost =
10539 aarch64_builtin_vectorization_cost (kind, vectype, misalign);
10540
10541 /* Statements in an inner loop relative to the loop being
10542 vectorized are weighted more heavily. The value here is
058e4c71 10543 arbitrary and could potentially be improved with analysis. */
8990e73a 10544 if (where == vect_body && stmt_info && stmt_in_inner_loop_p (stmt_info))
058e4c71 10545 count *= 50; /* FIXME */
8990e73a
TB
10546
10547 retval = (unsigned) (count * stmt_cost);
10548 cost[where] += retval;
10549 }
10550
10551 return retval;
10552}
10553
0cfff2a1 10554static void initialize_aarch64_code_model (struct gcc_options *);
43e9d192 10555
0cfff2a1
KT
10556/* Parse the TO_PARSE string and put the architecture struct that it
10557 selects into RES and the architectural features into ISA_FLAGS.
10558 Return an aarch64_parse_opt_result describing the parse result.
c7887347
ML
10559 If there is an error parsing, RES and ISA_FLAGS are left unchanged.
10560 When the TO_PARSE string contains an invalid extension,
10561 a copy of the string is created and stored to INVALID_EXTENSION. */
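/* For example (illustrative only), a TO_PARSE string of "armv8.2-a+fp16"
   matches the Armv8.2-A entry in all_architectures and hands "+fp16" on to
   aarch64_parse_extension.  */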
43e9d192 10562
0cfff2a1
KT
10563static enum aarch64_parse_opt_result
10564aarch64_parse_arch (const char *to_parse, const struct processor **res,
c7887347 10565 unsigned long *isa_flags, std::string *invalid_extension)
43e9d192 10566{
ff150bc4 10567 const char *ext;
43e9d192 10568 const struct processor *arch;
43e9d192
IB
10569 size_t len;
10570
ff150bc4 10571 ext = strchr (to_parse, '+');
43e9d192
IB
10572
10573 if (ext != NULL)
ff150bc4 10574 len = ext - to_parse;
43e9d192 10575 else
ff150bc4 10576 len = strlen (to_parse);
43e9d192
IB
10577
10578 if (len == 0)
0cfff2a1
KT
10579 return AARCH64_PARSE_MISSING_ARG;
10580
43e9d192 10581
0cfff2a1 10582 /* Loop through the list of supported ARCHes to find a match. */
43e9d192
IB
10583 for (arch = all_architectures; arch->name != NULL; arch++)
10584 {
ff150bc4
ML
10585 if (strlen (arch->name) == len
10586 && strncmp (arch->name, to_parse, len) == 0)
43e9d192 10587 {
0cfff2a1 10588 unsigned long isa_temp = arch->flags;
43e9d192
IB
10589
10590 if (ext != NULL)
10591 {
0cfff2a1
KT
10592 /* TO_PARSE string contains at least one extension. */
10593 enum aarch64_parse_opt_result ext_res
c7887347 10594 = aarch64_parse_extension (ext, &isa_temp, invalid_extension);
43e9d192 10595
0cfff2a1
KT
10596 if (ext_res != AARCH64_PARSE_OK)
10597 return ext_res;
ffee7aa9 10598 }
0cfff2a1
KT
10599 /* Extension parsing was successful. Confirm the result
10600 arch and ISA flags. */
10601 *res = arch;
10602 *isa_flags = isa_temp;
10603 return AARCH64_PARSE_OK;
43e9d192
IB
10604 }
10605 }
10606
10607 /* ARCH name not found in list. */
0cfff2a1 10608 return AARCH64_PARSE_INVALID_ARG;
43e9d192
IB
10609}
10610
0cfff2a1
KT
10611/* Parse the TO_PARSE string and put the result tuning in RES and the
10612 architecture flags in ISA_FLAGS. Return an aarch64_parse_opt_result
10613 describing the parse result. If there is an error parsing, RES and
c7887347
ML
10614 ISA_FLAGS are left unchanged.
10615 When the TO_PARSE string contains an invalid extension,
10616 a copy of the string is created and stored to INVALID_EXTENSION. */
43e9d192 10617
0cfff2a1
KT
10618static enum aarch64_parse_opt_result
10619aarch64_parse_cpu (const char *to_parse, const struct processor **res,
c7887347 10620 unsigned long *isa_flags, std::string *invalid_extension)
43e9d192 10621{
ff150bc4 10622 const char *ext;
43e9d192 10623 const struct processor *cpu;
43e9d192
IB
10624 size_t len;
10625
ff150bc4 10626 ext = strchr (to_parse, '+');
43e9d192
IB
10627
10628 if (ext != NULL)
ff150bc4 10629 len = ext - to_parse;
43e9d192 10630 else
ff150bc4 10631 len = strlen (to_parse);
43e9d192
IB
10632
10633 if (len == 0)
0cfff2a1
KT
10634 return AARCH64_PARSE_MISSING_ARG;
10635
43e9d192
IB
10636
10637 /* Loop through the list of supported CPUs to find a match. */
10638 for (cpu = all_cores; cpu->name != NULL; cpu++)
10639 {
ff150bc4 10640 if (strlen (cpu->name) == len && strncmp (cpu->name, to_parse, len) == 0)
43e9d192 10641 {
0cfff2a1
KT
10642 unsigned long isa_temp = cpu->flags;
10643
43e9d192
IB
10644
10645 if (ext != NULL)
10646 {
0cfff2a1
KT
10647 /* TO_PARSE string contains at least one extension. */
10648 enum aarch64_parse_opt_result ext_res
c7887347 10649 = aarch64_parse_extension (ext, &isa_temp, invalid_extension);
43e9d192 10650
0cfff2a1
KT
10651 if (ext_res != AARCH64_PARSE_OK)
10652 return ext_res;
10653 }
10654 /* Extension parsing was successful. Confirm the result
10655 cpu and ISA flags. */
10656 *res = cpu;
10657 *isa_flags = isa_temp;
10658 return AARCH64_PARSE_OK;
43e9d192
IB
10659 }
10660 }
10661
10662 /* CPU name not found in list. */
0cfff2a1 10663 return AARCH64_PARSE_INVALID_ARG;
43e9d192
IB
10664}
10665
0cfff2a1
KT
10666/* Parse the TO_PARSE string and put the cpu it selects into RES.
10667 Return an aarch64_parse_opt_result describing the parse result.
10668 If the parsing fails the RES does not change. */
43e9d192 10669
0cfff2a1
KT
10670static enum aarch64_parse_opt_result
10671aarch64_parse_tune (const char *to_parse, const struct processor **res)
43e9d192
IB
10672{
10673 const struct processor *cpu;
43e9d192
IB
10674
10675 /* Loop through the list of supported CPUs to find a match. */
10676 for (cpu = all_cores; cpu->name != NULL; cpu++)
10677 {
ff150bc4 10678 if (strcmp (cpu->name, to_parse) == 0)
43e9d192 10679 {
0cfff2a1
KT
10680 *res = cpu;
10681 return AARCH64_PARSE_OK;
43e9d192
IB
10682 }
10683 }
10684
10685 /* CPU name not found in list. */
0cfff2a1 10686 return AARCH64_PARSE_INVALID_ARG;
43e9d192
IB
10687}
10688
8dec06f2
JG
10689/* Parse TOKEN, which has length LENGTH to see if it is an option
10690 described in FLAG. If it is, return the index bit for that fusion type.
10691 If not, error (printing OPTION_NAME) and return zero. */
10692
10693static unsigned int
10694aarch64_parse_one_option_token (const char *token,
10695 size_t length,
10696 const struct aarch64_flag_desc *flag,
10697 const char *option_name)
10698{
10699 for (; flag->name != NULL; flag++)
10700 {
10701 if (length == strlen (flag->name)
10702 && !strncmp (flag->name, token, length))
10703 return flag->flag;
10704 }
10705
10706 error ("unknown flag passed in -moverride=%s (%s)", option_name, token);
10707 return 0;
10708}
10709
10710/* Parse OPTION which is a comma-separated list of flags to enable.
10711 FLAGS gives the list of flags we understand, INITIAL_STATE gives any
10712 default state we inherit from the CPU tuning structures. OPTION_NAME
10713 gives the top-level option we are parsing in the -moverride string,
10714 for use in error messages. */
10715
10716static unsigned int
10717aarch64_parse_boolean_options (const char *option,
10718 const struct aarch64_flag_desc *flags,
10719 unsigned int initial_state,
10720 const char *option_name)
10721{
10722 const char separator = '.';
10723 const char* specs = option;
10724 const char* ntoken = option;
10725 unsigned int found_flags = initial_state;
10726
10727 while ((ntoken = strchr (specs, separator)))
10728 {
10729 size_t token_length = ntoken - specs;
10730 unsigned token_ops = aarch64_parse_one_option_token (specs,
10731 token_length,
10732 flags,
10733 option_name);
10734 /* If we find "none" (or, for simplicity's sake, an error) anywhere
10735 in the token stream, reset the supported operations. So:
10736
10737 adrp+add.cmp+branch.none.adrp+add
10738
10739 would have the result of turning on only adrp+add fusion. */
10740 if (!token_ops)
10741 found_flags = 0;
10742
10743 found_flags |= token_ops;
10744 specs = ++ntoken;
10745 }
10746
10747 /* The string ended with a trailing separator; diagnose it. */
10748 if (!(*specs))
10749 {
10750 error ("%s string ill-formed\n", option_name);
10751 return 0;
10752 }
10753
10754 /* We still have one more token to parse. */
10755 size_t token_length = strlen (specs);
10756 unsigned token_ops = aarch64_parse_one_option_token (specs,
10757 token_length,
10758 flags,
10759 option_name);
10760 if (!token_ops)
10761 found_flags = 0;
10762
10763 found_flags |= token_ops;
10764 return found_flags;
10765}
10766
10767/* Support for overriding instruction fusion. */
10768
10769static void
10770aarch64_parse_fuse_string (const char *fuse_string,
10771 struct tune_params *tune)
10772{
10773 tune->fusible_ops = aarch64_parse_boolean_options (fuse_string,
10774 aarch64_fusible_pairs,
10775 tune->fusible_ops,
10776 "fuse=");
10777}
10778
10779/* Support for overriding other tuning flags. */
10780
10781static void
10782aarch64_parse_tune_string (const char *tune_string,
10783 struct tune_params *tune)
10784{
10785 tune->extra_tuning_flags
10786 = aarch64_parse_boolean_options (tune_string,
10787 aarch64_tuning_flags,
10788 tune->extra_tuning_flags,
10789 "tune=");
10790}
10791
10792/* Parse TOKEN, which has length LENGTH to see if it is a tuning option
10793 we understand. If it is, extract the option string and handoff to
10794 the appropriate function. */
10795
10796void
10797aarch64_parse_one_override_token (const char* token,
10798 size_t length,
10799 struct tune_params *tune)
10800{
10801 const struct aarch64_tuning_override_function *fn
10802 = aarch64_tuning_override_functions;
10803
10804 const char *option_part = strchr (token, '=');
10805 if (!option_part)
10806 {
10807 error ("tuning string missing in option (%s)", token);
10808 return;
10809 }
10810
10811 /* Get the length of the option name. */
10812 length = option_part - token;
10813 /* Skip the '=' to get to the option string. */
10814 option_part++;
10815
10816 for (; fn->name != NULL; fn++)
10817 {
10818 if (!strncmp (fn->name, token, length))
10819 {
10820 fn->parse_override (option_part, tune);
10821 return;
10822 }
10823 }
10824
10825 error ("unknown tuning option (%s)", token);
10826 return;
10827}
10828
5eee3c34
JW
10829/* Validate and clamp the TLS size (aarch64_tls_size) for the selected code model. */
10830
10831static void
10832initialize_aarch64_tls_size (struct gcc_options *opts)
10833{
10834 if (aarch64_tls_size == 0)
10835 aarch64_tls_size = 24;
10836
10837 switch (opts->x_aarch64_cmodel_var)
10838 {
10839 case AARCH64_CMODEL_TINY:
10840 /* Both the default and maximum TLS size allowed under tiny are 1M, which
10841 needs two instructions to address, so we clamp the size to 24. */
10842 if (aarch64_tls_size > 24)
10843 aarch64_tls_size = 24;
10844 break;
10845 case AARCH64_CMODEL_SMALL:
10846 /* The maximum TLS size allowed under small is 4G. */
10847 if (aarch64_tls_size > 32)
10848 aarch64_tls_size = 32;
10849 break;
10850 case AARCH64_CMODEL_LARGE:
10851 /* The maximum TLS size allowed under large is 16E.
10852 FIXME: 16E would need a 64-bit offset, but we only support a 48-bit offset now. */
10853 if (aarch64_tls_size > 48)
10854 aarch64_tls_size = 48;
10855 break;
10856 default:
10857 gcc_unreachable ();
10858 }
10859
10860 return;
10861}
10862
8dec06f2
JG
10863/* Parse STRING looking for options in the format:
10864 string :: option:string
10865 option :: name=substring
10866 name :: {a-z}
10867 substring :: defined by option. */
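/* An illustrative, schematic example of such a string (the tune flags are
   elided): "fuse=adrp+add.cmp+branch:tune=<flags>", where each
   name=substring pair is dispatched by aarch64_parse_one_override_token.  */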
10868
10869static void
10870aarch64_parse_override_string (const char* input_string,
10871 struct tune_params* tune)
10872{
10873 const char separator = ':';
10874 size_t string_length = strlen (input_string) + 1;
10875 char *string_root = (char *) xmalloc (sizeof (*string_root) * string_length);
10876 char *string = string_root;
10877 strncpy (string, input_string, string_length);
10878 string[string_length - 1] = '\0';
10879
10880 char* ntoken = string;
10881
10882 while ((ntoken = strchr (string, separator)))
10883 {
10884 size_t token_length = ntoken - string;
10885 /* Make this substring look like a string. */
10886 *ntoken = '\0';
10887 aarch64_parse_one_override_token (string, token_length, tune);
10888 string = ++ntoken;
10889 }
10890
10891 /* One last option to parse. */
10892 aarch64_parse_one_override_token (string, strlen (string), tune);
10893 free (string_root);
10894}
43e9d192 10895
43e9d192
IB
10896
10897static void
0cfff2a1 10898aarch64_override_options_after_change_1 (struct gcc_options *opts)
43e9d192 10899{
acea40ac
WD
10900 /* PR 70044: We have to be careful about being called multiple times for the
10901 same function. This means all changes should be repeatable. */
10902
d6cb6d6a
WD
10903 /* Set aarch64_use_frame_pointer based on -fno-omit-frame-pointer.
10904 Disable the frame pointer flag so the mid-end will not use a frame
10905 pointer in leaf functions in order to support -fomit-leaf-frame-pointer.
10906 Set x_flag_omit_frame_pointer to the special value 2 to differentiate
10907 between -fomit-frame-pointer (1) and -fno-omit-frame-pointer (2). */
10908 aarch64_use_frame_pointer = opts->x_flag_omit_frame_pointer != 1;
acea40ac 10909 if (opts->x_flag_omit_frame_pointer == 0)
a3dc8760 10910 opts->x_flag_omit_frame_pointer = 2;
43e9d192 10911
1be34295 10912 /* If not optimizing for size, set the default
0cfff2a1
KT
10913 alignment to what the target wants. */
10914 if (!opts->x_optimize_size)
43e9d192 10915 {
c518c102
ML
10916 if (opts->x_flag_align_loops && !opts->x_str_align_loops)
10917 opts->x_str_align_loops = aarch64_tune_params.loop_align;
10918 if (opts->x_flag_align_jumps && !opts->x_str_align_jumps)
10919 opts->x_str_align_jumps = aarch64_tune_params.jump_align;
10920 if (opts->x_flag_align_functions && !opts->x_str_align_functions)
10921 opts->x_str_align_functions = aarch64_tune_params.function_align;
43e9d192 10922 }
b4f50fd4 10923
9ee6540a
WD
10924 /* We default to no pc-relative literal loads. */
10925
10926 aarch64_pcrelative_literal_loads = false;
10927
10928 /* If -mpc-relative-literal-loads is set on the command line, this
b4f50fd4 10929 implies that the user asked for PC relative literal loads. */
9ee6540a
WD
10930 if (opts->x_pcrelative_literal_loads == 1)
10931 aarch64_pcrelative_literal_loads = true;
b4f50fd4 10932
9ee6540a
WD
10933 /* In the tiny memory model it makes no sense to disallow PC relative
10934 literal pool loads. */
10935 if (aarch64_cmodel == AARCH64_CMODEL_TINY
10936 || aarch64_cmodel == AARCH64_CMODEL_TINY_PIC)
10937 aarch64_pcrelative_literal_loads = true;
98daafa0
EM
10938
10939 /* When enabling the lower precision Newton series for the square root, also
10940 enable it for the reciprocal square root, since the latter is an
10941 intermediary step for the former. */
10942 if (flag_mlow_precision_sqrt)
10943 flag_mrecip_low_precision_sqrt = true;
0cfff2a1 10944}
43e9d192 10945
0cfff2a1
KT
10946/* 'Unpack' the internal tuning structs and update the options
10947 in OPTS. The caller must have set up selected_tune and selected_arch
10948 as all the other target-specific codegen decisions are
10949 derived from them. */
10950
e4ea20c8 10951void
0cfff2a1
KT
10952aarch64_override_options_internal (struct gcc_options *opts)
10953{
10954 aarch64_tune_flags = selected_tune->flags;
10955 aarch64_tune = selected_tune->sched_core;
10956 /* Make a copy of the tuning parameters attached to the core, which
10957 we may later overwrite. */
10958 aarch64_tune_params = *(selected_tune->tune);
10959 aarch64_architecture_version = selected_arch->architecture_version;
10960
10961 if (opts->x_aarch64_override_tune_string)
10962 aarch64_parse_override_string (opts->x_aarch64_override_tune_string,
10963 &aarch64_tune_params);
10964
10965 /* This target defaults to strict volatile bitfields. */
10966 if (opts->x_flag_strict_volatile_bitfields < 0 && abi_version_at_least (2))
10967 opts->x_flag_strict_volatile_bitfields = 1;
10968
0cfff2a1 10969 initialize_aarch64_code_model (opts);
5eee3c34 10970 initialize_aarch64_tls_size (opts);
63892fa2 10971
2d6bc7fa
KT
10972 int queue_depth = 0;
10973 switch (aarch64_tune_params.autoprefetcher_model)
10974 {
10975 case tune_params::AUTOPREFETCHER_OFF:
10976 queue_depth = -1;
10977 break;
10978 case tune_params::AUTOPREFETCHER_WEAK:
10979 queue_depth = 0;
10980 break;
10981 case tune_params::AUTOPREFETCHER_STRONG:
10982 queue_depth = max_insn_queue_index + 1;
10983 break;
10984 default:
10985 gcc_unreachable ();
10986 }
10987
10988 /* We don't mind passing in global_options_set here as we don't use
10989 the *options_set structs anyway. */
10990 maybe_set_param_value (PARAM_SCHED_AUTOPREF_QUEUE_DEPTH,
10991 queue_depth,
10992 opts->x_param_values,
10993 global_options_set.x_param_values);
10994
9d2c6e2e
MK
10995 /* Set up parameters to be used in the prefetching algorithm. Do not
10996 override the defaults unless we are tuning for a core we have
10997 researched values for. */
10998 if (aarch64_tune_params.prefetch->num_slots > 0)
10999 maybe_set_param_value (PARAM_SIMULTANEOUS_PREFETCHES,
11000 aarch64_tune_params.prefetch->num_slots,
11001 opts->x_param_values,
11002 global_options_set.x_param_values);
11003 if (aarch64_tune_params.prefetch->l1_cache_size >= 0)
11004 maybe_set_param_value (PARAM_L1_CACHE_SIZE,
11005 aarch64_tune_params.prefetch->l1_cache_size,
11006 opts->x_param_values,
11007 global_options_set.x_param_values);
11008 if (aarch64_tune_params.prefetch->l1_cache_line_size >= 0)
50487d79 11009 maybe_set_param_value (PARAM_L1_CACHE_LINE_SIZE,
9d2c6e2e
MK
11010 aarch64_tune_params.prefetch->l1_cache_line_size,
11011 opts->x_param_values,
11012 global_options_set.x_param_values);
11013 if (aarch64_tune_params.prefetch->l2_cache_size >= 0)
11014 maybe_set_param_value (PARAM_L2_CACHE_SIZE,
11015 aarch64_tune_params.prefetch->l2_cache_size,
50487d79
EM
11016 opts->x_param_values,
11017 global_options_set.x_param_values);
d2ff35c0
LM
11018 if (!aarch64_tune_params.prefetch->prefetch_dynamic_strides)
11019 maybe_set_param_value (PARAM_PREFETCH_DYNAMIC_STRIDES,
11020 0,
11021 opts->x_param_values,
11022 global_options_set.x_param_values);
59100dfc
LM
11023 if (aarch64_tune_params.prefetch->minimum_stride >= 0)
11024 maybe_set_param_value (PARAM_PREFETCH_MINIMUM_STRIDE,
11025 aarch64_tune_params.prefetch->minimum_stride,
11026 opts->x_param_values,
11027 global_options_set.x_param_values);
50487d79 11028
13494fcb
WD
11029 /* Use the alternative scheduling-pressure algorithm by default. */
11030 maybe_set_param_value (PARAM_SCHED_PRESSURE_ALGORITHM, SCHED_PRESSURE_MODEL,
11031 opts->x_param_values,
11032 global_options_set.x_param_values);
11033
fbe9af50
TC
11034 /* If the user hasn't changed it via configure then set the default to 64 KB
11035 for the backend. */
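 /* (The parameter value is, as far as the mid-end is concerned, the log2 of
    the guard size in bytes, so 16 here corresponds to the 64 KB default.)  */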
11036 maybe_set_param_value (PARAM_STACK_CLASH_PROTECTION_GUARD_SIZE,
11037 DEFAULT_STK_CLASH_GUARD_SIZE == 0
11038 ? 16 : DEFAULT_STK_CLASH_GUARD_SIZE,
11039 opts->x_param_values,
11040 global_options_set.x_param_values);
11041
11042 /* Validate the guard size. */
11043 int guard_size = PARAM_VALUE (PARAM_STACK_CLASH_PROTECTION_GUARD_SIZE);
fbe9af50
TC
11044
11045 /* Enforce that interval is the same size as size so the mid-end does the
11046 right thing. */
11047 maybe_set_param_value (PARAM_STACK_CLASH_PROTECTION_PROBE_INTERVAL,
11048 guard_size,
11049 opts->x_param_values,
11050 global_options_set.x_param_values);
11051
11052 /* The maybe_set calls won't update the value if the user has explicitly set
11053 one, which means we need to validate that the probing interval and guard size
11054 are equal. */
11055 int probe_interval
11056 = PARAM_VALUE (PARAM_STACK_CLASH_PROTECTION_PROBE_INTERVAL);
11057 if (guard_size != probe_interval)
11058 error ("stack clash guard size '%d' must be equal to probing interval "
11059 "'%d'", guard_size, probe_interval);
11060
16b2cafd
MK
11061 /* Enable software prefetching at the specified optimization level for
11062 CPUs that have prefetch tuning data. Lower the optimization level threshold by 1
11063 when profiling is enabled. */
11064 if (opts->x_flag_prefetch_loop_arrays < 0
11065 && !opts->x_optimize_size
11066 && aarch64_tune_params.prefetch->default_opt_level >= 0
11067 && opts->x_optimize >= aarch64_tune_params.prefetch->default_opt_level)
11068 opts->x_flag_prefetch_loop_arrays = 1;
11069
266c2b54
ML
11070 if (opts->x_aarch64_arch_string == NULL)
11071 opts->x_aarch64_arch_string = selected_arch->name;
11072 if (opts->x_aarch64_cpu_string == NULL)
11073 opts->x_aarch64_cpu_string = selected_cpu->name;
11074 if (opts->x_aarch64_tune_string == NULL)
11075 opts->x_aarch64_tune_string = selected_tune->name;
11076
0cfff2a1
KT
11077 aarch64_override_options_after_change_1 (opts);
11078}
43e9d192 11079
01f44038
KT
11080/* Print a hint with a suggestion for a core or architecture name that
11081 most closely resembles what the user passed in STR. ARCH is true if
11082 the user is asking for an architecture name. ARCH is false if the user
11083 is asking for a core name. */
11084
11085static void
11086aarch64_print_hint_for_core_or_arch (const char *str, bool arch)
11087{
11088 auto_vec<const char *> candidates;
11089 const struct processor *entry = arch ? all_architectures : all_cores;
11090 for (; entry->name != NULL; entry++)
11091 candidates.safe_push (entry->name);
a08b5429
ML
11092
11093#ifdef HAVE_LOCAL_CPU_DETECT
11094 /* Add also "native" as possible value. */
11095 if (arch)
11096 candidates.safe_push ("native");
11097#endif
11098
01f44038
KT
11099 char *s;
11100 const char *hint = candidates_list_and_hint (str, s, candidates);
11101 if (hint)
11102 inform (input_location, "valid arguments are: %s;"
11103 " did you mean %qs?", s, hint);
6285e915
ML
11104 else
11105 inform (input_location, "valid arguments are: %s", s);
11106
01f44038
KT
11107 XDELETEVEC (s);
11108}
11109
11110/* Print a hint with a suggestion for a core name that most closely resembles
11111 what the user passed in STR. */
11112
11113inline static void
11114aarch64_print_hint_for_core (const char *str)
11115{
11116 aarch64_print_hint_for_core_or_arch (str, false);
11117}
11118
11119/* Print a hint with a suggestion for an architecture name that most closely
11120 resembles what the user passed in STR. */
11121
11122inline static void
11123aarch64_print_hint_for_arch (const char *str)
11124{
11125 aarch64_print_hint_for_core_or_arch (str, true);
11126}
11127
c7887347
ML
11128
11129/* Print a hint with a suggestion for an extension name
11130 that most closely resembles what the user passed in STR. */
11131
11132void
11133aarch64_print_hint_for_extensions (const std::string &str)
11134{
11135 auto_vec<const char *> candidates;
11136 aarch64_get_all_extension_candidates (&candidates);
11137 char *s;
11138 const char *hint = candidates_list_and_hint (str.c_str (), s, candidates);
11139 if (hint)
11140 inform (input_location, "valid arguments are: %s;"
11141 " did you mean %qs?", s, hint);
11142 else
11143 inform (input_location, "valid arguments are: %s", s);
11144
11145 XDELETEVEC (s);
11146}
11147
0cfff2a1
KT
11148/* Validate a command-line -mcpu option. Parse the cpu and extensions (if any)
11149 specified in STR and throw errors if appropriate. Put the results, if
361fb3ee
KT
11150 they are valid, in RES and ISA_FLAGS. Return whether the option is
11151 valid. */
43e9d192 11152
361fb3ee 11153static bool
0cfff2a1
KT
11154aarch64_validate_mcpu (const char *str, const struct processor **res,
11155 unsigned long *isa_flags)
11156{
c7887347 11157 std::string invalid_extension;
0cfff2a1 11158 enum aarch64_parse_opt_result parse_res
c7887347 11159 = aarch64_parse_cpu (str, res, isa_flags, &invalid_extension);
0cfff2a1
KT
11160
11161 if (parse_res == AARCH64_PARSE_OK)
361fb3ee 11162 return true;
0cfff2a1
KT
11163
11164 switch (parse_res)
11165 {
11166 case AARCH64_PARSE_MISSING_ARG:
fb241da2 11167 error ("missing cpu name in %<-mcpu=%s%>", str);
0cfff2a1
KT
11168 break;
11169 case AARCH64_PARSE_INVALID_ARG:
11170 error ("unknown value %qs for -mcpu", str);
01f44038 11171 aarch64_print_hint_for_core (str);
0cfff2a1
KT
11172 break;
11173 case AARCH64_PARSE_INVALID_FEATURE:
c7887347
ML
11174 error ("invalid feature modifier %qs in %<-mcpu=%s%>",
11175 invalid_extension.c_str (), str);
11176 aarch64_print_hint_for_extensions (invalid_extension);
0cfff2a1
KT
11177 break;
11178 default:
11179 gcc_unreachable ();
11180 }
361fb3ee
KT
11181
11182 return false;
0cfff2a1
KT
11183}
11184
11185/* Validate a command-line -march option. Parse the arch and extensions
11186 (if any) specified in STR and throw errors if appropriate. Put the
361fb3ee
KT
11187 results, if they are valid, in RES and ISA_FLAGS. Return whether the
11188 option is valid. */
0cfff2a1 11189
361fb3ee 11190static bool
0cfff2a1 11191aarch64_validate_march (const char *str, const struct processor **res,
01f44038 11192 unsigned long *isa_flags)
0cfff2a1 11193{
c7887347 11194 std::string invalid_extension;
0cfff2a1 11195 enum aarch64_parse_opt_result parse_res
c7887347 11196 = aarch64_parse_arch (str, res, isa_flags, &invalid_extension);
0cfff2a1
KT
11197
11198 if (parse_res == AARCH64_PARSE_OK)
361fb3ee 11199 return true;
0cfff2a1
KT
11200
11201 switch (parse_res)
11202 {
11203 case AARCH64_PARSE_MISSING_ARG:
fb241da2 11204 error ("missing arch name in %<-march=%s%>", str);
0cfff2a1
KT
11205 break;
11206 case AARCH64_PARSE_INVALID_ARG:
11207 error ("unknown value %qs for -march", str);
01f44038 11208 aarch64_print_hint_for_arch (str);
0cfff2a1
KT
11209 break;
11210 case AARCH64_PARSE_INVALID_FEATURE:
c7887347
ML
11211 error ("invalid feature modifier %qs in %<-march=%s%>",
11212 invalid_extension.c_str (), str);
11213 aarch64_print_hint_for_extensions (invalid_extension);
0cfff2a1
KT
11214 break;
11215 default:
11216 gcc_unreachable ();
11217 }
361fb3ee
KT
11218
11219 return false;
0cfff2a1
KT
11220}
11221
11222/* Validate a command-line -mtune option. Parse the cpu
11223 specified in STR and throw errors if appropriate. Put the
361fb3ee
KT
11224 result, if it is valid, in RES. Return whether the option is
11225 valid. */
0cfff2a1 11226
361fb3ee 11227static bool
0cfff2a1
KT
11228aarch64_validate_mtune (const char *str, const struct processor **res)
11229{
11230 enum aarch64_parse_opt_result parse_res
11231 = aarch64_parse_tune (str, res);
11232
11233 if (parse_res == AARCH64_PARSE_OK)
361fb3ee 11234 return true;
0cfff2a1
KT
11235
11236 switch (parse_res)
11237 {
11238 case AARCH64_PARSE_MISSING_ARG:
fb241da2 11239 error ("missing cpu name in %<-mtune=%s%>", str);
0cfff2a1
KT
11240 break;
11241 case AARCH64_PARSE_INVALID_ARG:
11242 error ("unknown value %qs for -mtune", str);
01f44038 11243 aarch64_print_hint_for_core (str);
0cfff2a1
KT
11244 break;
11245 default:
11246 gcc_unreachable ();
11247 }
361fb3ee
KT
11248 return false;
11249}
11250
11251/* Return the CPU corresponding to the enum CPU.
11252 If it doesn't specify a cpu, return the default. */
11253
11254static const struct processor *
11255aarch64_get_tune_cpu (enum aarch64_processor cpu)
11256{
11257 if (cpu != aarch64_none)
11258 return &all_cores[cpu];
11259
11260 /* The & 0x3f is to extract the bottom 6 bits that encode the
11261 default cpu as selected by the --with-cpu GCC configure option
11262 in config.gcc.
11263 ???: The whole TARGET_CPU_DEFAULT and AARCH64_CPU_DEFAULT_FLAGS
11264 flags mechanism should be reworked to make it more sane. */
11265 return &all_cores[TARGET_CPU_DEFAULT & 0x3f];
11266}
11267
11268/* Return the architecture corresponding to the enum ARCH.
11269 If it doesn't specify a valid architecture, return the default. */
11270
11271static const struct processor *
11272aarch64_get_arch (enum aarch64_arch arch)
11273{
11274 if (arch != aarch64_no_arch)
11275 return &all_architectures[arch];
11276
11277 const struct processor *cpu = &all_cores[TARGET_CPU_DEFAULT & 0x3f];
11278
11279 return &all_architectures[cpu->arch];
0cfff2a1
KT
11280}
11281
43cacb12
RS
11282/* Return the VG value associated with -msve-vector-bits= value VALUE. */
11283
11284static poly_uint16
11285aarch64_convert_sve_vector_bits (aarch64_sve_vector_bits_enum value)
11286{
11287 /* For now generate vector-length agnostic code for -msve-vector-bits=128.
11288 This ensures we can clearly distinguish SVE and Advanced SIMD modes when
11289 deciding which .md file patterns to use and when deciding whether
11290 something is a legitimate address or constant. */
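 /* Illustrative mapping: -msve-vector-bits=256 yields a constant VG of 4
    (256 bits / 64 bits per granule), while the scalable and 128-bit cases
    handled below use the runtime-invariant poly value (2, 2), i.e.
    2 + 2 * N granules for some non-negative N.  */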
11291 if (value == SVE_SCALABLE || value == SVE_128)
11292 return poly_uint16 (2, 2);
11293 else
11294 return (int) value / 64;
11295}
11296
0cfff2a1
KT
11297/* Implement TARGET_OPTION_OVERRIDE. This is called once in the beginning
11298 and is used to parse the -m{cpu,tune,arch} strings and setup the initial
11299 tuning structs. In particular it must set selected_tune and
11300 aarch64_isa_flags that define the available ISA features and tuning
11301 decisions. It must also set selected_arch as this will be used to
11302 output the .arch asm tags for each function. */
11303
11304static void
11305aarch64_override_options (void)
11306{
11307 unsigned long cpu_isa = 0;
11308 unsigned long arch_isa = 0;
11309 aarch64_isa_flags = 0;
11310
361fb3ee
KT
11311 bool valid_cpu = true;
11312 bool valid_tune = true;
11313 bool valid_arch = true;
11314
0cfff2a1
KT
11315 selected_cpu = NULL;
11316 selected_arch = NULL;
11317 selected_tune = NULL;
11318
11319 /* -mcpu=CPU is shorthand for -march=ARCH_FOR_CPU, -mtune=CPU.
11320 If either of -march or -mtune is given, they override their
11321 respective component of -mcpu. */
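 /* For instance (a hypothetical command line), "-mcpu=cortex-a57
    -mtune=cortex-a53" takes the architecture and ISA flags from cortex-a57
    but the tuning and scheduling decisions from cortex-a53.  */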
11322 if (aarch64_cpu_string)
361fb3ee
KT
11323 valid_cpu = aarch64_validate_mcpu (aarch64_cpu_string, &selected_cpu,
11324 &cpu_isa);
0cfff2a1
KT
11325
11326 if (aarch64_arch_string)
361fb3ee
KT
11327 valid_arch = aarch64_validate_march (aarch64_arch_string, &selected_arch,
11328 &arch_isa);
0cfff2a1
KT
11329
11330 if (aarch64_tune_string)
361fb3ee 11331 valid_tune = aarch64_validate_mtune (aarch64_tune_string, &selected_tune);
43e9d192
IB
11332
11333 /* If the user did not specify a processor, choose the default
11334 one for them. This will be the CPU set during configuration using
a3cd0246 11335 --with-cpu, otherwise it is "generic". */
43e9d192
IB
11336 if (!selected_cpu)
11337 {
0cfff2a1
KT
11338 if (selected_arch)
11339 {
11340 selected_cpu = &all_cores[selected_arch->ident];
11341 aarch64_isa_flags = arch_isa;
361fb3ee 11342 explicit_arch = selected_arch->arch;
0cfff2a1
KT
11343 }
11344 else
11345 {
361fb3ee
KT
11346 /* Get default configure-time CPU. */
11347 selected_cpu = aarch64_get_tune_cpu (aarch64_none);
0cfff2a1
KT
11348 aarch64_isa_flags = TARGET_CPU_DEFAULT >> 6;
11349 }
361fb3ee
KT
11350
11351 if (selected_tune)
11352 explicit_tune_core = selected_tune->ident;
0cfff2a1
KT
11353 }
11354 /* If both -mcpu and -march are specified check that they are architecturally
11355 compatible, warn if they're not and prefer the -march ISA flags. */
11356 else if (selected_arch)
11357 {
11358 if (selected_arch->arch != selected_cpu->arch)
11359 {
11360 warning (0, "switch -mcpu=%s conflicts with -march=%s switch",
11361 all_architectures[selected_cpu->arch].name,
11362 selected_arch->name);
11363 }
11364 aarch64_isa_flags = arch_isa;
361fb3ee
KT
11365 explicit_arch = selected_arch->arch;
11366 explicit_tune_core = selected_tune ? selected_tune->ident
11367 : selected_cpu->ident;
0cfff2a1
KT
11368 }
11369 else
11370 {
11371 /* -mcpu but no -march. */
11372 aarch64_isa_flags = cpu_isa;
361fb3ee
KT
11373 explicit_tune_core = selected_tune ? selected_tune->ident
11374 : selected_cpu->ident;
11375 gcc_assert (selected_cpu);
11376 selected_arch = &all_architectures[selected_cpu->arch];
11377 explicit_arch = selected_arch->arch;
43e9d192
IB
11378 }
11379
0cfff2a1
KT
11380 /* Set the arch as well, as we will need it when outputting
11381 the .arch directive in assembly. */
11382 if (!selected_arch)
11383 {
11384 gcc_assert (selected_cpu);
11385 selected_arch = &all_architectures[selected_cpu->arch];
11386 }
43e9d192 11387
43e9d192 11388 if (!selected_tune)
3edaf26d 11389 selected_tune = selected_cpu;
43e9d192 11390
0cfff2a1
KT
11391#ifndef HAVE_AS_MABI_OPTION
11392 /* The compiler may have been configured with 2.23.* binutils, which does
11393 not have support for ILP32. */
11394 if (TARGET_ILP32)
ee61f880 11395 error ("assembler does not support -mabi=ilp32");
0cfff2a1 11396#endif
43e9d192 11397
43cacb12
RS
11398 /* Convert -msve-vector-bits to a VG count. */
11399 aarch64_sve_vg = aarch64_convert_sve_vector_bits (aarch64_sve_vector_bits);
11400
db58fd89 11401 if (aarch64_ra_sign_scope != AARCH64_FUNCTION_NONE && TARGET_ILP32)
ee61f880 11402 sorry ("return address signing is only supported for -mabi=lp64");
db58fd89 11403
361fb3ee
KT
11404 /* Make sure we properly set up the explicit options. */
11405 if ((aarch64_cpu_string && valid_cpu)
11406 || (aarch64_tune_string && valid_tune))
11407 gcc_assert (explicit_tune_core != aarch64_none);
11408
11409 if ((aarch64_cpu_string && valid_cpu)
11410 || (aarch64_arch_string && valid_arch))
11411 gcc_assert (explicit_arch != aarch64_no_arch);
11412
5f7dbaa0
RE
11413 /* The pass to insert speculation tracking runs before
11414 shrink-wrapping and the latter does not know how to update the
11415 tracking status, so disable shrink-wrapping in this case. */
11416 if (aarch64_track_speculation)
11417 flag_shrink_wrap = 0;
11418
0cfff2a1
KT
11419 aarch64_override_options_internal (&global_options);
11420
11421 /* Save these options as the default ones in case we push and pop them later
11422 while processing functions with potential target attributes. */
11423 target_option_default_node = target_option_current_node
11424 = build_target_option_node (&global_options);
43e9d192
IB
11425}
11426
11427/* Implement targetm.override_options_after_change. */
11428
11429static void
11430aarch64_override_options_after_change (void)
11431{
0cfff2a1 11432 aarch64_override_options_after_change_1 (&global_options);
43e9d192
IB
11433}
11434
11435static struct machine_function *
11436aarch64_init_machine_status (void)
11437{
11438 struct machine_function *machine;
766090c2 11439 machine = ggc_cleared_alloc<machine_function> ();
43e9d192
IB
11440 return machine;
11441}
11442
11443void
11444aarch64_init_expanders (void)
11445{
11446 init_machine_status = aarch64_init_machine_status;
11447}
11448
11449/* Work out which code model to use, taking the PIC options into account. */
11450static void
0cfff2a1 11451initialize_aarch64_code_model (struct gcc_options *opts)
43e9d192 11452{
0cfff2a1 11453 if (opts->x_flag_pic)
43e9d192 11454 {
0cfff2a1 11455 switch (opts->x_aarch64_cmodel_var)
43e9d192
IB
11456 {
11457 case AARCH64_CMODEL_TINY:
11458 aarch64_cmodel = AARCH64_CMODEL_TINY_PIC;
11459 break;
11460 case AARCH64_CMODEL_SMALL:
34ecdb0f 11461#ifdef HAVE_AS_SMALL_PIC_RELOCS
1b1e81f8
JW
11462 aarch64_cmodel = (flag_pic == 2
11463 ? AARCH64_CMODEL_SMALL_PIC
11464 : AARCH64_CMODEL_SMALL_SPIC);
34ecdb0f
JW
11465#else
11466 aarch64_cmodel = AARCH64_CMODEL_SMALL_PIC;
11467#endif
43e9d192
IB
11468 break;
11469 case AARCH64_CMODEL_LARGE:
11470 sorry ("code model %qs with -f%s", "large",
0cfff2a1 11471 opts->x_flag_pic > 1 ? "PIC" : "pic");
1c652781 11472 break;
43e9d192
IB
11473 default:
11474 gcc_unreachable ();
11475 }
11476 }
11477 else
0cfff2a1 11478 aarch64_cmodel = opts->x_aarch64_cmodel_var;
43e9d192
IB
11479}
11480
361fb3ee
KT
11481/* Implement TARGET_OPTION_SAVE. */
11482
11483static void
11484aarch64_option_save (struct cl_target_option *ptr, struct gcc_options *opts)
11485{
11486 ptr->x_aarch64_override_tune_string = opts->x_aarch64_override_tune_string;
11487}
11488
11489/* Implements TARGET_OPTION_RESTORE. Restore the backend codegen decisions
11490 using the information saved in PTR. */
11491
11492static void
11493aarch64_option_restore (struct gcc_options *opts, struct cl_target_option *ptr)
11494{
11495 opts->x_explicit_tune_core = ptr->x_explicit_tune_core;
11496 selected_tune = aarch64_get_tune_cpu (ptr->x_explicit_tune_core);
11497 opts->x_explicit_arch = ptr->x_explicit_arch;
11498 selected_arch = aarch64_get_arch (ptr->x_explicit_arch);
11499 opts->x_aarch64_override_tune_string = ptr->x_aarch64_override_tune_string;
11500
11501 aarch64_override_options_internal (opts);
11502}
11503
11504/* Implement TARGET_OPTION_PRINT. */
11505
11506static void
11507aarch64_option_print (FILE *file, int indent, struct cl_target_option *ptr)
11508{
11509 const struct processor *cpu
11510 = aarch64_get_tune_cpu (ptr->x_explicit_tune_core);
11511 unsigned long isa_flags = ptr->x_aarch64_isa_flags;
11512 const struct processor *arch = aarch64_get_arch (ptr->x_explicit_arch);
054b4005 11513 std::string extension
04a99ebe 11514 = aarch64_get_extension_string_for_isa_flags (isa_flags, arch->flags);
361fb3ee
KT
11515
11516 fprintf (file, "%*sselected tune = %s\n", indent, "", cpu->name);
054b4005
JG
11517 fprintf (file, "%*sselected arch = %s%s\n", indent, "",
11518 arch->name, extension.c_str ());
361fb3ee
KT
11519}
11520
d78006d9
KT
11521static GTY(()) tree aarch64_previous_fndecl;
11522
e4ea20c8
KT
11523void
11524aarch64_reset_previous_fndecl (void)
11525{
11526 aarch64_previous_fndecl = NULL;
11527}
11528
acfc1ac1
KT
11529/* Restore or save the TREE_TARGET_GLOBALS from or to NEW_TREE.
11530 Used by aarch64_set_current_function and aarch64_pragma_target_parse to
11531 make sure optab availability predicates are recomputed when necessary. */
11532
11533void
11534aarch64_save_restore_target_globals (tree new_tree)
11535{
11536 if (TREE_TARGET_GLOBALS (new_tree))
11537 restore_target_globals (TREE_TARGET_GLOBALS (new_tree));
11538 else if (new_tree == target_option_default_node)
11539 restore_target_globals (&default_target_globals);
11540 else
11541 TREE_TARGET_GLOBALS (new_tree) = save_target_globals_default_opts ();
11542}
11543
d78006d9
KT
11544/* Implement TARGET_SET_CURRENT_FUNCTION. Unpack the codegen decisions
11545 like tuning and ISA features from the DECL_FUNCTION_SPECIFIC_TARGET
11546 of the function, if such exists. This function may be called multiple
11547 times on a single function so use aarch64_previous_fndecl to avoid
11548 setting up identical state. */
11549
11550static void
11551aarch64_set_current_function (tree fndecl)
11552{
acfc1ac1
KT
11553 if (!fndecl || fndecl == aarch64_previous_fndecl)
11554 return;
11555
d78006d9
KT
11556 tree old_tree = (aarch64_previous_fndecl
11557 ? DECL_FUNCTION_SPECIFIC_TARGET (aarch64_previous_fndecl)
11558 : NULL_TREE);
11559
acfc1ac1 11560 tree new_tree = DECL_FUNCTION_SPECIFIC_TARGET (fndecl);
d78006d9 11561
acfc1ac1
KT
11562 /* If current function has no attributes but the previous one did,
11563 use the default node. */
11564 if (!new_tree && old_tree)
11565 new_tree = target_option_default_node;
d78006d9 11566
acfc1ac1
KT
11567 /* If nothing to do, return. #pragma GCC reset or #pragma GCC pop to
11568 the default have been handled by aarch64_save_restore_target_globals from
11569 aarch64_pragma_target_parse. */
11570 if (old_tree == new_tree)
11571 return;
d78006d9 11572
acfc1ac1 11573 aarch64_previous_fndecl = fndecl;
6e17a23b 11574
acfc1ac1
KT
11575 /* First set the target options. */
11576 cl_target_option_restore (&global_options, TREE_TARGET_OPTION (new_tree));
6e17a23b 11577
acfc1ac1 11578 aarch64_save_restore_target_globals (new_tree);
d78006d9 11579}
361fb3ee 11580
5a2c8331
KT
11581/* Enum describing the various ways we can handle attributes.
11582 In many cases we can reuse the generic option handling machinery. */
11583
11584enum aarch64_attr_opt_type
11585{
11586 aarch64_attr_mask, /* Attribute should set a bit in target_flags. */
11587 aarch64_attr_bool, /* Attribute sets or unsets a boolean variable. */
11588 aarch64_attr_enum, /* Attribute sets an enum variable. */
11589 aarch64_attr_custom /* Attribute requires a custom handling function. */
11590};
11591
11592/* All the information needed to handle a target attribute.
11593 NAME is the name of the attribute.
9c582551 11594 ATTR_TYPE specifies the type of behavior of the attribute as described
5a2c8331
KT
11595 in the definition of enum aarch64_attr_opt_type.
11596 ALLOW_NEG is true if the attribute supports a "no-" form.
ab93e9b7
SE
11597 HANDLER is the function that takes the attribute string as an argument.
11598 It is needed only when the ATTR_TYPE is aarch64_attr_custom.
5a2c8331 11599 OPT_NUM is the enum specifying the option that the attribute modifies.
9c582551 11600 This is needed for attributes that mirror the behavior of a command-line
5a2c8331
KT
11601 option, that is it has ATTR_TYPE aarch64_attr_mask, aarch64_attr_bool or
11602 aarch64_attr_enum. */
11603
11604struct aarch64_attribute_info
11605{
11606 const char *name;
11607 enum aarch64_attr_opt_type attr_type;
11608 bool allow_neg;
ab93e9b7 11609 bool (*handler) (const char *);
5a2c8331
KT
11610 enum opt_code opt_num;
11611};
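/* An illustrative (hypothetical) use of this machinery from user code:

     __attribute__ ((target ("tune=cortex-a53")))
     void foo (void);

   Here the "tune=cortex-a53" string is matched against the table of
   aarch64_attribute_info entries (populated elsewhere in this file) and
   dispatched to aarch64_handle_attr_tune below.  */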
11612
ab93e9b7 11613/* Handle the argument STR to the arch= target attribute. */
5a2c8331
KT
11614
11615static bool
ab93e9b7 11616aarch64_handle_attr_arch (const char *str)
5a2c8331
KT
11617{
11618 const struct processor *tmp_arch = NULL;
c7887347 11619 std::string invalid_extension;
5a2c8331 11620 enum aarch64_parse_opt_result parse_res
c7887347 11621 = aarch64_parse_arch (str, &tmp_arch, &aarch64_isa_flags, &invalid_extension);
5a2c8331
KT
11622
11623 if (parse_res == AARCH64_PARSE_OK)
11624 {
11625 gcc_assert (tmp_arch);
11626 selected_arch = tmp_arch;
11627 explicit_arch = selected_arch->arch;
11628 return true;
11629 }
11630
11631 switch (parse_res)
11632 {
11633 case AARCH64_PARSE_MISSING_ARG:
ab93e9b7 11634 error ("missing name in %<target(\"arch=\")%> pragma or attribute");
5a2c8331
KT
11635 break;
11636 case AARCH64_PARSE_INVALID_ARG:
ab93e9b7 11637 error ("invalid name (\"%s\") in %<target(\"arch=\")%> pragma or attribute", str);
01f44038 11638 aarch64_print_hint_for_arch (str);
5a2c8331
KT
11639 break;
11640 case AARCH64_PARSE_INVALID_FEATURE:
c7887347
ML
11641 error ("invalid feature modifier %s of value (\"%s\") in "
11642 "%<target()%> pragma or attribute", invalid_extension.c_str (), str);
11643 aarch64_print_hint_for_extensions (invalid_extension);
5a2c8331
KT
11644 break;
11645 default:
11646 gcc_unreachable ();
11647 }
11648
11649 return false;
11650}
11651
ab93e9b7 11652/* Handle the argument CPU_STR to the cpu= target attribute. */
5a2c8331
KT
11653
11654static bool
ab93e9b7 11655aarch64_handle_attr_cpu (const char *str)
5a2c8331
KT
11656{
11657 const struct processor *tmp_cpu = NULL;
c7887347 11658 std::string invalid_extension;
5a2c8331 11659 enum aarch64_parse_opt_result parse_res
c7887347 11660 = aarch64_parse_cpu (str, &tmp_cpu, &aarch64_isa_flags, &invalid_extension);
5a2c8331
KT
11661
11662 if (parse_res == AARCH64_PARSE_OK)
11663 {
11664 gcc_assert (tmp_cpu);
11665 selected_tune = tmp_cpu;
11666 explicit_tune_core = selected_tune->ident;
11667
11668 selected_arch = &all_architectures[tmp_cpu->arch];
11669 explicit_arch = selected_arch->arch;
11670 return true;
11671 }
11672
11673 switch (parse_res)
11674 {
11675 case AARCH64_PARSE_MISSING_ARG:
ab93e9b7 11676 error ("missing name in %<target(\"cpu=\")%> pragma or attribute");
5a2c8331
KT
11677 break;
11678 case AARCH64_PARSE_INVALID_ARG:
ab93e9b7 11679 error ("invalid name (\"%s\") in %<target(\"cpu=\")%> pragma or attribute", str);
01f44038 11680 aarch64_print_hint_for_core (str);
5a2c8331
KT
11681 break;
11682 case AARCH64_PARSE_INVALID_FEATURE:
c7887347
ML
11683 error ("invalid feature modifier %s of value (\"%s\") in "
11684 "%<target()%> pragma or attribute", invalid_extension.c_str (), str);
11685 aarch64_print_hint_for_extensions (invalid_extension);
5a2c8331
KT
11686 break;
11687 default:
11688 gcc_unreachable ();
11689 }
11690
11691 return false;
11692}
11693
ab93e9b7 11694/* Handle the argument STR to the tune= target attribute. */
5a2c8331
KT
11695
11696static bool
ab93e9b7 11697aarch64_handle_attr_tune (const char *str)
5a2c8331
KT
11698{
11699 const struct processor *tmp_tune = NULL;
11700 enum aarch64_parse_opt_result parse_res
11701 = aarch64_parse_tune (str, &tmp_tune);
11702
11703 if (parse_res == AARCH64_PARSE_OK)
11704 {
11705 gcc_assert (tmp_tune);
11706 selected_tune = tmp_tune;
11707 explicit_tune_core = selected_tune->ident;
11708 return true;
11709 }
11710
11711 switch (parse_res)
11712 {
11713 case AARCH64_PARSE_INVALID_ARG:
ab93e9b7 11714 error ("invalid name (\"%s\") in %<target(\"tune=\")%> pragma or attribute", str);
01f44038 11715 aarch64_print_hint_for_core (str);
5a2c8331
KT
11716 break;
11717 default:
11718 gcc_unreachable ();
11719 }
11720
11721 return false;
11722}
11723
11724/* Parse an architecture extension target attribute string specified in STR.
11725 For example "+fp+nosimd". Show any errors if needed. Return TRUE
11726 if successful. Update aarch64_isa_flags to reflect the ISA features
ab93e9b7 11727 modified. */
5a2c8331
KT
11728
11729static bool
ab93e9b7 11730aarch64_handle_attr_isa_flags (char *str)
5a2c8331
KT
11731{
11732 enum aarch64_parse_opt_result parse_res;
11733 unsigned long isa_flags = aarch64_isa_flags;
11734
e4ea20c8
KT
11735 /* We allow "+nothing" in the beginning to clear out all architectural
11736 features if the user wants to handpick specific features. */
11737 if (strncmp ("+nothing", str, 8) == 0)
11738 {
11739 isa_flags = 0;
11740 str += 8;
11741 }
11742
c7887347
ML
11743 std::string invalid_extension;
11744 parse_res = aarch64_parse_extension (str, &isa_flags, &invalid_extension);
5a2c8331
KT
11745
11746 if (parse_res == AARCH64_PARSE_OK)
11747 {
11748 aarch64_isa_flags = isa_flags;
11749 return true;
11750 }
11751
11752 switch (parse_res)
11753 {
11754 case AARCH64_PARSE_MISSING_ARG:
ab93e9b7 11755 error ("missing value in %<target()%> pragma or attribute");
5a2c8331
KT
11756 break;
11757
11758 case AARCH64_PARSE_INVALID_FEATURE:
c7887347
ML
11759 error ("invalid feature modifier %s of value (\"%s\") in "
11760 "%<target()%> pragma or attribute", invalid_extension.c_str (), str);
5a2c8331
KT
11761 break;
11762
11763 default:
11764 gcc_unreachable ();
11765 }
11766
11767 return false;
11768}
11769
11770/* The target attributes that we support. On top of these we also support just
11771 ISA extensions, like __attribute__ ((target ("+crc"))), but that case is
11772 handled explicitly in aarch64_process_one_target_attr. */
11773
11774static const struct aarch64_attribute_info aarch64_attributes[] =
11775{
11776 { "general-regs-only", aarch64_attr_mask, false, NULL,
11777 OPT_mgeneral_regs_only },
11778 { "fix-cortex-a53-835769", aarch64_attr_bool, true, NULL,
11779 OPT_mfix_cortex_a53_835769 },
48bb1a55
CL
11780 { "fix-cortex-a53-843419", aarch64_attr_bool, true, NULL,
11781 OPT_mfix_cortex_a53_843419 },
5a2c8331 11782 { "cmodel", aarch64_attr_enum, false, NULL, OPT_mcmodel_ },
675d044c 11783 { "strict-align", aarch64_attr_mask, true, NULL, OPT_mstrict_align },
5a2c8331
KT
11784 { "omit-leaf-frame-pointer", aarch64_attr_bool, true, NULL,
11785 OPT_momit_leaf_frame_pointer },
11786 { "tls-dialect", aarch64_attr_enum, false, NULL, OPT_mtls_dialect_ },
11787 { "arch", aarch64_attr_custom, false, aarch64_handle_attr_arch,
11788 OPT_march_ },
11789 { "cpu", aarch64_attr_custom, false, aarch64_handle_attr_cpu, OPT_mcpu_ },
11790 { "tune", aarch64_attr_custom, false, aarch64_handle_attr_tune,
11791 OPT_mtune_ },
db58fd89
JW
11792 { "sign-return-address", aarch64_attr_enum, false, NULL,
11793 OPT_msign_return_address_ },
5a2c8331
KT
11794 { NULL, aarch64_attr_custom, false, NULL, OPT____ }
11795};
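
/* For example, a single attribute string may combine several of the
   entries above:

     __attribute__ ((target ("arch=armv8-a+crc,no-omit-leaf-frame-pointer")))
     int foo (int x);

   Bare ISA extension strings such as "+crc+nosimd" are also accepted; that
   case is handled separately in aarch64_process_one_target_attr.  */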
11796
11797/* Parse ARG_STR which contains the definition of one target attribute.
ab93e9b7 11798 Show appropriate errors if any; return true if the attribute is valid. */
5a2c8331
KT
11799
11800static bool
ab93e9b7 11801aarch64_process_one_target_attr (char *arg_str)
5a2c8331
KT
11802{
11803 bool invert = false;
11804
11805 size_t len = strlen (arg_str);
11806
11807 if (len == 0)
11808 {
ab93e9b7 11809 error ("malformed %<target()%> pragma or attribute");
5a2c8331
KT
11810 return false;
11811 }
11812
11813 char *str_to_check = (char *) alloca (len + 1);
11814 strcpy (str_to_check, arg_str);
11815
11816 /* Skip leading whitespace. */
11817 while (*str_to_check == ' ' || *str_to_check == '\t')
11818 str_to_check++;
11819
11820 /* We have something like __attribute__ ((target ("+fp+nosimd"))).
11821 It is easier to detect and handle it explicitly here rather than going
11822 through the machinery for the rest of the target attributes in this
11823 function. */
11824 if (*str_to_check == '+')
ab93e9b7 11825 return aarch64_handle_attr_isa_flags (str_to_check);
5a2c8331
KT
11826
11827 if (len > 3 && strncmp (str_to_check, "no-", 3) == 0)
11828 {
11829 invert = true;
11830 str_to_check += 3;
11831 }
11832 char *arg = strchr (str_to_check, '=');
11833
11834 /* If we found opt=foo then terminate STR_TO_CHECK at the '='
11835 and point ARG to "foo". */
11836 if (arg)
11837 {
11838 *arg = '\0';
11839 arg++;
11840 }
11841 const struct aarch64_attribute_info *p_attr;
16d12992 11842 bool found = false;
5a2c8331
KT
11843 for (p_attr = aarch64_attributes; p_attr->name; p_attr++)
11844 {
11845 /* If the names don't match up, or the user has given an argument
11846 to an attribute that doesn't accept one, or didn't give an argument
11847 to an attribute that expects one, fail to match. */
11848 if (strcmp (str_to_check, p_attr->name) != 0)
11849 continue;
11850
16d12992 11851 found = true;
5a2c8331
KT
11852 bool attr_need_arg_p = p_attr->attr_type == aarch64_attr_custom
11853 || p_attr->attr_type == aarch64_attr_enum;
11854
11855 if (attr_need_arg_p ^ (arg != NULL))
11856 {
ab93e9b7 11857 error ("pragma or attribute %<target(\"%s\")%> does not accept an argument", str_to_check);
5a2c8331
KT
11858 return false;
11859 }
11860
11861 /* If the name matches but the attribute does not allow "no-" versions
11862 then we can't match. */
11863 if (invert && !p_attr->allow_neg)
11864 {
ab93e9b7 11865 error ("pragma or attribute %<target(\"%s\")%> does not allow a negated form", str_to_check);
5a2c8331
KT
11866 return false;
11867 }
11868
11869 switch (p_attr->attr_type)
11870 {
11871 /* Has a custom handler registered.
11872 For example, cpu=, arch=, tune=. */
11873 case aarch64_attr_custom:
11874 gcc_assert (p_attr->handler);
ab93e9b7 11875 if (!p_attr->handler (arg))
5a2c8331
KT
11876 return false;
11877 break;
11878
11879 /* Either set or unset a boolean option. */
11880 case aarch64_attr_bool:
11881 {
11882 struct cl_decoded_option decoded;
11883
11884 generate_option (p_attr->opt_num, NULL, !invert,
11885 CL_TARGET, &decoded);
11886 aarch64_handle_option (&global_options, &global_options_set,
11887 &decoded, input_location);
11888 break;
11889 }
11890 /* Set or unset a bit in the target_flags. aarch64_handle_option
11891 should know what mask to apply given the option number. */
11892 case aarch64_attr_mask:
11893 {
11894 struct cl_decoded_option decoded;
11895 /* We only need to specify the option number.
11896 aarch64_handle_option will know which mask to apply. */
11897 decoded.opt_index = p_attr->opt_num;
11898 decoded.value = !invert;
11899 aarch64_handle_option (&global_options, &global_options_set,
11900 &decoded, input_location);
11901 break;
11902 }
11903 /* Use the option setting machinery to set an option to an enum. */
11904 case aarch64_attr_enum:
11905 {
11906 gcc_assert (arg);
11907 bool valid;
11908 int value;
11909 valid = opt_enum_arg_to_value (p_attr->opt_num, arg,
11910 &value, CL_TARGET);
11911 if (valid)
11912 {
11913 set_option (&global_options, NULL, p_attr->opt_num, value,
11914 NULL, DK_UNSPECIFIED, input_location,
11915 global_dc);
11916 }
11917 else
11918 {
ab93e9b7 11919 error ("pragma or attribute %<target(\"%s=%s\")%> is not valid", str_to_check, arg);
5a2c8331
KT
11920 }
11921 break;
11922 }
11923 default:
11924 gcc_unreachable ();
11925 }
11926 }
11927
16d12992
KT
11928 /* If we reached this point we have either found and validated an
11929 attribute or matched none at all. If we matched an attribute but its
11930 arguments were malformed we will have returned false already. */
11931 return found;
5a2c8331
KT
11932}
11933
11934/* Count how many times the character C appears in
11935 NULL-terminated string STR. */
11936
11937static unsigned int
11938num_occurences_in_str (char c, char *str)
11939{
11940 unsigned int res = 0;
11941 while (*str != '\0')
11942 {
11943 if (*str == c)
11944 res++;
11945
11946 str++;
11947 }
11948
11949 return res;
11950}
11951
11952/* Parse the tree in ARGS that contains the target attribute information
ab93e9b7 11953 and update the global target options space. */
5a2c8331
KT
11954
11955bool
ab93e9b7 11956aarch64_process_target_attr (tree args)
5a2c8331
KT
11957{
11958 if (TREE_CODE (args) == TREE_LIST)
11959 {
11960 do
11961 {
11962 tree head = TREE_VALUE (args);
11963 if (head)
11964 {
ab93e9b7 11965 if (!aarch64_process_target_attr (head))
5a2c8331
KT
11966 return false;
11967 }
11968 args = TREE_CHAIN (args);
11969 } while (args);
11970
11971 return true;
11972 }
3b6cb9e3
ML
11973
11974 if (TREE_CODE (args) != STRING_CST)
11975 {
11976 error ("attribute %<target%> argument not a string");
11977 return false;
11978 }
5a2c8331
KT
11979
11980 size_t len = strlen (TREE_STRING_POINTER (args));
11981 char *str_to_check = (char *) alloca (len + 1);
11982 strcpy (str_to_check, TREE_STRING_POINTER (args));
11983
11984 if (len == 0)
11985 {
ab93e9b7 11986 error ("malformed %<target()%> pragma or attribute");
5a2c8331
KT
11987 return false;
11988 }
11989
11990 /* Used to catch empty strings between commas, e.g.
11991 attribute ((target ("attr1,,attr2"))). */
11992 unsigned int num_commas = num_occurences_in_str (',', str_to_check);
11993
11994 /* Handle multiple target attributes separated by ','. */
11995 char *token = strtok (str_to_check, ",");
11996
11997 unsigned int num_attrs = 0;
11998 while (token)
11999 {
12000 num_attrs++;
ab93e9b7 12001 if (!aarch64_process_one_target_attr (token))
5a2c8331 12002 {
ab93e9b7 12003 error ("pragma or attribute %<target(\"%s\")%> is not valid", token);
5a2c8331
KT
12004 return false;
12005 }
12006
12007 token = strtok (NULL, ",");
12008 }
12009
12010 if (num_attrs != num_commas + 1)
12011 {
ab93e9b7 12012 error ("malformed %<target(\"%s\")%> pragma or attribute", TREE_STRING_POINTER (args));
5a2c8331
KT
12013 return false;
12014 }
12015
12016 return true;
12017}
12018
12019/* Implement TARGET_OPTION_VALID_ATTRIBUTE_P. This is used to
12020 process attribute ((target ("..."))). */
12021
12022static bool
12023aarch64_option_valid_attribute_p (tree fndecl, tree, tree args, int)
12024{
12025 struct cl_target_option cur_target;
12026 bool ret;
12027 tree old_optimize;
12028 tree new_target, new_optimize;
12029 tree existing_target = DECL_FUNCTION_SPECIFIC_TARGET (fndecl);
91d0e8de
KT
12030
12031 /* If what we're processing is the current pragma string then the
12032 target option node is already stored in target_option_current_node
12033 by aarch64_pragma_target_parse in aarch64-c.c. Use that to avoid
12034 having to re-parse the string. This is especially useful to keep
12035 arm_neon.h compile times down since that header contains a lot
12036 of intrinsics enclosed in pragmas. */
12037 if (!existing_target && args == current_target_pragma)
12038 {
12039 DECL_FUNCTION_SPECIFIC_TARGET (fndecl) = target_option_current_node;
12040 return true;
12041 }
5a2c8331
KT
12042 tree func_optimize = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl);
12043
12044 old_optimize = build_optimization_node (&global_options);
12045 func_optimize = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl);
12046
12047 /* If the function changed the optimization levels as well as setting
12048 target options, start with the optimizations specified. */
12049 if (func_optimize && func_optimize != old_optimize)
12050 cl_optimization_restore (&global_options,
12051 TREE_OPTIMIZATION (func_optimize));
12052
12053 /* Save the current target options to restore at the end. */
12054 cl_target_option_save (&cur_target, &global_options);
12055
12056 /* If fndecl already has some target attributes applied to it, unpack
12057 them so that we add this attribute on top of them, rather than
12058 overwriting them. */
12059 if (existing_target)
12060 {
12061 struct cl_target_option *existing_options
12062 = TREE_TARGET_OPTION (existing_target);
12063
12064 if (existing_options)
12065 cl_target_option_restore (&global_options, existing_options);
12066 }
12067 else
12068 cl_target_option_restore (&global_options,
12069 TREE_TARGET_OPTION (target_option_current_node));
12070
ab93e9b7 12071 ret = aarch64_process_target_attr (args);
5a2c8331
KT
12072
12073 /* Set up any additional state. */
12074 if (ret)
12075 {
12076 aarch64_override_options_internal (&global_options);
e95a988a
KT
12077 /* Initialize SIMD builtins if we haven't already.
12078 Set current_target_pragma to NULL for the duration so that
12079 the builtin initialization code doesn't try to tag the functions
12080 being built with the attributes specified by any current pragma, thus
12081 going into an infinite recursion. */
12082 if (TARGET_SIMD)
12083 {
12084 tree saved_current_target_pragma = current_target_pragma;
12085 current_target_pragma = NULL;
12086 aarch64_init_simd_builtins ();
12087 current_target_pragma = saved_current_target_pragma;
12088 }
5a2c8331
KT
12089 new_target = build_target_option_node (&global_options);
12090 }
12091 else
12092 new_target = NULL;
12093
12094 new_optimize = build_optimization_node (&global_options);
12095
12096 if (fndecl && ret)
12097 {
12098 DECL_FUNCTION_SPECIFIC_TARGET (fndecl) = new_target;
12099
12100 if (old_optimize != new_optimize)
12101 DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl) = new_optimize;
12102 }
12103
12104 cl_target_option_restore (&global_options, &cur_target);
12105
12106 if (old_optimize != new_optimize)
12107 cl_optimization_restore (&global_options,
12108 TREE_OPTIMIZATION (old_optimize));
12109 return ret;
12110}
12111
1fd8d40c
KT
12112/* Helper for aarch64_can_inline_p. In the case where CALLER and CALLEE are
12113 tri-bool options (yes, no, don't care) and the default value is
12114 DEF, determine whether to reject inlining. */
12115
12116static bool
12117aarch64_tribools_ok_for_inlining_p (int caller, int callee,
12118 int dont_care, int def)
12119{
12120 /* If the callee doesn't care, always allow inlining. */
12121 if (callee == dont_care)
12122 return true;
12123
12124 /* If the caller doesn't care, always allow inlining. */
12125 if (caller == dont_care)
12126 return true;
12127
12128 /* Otherwise, allow inlining if either the callee and caller values
12129 agree, or if the callee is using the default value. */
12130 return (callee == caller || callee == def);
12131}
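
/* For example, with DONT_CARE == 2 and DEF == 1 (the values used for
   -momit-leaf-frame-pointer below): CALLER == 1, CALLEE == 2 allows
   inlining because the callee does not care, whereas CALLER == 1,
   CALLEE == 0 rejects it because the callee explicitly asked for the
   non-default setting and the caller does not use it.  */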
12132
12133/* Implement TARGET_CAN_INLINE_P. Decide whether it is valid
12134 to inline CALLEE into CALLER based on target-specific info.
12135 Make sure that the caller and callee have compatible architectural
12136 features. Then go through the other possible target attributes
12137 and see if they can block inlining. Try not to reject always_inline
12138 callees unless they are incompatible architecturally. */
12139
12140static bool
12141aarch64_can_inline_p (tree caller, tree callee)
12142{
12143 tree caller_tree = DECL_FUNCTION_SPECIFIC_TARGET (caller);
12144 tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (callee);
12145
1fd8d40c
KT
12146 struct cl_target_option *caller_opts
12147 = TREE_TARGET_OPTION (caller_tree ? caller_tree
12148 : target_option_default_node);
12149
675d044c
SD
12150 struct cl_target_option *callee_opts
12151 = TREE_TARGET_OPTION (callee_tree ? callee_tree
12152 : target_option_default_node);
1fd8d40c
KT
12153
12154 /* Callee's ISA flags should be a subset of the caller's. */
12155 if ((caller_opts->x_aarch64_isa_flags & callee_opts->x_aarch64_isa_flags)
12156 != callee_opts->x_aarch64_isa_flags)
12157 return false;
12158
12159 /* Allow non-strict aligned functions inlining into strict
12160 aligned ones. */
12161 if ((TARGET_STRICT_ALIGN_P (caller_opts->x_target_flags)
12162 != TARGET_STRICT_ALIGN_P (callee_opts->x_target_flags))
12163 && !(!TARGET_STRICT_ALIGN_P (callee_opts->x_target_flags)
12164 && TARGET_STRICT_ALIGN_P (caller_opts->x_target_flags)))
12165 return false;
12166
12167 bool always_inline = lookup_attribute ("always_inline",
12168 DECL_ATTRIBUTES (callee));
12169
12170 /* If the architectural features match up and the callee is always_inline
12171 then the other attributes don't matter. */
12172 if (always_inline)
12173 return true;
12174
12175 if (caller_opts->x_aarch64_cmodel_var
12176 != callee_opts->x_aarch64_cmodel_var)
12177 return false;
12178
12179 if (caller_opts->x_aarch64_tls_dialect
12180 != callee_opts->x_aarch64_tls_dialect)
12181 return false;
12182
12183 /* Honour explicit requests to workaround errata. */
12184 if (!aarch64_tribools_ok_for_inlining_p (
12185 caller_opts->x_aarch64_fix_a53_err835769,
12186 callee_opts->x_aarch64_fix_a53_err835769,
12187 2, TARGET_FIX_ERR_A53_835769_DEFAULT))
12188 return false;
12189
48bb1a55
CL
12190 if (!aarch64_tribools_ok_for_inlining_p (
12191 caller_opts->x_aarch64_fix_a53_err843419,
12192 callee_opts->x_aarch64_fix_a53_err843419,
12193 2, TARGET_FIX_ERR_A53_843419))
12194 return false;
12195
1fd8d40c
KT
12196 /* If the user explicitly specified -momit-leaf-frame-pointer for the
12197 caller and callee and they don't match up, reject inlining. */
12198 if (!aarch64_tribools_ok_for_inlining_p (
12199 caller_opts->x_flag_omit_leaf_frame_pointer,
12200 callee_opts->x_flag_omit_leaf_frame_pointer,
12201 2, 1))
12202 return false;
12203
12204 /* If the callee has specific tuning overrides, respect them. */
12205 if (callee_opts->x_aarch64_override_tune_string != NULL
12206 && caller_opts->x_aarch64_override_tune_string == NULL)
12207 return false;
12208
12209 /* If the user specified tuning override strings for the
12210 caller and callee and they don't match up, reject inlining.
12211 We just do a string compare here, we don't analyze the meaning
12212 of the string, as it would be too costly for little gain. */
12213 if (callee_opts->x_aarch64_override_tune_string
12214 && caller_opts->x_aarch64_override_tune_string
12215 && (strcmp (callee_opts->x_aarch64_override_tune_string,
12216 caller_opts->x_aarch64_override_tune_string) != 0))
12217 return false;
12218
12219 return true;
12220}
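
/* For instance, a callee carrying __attribute__ ((target ("+sve"))) cannot
   be inlined into a caller compiled without SVE: the callee's ISA flags are
   not a subset of the caller's, so the first check above rejects the
   inlining before any of the tuning-related attributes are considered.  */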
12221
43e9d192
IB
12222/* Return true if SYMBOL_REF X binds locally. */
12223
12224static bool
12225aarch64_symbol_binds_local_p (const_rtx x)
12226{
12227 return (SYMBOL_REF_DECL (x)
12228 ? targetm.binds_local_p (SYMBOL_REF_DECL (x))
12229 : SYMBOL_REF_LOCAL_P (x));
12230}
12231
12232/* Return true if SYMBOL_REF X is thread local */
12233static bool
12234aarch64_tls_symbol_p (rtx x)
12235{
12236 if (! TARGET_HAVE_TLS)
12237 return false;
12238
12239 if (GET_CODE (x) != SYMBOL_REF)
12240 return false;
12241
12242 return SYMBOL_REF_TLS_MODEL (x) != 0;
12243}
12244
12245/* Classify a TLS symbol into one of the TLS kinds. */
12246enum aarch64_symbol_type
12247aarch64_classify_tls_symbol (rtx x)
12248{
12249 enum tls_model tls_kind = tls_symbolic_operand_type (x);
12250
12251 switch (tls_kind)
12252 {
12253 case TLS_MODEL_GLOBAL_DYNAMIC:
12254 case TLS_MODEL_LOCAL_DYNAMIC:
12255 return TARGET_TLS_DESC ? SYMBOL_SMALL_TLSDESC : SYMBOL_SMALL_TLSGD;
12256
12257 case TLS_MODEL_INITIAL_EXEC:
5ae7caad
JW
12258 switch (aarch64_cmodel)
12259 {
12260 case AARCH64_CMODEL_TINY:
12261 case AARCH64_CMODEL_TINY_PIC:
12262 return SYMBOL_TINY_TLSIE;
12263 default:
79496620 12264 return SYMBOL_SMALL_TLSIE;
5ae7caad 12265 }
43e9d192
IB
12266
12267 case TLS_MODEL_LOCAL_EXEC:
cbf5629e
JW
12268 if (aarch64_tls_size == 12)
12269 return SYMBOL_TLSLE12;
12270 else if (aarch64_tls_size == 24)
12271 return SYMBOL_TLSLE24;
12272 else if (aarch64_tls_size == 32)
12273 return SYMBOL_TLSLE32;
12274 else if (aarch64_tls_size == 48)
12275 return SYMBOL_TLSLE48;
12276 else
12277 gcc_unreachable ();
43e9d192
IB
12278
12279 case TLS_MODEL_EMULATED:
12280 case TLS_MODEL_NONE:
12281 return SYMBOL_FORCE_TO_MEM;
12282
12283 default:
12284 gcc_unreachable ();
12285 }
12286}
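
/* For example, a local-exec symbol with aarch64_tls_size == 24 is
   classified as SYMBOL_TLSLE24 above, while a global-dynamic symbol
   becomes SYMBOL_SMALL_TLSDESC when TLS descriptors are in use and
   SYMBOL_SMALL_TLSGD otherwise.  */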
12287
43cacb12
RS
12288/* Return the correct method for accessing X + OFFSET, where X is either
12289 a SYMBOL_REF or LABEL_REF. */
17f4d4bf 12290
43e9d192 12291enum aarch64_symbol_type
43cacb12 12292aarch64_classify_symbol (rtx x, HOST_WIDE_INT offset)
43e9d192
IB
12293{
12294 if (GET_CODE (x) == LABEL_REF)
12295 {
12296 switch (aarch64_cmodel)
12297 {
12298 case AARCH64_CMODEL_LARGE:
12299 return SYMBOL_FORCE_TO_MEM;
12300
12301 case AARCH64_CMODEL_TINY_PIC:
12302 case AARCH64_CMODEL_TINY:
a5350ddc
CSS
12303 return SYMBOL_TINY_ABSOLUTE;
12304
1b1e81f8 12305 case AARCH64_CMODEL_SMALL_SPIC:
43e9d192
IB
12306 case AARCH64_CMODEL_SMALL_PIC:
12307 case AARCH64_CMODEL_SMALL:
12308 return SYMBOL_SMALL_ABSOLUTE;
12309
12310 default:
12311 gcc_unreachable ();
12312 }
12313 }
12314
17f4d4bf 12315 if (GET_CODE (x) == SYMBOL_REF)
43e9d192 12316 {
43e9d192
IB
12317 if (aarch64_tls_symbol_p (x))
12318 return aarch64_classify_tls_symbol (x);
12319
17f4d4bf
CSS
12320 switch (aarch64_cmodel)
12321 {
12322 case AARCH64_CMODEL_TINY:
15f6e0da 12323 /* When we retrieve symbol + offset address, we have to make sure
f8b756b7
TB
12324 the offset does not cause overflow of the final address. But
12325 we have no way of knowing the address of symbol at compile time
12326 so we can't accurately say if the distance between the PC and
12327 symbol + offset is outside the addressable range of +/-1M in the
12328 TINY code model. So we rely on images not being greater than
12329 1M and cap the offset at 1M; anything beyond 1M will have to
15f6e0da
RR
12330 be loaded using an alternative mechanism. Furthermore if the
12331 symbol is a weak reference to something that isn't known to
12332 resolve to a symbol in this module, then force to memory. */
12333 if ((SYMBOL_REF_WEAK (x)
12334 && !aarch64_symbol_binds_local_p (x))
43cacb12 12335 || !IN_RANGE (offset, -1048575, 1048575))
a5350ddc
CSS
12336 return SYMBOL_FORCE_TO_MEM;
12337 return SYMBOL_TINY_ABSOLUTE;
12338
17f4d4bf 12339 case AARCH64_CMODEL_SMALL:
f8b756b7
TB
12340 /* Same reasoning as the tiny code model, but the offset cap here is
12341 4G. */
15f6e0da
RR
12342 if ((SYMBOL_REF_WEAK (x)
12343 && !aarch64_symbol_binds_local_p (x))
43cacb12 12344 || !IN_RANGE (offset, HOST_WIDE_INT_C (-4294967263),
3ff5d1f0 12345 HOST_WIDE_INT_C (4294967264)))
17f4d4bf
CSS
12346 return SYMBOL_FORCE_TO_MEM;
12347 return SYMBOL_SMALL_ABSOLUTE;
43e9d192 12348
17f4d4bf 12349 case AARCH64_CMODEL_TINY_PIC:
38e6c9a6 12350 if (!aarch64_symbol_binds_local_p (x))
87dd8ab0 12351 return SYMBOL_TINY_GOT;
38e6c9a6
MS
12352 return SYMBOL_TINY_ABSOLUTE;
12353
1b1e81f8 12354 case AARCH64_CMODEL_SMALL_SPIC:
17f4d4bf
CSS
12355 case AARCH64_CMODEL_SMALL_PIC:
12356 if (!aarch64_symbol_binds_local_p (x))
1b1e81f8
JW
12357 return (aarch64_cmodel == AARCH64_CMODEL_SMALL_SPIC
12358 ? SYMBOL_SMALL_GOT_28K : SYMBOL_SMALL_GOT_4G);
17f4d4bf 12359 return SYMBOL_SMALL_ABSOLUTE;
43e9d192 12360
9ee6540a
WD
12361 case AARCH64_CMODEL_LARGE:
12362 /* This is alright even in PIC code as the constant
12363 pool reference is always PC relative and within
12364 the same translation unit. */
d47d34bb 12365 if (!aarch64_pcrelative_literal_loads && CONSTANT_POOL_ADDRESS_P (x))
9ee6540a
WD
12366 return SYMBOL_SMALL_ABSOLUTE;
12367 else
12368 return SYMBOL_FORCE_TO_MEM;
12369
17f4d4bf
CSS
12370 default:
12371 gcc_unreachable ();
12372 }
43e9d192 12373 }
17f4d4bf 12374
43e9d192
IB
12375 /* By default push everything into the constant pool. */
12376 return SYMBOL_FORCE_TO_MEM;
12377}
12378
43e9d192
IB
12379bool
12380aarch64_constant_address_p (rtx x)
12381{
12382 return (CONSTANT_P (x) && memory_address_p (DImode, x));
12383}
12384
12385bool
12386aarch64_legitimate_pic_operand_p (rtx x)
12387{
12388 if (GET_CODE (x) == SYMBOL_REF
12389 || (GET_CODE (x) == CONST
12390 && GET_CODE (XEXP (x, 0)) == PLUS
12391 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF))
12392 return false;
12393
12394 return true;
12395}
12396
26895c21
WD
12397/* Implement TARGET_LEGITIMATE_CONSTANT_P hook. Return true for constants
12398 that should be rematerialized rather than spilled. */
3520f7cc 12399
43e9d192 12400static bool
ef4bddc2 12401aarch64_legitimate_constant_p (machine_mode mode, rtx x)
43e9d192 12402{
26895c21 12403 /* Support CSE and rematerialization of common constants. */
c0bb5bc5 12404 if (CONST_INT_P (x)
9f7b87ca 12405 || (CONST_DOUBLE_P (x) && GET_MODE_CLASS (mode) == MODE_FLOAT)
c0bb5bc5 12406 || GET_CODE (x) == CONST_VECTOR)
26895c21
WD
12407 return true;
12408
43cacb12
RS
12409 /* Do not allow vector struct mode constants for Advanced SIMD.
12410 We could support 0 and -1 easily, but they need support in
12411 aarch64-simd.md. */
12412 unsigned int vec_flags = aarch64_classify_vector_mode (mode);
12413 if (vec_flags == (VEC_ADVSIMD | VEC_STRUCT))
43e9d192
IB
12414 return false;
12415
43cacb12
RS
12416 /* Only accept variable-length vector constants if they can be
12417 handled directly.
12418
12419 ??? It would be possible to handle rematerialization of other
12420 constants via secondary reloads. */
12421 if (vec_flags & VEC_ANY_SVE)
12422 return aarch64_simd_valid_immediate (x, NULL);
12423
509bb9b6
RS
12424 if (GET_CODE (x) == HIGH)
12425 x = XEXP (x, 0);
12426
43cacb12
RS
12427 /* Accept polynomial constants that can be calculated by using the
12428 destination of a move as the sole temporary. Constants that
12429 require a second temporary cannot be rematerialized (they can't be
12430 forced to memory and also aren't legitimate constants). */
12431 poly_int64 offset;
12432 if (poly_int_rtx_p (x, &offset))
12433 return aarch64_offset_temporaries (false, offset) <= 1;
12434
12435 /* If an offset is being added to something else, we need to allow the
12436 base to be moved into the destination register, meaning that there
12437 are no free temporaries for the offset. */
12438 x = strip_offset (x, &offset);
12439 if (!offset.is_constant () && aarch64_offset_temporaries (true, offset) > 0)
12440 return false;
26895c21 12441
43cacb12
RS
12442 /* Do not allow const (plus (anchor_symbol, const_int)). */
12443 if (maybe_ne (offset, 0) && SYMBOL_REF_P (x) && SYMBOL_REF_ANCHOR_P (x))
12444 return false;
26895c21 12445
f28e54bd
WD
12446 /* Treat symbols as constants. Avoid TLS symbols as they are complex,
12447 so spilling them is better than rematerialization. */
12448 if (SYMBOL_REF_P (x) && !SYMBOL_REF_TLS_MODEL (x))
12449 return true;
12450
26895c21
WD
12451 /* Label references are always constant. */
12452 if (GET_CODE (x) == LABEL_REF)
12453 return true;
12454
12455 return false;
43e9d192
IB
12456}
12457
a5bc806c 12458rtx
43e9d192
IB
12459aarch64_load_tp (rtx target)
12460{
12461 if (!target
12462 || GET_MODE (target) != Pmode
12463 || !register_operand (target, Pmode))
12464 target = gen_reg_rtx (Pmode);
12465
12466 /* Can return in any reg. */
12467 emit_insn (gen_aarch64_load_tp_hard (target));
12468 return target;
12469}
12470
43e9d192
IB
12471/* On AAPCS systems, this is the "struct __va_list". */
12472static GTY(()) tree va_list_type;
12473
12474/* Implement TARGET_BUILD_BUILTIN_VA_LIST.
12475 Return the type to use as __builtin_va_list.
12476
12477 AAPCS64 \S 7.1.4 requires that va_list be a typedef for a type defined as:
12478
12479 struct __va_list
12480 {
12481 void *__stack;
12482 void *__gr_top;
12483 void *__vr_top;
12484 int __gr_offs;
12485 int __vr_offs;
12486 }; */
12487
12488static tree
12489aarch64_build_builtin_va_list (void)
12490{
12491 tree va_list_name;
12492 tree f_stack, f_grtop, f_vrtop, f_groff, f_vroff;
12493
12494 /* Create the type. */
12495 va_list_type = lang_hooks.types.make_type (RECORD_TYPE);
12496 /* Give it the required name. */
12497 va_list_name = build_decl (BUILTINS_LOCATION,
12498 TYPE_DECL,
12499 get_identifier ("__va_list"),
12500 va_list_type);
12501 DECL_ARTIFICIAL (va_list_name) = 1;
12502 TYPE_NAME (va_list_type) = va_list_name;
665c56c6 12503 TYPE_STUB_DECL (va_list_type) = va_list_name;
43e9d192
IB
12504
12505 /* Create the fields. */
12506 f_stack = build_decl (BUILTINS_LOCATION,
12507 FIELD_DECL, get_identifier ("__stack"),
12508 ptr_type_node);
12509 f_grtop = build_decl (BUILTINS_LOCATION,
12510 FIELD_DECL, get_identifier ("__gr_top"),
12511 ptr_type_node);
12512 f_vrtop = build_decl (BUILTINS_LOCATION,
12513 FIELD_DECL, get_identifier ("__vr_top"),
12514 ptr_type_node);
12515 f_groff = build_decl (BUILTINS_LOCATION,
12516 FIELD_DECL, get_identifier ("__gr_offs"),
12517 integer_type_node);
12518 f_vroff = build_decl (BUILTINS_LOCATION,
12519 FIELD_DECL, get_identifier ("__vr_offs"),
12520 integer_type_node);
12521
88e3bdd1 12522 /* Tell tree-stdarg pass about our internal offset fields.
3fd6b9cc
JW
12523 NOTE: va_list_gpr/fpr_counter_field are only used for tree comparison
12524 purposes, to identify whether the code is updating va_list internal
12525 offset fields in an irregular way. */
12526 va_list_gpr_counter_field = f_groff;
12527 va_list_fpr_counter_field = f_vroff;
12528
43e9d192
IB
12529 DECL_ARTIFICIAL (f_stack) = 1;
12530 DECL_ARTIFICIAL (f_grtop) = 1;
12531 DECL_ARTIFICIAL (f_vrtop) = 1;
12532 DECL_ARTIFICIAL (f_groff) = 1;
12533 DECL_ARTIFICIAL (f_vroff) = 1;
12534
12535 DECL_FIELD_CONTEXT (f_stack) = va_list_type;
12536 DECL_FIELD_CONTEXT (f_grtop) = va_list_type;
12537 DECL_FIELD_CONTEXT (f_vrtop) = va_list_type;
12538 DECL_FIELD_CONTEXT (f_groff) = va_list_type;
12539 DECL_FIELD_CONTEXT (f_vroff) = va_list_type;
12540
12541 TYPE_FIELDS (va_list_type) = f_stack;
12542 DECL_CHAIN (f_stack) = f_grtop;
12543 DECL_CHAIN (f_grtop) = f_vrtop;
12544 DECL_CHAIN (f_vrtop) = f_groff;
12545 DECL_CHAIN (f_groff) = f_vroff;
12546
12547 /* Compute its layout. */
12548 layout_type (va_list_type);
12549
12550 return va_list_type;
12551}
12552
12553/* Implement TARGET_EXPAND_BUILTIN_VA_START. */
12554static void
12555aarch64_expand_builtin_va_start (tree valist, rtx nextarg ATTRIBUTE_UNUSED)
12556{
12557 const CUMULATIVE_ARGS *cum;
12558 tree f_stack, f_grtop, f_vrtop, f_groff, f_vroff;
12559 tree stack, grtop, vrtop, groff, vroff;
12560 tree t;
88e3bdd1
JW
12561 int gr_save_area_size = cfun->va_list_gpr_size;
12562 int vr_save_area_size = cfun->va_list_fpr_size;
43e9d192
IB
12563 int vr_offset;
12564
12565 cum = &crtl->args.info;
88e3bdd1
JW
12566 if (cfun->va_list_gpr_size)
12567 gr_save_area_size = MIN ((NUM_ARG_REGS - cum->aapcs_ncrn) * UNITS_PER_WORD,
12568 cfun->va_list_gpr_size);
12569 if (cfun->va_list_fpr_size)
12570 vr_save_area_size = MIN ((NUM_FP_ARG_REGS - cum->aapcs_nvrn)
12571 * UNITS_PER_VREG, cfun->va_list_fpr_size);
43e9d192 12572
d5726973 12573 if (!TARGET_FLOAT)
43e9d192 12574 {
261fb553 12575 gcc_assert (cum->aapcs_nvrn == 0);
43e9d192
IB
12576 vr_save_area_size = 0;
12577 }
12578
12579 f_stack = TYPE_FIELDS (va_list_type_node);
12580 f_grtop = DECL_CHAIN (f_stack);
12581 f_vrtop = DECL_CHAIN (f_grtop);
12582 f_groff = DECL_CHAIN (f_vrtop);
12583 f_vroff = DECL_CHAIN (f_groff);
12584
12585 stack = build3 (COMPONENT_REF, TREE_TYPE (f_stack), valist, f_stack,
12586 NULL_TREE);
12587 grtop = build3 (COMPONENT_REF, TREE_TYPE (f_grtop), valist, f_grtop,
12588 NULL_TREE);
12589 vrtop = build3 (COMPONENT_REF, TREE_TYPE (f_vrtop), valist, f_vrtop,
12590 NULL_TREE);
12591 groff = build3 (COMPONENT_REF, TREE_TYPE (f_groff), valist, f_groff,
12592 NULL_TREE);
12593 vroff = build3 (COMPONENT_REF, TREE_TYPE (f_vroff), valist, f_vroff,
12594 NULL_TREE);
12595
12596 /* Emit code to initialize STACK, which points to the next varargs stack
12597 argument. CUM->AAPCS_STACK_SIZE gives the number of stack words used
12598 by named arguments. STACK is 8-byte aligned. */
12599 t = make_tree (TREE_TYPE (stack), virtual_incoming_args_rtx);
12600 if (cum->aapcs_stack_size > 0)
12601 t = fold_build_pointer_plus_hwi (t, cum->aapcs_stack_size * UNITS_PER_WORD);
12602 t = build2 (MODIFY_EXPR, TREE_TYPE (stack), stack, t);
12603 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
12604
12605 /* Emit code to initialize GRTOP, the top of the GR save area.
12606 virtual_incoming_args_rtx should have been 16 byte aligned. */
12607 t = make_tree (TREE_TYPE (grtop), virtual_incoming_args_rtx);
12608 t = build2 (MODIFY_EXPR, TREE_TYPE (grtop), grtop, t);
12609 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
12610
12611 /* Emit code to initialize VRTOP, the top of the VR save area.
12612 This address is gr_save_area_bytes below GRTOP, rounded
12613 down to the next 16-byte boundary. */
12614 t = make_tree (TREE_TYPE (vrtop), virtual_incoming_args_rtx);
4f59f9f2
UB
12615 vr_offset = ROUND_UP (gr_save_area_size,
12616 STACK_BOUNDARY / BITS_PER_UNIT);
43e9d192
IB
12617
12618 if (vr_offset)
12619 t = fold_build_pointer_plus_hwi (t, -vr_offset);
12620 t = build2 (MODIFY_EXPR, TREE_TYPE (vrtop), vrtop, t);
12621 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
12622
12623 /* Emit code to initialize GROFF, the offset from GRTOP of the
12624 next GPR argument. */
12625 t = build2 (MODIFY_EXPR, TREE_TYPE (groff), groff,
12626 build_int_cst (TREE_TYPE (groff), -gr_save_area_size));
12627 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
12628
12629 /* Likewise emit code to initialize VROFF, the offset from FTOP
12630 of the next VR argument. */
12631 t = build2 (MODIFY_EXPR, TREE_TYPE (vroff), vroff,
12632 build_int_cst (TREE_TYPE (vroff), -vr_save_area_size));
12633 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
12634}
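
/* Schematically, the fields set up above together with the register save
   areas allocated by aarch64_setup_incoming_varargs look like this
   (higher addresses towards the top):

     | anonymous stack args |  <- __stack
     | named stack args     |
     +----------------------+  <- __gr_top (== virtual incoming args)
     | GR save area         |     __gr_offs = -gr_save_area_size
     +----------------------+  <- __vr_top (16-byte aligned below __gr_top)
     | VR save area         |     __vr_offs = -vr_save_area_size
     +----------------------+  */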
12635
12636/* Implement TARGET_GIMPLIFY_VA_ARG_EXPR. */
12637
12638static tree
12639aarch64_gimplify_va_arg_expr (tree valist, tree type, gimple_seq *pre_p,
12640 gimple_seq *post_p ATTRIBUTE_UNUSED)
12641{
12642 tree addr;
12643 bool indirect_p;
12644 bool is_ha; /* is HFA or HVA. */
12645 bool dw_align; /* double-word align. */
ef4bddc2 12646 machine_mode ag_mode = VOIDmode;
43e9d192 12647 int nregs;
ef4bddc2 12648 machine_mode mode;
43e9d192
IB
12649
12650 tree f_stack, f_grtop, f_vrtop, f_groff, f_vroff;
12651 tree stack, f_top, f_off, off, arg, roundup, on_stack;
12652 HOST_WIDE_INT size, rsize, adjust, align;
12653 tree t, u, cond1, cond2;
12654
12655 indirect_p = pass_by_reference (NULL, TYPE_MODE (type), type, false);
12656 if (indirect_p)
12657 type = build_pointer_type (type);
12658
12659 mode = TYPE_MODE (type);
12660
12661 f_stack = TYPE_FIELDS (va_list_type_node);
12662 f_grtop = DECL_CHAIN (f_stack);
12663 f_vrtop = DECL_CHAIN (f_grtop);
12664 f_groff = DECL_CHAIN (f_vrtop);
12665 f_vroff = DECL_CHAIN (f_groff);
12666
12667 stack = build3 (COMPONENT_REF, TREE_TYPE (f_stack), unshare_expr (valist),
12668 f_stack, NULL_TREE);
12669 size = int_size_in_bytes (type);
985b8393 12670 align = aarch64_function_arg_alignment (mode, type) / BITS_PER_UNIT;
43e9d192
IB
12671
12672 dw_align = false;
12673 adjust = 0;
12674 if (aarch64_vfp_is_call_or_return_candidate (mode,
12675 type,
12676 &ag_mode,
12677 &nregs,
12678 &is_ha))
12679 {
6a70badb
RS
12680 /* No frontends can create types with variable-sized modes, so we
12681 shouldn't be asked to pass or return them. */
12682 unsigned int ag_size = GET_MODE_SIZE (ag_mode).to_constant ();
12683
43e9d192 12684 /* TYPE passed in fp/simd registers. */
d5726973 12685 if (!TARGET_FLOAT)
fc29dfc9 12686 aarch64_err_no_fpadvsimd (mode);
43e9d192
IB
12687
12688 f_top = build3 (COMPONENT_REF, TREE_TYPE (f_vrtop),
12689 unshare_expr (valist), f_vrtop, NULL_TREE);
12690 f_off = build3 (COMPONENT_REF, TREE_TYPE (f_vroff),
12691 unshare_expr (valist), f_vroff, NULL_TREE);
12692
12693 rsize = nregs * UNITS_PER_VREG;
12694
12695 if (is_ha)
12696 {
6a70badb
RS
12697 if (BYTES_BIG_ENDIAN && ag_size < UNITS_PER_VREG)
12698 adjust = UNITS_PER_VREG - ag_size;
43e9d192 12699 }
76b0cbf8 12700 else if (BLOCK_REG_PADDING (mode, type, 1) == PAD_DOWNWARD
43e9d192
IB
12701 && size < UNITS_PER_VREG)
12702 {
12703 adjust = UNITS_PER_VREG - size;
12704 }
12705 }
12706 else
12707 {
12708 /* TYPE passed in general registers. */
12709 f_top = build3 (COMPONENT_REF, TREE_TYPE (f_grtop),
12710 unshare_expr (valist), f_grtop, NULL_TREE);
12711 f_off = build3 (COMPONENT_REF, TREE_TYPE (f_groff),
12712 unshare_expr (valist), f_groff, NULL_TREE);
4f59f9f2 12713 rsize = ROUND_UP (size, UNITS_PER_WORD);
43e9d192
IB
12714 nregs = rsize / UNITS_PER_WORD;
12715
12716 if (align > 8)
12717 dw_align = true;
12718
76b0cbf8 12719 if (BLOCK_REG_PADDING (mode, type, 1) == PAD_DOWNWARD
43e9d192
IB
12720 && size < UNITS_PER_WORD)
12721 {
12722 adjust = UNITS_PER_WORD - size;
12723 }
12724 }
12725
12726 /* Get a local temporary for the field value. */
12727 off = get_initialized_tmp_var (f_off, pre_p, NULL);
12728
12729 /* Emit code to branch if off >= 0. */
12730 t = build2 (GE_EXPR, boolean_type_node, off,
12731 build_int_cst (TREE_TYPE (off), 0));
12732 cond1 = build3 (COND_EXPR, ptr_type_node, t, NULL_TREE, NULL_TREE);
12733
12734 if (dw_align)
12735 {
12736 /* Emit: offs = (offs + 15) & -16. */
12737 t = build2 (PLUS_EXPR, TREE_TYPE (off), off,
12738 build_int_cst (TREE_TYPE (off), 15));
12739 t = build2 (BIT_AND_EXPR, TREE_TYPE (off), t,
12740 build_int_cst (TREE_TYPE (off), -16));
12741 roundup = build2 (MODIFY_EXPR, TREE_TYPE (off), off, t);
12742 }
12743 else
12744 roundup = NULL;
12745
12746 /* Update ap.__[g|v]r_offs */
12747 t = build2 (PLUS_EXPR, TREE_TYPE (off), off,
12748 build_int_cst (TREE_TYPE (off), rsize));
12749 t = build2 (MODIFY_EXPR, TREE_TYPE (f_off), unshare_expr (f_off), t);
12750
12751 /* String up. */
12752 if (roundup)
12753 t = build2 (COMPOUND_EXPR, TREE_TYPE (t), roundup, t);
12754
12755 /* [cond2] if (ap.__[g|v]r_offs > 0) */
12756 u = build2 (GT_EXPR, boolean_type_node, unshare_expr (f_off),
12757 build_int_cst (TREE_TYPE (f_off), 0));
12758 cond2 = build3 (COND_EXPR, ptr_type_node, u, NULL_TREE, NULL_TREE);
12759
12760 /* String up: make sure the assignment happens before the use. */
12761 t = build2 (COMPOUND_EXPR, TREE_TYPE (cond2), t, cond2);
12762 COND_EXPR_ELSE (cond1) = t;
12763
12764 /* Prepare the trees handling the argument that is passed on the stack;
12765 the top level node will store in ON_STACK. */
12766 arg = get_initialized_tmp_var (stack, pre_p, NULL);
12767 if (align > 8)
12768 {
12769 /* if (alignof(type) > 8) (arg = arg + 15) & -16; */
4bdc2738 12770 t = fold_build_pointer_plus_hwi (arg, 15);
43e9d192
IB
12771 t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
12772 build_int_cst (TREE_TYPE (t), -16));
43e9d192
IB
12773 roundup = build2 (MODIFY_EXPR, TREE_TYPE (arg), arg, t);
12774 }
12775 else
12776 roundup = NULL;
12777 /* Advance ap.__stack */
4bdc2738 12778 t = fold_build_pointer_plus_hwi (arg, size + 7);
43e9d192
IB
12779 t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
12780 build_int_cst (TREE_TYPE (t), -8));
43e9d192
IB
12781 t = build2 (MODIFY_EXPR, TREE_TYPE (stack), unshare_expr (stack), t);
12782 /* String up roundup and advance. */
12783 if (roundup)
12784 t = build2 (COMPOUND_EXPR, TREE_TYPE (t), roundup, t);
12785 /* String up with arg */
12786 on_stack = build2 (COMPOUND_EXPR, TREE_TYPE (arg), t, arg);
12787 /* Big-endianness related address adjustment. */
76b0cbf8 12788 if (BLOCK_REG_PADDING (mode, type, 1) == PAD_DOWNWARD
43e9d192
IB
12789 && size < UNITS_PER_WORD)
12790 {
12791 t = build2 (POINTER_PLUS_EXPR, TREE_TYPE (arg), arg,
12792 size_int (UNITS_PER_WORD - size));
12793 on_stack = build2 (COMPOUND_EXPR, TREE_TYPE (arg), on_stack, t);
12794 }
12795
12796 COND_EXPR_THEN (cond1) = unshare_expr (on_stack);
12797 COND_EXPR_THEN (cond2) = unshare_expr (on_stack);
12798
12799 /* Adjustment to OFFSET in the case of BIG_ENDIAN. */
12800 t = off;
12801 if (adjust)
12802 t = build2 (PREINCREMENT_EXPR, TREE_TYPE (off), off,
12803 build_int_cst (TREE_TYPE (off), adjust));
12804
12805 t = fold_convert (sizetype, t);
12806 t = build2 (POINTER_PLUS_EXPR, TREE_TYPE (f_top), f_top, t);
12807
12808 if (is_ha)
12809 {
12810 /* type ha; // treat as "struct {ftype field[n];}"
12811 ... [computing offs]
12812 for (i = 0; i <nregs; ++i, offs += 16)
12813 ha.field[i] = *((ftype *)(ap.__vr_top + offs));
12814 return ha; */
12815 int i;
12816 tree tmp_ha, field_t, field_ptr_t;
12817
12818 /* Declare a local variable. */
12819 tmp_ha = create_tmp_var_raw (type, "ha");
12820 gimple_add_tmp_var (tmp_ha);
12821
12822 /* Establish the base type. */
12823 switch (ag_mode)
12824 {
4e10a5a7 12825 case E_SFmode:
43e9d192
IB
12826 field_t = float_type_node;
12827 field_ptr_t = float_ptr_type_node;
12828 break;
4e10a5a7 12829 case E_DFmode:
43e9d192
IB
12830 field_t = double_type_node;
12831 field_ptr_t = double_ptr_type_node;
12832 break;
4e10a5a7 12833 case E_TFmode:
43e9d192
IB
12834 field_t = long_double_type_node;
12835 field_ptr_t = long_double_ptr_type_node;
12836 break;
4e10a5a7 12837 case E_HFmode:
1b62ed4f
JG
12838 field_t = aarch64_fp16_type_node;
12839 field_ptr_t = aarch64_fp16_ptr_type_node;
43e9d192 12840 break;
4e10a5a7
RS
12841 case E_V2SImode:
12842 case E_V4SImode:
43e9d192
IB
12843 {
12844 tree innertype = make_signed_type (GET_MODE_PRECISION (SImode));
12845 field_t = build_vector_type_for_mode (innertype, ag_mode);
12846 field_ptr_t = build_pointer_type (field_t);
12847 }
12848 break;
12849 default:
12850 gcc_assert (0);
12851 }
12852
12853 /* *(field_ptr_t)&ha = *((field_ptr_t)vr_saved_area) */
12854 tmp_ha = build1 (ADDR_EXPR, field_ptr_t, tmp_ha);
12855 addr = t;
12856 t = fold_convert (field_ptr_t, addr);
12857 t = build2 (MODIFY_EXPR, field_t,
12858 build1 (INDIRECT_REF, field_t, tmp_ha),
12859 build1 (INDIRECT_REF, field_t, t));
12860
12861 /* ha.field[i] = *((field_ptr_t)vr_saved_area + i) */
12862 for (i = 1; i < nregs; ++i)
12863 {
12864 addr = fold_build_pointer_plus_hwi (addr, UNITS_PER_VREG);
12865 u = fold_convert (field_ptr_t, addr);
12866 u = build2 (MODIFY_EXPR, field_t,
12867 build2 (MEM_REF, field_t, tmp_ha,
12868 build_int_cst (field_ptr_t,
12869 (i *
12870 int_size_in_bytes (field_t)))),
12871 build1 (INDIRECT_REF, field_t, u));
12872 t = build2 (COMPOUND_EXPR, TREE_TYPE (t), t, u);
12873 }
12874
12875 u = fold_convert (TREE_TYPE (f_top), tmp_ha);
12876 t = build2 (COMPOUND_EXPR, TREE_TYPE (f_top), t, u);
12877 }
12878
12879 COND_EXPR_ELSE (cond2) = t;
12880 addr = fold_convert (build_pointer_type (type), cond1);
12881 addr = build_va_arg_indirect_ref (addr);
12882
12883 if (indirect_p)
12884 addr = build_va_arg_indirect_ref (addr);
12885
12886 return addr;
12887}
12888
12889/* Implement TARGET_SETUP_INCOMING_VARARGS. */
12890
12891static void
ef4bddc2 12892aarch64_setup_incoming_varargs (cumulative_args_t cum_v, machine_mode mode,
43e9d192
IB
12893 tree type, int *pretend_size ATTRIBUTE_UNUSED,
12894 int no_rtl)
12895{
12896 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
12897 CUMULATIVE_ARGS local_cum;
88e3bdd1
JW
12898 int gr_saved = cfun->va_list_gpr_size;
12899 int vr_saved = cfun->va_list_fpr_size;
43e9d192
IB
12900
12901 /* The caller has advanced CUM up to, but not beyond, the last named
12902 argument. Advance a local copy of CUM past the last "real" named
12903 argument, to find out how many registers are left over. */
12904 local_cum = *cum;
12905 aarch64_function_arg_advance (pack_cumulative_args(&local_cum), mode, type, true);
12906
88e3bdd1
JW
12907 /* Find out how many registers we need to save.
12908 Honor tree-stdarg analysis results. */
12909 if (cfun->va_list_gpr_size)
12910 gr_saved = MIN (NUM_ARG_REGS - local_cum.aapcs_ncrn,
12911 cfun->va_list_gpr_size / UNITS_PER_WORD);
12912 if (cfun->va_list_fpr_size)
12913 vr_saved = MIN (NUM_FP_ARG_REGS - local_cum.aapcs_nvrn,
12914 cfun->va_list_fpr_size / UNITS_PER_VREG);
43e9d192 12915
d5726973 12916 if (!TARGET_FLOAT)
43e9d192 12917 {
261fb553 12918 gcc_assert (local_cum.aapcs_nvrn == 0);
43e9d192
IB
12919 vr_saved = 0;
12920 }
12921
12922 if (!no_rtl)
12923 {
12924 if (gr_saved > 0)
12925 {
12926 rtx ptr, mem;
12927
12928 /* virtual_incoming_args_rtx should have been 16-byte aligned. */
12929 ptr = plus_constant (Pmode, virtual_incoming_args_rtx,
12930 - gr_saved * UNITS_PER_WORD);
12931 mem = gen_frame_mem (BLKmode, ptr);
12932 set_mem_alias_set (mem, get_varargs_alias_set ());
12933
12934 move_block_from_reg (local_cum.aapcs_ncrn + R0_REGNUM,
12935 mem, gr_saved);
12936 }
12937 if (vr_saved > 0)
12938 {
12939 /* We can't use move_block_from_reg, because it will use
12940 the wrong mode, storing D regs only. */
ef4bddc2 12941 machine_mode mode = TImode;
88e3bdd1 12942 int off, i, vr_start;
43e9d192
IB
12943
12944 /* Set OFF to the offset from virtual_incoming_args_rtx of
12945 the first vector register. The VR save area lies below
12946 the GR one, and is aligned to 16 bytes. */
4f59f9f2
UB
12947 off = -ROUND_UP (gr_saved * UNITS_PER_WORD,
12948 STACK_BOUNDARY / BITS_PER_UNIT);
43e9d192
IB
12949 off -= vr_saved * UNITS_PER_VREG;
12950
88e3bdd1
JW
12951 vr_start = V0_REGNUM + local_cum.aapcs_nvrn;
12952 for (i = 0; i < vr_saved; ++i)
43e9d192
IB
12953 {
12954 rtx ptr, mem;
12955
12956 ptr = plus_constant (Pmode, virtual_incoming_args_rtx, off);
12957 mem = gen_frame_mem (mode, ptr);
12958 set_mem_alias_set (mem, get_varargs_alias_set ());
88e3bdd1 12959 aarch64_emit_move (mem, gen_rtx_REG (mode, vr_start + i));
43e9d192
IB
12960 off += UNITS_PER_VREG;
12961 }
12962 }
12963 }
12964
12965 /* We don't save the size into *PRETEND_SIZE because we want to avoid
12966 any complication of having crtl->args.pretend_args_size changed. */
8799637a 12967 cfun->machine->frame.saved_varargs_size
4f59f9f2
UB
12968 = (ROUND_UP (gr_saved * UNITS_PER_WORD,
12969 STACK_BOUNDARY / BITS_PER_UNIT)
43e9d192
IB
12970 + vr_saved * UNITS_PER_VREG);
12971}
12972
12973static void
12974aarch64_conditional_register_usage (void)
12975{
12976 int i;
12977 if (!TARGET_FLOAT)
12978 {
12979 for (i = V0_REGNUM; i <= V31_REGNUM; i++)
12980 {
12981 fixed_regs[i] = 1;
12982 call_used_regs[i] = 1;
12983 }
12984 }
43cacb12
RS
12985 if (!TARGET_SVE)
12986 for (i = P0_REGNUM; i <= P15_REGNUM; i++)
12987 {
12988 fixed_regs[i] = 1;
12989 call_used_regs[i] = 1;
12990 }
3751345d
RE
12991
12992 /* When tracking speculation, we need a couple of call-clobbered registers
12993 to track the speculation state. It would be nice to just use
12994 IP0 and IP1, but currently there are numerous places that just
12995 assume these registers are free for other uses (eg pointer
12996 authentication). */
12997 if (aarch64_track_speculation)
12998 {
12999 fixed_regs[SPECULATION_TRACKER_REGNUM] = 1;
13000 call_used_regs[SPECULATION_TRACKER_REGNUM] = 1;
13001 fixed_regs[SPECULATION_SCRATCH_REGNUM] = 1;
13002 call_used_regs[SPECULATION_SCRATCH_REGNUM] = 1;
13003 }
43e9d192
IB
13004}
13005
13006/* Walk down the type tree of TYPE counting consecutive base elements.
13007 If *MODEP is VOIDmode, then set it to the first valid floating point
13008 type. If a non-floating point type is found, or if a floating point
13009 type that doesn't match a non-VOIDmode *MODEP is found, then return -1,
13010 otherwise return the count in the sub-tree. */
13011static int
ef4bddc2 13012aapcs_vfp_sub_candidate (const_tree type, machine_mode *modep)
43e9d192 13013{
ef4bddc2 13014 machine_mode mode;
43e9d192
IB
13015 HOST_WIDE_INT size;
13016
13017 switch (TREE_CODE (type))
13018 {
13019 case REAL_TYPE:
13020 mode = TYPE_MODE (type);
1b62ed4f
JG
13021 if (mode != DFmode && mode != SFmode
13022 && mode != TFmode && mode != HFmode)
43e9d192
IB
13023 return -1;
13024
13025 if (*modep == VOIDmode)
13026 *modep = mode;
13027
13028 if (*modep == mode)
13029 return 1;
13030
13031 break;
13032
13033 case COMPLEX_TYPE:
13034 mode = TYPE_MODE (TREE_TYPE (type));
1b62ed4f
JG
13035 if (mode != DFmode && mode != SFmode
13036 && mode != TFmode && mode != HFmode)
43e9d192
IB
13037 return -1;
13038
13039 if (*modep == VOIDmode)
13040 *modep = mode;
13041
13042 if (*modep == mode)
13043 return 2;
13044
13045 break;
13046
13047 case VECTOR_TYPE:
13048 /* Use V2SImode and V4SImode as representatives of all 64-bit
13049 and 128-bit vector types. */
13050 size = int_size_in_bytes (type);
13051 switch (size)
13052 {
13053 case 8:
13054 mode = V2SImode;
13055 break;
13056 case 16:
13057 mode = V4SImode;
13058 break;
13059 default:
13060 return -1;
13061 }
13062
13063 if (*modep == VOIDmode)
13064 *modep = mode;
13065
13066 /* Vector modes are considered to be opaque: two vectors are
13067 equivalent for the purposes of being homogeneous aggregates
13068 if they are the same size. */
13069 if (*modep == mode)
13070 return 1;
13071
13072 break;
13073
13074 case ARRAY_TYPE:
13075 {
13076 int count;
13077 tree index = TYPE_DOMAIN (type);
13078
807e902e
KZ
13079 /* Can't handle incomplete types nor sizes that are not
13080 fixed. */
13081 if (!COMPLETE_TYPE_P (type)
13082 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
43e9d192
IB
13083 return -1;
13084
13085 count = aapcs_vfp_sub_candidate (TREE_TYPE (type), modep);
13086 if (count == -1
13087 || !index
13088 || !TYPE_MAX_VALUE (index)
cc269bb6 13089 || !tree_fits_uhwi_p (TYPE_MAX_VALUE (index))
43e9d192 13090 || !TYPE_MIN_VALUE (index)
cc269bb6 13091 || !tree_fits_uhwi_p (TYPE_MIN_VALUE (index))
43e9d192
IB
13092 || count < 0)
13093 return -1;
13094
ae7e9ddd
RS
13095 count *= (1 + tree_to_uhwi (TYPE_MAX_VALUE (index))
13096 - tree_to_uhwi (TYPE_MIN_VALUE (index)));
43e9d192
IB
13097
13098 /* There must be no padding. */
6a70badb
RS
13099 if (maybe_ne (wi::to_poly_wide (TYPE_SIZE (type)),
13100 count * GET_MODE_BITSIZE (*modep)))
43e9d192
IB
13101 return -1;
13102
13103 return count;
13104 }
13105
13106 case RECORD_TYPE:
13107 {
13108 int count = 0;
13109 int sub_count;
13110 tree field;
13111
807e902e
KZ
13112 /* Can't handle incomplete types nor sizes that are not
13113 fixed. */
13114 if (!COMPLETE_TYPE_P (type)
13115 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
43e9d192
IB
13116 return -1;
13117
13118 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
13119 {
13120 if (TREE_CODE (field) != FIELD_DECL)
13121 continue;
13122
13123 sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep);
13124 if (sub_count < 0)
13125 return -1;
13126 count += sub_count;
13127 }
13128
13129 /* There must be no padding. */
6a70badb
RS
13130 if (maybe_ne (wi::to_poly_wide (TYPE_SIZE (type)),
13131 count * GET_MODE_BITSIZE (*modep)))
43e9d192
IB
13132 return -1;
13133
13134 return count;
13135 }
13136
13137 case UNION_TYPE:
13138 case QUAL_UNION_TYPE:
13139 {
13140 /* These aren't very interesting except in a degenerate case. */
13141 int count = 0;
13142 int sub_count;
13143 tree field;
13144
807e902e
KZ
13145 /* Can't handle incomplete types nor sizes that are not
13146 fixed. */
13147 if (!COMPLETE_TYPE_P (type)
13148 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
43e9d192
IB
13149 return -1;
13150
13151 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
13152 {
13153 if (TREE_CODE (field) != FIELD_DECL)
13154 continue;
13155
13156 sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep);
13157 if (sub_count < 0)
13158 return -1;
13159 count = count > sub_count ? count : sub_count;
13160 }
13161
13162 /* There must be no padding. */
6a70badb
RS
13163 if (maybe_ne (wi::to_poly_wide (TYPE_SIZE (type)),
13164 count * GET_MODE_BITSIZE (*modep)))
43e9d192
IB
13165 return -1;
13166
13167 return count;
13168 }
13169
13170 default:
13171 break;
13172 }
13173
13174 return -1;
13175}
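
/* For example, for

     struct hfa { double x, y, z; };

   the walk above returns 3 with *MODEP set to DFmode, so the struct is a
   homogeneous floating-point aggregate of three doubles.  A struct that
   mixes float and double fields returns -1 instead, because the second
   base mode no longer matches the first.  */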
13176
b6ec6215
KT
13177/* Return TRUE if the type, as described by TYPE and MODE, is a short vector
13178 type as described in AAPCS64 \S 4.1.2.
13179
13180 See the comment above aarch64_composite_type_p for the notes on MODE. */
13181
13182static bool
13183aarch64_short_vector_p (const_tree type,
13184 machine_mode mode)
13185{
6a70badb 13186 poly_int64 size = -1;
b6ec6215
KT
13187
13188 if (type && TREE_CODE (type) == VECTOR_TYPE)
13189 size = int_size_in_bytes (type);
13190 else if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
13191 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
13192 size = GET_MODE_SIZE (mode);
13193
6a70badb 13194 return known_eq (size, 8) || known_eq (size, 16);
b6ec6215
KT
13195}
13196
43e9d192
IB
13197/* Return TRUE if the type, as described by TYPE and MODE, is a composite
13198 type as described in AAPCS64 \S 4.3. This includes aggregate, union and
13199 array types. The C99 floating-point complex types are also considered
13200 as composite types, according to AAPCS64 \S 7.1.1. The complex integer
13201 types, which are GCC extensions and out of the scope of AAPCS64, are
13202 treated as composite types here as well.
13203
13204 Note that MODE itself is not sufficient in determining whether a type
13205 is such a composite type or not. This is because
13206 stor-layout.c:compute_record_mode may have already changed the MODE
13207 (BLKmode) of a RECORD_TYPE TYPE to some other mode. For example, a
13208 structure with only one field may have its MODE set to the mode of the
13209 field. Also an integer mode whose size matches the size of the
13210 RECORD_TYPE type may be used to substitute the original mode
13211 (i.e. BLKmode) in certain circumstances. In other words, MODE cannot be
13212 solely relied on. */
13213
13214static bool
13215aarch64_composite_type_p (const_tree type,
ef4bddc2 13216 machine_mode mode)
43e9d192 13217{
b6ec6215
KT
13218 if (aarch64_short_vector_p (type, mode))
13219 return false;
13220
43e9d192
IB
13221 if (type && (AGGREGATE_TYPE_P (type) || TREE_CODE (type) == COMPLEX_TYPE))
13222 return true;
13223
13224 if (mode == BLKmode
13225 || GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT
13226 || GET_MODE_CLASS (mode) == MODE_COMPLEX_INT)
13227 return true;
13228
13229 return false;
13230}
13231
43e9d192
IB
13232/* Return TRUE if an argument, whose type is described by TYPE and MODE,
13233 shall be passed or returned in simd/fp register(s) (providing these
13234 parameter passing registers are available).
13235
13236 Upon successful return, *COUNT returns the number of needed registers,
13237 *BASE_MODE returns the mode of the individual register and when IS_HA
13238 is not NULL, *IS_HA indicates whether or not the argument is a homogeneous
13239 floating-point aggregate or a homogeneous short-vector aggregate. */
13240
13241static bool
ef4bddc2 13242aarch64_vfp_is_call_or_return_candidate (machine_mode mode,
43e9d192 13243 const_tree type,
ef4bddc2 13244 machine_mode *base_mode,
43e9d192
IB
13245 int *count,
13246 bool *is_ha)
13247{
ef4bddc2 13248 machine_mode new_mode = VOIDmode;
43e9d192
IB
13249 bool composite_p = aarch64_composite_type_p (type, mode);
13250
13251 if (is_ha != NULL) *is_ha = false;
13252
13253 if ((!composite_p && GET_MODE_CLASS (mode) == MODE_FLOAT)
13254 || aarch64_short_vector_p (type, mode))
13255 {
13256 *count = 1;
13257 new_mode = mode;
13258 }
13259 else if (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT)
13260 {
13261 if (is_ha != NULL) *is_ha = true;
13262 *count = 2;
13263 new_mode = GET_MODE_INNER (mode);
13264 }
13265 else if (type && composite_p)
13266 {
13267 int ag_count = aapcs_vfp_sub_candidate (type, &new_mode);
13268
13269 if (ag_count > 0 && ag_count <= HA_MAX_NUM_FLDS)
13270 {
13271 if (is_ha != NULL) *is_ha = true;
13272 *count = ag_count;
13273 }
13274 else
13275 return false;
13276 }
13277 else
13278 return false;
13279
13280 *base_mode = new_mode;
13281 return true;
13282}
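/* Illustrative note (not part of the original sources): under the AAPCS64
   rules implemented above, a C type such as

     struct hfa { float a, b, c; };

   is a homogeneous floating-point aggregate: *count would be 3 and
   *base_mode SFmode, so it can be passed in three consecutive SIMD/FP
   registers.  A struct mixing float and integer members is not an HFA and
   falls back to the general rules, and HA_MAX_NUM_FLDS limits HFAs/HVAs to
   four members.  */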
13283
13284/* Implement TARGET_STRUCT_VALUE_RTX. */
13285
13286static rtx
13287aarch64_struct_value_rtx (tree fndecl ATTRIBUTE_UNUSED,
13288 int incoming ATTRIBUTE_UNUSED)
13289{
13290 return gen_rtx_REG (Pmode, AARCH64_STRUCT_VALUE_REGNUM);
13291}
13292
13293/* Implements target hook vector_mode_supported_p. */
13294static bool
ef4bddc2 13295aarch64_vector_mode_supported_p (machine_mode mode)
43e9d192 13296{
43cacb12
RS
13297 unsigned int vec_flags = aarch64_classify_vector_mode (mode);
13298 return vec_flags != 0 && (vec_flags & VEC_STRUCT) == 0;
43e9d192
IB
13299}
13300
b7342d25
IB
13301/* Return appropriate SIMD container
13302 for MODE within a vector of WIDTH bits. */
ef4bddc2 13303static machine_mode
43cacb12 13304aarch64_simd_container_mode (scalar_mode mode, poly_int64 width)
43e9d192 13305{
43cacb12
RS
13306 if (TARGET_SVE && known_eq (width, BITS_PER_SVE_VECTOR))
13307 switch (mode)
13308 {
13309 case E_DFmode:
13310 return VNx2DFmode;
13311 case E_SFmode:
13312 return VNx4SFmode;
13313 case E_HFmode:
13314 return VNx8HFmode;
13315 case E_DImode:
13316 return VNx2DImode;
13317 case E_SImode:
13318 return VNx4SImode;
13319 case E_HImode:
13320 return VNx8HImode;
13321 case E_QImode:
13322 return VNx16QImode;
13323 default:
13324 return word_mode;
13325 }
13326
13327 gcc_assert (known_eq (width, 64) || known_eq (width, 128));
43e9d192 13328 if (TARGET_SIMD)
b7342d25 13329 {
43cacb12 13330 if (known_eq (width, 128))
b7342d25
IB
13331 switch (mode)
13332 {
4e10a5a7 13333 case E_DFmode:
b7342d25 13334 return V2DFmode;
4e10a5a7 13335 case E_SFmode:
b7342d25 13336 return V4SFmode;
4e10a5a7 13337 case E_HFmode:
b719f884 13338 return V8HFmode;
4e10a5a7 13339 case E_SImode:
b7342d25 13340 return V4SImode;
4e10a5a7 13341 case E_HImode:
b7342d25 13342 return V8HImode;
4e10a5a7 13343 case E_QImode:
b7342d25 13344 return V16QImode;
4e10a5a7 13345 case E_DImode:
b7342d25
IB
13346 return V2DImode;
13347 default:
13348 break;
13349 }
13350 else
13351 switch (mode)
13352 {
4e10a5a7 13353 case E_SFmode:
b7342d25 13354 return V2SFmode;
4e10a5a7 13355 case E_HFmode:
b719f884 13356 return V4HFmode;
4e10a5a7 13357 case E_SImode:
b7342d25 13358 return V2SImode;
4e10a5a7 13359 case E_HImode:
b7342d25 13360 return V4HImode;
4e10a5a7 13361 case E_QImode:
b7342d25
IB
13362 return V8QImode;
13363 default:
13364 break;
13365 }
13366 }
43e9d192
IB
13367 return word_mode;
13368}
13369
b7342d25 13370/* Return 128-bit container as the preferred SIMD mode for MODE. */
ef4bddc2 13371static machine_mode
005ba29c 13372aarch64_preferred_simd_mode (scalar_mode mode)
b7342d25 13373{
43cacb12
RS
13374 poly_int64 bits = TARGET_SVE ? BITS_PER_SVE_VECTOR : 128;
13375 return aarch64_simd_container_mode (mode, bits);
b7342d25
IB
13376}
13377
86e36728 13378/* Return a list of possible vector sizes for the vectorizer
3b357264 13379 to iterate over. */
86e36728
RS
13380static void
13381aarch64_autovectorize_vector_sizes (vector_sizes *sizes)
3b357264 13382{
43cacb12
RS
13383 if (TARGET_SVE)
13384 sizes->safe_push (BYTES_PER_SVE_VECTOR);
86e36728
RS
13385 sizes->safe_push (16);
13386 sizes->safe_push (8);
3b357264
JG
13387}
13388
ac2b960f
YZ
13389/* Implement TARGET_MANGLE_TYPE. */
13390
6f549691 13391static const char *
ac2b960f
YZ
13392aarch64_mangle_type (const_tree type)
13393{
13394 /* The AArch64 ABI documents say that "__va_list" has to be
13395      mangled as if it is in the "std" namespace.  */
13396 if (lang_hooks.types_compatible_p (CONST_CAST_TREE (type), va_list_type))
13397 return "St9__va_list";
13398
c2ec330c
AL
13399 /* Half-precision float. */
13400 if (TREE_CODE (type) == REAL_TYPE && TYPE_PRECISION (type) == 16)
13401 return "Dh";
13402
f9d53c27
TB
13403 /* Mangle AArch64-specific internal types. TYPE_NAME is non-NULL_TREE for
13404 builtin types. */
13405 if (TYPE_NAME (type) != NULL)
13406 return aarch64_mangle_builtin_type (type);
c6fc9e43 13407
ac2b960f
YZ
13408 /* Use the default mangling. */
13409 return NULL;
13410}
13411
75cf1494
KT
13412/* Find the first rtx_insn before insn that will generate an assembly
13413 instruction. */
13414
13415static rtx_insn *
13416aarch64_prev_real_insn (rtx_insn *insn)
13417{
13418 if (!insn)
13419 return NULL;
13420
13421 do
13422 {
13423 insn = prev_real_insn (insn);
13424 }
13425 while (insn && recog_memoized (insn) < 0);
13426
13427 return insn;
13428}
13429
13430static bool
13431is_madd_op (enum attr_type t1)
13432{
13433 unsigned int i;
13434 /* A number of these may be AArch32 only. */
13435 enum attr_type mlatypes[] = {
13436 TYPE_MLA, TYPE_MLAS, TYPE_SMLAD, TYPE_SMLADX, TYPE_SMLAL, TYPE_SMLALD,
13437 TYPE_SMLALS, TYPE_SMLALXY, TYPE_SMLAWX, TYPE_SMLAWY, TYPE_SMLAXY,
13438     TYPE_SMMLA, TYPE_UMLAL, TYPE_UMLALS, TYPE_SMLSD, TYPE_SMLSDX, TYPE_SMLSLD
13439 };
13440
13441 for (i = 0; i < sizeof (mlatypes) / sizeof (enum attr_type); i++)
13442 {
13443 if (t1 == mlatypes[i])
13444 return true;
13445 }
13446
13447 return false;
13448}
13449
13450/* Check if there is a register dependency between a load and the insn
13451 for which we hold recog_data. */
13452
13453static bool
13454dep_between_memop_and_curr (rtx memop)
13455{
13456 rtx load_reg;
13457 int opno;
13458
8baff86e 13459 gcc_assert (GET_CODE (memop) == SET);
75cf1494
KT
13460
13461 if (!REG_P (SET_DEST (memop)))
13462 return false;
13463
13464 load_reg = SET_DEST (memop);
8baff86e 13465 for (opno = 1; opno < recog_data.n_operands; opno++)
75cf1494
KT
13466 {
13467 rtx operand = recog_data.operand[opno];
13468 if (REG_P (operand)
13469 && reg_overlap_mentioned_p (load_reg, operand))
13470 return true;
13471
13472 }
13473 return false;
13474}
13475
8baff86e
KT
13476
13477/* When working around the Cortex-A53 erratum 835769,
13478 given rtx_insn INSN, return true if it is a 64-bit multiply-accumulate
13479 instruction and has a preceding memory instruction such that a NOP
13480 should be inserted between them. */
13481
75cf1494
KT
13482bool
13483aarch64_madd_needs_nop (rtx_insn* insn)
13484{
13485 enum attr_type attr_type;
13486 rtx_insn *prev;
13487 rtx body;
13488
b32c1043 13489 if (!TARGET_FIX_ERR_A53_835769)
75cf1494
KT
13490 return false;
13491
e322d6e3 13492 if (!INSN_P (insn) || recog_memoized (insn) < 0)
75cf1494
KT
13493 return false;
13494
13495 attr_type = get_attr_type (insn);
13496 if (!is_madd_op (attr_type))
13497 return false;
13498
13499 prev = aarch64_prev_real_insn (insn);
3fea1a75
KT
13500 /* aarch64_prev_real_insn can call recog_memoized on insns other than INSN.
13501 Restore recog state to INSN to avoid state corruption. */
13502 extract_constrain_insn_cached (insn);
13503
550e2205 13504 if (!prev || !contains_mem_rtx_p (PATTERN (prev)))
75cf1494
KT
13505 return false;
13506
13507 body = single_set (prev);
13508
13509 /* If the previous insn is a memory op and there is no dependency between
8baff86e
KT
13510 it and the DImode madd, emit a NOP between them. If body is NULL then we
13511 have a complex memory operation, probably a load/store pair.
13512 Be conservative for now and emit a NOP. */
13513 if (GET_MODE (recog_data.operand[0]) == DImode
13514 && (!body || !dep_between_memop_and_curr (body)))
75cf1494
KT
13515 return true;
13516
13517 return false;
13518
13519}
13520
8baff86e
KT
13521
13522/* Implement FINAL_PRESCAN_INSN. */
13523
75cf1494
KT
13524void
13525aarch64_final_prescan_insn (rtx_insn *insn)
13526{
13527 if (aarch64_madd_needs_nop (insn))
13528 fprintf (asm_out_file, "\tnop // between mem op and mult-accumulate\n");
13529}
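/* Illustrative sketch (register choices are arbitrary, not taken from the
   sources): with the -mfix-cortex-a53-835769 workaround enabled, the hooks
   above turn a sequence such as

     ldr  x1, [x2]
     madd x0, x3, x4, x5

   into

     ldr  x1, [x2]
     nop                              // between mem op and mult-accumulate
     madd x0, x3, x4, x5

   so that the 64-bit multiply-accumulate no longer immediately follows the
   memory access, which is the pairing the erratum workaround avoids.  */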
13530
13531
43cacb12
RS
13532/* Return true if BASE_OR_STEP is a valid immediate operand for an SVE INDEX
13533 instruction. */
13534
13535bool
13536aarch64_sve_index_immediate_p (rtx base_or_step)
13537{
13538 return (CONST_INT_P (base_or_step)
13539 && IN_RANGE (INTVAL (base_or_step), -16, 15));
13540}
13541
13542/* Return true if X is a valid immediate for the SVE ADD and SUB
13543 instructions. Negate X first if NEGATE_P is true. */
13544
13545bool
13546aarch64_sve_arith_immediate_p (rtx x, bool negate_p)
13547{
13548 rtx elt;
13549
13550 if (!const_vec_duplicate_p (x, &elt)
13551 || !CONST_INT_P (elt))
13552 return false;
13553
13554 HOST_WIDE_INT val = INTVAL (elt);
13555 if (negate_p)
13556 val = -val;
13557 val &= GET_MODE_MASK (GET_MODE_INNER (GET_MODE (x)));
13558
13559 if (val & 0xff)
13560 return IN_RANGE (val, 0, 0xff);
13561 return IN_RANGE (val, 0, 0xff00);
13562}
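/* Worked examples for the check above (illustrative, not from the sources):
   a duplicated element value of 0x2a is accepted as an 8-bit immediate,
   0x2a00 is accepted as 0x2a shifted left by 8, and 0x0101 is rejected
   because it has bits set both in and above the low byte.  This mirrors the
   SVE ADD/SUB encoding of an 8-bit immediate with an optional LSL #8.  */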
13563
13564/* Return true if X is a valid immediate operand for an SVE logical
13565 instruction such as AND. */
13566
13567bool
13568aarch64_sve_bitmask_immediate_p (rtx x)
13569{
13570 rtx elt;
13571
13572 return (const_vec_duplicate_p (x, &elt)
13573 && CONST_INT_P (elt)
13574 && aarch64_bitmask_imm (INTVAL (elt),
13575 GET_MODE_INNER (GET_MODE (x))));
13576}
13577
13578/* Return true if X is a valid immediate for the SVE DUP and CPY
13579 instructions. */
13580
13581bool
13582aarch64_sve_dup_immediate_p (rtx x)
13583{
13584 rtx elt;
13585
13586 if (!const_vec_duplicate_p (x, &elt)
13587 || !CONST_INT_P (elt))
13588 return false;
13589
13590 HOST_WIDE_INT val = INTVAL (elt);
13591 if (val & 0xff)
13592 return IN_RANGE (val, -0x80, 0x7f);
13593 return IN_RANGE (val, -0x8000, 0x7f00);
13594}
13595
13596/* Return true if X is a valid immediate operand for an SVE CMP instruction.
13597 SIGNED_P says whether the operand is signed rather than unsigned. */
13598
13599bool
13600aarch64_sve_cmp_immediate_p (rtx x, bool signed_p)
13601{
13602 rtx elt;
13603
13604 return (const_vec_duplicate_p (x, &elt)
13605 && CONST_INT_P (elt)
13606 && (signed_p
13607 ? IN_RANGE (INTVAL (elt), -16, 15)
13608 : IN_RANGE (INTVAL (elt), 0, 127)));
13609}
13610
13611/* Return true if X is a valid immediate operand for an SVE FADD or FSUB
13612 instruction. Negate X first if NEGATE_P is true. */
13613
13614bool
13615aarch64_sve_float_arith_immediate_p (rtx x, bool negate_p)
13616{
13617 rtx elt;
13618 REAL_VALUE_TYPE r;
13619
13620 if (!const_vec_duplicate_p (x, &elt)
13621 || GET_CODE (elt) != CONST_DOUBLE)
13622 return false;
13623
13624 r = *CONST_DOUBLE_REAL_VALUE (elt);
13625
13626 if (negate_p)
13627 r = real_value_negate (&r);
13628
13629 if (real_equal (&r, &dconst1))
13630 return true;
13631 if (real_equal (&r, &dconsthalf))
13632 return true;
13633 return false;
13634}
13635
13636/* Return true if X is a valid immediate operand for an SVE FMUL
13637 instruction. */
13638
13639bool
13640aarch64_sve_float_mul_immediate_p (rtx x)
13641{
13642 rtx elt;
13643
13644 /* GCC will never generate a multiply with an immediate of 2, so there is no
13645 point testing for it (even though it is a valid constant). */
13646 return (const_vec_duplicate_p (x, &elt)
13647 && GET_CODE (elt) == CONST_DOUBLE
13648 && real_equal (CONST_DOUBLE_REAL_VALUE (elt), &dconsthalf));
13649}
13650
b187677b
RS
13651/* Return true if replicating VAL32 is a valid 2-byte or 4-byte immediate
13652 for the Advanced SIMD operation described by WHICH and INSN. If INFO
13653 is nonnull, use it to describe valid immediates. */
3520f7cc 13654static bool
b187677b
RS
13655aarch64_advsimd_valid_immediate_hs (unsigned int val32,
13656 simd_immediate_info *info,
13657 enum simd_immediate_check which,
13658 simd_immediate_info::insn_type insn)
13659{
13660 /* Try a 4-byte immediate with LSL. */
13661 for (unsigned int shift = 0; shift < 32; shift += 8)
13662 if ((val32 & (0xff << shift)) == val32)
13663 {
13664 if (info)
13665 *info = simd_immediate_info (SImode, val32 >> shift, insn,
13666 simd_immediate_info::LSL, shift);
13667 return true;
13668 }
3520f7cc 13669
b187677b
RS
13670 /* Try a 2-byte immediate with LSL. */
13671 unsigned int imm16 = val32 & 0xffff;
13672 if (imm16 == (val32 >> 16))
13673 for (unsigned int shift = 0; shift < 16; shift += 8)
13674 if ((imm16 & (0xff << shift)) == imm16)
48063b9d 13675 {
b187677b
RS
13676 if (info)
13677 *info = simd_immediate_info (HImode, imm16 >> shift, insn,
13678 simd_immediate_info::LSL, shift);
13679 return true;
48063b9d 13680 }
3520f7cc 13681
b187677b
RS
13682 /* Try a 4-byte immediate with MSL, except for cases that MVN
13683 can handle. */
13684 if (which == AARCH64_CHECK_MOV)
13685 for (unsigned int shift = 8; shift < 24; shift += 8)
13686 {
13687 unsigned int low = (1 << shift) - 1;
13688 if (((val32 & (0xff << shift)) | low) == val32)
13689 {
13690 if (info)
13691 *info = simd_immediate_info (SImode, val32 >> shift, insn,
13692 simd_immediate_info::MSL, shift);
13693 return true;
13694 }
13695 }
43e9d192 13696
b187677b
RS
13697 return false;
13698}
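/* Examples of the checks above (illustrative, not from the sources): a
   replicated 32-bit value of 0x0000ab00 matches the 4-byte case as immediate
   0xab with LSL #8; 0x00cd00cd matches the 2-byte case as immediate 0xcd
   with no shift; and, for MOV-type checks, 0x0012ffff matches the MSL case
   as immediate 0x12 with MSL #16, since all bits below the shifted byte are
   ones.  */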
13699
13700/* Return true if replicating VAL64 is a valid immediate for the
13701 Advanced SIMD operation described by WHICH. If INFO is nonnull,
13702 use it to describe valid immediates. */
13703static bool
13704aarch64_advsimd_valid_immediate (unsigned HOST_WIDE_INT val64,
13705 simd_immediate_info *info,
13706 enum simd_immediate_check which)
13707{
13708 unsigned int val32 = val64 & 0xffffffff;
13709 unsigned int val16 = val64 & 0xffff;
13710 unsigned int val8 = val64 & 0xff;
13711
13712 if (val32 == (val64 >> 32))
43e9d192 13713 {
b187677b
RS
13714 if ((which & AARCH64_CHECK_ORR) != 0
13715 && aarch64_advsimd_valid_immediate_hs (val32, info, which,
13716 simd_immediate_info::MOV))
13717 return true;
43e9d192 13718
b187677b
RS
13719 if ((which & AARCH64_CHECK_BIC) != 0
13720 && aarch64_advsimd_valid_immediate_hs (~val32, info, which,
13721 simd_immediate_info::MVN))
13722 return true;
ee78df47 13723
b187677b
RS
13724 /* Try using a replicated byte. */
13725 if (which == AARCH64_CHECK_MOV
13726 && val16 == (val32 >> 16)
13727 && val8 == (val16 >> 8))
ee78df47 13728 {
b187677b
RS
13729 if (info)
13730 *info = simd_immediate_info (QImode, val8);
13731 return true;
ee78df47 13732 }
43e9d192
IB
13733 }
13734
b187677b
RS
13735 /* Try using a bit-to-bytemask. */
13736 if (which == AARCH64_CHECK_MOV)
43e9d192 13737 {
b187677b
RS
13738 unsigned int i;
13739 for (i = 0; i < 64; i += 8)
ab6501d7 13740 {
b187677b
RS
13741 unsigned char byte = (val64 >> i) & 0xff;
13742 if (byte != 0 && byte != 0xff)
13743 break;
ab6501d7 13744 }
b187677b 13745 if (i == 64)
ab6501d7 13746 {
b187677b
RS
13747 if (info)
13748 *info = simd_immediate_info (DImode, val64);
13749 return true;
ab6501d7 13750 }
43e9d192 13751 }
b187677b
RS
13752 return false;
13753}
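/* Illustrative case for the byte-mask check above: a 64-bit value such as
   0xff00ff00ff00ff00, in which every byte is either 0x00 or 0xff, is
   accepted as a DImode immediate (the 64-bit byte-mask MOVI form), whereas
   0xff01ff00ff00ff00 is rejected because one byte is neither all-zeros nor
   all-ones.  */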
43e9d192 13754
43cacb12
RS
13755/* Return true if replicating VAL64 gives a valid immediate for an SVE MOV
13756 instruction. If INFO is nonnull, use it to describe valid immediates. */
13757
13758static bool
13759aarch64_sve_valid_immediate (unsigned HOST_WIDE_INT val64,
13760 simd_immediate_info *info)
13761{
13762 scalar_int_mode mode = DImode;
13763 unsigned int val32 = val64 & 0xffffffff;
13764 if (val32 == (val64 >> 32))
13765 {
13766 mode = SImode;
13767 unsigned int val16 = val32 & 0xffff;
13768 if (val16 == (val32 >> 16))
13769 {
13770 mode = HImode;
13771 unsigned int val8 = val16 & 0xff;
13772 if (val8 == (val16 >> 8))
13773 mode = QImode;
13774 }
13775 }
13776 HOST_WIDE_INT val = trunc_int_for_mode (val64, mode);
13777 if (IN_RANGE (val, -0x80, 0x7f))
13778 {
13779 /* DUP with no shift. */
13780 if (info)
13781 *info = simd_immediate_info (mode, val);
13782 return true;
13783 }
13784 if ((val & 0xff) == 0 && IN_RANGE (val, -0x8000, 0x7f00))
13785 {
13786 /* DUP with LSL #8. */
13787 if (info)
13788 *info = simd_immediate_info (mode, val);
13789 return true;
13790 }
13791 if (aarch64_bitmask_imm (val64, mode))
13792 {
13793 /* DUPM. */
13794 if (info)
13795 *info = simd_immediate_info (mode, val);
13796 return true;
13797 }
13798 return false;
13799}
13800
b187677b
RS
13801/* Return true if OP is a valid SIMD immediate for the operation
13802 described by WHICH. If INFO is nonnull, use it to describe valid
13803 immediates. */
13804bool
13805aarch64_simd_valid_immediate (rtx op, simd_immediate_info *info,
13806 enum simd_immediate_check which)
13807{
43cacb12
RS
13808 machine_mode mode = GET_MODE (op);
13809 unsigned int vec_flags = aarch64_classify_vector_mode (mode);
13810 if (vec_flags == 0 || vec_flags == (VEC_ADVSIMD | VEC_STRUCT))
13811 return false;
13812
13813 scalar_mode elt_mode = GET_MODE_INNER (mode);
f9093f23 13814 rtx base, step;
b187677b 13815 unsigned int n_elts;
f9093f23
RS
13816 if (GET_CODE (op) == CONST_VECTOR
13817 && CONST_VECTOR_DUPLICATE_P (op))
13818 n_elts = CONST_VECTOR_NPATTERNS (op);
43cacb12
RS
13819 else if ((vec_flags & VEC_SVE_DATA)
13820 && const_vec_series_p (op, &base, &step))
13821 {
13822 gcc_assert (GET_MODE_CLASS (mode) == MODE_VECTOR_INT);
13823 if (!aarch64_sve_index_immediate_p (base)
13824 || !aarch64_sve_index_immediate_p (step))
13825 return false;
13826
13827 if (info)
13828 *info = simd_immediate_info (elt_mode, base, step);
13829 return true;
13830 }
6a70badb
RS
13831 else if (GET_CODE (op) == CONST_VECTOR
13832 && CONST_VECTOR_NUNITS (op).is_constant (&n_elts))
13833 /* N_ELTS set above. */;
b187677b 13834 else
d8edd899 13835 return false;
43e9d192 13836
43cacb12
RS
13837 /* Handle PFALSE and PTRUE. */
13838 if (vec_flags & VEC_SVE_PRED)
13839 return (op == CONST0_RTX (mode)
13840 || op == CONSTM1_RTX (mode));
13841
b187677b 13842 scalar_float_mode elt_float_mode;
f9093f23
RS
13843 if (n_elts == 1
13844 && is_a <scalar_float_mode> (elt_mode, &elt_float_mode))
43e9d192 13845 {
f9093f23
RS
13846 rtx elt = CONST_VECTOR_ENCODED_ELT (op, 0);
13847 if (aarch64_float_const_zero_rtx_p (elt)
13848 || aarch64_float_const_representable_p (elt))
13849 {
13850 if (info)
13851 *info = simd_immediate_info (elt_float_mode, elt);
13852 return true;
13853 }
b187677b 13854 }
43e9d192 13855
b187677b
RS
13856 unsigned int elt_size = GET_MODE_SIZE (elt_mode);
13857 if (elt_size > 8)
13858 return false;
e4f0f84d 13859
b187677b 13860 scalar_int_mode elt_int_mode = int_mode_for_mode (elt_mode).require ();
43e9d192 13861
b187677b
RS
13862 /* Expand the vector constant out into a byte vector, with the least
13863 significant byte of the register first. */
13864 auto_vec<unsigned char, 16> bytes;
13865 bytes.reserve (n_elts * elt_size);
13866 for (unsigned int i = 0; i < n_elts; i++)
13867 {
f9093f23
RS
13868 /* The vector is provided in gcc endian-neutral fashion.
13869 For aarch64_be Advanced SIMD, it must be laid out in the vector
13870 register in reverse order. */
13871 bool swap_p = ((vec_flags & VEC_ADVSIMD) != 0 && BYTES_BIG_ENDIAN);
13872 rtx elt = CONST_VECTOR_ELT (op, swap_p ? (n_elts - 1 - i) : i);
43e9d192 13873
b187677b
RS
13874 if (elt_mode != elt_int_mode)
13875 elt = gen_lowpart (elt_int_mode, elt);
43e9d192 13876
b187677b
RS
13877 if (!CONST_INT_P (elt))
13878 return false;
43e9d192 13879
b187677b
RS
13880 unsigned HOST_WIDE_INT elt_val = INTVAL (elt);
13881 for (unsigned int byte = 0; byte < elt_size; byte++)
48063b9d 13882 {
b187677b
RS
13883 bytes.quick_push (elt_val & 0xff);
13884 elt_val >>= BITS_PER_UNIT;
48063b9d 13885 }
43e9d192
IB
13886 }
13887
b187677b
RS
13888 /* The immediate must repeat every eight bytes. */
13889 unsigned int nbytes = bytes.length ();
13890 for (unsigned i = 8; i < nbytes; ++i)
13891 if (bytes[i] != bytes[i - 8])
13892 return false;
13893
13894 /* Get the repeating 8-byte value as an integer. No endian correction
13895 is needed here because bytes is already in lsb-first order. */
13896 unsigned HOST_WIDE_INT val64 = 0;
13897 for (unsigned int i = 0; i < 8; i++)
13898 val64 |= ((unsigned HOST_WIDE_INT) bytes[i % nbytes]
13899 << (i * BITS_PER_UNIT));
13900
43cacb12
RS
13901 if (vec_flags & VEC_SVE_DATA)
13902 return aarch64_sve_valid_immediate (val64, info);
13903 else
13904 return aarch64_advsimd_valid_immediate (val64, info, which);
13905}
13906
13907/* Check whether X is a VEC_SERIES-like constant that starts at 0 and
13908 has a step in the range of INDEX. Return the index expression if so,
13909 otherwise return null. */
13910rtx
13911aarch64_check_zero_based_sve_index_immediate (rtx x)
13912{
13913 rtx base, step;
13914 if (const_vec_series_p (x, &base, &step)
13915 && base == const0_rtx
13916 && aarch64_sve_index_immediate_p (step))
13917 return step;
13918 return NULL_RTX;
43e9d192
IB
13919}
13920
43e9d192
IB
13921/* Check whether immediate shift constants are within range.  */
13922bool
ef4bddc2 13923aarch64_simd_shift_imm_p (rtx x, machine_mode mode, bool left)
43e9d192
IB
13924{
13925 int bit_width = GET_MODE_UNIT_SIZE (mode) * BITS_PER_UNIT;
13926 if (left)
ddeabd3e 13927 return aarch64_const_vec_all_same_in_range_p (x, 0, bit_width - 1);
43e9d192 13928 else
ddeabd3e 13929 return aarch64_const_vec_all_same_in_range_p (x, 1, bit_width);
43e9d192
IB
13930}
13931
7325d85a
KT
13932/* Return the bitmask CONST_INT to select the bits required by a zero extract
13933 operation of width WIDTH at bit position POS. */
13934
13935rtx
13936aarch64_mask_from_zextract_ops (rtx width, rtx pos)
13937{
13938 gcc_assert (CONST_INT_P (width));
13939 gcc_assert (CONST_INT_P (pos));
13940
13941 unsigned HOST_WIDE_INT mask
13942 = ((unsigned HOST_WIDE_INT) 1 << UINTVAL (width)) - 1;
13943 return GEN_INT (mask << UINTVAL (pos));
13944}
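/* For instance (illustrative): WIDTH == 8 and POS == 16 give the mask
   ((1 << 8) - 1) << 16 == 0x00ff0000, selecting exactly the byte that a
   zero_extract of width 8 at bit position 16 would read.  */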
13945
83f8c414 13946bool
a6e0bfa7 13947aarch64_mov_operand_p (rtx x, machine_mode mode)
83f8c414 13948{
83f8c414
CSS
13949 if (GET_CODE (x) == HIGH
13950 && aarch64_valid_symref (XEXP (x, 0), GET_MODE (XEXP (x, 0))))
13951 return true;
13952
82614948 13953 if (CONST_INT_P (x))
83f8c414
CSS
13954 return true;
13955
43cacb12
RS
13956 if (VECTOR_MODE_P (GET_MODE (x)))
13957 return aarch64_simd_valid_immediate (x, NULL);
13958
83f8c414
CSS
13959 if (GET_CODE (x) == SYMBOL_REF && mode == DImode && CONSTANT_ADDRESS_P (x))
13960 return true;
13961
43cacb12
RS
13962 if (aarch64_sve_cnt_immediate_p (x))
13963 return true;
13964
a6e0bfa7 13965 return aarch64_classify_symbolic_expression (x)
a5350ddc 13966 == SYMBOL_TINY_ABSOLUTE;
83f8c414
CSS
13967}
13968
43e9d192
IB
13969/* Return a const_int vector of VAL. */
13970rtx
ab014eb3 13971aarch64_simd_gen_const_vector_dup (machine_mode mode, HOST_WIDE_INT val)
43e9d192 13972{
59d06c05
RS
13973 rtx c = gen_int_mode (val, GET_MODE_INNER (mode));
13974 return gen_const_vec_duplicate (mode, c);
43e9d192
IB
13975}
13976
051d0e2f
SN
13977/* Check OP is a legal scalar immediate for the MOVI instruction. */
13978
13979bool
77e994c9 13980aarch64_simd_scalar_immediate_valid_for_move (rtx op, scalar_int_mode mode)
051d0e2f 13981{
ef4bddc2 13982 machine_mode vmode;
051d0e2f 13983
43cacb12 13984 vmode = aarch64_simd_container_mode (mode, 64);
051d0e2f 13985 rtx op_v = aarch64_simd_gen_const_vector_dup (vmode, INTVAL (op));
b187677b 13986 return aarch64_simd_valid_immediate (op_v, NULL);
051d0e2f
SN
13987}
13988
988fa693
JG
13989/* Construct and return a PARALLEL RTX vector with elements numbering the
13990 lanes of either the high (HIGH == TRUE) or low (HIGH == FALSE) half of
13991 the vector - from the perspective of the architecture. This does not
13992 line up with GCC's perspective on lane numbers, so we end up with
13993 different masks depending on our target endian-ness. The diagram
13994 below may help. We must draw the distinction when building masks
13995 which select one half of the vector. An instruction selecting
13996 architectural low-lanes for a big-endian target, must be described using
13997 a mask selecting GCC high-lanes.
13998
13999 Big-Endian Little-Endian
14000
14001GCC 0 1 2 3 3 2 1 0
14002 | x | x | x | x | | x | x | x | x |
14003Architecture 3 2 1 0 3 2 1 0
14004
14005Low Mask: { 2, 3 } { 0, 1 }
14006High Mask: { 0, 1 } { 2, 3 }
f5cbabc1
RS
14007
14008 MODE Is the mode of the vector and NUNITS is the number of units in it. */
988fa693 14009
43e9d192 14010rtx
f5cbabc1 14011aarch64_simd_vect_par_cnst_half (machine_mode mode, int nunits, bool high)
43e9d192 14012{
43e9d192 14013 rtvec v = rtvec_alloc (nunits / 2);
988fa693
JG
14014 int high_base = nunits / 2;
14015 int low_base = 0;
14016 int base;
43e9d192
IB
14017 rtx t1;
14018 int i;
14019
988fa693
JG
14020 if (BYTES_BIG_ENDIAN)
14021 base = high ? low_base : high_base;
14022 else
14023 base = high ? high_base : low_base;
14024
14025 for (i = 0; i < nunits / 2; i++)
43e9d192
IB
14026 RTVEC_ELT (v, i) = GEN_INT (base + i);
14027
14028 t1 = gen_rtx_PARALLEL (mode, v);
14029 return t1;
14030}
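/* Concrete instance of the diagram above (illustrative, not from the
   sources): for V4SImode with HIGH == true this returns
   (parallel [(const_int 2) (const_int 3)]) on little-endian and
   (parallel [(const_int 0) (const_int 1)]) on big-endian, i.e. the GCC lane
   numbers that correspond to the architectural high half in each case.  */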
14031
988fa693
JG
14032/* Check OP for validity as a PARALLEL RTX vector with elements
14033 numbering the lanes of either the high (HIGH == TRUE) or low lanes,
14034 from the perspective of the architecture. See the diagram above
14035 aarch64_simd_vect_par_cnst_half for more details. */
14036
14037bool
ef4bddc2 14038aarch64_simd_check_vect_par_cnst_half (rtx op, machine_mode mode,
988fa693
JG
14039 bool high)
14040{
6a70badb
RS
14041 int nelts;
14042 if (!VECTOR_MODE_P (mode) || !GET_MODE_NUNITS (mode).is_constant (&nelts))
f5cbabc1
RS
14043 return false;
14044
6a70badb 14045 rtx ideal = aarch64_simd_vect_par_cnst_half (mode, nelts, high);
988fa693
JG
14046 HOST_WIDE_INT count_op = XVECLEN (op, 0);
14047 HOST_WIDE_INT count_ideal = XVECLEN (ideal, 0);
14048 int i = 0;
14049
988fa693
JG
14050 if (count_op != count_ideal)
14051 return false;
14052
14053 for (i = 0; i < count_ideal; i++)
14054 {
14055 rtx elt_op = XVECEXP (op, 0, i);
14056 rtx elt_ideal = XVECEXP (ideal, 0, i);
14057
4aa81c2e 14058 if (!CONST_INT_P (elt_op)
988fa693
JG
14059 || INTVAL (elt_ideal) != INTVAL (elt_op))
14060 return false;
14061 }
14062 return true;
14063}
14064
43e9d192
IB
14065/* Bounds-check lanes. Ensure OPERAND lies between LOW (inclusive) and
14066 HIGH (exclusive). */
14067void
46ed6024
CB
14068aarch64_simd_lane_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high,
14069 const_tree exp)
43e9d192
IB
14070{
14071 HOST_WIDE_INT lane;
4aa81c2e 14072 gcc_assert (CONST_INT_P (operand));
43e9d192
IB
14073 lane = INTVAL (operand);
14074
14075 if (lane < low || lane >= high)
46ed6024
CB
14076 {
14077 if (exp)
cf0c27ef 14078 error ("%Klane %wd out of range %wd - %wd", exp, lane, low, high - 1);
46ed6024 14079 else
cf0c27ef 14080 error ("lane %wd out of range %wd - %wd", lane, low, high - 1);
46ed6024 14081 }
43e9d192
IB
14082}
14083
7ac29c0f
RS
14084/* Perform endian correction on lane number N, which indexes a vector
14085 of mode MODE, and return the result as an SImode rtx. */
14086
14087rtx
14088aarch64_endian_lane_rtx (machine_mode mode, unsigned int n)
14089{
14090 return gen_int_mode (ENDIAN_LANE_N (GET_MODE_NUNITS (mode), n), SImode);
14091}
14092
43e9d192 14093/* Return TRUE if OP is a valid vector addressing mode. */
43cacb12 14094
43e9d192
IB
14095bool
14096aarch64_simd_mem_operand_p (rtx op)
14097{
14098 return MEM_P (op) && (GET_CODE (XEXP (op, 0)) == POST_INC
4aa81c2e 14099 || REG_P (XEXP (op, 0)));
43e9d192
IB
14100}
14101
43cacb12
RS
14102/* Return true if OP is a valid MEM operand for an SVE LD1R instruction. */
14103
14104bool
14105aarch64_sve_ld1r_operand_p (rtx op)
14106{
14107 struct aarch64_address_info addr;
14108 scalar_mode mode;
14109
14110 return (MEM_P (op)
14111 && is_a <scalar_mode> (GET_MODE (op), &mode)
14112 && aarch64_classify_address (&addr, XEXP (op, 0), mode, false)
14113 && addr.type == ADDRESS_REG_IMM
14114 && offset_6bit_unsigned_scaled_p (mode, addr.const_offset));
14115}
14116
14117/* Return true if OP is a valid MEM operand for an SVE LDR instruction.
14118 The conditions for STR are the same. */
14119bool
14120aarch64_sve_ldr_operand_p (rtx op)
14121{
14122 struct aarch64_address_info addr;
14123
14124 return (MEM_P (op)
14125 && aarch64_classify_address (&addr, XEXP (op, 0), GET_MODE (op),
14126 false, ADDR_QUERY_ANY)
14127 && addr.type == ADDRESS_REG_IMM);
14128}
14129
9f4cbab8
RS
14130/* Return true if OP is a valid MEM operand for an SVE_STRUCT mode.
14131 We need to be able to access the individual pieces, so the range
14132 is different from LD[234] and ST[234]. */
14133bool
14134aarch64_sve_struct_memory_operand_p (rtx op)
14135{
14136 if (!MEM_P (op))
14137 return false;
14138
14139 machine_mode mode = GET_MODE (op);
14140 struct aarch64_address_info addr;
14141 if (!aarch64_classify_address (&addr, XEXP (op, 0), SVE_BYTE_MODE, false,
14142 ADDR_QUERY_ANY)
14143 || addr.type != ADDRESS_REG_IMM)
14144 return false;
14145
14146 poly_int64 first = addr.const_offset;
14147 poly_int64 last = first + GET_MODE_SIZE (mode) - BYTES_PER_SVE_VECTOR;
14148 return (offset_4bit_signed_scaled_p (SVE_BYTE_MODE, first)
14149 && offset_4bit_signed_scaled_p (SVE_BYTE_MODE, last));
14150}
14151
2d8c6dc1
AH
14152/* Emit a register copy from operand to operand, taking care not to
14153 early-clobber source registers in the process.
43e9d192 14154
2d8c6dc1
AH
14155 COUNT is the number of components into which the copy needs to be
14156 decomposed. */
43e9d192 14157void
b8506a8a 14158aarch64_simd_emit_reg_reg_move (rtx *operands, machine_mode mode,
2d8c6dc1 14159 unsigned int count)
43e9d192
IB
14160{
14161 unsigned int i;
2d8c6dc1
AH
14162 int rdest = REGNO (operands[0]);
14163 int rsrc = REGNO (operands[1]);
43e9d192
IB
14164
14165 if (!reg_overlap_mentioned_p (operands[0], operands[1])
2d8c6dc1
AH
14166 || rdest < rsrc)
14167 for (i = 0; i < count; i++)
14168 emit_move_insn (gen_rtx_REG (mode, rdest + i),
14169 gen_rtx_REG (mode, rsrc + i));
43e9d192 14170 else
2d8c6dc1
AH
14171 for (i = 0; i < count; i++)
14172 emit_move_insn (gen_rtx_REG (mode, rdest + count - i - 1),
14173 gen_rtx_REG (mode, rsrc + count - i - 1));
43e9d192
IB
14174}
14175
668046d1 14176/* Compute and return the length of aarch64_simd_reglist<mode>, where <mode> is
6ec0e5b9 14177 one of VSTRUCT modes: OI, CI, or XI. */
668046d1 14178int
b8506a8a 14179aarch64_simd_attr_length_rglist (machine_mode mode)
668046d1 14180{
6a70badb
RS
14181 /* This is only used (and only meaningful) for Advanced SIMD, not SVE. */
14182 return (GET_MODE_SIZE (mode).to_constant () / UNITS_PER_VREG) * 4;
668046d1
DS
14183}
14184
db0253a4 14185/* Implement target hook TARGET_VECTOR_ALIGNMENT. The AAPCS64 sets the maximum
43cacb12
RS
14186 alignment of a vector to 128 bits. SVE predicates have an alignment of
14187 16 bits. */
db0253a4
TB
14188static HOST_WIDE_INT
14189aarch64_simd_vector_alignment (const_tree type)
14190{
43cacb12
RS
14191 if (TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
14192 /* ??? Checking the mode isn't ideal, but VECTOR_BOOLEAN_TYPE_P can
14193 be set for non-predicate vectors of booleans. Modes are the most
14194 direct way we have of identifying real SVE predicate types. */
14195 return GET_MODE_CLASS (TYPE_MODE (type)) == MODE_VECTOR_BOOL ? 16 : 128;
9439e9a1 14196 HOST_WIDE_INT align = tree_to_shwi (TYPE_SIZE (type));
db0253a4
TB
14197 return MIN (align, 128);
14198}
14199
43cacb12 14200/* Implement target hook TARGET_VECTORIZE_PREFERRED_VECTOR_ALIGNMENT. */
ca31798e 14201static poly_uint64
43cacb12
RS
14202aarch64_vectorize_preferred_vector_alignment (const_tree type)
14203{
14204 if (aarch64_sve_data_mode_p (TYPE_MODE (type)))
14205 {
14206 /* If the length of the vector is fixed, try to align to that length,
14207 otherwise don't try to align at all. */
14208 HOST_WIDE_INT result;
14209 if (!BITS_PER_SVE_VECTOR.is_constant (&result))
14210 result = TYPE_ALIGN (TREE_TYPE (type));
14211 return result;
14212 }
14213 return TYPE_ALIGN (type);
14214}
14215
db0253a4
TB
14216/* Implement target hook TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE. */
14217static bool
14218aarch64_simd_vector_alignment_reachable (const_tree type, bool is_packed)
14219{
14220 if (is_packed)
14221 return false;
14222
43cacb12
RS
14223 /* For fixed-length vectors, check that the vectorizer will aim for
14224 full-vector alignment. This isn't true for generic GCC vectors
14225 that are wider than the ABI maximum of 128 bits. */
ca31798e
AV
14226 poly_uint64 preferred_alignment =
14227 aarch64_vectorize_preferred_vector_alignment (type);
43cacb12 14228 if (TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
ca31798e
AV
14229 && maybe_ne (wi::to_widest (TYPE_SIZE (type)),
14230 preferred_alignment))
db0253a4
TB
14231 return false;
14232
14233 /* Vectors whose size is <= BIGGEST_ALIGNMENT are naturally aligned. */
14234 return true;
14235}
14236
7df76747
N
14237/* Return true if the vector misalignment factor is supported by the
14238 target. */
14239static bool
14240aarch64_builtin_support_vector_misalignment (machine_mode mode,
14241 const_tree type, int misalignment,
14242 bool is_packed)
14243{
14244 if (TARGET_SIMD && STRICT_ALIGNMENT)
14245 {
14246      /* Return false if the movmisalign pattern is not supported for this mode.  */
14247 if (optab_handler (movmisalign_optab, mode) == CODE_FOR_nothing)
14248 return false;
14249
a509c571 14250 /* Misalignment factor is unknown at compile time. */
7df76747 14251 if (misalignment == -1)
a509c571 14252 return false;
7df76747
N
14253 }
14254 return default_builtin_support_vector_misalignment (mode, type, misalignment,
14255 is_packed);
14256}
14257
4369c11e
TB
14258/* If VALS is a vector constant that can be loaded into a register
14259 using DUP, generate instructions to do so and return an RTX to
14260 assign to the register. Otherwise return NULL_RTX. */
14261static rtx
14262aarch64_simd_dup_constant (rtx vals)
14263{
ef4bddc2
RS
14264 machine_mode mode = GET_MODE (vals);
14265 machine_mode inner_mode = GET_MODE_INNER (mode);
4369c11e 14266 rtx x;
4369c11e 14267
92695fbb 14268 if (!const_vec_duplicate_p (vals, &x))
4369c11e
TB
14269 return NULL_RTX;
14270
14271 /* We can load this constant by using DUP and a constant in a
14272 single ARM register. This will be cheaper than a vector
14273 load. */
92695fbb 14274 x = copy_to_mode_reg (inner_mode, x);
59d06c05 14275 return gen_vec_duplicate (mode, x);
4369c11e
TB
14276}
14277
14278
14279/* Generate code to load VALS, which is a PARALLEL containing only
14280 constants (for vec_init) or CONST_VECTOR, efficiently into a
14281 register. Returns an RTX to copy into the register, or NULL_RTX
14282   for a PARALLEL that cannot be converted into a CONST_VECTOR.  */
1df3f464 14283static rtx
4369c11e
TB
14284aarch64_simd_make_constant (rtx vals)
14285{
ef4bddc2 14286 machine_mode mode = GET_MODE (vals);
4369c11e
TB
14287 rtx const_dup;
14288 rtx const_vec = NULL_RTX;
4369c11e
TB
14289 int n_const = 0;
14290 int i;
14291
14292 if (GET_CODE (vals) == CONST_VECTOR)
14293 const_vec = vals;
14294 else if (GET_CODE (vals) == PARALLEL)
14295 {
14296 /* A CONST_VECTOR must contain only CONST_INTs and
14297 CONST_DOUBLEs, but CONSTANT_P allows more (e.g. SYMBOL_REF).
14298 Only store valid constants in a CONST_VECTOR. */
6a70badb 14299 int n_elts = XVECLEN (vals, 0);
4369c11e
TB
14300 for (i = 0; i < n_elts; ++i)
14301 {
14302 rtx x = XVECEXP (vals, 0, i);
14303 if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
14304 n_const++;
14305 }
14306 if (n_const == n_elts)
14307 const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0));
14308 }
14309 else
14310 gcc_unreachable ();
14311
14312 if (const_vec != NULL_RTX
b187677b 14313 && aarch64_simd_valid_immediate (const_vec, NULL))
4369c11e
TB
14314 /* Load using MOVI/MVNI. */
14315 return const_vec;
14316 else if ((const_dup = aarch64_simd_dup_constant (vals)) != NULL_RTX)
14317 /* Loaded using DUP. */
14318 return const_dup;
14319 else if (const_vec != NULL_RTX)
14320    /* Load from constant pool.  We cannot take advantage of single-cycle
14321 LD1 because we need a PC-relative addressing mode. */
14322 return const_vec;
14323 else
14324 /* A PARALLEL containing something not valid inside CONST_VECTOR.
14325       We cannot construct an initializer.  */
14326 return NULL_RTX;
14327}
14328
35a093b6
JG
14329/* Expand a vector initialisation sequence, such that TARGET is
14330 initialised to contain VALS. */
14331
4369c11e
TB
14332void
14333aarch64_expand_vector_init (rtx target, rtx vals)
14334{
ef4bddc2 14335 machine_mode mode = GET_MODE (target);
146c2e3a 14336 scalar_mode inner_mode = GET_MODE_INNER (mode);
35a093b6 14337 /* The number of vector elements. */
6a70badb 14338 int n_elts = XVECLEN (vals, 0);
35a093b6 14339 /* The number of vector elements which are not constant. */
8b66a2d4
AL
14340 int n_var = 0;
14341 rtx any_const = NULL_RTX;
35a093b6
JG
14342 /* The first element of vals. */
14343 rtx v0 = XVECEXP (vals, 0, 0);
4369c11e 14344 bool all_same = true;
4369c11e 14345
35a093b6 14346 /* Count the number of variable elements to initialise. */
8b66a2d4 14347 for (int i = 0; i < n_elts; ++i)
4369c11e 14348 {
8b66a2d4 14349 rtx x = XVECEXP (vals, 0, i);
35a093b6 14350 if (!(CONST_INT_P (x) || CONST_DOUBLE_P (x)))
8b66a2d4
AL
14351 ++n_var;
14352 else
14353 any_const = x;
4369c11e 14354
35a093b6 14355 all_same &= rtx_equal_p (x, v0);
4369c11e
TB
14356 }
14357
35a093b6
JG
14358 /* No variable elements, hand off to aarch64_simd_make_constant which knows
14359 how best to handle this. */
4369c11e
TB
14360 if (n_var == 0)
14361 {
14362 rtx constant = aarch64_simd_make_constant (vals);
14363 if (constant != NULL_RTX)
14364 {
14365 emit_move_insn (target, constant);
14366 return;
14367 }
14368 }
14369
14370 /* Splat a single non-constant element if we can. */
14371 if (all_same)
14372 {
35a093b6 14373 rtx x = copy_to_mode_reg (inner_mode, v0);
59d06c05 14374 aarch64_emit_move (target, gen_vec_duplicate (mode, x));
4369c11e
TB
14375 return;
14376 }
14377
85c1b6d7
AP
14378 enum insn_code icode = optab_handler (vec_set_optab, mode);
14379 gcc_assert (icode != CODE_FOR_nothing);
14380
14381 /* If there are only variable elements, try to optimize
14382 the insertion using dup for the most common element
14383 followed by insertions. */
14384
14385 /* The algorithm will fill matches[*][0] with the earliest matching element,
14386 and matches[X][1] with the count of duplicate elements (if X is the
14387 earliest element which has duplicates). */
14388
14389 if (n_var == n_elts && n_elts <= 16)
14390 {
14391 int matches[16][2] = {0};
14392 for (int i = 0; i < n_elts; i++)
14393 {
14394 for (int j = 0; j <= i; j++)
14395 {
14396 if (rtx_equal_p (XVECEXP (vals, 0, i), XVECEXP (vals, 0, j)))
14397 {
14398 matches[i][0] = j;
14399 matches[j][1]++;
14400 break;
14401 }
14402 }
14403 }
14404 int maxelement = 0;
14405 int maxv = 0;
14406 for (int i = 0; i < n_elts; i++)
14407 if (matches[i][1] > maxv)
14408 {
14409 maxelement = i;
14410 maxv = matches[i][1];
14411 }
14412
b4e2cd5b
JG
14413 /* Create a duplicate of the most common element, unless all elements
14414 are equally useless to us, in which case just immediately set the
14415 vector register using the first element. */
14416
14417 if (maxv == 1)
14418 {
14419 /* For vectors of two 64-bit elements, we can do even better. */
14420 if (n_elts == 2
14421 && (inner_mode == E_DImode
14422 || inner_mode == E_DFmode))
14423
14424 {
14425 rtx x0 = XVECEXP (vals, 0, 0);
14426 rtx x1 = XVECEXP (vals, 0, 1);
14427 /* Combine can pick up this case, but handling it directly
14428 here leaves clearer RTL.
14429
14430 This is load_pair_lanes<mode>, and also gives us a clean-up
14431 for store_pair_lanes<mode>. */
14432 if (memory_operand (x0, inner_mode)
14433 && memory_operand (x1, inner_mode)
14434 && !STRICT_ALIGNMENT
14435 && rtx_equal_p (XEXP (x1, 0),
14436 plus_constant (Pmode,
14437 XEXP (x0, 0),
14438 GET_MODE_SIZE (inner_mode))))
14439 {
14440 rtx t;
14441 if (inner_mode == DFmode)
14442 t = gen_load_pair_lanesdf (target, x0, x1);
14443 else
14444 t = gen_load_pair_lanesdi (target, x0, x1);
14445 emit_insn (t);
14446 return;
14447 }
14448 }
14449 /* The subreg-move sequence below will move into lane zero of the
14450 vector register. For big-endian we want that position to hold
14451 the last element of VALS. */
14452 maxelement = BYTES_BIG_ENDIAN ? n_elts - 1 : 0;
14453 rtx x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, maxelement));
14454 aarch64_emit_move (target, lowpart_subreg (mode, x, inner_mode));
14455 }
14456 else
14457 {
14458 rtx x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, maxelement));
14459 aarch64_emit_move (target, gen_vec_duplicate (mode, x));
14460 }
85c1b6d7
AP
14461
14462 /* Insert the rest. */
14463 for (int i = 0; i < n_elts; i++)
14464 {
14465 rtx x = XVECEXP (vals, 0, i);
14466 if (matches[i][0] == maxelement)
14467 continue;
14468 x = copy_to_mode_reg (inner_mode, x);
14469 emit_insn (GEN_FCN (icode) (target, x, GEN_INT (i)));
14470 }
14471 return;
14472 }
14473
35a093b6
JG
14474 /* Initialise a vector which is part-variable. We want to first try
14475 to build those lanes which are constant in the most efficient way we
14476 can. */
14477 if (n_var != n_elts)
4369c11e
TB
14478 {
14479 rtx copy = copy_rtx (vals);
4369c11e 14480
8b66a2d4
AL
14481 /* Load constant part of vector. We really don't care what goes into the
14482 parts we will overwrite, but we're more likely to be able to load the
14483 constant efficiently if it has fewer, larger, repeating parts
14484 (see aarch64_simd_valid_immediate). */
14485 for (int i = 0; i < n_elts; i++)
14486 {
14487 rtx x = XVECEXP (vals, 0, i);
14488 if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
14489 continue;
14490 rtx subst = any_const;
14491 for (int bit = n_elts / 2; bit > 0; bit /= 2)
14492 {
14493 /* Look in the copied vector, as more elements are const. */
14494 rtx test = XVECEXP (copy, 0, i ^ bit);
14495 if (CONST_INT_P (test) || CONST_DOUBLE_P (test))
14496 {
14497 subst = test;
14498 break;
14499 }
14500 }
14501 XVECEXP (copy, 0, i) = subst;
14502 }
4369c11e 14503 aarch64_expand_vector_init (target, copy);
35a093b6 14504 }
4369c11e 14505
35a093b6 14506 /* Insert the variable lanes directly. */
8b66a2d4 14507 for (int i = 0; i < n_elts; i++)
35a093b6
JG
14508 {
14509 rtx x = XVECEXP (vals, 0, i);
14510 if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
14511 continue;
14512 x = copy_to_mode_reg (inner_mode, x);
14513 emit_insn (GEN_FCN (icode) (target, x, GEN_INT (i)));
14514 }
4369c11e
TB
14515}
14516
43e9d192 14517static unsigned HOST_WIDE_INT
ef4bddc2 14518aarch64_shift_truncation_mask (machine_mode mode)
43e9d192 14519{
43cacb12
RS
14520 if (!SHIFT_COUNT_TRUNCATED || aarch64_vector_data_mode_p (mode))
14521 return 0;
14522 return GET_MODE_UNIT_BITSIZE (mode) - 1;
43e9d192
IB
14523}
14524
43e9d192
IB
14525/* Select a format to encode pointers in exception handling data. */
14526int
14527aarch64_asm_preferred_eh_data_format (int code ATTRIBUTE_UNUSED, int global)
14528{
14529 int type;
14530 switch (aarch64_cmodel)
14531 {
14532 case AARCH64_CMODEL_TINY:
14533 case AARCH64_CMODEL_TINY_PIC:
14534 case AARCH64_CMODEL_SMALL:
14535 case AARCH64_CMODEL_SMALL_PIC:
1b1e81f8 14536 case AARCH64_CMODEL_SMALL_SPIC:
43e9d192
IB
14537 /* text+got+data < 4Gb. 4-byte signed relocs are sufficient
14538 for everything. */
14539 type = DW_EH_PE_sdata4;
14540 break;
14541 default:
14542 /* No assumptions here. 8-byte relocs required. */
14543 type = DW_EH_PE_sdata8;
14544 break;
14545 }
14546 return (global ? DW_EH_PE_indirect : 0) | DW_EH_PE_pcrel | type;
14547}
14548
e1c1ecb0
KT
14549/* The last .arch and .tune assembly strings that we printed. */
14550static std::string aarch64_last_printed_arch_string;
14551static std::string aarch64_last_printed_tune_string;
14552
361fb3ee
KT
14553/* Implement ASM_DECLARE_FUNCTION_NAME. Output the ISA features used
14554 by the function fndecl. */
14555
14556void
14557aarch64_declare_function_name (FILE *stream, const char* name,
14558 tree fndecl)
14559{
14560 tree target_parts = DECL_FUNCTION_SPECIFIC_TARGET (fndecl);
14561
14562 struct cl_target_option *targ_options;
14563 if (target_parts)
14564 targ_options = TREE_TARGET_OPTION (target_parts);
14565 else
14566 targ_options = TREE_TARGET_OPTION (target_option_current_node);
14567 gcc_assert (targ_options);
14568
14569 const struct processor *this_arch
14570 = aarch64_get_arch (targ_options->x_explicit_arch);
14571
054b4005
JG
14572 unsigned long isa_flags = targ_options->x_aarch64_isa_flags;
14573 std::string extension
04a99ebe
JG
14574 = aarch64_get_extension_string_for_isa_flags (isa_flags,
14575 this_arch->flags);
e1c1ecb0
KT
14576 /* Only update the assembler .arch string if it is distinct from the last
14577 such string we printed. */
14578 std::string to_print = this_arch->name + extension;
14579 if (to_print != aarch64_last_printed_arch_string)
14580 {
14581 asm_fprintf (asm_out_file, "\t.arch %s\n", to_print.c_str ());
14582 aarch64_last_printed_arch_string = to_print;
14583 }
361fb3ee
KT
14584
14585  /* Print the cpu name we're tuning for in the comments; it might be
e1c1ecb0
KT
14586 useful to readers of the generated asm. Do it only when it changes
14587 from function to function and verbose assembly is requested. */
361fb3ee
KT
14588 const struct processor *this_tune
14589 = aarch64_get_tune_cpu (targ_options->x_explicit_tune_core);
14590
e1c1ecb0
KT
14591 if (flag_debug_asm && aarch64_last_printed_tune_string != this_tune->name)
14592 {
14593 asm_fprintf (asm_out_file, "\t" ASM_COMMENT_START ".tune %s\n",
14594 this_tune->name);
14595 aarch64_last_printed_tune_string = this_tune->name;
14596 }
361fb3ee
KT
14597
14598 /* Don't forget the type directive for ELF. */
14599 ASM_OUTPUT_TYPE_DIRECTIVE (stream, name, "function");
14600 ASM_OUTPUT_LABEL (stream, name);
14601}
14602
e1c1ecb0
KT
14603/* Implements TARGET_ASM_FILE_START. Output the assembly header. */
14604
14605static void
14606aarch64_start_file (void)
14607{
14608 struct cl_target_option *default_options
14609 = TREE_TARGET_OPTION (target_option_default_node);
14610
14611 const struct processor *default_arch
14612 = aarch64_get_arch (default_options->x_explicit_arch);
14613 unsigned long default_isa_flags = default_options->x_aarch64_isa_flags;
14614 std::string extension
04a99ebe
JG
14615 = aarch64_get_extension_string_for_isa_flags (default_isa_flags,
14616 default_arch->flags);
e1c1ecb0
KT
14617
14618 aarch64_last_printed_arch_string = default_arch->name + extension;
14619 aarch64_last_printed_tune_string = "";
14620 asm_fprintf (asm_out_file, "\t.arch %s\n",
14621 aarch64_last_printed_arch_string.c_str ());
14622
14623 default_file_start ();
14624}
14625
0462169c
SN
14626/* Emit load exclusive. */
14627
14628static void
ef4bddc2 14629aarch64_emit_load_exclusive (machine_mode mode, rtx rval,
0462169c
SN
14630 rtx mem, rtx model_rtx)
14631{
0016d8d9 14632 emit_insn (gen_aarch64_load_exclusive (mode, rval, mem, model_rtx));
0462169c
SN
14633}
14634
14635/* Emit store exclusive. */
14636
14637static void
ef4bddc2 14638aarch64_emit_store_exclusive (machine_mode mode, rtx bval,
0462169c
SN
14639 rtx rval, rtx mem, rtx model_rtx)
14640{
0016d8d9 14641 emit_insn (gen_aarch64_store_exclusive (mode, bval, rval, mem, model_rtx));
0462169c
SN
14642}
14643
14644/* Mark the previous jump instruction as unlikely. */
14645
14646static void
14647aarch64_emit_unlikely_jump (rtx insn)
14648{
f370536c 14649 rtx_insn *jump = emit_jump_insn (insn);
5fa396ad 14650 add_reg_br_prob_note (jump, profile_probability::very_unlikely ());
0462169c
SN
14651}
14652
14653/* Expand a compare and swap pattern. */
14654
14655void
14656aarch64_expand_compare_and_swap (rtx operands[])
14657{
d400fda3
RH
14658 rtx bval, rval, mem, oldval, newval, is_weak, mod_s, mod_f, x, cc_reg;
14659 machine_mode mode, r_mode;
0462169c
SN
14660
14661 bval = operands[0];
14662 rval = operands[1];
14663 mem = operands[2];
14664 oldval = operands[3];
14665 newval = operands[4];
14666 is_weak = operands[5];
14667 mod_s = operands[6];
14668 mod_f = operands[7];
14669 mode = GET_MODE (mem);
0462169c
SN
14670
14671 /* Normally the succ memory model must be stronger than fail, but in the
14672 unlikely event of fail being ACQUIRE and succ being RELEASE we need to
14673 promote succ to ACQ_REL so that we don't lose the acquire semantics. */
46b35980
AM
14674 if (is_mm_acquire (memmodel_from_int (INTVAL (mod_f)))
14675 && is_mm_release (memmodel_from_int (INTVAL (mod_s))))
0462169c
SN
14676 mod_s = GEN_INT (MEMMODEL_ACQ_REL);
14677
d400fda3
RH
14678 r_mode = mode;
14679 if (mode == QImode || mode == HImode)
0462169c 14680 {
d400fda3
RH
14681 r_mode = SImode;
14682 rval = gen_reg_rtx (r_mode);
0462169c
SN
14683 }
14684
b0770c0f 14685 if (TARGET_LSE)
77f33f44
RH
14686 {
14687 /* The CAS insn requires oldval and rval overlap, but we need to
14688 have a copy of oldval saved across the operation to tell if
14689 the operation is successful. */
d400fda3
RH
14690 if (reg_overlap_mentioned_p (rval, oldval))
14691 rval = copy_to_mode_reg (r_mode, oldval);
77f33f44 14692 else
d400fda3
RH
14693 emit_move_insn (rval, gen_lowpart (r_mode, oldval));
14694
77f33f44
RH
14695 emit_insn (gen_aarch64_compare_and_swap_lse (mode, rval, mem,
14696 newval, mod_s));
d400fda3 14697 cc_reg = aarch64_gen_compare_reg_maybe_ze (NE, rval, oldval, mode);
77f33f44 14698 }
b0770c0f 14699 else
d400fda3
RH
14700 {
14701 /* The oldval predicate varies by mode. Test it and force to reg. */
14702 insn_code code = code_for_aarch64_compare_and_swap (mode);
14703 if (!insn_data[code].operand[2].predicate (oldval, mode))
14704 oldval = force_reg (mode, oldval);
0462169c 14705
d400fda3
RH
14706 emit_insn (GEN_FCN (code) (rval, mem, oldval, newval,
14707 is_weak, mod_s, mod_f));
14708 cc_reg = gen_rtx_REG (CCmode, CC_REGNUM);
14709 }
14710
14711 if (r_mode != mode)
77f33f44
RH
14712 rval = gen_lowpart (mode, rval);
14713 emit_move_insn (operands[1], rval);
0462169c 14714
d400fda3 14715 x = gen_rtx_EQ (SImode, cc_reg, const0_rtx);
f7df4a84 14716 emit_insn (gen_rtx_SET (bval, x));
0462169c
SN
14717}
14718
f70fb3b6
MW
14719/* Emit a barrier, that is appropriate for memory model MODEL, at the end of a
14720 sequence implementing an atomic operation. */
14721
14722static void
14723aarch64_emit_post_barrier (enum memmodel model)
14724{
14725 const enum memmodel base_model = memmodel_base (model);
14726
14727 if (is_mm_sync (model)
14728 && (base_model == MEMMODEL_ACQUIRE
14729 || base_model == MEMMODEL_ACQ_REL
14730 || base_model == MEMMODEL_SEQ_CST))
14731 {
14732 emit_insn (gen_mem_thread_fence (GEN_INT (MEMMODEL_SEQ_CST)));
14733 }
14734}
14735
0462169c
SN
14736/* Split a compare and swap pattern. */
14737
14738void
14739aarch64_split_compare_and_swap (rtx operands[])
14740{
14741 rtx rval, mem, oldval, newval, scratch;
ef4bddc2 14742 machine_mode mode;
0462169c 14743 bool is_weak;
5d8a22a5
DM
14744 rtx_code_label *label1, *label2;
14745 rtx x, cond;
ab876106
MW
14746 enum memmodel model;
14747 rtx model_rtx;
0462169c
SN
14748
14749 rval = operands[0];
14750 mem = operands[1];
14751 oldval = operands[2];
14752 newval = operands[3];
14753 is_weak = (operands[4] != const0_rtx);
ab876106 14754 model_rtx = operands[5];
0462169c
SN
14755 scratch = operands[7];
14756 mode = GET_MODE (mem);
ab876106 14757 model = memmodel_from_int (INTVAL (model_rtx));
0462169c 14758
17f47f86
KT
14759 /* When OLDVAL is zero and we want the strong version we can emit a tighter
14760 loop:
14761 .label1:
14762 LD[A]XR rval, [mem]
14763 CBNZ rval, .label2
14764 ST[L]XR scratch, newval, [mem]
14765 CBNZ scratch, .label1
14766 .label2:
14767 CMP rval, 0. */
14768 bool strong_zero_p = !is_weak && oldval == const0_rtx;
14769
5d8a22a5 14770 label1 = NULL;
0462169c
SN
14771 if (!is_weak)
14772 {
14773 label1 = gen_label_rtx ();
14774 emit_label (label1);
14775 }
14776 label2 = gen_label_rtx ();
14777
ab876106
MW
14778 /* The initial load can be relaxed for a __sync operation since a final
14779 barrier will be emitted to stop code hoisting. */
14780 if (is_mm_sync (model))
14781 aarch64_emit_load_exclusive (mode, rval, mem,
14782 GEN_INT (MEMMODEL_RELAXED));
14783 else
14784 aarch64_emit_load_exclusive (mode, rval, mem, model_rtx);
0462169c 14785
17f47f86
KT
14786 if (strong_zero_p)
14787 {
6e1eaca9
RE
14788 if (aarch64_track_speculation)
14789 {
14790 /* Emit an explicit compare instruction, so that we can correctly
14791 track the condition codes. */
14792 rtx cc_reg = aarch64_gen_compare_reg (NE, rval, const0_rtx);
14793 x = gen_rtx_NE (GET_MODE (cc_reg), cc_reg, const0_rtx);
14794 }
14795 else
14796 x = gen_rtx_NE (VOIDmode, rval, const0_rtx);
14797
17f47f86
KT
14798 x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
14799 gen_rtx_LABEL_REF (Pmode, label2), pc_rtx);
14800 aarch64_emit_unlikely_jump (gen_rtx_SET (pc_rtx, x));
14801 }
14802 else
14803 {
d400fda3 14804 cond = aarch64_gen_compare_reg_maybe_ze (NE, rval, oldval, mode);
17f47f86
KT
14805 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
14806 x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
d400fda3 14807 gen_rtx_LABEL_REF (Pmode, label2), pc_rtx);
17f47f86
KT
14808 aarch64_emit_unlikely_jump (gen_rtx_SET (pc_rtx, x));
14809 }
0462169c 14810
ab876106 14811 aarch64_emit_store_exclusive (mode, scratch, mem, newval, model_rtx);
0462169c
SN
14812
14813 if (!is_weak)
14814 {
6e1eaca9
RE
14815 if (aarch64_track_speculation)
14816 {
14817 /* Emit an explicit compare instruction, so that we can correctly
14818 track the condition codes. */
14819 rtx cc_reg = aarch64_gen_compare_reg (NE, scratch, const0_rtx);
14820 x = gen_rtx_NE (GET_MODE (cc_reg), cc_reg, const0_rtx);
14821 }
14822 else
14823 x = gen_rtx_NE (VOIDmode, scratch, const0_rtx);
14824
0462169c
SN
14825 x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
14826 gen_rtx_LABEL_REF (Pmode, label1), pc_rtx);
f7df4a84 14827 aarch64_emit_unlikely_jump (gen_rtx_SET (pc_rtx, x));
0462169c
SN
14828 }
14829 else
14830 {
14831 cond = gen_rtx_REG (CCmode, CC_REGNUM);
14832 x = gen_rtx_COMPARE (CCmode, scratch, const0_rtx);
f7df4a84 14833 emit_insn (gen_rtx_SET (cond, x));
0462169c
SN
14834 }
14835
14836 emit_label (label2);
17f47f86
KT
14837 /* If we used a CBNZ in the exchange loop emit an explicit compare with RVAL
14838 to set the condition flags. If this is not used it will be removed by
14839 later passes. */
14840 if (strong_zero_p)
14841 {
14842 cond = gen_rtx_REG (CCmode, CC_REGNUM);
14843 x = gen_rtx_COMPARE (CCmode, rval, const0_rtx);
14844 emit_insn (gen_rtx_SET (cond, x));
14845 }
ab876106
MW
14846 /* Emit any final barrier needed for a __sync operation. */
14847 if (is_mm_sync (model))
14848 aarch64_emit_post_barrier (model);
0462169c 14849}
9cd7b720 14850
0462169c
SN
14851/* Split an atomic operation. */
14852
14853void
14854aarch64_split_atomic_op (enum rtx_code code, rtx old_out, rtx new_out, rtx mem,
9cd7b720 14855 rtx value, rtx model_rtx, rtx cond)
0462169c 14856{
ef4bddc2
RS
14857 machine_mode mode = GET_MODE (mem);
14858 machine_mode wmode = (mode == DImode ? DImode : SImode);
f70fb3b6
MW
14859 const enum memmodel model = memmodel_from_int (INTVAL (model_rtx));
14860 const bool is_sync = is_mm_sync (model);
5d8a22a5
DM
14861 rtx_code_label *label;
14862 rtx x;
0462169c 14863
9cd7b720 14864 /* Split the atomic operation into a sequence. */
0462169c
SN
14865 label = gen_label_rtx ();
14866 emit_label (label);
14867
14868 if (new_out)
14869 new_out = gen_lowpart (wmode, new_out);
14870 if (old_out)
14871 old_out = gen_lowpart (wmode, old_out);
14872 else
14873 old_out = new_out;
14874 value = simplify_gen_subreg (wmode, value, mode, 0);
14875
f70fb3b6
MW
14876 /* The initial load can be relaxed for a __sync operation since a final
14877 barrier will be emitted to stop code hoisting. */
14878 if (is_sync)
14879 aarch64_emit_load_exclusive (mode, old_out, mem,
14880 GEN_INT (MEMMODEL_RELAXED));
14881 else
14882 aarch64_emit_load_exclusive (mode, old_out, mem, model_rtx);
0462169c
SN
14883
14884 switch (code)
14885 {
14886 case SET:
14887 new_out = value;
14888 break;
14889
14890 case NOT:
14891 x = gen_rtx_AND (wmode, old_out, value);
f7df4a84 14892 emit_insn (gen_rtx_SET (new_out, x));
0462169c 14893 x = gen_rtx_NOT (wmode, new_out);
f7df4a84 14894 emit_insn (gen_rtx_SET (new_out, x));
0462169c
SN
14895 break;
14896
14897 case MINUS:
14898 if (CONST_INT_P (value))
14899 {
14900 value = GEN_INT (-INTVAL (value));
14901 code = PLUS;
14902 }
14903 /* Fall through. */
14904
14905 default:
14906 x = gen_rtx_fmt_ee (code, wmode, old_out, value);
f7df4a84 14907 emit_insn (gen_rtx_SET (new_out, x));
0462169c
SN
14908 break;
14909 }
14910
14911 aarch64_emit_store_exclusive (mode, cond, mem,
14912 gen_lowpart (mode, new_out), model_rtx);
14913
6e1eaca9
RE
14914 if (aarch64_track_speculation)
14915 {
14916 /* Emit an explicit compare instruction, so that we can correctly
14917 track the condition codes. */
14918 rtx cc_reg = aarch64_gen_compare_reg (NE, cond, const0_rtx);
14919 x = gen_rtx_NE (GET_MODE (cc_reg), cc_reg, const0_rtx);
14920 }
14921 else
14922 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
14923
0462169c
SN
14924 x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
14925 gen_rtx_LABEL_REF (Pmode, label), pc_rtx);
f7df4a84 14926 aarch64_emit_unlikely_jump (gen_rtx_SET (pc_rtx, x));
f70fb3b6
MW
14927
14928 /* Emit any final barrier needed for a __sync operation. */
14929 if (is_sync)
14930 aarch64_emit_post_barrier (model);
0462169c
SN
14931}
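/* As an illustration only (the register numbers here are hypothetical), the
   sequence split out above for a relaxed 32-bit fetch-and-add is roughly:

	.Lretry:
	ldxr	w0, [x2]	// load-exclusive the old value
	add	w1, w0, w3	// apply the operation
	stxr	w4, w1, [x2]	// store-exclusive the new value
	cbnz	w4, .Lretry	// retry if the exclusive store failed

   with acquire/release forms of the exclusives where the memory model
   requires them, an explicit compare instead of CBNZ when tracking
   speculation, and a trailing barrier for __sync operations, as handled
   by the code above.  */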
14932
c2ec330c
AL
14933static void
14934aarch64_init_libfuncs (void)
14935{
14936 /* Half-precision float operations. The compiler handles all operations
14937 with NULL libfuncs by converting to SFmode. */
14938
14939 /* Conversions. */
14940 set_conv_libfunc (trunc_optab, HFmode, SFmode, "__gnu_f2h_ieee");
14941 set_conv_libfunc (sext_optab, SFmode, HFmode, "__gnu_h2f_ieee");
14942
14943 /* Arithmetic. */
14944 set_optab_libfunc (add_optab, HFmode, NULL);
14945 set_optab_libfunc (sdiv_optab, HFmode, NULL);
14946 set_optab_libfunc (smul_optab, HFmode, NULL);
14947 set_optab_libfunc (neg_optab, HFmode, NULL);
14948 set_optab_libfunc (sub_optab, HFmode, NULL);
14949
14950 /* Comparisons. */
14951 set_optab_libfunc (eq_optab, HFmode, NULL);
14952 set_optab_libfunc (ne_optab, HFmode, NULL);
14953 set_optab_libfunc (lt_optab, HFmode, NULL);
14954 set_optab_libfunc (le_optab, HFmode, NULL);
14955 set_optab_libfunc (ge_optab, HFmode, NULL);
14956 set_optab_libfunc (gt_optab, HFmode, NULL);
14957 set_optab_libfunc (unord_optab, HFmode, NULL);
14958}
14959
43e9d192 14960/* Target hook for c_mode_for_suffix. */
ef4bddc2 14961static machine_mode
43e9d192
IB
14962aarch64_c_mode_for_suffix (char suffix)
14963{
14964 if (suffix == 'q')
14965 return TFmode;
14966
14967 return VOIDmode;
14968}
14969
3520f7cc
JG
14970/* We can only represent floating point constants which will fit in
14971 "quarter-precision" values. These values are characterised by
14972   a sign bit, a 4-bit mantissa and a 3-bit exponent, and are given
14973   by:
14974
14975 (-1)^s * (n/16) * 2^r
14976
14977 Where:
14978 's' is the sign bit.
14979 'n' is an integer in the range 16 <= n <= 31.
14980 'r' is an integer in the range -3 <= r <= 4. */
14981
14982/* Return true iff X can be represented by a quarter-precision
14983   floating point immediate operand.  Note, we cannot represent 0.0. */
14984bool
14985aarch64_float_const_representable_p (rtx x)
14986{
14987 /* This represents our current view of how many bits
14988 make up the mantissa. */
14989 int point_pos = 2 * HOST_BITS_PER_WIDE_INT - 1;
ba96cdfb 14990 int exponent;
3520f7cc 14991 unsigned HOST_WIDE_INT mantissa, mask;
3520f7cc 14992 REAL_VALUE_TYPE r, m;
807e902e 14993 bool fail;
3520f7cc
JG
14994
14995 if (!CONST_DOUBLE_P (x))
14996 return false;
14997
a4518821
RS
14998 if (GET_MODE (x) == VOIDmode
14999 || (GET_MODE (x) == HFmode && !TARGET_FP_F16INST))
94bfa2da
TV
15000 return false;
15001
34a72c33 15002 r = *CONST_DOUBLE_REAL_VALUE (x);
3520f7cc
JG
15003
15004 /* We cannot represent infinities, NaNs or +/-zero. We won't
15005 know if we have +zero until we analyse the mantissa, but we
15006 can reject the other invalid values. */
15007 if (REAL_VALUE_ISINF (r) || REAL_VALUE_ISNAN (r)
15008 || REAL_VALUE_MINUS_ZERO (r))
15009 return false;
15010
ba96cdfb 15011 /* Extract exponent. */
3520f7cc
JG
15012 r = real_value_abs (&r);
15013 exponent = REAL_EXP (&r);
15014
15015 /* For the mantissa, we expand into two HOST_WIDE_INTS, apart from the
15016 highest (sign) bit, with a fixed binary point at bit point_pos.
15017 m1 holds the low part of the mantissa, m2 the high part.
15018 WARNING: If we ever have a representation using more than 2 * H_W_I - 1
15019 bits for the mantissa, this can fail (low bits will be lost). */
15020 real_ldexp (&m, &r, point_pos - exponent);
807e902e 15021 wide_int w = real_to_integer (&m, &fail, HOST_BITS_PER_WIDE_INT * 2);
3520f7cc
JG
15022
15023 /* If the low part of the mantissa has bits set we cannot represent
15024 the value. */
d9074b29 15025 if (w.ulow () != 0)
3520f7cc
JG
15026 return false;
15027 /* We have rejected the lower HOST_WIDE_INT, so update our
15028 understanding of how many bits lie in the mantissa and
15029 look only at the high HOST_WIDE_INT. */
807e902e 15030 mantissa = w.elt (1);
3520f7cc
JG
15031 point_pos -= HOST_BITS_PER_WIDE_INT;
15032
15033 /* We can only represent values with a mantissa of the form 1.xxxx. */
15034 mask = ((unsigned HOST_WIDE_INT)1 << (point_pos - 5)) - 1;
15035 if ((mantissa & mask) != 0)
15036 return false;
15037
15038 /* Having filtered unrepresentable values, we may now remove all
15039 but the highest 5 bits. */
15040 mantissa >>= point_pos - 5;
15041
15042 /* We cannot represent the value 0.0, so reject it. This is handled
15043 elsewhere. */
15044 if (mantissa == 0)
15045 return false;
15046
15047 /* Then, as bit 4 is always set, we can mask it off, leaving
15048 the mantissa in the range [0, 15]. */
15049 mantissa &= ~(1 << 4);
15050 gcc_assert (mantissa <= 15);
15051
15052 /* GCC internally does not use IEEE754-like encoding (where normalized
15053     significands are in the range [1, 2)).  GCC uses [0.5, 1) (see real.c).
15054 Our mantissa values are shifted 4 places to the left relative to
15055 normalized IEEE754 so we must modify the exponent returned by REAL_EXP
15056 by 5 places to correct for GCC's representation. */
15057 exponent = 5 - exponent;
15058
15059 return (exponent >= 0 && exponent <= 7);
15060}
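/* Standalone sketch (a hypothetical helper, not used by the compiler): the
   same set can be enumerated directly from the (-1)^s * (n/16) * 2^r form
   described above, giving magnitudes from 0.125 (16/16 * 2^-3) up to 31
   (31/16 * 2^4).  The host 'double' type holds these values exactly, since
   they need at most five significand bits.  */

static bool
aarch64_quarter_precision_example_p (double d)
{
  if (d < 0)
    d = -d;
  for (int r = -3; r <= 4; r++)
    for (int n = 16; n <= 31; n++)
      {
	/* Compute n/16 * 2^r without relying on libm.  */
	double val = (double) n / 16.0;
	for (int i = 0; i < r; i++)
	  val *= 2.0;
	for (int i = 0; i > r; i--)
	  val /= 2.0;
	if (d == val)
	  return true;
      }
  return false;
}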
15061
ab6501d7
SD
15062/* Returns the string with the instruction for AdvSIMD MOVI, MVNI, ORR or BIC
15063 immediate with a CONST_VECTOR of MODE and WIDTH. WHICH selects whether to
15064 output MOVI/MVNI, ORR or BIC immediate. */
3520f7cc 15065char*
b187677b 15066aarch64_output_simd_mov_immediate (rtx const_vector, unsigned width,
ab6501d7 15067 enum simd_immediate_check which)
3520f7cc 15068{
3ea63f60 15069 bool is_valid;
3520f7cc 15070 static char templ[40];
3520f7cc 15071 const char *mnemonic;
e4f0f84d 15072 const char *shift_op;
3520f7cc 15073 unsigned int lane_count = 0;
81c2dfb9 15074 char element_char;
3520f7cc 15075
b187677b 15076 struct simd_immediate_info info;
48063b9d
IB
15077
15078 /* This will return true to show const_vector is legal for use as either
ab6501d7
SD
15079   an AdvSIMD MOVI instruction (or, implicitly, MVNI), ORR or BIC immediate.
15080 It will also update INFO to show how the immediate should be generated.
15081 WHICH selects whether to check for MOVI/MVNI, ORR or BIC. */
b187677b 15082 is_valid = aarch64_simd_valid_immediate (const_vector, &info, which);
3520f7cc
JG
15083 gcc_assert (is_valid);
15084
b187677b
RS
15085 element_char = sizetochar (GET_MODE_BITSIZE (info.elt_mode));
15086 lane_count = width / GET_MODE_BITSIZE (info.elt_mode);
48063b9d 15087
b187677b 15088 if (GET_MODE_CLASS (info.elt_mode) == MODE_FLOAT)
3520f7cc 15089 {
b187677b 15090 gcc_assert (info.shift == 0 && info.insn == simd_immediate_info::MOV);
0d8e1702
KT
15091 /* For FP zero change it to a CONST_INT 0 and use the integer SIMD
15092 move immediate path. */
48063b9d
IB
15093 if (aarch64_float_const_zero_rtx_p (info.value))
15094 info.value = GEN_INT (0);
15095 else
15096 {
83faf7d0 15097 const unsigned int buf_size = 20;
48063b9d 15098 char float_buf[buf_size] = {'\0'};
34a72c33
RS
15099 real_to_decimal_for_mode (float_buf,
15100 CONST_DOUBLE_REAL_VALUE (info.value),
b187677b 15101 buf_size, buf_size, 1, info.elt_mode);
48063b9d
IB
15102
15103 if (lane_count == 1)
15104 snprintf (templ, sizeof (templ), "fmov\t%%d0, %s", float_buf);
15105 else
15106 snprintf (templ, sizeof (templ), "fmov\t%%0.%d%c, %s",
81c2dfb9 15107 lane_count, element_char, float_buf);
48063b9d
IB
15108 return templ;
15109 }
3520f7cc 15110 }
3520f7cc 15111
0d8e1702 15112 gcc_assert (CONST_INT_P (info.value));
ab6501d7
SD
15113
15114 if (which == AARCH64_CHECK_MOV)
15115 {
b187677b
RS
15116 mnemonic = info.insn == simd_immediate_info::MVN ? "mvni" : "movi";
15117 shift_op = info.modifier == simd_immediate_info::MSL ? "msl" : "lsl";
ab6501d7
SD
15118 if (lane_count == 1)
15119 snprintf (templ, sizeof (templ), "%s\t%%d0, " HOST_WIDE_INT_PRINT_HEX,
15120 mnemonic, UINTVAL (info.value));
15121 else if (info.shift)
15122 snprintf (templ, sizeof (templ), "%s\t%%0.%d%c, "
15123 HOST_WIDE_INT_PRINT_HEX ", %s %d", mnemonic, lane_count,
15124 element_char, UINTVAL (info.value), shift_op, info.shift);
15125 else
15126 snprintf (templ, sizeof (templ), "%s\t%%0.%d%c, "
15127 HOST_WIDE_INT_PRINT_HEX, mnemonic, lane_count,
15128 element_char, UINTVAL (info.value));
15129 }
3520f7cc 15130 else
ab6501d7
SD
15131 {
15132 /* For AARCH64_CHECK_BIC and AARCH64_CHECK_ORR. */
b187677b 15133 mnemonic = info.insn == simd_immediate_info::MVN ? "bic" : "orr";
ab6501d7
SD
15134 if (info.shift)
15135 snprintf (templ, sizeof (templ), "%s\t%%0.%d%c, #"
15136 HOST_WIDE_INT_PRINT_DEC ", %s #%d", mnemonic, lane_count,
15137 element_char, UINTVAL (info.value), "lsl", info.shift);
15138 else
15139 snprintf (templ, sizeof (templ), "%s\t%%0.%d%c, #"
15140 HOST_WIDE_INT_PRINT_DEC, mnemonic, lane_count,
15141 element_char, UINTVAL (info.value));
15142 }
3520f7cc
JG
15143 return templ;
15144}
15145
b7342d25 15146char*
77e994c9 15147aarch64_output_scalar_simd_mov_immediate (rtx immediate, scalar_int_mode mode)
b7342d25 15148{
a2170965
TC
15149
15150  /* If a floating point number was passed and we want to use it in an
15151     integer mode, do the conversion to integer.  */
15152 if (CONST_DOUBLE_P (immediate) && GET_MODE_CLASS (mode) == MODE_INT)
15153 {
15154 unsigned HOST_WIDE_INT ival;
15155 if (!aarch64_reinterpret_float_as_int (immediate, &ival))
15156 gcc_unreachable ();
15157 immediate = gen_int_mode (ival, mode);
15158 }
15159
ef4bddc2 15160 machine_mode vmode;
a2170965
TC
15161  /* Use a 64-bit mode for everything except for DI/DF mode, where we use
15162     a 128-bit vector mode.  */
15163 int width = GET_MODE_BITSIZE (mode) == 64 ? 128 : 64;
b7342d25 15164
a2170965 15165 vmode = aarch64_simd_container_mode (mode, width);
b7342d25 15166 rtx v_op = aarch64_simd_gen_const_vector_dup (vmode, INTVAL (immediate));
b187677b 15167 return aarch64_output_simd_mov_immediate (v_op, width);
b7342d25
IB
15168}
15169
43cacb12
RS
15170/* Return the output string to use for moving immediate CONST_VECTOR
15171 into an SVE register. */
15172
15173char *
15174aarch64_output_sve_mov_immediate (rtx const_vector)
15175{
15176 static char templ[40];
15177 struct simd_immediate_info info;
15178 char element_char;
15179
15180 bool is_valid = aarch64_simd_valid_immediate (const_vector, &info);
15181 gcc_assert (is_valid);
15182
15183 element_char = sizetochar (GET_MODE_BITSIZE (info.elt_mode));
15184
15185 if (info.step)
15186 {
15187 snprintf (templ, sizeof (templ), "index\t%%0.%c, #"
15188 HOST_WIDE_INT_PRINT_DEC ", #" HOST_WIDE_INT_PRINT_DEC,
15189 element_char, INTVAL (info.value), INTVAL (info.step));
15190 return templ;
15191 }
15192
15193 if (GET_MODE_CLASS (info.elt_mode) == MODE_FLOAT)
15194 {
15195 if (aarch64_float_const_zero_rtx_p (info.value))
15196 info.value = GEN_INT (0);
15197 else
15198 {
15199 const int buf_size = 20;
15200 char float_buf[buf_size] = {};
15201 real_to_decimal_for_mode (float_buf,
15202 CONST_DOUBLE_REAL_VALUE (info.value),
15203 buf_size, buf_size, 1, info.elt_mode);
15204
15205 snprintf (templ, sizeof (templ), "fmov\t%%0.%c, #%s",
15206 element_char, float_buf);
15207 return templ;
15208 }
15209 }
15210
15211 snprintf (templ, sizeof (templ), "mov\t%%0.%c, #" HOST_WIDE_INT_PRINT_DEC,
15212 element_char, INTVAL (info.value));
15213 return templ;
15214}
15215
15216/* Return the asm format for a PTRUE instruction whose destination has
15217 mode MODE. SUFFIX is the element size suffix. */
15218
15219char *
15220aarch64_output_ptrue (machine_mode mode, char suffix)
15221{
15222 unsigned int nunits;
15223 static char buf[sizeof ("ptrue\t%0.N, vlNNNNN")];
15224 if (GET_MODE_NUNITS (mode).is_constant (&nunits))
15225 snprintf (buf, sizeof (buf), "ptrue\t%%0.%c, vl%d", suffix, nunits);
15226 else
15227 snprintf (buf, sizeof (buf), "ptrue\t%%0.%c, all", suffix);
15228 return buf;
15229}
15230
88b08073
JG
15231/* Split operands into moves from op[1] + op[2] into op[0]. */
15232
15233void
15234aarch64_split_combinev16qi (rtx operands[3])
15235{
15236 unsigned int dest = REGNO (operands[0]);
15237 unsigned int src1 = REGNO (operands[1]);
15238 unsigned int src2 = REGNO (operands[2]);
ef4bddc2 15239 machine_mode halfmode = GET_MODE (operands[1]);
462a99aa 15240 unsigned int halfregs = REG_NREGS (operands[1]);
88b08073
JG
15241 rtx destlo, desthi;
15242
15243 gcc_assert (halfmode == V16QImode);
15244
15245 if (src1 == dest && src2 == dest + halfregs)
15246 {
15247 /* No-op move. Can't split to nothing; emit something. */
15248 emit_note (NOTE_INSN_DELETED);
15249 return;
15250 }
15251
15252 /* Preserve register attributes for variable tracking. */
15253 destlo = gen_rtx_REG_offset (operands[0], halfmode, dest, 0);
15254 desthi = gen_rtx_REG_offset (operands[0], halfmode, dest + halfregs,
15255 GET_MODE_SIZE (halfmode));
15256
15257 /* Special case of reversed high/low parts. */
15258 if (reg_overlap_mentioned_p (operands[2], destlo)
15259 && reg_overlap_mentioned_p (operands[1], desthi))
15260 {
15261 emit_insn (gen_xorv16qi3 (operands[1], operands[1], operands[2]));
15262 emit_insn (gen_xorv16qi3 (operands[2], operands[1], operands[2]));
15263 emit_insn (gen_xorv16qi3 (operands[1], operands[1], operands[2]));
15264 }
15265 else if (!reg_overlap_mentioned_p (operands[2], destlo))
15266 {
15267 /* Try to avoid unnecessary moves if part of the result
15268 is in the right place already. */
15269 if (src1 != dest)
15270 emit_move_insn (destlo, operands[1]);
15271 if (src2 != dest + halfregs)
15272 emit_move_insn (desthi, operands[2]);
15273 }
15274 else
15275 {
15276 if (src2 != dest + halfregs)
15277 emit_move_insn (desthi, operands[2]);
15278 if (src1 != dest)
15279 emit_move_insn (destlo, operands[1]);
15280 }
15281}
15282
15283/* vec_perm support. */
15284
88b08073
JG
15285struct expand_vec_perm_d
15286{
15287 rtx target, op0, op1;
e3342de4 15288 vec_perm_indices perm;
ef4bddc2 15289 machine_mode vmode;
43cacb12 15290 unsigned int vec_flags;
88b08073
JG
15291 bool one_vector_p;
15292 bool testing_p;
15293};
15294
15295/* Generate a variable permutation. */
15296
15297static void
15298aarch64_expand_vec_perm_1 (rtx target, rtx op0, rtx op1, rtx sel)
15299{
ef4bddc2 15300 machine_mode vmode = GET_MODE (target);
88b08073
JG
15301 bool one_vector_p = rtx_equal_p (op0, op1);
15302
15303 gcc_checking_assert (vmode == V8QImode || vmode == V16QImode);
15304 gcc_checking_assert (GET_MODE (op0) == vmode);
15305 gcc_checking_assert (GET_MODE (op1) == vmode);
15306 gcc_checking_assert (GET_MODE (sel) == vmode);
15307 gcc_checking_assert (TARGET_SIMD);
15308
15309 if (one_vector_p)
15310 {
15311 if (vmode == V8QImode)
15312 {
15313 /* Expand the argument to a V16QI mode by duplicating it. */
15314 rtx pair = gen_reg_rtx (V16QImode);
15315 emit_insn (gen_aarch64_combinev8qi (pair, op0, op0));
15316 emit_insn (gen_aarch64_tbl1v8qi (target, pair, sel));
15317 }
15318 else
15319 {
15320 emit_insn (gen_aarch64_tbl1v16qi (target, op0, sel));
15321 }
15322 }
15323 else
15324 {
15325 rtx pair;
15326
15327 if (vmode == V8QImode)
15328 {
15329 pair = gen_reg_rtx (V16QImode);
15330 emit_insn (gen_aarch64_combinev8qi (pair, op0, op1));
15331 emit_insn (gen_aarch64_tbl1v8qi (target, pair, sel));
15332 }
15333 else
15334 {
15335 pair = gen_reg_rtx (OImode);
15336 emit_insn (gen_aarch64_combinev16qi (pair, op0, op1));
15337 emit_insn (gen_aarch64_tbl2v16qi (target, pair, sel));
15338 }
15339 }
15340}
15341
80940017
RS
15342/* Expand a vec_perm with the operands given by TARGET, OP0, OP1 and SEL.
15343 NELT is the number of elements in the vector. */
15344
88b08073 15345void
80940017
RS
15346aarch64_expand_vec_perm (rtx target, rtx op0, rtx op1, rtx sel,
15347 unsigned int nelt)
88b08073 15348{
ef4bddc2 15349 machine_mode vmode = GET_MODE (target);
88b08073 15350 bool one_vector_p = rtx_equal_p (op0, op1);
f7c4e5b8 15351 rtx mask;
88b08073
JG
15352
15353 /* The TBL instruction does not use a modulo index, so we must take care
15354 of that ourselves. */
f7c4e5b8
AL
15355 mask = aarch64_simd_gen_const_vector_dup (vmode,
15356 one_vector_p ? nelt - 1 : 2 * nelt - 1);
88b08073
JG
15357 sel = expand_simple_binop (vmode, AND, sel, mask, NULL, 0, OPTAB_LIB_WIDEN);
15358
f7c4e5b8
AL
15359 /* For big-endian, we also need to reverse the index within the vector
15360 (but not which vector). */
15361 if (BYTES_BIG_ENDIAN)
15362 {
15363 /* If one_vector_p, mask is a vector of (nelt - 1)'s already. */
15364 if (!one_vector_p)
15365 mask = aarch64_simd_gen_const_vector_dup (vmode, nelt - 1);
15366 sel = expand_simple_binop (vmode, XOR, sel, mask,
15367 NULL, 0, OPTAB_LIB_WIDEN);
15368 }
88b08073
JG
15369 aarch64_expand_vec_perm_1 (target, op0, op1, sel);
15370}
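/* Minimal sketch (a hypothetical helper, not used elsewhere) of the index
   transformation performed above: the selector is reduced modulo the number
   of selectable elements, then, for big-endian, reversed within its input
   vector while keeping the choice of input vector.  */

static unsigned int
aarch64_vec_perm_index_example (unsigned int sel, unsigned int nelt,
				bool one_vector_p, bool big_endian_p)
{
  /* TBL has no modulo semantics, so mask the index first.  */
  sel &= one_vector_p ? nelt - 1 : 2 * nelt - 1;
  /* Reverse the lane index within each input vector on big-endian.  */
  if (big_endian_p)
    sel ^= nelt - 1;
  return sel;
}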
15371
43cacb12
RS
15372/* Generate (set TARGET (unspec [OP0 OP1] CODE)). */
15373
15374static void
15375emit_unspec2 (rtx target, int code, rtx op0, rtx op1)
15376{
15377 emit_insn (gen_rtx_SET (target,
15378 gen_rtx_UNSPEC (GET_MODE (target),
15379 gen_rtvec (2, op0, op1), code)));
15380}
15381
15382/* Expand an SVE vec_perm with the given operands. */
15383
15384void
15385aarch64_expand_sve_vec_perm (rtx target, rtx op0, rtx op1, rtx sel)
15386{
15387 machine_mode data_mode = GET_MODE (target);
15388 machine_mode sel_mode = GET_MODE (sel);
15389 /* Enforced by the pattern condition. */
15390 int nunits = GET_MODE_NUNITS (sel_mode).to_constant ();
15391
15392 /* Note: vec_perm indices are supposed to wrap when they go beyond the
15393 size of the two value vectors, i.e. the upper bits of the indices
15394 are effectively ignored. SVE TBL instead produces 0 for any
15395 out-of-range indices, so we need to modulo all the vec_perm indices
15396 to ensure they are all in range. */
15397 rtx sel_reg = force_reg (sel_mode, sel);
15398
15399 /* Check if the sel only references the first values vector. */
15400 if (GET_CODE (sel) == CONST_VECTOR
15401 && aarch64_const_vec_all_in_range_p (sel, 0, nunits - 1))
15402 {
15403 emit_unspec2 (target, UNSPEC_TBL, op0, sel_reg);
15404 return;
15405 }
15406
15407 /* Check if the two values vectors are the same. */
15408 if (rtx_equal_p (op0, op1))
15409 {
15410 rtx max_sel = aarch64_simd_gen_const_vector_dup (sel_mode, nunits - 1);
15411 rtx sel_mod = expand_simple_binop (sel_mode, AND, sel_reg, max_sel,
15412 NULL, 0, OPTAB_DIRECT);
15413 emit_unspec2 (target, UNSPEC_TBL, op0, sel_mod);
15414 return;
15415 }
15416
15417  /* Run a TBL for each value vector and combine the results.  */
15418
15419 rtx res0 = gen_reg_rtx (data_mode);
15420 rtx res1 = gen_reg_rtx (data_mode);
15421 rtx neg_num_elems = aarch64_simd_gen_const_vector_dup (sel_mode, -nunits);
15422 if (GET_CODE (sel) != CONST_VECTOR
15423 || !aarch64_const_vec_all_in_range_p (sel, 0, 2 * nunits - 1))
15424 {
15425 rtx max_sel = aarch64_simd_gen_const_vector_dup (sel_mode,
15426 2 * nunits - 1);
15427 sel_reg = expand_simple_binop (sel_mode, AND, sel_reg, max_sel,
15428 NULL, 0, OPTAB_DIRECT);
15429 }
15430 emit_unspec2 (res0, UNSPEC_TBL, op0, sel_reg);
15431 rtx sel_sub = expand_simple_binop (sel_mode, PLUS, sel_reg, neg_num_elems,
15432 NULL, 0, OPTAB_DIRECT);
15433 emit_unspec2 (res1, UNSPEC_TBL, op1, sel_sub);
15434 if (GET_MODE_CLASS (data_mode) == MODE_VECTOR_INT)
15435 emit_insn (gen_rtx_SET (target, gen_rtx_IOR (data_mode, res0, res1)));
15436 else
15437 emit_unspec2 (target, UNSPEC_IORF, res0, res1);
15438}
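/* Worked example for the two-TBL sequence above, assuming four elements per
   vector and sel = { 1, 5, 2, 7 }: the TBL on OP0 yields
   { op0[1], 0, op0[2], 0 }, since SVE TBL zeroes out-of-range lanes; the TBL
   on OP1 uses sel - 4 = { -3, 1, -2, 3 }, whose negative entries are huge
   unsigned indices and therefore also zeroed, yielding
   { 0, op1[1], 0, op1[3] }; IORing the two gives the full permutation.  */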
15439
cc4d934f
JG
15440/* Recognize patterns suitable for the TRN instructions. */
15441static bool
15442aarch64_evpc_trn (struct expand_vec_perm_d *d)
15443{
6a70badb
RS
15444 HOST_WIDE_INT odd;
15445 poly_uint64 nelt = d->perm.length ();
cc4d934f 15446 rtx out, in0, in1, x;
ef4bddc2 15447 machine_mode vmode = d->vmode;
cc4d934f
JG
15448
15449 if (GET_MODE_UNIT_SIZE (vmode) > 8)
15450 return false;
15451
15452 /* Note that these are little-endian tests.
15453 We correct for big-endian later. */
6a70badb
RS
15454 if (!d->perm[0].is_constant (&odd)
15455 || (odd != 0 && odd != 1)
326ac20e
RS
15456 || !d->perm.series_p (0, 2, odd, 2)
15457 || !d->perm.series_p (1, 2, nelt + odd, 2))
cc4d934f 15458 return false;
cc4d934f
JG
15459
15460 /* Success! */
15461 if (d->testing_p)
15462 return true;
15463
15464 in0 = d->op0;
15465 in1 = d->op1;
43cacb12
RS
15466 /* We don't need a big-endian lane correction for SVE; see the comment
15467 at the head of aarch64-sve.md for details. */
15468 if (BYTES_BIG_ENDIAN && d->vec_flags == VEC_ADVSIMD)
cc4d934f
JG
15469 {
15470 x = in0, in0 = in1, in1 = x;
15471 odd = !odd;
15472 }
15473 out = d->target;
15474
3f8334a5
RS
15475 emit_set_insn (out, gen_rtx_UNSPEC (vmode, gen_rtvec (2, in0, in1),
15476 odd ? UNSPEC_TRN2 : UNSPEC_TRN1));
cc4d934f
JG
15477 return true;
15478}
15479
15480/* Recognize patterns suitable for the UZP instructions. */
15481static bool
15482aarch64_evpc_uzp (struct expand_vec_perm_d *d)
15483{
6a70badb 15484 HOST_WIDE_INT odd;
cc4d934f 15485 rtx out, in0, in1, x;
ef4bddc2 15486 machine_mode vmode = d->vmode;
cc4d934f
JG
15487
15488 if (GET_MODE_UNIT_SIZE (vmode) > 8)
15489 return false;
15490
15491 /* Note that these are little-endian tests.
15492 We correct for big-endian later. */
6a70badb
RS
15493 if (!d->perm[0].is_constant (&odd)
15494 || (odd != 0 && odd != 1)
326ac20e 15495 || !d->perm.series_p (0, 1, odd, 2))
cc4d934f 15496 return false;
cc4d934f
JG
15497
15498 /* Success! */
15499 if (d->testing_p)
15500 return true;
15501
15502 in0 = d->op0;
15503 in1 = d->op1;
43cacb12
RS
15504 /* We don't need a big-endian lane correction for SVE; see the comment
15505 at the head of aarch64-sve.md for details. */
15506 if (BYTES_BIG_ENDIAN && d->vec_flags == VEC_ADVSIMD)
cc4d934f
JG
15507 {
15508 x = in0, in0 = in1, in1 = x;
15509 odd = !odd;
15510 }
15511 out = d->target;
15512
3f8334a5
RS
15513 emit_set_insn (out, gen_rtx_UNSPEC (vmode, gen_rtvec (2, in0, in1),
15514 odd ? UNSPEC_UZP2 : UNSPEC_UZP1));
cc4d934f
JG
15515 return true;
15516}
15517
15518/* Recognize patterns suitable for the ZIP instructions. */
15519static bool
15520aarch64_evpc_zip (struct expand_vec_perm_d *d)
15521{
6a70badb
RS
15522 unsigned int high;
15523 poly_uint64 nelt = d->perm.length ();
cc4d934f 15524 rtx out, in0, in1, x;
ef4bddc2 15525 machine_mode vmode = d->vmode;
cc4d934f
JG
15526
15527 if (GET_MODE_UNIT_SIZE (vmode) > 8)
15528 return false;
15529
15530 /* Note that these are little-endian tests.
15531 We correct for big-endian later. */
6a70badb
RS
15532 poly_uint64 first = d->perm[0];
15533 if ((maybe_ne (first, 0U) && maybe_ne (first * 2, nelt))
15534 || !d->perm.series_p (0, 2, first, 1)
15535 || !d->perm.series_p (1, 2, first + nelt, 1))
cc4d934f 15536 return false;
6a70badb 15537 high = maybe_ne (first, 0U);
cc4d934f
JG
15538
15539 /* Success! */
15540 if (d->testing_p)
15541 return true;
15542
15543 in0 = d->op0;
15544 in1 = d->op1;
43cacb12
RS
15545 /* We don't need a big-endian lane correction for SVE; see the comment
15546 at the head of aarch64-sve.md for details. */
15547 if (BYTES_BIG_ENDIAN && d->vec_flags == VEC_ADVSIMD)
cc4d934f
JG
15548 {
15549 x = in0, in0 = in1, in1 = x;
15550 high = !high;
15551 }
15552 out = d->target;
15553
3f8334a5
RS
15554 emit_set_insn (out, gen_rtx_UNSPEC (vmode, gen_rtvec (2, in0, in1),
15555 high ? UNSPEC_ZIP2 : UNSPEC_ZIP1));
cc4d934f
JG
15556 return true;
15557}
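/* Illustrative only: with four elements per input, the little-endian index
   patterns accepted by the TRN, UZP and ZIP matchers above are:

     TRN1 { 0, 4, 2, 6 }   TRN2 { 1, 5, 3, 7 }
     UZP1 { 0, 2, 4, 6 }   UZP2 { 1, 3, 5, 7 }
     ZIP1 { 0, 4, 1, 5 }   ZIP2 { 2, 6, 3, 7 }

   where indices 0-3 select from the first input and 4-7 from the second.  */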
15558
ae0533da
AL
15559/* Recognize patterns for the EXT insn. */
15560
15561static bool
15562aarch64_evpc_ext (struct expand_vec_perm_d *d)
15563{
6a70badb 15564 HOST_WIDE_INT location;
ae0533da
AL
15565 rtx offset;
15566
6a70badb
RS
15567 /* The first element always refers to the first vector.
15568 Check if the extracted indices are increasing by one. */
43cacb12
RS
15569 if (d->vec_flags == VEC_SVE_PRED
15570 || !d->perm[0].is_constant (&location)
6a70badb 15571 || !d->perm.series_p (0, 1, location, 1))
326ac20e 15572 return false;
ae0533da 15573
ae0533da
AL
15574 /* Success! */
15575 if (d->testing_p)
15576 return true;
15577
b31e65bb 15578 /* The case where (location == 0) is a no-op for both big- and little-endian,
43cacb12 15579 and is removed by the mid-end at optimization levels -O1 and higher.
b31e65bb 15580
43cacb12
RS
15581 We don't need a big-endian lane correction for SVE; see the comment
15582 at the head of aarch64-sve.md for details. */
15583 if (BYTES_BIG_ENDIAN && location != 0 && d->vec_flags == VEC_ADVSIMD)
ae0533da
AL
15584 {
15585 /* After setup, we want the high elements of the first vector (stored
15586 at the LSB end of the register), and the low elements of the second
15587 vector (stored at the MSB end of the register). So swap. */
cb5c6c29 15588 std::swap (d->op0, d->op1);
6a70badb
RS
15589 /* location != 0 (above), so safe to assume (nelt - location) < nelt.
15590 to_constant () is safe since this is restricted to Advanced SIMD
15591 vectors. */
15592 location = d->perm.length ().to_constant () - location;
ae0533da
AL
15593 }
15594
15595 offset = GEN_INT (location);
3f8334a5
RS
15596 emit_set_insn (d->target,
15597 gen_rtx_UNSPEC (d->vmode,
15598 gen_rtvec (3, d->op0, d->op1, offset),
15599 UNSPEC_EXT));
ae0533da
AL
15600 return true;
15601}
15602
43cacb12
RS
15603/* Recognize patterns for the REV{64,32,16} insns, which reverse elements
15604 within each 64-bit, 32-bit or 16-bit granule. */
923fcec3
AL
15605
15606static bool
43cacb12 15607aarch64_evpc_rev_local (struct expand_vec_perm_d *d)
923fcec3 15608{
6a70badb
RS
15609 HOST_WIDE_INT diff;
15610 unsigned int i, size, unspec;
43cacb12 15611 machine_mode pred_mode;
923fcec3 15612
43cacb12
RS
15613 if (d->vec_flags == VEC_SVE_PRED
15614 || !d->one_vector_p
6a70badb 15615 || !d->perm[0].is_constant (&diff))
923fcec3
AL
15616 return false;
15617
3f8334a5
RS
15618 size = (diff + 1) * GET_MODE_UNIT_SIZE (d->vmode);
15619 if (size == 8)
43cacb12
RS
15620 {
15621 unspec = UNSPEC_REV64;
15622 pred_mode = VNx2BImode;
15623 }
3f8334a5 15624 else if (size == 4)
43cacb12
RS
15625 {
15626 unspec = UNSPEC_REV32;
15627 pred_mode = VNx4BImode;
15628 }
3f8334a5 15629 else if (size == 2)
43cacb12
RS
15630 {
15631 unspec = UNSPEC_REV16;
15632 pred_mode = VNx8BImode;
15633 }
3f8334a5
RS
15634 else
15635 return false;
923fcec3 15636
326ac20e
RS
15637 unsigned int step = diff + 1;
15638 for (i = 0; i < step; ++i)
15639 if (!d->perm.series_p (i, step, diff - i, step))
15640 return false;
923fcec3
AL
15641
15642 /* Success! */
15643 if (d->testing_p)
15644 return true;
15645
43cacb12
RS
15646 rtx src = gen_rtx_UNSPEC (d->vmode, gen_rtvec (1, d->op0), unspec);
15647 if (d->vec_flags == VEC_SVE_DATA)
15648 {
15649 rtx pred = force_reg (pred_mode, CONSTM1_RTX (pred_mode));
15650 src = gen_rtx_UNSPEC (d->vmode, gen_rtvec (2, pred, src),
15651 UNSPEC_MERGE_PTRUE);
15652 }
15653 emit_set_insn (d->target, src);
15654 return true;
15655}
15656
15657/* Recognize patterns for the REV insn, which reverses elements within
15658 a full vector. */
15659
15660static bool
15661aarch64_evpc_rev_global (struct expand_vec_perm_d *d)
15662{
15663 poly_uint64 nelt = d->perm.length ();
15664
15665 if (!d->one_vector_p || d->vec_flags != VEC_SVE_DATA)
15666 return false;
15667
15668 if (!d->perm.series_p (0, 1, nelt - 1, -1))
15669 return false;
15670
15671 /* Success! */
15672 if (d->testing_p)
15673 return true;
15674
15675 rtx src = gen_rtx_UNSPEC (d->vmode, gen_rtvec (1, d->op0), UNSPEC_REV);
15676 emit_set_insn (d->target, src);
923fcec3
AL
15677 return true;
15678}
15679
91bd4114
JG
15680static bool
15681aarch64_evpc_dup (struct expand_vec_perm_d *d)
15682{
91bd4114
JG
15683 rtx out = d->target;
15684 rtx in0;
6a70badb 15685 HOST_WIDE_INT elt;
ef4bddc2 15686 machine_mode vmode = d->vmode;
91bd4114
JG
15687 rtx lane;
15688
43cacb12
RS
15689 if (d->vec_flags == VEC_SVE_PRED
15690 || d->perm.encoding ().encoded_nelts () != 1
6a70badb 15691 || !d->perm[0].is_constant (&elt))
326ac20e
RS
15692 return false;
15693
43cacb12
RS
15694 if (d->vec_flags == VEC_SVE_DATA && elt >= 64 * GET_MODE_UNIT_SIZE (vmode))
15695 return false;
15696
326ac20e
RS
15697 /* Success! */
15698 if (d->testing_p)
15699 return true;
15700
91bd4114
JG
15701 /* The generic preparation in aarch64_expand_vec_perm_const_1
15702 swaps the operand order and the permute indices if it finds
15703 d->perm[0] to be in the second operand. Thus, we can always
15704 use d->op0 and need not do any extra arithmetic to get the
15705 correct lane number. */
15706 in0 = d->op0;
f901401e 15707 lane = GEN_INT (elt); /* The pattern corrects for big-endian. */
91bd4114 15708
3f8334a5
RS
15709 rtx parallel = gen_rtx_PARALLEL (vmode, gen_rtvec (1, lane));
15710 rtx select = gen_rtx_VEC_SELECT (GET_MODE_INNER (vmode), in0, parallel);
15711 emit_set_insn (out, gen_rtx_VEC_DUPLICATE (vmode, select));
91bd4114
JG
15712 return true;
15713}
15714
88b08073
JG
15715static bool
15716aarch64_evpc_tbl (struct expand_vec_perm_d *d)
15717{
43cacb12 15718 rtx rperm[MAX_COMPILE_TIME_VEC_BYTES], sel;
ef4bddc2 15719 machine_mode vmode = d->vmode;
6a70badb
RS
15720
15721 /* Make sure that the indices are constant. */
15722 unsigned int encoded_nelts = d->perm.encoding ().encoded_nelts ();
15723 for (unsigned int i = 0; i < encoded_nelts; ++i)
15724 if (!d->perm[i].is_constant ())
15725 return false;
88b08073 15726
88b08073
JG
15727 if (d->testing_p)
15728 return true;
15729
15730  /* Generic code will try constant permutation twice: once with the
15731 original mode and again with the elements lowered to QImode.
15732 So wait and don't do the selector expansion ourselves. */
15733 if (vmode != V8QImode && vmode != V16QImode)
15734 return false;
15735
6a70badb
RS
15736 /* to_constant is safe since this routine is specific to Advanced SIMD
15737 vectors. */
15738 unsigned int nelt = d->perm.length ().to_constant ();
15739 for (unsigned int i = 0; i < nelt; ++i)
15740 /* If big-endian and two vectors we end up with a weird mixed-endian
15741 mode on NEON. Reverse the index within each word but not the word
15742 itself. to_constant is safe because we checked is_constant above. */
15743 rperm[i] = GEN_INT (BYTES_BIG_ENDIAN
15744 ? d->perm[i].to_constant () ^ (nelt - 1)
15745 : d->perm[i].to_constant ());
bbcc9c00 15746
88b08073
JG
15747 sel = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, rperm));
15748 sel = force_reg (vmode, sel);
15749
15750 aarch64_expand_vec_perm_1 (d->target, d->op0, d->op1, sel);
15751 return true;
15752}
15753
43cacb12
RS
15754/* Try to implement D using an SVE TBL instruction. */
15755
15756static bool
15757aarch64_evpc_sve_tbl (struct expand_vec_perm_d *d)
15758{
15759 unsigned HOST_WIDE_INT nelt;
15760
15761 /* Permuting two variable-length vectors could overflow the
15762 index range. */
15763 if (!d->one_vector_p && !d->perm.length ().is_constant (&nelt))
15764 return false;
15765
15766 if (d->testing_p)
15767 return true;
15768
15769 machine_mode sel_mode = mode_for_int_vector (d->vmode).require ();
15770 rtx sel = vec_perm_indices_to_rtx (sel_mode, d->perm);
e25c95ef
RS
15771 if (d->one_vector_p)
15772 emit_unspec2 (d->target, UNSPEC_TBL, d->op0, force_reg (sel_mode, sel));
15773 else
15774 aarch64_expand_sve_vec_perm (d->target, d->op0, d->op1, sel);
43cacb12
RS
15775 return true;
15776}
15777
88b08073
JG
15778static bool
15779aarch64_expand_vec_perm_const_1 (struct expand_vec_perm_d *d)
15780{
15781 /* The pattern matching functions above are written to look for a small
15782 number to begin the sequence (0, 1, N/2). If we begin with an index
15783 from the second operand, we can swap the operands. */
6a70badb
RS
15784 poly_int64 nelt = d->perm.length ();
15785 if (known_ge (d->perm[0], nelt))
88b08073 15786 {
e3342de4 15787 d->perm.rotate_inputs (1);
cb5c6c29 15788 std::swap (d->op0, d->op1);
88b08073
JG
15789 }
15790
43cacb12
RS
15791 if ((d->vec_flags == VEC_ADVSIMD
15792 || d->vec_flags == VEC_SVE_DATA
15793 || d->vec_flags == VEC_SVE_PRED)
15794 && known_gt (nelt, 1))
cc4d934f 15795 {
43cacb12
RS
15796 if (aarch64_evpc_rev_local (d))
15797 return true;
15798 else if (aarch64_evpc_rev_global (d))
923fcec3
AL
15799 return true;
15800 else if (aarch64_evpc_ext (d))
ae0533da 15801 return true;
f901401e
AL
15802 else if (aarch64_evpc_dup (d))
15803 return true;
ae0533da 15804 else if (aarch64_evpc_zip (d))
cc4d934f
JG
15805 return true;
15806 else if (aarch64_evpc_uzp (d))
15807 return true;
15808 else if (aarch64_evpc_trn (d))
15809 return true;
43cacb12
RS
15810 if (d->vec_flags == VEC_SVE_DATA)
15811 return aarch64_evpc_sve_tbl (d);
4ec8bb67 15812 else if (d->vec_flags == VEC_ADVSIMD)
43cacb12 15813 return aarch64_evpc_tbl (d);
cc4d934f 15814 }
88b08073
JG
15815 return false;
15816}
15817
f151c9e1 15818/* Implement TARGET_VECTORIZE_VEC_PERM_CONST. */
88b08073 15819
f151c9e1
RS
15820static bool
15821aarch64_vectorize_vec_perm_const (machine_mode vmode, rtx target, rtx op0,
15822 rtx op1, const vec_perm_indices &sel)
88b08073
JG
15823{
15824 struct expand_vec_perm_d d;
88b08073 15825
326ac20e 15826 /* Check whether the mask can be applied to a single vector. */
e25c95ef
RS
15827 if (sel.ninputs () == 1
15828 || (op0 && rtx_equal_p (op0, op1)))
326ac20e
RS
15829 d.one_vector_p = true;
15830 else if (sel.all_from_input_p (0))
88b08073 15831 {
326ac20e
RS
15832 d.one_vector_p = true;
15833 op1 = op0;
88b08073 15834 }
326ac20e 15835 else if (sel.all_from_input_p (1))
88b08073 15836 {
88b08073 15837 d.one_vector_p = true;
326ac20e 15838 op0 = op1;
88b08073 15839 }
326ac20e
RS
15840 else
15841 d.one_vector_p = false;
88b08073 15842
326ac20e
RS
15843 d.perm.new_vector (sel.encoding (), d.one_vector_p ? 1 : 2,
15844 sel.nelts_per_input ());
15845 d.vmode = vmode;
43cacb12 15846 d.vec_flags = aarch64_classify_vector_mode (d.vmode);
326ac20e
RS
15847 d.target = target;
15848 d.op0 = op0;
15849 d.op1 = op1;
15850 d.testing_p = !target;
e3342de4 15851
f151c9e1
RS
15852 if (!d.testing_p)
15853 return aarch64_expand_vec_perm_const_1 (&d);
88b08073 15854
326ac20e 15855 rtx_insn *last = get_last_insn ();
f151c9e1 15856 bool ret = aarch64_expand_vec_perm_const_1 (&d);
326ac20e 15857 gcc_assert (last == get_last_insn ());
88b08073
JG
15858
15859 return ret;
15860}
15861
73e3da51
RS
15862/* Generate a byte permute mask for a register of mode MODE,
15863 which has NUNITS units. */
15864
668046d1 15865rtx
73e3da51 15866aarch64_reverse_mask (machine_mode mode, unsigned int nunits)
668046d1
DS
15867{
15868  /* We have to reverse each vector because we don't have
15869 a permuted load that can reverse-load according to ABI rules. */
15870 rtx mask;
15871 rtvec v = rtvec_alloc (16);
73e3da51
RS
15872 unsigned int i, j;
15873 unsigned int usize = GET_MODE_UNIT_SIZE (mode);
668046d1
DS
15874
15875 gcc_assert (BYTES_BIG_ENDIAN);
15876 gcc_assert (AARCH64_VALID_SIMD_QREG_MODE (mode));
15877
15878 for (i = 0; i < nunits; i++)
15879 for (j = 0; j < usize; j++)
15880 RTVEC_ELT (v, i * usize + j) = GEN_INT ((i + 1) * usize - 1 - j);
15881 mask = gen_rtx_CONST_VECTOR (V16QImode, v);
15882 return force_reg (V16QImode, mask);
15883}
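/* For example (illustrative only), for V4SImode the loop above produces the
   byte selector { 3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12 },
   i.e. the bytes within each 4-byte element are reversed while the element
   order itself is preserved.  */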
15884
43cacb12
RS
15885/* Return true if X is a valid second operand for the SVE instruction
15886 that implements integer comparison OP_CODE. */
15887
15888static bool
15889aarch64_sve_cmp_operand_p (rtx_code op_code, rtx x)
15890{
15891 if (register_operand (x, VOIDmode))
15892 return true;
15893
15894 switch (op_code)
15895 {
15896 case LTU:
15897 case LEU:
15898 case GEU:
15899 case GTU:
15900 return aarch64_sve_cmp_immediate_p (x, false);
15901 case LT:
15902 case LE:
15903 case GE:
15904 case GT:
15905 case NE:
15906 case EQ:
15907 return aarch64_sve_cmp_immediate_p (x, true);
15908 default:
15909 gcc_unreachable ();
15910 }
15911}
15912
f22d7973
RS
15913/* Use predicated SVE instructions to implement the equivalent of:
15914
15915 (set TARGET OP)
15916
15917 given that PTRUE is an all-true predicate of the appropriate mode. */
15918
15919static void
15920aarch64_emit_sve_ptrue_op (rtx target, rtx ptrue, rtx op)
15921{
15922 rtx unspec = gen_rtx_UNSPEC (GET_MODE (target),
15923 gen_rtvec (2, ptrue, op),
15924 UNSPEC_MERGE_PTRUE);
15925 rtx_insn *insn = emit_set_insn (target, unspec);
15926 set_unique_reg_note (insn, REG_EQUAL, copy_rtx (op));
15927}
15928
15929/* Likewise, but also clobber the condition codes. */
15930
15931static void
15932aarch64_emit_sve_ptrue_op_cc (rtx target, rtx ptrue, rtx op)
15933{
15934 rtx unspec = gen_rtx_UNSPEC (GET_MODE (target),
15935 gen_rtvec (2, ptrue, op),
15936 UNSPEC_MERGE_PTRUE);
15937 rtx_insn *insn = emit_insn (gen_set_clobber_cc (target, unspec));
15938 set_unique_reg_note (insn, REG_EQUAL, copy_rtx (op));
15939}
15940
43cacb12
RS
15941/* Return the UNSPEC_COND_* code for comparison CODE. */
15942
15943static unsigned int
15944aarch64_unspec_cond_code (rtx_code code)
15945{
15946 switch (code)
15947 {
15948 case NE:
15949 return UNSPEC_COND_NE;
15950 case EQ:
15951 return UNSPEC_COND_EQ;
15952 case LT:
15953 return UNSPEC_COND_LT;
15954 case GT:
15955 return UNSPEC_COND_GT;
15956 case LE:
15957 return UNSPEC_COND_LE;
15958 case GE:
15959 return UNSPEC_COND_GE;
43cacb12
RS
15960 default:
15961 gcc_unreachable ();
15962 }
15963}
15964
f22d7973 15965/* Emit:
43cacb12 15966
f22d7973
RS
15967 (set TARGET (unspec [PRED OP0 OP1] UNSPEC_COND_<X>))
15968
15969 where <X> is the operation associated with comparison CODE. This form
15970 of instruction is used when (and (CODE OP0 OP1) PRED) would have different
15971 semantics, such as when PRED might not be all-true and when comparing
15972 inactive lanes could have side effects. */
15973
15974static void
15975aarch64_emit_sve_predicated_cond (rtx target, rtx_code code,
15976 rtx pred, rtx op0, rtx op1)
43cacb12 15977{
f22d7973
RS
15978 rtx unspec = gen_rtx_UNSPEC (GET_MODE (pred),
15979 gen_rtvec (3, pred, op0, op1),
15980 aarch64_unspec_cond_code (code));
15981 emit_set_insn (target, unspec);
43cacb12
RS
15982}
15983
f22d7973 15984/* Expand an SVE integer comparison using the SVE equivalent of:
43cacb12 15985
f22d7973 15986 (set TARGET (CODE OP0 OP1)). */
43cacb12
RS
15987
15988void
15989aarch64_expand_sve_vec_cmp_int (rtx target, rtx_code code, rtx op0, rtx op1)
15990{
15991 machine_mode pred_mode = GET_MODE (target);
15992 machine_mode data_mode = GET_MODE (op0);
15993
15994 if (!aarch64_sve_cmp_operand_p (code, op1))
15995 op1 = force_reg (data_mode, op1);
15996
15997 rtx ptrue = force_reg (pred_mode, CONSTM1_RTX (pred_mode));
f22d7973
RS
15998 rtx cond = gen_rtx_fmt_ee (code, pred_mode, op0, op1);
15999 aarch64_emit_sve_ptrue_op_cc (target, ptrue, cond);
43cacb12
RS
16000}
16001
f22d7973 16002/* Emit the SVE equivalent of:
43cacb12 16003
f22d7973
RS
16004 (set TMP1 (CODE1 OP0 OP1))
16005 (set TMP2 (CODE2 OP0 OP1))
16006 (set TARGET (ior:PRED_MODE TMP1 TMP2))
43cacb12 16007
f22d7973 16008 PTRUE is an all-true predicate with the same mode as TARGET. */
43cacb12
RS
16009
16010static void
f22d7973
RS
16011aarch64_emit_sve_or_conds (rtx target, rtx_code code1, rtx_code code2,
16012 rtx ptrue, rtx op0, rtx op1)
43cacb12 16013{
f22d7973 16014 machine_mode pred_mode = GET_MODE (ptrue);
43cacb12 16015 rtx tmp1 = gen_reg_rtx (pred_mode);
f22d7973
RS
16016 aarch64_emit_sve_ptrue_op (tmp1, ptrue,
16017 gen_rtx_fmt_ee (code1, pred_mode, op0, op1));
43cacb12 16018 rtx tmp2 = gen_reg_rtx (pred_mode);
f22d7973
RS
16019 aarch64_emit_sve_ptrue_op (tmp2, ptrue,
16020 gen_rtx_fmt_ee (code2, pred_mode, op0, op1));
16021 aarch64_emit_binop (target, ior_optab, tmp1, tmp2);
43cacb12
RS
16022}
16023
f22d7973 16024/* Emit the SVE equivalent of:
43cacb12 16025
f22d7973
RS
16026 (set TMP (CODE OP0 OP1))
16027 (set TARGET (not TMP))
43cacb12 16028
f22d7973 16029 PTRUE is an all-true predicate with the same mode as TARGET. */
43cacb12
RS
16030
16031static void
f22d7973
RS
16032aarch64_emit_sve_inverted_cond (rtx target, rtx ptrue, rtx_code code,
16033 rtx op0, rtx op1)
43cacb12 16034{
f22d7973
RS
16035 machine_mode pred_mode = GET_MODE (ptrue);
16036 rtx tmp = gen_reg_rtx (pred_mode);
16037 aarch64_emit_sve_ptrue_op (tmp, ptrue,
16038 gen_rtx_fmt_ee (code, pred_mode, op0, op1));
16039 aarch64_emit_unop (target, one_cmpl_optab, tmp);
43cacb12
RS
16040}
16041
f22d7973 16042/* Expand an SVE floating-point comparison using the SVE equivalent of:
43cacb12 16043
f22d7973 16044 (set TARGET (CODE OP0 OP1))
43cacb12
RS
16045
16046 If CAN_INVERT_P is true, the caller can also handle inverted results;
16047 return true if the result is in fact inverted. */
16048
16049bool
16050aarch64_expand_sve_vec_cmp_float (rtx target, rtx_code code,
16051 rtx op0, rtx op1, bool can_invert_p)
16052{
16053 machine_mode pred_mode = GET_MODE (target);
16054 machine_mode data_mode = GET_MODE (op0);
16055
16056 rtx ptrue = force_reg (pred_mode, CONSTM1_RTX (pred_mode));
16057 switch (code)
16058 {
16059 case UNORDERED:
16060 /* UNORDERED has no immediate form. */
16061 op1 = force_reg (data_mode, op1);
f22d7973 16062 /* fall through */
43cacb12
RS
16063 case LT:
16064 case LE:
16065 case GT:
16066 case GE:
16067 case EQ:
16068 case NE:
f22d7973
RS
16069 {
16070 /* There is native support for the comparison. */
16071 rtx cond = gen_rtx_fmt_ee (code, pred_mode, op0, op1);
16072 aarch64_emit_sve_ptrue_op (target, ptrue, cond);
16073 return false;
16074 }
43cacb12
RS
16075
16076 case LTGT:
16077 /* This is a trapping operation (LT or GT). */
f22d7973 16078 aarch64_emit_sve_or_conds (target, LT, GT, ptrue, op0, op1);
43cacb12
RS
16079 return false;
16080
16081 case UNEQ:
16082 if (!flag_trapping_math)
16083 {
16084 /* This would trap for signaling NaNs. */
16085 op1 = force_reg (data_mode, op1);
f22d7973 16086 aarch64_emit_sve_or_conds (target, UNORDERED, EQ, ptrue, op0, op1);
43cacb12
RS
16087 return false;
16088 }
16089 /* fall through */
43cacb12
RS
16090 case UNLT:
16091 case UNLE:
16092 case UNGT:
16093 case UNGE:
f22d7973
RS
16094 if (flag_trapping_math)
16095 {
16096 /* Work out which elements are ordered. */
16097 rtx ordered = gen_reg_rtx (pred_mode);
16098 op1 = force_reg (data_mode, op1);
16099 aarch64_emit_sve_inverted_cond (ordered, ptrue, UNORDERED, op0, op1);
16100
16101 /* Test the opposite condition for the ordered elements,
16102 then invert the result. */
16103 if (code == UNEQ)
16104 code = NE;
16105 else
16106 code = reverse_condition_maybe_unordered (code);
16107 if (can_invert_p)
16108 {
16109 aarch64_emit_sve_predicated_cond (target, code,
16110 ordered, op0, op1);
16111 return true;
16112 }
16113 rtx tmp = gen_reg_rtx (pred_mode);
16114 aarch64_emit_sve_predicated_cond (tmp, code, ordered, op0, op1);
16115 aarch64_emit_unop (target, one_cmpl_optab, tmp);
16116 return false;
16117 }
16118 break;
16119
16120 case ORDERED:
16121 /* ORDERED has no immediate form. */
16122 op1 = force_reg (data_mode, op1);
16123 break;
43cacb12
RS
16124
16125 default:
16126 gcc_unreachable ();
16127 }
f22d7973
RS
16128
16129 /* There is native support for the inverse comparison. */
16130 code = reverse_condition_maybe_unordered (code);
16131 if (can_invert_p)
16132 {
16133 rtx cond = gen_rtx_fmt_ee (code, pred_mode, op0, op1);
16134 aarch64_emit_sve_ptrue_op (target, ptrue, cond);
16135 return true;
16136 }
16137 aarch64_emit_sve_inverted_cond (target, ptrue, code, op0, op1);
16138 return false;
43cacb12
RS
16139}
16140
16141/* Expand an SVE vcond pattern with operands OPS. DATA_MODE is the mode
16142 of the data being selected and CMP_MODE is the mode of the values being
16143 compared. */
16144
16145void
16146aarch64_expand_sve_vcond (machine_mode data_mode, machine_mode cmp_mode,
16147 rtx *ops)
16148{
16149 machine_mode pred_mode
16150 = aarch64_get_mask_mode (GET_MODE_NUNITS (cmp_mode),
16151 GET_MODE_SIZE (cmp_mode)).require ();
16152 rtx pred = gen_reg_rtx (pred_mode);
16153 if (FLOAT_MODE_P (cmp_mode))
16154 {
16155 if (aarch64_expand_sve_vec_cmp_float (pred, GET_CODE (ops[3]),
16156 ops[4], ops[5], true))
16157 std::swap (ops[1], ops[2]);
16158 }
16159 else
16160 aarch64_expand_sve_vec_cmp_int (pred, GET_CODE (ops[3]), ops[4], ops[5]);
16161
16162 rtvec vec = gen_rtvec (3, pred, ops[1], ops[2]);
16163 emit_set_insn (ops[0], gen_rtx_UNSPEC (data_mode, vec, UNSPEC_SEL));
16164}
16165
99e1629f
RS
16166/* Implement TARGET_MODES_TIEABLE_P. In principle we should always return
16167   true.  However, due to issues with register allocation it is preferable
16168   to avoid tying integer scalar and FP scalar modes.  Executing integer
16169 operations in general registers is better than treating them as scalar
16170 vector operations. This reduces latency and avoids redundant int<->FP
16171 moves. So tie modes if they are either the same class, or vector modes
16172 with other vector modes, vector structs or any scalar mode. */
97e1ad78 16173
99e1629f 16174static bool
ef4bddc2 16175aarch64_modes_tieable_p (machine_mode mode1, machine_mode mode2)
97e1ad78
JG
16176{
16177 if (GET_MODE_CLASS (mode1) == GET_MODE_CLASS (mode2))
16178 return true;
16179
16180 /* We specifically want to allow elements of "structure" modes to
16181 be tieable to the structure. This more general condition allows
43cacb12
RS
16182 other rarer situations too. The reason we don't extend this to
16183 predicate modes is that there are no predicate structure modes
16184 nor any specific instructions for extracting part of a predicate
16185 register. */
16186 if (aarch64_vector_data_mode_p (mode1)
16187 && aarch64_vector_data_mode_p (mode2))
61f17a5c
WD
16188 return true;
16189
16190 /* Also allow any scalar modes with vectors. */
16191 if (aarch64_vector_mode_supported_p (mode1)
16192 || aarch64_vector_mode_supported_p (mode2))
97e1ad78
JG
16193 return true;
16194
16195 return false;
16196}
16197
e2c75eea
JG
16198/* Return a new RTX holding the result of moving POINTER forward by
16199 AMOUNT bytes. */
16200
16201static rtx
6a70badb 16202aarch64_move_pointer (rtx pointer, poly_int64 amount)
e2c75eea
JG
16203{
16204 rtx next = plus_constant (Pmode, XEXP (pointer, 0), amount);
16205
16206 return adjust_automodify_address (pointer, GET_MODE (pointer),
16207 next, amount);
16208}
16209
16210/* Return a new RTX holding the result of moving POINTER forward by the
16211 size of the mode it points to. */
16212
16213static rtx
16214aarch64_progress_pointer (rtx pointer)
16215{
6a70badb 16216 return aarch64_move_pointer (pointer, GET_MODE_SIZE (GET_MODE (pointer)));
e2c75eea
JG
16217}
16218
16219/* Copy one MODE sized block from SRC to DST, then progress SRC and DST by
16220 MODE bytes. */
16221
16222static void
16223aarch64_copy_one_block_and_progress_pointers (rtx *src, rtx *dst,
ef4bddc2 16224 machine_mode mode)
e2c75eea
JG
16225{
16226 rtx reg = gen_reg_rtx (mode);
16227
16228 /* "Cast" the pointers to the correct mode. */
16229 *src = adjust_address (*src, mode, 0);
16230 *dst = adjust_address (*dst, mode, 0);
16231 /* Emit the memcpy. */
16232 emit_move_insn (reg, *src);
16233 emit_move_insn (*dst, reg);
16234 /* Move the pointers forward. */
16235 *src = aarch64_progress_pointer (*src);
16236 *dst = aarch64_progress_pointer (*dst);
16237}
16238
16239/* Expand movmem, as if from a __builtin_memcpy. Return true if
16240 we succeed, otherwise return false. */
16241
16242bool
16243aarch64_expand_movmem (rtx *operands)
16244{
89c52e5e 16245 int n, mode_bits;
e2c75eea
JG
16246 rtx dst = operands[0];
16247 rtx src = operands[1];
16248 rtx base;
89c52e5e 16249 machine_mode cur_mode = BLKmode, next_mode;
e2c75eea
JG
16250 bool speed_p = !optimize_function_for_size_p (cfun);
16251
16252 /* When optimizing for size, give a better estimate of the length of a
89c52e5e
TC
16253 memcpy call, but use the default otherwise. Moves larger than 8 bytes
16254   will always require an even number of instructions to do now, and each
16255   operation requires both a load and a store, so divide the max number by 2.  */
16256 int max_num_moves = (speed_p ? 16 : AARCH64_CALL_RATIO) / 2;
e2c75eea
JG
16257
16258 /* We can't do anything smart if the amount to copy is not constant. */
16259 if (!CONST_INT_P (operands[2]))
16260 return false;
16261
89c52e5e 16262 n = INTVAL (operands[2]);
e2c75eea 16263
89c52e5e
TC
16264 /* Try to keep the number of instructions low. For all cases we will do at
16265 most two moves for the residual amount, since we'll always overlap the
16266 remainder. */
16267 if (((n / 16) + (n % 16 ? 2 : 0)) > max_num_moves)
e2c75eea
JG
16268 return false;
16269
16270 base = copy_to_mode_reg (Pmode, XEXP (dst, 0));
16271 dst = adjust_automodify_address (dst, VOIDmode, base, 0);
16272
16273 base = copy_to_mode_reg (Pmode, XEXP (src, 0));
16274 src = adjust_automodify_address (src, VOIDmode, base, 0);
16275
89c52e5e
TC
16276 /* Convert n to bits to make the rest of the code simpler. */
16277 n = n * BITS_PER_UNIT;
e2c75eea 16278
f7e1d19d
TC
16279 /* Maximum amount to copy in one go. The AArch64 back-end has integer modes
16280 larger than TImode, but we should not use them for loads/stores here. */
16281 const int copy_limit = GET_MODE_BITSIZE (TImode);
16282
89c52e5e 16283 while (n > 0)
e2c75eea 16284 {
89c52e5e
TC
16285      /* Find the largest mode in which to do the copy without over-reading
16286	 or over-writing.  */
16287 opt_scalar_int_mode mode_iter;
16288 FOR_EACH_MODE_IN_CLASS (mode_iter, MODE_INT)
f7e1d19d 16289 if (GET_MODE_BITSIZE (mode_iter.require ()) <= MIN (n, copy_limit))
89c52e5e 16290 cur_mode = mode_iter.require ();
e2c75eea 16291
89c52e5e 16292 gcc_assert (cur_mode != BLKmode);
e2c75eea 16293
89c52e5e
TC
16294 mode_bits = GET_MODE_BITSIZE (cur_mode).to_constant ();
16295 aarch64_copy_one_block_and_progress_pointers (&src, &dst, cur_mode);
e2c75eea 16296
89c52e5e 16297 n -= mode_bits;
e2c75eea 16298
89c52e5e
TC
16299      /* Do certain trailing copies as overlapping if it's going to be
16300	 cheaper, i.e. fewer instructions.  For instance, for a 15-byte copy
16301	 it's more efficient to do two overlapping 8-byte copies than
16302	 8 + 6 + 1.  */
f7e1d19d 16303 if (n > 0 && n <= 8 * BITS_PER_UNIT)
89c52e5e 16304 {
f7e1d19d
TC
16305 next_mode = smallest_mode_for_size (n, MODE_INT);
16306 int n_bits = GET_MODE_BITSIZE (next_mode).to_constant ();
89c52e5e
TC
16307 src = aarch64_move_pointer (src, (n - n_bits) / BITS_PER_UNIT);
16308 dst = aarch64_move_pointer (dst, (n - n_bits) / BITS_PER_UNIT);
16309 n = n_bits;
e2c75eea
JG
16310 }
16311 }
16312
16313 return true;
16314}
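/* Worked example (illustrative only): for a constant 15-byte copy the loop
   above first emits an 8-byte (DImode) load/store of bytes 0-7, leaving 7
   bytes; the trailing-copy adjustment then rewinds both pointers by one byte
   and emits a second 8-byte load/store of bytes 7-14, so the whole copy takes
   two overlapping DImode moves rather than separate 8-, 4-, 2- and 1-byte
   moves.  */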
16315
141a3ccf
KT
16316/* Split a DImode store of a CONST_INT SRC to MEM DST as two
16317 SImode stores. Handle the case when the constant has identical
16318 bottom and top halves. This is beneficial when the two stores can be
16319 merged into an STP and we avoid synthesising potentially expensive
16320 immediates twice. Return true if such a split is possible. */
16321
16322bool
16323aarch64_split_dimode_const_store (rtx dst, rtx src)
16324{
16325 rtx lo = gen_lowpart (SImode, src);
16326 rtx hi = gen_highpart_mode (SImode, DImode, src);
16327
16328 bool size_p = optimize_function_for_size_p (cfun);
16329
16330 if (!rtx_equal_p (lo, hi))
16331 return false;
16332
16333 unsigned int orig_cost
16334 = aarch64_internal_mov_immediate (NULL_RTX, src, false, DImode);
16335 unsigned int lo_cost
16336 = aarch64_internal_mov_immediate (NULL_RTX, lo, false, SImode);
16337
16338 /* We want to transform:
16339 MOV x1, 49370
16340 MOVK x1, 0x140, lsl 16
16341 MOVK x1, 0xc0da, lsl 32
16342 MOVK x1, 0x140, lsl 48
16343 STR x1, [x0]
16344 into:
16345 MOV w1, 49370
16346 MOVK w1, 0x140, lsl 16
16347 STP w1, w1, [x0]
16348 So we want to perform this only when we save two instructions
16349 or more. When optimizing for size, however, accept any code size
16350 savings we can. */
16351 if (size_p && orig_cost <= lo_cost)
16352 return false;
16353
16354 if (!size_p
16355 && (orig_cost <= lo_cost + 1))
16356 return false;
16357
16358 rtx mem_lo = adjust_address (dst, SImode, 0);
16359 if (!aarch64_mem_pair_operand (mem_lo, SImode))
16360 return false;
16361
16362 rtx tmp_reg = gen_reg_rtx (SImode);
16363 aarch64_expand_mov_immediate (tmp_reg, lo);
16364 rtx mem_hi = aarch64_move_pointer (mem_lo, GET_MODE_SIZE (SImode));
16365  /* Don't emit an explicit store pair as this may not always be profitable.
16366 Let the sched-fusion logic decide whether to merge them. */
16367 emit_move_insn (mem_lo, tmp_reg);
16368 emit_move_insn (mem_hi, tmp_reg);
16369
16370 return true;
16371}
16372
30c46053
MC
16373/* Generate RTL for a conditional branch with rtx comparison CODE in
16374 mode CC_MODE. The destination of the unlikely conditional branch
16375 is LABEL_REF. */
16376
16377void
16378aarch64_gen_unlikely_cbranch (enum rtx_code code, machine_mode cc_mode,
16379 rtx label_ref)
16380{
16381 rtx x;
16382 x = gen_rtx_fmt_ee (code, VOIDmode,
16383 gen_rtx_REG (cc_mode, CC_REGNUM),
16384 const0_rtx);
16385
16386 x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
16387 gen_rtx_LABEL_REF (VOIDmode, label_ref),
16388 pc_rtx);
16389 aarch64_emit_unlikely_jump (gen_rtx_SET (pc_rtx, x));
16390}
16391
16392/* Generate DImode scratch registers for 128-bit (TImode) addition.
16393
16394 OP1 represents the TImode destination operand 1
16395 OP2 represents the TImode destination operand 2
16396 LOW_DEST represents the low half (DImode) of TImode operand 0
16397 LOW_IN1 represents the low half (DImode) of TImode operand 1
16398 LOW_IN2 represents the low half (DImode) of TImode operand 2
16399 HIGH_DEST represents the high half (DImode) of TImode operand 0
16400 HIGH_IN1 represents the high half (DImode) of TImode operand 1
16401 HIGH_IN2 represents the high half (DImode) of TImode operand 2. */
16402
16403void
16404aarch64_addti_scratch_regs (rtx op1, rtx op2, rtx *low_dest,
16405 rtx *low_in1, rtx *low_in2,
16406 rtx *high_dest, rtx *high_in1,
16407 rtx *high_in2)
16408{
16409 *low_dest = gen_reg_rtx (DImode);
16410 *low_in1 = gen_lowpart (DImode, op1);
16411 *low_in2 = simplify_gen_subreg (DImode, op2, TImode,
16412 subreg_lowpart_offset (DImode, TImode));
16413 *high_dest = gen_reg_rtx (DImode);
16414 *high_in1 = gen_highpart (DImode, op1);
16415 *high_in2 = simplify_gen_subreg (DImode, op2, TImode,
16416 subreg_highpart_offset (DImode, TImode));
16417}
16418
16419/* Generate DImode scratch registers for 128-bit (TImode) subtraction.
16420
16421   This function differs from 'aarch64_addti_scratch_regs' in that
16422 OP1 can be an immediate constant (zero). We must call
16423 subreg_highpart_offset with DImode and TImode arguments, otherwise
16424   VOIDmode will be used for the const_int, which generates an internal
16425   error from subreg_size_highpart_offset, which does not expect a size of zero.
16426
16427 OP1 represents the TImode destination operand 1
16428 OP2 represents the TImode destination operand 2
16429 LOW_DEST represents the low half (DImode) of TImode operand 0
16430 LOW_IN1 represents the low half (DImode) of TImode operand 1
16431 LOW_IN2 represents the low half (DImode) of TImode operand 2
16432 HIGH_DEST represents the high half (DImode) of TImode operand 0
16433 HIGH_IN1 represents the high half (DImode) of TImode operand 1
16434 HIGH_IN2 represents the high half (DImode) of TImode operand 2. */
16435
16436
16437void
16438aarch64_subvti_scratch_regs (rtx op1, rtx op2, rtx *low_dest,
16439 rtx *low_in1, rtx *low_in2,
16440 rtx *high_dest, rtx *high_in1,
16441 rtx *high_in2)
16442{
16443 *low_dest = gen_reg_rtx (DImode);
16444 *low_in1 = simplify_gen_subreg (DImode, op1, TImode,
16445 subreg_lowpart_offset (DImode, TImode));
16446
16447 *low_in2 = simplify_gen_subreg (DImode, op2, TImode,
16448 subreg_lowpart_offset (DImode, TImode));
16449 *high_dest = gen_reg_rtx (DImode);
16450
16451 *high_in1 = simplify_gen_subreg (DImode, op1, TImode,
16452 subreg_highpart_offset (DImode, TImode));
16453 *high_in2 = simplify_gen_subreg (DImode, op2, TImode,
16454 subreg_highpart_offset (DImode, TImode));
16455}
16456
16457/* Generate RTL for 128-bit (TImode) subtraction with overflow.
16458
16459 OP0 represents the TImode destination operand 0
16460 LOW_DEST represents the low half (DImode) of TImode operand 0
16461 LOW_IN1 represents the low half (DImode) of TImode operand 1
16462 LOW_IN2 represents the low half (DImode) of TImode operand 2
16463 HIGH_DEST represents the high half (DImode) of TImode operand 0
16464 HIGH_IN1 represents the high half (DImode) of TImode operand 1
16465 HIGH_IN2 represents the high half (DImode) of TImode operand 2. */
16466
16467void
16468aarch64_expand_subvti (rtx op0, rtx low_dest, rtx low_in1,
16469 rtx low_in2, rtx high_dest, rtx high_in1,
16470 rtx high_in2)
16471{
16472 if (low_in2 == const0_rtx)
16473 {
16474 low_dest = low_in1;
16475 emit_insn (gen_subdi3_compare1 (high_dest, high_in1,
16476 force_reg (DImode, high_in2)));
16477 }
16478 else
16479 {
16480 if (CONST_INT_P (low_in2))
16481 {
16482 low_in2 = force_reg (DImode, GEN_INT (-UINTVAL (low_in2)));
16483 high_in2 = force_reg (DImode, high_in2);
16484 emit_insn (gen_adddi3_compareC (low_dest, low_in1, low_in2));
16485 }
16486 else
16487 emit_insn (gen_subdi3_compare1 (low_dest, low_in1, low_in2));
16488 emit_insn (gen_subdi3_carryinCV (high_dest,
16489 force_reg (DImode, high_in1),
16490 high_in2));
16491 }
16492
16493 emit_move_insn (gen_lowpart (DImode, op0), low_dest);
16494 emit_move_insn (gen_highpart (DImode, op0), high_dest);
16495
16496}
16497
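/* Editorial illustration (not part of the original source): for the general
   case above, where neither half is a constant, the emitted sequence is a
   borrow-propagating pair along the lines of:

	subs	x0, x2, x4	// low halves, sets the carry (no-borrow) flag
	sbcs	x1, x3, x5	// high halves, consumes the carry

   with the overflow check derived from the flags of the second instruction.
   Register numbers are arbitrary.  */
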
a3125fc2
CL
16498/* Implement the TARGET_ASAN_SHADOW_OFFSET hook. */
16499
16500static unsigned HOST_WIDE_INT
16501aarch64_asan_shadow_offset (void)
16502{
16503 return (HOST_WIDE_INT_1 << 36);
16504}
16505
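/* Editorial illustration (not part of the original source): the value
   returned above is the constant term of the default AddressSanitizer
   shadow mapping, shadow = (addr >> 3) + (1 << 36), so for example the
   shadow byte for address 0x1000 lives at 0x1000000200.  A minimal sketch
   of that mapping, with an invented name, would be:

	static inline unsigned long long
	illustrative_asan_mem_to_shadow (unsigned long long addr)
	{
	  return (addr >> 3) + (1ULL << 36);
	}
*/
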
5f3bc026 16506static rtx
cb4347e8 16507aarch64_gen_ccmp_first (rtx_insn **prep_seq, rtx_insn **gen_seq,
5f3bc026
ZC
16508 int code, tree treeop0, tree treeop1)
16509{
c8012fbc
WD
16510 machine_mode op_mode, cmp_mode, cc_mode = CCmode;
16511 rtx op0, op1;
5f3bc026 16512 int unsignedp = TYPE_UNSIGNED (TREE_TYPE (treeop0));
c8012fbc 16513 insn_code icode;
5f3bc026
ZC
16514 struct expand_operand ops[4];
16515
5f3bc026
ZC
16516 start_sequence ();
16517 expand_operands (treeop0, treeop1, NULL_RTX, &op0, &op1, EXPAND_NORMAL);
16518
16519 op_mode = GET_MODE (op0);
16520 if (op_mode == VOIDmode)
16521 op_mode = GET_MODE (op1);
16522
16523 switch (op_mode)
16524 {
4e10a5a7
RS
16525 case E_QImode:
16526 case E_HImode:
16527 case E_SImode:
5f3bc026
ZC
16528 cmp_mode = SImode;
16529 icode = CODE_FOR_cmpsi;
16530 break;
16531
4e10a5a7 16532 case E_DImode:
5f3bc026
ZC
16533 cmp_mode = DImode;
16534 icode = CODE_FOR_cmpdi;
16535 break;
16536
4e10a5a7 16537 case E_SFmode:
786e3c06
WD
16538 cmp_mode = SFmode;
16539 cc_mode = aarch64_select_cc_mode ((rtx_code) code, op0, op1);
16540 icode = cc_mode == CCFPEmode ? CODE_FOR_fcmpesf : CODE_FOR_fcmpsf;
16541 break;
16542
4e10a5a7 16543 case E_DFmode:
786e3c06
WD
16544 cmp_mode = DFmode;
16545 cc_mode = aarch64_select_cc_mode ((rtx_code) code, op0, op1);
16546 icode = cc_mode == CCFPEmode ? CODE_FOR_fcmpedf : CODE_FOR_fcmpdf;
16547 break;
16548
5f3bc026
ZC
16549 default:
16550 end_sequence ();
16551 return NULL_RTX;
16552 }
16553
c8012fbc
WD
16554 op0 = prepare_operand (icode, op0, 0, op_mode, cmp_mode, unsignedp);
16555 op1 = prepare_operand (icode, op1, 1, op_mode, cmp_mode, unsignedp);
5f3bc026
ZC
16556 if (!op0 || !op1)
16557 {
16558 end_sequence ();
16559 return NULL_RTX;
16560 }
16561 *prep_seq = get_insns ();
16562 end_sequence ();
16563
c8012fbc
WD
16564 create_fixed_operand (&ops[0], op0);
16565 create_fixed_operand (&ops[1], op1);
5f3bc026
ZC
16566
16567 start_sequence ();
c8012fbc 16568 if (!maybe_expand_insn (icode, 2, ops))
5f3bc026
ZC
16569 {
16570 end_sequence ();
16571 return NULL_RTX;
16572 }
16573 *gen_seq = get_insns ();
16574 end_sequence ();
16575
c8012fbc
WD
16576 return gen_rtx_fmt_ee ((rtx_code) code, cc_mode,
16577 gen_rtx_REG (cc_mode, CC_REGNUM), const0_rtx);
5f3bc026
ZC
16578}
16579
16580static rtx
cb4347e8
TS
16581aarch64_gen_ccmp_next (rtx_insn **prep_seq, rtx_insn **gen_seq, rtx prev,
16582 int cmp_code, tree treeop0, tree treeop1, int bit_code)
5f3bc026 16583{
c8012fbc
WD
16584 rtx op0, op1, target;
16585 machine_mode op_mode, cmp_mode, cc_mode = CCmode;
5f3bc026 16586 int unsignedp = TYPE_UNSIGNED (TREE_TYPE (treeop0));
c8012fbc 16587 insn_code icode;
5f3bc026 16588 struct expand_operand ops[6];
c8012fbc 16589 int aarch64_cond;
5f3bc026 16590
cb4347e8 16591 push_to_sequence (*prep_seq);
5f3bc026
ZC
16592 expand_operands (treeop0, treeop1, NULL_RTX, &op0, &op1, EXPAND_NORMAL);
16593
16594 op_mode = GET_MODE (op0);
16595 if (op_mode == VOIDmode)
16596 op_mode = GET_MODE (op1);
16597
16598 switch (op_mode)
16599 {
4e10a5a7
RS
16600 case E_QImode:
16601 case E_HImode:
16602 case E_SImode:
5f3bc026 16603 cmp_mode = SImode;
c8012fbc 16604 icode = CODE_FOR_ccmpsi;
5f3bc026
ZC
16605 break;
16606
4e10a5a7 16607 case E_DImode:
5f3bc026 16608 cmp_mode = DImode;
c8012fbc 16609 icode = CODE_FOR_ccmpdi;
5f3bc026
ZC
16610 break;
16611
4e10a5a7 16612 case E_SFmode:
786e3c06
WD
16613 cmp_mode = SFmode;
16614 cc_mode = aarch64_select_cc_mode ((rtx_code) cmp_code, op0, op1);
16615 icode = cc_mode == CCFPEmode ? CODE_FOR_fccmpesf : CODE_FOR_fccmpsf;
16616 break;
16617
4e10a5a7 16618 case E_DFmode:
786e3c06
WD
16619 cmp_mode = DFmode;
16620 cc_mode = aarch64_select_cc_mode ((rtx_code) cmp_code, op0, op1);
16621 icode = cc_mode == CCFPEmode ? CODE_FOR_fccmpedf : CODE_FOR_fccmpdf;
16622 break;
16623
5f3bc026
ZC
16624 default:
16625 end_sequence ();
16626 return NULL_RTX;
16627 }
16628
16629 op0 = prepare_operand (icode, op0, 2, op_mode, cmp_mode, unsignedp);
16630 op1 = prepare_operand (icode, op1, 3, op_mode, cmp_mode, unsignedp);
16631 if (!op0 || !op1)
16632 {
16633 end_sequence ();
16634 return NULL_RTX;
16635 }
16636 *prep_seq = get_insns ();
16637 end_sequence ();
16638
16639 target = gen_rtx_REG (cc_mode, CC_REGNUM);
c8012fbc 16640 aarch64_cond = aarch64_get_condition_code_1 (cc_mode, (rtx_code) cmp_code);
5f3bc026 16641
c8012fbc
WD
16642 if (bit_code != AND)
16643 {
16644 prev = gen_rtx_fmt_ee (REVERSE_CONDITION (GET_CODE (prev),
16645 GET_MODE (XEXP (prev, 0))),
16646 VOIDmode, XEXP (prev, 0), const0_rtx);
16647 aarch64_cond = AARCH64_INVERSE_CONDITION_CODE (aarch64_cond);
16648 }
16649
16650 create_fixed_operand (&ops[0], XEXP (prev, 0));
5f3bc026
ZC
16651 create_fixed_operand (&ops[1], target);
16652 create_fixed_operand (&ops[2], op0);
16653 create_fixed_operand (&ops[3], op1);
c8012fbc
WD
16654 create_fixed_operand (&ops[4], prev);
16655 create_fixed_operand (&ops[5], GEN_INT (aarch64_cond));
5f3bc026 16656
cb4347e8 16657 push_to_sequence (*gen_seq);
5f3bc026
ZC
16658 if (!maybe_expand_insn (icode, 6, ops))
16659 {
16660 end_sequence ();
16661 return NULL_RTX;
16662 }
16663
16664 *gen_seq = get_insns ();
16665 end_sequence ();
16666
c8012fbc 16667 return gen_rtx_fmt_ee ((rtx_code) cmp_code, VOIDmode, target, const0_rtx);
5f3bc026
ZC
16668}
16669
16670#undef TARGET_GEN_CCMP_FIRST
16671#define TARGET_GEN_CCMP_FIRST aarch64_gen_ccmp_first
16672
16673#undef TARGET_GEN_CCMP_NEXT
16674#define TARGET_GEN_CCMP_NEXT aarch64_gen_ccmp_next
16675
6a569cdd
KT
16676/* Implement TARGET_SCHED_MACRO_FUSION_P. Return true if target supports
16677 instruction fusion of some sort. */
16678
16679static bool
16680aarch64_macro_fusion_p (void)
16681{
b175b679 16682 return aarch64_tune_params.fusible_ops != AARCH64_FUSE_NOTHING;
6a569cdd
KT
16683}
16684
16685
16686/* Implement TARGET_SCHED_MACRO_FUSION_PAIR_P. Return true if PREV and CURR
16687 should be kept together during scheduling. */
16688
16689static bool
16690aarch_macro_fusion_pair_p (rtx_insn *prev, rtx_insn *curr)
16691{
16692 rtx set_dest;
16693 rtx prev_set = single_set (prev);
16694 rtx curr_set = single_set (curr);
16695 /* prev and curr are simple SET insns i.e. no flag setting or branching. */
16696 bool simple_sets_p = prev_set && curr_set && !any_condjump_p (curr);
16697
16698 if (!aarch64_macro_fusion_p ())
16699 return false;
16700
d7b03373 16701 if (simple_sets_p && aarch64_fusion_enabled_p (AARCH64_FUSE_MOV_MOVK))
6a569cdd
KT
16702 {
16703 /* We are trying to match:
16704 prev (mov) == (set (reg r0) (const_int imm16))
16705 curr (movk) == (set (zero_extract (reg r0)
16706 (const_int 16)
16707 (const_int 16))
16708 (const_int imm16_1)) */
16709
16710 set_dest = SET_DEST (curr_set);
16711
16712 if (GET_CODE (set_dest) == ZERO_EXTRACT
16713 && CONST_INT_P (SET_SRC (curr_set))
16714 && CONST_INT_P (SET_SRC (prev_set))
16715 && CONST_INT_P (XEXP (set_dest, 2))
16716 && INTVAL (XEXP (set_dest, 2)) == 16
16717 && REG_P (XEXP (set_dest, 0))
16718 && REG_P (SET_DEST (prev_set))
16719 && REGNO (XEXP (set_dest, 0)) == REGNO (SET_DEST (prev_set)))
16720 {
16721 return true;
16722 }
16723 }
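
/* Editorial illustration (not part of the original source): the pair matched
   above corresponds to assembly such as:

	mov	w0, #0x1234
	movk	w0, #0x5678, lsl #16	// w0 == 0x56781234

   i.e. a 32-bit constant built in two halves, kept adjacent so that cores
   with AARCH64_FUSE_MOV_MOVK enabled can macro-fuse the pair.  */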
16724
d7b03373 16725 if (simple_sets_p && aarch64_fusion_enabled_p (AARCH64_FUSE_ADRP_ADD))
9bbe08fe
KT
16726 {
16727
16728 /* We're trying to match:
16729 prev (adrp) == (set (reg r1)
16730 (high (symbol_ref ("SYM"))))
16731 curr (add) == (set (reg r0)
16732 (lo_sum (reg r1)
16733 (symbol_ref ("SYM"))))
16734 Note that r0 need not be the same as r1, especially
16735 during pre-regalloc scheduling. */
16736
16737 if (satisfies_constraint_Ush (SET_SRC (prev_set))
16738 && REG_P (SET_DEST (prev_set)) && REG_P (SET_DEST (curr_set)))
16739 {
16740 if (GET_CODE (SET_SRC (curr_set)) == LO_SUM
16741 && REG_P (XEXP (SET_SRC (curr_set), 0))
16742 && REGNO (XEXP (SET_SRC (curr_set), 0))
16743 == REGNO (SET_DEST (prev_set))
16744 && rtx_equal_p (XEXP (SET_SRC (prev_set), 0),
16745 XEXP (SET_SRC (curr_set), 1)))
16746 return true;
16747 }
16748 }
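
/* Editorial illustration (not part of the original source): in assembly
   terms the matched pair is the standard small-code-model address
   computation:

	adrp	x1, sym			// 4KiB page containing sym
	add	x0, x1, :lo12:sym	// add the low 12 bits of sym

   which together materialise the full address of `sym'.  */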
16749
d7b03373 16750 if (simple_sets_p && aarch64_fusion_enabled_p (AARCH64_FUSE_MOVK_MOVK))
cd0cb232
KT
16751 {
16752
16753 /* We're trying to match:
16754 prev (movk) == (set (zero_extract (reg r0)
16755 (const_int 16)
16756 (const_int 32))
16757 (const_int imm16_1))
16758 curr (movk) == (set (zero_extract (reg r0)
16759 (const_int 16)
16760 (const_int 48))
16761 (const_int imm16_2)) */
16762
16763 if (GET_CODE (SET_DEST (prev_set)) == ZERO_EXTRACT
16764 && GET_CODE (SET_DEST (curr_set)) == ZERO_EXTRACT
16765 && REG_P (XEXP (SET_DEST (prev_set), 0))
16766 && REG_P (XEXP (SET_DEST (curr_set), 0))
16767 && REGNO (XEXP (SET_DEST (prev_set), 0))
16768 == REGNO (XEXP (SET_DEST (curr_set), 0))
16769 && CONST_INT_P (XEXP (SET_DEST (prev_set), 2))
16770 && CONST_INT_P (XEXP (SET_DEST (curr_set), 2))
16771 && INTVAL (XEXP (SET_DEST (prev_set), 2)) == 32
16772 && INTVAL (XEXP (SET_DEST (curr_set), 2)) == 48
16773 && CONST_INT_P (SET_SRC (prev_set))
16774 && CONST_INT_P (SET_SRC (curr_set)))
16775 return true;
16776
16777 }
d7b03373 16778 if (simple_sets_p && aarch64_fusion_enabled_p (AARCH64_FUSE_ADRP_LDR))
d8354ad7
KT
16779 {
16780 /* We're trying to match:
16781 prev (adrp) == (set (reg r0)
16782 (high (symbol_ref ("SYM"))))
16783 curr (ldr) == (set (reg r1)
16784 (mem (lo_sum (reg r0)
16785 (symbol_ref ("SYM")))))
16786 or
16787 curr (ldr) == (set (reg r1)
16788 (zero_extend (mem
16789 (lo_sum (reg r0)
16790 (symbol_ref ("SYM")))))) */
16791 if (satisfies_constraint_Ush (SET_SRC (prev_set))
16792 && REG_P (SET_DEST (prev_set)) && REG_P (SET_DEST (curr_set)))
16793 {
16794 rtx curr_src = SET_SRC (curr_set);
16795
16796 if (GET_CODE (curr_src) == ZERO_EXTEND)
16797 curr_src = XEXP (curr_src, 0);
16798
16799 if (MEM_P (curr_src) && GET_CODE (XEXP (curr_src, 0)) == LO_SUM
16800 && REG_P (XEXP (XEXP (curr_src, 0), 0))
16801 && REGNO (XEXP (XEXP (curr_src, 0), 0))
16802 == REGNO (SET_DEST (prev_set))
16803 && rtx_equal_p (XEXP (XEXP (curr_src, 0), 1),
16804 XEXP (SET_SRC (prev_set), 0)))
16805 return true;
16806 }
16807 }
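
/* Editorial illustration (not part of the original source): the
   corresponding assembly is a page-plus-low-offset load such as:

	adrp	x0, sym
	ldr	w1, [x0, :lo12:sym]

   optionally with a zero-extending load, as the second RTL form above
   shows.  */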
cd0cb232 16808
d7b03373 16809 if (aarch64_fusion_enabled_p (AARCH64_FUSE_AES_AESMC)
00a8574a
WD
16810 && aarch_crypto_can_dual_issue (prev, curr))
16811 return true;
16812
d7b03373 16813 if (aarch64_fusion_enabled_p (AARCH64_FUSE_CMP_BRANCH)
3759108f
AP
16814 && any_condjump_p (curr))
16815 {
509f819a
N
16816 unsigned int condreg1, condreg2;
16817 rtx cc_reg_1;
16818 aarch64_fixed_condition_code_regs (&condreg1, &condreg2);
16819 cc_reg_1 = gen_rtx_REG (CCmode, condreg1);
16820
16821 if (reg_referenced_p (cc_reg_1, PATTERN (curr))
16822 && prev
16823 && modified_in_p (cc_reg_1, prev))
16824 {
f8a27206
AP
16825 enum attr_type prev_type = get_attr_type (prev);
16826
509f819a
N
16827 /* FIXME: this misses some cases that are considered simple arithmetic
16828 instructions for ThunderX.  Simple shifts are missed here. */
16829 if (prev_type == TYPE_ALUS_SREG
16830 || prev_type == TYPE_ALUS_IMM
16831 || prev_type == TYPE_LOGICS_REG
16832 || prev_type == TYPE_LOGICS_IMM)
16833 return true;
16834 }
3759108f
AP
16835 }
16836
bee7e0fc
AP
16837 if (prev_set
16838 && curr_set
16839 && aarch64_fusion_enabled_p (AARCH64_FUSE_ALU_BRANCH)
00c7c57f
JB
16840 && any_condjump_p (curr))
16841 {
16842 /* We're trying to match:
16843 prev (alu_insn) == (set (r0) plus ((r0) (r1/imm)))
16844 curr (cbz) == (set (pc) (if_then_else (eq/ne) (r0)
16845 (const_int 0))
16846 (label_ref ("SYM"))
16847 (pc)) */
16848 if (SET_DEST (curr_set) == (pc_rtx)
16849 && GET_CODE (SET_SRC (curr_set)) == IF_THEN_ELSE
16850 && REG_P (XEXP (XEXP (SET_SRC (curr_set), 0), 0))
16851 && REG_P (SET_DEST (prev_set))
16852 && REGNO (SET_DEST (prev_set))
16853 == REGNO (XEXP (XEXP (SET_SRC (curr_set), 0), 0)))
16854 {
16855 /* Fuse ALU operations followed by conditional branch instruction. */
16856 switch (get_attr_type (prev))
16857 {
16858 case TYPE_ALU_IMM:
16859 case TYPE_ALU_SREG:
16860 case TYPE_ADC_REG:
16861 case TYPE_ADC_IMM:
16862 case TYPE_ADCS_REG:
16863 case TYPE_ADCS_IMM:
16864 case TYPE_LOGIC_REG:
16865 case TYPE_LOGIC_IMM:
16866 case TYPE_CSEL:
16867 case TYPE_ADR:
16868 case TYPE_MOV_IMM:
16869 case TYPE_SHIFT_REG:
16870 case TYPE_SHIFT_IMM:
16871 case TYPE_BFM:
16872 case TYPE_RBIT:
16873 case TYPE_REV:
16874 case TYPE_EXTEND:
16875 return true;
16876
16877 default:;
16878 }
16879 }
16880 }
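
/* Editorial illustration (not part of the original source): a typical pair
   kept together under AARCH64_FUSE_ALU_BRANCH is:

	add	x0, x1, #16
	cbz	x0, .Llabel

   where the conditional branch tests the register written by the
   immediately preceding ALU instruction.  */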
16881
6a569cdd
KT
16882 return false;
16883}
16884
f2879a90
KT
16885/* Return true iff the instruction fusion described by OP is enabled. */
16886
16887bool
16888aarch64_fusion_enabled_p (enum aarch64_fusion_pairs op)
16889{
16890 return (aarch64_tune_params.fusible_ops & op) != 0;
16891}
16892
350013bc
BC
16893/* If MEM is in the form of [base+offset], extract the two parts
16894 of address and set to BASE and OFFSET, otherwise return false
16895 after clearing BASE and OFFSET. */
16896
16897bool
16898extract_base_offset_in_addr (rtx mem, rtx *base, rtx *offset)
16899{
16900 rtx addr;
16901
16902 gcc_assert (MEM_P (mem));
16903
16904 addr = XEXP (mem, 0);
16905
16906 if (REG_P (addr))
16907 {
16908 *base = addr;
16909 *offset = const0_rtx;
16910 return true;
16911 }
16912
16913 if (GET_CODE (addr) == PLUS
16914 && REG_P (XEXP (addr, 0)) && CONST_INT_P (XEXP (addr, 1)))
16915 {
16916 *base = XEXP (addr, 0);
16917 *offset = XEXP (addr, 1);
16918 return true;
16919 }
16920
16921 *base = NULL_RTX;
16922 *offset = NULL_RTX;
16923
16924 return false;
16925}
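
/* Editorial illustration (not part of the original source): for a MEM whose
   address is (plus (reg x2) (const_int 16)) this sets *BASE to the x2 reg
   and *OFFSET to (const_int 16); for a plain (reg x2) address it sets
   *OFFSET to const0_rtx; any other addressing form (e.g. a post-increment)
   clears both and returns false.  */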
16926
16927/* Types for scheduling fusion. */
16928enum sched_fusion_type
16929{
16930 SCHED_FUSION_NONE = 0,
16931 SCHED_FUSION_LD_SIGN_EXTEND,
16932 SCHED_FUSION_LD_ZERO_EXTEND,
16933 SCHED_FUSION_LD,
16934 SCHED_FUSION_ST,
16935 SCHED_FUSION_NUM
16936};
16937
16938/* If INSN is a load or store of address in the form of [base+offset],
16939 extract the two parts and set to BASE and OFFSET. Return scheduling
16940 fusion type this INSN is. */
16941
16942static enum sched_fusion_type
16943fusion_load_store (rtx_insn *insn, rtx *base, rtx *offset)
16944{
16945 rtx x, dest, src;
16946 enum sched_fusion_type fusion = SCHED_FUSION_LD;
16947
16948 gcc_assert (INSN_P (insn));
16949 x = PATTERN (insn);
16950 if (GET_CODE (x) != SET)
16951 return SCHED_FUSION_NONE;
16952
16953 src = SET_SRC (x);
16954 dest = SET_DEST (x);
16955
abc52318
KT
16956 machine_mode dest_mode = GET_MODE (dest);
16957
16958 if (!aarch64_mode_valid_for_sched_fusion_p (dest_mode))
350013bc
BC
16959 return SCHED_FUSION_NONE;
16960
16961 if (GET_CODE (src) == SIGN_EXTEND)
16962 {
16963 fusion = SCHED_FUSION_LD_SIGN_EXTEND;
16964 src = XEXP (src, 0);
16965 if (GET_CODE (src) != MEM || GET_MODE (src) != SImode)
16966 return SCHED_FUSION_NONE;
16967 }
16968 else if (GET_CODE (src) == ZERO_EXTEND)
16969 {
16970 fusion = SCHED_FUSION_LD_ZERO_EXTEND;
16971 src = XEXP (src, 0);
16972 if (GET_CODE (src) != MEM || GET_MODE (src) != SImode)
16973 return SCHED_FUSION_NONE;
16974 }
16975
16976 if (GET_CODE (src) == MEM && REG_P (dest))
16977 extract_base_offset_in_addr (src, base, offset);
16978 else if (GET_CODE (dest) == MEM && (REG_P (src) || src == const0_rtx))
16979 {
16980 fusion = SCHED_FUSION_ST;
16981 extract_base_offset_in_addr (dest, base, offset);
16982 }
16983 else
16984 return SCHED_FUSION_NONE;
16985
16986 if (*base == NULL_RTX || *offset == NULL_RTX)
16987 fusion = SCHED_FUSION_NONE;
16988
16989 return fusion;
16990}
16991
16992/* Implement the TARGET_SCHED_FUSION_PRIORITY hook.
16993
16994 Currently we only support fusing ldr and str instructions, so FUSION_PRI
16995 and PRI are only calculated for these instructions.  For other instructions,
16996 FUSION_PRI and PRI are simply set to MAX_PRI - 1.  In the future, other
16997 types of instruction fusion can be added by returning different priorities.
16998
16999 It's important that irrelevant instructions get the largest FUSION_PRI. */
17000
17001static void
17002aarch64_sched_fusion_priority (rtx_insn *insn, int max_pri,
17003 int *fusion_pri, int *pri)
17004{
17005 int tmp, off_val;
17006 rtx base, offset;
17007 enum sched_fusion_type fusion;
17008
17009 gcc_assert (INSN_P (insn));
17010
17011 tmp = max_pri - 1;
17012 fusion = fusion_load_store (insn, &base, &offset);
17013 if (fusion == SCHED_FUSION_NONE)
17014 {
17015 *pri = tmp;
17016 *fusion_pri = tmp;
17017 return;
17018 }
17019
17020 /* Set FUSION_PRI according to fusion type and base register. */
17021 *fusion_pri = tmp - fusion * FIRST_PSEUDO_REGISTER - REGNO (base);
17022
17023 /* Calculate PRI. */
17024 tmp /= 2;
17025
17026 /* INSN with smaller offset goes first. */
17027 off_val = (int)(INTVAL (offset));
17028 if (off_val >= 0)
17029 tmp -= (off_val & 0xfffff);
17030 else
17031 tmp += ((- off_val) & 0xfffff);
17032
17033 *pri = tmp;
17034 return;
17035}
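
/* Editorial illustration (not part of the original source): for two SImode
   loads from [x1, #4] and [x1, #8], both get the same FUSION_PRI (same
   fusion type, same base register), while the smaller offset yields the
   larger PRI, so the [x1, #4] load is scheduled first and the two loads
   stay adjacent for the ldp peepholes.  */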
17036
9bca63d4
WD
17037/* Implement the TARGET_SCHED_ADJUST_PRIORITY hook.
17038 Adjust priority of sha1h instructions so they are scheduled before
17039 other SHA1 instructions. */
17040
17041static int
17042aarch64_sched_adjust_priority (rtx_insn *insn, int priority)
17043{
17044 rtx x = PATTERN (insn);
17045
17046 if (GET_CODE (x) == SET)
17047 {
17048 x = SET_SRC (x);
17049
17050 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_SHA1H)
17051 return priority + 10;
17052 }
17053
17054 return priority;
17055}
17056
350013bc
BC
17057/* Given OPERANDS of consecutive load/store, check if we can merge
17058 them into ldp/stp. LOAD is true if they are load instructions.
17059 MODE is the mode of memory operands. */
17060
17061bool
17062aarch64_operands_ok_for_ldpstp (rtx *operands, bool load,
b8506a8a 17063 machine_mode mode)
350013bc
BC
17064{
17065 HOST_WIDE_INT offval_1, offval_2, msize;
17066 enum reg_class rclass_1, rclass_2;
17067 rtx mem_1, mem_2, reg_1, reg_2, base_1, base_2, offset_1, offset_2;
17068
17069 if (load)
17070 {
17071 mem_1 = operands[1];
17072 mem_2 = operands[3];
17073 reg_1 = operands[0];
17074 reg_2 = operands[2];
17075 gcc_assert (REG_P (reg_1) && REG_P (reg_2));
17076 if (REGNO (reg_1) == REGNO (reg_2))
17077 return false;
17078 }
17079 else
17080 {
17081 mem_1 = operands[0];
17082 mem_2 = operands[2];
17083 reg_1 = operands[1];
17084 reg_2 = operands[3];
17085 }
17086
bf84ac44
AP
17087 /* The mems cannot be volatile. */
17088 if (MEM_VOLATILE_P (mem_1) || MEM_VOLATILE_P (mem_2))
17089 return false;
17090
54700e2e
AP
17091 /* If we have SImode and slow unaligned ldp,
17092 check that the alignment is at least 8 bytes. */
17093 if (mode == SImode
17094 && (aarch64_tune_params.extra_tuning_flags
17095 & AARCH64_EXTRA_TUNE_SLOW_UNALIGNED_LDPW)
17096 && !optimize_size
17097 && MEM_ALIGN (mem_1) < 8 * BITS_PER_UNIT)
17098 return false;
17099
350013bc
BC
17100 /* Check if the addresses are in the form of [base+offset]. */
17101 extract_base_offset_in_addr (mem_1, &base_1, &offset_1);
17102 if (base_1 == NULL_RTX || offset_1 == NULL_RTX)
17103 return false;
17104 extract_base_offset_in_addr (mem_2, &base_2, &offset_2);
17105 if (base_2 == NULL_RTX || offset_2 == NULL_RTX)
17106 return false;
17107
17108 /* Check if the bases are same. */
17109 if (!rtx_equal_p (base_1, base_2))
17110 return false;
17111
dfe1da23
JW
17112 /* The operands must be of the same size. */
17113 gcc_assert (known_eq (GET_MODE_SIZE (GET_MODE (mem_1)),
17114 GET_MODE_SIZE (GET_MODE (mem_2))));
17115
350013bc
BC
17116 offval_1 = INTVAL (offset_1);
17117 offval_2 = INTVAL (offset_2);
6a70badb
RS
17118 /* We should only be trying this for fixed-sized modes. There is no
17119 SVE LDP/STP instruction. */
17120 msize = GET_MODE_SIZE (mode).to_constant ();
350013bc
BC
17121 /* Check if the offsets are consecutive. */
17122 if (offval_1 != (offval_2 + msize) && offval_2 != (offval_1 + msize))
17123 return false;
17124
17125 /* Check if the addresses are clobbered by load. */
17126 if (load)
17127 {
17128 if (reg_mentioned_p (reg_1, mem_1))
17129 return false;
17130
17131 /* In increasing order, the last load can clobber the address. */
17132 if (offval_1 > offval_2 && reg_mentioned_p (reg_2, mem_2))
9b56ec11 17133 return false;
350013bc
BC
17134 }
17135
9b56ec11
JW
17136 /* One of the memory accesses must be a mempair operand.
17137 If it is not the first one, they need to be swapped by the
17138 peephole. */
17139 if (!aarch64_mem_pair_operand (mem_1, GET_MODE (mem_1))
17140 && !aarch64_mem_pair_operand (mem_2, GET_MODE (mem_2)))
17141 return false;
17142
350013bc
BC
17143 if (REG_P (reg_1) && FP_REGNUM_P (REGNO (reg_1)))
17144 rclass_1 = FP_REGS;
17145 else
17146 rclass_1 = GENERAL_REGS;
17147
17148 if (REG_P (reg_2) && FP_REGNUM_P (REGNO (reg_2)))
17149 rclass_2 = FP_REGS;
17150 else
17151 rclass_2 = GENERAL_REGS;
17152
17153 /* Check if the registers are of same class. */
17154 if (rclass_1 != rclass_2)
17155 return false;
17156
17157 return true;
17158}
17159
9b56ec11
JW
17160/* Given OPERANDS of consecutive load/store that can be merged,
17161 swap them if they are not in ascending order. */
17162void
17163aarch64_swap_ldrstr_operands (rtx* operands, bool load)
17164{
17165 rtx mem_1, mem_2, base_1, base_2, offset_1, offset_2;
17166 HOST_WIDE_INT offval_1, offval_2;
17167
17168 if (load)
17169 {
17170 mem_1 = operands[1];
17171 mem_2 = operands[3];
17172 }
17173 else
17174 {
17175 mem_1 = operands[0];
17176 mem_2 = operands[2];
17177 }
17178
17179 extract_base_offset_in_addr (mem_1, &base_1, &offset_1);
17180 extract_base_offset_in_addr (mem_2, &base_2, &offset_2);
17181
17182 offval_1 = INTVAL (offset_1);
17183 offval_2 = INTVAL (offset_2);
17184
17185 if (offval_1 > offval_2)
17186 {
17187 /* Irrespective of whether this is a load or a store,
17188 we do the same swap. */
17189 std::swap (operands[0], operands[2]);
17190 std::swap (operands[1], operands[3]);
17191 }
17192}
17193
d0b51297
JW
17194/* Taking X and Y to be HOST_WIDE_INT pointers, return the result of a
17195 comparison between the two. */
17196int
17197aarch64_host_wide_int_compare (const void *x, const void *y)
17198{
17199 return wi::cmps (* ((const HOST_WIDE_INT *) x),
17200 * ((const HOST_WIDE_INT *) y));
17201}
17202
17203/* Taking X and Y to be pairs of RTX, one pointing to a MEM rtx and the
17204 other pointing to a REG rtx containing an offset, compare the offsets
17205 of the two pairs.
17206
17207 Return:
17208
17209 1 iff offset (X) > offset (Y)
17210 0 iff offset (X) == offset (Y)
17211 -1 iff offset (X) < offset (Y) */
17212int
17213aarch64_ldrstr_offset_compare (const void *x, const void *y)
17214{
17215 const rtx * operands_1 = (const rtx *) x;
17216 const rtx * operands_2 = (const rtx *) y;
17217 rtx mem_1, mem_2, base, offset_1, offset_2;
17218
17219 if (MEM_P (operands_1[0]))
17220 mem_1 = operands_1[0];
17221 else
17222 mem_1 = operands_1[1];
17223
17224 if (MEM_P (operands_2[0]))
17225 mem_2 = operands_2[0];
17226 else
17227 mem_2 = operands_2[1];
17228
17229 /* Extract the offsets. */
17230 extract_base_offset_in_addr (mem_1, &base, &offset_1);
17231 extract_base_offset_in_addr (mem_2, &base, &offset_2);
17232
17233 gcc_assert (offset_1 != NULL_RTX && offset_2 != NULL_RTX);
17234
17235 return wi::cmps (INTVAL (offset_1), INTVAL (offset_2));
17236}
17237
350013bc
BC
17238/* Given OPERANDS of consecutive load/store, check if we can merge
17239 them into ldp/stp by adjusting the offset. LOAD is true if they
17240 are load instructions. MODE is the mode of memory operands.
17241
17242 Given below consecutive stores:
17243
17244 str w1, [xb, 0x100]
17245 str w1, [xb, 0x104]
17246 str w1, [xb, 0x108]
17247 str w1, [xb, 0x10c]
17248
17249 Though the offsets are out of the range supported by stp, we can
17250 still pair them after adjusting the offset, like:
17251
17252 add scratch, xb, 0x100
17253 stp w1, w1, [scratch]
17254 stp w1, w1, [scratch, 0x8]
17255
17256 The peephole patterns detecting this opportunity should guarantee
17257 the scratch register is available. */
17258
17259bool
17260aarch64_operands_adjust_ok_for_ldpstp (rtx *operands, bool load,
146c2e3a 17261 scalar_mode mode)
350013bc 17262{
34d7854d
JW
17263 const int num_insns = 4;
17264 enum reg_class rclass;
17265 HOST_WIDE_INT offvals[num_insns], msize;
17266 rtx mem[num_insns], reg[num_insns], base[num_insns], offset[num_insns];
350013bc
BC
17267
17268 if (load)
17269 {
34d7854d
JW
17270 for (int i = 0; i < num_insns; i++)
17271 {
17272 reg[i] = operands[2 * i];
17273 mem[i] = operands[2 * i + 1];
17274
17275 gcc_assert (REG_P (reg[i]));
17276 }
d0b51297
JW
17277
17278 /* Do not attempt to merge the loads if the loads clobber each other. */
17279 for (int i = 0; i < 8; i += 2)
17280 for (int j = i + 2; j < 8; j += 2)
17281 if (reg_overlap_mentioned_p (operands[i], operands[j]))
17282 return false;
350013bc
BC
17283 }
17284 else
34d7854d
JW
17285 for (int i = 0; i < num_insns; i++)
17286 {
17287 mem[i] = operands[2 * i];
17288 reg[i] = operands[2 * i + 1];
17289 }
350013bc 17290
34d7854d
JW
17291 /* Skip if memory operand is by itself valid for ldp/stp. */
17292 if (!MEM_P (mem[0]) || aarch64_mem_pair_operand (mem[0], mode))
bf84ac44
AP
17293 return false;
17294
34d7854d
JW
17295 for (int i = 0; i < num_insns; i++)
17296 {
17297 /* The mems cannot be volatile. */
17298 if (MEM_VOLATILE_P (mem[i]))
17299 return false;
17300
17301 /* Check if the addresses are in the form of [base+offset]. */
17302 extract_base_offset_in_addr (mem[i], base + i, offset + i);
17303 if (base[i] == NULL_RTX || offset[i] == NULL_RTX)
17304 return false;
17305 }
17306
363b395b
JW
17307 /* Check if the registers are of same class. */
17308 rclass = REG_P (reg[0]) && FP_REGNUM_P (REGNO (reg[0]))
17309 ? FP_REGS : GENERAL_REGS;
17310
17311 for (int i = 1; i < num_insns; i++)
17312 if (REG_P (reg[i]) && FP_REGNUM_P (REGNO (reg[i])))
17313 {
17314 if (rclass != FP_REGS)
17315 return false;
17316 }
17317 else
17318 {
17319 if (rclass != GENERAL_REGS)
17320 return false;
17321 }
17322
17323 /* Only the last register in the order in which they occur
17324 may be clobbered by the load. */
17325 if (rclass == GENERAL_REGS && load)
17326 for (int i = 0; i < num_insns - 1; i++)
34d7854d
JW
17327 if (reg_mentioned_p (reg[i], mem[i]))
17328 return false;
350013bc
BC
17329
17330 /* Check if the bases are same. */
34d7854d
JW
17331 for (int i = 0; i < num_insns - 1; i++)
17332 if (!rtx_equal_p (base[i], base[i + 1]))
17333 return false;
17334
17335 for (int i = 0; i < num_insns; i++)
17336 offvals[i] = INTVAL (offset[i]);
350013bc 17337
350013bc 17338 msize = GET_MODE_SIZE (mode);
d0b51297
JW
17339
17340 /* Check if the offsets can be put in the right order to do a ldp/stp. */
34d7854d
JW
17341 qsort (offvals, num_insns, sizeof (HOST_WIDE_INT),
17342 aarch64_host_wide_int_compare);
d0b51297
JW
17343
17344 if (!(offvals[1] == offvals[0] + msize
17345 && offvals[3] == offvals[2] + msize))
350013bc
BC
17346 return false;
17347
d0b51297
JW
17348 /* Check that offsets are within range of each other. The ldp/stp
17349 instructions have 7 bit immediate offsets, so use 0x80. */
17350 if (offvals[2] - offvals[0] >= msize * 0x80)
17351 return false;
350013bc 17352
d0b51297
JW
17353 /* The offsets must be aligned with respect to each other. */
17354 if (offvals[0] % msize != offvals[2] % msize)
17355 return false;
17356
54700e2e
AP
17357 /* If we have SImode and slow unaligned ldp,
17358 check that the alignment is at least 8 bytes. */
17359 if (mode == SImode
17360 && (aarch64_tune_params.extra_tuning_flags
34d7854d 17361 & AARCH64_EXTRA_TUNE_SLOW_UNALIGNED_LDPW)
54700e2e 17362 && !optimize_size
34d7854d 17363 && MEM_ALIGN (mem[0]) < 8 * BITS_PER_UNIT)
54700e2e
AP
17364 return false;
17365
350013bc
BC
17366 return true;
17367}
17368
17369/* Given OPERANDS of consecutive load/store, this function pairs them
d0b51297
JW
17370 into LDP/STP after adjusting the offset. It depends on the fact
17371 that the operands can be sorted so the offsets are correct for STP.
350013bc
BC
17372 MODE is the mode of memory operands. CODE is the rtl operator
17373 which should be applied to all memory operands, it's SIGN_EXTEND,
17374 ZERO_EXTEND or UNKNOWN. */
17375
17376bool
17377aarch64_gen_adjusted_ldpstp (rtx *operands, bool load,
146c2e3a 17378 scalar_mode mode, RTX_CODE code)
350013bc 17379{
d0b51297 17380 rtx base, offset_1, offset_3, t1, t2;
350013bc 17381 rtx mem_1, mem_2, mem_3, mem_4;
d0b51297
JW
17382 rtx temp_operands[8];
17383 HOST_WIDE_INT off_val_1, off_val_3, base_off, new_off_1, new_off_3,
17384 stp_off_upper_limit, stp_off_lower_limit, msize;
9b56ec11 17385
d0b51297
JW
17386 /* We make changes on a copy as we may still bail out. */
17387 for (int i = 0; i < 8; i ++)
17388 temp_operands[i] = operands[i];
9b56ec11 17389
d0b51297
JW
17390 /* Sort the operands. */
17391 qsort (temp_operands, 4, 2 * sizeof (rtx *), aarch64_ldrstr_offset_compare);
9b56ec11 17392
350013bc
BC
17393 if (load)
17394 {
d0b51297
JW
17395 mem_1 = temp_operands[1];
17396 mem_2 = temp_operands[3];
17397 mem_3 = temp_operands[5];
17398 mem_4 = temp_operands[7];
350013bc
BC
17399 }
17400 else
17401 {
d0b51297
JW
17402 mem_1 = temp_operands[0];
17403 mem_2 = temp_operands[2];
17404 mem_3 = temp_operands[4];
17405 mem_4 = temp_operands[6];
350013bc
BC
17406 gcc_assert (code == UNKNOWN);
17407 }
17408
9b56ec11 17409 extract_base_offset_in_addr (mem_1, &base, &offset_1);
d0b51297
JW
17410 extract_base_offset_in_addr (mem_3, &base, &offset_3);
17411 gcc_assert (base != NULL_RTX && offset_1 != NULL_RTX
17412 && offset_3 != NULL_RTX);
350013bc 17413
d0b51297 17414 /* Adjust offset so it can fit in LDP/STP instruction. */
350013bc 17415 msize = GET_MODE_SIZE (mode);
d0b51297
JW
17416 stp_off_upper_limit = msize * (0x40 - 1);
17417 stp_off_lower_limit = - msize * 0x40;
350013bc 17418
d0b51297
JW
17419 off_val_1 = INTVAL (offset_1);
17420 off_val_3 = INTVAL (offset_3);
17421
17422 /* The base offset is optimally halfway between the two STP/LDP offsets. */
17423 if (msize <= 4)
17424 base_off = (off_val_1 + off_val_3) / 2;
17425 else
17426 /* However, due to issues with negative LDP/STP offset generation for
17427 larger modes, for DF, DI and vector modes. we must not use negative
17428 addresses smaller than 9 signed unadjusted bits can store. This
17429 provides the most range in this case. */
17430 base_off = off_val_1;
17431
17432 /* Adjust the base so that it is aligned with the addresses but still
17433 optimal. */
17434 if (base_off % msize != off_val_1 % msize)
17435 /* Fix the offset, bearing in mind we want to make it bigger not
17436 smaller. */
17437 base_off += (((base_off % msize) - (off_val_1 % msize)) + msize) % msize;
17438 else if (msize <= 4)
17439 /* The negative range of LDP/STP is one larger than the positive range. */
17440 base_off += msize;
17441
17442 /* Check if base offset is too big or too small. We can attempt to resolve
17443 this issue by setting it to the maximum value and seeing if the offsets
17444 still fit. */
17445 if (base_off >= 0x1000)
350013bc 17446 {
d0b51297
JW
17447 base_off = 0x1000 - 1;
17448 /* We must still make sure that the base offset is aligned with respect
17449 to the address.  But it may not be made any bigger. */
17450 base_off -= (((base_off % msize) - (off_val_1 % msize)) + msize) % msize;
350013bc
BC
17451 }
17452
d0b51297
JW
17453 /* Likewise for the case where the base is too small. */
17454 if (base_off <= -0x1000)
350013bc 17455 {
d0b51297
JW
17456 base_off = -0x1000 + 1;
17457 base_off += (((base_off % msize) - (off_val_1 % msize)) + msize) % msize;
350013bc
BC
17458 }
17459
d0b51297
JW
17460 /* Offset of the first STP/LDP. */
17461 new_off_1 = off_val_1 - base_off;
17462
17463 /* Offset of the second STP/LDP. */
17464 new_off_3 = off_val_3 - base_off;
350013bc 17465
d0b51297
JW
17466 /* The offsets must be within the range of the LDP/STP instructions. */
17467 if (new_off_1 > stp_off_upper_limit || new_off_1 < stp_off_lower_limit
17468 || new_off_3 > stp_off_upper_limit || new_off_3 < stp_off_lower_limit)
350013bc
BC
17469 return false;
17470
d0b51297
JW
17471 replace_equiv_address_nv (mem_1, plus_constant (Pmode, operands[8],
17472 new_off_1), true);
17473 replace_equiv_address_nv (mem_2, plus_constant (Pmode, operands[8],
17474 new_off_1 + msize), true);
17475 replace_equiv_address_nv (mem_3, plus_constant (Pmode, operands[8],
17476 new_off_3), true);
17477 replace_equiv_address_nv (mem_4, plus_constant (Pmode, operands[8],
17478 new_off_3 + msize), true);
17479
17480 if (!aarch64_mem_pair_operand (mem_1, mode)
17481 || !aarch64_mem_pair_operand (mem_3, mode))
17482 return false;
350013bc
BC
17483
17484 if (code == ZERO_EXTEND)
17485 {
17486 mem_1 = gen_rtx_ZERO_EXTEND (DImode, mem_1);
17487 mem_2 = gen_rtx_ZERO_EXTEND (DImode, mem_2);
17488 mem_3 = gen_rtx_ZERO_EXTEND (DImode, mem_3);
17489 mem_4 = gen_rtx_ZERO_EXTEND (DImode, mem_4);
17490 }
17491 else if (code == SIGN_EXTEND)
17492 {
17493 mem_1 = gen_rtx_SIGN_EXTEND (DImode, mem_1);
17494 mem_2 = gen_rtx_SIGN_EXTEND (DImode, mem_2);
17495 mem_3 = gen_rtx_SIGN_EXTEND (DImode, mem_3);
17496 mem_4 = gen_rtx_SIGN_EXTEND (DImode, mem_4);
17497 }
17498
17499 if (load)
17500 {
d0b51297 17501 operands[0] = temp_operands[0];
350013bc 17502 operands[1] = mem_1;
d0b51297 17503 operands[2] = temp_operands[2];
350013bc 17504 operands[3] = mem_2;
d0b51297 17505 operands[4] = temp_operands[4];
350013bc 17506 operands[5] = mem_3;
d0b51297 17507 operands[6] = temp_operands[6];
350013bc
BC
17508 operands[7] = mem_4;
17509 }
17510 else
17511 {
17512 operands[0] = mem_1;
d0b51297 17513 operands[1] = temp_operands[1];
350013bc 17514 operands[2] = mem_2;
d0b51297 17515 operands[3] = temp_operands[3];
350013bc 17516 operands[4] = mem_3;
d0b51297 17517 operands[5] = temp_operands[5];
350013bc 17518 operands[6] = mem_4;
d0b51297 17519 operands[7] = temp_operands[7];
350013bc
BC
17520 }
17521
17522 /* Emit adjusting instruction. */
d0b51297 17523 emit_insn (gen_rtx_SET (operands[8], plus_constant (DImode, base, base_off)));
350013bc 17524 /* Emit ldp/stp instructions. */
f7df4a84
RS
17525 t1 = gen_rtx_SET (operands[0], operands[1]);
17526 t2 = gen_rtx_SET (operands[2], operands[3]);
350013bc 17527 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, t1, t2)));
f7df4a84
RS
17528 t1 = gen_rtx_SET (operands[4], operands[5]);
17529 t2 = gen_rtx_SET (operands[6], operands[7]);
350013bc
BC
17530 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, t1, t2)));
17531 return true;
17532}
17533
76a34e3f
RS
17534/* Implement TARGET_VECTORIZE_EMPTY_MASK_IS_EXPENSIVE. Assume for now that
17535 it isn't worth branching around empty masked ops (including masked
17536 stores). */
17537
17538static bool
17539aarch64_empty_mask_is_expensive (unsigned)
17540{
17541 return false;
17542}
17543
1b1e81f8
JW
17544/* Return 1 if pseudo register should be created and used to hold
17545 GOT address for PIC code. */
17546
17547bool
17548aarch64_use_pseudo_pic_reg (void)
17549{
17550 return aarch64_cmodel == AARCH64_CMODEL_SMALL_SPIC;
17551}
17552
7b841a12
JW
17553/* Implement TARGET_UNSPEC_MAY_TRAP_P. */
17554
17555static int
17556aarch64_unspec_may_trap_p (const_rtx x, unsigned flags)
17557{
17558 switch (XINT (x, 1))
17559 {
17560 case UNSPEC_GOTSMALLPIC:
17561 case UNSPEC_GOTSMALLPIC28K:
17562 case UNSPEC_GOTTINYPIC:
17563 return 0;
17564 default:
17565 break;
17566 }
17567
17568 return default_unspec_may_trap_p (x, flags);
17569}
17570
39252973
KT
17571
17572/* If X is a positive CONST_DOUBLE with a value that is a power of 2
17573 return the log2 of that value. Otherwise return -1. */
17574
17575int
17576aarch64_fpconst_pow_of_2 (rtx x)
17577{
17578 const REAL_VALUE_TYPE *r;
17579
17580 if (!CONST_DOUBLE_P (x))
17581 return -1;
17582
17583 r = CONST_DOUBLE_REAL_VALUE (x);
17584
17585 if (REAL_VALUE_NEGATIVE (*r)
17586 || REAL_VALUE_ISNAN (*r)
17587 || REAL_VALUE_ISINF (*r)
17588 || !real_isinteger (r, DFmode))
17589 return -1;
17590
17591 return exact_log2 (real_to_integer (r));
17592}
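
/* Editorial illustration (not part of the original source): example return
   values for the function above are 8.0 -> 3 and 1.0 -> 0, while 0.5, 3.0,
   -4.0 and NaN all yield -1, since they are not non-negative integral
   powers of two.  */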
17593
17594/* If X is a vector of equal CONST_DOUBLE values and that value is
17595 Y, return the aarch64_fpconst_pow_of_2 of Y. Otherwise return -1. */
17596
17597int
17598aarch64_vec_fpconst_pow_of_2 (rtx x)
17599{
6a70badb
RS
17600 int nelts;
17601 if (GET_CODE (x) != CONST_VECTOR
17602 || !CONST_VECTOR_NUNITS (x).is_constant (&nelts))
39252973
KT
17603 return -1;
17604
17605 if (GET_MODE_CLASS (GET_MODE (x)) != MODE_VECTOR_FLOAT)
17606 return -1;
17607
17608 int firstval = aarch64_fpconst_pow_of_2 (CONST_VECTOR_ELT (x, 0));
17609 if (firstval <= 0)
17610 return -1;
17611
6a70badb 17612 for (int i = 1; i < nelts; i++)
39252973
KT
17613 if (aarch64_fpconst_pow_of_2 (CONST_VECTOR_ELT (x, i)) != firstval)
17614 return -1;
17615
17616 return firstval;
17617}
17618
11e554b3
JG
17619/* Implement TARGET_PROMOTED_TYPE to promote 16-bit floating point types
17620 to float.
17621
17622 __fp16 always promotes through this hook.
17623 _Float16 may promote if TARGET_FLT_EVAL_METHOD is 16, but we do that
17624 through the generic excess precision logic rather than here. */
17625
c2ec330c
AL
17626static tree
17627aarch64_promoted_type (const_tree t)
17628{
11e554b3
JG
17629 if (SCALAR_FLOAT_TYPE_P (t)
17630 && TYPE_MAIN_VARIANT (t) == aarch64_fp16_type_node)
c2ec330c 17631 return float_type_node;
11e554b3 17632
c2ec330c
AL
17633 return NULL_TREE;
17634}
ee62a5a6
RS
17635
17636/* Implement the TARGET_OPTAB_SUPPORTED_P hook. */
17637
17638static bool
9acc9cbe 17639aarch64_optab_supported_p (int op, machine_mode mode1, machine_mode,
ee62a5a6
RS
17640 optimization_type opt_type)
17641{
17642 switch (op)
17643 {
17644 case rsqrt_optab:
9acc9cbe 17645 return opt_type == OPTIMIZE_FOR_SPEED && use_rsqrt_p (mode1);
ee62a5a6
RS
17646
17647 default:
17648 return true;
17649 }
17650}
17651
43cacb12
RS
17652/* Implement the TARGET_DWARF_POLY_INDETERMINATE_VALUE hook. */
17653
17654static unsigned int
17655aarch64_dwarf_poly_indeterminate_value (unsigned int i, unsigned int *factor,
17656 int *offset)
17657{
17658 /* Polynomial invariant 1 == (VG / 2) - 1. */
17659 gcc_assert (i == 1);
17660 *factor = 2;
17661 *offset = 1;
17662 return AARCH64_DWARF_VG;
17663}
17664
11e554b3
JG
17665/* Implement TARGET_LIBGCC_FLOATING_POINT_MODE_SUPPORTED_P - return TRUE
17666 if MODE is HFmode, and punt to the generic implementation otherwise. */
17667
17668static bool
7c5bd57a 17669aarch64_libgcc_floating_mode_supported_p (scalar_float_mode mode)
11e554b3
JG
17670{
17671 return (mode == HFmode
17672 ? true
17673 : default_libgcc_floating_mode_supported_p (mode));
17674}
17675
2e5f8203
JG
17676/* Implement TARGET_SCALAR_MODE_SUPPORTED_P - return TRUE
17677 if MODE is HFmode, and punt to the generic implementation otherwise. */
17678
17679static bool
18e2a8b8 17680aarch64_scalar_mode_supported_p (scalar_mode mode)
2e5f8203
JG
17681{
17682 return (mode == HFmode
17683 ? true
17684 : default_scalar_mode_supported_p (mode));
17685}
17686
11e554b3
JG
17687/* Set the value of FLT_EVAL_METHOD.
17688 ISO/IEC TS 18661-3 defines two values that we'd like to make use of:
17689
17690 0: evaluate all operations and constants, whose semantic type has at
17691 most the range and precision of type float, to the range and
17692 precision of float; evaluate all other operations and constants to
17693 the range and precision of the semantic type;
17694
17695 N, where _FloatN is a supported interchange floating type
17696 evaluate all operations and constants, whose semantic type has at
17697 most the range and precision of _FloatN type, to the range and
17698 precision of the _FloatN type; evaluate all other operations and
17699 constants to the range and precision of the semantic type;
17700
17701 If we have the ARMv8.2-A extensions then we support _Float16 in native
17702 precision, so we should set this to 16. Otherwise, we support the type,
17703 but want to evaluate expressions in float precision, so set this to
17704 0. */
17705
17706static enum flt_eval_method
17707aarch64_excess_precision (enum excess_precision_type type)
17708{
17709 switch (type)
17710 {
17711 case EXCESS_PRECISION_TYPE_FAST:
17712 case EXCESS_PRECISION_TYPE_STANDARD:
17713 /* We can calculate either in 16-bit range and precision or
17714 32-bit range and precision. Make that decision based on whether
17715 we have native support for the ARMv8.2-A 16-bit floating-point
17716 instructions or not. */
17717 return (TARGET_FP_F16INST
17718 ? FLT_EVAL_METHOD_PROMOTE_TO_FLOAT16
17719 : FLT_EVAL_METHOD_PROMOTE_TO_FLOAT);
17720 case EXCESS_PRECISION_TYPE_IMPLICIT:
17721 return FLT_EVAL_METHOD_PROMOTE_TO_FLOAT16;
17722 default:
17723 gcc_unreachable ();
17724 }
17725 return FLT_EVAL_METHOD_UNPREDICTABLE;
17726}
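
/* Editorial illustration (not part of the original source): given
   _Float16 a, b, c;  c = a * b;  the multiply is performed directly in half
   precision when TARGET_FP_F16INST is set (FLT_EVAL_METHOD == 16), and in
   single precision otherwise (FLT_EVAL_METHOD == 0), with the result
   converted back to _Float16 on assignment.  __fp16, by contrast, always
   promotes to float via the TARGET_PROMOTED_TYPE hook.  */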
17727
b48d6421
KT
17728/* Implement TARGET_SCHED_CAN_SPECULATE_INSN. Return true if INSN can be
17729 scheduled for speculative execution. Reject the long-running division
17730 and square-root instructions. */
17731
17732static bool
17733aarch64_sched_can_speculate_insn (rtx_insn *insn)
17734{
17735 switch (get_attr_type (insn))
17736 {
17737 case TYPE_SDIV:
17738 case TYPE_UDIV:
17739 case TYPE_FDIVS:
17740 case TYPE_FDIVD:
17741 case TYPE_FSQRTS:
17742 case TYPE_FSQRTD:
17743 case TYPE_NEON_FP_SQRT_S:
17744 case TYPE_NEON_FP_SQRT_D:
17745 case TYPE_NEON_FP_SQRT_S_Q:
17746 case TYPE_NEON_FP_SQRT_D_Q:
17747 case TYPE_NEON_FP_DIV_S:
17748 case TYPE_NEON_FP_DIV_D:
17749 case TYPE_NEON_FP_DIV_S_Q:
17750 case TYPE_NEON_FP_DIV_D_Q:
17751 return false;
17752 default:
17753 return true;
17754 }
17755}
17756
43cacb12
RS
17757/* Implement TARGET_COMPUTE_PRESSURE_CLASSES. */
17758
17759static int
17760aarch64_compute_pressure_classes (reg_class *classes)
17761{
17762 int i = 0;
17763 classes[i++] = GENERAL_REGS;
17764 classes[i++] = FP_REGS;
17765 /* PR_REGS isn't a useful pressure class because many predicate pseudo
17766 registers need to go in PR_LO_REGS at some point during their
17767 lifetime. Splitting it into two halves has the effect of making
17768 all predicates count against PR_LO_REGS, so that we try whenever
17769 possible to restrict the number of live predicates to 8. This
17770 greatly reduces the amount of spilling in certain loops. */
17771 classes[i++] = PR_LO_REGS;
17772 classes[i++] = PR_HI_REGS;
17773 return i;
17774}
17775
17776/* Implement TARGET_CAN_CHANGE_MODE_CLASS. */
17777
17778static bool
17779aarch64_can_change_mode_class (machine_mode from,
17780 machine_mode to, reg_class_t)
17781{
002092be
RS
17782 if (BYTES_BIG_ENDIAN)
17783 {
17784 bool from_sve_p = aarch64_sve_data_mode_p (from);
17785 bool to_sve_p = aarch64_sve_data_mode_p (to);
17786
17787 /* Don't allow changes between SVE data modes and non-SVE modes.
17788 See the comment at the head of aarch64-sve.md for details. */
17789 if (from_sve_p != to_sve_p)
17790 return false;
17791
17792 /* Don't allow changes in element size: lane 0 of the new vector
17793 would not then be lane 0 of the old vector. See the comment
17794 above aarch64_maybe_expand_sve_subreg_move for a more detailed
17795 description.
17796
17797 In the worst case, this forces a register to be spilled in
17798 one mode and reloaded in the other, which handles the
17799 endianness correctly. */
17800 if (from_sve_p && GET_MODE_UNIT_SIZE (from) != GET_MODE_UNIT_SIZE (to))
17801 return false;
17802 }
43cacb12
RS
17803 return true;
17804}
17805
5cce8171
RS
17806/* Implement TARGET_EARLY_REMAT_MODES. */
17807
17808static void
17809aarch64_select_early_remat_modes (sbitmap modes)
17810{
17811 /* SVE values are not normally live across a call, so it should be
17812 worth doing early rematerialization even in VL-specific mode. */
17813 for (int i = 0; i < NUM_MACHINE_MODES; ++i)
17814 {
17815 machine_mode mode = (machine_mode) i;
17816 unsigned int vec_flags = aarch64_classify_vector_mode (mode);
17817 if (vec_flags & VEC_ANY_SVE)
17818 bitmap_set_bit (modes, i);
17819 }
17820}
17821
c0111dc4
RE
17822/* Override the default target speculation_safe_value. */
17823static rtx
17824aarch64_speculation_safe_value (machine_mode mode,
17825 rtx result, rtx val, rtx failval)
17826{
17827 /* Maybe we should warn if falling back to hard barriers. They are
17828 likely to be noticeably more expensive than the alternative below. */
17829 if (!aarch64_track_speculation)
17830 return default_speculation_safe_value (mode, result, val, failval);
17831
17832 if (!REG_P (val))
17833 val = copy_to_mode_reg (mode, val);
17834
17835 if (!aarch64_reg_or_zero (failval, mode))
17836 failval = copy_to_mode_reg (mode, failval);
17837
21cebf90 17838 emit_insn (gen_despeculate_copy (mode, result, val, failval));
c0111dc4
RE
17839 return result;
17840}
17841
51b86113
DM
17842/* Target-specific selftests. */
17843
17844#if CHECKING_P
17845
17846namespace selftest {
17847
17848/* Selftest for the RTL loader.
17849 Verify that the RTL loader copes with a dump from
17850 print_rtx_function. This is essentially just a test that class
17851 function_reader can handle a real dump, but it also verifies
17852 that lookup_reg_by_dump_name correctly handles hard regs.
17853 The presence of hard reg names in the dump means that the test is
17854 target-specific, hence it is in this file. */
17855
17856static void
17857aarch64_test_loading_full_dump ()
17858{
17859 rtl_dump_test t (SELFTEST_LOCATION, locate_file ("aarch64/times-two.rtl"));
17860
17861 ASSERT_STREQ ("times_two", IDENTIFIER_POINTER (DECL_NAME (cfun->decl)));
17862
17863 rtx_insn *insn_1 = get_insn_by_uid (1);
17864 ASSERT_EQ (NOTE, GET_CODE (insn_1));
17865
17866 rtx_insn *insn_15 = get_insn_by_uid (15);
17867 ASSERT_EQ (INSN, GET_CODE (insn_15));
17868 ASSERT_EQ (USE, GET_CODE (PATTERN (insn_15)));
17869
17870 /* Verify crtl->return_rtx. */
17871 ASSERT_EQ (REG, GET_CODE (crtl->return_rtx));
17872 ASSERT_EQ (0, REGNO (crtl->return_rtx));
17873 ASSERT_EQ (SImode, GET_MODE (crtl->return_rtx));
17874}
17875
17876/* Run all target-specific selftests. */
17877
17878static void
17879aarch64_run_selftests (void)
17880{
17881 aarch64_test_loading_full_dump ();
17882}
17883
17884} // namespace selftest
17885
17886#endif /* #if CHECKING_P */
17887
43e9d192
IB
17888#undef TARGET_ADDRESS_COST
17889#define TARGET_ADDRESS_COST aarch64_address_cost
17890
17891/* This hook determines whether unnamed bitfields affect the alignment
17892 of the containing structure. The hook returns true if the structure
17893 should inherit the alignment requirements of an unnamed bitfield's
17894 type. */
17895#undef TARGET_ALIGN_ANON_BITFIELD
17896#define TARGET_ALIGN_ANON_BITFIELD hook_bool_void_true
17897
17898#undef TARGET_ASM_ALIGNED_DI_OP
17899#define TARGET_ASM_ALIGNED_DI_OP "\t.xword\t"
17900
17901#undef TARGET_ASM_ALIGNED_HI_OP
17902#define TARGET_ASM_ALIGNED_HI_OP "\t.hword\t"
17903
17904#undef TARGET_ASM_ALIGNED_SI_OP
17905#define TARGET_ASM_ALIGNED_SI_OP "\t.word\t"
17906
17907#undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
17908#define TARGET_ASM_CAN_OUTPUT_MI_THUNK \
17909 hook_bool_const_tree_hwi_hwi_const_tree_true
17910
e1c1ecb0
KT
17911#undef TARGET_ASM_FILE_START
17912#define TARGET_ASM_FILE_START aarch64_start_file
17913
43e9d192
IB
17914#undef TARGET_ASM_OUTPUT_MI_THUNK
17915#define TARGET_ASM_OUTPUT_MI_THUNK aarch64_output_mi_thunk
17916
17917#undef TARGET_ASM_SELECT_RTX_SECTION
17918#define TARGET_ASM_SELECT_RTX_SECTION aarch64_select_rtx_section
17919
17920#undef TARGET_ASM_TRAMPOLINE_TEMPLATE
17921#define TARGET_ASM_TRAMPOLINE_TEMPLATE aarch64_asm_trampoline_template
17922
17923#undef TARGET_BUILD_BUILTIN_VA_LIST
17924#define TARGET_BUILD_BUILTIN_VA_LIST aarch64_build_builtin_va_list
17925
17926#undef TARGET_CALLEE_COPIES
17927#define TARGET_CALLEE_COPIES hook_bool_CUMULATIVE_ARGS_mode_tree_bool_false
17928
17929#undef TARGET_CAN_ELIMINATE
17930#define TARGET_CAN_ELIMINATE aarch64_can_eliminate
17931
1fd8d40c
KT
17932#undef TARGET_CAN_INLINE_P
17933#define TARGET_CAN_INLINE_P aarch64_can_inline_p
17934
43e9d192
IB
17935#undef TARGET_CANNOT_FORCE_CONST_MEM
17936#define TARGET_CANNOT_FORCE_CONST_MEM aarch64_cannot_force_const_mem
17937
50487d79
EM
17938#undef TARGET_CASE_VALUES_THRESHOLD
17939#define TARGET_CASE_VALUES_THRESHOLD aarch64_case_values_threshold
17940
43e9d192
IB
17941#undef TARGET_CONDITIONAL_REGISTER_USAGE
17942#define TARGET_CONDITIONAL_REGISTER_USAGE aarch64_conditional_register_usage
17943
17944/* Only the least significant bit is used for initialization guard
17945 variables. */
17946#undef TARGET_CXX_GUARD_MASK_BIT
17947#define TARGET_CXX_GUARD_MASK_BIT hook_bool_void_true
17948
17949#undef TARGET_C_MODE_FOR_SUFFIX
17950#define TARGET_C_MODE_FOR_SUFFIX aarch64_c_mode_for_suffix
17951
17952#ifdef TARGET_BIG_ENDIAN_DEFAULT
17953#undef TARGET_DEFAULT_TARGET_FLAGS
17954#define TARGET_DEFAULT_TARGET_FLAGS (MASK_BIG_END)
17955#endif
17956
17957#undef TARGET_CLASS_MAX_NREGS
17958#define TARGET_CLASS_MAX_NREGS aarch64_class_max_nregs
17959
119103ca
JG
17960#undef TARGET_BUILTIN_DECL
17961#define TARGET_BUILTIN_DECL aarch64_builtin_decl
17962
a6fc00da
BH
17963#undef TARGET_BUILTIN_RECIPROCAL
17964#define TARGET_BUILTIN_RECIPROCAL aarch64_builtin_reciprocal
17965
11e554b3
JG
17966#undef TARGET_C_EXCESS_PRECISION
17967#define TARGET_C_EXCESS_PRECISION aarch64_excess_precision
17968
43e9d192
IB
17969#undef TARGET_EXPAND_BUILTIN
17970#define TARGET_EXPAND_BUILTIN aarch64_expand_builtin
17971
17972#undef TARGET_EXPAND_BUILTIN_VA_START
17973#define TARGET_EXPAND_BUILTIN_VA_START aarch64_expand_builtin_va_start
17974
9697e620
JG
17975#undef TARGET_FOLD_BUILTIN
17976#define TARGET_FOLD_BUILTIN aarch64_fold_builtin
17977
43e9d192
IB
17978#undef TARGET_FUNCTION_ARG
17979#define TARGET_FUNCTION_ARG aarch64_function_arg
17980
17981#undef TARGET_FUNCTION_ARG_ADVANCE
17982#define TARGET_FUNCTION_ARG_ADVANCE aarch64_function_arg_advance
17983
17984#undef TARGET_FUNCTION_ARG_BOUNDARY
17985#define TARGET_FUNCTION_ARG_BOUNDARY aarch64_function_arg_boundary
17986
76b0cbf8
RS
17987#undef TARGET_FUNCTION_ARG_PADDING
17988#define TARGET_FUNCTION_ARG_PADDING aarch64_function_arg_padding
17989
43cacb12
RS
17990#undef TARGET_GET_RAW_RESULT_MODE
17991#define TARGET_GET_RAW_RESULT_MODE aarch64_get_reg_raw_mode
17992#undef TARGET_GET_RAW_ARG_MODE
17993#define TARGET_GET_RAW_ARG_MODE aarch64_get_reg_raw_mode
17994
43e9d192
IB
17995#undef TARGET_FUNCTION_OK_FOR_SIBCALL
17996#define TARGET_FUNCTION_OK_FOR_SIBCALL aarch64_function_ok_for_sibcall
17997
17998#undef TARGET_FUNCTION_VALUE
17999#define TARGET_FUNCTION_VALUE aarch64_function_value
18000
18001#undef TARGET_FUNCTION_VALUE_REGNO_P
18002#define TARGET_FUNCTION_VALUE_REGNO_P aarch64_function_value_regno_p
18003
fc72cba7
AL
18004#undef TARGET_GIMPLE_FOLD_BUILTIN
18005#define TARGET_GIMPLE_FOLD_BUILTIN aarch64_gimple_fold_builtin
0ac198d3 18006
43e9d192
IB
18007#undef TARGET_GIMPLIFY_VA_ARG_EXPR
18008#define TARGET_GIMPLIFY_VA_ARG_EXPR aarch64_gimplify_va_arg_expr
18009
18010#undef TARGET_INIT_BUILTINS
18011#define TARGET_INIT_BUILTINS aarch64_init_builtins
18012
c64f7d37
WD
18013#undef TARGET_IRA_CHANGE_PSEUDO_ALLOCNO_CLASS
18014#define TARGET_IRA_CHANGE_PSEUDO_ALLOCNO_CLASS \
18015 aarch64_ira_change_pseudo_allocno_class
18016
43e9d192
IB
18017#undef TARGET_LEGITIMATE_ADDRESS_P
18018#define TARGET_LEGITIMATE_ADDRESS_P aarch64_legitimate_address_hook_p
18019
18020#undef TARGET_LEGITIMATE_CONSTANT_P
18021#define TARGET_LEGITIMATE_CONSTANT_P aarch64_legitimate_constant_p
18022
491ec060
WD
18023#undef TARGET_LEGITIMIZE_ADDRESS_DISPLACEMENT
18024#define TARGET_LEGITIMIZE_ADDRESS_DISPLACEMENT \
18025 aarch64_legitimize_address_displacement
18026
43e9d192
IB
18027#undef TARGET_LIBGCC_CMP_RETURN_MODE
18028#define TARGET_LIBGCC_CMP_RETURN_MODE aarch64_libgcc_cmp_return_mode
18029
11e554b3
JG
18030#undef TARGET_LIBGCC_FLOATING_MODE_SUPPORTED_P
18031#define TARGET_LIBGCC_FLOATING_MODE_SUPPORTED_P \
18032aarch64_libgcc_floating_mode_supported_p
18033
ac2b960f
YZ
18034#undef TARGET_MANGLE_TYPE
18035#define TARGET_MANGLE_TYPE aarch64_mangle_type
18036
43e9d192
IB
18037#undef TARGET_MEMORY_MOVE_COST
18038#define TARGET_MEMORY_MOVE_COST aarch64_memory_move_cost
18039
26e0ff94
WD
18040#undef TARGET_MIN_DIVISIONS_FOR_RECIP_MUL
18041#define TARGET_MIN_DIVISIONS_FOR_RECIP_MUL aarch64_min_divisions_for_recip_mul
18042
43e9d192
IB
18043#undef TARGET_MUST_PASS_IN_STACK
18044#define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size
18045
18046/* This target hook should return true if accesses to volatile bitfields
18047 should use the narrowest mode possible. It should return false if these
18048 accesses should use the bitfield container type. */
18049#undef TARGET_NARROW_VOLATILE_BITFIELD
18050#define TARGET_NARROW_VOLATILE_BITFIELD hook_bool_void_false
18051
18052#undef TARGET_OPTION_OVERRIDE
18053#define TARGET_OPTION_OVERRIDE aarch64_override_options
18054
18055#undef TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE
18056#define TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE \
18057 aarch64_override_options_after_change
18058
361fb3ee
KT
18059#undef TARGET_OPTION_SAVE
18060#define TARGET_OPTION_SAVE aarch64_option_save
18061
18062#undef TARGET_OPTION_RESTORE
18063#define TARGET_OPTION_RESTORE aarch64_option_restore
18064
18065#undef TARGET_OPTION_PRINT
18066#define TARGET_OPTION_PRINT aarch64_option_print
18067
5a2c8331
KT
18068#undef TARGET_OPTION_VALID_ATTRIBUTE_P
18069#define TARGET_OPTION_VALID_ATTRIBUTE_P aarch64_option_valid_attribute_p
18070
d78006d9
KT
18071#undef TARGET_SET_CURRENT_FUNCTION
18072#define TARGET_SET_CURRENT_FUNCTION aarch64_set_current_function
18073
43e9d192
IB
18074#undef TARGET_PASS_BY_REFERENCE
18075#define TARGET_PASS_BY_REFERENCE aarch64_pass_by_reference
18076
18077#undef TARGET_PREFERRED_RELOAD_CLASS
18078#define TARGET_PREFERRED_RELOAD_CLASS aarch64_preferred_reload_class
18079
cee66c68
WD
18080#undef TARGET_SCHED_REASSOCIATION_WIDTH
18081#define TARGET_SCHED_REASSOCIATION_WIDTH aarch64_reassociation_width
18082
c2ec330c
AL
18083#undef TARGET_PROMOTED_TYPE
18084#define TARGET_PROMOTED_TYPE aarch64_promoted_type
18085
43e9d192
IB
18086#undef TARGET_SECONDARY_RELOAD
18087#define TARGET_SECONDARY_RELOAD aarch64_secondary_reload
18088
18089#undef TARGET_SHIFT_TRUNCATION_MASK
18090#define TARGET_SHIFT_TRUNCATION_MASK aarch64_shift_truncation_mask
18091
18092#undef TARGET_SETUP_INCOMING_VARARGS
18093#define TARGET_SETUP_INCOMING_VARARGS aarch64_setup_incoming_varargs
18094
18095#undef TARGET_STRUCT_VALUE_RTX
18096#define TARGET_STRUCT_VALUE_RTX aarch64_struct_value_rtx
18097
18098#undef TARGET_REGISTER_MOVE_COST
18099#define TARGET_REGISTER_MOVE_COST aarch64_register_move_cost
18100
18101#undef TARGET_RETURN_IN_MEMORY
18102#define TARGET_RETURN_IN_MEMORY aarch64_return_in_memory
18103
18104#undef TARGET_RETURN_IN_MSB
18105#define TARGET_RETURN_IN_MSB aarch64_return_in_msb
18106
18107#undef TARGET_RTX_COSTS
7cc2145f 18108#define TARGET_RTX_COSTS aarch64_rtx_costs_wrapper
43e9d192 18109
#undef TARGET_SCALAR_MODE_SUPPORTED_P
#define TARGET_SCALAR_MODE_SUPPORTED_P aarch64_scalar_mode_supported_p

#undef TARGET_SCHED_ISSUE_RATE
#define TARGET_SCHED_ISSUE_RATE aarch64_sched_issue_rate

#undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
  aarch64_sched_first_cycle_multipass_dfa_lookahead

#undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD
#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD \
  aarch64_first_cycle_multipass_dfa_lookahead_guard

#undef TARGET_SHRINK_WRAP_GET_SEPARATE_COMPONENTS
#define TARGET_SHRINK_WRAP_GET_SEPARATE_COMPONENTS \
  aarch64_get_separate_components

#undef TARGET_SHRINK_WRAP_COMPONENTS_FOR_BB
#define TARGET_SHRINK_WRAP_COMPONENTS_FOR_BB \
  aarch64_components_for_bb

#undef TARGET_SHRINK_WRAP_DISQUALIFY_COMPONENTS
#define TARGET_SHRINK_WRAP_DISQUALIFY_COMPONENTS \
  aarch64_disqualify_components

#undef TARGET_SHRINK_WRAP_EMIT_PROLOGUE_COMPONENTS
#define TARGET_SHRINK_WRAP_EMIT_PROLOGUE_COMPONENTS \
  aarch64_emit_prologue_components

#undef TARGET_SHRINK_WRAP_EMIT_EPILOGUE_COMPONENTS
#define TARGET_SHRINK_WRAP_EMIT_EPILOGUE_COMPONENTS \
  aarch64_emit_epilogue_components

#undef TARGET_SHRINK_WRAP_SET_HANDLED_COMPONENTS
#define TARGET_SHRINK_WRAP_SET_HANDLED_COMPONENTS \
  aarch64_set_handled_components

#undef TARGET_TRAMPOLINE_INIT
#define TARGET_TRAMPOLINE_INIT aarch64_trampoline_init

#undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
#define TARGET_USE_BLOCKS_FOR_CONSTANT_P aarch64_use_blocks_for_constant_p

#undef TARGET_VECTOR_MODE_SUPPORTED_P
#define TARGET_VECTOR_MODE_SUPPORTED_P aarch64_vector_mode_supported_p

#undef TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT
#define TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT \
  aarch64_builtin_support_vector_misalignment

#undef TARGET_ARRAY_MODE
#define TARGET_ARRAY_MODE aarch64_array_mode

#undef TARGET_ARRAY_MODE_SUPPORTED_P
#define TARGET_ARRAY_MODE_SUPPORTED_P aarch64_array_mode_supported_p

#undef TARGET_VECTORIZE_ADD_STMT_COST
#define TARGET_VECTORIZE_ADD_STMT_COST aarch64_add_stmt_cost

#undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
#define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
  aarch64_builtin_vectorization_cost

#undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
#define TARGET_VECTORIZE_PREFERRED_SIMD_MODE aarch64_preferred_simd_mode

#undef TARGET_VECTORIZE_BUILTINS
#define TARGET_VECTORIZE_BUILTINS

#undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
#define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
  aarch64_builtin_vectorized_function

#undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES
#define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES \
  aarch64_autovectorize_vector_sizes

#undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV
#define TARGET_ATOMIC_ASSIGN_EXPAND_FENV \
  aarch64_atomic_assign_expand_fenv

/* Section anchor support.  */

#undef TARGET_MIN_ANCHOR_OFFSET
#define TARGET_MIN_ANCHOR_OFFSET -256

/* Limit the maximum anchor offset to 4k-1, since that's the limit for a
   byte offset; we can do much more for larger data types, but have no way
   to determine the size of the access.  We assume accesses are aligned.  */
#undef TARGET_MAX_ANCHOR_OFFSET
#define TARGET_MAX_ANCHOR_OFFSET 4095
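/* Illustrative sketch (editor's note, not part of the original source):
   with section anchors the middle end addresses nearby statics from one
   shared base.  For two hypothetical globals placed 8 and 12 bytes past
   the anchor, the accesses could be

     adrp x0, anchor
     add  x0, x0, :lo12:anchor
     ldr  w1, [x0, 8]
     ldr  w2, [x0, 12]

   provided each offset stays within [-256, 4095].  These bounds appear to
   mirror the single-instruction addressing ranges: signed 9-bit LDUR/STUR
   offsets below the anchor and unsigned 12-bit LDR/STR byte offsets above
   it.  */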

#undef TARGET_VECTOR_ALIGNMENT
#define TARGET_VECTOR_ALIGNMENT aarch64_simd_vector_alignment

#undef TARGET_VECTORIZE_PREFERRED_VECTOR_ALIGNMENT
#define TARGET_VECTORIZE_PREFERRED_VECTOR_ALIGNMENT \
  aarch64_vectorize_preferred_vector_alignment
#undef TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE
#define TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE \
  aarch64_simd_vector_alignment_reachable

/* vec_perm support.  */

#undef TARGET_VECTORIZE_VEC_PERM_CONST
#define TARGET_VECTORIZE_VEC_PERM_CONST \
  aarch64_vectorize_vec_perm_const

#undef TARGET_VECTORIZE_GET_MASK_MODE
#define TARGET_VECTORIZE_GET_MASK_MODE aarch64_get_mask_mode
#undef TARGET_VECTORIZE_EMPTY_MASK_IS_EXPENSIVE
#define TARGET_VECTORIZE_EMPTY_MASK_IS_EXPENSIVE \
  aarch64_empty_mask_is_expensive
#undef TARGET_PREFERRED_ELSE_VALUE
#define TARGET_PREFERRED_ELSE_VALUE \
  aarch64_preferred_else_value

#undef TARGET_INIT_LIBFUNCS
#define TARGET_INIT_LIBFUNCS aarch64_init_libfuncs

#undef TARGET_FIXED_CONDITION_CODE_REGS
#define TARGET_FIXED_CONDITION_CODE_REGS aarch64_fixed_condition_code_regs

#undef TARGET_FLAGS_REGNUM
#define TARGET_FLAGS_REGNUM CC_REGNUM

#undef TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS
#define TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS true

#undef TARGET_ASAN_SHADOW_OFFSET
#define TARGET_ASAN_SHADOW_OFFSET aarch64_asan_shadow_offset

#undef TARGET_LEGITIMIZE_ADDRESS
#define TARGET_LEGITIMIZE_ADDRESS aarch64_legitimize_address

#undef TARGET_SCHED_CAN_SPECULATE_INSN
#define TARGET_SCHED_CAN_SPECULATE_INSN aarch64_sched_can_speculate_insn

#undef TARGET_CAN_USE_DOLOOP_P
#define TARGET_CAN_USE_DOLOOP_P can_use_doloop_if_innermost

#undef TARGET_SCHED_ADJUST_PRIORITY
#define TARGET_SCHED_ADJUST_PRIORITY aarch64_sched_adjust_priority

#undef TARGET_SCHED_MACRO_FUSION_P
#define TARGET_SCHED_MACRO_FUSION_P aarch64_macro_fusion_p

#undef TARGET_SCHED_MACRO_FUSION_PAIR_P
#define TARGET_SCHED_MACRO_FUSION_PAIR_P aarch_macro_fusion_pair_p

#undef TARGET_SCHED_FUSION_PRIORITY
#define TARGET_SCHED_FUSION_PRIORITY aarch64_sched_fusion_priority

#undef TARGET_UNSPEC_MAY_TRAP_P
#define TARGET_UNSPEC_MAY_TRAP_P aarch64_unspec_may_trap_p

#undef TARGET_USE_PSEUDO_PIC_REG
#define TARGET_USE_PSEUDO_PIC_REG aarch64_use_pseudo_pic_reg

#undef TARGET_PRINT_OPERAND
#define TARGET_PRINT_OPERAND aarch64_print_operand

#undef TARGET_PRINT_OPERAND_ADDRESS
#define TARGET_PRINT_OPERAND_ADDRESS aarch64_print_operand_address

#undef TARGET_OPTAB_SUPPORTED_P
#define TARGET_OPTAB_SUPPORTED_P aarch64_optab_supported_p

#undef TARGET_OMIT_STRUCT_RETURN_REG
#define TARGET_OMIT_STRUCT_RETURN_REG true

#undef TARGET_DWARF_POLY_INDETERMINATE_VALUE
#define TARGET_DWARF_POLY_INDETERMINATE_VALUE \
  aarch64_dwarf_poly_indeterminate_value

/* The architecture reserves bits 0 and 1 so use bit 2 for descriptors.  */
#undef TARGET_CUSTOM_FUNCTION_DESCRIPTORS
#define TARGET_CUSTOM_FUNCTION_DESCRIPTORS 4
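/* Editor's note (assumption, not from the original source): the value is
   treated as a single-bit mask, so 4 selects bit 2.  When nested functions
   use descriptors instead of trampolines, a "pointer" to such a function
   has this bit set, and an indirect call tests it at run time to decide
   whether it is jumping straight to code or loading the real code address
   and static chain from a descriptor.  Bits 0 and 1 are unavailable here
   because the architecture reserves them in code addresses.  */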

#undef TARGET_HARD_REGNO_NREGS
#define TARGET_HARD_REGNO_NREGS aarch64_hard_regno_nregs
#undef TARGET_HARD_REGNO_MODE_OK
#define TARGET_HARD_REGNO_MODE_OK aarch64_hard_regno_mode_ok

#undef TARGET_MODES_TIEABLE_P
#define TARGET_MODES_TIEABLE_P aarch64_modes_tieable_p

#undef TARGET_HARD_REGNO_CALL_PART_CLOBBERED
#define TARGET_HARD_REGNO_CALL_PART_CLOBBERED \
  aarch64_hard_regno_call_part_clobbered

#undef TARGET_CONSTANT_ALIGNMENT
#define TARGET_CONSTANT_ALIGNMENT aarch64_constant_alignment

#undef TARGET_STACK_CLASH_PROTECTION_ALLOCA_PROBE_RANGE
#define TARGET_STACK_CLASH_PROTECTION_ALLOCA_PROBE_RANGE \
  aarch64_stack_clash_protection_alloca_probe_range

#undef TARGET_COMPUTE_PRESSURE_CLASSES
#define TARGET_COMPUTE_PRESSURE_CLASSES aarch64_compute_pressure_classes

#undef TARGET_CAN_CHANGE_MODE_CLASS
#define TARGET_CAN_CHANGE_MODE_CLASS aarch64_can_change_mode_class

#undef TARGET_SELECT_EARLY_REMAT_MODES
#define TARGET_SELECT_EARLY_REMAT_MODES aarch64_select_early_remat_modes

#undef TARGET_SPECULATION_SAFE_VALUE
#define TARGET_SPECULATION_SAFE_VALUE aarch64_speculation_safe_value

#if CHECKING_P
#undef TARGET_RUN_TARGET_SELFTESTS
#define TARGET_RUN_TARGET_SELFTESTS selftest::aarch64_run_selftests
#endif /* #if CHECKING_P */

struct gcc_target targetm = TARGET_INITIALIZER;
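
/* Editor's note: TARGET_INITIALIZER expands to an aggregate initializer
   assembled from the TARGET_* macros defined above, with target.def
   supplying defaults for any hook this file does not override.  The rest
   of the compiler reaches the aarch64 implementations through this
   structure; for example, targetm.sched.issue_rate () ends up calling
   aarch64_sched_issue_rate ().  */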

#include "gt-aarch64.h"