/* Machine description for AArch64 architecture.
   Copyright (C) 2009-2018 Free Software Foundation, Inc.
   Contributed by ARM Ltd.

   This file is part of GCC.

   GCC is free software; you can redistribute it and/or modify it
   under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 3, or (at your option)
   any later version.

   GCC is distributed in the hope that it will be useful, but
   WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with GCC; see the file COPYING3.  If not see
   <http://www.gnu.org/licenses/>.  */
#define IN_TARGET_CODE 1

#include "config.h"
#define INCLUDE_STRING
#include "system.h"
#include "coretypes.h"
#include "backend.h"
#include "target.h"
#include "rtl.h"
#include "tree.h"
#include "memmodel.h"
#include "gimple.h"
#include "cfghooks.h"
#include "cfgloop.h"
#include "df.h"
#include "tm_p.h"
#include "stringpool.h"
#include "attribs.h"
#include "optabs.h"
#include "regs.h"
#include "emit-rtl.h"
#include "recog.h"
#include "diagnostic.h"
#include "insn-attr.h"
#include "alias.h"
#include "fold-const.h"
#include "stor-layout.h"
#include "calls.h"
#include "varasm.h"
#include "output.h"
#include "flags.h"
#include "explow.h"
#include "expr.h"
#include "reload.h"
#include "langhooks.h"
#include "opts.h"
#include "params.h"
#include "gimplify.h"
#include "dwarf2.h"
#include "gimple-iterator.h"
#include "tree-vectorizer.h"
#include "aarch64-cost-tables.h"
#include "dumpfile.h"
#include "builtins.h"
#include "rtl-iter.h"
#include "tm-constrs.h"
#include "sched-int.h"
#include "target-globals.h"
#include "common/common-target.h"
#include "cfgrtl.h"
#include "selftest.h"
#include "selftest-rtl.h"
#include "rtx-vector-builder.h"

/* This file should be included last.  */
#include "target-def.h"

/* Defined for convenience.  */
#define POINTER_BYTES (POINTER_SIZE / BITS_PER_UNIT)

/* Classifies an address.

   ADDRESS_REG_IMM
       A simple base register plus immediate offset.

   ADDRESS_REG_WB
       A base register indexed by immediate offset with writeback.

   ADDRESS_REG_REG
       A base register indexed by (optionally scaled) register.

   ADDRESS_REG_UXTW
       A base register indexed by (optionally scaled) zero-extended register.

   ADDRESS_REG_SXTW
       A base register indexed by (optionally scaled) sign-extended register.

   ADDRESS_LO_SUM
       A LO_SUM rtx with a base register and "LO12" symbol relocation.

   ADDRESS_SYMBOLIC:
       A constant symbolic address, in pc-relative literal pool.  */

enum aarch64_address_type {
  ADDRESS_REG_IMM,
  ADDRESS_REG_WB,
  ADDRESS_REG_REG,
  ADDRESS_REG_UXTW,
  ADDRESS_REG_SXTW,
  ADDRESS_LO_SUM,
  ADDRESS_SYMBOLIC
};
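
/* Illustrative assembly forms for the address classes above (a sketch
   in standard AArch64 syntax; the register numbers and the symbol
   "foo" are assumed for the example):

     ADDRESS_REG_IMM    ldr x0, [x1, #16]
     ADDRESS_REG_WB     ldr x0, [x1, #16]!  or  ldr x0, [x1], #16
     ADDRESS_REG_REG    ldr x0, [x1, x2, lsl #3]
     ADDRESS_REG_UXTW   ldr x0, [x1, w2, uxtw #3]
     ADDRESS_REG_SXTW   ldr x0, [x1, w2, sxtw #3]
     ADDRESS_LO_SUM     ldr x0, [x1, #:lo12:foo]
     ADDRESS_SYMBOLIC   ldr x0, .Lpool_entry  */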

struct aarch64_address_info {
  enum aarch64_address_type type;
  rtx base;
  rtx offset;
  poly_int64 const_offset;
  int shift;
  enum aarch64_symbol_type symbol_type;
};

/* Information about a legitimate vector immediate operand.  */
struct simd_immediate_info
{
  enum insn_type { MOV, MVN };
  enum modifier_type { LSL, MSL };

  simd_immediate_info () {}
  simd_immediate_info (scalar_float_mode, rtx);
  simd_immediate_info (scalar_int_mode, unsigned HOST_WIDE_INT,
		       insn_type = MOV, modifier_type = LSL,
		       unsigned int = 0);
  simd_immediate_info (scalar_mode, rtx, rtx);

  /* The mode of the elements.  */
  scalar_mode elt_mode;

  /* The value of each element if all elements are the same, or the
     first value if the constant is a series.  */
  rtx value;

  /* The value of the step if the constant is a series, null otherwise.  */
  rtx step;

  /* The instruction to use to move the immediate into a vector.  */
  insn_type insn;

  /* The kind of shift modifier to use, and the number of bits to shift.
     This is (LSL, 0) if no shift is needed.  */
  modifier_type modifier;
  unsigned int shift;
};

/* Construct a floating-point immediate in which each element has mode
   ELT_MODE_IN and value VALUE_IN.  */
inline simd_immediate_info
::simd_immediate_info (scalar_float_mode elt_mode_in, rtx value_in)
  : elt_mode (elt_mode_in), value (value_in), step (NULL_RTX), insn (MOV),
    modifier (LSL), shift (0)
{}

/* Construct an integer immediate in which each element has mode ELT_MODE_IN
   and value VALUE_IN.  The other parameters are as for the structure
   fields.  */
inline simd_immediate_info
::simd_immediate_info (scalar_int_mode elt_mode_in,
		       unsigned HOST_WIDE_INT value_in,
		       insn_type insn_in, modifier_type modifier_in,
		       unsigned int shift_in)
  : elt_mode (elt_mode_in), value (gen_int_mode (value_in, elt_mode_in)),
    step (NULL_RTX), insn (insn_in), modifier (modifier_in), shift (shift_in)
{}

/* Construct an integer immediate in which each element has mode ELT_MODE_IN
   and where element I is equal to VALUE_IN + I * STEP_IN.  */
inline simd_immediate_info
::simd_immediate_info (scalar_mode elt_mode_in, rtx value_in, rtx step_in)
  : elt_mode (elt_mode_in), value (value_in), step (step_in), insn (MOV),
    modifier (LSL), shift (0)
{}
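
/* As an illustration (an assumed example, not taken from real compiler
   output): a V4SI constant whose elements all equal 0x0000abff could be
   described as simd_immediate_info (SImode, 0xab, MOV, MSL, 8), mapping
   to the instruction "movi v0.4s, #0xab, msl #8" - the MSL modifier
   shifts the immediate left by 8 and fills the vacated bits with ones.  */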

/* The current code model.  */
enum aarch64_code_model aarch64_cmodel;

/* The number of 64-bit elements in an SVE vector.  */
poly_uint16 aarch64_sve_vg;

#ifdef HAVE_AS_TLS
#undef TARGET_HAVE_TLS
#define TARGET_HAVE_TLS 1
#endif

static bool aarch64_composite_type_p (const_tree, machine_mode);
static bool aarch64_vfp_is_call_or_return_candidate (machine_mode,
						     const_tree,
						     machine_mode *, int *,
						     bool *);
static void aarch64_elf_asm_constructor (rtx, int) ATTRIBUTE_UNUSED;
static void aarch64_elf_asm_destructor (rtx, int) ATTRIBUTE_UNUSED;
static void aarch64_override_options_after_change (void);
static bool aarch64_vector_mode_supported_p (machine_mode);
static int aarch64_address_cost (rtx, machine_mode, addr_space_t, bool);
static bool aarch64_builtin_support_vector_misalignment (machine_mode mode,
							 const_tree type,
							 int misalignment,
							 bool is_packed);
static machine_mode aarch64_simd_container_mode (scalar_mode, poly_int64);
static bool aarch64_print_address_internal (FILE*, machine_mode, rtx,
					    aarch64_addr_query_type);

/* Major revision number of the ARM Architecture implemented by the target.  */
unsigned aarch64_architecture_version;

/* The processor for which instructions should be scheduled.  */
enum aarch64_processor aarch64_tune = cortexa53;

/* Mask to specify which instruction scheduling options should be used.  */
unsigned long aarch64_tune_flags = 0;

/* Global flag for PC relative loads.  */
bool aarch64_pcrelative_literal_loads;

/* Global flag for whether frame pointer is enabled.  */
bool aarch64_use_frame_pointer;

/* Support for command line parsing of boolean flags in the tuning
   structures.  */
struct aarch64_flag_desc
{
  const char* name;
  unsigned int flag;
};

#define AARCH64_FUSION_PAIR(name, internal_name) \
  { name, AARCH64_FUSE_##internal_name },
static const struct aarch64_flag_desc aarch64_fusible_pairs[] =
{
  { "none", AARCH64_FUSE_NOTHING },
#include "aarch64-fusion-pairs.def"
  { "all", AARCH64_FUSE_ALL },
  { NULL, AARCH64_FUSE_NOTHING }
};

#define AARCH64_EXTRA_TUNING_OPTION(name, internal_name) \
  { name, AARCH64_EXTRA_TUNE_##internal_name },
static const struct aarch64_flag_desc aarch64_tuning_flags[] =
{
  { "none", AARCH64_EXTRA_TUNE_NONE },
#include "aarch64-tuning-flags.def"
  { "all", AARCH64_EXTRA_TUNE_ALL },
  { NULL, AARCH64_EXTRA_TUNE_NONE }
};

/* Tuning parameters.  */

static const struct cpu_addrcost_table generic_addrcost_table =
{
  {
    1, /* hi  */
    0, /* si  */
    0, /* di  */
    1, /* ti  */
  },
  0, /* pre_modify  */
  0, /* post_modify  */
  0, /* register_offset  */
  0, /* register_sextend  */
  0, /* register_zextend  */
  0 /* imm_offset  */
};

static const struct cpu_addrcost_table exynosm1_addrcost_table =
{
  {
    0, /* hi  */
    0, /* si  */
    0, /* di  */
    2, /* ti  */
  },
  0, /* pre_modify  */
  0, /* post_modify  */
  1, /* register_offset  */
  1, /* register_sextend  */
  2, /* register_zextend  */
  0, /* imm_offset  */
};

static const struct cpu_addrcost_table xgene1_addrcost_table =
{
  {
    1, /* hi  */
    0, /* si  */
    0, /* di  */
    1, /* ti  */
  },
  1, /* pre_modify  */
  0, /* post_modify  */
  0, /* register_offset  */
  1, /* register_sextend  */
  1, /* register_zextend  */
  0, /* imm_offset  */
};

static const struct cpu_addrcost_table thunderx2t99_addrcost_table =
{
  {
    1, /* hi  */
    1, /* si  */
    1, /* di  */
    2, /* ti  */
  },
  0, /* pre_modify  */
  0, /* post_modify  */
  2, /* register_offset  */
  3, /* register_sextend  */
  3, /* register_zextend  */
  0, /* imm_offset  */
};

static const struct cpu_addrcost_table qdf24xx_addrcost_table =
{
  {
    1, /* hi  */
    1, /* si  */
    1, /* di  */
    2, /* ti  */
  },
  1, /* pre_modify  */
  1, /* post_modify  */
  3, /* register_offset  */
  4, /* register_sextend  */
  3, /* register_zextend  */
  2, /* imm_offset  */
};

static const struct cpu_regmove_cost generic_regmove_cost =
{
  1, /* GP2GP  */
  /* Avoid the use of slow int<->fp moves for spilling by setting
     their cost higher than memmov_cost.  */
  5, /* GP2FP  */
  5, /* FP2GP  */
  2 /* FP2FP  */
};

static const struct cpu_regmove_cost cortexa57_regmove_cost =
{
  1, /* GP2GP  */
  /* Avoid the use of slow int<->fp moves for spilling by setting
     their cost higher than memmov_cost.  */
  5, /* GP2FP  */
  5, /* FP2GP  */
  2 /* FP2FP  */
};

static const struct cpu_regmove_cost cortexa53_regmove_cost =
{
  1, /* GP2GP  */
  /* Avoid the use of slow int<->fp moves for spilling by setting
     their cost higher than memmov_cost.  */
  5, /* GP2FP  */
  5, /* FP2GP  */
  2 /* FP2FP  */
};

static const struct cpu_regmove_cost exynosm1_regmove_cost =
{
  1, /* GP2GP  */
  /* Avoid the use of slow int<->fp moves for spilling by setting
     their cost higher than memmov_cost (actual, 4 and 9).  */
  9, /* GP2FP  */
  9, /* FP2GP  */
  1 /* FP2FP  */
};

static const struct cpu_regmove_cost thunderx_regmove_cost =
{
  2, /* GP2GP  */
  2, /* GP2FP  */
  6, /* FP2GP  */
  4 /* FP2FP  */
};

static const struct cpu_regmove_cost xgene1_regmove_cost =
{
  1, /* GP2GP  */
  /* Avoid the use of slow int<->fp moves for spilling by setting
     their cost higher than memmov_cost.  */
  8, /* GP2FP  */
  8, /* FP2GP  */
  2 /* FP2FP  */
};

static const struct cpu_regmove_cost qdf24xx_regmove_cost =
{
  2, /* GP2GP  */
  /* Avoid the use of int<->fp moves for spilling.  */
  6, /* GP2FP  */
  6, /* FP2GP  */
  4 /* FP2FP  */
};

static const struct cpu_regmove_cost thunderx2t99_regmove_cost =
{
  1, /* GP2GP  */
  /* Avoid the use of int<->fp moves for spilling.  */
  8, /* GP2FP  */
  8, /* FP2GP  */
  4 /* FP2FP  */
};
/* Generic costs for vector insn classes.  */
static const struct cpu_vector_cost generic_vector_cost =
{
  1, /* scalar_int_stmt_cost  */
  1, /* scalar_fp_stmt_cost  */
  1, /* scalar_load_cost  */
  1, /* scalar_store_cost  */
  1, /* vec_int_stmt_cost  */
  1, /* vec_fp_stmt_cost  */
  2, /* vec_permute_cost  */
  1, /* vec_to_scalar_cost  */
  1, /* scalar_to_vec_cost  */
  1, /* vec_align_load_cost  */
  1, /* vec_unalign_load_cost  */
  1, /* vec_unalign_store_cost  */
  1, /* vec_store_cost  */
  3, /* cond_taken_branch_cost  */
  1 /* cond_not_taken_branch_cost  */
};

/* ThunderX costs for vector insn classes.  */
static const struct cpu_vector_cost thunderx_vector_cost =
{
  1, /* scalar_int_stmt_cost  */
  1, /* scalar_fp_stmt_cost  */
  3, /* scalar_load_cost  */
  1, /* scalar_store_cost  */
  4, /* vec_int_stmt_cost  */
  1, /* vec_fp_stmt_cost  */
  4, /* vec_permute_cost  */
  2, /* vec_to_scalar_cost  */
  2, /* scalar_to_vec_cost  */
  3, /* vec_align_load_cost  */
  5, /* vec_unalign_load_cost  */
  5, /* vec_unalign_store_cost  */
  1, /* vec_store_cost  */
  3, /* cond_taken_branch_cost  */
  3 /* cond_not_taken_branch_cost  */
};

/* Cortex-A57 costs for vector insn classes.  */
static const struct cpu_vector_cost cortexa57_vector_cost =
{
  1, /* scalar_int_stmt_cost  */
  1, /* scalar_fp_stmt_cost  */
  4, /* scalar_load_cost  */
  1, /* scalar_store_cost  */
  2, /* vec_int_stmt_cost  */
  2, /* vec_fp_stmt_cost  */
  3, /* vec_permute_cost  */
  8, /* vec_to_scalar_cost  */
  8, /* scalar_to_vec_cost  */
  4, /* vec_align_load_cost  */
  4, /* vec_unalign_load_cost  */
  1, /* vec_unalign_store_cost  */
  1, /* vec_store_cost  */
  1, /* cond_taken_branch_cost  */
  1 /* cond_not_taken_branch_cost  */
};

static const struct cpu_vector_cost exynosm1_vector_cost =
{
  1, /* scalar_int_stmt_cost  */
  1, /* scalar_fp_stmt_cost  */
  5, /* scalar_load_cost  */
  1, /* scalar_store_cost  */
  3, /* vec_int_stmt_cost  */
  3, /* vec_fp_stmt_cost  */
  3, /* vec_permute_cost  */
  3, /* vec_to_scalar_cost  */
  3, /* scalar_to_vec_cost  */
  5, /* vec_align_load_cost  */
  5, /* vec_unalign_load_cost  */
  1, /* vec_unalign_store_cost  */
  1, /* vec_store_cost  */
  1, /* cond_taken_branch_cost  */
  1 /* cond_not_taken_branch_cost  */
};

/* X-Gene 1 costs for vector insn classes.  */
static const struct cpu_vector_cost xgene1_vector_cost =
{
  1, /* scalar_int_stmt_cost  */
  1, /* scalar_fp_stmt_cost  */
  5, /* scalar_load_cost  */
  1, /* scalar_store_cost  */
  2, /* vec_int_stmt_cost  */
  2, /* vec_fp_stmt_cost  */
  2, /* vec_permute_cost  */
  4, /* vec_to_scalar_cost  */
  4, /* scalar_to_vec_cost  */
  10, /* vec_align_load_cost  */
  10, /* vec_unalign_load_cost  */
  2, /* vec_unalign_store_cost  */
  2, /* vec_store_cost  */
  2, /* cond_taken_branch_cost  */
  1 /* cond_not_taken_branch_cost  */
};

/* Costs for vector insn classes for ThunderX2 T99.  */
static const struct cpu_vector_cost thunderx2t99_vector_cost =
{
  1, /* scalar_int_stmt_cost  */
  6, /* scalar_fp_stmt_cost  */
  4, /* scalar_load_cost  */
  1, /* scalar_store_cost  */
  5, /* vec_int_stmt_cost  */
  6, /* vec_fp_stmt_cost  */
  3, /* vec_permute_cost  */
  6, /* vec_to_scalar_cost  */
  5, /* scalar_to_vec_cost  */
  8, /* vec_align_load_cost  */
  8, /* vec_unalign_load_cost  */
  4, /* vec_unalign_store_cost  */
  4, /* vec_store_cost  */
  2, /* cond_taken_branch_cost  */
  1 /* cond_not_taken_branch_cost  */
};

/* Generic costs for branch instructions.  */
static const struct cpu_branch_cost generic_branch_cost =
{
  1,  /* Predictable.  */
  3   /* Unpredictable.  */
};

/* Generic approximation modes.  */
static const cpu_approx_modes generic_approx_modes =
{
  AARCH64_APPROX_NONE,	/* division  */
  AARCH64_APPROX_NONE,	/* sqrt  */
  AARCH64_APPROX_NONE	/* recip_sqrt  */
};

/* Approximation modes for Exynos M1.  */
static const cpu_approx_modes exynosm1_approx_modes =
{
  AARCH64_APPROX_NONE,	/* division  */
  AARCH64_APPROX_ALL,	/* sqrt  */
  AARCH64_APPROX_ALL	/* recip_sqrt  */
};

/* Approximation modes for X-Gene 1.  */
static const cpu_approx_modes xgene1_approx_modes =
{
  AARCH64_APPROX_NONE,	/* division  */
  AARCH64_APPROX_NONE,	/* sqrt  */
  AARCH64_APPROX_ALL	/* recip_sqrt  */
};

/* Generic prefetch settings (which disable prefetch).  */
static const cpu_prefetch_tune generic_prefetch_tune =
{
  0,			/* num_slots  */
  -1,			/* l1_cache_size  */
  -1,			/* l1_cache_line_size  */
  -1,			/* l2_cache_size  */
  true,			/* prefetch_dynamic_strides  */
  -1,			/* minimum_stride  */
  -1			/* default_opt_level  */
};

static const cpu_prefetch_tune exynosm1_prefetch_tune =
{
  0,			/* num_slots  */
  -1,			/* l1_cache_size  */
  64,			/* l1_cache_line_size  */
  -1,			/* l2_cache_size  */
  true,			/* prefetch_dynamic_strides  */
  -1,			/* minimum_stride  */
  -1			/* default_opt_level  */
};

static const cpu_prefetch_tune qdf24xx_prefetch_tune =
{
  4,			/* num_slots  */
  32,			/* l1_cache_size  */
  64,			/* l1_cache_line_size  */
  512,			/* l2_cache_size  */
  false,		/* prefetch_dynamic_strides  */
  2048,			/* minimum_stride  */
  3			/* default_opt_level  */
};

static const cpu_prefetch_tune thunderxt88_prefetch_tune =
{
  8,			/* num_slots  */
  32,			/* l1_cache_size  */
  128,			/* l1_cache_line_size  */
  16*1024,		/* l2_cache_size  */
  true,			/* prefetch_dynamic_strides  */
  -1,			/* minimum_stride  */
  3			/* default_opt_level  */
};

static const cpu_prefetch_tune thunderx_prefetch_tune =
{
  8,			/* num_slots  */
  32,			/* l1_cache_size  */
  128,			/* l1_cache_line_size  */
  -1,			/* l2_cache_size  */
  true,			/* prefetch_dynamic_strides  */
  -1,			/* minimum_stride  */
  -1			/* default_opt_level  */
};

static const cpu_prefetch_tune thunderx2t99_prefetch_tune =
{
  8,			/* num_slots  */
  32,			/* l1_cache_size  */
  64,			/* l1_cache_line_size  */
  256,			/* l2_cache_size  */
  true,			/* prefetch_dynamic_strides  */
  -1,			/* minimum_stride  */
  -1			/* default_opt_level  */
};

static const struct tune_params generic_tunings =
{
  &cortexa57_extra_costs,
  &generic_addrcost_table,
  &generic_regmove_cost,
  &generic_vector_cost,
  &generic_branch_cost,
  &generic_approx_modes,
  4, /* memmov_cost  */
  2, /* issue_rate  */
  (AARCH64_FUSE_AES_AESMC), /* fusible_ops  */
  "8",	/* function_align.  */
  "4",	/* jump_align.  */
  "8",	/* loop_align.  */
  2,	/* int_reassoc_width.  */
  4,	/* fp_reassoc_width.  */
  1,	/* vec_reassoc_width.  */
  2,	/* min_div_recip_mul_sf.  */
  2,	/* min_div_recip_mul_df.  */
  0,	/* max_case_values.  */
  tune_params::AUTOPREFETCHER_WEAK,	/* autoprefetcher_model.  */
  (AARCH64_EXTRA_TUNE_NONE),	/* tune_flags.  */
  &generic_prefetch_tune
};

static const struct tune_params cortexa35_tunings =
{
  &cortexa53_extra_costs,
  &generic_addrcost_table,
  &cortexa53_regmove_cost,
  &generic_vector_cost,
  &generic_branch_cost,
  &generic_approx_modes,
  4, /* memmov_cost  */
  1, /* issue_rate  */
  (AARCH64_FUSE_AES_AESMC | AARCH64_FUSE_MOV_MOVK | AARCH64_FUSE_ADRP_ADD
   | AARCH64_FUSE_MOVK_MOVK | AARCH64_FUSE_ADRP_LDR), /* fusible_ops  */
  "16",	/* function_align.  */
  "4",	/* jump_align.  */
  "8",	/* loop_align.  */
  2,	/* int_reassoc_width.  */
  4,	/* fp_reassoc_width.  */
  1,	/* vec_reassoc_width.  */
  2,	/* min_div_recip_mul_sf.  */
  2,	/* min_div_recip_mul_df.  */
  0,	/* max_case_values.  */
  tune_params::AUTOPREFETCHER_WEAK,	/* autoprefetcher_model.  */
  (AARCH64_EXTRA_TUNE_NONE),	/* tune_flags.  */
  &generic_prefetch_tune
};

static const struct tune_params cortexa53_tunings =
{
  &cortexa53_extra_costs,
  &generic_addrcost_table,
  &cortexa53_regmove_cost,
  &generic_vector_cost,
  &generic_branch_cost,
  &generic_approx_modes,
  4, /* memmov_cost  */
  2, /* issue_rate  */
  (AARCH64_FUSE_AES_AESMC | AARCH64_FUSE_MOV_MOVK | AARCH64_FUSE_ADRP_ADD
   | AARCH64_FUSE_MOVK_MOVK | AARCH64_FUSE_ADRP_LDR), /* fusible_ops  */
  "16",	/* function_align.  */
  "4",	/* jump_align.  */
  "8",	/* loop_align.  */
  2,	/* int_reassoc_width.  */
  4,	/* fp_reassoc_width.  */
  1,	/* vec_reassoc_width.  */
  2,	/* min_div_recip_mul_sf.  */
  2,	/* min_div_recip_mul_df.  */
  0,	/* max_case_values.  */
  tune_params::AUTOPREFETCHER_WEAK,	/* autoprefetcher_model.  */
  (AARCH64_EXTRA_TUNE_NONE),	/* tune_flags.  */
  &generic_prefetch_tune
};

static const struct tune_params cortexa57_tunings =
{
  &cortexa57_extra_costs,
  &generic_addrcost_table,
  &cortexa57_regmove_cost,
  &cortexa57_vector_cost,
  &generic_branch_cost,
  &generic_approx_modes,
  4, /* memmov_cost  */
  3, /* issue_rate  */
  (AARCH64_FUSE_AES_AESMC | AARCH64_FUSE_MOV_MOVK | AARCH64_FUSE_ADRP_ADD
   | AARCH64_FUSE_MOVK_MOVK), /* fusible_ops  */
  "16",	/* function_align.  */
  "4",	/* jump_align.  */
  "8",	/* loop_align.  */
  2,	/* int_reassoc_width.  */
  4,	/* fp_reassoc_width.  */
  1,	/* vec_reassoc_width.  */
  2,	/* min_div_recip_mul_sf.  */
  2,	/* min_div_recip_mul_df.  */
  0,	/* max_case_values.  */
  tune_params::AUTOPREFETCHER_WEAK,	/* autoprefetcher_model.  */
  (AARCH64_EXTRA_TUNE_RENAME_FMA_REGS),	/* tune_flags.  */
  &generic_prefetch_tune
};

static const struct tune_params cortexa72_tunings =
{
  &cortexa57_extra_costs,
  &generic_addrcost_table,
  &cortexa57_regmove_cost,
  &cortexa57_vector_cost,
  &generic_branch_cost,
  &generic_approx_modes,
  4, /* memmov_cost  */
  3, /* issue_rate  */
  (AARCH64_FUSE_AES_AESMC | AARCH64_FUSE_MOV_MOVK | AARCH64_FUSE_ADRP_ADD
   | AARCH64_FUSE_MOVK_MOVK), /* fusible_ops  */
  "16",	/* function_align.  */
  "4",	/* jump_align.  */
  "8",	/* loop_align.  */
  2,	/* int_reassoc_width.  */
  4,	/* fp_reassoc_width.  */
  1,	/* vec_reassoc_width.  */
  2,	/* min_div_recip_mul_sf.  */
  2,	/* min_div_recip_mul_df.  */
  0,	/* max_case_values.  */
  tune_params::AUTOPREFETCHER_WEAK,	/* autoprefetcher_model.  */
  (AARCH64_EXTRA_TUNE_NONE),	/* tune_flags.  */
  &generic_prefetch_tune
};

static const struct tune_params cortexa73_tunings =
{
  &cortexa57_extra_costs,
  &generic_addrcost_table,
  &cortexa57_regmove_cost,
  &cortexa57_vector_cost,
  &generic_branch_cost,
  &generic_approx_modes,
  4, /* memmov_cost.  */
  2, /* issue_rate.  */
  (AARCH64_FUSE_AES_AESMC | AARCH64_FUSE_MOV_MOVK | AARCH64_FUSE_ADRP_ADD
   | AARCH64_FUSE_MOVK_MOVK | AARCH64_FUSE_ADRP_LDR), /* fusible_ops  */
  "16",	/* function_align.  */
  "4",	/* jump_align.  */
  "8",	/* loop_align.  */
  2,	/* int_reassoc_width.  */
  4,	/* fp_reassoc_width.  */
  1,	/* vec_reassoc_width.  */
  2,	/* min_div_recip_mul_sf.  */
  2,	/* min_div_recip_mul_df.  */
  0,	/* max_case_values.  */
  tune_params::AUTOPREFETCHER_WEAK,	/* autoprefetcher_model.  */
  (AARCH64_EXTRA_TUNE_NONE),	/* tune_flags.  */
  &generic_prefetch_tune
};

static const struct tune_params exynosm1_tunings =
{
  &exynosm1_extra_costs,
  &exynosm1_addrcost_table,
  &exynosm1_regmove_cost,
  &exynosm1_vector_cost,
  &generic_branch_cost,
  &exynosm1_approx_modes,
  4,	/* memmov_cost  */
  3,	/* issue_rate  */
  (AARCH64_FUSE_AES_AESMC), /* fusible_ops  */
  "4",	/* function_align.  */
  "4",	/* jump_align.  */
  "4",	/* loop_align.  */
  2,	/* int_reassoc_width.  */
  4,	/* fp_reassoc_width.  */
  1,	/* vec_reassoc_width.  */
  2,	/* min_div_recip_mul_sf.  */
  2,	/* min_div_recip_mul_df.  */
  48,	/* max_case_values.  */
  tune_params::AUTOPREFETCHER_WEAK, /* autoprefetcher_model.  */
  (AARCH64_EXTRA_TUNE_NONE), /* tune_flags.  */
  &exynosm1_prefetch_tune
};

static const struct tune_params thunderxt88_tunings =
{
  &thunderx_extra_costs,
  &generic_addrcost_table,
  &thunderx_regmove_cost,
  &thunderx_vector_cost,
  &generic_branch_cost,
  &generic_approx_modes,
  6, /* memmov_cost  */
  2, /* issue_rate  */
  AARCH64_FUSE_CMP_BRANCH, /* fusible_ops  */
  "8",	/* function_align.  */
  "8",	/* jump_align.  */
  "8",	/* loop_align.  */
  2,	/* int_reassoc_width.  */
  4,	/* fp_reassoc_width.  */
  1,	/* vec_reassoc_width.  */
  2,	/* min_div_recip_mul_sf.  */
  2,	/* min_div_recip_mul_df.  */
  0,	/* max_case_values.  */
  tune_params::AUTOPREFETCHER_OFF,	/* autoprefetcher_model.  */
  (AARCH64_EXTRA_TUNE_SLOW_UNALIGNED_LDPW),	/* tune_flags.  */
  &thunderxt88_prefetch_tune
};

static const struct tune_params thunderx_tunings =
{
  &thunderx_extra_costs,
  &generic_addrcost_table,
  &thunderx_regmove_cost,
  &thunderx_vector_cost,
  &generic_branch_cost,
  &generic_approx_modes,
  6, /* memmov_cost  */
  2, /* issue_rate  */
  AARCH64_FUSE_CMP_BRANCH, /* fusible_ops  */
  "8",	/* function_align.  */
  "8",	/* jump_align.  */
  "8",	/* loop_align.  */
  2,	/* int_reassoc_width.  */
  4,	/* fp_reassoc_width.  */
  1,	/* vec_reassoc_width.  */
  2,	/* min_div_recip_mul_sf.  */
  2,	/* min_div_recip_mul_df.  */
  0,	/* max_case_values.  */
  tune_params::AUTOPREFETCHER_OFF,	/* autoprefetcher_model.  */
  (AARCH64_EXTRA_TUNE_SLOW_UNALIGNED_LDPW
   | AARCH64_EXTRA_TUNE_CHEAP_SHIFT_EXTEND),	/* tune_flags.  */
  &thunderx_prefetch_tune
};

static const struct tune_params xgene1_tunings =
{
  &xgene1_extra_costs,
  &xgene1_addrcost_table,
  &xgene1_regmove_cost,
  &xgene1_vector_cost,
  &generic_branch_cost,
  &xgene1_approx_modes,
  6, /* memmov_cost  */
  4, /* issue_rate  */
  AARCH64_FUSE_NOTHING, /* fusible_ops  */
  "16",	/* function_align.  */
  "8",	/* jump_align.  */
  "16",	/* loop_align.  */
  2,	/* int_reassoc_width.  */
  4,	/* fp_reassoc_width.  */
  1,	/* vec_reassoc_width.  */
  2,	/* min_div_recip_mul_sf.  */
  2,	/* min_div_recip_mul_df.  */
  0,	/* max_case_values.  */
  tune_params::AUTOPREFETCHER_OFF,	/* autoprefetcher_model.  */
  (AARCH64_EXTRA_TUNE_NO_LDP_STP_QREGS),	/* tune_flags.  */
  &generic_prefetch_tune
};

static const struct tune_params qdf24xx_tunings =
{
  &qdf24xx_extra_costs,
  &qdf24xx_addrcost_table,
  &qdf24xx_regmove_cost,
  &generic_vector_cost,
  &generic_branch_cost,
  &generic_approx_modes,
  4, /* memmov_cost  */
  4, /* issue_rate  */
  (AARCH64_FUSE_MOV_MOVK | AARCH64_FUSE_ADRP_ADD
   | AARCH64_FUSE_MOVK_MOVK), /* fusible_ops  */
  "16",	/* function_align.  */
  "8",	/* jump_align.  */
  "16",	/* loop_align.  */
  2,	/* int_reassoc_width.  */
  4,	/* fp_reassoc_width.  */
  1,	/* vec_reassoc_width.  */
  2,	/* min_div_recip_mul_sf.  */
  2,	/* min_div_recip_mul_df.  */
  0,	/* max_case_values.  */
  tune_params::AUTOPREFETCHER_WEAK,	/* autoprefetcher_model.  */
  (AARCH64_EXTRA_TUNE_NONE),	/* tune_flags.  */
  &qdf24xx_prefetch_tune
};

/* Tuning structure for the Qualcomm Saphira core.  Default to falkor values
   for now.  */
static const struct tune_params saphira_tunings =
{
  &generic_extra_costs,
  &generic_addrcost_table,
  &generic_regmove_cost,
  &generic_vector_cost,
  &generic_branch_cost,
  &generic_approx_modes,
  4, /* memmov_cost  */
  4, /* issue_rate  */
  (AARCH64_FUSE_MOV_MOVK | AARCH64_FUSE_ADRP_ADD
   | AARCH64_FUSE_MOVK_MOVK), /* fusible_ops  */
  "16",	/* function_align.  */
  "8",	/* jump_align.  */
  "16",	/* loop_align.  */
  2,	/* int_reassoc_width.  */
  4,	/* fp_reassoc_width.  */
  1,	/* vec_reassoc_width.  */
  2,	/* min_div_recip_mul_sf.  */
  2,	/* min_div_recip_mul_df.  */
  0,	/* max_case_values.  */
  tune_params::AUTOPREFETCHER_WEAK,	/* autoprefetcher_model.  */
  (AARCH64_EXTRA_TUNE_NONE),		/* tune_flags.  */
  &generic_prefetch_tune
};

static const struct tune_params thunderx2t99_tunings =
{
  &thunderx2t99_extra_costs,
  &thunderx2t99_addrcost_table,
  &thunderx2t99_regmove_cost,
  &thunderx2t99_vector_cost,
  &generic_branch_cost,
  &generic_approx_modes,
  4, /* memmov_cost.  */
  4, /* issue_rate.  */
  (AARCH64_FUSE_CMP_BRANCH | AARCH64_FUSE_AES_AESMC
   | AARCH64_FUSE_ALU_BRANCH), /* fusible_ops  */
  "16",	/* function_align.  */
  "8",	/* jump_align.  */
  "16",	/* loop_align.  */
  3,	/* int_reassoc_width.  */
  2,	/* fp_reassoc_width.  */
  2,	/* vec_reassoc_width.  */
  2,	/* min_div_recip_mul_sf.  */
  2,	/* min_div_recip_mul_df.  */
  0,	/* max_case_values.  */
  tune_params::AUTOPREFETCHER_WEAK,	/* autoprefetcher_model.  */
  (AARCH64_EXTRA_TUNE_NONE),	/* tune_flags.  */
  &thunderx2t99_prefetch_tune
};

/* Support for fine-grained override of the tuning structures.  */
struct aarch64_tuning_override_function
{
  const char* name;
  void (*parse_override)(const char*, struct tune_params*);
};

static void aarch64_parse_fuse_string (const char*, struct tune_params*);
static void aarch64_parse_tune_string (const char*, struct tune_params*);

static const struct aarch64_tuning_override_function
aarch64_tuning_override_functions[] =
{
  { "fuse", aarch64_parse_fuse_string },
  { "tune", aarch64_parse_tune_string },
  { NULL, NULL }
};
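
/* These parsers back the -moverride command-line option.  As a rough
   usage sketch (the flag names below are assumed examples; the real
   names come from aarch64-fusion-pairs.def and aarch64-tuning-flags.def
   included earlier):

     gcc -mcpu=cortex-a57 -moverride=fuse=aes_aesmc ...
     gcc -mcpu=cortex-a57 -moverride=tune=rename_fma_regs ...

   The "fuse" or "tune" prefix selects the parser from the table above,
   and the value after the second '=' is handed to that parser.  */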

/* A processor implementing AArch64.  */
struct processor
{
  const char *const name;
  enum aarch64_processor ident;
  enum aarch64_processor sched_core;
  enum aarch64_arch arch;
  unsigned architecture_version;
  const unsigned long flags;
  const struct tune_params *const tune;
};

/* Architectures implementing AArch64.  */
static const struct processor all_architectures[] =
{
#define AARCH64_ARCH(NAME, CORE, ARCH_IDENT, ARCH_REV, FLAGS) \
  {NAME, CORE, CORE, AARCH64_ARCH_##ARCH_IDENT, ARCH_REV, FLAGS, NULL},
#include "aarch64-arches.def"
  {NULL, aarch64_none, aarch64_none, aarch64_no_arch, 0, 0, NULL}
};

/* Processor cores implementing AArch64.  */
static const struct processor all_cores[] =
{
#define AARCH64_CORE(NAME, IDENT, SCHED, ARCH, FLAGS, COSTS, IMP, PART, VARIANT) \
  {NAME, IDENT, SCHED, AARCH64_ARCH_##ARCH,				\
   all_architectures[AARCH64_ARCH_##ARCH].architecture_version,	\
   FLAGS, &COSTS##_tunings},
#include "aarch64-cores.def"
  {"generic", generic, cortexa53, AARCH64_ARCH_8A, 8,
   AARCH64_FL_FOR_ARCH8, &generic_tunings},
  {NULL, aarch64_none, aarch64_none, aarch64_no_arch, 0, 0, NULL}
};

/* Target specification.  These are populated by the -march, -mtune, -mcpu
   handling code or by target attributes.  */
static const struct processor *selected_arch;
static const struct processor *selected_cpu;
static const struct processor *selected_tune;

/* The current tuning set.  */
struct tune_params aarch64_tune_params = generic_tunings;

#define AARCH64_CPU_DEFAULT_FLAGS ((selected_cpu) ? selected_cpu->flags : 0)

/* An ISA extension in the co-processor and main instruction set space.  */
struct aarch64_option_extension
{
  const char *const name;
  const unsigned long flags_on;
  const unsigned long flags_off;
};

typedef enum aarch64_cond_code
{
  AARCH64_EQ = 0, AARCH64_NE, AARCH64_CS, AARCH64_CC, AARCH64_MI, AARCH64_PL,
  AARCH64_VS, AARCH64_VC, AARCH64_HI, AARCH64_LS, AARCH64_GE, AARCH64_LT,
  AARCH64_GT, AARCH64_LE, AARCH64_AL, AARCH64_NV
}
aarch64_cc;

#define AARCH64_INVERSE_CONDITION_CODE(X) ((aarch64_cc) (((int) X) ^ 1))

/* The condition codes of the processor, and the inverse function.  */
static const char * const aarch64_condition_codes[] =
{
  "eq", "ne", "cs", "cc", "mi", "pl", "vs", "vc",
  "hi", "ls", "ge", "lt", "gt", "le", "al", "nv"
};
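
/* For instance, AARCH64_INVERSE_CONDITION_CODE (AARCH64_EQ) yields
   AARCH64_NE: the codes above are laid out so that each even/odd pair
   holds a condition and its inverse, and flipping bit 0 of the code
   therefore inverts the condition ("eq" <-> "ne", "cs" <-> "cc",
   and so on).  */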

/* Generate code to enable conditional branches in functions over 1 MiB.  */
const char *
aarch64_gen_far_branch (rtx * operands, int pos_label, const char * dest,
			const char * branch_format)
{
  rtx_code_label * tmp_label = gen_label_rtx ();
  char label_buf[256];
  char buffer[128];
  ASM_GENERATE_INTERNAL_LABEL (label_buf, dest,
			       CODE_LABEL_NUMBER (tmp_label));
  const char *label_ptr = targetm.strip_name_encoding (label_buf);
  rtx dest_label = operands[pos_label];
  operands[pos_label] = tmp_label;

  snprintf (buffer, sizeof (buffer), "%s%s", branch_format, label_ptr);
  output_asm_insn (buffer, operands);

  snprintf (buffer, sizeof (buffer), "b\t%%l%d\n%s:", pos_label, label_ptr);
  operands[pos_label] = dest_label;
  output_asm_insn (buffer, operands);
  return "";
}
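
/* As an illustration (label name assumed): if the caller passes the
   inverted condition in BRANCH_FORMAT, say "b.ne\t" for an original
   "b.eq", the sequence emitted above is

	b.ne	.Ltmp		// short conditional branch around
	b	dest		// unconditional branch, +/-128 MiB range
   .Ltmp:

   which honours the original condition even when dest lies beyond the
   +/-1 MiB range of a conditional branch.  */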

void
aarch64_err_no_fpadvsimd (machine_mode mode)
{
  if (TARGET_GENERAL_REGS_ONLY)
    if (FLOAT_MODE_P (mode))
      error ("%qs is incompatible with the use of floating-point types",
	     "-mgeneral-regs-only");
    else
      error ("%qs is incompatible with the use of vector types",
	     "-mgeneral-regs-only");
  else
    if (FLOAT_MODE_P (mode))
      error ("%qs feature modifier is incompatible with the use of"
	     " floating-point types", "+nofp");
    else
      error ("%qs feature modifier is incompatible with the use of"
	     " vector types", "+nofp");
}
/* Implement TARGET_IRA_CHANGE_PSEUDO_ALLOCNO_CLASS.
   The register allocator chooses POINTER_AND_FP_REGS if FP_REGS and
   GENERAL_REGS have the same cost - even if POINTER_AND_FP_REGS has a much
   higher cost.  POINTER_AND_FP_REGS is also used if the cost of both FP_REGS
   and GENERAL_REGS is lower than the memory cost (in this case the best class
   is the lowest cost one).  Using POINTER_AND_FP_REGS irrespective of its
   cost results in bad allocations with many redundant int<->FP moves which
   are expensive on various cores.
   To avoid this we don't allow POINTER_AND_FP_REGS as the allocno class, but
   force a decision between FP_REGS and GENERAL_REGS.  We use the allocno
   class if it isn't POINTER_AND_FP_REGS.  Similarly, use the best class if
   it isn't POINTER_AND_FP_REGS.  Otherwise set the allocno class depending
   on the mode.
   The result of this is that it is no longer inefficient to have a higher
   memory move cost than the register move cost.  */

static reg_class_t
aarch64_ira_change_pseudo_allocno_class (int regno, reg_class_t allocno_class,
					 reg_class_t best_class)
{
  machine_mode mode;

  if (!reg_class_subset_p (GENERAL_REGS, allocno_class)
      || !reg_class_subset_p (FP_REGS, allocno_class))
    return allocno_class;

  if (!reg_class_subset_p (GENERAL_REGS, best_class)
      || !reg_class_subset_p (FP_REGS, best_class))
    return best_class;

  mode = PSEUDO_REGNO_MODE (regno);
  return FLOAT_MODE_P (mode) || VECTOR_MODE_P (mode) ? FP_REGS : GENERAL_REGS;
}

static unsigned int
aarch64_min_divisions_for_recip_mul (machine_mode mode)
{
  if (GET_MODE_UNIT_SIZE (mode) == 4)
    return aarch64_tune_params.min_div_recip_mul_sf;
  return aarch64_tune_params.min_div_recip_mul_df;
}

/* Return the reassociation width of treeop OPC with mode MODE.  */
static int
aarch64_reassociation_width (unsigned opc, machine_mode mode)
{
  if (VECTOR_MODE_P (mode))
    return aarch64_tune_params.vec_reassoc_width;
  if (INTEGRAL_MODE_P (mode))
    return aarch64_tune_params.int_reassoc_width;
  /* Avoid reassociating floating point addition so we emit more FMAs.  */
  if (FLOAT_MODE_P (mode) && opc != PLUS_EXPR)
    return aarch64_tune_params.fp_reassoc_width;
  return 1;
}

/* Provide a mapping from gcc register numbers to dwarf register numbers.  */
unsigned
aarch64_dbx_register_number (unsigned regno)
{
  if (GP_REGNUM_P (regno))
    return AARCH64_DWARF_R0 + regno - R0_REGNUM;
  else if (regno == SP_REGNUM)
    return AARCH64_DWARF_SP;
  else if (FP_REGNUM_P (regno))
    return AARCH64_DWARF_V0 + regno - V0_REGNUM;
  else if (PR_REGNUM_P (regno))
    return AARCH64_DWARF_P0 + regno - P0_REGNUM;
  else if (regno == VG_REGNUM)
    return AARCH64_DWARF_VG;

  /* Return values >= DWARF_FRAME_REGISTERS indicate that there is no
     equivalent DWARF register.  */
  return DWARF_FRAME_REGISTERS;
}
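
/* For example (register numbers per the "DWARF for the Arm 64-bit
   Architecture" ABI document): x0 maps to DWARF register 0, sp to 31,
   v0 to 64 and p0 to 48, while a register such as CC has no DWARF
   equivalent and falls through to the DWARF_FRAME_REGISTERS return
   above.  */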

/* Return true if MODE is any of the Advanced SIMD structure modes.  */
static bool
aarch64_advsimd_struct_mode_p (machine_mode mode)
{
  return (TARGET_SIMD
	  && (mode == OImode || mode == CImode || mode == XImode));
}

/* Return true if MODE is an SVE predicate mode.  */
static bool
aarch64_sve_pred_mode_p (machine_mode mode)
{
  return (TARGET_SVE
	  && (mode == VNx16BImode
	      || mode == VNx8BImode
	      || mode == VNx4BImode
	      || mode == VNx2BImode));
}

/* Three mutually-exclusive flags describing a vector or predicate type.  */
const unsigned int VEC_ADVSIMD  = 1;
const unsigned int VEC_SVE_DATA = 2;
const unsigned int VEC_SVE_PRED = 4;
/* Can be used in combination with VEC_ADVSIMD or VEC_SVE_DATA to indicate
   a structure of 2, 3 or 4 vectors.  */
const unsigned int VEC_STRUCT   = 8;
/* Useful combinations of the above.  */
const unsigned int VEC_ANY_SVE  = VEC_SVE_DATA | VEC_SVE_PRED;
const unsigned int VEC_ANY_DATA = VEC_ADVSIMD | VEC_SVE_DATA;

/* Return a set of flags describing the vector properties of mode MODE.
   Ignore modes that are not supported by the current target.  */
static unsigned int
aarch64_classify_vector_mode (machine_mode mode)
{
  if (aarch64_advsimd_struct_mode_p (mode))
    return VEC_ADVSIMD | VEC_STRUCT;

  if (aarch64_sve_pred_mode_p (mode))
    return VEC_SVE_PRED;

  scalar_mode inner = GET_MODE_INNER (mode);
  if (VECTOR_MODE_P (mode)
      && (inner == QImode
	  || inner == HImode
	  || inner == HFmode
	  || inner == SImode
	  || inner == SFmode
	  || inner == DImode
	  || inner == DFmode))
    {
      if (TARGET_SVE)
	{
	  if (known_eq (GET_MODE_BITSIZE (mode), BITS_PER_SVE_VECTOR))
	    return VEC_SVE_DATA;
	  if (known_eq (GET_MODE_BITSIZE (mode), BITS_PER_SVE_VECTOR * 2)
	      || known_eq (GET_MODE_BITSIZE (mode), BITS_PER_SVE_VECTOR * 3)
	      || known_eq (GET_MODE_BITSIZE (mode), BITS_PER_SVE_VECTOR * 4))
	    return VEC_SVE_DATA | VEC_STRUCT;
	}

      /* This includes V1DF but not V1DI (which doesn't exist).  */
      if (TARGET_SIMD
	  && (known_eq (GET_MODE_BITSIZE (mode), 64)
	      || known_eq (GET_MODE_BITSIZE (mode), 128)))
	return VEC_ADVSIMD;
    }

  return 0;
}
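
/* Some illustrative classifications, assuming the corresponding target
   features are enabled:

     V4SImode   (128-bit Advanced SIMD vector)  -> VEC_ADVSIMD
     OImode     (pair of 128-bit vectors)       -> VEC_ADVSIMD | VEC_STRUCT
     VNx4SImode (SVE vector of 32-bit ints)     -> VEC_SVE_DATA
     VNx4BImode (SVE predicate)                 -> VEC_SVE_PRED
     SImode     (scalar)                        -> 0  */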

/* Return true if MODE is any of the data vector modes, including
   structure modes.  */
static bool
aarch64_vector_data_mode_p (machine_mode mode)
{
  return aarch64_classify_vector_mode (mode) & VEC_ANY_DATA;
}

/* Return true if MODE is an SVE data vector mode; either a single vector
   or a structure of vectors.  */
static bool
aarch64_sve_data_mode_p (machine_mode mode)
{
  return aarch64_classify_vector_mode (mode) & VEC_SVE_DATA;
}

/* Implement target hook TARGET_ARRAY_MODE.  */
static opt_machine_mode
aarch64_array_mode (machine_mode mode, unsigned HOST_WIDE_INT nelems)
{
  if (aarch64_classify_vector_mode (mode) == VEC_SVE_DATA
      && IN_RANGE (nelems, 2, 4))
    return mode_for_vector (GET_MODE_INNER (mode),
			    GET_MODE_NUNITS (mode) * nelems);

  return opt_machine_mode ();
}

/* Implement target hook TARGET_ARRAY_MODE_SUPPORTED_P.  */
static bool
aarch64_array_mode_supported_p (machine_mode mode,
				unsigned HOST_WIDE_INT nelems)
{
  if (TARGET_SIMD
      && (AARCH64_VALID_SIMD_QREG_MODE (mode)
	  || AARCH64_VALID_SIMD_DREG_MODE (mode))
      && (nelems >= 2 && nelems <= 4))
    return true;

  return false;
}

/* Return the SVE predicate mode to use for elements that have
   ELEM_NBYTES bytes, if such a mode exists.  */

opt_machine_mode
aarch64_sve_pred_mode (unsigned int elem_nbytes)
{
  if (TARGET_SVE)
    {
      if (elem_nbytes == 1)
	return VNx16BImode;
      if (elem_nbytes == 2)
	return VNx8BImode;
      if (elem_nbytes == 4)
	return VNx4BImode;
      if (elem_nbytes == 8)
	return VNx2BImode;
    }
  return opt_machine_mode ();
}
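
/* For example, an SVE vector of 32-bit elements (VNx4SImode) has 4-byte
   elements, so aarch64_sve_pred_mode (4) returns VNx4BImode: a predicate
   mode with one significant bit per 32-bit element lane.  */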

/* Implement TARGET_VECTORIZE_GET_MASK_MODE.  */

static opt_machine_mode
aarch64_get_mask_mode (poly_uint64 nunits, poly_uint64 nbytes)
{
  if (TARGET_SVE && known_eq (nbytes, BYTES_PER_SVE_VECTOR))
    {
      unsigned int elem_nbytes = vector_element_size (nbytes, nunits);
      machine_mode pred_mode;
      if (aarch64_sve_pred_mode (elem_nbytes).exists (&pred_mode))
	return pred_mode;
    }

  return default_get_mask_mode (nunits, nbytes);
}

/* Implement TARGET_PREFERRED_ELSE_VALUE.  For binary operations,
   prefer to use the first arithmetic operand as the else value if
   the else value doesn't matter, since that exactly matches the SVE
   destructive merging form.  For ternary operations we could either
   pick the first operand and use FMAD-like instructions or the last
   operand and use FMLA-like instructions; the latter seems more
   natural.  */

static tree
aarch64_preferred_else_value (unsigned, tree, unsigned int nops, tree *ops)
{
  return nops == 3 ? ops[2] : ops[0];
}
/* Implement TARGET_HARD_REGNO_NREGS.  */

static unsigned int
aarch64_hard_regno_nregs (unsigned regno, machine_mode mode)
{
  /* ??? Logically we should only need to provide a value when
     HARD_REGNO_MODE_OK says that the combination is valid,
     but at the moment we need to handle all modes.  Just ignore
     any runtime parts for registers that can't store them.  */
  HOST_WIDE_INT lowest_size = constant_lower_bound (GET_MODE_SIZE (mode));
  switch (aarch64_regno_regclass (regno))
    {
    case FP_REGS:
    case FP_LO_REGS:
      if (aarch64_sve_data_mode_p (mode))
	return exact_div (GET_MODE_SIZE (mode),
			  BYTES_PER_SVE_VECTOR).to_constant ();
      return CEIL (lowest_size, UNITS_PER_VREG);
    case PR_REGS:
    case PR_LO_REGS:
    case PR_HI_REGS:
      return 1;
    default:
      return CEIL (lowest_size, UNITS_PER_WORD);
    }
  gcc_unreachable ();
}
1363 gcc_unreachable ();
1364}
1365
f939c3e6 1366/* Implement TARGET_HARD_REGNO_MODE_OK. */
43e9d192 1367
f939c3e6 1368static bool
ef4bddc2 1369aarch64_hard_regno_mode_ok (unsigned regno, machine_mode mode)
43e9d192
IB
1370{
1371 if (GET_MODE_CLASS (mode) == MODE_CC)
1372 return regno == CC_REGNUM;
1373
43cacb12
RS
1374 if (regno == VG_REGNUM)
1375 /* This must have the same size as _Unwind_Word. */
1376 return mode == DImode;
1377
1378 unsigned int vec_flags = aarch64_classify_vector_mode (mode);
1379 if (vec_flags & VEC_SVE_PRED)
1380 return PR_REGNUM_P (regno);
1381
1382 if (PR_REGNUM_P (regno))
1383 return 0;
1384
9259db42
YZ
1385 if (regno == SP_REGNUM)
1386 /* The purpose of comparing with ptr_mode is to support the
1387 global register variable associated with the stack pointer
1388 register via the syntax of asm ("wsp") in ILP32. */
1389 return mode == Pmode || mode == ptr_mode;
1390
1391 if (regno == FRAME_POINTER_REGNUM || regno == ARG_POINTER_REGNUM)
43e9d192
IB
1392 return mode == Pmode;
1393
43cacb12 1394 if (GP_REGNUM_P (regno) && known_le (GET_MODE_SIZE (mode), 16))
f939c3e6 1395 return true;
43e9d192
IB
1396
1397 if (FP_REGNUM_P (regno))
1398 {
43cacb12 1399 if (vec_flags & VEC_STRUCT)
4edd6298 1400 return end_hard_regno (mode, regno) - 1 <= V31_REGNUM;
43e9d192 1401 else
43cacb12 1402 return !VECTOR_MODE_P (mode) || vec_flags != 0;
43e9d192
IB
1403 }
1404
f939c3e6 1405 return false;
43e9d192
IB
1406}

/* Implement TARGET_HARD_REGNO_CALL_PART_CLOBBERED.  The callee only saves
   the lower 64 bits of a 128-bit register.  Tell the compiler the callee
   clobbers the top 64 bits when restoring the bottom 64 bits.  */

static bool
aarch64_hard_regno_call_part_clobbered (unsigned int regno, machine_mode mode)
{
  return FP_REGNUM_P (regno) && maybe_gt (GET_MODE_SIZE (mode), 8);
}

/* Implement REGMODE_NATURAL_SIZE.  */
poly_uint64
aarch64_regmode_natural_size (machine_mode mode)
{
  /* The natural size for SVE data modes is one SVE data vector,
     and similarly for predicates.  We can't independently modify
     anything smaller than that.  */
  /* ??? For now, only do this for variable-width SVE registers.
     Doing it for constant-sized registers breaks lower-subreg.c.  */
  /* ??? And once that's fixed, we should probably have similar
     code for Advanced SIMD.  */
  if (!aarch64_sve_vg.is_constant ())
    {
      unsigned int vec_flags = aarch64_classify_vector_mode (mode);
      if (vec_flags & VEC_SVE_PRED)
	return BYTES_PER_SVE_PRED;
      if (vec_flags & VEC_SVE_DATA)
	return BYTES_PER_SVE_VECTOR;
    }
  return UNITS_PER_WORD;
}

/* Implement HARD_REGNO_CALLER_SAVE_MODE.  */
machine_mode
aarch64_hard_regno_caller_save_mode (unsigned regno, unsigned,
				     machine_mode mode)
{
  /* The predicate mode determines which bits are significant and
     which are "don't care".  Decreasing the number of lanes would
     lose data while increasing the number of lanes would make bits
     unnecessarily significant.  */
  if (PR_REGNUM_P (regno))
    return mode;
  if (known_ge (GET_MODE_SIZE (mode), 4))
    return mode;
  else
    return SImode;
}

/* Implement TARGET_CONSTANT_ALIGNMENT.  Make strings word-aligned so
   that strcpy from constants will be faster.  */

static HOST_WIDE_INT
aarch64_constant_alignment (const_tree exp, HOST_WIDE_INT align)
{
  if (TREE_CODE (exp) == STRING_CST && !optimize_size)
    return MAX (align, BITS_PER_WORD);
  return align;
}

/* Return true if calls to DECL should be treated as
   long-calls (i.e. called via a register).  */
static bool
aarch64_decl_is_long_call_p (const_tree decl ATTRIBUTE_UNUSED)
{
  return false;
}

/* Return true if calls to symbol-ref SYM should be treated as
   long-calls (i.e. called via a register).  */
bool
aarch64_is_long_call_p (rtx sym)
{
  return aarch64_decl_is_long_call_p (SYMBOL_REF_DECL (sym));
}

/* Return true if calls to symbol-ref SYM should not go through
   plt stubs.  */

bool
aarch64_is_noplt_call_p (rtx sym)
{
  const_tree decl = SYMBOL_REF_DECL (sym);

  if (flag_pic
      && decl
      && (!flag_plt
	  || lookup_attribute ("noplt", DECL_ATTRIBUTES (decl)))
      && !targetm.binds_local_p (decl))
    return true;

  return false;
}

/* Return true if the offsets to a zero/sign-extract operation
   represent an expression that matches an extend operation.  The
   operands represent the parameters from

     (extract:MODE (mult (reg) (MULT_IMM)) (EXTRACT_IMM) (const_int 0)).  */
bool
aarch64_is_extend_from_extract (scalar_int_mode mode, rtx mult_imm,
				rtx extract_imm)
{
  HOST_WIDE_INT mult_val, extract_val;

  if (! CONST_INT_P (mult_imm) || ! CONST_INT_P (extract_imm))
    return false;

  mult_val = INTVAL (mult_imm);
  extract_val = INTVAL (extract_imm);

  if (extract_val > 8
      && extract_val < GET_MODE_BITSIZE (mode)
      && exact_log2 (extract_val & ~7) > 0
      && (extract_val & 7) <= 4
      && mult_val == (1 << (extract_val & 7)))
    return true;

  return false;
}
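
/* A worked example: in DImode, MULT_IMM == 4 and EXTRACT_IMM == 34
   satisfy the tests above (34 & ~7 == 32, a power of two; 34 & 7 == 2;
   and 4 == 1 << 2).  Extracting 34 bits from bit 0 of reg * 4 is the
   same as extending the low 32 bits of reg and shifting left by 2 -
   the shape produced by an extended-register operand such as
   "add x0, x1, w2, sxtw #2".  */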

/* Emit an insn that's a simple single-set.  Both the operands must be
   known to be valid.  */
inline static rtx_insn *
emit_set_insn (rtx x, rtx y)
{
  return emit_insn (gen_rtx_SET (x, y));
}

/* X and Y are two things to compare using CODE.  Emit the compare insn and
   return the rtx for register 0 in the proper mode.  */
rtx
aarch64_gen_compare_reg (RTX_CODE code, rtx x, rtx y)
{
  machine_mode mode = SELECT_CC_MODE (code, x, y);
  rtx cc_reg = gen_rtx_REG (mode, CC_REGNUM);

  emit_set_insn (cc_reg, gen_rtx_COMPARE (mode, x, y));
  return cc_reg;
}

/* Build the SYMBOL_REF for __tls_get_addr.  */

static GTY(()) rtx tls_get_addr_libfunc;

rtx
aarch64_tls_get_addr (void)
{
  if (!tls_get_addr_libfunc)
    tls_get_addr_libfunc = init_one_libfunc ("__tls_get_addr");
  return tls_get_addr_libfunc;
}

/* Return the TLS model to use for ADDR.  */

static enum tls_model
tls_symbolic_operand_type (rtx addr)
{
  enum tls_model tls_kind = TLS_MODEL_NONE;
  if (GET_CODE (addr) == CONST)
    {
      poly_int64 addend;
      rtx sym = strip_offset (addr, &addend);
      if (GET_CODE (sym) == SYMBOL_REF)
	tls_kind = SYMBOL_REF_TLS_MODEL (sym);
    }
  else if (GET_CODE (addr) == SYMBOL_REF)
    tls_kind = SYMBOL_REF_TLS_MODEL (addr);

  return tls_kind;
}
1579
1580/* We'll allow lo_sum's in addresses in our legitimate addresses
1581 so that combine would take care of combining addresses where
1582 necessary, but for generation purposes, we'll generate the address
1583 as :
1584 RTL Absolute
1585 tmp = hi (symbol_ref); adrp x1, foo
1586 dest = lo_sum (tmp, symbol_ref); add dest, x1, :lo_12:foo
1587 nop
1588
1589 PIC TLS
1590 adrp x1, :got:foo adrp tmp, :tlsgd:foo
1591 ldr x1, [:got_lo12:foo] add dest, tmp, :tlsgd_lo12:foo
1592 bl __tls_get_addr
1593 nop
1594
1595 Load TLS symbol, depending on TLS mechanism and TLS access model.
1596
1597 Global Dynamic - Traditional TLS:
1598 adrp tmp, :tlsgd:imm
1599 add dest, tmp, #:tlsgd_lo12:imm
1600 bl __tls_get_addr
1601
1602 Global Dynamic - TLS Descriptors:
1603 adrp dest, :tlsdesc:imm
1604 ldr tmp, [dest, #:tlsdesc_lo12:imm]
1605 add dest, dest, #:tlsdesc_lo12:imm
1606 blr tmp
1607 mrs tp, tpidr_el0
1608 add dest, dest, tp
1609
1610 Initial Exec:
1611 mrs tp, tpidr_el0
1612 adrp tmp, :gottprel:imm
1613 ldr dest, [tmp, #:gottprel_lo12:imm]
1614 add dest, dest, tp
1615
1616 Local Exec:
1617 mrs tp, tpidr_el0
0699caae
RL
1618 add t0, tp, #:tprel_hi12:imm, lsl #12
1619 add t0, t0, #:tprel_lo12_nc:imm
43e9d192
IB
1620*/
1621
1622static void
1623aarch64_load_symref_appropriately (rtx dest, rtx imm,
1624 enum aarch64_symbol_type type)
1625{
1626 switch (type)
1627 {
1628 case SYMBOL_SMALL_ABSOLUTE:
1629 {
28514dda 1630 /* In ILP32, the mode of dest can be either SImode or DImode. */
43e9d192 1631 rtx tmp_reg = dest;
ef4bddc2 1632 machine_mode mode = GET_MODE (dest);
28514dda
YZ
1633
1634 gcc_assert (mode == Pmode || mode == ptr_mode);
1635
43e9d192 1636 if (can_create_pseudo_p ())
28514dda 1637 tmp_reg = gen_reg_rtx (mode);
43e9d192 1638
28514dda 1639 emit_move_insn (tmp_reg, gen_rtx_HIGH (mode, imm));
43e9d192
IB
1640 emit_insn (gen_add_losym (dest, tmp_reg, imm));
1641 return;
1642 }
1643
a5350ddc 1644 case SYMBOL_TINY_ABSOLUTE:
f7df4a84 1645 emit_insn (gen_rtx_SET (dest, imm));
a5350ddc
CSS
1646 return;
1647
1b1e81f8
JW
1648 case SYMBOL_SMALL_GOT_28K:
1649 {
1650 machine_mode mode = GET_MODE (dest);
1651 rtx gp_rtx = pic_offset_table_rtx;
53021678
JW
1652 rtx insn;
1653 rtx mem;
1b1e81f8
JW
1654
1655 /* NOTE: pic_offset_table_rtx can be NULL_RTX, because we can reach
1656 here before RTL expansion. Tree IVOPTs will generate RTL patterns
1657 to decide rtx costs, in which case pic_offset_table_rtx is not
1658 initialized. In that case there is no need to generate the first
026c3cfd 1659 adrp instruction, as the final cost for global variable access is
1b1e81f8
JW
1660 one instruction. */
1661 if (gp_rtx != NULL)
1662 {
1663 /* -fpic for -mcmodel=small allows a 32K GOT table size (but since
1664 we use the page base as the GOT base, the first page may be wasted;
1665 in the worst case, there is only 28K of space for the GOT).
1666
1667 The generated instruction sequence for accessing a global variable
1668 is:
1669
a3957742 1670 ldr reg, [pic_offset_table_rtx, #:gotpage_lo15:sym]
1b1e81f8
JW
1671
1672 Only one instruction is needed. But we must initialize
1673 pic_offset_table_rtx properly. We generate an initialization insn
1674 for every global access, and let CSE remove all redundant copies.
1675
1676 The final instruction sequence will look like the following when
1677 accessing multiple global variables.
1678
a3957742 1679 adrp pic_offset_table_rtx, _GLOBAL_OFFSET_TABLE_
1b1e81f8 1680
a3957742
JW
1681 ldr reg, [pic_offset_table_rtx, #:gotpage_lo15:sym1]
1682 ldr reg, [pic_offset_table_rtx, #:gotpage_lo15:sym2]
1683 ldr reg, [pic_offset_table_rtx, #:gotpage_lo15:sym3]
1684 ... */
1b1e81f8
JW
1685
1686 rtx s = gen_rtx_SYMBOL_REF (Pmode, "_GLOBAL_OFFSET_TABLE_");
1687 crtl->uses_pic_offset_table = 1;
1688 emit_move_insn (gp_rtx, gen_rtx_HIGH (Pmode, s));
1689
1690 if (mode != GET_MODE (gp_rtx))
4ba8f0a3
AP
1691 gp_rtx = gen_lowpart (mode, gp_rtx);
1692
1b1e81f8
JW
1693 }
1694
1695 if (mode == ptr_mode)
1696 {
1697 if (mode == DImode)
53021678 1698 insn = gen_ldr_got_small_28k_di (dest, gp_rtx, imm);
1b1e81f8 1699 else
53021678
JW
1700 insn = gen_ldr_got_small_28k_si (dest, gp_rtx, imm);
1701
1702 mem = XVECEXP (SET_SRC (insn), 0, 0);
1b1e81f8
JW
1703 }
1704 else
1705 {
1706 gcc_assert (mode == Pmode);
53021678
JW
1707
1708 insn = gen_ldr_got_small_28k_sidi (dest, gp_rtx, imm);
1709 mem = XVECEXP (XEXP (SET_SRC (insn), 0), 0, 0);
1b1e81f8
JW
1710 }
1711
53021678
JW
1712 /* The operand is expected to be a MEM. Whenever the related insn
1713 pattern changes, the code above which calculates MEM should be
1714 updated. */
1715 gcc_assert (GET_CODE (mem) == MEM);
1716 MEM_READONLY_P (mem) = 1;
1717 MEM_NOTRAP_P (mem) = 1;
1718 emit_insn (insn);
1b1e81f8
JW
1719 return;
1720 }
1721
6642bdb4 1722 case SYMBOL_SMALL_GOT_4G:
43e9d192 1723 {
28514dda
YZ
1724 /* In ILP32, the mode of dest can be either SImode or DImode,
1725 while the got entry is always of SImode size. The mode of
1726 dest depends on how dest is used: if dest is assigned to a
1727 pointer (e.g. in the memory), it has SImode; it may have
1728 DImode if dest is dereferenced to access the memory.
1729 This is why we have to handle three different ldr_got_small
1730 patterns here (two patterns for ILP32). */
53021678
JW
1731
1732 rtx insn;
1733 rtx mem;
43e9d192 1734 rtx tmp_reg = dest;
ef4bddc2 1735 machine_mode mode = GET_MODE (dest);
28514dda 1736
43e9d192 1737 if (can_create_pseudo_p ())
28514dda
YZ
1738 tmp_reg = gen_reg_rtx (mode);
1739
1740 emit_move_insn (tmp_reg, gen_rtx_HIGH (mode, imm));
1741 if (mode == ptr_mode)
1742 {
1743 if (mode == DImode)
53021678 1744 insn = gen_ldr_got_small_di (dest, tmp_reg, imm);
28514dda 1745 else
53021678
JW
1746 insn = gen_ldr_got_small_si (dest, tmp_reg, imm);
1747
1748 mem = XVECEXP (SET_SRC (insn), 0, 0);
28514dda
YZ
1749 }
1750 else
1751 {
1752 gcc_assert (mode == Pmode);
53021678
JW
1753
1754 insn = gen_ldr_got_small_sidi (dest, tmp_reg, imm);
1755 mem = XVECEXP (XEXP (SET_SRC (insn), 0), 0, 0);
28514dda
YZ
1756 }
1757
53021678
JW
1758 gcc_assert (GET_CODE (mem) == MEM);
1759 MEM_READONLY_P (mem) = 1;
1760 MEM_NOTRAP_P (mem) = 1;
1761 emit_insn (insn);
43e9d192
IB
1762 return;
1763 }
1764
1765 case SYMBOL_SMALL_TLSGD:
1766 {
5d8a22a5 1767 rtx_insn *insns;
23b88fda
N
1768 machine_mode mode = GET_MODE (dest);
1769 rtx result = gen_rtx_REG (mode, R0_REGNUM);
43e9d192
IB
1770
1771 start_sequence ();
23b88fda
N
1772 if (TARGET_ILP32)
1773 aarch64_emit_call_insn (gen_tlsgd_small_si (result, imm));
1774 else
1775 aarch64_emit_call_insn (gen_tlsgd_small_di (result, imm));
43e9d192
IB
1776 insns = get_insns ();
1777 end_sequence ();
1778
1779 RTL_CONST_CALL_P (insns) = 1;
1780 emit_libcall_block (insns, dest, result, imm);
1781 return;
1782 }
1783
1784 case SYMBOL_SMALL_TLSDESC:
1785 {
ef4bddc2 1786 machine_mode mode = GET_MODE (dest);
621ad2de 1787 rtx x0 = gen_rtx_REG (mode, R0_REGNUM);
43e9d192
IB
1788 rtx tp;
1789
621ad2de
AP
1790 gcc_assert (mode == Pmode || mode == ptr_mode);
1791
2876a13f
JW
1792 /* In ILP32, the got entry is always of SImode size. Unlike
1793 small GOT, the dest is fixed at reg 0. */
1794 if (TARGET_ILP32)
1795 emit_insn (gen_tlsdesc_small_si (imm));
621ad2de 1796 else
2876a13f 1797 emit_insn (gen_tlsdesc_small_di (imm));
43e9d192 1798 tp = aarch64_load_tp (NULL);
621ad2de
AP
1799
1800 if (mode != Pmode)
1801 tp = gen_lowpart (mode, tp);
1802
2876a13f 1803 emit_insn (gen_rtx_SET (dest, gen_rtx_PLUS (mode, tp, x0)));
241dbd9d
QZ
1804 if (REG_P (dest))
1805 set_unique_reg_note (get_last_insn (), REG_EQUIV, imm);
43e9d192
IB
1806 return;
1807 }
1808
79496620 1809 case SYMBOL_SMALL_TLSIE:
43e9d192 1810 {
621ad2de
AP
1811 /* In ILP32, the mode of dest can be either SImode or DImode,
1812 while the got entry is always of SImode size. The mode of
1813 dest depends on how dest is used: if dest is assigned to a
1814 pointer (e.g. in the memory), it has SImode; it may have
1815 DImode if dest is dereferenced to access the memory.
1816 This is why we have to handle three different tlsie_small
1817 patterns here (two patterns for ILP32). */
ef4bddc2 1818 machine_mode mode = GET_MODE (dest);
621ad2de 1819 rtx tmp_reg = gen_reg_rtx (mode);
43e9d192 1820 rtx tp = aarch64_load_tp (NULL);
621ad2de
AP
1821
1822 if (mode == ptr_mode)
1823 {
1824 if (mode == DImode)
1825 emit_insn (gen_tlsie_small_di (tmp_reg, imm));
1826 else
1827 {
1828 emit_insn (gen_tlsie_small_si (tmp_reg, imm));
1829 tp = gen_lowpart (mode, tp);
1830 }
1831 }
1832 else
1833 {
1834 gcc_assert (mode == Pmode);
1835 emit_insn (gen_tlsie_small_sidi (tmp_reg, imm));
1836 }
1837
f7df4a84 1838 emit_insn (gen_rtx_SET (dest, gen_rtx_PLUS (mode, tp, tmp_reg)));
241dbd9d
QZ
1839 if (REG_P (dest))
1840 set_unique_reg_note (get_last_insn (), REG_EQUIV, imm);
43e9d192
IB
1841 return;
1842 }
1843
cbf5629e 1844 case SYMBOL_TLSLE12:
d18ba284 1845 case SYMBOL_TLSLE24:
cbf5629e
JW
1846 case SYMBOL_TLSLE32:
1847 case SYMBOL_TLSLE48:
43e9d192 1848 {
cbf5629e 1849 machine_mode mode = GET_MODE (dest);
43e9d192 1850 rtx tp = aarch64_load_tp (NULL);
e6f7f0e9 1851
cbf5629e
JW
1852 if (mode != Pmode)
1853 tp = gen_lowpart (mode, tp);
1854
1855 switch (type)
1856 {
1857 case SYMBOL_TLSLE12:
1858 emit_insn ((mode == DImode ? gen_tlsle12_di : gen_tlsle12_si)
1859 (dest, tp, imm));
1860 break;
1861 case SYMBOL_TLSLE24:
1862 emit_insn ((mode == DImode ? gen_tlsle24_di : gen_tlsle24_si)
1863 (dest, tp, imm));
1864 break;
1865 case SYMBOL_TLSLE32:
1866 emit_insn ((mode == DImode ? gen_tlsle32_di : gen_tlsle32_si)
1867 (dest, imm));
1868 emit_insn ((mode == DImode ? gen_adddi3 : gen_addsi3)
1869 (dest, dest, tp));
1870 break;
1871 case SYMBOL_TLSLE48:
1872 emit_insn ((mode == DImode ? gen_tlsle48_di : gen_tlsle48_si)
1873 (dest, imm));
1874 emit_insn ((mode == DImode ? gen_adddi3 : gen_addsi3)
1875 (dest, dest, tp));
1876 break;
1877 default:
1878 gcc_unreachable ();
1879 }
e6f7f0e9 1880
241dbd9d
QZ
1881 if (REG_P (dest))
1882 set_unique_reg_note (get_last_insn (), REG_EQUIV, imm);
43e9d192
IB
1883 return;
1884 }
1885
87dd8ab0
MS
1886 case SYMBOL_TINY_GOT:
1887 emit_insn (gen_ldr_got_tiny (dest, imm));
1888 return;
1889
5ae7caad
JW
1890 case SYMBOL_TINY_TLSIE:
1891 {
1892 machine_mode mode = GET_MODE (dest);
1893 rtx tp = aarch64_load_tp (NULL);
1894
1895 if (mode == ptr_mode)
1896 {
1897 if (mode == DImode)
1898 emit_insn (gen_tlsie_tiny_di (dest, imm, tp));
1899 else
1900 {
1901 tp = gen_lowpart (mode, tp);
1902 emit_insn (gen_tlsie_tiny_si (dest, imm, tp));
1903 }
1904 }
1905 else
1906 {
1907 gcc_assert (mode == Pmode);
1908 emit_insn (gen_tlsie_tiny_sidi (dest, imm, tp));
1909 }
1910
241dbd9d
QZ
1911 if (REG_P (dest))
1912 set_unique_reg_note (get_last_insn (), REG_EQUIV, imm);
5ae7caad
JW
1913 return;
1914 }
1915
43e9d192
IB
1916 default:
1917 gcc_unreachable ();
1918 }
1919}
1920
1921/* Emit a move from SRC to DEST. Assume that the move expanders can
1922 handle all moves if !can_create_pseudo_p (). The distinction is
1923 important because, unlike emit_move_insn, the move expanders know
1924 how to force Pmode objects into the constant pool even when the
1925 constant pool address is not itself legitimate. */
1926static rtx
1927aarch64_emit_move (rtx dest, rtx src)
1928{
1929 return (can_create_pseudo_p ()
1930 ? emit_move_insn (dest, src)
1931 : emit_move_insn_1 (dest, src));
1932}
1933
f22d7973
RS
1934/* Apply UNOPTAB to OP and store the result in DEST. */
1935
1936static void
1937aarch64_emit_unop (rtx dest, optab unoptab, rtx op)
1938{
1939 rtx tmp = expand_unop (GET_MODE (dest), unoptab, op, dest, 0);
1940 if (dest != tmp)
1941 emit_move_insn (dest, tmp);
1942}
1943
1944/* Apply BINOPTAB to OP0 and OP1 and store the result in DEST. */
1945
1946static void
1947aarch64_emit_binop (rtx dest, optab binoptab, rtx op0, rtx op1)
1948{
1949 rtx tmp = expand_binop (GET_MODE (dest), binoptab, op0, op1, dest, 0,
1950 OPTAB_DIRECT);
1951 if (dest != tmp)
1952 emit_move_insn (dest, tmp);
1953}
1954
030d03b8
RE
1955/* Split a 128-bit move operation into two 64-bit move operations,
1956 taking care to handle partial overlap of register to register
1957 copies. Special cases are needed when moving between GP regs and
1958 FP regs. SRC can be a register, constant or memory; DST a register
1959 or memory. If either operand is memory it must not have any side
1960 effects. */
43e9d192
IB
1961void
1962aarch64_split_128bit_move (rtx dst, rtx src)
1963{
030d03b8
RE
1964 rtx dst_lo, dst_hi;
1965 rtx src_lo, src_hi;
43e9d192 1966
ef4bddc2 1967 machine_mode mode = GET_MODE (dst);
12dc6974 1968
030d03b8
RE
1969 gcc_assert (mode == TImode || mode == TFmode);
1970 gcc_assert (!(side_effects_p (src) || side_effects_p (dst)));
1971 gcc_assert (mode == GET_MODE (src) || GET_MODE (src) == VOIDmode);
43e9d192
IB
1972
1973 if (REG_P (dst) && REG_P (src))
1974 {
030d03b8
RE
1975 int src_regno = REGNO (src);
1976 int dst_regno = REGNO (dst);
43e9d192 1977
030d03b8 1978 /* Handle FP <-> GP regs. */
43e9d192
IB
1979 if (FP_REGNUM_P (dst_regno) && GP_REGNUM_P (src_regno))
1980 {
030d03b8
RE
1981 src_lo = gen_lowpart (word_mode, src);
1982 src_hi = gen_highpart (word_mode, src);
1983
0016d8d9
RS
1984 emit_insn (gen_aarch64_movlow_di (mode, dst, src_lo));
1985 emit_insn (gen_aarch64_movhigh_di (mode, dst, src_hi));
030d03b8 1986 return;
43e9d192
IB
1987 }
1988 else if (GP_REGNUM_P (dst_regno) && FP_REGNUM_P (src_regno))
1989 {
030d03b8
RE
1990 dst_lo = gen_lowpart (word_mode, dst);
1991 dst_hi = gen_highpart (word_mode, dst);
1992
0016d8d9
RS
1993 emit_insn (gen_aarch64_movdi_low (mode, dst_lo, src));
1994 emit_insn (gen_aarch64_movdi_high (mode, dst_hi, src));
030d03b8 1995 return;
43e9d192 1996 }
43e9d192
IB
1997 }
1998
030d03b8
RE
1999 dst_lo = gen_lowpart (word_mode, dst);
2000 dst_hi = gen_highpart (word_mode, dst);
2001 src_lo = gen_lowpart (word_mode, src);
2002 src_hi = gen_highpart_mode (word_mode, mode, src);
2003
2004 /* At most one pairing may overlap. */
2005 if (reg_overlap_mentioned_p (dst_lo, src_hi))
2006 {
2007 aarch64_emit_move (dst_hi, src_hi);
2008 aarch64_emit_move (dst_lo, src_lo);
2009 }
2010 else
2011 {
2012 aarch64_emit_move (dst_lo, src_lo);
2013 aarch64_emit_move (dst_hi, src_hi);
2014 }
43e9d192
IB
2015}
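/* Worked example (editor's sketch, not part of the original source):
   for a TImode register copy from the pair {x0,x1} to {x1,x2},
   dst_lo (x1) overlaps src_hi (x1), so the high halves move first:

     mov x2, x1   // dst_hi <- src_hi
     mov x1, x0   // dst_lo <- src_lo

   Moving the low halves first would clobber x1 before it is read.  */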
2016
2017bool
2018aarch64_split_128bit_move_p (rtx dst, rtx src)
2019{
2020 return (! REG_P (src)
2021 || ! (FP_REGNUM_P (REGNO (dst)) && FP_REGNUM_P (REGNO (src))));
2022}
2023
8b033a8a
SN
2024/* Split a complex SIMD combine. */
2025
2026void
2027aarch64_split_simd_combine (rtx dst, rtx src1, rtx src2)
2028{
ef4bddc2
RS
2029 machine_mode src_mode = GET_MODE (src1);
2030 machine_mode dst_mode = GET_MODE (dst);
8b033a8a
SN
2031
2032 gcc_assert (VECTOR_MODE_P (dst_mode));
a977dc0c
MC
2033 gcc_assert (register_operand (dst, dst_mode)
2034 && register_operand (src1, src_mode)
2035 && register_operand (src2, src_mode));
8b033a8a 2036
0016d8d9 2037 emit_insn (gen_aarch64_simd_combine (src_mode, dst, src1, src2));
a977dc0c 2038 return;
8b033a8a
SN
2039}
2040
fd4842cd
SN
2041/* Split a complex SIMD move. */
2042
2043void
2044aarch64_split_simd_move (rtx dst, rtx src)
2045{
ef4bddc2
RS
2046 machine_mode src_mode = GET_MODE (src);
2047 machine_mode dst_mode = GET_MODE (dst);
fd4842cd
SN
2048
2049 gcc_assert (VECTOR_MODE_P (dst_mode));
2050
2051 if (REG_P (dst) && REG_P (src))
2052 {
2053 gcc_assert (VECTOR_MODE_P (src_mode));
0016d8d9 2054 emit_insn (gen_aarch64_split_simd_mov (src_mode, dst, src));
fd4842cd
SN
2055 }
2056}
2057
ef22810a
RH
2058bool
2059aarch64_zero_extend_const_eq (machine_mode xmode, rtx x,
2060 machine_mode ymode, rtx y)
2061{
2062 rtx r = simplify_const_unary_operation (ZERO_EXTEND, xmode, y, ymode);
2063 gcc_assert (r != NULL);
2064 return rtx_equal_p (x, r);
2065}
2066
2067
43e9d192 2068static rtx
ef4bddc2 2069aarch64_force_temporary (machine_mode mode, rtx x, rtx value)
43e9d192
IB
2070{
2071 if (can_create_pseudo_p ())
e18b4a81 2072 return force_reg (mode, value);
43e9d192
IB
2073 else
2074 {
f5470a77
RS
2075 gcc_assert (x);
2076 aarch64_emit_move (x, value);
43e9d192
IB
2077 return x;
2078 }
2079}
2080
43cacb12
RS
2081/* Return true if we can move VALUE into a register using a single
2082 CNT[BHWD] instruction. */
2083
2084static bool
2085aarch64_sve_cnt_immediate_p (poly_int64 value)
2086{
2087 HOST_WIDE_INT factor = value.coeffs[0];
2088 /* The coefficient must be [1, 16] * {2, 4, 8, 16}. */
2089 return (value.coeffs[1] == factor
2090 && IN_RANGE (factor, 2, 16 * 16)
2091 && (factor & 1) == 0
2092 && factor <= 16 * (factor & -factor));
2093}
2094
2095/* Likewise for rtx X. */
2096
2097bool
2098aarch64_sve_cnt_immediate_p (rtx x)
2099{
2100 poly_int64 value;
2101 return poly_int_rtx_p (x, &value) && aarch64_sve_cnt_immediate_p (value);
2102}
2103
2104/* Return the asm string for an instruction with a CNT-like vector size
2105 operand (a vector pattern followed by a multiplier in the range [1, 16]).
2106 PREFIX is the mnemonic without the size suffix and OPERANDS is the
2107 first part of the operands template (the part that comes before the
2108 vector size itself). FACTOR is the number of quadwords.
2109 NELTS_PER_VQ, if nonzero, is the number of elements in each quadword.
2110 If it is zero, we can use any element size. */
2111
2112static char *
2113aarch64_output_sve_cnt_immediate (const char *prefix, const char *operands,
2114 unsigned int factor,
2115 unsigned int nelts_per_vq)
2116{
2117 static char buffer[sizeof ("sqincd\t%x0, %w0, all, mul #16")];
2118
2119 if (nelts_per_vq == 0)
2120 /* There is some overlap in the ranges of the four CNT instructions.
2121 Here we always use the smallest possible element size, so that the
2122 multiplier is 1 wherever possible. */
2123 nelts_per_vq = factor & -factor;
2124 int shift = std::min (exact_log2 (nelts_per_vq), 4);
2125 gcc_assert (IN_RANGE (shift, 1, 4));
2126 char suffix = "dwhb"[shift - 1];
2127
2128 factor >>= shift;
2129 unsigned int written;
2130 if (factor == 1)
2131 written = snprintf (buffer, sizeof (buffer), "%s%c\t%s",
2132 prefix, suffix, operands);
2133 else
2134 written = snprintf (buffer, sizeof (buffer), "%s%c\t%s, all, mul #%d",
2135 prefix, suffix, operands, factor);
2136 gcc_assert (written < sizeof (buffer));
2137 return buffer;
2138}
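/* Worked example (editor's illustration, assuming the behaviour above):
   a vector-size operand of poly_int64 (32, 32) has FACTOR == 32 and no
   fixed element size, so NELTS_PER_VQ defaults to the low set bit of
   FACTOR (32, with the shift capped at 4), selecting the byte form:

     cntb x0, all, mul #2

   i.e. 2 * 16 bytes per 128-bit quadword.  */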
2139
2140/* Return the asm string for an instruction with a CNT-like vector size
2141 operand (a vector pattern followed by a multiplier in the range [1, 16]).
2142 PREFIX is the mnemonic without the size suffix and OPERANDS is the
2143 first part of the operands template (the part that comes before the
2144 vector size itself). X is the value of the vector size operand,
2145 as a polynomial integer rtx. */
2146
2147char *
2148aarch64_output_sve_cnt_immediate (const char *prefix, const char *operands,
2149 rtx x)
2150{
2151 poly_int64 value = rtx_to_poly_int64 (x);
2152 gcc_assert (aarch64_sve_cnt_immediate_p (value));
2153 return aarch64_output_sve_cnt_immediate (prefix, operands,
2154 value.coeffs[1], 0);
2155}
2156
2157/* Return true if we can add VALUE to a register using a single ADDVL
2158 or ADDPL instruction. */
2159
2160static bool
2161aarch64_sve_addvl_addpl_immediate_p (poly_int64 value)
2162{
2163 HOST_WIDE_INT factor = value.coeffs[0];
2164 if (factor == 0 || value.coeffs[1] != factor)
2165 return false;
2166 /* FACTOR counts VG / 2, so a value of 2 is one predicate width
2167 and a value of 16 is one vector width. */
2168 return (((factor & 15) == 0 && IN_RANGE (factor, -32 * 16, 31 * 16))
2169 || ((factor & 1) == 0 && IN_RANGE (factor, -32 * 2, 31 * 2)));
2170}
2171
2172/* Likewise for rtx X. */
2173
2174bool
2175aarch64_sve_addvl_addpl_immediate_p (rtx x)
2176{
2177 poly_int64 value;
2178 return (poly_int_rtx_p (x, &value)
2179 && aarch64_sve_addvl_addpl_immediate_p (value));
2180}
2181
2182/* Return the asm string for adding ADDVL or ADDPL immediate X to operand 1
2183 and storing the result in operand 0. */
2184
2185char *
2186aarch64_output_sve_addvl_addpl (rtx dest, rtx base, rtx offset)
2187{
2188 static char buffer[sizeof ("addpl\t%x0, %x1, #-") + 3 * sizeof (int)];
2189 poly_int64 offset_value = rtx_to_poly_int64 (offset);
2190 gcc_assert (aarch64_sve_addvl_addpl_immediate_p (offset_value));
2191
2192 /* Use INC or DEC if possible. */
2193 if (rtx_equal_p (dest, base) && GP_REGNUM_P (REGNO (dest)))
2194 {
2195 if (aarch64_sve_cnt_immediate_p (offset_value))
2196 return aarch64_output_sve_cnt_immediate ("inc", "%x0",
2197 offset_value.coeffs[1], 0);
2198 if (aarch64_sve_cnt_immediate_p (-offset_value))
2199 return aarch64_output_sve_cnt_immediate ("dec", "%x0",
2200 -offset_value.coeffs[1], 0);
2201 }
2202
2203 int factor = offset_value.coeffs[1];
2204 if ((factor & 15) == 0)
2205 snprintf (buffer, sizeof (buffer), "addvl\t%%x0, %%x1, #%d", factor / 16);
2206 else
2207 snprintf (buffer, sizeof (buffer), "addpl\t%%x0, %%x1, #%d", factor / 2);
2208 return buffer;
2209}
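/* Worked examples (editor's sketch): an offset of poly_int64 (16, 16)
   bytes is one full vector and becomes "addvl x0, x1, #1", while
   poly_int64 (2, 2) is one predicate width and becomes
   "addpl x0, x1, #1".  An offset such as (8, 8) uses ADDPL with #4.  */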
2210
2211/* Return true if X is a valid immediate for an SVE vector INC or DEC
2212 instruction. If it is, store the number of elements in each vector
2213 quadword in *NELTS_PER_VQ_OUT (if nonnull) and store the multiplication
2214 factor in *FACTOR_OUT (if nonnull). */
2215
2216bool
2217aarch64_sve_inc_dec_immediate_p (rtx x, int *factor_out,
2218 unsigned int *nelts_per_vq_out)
2219{
2220 rtx elt;
2221 poly_int64 value;
2222
2223 if (!const_vec_duplicate_p (x, &elt)
2224 || !poly_int_rtx_p (elt, &value))
2225 return false;
2226
2227 unsigned int nelts_per_vq = 128 / GET_MODE_UNIT_BITSIZE (GET_MODE (x));
2228 if (nelts_per_vq != 8 && nelts_per_vq != 4 && nelts_per_vq != 2)
2229 /* There's no vector INCB. */
2230 return false;
2231
2232 HOST_WIDE_INT factor = value.coeffs[0];
2233 if (value.coeffs[1] != factor)
2234 return false;
2235
2236 /* The coefficient must be [1, 16] * NELTS_PER_VQ. */
2237 if ((factor % nelts_per_vq) != 0
2238 || !IN_RANGE (abs (factor), nelts_per_vq, 16 * nelts_per_vq))
2239 return false;
2240
2241 if (factor_out)
2242 *factor_out = factor;
2243 if (nelts_per_vq_out)
2244 *nelts_per_vq_out = nelts_per_vq;
2245 return true;
2246}
2247
2248/* Return true if X is a valid immediate for an SVE vector INC or DEC
2249 instruction. */
2250
2251bool
2252aarch64_sve_inc_dec_immediate_p (rtx x)
2253{
2254 return aarch64_sve_inc_dec_immediate_p (x, NULL, NULL);
2255}
2256
2257/* Return the asm template for an SVE vector INC or DEC instruction.
2258 OPERANDS gives the operands before the vector count and X is the
2259 value of the vector count operand itself. */
2260
2261char *
2262aarch64_output_sve_inc_dec_immediate (const char *operands, rtx x)
2263{
2264 int factor;
2265 unsigned int nelts_per_vq;
2266 if (!aarch64_sve_inc_dec_immediate_p (x, &factor, &nelts_per_vq))
2267 gcc_unreachable ();
2268 if (factor < 0)
2269 return aarch64_output_sve_cnt_immediate ("dec", operands, -factor,
2270 nelts_per_vq);
2271 else
2272 return aarch64_output_sve_cnt_immediate ("inc", operands, factor,
2273 nelts_per_vq);
2274}
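/* Worked example (editor's illustration): a VNx4SI constant that
   duplicates poly_int64 (8, 8) has NELTS_PER_VQ == 4 (four 32-bit
   elements per quadword) and FACTOR == 8, so the output is

     incw z0.s, all, mul #2

   where "z0.s" stands in for the OPERANDS template.  */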
43e9d192 2275
82614948
RR
2276static int
2277aarch64_internal_mov_immediate (rtx dest, rtx imm, bool generate,
77e994c9 2278 scalar_int_mode mode)
43e9d192 2279{
43e9d192 2280 int i;
9a4865db
WD
2281 unsigned HOST_WIDE_INT val, val2, mask;
2282 int one_match, zero_match;
2283 int num_insns;
43e9d192 2284
9a4865db
WD
2285 val = INTVAL (imm);
2286
2287 if (aarch64_move_imm (val, mode))
43e9d192 2288 {
82614948 2289 if (generate)
f7df4a84 2290 emit_insn (gen_rtx_SET (dest, imm));
9a4865db 2291 return 1;
43e9d192
IB
2292 }
2293
9de00935
TC
2294 /* Check to see if the low 32 bits are either 0xffffXXXX or 0xXXXXffff
2295 (with XXXX non-zero). In that case check to see if the move can be done in
2296 a smaller mode. */
2297 val2 = val & 0xffffffff;
2298 if (mode == DImode
2299 && aarch64_move_imm (val2, SImode)
2300 && (((val >> 32) & 0xffff) == 0 || (val >> 48) == 0))
2301 {
2302 if (generate)
2303 emit_insn (gen_rtx_SET (dest, GEN_INT (val2)));
2304
2305 /* Check if we have to emit a second instruction by checking to see
2306 if any of the upper 32 bits of the original DI mode value is set. */
2307 if (val == val2)
2308 return 1;
2309
2310 i = (val >> 48) ? 48 : 32;
2311
2312 if (generate)
2313 emit_insn (gen_insv_immdi (dest, GEN_INT (i),
2314 GEN_INT ((val >> i) & 0xffff)));
2315
2316 return 2;
2317 }
2318
9a4865db 2319 if ((val >> 32) == 0 || mode == SImode)
43e9d192 2320 {
82614948
RR
2321 if (generate)
2322 {
9a4865db
WD
2323 emit_insn (gen_rtx_SET (dest, GEN_INT (val & 0xffff)));
2324 if (mode == SImode)
2325 emit_insn (gen_insv_immsi (dest, GEN_INT (16),
2326 GEN_INT ((val >> 16) & 0xffff)));
2327 else
2328 emit_insn (gen_insv_immdi (dest, GEN_INT (16),
2329 GEN_INT ((val >> 16) & 0xffff)));
82614948 2330 }
9a4865db 2331 return 2;
43e9d192
IB
2332 }
2333
2334 /* Remaining cases are all for DImode. */
2335
43e9d192 2336 mask = 0xffff;
9a4865db
WD
2337 zero_match = ((val & mask) == 0) + ((val & (mask << 16)) == 0) +
2338 ((val & (mask << 32)) == 0) + ((val & (mask << 48)) == 0);
2339 one_match = ((~val & mask) == 0) + ((~val & (mask << 16)) == 0) +
2340 ((~val & (mask << 32)) == 0) + ((~val & (mask << 48)) == 0);
43e9d192 2341
62c8d76c 2342 if (zero_match != 2 && one_match != 2)
43e9d192 2343 {
62c8d76c
WD
2344 /* Try emitting a bitmask immediate with a movk replacing 16 bits.
2345 For a 64-bit bitmask try whether changing 16 bits to all ones or
2346 zeroes creates a valid bitmask. To check any repeated bitmask,
2347 try using 16 bits from the other 32-bit half of val. */
43e9d192 2348
62c8d76c 2349 for (i = 0; i < 64; i += 16, mask <<= 16)
43e9d192 2350 {
62c8d76c
WD
2351 val2 = val & ~mask;
2352 if (val2 != val && aarch64_bitmask_imm (val2, mode))
2353 break;
2354 val2 = val | mask;
2355 if (val2 != val && aarch64_bitmask_imm (val2, mode))
2356 break;
2357 val2 = val2 & ~mask;
2358 val2 = val2 | (((val2 >> 32) | (val2 << 32)) & mask);
2359 if (val2 != val && aarch64_bitmask_imm (val2, mode))
2360 break;
43e9d192 2361 }
62c8d76c 2362 if (i != 64)
43e9d192 2363 {
62c8d76c 2364 if (generate)
43e9d192 2365 {
62c8d76c
WD
2366 emit_insn (gen_rtx_SET (dest, GEN_INT (val2)));
2367 emit_insn (gen_insv_immdi (dest, GEN_INT (i),
9a4865db 2368 GEN_INT ((val >> i) & 0xffff)));
43e9d192 2369 }
1312b1ba 2370 return 2;
43e9d192
IB
2371 }
2372 }
2373
9a4865db
WD
2374 /* Generate 2-4 instructions, skipping 16 bits of all zeroes or ones which
2375 are emitted by the initial mov. If one_match > zero_match, skip set bits,
2376 otherwise skip zero bits. */
2c274197 2377
9a4865db 2378 num_insns = 1;
43e9d192 2379 mask = 0xffff;
9a4865db
WD
2380 val2 = one_match > zero_match ? ~val : val;
2381 i = (val2 & mask) != 0 ? 0 : (val2 & (mask << 16)) != 0 ? 16 : 32;
2382
2383 if (generate)
2384 emit_insn (gen_rtx_SET (dest, GEN_INT (one_match > zero_match
2385 ? (val | ~(mask << i))
2386 : (val & (mask << i)))));
2387 for (i += 16; i < 64; i += 16)
43e9d192 2388 {
9a4865db
WD
2389 if ((val2 & (mask << i)) == 0)
2390 continue;
2391 if (generate)
2392 emit_insn (gen_insv_immdi (dest, GEN_INT (i),
2393 GEN_INT ((val >> i) & 0xffff)));
2394 num_insns ++;
82614948
RR
2395 }
2396
2397 return num_insns;
2398}
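/* Worked example (editor's sketch): val == 0x1234000056780000 has a
   low half (0x56780000) that is a valid 32-bit move immediate, and
   only bits [63:48] of the high half are set, so the expansion is:

     movz x0, #0x5678, lsl #16
     movk x0, #0x1234, lsl #48

   and the function returns 2.  */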
2399
c0bb5bc5
WD
2400/* Return whether imm is a 128-bit immediate which is simple enough to
2401 expand inline. */
2402bool
2403aarch64_mov128_immediate (rtx imm)
2404{
2405 if (GET_CODE (imm) == CONST_INT)
2406 return true;
2407
2408 gcc_assert (CONST_WIDE_INT_NUNITS (imm) == 2);
2409
2410 rtx lo = GEN_INT (CONST_WIDE_INT_ELT (imm, 0));
2411 rtx hi = GEN_INT (CONST_WIDE_INT_ELT (imm, 1));
2412
2413 return aarch64_internal_mov_immediate (NULL_RTX, lo, false, DImode)
2414 + aarch64_internal_mov_immediate (NULL_RTX, hi, false, DImode) <= 4;
2415}
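/* For example (editor's note): a 128-bit constant whose low half is 0x1
   and whose high half is 0x2 costs one instruction per 64-bit half, so
   aarch64_mov128_immediate returns true (2 <= 4 instructions).  */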
2416
2417
43cacb12
RS
2418/* Return the number of temporary registers that aarch64_add_offset_1
2419 would need to add OFFSET to a register. */
2420
2421static unsigned int
2422aarch64_add_offset_1_temporaries (HOST_WIDE_INT offset)
2423{
2424 return abs_hwi (offset) < 0x1000000 ? 0 : 1;
2425}
2426
f5470a77
RS
2427/* A subroutine of aarch64_add_offset. Set DEST to SRC + OFFSET for
2428 a non-polynomial OFFSET. MODE is the mode of the addition.
2429 FRAME_RELATED_P is true if the RTX_FRAME_RELATED flag should
2430 be set and CFA adjustments added to the generated instructions.
2431
2432 TEMP1, if nonnull, is a register of mode MODE that can be used as a
2433 temporary if register allocation is already complete. This temporary
2434 register may overlap DEST but must not overlap SRC. If TEMP1 is known
2435 to hold abs (OFFSET), EMIT_MOVE_IMM can be set to false to avoid emitting
2436 the immediate again.
0100c5f9
RS
2437
2438 Since this function may be used to adjust the stack pointer, we must
2439 ensure that it cannot cause transient stack deallocation (for example
2440 by first incrementing SP and then decrementing when adjusting by a
2441 large immediate). */
2442
2443static void
f5470a77
RS
2444aarch64_add_offset_1 (scalar_int_mode mode, rtx dest,
2445 rtx src, HOST_WIDE_INT offset, rtx temp1,
2446 bool frame_related_p, bool emit_move_imm)
0100c5f9 2447{
f5470a77
RS
2448 gcc_assert (emit_move_imm || temp1 != NULL_RTX);
2449 gcc_assert (temp1 == NULL_RTX || !reg_overlap_mentioned_p (temp1, src));
2450
2451 HOST_WIDE_INT moffset = abs_hwi (offset);
0100c5f9
RS
2452 rtx_insn *insn;
2453
f5470a77
RS
2454 if (!moffset)
2455 {
2456 if (!rtx_equal_p (dest, src))
2457 {
2458 insn = emit_insn (gen_rtx_SET (dest, src));
2459 RTX_FRAME_RELATED_P (insn) = frame_related_p;
2460 }
2461 return;
2462 }
0100c5f9
RS
2463
2464 /* Single instruction adjustment. */
f5470a77 2465 if (aarch64_uimm12_shift (moffset))
0100c5f9 2466 {
f5470a77 2467 insn = emit_insn (gen_add3_insn (dest, src, GEN_INT (offset)));
0100c5f9
RS
2468 RTX_FRAME_RELATED_P (insn) = frame_related_p;
2469 return;
2470 }
2471
f5470a77
RS
2472 /* Emit 2 additions/subtractions if the adjustment is less than 24 bits
2473 and either:
2474
2475 a) the offset cannot be loaded by a 16-bit move or
2476 b) there is no spare register into which we can move it. */
2477 if (moffset < 0x1000000
2478 && ((!temp1 && !can_create_pseudo_p ())
2479 || !aarch64_move_imm (moffset, mode)))
0100c5f9 2480 {
f5470a77 2481 HOST_WIDE_INT low_off = moffset & 0xfff;
0100c5f9 2482
f5470a77
RS
2483 low_off = offset < 0 ? -low_off : low_off;
2484 insn = emit_insn (gen_add3_insn (dest, src, GEN_INT (low_off)));
0100c5f9 2485 RTX_FRAME_RELATED_P (insn) = frame_related_p;
f5470a77 2486 insn = emit_insn (gen_add2_insn (dest, GEN_INT (offset - low_off)));
0100c5f9
RS
2487 RTX_FRAME_RELATED_P (insn) = frame_related_p;
2488 return;
2489 }
2490
2491 /* Emit a move immediate if required and an addition/subtraction. */
0100c5f9 2492 if (emit_move_imm)
f5470a77
RS
2493 {
2494 gcc_assert (temp1 != NULL_RTX || can_create_pseudo_p ());
2495 temp1 = aarch64_force_temporary (mode, temp1, GEN_INT (moffset));
2496 }
2497 insn = emit_insn (offset < 0
2498 ? gen_sub3_insn (dest, src, temp1)
2499 : gen_add3_insn (dest, src, temp1));
0100c5f9
RS
2500 if (frame_related_p)
2501 {
2502 RTX_FRAME_RELATED_P (insn) = frame_related_p;
f5470a77
RS
2503 rtx adj = plus_constant (mode, src, offset);
2504 add_reg_note (insn, REG_CFA_ADJUST_CFA, gen_rtx_SET (dest, adj));
0100c5f9
RS
2505 }
2506}
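/* Worked example (editor's sketch): adjusting by 0x123456 with no spare
   temporary falls into the two-addition case above, since the offset is
   below 1 << 24 but is not a valid move immediate:

     add dest, src, #0x456
     add dest, dest, #0x123000

   Each immediate fits the 12-bit (optionally LSL #12) ADD range, and
   for a stack adjustment both steps move SP in the same direction.  */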
2507
43cacb12
RS
2508/* Return the number of temporary registers that aarch64_add_offset
2509 would need to move OFFSET into a register or add OFFSET to a register;
2510 ADD_P is true if we want the latter rather than the former. */
2511
2512static unsigned int
2513aarch64_offset_temporaries (bool add_p, poly_int64 offset)
2514{
2515 /* This follows the same structure as aarch64_add_offset. */
2516 if (add_p && aarch64_sve_addvl_addpl_immediate_p (offset))
2517 return 0;
2518
2519 unsigned int count = 0;
2520 HOST_WIDE_INT factor = offset.coeffs[1];
2521 HOST_WIDE_INT constant = offset.coeffs[0] - factor;
2522 poly_int64 poly_offset (factor, factor);
2523 if (add_p && aarch64_sve_addvl_addpl_immediate_p (poly_offset))
2524 /* Need one register for the ADDVL/ADDPL result. */
2525 count += 1;
2526 else if (factor != 0)
2527 {
2528 factor = abs (factor);
2529 if (factor > 16 * (factor & -factor))
2530 /* Need one register for the CNT result and one for the multiplication
2531 factor. If necessary, the second temporary can be reused for the
2532 constant part of the offset. */
2533 return 2;
2534 /* Need one register for the CNT result (which might then
2535 be shifted). */
2536 count += 1;
2537 }
2538 return count + aarch64_add_offset_1_temporaries (constant);
2539}
2540
2541/* If X can be represented as a poly_int64, return the number
2542 of temporaries that are required to add it to a register.
2543 Return -1 otherwise. */
2544
2545int
2546aarch64_add_offset_temporaries (rtx x)
2547{
2548 poly_int64 offset;
2549 if (!poly_int_rtx_p (x, &offset))
2550 return -1;
2551 return aarch64_offset_temporaries (true, offset);
2552}
2553
f5470a77
RS
2554/* Set DEST to SRC + OFFSET. MODE is the mode of the addition.
2555 FRAME_RELATED_P is true if the RTX_FRAME_RELATED flag should
2556 be set and CFA adjustments added to the generated instructions.
2557
2558 TEMP1, if nonnull, is a register of mode MODE that can be used as a
2559 temporary if register allocation is already complete. This temporary
43cacb12
RS
2560 register may overlap DEST if !FRAME_RELATED_P but must not overlap SRC.
2561 If TEMP1 is known to hold abs (OFFSET), EMIT_MOVE_IMM can be set to
2562 false to avoid emitting the immediate again.
2563
2564 TEMP2, if nonnull, is a second temporary register that doesn't
2565 overlap either DEST or REG.
f5470a77
RS
2566
2567 Since this function may be used to adjust the stack pointer, we must
2568 ensure that it cannot cause transient stack deallocation (for example
2569 by first incrementing SP and then decrementing when adjusting by a
2570 large immediate). */
2571
2572static void
2573aarch64_add_offset (scalar_int_mode mode, rtx dest, rtx src,
43cacb12
RS
2574 poly_int64 offset, rtx temp1, rtx temp2,
2575 bool frame_related_p, bool emit_move_imm = true)
0100c5f9 2576{
f5470a77
RS
2577 gcc_assert (emit_move_imm || temp1 != NULL_RTX);
2578 gcc_assert (temp1 == NULL_RTX || !reg_overlap_mentioned_p (temp1, src));
43cacb12
RS
2579 gcc_assert (temp1 == NULL_RTX
2580 || !frame_related_p
2581 || !reg_overlap_mentioned_p (temp1, dest));
2582 gcc_assert (temp2 == NULL_RTX || !reg_overlap_mentioned_p (dest, temp2));
2583
2584 /* Try using ADDVL or ADDPL to add the whole value. */
2585 if (src != const0_rtx && aarch64_sve_addvl_addpl_immediate_p (offset))
2586 {
2587 rtx offset_rtx = gen_int_mode (offset, mode);
2588 rtx_insn *insn = emit_insn (gen_add3_insn (dest, src, offset_rtx));
2589 RTX_FRAME_RELATED_P (insn) = frame_related_p;
2590 return;
2591 }
2592
2593 /* Coefficient 1 is multiplied by the number of 128-bit blocks in an
2594 SVE vector register, over and above the minimum size of 128 bits.
2595 This is equivalent to half the value returned by CNTD with a
2596 vector shape of ALL. */
2597 HOST_WIDE_INT factor = offset.coeffs[1];
2598 HOST_WIDE_INT constant = offset.coeffs[0] - factor;
2599
2600 /* Try using ADDVL or ADDPL to add the VG-based part. */
2601 poly_int64 poly_offset (factor, factor);
2602 if (src != const0_rtx
2603 && aarch64_sve_addvl_addpl_immediate_p (poly_offset))
2604 {
2605 rtx offset_rtx = gen_int_mode (poly_offset, mode);
2606 if (frame_related_p)
2607 {
2608 rtx_insn *insn = emit_insn (gen_add3_insn (dest, src, offset_rtx));
2609 RTX_FRAME_RELATED_P (insn) = true;
2610 src = dest;
2611 }
2612 else
2613 {
2614 rtx addr = gen_rtx_PLUS (mode, src, offset_rtx);
2615 src = aarch64_force_temporary (mode, temp1, addr);
2616 temp1 = temp2;
2617 temp2 = NULL_RTX;
2618 }
2619 }
2620 /* Otherwise use a CNT-based sequence. */
2621 else if (factor != 0)
2622 {
2623 /* Use a subtraction if we have a negative factor. */
2624 rtx_code code = PLUS;
2625 if (factor < 0)
2626 {
2627 factor = -factor;
2628 code = MINUS;
2629 }
2630
2631 /* Calculate CNTD * FACTOR / 2. First try to fold the division
2632 into the multiplication. */
2633 rtx val;
2634 int shift = 0;
2635 if (factor & 1)
2636 /* Use a right shift by 1. */
2637 shift = -1;
2638 else
2639 factor /= 2;
2640 HOST_WIDE_INT low_bit = factor & -factor;
2641 if (factor <= 16 * low_bit)
2642 {
2643 if (factor > 16 * 8)
2644 {
2645 /* "CNTB Xn, ALL, MUL #FACTOR" is out of range, so calculate
2646 the value with the minimum multiplier and shift it into
2647 position. */
2648 int extra_shift = exact_log2 (low_bit);
2649 shift += extra_shift;
2650 factor >>= extra_shift;
2651 }
2652 val = gen_int_mode (poly_int64 (factor * 2, factor * 2), mode);
2653 }
2654 else
2655 {
2656 /* Use CNTD, then multiply it by FACTOR. */
2657 val = gen_int_mode (poly_int64 (2, 2), mode);
2658 val = aarch64_force_temporary (mode, temp1, val);
2659
2660 /* Go back to using a negative multiplication factor if we have
2661 no register from which to subtract. */
2662 if (code == MINUS && src == const0_rtx)
2663 {
2664 factor = -factor;
2665 code = PLUS;
2666 }
2667 rtx coeff1 = gen_int_mode (factor, mode);
2668 coeff1 = aarch64_force_temporary (mode, temp2, coeff1);
2669 val = gen_rtx_MULT (mode, val, coeff1);
2670 }
2671
2672 if (shift > 0)
2673 {
2674 /* Multiply by 1 << SHIFT. */
2675 val = aarch64_force_temporary (mode, temp1, val);
2676 val = gen_rtx_ASHIFT (mode, val, GEN_INT (shift));
2677 }
2678 else if (shift == -1)
2679 {
2680 /* Divide by 2. */
2681 val = aarch64_force_temporary (mode, temp1, val);
2682 val = gen_rtx_ASHIFTRT (mode, val, const1_rtx);
2683 }
2684
2685 /* Calculate SRC +/- CNTD * FACTOR / 2. */
2686 if (src != const0_rtx)
2687 {
2688 val = aarch64_force_temporary (mode, temp1, val);
2689 val = gen_rtx_fmt_ee (code, mode, src, val);
2690 }
2691 else if (code == MINUS)
2692 {
2693 val = aarch64_force_temporary (mode, temp1, val);
2694 val = gen_rtx_NEG (mode, val);
2695 }
2696
2697 if (constant == 0 || frame_related_p)
2698 {
2699 rtx_insn *insn = emit_insn (gen_rtx_SET (dest, val));
2700 if (frame_related_p)
2701 {
2702 RTX_FRAME_RELATED_P (insn) = true;
2703 add_reg_note (insn, REG_CFA_ADJUST_CFA,
2704 gen_rtx_SET (dest, plus_constant (Pmode, src,
2705 poly_offset)));
2706 }
2707 src = dest;
2708 if (constant == 0)
2709 return;
2710 }
2711 else
2712 {
2713 src = aarch64_force_temporary (mode, temp1, val);
2714 temp1 = temp2;
2715 temp2 = NULL_RTX;
2716 }
2717
2718 emit_move_imm = true;
2719 }
f5470a77 2720
f5470a77
RS
2721 aarch64_add_offset_1 (mode, dest, src, constant, temp1,
2722 frame_related_p, emit_move_imm);
0100c5f9
RS
2723}
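/* Worked example (editor's sketch): OFFSET == poly_int64 (48, 32)
   splits into FACTOR == 32 and CONSTANT == 16. The VG-based part
   (32, 32) is a valid ADDVL immediate, so a frame-related SP
   adjustment becomes:

     addvl sp, sp, #2    // 2 * VL bytes
     add   sp, sp, #16   // constant part, via aarch64_add_offset_1  */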
2724
43cacb12
RS
2725/* Like aarch64_add_offset, but the offset is given as an rtx rather
2726 than a poly_int64. */
2727
2728void
2729aarch64_split_add_offset (scalar_int_mode mode, rtx dest, rtx src,
2730 rtx offset_rtx, rtx temp1, rtx temp2)
2731{
2732 aarch64_add_offset (mode, dest, src, rtx_to_poly_int64 (offset_rtx),
2733 temp1, temp2, false);
2734}
2735
f5470a77
RS
2736/* Add DELTA to the stack pointer, marking the instructions frame-related.
2737 TEMP1 is available as a temporary if nonnull. EMIT_MOVE_IMM is false
2738 if TEMP1 already contains abs (DELTA). */
2739
0100c5f9 2740static inline void
43cacb12 2741aarch64_add_sp (rtx temp1, rtx temp2, poly_int64 delta, bool emit_move_imm)
0100c5f9 2742{
f5470a77 2743 aarch64_add_offset (Pmode, stack_pointer_rtx, stack_pointer_rtx, delta,
43cacb12 2744 temp1, temp2, true, emit_move_imm);
0100c5f9
RS
2745}
2746
f5470a77
RS
2747/* Subtract DELTA from the stack pointer, marking the instructions
2748 frame-related if FRAME_RELATED_P. TEMP1 is available as a temporary
2749 if nonnull. */
2750
0100c5f9 2751static inline void
43cacb12 2752aarch64_sub_sp (rtx temp1, rtx temp2, poly_int64 delta, bool frame_related_p)
0100c5f9 2753{
f5470a77 2754 aarch64_add_offset (Pmode, stack_pointer_rtx, stack_pointer_rtx, -delta,
43cacb12 2755 temp1, temp2, frame_related_p);
0100c5f9 2756}
82614948 2757
43cacb12
RS
2758/* Set DEST to (vec_series BASE STEP). */
2759
2760static void
2761aarch64_expand_vec_series (rtx dest, rtx base, rtx step)
82614948
RR
2762{
2763 machine_mode mode = GET_MODE (dest);
43cacb12
RS
2764 scalar_mode inner = GET_MODE_INNER (mode);
2765
2766 /* Each operand can be a register or an immediate in the range [-16, 15]. */
2767 if (!aarch64_sve_index_immediate_p (base))
2768 base = force_reg (inner, base);
2769 if (!aarch64_sve_index_immediate_p (step))
2770 step = force_reg (inner, step);
2771
2772 emit_set_insn (dest, gen_rtx_VEC_SERIES (mode, base, step));
2773}
82614948 2774
43cacb12
RS
2775/* Try to duplicate SRC into SVE register DEST, given that SRC is an
2776 integer of mode INT_MODE. Return true on success. */
2777
2778static bool
2779aarch64_expand_sve_widened_duplicate (rtx dest, scalar_int_mode src_mode,
2780 rtx src)
2781{
2782 /* If the constant is smaller than 128 bits, we can do the move
2783 using a vector of SRC_MODEs. */
2784 if (src_mode != TImode)
2785 {
2786 poly_uint64 count = exact_div (GET_MODE_SIZE (GET_MODE (dest)),
2787 GET_MODE_SIZE (src_mode));
2788 machine_mode dup_mode = mode_for_vector (src_mode, count).require ();
2789 emit_move_insn (gen_lowpart (dup_mode, dest),
2790 gen_const_vec_duplicate (dup_mode, src));
2791 return true;
2792 }
2793
947b1372 2794 /* Use LD1RQ[BHWD] to load the 128 bits from memory. */
43cacb12
RS
2795 src = force_const_mem (src_mode, src);
2796 if (!src)
2797 return false;
2798
2799 /* Make sure that the address is legitimate. */
2800 if (!aarch64_sve_ld1r_operand_p (src))
2801 {
2802 rtx addr = force_reg (Pmode, XEXP (src, 0));
2803 src = replace_equiv_address (src, addr);
2804 }
2805
947b1372
RS
2806 machine_mode mode = GET_MODE (dest);
2807 unsigned int elem_bytes = GET_MODE_UNIT_SIZE (mode);
2808 machine_mode pred_mode = aarch64_sve_pred_mode (elem_bytes).require ();
2809 rtx ptrue = force_reg (pred_mode, CONSTM1_RTX (pred_mode));
2810 src = gen_rtx_UNSPEC (mode, gen_rtvec (2, ptrue, src), UNSPEC_LD1RQ);
2811 emit_insn (gen_rtx_SET (dest, src));
43cacb12
RS
2812 return true;
2813}
2814
2815/* Expand a move of general CONST_VECTOR SRC into DEST, given that it
2816 isn't a simple duplicate or series. */
2817
2818static void
2819aarch64_expand_sve_const_vector (rtx dest, rtx src)
2820{
2821 machine_mode mode = GET_MODE (src);
2822 unsigned int npatterns = CONST_VECTOR_NPATTERNS (src);
2823 unsigned int nelts_per_pattern = CONST_VECTOR_NELTS_PER_PATTERN (src);
2824 gcc_assert (npatterns > 1);
2825
2826 if (nelts_per_pattern == 1)
2827 {
2828 /* The constant is a repeating sequence of at least two elements,
2829 where the repeating elements occupy no more than 128 bits.
2830 Get an integer representation of the replicated value. */
8179efe0
RS
2831 scalar_int_mode int_mode;
2832 if (BYTES_BIG_ENDIAN)
2833 /* For now, always use LD1RQ to load the value on big-endian
2834 targets, since the handling of smaller integers includes a
2835 subreg that is semantically an element reverse. */
2836 int_mode = TImode;
2837 else
2838 {
2839 unsigned int int_bits = GET_MODE_UNIT_BITSIZE (mode) * npatterns;
2840 gcc_assert (int_bits <= 128);
2841 int_mode = int_mode_for_size (int_bits, 0).require ();
2842 }
43cacb12
RS
2843 rtx int_value = simplify_gen_subreg (int_mode, src, mode, 0);
2844 if (int_value
2845 && aarch64_expand_sve_widened_duplicate (dest, int_mode, int_value))
2846 return;
2847 }
2848
2849 /* Expand each pattern individually. */
2850 rtx_vector_builder builder;
2851 auto_vec<rtx, 16> vectors (npatterns);
2852 for (unsigned int i = 0; i < npatterns; ++i)
2853 {
2854 builder.new_vector (mode, 1, nelts_per_pattern);
2855 for (unsigned int j = 0; j < nelts_per_pattern; ++j)
2856 builder.quick_push (CONST_VECTOR_ELT (src, i + j * npatterns));
2857 vectors.quick_push (force_reg (mode, builder.build ()));
2858 }
2859
2860 /* Use permutes to interleave the separate vectors. */
2861 while (npatterns > 1)
2862 {
2863 npatterns /= 2;
2864 for (unsigned int i = 0; i < npatterns; ++i)
2865 {
2866 rtx tmp = (npatterns == 1 ? dest : gen_reg_rtx (mode));
2867 rtvec v = gen_rtvec (2, vectors[i], vectors[i + npatterns]);
2868 emit_set_insn (tmp, gen_rtx_UNSPEC (mode, v, UNSPEC_ZIP1));
2869 vectors[i] = tmp;
2870 }
2871 }
2872 gcc_assert (vectors[0] == dest);
2873}
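/* Illustrative example (editor's note): a constant such as
   { 0, 0, 1, 8, 2, 16, ... } has NPATTERNS == 2 and
   NELTS_PER_PATTERN == 3, i.e. two interleaved linear series
   (0, 1, 2, ... and 0, 8, 16, ...). Each series is built into its
   own register and the pair is then interleaved with a single ZIP1.  */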
2874
2875/* Set DEST to immediate IMM. For SVE vector modes, GEN_VEC_DUPLICATE
2876 is a pattern that can be used to set DEST to a replicated scalar
2877 element. */
2878
2879void
2880aarch64_expand_mov_immediate (rtx dest, rtx imm,
2881 rtx (*gen_vec_duplicate) (rtx, rtx))
2882{
2883 machine_mode mode = GET_MODE (dest);
82614948
RR
2884
2885 /* Check on what type of symbol it is. */
77e994c9
RS
2886 scalar_int_mode int_mode;
2887 if ((GET_CODE (imm) == SYMBOL_REF
2888 || GET_CODE (imm) == LABEL_REF
43cacb12
RS
2889 || GET_CODE (imm) == CONST
2890 || GET_CODE (imm) == CONST_POLY_INT)
77e994c9 2891 && is_a <scalar_int_mode> (mode, &int_mode))
82614948 2892 {
43cacb12
RS
2893 rtx mem;
2894 poly_int64 offset;
2895 HOST_WIDE_INT const_offset;
82614948
RR
2896 enum aarch64_symbol_type sty;
2897
2898 /* If we have (const (plus symbol offset)), separate out the offset
2899 before we start classifying the symbol. */
43cacb12 2900 rtx base = strip_offset (imm, &offset);
82614948 2901
43cacb12
RS
2902 /* We must always add an offset involving VL separately, rather than
2903 folding it into the relocation. */
2904 if (!offset.is_constant (&const_offset))
2905 {
2906 if (base == const0_rtx && aarch64_sve_cnt_immediate_p (offset))
2907 emit_insn (gen_rtx_SET (dest, imm));
2908 else
2909 {
2910 /* Do arithmetic on 32-bit values if the result is smaller
2911 than that. */
2912 if (partial_subreg_p (int_mode, SImode))
2913 {
2914 /* It is invalid to do symbol calculations in modes
2915 narrower than SImode. */
2916 gcc_assert (base == const0_rtx);
2917 dest = gen_lowpart (SImode, dest);
2918 int_mode = SImode;
2919 }
2920 if (base != const0_rtx)
2921 {
2922 base = aarch64_force_temporary (int_mode, dest, base);
2923 aarch64_add_offset (int_mode, dest, base, offset,
2924 NULL_RTX, NULL_RTX, false);
2925 }
2926 else
2927 aarch64_add_offset (int_mode, dest, base, offset,
2928 dest, NULL_RTX, false);
2929 }
2930 return;
2931 }
2932
2933 sty = aarch64_classify_symbol (base, const_offset);
82614948
RR
2934 switch (sty)
2935 {
2936 case SYMBOL_FORCE_TO_MEM:
43cacb12 2937 if (const_offset != 0
77e994c9 2938 && targetm.cannot_force_const_mem (int_mode, imm))
82614948
RR
2939 {
2940 gcc_assert (can_create_pseudo_p ());
77e994c9 2941 base = aarch64_force_temporary (int_mode, dest, base);
43cacb12
RS
2942 aarch64_add_offset (int_mode, dest, base, const_offset,
2943 NULL_RTX, NULL_RTX, false);
82614948
RR
2944 return;
2945 }
b4f50fd4 2946
82614948
RR
2947 mem = force_const_mem (ptr_mode, imm);
2948 gcc_assert (mem);
b4f50fd4
RR
2949
2950 /* If we aren't generating PC relative literals, then
2951 we need to expand the literal pool access carefully.
2952 This is something that needs to be done in a number
2953 of places, so could well live as a separate function. */
9ee6540a 2954 if (!aarch64_pcrelative_literal_loads)
b4f50fd4
RR
2955 {
2956 gcc_assert (can_create_pseudo_p ());
2957 base = gen_reg_rtx (ptr_mode);
2958 aarch64_expand_mov_immediate (base, XEXP (mem, 0));
00eee3fa
WD
2959 if (ptr_mode != Pmode)
2960 base = convert_memory_address (Pmode, base);
b4f50fd4
RR
2961 mem = gen_rtx_MEM (ptr_mode, base);
2962 }
2963
77e994c9
RS
2964 if (int_mode != ptr_mode)
2965 mem = gen_rtx_ZERO_EXTEND (int_mode, mem);
b4f50fd4 2966
f7df4a84 2967 emit_insn (gen_rtx_SET (dest, mem));
b4f50fd4 2968
82614948
RR
2969 return;
2970
2971 case SYMBOL_SMALL_TLSGD:
2972 case SYMBOL_SMALL_TLSDESC:
79496620 2973 case SYMBOL_SMALL_TLSIE:
1b1e81f8 2974 case SYMBOL_SMALL_GOT_28K:
6642bdb4 2975 case SYMBOL_SMALL_GOT_4G:
82614948 2976 case SYMBOL_TINY_GOT:
5ae7caad 2977 case SYMBOL_TINY_TLSIE:
43cacb12 2978 if (const_offset != 0)
82614948
RR
2979 {
2980 gcc_assert(can_create_pseudo_p ());
77e994c9 2981 base = aarch64_force_temporary (int_mode, dest, base);
43cacb12
RS
2982 aarch64_add_offset (int_mode, dest, base, const_offset,
2983 NULL_RTX, NULL_RTX, false);
82614948
RR
2984 return;
2985 }
2986 /* FALLTHRU */
2987
82614948
RR
2988 case SYMBOL_SMALL_ABSOLUTE:
2989 case SYMBOL_TINY_ABSOLUTE:
cbf5629e 2990 case SYMBOL_TLSLE12:
d18ba284 2991 case SYMBOL_TLSLE24:
cbf5629e
JW
2992 case SYMBOL_TLSLE32:
2993 case SYMBOL_TLSLE48:
82614948
RR
2994 aarch64_load_symref_appropriately (dest, imm, sty);
2995 return;
2996
2997 default:
2998 gcc_unreachable ();
2999 }
3000 }
3001
3002 if (!CONST_INT_P (imm))
3003 {
43cacb12
RS
3004 rtx base, step, value;
3005 if (GET_CODE (imm) == HIGH
3006 || aarch64_simd_valid_immediate (imm, NULL))
f7df4a84 3007 emit_insn (gen_rtx_SET (dest, imm));
43cacb12
RS
3008 else if (const_vec_series_p (imm, &base, &step))
3009 aarch64_expand_vec_series (dest, base, step);
3010 else if (const_vec_duplicate_p (imm, &value))
3011 {
3012 /* If the constant is out of range of an SVE vector move,
3013 load it from memory if we can, otherwise move it into
3014 a register and use a DUP. */
3015 scalar_mode inner_mode = GET_MODE_INNER (mode);
3016 rtx op = force_const_mem (inner_mode, value);
3017 if (!op)
3018 op = force_reg (inner_mode, value);
3019 else if (!aarch64_sve_ld1r_operand_p (op))
3020 {
3021 rtx addr = force_reg (Pmode, XEXP (op, 0));
3022 op = replace_equiv_address (op, addr);
3023 }
3024 emit_insn (gen_vec_duplicate (dest, op));
3025 }
3026 else if (GET_CODE (imm) == CONST_VECTOR
3027 && !GET_MODE_NUNITS (GET_MODE (imm)).is_constant ())
3028 aarch64_expand_sve_const_vector (dest, imm);
82614948 3029 else
43cacb12 3030 {
82614948
RR
3031 rtx mem = force_const_mem (mode, imm);
3032 gcc_assert (mem);
43cacb12 3033 emit_move_insn (dest, mem);
43e9d192 3034 }
82614948
RR
3035
3036 return;
43e9d192 3037 }
82614948 3038
77e994c9
RS
3039 aarch64_internal_mov_immediate (dest, imm, true,
3040 as_a <scalar_int_mode> (mode));
43e9d192
IB
3041}
3042
43cacb12
RS
3043/* Emit an SVE predicated move from SRC to DEST. PRED is a predicate
3044 that is known to contain PTRUE. */
3045
3046void
3047aarch64_emit_sve_pred_move (rtx dest, rtx pred, rtx src)
3048{
3049 emit_insn (gen_rtx_SET (dest, gen_rtx_UNSPEC (GET_MODE (dest),
3050 gen_rtvec (2, pred, src),
3051 UNSPEC_MERGE_PTRUE)));
3052}
3053
3054/* Expand a pre-RA SVE data move from SRC to DEST in which at least one
3055 operand is in memory. In this case we need to use the predicated LD1
3056 and ST1 instead of LDR and STR, both for correctness on big-endian
3057 targets and because LD1 and ST1 support a wider range of addressing modes.
3058 PRED_MODE is the mode of the predicate.
3059
3060 See the comment at the head of aarch64-sve.md for details about the
3061 big-endian handling. */
3062
3063void
3064aarch64_expand_sve_mem_move (rtx dest, rtx src, machine_mode pred_mode)
3065{
3066 machine_mode mode = GET_MODE (dest);
3067 rtx ptrue = force_reg (pred_mode, CONSTM1_RTX (pred_mode));
3068 if (!register_operand (src, mode)
3069 && !register_operand (dest, mode))
3070 {
3071 rtx tmp = gen_reg_rtx (mode);
3072 if (MEM_P (src))
3073 aarch64_emit_sve_pred_move (tmp, ptrue, src);
3074 else
3075 emit_move_insn (tmp, src);
3076 src = tmp;
3077 }
3078 aarch64_emit_sve_pred_move (dest, ptrue, src);
3079}
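/* Illustrative expansion (editor's sketch, modulo exact operand
   syntax): a memory-to-memory SVE byte copy goes through the
   temporary-register path above, roughly:

     ptrue p0.b
     ld1b  z0.b, p0/z, [x0]
     st1b  z0.b, p0, [x1]  */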
3080
002092be
RS
3081/* Called only on big-endian targets. See whether an SVE vector move
3082 from SRC to DEST is effectively a REV[BHW] instruction, because at
3083 least one operand is a subreg of an SVE vector that has wider or
3084 narrower elements. Return true and emit the instruction if so.
3085
3086 For example:
3087
3088 (set (reg:VNx8HI R1) (subreg:VNx8HI (reg:VNx16QI R2) 0))
3089
3090 represents a VIEW_CONVERT between the following vectors, viewed
3091 in memory order:
3092
3093 R2: { [0].high, [0].low, [1].high, [1].low, ... }
3094 R1: { [0], [1], [2], [3], ... }
3095
3096 The high part of lane X in R2 should therefore correspond to lane X*2
3097 of R1, but the register representations are:
3098
3099 msb lsb
3100 R2: ...... [1].high [1].low [0].high [0].low
3101 R1: ...... [3] [2] [1] [0]
3102
3103 where the low part of lane X in R2 corresponds to lane X*2 in R1.
3104 We therefore need a reverse operation to swap the high and low values
3105 around.
3106
3107 This is purely an optimization. Without it we would spill the
3108 subreg operand to the stack in one mode and reload it in the
3109 other mode, which has the same effect as the REV. */
3110
3111bool
3112aarch64_maybe_expand_sve_subreg_move (rtx dest, rtx src)
3113{
3114 gcc_assert (BYTES_BIG_ENDIAN);
3115 if (GET_CODE (dest) == SUBREG)
3116 dest = SUBREG_REG (dest);
3117 if (GET_CODE (src) == SUBREG)
3118 src = SUBREG_REG (src);
3119
3120 /* The optimization handles two single SVE REGs with different element
3121 sizes. */
3122 if (!REG_P (dest)
3123 || !REG_P (src)
3124 || aarch64_classify_vector_mode (GET_MODE (dest)) != VEC_SVE_DATA
3125 || aarch64_classify_vector_mode (GET_MODE (src)) != VEC_SVE_DATA
3126 || (GET_MODE_UNIT_SIZE (GET_MODE (dest))
3127 == GET_MODE_UNIT_SIZE (GET_MODE (src))))
3128 return false;
3129
3130 /* Generate *aarch64_sve_mov<mode>_subreg_be. */
3131 rtx ptrue = force_reg (VNx16BImode, CONSTM1_RTX (VNx16BImode));
3132 rtx unspec = gen_rtx_UNSPEC (GET_MODE (dest), gen_rtvec (2, ptrue, src),
3133 UNSPEC_REV_SUBREG);
3134 emit_insn (gen_rtx_SET (dest, unspec));
3135 return true;
3136}
3137
3138/* Return a copy of X with mode MODE, without changing its other
3139 attributes. Unlike gen_lowpart, this doesn't care whether the
3140 mode change is valid. */
3141
3142static rtx
3143aarch64_replace_reg_mode (rtx x, machine_mode mode)
3144{
3145 if (GET_MODE (x) == mode)
3146 return x;
3147
3148 x = shallow_copy_rtx (x);
3149 set_mode_and_regno (x, mode, REGNO (x));
3150 return x;
3151}
3152
3153/* Split a *aarch64_sve_mov<mode>_subreg_be pattern with the given
3154 operands. */
3155
3156void
3157aarch64_split_sve_subreg_move (rtx dest, rtx ptrue, rtx src)
3158{
3159 /* Decide which REV operation we need. The mode with narrower elements
3160 determines the mode of the operands and the mode with the wider
3161 elements determines the reverse width. */
3162 machine_mode mode_with_wider_elts = GET_MODE (dest);
3163 machine_mode mode_with_narrower_elts = GET_MODE (src);
3164 if (GET_MODE_UNIT_SIZE (mode_with_wider_elts)
3165 < GET_MODE_UNIT_SIZE (mode_with_narrower_elts))
3166 std::swap (mode_with_wider_elts, mode_with_narrower_elts);
3167
3168 unsigned int wider_bytes = GET_MODE_UNIT_SIZE (mode_with_wider_elts);
3169 unsigned int unspec;
3170 if (wider_bytes == 8)
3171 unspec = UNSPEC_REV64;
3172 else if (wider_bytes == 4)
3173 unspec = UNSPEC_REV32;
3174 else if (wider_bytes == 2)
3175 unspec = UNSPEC_REV16;
3176 else
3177 gcc_unreachable ();
3178 machine_mode pred_mode = aarch64_sve_pred_mode (wider_bytes).require ();
3179
3180 /* Emit:
3181
3182 (set DEST (unspec [PTRUE (unspec [SRC] UNSPEC_REV<nn>)]
3183 UNSPEC_MERGE_PTRUE))
3184
3185 with the appropriate modes. */
3186 ptrue = gen_lowpart (pred_mode, ptrue);
3187 dest = aarch64_replace_reg_mode (dest, mode_with_narrower_elts);
3188 src = aarch64_replace_reg_mode (src, mode_with_narrower_elts);
3189 src = gen_rtx_UNSPEC (mode_with_narrower_elts, gen_rtvec (1, src), unspec);
3190 src = gen_rtx_UNSPEC (mode_with_narrower_elts, gen_rtvec (2, ptrue, src),
3191 UNSPEC_MERGE_PTRUE);
3192 emit_insn (gen_rtx_SET (dest, src));
3193}
3194
43e9d192 3195static bool
fee9ba42
JW
3196aarch64_function_ok_for_sibcall (tree decl ATTRIBUTE_UNUSED,
3197 tree exp ATTRIBUTE_UNUSED)
43e9d192 3198{
fee9ba42 3199 /* Currently, always true. */
43e9d192
IB
3200 return true;
3201}
3202
3203/* Implement TARGET_PASS_BY_REFERENCE. */
3204
3205static bool
3206aarch64_pass_by_reference (cumulative_args_t pcum ATTRIBUTE_UNUSED,
ef4bddc2 3207 machine_mode mode,
43e9d192
IB
3208 const_tree type,
3209 bool named ATTRIBUTE_UNUSED)
3210{
3211 HOST_WIDE_INT size;
ef4bddc2 3212 machine_mode dummymode;
43e9d192
IB
3213 int nregs;
3214
3215 /* GET_MODE_SIZE (BLKmode) is useless since it is 0. */
6a70badb
RS
3216 if (mode == BLKmode && type)
3217 size = int_size_in_bytes (type);
3218 else
3219 /* No frontends can create types with variable-sized modes, so we
3220 shouldn't be asked to pass or return them. */
3221 size = GET_MODE_SIZE (mode).to_constant ();
43e9d192 3222
aadc1c43
MHD
3223 /* Aggregates are passed by reference based on their size. */
3224 if (type && AGGREGATE_TYPE_P (type))
43e9d192 3225 {
aadc1c43 3226 size = int_size_in_bytes (type);
43e9d192
IB
3227 }
3228
3229 /* Variable sized arguments are always returned by reference. */
3230 if (size < 0)
3231 return true;
3232
3233 /* Can this be a candidate to be passed in fp/simd register(s)? */
3234 if (aarch64_vfp_is_call_or_return_candidate (mode, type,
3235 &dummymode, &nregs,
3236 NULL))
3237 return false;
3238
3239 /* Arguments which are variable sized or larger than 2 registers are
3240 passed by reference unless they are a homogeneous floating-point
3241 aggregate. */
3242 return size > 2 * UNITS_PER_WORD;
3243}
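/* For example (editor's note): a struct of four floats is a homogeneous
   floating-point aggregate and is passed in SIMD registers, whereas a
   struct of three 64-bit integers (24 bytes, more than two registers)
   is passed by reference.  */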
3244
3245/* Return TRUE if VALTYPE is padded to its least significant bits. */
3246static bool
3247aarch64_return_in_msb (const_tree valtype)
3248{
ef4bddc2 3249 machine_mode dummy_mode;
43e9d192
IB
3250 int dummy_int;
3251
3252 /* Never happens in little-endian mode. */
3253 if (!BYTES_BIG_ENDIAN)
3254 return false;
3255
3256 /* Only composite types smaller than or equal to 16 bytes can
3257 be potentially returned in registers. */
3258 if (!aarch64_composite_type_p (valtype, TYPE_MODE (valtype))
3259 || int_size_in_bytes (valtype) <= 0
3260 || int_size_in_bytes (valtype) > 16)
3261 return false;
3262
3263 /* But not a composite that is an HFA (Homogeneous Floating-point Aggregate)
3264 or an HVA (Homogeneous Short-Vector Aggregate); such a special composite
3265 is always passed/returned in the least significant bits of fp/simd
3266 register(s). */
3267 if (aarch64_vfp_is_call_or_return_candidate (TYPE_MODE (valtype), valtype,
3268 &dummy_mode, &dummy_int, NULL))
3269 return false;
3270
3271 return true;
3272}

/* Implement TARGET_FUNCTION_VALUE.
   Define how to find the value returned by a function.  */

static rtx
aarch64_function_value (const_tree type, const_tree func,
			bool outgoing ATTRIBUTE_UNUSED)
{
  machine_mode mode;
  int unsignedp;
  int count;
  machine_mode ag_mode;

  mode = TYPE_MODE (type);
  if (INTEGRAL_TYPE_P (type))
    mode = promote_function_mode (type, mode, &unsignedp, func, 1);

  if (aarch64_return_in_msb (type))
    {
      HOST_WIDE_INT size = int_size_in_bytes (type);

      if (size % UNITS_PER_WORD != 0)
	{
	  size += UNITS_PER_WORD - size % UNITS_PER_WORD;
	  mode = int_mode_for_size (size * BITS_PER_UNIT, 0).require ();
	}
    }

  if (aarch64_vfp_is_call_or_return_candidate (mode, type,
					       &ag_mode, &count, NULL))
    {
      if (!aarch64_composite_type_p (type, mode))
	{
	  gcc_assert (count == 1 && mode == ag_mode);
	  return gen_rtx_REG (mode, V0_REGNUM);
	}
      else
	{
	  int i;
	  rtx par;

	  par = gen_rtx_PARALLEL (mode, rtvec_alloc (count));
	  for (i = 0; i < count; i++)
	    {
	      rtx tmp = gen_rtx_REG (ag_mode, V0_REGNUM + i);
	      rtx offset = gen_int_mode (i * GET_MODE_SIZE (ag_mode), Pmode);
	      tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp, offset);
	      XVECEXP (par, 0, i) = tmp;
	    }
	  return par;
	}
    }
  else
    return gen_rtx_REG (mode, R0_REGNUM);
}
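
/* As a sketch of the PARALLEL case above (assumed example, not from the
   original source): returning

     struct hfa { float a, b, c, d; };

   produces a PARALLEL of four SFmode registers V0..V3, each EXPR_LIST
   recording the byte offset (0, 4, 8, 12) of the member it carries
   within the aggregate.  */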

/* Implements TARGET_FUNCTION_VALUE_REGNO_P.
   Return true if REGNO is the number of a hard register in which the values
   of a called function may come back.  */

static bool
aarch64_function_value_regno_p (const unsigned int regno)
{
  /* Maximum of 16 bytes can be returned in the general registers.  Examples
     of 16-byte return values are: 128-bit integers and 16-byte small
     structures (excluding homogeneous floating-point aggregates).  */
  if (regno == R0_REGNUM || regno == R1_REGNUM)
    return true;

  /* Up to four fp/simd registers can return a function value, e.g. a
     homogeneous floating-point aggregate having four members.  */
  if (regno >= V0_REGNUM && regno < V0_REGNUM + HA_MAX_NUM_FLDS)
    return TARGET_FLOAT;

  return false;
}

/* Implement TARGET_RETURN_IN_MEMORY.

   If the type T of the result of a function is such that
     void func (T arg)
   would require that arg be passed as a value in a register (or set of
   registers) according to the parameter passing rules, then the result
   is returned in the same registers as would be used for such an
   argument.  */

static bool
aarch64_return_in_memory (const_tree type, const_tree fndecl ATTRIBUTE_UNUSED)
{
  HOST_WIDE_INT size;
  machine_mode ag_mode;
  int count;

  if (!AGGREGATE_TYPE_P (type)
      && TREE_CODE (type) != COMPLEX_TYPE
      && TREE_CODE (type) != VECTOR_TYPE)
    /* Simple scalar types are always returned in registers.  */
    return false;

  if (aarch64_vfp_is_call_or_return_candidate (TYPE_MODE (type),
					       type,
					       &ag_mode,
					       &count,
					       NULL))
    return false;

  /* Types larger than 2 registers are returned in memory.  */
  size = int_size_in_bytes (type);
  return (size < 0 || size > 2 * UNITS_PER_WORD);
}

static bool
aarch64_vfp_is_call_candidate (cumulative_args_t pcum_v, machine_mode mode,
			       const_tree type, int *nregs)
{
  CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
  return aarch64_vfp_is_call_or_return_candidate (mode,
						  type,
						  &pcum->aapcs_vfp_rmode,
						  nregs,
						  NULL);
}

/* Given MODE and TYPE of a function argument, return the alignment in
   bits.  The idea is to suppress any stronger alignment requested by
   the user and opt for the natural alignment (specified in AAPCS64 \S 4.1).
   This is a helper function for local use only.  */

static unsigned int
aarch64_function_arg_alignment (machine_mode mode, const_tree type)
{
  if (!type)
    return GET_MODE_ALIGNMENT (mode);

  if (integer_zerop (TYPE_SIZE (type)))
    return 0;

  gcc_assert (TYPE_MODE (type) == mode);

  if (!AGGREGATE_TYPE_P (type))
    return TYPE_ALIGN (TYPE_MAIN_VARIANT (type));

  if (TREE_CODE (type) == ARRAY_TYPE)
    return TYPE_ALIGN (TREE_TYPE (type));

  unsigned int alignment = 0;
  for (tree field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
    if (TREE_CODE (field) == FIELD_DECL)
      alignment = std::max (alignment, DECL_ALIGN (field));

  return alignment;
}
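
/* A hedged example of the suppression above (assumed, not from the
   original source):

     struct s { int x; } __attribute__ ((aligned (32)));

   TYPE_ALIGN of 's' reports 256 bits, but the FIELD_DECL walk above
   only sees the 'int' member, so 32 bits is returned for argument
   layout: the extra user-requested alignment is deliberately ignored,
   as AAPCS64 requires the natural alignment to be used.  */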

/* Layout a function argument according to the AAPCS64 rules.  The rule
   numbers refer to the rule numbers in the AAPCS64.  */

static void
aarch64_layout_arg (cumulative_args_t pcum_v, machine_mode mode,
		    const_tree type,
		    bool named ATTRIBUTE_UNUSED)
{
  CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
  int ncrn, nvrn, nregs;
  bool allocate_ncrn, allocate_nvrn;
  HOST_WIDE_INT size;

  /* We need to do this once per argument.  */
  if (pcum->aapcs_arg_processed)
    return;

  pcum->aapcs_arg_processed = true;

  /* Size in bytes, rounded to the nearest multiple of 8 bytes.  */
  if (type)
    size = int_size_in_bytes (type);
  else
    /* No frontends can create types with variable-sized modes, so we
       shouldn't be asked to pass or return them.  */
    size = GET_MODE_SIZE (mode).to_constant ();
  size = ROUND_UP (size, UNITS_PER_WORD);

  allocate_ncrn = (type) ? !(FLOAT_TYPE_P (type)) : !FLOAT_MODE_P (mode);
  allocate_nvrn = aarch64_vfp_is_call_candidate (pcum_v,
						 mode,
						 type,
						 &nregs);

  /* allocate_ncrn may be false-positive, but allocate_nvrn is quite reliable.
     The following code thus handles passing by SIMD/FP registers first.  */

  nvrn = pcum->aapcs_nvrn;

  /* C1 - C5 for floating point, homogeneous floating-point aggregates (HFA)
     and homogeneous short-vector aggregates (HVA).  */
  if (allocate_nvrn)
    {
      if (!TARGET_FLOAT)
	aarch64_err_no_fpadvsimd (mode);

      if (nvrn + nregs <= NUM_FP_ARG_REGS)
	{
	  pcum->aapcs_nextnvrn = nvrn + nregs;
	  if (!aarch64_composite_type_p (type, mode))
	    {
	      gcc_assert (nregs == 1);
	      pcum->aapcs_reg = gen_rtx_REG (mode, V0_REGNUM + nvrn);
	    }
	  else
	    {
	      rtx par;
	      int i;
	      par = gen_rtx_PARALLEL (mode, rtvec_alloc (nregs));
	      for (i = 0; i < nregs; i++)
		{
		  rtx tmp = gen_rtx_REG (pcum->aapcs_vfp_rmode,
					 V0_REGNUM + nvrn + i);
		  rtx offset = gen_int_mode
		    (i * GET_MODE_SIZE (pcum->aapcs_vfp_rmode), Pmode);
		  tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp, offset);
		  XVECEXP (par, 0, i) = tmp;
		}
	      pcum->aapcs_reg = par;
	    }
	  return;
	}
      else
	{
	  /* C.3 NSRN is set to 8.  */
	  pcum->aapcs_nextnvrn = NUM_FP_ARG_REGS;
	  goto on_stack;
	}
    }

  ncrn = pcum->aapcs_ncrn;
  nregs = size / UNITS_PER_WORD;

  /* C6 - C9, though the sign- and zero-extension semantics are
     handled elsewhere.  This is the case where the argument fits
     entirely in general registers.  */
  if (allocate_ncrn && (ncrn + nregs <= NUM_ARG_REGS))
    {
      gcc_assert (nregs == 0 || nregs == 1 || nregs == 2);

      /* C.8 if the argument has an alignment of 16 then the NGRN is
	 rounded up to the next even number.  */
      if (nregs == 2
	  && ncrn % 2
	  /* The == 16 * BITS_PER_UNIT instead of >= 16 * BITS_PER_UNIT
	     comparison is there because for > 16 * BITS_PER_UNIT
	     alignment nregs should be > 2 and therefore it should be
	     passed by reference rather than value.  */
	  && aarch64_function_arg_alignment (mode, type) == 16 * BITS_PER_UNIT)
	{
	  ++ncrn;
	  gcc_assert (ncrn + nregs <= NUM_ARG_REGS);
	}

      /* NREGS can be 0 when e.g. an empty structure is to be passed.
	 A reg is still generated for it, but the caller should be smart
	 enough not to use it.  */
      if (nregs == 0 || nregs == 1 || GET_MODE_CLASS (mode) == MODE_INT)
	pcum->aapcs_reg = gen_rtx_REG (mode, R0_REGNUM + ncrn);
      else
	{
	  rtx par;
	  int i;

	  par = gen_rtx_PARALLEL (mode, rtvec_alloc (nregs));
	  for (i = 0; i < nregs; i++)
	    {
	      rtx tmp = gen_rtx_REG (word_mode, R0_REGNUM + ncrn + i);
	      tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp,
				       GEN_INT (i * UNITS_PER_WORD));
	      XVECEXP (par, 0, i) = tmp;
	    }
	  pcum->aapcs_reg = par;
	}

      pcum->aapcs_nextncrn = ncrn + nregs;
      return;
    }

  /* C.11 */
  pcum->aapcs_nextncrn = NUM_ARG_REGS;

  /* The argument is passed on stack; record the needed number of words for
     this argument and align the total size if necessary.  */
on_stack:
  pcum->aapcs_stack_words = size / UNITS_PER_WORD;

  if (aarch64_function_arg_alignment (mode, type) == 16 * BITS_PER_UNIT)
    pcum->aapcs_stack_size = ROUND_UP (pcum->aapcs_stack_size,
				       16 / UNITS_PER_WORD);
  return;
}
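
/* A worked example of rule C.8 above (a sketch, assumed rather than
   taken from the original source): for

     void f (int a, __int128 b);

   'a' occupies X0, leaving NGRN == 1.  '__int128' needs two registers
   and has 16-byte alignment, so NGRN is rounded up to 2 and 'b' is
   passed in the X2/X3 pair, leaving X1 unused.  */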

/* Implement TARGET_FUNCTION_ARG.  */

static rtx
aarch64_function_arg (cumulative_args_t pcum_v, machine_mode mode,
		      const_tree type, bool named)
{
  CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
  gcc_assert (pcum->pcs_variant == ARM_PCS_AAPCS64);

  if (mode == VOIDmode)
    return NULL_RTX;

  aarch64_layout_arg (pcum_v, mode, type, named);
  return pcum->aapcs_reg;
}

void
aarch64_init_cumulative_args (CUMULATIVE_ARGS *pcum,
			      const_tree fntype ATTRIBUTE_UNUSED,
			      rtx libname ATTRIBUTE_UNUSED,
			      const_tree fndecl ATTRIBUTE_UNUSED,
			      unsigned n_named ATTRIBUTE_UNUSED)
{
  pcum->aapcs_ncrn = 0;
  pcum->aapcs_nvrn = 0;
  pcum->aapcs_nextncrn = 0;
  pcum->aapcs_nextnvrn = 0;
  pcum->pcs_variant = ARM_PCS_AAPCS64;
  pcum->aapcs_reg = NULL_RTX;
  pcum->aapcs_arg_processed = false;
  pcum->aapcs_stack_words = 0;
  pcum->aapcs_stack_size = 0;

  if (!TARGET_FLOAT
      && fndecl && TREE_PUBLIC (fndecl)
      && fntype && fntype != error_mark_node)
    {
      const_tree type = TREE_TYPE (fntype);
      machine_mode mode ATTRIBUTE_UNUSED; /* To pass pointer as argument.  */
      int nregs ATTRIBUTE_UNUSED; /* Likewise.  */
      if (aarch64_vfp_is_call_or_return_candidate (TYPE_MODE (type), type,
						   &mode, &nregs, NULL))
	aarch64_err_no_fpadvsimd (TYPE_MODE (type));
    }
  return;
}

static void
aarch64_function_arg_advance (cumulative_args_t pcum_v,
			      machine_mode mode,
			      const_tree type,
			      bool named)
{
  CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
  if (pcum->pcs_variant == ARM_PCS_AAPCS64)
    {
      aarch64_layout_arg (pcum_v, mode, type, named);
      gcc_assert ((pcum->aapcs_reg != NULL_RTX)
		  != (pcum->aapcs_stack_words != 0));
      pcum->aapcs_arg_processed = false;
      pcum->aapcs_ncrn = pcum->aapcs_nextncrn;
      pcum->aapcs_nvrn = pcum->aapcs_nextnvrn;
      pcum->aapcs_stack_size += pcum->aapcs_stack_words;
      pcum->aapcs_stack_words = 0;
      pcum->aapcs_reg = NULL_RTX;
    }
}

bool
aarch64_function_arg_regno_p (unsigned regno)
{
  return ((GP_REGNUM_P (regno) && regno < R0_REGNUM + NUM_ARG_REGS)
	  || (FP_REGNUM_P (regno) && regno < V0_REGNUM + NUM_FP_ARG_REGS));
}

/* Implement FUNCTION_ARG_BOUNDARY.  Every parameter gets at least
   PARM_BOUNDARY bits of alignment, but will be given anything up
   to STACK_BOUNDARY bits if the type requires it.  This makes sure
   that both before and after the layout of each argument, the Next
   Stacked Argument Address (NSAA) will have a minimum alignment of
   8 bytes.  */

static unsigned int
aarch64_function_arg_boundary (machine_mode mode, const_tree type)
{
  unsigned int alignment = aarch64_function_arg_alignment (mode, type);
  return MIN (MAX (alignment, PARM_BOUNDARY), STACK_BOUNDARY);
}

/* Implement TARGET_GET_RAW_RESULT_MODE and TARGET_GET_RAW_ARG_MODE.  */

static fixed_size_mode
aarch64_get_reg_raw_mode (int regno)
{
  if (TARGET_SVE && FP_REGNUM_P (regno))
    /* Don't use the SVE part of the register for __builtin_apply and
       __builtin_return.  The SVE registers aren't used by the normal PCS,
       so using them there would be a waste of time.  The PCS extensions
       for SVE types are fundamentally incompatible with the
       __builtin_return/__builtin_apply interface.  */
    return as_a <fixed_size_mode> (V16QImode);
  return default_get_reg_raw_mode (regno);
}

/* Implement TARGET_FUNCTION_ARG_PADDING.

   Small aggregate types are placed in the lowest memory address.

   The related parameter passing rules are B.4, C.3, C.5 and C.14.  */

static pad_direction
aarch64_function_arg_padding (machine_mode mode, const_tree type)
{
  /* On little-endian targets, the least significant byte of every stack
     argument is passed at the lowest byte address of the stack slot.  */
  if (!BYTES_BIG_ENDIAN)
    return PAD_UPWARD;

  /* Otherwise, integral, floating-point and pointer types are padded downward:
     the least significant byte of a stack argument is passed at the highest
     byte address of the stack slot.  */
  if (type
      ? (INTEGRAL_TYPE_P (type) || SCALAR_FLOAT_TYPE_P (type)
	 || POINTER_TYPE_P (type))
      : (SCALAR_INT_MODE_P (mode) || SCALAR_FLOAT_MODE_P (mode)))
    return PAD_DOWNWARD;

  /* Everything else padded upward, i.e. data in first byte of stack slot.  */
  return PAD_UPWARD;
}
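
/* Sketch of the effect (assumed example): a 1-byte 'char' argument that
   ends up on the stack is placed at the highest byte address of its
   8-byte slot on a big-endian target (PAD_DOWNWARD), but at the lowest
   byte address on little-endian (PAD_UPWARD).  */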

/* Similarly, for use by BLOCK_REG_PADDING (MODE, TYPE, FIRST).

   It specifies padding for the last (may also be the only)
   element of a block move between registers and memory.  If
   assuming the block is in the memory, padding upward means that
   the last element is padded after its most significant byte,
   while in downward padding, the last element is padded at its
   least significant byte side.

   Small aggregates and small complex types are always padded
   upwards.

   We don't need to worry about homogeneous floating-point or
   short-vector aggregates; their move is not affected by the
   padding direction determined here.  Regardless of endianness,
   each element of such an aggregate is put in the least
   significant bits of a fp/simd register.

   Return !BYTES_BIG_ENDIAN if the least significant byte of the
   register has useful data, and return the opposite if the most
   significant byte does.  */

bool
aarch64_pad_reg_upward (machine_mode mode, const_tree type,
			bool first ATTRIBUTE_UNUSED)
{

  /* Small composite types are always padded upward.  */
  if (BYTES_BIG_ENDIAN && aarch64_composite_type_p (type, mode))
    {
      HOST_WIDE_INT size;
      if (type)
	size = int_size_in_bytes (type);
      else
	/* No frontends can create types with variable-sized modes, so we
	   shouldn't be asked to pass or return them.  */
	size = GET_MODE_SIZE (mode).to_constant ();
      if (size < 2 * UNITS_PER_WORD)
	return true;
    }

  /* Otherwise, use the default padding.  */
  return !BYTES_BIG_ENDIAN;
}

static scalar_int_mode
aarch64_libgcc_cmp_return_mode (void)
{
  return SImode;
}

#define PROBE_INTERVAL (1 << STACK_CHECK_PROBE_INTERVAL_EXP)

/* We use the 12-bit shifted immediate arithmetic instructions so values
   must be a multiple of (1 << 12), i.e. 4096.  */
#define ARITH_FACTOR 4096

#if (PROBE_INTERVAL % ARITH_FACTOR) != 0
#error Cannot use simple address calculation for stack probing
#endif

/* The pair of scratch registers used for stack probing.  */
#define PROBE_STACK_FIRST_REG  9
#define PROBE_STACK_SECOND_REG 10

/* Emit code to probe a range of stack addresses from FIRST to FIRST+POLY_SIZE,
   inclusive.  These are offsets from the current stack pointer.  */

static void
aarch64_emit_probe_stack_range (HOST_WIDE_INT first, poly_int64 poly_size)
{
  HOST_WIDE_INT size;
  if (!poly_size.is_constant (&size))
    {
      sorry ("stack probes for SVE frames");
      return;
    }

  rtx reg1 = gen_rtx_REG (Pmode, PROBE_STACK_FIRST_REG);

  /* See the same assertion on PROBE_INTERVAL above.  */
  gcc_assert ((first % ARITH_FACTOR) == 0);

  /* See if we have a constant small number of probes to generate.  If so,
     that's the easy case.  */
  if (size <= PROBE_INTERVAL)
    {
      const HOST_WIDE_INT base = ROUND_UP (size, ARITH_FACTOR);

      emit_set_insn (reg1,
		     plus_constant (Pmode,
				    stack_pointer_rtx, -(first + base)));
      emit_stack_probe (plus_constant (Pmode, reg1, base - size));
    }

  /* The run-time loop is made up of 8 insns in the generic case while the
     compile-time loop is made up of 4+2*(n-2) insns for n # of intervals.  */
  else if (size <= 4 * PROBE_INTERVAL)
    {
      HOST_WIDE_INT i, rem;

      emit_set_insn (reg1,
		     plus_constant (Pmode,
				    stack_pointer_rtx,
				    -(first + PROBE_INTERVAL)));
      emit_stack_probe (reg1);

      /* Probe at FIRST + N * PROBE_INTERVAL for values of N from 2 until
	 it exceeds SIZE.  If only two probes are needed, this will not
	 generate any code.  Then probe at FIRST + SIZE.  */
      for (i = 2 * PROBE_INTERVAL; i < size; i += PROBE_INTERVAL)
	{
	  emit_set_insn (reg1,
			 plus_constant (Pmode, reg1, -PROBE_INTERVAL));
	  emit_stack_probe (reg1);
	}

      rem = size - (i - PROBE_INTERVAL);
      if (rem > 256)
	{
	  const HOST_WIDE_INT base = ROUND_UP (rem, ARITH_FACTOR);

	  emit_set_insn (reg1, plus_constant (Pmode, reg1, -base));
	  emit_stack_probe (plus_constant (Pmode, reg1, base - rem));
	}
      else
	emit_stack_probe (plus_constant (Pmode, reg1, -rem));
    }

  /* Otherwise, do the same as above, but in a loop.  Note that we must be
     extra careful with variables wrapping around because we might be at
     the very top (or the very bottom) of the address space and we have
     to be able to handle this case properly; in particular, we use an
     equality test for the loop condition.  */
  else
    {
      rtx reg2 = gen_rtx_REG (Pmode, PROBE_STACK_SECOND_REG);

      /* Step 1: round SIZE to the previous multiple of the interval.  */

      HOST_WIDE_INT rounded_size = size & -PROBE_INTERVAL;


      /* Step 2: compute initial and final value of the loop counter.  */

      /* TEST_ADDR = SP + FIRST.  */
      emit_set_insn (reg1,
		     plus_constant (Pmode, stack_pointer_rtx, -first));

      /* LAST_ADDR = SP + FIRST + ROUNDED_SIZE.  */
      HOST_WIDE_INT adjustment = - (first + rounded_size);
      if (! aarch64_uimm12_shift (adjustment))
	{
	  aarch64_internal_mov_immediate (reg2, GEN_INT (adjustment),
					  true, Pmode);
	  emit_set_insn (reg2, gen_rtx_PLUS (Pmode, stack_pointer_rtx, reg2));
	}
      else
	emit_set_insn (reg2,
		       plus_constant (Pmode, stack_pointer_rtx, adjustment));

      /* Step 3: the loop

	 do
	   {
	     TEST_ADDR = TEST_ADDR + PROBE_INTERVAL
	     probe at TEST_ADDR
	   }
	 while (TEST_ADDR != LAST_ADDR)

	 probes at FIRST + N * PROBE_INTERVAL for values of N from 1
	 until it is equal to ROUNDED_SIZE.  */

      emit_insn (gen_probe_stack_range (reg1, reg1, reg2));


      /* Step 4: probe at FIRST + SIZE if we cannot assert at compile-time
	 that SIZE is equal to ROUNDED_SIZE.  */

      if (size != rounded_size)
	{
	  HOST_WIDE_INT rem = size - rounded_size;

	  if (rem > 256)
	    {
	      const HOST_WIDE_INT base = ROUND_UP (rem, ARITH_FACTOR);

	      emit_set_insn (reg2, plus_constant (Pmode, reg2, -base));
	      emit_stack_probe (plus_constant (Pmode, reg2, base - rem));
	    }
	  else
	    emit_stack_probe (plus_constant (Pmode, reg2, -rem));
	}
    }

  /* Make sure nothing is scheduled before we are done.  */
  emit_insn (gen_blockage ());
}
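
/* An illustrative sketch (not literal compiler output) of the middle
   case above, for SIZE == 3 * PROBE_INTERVAL with a 4096-byte interval
   and x9 as PROBE_STACK_FIRST_REG:

     sub	x9, sp, #(first + 4096)
     str	xzr, [x9]
     sub	x9, x9, #4096
     str	xzr, [x9]
     sub	x9, x9, #4096
     str	xzr, [x9]

   i.e. one explicit probe per interval and no run-time loop.  */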

/* Probe a range of stack addresses from REG1 to REG2 inclusive.  These are
   absolute addresses.  */

const char *
aarch64_output_probe_stack_range (rtx reg1, rtx reg2)
{
  static int labelno = 0;
  char loop_lab[32];
  rtx xops[2];

  ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno++);

  /* Loop.  */
  ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);

  /* TEST_ADDR = TEST_ADDR + PROBE_INTERVAL.  */
  xops[0] = reg1;
  xops[1] = GEN_INT (PROBE_INTERVAL);
  output_asm_insn ("sub\t%0, %0, %1", xops);

  /* Probe at TEST_ADDR.  */
  output_asm_insn ("str\txzr, [%0]", xops);

  /* Test if TEST_ADDR == LAST_ADDR.  */
  xops[1] = reg2;
  output_asm_insn ("cmp\t%0, %1", xops);

  /* Branch.  */
  fputs ("\tb.ne\t", asm_out_file);
  assemble_name_raw (asm_out_file, loop_lab);
  fputc ('\n', asm_out_file);

  return "";
}

/* Determine whether a frame chain needs to be generated.  */
static bool
aarch64_needs_frame_chain (void)
{
  /* Force a frame chain for EH returns so the return address is at FP+8.  */
  if (frame_pointer_needed || crtl->calls_eh_return)
    return true;

  /* A leaf function cannot have calls or write LR.  */
  bool is_leaf = crtl->is_leaf && !df_regs_ever_live_p (LR_REGNUM);

  /* Don't use a frame chain in leaf functions if leaf frame pointers
     are disabled.  */
  if (flag_omit_leaf_frame_pointer && is_leaf)
    return false;

  return aarch64_use_frame_pointer;
}

/* Mark the registers that need to be saved by the callee and calculate
   the size of the callee-saved registers area and frame record (both FP
   and LR may be omitted).  */
static void
aarch64_layout_frame (void)
{
  HOST_WIDE_INT offset = 0;
  int regno, last_fp_reg = INVALID_REGNUM;

  if (reload_completed && cfun->machine->frame.laid_out)
    return;

  cfun->machine->frame.emit_frame_chain = aarch64_needs_frame_chain ();

#define SLOT_NOT_REQUIRED (-2)
#define SLOT_REQUIRED     (-1)

  cfun->machine->frame.wb_candidate1 = INVALID_REGNUM;
  cfun->machine->frame.wb_candidate2 = INVALID_REGNUM;

  /* First mark all the registers that really need to be saved...  */
  for (regno = R0_REGNUM; regno <= R30_REGNUM; regno++)
    cfun->machine->frame.reg_offset[regno] = SLOT_NOT_REQUIRED;

  for (regno = V0_REGNUM; regno <= V31_REGNUM; regno++)
    cfun->machine->frame.reg_offset[regno] = SLOT_NOT_REQUIRED;

  /* ... that includes the eh data registers (if needed)...  */
  if (crtl->calls_eh_return)
    for (regno = 0; EH_RETURN_DATA_REGNO (regno) != INVALID_REGNUM; regno++)
      cfun->machine->frame.reg_offset[EH_RETURN_DATA_REGNO (regno)]
	= SLOT_REQUIRED;

  /* ... and any callee saved register that dataflow says is live.  */
  for (regno = R0_REGNUM; regno <= R30_REGNUM; regno++)
    if (df_regs_ever_live_p (regno)
	&& (regno == R30_REGNUM
	    || !call_used_regs[regno]))
      cfun->machine->frame.reg_offset[regno] = SLOT_REQUIRED;

  for (regno = V0_REGNUM; regno <= V31_REGNUM; regno++)
    if (df_regs_ever_live_p (regno)
	&& !call_used_regs[regno])
      {
	cfun->machine->frame.reg_offset[regno] = SLOT_REQUIRED;
	last_fp_reg = regno;
      }

  if (cfun->machine->frame.emit_frame_chain)
    {
      /* FP and LR are placed in the linkage record.  */
      cfun->machine->frame.reg_offset[R29_REGNUM] = 0;
      cfun->machine->frame.wb_candidate1 = R29_REGNUM;
      cfun->machine->frame.reg_offset[R30_REGNUM] = UNITS_PER_WORD;
      cfun->machine->frame.wb_candidate2 = R30_REGNUM;
      offset = 2 * UNITS_PER_WORD;
    }

  /* Now assign stack slots for them.  */
  for (regno = R0_REGNUM; regno <= R30_REGNUM; regno++)
    if (cfun->machine->frame.reg_offset[regno] == SLOT_REQUIRED)
      {
	cfun->machine->frame.reg_offset[regno] = offset;
	if (cfun->machine->frame.wb_candidate1 == INVALID_REGNUM)
	  cfun->machine->frame.wb_candidate1 = regno;
	else if (cfun->machine->frame.wb_candidate2 == INVALID_REGNUM)
	  cfun->machine->frame.wb_candidate2 = regno;
	offset += UNITS_PER_WORD;
      }

  HOST_WIDE_INT max_int_offset = offset;
  offset = ROUND_UP (offset, STACK_BOUNDARY / BITS_PER_UNIT);
  bool has_align_gap = offset != max_int_offset;

  for (regno = V0_REGNUM; regno <= V31_REGNUM; regno++)
    if (cfun->machine->frame.reg_offset[regno] == SLOT_REQUIRED)
      {
	/* If there is an alignment gap between integer and fp callee-saves,
	   allocate the last fp register to it if possible.  */
	if (regno == last_fp_reg && has_align_gap && (offset & 8) == 0)
	  {
	    cfun->machine->frame.reg_offset[regno] = max_int_offset;
	    break;
	  }

	cfun->machine->frame.reg_offset[regno] = offset;
	if (cfun->machine->frame.wb_candidate1 == INVALID_REGNUM)
	  cfun->machine->frame.wb_candidate1 = regno;
	else if (cfun->machine->frame.wb_candidate2 == INVALID_REGNUM
		 && cfun->machine->frame.wb_candidate1 >= V0_REGNUM)
	  cfun->machine->frame.wb_candidate2 = regno;
	offset += UNITS_PER_WORD;
      }

  offset = ROUND_UP (offset, STACK_BOUNDARY / BITS_PER_UNIT);

  cfun->machine->frame.saved_regs_size = offset;

  HOST_WIDE_INT varargs_and_saved_regs_size
    = offset + cfun->machine->frame.saved_varargs_size;

  cfun->machine->frame.hard_fp_offset
    = aligned_upper_bound (varargs_and_saved_regs_size
			   + get_frame_size (),
			   STACK_BOUNDARY / BITS_PER_UNIT);

  /* Both these values are already aligned.  */
  gcc_assert (multiple_p (crtl->outgoing_args_size,
			  STACK_BOUNDARY / BITS_PER_UNIT));
  cfun->machine->frame.frame_size
    = (cfun->machine->frame.hard_fp_offset
       + crtl->outgoing_args_size);

  cfun->machine->frame.locals_offset = cfun->machine->frame.saved_varargs_size;

  cfun->machine->frame.initial_adjust = 0;
  cfun->machine->frame.final_adjust = 0;
  cfun->machine->frame.callee_adjust = 0;
  cfun->machine->frame.callee_offset = 0;

  HOST_WIDE_INT max_push_offset = 0;
  if (cfun->machine->frame.wb_candidate2 != INVALID_REGNUM)
    max_push_offset = 512;
  else if (cfun->machine->frame.wb_candidate1 != INVALID_REGNUM)
    max_push_offset = 256;

  HOST_WIDE_INT const_size, const_fp_offset;
  if (cfun->machine->frame.frame_size.is_constant (&const_size)
      && const_size < max_push_offset
      && known_eq (crtl->outgoing_args_size, 0))
    {
      /* Simple, small frame with no outgoing arguments:
	 stp reg1, reg2, [sp, -frame_size]!
	 stp reg3, reg4, [sp, 16]  */
      cfun->machine->frame.callee_adjust = const_size;
    }
  else if (known_lt (crtl->outgoing_args_size
		     + cfun->machine->frame.saved_regs_size, 512)
	   && !(cfun->calls_alloca
		&& known_lt (cfun->machine->frame.hard_fp_offset,
			     max_push_offset)))
    {
      /* Frame with small outgoing arguments:
	 sub sp, sp, frame_size
	 stp reg1, reg2, [sp, outgoing_args_size]
	 stp reg3, reg4, [sp, outgoing_args_size + 16]  */
      cfun->machine->frame.initial_adjust = cfun->machine->frame.frame_size;
      cfun->machine->frame.callee_offset
	= cfun->machine->frame.frame_size - cfun->machine->frame.hard_fp_offset;
    }
  else if (cfun->machine->frame.hard_fp_offset.is_constant (&const_fp_offset)
	   && const_fp_offset < max_push_offset)
    {
      /* Frame with large outgoing arguments but a small local area:
	 stp reg1, reg2, [sp, -hard_fp_offset]!
	 stp reg3, reg4, [sp, 16]
	 sub sp, sp, outgoing_args_size  */
      cfun->machine->frame.callee_adjust = const_fp_offset;
      cfun->machine->frame.final_adjust
	= cfun->machine->frame.frame_size - cfun->machine->frame.callee_adjust;
    }
  else
    {
      /* Frame with large local area and outgoing arguments using frame pointer:
	 sub sp, sp, hard_fp_offset
	 stp x29, x30, [sp, 0]
	 add x29, sp, 0
	 stp reg3, reg4, [sp, 16]
	 sub sp, sp, outgoing_args_size  */
      cfun->machine->frame.initial_adjust = cfun->machine->frame.hard_fp_offset;
      cfun->machine->frame.final_adjust
	= cfun->machine->frame.frame_size - cfun->machine->frame.initial_adjust;
    }

  cfun->machine->frame.laid_out = true;
}
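
/* A concrete sketch (assumed numbers, not from the original source): a
   function that needs a frame chain and also saves x19/x20, with 16
   bytes of locals and no outgoing arguments, gets

     saved_regs_size = 32   (x29, x30, x19, x20)
     hard_fp_offset  = 48   (saves + locals, 16-byte aligned)
     frame_size      = 48

   Since 48 < max_push_offset and there are no outgoing arguments, the
   first case above applies: callee_adjust == 48 and the prologue can
   open with a single "stp x29, x30, [sp, -48]!".  */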

/* Return true if the register REGNO is saved on entry to
   the current function.  */

static bool
aarch64_register_saved_on_entry (int regno)
{
  return cfun->machine->frame.reg_offset[regno] >= 0;
}

/* Return the next register up from REGNO up to LIMIT for the callee
   to save.  */

static unsigned
aarch64_next_callee_save (unsigned regno, unsigned limit)
{
  while (regno <= limit && !aarch64_register_saved_on_entry (regno))
    regno ++;
  return regno;
}

/* Push the register number REGNO of mode MODE to the stack with write-back
   adjusting the stack by ADJUSTMENT.  */

static void
aarch64_pushwb_single_reg (machine_mode mode, unsigned regno,
			   HOST_WIDE_INT adjustment)
{
  rtx base_rtx = stack_pointer_rtx;
  rtx insn, reg, mem;

  reg = gen_rtx_REG (mode, regno);
  mem = gen_rtx_PRE_MODIFY (Pmode, base_rtx,
			    plus_constant (Pmode, base_rtx, -adjustment));
  mem = gen_frame_mem (mode, mem);

  insn = emit_move_insn (mem, reg);
  RTX_FRAME_RELATED_P (insn) = 1;
}

/* Generate and return an instruction to store the pair of registers
   REG and REG2 of mode MODE to location BASE with write-back adjusting
   the stack location BASE by ADJUSTMENT.  */

static rtx
aarch64_gen_storewb_pair (machine_mode mode, rtx base, rtx reg, rtx reg2,
			  HOST_WIDE_INT adjustment)
{
  switch (mode)
    {
    case E_DImode:
      return gen_storewb_pairdi_di (base, base, reg, reg2,
				    GEN_INT (-adjustment),
				    GEN_INT (UNITS_PER_WORD - adjustment));
    case E_DFmode:
      return gen_storewb_pairdf_di (base, base, reg, reg2,
				    GEN_INT (-adjustment),
				    GEN_INT (UNITS_PER_WORD - adjustment));
    default:
      gcc_unreachable ();
    }
}

/* Push registers numbered REGNO1 and REGNO2 to the stack, adjusting the
   stack pointer by ADJUSTMENT.  */

static void
aarch64_push_regs (unsigned regno1, unsigned regno2, HOST_WIDE_INT adjustment)
{
  rtx_insn *insn;
  machine_mode mode = (regno1 <= R30_REGNUM) ? E_DImode : E_DFmode;

  if (regno2 == INVALID_REGNUM)
    return aarch64_pushwb_single_reg (mode, regno1, adjustment);

  rtx reg1 = gen_rtx_REG (mode, regno1);
  rtx reg2 = gen_rtx_REG (mode, regno2);

  insn = emit_insn (aarch64_gen_storewb_pair (mode, stack_pointer_rtx, reg1,
					      reg2, adjustment));
  RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 2)) = 1;
  RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 1)) = 1;
  RTX_FRAME_RELATED_P (insn) = 1;
}
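
/* Sketch of the insn this emits (illustrative only): pushing x29/x30
   with an adjustment of 48 becomes

     stp	x29, x30, [sp, #-48]!

   a store-pair with pre-writeback that performs the stack adjustment
   and both saves in one frame-related instruction.  */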

/* Generate and return an instruction to load the pair of registers
   REG and REG2 of mode MODE from stack location BASE, adjusting BASE
   by ADJUSTMENT afterwards.  */

static rtx
aarch64_gen_loadwb_pair (machine_mode mode, rtx base, rtx reg, rtx reg2,
			 HOST_WIDE_INT adjustment)
{
  switch (mode)
    {
    case E_DImode:
      return gen_loadwb_pairdi_di (base, base, reg, reg2, GEN_INT (adjustment),
				   GEN_INT (UNITS_PER_WORD));
    case E_DFmode:
      return gen_loadwb_pairdf_di (base, base, reg, reg2, GEN_INT (adjustment),
				   GEN_INT (UNITS_PER_WORD));
    default:
      gcc_unreachable ();
    }
}

/* Pop the two registers numbered REGNO1, REGNO2 from the stack, adjusting it
   afterwards by ADJUSTMENT and writing the appropriate REG_CFA_RESTORE notes
   into CFI_OPS.  */

static void
aarch64_pop_regs (unsigned regno1, unsigned regno2, HOST_WIDE_INT adjustment,
		  rtx *cfi_ops)
{
  machine_mode mode = (regno1 <= R30_REGNUM) ? E_DImode : E_DFmode;
  rtx reg1 = gen_rtx_REG (mode, regno1);

  *cfi_ops = alloc_reg_note (REG_CFA_RESTORE, reg1, *cfi_ops);

  if (regno2 == INVALID_REGNUM)
    {
      rtx mem = plus_constant (Pmode, stack_pointer_rtx, adjustment);
      mem = gen_rtx_POST_MODIFY (Pmode, stack_pointer_rtx, mem);
      emit_move_insn (reg1, gen_frame_mem (mode, mem));
    }
  else
    {
      rtx reg2 = gen_rtx_REG (mode, regno2);
      *cfi_ops = alloc_reg_note (REG_CFA_RESTORE, reg2, *cfi_ops);
      emit_insn (aarch64_gen_loadwb_pair (mode, stack_pointer_rtx, reg1,
					  reg2, adjustment));
    }
}

/* Generate and return a store pair instruction of mode MODE to store
   register REG1 to MEM1 and register REG2 to MEM2.  */

static rtx
aarch64_gen_store_pair (machine_mode mode, rtx mem1, rtx reg1, rtx mem2,
			rtx reg2)
{
  switch (mode)
    {
    case E_DImode:
      return gen_store_pair_dw_didi (mem1, reg1, mem2, reg2);

    case E_DFmode:
      return gen_store_pair_dw_dfdf (mem1, reg1, mem2, reg2);

    default:
      gcc_unreachable ();
    }
}

/* Generate and return a load pair instruction of mode MODE to load register
   REG1 from MEM1 and register REG2 from MEM2.  */

static rtx
aarch64_gen_load_pair (machine_mode mode, rtx reg1, rtx mem1, rtx reg2,
		       rtx mem2)
{
  switch (mode)
    {
    case E_DImode:
      return gen_load_pair_dw_didi (reg1, mem1, reg2, mem2);

    case E_DFmode:
      return gen_load_pair_dw_dfdf (reg1, mem1, reg2, mem2);

    default:
      gcc_unreachable ();
    }
}

/* Return TRUE if return address signing should be enabled for the current
   function, otherwise return FALSE.  */

bool
aarch64_return_address_signing_enabled (void)
{
  /* This function should only be called after the frame is laid out.  */
  gcc_assert (cfun->machine->frame.laid_out);

  /* If signing scope is AARCH64_FUNCTION_NON_LEAF, we only sign a leaf function
     if its LR is pushed onto the stack.  */
  return (aarch64_ra_sign_scope == AARCH64_FUNCTION_ALL
	  || (aarch64_ra_sign_scope == AARCH64_FUNCTION_NON_LEAF
	      && cfun->machine->frame.reg_offset[LR_REGNUM] >= 0));
}
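
/* Sketch of the behaviour (assumed example): under
   -msign-return-address=non-leaf, a function that saves LR gets a
   PACIASP-style signing hint in its prologue and a matching
   authentication before returning, while a leaf function that never
   stores LR is left unsigned.  */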

/* Emit code to save the callee-saved registers from register number START
   to LIMIT to the stack at the location starting at offset START_OFFSET,
   skipping any write-back candidates if SKIP_WB is true.  */

static void
aarch64_save_callee_saves (machine_mode mode, poly_int64 start_offset,
			   unsigned start, unsigned limit, bool skip_wb)
{
  rtx_insn *insn;
  unsigned regno;
  unsigned regno2;

  for (regno = aarch64_next_callee_save (start, limit);
       regno <= limit;
       regno = aarch64_next_callee_save (regno + 1, limit))
    {
      rtx reg, mem;
      poly_int64 offset;

      if (skip_wb
	  && (regno == cfun->machine->frame.wb_candidate1
	      || regno == cfun->machine->frame.wb_candidate2))
	continue;

      if (cfun->machine->reg_is_wrapped_separately[regno])
	continue;

      reg = gen_rtx_REG (mode, regno);
      offset = start_offset + cfun->machine->frame.reg_offset[regno];
      mem = gen_frame_mem (mode, plus_constant (Pmode, stack_pointer_rtx,
						offset));

      regno2 = aarch64_next_callee_save (regno + 1, limit);

      if (regno2 <= limit
	  && !cfun->machine->reg_is_wrapped_separately[regno2]
	  && ((cfun->machine->frame.reg_offset[regno] + UNITS_PER_WORD)
	      == cfun->machine->frame.reg_offset[regno2]))
	{
	  rtx reg2 = gen_rtx_REG (mode, regno2);
	  rtx mem2;

	  offset = start_offset + cfun->machine->frame.reg_offset[regno2];
	  mem2 = gen_frame_mem (mode, plus_constant (Pmode, stack_pointer_rtx,
						     offset));
	  insn = emit_insn (aarch64_gen_store_pair (mode, mem, reg, mem2,
						    reg2));

	  /* The first part of a frame-related parallel insn is
	     always assumed to be relevant to the frame
	     calculations; subsequent parts are only
	     frame-related if explicitly marked.  */
	  RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 1)) = 1;
	  regno = regno2;
	}
      else
	insn = emit_move_insn (mem, reg);

      RTX_FRAME_RELATED_P (insn) = 1;
    }
}

/* Emit code to restore the callee registers of mode MODE from register
   number START up to and including LIMIT.  Restore from the stack offset
   START_OFFSET, skipping any write-back candidates if SKIP_WB is true.
   Write the appropriate REG_CFA_RESTORE notes into CFI_OPS.  */

static void
aarch64_restore_callee_saves (machine_mode mode,
			      poly_int64 start_offset, unsigned start,
			      unsigned limit, bool skip_wb, rtx *cfi_ops)
{
  rtx base_rtx = stack_pointer_rtx;
  unsigned regno;
  unsigned regno2;
  poly_int64 offset;

  for (regno = aarch64_next_callee_save (start, limit);
       regno <= limit;
       regno = aarch64_next_callee_save (regno + 1, limit))
    {
      if (cfun->machine->reg_is_wrapped_separately[regno])
	continue;

      rtx reg, mem;

      if (skip_wb
	  && (regno == cfun->machine->frame.wb_candidate1
	      || regno == cfun->machine->frame.wb_candidate2))
	continue;

      reg = gen_rtx_REG (mode, regno);
      offset = start_offset + cfun->machine->frame.reg_offset[regno];
      mem = gen_frame_mem (mode, plus_constant (Pmode, base_rtx, offset));

      regno2 = aarch64_next_callee_save (regno + 1, limit);

      if (regno2 <= limit
	  && !cfun->machine->reg_is_wrapped_separately[regno2]
	  && ((cfun->machine->frame.reg_offset[regno] + UNITS_PER_WORD)
	      == cfun->machine->frame.reg_offset[regno2]))
	{
	  rtx reg2 = gen_rtx_REG (mode, regno2);
	  rtx mem2;

	  offset = start_offset + cfun->machine->frame.reg_offset[regno2];
	  mem2 = gen_frame_mem (mode, plus_constant (Pmode, base_rtx, offset));
	  emit_insn (aarch64_gen_load_pair (mode, reg, mem, reg2, mem2));

	  *cfi_ops = alloc_reg_note (REG_CFA_RESTORE, reg2, *cfi_ops);
	  regno = regno2;
	}
      else
	emit_move_insn (reg, mem);
      *cfi_ops = alloc_reg_note (REG_CFA_RESTORE, reg, *cfi_ops);
    }
}

/* Return true if OFFSET is a signed 4-bit value multiplied by the size
   of MODE.  */

static inline bool
offset_4bit_signed_scaled_p (machine_mode mode, poly_int64 offset)
{
  HOST_WIDE_INT multiple;
  return (constant_multiple_p (offset, GET_MODE_SIZE (mode), &multiple)
	  && IN_RANGE (multiple, -8, 7));
}

/* Return true if OFFSET is an unsigned 6-bit value multiplied by the size
   of MODE.  */

static inline bool
offset_6bit_unsigned_scaled_p (machine_mode mode, poly_int64 offset)
{
  HOST_WIDE_INT multiple;
  return (constant_multiple_p (offset, GET_MODE_SIZE (mode), &multiple)
	  && IN_RANGE (multiple, 0, 63));
}

/* Return true if OFFSET is a signed 7-bit value multiplied by the size
   of MODE.  */

bool
aarch64_offset_7bit_signed_scaled_p (machine_mode mode, poly_int64 offset)
{
  HOST_WIDE_INT multiple;
  return (constant_multiple_p (offset, GET_MODE_SIZE (mode), &multiple)
	  && IN_RANGE (multiple, -64, 63));
}

/* Return true if OFFSET is a signed 9-bit value.  */

static inline bool
offset_9bit_signed_unscaled_p (machine_mode mode ATTRIBUTE_UNUSED,
			       poly_int64 offset)
{
  HOST_WIDE_INT const_offset;
  return (offset.is_constant (&const_offset)
	  && IN_RANGE (const_offset, -256, 255));
}

/* Return true if OFFSET is a signed 9-bit value multiplied by the size
   of MODE.  */

static inline bool
offset_9bit_signed_scaled_p (machine_mode mode, poly_int64 offset)
{
  HOST_WIDE_INT multiple;
  return (constant_multiple_p (offset, GET_MODE_SIZE (mode), &multiple)
	  && IN_RANGE (multiple, -256, 255));
}

/* Return true if OFFSET is an unsigned 12-bit value multiplied by the size
   of MODE.  */

static inline bool
offset_12bit_unsigned_scaled_p (machine_mode mode, poly_int64 offset)
{
  HOST_WIDE_INT multiple;
  return (constant_multiple_p (offset, GET_MODE_SIZE (mode), &multiple)
	  && IN_RANGE (multiple, 0, 4095));
}
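
/* A worked instance of the predicates above (a sketch, not from the
   original source): for MODE == DImode, GET_MODE_SIZE is 8 bytes, so
   aarch64_offset_7bit_signed_scaled_p accepts byte offsets that are
   multiples of 8 in [-512, 504] -- exactly the immediate range of an
   LDP/STP of X registers.  */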

/* Implement TARGET_SHRINK_WRAP_GET_SEPARATE_COMPONENTS.  */

static sbitmap
aarch64_get_separate_components (void)
{
  aarch64_layout_frame ();

  sbitmap components = sbitmap_alloc (LAST_SAVED_REGNUM + 1);
  bitmap_clear (components);

  /* The registers we need saved to the frame.  */
  for (unsigned regno = 0; regno <= LAST_SAVED_REGNUM; regno++)
    if (aarch64_register_saved_on_entry (regno))
      {
	poly_int64 offset = cfun->machine->frame.reg_offset[regno];
	if (!frame_pointer_needed)
	  offset += cfun->machine->frame.frame_size
		    - cfun->machine->frame.hard_fp_offset;
	/* Check that we can access the stack slot of the register with one
	   direct load with no adjustments needed.  */
	if (offset_12bit_unsigned_scaled_p (DImode, offset))
	  bitmap_set_bit (components, regno);
      }

  /* Don't mess with the hard frame pointer.  */
  if (frame_pointer_needed)
    bitmap_clear_bit (components, HARD_FRAME_POINTER_REGNUM);

  unsigned reg1 = cfun->machine->frame.wb_candidate1;
  unsigned reg2 = cfun->machine->frame.wb_candidate2;
  /* If aarch64_layout_frame has chosen registers to store/restore with
     writeback don't interfere with them to avoid having to output explicit
     stack adjustment instructions.  */
  if (reg2 != INVALID_REGNUM)
    bitmap_clear_bit (components, reg2);
  if (reg1 != INVALID_REGNUM)
    bitmap_clear_bit (components, reg1);

  bitmap_clear_bit (components, LR_REGNUM);
  bitmap_clear_bit (components, SP_REGNUM);

  return components;
}

/* Implement TARGET_SHRINK_WRAP_COMPONENTS_FOR_BB.  */

static sbitmap
aarch64_components_for_bb (basic_block bb)
{
  bitmap in = DF_LIVE_IN (bb);
  bitmap gen = &DF_LIVE_BB_INFO (bb)->gen;
  bitmap kill = &DF_LIVE_BB_INFO (bb)->kill;

  sbitmap components = sbitmap_alloc (LAST_SAVED_REGNUM + 1);
  bitmap_clear (components);

  /* GPRs are used in a bb if they are in the IN, GEN, or KILL sets.  */
  for (unsigned regno = 0; regno <= LAST_SAVED_REGNUM; regno++)
    if ((!call_used_regs[regno])
	&& (bitmap_bit_p (in, regno)
	    || bitmap_bit_p (gen, regno)
	    || bitmap_bit_p (kill, regno)))
      {
	unsigned regno2, offset, offset2;
	bitmap_set_bit (components, regno);

	/* If there is a callee-save at an adjacent offset, add it too
	   to increase the use of LDP/STP.  */
	offset = cfun->machine->frame.reg_offset[regno];
	regno2 = ((offset & 8) == 0) ? regno + 1 : regno - 1;

	if (regno2 <= LAST_SAVED_REGNUM)
	  {
	    offset2 = cfun->machine->frame.reg_offset[regno2];
	    if ((offset & ~8) == (offset2 & ~8))
	      bitmap_set_bit (components, regno2);
	  }
      }

  return components;
}

/* Implement TARGET_SHRINK_WRAP_DISQUALIFY_COMPONENTS.
   Nothing to do for aarch64.  */

static void
aarch64_disqualify_components (sbitmap, edge, sbitmap, bool)
{
}

/* Return the next set bit in BMP from START onwards.  Return the total number
   of bits in BMP if no set bit is found at or after START.  */

static unsigned int
aarch64_get_next_set_bit (sbitmap bmp, unsigned int start)
{
  unsigned int nbits = SBITMAP_SIZE (bmp);
  if (start == nbits)
    return start;

  gcc_assert (start < nbits);
  for (unsigned int i = start; i < nbits; i++)
    if (bitmap_bit_p (bmp, i))
      return i;

  return nbits;
}

/* Do the work for aarch64_emit_prologue_components and
   aarch64_emit_epilogue_components.  COMPONENTS is the bitmap of registers
   to save/restore, PROLOGUE_P indicates whether to emit the prologue sequence
   for these components or the epilogue sequence.  That is, it determines
   whether we should emit stores or loads and what kind of CFA notes to attach
   to the insns.  Otherwise the logic for the two sequences is very
   similar.  */

static void
aarch64_process_components (sbitmap components, bool prologue_p)
{
  rtx ptr_reg = gen_rtx_REG (Pmode, frame_pointer_needed
			     ? HARD_FRAME_POINTER_REGNUM
			     : STACK_POINTER_REGNUM);

  unsigned last_regno = SBITMAP_SIZE (components);
  unsigned regno = aarch64_get_next_set_bit (components, R0_REGNUM);
  rtx_insn *insn = NULL;

  while (regno != last_regno)
    {
      /* AAPCS64 section 5.1.2 requires only the bottom 64 bits to be saved,
	 so DFmode for the vector registers is enough.  */
      machine_mode mode = GP_REGNUM_P (regno) ? E_DImode : E_DFmode;
      rtx reg = gen_rtx_REG (mode, regno);
      poly_int64 offset = cfun->machine->frame.reg_offset[regno];
      if (!frame_pointer_needed)
	offset += cfun->machine->frame.frame_size
		  - cfun->machine->frame.hard_fp_offset;
      rtx addr = plus_constant (Pmode, ptr_reg, offset);
      rtx mem = gen_frame_mem (mode, addr);

      rtx set = prologue_p ? gen_rtx_SET (mem, reg) : gen_rtx_SET (reg, mem);
      unsigned regno2 = aarch64_get_next_set_bit (components, regno + 1);
      /* No more registers to handle after REGNO.
	 Emit a single save/restore and exit.  */
      if (regno2 == last_regno)
	{
	  insn = emit_insn (set);
	  RTX_FRAME_RELATED_P (insn) = 1;
	  if (prologue_p)
	    add_reg_note (insn, REG_CFA_OFFSET, copy_rtx (set));
	  else
	    add_reg_note (insn, REG_CFA_RESTORE, reg);
	  break;
	}

      poly_int64 offset2 = cfun->machine->frame.reg_offset[regno2];
      /* The next register is not of the same class or its offset is not
	 mergeable with the current one into a pair.  */
      if (!satisfies_constraint_Ump (mem)
	  || GP_REGNUM_P (regno) != GP_REGNUM_P (regno2)
	  || maybe_ne ((offset2 - cfun->machine->frame.reg_offset[regno]),
		       GET_MODE_SIZE (mode)))
	{
	  insn = emit_insn (set);
	  RTX_FRAME_RELATED_P (insn) = 1;
	  if (prologue_p)
	    add_reg_note (insn, REG_CFA_OFFSET, copy_rtx (set));
	  else
	    add_reg_note (insn, REG_CFA_RESTORE, reg);

	  regno = regno2;
	  continue;
	}

      /* REGNO2 can be saved/restored in a pair with REGNO.  */
      rtx reg2 = gen_rtx_REG (mode, regno2);
      if (!frame_pointer_needed)
	offset2 += cfun->machine->frame.frame_size
		   - cfun->machine->frame.hard_fp_offset;
      rtx addr2 = plus_constant (Pmode, ptr_reg, offset2);
      rtx mem2 = gen_frame_mem (mode, addr2);
      rtx set2 = prologue_p ? gen_rtx_SET (mem2, reg2)
			    : gen_rtx_SET (reg2, mem2);

      if (prologue_p)
	insn = emit_insn (aarch64_gen_store_pair (mode, mem, reg, mem2, reg2));
      else
	insn = emit_insn (aarch64_gen_load_pair (mode, reg, mem, reg2, mem2));

      RTX_FRAME_RELATED_P (insn) = 1;
      if (prologue_p)
	{
	  add_reg_note (insn, REG_CFA_OFFSET, set);
	  add_reg_note (insn, REG_CFA_OFFSET, set2);
	}
      else
	{
	  add_reg_note (insn, REG_CFA_RESTORE, reg);
	  add_reg_note (insn, REG_CFA_RESTORE, reg2);
	}

      regno = aarch64_get_next_set_bit (components, regno2 + 1);
    }
}

/* Implement TARGET_SHRINK_WRAP_EMIT_PROLOGUE_COMPONENTS.  */

static void
aarch64_emit_prologue_components (sbitmap components)
{
  aarch64_process_components (components, true);
}

/* Implement TARGET_SHRINK_WRAP_EMIT_EPILOGUE_COMPONENTS.  */

static void
aarch64_emit_epilogue_components (sbitmap components)
{
  aarch64_process_components (components, false);
}

/* Implement TARGET_SHRINK_WRAP_SET_HANDLED_COMPONENTS.  */

static void
aarch64_set_handled_components (sbitmap components)
{
  for (unsigned regno = 0; regno <= LAST_SAVED_REGNUM; regno++)
    if (bitmap_bit_p (components, regno))
      cfun->machine->reg_is_wrapped_separately[regno] = true;
}

/* Add a REG_CFA_EXPRESSION note to INSN to say that register REG
   is saved at BASE + OFFSET.  */

static void
aarch64_add_cfa_expression (rtx_insn *insn, unsigned int reg,
			    rtx base, poly_int64 offset)
{
  rtx mem = gen_frame_mem (DImode, plus_constant (Pmode, base, offset));
  add_reg_note (insn, REG_CFA_EXPRESSION,
		gen_rtx_SET (mem, regno_reg_rtx[reg]));
}

/* AArch64 stack frames generated by this compiler look like:

	+-------------------------------+
	|                               |
	|  incoming stack arguments     |
	|                               |
	+-------------------------------+
	|                               | <-- incoming stack pointer (aligned)
	|  callee-allocated save area   |
	|  for register varargs         |
	|                               |
	+-------------------------------+
	|  local variables              | <-- frame_pointer_rtx
	|                               |
	+-------------------------------+
	|  padding0                     | \
	+-------------------------------+  |
	|  callee-saved registers       |  | frame.saved_regs_size
	+-------------------------------+  |
	|  LR'                          |  |
	+-------------------------------+  |
	|  FP'                          | / <- hard_frame_pointer_rtx (aligned)
	+-------------------------------+
	|  dynamic allocation           |
	+-------------------------------+
	|  padding                      |
	+-------------------------------+
	|  outgoing stack arguments     | <-- arg_pointer
	|                               |
	+-------------------------------+
	|                               | <-- stack_pointer_rtx (aligned)

   Dynamic stack allocations via alloca() decrease stack_pointer_rtx
   but leave frame_pointer_rtx and hard_frame_pointer_rtx
   unchanged.  */

/* Generate the prologue instructions for entry into a function.
   Establish the stack frame by decreasing the stack pointer with a
   properly calculated size and, if necessary, create a frame record
   filled with the values of LR and previous frame pointer.  The
   current FP is also set up if it is in use.  */

void
aarch64_expand_prologue (void)
{
  aarch64_layout_frame ();

  poly_int64 frame_size = cfun->machine->frame.frame_size;
  poly_int64 initial_adjust = cfun->machine->frame.initial_adjust;
  HOST_WIDE_INT callee_adjust = cfun->machine->frame.callee_adjust;
  poly_int64 final_adjust = cfun->machine->frame.final_adjust;
  poly_int64 callee_offset = cfun->machine->frame.callee_offset;
  unsigned reg1 = cfun->machine->frame.wb_candidate1;
  unsigned reg2 = cfun->machine->frame.wb_candidate2;
  bool emit_frame_chain = cfun->machine->frame.emit_frame_chain;
  rtx_insn *insn;

  /* Sign return address for functions.  */
  if (aarch64_return_address_signing_enabled ())
    {
      insn = emit_insn (gen_pacisp ());
      add_reg_note (insn, REG_CFA_TOGGLE_RA_MANGLE, const0_rtx);
      RTX_FRAME_RELATED_P (insn) = 1;
    }

  if (flag_stack_usage_info)
    current_function_static_stack_size = constant_lower_bound (frame_size);

  if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK)
    {
      if (crtl->is_leaf && !cfun->calls_alloca)
	{
	  if (maybe_gt (frame_size, PROBE_INTERVAL)
	      && maybe_gt (frame_size, get_stack_check_protect ()))
	    aarch64_emit_probe_stack_range (get_stack_check_protect (),
					    (frame_size
					     - get_stack_check_protect ()));
	}
      else if (maybe_gt (frame_size, 0))
	aarch64_emit_probe_stack_range (get_stack_check_protect (), frame_size);
    }

  rtx ip0_rtx = gen_rtx_REG (Pmode, IP0_REGNUM);
  rtx ip1_rtx = gen_rtx_REG (Pmode, IP1_REGNUM);

  aarch64_sub_sp (ip0_rtx, ip1_rtx, initial_adjust, true);

  if (callee_adjust != 0)
    aarch64_push_regs (reg1, reg2, callee_adjust);

  if (emit_frame_chain)
    {
      poly_int64 reg_offset = callee_adjust;
      if (callee_adjust == 0)
	{
	  reg1 = R29_REGNUM;
	  reg2 = R30_REGNUM;
	  reg_offset = callee_offset;
	  aarch64_save_callee_saves (DImode, reg_offset, reg1, reg2, false);
	}
      aarch64_add_offset (Pmode, hard_frame_pointer_rtx,
			  stack_pointer_rtx, callee_offset,
			  ip1_rtx, ip0_rtx, frame_pointer_needed);
      if (frame_pointer_needed && !frame_size.is_constant ())
	{
	  /* Variable-sized frames need to describe the save slot
	     address using DW_CFA_expression rather than DW_CFA_offset.
	     This means that, without taking further action, the
	     locations of the registers that we've already saved would
	     remain based on the stack pointer even after we redefine
	     the CFA based on the frame pointer.  We therefore need new
	     DW_CFA_expressions to re-express the save slots with addresses
	     based on the frame pointer.  */
	  rtx_insn *insn = get_last_insn ();
	  gcc_assert (RTX_FRAME_RELATED_P (insn));

	  /* Add an explicit CFA definition if this was previously
	     implicit.  */
	  if (!find_reg_note (insn, REG_CFA_ADJUST_CFA, NULL_RTX))
	    {
	      rtx src = plus_constant (Pmode, stack_pointer_rtx,
				       callee_offset);
	      add_reg_note (insn, REG_CFA_ADJUST_CFA,
			    gen_rtx_SET (hard_frame_pointer_rtx, src));
	    }

	  /* Change the save slot expressions for the registers that
	     we've already saved.  */
	  reg_offset -= callee_offset;
	  aarch64_add_cfa_expression (insn, reg2, hard_frame_pointer_rtx,
				      reg_offset + UNITS_PER_WORD);
	  aarch64_add_cfa_expression (insn, reg1, hard_frame_pointer_rtx,
				      reg_offset);
	}
      emit_insn (gen_stack_tie (stack_pointer_rtx, hard_frame_pointer_rtx));
    }

  aarch64_save_callee_saves (DImode, callee_offset, R0_REGNUM, R30_REGNUM,
			     callee_adjust != 0 || emit_frame_chain);
  aarch64_save_callee_saves (DFmode, callee_offset, V0_REGNUM, V31_REGNUM,
			     callee_adjust != 0 || emit_frame_chain);
  aarch64_sub_sp (ip1_rtx, ip0_rtx, final_adjust, !frame_pointer_needed);
}
4890
4f942779
RL
4891/* Return TRUE if we can use a simple_return insn.
4892
4893 This function checks whether the callee saved stack is empty, which
4894 means no restore actions are needed. The pro_and_epilogue pass uses
4895 this to check whether the shrink-wrapping optimization is feasible. */
4896
4897bool
4898aarch64_use_return_insn_p (void)
4899{
4900 if (!reload_completed)
4901 return false;
4902
4903 if (crtl->profile)
4904 return false;
4905
4906 aarch64_layout_frame ();
4907
6a70badb 4908 return known_eq (cfun->machine->frame.frame_size, 0);
4f942779
RL
4909}
4910
71bfb77a
WD
4911/* Generate the epilogue instructions for returning from a function.
4912 This is almost exactly the reverse of the prologue sequence, except
4913 that we need to insert barriers to avoid scheduling loads that read
4914 from a deallocated stack, and we optimize the unwind records by
4915 emitting them all together if possible. */
43e9d192
IB
4916void
4917aarch64_expand_epilogue (bool for_sibcall)
4918{
43e9d192 4919 aarch64_layout_frame ();
43e9d192 4920
6a70badb 4921 poly_int64 initial_adjust = cfun->machine->frame.initial_adjust;
71bfb77a 4922 HOST_WIDE_INT callee_adjust = cfun->machine->frame.callee_adjust;
6a70badb
RS
4923 poly_int64 final_adjust = cfun->machine->frame.final_adjust;
4924 poly_int64 callee_offset = cfun->machine->frame.callee_offset;
71bfb77a
WD
4925 unsigned reg1 = cfun->machine->frame.wb_candidate1;
4926 unsigned reg2 = cfun->machine->frame.wb_candidate2;
4927 rtx cfi_ops = NULL;
4928 rtx_insn *insn;
43cacb12
RS
4929 /* A stack clash protection prologue may not have left IP0_REGNUM or
4930 IP1_REGNUM in a usable state. The same is true for allocations
4931 with an SVE component, since we then need both temporary registers
4932 for each allocation. */
4933 bool can_inherit_p = (initial_adjust.is_constant ()
4934 && final_adjust.is_constant ()
4935 && !flag_stack_clash_protection);
44c0e7b9 4936
71bfb77a 4937 /* We need to add a memory barrier to prevent reads from the deallocated stack. */
6a70badb
RS
4938 bool need_barrier_p
4939 = maybe_ne (get_frame_size ()
4940 + cfun->machine->frame.saved_varargs_size, 0);
43e9d192 4941
71bfb77a 4942 /* Emit a barrier to prevent loads from a deallocated stack. */
6a70badb
RS
4943 if (maybe_gt (final_adjust, crtl->outgoing_args_size)
4944 || cfun->calls_alloca
8144a493 4945 || crtl->calls_eh_return)
43e9d192 4946 {
71bfb77a
WD
4947 emit_insn (gen_stack_tie (stack_pointer_rtx, stack_pointer_rtx));
4948 need_barrier_p = false;
4949 }
7e8c2bd5 4950
71bfb77a
WD
4951 /* Restore the stack pointer from the frame pointer if it may not
4952 be the same as the stack pointer. */
f5470a77
RS
4953 rtx ip0_rtx = gen_rtx_REG (Pmode, IP0_REGNUM);
4954 rtx ip1_rtx = gen_rtx_REG (Pmode, IP1_REGNUM);
6a70badb
RS
4955 if (frame_pointer_needed
4956 && (maybe_ne (final_adjust, 0) || cfun->calls_alloca))
f5470a77
RS
4957 /* If writeback is used when restoring callee-saves, the CFA
4958 is restored on the instruction doing the writeback. */
4959 aarch64_add_offset (Pmode, stack_pointer_rtx,
4960 hard_frame_pointer_rtx, -callee_offset,
43cacb12 4961 ip1_rtx, ip0_rtx, callee_adjust == 0);
71bfb77a 4962 else
43cacb12
RS
4963 aarch64_add_sp (ip1_rtx, ip0_rtx, final_adjust,
4964 !can_inherit_p || df_regs_ever_live_p (IP1_REGNUM));
43e9d192 4965
71bfb77a
WD
4966 aarch64_restore_callee_saves (DImode, callee_offset, R0_REGNUM, R30_REGNUM,
4967 callee_adjust != 0, &cfi_ops);
4968 aarch64_restore_callee_saves (DFmode, callee_offset, V0_REGNUM, V31_REGNUM,
4969 callee_adjust != 0, &cfi_ops);
43e9d192 4970
71bfb77a
WD
4971 if (need_barrier_p)
4972 emit_insn (gen_stack_tie (stack_pointer_rtx, stack_pointer_rtx));
4973
4974 if (callee_adjust != 0)
4975 aarch64_pop_regs (reg1, reg2, callee_adjust, &cfi_ops);
4976
6a70badb 4977 if (callee_adjust != 0 || maybe_gt (initial_adjust, 65536))
71bfb77a
WD
4978 {
4979 /* Emit delayed restores and set the CFA to be SP + initial_adjust. */
89ac681e 4980 insn = get_last_insn ();
71bfb77a
WD
4981 rtx new_cfa = plus_constant (Pmode, stack_pointer_rtx, initial_adjust);
4982 REG_NOTES (insn) = alloc_reg_note (REG_CFA_DEF_CFA, new_cfa, cfi_ops);
43e9d192 4983 RTX_FRAME_RELATED_P (insn) = 1;
71bfb77a 4984 cfi_ops = NULL;
43e9d192
IB
4985 }
4986
43cacb12
RS
4987 aarch64_add_sp (ip0_rtx, ip1_rtx, initial_adjust,
4988 !can_inherit_p || df_regs_ever_live_p (IP0_REGNUM));
7e8c2bd5 4989
71bfb77a
WD
4990 if (cfi_ops)
4991 {
4992 /* Emit delayed restores and reset the CFA to be SP. */
4993 insn = get_last_insn ();
4994 cfi_ops = alloc_reg_note (REG_CFA_DEF_CFA, stack_pointer_rtx, cfi_ops);
4995 REG_NOTES (insn) = cfi_ops;
4996 RTX_FRAME_RELATED_P (insn) = 1;
dd991abb
RH
4997 }
4998
db58fd89
JW
4999 /* We prefer to emit the combined return/authenticate instruction RETAA,
5000 however there are three cases in which we must instead emit an explicit
5001 authentication instruction.
5002
5003 1) Sibcalls don't return in a normal way, so if we're about to call one
5004 we must authenticate.
5005
5006 2) The RETAA instruction is not available before ARMv8.3-A, so if we are
5007 generating code for !TARGET_ARMV8_3 we can't use it and must
5008 explicitly authenticate.
5009
5010 3) On an eh_return path we make extra stack adjustments to update the
5011 canonical frame address to be the exception handler's CFA. We want
5012 to authenticate using the CFA of the function which calls eh_return.
5013 */
5014 if (aarch64_return_address_signing_enabled ()
5015 && (for_sibcall || !TARGET_ARMV8_3 || crtl->calls_eh_return))
27169e45
JW
5016 {
5017 insn = emit_insn (gen_autisp ());
5018 add_reg_note (insn, REG_CFA_TOGGLE_RA_MANGLE, const0_rtx);
5019 RTX_FRAME_RELATED_P (insn) = 1;
5020 }
db58fd89 5021
dd991abb
RH
5022 /* Stack adjustment for exception handler. */
5023 if (crtl->calls_eh_return)
5024 {
5025 /* We need to unwind the stack by the offset computed by
5026 EH_RETURN_STACKADJ_RTX. We have already reset the CFA
5027 to be SP; letting the CFA move during this adjustment
5028 is just as correct as retaining the CFA from the body
5029 of the function. Therefore, do nothing special. */
5030 emit_insn (gen_add2_insn (stack_pointer_rtx, EH_RETURN_STACKADJ_RTX));
43e9d192
IB
5031 }
5032
5033 emit_use (gen_rtx_REG (DImode, LR_REGNUM));
5034 if (!for_sibcall)
5035 emit_jump_insn (ret_rtx);
5036}
5037
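/* Illustrative sketch (not emitted verbatim by this file): for a small
   non-leaf function with a 32-byte frame and no SVE state, the prologue
   and epilogue expanded above typically boil down to:

	stp	x29, x30, [sp, -32]!	// callee_adjust: push FP and LR
	mov	x29, sp			// emit_frame_chain
	...
	ldp	x29, x30, [sp], 32	// pop FP/LR and deallocate
	ret

   With return-address signing enabled, a paciasp/autiasp pair (or a
   combined retaa on ARMv8.3-A) brackets this sequence, matching the
   REG_CFA_TOGGLE_RA_MANGLE notes added in both expanders.  */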
8144a493
WD
5038/* Implement EH_RETURN_HANDLER_RTX. EH returns need to either return
5039 normally or return to a previous frame after unwinding.
1c960e02 5040
8144a493
WD
5041 An EH return uses a single shared return sequence. The epilogue is
5042 exactly like a normal epilogue except that it has an extra input
5043 register (EH_RETURN_STACKADJ_RTX) which contains the stack adjustment
5044 that must be applied after the frame has been destroyed. An extra label
5045 is inserted before the epilogue which initializes this register to zero,
5046 and this is the entry point for a normal return.
43e9d192 5047
8144a493
WD
5048 An actual EH return updates the return address, initializes the stack
5049 adjustment and jumps directly into the epilogue (bypassing the zeroing
5050 of the adjustment). Since the return address is typically saved on the
5051 stack when a function makes a call, the saved LR must be updated outside
5052 the epilogue.
43e9d192 5053
8144a493
WD
5054 This poses problems as the store is generated well before the epilogue,
5055 so the offset of LR is not known yet. Also, optimizations will remove the
5056 store as it appears dead, even after the epilogue is generated (as the
5057 base or offset for loading LR is different in many cases).
43e9d192 5058
8144a493
WD
5059 To avoid these problems this implementation forces the frame pointer
5060 in eh_return functions so that the location of LR is fixed and known early.
5061 It also marks the store volatile, so no optimization is permitted to
5062 remove the store. */
5063rtx
5064aarch64_eh_return_handler_rtx (void)
5065{
5066 rtx tmp = gen_frame_mem (Pmode,
5067 plus_constant (Pmode, hard_frame_pointer_rtx, UNITS_PER_WORD));
43e9d192 5068
8144a493
WD
5069 /* Mark the store volatile, so no optimization is permitted to remove it. */
5070 MEM_VOLATILE_P (tmp) = true;
5071 return tmp;
43e9d192
IB
5072}
5073
43e9d192
IB
5074/* Output code to add DELTA to the first argument, and then jump
5075 to FUNCTION. Used for C++ multiple inheritance. */
5076static void
5077aarch64_output_mi_thunk (FILE *file, tree thunk ATTRIBUTE_UNUSED,
5078 HOST_WIDE_INT delta,
5079 HOST_WIDE_INT vcall_offset,
5080 tree function)
5081{
5082 /* The this pointer is always in x0. Note that this differs from
5083 Arm, where the this pointer may be bumped to r1 if r0 is required
5084 to return a pointer to an aggregate. On AArch64 a result value
5085 pointer will be in x8. */
5086 int this_regno = R0_REGNUM;
5d8a22a5
DM
5087 rtx this_rtx, temp0, temp1, addr, funexp;
5088 rtx_insn *insn;
43e9d192 5089
75f1d6fc
SN
5090 reload_completed = 1;
5091 emit_note (NOTE_INSN_PROLOGUE_END);
43e9d192 5092
f5470a77
RS
5093 this_rtx = gen_rtx_REG (Pmode, this_regno);
5094 temp0 = gen_rtx_REG (Pmode, IP0_REGNUM);
5095 temp1 = gen_rtx_REG (Pmode, IP1_REGNUM);
5096
43e9d192 5097 if (vcall_offset == 0)
43cacb12 5098 aarch64_add_offset (Pmode, this_rtx, this_rtx, delta, temp1, temp0, false);
43e9d192
IB
5099 else
5100 {
28514dda 5101 gcc_assert ((vcall_offset & (POINTER_BYTES - 1)) == 0);
43e9d192 5102
75f1d6fc
SN
5103 addr = this_rtx;
5104 if (delta != 0)
5105 {
5106 if (delta >= -256 && delta < 256)
5107 addr = gen_rtx_PRE_MODIFY (Pmode, this_rtx,
5108 plus_constant (Pmode, this_rtx, delta));
5109 else
43cacb12
RS
5110 aarch64_add_offset (Pmode, this_rtx, this_rtx, delta,
5111 temp1, temp0, false);
43e9d192
IB
5112 }
5113
28514dda
YZ
5114 if (Pmode == ptr_mode)
5115 aarch64_emit_move (temp0, gen_rtx_MEM (ptr_mode, addr));
5116 else
5117 aarch64_emit_move (temp0,
5118 gen_rtx_ZERO_EXTEND (Pmode,
5119 gen_rtx_MEM (ptr_mode, addr)));
75f1d6fc 5120
28514dda 5121 if (vcall_offset >= -256 && vcall_offset < 4096 * POINTER_BYTES)
75f1d6fc 5122 addr = plus_constant (Pmode, temp0, vcall_offset);
43e9d192
IB
5123 else
5124 {
f43657b4
JW
5125 aarch64_internal_mov_immediate (temp1, GEN_INT (vcall_offset), true,
5126 Pmode);
75f1d6fc 5127 addr = gen_rtx_PLUS (Pmode, temp0, temp1);
43e9d192
IB
5128 }
5129
28514dda
YZ
5130 if (Pmode == ptr_mode)
5131 aarch64_emit_move (temp1, gen_rtx_MEM (ptr_mode,addr));
5132 else
5133 aarch64_emit_move (temp1,
5134 gen_rtx_SIGN_EXTEND (Pmode,
5135 gen_rtx_MEM (ptr_mode, addr)));
5136
75f1d6fc 5137 emit_insn (gen_add2_insn (this_rtx, temp1));
43e9d192
IB
5138 }
5139
75f1d6fc
SN
5140 /* Generate a tail call to the target function. */
5141 if (!TREE_USED (function))
5142 {
5143 assemble_external (function);
5144 TREE_USED (function) = 1;
5145 }
5146 funexp = XEXP (DECL_RTL (function), 0);
5147 funexp = gen_rtx_MEM (FUNCTION_MODE, funexp);
5148 insn = emit_call_insn (gen_sibcall (funexp, const0_rtx, NULL_RTX));
5149 SIBLING_CALL_P (insn) = 1;
5150
5151 insn = get_insns ();
5152 shorten_branches (insn);
5153 final_start_function (insn, file, 1);
5154 final (insn, file, 1);
43e9d192 5155 final_end_function ();
75f1d6fc
SN
5156
5157 /* Stop pretending to be a post-reload pass. */
5158 reload_completed = 0;
43e9d192
IB
5159}
5160
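/* Illustrative sketch (hypothetical C++ types, not part of GCC) of why
   these this-adjusting thunks exist: with multiple inheritance the B
   subobject lives at a nonzero offset inside D, so a virtual call made
   through a B* must bump the this pointer in x0 by DELTA before it can
   enter D's override, which is exactly what the code above emits.  */

struct A { virtual void f () {} long pad; };
struct B { virtual void g () {} };
struct D : A, B { void g () override {} };

void
call_g (B *p)
{
  /* For a D object, B's vtable slot for g holds a thunk that subtracts
     the offset of the B subobject from 'this' and tail-calls D::g.  */
  p->g ();
}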
43e9d192
IB
5161static bool
5162aarch64_tls_referenced_p (rtx x)
5163{
5164 if (!TARGET_HAVE_TLS)
5165 return false;
e7de8563
RS
5166 subrtx_iterator::array_type array;
5167 FOR_EACH_SUBRTX (iter, array, x, ALL)
5168 {
5169 const_rtx x = *iter;
5170 if (GET_CODE (x) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (x) != 0)
5171 return true;
5172 /* Don't recurse into UNSPEC_TLS looking for TLS symbols; these are
5173 TLS offsets, not real symbol references. */
5174 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
5175 iter.skip_subrtxes ();
5176 }
5177 return false;
43e9d192
IB
5178}
5179
5180
43e9d192
IB
5181/* Return true if val can be encoded as a 12-bit unsigned immediate with
5182 a left shift of 0 or 12 bits. */
5183bool
5184aarch64_uimm12_shift (HOST_WIDE_INT val)
5185{
5186 return ((val & (((HOST_WIDE_INT) 0xfff) << 0)) == val
5187 || (val & (((HOST_WIDE_INT) 0xfff) << 12)) == val
5188 );
5189}
5190
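/* Illustrative sketch (not part of GCC): the same rule as a standalone
   check with sample values, mirroring aarch64_uimm12_shift above.  This
   is the immediate form accepted by ADD/SUB (immediate).  */

#include <cassert>

static bool
uimm12_shift (long long val)
{
  return (val & 0xfffLL) == val || (val & (0xfffLL << 12)) == val;
}

int
main ()
{
  assert (uimm12_shift (0xabc));	/* add x0, x1, #0xabc  */
  assert (uimm12_shift (0xabc000));	/* add x0, x1, #0xabc, lsl #12  */
  assert (!uimm12_shift (0x1001));	/* set bits span both fields  */
}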
5191
5192/* Return true if val is an immediate that can be loaded into a
5193 register by a MOVZ instruction. */
5194static bool
77e994c9 5195aarch64_movw_imm (HOST_WIDE_INT val, scalar_int_mode mode)
43e9d192
IB
5196{
5197 if (GET_MODE_SIZE (mode) > 4)
5198 {
5199 if ((val & (((HOST_WIDE_INT) 0xffff) << 32)) == val
5200 || (val & (((HOST_WIDE_INT) 0xffff) << 48)) == val)
5201 return 1;
5202 }
5203 else
5204 {
43cacb12
RS
5205 /* Ignore sign extension. */
5206 val &= (HOST_WIDE_INT) 0xffffffff;
5207 }
5208 return ((val & (((HOST_WIDE_INT) 0xffff) << 0)) == val
5209 || (val & (((HOST_WIDE_INT) 0xffff) << 16)) == val);
5210}
5211
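/* Illustrative sketch (not part of GCC): DImode data points for the
   MOVZ test above.  Values whose set bits sit in one aligned 16-bit
   field are accepted:

     0x0000ffff00000000  ->  movz x0, #0xffff, lsl #32
     0x00000000abcd0000  ->  movz x0, #0xabcd, lsl #16

   while 0x0000000100010000 is rejected, because its set bits straddle
   two 16-bit fields.  */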
5212/* VAL is a value with the inner mode of MODE. Replicate it to fill a
5213 64-bit (DImode) integer. */
5214
5215static unsigned HOST_WIDE_INT
5216aarch64_replicate_bitmask_imm (unsigned HOST_WIDE_INT val, machine_mode mode)
5217{
5218 unsigned int size = GET_MODE_UNIT_PRECISION (mode);
5219 while (size < 64)
5220 {
5221 val &= (HOST_WIDE_INT_1U << size) - 1;
5222 val |= val << size;
5223 size *= 2;
43e9d192 5224 }
43cacb12 5225 return val;
43e9d192
IB
5226}
5227
a64c73a2
WD
5228/* Multipliers for repeating bitmasks of width 32, 16, 8, 4, and 2. */
5229
5230static const unsigned HOST_WIDE_INT bitmask_imm_mul[] =
5231 {
5232 0x0000000100000001ull,
5233 0x0001000100010001ull,
5234 0x0101010101010101ull,
5235 0x1111111111111111ull,
5236 0x5555555555555555ull,
5237 };
5238
43e9d192
IB
5239
5240/* Return true if val is a valid bitmask immediate. */
a64c73a2 5241
43e9d192 5242bool
a64c73a2 5243aarch64_bitmask_imm (HOST_WIDE_INT val_in, machine_mode mode)
43e9d192 5244{
a64c73a2
WD
5245 unsigned HOST_WIDE_INT val, tmp, mask, first_one, next_one;
5246 int bits;
5247
5248 /* Check for a single sequence of one bits and return quickly if so.
5249 The special cases of all ones and all zeroes return false. */
43cacb12 5250 val = aarch64_replicate_bitmask_imm (val_in, mode);
a64c73a2
WD
5251 tmp = val + (val & -val);
5252
5253 if (tmp == (tmp & -tmp))
5254 return (val + 1) > 1;
5255
5256 /* Replicate 32-bit immediates so we can treat them as 64-bit. */
5257 if (mode == SImode)
5258 val = (val << 32) | (val & 0xffffffff);
5259
5260 /* Invert if the immediate doesn't start with a zero bit - this means we
5261 only need to search for sequences of one bits. */
5262 if (val & 1)
5263 val = ~val;
5264
5265 /* Find the first set bit and set tmp to val with the first sequence of one
5266 bits removed. Return success if there is a single sequence of ones. */
5267 first_one = val & -val;
5268 tmp = val & (val + first_one);
5269
5270 if (tmp == 0)
5271 return true;
5272
5273 /* Find the next set bit and compute the difference in bit position. */
5274 next_one = tmp & -tmp;
5275 bits = clz_hwi (first_one) - clz_hwi (next_one);
5276 mask = val ^ tmp;
5277
5278 /* Check the bit position difference is a power of 2, and that the first
5279 sequence of one bits fits within 'bits' bits. */
5280 if ((mask >> bits) != 0 || bits != (bits & -bits))
5281 return false;
5282
5283 /* Check the sequence of one bits is repeated 64/bits times. */
5284 return val == mask * bitmask_imm_mul[__builtin_clz (bits) - 26];
43e9d192
IB
5285}
5286
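/* Illustrative sketch (not part of GCC): a slower but transparent
   reference check of the same property, written directly from the
   architectural definition of a logical immediate -- an element of 2,
   4, 8, 16, 32 or 64 bits holding a single (possibly rotated)
   contiguous run of ones, replicated to fill 64 bits, with all-zeros
   and all-ones excluded.  Assumes VAL has already been replicated to
   64 bits as above.  */

static bool
ref_bitmask_imm (unsigned long long val)
{
  if (val == 0 || ~val == 0)
    return false;

  for (int size = 2; size <= 64; size *= 2)
    {
      unsigned long long mask = size == 64 ? ~0ULL : (1ULL << size) - 1;
      unsigned long long elt = val & mask;

      /* The element must replicate across all 64 bits.  */
      bool replicated = true;
      for (int i = size; i < 64 && replicated; i += size)
	replicated = ((val >> i) & mask) == elt;
      if (!replicated)
	continue;

      /* Some rotation of the element must be a contiguous run of ones
	 starting at bit 0, i.e. of the form 2^n - 1.  */
      for (int rot = 0; rot < size; rot++)
	{
	  unsigned long long r = elt;
	  if (rot != 0)
	    r = ((elt >> rot) | (elt << (size - rot))) & mask;
	  if (r != 0 && (r & (r + 1)) == 0)
	    return true;
	}
      return false;
    }
  return false;
}

/* E.g. 0x5555555555555555 and 0x0000ffff0000ffff pass;
   0x0000000012345678 does not.  */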
43fd192f
MC
5287/* Create a mask of ones, covering the lowest to highest bits set in VAL_IN.
5288 Assumed precondition: VAL_IN is not zero. */
5289
5290unsigned HOST_WIDE_INT
5291aarch64_and_split_imm1 (HOST_WIDE_INT val_in)
5292{
5293 int lowest_bit_set = ctz_hwi (val_in);
5294 int highest_bit_set = floor_log2 (val_in);
5295 gcc_assert (val_in != 0);
5296
5297 return ((HOST_WIDE_INT_UC (2) << highest_bit_set) -
5298 (HOST_WIDE_INT_1U << lowest_bit_set));
5299}
5300
5301/* Create a constant in which all bits outside the range from the lowest
5302 set bit to the highest set bit of VAL_IN are set to 1. */
5303
5304unsigned HOST_WIDE_INT
5305aarch64_and_split_imm2 (HOST_WIDE_INT val_in)
5306{
5307 return val_in | ~aarch64_and_split_imm1 (val_in);
5308}
5309
5310/* Return true if VAL_IN is a valid 'and' bitmask immediate. */
5311
5312bool
5313aarch64_and_bitmask_imm (unsigned HOST_WIDE_INT val_in, machine_mode mode)
5314{
77e994c9
RS
5315 scalar_int_mode int_mode;
5316 if (!is_a <scalar_int_mode> (mode, &int_mode))
5317 return false;
5318
5319 if (aarch64_bitmask_imm (val_in, int_mode))
43fd192f
MC
5320 return false;
5321
77e994c9 5322 if (aarch64_move_imm (val_in, int_mode))
43fd192f
MC
5323 return false;
5324
5325 unsigned HOST_WIDE_INT imm2 = aarch64_and_split_imm2 (val_in);
5326
77e994c9 5327 return aarch64_bitmask_imm (imm2, int_mode);
43fd192f 5328}
43e9d192
IB
5329
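/* Illustrative sketch (not part of GCC) of the split above: take
   VAL_IN = 0x00f000f0 in DImode, which is neither a MOV-able nor a
   bitmask immediate.  Its lowest and highest set bits are 4 and 23:

     imm1 = 0x00fffff0                       (ones from bit 4 to bit 23)
     imm2 = VAL_IN | ~imm1 = 0xfffffffffff000ff

   imm2's zero bits form one contiguous run, so it is a (rotated)
   bitmask immediate, and imm1 is one trivially.  Since
   imm1 & imm2 == VAL_IN, the single AND can be split into

     and x0, x1, #0xfffffffffff000ff
     and x0, x0, #0x00fffff0  */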
5330/* Return true if val is an immediate that can be loaded into a
5331 register in a single instruction. */
5332bool
ef4bddc2 5333aarch64_move_imm (HOST_WIDE_INT val, machine_mode mode)
43e9d192 5334{
77e994c9
RS
5335 scalar_int_mode int_mode;
5336 if (!is_a <scalar_int_mode> (mode, &int_mode))
5337 return false;
5338
5339 if (aarch64_movw_imm (val, int_mode) || aarch64_movw_imm (~val, int_mode))
43e9d192 5340 return 1;
77e994c9 5341 return aarch64_bitmask_imm (val, int_mode);
43e9d192
IB
5342}
5343
5344static bool
ef4bddc2 5345aarch64_cannot_force_const_mem (machine_mode mode ATTRIBUTE_UNUSED, rtx x)
43e9d192
IB
5346{
5347 rtx base, offset;
7eda14e1 5348
43e9d192
IB
5349 if (GET_CODE (x) == HIGH)
5350 return true;
5351
43cacb12
RS
5352 /* There's no way to calculate VL-based values using relocations. */
5353 subrtx_iterator::array_type array;
5354 FOR_EACH_SUBRTX (iter, array, x, ALL)
5355 if (GET_CODE (*iter) == CONST_POLY_INT)
5356 return true;
5357
43e9d192
IB
5358 split_const (x, &base, &offset);
5359 if (GET_CODE (base) == SYMBOL_REF || GET_CODE (base) == LABEL_REF)
28514dda 5360 {
43cacb12 5361 if (aarch64_classify_symbol (base, INTVAL (offset))
28514dda
YZ
5362 != SYMBOL_FORCE_TO_MEM)
5363 return true;
5364 else
5365 /* Avoid generating a 64-bit relocation in ILP32; leave it
5366 to aarch64_expand_mov_immediate to handle properly. */
5367 return mode != ptr_mode;
5368 }
43e9d192
IB
5369
5370 return aarch64_tls_referenced_p (x);
5371}
5372
e79136e4
WD
5373/* Implement TARGET_CASE_VALUES_THRESHOLD.
5374 The expansion for a table switch is quite expensive due to the number
5375 of instructions, the table lookup and hard to predict indirect jump.
5376 When optimizing for speed with -O3 enabled, use the per-core tuning if
5377 set, otherwise use tables for > 16 cases as a tradeoff between size and
5378 performance. When optimizing for size, use the default setting. */
50487d79
EM
5379
5380static unsigned int
5381aarch64_case_values_threshold (void)
5382{
5383 /* Use the specified limit for the number of cases before using jump
5384 tables at higher optimization levels. */
5385 if (optimize > 2
5386 && selected_cpu->tune->max_case_values != 0)
5387 return selected_cpu->tune->max_case_values;
5388 else
e79136e4 5389 return optimize_size ? default_case_values_threshold () : 17;
50487d79
EM
5390}
5391
43e9d192
IB
5392/* Return true if register REGNO is a valid index register.
5393 STRICT_P is true if REG_OK_STRICT is in effect. */
5394
5395bool
5396aarch64_regno_ok_for_index_p (int regno, bool strict_p)
5397{
5398 if (!HARD_REGISTER_NUM_P (regno))
5399 {
5400 if (!strict_p)
5401 return true;
5402
5403 if (!reg_renumber)
5404 return false;
5405
5406 regno = reg_renumber[regno];
5407 }
5408 return GP_REGNUM_P (regno);
5409}
5410
5411/* Return true if register REGNO is a valid base register.
5412 STRICT_P is true if REG_OK_STRICT is in effect. */
5413
5414bool
5415aarch64_regno_ok_for_base_p (int regno, bool strict_p)
5416{
5417 if (!HARD_REGISTER_NUM_P (regno))
5418 {
5419 if (!strict_p)
5420 return true;
5421
5422 if (!reg_renumber)
5423 return false;
5424
5425 regno = reg_renumber[regno];
5426 }
5427
5428 /* The fake registers will be eliminated to either the stack or
5429 hard frame pointer, both of which are usually valid base registers.
5430 Reload deals with the cases where the eliminated form isn't valid. */
5431 return (GP_REGNUM_P (regno)
5432 || regno == SP_REGNUM
5433 || regno == FRAME_POINTER_REGNUM
5434 || regno == ARG_POINTER_REGNUM);
5435}
5436
5437/* Return true if X is a valid base register.
5438 STRICT_P is true if REG_OK_STRICT is in effect. */
5439
5440static bool
5441aarch64_base_register_rtx_p (rtx x, bool strict_p)
5442{
76160199
RS
5443 if (!strict_p
5444 && GET_CODE (x) == SUBREG
5445 && contains_reg_of_mode[GENERAL_REGS][GET_MODE (SUBREG_REG (x))])
43e9d192
IB
5446 x = SUBREG_REG (x);
5447
5448 return (REG_P (x) && aarch64_regno_ok_for_base_p (REGNO (x), strict_p));
5449}
5450
5451/* Return true if address offset is a valid index. If it is, fill in INFO
5452 appropriately. STRICT_P is true if REG_OK_STRICT is in effect. */
5453
5454static bool
5455aarch64_classify_index (struct aarch64_address_info *info, rtx x,
ef4bddc2 5456 machine_mode mode, bool strict_p)
43e9d192
IB
5457{
5458 enum aarch64_address_type type;
5459 rtx index;
5460 int shift;
5461
5462 /* (reg:P) */
5463 if ((REG_P (x) || GET_CODE (x) == SUBREG)
5464 && GET_MODE (x) == Pmode)
5465 {
5466 type = ADDRESS_REG_REG;
5467 index = x;
5468 shift = 0;
5469 }
5470 /* (sign_extend:DI (reg:SI)) */
5471 else if ((GET_CODE (x) == SIGN_EXTEND
5472 || GET_CODE (x) == ZERO_EXTEND)
5473 && GET_MODE (x) == DImode
5474 && GET_MODE (XEXP (x, 0)) == SImode)
5475 {
5476 type = (GET_CODE (x) == SIGN_EXTEND)
5477 ? ADDRESS_REG_SXTW : ADDRESS_REG_UXTW;
5478 index = XEXP (x, 0);
5479 shift = 0;
5480 }
5481 /* (mult:DI (sign_extend:DI (reg:SI)) (const_int scale)) */
5482 else if (GET_CODE (x) == MULT
5483 && (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
5484 || GET_CODE (XEXP (x, 0)) == ZERO_EXTEND)
5485 && GET_MODE (XEXP (x, 0)) == DImode
5486 && GET_MODE (XEXP (XEXP (x, 0), 0)) == SImode
5487 && CONST_INT_P (XEXP (x, 1)))
5488 {
5489 type = (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND)
5490 ? ADDRESS_REG_SXTW : ADDRESS_REG_UXTW;
5491 index = XEXP (XEXP (x, 0), 0);
5492 shift = exact_log2 (INTVAL (XEXP (x, 1)));
5493 }
5494 /* (ashift:DI (sign_extend:DI (reg:SI)) (const_int shift)) */
5495 else if (GET_CODE (x) == ASHIFT
5496 && (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
5497 || GET_CODE (XEXP (x, 0)) == ZERO_EXTEND)
5498 && GET_MODE (XEXP (x, 0)) == DImode
5499 && GET_MODE (XEXP (XEXP (x, 0), 0)) == SImode
5500 && CONST_INT_P (XEXP (x, 1)))
5501 {
5502 type = (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND)
5503 ? ADDRESS_REG_SXTW : ADDRESS_REG_UXTW;
5504 index = XEXP (XEXP (x, 0), 0);
5505 shift = INTVAL (XEXP (x, 1));
5506 }
5507 /* (sign_extract:DI (mult:DI (reg:DI) (const_int scale)) 32+shift 0) */
5508 else if ((GET_CODE (x) == SIGN_EXTRACT
5509 || GET_CODE (x) == ZERO_EXTRACT)
5510 && GET_MODE (x) == DImode
5511 && GET_CODE (XEXP (x, 0)) == MULT
5512 && GET_MODE (XEXP (XEXP (x, 0), 0)) == DImode
5513 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))
5514 {
5515 type = (GET_CODE (x) == SIGN_EXTRACT)
5516 ? ADDRESS_REG_SXTW : ADDRESS_REG_UXTW;
5517 index = XEXP (XEXP (x, 0), 0);
5518 shift = exact_log2 (INTVAL (XEXP (XEXP (x, 0), 1)));
5519 if (INTVAL (XEXP (x, 1)) != 32 + shift
5520 || INTVAL (XEXP (x, 2)) != 0)
5521 shift = -1;
5522 }
5523 /* (and:DI (mult:DI (reg:DI) (const_int scale))
5524 (const_int 0xffffffff<<shift)) */
5525 else if (GET_CODE (x) == AND
5526 && GET_MODE (x) == DImode
5527 && GET_CODE (XEXP (x, 0)) == MULT
5528 && GET_MODE (XEXP (XEXP (x, 0), 0)) == DImode
5529 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
5530 && CONST_INT_P (XEXP (x, 1)))
5531 {
5532 type = ADDRESS_REG_UXTW;
5533 index = XEXP (XEXP (x, 0), 0);
5534 shift = exact_log2 (INTVAL (XEXP (XEXP (x, 0), 1)));
5535 if (INTVAL (XEXP (x, 1)) != (HOST_WIDE_INT)0xffffffff << shift)
5536 shift = -1;
5537 }
5538 /* (sign_extract:DI (ashift:DI (reg:DI) (const_int shift)) 32+shift 0) */
5539 else if ((GET_CODE (x) == SIGN_EXTRACT
5540 || GET_CODE (x) == ZERO_EXTRACT)
5541 && GET_MODE (x) == DImode
5542 && GET_CODE (XEXP (x, 0)) == ASHIFT
5543 && GET_MODE (XEXP (XEXP (x, 0), 0)) == DImode
5544 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))
5545 {
5546 type = (GET_CODE (x) == SIGN_EXTRACT)
5547 ? ADDRESS_REG_SXTW : ADDRESS_REG_UXTW;
5548 index = XEXP (XEXP (x, 0), 0);
5549 shift = INTVAL (XEXP (XEXP (x, 0), 1));
5550 if (INTVAL (XEXP (x, 1)) != 32 + shift
5551 || INTVAL (XEXP (x, 2)) != 0)
5552 shift = -1;
5553 }
5554 /* (and:DI (ashift:DI (reg:DI) (const_int shift))
5555 (const_int 0xffffffff<<shift)) */
5556 else if (GET_CODE (x) == AND
5557 && GET_MODE (x) == DImode
5558 && GET_CODE (XEXP (x, 0)) == ASHIFT
5559 && GET_MODE (XEXP (XEXP (x, 0), 0)) == DImode
5560 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
5561 && CONST_INT_P (XEXP (x, 1)))
5562 {
5563 type = ADDRESS_REG_UXTW;
5564 index = XEXP (XEXP (x, 0), 0);
5565 shift = INTVAL (XEXP (XEXP (x, 0), 1));
5566 if (INTVAL (XEXP (x, 1)) != (HOST_WIDE_INT)0xffffffff << shift)
5567 shift = -1;
5568 }
5569 /* (mult:P (reg:P) (const_int scale)) */
5570 else if (GET_CODE (x) == MULT
5571 && GET_MODE (x) == Pmode
5572 && GET_MODE (XEXP (x, 0)) == Pmode
5573 && CONST_INT_P (XEXP (x, 1)))
5574 {
5575 type = ADDRESS_REG_REG;
5576 index = XEXP (x, 0);
5577 shift = exact_log2 (INTVAL (XEXP (x, 1)));
5578 }
5579 /* (ashift:P (reg:P) (const_int shift)) */
5580 else if (GET_CODE (x) == ASHIFT
5581 && GET_MODE (x) == Pmode
5582 && GET_MODE (XEXP (x, 0)) == Pmode
5583 && CONST_INT_P (XEXP (x, 1)))
5584 {
5585 type = ADDRESS_REG_REG;
5586 index = XEXP (x, 0);
5587 shift = INTVAL (XEXP (x, 1));
5588 }
5589 else
5590 return false;
5591
76160199
RS
5592 if (!strict_p
5593 && GET_CODE (index) == SUBREG
5594 && contains_reg_of_mode[GENERAL_REGS][GET_MODE (SUBREG_REG (index))])
43e9d192
IB
5595 index = SUBREG_REG (index);
5596
43cacb12
RS
5597 if (aarch64_sve_data_mode_p (mode))
5598 {
5599 if (type != ADDRESS_REG_REG
5600 || (1 << shift) != GET_MODE_UNIT_SIZE (mode))
5601 return false;
5602 }
5603 else
5604 {
5605 if (shift != 0
5606 && !(IN_RANGE (shift, 1, 3)
5607 && known_eq (1 << shift, GET_MODE_SIZE (mode))))
5608 return false;
5609 }
5610
5611 if (REG_P (index)
43e9d192
IB
5612 && aarch64_regno_ok_for_index_p (REGNO (index), strict_p))
5613 {
5614 info->type = type;
5615 info->offset = index;
5616 info->shift = shift;
5617 return true;
5618 }
5619
5620 return false;
5621}
5622
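/* Illustrative sketch (not part of GCC): the index forms recognized
   above, shown inside the base-plus-index addresses they ultimately
   form, e.g. for a w1 index into an array of 32-bit elements based
   at x0:

     (plus (reg:DI x0)
	   (mult (sign_extend:DI (reg:SI w1)) (const_int 4)))
       ->  ldr w2, [x0, w1, sxtw #2]	   (ADDRESS_REG_SXTW, shift 2)

     (plus (reg:DI x0) (ashift (reg:DI x1) (const_int 3)))
       ->  ldr x2, [x0, x1, lsl #3]	   (ADDRESS_REG_REG, shift 3)

   The shift must match log2 of the access size, as checked above.  */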
abc52318
KT
5623/* Return true if MODE is one of the modes for which we
5624 support LDP/STP operations. */
5625
5626static bool
5627aarch64_mode_valid_for_sched_fusion_p (machine_mode mode)
5628{
5629 return mode == SImode || mode == DImode
5630 || mode == SFmode || mode == DFmode
5631 || (aarch64_vector_mode_supported_p (mode)
9f5361c8
KT
5632 && (known_eq (GET_MODE_SIZE (mode), 8)
5633 || (known_eq (GET_MODE_SIZE (mode), 16)
5634 && (aarch64_tune_params.extra_tuning_flags
5635 & AARCH64_EXTRA_TUNE_NO_LDP_STP_QREGS) == 0)));
abc52318
KT
5636}
5637
9e0218fc
RH
5638/* Return true if REGNO is a virtual pointer register, or an eliminable
5639 "soft" frame register. Like REGNO_PTR_FRAME_P except that we don't
5640 include stack_pointer or hard_frame_pointer. */
5641static bool
5642virt_or_elim_regno_p (unsigned regno)
5643{
5644 return ((regno >= FIRST_VIRTUAL_REGISTER
5645 && regno <= LAST_VIRTUAL_POINTER_REGISTER)
5646 || regno == FRAME_POINTER_REGNUM
5647 || regno == ARG_POINTER_REGNUM);
5648}
5649
a97d8b98
RS
5650/* Return true if X is a valid address of type TYPE for machine mode MODE.
5651 If it is, fill in INFO appropriately. STRICT_P is true if
5652 REG_OK_STRICT is in effect. */
43e9d192
IB
5653
5654static bool
5655aarch64_classify_address (struct aarch64_address_info *info,
a97d8b98
RS
5656 rtx x, machine_mode mode, bool strict_p,
5657 aarch64_addr_query_type type = ADDR_QUERY_M)
43e9d192
IB
5658{
5659 enum rtx_code code = GET_CODE (x);
5660 rtx op0, op1;
dc640181
RS
5661 poly_int64 offset;
5662
6a70badb 5663 HOST_WIDE_INT const_size;
2d8c6dc1 5664
80d43579
WD
5665 /* On BE, we use load/store pair for all large int mode load/stores.
5666 TI/TFmode may also use a load/store pair. */
43cacb12
RS
5667 unsigned int vec_flags = aarch64_classify_vector_mode (mode);
5668 bool advsimd_struct_p = (vec_flags == (VEC_ADVSIMD | VEC_STRUCT));
a97d8b98 5669 bool load_store_pair_p = (type == ADDR_QUERY_LDP_STP
a25831ac 5670 || type == ADDR_QUERY_LDP_STP_N
80d43579
WD
5671 || mode == TImode
5672 || mode == TFmode
43cacb12 5673 || (BYTES_BIG_ENDIAN && advsimd_struct_p));
2d8c6dc1 5674
a25831ac
AV
5675 /* If we are dealing with ADDR_QUERY_LDP_STP_N that means the incoming mode
5676 corresponds to the actual size of the memory being loaded/stored and the
5677 mode used for the address calculation is half of that size. */
5678 if (type == ADDR_QUERY_LDP_STP_N
5679 && known_eq (GET_MODE_SIZE (mode), 16))
5680 mode = DFmode;
5681
6a70badb 5682 bool allow_reg_index_p = (!load_store_pair_p
43cacb12
RS
5683 && (known_lt (GET_MODE_SIZE (mode), 16)
5684 || vec_flags == VEC_ADVSIMD
5685 || vec_flags == VEC_SVE_DATA));
5686
5687 /* For SVE, only accept [Rn], [Rn, Rm, LSL #shift] and
5688 [Rn, #offset, MUL VL]. */
5689 if ((vec_flags & (VEC_SVE_DATA | VEC_SVE_PRED)) != 0
5690 && (code != REG && code != PLUS))
5691 return false;
2d8c6dc1
AH
5692
5693 /* On LE, for AdvSIMD, don't support anything other than POST_INC or
5694 REG addressing. */
43cacb12
RS
5695 if (advsimd_struct_p
5696 && !BYTES_BIG_ENDIAN
43e9d192
IB
5697 && (code != POST_INC && code != REG))
5698 return false;
5699
43cacb12
RS
5700 gcc_checking_assert (GET_MODE (x) == VOIDmode
5701 || SCALAR_INT_MODE_P (GET_MODE (x)));
5702
43e9d192
IB
5703 switch (code)
5704 {
5705 case REG:
5706 case SUBREG:
5707 info->type = ADDRESS_REG_IMM;
5708 info->base = x;
5709 info->offset = const0_rtx;
dc640181 5710 info->const_offset = 0;
43e9d192
IB
5711 return aarch64_base_register_rtx_p (x, strict_p);
5712
5713 case PLUS:
5714 op0 = XEXP (x, 0);
5715 op1 = XEXP (x, 1);
15c0c5c9
JW
5716
5717 if (! strict_p
4aa81c2e 5718 && REG_P (op0)
9e0218fc 5719 && virt_or_elim_regno_p (REGNO (op0))
dc640181 5720 && poly_int_rtx_p (op1, &offset))
15c0c5c9
JW
5721 {
5722 info->type = ADDRESS_REG_IMM;
5723 info->base = op0;
5724 info->offset = op1;
dc640181 5725 info->const_offset = offset;
15c0c5c9
JW
5726
5727 return true;
5728 }
5729
6a70badb 5730 if (maybe_ne (GET_MODE_SIZE (mode), 0)
dc640181
RS
5731 && aarch64_base_register_rtx_p (op0, strict_p)
5732 && poly_int_rtx_p (op1, &offset))
43e9d192 5733 {
43e9d192
IB
5734 info->type = ADDRESS_REG_IMM;
5735 info->base = op0;
5736 info->offset = op1;
dc640181 5737 info->const_offset = offset;
43e9d192
IB
5738
5739 /* TImode and TFmode values are allowed in both pairs of X
5740 registers and individual Q registers. The available
5741 address modes are:
5742 X,X: 7-bit signed scaled offset
5743 Q: 9-bit signed offset
5744 We conservatively require an offset representable in either mode.
8ed49fab
KT
5745 When performing the check for pairs of X registers i.e. LDP/STP
5746 pass down DImode since that is the natural size of the LDP/STP
5747 instruction memory accesses. */
43e9d192 5748 if (mode == TImode || mode == TFmode)
8ed49fab 5749 return (aarch64_offset_7bit_signed_scaled_p (DImode, offset)
8734dfac
WD
5750 && (offset_9bit_signed_unscaled_p (mode, offset)
5751 || offset_12bit_unsigned_scaled_p (mode, offset)));
43e9d192 5752
2d8c6dc1
AH
5753 /* A 7-bit offset check because OImode will emit an ldp/stp
5754 instruction (only big endian will get here).
5755 For ldp/stp instructions, the offset is scaled for the size of a
5756 single element of the pair. */
5757 if (mode == OImode)
5758 return aarch64_offset_7bit_signed_scaled_p (TImode, offset);
5759
5760 /* Three 9/12-bit offset checks because CImode will emit three
5761 ldr/str instructions (only big endian will get here). */
5762 if (mode == CImode)
5763 return (aarch64_offset_7bit_signed_scaled_p (TImode, offset)
5764 && (offset_9bit_signed_unscaled_p (V16QImode, offset + 32)
5765 || offset_12bit_unsigned_scaled_p (V16QImode,
5766 offset + 32)));
5767
5768 /* Two 7-bit offset checks because XImode will emit two ldp/stp
5769 instructions (only big endian will get here). */
5770 if (mode == XImode)
5771 return (aarch64_offset_7bit_signed_scaled_p (TImode, offset)
5772 && aarch64_offset_7bit_signed_scaled_p (TImode,
5773 offset + 32));
5774
43cacb12
RS
5775 /* Make "m" use the LD1 offset range for SVE data modes, so
5776 that pre-RTL optimizers like ivopts will target that range
5777 instead of the wider LDR/STR range. */
5778 if (vec_flags == VEC_SVE_DATA)
5779 return (type == ADDR_QUERY_M
5780 ? offset_4bit_signed_scaled_p (mode, offset)
5781 : offset_9bit_signed_scaled_p (mode, offset));
5782
9f4cbab8
RS
5783 if (vec_flags == (VEC_SVE_DATA | VEC_STRUCT))
5784 {
5785 poly_int64 end_offset = (offset
5786 + GET_MODE_SIZE (mode)
5787 - BYTES_PER_SVE_VECTOR);
5788 return (type == ADDR_QUERY_M
5789 ? offset_4bit_signed_scaled_p (mode, offset)
5790 : (offset_9bit_signed_scaled_p (SVE_BYTE_MODE, offset)
5791 && offset_9bit_signed_scaled_p (SVE_BYTE_MODE,
5792 end_offset)));
5793 }
5794
43cacb12
RS
5795 if (vec_flags == VEC_SVE_PRED)
5796 return offset_9bit_signed_scaled_p (mode, offset);
5797
2d8c6dc1 5798 if (load_store_pair_p)
6a70badb 5799 return ((known_eq (GET_MODE_SIZE (mode), 4)
9f5361c8
KT
5800 || known_eq (GET_MODE_SIZE (mode), 8)
5801 || known_eq (GET_MODE_SIZE (mode), 16))
44707478 5802 && aarch64_offset_7bit_signed_scaled_p (mode, offset));
43e9d192
IB
5803 else
5804 return (offset_9bit_signed_unscaled_p (mode, offset)
5805 || offset_12bit_unsigned_scaled_p (mode, offset));
5806 }
5807
5808 if (allow_reg_index_p)
5809 {
5810 /* Look for base + (scaled/extended) index register. */
5811 if (aarch64_base_register_rtx_p (op0, strict_p)
5812 && aarch64_classify_index (info, op1, mode, strict_p))
5813 {
5814 info->base = op0;
5815 return true;
5816 }
5817 if (aarch64_base_register_rtx_p (op1, strict_p)
5818 && aarch64_classify_index (info, op0, mode, strict_p))
5819 {
5820 info->base = op1;
5821 return true;
5822 }
5823 }
5824
5825 return false;
5826
5827 case POST_INC:
5828 case POST_DEC:
5829 case PRE_INC:
5830 case PRE_DEC:
5831 info->type = ADDRESS_REG_WB;
5832 info->base = XEXP (x, 0);
5833 info->offset = NULL_RTX;
5834 return aarch64_base_register_rtx_p (info->base, strict_p);
5835
5836 case POST_MODIFY:
5837 case PRE_MODIFY:
5838 info->type = ADDRESS_REG_WB;
5839 info->base = XEXP (x, 0);
5840 if (GET_CODE (XEXP (x, 1)) == PLUS
dc640181 5841 && poly_int_rtx_p (XEXP (XEXP (x, 1), 1), &offset)
43e9d192
IB
5842 && rtx_equal_p (XEXP (XEXP (x, 1), 0), info->base)
5843 && aarch64_base_register_rtx_p (info->base, strict_p))
5844 {
43e9d192 5845 info->offset = XEXP (XEXP (x, 1), 1);
dc640181 5846 info->const_offset = offset;
43e9d192
IB
5847
5848 /* TImode and TFmode values are allowed in both pairs of X
5849 registers and individual Q registers. The available
5850 address modes are:
5851 X,X: 7-bit signed scaled offset
5852 Q: 9-bit signed offset
5853 We conservatively require an offset representable in either mode.
5854 */
5855 if (mode == TImode || mode == TFmode)
44707478 5856 return (aarch64_offset_7bit_signed_scaled_p (mode, offset)
43e9d192
IB
5857 && offset_9bit_signed_unscaled_p (mode, offset));
5858
2d8c6dc1 5859 if (load_store_pair_p)
6a70badb 5860 return ((known_eq (GET_MODE_SIZE (mode), 4)
9f5361c8
KT
5861 || known_eq (GET_MODE_SIZE (mode), 8)
5862 || known_eq (GET_MODE_SIZE (mode), 16))
44707478 5863 && aarch64_offset_7bit_signed_scaled_p (mode, offset));
43e9d192
IB
5864 else
5865 return offset_9bit_signed_unscaled_p (mode, offset);
5866 }
5867 return false;
5868
5869 case CONST:
5870 case SYMBOL_REF:
5871 case LABEL_REF:
79517551
SN
5872 /* load literal: pc-relative constant pool entry. Only supported
5873 for SI mode or larger. */
43e9d192 5874 info->type = ADDRESS_SYMBOLIC;
2d8c6dc1 5875
6a70badb
RS
5876 if (!load_store_pair_p
5877 && GET_MODE_SIZE (mode).is_constant (&const_size)
5878 && const_size >= 4)
43e9d192
IB
5879 {
5880 rtx sym, addend;
5881
5882 split_const (x, &sym, &addend);
b4f50fd4
RR
5883 return ((GET_CODE (sym) == LABEL_REF
5884 || (GET_CODE (sym) == SYMBOL_REF
5885 && CONSTANT_POOL_ADDRESS_P (sym)
9ee6540a 5886 && aarch64_pcrelative_literal_loads)));
43e9d192
IB
5887 }
5888 return false;
5889
5890 case LO_SUM:
5891 info->type = ADDRESS_LO_SUM;
5892 info->base = XEXP (x, 0);
5893 info->offset = XEXP (x, 1);
5894 if (allow_reg_index_p
5895 && aarch64_base_register_rtx_p (info->base, strict_p))
5896 {
5897 rtx sym, offs;
5898 split_const (info->offset, &sym, &offs);
5899 if (GET_CODE (sym) == SYMBOL_REF
43cacb12
RS
5900 && (aarch64_classify_symbol (sym, INTVAL (offs))
5901 == SYMBOL_SMALL_ABSOLUTE))
43e9d192
IB
5902 {
5903 /* The symbol and offset must be aligned to the access size. */
5904 unsigned int align;
43e9d192
IB
5905
5906 if (CONSTANT_POOL_ADDRESS_P (sym))
5907 align = GET_MODE_ALIGNMENT (get_pool_mode (sym));
5908 else if (TREE_CONSTANT_POOL_ADDRESS_P (sym))
5909 {
5910 tree exp = SYMBOL_REF_DECL (sym);
5911 align = TYPE_ALIGN (TREE_TYPE (exp));
58e17cf8 5912 align = aarch64_constant_alignment (exp, align);
43e9d192
IB
5913 }
5914 else if (SYMBOL_REF_DECL (sym))
5915 align = DECL_ALIGN (SYMBOL_REF_DECL (sym));
6c031d8d
KV
5916 else if (SYMBOL_REF_HAS_BLOCK_INFO_P (sym)
5917 && SYMBOL_REF_BLOCK (sym) != NULL)
5918 align = SYMBOL_REF_BLOCK (sym)->alignment;
43e9d192
IB
5919 else
5920 align = BITS_PER_UNIT;
5921
6a70badb
RS
5922 poly_int64 ref_size = GET_MODE_SIZE (mode);
5923 if (known_eq (ref_size, 0))
43e9d192
IB
5924 ref_size = GET_MODE_SIZE (DImode);
5925
6a70badb
RS
5926 return (multiple_p (INTVAL (offs), ref_size)
5927 && multiple_p (align / BITS_PER_UNIT, ref_size));
43e9d192
IB
5928 }
5929 }
5930 return false;
5931
5932 default:
5933 return false;
5934 }
5935}
5936
9bf2f779
KT
5937/* Return true if the address X is valid for a PRFM instruction.
5938 STRICT_P is true if we should do strict checking with
5939 aarch64_classify_address. */
5940
5941bool
5942aarch64_address_valid_for_prefetch_p (rtx x, bool strict_p)
5943{
5944 struct aarch64_address_info addr;
5945
5946 /* PRFM accepts the same addresses as DImode... */
a97d8b98 5947 bool res = aarch64_classify_address (&addr, x, DImode, strict_p);
9bf2f779
KT
5948 if (!res)
5949 return false;
5950
5951 /* ... except writeback forms. */
5952 return addr.type != ADDRESS_REG_WB;
5953}
5954
43e9d192
IB
5955bool
5956aarch64_symbolic_address_p (rtx x)
5957{
5958 rtx offset;
5959
5960 split_const (x, &x, &offset);
5961 return GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == LABEL_REF;
5962}
5963
a6e0bfa7 5964/* Classify the base of symbolic expression X. */
da4f13a4
MS
5965
5966enum aarch64_symbol_type
a6e0bfa7 5967aarch64_classify_symbolic_expression (rtx x)
43e9d192
IB
5968{
5969 rtx offset;
da4f13a4 5970
43e9d192 5971 split_const (x, &x, &offset);
43cacb12 5972 return aarch64_classify_symbol (x, INTVAL (offset));
43e9d192
IB
5973}
5974
5975
5976/* Return TRUE if X is a legitimate address for accessing memory in
5977 mode MODE. */
5978static bool
ef4bddc2 5979aarch64_legitimate_address_hook_p (machine_mode mode, rtx x, bool strict_p)
43e9d192
IB
5980{
5981 struct aarch64_address_info addr;
5982
a97d8b98 5983 return aarch64_classify_address (&addr, x, mode, strict_p);
43e9d192
IB
5984}
5985
a97d8b98
RS
5986/* Return TRUE if X is a legitimate address of type TYPE for accessing
5987 memory in mode MODE. STRICT_P is true if REG_OK_STRICT is in effect. */
43e9d192 5988bool
a97d8b98
RS
5989aarch64_legitimate_address_p (machine_mode mode, rtx x, bool strict_p,
5990 aarch64_addr_query_type type)
43e9d192
IB
5991{
5992 struct aarch64_address_info addr;
5993
a97d8b98 5994 return aarch64_classify_address (&addr, x, mode, strict_p, type);
43e9d192
IB
5995}
5996
9005477f
RS
5997/* Implement TARGET_LEGITIMIZE_ADDRESS_DISPLACEMENT. */
5998
491ec060 5999static bool
9005477f
RS
6000aarch64_legitimize_address_displacement (rtx *offset1, rtx *offset2,
6001 poly_int64 orig_offset,
6002 machine_mode mode)
491ec060 6003{
6a70badb
RS
6004 HOST_WIDE_INT size;
6005 if (GET_MODE_SIZE (mode).is_constant (&size))
6006 {
9005477f
RS
6007 HOST_WIDE_INT const_offset, second_offset;
6008
6009 /* A general SVE offset is A * VQ + B. Remove the A component from
6010 coefficient 0 in order to get the constant B. */
6011 const_offset = orig_offset.coeffs[0] - orig_offset.coeffs[1];
6012
6013 /* Split an out-of-range address displacement into a base and
6014 offset. Use 4KB range for 1- and 2-byte accesses and a 16KB
6015 range otherwise to increase opportunities for sharing the base
6016 address between accesses of different sizes. Unaligned accesses use the signed
6017 9-bit range, TImode/TFmode use the intersection of signed
6018 scaled 7-bit and signed 9-bit offset. */
6a70badb 6019 if (mode == TImode || mode == TFmode)
9005477f
RS
6020 second_offset = ((const_offset + 0x100) & 0x1f8) - 0x100;
6021 else if ((const_offset & (size - 1)) != 0)
6022 second_offset = ((const_offset + 0x100) & 0x1ff) - 0x100;
6a70badb 6023 else
9005477f 6024 second_offset = const_offset & (size < 4 ? 0xfff : 0x3ffc);
491ec060 6025
9005477f
RS
6026 if (second_offset == 0 || known_eq (orig_offset, second_offset))
6027 return false;
6028
6029 /* Split the offset into second_offset and the rest. */
6030 *offset1 = gen_int_mode (orig_offset - second_offset, Pmode);
6031 *offset2 = gen_int_mode (second_offset, Pmode);
6032 return true;
6033 }
6034 else
6035 {
6036 /* Get the mode we should use as the basis of the range. For structure
6037 modes this is the mode of one vector. */
6038 unsigned int vec_flags = aarch64_classify_vector_mode (mode);
6039 machine_mode step_mode
6040 = (vec_flags & VEC_STRUCT) != 0 ? SVE_BYTE_MODE : mode;
6041
6042 /* Get the "mul vl" multiplier we'd like to use. */
6043 HOST_WIDE_INT factor = GET_MODE_SIZE (step_mode).coeffs[1];
6044 HOST_WIDE_INT vnum = orig_offset.coeffs[1] / factor;
6045 if (vec_flags & VEC_SVE_DATA)
6046 /* LDR supports a 9-bit range, but the move patterns for
6047 structure modes require all vectors to be in range of the
6048 same base. The simplest way of accommodating that while still
6049 promoting reuse of anchor points between different modes is
6050 to use an 8-bit range unconditionally. */
6051 vnum = ((vnum + 128) & 255) - 128;
6052 else
6053 /* Predicates are only handled singly, so we might as well use
6054 the full range. */
6055 vnum = ((vnum + 256) & 511) - 256;
6056 if (vnum == 0)
6057 return false;
6058
6059 /* Convert the "mul vl" multiplier into a byte offset. */
6060 poly_int64 second_offset = GET_MODE_SIZE (step_mode) * vnum;
6061 if (known_eq (second_offset, orig_offset))
6062 return false;
6063
6064 /* Split the offset into second_offset and the rest. */
6065 *offset1 = gen_int_mode (orig_offset - second_offset, Pmode);
6066 *offset2 = gen_int_mode (second_offset, Pmode);
6a70badb
RS
6067 return true;
6068 }
491ec060
WD
6069}
6070
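/* Illustrative sketch (not part of GCC) of the constant-offset split
   above: a 4-byte access at BASE + 0x10010 is out of range for the
   scaled unsigned 12-bit form (maximum offset 0x3ffc).  The offset is
   aligned, so second_offset = 0x10010 & 0x3ffc = 0x10, giving

     *offset1 = 0x10000   (added to the base; shareable anchor)
     *offset2 = 0x10      (folded into the load/store itself)

   so nearby accesses within the same 16KB window can reuse the
   anchored base.  */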
a2170965
TC
6071/* Return the binary representation of floating point constant VALUE in INTVAL.
6072 If the value cannot be converted, return false without setting INTVAL.
6073 The conversion is done in the given MODE. */
6074bool
6075aarch64_reinterpret_float_as_int (rtx value, unsigned HOST_WIDE_INT *intval)
6076{
6077
6078 /* We make a general exception for 0. */
6079 if (aarch64_float_const_zero_rtx_p (value))
6080 {
6081 *intval = 0;
6082 return true;
6083 }
6084
0d0e0188 6085 scalar_float_mode mode;
a2170965 6086 if (GET_CODE (value) != CONST_DOUBLE
0d0e0188 6087 || !is_a <scalar_float_mode> (GET_MODE (value), &mode)
a2170965
TC
6088 || GET_MODE_BITSIZE (mode) > HOST_BITS_PER_WIDE_INT
6089 /* Only support up to DF mode. */
6090 || GET_MODE_BITSIZE (mode) > GET_MODE_BITSIZE (DFmode))
6091 return false;
6092
6093 unsigned HOST_WIDE_INT ival = 0;
6094
6095 long res[2];
6096 real_to_target (res,
6097 CONST_DOUBLE_REAL_VALUE (value),
6098 REAL_MODE_FORMAT (mode));
6099
5c22bb48
TC
6100 if (mode == DFmode)
6101 {
6102 int order = BYTES_BIG_ENDIAN ? 1 : 0;
6103 ival = zext_hwi (res[order], 32);
6104 ival |= (zext_hwi (res[1 - order], 32) << 32);
6105 }
6106 else
6107 ival = zext_hwi (res[0], 32);
a2170965
TC
6108
6109 *intval = ival;
6110 return true;
6111}
6112
6113/* Return TRUE if rtx X is an immediate constant that can be moved using a
6114 single MOV(+MOVK) followed by an FMOV. */
6115bool
6116aarch64_float_const_rtx_p (rtx x)
6117{
6118 machine_mode mode = GET_MODE (x);
6119 if (mode == VOIDmode)
6120 return false;
6121
6122 /* Determine whether it's cheaper to write float constants as
6123 mov/movk pairs over ldr/adrp pairs. */
6124 unsigned HOST_WIDE_INT ival;
6125
6126 if (GET_CODE (x) == CONST_DOUBLE
6127 && SCALAR_FLOAT_MODE_P (mode)
6128 && aarch64_reinterpret_float_as_int (x, &ival))
6129 {
77e994c9
RS
6130 scalar_int_mode imode = (mode == HFmode
6131 ? SImode
6132 : int_mode_for_mode (mode).require ());
a2170965
TC
6133 int num_instr = aarch64_internal_mov_immediate
6134 (NULL_RTX, gen_int_mode (ival, imode), false, imode);
6135 return num_instr < 3;
6136 }
6137
6138 return false;
6139}
6140
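/* Illustrative sketch (not part of GCC; assumes IEEE doubles and a
   64-bit long long): the same reinterpretation for a host double, plus
   one data point for the mov/movk-versus-literal-load heuristic
   above.  */

#include <cstring>

static unsigned long long
float_bits (double d)
{
  unsigned long long u;
  std::memcpy (&u, &d, sizeof u);	/* Bit pattern, not a conversion.  */
  return u;
}

int
main ()
{
  /* 1.5 -> 0x3ff8000000000000: one MOVZ (#0x3ff8, lsl #48) plus an
     FMOV, so it beats an ADRP+LDR literal load and the predicate
     above accepts it.  */
  return float_bits (1.5) == 0x3ff8000000000000ULL ? 0 : 1;
}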
43e9d192
IB
6141/* Return TRUE if rtx X is the immediate constant 0.0. */
6142bool
3520f7cc 6143aarch64_float_const_zero_rtx_p (rtx x)
43e9d192 6144{
43e9d192
IB
6145 if (GET_MODE (x) == VOIDmode)
6146 return false;
6147
34a72c33 6148 if (REAL_VALUE_MINUS_ZERO (*CONST_DOUBLE_REAL_VALUE (x)))
43e9d192 6149 return !HONOR_SIGNED_ZEROS (GET_MODE (x));
34a72c33 6150 return real_equal (CONST_DOUBLE_REAL_VALUE (x), &dconst0);
43e9d192
IB
6151}
6152
a2170965
TC
6153/* Return TRUE if rtx X is an immediate constant that fits in a single
6154 MOVI immediate operation. */
6155bool
6156aarch64_can_const_movi_rtx_p (rtx x, machine_mode mode)
6157{
6158 if (!TARGET_SIMD)
6159 return false;
6160
77e994c9
RS
6161 machine_mode vmode;
6162 scalar_int_mode imode;
a2170965
TC
6163 unsigned HOST_WIDE_INT ival;
6164
6165 if (GET_CODE (x) == CONST_DOUBLE
6166 && SCALAR_FLOAT_MODE_P (mode))
6167 {
6168 if (!aarch64_reinterpret_float_as_int (x, &ival))
6169 return false;
6170
35c38fa6
TC
6171 /* We make a general exception for 0. */
6172 if (aarch64_float_const_zero_rtx_p (x))
6173 return true;
6174
304b9962 6175 imode = int_mode_for_mode (mode).require ();
a2170965
TC
6176 }
6177 else if (GET_CODE (x) == CONST_INT
77e994c9
RS
6178 && is_a <scalar_int_mode> (mode, &imode))
6179 ival = INTVAL (x);
a2170965
TC
6180 else
6181 return false;
6182
6183 /* Use a 64-bit mode for everything except for DI/DF mode, where we use
6184 a 128-bit vector mode. */
77e994c9 6185 int width = GET_MODE_BITSIZE (imode) == 64 ? 128 : 64;
a2170965
TC
6186
6187 vmode = aarch64_simd_container_mode (imode, width);
6188 rtx v_op = aarch64_simd_gen_const_vector_dup (vmode, ival);
6189
b187677b 6190 return aarch64_simd_valid_immediate (v_op, NULL);
a2170965
TC
6191}
6192
6193
70f09188
AP
6194/* Return the fixed registers used for condition codes. */
6195
6196static bool
6197aarch64_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
6198{
6199 *p1 = CC_REGNUM;
6200 *p2 = INVALID_REGNUM;
6201 return true;
6202}
6203
47210a04
RL
6204/* This function is used by the call expanders of the machine description.
6205 RESULT is the register in which the result is returned. It's NULL for
6206 "call" and "sibcall".
6207 MEM is the location of the function call.
6208 SIBCALL indicates whether this function call is a normal call or a sibling
6209 call; a different pattern is generated accordingly. */
6210
6211void
6212aarch64_expand_call (rtx result, rtx mem, bool sibcall)
6213{
6214 rtx call, callee, tmp;
6215 rtvec vec;
6216 machine_mode mode;
6217
6218 gcc_assert (MEM_P (mem));
6219 callee = XEXP (mem, 0);
6220 mode = GET_MODE (callee);
6221 gcc_assert (mode == Pmode);
6222
6223 /* Decide if we should generate indirect calls by loading the
6224 address of the callee into a register before performing
6225 the branch-and-link. */
6226 if (SYMBOL_REF_P (callee)
6227 ? (aarch64_is_long_call_p (callee)
6228 || aarch64_is_noplt_call_p (callee))
6229 : !REG_P (callee))
6230 XEXP (mem, 0) = force_reg (mode, callee);
6231
6232 call = gen_rtx_CALL (VOIDmode, mem, const0_rtx);
6233
6234 if (result != NULL_RTX)
6235 call = gen_rtx_SET (result, call);
6236
6237 if (sibcall)
6238 tmp = ret_rtx;
6239 else
6240 tmp = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (Pmode, LR_REGNUM));
6241
6242 vec = gen_rtvec (2, call, tmp);
6243 call = gen_rtx_PARALLEL (VOIDmode, vec);
6244
6245 aarch64_emit_call_insn (call);
6246}
6247
78607708
TV
6248/* Emit call insn with PAT and do aarch64-specific handling. */
6249
d07a3fed 6250void
78607708
TV
6251aarch64_emit_call_insn (rtx pat)
6252{
6253 rtx insn = emit_call_insn (pat);
6254
6255 rtx *fusage = &CALL_INSN_FUNCTION_USAGE (insn);
6256 clobber_reg (fusage, gen_rtx_REG (word_mode, IP0_REGNUM));
6257 clobber_reg (fusage, gen_rtx_REG (word_mode, IP1_REGNUM));
6258}
6259
ef4bddc2 6260machine_mode
43e9d192
IB
6261aarch64_select_cc_mode (RTX_CODE code, rtx x, rtx y)
6262{
6263 /* All floating point compares return CCFP if it is an equality
6264 comparison, and CCFPE otherwise. */
6265 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
6266 {
6267 switch (code)
6268 {
6269 case EQ:
6270 case NE:
6271 case UNORDERED:
6272 case ORDERED:
6273 case UNLT:
6274 case UNLE:
6275 case UNGT:
6276 case UNGE:
6277 case UNEQ:
43e9d192
IB
6278 return CCFPmode;
6279
6280 case LT:
6281 case LE:
6282 case GT:
6283 case GE:
8332c5ee 6284 case LTGT:
43e9d192
IB
6285 return CCFPEmode;
6286
6287 default:
6288 gcc_unreachable ();
6289 }
6290 }
6291
2b8568fe
KT
6292 /* Equality comparisons of short modes against zero can be performed
6293 using the TST instruction with the appropriate bitmask. */
6294 if (y == const0_rtx && REG_P (x)
6295 && (code == EQ || code == NE)
6296 && (GET_MODE (x) == HImode || GET_MODE (x) == QImode))
6297 return CC_NZmode;
6298
b06335f9
KT
6299 /* Similarly, comparisons of zero_extends from shorter modes can
6300 be performed using an ANDS with an immediate mask. */
6301 if (y == const0_rtx && GET_CODE (x) == ZERO_EXTEND
6302 && (GET_MODE (x) == SImode || GET_MODE (x) == DImode)
6303 && (GET_MODE (XEXP (x, 0)) == HImode || GET_MODE (XEXP (x, 0)) == QImode)
6304 && (code == EQ || code == NE))
6305 return CC_NZmode;
6306
43e9d192
IB
6307 if ((GET_MODE (x) == SImode || GET_MODE (x) == DImode)
6308 && y == const0_rtx
6309 && (code == EQ || code == NE || code == LT || code == GE)
b056c910 6310 && (GET_CODE (x) == PLUS || GET_CODE (x) == MINUS || GET_CODE (x) == AND
7325d85a
KT
6311 || GET_CODE (x) == NEG
6312 || (GET_CODE (x) == ZERO_EXTRACT && CONST_INT_P (XEXP (x, 1))
6313 && CONST_INT_P (XEXP (x, 2)))))
43e9d192
IB
6314 return CC_NZmode;
6315
1c992d1e 6316 /* A compare with a shifted operand. Because of canonicalization,
43e9d192
IB
6317 the comparison will have to be swapped when we emit the assembly
6318 code. */
6319 if ((GET_MODE (x) == SImode || GET_MODE (x) == DImode)
ffa8a921 6320 && (REG_P (y) || GET_CODE (y) == SUBREG || y == const0_rtx)
43e9d192
IB
6321 && (GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT
6322 || GET_CODE (x) == LSHIFTRT
1c992d1e 6323 || GET_CODE (x) == ZERO_EXTEND || GET_CODE (x) == SIGN_EXTEND))
43e9d192
IB
6324 return CC_SWPmode;
6325
1c992d1e
RE
6326 /* Similarly for a negated operand, but we can only do this for
6327 equalities. */
6328 if ((GET_MODE (x) == SImode || GET_MODE (x) == DImode)
4aa81c2e 6329 && (REG_P (y) || GET_CODE (y) == SUBREG)
1c992d1e
RE
6330 && (code == EQ || code == NE)
6331 && GET_CODE (x) == NEG)
6332 return CC_Zmode;
6333
ef22810a
RH
6334 /* A test for unsigned overflow. */
6335 if ((GET_MODE (x) == DImode || GET_MODE (x) == TImode)
6336 && code == NE
6337 && GET_CODE (x) == PLUS
6338 && GET_CODE (y) == ZERO_EXTEND)
6339 return CC_Cmode;
6340
30c46053
MC
6341 /* A test for signed overflow. */
6342 if ((GET_MODE (x) == DImode || GET_MODE (x) == TImode)
6343 && code == NE
6344 && GET_CODE (x) == PLUS
6345 && GET_CODE (y) == SIGN_EXTEND)
6346 return CC_Vmode;
6347
43e9d192
IB
6348 /* For everything else, return CCmode. */
6349 return CCmode;
6350}
6351
3dfa7055 6352static int
b8506a8a 6353aarch64_get_condition_code_1 (machine_mode, enum rtx_code);
3dfa7055 6354
cd5660ab 6355int
43e9d192
IB
6356aarch64_get_condition_code (rtx x)
6357{
ef4bddc2 6358 machine_mode mode = GET_MODE (XEXP (x, 0));
43e9d192
IB
6359 enum rtx_code comp_code = GET_CODE (x);
6360
6361 if (GET_MODE_CLASS (mode) != MODE_CC)
6362 mode = SELECT_CC_MODE (comp_code, XEXP (x, 0), XEXP (x, 1));
3dfa7055
ZC
6363 return aarch64_get_condition_code_1 (mode, comp_code);
6364}
43e9d192 6365
3dfa7055 6366static int
b8506a8a 6367aarch64_get_condition_code_1 (machine_mode mode, enum rtx_code comp_code)
3dfa7055 6368{
43e9d192
IB
6369 switch (mode)
6370 {
4e10a5a7
RS
6371 case E_CCFPmode:
6372 case E_CCFPEmode:
43e9d192
IB
6373 switch (comp_code)
6374 {
6375 case GE: return AARCH64_GE;
6376 case GT: return AARCH64_GT;
6377 case LE: return AARCH64_LS;
6378 case LT: return AARCH64_MI;
6379 case NE: return AARCH64_NE;
6380 case EQ: return AARCH64_EQ;
6381 case ORDERED: return AARCH64_VC;
6382 case UNORDERED: return AARCH64_VS;
6383 case UNLT: return AARCH64_LT;
6384 case UNLE: return AARCH64_LE;
6385 case UNGT: return AARCH64_HI;
6386 case UNGE: return AARCH64_PL;
cd5660ab 6387 default: return -1;
43e9d192
IB
6388 }
6389 break;
6390
4e10a5a7 6391 case E_CCmode:
43e9d192
IB
6392 switch (comp_code)
6393 {
6394 case NE: return AARCH64_NE;
6395 case EQ: return AARCH64_EQ;
6396 case GE: return AARCH64_GE;
6397 case GT: return AARCH64_GT;
6398 case LE: return AARCH64_LE;
6399 case LT: return AARCH64_LT;
6400 case GEU: return AARCH64_CS;
6401 case GTU: return AARCH64_HI;
6402 case LEU: return AARCH64_LS;
6403 case LTU: return AARCH64_CC;
cd5660ab 6404 default: return -1;
43e9d192
IB
6405 }
6406 break;
6407
4e10a5a7 6408 case E_CC_SWPmode:
43e9d192
IB
6409 switch (comp_code)
6410 {
6411 case NE: return AARCH64_NE;
6412 case EQ: return AARCH64_EQ;
6413 case GE: return AARCH64_LE;
6414 case GT: return AARCH64_LT;
6415 case LE: return AARCH64_GE;
6416 case LT: return AARCH64_GT;
6417 case GEU: return AARCH64_LS;
6418 case GTU: return AARCH64_CC;
6419 case LEU: return AARCH64_CS;
6420 case LTU: return AARCH64_HI;
cd5660ab 6421 default: return -1;
43e9d192
IB
6422 }
6423 break;
6424
4e10a5a7 6425 case E_CC_NZmode:
43e9d192
IB
6426 switch (comp_code)
6427 {
6428 case NE: return AARCH64_NE;
6429 case EQ: return AARCH64_EQ;
6430 case GE: return AARCH64_PL;
6431 case LT: return AARCH64_MI;
cd5660ab 6432 default: return -1;
43e9d192
IB
6433 }
6434 break;
6435
4e10a5a7 6436 case E_CC_Zmode:
1c992d1e
RE
6437 switch (comp_code)
6438 {
6439 case NE: return AARCH64_NE;
6440 case EQ: return AARCH64_EQ;
cd5660ab 6441 default: return -1;
1c992d1e
RE
6442 }
6443 break;
6444
4e10a5a7 6445 case E_CC_Cmode:
ef22810a
RH
6446 switch (comp_code)
6447 {
6448 case NE: return AARCH64_CS;
6449 case EQ: return AARCH64_CC;
6450 default: return -1;
6451 }
6452 break;
6453
30c46053
MC
6454 case E_CC_Vmode:
6455 switch (comp_code)
6456 {
6457 case NE: return AARCH64_VS;
6458 case EQ: return AARCH64_VC;
6459 default: return -1;
6460 }
6461 break;
6462
43e9d192 6463 default:
cd5660ab 6464 return -1;
43e9d192 6465 }
3dfa7055 6466
3dfa7055 6467 return -1;
43e9d192
IB
6468}
6469
ddeabd3e
AL
6470bool
6471aarch64_const_vec_all_same_in_range_p (rtx x,
6a70badb
RS
6472 HOST_WIDE_INT minval,
6473 HOST_WIDE_INT maxval)
ddeabd3e 6474{
6a70badb
RS
6475 rtx elt;
6476 return (const_vec_duplicate_p (x, &elt)
6477 && CONST_INT_P (elt)
6478 && IN_RANGE (INTVAL (elt), minval, maxval));
ddeabd3e
AL
6479}
6480
6481bool
6482aarch64_const_vec_all_same_int_p (rtx x, HOST_WIDE_INT val)
6483{
6484 return aarch64_const_vec_all_same_in_range_p (x, val, val);
6485}
6486
43cacb12
RS
6487/* Return true if VEC is a constant in which every element is in the range
6488 [MINVAL, MAXVAL]. The elements do not need to have the same value. */
6489
6490static bool
6491aarch64_const_vec_all_in_range_p (rtx vec,
6492 HOST_WIDE_INT minval,
6493 HOST_WIDE_INT maxval)
6494{
6495 if (GET_CODE (vec) != CONST_VECTOR
6496 || GET_MODE_CLASS (GET_MODE (vec)) != MODE_VECTOR_INT)
6497 return false;
6498
6499 int nunits;
6500 if (!CONST_VECTOR_STEPPED_P (vec))
6501 nunits = const_vector_encoded_nelts (vec);
6502 else if (!CONST_VECTOR_NUNITS (vec).is_constant (&nunits))
6503 return false;
6504
6505 for (int i = 0; i < nunits; i++)
6506 {
6507 rtx vec_elem = CONST_VECTOR_ELT (vec, i);
6508 if (!CONST_INT_P (vec_elem)
6509 || !IN_RANGE (INTVAL (vec_elem), minval, maxval))
6510 return false;
6511 }
6512 return true;
6513}
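/* Illustrative behavior of the predicates above (editorial addition,
   values invented): for a V4SI constant {0, 1, 2, 3},

     aarch64_const_vec_all_in_range_p (v, 0, 3)      -> true
     aarch64_const_vec_all_in_range_p (v, 0, 2)      -> false (element 3)
     aarch64_const_vec_all_same_in_range_p (v, 0, 3) -> false (elements
							differ)

   whereas for a duplicated constant {5, 5, 5, 5} both the all-in-range
   and the all-same predicates hold for the range [5, 5].  */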
43e9d192 6514
cf670503
ZC
6515/* N Z C V. */
6516#define AARCH64_CC_V 1
6517#define AARCH64_CC_C (1 << 1)
6518#define AARCH64_CC_Z (1 << 2)
6519#define AARCH64_CC_N (1 << 3)
6520
c8012fbc
WD
6521/* N Z C V flags for ccmp. Indexed by AARCH64_COND_CODE. */
6522static const int aarch64_nzcv_codes[] =
6523{
6524 0, /* EQ, Z == 1. */
6525 AARCH64_CC_Z, /* NE, Z == 0. */
6526 0, /* CS, C == 1. */
6527 AARCH64_CC_C, /* CC, C == 0. */
6528 0, /* MI, N == 1. */
6529 AARCH64_CC_N, /* PL, N == 0. */
6530 0, /* VS, V == 1. */
6531 AARCH64_CC_V, /* VC, V == 0. */
6532 0, /* HI, C == 1 && Z == 0. */
6533 AARCH64_CC_C, /* LS, !(C == 1 && Z == 0). */
6534 AARCH64_CC_V, /* GE, N == V. */
6535 0, /* LT, N != V. */
6536 AARCH64_CC_Z, /* GT, Z == 0 && N == V. */
6537 0, /* LE, !(Z == 0 && N == V). */
6538 0, /* AL, Any. */
6539 0 /* NV, Any. */
cf670503
ZC
6540};
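/* Editorial note, derived from the table above rather than the original
   sources: each entry appears to encode a flag state in which the indexed
   condition evaluates false.  For example, AARCH64_GE maps to AARCH64_CC_V
   (N = 0, V = 1), and GE requires N == V, so GE fails under that setting.
   The 'k' operand modifier below prints an entry of this table as the
   NZCV immediate of a conditional compare, schematically:

     ccmp  w0, w1, #<nzcv>, <cond>   // operands invented for illustration

   where <nzcv> is the flag state to install when <cond> is false.  */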
6541
43cacb12
RS
6542/* Print floating-point vector immediate operand X to F, negating it
6543 first if NEGATE is true. Return true on success, false if it isn't
6544 a constant we can handle. */
6545
6546static bool
6547aarch64_print_vector_float_operand (FILE *f, rtx x, bool negate)
6548{
6549 rtx elt;
6550
6551 if (!const_vec_duplicate_p (x, &elt))
6552 return false;
6553
6554 REAL_VALUE_TYPE r = *CONST_DOUBLE_REAL_VALUE (elt);
6555 if (negate)
6556 r = real_value_negate (&r);
6557
6558 /* We only handle the SVE single-bit immediates here. */
6559 if (real_equal (&r, &dconst0))
6560 asm_fprintf (f, "0.0");
6561 else if (real_equal (&r, &dconst1))
6562 asm_fprintf (f, "1.0");
6563 else if (real_equal (&r, &dconsthalf))
6564 asm_fprintf (f, "0.5");
6565 else
6566 return false;
6567
6568 return true;
6569}
6570
9f4cbab8
RS
6571/* Return the FP/SIMD register-suffix letter for an element SIZE bits wide. */
6572static char
6573sizetochar (int size)
6574{
6575 switch (size)
6576 {
6577 case 64: return 'd';
6578 case 32: return 's';
6579 case 16: return 'h';
6580 case 8: return 'b';
6581 default: gcc_unreachable ();
6582 }
6583}
6584
bcf19844
JW
6585/* Print operand X to file F in a target specific manner according to CODE.
6586 The acceptable formatting commands given by CODE are:
6587 'c': An integer or symbol address without a preceding #
6588 sign.
43cacb12
RS
6589 'C': Take the duplicated element in a vector constant
6590 and print it in hex.
6591 'D': Take the duplicated element in a vector constant
6592 and print it as an unsigned integer, in decimal.
bcf19844
JW
6593 'e': Print the sign/zero-extend size as a character 8->b,
6594 16->h, 32->w.
6595 'p': Prints N such that 2^N == X (X must be power of 2 and
6596 const int).
6597 'P': Print the number of non-zero bits in X (a const_int).
6598 'H': Print the higher numbered register of a pair (TImode)
6599 of regs.
6600 'm': Print a condition (eq, ne, etc).
6601 'M': Same as 'm', but invert condition.
43cacb12
RS
6602 'N': Take the duplicated element in a vector constant
6603 and print the negative of it in decimal.
bcf19844
JW
6604 'b/h/s/d/q': Print a scalar FP/SIMD register name.
6605 'S/T/U/V': Print a FP/SIMD register name for a register list.
6606 The register printed is the FP/SIMD register name
6607 of X + 0/1/2/3 for S/T/U/V.
6608 'R': Print a scalar FP/SIMD register name + 1.
6609 'X': Print bottom 16 bits of integer constant in hex.
6610 'w/x': Print a general register name or the zero register
6611 (32-bit or 64-bit).
6612 '0': Print a normal operand; if it's a general register,
6613 then we assume DImode.
6614 'k': Print NZCV for conditional compare instructions.
6615 'A': Output address constant representing the first
6616 argument of X, specifying a relocation offset
6617 if appropriate.
6618 'L': Output constant address specified by X
6619 with a relocation offset if appropriate.
6620 'G': Prints address of X, specifying a PC relative
e69a816d
WD
6621 relocation mode if appropriate.
6622 'y': Output address of LDP or STP - this is used for
6623 some LDP/STPs which don't use a PARALLEL in their
6624 pattern (so the mode needs to be adjusted).
6625 'z': Output address of a typical LDP or STP. */
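/* A few worked examples of the modifiers above (editorial addition;
   operand values invented for illustration):

     %p on (const_int 8) prints "3" (2^3 == 8)
     %P on (const_int 7) prints "3" (three set bits)
     %X on (const_int 0x12345) prints "0x2345" (low 16 bits)
     %e on (const_int 8) prints "b" (an 8-bit extend)
     %w on general register 1 prints "w1"; %x prints "x1"

   See the implementation below for the exact checks applied.  */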
bcf19844 6626
cc8ca59e
JB
6627static void
6628aarch64_print_operand (FILE *f, rtx x, int code)
43e9d192 6629{
43cacb12 6630 rtx elt;
43e9d192
IB
6631 switch (code)
6632 {
f541a481
KT
6633 case 'c':
6634 switch (GET_CODE (x))
6635 {
6636 case CONST_INT:
6637 fprintf (f, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
6638 break;
6639
6640 case SYMBOL_REF:
6641 output_addr_const (f, x);
6642 break;
6643
6644 case CONST:
6645 if (GET_CODE (XEXP (x, 0)) == PLUS
6646 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF)
6647 {
6648 output_addr_const (f, x);
6649 break;
6650 }
6651 /* Fall through. */
6652
6653 default:
ee61f880 6654 output_operand_lossage ("unsupported operand for code '%c'", code);
f541a481
KT
6655 }
6656 break;
6657
43e9d192 6658 case 'e':
43e9d192
IB
6659 {
6660 int n;
6661
4aa81c2e 6662 if (!CONST_INT_P (x)
43e9d192
IB
6663 || (n = exact_log2 (INTVAL (x) & ~7)) <= 0)
6664 {
6665 output_operand_lossage ("invalid operand for '%%%c'", code);
6666 return;
6667 }
6668
6669 switch (n)
6670 {
6671 case 3:
6672 fputc ('b', f);
6673 break;
6674 case 4:
6675 fputc ('h', f);
6676 break;
6677 case 5:
6678 fputc ('w', f);
6679 break;
6680 default:
6681 output_operand_lossage ("invalid operand for '%%%c'", code);
6682 return;
6683 }
6684 }
6685 break;
6686
6687 case 'p':
6688 {
6689 int n;
6690
4aa81c2e 6691 if (!CONST_INT_P (x) || (n = exact_log2 (INTVAL (x))) < 0)
43e9d192
IB
6692 {
6693 output_operand_lossage ("invalid operand for '%%%c'", code);
6694 return;
6695 }
6696
6697 asm_fprintf (f, "%d", n);
6698 }
6699 break;
6700
6701 case 'P':
4aa81c2e 6702 if (!CONST_INT_P (x))
43e9d192
IB
6703 {
6704 output_operand_lossage ("invalid operand for '%%%c'", code);
6705 return;
6706 }
6707
8d55c61b 6708 asm_fprintf (f, "%u", popcount_hwi (INTVAL (x)));
43e9d192
IB
6709 break;
6710
6711 case 'H':
c0111dc4
RE
6712 if (x == const0_rtx)
6713 {
6714 asm_fprintf (f, "xzr");
6715 break;
6716 }
6717
4aa81c2e 6718 if (!REG_P (x) || !GP_REGNUM_P (REGNO (x) + 1))
43e9d192
IB
6719 {
6720 output_operand_lossage ("invalid operand for '%%%c'", code);
6721 return;
6722 }
6723
01a3a324 6724 asm_fprintf (f, "%s", reg_names [REGNO (x) + 1]);
43e9d192
IB
6725 break;
6726
43e9d192 6727 case 'M':
c8012fbc 6728 case 'm':
cd5660ab
KT
6729 {
6730 int cond_code;
c8012fbc
WD
6731 /* CONST_TRUE_RTX means al/nv (al is the default, don't print it). */
6732 if (x == const_true_rtx)
cd5660ab 6733 {
c8012fbc
WD
6734 if (code == 'M')
6735 fputs ("nv", f);
cd5660ab
KT
6736 return;
6737 }
43e9d192 6738
cd5660ab
KT
6739 if (!COMPARISON_P (x))
6740 {
6741 output_operand_lossage ("invalid operand for '%%%c'", code);
6742 return;
6743 }
c8012fbc 6744
cd5660ab
KT
6745 cond_code = aarch64_get_condition_code (x);
6746 gcc_assert (cond_code >= 0);
c8012fbc
WD
6747 if (code == 'M')
6748 cond_code = AARCH64_INVERSE_CONDITION_CODE (cond_code);
6749 fputs (aarch64_condition_codes[cond_code], f);
cd5660ab 6750 }
43e9d192
IB
6751 break;
6752
43cacb12
RS
6753 case 'N':
6754 if (!const_vec_duplicate_p (x, &elt))
6755 {
6756 output_operand_lossage ("invalid vector constant");
6757 return;
6758 }
6759
6760 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_VECTOR_INT)
6761 asm_fprintf (f, "%wd", -INTVAL (elt));
6762 else if (GET_MODE_CLASS (GET_MODE (x)) == MODE_VECTOR_FLOAT
6763 && aarch64_print_vector_float_operand (f, x, true))
6764 ;
6765 else
6766 {
6767 output_operand_lossage ("invalid vector constant");
6768 return;
6769 }
6770 break;
6771
43e9d192
IB
6772 case 'b':
6773 case 'h':
6774 case 's':
6775 case 'd':
6776 case 'q':
43e9d192
IB
6777 if (!REG_P (x) || !FP_REGNUM_P (REGNO (x)))
6778 {
6779 output_operand_lossage ("incompatible floating point / vector register operand for '%%%c'", code);
6780 return;
6781 }
50ce6f88 6782 asm_fprintf (f, "%c%d", code, REGNO (x) - V0_REGNUM);
43e9d192
IB
6783 break;
6784
6785 case 'S':
6786 case 'T':
6787 case 'U':
6788 case 'V':
43e9d192
IB
6789 if (!REG_P (x) || !FP_REGNUM_P (REGNO (x)))
6790 {
6791 output_operand_lossage ("incompatible floating point / vector register operand for '%%%c'", code);
6792 return;
6793 }
43cacb12
RS
6794 asm_fprintf (f, "%c%d",
6795 aarch64_sve_data_mode_p (GET_MODE (x)) ? 'z' : 'v',
6796 REGNO (x) - V0_REGNUM + (code - 'S'));
43e9d192
IB
6797 break;
6798
2d8c6dc1 6799 case 'R':
2d8c6dc1
AH
6800 if (!REG_P (x) || !FP_REGNUM_P (REGNO (x)))
6801 {
6802 output_operand_lossage ("incompatible floating point / vector register operand for '%%%c'", code);
6803 return;
6804 }
6805 asm_fprintf (f, "q%d", REGNO (x) - V0_REGNUM + 1);
6806 break;
6807
a05c0ddf 6808 case 'X':
4aa81c2e 6809 if (!CONST_INT_P (x))
a05c0ddf
IB
6810 {
6811 output_operand_lossage ("invalid operand for '%%%c'", code);
6812 return;
6813 }
50d38551 6814 asm_fprintf (f, "0x%wx", UINTVAL (x) & 0xffff);
a05c0ddf
IB
6815 break;
6816
43cacb12
RS
6817 case 'C':
6818 {
6819 /* Print a replicated constant in hex. */
6820 if (!const_vec_duplicate_p (x, &elt) || !CONST_INT_P (elt))
6821 {
6822 output_operand_lossage ("invalid operand for '%%%c'", code);
6823 return;
6824 }
6825 scalar_mode inner_mode = GET_MODE_INNER (GET_MODE (x));
6826 asm_fprintf (f, "0x%wx", UINTVAL (elt) & GET_MODE_MASK (inner_mode));
6827 }
6828 break;
6829
6830 case 'D':
6831 {
6832 /* Print a replicated constant in decimal, treating it as
6833 unsigned. */
6834 if (!const_vec_duplicate_p (x, &elt) || !CONST_INT_P (elt))
6835 {
6836 output_operand_lossage ("invalid operand for '%%%c'", code);
6837 return;
6838 }
6839 scalar_mode inner_mode = GET_MODE_INNER (GET_MODE (x));
6840 asm_fprintf (f, "%wd", UINTVAL (elt) & GET_MODE_MASK (inner_mode));
6841 }
6842 break;
6843
43e9d192
IB
6844 case 'w':
6845 case 'x':
3520f7cc
JG
6846 if (x == const0_rtx
6847 || (CONST_DOUBLE_P (x) && aarch64_float_const_zero_rtx_p (x)))
43e9d192 6848 {
50ce6f88 6849 asm_fprintf (f, "%czr", code);
43e9d192
IB
6850 break;
6851 }
6852
6853 if (REG_P (x) && GP_REGNUM_P (REGNO (x)))
6854 {
50ce6f88 6855 asm_fprintf (f, "%c%d", code, REGNO (x) - R0_REGNUM);
43e9d192
IB
6856 break;
6857 }
6858
6859 if (REG_P (x) && REGNO (x) == SP_REGNUM)
6860 {
50ce6f88 6861 asm_fprintf (f, "%ssp", code == 'w' ? "w" : "");
43e9d192
IB
6862 break;
6863 }
6864
6865 /* Fall through */
6866
6867 case 0:
43e9d192
IB
6868 if (x == NULL)
6869 {
6870 output_operand_lossage ("missing operand");
6871 return;
6872 }
6873
6874 switch (GET_CODE (x))
6875 {
6876 case REG:
43cacb12 6877 if (aarch64_sve_data_mode_p (GET_MODE (x)))
9f4cbab8
RS
6878 {
6879 if (REG_NREGS (x) == 1)
6880 asm_fprintf (f, "z%d", REGNO (x) - V0_REGNUM);
6881 else
6882 {
6883 char suffix
6884 = sizetochar (GET_MODE_UNIT_BITSIZE (GET_MODE (x)));
6885 asm_fprintf (f, "{z%d.%c - z%d.%c}",
6886 REGNO (x) - V0_REGNUM, suffix,
6887 END_REGNO (x) - V0_REGNUM - 1, suffix);
6888 }
6889 }
43cacb12
RS
6890 else
6891 asm_fprintf (f, "%s", reg_names [REGNO (x)]);
43e9d192
IB
6892 break;
6893
6894 case MEM:
cc8ca59e 6895 output_address (GET_MODE (x), XEXP (x, 0));
43e9d192
IB
6896 break;
6897
6898 case LABEL_REF:
6899 case SYMBOL_REF:
6900 output_addr_const (asm_out_file, x);
6901 break;
6902
6903 case CONST_INT:
6904 asm_fprintf (f, "%wd", INTVAL (x));
6905 break;
6906
43cacb12
RS
6907 case CONST:
6908 if (!VECTOR_MODE_P (GET_MODE (x)))
3520f7cc 6909 {
43cacb12
RS
6910 output_addr_const (asm_out_file, x);
6911 break;
3520f7cc 6912 }
43cacb12
RS
6913 /* fall through */
6914
6915 case CONST_VECTOR:
6916 if (!const_vec_duplicate_p (x, &elt))
3520f7cc 6917 {
43cacb12
RS
6918 output_operand_lossage ("invalid vector constant");
6919 return;
3520f7cc 6920 }
43cacb12
RS
6921
6922 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_VECTOR_INT)
6923 asm_fprintf (f, "%wd", INTVAL (elt));
6924 else if (GET_MODE_CLASS (GET_MODE (x)) == MODE_VECTOR_FLOAT
6925 && aarch64_print_vector_float_operand (f, x, false))
6926 ;
3520f7cc 6927 else
43cacb12
RS
6928 {
6929 output_operand_lossage ("invalid vector constant");
6930 return;
6931 }
43e9d192
IB
6932 break;
6933
3520f7cc 6934 case CONST_DOUBLE:
2ca5b430
KT
6935 /* Since we define TARGET_SUPPORTS_WIDE_INT we shouldn't ever
6936 be getting CONST_DOUBLEs holding integers. */
6937 gcc_assert (GET_MODE (x) != VOIDmode);
6938 if (aarch64_float_const_zero_rtx_p (x))
3520f7cc
JG
6939 {
6940 fputc ('0', f);
6941 break;
6942 }
6943 else if (aarch64_float_const_representable_p (x))
6944 {
6945#define buf_size 20
6946 char float_buf[buf_size] = {'\0'};
34a72c33
RS
6947 real_to_decimal_for_mode (float_buf,
6948 CONST_DOUBLE_REAL_VALUE (x),
3520f7cc
JG
6949 buf_size, buf_size,
6950 1, GET_MODE (x));
6951 asm_fprintf (asm_out_file, "%s", float_buf);
6952 break;
6953#undef buf_size
6954 }
6955 output_operand_lossage ("invalid constant");
6956 return;
43e9d192
IB
6957 default:
6958 output_operand_lossage ("invalid operand");
6959 return;
6960 }
6961 break;
6962
6963 case 'A':
6964 if (GET_CODE (x) == HIGH)
6965 x = XEXP (x, 0);
6966
a6e0bfa7 6967 switch (aarch64_classify_symbolic_expression (x))
43e9d192 6968 {
6642bdb4 6969 case SYMBOL_SMALL_GOT_4G:
43e9d192
IB
6970 asm_fprintf (asm_out_file, ":got:");
6971 break;
6972
6973 case SYMBOL_SMALL_TLSGD:
6974 asm_fprintf (asm_out_file, ":tlsgd:");
6975 break;
6976
6977 case SYMBOL_SMALL_TLSDESC:
6978 asm_fprintf (asm_out_file, ":tlsdesc:");
6979 break;
6980
79496620 6981 case SYMBOL_SMALL_TLSIE:
43e9d192
IB
6982 asm_fprintf (asm_out_file, ":gottprel:");
6983 break;
6984
d18ba284 6985 case SYMBOL_TLSLE24:
43e9d192
IB
6986 asm_fprintf (asm_out_file, ":tprel:");
6987 break;
6988
87dd8ab0
MS
6989 case SYMBOL_TINY_GOT:
6990 gcc_unreachable ();
6991 break;
6992
43e9d192
IB
6993 default:
6994 break;
6995 }
6996 output_addr_const (asm_out_file, x);
6997 break;
6998
6999 case 'L':
a6e0bfa7 7000 switch (aarch64_classify_symbolic_expression (x))
43e9d192 7001 {
6642bdb4 7002 case SYMBOL_SMALL_GOT_4G:
43e9d192
IB
7003 asm_fprintf (asm_out_file, ":lo12:");
7004 break;
7005
7006 case SYMBOL_SMALL_TLSGD:
7007 asm_fprintf (asm_out_file, ":tlsgd_lo12:");
7008 break;
7009
7010 case SYMBOL_SMALL_TLSDESC:
7011 asm_fprintf (asm_out_file, ":tlsdesc_lo12:");
7012 break;
7013
79496620 7014 case SYMBOL_SMALL_TLSIE:
43e9d192
IB
7015 asm_fprintf (asm_out_file, ":gottprel_lo12:");
7016 break;
7017
cbf5629e
JW
7018 case SYMBOL_TLSLE12:
7019 asm_fprintf (asm_out_file, ":tprel_lo12:");
7020 break;
7021
d18ba284 7022 case SYMBOL_TLSLE24:
43e9d192
IB
7023 asm_fprintf (asm_out_file, ":tprel_lo12_nc:");
7024 break;
7025
87dd8ab0
MS
7026 case SYMBOL_TINY_GOT:
7027 asm_fprintf (asm_out_file, ":got:");
7028 break;
7029
5ae7caad
JW
7030 case SYMBOL_TINY_TLSIE:
7031 asm_fprintf (asm_out_file, ":gottprel:");
7032 break;
7033
43e9d192
IB
7034 default:
7035 break;
7036 }
7037 output_addr_const (asm_out_file, x);
7038 break;
7039
7040 case 'G':
a6e0bfa7 7041 switch (aarch64_classify_symbolic_expression (x))
43e9d192 7042 {
d18ba284 7043 case SYMBOL_TLSLE24:
43e9d192
IB
7044 asm_fprintf (asm_out_file, ":tprel_hi12:");
7045 break;
7046 default:
7047 break;
7048 }
7049 output_addr_const (asm_out_file, x);
7050 break;
7051
cf670503
ZC
7052 case 'k':
7053 {
c8012fbc 7054 HOST_WIDE_INT cond_code;
cf670503 7055
c8012fbc 7056 if (!CONST_INT_P (x))
cf670503
ZC
7057 {
7058 output_operand_lossage ("invalid operand for '%%%c'", code);
7059 return;
7060 }
7061
c8012fbc
WD
7062 cond_code = INTVAL (x);
7063 gcc_assert (cond_code >= 0 && cond_code <= AARCH64_NV);
7064 asm_fprintf (f, "%d", aarch64_nzcv_codes[cond_code]);
cf670503
ZC
7065 }
7066 break;
7067
e69a816d
WD
7068 case 'y':
7069 case 'z':
7070 {
7071 machine_mode mode = GET_MODE (x);
7072
c348cab0 7073 if (GET_CODE (x) != MEM
6a70badb 7074 || (code == 'y' && maybe_ne (GET_MODE_SIZE (mode), 16)))
e69a816d
WD
7075 {
7076 output_operand_lossage ("invalid operand for '%%%c'", code);
7077 return;
7078 }
7079
a25831ac
AV
7080 if (!aarch64_print_address_internal (f, mode, XEXP (x, 0),
7081 code == 'y'
7082 ? ADDR_QUERY_LDP_STP_N
7083 : ADDR_QUERY_LDP_STP))
c348cab0 7084 output_operand_lossage ("invalid operand prefix '%%%c'", code);
e69a816d
WD
7085 }
7086 break;
7087
43e9d192
IB
7088 default:
7089 output_operand_lossage ("invalid operand prefix '%%%c'", code);
7090 return;
7091 }
7092}
7093
e69a816d
WD
7094/* Print address 'x' of a memory access with mode 'mode'.
7095 'type' is the aarch64_addr_query_type context required by
7096 aarch64_classify_address; LDP/STP accesses use the ADDR_QUERY_LDP_STP variants. */
c348cab0 7097static bool
a97d8b98
RS
7098aarch64_print_address_internal (FILE *f, machine_mode mode, rtx x,
7099 aarch64_addr_query_type type)
43e9d192
IB
7100{
7101 struct aarch64_address_info addr;
6a70badb 7102 unsigned int size;
43e9d192 7103
e69a816d 7104 /* Check all addresses are Pmode - including ILP32. */
67c58c8f
SE
7105 if (GET_MODE (x) != Pmode)
7106 output_operand_lossage ("invalid address mode");
e69a816d 7107
a97d8b98 7108 if (aarch64_classify_address (&addr, x, mode, true, type))
43e9d192
IB
7109 switch (addr.type)
7110 {
7111 case ADDRESS_REG_IMM:
dc640181 7112 if (known_eq (addr.const_offset, 0))
01a3a324 7113 asm_fprintf (f, "[%s]", reg_names [REGNO (addr.base)]);
43cacb12
RS
7114 else if (aarch64_sve_data_mode_p (mode))
7115 {
7116 HOST_WIDE_INT vnum
7117 = exact_div (addr.const_offset,
7118 BYTES_PER_SVE_VECTOR).to_constant ();
7119 asm_fprintf (f, "[%s, #%wd, mul vl]",
7120 reg_names[REGNO (addr.base)], vnum);
7121 }
7122 else if (aarch64_sve_pred_mode_p (mode))
7123 {
7124 HOST_WIDE_INT vnum
7125 = exact_div (addr.const_offset,
7126 BYTES_PER_SVE_PRED).to_constant ();
7127 asm_fprintf (f, "[%s, #%wd, mul vl]",
7128 reg_names[REGNO (addr.base)], vnum);
7129 }
43e9d192 7130 else
16a3246f 7131 asm_fprintf (f, "[%s, %wd]", reg_names [REGNO (addr.base)],
43e9d192 7132 INTVAL (addr.offset));
c348cab0 7133 return true;
43e9d192
IB
7134
7135 case ADDRESS_REG_REG:
7136 if (addr.shift == 0)
16a3246f 7137 asm_fprintf (f, "[%s, %s]", reg_names [REGNO (addr.base)],
01a3a324 7138 reg_names [REGNO (addr.offset)]);
43e9d192 7139 else
16a3246f 7140 asm_fprintf (f, "[%s, %s, lsl %u]", reg_names [REGNO (addr.base)],
01a3a324 7141 reg_names [REGNO (addr.offset)], addr.shift);
c348cab0 7142 return true;
43e9d192
IB
7143
7144 case ADDRESS_REG_UXTW:
7145 if (addr.shift == 0)
16a3246f 7146 asm_fprintf (f, "[%s, w%d, uxtw]", reg_names [REGNO (addr.base)],
43e9d192
IB
7147 REGNO (addr.offset) - R0_REGNUM);
7148 else
16a3246f 7149 asm_fprintf (f, "[%s, w%d, uxtw %u]", reg_names [REGNO (addr.base)],
43e9d192 7150 REGNO (addr.offset) - R0_REGNUM, addr.shift);
c348cab0 7151 return true;
43e9d192
IB
7152
7153 case ADDRESS_REG_SXTW:
7154 if (addr.shift == 0)
16a3246f 7155 asm_fprintf (f, "[%s, w%d, sxtw]", reg_names [REGNO (addr.base)],
43e9d192
IB
7156 REGNO (addr.offset) - R0_REGNUM);
7157 else
16a3246f 7158 asm_fprintf (f, "[%s, w%d, sxtw %u]", reg_names [REGNO (addr.base)],
43e9d192 7159 REGNO (addr.offset) - R0_REGNUM, addr.shift);
c348cab0 7160 return true;
43e9d192
IB
7161
7162 case ADDRESS_REG_WB:
6a70badb
RS
7163 /* Writeback is only supported for fixed-width modes. */
7164 size = GET_MODE_SIZE (mode).to_constant ();
43e9d192
IB
7165 switch (GET_CODE (x))
7166 {
7167 case PRE_INC:
6a70badb 7168 asm_fprintf (f, "[%s, %d]!", reg_names [REGNO (addr.base)], size);
c348cab0 7169 return true;
43e9d192 7170 case POST_INC:
6a70badb 7171 asm_fprintf (f, "[%s], %d", reg_names [REGNO (addr.base)], size);
c348cab0 7172 return true;
43e9d192 7173 case PRE_DEC:
6a70badb 7174 asm_fprintf (f, "[%s, -%d]!", reg_names [REGNO (addr.base)], size);
c348cab0 7175 return true;
43e9d192 7176 case POST_DEC:
6a70badb 7177 asm_fprintf (f, "[%s], -%d", reg_names [REGNO (addr.base)], size);
c348cab0 7178 return true;
43e9d192 7179 case PRE_MODIFY:
6a70badb 7180 asm_fprintf (f, "[%s, %wd]!", reg_names[REGNO (addr.base)],
43e9d192 7181 INTVAL (addr.offset));
c348cab0 7182 return true;
43e9d192 7183 case POST_MODIFY:
6a70badb 7184 asm_fprintf (f, "[%s], %wd", reg_names[REGNO (addr.base)],
43e9d192 7185 INTVAL (addr.offset));
c348cab0 7186 return true;
43e9d192
IB
7187 default:
7188 break;
7189 }
7190 break;
7191
7192 case ADDRESS_LO_SUM:
16a3246f 7193 asm_fprintf (f, "[%s, #:lo12:", reg_names [REGNO (addr.base)]);
43e9d192
IB
7194 output_addr_const (f, addr.offset);
7195 asm_fprintf (f, "]");
c348cab0 7196 return true;
43e9d192
IB
7197
7198 case ADDRESS_SYMBOLIC:
d6591257 7199 output_addr_const (f, x);
c348cab0 7200 return true;
43e9d192
IB
7201 }
7202
c348cab0 7203 return false;
43e9d192
IB
7204}
7205
e69a816d
WD
7206/* Print address 'x' of a memory access with mode 'mode'. */
7207static void
7208aarch64_print_operand_address (FILE *f, machine_mode mode, rtx x)
7209{
43cacb12 7210 if (!aarch64_print_address_internal (f, mode, x, ADDR_QUERY_ANY))
c348cab0 7211 output_addr_const (f, x);
e69a816d
WD
7212}
7213
43e9d192
IB
7214bool
7215aarch64_label_mentioned_p (rtx x)
7216{
7217 const char *fmt;
7218 int i;
7219
7220 if (GET_CODE (x) == LABEL_REF)
7221 return true;
7222
7223 /* UNSPEC_TLS entries for a symbol include a LABEL_REF for the
7224 referencing instruction, but they are constant offsets, not
7225 symbols. */
7226 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
7227 return false;
7228
7229 fmt = GET_RTX_FORMAT (GET_CODE (x));
7230 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
7231 {
7232 if (fmt[i] == 'E')
7233 {
7234 int j;
7235
7236 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
7237 if (aarch64_label_mentioned_p (XVECEXP (x, i, j)))
7238 return 1;
7239 }
7240 else if (fmt[i] == 'e' && aarch64_label_mentioned_p (XEXP (x, i)))
7241 return 1;
7242 }
7243
7244 return 0;
7245}
7246
7247/* Implement REGNO_REG_CLASS. */
7248
7249enum reg_class
7250aarch64_regno_regclass (unsigned regno)
7251{
7252 if (GP_REGNUM_P (regno))
a4a182c6 7253 return GENERAL_REGS;
43e9d192
IB
7254
7255 if (regno == SP_REGNUM)
7256 return STACK_REG;
7257
7258 if (regno == FRAME_POINTER_REGNUM
7259 || regno == ARG_POINTER_REGNUM)
f24bb080 7260 return POINTER_REGS;
43e9d192
IB
7261
7262 if (FP_REGNUM_P (regno))
7263 return FP_LO_REGNUM_P (regno) ? FP_LO_REGS : FP_REGS;
7264
43cacb12
RS
7265 if (PR_REGNUM_P (regno))
7266 return PR_LO_REGNUM_P (regno) ? PR_LO_REGS : PR_HI_REGS;
7267
43e9d192
IB
7268 return NO_REGS;
7269}
7270
6a70badb
RS
7271/* OFFSET is an address offset for mode MODE, which has SIZE bytes.
7272 If OFFSET is out of range, return an offset of an anchor point
7273 that is in range. Return 0 otherwise. */
7274
7275static HOST_WIDE_INT
7276aarch64_anchor_offset (HOST_WIDE_INT offset, HOST_WIDE_INT size,
7277 machine_mode mode)
7278{
7279 /* Does it look like we'll need a 16-byte load/store-pair operation? */
7280 if (size > 16)
7281 return (offset + 0x400) & ~0x7f0;
7282
7283 /* For offsets that aren't a multiple of the access size, the limit is
7284 -256...255. */
7285 if (offset & (size - 1))
7286 {
7287 /* BLKmode typically uses LDP of X-registers. */
7288 if (mode == BLKmode)
7289 return (offset + 512) & ~0x3ff;
7290 return (offset + 0x100) & ~0x1ff;
7291 }
7292
7293 /* Small negative offsets are supported. */
7294 if (IN_RANGE (offset, -256, 0))
7295 return 0;
7296
7297 if (mode == TImode || mode == TFmode)
7298 return (offset + 0x100) & ~0x1ff;
7299
7300 /* Use 12-bit offset by access size. */
7301 return offset & (~0xfff * size);
7302}
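/* Worked example (editorial addition): for a word-aligned SImode access
   at offset 0x4320,

     aarch64_anchor_offset (0x4320, 4, SImode)
       == 0x4320 & (~0xfff * 4) == 0x4000

   so the caller can materialize an anchor at BASE + 0x4000 and address
   the slot with the residual offset 0x320, which fits the scaled 12-bit
   immediate of LDR/STR.  A small offset such as 0x123 for a QImode
   access yields 0, meaning it is already in range.  */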
7303
0c4ec427 7304static rtx
ef4bddc2 7305aarch64_legitimize_address (rtx x, rtx /* orig_x */, machine_mode mode)
0c4ec427
RE
7306{
7307 /* Try to split X+CONST into Y=X+(CONST & ~mask), Y+(CONST&mask),
7308 where mask is selected by alignment and size of the offset.
7309 We try to pick as large a range for the offset as possible to
7310 maximize the chance of a CSE. However, for aligned addresses
7311 we limit the range to 4k so that structures with different sized
e8426e0a
BC
7312 elements are likely to use the same base. We need to be careful
7313 not to split a CONST for some forms of address expression, otherwise
7314 it will generate sub-optimal code. */
0c4ec427
RE
7315
7316 if (GET_CODE (x) == PLUS && CONST_INT_P (XEXP (x, 1)))
7317 {
9e0218fc 7318 rtx base = XEXP (x, 0);
17d7bdd8 7319 rtx offset_rtx = XEXP (x, 1);
9e0218fc 7320 HOST_WIDE_INT offset = INTVAL (offset_rtx);
0c4ec427 7321
9e0218fc 7322 if (GET_CODE (base) == PLUS)
e8426e0a 7323 {
9e0218fc
RH
7324 rtx op0 = XEXP (base, 0);
7325 rtx op1 = XEXP (base, 1);
7326
7327 /* Force any scaling into a temp for CSE. */
7328 op0 = force_reg (Pmode, op0);
7329 op1 = force_reg (Pmode, op1);
7330
7331 /* Let the pointer register be in op0. */
7332 if (REG_POINTER (op1))
7333 std::swap (op0, op1);
7334
7335 /* If the pointer is virtual or frame related, then we know that
7336 virtual register instantiation or register elimination is going
7337 to apply a second constant. We want the two constants folded
7338 together easily. Therefore, emit as (OP0 + CONST) + OP1. */
7339 if (virt_or_elim_regno_p (REGNO (op0)))
e8426e0a 7340 {
9e0218fc
RH
7341 base = expand_binop (Pmode, add_optab, op0, offset_rtx,
7342 NULL_RTX, true, OPTAB_DIRECT);
7343 return gen_rtx_PLUS (Pmode, base, op1);
e8426e0a 7344 }
e8426e0a 7345
9e0218fc
RH
7346 /* Otherwise, in order to encourage CSE (and thence loop strength
7347 reduction) of scaled addresses, emit as (OP0 + OP1) + CONST. */
7348 base = expand_binop (Pmode, add_optab, op0, op1,
7349 NULL_RTX, true, OPTAB_DIRECT);
7350 x = gen_rtx_PLUS (Pmode, base, offset_rtx);
e8426e0a
BC
7351 }
7352
6a70badb
RS
7353 HOST_WIDE_INT size;
7354 if (GET_MODE_SIZE (mode).is_constant (&size))
ff0f3f1c 7355 {
6a70badb
RS
7356 HOST_WIDE_INT base_offset = aarch64_anchor_offset (offset, size,
7357 mode);
7358 if (base_offset != 0)
7359 {
7360 base = plus_constant (Pmode, base, base_offset);
7361 base = force_operand (base, NULL_RTX);
7362 return plus_constant (Pmode, base, offset - base_offset);
7363 }
9e0218fc 7364 }
0c4ec427
RE
7365 }
7366
7367 return x;
7368}
7369
43e9d192
IB
7370static reg_class_t
7371aarch64_secondary_reload (bool in_p ATTRIBUTE_UNUSED, rtx x,
7372 reg_class_t rclass,
ef4bddc2 7373 machine_mode mode,
43e9d192
IB
7374 secondary_reload_info *sri)
7375{
9a1b9cb4
RS
7376 /* Use aarch64_sve_reload_be for SVE reloads that cannot be handled
7377 directly by the *aarch64_sve_mov<mode>_be move pattern. See the
7378 comment at the head of aarch64-sve.md for more details about the
7379 big-endian handling. */
43cacb12
RS
7380 if (BYTES_BIG_ENDIAN
7381 && reg_class_subset_p (rclass, FP_REGS)
9a1b9cb4
RS
7382 && !((REG_P (x) && HARD_REGISTER_P (x))
7383 || aarch64_simd_valid_immediate (x, NULL))
43cacb12
RS
7384 && aarch64_sve_data_mode_p (mode))
7385 {
7386 sri->icode = CODE_FOR_aarch64_sve_reload_be;
7387 return NO_REGS;
7388 }
b4f50fd4
RR
7389
7390 /* If we have to disable direct literal pool loads and stores because the
7391 function is too big, then we need a scratch register. */
7392 if (MEM_P (x) && GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x)
7393 && (SCALAR_FLOAT_MODE_P (GET_MODE (x))
7394 || targetm.vector_mode_supported_p (GET_MODE (x)))
9ee6540a 7395 && !aarch64_pcrelative_literal_loads)
b4f50fd4 7396 {
0016d8d9 7397 sri->icode = code_for_aarch64_reload_movcp (mode, DImode);
b4f50fd4
RR
7398 return NO_REGS;
7399 }
7400
43e9d192
IB
7401 /* Without the TARGET_SIMD instructions we cannot move a Q register
7402 to a Q register directly. We need a scratch. */
7403 if (REG_P (x) && (mode == TFmode || mode == TImode) && mode == GET_MODE (x)
7404 && FP_REGNUM_P (REGNO (x)) && !TARGET_SIMD
7405 && reg_class_subset_p (rclass, FP_REGS))
7406 {
0016d8d9 7407 sri->icode = code_for_aarch64_reload_mov (mode);
43e9d192
IB
7408 return NO_REGS;
7409 }
7410
7411 /* A TFmode or TImode memory access should be handled via FP_REGS
7412 because AArch64 has richer addressing modes for LDR/STR instructions
7413 than LDP/STP instructions. */
d5726973 7414 if (TARGET_FLOAT && rclass == GENERAL_REGS
6a70badb 7415 && known_eq (GET_MODE_SIZE (mode), 16) && MEM_P (x))
43e9d192
IB
7416 return FP_REGS;
7417
7418 if (rclass == FP_REGS && (mode == TImode || mode == TFmode) && CONSTANT_P (x))
a4a182c6 7419 return GENERAL_REGS;
43e9d192
IB
7420
7421 return NO_REGS;
7422}
7423
7424static bool
6216fd90 7425aarch64_can_eliminate (const int from ATTRIBUTE_UNUSED, const int to)
43e9d192 7426{
6216fd90 7427 gcc_assert (from == ARG_POINTER_REGNUM || from == FRAME_POINTER_REGNUM);
43e9d192 7428
6216fd90
WD
7429 /* If we need a frame pointer, ARG_POINTER_REGNUM and FRAME_POINTER_REGNUM
7430 can only eliminate to HARD_FRAME_POINTER_REGNUM. */
43e9d192 7431 if (frame_pointer_needed)
6216fd90 7432 return to == HARD_FRAME_POINTER_REGNUM;
43e9d192
IB
7433 return true;
7434}
7435
6a70badb 7436poly_int64
43e9d192
IB
7437aarch64_initial_elimination_offset (unsigned from, unsigned to)
7438{
43e9d192 7439 aarch64_layout_frame ();
78c29983
MS
7440
7441 if (to == HARD_FRAME_POINTER_REGNUM)
7442 {
7443 if (from == ARG_POINTER_REGNUM)
71bfb77a 7444 return cfun->machine->frame.hard_fp_offset;
78c29983
MS
7445
7446 if (from == FRAME_POINTER_REGNUM)
71bfb77a
WD
7447 return cfun->machine->frame.hard_fp_offset
7448 - cfun->machine->frame.locals_offset;
78c29983
MS
7449 }
7450
7451 if (to == STACK_POINTER_REGNUM)
7452 {
7453 if (from == FRAME_POINTER_REGNUM)
71bfb77a
WD
7454 return cfun->machine->frame.frame_size
7455 - cfun->machine->frame.locals_offset;
78c29983
MS
7456 }
7457
1c960e02 7458 return cfun->machine->frame.frame_size;
43e9d192
IB
7459}
7460
43e9d192
IB
7461/* Implement RETURN_ADDR_RTX. We do not support moving back to a
7462 previous frame. */
7463
7464rtx
7465aarch64_return_addr (int count, rtx frame ATTRIBUTE_UNUSED)
7466{
7467 if (count != 0)
7468 return const0_rtx;
7469 return get_hard_reg_initial_val (Pmode, LR_REGNUM);
7470}
7471
7472
7473static void
7474aarch64_asm_trampoline_template (FILE *f)
7475{
28514dda
YZ
7476 if (TARGET_ILP32)
7477 {
7478 asm_fprintf (f, "\tldr\tw%d, .+16\n", IP1_REGNUM - R0_REGNUM);
7479 asm_fprintf (f, "\tldr\tw%d, .+16\n", STATIC_CHAIN_REGNUM - R0_REGNUM);
7480 }
7481 else
7482 {
7483 asm_fprintf (f, "\tldr\t%s, .+16\n", reg_names [IP1_REGNUM]);
7484 asm_fprintf (f, "\tldr\t%s, .+20\n", reg_names [STATIC_CHAIN_REGNUM]);
7485 }
01a3a324 7486 asm_fprintf (f, "\tbr\t%s\n", reg_names [IP1_REGNUM]);
43e9d192 7487 assemble_aligned_integer (4, const0_rtx);
28514dda
YZ
7488 assemble_aligned_integer (POINTER_BYTES, const0_rtx);
7489 assemble_aligned_integer (POINTER_BYTES, const0_rtx);
43e9d192
IB
7490}
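/* Editorial sketch of the LP64 trampoline laid out above, with offsets
   inferred from the code (and from aarch64_trampoline_init below);
   IP1 == x17 and STATIC_CHAIN_REGNUM == x18 are assumptions:

     0:  ldr x17, .+16  // load function address from offset 16
     4:  ldr x18, .+20  // load static chain from offset 24
     8:  br  x17
     12: <zero padding word, completing tramp_code_sz == 16>
     16: <target function address> (POINTER_BYTES wide)
     24: <static chain value> (POINTER_BYTES wide)
*/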
7491
7492static void
7493aarch64_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
7494{
7495 rtx fnaddr, mem, a_tramp;
28514dda 7496 const int tramp_code_sz = 16;
43e9d192
IB
7497
7498 /* Don't need to copy the trailing D-words; we fill those in below. */
7499 emit_block_move (m_tramp, assemble_trampoline_template (),
28514dda
YZ
7500 GEN_INT (tramp_code_sz), BLOCK_OP_NORMAL);
7501 mem = adjust_address (m_tramp, ptr_mode, tramp_code_sz);
43e9d192 7502 fnaddr = XEXP (DECL_RTL (fndecl), 0);
28514dda
YZ
7503 if (GET_MODE (fnaddr) != ptr_mode)
7504 fnaddr = convert_memory_address (ptr_mode, fnaddr);
43e9d192
IB
7505 emit_move_insn (mem, fnaddr);
7506
28514dda 7507 mem = adjust_address (m_tramp, ptr_mode, tramp_code_sz + POINTER_BYTES);
43e9d192
IB
7508 emit_move_insn (mem, chain_value);
7509
7510 /* XXX We should really define a "clear_cache" pattern and use
7511 gen_clear_cache(). */
7512 a_tramp = XEXP (m_tramp, 0);
7513 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__clear_cache"),
db69559b 7514 LCT_NORMAL, VOIDmode, a_tramp, ptr_mode,
28514dda
YZ
7515 plus_constant (ptr_mode, a_tramp, TRAMPOLINE_SIZE),
7516 ptr_mode);
43e9d192
IB
7517}
7518
7519static unsigned char
ef4bddc2 7520aarch64_class_max_nregs (reg_class_t regclass, machine_mode mode)
43e9d192 7521{
6a70badb
RS
7522 /* ??? Logically we should only need to provide a value when
7523 HARD_REGNO_MODE_OK says that at least one register in REGCLASS
7524 can hold MODE, but at the moment we need to handle all modes.
7525 Just ignore any runtime parts for registers that can't store them. */
7526 HOST_WIDE_INT lowest_size = constant_lower_bound (GET_MODE_SIZE (mode));
43cacb12 7527 unsigned int nregs;
43e9d192
IB
7528 switch (regclass)
7529 {
d677263e 7530 case TAILCALL_ADDR_REGS:
43e9d192
IB
7531 case POINTER_REGS:
7532 case GENERAL_REGS:
7533 case ALL_REGS:
f25a140b 7534 case POINTER_AND_FP_REGS:
43e9d192
IB
7535 case FP_REGS:
7536 case FP_LO_REGS:
43cacb12
RS
7537 if (aarch64_sve_data_mode_p (mode)
7538 && constant_multiple_p (GET_MODE_SIZE (mode),
7539 BYTES_PER_SVE_VECTOR, &nregs))
7540 return nregs;
7541 return (aarch64_vector_data_mode_p (mode)
6a70badb
RS
7542 ? CEIL (lowest_size, UNITS_PER_VREG)
7543 : CEIL (lowest_size, UNITS_PER_WORD));
43e9d192 7544 case STACK_REG:
43cacb12
RS
7545 case PR_REGS:
7546 case PR_LO_REGS:
7547 case PR_HI_REGS:
43e9d192
IB
7548 return 1;
7549
7550 case NO_REGS:
7551 return 0;
7552
7553 default:
7554 break;
7555 }
7556 gcc_unreachable ();
7557}
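/* Illustrative values (editorial addition, assuming the usual 8-byte
   UNITS_PER_WORD and 16-byte UNITS_PER_VREG):

     aarch64_class_max_nregs (GENERAL_REGS, TImode) == 2 // X-reg pair
     aarch64_class_max_nregs (FP_REGS, V4SImode) == 1    // one Q register
     aarch64_class_max_nregs (FP_REGS, VNx4SImode) == 1  // one SVE Z register
     aarch64_class_max_nregs (STACK_REG, DImode) == 1
*/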
7558
7559static reg_class_t
78d8b9f0 7560aarch64_preferred_reload_class (rtx x, reg_class_t regclass)
43e9d192 7561{
51bb310d 7562 if (regclass == POINTER_REGS)
78d8b9f0
IB
7563 return GENERAL_REGS;
7564
51bb310d
MS
7565 if (regclass == STACK_REG)
7566 {
7567 if (REG_P (x)
7568 && reg_class_subset_p (REGNO_REG_CLASS (REGNO (x)), POINTER_REGS))
7569 return regclass;
7570
7571 return NO_REGS;
7572 }
7573
27bd251b
IB
7574 /* Register elimination can result in a request for
7575 SP+constant->FP_REGS. We cannot support such operations, which
7576 use SP as source and an FP_REG as destination, so reject them
7577 outright. */
7578 if (! reg_class_subset_p (regclass, GENERAL_REGS) && GET_CODE (x) == PLUS)
7579 {
7580 rtx lhs = XEXP (x, 0);
7581
7582 /* Look through a possible SUBREG introduced by ILP32. */
7583 if (GET_CODE (lhs) == SUBREG)
7584 lhs = SUBREG_REG (lhs);
7585
7586 gcc_assert (REG_P (lhs));
7587 gcc_assert (reg_class_subset_p (REGNO_REG_CLASS (REGNO (lhs)),
7588 POINTER_REGS));
7589 return NO_REGS;
7590 }
7591
78d8b9f0 7592 return regclass;
43e9d192
IB
7593}
7594
7595void
7596aarch64_asm_output_labelref (FILE* f, const char *name)
7597{
7598 asm_fprintf (f, "%U%s", name);
7599}
7600
7601static void
7602aarch64_elf_asm_constructor (rtx symbol, int priority)
7603{
7604 if (priority == DEFAULT_INIT_PRIORITY)
7605 default_ctor_section_asm_out_constructor (symbol, priority);
7606 else
7607 {
7608 section *s;
53d190c1
AT
7609 /* Although priority is known to be in range [0, 65535], so that 18 bytes
7610 would be enough, the compiler might not know that. To avoid
7611 a -Wformat-truncation false positive, use a larger size. */
7612 char buf[23];
43e9d192 7613 snprintf (buf, sizeof (buf), ".init_array.%.5u", priority);
fcef3abd 7614 s = get_section (buf, SECTION_WRITE | SECTION_NOTYPE, NULL);
43e9d192
IB
7615 switch_to_section (s);
7616 assemble_align (POINTER_SIZE);
28514dda 7617 assemble_aligned_integer (POINTER_BYTES, symbol);
43e9d192
IB
7618 }
7619}
7620
7621static void
7622aarch64_elf_asm_destructor (rtx symbol, int priority)
7623{
7624 if (priority == DEFAULT_INIT_PRIORITY)
7625 default_dtor_section_asm_out_destructor (symbol, priority);
7626 else
7627 {
7628 section *s;
53d190c1
AT
7629 /* Although priority is known to be in range [0, 65535], so that 18 bytes
7630 would be enough, the compiler might not know that. To avoid
7631 a -Wformat-truncation false positive, use a larger size. */
7632 char buf[23];
43e9d192 7633 snprintf (buf, sizeof (buf), ".fini_array.%.5u", priority);
fcef3abd 7634 s = get_section (buf, SECTION_WRITE | SECTION_NOTYPE, NULL);
43e9d192
IB
7635 switch_to_section (s);
7636 assemble_align (POINTER_SIZE);
28514dda 7637 assemble_aligned_integer (POINTER_BYTES, symbol);
43e9d192
IB
7638 }
7639}
7640
7641const char*
7642aarch64_output_casesi (rtx *operands)
7643{
7644 char buf[100];
7645 char label[100];
b32d5189 7646 rtx diff_vec = PATTERN (NEXT_INSN (as_a <rtx_insn *> (operands[2])));
43e9d192
IB
7647 int index;
7648 static const char *const patterns[4][2] =
7649 {
7650 {
7651 "ldrb\t%w3, [%0,%w1,uxtw]",
7652 "add\t%3, %4, %w3, sxtb #2"
7653 },
7654 {
7655 "ldrh\t%w3, [%0,%w1,uxtw #1]",
7656 "add\t%3, %4, %w3, sxth #2"
7657 },
7658 {
7659 "ldr\t%w3, [%0,%w1,uxtw #2]",
7660 "add\t%3, %4, %w3, sxtw #2"
7661 },
7662 /* We assume that DImode is only generated when not optimizing and
7663 that we don't really need 64-bit address offsets. That would
7664 imply an object file with 8GB of code in a single function! */
7665 {
7666 "ldr\t%w3, [%0,%w1,uxtw #2]",
7667 "add\t%3, %4, %w3, sxtw #2"
7668 }
7669 };
7670
7671 gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC);
7672
77e994c9
RS
7673 scalar_int_mode mode = as_a <scalar_int_mode> (GET_MODE (diff_vec));
7674 index = exact_log2 (GET_MODE_SIZE (mode));
43e9d192
IB
7675
7676 gcc_assert (index >= 0 && index <= 3);
7677
7678 /* Need to implement table size reduction by changing the code below. */
7679 output_asm_insn (patterns[index][0], operands);
7680 ASM_GENERATE_INTERNAL_LABEL (label, "Lrtx", CODE_LABEL_NUMBER (operands[2]));
7681 snprintf (buf, sizeof (buf),
7682 "adr\t%%4, %s", targetm.strip_name_encoding (label));
7683 output_asm_insn (buf, operands);
7684 output_asm_insn (patterns[index][1], operands);
7685 output_asm_insn ("br\t%3", operands);
7686 assemble_label (asm_out_file, label);
7687 return "";
7688}
7689
7690
7691/* Return size in bits of an arithmetic operand which is shifted/scaled and
7692 masked such that it is suitable for a UXTB, UXTH, or UXTW extend
7693 operator. */
7694
7695int
7696aarch64_uxt_size (int shift, HOST_WIDE_INT mask)
7697{
7698 if (shift >= 0 && shift <= 3)
7699 {
7700 int size;
7701 for (size = 8; size <= 32; size *= 2)
7702 {
7703 HOST_WIDE_INT bits = ((HOST_WIDE_INT)1U << size) - 1;
7704 if (mask == bits << shift)
7705 return size;
7706 }
7707 }
7708 return 0;
7709}
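/* Worked examples (editorial addition):

     aarch64_uxt_size (0, 0xff) == 8     // UXTB
     aarch64_uxt_size (1, 0x1fe) == 8    // UXTB shifted left by 1
     aarch64_uxt_size (2, 0x3fffc) == 16 // UXTH shifted left by 2
     aarch64_uxt_size (4, 0xff0) == 0    // shift outside 0..3

   i.e. MASK must be exactly 0xff, 0xffff or 0xffffffff shifted left
   by SHIFT.  */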
7710
e78d485e
RR
7711 /* Constant pools are per-function only when PC-relative
7712 literal loads are enabled or we are using the large memory
7713 model. */
7714
7715static inline bool
7716aarch64_can_use_per_function_literal_pools_p (void)
7717{
9ee6540a 7718 return (aarch64_pcrelative_literal_loads
e78d485e
RR
7719 || aarch64_cmodel == AARCH64_CMODEL_LARGE);
7720}
7721
43e9d192 7722static bool
e78d485e 7723aarch64_use_blocks_for_constant_p (machine_mode, const_rtx)
43e9d192 7724{
74a9301d
VM
7725 /* We can't use blocks for constants when we're using a per-function
7726 constant pool. */
7727 return !aarch64_can_use_per_function_literal_pools_p ();
43e9d192
IB
7728}
7729
e78d485e
RR
7730/* Select appropriate section for constants depending
7731 on where we place literal pools. */
7732
43e9d192 7733static section *
e78d485e
RR
7734aarch64_select_rtx_section (machine_mode mode,
7735 rtx x,
7736 unsigned HOST_WIDE_INT align)
43e9d192 7737{
e78d485e
RR
7738 if (aarch64_can_use_per_function_literal_pools_p ())
7739 return function_section (current_function_decl);
43e9d192 7740
e78d485e
RR
7741 return default_elf_select_rtx_section (mode, x, align);
7742}
43e9d192 7743
5fca7b66
RH
7744/* Implement ASM_OUTPUT_POOL_EPILOGUE. */
7745void
7746aarch64_asm_output_pool_epilogue (FILE *f, const char *, tree,
7747 HOST_WIDE_INT offset)
7748{
7749 /* When using per-function literal pools, we must ensure that any code
7750 section is aligned to the minimal instruction length, lest we get
7751 errors from the assembler re "unaligned instructions". */
7752 if ((offset & 3) && aarch64_can_use_per_function_literal_pools_p ())
7753 ASM_OUTPUT_ALIGN (f, 2);
7754}
7755
43e9d192
IB
7756/* Costs. */
7757
7758/* Helper function for rtx cost calculation. Strip a shift expression
7759 from X. Returns the inner operand if successful, or the original
7760 expression on failure. */
7761static rtx
7762aarch64_strip_shift (rtx x)
7763{
7764 rtx op = x;
7765
57b77d46
RE
7766 /* We accept both ROTATERT and ROTATE: since the RHS must be a constant
7767 we can convert both to ROR during final output. */
43e9d192
IB
7768 if ((GET_CODE (op) == ASHIFT
7769 || GET_CODE (op) == ASHIFTRT
57b77d46
RE
7770 || GET_CODE (op) == LSHIFTRT
7771 || GET_CODE (op) == ROTATERT
7772 || GET_CODE (op) == ROTATE)
43e9d192
IB
7773 && CONST_INT_P (XEXP (op, 1)))
7774 return XEXP (op, 0);
7775
7776 if (GET_CODE (op) == MULT
7777 && CONST_INT_P (XEXP (op, 1))
7778 && ((unsigned) exact_log2 (INTVAL (XEXP (op, 1)))) < 64)
7779 return XEXP (op, 0);
7780
7781 return x;
7782}
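/* Illustrative behavior (editorial addition):

     (ashift (reg) (const_int 2)) -> (reg)
     (mult (reg) (const_int 8)) -> (reg)      // 8 == 2^3, i.e. a shift
     (mult (reg) (const_int 10)) -> unchanged // not a power of two
     (ashift (reg) (reg)) -> unchanged        // non-constant amount
*/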
7783
4745e701 7784/* Helper function for rtx cost calculation. Strip an extend
43e9d192
IB
7785 expression from X. Returns the inner operand if successful, or the
7786 original expression on failure. We deal with a number of possible
b10f1009
AP
7787 canonicalization variations here. If STRIP_SHIFT is true, then
7788 we can strip off a shift also. */
43e9d192 7789static rtx
b10f1009 7790aarch64_strip_extend (rtx x, bool strip_shift)
43e9d192 7791{
77e994c9 7792 scalar_int_mode mode;
43e9d192
IB
7793 rtx op = x;
7794
77e994c9
RS
7795 if (!is_a <scalar_int_mode> (GET_MODE (op), &mode))
7796 return op;
7797
43e9d192
IB
7798 /* Zero and sign extraction of a widened value. */
7799 if ((GET_CODE (op) == ZERO_EXTRACT || GET_CODE (op) == SIGN_EXTRACT)
7800 && XEXP (op, 2) == const0_rtx
4745e701 7801 && GET_CODE (XEXP (op, 0)) == MULT
77e994c9 7802 && aarch64_is_extend_from_extract (mode, XEXP (XEXP (op, 0), 1),
43e9d192
IB
7803 XEXP (op, 1)))
7804 return XEXP (XEXP (op, 0), 0);
7805
7806 /* It can also be represented (for zero-extend) as an AND with an
7807 immediate. */
7808 if (GET_CODE (op) == AND
7809 && GET_CODE (XEXP (op, 0)) == MULT
7810 && CONST_INT_P (XEXP (XEXP (op, 0), 1))
7811 && CONST_INT_P (XEXP (op, 1))
7812 && aarch64_uxt_size (exact_log2 (INTVAL (XEXP (XEXP (op, 0), 1))),
7813 INTVAL (XEXP (op, 1))) != 0)
7814 return XEXP (XEXP (op, 0), 0);
7815
7816 /* Now handle extended register, as this may also have an optional
7817 left shift by 1..4. */
b10f1009
AP
7818 if (strip_shift
7819 && GET_CODE (op) == ASHIFT
43e9d192
IB
7820 && CONST_INT_P (XEXP (op, 1))
7821 && ((unsigned HOST_WIDE_INT) INTVAL (XEXP (op, 1))) <= 4)
7822 op = XEXP (op, 0);
7823
7824 if (GET_CODE (op) == ZERO_EXTEND
7825 || GET_CODE (op) == SIGN_EXTEND)
7826 op = XEXP (op, 0);
7827
7828 if (op != x)
7829 return op;
7830
4745e701
JG
7831 return x;
7832}
7833
0a78ebe4
KT
7834/* Return true iff CODE is a shift supported in combination
7835 with arithmetic instructions. */
4d1919ed 7836
0a78ebe4
KT
7837static bool
7838aarch64_shift_p (enum rtx_code code)
7839{
7840 return code == ASHIFT || code == ASHIFTRT || code == LSHIFTRT;
7841}
7842
b10f1009
AP
7843
7844/* Return true iff X is a cheap shift without a sign extend. */
7845
7846static bool
7847aarch64_cheap_mult_shift_p (rtx x)
7848{
7849 rtx op0, op1;
7850
7851 op0 = XEXP (x, 0);
7852 op1 = XEXP (x, 1);
7853
7854 if (!(aarch64_tune_params.extra_tuning_flags
7855 & AARCH64_EXTRA_TUNE_CHEAP_SHIFT_EXTEND))
7856 return false;
7857
7858 if (GET_CODE (op0) == SIGN_EXTEND)
7859 return false;
7860
7861 if (GET_CODE (x) == ASHIFT && CONST_INT_P (op1)
7862 && UINTVAL (op1) <= 4)
7863 return true;
7864
7865 if (GET_CODE (x) != MULT || !CONST_INT_P (op1))
7866 return false;
7867
7868 HOST_WIDE_INT l2 = exact_log2 (INTVAL (op1));
7869
7870 if (l2 > 0 && l2 <= 4)
7871 return true;
7872
7873 return false;
7874}
7875
4745e701 7876/* Helper function for rtx cost calculation. Calculate the cost of
0a78ebe4
KT
7877 a MULT or ASHIFT, which may be part of a compound PLUS/MINUS rtx.
7878 Return the calculated cost of the expression, recursing manually in to
4745e701
JG
7879 operands where needed. */
7880
7881static int
e548c9df 7882aarch64_rtx_mult_cost (rtx x, enum rtx_code code, int outer, bool speed)
4745e701
JG
7883{
7884 rtx op0, op1;
7885 const struct cpu_cost_table *extra_cost
b175b679 7886 = aarch64_tune_params.insn_extra_cost;
4745e701 7887 int cost = 0;
0a78ebe4 7888 bool compound_p = (outer == PLUS || outer == MINUS);
ef4bddc2 7889 machine_mode mode = GET_MODE (x);
4745e701
JG
7890
7891 gcc_checking_assert (code == MULT);
7892
7893 op0 = XEXP (x, 0);
7894 op1 = XEXP (x, 1);
7895
7896 if (VECTOR_MODE_P (mode))
7897 mode = GET_MODE_INNER (mode);
7898
7899 /* Integer multiply/fma. */
7900 if (GET_MODE_CLASS (mode) == MODE_INT)
7901 {
7902 /* The multiply will be canonicalized as a shift, cost it as such. */
0a78ebe4
KT
7903 if (aarch64_shift_p (GET_CODE (x))
7904 || (CONST_INT_P (op1)
7905 && exact_log2 (INTVAL (op1)) > 0))
4745e701 7906 {
0a78ebe4
KT
7907 bool is_extend = GET_CODE (op0) == ZERO_EXTEND
7908 || GET_CODE (op0) == SIGN_EXTEND;
4745e701
JG
7909 if (speed)
7910 {
0a78ebe4
KT
7911 if (compound_p)
7912 {
b10f1009
AP
7913 /* If the shift is considered cheap,
7914 then don't add any cost. */
7915 if (aarch64_cheap_mult_shift_p (x))
7916 ;
7917 else if (REG_P (op1))
0a78ebe4
KT
7918 /* ARITH + shift-by-register. */
7919 cost += extra_cost->alu.arith_shift_reg;
7920 else if (is_extend)
7921 /* ARITH + extended register. We don't have a cost field
7922 for ARITH+EXTEND+SHIFT, so use extend_arith here. */
7923 cost += extra_cost->alu.extend_arith;
7924 else
7925 /* ARITH + shift-by-immediate. */
7926 cost += extra_cost->alu.arith_shift;
7927 }
4745e701
JG
7928 else
7929 /* LSL (immediate). */
0a78ebe4
KT
7930 cost += extra_cost->alu.shift;
7931
4745e701 7932 }
0a78ebe4
KT
7933 /* Strip extends as we will have costed them in the case above. */
7934 if (is_extend)
b10f1009 7935 op0 = aarch64_strip_extend (op0, true);
4745e701 7936
e548c9df 7937 cost += rtx_cost (op0, VOIDmode, code, 0, speed);
4745e701
JG
7938
7939 return cost;
7940 }
7941
d2ac256b
KT
7942 /* MNEG or [US]MNEGL. Extract the NEG operand and indicate that it's a
7943 compound and let the below cases handle it. After all, MNEG is a
7944 special-case alias of MSUB. */
7945 if (GET_CODE (op0) == NEG)
7946 {
7947 op0 = XEXP (op0, 0);
7948 compound_p = true;
7949 }
7950
4745e701
JG
7951 /* Integer multiplies or FMAs have zero/sign extending variants. */
7952 if ((GET_CODE (op0) == ZERO_EXTEND
7953 && GET_CODE (op1) == ZERO_EXTEND)
7954 || (GET_CODE (op0) == SIGN_EXTEND
7955 && GET_CODE (op1) == SIGN_EXTEND))
7956 {
e548c9df
AM
7957 cost += rtx_cost (XEXP (op0, 0), VOIDmode, MULT, 0, speed);
7958 cost += rtx_cost (XEXP (op1, 0), VOIDmode, MULT, 1, speed);
4745e701
JG
7959
7960 if (speed)
7961 {
0a78ebe4 7962 if (compound_p)
d2ac256b 7963 /* SMADDL/UMADDL/UMSUBL/SMSUBL. */
4745e701
JG
7964 cost += extra_cost->mult[0].extend_add;
7965 else
7966 /* MUL/SMULL/UMULL. */
7967 cost += extra_cost->mult[0].extend;
7968 }
7969
7970 return cost;
7971 }
7972
d2ac256b 7973 /* This is either an integer multiply or a MADD. In both cases
4745e701 7974 we want to recurse and cost the operands. */
e548c9df
AM
7975 cost += rtx_cost (op0, mode, MULT, 0, speed);
7976 cost += rtx_cost (op1, mode, MULT, 1, speed);
4745e701
JG
7977
7978 if (speed)
7979 {
0a78ebe4 7980 if (compound_p)
d2ac256b 7981 /* MADD/MSUB. */
4745e701
JG
7982 cost += extra_cost->mult[mode == DImode].add;
7983 else
7984 /* MUL. */
7985 cost += extra_cost->mult[mode == DImode].simple;
7986 }
7987
7988 return cost;
7989 }
7990 else
7991 {
7992 if (speed)
7993 {
3d840f7d 7994 /* Floating-point FMA/FMUL can also support negations of the
d318517d
SN
7995 operands, unless the rounding mode is upward or downward in
7996 which case FNMUL differs from FMUL with operand negation. */
7997 bool neg0 = GET_CODE (op0) == NEG;
7998 bool neg1 = GET_CODE (op1) == NEG;
7999 if (compound_p || !flag_rounding_math || (neg0 && neg1))
8000 {
8001 if (neg0)
8002 op0 = XEXP (op0, 0);
8003 if (neg1)
8004 op1 = XEXP (op1, 0);
8005 }
4745e701 8006
0a78ebe4 8007 if (compound_p)
4745e701
JG
8008 /* FMADD/FNMADD/FNMSUB/FMSUB. */
8009 cost += extra_cost->fp[mode == DFmode].fma;
8010 else
3d840f7d 8011 /* FMUL/FNMUL. */
4745e701
JG
8012 cost += extra_cost->fp[mode == DFmode].mult;
8013 }
8014
e548c9df
AM
8015 cost += rtx_cost (op0, mode, MULT, 0, speed);
8016 cost += rtx_cost (op1, mode, MULT, 1, speed);
4745e701
JG
8017 return cost;
8018 }
43e9d192
IB
8019}
8020
67747367
JG
8021static int
8022aarch64_address_cost (rtx x,
ef4bddc2 8023 machine_mode mode,
67747367
JG
8024 addr_space_t as ATTRIBUTE_UNUSED,
8025 bool speed)
8026{
8027 enum rtx_code c = GET_CODE (x);
b175b679 8028 const struct cpu_addrcost_table *addr_cost = aarch64_tune_params.addr_cost;
67747367
JG
8029 struct aarch64_address_info info;
8030 int cost = 0;
8031 info.shift = 0;
8032
a97d8b98 8033 if (!aarch64_classify_address (&info, x, mode, false))
67747367
JG
8034 {
8035 if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF)
8036 {
8037 /* This is a CONST or SYMBOL ref which will be split
8038 in a different way depending on the code model in use.
8039 Cost it through the generic infrastructure. */
e548c9df 8040 int cost_symbol_ref = rtx_cost (x, Pmode, MEM, 1, speed);
67747367
JG
8041 /* Divide through by the cost of one instruction to
8042 bring it to the same units as the address costs. */
8043 cost_symbol_ref /= COSTS_N_INSNS (1);
8044 /* The cost is then the cost of preparing the address,
8045 followed by an immediate (possibly 0) offset. */
8046 return cost_symbol_ref + addr_cost->imm_offset;
8047 }
8048 else
8049 {
8050 /* This is most likely a jump table from a case
8051 statement. */
8052 return addr_cost->register_offset;
8053 }
8054 }
8055
8056 switch (info.type)
8057 {
8058 case ADDRESS_LO_SUM:
8059 case ADDRESS_SYMBOLIC:
8060 case ADDRESS_REG_IMM:
8061 cost += addr_cost->imm_offset;
8062 break;
8063
8064 case ADDRESS_REG_WB:
8065 if (c == PRE_INC || c == PRE_DEC || c == PRE_MODIFY)
8066 cost += addr_cost->pre_modify;
8067 else if (c == POST_INC || c == POST_DEC || c == POST_MODIFY)
8068 cost += addr_cost->post_modify;
8069 else
8070 gcc_unreachable ();
8071
8072 break;
8073
8074 case ADDRESS_REG_REG:
8075 cost += addr_cost->register_offset;
8076 break;
8077
67747367 8078 case ADDRESS_REG_SXTW:
783879e6
EM
8079 cost += addr_cost->register_sextend;
8080 break;
8081
8082 case ADDRESS_REG_UXTW:
8083 cost += addr_cost->register_zextend;
67747367
JG
8084 break;
8085
8086 default:
8087 gcc_unreachable ();
8088 }
8089
8090
8091 if (info.shift > 0)
8092 {
8093 /* For the sake of calculating the cost of the shifted register
8094 component, we can treat same sized modes in the same way. */
6a70badb
RS
8095 if (known_eq (GET_MODE_BITSIZE (mode), 16))
8096 cost += addr_cost->addr_scale_costs.hi;
8097 else if (known_eq (GET_MODE_BITSIZE (mode), 32))
8098 cost += addr_cost->addr_scale_costs.si;
8099 else if (known_eq (GET_MODE_BITSIZE (mode), 64))
8100 cost += addr_cost->addr_scale_costs.di;
8101 else
8102 /* We can't tell, or this is a 128-bit vector. */
8103 cost += addr_cost->addr_scale_costs.ti;
67747367
JG
8104 }
8105
8106 return cost;
8107}
8108
b9066f5a
MW
8109/* Return the cost of a branch. If SPEED_P is true then the compiler is
8110 optimizing for speed. If PREDICTABLE_P is true then the branch is predicted
8111 to be taken. */
8112
8113int
8114aarch64_branch_cost (bool speed_p, bool predictable_p)
8115{
8116 /* When optimizing for speed, use the cost of unpredictable branches. */
8117 const struct cpu_branch_cost *branch_costs =
b175b679 8118 aarch64_tune_params.branch_costs;
b9066f5a
MW
8119
8120 if (!speed_p || predictable_p)
8121 return branch_costs->predictable;
8122 else
8123 return branch_costs->unpredictable;
8124}
8125
7cc2145f
JG
8126/* Return true if the RTX X in mode MODE is a zero or sign extract
8127 usable in an ADD or SUB (extended register) instruction. */
8128static bool
77e994c9 8129aarch64_rtx_arith_op_extract_p (rtx x, scalar_int_mode mode)
7cc2145f
JG
8130{
8131 /* Catch add with a sign extract.
8132 This is add_<optab><mode>_multp2. */
8133 if (GET_CODE (x) == SIGN_EXTRACT
8134 || GET_CODE (x) == ZERO_EXTRACT)
8135 {
8136 rtx op0 = XEXP (x, 0);
8137 rtx op1 = XEXP (x, 1);
8138 rtx op2 = XEXP (x, 2);
8139
8140 if (GET_CODE (op0) == MULT
8141 && CONST_INT_P (op1)
8142 && op2 == const0_rtx
8143 && CONST_INT_P (XEXP (op0, 1))
8144 && aarch64_is_extend_from_extract (mode,
8145 XEXP (op0, 1),
8146 op1))
8147 {
8148 return true;
8149 }
8150 }
e47c4031
KT
8151 /* The simple case <ARITH>, XD, XN, XM, [us]xt.
8152 No shift. */
8153 else if (GET_CODE (x) == SIGN_EXTEND
8154 || GET_CODE (x) == ZERO_EXTEND)
8155 return REG_P (XEXP (x, 0));
7cc2145f
JG
8156
8157 return false;
8158}
8159
61263118
KT
8160static bool
8161aarch64_frint_unspec_p (unsigned int u)
8162{
8163 switch (u)
8164 {
8165 case UNSPEC_FRINTZ:
8166 case UNSPEC_FRINTP:
8167 case UNSPEC_FRINTM:
8168 case UNSPEC_FRINTA:
8169 case UNSPEC_FRINTN:
8170 case UNSPEC_FRINTX:
8171 case UNSPEC_FRINTI:
8172 return true;
8173
8174 default:
8175 return false;
8176 }
8177}
8178
fb0cb7fa
KT
8179/* Return true iff X is an rtx that will match an extr instruction
8180 i.e. as described in the *extr<mode>5_insn family of patterns.
8181 OP0 and OP1 will be set to the operands of the shifts involved
8182 on success and will be NULL_RTX otherwise. */
8183
8184static bool
8185aarch64_extr_rtx_p (rtx x, rtx *res_op0, rtx *res_op1)
8186{
8187 rtx op0, op1;
77e994c9
RS
8188 scalar_int_mode mode;
8189 if (!is_a <scalar_int_mode> (GET_MODE (x), &mode))
8190 return false;
fb0cb7fa
KT
8191
8192 *res_op0 = NULL_RTX;
8193 *res_op1 = NULL_RTX;
8194
8195 if (GET_CODE (x) != IOR)
8196 return false;
8197
8198 op0 = XEXP (x, 0);
8199 op1 = XEXP (x, 1);
8200
8201 if ((GET_CODE (op0) == ASHIFT && GET_CODE (op1) == LSHIFTRT)
8202 || (GET_CODE (op1) == ASHIFT && GET_CODE (op0) == LSHIFTRT))
8203 {
8204 /* Canonicalise locally to ashift in op0, lshiftrt in op1. */
8205 if (GET_CODE (op1) == ASHIFT)
8206 std::swap (op0, op1);
8207
8208 if (!CONST_INT_P (XEXP (op0, 1)) || !CONST_INT_P (XEXP (op1, 1)))
8209 return false;
8210
8211 unsigned HOST_WIDE_INT shft_amnt_0 = UINTVAL (XEXP (op0, 1));
8212 unsigned HOST_WIDE_INT shft_amnt_1 = UINTVAL (XEXP (op1, 1));
8213
8214 if (shft_amnt_0 < GET_MODE_BITSIZE (mode)
8215 && shft_amnt_0 + shft_amnt_1 == GET_MODE_BITSIZE (mode))
8216 {
8217 *res_op0 = XEXP (op0, 0);
8218 *res_op1 = XEXP (op1, 0);
8219 return true;
8220 }
8221 }
8222
8223 return false;
8224}
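/* Illustrative match (editorial addition): in DImode,

     (ior (ashift (reg A) (const_int 16))
	  (lshiftrt (reg B) (const_int 48)))

   satisfies 16 + 48 == 64, so the function returns true with *RES_OP0
   set to A and *RES_OP1 set to B; the insn can then be output as
   something like "extr x0, xA, xB, #48" (destination invented).  */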
8225
/* Calculate the cost of calculating (if_then_else (OP0) (OP1) (OP2)),
   storing it in *COST.  Result is true if the total cost of the operation
   has now been calculated.  */
static bool
aarch64_if_then_else_costs (rtx op0, rtx op1, rtx op2, int *cost, bool speed)
{
  rtx inner;
  rtx comparator;
  enum rtx_code cmpcode;

  if (COMPARISON_P (op0))
    {
      inner = XEXP (op0, 0);
      comparator = XEXP (op0, 1);
      cmpcode = GET_CODE (op0);
    }
  else
    {
      inner = op0;
      comparator = const0_rtx;
      cmpcode = NE;
    }

  if (GET_CODE (op1) == PC || GET_CODE (op2) == PC)
    {
      /* Conditional branch.  */
      if (GET_MODE_CLASS (GET_MODE (inner)) == MODE_CC)
        return true;
      else
        {
          if (cmpcode == NE || cmpcode == EQ)
            {
              if (comparator == const0_rtx)
                {
                  /* TBZ/TBNZ/CBZ/CBNZ.  */
                  if (GET_CODE (inner) == ZERO_EXTRACT)
                    /* TBZ/TBNZ.  */
                    *cost += rtx_cost (XEXP (inner, 0), VOIDmode,
                                       ZERO_EXTRACT, 0, speed);
                  else
                    /* CBZ/CBNZ.  */
                    *cost += rtx_cost (inner, VOIDmode, cmpcode, 0, speed);

                  return true;
                }
            }
          else if (cmpcode == LT || cmpcode == GE)
            {
              /* TBZ/TBNZ.  */
              if (comparator == const0_rtx)
                return true;
            }
        }
    }
  else if (GET_MODE_CLASS (GET_MODE (inner)) == MODE_CC)
    {
      /* CCMP.  */
      if (GET_CODE (op1) == COMPARE)
        {
          /* Increase cost of CCMP reg, 0, imm, CC to prefer CMP reg, 0.  */
          if (XEXP (op1, 1) == const0_rtx)
            *cost += 1;
          if (speed)
            {
              machine_mode mode = GET_MODE (XEXP (op1, 0));
              const struct cpu_cost_table *extra_cost
                = aarch64_tune_params.insn_extra_cost;

              if (GET_MODE_CLASS (mode) == MODE_INT)
                *cost += extra_cost->alu.arith;
              else
                *cost += extra_cost->fp[mode == DFmode].compare;
            }
          return true;
        }

      /* It's a conditional operation based on the status flags,
         so it must be some flavor of CSEL.  */

      /* CSNEG, CSINV, and CSINC are handled for free as part of CSEL.  */
      if (GET_CODE (op1) == NEG
          || GET_CODE (op1) == NOT
          || (GET_CODE (op1) == PLUS && XEXP (op1, 1) == const1_rtx))
        op1 = XEXP (op1, 0);
      else if (GET_CODE (op1) == ZERO_EXTEND && GET_CODE (op2) == ZERO_EXTEND)
        {
          /* CSEL with zero-extension (*cmovdi_insn_uxtw).  */
          op1 = XEXP (op1, 0);
          op2 = XEXP (op2, 0);
        }

      *cost += rtx_cost (op1, VOIDmode, IF_THEN_ELSE, 1, speed);
      *cost += rtx_cost (op2, VOIDmode, IF_THEN_ELSE, 2, speed);
      return true;
    }

  /* We don't know what this is, cost all operands.  */
  return false;
}
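/* For example (an illustrative case): a conditional branch such as
     (if_then_else (ne (reg:DI x) (const_int 0)) (label_ref 23) (pc))
   takes the comparator == const0_rtx path above: it maps to a single CBNZ,
   so only the inner register operand is recursed into for costing.  */
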
/* Check whether X is a bitfield operation of the form shift + extend that
   maps down to a UBFIZ/SBFIZ/UBFX/SBFX instruction.  If so, return the
   operand to which the bitfield operation is applied.  Otherwise return
   NULL_RTX.  */

static rtx
aarch64_extend_bitfield_pattern_p (rtx x)
{
  rtx_code outer_code = GET_CODE (x);
  machine_mode outer_mode = GET_MODE (x);

  if (outer_code != ZERO_EXTEND && outer_code != SIGN_EXTEND
      && outer_mode != SImode && outer_mode != DImode)
    return NULL_RTX;

  rtx inner = XEXP (x, 0);
  rtx_code inner_code = GET_CODE (inner);
  machine_mode inner_mode = GET_MODE (inner);
  rtx op = NULL_RTX;

  switch (inner_code)
    {
    case ASHIFT:
      if (CONST_INT_P (XEXP (inner, 1))
          && (inner_mode == QImode || inner_mode == HImode))
        op = XEXP (inner, 0);
      break;
    case LSHIFTRT:
      if (outer_code == ZERO_EXTEND && CONST_INT_P (XEXP (inner, 1))
          && (inner_mode == QImode || inner_mode == HImode))
        op = XEXP (inner, 0);
      break;
    case ASHIFTRT:
      if (outer_code == SIGN_EXTEND && CONST_INT_P (XEXP (inner, 1))
          && (inner_mode == QImode || inner_mode == HImode))
        op = XEXP (inner, 0);
      break;
    default:
      break;
    }

  return op;
}
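/* For example (an illustrative case):
     (zero_extend:SI (lshiftrt:HI (reg:HI x) (const_int 3)))
   extracts bits 3..15 of X and zero-extends the result, which maps to a
   single UBFX instruction; the function returns (reg:HI x).  */
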
/* Return true if the mask and a shift amount from an RTX of the form
   (x << SHFT_AMNT) & MASK are valid to combine into a UBFIZ instruction of
   mode MODE.  See the *andim_ashift<mode>_bfiz pattern.  */

bool
aarch64_mask_and_shift_for_ubfiz_p (scalar_int_mode mode, rtx mask,
                                    rtx shft_amnt)
{
  return CONST_INT_P (mask) && CONST_INT_P (shft_amnt)
         && INTVAL (shft_amnt) < GET_MODE_BITSIZE (mode)
         && exact_log2 ((INTVAL (mask) >> INTVAL (shft_amnt)) + 1) >= 0
         && (INTVAL (mask) & ((1 << INTVAL (shft_amnt)) - 1)) == 0;
}
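/* For example (an illustrative case): in SImode, MASK = 0xff0 and
   SHFT_AMNT = 4 satisfy the conditions above: the mask shifted right by
   the shift amount is 0xff (a contiguous run of set bits) and no mask bit
   lies below the shift, so (x << 4) & 0xff0 can become
   "ubfiz w0, w1, 4, 8".  */
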
/* Calculate the cost of calculating X, storing it in *COST.  Result
   is true if the total cost of the operation has now been calculated.  */
static bool
aarch64_rtx_costs (rtx x, machine_mode mode, int outer ATTRIBUTE_UNUSED,
                   int param ATTRIBUTE_UNUSED, int *cost, bool speed)
{
  rtx op0, op1, op2;
  const struct cpu_cost_table *extra_cost
    = aarch64_tune_params.insn_extra_cost;
  int code = GET_CODE (x);
  scalar_int_mode int_mode;

  /* By default, assume that everything has equivalent cost to the
     cheapest instruction.  Any additional costs are applied as a delta
     above this default.  */
  *cost = COSTS_N_INSNS (1);

  switch (code)
    {
    case SET:
      /* The cost depends entirely on the operands to SET.  */
      *cost = 0;
      op0 = SET_DEST (x);
      op1 = SET_SRC (x);

      switch (GET_CODE (op0))
        {
        case MEM:
          if (speed)
            {
              rtx address = XEXP (op0, 0);
              if (VECTOR_MODE_P (mode))
                *cost += extra_cost->ldst.storev;
              else if (GET_MODE_CLASS (mode) == MODE_INT)
                *cost += extra_cost->ldst.store;
              else if (mode == SFmode)
                *cost += extra_cost->ldst.storef;
              else if (mode == DFmode)
                *cost += extra_cost->ldst.stored;

              *cost +=
                COSTS_N_INSNS (aarch64_address_cost (address, mode,
                                                     0, speed));
            }

          *cost += rtx_cost (op1, mode, SET, 1, speed);
          return true;

        case SUBREG:
          if (! REG_P (SUBREG_REG (op0)))
            *cost += rtx_cost (SUBREG_REG (op0), VOIDmode, SET, 0, speed);

          /* Fall through.  */
        case REG:
          /* The cost is one per vector-register copied.  */
          if (VECTOR_MODE_P (GET_MODE (op0)) && REG_P (op1))
            {
              int nregs = aarch64_hard_regno_nregs (V0_REGNUM, GET_MODE (op0));
              *cost = COSTS_N_INSNS (nregs);
            }
          /* const0_rtx is in general free, but we will use an
             instruction to set a register to 0.  */
          else if (REG_P (op1) || op1 == const0_rtx)
            {
              /* The cost is 1 per register copied.  */
              int nregs = aarch64_hard_regno_nregs (R0_REGNUM, GET_MODE (op0));
              *cost = COSTS_N_INSNS (nregs);
            }
          else
            /* Cost is just the cost of the RHS of the set.  */
            *cost += rtx_cost (op1, mode, SET, 1, speed);
          return true;

        case ZERO_EXTRACT:
        case SIGN_EXTRACT:
          /* Bit-field insertion.  Strip any redundant widening of
             the RHS to meet the width of the target.  */
          if (GET_CODE (op1) == SUBREG)
            op1 = SUBREG_REG (op1);
          if ((GET_CODE (op1) == ZERO_EXTEND
               || GET_CODE (op1) == SIGN_EXTEND)
              && CONST_INT_P (XEXP (op0, 1))
              && is_a <scalar_int_mode> (GET_MODE (XEXP (op1, 0)), &int_mode)
              && GET_MODE_BITSIZE (int_mode) >= INTVAL (XEXP (op0, 1)))
            op1 = XEXP (op1, 0);

          if (CONST_INT_P (op1))
            {
              /* MOV immediate is assumed to always be cheap.  */
              *cost = COSTS_N_INSNS (1);
            }
          else
            {
              /* BFM.  */
              if (speed)
                *cost += extra_cost->alu.bfi;
              *cost += rtx_cost (op1, VOIDmode, (enum rtx_code) code, 1, speed);
            }

          return true;

        default:
          /* We can't make sense of this, assume default cost.  */
          *cost = COSTS_N_INSNS (1);
          return false;
        }
      return false;
    case CONST_INT:
      /* If an instruction can incorporate a constant within the
         instruction, the instruction's expression avoids calling
         rtx_cost() on the constant.  If rtx_cost() is called on a
         constant, then it is usually because the constant must be
         moved into a register by one or more instructions.

         The exception is constant 0, which can be expressed
         as XZR/WZR and is therefore free.  The exception to this is
         if we have (set (reg) (const0_rtx)) in which case we must cost
         the move.  However, we can catch that when we cost the SET, so
         we don't need to consider that here.  */
      if (x == const0_rtx)
        *cost = 0;
      else
        {
          /* To an approximation, building any other constant is
             proportionally expensive to the number of instructions
             required to build that constant.  This is true whether we
             are compiling for SPEED or otherwise.  */
          if (!is_a <scalar_int_mode> (mode, &int_mode))
            int_mode = word_mode;
          *cost = COSTS_N_INSNS (aarch64_internal_mov_immediate
                                 (NULL_RTX, x, false, int_mode));
        }
      return true;

    case CONST_DOUBLE:

      /* First determine number of instructions to do the move
         as an integer constant.  */
      if (!aarch64_float_const_representable_p (x)
          && !aarch64_can_const_movi_rtx_p (x, mode)
          && aarch64_float_const_rtx_p (x))
        {
          unsigned HOST_WIDE_INT ival;
          bool succeed = aarch64_reinterpret_float_as_int (x, &ival);
          gcc_assert (succeed);

          scalar_int_mode imode = (mode == HFmode
                                   ? SImode
                                   : int_mode_for_mode (mode).require ());
          int ncost = aarch64_internal_mov_immediate
            (NULL_RTX, gen_int_mode (ival, imode), false, imode);
          *cost += COSTS_N_INSNS (ncost);
          return true;
        }

      if (speed)
        {
          /* mov[df,sf]_aarch64.  */
          if (aarch64_float_const_representable_p (x))
            /* FMOV (scalar immediate).  */
            *cost += extra_cost->fp[mode == DFmode].fpconst;
          else if (!aarch64_float_const_zero_rtx_p (x))
            {
              /* This will be a load from memory.  */
              if (mode == DFmode)
                *cost += extra_cost->ldst.loadd;
              else
                *cost += extra_cost->ldst.loadf;
            }
          else
            /* Otherwise this is +0.0.  We get this using MOVI d0, #0
               or MOV v0.s[0], wzr - neither of which are modeled by the
               cost tables.  Just use the default cost.  */
            {
            }
        }

      return true;
    case MEM:
      if (speed)
        {
          /* For loads we want the base cost of a load, plus an
             approximation for the additional cost of the addressing
             mode.  */
          rtx address = XEXP (x, 0);
          if (VECTOR_MODE_P (mode))
            *cost += extra_cost->ldst.loadv;
          else if (GET_MODE_CLASS (mode) == MODE_INT)
            *cost += extra_cost->ldst.load;
          else if (mode == SFmode)
            *cost += extra_cost->ldst.loadf;
          else if (mode == DFmode)
            *cost += extra_cost->ldst.loadd;

          *cost +=
            COSTS_N_INSNS (aarch64_address_cost (address, mode,
                                                 0, speed));
        }

      return true;

    case NEG:
      op0 = XEXP (x, 0);

      if (VECTOR_MODE_P (mode))
        {
          if (speed)
            {
              /* FNEG.  */
              *cost += extra_cost->vect.alu;
            }
          return false;
        }

      if (GET_MODE_CLASS (mode) == MODE_INT)
        {
          if (GET_RTX_CLASS (GET_CODE (op0)) == RTX_COMPARE
              || GET_RTX_CLASS (GET_CODE (op0)) == RTX_COMM_COMPARE)
            {
              /* CSETM.  */
              *cost += rtx_cost (XEXP (op0, 0), VOIDmode, NEG, 0, speed);
              return true;
            }

          /* Cost this as SUB wzr, X.  */
          op0 = CONST0_RTX (mode);
          op1 = XEXP (x, 0);
          goto cost_minus;
        }

      if (GET_MODE_CLASS (mode) == MODE_FLOAT)
        {
          /* Support (neg(fma...)) as a single instruction only if
             sign of zeros is unimportant.  This matches the decision
             making in aarch64.md.  */
          if (GET_CODE (op0) == FMA && !HONOR_SIGNED_ZEROS (GET_MODE (op0)))
            {
              /* FNMADD.  */
              *cost = rtx_cost (op0, mode, NEG, 0, speed);
              return true;
            }
          if (GET_CODE (op0) == MULT)
            {
              /* FNMUL.  */
              *cost = rtx_cost (op0, mode, NEG, 0, speed);
              return true;
            }
          if (speed)
            /* FNEG.  */
            *cost += extra_cost->fp[mode == DFmode].neg;
          return false;
        }

      return false;

    case CLRSB:
    case CLZ:
      if (speed)
        {
          if (VECTOR_MODE_P (mode))
            *cost += extra_cost->vect.alu;
          else
            *cost += extra_cost->alu.clz;
        }

      return false;
    case COMPARE:
      op0 = XEXP (x, 0);
      op1 = XEXP (x, 1);

      if (op1 == const0_rtx
          && GET_CODE (op0) == AND)
        {
          x = op0;
          mode = GET_MODE (op0);
          goto cost_logic;
        }

      if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT)
        {
          /* TODO: A write to the CC flags possibly costs extra, this
             needs encoding in the cost tables.  */

          mode = GET_MODE (op0);
          /* ANDS.  */
          if (GET_CODE (op0) == AND)
            {
              x = op0;
              goto cost_logic;
            }

          if (GET_CODE (op0) == PLUS)
            {
              /* ADDS (and CMN alias).  */
              x = op0;
              goto cost_plus;
            }

          if (GET_CODE (op0) == MINUS)
            {
              /* SUBS.  */
              x = op0;
              goto cost_minus;
            }

          if (GET_CODE (op0) == ZERO_EXTRACT && op1 == const0_rtx
              && GET_MODE (x) == CC_NZmode && CONST_INT_P (XEXP (op0, 1))
              && CONST_INT_P (XEXP (op0, 2)))
            {
              /* COMPARE of ZERO_EXTRACT form of TST-immediate.
                 Handle it here directly rather than going to cost_logic
                 since we know the immediate generated for the TST is valid
                 so we can avoid creating an intermediate rtx for it only
                 for costing purposes.  */
              if (speed)
                *cost += extra_cost->alu.logical;

              *cost += rtx_cost (XEXP (op0, 0), GET_MODE (op0),
                                 ZERO_EXTRACT, 0, speed);
              return true;
            }

          if (GET_CODE (op1) == NEG)
            {
              /* CMN.  */
              if (speed)
                *cost += extra_cost->alu.arith;

              *cost += rtx_cost (op0, mode, COMPARE, 0, speed);
              *cost += rtx_cost (XEXP (op1, 0), mode, NEG, 1, speed);
              return true;
            }

          /* CMP.

             Compare can freely swap the order of operands, and
             canonicalization puts the more complex operation first.
             But the integer MINUS logic expects the shift/extend
             operation in op1.  */
          if (! (REG_P (op0)
                 || (GET_CODE (op0) == SUBREG && REG_P (SUBREG_REG (op0)))))
            {
              op0 = XEXP (x, 1);
              op1 = XEXP (x, 0);
            }
          goto cost_minus;
        }

      if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_FLOAT)
        {
          /* FCMP.  */
          if (speed)
            *cost += extra_cost->fp[mode == DFmode].compare;

          if (CONST_DOUBLE_P (op1) && aarch64_float_const_zero_rtx_p (op1))
            {
              *cost += rtx_cost (op0, VOIDmode, COMPARE, 0, speed);
              /* FCMP supports constant 0.0 for no extra cost.  */
              return true;
            }
          return false;
        }

      if (VECTOR_MODE_P (mode))
        {
          /* Vector compare.  */
          if (speed)
            *cost += extra_cost->vect.alu;

          if (aarch64_float_const_zero_rtx_p (op1))
            {
              /* Vector cm (eq|ge|gt|lt|le) supports constant 0.0 for no extra
                 cost.  */
              return true;
            }
          return false;
        }
      return false;
    case MINUS:
      {
        op0 = XEXP (x, 0);
        op1 = XEXP (x, 1);

cost_minus:
        *cost += rtx_cost (op0, mode, MINUS, 0, speed);

        /* Detect valid immediates.  */
        if ((GET_MODE_CLASS (mode) == MODE_INT
             || (GET_MODE_CLASS (mode) == MODE_CC
                 && GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT))
            && CONST_INT_P (op1)
            && aarch64_uimm12_shift (INTVAL (op1)))
          {
            if (speed)
              /* SUB(S) (immediate).  */
              *cost += extra_cost->alu.arith;
            return true;
          }

        /* Look for SUB (extended register).  */
        if (is_a <scalar_int_mode> (mode, &int_mode)
            && aarch64_rtx_arith_op_extract_p (op1, int_mode))
          {
            if (speed)
              *cost += extra_cost->alu.extend_arith;

            op1 = aarch64_strip_extend (op1, true);
            *cost += rtx_cost (op1, VOIDmode,
                               (enum rtx_code) GET_CODE (op1), 0, speed);
            return true;
          }

        rtx new_op1 = aarch64_strip_extend (op1, false);

        /* Cost this as an FMA-alike operation.  */
        if ((GET_CODE (new_op1) == MULT
             || aarch64_shift_p (GET_CODE (new_op1)))
            && code != COMPARE)
          {
            *cost += aarch64_rtx_mult_cost (new_op1, MULT,
                                            (enum rtx_code) code,
                                            speed);
            return true;
          }

        *cost += rtx_cost (new_op1, VOIDmode, MINUS, 1, speed);

        if (speed)
          {
            if (VECTOR_MODE_P (mode))
              {
                /* Vector SUB.  */
                *cost += extra_cost->vect.alu;
              }
            else if (GET_MODE_CLASS (mode) == MODE_INT)
              {
                /* SUB(S).  */
                *cost += extra_cost->alu.arith;
              }
            else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
              {
                /* FSUB.  */
                *cost += extra_cost->fp[mode == DFmode].addsub;
              }
          }
        return true;
      }

    case PLUS:
      {
        rtx new_op0;

        op0 = XEXP (x, 0);
        op1 = XEXP (x, 1);

cost_plus:
        if (GET_RTX_CLASS (GET_CODE (op0)) == RTX_COMPARE
            || GET_RTX_CLASS (GET_CODE (op0)) == RTX_COMM_COMPARE)
          {
            /* CSINC.  */
            *cost += rtx_cost (XEXP (op0, 0), mode, PLUS, 0, speed);
            *cost += rtx_cost (op1, mode, PLUS, 1, speed);
            return true;
          }

        if (GET_MODE_CLASS (mode) == MODE_INT
            && ((CONST_INT_P (op1) && aarch64_uimm12_shift (INTVAL (op1)))
                || aarch64_sve_addvl_addpl_immediate (op1, mode)))
          {
            *cost += rtx_cost (op0, mode, PLUS, 0, speed);

            if (speed)
              /* ADD (immediate).  */
              *cost += extra_cost->alu.arith;
            return true;
          }

        *cost += rtx_cost (op1, mode, PLUS, 1, speed);

        /* Look for ADD (extended register).  */
        if (is_a <scalar_int_mode> (mode, &int_mode)
            && aarch64_rtx_arith_op_extract_p (op0, int_mode))
          {
            if (speed)
              *cost += extra_cost->alu.extend_arith;

            op0 = aarch64_strip_extend (op0, true);
            *cost += rtx_cost (op0, VOIDmode,
                               (enum rtx_code) GET_CODE (op0), 0, speed);
            return true;
          }

        /* Strip any extend, leave shifts behind as we will
           cost them through mult_cost.  */
        new_op0 = aarch64_strip_extend (op0, false);

        if (GET_CODE (new_op0) == MULT
            || aarch64_shift_p (GET_CODE (new_op0)))
          {
            *cost += aarch64_rtx_mult_cost (new_op0, MULT, PLUS,
                                            speed);
            return true;
          }

        *cost += rtx_cost (new_op0, VOIDmode, PLUS, 0, speed);

        if (speed)
          {
            if (VECTOR_MODE_P (mode))
              {
                /* Vector ADD.  */
                *cost += extra_cost->vect.alu;
              }
            else if (GET_MODE_CLASS (mode) == MODE_INT)
              {
                /* ADD.  */
                *cost += extra_cost->alu.arith;
              }
            else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
              {
                /* FADD.  */
                *cost += extra_cost->fp[mode == DFmode].addsub;
              }
          }
        return true;
      }
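/* For example (an illustrative case): an expression like
     (plus:DI (sign_extend:DI (reg:SI w1)) (reg:DI x2))
   is recognized by the "ADD (extended register)" path above and costed as
   a single extended add such as "add x0, x2, w1, sxtw", rather than as a
   separate extend followed by an add.  */
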
    case BSWAP:
      *cost = COSTS_N_INSNS (1);

      if (speed)
        {
          if (VECTOR_MODE_P (mode))
            *cost += extra_cost->vect.alu;
          else
            *cost += extra_cost->alu.rev;
        }
      return false;

    case IOR:
      if (aarch_rev16_p (x))
        {
          *cost = COSTS_N_INSNS (1);

          if (speed)
            {
              if (VECTOR_MODE_P (mode))
                *cost += extra_cost->vect.alu;
              else
                *cost += extra_cost->alu.rev;
            }
          return true;
        }

      if (aarch64_extr_rtx_p (x, &op0, &op1))
        {
          *cost += rtx_cost (op0, mode, IOR, 0, speed);
          *cost += rtx_cost (op1, mode, IOR, 1, speed);
          if (speed)
            *cost += extra_cost->alu.shift;

          return true;
        }
      /* Fall through.  */
    case XOR:
    case AND:
    cost_logic:
      op0 = XEXP (x, 0);
      op1 = XEXP (x, 1);

      if (VECTOR_MODE_P (mode))
        {
          if (speed)
            *cost += extra_cost->vect.alu;
          return true;
        }

      if (code == AND
          && GET_CODE (op0) == MULT
          && CONST_INT_P (XEXP (op0, 1))
          && CONST_INT_P (op1)
          && aarch64_uxt_size (exact_log2 (INTVAL (XEXP (op0, 1))),
                               INTVAL (op1)) != 0)
        {
          /* This is a UBFM/SBFM.  */
          *cost += rtx_cost (XEXP (op0, 0), mode, ZERO_EXTRACT, 0, speed);
          if (speed)
            *cost += extra_cost->alu.bfx;
          return true;
        }

      if (is_int_mode (mode, &int_mode))
        {
          if (CONST_INT_P (op1))
            {
              /* We have a mask + shift version of a UBFIZ
                 i.e. the *andim_ashift<mode>_bfiz pattern.  */
              if (GET_CODE (op0) == ASHIFT
                  && aarch64_mask_and_shift_for_ubfiz_p (int_mode, op1,
                                                         XEXP (op0, 1)))
                {
                  *cost += rtx_cost (XEXP (op0, 0), int_mode,
                                     (enum rtx_code) code, 0, speed);
                  if (speed)
                    *cost += extra_cost->alu.bfx;

                  return true;
                }
              else if (aarch64_bitmask_imm (INTVAL (op1), int_mode))
                {
                  /* We possibly get the immediate for free, this is not
                     modelled.  */
                  *cost += rtx_cost (op0, int_mode,
                                     (enum rtx_code) code, 0, speed);
                  if (speed)
                    *cost += extra_cost->alu.logical;

                  return true;
                }
            }
          else
            {
              rtx new_op0 = op0;

              /* Handle ORN, EON, or BIC.  */
              if (GET_CODE (op0) == NOT)
                op0 = XEXP (op0, 0);

              new_op0 = aarch64_strip_shift (op0);

              /* If we had a shift on op0 then this is a logical-shift-
                 by-register/immediate operation.  Otherwise, this is just
                 a logical operation.  */
              if (speed)
                {
                  if (new_op0 != op0)
                    {
                      /* Shift by immediate.  */
                      if (CONST_INT_P (XEXP (op0, 1)))
                        *cost += extra_cost->alu.log_shift;
                      else
                        *cost += extra_cost->alu.log_shift_reg;
                    }
                  else
                    *cost += extra_cost->alu.logical;
                }

              /* In both cases we want to cost both operands.  */
              *cost += rtx_cost (new_op0, int_mode, (enum rtx_code) code,
                                 0, speed);
              *cost += rtx_cost (op1, int_mode, (enum rtx_code) code,
                                 1, speed);

              return true;
            }
        }
      return false;

    case NOT:
      x = XEXP (x, 0);
      op0 = aarch64_strip_shift (x);

      if (VECTOR_MODE_P (mode))
        {
          /* Vector NOT.  */
          *cost += extra_cost->vect.alu;
          return false;
        }

      /* MVN-shifted-reg.  */
      if (op0 != x)
        {
          *cost += rtx_cost (op0, mode, (enum rtx_code) code, 0, speed);

          if (speed)
            *cost += extra_cost->alu.log_shift;

          return true;
        }
      /* EON can have two forms: (xor (not a) b) but also (not (xor a b)).
         Handle the second form here taking care that 'a' in the above can
         be a shift.  */
      else if (GET_CODE (op0) == XOR)
        {
          rtx newop0 = XEXP (op0, 0);
          rtx newop1 = XEXP (op0, 1);
          rtx op0_stripped = aarch64_strip_shift (newop0);

          *cost += rtx_cost (newop1, mode, (enum rtx_code) code, 1, speed);
          *cost += rtx_cost (op0_stripped, mode, XOR, 0, speed);

          if (speed)
            {
              if (op0_stripped != newop0)
                *cost += extra_cost->alu.log_shift;
              else
                *cost += extra_cost->alu.logical;
            }

          return true;
        }
      /* MVN.  */
      if (speed)
        *cost += extra_cost->alu.logical;

      return false;
    case ZERO_EXTEND:

      op0 = XEXP (x, 0);
      /* If a value is written in SI mode, then zero extended to DI
         mode, the operation will in general be free as a write to
         a 'w' register implicitly zeroes the upper bits of an 'x'
         register.  However, if this is

           (set (reg) (zero_extend (reg)))

         we must cost the explicit register move.  */
      if (mode == DImode
          && GET_MODE (op0) == SImode
          && outer == SET)
        {
          int op_cost = rtx_cost (op0, VOIDmode, ZERO_EXTEND, 0, speed);

          /* If OP_COST is non-zero, then the cost of the zero extend
             is effectively the cost of the inner operation.  Otherwise
             we have a MOV instruction and we take the cost from the MOV
             itself.  This is true independently of whether we are
             optimizing for space or time.  */
          if (op_cost)
            *cost = op_cost;

          return true;
        }
      else if (MEM_P (op0))
        {
          /* All loads can zero extend to any size for free.  */
          *cost = rtx_cost (op0, VOIDmode, ZERO_EXTEND, param, speed);
          return true;
        }

      op0 = aarch64_extend_bitfield_pattern_p (x);
      if (op0)
        {
          *cost += rtx_cost (op0, mode, ZERO_EXTEND, 0, speed);
          if (speed)
            *cost += extra_cost->alu.bfx;
          return true;
        }

      if (speed)
        {
          if (VECTOR_MODE_P (mode))
            {
              /* UMOV.  */
              *cost += extra_cost->vect.alu;
            }
          else
            {
              /* We generate an AND instead of UXTB/UXTH.  */
              *cost += extra_cost->alu.logical;
            }
        }
      return false;

    case SIGN_EXTEND:
      if (MEM_P (XEXP (x, 0)))
        {
          /* LDRSH.  */
          if (speed)
            {
              rtx address = XEXP (XEXP (x, 0), 0);
              *cost += extra_cost->ldst.load_sign_extend;

              *cost +=
                COSTS_N_INSNS (aarch64_address_cost (address, mode,
                                                     0, speed));
            }
          return true;
        }

      op0 = aarch64_extend_bitfield_pattern_p (x);
      if (op0)
        {
          *cost += rtx_cost (op0, mode, SIGN_EXTEND, 0, speed);
          if (speed)
            *cost += extra_cost->alu.bfx;
          return true;
        }

      if (speed)
        {
          if (VECTOR_MODE_P (mode))
            *cost += extra_cost->vect.alu;
          else
            *cost += extra_cost->alu.extend;
        }
      return false;

    case ASHIFT:
      op0 = XEXP (x, 0);
      op1 = XEXP (x, 1);

      if (CONST_INT_P (op1))
        {
          if (speed)
            {
              if (VECTOR_MODE_P (mode))
                {
                  /* Vector shift (immediate).  */
                  *cost += extra_cost->vect.alu;
                }
              else
                {
                  /* LSL (immediate), UBFM, UBFIZ and friends.  These are all
                     aliases.  */
                  *cost += extra_cost->alu.shift;
                }
            }

          /* We can incorporate zero/sign extend for free.  */
          if (GET_CODE (op0) == ZERO_EXTEND
              || GET_CODE (op0) == SIGN_EXTEND)
            op0 = XEXP (op0, 0);

          *cost += rtx_cost (op0, VOIDmode, ASHIFT, 0, speed);
          return true;
        }
      else
        {
          if (VECTOR_MODE_P (mode))
            {
              if (speed)
                /* Vector shift (register).  */
                *cost += extra_cost->vect.alu;
            }
          else
            {
              if (speed)
                /* LSLV.  */
                *cost += extra_cost->alu.shift_reg;

              if (GET_CODE (op1) == AND && REG_P (XEXP (op1, 0))
                  && CONST_INT_P (XEXP (op1, 1))
                  && known_eq (INTVAL (XEXP (op1, 1)),
                               GET_MODE_BITSIZE (mode) - 1))
                {
                  *cost += rtx_cost (op0, mode, (rtx_code) code, 0, speed);
                  /* We already demanded XEXP (op1, 0) to be REG_P, so
                     don't recurse into it.  */
                  return true;
                }
            }
          return false;  /* All arguments need to be in registers.  */
        }

    case ROTATE:
    case ROTATERT:
    case LSHIFTRT:
    case ASHIFTRT:
      op0 = XEXP (x, 0);
      op1 = XEXP (x, 1);

      if (CONST_INT_P (op1))
        {
          /* ASR (immediate) and friends.  */
          if (speed)
            {
              if (VECTOR_MODE_P (mode))
                *cost += extra_cost->vect.alu;
              else
                *cost += extra_cost->alu.shift;
            }

          *cost += rtx_cost (op0, mode, (enum rtx_code) code, 0, speed);
          return true;
        }
      else
        {
          if (VECTOR_MODE_P (mode))
            {
              if (speed)
                /* Vector shift (register).  */
                *cost += extra_cost->vect.alu;
            }
          else
            {
              if (speed)
                /* ASR (register) and friends.  */
                *cost += extra_cost->alu.shift_reg;

              if (GET_CODE (op1) == AND && REG_P (XEXP (op1, 0))
                  && CONST_INT_P (XEXP (op1, 1))
                  && known_eq (INTVAL (XEXP (op1, 1)),
                               GET_MODE_BITSIZE (mode) - 1))
                {
                  *cost += rtx_cost (op0, mode, (rtx_code) code, 0, speed);
                  /* We already demanded XEXP (op1, 0) to be REG_P, so
                     don't recurse into it.  */
                  return true;
                }
            }
          return false;  /* All arguments need to be in registers.  */
        }
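/* For example (an illustrative case): in SImode,
     (ashift:SI (reg:SI x) (and:SI (reg:SI y) (const_int 31)))
   hits the known_eq test above: the variable-shift instructions only read
   the low bits of the shift amount anyway, so the AND is free and the
   whole expression is costed as a single LSLV.  */
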
    case SYMBOL_REF:

      if (aarch64_cmodel == AARCH64_CMODEL_LARGE
          || aarch64_cmodel == AARCH64_CMODEL_SMALL_SPIC)
        {
          /* LDR.  */
          if (speed)
            *cost += extra_cost->ldst.load;
        }
      else if (aarch64_cmodel == AARCH64_CMODEL_SMALL
               || aarch64_cmodel == AARCH64_CMODEL_SMALL_PIC)
        {
          /* ADRP, followed by ADD.  */
          *cost += COSTS_N_INSNS (1);
          if (speed)
            *cost += 2 * extra_cost->alu.arith;
        }
      else if (aarch64_cmodel == AARCH64_CMODEL_TINY
               || aarch64_cmodel == AARCH64_CMODEL_TINY_PIC)
        {
          /* ADR.  */
          if (speed)
            *cost += extra_cost->alu.arith;
        }

      if (flag_pic)
        {
          /* One extra load instruction, after accessing the GOT.  */
          *cost += COSTS_N_INSNS (1);
          if (speed)
            *cost += extra_cost->ldst.load;
        }
      return true;

    case HIGH:
    case LO_SUM:
      /* ADRP/ADD (immediate).  */
      if (speed)
        *cost += extra_cost->alu.arith;
      return true;

    case ZERO_EXTRACT:
    case SIGN_EXTRACT:
      /* UBFX/SBFX.  */
      if (speed)
        {
          if (VECTOR_MODE_P (mode))
            *cost += extra_cost->vect.alu;
          else
            *cost += extra_cost->alu.bfx;
        }

      /* We can trust that the immediates used will be correct (there
         are no by-register forms), so we need only cost op0.  */
      *cost += rtx_cost (XEXP (x, 0), VOIDmode, (enum rtx_code) code, 0, speed);
      return true;

    case MULT:
      *cost += aarch64_rtx_mult_cost (x, MULT, 0, speed);
      /* aarch64_rtx_mult_cost always handles recursion to its
         operands.  */
      return true;

    case MOD:
      /* We can expand signed mod by power of 2 using a NEGS, two parallel
         ANDs and a CSNEG.  Assume here that CSNEG is the same as the cost of
         an unconditional negate.  This case should only ever be reached
         through the set_smod_pow2_cheap check in expmed.c.  */
      if (CONST_INT_P (XEXP (x, 1))
          && exact_log2 (INTVAL (XEXP (x, 1))) > 0
          && (mode == SImode || mode == DImode))
        {
          /* We expand to 4 instructions.  Reset the baseline.  */
          *cost = COSTS_N_INSNS (4);

          if (speed)
            *cost += 2 * extra_cost->alu.logical
                     + 2 * extra_cost->alu.arith;

          return true;
        }

      /* Fall through.  */
    case UMOD:
      if (speed)
        {
          /* Slightly prefer UMOD over SMOD.  */
          if (VECTOR_MODE_P (mode))
            *cost += extra_cost->vect.alu;
          else if (GET_MODE_CLASS (mode) == MODE_INT)
            *cost += (extra_cost->mult[mode == DImode].add
                      + extra_cost->mult[mode == DImode].idiv
                      + (code == MOD ? 1 : 0));
        }
      return false;  /* All arguments need to be in registers.  */

    case DIV:
    case UDIV:
    case SQRT:
      if (speed)
        {
          if (VECTOR_MODE_P (mode))
            *cost += extra_cost->vect.alu;
          else if (GET_MODE_CLASS (mode) == MODE_INT)
            /* There is no integer SQRT, so only DIV and UDIV can get
               here.  */
            *cost += (extra_cost->mult[mode == DImode].idiv
                      /* Slightly prefer UDIV over SDIV.  */
                      + (code == DIV ? 1 : 0));
          else
            *cost += extra_cost->fp[mode == DFmode].div;
        }
      return false;  /* All arguments need to be in registers.  */
    case IF_THEN_ELSE:
      return aarch64_if_then_else_costs (XEXP (x, 0), XEXP (x, 1),
                                         XEXP (x, 2), cost, speed);

    case EQ:
    case NE:
    case GT:
    case GTU:
    case LT:
    case LTU:
    case GE:
    case GEU:
    case LE:
    case LEU:

      return false;  /* All arguments must be in registers.  */

    case FMA:
      op0 = XEXP (x, 0);
      op1 = XEXP (x, 1);
      op2 = XEXP (x, 2);

      if (speed)
        {
          if (VECTOR_MODE_P (mode))
            *cost += extra_cost->vect.alu;
          else
            *cost += extra_cost->fp[mode == DFmode].fma;
        }

      /* FMSUB, FNMADD, and FNMSUB are free.  */
      if (GET_CODE (op0) == NEG)
        op0 = XEXP (op0, 0);

      if (GET_CODE (op2) == NEG)
        op2 = XEXP (op2, 0);

      /* aarch64_fnma4_elt_to_64v2df has the NEG as operand 1,
         and the by-element operand as operand 0.  */
      if (GET_CODE (op1) == NEG)
        op1 = XEXP (op1, 0);

      /* Catch vector-by-element operations.  The by-element operand can
         either be (vec_duplicate (vec_select (x))) or just
         (vec_select (x)), depending on whether we are multiplying by
         a vector or a scalar.

         Canonicalization is not very good in these cases, FMA4 will put the
         by-element operand as operand 0, FNMA4 will have it as operand 1.  */
      if (GET_CODE (op0) == VEC_DUPLICATE)
        op0 = XEXP (op0, 0);
      else if (GET_CODE (op1) == VEC_DUPLICATE)
        op1 = XEXP (op1, 0);

      if (GET_CODE (op0) == VEC_SELECT)
        op0 = XEXP (op0, 0);
      else if (GET_CODE (op1) == VEC_SELECT)
        op1 = XEXP (op1, 0);

      /* If the remaining parameters are not registers,
         get the cost to put them into registers.  */
      *cost += rtx_cost (op0, mode, FMA, 0, speed);
      *cost += rtx_cost (op1, mode, FMA, 1, speed);
      *cost += rtx_cost (op2, mode, FMA, 2, speed);
      return true;

    case FLOAT:
    case UNSIGNED_FLOAT:
      if (speed)
        *cost += extra_cost->fp[mode == DFmode].fromint;
      return false;

    case FLOAT_EXTEND:
      if (speed)
        {
          if (VECTOR_MODE_P (mode))
            {
              /* Vector widening conversion.  */
              *cost += extra_cost->vect.alu;
            }
          else
            *cost += extra_cost->fp[mode == DFmode].widen;
        }
      return false;

    case FLOAT_TRUNCATE:
      if (speed)
        {
          if (VECTOR_MODE_P (mode))
            {
              /* Vector narrowing conversion.  */
              *cost += extra_cost->vect.alu;
            }
          else
            *cost += extra_cost->fp[mode == DFmode].narrow;
        }
      return false;

    case FIX:
    case UNSIGNED_FIX:
      x = XEXP (x, 0);
      /* Strip the rounding part.  They will all be implemented
         by the fcvt* family of instructions anyway.  */
      if (GET_CODE (x) == UNSPEC)
        {
          unsigned int uns_code = XINT (x, 1);

          if (uns_code == UNSPEC_FRINTA
              || uns_code == UNSPEC_FRINTM
              || uns_code == UNSPEC_FRINTN
              || uns_code == UNSPEC_FRINTP
              || uns_code == UNSPEC_FRINTZ)
            x = XVECEXP (x, 0, 0);
        }

      if (speed)
        {
          if (VECTOR_MODE_P (mode))
            *cost += extra_cost->vect.alu;
          else
            *cost += extra_cost->fp[GET_MODE (x) == DFmode].toint;
        }

      /* We can combine fmul by a power of 2 followed by a fcvt into a single
         fixed-point fcvt.  */
      if (GET_CODE (x) == MULT
          && ((VECTOR_MODE_P (mode)
               && aarch64_vec_fpconst_pow_of_2 (XEXP (x, 1)) > 0)
              || aarch64_fpconst_pow_of_2 (XEXP (x, 1)) > 0))
        {
          *cost += rtx_cost (XEXP (x, 0), VOIDmode, (rtx_code) code,
                             0, speed);
          return true;
        }

      *cost += rtx_cost (x, VOIDmode, (enum rtx_code) code, 0, speed);
      return true;

    case ABS:
      if (VECTOR_MODE_P (mode))
        {
          /* ABS (vector).  */
          if (speed)
            *cost += extra_cost->vect.alu;
        }
      else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
        {
          op0 = XEXP (x, 0);

          /* FABD, which is analogous to FADD.  */
          if (GET_CODE (op0) == MINUS)
            {
              *cost += rtx_cost (XEXP (op0, 0), mode, MINUS, 0, speed);
              *cost += rtx_cost (XEXP (op0, 1), mode, MINUS, 1, speed);
              if (speed)
                *cost += extra_cost->fp[mode == DFmode].addsub;

              return true;
            }
          /* Simple FABS is analogous to FNEG.  */
          if (speed)
            *cost += extra_cost->fp[mode == DFmode].neg;
        }
      else
        {
          /* Integer ABS will either be split to
             two arithmetic instructions, or will be an ABS
             (scalar), which we don't model.  */
          *cost = COSTS_N_INSNS (2);
          if (speed)
            *cost += 2 * extra_cost->alu.arith;
        }
      return false;

    case SMAX:
    case SMIN:
      if (speed)
        {
          if (VECTOR_MODE_P (mode))
            *cost += extra_cost->vect.alu;
          else
            {
              /* FMAXNM/FMINNM/FMAX/FMIN.
                 TODO: This may not be accurate for all implementations, but
                 we do not model this in the cost tables.  */
              *cost += extra_cost->fp[mode == DFmode].addsub;
            }
        }
      return false;

    case UNSPEC:
      /* The floating point round to integer frint* instructions.  */
      if (aarch64_frint_unspec_p (XINT (x, 1)))
        {
          if (speed)
            *cost += extra_cost->fp[mode == DFmode].roundint;

          return false;
        }

      if (XINT (x, 1) == UNSPEC_RBIT)
        {
          if (speed)
            *cost += extra_cost->alu.rev;

          return false;
        }
      break;

    case TRUNCATE:

      /* Decompose <su>muldi3_highpart.  */
      if (/* (truncate:DI  */
          mode == DImode
          /*   (lshiftrt:TI  */
          && GET_MODE (XEXP (x, 0)) == TImode
          && GET_CODE (XEXP (x, 0)) == LSHIFTRT
          /*      (mult:TI  */
          && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
          /*        (ANY_EXTEND:TI (reg:DI))
                    (ANY_EXTEND:TI (reg:DI)))  */
          && ((GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == ZERO_EXTEND
               && GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)) == ZERO_EXTEND)
              || (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == SIGN_EXTEND
                  && GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)) == SIGN_EXTEND))
          && GET_MODE (XEXP (XEXP (XEXP (XEXP (x, 0), 0), 0), 0)) == DImode
          && GET_MODE (XEXP (XEXP (XEXP (XEXP (x, 0), 0), 1), 0)) == DImode
          /*     (const_int 64)  */
          && CONST_INT_P (XEXP (XEXP (x, 0), 1))
          && UINTVAL (XEXP (XEXP (x, 0), 1)) == 64)
        {
          /* UMULH/SMULH.  */
          if (speed)
            *cost += extra_cost->mult[mode == DImode].extend;
          *cost += rtx_cost (XEXP (XEXP (XEXP (XEXP (x, 0), 0), 0), 0),
                             mode, MULT, 0, speed);
          *cost += rtx_cost (XEXP (XEXP (XEXP (XEXP (x, 0), 0), 1), 0),
                             mode, MULT, 1, speed);
          return true;
        }

      /* Fall through.  */
    default:
      break;
    }

  if (dump_file
      && flag_aarch64_verbose_cost)
    fprintf (dump_file,
             "\nFailed to cost RTX.  Assuming default cost.\n");

  return true;
}
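/* A worked example (illustrative, not from the sources): costing
     (set (reg:DI x0) (plus:DI (reg:DI x1) (const_int 16)))
   starts in the SET case, which zeroes the baseline and recurses into the
   PLUS.  The constant 16 passes aarch64_uimm12_shift, so the PLUS is a
   single "add x0, x1, 16": COSTS_N_INSNS (1) plus, when optimizing for
   speed, the tuning table's alu.arith delta.  */
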
/* Wrapper around aarch64_rtx_costs; dumps the partial or total cost
   calculated for X.  This cost is stored in *COST.  Returns true
   if the total cost of X was calculated.  */
static bool
aarch64_rtx_costs_wrapper (rtx x, machine_mode mode, int outer,
                           int param, int *cost, bool speed)
{
  bool result = aarch64_rtx_costs (x, mode, outer, param, cost, speed);

  if (dump_file
      && flag_aarch64_verbose_cost)
    {
      print_rtl_single (dump_file, x);
      fprintf (dump_file, "\n%s cost: %d (%s)\n",
               speed ? "Hot" : "Cold",
               *cost, result ? "final" : "partial");
    }

  return result;
}

static int
aarch64_register_move_cost (machine_mode mode,
                            reg_class_t from_i, reg_class_t to_i)
{
  enum reg_class from = (enum reg_class) from_i;
  enum reg_class to = (enum reg_class) to_i;
  const struct cpu_regmove_cost *regmove_cost
    = aarch64_tune_params.regmove_cost;

  /* Caller save and pointer regs are equivalent to GENERAL_REGS.  */
  if (to == TAILCALL_ADDR_REGS || to == POINTER_REGS)
    to = GENERAL_REGS;

  if (from == TAILCALL_ADDR_REGS || from == POINTER_REGS)
    from = GENERAL_REGS;

  /* Moving between GPR and stack cost is the same as GP2GP.  */
  if ((from == GENERAL_REGS && to == STACK_REG)
      || (to == GENERAL_REGS && from == STACK_REG))
    return regmove_cost->GP2GP;

  /* To/From the stack register, we move via the gprs.  */
  if (to == STACK_REG || from == STACK_REG)
    return aarch64_register_move_cost (mode, from, GENERAL_REGS)
           + aarch64_register_move_cost (mode, GENERAL_REGS, to);

  if (known_eq (GET_MODE_SIZE (mode), 16))
    {
      /* 128-bit operations on general registers require 2 instructions.  */
      if (from == GENERAL_REGS && to == GENERAL_REGS)
        return regmove_cost->GP2GP * 2;
      else if (from == GENERAL_REGS)
        return regmove_cost->GP2FP * 2;
      else if (to == GENERAL_REGS)
        return regmove_cost->FP2GP * 2;

      /* When AdvSIMD instructions are disabled it is not possible to move
         a 128-bit value directly between Q registers.  This is handled in
         secondary reload.  A general register is used as a scratch to move
         the upper DI value and the lower DI value is moved directly,
         hence the cost is the sum of three moves.  */
      if (! TARGET_SIMD)
        return regmove_cost->GP2FP + regmove_cost->FP2GP + regmove_cost->FP2FP;

      return regmove_cost->FP2FP;
    }

  if (from == GENERAL_REGS && to == GENERAL_REGS)
    return regmove_cost->GP2GP;
  else if (from == GENERAL_REGS)
    return regmove_cost->GP2FP;
  else if (to == GENERAL_REGS)
    return regmove_cost->FP2GP;

  return regmove_cost->FP2FP;
}
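/* For example (an illustrative case): when AdvSIMD is disabled (e.g. a
   "+nosimd" target), a 16-byte TImode copy between FP/SIMD registers is
   priced as GP2FP + FP2GP + FP2FP above, reflecting the scratch general
   register needed for the upper doubleword.  */
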
static int
aarch64_memory_move_cost (machine_mode mode ATTRIBUTE_UNUSED,
                          reg_class_t rclass ATTRIBUTE_UNUSED,
                          bool in ATTRIBUTE_UNUSED)
{
  return aarch64_tune_params.memmov_cost;
}

/* Return true if it is safe and beneficial to use the approximate rsqrt optabs
   to optimize 1.0/sqrt.  */

static bool
use_rsqrt_p (machine_mode mode)
{
  return (!flag_trapping_math
          && flag_unsafe_math_optimizations
          && ((aarch64_tune_params.approx_modes->recip_sqrt
               & AARCH64_APPROX_MODE (mode))
              || flag_mrecip_low_precision_sqrt));
}

/* Function to decide when to use the approximate reciprocal square root
   builtin.  */

static tree
aarch64_builtin_reciprocal (tree fndecl)
{
  machine_mode mode = TYPE_MODE (TREE_TYPE (fndecl));

  if (!use_rsqrt_p (mode))
    return NULL_TREE;
  return aarch64_builtin_rsqrt (DECL_FUNCTION_CODE (fndecl));
}

/* Emit instruction sequence to compute either the approximate square root
   or its approximate reciprocal, depending on the flag RECP, and return
   whether the sequence was emitted or not.  */

bool
aarch64_emit_approx_sqrt (rtx dst, rtx src, bool recp)
{
  machine_mode mode = GET_MODE (dst);

  if (GET_MODE_INNER (mode) == HFmode)
    {
      gcc_assert (!recp);
      return false;
    }

  if (!recp)
    {
      if (!(flag_mlow_precision_sqrt
            || (aarch64_tune_params.approx_modes->sqrt
                & AARCH64_APPROX_MODE (mode))))
        return false;

      if (flag_finite_math_only
          || flag_trapping_math
          || !flag_unsafe_math_optimizations
          || optimize_function_for_size_p (cfun))
        return false;
    }
  else
    /* Caller assumes we cannot fail.  */
    gcc_assert (use_rsqrt_p (mode));

  machine_mode mmsk = mode_for_int_vector (mode).require ();
  rtx xmsk = gen_reg_rtx (mmsk);
  if (!recp)
    /* When calculating the approximate square root, compare the
       argument with 0.0 and create a mask.  */
    emit_insn (gen_rtx_SET (xmsk,
                            gen_rtx_NEG (mmsk,
                                         gen_rtx_EQ (mmsk, src,
                                                     CONST0_RTX (mode)))));

  /* Estimate the approximate reciprocal square root.  */
  rtx xdst = gen_reg_rtx (mode);
  emit_insn (gen_aarch64_rsqrte (mode, xdst, src));

  /* Iterate over the series twice for SF and thrice for DF.  */
  int iterations = (GET_MODE_INNER (mode) == DFmode) ? 3 : 2;

  /* Optionally iterate over the series once less for faster performance
     while sacrificing the accuracy.  */
  if ((recp && flag_mrecip_low_precision_sqrt)
      || (!recp && flag_mlow_precision_sqrt))
    iterations--;

  /* Iterate over the series to calculate the approximate reciprocal square
     root.  */
  rtx x1 = gen_reg_rtx (mode);
  while (iterations--)
    {
      rtx x2 = gen_reg_rtx (mode);
      emit_set_insn (x2, gen_rtx_MULT (mode, xdst, xdst));

      emit_insn (gen_aarch64_rsqrts (mode, x1, src, x2));

      if (iterations > 0)
        emit_set_insn (xdst, gen_rtx_MULT (mode, xdst, x1));
    }

  if (!recp)
    {
      /* Qualify the approximate reciprocal square root when the argument is
         0.0 by squashing the intermediary result to 0.0.  */
      rtx xtmp = gen_reg_rtx (mmsk);
      emit_set_insn (xtmp, gen_rtx_AND (mmsk, gen_rtx_NOT (mmsk, xmsk),
                                        gen_rtx_SUBREG (mmsk, xdst, 0)));
      emit_move_insn (xdst, gen_rtx_SUBREG (mode, xtmp, 0));

      /* Calculate the approximate square root.  */
      emit_set_insn (xdst, gen_rtx_MULT (mode, xdst, src));
    }

  /* Finalize the approximation.  */
  emit_set_insn (dst, gen_rtx_MULT (mode, xdst, x1));

  return true;
}
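/* The loop above is standard Newton-Raphson refinement for 1/sqrt(S): the
   FRSQRTS instruction computes (3 - a * b) / 2, so each pass effectively
   performs (an illustrative restatement, not from the sources)

     x_{n+1} = x_n * (3 - S * x_n^2) / 2

   which roughly doubles the number of correct bits per iteration; hence
   two passes for SFmode and three for DFmode.  */
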
/* Emit the instruction sequence to compute the approximation for the division
   of NUM by DEN in QUO and return whether the sequence was emitted or not.  */

bool
aarch64_emit_approx_div (rtx quo, rtx num, rtx den)
{
  machine_mode mode = GET_MODE (quo);

  if (GET_MODE_INNER (mode) == HFmode)
    return false;

  bool use_approx_division_p = (flag_mlow_precision_div
                                || (aarch64_tune_params.approx_modes->division
                                    & AARCH64_APPROX_MODE (mode)));

  if (!flag_finite_math_only
      || flag_trapping_math
      || !flag_unsafe_math_optimizations
      || optimize_function_for_size_p (cfun)
      || !use_approx_division_p)
    return false;

  if (!TARGET_SIMD && VECTOR_MODE_P (mode))
    return false;

  /* Estimate the approximate reciprocal.  */
  rtx xrcp = gen_reg_rtx (mode);
  emit_insn (gen_aarch64_frecpe (mode, xrcp, den));

  /* Iterate over the series twice for SF and thrice for DF.  */
  int iterations = (GET_MODE_INNER (mode) == DFmode) ? 3 : 2;

  /* Optionally iterate over the series once less for faster performance,
     while sacrificing the accuracy.  */
  if (flag_mlow_precision_div)
    iterations--;

  /* Iterate over the series to calculate the approximate reciprocal.  */
  rtx xtmp = gen_reg_rtx (mode);
  while (iterations--)
    {
      emit_insn (gen_aarch64_frecps (mode, xtmp, xrcp, den));

      if (iterations > 0)
        emit_set_insn (xrcp, gen_rtx_MULT (mode, xrcp, xtmp));
    }

  if (num != CONST1_RTX (mode))
    {
      /* As the approximate reciprocal of DEN is already calculated, only
         calculate the approximate division when NUM is not 1.0.  */
      rtx xnum = force_reg (mode, num);
      emit_set_insn (xrcp, gen_rtx_MULT (mode, xrcp, xnum));
    }

  /* Finalize the approximation.  */
  emit_set_insn (quo, gen_rtx_MULT (mode, xrcp, xtmp));
  return true;
}
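/* As with the square-root sequence, this is Newton-Raphson refinement for
   1/DEN: FRECPS computes (2 - a * b), so each pass effectively performs
   (an illustrative restatement, not from the sources)

     x_{n+1} = x_n * (2 - DEN * x_n)

   and the quotient is then formed as NUM * (1/DEN).  */
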
/* Return the number of instructions that can be issued per cycle.  */
static int
aarch64_sched_issue_rate (void)
{
  return aarch64_tune_params.issue_rate;
}

static int
aarch64_sched_first_cycle_multipass_dfa_lookahead (void)
{
  int issue_rate = aarch64_sched_issue_rate ();

  return issue_rate > 1 && !sched_fusion ? issue_rate : 0;
}


/* Implement TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD as
   autopref_multipass_dfa_lookahead_guard from haifa-sched.c.  It only
   has an effect if PARAM_SCHED_AUTOPREF_QUEUE_DEPTH > 0.  */

static int
aarch64_first_cycle_multipass_dfa_lookahead_guard (rtx_insn *insn,
                                                   int ready_index)
{
  return autopref_multipass_dfa_lookahead_guard (insn, ready_index);
}


/* Vectorizer cost model target hooks.  */

/* Implement targetm.vectorize.builtin_vectorization_cost.  */
static int
aarch64_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
                                    tree vectype,
                                    int misalign ATTRIBUTE_UNUSED)
{
  unsigned elements;
  const cpu_vector_cost *costs = aarch64_tune_params.vec_costs;
  bool fp = false;

  if (vectype != NULL)
    fp = FLOAT_TYPE_P (vectype);

  switch (type_of_cost)
    {
    case scalar_stmt:
      return fp ? costs->scalar_fp_stmt_cost : costs->scalar_int_stmt_cost;

    case scalar_load:
      return costs->scalar_load_cost;

    case scalar_store:
      return costs->scalar_store_cost;

    case vector_stmt:
      return fp ? costs->vec_fp_stmt_cost : costs->vec_int_stmt_cost;

    case vector_load:
      return costs->vec_align_load_cost;

    case vector_store:
      return costs->vec_store_cost;

    case vec_to_scalar:
      return costs->vec_to_scalar_cost;

    case scalar_to_vec:
      return costs->scalar_to_vec_cost;

    case unaligned_load:
    case vector_gather_load:
      return costs->vec_unalign_load_cost;

    case unaligned_store:
    case vector_scatter_store:
      return costs->vec_unalign_store_cost;

    case cond_branch_taken:
      return costs->cond_taken_branch_cost;

    case cond_branch_not_taken:
      return costs->cond_not_taken_branch_cost;

    case vec_perm:
      return costs->vec_permute_cost;

    case vec_promote_demote:
      return fp ? costs->vec_fp_stmt_cost : costs->vec_int_stmt_cost;

    case vec_construct:
      elements = estimated_poly_value (TYPE_VECTOR_SUBPARTS (vectype));
      return elements / 2 + 1;

    default:
      gcc_unreachable ();
    }
}

/* Implement targetm.vectorize.add_stmt_cost.  */
static unsigned
aarch64_add_stmt_cost (void *data, int count, enum vect_cost_for_stmt kind,
                       struct _stmt_vec_info *stmt_info, int misalign,
                       enum vect_cost_model_location where)
{
  unsigned *cost = (unsigned *) data;
  unsigned retval = 0;

  if (flag_vect_cost_model)
    {
      tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
      int stmt_cost =
            aarch64_builtin_vectorization_cost (kind, vectype, misalign);

      /* Statements in an inner loop relative to the loop being
         vectorized are weighted more heavily.  The value here is
         arbitrary and could potentially be improved with analysis.  */
      if (where == vect_body && stmt_info && stmt_in_inner_loop_p (stmt_info))
        count *= 50;  /* FIXME */

      retval = (unsigned) (count * stmt_cost);
      cost[where] += retval;
    }

  return retval;
}

static void initialize_aarch64_code_model (struct gcc_options *);

/* Parse the TO_PARSE string and put the architecture struct that it
   selects into RES and the architectural features into ISA_FLAGS.
   Return an aarch64_parse_opt_result describing the parse result.
   If there is an error parsing, RES and ISA_FLAGS are left unchanged.  */

static enum aarch64_parse_opt_result
aarch64_parse_arch (const char *to_parse, const struct processor **res,
                    unsigned long *isa_flags)
{
  char *ext;
  const struct processor *arch;
  char *str = (char *) alloca (strlen (to_parse) + 1);
  size_t len;

  strcpy (str, to_parse);

  ext = strchr (str, '+');

  if (ext != NULL)
    len = ext - str;
  else
    len = strlen (str);

  if (len == 0)
    return AARCH64_PARSE_MISSING_ARG;

  /* Loop through the list of supported ARCHes to find a match.  */
  for (arch = all_architectures; arch->name != NULL; arch++)
    {
      if (strlen (arch->name) == len && strncmp (arch->name, str, len) == 0)
        {
          unsigned long isa_temp = arch->flags;

          if (ext != NULL)
            {
              /* TO_PARSE string contains at least one extension.  */
              enum aarch64_parse_opt_result ext_res
                = aarch64_parse_extension (ext, &isa_temp);

              if (ext_res != AARCH64_PARSE_OK)
                return ext_res;
            }
          /* Extension parsing was successful.  Confirm the result
             arch and ISA flags.  */
          *res = arch;
          *isa_flags = isa_temp;
          return AARCH64_PARSE_OK;
        }
    }

  /* ARCH name not found in list.  */
  return AARCH64_PARSE_INVALID_ARG;
}
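/* For example (an illustrative case): given "-march=armv8-a+crc", the code
   above matches "armv8-a" against all_architectures and then hands "+crc"
   to aarch64_parse_extension, which folds the CRC feature bit into the
   returned ISA flags.  */
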
0cfff2a1
KT
10104/* Parse the TO_PARSE string and put the result tuning in RES and the
10105 architecture flags in ISA_FLAGS. Return an aarch64_parse_opt_result
10106 describing the parse result. If there is an error parsing, RES and
10107 ISA_FLAGS are left unchanged. */
43e9d192 10108
0cfff2a1
KT
10109static enum aarch64_parse_opt_result
10110aarch64_parse_cpu (const char *to_parse, const struct processor **res,
10111 unsigned long *isa_flags)
43e9d192
IB
10112{
10113 char *ext;
10114 const struct processor *cpu;
0cfff2a1 10115 char *str = (char *) alloca (strlen (to_parse) + 1);
43e9d192
IB
10116 size_t len;
10117
0cfff2a1 10118 strcpy (str, to_parse);
43e9d192
IB
10119
10120 ext = strchr (str, '+');
10121
10122 if (ext != NULL)
10123 len = ext - str;
10124 else
10125 len = strlen (str);
10126
10127 if (len == 0)
0cfff2a1
KT
10128 return AARCH64_PARSE_MISSING_ARG;
10129
43e9d192
IB
10130
10131 /* Loop through the list of supported CPUs to find a match. */
10132 for (cpu = all_cores; cpu->name != NULL; cpu++)
10133 {
10134 if (strlen (cpu->name) == len && strncmp (cpu->name, str, len) == 0)
10135 {
0cfff2a1
KT
10136 unsigned long isa_temp = cpu->flags;
10137
43e9d192
IB
10138
10139 if (ext != NULL)
10140 {
0cfff2a1
KT
10141 /* TO_PARSE string contains at least one extension. */
10142 enum aarch64_parse_opt_result ext_res
10143 = aarch64_parse_extension (ext, &isa_temp);
43e9d192 10144
0cfff2a1
KT
10145 if (ext_res != AARCH64_PARSE_OK)
10146 return ext_res;
10147 }
10148 /* Extension parsing was successfull. Confirm the result
10149 cpu and ISA flags. */
10150 *res = cpu;
10151 *isa_flags = isa_temp;
10152 return AARCH64_PARSE_OK;
43e9d192
IB
10153 }
10154 }
10155
10156 /* CPU name not found in list. */
0cfff2a1 10157 return AARCH64_PARSE_INVALID_ARG;
43e9d192
IB
10158}

/* Parse the TO_PARSE string and put the cpu it selects into RES.
   Return an aarch64_parse_opt_result describing the parse result.
   If the parsing fails, RES does not change.  */

static enum aarch64_parse_opt_result
aarch64_parse_tune (const char *to_parse, const struct processor **res)
{
  const struct processor *cpu;
  char *str = (char *) alloca (strlen (to_parse) + 1);

  strcpy (str, to_parse);

  /* Loop through the list of supported CPUs to find a match.  */
  for (cpu = all_cores; cpu->name != NULL; cpu++)
    {
      if (strcmp (cpu->name, str) == 0)
	{
	  *res = cpu;
	  return AARCH64_PARSE_OK;
	}
    }

  /* CPU name not found in list.  */
  return AARCH64_PARSE_INVALID_ARG;
}

/* Parse TOKEN, which has length LENGTH, to see if it is an option
   described in FLAG.  If it is, return the index bit for that fusion type.
   If not, error (printing OPTION_NAME) and return zero.  */

static unsigned int
aarch64_parse_one_option_token (const char *token,
				size_t length,
				const struct aarch64_flag_desc *flag,
				const char *option_name)
{
  for (; flag->name != NULL; flag++)
    {
      if (length == strlen (flag->name)
	  && !strncmp (flag->name, token, length))
	return flag->flag;
    }

  error ("unknown flag passed in -moverride=%s (%s)", option_name, token);
  return 0;
}

/* Parse OPTION, which is a '.'-separated list of flags to enable.
   FLAGS gives the list of flags we understand, INITIAL_STATE gives any
   default state we inherit from the CPU tuning structures.  OPTION_NAME
   gives the top-level option we are parsing in the -moverride string,
   for use in error messages.  */

static unsigned int
aarch64_parse_boolean_options (const char *option,
			       const struct aarch64_flag_desc *flags,
			       unsigned int initial_state,
			       const char *option_name)
{
  const char separator = '.';
  const char* specs = option;
  const char* ntoken = option;
  unsigned int found_flags = initial_state;

  while ((ntoken = strchr (specs, separator)))
    {
      size_t token_length = ntoken - specs;
      unsigned token_ops = aarch64_parse_one_option_token (specs,
							   token_length,
							   flags,
							   option_name);
      /* If we find "none" (or, for simplicity's sake, an error) anywhere
	 in the token stream, reset the supported operations.  So:

	 adrp+add.cmp+branch.none.adrp+add

	 would have the result of turning on only adrp+add fusion.  */
      if (!token_ops)
	found_flags = 0;

      found_flags |= token_ops;
      specs = ++ntoken;
    }

  /* We ended with a trailing separator, so the string is ill-formed.  */
  if (!(*specs))
    {
      error ("%s string ill-formed\n", option_name);
      return 0;
    }

  /* We still have one more token to parse.  */
  size_t token_length = strlen (specs);
  unsigned token_ops = aarch64_parse_one_option_token (specs,
						       token_length,
						       flags,
						       option_name);
  if (!token_ops)
    found_flags = 0;

  found_flags |= token_ops;
  return found_flags;
}
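
/* For example, parsing the fuse= value "adrp+add.cmp+branch" with an
   INITIAL_STATE of zero returns the bitwise OR of the adrp+add and
   cmp+branch fusion bits, whereas "adrp+add.none" ends up enabling
   nothing, because the final "none" token resets the accumulated
   flags as described in the loop comment above.  */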

/* Support for overriding instruction fusion.  */

static void
aarch64_parse_fuse_string (const char *fuse_string,
			   struct tune_params *tune)
{
  tune->fusible_ops = aarch64_parse_boolean_options (fuse_string,
						     aarch64_fusible_pairs,
						     tune->fusible_ops,
						     "fuse=");
}

/* Support for overriding other tuning flags.  */

static void
aarch64_parse_tune_string (const char *tune_string,
			   struct tune_params *tune)
{
  tune->extra_tuning_flags
    = aarch64_parse_boolean_options (tune_string,
				     aarch64_tuning_flags,
				     tune->extra_tuning_flags,
				     "tune=");
}

/* Parse TOKEN, which has length LENGTH, to see if it is a tuning option
   we understand.  If it is, extract the option string and hand off to
   the appropriate function.  */

void
aarch64_parse_one_override_token (const char* token,
				  size_t length,
				  struct tune_params *tune)
{
  const struct aarch64_tuning_override_function *fn
    = aarch64_tuning_override_functions;

  const char *option_part = strchr (token, '=');
  if (!option_part)
    {
      error ("tuning string missing in option (%s)", token);
      return;
    }

  /* Get the length of the option name.  */
  length = option_part - token;
  /* Skip the '=' to get to the option string.  */
  option_part++;

  for (; fn->name != NULL; fn++)
    {
      if (!strncmp (fn->name, token, length))
	{
	  fn->parse_override (option_part, tune);
	  return;
	}
    }

  error ("unknown tuning option (%s)", token);
  return;
}

/* A checking mechanism for the implementation of the TLS size.  */

static void
initialize_aarch64_tls_size (struct gcc_options *opts)
{
  if (aarch64_tls_size == 0)
    aarch64_tls_size = 24;

  switch (opts->x_aarch64_cmodel_var)
    {
    case AARCH64_CMODEL_TINY:
      /* Both the default and maximum TLS size allowed under tiny are 1M,
	 which needs two instructions to address, so we clamp the size
	 to 24.  */
      if (aarch64_tls_size > 24)
	aarch64_tls_size = 24;
      break;
    case AARCH64_CMODEL_SMALL:
      /* The maximum TLS size allowed under small is 4G.  */
      if (aarch64_tls_size > 32)
	aarch64_tls_size = 32;
      break;
    case AARCH64_CMODEL_LARGE:
      /* The maximum TLS size allowed under large is 16E.
	 FIXME: 16E should be 64bit, we only support 48bit offset now.  */
      if (aarch64_tls_size > 48)
	aarch64_tls_size = 48;
      break;
    default:
      gcc_unreachable ();
    }

  return;
}
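
/* In effect, aarch64_tls_size (a bit count) is clamped per code model:
   tiny allows at most 24 bits (1M of TLS data), small at most 32 bits
   (4G) and large at most 48 bits, with 24 as the default when the user
   gave no -mtls-size value.  */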

/* Parse STRING looking for options in the format:
     string    :: option:string
     option    :: name=substring
     name      :: {a-z}
     substring :: defined by option.  */

static void
aarch64_parse_override_string (const char* input_string,
			       struct tune_params* tune)
{
  const char separator = ':';
  size_t string_length = strlen (input_string) + 1;
  char *string_root = (char *) xmalloc (sizeof (*string_root) * string_length);
  char *string = string_root;
  strncpy (string, input_string, string_length);
  string[string_length - 1] = '\0';

  char* ntoken = string;

  while ((ntoken = strchr (string, separator)))
    {
      size_t token_length = ntoken - string;
      /* Make this substring look like a string.  */
      *ntoken = '\0';
      aarch64_parse_one_override_token (string, token_length, tune);
      string = ++ntoken;
    }

  /* One last option to parse.  */
  aarch64_parse_one_override_token (string, strlen (string), tune);
  free (string_root);
}
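
/* Putting the pieces together: a string such as
   "fuse=adrp+add.cmp+branch:tune=rename_fd_regs" (the tune flag name
   is illustrative; the valid set comes from aarch64_tuning_flags) is
   first split on ':' here, and each name=value pair is then dispatched
   through aarch64_parse_one_override_token to the fuse=/tune=
   handlers above.  */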


static void
aarch64_override_options_after_change_1 (struct gcc_options *opts)
{
  /* PR 70044: We have to be careful about being called multiple times for the
     same function.  This means all changes should be repeatable.  */

  /* Set aarch64_use_frame_pointer based on -fno-omit-frame-pointer.
     Disable the frame pointer flag so the mid-end will not use a frame
     pointer in leaf functions in order to support -fomit-leaf-frame-pointer.
     Set x_flag_omit_frame_pointer to the special value 2 to differentiate
     between -fomit-frame-pointer (1) and -fno-omit-frame-pointer (2).  */
  aarch64_use_frame_pointer = opts->x_flag_omit_frame_pointer != 1;
  if (opts->x_flag_omit_frame_pointer == 0)
    opts->x_flag_omit_frame_pointer = 2;

  /* If not optimizing for size, set the default
     alignment to what the target wants.  */
  if (!opts->x_optimize_size)
    {
      if (opts->x_flag_align_loops && !opts->x_str_align_loops)
	opts->x_str_align_loops = aarch64_tune_params.loop_align;
      if (opts->x_flag_align_jumps && !opts->x_str_align_jumps)
	opts->x_str_align_jumps = aarch64_tune_params.jump_align;
      if (opts->x_flag_align_functions && !opts->x_str_align_functions)
	opts->x_str_align_functions = aarch64_tune_params.function_align;
    }

  /* We default to no pc-relative literal loads.  */

  aarch64_pcrelative_literal_loads = false;

  /* If -mpc-relative-literal-loads is set on the command line, this
     implies that the user asked for PC relative literal loads.  */
  if (opts->x_pcrelative_literal_loads == 1)
    aarch64_pcrelative_literal_loads = true;

  /* In the tiny memory model it makes no sense to disallow PC relative
     literal pool loads.  */
  if (aarch64_cmodel == AARCH64_CMODEL_TINY
      || aarch64_cmodel == AARCH64_CMODEL_TINY_PIC)
    aarch64_pcrelative_literal_loads = true;

  /* When enabling the lower precision Newton series for the square root, also
     enable it for the reciprocal square root, since the latter is an
     intermediary step for the former.  */
  if (flag_mlow_precision_sqrt)
    flag_mrecip_low_precision_sqrt = true;
}

/* 'Unpack' the internal tuning structs and update the options
   in OPTS.  The caller must have set up selected_tune and selected_arch
   as all the other target-specific codegen decisions are
   derived from them.  */

void
aarch64_override_options_internal (struct gcc_options *opts)
{
  aarch64_tune_flags = selected_tune->flags;
  aarch64_tune = selected_tune->sched_core;
  /* Make a copy of the tuning parameters attached to the core, which
     we may later overwrite.  */
  aarch64_tune_params = *(selected_tune->tune);
  aarch64_architecture_version = selected_arch->architecture_version;

  if (opts->x_aarch64_override_tune_string)
    aarch64_parse_override_string (opts->x_aarch64_override_tune_string,
				   &aarch64_tune_params);

  /* This target defaults to strict volatile bitfields.  */
  if (opts->x_flag_strict_volatile_bitfields < 0 && abi_version_at_least (2))
    opts->x_flag_strict_volatile_bitfields = 1;

  initialize_aarch64_code_model (opts);
  initialize_aarch64_tls_size (opts);

  int queue_depth = 0;
  switch (aarch64_tune_params.autoprefetcher_model)
    {
    case tune_params::AUTOPREFETCHER_OFF:
      queue_depth = -1;
      break;
    case tune_params::AUTOPREFETCHER_WEAK:
      queue_depth = 0;
      break;
    case tune_params::AUTOPREFETCHER_STRONG:
      queue_depth = max_insn_queue_index + 1;
      break;
    default:
      gcc_unreachable ();
    }

  /* We don't mind passing in global_options_set here as we don't use
     the *options_set structs anyway.  */
  maybe_set_param_value (PARAM_SCHED_AUTOPREF_QUEUE_DEPTH,
			 queue_depth,
			 opts->x_param_values,
			 global_options_set.x_param_values);

  /* Set up parameters to be used in prefetching algorithm.  Do not
     override the defaults unless we are tuning for a core we have
     researched values for.  */
  if (aarch64_tune_params.prefetch->num_slots > 0)
    maybe_set_param_value (PARAM_SIMULTANEOUS_PREFETCHES,
			   aarch64_tune_params.prefetch->num_slots,
			   opts->x_param_values,
			   global_options_set.x_param_values);
  if (aarch64_tune_params.prefetch->l1_cache_size >= 0)
    maybe_set_param_value (PARAM_L1_CACHE_SIZE,
			   aarch64_tune_params.prefetch->l1_cache_size,
			   opts->x_param_values,
			   global_options_set.x_param_values);
  if (aarch64_tune_params.prefetch->l1_cache_line_size >= 0)
    maybe_set_param_value (PARAM_L1_CACHE_LINE_SIZE,
			   aarch64_tune_params.prefetch->l1_cache_line_size,
			   opts->x_param_values,
			   global_options_set.x_param_values);
  if (aarch64_tune_params.prefetch->l2_cache_size >= 0)
    maybe_set_param_value (PARAM_L2_CACHE_SIZE,
			   aarch64_tune_params.prefetch->l2_cache_size,
			   opts->x_param_values,
			   global_options_set.x_param_values);
  if (!aarch64_tune_params.prefetch->prefetch_dynamic_strides)
    maybe_set_param_value (PARAM_PREFETCH_DYNAMIC_STRIDES,
			   0,
			   opts->x_param_values,
			   global_options_set.x_param_values);
  if (aarch64_tune_params.prefetch->minimum_stride >= 0)
    maybe_set_param_value (PARAM_PREFETCH_MINIMUM_STRIDE,
			   aarch64_tune_params.prefetch->minimum_stride,
			   opts->x_param_values,
			   global_options_set.x_param_values);

  /* Use the alternative scheduling-pressure algorithm by default.  */
  maybe_set_param_value (PARAM_SCHED_PRESSURE_ALGORITHM, SCHED_PRESSURE_MODEL,
			 opts->x_param_values,
			 global_options_set.x_param_values);

  /* Enable sw prefetching at specified optimization level for
     CPUS that have prefetch.  Lower optimization level threshold by 1
     when profiling is enabled.  */
  if (opts->x_flag_prefetch_loop_arrays < 0
      && !opts->x_optimize_size
      && aarch64_tune_params.prefetch->default_opt_level >= 0
      && opts->x_optimize >= aarch64_tune_params.prefetch->default_opt_level)
    opts->x_flag_prefetch_loop_arrays = 1;

  aarch64_override_options_after_change_1 (opts);
}

/* Print a hint with a suggestion for a core or architecture name that
   most closely resembles what the user passed in STR.  ARCH is true if
   the user is asking for an architecture name.  ARCH is false if the user
   is asking for a core name.  */

static void
aarch64_print_hint_for_core_or_arch (const char *str, bool arch)
{
  auto_vec<const char *> candidates;
  const struct processor *entry = arch ? all_architectures : all_cores;
  for (; entry->name != NULL; entry++)
    candidates.safe_push (entry->name);

#ifdef HAVE_LOCAL_CPU_DETECT
  /* Add also "native" as possible value.  */
  if (arch)
    candidates.safe_push ("native");
#endif

  char *s;
  const char *hint = candidates_list_and_hint (str, s, candidates);
  if (hint)
    inform (input_location, "valid arguments are: %s;"
	    " did you mean %qs?", s, hint);
  else
    inform (input_location, "valid arguments are: %s", s);

  XDELETEVEC (s);
}

/* Print a hint with a suggestion for a core name that most closely resembles
   what the user passed in STR.  */

inline static void
aarch64_print_hint_for_core (const char *str)
{
  aarch64_print_hint_for_core_or_arch (str, false);
}

/* Print a hint with a suggestion for an architecture name that most closely
   resembles what the user passed in STR.  */

inline static void
aarch64_print_hint_for_arch (const char *str)
{
  aarch64_print_hint_for_core_or_arch (str, true);
}

/* Validate a command-line -mcpu option.  Parse the cpu and extensions
   (if any) specified in STR and throw errors if appropriate.  Put the
   results, if they are valid, in RES and ISA_FLAGS.  Return whether the
   option is valid.  */

static bool
aarch64_validate_mcpu (const char *str, const struct processor **res,
		       unsigned long *isa_flags)
{
  enum aarch64_parse_opt_result parse_res
    = aarch64_parse_cpu (str, res, isa_flags);

  if (parse_res == AARCH64_PARSE_OK)
    return true;

  switch (parse_res)
    {
    case AARCH64_PARSE_MISSING_ARG:
      error ("missing cpu name in %<-mcpu=%s%>", str);
      break;
    case AARCH64_PARSE_INVALID_ARG:
      error ("unknown value %qs for -mcpu", str);
      aarch64_print_hint_for_core (str);
      break;
    case AARCH64_PARSE_INVALID_FEATURE:
      error ("invalid feature modifier in %<-mcpu=%s%>", str);
      break;
    default:
      gcc_unreachable ();
    }

  return false;
}

/* Validate a command-line -march option.  Parse the arch and extensions
   (if any) specified in STR and throw errors if appropriate.  Put the
   results, if they are valid, in RES and ISA_FLAGS.  Return whether the
   option is valid.  */

static bool
aarch64_validate_march (const char *str, const struct processor **res,
			unsigned long *isa_flags)
{
  enum aarch64_parse_opt_result parse_res
    = aarch64_parse_arch (str, res, isa_flags);

  if (parse_res == AARCH64_PARSE_OK)
    return true;

  switch (parse_res)
    {
    case AARCH64_PARSE_MISSING_ARG:
      error ("missing arch name in %<-march=%s%>", str);
      break;
    case AARCH64_PARSE_INVALID_ARG:
      error ("unknown value %qs for -march", str);
      aarch64_print_hint_for_arch (str);
      break;
    case AARCH64_PARSE_INVALID_FEATURE:
      error ("invalid feature modifier in %<-march=%s%>", str);
      break;
    default:
      gcc_unreachable ();
    }

  return false;
}

/* Validate a command-line -mtune option.  Parse the cpu
   specified in STR and throw errors if appropriate.  Put the
   result, if it is valid, in RES.  Return whether the option is
   valid.  */

static bool
aarch64_validate_mtune (const char *str, const struct processor **res)
{
  enum aarch64_parse_opt_result parse_res
    = aarch64_parse_tune (str, res);

  if (parse_res == AARCH64_PARSE_OK)
    return true;

  switch (parse_res)
    {
    case AARCH64_PARSE_MISSING_ARG:
      error ("missing cpu name in %<-mtune=%s%>", str);
      break;
    case AARCH64_PARSE_INVALID_ARG:
      error ("unknown value %qs for -mtune", str);
      aarch64_print_hint_for_core (str);
      break;
    default:
      gcc_unreachable ();
    }
  return false;
}

/* Return the CPU corresponding to the enum CPU.
   If it doesn't specify a cpu, return the default.  */

static const struct processor *
aarch64_get_tune_cpu (enum aarch64_processor cpu)
{
  if (cpu != aarch64_none)
    return &all_cores[cpu];

  /* The & 0x3f is to extract the bottom 6 bits that encode the
     default cpu as selected by the --with-cpu GCC configure option
     in config.gcc.
     ???: The whole TARGET_CPU_DEFAULT and AARCH64_CPU_DEFAULT_FLAGS
     flags mechanism should be reworked to make it more sane.  */
  return &all_cores[TARGET_CPU_DEFAULT & 0x3f];
}
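
/* Note for readers: TARGET_CPU_DEFAULT packs two things into one word.
   The low 6 bits select the configure-time default core (decoded here
   and in aarch64_get_arch below), while the bits from 6 upwards hold
   that core's default ISA flags, recovered in aarch64_override_options
   via TARGET_CPU_DEFAULT >> 6.  */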

/* Return the architecture corresponding to the enum ARCH.
   If it doesn't specify a valid architecture, return the default.  */

static const struct processor *
aarch64_get_arch (enum aarch64_arch arch)
{
  if (arch != aarch64_no_arch)
    return &all_architectures[arch];

  const struct processor *cpu = &all_cores[TARGET_CPU_DEFAULT & 0x3f];

  return &all_architectures[cpu->arch];
}

/* Return the VG value associated with -msve-vector-bits= value VALUE.  */

static poly_uint16
aarch64_convert_sve_vector_bits (aarch64_sve_vector_bits_enum value)
{
  /* For now generate vector-length agnostic code for -msve-vector-bits=128.
     This ensures we can clearly distinguish SVE and Advanced SIMD modes when
     deciding which .md file patterns to use and when deciding whether
     something is a legitimate address or constant.  */
  if (value == SVE_SCALABLE || value == SVE_128)
    return poly_uint16 (2, 2);
  else
    return (int) value / 64;
}
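
/* VG here is the number of 64-bit granules in an SVE vector: for
   example -msve-vector-bits=256 gives a constant VG of 4 and
   -msve-vector-bits=512 gives 8, while "scalable" (and, per the
   comment above, 128) produce the length-agnostic poly_uint16 (2, 2).  */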

/* Implement TARGET_OPTION_OVERRIDE.  This is called once in the beginning
   and is used to parse the -m{cpu,tune,arch} strings and setup the initial
   tuning structs.  In particular it must set selected_tune and
   aarch64_isa_flags that define the available ISA features and tuning
   decisions.  It must also set selected_arch as this will be used to
   output the .arch asm tags for each function.  */

static void
aarch64_override_options (void)
{
  unsigned long cpu_isa = 0;
  unsigned long arch_isa = 0;
  aarch64_isa_flags = 0;

  bool valid_cpu = true;
  bool valid_tune = true;
  bool valid_arch = true;

  selected_cpu = NULL;
  selected_arch = NULL;
  selected_tune = NULL;

  /* -mcpu=CPU is shorthand for -march=ARCH_FOR_CPU, -mtune=CPU.
     If either of -march or -mtune is given, they override their
     respective component of -mcpu.  */
  if (aarch64_cpu_string)
    valid_cpu = aarch64_validate_mcpu (aarch64_cpu_string, &selected_cpu,
				       &cpu_isa);

  if (aarch64_arch_string)
    valid_arch = aarch64_validate_march (aarch64_arch_string, &selected_arch,
					 &arch_isa);

  if (aarch64_tune_string)
    valid_tune = aarch64_validate_mtune (aarch64_tune_string, &selected_tune);

  /* If the user did not specify a processor, choose the default
     one for them.  This will be the CPU set during configuration using
     --with-cpu, otherwise it is "generic".  */
  if (!selected_cpu)
    {
      if (selected_arch)
	{
	  selected_cpu = &all_cores[selected_arch->ident];
	  aarch64_isa_flags = arch_isa;
	  explicit_arch = selected_arch->arch;
	}
      else
	{
	  /* Get default configure-time CPU.  */
	  selected_cpu = aarch64_get_tune_cpu (aarch64_none);
	  aarch64_isa_flags = TARGET_CPU_DEFAULT >> 6;
	}

      if (selected_tune)
	explicit_tune_core = selected_tune->ident;
    }
  /* If both -mcpu and -march are specified, check that they are
     architecturally compatible, warn if they're not and prefer the
     -march ISA flags.  */
  else if (selected_arch)
    {
      if (selected_arch->arch != selected_cpu->arch)
	{
	  warning (0, "switch -mcpu=%s conflicts with -march=%s switch",
		   all_architectures[selected_cpu->arch].name,
		   selected_arch->name);
	}
      aarch64_isa_flags = arch_isa;
      explicit_arch = selected_arch->arch;
      explicit_tune_core = selected_tune ? selected_tune->ident
					 : selected_cpu->ident;
    }
  else
    {
      /* -mcpu but no -march.  */
      aarch64_isa_flags = cpu_isa;
      explicit_tune_core = selected_tune ? selected_tune->ident
					 : selected_cpu->ident;
      gcc_assert (selected_cpu);
      selected_arch = &all_architectures[selected_cpu->arch];
      explicit_arch = selected_arch->arch;
    }

  /* Set the arch as well, as we will need it when outputting
     the .arch directive in assembly.  */
  if (!selected_arch)
    {
      gcc_assert (selected_cpu);
      selected_arch = &all_architectures[selected_cpu->arch];
    }

  if (!selected_tune)
    selected_tune = selected_cpu;

#ifndef HAVE_AS_MABI_OPTION
  /* The compiler may have been configured with 2.23.* binutils, which does
     not have support for ILP32.  */
  if (TARGET_ILP32)
    error ("assembler does not support -mabi=ilp32");
#endif

  /* Convert -msve-vector-bits to a VG count.  */
  aarch64_sve_vg = aarch64_convert_sve_vector_bits (aarch64_sve_vector_bits);

  if (aarch64_ra_sign_scope != AARCH64_FUNCTION_NONE && TARGET_ILP32)
    sorry ("return address signing is only supported for -mabi=lp64");

  /* Make sure we properly set up the explicit options.  */
  if ((aarch64_cpu_string && valid_cpu)
      || (aarch64_tune_string && valid_tune))
    gcc_assert (explicit_tune_core != aarch64_none);

  if ((aarch64_cpu_string && valid_cpu)
      || (aarch64_arch_string && valid_arch))
    gcc_assert (explicit_arch != aarch64_no_arch);

  aarch64_override_options_internal (&global_options);

  /* Save these options as the default ones in case we push and pop them later
     while processing functions with potential target attributes.  */
  target_option_default_node = target_option_current_node
    = build_target_option_node (&global_options);
}
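
/* An illustrative pair of invocations: plain -mcpu=cortex-a72 takes
   the "-mcpu but no -march" arm above and derives both the arch and
   the tuning from the core, whereas -mcpu=cortex-a72 -march=armv8.1-a
   keeps the cortex-a72 tuning but takes its ISA flags from armv8.1-a,
   warning because the two name different architectures.  */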

/* Implement targetm.override_options_after_change.  */

static void
aarch64_override_options_after_change (void)
{
  aarch64_override_options_after_change_1 (&global_options);
}

static struct machine_function *
aarch64_init_machine_status (void)
{
  struct machine_function *machine;
  machine = ggc_cleared_alloc<machine_function> ();
  return machine;
}

void
aarch64_init_expanders (void)
{
  init_machine_status = aarch64_init_machine_status;
}

/* A checking mechanism for the implementation of the various code models.  */
static void
initialize_aarch64_code_model (struct gcc_options *opts)
{
  if (opts->x_flag_pic)
    {
      switch (opts->x_aarch64_cmodel_var)
	{
	case AARCH64_CMODEL_TINY:
	  aarch64_cmodel = AARCH64_CMODEL_TINY_PIC;
	  break;
	case AARCH64_CMODEL_SMALL:
#ifdef HAVE_AS_SMALL_PIC_RELOCS
	  aarch64_cmodel = (flag_pic == 2
			    ? AARCH64_CMODEL_SMALL_PIC
			    : AARCH64_CMODEL_SMALL_SPIC);
#else
	  aarch64_cmodel = AARCH64_CMODEL_SMALL_PIC;
#endif
	  break;
	case AARCH64_CMODEL_LARGE:
	  sorry ("code model %qs with -f%s", "large",
		 opts->x_flag_pic > 1 ? "PIC" : "pic");
	  break;
	default:
	  gcc_unreachable ();
	}
    }
  else
    aarch64_cmodel = opts->x_aarch64_cmodel_var;
}
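
/* Concretely: under the default small code model, -fpic selects
   AARCH64_CMODEL_SMALL_SPIC and -fPIC selects AARCH64_CMODEL_SMALL_PIC
   (assembler permitting), tiny becomes TINY_PIC, and large combined
   with any PIC flag is rejected with a sorry ().  */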

/* Implement TARGET_OPTION_SAVE.  */

static void
aarch64_option_save (struct cl_target_option *ptr, struct gcc_options *opts)
{
  ptr->x_aarch64_override_tune_string = opts->x_aarch64_override_tune_string;
}

/* Implements TARGET_OPTION_RESTORE.  Restore the backend codegen decisions
   using the information saved in PTR.  */

static void
aarch64_option_restore (struct gcc_options *opts, struct cl_target_option *ptr)
{
  opts->x_explicit_tune_core = ptr->x_explicit_tune_core;
  selected_tune = aarch64_get_tune_cpu (ptr->x_explicit_tune_core);
  opts->x_explicit_arch = ptr->x_explicit_arch;
  selected_arch = aarch64_get_arch (ptr->x_explicit_arch);
  opts->x_aarch64_override_tune_string = ptr->x_aarch64_override_tune_string;

  aarch64_override_options_internal (opts);
}

/* Implement TARGET_OPTION_PRINT.  */

static void
aarch64_option_print (FILE *file, int indent, struct cl_target_option *ptr)
{
  const struct processor *cpu
    = aarch64_get_tune_cpu (ptr->x_explicit_tune_core);
  unsigned long isa_flags = ptr->x_aarch64_isa_flags;
  const struct processor *arch = aarch64_get_arch (ptr->x_explicit_arch);
  std::string extension
    = aarch64_get_extension_string_for_isa_flags (isa_flags, arch->flags);

  fprintf (file, "%*sselected tune = %s\n", indent, "", cpu->name);
  fprintf (file, "%*sselected arch = %s%s\n", indent, "",
	   arch->name, extension.c_str ());
}

static GTY(()) tree aarch64_previous_fndecl;

void
aarch64_reset_previous_fndecl (void)
{
  aarch64_previous_fndecl = NULL;
}

/* Restore or save the TREE_TARGET_GLOBALS from or to NEW_TREE.
   Used by aarch64_set_current_function and aarch64_pragma_target_parse to
   make sure optab availability predicates are recomputed when necessary.  */

void
aarch64_save_restore_target_globals (tree new_tree)
{
  if (TREE_TARGET_GLOBALS (new_tree))
    restore_target_globals (TREE_TARGET_GLOBALS (new_tree));
  else if (new_tree == target_option_default_node)
    restore_target_globals (&default_target_globals);
  else
    TREE_TARGET_GLOBALS (new_tree) = save_target_globals_default_opts ();
}

/* Implement TARGET_SET_CURRENT_FUNCTION.  Unpack the codegen decisions
   like tuning and ISA features from the DECL_FUNCTION_SPECIFIC_TARGET
   of the function, if such exists.  This function may be called multiple
   times on a single function so use aarch64_previous_fndecl to avoid
   setting up identical state.  */

static void
aarch64_set_current_function (tree fndecl)
{
  if (!fndecl || fndecl == aarch64_previous_fndecl)
    return;

  tree old_tree = (aarch64_previous_fndecl
		   ? DECL_FUNCTION_SPECIFIC_TARGET (aarch64_previous_fndecl)
		   : NULL_TREE);

  tree new_tree = DECL_FUNCTION_SPECIFIC_TARGET (fndecl);

  /* If current function has no attributes but the previous one did,
     use the default node.  */
  if (!new_tree && old_tree)
    new_tree = target_option_default_node;

  /* If nothing to do, return.  #pragma GCC reset or #pragma GCC pop to
     the default have been handled by aarch64_save_restore_target_globals from
     aarch64_pragma_target_parse.  */
  if (old_tree == new_tree)
    return;

  aarch64_previous_fndecl = fndecl;

  /* First set the target options.  */
  cl_target_option_restore (&global_options, TREE_TARGET_OPTION (new_tree));

  aarch64_save_restore_target_globals (new_tree);
}

/* Enum describing the various ways we can handle attributes.
   In many cases we can reuse the generic option handling machinery.  */

enum aarch64_attr_opt_type
{
  aarch64_attr_mask,	/* Attribute should set a bit in target_flags.  */
  aarch64_attr_bool,	/* Attribute sets or unsets a boolean variable.  */
  aarch64_attr_enum,	/* Attribute sets an enum variable.  */
  aarch64_attr_custom	/* Attribute requires a custom handling function.  */
};

/* All the information needed to handle a target attribute.
   NAME is the name of the attribute.
   ATTR_TYPE specifies the type of behavior of the attribute as described
   in the definition of enum aarch64_attr_opt_type.
   ALLOW_NEG is true if the attribute supports a "no-" form.
   HANDLER is the function that takes the attribute string as an argument.
   It is needed only when the ATTR_TYPE is aarch64_attr_custom.
   OPT_NUM is the enum specifying the option that the attribute modifies.
   This is needed for attributes that mirror the behavior of a command-line
   option, that is it has ATTR_TYPE aarch64_attr_mask, aarch64_attr_bool or
   aarch64_attr_enum.  */

struct aarch64_attribute_info
{
  const char *name;
  enum aarch64_attr_opt_type attr_type;
  bool allow_neg;
  bool (*handler) (const char *);
  enum opt_code opt_num;
};

11042};
11043
ab93e9b7 11044/* Handle the ARCH_STR argument to the arch= target attribute. */
5a2c8331
KT
11045
11046static bool
ab93e9b7 11047aarch64_handle_attr_arch (const char *str)
5a2c8331
KT
11048{
11049 const struct processor *tmp_arch = NULL;
11050 enum aarch64_parse_opt_result parse_res
11051 = aarch64_parse_arch (str, &tmp_arch, &aarch64_isa_flags);
11052
11053 if (parse_res == AARCH64_PARSE_OK)
11054 {
11055 gcc_assert (tmp_arch);
11056 selected_arch = tmp_arch;
11057 explicit_arch = selected_arch->arch;
11058 return true;
11059 }
11060
11061 switch (parse_res)
11062 {
11063 case AARCH64_PARSE_MISSING_ARG:
ab93e9b7 11064 error ("missing name in %<target(\"arch=\")%> pragma or attribute");
5a2c8331
KT
11065 break;
11066 case AARCH64_PARSE_INVALID_ARG:
ab93e9b7 11067 error ("invalid name (\"%s\") in %<target(\"arch=\")%> pragma or attribute", str);
01f44038 11068 aarch64_print_hint_for_arch (str);
5a2c8331
KT
11069 break;
11070 case AARCH64_PARSE_INVALID_FEATURE:
ab93e9b7 11071 error ("invalid value (\"%s\") in %<target()%> pragma or attribute", str);
5a2c8331
KT
11072 break;
11073 default:
11074 gcc_unreachable ();
11075 }
11076
11077 return false;
11078}

/* Handle the STR argument to the cpu= target attribute.  */

static bool
aarch64_handle_attr_cpu (const char *str)
{
  const struct processor *tmp_cpu = NULL;
  enum aarch64_parse_opt_result parse_res
    = aarch64_parse_cpu (str, &tmp_cpu, &aarch64_isa_flags);

  if (parse_res == AARCH64_PARSE_OK)
    {
      gcc_assert (tmp_cpu);
      selected_tune = tmp_cpu;
      explicit_tune_core = selected_tune->ident;

      selected_arch = &all_architectures[tmp_cpu->arch];
      explicit_arch = selected_arch->arch;
      return true;
    }

  switch (parse_res)
    {
    case AARCH64_PARSE_MISSING_ARG:
      error ("missing name in %<target(\"cpu=\")%> pragma or attribute");
      break;
    case AARCH64_PARSE_INVALID_ARG:
      error ("invalid name (\"%s\") in %<target(\"cpu=\")%> pragma or attribute", str);
      aarch64_print_hint_for_core (str);
      break;
    case AARCH64_PARSE_INVALID_FEATURE:
      error ("invalid value (\"%s\") in %<target()%> pragma or attribute", str);
      break;
    default:
      gcc_unreachable ();
    }

  return false;
}

/* Handle the STR argument to the tune= target attribute.  */

static bool
aarch64_handle_attr_tune (const char *str)
{
  const struct processor *tmp_tune = NULL;
  enum aarch64_parse_opt_result parse_res
    = aarch64_parse_tune (str, &tmp_tune);

  if (parse_res == AARCH64_PARSE_OK)
    {
      gcc_assert (tmp_tune);
      selected_tune = tmp_tune;
      explicit_tune_core = selected_tune->ident;
      return true;
    }

  switch (parse_res)
    {
    case AARCH64_PARSE_INVALID_ARG:
      error ("invalid name (\"%s\") in %<target(\"tune=\")%> pragma or attribute", str);
      aarch64_print_hint_for_core (str);
      break;
    default:
      gcc_unreachable ();
    }

  return false;
}

/* Parse an architecture extensions target attribute string specified in STR.
   For example "+fp+nosimd".  Show any errors if needed.  Return TRUE
   if successful.  Update aarch64_isa_flags to reflect the ISA features
   modified.  */

static bool
aarch64_handle_attr_isa_flags (char *str)
{
  enum aarch64_parse_opt_result parse_res;
  unsigned long isa_flags = aarch64_isa_flags;

  /* We allow "+nothing" in the beginning to clear out all architectural
     features if the user wants to handpick specific features.  */
  if (strncmp ("+nothing", str, 8) == 0)
    {
      isa_flags = 0;
      str += 8;
    }

  parse_res = aarch64_parse_extension (str, &isa_flags);

  if (parse_res == AARCH64_PARSE_OK)
    {
      aarch64_isa_flags = isa_flags;
      return true;
    }

  switch (parse_res)
    {
    case AARCH64_PARSE_MISSING_ARG:
      error ("missing value in %<target()%> pragma or attribute");
      break;

    case AARCH64_PARSE_INVALID_FEATURE:
      error ("invalid value (\"%s\") in %<target()%> pragma or attribute", str);
      break;

    default:
      gcc_unreachable ();
    }

  return false;
}

/* The target attributes that we support.  On top of these we also support just
   ISA extensions, like  __attribute__ ((target ("+crc"))), but that case is
   handled explicitly in aarch64_process_one_target_attr.  */

static const struct aarch64_attribute_info aarch64_attributes[] =
{
  { "general-regs-only", aarch64_attr_mask, false, NULL,
     OPT_mgeneral_regs_only },
  { "fix-cortex-a53-835769", aarch64_attr_bool, true, NULL,
     OPT_mfix_cortex_a53_835769 },
  { "fix-cortex-a53-843419", aarch64_attr_bool, true, NULL,
     OPT_mfix_cortex_a53_843419 },
  { "cmodel", aarch64_attr_enum, false, NULL, OPT_mcmodel_ },
  { "strict-align", aarch64_attr_mask, true, NULL, OPT_mstrict_align },
  { "omit-leaf-frame-pointer", aarch64_attr_bool, true, NULL,
     OPT_momit_leaf_frame_pointer },
  { "tls-dialect", aarch64_attr_enum, false, NULL, OPT_mtls_dialect_ },
  { "arch", aarch64_attr_custom, false, aarch64_handle_attr_arch,
     OPT_march_ },
  { "cpu", aarch64_attr_custom, false, aarch64_handle_attr_cpu, OPT_mcpu_ },
  { "tune", aarch64_attr_custom, false, aarch64_handle_attr_tune,
     OPT_mtune_ },
  { "sign-return-address", aarch64_attr_enum, false, NULL,
     OPT_msign_return_address_ },
  { NULL, aarch64_attr_custom, false, NULL, OPT____ }
};
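
/* Reading one row of the table above: { "strict-align",
   aarch64_attr_mask, true, NULL, OPT_mstrict_align } means that
   target("strict-align") flips a bit in target_flags, accepts a
   "no-strict-align" form, needs no custom handler, and reuses the
   option machinery of -mstrict-align.  */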

/* Parse ARG_STR which contains the definition of one target attribute.
   Show appropriate errors if any or return true if the attribute is valid.  */

static bool
aarch64_process_one_target_attr (char *arg_str)
{
  bool invert = false;

  size_t len = strlen (arg_str);

  if (len == 0)
    {
      error ("malformed %<target()%> pragma or attribute");
      return false;
    }

  char *str_to_check = (char *) alloca (len + 1);
  strcpy (str_to_check, arg_str);

  /* Skip leading whitespace.  */
  while (*str_to_check == ' ' || *str_to_check == '\t')
    str_to_check++;

  /* We have something like __attribute__ ((target ("+fp+nosimd"))).
     It is easier to detect and handle it explicitly here rather than going
     through the machinery for the rest of the target attributes in this
     function.  */
  if (*str_to_check == '+')
    return aarch64_handle_attr_isa_flags (str_to_check);

  if (len > 3 && strncmp (str_to_check, "no-", 3) == 0)
    {
      invert = true;
      str_to_check += 3;
    }
  char *arg = strchr (str_to_check, '=');

  /* If we found opt=foo then terminate STR_TO_CHECK at the '='
     and point ARG to "foo".  */
  if (arg)
    {
      *arg = '\0';
      arg++;
    }
  const struct aarch64_attribute_info *p_attr;
  bool found = false;
  for (p_attr = aarch64_attributes; p_attr->name; p_attr++)
    {
      /* If the names don't match up, or the user has given an argument
	 to an attribute that doesn't accept one, or didn't give an argument
	 to an attribute that expects one, fail to match.  */
      if (strcmp (str_to_check, p_attr->name) != 0)
	continue;

      found = true;
      bool attr_need_arg_p = p_attr->attr_type == aarch64_attr_custom
			     || p_attr->attr_type == aarch64_attr_enum;

      if (attr_need_arg_p ^ (arg != NULL))
	{
	  error ("pragma or attribute %<target(\"%s\")%> does not accept an argument", str_to_check);
	  return false;
	}

      /* If the name matches but the attribute does not allow "no-" versions
	 then we can't match.  */
      if (invert && !p_attr->allow_neg)
	{
	  error ("pragma or attribute %<target(\"%s\")%> does not allow a negated form", str_to_check);
	  return false;
	}

      switch (p_attr->attr_type)
	{
	/* Has a custom handler registered.
	   For example, cpu=, arch=, tune=.  */
	case aarch64_attr_custom:
	  gcc_assert (p_attr->handler);
	  if (!p_attr->handler (arg))
	    return false;
	  break;

	/* Either set or unset a boolean option.  */
	case aarch64_attr_bool:
	  {
	    struct cl_decoded_option decoded;

	    generate_option (p_attr->opt_num, NULL, !invert,
			     CL_TARGET, &decoded);
	    aarch64_handle_option (&global_options, &global_options_set,
				   &decoded, input_location);
	    break;
	  }
	/* Set or unset a bit in the target_flags.  aarch64_handle_option
	   should know what mask to apply given the option number.  */
	case aarch64_attr_mask:
	  {
	    struct cl_decoded_option decoded;
	    /* We only need to specify the option number.
	       aarch64_handle_option will know which mask to apply.  */
	    decoded.opt_index = p_attr->opt_num;
	    decoded.value = !invert;
	    aarch64_handle_option (&global_options, &global_options_set,
				   &decoded, input_location);
	    break;
	  }
	/* Use the option setting machinery to set an option to an enum.  */
	case aarch64_attr_enum:
	  {
	    gcc_assert (arg);
	    bool valid;
	    int value;
	    valid = opt_enum_arg_to_value (p_attr->opt_num, arg,
					   &value, CL_TARGET);
	    if (valid)
	      {
		set_option (&global_options, NULL, p_attr->opt_num, value,
			    NULL, DK_UNSPECIFIED, input_location,
			    global_dc);
	      }
	    else
	      {
		error ("pragma or attribute %<target(\"%s=%s\")%> is not valid", str_to_check, arg);
	      }
	    break;
	  }
	default:
	  gcc_unreachable ();
	}
    }

  /* If we reached here we either have found an attribute and validated
     it or didn't match any.  If we matched an attribute but its arguments
     were malformed we will have returned false already.  */
  return found;
}

/* Count how many times the character C appears in
   NULL-terminated string STR.  */

static unsigned int
num_occurences_in_str (char c, char *str)
{
  unsigned int res = 0;
  while (*str != '\0')
    {
      if (*str == c)
	res++;

      str++;
    }

  return res;
}

/* Parse the tree in ARGS that contains the target attribute information
   and update the global target options space.  */

bool
aarch64_process_target_attr (tree args)
{
  if (TREE_CODE (args) == TREE_LIST)
    {
      do
	{
	  tree head = TREE_VALUE (args);
	  if (head)
	    {
	      if (!aarch64_process_target_attr (head))
		return false;
	    }
	  args = TREE_CHAIN (args);
	} while (args);

      return true;
    }

  if (TREE_CODE (args) != STRING_CST)
    {
      error ("attribute %<target%> argument not a string");
      return false;
    }

  size_t len = strlen (TREE_STRING_POINTER (args));
  char *str_to_check = (char *) alloca (len + 1);
  strcpy (str_to_check, TREE_STRING_POINTER (args));

  if (len == 0)
    {
      error ("malformed %<target()%> pragma or attribute");
      return false;
    }

  /* Used to catch empty spaces between commas i.e.
     attribute ((target ("attr1,,attr2"))).  */
  unsigned int num_commas = num_occurences_in_str (',', str_to_check);

  /* Handle multiple target attributes separated by ','.  */
  char *token = strtok (str_to_check, ",");

  unsigned int num_attrs = 0;
  while (token)
    {
      num_attrs++;
      if (!aarch64_process_one_target_attr (token))
	{
	  error ("pragma or attribute %<target(\"%s\")%> is not valid", token);
	  return false;
	}

      token = strtok (NULL, ",");
    }

  if (num_attrs != num_commas + 1)
    {
      error ("malformed %<target(\"%s\")%> pragma or attribute", TREE_STRING_POINTER (args));
      return false;
    }

  return true;
}
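
/* An end-to-end example (names taken from the aarch64_attributes table
   above): parsing
   __attribute__ ((target ("arch=armv8.1-a,no-omit-leaf-frame-pointer")))
   splits the string on ',', routes "arch=armv8.1-a" to
   aarch64_handle_attr_arch, and handles the "no-" prefix of the second
   token through the aarch64_attr_bool machinery with INVERT set.  */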

/* Implement TARGET_OPTION_VALID_ATTRIBUTE_P.  This is used to
   process attribute ((target ("..."))).  */

static bool
aarch64_option_valid_attribute_p (tree fndecl, tree, tree args, int)
{
  struct cl_target_option cur_target;
  bool ret;
  tree old_optimize;
  tree new_target, new_optimize;
  tree existing_target = DECL_FUNCTION_SPECIFIC_TARGET (fndecl);

  /* If what we're processing is the current pragma string then the
     target option node is already stored in target_option_current_node
     by aarch64_pragma_target_parse in aarch64-c.c.  Use that to avoid
     having to re-parse the string.  This is especially useful to keep
     arm_neon.h compile times down since that header contains a lot
     of intrinsics enclosed in pragmas.  */
  if (!existing_target && args == current_target_pragma)
    {
      DECL_FUNCTION_SPECIFIC_TARGET (fndecl) = target_option_current_node;
      return true;
    }
  tree func_optimize = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl);

  old_optimize = build_optimization_node (&global_options);
  func_optimize = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl);

  /* If the function changed the optimization levels as well as setting
     target options, start with the optimizations specified.  */
  if (func_optimize && func_optimize != old_optimize)
    cl_optimization_restore (&global_options,
			     TREE_OPTIMIZATION (func_optimize));

  /* Save the current target options to restore at the end.  */
  cl_target_option_save (&cur_target, &global_options);

  /* If fndecl already has some target attributes applied to it, unpack
     them so that we add this attribute on top of them, rather than
     overwriting them.  */
  if (existing_target)
    {
      struct cl_target_option *existing_options
	= TREE_TARGET_OPTION (existing_target);

      if (existing_options)
	cl_target_option_restore (&global_options, existing_options);
    }
  else
    cl_target_option_restore (&global_options,
			      TREE_TARGET_OPTION (target_option_current_node));

  ret = aarch64_process_target_attr (args);

  /* Set up any additional state.  */
  if (ret)
    {
      aarch64_override_options_internal (&global_options);
      /* Initialize SIMD builtins if we haven't already.
	 Set current_target_pragma to NULL for the duration so that
	 the builtin initialization code doesn't try to tag the functions
	 being built with the attributes specified by any current pragma, thus
	 going into an infinite recursion.  */
      if (TARGET_SIMD)
	{
	  tree saved_current_target_pragma = current_target_pragma;
	  current_target_pragma = NULL;
	  aarch64_init_simd_builtins ();
	  current_target_pragma = saved_current_target_pragma;
	}
      new_target = build_target_option_node (&global_options);
    }
  else
    new_target = NULL;

  new_optimize = build_optimization_node (&global_options);

  if (fndecl && ret)
    {
      DECL_FUNCTION_SPECIFIC_TARGET (fndecl) = new_target;

      if (old_optimize != new_optimize)
	DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl) = new_optimize;
    }

  cl_target_option_restore (&global_options, &cur_target);

  if (old_optimize != new_optimize)
    cl_optimization_restore (&global_options,
			     TREE_OPTIMIZATION (old_optimize));
  return ret;
}

/* Helper for aarch64_can_inline_p.  In the case where CALLER and CALLEE are
   tri-bool options (yes, no, don't care) and the default value is
   DEF, determine whether to reject inlining.  */

static bool
aarch64_tribools_ok_for_inlining_p (int caller, int callee,
				    int dont_care, int def)
{
  /* If the callee doesn't care, always allow inlining.  */
  if (callee == dont_care)
    return true;

  /* If the caller doesn't care, always allow inlining.  */
  if (caller == dont_care)
    return true;

  /* Otherwise, allow inlining if either the callee and caller values
     agree, or if the callee is using the default value.  */
  return (callee == caller || callee == def);
}

/* Implement TARGET_CAN_INLINE_P.  Decide whether it is valid
   to inline CALLEE into CALLER based on target-specific info.
   Make sure that the caller and callee have compatible architectural
   features.  Then go through the other possible target attributes
   and see if they can block inlining.  Try not to reject always_inline
   callees unless they are incompatible architecturally.  */

static bool
aarch64_can_inline_p (tree caller, tree callee)
{
  tree caller_tree = DECL_FUNCTION_SPECIFIC_TARGET (caller);
  tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (callee);

  struct cl_target_option *caller_opts
    = TREE_TARGET_OPTION (caller_tree ? caller_tree
			  : target_option_default_node);

  struct cl_target_option *callee_opts
    = TREE_TARGET_OPTION (callee_tree ? callee_tree
			  : target_option_default_node);

  /* Callee's ISA flags should be a subset of the caller's.  */
  if ((caller_opts->x_aarch64_isa_flags & callee_opts->x_aarch64_isa_flags)
      != callee_opts->x_aarch64_isa_flags)
    return false;

  /* Allow non-strict aligned functions inlining into strict
     aligned ones.  */
  if ((TARGET_STRICT_ALIGN_P (caller_opts->x_target_flags)
       != TARGET_STRICT_ALIGN_P (callee_opts->x_target_flags))
      && !(!TARGET_STRICT_ALIGN_P (callee_opts->x_target_flags)
	   && TARGET_STRICT_ALIGN_P (caller_opts->x_target_flags)))
    return false;

  bool always_inline = lookup_attribute ("always_inline",
					 DECL_ATTRIBUTES (callee));

  /* If the architectural features match up and the callee is always_inline
     then the other attributes don't matter.  */
  if (always_inline)
    return true;

  if (caller_opts->x_aarch64_cmodel_var
      != callee_opts->x_aarch64_cmodel_var)
    return false;

  if (caller_opts->x_aarch64_tls_dialect
      != callee_opts->x_aarch64_tls_dialect)
    return false;

  /* Honour explicit requests to workaround errata.  */
  if (!aarch64_tribools_ok_for_inlining_p (
	  caller_opts->x_aarch64_fix_a53_err835769,
	  callee_opts->x_aarch64_fix_a53_err835769,
	  2, TARGET_FIX_ERR_A53_835769_DEFAULT))
    return false;

  if (!aarch64_tribools_ok_for_inlining_p (
	  caller_opts->x_aarch64_fix_a53_err843419,
	  callee_opts->x_aarch64_fix_a53_err843419,
	  2, TARGET_FIX_ERR_A53_843419))
    return false;

  /* If the user explicitly specified -momit-leaf-frame-pointer for the
     caller and callee and they don't match up, reject inlining.  */
  if (!aarch64_tribools_ok_for_inlining_p (
	  caller_opts->x_flag_omit_leaf_frame_pointer,
	  callee_opts->x_flag_omit_leaf_frame_pointer,
	  2, 1))
    return false;

  /* If the callee has specific tuning overrides, respect them.  */
  if (callee_opts->x_aarch64_override_tune_string != NULL
      && caller_opts->x_aarch64_override_tune_string == NULL)
    return false;

  /* If the user specified tuning override strings for the
     caller and callee and they don't match up, reject inlining.
     We just do a string compare here, we don't analyze the meaning
     of the string, as it would be too costly for little gain.  */
  if (callee_opts->x_aarch64_override_tune_string
      && caller_opts->x_aarch64_override_tune_string
      && (strcmp (callee_opts->x_aarch64_override_tune_string,
		  caller_opts->x_aarch64_override_tune_string) != 0))
    return false;

  return true;
}
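
/* For example, under the subset rule above a callee compiled with
   target("+simd") may be inlined into a caller built with
   -march=armv8-a+simd+crypto, but a target("+crypto") callee cannot be
   inlined into a plain +simd caller: its body could use crypto
   instructions the caller's context does not guarantee.  */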

/* Return true if SYMBOL_REF X binds locally.  */

static bool
aarch64_symbol_binds_local_p (const_rtx x)
{
  return (SYMBOL_REF_DECL (x)
	  ? targetm.binds_local_p (SYMBOL_REF_DECL (x))
	  : SYMBOL_REF_LOCAL_P (x));
}

/* Return true if SYMBOL_REF X is thread local.  */
static bool
aarch64_tls_symbol_p (rtx x)
{
  if (! TARGET_HAVE_TLS)
    return false;

  if (GET_CODE (x) != SYMBOL_REF)
    return false;

  return SYMBOL_REF_TLS_MODEL (x) != 0;
}

/* Classify a TLS symbol into one of the TLS kinds.  */
enum aarch64_symbol_type
aarch64_classify_tls_symbol (rtx x)
{
  enum tls_model tls_kind = tls_symbolic_operand_type (x);

  switch (tls_kind)
    {
    case TLS_MODEL_GLOBAL_DYNAMIC:
    case TLS_MODEL_LOCAL_DYNAMIC:
      return TARGET_TLS_DESC ? SYMBOL_SMALL_TLSDESC : SYMBOL_SMALL_TLSGD;

    case TLS_MODEL_INITIAL_EXEC:
      switch (aarch64_cmodel)
	{
	case AARCH64_CMODEL_TINY:
	case AARCH64_CMODEL_TINY_PIC:
	  return SYMBOL_TINY_TLSIE;
	default:
	  return SYMBOL_SMALL_TLSIE;
	}

    case TLS_MODEL_LOCAL_EXEC:
      if (aarch64_tls_size == 12)
	return SYMBOL_TLSLE12;
      else if (aarch64_tls_size == 24)
	return SYMBOL_TLSLE24;
      else if (aarch64_tls_size == 32)
	return SYMBOL_TLSLE32;
      else if (aarch64_tls_size == 48)
	return SYMBOL_TLSLE48;
      else
	gcc_unreachable ();

    case TLS_MODEL_EMULATED:
    case TLS_MODEL_NONE:
      return SYMBOL_FORCE_TO_MEM;

    default:
      gcc_unreachable ();
    }
}
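
/* Tying this back to initialize_aarch64_tls_size above: a local-exec
   symbol ends up as SYMBOL_TLSLE24 by default (aarch64_tls_size is 24
   when unset), and as SYMBOL_TLSLE32 or SYMBOL_TLSLE48 only when
   -mtls-size requests it and the small or large code model permits
   that many offset bits.  */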
11710
/* Return the correct method for accessing X + OFFSET, where X is either
   a SYMBOL_REF or LABEL_REF.  */

enum aarch64_symbol_type
aarch64_classify_symbol (rtx x, HOST_WIDE_INT offset)
{
  if (GET_CODE (x) == LABEL_REF)
    {
      switch (aarch64_cmodel)
	{
	case AARCH64_CMODEL_LARGE:
	  return SYMBOL_FORCE_TO_MEM;

	case AARCH64_CMODEL_TINY_PIC:
	case AARCH64_CMODEL_TINY:
	  return SYMBOL_TINY_ABSOLUTE;

	case AARCH64_CMODEL_SMALL_SPIC:
	case AARCH64_CMODEL_SMALL_PIC:
	case AARCH64_CMODEL_SMALL:
	  return SYMBOL_SMALL_ABSOLUTE;

	default:
	  gcc_unreachable ();
	}
    }

  if (GET_CODE (x) == SYMBOL_REF)
    {
      if (aarch64_tls_symbol_p (x))
	return aarch64_classify_tls_symbol (x);

      switch (aarch64_cmodel)
	{
	case AARCH64_CMODEL_TINY:
	  /* When we retrieve a symbol + offset address, we have to make sure
	     the offset does not cause overflow of the final address.  But
	     we have no way of knowing the address of the symbol at compile
	     time, so we can't accurately say if the distance between the PC
	     and symbol + offset is outside the addressable range of +/-1M
	     in the TINY code model.  So we rely on images not being greater
	     than 1M and cap the offset at 1M; anything beyond 1M will have
	     to be loaded using an alternative mechanism.  Furthermore, if
	     the symbol is a weak reference to something that isn't known to
	     resolve to a symbol in this module, then force to memory.  */
	  if ((SYMBOL_REF_WEAK (x)
	       && !aarch64_symbol_binds_local_p (x))
	      || !IN_RANGE (offset, -1048575, 1048575))
	    return SYMBOL_FORCE_TO_MEM;
	  return SYMBOL_TINY_ABSOLUTE;

	case AARCH64_CMODEL_SMALL:
	  /* Same reasoning as the tiny code model, but the offset cap here
	     is 4G.  */
	  if ((SYMBOL_REF_WEAK (x)
	       && !aarch64_symbol_binds_local_p (x))
	      || !IN_RANGE (offset, HOST_WIDE_INT_C (-4294967263),
			    HOST_WIDE_INT_C (4294967264)))
	    return SYMBOL_FORCE_TO_MEM;
	  return SYMBOL_SMALL_ABSOLUTE;

	case AARCH64_CMODEL_TINY_PIC:
	  if (!aarch64_symbol_binds_local_p (x))
	    return SYMBOL_TINY_GOT;
	  return SYMBOL_TINY_ABSOLUTE;

	case AARCH64_CMODEL_SMALL_SPIC:
	case AARCH64_CMODEL_SMALL_PIC:
	  if (!aarch64_symbol_binds_local_p (x))
	    return (aarch64_cmodel == AARCH64_CMODEL_SMALL_SPIC
		    ? SYMBOL_SMALL_GOT_28K : SYMBOL_SMALL_GOT_4G);
	  return SYMBOL_SMALL_ABSOLUTE;

	case AARCH64_CMODEL_LARGE:
	  /* This is alright even in PIC code as the constant
	     pool reference is always PC relative and within
	     the same translation unit.  */
	  if (!aarch64_pcrelative_literal_loads && CONSTANT_POOL_ADDRESS_P (x))
	    return SYMBOL_SMALL_ABSOLUTE;
	  else
	    return SYMBOL_FORCE_TO_MEM;

	default:
	  gcc_unreachable ();
	}
    }

  /* By default push everything into the constant pool.  */
  return SYMBOL_FORCE_TO_MEM;
}

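/* Return true if X is a CONSTANT_P rtx that is also a valid DImode
   memory address.  */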
bool
aarch64_constant_address_p (rtx x)
{
  return (CONSTANT_P (x) && memory_address_p (DImode, x));
}

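/* Return true if X is a valid operand in PIC code: symbolic references
   (with or without a constant offset) are rejected, since they must
   first be legitimized.  */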
bool
aarch64_legitimate_pic_operand_p (rtx x)
{
  if (GET_CODE (x) == SYMBOL_REF
      || (GET_CODE (x) == CONST
	  && GET_CODE (XEXP (x, 0)) == PLUS
	  && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF))
    return false;

  return true;
}

/* Implement TARGET_LEGITIMATE_CONSTANT_P hook.  Return true for constants
   that should be rematerialized rather than spilled.  */

static bool
aarch64_legitimate_constant_p (machine_mode mode, rtx x)
{
  /* Support CSE and rematerialization of common constants.  */
  if (CONST_INT_P (x)
      || (CONST_DOUBLE_P (x) && GET_MODE_CLASS (mode) == MODE_FLOAT)
      || GET_CODE (x) == CONST_VECTOR)
    return true;

  /* Do not allow vector struct mode constants for Advanced SIMD.
     We could support 0 and -1 easily, but they need support in
     aarch64-simd.md.  */
  unsigned int vec_flags = aarch64_classify_vector_mode (mode);
  if (vec_flags == (VEC_ADVSIMD | VEC_STRUCT))
    return false;

  /* Only accept variable-length vector constants if they can be
     handled directly.

     ??? It would be possible to handle rematerialization of other
     constants via secondary reloads.  */
  if (vec_flags & VEC_ANY_SVE)
    return aarch64_simd_valid_immediate (x, NULL);

  if (GET_CODE (x) == HIGH)
    x = XEXP (x, 0);

  /* Accept polynomial constants that can be calculated by using the
     destination of a move as the sole temporary.  Constants that
     require a second temporary cannot be rematerialized (they can't be
     forced to memory and also aren't legitimate constants).  */
  poly_int64 offset;
  if (poly_int_rtx_p (x, &offset))
    return aarch64_offset_temporaries (false, offset) <= 1;

  /* If an offset is being added to something else, we need to allow the
     base to be moved into the destination register, meaning that there
     are no free temporaries for the offset.  */
  x = strip_offset (x, &offset);
  if (!offset.is_constant () && aarch64_offset_temporaries (true, offset) > 0)
    return false;

  /* Do not allow const (plus (anchor_symbol, const_int)).  */
  if (maybe_ne (offset, 0) && SYMBOL_REF_P (x) && SYMBOL_REF_ANCHOR_P (x))
    return false;

  /* Treat symbols as constants.  Avoid TLS symbols as they are complex,
     so spilling them is better than rematerialization.  */
  if (SYMBOL_REF_P (x) && !SYMBOL_REF_TLS_MODEL (x))
    return true;

  /* Label references are always constant.  */
  if (GET_CODE (x) == LABEL_REF)
    return true;

  return false;
}

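/* Return an rtx holding the thread pointer, using TARGET if it is a
   suitable Pmode register and a fresh pseudo otherwise.  */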
rtx
aarch64_load_tp (rtx target)
{
  if (!target
      || GET_MODE (target) != Pmode
      || !register_operand (target, Pmode))
    target = gen_reg_rtx (Pmode);

  /* Can return in any reg.  */
  emit_insn (gen_aarch64_load_tp_hard (target));
  return target;
}

/* On AAPCS systems, this is the "struct __va_list".  */
static GTY(()) tree va_list_type;

/* Implement TARGET_BUILD_BUILTIN_VA_LIST.
   Return the type to use as __builtin_va_list.

   AAPCS64 \S 7.1.4 requires that va_list be a typedef for a type defined as:

   struct __va_list
   {
     void *__stack;
     void *__gr_top;
     void *__vr_top;
     int __gr_offs;
     int __vr_offs;
   };  */

static tree
aarch64_build_builtin_va_list (void)
{
  tree va_list_name;
  tree f_stack, f_grtop, f_vrtop, f_groff, f_vroff;

  /* Create the type.  */
  va_list_type = lang_hooks.types.make_type (RECORD_TYPE);
  /* Give it the required name.  */
  va_list_name = build_decl (BUILTINS_LOCATION,
			     TYPE_DECL,
			     get_identifier ("__va_list"),
			     va_list_type);
  DECL_ARTIFICIAL (va_list_name) = 1;
  TYPE_NAME (va_list_type) = va_list_name;
  TYPE_STUB_DECL (va_list_type) = va_list_name;

  /* Create the fields.  */
  f_stack = build_decl (BUILTINS_LOCATION,
			FIELD_DECL, get_identifier ("__stack"),
			ptr_type_node);
  f_grtop = build_decl (BUILTINS_LOCATION,
			FIELD_DECL, get_identifier ("__gr_top"),
			ptr_type_node);
  f_vrtop = build_decl (BUILTINS_LOCATION,
			FIELD_DECL, get_identifier ("__vr_top"),
			ptr_type_node);
  f_groff = build_decl (BUILTINS_LOCATION,
			FIELD_DECL, get_identifier ("__gr_offs"),
			integer_type_node);
  f_vroff = build_decl (BUILTINS_LOCATION,
			FIELD_DECL, get_identifier ("__vr_offs"),
			integer_type_node);

  /* Tell the tree-stdarg pass about our internal offset fields.
     NOTE: va_list_gpr/fpr_counter_field are only used for tree comparison
     purposes, to identify whether the code is updating the va_list internal
     offset fields through an irregular way.  */
  va_list_gpr_counter_field = f_groff;
  va_list_fpr_counter_field = f_vroff;

  DECL_ARTIFICIAL (f_stack) = 1;
  DECL_ARTIFICIAL (f_grtop) = 1;
  DECL_ARTIFICIAL (f_vrtop) = 1;
  DECL_ARTIFICIAL (f_groff) = 1;
  DECL_ARTIFICIAL (f_vroff) = 1;

  DECL_FIELD_CONTEXT (f_stack) = va_list_type;
  DECL_FIELD_CONTEXT (f_grtop) = va_list_type;
  DECL_FIELD_CONTEXT (f_vrtop) = va_list_type;
  DECL_FIELD_CONTEXT (f_groff) = va_list_type;
  DECL_FIELD_CONTEXT (f_vroff) = va_list_type;

  TYPE_FIELDS (va_list_type) = f_stack;
  DECL_CHAIN (f_stack) = f_grtop;
  DECL_CHAIN (f_grtop) = f_vrtop;
  DECL_CHAIN (f_vrtop) = f_groff;
  DECL_CHAIN (f_groff) = f_vroff;

  /* Compute its layout.  */
  layout_type (va_list_type);

  return va_list_type;
}

/* Implement TARGET_EXPAND_BUILTIN_VA_START.  */
static void
aarch64_expand_builtin_va_start (tree valist, rtx nextarg ATTRIBUTE_UNUSED)
{
  const CUMULATIVE_ARGS *cum;
  tree f_stack, f_grtop, f_vrtop, f_groff, f_vroff;
  tree stack, grtop, vrtop, groff, vroff;
  tree t;
  int gr_save_area_size = cfun->va_list_gpr_size;
  int vr_save_area_size = cfun->va_list_fpr_size;
  int vr_offset;

  cum = &crtl->args.info;
  if (cfun->va_list_gpr_size)
    gr_save_area_size = MIN ((NUM_ARG_REGS - cum->aapcs_ncrn) * UNITS_PER_WORD,
			     cfun->va_list_gpr_size);
  if (cfun->va_list_fpr_size)
    vr_save_area_size = MIN ((NUM_FP_ARG_REGS - cum->aapcs_nvrn)
			     * UNITS_PER_VREG, cfun->va_list_fpr_size);

  if (!TARGET_FLOAT)
    {
      gcc_assert (cum->aapcs_nvrn == 0);
      vr_save_area_size = 0;
    }

  f_stack = TYPE_FIELDS (va_list_type_node);
  f_grtop = DECL_CHAIN (f_stack);
  f_vrtop = DECL_CHAIN (f_grtop);
  f_groff = DECL_CHAIN (f_vrtop);
  f_vroff = DECL_CHAIN (f_groff);

  stack = build3 (COMPONENT_REF, TREE_TYPE (f_stack), valist, f_stack,
		  NULL_TREE);
  grtop = build3 (COMPONENT_REF, TREE_TYPE (f_grtop), valist, f_grtop,
		  NULL_TREE);
  vrtop = build3 (COMPONENT_REF, TREE_TYPE (f_vrtop), valist, f_vrtop,
		  NULL_TREE);
  groff = build3 (COMPONENT_REF, TREE_TYPE (f_groff), valist, f_groff,
		  NULL_TREE);
  vroff = build3 (COMPONENT_REF, TREE_TYPE (f_vroff), valist, f_vroff,
		  NULL_TREE);

  /* Emit code to initialize STACK, which points to the next varargs stack
     argument.  CUM->AAPCS_STACK_SIZE gives the number of stack words used
     by named arguments.  STACK is 8-byte aligned.  */
  t = make_tree (TREE_TYPE (stack), virtual_incoming_args_rtx);
  if (cum->aapcs_stack_size > 0)
    t = fold_build_pointer_plus_hwi (t, cum->aapcs_stack_size * UNITS_PER_WORD);
  t = build2 (MODIFY_EXPR, TREE_TYPE (stack), stack, t);
  expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);

  /* Emit code to initialize GRTOP, the top of the GR save area.
     virtual_incoming_args_rtx should have been 16-byte aligned.  */
  t = make_tree (TREE_TYPE (grtop), virtual_incoming_args_rtx);
  t = build2 (MODIFY_EXPR, TREE_TYPE (grtop), grtop, t);
  expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);

  /* Emit code to initialize VRTOP, the top of the VR save area.
     This address is gr_save_area_bytes below GRTOP, rounded
     down to the next 16-byte boundary.  */
  t = make_tree (TREE_TYPE (vrtop), virtual_incoming_args_rtx);
  vr_offset = ROUND_UP (gr_save_area_size,
			STACK_BOUNDARY / BITS_PER_UNIT);

  if (vr_offset)
    t = fold_build_pointer_plus_hwi (t, -vr_offset);
  t = build2 (MODIFY_EXPR, TREE_TYPE (vrtop), vrtop, t);
  expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);

  /* Emit code to initialize GROFF, the offset from GRTOP of the
     next GPR argument.  */
  t = build2 (MODIFY_EXPR, TREE_TYPE (groff), groff,
	      build_int_cst (TREE_TYPE (groff), -gr_save_area_size));
  expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);

  /* Likewise emit code to initialize VROFF, the offset from VRTOP
     of the next VR argument.  */
  t = build2 (MODIFY_EXPR, TREE_TYPE (vroff), vroff,
	      build_int_cst (TREE_TYPE (vroff), -vr_save_area_size));
  expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
}

/* Implement TARGET_GIMPLIFY_VA_ARG_EXPR.  */

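/* The expansion below follows the AAPCS64 va_arg algorithm.  As a rough
   sketch (not the emitted gimple; field names as in __va_list above,
   shown for the general-register case):

     off = ap.__gr_offs;
     if (off >= 0)
       goto on_stack;		// register save area already exhausted
     ap.__gr_offs = off + rsize;
     if (ap.__gr_offs > 0)
       goto on_stack;		// this argument did not fit in it either
     addr = ap.__gr_top + off;	// argument is in the register save area
     ...
   on_stack:
     addr = align (ap.__stack);
     ap.__stack = round_up (addr + size, 8);  */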
static tree
aarch64_gimplify_va_arg_expr (tree valist, tree type, gimple_seq *pre_p,
			      gimple_seq *post_p ATTRIBUTE_UNUSED)
{
  tree addr;
  bool indirect_p;
  bool is_ha;		/* is HFA or HVA.  */
  bool dw_align;	/* double-word align.  */
  machine_mode ag_mode = VOIDmode;
  int nregs;
  machine_mode mode;

  tree f_stack, f_grtop, f_vrtop, f_groff, f_vroff;
  tree stack, f_top, f_off, off, arg, roundup, on_stack;
  HOST_WIDE_INT size, rsize, adjust, align;
  tree t, u, cond1, cond2;

  indirect_p = pass_by_reference (NULL, TYPE_MODE (type), type, false);
  if (indirect_p)
    type = build_pointer_type (type);

  mode = TYPE_MODE (type);

  f_stack = TYPE_FIELDS (va_list_type_node);
  f_grtop = DECL_CHAIN (f_stack);
  f_vrtop = DECL_CHAIN (f_grtop);
  f_groff = DECL_CHAIN (f_vrtop);
  f_vroff = DECL_CHAIN (f_groff);

  stack = build3 (COMPONENT_REF, TREE_TYPE (f_stack), unshare_expr (valist),
		  f_stack, NULL_TREE);
  size = int_size_in_bytes (type);
  align = aarch64_function_arg_alignment (mode, type) / BITS_PER_UNIT;

  dw_align = false;
  adjust = 0;
  if (aarch64_vfp_is_call_or_return_candidate (mode,
					       type,
					       &ag_mode,
					       &nregs,
					       &is_ha))
    {
      /* No frontends can create types with variable-sized modes, so we
	 shouldn't be asked to pass or return them.  */
      unsigned int ag_size = GET_MODE_SIZE (ag_mode).to_constant ();

      /* TYPE passed in fp/simd registers.  */
      if (!TARGET_FLOAT)
	aarch64_err_no_fpadvsimd (mode);

      f_top = build3 (COMPONENT_REF, TREE_TYPE (f_vrtop),
		      unshare_expr (valist), f_vrtop, NULL_TREE);
      f_off = build3 (COMPONENT_REF, TREE_TYPE (f_vroff),
		      unshare_expr (valist), f_vroff, NULL_TREE);

      rsize = nregs * UNITS_PER_VREG;

      if (is_ha)
	{
	  if (BYTES_BIG_ENDIAN && ag_size < UNITS_PER_VREG)
	    adjust = UNITS_PER_VREG - ag_size;
	}
      else if (BLOCK_REG_PADDING (mode, type, 1) == PAD_DOWNWARD
	       && size < UNITS_PER_VREG)
	{
	  adjust = UNITS_PER_VREG - size;
	}
    }
  else
    {
      /* TYPE passed in general registers.  */
      f_top = build3 (COMPONENT_REF, TREE_TYPE (f_grtop),
		      unshare_expr (valist), f_grtop, NULL_TREE);
      f_off = build3 (COMPONENT_REF, TREE_TYPE (f_groff),
		      unshare_expr (valist), f_groff, NULL_TREE);
      rsize = ROUND_UP (size, UNITS_PER_WORD);
      nregs = rsize / UNITS_PER_WORD;

      if (align > 8)
	dw_align = true;

      if (BLOCK_REG_PADDING (mode, type, 1) == PAD_DOWNWARD
	  && size < UNITS_PER_WORD)
	{
	  adjust = UNITS_PER_WORD - size;
	}
    }

  /* Get a local temporary for the field value.  */
  off = get_initialized_tmp_var (f_off, pre_p, NULL);

  /* Emit code to branch if off >= 0.  */
  t = build2 (GE_EXPR, boolean_type_node, off,
	      build_int_cst (TREE_TYPE (off), 0));
  cond1 = build3 (COND_EXPR, ptr_type_node, t, NULL_TREE, NULL_TREE);

  if (dw_align)
    {
      /* Emit: offs = (offs + 15) & -16.  */
      t = build2 (PLUS_EXPR, TREE_TYPE (off), off,
		  build_int_cst (TREE_TYPE (off), 15));
      t = build2 (BIT_AND_EXPR, TREE_TYPE (off), t,
		  build_int_cst (TREE_TYPE (off), -16));
      roundup = build2 (MODIFY_EXPR, TREE_TYPE (off), off, t);
    }
  else
    roundup = NULL;

  /* Update ap.__[g|v]r_offs  */
  t = build2 (PLUS_EXPR, TREE_TYPE (off), off,
	      build_int_cst (TREE_TYPE (off), rsize));
  t = build2 (MODIFY_EXPR, TREE_TYPE (f_off), unshare_expr (f_off), t);

  /* String up.  */
  if (roundup)
    t = build2 (COMPOUND_EXPR, TREE_TYPE (t), roundup, t);

  /* [cond2] if (ap.__[g|v]r_offs > 0)  */
  u = build2 (GT_EXPR, boolean_type_node, unshare_expr (f_off),
	      build_int_cst (TREE_TYPE (f_off), 0));
  cond2 = build3 (COND_EXPR, ptr_type_node, u, NULL_TREE, NULL_TREE);

  /* String up: make sure the assignment happens before the use.  */
  t = build2 (COMPOUND_EXPR, TREE_TYPE (cond2), t, cond2);
  COND_EXPR_ELSE (cond1) = t;

  /* Prepare the trees handling the argument that is passed on the stack;
     the top level node will store in ON_STACK.  */
  arg = get_initialized_tmp_var (stack, pre_p, NULL);
  if (align > 8)
    {
      /* if (alignof(type) > 8) (arg = arg + 15) & -16;  */
      t = fold_build_pointer_plus_hwi (arg, 15);
      t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
		  build_int_cst (TREE_TYPE (t), -16));
      roundup = build2 (MODIFY_EXPR, TREE_TYPE (arg), arg, t);
    }
  else
    roundup = NULL;
  /* Advance ap.__stack  */
  t = fold_build_pointer_plus_hwi (arg, size + 7);
  t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
	      build_int_cst (TREE_TYPE (t), -8));
  t = build2 (MODIFY_EXPR, TREE_TYPE (stack), unshare_expr (stack), t);
  /* String up roundup and advance.  */
  if (roundup)
    t = build2 (COMPOUND_EXPR, TREE_TYPE (t), roundup, t);
  /* String up with arg  */
  on_stack = build2 (COMPOUND_EXPR, TREE_TYPE (arg), t, arg);
  /* Big-endianness related address adjustment.  */
  if (BLOCK_REG_PADDING (mode, type, 1) == PAD_DOWNWARD
      && size < UNITS_PER_WORD)
    {
      t = build2 (POINTER_PLUS_EXPR, TREE_TYPE (arg), arg,
		  size_int (UNITS_PER_WORD - size));
      on_stack = build2 (COMPOUND_EXPR, TREE_TYPE (arg), on_stack, t);
    }

  COND_EXPR_THEN (cond1) = unshare_expr (on_stack);
  COND_EXPR_THEN (cond2) = unshare_expr (on_stack);

  /* Adjustment to OFFSET in the case of BIG_ENDIAN.  */
  t = off;
  if (adjust)
    t = build2 (PREINCREMENT_EXPR, TREE_TYPE (off), off,
		build_int_cst (TREE_TYPE (off), adjust));

  t = fold_convert (sizetype, t);
  t = build2 (POINTER_PLUS_EXPR, TREE_TYPE (f_top), f_top, t);

  if (is_ha)
    {
      /* type ha; // treat as "struct {ftype field[n];}"
	 ... [computing offs]
	 for (i = 0; i < nregs; ++i, offs += 16)
	   ha.field[i] = *((ftype *)(ap.__vr_top + offs));
	 return ha;  */
      int i;
      tree tmp_ha, field_t, field_ptr_t;

      /* Declare a local variable.  */
      tmp_ha = create_tmp_var_raw (type, "ha");
      gimple_add_tmp_var (tmp_ha);

      /* Establish the base type.  */
      switch (ag_mode)
	{
	case E_SFmode:
	  field_t = float_type_node;
	  field_ptr_t = float_ptr_type_node;
	  break;
	case E_DFmode:
	  field_t = double_type_node;
	  field_ptr_t = double_ptr_type_node;
	  break;
	case E_TFmode:
	  field_t = long_double_type_node;
	  field_ptr_t = long_double_ptr_type_node;
	  break;
	case E_HFmode:
	  field_t = aarch64_fp16_type_node;
	  field_ptr_t = aarch64_fp16_ptr_type_node;
	  break;
	case E_V2SImode:
	case E_V4SImode:
	  {
	    tree innertype = make_signed_type (GET_MODE_PRECISION (SImode));
	    field_t = build_vector_type_for_mode (innertype, ag_mode);
	    field_ptr_t = build_pointer_type (field_t);
	  }
	  break;
	default:
	  gcc_assert (0);
	}

      /* *(field_ptr_t)&ha = *((field_ptr_t)vr_saved_area)  */
      tmp_ha = build1 (ADDR_EXPR, field_ptr_t, tmp_ha);
      addr = t;
      t = fold_convert (field_ptr_t, addr);
      t = build2 (MODIFY_EXPR, field_t,
		  build1 (INDIRECT_REF, field_t, tmp_ha),
		  build1 (INDIRECT_REF, field_t, t));

      /* ha.field[i] = *((field_ptr_t)vr_saved_area + i)  */
      for (i = 1; i < nregs; ++i)
	{
	  addr = fold_build_pointer_plus_hwi (addr, UNITS_PER_VREG);
	  u = fold_convert (field_ptr_t, addr);
	  u = build2 (MODIFY_EXPR, field_t,
		      build2 (MEM_REF, field_t, tmp_ha,
			      build_int_cst (field_ptr_t,
					     (i *
					      int_size_in_bytes (field_t)))),
		      build1 (INDIRECT_REF, field_t, u));
	  t = build2 (COMPOUND_EXPR, TREE_TYPE (t), t, u);
	}

      u = fold_convert (TREE_TYPE (f_top), tmp_ha);
      t = build2 (COMPOUND_EXPR, TREE_TYPE (f_top), t, u);
    }

  COND_EXPR_ELSE (cond2) = t;
  addr = fold_convert (build_pointer_type (type), cond1);
  addr = build_va_arg_indirect_ref (addr);

  if (indirect_p)
    addr = build_va_arg_indirect_ref (addr);

  return addr;
}

/* Implement TARGET_SETUP_INCOMING_VARARGS.  */

static void
aarch64_setup_incoming_varargs (cumulative_args_t cum_v, machine_mode mode,
				tree type, int *pretend_size ATTRIBUTE_UNUSED,
				int no_rtl)
{
  CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
  CUMULATIVE_ARGS local_cum;
  int gr_saved = cfun->va_list_gpr_size;
  int vr_saved = cfun->va_list_fpr_size;

  /* The caller has advanced CUM up to, but not beyond, the last named
     argument.  Advance a local copy of CUM past the last "real" named
     argument, to find out how many registers are left over.  */
  local_cum = *cum;
  aarch64_function_arg_advance (pack_cumulative_args (&local_cum),
				mode, type, true);

  /* Find out how many registers we need to save.
     Honor the tree-stdarg analysis results.  */
  if (cfun->va_list_gpr_size)
    gr_saved = MIN (NUM_ARG_REGS - local_cum.aapcs_ncrn,
		    cfun->va_list_gpr_size / UNITS_PER_WORD);
  if (cfun->va_list_fpr_size)
    vr_saved = MIN (NUM_FP_ARG_REGS - local_cum.aapcs_nvrn,
		    cfun->va_list_fpr_size / UNITS_PER_VREG);

  if (!TARGET_FLOAT)
    {
      gcc_assert (local_cum.aapcs_nvrn == 0);
      vr_saved = 0;
    }

  if (!no_rtl)
    {
      if (gr_saved > 0)
	{
	  rtx ptr, mem;

	  /* virtual_incoming_args_rtx should have been 16-byte aligned.  */
	  ptr = plus_constant (Pmode, virtual_incoming_args_rtx,
			       - gr_saved * UNITS_PER_WORD);
	  mem = gen_frame_mem (BLKmode, ptr);
	  set_mem_alias_set (mem, get_varargs_alias_set ());

	  move_block_from_reg (local_cum.aapcs_ncrn + R0_REGNUM,
			       mem, gr_saved);
	}
      if (vr_saved > 0)
	{
	  /* We can't use move_block_from_reg, because it will use
	     the wrong mode, storing D regs only.  */
	  machine_mode mode = TImode;
	  int off, i, vr_start;

	  /* Set OFF to the offset from virtual_incoming_args_rtx of
	     the first vector register.  The VR save area lies below
	     the GR one, and is aligned to 16 bytes.  */
	  off = -ROUND_UP (gr_saved * UNITS_PER_WORD,
			   STACK_BOUNDARY / BITS_PER_UNIT);
	  off -= vr_saved * UNITS_PER_VREG;

	  vr_start = V0_REGNUM + local_cum.aapcs_nvrn;
	  for (i = 0; i < vr_saved; ++i)
	    {
	      rtx ptr, mem;

	      ptr = plus_constant (Pmode, virtual_incoming_args_rtx, off);
	      mem = gen_frame_mem (mode, ptr);
	      set_mem_alias_set (mem, get_varargs_alias_set ());
	      aarch64_emit_move (mem, gen_rtx_REG (mode, vr_start + i));
	      off += UNITS_PER_VREG;
	    }
	}
    }

  /* We don't save the size into *PRETEND_SIZE because we want to avoid
     any complication of having crtl->args.pretend_args_size changed.  */
  cfun->machine->frame.saved_varargs_size
    = (ROUND_UP (gr_saved * UNITS_PER_WORD,
		 STACK_BOUNDARY / BITS_PER_UNIT)
       + vr_saved * UNITS_PER_VREG);
}

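/* Implement TARGET_CONDITIONAL_REGISTER_USAGE.  */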
static void
aarch64_conditional_register_usage (void)
{
  int i;
  if (!TARGET_FLOAT)
    {
      for (i = V0_REGNUM; i <= V31_REGNUM; i++)
	{
	  fixed_regs[i] = 1;
	  call_used_regs[i] = 1;
	}
    }
  if (!TARGET_SVE)
    for (i = P0_REGNUM; i <= P15_REGNUM; i++)
      {
	fixed_regs[i] = 1;
	call_used_regs[i] = 1;
      }

  /* When tracking speculation, we need a couple of call-clobbered registers
     to track the speculation state.  It would be nice to just use
     IP0 and IP1, but currently there are numerous places that just
     assume these registers are free for other uses (eg pointer
     authentication).  */
  if (aarch64_track_speculation)
    {
      fixed_regs[SPECULATION_TRACKER_REGNUM] = 1;
      call_used_regs[SPECULATION_TRACKER_REGNUM] = 1;
      fixed_regs[SPECULATION_SCRATCH_REGNUM] = 1;
      call_used_regs[SPECULATION_SCRATCH_REGNUM] = 1;
    }
}

/* Walk down the type tree of TYPE counting consecutive base elements.
   If *MODEP is VOIDmode, then set it to the first valid floating point
   type.  If a non-floating point type is found, or if a floating point
   type that doesn't match a non-VOIDmode *MODEP is found, then return -1,
   otherwise return the count in the sub-tree.  */
static int
aapcs_vfp_sub_candidate (const_tree type, machine_mode *modep)
{
  machine_mode mode;
  HOST_WIDE_INT size;

  switch (TREE_CODE (type))
    {
    case REAL_TYPE:
      mode = TYPE_MODE (type);
      if (mode != DFmode && mode != SFmode
	  && mode != TFmode && mode != HFmode)
	return -1;

      if (*modep == VOIDmode)
	*modep = mode;

      if (*modep == mode)
	return 1;

      break;

    case COMPLEX_TYPE:
      mode = TYPE_MODE (TREE_TYPE (type));
      if (mode != DFmode && mode != SFmode
	  && mode != TFmode && mode != HFmode)
	return -1;

      if (*modep == VOIDmode)
	*modep = mode;

      if (*modep == mode)
	return 2;

      break;

    case VECTOR_TYPE:
      /* Use V2SImode and V4SImode as representatives of all 64-bit
	 and 128-bit vector types.  */
      size = int_size_in_bytes (type);
      switch (size)
	{
	case 8:
	  mode = V2SImode;
	  break;
	case 16:
	  mode = V4SImode;
	  break;
	default:
	  return -1;
	}

      if (*modep == VOIDmode)
	*modep = mode;

      /* Vector modes are considered to be opaque: two vectors are
	 equivalent for the purposes of being homogeneous aggregates
	 if they are the same size.  */
      if (*modep == mode)
	return 1;

      break;

    case ARRAY_TYPE:
      {
	int count;
	tree index = TYPE_DOMAIN (type);

	/* Can't handle incomplete types nor sizes that are not
	   fixed.  */
	if (!COMPLETE_TYPE_P (type)
	    || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
	  return -1;

	count = aapcs_vfp_sub_candidate (TREE_TYPE (type), modep);
	if (count == -1
	    || !index
	    || !TYPE_MAX_VALUE (index)
	    || !tree_fits_uhwi_p (TYPE_MAX_VALUE (index))
	    || !TYPE_MIN_VALUE (index)
	    || !tree_fits_uhwi_p (TYPE_MIN_VALUE (index))
	    || count < 0)
	  return -1;

	count *= (1 + tree_to_uhwi (TYPE_MAX_VALUE (index))
		  - tree_to_uhwi (TYPE_MIN_VALUE (index)));

	/* There must be no padding.  */
	if (maybe_ne (wi::to_poly_wide (TYPE_SIZE (type)),
		      count * GET_MODE_BITSIZE (*modep)))
	  return -1;

	return count;
      }

    case RECORD_TYPE:
      {
	int count = 0;
	int sub_count;
	tree field;

	/* Can't handle incomplete types nor sizes that are not
	   fixed.  */
	if (!COMPLETE_TYPE_P (type)
	    || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
	  return -1;

	for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
	  {
	    if (TREE_CODE (field) != FIELD_DECL)
	      continue;

	    sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep);
	    if (sub_count < 0)
	      return -1;
	    count += sub_count;
	  }

	/* There must be no padding.  */
	if (maybe_ne (wi::to_poly_wide (TYPE_SIZE (type)),
		      count * GET_MODE_BITSIZE (*modep)))
	  return -1;

	return count;
      }

    case UNION_TYPE:
    case QUAL_UNION_TYPE:
      {
	/* These aren't very interesting except in a degenerate case.  */
	int count = 0;
	int sub_count;
	tree field;

	/* Can't handle incomplete types nor sizes that are not
	   fixed.  */
	if (!COMPLETE_TYPE_P (type)
	    || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
	  return -1;

	for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
	  {
	    if (TREE_CODE (field) != FIELD_DECL)
	      continue;

	    sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep);
	    if (sub_count < 0)
	      return -1;
	    count = count > sub_count ? count : sub_count;
	  }

	/* There must be no padding.  */
	if (maybe_ne (wi::to_poly_wide (TYPE_SIZE (type)),
		      count * GET_MODE_BITSIZE (*modep)))
	  return -1;

	return count;
      }

    default:
      break;
    }

  return -1;
}

/* Return TRUE if the type, as described by TYPE and MODE, is a short vector
   type as described in AAPCS64 \S 4.1.2.

   See the comment above aarch64_composite_type_p for the notes on MODE.  */

static bool
aarch64_short_vector_p (const_tree type,
			machine_mode mode)
{
  poly_int64 size = -1;

  if (type && TREE_CODE (type) == VECTOR_TYPE)
    size = int_size_in_bytes (type);
  else if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
	   || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
    size = GET_MODE_SIZE (mode);

  return known_eq (size, 8) || known_eq (size, 16);
}

/* Return TRUE if the type, as described by TYPE and MODE, is a composite
   type as described in AAPCS64 \S 4.3.  This includes aggregate, union and
   array types.  The C99 floating-point complex types are also considered
   as composite types, according to AAPCS64 \S 7.1.1.  The complex integer
   types, which are GCC extensions and out of the scope of AAPCS64, are
   treated as composite types here as well.

   Note that MODE itself is not sufficient in determining whether a type
   is such a composite type or not.  This is because
   stor-layout.c:compute_record_mode may have already changed the MODE
   (BLKmode) of a RECORD_TYPE TYPE to some other mode.  For example, a
   structure with only one field may have its MODE set to the mode of the
   field.  Also an integer mode whose size matches the size of the
   RECORD_TYPE type may be used to substitute the original mode
   (i.e. BLKmode) in certain circumstances.  In other words, MODE cannot be
   solely relied on.  */

static bool
aarch64_composite_type_p (const_tree type,
			  machine_mode mode)
{
  if (aarch64_short_vector_p (type, mode))
    return false;

  if (type && (AGGREGATE_TYPE_P (type) || TREE_CODE (type) == COMPLEX_TYPE))
    return true;

  if (mode == BLKmode
      || GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT
      || GET_MODE_CLASS (mode) == MODE_COMPLEX_INT)
    return true;

  return false;
}

/* Return TRUE if an argument, whose type is described by TYPE and MODE,
   shall be passed or returned in simd/fp register(s) (providing these
   parameter passing registers are available).

   Upon successful return, *COUNT returns the number of needed registers,
   *BASE_MODE returns the mode of the individual register and when IS_HA
   is not NULL, *IS_HA indicates whether or not the argument is a homogeneous
   floating-point aggregate or a homogeneous short-vector aggregate.  */

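/* For example, "struct { float x, y, z; }" is a homogeneous floating-point
   aggregate: *COUNT is set to 3 and *BASE_MODE to SFmode, so the argument
   occupies three consecutive SIMD/FP registers.  */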
static bool
aarch64_vfp_is_call_or_return_candidate (machine_mode mode,
					 const_tree type,
					 machine_mode *base_mode,
					 int *count,
					 bool *is_ha)
{
  machine_mode new_mode = VOIDmode;
  bool composite_p = aarch64_composite_type_p (type, mode);

  if (is_ha != NULL) *is_ha = false;

  if ((!composite_p && GET_MODE_CLASS (mode) == MODE_FLOAT)
      || aarch64_short_vector_p (type, mode))
    {
      *count = 1;
      new_mode = mode;
    }
  else if (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT)
    {
      if (is_ha != NULL) *is_ha = true;
      *count = 2;
      new_mode = GET_MODE_INNER (mode);
    }
  else if (type && composite_p)
    {
      int ag_count = aapcs_vfp_sub_candidate (type, &new_mode);

      if (ag_count > 0 && ag_count <= HA_MAX_NUM_FLDS)
	{
	  if (is_ha != NULL) *is_ha = true;
	  *count = ag_count;
	}
      else
	return false;
    }
  else
    return false;

  *base_mode = new_mode;
  return true;
}

/* Implement TARGET_STRUCT_VALUE_RTX.  */

static rtx
aarch64_struct_value_rtx (tree fndecl ATTRIBUTE_UNUSED,
			  int incoming ATTRIBUTE_UNUSED)
{
  return gen_rtx_REG (Pmode, AARCH64_STRUCT_VALUE_REGNUM);
}

/* Implement TARGET_VECTOR_MODE_SUPPORTED_P.  */
static bool
aarch64_vector_mode_supported_p (machine_mode mode)
{
  unsigned int vec_flags = aarch64_classify_vector_mode (mode);
  return vec_flags != 0 && (vec_flags & VEC_STRUCT) == 0;
}

/* Return the appropriate SIMD container
   for MODE within a vector of WIDTH bits.  */
static machine_mode
aarch64_simd_container_mode (scalar_mode mode, poly_int64 width)
{
  if (TARGET_SVE && known_eq (width, BITS_PER_SVE_VECTOR))
    switch (mode)
      {
      case E_DFmode:
	return VNx2DFmode;
      case E_SFmode:
	return VNx4SFmode;
      case E_HFmode:
	return VNx8HFmode;
      case E_DImode:
	return VNx2DImode;
      case E_SImode:
	return VNx4SImode;
      case E_HImode:
	return VNx8HImode;
      case E_QImode:
	return VNx16QImode;
      default:
	return word_mode;
      }

  gcc_assert (known_eq (width, 64) || known_eq (width, 128));
  if (TARGET_SIMD)
    {
      if (known_eq (width, 128))
	switch (mode)
	  {
	  case E_DFmode:
	    return V2DFmode;
	  case E_SFmode:
	    return V4SFmode;
	  case E_HFmode:
	    return V8HFmode;
	  case E_SImode:
	    return V4SImode;
	  case E_HImode:
	    return V8HImode;
	  case E_QImode:
	    return V16QImode;
	  case E_DImode:
	    return V2DImode;
	  default:
	    break;
	  }
      else
	switch (mode)
	  {
	  case E_SFmode:
	    return V2SFmode;
	  case E_HFmode:
	    return V4HFmode;
	  case E_SImode:
	    return V2SImode;
	  case E_HImode:
	    return V4HImode;
	  case E_QImode:
	    return V8QImode;
	  default:
	    break;
	  }
    }
  return word_mode;
}

/* Return the preferred SIMD mode for MODE: a full SVE vector mode if SVE
   is enabled, otherwise a 128-bit container.  */
static machine_mode
aarch64_preferred_simd_mode (scalar_mode mode)
{
  poly_int64 bits = TARGET_SVE ? BITS_PER_SVE_VECTOR : 128;
  return aarch64_simd_container_mode (mode, bits);
}

/* Return a list of possible vector sizes for the vectorizer
   to iterate over.  */
static void
aarch64_autovectorize_vector_sizes (vector_sizes *sizes)
{
  if (TARGET_SVE)
    sizes->safe_push (BYTES_PER_SVE_VECTOR);
  sizes->safe_push (16);
  sizes->safe_push (8);
}

/* Implement TARGET_MANGLE_TYPE.  */

static const char *
aarch64_mangle_type (const_tree type)
{
  /* The AArch64 ABI documents say that "__va_list" has to be
     mangled as if it is in the "std" namespace.  */
  if (lang_hooks.types_compatible_p (CONST_CAST_TREE (type), va_list_type))
    return "St9__va_list";

  /* Half-precision float.  */
  if (TREE_CODE (type) == REAL_TYPE && TYPE_PRECISION (type) == 16)
    return "Dh";

  /* Mangle AArch64-specific internal types.  TYPE_NAME is non-NULL_TREE for
     builtin types.  */
  if (TYPE_NAME (type) != NULL)
    return aarch64_mangle_builtin_type (type);

  /* Use the default mangling.  */
  return NULL;
}

/* Find the first rtx_insn before INSN that will generate an assembly
   instruction.  */

static rtx_insn *
aarch64_prev_real_insn (rtx_insn *insn)
{
  if (!insn)
    return NULL;

  do
    {
      insn = prev_real_insn (insn);
    }
  while (insn && recog_memoized (insn) < 0);

  return insn;
}

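/* Return true if T1 is a multiply-accumulate instruction type.  */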
static bool
is_madd_op (enum attr_type t1)
{
  unsigned int i;
  /* A number of these may be AArch32 only.  */
  enum attr_type mlatypes[] = {
    TYPE_MLA, TYPE_MLAS, TYPE_SMLAD, TYPE_SMLADX, TYPE_SMLAL, TYPE_SMLALD,
    TYPE_SMLALS, TYPE_SMLALXY, TYPE_SMLAWX, TYPE_SMLAWY, TYPE_SMLAXY,
    TYPE_SMMLA, TYPE_UMLAL, TYPE_UMLALS, TYPE_SMLSD, TYPE_SMLSDX, TYPE_SMLSLD
  };

  for (i = 0; i < sizeof (mlatypes) / sizeof (enum attr_type); i++)
    {
      if (t1 == mlatypes[i])
	return true;
    }

  return false;
}

/* Check if there is a register dependency between a load and the insn
   for which we hold recog_data.  */

static bool
dep_between_memop_and_curr (rtx memop)
{
  rtx load_reg;
  int opno;

  gcc_assert (GET_CODE (memop) == SET);

  if (!REG_P (SET_DEST (memop)))
    return false;

  load_reg = SET_DEST (memop);
  for (opno = 1; opno < recog_data.n_operands; opno++)
    {
      rtx operand = recog_data.operand[opno];
      if (REG_P (operand)
	  && reg_overlap_mentioned_p (load_reg, operand))
	return true;
    }
  return false;
}

/* When working around the Cortex-A53 erratum 835769,
   given rtx_insn INSN, return true if it is a 64-bit multiply-accumulate
   instruction and has a preceding memory instruction such that a NOP
   should be inserted between them.  */

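/* For instance, in a sequence like the following (an illustrative
   example, not actual compiler output), the workaround emits a NOP
   between the load and the multiply-accumulate:

     ldr   x10, [sp, 8]
     nop                          // inserted by the workaround
     madd  x8, x9, x10, x11  */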
bool
aarch64_madd_needs_nop (rtx_insn* insn)
{
  enum attr_type attr_type;
  rtx_insn *prev;
  rtx body;

  if (!TARGET_FIX_ERR_A53_835769)
    return false;

  if (!INSN_P (insn) || recog_memoized (insn) < 0)
    return false;

  attr_type = get_attr_type (insn);
  if (!is_madd_op (attr_type))
    return false;

  prev = aarch64_prev_real_insn (insn);
  /* aarch64_prev_real_insn can call recog_memoized on insns other than INSN.
     Restore recog state to INSN to avoid state corruption.  */
  extract_constrain_insn_cached (insn);

  if (!prev || !contains_mem_rtx_p (PATTERN (prev)))
    return false;

  body = single_set (prev);

  /* If the previous insn is a memory op and there is no dependency between
     it and the DImode madd, emit a NOP between them.  If body is NULL then we
     have a complex memory operation, probably a load/store pair.
     Be conservative for now and emit a NOP.  */
  if (GET_MODE (recog_data.operand[0]) == DImode
      && (!body || !dep_between_memop_and_curr (body)))
    return true;

  return false;
}

/* Implement FINAL_PRESCAN_INSN.  */

void
aarch64_final_prescan_insn (rtx_insn *insn)
{
  if (aarch64_madd_needs_nop (insn))
    fprintf (asm_out_file, "\tnop // between mem op and mult-accumulate\n");
}

/* Return true if BASE_OR_STEP is a valid immediate operand for an SVE INDEX
   instruction.  */

bool
aarch64_sve_index_immediate_p (rtx base_or_step)
{
  return (CONST_INT_P (base_or_step)
	  && IN_RANGE (INTVAL (base_or_step), -16, 15));
}

/* Return true if X is a valid immediate for the SVE ADD and SUB
   instructions.  Negate X first if NEGATE_P is true.  */

bool
aarch64_sve_arith_immediate_p (rtx x, bool negate_p)
{
  rtx elt;

  if (!const_vec_duplicate_p (x, &elt)
      || !CONST_INT_P (elt))
    return false;

  HOST_WIDE_INT val = INTVAL (elt);
  if (negate_p)
    val = -val;
  val &= GET_MODE_MASK (GET_MODE_INNER (GET_MODE (x)));

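  /* The immediate must be an unsigned 8-bit value, either unshifted or
     shifted left by eight (the "LSL #8" form of the instructions).  */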
  if (val & 0xff)
    return IN_RANGE (val, 0, 0xff);
  return IN_RANGE (val, 0, 0xff00);
}

/* Return true if X is a valid immediate operand for an SVE logical
   instruction such as AND.  */

bool
aarch64_sve_bitmask_immediate_p (rtx x)
{
  rtx elt;

  return (const_vec_duplicate_p (x, &elt)
	  && CONST_INT_P (elt)
	  && aarch64_bitmask_imm (INTVAL (elt),
				  GET_MODE_INNER (GET_MODE (x))));
}

/* Return true if X is a valid immediate for the SVE DUP and CPY
   instructions.  */

bool
aarch64_sve_dup_immediate_p (rtx x)
{
  rtx elt;

  if (!const_vec_duplicate_p (x, &elt)
      || !CONST_INT_P (elt))
    return false;

  HOST_WIDE_INT val = INTVAL (elt);
  if (val & 0xff)
    return IN_RANGE (val, -0x80, 0x7f);
  return IN_RANGE (val, -0x8000, 0x7f00);
}

/* Return true if X is a valid immediate operand for an SVE CMP instruction.
   SIGNED_P says whether the operand is signed rather than unsigned.  */

bool
aarch64_sve_cmp_immediate_p (rtx x, bool signed_p)
{
  rtx elt;

  return (const_vec_duplicate_p (x, &elt)
	  && CONST_INT_P (elt)
	  && (signed_p
	      ? IN_RANGE (INTVAL (elt), -16, 15)
	      : IN_RANGE (INTVAL (elt), 0, 127)));
}

/* Return true if X is a valid immediate operand for an SVE FADD or FSUB
   instruction.  Negate X first if NEGATE_P is true.  */

bool
aarch64_sve_float_arith_immediate_p (rtx x, bool negate_p)
{
  rtx elt;
  REAL_VALUE_TYPE r;

  if (!const_vec_duplicate_p (x, &elt)
      || GET_CODE (elt) != CONST_DOUBLE)
    return false;

  r = *CONST_DOUBLE_REAL_VALUE (elt);

  if (negate_p)
    r = real_value_negate (&r);

  if (real_equal (&r, &dconst1))
    return true;
  if (real_equal (&r, &dconsthalf))
    return true;
  return false;
}

/* Return true if X is a valid immediate operand for an SVE FMUL
   instruction.  */

bool
aarch64_sve_float_mul_immediate_p (rtx x)
{
  rtx elt;

  /* GCC will never generate a multiply with an immediate of 2, so there is no
     point testing for it (even though it is a valid constant).  */
  return (const_vec_duplicate_p (x, &elt)
	  && GET_CODE (elt) == CONST_DOUBLE
	  && real_equal (CONST_DOUBLE_REAL_VALUE (elt), &dconsthalf));
}

/* Return true if replicating VAL32 is a valid 2-byte or 4-byte immediate
   for the Advanced SIMD operation described by WHICH and INSN.  If INFO
   is nonnull, use it to describe valid immediates.  */
static bool
aarch64_advsimd_valid_immediate_hs (unsigned int val32,
				    simd_immediate_info *info,
				    enum simd_immediate_check which,
				    simd_immediate_info::insn_type insn)
{
  /* Try a 4-byte immediate with LSL.  */
  for (unsigned int shift = 0; shift < 32; shift += 8)
    if ((val32 & (0xff << shift)) == val32)
      {
	if (info)
	  *info = simd_immediate_info (SImode, val32 >> shift, insn,
				       simd_immediate_info::LSL, shift);
	return true;
      }

  /* Try a 2-byte immediate with LSL.  */
  unsigned int imm16 = val32 & 0xffff;
  if (imm16 == (val32 >> 16))
    for (unsigned int shift = 0; shift < 16; shift += 8)
      if ((imm16 & (0xff << shift)) == imm16)
	{
	  if (info)
	    *info = simd_immediate_info (HImode, imm16 >> shift, insn,
					 simd_immediate_info::LSL, shift);
	  return true;
	}

  /* Try a 4-byte immediate with MSL, except for cases that MVN
     can handle.  */
  if (which == AARCH64_CHECK_MOV)
    for (unsigned int shift = 8; shift < 24; shift += 8)
      {
	unsigned int low = (1 << shift) - 1;
	if (((val32 & (0xff << shift)) | low) == val32)
	  {
	    if (info)
	      *info = simd_immediate_info (SImode, val32 >> shift, insn,
					   simd_immediate_info::MSL, shift);
	    return true;
	  }
      }

  return false;
}

/* Return true if replicating VAL64 is a valid immediate for the
   Advanced SIMD operation described by WHICH.  If INFO is nonnull,
   use it to describe valid immediates.  */
static bool
aarch64_advsimd_valid_immediate (unsigned HOST_WIDE_INT val64,
				 simd_immediate_info *info,
				 enum simd_immediate_check which)
{
  unsigned int val32 = val64 & 0xffffffff;
  unsigned int val16 = val64 & 0xffff;
  unsigned int val8 = val64 & 0xff;

  if (val32 == (val64 >> 32))
    {
      if ((which & AARCH64_CHECK_ORR) != 0
	  && aarch64_advsimd_valid_immediate_hs (val32, info, which,
						 simd_immediate_info::MOV))
	return true;

      if ((which & AARCH64_CHECK_BIC) != 0
	  && aarch64_advsimd_valid_immediate_hs (~val32, info, which,
						 simd_immediate_info::MVN))
	return true;

      /* Try using a replicated byte.  */
      if (which == AARCH64_CHECK_MOV
	  && val16 == (val32 >> 16)
	  && val8 == (val16 >> 8))
	{
	  if (info)
	    *info = simd_immediate_info (QImode, val8);
	  return true;
	}
    }

  /* Try using a bit-to-bytemask.  */
  if (which == AARCH64_CHECK_MOV)
    {
      unsigned int i;
      for (i = 0; i < 64; i += 8)
	{
	  unsigned char byte = (val64 >> i) & 0xff;
	  if (byte != 0 && byte != 0xff)
	    break;
	}
      if (i == 64)
	{
	  if (info)
	    *info = simd_immediate_info (DImode, val64);
	  return true;
	}
    }
  return false;
}

/* Return true if replicating VAL64 gives a valid immediate for an SVE MOV
   instruction.  If INFO is nonnull, use it to describe valid immediates.  */

static bool
aarch64_sve_valid_immediate (unsigned HOST_WIDE_INT val64,
			     simd_immediate_info *info)
{
  scalar_int_mode mode = DImode;
  unsigned int val32 = val64 & 0xffffffff;
  if (val32 == (val64 >> 32))
    {
      mode = SImode;
      unsigned int val16 = val32 & 0xffff;
      if (val16 == (val32 >> 16))
	{
	  mode = HImode;
	  unsigned int val8 = val16 & 0xff;
	  if (val8 == (val16 >> 8))
	    mode = QImode;
	}
    }
  HOST_WIDE_INT val = trunc_int_for_mode (val64, mode);
  if (IN_RANGE (val, -0x80, 0x7f))
    {
      /* DUP with no shift.  */
      if (info)
	*info = simd_immediate_info (mode, val);
      return true;
    }
  if ((val & 0xff) == 0 && IN_RANGE (val, -0x8000, 0x7f00))
    {
      /* DUP with LSL #8.  */
      if (info)
	*info = simd_immediate_info (mode, val);
      return true;
    }
  if (aarch64_bitmask_imm (val64, mode))
    {
      /* DUPM.  */
      if (info)
	*info = simd_immediate_info (mode, val);
      return true;
    }
  return false;
}

/* Return true if OP is a valid SIMD immediate for the operation
   described by WHICH.  If INFO is nonnull, use it to describe valid
   immediates.  */
bool
aarch64_simd_valid_immediate (rtx op, simd_immediate_info *info,
			      enum simd_immediate_check which)
{
  machine_mode mode = GET_MODE (op);
  unsigned int vec_flags = aarch64_classify_vector_mode (mode);
  if (vec_flags == 0 || vec_flags == (VEC_ADVSIMD | VEC_STRUCT))
    return false;

  scalar_mode elt_mode = GET_MODE_INNER (mode);
  rtx base, step;
  unsigned int n_elts;
  if (GET_CODE (op) == CONST_VECTOR
      && CONST_VECTOR_DUPLICATE_P (op))
    n_elts = CONST_VECTOR_NPATTERNS (op);
  else if ((vec_flags & VEC_SVE_DATA)
	   && const_vec_series_p (op, &base, &step))
    {
      gcc_assert (GET_MODE_CLASS (mode) == MODE_VECTOR_INT);
      if (!aarch64_sve_index_immediate_p (base)
	  || !aarch64_sve_index_immediate_p (step))
	return false;

      if (info)
	*info = simd_immediate_info (elt_mode, base, step);
      return true;
    }
  else if (GET_CODE (op) == CONST_VECTOR
	   && CONST_VECTOR_NUNITS (op).is_constant (&n_elts))
    /* N_ELTS set above.  */;
  else
    return false;

  /* Handle PFALSE and PTRUE.  */
  if (vec_flags & VEC_SVE_PRED)
    return (op == CONST0_RTX (mode)
	    || op == CONSTM1_RTX (mode));

  scalar_float_mode elt_float_mode;
  if (n_elts == 1
      && is_a <scalar_float_mode> (elt_mode, &elt_float_mode))
    {
      rtx elt = CONST_VECTOR_ENCODED_ELT (op, 0);
      if (aarch64_float_const_zero_rtx_p (elt)
	  || aarch64_float_const_representable_p (elt))
	{
	  if (info)
	    *info = simd_immediate_info (elt_float_mode, elt);
	  return true;
	}
    }

  unsigned int elt_size = GET_MODE_SIZE (elt_mode);
  if (elt_size > 8)
    return false;

  scalar_int_mode elt_int_mode = int_mode_for_mode (elt_mode).require ();

  /* Expand the vector constant out into a byte vector, with the least
     significant byte of the register first.  */
  auto_vec<unsigned char, 16> bytes;
  bytes.reserve (n_elts * elt_size);
  for (unsigned int i = 0; i < n_elts; i++)
    {
      /* The vector is provided in gcc endian-neutral fashion.
	 For aarch64_be Advanced SIMD, it must be laid out in the vector
	 register in reverse order.  */
      bool swap_p = ((vec_flags & VEC_ADVSIMD) != 0 && BYTES_BIG_ENDIAN);
      rtx elt = CONST_VECTOR_ELT (op, swap_p ? (n_elts - 1 - i) : i);

      if (elt_mode != elt_int_mode)
	elt = gen_lowpart (elt_int_mode, elt);

      if (!CONST_INT_P (elt))
	return false;

      unsigned HOST_WIDE_INT elt_val = INTVAL (elt);
      for (unsigned int byte = 0; byte < elt_size; byte++)
	{
	  bytes.quick_push (elt_val & 0xff);
	  elt_val >>= BITS_PER_UNIT;
	}
    }

  /* The immediate must repeat every eight bytes.  */
  unsigned int nbytes = bytes.length ();
  for (unsigned i = 8; i < nbytes; ++i)
    if (bytes[i] != bytes[i - 8])
      return false;

  /* Get the repeating 8-byte value as an integer.  No endian correction
     is needed here because bytes is already in lsb-first order.  */
  unsigned HOST_WIDE_INT val64 = 0;
  for (unsigned int i = 0; i < 8; i++)
    val64 |= ((unsigned HOST_WIDE_INT) bytes[i % nbytes]
	      << (i * BITS_PER_UNIT));

  if (vec_flags & VEC_SVE_DATA)
    return aarch64_sve_valid_immediate (val64, info);
  else
    return aarch64_advsimd_valid_immediate (val64, info, which);
}

/* Check whether X is a VEC_SERIES-like constant that starts at 0 and
   has a step in the range of INDEX.  Return the index expression if so,
   otherwise return null.  */
rtx
aarch64_check_zero_based_sve_index_immediate (rtx x)
{
  rtx base, step;
  if (const_vec_series_p (x, &base, &step)
      && base == const0_rtx
      && aarch64_sve_index_immediate_p (step))
    return step;
  return NULL_RTX;
}

/* Check if immediate shift constants are within range.  */
bool
aarch64_simd_shift_imm_p (rtx x, machine_mode mode, bool left)
{
  int bit_width = GET_MODE_UNIT_SIZE (mode) * BITS_PER_UNIT;
  if (left)
    return aarch64_const_vec_all_same_in_range_p (x, 0, bit_width - 1);
  else
    return aarch64_const_vec_all_same_in_range_p (x, 1, bit_width);
}

/* Return the bitmask CONST_INT to select the bits required by a zero extract
   operation of width WIDTH at bit position POS.  */

rtx
aarch64_mask_from_zextract_ops (rtx width, rtx pos)
{
  gcc_assert (CONST_INT_P (width));
  gcc_assert (CONST_INT_P (pos));

  unsigned HOST_WIDE_INT mask
    = ((unsigned HOST_WIDE_INT) 1 << UINTVAL (width)) - 1;
  return GEN_INT (mask << UINTVAL (pos));
}

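/* Return true if X is a legitimate source operand for a move into a
   register of mode MODE.  */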
bool
aarch64_mov_operand_p (rtx x, machine_mode mode)
{
  if (GET_CODE (x) == HIGH
      && aarch64_valid_symref (XEXP (x, 0), GET_MODE (XEXP (x, 0))))
    return true;

  if (CONST_INT_P (x))
    return true;

  if (VECTOR_MODE_P (GET_MODE (x)))
    return aarch64_simd_valid_immediate (x, NULL);

  if (GET_CODE (x) == SYMBOL_REF && mode == DImode && CONSTANT_ADDRESS_P (x))
    return true;

  if (aarch64_sve_cnt_immediate_p (x))
    return true;

  return aarch64_classify_symbolic_expression (x)
	 == SYMBOL_TINY_ABSOLUTE;
}

/* Return a const_int vector of VAL.  */
rtx
aarch64_simd_gen_const_vector_dup (machine_mode mode, HOST_WIDE_INT val)
{
  rtx c = gen_int_mode (val, GET_MODE_INNER (mode));
  return gen_const_vec_duplicate (mode, c);
}

/* Check OP is a legal scalar immediate for the MOVI instruction.  */

bool
aarch64_simd_scalar_immediate_valid_for_move (rtx op, scalar_int_mode mode)
{
  machine_mode vmode;

  vmode = aarch64_simd_container_mode (mode, 64);
  rtx op_v = aarch64_simd_gen_const_vector_dup (vmode, INTVAL (op));
  return aarch64_simd_valid_immediate (op_v, NULL);
}

/* Construct and return a PARALLEL RTX vector with elements numbering the
   lanes of either the high (HIGH == TRUE) or low (HIGH == FALSE) half of
   the vector - from the perspective of the architecture.  This does not
   line up with GCC's perspective on lane numbers, so we end up with
   different masks depending on our target endian-ness.  The diagram
   below may help.  We must draw the distinction when building masks
   which select one half of the vector.  An instruction selecting
   architectural low-lanes for a big-endian target, must be described using
   a mask selecting GCC high-lanes.

                 Big-Endian             Little-Endian

GCC             0   1   2   3           3   2   1   0
              | x | x | x | x |       | x | x | x | x |
Architecture    3   2   1   0           3   2   1   0

Low Mask:         { 2, 3 }                { 0, 1 }
High Mask:        { 0, 1 }                { 2, 3 }

   MODE is the mode of the vector and NUNITS is the number of units in it.  */

rtx
aarch64_simd_vect_par_cnst_half (machine_mode mode, int nunits, bool high)
{
  rtvec v = rtvec_alloc (nunits / 2);
  int high_base = nunits / 2;
  int low_base = 0;
  int base;
  rtx t1;
  int i;

  if (BYTES_BIG_ENDIAN)
    base = high ? low_base : high_base;
  else
    base = high ? high_base : low_base;

  for (i = 0; i < nunits / 2; i++)
    RTVEC_ELT (v, i) = GEN_INT (base + i);

  t1 = gen_rtx_PARALLEL (mode, v);
  return t1;
}


/* Check OP for validity as a PARALLEL RTX vector with elements
   numbering the lanes of either the high (HIGH == TRUE) or low lanes,
   from the perspective of the architecture.  See the diagram above
   aarch64_simd_vect_par_cnst_half for more details.  */

bool
aarch64_simd_check_vect_par_cnst_half (rtx op, machine_mode mode,
				       bool high)
{
  int nelts;
  if (!VECTOR_MODE_P (mode) || !GET_MODE_NUNITS (mode).is_constant (&nelts))
    return false;

  rtx ideal = aarch64_simd_vect_par_cnst_half (mode, nelts, high);
  HOST_WIDE_INT count_op = XVECLEN (op, 0);
  HOST_WIDE_INT count_ideal = XVECLEN (ideal, 0);
  int i = 0;

  if (count_op != count_ideal)
    return false;

  for (i = 0; i < count_ideal; i++)
    {
      rtx elt_op = XVECEXP (op, 0, i);
      rtx elt_ideal = XVECEXP (ideal, 0, i);

      if (!CONST_INT_P (elt_op)
	  || INTVAL (elt_ideal) != INTVAL (elt_op))
	return false;
    }
  return true;
}

/* Bounds-check lanes.  Ensure OPERAND lies between LOW (inclusive) and
   HIGH (exclusive).  */
void
aarch64_simd_lane_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high,
			  const_tree exp)
{
  HOST_WIDE_INT lane;
  gcc_assert (CONST_INT_P (operand));
  lane = INTVAL (operand);

  if (lane < low || lane >= high)
    {
      if (exp)
	error ("%Klane %wd out of range %wd - %wd", exp, lane, low, high - 1);
      else
	error ("lane %wd out of range %wd - %wd", lane, low, high - 1);
    }
}

/* Perform endian correction on lane number N, which indexes a vector
   of mode MODE, and return the result as an SImode rtx.  */

rtx
aarch64_endian_lane_rtx (machine_mode mode, unsigned int n)
{
  return gen_int_mode (ENDIAN_LANE_N (GET_MODE_NUNITS (mode), n), SImode);
}

/* Return TRUE if OP is a valid vector addressing mode.  */

bool
aarch64_simd_mem_operand_p (rtx op)
{
  return MEM_P (op) && (GET_CODE (XEXP (op, 0)) == POST_INC
			|| REG_P (XEXP (op, 0)));
}

/* Return true if OP is a valid MEM operand for an SVE LD1R instruction.  */

bool
aarch64_sve_ld1r_operand_p (rtx op)
{
  struct aarch64_address_info addr;
  scalar_mode mode;

  return (MEM_P (op)
	  && is_a <scalar_mode> (GET_MODE (op), &mode)
	  && aarch64_classify_address (&addr, XEXP (op, 0), mode, false)
	  && addr.type == ADDRESS_REG_IMM
	  && offset_6bit_unsigned_scaled_p (mode, addr.const_offset));
}

/* Return true if OP is a valid MEM operand for an SVE LDR instruction.
   The conditions for STR are the same.  */
bool
aarch64_sve_ldr_operand_p (rtx op)
{
  struct aarch64_address_info addr;

  return (MEM_P (op)
	  && aarch64_classify_address (&addr, XEXP (op, 0), GET_MODE (op),
				       false, ADDR_QUERY_ANY)
	  && addr.type == ADDRESS_REG_IMM);
}

/* Return true if OP is a valid MEM operand for an SVE_STRUCT mode.
   We need to be able to access the individual pieces, so the range
   is different from LD[234] and ST[234].  */
bool
aarch64_sve_struct_memory_operand_p (rtx op)
{
  if (!MEM_P (op))
    return false;

  machine_mode mode = GET_MODE (op);
  struct aarch64_address_info addr;
  if (!aarch64_classify_address (&addr, XEXP (op, 0), SVE_BYTE_MODE, false,
				 ADDR_QUERY_ANY)
      || addr.type != ADDRESS_REG_IMM)
    return false;

  poly_int64 first = addr.const_offset;
  poly_int64 last = first + GET_MODE_SIZE (mode) - BYTES_PER_SVE_VECTOR;
  return (offset_4bit_signed_scaled_p (SVE_BYTE_MODE, first)
	  && offset_4bit_signed_scaled_p (SVE_BYTE_MODE, last));
}

/* Emit a register copy from operand to operand, taking care not to
   early-clobber source registers in the process.

   COUNT is the number of components into which the copy needs to be
   decomposed.  */
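/* For example (illustrative): with COUNT = 2, copying the register pair
   starting at V1 to the pair starting at V2 must copy the high part first,
   since a low-to-high copy would overwrite V2 before it has been read.  */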
void
aarch64_simd_emit_reg_reg_move (rtx *operands, machine_mode mode,
				unsigned int count)
{
  unsigned int i;
  int rdest = REGNO (operands[0]);
  int rsrc = REGNO (operands[1]);

  if (!reg_overlap_mentioned_p (operands[0], operands[1])
      || rdest < rsrc)
    for (i = 0; i < count; i++)
      emit_move_insn (gen_rtx_REG (mode, rdest + i),
		      gen_rtx_REG (mode, rsrc + i));
  else
    for (i = 0; i < count; i++)
      emit_move_insn (gen_rtx_REG (mode, rdest + count - i - 1),
		      gen_rtx_REG (mode, rsrc + count - i - 1));
}

/* Compute and return the length of aarch64_simd_reglist<mode>, where <mode> is
   one of VSTRUCT modes: OI, CI, or XI.  */
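/* For example (illustrative): an OImode list covers two vector registers
   and so needs two 4-byte move instructions, giving a length of 8.  */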
int
aarch64_simd_attr_length_rglist (machine_mode mode)
{
  /* This is only used (and only meaningful) for Advanced SIMD, not SVE.  */
  return (GET_MODE_SIZE (mode).to_constant () / UNITS_PER_VREG) * 4;
}

/* Implement target hook TARGET_VECTOR_ALIGNMENT.  The AAPCS64 sets the maximum
   alignment of a vector to 128 bits.  SVE predicates have an alignment of
   16 bits.  */
static HOST_WIDE_INT
aarch64_simd_vector_alignment (const_tree type)
{
  if (TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
    /* ??? Checking the mode isn't ideal, but VECTOR_BOOLEAN_TYPE_P can
       be set for non-predicate vectors of booleans.  Modes are the most
       direct way we have of identifying real SVE predicate types.  */
    return GET_MODE_CLASS (TYPE_MODE (type)) == MODE_VECTOR_BOOL ? 16 : 128;
  HOST_WIDE_INT align = tree_to_shwi (TYPE_SIZE (type));
  return MIN (align, 128);
}

/* Implement target hook TARGET_VECTORIZE_PREFERRED_VECTOR_ALIGNMENT.  */
static HOST_WIDE_INT
aarch64_vectorize_preferred_vector_alignment (const_tree type)
{
  if (aarch64_sve_data_mode_p (TYPE_MODE (type)))
    {
      /* If the length of the vector is fixed, try to align to that length,
	 otherwise don't try to align at all.  */
      HOST_WIDE_INT result;
      if (!BITS_PER_SVE_VECTOR.is_constant (&result))
	result = TYPE_ALIGN (TREE_TYPE (type));
      return result;
    }
  return TYPE_ALIGN (type);
}

/* Implement target hook TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE.  */
static bool
aarch64_simd_vector_alignment_reachable (const_tree type, bool is_packed)
{
  if (is_packed)
    return false;

  /* For fixed-length vectors, check that the vectorizer will aim for
     full-vector alignment.  This isn't true for generic GCC vectors
     that are wider than the ABI maximum of 128 bits.  */
  if (TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
      && (wi::to_widest (TYPE_SIZE (type))
	  != aarch64_vectorize_preferred_vector_alignment (type)))
    return false;

  /* Vectors whose size is <= BIGGEST_ALIGNMENT are naturally aligned.  */
  return true;
}

/* Return true if the vector misalignment factor is supported by the
   target.  */
static bool
aarch64_builtin_support_vector_misalignment (machine_mode mode,
					     const_tree type, int misalignment,
					     bool is_packed)
{
  if (TARGET_SIMD && STRICT_ALIGNMENT)
    {
      /* Return false if the movmisalign pattern is not supported for this
	 mode.  */
      if (optab_handler (movmisalign_optab, mode) == CODE_FOR_nothing)
	return false;

      /* Misalignment factor is unknown at compile time.  */
      if (misalignment == -1)
	return false;
    }
  return default_builtin_support_vector_misalignment (mode, type, misalignment,
						      is_packed);
}

/* If VALS is a vector constant that can be loaded into a register
   using DUP, generate instructions to do so and return an RTX to
   assign to the register.  Otherwise return NULL_RTX.  */
static rtx
aarch64_simd_dup_constant (rtx vals)
{
  machine_mode mode = GET_MODE (vals);
  machine_mode inner_mode = GET_MODE_INNER (mode);
  rtx x;

  if (!const_vec_duplicate_p (vals, &x))
    return NULL_RTX;

  /* We can load this constant by using DUP and a constant in a
     single ARM register.  This will be cheaper than a vector
     load.  */
  x = copy_to_mode_reg (inner_mode, x);
  return gen_vec_duplicate (mode, x);
}


/* Generate code to load VALS, which is a PARALLEL containing only
   constants (for vec_init) or CONST_VECTOR, efficiently into a
   register.  Returns an RTX to copy into the register, or NULL_RTX
   for a PARALLEL that cannot be converted into a CONST_VECTOR.  */
static rtx
aarch64_simd_make_constant (rtx vals)
{
  machine_mode mode = GET_MODE (vals);
  rtx const_dup;
  rtx const_vec = NULL_RTX;
  int n_const = 0;
  int i;

  if (GET_CODE (vals) == CONST_VECTOR)
    const_vec = vals;
  else if (GET_CODE (vals) == PARALLEL)
    {
      /* A CONST_VECTOR must contain only CONST_INTs and
	 CONST_DOUBLEs, but CONSTANT_P allows more (e.g. SYMBOL_REF).
	 Only store valid constants in a CONST_VECTOR.  */
      int n_elts = XVECLEN (vals, 0);
      for (i = 0; i < n_elts; ++i)
	{
	  rtx x = XVECEXP (vals, 0, i);
	  if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
	    n_const++;
	}
      if (n_const == n_elts)
	const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0));
    }
  else
    gcc_unreachable ();

  if (const_vec != NULL_RTX
      && aarch64_simd_valid_immediate (const_vec, NULL))
    /* Load using MOVI/MVNI.  */
    return const_vec;
  else if ((const_dup = aarch64_simd_dup_constant (vals)) != NULL_RTX)
    /* Loaded using DUP.  */
    return const_dup;
  else if (const_vec != NULL_RTX)
    /* Load from constant pool.  We cannot take advantage of single-cycle
       LD1 because we need a PC-relative addressing mode.  */
    return const_vec;
  else
    /* A PARALLEL containing something not valid inside CONST_VECTOR.
       We cannot construct an initializer.  */
    return NULL_RTX;
}

/* Expand a vector initialisation sequence, such that TARGET is
   initialised to contain VALS.  */

void
aarch64_expand_vector_init (rtx target, rtx vals)
{
  machine_mode mode = GET_MODE (target);
  scalar_mode inner_mode = GET_MODE_INNER (mode);
  /* The number of vector elements.  */
  int n_elts = XVECLEN (vals, 0);
  /* The number of vector elements which are not constant.  */
  int n_var = 0;
  rtx any_const = NULL_RTX;
  /* The first element of vals.  */
  rtx v0 = XVECEXP (vals, 0, 0);
  bool all_same = true;

  /* Count the number of variable elements to initialise.  */
  for (int i = 0; i < n_elts; ++i)
    {
      rtx x = XVECEXP (vals, 0, i);
      if (!(CONST_INT_P (x) || CONST_DOUBLE_P (x)))
	++n_var;
      else
	any_const = x;

      all_same &= rtx_equal_p (x, v0);
    }

  /* No variable elements, hand off to aarch64_simd_make_constant which knows
     how best to handle this.  */
  if (n_var == 0)
    {
      rtx constant = aarch64_simd_make_constant (vals);
      if (constant != NULL_RTX)
	{
	  emit_move_insn (target, constant);
	  return;
	}
    }

  /* Splat a single non-constant element if we can.  */
  if (all_same)
    {
      rtx x = copy_to_mode_reg (inner_mode, v0);
      aarch64_emit_move (target, gen_vec_duplicate (mode, x));
      return;
    }

  enum insn_code icode = optab_handler (vec_set_optab, mode);
  gcc_assert (icode != CODE_FOR_nothing);

  /* If there are only variable elements, try to optimize
     the insertion using dup for the most common element
     followed by insertions.  */

  /* The algorithm will fill matches[*][0] with the earliest matching element,
     and matches[X][1] with the count of duplicate elements (if X is the
     earliest element which has duplicates).  */
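  /* For example (illustrative): for VALS = { a, b, a, a }, matches[2][0]
     and matches[3][0] are both 0 and matches[0][1] is 3, so element 0 is
     chosen as the most common element and is splatted first.  */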

  if (n_var == n_elts && n_elts <= 16)
    {
      int matches[16][2] = {0};
      for (int i = 0; i < n_elts; i++)
	{
	  for (int j = 0; j <= i; j++)
	    {
	      if (rtx_equal_p (XVECEXP (vals, 0, i), XVECEXP (vals, 0, j)))
		{
		  matches[i][0] = j;
		  matches[j][1]++;
		  break;
		}
	    }
	}
      int maxelement = 0;
      int maxv = 0;
      for (int i = 0; i < n_elts; i++)
	if (matches[i][1] > maxv)
	  {
	    maxelement = i;
	    maxv = matches[i][1];
	  }

      /* Create a duplicate of the most common element, unless all elements
	 are equally useless to us, in which case just immediately set the
	 vector register using the first element.  */

      if (maxv == 1)
	{
	  /* For vectors of two 64-bit elements, we can do even better.  */
	  if (n_elts == 2
	      && (inner_mode == E_DImode
		  || inner_mode == E_DFmode))
	    {
	      rtx x0 = XVECEXP (vals, 0, 0);
	      rtx x1 = XVECEXP (vals, 0, 1);
	      /* Combine can pick up this case, but handling it directly
		 here leaves clearer RTL.

		 This is load_pair_lanes<mode>, and also gives us a clean-up
		 for store_pair_lanes<mode>.  */
	      if (memory_operand (x0, inner_mode)
		  && memory_operand (x1, inner_mode)
		  && !STRICT_ALIGNMENT
		  && rtx_equal_p (XEXP (x1, 0),
				  plus_constant (Pmode,
						 XEXP (x0, 0),
						 GET_MODE_SIZE (inner_mode))))
		{
		  rtx t;
		  if (inner_mode == DFmode)
		    t = gen_load_pair_lanesdf (target, x0, x1);
		  else
		    t = gen_load_pair_lanesdi (target, x0, x1);
		  emit_insn (t);
		  return;
		}
	    }
	  /* The subreg-move sequence below will move into lane zero of the
	     vector register.  For big-endian we want that position to hold
	     the last element of VALS.  */
	  maxelement = BYTES_BIG_ENDIAN ? n_elts - 1 : 0;
	  rtx x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, maxelement));
	  aarch64_emit_move (target, lowpart_subreg (mode, x, inner_mode));
	}
      else
	{
	  rtx x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, maxelement));
	  aarch64_emit_move (target, gen_vec_duplicate (mode, x));
	}

      /* Insert the rest.  */
      for (int i = 0; i < n_elts; i++)
	{
	  rtx x = XVECEXP (vals, 0, i);
	  if (matches[i][0] == maxelement)
	    continue;
	  x = copy_to_mode_reg (inner_mode, x);
	  emit_insn (GEN_FCN (icode) (target, x, GEN_INT (i)));
	}
      return;
    }

  /* Initialise a vector which is part-variable.  We want to first try
     to build those lanes which are constant in the most efficient way we
     can.  */
  if (n_var != n_elts)
    {
      rtx copy = copy_rtx (vals);

      /* Load constant part of vector.  We really don't care what goes into the
	 parts we will overwrite, but we're more likely to be able to load the
	 constant efficiently if it has fewer, larger, repeating parts
	 (see aarch64_simd_valid_immediate).  */
      for (int i = 0; i < n_elts; i++)
	{
	  rtx x = XVECEXP (vals, 0, i);
	  if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
	    continue;
	  rtx subst = any_const;
	  for (int bit = n_elts / 2; bit > 0; bit /= 2)
	    {
	      /* Look in the copied vector, as more elements are const.  */
	      rtx test = XVECEXP (copy, 0, i ^ bit);
	      if (CONST_INT_P (test) || CONST_DOUBLE_P (test))
		{
		  subst = test;
		  break;
		}
	    }
	  XVECEXP (copy, 0, i) = subst;
	}
      aarch64_expand_vector_init (target, copy);
    }

  /* Insert the variable lanes directly.  */
  for (int i = 0; i < n_elts; i++)
    {
      rtx x = XVECEXP (vals, 0, i);
      if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
	continue;
      x = copy_to_mode_reg (inner_mode, x);
      emit_insn (GEN_FCN (icode) (target, x, GEN_INT (i)));
    }
}
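
/* Return the mask of significant bits in a shift count (e.g. 31 for SImode),
   or 0 when shift counts are not truncated, as for vector modes here.  */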
static unsigned HOST_WIDE_INT
aarch64_shift_truncation_mask (machine_mode mode)
{
  if (!SHIFT_COUNT_TRUNCATED || aarch64_vector_data_mode_p (mode))
    return 0;
  return GET_MODE_UNIT_BITSIZE (mode) - 1;
}

/* Select a format to encode pointers in exception handling data.  */
int
aarch64_asm_preferred_eh_data_format (int code ATTRIBUTE_UNUSED, int global)
{
  int type;
  switch (aarch64_cmodel)
    {
    case AARCH64_CMODEL_TINY:
    case AARCH64_CMODEL_TINY_PIC:
    case AARCH64_CMODEL_SMALL:
    case AARCH64_CMODEL_SMALL_PIC:
    case AARCH64_CMODEL_SMALL_SPIC:
      /* text+got+data < 4Gb.  4-byte signed relocs are sufficient
	 for everything.  */
      type = DW_EH_PE_sdata4;
      break;
    default:
      /* No assumptions here.  8-byte relocs required.  */
      type = DW_EH_PE_sdata8;
      break;
    }
  return (global ? DW_EH_PE_indirect : 0) | DW_EH_PE_pcrel | type;
}

/* The last .arch and .tune assembly strings that we printed.  */
static std::string aarch64_last_printed_arch_string;
static std::string aarch64_last_printed_tune_string;

/* Implement ASM_DECLARE_FUNCTION_NAME.  Output the ISA features used
   by the function FNDECL.  */

void
aarch64_declare_function_name (FILE *stream, const char* name,
			       tree fndecl)
{
  tree target_parts = DECL_FUNCTION_SPECIFIC_TARGET (fndecl);

  struct cl_target_option *targ_options;
  if (target_parts)
    targ_options = TREE_TARGET_OPTION (target_parts);
  else
    targ_options = TREE_TARGET_OPTION (target_option_current_node);
  gcc_assert (targ_options);

  const struct processor *this_arch
    = aarch64_get_arch (targ_options->x_explicit_arch);

  unsigned long isa_flags = targ_options->x_aarch64_isa_flags;
  std::string extension
    = aarch64_get_extension_string_for_isa_flags (isa_flags,
						  this_arch->flags);
  /* Only update the assembler .arch string if it is distinct from the last
     such string we printed.  */
  std::string to_print = this_arch->name + extension;
  if (to_print != aarch64_last_printed_arch_string)
    {
      asm_fprintf (asm_out_file, "\t.arch %s\n", to_print.c_str ());
      aarch64_last_printed_arch_string = to_print;
    }

  /* Print the cpu name we're tuning for in the comments; it might be
     useful to readers of the generated asm.  Do it only when it changes
     from function to function and verbose assembly is requested.  */
  const struct processor *this_tune
    = aarch64_get_tune_cpu (targ_options->x_explicit_tune_core);

  if (flag_debug_asm && aarch64_last_printed_tune_string != this_tune->name)
    {
      asm_fprintf (asm_out_file, "\t" ASM_COMMENT_START ".tune %s\n",
		   this_tune->name);
      aarch64_last_printed_tune_string = this_tune->name;
    }

  /* Don't forget the type directive for ELF.  */
  ASM_OUTPUT_TYPE_DIRECTIVE (stream, name, "function");
  ASM_OUTPUT_LABEL (stream, name);
}

/* Implements TARGET_ASM_FILE_START.  Output the assembly header.  */

static void
aarch64_start_file (void)
{
  struct cl_target_option *default_options
    = TREE_TARGET_OPTION (target_option_default_node);

  const struct processor *default_arch
    = aarch64_get_arch (default_options->x_explicit_arch);
  unsigned long default_isa_flags = default_options->x_aarch64_isa_flags;
  std::string extension
    = aarch64_get_extension_string_for_isa_flags (default_isa_flags,
						  default_arch->flags);

  aarch64_last_printed_arch_string = default_arch->name + extension;
  aarch64_last_printed_tune_string = "";
  asm_fprintf (asm_out_file, "\t.arch %s\n",
	       aarch64_last_printed_arch_string.c_str ());

  default_file_start ();
}

/* Emit load exclusive.  */

static void
aarch64_emit_load_exclusive (machine_mode mode, rtx rval,
			     rtx mem, rtx model_rtx)
{
  emit_insn (gen_aarch64_load_exclusive (mode, rval, mem, model_rtx));
}

/* Emit store exclusive.  */

static void
aarch64_emit_store_exclusive (machine_mode mode, rtx bval,
			      rtx rval, rtx mem, rtx model_rtx)
{
  emit_insn (gen_aarch64_store_exclusive (mode, bval, rval, mem, model_rtx));
}

/* Mark the previous jump instruction as unlikely.  */

static void
aarch64_emit_unlikely_jump (rtx insn)
{
  rtx_insn *jump = emit_jump_insn (insn);
  add_reg_br_prob_note (jump, profile_probability::very_unlikely ());
}

/* Expand a compare and swap pattern.  */

void
aarch64_expand_compare_and_swap (rtx operands[])
{
  rtx bval, rval, mem, oldval, newval, is_weak, mod_s, mod_f, x;
  machine_mode mode, cmp_mode;

  bval = operands[0];
  rval = operands[1];
  mem = operands[2];
  oldval = operands[3];
  newval = operands[4];
  is_weak = operands[5];
  mod_s = operands[6];
  mod_f = operands[7];
  mode = GET_MODE (mem);
  cmp_mode = mode;

  /* Normally the succ memory model must be stronger than fail, but in the
     unlikely event of fail being ACQUIRE and succ being RELEASE we need to
     promote succ to ACQ_REL so that we don't lose the acquire semantics.  */

  if (is_mm_acquire (memmodel_from_int (INTVAL (mod_f)))
      && is_mm_release (memmodel_from_int (INTVAL (mod_s))))
    mod_s = GEN_INT (MEMMODEL_ACQ_REL);

  switch (mode)
    {
    case E_QImode:
    case E_HImode:
      /* For short modes, we're going to perform the comparison in SImode,
	 so do the zero-extension now.  */
      cmp_mode = SImode;
      rval = gen_reg_rtx (SImode);
      oldval = convert_modes (SImode, mode, oldval, true);
      /* Fall through.  */

    case E_SImode:
    case E_DImode:
      /* Force the value into a register if needed.  */
      if (!aarch64_plus_operand (oldval, mode))
	oldval = force_reg (cmp_mode, oldval);
      break;

    default:
      gcc_unreachable ();
    }

  if (TARGET_LSE)
    emit_insn (gen_aarch64_compare_and_swap_lse (mode, rval, mem, oldval,
						 newval, is_weak, mod_s,
						 mod_f));
  else
    emit_insn (gen_aarch64_compare_and_swap (mode, rval, mem, oldval, newval,
					     is_weak, mod_s, mod_f));

  if (mode == QImode || mode == HImode)
    emit_move_insn (operands[1], gen_lowpart (mode, rval));

  x = gen_rtx_REG (CCmode, CC_REGNUM);
  x = gen_rtx_EQ (SImode, x, const0_rtx);
  emit_insn (gen_rtx_SET (bval, x));
}

/* Test whether the target supports using an atomic load-operate instruction.
   CODE is the operation and AFTER is TRUE if the data in memory after the
   operation should be returned and FALSE if the data before the operation
   should be returned.  Returns FALSE if the operation isn't supported by the
   architecture.  */

bool
aarch64_atomic_ldop_supported_p (enum rtx_code code)
{
  if (!TARGET_LSE)
    return false;

  switch (code)
    {
    case SET:
    case AND:
    case IOR:
    case XOR:
    case MINUS:
    case PLUS:
      return true;
    default:
      return false;
    }
}

/* Emit a barrier that is appropriate for memory model MODEL, at the end of a
   sequence implementing an atomic operation.  */

static void
aarch64_emit_post_barrier (enum memmodel model)
{
  const enum memmodel base_model = memmodel_base (model);

  if (is_mm_sync (model)
      && (base_model == MEMMODEL_ACQUIRE
	  || base_model == MEMMODEL_ACQ_REL
	  || base_model == MEMMODEL_SEQ_CST))
    {
      emit_insn (gen_mem_thread_fence (GEN_INT (MEMMODEL_SEQ_CST)));
    }
}

/* Emit an atomic compare-and-swap operation.  RVAL is the destination register
   for the data in memory.  EXPECTED is the value expected to be in memory.
   DESIRED is the value to store to memory.  MEM is the memory location.  MODEL
   is the memory ordering to use.  */

void
aarch64_gen_atomic_cas (rtx rval, rtx mem,
			rtx expected, rtx desired,
			rtx model)
{
  machine_mode mode;

  mode = GET_MODE (mem);

  /* Move the expected value into the CAS destination register.  */
  emit_insn (gen_rtx_SET (rval, expected));

  /* Emit the CAS.  */
  emit_insn (gen_aarch64_atomic_cas (mode, rval, mem, desired, model));

  /* Compare the expected value with the value loaded by the CAS, to establish
     whether the swap was made.  */
  aarch64_gen_compare_reg (EQ, rval, expected);
}

/* Split a compare and swap pattern.  */

void
aarch64_split_compare_and_swap (rtx operands[])
{
  rtx rval, mem, oldval, newval, scratch;
  machine_mode mode;
  bool is_weak;
  rtx_code_label *label1, *label2;
  rtx x, cond;
  enum memmodel model;
  rtx model_rtx;

  rval = operands[0];
  mem = operands[1];
  oldval = operands[2];
  newval = operands[3];
  is_weak = (operands[4] != const0_rtx);
  model_rtx = operands[5];
  scratch = operands[7];
  mode = GET_MODE (mem);
  model = memmodel_from_int (INTVAL (model_rtx));

  /* When OLDVAL is zero and we want the strong version we can emit a tighter
     loop:
     .label1:
	LD[A]XR	rval, [mem]
	CBNZ	rval, .label2
	ST[L]XR	scratch, newval, [mem]
	CBNZ	scratch, .label1
     .label2:
	CMP	rval, 0.  */
  bool strong_zero_p = !is_weak && oldval == const0_rtx;

  label1 = NULL;
  if (!is_weak)
    {
      label1 = gen_label_rtx ();
      emit_label (label1);
    }
  label2 = gen_label_rtx ();

  /* The initial load can be relaxed for a __sync operation since a final
     barrier will be emitted to stop code hoisting.  */
  if (is_mm_sync (model))
    aarch64_emit_load_exclusive (mode, rval, mem,
				 GEN_INT (MEMMODEL_RELAXED));
  else
    aarch64_emit_load_exclusive (mode, rval, mem, model_rtx);

  if (strong_zero_p)
    {
      if (aarch64_track_speculation)
	{
	  /* Emit an explicit compare instruction, so that we can correctly
	     track the condition codes.  */
	  rtx cc_reg = aarch64_gen_compare_reg (NE, rval, const0_rtx);
	  x = gen_rtx_NE (GET_MODE (cc_reg), cc_reg, const0_rtx);
	}
      else
	x = gen_rtx_NE (VOIDmode, rval, const0_rtx);

      x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
				gen_rtx_LABEL_REF (Pmode, label2), pc_rtx);
      aarch64_emit_unlikely_jump (gen_rtx_SET (pc_rtx, x));
    }
  else
    {
      cond = aarch64_gen_compare_reg (NE, rval, oldval);
      x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
      x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
				gen_rtx_LABEL_REF (Pmode, label2), pc_rtx);
      aarch64_emit_unlikely_jump (gen_rtx_SET (pc_rtx, x));
    }

  aarch64_emit_store_exclusive (mode, scratch, mem, newval, model_rtx);

  if (!is_weak)
    {
      if (aarch64_track_speculation)
	{
	  /* Emit an explicit compare instruction, so that we can correctly
	     track the condition codes.  */
	  rtx cc_reg = aarch64_gen_compare_reg (NE, scratch, const0_rtx);
	  x = gen_rtx_NE (GET_MODE (cc_reg), cc_reg, const0_rtx);
	}
      else
	x = gen_rtx_NE (VOIDmode, scratch, const0_rtx);

      x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
				gen_rtx_LABEL_REF (Pmode, label1), pc_rtx);
      aarch64_emit_unlikely_jump (gen_rtx_SET (pc_rtx, x));
    }
  else
    {
      cond = gen_rtx_REG (CCmode, CC_REGNUM);
      x = gen_rtx_COMPARE (CCmode, scratch, const0_rtx);
      emit_insn (gen_rtx_SET (cond, x));
    }

  emit_label (label2);
  /* If we used a CBNZ in the exchange loop emit an explicit compare with RVAL
     to set the condition flags.  If this is not used it will be removed by
     later passes.  */
  if (strong_zero_p)
    {
      cond = gen_rtx_REG (CCmode, CC_REGNUM);
      x = gen_rtx_COMPARE (CCmode, rval, const0_rtx);
      emit_insn (gen_rtx_SET (cond, x));
    }
  /* Emit any final barrier needed for a __sync operation.  */
  if (is_mm_sync (model))
    aarch64_emit_post_barrier (model);
}

/* Emit a BIC instruction.  */

static void
aarch64_emit_bic (machine_mode mode, rtx dst, rtx s1, rtx s2, int shift)
{
  rtx shift_rtx = GEN_INT (shift);
  rtx (*gen) (rtx, rtx, rtx, rtx);

  switch (mode)
    {
    case E_SImode: gen = gen_and_one_cmpl_lshrsi3; break;
    case E_DImode: gen = gen_and_one_cmpl_lshrdi3; break;
    default:
      gcc_unreachable ();
    }

  emit_insn (gen (dst, s2, shift_rtx, s1));
}

/* Emit an atomic swap.  */

static void
aarch64_emit_atomic_swap (machine_mode mode, rtx dst, rtx value,
			  rtx mem, rtx model)
{
  emit_insn (gen_aarch64_atomic_swp (mode, dst, mem, value, model));
}

/* Emit an atomic load+operate.  CODE is the operation.  OUT_DATA is the
   location to store the data read from memory.  OUT_RESULT is the location to
   store the result of the operation.  MEM is the memory location to read and
   modify.  MODEL_RTX is the memory ordering to use.  VALUE is the second
   operand for the operation.  Either OUT_DATA or OUT_RESULT, but not both, can
   be NULL.  */
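/* For example (illustrative): an atomic MINUS is negated and handled as a
   PLUS (the LSE LDADD form), and an atomic AND inverts VALUE first so that
   the BIC form (LDCLR) computes the desired result.  */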

void
aarch64_gen_atomic_ldop (enum rtx_code code, rtx out_data, rtx out_result,
			 rtx mem, rtx value, rtx model_rtx)
{
  machine_mode mode = GET_MODE (mem);
  machine_mode wmode = (mode == DImode ? DImode : SImode);
  const bool short_mode = (mode < SImode);
  int ldop_code;
  rtx src;
  rtx x;

  if (out_data)
    out_data = gen_lowpart (mode, out_data);

  if (out_result)
    out_result = gen_lowpart (mode, out_result);

  /* Make sure the value is in a register, putting it into a destination
     register if it needs to be manipulated.  */
  if (!register_operand (value, mode)
      || code == AND || code == MINUS)
    {
      src = out_result ? out_result : out_data;
      emit_move_insn (src, gen_lowpart (mode, value));
    }
  else
    src = value;
  gcc_assert (register_operand (src, mode));

  /* Preprocess the data for the operation as necessary.  If the operation is
     a SET then emit a swap instruction and finish.  */
  switch (code)
    {
    case SET:
      aarch64_emit_atomic_swap (mode, out_data, src, mem, model_rtx);
      return;

    case MINUS:
      /* Negate the value and treat it as a PLUS.  */
      {
	rtx neg_src;

	/* Resize the value if necessary.  */
	if (short_mode)
	  src = gen_lowpart (wmode, src);

	neg_src = gen_rtx_NEG (wmode, src);
	emit_insn (gen_rtx_SET (src, neg_src));

	if (short_mode)
	  src = gen_lowpart (mode, src);
      }
      /* Fall through.  */
    case PLUS:
      ldop_code = UNSPECV_ATOMIC_LDOP_PLUS;
      break;

    case IOR:
      ldop_code = UNSPECV_ATOMIC_LDOP_OR;
      break;

    case XOR:
      ldop_code = UNSPECV_ATOMIC_LDOP_XOR;
      break;

    case AND:
      {
	rtx not_src;

	/* Resize the value if necessary.  */
	if (short_mode)
	  src = gen_lowpart (wmode, src);

	not_src = gen_rtx_NOT (wmode, src);
	emit_insn (gen_rtx_SET (src, not_src));

	if (short_mode)
	  src = gen_lowpart (mode, src);
      }
      ldop_code = UNSPECV_ATOMIC_LDOP_BIC;
      break;

    default:
      /* The operation can't be done with atomic instructions.  */
      gcc_unreachable ();
    }

  emit_insn (gen_aarch64_atomic_load (ldop_code, mode,
				      out_data, mem, src, model_rtx));

  /* If necessary, calculate the data in memory after the update by redoing the
     operation from values in registers.  */
  if (!out_result)
    return;

  if (short_mode)
    {
      src = gen_lowpart (wmode, src);
      out_data = gen_lowpart (wmode, out_data);
      out_result = gen_lowpart (wmode, out_result);
    }

  x = NULL_RTX;

  switch (code)
    {
    case MINUS:
    case PLUS:
      x = gen_rtx_PLUS (wmode, out_data, src);
      break;
    case IOR:
      x = gen_rtx_IOR (wmode, out_data, src);
      break;
    case XOR:
      x = gen_rtx_XOR (wmode, out_data, src);
      break;
    case AND:
      aarch64_emit_bic (wmode, out_result, out_data, src, 0);
      return;
    default:
      gcc_unreachable ();
    }

  emit_set_insn (out_result, x);

  return;
}

/* Split an atomic operation.  */

void
aarch64_split_atomic_op (enum rtx_code code, rtx old_out, rtx new_out, rtx mem,
			 rtx value, rtx model_rtx, rtx cond)
{
  machine_mode mode = GET_MODE (mem);
  machine_mode wmode = (mode == DImode ? DImode : SImode);
  const enum memmodel model = memmodel_from_int (INTVAL (model_rtx));
  const bool is_sync = is_mm_sync (model);
  rtx_code_label *label;
  rtx x;

  /* Split the atomic operation into a sequence.  */
  label = gen_label_rtx ();
  emit_label (label);

  if (new_out)
    new_out = gen_lowpart (wmode, new_out);
  if (old_out)
    old_out = gen_lowpart (wmode, old_out);
  else
    old_out = new_out;
  value = simplify_gen_subreg (wmode, value, mode, 0);

  /* The initial load can be relaxed for a __sync operation since a final
     barrier will be emitted to stop code hoisting.  */
  if (is_sync)
    aarch64_emit_load_exclusive (mode, old_out, mem,
				 GEN_INT (MEMMODEL_RELAXED));
  else
    aarch64_emit_load_exclusive (mode, old_out, mem, model_rtx);

  switch (code)
    {
    case SET:
      new_out = value;
      break;

    case NOT:
      x = gen_rtx_AND (wmode, old_out, value);
      emit_insn (gen_rtx_SET (new_out, x));
      x = gen_rtx_NOT (wmode, new_out);
      emit_insn (gen_rtx_SET (new_out, x));
      break;

    case MINUS:
      if (CONST_INT_P (value))
	{
	  value = GEN_INT (-INTVAL (value));
	  code = PLUS;
	}
      /* Fall through.  */

    default:
      x = gen_rtx_fmt_ee (code, wmode, old_out, value);
      emit_insn (gen_rtx_SET (new_out, x));
      break;
    }

  aarch64_emit_store_exclusive (mode, cond, mem,
				gen_lowpart (mode, new_out), model_rtx);

  if (aarch64_track_speculation)
    {
      /* Emit an explicit compare instruction, so that we can correctly
	 track the condition codes.  */
      rtx cc_reg = aarch64_gen_compare_reg (NE, cond, const0_rtx);
      x = gen_rtx_NE (GET_MODE (cc_reg), cc_reg, const0_rtx);
    }
  else
    x = gen_rtx_NE (VOIDmode, cond, const0_rtx);

  x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
			    gen_rtx_LABEL_REF (Pmode, label), pc_rtx);
  aarch64_emit_unlikely_jump (gen_rtx_SET (pc_rtx, x));

  /* Emit any final barrier needed for a __sync operation.  */
  if (is_sync)
    aarch64_emit_post_barrier (model);
}

static void
aarch64_init_libfuncs (void)
{
  /* Half-precision float operations.  The compiler handles all operations
     with NULL libfuncs by converting to SFmode.  */

  /* Conversions.  */
  set_conv_libfunc (trunc_optab, HFmode, SFmode, "__gnu_f2h_ieee");
  set_conv_libfunc (sext_optab, SFmode, HFmode, "__gnu_h2f_ieee");

  /* Arithmetic.  */
  set_optab_libfunc (add_optab, HFmode, NULL);
  set_optab_libfunc (sdiv_optab, HFmode, NULL);
  set_optab_libfunc (smul_optab, HFmode, NULL);
  set_optab_libfunc (neg_optab, HFmode, NULL);
  set_optab_libfunc (sub_optab, HFmode, NULL);

  /* Comparisons.  */
  set_optab_libfunc (eq_optab, HFmode, NULL);
  set_optab_libfunc (ne_optab, HFmode, NULL);
  set_optab_libfunc (lt_optab, HFmode, NULL);
  set_optab_libfunc (le_optab, HFmode, NULL);
  set_optab_libfunc (ge_optab, HFmode, NULL);
  set_optab_libfunc (gt_optab, HFmode, NULL);
  set_optab_libfunc (unord_optab, HFmode, NULL);
}

/* Target hook for c_mode_for_suffix.  */
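/* For example (illustrative): a constant written with a 'q' suffix, such as
   1.0q, gets TFmode, the 128-bit IEEE quad type; any other suffix falls
   through to VOIDmode.  */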
static machine_mode
aarch64_c_mode_for_suffix (char suffix)
{
  if (suffix == 'q')
    return TFmode;

  return VOIDmode;
}

/* We can only represent floating point constants which will fit in
   "quarter-precision" values.  These values are characterised by
   a sign bit, a 4-bit mantissa and a 3-bit exponent, and are given
   by:

   (-1)^s * (n/16) * 2^r

   Where:
     's' is the sign bit.
     'n' is an integer in the range 16 <= n <= 31.
     'r' is an integer in the range -3 <= r <= 4.  */
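/* For example, 0.5 = (16/16) * 2^-1 is representable (s = 0, n = 16,
   r = -1), as is 31.0 = (31/16) * 2^4; a value such as 0.1 has no exact
   binary representation of this form and is rejected.  */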

/* Return true iff X can be represented by a quarter-precision
   floating point immediate operand.  Note, we cannot represent 0.0.  */
bool
aarch64_float_const_representable_p (rtx x)
{
  /* This represents our current view of how many bits
     make up the mantissa.  */
  int point_pos = 2 * HOST_BITS_PER_WIDE_INT - 1;
  int exponent;
  unsigned HOST_WIDE_INT mantissa, mask;
  REAL_VALUE_TYPE r, m;
  bool fail;

  if (!CONST_DOUBLE_P (x))
    return false;

  if (GET_MODE (x) == VOIDmode
      || (GET_MODE (x) == HFmode && !TARGET_FP_F16INST))
    return false;

  r = *CONST_DOUBLE_REAL_VALUE (x);

  /* We cannot represent infinities, NaNs or +/-zero.  We won't
     know if we have +zero until we analyse the mantissa, but we
     can reject the other invalid values.  */
  if (REAL_VALUE_ISINF (r) || REAL_VALUE_ISNAN (r)
      || REAL_VALUE_MINUS_ZERO (r))
    return false;

  /* Extract exponent.  */
  r = real_value_abs (&r);
  exponent = REAL_EXP (&r);

  /* For the mantissa, we expand into two HOST_WIDE_INTS, apart from the
     highest (sign) bit, with a fixed binary point at bit point_pos.
     m1 holds the low part of the mantissa, m2 the high part.
     WARNING: If we ever have a representation using more than 2 * H_W_I - 1
     bits for the mantissa, this can fail (low bits will be lost).  */
  real_ldexp (&m, &r, point_pos - exponent);
  wide_int w = real_to_integer (&m, &fail, HOST_BITS_PER_WIDE_INT * 2);

  /* If the low part of the mantissa has bits set we cannot represent
     the value.  */
  if (w.ulow () != 0)
    return false;
  /* We have rejected the lower HOST_WIDE_INT, so update our
     understanding of how many bits lie in the mantissa and
     look only at the high HOST_WIDE_INT.  */
  mantissa = w.elt (1);
  point_pos -= HOST_BITS_PER_WIDE_INT;

  /* We can only represent values with a mantissa of the form 1.xxxx.  */
  mask = ((unsigned HOST_WIDE_INT)1 << (point_pos - 5)) - 1;
  if ((mantissa & mask) != 0)
    return false;

  /* Having filtered unrepresentable values, we may now remove all
     but the highest 5 bits.  */
  mantissa >>= point_pos - 5;

  /* We cannot represent the value 0.0, so reject it.  This is handled
     elsewhere.  */
  if (mantissa == 0)
    return false;

  /* Then, as bit 4 is always set, we can mask it off, leaving
     the mantissa in the range [0, 15].  */
  mantissa &= ~(1 << 4);
  gcc_assert (mantissa <= 15);

  /* GCC internally does not use IEEE754-like encoding (where normalized
     significands are in the range [1, 2).  GCC uses [0.5, 1) (see real.c).
     Our mantissa values are shifted 4 places to the left relative to
     normalized IEEE754 so we must modify the exponent returned by REAL_EXP
     by 5 places to correct for GCC's representation.  */
  exponent = 5 - exponent;

  return (exponent >= 0 && exponent <= 7);
}

/* Returns the string with the instruction for AdvSIMD MOVI, MVNI, ORR or BIC
   immediate with a CONST_VECTOR of MODE and WIDTH.  WHICH selects whether to
   output MOVI/MVNI, ORR or BIC immediate.  */
char*
aarch64_output_simd_mov_immediate (rtx const_vector, unsigned width,
				   enum simd_immediate_check which)
{
  bool is_valid;
  static char templ[40];
  const char *mnemonic;
  const char *shift_op;
  unsigned int lane_count = 0;
  char element_char;

  struct simd_immediate_info info;

  /* This will return true to show const_vector is legal for use as either
     an AdvSIMD MOVI instruction (or, implicitly, MVNI), ORR or BIC immediate.
     It will also update INFO to show how the immediate should be generated.
     WHICH selects whether to check for MOVI/MVNI, ORR or BIC.  */
  is_valid = aarch64_simd_valid_immediate (const_vector, &info, which);
  gcc_assert (is_valid);

  element_char = sizetochar (GET_MODE_BITSIZE (info.elt_mode));
  lane_count = width / GET_MODE_BITSIZE (info.elt_mode);

  if (GET_MODE_CLASS (info.elt_mode) == MODE_FLOAT)
    {
      gcc_assert (info.shift == 0 && info.insn == simd_immediate_info::MOV);
      /* For FP zero change it to a CONST_INT 0 and use the integer SIMD
	 move immediate path.  */
      if (aarch64_float_const_zero_rtx_p (info.value))
	info.value = GEN_INT (0);
      else
	{
	  const unsigned int buf_size = 20;
	  char float_buf[buf_size] = {'\0'};
	  real_to_decimal_for_mode (float_buf,
				    CONST_DOUBLE_REAL_VALUE (info.value),
				    buf_size, buf_size, 1, info.elt_mode);

	  if (lane_count == 1)
	    snprintf (templ, sizeof (templ), "fmov\t%%d0, %s", float_buf);
	  else
	    snprintf (templ, sizeof (templ), "fmov\t%%0.%d%c, %s",
		      lane_count, element_char, float_buf);
	  return templ;
	}
    }

  gcc_assert (CONST_INT_P (info.value));

  if (which == AARCH64_CHECK_MOV)
    {
      mnemonic = info.insn == simd_immediate_info::MVN ? "mvni" : "movi";
      shift_op = info.modifier == simd_immediate_info::MSL ? "msl" : "lsl";
      if (lane_count == 1)
	snprintf (templ, sizeof (templ), "%s\t%%d0, " HOST_WIDE_INT_PRINT_HEX,
		  mnemonic, UINTVAL (info.value));
      else if (info.shift)
	snprintf (templ, sizeof (templ), "%s\t%%0.%d%c, "
		  HOST_WIDE_INT_PRINT_HEX ", %s %d", mnemonic, lane_count,
		  element_char, UINTVAL (info.value), shift_op, info.shift);
      else
	snprintf (templ, sizeof (templ), "%s\t%%0.%d%c, "
		  HOST_WIDE_INT_PRINT_HEX, mnemonic, lane_count,
		  element_char, UINTVAL (info.value));
    }
  else
    {
      /* For AARCH64_CHECK_BIC and AARCH64_CHECK_ORR.  */
      mnemonic = info.insn == simd_immediate_info::MVN ? "bic" : "orr";
      if (info.shift)
	snprintf (templ, sizeof (templ), "%s\t%%0.%d%c, #"
		  HOST_WIDE_INT_PRINT_DEC ", %s #%d", mnemonic, lane_count,
		  element_char, UINTVAL (info.value), "lsl", info.shift);
      else
	snprintf (templ, sizeof (templ), "%s\t%%0.%d%c, #"
		  HOST_WIDE_INT_PRINT_DEC, mnemonic, lane_count,
		  element_char, UINTVAL (info.value));
    }
  return templ;
}

char*
aarch64_output_scalar_simd_mov_immediate (rtx immediate, scalar_int_mode mode)
{

  /* If a floating point number was passed and we desire to use it in an
     integer mode do the conversion to integer.  */
  if (CONST_DOUBLE_P (immediate) && GET_MODE_CLASS (mode) == MODE_INT)
    {
      unsigned HOST_WIDE_INT ival;
      if (!aarch64_reinterpret_float_as_int (immediate, &ival))
	gcc_unreachable ();
      immediate = gen_int_mode (ival, mode);
    }

  machine_mode vmode;
  /* Use a 64-bit mode for everything except for DI/DF mode, where we use
     a 128-bit vector mode.  */
  int width = GET_MODE_BITSIZE (mode) == 64 ? 128 : 64;

  vmode = aarch64_simd_container_mode (mode, width);
  rtx v_op = aarch64_simd_gen_const_vector_dup (vmode, INTVAL (immediate));
  return aarch64_output_simd_mov_immediate (v_op, width);
}

/* Return the output string to use for moving immediate CONST_VECTOR
   into an SVE register.  */

char *
aarch64_output_sve_mov_immediate (rtx const_vector)
{
  static char templ[40];
  struct simd_immediate_info info;
  char element_char;

  bool is_valid = aarch64_simd_valid_immediate (const_vector, &info);
  gcc_assert (is_valid);

  element_char = sizetochar (GET_MODE_BITSIZE (info.elt_mode));

  if (info.step)
    {
      snprintf (templ, sizeof (templ), "index\t%%0.%c, #"
		HOST_WIDE_INT_PRINT_DEC ", #" HOST_WIDE_INT_PRINT_DEC,
		element_char, INTVAL (info.value), INTVAL (info.step));
      return templ;
    }

  if (GET_MODE_CLASS (info.elt_mode) == MODE_FLOAT)
    {
      if (aarch64_float_const_zero_rtx_p (info.value))
	info.value = GEN_INT (0);
      else
	{
	  const int buf_size = 20;
	  char float_buf[buf_size] = {};
	  real_to_decimal_for_mode (float_buf,
				    CONST_DOUBLE_REAL_VALUE (info.value),
				    buf_size, buf_size, 1, info.elt_mode);

	  snprintf (templ, sizeof (templ), "fmov\t%%0.%c, #%s",
		    element_char, float_buf);
	  return templ;
	}
    }

  snprintf (templ, sizeof (templ), "mov\t%%0.%c, #" HOST_WIDE_INT_PRINT_DEC,
	    element_char, INTVAL (info.value));
  return templ;
}

/* Return the asm format for a PTRUE instruction whose destination has
   mode MODE.  SUFFIX is the element size suffix.  */
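/* For example (illustrative): a 256-bit fixed-length vector of 32-bit
   elements gives "ptrue\t%0.s, vl8", while a variable-length vector gives
   "ptrue\t%0.s, all".  */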

char *
aarch64_output_ptrue (machine_mode mode, char suffix)
{
  unsigned int nunits;
  static char buf[sizeof ("ptrue\t%0.N, vlNNNNN")];
  if (GET_MODE_NUNITS (mode).is_constant (&nunits))
    snprintf (buf, sizeof (buf), "ptrue\t%%0.%c, vl%d", suffix, nunits);
  else
    snprintf (buf, sizeof (buf), "ptrue\t%%0.%c, all", suffix);
  return buf;
}

/* Split operands into moves from op[1] + op[2] into op[0].  */

void
aarch64_split_combinev16qi (rtx operands[3])
{
  unsigned int dest = REGNO (operands[0]);
  unsigned int src1 = REGNO (operands[1]);
  unsigned int src2 = REGNO (operands[2]);
  machine_mode halfmode = GET_MODE (operands[1]);
  unsigned int halfregs = REG_NREGS (operands[1]);
  rtx destlo, desthi;

  gcc_assert (halfmode == V16QImode);

  if (src1 == dest && src2 == dest + halfregs)
    {
      /* No-op move.  Can't split to nothing; emit something.  */
      emit_note (NOTE_INSN_DELETED);
      return;
    }

  /* Preserve register attributes for variable tracking.  */
  destlo = gen_rtx_REG_offset (operands[0], halfmode, dest, 0);
  desthi = gen_rtx_REG_offset (operands[0], halfmode, dest + halfregs,
			       GET_MODE_SIZE (halfmode));

  /* Special case of reversed high/low parts.  */
  if (reg_overlap_mentioned_p (operands[2], destlo)
      && reg_overlap_mentioned_p (operands[1], desthi))
    {
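      /* Exchange the two halves in place with the classic three-XOR swap,
	 which needs no scratch register.  */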
      emit_insn (gen_xorv16qi3 (operands[1], operands[1], operands[2]));
      emit_insn (gen_xorv16qi3 (operands[2], operands[1], operands[2]));
      emit_insn (gen_xorv16qi3 (operands[1], operands[1], operands[2]));
    }
  else if (!reg_overlap_mentioned_p (operands[2], destlo))
    {
      /* Try to avoid unnecessary moves if part of the result
	 is in the right place already.  */
      if (src1 != dest)
	emit_move_insn (destlo, operands[1]);
      if (src2 != dest + halfregs)
	emit_move_insn (desthi, operands[2]);
    }
  else
    {
      if (src2 != dest + halfregs)
	emit_move_insn (desthi, operands[2]);
      if (src1 != dest)
	emit_move_insn (destlo, operands[1]);
    }
}

/* vec_perm support.  */

struct expand_vec_perm_d
{
  rtx target, op0, op1;
  vec_perm_indices perm;
  machine_mode vmode;
  unsigned int vec_flags;
  bool one_vector_p;
  bool testing_p;
};

/* Generate a variable permutation.  */

static void
aarch64_expand_vec_perm_1 (rtx target, rtx op0, rtx op1, rtx sel)
{
  machine_mode vmode = GET_MODE (target);
  bool one_vector_p = rtx_equal_p (op0, op1);

  gcc_checking_assert (vmode == V8QImode || vmode == V16QImode);
  gcc_checking_assert (GET_MODE (op0) == vmode);
  gcc_checking_assert (GET_MODE (op1) == vmode);
  gcc_checking_assert (GET_MODE (sel) == vmode);
  gcc_checking_assert (TARGET_SIMD);

  if (one_vector_p)
    {
      if (vmode == V8QImode)
	{
	  /* Expand the argument to a V16QI mode by duplicating it.  */
	  rtx pair = gen_reg_rtx (V16QImode);
	  emit_insn (gen_aarch64_combinev8qi (pair, op0, op0));
	  emit_insn (gen_aarch64_tbl1v8qi (target, pair, sel));
	}
      else
	{
	  emit_insn (gen_aarch64_tbl1v16qi (target, op0, sel));
	}
    }
  else
    {
      rtx pair;

      if (vmode == V8QImode)
	{
	  pair = gen_reg_rtx (V16QImode);
	  emit_insn (gen_aarch64_combinev8qi (pair, op0, op1));
	  emit_insn (gen_aarch64_tbl1v8qi (target, pair, sel));
	}
      else
	{
	  pair = gen_reg_rtx (OImode);
	  emit_insn (gen_aarch64_combinev16qi (pair, op0, op1));
	  emit_insn (gen_aarch64_tbl2v16qi (target, pair, sel));
	}
    }
}

/* Expand a vec_perm with the operands given by TARGET, OP0, OP1 and SEL.
   NELT is the number of elements in the vector.  */

void
aarch64_expand_vec_perm (rtx target, rtx op0, rtx op1, rtx sel,
			 unsigned int nelt)
{
  machine_mode vmode = GET_MODE (target);
  bool one_vector_p = rtx_equal_p (op0, op1);
  rtx mask;

  /* The TBL instruction does not use a modulo index, so we must take care
     of that ourselves.  */
  mask = aarch64_simd_gen_const_vector_dup (vmode,
					    one_vector_p ? nelt - 1 : 2 * nelt - 1);
  sel = expand_simple_binop (vmode, AND, sel, mask, NULL, 0, OPTAB_LIB_WIDEN);

  /* For big-endian, we also need to reverse the index within the vector
     (but not which vector).  */
  if (BYTES_BIG_ENDIAN)
    {
      /* If one_vector_p, mask is a vector of (nelt - 1)'s already.  */
      if (!one_vector_p)
	mask = aarch64_simd_gen_const_vector_dup (vmode, nelt - 1);
      sel = expand_simple_binop (vmode, XOR, sel, mask,
				 NULL, 0, OPTAB_LIB_WIDEN);
    }
  aarch64_expand_vec_perm_1 (target, op0, op1, sel);
}
15006
43cacb12
RS
15007/* Generate (set TARGET (unspec [OP0 OP1] CODE)). */
15008
15009static void
15010emit_unspec2 (rtx target, int code, rtx op0, rtx op1)
15011{
15012 emit_insn (gen_rtx_SET (target,
15013 gen_rtx_UNSPEC (GET_MODE (target),
15014 gen_rtvec (2, op0, op1), code)));
15015}
15016
15017/* Expand an SVE vec_perm with the given operands. */
15018
15019void
15020aarch64_expand_sve_vec_perm (rtx target, rtx op0, rtx op1, rtx sel)
15021{
15022 machine_mode data_mode = GET_MODE (target);
15023 machine_mode sel_mode = GET_MODE (sel);
15024 /* Enforced by the pattern condition. */
15025 int nunits = GET_MODE_NUNITS (sel_mode).to_constant ();
15026
15027 /* Note: vec_perm indices are supposed to wrap when they go beyond the
15028 size of the two value vectors, i.e. the upper bits of the indices
15029 are effectively ignored. SVE TBL instead produces 0 for any
15030 out-of-range indices, so we need to modulo all the vec_perm indices
15031 to ensure they are all in range. */
15032 rtx sel_reg = force_reg (sel_mode, sel);
15033
15034 /* Check if the sel only references the first values vector. */
15035 if (GET_CODE (sel) == CONST_VECTOR
15036 && aarch64_const_vec_all_in_range_p (sel, 0, nunits - 1))
15037 {
15038 emit_unspec2 (target, UNSPEC_TBL, op0, sel_reg);
15039 return;
15040 }
15041
15042 /* Check if the two values vectors are the same. */
15043 if (rtx_equal_p (op0, op1))
15044 {
15045 rtx max_sel = aarch64_simd_gen_const_vector_dup (sel_mode, nunits - 1);
15046 rtx sel_mod = expand_simple_binop (sel_mode, AND, sel_reg, max_sel,
15047 NULL, 0, OPTAB_DIRECT);
15048 emit_unspec2 (target, UNSPEC_TBL, op0, sel_mod);
15049 return;
15050 }
15051
15052 /* Run TBL on each value vector and combine the results. */
15053
15054 rtx res0 = gen_reg_rtx (data_mode);
15055 rtx res1 = gen_reg_rtx (data_mode);
15056 rtx neg_num_elems = aarch64_simd_gen_const_vector_dup (sel_mode, -nunits);
15057 if (GET_CODE (sel) != CONST_VECTOR
15058 || !aarch64_const_vec_all_in_range_p (sel, 0, 2 * nunits - 1))
15059 {
15060 rtx max_sel = aarch64_simd_gen_const_vector_dup (sel_mode,
15061 2 * nunits - 1);
15062 sel_reg = expand_simple_binop (sel_mode, AND, sel_reg, max_sel,
15063 NULL, 0, OPTAB_DIRECT);
15064 }
15065 emit_unspec2 (res0, UNSPEC_TBL, op0, sel_reg);
15066 rtx sel_sub = expand_simple_binop (sel_mode, PLUS, sel_reg, neg_num_elems,
15067 NULL, 0, OPTAB_DIRECT);
15068 emit_unspec2 (res1, UNSPEC_TBL, op1, sel_sub);
15069 if (GET_MODE_CLASS (data_mode) == MODE_VECTOR_INT)
15070 emit_insn (gen_rtx_SET (target, gen_rtx_IOR (data_mode, res0, res1)));
15071 else
15072 emit_unspec2 (target, UNSPEC_IORF, res0, res1);
15073}
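/* Worked example (illustrative only): with nunits == 4 and a selector
   lane of 6, RES0 gets TBL (OP0, 6) == 0 (6 is out of range for OP0)
   while RES1 gets TBL (OP1, 6 - 4) == OP1[2], so the final IOR yields
   OP1[2], exactly as the wrapping vec_perm semantics require.  */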
15074
cc4d934f
JG
15075/* Recognize patterns suitable for the TRN instructions. */
15076static bool
15077aarch64_evpc_trn (struct expand_vec_perm_d *d)
15078{
6a70badb
RS
15079 HOST_WIDE_INT odd;
15080 poly_uint64 nelt = d->perm.length ();
cc4d934f 15081 rtx out, in0, in1, x;
ef4bddc2 15082 machine_mode vmode = d->vmode;
cc4d934f
JG
15083
15084 if (GET_MODE_UNIT_SIZE (vmode) > 8)
15085 return false;
15086
15087 /* Note that these are little-endian tests.
15088 We correct for big-endian later. */
6a70badb
RS
15089 if (!d->perm[0].is_constant (&odd)
15090 || (odd != 0 && odd != 1)
326ac20e
RS
15091 || !d->perm.series_p (0, 2, odd, 2)
15092 || !d->perm.series_p (1, 2, nelt + odd, 2))
cc4d934f 15093 return false;
cc4d934f
JG
15094
15095 /* Success! */
15096 if (d->testing_p)
15097 return true;
15098
15099 in0 = d->op0;
15100 in1 = d->op1;
43cacb12
RS
15101 /* We don't need a big-endian lane correction for SVE; see the comment
15102 at the head of aarch64-sve.md for details. */
15103 if (BYTES_BIG_ENDIAN && d->vec_flags == VEC_ADVSIMD)
cc4d934f
JG
15104 {
15105 x = in0, in0 = in1, in1 = x;
15106 odd = !odd;
15107 }
15108 out = d->target;
15109
3f8334a5
RS
15110 emit_set_insn (out, gen_rtx_UNSPEC (vmode, gen_rtvec (2, in0, in1),
15111 odd ? UNSPEC_TRN2 : UNSPEC_TRN1));
cc4d934f
JG
15112 return true;
15113}
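/* Example (illustrative only): on V4SImode, nelt == 4 and TRN1
   corresponds to the index vector { 0, 4, 2, 6 } (odd == 0):
   series_p (0, 2, 0, 2) matches the 0, 2 at even positions and
   series_p (1, 2, 4, 2) matches the 4, 6 at odd positions.  TRN2 is
   the same with odd == 1, i.e. { 1, 5, 3, 7 }.  */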
15114
15115/* Recognize patterns suitable for the UZP instructions. */
15116static bool
15117aarch64_evpc_uzp (struct expand_vec_perm_d *d)
15118{
6a70badb 15119 HOST_WIDE_INT odd;
cc4d934f 15120 rtx out, in0, in1, x;
ef4bddc2 15121 machine_mode vmode = d->vmode;
cc4d934f
JG
15122
15123 if (GET_MODE_UNIT_SIZE (vmode) > 8)
15124 return false;
15125
15126 /* Note that these are little-endian tests.
15127 We correct for big-endian later. */
6a70badb
RS
15128 if (!d->perm[0].is_constant (&odd)
15129 || (odd != 0 && odd != 1)
326ac20e 15130 || !d->perm.series_p (0, 1, odd, 2))
cc4d934f 15131 return false;
cc4d934f
JG
15132
15133 /* Success! */
15134 if (d->testing_p)
15135 return true;
15136
15137 in0 = d->op0;
15138 in1 = d->op1;
43cacb12
RS
15139 /* We don't need a big-endian lane correction for SVE; see the comment
15140 at the head of aarch64-sve.md for details. */
15141 if (BYTES_BIG_ENDIAN && d->vec_flags == VEC_ADVSIMD)
cc4d934f
JG
15142 {
15143 x = in0, in0 = in1, in1 = x;
15144 odd = !odd;
15145 }
15146 out = d->target;
15147
3f8334a5
RS
15148 emit_set_insn (out, gen_rtx_UNSPEC (vmode, gen_rtvec (2, in0, in1),
15149 odd ? UNSPEC_UZP2 : UNSPEC_UZP1));
cc4d934f
JG
15150 return true;
15151}
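/* Example (illustrative only): on V4SImode, UZP1 corresponds to the
   index vector { 0, 2, 4, 6 } (odd == 0) and UZP2 to { 1, 3, 5, 7 }
   (odd == 1); in both cases series_p (0, 1, odd, 2) matches the whole
   sequence in steps of two.  */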
15152
15153/* Recognize patterns suitable for the ZIP instructions. */
15154static bool
15155aarch64_evpc_zip (struct expand_vec_perm_d *d)
15156{
6a70badb
RS
15157 unsigned int high;
15158 poly_uint64 nelt = d->perm.length ();
cc4d934f 15159 rtx out, in0, in1, x;
ef4bddc2 15160 machine_mode vmode = d->vmode;
cc4d934f
JG
15161
15162 if (GET_MODE_UNIT_SIZE (vmode) > 8)
15163 return false;
15164
15165 /* Note that these are little-endian tests.
15166 We correct for big-endian later. */
6a70badb
RS
15167 poly_uint64 first = d->perm[0];
15168 if ((maybe_ne (first, 0U) && maybe_ne (first * 2, nelt))
15169 || !d->perm.series_p (0, 2, first, 1)
15170 || !d->perm.series_p (1, 2, first + nelt, 1))
cc4d934f 15171 return false;
6a70badb 15172 high = maybe_ne (first, 0U);
cc4d934f
JG
15173
15174 /* Success! */
15175 if (d->testing_p)
15176 return true;
15177
15178 in0 = d->op0;
15179 in1 = d->op1;
43cacb12
RS
15180 /* We don't need a big-endian lane correction for SVE; see the comment
15181 at the head of aarch64-sve.md for details. */
15182 if (BYTES_BIG_ENDIAN && d->vec_flags == VEC_ADVSIMD)
cc4d934f
JG
15183 {
15184 x = in0, in0 = in1, in1 = x;
15185 high = !high;
15186 }
15187 out = d->target;
15188
3f8334a5
RS
15189 emit_set_insn (out, gen_rtx_UNSPEC (vmode, gen_rtvec (2, in0, in1),
15190 high ? UNSPEC_ZIP2 : UNSPEC_ZIP1));
cc4d934f
JG
15191 return true;
15192}
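/* Example (illustrative only): on V4SImode, ZIP1 corresponds to the
   index vector { 0, 4, 1, 5 } (first == 0) and ZIP2 to { 2, 6, 3, 7 }
   (first == nelt / 2 == 2): the even positions step through one input
   and the odd positions through the other, nelt elements apart.  */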
15193
ae0533da
AL
15194/* Recognize patterns for the EXT insn. */
15195
15196static bool
15197aarch64_evpc_ext (struct expand_vec_perm_d *d)
15198{
6a70badb 15199 HOST_WIDE_INT location;
ae0533da
AL
15200 rtx offset;
15201
6a70badb
RS
15202 /* The first element always refers to the first vector.
15203 Check if the extracted indices are increasing by one. */
43cacb12
RS
15204 if (d->vec_flags == VEC_SVE_PRED
15205 || !d->perm[0].is_constant (&location)
6a70badb 15206 || !d->perm.series_p (0, 1, location, 1))
326ac20e 15207 return false;
ae0533da 15208
ae0533da
AL
15209 /* Success! */
15210 if (d->testing_p)
15211 return true;
15212
b31e65bb 15213 /* The case where (location == 0) is a no-op for both big- and little-endian,
43cacb12 15214 and is removed by the mid-end at optimization levels -O1 and higher.
b31e65bb 15215
43cacb12
RS
15216 We don't need a big-endian lane correction for SVE; see the comment
15217 at the head of aarch64-sve.md for details. */
15218 if (BYTES_BIG_ENDIAN && location != 0 && d->vec_flags == VEC_ADVSIMD)
ae0533da
AL
15219 {
15220 /* After setup, we want the high elements of the first vector (stored
15221 at the LSB end of the register), and the low elements of the second
15222 vector (stored at the MSB end of the register). So swap. */
cb5c6c29 15223 std::swap (d->op0, d->op1);
6a70badb
RS
15224 /* location != 0 (above), so safe to assume (nelt - location) < nelt.
15225 to_constant () is safe since this is restricted to Advanced SIMD
15226 vectors. */
15227 location = d->perm.length ().to_constant () - location;
ae0533da
AL
15228 }
15229
15230 offset = GEN_INT (location);
3f8334a5
RS
15231 emit_set_insn (d->target,
15232 gen_rtx_UNSPEC (d->vmode,
15233 gen_rtvec (3, d->op0, d->op1, offset),
15234 UNSPEC_EXT));
ae0533da
AL
15235 return true;
15236}
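/* Example (illustrative only): on V4SImode the index vector
   { 1, 2, 3, 4 } gives location == 1 and selects the top three
   elements of OP0 followed by the first element of OP1 -- an EXT with
   an element offset of 1.  */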
15237
43cacb12
RS
15238/* Recognize patterns for the REV{64,32,16} insns, which reverse elements
15239 within each 64-bit, 32-bit or 16-bit granule. */
923fcec3
AL
15240
15241static bool
43cacb12 15242aarch64_evpc_rev_local (struct expand_vec_perm_d *d)
923fcec3 15243{
6a70badb
RS
15244 HOST_WIDE_INT diff;
15245 unsigned int i, size, unspec;
43cacb12 15246 machine_mode pred_mode;
923fcec3 15247
43cacb12
RS
15248 if (d->vec_flags == VEC_SVE_PRED
15249 || !d->one_vector_p
6a70badb 15250 || !d->perm[0].is_constant (&diff))
923fcec3
AL
15251 return false;
15252
3f8334a5
RS
15253 size = (diff + 1) * GET_MODE_UNIT_SIZE (d->vmode);
15254 if (size == 8)
43cacb12
RS
15255 {
15256 unspec = UNSPEC_REV64;
15257 pred_mode = VNx2BImode;
15258 }
3f8334a5 15259 else if (size == 4)
43cacb12
RS
15260 {
15261 unspec = UNSPEC_REV32;
15262 pred_mode = VNx4BImode;
15263 }
3f8334a5 15264 else if (size == 2)
43cacb12
RS
15265 {
15266 unspec = UNSPEC_REV16;
15267 pred_mode = VNx8BImode;
15268 }
3f8334a5
RS
15269 else
15270 return false;
923fcec3 15271
326ac20e
RS
15272 unsigned int step = diff + 1;
15273 for (i = 0; i < step; ++i)
15274 if (!d->perm.series_p (i, step, diff - i, step))
15275 return false;
923fcec3
AL
15276
15277 /* Success! */
15278 if (d->testing_p)
15279 return true;
15280
43cacb12
RS
15281 rtx src = gen_rtx_UNSPEC (d->vmode, gen_rtvec (1, d->op0), unspec);
15282 if (d->vec_flags == VEC_SVE_DATA)
15283 {
15284 rtx pred = force_reg (pred_mode, CONSTM1_RTX (pred_mode));
15285 src = gen_rtx_UNSPEC (d->vmode, gen_rtvec (2, pred, src),
15286 UNSPEC_MERGE_PTRUE);
15287 }
15288 emit_set_insn (d->target, src);
15289 return true;
15290}
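/* Example (illustrative only): on V8HImode the index vector
   { 3, 2, 1, 0, 7, 6, 5, 4 } gives diff == 3, so size == (3 + 1) * 2
   == 8 bytes and the permutation is a REV64: each 64-bit granule has
   its four 16-bit elements reversed in place.  */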
15291
15292/* Recognize patterns for the REV insn, which reverses elements within
15293 a full vector. */
15294
15295static bool
15296aarch64_evpc_rev_global (struct expand_vec_perm_d *d)
15297{
15298 poly_uint64 nelt = d->perm.length ();
15299
15300 if (!d->one_vector_p || d->vec_flags != VEC_SVE_DATA)
15301 return false;
15302
15303 if (!d->perm.series_p (0, 1, nelt - 1, -1))
15304 return false;
15305
15306 /* Success! */
15307 if (d->testing_p)
15308 return true;
15309
15310 rtx src = gen_rtx_UNSPEC (d->vmode, gen_rtvec (1, d->op0), UNSPEC_REV);
15311 emit_set_insn (d->target, src);
923fcec3
AL
15312 return true;
15313}
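/* Example (illustrative only): for an SVE vector with N elements, the
   only index vector accepted here is { N-1, N-2, ..., 1, 0 }, a full
   reversal, which maps directly onto the SVE REV instruction.  */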
15314
91bd4114
JG
15315static bool
15316aarch64_evpc_dup (struct expand_vec_perm_d *d)
15317{
91bd4114
JG
15318 rtx out = d->target;
15319 rtx in0;
6a70badb 15320 HOST_WIDE_INT elt;
ef4bddc2 15321 machine_mode vmode = d->vmode;
91bd4114
JG
15322 rtx lane;
15323
43cacb12
RS
15324 if (d->vec_flags == VEC_SVE_PRED
15325 || d->perm.encoding ().encoded_nelts () != 1
6a70badb 15326 || !d->perm[0].is_constant (&elt))
326ac20e
RS
15327 return false;
15328
43cacb12
RS
15329 if (d->vec_flags == VEC_SVE_DATA && elt >= 64 * GET_MODE_UNIT_SIZE (vmode))
15330 return false;
15331
326ac20e
RS
15332 /* Success! */
15333 if (d->testing_p)
15334 return true;
15335
91bd4114
JG
15336 /* The generic preparation in aarch64_expand_vec_perm_const_1
15337 swaps the operand order and the permute indices if it finds
15338 d->perm[0] to be in the second operand. Thus, we can always
15339 use d->op0 and need not do any extra arithmetic to get the
15340 correct lane number. */
15341 in0 = d->op0;
f901401e 15342 lane = GEN_INT (elt); /* The pattern corrects for big-endian. */
91bd4114 15343
3f8334a5
RS
15344 rtx parallel = gen_rtx_PARALLEL (vmode, gen_rtvec (1, lane));
15345 rtx select = gen_rtx_VEC_SELECT (GET_MODE_INNER (vmode), in0, parallel);
15346 emit_set_insn (out, gen_rtx_VEC_DUPLICATE (vmode, select));
91bd4114
JG
15347 return true;
15348}
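/* Example (illustrative only): an index vector whose encoding has a
   single encoded element, such as { 2, 2, 2, 2 } on V4SImode, gives
   elt == 2 and is emitted as a broadcast of lane 2 of OP0 across the
   whole result.  */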
15349
88b08073
JG
15350static bool
15351aarch64_evpc_tbl (struct expand_vec_perm_d *d)
15352{
43cacb12 15353 rtx rperm[MAX_COMPILE_TIME_VEC_BYTES], sel;
ef4bddc2 15354 machine_mode vmode = d->vmode;
6a70badb
RS
15355
15356 /* Make sure that the indices are constant. */
15357 unsigned int encoded_nelts = d->perm.encoding ().encoded_nelts ();
15358 for (unsigned int i = 0; i < encoded_nelts; ++i)
15359 if (!d->perm[i].is_constant ())
15360 return false;
88b08073 15361
88b08073
JG
15362 if (d->testing_p)
15363 return true;
15364
15365 /* Generic code will try constant permutation twice: once with the
15366 original mode and again with the elements lowered to QImode.
15367 So wait and don't do the selector expansion ourselves. */
15368 if (vmode != V8QImode && vmode != V16QImode)
15369 return false;
15370
6a70badb
RS
15371 /* to_constant is safe since this routine is specific to Advanced SIMD
15372 vectors. */
15373 unsigned int nelt = d->perm.length ().to_constant ();
15374 for (unsigned int i = 0; i < nelt; ++i)
15375 /* If big-endian and two vectors we end up with a weird mixed-endian
15376 mode on NEON. Reverse the index within each word but not the word
15377 itself. to_constant is safe because we checked is_constant above. */
15378 rperm[i] = GEN_INT (BYTES_BIG_ENDIAN
15379 ? d->perm[i].to_constant () ^ (nelt - 1)
15380 : d->perm[i].to_constant ());
bbcc9c00 15381
88b08073
JG
15382 sel = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, rperm));
15383 sel = force_reg (vmode, sel);
15384
15385 aarch64_expand_vec_perm_1 (d->target, d->op0, d->op1, sel);
15386 return true;
15387}
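/* Example (illustrative only): on big-endian V16QImode each index i
   is stored as i ^ 15, so { 0, 1, 2, ... } becomes { 15, 14, 13, ... };
   the XOR reverses the lane number within each input vector without
   changing which of the two inputs is selected.  */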
15388
43cacb12
RS
15389/* Try to implement D using an SVE TBL instruction. */
15390
15391static bool
15392aarch64_evpc_sve_tbl (struct expand_vec_perm_d *d)
15393{
15394 unsigned HOST_WIDE_INT nelt;
15395
15396 /* Permuting two variable-length vectors could overflow the
15397 index range. */
15398 if (!d->one_vector_p && !d->perm.length ().is_constant (&nelt))
15399 return false;
15400
15401 if (d->testing_p)
15402 return true;
15403
15404 machine_mode sel_mode = mode_for_int_vector (d->vmode).require ();
15405 rtx sel = vec_perm_indices_to_rtx (sel_mode, d->perm);
15406 aarch64_expand_sve_vec_perm (d->target, d->op0, d->op1, sel);
15407 return true;
15408}
15409
88b08073
JG
15410static bool
15411aarch64_expand_vec_perm_const_1 (struct expand_vec_perm_d *d)
15412{
15413 /* The pattern matching functions above are written to look for a small
15414 number to begin the sequence (0, 1, N/2). If we begin with an index
15415 from the second operand, we can swap the operands. */
6a70badb
RS
15416 poly_int64 nelt = d->perm.length ();
15417 if (known_ge (d->perm[0], nelt))
88b08073 15418 {
e3342de4 15419 d->perm.rotate_inputs (1);
cb5c6c29 15420 std::swap (d->op0, d->op1);
88b08073
JG
15421 }
15422
43cacb12
RS
15423 if ((d->vec_flags == VEC_ADVSIMD
15424 || d->vec_flags == VEC_SVE_DATA
15425 || d->vec_flags == VEC_SVE_PRED)
15426 && known_gt (nelt, 1))
cc4d934f 15427 {
43cacb12
RS
15428 if (aarch64_evpc_rev_local (d))
15429 return true;
15430 else if (aarch64_evpc_rev_global (d))
923fcec3
AL
15431 return true;
15432 else if (aarch64_evpc_ext (d))
ae0533da 15433 return true;
f901401e
AL
15434 else if (aarch64_evpc_dup (d))
15435 return true;
ae0533da 15436 else if (aarch64_evpc_zip (d))
cc4d934f
JG
15437 return true;
15438 else if (aarch64_evpc_uzp (d))
15439 return true;
15440 else if (aarch64_evpc_trn (d))
15441 return true;
43cacb12
RS
15442 if (d->vec_flags == VEC_SVE_DATA)
15443 return aarch64_evpc_sve_tbl (d);
15444 else if (d->vec_flags == VEC_ADVSIMD)
15445 return aarch64_evpc_tbl (d);
cc4d934f 15446 }
88b08073
JG
15447 return false;
15448}
15449
f151c9e1 15450/* Implement TARGET_VECTORIZE_VEC_PERM_CONST. */
88b08073 15451
f151c9e1
RS
15452static bool
15453aarch64_vectorize_vec_perm_const (machine_mode vmode, rtx target, rtx op0,
15454 rtx op1, const vec_perm_indices &sel)
88b08073
JG
15455{
15456 struct expand_vec_perm_d d;
88b08073 15457
326ac20e
RS
15458 /* Check whether the mask can be applied to a single vector. */
15459 if (op0 && rtx_equal_p (op0, op1))
15460 d.one_vector_p = true;
15461 else if (sel.all_from_input_p (0))
88b08073 15462 {
326ac20e
RS
15463 d.one_vector_p = true;
15464 op1 = op0;
88b08073 15465 }
326ac20e 15466 else if (sel.all_from_input_p (1))
88b08073 15467 {
88b08073 15468 d.one_vector_p = true;
326ac20e 15469 op0 = op1;
88b08073 15470 }
326ac20e
RS
15471 else
15472 d.one_vector_p = false;
88b08073 15473
326ac20e
RS
15474 d.perm.new_vector (sel.encoding (), d.one_vector_p ? 1 : 2,
15475 sel.nelts_per_input ());
15476 d.vmode = vmode;
43cacb12 15477 d.vec_flags = aarch64_classify_vector_mode (d.vmode);
326ac20e
RS
15478 d.target = target;
15479 d.op0 = op0;
15480 d.op1 = op1;
15481 d.testing_p = !target;
e3342de4 15482
f151c9e1
RS
15483 if (!d.testing_p)
15484 return aarch64_expand_vec_perm_const_1 (&d);
88b08073 15485
326ac20e 15486 rtx_insn *last = get_last_insn ();
f151c9e1 15487 bool ret = aarch64_expand_vec_perm_const_1 (&d);
326ac20e 15488 gcc_assert (last == get_last_insn ());
88b08073
JG
15489
15490 return ret;
15491}
15492
73e3da51
RS
15493/* Generate a byte permute mask for a register of mode MODE,
15494 which has NUNITS units. */
15495
668046d1 15496rtx
73e3da51 15497aarch64_reverse_mask (machine_mode mode, unsigned int nunits)
668046d1
DS
15498{
15499 /* We have to reverse each vector because we don't have
15500 a permuted load that can reverse-load according to ABI rules. */
15501 rtx mask;
15502 rtvec v = rtvec_alloc (16);
73e3da51
RS
15503 unsigned int i, j;
15504 unsigned int usize = GET_MODE_UNIT_SIZE (mode);
668046d1
DS
15505
15506 gcc_assert (BYTES_BIG_ENDIAN);
15507 gcc_assert (AARCH64_VALID_SIMD_QREG_MODE (mode));
15508
15509 for (i = 0; i < nunits; i++)
15510 for (j = 0; j < usize; j++)
15511 RTVEC_ELT (v, i * usize + j) = GEN_INT ((i + 1) * usize - 1 - j);
15512 mask = gen_rtx_CONST_VECTOR (V16QImode, v);
15513 return force_reg (V16QImode, mask);
15514}
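/* Example (illustrative only): for V8HImode, usize == 2 and nunits == 8,
   so the byte mask is { 1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12,
   15, 14 }: the two bytes of each 16-bit unit are swapped while the
   units themselves stay in order.  */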
15515
43cacb12
RS
15516/* Return true if X is a valid second operand for the SVE instruction
15517 that implements integer comparison OP_CODE. */
15518
15519static bool
15520aarch64_sve_cmp_operand_p (rtx_code op_code, rtx x)
15521{
15522 if (register_operand (x, VOIDmode))
15523 return true;
15524
15525 switch (op_code)
15526 {
15527 case LTU:
15528 case LEU:
15529 case GEU:
15530 case GTU:
15531 return aarch64_sve_cmp_immediate_p (x, false);
15532 case LT:
15533 case LE:
15534 case GE:
15535 case GT:
15536 case NE:
15537 case EQ:
15538 return aarch64_sve_cmp_immediate_p (x, true);
15539 default:
15540 gcc_unreachable ();
15541 }
15542}
15543
f22d7973
RS
15544/* Use predicated SVE instructions to implement the equivalent of:
15545
15546 (set TARGET OP)
15547
15548 given that PTRUE is an all-true predicate of the appropriate mode. */
15549
15550static void
15551aarch64_emit_sve_ptrue_op (rtx target, rtx ptrue, rtx op)
15552{
15553 rtx unspec = gen_rtx_UNSPEC (GET_MODE (target),
15554 gen_rtvec (2, ptrue, op),
15555 UNSPEC_MERGE_PTRUE);
15556 rtx_insn *insn = emit_set_insn (target, unspec);
15557 set_unique_reg_note (insn, REG_EQUAL, copy_rtx (op));
15558}
15559
15560/* Likewise, but also clobber the condition codes. */
15561
15562static void
15563aarch64_emit_sve_ptrue_op_cc (rtx target, rtx ptrue, rtx op)
15564{
15565 rtx unspec = gen_rtx_UNSPEC (GET_MODE (target),
15566 gen_rtvec (2, ptrue, op),
15567 UNSPEC_MERGE_PTRUE);
15568 rtx_insn *insn = emit_insn (gen_set_clobber_cc (target, unspec));
15569 set_unique_reg_note (insn, REG_EQUAL, copy_rtx (op));
15570}
15571
43cacb12
RS
15572/* Return the UNSPEC_COND_* code for comparison CODE. */
15573
15574static unsigned int
15575aarch64_unspec_cond_code (rtx_code code)
15576{
15577 switch (code)
15578 {
15579 case NE:
15580 return UNSPEC_COND_NE;
15581 case EQ:
15582 return UNSPEC_COND_EQ;
15583 case LT:
15584 return UNSPEC_COND_LT;
15585 case GT:
15586 return UNSPEC_COND_GT;
15587 case LE:
15588 return UNSPEC_COND_LE;
15589 case GE:
15590 return UNSPEC_COND_GE;
43cacb12
RS
15591 default:
15592 gcc_unreachable ();
15593 }
15594}
15595
f22d7973 15596/* Emit:
43cacb12 15597
f22d7973
RS
15598 (set TARGET (unspec [PRED OP0 OP1] UNSPEC_COND_<X>))
15599
15600 where <X> is the operation associated with comparison CODE. This form
15601 of instruction is used when (and (CODE OP0 OP1) PRED) would have different
15602 semantics, such as when PRED might not be all-true and when comparing
15603 inactive lanes could have side effects. */
15604
15605static void
15606aarch64_emit_sve_predicated_cond (rtx target, rtx_code code,
15607 rtx pred, rtx op0, rtx op1)
43cacb12 15608{
f22d7973
RS
15609 rtx unspec = gen_rtx_UNSPEC (GET_MODE (pred),
15610 gen_rtvec (3, pred, op0, op1),
15611 aarch64_unspec_cond_code (code));
15612 emit_set_insn (target, unspec);
43cacb12
RS
15613}
15614
f22d7973 15615/* Expand an SVE integer comparison using the SVE equivalent of:
43cacb12 15616
f22d7973 15617 (set TARGET (CODE OP0 OP1)). */
43cacb12
RS
15618
15619void
15620aarch64_expand_sve_vec_cmp_int (rtx target, rtx_code code, rtx op0, rtx op1)
15621{
15622 machine_mode pred_mode = GET_MODE (target);
15623 machine_mode data_mode = GET_MODE (op0);
15624
15625 if (!aarch64_sve_cmp_operand_p (code, op1))
15626 op1 = force_reg (data_mode, op1);
15627
15628 rtx ptrue = force_reg (pred_mode, CONSTM1_RTX (pred_mode));
f22d7973
RS
15629 rtx cond = gen_rtx_fmt_ee (code, pred_mode, op0, op1);
15630 aarch64_emit_sve_ptrue_op_cc (target, ptrue, cond);
43cacb12
RS
15631}
15632
f22d7973 15633/* Emit the SVE equivalent of:
43cacb12 15634
f22d7973
RS
15635 (set TMP1 (CODE1 OP0 OP1))
15636 (set TMP2 (CODE2 OP0 OP1))
15637 (set TARGET (ior:PRED_MODE TMP1 TMP2))
43cacb12 15638
f22d7973 15639 PTRUE is an all-true predicate with the same mode as TARGET. */
43cacb12
RS
15640
15641static void
f22d7973
RS
15642aarch64_emit_sve_or_conds (rtx target, rtx_code code1, rtx_code code2,
15643 rtx ptrue, rtx op0, rtx op1)
43cacb12 15644{
f22d7973 15645 machine_mode pred_mode = GET_MODE (ptrue);
43cacb12 15646 rtx tmp1 = gen_reg_rtx (pred_mode);
f22d7973
RS
15647 aarch64_emit_sve_ptrue_op (tmp1, ptrue,
15648 gen_rtx_fmt_ee (code1, pred_mode, op0, op1));
43cacb12 15649 rtx tmp2 = gen_reg_rtx (pred_mode);
f22d7973
RS
15650 aarch64_emit_sve_ptrue_op (tmp2, ptrue,
15651 gen_rtx_fmt_ee (code2, pred_mode, op0, op1));
15652 aarch64_emit_binop (target, ior_optab, tmp1, tmp2);
43cacb12
RS
15653}
15654
f22d7973 15655/* Emit the SVE equivalent of:
43cacb12 15656
f22d7973
RS
15657 (set TMP (CODE OP0 OP1))
15658 (set TARGET (not TMP))
43cacb12 15659
f22d7973 15660 PTRUE is an all-true predicate with the same mode as TARGET. */
43cacb12
RS
15661
15662static void
f22d7973
RS
15663aarch64_emit_sve_inverted_cond (rtx target, rtx ptrue, rtx_code code,
15664 rtx op0, rtx op1)
43cacb12 15665{
f22d7973
RS
15666 machine_mode pred_mode = GET_MODE (ptrue);
15667 rtx tmp = gen_reg_rtx (pred_mode);
15668 aarch64_emit_sve_ptrue_op (tmp, ptrue,
15669 gen_rtx_fmt_ee (code, pred_mode, op0, op1));
15670 aarch64_emit_unop (target, one_cmpl_optab, tmp);
43cacb12
RS
15671}
15672
f22d7973 15673/* Expand an SVE floating-point comparison using the SVE equivalent of:
43cacb12 15674
f22d7973 15675 (set TARGET (CODE OP0 OP1))
43cacb12
RS
15676
15677 If CAN_INVERT_P is true, the caller can also handle inverted results;
15678 return true if the result is in fact inverted. */
15679
15680bool
15681aarch64_expand_sve_vec_cmp_float (rtx target, rtx_code code,
15682 rtx op0, rtx op1, bool can_invert_p)
15683{
15684 machine_mode pred_mode = GET_MODE (target);
15685 machine_mode data_mode = GET_MODE (op0);
15686
15687 rtx ptrue = force_reg (pred_mode, CONSTM1_RTX (pred_mode));
15688 switch (code)
15689 {
15690 case UNORDERED:
15691 /* UNORDERED has no immediate form. */
15692 op1 = force_reg (data_mode, op1);
f22d7973 15693 /* fall through */
43cacb12
RS
15694 case LT:
15695 case LE:
15696 case GT:
15697 case GE:
15698 case EQ:
15699 case NE:
f22d7973
RS
15700 {
15701 /* There is native support for the comparison. */
15702 rtx cond = gen_rtx_fmt_ee (code, pred_mode, op0, op1);
15703 aarch64_emit_sve_ptrue_op (target, ptrue, cond);
15704 return false;
15705 }
43cacb12
RS
15706
15707 case LTGT:
15708 /* This is a trapping operation (LT or GT). */
f22d7973 15709 aarch64_emit_sve_or_conds (target, LT, GT, ptrue, op0, op1);
43cacb12
RS
15710 return false;
15711
15712 case UNEQ:
15713 if (!flag_trapping_math)
15714 {
15715 /* This would trap for signaling NaNs. */
15716 op1 = force_reg (data_mode, op1);
f22d7973 15717 aarch64_emit_sve_or_conds (target, UNORDERED, EQ, ptrue, op0, op1);
43cacb12
RS
15718 return false;
15719 }
15720 /* fall through */
43cacb12
RS
15721 case UNLT:
15722 case UNLE:
15723 case UNGT:
15724 case UNGE:
f22d7973
RS
15725 if (flag_trapping_math)
15726 {
15727 /* Work out which elements are ordered. */
15728 rtx ordered = gen_reg_rtx (pred_mode);
15729 op1 = force_reg (data_mode, op1);
15730 aarch64_emit_sve_inverted_cond (ordered, ptrue, UNORDERED, op0, op1);
15731
15732 /* Test the opposite condition for the ordered elements,
15733 then invert the result. */
15734 if (code == UNEQ)
15735 code = NE;
15736 else
15737 code = reverse_condition_maybe_unordered (code);
15738 if (can_invert_p)
15739 {
15740 aarch64_emit_sve_predicated_cond (target, code,
15741 ordered, op0, op1);
15742 return true;
15743 }
15744 rtx tmp = gen_reg_rtx (pred_mode);
15745 aarch64_emit_sve_predicated_cond (tmp, code, ordered, op0, op1);
15746 aarch64_emit_unop (target, one_cmpl_optab, tmp);
15747 return false;
15748 }
15749 break;
15750
15751 case ORDERED:
15752 /* ORDERED has no immediate form. */
15753 op1 = force_reg (data_mode, op1);
15754 break;
43cacb12
RS
15755
15756 default:
15757 gcc_unreachable ();
15758 }
f22d7973
RS
15759
15760 /* There is native support for the inverse comparison. */
15761 code = reverse_condition_maybe_unordered (code);
15762 if (can_invert_p)
15763 {
15764 rtx cond = gen_rtx_fmt_ee (code, pred_mode, op0, op1);
15765 aarch64_emit_sve_ptrue_op (target, ptrue, cond);
15766 return true;
15767 }
15768 aarch64_emit_sve_inverted_cond (target, ptrue, code, op0, op1);
15769 return false;
43cacb12
RS
15770}
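/* Worked example (illustrative only): with trapping math, UNLT is
   expanded as NOT (GE restricted to the ordered lanes).  Unordered
   lanes fail the predicated GE and become true after the inversion,
   while ordered lanes yield the plain LT result; predicating the GE
   on the ordered lanes is what stops the comparison from raising an
   invalid-operation exception for NaN inputs.  */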
15771
15772/* Expand an SVE vcond pattern with operands OPS. DATA_MODE is the mode
15773 of the data being selected and CMP_MODE is the mode of the values being
15774 compared. */
15775
15776void
15777aarch64_expand_sve_vcond (machine_mode data_mode, machine_mode cmp_mode,
15778 rtx *ops)
15779{
15780 machine_mode pred_mode
15781 = aarch64_get_mask_mode (GET_MODE_NUNITS (cmp_mode),
15782 GET_MODE_SIZE (cmp_mode)).require ();
15783 rtx pred = gen_reg_rtx (pred_mode);
15784 if (FLOAT_MODE_P (cmp_mode))
15785 {
15786 if (aarch64_expand_sve_vec_cmp_float (pred, GET_CODE (ops[3]),
15787 ops[4], ops[5], true))
15788 std::swap (ops[1], ops[2]);
15789 }
15790 else
15791 aarch64_expand_sve_vec_cmp_int (pred, GET_CODE (ops[3]), ops[4], ops[5]);
15792
15793 rtvec vec = gen_rtvec (3, pred, ops[1], ops[2]);
15794 emit_set_insn (ops[0], gen_rtx_UNSPEC (data_mode, vec, UNSPEC_SEL));
15795}
15796
99e1629f
RS
15797/* Implement TARGET_MODES_TIEABLE_P. In principle we should always return
15798 true. However due to issues with register allocation it is preferable
15799 to avoid tying integer scalar and FP scalar modes. Executing integer
15800 operations in general registers is better than treating them as scalar
15801 vector operations. This reduces latency and avoids redundant int<->FP
15802 moves. So tie modes if they are either the same class, or vector modes
15803 with other vector modes, vector structs or any scalar mode. */
97e1ad78 15804
99e1629f 15805static bool
ef4bddc2 15806aarch64_modes_tieable_p (machine_mode mode1, machine_mode mode2)
97e1ad78
JG
15807{
15808 if (GET_MODE_CLASS (mode1) == GET_MODE_CLASS (mode2))
15809 return true;
15810
15811 /* We specifically want to allow elements of "structure" modes to
15812 be tieable to the structure. This more general condition allows
43cacb12
RS
15813 other rarer situations too. The reason we don't extend this to
15814 predicate modes is that there are no predicate structure modes
15815 nor any specific instructions for extracting part of a predicate
15816 register. */
15817 if (aarch64_vector_data_mode_p (mode1)
15818 && aarch64_vector_data_mode_p (mode2))
61f17a5c
WD
15819 return true;
15820
15821 /* Also allow any scalar modes with vectors. */
15822 if (aarch64_vector_mode_supported_p (mode1)
15823 || aarch64_vector_mode_supported_p (mode2))
97e1ad78
JG
15824 return true;
15825
15826 return false;
15827}
15828
e2c75eea
JG
15829/* Return a new RTX holding the result of moving POINTER forward by
15830 AMOUNT bytes. */
15831
15832static rtx
6a70badb 15833aarch64_move_pointer (rtx pointer, poly_int64 amount)
e2c75eea
JG
15834{
15835 rtx next = plus_constant (Pmode, XEXP (pointer, 0), amount);
15836
15837 return adjust_automodify_address (pointer, GET_MODE (pointer),
15838 next, amount);
15839}
15840
15841/* Return a new RTX holding the result of moving POINTER forward by the
15842 size of the mode it points to. */
15843
15844static rtx
15845aarch64_progress_pointer (rtx pointer)
15846{
6a70badb 15847 return aarch64_move_pointer (pointer, GET_MODE_SIZE (GET_MODE (pointer)));
e2c75eea
JG
15848}
15849
15850/* Copy one MODE sized block from SRC to DST, then progress SRC and DST by
15851 MODE bytes. */
15852
15853static void
15854aarch64_copy_one_block_and_progress_pointers (rtx *src, rtx *dst,
ef4bddc2 15855 machine_mode mode)
e2c75eea
JG
15856{
15857 rtx reg = gen_reg_rtx (mode);
15858
15859 /* "Cast" the pointers to the correct mode. */
15860 *src = adjust_address (*src, mode, 0);
15861 *dst = adjust_address (*dst, mode, 0);
15862 /* Emit the memcpy. */
15863 emit_move_insn (reg, *src);
15864 emit_move_insn (*dst, reg);
15865 /* Move the pointers forward. */
15866 *src = aarch64_progress_pointer (*src);
15867 *dst = aarch64_progress_pointer (*dst);
15868}
15869
15870/* Expand movmem, as if from a __builtin_memcpy. Return true if
15871 we succeed, otherwise return false. */
15872
15873bool
15874aarch64_expand_movmem (rtx *operands)
15875{
89c52e5e 15876 int n, mode_bits;
e2c75eea
JG
15877 rtx dst = operands[0];
15878 rtx src = operands[1];
15879 rtx base;
89c52e5e 15880 machine_mode cur_mode = BLKmode, next_mode;
e2c75eea
JG
15881 bool speed_p = !optimize_function_for_size_p (cfun);
15882
15883 /* When optimizing for size, give a better estimate of the length of a
89c52e5e
TC
15884 memcpy call, but use the default otherwise. Moves larger than 8 bytes
15885 will always require an even number of instructions, and each
15886 operation requires both a load and a store, so divide the max number by 2. */
15887 int max_num_moves = (speed_p ? 16 : AARCH64_CALL_RATIO) / 2;
e2c75eea
JG
15888
15889 /* We can't do anything smart if the amount to copy is not constant. */
15890 if (!CONST_INT_P (operands[2]))
15891 return false;
15892
89c52e5e 15893 n = INTVAL (operands[2]);
e2c75eea 15894
89c52e5e
TC
15895 /* Try to keep the number of instructions low. In all cases we will do at
15896 most two moves for the residual amount, since we'll always overlap the
15897 remainder. */
15898 if (((n / 16) + (n % 16 ? 2 : 0)) > max_num_moves)
e2c75eea
JG
15899 return false;
15900
15901 base = copy_to_mode_reg (Pmode, XEXP (dst, 0));
15902 dst = adjust_automodify_address (dst, VOIDmode, base, 0);
15903
15904 base = copy_to_mode_reg (Pmode, XEXP (src, 0));
15905 src = adjust_automodify_address (src, VOIDmode, base, 0);
15906
89c52e5e
TC
15907 /* Convert n to bits to make the rest of the code simpler. */
15908 n = n * BITS_PER_UNIT;
e2c75eea 15909
89c52e5e 15910 while (n > 0)
e2c75eea 15911 {
89c52e5e
TC
15912 /* Find the largest mode in which to do the copy without over-reading
15913 or over-writing. */
15914 opt_scalar_int_mode mode_iter;
15915 FOR_EACH_MODE_IN_CLASS (mode_iter, MODE_INT)
15916 if (GET_MODE_BITSIZE (mode_iter.require ()) <= n)
15917 cur_mode = mode_iter.require ();
e2c75eea 15918
89c52e5e 15919 gcc_assert (cur_mode != BLKmode);
e2c75eea 15920
89c52e5e
TC
15921 mode_bits = GET_MODE_BITSIZE (cur_mode).to_constant ();
15922 aarch64_copy_one_block_and_progress_pointers (&src, &dst, cur_mode);
e2c75eea 15923
89c52e5e 15924 n -= mode_bits;
e2c75eea 15925
89c52e5e
TC
15926 /* Do certain trailing copies as overlapping if it's going to be
15927 cheaper, i.e. fewer instructions to do so. For instance, for a 15
15928 byte copy it's more efficient to do two overlapping 8 byte copies than
15929 an 8-, 4-, 2- and 1-byte copy. */
15930 next_mode = smallest_mode_for_size (n, MODE_INT);
15931 int n_bits = GET_MODE_BITSIZE (next_mode).to_constant ();
15932 if (n > 0 && n_bits > n && n_bits <= 8 * BITS_PER_UNIT)
15933 {
15934 src = aarch64_move_pointer (src, (n - n_bits) / BITS_PER_UNIT);
15935 dst = aarch64_move_pointer (dst, (n - n_bits) / BITS_PER_UNIT);
15936 n = n_bits;
e2c75eea
JG
15937 }
15938 }
15939
15940 return true;
15941}
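/* Worked example (illustrative only): for a 15-byte copy, n starts at
   120 bits.  The first iteration picks DImode (the widest integer mode
   of at most 120 bits) and copies bytes 0-7, leaving n == 56.
   smallest_mode_for_size (56, MODE_INT) is DImode again, so both
   pointers are moved back one byte and the second iteration copies
   bytes 7-14: two overlapping 8-byte moves instead of four
   non-overlapping ones.  */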
15942
141a3ccf
KT
15943/* Split a DImode store of a CONST_INT SRC to MEM DST as two
15944 SImode stores. Handle the case when the constant has identical
15945 bottom and top halves. This is beneficial when the two stores can be
15946 merged into an STP and we avoid synthesising potentially expensive
15947 immediates twice. Return true if such a split is possible. */
15948
15949bool
15950aarch64_split_dimode_const_store (rtx dst, rtx src)
15951{
15952 rtx lo = gen_lowpart (SImode, src);
15953 rtx hi = gen_highpart_mode (SImode, DImode, src);
15954
15955 bool size_p = optimize_function_for_size_p (cfun);
15956
15957 if (!rtx_equal_p (lo, hi))
15958 return false;
15959
15960 unsigned int orig_cost
15961 = aarch64_internal_mov_immediate (NULL_RTX, src, false, DImode);
15962 unsigned int lo_cost
15963 = aarch64_internal_mov_immediate (NULL_RTX, lo, false, SImode);
15964
15965 /* We want to transform:
15966 MOV x1, 49370
15967 MOVK x1, 0x140, lsl 16
15968 MOVK x1, 0xc0da, lsl 32
15969 MOVK x1, 0x140, lsl 48
15970 STR x1, [x0]
15971 into:
15972 MOV w1, 49370
15973 MOVK w1, 0x140, lsl 16
15974 STP w1, w1, [x0]
15975 So we want to perform this only when we save two instructions
15976 or more. When optimizing for size, however, accept any code size
15977 savings we can. */
15978 if (size_p && orig_cost <= lo_cost)
15979 return false;
15980
15981 if (!size_p
15982 && (orig_cost <= lo_cost + 1))
15983 return false;
15984
15985 rtx mem_lo = adjust_address (dst, SImode, 0);
15986 if (!aarch64_mem_pair_operand (mem_lo, SImode))
15987 return false;
15988
15989 rtx tmp_reg = gen_reg_rtx (SImode);
15990 aarch64_expand_mov_immediate (tmp_reg, lo);
15991 rtx mem_hi = aarch64_move_pointer (mem_lo, GET_MODE_SIZE (SImode));
15992 /* Don't emit an explicit store pair as this may not always be profitable.
15993 Let the sched-fusion logic decide whether to merge them. */
15994 emit_move_insn (mem_lo, tmp_reg);
15995 emit_move_insn (mem_hi, tmp_reg);
15996
15997 return true;
15998}
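/* Worked example (illustrative only): for the constant in the comment
   above, 0x0140c0da0140c0da, the DImode immediate costs four
   instructions (MOV plus three MOVKs) while the SImode low half costs
   two, so at least two instructions are saved and the split is
   performed.  When the two halves differ, or the saving is below the
   threshold, the function leaves the store alone.  */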
15999
30c46053
MC
16000/* Generate RTL for a conditional branch with rtx comparison CODE in
16001 mode CC_MODE. The destination of the unlikely conditional branch
16002 is LABEL_REF. */
16003
16004void
16005aarch64_gen_unlikely_cbranch (enum rtx_code code, machine_mode cc_mode,
16006 rtx label_ref)
16007{
16008 rtx x;
16009 x = gen_rtx_fmt_ee (code, VOIDmode,
16010 gen_rtx_REG (cc_mode, CC_REGNUM),
16011 const0_rtx);
16012
16013 x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
16014 gen_rtx_LABEL_REF (VOIDmode, label_ref),
16015 pc_rtx);
16016 aarch64_emit_unlikely_jump (gen_rtx_SET (pc_rtx, x));
16017}
16018
16019/* Generate DImode scratch registers for 128-bit (TImode) addition.
16020
16021 OP1 represents the TImode destination operand 1
16022 OP2 represents the TImode destination operand 2
16023 LOW_DEST represents the low half (DImode) of TImode operand 0
16024 LOW_IN1 represents the low half (DImode) of TImode operand 1
16025 LOW_IN2 represents the low half (DImode) of TImode operand 2
16026 HIGH_DEST represents the high half (DImode) of TImode operand 0
16027 HIGH_IN1 represents the high half (DImode) of TImode operand 1
16028 HIGH_IN2 represents the high half (DImode) of TImode operand 2. */
16029
16030void
16031aarch64_addti_scratch_regs (rtx op1, rtx op2, rtx *low_dest,
16032 rtx *low_in1, rtx *low_in2,
16033 rtx *high_dest, rtx *high_in1,
16034 rtx *high_in2)
16035{
16036 *low_dest = gen_reg_rtx (DImode);
16037 *low_in1 = gen_lowpart (DImode, op1);
16038 *low_in2 = simplify_gen_subreg (DImode, op2, TImode,
16039 subreg_lowpart_offset (DImode, TImode));
16040 *high_dest = gen_reg_rtx (DImode);
16041 *high_in1 = gen_highpart (DImode, op1);
16042 *high_in2 = simplify_gen_subreg (DImode, op2, TImode,
16043 subreg_highpart_offset (DImode, TImode));
16044}
16045
16046/* Generate DImode scratch registers for 128-bit (TImode) subtraction.
16047
16048 This function differs from 'aarch64_addti_scratch_regs' in that
16049 OP1 can be an immediate constant (zero). We must call
16050 subreg_highpart_offset with DImode and TImode arguments, otherwise
16051 VOIDmode will be used for the const_int which generates an internal
16052 error from subreg_size_highpart_offset which does not expect a size of zero.
16053
16054 OP1 represents the TImode destination operand 1
16055 OP2 represents the TImode destination operand 2
16056 LOW_DEST represents the low half (DImode) of TImode operand 0
16057 LOW_IN1 represents the low half (DImode) of TImode operand 1
16058 LOW_IN2 represents the low half (DImode) of TImode operand 2
16059 HIGH_DEST represents the high half (DImode) of TImode operand 0
16060 HIGH_IN1 represents the high half (DImode) of TImode operand 1
16061 HIGH_IN2 represents the high half (DImode) of TImode operand 2. */
16062
16063
16064void
16065aarch64_subvti_scratch_regs (rtx op1, rtx op2, rtx *low_dest,
16066 rtx *low_in1, rtx *low_in2,
16067 rtx *high_dest, rtx *high_in1,
16068 rtx *high_in2)
16069{
16070 *low_dest = gen_reg_rtx (DImode);
16071 *low_in1 = simplify_gen_subreg (DImode, op1, TImode,
16072 subreg_lowpart_offset (DImode, TImode));
16073
16074 *low_in2 = simplify_gen_subreg (DImode, op2, TImode,
16075 subreg_lowpart_offset (DImode, TImode));
16076 *high_dest = gen_reg_rtx (DImode);
16077
16078 *high_in1 = simplify_gen_subreg (DImode, op1, TImode,
16079 subreg_highpart_offset (DImode, TImode));
16080 *high_in2 = simplify_gen_subreg (DImode, op2, TImode,
16081 subreg_highpart_offset (DImode, TImode));
16082}
16083
16084/* Generate RTL for 128-bit (TImode) subtraction with overflow.
16085
16086 OP0 represents the TImode destination operand 0
16087 LOW_DEST represents the low half (DImode) of TImode operand 0
16088 LOW_IN1 represents the low half (DImode) of TImode operand 1
16089 LOW_IN2 represents the low half (DImode) of TImode operand 2
16090 HIGH_DEST represents the high half (DImode) of TImode operand 0
16091 HIGH_IN1 represents the high half (DImode) of TImode operand 1
16092 HIGH_IN2 represents the high half (DImode) of TImode operand 2. */
16093
16094void
16095aarch64_expand_subvti (rtx op0, rtx low_dest, rtx low_in1,
16096 rtx low_in2, rtx high_dest, rtx high_in1,
16097 rtx high_in2)
16098{
16099 if (low_in2 == const0_rtx)
16100 {
16101 low_dest = low_in1;
16102 emit_insn (gen_subdi3_compare1 (high_dest, high_in1,
16103 force_reg (DImode, high_in2)));
16104 }
16105 else
16106 {
16107 if (CONST_INT_P (low_in2))
16108 {
16109 low_in2 = force_reg (DImode, GEN_INT (-UINTVAL (low_in2)));
16110 high_in2 = force_reg (DImode, high_in2);
16111 emit_insn (gen_adddi3_compareC (low_dest, low_in1, low_in2));
16112 }
16113 else
16114 emit_insn (gen_subdi3_compare1 (low_dest, low_in1, low_in2));
16115 emit_insn (gen_subdi3_carryinCV (high_dest,
16116 force_reg (DImode, high_in1),
16117 high_in2));
16118 }
16119
16120 emit_move_insn (gen_lowpart (DImode, op0), low_dest);
16121 emit_move_insn (gen_highpart (DImode, op0), high_dest);
16122
16123}
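/* Illustrative note: when the low half of the second operand is a
   nonzero constant K, the sequence above computes the low difference
   as ADDS with -K, which yields the same result and the same carry
   flag as SUBS with K would, so the carry-in subtraction of the high
   halves still sees the correct borrow.  */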
16124
a3125fc2
CL
16125/* Implement the TARGET_ASAN_SHADOW_OFFSET hook. */
16126
16127static unsigned HOST_WIDE_INT
16128aarch64_asan_shadow_offset (void)
16129{
16130 return (HOST_WIDE_INT_1 << 36);
16131}
16132
5f3bc026 16133static rtx
cb4347e8 16134aarch64_gen_ccmp_first (rtx_insn **prep_seq, rtx_insn **gen_seq,
5f3bc026
ZC
16135 int code, tree treeop0, tree treeop1)
16136{
c8012fbc
WD
16137 machine_mode op_mode, cmp_mode, cc_mode = CCmode;
16138 rtx op0, op1;
5f3bc026 16139 int unsignedp = TYPE_UNSIGNED (TREE_TYPE (treeop0));
c8012fbc 16140 insn_code icode;
5f3bc026
ZC
16141 struct expand_operand ops[4];
16142
5f3bc026
ZC
16143 start_sequence ();
16144 expand_operands (treeop0, treeop1, NULL_RTX, &op0, &op1, EXPAND_NORMAL);
16145
16146 op_mode = GET_MODE (op0);
16147 if (op_mode == VOIDmode)
16148 op_mode = GET_MODE (op1);
16149
16150 switch (op_mode)
16151 {
4e10a5a7
RS
16152 case E_QImode:
16153 case E_HImode:
16154 case E_SImode:
5f3bc026
ZC
16155 cmp_mode = SImode;
16156 icode = CODE_FOR_cmpsi;
16157 break;
16158
4e10a5a7 16159 case E_DImode:
5f3bc026
ZC
16160 cmp_mode = DImode;
16161 icode = CODE_FOR_cmpdi;
16162 break;
16163
4e10a5a7 16164 case E_SFmode:
786e3c06
WD
16165 cmp_mode = SFmode;
16166 cc_mode = aarch64_select_cc_mode ((rtx_code) code, op0, op1);
16167 icode = cc_mode == CCFPEmode ? CODE_FOR_fcmpesf : CODE_FOR_fcmpsf;
16168 break;
16169
4e10a5a7 16170 case E_DFmode:
786e3c06
WD
16171 cmp_mode = DFmode;
16172 cc_mode = aarch64_select_cc_mode ((rtx_code) code, op0, op1);
16173 icode = cc_mode == CCFPEmode ? CODE_FOR_fcmpedf : CODE_FOR_fcmpdf;
16174 break;
16175
5f3bc026
ZC
16176 default:
16177 end_sequence ();
16178 return NULL_RTX;
16179 }
16180
c8012fbc
WD
16181 op0 = prepare_operand (icode, op0, 0, op_mode, cmp_mode, unsignedp);
16182 op1 = prepare_operand (icode, op1, 1, op_mode, cmp_mode, unsignedp);
5f3bc026
ZC
16183 if (!op0 || !op1)
16184 {
16185 end_sequence ();
16186 return NULL_RTX;
16187 }
16188 *prep_seq = get_insns ();
16189 end_sequence ();
16190
c8012fbc
WD
16191 create_fixed_operand (&ops[0], op0);
16192 create_fixed_operand (&ops[1], op1);
5f3bc026
ZC
16193
16194 start_sequence ();
c8012fbc 16195 if (!maybe_expand_insn (icode, 2, ops))
5f3bc026
ZC
16196 {
16197 end_sequence ();
16198 return NULL_RTX;
16199 }
16200 *gen_seq = get_insns ();
16201 end_sequence ();
16202
c8012fbc
WD
16203 return gen_rtx_fmt_ee ((rtx_code) code, cc_mode,
16204 gen_rtx_REG (cc_mode, CC_REGNUM), const0_rtx);
5f3bc026
ZC
16205}
16206
16207static rtx
cb4347e8
TS
16208aarch64_gen_ccmp_next (rtx_insn **prep_seq, rtx_insn **gen_seq, rtx prev,
16209 int cmp_code, tree treeop0, tree treeop1, int bit_code)
5f3bc026 16210{
c8012fbc
WD
16211 rtx op0, op1, target;
16212 machine_mode op_mode, cmp_mode, cc_mode = CCmode;
5f3bc026 16213 int unsignedp = TYPE_UNSIGNED (TREE_TYPE (treeop0));
c8012fbc 16214 insn_code icode;
5f3bc026 16215 struct expand_operand ops[6];
c8012fbc 16216 int aarch64_cond;
5f3bc026 16217
cb4347e8 16218 push_to_sequence (*prep_seq);
5f3bc026
ZC
16219 expand_operands (treeop0, treeop1, NULL_RTX, &op0, &op1, EXPAND_NORMAL);
16220
16221 op_mode = GET_MODE (op0);
16222 if (op_mode == VOIDmode)
16223 op_mode = GET_MODE (op1);
16224
16225 switch (op_mode)
16226 {
4e10a5a7
RS
16227 case E_QImode:
16228 case E_HImode:
16229 case E_SImode:
5f3bc026 16230 cmp_mode = SImode;
c8012fbc 16231 icode = CODE_FOR_ccmpsi;
5f3bc026
ZC
16232 break;
16233
4e10a5a7 16234 case E_DImode:
5f3bc026 16235 cmp_mode = DImode;
c8012fbc 16236 icode = CODE_FOR_ccmpdi;
5f3bc026
ZC
16237 break;
16238
4e10a5a7 16239 case E_SFmode:
786e3c06
WD
16240 cmp_mode = SFmode;
16241 cc_mode = aarch64_select_cc_mode ((rtx_code) cmp_code, op0, op1);
16242 icode = cc_mode == CCFPEmode ? CODE_FOR_fccmpesf : CODE_FOR_fccmpsf;
16243 break;
16244
4e10a5a7 16245 case E_DFmode:
786e3c06
WD
16246 cmp_mode = DFmode;
16247 cc_mode = aarch64_select_cc_mode ((rtx_code) cmp_code, op0, op1);
16248 icode = cc_mode == CCFPEmode ? CODE_FOR_fccmpedf : CODE_FOR_fccmpdf;
16249 break;
16250
5f3bc026
ZC
16251 default:
16252 end_sequence ();
16253 return NULL_RTX;
16254 }
16255
16256 op0 = prepare_operand (icode, op0, 2, op_mode, cmp_mode, unsignedp);
16257 op1 = prepare_operand (icode, op1, 3, op_mode, cmp_mode, unsignedp);
16258 if (!op0 || !op1)
16259 {
16260 end_sequence ();
16261 return NULL_RTX;
16262 }
16263 *prep_seq = get_insns ();
16264 end_sequence ();
16265
16266 target = gen_rtx_REG (cc_mode, CC_REGNUM);
c8012fbc 16267 aarch64_cond = aarch64_get_condition_code_1 (cc_mode, (rtx_code) cmp_code);
5f3bc026 16268
c8012fbc
WD
16269 if (bit_code != AND)
16270 {
16271 prev = gen_rtx_fmt_ee (REVERSE_CONDITION (GET_CODE (prev),
16272 GET_MODE (XEXP (prev, 0))),
16273 VOIDmode, XEXP (prev, 0), const0_rtx);
16274 aarch64_cond = AARCH64_INVERSE_CONDITION_CODE (aarch64_cond);
16275 }
16276
16277 create_fixed_operand (&ops[0], XEXP (prev, 0));
5f3bc026
ZC
16278 create_fixed_operand (&ops[1], target);
16279 create_fixed_operand (&ops[2], op0);
16280 create_fixed_operand (&ops[3], op1);
c8012fbc
WD
16281 create_fixed_operand (&ops[4], prev);
16282 create_fixed_operand (&ops[5], GEN_INT (aarch64_cond));
5f3bc026 16283
cb4347e8 16284 push_to_sequence (*gen_seq);
5f3bc026
ZC
16285 if (!maybe_expand_insn (icode, 6, ops))
16286 {
16287 end_sequence ();
16288 return NULL_RTX;
16289 }
16290
16291 *gen_seq = get_insns ();
16292 end_sequence ();
16293
c8012fbc 16294 return gen_rtx_fmt_ee ((rtx_code) cmp_code, VOIDmode, target, const0_rtx);
5f3bc026
ZC
16295}
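/* Illustrative example (the register choices are assumptions): for a
   condition such as "a < b && c == d" on SImode values, the first hook
   emits the initial compare and the second hook chains a conditional
   compare onto it, giving roughly

     cmp  w0, w1            // a < b: sets the flags
     ccmp w2, w3, #0, lt    // if LT held, compare c and d; else NZCV = 0000

   after which a single EQ test covers the whole condition, because the
   failure value NZCV = 0000 leaves Z clear.  */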
16296
16297#undef TARGET_GEN_CCMP_FIRST
16298#define TARGET_GEN_CCMP_FIRST aarch64_gen_ccmp_first
16299
16300#undef TARGET_GEN_CCMP_NEXT
16301#define TARGET_GEN_CCMP_NEXT aarch64_gen_ccmp_next
16302
6a569cdd
KT
16303/* Implement TARGET_SCHED_MACRO_FUSION_P. Return true if target supports
16304 instruction fusion of some sort. */
16305
16306static bool
16307aarch64_macro_fusion_p (void)
16308{
b175b679 16309 return aarch64_tune_params.fusible_ops != AARCH64_FUSE_NOTHING;
6a569cdd
KT
16310}
16311
16312
16313/* Implement TARGET_SCHED_MACRO_FUSION_PAIR_P. Return true if PREV and CURR
16314 should be kept together during scheduling. */
16315
16316static bool
16317aarch_macro_fusion_pair_p (rtx_insn *prev, rtx_insn *curr)
16318{
16319 rtx set_dest;
16320 rtx prev_set = single_set (prev);
16321 rtx curr_set = single_set (curr);
16322 /* prev and curr are simple SET insns i.e. no flag setting or branching. */
16323 bool simple_sets_p = prev_set && curr_set && !any_condjump_p (curr);
16324
16325 if (!aarch64_macro_fusion_p ())
16326 return false;
16327
d7b03373 16328 if (simple_sets_p && aarch64_fusion_enabled_p (AARCH64_FUSE_MOV_MOVK))
6a569cdd
KT
16329 {
16330 /* We are trying to match:
16331 prev (mov) == (set (reg r0) (const_int imm16))
16332 curr (movk) == (set (zero_extract (reg r0)
16333 (const_int 16)
16334 (const_int 16))
16335 (const_int imm16_1)) */
16336
16337 set_dest = SET_DEST (curr_set);
16338
16339 if (GET_CODE (set_dest) == ZERO_EXTRACT
16340 && CONST_INT_P (SET_SRC (curr_set))
16341 && CONST_INT_P (SET_SRC (prev_set))
16342 && CONST_INT_P (XEXP (set_dest, 2))
16343 && INTVAL (XEXP (set_dest, 2)) == 16
16344 && REG_P (XEXP (set_dest, 0))
16345 && REG_P (SET_DEST (prev_set))
16346 && REGNO (XEXP (set_dest, 0)) == REGNO (SET_DEST (prev_set)))
16347 {
16348 return true;
16349 }
16350 }
16351
d7b03373 16352 if (simple_sets_p && aarch64_fusion_enabled_p (AARCH64_FUSE_ADRP_ADD))
9bbe08fe
KT
16353 {
16354
16355 /* We're trying to match:
16356 prev (adrp) == (set (reg r1)
16357 (high (symbol_ref ("SYM"))))
16358 curr (add) == (set (reg r0)
16359 (lo_sum (reg r1)
16360 (symbol_ref ("SYM"))))
16361 Note that r0 need not necessarily be the same as r1, especially
16362 during pre-regalloc scheduling. */
16363
16364 if (satisfies_constraint_Ush (SET_SRC (prev_set))
16365 && REG_P (SET_DEST (prev_set)) && REG_P (SET_DEST (curr_set)))
16366 {
16367 if (GET_CODE (SET_SRC (curr_set)) == LO_SUM
16368 && REG_P (XEXP (SET_SRC (curr_set), 0))
16369 && REGNO (XEXP (SET_SRC (curr_set), 0))
16370 == REGNO (SET_DEST (prev_set))
16371 && rtx_equal_p (XEXP (SET_SRC (prev_set), 0),
16372 XEXP (SET_SRC (curr_set), 1)))
16373 return true;
16374 }
16375 }
16376
d7b03373 16377 if (simple_sets_p && aarch64_fusion_enabled_p (AARCH64_FUSE_MOVK_MOVK))
cd0cb232
KT
16378 {
16379
16380 /* We're trying to match:
16381 prev (movk) == (set (zero_extract (reg r0)
16382 (const_int 16)
16383 (const_int 32))
16384 (const_int imm16_1))
16385 curr (movk) == (set (zero_extract (reg r0)
16386 (const_int 16)
16387 (const_int 48))
16388 (const_int imm16_2)) */
16389
16390 if (GET_CODE (SET_DEST (prev_set)) == ZERO_EXTRACT
16391 && GET_CODE (SET_DEST (curr_set)) == ZERO_EXTRACT
16392 && REG_P (XEXP (SET_DEST (prev_set), 0))
16393 && REG_P (XEXP (SET_DEST (curr_set), 0))
16394 && REGNO (XEXP (SET_DEST (prev_set), 0))
16395 == REGNO (XEXP (SET_DEST (curr_set), 0))
16396 && CONST_INT_P (XEXP (SET_DEST (prev_set), 2))
16397 && CONST_INT_P (XEXP (SET_DEST (curr_set), 2))
16398 && INTVAL (XEXP (SET_DEST (prev_set), 2)) == 32
16399 && INTVAL (XEXP (SET_DEST (curr_set), 2)) == 48
16400 && CONST_INT_P (SET_SRC (prev_set))
16401 && CONST_INT_P (SET_SRC (curr_set)))
16402 return true;
16403
16404 }
d7b03373 16405 if (simple_sets_p && aarch64_fusion_enabled_p (AARCH64_FUSE_ADRP_LDR))
d8354ad7
KT
16406 {
16407 /* We're trying to match:
16408 prev (adrp) == (set (reg r0)
16409 (high (symbol_ref ("SYM"))))
16410 curr (ldr) == (set (reg r1)
16411 (mem (lo_sum (reg r0)
16412 (symbol_ref ("SYM")))))
16413 or
16414 curr (ldr) == (set (reg r1)
16415 (zero_extend (mem
16416 (lo_sum (reg r0)
16417 (symbol_ref ("SYM")))))) */
16418 if (satisfies_constraint_Ush (SET_SRC (prev_set))
16419 && REG_P (SET_DEST (prev_set)) && REG_P (SET_DEST (curr_set)))
16420 {
16421 rtx curr_src = SET_SRC (curr_set);
16422
16423 if (GET_CODE (curr_src) == ZERO_EXTEND)
16424 curr_src = XEXP (curr_src, 0);
16425
16426 if (MEM_P (curr_src) && GET_CODE (XEXP (curr_src, 0)) == LO_SUM
16427 && REG_P (XEXP (XEXP (curr_src, 0), 0))
16428 && REGNO (XEXP (XEXP (curr_src, 0), 0))
16429 == REGNO (SET_DEST (prev_set))
16430 && rtx_equal_p (XEXP (XEXP (curr_src, 0), 1),
16431 XEXP (SET_SRC (prev_set), 0)))
16432 return true;
16433 }
16434 }
cd0cb232 16435
d7b03373 16436 if (aarch64_fusion_enabled_p (AARCH64_FUSE_AES_AESMC)
00a8574a
WD
16437 && aarch_crypto_can_dual_issue (prev, curr))
16438 return true;
16439
d7b03373 16440 if (aarch64_fusion_enabled_p (AARCH64_FUSE_CMP_BRANCH)
3759108f
AP
16441 && any_condjump_p (curr))
16442 {
16443 enum attr_type prev_type = get_attr_type (prev);
16444
509f819a
N
16445 unsigned int condreg1, condreg2;
16446 rtx cc_reg_1;
16447 aarch64_fixed_condition_code_regs (&condreg1, &condreg2);
16448 cc_reg_1 = gen_rtx_REG (CCmode, condreg1);
16449
16450 if (reg_referenced_p (cc_reg_1, PATTERN (curr))
16451 && prev
16452 && modified_in_p (cc_reg_1, prev))
16453 {
16454 /* FIXME: this misses some instructions that ThunderX considers
16455 simple arithmetic; simple shifts are also missed here. */
16456 if (prev_type == TYPE_ALUS_SREG
16457 || prev_type == TYPE_ALUS_IMM
16458 || prev_type == TYPE_LOGICS_REG
16459 || prev_type == TYPE_LOGICS_IMM)
16460 return true;
16461 }
3759108f
AP
16462 }
16463
bee7e0fc
AP
16464 if (prev_set
16465 && curr_set
16466 && aarch64_fusion_enabled_p (AARCH64_FUSE_ALU_BRANCH)
00c7c57f
JB
16467 && any_condjump_p (curr))
16468 {
16469 /* We're trying to match:
16470 prev (alu_insn) == (set (r0) plus ((r0) (r1/imm)))
16471 curr (cbz) == (set (pc) (if_then_else (eq/ne) (r0)
16472 (const_int 0))
16473 (label_ref ("SYM"))
16474 (pc)) */
16475 if (SET_DEST (curr_set) == (pc_rtx)
16476 && GET_CODE (SET_SRC (curr_set)) == IF_THEN_ELSE
16477 && REG_P (XEXP (XEXP (SET_SRC (curr_set), 0), 0))
16478 && REG_P (SET_DEST (prev_set))
16479 && REGNO (SET_DEST (prev_set))
16480 == REGNO (XEXP (XEXP (SET_SRC (curr_set), 0), 0)))
16481 {
16482 /* Fuse ALU operations followed by conditional branch instruction. */
16483 switch (get_attr_type (prev))
16484 {
16485 case TYPE_ALU_IMM:
16486 case TYPE_ALU_SREG:
16487 case TYPE_ADC_REG:
16488 case TYPE_ADC_IMM:
16489 case TYPE_ADCS_REG:
16490 case TYPE_ADCS_IMM:
16491 case TYPE_LOGIC_REG:
16492 case TYPE_LOGIC_IMM:
16493 case TYPE_CSEL:
16494 case TYPE_ADR:
16495 case TYPE_MOV_IMM:
16496 case TYPE_SHIFT_REG:
16497 case TYPE_SHIFT_IMM:
16498 case TYPE_BFM:
16499 case TYPE_RBIT:
16500 case TYPE_REV:
16501 case TYPE_EXTEND:
16502 return true;
16503
16504 default:;
16505 }
16506 }
16507 }
16508
6a569cdd
KT
16509 return false;
16510}
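/* Illustrative example: with AARCH64_FUSE_MOV_MOVK enabled, the pair

     mov  x0, 0xc0da
     movk x0, 0x140, lsl 16

   matches the first pattern above (a constant MOV into r0 followed by
   a MOVK into bits 16-31 of the same register) and is therefore kept
   adjacent during scheduling.  */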
16511
f2879a90
KT
16512/* Return true iff the instruction fusion described by OP is enabled. */
16513
16514bool
16515aarch64_fusion_enabled_p (enum aarch64_fusion_pairs op)
16516{
16517 return (aarch64_tune_params.fusible_ops & op) != 0;
16518}
16519
350013bc
BC
16520/* If MEM is in the form of [base+offset], extract the two parts
16521 of the address into BASE and OFFSET, otherwise return false
16522 after clearing BASE and OFFSET. */
16523
16524bool
16525extract_base_offset_in_addr (rtx mem, rtx *base, rtx *offset)
16526{
16527 rtx addr;
16528
16529 gcc_assert (MEM_P (mem));
16530
16531 addr = XEXP (mem, 0);
16532
16533 if (REG_P (addr))
16534 {
16535 *base = addr;
16536 *offset = const0_rtx;
16537 return true;
16538 }
16539
16540 if (GET_CODE (addr) == PLUS
16541 && REG_P (XEXP (addr, 0)) && CONST_INT_P (XEXP (addr, 1)))
16542 {
16543 *base = XEXP (addr, 0);
16544 *offset = XEXP (addr, 1);
16545 return true;
16546 }
16547
16548 *base = NULL_RTX;
16549 *offset = NULL_RTX;
16550
16551 return false;
16552}
16553
16554/* Types for scheduling fusion. */
16555enum sched_fusion_type
16556{
16557 SCHED_FUSION_NONE = 0,
16558 SCHED_FUSION_LD_SIGN_EXTEND,
16559 SCHED_FUSION_LD_ZERO_EXTEND,
16560 SCHED_FUSION_LD,
16561 SCHED_FUSION_ST,
16562 SCHED_FUSION_NUM
16563};
16564
16565/* If INSN is a load or store of an address in the form of [base+offset],
16566 extract the two parts into BASE and OFFSET. Return the scheduling
16567 fusion type of this INSN. */
16568
16569static enum sched_fusion_type
16570fusion_load_store (rtx_insn *insn, rtx *base, rtx *offset)
16571{
16572 rtx x, dest, src;
16573 enum sched_fusion_type fusion = SCHED_FUSION_LD;
16574
16575 gcc_assert (INSN_P (insn));
16576 x = PATTERN (insn);
16577 if (GET_CODE (x) != SET)
16578 return SCHED_FUSION_NONE;
16579
16580 src = SET_SRC (x);
16581 dest = SET_DEST (x);
16582
abc52318
KT
16583 machine_mode dest_mode = GET_MODE (dest);
16584
16585 if (!aarch64_mode_valid_for_sched_fusion_p (dest_mode))
350013bc
BC
16586 return SCHED_FUSION_NONE;
16587
16588 if (GET_CODE (src) == SIGN_EXTEND)
16589 {
16590 fusion = SCHED_FUSION_LD_SIGN_EXTEND;
16591 src = XEXP (src, 0);
16592 if (GET_CODE (src) != MEM || GET_MODE (src) != SImode)
16593 return SCHED_FUSION_NONE;
16594 }
16595 else if (GET_CODE (src) == ZERO_EXTEND)
16596 {
16597 fusion = SCHED_FUSION_LD_ZERO_EXTEND;
16598 src = XEXP (src, 0);
16599 if (GET_CODE (src) != MEM || GET_MODE (src) != SImode)
16600 return SCHED_FUSION_NONE;
16601 }
16602
16603 if (GET_CODE (src) == MEM && REG_P (dest))
16604 extract_base_offset_in_addr (src, base, offset);
16605 else if (GET_CODE (dest) == MEM && (REG_P (src) || src == const0_rtx))
16606 {
16607 fusion = SCHED_FUSION_ST;
16608 extract_base_offset_in_addr (dest, base, offset);
16609 }
16610 else
16611 return SCHED_FUSION_NONE;
16612
16613 if (*base == NULL_RTX || *offset == NULL_RTX)
16614 fusion = SCHED_FUSION_NONE;
16615
16616 return fusion;
16617}
16618
16619/* Implement the TARGET_SCHED_FUSION_PRIORITY hook.
16620
16621 Currently we only support fusing ldr or str instructions, so FUSION_PRI
16622 and PRI are only calculated for these instructions. For other instructions,
16623 FUSION_PRI and PRI are simply set to MAX_PRI - 1. In the future, other
16624 types of instruction fusion can be added by returning different priorities.
16625
16626 It's important that irrelevant instructions get the largest FUSION_PRI. */
16627
16628static void
16629aarch64_sched_fusion_priority (rtx_insn *insn, int max_pri,
16630 int *fusion_pri, int *pri)
16631{
16632 int tmp, off_val;
16633 rtx base, offset;
16634 enum sched_fusion_type fusion;
16635
16636 gcc_assert (INSN_P (insn));
16637
16638 tmp = max_pri - 1;
16639 fusion = fusion_load_store (insn, &base, &offset);
16640 if (fusion == SCHED_FUSION_NONE)
16641 {
16642 *pri = tmp;
16643 *fusion_pri = tmp;
16644 return;
16645 }
16646
16647 /* Set FUSION_PRI according to fusion type and base register. */
16648 *fusion_pri = tmp - fusion * FIRST_PSEUDO_REGISTER - REGNO (base);
16649
16650 /* Calculate PRI. */
16651 tmp /= 2;
16652
16653 /* INSN with smaller offset goes first. */
16654 off_val = (int)(INTVAL (offset));
16655 if (off_val >= 0)
16656 tmp -= (off_val & 0xfffff);
16657 else
16658 tmp += ((- off_val) & 0xfffff);
16659
16660 *pri = tmp;
16661 return;
16662}
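/* Worked example (illustrative only): two SImode loads from [x1, 4]
   and [x1, 8] share a fusion type and base register, so they get the
   same FUSION_PRI; their PRI values differ by the offsets, so the
   load with the smaller offset is scheduled first, ready for the
   ldp/stp peepholes below.  */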
16663
9bca63d4
WD
16664/* Implement the TARGET_SCHED_ADJUST_PRIORITY hook.
16665 Adjust priority of sha1h instructions so they are scheduled before
16666 other SHA1 instructions. */
16667
16668static int
16669aarch64_sched_adjust_priority (rtx_insn *insn, int priority)
16670{
16671 rtx x = PATTERN (insn);
16672
16673 if (GET_CODE (x) == SET)
16674 {
16675 x = SET_SRC (x);
16676
16677 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_SHA1H)
16678 return priority + 10;
16679 }
16680
16681 return priority;
16682}
16683
350013bc
BC
16684/* Given OPERANDS of consecutive load/store, check if we can merge
16685 them into ldp/stp. LOAD is true if they are load instructions.
16686 MODE is the mode of memory operands. */
16687
16688bool
16689aarch64_operands_ok_for_ldpstp (rtx *operands, bool load,
b8506a8a 16690 machine_mode mode)
350013bc
BC
16691{
16692 HOST_WIDE_INT offval_1, offval_2, msize;
16693 enum reg_class rclass_1, rclass_2;
16694 rtx mem_1, mem_2, reg_1, reg_2, base_1, base_2, offset_1, offset_2;
16695
16696 if (load)
16697 {
16698 mem_1 = operands[1];
16699 mem_2 = operands[3];
16700 reg_1 = operands[0];
16701 reg_2 = operands[2];
16702 gcc_assert (REG_P (reg_1) && REG_P (reg_2));
16703 if (REGNO (reg_1) == REGNO (reg_2))
16704 return false;
16705 }
16706 else
16707 {
16708 mem_1 = operands[0];
16709 mem_2 = operands[2];
16710 reg_1 = operands[1];
16711 reg_2 = operands[3];
16712 }
16713
bf84ac44
AP
16714 /* The mems cannot be volatile. */
16715 if (MEM_VOLATILE_P (mem_1) || MEM_VOLATILE_P (mem_2))
16716 return false;
16717
54700e2e
AP
16718 /* If we have SImode and slow unaligned ldp,
16719 check that the alignment is at least 8 bytes. */
16720 if (mode == SImode
16721 && (aarch64_tune_params.extra_tuning_flags
16722 & AARCH64_EXTRA_TUNE_SLOW_UNALIGNED_LDPW)
16723 && !optimize_size
16724 && MEM_ALIGN (mem_1) < 8 * BITS_PER_UNIT)
16725 return false;
16726
350013bc
BC
16727 /* Check if the addresses are in the form of [base+offset]. */
16728 extract_base_offset_in_addr (mem_1, &base_1, &offset_1);
16729 if (base_1 == NULL_RTX || offset_1 == NULL_RTX)
16730 return false;
16731 extract_base_offset_in_addr (mem_2, &base_2, &offset_2);
16732 if (base_2 == NULL_RTX || offset_2 == NULL_RTX)
16733 return false;
16734
16735 /* Check if the bases are same. */
16736 if (!rtx_equal_p (base_1, base_2))
16737 return false;
16738
dfe1da23
JW
16739 /* The operands must be of the same size. */
16740 gcc_assert (known_eq (GET_MODE_SIZE (GET_MODE (mem_1)),
16741 GET_MODE_SIZE (GET_MODE (mem_2))));
16742
350013bc
BC
16743 offval_1 = INTVAL (offset_1);
16744 offval_2 = INTVAL (offset_2);
6a70badb
RS
16745 /* We should only be trying this for fixed-sized modes. There is no
16746 SVE LDP/STP instruction. */
16747 msize = GET_MODE_SIZE (mode).to_constant ();
350013bc
BC
16748 /* Check if the offsets are consecutive. */
16749 if (offval_1 != (offval_2 + msize) && offval_2 != (offval_1 + msize))
16750 return false;
16751
16752 /* Check if the addresses are clobbered by load. */
16753 if (load)
16754 {
16755 if (reg_mentioned_p (reg_1, mem_1))
16756 return false;
16757
16758 /* In increasing order, the last load can clobber the address. */
16759 if (offval_1 > offval_2 && reg_mentioned_p (reg_2, mem_2))
9b56ec11 16760 return false;
350013bc
BC
16761 }
16762
9b56ec11
JW
16763 /* One of the memory accesses must be a mempair operand.
16764 If it is not the first one, they need to be swapped by the
16765 peephole. */
16766 if (!aarch64_mem_pair_operand (mem_1, GET_MODE (mem_1))
16767 && !aarch64_mem_pair_operand (mem_2, GET_MODE (mem_2)))
16768 return false;
16769
350013bc
BC
16770 if (REG_P (reg_1) && FP_REGNUM_P (REGNO (reg_1)))
16771 rclass_1 = FP_REGS;
16772 else
16773 rclass_1 = GENERAL_REGS;
16774
16775 if (REG_P (reg_2) && FP_REGNUM_P (REGNO (reg_2)))
16776 rclass_2 = FP_REGS;
16777 else
16778 rclass_2 = GENERAL_REGS;
16779
16780 /* Check if the registers are of same class. */
16781 if (rclass_1 != rclass_2)
16782 return false;
16783
16784 return true;
16785}
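
/* For example (added commentary, not part of the original source),
   with mode == SImode the pair

     ldr w0, [x2]
     ldr w1, [x2, 4]

   passes every check above: same base, consecutive offsets, the same
   register class, and neither destination appears in an address, so
   the peephole may emit ldp w0, w1, [x2].  */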

/* Given OPERANDS of consecutive load/store that can be merged,
   swap them if they are not in ascending order.  */
void
aarch64_swap_ldrstr_operands (rtx* operands, bool load)
{
  rtx mem_1, mem_2, base_1, base_2, offset_1, offset_2;
  HOST_WIDE_INT offval_1, offval_2;

  if (load)
    {
      mem_1 = operands[1];
      mem_2 = operands[3];
    }
  else
    {
      mem_1 = operands[0];
      mem_2 = operands[2];
    }

  extract_base_offset_in_addr (mem_1, &base_1, &offset_1);
  extract_base_offset_in_addr (mem_2, &base_2, &offset_2);

  offval_1 = INTVAL (offset_1);
  offval_2 = INTVAL (offset_2);

  if (offval_1 > offval_2)
    {
      /* Irrespective of whether this is a load or a store,
	 we do the same swap.  */
      std::swap (operands[0], operands[2]);
      std::swap (operands[1], operands[3]);
    }
}

/* Taking X and Y to be HOST_WIDE_INT pointers, return the result of a
   comparison between the two.  */
int
aarch64_host_wide_int_compare (const void *x, const void *y)
{
  return wi::cmps (* ((const HOST_WIDE_INT *) x),
		   * ((const HOST_WIDE_INT *) y));
}
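
/* Illustrative qsort usage (added commentary; the values are
   hypothetical):

     HOST_WIDE_INT offs[] = { 0x108, 0x100, 0x10c, 0x104 };
     qsort (offs, 4, sizeof (HOST_WIDE_INT), aarch64_host_wide_int_compare);

   leaves offs as { 0x100, 0x104, 0x108, 0x10c }.  */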

/* Taking X and Y to be pairs of RTX, one pointing to a MEM rtx and the
   other pointing to a REG rtx containing an offset, compare the offsets
   of the two pairs.

   Return:

     1 iff offset (X) > offset (Y)
     0 iff offset (X) == offset (Y)
    -1 iff offset (X) < offset (Y)  */
int
aarch64_ldrstr_offset_compare (const void *x, const void *y)
{
  const rtx * operands_1 = (const rtx *) x;
  const rtx * operands_2 = (const rtx *) y;
  rtx mem_1, mem_2, base, offset_1, offset_2;

  if (MEM_P (operands_1[0]))
    mem_1 = operands_1[0];
  else
    mem_1 = operands_1[1];

  if (MEM_P (operands_2[0]))
    mem_2 = operands_2[0];
  else
    mem_2 = operands_2[1];

  /* Extract the offsets.  */
  extract_base_offset_in_addr (mem_1, &base, &offset_1);
  extract_base_offset_in_addr (mem_2, &base, &offset_2);

  gcc_assert (offset_1 != NULL_RTX && offset_2 != NULL_RTX);

  return wi::cmps (INTVAL (offset_1), INTVAL (offset_2));
}
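
/* Added note: this comparator is the qsort callback used by
   aarch64_gen_adjusted_ldpstp below to put the four (reg, mem) operand
   pairs of a candidate sequence into ascending offset order.  Only the
   MEM of each pair is inspected; the bases are already known to be
   equal, so comparing the constant offsets is sufficient.  */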

/* Given OPERANDS of consecutive load/store, check if we can merge
   them into ldp/stp by adjusting the offset.  LOAD is true if they
   are load instructions.  MODE is the mode of memory operands.

   Given below consecutive stores:

     str  w1, [xb, 0x100]
     str  w1, [xb, 0x104]
     str  w1, [xb, 0x108]
     str  w1, [xb, 0x10c]

   Though the offsets are out of the range supported by stp, we can
   still pair them after adjusting the offset, like:

     add  scratch, xb, 0x100
     stp  w1, w1, [scratch]
     stp  w1, w1, [scratch, 0x8]

   The peephole patterns detecting this opportunity should guarantee
   the scratch register is available.  */

bool
aarch64_operands_adjust_ok_for_ldpstp (rtx *operands, bool load,
				       scalar_mode mode)
{
  const int num_insns = 4;
  enum reg_class rclass;
  HOST_WIDE_INT offvals[num_insns], msize;
  rtx mem[num_insns], reg[num_insns], base[num_insns], offset[num_insns];

  if (load)
    {
      for (int i = 0; i < num_insns; i++)
	{
	  reg[i] = operands[2 * i];
	  mem[i] = operands[2 * i + 1];

	  gcc_assert (REG_P (reg[i]));
	}

      /* Do not attempt to merge the loads if the loads clobber each other.  */
      for (int i = 0; i < 8; i += 2)
	for (int j = i + 2; j < 8; j += 2)
	  if (reg_overlap_mentioned_p (operands[i], operands[j]))
	    return false;
    }
  else
    for (int i = 0; i < num_insns; i++)
      {
	mem[i] = operands[2 * i];
	reg[i] = operands[2 * i + 1];
      }

  /* Skip if memory operand is by itself valid for ldp/stp.  */
  if (!MEM_P (mem[0]) || aarch64_mem_pair_operand (mem[0], mode))
    return false;

  for (int i = 0; i < num_insns; i++)
    {
      /* The mems cannot be volatile.  */
      if (MEM_VOLATILE_P (mem[i]))
	return false;

      /* Check if the addresses are in the form of [base+offset].  */
      extract_base_offset_in_addr (mem[i], base + i, offset + i);
      if (base[i] == NULL_RTX || offset[i] == NULL_RTX)
	return false;
    }

  /* Check if the registers are of the same class.  */
  rclass = REG_P (reg[0]) && FP_REGNUM_P (REGNO (reg[0]))
    ? FP_REGS : GENERAL_REGS;

  for (int i = 1; i < num_insns; i++)
    if (REG_P (reg[i]) && FP_REGNUM_P (REGNO (reg[i])))
      {
	if (rclass != FP_REGS)
	  return false;
      }
    else
      {
	if (rclass != GENERAL_REGS)
	  return false;
      }

  /* Only the last register in the order in which they occur
     may be clobbered by the load.  */
  if (rclass == GENERAL_REGS && load)
    for (int i = 0; i < num_insns - 1; i++)
      if (reg_mentioned_p (reg[i], mem[i]))
	return false;

  /* Check if the bases are the same.  */
  for (int i = 0; i < num_insns - 1; i++)
    if (!rtx_equal_p (base[i], base[i + 1]))
      return false;

  for (int i = 0; i < num_insns; i++)
    offvals[i] = INTVAL (offset[i]);

  msize = GET_MODE_SIZE (mode);

  /* Check if the offsets can be put in the right order to do a ldp/stp.  */
  qsort (offvals, num_insns, sizeof (HOST_WIDE_INT),
	 aarch64_host_wide_int_compare);

  if (!(offvals[1] == offvals[0] + msize
	&& offvals[3] == offvals[2] + msize))
    return false;

  /* Check that offsets are within range of each other.  The ldp/stp
     instructions have 7-bit immediate offsets, so use 0x80.  */
  if (offvals[2] - offvals[0] >= msize * 0x80)
    return false;

  /* The offsets must be aligned with respect to each other.  */
  if (offvals[0] % msize != offvals[2] % msize)
    return false;

  /* If we have SImode and slow unaligned ldp,
     check that the alignment is at least 8 bytes.  */
  if (mode == SImode
      && (aarch64_tune_params.extra_tuning_flags
	  & AARCH64_EXTRA_TUNE_SLOW_UNALIGNED_LDPW)
      && !optimize_size
      && MEM_ALIGN (mem[0]) < 8 * BITS_PER_UNIT)
    return false;

  return true;
}
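
/* Worked example (added commentary, not part of the original source).
   For the four SImode stores in the comment above, msize == 4 and the
   sorted offsets are { 0x100, 0x104, 0x108, 0x10c }:
   offvals[1] == offvals[0] + 4 and offvals[3] == offvals[2] + 4, the
   spread 0x108 - 0x100 == 8 is well below msize * 0x80 == 0x200, and
   0x100 % 4 == 0x108 % 4, so the function returns true.  */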

/* Given OPERANDS of consecutive load/store, this function pairs them
   into LDP/STP after adjusting the offset.  It depends on the fact
   that the operands can be sorted so the offsets are correct for STP.
   MODE is the mode of memory operands.  CODE is the rtl operator
   which should be applied to all memory operands, it's SIGN_EXTEND,
   ZERO_EXTEND or UNKNOWN.  */

bool
aarch64_gen_adjusted_ldpstp (rtx *operands, bool load,
			     scalar_mode mode, RTX_CODE code)
{
  rtx base, offset_1, offset_3, t1, t2;
  rtx mem_1, mem_2, mem_3, mem_4;
  rtx temp_operands[8];
  HOST_WIDE_INT off_val_1, off_val_3, base_off, new_off_1, new_off_3,
		stp_off_upper_limit, stp_off_lower_limit, msize;

  /* We make changes on a copy as we may still bail out.  */
  for (int i = 0; i < 8; i ++)
    temp_operands[i] = operands[i];

  /* Sort the operands.  */
  qsort (temp_operands, 4, 2 * sizeof (rtx *), aarch64_ldrstr_offset_compare);

  if (load)
    {
      mem_1 = temp_operands[1];
      mem_2 = temp_operands[3];
      mem_3 = temp_operands[5];
      mem_4 = temp_operands[7];
    }
  else
    {
      mem_1 = temp_operands[0];
      mem_2 = temp_operands[2];
      mem_3 = temp_operands[4];
      mem_4 = temp_operands[6];
      gcc_assert (code == UNKNOWN);
    }

  extract_base_offset_in_addr (mem_1, &base, &offset_1);
  extract_base_offset_in_addr (mem_3, &base, &offset_3);
  gcc_assert (base != NULL_RTX && offset_1 != NULL_RTX
	      && offset_3 != NULL_RTX);

  /* Adjust offset so it can fit in LDP/STP instruction.  */
  msize = GET_MODE_SIZE (mode);
  stp_off_upper_limit = msize * (0x40 - 1);
  stp_off_lower_limit = - msize * 0x40;

  off_val_1 = INTVAL (offset_1);
  off_val_3 = INTVAL (offset_3);

  /* The base offset is optimally half way between the two STP/LDP offsets.  */
  if (msize <= 4)
    base_off = (off_val_1 + off_val_3) / 2;
  else
    /* However, due to issues with negative LDP/STP offset generation for
       larger modes (DF, DI and vector modes), we must not use negative
       addresses smaller than 9 signed unadjusted bits can store.  This
       provides the most range in this case.  */
    base_off = off_val_1;

  /* Adjust the base so that it is aligned with the addresses but still
     optimal.  */
  if (base_off % msize != off_val_1 % msize)
    /* Fix the offset, bearing in mind we want to make it bigger not
       smaller.  */
    base_off += (((base_off % msize) - (off_val_1 % msize)) + msize) % msize;
  else if (msize <= 4)
    /* The negative range of LDP/STP is one larger than the positive range.  */
    base_off += msize;

  /* Check if base offset is too big or too small.  We can attempt to resolve
     this issue by setting it to the maximum value and seeing if the offsets
     still fit.  */
  if (base_off >= 0x1000)
    {
      base_off = 0x1000 - 1;
      /* We must still make sure that the base offset is aligned with respect
	 to the address.  But it may not be made any bigger.  */
      base_off -= (((base_off % msize) - (off_val_1 % msize)) + msize) % msize;
    }

  /* Likewise for the case where the base is too small.  */
  if (base_off <= -0x1000)
    {
      base_off = -0x1000 + 1;
      base_off += (((base_off % msize) - (off_val_1 % msize)) + msize) % msize;
    }

  /* Offset of the first STP/LDP.  */
  new_off_1 = off_val_1 - base_off;

  /* Offset of the second STP/LDP.  */
  new_off_3 = off_val_3 - base_off;

  /* The offsets must be within the range of the LDP/STP instructions.  */
  if (new_off_1 > stp_off_upper_limit || new_off_1 < stp_off_lower_limit
      || new_off_3 > stp_off_upper_limit || new_off_3 < stp_off_lower_limit)
    return false;

  replace_equiv_address_nv (mem_1, plus_constant (Pmode, operands[8],
						  new_off_1), true);
  replace_equiv_address_nv (mem_2, plus_constant (Pmode, operands[8],
						  new_off_1 + msize), true);
  replace_equiv_address_nv (mem_3, plus_constant (Pmode, operands[8],
						  new_off_3), true);
  replace_equiv_address_nv (mem_4, plus_constant (Pmode, operands[8],
						  new_off_3 + msize), true);

  if (!aarch64_mem_pair_operand (mem_1, mode)
      || !aarch64_mem_pair_operand (mem_3, mode))
    return false;

  if (code == ZERO_EXTEND)
    {
      mem_1 = gen_rtx_ZERO_EXTEND (DImode, mem_1);
      mem_2 = gen_rtx_ZERO_EXTEND (DImode, mem_2);
      mem_3 = gen_rtx_ZERO_EXTEND (DImode, mem_3);
      mem_4 = gen_rtx_ZERO_EXTEND (DImode, mem_4);
    }
  else if (code == SIGN_EXTEND)
    {
      mem_1 = gen_rtx_SIGN_EXTEND (DImode, mem_1);
      mem_2 = gen_rtx_SIGN_EXTEND (DImode, mem_2);
      mem_3 = gen_rtx_SIGN_EXTEND (DImode, mem_3);
      mem_4 = gen_rtx_SIGN_EXTEND (DImode, mem_4);
    }

  if (load)
    {
      operands[0] = temp_operands[0];
      operands[1] = mem_1;
      operands[2] = temp_operands[2];
      operands[3] = mem_2;
      operands[4] = temp_operands[4];
      operands[5] = mem_3;
      operands[6] = temp_operands[6];
      operands[7] = mem_4;
    }
  else
    {
      operands[0] = mem_1;
      operands[1] = temp_operands[1];
      operands[2] = mem_2;
      operands[3] = temp_operands[3];
      operands[4] = mem_3;
      operands[5] = temp_operands[5];
      operands[6] = mem_4;
      operands[7] = temp_operands[7];
    }

  /* Emit adjusting instruction.  */
  emit_insn (gen_rtx_SET (operands[8], plus_constant (DImode, base, base_off)));
  /* Emit ldp/stp instructions.  */
  t1 = gen_rtx_SET (operands[0], operands[1]);
  t2 = gen_rtx_SET (operands[2], operands[3]);
  emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, t1, t2)));
  t1 = gen_rtx_SET (operands[4], operands[5]);
  t2 = gen_rtx_SET (operands[6], operands[7]);
  emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, t1, t2)));
  return true;
}
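
/* Worked example (added commentary, not part of the original source).
   Continuing the SImode example: off_val_1 == 0x100, off_val_3 == 0x108
   and msize == 4, so the offset limits are [-0x100, 0xfc].  base_off
   starts as (0x100 + 0x108) / 2 == 0x104; it is already aligned, so
   msize is added, giving 0x108.  The new offsets are then
   0x100 - 0x108 == -8 and 0x108 - 0x108 == 0, both in range, and the
   emitted sequence is

     add scratch, xb, 0x108
     stp w1, w1, [scratch, -8]
     stp w1, w1, [scratch]  */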

/* Implement TARGET_VECTORIZE_EMPTY_MASK_IS_EXPENSIVE.  Assume for now that
   it isn't worth branching around empty masked ops (including masked
   stores).  */

static bool
aarch64_empty_mask_is_expensive (unsigned)
{
  return false;
}

/* Return 1 if pseudo register should be created and used to hold
   GOT address for PIC code.  */

bool
aarch64_use_pseudo_pic_reg (void)
{
  return aarch64_cmodel == AARCH64_CMODEL_SMALL_SPIC;
}

/* Implement TARGET_UNSPEC_MAY_TRAP_P.  */

static int
aarch64_unspec_may_trap_p (const_rtx x, unsigned flags)
{
  switch (XINT (x, 1))
    {
    case UNSPEC_GOTSMALLPIC:
    case UNSPEC_GOTSMALLPIC28K:
    case UNSPEC_GOTTINYPIC:
      return 0;
    default:
      break;
    }

  return default_unspec_may_trap_p (x, flags);
}

/* If X is a positive CONST_DOUBLE with a value that is a power of 2
   return the log2 of that value.  Otherwise return -1.  */

int
aarch64_fpconst_pow_of_2 (rtx x)
{
  const REAL_VALUE_TYPE *r;

  if (!CONST_DOUBLE_P (x))
    return -1;

  r = CONST_DOUBLE_REAL_VALUE (x);

  if (REAL_VALUE_NEGATIVE (*r)
      || REAL_VALUE_ISNAN (*r)
      || REAL_VALUE_ISINF (*r)
      || !real_isinteger (r, DFmode))
    return -1;

  return exact_log2 (real_to_integer (r));
}
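
/* Examples (added commentary, not part of the original source):
   4.0 yields 2 and 1.0 yields 0, while 3.0 (not a power of 2),
   0.5 (not an integer) and -2.0 (negative) all yield -1.  */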

/* If X is a vector of equal CONST_DOUBLE values and that value is
   Y, return the aarch64_fpconst_pow_of_2 of Y.  Otherwise return -1.  */

int
aarch64_vec_fpconst_pow_of_2 (rtx x)
{
  int nelts;
  if (GET_CODE (x) != CONST_VECTOR
      || !CONST_VECTOR_NUNITS (x).is_constant (&nelts))
    return -1;

  if (GET_MODE_CLASS (GET_MODE (x)) != MODE_VECTOR_FLOAT)
    return -1;

  int firstval = aarch64_fpconst_pow_of_2 (CONST_VECTOR_ELT (x, 0));
  if (firstval <= 0)
    return -1;

  for (int i = 1; i < nelts; i++)
    if (aarch64_fpconst_pow_of_2 (CONST_VECTOR_ELT (x, i)) != firstval)
      return -1;

  return firstval;
}
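
/* Added note: a vector of 4.0 values yields 2, while mixed values such
   as { 4.0, 2.0 } yield -1.  Since firstval <= 0 is rejected, a vector
   of 1.0 values (whose log2 is 0) also yields -1.  */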

/* Implement TARGET_PROMOTED_TYPE to promote 16-bit floating point types
   to float.

   __fp16 always promotes through this hook.
   _Float16 may promote if TARGET_FLT_EVAL_METHOD is 16, but we do that
   through the generic excess precision logic rather than here.  */

static tree
aarch64_promoted_type (const_tree t)
{
  if (SCALAR_FLOAT_TYPE_P (t)
      && TYPE_MAIN_VARIANT (t) == aarch64_fp16_type_node)
    return float_type_node;

  return NULL_TREE;
}
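
/* For example (added commentary, not part of the original source),
   given

     __fp16 a, b;

   the sum a + b is evaluated in float and only converted back to
   __fp16 if the result is stored in an __fp16 object, following the
   ACLE rule that __fp16 promotes to float in arithmetic.  */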

/* Implement the TARGET_OPTAB_SUPPORTED_P hook.  */

static bool
aarch64_optab_supported_p (int op, machine_mode mode1, machine_mode,
			   optimization_type opt_type)
{
  switch (op)
    {
    case rsqrt_optab:
      return opt_type == OPTIMIZE_FOR_SPEED && use_rsqrt_p (mode1);

    default:
      return true;
    }
}

/* Implement the TARGET_DWARF_POLY_INDETERMINATE_VALUE hook.  */

static unsigned int
aarch64_dwarf_poly_indeterminate_value (unsigned int i, unsigned int *factor,
					int *offset)
{
  /* Polynomial invariant 1 == (VG / 2) - 1.  */
  gcc_assert (i == 1);
  *factor = 2;
  *offset = 1;
  return AARCH64_DWARF_VG;
}
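
/* Worked example (added commentary, not part of the original source):
   for a 256-bit SVE vector length, VG (the number of 64-bit granules)
   is 4, so indeterminate 1 evaluates to (4 / 2) - 1 == 1, i.e. the
   vector is one 128-bit chunk longer than the 128-bit minimum.  */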

/* Implement TARGET_LIBGCC_FLOATING_POINT_MODE_SUPPORTED_P - return TRUE
   if MODE is HFmode, and punt to the generic implementation otherwise.  */

static bool
aarch64_libgcc_floating_mode_supported_p (scalar_float_mode mode)
{
  return (mode == HFmode
	  ? true
	  : default_libgcc_floating_mode_supported_p (mode));
}

/* Implement TARGET_SCALAR_MODE_SUPPORTED_P - return TRUE
   if MODE is HFmode, and punt to the generic implementation otherwise.  */

static bool
aarch64_scalar_mode_supported_p (scalar_mode mode)
{
  return (mode == HFmode
	  ? true
	  : default_scalar_mode_supported_p (mode));
}

/* Set the value of FLT_EVAL_METHOD.
   ISO/IEC TS 18661-3 defines two values that we'd like to make use of:

     0: evaluate all operations and constants, whose semantic type has at
	most the range and precision of type float, to the range and
	precision of float; evaluate all other operations and constants to
	the range and precision of the semantic type;

     N, where _FloatN is a supported interchange floating type:
	evaluate all operations and constants, whose semantic type has at
	most the range and precision of the _FloatN type, to the range and
	precision of the _FloatN type; evaluate all other operations and
	constants to the range and precision of the semantic type;

   If we have the ARMv8.2-A extensions then we support _Float16 in native
   precision, so we should set this to 16.  Otherwise, we support the type,
   but want to evaluate expressions in float precision, so set this to
   0.  */

static enum flt_eval_method
aarch64_excess_precision (enum excess_precision_type type)
{
  switch (type)
    {
    case EXCESS_PRECISION_TYPE_FAST:
    case EXCESS_PRECISION_TYPE_STANDARD:
      /* We can calculate either in 16-bit range and precision or
	 32-bit range and precision.  Make that decision based on whether
	 we have native support for the ARMv8.2-A 16-bit floating-point
	 instructions or not.  */
      return (TARGET_FP_F16INST
	      ? FLT_EVAL_METHOD_PROMOTE_TO_FLOAT16
	      : FLT_EVAL_METHOD_PROMOTE_TO_FLOAT);
    case EXCESS_PRECISION_TYPE_IMPLICIT:
      return FLT_EVAL_METHOD_PROMOTE_TO_FLOAT16;
    default:
      gcc_unreachable ();
    }
  return FLT_EVAL_METHOD_UNPREDICTABLE;
}
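
/* Added note: in practice this means that with TARGET_FP_F16INST
   (e.g. -march=armv8.2-a+fp16) a _Float16 product a * b is evaluated
   directly in 16-bit precision, whereas without it the operands are
   promoted and the operation is carried out in float, with only the
   final result narrowed back to _Float16.  */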

/* Implement TARGET_SCHED_CAN_SPECULATE_INSN.  Return true if INSN can be
   scheduled for speculative execution.  Reject the long-running division
   and square-root instructions.  */

static bool
aarch64_sched_can_speculate_insn (rtx_insn *insn)
{
  switch (get_attr_type (insn))
    {
    case TYPE_SDIV:
    case TYPE_UDIV:
    case TYPE_FDIVS:
    case TYPE_FDIVD:
    case TYPE_FSQRTS:
    case TYPE_FSQRTD:
    case TYPE_NEON_FP_SQRT_S:
    case TYPE_NEON_FP_SQRT_D:
    case TYPE_NEON_FP_SQRT_S_Q:
    case TYPE_NEON_FP_SQRT_D_Q:
    case TYPE_NEON_FP_DIV_S:
    case TYPE_NEON_FP_DIV_D:
    case TYPE_NEON_FP_DIV_S_Q:
    case TYPE_NEON_FP_DIV_D_Q:
      return false;
    default:
      return true;
    }
}

/* Implement TARGET_COMPUTE_PRESSURE_CLASSES.  */

static int
aarch64_compute_pressure_classes (reg_class *classes)
{
  int i = 0;
  classes[i++] = GENERAL_REGS;
  classes[i++] = FP_REGS;
  /* PR_REGS isn't a useful pressure class because many predicate pseudo
     registers need to go in PR_LO_REGS at some point during their
     lifetime.  Splitting it into two halves has the effect of making
     all predicates count against PR_LO_REGS, so that we try whenever
     possible to restrict the number of live predicates to 8.  This
     greatly reduces the amount of spilling in certain loops.  */
  classes[i++] = PR_LO_REGS;
  classes[i++] = PR_HI_REGS;
  return i;
}

/* Implement TARGET_CAN_CHANGE_MODE_CLASS.  */

static bool
aarch64_can_change_mode_class (machine_mode from,
			       machine_mode to, reg_class_t)
{
  if (BYTES_BIG_ENDIAN)
    {
      bool from_sve_p = aarch64_sve_data_mode_p (from);
      bool to_sve_p = aarch64_sve_data_mode_p (to);

      /* Don't allow changes between SVE data modes and non-SVE modes.
	 See the comment at the head of aarch64-sve.md for details.  */
      if (from_sve_p != to_sve_p)
	return false;

      /* Don't allow changes in element size: lane 0 of the new vector
	 would not then be lane 0 of the old vector.  See the comment
	 above aarch64_maybe_expand_sve_subreg_move for a more detailed
	 description.

	 In the worst case, this forces a register to be spilled in
	 one mode and reloaded in the other, which handles the
	 endianness correctly.  */
      if (from_sve_p && GET_MODE_UNIT_SIZE (from) != GET_MODE_UNIT_SIZE (to))
	return false;
    }
  return true;
}

/* Implement TARGET_EARLY_REMAT_MODES.  */

static void
aarch64_select_early_remat_modes (sbitmap modes)
{
  /* SVE values are not normally live across a call, so it should be
     worth doing early rematerialization even in VL-specific mode.  */
  for (int i = 0; i < NUM_MACHINE_MODES; ++i)
    {
      machine_mode mode = (machine_mode) i;
      unsigned int vec_flags = aarch64_classify_vector_mode (mode);
      if (vec_flags & VEC_ANY_SVE)
	bitmap_set_bit (modes, i);
    }
}

/* Override the default target speculation_safe_value.  */
static rtx
aarch64_speculation_safe_value (machine_mode mode,
				rtx result, rtx val, rtx failval)
{
  /* Maybe we should warn if falling back to hard barriers.  They are
     likely to be noticeably more expensive than the alternative below.  */
  if (!aarch64_track_speculation)
    return default_speculation_safe_value (mode, result, val, failval);

  if (!REG_P (val))
    val = copy_to_mode_reg (mode, val);

  if (!aarch64_reg_or_zero (failval, mode))
    failval = copy_to_mode_reg (mode, failval);

  switch (mode)
    {
    case E_QImode:
      emit_insn (gen_despeculate_copyqi (result, val, failval));
      break;
    case E_HImode:
      emit_insn (gen_despeculate_copyhi (result, val, failval));
      break;
    case E_SImode:
      emit_insn (gen_despeculate_copysi (result, val, failval));
      break;
    case E_DImode:
      emit_insn (gen_despeculate_copydi (result, val, failval));
      break;
    case E_TImode:
      emit_insn (gen_despeculate_copyti (result, val, failval));
      break;
    default:
      gcc_unreachable ();
    }
  return result;
}
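
/* Illustrative C-level use (added commentary, not part of the original
   source):

     if (i < len)
       val = __builtin_speculation_safe_value (array[i]);

   With -mtrack-speculation this expands to one of the despeculate
   patterns above; otherwise the default hook emits a full speculation
   barrier instead.  */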

/* Target-specific selftests.  */

#if CHECKING_P

namespace selftest {

/* Selftest for the RTL loader.
   Verify that the RTL loader copes with a dump from
   print_rtx_function.  This is essentially just a test that class
   function_reader can handle a real dump, but it also verifies
   that lookup_reg_by_dump_name correctly handles hard regs.
   The presence of hard reg names in the dump means that the test is
   target-specific, hence it is in this file.  */

static void
aarch64_test_loading_full_dump ()
{
  rtl_dump_test t (SELFTEST_LOCATION, locate_file ("aarch64/times-two.rtl"));

  ASSERT_STREQ ("times_two", IDENTIFIER_POINTER (DECL_NAME (cfun->decl)));

  rtx_insn *insn_1 = get_insn_by_uid (1);
  ASSERT_EQ (NOTE, GET_CODE (insn_1));

  rtx_insn *insn_15 = get_insn_by_uid (15);
  ASSERT_EQ (INSN, GET_CODE (insn_15));
  ASSERT_EQ (USE, GET_CODE (PATTERN (insn_15)));

  /* Verify crtl->return_rtx.  */
  ASSERT_EQ (REG, GET_CODE (crtl->return_rtx));
  ASSERT_EQ (0, REGNO (crtl->return_rtx));
  ASSERT_EQ (SImode, GET_MODE (crtl->return_rtx));
}

/* Run all target-specific selftests.  */

static void
aarch64_run_selftests (void)
{
  aarch64_test_loading_full_dump ();
}

} // namespace selftest

#endif /* #if CHECKING_P */

#undef TARGET_ADDRESS_COST
#define TARGET_ADDRESS_COST aarch64_address_cost

/* This hook determines whether unnamed bitfields affect the alignment
   of the containing structure.  The hook returns true if the structure
   should inherit the alignment requirements of an unnamed bitfield's
   type.  */
#undef TARGET_ALIGN_ANON_BITFIELD
#define TARGET_ALIGN_ANON_BITFIELD hook_bool_void_true

#undef TARGET_ASM_ALIGNED_DI_OP
#define TARGET_ASM_ALIGNED_DI_OP "\t.xword\t"

#undef TARGET_ASM_ALIGNED_HI_OP
#define TARGET_ASM_ALIGNED_HI_OP "\t.hword\t"

#undef TARGET_ASM_ALIGNED_SI_OP
#define TARGET_ASM_ALIGNED_SI_OP "\t.word\t"

#undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
#define TARGET_ASM_CAN_OUTPUT_MI_THUNK \
  hook_bool_const_tree_hwi_hwi_const_tree_true

#undef TARGET_ASM_FILE_START
#define TARGET_ASM_FILE_START aarch64_start_file

#undef TARGET_ASM_OUTPUT_MI_THUNK
#define TARGET_ASM_OUTPUT_MI_THUNK aarch64_output_mi_thunk

#undef TARGET_ASM_SELECT_RTX_SECTION
#define TARGET_ASM_SELECT_RTX_SECTION aarch64_select_rtx_section

#undef TARGET_ASM_TRAMPOLINE_TEMPLATE
#define TARGET_ASM_TRAMPOLINE_TEMPLATE aarch64_asm_trampoline_template

#undef TARGET_BUILD_BUILTIN_VA_LIST
#define TARGET_BUILD_BUILTIN_VA_LIST aarch64_build_builtin_va_list

#undef TARGET_CALLEE_COPIES
#define TARGET_CALLEE_COPIES hook_bool_CUMULATIVE_ARGS_mode_tree_bool_false

#undef TARGET_CAN_ELIMINATE
#define TARGET_CAN_ELIMINATE aarch64_can_eliminate

#undef TARGET_CAN_INLINE_P
#define TARGET_CAN_INLINE_P aarch64_can_inline_p

#undef TARGET_CANNOT_FORCE_CONST_MEM
#define TARGET_CANNOT_FORCE_CONST_MEM aarch64_cannot_force_const_mem

#undef TARGET_CASE_VALUES_THRESHOLD
#define TARGET_CASE_VALUES_THRESHOLD aarch64_case_values_threshold

#undef TARGET_CONDITIONAL_REGISTER_USAGE
#define TARGET_CONDITIONAL_REGISTER_USAGE aarch64_conditional_register_usage

/* Only the least significant bit is used for initialization guard
   variables.  */
#undef TARGET_CXX_GUARD_MASK_BIT
#define TARGET_CXX_GUARD_MASK_BIT hook_bool_void_true

#undef TARGET_C_MODE_FOR_SUFFIX
#define TARGET_C_MODE_FOR_SUFFIX aarch64_c_mode_for_suffix

#ifdef TARGET_BIG_ENDIAN_DEFAULT
#undef TARGET_DEFAULT_TARGET_FLAGS
#define TARGET_DEFAULT_TARGET_FLAGS (MASK_BIG_END)
#endif

#undef TARGET_CLASS_MAX_NREGS
#define TARGET_CLASS_MAX_NREGS aarch64_class_max_nregs

#undef TARGET_BUILTIN_DECL
#define TARGET_BUILTIN_DECL aarch64_builtin_decl

#undef TARGET_BUILTIN_RECIPROCAL
#define TARGET_BUILTIN_RECIPROCAL aarch64_builtin_reciprocal

#undef TARGET_C_EXCESS_PRECISION
#define TARGET_C_EXCESS_PRECISION aarch64_excess_precision

#undef TARGET_EXPAND_BUILTIN
#define TARGET_EXPAND_BUILTIN aarch64_expand_builtin

#undef TARGET_EXPAND_BUILTIN_VA_START
#define TARGET_EXPAND_BUILTIN_VA_START aarch64_expand_builtin_va_start

#undef TARGET_FOLD_BUILTIN
#define TARGET_FOLD_BUILTIN aarch64_fold_builtin

#undef TARGET_FUNCTION_ARG
#define TARGET_FUNCTION_ARG aarch64_function_arg

#undef TARGET_FUNCTION_ARG_ADVANCE
#define TARGET_FUNCTION_ARG_ADVANCE aarch64_function_arg_advance

#undef TARGET_FUNCTION_ARG_BOUNDARY
#define TARGET_FUNCTION_ARG_BOUNDARY aarch64_function_arg_boundary

#undef TARGET_FUNCTION_ARG_PADDING
#define TARGET_FUNCTION_ARG_PADDING aarch64_function_arg_padding

#undef TARGET_GET_RAW_RESULT_MODE
#define TARGET_GET_RAW_RESULT_MODE aarch64_get_reg_raw_mode
#undef TARGET_GET_RAW_ARG_MODE
#define TARGET_GET_RAW_ARG_MODE aarch64_get_reg_raw_mode

#undef TARGET_FUNCTION_OK_FOR_SIBCALL
#define TARGET_FUNCTION_OK_FOR_SIBCALL aarch64_function_ok_for_sibcall

#undef TARGET_FUNCTION_VALUE
#define TARGET_FUNCTION_VALUE aarch64_function_value

#undef TARGET_FUNCTION_VALUE_REGNO_P
#define TARGET_FUNCTION_VALUE_REGNO_P aarch64_function_value_regno_p

#undef TARGET_GIMPLE_FOLD_BUILTIN
#define TARGET_GIMPLE_FOLD_BUILTIN aarch64_gimple_fold_builtin

#undef TARGET_GIMPLIFY_VA_ARG_EXPR
#define TARGET_GIMPLIFY_VA_ARG_EXPR aarch64_gimplify_va_arg_expr

#undef TARGET_INIT_BUILTINS
#define TARGET_INIT_BUILTINS aarch64_init_builtins

#undef TARGET_IRA_CHANGE_PSEUDO_ALLOCNO_CLASS
#define TARGET_IRA_CHANGE_PSEUDO_ALLOCNO_CLASS \
  aarch64_ira_change_pseudo_allocno_class

#undef TARGET_LEGITIMATE_ADDRESS_P
#define TARGET_LEGITIMATE_ADDRESS_P aarch64_legitimate_address_hook_p

#undef TARGET_LEGITIMATE_CONSTANT_P
#define TARGET_LEGITIMATE_CONSTANT_P aarch64_legitimate_constant_p

#undef TARGET_LEGITIMIZE_ADDRESS_DISPLACEMENT
#define TARGET_LEGITIMIZE_ADDRESS_DISPLACEMENT \
  aarch64_legitimize_address_displacement

#undef TARGET_LIBGCC_CMP_RETURN_MODE
#define TARGET_LIBGCC_CMP_RETURN_MODE aarch64_libgcc_cmp_return_mode

#undef TARGET_LIBGCC_FLOATING_MODE_SUPPORTED_P
#define TARGET_LIBGCC_FLOATING_MODE_SUPPORTED_P \
aarch64_libgcc_floating_mode_supported_p

#undef TARGET_MANGLE_TYPE
#define TARGET_MANGLE_TYPE aarch64_mangle_type

#undef TARGET_MEMORY_MOVE_COST
#define TARGET_MEMORY_MOVE_COST aarch64_memory_move_cost

#undef TARGET_MIN_DIVISIONS_FOR_RECIP_MUL
#define TARGET_MIN_DIVISIONS_FOR_RECIP_MUL aarch64_min_divisions_for_recip_mul

#undef TARGET_MUST_PASS_IN_STACK
#define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size

/* This target hook should return true if accesses to volatile bitfields
   should use the narrowest mode possible.  It should return false if these
   accesses should use the bitfield container type.  */
#undef TARGET_NARROW_VOLATILE_BITFIELD
#define TARGET_NARROW_VOLATILE_BITFIELD hook_bool_void_false

#undef TARGET_OPTION_OVERRIDE
#define TARGET_OPTION_OVERRIDE aarch64_override_options

#undef TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE
#define TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE \
  aarch64_override_options_after_change

#undef TARGET_OPTION_SAVE
#define TARGET_OPTION_SAVE aarch64_option_save

#undef TARGET_OPTION_RESTORE
#define TARGET_OPTION_RESTORE aarch64_option_restore

#undef TARGET_OPTION_PRINT
#define TARGET_OPTION_PRINT aarch64_option_print

#undef TARGET_OPTION_VALID_ATTRIBUTE_P
#define TARGET_OPTION_VALID_ATTRIBUTE_P aarch64_option_valid_attribute_p

#undef TARGET_SET_CURRENT_FUNCTION
#define TARGET_SET_CURRENT_FUNCTION aarch64_set_current_function

#undef TARGET_PASS_BY_REFERENCE
#define TARGET_PASS_BY_REFERENCE aarch64_pass_by_reference

#undef TARGET_PREFERRED_RELOAD_CLASS
#define TARGET_PREFERRED_RELOAD_CLASS aarch64_preferred_reload_class

#undef TARGET_SCHED_REASSOCIATION_WIDTH
#define TARGET_SCHED_REASSOCIATION_WIDTH aarch64_reassociation_width

#undef TARGET_PROMOTED_TYPE
#define TARGET_PROMOTED_TYPE aarch64_promoted_type

#undef TARGET_SECONDARY_RELOAD
#define TARGET_SECONDARY_RELOAD aarch64_secondary_reload

#undef TARGET_SHIFT_TRUNCATION_MASK
#define TARGET_SHIFT_TRUNCATION_MASK aarch64_shift_truncation_mask

#undef TARGET_SETUP_INCOMING_VARARGS
#define TARGET_SETUP_INCOMING_VARARGS aarch64_setup_incoming_varargs

#undef TARGET_STRUCT_VALUE_RTX
#define TARGET_STRUCT_VALUE_RTX aarch64_struct_value_rtx

#undef TARGET_REGISTER_MOVE_COST
#define TARGET_REGISTER_MOVE_COST aarch64_register_move_cost

#undef TARGET_RETURN_IN_MEMORY
#define TARGET_RETURN_IN_MEMORY aarch64_return_in_memory

#undef TARGET_RETURN_IN_MSB
#define TARGET_RETURN_IN_MSB aarch64_return_in_msb

#undef TARGET_RTX_COSTS
#define TARGET_RTX_COSTS aarch64_rtx_costs_wrapper

#undef TARGET_SCALAR_MODE_SUPPORTED_P
#define TARGET_SCALAR_MODE_SUPPORTED_P aarch64_scalar_mode_supported_p

#undef TARGET_SCHED_ISSUE_RATE
#define TARGET_SCHED_ISSUE_RATE aarch64_sched_issue_rate

#undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
  aarch64_sched_first_cycle_multipass_dfa_lookahead

#undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD
#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD \
  aarch64_first_cycle_multipass_dfa_lookahead_guard

#undef TARGET_SHRINK_WRAP_GET_SEPARATE_COMPONENTS
#define TARGET_SHRINK_WRAP_GET_SEPARATE_COMPONENTS \
  aarch64_get_separate_components

#undef TARGET_SHRINK_WRAP_COMPONENTS_FOR_BB
#define TARGET_SHRINK_WRAP_COMPONENTS_FOR_BB \
  aarch64_components_for_bb

#undef TARGET_SHRINK_WRAP_DISQUALIFY_COMPONENTS
#define TARGET_SHRINK_WRAP_DISQUALIFY_COMPONENTS \
  aarch64_disqualify_components

#undef TARGET_SHRINK_WRAP_EMIT_PROLOGUE_COMPONENTS
#define TARGET_SHRINK_WRAP_EMIT_PROLOGUE_COMPONENTS \
  aarch64_emit_prologue_components

#undef TARGET_SHRINK_WRAP_EMIT_EPILOGUE_COMPONENTS
#define TARGET_SHRINK_WRAP_EMIT_EPILOGUE_COMPONENTS \
  aarch64_emit_epilogue_components

#undef TARGET_SHRINK_WRAP_SET_HANDLED_COMPONENTS
#define TARGET_SHRINK_WRAP_SET_HANDLED_COMPONENTS \
  aarch64_set_handled_components

#undef TARGET_TRAMPOLINE_INIT
#define TARGET_TRAMPOLINE_INIT aarch64_trampoline_init

#undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
#define TARGET_USE_BLOCKS_FOR_CONSTANT_P aarch64_use_blocks_for_constant_p

#undef TARGET_VECTOR_MODE_SUPPORTED_P
#define TARGET_VECTOR_MODE_SUPPORTED_P aarch64_vector_mode_supported_p

#undef TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT
#define TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT \
  aarch64_builtin_support_vector_misalignment

#undef TARGET_ARRAY_MODE
#define TARGET_ARRAY_MODE aarch64_array_mode

#undef TARGET_ARRAY_MODE_SUPPORTED_P
#define TARGET_ARRAY_MODE_SUPPORTED_P aarch64_array_mode_supported_p

#undef TARGET_VECTORIZE_ADD_STMT_COST
#define TARGET_VECTORIZE_ADD_STMT_COST aarch64_add_stmt_cost

#undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
#define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
  aarch64_builtin_vectorization_cost

#undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
#define TARGET_VECTORIZE_PREFERRED_SIMD_MODE aarch64_preferred_simd_mode

#undef TARGET_VECTORIZE_BUILTINS
#define TARGET_VECTORIZE_BUILTINS

#undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
#define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
  aarch64_builtin_vectorized_function

#undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES
#define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES \
  aarch64_autovectorize_vector_sizes

#undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV
#define TARGET_ATOMIC_ASSIGN_EXPAND_FENV \
  aarch64_atomic_assign_expand_fenv

/* Section anchor support.  */

#undef TARGET_MIN_ANCHOR_OFFSET
#define TARGET_MIN_ANCHOR_OFFSET -256

/* Limit the maximum anchor offset to 4k-1, since that's the limit for a
   byte offset; we can do much more for larger data types, but have no way
   to determine the size of the access.  We assume accesses are aligned.  */
#undef TARGET_MAX_ANCHOR_OFFSET
#define TARGET_MAX_ANCHOR_OFFSET 4095

#undef TARGET_VECTOR_ALIGNMENT
#define TARGET_VECTOR_ALIGNMENT aarch64_simd_vector_alignment

#undef TARGET_VECTORIZE_PREFERRED_VECTOR_ALIGNMENT
#define TARGET_VECTORIZE_PREFERRED_VECTOR_ALIGNMENT \
  aarch64_vectorize_preferred_vector_alignment
#undef TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE
#define TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE \
  aarch64_simd_vector_alignment_reachable

/* vec_perm support.  */

#undef TARGET_VECTORIZE_VEC_PERM_CONST
#define TARGET_VECTORIZE_VEC_PERM_CONST \
  aarch64_vectorize_vec_perm_const

#undef TARGET_VECTORIZE_GET_MASK_MODE
#define TARGET_VECTORIZE_GET_MASK_MODE aarch64_get_mask_mode
#undef TARGET_VECTORIZE_EMPTY_MASK_IS_EXPENSIVE
#define TARGET_VECTORIZE_EMPTY_MASK_IS_EXPENSIVE \
  aarch64_empty_mask_is_expensive
#undef TARGET_PREFERRED_ELSE_VALUE
#define TARGET_PREFERRED_ELSE_VALUE \
  aarch64_preferred_else_value

#undef TARGET_INIT_LIBFUNCS
#define TARGET_INIT_LIBFUNCS aarch64_init_libfuncs

#undef TARGET_FIXED_CONDITION_CODE_REGS
#define TARGET_FIXED_CONDITION_CODE_REGS aarch64_fixed_condition_code_regs

#undef TARGET_FLAGS_REGNUM
#define TARGET_FLAGS_REGNUM CC_REGNUM

#undef TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS
#define TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS true

#undef TARGET_ASAN_SHADOW_OFFSET
#define TARGET_ASAN_SHADOW_OFFSET aarch64_asan_shadow_offset

#undef TARGET_LEGITIMIZE_ADDRESS
#define TARGET_LEGITIMIZE_ADDRESS aarch64_legitimize_address

#undef TARGET_SCHED_CAN_SPECULATE_INSN
#define TARGET_SCHED_CAN_SPECULATE_INSN aarch64_sched_can_speculate_insn

#undef TARGET_CAN_USE_DOLOOP_P
#define TARGET_CAN_USE_DOLOOP_P can_use_doloop_if_innermost

#undef TARGET_SCHED_ADJUST_PRIORITY
#define TARGET_SCHED_ADJUST_PRIORITY aarch64_sched_adjust_priority

#undef TARGET_SCHED_MACRO_FUSION_P
#define TARGET_SCHED_MACRO_FUSION_P aarch64_macro_fusion_p

#undef TARGET_SCHED_MACRO_FUSION_PAIR_P
#define TARGET_SCHED_MACRO_FUSION_PAIR_P aarch_macro_fusion_pair_p

#undef TARGET_SCHED_FUSION_PRIORITY
#define TARGET_SCHED_FUSION_PRIORITY aarch64_sched_fusion_priority

#undef TARGET_UNSPEC_MAY_TRAP_P
#define TARGET_UNSPEC_MAY_TRAP_P aarch64_unspec_may_trap_p

#undef TARGET_USE_PSEUDO_PIC_REG
#define TARGET_USE_PSEUDO_PIC_REG aarch64_use_pseudo_pic_reg

#undef TARGET_PRINT_OPERAND
#define TARGET_PRINT_OPERAND aarch64_print_operand

#undef TARGET_PRINT_OPERAND_ADDRESS
#define TARGET_PRINT_OPERAND_ADDRESS aarch64_print_operand_address

#undef TARGET_OPTAB_SUPPORTED_P
#define TARGET_OPTAB_SUPPORTED_P aarch64_optab_supported_p

#undef TARGET_OMIT_STRUCT_RETURN_REG
#define TARGET_OMIT_STRUCT_RETURN_REG true

#undef TARGET_DWARF_POLY_INDETERMINATE_VALUE
#define TARGET_DWARF_POLY_INDETERMINATE_VALUE \
  aarch64_dwarf_poly_indeterminate_value

/* The architecture reserves bits 0 and 1 so use bit 2 for descriptors.  */
#undef TARGET_CUSTOM_FUNCTION_DESCRIPTORS
#define TARGET_CUSTOM_FUNCTION_DESCRIPTORS 4

#undef TARGET_HARD_REGNO_NREGS
#define TARGET_HARD_REGNO_NREGS aarch64_hard_regno_nregs
#undef TARGET_HARD_REGNO_MODE_OK
#define TARGET_HARD_REGNO_MODE_OK aarch64_hard_regno_mode_ok

#undef TARGET_MODES_TIEABLE_P
#define TARGET_MODES_TIEABLE_P aarch64_modes_tieable_p

#undef TARGET_HARD_REGNO_CALL_PART_CLOBBERED
#define TARGET_HARD_REGNO_CALL_PART_CLOBBERED \
  aarch64_hard_regno_call_part_clobbered

#undef TARGET_CONSTANT_ALIGNMENT
#define TARGET_CONSTANT_ALIGNMENT aarch64_constant_alignment

#undef TARGET_COMPUTE_PRESSURE_CLASSES
#define TARGET_COMPUTE_PRESSURE_CLASSES aarch64_compute_pressure_classes

#undef TARGET_CAN_CHANGE_MODE_CLASS
#define TARGET_CAN_CHANGE_MODE_CLASS aarch64_can_change_mode_class

#undef TARGET_SELECT_EARLY_REMAT_MODES
#define TARGET_SELECT_EARLY_REMAT_MODES aarch64_select_early_remat_modes

#undef TARGET_SPECULATION_SAFE_VALUE
#define TARGET_SPECULATION_SAFE_VALUE aarch64_speculation_safe_value

#if CHECKING_P
#undef TARGET_RUN_TARGET_SELFTESTS
#define TARGET_RUN_TARGET_SELFTESTS selftest::aarch64_run_selftests
#endif /* #if CHECKING_P */

struct gcc_target targetm = TARGET_INITIALIZER;

#include "gt-aarch64.h"