/* Machine description for AArch64 architecture.
   Copyright (C) 2009-2018 Free Software Foundation, Inc.
   Contributed by ARM Ltd.

   This file is part of GCC.

   GCC is free software; you can redistribute it and/or modify it
   under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 3, or (at your option)
   any later version.

   GCC is distributed in the hope that it will be useful, but
   WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with GCC; see the file COPYING3.  If not see
   <http://www.gnu.org/licenses/>.  */

#define IN_TARGET_CODE 1

#include "config.h"
#define INCLUDE_STRING
#include "system.h"
#include "coretypes.h"
#include "backend.h"
#include "target.h"
#include "rtl.h"
#include "tree.h"
#include "memmodel.h"
#include "gimple.h"
#include "cfghooks.h"
#include "cfgloop.h"
#include "df.h"
#include "tm_p.h"
#include "stringpool.h"
#include "attribs.h"
#include "optabs.h"
#include "regs.h"
#include "emit-rtl.h"
#include "recog.h"
#include "diagnostic.h"
#include "insn-attr.h"
#include "alias.h"
#include "fold-const.h"
#include "stor-layout.h"
#include "calls.h"
#include "varasm.h"
#include "output.h"
#include "flags.h"
#include "explow.h"
#include "expr.h"
#include "reload.h"
#include "langhooks.h"
#include "opts.h"
#include "params.h"
#include "gimplify.h"
#include "dwarf2.h"
#include "gimple-iterator.h"
#include "tree-vectorizer.h"
#include "aarch64-cost-tables.h"
#include "dumpfile.h"
#include "builtins.h"
#include "rtl-iter.h"
#include "tm-constrs.h"
#include "sched-int.h"
#include "target-globals.h"
#include "common/common-target.h"
#include "cfgrtl.h"
#include "selftest.h"
#include "selftest-rtl.h"
#include "rtx-vector-builder.h"

/* This file should be included last.  */
#include "target-def.h"

/* Defined for convenience.  */
#define POINTER_BYTES (POINTER_SIZE / BITS_PER_UNIT)
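/* Illustrative note (added here; not part of the upstream sources): with
   the LP64 ABI POINTER_SIZE is 64 and BITS_PER_UNIT is 8, so POINTER_BYTES
   evaluates to 8; with ILP32 it evaluates to 4.  */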

/* Classifies an address.

   ADDRESS_REG_IMM
       A simple base register plus immediate offset.

   ADDRESS_REG_WB
       A base register indexed by immediate offset with writeback.

   ADDRESS_REG_REG
       A base register indexed by (optionally scaled) register.

   ADDRESS_REG_UXTW
       A base register indexed by (optionally scaled) zero-extended register.

   ADDRESS_REG_SXTW
       A base register indexed by (optionally scaled) sign-extended register.

   ADDRESS_LO_SUM
       A LO_SUM rtx with a base register and "LO12" symbol relocation.

   ADDRESS_SYMBOLIC:
       A constant symbolic address, in pc-relative literal pool.  */

enum aarch64_address_type {
  ADDRESS_REG_IMM,
  ADDRESS_REG_WB,
  ADDRESS_REG_REG,
  ADDRESS_REG_UXTW,
  ADDRESS_REG_SXTW,
  ADDRESS_LO_SUM,
  ADDRESS_SYMBOLIC
};

struct aarch64_address_info {
  enum aarch64_address_type type;
  rtx base;
  rtx offset;
  poly_int64 const_offset;
  int shift;
  enum aarch64_symbol_type symbol_type;
};

/* Information about a legitimate vector immediate operand.  */
struct simd_immediate_info
{
  enum insn_type { MOV, MVN };
  enum modifier_type { LSL, MSL };

  simd_immediate_info () {}
  simd_immediate_info (scalar_float_mode, rtx);
  simd_immediate_info (scalar_int_mode, unsigned HOST_WIDE_INT,
		       insn_type = MOV, modifier_type = LSL,
		       unsigned int = 0);
  simd_immediate_info (scalar_mode, rtx, rtx);

  /* The mode of the elements.  */
  scalar_mode elt_mode;

  /* The value of each element if all elements are the same, or the
     first value if the constant is a series.  */
  rtx value;

  /* The value of the step if the constant is a series, null otherwise.  */
  rtx step;

  /* The instruction to use to move the immediate into a vector.  */
  insn_type insn;

  /* The kind of shift modifier to use, and the number of bits to shift.
     This is (LSL, 0) if no shift is needed.  */
  modifier_type modifier;
  unsigned int shift;
};

/* Construct a floating-point immediate in which each element has mode
   ELT_MODE_IN and value VALUE_IN.  */
inline simd_immediate_info
::simd_immediate_info (scalar_float_mode elt_mode_in, rtx value_in)
  : elt_mode (elt_mode_in), value (value_in), step (NULL_RTX), insn (MOV),
    modifier (LSL), shift (0)
{}

/* Construct an integer immediate in which each element has mode ELT_MODE_IN
   and value VALUE_IN.  The other parameters are as for the structure
   fields.  */
inline simd_immediate_info
::simd_immediate_info (scalar_int_mode elt_mode_in,
		       unsigned HOST_WIDE_INT value_in,
		       insn_type insn_in, modifier_type modifier_in,
		       unsigned int shift_in)
  : elt_mode (elt_mode_in), value (gen_int_mode (value_in, elt_mode_in)),
    step (NULL_RTX), insn (insn_in), modifier (modifier_in), shift (shift_in)
{}

/* Construct an integer immediate in which each element has mode ELT_MODE_IN
   and where element I is equal to VALUE_IN + I * STEP_IN.  */
inline simd_immediate_info
::simd_immediate_info (scalar_mode elt_mode_in, rtx value_in, rtx step_in)
  : elt_mode (elt_mode_in), value (value_in), step (step_in), insn (MOV),
    modifier (LSL), shift (0)
{}

/* The current code model.  */
enum aarch64_code_model aarch64_cmodel;

/* The number of 64-bit elements in an SVE vector.  */
poly_uint16 aarch64_sve_vg;

#ifdef HAVE_AS_TLS
#undef TARGET_HAVE_TLS
#define TARGET_HAVE_TLS 1
#endif

static bool aarch64_composite_type_p (const_tree, machine_mode);
static bool aarch64_vfp_is_call_or_return_candidate (machine_mode,
						     const_tree,
						     machine_mode *, int *,
						     bool *);
static void aarch64_elf_asm_constructor (rtx, int) ATTRIBUTE_UNUSED;
static void aarch64_elf_asm_destructor (rtx, int) ATTRIBUTE_UNUSED;
static void aarch64_override_options_after_change (void);
static bool aarch64_vector_mode_supported_p (machine_mode);
static int aarch64_address_cost (rtx, machine_mode, addr_space_t, bool);
static bool aarch64_builtin_support_vector_misalignment (machine_mode mode,
							 const_tree type,
							 int misalignment,
							 bool is_packed);
static machine_mode aarch64_simd_container_mode (scalar_mode, poly_int64);
static bool aarch64_print_address_internal (FILE*, machine_mode, rtx,
					    aarch64_addr_query_type);

/* Major revision number of the ARM Architecture implemented by the target.  */
unsigned aarch64_architecture_version;

/* The processor for which instructions should be scheduled.  */
enum aarch64_processor aarch64_tune = cortexa53;

/* Mask to specify which instruction scheduling options should be used.  */
unsigned long aarch64_tune_flags = 0;

/* Global flag for PC relative loads.  */
bool aarch64_pcrelative_literal_loads;

/* Global flag for whether frame pointer is enabled.  */
bool aarch64_use_frame_pointer;

/* Support for command line parsing of boolean flags in the tuning
   structures.  */
struct aarch64_flag_desc
{
  const char* name;
  unsigned int flag;
};

#define AARCH64_FUSION_PAIR(name, internal_name) \
  { name, AARCH64_FUSE_##internal_name },
static const struct aarch64_flag_desc aarch64_fusible_pairs[] =
{
  { "none", AARCH64_FUSE_NOTHING },
#include "aarch64-fusion-pairs.def"
  { "all", AARCH64_FUSE_ALL },
  { NULL, AARCH64_FUSE_NOTHING }
};

#define AARCH64_EXTRA_TUNING_OPTION(name, internal_name) \
  { name, AARCH64_EXTRA_TUNE_##internal_name },
static const struct aarch64_flag_desc aarch64_tuning_flags[] =
{
  { "none", AARCH64_EXTRA_TUNE_NONE },
#include "aarch64-tuning-flags.def"
  { "all", AARCH64_EXTRA_TUNE_ALL },
  { NULL, AARCH64_EXTRA_TUNE_NONE }
};

/* Tuning parameters.  */

static const struct cpu_addrcost_table generic_addrcost_table =
{
    {
      1, /* hi  */
      0, /* si  */
      0, /* di  */
      1, /* ti  */
    },
  0, /* pre_modify  */
  0, /* post_modify  */
  0, /* register_offset  */
  0, /* register_sextend  */
  0, /* register_zextend  */
  0 /* imm_offset  */
};

static const struct cpu_addrcost_table exynosm1_addrcost_table =
{
    {
      0, /* hi  */
      0, /* si  */
      0, /* di  */
      2, /* ti  */
    },
  0, /* pre_modify  */
  0, /* post_modify  */
  1, /* register_offset  */
  1, /* register_sextend  */
  2, /* register_zextend  */
  0, /* imm_offset  */
};

static const struct cpu_addrcost_table xgene1_addrcost_table =
{
    {
      1, /* hi  */
      0, /* si  */
      0, /* di  */
      1, /* ti  */
    },
  1, /* pre_modify  */
  0, /* post_modify  */
  0, /* register_offset  */
  1, /* register_sextend  */
  1, /* register_zextend  */
  0, /* imm_offset  */
};

static const struct cpu_addrcost_table thunderx2t99_addrcost_table =
{
    {
      1, /* hi  */
      1, /* si  */
      1, /* di  */
      2, /* ti  */
    },
  0, /* pre_modify  */
  0, /* post_modify  */
  2, /* register_offset  */
  3, /* register_sextend  */
  3, /* register_zextend  */
  0, /* imm_offset  */
};

static const struct cpu_addrcost_table qdf24xx_addrcost_table =
{
    {
      1, /* hi  */
      1, /* si  */
      1, /* di  */
      2, /* ti  */
    },
  1, /* pre_modify  */
  1, /* post_modify  */
  3, /* register_offset  */
  4, /* register_sextend  */
  3, /* register_zextend  */
  2, /* imm_offset  */
};

static const struct cpu_regmove_cost generic_regmove_cost =
{
  1, /* GP2GP  */
  /* Avoid the use of slow int<->fp moves for spilling by setting
     their cost higher than memmov_cost.  */
  5, /* GP2FP  */
  5, /* FP2GP  */
  2 /* FP2FP  */
};

static const struct cpu_regmove_cost cortexa57_regmove_cost =
{
  1, /* GP2GP  */
  /* Avoid the use of slow int<->fp moves for spilling by setting
     their cost higher than memmov_cost.  */
  5, /* GP2FP  */
  5, /* FP2GP  */
  2 /* FP2FP  */
};

static const struct cpu_regmove_cost cortexa53_regmove_cost =
{
  1, /* GP2GP  */
  /* Avoid the use of slow int<->fp moves for spilling by setting
     their cost higher than memmov_cost.  */
  5, /* GP2FP  */
  5, /* FP2GP  */
  2 /* FP2FP  */
};

static const struct cpu_regmove_cost exynosm1_regmove_cost =
{
  1, /* GP2GP  */
  /* Avoid the use of slow int<->fp moves for spilling by setting
     their cost higher than memmov_cost (actual, 4 and 9).  */
  9, /* GP2FP  */
  9, /* FP2GP  */
  1 /* FP2FP  */
};

static const struct cpu_regmove_cost thunderx_regmove_cost =
{
  2, /* GP2GP  */
  2, /* GP2FP  */
  6, /* FP2GP  */
  4 /* FP2FP  */
};

static const struct cpu_regmove_cost xgene1_regmove_cost =
{
  1, /* GP2GP  */
  /* Avoid the use of slow int<->fp moves for spilling by setting
     their cost higher than memmov_cost.  */
  8, /* GP2FP  */
  8, /* FP2GP  */
  2 /* FP2FP  */
};

static const struct cpu_regmove_cost qdf24xx_regmove_cost =
{
  2, /* GP2GP  */
  /* Avoid the use of int<->fp moves for spilling.  */
  6, /* GP2FP  */
  6, /* FP2GP  */
  4 /* FP2FP  */
};

static const struct cpu_regmove_cost thunderx2t99_regmove_cost =
{
  1, /* GP2GP  */
  /* Avoid the use of int<->fp moves for spilling.  */
  8, /* GP2FP  */
  8, /* FP2GP  */
  4 /* FP2FP  */
};

/* Generic costs for vector insn classes.  */
static const struct cpu_vector_cost generic_vector_cost =
{
  1, /* scalar_int_stmt_cost  */
  1, /* scalar_fp_stmt_cost  */
  1, /* scalar_load_cost  */
  1, /* scalar_store_cost  */
  1, /* vec_int_stmt_cost  */
  1, /* vec_fp_stmt_cost  */
  2, /* vec_permute_cost  */
  1, /* vec_to_scalar_cost  */
  1, /* scalar_to_vec_cost  */
  1, /* vec_align_load_cost  */
  1, /* vec_unalign_load_cost  */
  1, /* vec_unalign_store_cost  */
  1, /* vec_store_cost  */
  3, /* cond_taken_branch_cost  */
  1 /* cond_not_taken_branch_cost  */
};

/* ThunderX costs for vector insn classes.  */
static const struct cpu_vector_cost thunderx_vector_cost =
{
  1, /* scalar_int_stmt_cost  */
  1, /* scalar_fp_stmt_cost  */
  3, /* scalar_load_cost  */
  1, /* scalar_store_cost  */
  4, /* vec_int_stmt_cost  */
  1, /* vec_fp_stmt_cost  */
  4, /* vec_permute_cost  */
  2, /* vec_to_scalar_cost  */
  2, /* scalar_to_vec_cost  */
  3, /* vec_align_load_cost  */
  5, /* vec_unalign_load_cost  */
  5, /* vec_unalign_store_cost  */
  1, /* vec_store_cost  */
  3, /* cond_taken_branch_cost  */
  3 /* cond_not_taken_branch_cost  */
};

/* Generic costs for vector insn classes.  */
static const struct cpu_vector_cost cortexa57_vector_cost =
{
  1, /* scalar_int_stmt_cost  */
  1, /* scalar_fp_stmt_cost  */
  4, /* scalar_load_cost  */
  1, /* scalar_store_cost  */
  2, /* vec_int_stmt_cost  */
  2, /* vec_fp_stmt_cost  */
  3, /* vec_permute_cost  */
  8, /* vec_to_scalar_cost  */
  8, /* scalar_to_vec_cost  */
  4, /* vec_align_load_cost  */
  4, /* vec_unalign_load_cost  */
  1, /* vec_unalign_store_cost  */
  1, /* vec_store_cost  */
  1, /* cond_taken_branch_cost  */
  1 /* cond_not_taken_branch_cost  */
};

static const struct cpu_vector_cost exynosm1_vector_cost =
{
  1, /* scalar_int_stmt_cost  */
  1, /* scalar_fp_stmt_cost  */
  5, /* scalar_load_cost  */
  1, /* scalar_store_cost  */
  3, /* vec_int_stmt_cost  */
  3, /* vec_fp_stmt_cost  */
  3, /* vec_permute_cost  */
  3, /* vec_to_scalar_cost  */
  3, /* scalar_to_vec_cost  */
  5, /* vec_align_load_cost  */
  5, /* vec_unalign_load_cost  */
  1, /* vec_unalign_store_cost  */
  1, /* vec_store_cost  */
  1, /* cond_taken_branch_cost  */
  1 /* cond_not_taken_branch_cost  */
};

/* Generic costs for vector insn classes.  */
static const struct cpu_vector_cost xgene1_vector_cost =
{
  1, /* scalar_int_stmt_cost  */
  1, /* scalar_fp_stmt_cost  */
  5, /* scalar_load_cost  */
  1, /* scalar_store_cost  */
  2, /* vec_int_stmt_cost  */
  2, /* vec_fp_stmt_cost  */
  2, /* vec_permute_cost  */
  4, /* vec_to_scalar_cost  */
  4, /* scalar_to_vec_cost  */
  10, /* vec_align_load_cost  */
  10, /* vec_unalign_load_cost  */
  2, /* vec_unalign_store_cost  */
  2, /* vec_store_cost  */
  2, /* cond_taken_branch_cost  */
  1 /* cond_not_taken_branch_cost  */
};

/* Costs for vector insn classes for Vulcan.  */
static const struct cpu_vector_cost thunderx2t99_vector_cost =
{
  1, /* scalar_int_stmt_cost  */
  6, /* scalar_fp_stmt_cost  */
  4, /* scalar_load_cost  */
  1, /* scalar_store_cost  */
  5, /* vec_int_stmt_cost  */
  6, /* vec_fp_stmt_cost  */
  3, /* vec_permute_cost  */
  6, /* vec_to_scalar_cost  */
  5, /* scalar_to_vec_cost  */
  8, /* vec_align_load_cost  */
  8, /* vec_unalign_load_cost  */
  4, /* vec_unalign_store_cost  */
  4, /* vec_store_cost  */
  2, /* cond_taken_branch_cost  */
  1 /* cond_not_taken_branch_cost  */
};

/* Generic costs for branch instructions.  */
static const struct cpu_branch_cost generic_branch_cost =
{
  1,  /* Predictable.  */
  3   /* Unpredictable.  */
};

/* Generic approximation modes.  */
static const cpu_approx_modes generic_approx_modes =
{
  AARCH64_APPROX_NONE, /* division  */
  AARCH64_APPROX_NONE, /* sqrt  */
  AARCH64_APPROX_NONE  /* recip_sqrt  */
};

/* Approximation modes for Exynos M1.  */
static const cpu_approx_modes exynosm1_approx_modes =
{
  AARCH64_APPROX_NONE, /* division  */
  AARCH64_APPROX_ALL,  /* sqrt  */
  AARCH64_APPROX_ALL   /* recip_sqrt  */
};

/* Approximation modes for X-Gene 1.  */
static const cpu_approx_modes xgene1_approx_modes =
{
  AARCH64_APPROX_NONE, /* division  */
  AARCH64_APPROX_NONE, /* sqrt  */
  AARCH64_APPROX_ALL   /* recip_sqrt  */
};

/* Generic prefetch settings (which disable prefetch).  */
static const cpu_prefetch_tune generic_prefetch_tune =
{
  0,       /* num_slots  */
  -1,      /* l1_cache_size  */
  -1,      /* l1_cache_line_size  */
  -1,      /* l2_cache_size  */
  true,    /* prefetch_dynamic_strides */
  -1,      /* minimum_stride */
  -1       /* default_opt_level  */
};

static const cpu_prefetch_tune exynosm1_prefetch_tune =
{
  0,       /* num_slots  */
  -1,      /* l1_cache_size  */
  64,      /* l1_cache_line_size  */
  -1,      /* l2_cache_size  */
  true,    /* prefetch_dynamic_strides */
  -1,      /* minimum_stride */
  -1       /* default_opt_level  */
};

static const cpu_prefetch_tune qdf24xx_prefetch_tune =
{
  4,       /* num_slots  */
  32,      /* l1_cache_size  */
  64,      /* l1_cache_line_size  */
  512,     /* l2_cache_size  */
  false,   /* prefetch_dynamic_strides */
  2048,    /* minimum_stride */
  3        /* default_opt_level  */
};

static const cpu_prefetch_tune thunderxt88_prefetch_tune =
{
  8,       /* num_slots  */
  32,      /* l1_cache_size  */
  128,     /* l1_cache_line_size  */
  16*1024, /* l2_cache_size  */
  true,    /* prefetch_dynamic_strides */
  -1,      /* minimum_stride */
  3        /* default_opt_level  */
};

static const cpu_prefetch_tune thunderx_prefetch_tune =
{
  8,       /* num_slots  */
  32,      /* l1_cache_size  */
  128,     /* l1_cache_line_size  */
  -1,      /* l2_cache_size  */
  true,    /* prefetch_dynamic_strides */
  -1,      /* minimum_stride */
  -1       /* default_opt_level  */
};

static const cpu_prefetch_tune thunderx2t99_prefetch_tune =
{
  8,       /* num_slots  */
  32,      /* l1_cache_size  */
  64,      /* l1_cache_line_size  */
  256,     /* l2_cache_size  */
  true,    /* prefetch_dynamic_strides */
  -1,      /* minimum_stride */
  -1       /* default_opt_level  */
};

static const struct tune_params generic_tunings =
{
  &cortexa57_extra_costs,
  &generic_addrcost_table,
  &generic_regmove_cost,
  &generic_vector_cost,
  &generic_branch_cost,
  &generic_approx_modes,
  4, /* memmov_cost  */
  2, /* issue_rate  */
  (AARCH64_FUSE_AES_AESMC), /* fusible_ops  */
  "8",  /* function_align.  */
  "4",  /* jump_align.  */
  "8",  /* loop_align.  */
  2,    /* int_reassoc_width.  */
  4,    /* fp_reassoc_width.  */
  1,    /* vec_reassoc_width.  */
  2,    /* min_div_recip_mul_sf.  */
  2,    /* min_div_recip_mul_df.  */
  0,    /* max_case_values.  */
  tune_params::AUTOPREFETCHER_WEAK, /* autoprefetcher_model.  */
  (AARCH64_EXTRA_TUNE_NONE),        /* tune_flags.  */
  &generic_prefetch_tune
};

static const struct tune_params cortexa35_tunings =
{
  &cortexa53_extra_costs,
  &generic_addrcost_table,
  &cortexa53_regmove_cost,
  &generic_vector_cost,
  &generic_branch_cost,
  &generic_approx_modes,
  4, /* memmov_cost  */
  1, /* issue_rate  */
  (AARCH64_FUSE_AES_AESMC | AARCH64_FUSE_MOV_MOVK | AARCH64_FUSE_ADRP_ADD
   | AARCH64_FUSE_MOVK_MOVK | AARCH64_FUSE_ADRP_LDR), /* fusible_ops  */
  "16", /* function_align.  */
  "4",  /* jump_align.  */
  "8",  /* loop_align.  */
  2,    /* int_reassoc_width.  */
  4,    /* fp_reassoc_width.  */
  1,    /* vec_reassoc_width.  */
  2,    /* min_div_recip_mul_sf.  */
  2,    /* min_div_recip_mul_df.  */
  0,    /* max_case_values.  */
  tune_params::AUTOPREFETCHER_WEAK, /* autoprefetcher_model.  */
  (AARCH64_EXTRA_TUNE_NONE),        /* tune_flags.  */
  &generic_prefetch_tune
};

static const struct tune_params cortexa53_tunings =
{
  &cortexa53_extra_costs,
  &generic_addrcost_table,
  &cortexa53_regmove_cost,
  &generic_vector_cost,
  &generic_branch_cost,
  &generic_approx_modes,
  4, /* memmov_cost  */
  2, /* issue_rate  */
  (AARCH64_FUSE_AES_AESMC | AARCH64_FUSE_MOV_MOVK | AARCH64_FUSE_ADRP_ADD
   | AARCH64_FUSE_MOVK_MOVK | AARCH64_FUSE_ADRP_LDR), /* fusible_ops  */
  "16", /* function_align.  */
  "4",  /* jump_align.  */
  "8",  /* loop_align.  */
  2,    /* int_reassoc_width.  */
  4,    /* fp_reassoc_width.  */
  1,    /* vec_reassoc_width.  */
  2,    /* min_div_recip_mul_sf.  */
  2,    /* min_div_recip_mul_df.  */
  0,    /* max_case_values.  */
  tune_params::AUTOPREFETCHER_WEAK, /* autoprefetcher_model.  */
  (AARCH64_EXTRA_TUNE_NONE),        /* tune_flags.  */
  &generic_prefetch_tune
};

static const struct tune_params cortexa57_tunings =
{
  &cortexa57_extra_costs,
  &generic_addrcost_table,
  &cortexa57_regmove_cost,
  &cortexa57_vector_cost,
  &generic_branch_cost,
  &generic_approx_modes,
  4, /* memmov_cost  */
  3, /* issue_rate  */
  (AARCH64_FUSE_AES_AESMC | AARCH64_FUSE_MOV_MOVK | AARCH64_FUSE_ADRP_ADD
   | AARCH64_FUSE_MOVK_MOVK), /* fusible_ops  */
  "16", /* function_align.  */
  "4",  /* jump_align.  */
  "8",  /* loop_align.  */
  2,    /* int_reassoc_width.  */
  4,    /* fp_reassoc_width.  */
  1,    /* vec_reassoc_width.  */
  2,    /* min_div_recip_mul_sf.  */
  2,    /* min_div_recip_mul_df.  */
  0,    /* max_case_values.  */
  tune_params::AUTOPREFETCHER_WEAK, /* autoprefetcher_model.  */
  (AARCH64_EXTRA_TUNE_RENAME_FMA_REGS), /* tune_flags.  */
  &generic_prefetch_tune
};

static const struct tune_params cortexa72_tunings =
{
  &cortexa57_extra_costs,
  &generic_addrcost_table,
  &cortexa57_regmove_cost,
  &cortexa57_vector_cost,
  &generic_branch_cost,
  &generic_approx_modes,
  4, /* memmov_cost  */
  3, /* issue_rate  */
  (AARCH64_FUSE_AES_AESMC | AARCH64_FUSE_MOV_MOVK | AARCH64_FUSE_ADRP_ADD
   | AARCH64_FUSE_MOVK_MOVK), /* fusible_ops  */
  "16", /* function_align.  */
  "4",  /* jump_align.  */
  "8",  /* loop_align.  */
  2,    /* int_reassoc_width.  */
  4,    /* fp_reassoc_width.  */
  1,    /* vec_reassoc_width.  */
  2,    /* min_div_recip_mul_sf.  */
  2,    /* min_div_recip_mul_df.  */
  0,    /* max_case_values.  */
  tune_params::AUTOPREFETCHER_WEAK, /* autoprefetcher_model.  */
  (AARCH64_EXTRA_TUNE_NONE),        /* tune_flags.  */
  &generic_prefetch_tune
};

static const struct tune_params cortexa73_tunings =
{
  &cortexa57_extra_costs,
  &generic_addrcost_table,
  &cortexa57_regmove_cost,
  &cortexa57_vector_cost,
  &generic_branch_cost,
  &generic_approx_modes,
  4, /* memmov_cost.  */
  2, /* issue_rate.  */
  (AARCH64_FUSE_AES_AESMC | AARCH64_FUSE_MOV_MOVK | AARCH64_FUSE_ADRP_ADD
   | AARCH64_FUSE_MOVK_MOVK | AARCH64_FUSE_ADRP_LDR), /* fusible_ops  */
  "16", /* function_align.  */
  "4",  /* jump_align.  */
  "8",  /* loop_align.  */
  2,    /* int_reassoc_width.  */
  4,    /* fp_reassoc_width.  */
  1,    /* vec_reassoc_width.  */
  2,    /* min_div_recip_mul_sf.  */
  2,    /* min_div_recip_mul_df.  */
  0,    /* max_case_values.  */
  tune_params::AUTOPREFETCHER_WEAK, /* autoprefetcher_model.  */
  (AARCH64_EXTRA_TUNE_NONE),        /* tune_flags.  */
  &generic_prefetch_tune
};



static const struct tune_params exynosm1_tunings =
{
  &exynosm1_extra_costs,
  &exynosm1_addrcost_table,
  &exynosm1_regmove_cost,
  &exynosm1_vector_cost,
  &generic_branch_cost,
  &exynosm1_approx_modes,
  4,    /* memmov_cost  */
  3,    /* issue_rate  */
  (AARCH64_FUSE_AES_AESMC), /* fusible_ops  */
  "4",  /* function_align.  */
  "4",  /* jump_align.  */
  "4",  /* loop_align.  */
  2,    /* int_reassoc_width.  */
  4,    /* fp_reassoc_width.  */
  1,    /* vec_reassoc_width.  */
  2,    /* min_div_recip_mul_sf.  */
  2,    /* min_div_recip_mul_df.  */
  48,   /* max_case_values.  */
  tune_params::AUTOPREFETCHER_WEAK, /* autoprefetcher_model.  */
  (AARCH64_EXTRA_TUNE_NONE),        /* tune_flags.  */
  &exynosm1_prefetch_tune
};

static const struct tune_params thunderxt88_tunings =
{
  &thunderx_extra_costs,
  &generic_addrcost_table,
  &thunderx_regmove_cost,
  &thunderx_vector_cost,
  &generic_branch_cost,
  &generic_approx_modes,
  6, /* memmov_cost  */
  2, /* issue_rate  */
  AARCH64_FUSE_CMP_BRANCH, /* fusible_ops  */
  "8",  /* function_align.  */
  "8",  /* jump_align.  */
  "8",  /* loop_align.  */
  2,    /* int_reassoc_width.  */
  4,    /* fp_reassoc_width.  */
  1,    /* vec_reassoc_width.  */
  2,    /* min_div_recip_mul_sf.  */
  2,    /* min_div_recip_mul_df.  */
  0,    /* max_case_values.  */
  tune_params::AUTOPREFETCHER_OFF,  /* autoprefetcher_model.  */
  (AARCH64_EXTRA_TUNE_SLOW_UNALIGNED_LDPW), /* tune_flags.  */
  &thunderxt88_prefetch_tune
};

static const struct tune_params thunderx_tunings =
{
  &thunderx_extra_costs,
  &generic_addrcost_table,
  &thunderx_regmove_cost,
  &thunderx_vector_cost,
  &generic_branch_cost,
  &generic_approx_modes,
  6, /* memmov_cost  */
  2, /* issue_rate  */
  AARCH64_FUSE_CMP_BRANCH, /* fusible_ops  */
  "8",  /* function_align.  */
  "8",  /* jump_align.  */
  "8",  /* loop_align.  */
  2,    /* int_reassoc_width.  */
  4,    /* fp_reassoc_width.  */
  1,    /* vec_reassoc_width.  */
  2,    /* min_div_recip_mul_sf.  */
  2,    /* min_div_recip_mul_df.  */
  0,    /* max_case_values.  */
  tune_params::AUTOPREFETCHER_OFF,  /* autoprefetcher_model.  */
  (AARCH64_EXTRA_TUNE_SLOW_UNALIGNED_LDPW
   | AARCH64_EXTRA_TUNE_CHEAP_SHIFT_EXTEND), /* tune_flags.  */
  &thunderx_prefetch_tune
};

static const struct tune_params xgene1_tunings =
{
  &xgene1_extra_costs,
  &xgene1_addrcost_table,
  &xgene1_regmove_cost,
  &xgene1_vector_cost,
  &generic_branch_cost,
  &xgene1_approx_modes,
  6, /* memmov_cost  */
  4, /* issue_rate  */
  AARCH64_FUSE_NOTHING, /* fusible_ops  */
  "16", /* function_align.  */
  "8",  /* jump_align.  */
  "16", /* loop_align.  */
  2,    /* int_reassoc_width.  */
  4,    /* fp_reassoc_width.  */
  1,    /* vec_reassoc_width.  */
  2,    /* min_div_recip_mul_sf.  */
  2,    /* min_div_recip_mul_df.  */
  0,    /* max_case_values.  */
  tune_params::AUTOPREFETCHER_OFF,  /* autoprefetcher_model.  */
  (AARCH64_EXTRA_TUNE_NO_LDP_STP_QREGS), /* tune_flags.  */
  &generic_prefetch_tune
};

static const struct tune_params qdf24xx_tunings =
{
  &qdf24xx_extra_costs,
  &qdf24xx_addrcost_table,
  &qdf24xx_regmove_cost,
  &generic_vector_cost,
  &generic_branch_cost,
  &generic_approx_modes,
  4, /* memmov_cost  */
  4, /* issue_rate  */
  (AARCH64_FUSE_MOV_MOVK | AARCH64_FUSE_ADRP_ADD
   | AARCH64_FUSE_MOVK_MOVK), /* fuseable_ops  */
  "16", /* function_align.  */
  "8",  /* jump_align.  */
  "16", /* loop_align.  */
  2,    /* int_reassoc_width.  */
  4,    /* fp_reassoc_width.  */
  1,    /* vec_reassoc_width.  */
  2,    /* min_div_recip_mul_sf.  */
  2,    /* min_div_recip_mul_df.  */
  0,    /* max_case_values.  */
  tune_params::AUTOPREFETCHER_WEAK, /* autoprefetcher_model.  */
  (AARCH64_EXTRA_TUNE_NONE),        /* tune_flags.  */
  &qdf24xx_prefetch_tune
};

/* Tuning structure for the Qualcomm Saphira core.  Default to falkor values
   for now.  */
static const struct tune_params saphira_tunings =
{
  &generic_extra_costs,
  &generic_addrcost_table,
  &generic_regmove_cost,
  &generic_vector_cost,
  &generic_branch_cost,
  &generic_approx_modes,
  4, /* memmov_cost  */
  4, /* issue_rate  */
  (AARCH64_FUSE_MOV_MOVK | AARCH64_FUSE_ADRP_ADD
   | AARCH64_FUSE_MOVK_MOVK), /* fuseable_ops  */
  "16", /* function_align.  */
  "8",  /* jump_align.  */
  "16", /* loop_align.  */
  2,    /* int_reassoc_width.  */
  4,    /* fp_reassoc_width.  */
  1,    /* vec_reassoc_width.  */
  2,    /* min_div_recip_mul_sf.  */
  2,    /* min_div_recip_mul_df.  */
  0,    /* max_case_values.  */
  tune_params::AUTOPREFETCHER_WEAK, /* autoprefetcher_model.  */
  (AARCH64_EXTRA_TUNE_NONE),        /* tune_flags.  */
  &generic_prefetch_tune
};

static const struct tune_params thunderx2t99_tunings =
{
  &thunderx2t99_extra_costs,
  &thunderx2t99_addrcost_table,
  &thunderx2t99_regmove_cost,
  &thunderx2t99_vector_cost,
  &generic_branch_cost,
  &generic_approx_modes,
  4, /* memmov_cost.  */
  4, /* issue_rate.  */
  (AARCH64_FUSE_CMP_BRANCH | AARCH64_FUSE_AES_AESMC
   | AARCH64_FUSE_ALU_BRANCH), /* fusible_ops  */
  "16", /* function_align.  */
  "8",  /* jump_align.  */
  "16", /* loop_align.  */
  3,    /* int_reassoc_width.  */
  2,    /* fp_reassoc_width.  */
  2,    /* vec_reassoc_width.  */
  2,    /* min_div_recip_mul_sf.  */
  2,    /* min_div_recip_mul_df.  */
  0,    /* max_case_values.  */
  tune_params::AUTOPREFETCHER_WEAK, /* autoprefetcher_model.  */
  (AARCH64_EXTRA_TUNE_NONE),        /* tune_flags.  */
  &thunderx2t99_prefetch_tune
};

/* Support for fine-grained override of the tuning structures.  */
struct aarch64_tuning_override_function
{
  const char* name;
  void (*parse_override)(const char*, struct tune_params*);
};

static void aarch64_parse_fuse_string (const char*, struct tune_params*);
static void aarch64_parse_tune_string (const char*, struct tune_params*);

static const struct aarch64_tuning_override_function
aarch64_tuning_override_functions[] =
{
  { "fuse", aarch64_parse_fuse_string },
  { "tune", aarch64_parse_tune_string },
  { NULL, NULL }
};

/* A processor implementing AArch64.  */
struct processor
{
  const char *const name;
  enum aarch64_processor ident;
  enum aarch64_processor sched_core;
  enum aarch64_arch arch;
  unsigned architecture_version;
  const unsigned long flags;
  const struct tune_params *const tune;
};

/* Architectures implementing AArch64.  */
static const struct processor all_architectures[] =
{
#define AARCH64_ARCH(NAME, CORE, ARCH_IDENT, ARCH_REV, FLAGS) \
  {NAME, CORE, CORE, AARCH64_ARCH_##ARCH_IDENT, ARCH_REV, FLAGS, NULL},
#include "aarch64-arches.def"
  {NULL, aarch64_none, aarch64_none, aarch64_no_arch, 0, 0, NULL}
};

/* Processor cores implementing AArch64.  */
static const struct processor all_cores[] =
{
#define AARCH64_CORE(NAME, IDENT, SCHED, ARCH, FLAGS, COSTS, IMP, PART, VARIANT) \
  {NAME, IDENT, SCHED, AARCH64_ARCH_##ARCH, \
   all_architectures[AARCH64_ARCH_##ARCH].architecture_version, \
   FLAGS, &COSTS##_tunings},
#include "aarch64-cores.def"
  {"generic", generic, cortexa53, AARCH64_ARCH_8A, 8,
   AARCH64_FL_FOR_ARCH8, &generic_tunings},
  {NULL, aarch64_none, aarch64_none, aarch64_no_arch, 0, 0, NULL}
};


/* Target specification.  These are populated by the -march, -mtune, -mcpu
   handling code or by target attributes.  */
static const struct processor *selected_arch;
static const struct processor *selected_cpu;
static const struct processor *selected_tune;

/* The current tuning set.  */
struct tune_params aarch64_tune_params = generic_tunings;

#define AARCH64_CPU_DEFAULT_FLAGS ((selected_cpu) ? selected_cpu->flags : 0)

/* An ISA extension in the co-processor and main instruction set space.  */
struct aarch64_option_extension
{
  const char *const name;
  const unsigned long flags_on;
  const unsigned long flags_off;
};

typedef enum aarch64_cond_code
{
  AARCH64_EQ = 0, AARCH64_NE, AARCH64_CS, AARCH64_CC, AARCH64_MI, AARCH64_PL,
  AARCH64_VS, AARCH64_VC, AARCH64_HI, AARCH64_LS, AARCH64_GE, AARCH64_LT,
  AARCH64_GT, AARCH64_LE, AARCH64_AL, AARCH64_NV
}
aarch64_cc;

#define AARCH64_INVERSE_CONDITION_CODE(X) ((aarch64_cc) (((int) X) ^ 1))
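/* Illustrative note (added here; not part of the upstream sources): each
   condition and its inverse differ only in the low-order bit of the
   encoding above, so AARCH64_INVERSE_CONDITION_CODE (AARCH64_EQ) is
   AARCH64_NE and AARCH64_INVERSE_CONDITION_CODE (AARCH64_GE) is
   AARCH64_LT.  */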

/* The condition codes of the processor, and the inverse function.  */
static const char * const aarch64_condition_codes[] =
{
  "eq", "ne", "cs", "cc", "mi", "pl", "vs", "vc",
  "hi", "ls", "ge", "lt", "gt", "le", "al", "nv"
};

/* Generate code to enable conditional branches in functions over 1 MiB.  */
const char *
aarch64_gen_far_branch (rtx * operands, int pos_label, const char * dest,
			const char * branch_format)
{
  rtx_code_label * tmp_label = gen_label_rtx ();
  char label_buf[256];
  char buffer[128];
  ASM_GENERATE_INTERNAL_LABEL (label_buf, dest,
			       CODE_LABEL_NUMBER (tmp_label));
  const char *label_ptr = targetm.strip_name_encoding (label_buf);
  rtx dest_label = operands[pos_label];
  operands[pos_label] = tmp_label;

  snprintf (buffer, sizeof (buffer), "%s%s", branch_format, label_ptr);
  output_asm_insn (buffer, operands);

  snprintf (buffer, sizeof (buffer), "b\t%%l%d\n%s:", pos_label, label_ptr);
  operands[pos_label] = dest_label;
  output_asm_insn (buffer, operands);
  return "";
}

void
aarch64_err_no_fpadvsimd (machine_mode mode)
{
  if (TARGET_GENERAL_REGS_ONLY)
    if (FLOAT_MODE_P (mode))
      error ("%qs is incompatible with the use of floating-point types",
	     "-mgeneral-regs-only");
    else
      error ("%qs is incompatible with the use of vector types",
	     "-mgeneral-regs-only");
  else
    if (FLOAT_MODE_P (mode))
      error ("%qs feature modifier is incompatible with the use of"
	     " floating-point types", "+nofp");
    else
      error ("%qs feature modifier is incompatible with the use of"
	     " vector types", "+nofp");
}

/* Implement TARGET_IRA_CHANGE_PSEUDO_ALLOCNO_CLASS.
   The register allocator chooses POINTER_AND_FP_REGS if FP_REGS and
   GENERAL_REGS have the same cost - even if POINTER_AND_FP_REGS has a much
   higher cost.  POINTER_AND_FP_REGS is also used if the cost of both FP_REGS
   and GENERAL_REGS is lower than the memory cost (in this case the best class
   is the lowest cost one).  Using POINTER_AND_FP_REGS irrespectively of its
   cost results in bad allocations with many redundant int<->FP moves which
   are expensive on various cores.
   To avoid this we don't allow POINTER_AND_FP_REGS as the allocno class, but
   force a decision between FP_REGS and GENERAL_REGS.  We use the allocno class
   if it isn't POINTER_AND_FP_REGS.  Similarly, use the best class if it isn't
   POINTER_AND_FP_REGS.  Otherwise set the allocno class depending on the mode.
   The result of this is that it is no longer inefficient to have a higher
   memory move cost than the register move cost.  */

static reg_class_t
aarch64_ira_change_pseudo_allocno_class (int regno, reg_class_t allocno_class,
					 reg_class_t best_class)
{
  machine_mode mode;

  if (!reg_class_subset_p (GENERAL_REGS, allocno_class)
      || !reg_class_subset_p (FP_REGS, allocno_class))
    return allocno_class;

  if (!reg_class_subset_p (GENERAL_REGS, best_class)
      || !reg_class_subset_p (FP_REGS, best_class))
    return best_class;

  mode = PSEUDO_REGNO_MODE (regno);
  return FLOAT_MODE_P (mode) || VECTOR_MODE_P (mode) ? FP_REGS : GENERAL_REGS;
}

static unsigned int
aarch64_min_divisions_for_recip_mul (machine_mode mode)
{
  if (GET_MODE_UNIT_SIZE (mode) == 4)
    return aarch64_tune_params.min_div_recip_mul_sf;
  return aarch64_tune_params.min_div_recip_mul_df;
}

/* Return the reassociation width of treeop OPC with mode MODE.  */
static int
aarch64_reassociation_width (unsigned opc, machine_mode mode)
{
  if (VECTOR_MODE_P (mode))
    return aarch64_tune_params.vec_reassoc_width;
  if (INTEGRAL_MODE_P (mode))
    return aarch64_tune_params.int_reassoc_width;
  /* Avoid reassociating floating point addition so we emit more FMAs.  */
  if (FLOAT_MODE_P (mode) && opc != PLUS_EXPR)
    return aarch64_tune_params.fp_reassoc_width;
  return 1;
}

/* Provide a mapping from gcc register numbers to dwarf register numbers.  */
unsigned
aarch64_dbx_register_number (unsigned regno)
{
  if (GP_REGNUM_P (regno))
    return AARCH64_DWARF_R0 + regno - R0_REGNUM;
  else if (regno == SP_REGNUM)
    return AARCH64_DWARF_SP;
  else if (FP_REGNUM_P (regno))
    return AARCH64_DWARF_V0 + regno - V0_REGNUM;
  else if (PR_REGNUM_P (regno))
    return AARCH64_DWARF_P0 + regno - P0_REGNUM;
  else if (regno == VG_REGNUM)
    return AARCH64_DWARF_VG;

  /* Return values >= DWARF_FRAME_REGISTERS indicate that there is no
     equivalent DWARF register.  */
  return DWARF_FRAME_REGISTERS;
}

/* Return true if MODE is any of the Advanced SIMD structure modes.  */
static bool
aarch64_advsimd_struct_mode_p (machine_mode mode)
{
  return (TARGET_SIMD
	  && (mode == OImode || mode == CImode || mode == XImode));
}

/* Return true if MODE is an SVE predicate mode.  */
static bool
aarch64_sve_pred_mode_p (machine_mode mode)
{
  return (TARGET_SVE
	  && (mode == VNx16BImode
	      || mode == VNx8BImode
	      || mode == VNx4BImode
	      || mode == VNx2BImode));
}

/* Three mutually-exclusive flags describing a vector or predicate type.  */
const unsigned int VEC_ADVSIMD  = 1;
const unsigned int VEC_SVE_DATA = 2;
const unsigned int VEC_SVE_PRED = 4;
/* Can be used in combination with VEC_ADVSIMD or VEC_SVE_DATA to indicate
   a structure of 2, 3 or 4 vectors.  */
const unsigned int VEC_STRUCT   = 8;
/* Useful combinations of the above.  */
const unsigned int VEC_ANY_SVE  = VEC_SVE_DATA | VEC_SVE_PRED;
const unsigned int VEC_ANY_DATA = VEC_ADVSIMD | VEC_SVE_DATA;

/* Return a set of flags describing the vector properties of mode MODE.
   Ignore modes that are not supported by the current target.  */
static unsigned int
aarch64_classify_vector_mode (machine_mode mode)
{
  if (aarch64_advsimd_struct_mode_p (mode))
    return VEC_ADVSIMD | VEC_STRUCT;

  if (aarch64_sve_pred_mode_p (mode))
    return VEC_SVE_PRED;

  scalar_mode inner = GET_MODE_INNER (mode);
  if (VECTOR_MODE_P (mode)
      && (inner == QImode
	  || inner == HImode
	  || inner == HFmode
	  || inner == SImode
	  || inner == SFmode
	  || inner == DImode
	  || inner == DFmode))
    {
      if (TARGET_SVE)
	{
	  if (known_eq (GET_MODE_BITSIZE (mode), BITS_PER_SVE_VECTOR))
	    return VEC_SVE_DATA;
	  if (known_eq (GET_MODE_BITSIZE (mode), BITS_PER_SVE_VECTOR * 2)
	      || known_eq (GET_MODE_BITSIZE (mode), BITS_PER_SVE_VECTOR * 3)
	      || known_eq (GET_MODE_BITSIZE (mode), BITS_PER_SVE_VECTOR * 4))
	    return VEC_SVE_DATA | VEC_STRUCT;
	}

      /* This includes V1DF but not V1DI (which doesn't exist).  */
      if (TARGET_SIMD
	  && (known_eq (GET_MODE_BITSIZE (mode), 64)
	      || known_eq (GET_MODE_BITSIZE (mode), 128)))
	return VEC_ADVSIMD;
    }

  return 0;
}

/* Return true if MODE is any of the data vector modes, including
   structure modes.  */
static bool
aarch64_vector_data_mode_p (machine_mode mode)
{
  return aarch64_classify_vector_mode (mode) & VEC_ANY_DATA;
}

/* Return true if MODE is an SVE data vector mode; either a single vector
   or a structure of vectors.  */
static bool
aarch64_sve_data_mode_p (machine_mode mode)
{
  return aarch64_classify_vector_mode (mode) & VEC_SVE_DATA;
}

/* Implement target hook TARGET_ARRAY_MODE.  */
static opt_machine_mode
aarch64_array_mode (machine_mode mode, unsigned HOST_WIDE_INT nelems)
{
  if (aarch64_classify_vector_mode (mode) == VEC_SVE_DATA
      && IN_RANGE (nelems, 2, 4))
    return mode_for_vector (GET_MODE_INNER (mode),
			    GET_MODE_NUNITS (mode) * nelems);

  return opt_machine_mode ();
}

/* Implement target hook TARGET_ARRAY_MODE_SUPPORTED_P.  */
static bool
aarch64_array_mode_supported_p (machine_mode mode,
				unsigned HOST_WIDE_INT nelems)
{
  if (TARGET_SIMD
      && (AARCH64_VALID_SIMD_QREG_MODE (mode)
	  || AARCH64_VALID_SIMD_DREG_MODE (mode))
      && (nelems >= 2 && nelems <= 4))
    return true;

  return false;
}

/* Return the SVE predicate mode to use for elements that have
   ELEM_NBYTES bytes, if such a mode exists.  */

opt_machine_mode
aarch64_sve_pred_mode (unsigned int elem_nbytes)
{
  if (TARGET_SVE)
    {
      if (elem_nbytes == 1)
	return VNx16BImode;
      if (elem_nbytes == 2)
	return VNx8BImode;
      if (elem_nbytes == 4)
	return VNx4BImode;
      if (elem_nbytes == 8)
	return VNx2BImode;
    }
  return opt_machine_mode ();
}

/* Implement TARGET_VECTORIZE_GET_MASK_MODE.  */

static opt_machine_mode
aarch64_get_mask_mode (poly_uint64 nunits, poly_uint64 nbytes)
{
  if (TARGET_SVE && known_eq (nbytes, BYTES_PER_SVE_VECTOR))
    {
      unsigned int elem_nbytes = vector_element_size (nbytes, nunits);
      machine_mode pred_mode;
      if (aarch64_sve_pred_mode (elem_nbytes).exists (&pred_mode))
	return pred_mode;
    }

  return default_get_mask_mode (nunits, nbytes);
}

/* Implement TARGET_PREFERRED_ELSE_VALUE.  For binary operations,
   prefer to use the first arithmetic operand as the else value if
   the else value doesn't matter, since that exactly matches the SVE
   destructive merging form.  For ternary operations we could either
   pick the first operand and use FMAD-like instructions or the last
   operand and use FMLA-like instructions; the latter seems more
   natural.  */

static tree
aarch64_preferred_else_value (unsigned, tree, unsigned int nops, tree *ops)
{
  return nops == 3 ? ops[2] : ops[0];
}

/* Implement TARGET_HARD_REGNO_NREGS.  */

static unsigned int
aarch64_hard_regno_nregs (unsigned regno, machine_mode mode)
{
  /* ??? Logically we should only need to provide a value when
     HARD_REGNO_MODE_OK says that the combination is valid,
     but at the moment we need to handle all modes.  Just ignore
     any runtime parts for registers that can't store them.  */
  HOST_WIDE_INT lowest_size = constant_lower_bound (GET_MODE_SIZE (mode));
  switch (aarch64_regno_regclass (regno))
    {
    case FP_REGS:
    case FP_LO_REGS:
      if (aarch64_sve_data_mode_p (mode))
	return exact_div (GET_MODE_SIZE (mode),
			  BYTES_PER_SVE_VECTOR).to_constant ();
      return CEIL (lowest_size, UNITS_PER_VREG);
    case PR_REGS:
    case PR_LO_REGS:
    case PR_HI_REGS:
      return 1;
    default:
      return CEIL (lowest_size, UNITS_PER_WORD);
    }
  gcc_unreachable ();
}

/* Implement TARGET_HARD_REGNO_MODE_OK.  */

static bool
aarch64_hard_regno_mode_ok (unsigned regno, machine_mode mode)
{
  if (GET_MODE_CLASS (mode) == MODE_CC)
    return regno == CC_REGNUM;

  if (regno == VG_REGNUM)
    /* This must have the same size as _Unwind_Word.  */
    return mode == DImode;

  unsigned int vec_flags = aarch64_classify_vector_mode (mode);
  if (vec_flags & VEC_SVE_PRED)
    return PR_REGNUM_P (regno);

  if (PR_REGNUM_P (regno))
    return 0;

  if (regno == SP_REGNUM)
    /* The purpose of comparing with ptr_mode is to support the
       global register variable associated with the stack pointer
       register via the syntax of asm ("wsp") in ILP32.  */
    return mode == Pmode || mode == ptr_mode;

  if (regno == FRAME_POINTER_REGNUM || regno == ARG_POINTER_REGNUM)
    return mode == Pmode;

  if (GP_REGNUM_P (regno) && known_le (GET_MODE_SIZE (mode), 16))
    return true;

  if (FP_REGNUM_P (regno))
    {
      if (vec_flags & VEC_STRUCT)
	return end_hard_regno (mode, regno) - 1 <= V31_REGNUM;
      else
	return !VECTOR_MODE_P (mode) || vec_flags != 0;
    }

  return false;
}

/* Implement TARGET_HARD_REGNO_CALL_PART_CLOBBERED.  The callee only saves
   the lower 64 bits of a 128-bit register.  Tell the compiler the callee
   clobbers the top 64 bits when restoring the bottom 64 bits.  */

static bool
aarch64_hard_regno_call_part_clobbered (unsigned int regno, machine_mode mode)
{
  return FP_REGNUM_P (regno) && maybe_gt (GET_MODE_SIZE (mode), 8);
}

/* Implement REGMODE_NATURAL_SIZE.  */
poly_uint64
aarch64_regmode_natural_size (machine_mode mode)
{
  /* The natural size for SVE data modes is one SVE data vector,
     and similarly for predicates.  We can't independently modify
     anything smaller than that.  */
  /* ??? For now, only do this for variable-width SVE registers.
     Doing it for constant-sized registers breaks lower-subreg.c.  */
  /* ??? And once that's fixed, we should probably have similar
     code for Advanced SIMD.  */
  if (!aarch64_sve_vg.is_constant ())
    {
      unsigned int vec_flags = aarch64_classify_vector_mode (mode);
      if (vec_flags & VEC_SVE_PRED)
	return BYTES_PER_SVE_PRED;
      if (vec_flags & VEC_SVE_DATA)
	return BYTES_PER_SVE_VECTOR;
    }
  return UNITS_PER_WORD;
}

/* Implement HARD_REGNO_CALLER_SAVE_MODE.  */
machine_mode
aarch64_hard_regno_caller_save_mode (unsigned regno, unsigned,
				     machine_mode mode)
{
  /* The predicate mode determines which bits are significant and
     which are "don't care".  Decreasing the number of lanes would
     lose data while increasing the number of lanes would make bits
     unnecessarily significant.  */
  if (PR_REGNUM_P (regno))
    return mode;
  if (known_ge (GET_MODE_SIZE (mode), 4))
    return mode;
  else
    return SImode;
}

/* Implement TARGET_CONSTANT_ALIGNMENT.  Make strings word-aligned so
   that strcpy from constants will be faster.  */

static HOST_WIDE_INT
aarch64_constant_alignment (const_tree exp, HOST_WIDE_INT align)
{
  if (TREE_CODE (exp) == STRING_CST && !optimize_size)
    return MAX (align, BITS_PER_WORD);
  return align;
}

/* Return true if calls to DECL should be treated as
   long-calls (ie called via a register).  */
static bool
aarch64_decl_is_long_call_p (const_tree decl ATTRIBUTE_UNUSED)
{
  return false;
}

/* Return true if calls to symbol-ref SYM should be treated as
   long-calls (ie called via a register).  */
bool
aarch64_is_long_call_p (rtx sym)
{
  return aarch64_decl_is_long_call_p (SYMBOL_REF_DECL (sym));
}

/* Return true if calls to symbol-ref SYM should not go through
   plt stubs.  */

bool
aarch64_is_noplt_call_p (rtx sym)
{
  const_tree decl = SYMBOL_REF_DECL (sym);

  if (flag_pic
      && decl
      && (!flag_plt
	  || lookup_attribute ("noplt", DECL_ATTRIBUTES (decl)))
      && !targetm.binds_local_p (decl))
    return true;

  return false;
}

/* Return true if the offsets to a zero/sign-extract operation
   represent an expression that matches an extend operation.  The
   operands represent the parameters from

     (extract:MODE (mult (reg) (MULT_IMM)) (EXTRACT_IMM) (const_int 0)).  */
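/* Illustrative example (added here; not part of the upstream sources):
   for DImode, MULT_IMM = 4 and EXTRACT_IMM = 34 pass the checks below,
   since 34 & ~7 == 32 is a power of two, 34 & 7 == 2 is at most 4, and
   4 == 1 << 2; that combination behaves like a 32-bit extend of a value
   shifted left by two.  */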
43e9d192 1507bool
77e994c9 1508aarch64_is_extend_from_extract (scalar_int_mode mode, rtx mult_imm,
43e9d192
IB
1509 rtx extract_imm)
1510{
1511 HOST_WIDE_INT mult_val, extract_val;
1512
1513 if (! CONST_INT_P (mult_imm) || ! CONST_INT_P (extract_imm))
1514 return false;
1515
1516 mult_val = INTVAL (mult_imm);
1517 extract_val = INTVAL (extract_imm);
1518
1519 if (extract_val > 8
1520 && extract_val < GET_MODE_BITSIZE (mode)
1521 && exact_log2 (extract_val & ~7) > 0
1522 && (extract_val & 7) <= 4
1523 && mult_val == (1 << (extract_val & 7)))
1524 return true;
1525
1526 return false;
1527}
1528
1529/* Emit an insn that's a simple single-set. Both the operands must be
1530 known to be valid. */
827ab47a 1531inline static rtx_insn *
43e9d192
IB
1532emit_set_insn (rtx x, rtx y)
1533{
f7df4a84 1534 return emit_insn (gen_rtx_SET (x, y));
43e9d192
IB
1535}
1536
1537/* X and Y are two things to compare using CODE. Emit the compare insn and
1538 return the rtx for register 0 in the proper mode. */
1539rtx
1540aarch64_gen_compare_reg (RTX_CODE code, rtx x, rtx y)
1541{
ef4bddc2 1542 machine_mode mode = SELECT_CC_MODE (code, x, y);
43e9d192
IB
1543 rtx cc_reg = gen_rtx_REG (mode, CC_REGNUM);
1544
1545 emit_set_insn (cc_reg, gen_rtx_COMPARE (mode, x, y));
1546 return cc_reg;
1547}
1548
1549/* Build the SYMBOL_REF for __tls_get_addr. */
1550
1551static GTY(()) rtx tls_get_addr_libfunc;
1552
1553rtx
1554aarch64_tls_get_addr (void)
1555{
1556 if (!tls_get_addr_libfunc)
1557 tls_get_addr_libfunc = init_one_libfunc ("__tls_get_addr");
1558 return tls_get_addr_libfunc;
1559}
1560
1561/* Return the TLS model to use for ADDR. */
1562
1563static enum tls_model
1564tls_symbolic_operand_type (rtx addr)
1565{
1566 enum tls_model tls_kind = TLS_MODEL_NONE;
43e9d192
IB
1567 if (GET_CODE (addr) == CONST)
1568 {
6a70badb
RS
1569 poly_int64 addend;
1570 rtx sym = strip_offset (addr, &addend);
43e9d192
IB
1571 if (GET_CODE (sym) == SYMBOL_REF)
1572 tls_kind = SYMBOL_REF_TLS_MODEL (sym);
1573 }
1574 else if (GET_CODE (addr) == SYMBOL_REF)
1575 tls_kind = SYMBOL_REF_TLS_MODEL (addr);
1576
1577 return tls_kind;
1578}
1579
1580/* We'll allow lo_sum's in addresses in our legitimate addresses
1581 so that combine would take care of combining addresses where
1582 necessary, but for generation purposes, we'll generate the address
1583 as :
1584 RTL Absolute
1585 tmp = hi (symbol_ref); adrp x1, foo
1586 dest = lo_sum (tmp, symbol_ref); add dest, x1, :lo_12:foo
1587 nop
1588
1589 PIC TLS
1590 adrp x1, :got:foo adrp tmp, :tlsgd:foo
1591 ldr x1, [:got_lo12:foo] add dest, tmp, :tlsgd_lo12:foo
1592 bl __tls_get_addr
1593 nop
1594
1595 Load TLS symbol, depending on TLS mechanism and TLS access model.
1596
1597 Global Dynamic - Traditional TLS:
1598 adrp tmp, :tlsgd:imm
1599 add dest, tmp, #:tlsgd_lo12:imm
1600 bl __tls_get_addr
1601
1602 Global Dynamic - TLS Descriptors:
1603 adrp dest, :tlsdesc:imm
1604 ldr tmp, [dest, #:tlsdesc_lo12:imm]
1605 add dest, dest, #:tlsdesc_lo12:imm
1606 blr tmp
1607 mrs tp, tpidr_el0
1608 add dest, dest, tp
1609
1610 Initial Exec:
1611 mrs tp, tpidr_el0
1612 adrp tmp, :gottprel:imm
1613 ldr dest, [tmp, #:gottprel_lo12:imm]
1614 add dest, dest, tp
1615
1616 Local Exec:
1617 mrs tp, tpidr_el0
0699caae
RL
1618 add t0, tp, #:tprel_hi12:imm, lsl #12
1619 add t0, t0, #:tprel_lo12_nc:imm
43e9d192
IB
1620*/
1621
1622static void
1623aarch64_load_symref_appropriately (rtx dest, rtx imm,
1624 enum aarch64_symbol_type type)
1625{
1626 switch (type)
1627 {
1628 case SYMBOL_SMALL_ABSOLUTE:
1629 {
28514dda 1630 /* In ILP32, the mode of dest can be either SImode or DImode. */
43e9d192 1631 rtx tmp_reg = dest;
ef4bddc2 1632 machine_mode mode = GET_MODE (dest);
28514dda
YZ
1633
1634 gcc_assert (mode == Pmode || mode == ptr_mode);
1635
43e9d192 1636 if (can_create_pseudo_p ())
28514dda 1637 tmp_reg = gen_reg_rtx (mode);
43e9d192 1638
28514dda 1639 emit_move_insn (tmp_reg, gen_rtx_HIGH (mode, imm));
43e9d192
IB
1640 emit_insn (gen_add_losym (dest, tmp_reg, imm));
1641 return;
1642 }
1643
a5350ddc 1644 case SYMBOL_TINY_ABSOLUTE:
f7df4a84 1645 emit_insn (gen_rtx_SET (dest, imm));
a5350ddc
CSS
1646 return;
1647
1b1e81f8
JW
1648 case SYMBOL_SMALL_GOT_28K:
1649 {
1650 machine_mode mode = GET_MODE (dest);
1651 rtx gp_rtx = pic_offset_table_rtx;
53021678
JW
1652 rtx insn;
1653 rtx mem;
1b1e81f8
JW
1654
1655 /* NOTE: pic_offset_table_rtx can be NULL_RTX, because we can reach
1656 here before rtl expansion. Tree IVOPT will generate an rtl pattern to
1657 decide rtx costs, in which case pic_offset_table_rtx is not
1658 initialized. In that case there is no need to generate the first adrp
 026c3cfd 1659 instruction, as the final cost for global variable access is
1b1e81f8
JW
1660 one instruction. */
1661 if (gp_rtx != NULL)
1662 {
1663 /* -fpic for -mcmodel=small allows a 32K GOT table size (but since we
1664 use the page base as the GOT base, the first page may be wasted; in
1665 the worst case only 28K of space is left for the GOT).
1666
1667 The generated instruction sequence for accessing a global variable
1668 is:
1669
a3957742 1670 ldr reg, [pic_offset_table_rtx, #:gotpage_lo15:sym]
1b1e81f8
JW
1671
1672 Only one instruction is needed. But we must initialize
1673 pic_offset_table_rtx properly. We generate an initialization insn for
1674 every global access, and allow CSE to remove all redundant ones.
1675
1676 The final instruction sequence will look like the following
1677 for multiple global variable accesses.
1678
a3957742 1679 adrp pic_offset_table_rtx, _GLOBAL_OFFSET_TABLE_
1b1e81f8 1680
a3957742
JW
1681 ldr reg, [pic_offset_table_rtx, #:gotpage_lo15:sym1]
1682 ldr reg, [pic_offset_table_rtx, #:gotpage_lo15:sym2]
1683 ldr reg, [pic_offset_table_rtx, #:gotpage_lo15:sym3]
1684 ... */
1b1e81f8
JW
1685
1686 rtx s = gen_rtx_SYMBOL_REF (Pmode, "_GLOBAL_OFFSET_TABLE_");
1687 crtl->uses_pic_offset_table = 1;
1688 emit_move_insn (gp_rtx, gen_rtx_HIGH (Pmode, s));
1689
1690 if (mode != GET_MODE (gp_rtx))
4ba8f0a3
AP
1691 gp_rtx = gen_lowpart (mode, gp_rtx);
1692
1b1e81f8
JW
1693 }
1694
1695 if (mode == ptr_mode)
1696 {
1697 if (mode == DImode)
53021678 1698 insn = gen_ldr_got_small_28k_di (dest, gp_rtx, imm);
1b1e81f8 1699 else
53021678
JW
1700 insn = gen_ldr_got_small_28k_si (dest, gp_rtx, imm);
1701
1702 mem = XVECEXP (SET_SRC (insn), 0, 0);
1b1e81f8
JW
1703 }
1704 else
1705 {
1706 gcc_assert (mode == Pmode);
53021678
JW
1707
1708 insn = gen_ldr_got_small_28k_sidi (dest, gp_rtx, imm);
1709 mem = XVECEXP (XEXP (SET_SRC (insn), 0), 0, 0);
1b1e81f8
JW
1710 }
1711
53021678
JW
1712 /* The operand is expected to be a MEM. Whenever the related insn
1713 pattern changes, the above code which calculates MEM should be
1714 updated. */
1715 gcc_assert (GET_CODE (mem) == MEM);
1716 MEM_READONLY_P (mem) = 1;
1717 MEM_NOTRAP_P (mem) = 1;
1718 emit_insn (insn);
1b1e81f8
JW
1719 return;
1720 }
1721
6642bdb4 1722 case SYMBOL_SMALL_GOT_4G:
43e9d192 1723 {
28514dda
YZ
1724 /* In ILP32, the mode of dest can be either SImode or DImode,
1725 while the got entry is always of SImode size. The mode of
1726 dest depends on how dest is used: if dest is assigned to a
1727 pointer (e.g. in the memory), it has SImode; it may have
1728 DImode if dest is dereferenced to access the memory.
1729 This is why we have to handle three different ldr_got_small
1730 patterns here (two patterns for ILP32). */
53021678
JW
1731
1732 rtx insn;
1733 rtx mem;
43e9d192 1734 rtx tmp_reg = dest;
ef4bddc2 1735 machine_mode mode = GET_MODE (dest);
28514dda 1736
43e9d192 1737 if (can_create_pseudo_p ())
28514dda
YZ
1738 tmp_reg = gen_reg_rtx (mode);
1739
1740 emit_move_insn (tmp_reg, gen_rtx_HIGH (mode, imm));
1741 if (mode == ptr_mode)
1742 {
1743 if (mode == DImode)
53021678 1744 insn = gen_ldr_got_small_di (dest, tmp_reg, imm);
28514dda 1745 else
53021678
JW
1746 insn = gen_ldr_got_small_si (dest, tmp_reg, imm);
1747
1748 mem = XVECEXP (SET_SRC (insn), 0, 0);
28514dda
YZ
1749 }
1750 else
1751 {
1752 gcc_assert (mode == Pmode);
53021678
JW
1753
1754 insn = gen_ldr_got_small_sidi (dest, tmp_reg, imm);
1755 mem = XVECEXP (XEXP (SET_SRC (insn), 0), 0, 0);
28514dda
YZ
1756 }
1757
53021678
JW
1758 gcc_assert (GET_CODE (mem) == MEM);
1759 MEM_READONLY_P (mem) = 1;
1760 MEM_NOTRAP_P (mem) = 1;
1761 emit_insn (insn);
43e9d192
IB
1762 return;
1763 }
1764
1765 case SYMBOL_SMALL_TLSGD:
1766 {
5d8a22a5 1767 rtx_insn *insns;
23b88fda
N
1768 machine_mode mode = GET_MODE (dest);
1769 rtx result = gen_rtx_REG (mode, R0_REGNUM);
43e9d192
IB
1770
1771 start_sequence ();
23b88fda
N
1772 if (TARGET_ILP32)
1773 aarch64_emit_call_insn (gen_tlsgd_small_si (result, imm));
1774 else
1775 aarch64_emit_call_insn (gen_tlsgd_small_di (result, imm));
43e9d192
IB
1776 insns = get_insns ();
1777 end_sequence ();
1778
1779 RTL_CONST_CALL_P (insns) = 1;
1780 emit_libcall_block (insns, dest, result, imm);
1781 return;
1782 }
1783
1784 case SYMBOL_SMALL_TLSDESC:
1785 {
ef4bddc2 1786 machine_mode mode = GET_MODE (dest);
621ad2de 1787 rtx x0 = gen_rtx_REG (mode, R0_REGNUM);
43e9d192
IB
1788 rtx tp;
1789
621ad2de
AP
1790 gcc_assert (mode == Pmode || mode == ptr_mode);
1791
2876a13f
JW
1792 /* In ILP32, the got entry is always of SImode size. Unlike
1793 small GOT, the dest is fixed at reg 0. */
1794 if (TARGET_ILP32)
1795 emit_insn (gen_tlsdesc_small_si (imm));
621ad2de 1796 else
2876a13f 1797 emit_insn (gen_tlsdesc_small_di (imm));
43e9d192 1798 tp = aarch64_load_tp (NULL);
621ad2de
AP
1799
1800 if (mode != Pmode)
1801 tp = gen_lowpart (mode, tp);
1802
2876a13f 1803 emit_insn (gen_rtx_SET (dest, gen_rtx_PLUS (mode, tp, x0)));
241dbd9d
QZ
1804 if (REG_P (dest))
1805 set_unique_reg_note (get_last_insn (), REG_EQUIV, imm);
43e9d192
IB
1806 return;
1807 }
1808
79496620 1809 case SYMBOL_SMALL_TLSIE:
43e9d192 1810 {
621ad2de
AP
1811 /* In ILP32, the mode of dest can be either SImode or DImode,
1812 while the got entry is always of SImode size. The mode of
1813 dest depends on how dest is used: if dest is assigned to a
1814 pointer (e.g. in the memory), it has SImode; it may have
1815 DImode if dest is dereferenced to access the memory.
1816 This is why we have to handle three different tlsie_small
1817 patterns here (two patterns for ILP32). */
ef4bddc2 1818 machine_mode mode = GET_MODE (dest);
621ad2de 1819 rtx tmp_reg = gen_reg_rtx (mode);
43e9d192 1820 rtx tp = aarch64_load_tp (NULL);
621ad2de
AP
1821
1822 if (mode == ptr_mode)
1823 {
1824 if (mode == DImode)
1825 emit_insn (gen_tlsie_small_di (tmp_reg, imm));
1826 else
1827 {
1828 emit_insn (gen_tlsie_small_si (tmp_reg, imm));
1829 tp = gen_lowpart (mode, tp);
1830 }
1831 }
1832 else
1833 {
1834 gcc_assert (mode == Pmode);
1835 emit_insn (gen_tlsie_small_sidi (tmp_reg, imm));
1836 }
1837
f7df4a84 1838 emit_insn (gen_rtx_SET (dest, gen_rtx_PLUS (mode, tp, tmp_reg)));
241dbd9d
QZ
1839 if (REG_P (dest))
1840 set_unique_reg_note (get_last_insn (), REG_EQUIV, imm);
43e9d192
IB
1841 return;
1842 }
1843
cbf5629e 1844 case SYMBOL_TLSLE12:
d18ba284 1845 case SYMBOL_TLSLE24:
cbf5629e
JW
1846 case SYMBOL_TLSLE32:
1847 case SYMBOL_TLSLE48:
43e9d192 1848 {
cbf5629e 1849 machine_mode mode = GET_MODE (dest);
43e9d192 1850 rtx tp = aarch64_load_tp (NULL);
e6f7f0e9 1851
cbf5629e
JW
1852 if (mode != Pmode)
1853 tp = gen_lowpart (mode, tp);
1854
1855 switch (type)
1856 {
1857 case SYMBOL_TLSLE12:
1858 emit_insn ((mode == DImode ? gen_tlsle12_di : gen_tlsle12_si)
1859 (dest, tp, imm));
1860 break;
1861 case SYMBOL_TLSLE24:
1862 emit_insn ((mode == DImode ? gen_tlsle24_di : gen_tlsle24_si)
1863 (dest, tp, imm));
1864 break;
1865 case SYMBOL_TLSLE32:
1866 emit_insn ((mode == DImode ? gen_tlsle32_di : gen_tlsle32_si)
1867 (dest, imm));
1868 emit_insn ((mode == DImode ? gen_adddi3 : gen_addsi3)
1869 (dest, dest, tp));
1870 break;
1871 case SYMBOL_TLSLE48:
1872 emit_insn ((mode == DImode ? gen_tlsle48_di : gen_tlsle48_si)
1873 (dest, imm));
1874 emit_insn ((mode == DImode ? gen_adddi3 : gen_addsi3)
1875 (dest, dest, tp));
1876 break;
1877 default:
1878 gcc_unreachable ();
1879 }
e6f7f0e9 1880
241dbd9d
QZ
1881 if (REG_P (dest))
1882 set_unique_reg_note (get_last_insn (), REG_EQUIV, imm);
43e9d192
IB
1883 return;
1884 }
1885
87dd8ab0
MS
1886 case SYMBOL_TINY_GOT:
1887 emit_insn (gen_ldr_got_tiny (dest, imm));
1888 return;
1889
5ae7caad
JW
1890 case SYMBOL_TINY_TLSIE:
1891 {
1892 machine_mode mode = GET_MODE (dest);
1893 rtx tp = aarch64_load_tp (NULL);
1894
1895 if (mode == ptr_mode)
1896 {
1897 if (mode == DImode)
1898 emit_insn (gen_tlsie_tiny_di (dest, imm, tp));
1899 else
1900 {
1901 tp = gen_lowpart (mode, tp);
1902 emit_insn (gen_tlsie_tiny_si (dest, imm, tp));
1903 }
1904 }
1905 else
1906 {
1907 gcc_assert (mode == Pmode);
1908 emit_insn (gen_tlsie_tiny_sidi (dest, imm, tp));
1909 }
1910
241dbd9d
QZ
1911 if (REG_P (dest))
1912 set_unique_reg_note (get_last_insn (), REG_EQUIV, imm);
5ae7caad
JW
1913 return;
1914 }
1915
43e9d192
IB
1916 default:
1917 gcc_unreachable ();
1918 }
1919}
1920
1921/* Emit a move from SRC to DEST. Assume that the move expanders can
1922 handle all moves if !can_create_pseudo_p (). The distinction is
1923 important because, unlike emit_move_insn, the move expanders know
1924 how to force Pmode objects into the constant pool even when the
1925 constant pool address is not itself legitimate. */
1926static rtx
1927aarch64_emit_move (rtx dest, rtx src)
1928{
1929 return (can_create_pseudo_p ()
1930 ? emit_move_insn (dest, src)
1931 : emit_move_insn_1 (dest, src));
1932}
1933
f22d7973
RS
1934/* Apply UNOPTAB to OP and store the result in DEST. */
1935
1936static void
1937aarch64_emit_unop (rtx dest, optab unoptab, rtx op)
1938{
1939 rtx tmp = expand_unop (GET_MODE (dest), unoptab, op, dest, 0);
1940 if (dest != tmp)
1941 emit_move_insn (dest, tmp);
1942}
1943
1944/* Apply BINOPTAB to OP0 and OP1 and store the result in DEST. */
1945
1946static void
1947aarch64_emit_binop (rtx dest, optab binoptab, rtx op0, rtx op1)
1948{
1949 rtx tmp = expand_binop (GET_MODE (dest), binoptab, op0, op1, dest, 0,
1950 OPTAB_DIRECT);
1951 if (dest != tmp)
1952 emit_move_insn (dest, tmp);
1953}
1954
030d03b8
RE
1955/* Split a 128-bit move operation into two 64-bit move operations,
1956 taking care to handle partial overlap of register to register
1957 copies. Special cases are needed when moving between GP regs and
1958 FP regs. SRC can be a register, constant or memory; DST a register
1959 or memory. If either operand is memory it must not have any side
1960 effects. */
43e9d192
IB
1961void
1962aarch64_split_128bit_move (rtx dst, rtx src)
1963{
030d03b8
RE
1964 rtx dst_lo, dst_hi;
1965 rtx src_lo, src_hi;
43e9d192 1966
ef4bddc2 1967 machine_mode mode = GET_MODE (dst);
12dc6974 1968
030d03b8
RE
1969 gcc_assert (mode == TImode || mode == TFmode);
1970 gcc_assert (!(side_effects_p (src) || side_effects_p (dst)));
1971 gcc_assert (mode == GET_MODE (src) || GET_MODE (src) == VOIDmode);
43e9d192
IB
1972
1973 if (REG_P (dst) && REG_P (src))
1974 {
030d03b8
RE
1975 int src_regno = REGNO (src);
1976 int dst_regno = REGNO (dst);
43e9d192 1977
030d03b8 1978 /* Handle FP <-> GP regs. */
43e9d192
IB
1979 if (FP_REGNUM_P (dst_regno) && GP_REGNUM_P (src_regno))
1980 {
030d03b8
RE
1981 src_lo = gen_lowpart (word_mode, src);
1982 src_hi = gen_highpart (word_mode, src);
1983
1984 if (mode == TImode)
1985 {
1986 emit_insn (gen_aarch64_movtilow_di (dst, src_lo));
1987 emit_insn (gen_aarch64_movtihigh_di (dst, src_hi));
1988 }
1989 else
1990 {
1991 emit_insn (gen_aarch64_movtflow_di (dst, src_lo));
1992 emit_insn (gen_aarch64_movtfhigh_di (dst, src_hi));
1993 }
1994 return;
43e9d192
IB
1995 }
1996 else if (GP_REGNUM_P (dst_regno) && FP_REGNUM_P (src_regno))
1997 {
030d03b8
RE
1998 dst_lo = gen_lowpart (word_mode, dst);
1999 dst_hi = gen_highpart (word_mode, dst);
2000
2001 if (mode == TImode)
2002 {
2003 emit_insn (gen_aarch64_movdi_tilow (dst_lo, src));
2004 emit_insn (gen_aarch64_movdi_tihigh (dst_hi, src));
2005 }
2006 else
2007 {
2008 emit_insn (gen_aarch64_movdi_tflow (dst_lo, src));
2009 emit_insn (gen_aarch64_movdi_tfhigh (dst_hi, src));
2010 }
2011 return;
43e9d192 2012 }
43e9d192
IB
2013 }
2014
030d03b8
RE
2015 dst_lo = gen_lowpart (word_mode, dst);
2016 dst_hi = gen_highpart (word_mode, dst);
2017 src_lo = gen_lowpart (word_mode, src);
2018 src_hi = gen_highpart_mode (word_mode, mode, src);
2019
2020 /* At most one pairing may overlap. */
2021 if (reg_overlap_mentioned_p (dst_lo, src_hi))
2022 {
2023 aarch64_emit_move (dst_hi, src_hi);
2024 aarch64_emit_move (dst_lo, src_lo);
2025 }
2026 else
2027 {
2028 aarch64_emit_move (dst_lo, src_lo);
2029 aarch64_emit_move (dst_hi, src_hi);
2030 }
43e9d192
IB
2031}
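/* Worked example (added for illustration): for a TImode copy whose
   destination pair is (R1, R2) and whose source pair is (R0, R1),
   DST_LO is the same register as SRC_HI, so the overlap check above
   moves the high halves first (R2 <- R1) and the low halves second
   (R1 <- R0); in the non-overlapping case the low halves go first.  */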
2032
2033bool
2034aarch64_split_128bit_move_p (rtx dst, rtx src)
2035{
2036 return (! REG_P (src)
2037 || ! (FP_REGNUM_P (REGNO (dst)) && FP_REGNUM_P (REGNO (src))));
2038}
2039
8b033a8a
SN
2040/* Split a complex SIMD combine. */
2041
2042void
2043aarch64_split_simd_combine (rtx dst, rtx src1, rtx src2)
2044{
ef4bddc2
RS
2045 machine_mode src_mode = GET_MODE (src1);
2046 machine_mode dst_mode = GET_MODE (dst);
8b033a8a
SN
2047
2048 gcc_assert (VECTOR_MODE_P (dst_mode));
a977dc0c
MC
2049 gcc_assert (register_operand (dst, dst_mode)
2050 && register_operand (src1, src_mode)
2051 && register_operand (src2, src_mode));
8b033a8a 2052
a977dc0c 2053 rtx (*gen) (rtx, rtx, rtx);
8b033a8a 2054
a977dc0c
MC
2055 switch (src_mode)
2056 {
4e10a5a7 2057 case E_V8QImode:
a977dc0c
MC
2058 gen = gen_aarch64_simd_combinev8qi;
2059 break;
4e10a5a7 2060 case E_V4HImode:
a977dc0c
MC
2061 gen = gen_aarch64_simd_combinev4hi;
2062 break;
4e10a5a7 2063 case E_V2SImode:
a977dc0c
MC
2064 gen = gen_aarch64_simd_combinev2si;
2065 break;
4e10a5a7 2066 case E_V4HFmode:
a977dc0c
MC
2067 gen = gen_aarch64_simd_combinev4hf;
2068 break;
4e10a5a7 2069 case E_V2SFmode:
a977dc0c
MC
2070 gen = gen_aarch64_simd_combinev2sf;
2071 break;
4e10a5a7 2072 case E_DImode:
a977dc0c
MC
2073 gen = gen_aarch64_simd_combinedi;
2074 break;
4e10a5a7 2075 case E_DFmode:
a977dc0c
MC
2076 gen = gen_aarch64_simd_combinedf;
2077 break;
2078 default:
2079 gcc_unreachable ();
8b033a8a 2080 }
a977dc0c
MC
2081
2082 emit_insn (gen (dst, src1, src2));
2083 return;
8b033a8a
SN
2084}
2085
fd4842cd
SN
2086/* Split a complex SIMD move. */
2087
2088void
2089aarch64_split_simd_move (rtx dst, rtx src)
2090{
ef4bddc2
RS
2091 machine_mode src_mode = GET_MODE (src);
2092 machine_mode dst_mode = GET_MODE (dst);
fd4842cd
SN
2093
2094 gcc_assert (VECTOR_MODE_P (dst_mode));
2095
2096 if (REG_P (dst) && REG_P (src))
2097 {
c59b7e28
SN
2098 rtx (*gen) (rtx, rtx);
2099
fd4842cd
SN
2100 gcc_assert (VECTOR_MODE_P (src_mode));
2101
2102 switch (src_mode)
2103 {
4e10a5a7 2104 case E_V16QImode:
c59b7e28 2105 gen = gen_aarch64_split_simd_movv16qi;
fd4842cd 2106 break;
4e10a5a7 2107 case E_V8HImode:
c59b7e28 2108 gen = gen_aarch64_split_simd_movv8hi;
fd4842cd 2109 break;
4e10a5a7 2110 case E_V4SImode:
c59b7e28 2111 gen = gen_aarch64_split_simd_movv4si;
fd4842cd 2112 break;
4e10a5a7 2113 case E_V2DImode:
c59b7e28 2114 gen = gen_aarch64_split_simd_movv2di;
fd4842cd 2115 break;
4e10a5a7 2116 case E_V8HFmode:
71a11456
AL
2117 gen = gen_aarch64_split_simd_movv8hf;
2118 break;
4e10a5a7 2119 case E_V4SFmode:
c59b7e28 2120 gen = gen_aarch64_split_simd_movv4sf;
fd4842cd 2121 break;
4e10a5a7 2122 case E_V2DFmode:
c59b7e28 2123 gen = gen_aarch64_split_simd_movv2df;
fd4842cd
SN
2124 break;
2125 default:
2126 gcc_unreachable ();
2127 }
c59b7e28
SN
2128
2129 emit_insn (gen (dst, src));
fd4842cd
SN
2130 return;
2131 }
2132}
2133
ef22810a
RH
2134bool
2135aarch64_zero_extend_const_eq (machine_mode xmode, rtx x,
2136 machine_mode ymode, rtx y)
2137{
2138 rtx r = simplify_const_unary_operation (ZERO_EXTEND, xmode, y, ymode);
2139 gcc_assert (r != NULL);
2140 return rtx_equal_p (x, r);
2141}
2142
2143
43e9d192 2144static rtx
ef4bddc2 2145aarch64_force_temporary (machine_mode mode, rtx x, rtx value)
43e9d192
IB
2146{
2147 if (can_create_pseudo_p ())
e18b4a81 2148 return force_reg (mode, value);
43e9d192
IB
2149 else
2150 {
f5470a77
RS
2151 gcc_assert (x);
2152 aarch64_emit_move (x, value);
43e9d192
IB
2153 return x;
2154 }
2155}
2156
43cacb12
RS
2157/* Return true if we can move VALUE into a register using a single
2158 CNT[BHWD] instruction. */
2159
2160static bool
2161aarch64_sve_cnt_immediate_p (poly_int64 value)
2162{
2163 HOST_WIDE_INT factor = value.coeffs[0];
2164 /* The coefficient must be [1, 16] * {2, 4, 8, 16}. */
2165 return (value.coeffs[1] == factor
2166 && IN_RANGE (factor, 2, 16 * 16)
2167 && (factor & 1) == 0
2168 && factor <= 16 * (factor & -factor));
2169}
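/* Worked example (added for illustration): the poly_int64 (48, 48),
   i.e. 48 times the number of 128-bit quadwords, has FACTOR == 48:
   it is even, lies in [2, 256], and since its lowest set bit is 16 it
   satisfies 48 <= 16 * 16, so it is accepted (a CNTB with multiplier 3).
   A factor of 34 fails the last test (34 > 16 * 2) and is rejected.  */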
2170
2171/* Likewise for rtx X. */
2172
2173bool
2174aarch64_sve_cnt_immediate_p (rtx x)
2175{
2176 poly_int64 value;
2177 return poly_int_rtx_p (x, &value) && aarch64_sve_cnt_immediate_p (value);
2178}
2179
2180/* Return the asm string for an instruction with a CNT-like vector size
2181 operand (a vector pattern followed by a multiplier in the range [1, 16]).
2182 PREFIX is the mnemonic without the size suffix and OPERANDS is the
2183 first part of the operands template (the part that comes before the
2184 vector size itself). FACTOR is the number of quadwords.
2185 NELTS_PER_VQ, if nonzero, is the number of elements in each quadword.
2186 If it is zero, we can use any element size. */
2187
2188static char *
2189aarch64_output_sve_cnt_immediate (const char *prefix, const char *operands,
2190 unsigned int factor,
2191 unsigned int nelts_per_vq)
2192{
2193 static char buffer[sizeof ("sqincd\t%x0, %w0, all, mul #16")];
2194
2195 if (nelts_per_vq == 0)
2196 /* There is some overlap in the ranges of the four CNT instructions.
2197 Here we always use the smallest possible element size, so that the
2198 multiplier is 1 wherever possible. */
2199 nelts_per_vq = factor & -factor;
2200 int shift = std::min (exact_log2 (nelts_per_vq), 4);
2201 gcc_assert (IN_RANGE (shift, 1, 4));
2202 char suffix = "dwhb"[shift - 1];
2203
2204 factor >>= shift;
2205 unsigned int written;
2206 if (factor == 1)
2207 written = snprintf (buffer, sizeof (buffer), "%s%c\t%s",
2208 prefix, suffix, operands);
2209 else
2210 written = snprintf (buffer, sizeof (buffer), "%s%c\t%s, all, mul #%d",
2211 prefix, suffix, operands, factor);
2212 gcc_assert (written < sizeof (buffer));
2213 return buffer;
2214}
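/* Worked examples (added for illustration), assuming PREFIX is "cnt":
   FACTOR == 48 with NELTS_PER_VQ == 0 picks the byte form and prints
   "cntb\t<operands>, all, mul #3", while FACTOR == 2 picks the
   doubleword form and prints just "cntd\t<operands>".  */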
2215
2216/* Return the asm string for an instruction with a CNT-like vector size
2217 operand (a vector pattern followed by a multiplier in the range [1, 16]).
2218 PREFIX is the mnemonic without the size suffix and OPERANDS is the
2219 first part of the operands template (the part that comes before the
2220 vector size itself). X is the value of the vector size operand,
2221 as a polynomial integer rtx. */
2222
2223char *
2224aarch64_output_sve_cnt_immediate (const char *prefix, const char *operands,
2225 rtx x)
2226{
2227 poly_int64 value = rtx_to_poly_int64 (x);
2228 gcc_assert (aarch64_sve_cnt_immediate_p (value));
2229 return aarch64_output_sve_cnt_immediate (prefix, operands,
2230 value.coeffs[1], 0);
2231}
2232
2233/* Return true if we can add VALUE to a register using a single ADDVL
2234 or ADDPL instruction. */
2235
2236static bool
2237aarch64_sve_addvl_addpl_immediate_p (poly_int64 value)
2238{
2239 HOST_WIDE_INT factor = value.coeffs[0];
2240 if (factor == 0 || value.coeffs[1] != factor)
2241 return false;
2242 /* FACTOR counts VG / 2, so a value of 2 is one predicate width
2243 and a value of 16 is one vector width. */
2244 return (((factor & 15) == 0 && IN_RANGE (factor, -32 * 16, 31 * 16))
2245 || ((factor & 1) == 0 && IN_RANGE (factor, -32 * 2, 31 * 2)));
2246}
2247
2248/* Likewise for rtx X. */
2249
2250bool
2251aarch64_sve_addvl_addpl_immediate_p (rtx x)
2252{
2253 poly_int64 value;
2254 return (poly_int_rtx_p (x, &value)
2255 && aarch64_sve_addvl_addpl_immediate_p (value));
2256}
2257
2258/* Return the asm string for adding ADDVL or ADDPL immediate OFFSET to operand 1
2259 and storing the result in operand 0. */
2260
2261char *
2262aarch64_output_sve_addvl_addpl (rtx dest, rtx base, rtx offset)
2263{
2264 static char buffer[sizeof ("addpl\t%x0, %x1, #-") + 3 * sizeof (int)];
2265 poly_int64 offset_value = rtx_to_poly_int64 (offset);
2266 gcc_assert (aarch64_sve_addvl_addpl_immediate_p (offset_value));
2267
2268 /* Use INC or DEC if possible. */
2269 if (rtx_equal_p (dest, base) && GP_REGNUM_P (REGNO (dest)))
2270 {
2271 if (aarch64_sve_cnt_immediate_p (offset_value))
2272 return aarch64_output_sve_cnt_immediate ("inc", "%x0",
2273 offset_value.coeffs[1], 0);
2274 if (aarch64_sve_cnt_immediate_p (-offset_value))
2275 return aarch64_output_sve_cnt_immediate ("dec", "%x0",
2276 -offset_value.coeffs[1], 0);
2277 }
2278
2279 int factor = offset_value.coeffs[1];
2280 if ((factor & 15) == 0)
2281 snprintf (buffer, sizeof (buffer), "addvl\t%%x0, %%x1, #%d", factor / 16);
2282 else
2283 snprintf (buffer, sizeof (buffer), "addpl\t%%x0, %%x1, #%d", factor / 2);
2284 return buffer;
2285}
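/* Worked examples (added for illustration): an offset of
   poly_int64 (32, 32), i.e. two full vector lengths, has FACTOR == 32
   and yields "addvl\t%x0, %x1, #2"; poly_int64 (2, 2) with DEST equal
   to BASE and both in GP registers instead uses the CNT form and
   yields "incd\t%x0".  */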
2286
2287/* Return true if X is a valid immediate for an SVE vector INC or DEC
2288 instruction. If it is, store the number of elements in each vector
2289 quadword in *NELTS_PER_VQ_OUT (if nonnull) and store the multiplication
2290 factor in *FACTOR_OUT (if nonnull). */
2291
2292bool
2293aarch64_sve_inc_dec_immediate_p (rtx x, int *factor_out,
2294 unsigned int *nelts_per_vq_out)
2295{
2296 rtx elt;
2297 poly_int64 value;
2298
2299 if (!const_vec_duplicate_p (x, &elt)
2300 || !poly_int_rtx_p (elt, &value))
2301 return false;
2302
2303 unsigned int nelts_per_vq = 128 / GET_MODE_UNIT_BITSIZE (GET_MODE (x));
2304 if (nelts_per_vq != 8 && nelts_per_vq != 4 && nelts_per_vq != 2)
2305 /* There's no vector INCB. */
2306 return false;
2307
2308 HOST_WIDE_INT factor = value.coeffs[0];
2309 if (value.coeffs[1] != factor)
2310 return false;
2311
2312 /* The coefficient must be [1, 16] * NELTS_PER_VQ. */
2313 if ((factor % nelts_per_vq) != 0
2314 || !IN_RANGE (abs (factor), nelts_per_vq, 16 * nelts_per_vq))
2315 return false;
2316
2317 if (factor_out)
2318 *factor_out = factor;
2319 if (nelts_per_vq_out)
2320 *nelts_per_vq_out = nelts_per_vq;
2321 return true;
2322}
2323
2324/* Return true if X is a valid immediate for an SVE vector INC or DEC
2325 instruction. */
2326
2327bool
2328aarch64_sve_inc_dec_immediate_p (rtx x)
2329{
2330 return aarch64_sve_inc_dec_immediate_p (x, NULL, NULL);
2331}
2332
2333/* Return the asm template for an SVE vector INC or DEC instruction.
2334 OPERANDS gives the operands before the vector count and X is the
2335 value of the vector count operand itself. */
2336
2337char *
2338aarch64_output_sve_inc_dec_immediate (const char *operands, rtx x)
2339{
2340 int factor;
2341 unsigned int nelts_per_vq;
2342 if (!aarch64_sve_inc_dec_immediate_p (x, &factor, &nelts_per_vq))
2343 gcc_unreachable ();
2344 if (factor < 0)
2345 return aarch64_output_sve_cnt_immediate ("dec", operands, -factor,
2346 nelts_per_vq);
2347 else
2348 return aarch64_output_sve_cnt_immediate ("inc", operands, factor,
2349 nelts_per_vq);
2350}
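/* Worked example (added for illustration): a VNx4SI constant in which
   every element is the poly_int64 (8, 8) has NELTS_PER_VQ == 4 and
   FACTOR == 8, so the template is "incw\t<operands>, all, mul #2".  */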
43e9d192 2351
82614948
RR
2352static int
2353aarch64_internal_mov_immediate (rtx dest, rtx imm, bool generate,
77e994c9 2354 scalar_int_mode mode)
43e9d192 2355{
43e9d192 2356 int i;
9a4865db
WD
2357 unsigned HOST_WIDE_INT val, val2, mask;
2358 int one_match, zero_match;
2359 int num_insns;
43e9d192 2360
9a4865db
WD
2361 val = INTVAL (imm);
2362
2363 if (aarch64_move_imm (val, mode))
43e9d192 2364 {
82614948 2365 if (generate)
f7df4a84 2366 emit_insn (gen_rtx_SET (dest, imm));
9a4865db 2367 return 1;
43e9d192
IB
2368 }
2369
9de00935
TC
2370 /* Check to see if the low 32 bits are either 0xffffXXXX or 0xXXXXffff
2371 (with XXXX non-zero). In that case check to see if the move can be done in
2372 a smaller mode. */
2373 val2 = val & 0xffffffff;
2374 if (mode == DImode
2375 && aarch64_move_imm (val2, SImode)
2376 && (((val >> 32) & 0xffff) == 0 || (val >> 48) == 0))
2377 {
2378 if (generate)
2379 emit_insn (gen_rtx_SET (dest, GEN_INT (val2)));
2380
2381 /* Check if we have to emit a second instruction by checking to see
2382 if any of the upper 32 bits of the original DI mode value is set. */
2383 if (val == val2)
2384 return 1;
2385
2386 i = (val >> 48) ? 48 : 32;
2387
2388 if (generate)
2389 emit_insn (gen_insv_immdi (dest, GEN_INT (i),
2390 GEN_INT ((val >> i) & 0xffff)));
2391
2392 return 2;
2393 }
2394
9a4865db 2395 if ((val >> 32) == 0 || mode == SImode)
43e9d192 2396 {
82614948
RR
2397 if (generate)
2398 {
9a4865db
WD
2399 emit_insn (gen_rtx_SET (dest, GEN_INT (val & 0xffff)));
2400 if (mode == SImode)
2401 emit_insn (gen_insv_immsi (dest, GEN_INT (16),
2402 GEN_INT ((val >> 16) & 0xffff)));
2403 else
2404 emit_insn (gen_insv_immdi (dest, GEN_INT (16),
2405 GEN_INT ((val >> 16) & 0xffff)));
82614948 2406 }
9a4865db 2407 return 2;
43e9d192
IB
2408 }
2409
2410 /* Remaining cases are all for DImode. */
2411
43e9d192 2412 mask = 0xffff;
9a4865db
WD
2413 zero_match = ((val & mask) == 0) + ((val & (mask << 16)) == 0) +
2414 ((val & (mask << 32)) == 0) + ((val & (mask << 48)) == 0);
2415 one_match = ((~val & mask) == 0) + ((~val & (mask << 16)) == 0) +
2416 ((~val & (mask << 32)) == 0) + ((~val & (mask << 48)) == 0);
43e9d192 2417
62c8d76c 2418 if (zero_match != 2 && one_match != 2)
43e9d192 2419 {
62c8d76c
WD
2420 /* Try emitting a bitmask immediate with a movk replacing 16 bits.
2421 For a 64-bit bitmask try whether changing 16 bits to all ones or
2422 zeroes creates a valid bitmask. To check any repeated bitmask,
2423 try using 16 bits from the other 32-bit half of val. */
43e9d192 2424
62c8d76c 2425 for (i = 0; i < 64; i += 16, mask <<= 16)
43e9d192 2426 {
62c8d76c
WD
2427 val2 = val & ~mask;
2428 if (val2 != val && aarch64_bitmask_imm (val2, mode))
2429 break;
2430 val2 = val | mask;
2431 if (val2 != val && aarch64_bitmask_imm (val2, mode))
2432 break;
2433 val2 = val2 & ~mask;
2434 val2 = val2 | (((val2 >> 32) | (val2 << 32)) & mask);
2435 if (val2 != val && aarch64_bitmask_imm (val2, mode))
2436 break;
43e9d192 2437 }
62c8d76c 2438 if (i != 64)
43e9d192 2439 {
62c8d76c 2440 if (generate)
43e9d192 2441 {
62c8d76c
WD
2442 emit_insn (gen_rtx_SET (dest, GEN_INT (val2)));
2443 emit_insn (gen_insv_immdi (dest, GEN_INT (i),
9a4865db 2444 GEN_INT ((val >> i) & 0xffff)));
43e9d192 2445 }
1312b1ba 2446 return 2;
43e9d192
IB
2447 }
2448 }
2449
9a4865db
WD
2450 /* Generate 2-4 instructions, skipping 16 bits of all zeroes or ones which
2451 are emitted by the initial mov. If one_match > zero_match, skip set bits,
2452 otherwise skip zero bits. */
2c274197 2453
9a4865db 2454 num_insns = 1;
43e9d192 2455 mask = 0xffff;
9a4865db
WD
2456 val2 = one_match > zero_match ? ~val : val;
2457 i = (val2 & mask) != 0 ? 0 : (val2 & (mask << 16)) != 0 ? 16 : 32;
2458
2459 if (generate)
2460 emit_insn (gen_rtx_SET (dest, GEN_INT (one_match > zero_match
2461 ? (val | ~(mask << i))
2462 : (val & (mask << i)))));
2463 for (i += 16; i < 64; i += 16)
43e9d192 2464 {
9a4865db
WD
2465 if ((val2 & (mask << i)) == 0)
2466 continue;
2467 if (generate)
2468 emit_insn (gen_insv_immdi (dest, GEN_INT (i),
2469 GEN_INT ((val >> i) & 0xffff)));
2470 num_insns ++;
82614948
RR
2471 }
2472
2473 return num_insns;
2474}
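/* A minimal stand-alone sketch (added for exposition; COUNT_MOV_INSNS is
   a hypothetical helper, not part of GCC) of the fallback counting logic
   above: one MOVZ/MOVN covers the first 16-bit chunk that differs from
   the all-zero or all-one background, and each further differing chunk
   costs one MOVK.

     static int
     count_mov_insns (unsigned long long val)
     {
       int zeros = 0, ones = 0;
       for (int i = 0; i < 64; i += 16)
         {
           unsigned int chunk = (val >> i) & 0xffff;
           zeros += (chunk == 0);
           ones += (chunk == 0xffff);
         }
       int background = zeros >= ones ? zeros : ones;
       int n = 4 - background;
       return n > 0 ? n : 1;
     }

   This ignores the bitmask-immediate and 32-bit shortcuts handled
   earlier in aarch64_internal_mov_immediate, so for those inputs it
   gives an upper bound rather than the exact count.  */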
2475
c0bb5bc5
WD
2476/* Return whether imm is a 128-bit immediate which is simple enough to
2477 expand inline. */
2478bool
2479aarch64_mov128_immediate (rtx imm)
2480{
2481 if (GET_CODE (imm) == CONST_INT)
2482 return true;
2483
2484 gcc_assert (CONST_WIDE_INT_NUNITS (imm) == 2);
2485
2486 rtx lo = GEN_INT (CONST_WIDE_INT_ELT (imm, 0));
2487 rtx hi = GEN_INT (CONST_WIDE_INT_ELT (imm, 1));
2488
2489 return aarch64_internal_mov_immediate (NULL_RTX, lo, false, DImode)
2490 + aarch64_internal_mov_immediate (NULL_RTX, hi, false, DImode) <= 4;
2491}
2492
2493
43cacb12
RS
2494/* Return the number of temporary registers that aarch64_add_offset_1
2495 would need to add OFFSET to a register. */
2496
2497static unsigned int
2498aarch64_add_offset_1_temporaries (HOST_WIDE_INT offset)
2499{
2500 return abs_hwi (offset) < 0x1000000 ? 0 : 1;
2501}
2502
f5470a77
RS
2503/* A subroutine of aarch64_add_offset. Set DEST to SRC + OFFSET for
2504 a non-polynomial OFFSET. MODE is the mode of the addition.
2505 FRAME_RELATED_P is true if the RTX_FRAME_RELATED flag should
2506 be set and CFA adjustments added to the generated instructions.
2507
2508 TEMP1, if nonnull, is a register of mode MODE that can be used as a
2509 temporary if register allocation is already complete. This temporary
2510 register may overlap DEST but must not overlap SRC. If TEMP1 is known
2511 to hold abs (OFFSET), EMIT_MOVE_IMM can be set to false to avoid emitting
2512 the immediate again.
0100c5f9
RS
2513
2514 Since this function may be used to adjust the stack pointer, we must
2515 ensure that it cannot cause transient stack deallocation (for example
2516 by first incrementing SP and then decrementing when adjusting by a
2517 large immediate). */
2518
2519static void
f5470a77
RS
2520aarch64_add_offset_1 (scalar_int_mode mode, rtx dest,
2521 rtx src, HOST_WIDE_INT offset, rtx temp1,
2522 bool frame_related_p, bool emit_move_imm)
0100c5f9 2523{
f5470a77
RS
2524 gcc_assert (emit_move_imm || temp1 != NULL_RTX);
2525 gcc_assert (temp1 == NULL_RTX || !reg_overlap_mentioned_p (temp1, src));
2526
2527 HOST_WIDE_INT moffset = abs_hwi (offset);
0100c5f9
RS
2528 rtx_insn *insn;
2529
f5470a77
RS
2530 if (!moffset)
2531 {
2532 if (!rtx_equal_p (dest, src))
2533 {
2534 insn = emit_insn (gen_rtx_SET (dest, src));
2535 RTX_FRAME_RELATED_P (insn) = frame_related_p;
2536 }
2537 return;
2538 }
0100c5f9
RS
2539
2540 /* Single instruction adjustment. */
f5470a77 2541 if (aarch64_uimm12_shift (moffset))
0100c5f9 2542 {
f5470a77 2543 insn = emit_insn (gen_add3_insn (dest, src, GEN_INT (offset)));
0100c5f9
RS
2544 RTX_FRAME_RELATED_P (insn) = frame_related_p;
2545 return;
2546 }
2547
f5470a77
RS
2548 /* Emit 2 additions/subtractions if the adjustment is less than 24 bits
2549 and either:
2550
2551 a) the offset cannot be loaded by a 16-bit move or
2552 b) there is no spare register into which we can move it. */
2553 if (moffset < 0x1000000
2554 && ((!temp1 && !can_create_pseudo_p ())
2555 || !aarch64_move_imm (moffset, mode)))
0100c5f9 2556 {
f5470a77 2557 HOST_WIDE_INT low_off = moffset & 0xfff;
0100c5f9 2558
f5470a77
RS
2559 low_off = offset < 0 ? -low_off : low_off;
2560 insn = emit_insn (gen_add3_insn (dest, src, GEN_INT (low_off)));
0100c5f9 2561 RTX_FRAME_RELATED_P (insn) = frame_related_p;
f5470a77 2562 insn = emit_insn (gen_add2_insn (dest, GEN_INT (offset - low_off)));
0100c5f9
RS
2563 RTX_FRAME_RELATED_P (insn) = frame_related_p;
2564 return;
2565 }
2566
2567 /* Emit a move immediate if required and an addition/subtraction. */
0100c5f9 2568 if (emit_move_imm)
f5470a77
RS
2569 {
2570 gcc_assert (temp1 != NULL_RTX || can_create_pseudo_p ());
2571 temp1 = aarch64_force_temporary (mode, temp1, GEN_INT (moffset));
2572 }
2573 insn = emit_insn (offset < 0
2574 ? gen_sub3_insn (dest, src, temp1)
2575 : gen_add3_insn (dest, src, temp1));
0100c5f9
RS
2576 if (frame_related_p)
2577 {
2578 RTX_FRAME_RELATED_P (insn) = frame_related_p;
f5470a77
RS
2579 rtx adj = plus_constant (mode, src, offset);
2580 add_reg_note (insn, REG_CFA_ADJUST_CFA, gen_rtx_SET (dest, adj));
0100c5f9
RS
2581 }
2582}
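/* Worked example (added for illustration): OFFSET == 0x123456 is not a
   (possibly shifted) 12-bit immediate but is below 1 << 24, so the code
   above emits two additions, "add dest, src, #0x456" followed by
   "add dest, dest, #0x123000"; a larger offset such as 0x2000000 is
   instead moved into TEMP1 and added with a register-register ADD.  */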
2583
43cacb12
RS
2584/* Return the number of temporary registers that aarch64_add_offset
2585 would need to move OFFSET into a register or add OFFSET to a register;
2586 ADD_P is true if we want the latter rather than the former. */
2587
2588static unsigned int
2589aarch64_offset_temporaries (bool add_p, poly_int64 offset)
2590{
2591 /* This follows the same structure as aarch64_add_offset. */
2592 if (add_p && aarch64_sve_addvl_addpl_immediate_p (offset))
2593 return 0;
2594
2595 unsigned int count = 0;
2596 HOST_WIDE_INT factor = offset.coeffs[1];
2597 HOST_WIDE_INT constant = offset.coeffs[0] - factor;
2598 poly_int64 poly_offset (factor, factor);
2599 if (add_p && aarch64_sve_addvl_addpl_immediate_p (poly_offset))
2600 /* Need one register for the ADDVL/ADDPL result. */
2601 count += 1;
2602 else if (factor != 0)
2603 {
2604 factor = abs (factor);
2605 if (factor > 16 * (factor & -factor))
2606 /* Need one register for the CNT result and one for the multiplication
2607 factor. If necessary, the second temporary can be reused for the
2608 constant part of the offset. */
2609 return 2;
2610 /* Need one register for the CNT result (which might then
2611 be shifted). */
2612 count += 1;
2613 }
2614 return count + aarch64_add_offset_1_temporaries (constant);
2615}
2616
2617/* If X can be represented as a poly_int64, return the number
2618 of temporaries that are required to add it to a register.
2619 Return -1 otherwise. */
2620
2621int
2622aarch64_add_offset_temporaries (rtx x)
2623{
2624 poly_int64 offset;
2625 if (!poly_int_rtx_p (x, &offset))
2626 return -1;
2627 return aarch64_offset_temporaries (true, offset);
2628}
2629
f5470a77
RS
2630/* Set DEST to SRC + OFFSET. MODE is the mode of the addition.
2631 FRAME_RELATED_P is true if the RTX_FRAME_RELATED flag should
2632 be set and CFA adjustments added to the generated instructions.
2633
2634 TEMP1, if nonnull, is a register of mode MODE that can be used as a
2635 temporary if register allocation is already complete. This temporary
43cacb12
RS
2636 register may overlap DEST if !FRAME_RELATED_P but must not overlap SRC.
2637 If TEMP1 is known to hold abs (OFFSET), EMIT_MOVE_IMM can be set to
2638 false to avoid emitting the immediate again.
2639
2640 TEMP2, if nonnull, is a second temporary register that doesn't
2641 overlap either DEST or SRC.
f5470a77
RS
2642
2643 Since this function may be used to adjust the stack pointer, we must
2644 ensure that it cannot cause transient stack deallocation (for example
2645 by first incrementing SP and then decrementing when adjusting by a
2646 large immediate). */
2647
2648static void
2649aarch64_add_offset (scalar_int_mode mode, rtx dest, rtx src,
43cacb12
RS
2650 poly_int64 offset, rtx temp1, rtx temp2,
2651 bool frame_related_p, bool emit_move_imm = true)
0100c5f9 2652{
f5470a77
RS
2653 gcc_assert (emit_move_imm || temp1 != NULL_RTX);
2654 gcc_assert (temp1 == NULL_RTX || !reg_overlap_mentioned_p (temp1, src));
43cacb12
RS
2655 gcc_assert (temp1 == NULL_RTX
2656 || !frame_related_p
2657 || !reg_overlap_mentioned_p (temp1, dest));
2658 gcc_assert (temp2 == NULL_RTX || !reg_overlap_mentioned_p (dest, temp2));
2659
2660 /* Try using ADDVL or ADDPL to add the whole value. */
2661 if (src != const0_rtx && aarch64_sve_addvl_addpl_immediate_p (offset))
2662 {
2663 rtx offset_rtx = gen_int_mode (offset, mode);
2664 rtx_insn *insn = emit_insn (gen_add3_insn (dest, src, offset_rtx));
2665 RTX_FRAME_RELATED_P (insn) = frame_related_p;
2666 return;
2667 }
2668
2669 /* Coefficient 1 is multiplied by the number of 128-bit blocks in an
2670 SVE vector register, over and above the minimum size of 128 bits.
2671 This is equivalent to half the value returned by CNTD with a
2672 vector shape of ALL. */
2673 HOST_WIDE_INT factor = offset.coeffs[1];
2674 HOST_WIDE_INT constant = offset.coeffs[0] - factor;
2675
2676 /* Try using ADDVL or ADDPL to add the VG-based part. */
2677 poly_int64 poly_offset (factor, factor);
2678 if (src != const0_rtx
2679 && aarch64_sve_addvl_addpl_immediate_p (poly_offset))
2680 {
2681 rtx offset_rtx = gen_int_mode (poly_offset, mode);
2682 if (frame_related_p)
2683 {
2684 rtx_insn *insn = emit_insn (gen_add3_insn (dest, src, offset_rtx));
2685 RTX_FRAME_RELATED_P (insn) = true;
2686 src = dest;
2687 }
2688 else
2689 {
2690 rtx addr = gen_rtx_PLUS (mode, src, offset_rtx);
2691 src = aarch64_force_temporary (mode, temp1, addr);
2692 temp1 = temp2;
2693 temp2 = NULL_RTX;
2694 }
2695 }
2696 /* Otherwise use a CNT-based sequence. */
2697 else if (factor != 0)
2698 {
2699 /* Use a subtraction if we have a negative factor. */
2700 rtx_code code = PLUS;
2701 if (factor < 0)
2702 {
2703 factor = -factor;
2704 code = MINUS;
2705 }
2706
2707 /* Calculate CNTD * FACTOR / 2. First try to fold the division
2708 into the multiplication. */
2709 rtx val;
2710 int shift = 0;
2711 if (factor & 1)
2712 /* Use a right shift by 1. */
2713 shift = -1;
2714 else
2715 factor /= 2;
2716 HOST_WIDE_INT low_bit = factor & -factor;
2717 if (factor <= 16 * low_bit)
2718 {
2719 if (factor > 16 * 8)
2720 {
2721 /* "CNTB Xn, ALL, MUL #FACTOR" is out of range, so calculate
2722 the value with the minimum multiplier and shift it into
2723 position. */
2724 int extra_shift = exact_log2 (low_bit);
2725 shift += extra_shift;
2726 factor >>= extra_shift;
2727 }
2728 val = gen_int_mode (poly_int64 (factor * 2, factor * 2), mode);
2729 }
2730 else
2731 {
2732 /* Use CNTD, then multiply it by FACTOR. */
2733 val = gen_int_mode (poly_int64 (2, 2), mode);
2734 val = aarch64_force_temporary (mode, temp1, val);
2735
2736 /* Go back to using a negative multiplication factor if we have
2737 no register from which to subtract. */
2738 if (code == MINUS && src == const0_rtx)
2739 {
2740 factor = -factor;
2741 code = PLUS;
2742 }
2743 rtx coeff1 = gen_int_mode (factor, mode);
2744 coeff1 = aarch64_force_temporary (mode, temp2, coeff1);
2745 val = gen_rtx_MULT (mode, val, coeff1);
2746 }
2747
2748 if (shift > 0)
2749 {
2750 /* Multiply by 1 << SHIFT. */
2751 val = aarch64_force_temporary (mode, temp1, val);
2752 val = gen_rtx_ASHIFT (mode, val, GEN_INT (shift));
2753 }
2754 else if (shift == -1)
2755 {
2756 /* Divide by 2. */
2757 val = aarch64_force_temporary (mode, temp1, val);
2758 val = gen_rtx_ASHIFTRT (mode, val, const1_rtx);
2759 }
2760
2761 /* Calculate SRC +/- CNTD * FACTOR / 2. */
2762 if (src != const0_rtx)
2763 {
2764 val = aarch64_force_temporary (mode, temp1, val);
2765 val = gen_rtx_fmt_ee (code, mode, src, val);
2766 }
2767 else if (code == MINUS)
2768 {
2769 val = aarch64_force_temporary (mode, temp1, val);
2770 val = gen_rtx_NEG (mode, val);
2771 }
2772
2773 if (constant == 0 || frame_related_p)
2774 {
2775 rtx_insn *insn = emit_insn (gen_rtx_SET (dest, val));
2776 if (frame_related_p)
2777 {
2778 RTX_FRAME_RELATED_P (insn) = true;
2779 add_reg_note (insn, REG_CFA_ADJUST_CFA,
2780 gen_rtx_SET (dest, plus_constant (Pmode, src,
2781 poly_offset)));
2782 }
2783 src = dest;
2784 if (constant == 0)
2785 return;
2786 }
2787 else
2788 {
2789 src = aarch64_force_temporary (mode, temp1, val);
2790 temp1 = temp2;
2791 temp2 = NULL_RTX;
2792 }
2793
2794 emit_move_imm = true;
2795 }
f5470a77 2796
f5470a77
RS
2797 aarch64_add_offset_1 (mode, dest, src, constant, temp1,
2798 frame_related_p, emit_move_imm);
0100c5f9
RS
2799}
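/* Worked example (added for illustration, for a register SRC):
   OFFSET == poly_int64 (18, 16), i.e. one full vector length plus
   2 bytes, has FACTOR == 16 and CONSTANT == 2; the VG-based part is
   handled by "addvl dest, src, #1" and the remaining constant 2 is
   left to aarch64_add_offset_1.  */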
2800
43cacb12
RS
2801/* Like aarch64_add_offset, but the offset is given as an rtx rather
2802 than a poly_int64. */
2803
2804void
2805aarch64_split_add_offset (scalar_int_mode mode, rtx dest, rtx src,
2806 rtx offset_rtx, rtx temp1, rtx temp2)
2807{
2808 aarch64_add_offset (mode, dest, src, rtx_to_poly_int64 (offset_rtx),
2809 temp1, temp2, false);
2810}
2811
f5470a77
RS
2812/* Add DELTA to the stack pointer, marking the instructions frame-related.
2813 TEMP1 is available as a temporary if nonnull. EMIT_MOVE_IMM is false
2814 if TEMP1 already contains abs (DELTA). */
2815
0100c5f9 2816static inline void
43cacb12 2817aarch64_add_sp (rtx temp1, rtx temp2, poly_int64 delta, bool emit_move_imm)
0100c5f9 2818{
f5470a77 2819 aarch64_add_offset (Pmode, stack_pointer_rtx, stack_pointer_rtx, delta,
43cacb12 2820 temp1, temp2, true, emit_move_imm);
0100c5f9
RS
2821}
2822
f5470a77
RS
2823/* Subtract DELTA from the stack pointer, marking the instructions
2824 frame-related if FRAME_RELATED_P. TEMP1 is available as a temporary
2825 if nonnull. */
2826
0100c5f9 2827static inline void
43cacb12 2828aarch64_sub_sp (rtx temp1, rtx temp2, poly_int64 delta, bool frame_related_p)
0100c5f9 2829{
f5470a77 2830 aarch64_add_offset (Pmode, stack_pointer_rtx, stack_pointer_rtx, -delta,
43cacb12 2831 temp1, temp2, frame_related_p);
0100c5f9 2832}
82614948 2833
43cacb12
RS
2834/* Set DEST to (vec_series BASE STEP). */
2835
2836static void
2837aarch64_expand_vec_series (rtx dest, rtx base, rtx step)
82614948
RR
2838{
2839 machine_mode mode = GET_MODE (dest);
43cacb12
RS
2840 scalar_mode inner = GET_MODE_INNER (mode);
2841
2842 /* Each operand can be a register or an immediate in the range [-16, 15]. */
2843 if (!aarch64_sve_index_immediate_p (base))
2844 base = force_reg (inner, base);
2845 if (!aarch64_sve_index_immediate_p (step))
2846 step = force_reg (inner, step);
2847
2848 emit_set_insn (dest, gen_rtx_VEC_SERIES (mode, base, step));
2849}
82614948 2850
43cacb12
RS
2851/* Try to duplicate SRC into SVE register DEST, given that SRC is an
2852 integer of mode SRC_MODE. Return true on success. */
2853
2854static bool
2855aarch64_expand_sve_widened_duplicate (rtx dest, scalar_int_mode src_mode,
2856 rtx src)
2857{
2858 /* If the constant is smaller than 128 bits, we can do the move
2859 using a vector of SRC_MODEs. */
2860 if (src_mode != TImode)
2861 {
2862 poly_uint64 count = exact_div (GET_MODE_SIZE (GET_MODE (dest)),
2863 GET_MODE_SIZE (src_mode));
2864 machine_mode dup_mode = mode_for_vector (src_mode, count).require ();
2865 emit_move_insn (gen_lowpart (dup_mode, dest),
2866 gen_const_vec_duplicate (dup_mode, src));
2867 return true;
2868 }
2869
947b1372 2870 /* Use LD1RQ[BHWD] to load the 128 bits from memory. */
43cacb12
RS
2871 src = force_const_mem (src_mode, src);
2872 if (!src)
2873 return false;
2874
2875 /* Make sure that the address is legitimate. */
2876 if (!aarch64_sve_ld1r_operand_p (src))
2877 {
2878 rtx addr = force_reg (Pmode, XEXP (src, 0));
2879 src = replace_equiv_address (src, addr);
2880 }
2881
947b1372
RS
2882 machine_mode mode = GET_MODE (dest);
2883 unsigned int elem_bytes = GET_MODE_UNIT_SIZE (mode);
2884 machine_mode pred_mode = aarch64_sve_pred_mode (elem_bytes).require ();
2885 rtx ptrue = force_reg (pred_mode, CONSTM1_RTX (pred_mode));
2886 src = gen_rtx_UNSPEC (mode, gen_rtvec (2, ptrue, src), UNSPEC_LD1RQ);
2887 emit_insn (gen_rtx_SET (dest, src));
43cacb12
RS
2888 return true;
2889}
2890
2891/* Expand a move of general CONST_VECTOR SRC into DEST, given that it
2892 isn't a simple duplicate or series. */
2893
2894static void
2895aarch64_expand_sve_const_vector (rtx dest, rtx src)
2896{
2897 machine_mode mode = GET_MODE (src);
2898 unsigned int npatterns = CONST_VECTOR_NPATTERNS (src);
2899 unsigned int nelts_per_pattern = CONST_VECTOR_NELTS_PER_PATTERN (src);
2900 gcc_assert (npatterns > 1);
2901
2902 if (nelts_per_pattern == 1)
2903 {
2904 /* The constant is a repeating sequence of at least two elements,
2905 where the repeating elements occupy no more than 128 bits.
2906 Get an integer representation of the replicated value. */
8179efe0
RS
2907 scalar_int_mode int_mode;
2908 if (BYTES_BIG_ENDIAN)
2909 /* For now, always use LD1RQ to load the value on big-endian
2910 targets, since the handling of smaller integers includes a
2911 subreg that is semantically an element reverse. */
2912 int_mode = TImode;
2913 else
2914 {
2915 unsigned int int_bits = GET_MODE_UNIT_BITSIZE (mode) * npatterns;
2916 gcc_assert (int_bits <= 128);
2917 int_mode = int_mode_for_size (int_bits, 0).require ();
2918 }
43cacb12
RS
2919 rtx int_value = simplify_gen_subreg (int_mode, src, mode, 0);
2920 if (int_value
2921 && aarch64_expand_sve_widened_duplicate (dest, int_mode, int_value))
2922 return;
2923 }
2924
2925 /* Expand each pattern individually. */
2926 rtx_vector_builder builder;
2927 auto_vec<rtx, 16> vectors (npatterns);
2928 for (unsigned int i = 0; i < npatterns; ++i)
2929 {
2930 builder.new_vector (mode, 1, nelts_per_pattern);
2931 for (unsigned int j = 0; j < nelts_per_pattern; ++j)
2932 builder.quick_push (CONST_VECTOR_ELT (src, i + j * npatterns));
2933 vectors.quick_push (force_reg (mode, builder.build ()));
2934 }
2935
2936 /* Use permutes to interleave the separate vectors. */
2937 while (npatterns > 1)
2938 {
2939 npatterns /= 2;
2940 for (unsigned int i = 0; i < npatterns; ++i)
2941 {
2942 rtx tmp = (npatterns == 1 ? dest : gen_reg_rtx (mode));
2943 rtvec v = gen_rtvec (2, vectors[i], vectors[i + npatterns]);
2944 emit_set_insn (tmp, gen_rtx_UNSPEC (mode, v, UNSPEC_ZIP1));
2945 vectors[i] = tmp;
2946 }
2947 }
2948 gcc_assert (vectors[0] == dest);
2949}
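/* Worked example (added for illustration): with NPATTERNS == 4 and
   single-pattern vectors A, B, C and D, the first round of ZIP1s
   produces ZIP1 (A, C) and ZIP1 (B, D), and the second round zips
   those two together, restoring the original element order
   A0 B0 C0 D0 A1 B1 C1 D1 ... in DEST.  */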
2950
2951/* Set DEST to immediate IMM. For SVE vector modes, GEN_VEC_DUPLICATE
2952 is a pattern that can be used to set DEST to a replicated scalar
2953 element. */
2954
2955void
2956aarch64_expand_mov_immediate (rtx dest, rtx imm,
2957 rtx (*gen_vec_duplicate) (rtx, rtx))
2958{
2959 machine_mode mode = GET_MODE (dest);
82614948
RR
2960
2961 /* Check on what type of symbol it is. */
77e994c9
RS
2962 scalar_int_mode int_mode;
2963 if ((GET_CODE (imm) == SYMBOL_REF
2964 || GET_CODE (imm) == LABEL_REF
43cacb12
RS
2965 || GET_CODE (imm) == CONST
2966 || GET_CODE (imm) == CONST_POLY_INT)
77e994c9 2967 && is_a <scalar_int_mode> (mode, &int_mode))
82614948 2968 {
43cacb12
RS
2969 rtx mem;
2970 poly_int64 offset;
2971 HOST_WIDE_INT const_offset;
82614948
RR
2972 enum aarch64_symbol_type sty;
2973
2974 /* If we have (const (plus symbol offset)), separate out the offset
2975 before we start classifying the symbol. */
43cacb12 2976 rtx base = strip_offset (imm, &offset);
82614948 2977
43cacb12
RS
2978 /* We must always add an offset involving VL separately, rather than
2979 folding it into the relocation. */
2980 if (!offset.is_constant (&const_offset))
2981 {
2982 if (base == const0_rtx && aarch64_sve_cnt_immediate_p (offset))
2983 emit_insn (gen_rtx_SET (dest, imm));
2984 else
2985 {
2986 /* Do arithmetic on 32-bit values if the result is smaller
2987 than that. */
2988 if (partial_subreg_p (int_mode, SImode))
2989 {
2990 /* It is invalid to do symbol calculations in modes
2991 narrower than SImode. */
2992 gcc_assert (base == const0_rtx);
2993 dest = gen_lowpart (SImode, dest);
2994 int_mode = SImode;
2995 }
2996 if (base != const0_rtx)
2997 {
2998 base = aarch64_force_temporary (int_mode, dest, base);
2999 aarch64_add_offset (int_mode, dest, base, offset,
3000 NULL_RTX, NULL_RTX, false);
3001 }
3002 else
3003 aarch64_add_offset (int_mode, dest, base, offset,
3004 dest, NULL_RTX, false);
3005 }
3006 return;
3007 }
3008
3009 sty = aarch64_classify_symbol (base, const_offset);
82614948
RR
3010 switch (sty)
3011 {
3012 case SYMBOL_FORCE_TO_MEM:
43cacb12 3013 if (const_offset != 0
77e994c9 3014 && targetm.cannot_force_const_mem (int_mode, imm))
82614948
RR
3015 {
3016 gcc_assert (can_create_pseudo_p ());
77e994c9 3017 base = aarch64_force_temporary (int_mode, dest, base);
43cacb12
RS
3018 aarch64_add_offset (int_mode, dest, base, const_offset,
3019 NULL_RTX, NULL_RTX, false);
82614948
RR
3020 return;
3021 }
b4f50fd4 3022
82614948
RR
3023 mem = force_const_mem (ptr_mode, imm);
3024 gcc_assert (mem);
b4f50fd4
RR
3025
3026 /* If we aren't generating PC relative literals, then
3027 we need to expand the literal pool access carefully.
3028 This is something that needs to be done in a number
3029 of places, so could well live as a separate function. */
9ee6540a 3030 if (!aarch64_pcrelative_literal_loads)
b4f50fd4
RR
3031 {
3032 gcc_assert (can_create_pseudo_p ());
3033 base = gen_reg_rtx (ptr_mode);
3034 aarch64_expand_mov_immediate (base, XEXP (mem, 0));
00eee3fa
WD
3035 if (ptr_mode != Pmode)
3036 base = convert_memory_address (Pmode, base);
b4f50fd4
RR
3037 mem = gen_rtx_MEM (ptr_mode, base);
3038 }
3039
77e994c9
RS
3040 if (int_mode != ptr_mode)
3041 mem = gen_rtx_ZERO_EXTEND (int_mode, mem);
b4f50fd4 3042
f7df4a84 3043 emit_insn (gen_rtx_SET (dest, mem));
b4f50fd4 3044
82614948
RR
3045 return;
3046
3047 case SYMBOL_SMALL_TLSGD:
3048 case SYMBOL_SMALL_TLSDESC:
79496620 3049 case SYMBOL_SMALL_TLSIE:
1b1e81f8 3050 case SYMBOL_SMALL_GOT_28K:
6642bdb4 3051 case SYMBOL_SMALL_GOT_4G:
82614948 3052 case SYMBOL_TINY_GOT:
5ae7caad 3053 case SYMBOL_TINY_TLSIE:
43cacb12 3054 if (const_offset != 0)
82614948
RR
3055 {
3056 gcc_assert(can_create_pseudo_p ());
77e994c9 3057 base = aarch64_force_temporary (int_mode, dest, base);
43cacb12
RS
3058 aarch64_add_offset (int_mode, dest, base, const_offset,
3059 NULL_RTX, NULL_RTX, false);
82614948
RR
3060 return;
3061 }
3062 /* FALLTHRU */
3063
82614948
RR
3064 case SYMBOL_SMALL_ABSOLUTE:
3065 case SYMBOL_TINY_ABSOLUTE:
cbf5629e 3066 case SYMBOL_TLSLE12:
d18ba284 3067 case SYMBOL_TLSLE24:
cbf5629e
JW
3068 case SYMBOL_TLSLE32:
3069 case SYMBOL_TLSLE48:
82614948
RR
3070 aarch64_load_symref_appropriately (dest, imm, sty);
3071 return;
3072
3073 default:
3074 gcc_unreachable ();
3075 }
3076 }
3077
3078 if (!CONST_INT_P (imm))
3079 {
43cacb12
RS
3080 rtx base, step, value;
3081 if (GET_CODE (imm) == HIGH
3082 || aarch64_simd_valid_immediate (imm, NULL))
f7df4a84 3083 emit_insn (gen_rtx_SET (dest, imm));
43cacb12
RS
3084 else if (const_vec_series_p (imm, &base, &step))
3085 aarch64_expand_vec_series (dest, base, step);
3086 else if (const_vec_duplicate_p (imm, &value))
3087 {
3088 /* If the constant is out of range of an SVE vector move,
3089 load it from memory if we can, otherwise move it into
3090 a register and use a DUP. */
3091 scalar_mode inner_mode = GET_MODE_INNER (mode);
3092 rtx op = force_const_mem (inner_mode, value);
3093 if (!op)
3094 op = force_reg (inner_mode, value);
3095 else if (!aarch64_sve_ld1r_operand_p (op))
3096 {
3097 rtx addr = force_reg (Pmode, XEXP (op, 0));
3098 op = replace_equiv_address (op, addr);
3099 }
3100 emit_insn (gen_vec_duplicate (dest, op));
3101 }
3102 else if (GET_CODE (imm) == CONST_VECTOR
3103 && !GET_MODE_NUNITS (GET_MODE (imm)).is_constant ())
3104 aarch64_expand_sve_const_vector (dest, imm);
82614948 3105 else
43cacb12 3106 {
82614948
RR
3107 rtx mem = force_const_mem (mode, imm);
3108 gcc_assert (mem);
43cacb12 3109 emit_move_insn (dest, mem);
43e9d192 3110 }
82614948
RR
3111
3112 return;
43e9d192 3113 }
82614948 3114
77e994c9
RS
3115 aarch64_internal_mov_immediate (dest, imm, true,
3116 as_a <scalar_int_mode> (mode));
43e9d192
IB
3117}
3118
43cacb12
RS
3119/* Emit an SVE predicated move from SRC to DEST. PRED is a predicate
3120 that is known to contain PTRUE. */
3121
3122void
3123aarch64_emit_sve_pred_move (rtx dest, rtx pred, rtx src)
3124{
3125 emit_insn (gen_rtx_SET (dest, gen_rtx_UNSPEC (GET_MODE (dest),
3126 gen_rtvec (2, pred, src),
3127 UNSPEC_MERGE_PTRUE)));
3128}
3129
3130/* Expand a pre-RA SVE data move from SRC to DEST in which at least one
3131 operand is in memory. In this case we need to use the predicated LD1
3132 and ST1 instead of LDR and STR, both for correctness on big-endian
3133 targets and because LD1 and ST1 support a wider range of addressing modes.
3134 PRED_MODE is the mode of the predicate.
3135
3136 See the comment at the head of aarch64-sve.md for details about the
3137 big-endian handling. */
3138
3139void
3140aarch64_expand_sve_mem_move (rtx dest, rtx src, machine_mode pred_mode)
3141{
3142 machine_mode mode = GET_MODE (dest);
3143 rtx ptrue = force_reg (pred_mode, CONSTM1_RTX (pred_mode));
3144 if (!register_operand (src, mode)
3145 && !register_operand (dest, mode))
3146 {
3147 rtx tmp = gen_reg_rtx (mode);
3148 if (MEM_P (src))
3149 aarch64_emit_sve_pred_move (tmp, ptrue, src);
3150 else
3151 emit_move_insn (tmp, src);
3152 src = tmp;
3153 }
3154 aarch64_emit_sve_pred_move (dest, ptrue, src);
3155}
3156
002092be
RS
3157/* Called only on big-endian targets. See whether an SVE vector move
3158 from SRC to DEST is effectively a REV[BHW] instruction, because at
3159 least one operand is a subreg of an SVE vector that has wider or
3160 narrower elements. Return true and emit the instruction if so.
3161
3162 For example:
3163
3164 (set (reg:VNx8HI R1) (subreg:VNx8HI (reg:VNx16QI R2) 0))
3165
3166 represents a VIEW_CONVERT between the following vectors, viewed
3167 in memory order:
3168
3169 R2: { [0].high, [0].low, [1].high, [1].low, ... }
3170 R1: { [0], [1], [2], [3], ... }
3171
3172 The high part of lane X in R2 should therefore correspond to lane X*2
3173 of R1, but the register representations are:
3174
3175 msb lsb
3176 R2: ...... [1].high [1].low [0].high [0].low
3177 R1: ...... [3] [2] [1] [0]
3178
3179 where the low part of lane X in R2 corresponds to lane X*2 in R1.
3180 We therefore need a reverse operation to swap the high and low values
3181 around.
3182
3183 This is purely an optimization. Without it we would spill the
3184 subreg operand to the stack in one mode and reload it in the
3185 other mode, which has the same effect as the REV. */
3186
3187bool
3188aarch64_maybe_expand_sve_subreg_move (rtx dest, rtx src)
3189{
3190 gcc_assert (BYTES_BIG_ENDIAN);
3191 if (GET_CODE (dest) == SUBREG)
3192 dest = SUBREG_REG (dest);
3193 if (GET_CODE (src) == SUBREG)
3194 src = SUBREG_REG (src);
3195
3196 /* The optimization handles two single SVE REGs with different element
3197 sizes. */
3198 if (!REG_P (dest)
3199 || !REG_P (src)
3200 || aarch64_classify_vector_mode (GET_MODE (dest)) != VEC_SVE_DATA
3201 || aarch64_classify_vector_mode (GET_MODE (src)) != VEC_SVE_DATA
3202 || (GET_MODE_UNIT_SIZE (GET_MODE (dest))
3203 == GET_MODE_UNIT_SIZE (GET_MODE (src))))
3204 return false;
3205
3206 /* Generate *aarch64_sve_mov<mode>_subreg_be. */
3207 rtx ptrue = force_reg (VNx16BImode, CONSTM1_RTX (VNx16BImode));
3208 rtx unspec = gen_rtx_UNSPEC (GET_MODE (dest), gen_rtvec (2, ptrue, src),
3209 UNSPEC_REV_SUBREG);
3210 emit_insn (gen_rtx_SET (dest, unspec));
3211 return true;
3212}
3213
3214/* Return a copy of X with mode MODE, without changing its other
3215 attributes. Unlike gen_lowpart, this doesn't care whether the
3216 mode change is valid. */
3217
3218static rtx
3219aarch64_replace_reg_mode (rtx x, machine_mode mode)
3220{
3221 if (GET_MODE (x) == mode)
3222 return x;
3223
3224 x = shallow_copy_rtx (x);
3225 set_mode_and_regno (x, mode, REGNO (x));
3226 return x;
3227}
3228
3229/* Split a *aarch64_sve_mov<mode>_subreg_be pattern with the given
3230 operands. */
3231
3232void
3233aarch64_split_sve_subreg_move (rtx dest, rtx ptrue, rtx src)
3234{
3235 /* Decide which REV operation we need. The mode with narrower elements
3236 determines the mode of the operands and the mode with the wider
3237 elements determines the reverse width. */
3238 machine_mode mode_with_wider_elts = GET_MODE (dest);
3239 machine_mode mode_with_narrower_elts = GET_MODE (src);
3240 if (GET_MODE_UNIT_SIZE (mode_with_wider_elts)
3241 < GET_MODE_UNIT_SIZE (mode_with_narrower_elts))
3242 std::swap (mode_with_wider_elts, mode_with_narrower_elts);
3243
3244 unsigned int wider_bytes = GET_MODE_UNIT_SIZE (mode_with_wider_elts);
3245 unsigned int unspec;
3246 if (wider_bytes == 8)
3247 unspec = UNSPEC_REV64;
3248 else if (wider_bytes == 4)
3249 unspec = UNSPEC_REV32;
3250 else if (wider_bytes == 2)
3251 unspec = UNSPEC_REV16;
3252 else
3253 gcc_unreachable ();
3254 machine_mode pred_mode = aarch64_sve_pred_mode (wider_bytes).require ();
3255
3256 /* Emit:
3257
3258 (set DEST (unspec [PTRUE (unspec [SRC] UNSPEC_REV<nn>)]
3259 UNSPEC_MERGE_PTRUE))
3260
3261 with the appropriate modes. */
3262 ptrue = gen_lowpart (pred_mode, ptrue);
3263 dest = aarch64_replace_reg_mode (dest, mode_with_narrower_elts);
3264 src = aarch64_replace_reg_mode (src, mode_with_narrower_elts);
3265 src = gen_rtx_UNSPEC (mode_with_narrower_elts, gen_rtvec (1, src), unspec);
3266 src = gen_rtx_UNSPEC (mode_with_narrower_elts, gen_rtvec (2, ptrue, src),
3267 UNSPEC_MERGE_PTRUE);
3268 emit_insn (gen_rtx_SET (dest, src));
3269}
3270
43e9d192 3271static bool
fee9ba42
JW
3272aarch64_function_ok_for_sibcall (tree decl ATTRIBUTE_UNUSED,
3273 tree exp ATTRIBUTE_UNUSED)
43e9d192 3274{
fee9ba42 3275 /* Currently, always true. */
43e9d192
IB
3276 return true;
3277}
3278
3279/* Implement TARGET_PASS_BY_REFERENCE. */
3280
3281static bool
3282aarch64_pass_by_reference (cumulative_args_t pcum ATTRIBUTE_UNUSED,
ef4bddc2 3283 machine_mode mode,
43e9d192
IB
3284 const_tree type,
3285 bool named ATTRIBUTE_UNUSED)
3286{
3287 HOST_WIDE_INT size;
ef4bddc2 3288 machine_mode dummymode;
43e9d192
IB
3289 int nregs;
3290
3291 /* GET_MODE_SIZE (BLKmode) is useless since it is 0. */
6a70badb
RS
3292 if (mode == BLKmode && type)
3293 size = int_size_in_bytes (type);
3294 else
3295 /* No frontends can create types with variable-sized modes, so we
3296 shouldn't be asked to pass or return them. */
3297 size = GET_MODE_SIZE (mode).to_constant ();
43e9d192 3298
aadc1c43
MHD
3299 /* Aggregates are passed by reference based on their size. */
3300 if (type && AGGREGATE_TYPE_P (type))
43e9d192 3301 {
aadc1c43 3302 size = int_size_in_bytes (type);
43e9d192
IB
3303 }
3304
3305 /* Variable sized arguments are always returned by reference. */
3306 if (size < 0)
3307 return true;
3308
3309 /* Can this be a candidate to be passed in fp/simd register(s)? */
3310 if (aarch64_vfp_is_call_or_return_candidate (mode, type,
3311 &dummymode, &nregs,
3312 NULL))
3313 return false;
3314
3315 /* Arguments which are variable sized or larger than 2 registers are
3316 passed by reference unless they are a homogeneous floating-point
3317 aggregate. */
3318 return size > 2 * UNITS_PER_WORD;
3319}
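/* For example (added for illustration): a plain 24-byte structure is
   passed by reference, since it is not a candidate for fp/simd
   registers and exceeds 2 * UNITS_PER_WORD, whereas a 32-byte
   structure of four doubles is a homogeneous floating-point aggregate
   and is still passed by value in vector registers.  */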
3320
3321/* Return TRUE if VALTYPE is padded to its least significant bits. */
3322static bool
3323aarch64_return_in_msb (const_tree valtype)
3324{
ef4bddc2 3325 machine_mode dummy_mode;
43e9d192
IB
3326 int dummy_int;
3327
3328 /* Never happens in little-endian mode. */
3329 if (!BYTES_BIG_ENDIAN)
3330 return false;
3331
3332 /* Only composite types smaller than or equal to 16 bytes can
3333 be potentially returned in registers. */
3334 if (!aarch64_composite_type_p (valtype, TYPE_MODE (valtype))
3335 || int_size_in_bytes (valtype) <= 0
3336 || int_size_in_bytes (valtype) > 16)
3337 return false;
3338
3339 /* But not a composite that is an HFA (Homogeneous Floating-point Aggregate)
3340 or an HVA (Homogeneous Short-Vector Aggregate); such a special composite
3341 is always passed/returned in the least significant bits of fp/simd
3342 register(s). */
3343 if (aarch64_vfp_is_call_or_return_candidate (TYPE_MODE (valtype), valtype,
3344 &dummy_mode, &dummy_int, NULL))
3345 return false;
3346
3347 return true;
3348}
3349
3350/* Implement TARGET_FUNCTION_VALUE.
3351 Define how to find the value returned by a function. */
3352
3353static rtx
3354aarch64_function_value (const_tree type, const_tree func,
3355 bool outgoing ATTRIBUTE_UNUSED)
3356{
ef4bddc2 3357 machine_mode mode;
43e9d192
IB
3358 int unsignedp;
3359 int count;
ef4bddc2 3360 machine_mode ag_mode;
43e9d192
IB
3361
3362 mode = TYPE_MODE (type);
3363 if (INTEGRAL_TYPE_P (type))
3364 mode = promote_function_mode (type, mode, &unsignedp, func, 1);
3365
3366 if (aarch64_return_in_msb (type))
3367 {
3368 HOST_WIDE_INT size = int_size_in_bytes (type);
3369
3370 if (size % UNITS_PER_WORD != 0)
3371 {
3372 size += UNITS_PER_WORD - size % UNITS_PER_WORD;
f4b31647 3373 mode = int_mode_for_size (size * BITS_PER_UNIT, 0).require ();
43e9d192
IB
3374 }
3375 }
3376
3377 if (aarch64_vfp_is_call_or_return_candidate (mode, type,
3378 &ag_mode, &count, NULL))
3379 {
3380 if (!aarch64_composite_type_p (type, mode))
3381 {
3382 gcc_assert (count == 1 && mode == ag_mode);
3383 return gen_rtx_REG (mode, V0_REGNUM);
3384 }
3385 else
3386 {
3387 int i;
3388 rtx par;
3389
3390 par = gen_rtx_PARALLEL (mode, rtvec_alloc (count));
3391 for (i = 0; i < count; i++)
3392 {
3393 rtx tmp = gen_rtx_REG (ag_mode, V0_REGNUM + i);
6a70badb
RS
3394 rtx offset = gen_int_mode (i * GET_MODE_SIZE (ag_mode), Pmode);
3395 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp, offset);
43e9d192
IB
3396 XVECEXP (par, 0, i) = tmp;
3397 }
3398 return par;
3399 }
3400 }
3401 else
3402 return gen_rtx_REG (mode, R0_REGNUM);
3403}
3404
 3405/* Implement TARGET_FUNCTION_VALUE_REGNO_P.
3406 Return true if REGNO is the number of a hard register in which the values
3407 of called function may come back. */
3408
3409static bool
3410aarch64_function_value_regno_p (const unsigned int regno)
3411{
 3412 /* A maximum of 16 bytes can be returned in the general registers. Examples
3413 of 16-byte return values are: 128-bit integers and 16-byte small
3414 structures (excluding homogeneous floating-point aggregates). */
3415 if (regno == R0_REGNUM || regno == R1_REGNUM)
3416 return true;
3417
3418 /* Up to four fp/simd registers can return a function value, e.g. a
3419 homogeneous floating-point aggregate having four members. */
3420 if (regno >= V0_REGNUM && regno < V0_REGNUM + HA_MAX_NUM_FLDS)
d5726973 3421 return TARGET_FLOAT;
43e9d192
IB
3422
3423 return false;
3424}
3425
3426/* Implement TARGET_RETURN_IN_MEMORY.
3427
3428 If the type T of the result of a function is such that
3429 void func (T arg)
3430 would require that arg be passed as a value in a register (or set of
3431 registers) according to the parameter passing rules, then the result
3432 is returned in the same registers as would be used for such an
3433 argument. */
3434
3435static bool
3436aarch64_return_in_memory (const_tree type, const_tree fndecl ATTRIBUTE_UNUSED)
3437{
3438 HOST_WIDE_INT size;
ef4bddc2 3439 machine_mode ag_mode;
43e9d192
IB
3440 int count;
3441
3442 if (!AGGREGATE_TYPE_P (type)
3443 && TREE_CODE (type) != COMPLEX_TYPE
3444 && TREE_CODE (type) != VECTOR_TYPE)
 3445 /* Simple scalar types are always returned in registers. */
3446 return false;
3447
3448 if (aarch64_vfp_is_call_or_return_candidate (TYPE_MODE (type),
3449 type,
3450 &ag_mode,
3451 &count,
3452 NULL))
3453 return false;
3454
 3455 /* Types larger than 2 registers are returned in memory. */
3456 size = int_size_in_bytes (type);
3457 return (size < 0 || size > 2 * UNITS_PER_WORD);
3458}
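
/* For illustration (hypothetical types): under the rule above a 16-byte
   struct { long a, b; } is returned in registers, a homogeneous aggregate
   such as struct { double a, b, c, d; } is returned in fp/simd registers,
   while a plain 24-byte aggregate is returned in memory.  */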
3459
3460static bool
ef4bddc2 3461aarch64_vfp_is_call_candidate (cumulative_args_t pcum_v, machine_mode mode,
43e9d192
IB
3462 const_tree type, int *nregs)
3463{
3464 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
3465 return aarch64_vfp_is_call_or_return_candidate (mode,
3466 type,
3467 &pcum->aapcs_vfp_rmode,
3468 nregs,
3469 NULL);
3470}
3471
985b8393 3472/* Given MODE and TYPE of a function argument, return the alignment in
43e9d192
IB
3473 bits. The idea is to suppress any stronger alignment requested by
3474 the user and opt for the natural alignment (specified in AAPCS64 \S 4.1).
3475 This is a helper function for local use only. */
3476
985b8393 3477static unsigned int
ef4bddc2 3478aarch64_function_arg_alignment (machine_mode mode, const_tree type)
43e9d192 3479{
75d6cc81 3480 if (!type)
985b8393 3481 return GET_MODE_ALIGNMENT (mode);
2ec07fa6 3482
75d6cc81 3483 if (integer_zerop (TYPE_SIZE (type)))
985b8393 3484 return 0;
43e9d192 3485
75d6cc81
AL
3486 gcc_assert (TYPE_MODE (type) == mode);
3487
3488 if (!AGGREGATE_TYPE_P (type))
985b8393 3489 return TYPE_ALIGN (TYPE_MAIN_VARIANT (type));
75d6cc81
AL
3490
3491 if (TREE_CODE (type) == ARRAY_TYPE)
985b8393 3492 return TYPE_ALIGN (TREE_TYPE (type));
75d6cc81 3493
985b8393 3494 unsigned int alignment = 0;
75d6cc81 3495 for (tree field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
985b8393
JJ
3496 if (TREE_CODE (field) == FIELD_DECL)
3497 alignment = std::max (alignment, DECL_ALIGN (field));
43e9d192 3498
985b8393 3499 return alignment;
43e9d192
IB
3500}
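
/* As a worked example of the helper above (hypothetical type): for an
   argument of type struct { int i; double d; } the field walk yields
   max (32, 64) = 64 bits, so the 16-byte (128-bit) alignment rules used
   below do not apply; a field of type __int128 would raise the result to
   128 bits and trigger them.  */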
3501
3502/* Layout a function argument according to the AAPCS64 rules. The rule
3503 numbers refer to the rule numbers in the AAPCS64. */
3504
3505static void
ef4bddc2 3506aarch64_layout_arg (cumulative_args_t pcum_v, machine_mode mode,
43e9d192
IB
3507 const_tree type,
3508 bool named ATTRIBUTE_UNUSED)
3509{
3510 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
3511 int ncrn, nvrn, nregs;
3512 bool allocate_ncrn, allocate_nvrn;
3abf17cf 3513 HOST_WIDE_INT size;
43e9d192
IB
3514
3515 /* We need to do this once per argument. */
3516 if (pcum->aapcs_arg_processed)
3517 return;
3518
3519 pcum->aapcs_arg_processed = true;
3520
3abf17cf 3521 /* Size in bytes, rounded up to a multiple of 8 bytes. */
6a70badb
RS
3522 if (type)
3523 size = int_size_in_bytes (type);
3524 else
3525 /* No frontends can create types with variable-sized modes, so we
3526 shouldn't be asked to pass or return them. */
3527 size = GET_MODE_SIZE (mode).to_constant ();
3528 size = ROUND_UP (size, UNITS_PER_WORD);
3abf17cf 3529
43e9d192
IB
3530 allocate_ncrn = (type) ? !(FLOAT_TYPE_P (type)) : !FLOAT_MODE_P (mode);
3531 allocate_nvrn = aarch64_vfp_is_call_candidate (pcum_v,
3532 mode,
3533 type,
3534 &nregs);
3535
 3536 /* allocate_ncrn may be a false positive, but allocate_nvrn is quite reliable.
 3537 The following code therefore handles passing in SIMD/FP registers first. */
3538
3539 nvrn = pcum->aapcs_nvrn;
3540
 3541 /* C1 - C5 for floating point, homogeneous floating-point aggregates (HFA)
 3542 and homogeneous short-vector aggregates (HVA). */
3543 if (allocate_nvrn)
3544 {
261fb553 3545 if (!TARGET_FLOAT)
fc29dfc9 3546 aarch64_err_no_fpadvsimd (mode);
261fb553 3547
43e9d192
IB
3548 if (nvrn + nregs <= NUM_FP_ARG_REGS)
3549 {
3550 pcum->aapcs_nextnvrn = nvrn + nregs;
3551 if (!aarch64_composite_type_p (type, mode))
3552 {
3553 gcc_assert (nregs == 1);
3554 pcum->aapcs_reg = gen_rtx_REG (mode, V0_REGNUM + nvrn);
3555 }
3556 else
3557 {
3558 rtx par;
3559 int i;
3560 par = gen_rtx_PARALLEL (mode, rtvec_alloc (nregs));
3561 for (i = 0; i < nregs; i++)
3562 {
3563 rtx tmp = gen_rtx_REG (pcum->aapcs_vfp_rmode,
3564 V0_REGNUM + nvrn + i);
6a70badb
RS
3565 rtx offset = gen_int_mode
3566 (i * GET_MODE_SIZE (pcum->aapcs_vfp_rmode), Pmode);
3567 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp, offset);
43e9d192
IB
3568 XVECEXP (par, 0, i) = tmp;
3569 }
3570 pcum->aapcs_reg = par;
3571 }
3572 return;
3573 }
3574 else
3575 {
3576 /* C.3 NSRN is set to 8. */
3577 pcum->aapcs_nextnvrn = NUM_FP_ARG_REGS;
3578 goto on_stack;
3579 }
3580 }
3581
3582 ncrn = pcum->aapcs_ncrn;
3abf17cf 3583 nregs = size / UNITS_PER_WORD;
43e9d192
IB
3584
 3585 /* C6 - C9, though the sign and zero extension semantics are
 3586 handled elsewhere. This is the case where the argument fits
 3587 entirely in general registers. */
3588 if (allocate_ncrn && (ncrn + nregs <= NUM_ARG_REGS))
3589 {
43e9d192
IB
3590
3591 gcc_assert (nregs == 0 || nregs == 1 || nregs == 2);
3592
 3593 /* C.8: if the argument has an alignment of 16 bytes then the NGRN is
 3594 rounded up to the next even number. */
985b8393
JJ
3595 if (nregs == 2
3596 && ncrn % 2
2ec07fa6 3597 /* The == 16 * BITS_PER_UNIT instead of >= 16 * BITS_PER_UNIT
985b8393 3598 comparison is there because for > 16 * BITS_PER_UNIT
2ec07fa6
RR
3599 alignment nregs should be > 2 and therefore it should be
3600 passed by reference rather than value. */
985b8393
JJ
3601 && aarch64_function_arg_alignment (mode, type) == 16 * BITS_PER_UNIT)
3602 {
3603 ++ncrn;
3604 gcc_assert (ncrn + nregs <= NUM_ARG_REGS);
43e9d192 3605 }
2ec07fa6 3606
43e9d192
IB
3607 /* NREGS can be 0 when e.g. an empty structure is to be passed.
3608 A reg is still generated for it, but the caller should be smart
3609 enough not to use it. */
3610 if (nregs == 0 || nregs == 1 || GET_MODE_CLASS (mode) == MODE_INT)
2ec07fa6 3611 pcum->aapcs_reg = gen_rtx_REG (mode, R0_REGNUM + ncrn);
43e9d192
IB
3612 else
3613 {
3614 rtx par;
3615 int i;
3616
3617 par = gen_rtx_PARALLEL (mode, rtvec_alloc (nregs));
3618 for (i = 0; i < nregs; i++)
3619 {
3620 rtx tmp = gen_rtx_REG (word_mode, R0_REGNUM + ncrn + i);
3621 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp,
3622 GEN_INT (i * UNITS_PER_WORD));
3623 XVECEXP (par, 0, i) = tmp;
3624 }
3625 pcum->aapcs_reg = par;
3626 }
3627
3628 pcum->aapcs_nextncrn = ncrn + nregs;
3629 return;
3630 }
3631
3632 /* C.11 */
3633 pcum->aapcs_nextncrn = NUM_ARG_REGS;
3634
 3635 /* The argument is passed on the stack; record the needed number of words for
3abf17cf 3636 this argument and align the total size if necessary. */
43e9d192 3637on_stack:
3abf17cf 3638 pcum->aapcs_stack_words = size / UNITS_PER_WORD;
2ec07fa6 3639
985b8393 3640 if (aarch64_function_arg_alignment (mode, type) == 16 * BITS_PER_UNIT)
4f59f9f2
UB
3641 pcum->aapcs_stack_size = ROUND_UP (pcum->aapcs_stack_size,
3642 16 / UNITS_PER_WORD);
43e9d192
IB
3643 return;
3644}
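
/* Worked example of rule C.8 above (hypothetical prototype, LP64 assumed):
   for void f (int a, __int128 b), "a" occupies w0, so NGRN is 1 when "b"
   is laid out.  "b" needs two registers and has 16-byte alignment, so NGRN
   is rounded up to 2 and "b" is passed in x2/x3, leaving x1 unused.  */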
3645
3646/* Implement TARGET_FUNCTION_ARG. */
3647
3648static rtx
ef4bddc2 3649aarch64_function_arg (cumulative_args_t pcum_v, machine_mode mode,
43e9d192
IB
3650 const_tree type, bool named)
3651{
3652 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
3653 gcc_assert (pcum->pcs_variant == ARM_PCS_AAPCS64);
3654
3655 if (mode == VOIDmode)
3656 return NULL_RTX;
3657
3658 aarch64_layout_arg (pcum_v, mode, type, named);
3659 return pcum->aapcs_reg;
3660}
3661
3662void
3663aarch64_init_cumulative_args (CUMULATIVE_ARGS *pcum,
3664 const_tree fntype ATTRIBUTE_UNUSED,
3665 rtx libname ATTRIBUTE_UNUSED,
3666 const_tree fndecl ATTRIBUTE_UNUSED,
3667 unsigned n_named ATTRIBUTE_UNUSED)
3668{
3669 pcum->aapcs_ncrn = 0;
3670 pcum->aapcs_nvrn = 0;
3671 pcum->aapcs_nextncrn = 0;
3672 pcum->aapcs_nextnvrn = 0;
3673 pcum->pcs_variant = ARM_PCS_AAPCS64;
3674 pcum->aapcs_reg = NULL_RTX;
3675 pcum->aapcs_arg_processed = false;
3676 pcum->aapcs_stack_words = 0;
3677 pcum->aapcs_stack_size = 0;
3678
261fb553
AL
3679 if (!TARGET_FLOAT
3680 && fndecl && TREE_PUBLIC (fndecl)
3681 && fntype && fntype != error_mark_node)
3682 {
3683 const_tree type = TREE_TYPE (fntype);
3684 machine_mode mode ATTRIBUTE_UNUSED; /* To pass pointer as argument. */
3685 int nregs ATTRIBUTE_UNUSED; /* Likewise. */
3686 if (aarch64_vfp_is_call_or_return_candidate (TYPE_MODE (type), type,
3687 &mode, &nregs, NULL))
fc29dfc9 3688 aarch64_err_no_fpadvsimd (TYPE_MODE (type));
261fb553 3689 }
43e9d192
IB
3690 return;
3691}
3692
3693static void
3694aarch64_function_arg_advance (cumulative_args_t pcum_v,
ef4bddc2 3695 machine_mode mode,
43e9d192
IB
3696 const_tree type,
3697 bool named)
3698{
3699 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
3700 if (pcum->pcs_variant == ARM_PCS_AAPCS64)
3701 {
3702 aarch64_layout_arg (pcum_v, mode, type, named);
3703 gcc_assert ((pcum->aapcs_reg != NULL_RTX)
3704 != (pcum->aapcs_stack_words != 0));
3705 pcum->aapcs_arg_processed = false;
3706 pcum->aapcs_ncrn = pcum->aapcs_nextncrn;
3707 pcum->aapcs_nvrn = pcum->aapcs_nextnvrn;
3708 pcum->aapcs_stack_size += pcum->aapcs_stack_words;
3709 pcum->aapcs_stack_words = 0;
3710 pcum->aapcs_reg = NULL_RTX;
3711 }
3712}
3713
3714bool
3715aarch64_function_arg_regno_p (unsigned regno)
3716{
3717 return ((GP_REGNUM_P (regno) && regno < R0_REGNUM + NUM_ARG_REGS)
3718 || (FP_REGNUM_P (regno) && regno < V0_REGNUM + NUM_FP_ARG_REGS));
3719}
3720
3721/* Implement FUNCTION_ARG_BOUNDARY. Every parameter gets at least
3722 PARM_BOUNDARY bits of alignment, but will be given anything up
3723 to STACK_BOUNDARY bits if the type requires it. This makes sure
3724 that both before and after the layout of each argument, the Next
3725 Stacked Argument Address (NSAA) will have a minimum alignment of
3726 8 bytes. */
3727
3728static unsigned int
ef4bddc2 3729aarch64_function_arg_boundary (machine_mode mode, const_tree type)
43e9d192 3730{
985b8393
JJ
3731 unsigned int alignment = aarch64_function_arg_alignment (mode, type);
3732 return MIN (MAX (alignment, PARM_BOUNDARY), STACK_BOUNDARY);
43e9d192
IB
3733}
3734
43cacb12
RS
3735/* Implement TARGET_GET_RAW_RESULT_MODE and TARGET_GET_RAW_ARG_MODE. */
3736
3737static fixed_size_mode
3738aarch64_get_reg_raw_mode (int regno)
3739{
3740 if (TARGET_SVE && FP_REGNUM_P (regno))
3741 /* Don't use the SVE part of the register for __builtin_apply and
3742 __builtin_return. The SVE registers aren't used by the normal PCS,
3743 so using them there would be a waste of time. The PCS extensions
3744 for SVE types are fundamentally incompatible with the
3745 __builtin_return/__builtin_apply interface. */
3746 return as_a <fixed_size_mode> (V16QImode);
3747 return default_get_reg_raw_mode (regno);
3748}
3749
76b0cbf8 3750/* Implement TARGET_FUNCTION_ARG_PADDING.
43e9d192
IB
3751
3752 Small aggregate types are placed in the lowest memory address.
3753
3754 The related parameter passing rules are B.4, C.3, C.5 and C.14. */
3755
76b0cbf8
RS
3756static pad_direction
3757aarch64_function_arg_padding (machine_mode mode, const_tree type)
43e9d192
IB
3758{
3759 /* On little-endian targets, the least significant byte of every stack
3760 argument is passed at the lowest byte address of the stack slot. */
3761 if (!BYTES_BIG_ENDIAN)
76b0cbf8 3762 return PAD_UPWARD;
43e9d192 3763
00edcfbe 3764 /* Otherwise, integral, floating-point and pointer types are padded downward:
43e9d192
IB
3765 the least significant byte of a stack argument is passed at the highest
3766 byte address of the stack slot. */
3767 if (type
00edcfbe
YZ
3768 ? (INTEGRAL_TYPE_P (type) || SCALAR_FLOAT_TYPE_P (type)
3769 || POINTER_TYPE_P (type))
43e9d192 3770 : (SCALAR_INT_MODE_P (mode) || SCALAR_FLOAT_MODE_P (mode)))
76b0cbf8 3771 return PAD_DOWNWARD;
43e9d192
IB
3772
3773 /* Everything else padded upward, i.e. data in first byte of stack slot. */
76b0cbf8 3774 return PAD_UPWARD;
43e9d192
IB
3775}
3776
3777/* Similarly, for use by BLOCK_REG_PADDING (MODE, TYPE, FIRST).
3778
 3779 It specifies padding for the last (and possibly the only)
 3780 element of a block move between registers and memory.
 3781 Assuming the block is in memory, padding upward means that
 3782 the last element is padded after its most significant byte,
 3783 while with downward padding the last element is padded at
 3784 its least significant byte side.
3785
3786 Small aggregates and small complex types are always padded
3787 upwards.
3788
3789 We don't need to worry about homogeneous floating-point or
3790 short-vector aggregates; their move is not affected by the
3791 padding direction determined here. Regardless of endianness,
3792 each element of such an aggregate is put in the least
3793 significant bits of a fp/simd register.
3794
3795 Return !BYTES_BIG_ENDIAN if the least significant byte of the
3796 register has useful data, and return the opposite if the most
3797 significant byte does. */
3798
3799bool
ef4bddc2 3800aarch64_pad_reg_upward (machine_mode mode, const_tree type,
43e9d192
IB
3801 bool first ATTRIBUTE_UNUSED)
3802{
3803
3804 /* Small composite types are always padded upward. */
3805 if (BYTES_BIG_ENDIAN && aarch64_composite_type_p (type, mode))
3806 {
6a70badb
RS
3807 HOST_WIDE_INT size;
3808 if (type)
3809 size = int_size_in_bytes (type);
3810 else
3811 /* No frontends can create types with variable-sized modes, so we
3812 shouldn't be asked to pass or return them. */
3813 size = GET_MODE_SIZE (mode).to_constant ();
43e9d192
IB
3814 if (size < 2 * UNITS_PER_WORD)
3815 return true;
3816 }
3817
3818 /* Otherwise, use the default padding. */
3819 return !BYTES_BIG_ENDIAN;
3820}
3821
095a2d76 3822static scalar_int_mode
43e9d192
IB
3823aarch64_libgcc_cmp_return_mode (void)
3824{
3825 return SImode;
3826}
3827
a3eb8a52
EB
3828#define PROBE_INTERVAL (1 << STACK_CHECK_PROBE_INTERVAL_EXP)
3829
3830/* We use the 12-bit shifted immediate arithmetic instructions so values
 3831 must be a multiple of (1 << 12), i.e. 4096. */
3832#define ARITH_FACTOR 4096
3833
3834#if (PROBE_INTERVAL % ARITH_FACTOR) != 0
3835#error Cannot use simple address calculation for stack probing
3836#endif
3837
3838/* The pair of scratch registers used for stack probing. */
3839#define PROBE_STACK_FIRST_REG 9
3840#define PROBE_STACK_SECOND_REG 10
3841
6a70badb 3842/* Emit code to probe a range of stack addresses from FIRST to FIRST+POLY_SIZE,
a3eb8a52
EB
3843 inclusive. These are offsets from the current stack pointer. */
3844
3845static void
6a70badb 3846aarch64_emit_probe_stack_range (HOST_WIDE_INT first, poly_int64 poly_size)
a3eb8a52 3847{
6a70badb
RS
3848 HOST_WIDE_INT size;
3849 if (!poly_size.is_constant (&size))
3850 {
3851 sorry ("stack probes for SVE frames");
3852 return;
3853 }
3854
5f5c5e0f 3855 rtx reg1 = gen_rtx_REG (Pmode, PROBE_STACK_FIRST_REG);
a3eb8a52
EB
3856
3857 /* See the same assertion on PROBE_INTERVAL above. */
3858 gcc_assert ((first % ARITH_FACTOR) == 0);
3859
3860 /* See if we have a constant small number of probes to generate. If so,
3861 that's the easy case. */
3862 if (size <= PROBE_INTERVAL)
3863 {
3864 const HOST_WIDE_INT base = ROUND_UP (size, ARITH_FACTOR);
3865
3866 emit_set_insn (reg1,
5f5c5e0f 3867 plus_constant (Pmode,
a3eb8a52 3868 stack_pointer_rtx, -(first + base)));
5f5c5e0f 3869 emit_stack_probe (plus_constant (Pmode, reg1, base - size));
a3eb8a52
EB
3870 }
3871
3872 /* The run-time loop is made up of 8 insns in the generic case while the
 3873 compile-time loop is made up of 4+2*(n-2) insns, where n is the number of intervals. */
3874 else if (size <= 4 * PROBE_INTERVAL)
3875 {
3876 HOST_WIDE_INT i, rem;
3877
3878 emit_set_insn (reg1,
5f5c5e0f 3879 plus_constant (Pmode,
a3eb8a52
EB
3880 stack_pointer_rtx,
3881 -(first + PROBE_INTERVAL)));
3882 emit_stack_probe (reg1);
3883
3884 /* Probe at FIRST + N * PROBE_INTERVAL for values of N from 2 until
3885 it exceeds SIZE. If only two probes are needed, this will not
3886 generate any code. Then probe at FIRST + SIZE. */
3887 for (i = 2 * PROBE_INTERVAL; i < size; i += PROBE_INTERVAL)
3888 {
3889 emit_set_insn (reg1,
5f5c5e0f 3890 plus_constant (Pmode, reg1, -PROBE_INTERVAL));
a3eb8a52
EB
3891 emit_stack_probe (reg1);
3892 }
3893
3894 rem = size - (i - PROBE_INTERVAL);
3895 if (rem > 256)
3896 {
3897 const HOST_WIDE_INT base = ROUND_UP (rem, ARITH_FACTOR);
3898
5f5c5e0f
EB
3899 emit_set_insn (reg1, plus_constant (Pmode, reg1, -base));
3900 emit_stack_probe (plus_constant (Pmode, reg1, base - rem));
a3eb8a52
EB
3901 }
3902 else
5f5c5e0f 3903 emit_stack_probe (plus_constant (Pmode, reg1, -rem));
a3eb8a52
EB
3904 }
3905
3906 /* Otherwise, do the same as above, but in a loop. Note that we must be
3907 extra careful with variables wrapping around because we might be at
3908 the very top (or the very bottom) of the address space and we have
3909 to be able to handle this case properly; in particular, we use an
3910 equality test for the loop condition. */
3911 else
3912 {
5f5c5e0f 3913 rtx reg2 = gen_rtx_REG (Pmode, PROBE_STACK_SECOND_REG);
a3eb8a52
EB
3914
3915 /* Step 1: round SIZE to the previous multiple of the interval. */
3916
3917 HOST_WIDE_INT rounded_size = size & -PROBE_INTERVAL;
3918
3919
3920 /* Step 2: compute initial and final value of the loop counter. */
3921
3922 /* TEST_ADDR = SP + FIRST. */
3923 emit_set_insn (reg1,
5f5c5e0f 3924 plus_constant (Pmode, stack_pointer_rtx, -first));
a3eb8a52
EB
3925
3926 /* LAST_ADDR = SP + FIRST + ROUNDED_SIZE. */
13f752b2
JL
3927 HOST_WIDE_INT adjustment = - (first + rounded_size);
3928 if (! aarch64_uimm12_shift (adjustment))
3929 {
3930 aarch64_internal_mov_immediate (reg2, GEN_INT (adjustment),
3931 true, Pmode);
3932 emit_set_insn (reg2, gen_rtx_PLUS (Pmode, stack_pointer_rtx, reg2));
3933 }
3934 else
8dd64cdf
EB
3935 emit_set_insn (reg2,
3936 plus_constant (Pmode, stack_pointer_rtx, adjustment));
3937
a3eb8a52
EB
3938 /* Step 3: the loop
3939
3940 do
3941 {
3942 TEST_ADDR = TEST_ADDR + PROBE_INTERVAL
3943 probe at TEST_ADDR
3944 }
3945 while (TEST_ADDR != LAST_ADDR)
3946
3947 probes at FIRST + N * PROBE_INTERVAL for values of N from 1
3948 until it is equal to ROUNDED_SIZE. */
3949
5f5c5e0f 3950 emit_insn (gen_probe_stack_range (reg1, reg1, reg2));
a3eb8a52
EB
3951
3952
3953 /* Step 4: probe at FIRST + SIZE if we cannot assert at compile-time
3954 that SIZE is equal to ROUNDED_SIZE. */
3955
3956 if (size != rounded_size)
3957 {
3958 HOST_WIDE_INT rem = size - rounded_size;
3959
3960 if (rem > 256)
3961 {
3962 const HOST_WIDE_INT base = ROUND_UP (rem, ARITH_FACTOR);
3963
5f5c5e0f
EB
3964 emit_set_insn (reg2, plus_constant (Pmode, reg2, -base));
3965 emit_stack_probe (plus_constant (Pmode, reg2, base - rem));
a3eb8a52
EB
3966 }
3967 else
5f5c5e0f 3968 emit_stack_probe (plus_constant (Pmode, reg2, -rem));
a3eb8a52
EB
3969 }
3970 }
3971
3972 /* Make sure nothing is scheduled before we are done. */
3973 emit_insn (gen_blockage ());
3974}
3975
3976/* Probe a range of stack addresses from REG1 to REG2 inclusive. These are
3977 absolute addresses. */
3978
3979const char *
3980aarch64_output_probe_stack_range (rtx reg1, rtx reg2)
3981{
3982 static int labelno = 0;
3983 char loop_lab[32];
3984 rtx xops[2];
3985
3986 ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno++);
3987
3988 /* Loop. */
3989 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);
3990
3991 /* TEST_ADDR = TEST_ADDR + PROBE_INTERVAL. */
3992 xops[0] = reg1;
3993 xops[1] = GEN_INT (PROBE_INTERVAL);
3994 output_asm_insn ("sub\t%0, %0, %1", xops);
3995
3996 /* Probe at TEST_ADDR. */
3997 output_asm_insn ("str\txzr, [%0]", xops);
3998
3999 /* Test if TEST_ADDR == LAST_ADDR. */
4000 xops[1] = reg2;
4001 output_asm_insn ("cmp\t%0, %1", xops);
4002
4003 /* Branch. */
4004 fputs ("\tb.ne\t", asm_out_file);
4005 assemble_name_raw (asm_out_file, loop_lab);
4006 fputc ('\n', asm_out_file);
4007
4008 return "";
4009}
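
/* For illustration, with the default 4 KiB probe interval the loop emitted
   above looks roughly like this (x9 is TEST_ADDR, x10 is LAST_ADDR):

	.LPSRL0:
	sub	x9, x9, 4096
	str	xzr, [x9]
	cmp	x9, x10
	b.ne	.LPSRL0
   */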
4010
d6cb6d6a
WD
4011/* Determine whether a frame chain needs to be generated. */
4012static bool
4013aarch64_needs_frame_chain (void)
4014{
4015 /* Force a frame chain for EH returns so the return address is at FP+8. */
4016 if (frame_pointer_needed || crtl->calls_eh_return)
4017 return true;
4018
4019 /* A leaf function cannot have calls or write LR. */
4020 bool is_leaf = crtl->is_leaf && !df_regs_ever_live_p (LR_REGNUM);
4021
4022 /* Don't use a frame chain in leaf functions if leaf frame pointers
4023 are disabled. */
4024 if (flag_omit_leaf_frame_pointer && is_leaf)
4025 return false;
4026
4027 return aarch64_use_frame_pointer;
4028}
4029
43e9d192
IB
4030/* Mark the registers that need to be saved by the callee and calculate
4031 the size of the callee-saved registers area and frame record (both FP
33a2e348 4032 and LR may be omitted). */
43e9d192
IB
4033static void
4034aarch64_layout_frame (void)
4035{
4036 HOST_WIDE_INT offset = 0;
4b0685d9 4037 int regno, last_fp_reg = INVALID_REGNUM;
43e9d192
IB
4038
4039 if (reload_completed && cfun->machine->frame.laid_out)
4040 return;
4041
d6cb6d6a 4042 cfun->machine->frame.emit_frame_chain = aarch64_needs_frame_chain ();
7040939b 4043
97826595
MS
4044#define SLOT_NOT_REQUIRED (-2)
4045#define SLOT_REQUIRED (-1)
4046
71bfb77a
WD
4047 cfun->machine->frame.wb_candidate1 = INVALID_REGNUM;
4048 cfun->machine->frame.wb_candidate2 = INVALID_REGNUM;
363ffa50 4049
43e9d192
IB
4050 /* First mark all the registers that really need to be saved... */
4051 for (regno = R0_REGNUM; regno <= R30_REGNUM; regno++)
97826595 4052 cfun->machine->frame.reg_offset[regno] = SLOT_NOT_REQUIRED;
43e9d192
IB
4053
4054 for (regno = V0_REGNUM; regno <= V31_REGNUM; regno++)
97826595 4055 cfun->machine->frame.reg_offset[regno] = SLOT_NOT_REQUIRED;
43e9d192
IB
4056
4057 /* ... that includes the eh data registers (if needed)... */
4058 if (crtl->calls_eh_return)
4059 for (regno = 0; EH_RETURN_DATA_REGNO (regno) != INVALID_REGNUM; regno++)
97826595
MS
4060 cfun->machine->frame.reg_offset[EH_RETURN_DATA_REGNO (regno)]
4061 = SLOT_REQUIRED;
43e9d192
IB
4062
4063 /* ... and any callee saved register that dataflow says is live. */
4064 for (regno = R0_REGNUM; regno <= R30_REGNUM; regno++)
4065 if (df_regs_ever_live_p (regno)
1c923b60
JW
4066 && (regno == R30_REGNUM
4067 || !call_used_regs[regno]))
97826595 4068 cfun->machine->frame.reg_offset[regno] = SLOT_REQUIRED;
43e9d192
IB
4069
4070 for (regno = V0_REGNUM; regno <= V31_REGNUM; regno++)
4071 if (df_regs_ever_live_p (regno)
4072 && !call_used_regs[regno])
4b0685d9
WD
4073 {
4074 cfun->machine->frame.reg_offset[regno] = SLOT_REQUIRED;
4075 last_fp_reg = regno;
4076 }
43e9d192 4077
204d2c03 4078 if (cfun->machine->frame.emit_frame_chain)
43e9d192 4079 {
2e1cdae5 4080 /* FP and LR are placed in the linkage record. */
43e9d192 4081 cfun->machine->frame.reg_offset[R29_REGNUM] = 0;
363ffa50 4082 cfun->machine->frame.wb_candidate1 = R29_REGNUM;
2e1cdae5 4083 cfun->machine->frame.reg_offset[R30_REGNUM] = UNITS_PER_WORD;
363ffa50 4084 cfun->machine->frame.wb_candidate2 = R30_REGNUM;
1f7bffd0
WD
4085 offset = 2 * UNITS_PER_WORD;
4086 }
43e9d192
IB
4087
4088 /* Now assign stack slots for them. */
2e1cdae5 4089 for (regno = R0_REGNUM; regno <= R30_REGNUM; regno++)
97826595 4090 if (cfun->machine->frame.reg_offset[regno] == SLOT_REQUIRED)
43e9d192
IB
4091 {
4092 cfun->machine->frame.reg_offset[regno] = offset;
71bfb77a 4093 if (cfun->machine->frame.wb_candidate1 == INVALID_REGNUM)
363ffa50 4094 cfun->machine->frame.wb_candidate1 = regno;
71bfb77a 4095 else if (cfun->machine->frame.wb_candidate2 == INVALID_REGNUM)
363ffa50 4096 cfun->machine->frame.wb_candidate2 = regno;
43e9d192
IB
4097 offset += UNITS_PER_WORD;
4098 }
4099
4b0685d9
WD
4100 HOST_WIDE_INT max_int_offset = offset;
4101 offset = ROUND_UP (offset, STACK_BOUNDARY / BITS_PER_UNIT);
4102 bool has_align_gap = offset != max_int_offset;
4103
43e9d192 4104 for (regno = V0_REGNUM; regno <= V31_REGNUM; regno++)
97826595 4105 if (cfun->machine->frame.reg_offset[regno] == SLOT_REQUIRED)
43e9d192 4106 {
4b0685d9
WD
4107 /* If there is an alignment gap between integer and fp callee-saves,
4108 allocate the last fp register to it if possible. */
4109 if (regno == last_fp_reg && has_align_gap && (offset & 8) == 0)
4110 {
4111 cfun->machine->frame.reg_offset[regno] = max_int_offset;
4112 break;
4113 }
4114
43e9d192 4115 cfun->machine->frame.reg_offset[regno] = offset;
71bfb77a 4116 if (cfun->machine->frame.wb_candidate1 == INVALID_REGNUM)
363ffa50 4117 cfun->machine->frame.wb_candidate1 = regno;
71bfb77a 4118 else if (cfun->machine->frame.wb_candidate2 == INVALID_REGNUM
363ffa50
JW
4119 && cfun->machine->frame.wb_candidate1 >= V0_REGNUM)
4120 cfun->machine->frame.wb_candidate2 = regno;
43e9d192
IB
4121 offset += UNITS_PER_WORD;
4122 }
4123
4f59f9f2 4124 offset = ROUND_UP (offset, STACK_BOUNDARY / BITS_PER_UNIT);
43e9d192
IB
4125
4126 cfun->machine->frame.saved_regs_size = offset;
1c960e02 4127
71bfb77a
WD
4128 HOST_WIDE_INT varargs_and_saved_regs_size
4129 = offset + cfun->machine->frame.saved_varargs_size;
4130
1c960e02 4131 cfun->machine->frame.hard_fp_offset
6a70badb
RS
4132 = aligned_upper_bound (varargs_and_saved_regs_size
4133 + get_frame_size (),
4134 STACK_BOUNDARY / BITS_PER_UNIT);
1c960e02 4135
6a70badb
RS
4136 /* Both these values are already aligned. */
4137 gcc_assert (multiple_p (crtl->outgoing_args_size,
4138 STACK_BOUNDARY / BITS_PER_UNIT));
1c960e02 4139 cfun->machine->frame.frame_size
6a70badb
RS
4140 = (cfun->machine->frame.hard_fp_offset
4141 + crtl->outgoing_args_size);
1c960e02 4142
71bfb77a
WD
4143 cfun->machine->frame.locals_offset = cfun->machine->frame.saved_varargs_size;
4144
4145 cfun->machine->frame.initial_adjust = 0;
4146 cfun->machine->frame.final_adjust = 0;
4147 cfun->machine->frame.callee_adjust = 0;
4148 cfun->machine->frame.callee_offset = 0;
4149
4150 HOST_WIDE_INT max_push_offset = 0;
4151 if (cfun->machine->frame.wb_candidate2 != INVALID_REGNUM)
4152 max_push_offset = 512;
4153 else if (cfun->machine->frame.wb_candidate1 != INVALID_REGNUM)
4154 max_push_offset = 256;
4155
6a70badb
RS
4156 HOST_WIDE_INT const_size, const_fp_offset;
4157 if (cfun->machine->frame.frame_size.is_constant (&const_size)
4158 && const_size < max_push_offset
4159 && known_eq (crtl->outgoing_args_size, 0))
71bfb77a
WD
4160 {
4161 /* Simple, small frame with no outgoing arguments:
4162 stp reg1, reg2, [sp, -frame_size]!
4163 stp reg3, reg4, [sp, 16] */
6a70badb 4164 cfun->machine->frame.callee_adjust = const_size;
71bfb77a 4165 }
6a70badb
RS
4166 else if (known_lt (crtl->outgoing_args_size
4167 + cfun->machine->frame.saved_regs_size, 512)
71bfb77a 4168 && !(cfun->calls_alloca
6a70badb
RS
4169 && known_lt (cfun->machine->frame.hard_fp_offset,
4170 max_push_offset)))
71bfb77a
WD
4171 {
4172 /* Frame with small outgoing arguments:
4173 sub sp, sp, frame_size
4174 stp reg1, reg2, [sp, outgoing_args_size]
4175 stp reg3, reg4, [sp, outgoing_args_size + 16] */
4176 cfun->machine->frame.initial_adjust = cfun->machine->frame.frame_size;
4177 cfun->machine->frame.callee_offset
4178 = cfun->machine->frame.frame_size - cfun->machine->frame.hard_fp_offset;
4179 }
6a70badb
RS
4180 else if (cfun->machine->frame.hard_fp_offset.is_constant (&const_fp_offset)
4181 && const_fp_offset < max_push_offset)
71bfb77a
WD
4182 {
4183 /* Frame with large outgoing arguments but a small local area:
4184 stp reg1, reg2, [sp, -hard_fp_offset]!
4185 stp reg3, reg4, [sp, 16]
4186 sub sp, sp, outgoing_args_size */
6a70badb 4187 cfun->machine->frame.callee_adjust = const_fp_offset;
71bfb77a
WD
4188 cfun->machine->frame.final_adjust
4189 = cfun->machine->frame.frame_size - cfun->machine->frame.callee_adjust;
4190 }
71bfb77a
WD
4191 else
4192 {
4193 /* Frame with large local area and outgoing arguments using frame pointer:
4194 sub sp, sp, hard_fp_offset
4195 stp x29, x30, [sp, 0]
4196 add x29, sp, 0
4197 stp reg3, reg4, [sp, 16]
4198 sub sp, sp, outgoing_args_size */
4199 cfun->machine->frame.initial_adjust = cfun->machine->frame.hard_fp_offset;
4200 cfun->machine->frame.final_adjust
4201 = cfun->machine->frame.frame_size - cfun->machine->frame.initial_adjust;
4202 }
4203
43e9d192
IB
4204 cfun->machine->frame.laid_out = true;
4205}
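
/* Rough sketch of how the cases above play out (illustrative numbers only):
   a function that needs a frame chain, saves x19, and has 16 bytes of locals
   and no outgoing arguments gives x29/x30 offsets 0 and 8, x19 offset 16,
   a saved_regs_size of 32 and a frame_size of 48; since 48 < 512 the first
   form is chosen (callee_adjust = 48), producing roughly

	stp	x29, x30, [sp, -48]!
	mov	x29, sp
	str	x19, [sp, 16]

   whereas a frame with several kilobytes of locals falls into one of the
   later cases and needs a separate "sub sp, sp, ..." adjustment.  */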
4206
04ddfe06
KT
4207/* Return true if the register REGNO is saved on entry to
4208 the current function. */
4209
43e9d192
IB
4210static bool
4211aarch64_register_saved_on_entry (int regno)
4212{
97826595 4213 return cfun->machine->frame.reg_offset[regno] >= 0;
43e9d192
IB
4214}
4215
04ddfe06
KT
4216/* Return the next register up from REGNO up to LIMIT for the callee
4217 to save. */
4218
64dedd72
JW
4219static unsigned
4220aarch64_next_callee_save (unsigned regno, unsigned limit)
4221{
4222 while (regno <= limit && !aarch64_register_saved_on_entry (regno))
4223 regno ++;
4224 return regno;
4225}
43e9d192 4226
04ddfe06
KT
4227/* Push the register number REGNO of mode MODE to the stack with write-back
4228 adjusting the stack by ADJUSTMENT. */
4229
c5e1f66e 4230static void
ef4bddc2 4231aarch64_pushwb_single_reg (machine_mode mode, unsigned regno,
c5e1f66e
JW
4232 HOST_WIDE_INT adjustment)
4233 {
4234 rtx base_rtx = stack_pointer_rtx;
4235 rtx insn, reg, mem;
4236
4237 reg = gen_rtx_REG (mode, regno);
4238 mem = gen_rtx_PRE_MODIFY (Pmode, base_rtx,
4239 plus_constant (Pmode, base_rtx, -adjustment));
30079dde 4240 mem = gen_frame_mem (mode, mem);
c5e1f66e
JW
4241
4242 insn = emit_move_insn (mem, reg);
4243 RTX_FRAME_RELATED_P (insn) = 1;
4244}
4245
04ddfe06
KT
4246/* Generate and return an instruction to store the pair of registers
4247 REG and REG2 of mode MODE to location BASE with write-back adjusting
4248 the stack location BASE by ADJUSTMENT. */
4249
80c11907 4250static rtx
ef4bddc2 4251aarch64_gen_storewb_pair (machine_mode mode, rtx base, rtx reg, rtx reg2,
80c11907
JW
4252 HOST_WIDE_INT adjustment)
4253{
4254 switch (mode)
4255 {
4e10a5a7 4256 case E_DImode:
80c11907
JW
4257 return gen_storewb_pairdi_di (base, base, reg, reg2,
4258 GEN_INT (-adjustment),
4259 GEN_INT (UNITS_PER_WORD - adjustment));
4e10a5a7 4260 case E_DFmode:
80c11907
JW
4261 return gen_storewb_pairdf_di (base, base, reg, reg2,
4262 GEN_INT (-adjustment),
4263 GEN_INT (UNITS_PER_WORD - adjustment));
4264 default:
4265 gcc_unreachable ();
4266 }
4267}
4268
04ddfe06
KT
4269/* Push registers numbered REGNO1 and REGNO2 to the stack, adjusting the
4270 stack pointer by ADJUSTMENT. */
4271
80c11907 4272static void
89ac681e 4273aarch64_push_regs (unsigned regno1, unsigned regno2, HOST_WIDE_INT adjustment)
80c11907 4274{
5d8a22a5 4275 rtx_insn *insn;
0d4a1197 4276 machine_mode mode = (regno1 <= R30_REGNUM) ? E_DImode : E_DFmode;
89ac681e 4277
71bfb77a 4278 if (regno2 == INVALID_REGNUM)
89ac681e
WD
4279 return aarch64_pushwb_single_reg (mode, regno1, adjustment);
4280
80c11907
JW
4281 rtx reg1 = gen_rtx_REG (mode, regno1);
4282 rtx reg2 = gen_rtx_REG (mode, regno2);
4283
4284 insn = emit_insn (aarch64_gen_storewb_pair (mode, stack_pointer_rtx, reg1,
4285 reg2, adjustment));
4286 RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 2)) = 1;
80c11907
JW
4287 RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 1)) = 1;
4288 RTX_FRAME_RELATED_P (insn) = 1;
4289}
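
/* For example, aarch64_push_regs (R29_REGNUM, R30_REGNUM, 16) emits the
   write-back store "stp x29, x30, [sp, -16]!", while passing INVALID_REGNUM
   as the second register falls back to a single pre-modify "str".  */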
4290
04ddfe06
KT
 4291/* Generate and return an instruction to load the pair of registers REG and
 4292 REG2 of mode MODE from stack location BASE, adjusting it by ADJUSTMENT. */
4293
159313d9 4294static rtx
ef4bddc2 4295aarch64_gen_loadwb_pair (machine_mode mode, rtx base, rtx reg, rtx reg2,
159313d9
JW
4296 HOST_WIDE_INT adjustment)
4297{
4298 switch (mode)
4299 {
4e10a5a7 4300 case E_DImode:
159313d9 4301 return gen_loadwb_pairdi_di (base, base, reg, reg2, GEN_INT (adjustment),
3e322b3f 4302 GEN_INT (UNITS_PER_WORD));
4e10a5a7 4303 case E_DFmode:
159313d9 4304 return gen_loadwb_pairdf_di (base, base, reg, reg2, GEN_INT (adjustment),
3e322b3f 4305 GEN_INT (UNITS_PER_WORD));
159313d9
JW
4306 default:
4307 gcc_unreachable ();
4308 }
4309}
4310
04ddfe06
KT
4311/* Pop the two registers numbered REGNO1, REGNO2 from the stack, adjusting it
4312 afterwards by ADJUSTMENT and writing the appropriate REG_CFA_RESTORE notes
4313 into CFI_OPS. */
4314
89ac681e
WD
4315static void
4316aarch64_pop_regs (unsigned regno1, unsigned regno2, HOST_WIDE_INT adjustment,
4317 rtx *cfi_ops)
4318{
0d4a1197 4319 machine_mode mode = (regno1 <= R30_REGNUM) ? E_DImode : E_DFmode;
89ac681e
WD
4320 rtx reg1 = gen_rtx_REG (mode, regno1);
4321
4322 *cfi_ops = alloc_reg_note (REG_CFA_RESTORE, reg1, *cfi_ops);
4323
71bfb77a 4324 if (regno2 == INVALID_REGNUM)
89ac681e
WD
4325 {
4326 rtx mem = plus_constant (Pmode, stack_pointer_rtx, adjustment);
4327 mem = gen_rtx_POST_MODIFY (Pmode, stack_pointer_rtx, mem);
30079dde 4328 emit_move_insn (reg1, gen_frame_mem (mode, mem));
89ac681e
WD
4329 }
4330 else
4331 {
4332 rtx reg2 = gen_rtx_REG (mode, regno2);
4333 *cfi_ops = alloc_reg_note (REG_CFA_RESTORE, reg2, *cfi_ops);
4334 emit_insn (aarch64_gen_loadwb_pair (mode, stack_pointer_rtx, reg1,
4335 reg2, adjustment));
4336 }
4337}
4338
04ddfe06
KT
4339/* Generate and return a store pair instruction of mode MODE to store
4340 register REG1 to MEM1 and register REG2 to MEM2. */
4341
72df5c1f 4342static rtx
ef4bddc2 4343aarch64_gen_store_pair (machine_mode mode, rtx mem1, rtx reg1, rtx mem2,
72df5c1f
JW
4344 rtx reg2)
4345{
4346 switch (mode)
4347 {
4e10a5a7 4348 case E_DImode:
dfe1da23 4349 return gen_store_pair_dw_didi (mem1, reg1, mem2, reg2);
72df5c1f 4350
4e10a5a7 4351 case E_DFmode:
dfe1da23 4352 return gen_store_pair_dw_dfdf (mem1, reg1, mem2, reg2);
72df5c1f
JW
4353
4354 default:
4355 gcc_unreachable ();
4356 }
4357}
4358
04ddfe06
KT
 4359/* Generate and return a load pair instruction of mode MODE to load register
 4360 REG1 from MEM1 and register REG2 from MEM2. */
4361
72df5c1f 4362static rtx
ef4bddc2 4363aarch64_gen_load_pair (machine_mode mode, rtx reg1, rtx mem1, rtx reg2,
72df5c1f
JW
4364 rtx mem2)
4365{
4366 switch (mode)
4367 {
4e10a5a7 4368 case E_DImode:
dfe1da23 4369 return gen_load_pair_dw_didi (reg1, mem1, reg2, mem2);
72df5c1f 4370
4e10a5a7 4371 case E_DFmode:
dfe1da23 4372 return gen_load_pair_dw_dfdf (reg1, mem1, reg2, mem2);
72df5c1f
JW
4373
4374 default:
4375 gcc_unreachable ();
4376 }
4377}
4378
db58fd89
JW
4379/* Return TRUE if return address signing should be enabled for the current
4380 function, otherwise return FALSE. */
4381
4382bool
4383aarch64_return_address_signing_enabled (void)
4384{
 4385 /* This function should only be called after the frame is laid out. */
4386 gcc_assert (cfun->machine->frame.laid_out);
4387
 4388 /* If signing scope is AARCH64_FUNCTION_NON_LEAF, we only sign a leaf function
 4389 if its LR is pushed onto the stack. */
4390 return (aarch64_ra_sign_scope == AARCH64_FUNCTION_ALL
4391 || (aarch64_ra_sign_scope == AARCH64_FUNCTION_NON_LEAF
4392 && cfun->machine->frame.reg_offset[LR_REGNUM] >= 0));
4393}
4394
04ddfe06
KT
4395/* Emit code to save the callee-saved registers from register number START
4396 to LIMIT to the stack at the location starting at offset START_OFFSET,
4397 skipping any write-back candidates if SKIP_WB is true. */
43e9d192 4398
43e9d192 4399static void
6a70badb 4400aarch64_save_callee_saves (machine_mode mode, poly_int64 start_offset,
ae13fce3 4401 unsigned start, unsigned limit, bool skip_wb)
43e9d192 4402{
5d8a22a5 4403 rtx_insn *insn;
43e9d192
IB
4404 unsigned regno;
4405 unsigned regno2;
4406
0ec74a1e 4407 for (regno = aarch64_next_callee_save (start, limit);
64dedd72
JW
4408 regno <= limit;
4409 regno = aarch64_next_callee_save (regno + 1, limit))
43e9d192 4410 {
ae13fce3 4411 rtx reg, mem;
6a70badb 4412 poly_int64 offset;
64dedd72 4413
ae13fce3
JW
4414 if (skip_wb
4415 && (regno == cfun->machine->frame.wb_candidate1
4416 || regno == cfun->machine->frame.wb_candidate2))
4417 continue;
4418
827ab47a
KT
4419 if (cfun->machine->reg_is_wrapped_separately[regno])
4420 continue;
4421
ae13fce3
JW
4422 reg = gen_rtx_REG (mode, regno);
4423 offset = start_offset + cfun->machine->frame.reg_offset[regno];
30079dde
WD
4424 mem = gen_frame_mem (mode, plus_constant (Pmode, stack_pointer_rtx,
4425 offset));
64dedd72
JW
4426
4427 regno2 = aarch64_next_callee_save (regno + 1, limit);
4428
4429 if (regno2 <= limit
827ab47a 4430 && !cfun->machine->reg_is_wrapped_separately[regno2]
64dedd72
JW
4431 && ((cfun->machine->frame.reg_offset[regno] + UNITS_PER_WORD)
4432 == cfun->machine->frame.reg_offset[regno2]))
4433
43e9d192 4434 {
0ec74a1e 4435 rtx reg2 = gen_rtx_REG (mode, regno2);
64dedd72
JW
4436 rtx mem2;
4437
4438 offset = start_offset + cfun->machine->frame.reg_offset[regno2];
30079dde
WD
4439 mem2 = gen_frame_mem (mode, plus_constant (Pmode, stack_pointer_rtx,
4440 offset));
8ed2fc62
JW
4441 insn = emit_insn (aarch64_gen_store_pair (mode, mem, reg, mem2,
4442 reg2));
0b4a9743 4443
64dedd72
JW
4444 /* The first part of a frame-related parallel insn is
4445 always assumed to be relevant to the frame
4446 calculations; subsequent parts, are only
4447 frame-related if explicitly marked. */
4448 RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 1)) = 1;
4449 regno = regno2;
4450 }
4451 else
8ed2fc62
JW
4452 insn = emit_move_insn (mem, reg);
4453
4454 RTX_FRAME_RELATED_P (insn) = 1;
4455 }
4456}
4457
04ddfe06
KT
4458/* Emit code to restore the callee registers of mode MODE from register
4459 number START up to and including LIMIT. Restore from the stack offset
4460 START_OFFSET, skipping any write-back candidates if SKIP_WB is true.
4461 Write the appropriate REG_CFA_RESTORE notes into CFI_OPS. */
4462
8ed2fc62 4463static void
ef4bddc2 4464aarch64_restore_callee_saves (machine_mode mode,
6a70badb 4465 poly_int64 start_offset, unsigned start,
dd991abb 4466 unsigned limit, bool skip_wb, rtx *cfi_ops)
8ed2fc62 4467{
8ed2fc62 4468 rtx base_rtx = stack_pointer_rtx;
8ed2fc62
JW
4469 unsigned regno;
4470 unsigned regno2;
6a70badb 4471 poly_int64 offset;
8ed2fc62
JW
4472
4473 for (regno = aarch64_next_callee_save (start, limit);
4474 regno <= limit;
4475 regno = aarch64_next_callee_save (regno + 1, limit))
4476 {
827ab47a
KT
4477 if (cfun->machine->reg_is_wrapped_separately[regno])
4478 continue;
4479
ae13fce3 4480 rtx reg, mem;
8ed2fc62 4481
ae13fce3
JW
4482 if (skip_wb
4483 && (regno == cfun->machine->frame.wb_candidate1
4484 || regno == cfun->machine->frame.wb_candidate2))
4485 continue;
4486
4487 reg = gen_rtx_REG (mode, regno);
8ed2fc62 4488 offset = start_offset + cfun->machine->frame.reg_offset[regno];
30079dde 4489 mem = gen_frame_mem (mode, plus_constant (Pmode, base_rtx, offset));
8ed2fc62
JW
4490
4491 regno2 = aarch64_next_callee_save (regno + 1, limit);
4492
4493 if (regno2 <= limit
827ab47a 4494 && !cfun->machine->reg_is_wrapped_separately[regno2]
8ed2fc62
JW
4495 && ((cfun->machine->frame.reg_offset[regno] + UNITS_PER_WORD)
4496 == cfun->machine->frame.reg_offset[regno2]))
64dedd72 4497 {
8ed2fc62
JW
4498 rtx reg2 = gen_rtx_REG (mode, regno2);
4499 rtx mem2;
4500
4501 offset = start_offset + cfun->machine->frame.reg_offset[regno2];
30079dde 4502 mem2 = gen_frame_mem (mode, plus_constant (Pmode, base_rtx, offset));
dd991abb 4503 emit_insn (aarch64_gen_load_pair (mode, reg, mem, reg2, mem2));
8ed2fc62 4504
dd991abb 4505 *cfi_ops = alloc_reg_note (REG_CFA_RESTORE, reg2, *cfi_ops);
8ed2fc62 4506 regno = regno2;
43e9d192 4507 }
8ed2fc62 4508 else
dd991abb
RH
4509 emit_move_insn (reg, mem);
4510 *cfi_ops = alloc_reg_note (REG_CFA_RESTORE, reg, *cfi_ops);
43e9d192 4511 }
43e9d192
IB
4512}
4513
43cacb12
RS
4514/* Return true if OFFSET is a signed 4-bit value multiplied by the size
4515 of MODE. */
4516
4517static inline bool
4518offset_4bit_signed_scaled_p (machine_mode mode, poly_int64 offset)
4519{
4520 HOST_WIDE_INT multiple;
4521 return (constant_multiple_p (offset, GET_MODE_SIZE (mode), &multiple)
4522 && IN_RANGE (multiple, -8, 7));
4523}
4524
 4525/* Return true if OFFSET is an unsigned 6-bit value multiplied by the size
4526 of MODE. */
4527
4528static inline bool
4529offset_6bit_unsigned_scaled_p (machine_mode mode, poly_int64 offset)
4530{
4531 HOST_WIDE_INT multiple;
4532 return (constant_multiple_p (offset, GET_MODE_SIZE (mode), &multiple)
4533 && IN_RANGE (multiple, 0, 63));
4534}
4535
4536/* Return true if OFFSET is a signed 7-bit value multiplied by the size
4537 of MODE. */
4538
4539bool
4540aarch64_offset_7bit_signed_scaled_p (machine_mode mode, poly_int64 offset)
4541{
4542 HOST_WIDE_INT multiple;
4543 return (constant_multiple_p (offset, GET_MODE_SIZE (mode), &multiple)
4544 && IN_RANGE (multiple, -64, 63));
4545}
4546
4547/* Return true if OFFSET is a signed 9-bit value. */
4548
827ab47a
KT
4549static inline bool
4550offset_9bit_signed_unscaled_p (machine_mode mode ATTRIBUTE_UNUSED,
6a70badb 4551 poly_int64 offset)
827ab47a 4552{
6a70badb
RS
4553 HOST_WIDE_INT const_offset;
4554 return (offset.is_constant (&const_offset)
4555 && IN_RANGE (const_offset, -256, 255));
827ab47a
KT
4556}
4557
43cacb12
RS
4558/* Return true if OFFSET is a signed 9-bit value multiplied by the size
4559 of MODE. */
4560
827ab47a 4561static inline bool
43cacb12 4562offset_9bit_signed_scaled_p (machine_mode mode, poly_int64 offset)
827ab47a 4563{
6a70badb
RS
4564 HOST_WIDE_INT multiple;
4565 return (constant_multiple_p (offset, GET_MODE_SIZE (mode), &multiple)
43cacb12 4566 && IN_RANGE (multiple, -256, 255));
827ab47a
KT
4567}
4568
43cacb12
RS
4569/* Return true if OFFSET is an unsigned 12-bit value multiplied by the size
4570 of MODE. */
4571
4572static inline bool
4573offset_12bit_unsigned_scaled_p (machine_mode mode, poly_int64 offset)
827ab47a 4574{
6a70badb
RS
4575 HOST_WIDE_INT multiple;
4576 return (constant_multiple_p (offset, GET_MODE_SIZE (mode), &multiple)
43cacb12 4577 && IN_RANGE (multiple, 0, 4095));
827ab47a
KT
4578}
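
/* Worked example of the offset predicates above for DImode, whose unit size
   is 8 bytes:

     offset_4bit_signed_scaled_p	     -64 .. 56, multiples of 8
     offset_6bit_unsigned_scaled_p	     0 .. 504, multiples of 8
     aarch64_offset_7bit_signed_scaled_p     -512 .. 504, multiples of 8
     offset_9bit_signed_unscaled_p	     -256 .. 255, any byte offset
     offset_9bit_signed_scaled_p	     -2048 .. 2040, multiples of 8
     offset_12bit_unsigned_scaled_p	     0 .. 32760, multiples of 8  */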
4579
4580/* Implement TARGET_SHRINK_WRAP_GET_SEPARATE_COMPONENTS. */
4581
4582static sbitmap
4583aarch64_get_separate_components (void)
4584{
4585 aarch64_layout_frame ();
4586
4587 sbitmap components = sbitmap_alloc (LAST_SAVED_REGNUM + 1);
4588 bitmap_clear (components);
4589
 4590 /* The registers we need to save to the frame. */
4591 for (unsigned regno = 0; regno <= LAST_SAVED_REGNUM; regno++)
4592 if (aarch64_register_saved_on_entry (regno))
4593 {
6a70badb 4594 poly_int64 offset = cfun->machine->frame.reg_offset[regno];
827ab47a
KT
4595 if (!frame_pointer_needed)
4596 offset += cfun->machine->frame.frame_size
4597 - cfun->machine->frame.hard_fp_offset;
4598 /* Check that we can access the stack slot of the register with one
4599 direct load with no adjustments needed. */
4600 if (offset_12bit_unsigned_scaled_p (DImode, offset))
4601 bitmap_set_bit (components, regno);
4602 }
4603
4604 /* Don't mess with the hard frame pointer. */
4605 if (frame_pointer_needed)
4606 bitmap_clear_bit (components, HARD_FRAME_POINTER_REGNUM);
4607
4608 unsigned reg1 = cfun->machine->frame.wb_candidate1;
4609 unsigned reg2 = cfun->machine->frame.wb_candidate2;
4610 /* If aarch64_layout_frame has chosen registers to store/restore with
4611 writeback don't interfere with them to avoid having to output explicit
4612 stack adjustment instructions. */
4613 if (reg2 != INVALID_REGNUM)
4614 bitmap_clear_bit (components, reg2);
4615 if (reg1 != INVALID_REGNUM)
4616 bitmap_clear_bit (components, reg1);
4617
4618 bitmap_clear_bit (components, LR_REGNUM);
4619 bitmap_clear_bit (components, SP_REGNUM);
4620
4621 return components;
4622}
4623
4624/* Implement TARGET_SHRINK_WRAP_COMPONENTS_FOR_BB. */
4625
4626static sbitmap
4627aarch64_components_for_bb (basic_block bb)
4628{
4629 bitmap in = DF_LIVE_IN (bb);
4630 bitmap gen = &DF_LIVE_BB_INFO (bb)->gen;
4631 bitmap kill = &DF_LIVE_BB_INFO (bb)->kill;
4632
4633 sbitmap components = sbitmap_alloc (LAST_SAVED_REGNUM + 1);
4634 bitmap_clear (components);
4635
4636 /* GPRs are used in a bb if they are in the IN, GEN, or KILL sets. */
4637 for (unsigned regno = 0; regno <= LAST_SAVED_REGNUM; regno++)
4638 if ((!call_used_regs[regno])
4639 && (bitmap_bit_p (in, regno)
4640 || bitmap_bit_p (gen, regno)
4641 || bitmap_bit_p (kill, regno)))
3f26f054
WD
4642 {
4643 unsigned regno2, offset, offset2;
4644 bitmap_set_bit (components, regno);
4645
4646 /* If there is a callee-save at an adjacent offset, add it too
4647 to increase the use of LDP/STP. */
4648 offset = cfun->machine->frame.reg_offset[regno];
4649 regno2 = ((offset & 8) == 0) ? regno + 1 : regno - 1;
4650
4651 if (regno2 <= LAST_SAVED_REGNUM)
4652 {
4653 offset2 = cfun->machine->frame.reg_offset[regno2];
4654 if ((offset & ~8) == (offset2 & ~8))
4655 bitmap_set_bit (components, regno2);
4656 }
4657 }
827ab47a
KT
4658
4659 return components;
4660}
4661
4662/* Implement TARGET_SHRINK_WRAP_DISQUALIFY_COMPONENTS.
4663 Nothing to do for aarch64. */
4664
4665static void
4666aarch64_disqualify_components (sbitmap, edge, sbitmap, bool)
4667{
4668}
4669
4670/* Return the next set bit in BMP from START onwards. Return the total number
4671 of bits in BMP if no set bit is found at or after START. */
4672
4673static unsigned int
4674aarch64_get_next_set_bit (sbitmap bmp, unsigned int start)
4675{
4676 unsigned int nbits = SBITMAP_SIZE (bmp);
4677 if (start == nbits)
4678 return start;
4679
4680 gcc_assert (start < nbits);
4681 for (unsigned int i = start; i < nbits; i++)
4682 if (bitmap_bit_p (bmp, i))
4683 return i;
4684
4685 return nbits;
4686}
4687
4688/* Do the work for aarch64_emit_prologue_components and
4689 aarch64_emit_epilogue_components. COMPONENTS is the bitmap of registers
4690 to save/restore, PROLOGUE_P indicates whether to emit the prologue sequence
4691 for these components or the epilogue sequence. That is, it determines
4692 whether we should emit stores or loads and what kind of CFA notes to attach
4693 to the insns. Otherwise the logic for the two sequences is very
4694 similar. */
4695
4696static void
4697aarch64_process_components (sbitmap components, bool prologue_p)
4698{
4699 rtx ptr_reg = gen_rtx_REG (Pmode, frame_pointer_needed
4700 ? HARD_FRAME_POINTER_REGNUM
4701 : STACK_POINTER_REGNUM);
4702
4703 unsigned last_regno = SBITMAP_SIZE (components);
4704 unsigned regno = aarch64_get_next_set_bit (components, R0_REGNUM);
4705 rtx_insn *insn = NULL;
4706
4707 while (regno != last_regno)
4708 {
4709 /* AAPCS64 section 5.1.2 requires only the bottom 64 bits to be saved
4710 so DFmode for the vector registers is enough. */
0d4a1197 4711 machine_mode mode = GP_REGNUM_P (regno) ? E_DImode : E_DFmode;
827ab47a 4712 rtx reg = gen_rtx_REG (mode, regno);
6a70badb 4713 poly_int64 offset = cfun->machine->frame.reg_offset[regno];
827ab47a
KT
4714 if (!frame_pointer_needed)
4715 offset += cfun->machine->frame.frame_size
4716 - cfun->machine->frame.hard_fp_offset;
4717 rtx addr = plus_constant (Pmode, ptr_reg, offset);
4718 rtx mem = gen_frame_mem (mode, addr);
4719
4720 rtx set = prologue_p ? gen_rtx_SET (mem, reg) : gen_rtx_SET (reg, mem);
4721 unsigned regno2 = aarch64_get_next_set_bit (components, regno + 1);
4722 /* No more registers to handle after REGNO.
4723 Emit a single save/restore and exit. */
4724 if (regno2 == last_regno)
4725 {
4726 insn = emit_insn (set);
4727 RTX_FRAME_RELATED_P (insn) = 1;
4728 if (prologue_p)
4729 add_reg_note (insn, REG_CFA_OFFSET, copy_rtx (set));
4730 else
4731 add_reg_note (insn, REG_CFA_RESTORE, reg);
4732 break;
4733 }
4734
6a70badb 4735 poly_int64 offset2 = cfun->machine->frame.reg_offset[regno2];
827ab47a
KT
4736 /* The next register is not of the same class or its offset is not
4737 mergeable with the current one into a pair. */
4738 if (!satisfies_constraint_Ump (mem)
4739 || GP_REGNUM_P (regno) != GP_REGNUM_P (regno2)
6a70badb
RS
4740 || maybe_ne ((offset2 - cfun->machine->frame.reg_offset[regno]),
4741 GET_MODE_SIZE (mode)))
827ab47a
KT
4742 {
4743 insn = emit_insn (set);
4744 RTX_FRAME_RELATED_P (insn) = 1;
4745 if (prologue_p)
4746 add_reg_note (insn, REG_CFA_OFFSET, copy_rtx (set));
4747 else
4748 add_reg_note (insn, REG_CFA_RESTORE, reg);
4749
4750 regno = regno2;
4751 continue;
4752 }
4753
4754 /* REGNO2 can be saved/restored in a pair with REGNO. */
4755 rtx reg2 = gen_rtx_REG (mode, regno2);
4756 if (!frame_pointer_needed)
4757 offset2 += cfun->machine->frame.frame_size
4758 - cfun->machine->frame.hard_fp_offset;
4759 rtx addr2 = plus_constant (Pmode, ptr_reg, offset2);
4760 rtx mem2 = gen_frame_mem (mode, addr2);
4761 rtx set2 = prologue_p ? gen_rtx_SET (mem2, reg2)
4762 : gen_rtx_SET (reg2, mem2);
4763
4764 if (prologue_p)
4765 insn = emit_insn (aarch64_gen_store_pair (mode, mem, reg, mem2, reg2));
4766 else
4767 insn = emit_insn (aarch64_gen_load_pair (mode, reg, mem, reg2, mem2));
4768
4769 RTX_FRAME_RELATED_P (insn) = 1;
4770 if (prologue_p)
4771 {
4772 add_reg_note (insn, REG_CFA_OFFSET, set);
4773 add_reg_note (insn, REG_CFA_OFFSET, set2);
4774 }
4775 else
4776 {
4777 add_reg_note (insn, REG_CFA_RESTORE, reg);
4778 add_reg_note (insn, REG_CFA_RESTORE, reg2);
4779 }
4780
4781 regno = aarch64_get_next_set_bit (components, regno2 + 1);
4782 }
4783}
4784
4785/* Implement TARGET_SHRINK_WRAP_EMIT_PROLOGUE_COMPONENTS. */
4786
4787static void
4788aarch64_emit_prologue_components (sbitmap components)
4789{
4790 aarch64_process_components (components, true);
4791}
4792
4793/* Implement TARGET_SHRINK_WRAP_EMIT_EPILOGUE_COMPONENTS. */
4794
4795static void
4796aarch64_emit_epilogue_components (sbitmap components)
4797{
4798 aarch64_process_components (components, false);
4799}
4800
4801/* Implement TARGET_SHRINK_WRAP_SET_HANDLED_COMPONENTS. */
4802
4803static void
4804aarch64_set_handled_components (sbitmap components)
4805{
4806 for (unsigned regno = 0; regno <= LAST_SAVED_REGNUM; regno++)
4807 if (bitmap_bit_p (components, regno))
4808 cfun->machine->reg_is_wrapped_separately[regno] = true;
4809}
4810
43cacb12
RS
4811/* Add a REG_CFA_EXPRESSION note to INSN to say that register REG
4812 is saved at BASE + OFFSET. */
4813
4814static void
4815aarch64_add_cfa_expression (rtx_insn *insn, unsigned int reg,
4816 rtx base, poly_int64 offset)
4817{
4818 rtx mem = gen_frame_mem (DImode, plus_constant (Pmode, base, offset));
4819 add_reg_note (insn, REG_CFA_EXPRESSION,
4820 gen_rtx_SET (mem, regno_reg_rtx[reg]));
4821}
4822
43e9d192
IB
4823/* AArch64 stack frames generated by this compiler look like:
4824
4825 +-------------------------------+
4826 | |
4827 | incoming stack arguments |
4828 | |
34834420
MS
4829 +-------------------------------+
4830 | | <-- incoming stack pointer (aligned)
43e9d192
IB
4831 | callee-allocated save area |
4832 | for register varargs |
4833 | |
34834420
MS
4834 +-------------------------------+
4835 | local variables | <-- frame_pointer_rtx
43e9d192
IB
4836 | |
4837 +-------------------------------+
454fdba9
RL
4838 | padding0 | \
4839 +-------------------------------+ |
454fdba9 4840 | callee-saved registers | | frame.saved_regs_size
454fdba9
RL
4841 +-------------------------------+ |
4842 | LR' | |
4843 +-------------------------------+ |
34834420
MS
4844 | FP' | / <- hard_frame_pointer_rtx (aligned)
4845 +-------------------------------+
43e9d192
IB
4846 | dynamic allocation |
4847 +-------------------------------+
34834420
MS
4848 | padding |
4849 +-------------------------------+
4850 | outgoing stack arguments | <-- arg_pointer
4851 | |
4852 +-------------------------------+
4853 | | <-- stack_pointer_rtx (aligned)
43e9d192 4854
34834420
MS
4855 Dynamic stack allocations via alloca() decrease stack_pointer_rtx
4856 but leave frame_pointer_rtx and hard_frame_pointer_rtx
4857 unchanged. */
43e9d192
IB
4858
4859/* Generate the prologue instructions for entry into a function.
4860 Establish the stack frame by decreasing the stack pointer with a
4861 properly calculated size and, if necessary, create a frame record
 4862 filled with the values of LR and the previous frame pointer. The
6991c977 4863 current FP is also set up if it is in use. */
43e9d192
IB
4864
4865void
4866aarch64_expand_prologue (void)
4867{
43e9d192 4868 aarch64_layout_frame ();
43e9d192 4869
6a70badb
RS
4870 poly_int64 frame_size = cfun->machine->frame.frame_size;
4871 poly_int64 initial_adjust = cfun->machine->frame.initial_adjust;
71bfb77a 4872 HOST_WIDE_INT callee_adjust = cfun->machine->frame.callee_adjust;
6a70badb
RS
4873 poly_int64 final_adjust = cfun->machine->frame.final_adjust;
4874 poly_int64 callee_offset = cfun->machine->frame.callee_offset;
71bfb77a
WD
4875 unsigned reg1 = cfun->machine->frame.wb_candidate1;
4876 unsigned reg2 = cfun->machine->frame.wb_candidate2;
204d2c03 4877 bool emit_frame_chain = cfun->machine->frame.emit_frame_chain;
71bfb77a 4878 rtx_insn *insn;
43e9d192 4879
db58fd89
JW
4880 /* Sign return address for functions. */
4881 if (aarch64_return_address_signing_enabled ())
27169e45
JW
4882 {
4883 insn = emit_insn (gen_pacisp ());
4884 add_reg_note (insn, REG_CFA_TOGGLE_RA_MANGLE, const0_rtx);
4885 RTX_FRAME_RELATED_P (insn) = 1;
4886 }
db58fd89 4887
dd991abb 4888 if (flag_stack_usage_info)
6a70badb 4889 current_function_static_stack_size = constant_lower_bound (frame_size);
43e9d192 4890
a3eb8a52
EB
4891 if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK)
4892 {
4893 if (crtl->is_leaf && !cfun->calls_alloca)
4894 {
6a70badb
RS
4895 if (maybe_gt (frame_size, PROBE_INTERVAL)
4896 && maybe_gt (frame_size, get_stack_check_protect ()))
8c1dd970
JL
4897 aarch64_emit_probe_stack_range (get_stack_check_protect (),
4898 (frame_size
4899 - get_stack_check_protect ()));
a3eb8a52 4900 }
6a70badb 4901 else if (maybe_gt (frame_size, 0))
8c1dd970 4902 aarch64_emit_probe_stack_range (get_stack_check_protect (), frame_size);
a3eb8a52
EB
4903 }
4904
f5470a77
RS
4905 rtx ip0_rtx = gen_rtx_REG (Pmode, IP0_REGNUM);
4906 rtx ip1_rtx = gen_rtx_REG (Pmode, IP1_REGNUM);
4907
43cacb12 4908 aarch64_sub_sp (ip0_rtx, ip1_rtx, initial_adjust, true);
43e9d192 4909
71bfb77a
WD
4910 if (callee_adjust != 0)
4911 aarch64_push_regs (reg1, reg2, callee_adjust);
43e9d192 4912
204d2c03 4913 if (emit_frame_chain)
43e9d192 4914 {
43cacb12 4915 poly_int64 reg_offset = callee_adjust;
71bfb77a 4916 if (callee_adjust == 0)
43cacb12
RS
4917 {
4918 reg1 = R29_REGNUM;
4919 reg2 = R30_REGNUM;
4920 reg_offset = callee_offset;
4921 aarch64_save_callee_saves (DImode, reg_offset, reg1, reg2, false);
4922 }
f5470a77 4923 aarch64_add_offset (Pmode, hard_frame_pointer_rtx,
43cacb12
RS
4924 stack_pointer_rtx, callee_offset,
4925 ip1_rtx, ip0_rtx, frame_pointer_needed);
4926 if (frame_pointer_needed && !frame_size.is_constant ())
4927 {
4928 /* Variable-sized frames need to describe the save slot
4929 address using DW_CFA_expression rather than DW_CFA_offset.
4930 This means that, without taking further action, the
4931 locations of the registers that we've already saved would
4932 remain based on the stack pointer even after we redefine
4933 the CFA based on the frame pointer. We therefore need new
4934 DW_CFA_expressions to re-express the save slots with addresses
4935 based on the frame pointer. */
4936 rtx_insn *insn = get_last_insn ();
4937 gcc_assert (RTX_FRAME_RELATED_P (insn));
4938
4939 /* Add an explicit CFA definition if this was previously
4940 implicit. */
4941 if (!find_reg_note (insn, REG_CFA_ADJUST_CFA, NULL_RTX))
4942 {
4943 rtx src = plus_constant (Pmode, stack_pointer_rtx,
4944 callee_offset);
4945 add_reg_note (insn, REG_CFA_ADJUST_CFA,
4946 gen_rtx_SET (hard_frame_pointer_rtx, src));
4947 }
4948
4949 /* Change the save slot expressions for the registers that
4950 we've already saved. */
4951 reg_offset -= callee_offset;
4952 aarch64_add_cfa_expression (insn, reg2, hard_frame_pointer_rtx,
4953 reg_offset + UNITS_PER_WORD);
4954 aarch64_add_cfa_expression (insn, reg1, hard_frame_pointer_rtx,
4955 reg_offset);
4956 }
71bfb77a 4957 emit_insn (gen_stack_tie (stack_pointer_rtx, hard_frame_pointer_rtx));
43e9d192 4958 }
71bfb77a
WD
4959
4960 aarch64_save_callee_saves (DImode, callee_offset, R0_REGNUM, R30_REGNUM,
204d2c03 4961 callee_adjust != 0 || emit_frame_chain);
71bfb77a 4962 aarch64_save_callee_saves (DFmode, callee_offset, V0_REGNUM, V31_REGNUM,
204d2c03 4963 callee_adjust != 0 || emit_frame_chain);
43cacb12 4964 aarch64_sub_sp (ip1_rtx, ip0_rtx, final_adjust, !frame_pointer_needed);
43e9d192
IB
4965}
4966
4f942779
RL
4967/* Return TRUE if we can use a simple_return insn.
4968
4969 This function checks whether the callee saved stack is empty, which
 4970 means no restore actions are needed. The pro_and_epilogue pass will use
 4971 this to check whether the shrink-wrapping optimization is feasible. */
4972
4973bool
4974aarch64_use_return_insn_p (void)
4975{
4976 if (!reload_completed)
4977 return false;
4978
4979 if (crtl->profile)
4980 return false;
4981
4982 aarch64_layout_frame ();
4983
6a70badb 4984 return known_eq (cfun->machine->frame.frame_size, 0);
4f942779
RL
4985}
4986
71bfb77a
WD
4987/* Generate the epilogue instructions for returning from a function.
 4988 This is almost exactly the reverse of the prologue sequence, except
4989 that we need to insert barriers to avoid scheduling loads that read
4990 from a deallocated stack, and we optimize the unwind records by
4991 emitting them all together if possible. */
43e9d192
IB
4992void
4993aarch64_expand_epilogue (bool for_sibcall)
4994{
43e9d192 4995 aarch64_layout_frame ();
43e9d192 4996
6a70badb 4997 poly_int64 initial_adjust = cfun->machine->frame.initial_adjust;
71bfb77a 4998 HOST_WIDE_INT callee_adjust = cfun->machine->frame.callee_adjust;
6a70badb
RS
4999 poly_int64 final_adjust = cfun->machine->frame.final_adjust;
5000 poly_int64 callee_offset = cfun->machine->frame.callee_offset;
71bfb77a
WD
5001 unsigned reg1 = cfun->machine->frame.wb_candidate1;
5002 unsigned reg2 = cfun->machine->frame.wb_candidate2;
5003 rtx cfi_ops = NULL;
5004 rtx_insn *insn;
43cacb12
RS
5005 /* A stack clash protection prologue may not have left IP0_REGNUM or
5006 IP1_REGNUM in a usable state. The same is true for allocations
5007 with an SVE component, since we then need both temporary registers
5008 for each allocation. */
5009 bool can_inherit_p = (initial_adjust.is_constant ()
5010 && final_adjust.is_constant ()
5011 && !flag_stack_clash_protection);
44c0e7b9 5012
71bfb77a 5013 /* We need a memory barrier to prevent reads from the deallocated stack. */
6a70badb
RS
5014 bool need_barrier_p
5015 = maybe_ne (get_frame_size ()
5016 + cfun->machine->frame.saved_varargs_size, 0);
43e9d192 5017
71bfb77a 5018 /* Emit a barrier to prevent loads from a deallocated stack. */
6a70badb
RS
5019 if (maybe_gt (final_adjust, crtl->outgoing_args_size)
5020 || cfun->calls_alloca
8144a493 5021 || crtl->calls_eh_return)
43e9d192 5022 {
71bfb77a
WD
5023 emit_insn (gen_stack_tie (stack_pointer_rtx, stack_pointer_rtx));
5024 need_barrier_p = false;
5025 }
7e8c2bd5 5026
71bfb77a
WD
5027 /* Restore the stack pointer from the frame pointer if it may not
5028 be the same as the stack pointer. */
f5470a77
RS
5029 rtx ip0_rtx = gen_rtx_REG (Pmode, IP0_REGNUM);
5030 rtx ip1_rtx = gen_rtx_REG (Pmode, IP1_REGNUM);
6a70badb
RS
5031 if (frame_pointer_needed
5032 && (maybe_ne (final_adjust, 0) || cfun->calls_alloca))
f5470a77
RS
5033 /* If writeback is used when restoring callee-saves, the CFA
5034 is restored on the instruction doing the writeback. */
5035 aarch64_add_offset (Pmode, stack_pointer_rtx,
5036 hard_frame_pointer_rtx, -callee_offset,
43cacb12 5037 ip1_rtx, ip0_rtx, callee_adjust == 0);
71bfb77a 5038 else
43cacb12
RS
5039 aarch64_add_sp (ip1_rtx, ip0_rtx, final_adjust,
5040 !can_inherit_p || df_regs_ever_live_p (IP1_REGNUM));
43e9d192 5041
71bfb77a
WD
5042 aarch64_restore_callee_saves (DImode, callee_offset, R0_REGNUM, R30_REGNUM,
5043 callee_adjust != 0, &cfi_ops);
5044 aarch64_restore_callee_saves (DFmode, callee_offset, V0_REGNUM, V31_REGNUM,
5045 callee_adjust != 0, &cfi_ops);
43e9d192 5046
71bfb77a
WD
5047 if (need_barrier_p)
5048 emit_insn (gen_stack_tie (stack_pointer_rtx, stack_pointer_rtx));
5049
5050 if (callee_adjust != 0)
5051 aarch64_pop_regs (reg1, reg2, callee_adjust, &cfi_ops);
5052
6a70badb 5053 if (callee_adjust != 0 || maybe_gt (initial_adjust, 65536))
71bfb77a
WD
5054 {
5055 /* Emit delayed restores and set the CFA to be SP + initial_adjust. */
89ac681e 5056 insn = get_last_insn ();
71bfb77a
WD
5057 rtx new_cfa = plus_constant (Pmode, stack_pointer_rtx, initial_adjust);
5058 REG_NOTES (insn) = alloc_reg_note (REG_CFA_DEF_CFA, new_cfa, cfi_ops);
43e9d192 5059 RTX_FRAME_RELATED_P (insn) = 1;
71bfb77a 5060 cfi_ops = NULL;
43e9d192
IB
5061 }
5062
43cacb12
RS
5063 aarch64_add_sp (ip0_rtx, ip1_rtx, initial_adjust,
5064 !can_inherit_p || df_regs_ever_live_p (IP0_REGNUM));
7e8c2bd5 5065
71bfb77a
WD
5066 if (cfi_ops)
5067 {
5068 /* Emit delayed restores and reset the CFA to be SP. */
5069 insn = get_last_insn ();
5070 cfi_ops = alloc_reg_note (REG_CFA_DEF_CFA, stack_pointer_rtx, cfi_ops);
5071 REG_NOTES (insn) = cfi_ops;
5072 RTX_FRAME_RELATED_P (insn) = 1;
dd991abb
RH
5073 }
5074
db58fd89
JW
5075 /* We prefer to emit the combined return/authenticate instruction RETAA,
5076 however there are three cases in which we must instead emit an explicit
5077 authentication instruction.
5078
5079 1) Sibcalls don't return in a normal way, so if we're about to call one
5080 we must authenticate.
5081
5082 2) The RETAA instruction is not available before ARMv8.3-A, so if we are
5083 generating code for !TARGET_ARMV8_3 we can't use it and must
5084 explicitly authenticate.
5085
5086 3) On an eh_return path we make extra stack adjustments to update the
5087 canonical frame address to be the exception handler's CFA. We want
5088 to authenticate using the CFA of the function which calls eh_return.
5089 */
5090 if (aarch64_return_address_signing_enabled ()
5091 && (for_sibcall || !TARGET_ARMV8_3 || crtl->calls_eh_return))
27169e45
JW
5092 {
5093 insn = emit_insn (gen_autisp ());
5094 add_reg_note (insn, REG_CFA_TOGGLE_RA_MANGLE, const0_rtx);
5095 RTX_FRAME_RELATED_P (insn) = 1;
5096 }
db58fd89 5097
dd991abb
RH
5098 /* Stack adjustment for exception handler. */
5099 if (crtl->calls_eh_return)
5100 {
5101 /* We need to unwind the stack by the offset computed by
5102 EH_RETURN_STACKADJ_RTX. We have already reset the CFA
5103 to be SP; letting the CFA move during this adjustment
5104 is just as correct as retaining the CFA from the body
5105 of the function. Therefore, do nothing special. */
5106 emit_insn (gen_add2_insn (stack_pointer_rtx, EH_RETURN_STACKADJ_RTX));
43e9d192
IB
5107 }
5108
5109 emit_use (gen_rtx_REG (DImode, LR_REGNUM));
5110 if (!for_sibcall)
5111 emit_jump_insn (ret_rtx);
5112}
5113
8144a493
WD
5114/* Implement EH_RETURN_HANDLER_RTX. EH returns need to either return
5115 normally or return to a previous frame after unwinding.
1c960e02 5116
8144a493
WD
5117 An EH return uses a single shared return sequence. The epilogue is
5118 exactly like a normal epilogue except that it has an extra input
5119 register (EH_RETURN_STACKADJ_RTX) which contains the stack adjustment
5120 that must be applied after the frame has been destroyed. An extra label
5121 is inserted before the epilogue which initializes this register to zero,
5122 and this is the entry point for a normal return.
43e9d192 5123
8144a493
WD
5124 An actual EH return updates the return address, initializes the stack
5125 adjustment and jumps directly into the epilogue (bypassing the zeroing
5126 of the adjustment). Since the return address is typically saved on the
5127 stack when a function makes a call, the saved LR must be updated outside
5128 the epilogue.
43e9d192 5129
8144a493
WD
5130 This poses problems as the store is generated well before the epilogue,
5131 so the offset of LR is not known yet. Also optimizations will remove the
5132 store as it appears dead, even after the epilogue is generated (as the
5133 base or offset for loading LR is different in many cases).
43e9d192 5134
8144a493
WD
5135 To avoid these problems this implementation forces the frame pointer
5136 in eh_return functions so that the location of LR is fixed and known early.
5137 It also marks the store volatile, so no optimization is permitted to
5138 remove the store. */
5139rtx
5140aarch64_eh_return_handler_rtx (void)
5141{
5142 rtx tmp = gen_frame_mem (Pmode,
5143 plus_constant (Pmode, hard_frame_pointer_rtx, UNITS_PER_WORD));
43e9d192 5144
8144a493
WD
5145 /* Mark the store volatile, so no optimization is permitted to remove it. */
5146 MEM_VOLATILE_P (tmp) = true;
5147 return tmp;
43e9d192
IB
5148}
5149
43e9d192
IB
5150/* Output code to add DELTA to the first argument, and then jump
5151 to FUNCTION. Used for C++ multiple inheritance. */
5152static void
5153aarch64_output_mi_thunk (FILE *file, tree thunk ATTRIBUTE_UNUSED,
5154 HOST_WIDE_INT delta,
5155 HOST_WIDE_INT vcall_offset,
5156 tree function)
5157{
5158 /* The this pointer is always in x0. Note that this differs from
 5159 Arm, where the this pointer may be bumped to r1 if r0 is required
5160 to return a pointer to an aggregate. On AArch64 a result value
5161 pointer will be in x8. */
5162 int this_regno = R0_REGNUM;
5d8a22a5
DM
5163 rtx this_rtx, temp0, temp1, addr, funexp;
5164 rtx_insn *insn;
43e9d192 5165
75f1d6fc
SN
5166 reload_completed = 1;
5167 emit_note (NOTE_INSN_PROLOGUE_END);
43e9d192 5168
f5470a77
RS
5169 this_rtx = gen_rtx_REG (Pmode, this_regno);
5170 temp0 = gen_rtx_REG (Pmode, IP0_REGNUM);
5171 temp1 = gen_rtx_REG (Pmode, IP1_REGNUM);
5172
43e9d192 5173 if (vcall_offset == 0)
43cacb12 5174 aarch64_add_offset (Pmode, this_rtx, this_rtx, delta, temp1, temp0, false);
43e9d192
IB
5175 else
5176 {
28514dda 5177 gcc_assert ((vcall_offset & (POINTER_BYTES - 1)) == 0);
43e9d192 5178
75f1d6fc
SN
5179 addr = this_rtx;
5180 if (delta != 0)
5181 {
5182 if (delta >= -256 && delta < 256)
5183 addr = gen_rtx_PRE_MODIFY (Pmode, this_rtx,
5184 plus_constant (Pmode, this_rtx, delta));
5185 else
43cacb12
RS
5186 aarch64_add_offset (Pmode, this_rtx, this_rtx, delta,
5187 temp1, temp0, false);
43e9d192
IB
5188 }
5189
28514dda
YZ
5190 if (Pmode == ptr_mode)
5191 aarch64_emit_move (temp0, gen_rtx_MEM (ptr_mode, addr));
5192 else
5193 aarch64_emit_move (temp0,
5194 gen_rtx_ZERO_EXTEND (Pmode,
5195 gen_rtx_MEM (ptr_mode, addr)));
75f1d6fc 5196
28514dda 5197 if (vcall_offset >= -256 && vcall_offset < 4096 * POINTER_BYTES)
75f1d6fc 5198 addr = plus_constant (Pmode, temp0, vcall_offset);
43e9d192
IB
5199 else
5200 {
f43657b4
JW
5201 aarch64_internal_mov_immediate (temp1, GEN_INT (vcall_offset), true,
5202 Pmode);
75f1d6fc 5203 addr = gen_rtx_PLUS (Pmode, temp0, temp1);
43e9d192
IB
5204 }
5205
28514dda
YZ
5206 if (Pmode == ptr_mode)
5207 aarch64_emit_move (temp1, gen_rtx_MEM (ptr_mode,addr));
5208 else
5209 aarch64_emit_move (temp1,
5210 gen_rtx_SIGN_EXTEND (Pmode,
5211 gen_rtx_MEM (ptr_mode, addr)));
5212
75f1d6fc 5213 emit_insn (gen_add2_insn (this_rtx, temp1));
43e9d192
IB
5214 }
5215
75f1d6fc
SN
5216 /* Generate a tail call to the target function. */
5217 if (!TREE_USED (function))
5218 {
5219 assemble_external (function);
5220 TREE_USED (function) = 1;
5221 }
5222 funexp = XEXP (DECL_RTL (function), 0);
5223 funexp = gen_rtx_MEM (FUNCTION_MODE, funexp);
5224 insn = emit_call_insn (gen_sibcall (funexp, const0_rtx, NULL_RTX));
5225 SIBLING_CALL_P (insn) = 1;
5226
5227 insn = get_insns ();
5228 shorten_branches (insn);
5229 final_start_function (insn, file, 1);
5230 final (insn, file, 1);
43e9d192 5231 final_end_function ();
75f1d6fc
SN
5232
5233 /* Stop pretending to be a post-reload pass. */
5234 reload_completed = 0;
43e9d192
IB
5235}
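For illustration only (my example, not from the source): with DELTA == 8 and VCALL_OFFSET == 0, the thunk reduces to bumping the this pointer and tail-calling the target:

/* Sketch of the emitted sequence:
	add	x0, x0, 8	// adjust the this pointer by DELTA
	b	<function>	// sibling call; no frame is created  */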
5236
43e9d192
IB
5237static bool
5238aarch64_tls_referenced_p (rtx x)
5239{
5240 if (!TARGET_HAVE_TLS)
5241 return false;
e7de8563
RS
5242 subrtx_iterator::array_type array;
5243 FOR_EACH_SUBRTX (iter, array, x, ALL)
5244 {
5245 const_rtx x = *iter;
5246 if (GET_CODE (x) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (x) != 0)
5247 return true;
5248 /* Don't recurse into UNSPEC_TLS looking for TLS symbols; these are
5249 TLS offsets, not real symbol references. */
5250 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
5251 iter.skip_subrtxes ();
5252 }
5253 return false;
43e9d192
IB
5254}
5255
5256
43e9d192
IB
5257/* Return true if val can be encoded as a 12-bit unsigned immediate with
5258 a left shift of 0 or 12 bits. */
5259bool
5260aarch64_uimm12_shift (HOST_WIDE_INT val)
5261{
5262 return ((val & (((HOST_WIDE_INT) 0xfff) << 0)) == val
5263 || (val & (((HOST_WIDE_INT) 0xfff) << 12)) == val
5264 );
5265}
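A minimal standalone sketch of the same test (the helper name and the use of uint64_t are mine, not the compiler's), together with a few sample values:

#include <stdbool.h>
#include <stdint.h>

/* True iff VAL is a 12-bit unsigned immediate, optionally shifted left by 12,
   mirroring aarch64_uimm12_shift above.  */
static bool
uimm12_shift_p (uint64_t val)
{
  return (val & 0xfffULL) == val	       /* e.g. 0xabc    */
	 || (val & (0xfffULL << 12)) == val;   /* e.g. 0xabc000 */
}

/* uimm12_shift_p (0xabc)    -> true
   uimm12_shift_p (0xabc000) -> true
   uimm12_shift_p (0x1001)   -> false: the set bits straddle both positions.  */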
5266
5267
5268/* Return true if val is an immediate that can be loaded into a
5269 register by a MOVZ instruction. */
5270static bool
77e994c9 5271aarch64_movw_imm (HOST_WIDE_INT val, scalar_int_mode mode)
43e9d192
IB
5272{
5273 if (GET_MODE_SIZE (mode) > 4)
5274 {
5275 if ((val & (((HOST_WIDE_INT) 0xffff) << 32)) == val
5276 || (val & (((HOST_WIDE_INT) 0xffff) << 48)) == val)
5277 return 1;
5278 }
5279 else
5280 {
43cacb12
RS
5281 /* Ignore sign extension. */
5282 val &= (HOST_WIDE_INT) 0xffffffff;
5283 }
5284 return ((val & (((HOST_WIDE_INT) 0xffff) << 0)) == val
5285 || (val & (((HOST_WIDE_INT) 0xffff) << 16)) == val);
5286}
5287
5288/* VAL is a value with the inner mode of MODE. Replicate it to fill a
5289 64-bit (DImode) integer. */
5290
5291static unsigned HOST_WIDE_INT
5292aarch64_replicate_bitmask_imm (unsigned HOST_WIDE_INT val, machine_mode mode)
5293{
5294 unsigned int size = GET_MODE_UNIT_PRECISION (mode);
5295 while (size < 64)
5296 {
5297 val &= (HOST_WIDE_INT_1U << size) - 1;
5298 val |= val << size;
5299 size *= 2;
43e9d192 5300 }
43cacb12 5301 return val;
43e9d192
IB
5302}
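As a worked example (mine, not in the source), replicating the QImode value 0xab proceeds like this:

/* size = 8:  0x00000000000000ab -> 0x000000000000abab
   size = 16: 0x000000000000abab -> 0x00000000abababab
   size = 32: 0x00000000abababab -> 0xabababababababab
   size = 64: the loop exits; the byte now fills the whole DImode value.  */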
5303
a64c73a2
WD
5304/* Multipliers for repeating bitmasks of width 32, 16, 8, 4, and 2. */
5305
5306static const unsigned HOST_WIDE_INT bitmask_imm_mul[] =
5307 {
5308 0x0000000100000001ull,
5309 0x0001000100010001ull,
5310 0x0101010101010101ull,
5311 0x1111111111111111ull,
5312 0x5555555555555555ull,
5313 };
5314
43e9d192
IB
5315
5316/* Return true if val is a valid bitmask immediate. */
a64c73a2 5317
43e9d192 5318bool
a64c73a2 5319aarch64_bitmask_imm (HOST_WIDE_INT val_in, machine_mode mode)
43e9d192 5320{
a64c73a2
WD
5321 unsigned HOST_WIDE_INT val, tmp, mask, first_one, next_one;
5322 int bits;
5323
5324 /* Check for a single sequence of one bits and return quickly if so.
 5325 The special cases of all ones and all zeroes return false. */
43cacb12 5326 val = aarch64_replicate_bitmask_imm (val_in, mode);
a64c73a2
WD
5327 tmp = val + (val & -val);
5328
5329 if (tmp == (tmp & -tmp))
5330 return (val + 1) > 1;
5331
5332 /* Replicate 32-bit immediates so we can treat them as 64-bit. */
5333 if (mode == SImode)
5334 val = (val << 32) | (val & 0xffffffff);
5335
5336 /* Invert if the immediate doesn't start with a zero bit - this means we
5337 only need to search for sequences of one bits. */
5338 if (val & 1)
5339 val = ~val;
5340
5341 /* Find the first set bit and set tmp to val with the first sequence of one
5342 bits removed. Return success if there is a single sequence of ones. */
5343 first_one = val & -val;
5344 tmp = val & (val + first_one);
5345
5346 if (tmp == 0)
5347 return true;
5348
5349 /* Find the next set bit and compute the difference in bit position. */
5350 next_one = tmp & -tmp;
5351 bits = clz_hwi (first_one) - clz_hwi (next_one);
5352 mask = val ^ tmp;
5353
5354 /* Check the bit position difference is a power of 2, and that the first
5355 sequence of one bits fits within 'bits' bits. */
5356 if ((mask >> bits) != 0 || bits != (bits & -bits))
5357 return false;
5358
5359 /* Check the sequence of one bits is repeated 64/bits times. */
5360 return val == mask * bitmask_imm_mul[__builtin_clz (bits) - 26];
43e9d192
IB
5361}
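A worked trace of the early-exit path (my example): for the DImode value 0x0000000000ff0000, a single contiguous block of ones:

/* val & -val              = 0x0000000000010000   (lowest set bit)
   tmp = val + (val & -val) = 0x0000000001000000   (the carry clears the whole run)
   tmp & -tmp == tmp                               (tmp is a power of two)
   => return (val + 1) > 1                         -> true, since val is neither 0 nor ~0.  */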
5362
43fd192f
MC
5363/* Create a mask of ones covering the lowest to the highest bit set in VAL_IN.
 5364 Assumed precondition: VAL_IN is not zero. */
5365
5366unsigned HOST_WIDE_INT
5367aarch64_and_split_imm1 (HOST_WIDE_INT val_in)
5368{
5369 int lowest_bit_set = ctz_hwi (val_in);
5370 int highest_bit_set = floor_log2 (val_in);
5371 gcc_assert (val_in != 0);
5372
5373 return ((HOST_WIDE_INT_UC (2) << highest_bit_set) -
5374 (HOST_WIDE_INT_1U << lowest_bit_set));
5375}
5376
5377/* Create a constant in which all bits outside the range from the lowest set
 5378 bit to the highest set bit of VAL_IN are set to 1. */
5379
5380unsigned HOST_WIDE_INT
5381aarch64_and_split_imm2 (HOST_WIDE_INT val_in)
5382{
5383 return val_in | ~aarch64_and_split_imm1 (val_in);
5384}
5385
5386/* Return true if VAL_IN is a valid 'and' bitmask immediate. */
5387
5388bool
5389aarch64_and_bitmask_imm (unsigned HOST_WIDE_INT val_in, machine_mode mode)
5390{
77e994c9
RS
5391 scalar_int_mode int_mode;
5392 if (!is_a <scalar_int_mode> (mode, &int_mode))
5393 return false;
5394
5395 if (aarch64_bitmask_imm (val_in, int_mode))
43fd192f
MC
5396 return false;
5397
77e994c9 5398 if (aarch64_move_imm (val_in, int_mode))
43fd192f
MC
5399 return false;
5400
5401 unsigned HOST_WIDE_INT imm2 = aarch64_and_split_imm2 (val_in);
5402
77e994c9 5403 return aarch64_bitmask_imm (imm2, int_mode);
43fd192f 5404}
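A worked example (mine): take VAL_IN = 0x00ff00f0, which is neither a bitmask nor a MOV immediate:

/* lowest set bit = 4, highest set bit = 23
   imm1 = (2 << 23) - (1 << 4) = 0x0000000000fffff0   (one contiguous run of ones)
   imm2 = val_in | ~imm1       = 0xffffffffffff00ff   (its zeros form one run)
   Both pass aarch64_bitmask_imm, and imm1 & imm2 == val_in, so the AND can
   presumably be split into two AND-immediate operations.  */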
43e9d192
IB
5405
5406/* Return true if val is an immediate that can be loaded into a
5407 register in a single instruction. */
5408bool
ef4bddc2 5409aarch64_move_imm (HOST_WIDE_INT val, machine_mode mode)
43e9d192 5410{
77e994c9
RS
5411 scalar_int_mode int_mode;
5412 if (!is_a <scalar_int_mode> (mode, &int_mode))
5413 return false;
5414
5415 if (aarch64_movw_imm (val, int_mode) || aarch64_movw_imm (~val, int_mode))
43e9d192 5416 return 1;
77e994c9 5417 return aarch64_bitmask_imm (val, int_mode);
43e9d192
IB
5418}
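For illustration (not from the source), a few DImode values and how this predicate classifies them:

/* 0x0000000012340000 -> true  : one 16-bit chunk at bit 16, a single MOVZ
   0xffffffffedcbffff -> true  : its bitwise NOT is MOVZ-able, a single MOVN
   0x5555555555555555 -> true  : repeating pattern, a valid bitmask (logical) immediate
   0x1234567876543210 -> false : needs a MOVZ/MOVK sequence instead.  */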
5419
5420static bool
ef4bddc2 5421aarch64_cannot_force_const_mem (machine_mode mode ATTRIBUTE_UNUSED, rtx x)
43e9d192
IB
5422{
5423 rtx base, offset;
7eda14e1 5424
43e9d192
IB
5425 if (GET_CODE (x) == HIGH)
5426 return true;
5427
43cacb12
RS
5428 /* There's no way to calculate VL-based values using relocations. */
5429 subrtx_iterator::array_type array;
5430 FOR_EACH_SUBRTX (iter, array, x, ALL)
5431 if (GET_CODE (*iter) == CONST_POLY_INT)
5432 return true;
5433
43e9d192
IB
5434 split_const (x, &base, &offset);
5435 if (GET_CODE (base) == SYMBOL_REF || GET_CODE (base) == LABEL_REF)
28514dda 5436 {
43cacb12 5437 if (aarch64_classify_symbol (base, INTVAL (offset))
28514dda
YZ
5438 != SYMBOL_FORCE_TO_MEM)
5439 return true;
5440 else
5441 /* Avoid generating a 64-bit relocation in ILP32; leave
5442 to aarch64_expand_mov_immediate to handle it properly. */
5443 return mode != ptr_mode;
5444 }
43e9d192
IB
5445
5446 return aarch64_tls_referenced_p (x);
5447}
5448
e79136e4
WD
5449/* Implement TARGET_CASE_VALUES_THRESHOLD.
5450 The expansion for a table switch is quite expensive due to the number
 5451 of instructions, the table lookup and the hard-to-predict indirect jump.
 5452 When optimizing for speed with -O3 enabled, use the per-core tuning if
5453 set, otherwise use tables for > 16 cases as a tradeoff between size and
5454 performance. When optimizing for size, use the default setting. */
50487d79
EM
5455
5456static unsigned int
5457aarch64_case_values_threshold (void)
5458{
5459 /* Use the specified limit for the number of cases before using jump
5460 tables at higher optimization levels. */
5461 if (optimize > 2
5462 && selected_cpu->tune->max_case_values != 0)
5463 return selected_cpu->tune->max_case_values;
5464 else
e79136e4 5465 return optimize_size ? default_case_values_threshold () : 17;
50487d79
EM
5466}
5467
43e9d192
IB
5468/* Return true if register REGNO is a valid index register.
5469 STRICT_P is true if REG_OK_STRICT is in effect. */
5470
5471bool
5472aarch64_regno_ok_for_index_p (int regno, bool strict_p)
5473{
5474 if (!HARD_REGISTER_NUM_P (regno))
5475 {
5476 if (!strict_p)
5477 return true;
5478
5479 if (!reg_renumber)
5480 return false;
5481
5482 regno = reg_renumber[regno];
5483 }
5484 return GP_REGNUM_P (regno);
5485}
5486
5487/* Return true if register REGNO is a valid base register.
5488 STRICT_P is true if REG_OK_STRICT is in effect. */
5489
5490bool
5491aarch64_regno_ok_for_base_p (int regno, bool strict_p)
5492{
5493 if (!HARD_REGISTER_NUM_P (regno))
5494 {
5495 if (!strict_p)
5496 return true;
5497
5498 if (!reg_renumber)
5499 return false;
5500
5501 regno = reg_renumber[regno];
5502 }
5503
5504 /* The fake registers will be eliminated to either the stack or
5505 hard frame pointer, both of which are usually valid base registers.
5506 Reload deals with the cases where the eliminated form isn't valid. */
5507 return (GP_REGNUM_P (regno)
5508 || regno == SP_REGNUM
5509 || regno == FRAME_POINTER_REGNUM
5510 || regno == ARG_POINTER_REGNUM);
5511}
5512
5513/* Return true if X is a valid base register.
5514 STRICT_P is true if REG_OK_STRICT is in effect. */
5515
5516static bool
5517aarch64_base_register_rtx_p (rtx x, bool strict_p)
5518{
76160199
RS
5519 if (!strict_p
5520 && GET_CODE (x) == SUBREG
5521 && contains_reg_of_mode[GENERAL_REGS][GET_MODE (SUBREG_REG (x))])
43e9d192
IB
5522 x = SUBREG_REG (x);
5523
5524 return (REG_P (x) && aarch64_regno_ok_for_base_p (REGNO (x), strict_p));
5525}
5526
5527/* Return true if address offset is a valid index. If it is, fill in INFO
5528 appropriately. STRICT_P is true if REG_OK_STRICT is in effect. */
5529
5530static bool
5531aarch64_classify_index (struct aarch64_address_info *info, rtx x,
ef4bddc2 5532 machine_mode mode, bool strict_p)
43e9d192
IB
5533{
5534 enum aarch64_address_type type;
5535 rtx index;
5536 int shift;
5537
5538 /* (reg:P) */
5539 if ((REG_P (x) || GET_CODE (x) == SUBREG)
5540 && GET_MODE (x) == Pmode)
5541 {
5542 type = ADDRESS_REG_REG;
5543 index = x;
5544 shift = 0;
5545 }
5546 /* (sign_extend:DI (reg:SI)) */
5547 else if ((GET_CODE (x) == SIGN_EXTEND
5548 || GET_CODE (x) == ZERO_EXTEND)
5549 && GET_MODE (x) == DImode
5550 && GET_MODE (XEXP (x, 0)) == SImode)
5551 {
5552 type = (GET_CODE (x) == SIGN_EXTEND)
5553 ? ADDRESS_REG_SXTW : ADDRESS_REG_UXTW;
5554 index = XEXP (x, 0);
5555 shift = 0;
5556 }
5557 /* (mult:DI (sign_extend:DI (reg:SI)) (const_int scale)) */
5558 else if (GET_CODE (x) == MULT
5559 && (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
5560 || GET_CODE (XEXP (x, 0)) == ZERO_EXTEND)
5561 && GET_MODE (XEXP (x, 0)) == DImode
5562 && GET_MODE (XEXP (XEXP (x, 0), 0)) == SImode
5563 && CONST_INT_P (XEXP (x, 1)))
5564 {
5565 type = (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND)
5566 ? ADDRESS_REG_SXTW : ADDRESS_REG_UXTW;
5567 index = XEXP (XEXP (x, 0), 0);
5568 shift = exact_log2 (INTVAL (XEXP (x, 1)));
5569 }
5570 /* (ashift:DI (sign_extend:DI (reg:SI)) (const_int shift)) */
5571 else if (GET_CODE (x) == ASHIFT
5572 && (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
5573 || GET_CODE (XEXP (x, 0)) == ZERO_EXTEND)
5574 && GET_MODE (XEXP (x, 0)) == DImode
5575 && GET_MODE (XEXP (XEXP (x, 0), 0)) == SImode
5576 && CONST_INT_P (XEXP (x, 1)))
5577 {
5578 type = (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND)
5579 ? ADDRESS_REG_SXTW : ADDRESS_REG_UXTW;
5580 index = XEXP (XEXP (x, 0), 0);
5581 shift = INTVAL (XEXP (x, 1));
5582 }
5583 /* (sign_extract:DI (mult:DI (reg:DI) (const_int scale)) 32+shift 0) */
5584 else if ((GET_CODE (x) == SIGN_EXTRACT
5585 || GET_CODE (x) == ZERO_EXTRACT)
5586 && GET_MODE (x) == DImode
5587 && GET_CODE (XEXP (x, 0)) == MULT
5588 && GET_MODE (XEXP (XEXP (x, 0), 0)) == DImode
5589 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))
5590 {
5591 type = (GET_CODE (x) == SIGN_EXTRACT)
5592 ? ADDRESS_REG_SXTW : ADDRESS_REG_UXTW;
5593 index = XEXP (XEXP (x, 0), 0);
5594 shift = exact_log2 (INTVAL (XEXP (XEXP (x, 0), 1)));
5595 if (INTVAL (XEXP (x, 1)) != 32 + shift
5596 || INTVAL (XEXP (x, 2)) != 0)
5597 shift = -1;
5598 }
5599 /* (and:DI (mult:DI (reg:DI) (const_int scale))
5600 (const_int 0xffffffff<<shift)) */
5601 else if (GET_CODE (x) == AND
5602 && GET_MODE (x) == DImode
5603 && GET_CODE (XEXP (x, 0)) == MULT
5604 && GET_MODE (XEXP (XEXP (x, 0), 0)) == DImode
5605 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
5606 && CONST_INT_P (XEXP (x, 1)))
5607 {
5608 type = ADDRESS_REG_UXTW;
5609 index = XEXP (XEXP (x, 0), 0);
5610 shift = exact_log2 (INTVAL (XEXP (XEXP (x, 0), 1)));
5611 if (INTVAL (XEXP (x, 1)) != (HOST_WIDE_INT)0xffffffff << shift)
5612 shift = -1;
5613 }
5614 /* (sign_extract:DI (ashift:DI (reg:DI) (const_int shift)) 32+shift 0) */
5615 else if ((GET_CODE (x) == SIGN_EXTRACT
5616 || GET_CODE (x) == ZERO_EXTRACT)
5617 && GET_MODE (x) == DImode
5618 && GET_CODE (XEXP (x, 0)) == ASHIFT
5619 && GET_MODE (XEXP (XEXP (x, 0), 0)) == DImode
5620 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))
5621 {
5622 type = (GET_CODE (x) == SIGN_EXTRACT)
5623 ? ADDRESS_REG_SXTW : ADDRESS_REG_UXTW;
5624 index = XEXP (XEXP (x, 0), 0);
5625 shift = INTVAL (XEXP (XEXP (x, 0), 1));
5626 if (INTVAL (XEXP (x, 1)) != 32 + shift
5627 || INTVAL (XEXP (x, 2)) != 0)
5628 shift = -1;
5629 }
5630 /* (and:DI (ashift:DI (reg:DI) (const_int shift))
5631 (const_int 0xffffffff<<shift)) */
5632 else if (GET_CODE (x) == AND
5633 && GET_MODE (x) == DImode
5634 && GET_CODE (XEXP (x, 0)) == ASHIFT
5635 && GET_MODE (XEXP (XEXP (x, 0), 0)) == DImode
5636 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
5637 && CONST_INT_P (XEXP (x, 1)))
5638 {
5639 type = ADDRESS_REG_UXTW;
5640 index = XEXP (XEXP (x, 0), 0);
5641 shift = INTVAL (XEXP (XEXP (x, 0), 1));
5642 if (INTVAL (XEXP (x, 1)) != (HOST_WIDE_INT)0xffffffff << shift)
5643 shift = -1;
5644 }
5645 /* (mult:P (reg:P) (const_int scale)) */
5646 else if (GET_CODE (x) == MULT
5647 && GET_MODE (x) == Pmode
5648 && GET_MODE (XEXP (x, 0)) == Pmode
5649 && CONST_INT_P (XEXP (x, 1)))
5650 {
5651 type = ADDRESS_REG_REG;
5652 index = XEXP (x, 0);
5653 shift = exact_log2 (INTVAL (XEXP (x, 1)));
5654 }
5655 /* (ashift:P (reg:P) (const_int shift)) */
5656 else if (GET_CODE (x) == ASHIFT
5657 && GET_MODE (x) == Pmode
5658 && GET_MODE (XEXP (x, 0)) == Pmode
5659 && CONST_INT_P (XEXP (x, 1)))
5660 {
5661 type = ADDRESS_REG_REG;
5662 index = XEXP (x, 0);
5663 shift = INTVAL (XEXP (x, 1));
5664 }
5665 else
5666 return false;
5667
76160199
RS
5668 if (!strict_p
5669 && GET_CODE (index) == SUBREG
5670 && contains_reg_of_mode[GENERAL_REGS][GET_MODE (SUBREG_REG (index))])
43e9d192
IB
5671 index = SUBREG_REG (index);
5672
43cacb12
RS
5673 if (aarch64_sve_data_mode_p (mode))
5674 {
5675 if (type != ADDRESS_REG_REG
5676 || (1 << shift) != GET_MODE_UNIT_SIZE (mode))
5677 return false;
5678 }
5679 else
5680 {
5681 if (shift != 0
5682 && !(IN_RANGE (shift, 1, 3)
5683 && known_eq (1 << shift, GET_MODE_SIZE (mode))))
5684 return false;
5685 }
5686
5687 if (REG_P (index)
43e9d192
IB
5688 && aarch64_regno_ok_for_index_p (REGNO (index), strict_p))
5689 {
5690 info->type = type;
5691 info->offset = index;
5692 info->shift = shift;
5693 return true;
5694 }
5695
5696 return false;
5697}
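As illustrative examples (mine), a few index forms this function accepts for an 8-byte (DImode) access, with the addressing syntax they correspond to; the register numbers are arbitrary:

/* (reg:DI x1)                                          -> [x0, x1]           shift 0
   (ashift:DI (reg:DI x1) (const_int 3))                -> [x0, x1, lsl #3]    shift 3
   (mult:DI (sign_extend:DI (reg:SI w1)) (const_int 8)) -> [x0, w1, sxtw #3]   shift 3
   A nonzero shift is only accepted when (1 << shift) equals the access size,
   so the #3 forms above require an 8-byte access.  */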
5698
abc52318
KT
5699/* Return true if MODE is one of the modes for which we
5700 support LDP/STP operations. */
5701
5702static bool
5703aarch64_mode_valid_for_sched_fusion_p (machine_mode mode)
5704{
5705 return mode == SImode || mode == DImode
5706 || mode == SFmode || mode == DFmode
5707 || (aarch64_vector_mode_supported_p (mode)
9f5361c8
KT
5708 && (known_eq (GET_MODE_SIZE (mode), 8)
5709 || (known_eq (GET_MODE_SIZE (mode), 16)
5710 && (aarch64_tune_params.extra_tuning_flags
5711 & AARCH64_EXTRA_TUNE_NO_LDP_STP_QREGS) == 0)));
abc52318
KT
5712}
5713
9e0218fc
RH
5714/* Return true if REGNO is a virtual pointer register, or an eliminable
5715 "soft" frame register. Like REGNO_PTR_FRAME_P except that we don't
5716 include stack_pointer or hard_frame_pointer. */
5717static bool
5718virt_or_elim_regno_p (unsigned regno)
5719{
5720 return ((regno >= FIRST_VIRTUAL_REGISTER
5721 && regno <= LAST_VIRTUAL_POINTER_REGISTER)
5722 || regno == FRAME_POINTER_REGNUM
5723 || regno == ARG_POINTER_REGNUM);
5724}
5725
a97d8b98
RS
5726/* Return true if X is a valid address of type TYPE for machine mode MODE.
5727 If it is, fill in INFO appropriately. STRICT_P is true if
5728 REG_OK_STRICT is in effect. */
43e9d192
IB
5729
5730static bool
5731aarch64_classify_address (struct aarch64_address_info *info,
a97d8b98
RS
5732 rtx x, machine_mode mode, bool strict_p,
5733 aarch64_addr_query_type type = ADDR_QUERY_M)
43e9d192
IB
5734{
5735 enum rtx_code code = GET_CODE (x);
5736 rtx op0, op1;
dc640181
RS
5737 poly_int64 offset;
5738
6a70badb 5739 HOST_WIDE_INT const_size;
2d8c6dc1 5740
80d43579
WD
5741 /* On BE, we use load/store pair for all large int mode load/stores.
5742 TI/TFmode may also use a load/store pair. */
43cacb12
RS
5743 unsigned int vec_flags = aarch64_classify_vector_mode (mode);
5744 bool advsimd_struct_p = (vec_flags == (VEC_ADVSIMD | VEC_STRUCT));
a97d8b98 5745 bool load_store_pair_p = (type == ADDR_QUERY_LDP_STP
a25831ac 5746 || type == ADDR_QUERY_LDP_STP_N
80d43579
WD
5747 || mode == TImode
5748 || mode == TFmode
43cacb12 5749 || (BYTES_BIG_ENDIAN && advsimd_struct_p));
2d8c6dc1 5750
a25831ac
AV
 5751 /* For ADDR_QUERY_LDP_STP_N the incoming mode corresponds to the actual
 5752 size of the memory being loaded/stored, and the mode used for the
 5753 address calculation is half of that. */
5754 if (type == ADDR_QUERY_LDP_STP_N
5755 && known_eq (GET_MODE_SIZE (mode), 16))
5756 mode = DFmode;
5757
6a70badb 5758 bool allow_reg_index_p = (!load_store_pair_p
43cacb12
RS
5759 && (known_lt (GET_MODE_SIZE (mode), 16)
5760 || vec_flags == VEC_ADVSIMD
5761 || vec_flags == VEC_SVE_DATA));
5762
5763 /* For SVE, only accept [Rn], [Rn, Rm, LSL #shift] and
5764 [Rn, #offset, MUL VL]. */
5765 if ((vec_flags & (VEC_SVE_DATA | VEC_SVE_PRED)) != 0
5766 && (code != REG && code != PLUS))
5767 return false;
2d8c6dc1
AH
5768
5769 /* On LE, for AdvSIMD, don't support anything other than POST_INC or
5770 REG addressing. */
43cacb12
RS
5771 if (advsimd_struct_p
5772 && !BYTES_BIG_ENDIAN
43e9d192
IB
5773 && (code != POST_INC && code != REG))
5774 return false;
5775
43cacb12
RS
5776 gcc_checking_assert (GET_MODE (x) == VOIDmode
5777 || SCALAR_INT_MODE_P (GET_MODE (x)));
5778
43e9d192
IB
5779 switch (code)
5780 {
5781 case REG:
5782 case SUBREG:
5783 info->type = ADDRESS_REG_IMM;
5784 info->base = x;
5785 info->offset = const0_rtx;
dc640181 5786 info->const_offset = 0;
43e9d192
IB
5787 return aarch64_base_register_rtx_p (x, strict_p);
5788
5789 case PLUS:
5790 op0 = XEXP (x, 0);
5791 op1 = XEXP (x, 1);
15c0c5c9
JW
5792
5793 if (! strict_p
4aa81c2e 5794 && REG_P (op0)
9e0218fc 5795 && virt_or_elim_regno_p (REGNO (op0))
dc640181 5796 && poly_int_rtx_p (op1, &offset))
15c0c5c9
JW
5797 {
5798 info->type = ADDRESS_REG_IMM;
5799 info->base = op0;
5800 info->offset = op1;
dc640181 5801 info->const_offset = offset;
15c0c5c9
JW
5802
5803 return true;
5804 }
5805
6a70badb 5806 if (maybe_ne (GET_MODE_SIZE (mode), 0)
dc640181
RS
5807 && aarch64_base_register_rtx_p (op0, strict_p)
5808 && poly_int_rtx_p (op1, &offset))
43e9d192 5809 {
43e9d192
IB
5810 info->type = ADDRESS_REG_IMM;
5811 info->base = op0;
5812 info->offset = op1;
dc640181 5813 info->const_offset = offset;
43e9d192
IB
5814
5815 /* TImode and TFmode values are allowed in both pairs of X
5816 registers and individual Q registers. The available
5817 address modes are:
5818 X,X: 7-bit signed scaled offset
5819 Q: 9-bit signed offset
5820 We conservatively require an offset representable in either mode.
8ed49fab
KT
5821 When performing the check for pairs of X registers i.e. LDP/STP
5822 pass down DImode since that is the natural size of the LDP/STP
5823 instruction memory accesses. */
43e9d192 5824 if (mode == TImode || mode == TFmode)
8ed49fab 5825 return (aarch64_offset_7bit_signed_scaled_p (DImode, offset)
8734dfac
WD
5826 && (offset_9bit_signed_unscaled_p (mode, offset)
5827 || offset_12bit_unsigned_scaled_p (mode, offset)));
43e9d192 5828
2d8c6dc1
AH
 5829 /* A 7-bit offset check because OImode will emit an ldp/stp
5830 instruction (only big endian will get here).
5831 For ldp/stp instructions, the offset is scaled for the size of a
5832 single element of the pair. */
5833 if (mode == OImode)
5834 return aarch64_offset_7bit_signed_scaled_p (TImode, offset);
5835
 5836 /* Three 9/12-bit offset checks because CImode will emit three
5837 ldr/str instructions (only big endian will get here). */
5838 if (mode == CImode)
5839 return (aarch64_offset_7bit_signed_scaled_p (TImode, offset)
5840 && (offset_9bit_signed_unscaled_p (V16QImode, offset + 32)
5841 || offset_12bit_unsigned_scaled_p (V16QImode,
5842 offset + 32)));
5843
 5844 /* Two 7-bit offset checks because XImode will emit two ldp/stp
5845 instructions (only big endian will get here). */
5846 if (mode == XImode)
5847 return (aarch64_offset_7bit_signed_scaled_p (TImode, offset)
5848 && aarch64_offset_7bit_signed_scaled_p (TImode,
5849 offset + 32));
5850
43cacb12
RS
5851 /* Make "m" use the LD1 offset range for SVE data modes, so
 5852 that pre-RTL optimizers like ivopts will work with that
5853 instead of the wider LDR/STR range. */
5854 if (vec_flags == VEC_SVE_DATA)
5855 return (type == ADDR_QUERY_M
5856 ? offset_4bit_signed_scaled_p (mode, offset)
5857 : offset_9bit_signed_scaled_p (mode, offset));
5858
9f4cbab8
RS
5859 if (vec_flags == (VEC_SVE_DATA | VEC_STRUCT))
5860 {
5861 poly_int64 end_offset = (offset
5862 + GET_MODE_SIZE (mode)
5863 - BYTES_PER_SVE_VECTOR);
5864 return (type == ADDR_QUERY_M
5865 ? offset_4bit_signed_scaled_p (mode, offset)
5866 : (offset_9bit_signed_scaled_p (SVE_BYTE_MODE, offset)
5867 && offset_9bit_signed_scaled_p (SVE_BYTE_MODE,
5868 end_offset)));
5869 }
5870
43cacb12
RS
5871 if (vec_flags == VEC_SVE_PRED)
5872 return offset_9bit_signed_scaled_p (mode, offset);
5873
2d8c6dc1 5874 if (load_store_pair_p)
6a70badb 5875 return ((known_eq (GET_MODE_SIZE (mode), 4)
9f5361c8
KT
5876 || known_eq (GET_MODE_SIZE (mode), 8)
5877 || known_eq (GET_MODE_SIZE (mode), 16))
44707478 5878 && aarch64_offset_7bit_signed_scaled_p (mode, offset));
43e9d192
IB
5879 else
5880 return (offset_9bit_signed_unscaled_p (mode, offset)
5881 || offset_12bit_unsigned_scaled_p (mode, offset));
5882 }
5883
5884 if (allow_reg_index_p)
5885 {
5886 /* Look for base + (scaled/extended) index register. */
5887 if (aarch64_base_register_rtx_p (op0, strict_p)
5888 && aarch64_classify_index (info, op1, mode, strict_p))
5889 {
5890 info->base = op0;
5891 return true;
5892 }
5893 if (aarch64_base_register_rtx_p (op1, strict_p)
5894 && aarch64_classify_index (info, op0, mode, strict_p))
5895 {
5896 info->base = op1;
5897 return true;
5898 }
5899 }
5900
5901 return false;
5902
5903 case POST_INC:
5904 case POST_DEC:
5905 case PRE_INC:
5906 case PRE_DEC:
5907 info->type = ADDRESS_REG_WB;
5908 info->base = XEXP (x, 0);
5909 info->offset = NULL_RTX;
5910 return aarch64_base_register_rtx_p (info->base, strict_p);
5911
5912 case POST_MODIFY:
5913 case PRE_MODIFY:
5914 info->type = ADDRESS_REG_WB;
5915 info->base = XEXP (x, 0);
5916 if (GET_CODE (XEXP (x, 1)) == PLUS
dc640181 5917 && poly_int_rtx_p (XEXP (XEXP (x, 1), 1), &offset)
43e9d192
IB
5918 && rtx_equal_p (XEXP (XEXP (x, 1), 0), info->base)
5919 && aarch64_base_register_rtx_p (info->base, strict_p))
5920 {
43e9d192 5921 info->offset = XEXP (XEXP (x, 1), 1);
dc640181 5922 info->const_offset = offset;
43e9d192
IB
5923
5924 /* TImode and TFmode values are allowed in both pairs of X
5925 registers and individual Q registers. The available
5926 address modes are:
5927 X,X: 7-bit signed scaled offset
5928 Q: 9-bit signed offset
5929 We conservatively require an offset representable in either mode.
5930 */
5931 if (mode == TImode || mode == TFmode)
44707478 5932 return (aarch64_offset_7bit_signed_scaled_p (mode, offset)
43e9d192
IB
5933 && offset_9bit_signed_unscaled_p (mode, offset));
5934
2d8c6dc1 5935 if (load_store_pair_p)
6a70badb 5936 return ((known_eq (GET_MODE_SIZE (mode), 4)
9f5361c8
KT
5937 || known_eq (GET_MODE_SIZE (mode), 8)
5938 || known_eq (GET_MODE_SIZE (mode), 16))
44707478 5939 && aarch64_offset_7bit_signed_scaled_p (mode, offset));
43e9d192
IB
5940 else
5941 return offset_9bit_signed_unscaled_p (mode, offset);
5942 }
5943 return false;
5944
5945 case CONST:
5946 case SYMBOL_REF:
5947 case LABEL_REF:
79517551
SN
5948 /* load literal: pc-relative constant pool entry. Only supported
5949 for SI mode or larger. */
43e9d192 5950 info->type = ADDRESS_SYMBOLIC;
2d8c6dc1 5951
6a70badb
RS
5952 if (!load_store_pair_p
5953 && GET_MODE_SIZE (mode).is_constant (&const_size)
5954 && const_size >= 4)
43e9d192
IB
5955 {
5956 rtx sym, addend;
5957
5958 split_const (x, &sym, &addend);
b4f50fd4
RR
5959 return ((GET_CODE (sym) == LABEL_REF
5960 || (GET_CODE (sym) == SYMBOL_REF
5961 && CONSTANT_POOL_ADDRESS_P (sym)
9ee6540a 5962 && aarch64_pcrelative_literal_loads)));
43e9d192
IB
5963 }
5964 return false;
5965
5966 case LO_SUM:
5967 info->type = ADDRESS_LO_SUM;
5968 info->base = XEXP (x, 0);
5969 info->offset = XEXP (x, 1);
5970 if (allow_reg_index_p
5971 && aarch64_base_register_rtx_p (info->base, strict_p))
5972 {
5973 rtx sym, offs;
5974 split_const (info->offset, &sym, &offs);
5975 if (GET_CODE (sym) == SYMBOL_REF
43cacb12
RS
5976 && (aarch64_classify_symbol (sym, INTVAL (offs))
5977 == SYMBOL_SMALL_ABSOLUTE))
43e9d192
IB
5978 {
5979 /* The symbol and offset must be aligned to the access size. */
5980 unsigned int align;
43e9d192
IB
5981
5982 if (CONSTANT_POOL_ADDRESS_P (sym))
5983 align = GET_MODE_ALIGNMENT (get_pool_mode (sym));
5984 else if (TREE_CONSTANT_POOL_ADDRESS_P (sym))
5985 {
5986 tree exp = SYMBOL_REF_DECL (sym);
5987 align = TYPE_ALIGN (TREE_TYPE (exp));
58e17cf8 5988 align = aarch64_constant_alignment (exp, align);
43e9d192
IB
5989 }
5990 else if (SYMBOL_REF_DECL (sym))
5991 align = DECL_ALIGN (SYMBOL_REF_DECL (sym));
6c031d8d
KV
5992 else if (SYMBOL_REF_HAS_BLOCK_INFO_P (sym)
5993 && SYMBOL_REF_BLOCK (sym) != NULL)
5994 align = SYMBOL_REF_BLOCK (sym)->alignment;
43e9d192
IB
5995 else
5996 align = BITS_PER_UNIT;
5997
6a70badb
RS
5998 poly_int64 ref_size = GET_MODE_SIZE (mode);
5999 if (known_eq (ref_size, 0))
43e9d192
IB
6000 ref_size = GET_MODE_SIZE (DImode);
6001
6a70badb
RS
6002 return (multiple_p (INTVAL (offs), ref_size)
6003 && multiple_p (align / BITS_PER_UNIT, ref_size));
43e9d192
IB
6004 }
6005 }
6006 return false;
6007
6008 default:
6009 return false;
6010 }
6011}
6012
9bf2f779
KT
6013/* Return true if the address X is valid for a PRFM instruction.
6014 STRICT_P is true if we should do strict checking with
6015 aarch64_classify_address. */
6016
6017bool
6018aarch64_address_valid_for_prefetch_p (rtx x, bool strict_p)
6019{
6020 struct aarch64_address_info addr;
6021
6022 /* PRFM accepts the same addresses as DImode... */
a97d8b98 6023 bool res = aarch64_classify_address (&addr, x, DImode, strict_p);
9bf2f779
KT
6024 if (!res)
6025 return false;
6026
6027 /* ... except writeback forms. */
6028 return addr.type != ADDRESS_REG_WB;
6029}
6030
43e9d192
IB
6031bool
6032aarch64_symbolic_address_p (rtx x)
6033{
6034 rtx offset;
6035
6036 split_const (x, &x, &offset);
6037 return GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == LABEL_REF;
6038}
6039
a6e0bfa7 6040/* Classify the base of symbolic expression X. */
da4f13a4
MS
6041
6042enum aarch64_symbol_type
a6e0bfa7 6043aarch64_classify_symbolic_expression (rtx x)
43e9d192
IB
6044{
6045 rtx offset;
da4f13a4 6046
43e9d192 6047 split_const (x, &x, &offset);
43cacb12 6048 return aarch64_classify_symbol (x, INTVAL (offset));
43e9d192
IB
6049}
6050
6051
6052/* Return TRUE if X is a legitimate address for accessing memory in
6053 mode MODE. */
6054static bool
ef4bddc2 6055aarch64_legitimate_address_hook_p (machine_mode mode, rtx x, bool strict_p)
43e9d192
IB
6056{
6057 struct aarch64_address_info addr;
6058
a97d8b98 6059 return aarch64_classify_address (&addr, x, mode, strict_p);
43e9d192
IB
6060}
6061
a97d8b98
RS
6062/* Return TRUE if X is a legitimate address of type TYPE for accessing
6063 memory in mode MODE. STRICT_P is true if REG_OK_STRICT is in effect. */
43e9d192 6064bool
a97d8b98
RS
6065aarch64_legitimate_address_p (machine_mode mode, rtx x, bool strict_p,
6066 aarch64_addr_query_type type)
43e9d192
IB
6067{
6068 struct aarch64_address_info addr;
6069
a97d8b98 6070 return aarch64_classify_address (&addr, x, mode, strict_p, type);
43e9d192
IB
6071}
6072
9005477f
RS
6073/* Implement TARGET_LEGITIMIZE_ADDRESS_DISPLACEMENT. */
6074
491ec060 6075static bool
9005477f
RS
6076aarch64_legitimize_address_displacement (rtx *offset1, rtx *offset2,
6077 poly_int64 orig_offset,
6078 machine_mode mode)
491ec060 6079{
6a70badb
RS
6080 HOST_WIDE_INT size;
6081 if (GET_MODE_SIZE (mode).is_constant (&size))
6082 {
9005477f
RS
6083 HOST_WIDE_INT const_offset, second_offset;
6084
6085 /* A general SVE offset is A * VQ + B. Remove the A component from
6086 coefficient 0 in order to get the constant B. */
6087 const_offset = orig_offset.coeffs[0] - orig_offset.coeffs[1];
6088
6089 /* Split an out-of-range address displacement into a base and
6090 offset. Use 4KB range for 1- and 2-byte accesses and a 16KB
6091 range otherwise to increase opportunities for sharing the base
6092 address of different sizes. Unaligned accesses use the signed
6093 9-bit range, TImode/TFmode use the intersection of signed
6094 scaled 7-bit and signed 9-bit offset. */
6a70badb 6095 if (mode == TImode || mode == TFmode)
9005477f
RS
6096 second_offset = ((const_offset + 0x100) & 0x1f8) - 0x100;
6097 else if ((const_offset & (size - 1)) != 0)
6098 second_offset = ((const_offset + 0x100) & 0x1ff) - 0x100;
6a70badb 6099 else
9005477f 6100 second_offset = const_offset & (size < 4 ? 0xfff : 0x3ffc);
491ec060 6101
9005477f
RS
6102 if (second_offset == 0 || known_eq (orig_offset, second_offset))
6103 return false;
6104
6105 /* Split the offset into second_offset and the rest. */
6106 *offset1 = gen_int_mode (orig_offset - second_offset, Pmode);
6107 *offset2 = gen_int_mode (second_offset, Pmode);
6108 return true;
6109 }
6110 else
6111 {
6112 /* Get the mode we should use as the basis of the range. For structure
6113 modes this is the mode of one vector. */
6114 unsigned int vec_flags = aarch64_classify_vector_mode (mode);
6115 machine_mode step_mode
6116 = (vec_flags & VEC_STRUCT) != 0 ? SVE_BYTE_MODE : mode;
6117
6118 /* Get the "mul vl" multiplier we'd like to use. */
6119 HOST_WIDE_INT factor = GET_MODE_SIZE (step_mode).coeffs[1];
6120 HOST_WIDE_INT vnum = orig_offset.coeffs[1] / factor;
6121 if (vec_flags & VEC_SVE_DATA)
6122 /* LDR supports a 9-bit range, but the move patterns for
6123 structure modes require all vectors to be in range of the
 6124 same base. The simplest way of accommodating that while still
6125 promoting reuse of anchor points between different modes is
6126 to use an 8-bit range unconditionally. */
6127 vnum = ((vnum + 128) & 255) - 128;
6128 else
6129 /* Predicates are only handled singly, so we might as well use
6130 the full range. */
6131 vnum = ((vnum + 256) & 511) - 256;
6132 if (vnum == 0)
6133 return false;
6134
6135 /* Convert the "mul vl" multiplier into a byte offset. */
6136 poly_int64 second_offset = GET_MODE_SIZE (step_mode) * vnum;
6137 if (known_eq (second_offset, orig_offset))
6138 return false;
6139
6140 /* Split the offset into second_offset and the rest. */
6141 *offset1 = gen_int_mode (orig_offset - second_offset, Pmode);
6142 *offset2 = gen_int_mode (second_offset, Pmode);
6a70badb
RS
6143 return true;
6144 }
491ec060
WD
6145}
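A worked example of the constant-size path (my numbers): splitting an out-of-range, 4-byte-aligned SImode offset of 0x10008:

/* size = 4, const_offset = 0x10008
   second_offset = const_offset & 0x3ffc = 0x8
   *offset1 = 0x10000   (folded into the anchor base, shareable between accesses)
   *offset2 = 0x8       (fits the scaled unsigned 12-bit LDR/STR offset range)  */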
6146
a2170965
TC
6147/* Return the binary representation of floating point constant VALUE in INTVAL.
6148 If the value cannot be converted, return false without setting INTVAL.
6149 The conversion is done in the given MODE. */
6150bool
6151aarch64_reinterpret_float_as_int (rtx value, unsigned HOST_WIDE_INT *intval)
6152{
6153
6154 /* We make a general exception for 0. */
6155 if (aarch64_float_const_zero_rtx_p (value))
6156 {
6157 *intval = 0;
6158 return true;
6159 }
6160
0d0e0188 6161 scalar_float_mode mode;
a2170965 6162 if (GET_CODE (value) != CONST_DOUBLE
0d0e0188 6163 || !is_a <scalar_float_mode> (GET_MODE (value), &mode)
a2170965
TC
6164 || GET_MODE_BITSIZE (mode) > HOST_BITS_PER_WIDE_INT
6165 /* Only support up to DF mode. */
6166 || GET_MODE_BITSIZE (mode) > GET_MODE_BITSIZE (DFmode))
6167 return false;
6168
6169 unsigned HOST_WIDE_INT ival = 0;
6170
6171 long res[2];
6172 real_to_target (res,
6173 CONST_DOUBLE_REAL_VALUE (value),
6174 REAL_MODE_FORMAT (mode));
6175
5c22bb48
TC
6176 if (mode == DFmode)
6177 {
6178 int order = BYTES_BIG_ENDIAN ? 1 : 0;
6179 ival = zext_hwi (res[order], 32);
6180 ival |= (zext_hwi (res[1 - order], 32) << 32);
6181 }
6182 else
6183 ival = zext_hwi (res[0], 32);
a2170965
TC
6184
6185 *intval = ival;
6186 return true;
6187}
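For example (mine, not from the source): the DFmode constant 1.0 converts to its IEEE-754 encoding 0x3ff0000000000000; since that integer is a single 16-bit chunk at bit 48, aarch64_float_const_rtx_p below sees a one-instruction MOV and can prefer MOV+FMOV over a literal load.

/* real_to_target (1.0, DFmode) -> 0x3ff0000000000000 == 0x3ff0 << 48,
   so aarch64_internal_mov_immediate needs only a single MOVZ.  */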
6188
6189/* Return TRUE if rtx X is an immediate constant that can be moved using a
6190 single MOV(+MOVK) followed by an FMOV. */
6191bool
6192aarch64_float_const_rtx_p (rtx x)
6193{
6194 machine_mode mode = GET_MODE (x);
6195 if (mode == VOIDmode)
6196 return false;
6197
6198 /* Determine whether it's cheaper to write float constants as
 6199 mov/movk pairs rather than ldr/adrp pairs. */
6200 unsigned HOST_WIDE_INT ival;
6201
6202 if (GET_CODE (x) == CONST_DOUBLE
6203 && SCALAR_FLOAT_MODE_P (mode)
6204 && aarch64_reinterpret_float_as_int (x, &ival))
6205 {
77e994c9
RS
6206 scalar_int_mode imode = (mode == HFmode
6207 ? SImode
6208 : int_mode_for_mode (mode).require ());
a2170965
TC
6209 int num_instr = aarch64_internal_mov_immediate
6210 (NULL_RTX, gen_int_mode (ival, imode), false, imode);
6211 return num_instr < 3;
6212 }
6213
6214 return false;
6215}
6216
43e9d192
IB
6217/* Return TRUE if rtx X is immediate constant 0.0 */
6218bool
3520f7cc 6219aarch64_float_const_zero_rtx_p (rtx x)
43e9d192 6220{
43e9d192
IB
6221 if (GET_MODE (x) == VOIDmode)
6222 return false;
6223
34a72c33 6224 if (REAL_VALUE_MINUS_ZERO (*CONST_DOUBLE_REAL_VALUE (x)))
43e9d192 6225 return !HONOR_SIGNED_ZEROS (GET_MODE (x));
34a72c33 6226 return real_equal (CONST_DOUBLE_REAL_VALUE (x), &dconst0);
43e9d192
IB
6227}
6228
a2170965
TC
6229/* Return TRUE if rtx X is immediate constant that fits in a single
6230 MOVI immediate operation. */
6231bool
6232aarch64_can_const_movi_rtx_p (rtx x, machine_mode mode)
6233{
6234 if (!TARGET_SIMD)
6235 return false;
6236
77e994c9
RS
6237 machine_mode vmode;
6238 scalar_int_mode imode;
a2170965
TC
6239 unsigned HOST_WIDE_INT ival;
6240
6241 if (GET_CODE (x) == CONST_DOUBLE
6242 && SCALAR_FLOAT_MODE_P (mode))
6243 {
6244 if (!aarch64_reinterpret_float_as_int (x, &ival))
6245 return false;
6246
35c38fa6
TC
6247 /* We make a general exception for 0. */
6248 if (aarch64_float_const_zero_rtx_p (x))
6249 return true;
6250
304b9962 6251 imode = int_mode_for_mode (mode).require ();
a2170965
TC
6252 }
6253 else if (GET_CODE (x) == CONST_INT
77e994c9
RS
6254 && is_a <scalar_int_mode> (mode, &imode))
6255 ival = INTVAL (x);
a2170965
TC
6256 else
6257 return false;
6258
 6259 /* Use a 64-bit mode for everything except for DI/DF mode, where we use
 6260 a 128-bit vector mode. */
77e994c9 6261 int width = GET_MODE_BITSIZE (imode) == 64 ? 128 : 64;
a2170965
TC
6262
6263 vmode = aarch64_simd_container_mode (imode, width);
6264 rtx v_op = aarch64_simd_gen_const_vector_dup (vmode, ival);
6265
b187677b 6266 return aarch64_simd_valid_immediate (v_op, NULL);
a2170965
TC
6267}
6268
6269
70f09188
AP
6270/* Return the fixed registers used for condition codes. */
6271
6272static bool
6273aarch64_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
6274{
6275 *p1 = CC_REGNUM;
6276 *p2 = INVALID_REGNUM;
6277 return true;
6278}
6279
47210a04
RL
6280/* This function is used by the call expanders of the machine description.
6281 RESULT is the register in which the result is returned. It's NULL for
6282 "call" and "sibcall".
6283 MEM is the location of the function call.
6284 SIBCALL indicates whether this function call is normal call or sibling call.
6285 It will generate different pattern accordingly. */
6286
6287void
6288aarch64_expand_call (rtx result, rtx mem, bool sibcall)
6289{
6290 rtx call, callee, tmp;
6291 rtvec vec;
6292 machine_mode mode;
6293
6294 gcc_assert (MEM_P (mem));
6295 callee = XEXP (mem, 0);
6296 mode = GET_MODE (callee);
6297 gcc_assert (mode == Pmode);
6298
6299 /* Decide if we should generate indirect calls by loading the
6300 address of the callee into a register before performing
6301 the branch-and-link. */
6302 if (SYMBOL_REF_P (callee)
6303 ? (aarch64_is_long_call_p (callee)
6304 || aarch64_is_noplt_call_p (callee))
6305 : !REG_P (callee))
6306 XEXP (mem, 0) = force_reg (mode, callee);
6307
6308 call = gen_rtx_CALL (VOIDmode, mem, const0_rtx);
6309
6310 if (result != NULL_RTX)
6311 call = gen_rtx_SET (result, call);
6312
6313 if (sibcall)
6314 tmp = ret_rtx;
6315 else
6316 tmp = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (Pmode, LR_REGNUM));
6317
6318 vec = gen_rtvec (2, call, tmp);
6319 call = gen_rtx_PARALLEL (VOIDmode, vec);
6320
6321 aarch64_emit_call_insn (call);
6322}
6323
78607708
TV
6324/* Emit call insn with PAT and do aarch64-specific handling. */
6325
d07a3fed 6326void
78607708
TV
6327aarch64_emit_call_insn (rtx pat)
6328{
6329 rtx insn = emit_call_insn (pat);
6330
6331 rtx *fusage = &CALL_INSN_FUNCTION_USAGE (insn);
6332 clobber_reg (fusage, gen_rtx_REG (word_mode, IP0_REGNUM));
6333 clobber_reg (fusage, gen_rtx_REG (word_mode, IP1_REGNUM));
6334}
6335
ef4bddc2 6336machine_mode
43e9d192
IB
6337aarch64_select_cc_mode (RTX_CODE code, rtx x, rtx y)
6338{
6339 /* All floating point compares return CCFP if it is an equality
6340 comparison, and CCFPE otherwise. */
6341 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
6342 {
6343 switch (code)
6344 {
6345 case EQ:
6346 case NE:
6347 case UNORDERED:
6348 case ORDERED:
6349 case UNLT:
6350 case UNLE:
6351 case UNGT:
6352 case UNGE:
6353 case UNEQ:
43e9d192
IB
6354 return CCFPmode;
6355
6356 case LT:
6357 case LE:
6358 case GT:
6359 case GE:
8332c5ee 6360 case LTGT:
43e9d192
IB
6361 return CCFPEmode;
6362
6363 default:
6364 gcc_unreachable ();
6365 }
6366 }
6367
2b8568fe
KT
6368 /* Equality comparisons of short modes against zero can be performed
6369 using the TST instruction with the appropriate bitmask. */
6370 if (y == const0_rtx && REG_P (x)
6371 && (code == EQ || code == NE)
6372 && (GET_MODE (x) == HImode || GET_MODE (x) == QImode))
6373 return CC_NZmode;
6374
b06335f9
KT
6375 /* Similarly, comparisons of zero_extends from shorter modes can
6376 be performed using an ANDS with an immediate mask. */
6377 if (y == const0_rtx && GET_CODE (x) == ZERO_EXTEND
6378 && (GET_MODE (x) == SImode || GET_MODE (x) == DImode)
6379 && (GET_MODE (XEXP (x, 0)) == HImode || GET_MODE (XEXP (x, 0)) == QImode)
6380 && (code == EQ || code == NE))
6381 return CC_NZmode;
6382
43e9d192
IB
6383 if ((GET_MODE (x) == SImode || GET_MODE (x) == DImode)
6384 && y == const0_rtx
6385 && (code == EQ || code == NE || code == LT || code == GE)
b056c910 6386 && (GET_CODE (x) == PLUS || GET_CODE (x) == MINUS || GET_CODE (x) == AND
7325d85a
KT
6387 || GET_CODE (x) == NEG
6388 || (GET_CODE (x) == ZERO_EXTRACT && CONST_INT_P (XEXP (x, 1))
6389 && CONST_INT_P (XEXP (x, 2)))))
43e9d192
IB
6390 return CC_NZmode;
6391
1c992d1e 6392 /* A compare with a shifted operand. Because of canonicalization,
43e9d192
IB
6393 the comparison will have to be swapped when we emit the assembly
6394 code. */
6395 if ((GET_MODE (x) == SImode || GET_MODE (x) == DImode)
ffa8a921 6396 && (REG_P (y) || GET_CODE (y) == SUBREG || y == const0_rtx)
43e9d192
IB
6397 && (GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT
6398 || GET_CODE (x) == LSHIFTRT
1c992d1e 6399 || GET_CODE (x) == ZERO_EXTEND || GET_CODE (x) == SIGN_EXTEND))
43e9d192
IB
6400 return CC_SWPmode;
6401
1c992d1e
RE
6402 /* Similarly for a negated operand, but we can only do this for
6403 equalities. */
6404 if ((GET_MODE (x) == SImode || GET_MODE (x) == DImode)
4aa81c2e 6405 && (REG_P (y) || GET_CODE (y) == SUBREG)
1c992d1e
RE
6406 && (code == EQ || code == NE)
6407 && GET_CODE (x) == NEG)
6408 return CC_Zmode;
6409
ef22810a
RH
6410 /* A test for unsigned overflow. */
6411 if ((GET_MODE (x) == DImode || GET_MODE (x) == TImode)
6412 && code == NE
6413 && GET_CODE (x) == PLUS
6414 && GET_CODE (y) == ZERO_EXTEND)
6415 return CC_Cmode;
6416
30c46053
MC
6417 /* A test for signed overflow. */
6418 if ((GET_MODE (x) == DImode || GET_MODE (x) == TImode)
6419 && code == NE
6420 && GET_CODE (x) == PLUS
6421 && GET_CODE (y) == SIGN_EXTEND)
6422 return CC_Vmode;
6423
43e9d192
IB
6424 /* For everything else, return CCmode. */
6425 return CCmode;
6426}
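A few examples of the mapping (mine, read off the cases above):

/* (compare (plus x y) 0) with EQ/NE/LT/GE       -> CC_NZmode (use ADDS and the N/Z flags)
   (compare (ashift x n) y), y a register or 0   -> CC_SWPmode (operands swapped on output)
   NE compare of a PLUS against a ZERO_EXTEND    -> CC_Cmode (unsigned overflow test)
   FP equality/unordered tests                   -> CCFPmode; FP ordering tests -> CCFPEmode.  */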
6427
3dfa7055 6428static int
b8506a8a 6429aarch64_get_condition_code_1 (machine_mode, enum rtx_code);
3dfa7055 6430
cd5660ab 6431int
43e9d192
IB
6432aarch64_get_condition_code (rtx x)
6433{
ef4bddc2 6434 machine_mode mode = GET_MODE (XEXP (x, 0));
43e9d192
IB
6435 enum rtx_code comp_code = GET_CODE (x);
6436
6437 if (GET_MODE_CLASS (mode) != MODE_CC)
6438 mode = SELECT_CC_MODE (comp_code, XEXP (x, 0), XEXP (x, 1));
3dfa7055
ZC
6439 return aarch64_get_condition_code_1 (mode, comp_code);
6440}
43e9d192 6441
3dfa7055 6442static int
b8506a8a 6443aarch64_get_condition_code_1 (machine_mode mode, enum rtx_code comp_code)
3dfa7055 6444{
43e9d192
IB
6445 switch (mode)
6446 {
4e10a5a7
RS
6447 case E_CCFPmode:
6448 case E_CCFPEmode:
43e9d192
IB
6449 switch (comp_code)
6450 {
6451 case GE: return AARCH64_GE;
6452 case GT: return AARCH64_GT;
6453 case LE: return AARCH64_LS;
6454 case LT: return AARCH64_MI;
6455 case NE: return AARCH64_NE;
6456 case EQ: return AARCH64_EQ;
6457 case ORDERED: return AARCH64_VC;
6458 case UNORDERED: return AARCH64_VS;
6459 case UNLT: return AARCH64_LT;
6460 case UNLE: return AARCH64_LE;
6461 case UNGT: return AARCH64_HI;
6462 case UNGE: return AARCH64_PL;
cd5660ab 6463 default: return -1;
43e9d192
IB
6464 }
6465 break;
6466
4e10a5a7 6467 case E_CCmode:
43e9d192
IB
6468 switch (comp_code)
6469 {
6470 case NE: return AARCH64_NE;
6471 case EQ: return AARCH64_EQ;
6472 case GE: return AARCH64_GE;
6473 case GT: return AARCH64_GT;
6474 case LE: return AARCH64_LE;
6475 case LT: return AARCH64_LT;
6476 case GEU: return AARCH64_CS;
6477 case GTU: return AARCH64_HI;
6478 case LEU: return AARCH64_LS;
6479 case LTU: return AARCH64_CC;
cd5660ab 6480 default: return -1;
43e9d192
IB
6481 }
6482 break;
6483
4e10a5a7 6484 case E_CC_SWPmode:
43e9d192
IB
6485 switch (comp_code)
6486 {
6487 case NE: return AARCH64_NE;
6488 case EQ: return AARCH64_EQ;
6489 case GE: return AARCH64_LE;
6490 case GT: return AARCH64_LT;
6491 case LE: return AARCH64_GE;
6492 case LT: return AARCH64_GT;
6493 case GEU: return AARCH64_LS;
6494 case GTU: return AARCH64_CC;
6495 case LEU: return AARCH64_CS;
6496 case LTU: return AARCH64_HI;
cd5660ab 6497 default: return -1;
43e9d192
IB
6498 }
6499 break;
6500
4e10a5a7 6501 case E_CC_NZmode:
43e9d192
IB
6502 switch (comp_code)
6503 {
6504 case NE: return AARCH64_NE;
6505 case EQ: return AARCH64_EQ;
6506 case GE: return AARCH64_PL;
6507 case LT: return AARCH64_MI;
cd5660ab 6508 default: return -1;
43e9d192
IB
6509 }
6510 break;
6511
4e10a5a7 6512 case E_CC_Zmode:
1c992d1e
RE
6513 switch (comp_code)
6514 {
6515 case NE: return AARCH64_NE;
6516 case EQ: return AARCH64_EQ;
cd5660ab 6517 default: return -1;
1c992d1e
RE
6518 }
6519 break;
6520
4e10a5a7 6521 case E_CC_Cmode:
ef22810a
RH
6522 switch (comp_code)
6523 {
6524 case NE: return AARCH64_CS;
6525 case EQ: return AARCH64_CC;
6526 default: return -1;
6527 }
6528 break;
6529
30c46053
MC
6530 case E_CC_Vmode:
6531 switch (comp_code)
6532 {
6533 case NE: return AARCH64_VS;
6534 case EQ: return AARCH64_VC;
6535 default: return -1;
6536 }
6537 break;
6538
43e9d192 6539 default:
cd5660ab 6540 return -1;
43e9d192 6541 }
3dfa7055 6542
3dfa7055 6543 return -1;
43e9d192
IB
6544}
6545
ddeabd3e
AL
6546bool
6547aarch64_const_vec_all_same_in_range_p (rtx x,
6a70badb
RS
6548 HOST_WIDE_INT minval,
6549 HOST_WIDE_INT maxval)
ddeabd3e 6550{
6a70badb
RS
6551 rtx elt;
6552 return (const_vec_duplicate_p (x, &elt)
6553 && CONST_INT_P (elt)
6554 && IN_RANGE (INTVAL (elt), minval, maxval));
ddeabd3e
AL
6555}
6556
6557bool
6558aarch64_const_vec_all_same_int_p (rtx x, HOST_WIDE_INT val)
6559{
6560 return aarch64_const_vec_all_same_in_range_p (x, val, val);
6561}
6562
43cacb12
RS
6563/* Return true if VEC is a constant in which every element is in the range
6564 [MINVAL, MAXVAL]. The elements do not need to have the same value. */
6565
6566static bool
6567aarch64_const_vec_all_in_range_p (rtx vec,
6568 HOST_WIDE_INT minval,
6569 HOST_WIDE_INT maxval)
6570{
6571 if (GET_CODE (vec) != CONST_VECTOR
6572 || GET_MODE_CLASS (GET_MODE (vec)) != MODE_VECTOR_INT)
6573 return false;
6574
6575 int nunits;
6576 if (!CONST_VECTOR_STEPPED_P (vec))
6577 nunits = const_vector_encoded_nelts (vec);
6578 else if (!CONST_VECTOR_NUNITS (vec).is_constant (&nunits))
6579 return false;
6580
6581 for (int i = 0; i < nunits; i++)
6582 {
6583 rtx vec_elem = CONST_VECTOR_ELT (vec, i);
6584 if (!CONST_INT_P (vec_elem)
6585 || !IN_RANGE (INTVAL (vec_elem), minval, maxval))
6586 return false;
6587 }
6588 return true;
6589}
43e9d192 6590
cf670503
ZC
6591/* N Z C V. */
6592#define AARCH64_CC_V 1
6593#define AARCH64_CC_C (1 << 1)
6594#define AARCH64_CC_Z (1 << 2)
6595#define AARCH64_CC_N (1 << 3)
6596
c8012fbc
WD
6597/* N Z C V flags for ccmp. Indexed by AARCH64_COND_CODE. */
6598static const int aarch64_nzcv_codes[] =
6599{
6600 0, /* EQ, Z == 1. */
6601 AARCH64_CC_Z, /* NE, Z == 0. */
6602 0, /* CS, C == 1. */
6603 AARCH64_CC_C, /* CC, C == 0. */
6604 0, /* MI, N == 1. */
6605 AARCH64_CC_N, /* PL, N == 0. */
6606 0, /* VS, V == 1. */
6607 AARCH64_CC_V, /* VC, V == 0. */
6608 0, /* HI, C == 1 && Z == 0. */
6609 AARCH64_CC_C, /* LS, !(C == 1 && Z == 0). */
6610 AARCH64_CC_V, /* GE, N == V. */
6611 0, /* LT, N != V. */
6612 AARCH64_CC_Z, /* GT, Z == 0 && N == V. */
6613 0, /* LE, !(Z == 0 && N == V). */
6614 0, /* AL, Any. */
6615 0 /* NV, Any. */
cf670503
ZC
6616};
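/* For example, the 'k' operand modifier below prints an entry of this table
   for the #nzcv immediate of a CCMP: a condition operand of AARCH64_NE
   prints 4 (the Z flag), while AARCH64_EQ prints 0.  */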
6617
43cacb12
RS
6618/* Print floating-point vector immediate operand X to F, negating it
6619 first if NEGATE is true. Return true on success, false if it isn't
6620 a constant we can handle. */
6621
6622static bool
6623aarch64_print_vector_float_operand (FILE *f, rtx x, bool negate)
6624{
6625 rtx elt;
6626
6627 if (!const_vec_duplicate_p (x, &elt))
6628 return false;
6629
6630 REAL_VALUE_TYPE r = *CONST_DOUBLE_REAL_VALUE (elt);
6631 if (negate)
6632 r = real_value_negate (&r);
6633
6634 /* We only handle the SVE single-bit immediates here. */
6635 if (real_equal (&r, &dconst0))
6636 asm_fprintf (f, "0.0");
6637 else if (real_equal (&r, &dconst1))
6638 asm_fprintf (f, "1.0");
6639 else if (real_equal (&r, &dconsthalf))
6640 asm_fprintf (f, "0.5");
6641 else
6642 return false;
6643
6644 return true;
6645}
6646
9f4cbab8
RS
6647/* Return the equivalent letter for size. */
6648static char
6649sizetochar (int size)
6650{
6651 switch (size)
6652 {
6653 case 64: return 'd';
6654 case 32: return 's';
6655 case 16: return 'h';
6656 case 8 : return 'b';
6657 default: gcc_unreachable ();
6658 }
6659}
6660
bcf19844
JW
6661/* Print operand X to file F in a target specific manner according to CODE.
6662 The acceptable formatting commands given by CODE are:
6663 'c': An integer or symbol address without a preceding #
6664 sign.
43cacb12
RS
6665 'C': Take the duplicated element in a vector constant
6666 and print it in hex.
6667 'D': Take the duplicated element in a vector constant
6668 and print it as an unsigned integer, in decimal.
bcf19844
JW
6669 'e': Print the sign/zero-extend size as a character 8->b,
6670 16->h, 32->w.
6671 'p': Prints N such that 2^N == X (X must be power of 2 and
6672 const int).
6673 'P': Print the number of non-zero bits in X (a const_int).
6674 'H': Print the higher numbered register of a pair (TImode)
6675 of regs.
6676 'm': Print a condition (eq, ne, etc).
6677 'M': Same as 'm', but invert condition.
43cacb12
RS
6678 'N': Take the duplicated element in a vector constant
6679 and print the negative of it in decimal.
bcf19844
JW
6680 'b/h/s/d/q': Print a scalar FP/SIMD register name.
6681 'S/T/U/V': Print a FP/SIMD register name for a register list.
6682 The register printed is the FP/SIMD register name
6683 of X + 0/1/2/3 for S/T/U/V.
6684 'R': Print a scalar FP/SIMD register name + 1.
6685 'X': Print bottom 16 bits of integer constant in hex.
6686 'w/x': Print a general register name or the zero register
6687 (32-bit or 64-bit).
6688 '0': Print a normal operand, if it's a general register,
6689 then we assume DImode.
6690 'k': Print NZCV for conditional compare instructions.
6691 'A': Output address constant representing the first
6692 argument of X, specifying a relocation offset
6693 if appropriate.
6694 'L': Output constant address specified by X
6695 with a relocation offset if appropriate.
6696 'G': Prints address of X, specifying a PC relative
e69a816d
WD
6697 relocation mode if appropriate.
6698 'y': Output address of LDP or STP - this is used for
6699 some LDP/STPs which don't use a PARALLEL in their
6700 pattern (so the mode needs to be adjusted).
6701 'z': Output address of a typical LDP or STP. */
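/* As an illustration, an output template such as "add\t%w0, %w1, %w2" uses
   the 'w' code above to print the 32-bit names (w0, w1, ...) of general
   registers, "%x0" prints the 64-bit name x0, and a plain "%0" falls
   through to the default handling below.  */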
bcf19844 6702
cc8ca59e
JB
6703static void
6704aarch64_print_operand (FILE *f, rtx x, int code)
43e9d192 6705{
43cacb12 6706 rtx elt;
43e9d192
IB
6707 switch (code)
6708 {
f541a481
KT
6709 case 'c':
6710 switch (GET_CODE (x))
6711 {
6712 case CONST_INT:
6713 fprintf (f, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
6714 break;
6715
6716 case SYMBOL_REF:
6717 output_addr_const (f, x);
6718 break;
6719
6720 case CONST:
6721 if (GET_CODE (XEXP (x, 0)) == PLUS
6722 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF)
6723 {
6724 output_addr_const (f, x);
6725 break;
6726 }
6727 /* Fall through. */
6728
6729 default:
ee61f880 6730 output_operand_lossage ("unsupported operand for code '%c'", code);
f541a481
KT
6731 }
6732 break;
6733
43e9d192 6734 case 'e':
43e9d192
IB
6735 {
6736 int n;
6737
4aa81c2e 6738 if (!CONST_INT_P (x)
43e9d192
IB
6739 || (n = exact_log2 (INTVAL (x) & ~7)) <= 0)
6740 {
6741 output_operand_lossage ("invalid operand for '%%%c'", code);
6742 return;
6743 }
6744
6745 switch (n)
6746 {
6747 case 3:
6748 fputc ('b', f);
6749 break;
6750 case 4:
6751 fputc ('h', f);
6752 break;
6753 case 5:
6754 fputc ('w', f);
6755 break;
6756 default:
6757 output_operand_lossage ("invalid operand for '%%%c'", code);
6758 return;
6759 }
6760 }
6761 break;
6762
6763 case 'p':
6764 {
6765 int n;
6766
4aa81c2e 6767 if (!CONST_INT_P (x) || (n = exact_log2 (INTVAL (x))) < 0)
43e9d192
IB
6768 {
6769 output_operand_lossage ("invalid operand for '%%%c'", code);
6770 return;
6771 }
6772
6773 asm_fprintf (f, "%d", n);
6774 }
6775 break;
6776
6777 case 'P':
4aa81c2e 6778 if (!CONST_INT_P (x))
43e9d192
IB
6779 {
6780 output_operand_lossage ("invalid operand for '%%%c'", code);
6781 return;
6782 }
6783
8d55c61b 6784 asm_fprintf (f, "%u", popcount_hwi (INTVAL (x)));
43e9d192
IB
6785 break;
6786
6787 case 'H':
c0111dc4
RE
6788 if (x == const0_rtx)
6789 {
6790 asm_fprintf (f, "xzr");
6791 break;
6792 }
6793
4aa81c2e 6794 if (!REG_P (x) || !GP_REGNUM_P (REGNO (x) + 1))
43e9d192
IB
6795 {
6796 output_operand_lossage ("invalid operand for '%%%c'", code);
6797 return;
6798 }
6799
01a3a324 6800 asm_fprintf (f, "%s", reg_names [REGNO (x) + 1]);
43e9d192
IB
6801 break;
6802
43e9d192 6803 case 'M':
c8012fbc 6804 case 'm':
cd5660ab
KT
6805 {
6806 int cond_code;
c8012fbc
WD
6807 /* CONST_TRUE_RTX means al/nv (al is the default, don't print it). */
6808 if (x == const_true_rtx)
cd5660ab 6809 {
c8012fbc
WD
6810 if (code == 'M')
6811 fputs ("nv", f);
cd5660ab
KT
6812 return;
6813 }
43e9d192 6814
cd5660ab
KT
6815 if (!COMPARISON_P (x))
6816 {
6817 output_operand_lossage ("invalid operand for '%%%c'", code);
6818 return;
6819 }
c8012fbc 6820
cd5660ab
KT
6821 cond_code = aarch64_get_condition_code (x);
6822 gcc_assert (cond_code >= 0);
c8012fbc
WD
6823 if (code == 'M')
6824 cond_code = AARCH64_INVERSE_CONDITION_CODE (cond_code);
6825 fputs (aarch64_condition_codes[cond_code], f);
cd5660ab 6826 }
43e9d192
IB
6827 break;
6828
43cacb12
RS
6829 case 'N':
6830 if (!const_vec_duplicate_p (x, &elt))
6831 {
6832 output_operand_lossage ("invalid vector constant");
6833 return;
6834 }
6835
6836 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_VECTOR_INT)
6837 asm_fprintf (f, "%wd", -INTVAL (elt));
6838 else if (GET_MODE_CLASS (GET_MODE (x)) == MODE_VECTOR_FLOAT
6839 && aarch64_print_vector_float_operand (f, x, true))
6840 ;
6841 else
6842 {
6843 output_operand_lossage ("invalid vector constant");
6844 return;
6845 }
6846 break;
6847
43e9d192
IB
6848 case 'b':
6849 case 'h':
6850 case 's':
6851 case 'd':
6852 case 'q':
43e9d192
IB
6853 if (!REG_P (x) || !FP_REGNUM_P (REGNO (x)))
6854 {
6855 output_operand_lossage ("incompatible floating point / vector register operand for '%%%c'", code);
6856 return;
6857 }
50ce6f88 6858 asm_fprintf (f, "%c%d", code, REGNO (x) - V0_REGNUM);
43e9d192
IB
6859 break;
6860
6861 case 'S':
6862 case 'T':
6863 case 'U':
6864 case 'V':
43e9d192
IB
6865 if (!REG_P (x) || !FP_REGNUM_P (REGNO (x)))
6866 {
6867 output_operand_lossage ("incompatible floating point / vector register operand for '%%%c'", code);
6868 return;
6869 }
43cacb12
RS
6870 asm_fprintf (f, "%c%d",
6871 aarch64_sve_data_mode_p (GET_MODE (x)) ? 'z' : 'v',
6872 REGNO (x) - V0_REGNUM + (code - 'S'));
43e9d192
IB
6873 break;
6874
2d8c6dc1 6875 case 'R':
2d8c6dc1
AH
6876 if (!REG_P (x) || !FP_REGNUM_P (REGNO (x)))
6877 {
6878 output_operand_lossage ("incompatible floating point / vector register operand for '%%%c'", code);
6879 return;
6880 }
6881 asm_fprintf (f, "q%d", REGNO (x) - V0_REGNUM + 1);
6882 break;
6883
a05c0ddf 6884 case 'X':
4aa81c2e 6885 if (!CONST_INT_P (x))
a05c0ddf
IB
6886 {
6887 output_operand_lossage ("invalid operand for '%%%c'", code);
6888 return;
6889 }
50d38551 6890 asm_fprintf (f, "0x%wx", UINTVAL (x) & 0xffff);
a05c0ddf
IB
6891 break;
6892
43cacb12
RS
6893 case 'C':
6894 {
6895 /* Print a replicated constant in hex. */
6896 if (!const_vec_duplicate_p (x, &elt) || !CONST_INT_P (elt))
6897 {
6898 output_operand_lossage ("invalid operand for '%%%c'", code);
6899 return;
6900 }
6901 scalar_mode inner_mode = GET_MODE_INNER (GET_MODE (x));
6902 asm_fprintf (f, "0x%wx", UINTVAL (elt) & GET_MODE_MASK (inner_mode));
6903 }
6904 break;
6905
6906 case 'D':
6907 {
6908 /* Print a replicated constant in decimal, treating it as
6909 unsigned. */
6910 if (!const_vec_duplicate_p (x, &elt) || !CONST_INT_P (elt))
6911 {
6912 output_operand_lossage ("invalid operand for '%%%c'", code);
6913 return;
6914 }
6915 scalar_mode inner_mode = GET_MODE_INNER (GET_MODE (x));
6916 asm_fprintf (f, "%wd", UINTVAL (elt) & GET_MODE_MASK (inner_mode));
6917 }
6918 break;
6919
43e9d192
IB
6920 case 'w':
6921 case 'x':
3520f7cc
JG
6922 if (x == const0_rtx
6923 || (CONST_DOUBLE_P (x) && aarch64_float_const_zero_rtx_p (x)))
43e9d192 6924 {
50ce6f88 6925 asm_fprintf (f, "%czr", code);
43e9d192
IB
6926 break;
6927 }
6928
6929 if (REG_P (x) && GP_REGNUM_P (REGNO (x)))
6930 {
50ce6f88 6931 asm_fprintf (f, "%c%d", code, REGNO (x) - R0_REGNUM);
43e9d192
IB
6932 break;
6933 }
6934
6935 if (REG_P (x) && REGNO (x) == SP_REGNUM)
6936 {
50ce6f88 6937 asm_fprintf (f, "%ssp", code == 'w' ? "w" : "");
43e9d192
IB
6938 break;
6939 }
6940
6941 /* Fall through */
6942
6943 case 0:
43e9d192
IB
6944 if (x == NULL)
6945 {
6946 output_operand_lossage ("missing operand");
6947 return;
6948 }
6949
6950 switch (GET_CODE (x))
6951 {
6952 case REG:
43cacb12 6953 if (aarch64_sve_data_mode_p (GET_MODE (x)))
9f4cbab8
RS
6954 {
6955 if (REG_NREGS (x) == 1)
6956 asm_fprintf (f, "z%d", REGNO (x) - V0_REGNUM);
6957 else
6958 {
6959 char suffix
6960 = sizetochar (GET_MODE_UNIT_BITSIZE (GET_MODE (x)));
6961 asm_fprintf (f, "{z%d.%c - z%d.%c}",
6962 REGNO (x) - V0_REGNUM, suffix,
6963 END_REGNO (x) - V0_REGNUM - 1, suffix);
6964 }
6965 }
43cacb12
RS
6966 else
6967 asm_fprintf (f, "%s", reg_names [REGNO (x)]);
43e9d192
IB
6968 break;
6969
6970 case MEM:
cc8ca59e 6971 output_address (GET_MODE (x), XEXP (x, 0));
43e9d192
IB
6972 break;
6973
6974 case LABEL_REF:
6975 case SYMBOL_REF:
6976 output_addr_const (asm_out_file, x);
6977 break;
6978
6979 case CONST_INT:
6980 asm_fprintf (f, "%wd", INTVAL (x));
6981 break;
6982
43cacb12
RS
6983 case CONST:
6984 if (!VECTOR_MODE_P (GET_MODE (x)))
3520f7cc 6985 {
43cacb12
RS
6986 output_addr_const (asm_out_file, x);
6987 break;
3520f7cc 6988 }
43cacb12
RS
6989 /* fall through */
6990
6991 case CONST_VECTOR:
6992 if (!const_vec_duplicate_p (x, &elt))
3520f7cc 6993 {
43cacb12
RS
6994 output_operand_lossage ("invalid vector constant");
6995 return;
3520f7cc 6996 }
43cacb12
RS
6997
6998 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_VECTOR_INT)
6999 asm_fprintf (f, "%wd", INTVAL (elt));
7000 else if (GET_MODE_CLASS (GET_MODE (x)) == MODE_VECTOR_FLOAT
7001 && aarch64_print_vector_float_operand (f, x, false))
7002 ;
3520f7cc 7003 else
43cacb12
RS
7004 {
7005 output_operand_lossage ("invalid vector constant");
7006 return;
7007 }
43e9d192
IB
7008 break;
7009
3520f7cc 7010 case CONST_DOUBLE:
2ca5b430
KT
7011 /* Since we define TARGET_SUPPORTS_WIDE_INT we shouldn't ever
7012 be getting CONST_DOUBLEs holding integers. */
7013 gcc_assert (GET_MODE (x) != VOIDmode);
7014 if (aarch64_float_const_zero_rtx_p (x))
3520f7cc
JG
7015 {
7016 fputc ('0', f);
7017 break;
7018 }
7019 else if (aarch64_float_const_representable_p (x))
7020 {
7021#define buf_size 20
7022 char float_buf[buf_size] = {'\0'};
34a72c33
RS
7023 real_to_decimal_for_mode (float_buf,
7024 CONST_DOUBLE_REAL_VALUE (x),
3520f7cc
JG
7025 buf_size, buf_size,
7026 1, GET_MODE (x));
7027 asm_fprintf (asm_out_file, "%s", float_buf);
7028 break;
7029#undef buf_size
7030 }
7031 output_operand_lossage ("invalid constant");
7032 return;
43e9d192
IB
7033 default:
7034 output_operand_lossage ("invalid operand");
7035 return;
7036 }
7037 break;
7038
7039 case 'A':
7040 if (GET_CODE (x) == HIGH)
7041 x = XEXP (x, 0);
7042
a6e0bfa7 7043 switch (aarch64_classify_symbolic_expression (x))
43e9d192 7044 {
6642bdb4 7045 case SYMBOL_SMALL_GOT_4G:
43e9d192
IB
7046 asm_fprintf (asm_out_file, ":got:");
7047 break;
7048
7049 case SYMBOL_SMALL_TLSGD:
7050 asm_fprintf (asm_out_file, ":tlsgd:");
7051 break;
7052
7053 case SYMBOL_SMALL_TLSDESC:
7054 asm_fprintf (asm_out_file, ":tlsdesc:");
7055 break;
7056
79496620 7057 case SYMBOL_SMALL_TLSIE:
43e9d192
IB
7058 asm_fprintf (asm_out_file, ":gottprel:");
7059 break;
7060
d18ba284 7061 case SYMBOL_TLSLE24:
43e9d192
IB
7062 asm_fprintf (asm_out_file, ":tprel:");
7063 break;
7064
87dd8ab0
MS
7065 case SYMBOL_TINY_GOT:
7066 gcc_unreachable ();
7067 break;
7068
43e9d192
IB
7069 default:
7070 break;
7071 }
7072 output_addr_const (asm_out_file, x);
7073 break;
7074
7075 case 'L':
a6e0bfa7 7076 switch (aarch64_classify_symbolic_expression (x))
43e9d192 7077 {
6642bdb4 7078 case SYMBOL_SMALL_GOT_4G:
43e9d192
IB
7079 asm_fprintf (asm_out_file, ":lo12:");
7080 break;
7081
7082 case SYMBOL_SMALL_TLSGD:
7083 asm_fprintf (asm_out_file, ":tlsgd_lo12:");
7084 break;
7085
7086 case SYMBOL_SMALL_TLSDESC:
7087 asm_fprintf (asm_out_file, ":tlsdesc_lo12:");
7088 break;
7089
79496620 7090 case SYMBOL_SMALL_TLSIE:
43e9d192
IB
7091 asm_fprintf (asm_out_file, ":gottprel_lo12:");
7092 break;
7093
cbf5629e
JW
7094 case SYMBOL_TLSLE12:
7095 asm_fprintf (asm_out_file, ":tprel_lo12:");
7096 break;
7097
d18ba284 7098 case SYMBOL_TLSLE24:
43e9d192
IB
7099 asm_fprintf (asm_out_file, ":tprel_lo12_nc:");
7100 break;
7101
87dd8ab0
MS
7102 case SYMBOL_TINY_GOT:
7103 asm_fprintf (asm_out_file, ":got:");
7104 break;
7105
5ae7caad
JW
7106 case SYMBOL_TINY_TLSIE:
7107 asm_fprintf (asm_out_file, ":gottprel:");
7108 break;
7109
43e9d192
IB
7110 default:
7111 break;
7112 }
7113 output_addr_const (asm_out_file, x);
7114 break;
7115
7116 case 'G':
a6e0bfa7 7117 switch (aarch64_classify_symbolic_expression (x))
43e9d192 7118 {
d18ba284 7119 case SYMBOL_TLSLE24:
43e9d192
IB
7120 asm_fprintf (asm_out_file, ":tprel_hi12:");
7121 break;
7122 default:
7123 break;
7124 }
7125 output_addr_const (asm_out_file, x);
7126 break;
7127
cf670503
ZC
7128 case 'k':
7129 {
c8012fbc 7130 HOST_WIDE_INT cond_code;
cf670503 7131
c8012fbc 7132 if (!CONST_INT_P (x))
cf670503
ZC
7133 {
7134 output_operand_lossage ("invalid operand for '%%%c'", code);
7135 return;
7136 }
7137
c8012fbc
WD
7138 cond_code = INTVAL (x);
7139 gcc_assert (cond_code >= 0 && cond_code <= AARCH64_NV);
7140 asm_fprintf (f, "%d", aarch64_nzcv_codes[cond_code]);
cf670503
ZC
7141 }
7142 break;
7143
e69a816d
WD
7144 case 'y':
7145 case 'z':
7146 {
7147 machine_mode mode = GET_MODE (x);
7148
c348cab0 7149 if (GET_CODE (x) != MEM
6a70badb 7150 || (code == 'y' && maybe_ne (GET_MODE_SIZE (mode), 16)))
e69a816d
WD
7151 {
7152 output_operand_lossage ("invalid operand for '%%%c'", code);
7153 return;
7154 }
7155
a25831ac
AV
7156 if (!aarch64_print_address_internal (f, mode, XEXP (x, 0),
7157 code == 'y'
7158 ? ADDR_QUERY_LDP_STP_N
7159 : ADDR_QUERY_LDP_STP))
c348cab0 7160 output_operand_lossage ("invalid operand prefix '%%%c'", code);
e69a816d
WD
7161 }
7162 break;
7163
43e9d192
IB
7164 default:
7165 output_operand_lossage ("invalid operand prefix '%%%c'", code);
7166 return;
7167 }
7168}
7169
e69a816d
WD
7170/* Print address 'x' of a memory access with mode 'mode'.
7171 'type' is the aarch64_addr_query_type context required by
7172 aarch64_classify_address (e.g. ADDR_QUERY_ANY or ADDR_QUERY_LDP_STP). */
c348cab0 7173static bool
a97d8b98
RS
7174aarch64_print_address_internal (FILE *f, machine_mode mode, rtx x,
7175 aarch64_addr_query_type type)
43e9d192
IB
7176{
7177 struct aarch64_address_info addr;
6a70badb 7178 unsigned int size;
43e9d192 7179
e69a816d 7180 /* Check all addresses are Pmode - including ILP32. */
67c58c8f
SE
7181 if (GET_MODE (x) != Pmode)
7182 output_operand_lossage ("invalid address mode");
e69a816d 7183
a97d8b98 7184 if (aarch64_classify_address (&addr, x, mode, true, type))
43e9d192
IB
7185 switch (addr.type)
7186 {
7187 case ADDRESS_REG_IMM:
dc640181 7188 if (known_eq (addr.const_offset, 0))
01a3a324 7189 asm_fprintf (f, "[%s]", reg_names [REGNO (addr.base)]);
43cacb12
RS
7190 else if (aarch64_sve_data_mode_p (mode))
7191 {
7192 HOST_WIDE_INT vnum
7193 = exact_div (addr.const_offset,
7194 BYTES_PER_SVE_VECTOR).to_constant ();
7195 asm_fprintf (f, "[%s, #%wd, mul vl]",
7196 reg_names[REGNO (addr.base)], vnum);
7197 }
7198 else if (aarch64_sve_pred_mode_p (mode))
7199 {
7200 HOST_WIDE_INT vnum
7201 = exact_div (addr.const_offset,
7202 BYTES_PER_SVE_PRED).to_constant ();
7203 asm_fprintf (f, "[%s, #%wd, mul vl]",
7204 reg_names[REGNO (addr.base)], vnum);
7205 }
43e9d192 7206 else
16a3246f 7207 asm_fprintf (f, "[%s, %wd]", reg_names [REGNO (addr.base)],
43e9d192 7208 INTVAL (addr.offset));
c348cab0 7209 return true;
43e9d192
IB
7210
7211 case ADDRESS_REG_REG:
7212 if (addr.shift == 0)
16a3246f 7213 asm_fprintf (f, "[%s, %s]", reg_names [REGNO (addr.base)],
01a3a324 7214 reg_names [REGNO (addr.offset)]);
43e9d192 7215 else
16a3246f 7216 asm_fprintf (f, "[%s, %s, lsl %u]", reg_names [REGNO (addr.base)],
01a3a324 7217 reg_names [REGNO (addr.offset)], addr.shift);
c348cab0 7218 return true;
43e9d192
IB
7219
7220 case ADDRESS_REG_UXTW:
7221 if (addr.shift == 0)
16a3246f 7222 asm_fprintf (f, "[%s, w%d, uxtw]", reg_names [REGNO (addr.base)],
43e9d192
IB
7223 REGNO (addr.offset) - R0_REGNUM);
7224 else
16a3246f 7225 asm_fprintf (f, "[%s, w%d, uxtw %u]", reg_names [REGNO (addr.base)],
43e9d192 7226 REGNO (addr.offset) - R0_REGNUM, addr.shift);
c348cab0 7227 return true;
43e9d192
IB
7228
7229 case ADDRESS_REG_SXTW:
7230 if (addr.shift == 0)
16a3246f 7231 asm_fprintf (f, "[%s, w%d, sxtw]", reg_names [REGNO (addr.base)],
43e9d192
IB
7232 REGNO (addr.offset) - R0_REGNUM);
7233 else
16a3246f 7234 asm_fprintf (f, "[%s, w%d, sxtw %u]", reg_names [REGNO (addr.base)],
43e9d192 7235 REGNO (addr.offset) - R0_REGNUM, addr.shift);
c348cab0 7236 return true;
43e9d192
IB
7237
7238 case ADDRESS_REG_WB:
6a70badb
RS
7239 /* Writeback is only supported for fixed-width modes. */
7240 size = GET_MODE_SIZE (mode).to_constant ();
43e9d192
IB
7241 switch (GET_CODE (x))
7242 {
7243 case PRE_INC:
6a70badb 7244 asm_fprintf (f, "[%s, %d]!", reg_names [REGNO (addr.base)], size);
c348cab0 7245 return true;
43e9d192 7246 case POST_INC:
6a70badb 7247 asm_fprintf (f, "[%s], %d", reg_names [REGNO (addr.base)], size);
c348cab0 7248 return true;
43e9d192 7249 case PRE_DEC:
6a70badb 7250 asm_fprintf (f, "[%s, -%d]!", reg_names [REGNO (addr.base)], size);
c348cab0 7251 return true;
43e9d192 7252 case POST_DEC:
6a70badb 7253 asm_fprintf (f, "[%s], -%d", reg_names [REGNO (addr.base)], size);
c348cab0 7254 return true;
43e9d192 7255 case PRE_MODIFY:
6a70badb 7256 asm_fprintf (f, "[%s, %wd]!", reg_names[REGNO (addr.base)],
43e9d192 7257 INTVAL (addr.offset));
c348cab0 7258 return true;
43e9d192 7259 case POST_MODIFY:
6a70badb 7260 asm_fprintf (f, "[%s], %wd", reg_names[REGNO (addr.base)],
43e9d192 7261 INTVAL (addr.offset));
c348cab0 7262 return true;
43e9d192
IB
7263 default:
7264 break;
7265 }
7266 break;
7267
7268 case ADDRESS_LO_SUM:
16a3246f 7269 asm_fprintf (f, "[%s, #:lo12:", reg_names [REGNO (addr.base)]);
43e9d192
IB
7270 output_addr_const (f, addr.offset);
7271 asm_fprintf (f, "]");
c348cab0 7272 return true;
43e9d192
IB
7273
7274 case ADDRESS_SYMBOLIC:
d6591257 7275 output_addr_const (f, x);
c348cab0 7276 return true;
43e9d192
IB
7277 }
7278
c348cab0 7279 return false;
43e9d192
IB
7280}
7281
e69a816d
WD
7282/* Print address 'x' of a memory access with mode 'mode'. */
7283static void
7284aarch64_print_operand_address (FILE *f, machine_mode mode, rtx x)
7285{
43cacb12 7286 if (!aarch64_print_address_internal (f, mode, x, ADDR_QUERY_ANY))
c348cab0 7287 output_addr_const (f, x);
e69a816d
WD
7288}
7289
43e9d192
IB
7290bool
7291aarch64_label_mentioned_p (rtx x)
7292{
7293 const char *fmt;
7294 int i;
7295
7296 if (GET_CODE (x) == LABEL_REF)
7297 return true;
7298
7299 /* UNSPEC_TLS entries for a symbol include a LABEL_REF for the
7300 referencing instruction, but they are constant offsets, not
7301 symbols. */
7302 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
7303 return false;
7304
7305 fmt = GET_RTX_FORMAT (GET_CODE (x));
7306 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
7307 {
7308 if (fmt[i] == 'E')
7309 {
7310 int j;
7311
7312 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
7313 if (aarch64_label_mentioned_p (XVECEXP (x, i, j)))
7314 return 1;
7315 }
7316 else if (fmt[i] == 'e' && aarch64_label_mentioned_p (XEXP (x, i)))
7317 return 1;
7318 }
7319
7320 return 0;
7321}
7322
7323/* Implement REGNO_REG_CLASS. */
7324
7325enum reg_class
7326aarch64_regno_regclass (unsigned regno)
7327{
7328 if (GP_REGNUM_P (regno))
a4a182c6 7329 return GENERAL_REGS;
43e9d192
IB
7330
7331 if (regno == SP_REGNUM)
7332 return STACK_REG;
7333
7334 if (regno == FRAME_POINTER_REGNUM
7335 || regno == ARG_POINTER_REGNUM)
f24bb080 7336 return POINTER_REGS;
43e9d192
IB
7337
7338 if (FP_REGNUM_P (regno))
7339 return FP_LO_REGNUM_P (regno) ? FP_LO_REGS : FP_REGS;
7340
43cacb12
RS
7341 if (PR_REGNUM_P (regno))
7342 return PR_LO_REGNUM_P (regno) ? PR_LO_REGS : PR_HI_REGS;
7343
43e9d192
IB
7344 return NO_REGS;
7345}
7346
6a70badb
RS
7347/* OFFSET is an address offset for mode MODE, which has SIZE bytes.
7348 If OFFSET is out of range, return an offset of an anchor point
7349 that is in range. Return 0 otherwise. */
7350
7351static HOST_WIDE_INT
7352aarch64_anchor_offset (HOST_WIDE_INT offset, HOST_WIDE_INT size,
7353 machine_mode mode)
7354{
7355 /* Does it look like we'll need a 16-byte load/store-pair operation? */
7356 if (size > 16)
7357 return (offset + 0x400) & ~0x7f0;
7358
7359 /* For offsets that aren't a multiple of the access size, the limit is
7360 -256...255. */
7361 if (offset & (size - 1))
7362 {
7363 /* BLKmode typically uses LDP of X-registers. */
7364 if (mode == BLKmode)
7365 return (offset + 512) & ~0x3ff;
7366 return (offset + 0x100) & ~0x1ff;
7367 }
7368
7369 /* Small negative offsets are supported. */
7370 if (IN_RANGE (offset, -256, 0))
7371 return 0;
7372
7373 if (mode == TImode || mode == TFmode)
7374 return (offset + 0x100) & ~0x1ff;
7375
 7376 /* Use the unsigned 12-bit offset range, scaled by the access size. */
7377 return offset & (~0xfff * size);
7378}
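/* For example, an SImode access at offset 0x4120 is anchored at 0x4000
   (offset & -(0x1000 * 4)); the remaining 0x120 then fits the scaled
   unsigned 12-bit immediate of a plain LDR/STR.  */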
7379
0c4ec427 7380static rtx
ef4bddc2 7381aarch64_legitimize_address (rtx x, rtx /* orig_x */, machine_mode mode)
0c4ec427
RE
7382{
7383 /* Try to split X+CONST into Y=X+(CONST & ~mask), Y+(CONST&mask),
7384 where mask is selected by alignment and size of the offset.
7385 We try to pick as large a range for the offset as possible to
7386 maximize the chance of a CSE. However, for aligned addresses
7387 we limit the range to 4k so that structures with different sized
e8426e0a
BC
7388 elements are likely to use the same base. We need to be careful
7389 not to split a CONST for some forms of address expression, otherwise
7390 it will generate sub-optimal code. */
0c4ec427
RE
7391
7392 if (GET_CODE (x) == PLUS && CONST_INT_P (XEXP (x, 1)))
7393 {
9e0218fc 7394 rtx base = XEXP (x, 0);
17d7bdd8 7395 rtx offset_rtx = XEXP (x, 1);
9e0218fc 7396 HOST_WIDE_INT offset = INTVAL (offset_rtx);
0c4ec427 7397
9e0218fc 7398 if (GET_CODE (base) == PLUS)
e8426e0a 7399 {
9e0218fc
RH
7400 rtx op0 = XEXP (base, 0);
7401 rtx op1 = XEXP (base, 1);
7402
7403 /* Force any scaling into a temp for CSE. */
7404 op0 = force_reg (Pmode, op0);
7405 op1 = force_reg (Pmode, op1);
7406
7407 /* Let the pointer register be in op0. */
7408 if (REG_POINTER (op1))
7409 std::swap (op0, op1);
7410
7411 /* If the pointer is virtual or frame related, then we know that
7412 virtual register instantiation or register elimination is going
7413 to apply a second constant. We want the two constants folded
7414 together easily. Therefore, emit as (OP0 + CONST) + OP1. */
7415 if (virt_or_elim_regno_p (REGNO (op0)))
e8426e0a 7416 {
9e0218fc
RH
7417 base = expand_binop (Pmode, add_optab, op0, offset_rtx,
7418 NULL_RTX, true, OPTAB_DIRECT);
7419 return gen_rtx_PLUS (Pmode, base, op1);
e8426e0a 7420 }
e8426e0a 7421
9e0218fc
RH
7422 /* Otherwise, in order to encourage CSE (and thence loop strength
7423 reduce) scaled addresses, emit as (OP0 + OP1) + CONST. */
7424 base = expand_binop (Pmode, add_optab, op0, op1,
7425 NULL_RTX, true, OPTAB_DIRECT);
7426 x = gen_rtx_PLUS (Pmode, base, offset_rtx);
e8426e0a
BC
7427 }
7428
6a70badb
RS
7429 HOST_WIDE_INT size;
7430 if (GET_MODE_SIZE (mode).is_constant (&size))
ff0f3f1c 7431 {
6a70badb
RS
7432 HOST_WIDE_INT base_offset = aarch64_anchor_offset (offset, size,
7433 mode);
7434 if (base_offset != 0)
7435 {
7436 base = plus_constant (Pmode, base, base_offset);
7437 base = force_operand (base, NULL_RTX);
7438 return plus_constant (Pmode, base, offset - base_offset);
7439 }
9e0218fc 7440 }
0c4ec427
RE
7441 }
7442
7443 return x;
7444}
7445
b4f50fd4
RR
7446/* Return the reload icode required for a constant pool in mode. */
7447static enum insn_code
7448aarch64_constant_pool_reload_icode (machine_mode mode)
7449{
7450 switch (mode)
7451 {
4e10a5a7 7452 case E_SFmode:
b4f50fd4
RR
7453 return CODE_FOR_aarch64_reload_movcpsfdi;
7454
4e10a5a7 7455 case E_DFmode:
b4f50fd4
RR
7456 return CODE_FOR_aarch64_reload_movcpdfdi;
7457
4e10a5a7 7458 case E_TFmode:
b4f50fd4
RR
7459 return CODE_FOR_aarch64_reload_movcptfdi;
7460
4e10a5a7 7461 case E_V8QImode:
b4f50fd4
RR
7462 return CODE_FOR_aarch64_reload_movcpv8qidi;
7463
4e10a5a7 7464 case E_V16QImode:
b4f50fd4
RR
7465 return CODE_FOR_aarch64_reload_movcpv16qidi;
7466
4e10a5a7 7467 case E_V4HImode:
b4f50fd4
RR
7468 return CODE_FOR_aarch64_reload_movcpv4hidi;
7469
4e10a5a7 7470 case E_V8HImode:
b4f50fd4
RR
7471 return CODE_FOR_aarch64_reload_movcpv8hidi;
7472
4e10a5a7 7473 case E_V2SImode:
b4f50fd4
RR
7474 return CODE_FOR_aarch64_reload_movcpv2sidi;
7475
4e10a5a7 7476 case E_V4SImode:
b4f50fd4
RR
7477 return CODE_FOR_aarch64_reload_movcpv4sidi;
7478
4e10a5a7 7479 case E_V2DImode:
b4f50fd4
RR
7480 return CODE_FOR_aarch64_reload_movcpv2didi;
7481
4e10a5a7 7482 case E_V2DFmode:
b4f50fd4
RR
7483 return CODE_FOR_aarch64_reload_movcpv2dfdi;
7484
7485 default:
7486 gcc_unreachable ();
7487 }
7488
7489 gcc_unreachable ();
7490}
43e9d192
IB
7491static reg_class_t
7492aarch64_secondary_reload (bool in_p ATTRIBUTE_UNUSED, rtx x,
7493 reg_class_t rclass,
ef4bddc2 7494 machine_mode mode,
43e9d192
IB
7495 secondary_reload_info *sri)
7496{
9a1b9cb4
RS
7497 /* Use aarch64_sve_reload_be for SVE reloads that cannot be handled
7498 directly by the *aarch64_sve_mov<mode>_be move pattern. See the
7499 comment at the head of aarch64-sve.md for more details about the
7500 big-endian handling. */
43cacb12
RS
7501 if (BYTES_BIG_ENDIAN
7502 && reg_class_subset_p (rclass, FP_REGS)
9a1b9cb4
RS
7503 && !((REG_P (x) && HARD_REGISTER_P (x))
7504 || aarch64_simd_valid_immediate (x, NULL))
43cacb12
RS
7505 && aarch64_sve_data_mode_p (mode))
7506 {
7507 sri->icode = CODE_FOR_aarch64_sve_reload_be;
7508 return NO_REGS;
7509 }
b4f50fd4
RR
7510
7511 /* If we have to disable direct literal pool loads and stores because the
7512 function is too big, then we need a scratch register. */
7513 if (MEM_P (x) && GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x)
7514 && (SCALAR_FLOAT_MODE_P (GET_MODE (x))
7515 || targetm.vector_mode_supported_p (GET_MODE (x)))
9ee6540a 7516 && !aarch64_pcrelative_literal_loads)
b4f50fd4
RR
7517 {
7518 sri->icode = aarch64_constant_pool_reload_icode (mode);
7519 return NO_REGS;
7520 }
7521
43e9d192
IB
7522 /* Without the TARGET_SIMD instructions we cannot move a Q register
7523 to a Q register directly. We need a scratch. */
7524 if (REG_P (x) && (mode == TFmode || mode == TImode) && mode == GET_MODE (x)
7525 && FP_REGNUM_P (REGNO (x)) && !TARGET_SIMD
7526 && reg_class_subset_p (rclass, FP_REGS))
7527 {
7528 if (mode == TFmode)
7529 sri->icode = CODE_FOR_aarch64_reload_movtf;
7530 else if (mode == TImode)
7531 sri->icode = CODE_FOR_aarch64_reload_movti;
7532 return NO_REGS;
7533 }
7534
 7535 /* A TFmode or TImode memory access should be handled via an FP_REG
7536 because AArch64 has richer addressing modes for LDR/STR instructions
7537 than LDP/STP instructions. */
d5726973 7538 if (TARGET_FLOAT && rclass == GENERAL_REGS
6a70badb 7539 && known_eq (GET_MODE_SIZE (mode), 16) && MEM_P (x))
43e9d192
IB
7540 return FP_REGS;
7541
7542 if (rclass == FP_REGS && (mode == TImode || mode == TFmode) && CONSTANT_P(x))
a4a182c6 7543 return GENERAL_REGS;
43e9d192
IB
7544
7545 return NO_REGS;
7546}
7547
7548static bool
6216fd90 7549aarch64_can_eliminate (const int from ATTRIBUTE_UNUSED, const int to)
43e9d192 7550{
6216fd90 7551 gcc_assert (from == ARG_POINTER_REGNUM || from == FRAME_POINTER_REGNUM);
43e9d192 7552
6216fd90
WD
7553 /* If we need a frame pointer, ARG_POINTER_REGNUM and FRAME_POINTER_REGNUM
7554 can only eliminate to HARD_FRAME_POINTER_REGNUM. */
43e9d192 7555 if (frame_pointer_needed)
6216fd90 7556 return to == HARD_FRAME_POINTER_REGNUM;
43e9d192
IB
7557 return true;
7558}
7559
6a70badb 7560poly_int64
43e9d192
IB
7561aarch64_initial_elimination_offset (unsigned from, unsigned to)
7562{
43e9d192 7563 aarch64_layout_frame ();
78c29983
MS
7564
7565 if (to == HARD_FRAME_POINTER_REGNUM)
7566 {
7567 if (from == ARG_POINTER_REGNUM)
71bfb77a 7568 return cfun->machine->frame.hard_fp_offset;
78c29983
MS
7569
7570 if (from == FRAME_POINTER_REGNUM)
71bfb77a
WD
7571 return cfun->machine->frame.hard_fp_offset
7572 - cfun->machine->frame.locals_offset;
78c29983
MS
7573 }
7574
7575 if (to == STACK_POINTER_REGNUM)
7576 {
7577 if (from == FRAME_POINTER_REGNUM)
71bfb77a
WD
7578 return cfun->machine->frame.frame_size
7579 - cfun->machine->frame.locals_offset;
78c29983
MS
7580 }
7581
1c960e02 7582 return cfun->machine->frame.frame_size;
43e9d192
IB
7583}
7584
43e9d192
IB
7585/* Implement RETURN_ADDR_RTX. We do not support moving back to a
7586 previous frame. */
7587
7588rtx
7589aarch64_return_addr (int count, rtx frame ATTRIBUTE_UNUSED)
7590{
7591 if (count != 0)
7592 return const0_rtx;
7593 return get_hard_reg_initial_val (Pmode, LR_REGNUM);
7594}
7595
7596
7597static void
7598aarch64_asm_trampoline_template (FILE *f)
7599{
28514dda
YZ
7600 if (TARGET_ILP32)
7601 {
7602 asm_fprintf (f, "\tldr\tw%d, .+16\n", IP1_REGNUM - R0_REGNUM);
7603 asm_fprintf (f, "\tldr\tw%d, .+16\n", STATIC_CHAIN_REGNUM - R0_REGNUM);
7604 }
7605 else
7606 {
7607 asm_fprintf (f, "\tldr\t%s, .+16\n", reg_names [IP1_REGNUM]);
7608 asm_fprintf (f, "\tldr\t%s, .+20\n", reg_names [STATIC_CHAIN_REGNUM]);
7609 }
01a3a324 7610 asm_fprintf (f, "\tbr\t%s\n", reg_names [IP1_REGNUM]);
43e9d192 7611 assemble_aligned_integer (4, const0_rtx);
28514dda
YZ
7612 assemble_aligned_integer (POINTER_BYTES, const0_rtx);
7613 assemble_aligned_integer (POINTER_BYTES, const0_rtx);
43e9d192
IB
7614}
7615
7616static void
7617aarch64_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
7618{
7619 rtx fnaddr, mem, a_tramp;
28514dda 7620 const int tramp_code_sz = 16;
43e9d192
IB
7621
 7622 /* We don't need to copy the trailing D-words; we fill those in below. */
7623 emit_block_move (m_tramp, assemble_trampoline_template (),
28514dda
YZ
7624 GEN_INT (tramp_code_sz), BLOCK_OP_NORMAL);
7625 mem = adjust_address (m_tramp, ptr_mode, tramp_code_sz);
43e9d192 7626 fnaddr = XEXP (DECL_RTL (fndecl), 0);
28514dda
YZ
7627 if (GET_MODE (fnaddr) != ptr_mode)
7628 fnaddr = convert_memory_address (ptr_mode, fnaddr);
43e9d192
IB
7629 emit_move_insn (mem, fnaddr);
7630
28514dda 7631 mem = adjust_address (m_tramp, ptr_mode, tramp_code_sz + POINTER_BYTES);
43e9d192
IB
7632 emit_move_insn (mem, chain_value);
7633
7634 /* XXX We should really define a "clear_cache" pattern and use
7635 gen_clear_cache(). */
7636 a_tramp = XEXP (m_tramp, 0);
7637 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__clear_cache"),
db69559b 7638 LCT_NORMAL, VOIDmode, a_tramp, ptr_mode,
28514dda
YZ
7639 plus_constant (ptr_mode, a_tramp, TRAMPOLINE_SIZE),
7640 ptr_mode);
43e9d192
IB
7641}
7642
7643static unsigned char
ef4bddc2 7644aarch64_class_max_nregs (reg_class_t regclass, machine_mode mode)
43e9d192 7645{
6a70badb
RS
7646 /* ??? Logically we should only need to provide a value when
7647 HARD_REGNO_MODE_OK says that at least one register in REGCLASS
7648 can hold MODE, but at the moment we need to handle all modes.
7649 Just ignore any runtime parts for registers that can't store them. */
7650 HOST_WIDE_INT lowest_size = constant_lower_bound (GET_MODE_SIZE (mode));
43cacb12 7651 unsigned int nregs;
43e9d192
IB
7652 switch (regclass)
7653 {
d677263e 7654 case TAILCALL_ADDR_REGS:
43e9d192
IB
7655 case POINTER_REGS:
7656 case GENERAL_REGS:
7657 case ALL_REGS:
f25a140b 7658 case POINTER_AND_FP_REGS:
43e9d192
IB
7659 case FP_REGS:
7660 case FP_LO_REGS:
43cacb12
RS
7661 if (aarch64_sve_data_mode_p (mode)
7662 && constant_multiple_p (GET_MODE_SIZE (mode),
7663 BYTES_PER_SVE_VECTOR, &nregs))
7664 return nregs;
7665 return (aarch64_vector_data_mode_p (mode)
6a70badb
RS
7666 ? CEIL (lowest_size, UNITS_PER_VREG)
7667 : CEIL (lowest_size, UNITS_PER_WORD));
43e9d192 7668 case STACK_REG:
43cacb12
RS
7669 case PR_REGS:
7670 case PR_LO_REGS:
7671 case PR_HI_REGS:
43e9d192
IB
7672 return 1;
7673
7674 case NO_REGS:
7675 return 0;
7676
7677 default:
7678 break;
7679 }
7680 gcc_unreachable ();
7681}
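/* For example, V4SImode fits in a single FP_REGS register (16 bytes in one
   V-register), whereas TImode in GENERAL_REGS needs two 8-byte
   X-registers and so returns 2.  */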
7682
7683static reg_class_t
78d8b9f0 7684aarch64_preferred_reload_class (rtx x, reg_class_t regclass)
43e9d192 7685{
51bb310d 7686 if (regclass == POINTER_REGS)
78d8b9f0
IB
7687 return GENERAL_REGS;
7688
51bb310d
MS
7689 if (regclass == STACK_REG)
7690 {
7691 if (REG_P(x)
7692 && reg_class_subset_p (REGNO_REG_CLASS (REGNO (x)), POINTER_REGS))
7693 return regclass;
7694
7695 return NO_REGS;
7696 }
7697
27bd251b
IB
 7698 /* Register elimination can result in a request for
 7699 SP+constant->FP_REGS. We cannot support such operations, which
 7700 use SP as source and an FP_REG as destination, so reject them
 7701 right now. */
7702 if (! reg_class_subset_p (regclass, GENERAL_REGS) && GET_CODE (x) == PLUS)
7703 {
7704 rtx lhs = XEXP (x, 0);
7705
7706 /* Look through a possible SUBREG introduced by ILP32. */
7707 if (GET_CODE (lhs) == SUBREG)
7708 lhs = SUBREG_REG (lhs);
7709
7710 gcc_assert (REG_P (lhs));
7711 gcc_assert (reg_class_subset_p (REGNO_REG_CLASS (REGNO (lhs)),
7712 POINTER_REGS));
7713 return NO_REGS;
7714 }
7715
78d8b9f0 7716 return regclass;
43e9d192
IB
7717}
7718
7719void
7720aarch64_asm_output_labelref (FILE* f, const char *name)
7721{
7722 asm_fprintf (f, "%U%s", name);
7723}
7724
7725static void
7726aarch64_elf_asm_constructor (rtx symbol, int priority)
7727{
7728 if (priority == DEFAULT_INIT_PRIORITY)
7729 default_ctor_section_asm_out_constructor (symbol, priority);
7730 else
7731 {
7732 section *s;
53d190c1
AT
 7733 /* Priority is known to be in the range [0, 65535], so 18 bytes
 7734 would be enough, but the compiler might not know that. To avoid
 7735 a -Wformat-truncation false positive, use a larger size. */
7736 char buf[23];
43e9d192 7737 snprintf (buf, sizeof (buf), ".init_array.%.5u", priority);
fcef3abd 7738 s = get_section (buf, SECTION_WRITE | SECTION_NOTYPE, NULL);
43e9d192
IB
7739 switch_to_section (s);
7740 assemble_align (POINTER_SIZE);
28514dda 7741 assemble_aligned_integer (POINTER_BYTES, symbol);
43e9d192
IB
7742 }
7743}
7744
7745static void
7746aarch64_elf_asm_destructor (rtx symbol, int priority)
7747{
7748 if (priority == DEFAULT_INIT_PRIORITY)
7749 default_dtor_section_asm_out_destructor (symbol, priority);
7750 else
7751 {
7752 section *s;
53d190c1
AT
 7753 /* Priority is known to be in the range [0, 65535], so 18 bytes
 7754 would be enough, but the compiler might not know that. To avoid
 7755 a -Wformat-truncation false positive, use a larger size. */
7756 char buf[23];
43e9d192 7757 snprintf (buf, sizeof (buf), ".fini_array.%.5u", priority);
fcef3abd 7758 s = get_section (buf, SECTION_WRITE | SECTION_NOTYPE, NULL);
43e9d192
IB
7759 switch_to_section (s);
7760 assemble_align (POINTER_SIZE);
28514dda 7761 assemble_aligned_integer (POINTER_BYTES, symbol);
43e9d192
IB
7762 }
7763}
7764
7765const char*
7766aarch64_output_casesi (rtx *operands)
7767{
7768 char buf[100];
7769 char label[100];
b32d5189 7770 rtx diff_vec = PATTERN (NEXT_INSN (as_a <rtx_insn *> (operands[2])));
43e9d192
IB
7771 int index;
7772 static const char *const patterns[4][2] =
7773 {
7774 {
7775 "ldrb\t%w3, [%0,%w1,uxtw]",
7776 "add\t%3, %4, %w3, sxtb #2"
7777 },
7778 {
7779 "ldrh\t%w3, [%0,%w1,uxtw #1]",
7780 "add\t%3, %4, %w3, sxth #2"
7781 },
7782 {
7783 "ldr\t%w3, [%0,%w1,uxtw #2]",
7784 "add\t%3, %4, %w3, sxtw #2"
7785 },
7786 /* We assume that DImode is only generated when not optimizing and
7787 that we don't really need 64-bit address offsets. That would
7788 imply an object file with 8GB of code in a single function! */
7789 {
7790 "ldr\t%w3, [%0,%w1,uxtw #2]",
7791 "add\t%3, %4, %w3, sxtw #2"
7792 }
7793 };
7794
7795 gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC);
7796
77e994c9
RS
7797 scalar_int_mode mode = as_a <scalar_int_mode> (GET_MODE (diff_vec));
7798 index = exact_log2 (GET_MODE_SIZE (mode));
43e9d192
IB
7799
7800 gcc_assert (index >= 0 && index <= 3);
7801
 7802 /* Need to implement table size reduction, by changing the code below. */
7803 output_asm_insn (patterns[index][0], operands);
7804 ASM_GENERATE_INTERNAL_LABEL (label, "Lrtx", CODE_LABEL_NUMBER (operands[2]));
7805 snprintf (buf, sizeof (buf),
7806 "adr\t%%4, %s", targetm.strip_name_encoding (label));
7807 output_asm_insn (buf, operands);
7808 output_asm_insn (patterns[index][1], operands);
7809 output_asm_insn ("br\t%3", operands);
7810 assemble_label (asm_out_file, label);
7811 return "";
7812}
7813
7814
7815/* Return size in bits of an arithmetic operand which is shifted/scaled and
7816 masked such that it is suitable for a UXTB, UXTH, or UXTW extend
7817 operator. */
7818
7819int
7820aarch64_uxt_size (int shift, HOST_WIDE_INT mask)
7821{
7822 if (shift >= 0 && shift <= 3)
7823 {
7824 int size;
7825 for (size = 8; size <= 32; size *= 2)
7826 {
7827 HOST_WIDE_INT bits = ((HOST_WIDE_INT)1U << size) - 1;
7828 if (mask == bits << shift)
7829 return size;
7830 }
7831 }
7832 return 0;
7833}
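/* For example, aarch64_uxt_size (1, 0x1fe) returns 8, since the mask is
   0xff shifted left by one (a UXTB-style extend), and
   aarch64_uxt_size (0, 0xffff) returns 16 (UXTH); combinations that do
   not correspond to an extend return 0.  */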
7834
e78d485e
RR
 7835/* Constant pools are per-function only when PC-relative
 7836 literal loads are enabled or we are in the large memory
 7837 model. */
7838
7839static inline bool
7840aarch64_can_use_per_function_literal_pools_p (void)
7841{
9ee6540a 7842 return (aarch64_pcrelative_literal_loads
e78d485e
RR
7843 || aarch64_cmodel == AARCH64_CMODEL_LARGE);
7844}
7845
43e9d192 7846static bool
e78d485e 7847aarch64_use_blocks_for_constant_p (machine_mode, const_rtx)
43e9d192 7848{
74a9301d
VM
7849 /* We can't use blocks for constants when we're using a per-function
7850 constant pool. */
7851 return !aarch64_can_use_per_function_literal_pools_p ();
43e9d192
IB
7852}
7853
e78d485e
RR
7854/* Select appropriate section for constants depending
7855 on where we place literal pools. */
7856
43e9d192 7857static section *
e78d485e
RR
7858aarch64_select_rtx_section (machine_mode mode,
7859 rtx x,
7860 unsigned HOST_WIDE_INT align)
43e9d192 7861{
e78d485e
RR
7862 if (aarch64_can_use_per_function_literal_pools_p ())
7863 return function_section (current_function_decl);
43e9d192 7864
e78d485e
RR
7865 return default_elf_select_rtx_section (mode, x, align);
7866}
43e9d192 7867
5fca7b66
RH
7868/* Implement ASM_OUTPUT_POOL_EPILOGUE. */
7869void
7870aarch64_asm_output_pool_epilogue (FILE *f, const char *, tree,
7871 HOST_WIDE_INT offset)
7872{
7873 /* When using per-function literal pools, we must ensure that any code
7874 section is aligned to the minimal instruction length, lest we get
7875 errors from the assembler re "unaligned instructions". */
7876 if ((offset & 3) && aarch64_can_use_per_function_literal_pools_p ())
7877 ASM_OUTPUT_ALIGN (f, 2);
7878}
7879
43e9d192
IB
7880/* Costs. */
7881
7882/* Helper function for rtx cost calculation. Strip a shift expression
7883 from X. Returns the inner operand if successful, or the original
7884 expression on failure. */
7885static rtx
7886aarch64_strip_shift (rtx x)
7887{
7888 rtx op = x;
7889
57b77d46
RE
7890 /* We accept both ROTATERT and ROTATE: since the RHS must be a constant
7891 we can convert both to ROR during final output. */
43e9d192
IB
7892 if ((GET_CODE (op) == ASHIFT
7893 || GET_CODE (op) == ASHIFTRT
57b77d46
RE
7894 || GET_CODE (op) == LSHIFTRT
7895 || GET_CODE (op) == ROTATERT
7896 || GET_CODE (op) == ROTATE)
43e9d192
IB
7897 && CONST_INT_P (XEXP (op, 1)))
7898 return XEXP (op, 0);
7899
7900 if (GET_CODE (op) == MULT
7901 && CONST_INT_P (XEXP (op, 1))
7902 && ((unsigned) exact_log2 (INTVAL (XEXP (op, 1)))) < 64)
7903 return XEXP (op, 0);
7904
7905 return x;
7906}
7907
4745e701 7908/* Helper function for rtx cost calculation. Strip an extend
43e9d192
IB
7909 expression from X. Returns the inner operand if successful, or the
7910 original expression on failure. We deal with a number of possible
b10f1009
AP
7911 canonicalization variations here. If STRIP_SHIFT is true, then
7912 we can strip off a shift also. */
43e9d192 7913static rtx
b10f1009 7914aarch64_strip_extend (rtx x, bool strip_shift)
43e9d192 7915{
77e994c9 7916 scalar_int_mode mode;
43e9d192
IB
7917 rtx op = x;
7918
77e994c9
RS
7919 if (!is_a <scalar_int_mode> (GET_MODE (op), &mode))
7920 return op;
7921
43e9d192
IB
7922 /* Zero and sign extraction of a widened value. */
7923 if ((GET_CODE (op) == ZERO_EXTRACT || GET_CODE (op) == SIGN_EXTRACT)
7924 && XEXP (op, 2) == const0_rtx
4745e701 7925 && GET_CODE (XEXP (op, 0)) == MULT
77e994c9 7926 && aarch64_is_extend_from_extract (mode, XEXP (XEXP (op, 0), 1),
43e9d192
IB
7927 XEXP (op, 1)))
7928 return XEXP (XEXP (op, 0), 0);
7929
7930 /* It can also be represented (for zero-extend) as an AND with an
7931 immediate. */
7932 if (GET_CODE (op) == AND
7933 && GET_CODE (XEXP (op, 0)) == MULT
7934 && CONST_INT_P (XEXP (XEXP (op, 0), 1))
7935 && CONST_INT_P (XEXP (op, 1))
7936 && aarch64_uxt_size (exact_log2 (INTVAL (XEXP (XEXP (op, 0), 1))),
7937 INTVAL (XEXP (op, 1))) != 0)
7938 return XEXP (XEXP (op, 0), 0);
7939
7940 /* Now handle extended register, as this may also have an optional
7941 left shift by 1..4. */
b10f1009
AP
7942 if (strip_shift
7943 && GET_CODE (op) == ASHIFT
43e9d192
IB
7944 && CONST_INT_P (XEXP (op, 1))
7945 && ((unsigned HOST_WIDE_INT) INTVAL (XEXP (op, 1))) <= 4)
7946 op = XEXP (op, 0);
7947
7948 if (GET_CODE (op) == ZERO_EXTEND
7949 || GET_CODE (op) == SIGN_EXTEND)
7950 op = XEXP (op, 0);
7951
7952 if (op != x)
7953 return op;
7954
4745e701
JG
7955 return x;
7956}
7957
0a78ebe4
KT
7958/* Return true iff CODE is a shift supported in combination
7959 with arithmetic instructions. */
4d1919ed 7960
0a78ebe4
KT
7961static bool
7962aarch64_shift_p (enum rtx_code code)
7963{
7964 return code == ASHIFT || code == ASHIFTRT || code == LSHIFTRT;
7965}
7966
b10f1009
AP
7967
7968/* Return true iff X is a cheap shift without a sign extend. */
7969
7970static bool
7971aarch64_cheap_mult_shift_p (rtx x)
7972{
7973 rtx op0, op1;
7974
7975 op0 = XEXP (x, 0);
7976 op1 = XEXP (x, 1);
7977
7978 if (!(aarch64_tune_params.extra_tuning_flags
7979 & AARCH64_EXTRA_TUNE_CHEAP_SHIFT_EXTEND))
7980 return false;
7981
7982 if (GET_CODE (op0) == SIGN_EXTEND)
7983 return false;
7984
7985 if (GET_CODE (x) == ASHIFT && CONST_INT_P (op1)
7986 && UINTVAL (op1) <= 4)
7987 return true;
7988
7989 if (GET_CODE (x) != MULT || !CONST_INT_P (op1))
7990 return false;
7991
7992 HOST_WIDE_INT l2 = exact_log2 (INTVAL (op1));
7993
7994 if (l2 > 0 && l2 <= 4)
7995 return true;
7996
7997 return false;
7998}
7999
4745e701 8000/* Helper function for rtx cost calculation. Calculate the cost of
0a78ebe4
KT
8001 a MULT or ASHIFT, which may be part of a compound PLUS/MINUS rtx.
 8002 Return the calculated cost of the expression, recursing manually into
4745e701
JG
8003 operands where needed. */
8004
8005static int
e548c9df 8006aarch64_rtx_mult_cost (rtx x, enum rtx_code code, int outer, bool speed)
4745e701
JG
8007{
8008 rtx op0, op1;
8009 const struct cpu_cost_table *extra_cost
b175b679 8010 = aarch64_tune_params.insn_extra_cost;
4745e701 8011 int cost = 0;
0a78ebe4 8012 bool compound_p = (outer == PLUS || outer == MINUS);
ef4bddc2 8013 machine_mode mode = GET_MODE (x);
4745e701
JG
8014
8015 gcc_checking_assert (code == MULT);
8016
8017 op0 = XEXP (x, 0);
8018 op1 = XEXP (x, 1);
8019
8020 if (VECTOR_MODE_P (mode))
8021 mode = GET_MODE_INNER (mode);
8022
8023 /* Integer multiply/fma. */
8024 if (GET_MODE_CLASS (mode) == MODE_INT)
8025 {
8026 /* The multiply will be canonicalized as a shift, cost it as such. */
0a78ebe4
KT
8027 if (aarch64_shift_p (GET_CODE (x))
8028 || (CONST_INT_P (op1)
8029 && exact_log2 (INTVAL (op1)) > 0))
4745e701 8030 {
0a78ebe4
KT
8031 bool is_extend = GET_CODE (op0) == ZERO_EXTEND
8032 || GET_CODE (op0) == SIGN_EXTEND;
4745e701
JG
8033 if (speed)
8034 {
0a78ebe4
KT
8035 if (compound_p)
8036 {
b10f1009
AP
8037 /* If the shift is considered cheap,
8038 then don't add any cost. */
8039 if (aarch64_cheap_mult_shift_p (x))
8040 ;
8041 else if (REG_P (op1))
0a78ebe4
KT
8042 /* ARITH + shift-by-register. */
8043 cost += extra_cost->alu.arith_shift_reg;
8044 else if (is_extend)
8045 /* ARITH + extended register. We don't have a cost field
8046 for ARITH+EXTEND+SHIFT, so use extend_arith here. */
8047 cost += extra_cost->alu.extend_arith;
8048 else
8049 /* ARITH + shift-by-immediate. */
8050 cost += extra_cost->alu.arith_shift;
8051 }
4745e701
JG
8052 else
8053 /* LSL (immediate). */
0a78ebe4
KT
8054 cost += extra_cost->alu.shift;
8055
4745e701 8056 }
0a78ebe4
KT
8057 /* Strip extends as we will have costed them in the case above. */
8058 if (is_extend)
b10f1009 8059 op0 = aarch64_strip_extend (op0, true);
4745e701 8060
e548c9df 8061 cost += rtx_cost (op0, VOIDmode, code, 0, speed);
4745e701
JG
8062
8063 return cost;
8064 }
8065
d2ac256b
KT
8066 /* MNEG or [US]MNEGL. Extract the NEG operand and indicate that it's a
8067 compound and let the below cases handle it. After all, MNEG is a
8068 special-case alias of MSUB. */
8069 if (GET_CODE (op0) == NEG)
8070 {
8071 op0 = XEXP (op0, 0);
8072 compound_p = true;
8073 }
8074
4745e701
JG
8075 /* Integer multiplies or FMAs have zero/sign extending variants. */
8076 if ((GET_CODE (op0) == ZERO_EXTEND
8077 && GET_CODE (op1) == ZERO_EXTEND)
8078 || (GET_CODE (op0) == SIGN_EXTEND
8079 && GET_CODE (op1) == SIGN_EXTEND))
8080 {
e548c9df
AM
8081 cost += rtx_cost (XEXP (op0, 0), VOIDmode, MULT, 0, speed);
8082 cost += rtx_cost (XEXP (op1, 0), VOIDmode, MULT, 1, speed);
4745e701
JG
8083
8084 if (speed)
8085 {
0a78ebe4 8086 if (compound_p)
d2ac256b 8087 /* SMADDL/UMADDL/UMSUBL/SMSUBL. */
4745e701
JG
8088 cost += extra_cost->mult[0].extend_add;
8089 else
8090 /* MUL/SMULL/UMULL. */
8091 cost += extra_cost->mult[0].extend;
8092 }
8093
8094 return cost;
8095 }
8096
d2ac256b 8097 /* This is either an integer multiply or a MADD. In both cases
4745e701 8098 we want to recurse and cost the operands. */
e548c9df
AM
8099 cost += rtx_cost (op0, mode, MULT, 0, speed);
8100 cost += rtx_cost (op1, mode, MULT, 1, speed);
4745e701
JG
8101
8102 if (speed)
8103 {
0a78ebe4 8104 if (compound_p)
d2ac256b 8105 /* MADD/MSUB. */
4745e701
JG
8106 cost += extra_cost->mult[mode == DImode].add;
8107 else
8108 /* MUL. */
8109 cost += extra_cost->mult[mode == DImode].simple;
8110 }
8111
8112 return cost;
8113 }
8114 else
8115 {
8116 if (speed)
8117 {
3d840f7d 8118 /* Floating-point FMA/FMUL can also support negations of the
d318517d
SN
8119 operands, unless the rounding mode is upward or downward in
8120 which case FNMUL is different than FMUL with operand negation. */
8121 bool neg0 = GET_CODE (op0) == NEG;
8122 bool neg1 = GET_CODE (op1) == NEG;
8123 if (compound_p || !flag_rounding_math || (neg0 && neg1))
8124 {
8125 if (neg0)
8126 op0 = XEXP (op0, 0);
8127 if (neg1)
8128 op1 = XEXP (op1, 0);
8129 }
4745e701 8130
0a78ebe4 8131 if (compound_p)
4745e701
JG
8132 /* FMADD/FNMADD/FNMSUB/FMSUB. */
8133 cost += extra_cost->fp[mode == DFmode].fma;
8134 else
3d840f7d 8135 /* FMUL/FNMUL. */
4745e701
JG
8136 cost += extra_cost->fp[mode == DFmode].mult;
8137 }
8138
e548c9df
AM
8139 cost += rtx_cost (op0, mode, MULT, 0, speed);
8140 cost += rtx_cost (op1, mode, MULT, 1, speed);
4745e701
JG
8141 return cost;
8142 }
43e9d192
IB
8143}
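/* As an illustration of the above, (plus (mult x 4) y) costed for speed is
   treated as an ADD with an LSL #2: it adds extra_cost->alu.arith_shift
   (unless the tuning marks such shifts as free) plus the recursive cost
   of x, rather than the cost of a full multiply.  */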
8144
67747367
JG
8145static int
8146aarch64_address_cost (rtx x,
ef4bddc2 8147 machine_mode mode,
67747367
JG
8148 addr_space_t as ATTRIBUTE_UNUSED,
8149 bool speed)
8150{
8151 enum rtx_code c = GET_CODE (x);
b175b679 8152 const struct cpu_addrcost_table *addr_cost = aarch64_tune_params.addr_cost;
67747367
JG
8153 struct aarch64_address_info info;
8154 int cost = 0;
8155 info.shift = 0;
8156
a97d8b98 8157 if (!aarch64_classify_address (&info, x, mode, false))
67747367
JG
8158 {
8159 if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF)
8160 {
8161 /* This is a CONST or SYMBOL ref which will be split
8162 in a different way depending on the code model in use.
8163 Cost it through the generic infrastructure. */
e548c9df 8164 int cost_symbol_ref = rtx_cost (x, Pmode, MEM, 1, speed);
67747367
JG
8165 /* Divide through by the cost of one instruction to
8166 bring it to the same units as the address costs. */
8167 cost_symbol_ref /= COSTS_N_INSNS (1);
8168 /* The cost is then the cost of preparing the address,
8169 followed by an immediate (possibly 0) offset. */
8170 return cost_symbol_ref + addr_cost->imm_offset;
8171 }
8172 else
8173 {
8174 /* This is most likely a jump table from a case
8175 statement. */
8176 return addr_cost->register_offset;
8177 }
8178 }
8179
8180 switch (info.type)
8181 {
8182 case ADDRESS_LO_SUM:
8183 case ADDRESS_SYMBOLIC:
8184 case ADDRESS_REG_IMM:
8185 cost += addr_cost->imm_offset;
8186 break;
8187
8188 case ADDRESS_REG_WB:
8189 if (c == PRE_INC || c == PRE_DEC || c == PRE_MODIFY)
8190 cost += addr_cost->pre_modify;
8191 else if (c == POST_INC || c == POST_DEC || c == POST_MODIFY)
8192 cost += addr_cost->post_modify;
8193 else
8194 gcc_unreachable ();
8195
8196 break;
8197
8198 case ADDRESS_REG_REG:
8199 cost += addr_cost->register_offset;
8200 break;
8201
67747367 8202 case ADDRESS_REG_SXTW:
783879e6
EM
8203 cost += addr_cost->register_sextend;
8204 break;
8205
8206 case ADDRESS_REG_UXTW:
8207 cost += addr_cost->register_zextend;
67747367
JG
8208 break;
8209
8210 default:
8211 gcc_unreachable ();
8212 }
8213
8214
8215 if (info.shift > 0)
8216 {
8217 /* For the sake of calculating the cost of the shifted register
8218 component, we can treat same sized modes in the same way. */
6a70badb
RS
8219 if (known_eq (GET_MODE_BITSIZE (mode), 16))
8220 cost += addr_cost->addr_scale_costs.hi;
8221 else if (known_eq (GET_MODE_BITSIZE (mode), 32))
8222 cost += addr_cost->addr_scale_costs.si;
8223 else if (known_eq (GET_MODE_BITSIZE (mode), 64))
8224 cost += addr_cost->addr_scale_costs.di;
8225 else
8226 /* We can't tell, or this is a 128-bit vector. */
8227 cost += addr_cost->addr_scale_costs.ti;
67747367
JG
8228 }
8229
8230 return cost;
8231}
8232
b9066f5a
MW
8233/* Return the cost of a branch. If SPEED_P is true then the compiler is
8234 optimizing for speed. If PREDICTABLE_P is true then the branch is predicted
8235 to be taken. */
8236
8237int
8238aarch64_branch_cost (bool speed_p, bool predictable_p)
8239{
8240 /* When optimizing for speed, use the cost of unpredictable branches. */
8241 const struct cpu_branch_cost *branch_costs =
b175b679 8242 aarch64_tune_params.branch_costs;
b9066f5a
MW
8243
8244 if (!speed_p || predictable_p)
8245 return branch_costs->predictable;
8246 else
8247 return branch_costs->unpredictable;
8248}
8249
7cc2145f
JG
8250/* Return true if the RTX X in mode MODE is a zero or sign extract
8251 usable in an ADD or SUB (extended register) instruction. */
8252static bool
77e994c9 8253aarch64_rtx_arith_op_extract_p (rtx x, scalar_int_mode mode)
7cc2145f
JG
8254{
8255 /* Catch add with a sign extract.
8256 This is add_<optab><mode>_multp2. */
8257 if (GET_CODE (x) == SIGN_EXTRACT
8258 || GET_CODE (x) == ZERO_EXTRACT)
8259 {
8260 rtx op0 = XEXP (x, 0);
8261 rtx op1 = XEXP (x, 1);
8262 rtx op2 = XEXP (x, 2);
8263
8264 if (GET_CODE (op0) == MULT
8265 && CONST_INT_P (op1)
8266 && op2 == const0_rtx
8267 && CONST_INT_P (XEXP (op0, 1))
8268 && aarch64_is_extend_from_extract (mode,
8269 XEXP (op0, 1),
8270 op1))
8271 {
8272 return true;
8273 }
8274 }
e47c4031
KT
8275 /* The simple case <ARITH>, XD, XN, XM, [us]xt.
8276 No shift. */
8277 else if (GET_CODE (x) == SIGN_EXTEND
8278 || GET_CODE (x) == ZERO_EXTEND)
8279 return REG_P (XEXP (x, 0));
7cc2145f
JG
8280
8281 return false;
8282}
8283
61263118
KT
8284static bool
8285aarch64_frint_unspec_p (unsigned int u)
8286{
8287 switch (u)
8288 {
8289 case UNSPEC_FRINTZ:
8290 case UNSPEC_FRINTP:
8291 case UNSPEC_FRINTM:
8292 case UNSPEC_FRINTA:
8293 case UNSPEC_FRINTN:
8294 case UNSPEC_FRINTX:
8295 case UNSPEC_FRINTI:
8296 return true;
8297
8298 default:
8299 return false;
8300 }
8301}
8302
fb0cb7fa
KT
8303/* Return true iff X is an rtx that will match an extr instruction
8304 i.e. as described in the *extr<mode>5_insn family of patterns.
8305 OP0 and OP1 will be set to the operands of the shifts involved
8306 on success and will be NULL_RTX otherwise. */
8307
8308static bool
8309aarch64_extr_rtx_p (rtx x, rtx *res_op0, rtx *res_op1)
8310{
8311 rtx op0, op1;
77e994c9
RS
8312 scalar_int_mode mode;
8313 if (!is_a <scalar_int_mode> (GET_MODE (x), &mode))
8314 return false;
fb0cb7fa
KT
8315
8316 *res_op0 = NULL_RTX;
8317 *res_op1 = NULL_RTX;
8318
8319 if (GET_CODE (x) != IOR)
8320 return false;
8321
8322 op0 = XEXP (x, 0);
8323 op1 = XEXP (x, 1);
8324
8325 if ((GET_CODE (op0) == ASHIFT && GET_CODE (op1) == LSHIFTRT)
8326 || (GET_CODE (op1) == ASHIFT && GET_CODE (op0) == LSHIFTRT))
8327 {
8328 /* Canonicalise locally to ashift in op0, lshiftrt in op1. */
8329 if (GET_CODE (op1) == ASHIFT)
8330 std::swap (op0, op1);
8331
8332 if (!CONST_INT_P (XEXP (op0, 1)) || !CONST_INT_P (XEXP (op1, 1)))
8333 return false;
8334
8335 unsigned HOST_WIDE_INT shft_amnt_0 = UINTVAL (XEXP (op0, 1));
8336 unsigned HOST_WIDE_INT shft_amnt_1 = UINTVAL (XEXP (op1, 1));
8337
8338 if (shft_amnt_0 < GET_MODE_BITSIZE (mode)
8339 && shft_amnt_0 + shft_amnt_1 == GET_MODE_BITSIZE (mode))
8340 {
8341 *res_op0 = XEXP (op0, 0);
8342 *res_op1 = XEXP (op1, 0);
8343 return true;
8344 }
8345 }
8346
8347 return false;
8348}
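/* Illustrative example, assumed for exposition only: in DImode,

     (ior:DI (ashift:DI (reg:DI x0) (const_int 16))
	     (lshiftrt:DI (reg:DI x1) (const_int 48)))

   has shift amounts 16 + 48 == 64 == GET_MODE_BITSIZE (DImode), so the
   function returns true with *RES_OP0 = x0 and *RES_OP1 = x1, and the IOR
   is costed as a single EXTR.  */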
8349
2d5ffe46
AP
8350/* Calculate the cost of calculating (if_then_else (OP0) (OP1) (OP2)),
8351 storing it in *COST. Result is true if the total cost of the operation
8352 has now been calculated. */
8353static bool
8354aarch64_if_then_else_costs (rtx op0, rtx op1, rtx op2, int *cost, bool speed)
8355{
b9e3afe9
AP
8356 rtx inner;
8357 rtx comparator;
8358 enum rtx_code cmpcode;
8359
8360 if (COMPARISON_P (op0))
8361 {
8362 inner = XEXP (op0, 0);
8363 comparator = XEXP (op0, 1);
8364 cmpcode = GET_CODE (op0);
8365 }
8366 else
8367 {
8368 inner = op0;
8369 comparator = const0_rtx;
8370 cmpcode = NE;
8371 }
8372
2d5ffe46
AP
8373 if (GET_CODE (op1) == PC || GET_CODE (op2) == PC)
8374 {
8375 /* Conditional branch. */
b9e3afe9 8376 if (GET_MODE_CLASS (GET_MODE (inner)) == MODE_CC)
2d5ffe46
AP
8377 return true;
8378 else
8379 {
b9e3afe9 8380 if (cmpcode == NE || cmpcode == EQ)
2d5ffe46 8381 {
2d5ffe46
AP
8382 if (comparator == const0_rtx)
8383 {
8384 /* TBZ/TBNZ/CBZ/CBNZ. */
8385 if (GET_CODE (inner) == ZERO_EXTRACT)
8386 /* TBZ/TBNZ. */
e548c9df
AM
8387 *cost += rtx_cost (XEXP (inner, 0), VOIDmode,
8388 ZERO_EXTRACT, 0, speed);
8389 else
8390 /* CBZ/CBNZ. */
8391 *cost += rtx_cost (inner, VOIDmode, cmpcode, 0, speed);
2d5ffe46
AP
8392
8393 return true;
8394 }
8395 }
b9e3afe9 8396 else if (cmpcode == LT || cmpcode == GE)
2d5ffe46 8397 {
2d5ffe46
AP
8398 /* TBZ/TBNZ. */
8399 if (comparator == const0_rtx)
8400 return true;
8401 }
8402 }
8403 }
b9e3afe9 8404 else if (GET_MODE_CLASS (GET_MODE (inner)) == MODE_CC)
2d5ffe46 8405 {
786298dc 8406 /* CCMP. */
6dfeb7ce 8407 if (GET_CODE (op1) == COMPARE)
786298dc
WD
8408 {
8409 /* Increase cost of CCMP reg, 0, imm, CC to prefer CMP reg, 0. */
8410 if (XEXP (op1, 1) == const0_rtx)
8411 *cost += 1;
8412 if (speed)
8413 {
8414 machine_mode mode = GET_MODE (XEXP (op1, 0));
8415 const struct cpu_cost_table *extra_cost
8416 = aarch64_tune_params.insn_extra_cost;
8417
8418 if (GET_MODE_CLASS (mode) == MODE_INT)
8419 *cost += extra_cost->alu.arith;
8420 else
8421 *cost += extra_cost->fp[mode == DFmode].compare;
8422 }
8423 return true;
8424 }
8425
2d5ffe46
AP
8426 /* It's a conditional operation based on the status flags,
8427 so it must be some flavor of CSEL. */
8428
8429 /* CSNEG, CSINV, and CSINC are handled for free as part of CSEL. */
8430 if (GET_CODE (op1) == NEG
8431 || GET_CODE (op1) == NOT
8432 || (GET_CODE (op1) == PLUS && XEXP (op1, 1) == const1_rtx))
8433 op1 = XEXP (op1, 0);
bad00732
KT
8434 else if (GET_CODE (op1) == ZERO_EXTEND && GET_CODE (op2) == ZERO_EXTEND)
8435 {
8436 /* CSEL with zero-extension (*cmovdi_insn_uxtw). */
8437 op1 = XEXP (op1, 0);
8438 op2 = XEXP (op2, 0);
8439 }
2d5ffe46 8440
e548c9df
AM
8441 *cost += rtx_cost (op1, VOIDmode, IF_THEN_ELSE, 1, speed);
8442 *cost += rtx_cost (op2, VOIDmode, IF_THEN_ELSE, 2, speed);
2d5ffe46
AP
8443 return true;
8444 }
8445
8446 /* We don't know what this is, cost all operands. */
8447 return false;
8448}
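/* Illustrative note, not part of the original source: for a branch such as

     (if_then_else (ne (reg:DI x0) (const_int 0)) (label_ref L) (pc))

   the NE-against-zero path above applies, so only the inner register is
   costed and the compare-and-branch pair is treated as a single CBNZ.  */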
8449
283b6c85
KT
8450/* Check whether X is a bitfield operation of the form shift + extend that
8451 maps down to a UBFIZ/SBFIZ/UBFX/SBFX instruction. If so, return the
8452 operand to which the bitfield operation is applied. Otherwise return
8453 NULL_RTX. */
8454
8455static rtx
8456aarch64_extend_bitfield_pattern_p (rtx x)
8457{
8458 rtx_code outer_code = GET_CODE (x);
8459 machine_mode outer_mode = GET_MODE (x);
8460
8461 if (outer_code != ZERO_EXTEND && outer_code != SIGN_EXTEND
8462 && outer_mode != SImode && outer_mode != DImode)
8463 return NULL_RTX;
8464
8465 rtx inner = XEXP (x, 0);
8466 rtx_code inner_code = GET_CODE (inner);
8467 machine_mode inner_mode = GET_MODE (inner);
8468 rtx op = NULL_RTX;
8469
8470 switch (inner_code)
8471 {
8472 case ASHIFT:
8473 if (CONST_INT_P (XEXP (inner, 1))
8474 && (inner_mode == QImode || inner_mode == HImode))
8475 op = XEXP (inner, 0);
8476 break;
8477 case LSHIFTRT:
8478 if (outer_code == ZERO_EXTEND && CONST_INT_P (XEXP (inner, 1))
8479 && (inner_mode == QImode || inner_mode == HImode))
8480 op = XEXP (inner, 0);
8481 break;
8482 case ASHIFTRT:
8483 if (outer_code == SIGN_EXTEND && CONST_INT_P (XEXP (inner, 1))
8484 && (inner_mode == QImode || inner_mode == HImode))
8485 op = XEXP (inner, 0);
8486 break;
8487 default:
8488 break;
8489 }
8490
8491 return op;
8492}
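/* Illustrative examples, assumed for exposition only:

     (zero_extend:SI (lshiftrt:HI (reg:HI) (const_int 3)))  maps to UBFX
     (sign_extend:DI (ashift:QI (reg:QI) (const_int 2)))    maps to SBFIZ

   In both cases the inner shifted register is returned so that callers
   cost only that operand; the extend plus shift collapse into a single
   bitfield instruction.  */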
8493
8c83f71d
KT
8494/* Return true if the mask and a shift amount from an RTX of the form
8495 (x << SHFT_AMNT) & MASK are valid to combine into a UBFIZ instruction of
8496 mode MODE. See the *andim_ashift<mode>_bfiz pattern. */
8497
8498bool
77e994c9
RS
8499aarch64_mask_and_shift_for_ubfiz_p (scalar_int_mode mode, rtx mask,
8500 rtx shft_amnt)
8c83f71d
KT
8501{
8502 return CONST_INT_P (mask) && CONST_INT_P (shft_amnt)
8503 && INTVAL (shft_amnt) < GET_MODE_BITSIZE (mode)
8504 && exact_log2 ((INTVAL (mask) >> INTVAL (shft_amnt)) + 1) >= 0
8505 && (INTVAL (mask) & ((1 << INTVAL (shft_amnt)) - 1)) == 0;
8506}
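/* Worked example, assumed for exposition only: in SImode with
   SHFT_AMNT == 8 and MASK == 0xff00 we have

     8 < 32,  (0xff00 >> 8) + 1 == 0x100 == 2^8,  0xff00 & 0xff == 0

   so the predicate holds, and (x << 8) & 0xff00 can be emitted as a
   single UBFIZ of width 8 at bit position 8.  */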
8507
43e9d192
IB
8508/* Calculate the cost of calculating X, storing it in *COST. Result
8509 is true if the total cost of the operation has now been calculated. */
8510static bool
e548c9df 8511aarch64_rtx_costs (rtx x, machine_mode mode, int outer ATTRIBUTE_UNUSED,
43e9d192
IB
8512 int param ATTRIBUTE_UNUSED, int *cost, bool speed)
8513{
a8eecd00 8514 rtx op0, op1, op2;
73250c4c 8515 const struct cpu_cost_table *extra_cost
b175b679 8516 = aarch64_tune_params.insn_extra_cost;
e548c9df 8517 int code = GET_CODE (x);
b4206259 8518 scalar_int_mode int_mode;
43e9d192 8519
7fc5ef02
JG
8520 /* By default, assume that everything has equivalent cost to the
8521 cheapest instruction. Any additional costs are applied as a delta
8522 above this default. */
8523 *cost = COSTS_N_INSNS (1);
8524
43e9d192
IB
8525 switch (code)
8526 {
8527 case SET:
ba123b0d
JG
8528 /* The cost depends entirely on the operands to SET. */
8529 *cost = 0;
43e9d192
IB
8530 op0 = SET_DEST (x);
8531 op1 = SET_SRC (x);
8532
8533 switch (GET_CODE (op0))
8534 {
8535 case MEM:
8536 if (speed)
2961177e
JG
8537 {
8538 rtx address = XEXP (op0, 0);
b6875aac
KV
8539 if (VECTOR_MODE_P (mode))
8540 *cost += extra_cost->ldst.storev;
8541 else if (GET_MODE_CLASS (mode) == MODE_INT)
2961177e
JG
8542 *cost += extra_cost->ldst.store;
8543 else if (mode == SFmode)
8544 *cost += extra_cost->ldst.storef;
8545 else if (mode == DFmode)
8546 *cost += extra_cost->ldst.stored;
8547
8548 *cost +=
8549 COSTS_N_INSNS (aarch64_address_cost (address, mode,
8550 0, speed));
8551 }
43e9d192 8552
e548c9df 8553 *cost += rtx_cost (op1, mode, SET, 1, speed);
43e9d192
IB
8554 return true;
8555
8556 case SUBREG:
8557 if (! REG_P (SUBREG_REG (op0)))
e548c9df 8558 *cost += rtx_cost (SUBREG_REG (op0), VOIDmode, SET, 0, speed);
ba123b0d 8559
43e9d192
IB
8560 /* Fall through. */
8561 case REG:
b6875aac
KV
8562 /* The cost is one per vector-register copied. */
8563 if (VECTOR_MODE_P (GET_MODE (op0)) && REG_P (op1))
8564 {
fe1447a1
RS
8565 int nregs = aarch64_hard_regno_nregs (V0_REGNUM, GET_MODE (op0));
8566 *cost = COSTS_N_INSNS (nregs);
b6875aac 8567 }
ba123b0d
JG
8568 /* const0_rtx is in general free, but we will use an
8569 instruction to set a register to 0. */
b6875aac
KV
8570 else if (REG_P (op1) || op1 == const0_rtx)
8571 {
8572 /* The cost is 1 per register copied. */
fe1447a1
RS
8573 int nregs = aarch64_hard_regno_nregs (R0_REGNUM, GET_MODE (op0));
8574 *cost = COSTS_N_INSNS (nregs);
b6875aac 8575 }
ba123b0d
JG
8576 else
8577 /* Cost is just the cost of the RHS of the set. */
e548c9df 8578 *cost += rtx_cost (op1, mode, SET, 1, speed);
43e9d192
IB
8579 return true;
8580
ba123b0d 8581 case ZERO_EXTRACT:
43e9d192 8582 case SIGN_EXTRACT:
ba123b0d
JG
8583 /* Bit-field insertion. Strip any redundant widening of
8584 the RHS to meet the width of the target. */
43e9d192
IB
8585 if (GET_CODE (op1) == SUBREG)
8586 op1 = SUBREG_REG (op1);
8587 if ((GET_CODE (op1) == ZERO_EXTEND
8588 || GET_CODE (op1) == SIGN_EXTEND)
4aa81c2e 8589 && CONST_INT_P (XEXP (op0, 1))
77e994c9
RS
8590 && is_a <scalar_int_mode> (GET_MODE (XEXP (op1, 0)), &int_mode)
8591 && GET_MODE_BITSIZE (int_mode) >= INTVAL (XEXP (op0, 1)))
43e9d192 8592 op1 = XEXP (op1, 0);
ba123b0d
JG
8593
8594 if (CONST_INT_P (op1))
8595 {
8596 /* MOV immediate is assumed to always be cheap. */
8597 *cost = COSTS_N_INSNS (1);
8598 }
8599 else
8600 {
8601 /* BFM. */
8602 if (speed)
8603 *cost += extra_cost->alu.bfi;
e548c9df 8604 *cost += rtx_cost (op1, VOIDmode, (enum rtx_code) code, 1, speed);
ba123b0d
JG
8605 }
8606
43e9d192
IB
8607 return true;
8608
8609 default:
ba123b0d
JG
8610 /* We can't make sense of this, assume default cost. */
8611 *cost = COSTS_N_INSNS (1);
61263118 8612 return false;
43e9d192
IB
8613 }
8614 return false;
8615
9dfc162c
JG
8616 case CONST_INT:
8617 /* If an instruction can incorporate a constant within the
8618 instruction, the instruction's expression avoids calling
8619 rtx_cost() on the constant. If rtx_cost() is called on a
8620 constant, then it is usually because the constant must be
8621 moved into a register by one or more instructions.
8622
8623 The exception is constant 0, which can be expressed
8624 as XZR/WZR and is therefore free. The exception to this is
8625 if we have (set (reg) (const0_rtx)) in which case we must cost
8626 the move. However, we can catch that when we cost the SET, so
8627 we don't need to consider that here. */
8628 if (x == const0_rtx)
8629 *cost = 0;
8630 else
8631 {
8632 /* To an approximation, building any other constant is
8633 proportionally expensive to the number of instructions
8634 required to build that constant. This is true whether we
8635 are compiling for SPEED or otherwise. */
77e994c9
RS
8636 if (!is_a <scalar_int_mode> (mode, &int_mode))
8637 int_mode = word_mode;
82614948 8638 *cost = COSTS_N_INSNS (aarch64_internal_mov_immediate
77e994c9 8639 (NULL_RTX, x, false, int_mode));
9dfc162c
JG
8640 }
8641 return true;
8642
8643 case CONST_DOUBLE:
a2170965
TC
8644
8645 /* First determine number of instructions to do the move
8646 as an integer constant. */
8647 if (!aarch64_float_const_representable_p (x)
8648 && !aarch64_can_const_movi_rtx_p (x, mode)
8649 && aarch64_float_const_rtx_p (x))
8650 {
8651 unsigned HOST_WIDE_INT ival;
8652 bool succeed = aarch64_reinterpret_float_as_int (x, &ival);
8653 gcc_assert (succeed);
8654
77e994c9
RS
8655 scalar_int_mode imode = (mode == HFmode
8656 ? SImode
8657 : int_mode_for_mode (mode).require ());
a2170965
TC
8658 int ncost = aarch64_internal_mov_immediate
8659 (NULL_RTX, gen_int_mode (ival, imode), false, imode);
8660 *cost += COSTS_N_INSNS (ncost);
8661 return true;
8662 }
8663
9dfc162c
JG
8664 if (speed)
8665 {
8666 /* mov[df,sf]_aarch64. */
8667 if (aarch64_float_const_representable_p (x))
8668 /* FMOV (scalar immediate). */
8669 *cost += extra_cost->fp[mode == DFmode].fpconst;
8670 else if (!aarch64_float_const_zero_rtx_p (x))
8671 {
8672 /* This will be a load from memory. */
8673 if (mode == DFmode)
8674 *cost += extra_cost->ldst.loadd;
8675 else
8676 *cost += extra_cost->ldst.loadf;
8677 }
8678 else
8679 /* Otherwise this is +0.0. We get this using MOVI d0, #0
8680 or MOV v0.s[0], wzr - neither of which are modeled by the
8681 cost tables. Just use the default cost. */
8682 {
8683 }
8684 }
8685
8686 return true;
8687
43e9d192
IB
8688 case MEM:
8689 if (speed)
2961177e
JG
8690 {
8691 /* For loads we want the base cost of a load, plus an
8692 approximation for the additional cost of the addressing
8693 mode. */
8694 rtx address = XEXP (x, 0);
b6875aac
KV
8695 if (VECTOR_MODE_P (mode))
8696 *cost += extra_cost->ldst.loadv;
8697 else if (GET_MODE_CLASS (mode) == MODE_INT)
2961177e
JG
8698 *cost += extra_cost->ldst.load;
8699 else if (mode == SFmode)
8700 *cost += extra_cost->ldst.loadf;
8701 else if (mode == DFmode)
8702 *cost += extra_cost->ldst.loadd;
8703
8704 *cost +=
8705 COSTS_N_INSNS (aarch64_address_cost (address, mode,
8706 0, speed));
8707 }
43e9d192
IB
8708
8709 return true;
8710
8711 case NEG:
4745e701
JG
8712 op0 = XEXP (x, 0);
8713
b6875aac
KV
8714 if (VECTOR_MODE_P (mode))
8715 {
8716 if (speed)
8717 {
8718 /* FNEG. */
8719 *cost += extra_cost->vect.alu;
8720 }
8721 return false;
8722 }
8723
e548c9df
AM
8724 if (GET_MODE_CLASS (mode) == MODE_INT)
8725 {
4745e701
JG
8726 if (GET_RTX_CLASS (GET_CODE (op0)) == RTX_COMPARE
8727 || GET_RTX_CLASS (GET_CODE (op0)) == RTX_COMM_COMPARE)
8728 {
8729 /* CSETM. */
e548c9df 8730 *cost += rtx_cost (XEXP (op0, 0), VOIDmode, NEG, 0, speed);
4745e701
JG
8731 return true;
8732 }
8733
8734 /* Cost this as SUB wzr, X. */
e548c9df 8735 op0 = CONST0_RTX (mode);
4745e701
JG
8736 op1 = XEXP (x, 0);
8737 goto cost_minus;
8738 }
8739
e548c9df 8740 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
4745e701
JG
8741 {
8742 /* Support (neg(fma...)) as a single instruction only if
8743 sign of zeros is unimportant. This matches the decision
8744 making in aarch64.md. */
8745 if (GET_CODE (op0) == FMA && !HONOR_SIGNED_ZEROS (GET_MODE (op0)))
8746 {
8747 /* FNMADD. */
e548c9df 8748 *cost = rtx_cost (op0, mode, NEG, 0, speed);
4745e701
JG
8749 return true;
8750 }
d318517d
SN
8751 if (GET_CODE (op0) == MULT)
8752 {
8753 /* FNMUL. */
8754 *cost = rtx_cost (op0, mode, NEG, 0, speed);
8755 return true;
8756 }
4745e701
JG
8757 if (speed)
8758 /* FNEG. */
8759 *cost += extra_cost->fp[mode == DFmode].neg;
8760 return false;
8761 }
8762
8763 return false;
43e9d192 8764
781aeb73
KT
8765 case CLRSB:
8766 case CLZ:
8767 if (speed)
b6875aac
KV
8768 {
8769 if (VECTOR_MODE_P (mode))
8770 *cost += extra_cost->vect.alu;
8771 else
8772 *cost += extra_cost->alu.clz;
8773 }
781aeb73
KT
8774
8775 return false;
8776
43e9d192
IB
8777 case COMPARE:
8778 op0 = XEXP (x, 0);
8779 op1 = XEXP (x, 1);
8780
8781 if (op1 == const0_rtx
8782 && GET_CODE (op0) == AND)
8783 {
8784 x = op0;
e548c9df 8785 mode = GET_MODE (op0);
43e9d192
IB
8786 goto cost_logic;
8787 }
8788
a8eecd00
JG
8789 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT)
8790 {
8791 /* TODO: A write to the CC flags possibly costs extra, this
8792 needs encoding in the cost tables. */
8793
e548c9df 8794 mode = GET_MODE (op0);
a8eecd00
JG
8795 /* ANDS. */
8796 if (GET_CODE (op0) == AND)
8797 {
8798 x = op0;
8799 goto cost_logic;
8800 }
8801
8802 if (GET_CODE (op0) == PLUS)
8803 {
8804 /* ADDS (and CMN alias). */
8805 x = op0;
8806 goto cost_plus;
8807 }
8808
8809 if (GET_CODE (op0) == MINUS)
8810 {
8811 /* SUBS. */
8812 x = op0;
8813 goto cost_minus;
8814 }
8815
345854d8
KT
8816 if (GET_CODE (op0) == ZERO_EXTRACT && op1 == const0_rtx
8817 && GET_MODE (x) == CC_NZmode && CONST_INT_P (XEXP (op0, 1))
8818 && CONST_INT_P (XEXP (op0, 2)))
8819 {
8820 /* COMPARE of ZERO_EXTRACT form of TST-immediate.
8821 Handle it here directly rather than going to cost_logic
8822 since we know the immediate generated for the TST is valid
8823 so we can avoid creating an intermediate rtx for it only
8824 for costing purposes. */
8825 if (speed)
8826 *cost += extra_cost->alu.logical;
8827
8828 *cost += rtx_cost (XEXP (op0, 0), GET_MODE (op0),
8829 ZERO_EXTRACT, 0, speed);
8830 return true;
8831 }
8832
a8eecd00
JG
8833 if (GET_CODE (op1) == NEG)
8834 {
8835 /* CMN. */
8836 if (speed)
8837 *cost += extra_cost->alu.arith;
8838
e548c9df
AM
8839 *cost += rtx_cost (op0, mode, COMPARE, 0, speed);
8840 *cost += rtx_cost (XEXP (op1, 0), mode, NEG, 1, speed);
a8eecd00
JG
8841 return true;
8842 }
8843
8844 /* CMP.
8845
8846 Compare can freely swap the order of operands, and
8847 canonicalization puts the more complex operation first.
8848 But the integer MINUS logic expects the shift/extend
8849 operation in op1. */
8850 if (! (REG_P (op0)
8851 || (GET_CODE (op0) == SUBREG && REG_P (SUBREG_REG (op0)))))
8852 {
8853 op0 = XEXP (x, 1);
8854 op1 = XEXP (x, 0);
8855 }
8856 goto cost_minus;
8857 }
8858
8859 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_FLOAT)
8860 {
8861 /* FCMP. */
8862 if (speed)
8863 *cost += extra_cost->fp[mode == DFmode].compare;
8864
8865 if (CONST_DOUBLE_P (op1) && aarch64_float_const_zero_rtx_p (op1))
8866 {
e548c9df 8867 *cost += rtx_cost (op0, VOIDmode, COMPARE, 0, speed);
a8eecd00
JG
8868 /* FCMP supports constant 0.0 for no extra cost. */
8869 return true;
8870 }
8871 return false;
8872 }
8873
b6875aac
KV
8874 if (VECTOR_MODE_P (mode))
8875 {
8876 /* Vector compare. */
8877 if (speed)
8878 *cost += extra_cost->vect.alu;
8879
8880 if (aarch64_float_const_zero_rtx_p (op1))
8881 {
8882 /* Vector cm (eq|ge|gt|lt|le) supports constant 0.0 for no extra
8883 cost. */
8884 return true;
8885 }
8886 return false;
8887 }
a8eecd00 8888 return false;
43e9d192
IB
8889
8890 case MINUS:
4745e701
JG
8891 {
8892 op0 = XEXP (x, 0);
8893 op1 = XEXP (x, 1);
8894
8895cost_minus:
e548c9df 8896 *cost += rtx_cost (op0, mode, MINUS, 0, speed);
23cb6618 8897
4745e701
JG
8898 /* Detect valid immediates. */
8899 if ((GET_MODE_CLASS (mode) == MODE_INT
8900 || (GET_MODE_CLASS (mode) == MODE_CC
8901 && GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT))
8902 && CONST_INT_P (op1)
8903 && aarch64_uimm12_shift (INTVAL (op1)))
8904 {
4745e701
JG
8905 if (speed)
8906 /* SUB(S) (immediate). */
8907 *cost += extra_cost->alu.arith;
8908 return true;
4745e701
JG
8909 }
8910
7cc2145f 8911 /* Look for SUB (extended register). */
77e994c9
RS
8912 if (is_a <scalar_int_mode> (mode, &int_mode)
8913 && aarch64_rtx_arith_op_extract_p (op1, int_mode))
7cc2145f
JG
8914 {
8915 if (speed)
2533c820 8916 *cost += extra_cost->alu.extend_arith;
7cc2145f 8917
b10f1009 8918 op1 = aarch64_strip_extend (op1, true);
e47c4031 8919 *cost += rtx_cost (op1, VOIDmode,
e548c9df 8920 (enum rtx_code) GET_CODE (op1), 0, speed);
7cc2145f
JG
8921 return true;
8922 }
8923
b10f1009 8924 rtx new_op1 = aarch64_strip_extend (op1, false);
4745e701
JG
8925
8926 /* Cost this as an FMA-alike operation. */
8927 if ((GET_CODE (new_op1) == MULT
0a78ebe4 8928 || aarch64_shift_p (GET_CODE (new_op1)))
4745e701
JG
8929 && code != COMPARE)
8930 {
8931 *cost += aarch64_rtx_mult_cost (new_op1, MULT,
8932 (enum rtx_code) code,
8933 speed);
4745e701
JG
8934 return true;
8935 }
43e9d192 8936
e548c9df 8937 *cost += rtx_cost (new_op1, VOIDmode, MINUS, 1, speed);
43e9d192 8938
4745e701
JG
8939 if (speed)
8940 {
b6875aac
KV
8941 if (VECTOR_MODE_P (mode))
8942 {
8943 /* Vector SUB. */
8944 *cost += extra_cost->vect.alu;
8945 }
8946 else if (GET_MODE_CLASS (mode) == MODE_INT)
8947 {
8948 /* SUB(S). */
8949 *cost += extra_cost->alu.arith;
8950 }
4745e701 8951 else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
b6875aac
KV
8952 {
8953 /* FSUB. */
8954 *cost += extra_cost->fp[mode == DFmode].addsub;
8955 }
4745e701
JG
8956 }
8957 return true;
8958 }
43e9d192
IB
8959
8960 case PLUS:
4745e701
JG
8961 {
8962 rtx new_op0;
43e9d192 8963
4745e701
JG
8964 op0 = XEXP (x, 0);
8965 op1 = XEXP (x, 1);
43e9d192 8966
a8eecd00 8967cost_plus:
4745e701
JG
8968 if (GET_RTX_CLASS (GET_CODE (op0)) == RTX_COMPARE
8969 || GET_RTX_CLASS (GET_CODE (op0)) == RTX_COMM_COMPARE)
8970 {
8971 /* CSINC. */
e548c9df
AM
8972 *cost += rtx_cost (XEXP (op0, 0), mode, PLUS, 0, speed);
8973 *cost += rtx_cost (op1, mode, PLUS, 1, speed);
4745e701
JG
8974 return true;
8975 }
43e9d192 8976
4745e701 8977 if (GET_MODE_CLASS (mode) == MODE_INT
43cacb12
RS
8978 && ((CONST_INT_P (op1) && aarch64_uimm12_shift (INTVAL (op1)))
8979 || aarch64_sve_addvl_addpl_immediate (op1, mode)))
4745e701 8980 {
e548c9df 8981 *cost += rtx_cost (op0, mode, PLUS, 0, speed);
43e9d192 8982
4745e701
JG
8983 if (speed)
8984 /* ADD (immediate). */
8985 *cost += extra_cost->alu.arith;
8986 return true;
8987 }
8988
e548c9df 8989 *cost += rtx_cost (op1, mode, PLUS, 1, speed);
23cb6618 8990
7cc2145f 8991 /* Look for ADD (extended register). */
77e994c9
RS
8992 if (is_a <scalar_int_mode> (mode, &int_mode)
8993 && aarch64_rtx_arith_op_extract_p (op0, int_mode))
7cc2145f
JG
8994 {
8995 if (speed)
2533c820 8996 *cost += extra_cost->alu.extend_arith;
7cc2145f 8997
b10f1009 8998 op0 = aarch64_strip_extend (op0, true);
e47c4031 8999 *cost += rtx_cost (op0, VOIDmode,
e548c9df 9000 (enum rtx_code) GET_CODE (op0), 0, speed);
7cc2145f
JG
9001 return true;
9002 }
9003
4745e701
JG
9004 /* Strip any extend, leave shifts behind as we will
9005 cost them through mult_cost. */
b10f1009 9006 new_op0 = aarch64_strip_extend (op0, false);
4745e701
JG
9007
9008 if (GET_CODE (new_op0) == MULT
0a78ebe4 9009 || aarch64_shift_p (GET_CODE (new_op0)))
4745e701
JG
9010 {
9011 *cost += aarch64_rtx_mult_cost (new_op0, MULT, PLUS,
9012 speed);
4745e701
JG
9013 return true;
9014 }
9015
e548c9df 9016 *cost += rtx_cost (new_op0, VOIDmode, PLUS, 0, speed);
4745e701
JG
9017
9018 if (speed)
9019 {
b6875aac
KV
9020 if (VECTOR_MODE_P (mode))
9021 {
9022 /* Vector ADD. */
9023 *cost += extra_cost->vect.alu;
9024 }
9025 else if (GET_MODE_CLASS (mode) == MODE_INT)
9026 {
9027 /* ADD. */
9028 *cost += extra_cost->alu.arith;
9029 }
4745e701 9030 else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
b6875aac
KV
9031 {
9032 /* FADD. */
9033 *cost += extra_cost->fp[mode == DFmode].addsub;
9034 }
4745e701
JG
9035 }
9036 return true;
9037 }
43e9d192 9038
18b42b2a
KT
9039 case BSWAP:
9040 *cost = COSTS_N_INSNS (1);
9041
9042 if (speed)
b6875aac
KV
9043 {
9044 if (VECTOR_MODE_P (mode))
9045 *cost += extra_cost->vect.alu;
9046 else
9047 *cost += extra_cost->alu.rev;
9048 }
18b42b2a
KT
9049 return false;
9050
43e9d192 9051 case IOR:
f7d5cf8d
KT
9052 if (aarch_rev16_p (x))
9053 {
9054 *cost = COSTS_N_INSNS (1);
9055
b6875aac
KV
9056 if (speed)
9057 {
9058 if (VECTOR_MODE_P (mode))
9059 *cost += extra_cost->vect.alu;
9060 else
9061 *cost += extra_cost->alu.rev;
9062 }
9063 return true;
f7d5cf8d 9064 }
fb0cb7fa
KT
9065
9066 if (aarch64_extr_rtx_p (x, &op0, &op1))
9067 {
e548c9df
AM
9068 *cost += rtx_cost (op0, mode, IOR, 0, speed);
9069 *cost += rtx_cost (op1, mode, IOR, 1, speed);
fb0cb7fa
KT
9070 if (speed)
9071 *cost += extra_cost->alu.shift;
9072
9073 return true;
9074 }
f7d5cf8d 9075 /* Fall through. */
43e9d192
IB
9076 case XOR:
9077 case AND:
9078 cost_logic:
9079 op0 = XEXP (x, 0);
9080 op1 = XEXP (x, 1);
9081
b6875aac
KV
9082 if (VECTOR_MODE_P (mode))
9083 {
9084 if (speed)
9085 *cost += extra_cost->vect.alu;
9086 return true;
9087 }
9088
268c3b47
JG
9089 if (code == AND
9090 && GET_CODE (op0) == MULT
9091 && CONST_INT_P (XEXP (op0, 1))
9092 && CONST_INT_P (op1)
9093 && aarch64_uxt_size (exact_log2 (INTVAL (XEXP (op0, 1))),
9094 INTVAL (op1)) != 0)
9095 {
9096 /* This is a UBFM/SBFM. */
e548c9df 9097 *cost += rtx_cost (XEXP (op0, 0), mode, ZERO_EXTRACT, 0, speed);
268c3b47
JG
9098 if (speed)
9099 *cost += extra_cost->alu.bfx;
9100 return true;
9101 }
9102
b4206259 9103 if (is_int_mode (mode, &int_mode))
43e9d192 9104 {
8c83f71d 9105 if (CONST_INT_P (op1))
43e9d192 9106 {
8c83f71d
KT
9107 /* We have a mask + shift version of a UBFIZ
9108 i.e. the *andim_ashift<mode>_bfiz pattern. */
9109 if (GET_CODE (op0) == ASHIFT
b4206259
RS
9110 && aarch64_mask_and_shift_for_ubfiz_p (int_mode, op1,
9111 XEXP (op0, 1)))
8c83f71d 9112 {
b4206259 9113 *cost += rtx_cost (XEXP (op0, 0), int_mode,
8c83f71d
KT
9114 (enum rtx_code) code, 0, speed);
9115 if (speed)
9116 *cost += extra_cost->alu.bfx;
268c3b47 9117
8c83f71d
KT
9118 return true;
9119 }
b4206259 9120 else if (aarch64_bitmask_imm (INTVAL (op1), int_mode))
8c83f71d
KT
9121 {
9122 /* We possibly get the immediate for free, this is not
9123 modelled. */
b4206259
RS
9124 *cost += rtx_cost (op0, int_mode,
9125 (enum rtx_code) code, 0, speed);
8c83f71d
KT
9126 if (speed)
9127 *cost += extra_cost->alu.logical;
268c3b47 9128
8c83f71d
KT
9129 return true;
9130 }
43e9d192
IB
9131 }
9132 else
9133 {
268c3b47
JG
9134 rtx new_op0 = op0;
9135
9136 /* Handle ORN, EON, or BIC. */
43e9d192
IB
9137 if (GET_CODE (op0) == NOT)
9138 op0 = XEXP (op0, 0);
268c3b47
JG
9139
9140 new_op0 = aarch64_strip_shift (op0);
9141
9142 /* If we had a shift on op0 then this is a logical-shift-
9143 by-register/immediate operation. Otherwise, this is just
9144 a logical operation. */
9145 if (speed)
9146 {
9147 if (new_op0 != op0)
9148 {
9149 /* Shift by immediate. */
9150 if (CONST_INT_P (XEXP (op0, 1)))
9151 *cost += extra_cost->alu.log_shift;
9152 else
9153 *cost += extra_cost->alu.log_shift_reg;
9154 }
9155 else
9156 *cost += extra_cost->alu.logical;
9157 }
9158
9159 /* In both cases we want to cost both operands. */
b4206259
RS
9160 *cost += rtx_cost (new_op0, int_mode, (enum rtx_code) code,
9161 0, speed);
9162 *cost += rtx_cost (op1, int_mode, (enum rtx_code) code,
9163 1, speed);
268c3b47
JG
9164
9165 return true;
43e9d192 9166 }
43e9d192
IB
9167 }
9168 return false;
9169
268c3b47 9170 case NOT:
6365da9e
KT
9171 x = XEXP (x, 0);
9172 op0 = aarch64_strip_shift (x);
9173
b6875aac
KV
9174 if (VECTOR_MODE_P (mode))
9175 {
9176 /* Vector NOT. */
9177 *cost += extra_cost->vect.alu;
9178 return false;
9179 }
9180
6365da9e
KT
9181 /* MVN-shifted-reg. */
9182 if (op0 != x)
9183 {
e548c9df 9184 *cost += rtx_cost (op0, mode, (enum rtx_code) code, 0, speed);
6365da9e
KT
9185
9186 if (speed)
9187 *cost += extra_cost->alu.log_shift;
9188
9189 return true;
9190 }
9191 /* EON can have two forms: (xor (not a) b) but also (not (xor a b)).
9192 Handle the second form here taking care that 'a' in the above can
9193 be a shift. */
9194 else if (GET_CODE (op0) == XOR)
9195 {
9196 rtx newop0 = XEXP (op0, 0);
9197 rtx newop1 = XEXP (op0, 1);
9198 rtx op0_stripped = aarch64_strip_shift (newop0);
9199
e548c9df
AM
9200 *cost += rtx_cost (newop1, mode, (enum rtx_code) code, 1, speed);
9201 *cost += rtx_cost (op0_stripped, mode, XOR, 0, speed);
6365da9e
KT
9202
9203 if (speed)
9204 {
9205 if (op0_stripped != newop0)
9206 *cost += extra_cost->alu.log_shift;
9207 else
9208 *cost += extra_cost->alu.logical;
9209 }
9210
9211 return true;
9212 }
268c3b47
JG
9213 /* MVN. */
9214 if (speed)
9215 *cost += extra_cost->alu.logical;
9216
268c3b47
JG
9217 return false;
9218
43e9d192 9219 case ZERO_EXTEND:
b1685e62
JG
9220
9221 op0 = XEXP (x, 0);
9222 /* If a value is written in SI mode, then zero extended to DI
9223 mode, the operation will in general be free as a write to
9224 a 'w' register implicitly zeroes the upper bits of an 'x'
9225 register. However, if this is
9226
9227 (set (reg) (zero_extend (reg)))
9228
9229 we must cost the explicit register move. */
9230 if (mode == DImode
9231 && GET_MODE (op0) == SImode
9232 && outer == SET)
9233 {
e548c9df 9234 int op_cost = rtx_cost (op0, VOIDmode, ZERO_EXTEND, 0, speed);
b1685e62 9235
dde23f43
KM
9236 /* If OP_COST is non-zero, then the cost of the zero extend
9237 is effectively the cost of the inner operation. Otherwise
9238 we have a MOV instruction and we take the cost from the MOV
9239 itself. This is true independently of whether we are
9240 optimizing for space or time. */
9241 if (op_cost)
b1685e62
JG
9242 *cost = op_cost;
9243
9244 return true;
9245 }
e548c9df 9246 else if (MEM_P (op0))
43e9d192 9247 {
b1685e62 9248 /* All loads can zero extend to any size for free. */
e548c9df 9249 *cost = rtx_cost (op0, VOIDmode, ZERO_EXTEND, param, speed);
43e9d192
IB
9250 return true;
9251 }
b1685e62 9252
283b6c85
KT
9253 op0 = aarch64_extend_bitfield_pattern_p (x);
9254 if (op0)
9255 {
9256 *cost += rtx_cost (op0, mode, ZERO_EXTEND, 0, speed);
9257 if (speed)
9258 *cost += extra_cost->alu.bfx;
9259 return true;
9260 }
9261
b1685e62 9262 if (speed)
b6875aac
KV
9263 {
9264 if (VECTOR_MODE_P (mode))
9265 {
9266 /* UMOV. */
9267 *cost += extra_cost->vect.alu;
9268 }
9269 else
9270 {
63715e5e
WD
9271 /* We generate an AND instead of UXTB/UXTH. */
9272 *cost += extra_cost->alu.logical;
b6875aac
KV
9273 }
9274 }
43e9d192
IB
9275 return false;
9276
9277 case SIGN_EXTEND:
b1685e62 9278 if (MEM_P (XEXP (x, 0)))
43e9d192 9279 {
b1685e62
JG
9280 /* LDRSH. */
9281 if (speed)
9282 {
9283 rtx address = XEXP (XEXP (x, 0), 0);
9284 *cost += extra_cost->ldst.load_sign_extend;
9285
9286 *cost +=
9287 COSTS_N_INSNS (aarch64_address_cost (address, mode,
9288 0, speed));
9289 }
43e9d192
IB
9290 return true;
9291 }
b1685e62 9292
283b6c85
KT
9293 op0 = aarch64_extend_bitfield_pattern_p (x);
9294 if (op0)
9295 {
9296 *cost += rtx_cost (op0, mode, SIGN_EXTEND, 0, speed);
9297 if (speed)
9298 *cost += extra_cost->alu.bfx;
9299 return true;
9300 }
9301
b1685e62 9302 if (speed)
b6875aac
KV
9303 {
9304 if (VECTOR_MODE_P (mode))
9305 *cost += extra_cost->vect.alu;
9306 else
9307 *cost += extra_cost->alu.extend;
9308 }
43e9d192
IB
9309 return false;
9310
ba0cfa17
JG
9311 case ASHIFT:
9312 op0 = XEXP (x, 0);
9313 op1 = XEXP (x, 1);
9314
9315 if (CONST_INT_P (op1))
9316 {
ba0cfa17 9317 if (speed)
b6875aac
KV
9318 {
9319 if (VECTOR_MODE_P (mode))
9320 {
9321 /* Vector shift (immediate). */
9322 *cost += extra_cost->vect.alu;
9323 }
9324 else
9325 {
 9326 /* LSL (immediate), UBFM, UBFIZ and friends. These are all
9327 aliases. */
9328 *cost += extra_cost->alu.shift;
9329 }
9330 }
ba0cfa17
JG
9331
9332 /* We can incorporate zero/sign extend for free. */
9333 if (GET_CODE (op0) == ZERO_EXTEND
9334 || GET_CODE (op0) == SIGN_EXTEND)
9335 op0 = XEXP (op0, 0);
9336
e548c9df 9337 *cost += rtx_cost (op0, VOIDmode, ASHIFT, 0, speed);
ba0cfa17
JG
9338 return true;
9339 }
9340 else
9341 {
7813b280 9342 if (VECTOR_MODE_P (mode))
b6875aac 9343 {
7813b280
KT
9344 if (speed)
9345 /* Vector shift (register). */
9346 *cost += extra_cost->vect.alu;
9347 }
9348 else
9349 {
9350 if (speed)
9351 /* LSLV. */
9352 *cost += extra_cost->alu.shift_reg;
9353
9354 if (GET_CODE (op1) == AND && REG_P (XEXP (op1, 0))
9355 && CONST_INT_P (XEXP (op1, 1))
6a70badb
RS
9356 && known_eq (INTVAL (XEXP (op1, 1)),
9357 GET_MODE_BITSIZE (mode) - 1))
b6875aac 9358 {
7813b280
KT
9359 *cost += rtx_cost (op0, mode, (rtx_code) code, 0, speed);
9360 /* We already demanded XEXP (op1, 0) to be REG_P, so
9361 don't recurse into it. */
9362 return true;
b6875aac
KV
9363 }
9364 }
ba0cfa17
JG
9365 return false; /* All arguments need to be in registers. */
9366 }
9367
43e9d192 9368 case ROTATE:
43e9d192
IB
9369 case ROTATERT:
9370 case LSHIFTRT:
43e9d192 9371 case ASHIFTRT:
ba0cfa17
JG
9372 op0 = XEXP (x, 0);
9373 op1 = XEXP (x, 1);
43e9d192 9374
ba0cfa17
JG
9375 if (CONST_INT_P (op1))
9376 {
9377 /* ASR (immediate) and friends. */
9378 if (speed)
b6875aac
KV
9379 {
9380 if (VECTOR_MODE_P (mode))
9381 *cost += extra_cost->vect.alu;
9382 else
9383 *cost += extra_cost->alu.shift;
9384 }
43e9d192 9385
e548c9df 9386 *cost += rtx_cost (op0, mode, (enum rtx_code) code, 0, speed);
ba0cfa17
JG
9387 return true;
9388 }
9389 else
9390 {
7813b280 9391 if (VECTOR_MODE_P (mode))
b6875aac 9392 {
7813b280
KT
9393 if (speed)
9394 /* Vector shift (register). */
b6875aac 9395 *cost += extra_cost->vect.alu;
7813b280
KT
9396 }
9397 else
9398 {
9399 if (speed)
9400 /* ASR (register) and friends. */
b6875aac 9401 *cost += extra_cost->alu.shift_reg;
7813b280
KT
9402
9403 if (GET_CODE (op1) == AND && REG_P (XEXP (op1, 0))
9404 && CONST_INT_P (XEXP (op1, 1))
6a70badb
RS
9405 && known_eq (INTVAL (XEXP (op1, 1)),
9406 GET_MODE_BITSIZE (mode) - 1))
7813b280
KT
9407 {
9408 *cost += rtx_cost (op0, mode, (rtx_code) code, 0, speed);
9409 /* We already demanded XEXP (op1, 0) to be REG_P, so
9410 don't recurse into it. */
9411 return true;
9412 }
b6875aac 9413 }
ba0cfa17
JG
9414 return false; /* All arguments need to be in registers. */
9415 }
43e9d192 9416
909734be
JG
9417 case SYMBOL_REF:
9418
1b1e81f8
JW
9419 if (aarch64_cmodel == AARCH64_CMODEL_LARGE
9420 || aarch64_cmodel == AARCH64_CMODEL_SMALL_SPIC)
909734be
JG
9421 {
9422 /* LDR. */
9423 if (speed)
9424 *cost += extra_cost->ldst.load;
9425 }
9426 else if (aarch64_cmodel == AARCH64_CMODEL_SMALL
9427 || aarch64_cmodel == AARCH64_CMODEL_SMALL_PIC)
9428 {
9429 /* ADRP, followed by ADD. */
9430 *cost += COSTS_N_INSNS (1);
9431 if (speed)
9432 *cost += 2 * extra_cost->alu.arith;
9433 }
9434 else if (aarch64_cmodel == AARCH64_CMODEL_TINY
9435 || aarch64_cmodel == AARCH64_CMODEL_TINY_PIC)
9436 {
9437 /* ADR. */
9438 if (speed)
9439 *cost += extra_cost->alu.arith;
9440 }
9441
9442 if (flag_pic)
9443 {
9444 /* One extra load instruction, after accessing the GOT. */
9445 *cost += COSTS_N_INSNS (1);
9446 if (speed)
9447 *cost += extra_cost->ldst.load;
9448 }
43e9d192
IB
9449 return true;
9450
909734be 9451 case HIGH:
43e9d192 9452 case LO_SUM:
909734be
JG
9453 /* ADRP/ADD (immediate). */
9454 if (speed)
9455 *cost += extra_cost->alu.arith;
43e9d192
IB
9456 return true;
9457
9458 case ZERO_EXTRACT:
9459 case SIGN_EXTRACT:
7cc2145f
JG
9460 /* UBFX/SBFX. */
9461 if (speed)
b6875aac
KV
9462 {
9463 if (VECTOR_MODE_P (mode))
9464 *cost += extra_cost->vect.alu;
9465 else
9466 *cost += extra_cost->alu.bfx;
9467 }
7cc2145f
JG
9468
9469 /* We can trust that the immediates used will be correct (there
9470 are no by-register forms), so we need only cost op0. */
e548c9df 9471 *cost += rtx_cost (XEXP (x, 0), VOIDmode, (enum rtx_code) code, 0, speed);
43e9d192
IB
9472 return true;
9473
9474 case MULT:
4745e701
JG
9475 *cost += aarch64_rtx_mult_cost (x, MULT, 0, speed);
9476 /* aarch64_rtx_mult_cost always handles recursion to its
9477 operands. */
9478 return true;
43e9d192
IB
9479
9480 case MOD:
4f58fe36
KT
9481 /* We can expand signed mod by power of 2 using a NEGS, two parallel
9482 ANDs and a CSNEG. Assume here that CSNEG is the same as the cost of
9483 an unconditional negate. This case should only ever be reached through
9484 the set_smod_pow2_cheap check in expmed.c. */
9485 if (CONST_INT_P (XEXP (x, 1))
9486 && exact_log2 (INTVAL (XEXP (x, 1))) > 0
9487 && (mode == SImode || mode == DImode))
9488 {
9489 /* We expand to 4 instructions. Reset the baseline. */
9490 *cost = COSTS_N_INSNS (4);
9491
9492 if (speed)
9493 *cost += 2 * extra_cost->alu.logical
9494 + 2 * extra_cost->alu.arith;
9495
9496 return true;
9497 }
9498
9499 /* Fall-through. */
43e9d192 9500 case UMOD:
43e9d192
IB
9501 if (speed)
9502 {
cb9ac430 9503 /* Slightly prefer UMOD over SMOD. */
b6875aac
KV
9504 if (VECTOR_MODE_P (mode))
9505 *cost += extra_cost->vect.alu;
e548c9df
AM
9506 else if (GET_MODE_CLASS (mode) == MODE_INT)
9507 *cost += (extra_cost->mult[mode == DImode].add
cb9ac430
TC
9508 + extra_cost->mult[mode == DImode].idiv
9509 + (code == MOD ? 1 : 0));
43e9d192
IB
9510 }
9511 return false; /* All arguments need to be in registers. */
9512
9513 case DIV:
9514 case UDIV:
4105fe38 9515 case SQRT:
43e9d192
IB
9516 if (speed)
9517 {
b6875aac
KV
9518 if (VECTOR_MODE_P (mode))
9519 *cost += extra_cost->vect.alu;
9520 else if (GET_MODE_CLASS (mode) == MODE_INT)
4105fe38
JG
9521 /* There is no integer SQRT, so only DIV and UDIV can get
9522 here. */
cb9ac430
TC
9523 *cost += (extra_cost->mult[mode == DImode].idiv
 9524 /* Slightly prefer UDIV over SDIV. */
9525 + (code == DIV ? 1 : 0));
4105fe38
JG
9526 else
9527 *cost += extra_cost->fp[mode == DFmode].div;
43e9d192
IB
9528 }
9529 return false; /* All arguments need to be in registers. */
9530
a8eecd00 9531 case IF_THEN_ELSE:
2d5ffe46
AP
9532 return aarch64_if_then_else_costs (XEXP (x, 0), XEXP (x, 1),
9533 XEXP (x, 2), cost, speed);
a8eecd00
JG
9534
9535 case EQ:
9536 case NE:
9537 case GT:
9538 case GTU:
9539 case LT:
9540 case LTU:
9541 case GE:
9542 case GEU:
9543 case LE:
9544 case LEU:
9545
9546 return false; /* All arguments must be in registers. */
9547
b292109f
JG
9548 case FMA:
9549 op0 = XEXP (x, 0);
9550 op1 = XEXP (x, 1);
9551 op2 = XEXP (x, 2);
9552
9553 if (speed)
b6875aac
KV
9554 {
9555 if (VECTOR_MODE_P (mode))
9556 *cost += extra_cost->vect.alu;
9557 else
9558 *cost += extra_cost->fp[mode == DFmode].fma;
9559 }
b292109f
JG
9560
9561 /* FMSUB, FNMADD, and FNMSUB are free. */
9562 if (GET_CODE (op0) == NEG)
9563 op0 = XEXP (op0, 0);
9564
9565 if (GET_CODE (op2) == NEG)
9566 op2 = XEXP (op2, 0);
9567
9568 /* aarch64_fnma4_elt_to_64v2df has the NEG as operand 1,
9569 and the by-element operand as operand 0. */
9570 if (GET_CODE (op1) == NEG)
9571 op1 = XEXP (op1, 0);
9572
9573 /* Catch vector-by-element operations. The by-element operand can
9574 either be (vec_duplicate (vec_select (x))) or just
9575 (vec_select (x)), depending on whether we are multiplying by
9576 a vector or a scalar.
9577
9578 Canonicalization is not very good in these cases, FMA4 will put the
9579 by-element operand as operand 0, FNMA4 will have it as operand 1. */
9580 if (GET_CODE (op0) == VEC_DUPLICATE)
9581 op0 = XEXP (op0, 0);
9582 else if (GET_CODE (op1) == VEC_DUPLICATE)
9583 op1 = XEXP (op1, 0);
9584
9585 if (GET_CODE (op0) == VEC_SELECT)
9586 op0 = XEXP (op0, 0);
9587 else if (GET_CODE (op1) == VEC_SELECT)
9588 op1 = XEXP (op1, 0);
9589
9590 /* If the remaining parameters are not registers,
9591 get the cost to put them into registers. */
e548c9df
AM
9592 *cost += rtx_cost (op0, mode, FMA, 0, speed);
9593 *cost += rtx_cost (op1, mode, FMA, 1, speed);
9594 *cost += rtx_cost (op2, mode, FMA, 2, speed);
b292109f
JG
9595 return true;
9596
5e2a765b
KT
9597 case FLOAT:
9598 case UNSIGNED_FLOAT:
9599 if (speed)
9600 *cost += extra_cost->fp[mode == DFmode].fromint;
9601 return false;
9602
b292109f
JG
9603 case FLOAT_EXTEND:
9604 if (speed)
b6875aac
KV
9605 {
9606 if (VECTOR_MODE_P (mode))
9607 {
 9608 /* Vector widening conversion. */
9609 *cost += extra_cost->vect.alu;
9610 }
9611 else
9612 *cost += extra_cost->fp[mode == DFmode].widen;
9613 }
b292109f
JG
9614 return false;
9615
9616 case FLOAT_TRUNCATE:
9617 if (speed)
b6875aac
KV
9618 {
9619 if (VECTOR_MODE_P (mode))
9620 {
 9621 /* Vector narrowing conversion. */
9622 *cost += extra_cost->vect.alu;
9623 }
9624 else
9625 *cost += extra_cost->fp[mode == DFmode].narrow;
9626 }
b292109f
JG
9627 return false;
9628
61263118
KT
9629 case FIX:
9630 case UNSIGNED_FIX:
9631 x = XEXP (x, 0);
9632 /* Strip the rounding part. They will all be implemented
9633 by the fcvt* family of instructions anyway. */
9634 if (GET_CODE (x) == UNSPEC)
9635 {
9636 unsigned int uns_code = XINT (x, 1);
9637
9638 if (uns_code == UNSPEC_FRINTA
9639 || uns_code == UNSPEC_FRINTM
9640 || uns_code == UNSPEC_FRINTN
9641 || uns_code == UNSPEC_FRINTP
9642 || uns_code == UNSPEC_FRINTZ)
9643 x = XVECEXP (x, 0, 0);
9644 }
9645
9646 if (speed)
b6875aac
KV
9647 {
9648 if (VECTOR_MODE_P (mode))
9649 *cost += extra_cost->vect.alu;
9650 else
9651 *cost += extra_cost->fp[GET_MODE (x) == DFmode].toint;
9652 }
39252973
KT
9653
9654 /* We can combine fmul by a power of 2 followed by a fcvt into a single
9655 fixed-point fcvt. */
9656 if (GET_CODE (x) == MULT
9657 && ((VECTOR_MODE_P (mode)
9658 && aarch64_vec_fpconst_pow_of_2 (XEXP (x, 1)) > 0)
9659 || aarch64_fpconst_pow_of_2 (XEXP (x, 1)) > 0))
9660 {
9661 *cost += rtx_cost (XEXP (x, 0), VOIDmode, (rtx_code) code,
9662 0, speed);
9663 return true;
9664 }
9665
e548c9df 9666 *cost += rtx_cost (x, VOIDmode, (enum rtx_code) code, 0, speed);
61263118
KT
9667 return true;
9668
b292109f 9669 case ABS:
b6875aac
KV
9670 if (VECTOR_MODE_P (mode))
9671 {
9672 /* ABS (vector). */
9673 if (speed)
9674 *cost += extra_cost->vect.alu;
9675 }
9676 else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
b292109f 9677 {
19261b99
KT
9678 op0 = XEXP (x, 0);
9679
9680 /* FABD, which is analogous to FADD. */
9681 if (GET_CODE (op0) == MINUS)
9682 {
e548c9df
AM
9683 *cost += rtx_cost (XEXP (op0, 0), mode, MINUS, 0, speed);
9684 *cost += rtx_cost (XEXP (op0, 1), mode, MINUS, 1, speed);
19261b99
KT
9685 if (speed)
9686 *cost += extra_cost->fp[mode == DFmode].addsub;
9687
9688 return true;
9689 }
9690 /* Simple FABS is analogous to FNEG. */
b292109f
JG
9691 if (speed)
9692 *cost += extra_cost->fp[mode == DFmode].neg;
9693 }
9694 else
9695 {
9696 /* Integer ABS will either be split to
9697 two arithmetic instructions, or will be an ABS
9698 (scalar), which we don't model. */
9699 *cost = COSTS_N_INSNS (2);
9700 if (speed)
9701 *cost += 2 * extra_cost->alu.arith;
9702 }
9703 return false;
9704
9705 case SMAX:
9706 case SMIN:
9707 if (speed)
9708 {
b6875aac
KV
9709 if (VECTOR_MODE_P (mode))
9710 *cost += extra_cost->vect.alu;
9711 else
9712 {
9713 /* FMAXNM/FMINNM/FMAX/FMIN.
9714 TODO: This may not be accurate for all implementations, but
9715 we do not model this in the cost tables. */
9716 *cost += extra_cost->fp[mode == DFmode].addsub;
9717 }
b292109f
JG
9718 }
9719 return false;
9720
61263118
KT
9721 case UNSPEC:
9722 /* The floating point round to integer frint* instructions. */
9723 if (aarch64_frint_unspec_p (XINT (x, 1)))
9724 {
9725 if (speed)
9726 *cost += extra_cost->fp[mode == DFmode].roundint;
9727
9728 return false;
9729 }
781aeb73
KT
9730
9731 if (XINT (x, 1) == UNSPEC_RBIT)
9732 {
9733 if (speed)
9734 *cost += extra_cost->alu.rev;
9735
9736 return false;
9737 }
61263118
KT
9738 break;
9739
fb620c4a
JG
9740 case TRUNCATE:
9741
9742 /* Decompose <su>muldi3_highpart. */
9743 if (/* (truncate:DI */
9744 mode == DImode
9745 /* (lshiftrt:TI */
9746 && GET_MODE (XEXP (x, 0)) == TImode
9747 && GET_CODE (XEXP (x, 0)) == LSHIFTRT
9748 /* (mult:TI */
9749 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
9750 /* (ANY_EXTEND:TI (reg:DI))
9751 (ANY_EXTEND:TI (reg:DI))) */
9752 && ((GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == ZERO_EXTEND
9753 && GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)) == ZERO_EXTEND)
9754 || (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == SIGN_EXTEND
9755 && GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)) == SIGN_EXTEND))
9756 && GET_MODE (XEXP (XEXP (XEXP (XEXP (x, 0), 0), 0), 0)) == DImode
9757 && GET_MODE (XEXP (XEXP (XEXP (XEXP (x, 0), 0), 1), 0)) == DImode
9758 /* (const_int 64) */
9759 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
9760 && UINTVAL (XEXP (XEXP (x, 0), 1)) == 64)
9761 {
9762 /* UMULH/SMULH. */
9763 if (speed)
9764 *cost += extra_cost->mult[mode == DImode].extend;
e548c9df
AM
9765 *cost += rtx_cost (XEXP (XEXP (XEXP (XEXP (x, 0), 0), 0), 0),
9766 mode, MULT, 0, speed);
9767 *cost += rtx_cost (XEXP (XEXP (XEXP (XEXP (x, 0), 0), 1), 0),
9768 mode, MULT, 1, speed);
fb620c4a
JG
9769 return true;
9770 }
9771
9772 /* Fall through. */
43e9d192 9773 default:
61263118 9774 break;
43e9d192 9775 }
61263118 9776
c10e3d7f
AP
9777 if (dump_file
9778 && flag_aarch64_verbose_cost)
61263118
KT
9779 fprintf (dump_file,
9780 "\nFailed to cost RTX. Assuming default cost.\n");
9781
9782 return true;
43e9d192
IB
9783}
9784
0ee859b5
JG
9785/* Wrapper around aarch64_rtx_costs, dumps the partial, or total cost
9786 calculated for X. This cost is stored in *COST. Returns true
9787 if the total cost of X was calculated. */
9788static bool
e548c9df 9789aarch64_rtx_costs_wrapper (rtx x, machine_mode mode, int outer,
0ee859b5
JG
9790 int param, int *cost, bool speed)
9791{
e548c9df 9792 bool result = aarch64_rtx_costs (x, mode, outer, param, cost, speed);
0ee859b5 9793
c10e3d7f
AP
9794 if (dump_file
9795 && flag_aarch64_verbose_cost)
0ee859b5
JG
9796 {
9797 print_rtl_single (dump_file, x);
9798 fprintf (dump_file, "\n%s cost: %d (%s)\n",
9799 speed ? "Hot" : "Cold",
9800 *cost, result ? "final" : "partial");
9801 }
9802
9803 return result;
9804}
9805
43e9d192 9806static int
ef4bddc2 9807aarch64_register_move_cost (machine_mode mode,
8a3a7e67 9808 reg_class_t from_i, reg_class_t to_i)
43e9d192 9809{
8a3a7e67
RH
9810 enum reg_class from = (enum reg_class) from_i;
9811 enum reg_class to = (enum reg_class) to_i;
43e9d192 9812 const struct cpu_regmove_cost *regmove_cost
b175b679 9813 = aarch64_tune_params.regmove_cost;
43e9d192 9814
3be07662 9815 /* Caller save and pointer regs are equivalent to GENERAL_REGS. */
d677263e 9816 if (to == TAILCALL_ADDR_REGS || to == POINTER_REGS)
3be07662
WD
9817 to = GENERAL_REGS;
9818
d677263e 9819 if (from == TAILCALL_ADDR_REGS || from == POINTER_REGS)
3be07662
WD
9820 from = GENERAL_REGS;
9821
6ee70f81
AP
9822 /* Moving between GPR and stack cost is the same as GP2GP. */
9823 if ((from == GENERAL_REGS && to == STACK_REG)
9824 || (to == GENERAL_REGS && from == STACK_REG))
9825 return regmove_cost->GP2GP;
9826
9827 /* To/From the stack register, we move via the gprs. */
9828 if (to == STACK_REG || from == STACK_REG)
9829 return aarch64_register_move_cost (mode, from, GENERAL_REGS)
9830 + aarch64_register_move_cost (mode, GENERAL_REGS, to);
9831
6a70badb 9832 if (known_eq (GET_MODE_SIZE (mode), 16))
8919453c
WD
9833 {
9834 /* 128-bit operations on general registers require 2 instructions. */
9835 if (from == GENERAL_REGS && to == GENERAL_REGS)
9836 return regmove_cost->GP2GP * 2;
9837 else if (from == GENERAL_REGS)
9838 return regmove_cost->GP2FP * 2;
9839 else if (to == GENERAL_REGS)
9840 return regmove_cost->FP2GP * 2;
9841
9842 /* When AdvSIMD instructions are disabled it is not possible to move
9843 a 128-bit value directly between Q registers. This is handled in
9844 secondary reload. A general register is used as a scratch to move
9845 the upper DI value and the lower DI value is moved directly,
9846 hence the cost is the sum of three moves. */
9847 if (! TARGET_SIMD)
9848 return regmove_cost->GP2FP + regmove_cost->FP2GP + regmove_cost->FP2FP;
9849
9850 return regmove_cost->FP2FP;
9851 }
9852
43e9d192
IB
9853 if (from == GENERAL_REGS && to == GENERAL_REGS)
9854 return regmove_cost->GP2GP;
9855 else if (from == GENERAL_REGS)
9856 return regmove_cost->GP2FP;
9857 else if (to == GENERAL_REGS)
9858 return regmove_cost->FP2GP;
9859
43e9d192
IB
9860 return regmove_cost->FP2FP;
9861}
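/* Worked example with hypothetical tuning numbers, not taken from any
   real cost table: if GP2GP == 1, GP2FP == 5, FP2GP == 5 and FP2FP == 2,
   then a TImode (16-byte) copy between general registers is costed as
   GP2GP * 2 == 2, and a TImode copy from a general register into an
   FP/SIMD register as GP2FP * 2 == 10.  Only the doubling for 128-bit
   modes comes from the code above; the base numbers are made up.  */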
9862
9863static int
ef4bddc2 9864aarch64_memory_move_cost (machine_mode mode ATTRIBUTE_UNUSED,
43e9d192
IB
9865 reg_class_t rclass ATTRIBUTE_UNUSED,
9866 bool in ATTRIBUTE_UNUSED)
9867{
b175b679 9868 return aarch64_tune_params.memmov_cost;
43e9d192
IB
9869}
9870
0c30e0f3
EM
9871/* Return true if it is safe and beneficial to use the approximate rsqrt optabs
9872 to optimize 1.0/sqrt. */
ee62a5a6
RS
9873
9874static bool
9acc9cbe 9875use_rsqrt_p (machine_mode mode)
ee62a5a6
RS
9876{
9877 return (!flag_trapping_math
9878 && flag_unsafe_math_optimizations
9acc9cbe
EM
9879 && ((aarch64_tune_params.approx_modes->recip_sqrt
9880 & AARCH64_APPROX_MODE (mode))
1a33079e 9881 || flag_mrecip_low_precision_sqrt));
ee62a5a6
RS
9882}
9883
0c30e0f3
EM
9884/* Function to decide when to use the approximate reciprocal square root
9885 builtin. */
a6fc00da
BH
9886
9887static tree
ee62a5a6 9888aarch64_builtin_reciprocal (tree fndecl)
a6fc00da 9889{
9acc9cbe
EM
9890 machine_mode mode = TYPE_MODE (TREE_TYPE (fndecl));
9891
9892 if (!use_rsqrt_p (mode))
a6fc00da 9893 return NULL_TREE;
ee62a5a6 9894 return aarch64_builtin_rsqrt (DECL_FUNCTION_CODE (fndecl));
a6fc00da
BH
9895}
9896
9897typedef rtx (*rsqrte_type) (rtx, rtx);
9898
98daafa0
EM
9899/* Select reciprocal square root initial estimate insn depending on machine
9900 mode. */
a6fc00da 9901
98daafa0 9902static rsqrte_type
a6fc00da
BH
9903get_rsqrte_type (machine_mode mode)
9904{
9905 switch (mode)
9906 {
4e10a5a7
RS
9907 case E_DFmode: return gen_aarch64_rsqrtedf;
9908 case E_SFmode: return gen_aarch64_rsqrtesf;
9909 case E_V2DFmode: return gen_aarch64_rsqrtev2df;
9910 case E_V2SFmode: return gen_aarch64_rsqrtev2sf;
9911 case E_V4SFmode: return gen_aarch64_rsqrtev4sf;
a6fc00da
BH
9912 default: gcc_unreachable ();
9913 }
9914}
9915
9916typedef rtx (*rsqrts_type) (rtx, rtx, rtx);
9917
98daafa0 9918/* Select reciprocal square root series step insn depending on machine mode. */
a6fc00da 9919
98daafa0 9920static rsqrts_type
a6fc00da
BH
9921get_rsqrts_type (machine_mode mode)
9922{
9923 switch (mode)
9924 {
4e10a5a7
RS
9925 case E_DFmode: return gen_aarch64_rsqrtsdf;
9926 case E_SFmode: return gen_aarch64_rsqrtssf;
9927 case E_V2DFmode: return gen_aarch64_rsqrtsv2df;
9928 case E_V2SFmode: return gen_aarch64_rsqrtsv2sf;
9929 case E_V4SFmode: return gen_aarch64_rsqrtsv4sf;
a6fc00da
BH
9930 default: gcc_unreachable ();
9931 }
9932}
9933
98daafa0
EM
9934/* Emit instruction sequence to compute either the approximate square root
9935 or its approximate reciprocal, depending on the flag RECP, and return
9936 whether the sequence was emitted or not. */
a6fc00da 9937
98daafa0
EM
9938bool
9939aarch64_emit_approx_sqrt (rtx dst, rtx src, bool recp)
a6fc00da 9940{
98daafa0 9941 machine_mode mode = GET_MODE (dst);
daef0a8c
JW
9942
9943 if (GET_MODE_INNER (mode) == HFmode)
2e19adc8
RE
9944 {
9945 gcc_assert (!recp);
9946 return false;
9947 }
9948
2e19adc8
RE
9949 if (!recp)
9950 {
9951 if (!(flag_mlow_precision_sqrt
9952 || (aarch64_tune_params.approx_modes->sqrt
9953 & AARCH64_APPROX_MODE (mode))))
9954 return false;
9955
9956 if (flag_finite_math_only
9957 || flag_trapping_math
9958 || !flag_unsafe_math_optimizations
9959 || optimize_function_for_size_p (cfun))
9960 return false;
9961 }
9962 else
9963 /* Caller assumes we cannot fail. */
9964 gcc_assert (use_rsqrt_p (mode));
daef0a8c 9965
ddc203a7 9966 machine_mode mmsk = mode_for_int_vector (mode).require ();
98daafa0
EM
9967 rtx xmsk = gen_reg_rtx (mmsk);
9968 if (!recp)
2e19adc8
RE
9969 /* When calculating the approximate square root, compare the
9970 argument with 0.0 and create a mask. */
9971 emit_insn (gen_rtx_SET (xmsk,
9972 gen_rtx_NEG (mmsk,
9973 gen_rtx_EQ (mmsk, src,
9974 CONST0_RTX (mode)))));
a6fc00da 9975
98daafa0
EM
9976 /* Estimate the approximate reciprocal square root. */
9977 rtx xdst = gen_reg_rtx (mode);
9978 emit_insn ((*get_rsqrte_type (mode)) (xdst, src));
a6fc00da 9979
98daafa0
EM
9980 /* Iterate over the series twice for SF and thrice for DF. */
9981 int iterations = (GET_MODE_INNER (mode) == DFmode) ? 3 : 2;
a6fc00da 9982
98daafa0
EM
9983 /* Optionally iterate over the series once less for faster performance
9984 while sacrificing the accuracy. */
9985 if ((recp && flag_mrecip_low_precision_sqrt)
9986 || (!recp && flag_mlow_precision_sqrt))
a6fc00da
BH
9987 iterations--;
9988
98daafa0
EM
9989 /* Iterate over the series to calculate the approximate reciprocal square
9990 root. */
9991 rtx x1 = gen_reg_rtx (mode);
9992 while (iterations--)
a6fc00da 9993 {
a6fc00da 9994 rtx x2 = gen_reg_rtx (mode);
98daafa0
EM
9995 emit_set_insn (x2, gen_rtx_MULT (mode, xdst, xdst));
9996
9997 emit_insn ((*get_rsqrts_type (mode)) (x1, src, x2));
a6fc00da 9998
98daafa0
EM
9999 if (iterations > 0)
10000 emit_set_insn (xdst, gen_rtx_MULT (mode, xdst, x1));
10001 }
10002
10003 if (!recp)
10004 {
10005 /* Qualify the approximate reciprocal square root when the argument is
10006 0.0 by squashing the intermediary result to 0.0. */
10007 rtx xtmp = gen_reg_rtx (mmsk);
10008 emit_set_insn (xtmp, gen_rtx_AND (mmsk, gen_rtx_NOT (mmsk, xmsk),
10009 gen_rtx_SUBREG (mmsk, xdst, 0)));
10010 emit_move_insn (xdst, gen_rtx_SUBREG (mode, xtmp, 0));
a6fc00da 10011
98daafa0
EM
10012 /* Calculate the approximate square root. */
10013 emit_set_insn (xdst, gen_rtx_MULT (mode, xdst, src));
a6fc00da
BH
10014 }
10015
98daafa0
EM
10016 /* Finalize the approximation. */
10017 emit_set_insn (dst, gen_rtx_MULT (mode, xdst, x1));
10018
10019 return true;
a6fc00da
BH
10020}
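/* Mathematical sketch, not part of the original source: FRSQRTE yields an
   initial estimate e0 ~= 1/sqrt(x) and FRSQRTS computes (3 - a*b) / 2, so
   each loop iteration above is the Newton-Raphson step

     e_{n+1} = e_n * (3 - x * e_n * e_n) / 2

   which roughly doubles the number of accurate bits (hence two steps for
   SF, three for DF).  For the plain square root the result is finally
   multiplied by x, since sqrt(x) == x * (1/sqrt(x)), with the mask
   computed earlier forcing the answer to 0.0 when x == 0.0.  */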
10021
79a2bc2d
EM
10022typedef rtx (*recpe_type) (rtx, rtx);
10023
10024/* Select reciprocal initial estimate insn depending on machine mode. */
10025
10026static recpe_type
10027get_recpe_type (machine_mode mode)
10028{
10029 switch (mode)
10030 {
4e10a5a7
RS
10031 case E_SFmode: return (gen_aarch64_frecpesf);
10032 case E_V2SFmode: return (gen_aarch64_frecpev2sf);
10033 case E_V4SFmode: return (gen_aarch64_frecpev4sf);
10034 case E_DFmode: return (gen_aarch64_frecpedf);
10035 case E_V2DFmode: return (gen_aarch64_frecpev2df);
10036 default: gcc_unreachable ();
79a2bc2d
EM
10037 }
10038}
10039
10040typedef rtx (*recps_type) (rtx, rtx, rtx);
10041
10042/* Select reciprocal series step insn depending on machine mode. */
10043
10044static recps_type
10045get_recps_type (machine_mode mode)
10046{
10047 switch (mode)
10048 {
4e10a5a7
RS
10049 case E_SFmode: return (gen_aarch64_frecpssf);
10050 case E_V2SFmode: return (gen_aarch64_frecpsv2sf);
10051 case E_V4SFmode: return (gen_aarch64_frecpsv4sf);
10052 case E_DFmode: return (gen_aarch64_frecpsdf);
10053 case E_V2DFmode: return (gen_aarch64_frecpsv2df);
10054 default: gcc_unreachable ();
79a2bc2d
EM
10055 }
10056}
10057
10058/* Emit the instruction sequence to compute the approximation for the division
10059 of NUM by DEN in QUO and return whether the sequence was emitted or not. */
10060
10061bool
10062aarch64_emit_approx_div (rtx quo, rtx num, rtx den)
10063{
10064 machine_mode mode = GET_MODE (quo);
33d72b63
JW
10065
10066 if (GET_MODE_INNER (mode) == HFmode)
10067 return false;
10068
79a2bc2d
EM
10069 bool use_approx_division_p = (flag_mlow_precision_div
10070 || (aarch64_tune_params.approx_modes->division
10071 & AARCH64_APPROX_MODE (mode)));
10072
10073 if (!flag_finite_math_only
10074 || flag_trapping_math
10075 || !flag_unsafe_math_optimizations
10076 || optimize_function_for_size_p (cfun)
10077 || !use_approx_division_p)
10078 return false;
10079
1be49a38
RR
10080 if (!TARGET_SIMD && VECTOR_MODE_P (mode))
10081 return false;
10082
79a2bc2d
EM
10083 /* Estimate the approximate reciprocal. */
10084 rtx xrcp = gen_reg_rtx (mode);
10085 emit_insn ((*get_recpe_type (mode)) (xrcp, den));
10086
10087 /* Iterate over the series twice for SF and thrice for DF. */
10088 int iterations = (GET_MODE_INNER (mode) == DFmode) ? 3 : 2;
10089
10090 /* Optionally iterate over the series once less for faster performance,
10091 while sacrificing the accuracy. */
10092 if (flag_mlow_precision_div)
10093 iterations--;
10094
10095 /* Iterate over the series to calculate the approximate reciprocal. */
10096 rtx xtmp = gen_reg_rtx (mode);
10097 while (iterations--)
10098 {
10099 emit_insn ((*get_recps_type (mode)) (xtmp, xrcp, den));
10100
10101 if (iterations > 0)
10102 emit_set_insn (xrcp, gen_rtx_MULT (mode, xrcp, xtmp));
10103 }
10104
10105 if (num != CONST1_RTX (mode))
10106 {
10107 /* As the approximate reciprocal of DEN is already calculated, only
10108 calculate the approximate division when NUM is not 1.0. */
10109 rtx xnum = force_reg (mode, num);
10110 emit_set_insn (xrcp, gen_rtx_MULT (mode, xrcp, xnum));
10111 }
10112
10113 /* Finalize the approximation. */
10114 emit_set_insn (quo, gen_rtx_MULT (mode, xrcp, xtmp));
10115 return true;
10116}
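/* Mathematical sketch, not part of the original source: FRECPE yields an
   initial estimate e0 ~= 1/den and FRECPS computes 2 - a*b, so each loop
   iteration above is the Newton-Raphson step for the reciprocal,

     e_{n+1} = e_n * (2 - den * e_n)

   after which the quotient is formed as num * (1/den), again with two
   iterations for SF and three for DF.  */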
10117
d126a4ae
AP
10118/* Return the number of instructions that can be issued per cycle. */
10119static int
10120aarch64_sched_issue_rate (void)
10121{
b175b679 10122 return aarch64_tune_params.issue_rate;
d126a4ae
AP
10123}
10124
d03f7e44
MK
10125static int
10126aarch64_sched_first_cycle_multipass_dfa_lookahead (void)
10127{
10128 int issue_rate = aarch64_sched_issue_rate ();
10129
10130 return issue_rate > 1 && !sched_fusion ? issue_rate : 0;
10131}
10132
2d6bc7fa
KT
10133
10134/* Implement TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD as
10135 autopref_multipass_dfa_lookahead_guard from haifa-sched.c. It only
10136 has an effect if PARAM_SCHED_AUTOPREF_QUEUE_DEPTH > 0. */
10137
10138static int
10139aarch64_first_cycle_multipass_dfa_lookahead_guard (rtx_insn *insn,
10140 int ready_index)
10141{
10142 return autopref_multipass_dfa_lookahead_guard (insn, ready_index);
10143}
10144
10145
8990e73a
TB
10146/* Vectorizer cost model target hooks. */
10147
10148/* Implement targetm.vectorize.builtin_vectorization_cost. */
10149static int
10150aarch64_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
10151 tree vectype,
10152 int misalign ATTRIBUTE_UNUSED)
10153{
10154 unsigned elements;
cd8ae5ed
AP
10155 const cpu_vector_cost *costs = aarch64_tune_params.vec_costs;
10156 bool fp = false;
10157
10158 if (vectype != NULL)
10159 fp = FLOAT_TYPE_P (vectype);
8990e73a
TB
10160
10161 switch (type_of_cost)
10162 {
10163 case scalar_stmt:
cd8ae5ed 10164 return fp ? costs->scalar_fp_stmt_cost : costs->scalar_int_stmt_cost;
8990e73a
TB
10165
10166 case scalar_load:
cd8ae5ed 10167 return costs->scalar_load_cost;
8990e73a
TB
10168
10169 case scalar_store:
cd8ae5ed 10170 return costs->scalar_store_cost;
8990e73a
TB
10171
10172 case vector_stmt:
cd8ae5ed 10173 return fp ? costs->vec_fp_stmt_cost : costs->vec_int_stmt_cost;
8990e73a
TB
10174
10175 case vector_load:
cd8ae5ed 10176 return costs->vec_align_load_cost;
8990e73a
TB
10177
10178 case vector_store:
cd8ae5ed 10179 return costs->vec_store_cost;
8990e73a
TB
10180
10181 case vec_to_scalar:
cd8ae5ed 10182 return costs->vec_to_scalar_cost;
8990e73a
TB
10183
10184 case scalar_to_vec:
cd8ae5ed 10185 return costs->scalar_to_vec_cost;
8990e73a
TB
10186
10187 case unaligned_load:
cc9fe6bb 10188 case vector_gather_load:
cd8ae5ed 10189 return costs->vec_unalign_load_cost;
8990e73a
TB
10190
10191 case unaligned_store:
cc9fe6bb 10192 case vector_scatter_store:
cd8ae5ed 10193 return costs->vec_unalign_store_cost;
8990e73a
TB
10194
10195 case cond_branch_taken:
cd8ae5ed 10196 return costs->cond_taken_branch_cost;
8990e73a
TB
10197
10198 case cond_branch_not_taken:
cd8ae5ed 10199 return costs->cond_not_taken_branch_cost;
8990e73a
TB
10200
10201 case vec_perm:
cd8ae5ed 10202 return costs->vec_permute_cost;
c428f91c 10203
8990e73a 10204 case vec_promote_demote:
cd8ae5ed 10205 return fp ? costs->vec_fp_stmt_cost : costs->vec_int_stmt_cost;
8990e73a
TB
10206
10207 case vec_construct:
6a70badb 10208 elements = estimated_poly_value (TYPE_VECTOR_SUBPARTS (vectype));
8990e73a
TB
10209 return elements / 2 + 1;
10210
10211 default:
10212 gcc_unreachable ();
10213 }
10214}
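/* A worked example of the vec_construct case above: for a vector type with
   four subparts the returned cost is 4 / 2 + 1 = 3; every other case simply
   reads the per-CPU cpu_vector_cost table selected by the tuning structs.  */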
10215
10216/* Implement targetm.vectorize.add_stmt_cost. */
10217static unsigned
10218aarch64_add_stmt_cost (void *data, int count, enum vect_cost_for_stmt kind,
10219 struct _stmt_vec_info *stmt_info, int misalign,
10220 enum vect_cost_model_location where)
10221{
10222 unsigned *cost = (unsigned *) data;
10223 unsigned retval = 0;
10224
10225 if (flag_vect_cost_model)
10226 {
10227 tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
10228 int stmt_cost =
10229 aarch64_builtin_vectorization_cost (kind, vectype, misalign);
10230
10231 /* Statements in an inner loop relative to the loop being
10232 vectorized are weighted more heavily. The value here is
058e4c71 10233 arbitrary and could potentially be improved with analysis. */
8990e73a 10234 if (where == vect_body && stmt_info && stmt_in_inner_loop_p (stmt_info))
058e4c71 10235 count *= 50; /* FIXME */
8990e73a
TB
10236
10237 retval = (unsigned) (count * stmt_cost);
10238 cost[where] += retval;
10239 }
10240
10241 return retval;
10242}
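/* For illustration: a statement in an inner loop relative to the loop being
   vectorized, counted once in the vect_body phase, is charged
   50 * stmt_cost by the weighting above, so its contribution dominates the
   cost of the surrounding straight-line statements.  */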
10243
0cfff2a1 10244static void initialize_aarch64_code_model (struct gcc_options *);
43e9d192 10245
0cfff2a1
KT
10246/* Parse the TO_PARSE string and put the architecture struct that it
10247 selects into RES and the architectural features into ISA_FLAGS.
10248 Return an aarch64_parse_opt_result describing the parse result.
10249 If there is an error parsing, RES and ISA_FLAGS are left unchanged. */
43e9d192 10250
0cfff2a1
KT
10251static enum aarch64_parse_opt_result
10252aarch64_parse_arch (const char *to_parse, const struct processor **res,
10253 unsigned long *isa_flags)
43e9d192
IB
10254{
10255 char *ext;
10256 const struct processor *arch;
0cfff2a1 10257 char *str = (char *) alloca (strlen (to_parse) + 1);
43e9d192
IB
10258 size_t len;
10259
0cfff2a1 10260 strcpy (str, to_parse);
43e9d192
IB
10261
10262 ext = strchr (str, '+');
10263
10264 if (ext != NULL)
10265 len = ext - str;
10266 else
10267 len = strlen (str);
10268
10269 if (len == 0)
0cfff2a1
KT
10270 return AARCH64_PARSE_MISSING_ARG;
10271
43e9d192 10272
0cfff2a1 10273 /* Loop through the list of supported ARCHes to find a match. */
43e9d192
IB
10274 for (arch = all_architectures; arch->name != NULL; arch++)
10275 {
10276 if (strlen (arch->name) == len && strncmp (arch->name, str, len) == 0)
10277 {
0cfff2a1 10278 unsigned long isa_temp = arch->flags;
43e9d192
IB
10279
10280 if (ext != NULL)
10281 {
0cfff2a1
KT
10282 /* TO_PARSE string contains at least one extension. */
10283 enum aarch64_parse_opt_result ext_res
10284 = aarch64_parse_extension (ext, &isa_temp);
43e9d192 10285
0cfff2a1
KT
10286 if (ext_res != AARCH64_PARSE_OK)
10287 return ext_res;
ffee7aa9 10288 }
0cfff2a1
KT
10289 /* Extension parsing was successful. Confirm the result
10290 arch and ISA flags. */
10291 *res = arch;
10292 *isa_flags = isa_temp;
10293 return AARCH64_PARSE_OK;
43e9d192
IB
10294 }
10295 }
10296
10297 /* ARCH name not found in list. */
0cfff2a1 10298 return AARCH64_PARSE_INVALID_ARG;
43e9d192
IB
10299}
10300
0cfff2a1
KT
10301/* Parse the TO_PARSE string and put the result tuning in RES and the
10302 architecture flags in ISA_FLAGS. Return an aarch64_parse_opt_result
10303 describing the parse result. If there is an error parsing, RES and
10304 ISA_FLAGS are left unchanged. */
43e9d192 10305
0cfff2a1
KT
10306static enum aarch64_parse_opt_result
10307aarch64_parse_cpu (const char *to_parse, const struct processor **res,
10308 unsigned long *isa_flags)
43e9d192
IB
10309{
10310 char *ext;
10311 const struct processor *cpu;
0cfff2a1 10312 char *str = (char *) alloca (strlen (to_parse) + 1);
43e9d192
IB
10313 size_t len;
10314
0cfff2a1 10315 strcpy (str, to_parse);
43e9d192
IB
10316
10317 ext = strchr (str, '+');
10318
10319 if (ext != NULL)
10320 len = ext - str;
10321 else
10322 len = strlen (str);
10323
10324 if (len == 0)
0cfff2a1
KT
10325 return AARCH64_PARSE_MISSING_ARG;
10326
43e9d192
IB
10327
10328 /* Loop through the list of supported CPUs to find a match. */
10329 for (cpu = all_cores; cpu->name != NULL; cpu++)
10330 {
10331 if (strlen (cpu->name) == len && strncmp (cpu->name, str, len) == 0)
10332 {
0cfff2a1
KT
10333 unsigned long isa_temp = cpu->flags;
10334
43e9d192
IB
10335
10336 if (ext != NULL)
10337 {
0cfff2a1
KT
10338 /* TO_PARSE string contains at least one extension. */
10339 enum aarch64_parse_opt_result ext_res
10340 = aarch64_parse_extension (ext, &isa_temp);
43e9d192 10341
0cfff2a1
KT
10342 if (ext_res != AARCH64_PARSE_OK)
10343 return ext_res;
10344 }
10345 /* Extension parsing was successful. Confirm the result
10346 cpu and ISA flags. */
10347 *res = cpu;
10348 *isa_flags = isa_temp;
10349 return AARCH64_PARSE_OK;
43e9d192
IB
10350 }
10351 }
10352
10353 /* CPU name not found in list. */
0cfff2a1 10354 return AARCH64_PARSE_INVALID_ARG;
43e9d192
IB
10355}
10356
0cfff2a1
KT
10357/* Parse the TO_PARSE string and put the cpu it selects into RES.
10358 Return an aarch64_parse_opt_result describing the parse result.
10359 If the parsing fails, RES does not change. */
43e9d192 10360
0cfff2a1
KT
10361static enum aarch64_parse_opt_result
10362aarch64_parse_tune (const char *to_parse, const struct processor **res)
43e9d192
IB
10363{
10364 const struct processor *cpu;
0cfff2a1
KT
10365 char *str = (char *) alloca (strlen (to_parse) + 1);
10366
10367 strcpy (str, to_parse);
43e9d192
IB
10368
10369 /* Loop through the list of supported CPUs to find a match. */
10370 for (cpu = all_cores; cpu->name != NULL; cpu++)
10371 {
10372 if (strcmp (cpu->name, str) == 0)
10373 {
0cfff2a1
KT
10374 *res = cpu;
10375 return AARCH64_PARSE_OK;
43e9d192
IB
10376 }
10377 }
10378
10379 /* CPU name not found in list. */
0cfff2a1 10380 return AARCH64_PARSE_INVALID_ARG;
43e9d192
IB
10381}
10382
8dec06f2
JG
10383/* Parse TOKEN, which has length LENGTH, to see if it is an option
10384 described in FLAG. If it is, return the index bit for that fusion type.
10385 If not, error (printing OPTION_NAME) and return zero. */
10386
10387static unsigned int
10388aarch64_parse_one_option_token (const char *token,
10389 size_t length,
10390 const struct aarch64_flag_desc *flag,
10391 const char *option_name)
10392{
10393 for (; flag->name != NULL; flag++)
10394 {
10395 if (length == strlen (flag->name)
10396 && !strncmp (flag->name, token, length))
10397 return flag->flag;
10398 }
10399
10400 error ("unknown flag passed in -moverride=%s (%s)", option_name, token);
10401 return 0;
10402}
10403
10404/* Parse OPTION, which is a '.'-separated list of flags to enable.
10405 FLAGS gives the list of flags we understand, INITIAL_STATE gives any
10406 default state we inherit from the CPU tuning structures. OPTION_NAME
10407 gives the top-level option we are parsing in the -moverride string,
10408 for use in error messages. */
10409
10410static unsigned int
10411aarch64_parse_boolean_options (const char *option,
10412 const struct aarch64_flag_desc *flags,
10413 unsigned int initial_state,
10414 const char *option_name)
10415{
10416 const char separator = '.';
10417 const char* specs = option;
10418 const char* ntoken = option;
10419 unsigned int found_flags = initial_state;
10420
10421 while ((ntoken = strchr (specs, separator)))
10422 {
10423 size_t token_length = ntoken - specs;
10424 unsigned token_ops = aarch64_parse_one_option_token (specs,
10425 token_length,
10426 flags,
10427 option_name);
10428 /* If we find "none" (or, for simplicity's sake, an error) anywhere
10429 in the token stream, reset the supported operations. So:
10430
10431 adrp+add.cmp+branch.none.adrp+add
10432
10433 would have the result of turning on only adrp+add fusion. */
10434 if (!token_ops)
10435 found_flags = 0;
10436
10437 found_flags |= token_ops;
10438 specs = ++ntoken;
10439 }
10440
10441 /* The string ended with a trailing separator (or was empty), which is ill-formed; report an error. */
10442 if (!(*specs))
10443 {
10444 error ("%s string ill-formed\n", option_name);
10445 return 0;
10446 }
10447
10448 /* We still have one more token to parse. */
10449 size_t token_length = strlen (specs);
10450 unsigned token_ops = aarch64_parse_one_option_token (specs,
10451 token_length,
10452 flags,
10453 option_name);
10454 if (!token_ops)
10455 found_flags = 0;
10456
10457 found_flags |= token_ops;
10458 return found_flags;
10459}
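/* Tracing the example quoted above, "adrp+add.cmp+branch.none.adrp+add":
   the first two tokens accumulate their fusion flags, "none" (which the
   flag table presumably maps to a zero value) resets found_flags, and the
   final "adrp+add" is handled by the code after the loop, so only adrp+add
   fusion ends up enabled.  */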
10460
10461/* Support for overriding instruction fusion. */
10462
10463static void
10464aarch64_parse_fuse_string (const char *fuse_string,
10465 struct tune_params *tune)
10466{
10467 tune->fusible_ops = aarch64_parse_boolean_options (fuse_string,
10468 aarch64_fusible_pairs,
10469 tune->fusible_ops,
10470 "fuse=");
10471}
10472
10473/* Support for overriding other tuning flags. */
10474
10475static void
10476aarch64_parse_tune_string (const char *tune_string,
10477 struct tune_params *tune)
10478{
10479 tune->extra_tuning_flags
10480 = aarch64_parse_boolean_options (tune_string,
10481 aarch64_tuning_flags,
10482 tune->extra_tuning_flags,
10483 "tune=");
10484}
10485
10486/* Parse TOKEN, which has length LENGTH, to see if it is a tuning option
10487 we understand. If it is, extract the option string and hand it off to
10488 the appropriate function. */
10489
10490void
10491aarch64_parse_one_override_token (const char* token,
10492 size_t length,
10493 struct tune_params *tune)
10494{
10495 const struct aarch64_tuning_override_function *fn
10496 = aarch64_tuning_override_functions;
10497
10498 const char *option_part = strchr (token, '=');
10499 if (!option_part)
10500 {
10501 error ("tuning string missing in option (%s)", token);
10502 return;
10503 }
10504
10505 /* Get the length of the option name. */
10506 length = option_part - token;
10507 /* Skip the '=' to get to the option string. */
10508 option_part++;
10509
10510 for (; fn->name != NULL; fn++)
10511 {
10512 if (!strncmp (fn->name, token, length))
10513 {
10514 fn->parse_override (option_part, tune);
10515 return;
10516 }
10517 }
10518
10519 error ("unknown tuning option (%s)", token);
10520 return;
10521}
10522
5eee3c34
JW
10523/* Validate -mtls-size, setting its default and clamping it to what the code model supports. */
10524
10525static void
10526initialize_aarch64_tls_size (struct gcc_options *opts)
10527{
10528 if (aarch64_tls_size == 0)
10529 aarch64_tls_size = 24;
10530
10531 switch (opts->x_aarch64_cmodel_var)
10532 {
10533 case AARCH64_CMODEL_TINY:
10534 /* Both the default and maximum TLS size allowed under tiny are 1M, which
10535 needs two instructions to address, so we clamp the size to 24. */
10536 if (aarch64_tls_size > 24)
10537 aarch64_tls_size = 24;
10538 break;
10539 case AARCH64_CMODEL_SMALL:
10540 /* The maximum TLS size allowed under small is 4G. */
10541 if (aarch64_tls_size > 32)
10542 aarch64_tls_size = 32;
10543 break;
10544 case AARCH64_CMODEL_LARGE:
10545 /* The maximum TLS size allowed under large is 16E.
10546 FIXME: 16E would need a 64-bit offset; we only support a 48-bit offset for now. */
10547 if (aarch64_tls_size > 48)
10548 aarch64_tls_size = 48;
10549 break;
10550 default:
10551 gcc_unreachable ();
10552 }
10553
10554 return;
10555}
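/* For example, combining -mcmodel=tiny with -mtls-size=32 results in an
   effective TLS size of 24, whereas the same -mtls-size=32 is left
   untouched under the small and large code models by the clamping above.  */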
10556
8dec06f2
JG
10557/* Parse STRING looking for options in the format:
10558 string :: option:string
10559 option :: name=substring
10560 name :: {a-z}
10561 substring :: defined by option. */
10562
10563static void
10564aarch64_parse_override_string (const char* input_string,
10565 struct tune_params* tune)
10566{
10567 const char separator = ':';
10568 size_t string_length = strlen (input_string) + 1;
10569 char *string_root = (char *) xmalloc (sizeof (*string_root) * string_length);
10570 char *string = string_root;
10571 strncpy (string, input_string, string_length);
10572 string[string_length - 1] = '\0';
10573
10574 char* ntoken = string;
10575
10576 while ((ntoken = strchr (string, separator)))
10577 {
10578 size_t token_length = ntoken - string;
10579 /* Make this substring look like a string. */
10580 *ntoken = '\0';
10581 aarch64_parse_one_override_token (string, token_length, tune);
10582 string = ++ntoken;
10583 }
10584
10585 /* One last option to parse. */
10586 aarch64_parse_one_override_token (string, strlen (string), tune);
10587 free (string_root);
10588}
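/* An illustrative override string, assuming a "fuse" entry is registered in
   aarch64_tuning_override_functions (that table is defined elsewhere):
       fuse=adrp+add.cmp+branch
   The string is split on ':' into name=value options above, and the fuse=
   handler then parses the '.'-separated fusion pairs.  */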
43e9d192 10589
43e9d192
IB
10590
10591static void
0cfff2a1 10592aarch64_override_options_after_change_1 (struct gcc_options *opts)
43e9d192 10593{
acea40ac
WD
10594 /* PR 70044: We have to be careful about being called multiple times for the
10595 same function. This means all changes should be repeatable. */
10596
d6cb6d6a
WD
10597 /* Set aarch64_use_frame_pointer based on -fno-omit-frame-pointer.
10598 Disable the frame pointer flag so the mid-end will not use a frame
10599 pointer in leaf functions in order to support -fomit-leaf-frame-pointer.
10600 Set x_flag_omit_frame_pointer to the special value 2 to differentiate
10601 between -fomit-frame-pointer (1) and -fno-omit-frame-pointer (2). */
10602 aarch64_use_frame_pointer = opts->x_flag_omit_frame_pointer != 1;
acea40ac 10603 if (opts->x_flag_omit_frame_pointer == 0)
a3dc8760 10604 opts->x_flag_omit_frame_pointer = 2;
43e9d192 10605
1be34295 10606 /* If not optimizing for size, set the default
0cfff2a1
KT
10607 alignment to what the target wants. */
10608 if (!opts->x_optimize_size)
43e9d192 10609 {
c518c102
ML
10610 if (opts->x_flag_align_loops && !opts->x_str_align_loops)
10611 opts->x_str_align_loops = aarch64_tune_params.loop_align;
10612 if (opts->x_flag_align_jumps && !opts->x_str_align_jumps)
10613 opts->x_str_align_jumps = aarch64_tune_params.jump_align;
10614 if (opts->x_flag_align_functions && !opts->x_str_align_functions)
10615 opts->x_str_align_functions = aarch64_tune_params.function_align;
43e9d192 10616 }
b4f50fd4 10617
9ee6540a
WD
10618 /* We default to no pc-relative literal loads. */
10619
10620 aarch64_pcrelative_literal_loads = false;
10621
10622 /* If -mpc-relative-literal-loads is set on the command line, this
b4f50fd4 10623 implies that the user asked for PC relative literal loads. */
9ee6540a
WD
10624 if (opts->x_pcrelative_literal_loads == 1)
10625 aarch64_pcrelative_literal_loads = true;
b4f50fd4 10626
9ee6540a
WD
10627 /* In the tiny memory model it makes no sense to disallow PC relative
10628 literal pool loads. */
10629 if (aarch64_cmodel == AARCH64_CMODEL_TINY
10630 || aarch64_cmodel == AARCH64_CMODEL_TINY_PIC)
10631 aarch64_pcrelative_literal_loads = true;
98daafa0
EM
10632
10633 /* When enabling the lower precision Newton series for the square root, also
10634 enable it for the reciprocal square root, since the latter is an
10635 intermediary step for the former. */
10636 if (flag_mlow_precision_sqrt)
10637 flag_mrecip_low_precision_sqrt = true;
0cfff2a1 10638}
43e9d192 10639
0cfff2a1
KT
10640/* 'Unpack' the internal tuning structs and update the options
10641 in OPTS. The caller must have set up selected_tune and selected_arch
10642 as all the other target-specific codegen decisions are
10643 derived from them. */
10644
e4ea20c8 10645void
0cfff2a1
KT
10646aarch64_override_options_internal (struct gcc_options *opts)
10647{
10648 aarch64_tune_flags = selected_tune->flags;
10649 aarch64_tune = selected_tune->sched_core;
10650 /* Make a copy of the tuning parameters attached to the core, which
10651 we may later overwrite. */
10652 aarch64_tune_params = *(selected_tune->tune);
10653 aarch64_architecture_version = selected_arch->architecture_version;
10654
10655 if (opts->x_aarch64_override_tune_string)
10656 aarch64_parse_override_string (opts->x_aarch64_override_tune_string,
10657 &aarch64_tune_params);
10658
10659 /* This target defaults to strict volatile bitfields. */
10660 if (opts->x_flag_strict_volatile_bitfields < 0 && abi_version_at_least (2))
10661 opts->x_flag_strict_volatile_bitfields = 1;
10662
0cfff2a1 10663 initialize_aarch64_code_model (opts);
5eee3c34 10664 initialize_aarch64_tls_size (opts);
63892fa2 10665
2d6bc7fa
KT
10666 int queue_depth = 0;
10667 switch (aarch64_tune_params.autoprefetcher_model)
10668 {
10669 case tune_params::AUTOPREFETCHER_OFF:
10670 queue_depth = -1;
10671 break;
10672 case tune_params::AUTOPREFETCHER_WEAK:
10673 queue_depth = 0;
10674 break;
10675 case tune_params::AUTOPREFETCHER_STRONG:
10676 queue_depth = max_insn_queue_index + 1;
10677 break;
10678 default:
10679 gcc_unreachable ();
10680 }
10681
10682 /* We don't mind passing in global_options_set here as we don't use
10683 the *options_set structs anyway. */
10684 maybe_set_param_value (PARAM_SCHED_AUTOPREF_QUEUE_DEPTH,
10685 queue_depth,
10686 opts->x_param_values,
10687 global_options_set.x_param_values);
10688
9d2c6e2e
MK
10689 /* Set up parameters to be used in the prefetching algorithm. Do not
10690 override the defaults unless we are tuning for a core we have
10691 researched values for. */
10692 if (aarch64_tune_params.prefetch->num_slots > 0)
10693 maybe_set_param_value (PARAM_SIMULTANEOUS_PREFETCHES,
10694 aarch64_tune_params.prefetch->num_slots,
10695 opts->x_param_values,
10696 global_options_set.x_param_values);
10697 if (aarch64_tune_params.prefetch->l1_cache_size >= 0)
10698 maybe_set_param_value (PARAM_L1_CACHE_SIZE,
10699 aarch64_tune_params.prefetch->l1_cache_size,
10700 opts->x_param_values,
10701 global_options_set.x_param_values);
10702 if (aarch64_tune_params.prefetch->l1_cache_line_size >= 0)
50487d79 10703 maybe_set_param_value (PARAM_L1_CACHE_LINE_SIZE,
9d2c6e2e
MK
10704 aarch64_tune_params.prefetch->l1_cache_line_size,
10705 opts->x_param_values,
10706 global_options_set.x_param_values);
10707 if (aarch64_tune_params.prefetch->l2_cache_size >= 0)
10708 maybe_set_param_value (PARAM_L2_CACHE_SIZE,
10709 aarch64_tune_params.prefetch->l2_cache_size,
50487d79
EM
10710 opts->x_param_values,
10711 global_options_set.x_param_values);
d2ff35c0
LM
10712 if (!aarch64_tune_params.prefetch->prefetch_dynamic_strides)
10713 maybe_set_param_value (PARAM_PREFETCH_DYNAMIC_STRIDES,
10714 0,
10715 opts->x_param_values,
10716 global_options_set.x_param_values);
59100dfc
LM
10717 if (aarch64_tune_params.prefetch->minimum_stride >= 0)
10718 maybe_set_param_value (PARAM_PREFETCH_MINIMUM_STRIDE,
10719 aarch64_tune_params.prefetch->minimum_stride,
10720 opts->x_param_values,
10721 global_options_set.x_param_values);
50487d79 10722
13494fcb
WD
10723 /* Use the alternative scheduling-pressure algorithm by default. */
10724 maybe_set_param_value (PARAM_SCHED_PRESSURE_ALGORITHM, SCHED_PRESSURE_MODEL,
10725 opts->x_param_values,
10726 global_options_set.x_param_values);
10727
16b2cafd
MK
10728 /* Enable software prefetching at the specified optimization level for
10729 CPUs that have prefetch. Lower the optimization level threshold by 1
10730 when profiling is enabled. */
10731 if (opts->x_flag_prefetch_loop_arrays < 0
10732 && !opts->x_optimize_size
10733 && aarch64_tune_params.prefetch->default_opt_level >= 0
10734 && opts->x_optimize >= aarch64_tune_params.prefetch->default_opt_level)
10735 opts->x_flag_prefetch_loop_arrays = 1;
10736
0cfff2a1
KT
10737 aarch64_override_options_after_change_1 (opts);
10738}
43e9d192 10739
01f44038
KT
10740/* Print a hint with a suggestion for a core or architecture name that
10741 most closely resembles what the user passed in STR. ARCH is true if
10742 the user is asking for an architecture name. ARCH is false if the user
10743 is asking for a core name. */
10744
10745static void
10746aarch64_print_hint_for_core_or_arch (const char *str, bool arch)
10747{
10748 auto_vec<const char *> candidates;
10749 const struct processor *entry = arch ? all_architectures : all_cores;
10750 for (; entry->name != NULL; entry++)
10751 candidates.safe_push (entry->name);
a08b5429
ML
10752
10753#ifdef HAVE_LOCAL_CPU_DETECT
10754 /* Add also "native" as possible value. */
10755 if (arch)
10756 candidates.safe_push ("native");
10757#endif
10758
01f44038
KT
10759 char *s;
10760 const char *hint = candidates_list_and_hint (str, s, candidates);
10761 if (hint)
10762 inform (input_location, "valid arguments are: %s;"
10763 " did you mean %qs?", s, hint);
6285e915
ML
10764 else
10765 inform (input_location, "valid arguments are: %s", s);
10766
01f44038
KT
10767 XDELETEVEC (s);
10768}
10769
10770/* Print a hint with a suggestion for a core name that most closely resembles
10771 what the user passed in STR. */
10772
10773inline static void
10774aarch64_print_hint_for_core (const char *str)
10775{
10776 aarch64_print_hint_for_core_or_arch (str, false);
10777}
10778
10779/* Print a hint with a suggestion for an architecture name that most closely
10780 resembles what the user passed in STR. */
10781
10782inline static void
10783aarch64_print_hint_for_arch (const char *str)
10784{
10785 aarch64_print_hint_for_core_or_arch (str, true);
10786}
10787
0cfff2a1
KT
10788/* Validate a command-line -mcpu option. Parse the cpu and extensions (if any)
10789 specified in STR and throw errors if appropriate. Put the results, if
361fb3ee
KT
10790 they are valid, in RES and ISA_FLAGS. Return whether the option is
10791 valid. */
43e9d192 10792
361fb3ee 10793static bool
0cfff2a1
KT
10794aarch64_validate_mcpu (const char *str, const struct processor **res,
10795 unsigned long *isa_flags)
10796{
10797 enum aarch64_parse_opt_result parse_res
10798 = aarch64_parse_cpu (str, res, isa_flags);
10799
10800 if (parse_res == AARCH64_PARSE_OK)
361fb3ee 10801 return true;
0cfff2a1
KT
10802
10803 switch (parse_res)
10804 {
10805 case AARCH64_PARSE_MISSING_ARG:
fb241da2 10806 error ("missing cpu name in %<-mcpu=%s%>", str);
0cfff2a1
KT
10807 break;
10808 case AARCH64_PARSE_INVALID_ARG:
10809 error ("unknown value %qs for -mcpu", str);
01f44038 10810 aarch64_print_hint_for_core (str);
0cfff2a1
KT
10811 break;
10812 case AARCH64_PARSE_INVALID_FEATURE:
fb241da2 10813 error ("invalid feature modifier in %<-mcpu=%s%>", str);
0cfff2a1
KT
10814 break;
10815 default:
10816 gcc_unreachable ();
10817 }
361fb3ee
KT
10818
10819 return false;
0cfff2a1
KT
10820}
10821
10822/* Validate a command-line -march option. Parse the arch and extensions
10823 (if any) specified in STR and throw errors if appropriate. Put the
361fb3ee
KT
10824 results, if they are valid, in RES and ISA_FLAGS. Return whether the
10825 option is valid. */
0cfff2a1 10826
361fb3ee 10827static bool
0cfff2a1 10828aarch64_validate_march (const char *str, const struct processor **res,
01f44038 10829 unsigned long *isa_flags)
0cfff2a1
KT
10830{
10831 enum aarch64_parse_opt_result parse_res
10832 = aarch64_parse_arch (str, res, isa_flags);
10833
10834 if (parse_res == AARCH64_PARSE_OK)
361fb3ee 10835 return true;
0cfff2a1
KT
10836
10837 switch (parse_res)
10838 {
10839 case AARCH64_PARSE_MISSING_ARG:
fb241da2 10840 error ("missing arch name in %<-march=%s%>", str);
0cfff2a1
KT
10841 break;
10842 case AARCH64_PARSE_INVALID_ARG:
10843 error ("unknown value %qs for -march", str);
01f44038 10844 aarch64_print_hint_for_arch (str);
0cfff2a1
KT
10845 break;
10846 case AARCH64_PARSE_INVALID_FEATURE:
fb241da2 10847 error ("invalid feature modifier in %<-march=%s%>", str);
0cfff2a1
KT
10848 break;
10849 default:
10850 gcc_unreachable ();
10851 }
361fb3ee
KT
10852
10853 return false;
0cfff2a1
KT
10854}
10855
10856/* Validate a command-line -mtune option. Parse the cpu
10857 specified in STR and throw errors if appropriate. Put the
361fb3ee
KT
10858 result, if it is valid, in RES. Return whether the option is
10859 valid. */
0cfff2a1 10860
361fb3ee 10861static bool
0cfff2a1
KT
10862aarch64_validate_mtune (const char *str, const struct processor **res)
10863{
10864 enum aarch64_parse_opt_result parse_res
10865 = aarch64_parse_tune (str, res);
10866
10867 if (parse_res == AARCH64_PARSE_OK)
361fb3ee 10868 return true;
0cfff2a1
KT
10869
10870 switch (parse_res)
10871 {
10872 case AARCH64_PARSE_MISSING_ARG:
fb241da2 10873 error ("missing cpu name in %<-mtune=%s%>", str);
0cfff2a1
KT
10874 break;
10875 case AARCH64_PARSE_INVALID_ARG:
10876 error ("unknown value %qs for -mtune", str);
01f44038 10877 aarch64_print_hint_for_core (str);
0cfff2a1
KT
10878 break;
10879 default:
10880 gcc_unreachable ();
10881 }
361fb3ee
KT
10882 return false;
10883}
10884
10885/* Return the CPU corresponding to the enum CPU.
10886 If it doesn't specify a cpu, return the default. */
10887
10888static const struct processor *
10889aarch64_get_tune_cpu (enum aarch64_processor cpu)
10890{
10891 if (cpu != aarch64_none)
10892 return &all_cores[cpu];
10893
10894 /* The & 0x3f is to extract the bottom 6 bits that encode the
10895 default cpu as selected by the --with-cpu GCC configure option
10896 in config.gcc.
10897 ???: The whole TARGET_CPU_DEFAULT and AARCH64_CPU_DEFAULT_FLAGS
10898 flags mechanism should be reworked to make it more sane. */
10899 return &all_cores[TARGET_CPU_DEFAULT & 0x3f];
10900}
10901
10902/* Return the architecture corresponding to the enum ARCH.
10903 If it doesn't specify a valid architecture, return the default. */
10904
10905static const struct processor *
10906aarch64_get_arch (enum aarch64_arch arch)
10907{
10908 if (arch != aarch64_no_arch)
10909 return &all_architectures[arch];
10910
10911 const struct processor *cpu = &all_cores[TARGET_CPU_DEFAULT & 0x3f];
10912
10913 return &all_architectures[cpu->arch];
0cfff2a1
KT
10914}
10915
43cacb12
RS
10916/* Return the VG value associated with -msve-vector-bits= value VALUE. */
10917
10918static poly_uint16
10919aarch64_convert_sve_vector_bits (aarch64_sve_vector_bits_enum value)
10920{
10921 /* For now generate vector-length agnostic code for -msve-vector-bits=128.
10922 This ensures we can clearly distinguish SVE and Advanced SIMD modes when
10923 deciding which .md file patterns to use and when deciding whether
10924 something is a legitimate address or constant. */
10925 if (value == SVE_SCALABLE || value == SVE_128)
10926 return poly_uint16 (2, 2);
10927 else
10928 return (int) value / 64;
10929}
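/* For instance, -msve-vector-bits=256 gives 256 / 64 = 4, i.e. a fixed VG of
   four 64-bit granules, while both SVE_SCALABLE and SVE_128 yield the
   (2, 2) poly_uint16 that keeps code vector-length agnostic as described
   above.  */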
10930
0cfff2a1
KT
10931/* Implement TARGET_OPTION_OVERRIDE. This is called once in the beginning
10932 and is used to parse the -m{cpu,tune,arch} strings and setup the initial
10933 tuning structs. In particular it must set selected_tune and
10934 aarch64_isa_flags that define the available ISA features and tuning
10935 decisions. It must also set selected_arch as this will be used to
10936 output the .arch asm tags for each function. */
10937
10938static void
10939aarch64_override_options (void)
10940{
10941 unsigned long cpu_isa = 0;
10942 unsigned long arch_isa = 0;
10943 aarch64_isa_flags = 0;
10944
361fb3ee
KT
10945 bool valid_cpu = true;
10946 bool valid_tune = true;
10947 bool valid_arch = true;
10948
0cfff2a1
KT
10949 selected_cpu = NULL;
10950 selected_arch = NULL;
10951 selected_tune = NULL;
10952
10953 /* -mcpu=CPU is shorthand for -march=ARCH_FOR_CPU, -mtune=CPU.
10954 If either of -march or -mtune is given, they override their
10955 respective component of -mcpu. */
10956 if (aarch64_cpu_string)
361fb3ee
KT
10957 valid_cpu = aarch64_validate_mcpu (aarch64_cpu_string, &selected_cpu,
10958 &cpu_isa);
0cfff2a1
KT
10959
10960 if (aarch64_arch_string)
361fb3ee
KT
10961 valid_arch = aarch64_validate_march (aarch64_arch_string, &selected_arch,
10962 &arch_isa);
0cfff2a1
KT
10963
10964 if (aarch64_tune_string)
361fb3ee 10965 valid_tune = aarch64_validate_mtune (aarch64_tune_string, &selected_tune);
43e9d192
IB
10966
10967 /* If the user did not specify a processor, choose the default
10968 one for them. This will be the CPU set during configuration using
a3cd0246 10969 --with-cpu, otherwise it is "generic". */
43e9d192
IB
10970 if (!selected_cpu)
10971 {
0cfff2a1
KT
10972 if (selected_arch)
10973 {
10974 selected_cpu = &all_cores[selected_arch->ident];
10975 aarch64_isa_flags = arch_isa;
361fb3ee 10976 explicit_arch = selected_arch->arch;
0cfff2a1
KT
10977 }
10978 else
10979 {
361fb3ee
KT
10980 /* Get default configure-time CPU. */
10981 selected_cpu = aarch64_get_tune_cpu (aarch64_none);
0cfff2a1
KT
10982 aarch64_isa_flags = TARGET_CPU_DEFAULT >> 6;
10983 }
361fb3ee
KT
10984
10985 if (selected_tune)
10986 explicit_tune_core = selected_tune->ident;
0cfff2a1
KT
10987 }
10988 /* If both -mcpu and -march are specified check that they are architecturally
10989 compatible, warn if they're not and prefer the -march ISA flags. */
10990 else if (selected_arch)
10991 {
10992 if (selected_arch->arch != selected_cpu->arch)
10993 {
10994 warning (0, "switch -mcpu=%s conflicts with -march=%s switch",
10995 all_architectures[selected_cpu->arch].name,
10996 selected_arch->name);
10997 }
10998 aarch64_isa_flags = arch_isa;
361fb3ee
KT
10999 explicit_arch = selected_arch->arch;
11000 explicit_tune_core = selected_tune ? selected_tune->ident
11001 : selected_cpu->ident;
0cfff2a1
KT
11002 }
11003 else
11004 {
11005 /* -mcpu but no -march. */
11006 aarch64_isa_flags = cpu_isa;
361fb3ee
KT
11007 explicit_tune_core = selected_tune ? selected_tune->ident
11008 : selected_cpu->ident;
11009 gcc_assert (selected_cpu);
11010 selected_arch = &all_architectures[selected_cpu->arch];
11011 explicit_arch = selected_arch->arch;
43e9d192
IB
11012 }
11013
0cfff2a1
KT
11014 /* Set the arch as well, as we will need it when outputting
11015 the .arch directive in assembly. */
11016 if (!selected_arch)
11017 {
11018 gcc_assert (selected_cpu);
11019 selected_arch = &all_architectures[selected_cpu->arch];
11020 }
43e9d192 11021
43e9d192 11022 if (!selected_tune)
3edaf26d 11023 selected_tune = selected_cpu;
43e9d192 11024
0cfff2a1
KT
11025#ifndef HAVE_AS_MABI_OPTION
11026 /* The compiler may have been configured with 2.23.* binutils, which does
11027 not have support for ILP32. */
11028 if (TARGET_ILP32)
ee61f880 11029 error ("assembler does not support -mabi=ilp32");
0cfff2a1 11030#endif
43e9d192 11031
43cacb12
RS
11032 /* Convert -msve-vector-bits to a VG count. */
11033 aarch64_sve_vg = aarch64_convert_sve_vector_bits (aarch64_sve_vector_bits);
11034
db58fd89 11035 if (aarch64_ra_sign_scope != AARCH64_FUNCTION_NONE && TARGET_ILP32)
ee61f880 11036 sorry ("return address signing is only supported for -mabi=lp64");
db58fd89 11037
361fb3ee
KT
11038 /* Make sure we properly set up the explicit options. */
11039 if ((aarch64_cpu_string && valid_cpu)
11040 || (aarch64_tune_string && valid_tune))
11041 gcc_assert (explicit_tune_core != aarch64_none);
11042
11043 if ((aarch64_cpu_string && valid_cpu)
11044 || (aarch64_arch_string && valid_arch))
11045 gcc_assert (explicit_arch != aarch64_no_arch);
11046
0cfff2a1
KT
11047 aarch64_override_options_internal (&global_options);
11048
11049 /* Save these options as the default ones in case we push and pop them later
11050 while processing functions with potential target attributes. */
11051 target_option_default_node = target_option_current_node
11052 = build_target_option_node (&global_options);
43e9d192
IB
11053}
11054
11055/* Implement targetm.override_options_after_change. */
11056
11057static void
11058aarch64_override_options_after_change (void)
11059{
0cfff2a1 11060 aarch64_override_options_after_change_1 (&global_options);
43e9d192
IB
11061}
11062
11063static struct machine_function *
11064aarch64_init_machine_status (void)
11065{
11066 struct machine_function *machine;
766090c2 11067 machine = ggc_cleared_alloc<machine_function> ();
43e9d192
IB
11068 return machine;
11069}
11070
11071void
11072aarch64_init_expanders (void)
11073{
11074 init_machine_status = aarch64_init_machine_status;
11075}
11076
11077/* A checking mechanism for the implementation of the various code models. */
11078static void
0cfff2a1 11079initialize_aarch64_code_model (struct gcc_options *opts)
43e9d192 11080{
0cfff2a1 11081 if (opts->x_flag_pic)
43e9d192 11082 {
0cfff2a1 11083 switch (opts->x_aarch64_cmodel_var)
43e9d192
IB
11084 {
11085 case AARCH64_CMODEL_TINY:
11086 aarch64_cmodel = AARCH64_CMODEL_TINY_PIC;
11087 break;
11088 case AARCH64_CMODEL_SMALL:
34ecdb0f 11089#ifdef HAVE_AS_SMALL_PIC_RELOCS
1b1e81f8
JW
11090 aarch64_cmodel = (flag_pic == 2
11091 ? AARCH64_CMODEL_SMALL_PIC
11092 : AARCH64_CMODEL_SMALL_SPIC);
34ecdb0f
JW
11093#else
11094 aarch64_cmodel = AARCH64_CMODEL_SMALL_PIC;
11095#endif
43e9d192
IB
11096 break;
11097 case AARCH64_CMODEL_LARGE:
11098 sorry ("code model %qs with -f%s", "large",
0cfff2a1 11099 opts->x_flag_pic > 1 ? "PIC" : "pic");
1c652781 11100 break;
43e9d192
IB
11101 default:
11102 gcc_unreachable ();
11103 }
11104 }
11105 else
0cfff2a1 11106 aarch64_cmodel = opts->x_aarch64_cmodel_var;
43e9d192
IB
11107}
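/* For instance, -fpic (flag_pic == 1) with the default small code model
   selects AARCH64_CMODEL_SMALL_SPIC when the assembler supports the small
   PIC relocations, whereas -fPIC (flag_pic == 2) selects
   AARCH64_CMODEL_SMALL_PIC, per the switch above.  */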
11108
361fb3ee
KT
11109/* Implement TARGET_OPTION_SAVE. */
11110
11111static void
11112aarch64_option_save (struct cl_target_option *ptr, struct gcc_options *opts)
11113{
11114 ptr->x_aarch64_override_tune_string = opts->x_aarch64_override_tune_string;
11115}
11116
11117/* Implements TARGET_OPTION_RESTORE. Restore the backend codegen decisions
11118 using the information saved in PTR. */
11119
11120static void
11121aarch64_option_restore (struct gcc_options *opts, struct cl_target_option *ptr)
11122{
11123 opts->x_explicit_tune_core = ptr->x_explicit_tune_core;
11124 selected_tune = aarch64_get_tune_cpu (ptr->x_explicit_tune_core);
11125 opts->x_explicit_arch = ptr->x_explicit_arch;
11126 selected_arch = aarch64_get_arch (ptr->x_explicit_arch);
11127 opts->x_aarch64_override_tune_string = ptr->x_aarch64_override_tune_string;
11128
11129 aarch64_override_options_internal (opts);
11130}
11131
11132/* Implement TARGET_OPTION_PRINT. */
11133
11134static void
11135aarch64_option_print (FILE *file, int indent, struct cl_target_option *ptr)
11136{
11137 const struct processor *cpu
11138 = aarch64_get_tune_cpu (ptr->x_explicit_tune_core);
11139 unsigned long isa_flags = ptr->x_aarch64_isa_flags;
11140 const struct processor *arch = aarch64_get_arch (ptr->x_explicit_arch);
054b4005 11141 std::string extension
04a99ebe 11142 = aarch64_get_extension_string_for_isa_flags (isa_flags, arch->flags);
361fb3ee
KT
11143
11144 fprintf (file, "%*sselected tune = %s\n", indent, "", cpu->name);
054b4005
JG
11145 fprintf (file, "%*sselected arch = %s%s\n", indent, "",
11146 arch->name, extension.c_str ());
361fb3ee
KT
11147}
11148
d78006d9
KT
11149static GTY(()) tree aarch64_previous_fndecl;
11150
e4ea20c8
KT
11151void
11152aarch64_reset_previous_fndecl (void)
11153{
11154 aarch64_previous_fndecl = NULL;
11155}
11156
acfc1ac1
KT
11157/* Restore or save the TREE_TARGET_GLOBALS from or to NEW_TREE.
11158 Used by aarch64_set_current_function and aarch64_pragma_target_parse to
11159 make sure optab availability predicates are recomputed when necessary. */
11160
11161void
11162aarch64_save_restore_target_globals (tree new_tree)
11163{
11164 if (TREE_TARGET_GLOBALS (new_tree))
11165 restore_target_globals (TREE_TARGET_GLOBALS (new_tree));
11166 else if (new_tree == target_option_default_node)
11167 restore_target_globals (&default_target_globals);
11168 else
11169 TREE_TARGET_GLOBALS (new_tree) = save_target_globals_default_opts ();
11170}
11171
d78006d9
KT
11172/* Implement TARGET_SET_CURRENT_FUNCTION. Unpack the codegen decisions
11173 like tuning and ISA features from the DECL_FUNCTION_SPECIFIC_TARGET
11174 of the function, if such exists. This function may be called multiple
11175 times on a single function so use aarch64_previous_fndecl to avoid
11176 setting up identical state. */
11177
11178static void
11179aarch64_set_current_function (tree fndecl)
11180{
acfc1ac1
KT
11181 if (!fndecl || fndecl == aarch64_previous_fndecl)
11182 return;
11183
d78006d9
KT
11184 tree old_tree = (aarch64_previous_fndecl
11185 ? DECL_FUNCTION_SPECIFIC_TARGET (aarch64_previous_fndecl)
11186 : NULL_TREE);
11187
acfc1ac1 11188 tree new_tree = DECL_FUNCTION_SPECIFIC_TARGET (fndecl);
d78006d9 11189
acfc1ac1
KT
11190 /* If current function has no attributes but the previous one did,
11191 use the default node. */
11192 if (!new_tree && old_tree)
11193 new_tree = target_option_default_node;
d78006d9 11194
acfc1ac1
KT
11195 /* If nothing to do, return. #pragma GCC reset or #pragma GCC pop to
11196 the default have been handled by aarch64_save_restore_target_globals from
11197 aarch64_pragma_target_parse. */
11198 if (old_tree == new_tree)
11199 return;
d78006d9 11200
acfc1ac1 11201 aarch64_previous_fndecl = fndecl;
6e17a23b 11202
acfc1ac1
KT
11203 /* First set the target options. */
11204 cl_target_option_restore (&global_options, TREE_TARGET_OPTION (new_tree));
6e17a23b 11205
acfc1ac1 11206 aarch64_save_restore_target_globals (new_tree);
d78006d9 11207}
361fb3ee 11208
5a2c8331
KT
11209/* Enum describing the various ways we can handle attributes.
11210 In many cases we can reuse the generic option handling machinery. */
11211
11212enum aarch64_attr_opt_type
11213{
11214 aarch64_attr_mask, /* Attribute should set a bit in target_flags. */
11215 aarch64_attr_bool, /* Attribute sets or unsets a boolean variable. */
11216 aarch64_attr_enum, /* Attribute sets an enum variable. */
11217 aarch64_attr_custom /* Attribute requires a custom handling function. */
11218};
11219
11220/* All the information needed to handle a target attribute.
11221 NAME is the name of the attribute.
9c582551 11222 ATTR_TYPE specifies the type of behavior of the attribute as described
5a2c8331
KT
11223 in the definition of enum aarch64_attr_opt_type.
11224 ALLOW_NEG is true if the attribute supports a "no-" form.
ab93e9b7
SE
11225 HANDLER is the function that takes the attribute string as an argument.
11226 It is needed only when the ATTR_TYPE is aarch64_attr_custom.
5a2c8331 11227 OPT_NUM is the enum specifying the option that the attribute modifies.
9c582551 11228 This is needed for attributes that mirror the behavior of a command-line
5a2c8331
KT
11229 option, that is it has ATTR_TYPE aarch64_attr_mask, aarch64_attr_bool or
11230 aarch64_attr_enum. */
11231
11232struct aarch64_attribute_info
11233{
11234 const char *name;
11235 enum aarch64_attr_opt_type attr_type;
11236 bool allow_neg;
ab93e9b7 11237 bool (*handler) (const char *);
5a2c8331
KT
11238 enum opt_code opt_num;
11239};
11240
ab93e9b7 11241/* Handle the ARCH_STR argument to the arch= target attribute. */
5a2c8331
KT
11242
11243static bool
ab93e9b7 11244aarch64_handle_attr_arch (const char *str)
5a2c8331
KT
11245{
11246 const struct processor *tmp_arch = NULL;
11247 enum aarch64_parse_opt_result parse_res
11248 = aarch64_parse_arch (str, &tmp_arch, &aarch64_isa_flags);
11249
11250 if (parse_res == AARCH64_PARSE_OK)
11251 {
11252 gcc_assert (tmp_arch);
11253 selected_arch = tmp_arch;
11254 explicit_arch = selected_arch->arch;
11255 return true;
11256 }
11257
11258 switch (parse_res)
11259 {
11260 case AARCH64_PARSE_MISSING_ARG:
ab93e9b7 11261 error ("missing name in %<target(\"arch=\")%> pragma or attribute");
5a2c8331
KT
11262 break;
11263 case AARCH64_PARSE_INVALID_ARG:
ab93e9b7 11264 error ("invalid name (\"%s\") in %<target(\"arch=\")%> pragma or attribute", str);
01f44038 11265 aarch64_print_hint_for_arch (str);
5a2c8331
KT
11266 break;
11267 case AARCH64_PARSE_INVALID_FEATURE:
ab93e9b7 11268 error ("invalid value (\"%s\") in %<target()%> pragma or attribute", str);
5a2c8331
KT
11269 break;
11270 default:
11271 gcc_unreachable ();
11272 }
11273
11274 return false;
11275}
11276
ab93e9b7 11277/* Handle the argument CPU_STR to the cpu= target attribute. */
5a2c8331
KT
11278
11279static bool
ab93e9b7 11280aarch64_handle_attr_cpu (const char *str)
5a2c8331
KT
11281{
11282 const struct processor *tmp_cpu = NULL;
11283 enum aarch64_parse_opt_result parse_res
11284 = aarch64_parse_cpu (str, &tmp_cpu, &aarch64_isa_flags);
11285
11286 if (parse_res == AARCH64_PARSE_OK)
11287 {
11288 gcc_assert (tmp_cpu);
11289 selected_tune = tmp_cpu;
11290 explicit_tune_core = selected_tune->ident;
11291
11292 selected_arch = &all_architectures[tmp_cpu->arch];
11293 explicit_arch = selected_arch->arch;
11294 return true;
11295 }
11296
11297 switch (parse_res)
11298 {
11299 case AARCH64_PARSE_MISSING_ARG:
ab93e9b7 11300 error ("missing name in %<target(\"cpu=\")%> pragma or attribute");
5a2c8331
KT
11301 break;
11302 case AARCH64_PARSE_INVALID_ARG:
ab93e9b7 11303 error ("invalid name (\"%s\") in %<target(\"cpu=\")%> pragma or attribute", str);
01f44038 11304 aarch64_print_hint_for_core (str);
5a2c8331
KT
11305 break;
11306 case AARCH64_PARSE_INVALID_FEATURE:
ab93e9b7 11307 error ("invalid value (\"%s\") in %<target()%> pragma or attribute", str);
5a2c8331
KT
11308 break;
11309 default:
11310 gcc_unreachable ();
11311 }
11312
11313 return false;
11314}
11315
ab93e9b7 11316/* Handle the argument STR to the tune= target attribute. */
5a2c8331
KT
11317
11318static bool
ab93e9b7 11319aarch64_handle_attr_tune (const char *str)
5a2c8331
KT
11320{
11321 const struct processor *tmp_tune = NULL;
11322 enum aarch64_parse_opt_result parse_res
11323 = aarch64_parse_tune (str, &tmp_tune);
11324
11325 if (parse_res == AARCH64_PARSE_OK)
11326 {
11327 gcc_assert (tmp_tune);
11328 selected_tune = tmp_tune;
11329 explicit_tune_core = selected_tune->ident;
11330 return true;
11331 }
11332
11333 switch (parse_res)
11334 {
11335 case AARCH64_PARSE_INVALID_ARG:
ab93e9b7 11336 error ("invalid name (\"%s\") in %<target(\"tune=\")%> pragma or attribute", str);
01f44038 11337 aarch64_print_hint_for_core (str);
5a2c8331
KT
11338 break;
11339 default:
11340 gcc_unreachable ();
11341 }
11342
11343 return false;
11344}
11345
11346/* Parse an architecture extensions target attribute string specified in STR.
11347 For example "+fp+nosimd". Show any errors if needed. Return TRUE
11348 if successful. Update aarch64_isa_flags to reflect the ISA features
ab93e9b7 11349 modified. */
5a2c8331
KT
11350
11351static bool
ab93e9b7 11352aarch64_handle_attr_isa_flags (char *str)
5a2c8331
KT
11353{
11354 enum aarch64_parse_opt_result parse_res;
11355 unsigned long isa_flags = aarch64_isa_flags;
11356
e4ea20c8
KT
11357 /* We allow "+nothing" in the beginning to clear out all architectural
11358 features if the user wants to handpick specific features. */
11359 if (strncmp ("+nothing", str, 8) == 0)
11360 {
11361 isa_flags = 0;
11362 str += 8;
11363 }
11364
5a2c8331
KT
11365 parse_res = aarch64_parse_extension (str, &isa_flags);
11366
11367 if (parse_res == AARCH64_PARSE_OK)
11368 {
11369 aarch64_isa_flags = isa_flags;
11370 return true;
11371 }
11372
11373 switch (parse_res)
11374 {
11375 case AARCH64_PARSE_MISSING_ARG:
ab93e9b7 11376 error ("missing value in %<target()%> pragma or attribute");
5a2c8331
KT
11377 break;
11378
11379 case AARCH64_PARSE_INVALID_FEATURE:
ab93e9b7 11380 error ("invalid value (\"%s\") in %<target()%> pragma or attribute", str);
5a2c8331
KT
11381 break;
11382
11383 default:
11384 gcc_unreachable ();
11385 }
11386
11387 return false;
11388}
11389
11390/* The target attributes that we support. On top of these we also support just
11391 ISA extensions, like __attribute__ ((target ("+crc"))), but that case is
11392 handled explicitly in aarch64_process_one_target_attr. */
11393
11394static const struct aarch64_attribute_info aarch64_attributes[] =
11395{
11396 { "general-regs-only", aarch64_attr_mask, false, NULL,
11397 OPT_mgeneral_regs_only },
11398 { "fix-cortex-a53-835769", aarch64_attr_bool, true, NULL,
11399 OPT_mfix_cortex_a53_835769 },
48bb1a55
CL
11400 { "fix-cortex-a53-843419", aarch64_attr_bool, true, NULL,
11401 OPT_mfix_cortex_a53_843419 },
5a2c8331 11402 { "cmodel", aarch64_attr_enum, false, NULL, OPT_mcmodel_ },
675d044c 11403 { "strict-align", aarch64_attr_mask, true, NULL, OPT_mstrict_align },
5a2c8331
KT
11404 { "omit-leaf-frame-pointer", aarch64_attr_bool, true, NULL,
11405 OPT_momit_leaf_frame_pointer },
11406 { "tls-dialect", aarch64_attr_enum, false, NULL, OPT_mtls_dialect_ },
11407 { "arch", aarch64_attr_custom, false, aarch64_handle_attr_arch,
11408 OPT_march_ },
11409 { "cpu", aarch64_attr_custom, false, aarch64_handle_attr_cpu, OPT_mcpu_ },
11410 { "tune", aarch64_attr_custom, false, aarch64_handle_attr_tune,
11411 OPT_mtune_ },
db58fd89
JW
11412 { "sign-return-address", aarch64_attr_enum, false, NULL,
11413 OPT_msign_return_address_ },
5a2c8331
KT
11414 { NULL, aarch64_attr_custom, false, NULL, OPT____ }
11415};
11416
11417/* Parse ARG_STR which contains the definition of one target attribute.
ab93e9b7 11418 Show appropriate errors if any or return true if the attribute is valid. */
5a2c8331
KT
11419
11420static bool
ab93e9b7 11421aarch64_process_one_target_attr (char *arg_str)
5a2c8331
KT
11422{
11423 bool invert = false;
11424
11425 size_t len = strlen (arg_str);
11426
11427 if (len == 0)
11428 {
ab93e9b7 11429 error ("malformed %<target()%> pragma or attribute");
5a2c8331
KT
11430 return false;
11431 }
11432
11433 char *str_to_check = (char *) alloca (len + 1);
11434 strcpy (str_to_check, arg_str);
11435
11436 /* Skip leading whitespace. */
11437 while (*str_to_check == ' ' || *str_to_check == '\t')
11438 str_to_check++;
11439
11440 /* We have something like __attribute__ ((target ("+fp+nosimd"))).
11441 It is easier to detect and handle it explicitly here rather than going
11442 through the machinery for the rest of the target attributes in this
11443 function. */
11444 if (*str_to_check == '+')
ab93e9b7 11445 return aarch64_handle_attr_isa_flags (str_to_check);
5a2c8331
KT
11446
11447 if (len > 3 && strncmp (str_to_check, "no-", 3) == 0)
11448 {
11449 invert = true;
11450 str_to_check += 3;
11451 }
11452 char *arg = strchr (str_to_check, '=');
11453
11454 /* If we found opt=foo then terminate STR_TO_CHECK at the '='
11455 and point ARG to "foo". */
11456 if (arg)
11457 {
11458 *arg = '\0';
11459 arg++;
11460 }
11461 const struct aarch64_attribute_info *p_attr;
16d12992 11462 bool found = false;
5a2c8331
KT
11463 for (p_attr = aarch64_attributes; p_attr->name; p_attr++)
11464 {
11465 /* If the names don't match up, or the user has given an argument
11466 to an attribute that doesn't accept one, or didn't give an argument
11467 to an attribute that expects one, fail to match. */
11468 if (strcmp (str_to_check, p_attr->name) != 0)
11469 continue;
11470
16d12992 11471 found = true;
5a2c8331
KT
11472 bool attr_need_arg_p = p_attr->attr_type == aarch64_attr_custom
11473 || p_attr->attr_type == aarch64_attr_enum;
11474
11475 if (attr_need_arg_p ^ (arg != NULL))
11476 {
ab93e9b7 11477 error ("pragma or attribute %<target(\"%s\")%> does not accept an argument", str_to_check);
5a2c8331
KT
11478 return false;
11479 }
11480
11481 /* If the name matches but the attribute does not allow "no-" versions
11482 then we can't match. */
11483 if (invert && !p_attr->allow_neg)
11484 {
ab93e9b7 11485 error ("pragma or attribute %<target(\"%s\")%> does not allow a negated form", str_to_check);
5a2c8331
KT
11486 return false;
11487 }
11488
11489 switch (p_attr->attr_type)
11490 {
11491 /* Has a custom handler registered.
11492 For example, cpu=, arch=, tune=. */
11493 case aarch64_attr_custom:
11494 gcc_assert (p_attr->handler);
ab93e9b7 11495 if (!p_attr->handler (arg))
5a2c8331
KT
11496 return false;
11497 break;
11498
11499 /* Either set or unset a boolean option. */
11500 case aarch64_attr_bool:
11501 {
11502 struct cl_decoded_option decoded;
11503
11504 generate_option (p_attr->opt_num, NULL, !invert,
11505 CL_TARGET, &decoded);
11506 aarch64_handle_option (&global_options, &global_options_set,
11507 &decoded, input_location);
11508 break;
11509 }
11510 /* Set or unset a bit in the target_flags. aarch64_handle_option
11511 should know what mask to apply given the option number. */
11512 case aarch64_attr_mask:
11513 {
11514 struct cl_decoded_option decoded;
11515 /* We only need to specify the option number.
11516 aarch64_handle_option will know which mask to apply. */
11517 decoded.opt_index = p_attr->opt_num;
11518 decoded.value = !invert;
11519 aarch64_handle_option (&global_options, &global_options_set,
11520 &decoded, input_location);
11521 break;
11522 }
11523 /* Use the option setting machinery to set an option to an enum. */
11524 case aarch64_attr_enum:
11525 {
11526 gcc_assert (arg);
11527 bool valid;
11528 int value;
11529 valid = opt_enum_arg_to_value (p_attr->opt_num, arg,
11530 &value, CL_TARGET);
11531 if (valid)
11532 {
11533 set_option (&global_options, NULL, p_attr->opt_num, value,
11534 NULL, DK_UNSPECIFIED, input_location,
11535 global_dc);
11536 }
11537 else
11538 {
ab93e9b7 11539 error ("pragma or attribute %<target(\"%s=%s\")%> is not valid", str_to_check, arg);
5a2c8331
KT
11540 }
11541 break;
11542 }
11543 default:
11544 gcc_unreachable ();
11545 }
11546 }
11547
16d12992
KT
11548 /* If we reached here we have either found an attribute and validated
11549 it or didn't match any. If we matched an attribute but its arguments
11550 were malformed we will have returned false already. */
11551 return found;
5a2c8331
KT
11552}
11553
11554/* Count how many times the character C appears in
11555 NULL-terminated string STR. */
11556
11557static unsigned int
11558num_occurences_in_str (char c, char *str)
11559{
11560 unsigned int res = 0;
11561 while (*str != '\0')
11562 {
11563 if (*str == c)
11564 res++;
11565
11566 str++;
11567 }
11568
11569 return res;
11570}
11571
11572/* Parse the tree in ARGS that contains the target attribute information
ab93e9b7 11573 and update the global target options space. */
5a2c8331
KT
11574
11575bool
ab93e9b7 11576aarch64_process_target_attr (tree args)
5a2c8331
KT
11577{
11578 if (TREE_CODE (args) == TREE_LIST)
11579 {
11580 do
11581 {
11582 tree head = TREE_VALUE (args);
11583 if (head)
11584 {
ab93e9b7 11585 if (!aarch64_process_target_attr (head))
5a2c8331
KT
11586 return false;
11587 }
11588 args = TREE_CHAIN (args);
11589 } while (args);
11590
11591 return true;
11592 }
3b6cb9e3
ML
11593
11594 if (TREE_CODE (args) != STRING_CST)
11595 {
11596 error ("attribute %<target%> argument not a string");
11597 return false;
11598 }
5a2c8331
KT
11599
11600 size_t len = strlen (TREE_STRING_POINTER (args));
11601 char *str_to_check = (char *) alloca (len + 1);
11602 strcpy (str_to_check, TREE_STRING_POINTER (args));
11603
11604 if (len == 0)
11605 {
ab93e9b7 11606 error ("malformed %<target()%> pragma or attribute");
5a2c8331
KT
11607 return false;
11608 }
11609
11610 /* Used to catch empty spaces between commas, i.e.
11611 attribute ((target ("attr1,,attr2"))). */
11612 unsigned int num_commas = num_occurences_in_str (',', str_to_check);
11613
11614 /* Handle multiple target attributes separated by ','. */
11615 char *token = strtok (str_to_check, ",");
11616
11617 unsigned int num_attrs = 0;
11618 while (token)
11619 {
11620 num_attrs++;
ab93e9b7 11621 if (!aarch64_process_one_target_attr (token))
5a2c8331 11622 {
ab93e9b7 11623 error ("pragma or attribute %<target(\"%s\")%> is not valid", token);
5a2c8331
KT
11624 return false;
11625 }
11626
11627 token = strtok (NULL, ",");
11628 }
11629
11630 if (num_attrs != num_commas + 1)
11631 {
ab93e9b7 11632 error ("malformed %<target(\"%s\")%> pragma or attribute", TREE_STRING_POINTER (args));
5a2c8331
KT
11633 return false;
11634 }
11635
11636 return true;
11637}
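/* An illustrative (hypothetical) use of the machinery above:
       __attribute__ ((target ("arch=armv8-a+crc,no-omit-leaf-frame-pointer")))
   The string is split on ',' and each token is handled by
   aarch64_process_one_target_attr; "arch=" goes through the custom handler
   and "no-omit-leaf-frame-pointer" uses the negated boolean form.  */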
11638
11639/* Implement TARGET_OPTION_VALID_ATTRIBUTE_P. This is used to
11640 process attribute ((target ("..."))). */
11641
11642static bool
11643aarch64_option_valid_attribute_p (tree fndecl, tree, tree args, int)
11644{
11645 struct cl_target_option cur_target;
11646 bool ret;
11647 tree old_optimize;
11648 tree new_target, new_optimize;
11649 tree existing_target = DECL_FUNCTION_SPECIFIC_TARGET (fndecl);
91d0e8de
KT
11650
11651 /* If what we're processing is the current pragma string then the
11652 target option node is already stored in target_option_current_node
11653 by aarch64_pragma_target_parse in aarch64-c.c. Use that to avoid
11654 having to re-parse the string. This is especially useful to keep
11655 arm_neon.h compile times down since that header contains a lot
11656 of intrinsics enclosed in pragmas. */
11657 if (!existing_target && args == current_target_pragma)
11658 {
11659 DECL_FUNCTION_SPECIFIC_TARGET (fndecl) = target_option_current_node;
11660 return true;
11661 }
5a2c8331
KT
11662 tree func_optimize = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl);
11663
11664 old_optimize = build_optimization_node (&global_options);
11665 func_optimize = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl);
11666
11667 /* If the function changed the optimization levels as well as setting
11668 target options, start with the optimizations specified. */
11669 if (func_optimize && func_optimize != old_optimize)
11670 cl_optimization_restore (&global_options,
11671 TREE_OPTIMIZATION (func_optimize));
11672
11673 /* Save the current target options to restore at the end. */
11674 cl_target_option_save (&cur_target, &global_options);
11675
11676 /* If fndecl already has some target attributes applied to it, unpack
11677 them so that we add this attribute on top of them, rather than
11678 overwriting them. */
11679 if (existing_target)
11680 {
11681 struct cl_target_option *existing_options
11682 = TREE_TARGET_OPTION (existing_target);
11683
11684 if (existing_options)
11685 cl_target_option_restore (&global_options, existing_options);
11686 }
11687 else
11688 cl_target_option_restore (&global_options,
11689 TREE_TARGET_OPTION (target_option_current_node));
11690
ab93e9b7 11691 ret = aarch64_process_target_attr (args);
5a2c8331
KT
11692
11693 /* Set up any additional state. */
11694 if (ret)
11695 {
11696 aarch64_override_options_internal (&global_options);
e95a988a
KT
11697 /* Initialize SIMD builtins if we haven't already.
11698 Set current_target_pragma to NULL for the duration so that
11699 the builtin initialization code doesn't try to tag the functions
11700 being built with the attributes specified by any current pragma, thus
11701 going into an infinite recursion. */
11702 if (TARGET_SIMD)
11703 {
11704 tree saved_current_target_pragma = current_target_pragma;
11705 current_target_pragma = NULL;
11706 aarch64_init_simd_builtins ();
11707 current_target_pragma = saved_current_target_pragma;
11708 }
5a2c8331
KT
11709 new_target = build_target_option_node (&global_options);
11710 }
11711 else
11712 new_target = NULL;
11713
11714 new_optimize = build_optimization_node (&global_options);
11715
11716 if (fndecl && ret)
11717 {
11718 DECL_FUNCTION_SPECIFIC_TARGET (fndecl) = new_target;
11719
11720 if (old_optimize != new_optimize)
11721 DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl) = new_optimize;
11722 }
11723
11724 cl_target_option_restore (&global_options, &cur_target);
11725
11726 if (old_optimize != new_optimize)
11727 cl_optimization_restore (&global_options,
11728 TREE_OPTIMIZATION (old_optimize));
11729 return ret;
11730}
11731
1fd8d40c
KT
11732/* Helper for aarch64_can_inline_p. In the case where CALLER and CALLEE are
11733 tri-bool options (yes, no, don't care) and the default value is
11734 DEF, determine whether to reject inlining. */
11735
11736static bool
11737aarch64_tribools_ok_for_inlining_p (int caller, int callee,
11738 int dont_care, int def)
11739{
11740 /* If the callee doesn't care, always allow inlining. */
11741 if (callee == dont_care)
11742 return true;
11743
11744 /* If the caller doesn't care, always allow inlining. */
11745 if (caller == dont_care)
11746 return true;
11747
11748 /* Otherwise, allow inlining if either the callee and caller values
11749 agree, or if the callee is using the default value. */
11750 return (callee == caller || callee == def);
11751}
11752
11753/* Implement TARGET_CAN_INLINE_P. Decide whether it is valid
11754 to inline CALLEE into CALLER based on target-specific info.
11755 Make sure that the caller and callee have compatible architectural
11756 features. Then go through the other possible target attributes
11757 and see if they can block inlining. Try not to reject always_inline
11758 callees unless they are incompatible architecturally. */
11759
11760static bool
11761aarch64_can_inline_p (tree caller, tree callee)
11762{
11763 tree caller_tree = DECL_FUNCTION_SPECIFIC_TARGET (caller);
11764 tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (callee);
11765
1fd8d40c
KT
11766 struct cl_target_option *caller_opts
11767 = TREE_TARGET_OPTION (caller_tree ? caller_tree
11768 : target_option_default_node);
11769
675d044c
SD
11770 struct cl_target_option *callee_opts
11771 = TREE_TARGET_OPTION (callee_tree ? callee_tree
11772 : target_option_default_node);
1fd8d40c
KT
11773
11774 /* Callee's ISA flags should be a subset of the caller's. */
11775 if ((caller_opts->x_aarch64_isa_flags & callee_opts->x_aarch64_isa_flags)
11776 != callee_opts->x_aarch64_isa_flags)
11777 return false;
11778
11779 /* Allow non-strict-align functions to be inlined into
11780 strict-align ones. */
11781 if ((TARGET_STRICT_ALIGN_P (caller_opts->x_target_flags)
11782 != TARGET_STRICT_ALIGN_P (callee_opts->x_target_flags))
11783 && !(!TARGET_STRICT_ALIGN_P (callee_opts->x_target_flags)
11784 && TARGET_STRICT_ALIGN_P (caller_opts->x_target_flags)))
11785 return false;
11786
11787 bool always_inline = lookup_attribute ("always_inline",
11788 DECL_ATTRIBUTES (callee));
11789
11790 /* If the architectural features match up and the callee is always_inline
11791 then the other attributes don't matter. */
11792 if (always_inline)
11793 return true;
11794
11795 if (caller_opts->x_aarch64_cmodel_var
11796 != callee_opts->x_aarch64_cmodel_var)
11797 return false;
11798
11799 if (caller_opts->x_aarch64_tls_dialect
11800 != callee_opts->x_aarch64_tls_dialect)
11801 return false;
11802
11803 /* Honour explicit requests to workaround errata. */
11804 if (!aarch64_tribools_ok_for_inlining_p (
11805 caller_opts->x_aarch64_fix_a53_err835769,
11806 callee_opts->x_aarch64_fix_a53_err835769,
11807 2, TARGET_FIX_ERR_A53_835769_DEFAULT))
11808 return false;
11809
48bb1a55
CL
11810 if (!aarch64_tribools_ok_for_inlining_p (
11811 caller_opts->x_aarch64_fix_a53_err843419,
11812 callee_opts->x_aarch64_fix_a53_err843419,
11813 2, TARGET_FIX_ERR_A53_843419))
11814 return false;
11815
1fd8d40c
KT
11816 /* If the user explicitly specified -momit-leaf-frame-pointer for the
11817     caller and callee and they don't match up, reject inlining. */
11818 if (!aarch64_tribools_ok_for_inlining_p (
11819 caller_opts->x_flag_omit_leaf_frame_pointer,
11820 callee_opts->x_flag_omit_leaf_frame_pointer,
11821 2, 1))
11822 return false;
11823
11824 /* If the callee has specific tuning overrides, respect them. */
11825 if (callee_opts->x_aarch64_override_tune_string != NULL
11826 && caller_opts->x_aarch64_override_tune_string == NULL)
11827 return false;
11828
11829 /* If the user specified tuning override strings for the
11830 caller and callee and they don't match up, reject inlining.
11831 We just do a string compare here, we don't analyze the meaning
11832 of the string, as it would be too costly for little gain. */
11833 if (callee_opts->x_aarch64_override_tune_string
11834 && caller_opts->x_aarch64_override_tune_string
11835 && (strcmp (callee_opts->x_aarch64_override_tune_string,
11836 caller_opts->x_aarch64_override_tune_string) != 0))
11837 return false;
11838
11839 return true;
11840}
11841
43e9d192
IB
11842/* Return true if SYMBOL_REF X binds locally. */
11843
11844static bool
11845aarch64_symbol_binds_local_p (const_rtx x)
11846{
11847 return (SYMBOL_REF_DECL (x)
11848 ? targetm.binds_local_p (SYMBOL_REF_DECL (x))
11849 : SYMBOL_REF_LOCAL_P (x));
11850}
11851
11852/* Return true if SYMBOL_REF X is thread-local. */
11853static bool
11854aarch64_tls_symbol_p (rtx x)
11855{
11856 if (! TARGET_HAVE_TLS)
11857 return false;
11858
11859 if (GET_CODE (x) != SYMBOL_REF)
11860 return false;
11861
11862 return SYMBOL_REF_TLS_MODEL (x) != 0;
11863}
11864
11865/* Classify a TLS symbol into one of the TLS kinds. */
11866enum aarch64_symbol_type
11867aarch64_classify_tls_symbol (rtx x)
11868{
11869 enum tls_model tls_kind = tls_symbolic_operand_type (x);
11870
11871 switch (tls_kind)
11872 {
11873 case TLS_MODEL_GLOBAL_DYNAMIC:
11874 case TLS_MODEL_LOCAL_DYNAMIC:
11875 return TARGET_TLS_DESC ? SYMBOL_SMALL_TLSDESC : SYMBOL_SMALL_TLSGD;
11876
11877 case TLS_MODEL_INITIAL_EXEC:
5ae7caad
JW
11878 switch (aarch64_cmodel)
11879 {
11880 case AARCH64_CMODEL_TINY:
11881 case AARCH64_CMODEL_TINY_PIC:
11882 return SYMBOL_TINY_TLSIE;
11883 default:
79496620 11884 return SYMBOL_SMALL_TLSIE;
5ae7caad 11885 }
43e9d192
IB
11886
11887 case TLS_MODEL_LOCAL_EXEC:
cbf5629e
JW
11888 if (aarch64_tls_size == 12)
11889 return SYMBOL_TLSLE12;
11890 else if (aarch64_tls_size == 24)
11891 return SYMBOL_TLSLE24;
11892 else if (aarch64_tls_size == 32)
11893 return SYMBOL_TLSLE32;
11894 else if (aarch64_tls_size == 48)
11895 return SYMBOL_TLSLE48;
11896 else
11897 gcc_unreachable ();
43e9d192
IB
11898
11899 case TLS_MODEL_EMULATED:
11900 case TLS_MODEL_NONE:
11901 return SYMBOL_FORCE_TO_MEM;
11902
11903 default:
11904 gcc_unreachable ();
11905 }
11906}
11907
43cacb12
RS
11908/* Return the correct method for accessing X + OFFSET, where X is either
11909 a SYMBOL_REF or LABEL_REF. */
17f4d4bf 11910
43e9d192 11911enum aarch64_symbol_type
43cacb12 11912aarch64_classify_symbol (rtx x, HOST_WIDE_INT offset)
43e9d192
IB
11913{
11914 if (GET_CODE (x) == LABEL_REF)
11915 {
11916 switch (aarch64_cmodel)
11917 {
11918 case AARCH64_CMODEL_LARGE:
11919 return SYMBOL_FORCE_TO_MEM;
11920
11921 case AARCH64_CMODEL_TINY_PIC:
11922 case AARCH64_CMODEL_TINY:
a5350ddc
CSS
11923 return SYMBOL_TINY_ABSOLUTE;
11924
1b1e81f8 11925 case AARCH64_CMODEL_SMALL_SPIC:
43e9d192
IB
11926 case AARCH64_CMODEL_SMALL_PIC:
11927 case AARCH64_CMODEL_SMALL:
11928 return SYMBOL_SMALL_ABSOLUTE;
11929
11930 default:
11931 gcc_unreachable ();
11932 }
11933 }
11934
17f4d4bf 11935 if (GET_CODE (x) == SYMBOL_REF)
43e9d192 11936 {
43e9d192
IB
11937 if (aarch64_tls_symbol_p (x))
11938 return aarch64_classify_tls_symbol (x);
11939
17f4d4bf
CSS
11940 switch (aarch64_cmodel)
11941 {
11942 case AARCH64_CMODEL_TINY:
15f6e0da 11943 /* When we retrieve symbol + offset address, we have to make sure
f8b756b7
TB
11944 the offset does not cause overflow of the final address. But
11945     we have no way of knowing the address of the symbol at compile time
11946 so we can't accurately say if the distance between the PC and
11947     symbol + offset is outside the addressable range of +/-1M in the
11948 TINY code model. So we rely on images not being greater than
11949 1M and cap the offset at 1M and anything beyond 1M will have to
15f6e0da
RR
11950 be loaded using an alternative mechanism. Furthermore if the
11951 symbol is a weak reference to something that isn't known to
11952 resolve to a symbol in this module, then force to memory. */
11953 if ((SYMBOL_REF_WEAK (x)
11954 && !aarch64_symbol_binds_local_p (x))
43cacb12 11955 || !IN_RANGE (offset, -1048575, 1048575))
a5350ddc
CSS
11956 return SYMBOL_FORCE_TO_MEM;
11957 return SYMBOL_TINY_ABSOLUTE;
11958
17f4d4bf 11959 case AARCH64_CMODEL_SMALL:
f8b756b7
TB
11960 /* Same reasoning as the tiny code model, but the offset cap here is
11961 4G. */
15f6e0da
RR
11962 if ((SYMBOL_REF_WEAK (x)
11963 && !aarch64_symbol_binds_local_p (x))
43cacb12 11964 || !IN_RANGE (offset, HOST_WIDE_INT_C (-4294967263),
3ff5d1f0 11965 HOST_WIDE_INT_C (4294967264)))
17f4d4bf
CSS
11966 return SYMBOL_FORCE_TO_MEM;
11967 return SYMBOL_SMALL_ABSOLUTE;
43e9d192 11968
17f4d4bf 11969 case AARCH64_CMODEL_TINY_PIC:
38e6c9a6 11970 if (!aarch64_symbol_binds_local_p (x))
87dd8ab0 11971 return SYMBOL_TINY_GOT;
38e6c9a6
MS
11972 return SYMBOL_TINY_ABSOLUTE;
11973
1b1e81f8 11974 case AARCH64_CMODEL_SMALL_SPIC:
17f4d4bf
CSS
11975 case AARCH64_CMODEL_SMALL_PIC:
11976 if (!aarch64_symbol_binds_local_p (x))
1b1e81f8
JW
11977 return (aarch64_cmodel == AARCH64_CMODEL_SMALL_SPIC
11978 ? SYMBOL_SMALL_GOT_28K : SYMBOL_SMALL_GOT_4G);
17f4d4bf 11979 return SYMBOL_SMALL_ABSOLUTE;
43e9d192 11980
9ee6540a
WD
11981 case AARCH64_CMODEL_LARGE:
11982 /* This is alright even in PIC code as the constant
11983 pool reference is always PC relative and within
11984 the same translation unit. */
d47d34bb 11985 if (!aarch64_pcrelative_literal_loads && CONSTANT_POOL_ADDRESS_P (x))
9ee6540a
WD
11986 return SYMBOL_SMALL_ABSOLUTE;
11987 else
11988 return SYMBOL_FORCE_TO_MEM;
11989
17f4d4bf
CSS
11990 default:
11991 gcc_unreachable ();
11992 }
43e9d192 11993 }
17f4d4bf 11994
43e9d192
IB
11995 /* By default push everything into the constant pool. */
11996 return SYMBOL_FORCE_TO_MEM;
11997}
11998
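/* Illustrative sketch, not part of the original source: the offset caps
   used by aarch64_classify_symbol above, written as plain predicates.  The
   tiny model assumes the whole image fits in +/-1MiB of PC-relative range;
   the small model allows roughly 4GiB, with the asymmetric bounds matching
   the IN_RANGE checks above.  The function names are invented for the
   example.  */

static inline bool
example_tiny_symbol_offset_ok (long long offset)
{
  return offset >= -1048575 && offset <= 1048575;
}

static inline bool
example_small_symbol_offset_ok (long long offset)
{
  return offset >= -4294967263LL && offset <= 4294967264LL;
}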
43e9d192
IB
11999bool
12000aarch64_constant_address_p (rtx x)
12001{
12002 return (CONSTANT_P (x) && memory_address_p (DImode, x));
12003}
12004
12005bool
12006aarch64_legitimate_pic_operand_p (rtx x)
12007{
12008 if (GET_CODE (x) == SYMBOL_REF
12009 || (GET_CODE (x) == CONST
12010 && GET_CODE (XEXP (x, 0)) == PLUS
12011 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF))
12012 return false;
12013
12014 return true;
12015}
12016
26895c21
WD
12017/* Implement TARGET_LEGITIMATE_CONSTANT_P hook. Return true for constants
12018 that should be rematerialized rather than spilled. */
3520f7cc 12019
43e9d192 12020static bool
ef4bddc2 12021aarch64_legitimate_constant_p (machine_mode mode, rtx x)
43e9d192 12022{
26895c21 12023 /* Support CSE and rematerialization of common constants. */
c0bb5bc5 12024 if (CONST_INT_P (x)
9f7b87ca 12025 || (CONST_DOUBLE_P (x) && GET_MODE_CLASS (mode) == MODE_FLOAT)
c0bb5bc5 12026 || GET_CODE (x) == CONST_VECTOR)
26895c21
WD
12027 return true;
12028
43cacb12
RS
12029 /* Do not allow vector struct mode constants for Advanced SIMD.
12030 We could support 0 and -1 easily, but they need support in
12031 aarch64-simd.md. */
12032 unsigned int vec_flags = aarch64_classify_vector_mode (mode);
12033 if (vec_flags == (VEC_ADVSIMD | VEC_STRUCT))
43e9d192
IB
12034 return false;
12035
43cacb12
RS
12036 /* Only accept variable-length vector constants if they can be
12037 handled directly.
12038
12039 ??? It would be possible to handle rematerialization of other
12040 constants via secondary reloads. */
12041 if (vec_flags & VEC_ANY_SVE)
12042 return aarch64_simd_valid_immediate (x, NULL);
12043
509bb9b6
RS
12044 if (GET_CODE (x) == HIGH)
12045 x = XEXP (x, 0);
12046
43cacb12
RS
12047 /* Accept polynomial constants that can be calculated by using the
12048 destination of a move as the sole temporary. Constants that
12049 require a second temporary cannot be rematerialized (they can't be
12050 forced to memory and also aren't legitimate constants). */
12051 poly_int64 offset;
12052 if (poly_int_rtx_p (x, &offset))
12053 return aarch64_offset_temporaries (false, offset) <= 1;
12054
12055 /* If an offset is being added to something else, we need to allow the
12056 base to be moved into the destination register, meaning that there
12057 are no free temporaries for the offset. */
12058 x = strip_offset (x, &offset);
12059 if (!offset.is_constant () && aarch64_offset_temporaries (true, offset) > 0)
12060 return false;
26895c21 12061
43cacb12
RS
12062 /* Do not allow const (plus (anchor_symbol, const_int)). */
12063 if (maybe_ne (offset, 0) && SYMBOL_REF_P (x) && SYMBOL_REF_ANCHOR_P (x))
12064 return false;
26895c21 12065
f28e54bd
WD
12066 /* Treat symbols as constants. Avoid TLS symbols as they are complex,
12067 so spilling them is better than rematerialization. */
12068 if (SYMBOL_REF_P (x) && !SYMBOL_REF_TLS_MODEL (x))
12069 return true;
12070
26895c21
WD
12071 /* Label references are always constant. */
12072 if (GET_CODE (x) == LABEL_REF)
12073 return true;
12074
12075 return false;
43e9d192
IB
12076}
12077
a5bc806c 12078rtx
43e9d192
IB
12079aarch64_load_tp (rtx target)
12080{
12081 if (!target
12082 || GET_MODE (target) != Pmode
12083 || !register_operand (target, Pmode))
12084 target = gen_reg_rtx (Pmode);
12085
12086 /* Can return in any reg. */
12087 emit_insn (gen_aarch64_load_tp_hard (target));
12088 return target;
12089}
12090
43e9d192
IB
12091/* On AAPCS systems, this is the "struct __va_list". */
12092static GTY(()) tree va_list_type;
12093
12094/* Implement TARGET_BUILD_BUILTIN_VA_LIST.
12095 Return the type to use as __builtin_va_list.
12096
12097 AAPCS64 \S 7.1.4 requires that va_list be a typedef for a type defined as:
12098
12099 struct __va_list
12100 {
12101 void *__stack;
12102 void *__gr_top;
12103 void *__vr_top;
12104 int __gr_offs;
12105 int __vr_offs;
12106 }; */
12107
12108static tree
12109aarch64_build_builtin_va_list (void)
12110{
12111 tree va_list_name;
12112 tree f_stack, f_grtop, f_vrtop, f_groff, f_vroff;
12113
12114 /* Create the type. */
12115 va_list_type = lang_hooks.types.make_type (RECORD_TYPE);
12116 /* Give it the required name. */
12117 va_list_name = build_decl (BUILTINS_LOCATION,
12118 TYPE_DECL,
12119 get_identifier ("__va_list"),
12120 va_list_type);
12121 DECL_ARTIFICIAL (va_list_name) = 1;
12122 TYPE_NAME (va_list_type) = va_list_name;
665c56c6 12123 TYPE_STUB_DECL (va_list_type) = va_list_name;
43e9d192
IB
12124
12125 /* Create the fields. */
12126 f_stack = build_decl (BUILTINS_LOCATION,
12127 FIELD_DECL, get_identifier ("__stack"),
12128 ptr_type_node);
12129 f_grtop = build_decl (BUILTINS_LOCATION,
12130 FIELD_DECL, get_identifier ("__gr_top"),
12131 ptr_type_node);
12132 f_vrtop = build_decl (BUILTINS_LOCATION,
12133 FIELD_DECL, get_identifier ("__vr_top"),
12134 ptr_type_node);
12135 f_groff = build_decl (BUILTINS_LOCATION,
12136 FIELD_DECL, get_identifier ("__gr_offs"),
12137 integer_type_node);
12138 f_vroff = build_decl (BUILTINS_LOCATION,
12139 FIELD_DECL, get_identifier ("__vr_offs"),
12140 integer_type_node);
12141
88e3bdd1 12142 /* Tell tree-stdarg pass about our internal offset fields.
3fd6b9cc
JW
12143     NOTE: va_list_gpr/fpr_counter_field are only used for tree comparison
12144     purposes, to identify whether the code is updating va_list internal
12145     offset fields in an irregular way. */
12146 va_list_gpr_counter_field = f_groff;
12147 va_list_fpr_counter_field = f_vroff;
12148
43e9d192
IB
12149 DECL_ARTIFICIAL (f_stack) = 1;
12150 DECL_ARTIFICIAL (f_grtop) = 1;
12151 DECL_ARTIFICIAL (f_vrtop) = 1;
12152 DECL_ARTIFICIAL (f_groff) = 1;
12153 DECL_ARTIFICIAL (f_vroff) = 1;
12154
12155 DECL_FIELD_CONTEXT (f_stack) = va_list_type;
12156 DECL_FIELD_CONTEXT (f_grtop) = va_list_type;
12157 DECL_FIELD_CONTEXT (f_vrtop) = va_list_type;
12158 DECL_FIELD_CONTEXT (f_groff) = va_list_type;
12159 DECL_FIELD_CONTEXT (f_vroff) = va_list_type;
12160
12161 TYPE_FIELDS (va_list_type) = f_stack;
12162 DECL_CHAIN (f_stack) = f_grtop;
12163 DECL_CHAIN (f_grtop) = f_vrtop;
12164 DECL_CHAIN (f_vrtop) = f_groff;
12165 DECL_CHAIN (f_groff) = f_vroff;
12166
12167 /* Compute its layout. */
12168 layout_type (va_list_type);
12169
12170 return va_list_type;
12171}
12172
12173/* Implement TARGET_EXPAND_BUILTIN_VA_START. */
12174static void
12175aarch64_expand_builtin_va_start (tree valist, rtx nextarg ATTRIBUTE_UNUSED)
12176{
12177 const CUMULATIVE_ARGS *cum;
12178 tree f_stack, f_grtop, f_vrtop, f_groff, f_vroff;
12179 tree stack, grtop, vrtop, groff, vroff;
12180 tree t;
88e3bdd1
JW
12181 int gr_save_area_size = cfun->va_list_gpr_size;
12182 int vr_save_area_size = cfun->va_list_fpr_size;
43e9d192
IB
12183 int vr_offset;
12184
12185 cum = &crtl->args.info;
88e3bdd1
JW
12186 if (cfun->va_list_gpr_size)
12187 gr_save_area_size = MIN ((NUM_ARG_REGS - cum->aapcs_ncrn) * UNITS_PER_WORD,
12188 cfun->va_list_gpr_size);
12189 if (cfun->va_list_fpr_size)
12190 vr_save_area_size = MIN ((NUM_FP_ARG_REGS - cum->aapcs_nvrn)
12191 * UNITS_PER_VREG, cfun->va_list_fpr_size);
43e9d192 12192
d5726973 12193 if (!TARGET_FLOAT)
43e9d192 12194 {
261fb553 12195 gcc_assert (cum->aapcs_nvrn == 0);
43e9d192
IB
12196 vr_save_area_size = 0;
12197 }
12198
12199 f_stack = TYPE_FIELDS (va_list_type_node);
12200 f_grtop = DECL_CHAIN (f_stack);
12201 f_vrtop = DECL_CHAIN (f_grtop);
12202 f_groff = DECL_CHAIN (f_vrtop);
12203 f_vroff = DECL_CHAIN (f_groff);
12204
12205 stack = build3 (COMPONENT_REF, TREE_TYPE (f_stack), valist, f_stack,
12206 NULL_TREE);
12207 grtop = build3 (COMPONENT_REF, TREE_TYPE (f_grtop), valist, f_grtop,
12208 NULL_TREE);
12209 vrtop = build3 (COMPONENT_REF, TREE_TYPE (f_vrtop), valist, f_vrtop,
12210 NULL_TREE);
12211 groff = build3 (COMPONENT_REF, TREE_TYPE (f_groff), valist, f_groff,
12212 NULL_TREE);
12213 vroff = build3 (COMPONENT_REF, TREE_TYPE (f_vroff), valist, f_vroff,
12214 NULL_TREE);
12215
12216 /* Emit code to initialize STACK, which points to the next varargs stack
12217 argument. CUM->AAPCS_STACK_SIZE gives the number of stack words used
12218 by named arguments. STACK is 8-byte aligned. */
12219 t = make_tree (TREE_TYPE (stack), virtual_incoming_args_rtx);
12220 if (cum->aapcs_stack_size > 0)
12221 t = fold_build_pointer_plus_hwi (t, cum->aapcs_stack_size * UNITS_PER_WORD);
12222 t = build2 (MODIFY_EXPR, TREE_TYPE (stack), stack, t);
12223 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
12224
12225 /* Emit code to initialize GRTOP, the top of the GR save area.
12226 virtual_incoming_args_rtx should have been 16 byte aligned. */
12227 t = make_tree (TREE_TYPE (grtop), virtual_incoming_args_rtx);
12228 t = build2 (MODIFY_EXPR, TREE_TYPE (grtop), grtop, t);
12229 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
12230
12231 /* Emit code to initialize VRTOP, the top of the VR save area.
12232 This address is gr_save_area_bytes below GRTOP, rounded
12233 down to the next 16-byte boundary. */
12234 t = make_tree (TREE_TYPE (vrtop), virtual_incoming_args_rtx);
4f59f9f2
UB
12235 vr_offset = ROUND_UP (gr_save_area_size,
12236 STACK_BOUNDARY / BITS_PER_UNIT);
43e9d192
IB
12237
12238 if (vr_offset)
12239 t = fold_build_pointer_plus_hwi (t, -vr_offset);
12240 t = build2 (MODIFY_EXPR, TREE_TYPE (vrtop), vrtop, t);
12241 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
12242
12243 /* Emit code to initialize GROFF, the offset from GRTOP of the
12244 next GPR argument. */
12245 t = build2 (MODIFY_EXPR, TREE_TYPE (groff), groff,
12246 build_int_cst (TREE_TYPE (groff), -gr_save_area_size));
12247 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
12248
12249 /* Likewise emit code to initialize VROFF, the offset from FTOP
12250 of the next VR argument. */
12251 t = build2 (MODIFY_EXPR, TREE_TYPE (vroff), vroff,
12252 build_int_cst (TREE_TYPE (vroff), -vr_save_area_size));
12253 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
12254}
12255
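/* Illustrative sketch, not part of the original source: the layout that the
   va_start expansion above establishes, written as ordinary C.  NCRN/NVRN
   are the numbers of general/vector registers consumed by named arguments
   and STACK_WORDS the number of stack words they use; the real code also
   clamps the save areas using the tree-stdarg results, which the example
   ignores.  All names are local to the example.  */

struct example_va_list
{
  void *__stack;   /* Next stack-passed argument, 8-byte aligned.  */
  void *__gr_top;  /* One past the end of the GR save area.  */
  void *__vr_top;  /* One past the end of the VR save area.  */
  int __gr_offs;   /* Negative: bytes of GR save area still unread.  */
  int __vr_offs;   /* Negative: bytes of VR save area still unread.  */
};

static inline void
example_va_start (struct example_va_list *ap, char *incoming_args,
		  int ncrn, int nvrn, int stack_words)
{
  int gr_save = (8 - ncrn) * 8;   /* x0-x7, 8 bytes each.  */
  int vr_save = (8 - nvrn) * 16;  /* q0-q7, 16 bytes each.  */
  ap->__stack = incoming_args + stack_words * 8;
  ap->__gr_top = incoming_args;
  /* The VR save area sits below the GR one, 16-byte aligned.  */
  ap->__vr_top = incoming_args - ((gr_save + 15) & -16);
  ap->__gr_offs = -gr_save;
  ap->__vr_offs = -vr_save;
}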
12256/* Implement TARGET_GIMPLIFY_VA_ARG_EXPR. */
12257
12258static tree
12259aarch64_gimplify_va_arg_expr (tree valist, tree type, gimple_seq *pre_p,
12260 gimple_seq *post_p ATTRIBUTE_UNUSED)
12261{
12262 tree addr;
12263 bool indirect_p;
12264 bool is_ha; /* is HFA or HVA. */
12265 bool dw_align; /* double-word align. */
ef4bddc2 12266 machine_mode ag_mode = VOIDmode;
43e9d192 12267 int nregs;
ef4bddc2 12268 machine_mode mode;
43e9d192
IB
12269
12270 tree f_stack, f_grtop, f_vrtop, f_groff, f_vroff;
12271 tree stack, f_top, f_off, off, arg, roundup, on_stack;
12272 HOST_WIDE_INT size, rsize, adjust, align;
12273 tree t, u, cond1, cond2;
12274
12275 indirect_p = pass_by_reference (NULL, TYPE_MODE (type), type, false);
12276 if (indirect_p)
12277 type = build_pointer_type (type);
12278
12279 mode = TYPE_MODE (type);
12280
12281 f_stack = TYPE_FIELDS (va_list_type_node);
12282 f_grtop = DECL_CHAIN (f_stack);
12283 f_vrtop = DECL_CHAIN (f_grtop);
12284 f_groff = DECL_CHAIN (f_vrtop);
12285 f_vroff = DECL_CHAIN (f_groff);
12286
12287 stack = build3 (COMPONENT_REF, TREE_TYPE (f_stack), unshare_expr (valist),
12288 f_stack, NULL_TREE);
12289 size = int_size_in_bytes (type);
985b8393 12290 align = aarch64_function_arg_alignment (mode, type) / BITS_PER_UNIT;
43e9d192
IB
12291
12292 dw_align = false;
12293 adjust = 0;
12294 if (aarch64_vfp_is_call_or_return_candidate (mode,
12295 type,
12296 &ag_mode,
12297 &nregs,
12298 &is_ha))
12299 {
6a70badb
RS
12300 /* No frontends can create types with variable-sized modes, so we
12301 shouldn't be asked to pass or return them. */
12302 unsigned int ag_size = GET_MODE_SIZE (ag_mode).to_constant ();
12303
43e9d192 12304 /* TYPE passed in fp/simd registers. */
d5726973 12305 if (!TARGET_FLOAT)
fc29dfc9 12306 aarch64_err_no_fpadvsimd (mode);
43e9d192
IB
12307
12308 f_top = build3 (COMPONENT_REF, TREE_TYPE (f_vrtop),
12309 unshare_expr (valist), f_vrtop, NULL_TREE);
12310 f_off = build3 (COMPONENT_REF, TREE_TYPE (f_vroff),
12311 unshare_expr (valist), f_vroff, NULL_TREE);
12312
12313 rsize = nregs * UNITS_PER_VREG;
12314
12315 if (is_ha)
12316 {
6a70badb
RS
12317 if (BYTES_BIG_ENDIAN && ag_size < UNITS_PER_VREG)
12318 adjust = UNITS_PER_VREG - ag_size;
43e9d192 12319 }
76b0cbf8 12320 else if (BLOCK_REG_PADDING (mode, type, 1) == PAD_DOWNWARD
43e9d192
IB
12321 && size < UNITS_PER_VREG)
12322 {
12323 adjust = UNITS_PER_VREG - size;
12324 }
12325 }
12326 else
12327 {
12328 /* TYPE passed in general registers. */
12329 f_top = build3 (COMPONENT_REF, TREE_TYPE (f_grtop),
12330 unshare_expr (valist), f_grtop, NULL_TREE);
12331 f_off = build3 (COMPONENT_REF, TREE_TYPE (f_groff),
12332 unshare_expr (valist), f_groff, NULL_TREE);
4f59f9f2 12333 rsize = ROUND_UP (size, UNITS_PER_WORD);
43e9d192
IB
12334 nregs = rsize / UNITS_PER_WORD;
12335
12336 if (align > 8)
12337 dw_align = true;
12338
76b0cbf8 12339 if (BLOCK_REG_PADDING (mode, type, 1) == PAD_DOWNWARD
43e9d192
IB
12340 && size < UNITS_PER_WORD)
12341 {
12342 adjust = UNITS_PER_WORD - size;
12343 }
12344 }
12345
12346 /* Get a local temporary for the field value. */
12347 off = get_initialized_tmp_var (f_off, pre_p, NULL);
12348
12349 /* Emit code to branch if off >= 0. */
12350 t = build2 (GE_EXPR, boolean_type_node, off,
12351 build_int_cst (TREE_TYPE (off), 0));
12352 cond1 = build3 (COND_EXPR, ptr_type_node, t, NULL_TREE, NULL_TREE);
12353
12354 if (dw_align)
12355 {
12356 /* Emit: offs = (offs + 15) & -16. */
12357 t = build2 (PLUS_EXPR, TREE_TYPE (off), off,
12358 build_int_cst (TREE_TYPE (off), 15));
12359 t = build2 (BIT_AND_EXPR, TREE_TYPE (off), t,
12360 build_int_cst (TREE_TYPE (off), -16));
12361 roundup = build2 (MODIFY_EXPR, TREE_TYPE (off), off, t);
12362 }
12363 else
12364 roundup = NULL;
12365
12366 /* Update ap.__[g|v]r_offs */
12367 t = build2 (PLUS_EXPR, TREE_TYPE (off), off,
12368 build_int_cst (TREE_TYPE (off), rsize));
12369 t = build2 (MODIFY_EXPR, TREE_TYPE (f_off), unshare_expr (f_off), t);
12370
12371 /* String up. */
12372 if (roundup)
12373 t = build2 (COMPOUND_EXPR, TREE_TYPE (t), roundup, t);
12374
12375 /* [cond2] if (ap.__[g|v]r_offs > 0) */
12376 u = build2 (GT_EXPR, boolean_type_node, unshare_expr (f_off),
12377 build_int_cst (TREE_TYPE (f_off), 0));
12378 cond2 = build3 (COND_EXPR, ptr_type_node, u, NULL_TREE, NULL_TREE);
12379
12380 /* String up: make sure the assignment happens before the use. */
12381 t = build2 (COMPOUND_EXPR, TREE_TYPE (cond2), t, cond2);
12382 COND_EXPR_ELSE (cond1) = t;
12383
12384 /* Prepare the trees handling the argument that is passed on the stack;
12385     the top-level node is stored in ON_STACK. */
12386 arg = get_initialized_tmp_var (stack, pre_p, NULL);
12387 if (align > 8)
12388 {
12389 /* if (alignof(type) > 8) (arg = arg + 15) & -16; */
4bdc2738 12390 t = fold_build_pointer_plus_hwi (arg, 15);
43e9d192
IB
12391 t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
12392 build_int_cst (TREE_TYPE (t), -16));
43e9d192
IB
12393 roundup = build2 (MODIFY_EXPR, TREE_TYPE (arg), arg, t);
12394 }
12395 else
12396 roundup = NULL;
12397 /* Advance ap.__stack */
4bdc2738 12398 t = fold_build_pointer_plus_hwi (arg, size + 7);
43e9d192
IB
12399 t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
12400 build_int_cst (TREE_TYPE (t), -8));
43e9d192
IB
12401 t = build2 (MODIFY_EXPR, TREE_TYPE (stack), unshare_expr (stack), t);
12402 /* String up roundup and advance. */
12403 if (roundup)
12404 t = build2 (COMPOUND_EXPR, TREE_TYPE (t), roundup, t);
12405 /* String up with arg */
12406 on_stack = build2 (COMPOUND_EXPR, TREE_TYPE (arg), t, arg);
12407 /* Big-endianness related address adjustment. */
76b0cbf8 12408 if (BLOCK_REG_PADDING (mode, type, 1) == PAD_DOWNWARD
43e9d192
IB
12409 && size < UNITS_PER_WORD)
12410 {
12411 t = build2 (POINTER_PLUS_EXPR, TREE_TYPE (arg), arg,
12412 size_int (UNITS_PER_WORD - size));
12413 on_stack = build2 (COMPOUND_EXPR, TREE_TYPE (arg), on_stack, t);
12414 }
12415
12416 COND_EXPR_THEN (cond1) = unshare_expr (on_stack);
12417 COND_EXPR_THEN (cond2) = unshare_expr (on_stack);
12418
12419 /* Adjustment to OFFSET in the case of BIG_ENDIAN. */
12420 t = off;
12421 if (adjust)
12422 t = build2 (PREINCREMENT_EXPR, TREE_TYPE (off), off,
12423 build_int_cst (TREE_TYPE (off), adjust));
12424
12425 t = fold_convert (sizetype, t);
12426 t = build2 (POINTER_PLUS_EXPR, TREE_TYPE (f_top), f_top, t);
12427
12428 if (is_ha)
12429 {
12430 /* type ha; // treat as "struct {ftype field[n];}"
12431 ... [computing offs]
12432     for (i = 0; i < nregs; ++i, offs += 16)
12433 ha.field[i] = *((ftype *)(ap.__vr_top + offs));
12434 return ha; */
12435 int i;
12436 tree tmp_ha, field_t, field_ptr_t;
12437
12438 /* Declare a local variable. */
12439 tmp_ha = create_tmp_var_raw (type, "ha");
12440 gimple_add_tmp_var (tmp_ha);
12441
12442 /* Establish the base type. */
12443 switch (ag_mode)
12444 {
4e10a5a7 12445 case E_SFmode:
43e9d192
IB
12446 field_t = float_type_node;
12447 field_ptr_t = float_ptr_type_node;
12448 break;
4e10a5a7 12449 case E_DFmode:
43e9d192
IB
12450 field_t = double_type_node;
12451 field_ptr_t = double_ptr_type_node;
12452 break;
4e10a5a7 12453 case E_TFmode:
43e9d192
IB
12454 field_t = long_double_type_node;
12455 field_ptr_t = long_double_ptr_type_node;
12456 break;
4e10a5a7 12457 case E_HFmode:
1b62ed4f
JG
12458 field_t = aarch64_fp16_type_node;
12459 field_ptr_t = aarch64_fp16_ptr_type_node;
43e9d192 12460 break;
4e10a5a7
RS
12461 case E_V2SImode:
12462 case E_V4SImode:
43e9d192
IB
12463 {
12464 tree innertype = make_signed_type (GET_MODE_PRECISION (SImode));
12465 field_t = build_vector_type_for_mode (innertype, ag_mode);
12466 field_ptr_t = build_pointer_type (field_t);
12467 }
12468 break;
12469 default:
12470 gcc_assert (0);
12471 }
12472
12473 /* *(field_ptr_t)&ha = *((field_ptr_t)vr_saved_area */
12474 tmp_ha = build1 (ADDR_EXPR, field_ptr_t, tmp_ha);
12475 addr = t;
12476 t = fold_convert (field_ptr_t, addr);
12477 t = build2 (MODIFY_EXPR, field_t,
12478 build1 (INDIRECT_REF, field_t, tmp_ha),
12479 build1 (INDIRECT_REF, field_t, t));
12480
12481 /* ha.field[i] = *((field_ptr_t)vr_saved_area + i) */
12482 for (i = 1; i < nregs; ++i)
12483 {
12484 addr = fold_build_pointer_plus_hwi (addr, UNITS_PER_VREG);
12485 u = fold_convert (field_ptr_t, addr);
12486 u = build2 (MODIFY_EXPR, field_t,
12487 build2 (MEM_REF, field_t, tmp_ha,
12488 build_int_cst (field_ptr_t,
12489 (i *
12490 int_size_in_bytes (field_t)))),
12491 build1 (INDIRECT_REF, field_t, u));
12492 t = build2 (COMPOUND_EXPR, TREE_TYPE (t), t, u);
12493 }
12494
12495 u = fold_convert (TREE_TYPE (f_top), tmp_ha);
12496 t = build2 (COMPOUND_EXPR, TREE_TYPE (f_top), t, u);
12497 }
12498
12499 COND_EXPR_ELSE (cond2) = t;
12500 addr = fold_convert (build_pointer_type (type), cond1);
12501 addr = build_va_arg_indirect_ref (addr);
12502
12503 if (indirect_p)
12504 addr = build_va_arg_indirect_ref (addr);
12505
12506 return addr;
12507}
12508
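/* Illustrative sketch, not part of the original source: the control flow
   that the gimplification above generates for a small integer argument,
   written as ordinary C against the example_va_list type sketched earlier.
   The real expansion additionally handles 16-byte alignment, big-endian
   padding and homogeneous aggregates, which are omitted here.  */

static inline void *
example_va_arg_gr (struct example_va_list *ap, int size)
{
  int rsize = (size + 7) & -8;          /* Round up to a whole word.  */
  if (ap->__gr_offs >= 0)               /* GR save area already used up.  */
    goto on_stack;
  ap->__gr_offs += rsize;
  if (ap->__gr_offs > 0)                /* This argument doesn't fit.  */
    goto on_stack;
  return (char *) ap->__gr_top + (ap->__gr_offs - rsize);

 on_stack:
  {
    void *addr = ap->__stack;
    ap->__stack = (char *) ap->__stack + rsize;
    return addr;
  }
}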
12509/* Implement TARGET_SETUP_INCOMING_VARARGS. */
12510
12511static void
ef4bddc2 12512aarch64_setup_incoming_varargs (cumulative_args_t cum_v, machine_mode mode,
43e9d192
IB
12513 tree type, int *pretend_size ATTRIBUTE_UNUSED,
12514 int no_rtl)
12515{
12516 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
12517 CUMULATIVE_ARGS local_cum;
88e3bdd1
JW
12518 int gr_saved = cfun->va_list_gpr_size;
12519 int vr_saved = cfun->va_list_fpr_size;
43e9d192
IB
12520
12521 /* The caller has advanced CUM up to, but not beyond, the last named
12522 argument. Advance a local copy of CUM past the last "real" named
12523 argument, to find out how many registers are left over. */
12524 local_cum = *cum;
12525 aarch64_function_arg_advance (pack_cumulative_args(&local_cum), mode, type, true);
12526
88e3bdd1
JW
12527   /* Find out how many registers we need to save.
12528      Honor the tree-stdarg analysis results. */
12529 if (cfun->va_list_gpr_size)
12530 gr_saved = MIN (NUM_ARG_REGS - local_cum.aapcs_ncrn,
12531 cfun->va_list_gpr_size / UNITS_PER_WORD);
12532 if (cfun->va_list_fpr_size)
12533 vr_saved = MIN (NUM_FP_ARG_REGS - local_cum.aapcs_nvrn,
12534 cfun->va_list_fpr_size / UNITS_PER_VREG);
43e9d192 12535
d5726973 12536 if (!TARGET_FLOAT)
43e9d192 12537 {
261fb553 12538 gcc_assert (local_cum.aapcs_nvrn == 0);
43e9d192
IB
12539 vr_saved = 0;
12540 }
12541
12542 if (!no_rtl)
12543 {
12544 if (gr_saved > 0)
12545 {
12546 rtx ptr, mem;
12547
12548 /* virtual_incoming_args_rtx should have been 16-byte aligned. */
12549 ptr = plus_constant (Pmode, virtual_incoming_args_rtx,
12550 - gr_saved * UNITS_PER_WORD);
12551 mem = gen_frame_mem (BLKmode, ptr);
12552 set_mem_alias_set (mem, get_varargs_alias_set ());
12553
12554 move_block_from_reg (local_cum.aapcs_ncrn + R0_REGNUM,
12555 mem, gr_saved);
12556 }
12557 if (vr_saved > 0)
12558 {
12559 /* We can't use move_block_from_reg, because it will use
12560 the wrong mode, storing D regs only. */
ef4bddc2 12561 machine_mode mode = TImode;
88e3bdd1 12562 int off, i, vr_start;
43e9d192
IB
12563
12564 /* Set OFF to the offset from virtual_incoming_args_rtx of
12565 the first vector register. The VR save area lies below
12566 the GR one, and is aligned to 16 bytes. */
4f59f9f2
UB
12567 off = -ROUND_UP (gr_saved * UNITS_PER_WORD,
12568 STACK_BOUNDARY / BITS_PER_UNIT);
43e9d192
IB
12569 off -= vr_saved * UNITS_PER_VREG;
12570
88e3bdd1
JW
12571 vr_start = V0_REGNUM + local_cum.aapcs_nvrn;
12572 for (i = 0; i < vr_saved; ++i)
43e9d192
IB
12573 {
12574 rtx ptr, mem;
12575
12576 ptr = plus_constant (Pmode, virtual_incoming_args_rtx, off);
12577 mem = gen_frame_mem (mode, ptr);
12578 set_mem_alias_set (mem, get_varargs_alias_set ());
88e3bdd1 12579 aarch64_emit_move (mem, gen_rtx_REG (mode, vr_start + i));
43e9d192
IB
12580 off += UNITS_PER_VREG;
12581 }
12582 }
12583 }
12584
12585 /* We don't save the size into *PRETEND_SIZE because we want to avoid
12586 any complication of having crtl->args.pretend_args_size changed. */
8799637a 12587 cfun->machine->frame.saved_varargs_size
4f59f9f2
UB
12588 = (ROUND_UP (gr_saved * UNITS_PER_WORD,
12589 STACK_BOUNDARY / BITS_PER_UNIT)
43e9d192
IB
12590 + vr_saved * UNITS_PER_VREG);
12591}
12592
12593static void
12594aarch64_conditional_register_usage (void)
12595{
12596 int i;
12597 if (!TARGET_FLOAT)
12598 {
12599 for (i = V0_REGNUM; i <= V31_REGNUM; i++)
12600 {
12601 fixed_regs[i] = 1;
12602 call_used_regs[i] = 1;
12603 }
12604 }
43cacb12
RS
12605 if (!TARGET_SVE)
12606 for (i = P0_REGNUM; i <= P15_REGNUM; i++)
12607 {
12608 fixed_regs[i] = 1;
12609 call_used_regs[i] = 1;
12610 }
3751345d
RE
12611
12612 /* When tracking speculation, we need a couple of call-clobbered registers
12613 to track the speculation state. It would be nice to just use
12614 IP0 and IP1, but currently there are numerous places that just
12615     assume these registers are free for other uses (e.g. pointer
12616 authentication). */
12617 if (aarch64_track_speculation)
12618 {
12619 fixed_regs[SPECULATION_TRACKER_REGNUM] = 1;
12620 call_used_regs[SPECULATION_TRACKER_REGNUM] = 1;
12621 fixed_regs[SPECULATION_SCRATCH_REGNUM] = 1;
12622 call_used_regs[SPECULATION_SCRATCH_REGNUM] = 1;
12623 }
43e9d192
IB
12624}
12625
12626/* Walk down the type tree of TYPE counting consecutive base elements.
12627 If *MODEP is VOIDmode, then set it to the first valid floating point
12628 type. If a non-floating point type is found, or if a floating point
12629 type that doesn't match a non-VOIDmode *MODEP is found, then return -1,
12630 otherwise return the count in the sub-tree. */
12631static int
ef4bddc2 12632aapcs_vfp_sub_candidate (const_tree type, machine_mode *modep)
43e9d192 12633{
ef4bddc2 12634 machine_mode mode;
43e9d192
IB
12635 HOST_WIDE_INT size;
12636
12637 switch (TREE_CODE (type))
12638 {
12639 case REAL_TYPE:
12640 mode = TYPE_MODE (type);
1b62ed4f
JG
12641 if (mode != DFmode && mode != SFmode
12642 && mode != TFmode && mode != HFmode)
43e9d192
IB
12643 return -1;
12644
12645 if (*modep == VOIDmode)
12646 *modep = mode;
12647
12648 if (*modep == mode)
12649 return 1;
12650
12651 break;
12652
12653 case COMPLEX_TYPE:
12654 mode = TYPE_MODE (TREE_TYPE (type));
1b62ed4f
JG
12655 if (mode != DFmode && mode != SFmode
12656 && mode != TFmode && mode != HFmode)
43e9d192
IB
12657 return -1;
12658
12659 if (*modep == VOIDmode)
12660 *modep = mode;
12661
12662 if (*modep == mode)
12663 return 2;
12664
12665 break;
12666
12667 case VECTOR_TYPE:
12668 /* Use V2SImode and V4SImode as representatives of all 64-bit
12669 and 128-bit vector types. */
12670 size = int_size_in_bytes (type);
12671 switch (size)
12672 {
12673 case 8:
12674 mode = V2SImode;
12675 break;
12676 case 16:
12677 mode = V4SImode;
12678 break;
12679 default:
12680 return -1;
12681 }
12682
12683 if (*modep == VOIDmode)
12684 *modep = mode;
12685
12686 /* Vector modes are considered to be opaque: two vectors are
12687 equivalent for the purposes of being homogeneous aggregates
12688 if they are the same size. */
12689 if (*modep == mode)
12690 return 1;
12691
12692 break;
12693
12694 case ARRAY_TYPE:
12695 {
12696 int count;
12697 tree index = TYPE_DOMAIN (type);
12698
807e902e
KZ
12699 /* Can't handle incomplete types nor sizes that are not
12700 fixed. */
12701 if (!COMPLETE_TYPE_P (type)
12702 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
43e9d192
IB
12703 return -1;
12704
12705 count = aapcs_vfp_sub_candidate (TREE_TYPE (type), modep);
12706 if (count == -1
12707 || !index
12708 || !TYPE_MAX_VALUE (index)
cc269bb6 12709 || !tree_fits_uhwi_p (TYPE_MAX_VALUE (index))
43e9d192 12710 || !TYPE_MIN_VALUE (index)
cc269bb6 12711 || !tree_fits_uhwi_p (TYPE_MIN_VALUE (index))
43e9d192
IB
12712 || count < 0)
12713 return -1;
12714
ae7e9ddd
RS
12715 count *= (1 + tree_to_uhwi (TYPE_MAX_VALUE (index))
12716 - tree_to_uhwi (TYPE_MIN_VALUE (index)));
43e9d192
IB
12717
12718 /* There must be no padding. */
6a70badb
RS
12719 if (maybe_ne (wi::to_poly_wide (TYPE_SIZE (type)),
12720 count * GET_MODE_BITSIZE (*modep)))
43e9d192
IB
12721 return -1;
12722
12723 return count;
12724 }
12725
12726 case RECORD_TYPE:
12727 {
12728 int count = 0;
12729 int sub_count;
12730 tree field;
12731
807e902e
KZ
12732 /* Can't handle incomplete types nor sizes that are not
12733 fixed. */
12734 if (!COMPLETE_TYPE_P (type)
12735 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
43e9d192
IB
12736 return -1;
12737
12738 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
12739 {
12740 if (TREE_CODE (field) != FIELD_DECL)
12741 continue;
12742
12743 sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep);
12744 if (sub_count < 0)
12745 return -1;
12746 count += sub_count;
12747 }
12748
12749 /* There must be no padding. */
6a70badb
RS
12750 if (maybe_ne (wi::to_poly_wide (TYPE_SIZE (type)),
12751 count * GET_MODE_BITSIZE (*modep)))
43e9d192
IB
12752 return -1;
12753
12754 return count;
12755 }
12756
12757 case UNION_TYPE:
12758 case QUAL_UNION_TYPE:
12759 {
12760 /* These aren't very interesting except in a degenerate case. */
12761 int count = 0;
12762 int sub_count;
12763 tree field;
12764
807e902e
KZ
12765 /* Can't handle incomplete types nor sizes that are not
12766 fixed. */
12767 if (!COMPLETE_TYPE_P (type)
12768 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
43e9d192
IB
12769 return -1;
12770
12771 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
12772 {
12773 if (TREE_CODE (field) != FIELD_DECL)
12774 continue;
12775
12776 sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep);
12777 if (sub_count < 0)
12778 return -1;
12779 count = count > sub_count ? count : sub_count;
12780 }
12781
12782 /* There must be no padding. */
6a70badb
RS
12783 if (maybe_ne (wi::to_poly_wide (TYPE_SIZE (type)),
12784 count * GET_MODE_BITSIZE (*modep)))
43e9d192
IB
12785 return -1;
12786
12787 return count;
12788 }
12789
12790 default:
12791 break;
12792 }
12793
12794 return -1;
12795}
12796
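/* Illustrative examples, not part of the original source: type declarations
   and the element counts the walk above would report for them.  A count
   between 1 and HA_MAX_NUM_FLDS (4) of a single base mode makes the type a
   homogeneous aggregate for parameter passing (see
   aarch64_vfp_is_call_or_return_candidate below).  */

struct example_hfa_sf   { float x, y, z, w; };       /* 4 x SFmode: HFA.  */
struct example_hfa_df   { double r; double i[2]; };  /* 3 x DFmode: HFA.  */
struct example_mixed    { float f; double d; };      /* -1: mixed base modes.  */
struct example_too_many { float f[5]; };             /* 5: more than 4, not an HFA.  */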
b6ec6215
KT
12797/* Return TRUE if the type, as described by TYPE and MODE, is a short vector
12798 type as described in AAPCS64 \S 4.1.2.
12799
12800 See the comment above aarch64_composite_type_p for the notes on MODE. */
12801
12802static bool
12803aarch64_short_vector_p (const_tree type,
12804 machine_mode mode)
12805{
6a70badb 12806 poly_int64 size = -1;
b6ec6215
KT
12807
12808 if (type && TREE_CODE (type) == VECTOR_TYPE)
12809 size = int_size_in_bytes (type);
12810 else if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
12811 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
12812 size = GET_MODE_SIZE (mode);
12813
6a70badb 12814 return known_eq (size, 8) || known_eq (size, 16);
b6ec6215
KT
12815}
12816
43e9d192
IB
12817/* Return TRUE if the type, as described by TYPE and MODE, is a composite
12818 type as described in AAPCS64 \S 4.3. This includes aggregate, union and
12819 array types. The C99 floating-point complex types are also considered
12820 as composite types, according to AAPCS64 \S 7.1.1. The complex integer
12821 types, which are GCC extensions and out of the scope of AAPCS64, are
12822 treated as composite types here as well.
12823
12824 Note that MODE itself is not sufficient in determining whether a type
12825 is such a composite type or not. This is because
12826 stor-layout.c:compute_record_mode may have already changed the MODE
12827 (BLKmode) of a RECORD_TYPE TYPE to some other mode. For example, a
12828 structure with only one field may have its MODE set to the mode of the
12829 field. Also an integer mode whose size matches the size of the
12830 RECORD_TYPE type may be used to substitute the original mode
12831 (i.e. BLKmode) in certain circumstances. In other words, MODE cannot be
12832 solely relied on. */
12833
12834static bool
12835aarch64_composite_type_p (const_tree type,
ef4bddc2 12836 machine_mode mode)
43e9d192 12837{
b6ec6215
KT
12838 if (aarch64_short_vector_p (type, mode))
12839 return false;
12840
43e9d192
IB
12841 if (type && (AGGREGATE_TYPE_P (type) || TREE_CODE (type) == COMPLEX_TYPE))
12842 return true;
12843
12844 if (mode == BLKmode
12845 || GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT
12846 || GET_MODE_CLASS (mode) == MODE_COMPLEX_INT)
12847 return true;
12848
12849 return false;
12850}
12851
43e9d192
IB
12852/* Return TRUE if an argument, whose type is described by TYPE and MODE,
12853 shall be passed or returned in simd/fp register(s) (providing these
12854 parameter passing registers are available).
12855
12856 Upon successful return, *COUNT returns the number of needed registers,
12857   *BASE_MODE returns the mode of the individual register and when IS_HA
12858 is not NULL, *IS_HA indicates whether or not the argument is a homogeneous
12859 floating-point aggregate or a homogeneous short-vector aggregate. */
12860
12861static bool
ef4bddc2 12862aarch64_vfp_is_call_or_return_candidate (machine_mode mode,
43e9d192 12863 const_tree type,
ef4bddc2 12864 machine_mode *base_mode,
43e9d192
IB
12865 int *count,
12866 bool *is_ha)
12867{
ef4bddc2 12868 machine_mode new_mode = VOIDmode;
43e9d192
IB
12869 bool composite_p = aarch64_composite_type_p (type, mode);
12870
12871 if (is_ha != NULL) *is_ha = false;
12872
12873 if ((!composite_p && GET_MODE_CLASS (mode) == MODE_FLOAT)
12874 || aarch64_short_vector_p (type, mode))
12875 {
12876 *count = 1;
12877 new_mode = mode;
12878 }
12879 else if (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT)
12880 {
12881 if (is_ha != NULL) *is_ha = true;
12882 *count = 2;
12883 new_mode = GET_MODE_INNER (mode);
12884 }
12885 else if (type && composite_p)
12886 {
12887 int ag_count = aapcs_vfp_sub_candidate (type, &new_mode);
12888
12889 if (ag_count > 0 && ag_count <= HA_MAX_NUM_FLDS)
12890 {
12891 if (is_ha != NULL) *is_ha = true;
12892 *count = ag_count;
12893 }
12894 else
12895 return false;
12896 }
12897 else
12898 return false;
12899
12900 *base_mode = new_mode;
12901 return true;
12902}
12903
12904/* Implement TARGET_STRUCT_VALUE_RTX. */
12905
12906static rtx
12907aarch64_struct_value_rtx (tree fndecl ATTRIBUTE_UNUSED,
12908 int incoming ATTRIBUTE_UNUSED)
12909{
12910 return gen_rtx_REG (Pmode, AARCH64_STRUCT_VALUE_REGNUM);
12911}
12912
12913/* Implements target hook vector_mode_supported_p. */
12914static bool
ef4bddc2 12915aarch64_vector_mode_supported_p (machine_mode mode)
43e9d192 12916{
43cacb12
RS
12917 unsigned int vec_flags = aarch64_classify_vector_mode (mode);
12918 return vec_flags != 0 && (vec_flags & VEC_STRUCT) == 0;
43e9d192
IB
12919}
12920
b7342d25
IB
12921/* Return appropriate SIMD container
12922 for MODE within a vector of WIDTH bits. */
ef4bddc2 12923static machine_mode
43cacb12 12924aarch64_simd_container_mode (scalar_mode mode, poly_int64 width)
43e9d192 12925{
43cacb12
RS
12926 if (TARGET_SVE && known_eq (width, BITS_PER_SVE_VECTOR))
12927 switch (mode)
12928 {
12929 case E_DFmode:
12930 return VNx2DFmode;
12931 case E_SFmode:
12932 return VNx4SFmode;
12933 case E_HFmode:
12934 return VNx8HFmode;
12935 case E_DImode:
12936 return VNx2DImode;
12937 case E_SImode:
12938 return VNx4SImode;
12939 case E_HImode:
12940 return VNx8HImode;
12941 case E_QImode:
12942 return VNx16QImode;
12943 default:
12944 return word_mode;
12945 }
12946
12947 gcc_assert (known_eq (width, 64) || known_eq (width, 128));
43e9d192 12948 if (TARGET_SIMD)
b7342d25 12949 {
43cacb12 12950 if (known_eq (width, 128))
b7342d25
IB
12951 switch (mode)
12952 {
4e10a5a7 12953 case E_DFmode:
b7342d25 12954 return V2DFmode;
4e10a5a7 12955 case E_SFmode:
b7342d25 12956 return V4SFmode;
4e10a5a7 12957 case E_HFmode:
b719f884 12958 return V8HFmode;
4e10a5a7 12959 case E_SImode:
b7342d25 12960 return V4SImode;
4e10a5a7 12961 case E_HImode:
b7342d25 12962 return V8HImode;
4e10a5a7 12963 case E_QImode:
b7342d25 12964 return V16QImode;
4e10a5a7 12965 case E_DImode:
b7342d25
IB
12966 return V2DImode;
12967 default:
12968 break;
12969 }
12970 else
12971 switch (mode)
12972 {
4e10a5a7 12973 case E_SFmode:
b7342d25 12974 return V2SFmode;
4e10a5a7 12975 case E_HFmode:
b719f884 12976 return V4HFmode;
4e10a5a7 12977 case E_SImode:
b7342d25 12978 return V2SImode;
4e10a5a7 12979 case E_HImode:
b7342d25 12980 return V4HImode;
4e10a5a7 12981 case E_QImode:
b7342d25
IB
12982 return V8QImode;
12983 default:
12984 break;
12985 }
12986 }
43e9d192
IB
12987 return word_mode;
12988}
12989
b7342d25 12990/* Return 128-bit container as the preferred SIMD mode for MODE. */
ef4bddc2 12991static machine_mode
005ba29c 12992aarch64_preferred_simd_mode (scalar_mode mode)
b7342d25 12993{
43cacb12
RS
12994 poly_int64 bits = TARGET_SVE ? BITS_PER_SVE_VECTOR : 128;
12995 return aarch64_simd_container_mode (mode, bits);
b7342d25
IB
12996}
12997
86e36728 12998/* Return a list of possible vector sizes for the vectorizer
3b357264 12999 to iterate over. */
86e36728
RS
13000static void
13001aarch64_autovectorize_vector_sizes (vector_sizes *sizes)
3b357264 13002{
43cacb12
RS
13003 if (TARGET_SVE)
13004 sizes->safe_push (BYTES_PER_SVE_VECTOR);
86e36728
RS
13005 sizes->safe_push (16);
13006 sizes->safe_push (8);
3b357264
JG
13007}
13008
ac2b960f
YZ
13009/* Implement TARGET_MANGLE_TYPE. */
13010
6f549691 13011static const char *
ac2b960f
YZ
13012aarch64_mangle_type (const_tree type)
13013{
13014 /* The AArch64 ABI documents say that "__va_list" has to be
13015     mangled as if it is in the "std" namespace. */
13016 if (lang_hooks.types_compatible_p (CONST_CAST_TREE (type), va_list_type))
13017 return "St9__va_list";
13018
c2ec330c
AL
13019 /* Half-precision float. */
13020 if (TREE_CODE (type) == REAL_TYPE && TYPE_PRECISION (type) == 16)
13021 return "Dh";
13022
f9d53c27
TB
13023 /* Mangle AArch64-specific internal types. TYPE_NAME is non-NULL_TREE for
13024 builtin types. */
13025 if (TYPE_NAME (type) != NULL)
13026 return aarch64_mangle_builtin_type (type);
c6fc9e43 13027
ac2b960f
YZ
13028 /* Use the default mangling. */
13029 return NULL;
13030}
13031
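/* Illustrative note, not part of the original source: with the rules above,
   a C++ declaration such as "void f(__builtin_va_list)" mangles to
   "_Z1fSt9__va_list" and "void g(__fp16)" to "_Z1gDh", following the
   AAPCS64 C++ name-mangling requirements.  */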
75cf1494
KT
13032/* Find the first rtx_insn before insn that will generate an assembly
13033 instruction. */
13034
13035static rtx_insn *
13036aarch64_prev_real_insn (rtx_insn *insn)
13037{
13038 if (!insn)
13039 return NULL;
13040
13041 do
13042 {
13043 insn = prev_real_insn (insn);
13044 }
13045 while (insn && recog_memoized (insn) < 0);
13046
13047 return insn;
13048}
13049
13050static bool
13051is_madd_op (enum attr_type t1)
13052{
13053 unsigned int i;
13054 /* A number of these may be AArch32 only. */
13055 enum attr_type mlatypes[] = {
13056 TYPE_MLA, TYPE_MLAS, TYPE_SMLAD, TYPE_SMLADX, TYPE_SMLAL, TYPE_SMLALD,
13057 TYPE_SMLALS, TYPE_SMLALXY, TYPE_SMLAWX, TYPE_SMLAWY, TYPE_SMLAXY,
13058     TYPE_SMMLA, TYPE_UMLAL, TYPE_UMLALS, TYPE_SMLSD, TYPE_SMLSDX, TYPE_SMLSLD
13059 };
13060
13061 for (i = 0; i < sizeof (mlatypes) / sizeof (enum attr_type); i++)
13062 {
13063 if (t1 == mlatypes[i])
13064 return true;
13065 }
13066
13067 return false;
13068}
13069
13070/* Check if there is a register dependency between a load and the insn
13071 for which we hold recog_data. */
13072
13073static bool
13074dep_between_memop_and_curr (rtx memop)
13075{
13076 rtx load_reg;
13077 int opno;
13078
8baff86e 13079 gcc_assert (GET_CODE (memop) == SET);
75cf1494
KT
13080
13081 if (!REG_P (SET_DEST (memop)))
13082 return false;
13083
13084 load_reg = SET_DEST (memop);
8baff86e 13085 for (opno = 1; opno < recog_data.n_operands; opno++)
75cf1494
KT
13086 {
13087 rtx operand = recog_data.operand[opno];
13088 if (REG_P (operand)
13089 && reg_overlap_mentioned_p (load_reg, operand))
13090 return true;
13091
13092 }
13093 return false;
13094}
13095
8baff86e
KT
13096
13097/* When working around the Cortex-A53 erratum 835769,
13098 given rtx_insn INSN, return true if it is a 64-bit multiply-accumulate
13099 instruction and has a preceding memory instruction such that a NOP
13100 should be inserted between them. */
13101
75cf1494
KT
13102bool
13103aarch64_madd_needs_nop (rtx_insn* insn)
13104{
13105 enum attr_type attr_type;
13106 rtx_insn *prev;
13107 rtx body;
13108
b32c1043 13109 if (!TARGET_FIX_ERR_A53_835769)
75cf1494
KT
13110 return false;
13111
e322d6e3 13112 if (!INSN_P (insn) || recog_memoized (insn) < 0)
75cf1494
KT
13113 return false;
13114
13115 attr_type = get_attr_type (insn);
13116 if (!is_madd_op (attr_type))
13117 return false;
13118
13119 prev = aarch64_prev_real_insn (insn);
3fea1a75
KT
13120 /* aarch64_prev_real_insn can call recog_memoized on insns other than INSN.
13121 Restore recog state to INSN to avoid state corruption. */
13122 extract_constrain_insn_cached (insn);
13123
550e2205 13124 if (!prev || !contains_mem_rtx_p (PATTERN (prev)))
75cf1494
KT
13125 return false;
13126
13127 body = single_set (prev);
13128
13129 /* If the previous insn is a memory op and there is no dependency between
8baff86e
KT
13130 it and the DImode madd, emit a NOP between them. If body is NULL then we
13131 have a complex memory operation, probably a load/store pair.
13132 Be conservative for now and emit a NOP. */
13133 if (GET_MODE (recog_data.operand[0]) == DImode
13134 && (!body || !dep_between_memop_and_curr (body)))
75cf1494
KT
13135 return true;
13136
13137 return false;
13138
13139}
13140
8baff86e
KT
13141
13142/* Implement FINAL_PRESCAN_INSN. */
13143
75cf1494
KT
13144void
13145aarch64_final_prescan_insn (rtx_insn *insn)
13146{
13147 if (aarch64_madd_needs_nop (insn))
13148 fprintf (asm_out_file, "\tnop // between mem op and mult-accumulate\n");
13149}
13150
13151
43cacb12
RS
13152/* Return true if BASE_OR_STEP is a valid immediate operand for an SVE INDEX
13153 instruction. */
13154
13155bool
13156aarch64_sve_index_immediate_p (rtx base_or_step)
13157{
13158 return (CONST_INT_P (base_or_step)
13159 && IN_RANGE (INTVAL (base_or_step), -16, 15));
13160}
13161
13162/* Return true if X is a valid immediate for the SVE ADD and SUB
13163 instructions. Negate X first if NEGATE_P is true. */
13164
13165bool
13166aarch64_sve_arith_immediate_p (rtx x, bool negate_p)
13167{
13168 rtx elt;
13169
13170 if (!const_vec_duplicate_p (x, &elt)
13171 || !CONST_INT_P (elt))
13172 return false;
13173
13174 HOST_WIDE_INT val = INTVAL (elt);
13175 if (negate_p)
13176 val = -val;
13177 val &= GET_MODE_MASK (GET_MODE_INNER (GET_MODE (x)));
13178
13179 if (val & 0xff)
13180 return IN_RANGE (val, 0, 0xff);
13181 return IN_RANGE (val, 0, 0xff00);
13182}
13183
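/* Illustrative sketch, not part of the original source: the range accepted
   by aarch64_sve_arith_immediate_p above, applied to an element value that
   has already been reduced to the element's width.  SVE ADD/SUB take an
   unsigned 8-bit immediate, optionally shifted left by 8, so e.g. 0x3f and
   0x3f00 are accepted while 0x3f01 is not.  */

static inline bool
example_sve_add_sub_imm_ok (unsigned long long val)
{
  if (val & 0xff)
    return val <= 0xff;     /* Plain 8-bit immediate.  */
  return val <= 0xff00;     /* 8-bit immediate, LSL #8.  */
}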
13184/* Return true if X is a valid immediate operand for an SVE logical
13185 instruction such as AND. */
13186
13187bool
13188aarch64_sve_bitmask_immediate_p (rtx x)
13189{
13190 rtx elt;
13191
13192 return (const_vec_duplicate_p (x, &elt)
13193 && CONST_INT_P (elt)
13194 && aarch64_bitmask_imm (INTVAL (elt),
13195 GET_MODE_INNER (GET_MODE (x))));
13196}
13197
13198/* Return true if X is a valid immediate for the SVE DUP and CPY
13199 instructions. */
13200
13201bool
13202aarch64_sve_dup_immediate_p (rtx x)
13203{
13204 rtx elt;
13205
13206 if (!const_vec_duplicate_p (x, &elt)
13207 || !CONST_INT_P (elt))
13208 return false;
13209
13210 HOST_WIDE_INT val = INTVAL (elt);
13211 if (val & 0xff)
13212 return IN_RANGE (val, -0x80, 0x7f);
13213 return IN_RANGE (val, -0x8000, 0x7f00);
13214}
13215
13216/* Return true if X is a valid immediate operand for an SVE CMP instruction.
13217 SIGNED_P says whether the operand is signed rather than unsigned. */
13218
13219bool
13220aarch64_sve_cmp_immediate_p (rtx x, bool signed_p)
13221{
13222 rtx elt;
13223
13224 return (const_vec_duplicate_p (x, &elt)
13225 && CONST_INT_P (elt)
13226 && (signed_p
13227 ? IN_RANGE (INTVAL (elt), -16, 15)
13228 : IN_RANGE (INTVAL (elt), 0, 127)));
13229}
13230
13231/* Return true if X is a valid immediate operand for an SVE FADD or FSUB
13232 instruction. Negate X first if NEGATE_P is true. */
13233
13234bool
13235aarch64_sve_float_arith_immediate_p (rtx x, bool negate_p)
13236{
13237 rtx elt;
13238 REAL_VALUE_TYPE r;
13239
13240 if (!const_vec_duplicate_p (x, &elt)
13241 || GET_CODE (elt) != CONST_DOUBLE)
13242 return false;
13243
13244 r = *CONST_DOUBLE_REAL_VALUE (elt);
13245
13246 if (negate_p)
13247 r = real_value_negate (&r);
13248
13249 if (real_equal (&r, &dconst1))
13250 return true;
13251 if (real_equal (&r, &dconsthalf))
13252 return true;
13253 return false;
13254}
13255
13256/* Return true if X is a valid immediate operand for an SVE FMUL
13257 instruction. */
13258
13259bool
13260aarch64_sve_float_mul_immediate_p (rtx x)
13261{
13262 rtx elt;
13263
13264 /* GCC will never generate a multiply with an immediate of 2, so there is no
13265 point testing for it (even though it is a valid constant). */
13266 return (const_vec_duplicate_p (x, &elt)
13267 && GET_CODE (elt) == CONST_DOUBLE
13268 && real_equal (CONST_DOUBLE_REAL_VALUE (elt), &dconsthalf));
13269}
13270
b187677b
RS
13271/* Return true if replicating VAL32 is a valid 2-byte or 4-byte immediate
13272 for the Advanced SIMD operation described by WHICH and INSN. If INFO
13273 is nonnull, use it to describe valid immediates. */
3520f7cc 13274static bool
b187677b
RS
13275aarch64_advsimd_valid_immediate_hs (unsigned int val32,
13276 simd_immediate_info *info,
13277 enum simd_immediate_check which,
13278 simd_immediate_info::insn_type insn)
13279{
13280 /* Try a 4-byte immediate with LSL. */
13281 for (unsigned int shift = 0; shift < 32; shift += 8)
13282 if ((val32 & (0xff << shift)) == val32)
13283 {
13284 if (info)
13285 *info = simd_immediate_info (SImode, val32 >> shift, insn,
13286 simd_immediate_info::LSL, shift);
13287 return true;
13288 }
3520f7cc 13289
b187677b
RS
13290 /* Try a 2-byte immediate with LSL. */
13291 unsigned int imm16 = val32 & 0xffff;
13292 if (imm16 == (val32 >> 16))
13293 for (unsigned int shift = 0; shift < 16; shift += 8)
13294 if ((imm16 & (0xff << shift)) == imm16)
48063b9d 13295 {
b187677b
RS
13296 if (info)
13297 *info = simd_immediate_info (HImode, imm16 >> shift, insn,
13298 simd_immediate_info::LSL, shift);
13299 return true;
48063b9d 13300 }
3520f7cc 13301
b187677b
RS
13302 /* Try a 4-byte immediate with MSL, except for cases that MVN
13303 can handle. */
13304 if (which == AARCH64_CHECK_MOV)
13305 for (unsigned int shift = 8; shift < 24; shift += 8)
13306 {
13307 unsigned int low = (1 << shift) - 1;
13308 if (((val32 & (0xff << shift)) | low) == val32)
13309 {
13310 if (info)
13311 *info = simd_immediate_info (SImode, val32 >> shift, insn,
13312 simd_immediate_info::MSL, shift);
13313 return true;
13314 }
13315 }
43e9d192 13316
b187677b
RS
13317 return false;
13318}
13319
13320/* Return true if replicating VAL64 is a valid immediate for the
13321 Advanced SIMD operation described by WHICH. If INFO is nonnull,
13322 use it to describe valid immediates. */
13323static bool
13324aarch64_advsimd_valid_immediate (unsigned HOST_WIDE_INT val64,
13325 simd_immediate_info *info,
13326 enum simd_immediate_check which)
13327{
13328 unsigned int val32 = val64 & 0xffffffff;
13329 unsigned int val16 = val64 & 0xffff;
13330 unsigned int val8 = val64 & 0xff;
13331
13332 if (val32 == (val64 >> 32))
43e9d192 13333 {
b187677b
RS
13334 if ((which & AARCH64_CHECK_ORR) != 0
13335 && aarch64_advsimd_valid_immediate_hs (val32, info, which,
13336 simd_immediate_info::MOV))
13337 return true;
43e9d192 13338
b187677b
RS
13339 if ((which & AARCH64_CHECK_BIC) != 0
13340 && aarch64_advsimd_valid_immediate_hs (~val32, info, which,
13341 simd_immediate_info::MVN))
13342 return true;
ee78df47 13343
b187677b
RS
13344 /* Try using a replicated byte. */
13345 if (which == AARCH64_CHECK_MOV
13346 && val16 == (val32 >> 16)
13347 && val8 == (val16 >> 8))
ee78df47 13348 {
b187677b
RS
13349 if (info)
13350 *info = simd_immediate_info (QImode, val8);
13351 return true;
ee78df47 13352 }
43e9d192
IB
13353 }
13354
b187677b
RS
13355 /* Try using a bit-to-bytemask. */
13356 if (which == AARCH64_CHECK_MOV)
43e9d192 13357 {
b187677b
RS
13358 unsigned int i;
13359 for (i = 0; i < 64; i += 8)
ab6501d7 13360 {
b187677b
RS
13361 unsigned char byte = (val64 >> i) & 0xff;
13362 if (byte != 0 && byte != 0xff)
13363 break;
ab6501d7 13364 }
b187677b 13365 if (i == 64)
ab6501d7 13366 {
b187677b
RS
13367 if (info)
13368 *info = simd_immediate_info (DImode, val64);
13369 return true;
ab6501d7 13370 }
43e9d192 13371 }
b187677b
RS
13372 return false;
13373}
43e9d192 13374
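/* Illustrative sketch, not part of the original source: two of the simpler
   forms accepted by aarch64_advsimd_valid_immediate above, written as
   standalone predicates over the repeating 64-bit value.  The names are
   invented for the example.  */

static inline bool
example_replicated_byte_p (unsigned long long val64)
{
  /* Every byte equal: a MOVI with a replicated byte.  */
  unsigned long long byte = val64 & 0xff;
  return val64 == byte * 0x0101010101010101ULL;
}

static inline bool
example_byte_mask_p (unsigned long long val64)
{
  /* Each byte either 0x00 or 0xff: the 64-bit bit-to-bytemask form.  */
  for (unsigned int i = 0; i < 64; i += 8)
    {
      unsigned int byte = (val64 >> i) & 0xff;
      if (byte != 0 && byte != 0xff)
	return false;
    }
  return true;
}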
43cacb12
RS
13375/* Return true if replicating VAL64 gives a valid immediate for an SVE MOV
13376 instruction. If INFO is nonnull, use it to describe valid immediates. */
13377
13378static bool
13379aarch64_sve_valid_immediate (unsigned HOST_WIDE_INT val64,
13380 simd_immediate_info *info)
13381{
13382 scalar_int_mode mode = DImode;
13383 unsigned int val32 = val64 & 0xffffffff;
13384 if (val32 == (val64 >> 32))
13385 {
13386 mode = SImode;
13387 unsigned int val16 = val32 & 0xffff;
13388 if (val16 == (val32 >> 16))
13389 {
13390 mode = HImode;
13391 unsigned int val8 = val16 & 0xff;
13392 if (val8 == (val16 >> 8))
13393 mode = QImode;
13394 }
13395 }
13396 HOST_WIDE_INT val = trunc_int_for_mode (val64, mode);
13397 if (IN_RANGE (val, -0x80, 0x7f))
13398 {
13399 /* DUP with no shift. */
13400 if (info)
13401 *info = simd_immediate_info (mode, val);
13402 return true;
13403 }
13404 if ((val & 0xff) == 0 && IN_RANGE (val, -0x8000, 0x7f00))
13405 {
13406 /* DUP with LSL #8. */
13407 if (info)
13408 *info = simd_immediate_info (mode, val);
13409 return true;
13410 }
13411 if (aarch64_bitmask_imm (val64, mode))
13412 {
13413 /* DUPM. */
13414 if (info)
13415 *info = simd_immediate_info (mode, val);
13416 return true;
13417 }
13418 return false;
13419}
13420
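/* Illustrative sketch, not part of the original source: the mode-narrowing
   step performed above, which finds the smallest element width whose
   replication reproduces VAL64; the DUP/DUPM range checks are then applied
   at that width.  */

static inline unsigned int
example_sve_replicated_elt_bits (unsigned long long val64)
{
  if ((val64 & 0xffffffffULL) != (val64 >> 32))
    return 64;
  unsigned int val32 = val64 & 0xffffffff;
  if ((val32 & 0xffff) != (val32 >> 16))
    return 32;
  unsigned int val16 = val32 & 0xffff;
  if ((val16 & 0xff) != (val16 >> 8))
    return 16;
  return 8;
}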
b187677b
RS
13421/* Return true if OP is a valid SIMD immediate for the operation
13422 described by WHICH. If INFO is nonnull, use it to describe valid
13423 immediates. */
13424bool
13425aarch64_simd_valid_immediate (rtx op, simd_immediate_info *info,
13426 enum simd_immediate_check which)
13427{
43cacb12
RS
13428 machine_mode mode = GET_MODE (op);
13429 unsigned int vec_flags = aarch64_classify_vector_mode (mode);
13430 if (vec_flags == 0 || vec_flags == (VEC_ADVSIMD | VEC_STRUCT))
13431 return false;
13432
13433 scalar_mode elt_mode = GET_MODE_INNER (mode);
f9093f23 13434 rtx base, step;
b187677b 13435 unsigned int n_elts;
f9093f23
RS
13436 if (GET_CODE (op) == CONST_VECTOR
13437 && CONST_VECTOR_DUPLICATE_P (op))
13438 n_elts = CONST_VECTOR_NPATTERNS (op);
43cacb12
RS
13439 else if ((vec_flags & VEC_SVE_DATA)
13440 && const_vec_series_p (op, &base, &step))
13441 {
13442 gcc_assert (GET_MODE_CLASS (mode) == MODE_VECTOR_INT);
13443 if (!aarch64_sve_index_immediate_p (base)
13444 || !aarch64_sve_index_immediate_p (step))
13445 return false;
13446
13447 if (info)
13448 *info = simd_immediate_info (elt_mode, base, step);
13449 return true;
13450 }
6a70badb
RS
13451 else if (GET_CODE (op) == CONST_VECTOR
13452 && CONST_VECTOR_NUNITS (op).is_constant (&n_elts))
13453 /* N_ELTS set above. */;
b187677b 13454 else
d8edd899 13455 return false;
43e9d192 13456
43cacb12
RS
13457 /* Handle PFALSE and PTRUE. */
13458 if (vec_flags & VEC_SVE_PRED)
13459 return (op == CONST0_RTX (mode)
13460 || op == CONSTM1_RTX (mode));
13461
b187677b 13462 scalar_float_mode elt_float_mode;
f9093f23
RS
13463 if (n_elts == 1
13464 && is_a <scalar_float_mode> (elt_mode, &elt_float_mode))
43e9d192 13465 {
f9093f23
RS
13466 rtx elt = CONST_VECTOR_ENCODED_ELT (op, 0);
13467 if (aarch64_float_const_zero_rtx_p (elt)
13468 || aarch64_float_const_representable_p (elt))
13469 {
13470 if (info)
13471 *info = simd_immediate_info (elt_float_mode, elt);
13472 return true;
13473 }
b187677b 13474 }
43e9d192 13475
b187677b
RS
13476 unsigned int elt_size = GET_MODE_SIZE (elt_mode);
13477 if (elt_size > 8)
13478 return false;
e4f0f84d 13479
b187677b 13480 scalar_int_mode elt_int_mode = int_mode_for_mode (elt_mode).require ();
43e9d192 13481
b187677b
RS
13482 /* Expand the vector constant out into a byte vector, with the least
13483 significant byte of the register first. */
13484 auto_vec<unsigned char, 16> bytes;
13485 bytes.reserve (n_elts * elt_size);
13486 for (unsigned int i = 0; i < n_elts; i++)
13487 {
f9093f23
RS
13488 /* The vector is provided in gcc endian-neutral fashion.
13489 For aarch64_be Advanced SIMD, it must be laid out in the vector
13490 register in reverse order. */
13491 bool swap_p = ((vec_flags & VEC_ADVSIMD) != 0 && BYTES_BIG_ENDIAN);
13492 rtx elt = CONST_VECTOR_ELT (op, swap_p ? (n_elts - 1 - i) : i);
43e9d192 13493
b187677b
RS
13494 if (elt_mode != elt_int_mode)
13495 elt = gen_lowpart (elt_int_mode, elt);
43e9d192 13496
b187677b
RS
13497 if (!CONST_INT_P (elt))
13498 return false;
43e9d192 13499
b187677b
RS
13500 unsigned HOST_WIDE_INT elt_val = INTVAL (elt);
13501 for (unsigned int byte = 0; byte < elt_size; byte++)
48063b9d 13502 {
b187677b
RS
13503 bytes.quick_push (elt_val & 0xff);
13504 elt_val >>= BITS_PER_UNIT;
48063b9d 13505 }
43e9d192
IB
13506 }
13507
b187677b
RS
13508 /* The immediate must repeat every eight bytes. */
13509 unsigned int nbytes = bytes.length ();
13510 for (unsigned i = 8; i < nbytes; ++i)
13511 if (bytes[i] != bytes[i - 8])
13512 return false;
13513
13514 /* Get the repeating 8-byte value as an integer. No endian correction
13515 is needed here because bytes is already in lsb-first order. */
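  /* Using i % nbytes also copes with encodings shorter than eight bytes,
     in which case the available bytes simply repeat.  */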
13516 unsigned HOST_WIDE_INT val64 = 0;
13517 for (unsigned int i = 0; i < 8; i++)
13518 val64 |= ((unsigned HOST_WIDE_INT) bytes[i % nbytes]
13519 << (i * BITS_PER_UNIT));
13520
43cacb12
RS
13521 if (vec_flags & VEC_SVE_DATA)
13522 return aarch64_sve_valid_immediate (val64, info);
13523 else
13524 return aarch64_advsimd_valid_immediate (val64, info, which);
13525}
13526
13527/* Check whether X is a VEC_SERIES-like constant that starts at 0 and
13528 has a step in the range of INDEX. Return the index expression if so,
13529 otherwise return null. */
13530rtx
13531aarch64_check_zero_based_sve_index_immediate (rtx x)
13532{
13533 rtx base, step;
13534 if (const_vec_series_p (x, &base, &step)
13535 && base == const0_rtx
13536 && aarch64_sve_index_immediate_p (step))
13537 return step;
13538 return NULL_RTX;
43e9d192
IB
13539}
13540
43e9d192
IB
13541/* Check if immediate shift constants are within range. */
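/* Illustrative example: for a V4SImode operand, a left-shift count must be
   in the range 0-31, while a right-shift count must be in the range 1-32.  */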
13542bool
ef4bddc2 13543aarch64_simd_shift_imm_p (rtx x, machine_mode mode, bool left)
43e9d192
IB
13544{
13545 int bit_width = GET_MODE_UNIT_SIZE (mode) * BITS_PER_UNIT;
13546 if (left)
ddeabd3e 13547 return aarch64_const_vec_all_same_in_range_p (x, 0, bit_width - 1);
43e9d192 13548 else
ddeabd3e 13549 return aarch64_const_vec_all_same_in_range_p (x, 1, bit_width);
43e9d192
IB
13550}
13551
7325d85a
KT
13552/* Return the bitmask CONST_INT to select the bits required by a zero extract
13553 operation of width WIDTH at bit position POS. */
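/* Illustrative example: WIDTH == 4 and POS == 8 give ((1 << 4) - 1) << 8,
   i.e. the mask 0xf00.  */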
13554
13555rtx
13556aarch64_mask_from_zextract_ops (rtx width, rtx pos)
13557{
13558 gcc_assert (CONST_INT_P (width));
13559 gcc_assert (CONST_INT_P (pos));
13560
13561 unsigned HOST_WIDE_INT mask
13562 = ((unsigned HOST_WIDE_INT) 1 << UINTVAL (width)) - 1;
13563 return GEN_INT (mask << UINTVAL (pos));
13564}
13565
83f8c414 13566bool
a6e0bfa7 13567aarch64_mov_operand_p (rtx x, machine_mode mode)
83f8c414 13568{
83f8c414
CSS
13569 if (GET_CODE (x) == HIGH
13570 && aarch64_valid_symref (XEXP (x, 0), GET_MODE (XEXP (x, 0))))
13571 return true;
13572
82614948 13573 if (CONST_INT_P (x))
83f8c414
CSS
13574 return true;
13575
43cacb12
RS
13576 if (VECTOR_MODE_P (GET_MODE (x)))
13577 return aarch64_simd_valid_immediate (x, NULL);
13578
83f8c414
CSS
13579 if (GET_CODE (x) == SYMBOL_REF && mode == DImode && CONSTANT_ADDRESS_P (x))
13580 return true;
13581
43cacb12
RS
13582 if (aarch64_sve_cnt_immediate_p (x))
13583 return true;
13584
a6e0bfa7 13585 return aarch64_classify_symbolic_expression (x)
a5350ddc 13586 == SYMBOL_TINY_ABSOLUTE;
83f8c414
CSS
13587}
13588
43e9d192
IB
13589/* Return a const_int vector of VAL. */
13590rtx
ab014eb3 13591aarch64_simd_gen_const_vector_dup (machine_mode mode, HOST_WIDE_INT val)
43e9d192 13592{
59d06c05
RS
13593 rtx c = gen_int_mode (val, GET_MODE_INNER (mode));
13594 return gen_const_vec_duplicate (mode, c);
43e9d192
IB
13595}
13596
051d0e2f
SN
13597/* Check OP is a legal scalar immediate for the MOVI instruction. */
13598
13599bool
77e994c9 13600aarch64_simd_scalar_immediate_valid_for_move (rtx op, scalar_int_mode mode)
051d0e2f 13601{
ef4bddc2 13602 machine_mode vmode;
051d0e2f 13603
43cacb12 13604 vmode = aarch64_simd_container_mode (mode, 64);
051d0e2f 13605 rtx op_v = aarch64_simd_gen_const_vector_dup (vmode, INTVAL (op));
b187677b 13606 return aarch64_simd_valid_immediate (op_v, NULL);
051d0e2f
SN
13607}
13608
988fa693
JG
13609/* Construct and return a PARALLEL RTX vector with elements numbering the
13610 lanes of either the high (HIGH == TRUE) or low (HIGH == FALSE) half of
13611 the vector - from the perspective of the architecture. This does not
13612 line up with GCC's perspective on lane numbers, so we end up with
13613 different masks depending on our target endian-ness. The diagram
13614 below may help. We must draw the distinction when building masks
13615 which select one half of the vector. An instruction selecting
13616 architectural low-lanes for a big-endian target must be described using
13617 a mask selecting GCC high-lanes.
13618
13619 Big-Endian Little-Endian
13620
13621GCC 0 1 2 3 3 2 1 0
13622 | x | x | x | x | | x | x | x | x |
13623Architecture 3 2 1 0 3 2 1 0
13624
13625Low Mask: { 2, 3 } { 0, 1 }
13626High Mask: { 0, 1 } { 2, 3 }
f5cbabc1
RS
13627
13628 MODE Is the mode of the vector and NUNITS is the number of units in it. */
988fa693 13629
43e9d192 13630rtx
f5cbabc1 13631aarch64_simd_vect_par_cnst_half (machine_mode mode, int nunits, bool high)
43e9d192 13632{
43e9d192 13633 rtvec v = rtvec_alloc (nunits / 2);
988fa693
JG
13634 int high_base = nunits / 2;
13635 int low_base = 0;
13636 int base;
43e9d192
IB
13637 rtx t1;
13638 int i;
13639
988fa693
JG
13640 if (BYTES_BIG_ENDIAN)
13641 base = high ? low_base : high_base;
13642 else
13643 base = high ? high_base : low_base;
13644
13645 for (i = 0; i < nunits / 2; i++)
43e9d192
IB
13646 RTVEC_ELT (v, i) = GEN_INT (base + i);
13647
13648 t1 = gen_rtx_PARALLEL (mode, v);
13649 return t1;
13650}
13651
988fa693
JG
13652/* Check OP for validity as a PARALLEL RTX vector with elements
13653 numbering the lanes of either the high (HIGH == TRUE) or low lanes,
13654 from the perspective of the architecture. See the diagram above
13655 aarch64_simd_vect_par_cnst_half for more details. */
13656
13657bool
ef4bddc2 13658aarch64_simd_check_vect_par_cnst_half (rtx op, machine_mode mode,
988fa693
JG
13659 bool high)
13660{
6a70badb
RS
13661 int nelts;
13662 if (!VECTOR_MODE_P (mode) || !GET_MODE_NUNITS (mode).is_constant (&nelts))
f5cbabc1
RS
13663 return false;
13664
6a70badb 13665 rtx ideal = aarch64_simd_vect_par_cnst_half (mode, nelts, high);
988fa693
JG
13666 HOST_WIDE_INT count_op = XVECLEN (op, 0);
13667 HOST_WIDE_INT count_ideal = XVECLEN (ideal, 0);
13668 int i = 0;
13669
988fa693
JG
13670 if (count_op != count_ideal)
13671 return false;
13672
13673 for (i = 0; i < count_ideal; i++)
13674 {
13675 rtx elt_op = XVECEXP (op, 0, i);
13676 rtx elt_ideal = XVECEXP (ideal, 0, i);
13677
4aa81c2e 13678 if (!CONST_INT_P (elt_op)
988fa693
JG
13679 || INTVAL (elt_ideal) != INTVAL (elt_op))
13680 return false;
13681 }
13682 return true;
13683}
13684
43e9d192
IB
13685/* Bounds-check lanes. Ensure OPERAND lies between LOW (inclusive) and
13686 HIGH (exclusive). */
13687void
46ed6024
CB
13688aarch64_simd_lane_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high,
13689 const_tree exp)
43e9d192
IB
13690{
13691 HOST_WIDE_INT lane;
4aa81c2e 13692 gcc_assert (CONST_INT_P (operand));
43e9d192
IB
13693 lane = INTVAL (operand);
13694
13695 if (lane < low || lane >= high)
46ed6024
CB
13696 {
13697 if (exp)
cf0c27ef 13698 error ("%Klane %wd out of range %wd - %wd", exp, lane, low, high - 1);
46ed6024 13699 else
cf0c27ef 13700 error ("lane %wd out of range %wd - %wd", lane, low, high - 1);
46ed6024 13701 }
43e9d192
IB
13702}
13703
7ac29c0f
RS
13704/* Perform endian correction on lane number N, which indexes a vector
13705 of mode MODE, and return the result as an SImode rtx. */
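/* Illustrative example: for a V4SImode vector this returns 3 for lane 0 on a
   big-endian target and 0 on a little-endian one.  */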
13706
13707rtx
13708aarch64_endian_lane_rtx (machine_mode mode, unsigned int n)
13709{
13710 return gen_int_mode (ENDIAN_LANE_N (GET_MODE_NUNITS (mode), n), SImode);
13711}
13712
43e9d192 13713/* Return TRUE if OP is a valid vector addressing mode. */
43cacb12 13714
43e9d192
IB
13715bool
13716aarch64_simd_mem_operand_p (rtx op)
13717{
13718 return MEM_P (op) && (GET_CODE (XEXP (op, 0)) == POST_INC
4aa81c2e 13719 || REG_P (XEXP (op, 0)));
43e9d192
IB
13720}
13721
43cacb12
RS
13722/* Return true if OP is a valid MEM operand for an SVE LD1R instruction. */
13723
13724bool
13725aarch64_sve_ld1r_operand_p (rtx op)
13726{
13727 struct aarch64_address_info addr;
13728 scalar_mode mode;
13729
13730 return (MEM_P (op)
13731 && is_a <scalar_mode> (GET_MODE (op), &mode)
13732 && aarch64_classify_address (&addr, XEXP (op, 0), mode, false)
13733 && addr.type == ADDRESS_REG_IMM
13734 && offset_6bit_unsigned_scaled_p (mode, addr.const_offset));
13735}
13736
13737/* Return true if OP is a valid MEM operand for an SVE LDR instruction.
13738 The conditions for STR are the same. */
13739bool
13740aarch64_sve_ldr_operand_p (rtx op)
13741{
13742 struct aarch64_address_info addr;
13743
13744 return (MEM_P (op)
13745 && aarch64_classify_address (&addr, XEXP (op, 0), GET_MODE (op),
13746 false, ADDR_QUERY_ANY)
13747 && addr.type == ADDRESS_REG_IMM);
13748}
13749
9f4cbab8
RS
13750/* Return true if OP is a valid MEM operand for an SVE_STRUCT mode.
13751 We need to be able to access the individual pieces, so the range
13752 is different from LD[234] and ST[234]. */
13753bool
13754aarch64_sve_struct_memory_operand_p (rtx op)
13755{
13756 if (!MEM_P (op))
13757 return false;
13758
13759 machine_mode mode = GET_MODE (op);
13760 struct aarch64_address_info addr;
13761 if (!aarch64_classify_address (&addr, XEXP (op, 0), SVE_BYTE_MODE, false,
13762 ADDR_QUERY_ANY)
13763 || addr.type != ADDRESS_REG_IMM)
13764 return false;
13765
13766 poly_int64 first = addr.const_offset;
13767 poly_int64 last = first + GET_MODE_SIZE (mode) - BYTES_PER_SVE_VECTOR;
13768 return (offset_4bit_signed_scaled_p (SVE_BYTE_MODE, first)
13769 && offset_4bit_signed_scaled_p (SVE_BYTE_MODE, last));
13770}
13771
2d8c6dc1
AH
13772/* Emit a register copy from operand to operand, taking care not to
13773 early-clobber source registers in the process.
43e9d192 13774
2d8c6dc1
AH
13775 COUNT is the number of components into which the copy needs to be
13776 decomposed. */
43e9d192 13777void
b8506a8a 13778aarch64_simd_emit_reg_reg_move (rtx *operands, machine_mode mode,
2d8c6dc1 13779 unsigned int count)
43e9d192
IB
13780{
13781 unsigned int i;
2d8c6dc1
AH
13782 int rdest = REGNO (operands[0]);
13783 int rsrc = REGNO (operands[1]);
43e9d192
IB
13784
13785 if (!reg_overlap_mentioned_p (operands[0], operands[1])
2d8c6dc1
AH
13786 || rdest < rsrc)
13787 for (i = 0; i < count; i++)
13788 emit_move_insn (gen_rtx_REG (mode, rdest + i),
13789 gen_rtx_REG (mode, rsrc + i));
43e9d192 13790 else
2d8c6dc1
AH
13791 for (i = 0; i < count; i++)
13792 emit_move_insn (gen_rtx_REG (mode, rdest + count - i - 1),
13793 gen_rtx_REG (mode, rsrc + count - i - 1));
43e9d192
IB
13794}
13795
668046d1 13796/* Compute and return the length of aarch64_simd_reglist<mode>, where <mode> is
6ec0e5b9 13797 one of VSTRUCT modes: OI, CI, or XI. */
668046d1 13798int
b8506a8a 13799aarch64_simd_attr_length_rglist (machine_mode mode)
668046d1 13800{
6a70badb
RS
13801 /* This is only used (and only meaningful) for Advanced SIMD, not SVE. */
13802 return (GET_MODE_SIZE (mode).to_constant () / UNITS_PER_VREG) * 4;
668046d1
DS
13803}
13804
db0253a4 13805/* Implement target hook TARGET_VECTOR_ALIGNMENT. The AAPCS64 sets the maximum
43cacb12
RS
13806 alignment of a vector to 128 bits. SVE predicates have an alignment of
13807 16 bits. */
db0253a4
TB
13808static HOST_WIDE_INT
13809aarch64_simd_vector_alignment (const_tree type)
13810{
43cacb12
RS
13811 if (TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
13812 /* ??? Checking the mode isn't ideal, but VECTOR_BOOLEAN_TYPE_P can
13813 be set for non-predicate vectors of booleans. Modes are the most
13814 direct way we have of identifying real SVE predicate types. */
13815 return GET_MODE_CLASS (TYPE_MODE (type)) == MODE_VECTOR_BOOL ? 16 : 128;
9439e9a1 13816 HOST_WIDE_INT align = tree_to_shwi (TYPE_SIZE (type));
db0253a4
TB
13817 return MIN (align, 128);
13818}
13819
43cacb12
RS
13820/* Implement target hook TARGET_VECTORIZE_PREFERRED_VECTOR_ALIGNMENT. */
13821static HOST_WIDE_INT
13822aarch64_vectorize_preferred_vector_alignment (const_tree type)
13823{
13824 if (aarch64_sve_data_mode_p (TYPE_MODE (type)))
13825 {
13826 /* If the length of the vector is fixed, try to align to that length,
13827 otherwise don't try to align at all. */
13828 HOST_WIDE_INT result;
13829 if (!BITS_PER_SVE_VECTOR.is_constant (&result))
13830 result = TYPE_ALIGN (TREE_TYPE (type));
13831 return result;
13832 }
13833 return TYPE_ALIGN (type);
13834}
13835
db0253a4
TB
13836/* Implement target hook TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE. */
13837static bool
13838aarch64_simd_vector_alignment_reachable (const_tree type, bool is_packed)
13839{
13840 if (is_packed)
13841 return false;
13842
43cacb12
RS
13843 /* For fixed-length vectors, check that the vectorizer will aim for
13844 full-vector alignment. This isn't true for generic GCC vectors
13845 that are wider than the ABI maximum of 128 bits. */
13846 if (TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
13847 && (wi::to_widest (TYPE_SIZE (type))
13848 != aarch64_vectorize_preferred_vector_alignment (type)))
db0253a4
TB
13849 return false;
13850
13851 /* Vectors whose size is <= BIGGEST_ALIGNMENT are naturally aligned. */
13852 return true;
13853}
13854
7df76747
N
13855/* Return true if the vector misalignment factor is supported by the
13856 target. */
13857static bool
13858aarch64_builtin_support_vector_misalignment (machine_mode mode,
13859 const_tree type, int misalignment,
13860 bool is_packed)
13861{
13862 if (TARGET_SIMD && STRICT_ALIGNMENT)
13863 {
13864 /* Return false if the movmisalign pattern is not supported for this mode. */
13865 if (optab_handler (movmisalign_optab, mode) == CODE_FOR_nothing)
13866 return false;
13867
a509c571 13868 /* Misalignment factor is unknown at compile time. */
7df76747 13869 if (misalignment == -1)
a509c571 13870 return false;
7df76747
N
13871 }
13872 return default_builtin_support_vector_misalignment (mode, type, misalignment,
13873 is_packed);
13874}
13875
4369c11e
TB
13876/* If VALS is a vector constant that can be loaded into a register
13877 using DUP, generate instructions to do so and return an RTX to
13878 assign to the register. Otherwise return NULL_RTX. */
13879static rtx
13880aarch64_simd_dup_constant (rtx vals)
13881{
ef4bddc2
RS
13882 machine_mode mode = GET_MODE (vals);
13883 machine_mode inner_mode = GET_MODE_INNER (mode);
4369c11e 13884 rtx x;
4369c11e 13885
92695fbb 13886 if (!const_vec_duplicate_p (vals, &x))
4369c11e
TB
13887 return NULL_RTX;
13888
13889 /* We can load this constant by using DUP and a constant in a
13890 single ARM register. This will be cheaper than a vector
13891 load. */
92695fbb 13892 x = copy_to_mode_reg (inner_mode, x);
59d06c05 13893 return gen_vec_duplicate (mode, x);
4369c11e
TB
13894}
13895
13896
13897/* Generate code to load VALS, which is a PARALLEL containing only
13898 constants (for vec_init) or CONST_VECTOR, efficiently into a
13899 register. Returns an RTX to copy into the register, or NULL_RTX
13900 for a PARALLEL that cannot be converted into a CONST_VECTOR. */
1df3f464 13901static rtx
4369c11e
TB
13902aarch64_simd_make_constant (rtx vals)
13903{
ef4bddc2 13904 machine_mode mode = GET_MODE (vals);
4369c11e
TB
13905 rtx const_dup;
13906 rtx const_vec = NULL_RTX;
4369c11e
TB
13907 int n_const = 0;
13908 int i;
13909
13910 if (GET_CODE (vals) == CONST_VECTOR)
13911 const_vec = vals;
13912 else if (GET_CODE (vals) == PARALLEL)
13913 {
13914 /* A CONST_VECTOR must contain only CONST_INTs and
13915 CONST_DOUBLEs, but CONSTANT_P allows more (e.g. SYMBOL_REF).
13916 Only store valid constants in a CONST_VECTOR. */
6a70badb 13917 int n_elts = XVECLEN (vals, 0);
4369c11e
TB
13918 for (i = 0; i < n_elts; ++i)
13919 {
13920 rtx x = XVECEXP (vals, 0, i);
13921 if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
13922 n_const++;
13923 }
13924 if (n_const == n_elts)
13925 const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0));
13926 }
13927 else
13928 gcc_unreachable ();
13929
13930 if (const_vec != NULL_RTX
b187677b 13931 && aarch64_simd_valid_immediate (const_vec, NULL))
4369c11e
TB
13932 /* Load using MOVI/MVNI. */
13933 return const_vec;
13934 else if ((const_dup = aarch64_simd_dup_constant (vals)) != NULL_RTX)
13935 /* Loaded using DUP. */
13936 return const_dup;
13937 else if (const_vec != NULL_RTX)
13938 /* Load from constant pool. We cannot take advantage of single-cycle
13939 LD1 because we need a PC-relative addressing mode. */
13940 return const_vec;
13941 else
13942 /* A PARALLEL containing something not valid inside CONST_VECTOR.
13943 We cannot construct an initializer. */
13944 return NULL_RTX;
13945}
13946
35a093b6
JG
13947/* Expand a vector initialisation sequence, such that TARGET is
13948 initialised to contain VALS. */
13949
4369c11e
TB
13950void
13951aarch64_expand_vector_init (rtx target, rtx vals)
13952{
ef4bddc2 13953 machine_mode mode = GET_MODE (target);
146c2e3a 13954 scalar_mode inner_mode = GET_MODE_INNER (mode);
35a093b6 13955 /* The number of vector elements. */
6a70badb 13956 int n_elts = XVECLEN (vals, 0);
35a093b6 13957 /* The number of vector elements which are not constant. */
8b66a2d4
AL
13958 int n_var = 0;
13959 rtx any_const = NULL_RTX;
35a093b6
JG
13960 /* The first element of vals. */
13961 rtx v0 = XVECEXP (vals, 0, 0);
4369c11e 13962 bool all_same = true;
4369c11e 13963
35a093b6 13964 /* Count the number of variable elements to initialise. */
8b66a2d4 13965 for (int i = 0; i < n_elts; ++i)
4369c11e 13966 {
8b66a2d4 13967 rtx x = XVECEXP (vals, 0, i);
35a093b6 13968 if (!(CONST_INT_P (x) || CONST_DOUBLE_P (x)))
8b66a2d4
AL
13969 ++n_var;
13970 else
13971 any_const = x;
4369c11e 13972
35a093b6 13973 all_same &= rtx_equal_p (x, v0);
4369c11e
TB
13974 }
13975
35a093b6
JG
13976 /* No variable elements, hand off to aarch64_simd_make_constant which knows
13977 how best to handle this. */
4369c11e
TB
13978 if (n_var == 0)
13979 {
13980 rtx constant = aarch64_simd_make_constant (vals);
13981 if (constant != NULL_RTX)
13982 {
13983 emit_move_insn (target, constant);
13984 return;
13985 }
13986 }
13987
13988 /* Splat a single non-constant element if we can. */
13989 if (all_same)
13990 {
35a093b6 13991 rtx x = copy_to_mode_reg (inner_mode, v0);
59d06c05 13992 aarch64_emit_move (target, gen_vec_duplicate (mode, x));
4369c11e
TB
13993 return;
13994 }
13995
85c1b6d7
AP
13996 enum insn_code icode = optab_handler (vec_set_optab, mode);
13997 gcc_assert (icode != CODE_FOR_nothing);
13998
13999 /* If there are only variable elements, try to optimize
14000 the insertion using dup for the most common element
14001 followed by insertions. */
14002
14003 /* The algorithm will fill matches[*][0] with the earliest matching element,
14004 and matches[X][1] with the count of duplicate elements (if X is the
14005 earliest element which has duplicates). */
14006
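  /* Illustrative example: for an input PARALLEL { a, b, a, a } the loop
     below yields matches[0] = {0, 3}, matches[1] = {1, 1} and
     matches[2] = matches[3] = {0, 0}, so element 0 is chosen for the DUP.  */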
14007 if (n_var == n_elts && n_elts <= 16)
14008 {
14009 int matches[16][2] = {0};
14010 for (int i = 0; i < n_elts; i++)
14011 {
14012 for (int j = 0; j <= i; j++)
14013 {
14014 if (rtx_equal_p (XVECEXP (vals, 0, i), XVECEXP (vals, 0, j)))
14015 {
14016 matches[i][0] = j;
14017 matches[j][1]++;
14018 break;
14019 }
14020 }
14021 }
14022 int maxelement = 0;
14023 int maxv = 0;
14024 for (int i = 0; i < n_elts; i++)
14025 if (matches[i][1] > maxv)
14026 {
14027 maxelement = i;
14028 maxv = matches[i][1];
14029 }
14030
b4e2cd5b
JG
14031 /* Create a duplicate of the most common element, unless all elements
14032 are equally useless to us, in which case just immediately set the
14033 vector register using the first element. */
14034
14035 if (maxv == 1)
14036 {
14037 /* For vectors of two 64-bit elements, we can do even better. */
14038 if (n_elts == 2
14039 && (inner_mode == E_DImode
14040 || inner_mode == E_DFmode))
14041
14042 {
14043 rtx x0 = XVECEXP (vals, 0, 0);
14044 rtx x1 = XVECEXP (vals, 0, 1);
14045 /* Combine can pick up this case, but handling it directly
14046 here leaves clearer RTL.
14047
14048 This is load_pair_lanes<mode>, and also gives us a clean-up
14049 for store_pair_lanes<mode>. */
14050 if (memory_operand (x0, inner_mode)
14051 && memory_operand (x1, inner_mode)
14052 && !STRICT_ALIGNMENT
14053 && rtx_equal_p (XEXP (x1, 0),
14054 plus_constant (Pmode,
14055 XEXP (x0, 0),
14056 GET_MODE_SIZE (inner_mode))))
14057 {
14058 rtx t;
14059 if (inner_mode == DFmode)
14060 t = gen_load_pair_lanesdf (target, x0, x1);
14061 else
14062 t = gen_load_pair_lanesdi (target, x0, x1);
14063 emit_insn (t);
14064 return;
14065 }
14066 }
14067 /* The subreg-move sequence below will move into lane zero of the
14068 vector register. For big-endian we want that position to hold
14069 the last element of VALS. */
14070 maxelement = BYTES_BIG_ENDIAN ? n_elts - 1 : 0;
14071 rtx x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, maxelement));
14072 aarch64_emit_move (target, lowpart_subreg (mode, x, inner_mode));
14073 }
14074 else
14075 {
14076 rtx x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, maxelement));
14077 aarch64_emit_move (target, gen_vec_duplicate (mode, x));
14078 }
85c1b6d7
AP
14079
14080 /* Insert the rest. */
14081 for (int i = 0; i < n_elts; i++)
14082 {
14083 rtx x = XVECEXP (vals, 0, i);
14084 if (matches[i][0] == maxelement)
14085 continue;
14086 x = copy_to_mode_reg (inner_mode, x);
14087 emit_insn (GEN_FCN (icode) (target, x, GEN_INT (i)));
14088 }
14089 return;
14090 }
14091
35a093b6
JG
14092 /* Initialise a vector which is part-variable. We want to first try
14093 to build those lanes which are constant in the most efficient way we
14094 can. */
14095 if (n_var != n_elts)
4369c11e
TB
14096 {
14097 rtx copy = copy_rtx (vals);
4369c11e 14098
8b66a2d4
AL
14099 /* Load constant part of vector. We really don't care what goes into the
14100 parts we will overwrite, but we're more likely to be able to load the
14101 constant efficiently if it has fewer, larger, repeating parts
14102 (see aarch64_simd_valid_immediate). */
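      /* Illustrative example: with n_elts == 4 and only element 1 variable,
	 the inner loop below tries the constant at index 1 ^ 2 == 3 first and
	 then the one at index 1 ^ 1 == 0 as the filler value.  */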
14103 for (int i = 0; i < n_elts; i++)
14104 {
14105 rtx x = XVECEXP (vals, 0, i);
14106 if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
14107 continue;
14108 rtx subst = any_const;
14109 for (int bit = n_elts / 2; bit > 0; bit /= 2)
14110 {
14111 /* Look in the copied vector, as more elements are const. */
14112 rtx test = XVECEXP (copy, 0, i ^ bit);
14113 if (CONST_INT_P (test) || CONST_DOUBLE_P (test))
14114 {
14115 subst = test;
14116 break;
14117 }
14118 }
14119 XVECEXP (copy, 0, i) = subst;
14120 }
4369c11e 14121 aarch64_expand_vector_init (target, copy);
35a093b6 14122 }
4369c11e 14123
35a093b6 14124 /* Insert the variable lanes directly. */
8b66a2d4 14125 for (int i = 0; i < n_elts; i++)
35a093b6
JG
14126 {
14127 rtx x = XVECEXP (vals, 0, i);
14128 if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
14129 continue;
14130 x = copy_to_mode_reg (inner_mode, x);
14131 emit_insn (GEN_FCN (icode) (target, x, GEN_INT (i)));
14132 }
4369c11e
TB
14133}
14134
43e9d192 14135static unsigned HOST_WIDE_INT
ef4bddc2 14136aarch64_shift_truncation_mask (machine_mode mode)
43e9d192 14137{
43cacb12
RS
14138 if (!SHIFT_COUNT_TRUNCATED || aarch64_vector_data_mode_p (mode))
14139 return 0;
14140 return GET_MODE_UNIT_BITSIZE (mode) - 1;
43e9d192
IB
14141}
14142
43e9d192
IB
14143/* Select a format to encode pointers in exception handling data. */
14144int
14145aarch64_asm_preferred_eh_data_format (int code ATTRIBUTE_UNUSED, int global)
14146{
14147 int type;
14148 switch (aarch64_cmodel)
14149 {
14150 case AARCH64_CMODEL_TINY:
14151 case AARCH64_CMODEL_TINY_PIC:
14152 case AARCH64_CMODEL_SMALL:
14153 case AARCH64_CMODEL_SMALL_PIC:
1b1e81f8 14154 case AARCH64_CMODEL_SMALL_SPIC:
43e9d192
IB
14155 /* text+got+data < 4Gb. 4-byte signed relocs are sufficient
14156 for everything. */
14157 type = DW_EH_PE_sdata4;
14158 break;
14159 default:
14160 /* No assumptions here. 8-byte relocs required. */
14161 type = DW_EH_PE_sdata8;
14162 break;
14163 }
14164 return (global ? DW_EH_PE_indirect : 0) | DW_EH_PE_pcrel | type;
14165}
14166
e1c1ecb0
KT
14167/* The last .arch and .tune assembly strings that we printed. */
14168static std::string aarch64_last_printed_arch_string;
14169static std::string aarch64_last_printed_tune_string;
14170
361fb3ee
KT
14171/* Implement ASM_DECLARE_FUNCTION_NAME. Output the ISA features used
14172 by the function fndecl. */
14173
14174void
14175aarch64_declare_function_name (FILE *stream, const char* name,
14176 tree fndecl)
14177{
14178 tree target_parts = DECL_FUNCTION_SPECIFIC_TARGET (fndecl);
14179
14180 struct cl_target_option *targ_options;
14181 if (target_parts)
14182 targ_options = TREE_TARGET_OPTION (target_parts);
14183 else
14184 targ_options = TREE_TARGET_OPTION (target_option_current_node);
14185 gcc_assert (targ_options);
14186
14187 const struct processor *this_arch
14188 = aarch64_get_arch (targ_options->x_explicit_arch);
14189
054b4005
JG
14190 unsigned long isa_flags = targ_options->x_aarch64_isa_flags;
14191 std::string extension
04a99ebe
JG
14192 = aarch64_get_extension_string_for_isa_flags (isa_flags,
14193 this_arch->flags);
e1c1ecb0
KT
14194 /* Only update the assembler .arch string if it is distinct from the last
14195 such string we printed. */
14196 std::string to_print = this_arch->name + extension;
14197 if (to_print != aarch64_last_printed_arch_string)
14198 {
14199 asm_fprintf (asm_out_file, "\t.arch %s\n", to_print.c_str ());
14200 aarch64_last_printed_arch_string = to_print;
14201 }
361fb3ee
KT
14202
14203 /* Print the cpu name we're tuning for in the comments; it might be
e1c1ecb0
KT
14204 useful to readers of the generated asm. Do it only when it changes
14205 from function to function and verbose assembly is requested. */
361fb3ee
KT
14206 const struct processor *this_tune
14207 = aarch64_get_tune_cpu (targ_options->x_explicit_tune_core);
14208
e1c1ecb0
KT
14209 if (flag_debug_asm && aarch64_last_printed_tune_string != this_tune->name)
14210 {
14211 asm_fprintf (asm_out_file, "\t" ASM_COMMENT_START ".tune %s\n",
14212 this_tune->name);
14213 aarch64_last_printed_tune_string = this_tune->name;
14214 }
361fb3ee
KT
14215
14216 /* Don't forget the type directive for ELF. */
14217 ASM_OUTPUT_TYPE_DIRECTIVE (stream, name, "function");
14218 ASM_OUTPUT_LABEL (stream, name);
14219}
14220
e1c1ecb0
KT
14221/* Implements TARGET_ASM_FILE_START. Output the assembly header. */
14222
14223static void
14224aarch64_start_file (void)
14225{
14226 struct cl_target_option *default_options
14227 = TREE_TARGET_OPTION (target_option_default_node);
14228
14229 const struct processor *default_arch
14230 = aarch64_get_arch (default_options->x_explicit_arch);
14231 unsigned long default_isa_flags = default_options->x_aarch64_isa_flags;
14232 std::string extension
04a99ebe
JG
14233 = aarch64_get_extension_string_for_isa_flags (default_isa_flags,
14234 default_arch->flags);
e1c1ecb0
KT
14235
14236 aarch64_last_printed_arch_string = default_arch->name + extension;
14237 aarch64_last_printed_tune_string = "";
14238 asm_fprintf (asm_out_file, "\t.arch %s\n",
14239 aarch64_last_printed_arch_string.c_str ());
14240
14241 default_file_start ();
14242}
14243
0462169c
SN
14244/* Emit load exclusive. */
14245
14246static void
ef4bddc2 14247aarch64_emit_load_exclusive (machine_mode mode, rtx rval,
0462169c
SN
14248 rtx mem, rtx model_rtx)
14249{
14250 rtx (*gen) (rtx, rtx, rtx);
14251
14252 switch (mode)
14253 {
4e10a5a7
RS
14254 case E_QImode: gen = gen_aarch64_load_exclusiveqi; break;
14255 case E_HImode: gen = gen_aarch64_load_exclusivehi; break;
14256 case E_SImode: gen = gen_aarch64_load_exclusivesi; break;
14257 case E_DImode: gen = gen_aarch64_load_exclusivedi; break;
0462169c
SN
14258 default:
14259 gcc_unreachable ();
14260 }
14261
14262 emit_insn (gen (rval, mem, model_rtx));
14263}
14264
14265/* Emit store exclusive. */
14266
14267static void
ef4bddc2 14268aarch64_emit_store_exclusive (machine_mode mode, rtx bval,
0462169c
SN
14269 rtx rval, rtx mem, rtx model_rtx)
14270{
14271 rtx (*gen) (rtx, rtx, rtx, rtx);
14272
14273 switch (mode)
14274 {
4e10a5a7
RS
14275 case E_QImode: gen = gen_aarch64_store_exclusiveqi; break;
14276 case E_HImode: gen = gen_aarch64_store_exclusivehi; break;
14277 case E_SImode: gen = gen_aarch64_store_exclusivesi; break;
14278 case E_DImode: gen = gen_aarch64_store_exclusivedi; break;
0462169c
SN
14279 default:
14280 gcc_unreachable ();
14281 }
14282
14283 emit_insn (gen (bval, rval, mem, model_rtx));
14284}
14285
14286/* Emit the jump instruction INSN and mark it as unlikely to be taken. */
14287
14288static void
14289aarch64_emit_unlikely_jump (rtx insn)
14290{
f370536c 14291 rtx_insn *jump = emit_jump_insn (insn);
5fa396ad 14292 add_reg_br_prob_note (jump, profile_probability::very_unlikely ());
0462169c
SN
14293}
14294
14295/* Expand a compare and swap pattern. */
14296
14297void
14298aarch64_expand_compare_and_swap (rtx operands[])
14299{
14300 rtx bval, rval, mem, oldval, newval, is_weak, mod_s, mod_f, x;
ef4bddc2 14301 machine_mode mode, cmp_mode;
b0770c0f
MW
14302 typedef rtx (*gen_cas_fn) (rtx, rtx, rtx, rtx, rtx, rtx, rtx);
14303 int idx;
14304 gen_cas_fn gen;
14305 const gen_cas_fn split_cas[] =
14306 {
14307 gen_aarch64_compare_and_swapqi,
14308 gen_aarch64_compare_and_swaphi,
14309 gen_aarch64_compare_and_swapsi,
14310 gen_aarch64_compare_and_swapdi
14311 };
14312 const gen_cas_fn atomic_cas[] =
14313 {
14314 gen_aarch64_compare_and_swapqi_lse,
14315 gen_aarch64_compare_and_swaphi_lse,
14316 gen_aarch64_compare_and_swapsi_lse,
14317 gen_aarch64_compare_and_swapdi_lse
14318 };
0462169c
SN
14319
14320 bval = operands[0];
14321 rval = operands[1];
14322 mem = operands[2];
14323 oldval = operands[3];
14324 newval = operands[4];
14325 is_weak = operands[5];
14326 mod_s = operands[6];
14327 mod_f = operands[7];
14328 mode = GET_MODE (mem);
14329 cmp_mode = mode;
14330
14331 /* Normally the succ memory model must be stronger than fail, but in the
14332 unlikely event of fail being ACQUIRE and succ being RELEASE we need to
14333 promote succ to ACQ_REL so that we don't lose the acquire semantics. */
14334
46b35980
AM
14335 if (is_mm_acquire (memmodel_from_int (INTVAL (mod_f)))
14336 && is_mm_release (memmodel_from_int (INTVAL (mod_s))))
0462169c
SN
14337 mod_s = GEN_INT (MEMMODEL_ACQ_REL);
14338
14339 switch (mode)
14340 {
4e10a5a7
RS
14341 case E_QImode:
14342 case E_HImode:
0462169c
SN
14343 /* For short modes, we're going to perform the comparison in SImode,
14344 so do the zero-extension now. */
14345 cmp_mode = SImode;
14346 rval = gen_reg_rtx (SImode);
14347 oldval = convert_modes (SImode, mode, oldval, true);
14348 /* Fall through. */
14349
4e10a5a7
RS
14350 case E_SImode:
14351 case E_DImode:
0462169c
SN
14352 /* Force the value into a register if needed. */
14353 if (!aarch64_plus_operand (oldval, mode))
14354 oldval = force_reg (cmp_mode, oldval);
14355 break;
14356
14357 default:
14358 gcc_unreachable ();
14359 }
14360
14361 switch (mode)
14362 {
4e10a5a7
RS
14363 case E_QImode: idx = 0; break;
14364 case E_HImode: idx = 1; break;
14365 case E_SImode: idx = 2; break;
14366 case E_DImode: idx = 3; break;
0462169c
SN
14367 default:
14368 gcc_unreachable ();
14369 }
b0770c0f
MW
14370 if (TARGET_LSE)
14371 gen = atomic_cas[idx];
14372 else
14373 gen = split_cas[idx];
0462169c
SN
14374
14375 emit_insn (gen (rval, mem, oldval, newval, is_weak, mod_s, mod_f));
14376
14377 if (mode == QImode || mode == HImode)
14378 emit_move_insn (operands[1], gen_lowpart (mode, rval));
14379
14380 x = gen_rtx_REG (CCmode, CC_REGNUM);
14381 x = gen_rtx_EQ (SImode, x, const0_rtx);
f7df4a84 14382 emit_insn (gen_rtx_SET (bval, x));
0462169c
SN
14383}
14384
641c2f8b
MW
14385/* Test whether the target supports using an atomic load-operate instruction
14386 for operation CODE. Returns FALSE if the operation isn't supported by the
14387 architecture. */
14390
14391bool
14392aarch64_atomic_ldop_supported_p (enum rtx_code code)
14393{
14394 if (!TARGET_LSE)
14395 return false;
14396
14397 switch (code)
14398 {
14399 case SET:
14400 case AND:
14401 case IOR:
14402 case XOR:
14403 case MINUS:
14404 case PLUS:
14405 return true;
14406 default:
14407 return false;
14408 }
14409}
14410
f70fb3b6
MW
14411/* Emit a barrier that is appropriate for memory model MODEL, at the end of a
14412 sequence implementing an atomic operation. */
14413
14414static void
14415aarch64_emit_post_barrier (enum memmodel model)
14416{
14417 const enum memmodel base_model = memmodel_base (model);
14418
14419 if (is_mm_sync (model)
14420 && (base_model == MEMMODEL_ACQUIRE
14421 || base_model == MEMMODEL_ACQ_REL
14422 || base_model == MEMMODEL_SEQ_CST))
14423 {
14424 emit_insn (gen_mem_thread_fence (GEN_INT (MEMMODEL_SEQ_CST)));
14425 }
14426}
14427
b0770c0f
MW
14428/* Emit an atomic compare-and-swap operation. RVAL is the destination register
14429 for the data in memory. EXPECTED is the value expected to be in memory.
14430 DESIRED is the value to store to memory. MEM is the memory location. MODEL
14431 is the memory ordering to use. */
14432
14433void
14434aarch64_gen_atomic_cas (rtx rval, rtx mem,
14435 rtx expected, rtx desired,
14436 rtx model)
14437{
14438 rtx (*gen) (rtx, rtx, rtx, rtx);
14439 machine_mode mode;
14440
14441 mode = GET_MODE (mem);
14442
14443 switch (mode)
14444 {
4e10a5a7
RS
14445 case E_QImode: gen = gen_aarch64_atomic_casqi; break;
14446 case E_HImode: gen = gen_aarch64_atomic_cashi; break;
14447 case E_SImode: gen = gen_aarch64_atomic_cassi; break;
14448 case E_DImode: gen = gen_aarch64_atomic_casdi; break;
b0770c0f
MW
14449 default:
14450 gcc_unreachable ();
14451 }
14452
14453 /* Move the expected value into the CAS destination register. */
14454 emit_insn (gen_rtx_SET (rval, expected));
14455
14456 /* Emit the CAS. */
14457 emit_insn (gen (rval, mem, desired, model));
14458
14459 /* Compare the expected value with the value loaded by the CAS, to establish
14460 whether the swap was made. */
14461 aarch64_gen_compare_reg (EQ, rval, expected);
14462}
14463
0462169c
SN
14464/* Split a compare and swap pattern. */
14465
14466void
14467aarch64_split_compare_and_swap (rtx operands[])
14468{
14469 rtx rval, mem, oldval, newval, scratch;
ef4bddc2 14470 machine_mode mode;
0462169c 14471 bool is_weak;
5d8a22a5
DM
14472 rtx_code_label *label1, *label2;
14473 rtx x, cond;
ab876106
MW
14474 enum memmodel model;
14475 rtx model_rtx;
0462169c
SN
14476
14477 rval = operands[0];
14478 mem = operands[1];
14479 oldval = operands[2];
14480 newval = operands[3];
14481 is_weak = (operands[4] != const0_rtx);
ab876106 14482 model_rtx = operands[5];
0462169c
SN
14483 scratch = operands[7];
14484 mode = GET_MODE (mem);
ab876106 14485 model = memmodel_from_int (INTVAL (model_rtx));
0462169c 14486
17f47f86
KT
14487 /* When OLDVAL is zero and we want the strong version we can emit a tighter
14488 loop:
14489 .label1:
14490 LD[A]XR rval, [mem]
14491 CBNZ rval, .label2
14492 ST[L]XR scratch, newval, [mem]
14493 CBNZ scratch, .label1
14494 .label2:
14495 CMP rval, 0. */
14496 bool strong_zero_p = !is_weak && oldval == const0_rtx;
14497
5d8a22a5 14498 label1 = NULL;
0462169c
SN
14499 if (!is_weak)
14500 {
14501 label1 = gen_label_rtx ();
14502 emit_label (label1);
14503 }
14504 label2 = gen_label_rtx ();
14505
ab876106
MW
14506 /* The initial load can be relaxed for a __sync operation since a final
14507 barrier will be emitted to stop code hoisting. */
14508 if (is_mm_sync (model))
14509 aarch64_emit_load_exclusive (mode, rval, mem,
14510 GEN_INT (MEMMODEL_RELAXED));
14511 else
14512 aarch64_emit_load_exclusive (mode, rval, mem, model_rtx);
0462169c 14513
17f47f86
KT
14514 if (strong_zero_p)
14515 {
6e1eaca9
RE
14516 if (aarch64_track_speculation)
14517 {
14518 /* Emit an explicit compare instruction, so that we can correctly
14519 track the condition codes. */
14520 rtx cc_reg = aarch64_gen_compare_reg (NE, rval, const0_rtx);
14521 x = gen_rtx_NE (GET_MODE (cc_reg), cc_reg, const0_rtx);
14522 }
14523 else
14524 x = gen_rtx_NE (VOIDmode, rval, const0_rtx);
14525
17f47f86
KT
14526 x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
14527 gen_rtx_LABEL_REF (Pmode, label2), pc_rtx);
14528 aarch64_emit_unlikely_jump (gen_rtx_SET (pc_rtx, x));
14529 }
14530 else
14531 {
14532 cond = aarch64_gen_compare_reg (NE, rval, oldval);
14533 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
14534 x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
14535 gen_rtx_LABEL_REF (Pmode, label2), pc_rtx);
14536 aarch64_emit_unlikely_jump (gen_rtx_SET (pc_rtx, x));
14537 }
0462169c 14538
ab876106 14539 aarch64_emit_store_exclusive (mode, scratch, mem, newval, model_rtx);
0462169c
SN
14540
14541 if (!is_weak)
14542 {
6e1eaca9
RE
14543 if (aarch64_track_speculation)
14544 {
14545 /* Emit an explicit compare instruction, so that we can correctly
14546 track the condition codes. */
14547 rtx cc_reg = aarch64_gen_compare_reg (NE, scratch, const0_rtx);
14548 x = gen_rtx_NE (GET_MODE (cc_reg), cc_reg, const0_rtx);
14549 }
14550 else
14551 x = gen_rtx_NE (VOIDmode, scratch, const0_rtx);
14552
0462169c
SN
14553 x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
14554 gen_rtx_LABEL_REF (Pmode, label1), pc_rtx);
f7df4a84 14555 aarch64_emit_unlikely_jump (gen_rtx_SET (pc_rtx, x));
0462169c
SN
14556 }
14557 else
14558 {
14559 cond = gen_rtx_REG (CCmode, CC_REGNUM);
14560 x = gen_rtx_COMPARE (CCmode, scratch, const0_rtx);
f7df4a84 14561 emit_insn (gen_rtx_SET (cond, x));
0462169c
SN
14562 }
14563
14564 emit_label (label2);
17f47f86
KT
14565 /* If we used a CBNZ in the exchange loop emit an explicit compare with RVAL
14566 to set the condition flags. If this is not used it will be removed by
14567 later passes. */
14568 if (strong_zero_p)
14569 {
14570 cond = gen_rtx_REG (CCmode, CC_REGNUM);
14571 x = gen_rtx_COMPARE (CCmode, rval, const0_rtx);
14572 emit_insn (gen_rtx_SET (cond, x));
14573 }
ab876106
MW
14574 /* Emit any final barrier needed for a __sync operation. */
14575 if (is_mm_sync (model))
14576 aarch64_emit_post_barrier (model);
0462169c
SN
14577}
14578
68729b06
MW
14579/* Emit a BIC instruction. */
14580
14581static void
14582aarch64_emit_bic (machine_mode mode, rtx dst, rtx s1, rtx s2, int shift)
14583{
14584 rtx shift_rtx = GEN_INT (shift);
14585 rtx (*gen) (rtx, rtx, rtx, rtx);
14586
14587 switch (mode)
14588 {
4e10a5a7
RS
14589 case E_SImode: gen = gen_and_one_cmpl_lshrsi3; break;
14590 case E_DImode: gen = gen_and_one_cmpl_lshrdi3; break;
68729b06
MW
14591 default:
14592 gcc_unreachable ();
14593 }
14594
14595 emit_insn (gen (dst, s2, shift_rtx, s1));
14596}
14597
9cd7b720
MW
14598/* Emit an atomic swap. */
14599
14600static void
14601aarch64_emit_atomic_swap (machine_mode mode, rtx dst, rtx value,
14602 rtx mem, rtx model)
14603{
14604 rtx (*gen) (rtx, rtx, rtx, rtx);
14605
14606 switch (mode)
14607 {
4e10a5a7
RS
14608 case E_QImode: gen = gen_aarch64_atomic_swpqi; break;
14609 case E_HImode: gen = gen_aarch64_atomic_swphi; break;
14610 case E_SImode: gen = gen_aarch64_atomic_swpsi; break;
14611 case E_DImode: gen = gen_aarch64_atomic_swpdi; break;
9cd7b720
MW
14612 default:
14613 gcc_unreachable ();
14614 }
14615
14616 emit_insn (gen (dst, mem, value, model));
14617}
14618
641c2f8b
MW
14619/* Operations supported by aarch64_emit_atomic_load_op. */
14620
14621enum aarch64_atomic_load_op_code
14622{
14623 AARCH64_LDOP_PLUS, /* A + B */
14624 AARCH64_LDOP_XOR, /* A ^ B */
14625 AARCH64_LDOP_OR, /* A | B */
14626 AARCH64_LDOP_BIC /* A & ~B */
14627};
14628
14629/* Emit an atomic load-operate. */
14630
14631static void
14632aarch64_emit_atomic_load_op (enum aarch64_atomic_load_op_code code,
14633 machine_mode mode, rtx dst, rtx src,
14634 rtx mem, rtx model)
14635{
14636 typedef rtx (*aarch64_atomic_load_op_fn) (rtx, rtx, rtx, rtx);
14637 const aarch64_atomic_load_op_fn plus[] =
14638 {
14639 gen_aarch64_atomic_loadaddqi,
14640 gen_aarch64_atomic_loadaddhi,
14641 gen_aarch64_atomic_loadaddsi,
14642 gen_aarch64_atomic_loadadddi
14643 };
14644 const aarch64_atomic_load_op_fn eor[] =
14645 {
14646 gen_aarch64_atomic_loadeorqi,
14647 gen_aarch64_atomic_loadeorhi,
14648 gen_aarch64_atomic_loadeorsi,
14649 gen_aarch64_atomic_loadeordi
14650 };
14651 const aarch64_atomic_load_op_fn ior[] =
14652 {
14653 gen_aarch64_atomic_loadsetqi,
14654 gen_aarch64_atomic_loadsethi,
14655 gen_aarch64_atomic_loadsetsi,
14656 gen_aarch64_atomic_loadsetdi
14657 };
14658 const aarch64_atomic_load_op_fn bic[] =
14659 {
14660 gen_aarch64_atomic_loadclrqi,
14661 gen_aarch64_atomic_loadclrhi,
14662 gen_aarch64_atomic_loadclrsi,
14663 gen_aarch64_atomic_loadclrdi
14664 };
14665 aarch64_atomic_load_op_fn gen;
14666 int idx = 0;
14667
14668 switch (mode)
14669 {
4e10a5a7
RS
14670 case E_QImode: idx = 0; break;
14671 case E_HImode: idx = 1; break;
14672 case E_SImode: idx = 2; break;
14673 case E_DImode: idx = 3; break;
641c2f8b
MW
14674 default:
14675 gcc_unreachable ();
14676 }
14677
14678 switch (code)
14679 {
14680 case AARCH64_LDOP_PLUS: gen = plus[idx]; break;
14681 case AARCH64_LDOP_XOR: gen = eor[idx]; break;
14682 case AARCH64_LDOP_OR: gen = ior[idx]; break;
14683 case AARCH64_LDOP_BIC: gen = bic[idx]; break;
14684 default:
14685 gcc_unreachable ();
14686 }
14687
14688 emit_insn (gen (dst, mem, src, model));
14689}
14690
14691/* Emit an atomic load+operate. CODE is the operation. OUT_DATA is the
68729b06
MW
14692 location to store the data read from memory. OUT_RESULT is the location to
14693 store the result of the operation. MEM is the memory location to read and
14694 modify. MODEL_RTX is the memory ordering to use. VALUE is the second
14695 operand for the operation. Either OUT_DATA or OUT_RESULT, but not both, can
14696 be NULL. */
9cd7b720
MW
14697
14698void
68729b06 14699aarch64_gen_atomic_ldop (enum rtx_code code, rtx out_data, rtx out_result,
9cd7b720
MW
14700 rtx mem, rtx value, rtx model_rtx)
14701{
14702 machine_mode mode = GET_MODE (mem);
641c2f8b
MW
14703 machine_mode wmode = (mode == DImode ? DImode : SImode);
14704 const bool short_mode = (mode < SImode);
14705 aarch64_atomic_load_op_code ldop_code;
14706 rtx src;
14707 rtx x;
14708
14709 if (out_data)
14710 out_data = gen_lowpart (mode, out_data);
9cd7b720 14711
68729b06
MW
14712 if (out_result)
14713 out_result = gen_lowpart (mode, out_result);
14714
641c2f8b
MW
14715 /* Make sure the value is in a register, putting it into a destination
14716 register if it needs to be manipulated. */
14717 if (!register_operand (value, mode)
14718 || code == AND || code == MINUS)
14719 {
68729b06 14720 src = out_result ? out_result : out_data;
641c2f8b
MW
14721 emit_move_insn (src, gen_lowpart (mode, value));
14722 }
14723 else
14724 src = value;
14725 gcc_assert (register_operand (src, mode));
9cd7b720 14726
641c2f8b
MW
14727 /* Preprocess the data for the operation as necessary. If the operation is
14728 a SET then emit a swap instruction and finish. */
9cd7b720
MW
14729 switch (code)
14730 {
14731 case SET:
641c2f8b 14732 aarch64_emit_atomic_swap (mode, out_data, src, mem, model_rtx);
9cd7b720
MW
14733 return;
14734
641c2f8b
MW
14735 case MINUS:
14736 /* Negate the value and treat it as a PLUS. */
14737 {
14738 rtx neg_src;
14739
14740 /* Resize the value if necessary. */
14741 if (short_mode)
14742 src = gen_lowpart (wmode, src);
14743
14744 neg_src = gen_rtx_NEG (wmode, src);
14745 emit_insn (gen_rtx_SET (src, neg_src));
14746
14747 if (short_mode)
14748 src = gen_lowpart (mode, src);
14749 }
14750 /* Fall-through. */
14751 case PLUS:
14752 ldop_code = AARCH64_LDOP_PLUS;
14753 break;
14754
14755 case IOR:
14756 ldop_code = AARCH64_LDOP_OR;
14757 break;
14758
14759 case XOR:
14760 ldop_code = AARCH64_LDOP_XOR;
14761 break;
14762
14763 case AND:
14764 {
14765 rtx not_src;
14766
14767 /* Resize the value if necessary. */
14768 if (short_mode)
14769 src = gen_lowpart (wmode, src);
14770
14771 not_src = gen_rtx_NOT (wmode, src);
14772 emit_insn (gen_rtx_SET (src, not_src));
14773
14774 if (short_mode)
14775 src = gen_lowpart (mode, src);
14776 }
14777 ldop_code = AARCH64_LDOP_BIC;
14778 break;
14779
9cd7b720
MW
14780 default:
14781 /* The operation can't be done with atomic instructions. */
14782 gcc_unreachable ();
14783 }
641c2f8b
MW
14784
14785 aarch64_emit_atomic_load_op (ldop_code, mode, out_data, src, mem, model_rtx);
68729b06
MW
14786
14787 /* If necessary, calculate the data in memory after the update by redoing the
14788 operation from values in registers. */
14789 if (!out_result)
14790 return;
14791
14792 if (short_mode)
14793 {
14794 src = gen_lowpart (wmode, src);
14795 out_data = gen_lowpart (wmode, out_data);
14796 out_result = gen_lowpart (wmode, out_result);
14797 }
14798
14799 x = NULL_RTX;
14800
14801 switch (code)
14802 {
14803 case MINUS:
14804 case PLUS:
14805 x = gen_rtx_PLUS (wmode, out_data, src);
14806 break;
14807 case IOR:
14808 x = gen_rtx_IOR (wmode, out_data, src);
14809 break;
14810 case XOR:
14811 x = gen_rtx_XOR (wmode, out_data, src);
14812 break;
14813 case AND:
14814 aarch64_emit_bic (wmode, out_result, out_data, src, 0);
14815 return;
14816 default:
14817 gcc_unreachable ();
14818 }
14819
14820 emit_set_insn (out_result, x);
14821
14822 return;
9cd7b720
MW
14823}
14824
0462169c
SN
14825/* Split an atomic operation. */
14826
14827void
14828aarch64_split_atomic_op (enum rtx_code code, rtx old_out, rtx new_out, rtx mem,
9cd7b720 14829 rtx value, rtx model_rtx, rtx cond)
0462169c 14830{
ef4bddc2
RS
14831 machine_mode mode = GET_MODE (mem);
14832 machine_mode wmode = (mode == DImode ? DImode : SImode);
f70fb3b6
MW
14833 const enum memmodel model = memmodel_from_int (INTVAL (model_rtx));
14834 const bool is_sync = is_mm_sync (model);
5d8a22a5
DM
14835 rtx_code_label *label;
14836 rtx x;
0462169c 14837
9cd7b720 14838 /* Split the atomic operation into a sequence. */
0462169c
SN
14839 label = gen_label_rtx ();
14840 emit_label (label);
14841
14842 if (new_out)
14843 new_out = gen_lowpart (wmode, new_out);
14844 if (old_out)
14845 old_out = gen_lowpart (wmode, old_out);
14846 else
14847 old_out = new_out;
14848 value = simplify_gen_subreg (wmode, value, mode, 0);
14849
f70fb3b6
MW
14850 /* The initial load can be relaxed for a __sync operation since a final
14851 barrier will be emitted to stop code hoisting. */
14852 if (is_sync)
14853 aarch64_emit_load_exclusive (mode, old_out, mem,
14854 GEN_INT (MEMMODEL_RELAXED));
14855 else
14856 aarch64_emit_load_exclusive (mode, old_out, mem, model_rtx);
0462169c
SN
14857
14858 switch (code)
14859 {
14860 case SET:
14861 new_out = value;
14862 break;
14863
14864 case NOT:
14865 x = gen_rtx_AND (wmode, old_out, value);
f7df4a84 14866 emit_insn (gen_rtx_SET (new_out, x));
0462169c 14867 x = gen_rtx_NOT (wmode, new_out);
f7df4a84 14868 emit_insn (gen_rtx_SET (new_out, x));
0462169c
SN
14869 break;
14870
14871 case MINUS:
14872 if (CONST_INT_P (value))
14873 {
14874 value = GEN_INT (-INTVAL (value));
14875 code = PLUS;
14876 }
14877 /* Fall through. */
14878
14879 default:
14880 x = gen_rtx_fmt_ee (code, wmode, old_out, value);
f7df4a84 14881 emit_insn (gen_rtx_SET (new_out, x));
0462169c
SN
14882 break;
14883 }
14884
14885 aarch64_emit_store_exclusive (mode, cond, mem,
14886 gen_lowpart (mode, new_out), model_rtx);
14887
6e1eaca9
RE
14888 if (aarch64_track_speculation)
14889 {
14890 /* Emit an explicit compare instruction, so that we can correctly
14891 track the condition codes. */
14892 rtx cc_reg = aarch64_gen_compare_reg (NE, cond, const0_rtx);
14893 x = gen_rtx_NE (GET_MODE (cc_reg), cc_reg, const0_rtx);
14894 }
14895 else
14896 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
14897
0462169c
SN
14898 x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
14899 gen_rtx_LABEL_REF (Pmode, label), pc_rtx);
f7df4a84 14900 aarch64_emit_unlikely_jump (gen_rtx_SET (pc_rtx, x));
f70fb3b6
MW
14901
14902 /* Emit any final barrier needed for a __sync operation. */
14903 if (is_sync)
14904 aarch64_emit_post_barrier (model);
0462169c
SN
14905}
14906
c2ec330c
AL
14907static void
14908aarch64_init_libfuncs (void)
14909{
14910 /* Half-precision float operations. The compiler handles all operations
14911 with NULL libfuncs by converting to SFmode. */
14912
14913 /* Conversions. */
14914 set_conv_libfunc (trunc_optab, HFmode, SFmode, "__gnu_f2h_ieee");
14915 set_conv_libfunc (sext_optab, SFmode, HFmode, "__gnu_h2f_ieee");
14916
14917 /* Arithmetic. */
14918 set_optab_libfunc (add_optab, HFmode, NULL);
14919 set_optab_libfunc (sdiv_optab, HFmode, NULL);
14920 set_optab_libfunc (smul_optab, HFmode, NULL);
14921 set_optab_libfunc (neg_optab, HFmode, NULL);
14922 set_optab_libfunc (sub_optab, HFmode, NULL);
14923
14924 /* Comparisons. */
14925 set_optab_libfunc (eq_optab, HFmode, NULL);
14926 set_optab_libfunc (ne_optab, HFmode, NULL);
14927 set_optab_libfunc (lt_optab, HFmode, NULL);
14928 set_optab_libfunc (le_optab, HFmode, NULL);
14929 set_optab_libfunc (ge_optab, HFmode, NULL);
14930 set_optab_libfunc (gt_optab, HFmode, NULL);
14931 set_optab_libfunc (unord_optab, HFmode, NULL);
14932}
14933
43e9d192 14934/* Target hook for c_mode_for_suffix. */
ef4bddc2 14935static machine_mode
43e9d192
IB
14936aarch64_c_mode_for_suffix (char suffix)
14937{
14938 if (suffix == 'q')
14939 return TFmode;
14940
14941 return VOIDmode;
14942}
14943
3520f7cc
JG
14944/* We can only represent floating point constants which will fit in
14945 "quarter-precision" values. These values are characterised by
14946 a sign bit, a 4-bit mantissa and a 3-bit exponent. And are given
14947 by:
14948
14949 (-1)^s * (n/16) * 2^r
14950
14951 Where:
14952 's' is the sign bit.
14953 'n' is an integer in the range 16 <= n <= 31.
14954 'r' is an integer in the range -3 <= r <= 4. */
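/* Illustrative examples: 1.5 = (24/16) * 2^0 and 31.0 = (31/16) * 2^4 fit
   this form, whereas 0.1 does not (it has no finite binary mantissa of the
   required shape) and 0.0 is excluded explicitly below.  */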
14955
14956/* Return true iff X can be represented as a quarter-precision
14957 floating point immediate operand. Note that we cannot represent 0.0. */
14958bool
14959aarch64_float_const_representable_p (rtx x)
14960{
14961 /* This represents our current view of how many bits
14962 make up the mantissa. */
14963 int point_pos = 2 * HOST_BITS_PER_WIDE_INT - 1;
ba96cdfb 14964 int exponent;
3520f7cc 14965 unsigned HOST_WIDE_INT mantissa, mask;
3520f7cc 14966 REAL_VALUE_TYPE r, m;
807e902e 14967 bool fail;
3520f7cc
JG
14968
14969 if (!CONST_DOUBLE_P (x))
14970 return false;
14971
c2ec330c
AL
14972 /* We don't support HFmode constants yet. */
14973 if (GET_MODE (x) == VOIDmode || GET_MODE (x) == HFmode)
94bfa2da
TV
14974 return false;
14975
34a72c33 14976 r = *CONST_DOUBLE_REAL_VALUE (x);
3520f7cc
JG
14977
14978 /* We cannot represent infinities, NaNs or +/-zero. We won't
14979 know if we have +zero until we analyse the mantissa, but we
14980 can reject the other invalid values. */
14981 if (REAL_VALUE_ISINF (r) || REAL_VALUE_ISNAN (r)
14982 || REAL_VALUE_MINUS_ZERO (r))
14983 return false;
14984
ba96cdfb 14985 /* Extract exponent. */
3520f7cc
JG
14986 r = real_value_abs (&r);
14987 exponent = REAL_EXP (&r);
14988
14989 /* For the mantissa, we expand into two HOST_WIDE_INTS, apart from the
14990 highest (sign) bit, with a fixed binary point at bit point_pos.
14991 The two halves of W hold the low and high parts of the mantissa.
14992 WARNING: If we ever have a representation using more than 2 * H_W_I - 1
14993 bits for the mantissa, this can fail (low bits will be lost). */
14994 real_ldexp (&m, &r, point_pos - exponent);
807e902e 14995 wide_int w = real_to_integer (&m, &fail, HOST_BITS_PER_WIDE_INT * 2);
3520f7cc
JG
14996
14997 /* If the low part of the mantissa has bits set we cannot represent
14998 the value. */
d9074b29 14999 if (w.ulow () != 0)
3520f7cc
JG
15000 return false;
15001 /* We have rejected the lower HOST_WIDE_INT, so update our
15002 understanding of how many bits lie in the mantissa and
15003 look only at the high HOST_WIDE_INT. */
807e902e 15004 mantissa = w.elt (1);
3520f7cc
JG
15005 point_pos -= HOST_BITS_PER_WIDE_INT;
15006
15007 /* We can only represent values with a mantissa of the form 1.xxxx. */
15008 mask = ((unsigned HOST_WIDE_INT)1 << (point_pos - 5)) - 1;
15009 if ((mantissa & mask) != 0)
15010 return false;
15011
15012 /* Having filtered unrepresentable values, we may now remove all
15013 but the highest 5 bits. */
15014 mantissa >>= point_pos - 5;
15015
15016 /* We cannot represent the value 0.0, so reject it. This is handled
15017 elsewhere. */
15018 if (mantissa == 0)
15019 return false;
15020
15021 /* Then, as bit 4 is always set, we can mask it off, leaving
15022 the mantissa in the range [0, 15]. */
15023 mantissa &= ~(1 << 4);
15024 gcc_assert (mantissa <= 15);
15025
15026 /* GCC internally does not use IEEE754-like encoding (where normalized
15027 significands are in the range [1, 2). GCC uses [0.5, 1) (see real.c).
15028 Our mantissa values are shifted 4 places to the left relative to
15029 normalized IEEE754 so we must modify the exponent returned by REAL_EXP
15030 by 5 places to correct for GCC's representation. */
15031 exponent = 5 - exponent;
15032
15033 return (exponent >= 0 && exponent <= 7);
15034}
15035
ab6501d7
SD
15036/* Returns the string with the instruction for AdvSIMD MOVI, MVNI, ORR or BIC
15037 immediate with a CONST_VECTOR of MODE and WIDTH. WHICH selects whether to
15038 output MOVI/MVNI, ORR or BIC immediate. */
3520f7cc 15039char*
b187677b 15040aarch64_output_simd_mov_immediate (rtx const_vector, unsigned width,
ab6501d7 15041 enum simd_immediate_check which)
3520f7cc 15042{
3ea63f60 15043 bool is_valid;
3520f7cc 15044 static char templ[40];
3520f7cc 15045 const char *mnemonic;
e4f0f84d 15046 const char *shift_op;
3520f7cc 15047 unsigned int lane_count = 0;
81c2dfb9 15048 char element_char;
3520f7cc 15049
b187677b 15050 struct simd_immediate_info info;
48063b9d
IB
15051
15052 /* This will return true to show const_vector is legal for use as either
ab6501d7
SD
15053 an AdvSIMD MOVI instruction (or, implicitly, MVNI), ORR or BIC immediate.
15054 It will also update INFO to show how the immediate should be generated.
15055 WHICH selects whether to check for MOVI/MVNI, ORR or BIC. */
b187677b 15056 is_valid = aarch64_simd_valid_immediate (const_vector, &info, which);
3520f7cc
JG
15057 gcc_assert (is_valid);
15058
b187677b
RS
15059 element_char = sizetochar (GET_MODE_BITSIZE (info.elt_mode));
15060 lane_count = width / GET_MODE_BITSIZE (info.elt_mode);
48063b9d 15061
b187677b 15062 if (GET_MODE_CLASS (info.elt_mode) == MODE_FLOAT)
3520f7cc 15063 {
b187677b 15064 gcc_assert (info.shift == 0 && info.insn == simd_immediate_info::MOV);
0d8e1702
KT
15065 /* For FP zero change it to a CONST_INT 0 and use the integer SIMD
15066 move immediate path. */
48063b9d
IB
15067 if (aarch64_float_const_zero_rtx_p (info.value))
15068 info.value = GEN_INT (0);
15069 else
15070 {
83faf7d0 15071 const unsigned int buf_size = 20;
48063b9d 15072 char float_buf[buf_size] = {'\0'};
34a72c33
RS
15073 real_to_decimal_for_mode (float_buf,
15074 CONST_DOUBLE_REAL_VALUE (info.value),
b187677b 15075 buf_size, buf_size, 1, info.elt_mode);
48063b9d
IB
15076
15077 if (lane_count == 1)
15078 snprintf (templ, sizeof (templ), "fmov\t%%d0, %s", float_buf);
15079 else
15080 snprintf (templ, sizeof (templ), "fmov\t%%0.%d%c, %s",
81c2dfb9 15081 lane_count, element_char, float_buf);
48063b9d
IB
15082 return templ;
15083 }
3520f7cc 15084 }
3520f7cc 15085
0d8e1702 15086 gcc_assert (CONST_INT_P (info.value));
ab6501d7
SD
15087
15088 if (which == AARCH64_CHECK_MOV)
15089 {
b187677b
RS
15090 mnemonic = info.insn == simd_immediate_info::MVN ? "mvni" : "movi";
15091 shift_op = info.modifier == simd_immediate_info::MSL ? "msl" : "lsl";
ab6501d7
SD
15092 if (lane_count == 1)
15093 snprintf (templ, sizeof (templ), "%s\t%%d0, " HOST_WIDE_INT_PRINT_HEX,
15094 mnemonic, UINTVAL (info.value));
15095 else if (info.shift)
15096 snprintf (templ, sizeof (templ), "%s\t%%0.%d%c, "
15097 HOST_WIDE_INT_PRINT_HEX ", %s %d", mnemonic, lane_count,
15098 element_char, UINTVAL (info.value), shift_op, info.shift);
15099 else
15100 snprintf (templ, sizeof (templ), "%s\t%%0.%d%c, "
15101 HOST_WIDE_INT_PRINT_HEX, mnemonic, lane_count,
15102 element_char, UINTVAL (info.value));
15103 }
3520f7cc 15104 else
ab6501d7
SD
15105 {
15106 /* For AARCH64_CHECK_BIC and AARCH64_CHECK_ORR. */
b187677b 15107 mnemonic = info.insn == simd_immediate_info::MVN ? "bic" : "orr";
ab6501d7
SD
15108 if (info.shift)
15109 snprintf (templ, sizeof (templ), "%s\t%%0.%d%c, #"
15110 HOST_WIDE_INT_PRINT_DEC ", %s #%d", mnemonic, lane_count,
15111 element_char, UINTVAL (info.value), "lsl", info.shift);
15112 else
15113 snprintf (templ, sizeof (templ), "%s\t%%0.%d%c, #"
15114 HOST_WIDE_INT_PRINT_DEC, mnemonic, lane_count,
15115 element_char, UINTVAL (info.value));
15116 }
3520f7cc
JG
15117 return templ;
15118}
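/* An illustrative sketch of the templates built above (editorial example,
   not part of the original source; exact assembly spelling assumed): for a
   V4SImode CONST_VECTOR that duplicates 0x20000 in every lane,
   aarch64_simd_valid_immediate decomposes the value as imm8 = 2 with a left
   shift of 16, so with WHICH == AARCH64_CHECK_MOV the returned template
   would be along the lines of

     movi    %0.4s, 0x2, lsl 16

   while the ORR/BIC branch would instead print something like
   "orr    %0.4s, #2, lsl #16" for a suitable immediate.  */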
15119
b7342d25 15120char*
77e994c9 15121aarch64_output_scalar_simd_mov_immediate (rtx immediate, scalar_int_mode mode)
b7342d25 15122{
a2170965
TC
15123
 15124 /* If a floating-point number was passed and we want to use it in an
 15125 integer mode, do the conversion to integer. */
15126 if (CONST_DOUBLE_P (immediate) && GET_MODE_CLASS (mode) == MODE_INT)
15127 {
15128 unsigned HOST_WIDE_INT ival;
15129 if (!aarch64_reinterpret_float_as_int (immediate, &ival))
15130 gcc_unreachable ();
15131 immediate = gen_int_mode (ival, mode);
15132 }
15133
ef4bddc2 15134 machine_mode vmode;
a2170965
TC
 15135 /* Use a 64-bit vector mode for everything except DImode/DFmode, where we
 15136 use a 128-bit vector mode. */
15137 int width = GET_MODE_BITSIZE (mode) == 64 ? 128 : 64;
b7342d25 15138
a2170965 15139 vmode = aarch64_simd_container_mode (mode, width);
b7342d25 15140 rtx v_op = aarch64_simd_gen_const_vector_dup (vmode, INTVAL (immediate));
b187677b 15141 return aarch64_output_simd_mov_immediate (v_op, width);
b7342d25
IB
15142}
15143
43cacb12
RS
15144/* Return the output string to use for moving immediate CONST_VECTOR
15145 into an SVE register. */
15146
15147char *
15148aarch64_output_sve_mov_immediate (rtx const_vector)
15149{
15150 static char templ[40];
15151 struct simd_immediate_info info;
15152 char element_char;
15153
15154 bool is_valid = aarch64_simd_valid_immediate (const_vector, &info);
15155 gcc_assert (is_valid);
15156
15157 element_char = sizetochar (GET_MODE_BITSIZE (info.elt_mode));
15158
15159 if (info.step)
15160 {
15161 snprintf (templ, sizeof (templ), "index\t%%0.%c, #"
15162 HOST_WIDE_INT_PRINT_DEC ", #" HOST_WIDE_INT_PRINT_DEC,
15163 element_char, INTVAL (info.value), INTVAL (info.step));
15164 return templ;
15165 }
15166
15167 if (GET_MODE_CLASS (info.elt_mode) == MODE_FLOAT)
15168 {
15169 if (aarch64_float_const_zero_rtx_p (info.value))
15170 info.value = GEN_INT (0);
15171 else
15172 {
15173 const int buf_size = 20;
15174 char float_buf[buf_size] = {};
15175 real_to_decimal_for_mode (float_buf,
15176 CONST_DOUBLE_REAL_VALUE (info.value),
15177 buf_size, buf_size, 1, info.elt_mode);
15178
15179 snprintf (templ, sizeof (templ), "fmov\t%%0.%c, #%s",
15180 element_char, float_buf);
15181 return templ;
15182 }
15183 }
15184
15185 snprintf (templ, sizeof (templ), "mov\t%%0.%c, #" HOST_WIDE_INT_PRINT_DEC,
15186 element_char, INTVAL (info.value));
15187 return templ;
15188}
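/* An illustrative sketch of the SVE templates above (editorial example,
   assuming typical SVE assembly syntax): a CONST_VECTOR describing the
   series 0, 1, 2, ... in .s elements takes the "index" path and prints
   roughly

     index   %0.s, #0, #1

   while a vector duplicating the integer 5 in every .b element prints
   "mov    %0.b, #5".  */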
15189
15190/* Return the asm format for a PTRUE instruction whose destination has
15191 mode MODE. SUFFIX is the element size suffix. */
15192
15193char *
15194aarch64_output_ptrue (machine_mode mode, char suffix)
15195{
15196 unsigned int nunits;
15197 static char buf[sizeof ("ptrue\t%0.N, vlNNNNN")];
15198 if (GET_MODE_NUNITS (mode).is_constant (&nunits))
15199 snprintf (buf, sizeof (buf), "ptrue\t%%0.%c, vl%d", suffix, nunits);
15200 else
15201 snprintf (buf, sizeof (buf), "ptrue\t%%0.%c, all", suffix);
15202 return buf;
15203}
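/* A quick sketch of the two cases above (illustrative): with a fixed vector
   length, e.g. -msve-vector-bits=256 and .s elements (8 lanes), the result
   is "ptrue\t%0.s, vl8"; for a scalable length it is "ptrue\t%0.s, all".  */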
15204
88b08073
JG
15205/* Split op[0] = {op[1], op[2]} into separate moves of the two halves. */
15206
15207void
15208aarch64_split_combinev16qi (rtx operands[3])
15209{
15210 unsigned int dest = REGNO (operands[0]);
15211 unsigned int src1 = REGNO (operands[1]);
15212 unsigned int src2 = REGNO (operands[2]);
ef4bddc2 15213 machine_mode halfmode = GET_MODE (operands[1]);
462a99aa 15214 unsigned int halfregs = REG_NREGS (operands[1]);
88b08073
JG
15215 rtx destlo, desthi;
15216
15217 gcc_assert (halfmode == V16QImode);
15218
15219 if (src1 == dest && src2 == dest + halfregs)
15220 {
15221 /* No-op move. Can't split to nothing; emit something. */
15222 emit_note (NOTE_INSN_DELETED);
15223 return;
15224 }
15225
15226 /* Preserve register attributes for variable tracking. */
15227 destlo = gen_rtx_REG_offset (operands[0], halfmode, dest, 0);
15228 desthi = gen_rtx_REG_offset (operands[0], halfmode, dest + halfregs,
15229 GET_MODE_SIZE (halfmode));
15230
15231 /* Special case of reversed high/low parts. */
15232 if (reg_overlap_mentioned_p (operands[2], destlo)
15233 && reg_overlap_mentioned_p (operands[1], desthi))
15234 {
15235 emit_insn (gen_xorv16qi3 (operands[1], operands[1], operands[2]));
15236 emit_insn (gen_xorv16qi3 (operands[2], operands[1], operands[2]));
15237 emit_insn (gen_xorv16qi3 (operands[1], operands[1], operands[2]));
15238 }
15239 else if (!reg_overlap_mentioned_p (operands[2], destlo))
15240 {
15241 /* Try to avoid unnecessary moves if part of the result
15242 is in the right place already. */
15243 if (src1 != dest)
15244 emit_move_insn (destlo, operands[1]);
15245 if (src2 != dest + halfregs)
15246 emit_move_insn (desthi, operands[2]);
15247 }
15248 else
15249 {
15250 if (src2 != dest + halfregs)
15251 emit_move_insn (desthi, operands[2]);
15252 if (src1 != dest)
15253 emit_move_insn (destlo, operands[1]);
15254 }
15255}
15256
15257/* vec_perm support. */
15258
88b08073
JG
15259struct expand_vec_perm_d
15260{
15261 rtx target, op0, op1;
e3342de4 15262 vec_perm_indices perm;
ef4bddc2 15263 machine_mode vmode;
43cacb12 15264 unsigned int vec_flags;
88b08073
JG
15265 bool one_vector_p;
15266 bool testing_p;
15267};
15268
15269/* Generate a variable permutation. */
15270
15271static void
15272aarch64_expand_vec_perm_1 (rtx target, rtx op0, rtx op1, rtx sel)
15273{
ef4bddc2 15274 machine_mode vmode = GET_MODE (target);
88b08073
JG
15275 bool one_vector_p = rtx_equal_p (op0, op1);
15276
15277 gcc_checking_assert (vmode == V8QImode || vmode == V16QImode);
15278 gcc_checking_assert (GET_MODE (op0) == vmode);
15279 gcc_checking_assert (GET_MODE (op1) == vmode);
15280 gcc_checking_assert (GET_MODE (sel) == vmode);
15281 gcc_checking_assert (TARGET_SIMD);
15282
15283 if (one_vector_p)
15284 {
15285 if (vmode == V8QImode)
15286 {
15287 /* Expand the argument to a V16QI mode by duplicating it. */
15288 rtx pair = gen_reg_rtx (V16QImode);
15289 emit_insn (gen_aarch64_combinev8qi (pair, op0, op0));
15290 emit_insn (gen_aarch64_tbl1v8qi (target, pair, sel));
15291 }
15292 else
15293 {
15294 emit_insn (gen_aarch64_tbl1v16qi (target, op0, sel));
15295 }
15296 }
15297 else
15298 {
15299 rtx pair;
15300
15301 if (vmode == V8QImode)
15302 {
15303 pair = gen_reg_rtx (V16QImode);
15304 emit_insn (gen_aarch64_combinev8qi (pair, op0, op1));
15305 emit_insn (gen_aarch64_tbl1v8qi (target, pair, sel));
15306 }
15307 else
15308 {
15309 pair = gen_reg_rtx (OImode);
15310 emit_insn (gen_aarch64_combinev16qi (pair, op0, op1));
15311 emit_insn (gen_aarch64_tbl2v16qi (target, pair, sel));
15312 }
15313 }
15314}
15315
80940017
RS
15316/* Expand a vec_perm with the operands given by TARGET, OP0, OP1 and SEL.
15317 NELT is the number of elements in the vector. */
15318
88b08073 15319void
80940017
RS
15320aarch64_expand_vec_perm (rtx target, rtx op0, rtx op1, rtx sel,
15321 unsigned int nelt)
88b08073 15322{
ef4bddc2 15323 machine_mode vmode = GET_MODE (target);
88b08073 15324 bool one_vector_p = rtx_equal_p (op0, op1);
f7c4e5b8 15325 rtx mask;
88b08073
JG
15326
15327 /* The TBL instruction does not use a modulo index, so we must take care
15328 of that ourselves. */
f7c4e5b8
AL
15329 mask = aarch64_simd_gen_const_vector_dup (vmode,
15330 one_vector_p ? nelt - 1 : 2 * nelt - 1);
88b08073
JG
15331 sel = expand_simple_binop (vmode, AND, sel, mask, NULL, 0, OPTAB_LIB_WIDEN);
15332
f7c4e5b8
AL
15333 /* For big-endian, we also need to reverse the index within the vector
15334 (but not which vector). */
15335 if (BYTES_BIG_ENDIAN)
15336 {
15337 /* If one_vector_p, mask is a vector of (nelt - 1)'s already. */
15338 if (!one_vector_p)
15339 mask = aarch64_simd_gen_const_vector_dup (vmode, nelt - 1);
15340 sel = expand_simple_binop (vmode, XOR, sel, mask,
15341 NULL, 0, OPTAB_LIB_WIDEN);
15342 }
88b08073
JG
15343 aarch64_expand_vec_perm_1 (target, op0, op1, sel);
15344}
15345
43cacb12
RS
15346/* Generate (set TARGET (unspec [OP0 OP1] CODE)). */
15347
15348static void
15349emit_unspec2 (rtx target, int code, rtx op0, rtx op1)
15350{
15351 emit_insn (gen_rtx_SET (target,
15352 gen_rtx_UNSPEC (GET_MODE (target),
15353 gen_rtvec (2, op0, op1), code)));
15354}
15355
15356/* Expand an SVE vec_perm with the given operands. */
15357
15358void
15359aarch64_expand_sve_vec_perm (rtx target, rtx op0, rtx op1, rtx sel)
15360{
15361 machine_mode data_mode = GET_MODE (target);
15362 machine_mode sel_mode = GET_MODE (sel);
15363 /* Enforced by the pattern condition. */
15364 int nunits = GET_MODE_NUNITS (sel_mode).to_constant ();
15365
15366 /* Note: vec_perm indices are supposed to wrap when they go beyond the
15367 size of the two value vectors, i.e. the upper bits of the indices
15368 are effectively ignored. SVE TBL instead produces 0 for any
15369 out-of-range indices, so we need to modulo all the vec_perm indices
15370 to ensure they are all in range. */
15371 rtx sel_reg = force_reg (sel_mode, sel);
15372
15373 /* Check if the sel only references the first values vector. */
15374 if (GET_CODE (sel) == CONST_VECTOR
15375 && aarch64_const_vec_all_in_range_p (sel, 0, nunits - 1))
15376 {
15377 emit_unspec2 (target, UNSPEC_TBL, op0, sel_reg);
15378 return;
15379 }
15380
15381 /* Check if the two values vectors are the same. */
15382 if (rtx_equal_p (op0, op1))
15383 {
15384 rtx max_sel = aarch64_simd_gen_const_vector_dup (sel_mode, nunits - 1);
15385 rtx sel_mod = expand_simple_binop (sel_mode, AND, sel_reg, max_sel,
15386 NULL, 0, OPTAB_DIRECT);
15387 emit_unspec2 (target, UNSPEC_TBL, op0, sel_mod);
15388 return;
15389 }
15390
 15391 /* Run TBL on each value vector and combine the results. */
15392
15393 rtx res0 = gen_reg_rtx (data_mode);
15394 rtx res1 = gen_reg_rtx (data_mode);
15395 rtx neg_num_elems = aarch64_simd_gen_const_vector_dup (sel_mode, -nunits);
15396 if (GET_CODE (sel) != CONST_VECTOR
15397 || !aarch64_const_vec_all_in_range_p (sel, 0, 2 * nunits - 1))
15398 {
15399 rtx max_sel = aarch64_simd_gen_const_vector_dup (sel_mode,
15400 2 * nunits - 1);
15401 sel_reg = expand_simple_binop (sel_mode, AND, sel_reg, max_sel,
15402 NULL, 0, OPTAB_DIRECT);
15403 }
15404 emit_unspec2 (res0, UNSPEC_TBL, op0, sel_reg);
15405 rtx sel_sub = expand_simple_binop (sel_mode, PLUS, sel_reg, neg_num_elems,
15406 NULL, 0, OPTAB_DIRECT);
15407 emit_unspec2 (res1, UNSPEC_TBL, op1, sel_sub);
15408 if (GET_MODE_CLASS (data_mode) == MODE_VECTOR_INT)
15409 emit_insn (gen_rtx_SET (target, gen_rtx_IOR (data_mode, res0, res1)));
15410 else
15411 emit_unspec2 (target, UNSPEC_IORF, res0, res1);
15412}
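/* A worked example of the general two-TBL path above (illustrative,
   assuming a 128-bit vector length and .s elements, so nunits == 4):
   for sel = { 1, 4, 6, 3 } we get

     res0 = TBL (op0, { 1, 4, 6, 3 })      ->  { op0[1], 0, 0, op0[3] }
     res1 = TBL (op1, { -3, 0, 2, -1 })    ->  { 0, op1[0], op1[2], 0 }
     target = res0 | res1                  ->  { op0[1], op1[0], op1[2], op0[3] }

   relying on SVE TBL producing zero for out-of-range indices.  */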
15413
cc4d934f
JG
15414/* Recognize patterns suitable for the TRN instructions. */
15415static bool
15416aarch64_evpc_trn (struct expand_vec_perm_d *d)
15417{
6a70badb
RS
15418 HOST_WIDE_INT odd;
15419 poly_uint64 nelt = d->perm.length ();
cc4d934f 15420 rtx out, in0, in1, x;
ef4bddc2 15421 machine_mode vmode = d->vmode;
cc4d934f
JG
15422
15423 if (GET_MODE_UNIT_SIZE (vmode) > 8)
15424 return false;
15425
15426 /* Note that these are little-endian tests.
15427 We correct for big-endian later. */
6a70badb
RS
15428 if (!d->perm[0].is_constant (&odd)
15429 || (odd != 0 && odd != 1)
326ac20e
RS
15430 || !d->perm.series_p (0, 2, odd, 2)
15431 || !d->perm.series_p (1, 2, nelt + odd, 2))
cc4d934f 15432 return false;
cc4d934f
JG
15433
15434 /* Success! */
15435 if (d->testing_p)
15436 return true;
15437
15438 in0 = d->op0;
15439 in1 = d->op1;
43cacb12
RS
15440 /* We don't need a big-endian lane correction for SVE; see the comment
15441 at the head of aarch64-sve.md for details. */
15442 if (BYTES_BIG_ENDIAN && d->vec_flags == VEC_ADVSIMD)
cc4d934f
JG
15443 {
15444 x = in0, in0 = in1, in1 = x;
15445 odd = !odd;
15446 }
15447 out = d->target;
15448
3f8334a5
RS
15449 emit_set_insn (out, gen_rtx_UNSPEC (vmode, gen_rtvec (2, in0, in1),
15450 odd ? UNSPEC_TRN2 : UNSPEC_TRN1));
cc4d934f
JG
15451 return true;
15452}
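/* For example (illustrative): on V4SImode the permutation { 0, 4, 2, 6 }
   satisfies the tests above with odd == 0 and maps to TRN1, while
   { 1, 5, 3, 7 } has odd == 1 and maps to TRN2.  */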
15453
15454/* Recognize patterns suitable for the UZP instructions. */
15455static bool
15456aarch64_evpc_uzp (struct expand_vec_perm_d *d)
15457{
6a70badb 15458 HOST_WIDE_INT odd;
cc4d934f 15459 rtx out, in0, in1, x;
ef4bddc2 15460 machine_mode vmode = d->vmode;
cc4d934f
JG
15461
15462 if (GET_MODE_UNIT_SIZE (vmode) > 8)
15463 return false;
15464
15465 /* Note that these are little-endian tests.
15466 We correct for big-endian later. */
6a70badb
RS
15467 if (!d->perm[0].is_constant (&odd)
15468 || (odd != 0 && odd != 1)
326ac20e 15469 || !d->perm.series_p (0, 1, odd, 2))
cc4d934f 15470 return false;
cc4d934f
JG
15471
15472 /* Success! */
15473 if (d->testing_p)
15474 return true;
15475
15476 in0 = d->op0;
15477 in1 = d->op1;
43cacb12
RS
15478 /* We don't need a big-endian lane correction for SVE; see the comment
15479 at the head of aarch64-sve.md for details. */
15480 if (BYTES_BIG_ENDIAN && d->vec_flags == VEC_ADVSIMD)
cc4d934f
JG
15481 {
15482 x = in0, in0 = in1, in1 = x;
15483 odd = !odd;
15484 }
15485 out = d->target;
15486
3f8334a5
RS
15487 emit_set_insn (out, gen_rtx_UNSPEC (vmode, gen_rtvec (2, in0, in1),
15488 odd ? UNSPEC_UZP2 : UNSPEC_UZP1));
cc4d934f
JG
15489 return true;
15490}
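/* For example (illustrative): on V4SImode the permutation { 0, 2, 4, 6 }
   (odd == 0) maps to UZP1 and { 1, 3, 5, 7 } (odd == 1) maps to UZP2.  */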
15491
15492/* Recognize patterns suitable for the ZIP instructions. */
15493static bool
15494aarch64_evpc_zip (struct expand_vec_perm_d *d)
15495{
6a70badb
RS
15496 unsigned int high;
15497 poly_uint64 nelt = d->perm.length ();
cc4d934f 15498 rtx out, in0, in1, x;
ef4bddc2 15499 machine_mode vmode = d->vmode;
cc4d934f
JG
15500
15501 if (GET_MODE_UNIT_SIZE (vmode) > 8)
15502 return false;
15503
15504 /* Note that these are little-endian tests.
15505 We correct for big-endian later. */
6a70badb
RS
15506 poly_uint64 first = d->perm[0];
15507 if ((maybe_ne (first, 0U) && maybe_ne (first * 2, nelt))
15508 || !d->perm.series_p (0, 2, first, 1)
15509 || !d->perm.series_p (1, 2, first + nelt, 1))
cc4d934f 15510 return false;
6a70badb 15511 high = maybe_ne (first, 0U);
cc4d934f
JG
15512
15513 /* Success! */
15514 if (d->testing_p)
15515 return true;
15516
15517 in0 = d->op0;
15518 in1 = d->op1;
43cacb12
RS
15519 /* We don't need a big-endian lane correction for SVE; see the comment
15520 at the head of aarch64-sve.md for details. */
15521 if (BYTES_BIG_ENDIAN && d->vec_flags == VEC_ADVSIMD)
cc4d934f
JG
15522 {
15523 x = in0, in0 = in1, in1 = x;
15524 high = !high;
15525 }
15526 out = d->target;
15527
3f8334a5
RS
15528 emit_set_insn (out, gen_rtx_UNSPEC (vmode, gen_rtvec (2, in0, in1),
15529 high ? UNSPEC_ZIP2 : UNSPEC_ZIP1));
cc4d934f
JG
15530 return true;
15531}
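/* For example (illustrative): on V4SImode the permutation { 0, 4, 1, 5 }
   (first == 0) maps to ZIP1 and { 2, 6, 3, 7 } (first == nelt/2) maps
   to ZIP2.  */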
15532
ae0533da
AL
15533/* Recognize patterns for the EXT insn. */
15534
15535static bool
15536aarch64_evpc_ext (struct expand_vec_perm_d *d)
15537{
6a70badb 15538 HOST_WIDE_INT location;
ae0533da
AL
15539 rtx offset;
15540
6a70badb
RS
15541 /* The first element always refers to the first vector.
15542 Check if the extracted indices are increasing by one. */
43cacb12
RS
15543 if (d->vec_flags == VEC_SVE_PRED
15544 || !d->perm[0].is_constant (&location)
6a70badb 15545 || !d->perm.series_p (0, 1, location, 1))
326ac20e 15546 return false;
ae0533da 15547
ae0533da
AL
15548 /* Success! */
15549 if (d->testing_p)
15550 return true;
15551
b31e65bb 15552 /* The case where (location == 0) is a no-op for both big- and little-endian,
43cacb12 15553 and is removed by the mid-end at optimization levels -O1 and higher.
b31e65bb 15554
43cacb12
RS
15555 We don't need a big-endian lane correction for SVE; see the comment
15556 at the head of aarch64-sve.md for details. */
15557 if (BYTES_BIG_ENDIAN && location != 0 && d->vec_flags == VEC_ADVSIMD)
ae0533da
AL
15558 {
15559 /* After setup, we want the high elements of the first vector (stored
15560 at the LSB end of the register), and the low elements of the second
15561 vector (stored at the MSB end of the register). So swap. */
cb5c6c29 15562 std::swap (d->op0, d->op1);
6a70badb
RS
15563 /* location != 0 (above), so safe to assume (nelt - location) < nelt.
15564 to_constant () is safe since this is restricted to Advanced SIMD
15565 vectors. */
15566 location = d->perm.length ().to_constant () - location;
ae0533da
AL
15567 }
15568
15569 offset = GEN_INT (location);
3f8334a5
RS
15570 emit_set_insn (d->target,
15571 gen_rtx_UNSPEC (d->vmode,
15572 gen_rtvec (3, d->op0, d->op1, offset),
15573 UNSPEC_EXT));
ae0533da
AL
15574 return true;
15575}
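/* For example (illustrative): on V4SImode the permutation { 1, 2, 3, 4 }
   is recognised with location == 1; the EXT pattern then scales that by
   the element size, giving something like
   "ext    v0.16b, v1.16b, v2.16b, #4" on little-endian.  */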
15576
43cacb12
RS
15577/* Recognize patterns for the REV{64,32,16} insns, which reverse elements
15578 within each 64-bit, 32-bit or 16-bit granule. */
923fcec3
AL
15579
15580static bool
43cacb12 15581aarch64_evpc_rev_local (struct expand_vec_perm_d *d)
923fcec3 15582{
6a70badb
RS
15583 HOST_WIDE_INT diff;
15584 unsigned int i, size, unspec;
43cacb12 15585 machine_mode pred_mode;
923fcec3 15586
43cacb12
RS
15587 if (d->vec_flags == VEC_SVE_PRED
15588 || !d->one_vector_p
6a70badb 15589 || !d->perm[0].is_constant (&diff))
923fcec3
AL
15590 return false;
15591
3f8334a5
RS
15592 size = (diff + 1) * GET_MODE_UNIT_SIZE (d->vmode);
15593 if (size == 8)
43cacb12
RS
15594 {
15595 unspec = UNSPEC_REV64;
15596 pred_mode = VNx2BImode;
15597 }
3f8334a5 15598 else if (size == 4)
43cacb12
RS
15599 {
15600 unspec = UNSPEC_REV32;
15601 pred_mode = VNx4BImode;
15602 }
3f8334a5 15603 else if (size == 2)
43cacb12
RS
15604 {
15605 unspec = UNSPEC_REV16;
15606 pred_mode = VNx8BImode;
15607 }
3f8334a5
RS
15608 else
15609 return false;
923fcec3 15610
326ac20e
RS
15611 unsigned int step = diff + 1;
15612 for (i = 0; i < step; ++i)
15613 if (!d->perm.series_p (i, step, diff - i, step))
15614 return false;
923fcec3
AL
15615
15616 /* Success! */
15617 if (d->testing_p)
15618 return true;
15619
43cacb12
RS
15620 rtx src = gen_rtx_UNSPEC (d->vmode, gen_rtvec (1, d->op0), unspec);
15621 if (d->vec_flags == VEC_SVE_DATA)
15622 {
15623 rtx pred = force_reg (pred_mode, CONSTM1_RTX (pred_mode));
15624 src = gen_rtx_UNSPEC (d->vmode, gen_rtvec (2, pred, src),
15625 UNSPEC_MERGE_PTRUE);
15626 }
15627 emit_set_insn (d->target, src);
15628 return true;
15629}
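/* For example (illustrative): on V4SImode the permutation { 1, 0, 3, 2 }
   has diff == 1, giving size == 8 and hence REV64 (swap adjacent 32-bit
   elements within each 64-bit granule).  A full reversal { 3, 2, 1, 0 }
   is not matched here; it is handled by aarch64_evpc_rev_global for SVE
   or falls through to TBL for Advanced SIMD.  */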
15630
15631/* Recognize patterns for the REV insn, which reverses elements within
15632 a full vector. */
15633
15634static bool
15635aarch64_evpc_rev_global (struct expand_vec_perm_d *d)
15636{
15637 poly_uint64 nelt = d->perm.length ();
15638
15639 if (!d->one_vector_p || d->vec_flags != VEC_SVE_DATA)
15640 return false;
15641
15642 if (!d->perm.series_p (0, 1, nelt - 1, -1))
15643 return false;
15644
15645 /* Success! */
15646 if (d->testing_p)
15647 return true;
15648
15649 rtx src = gen_rtx_UNSPEC (d->vmode, gen_rtvec (1, d->op0), UNSPEC_REV);
15650 emit_set_insn (d->target, src);
923fcec3
AL
15651 return true;
15652}
15653
91bd4114
JG
15654static bool
15655aarch64_evpc_dup (struct expand_vec_perm_d *d)
15656{
91bd4114
JG
15657 rtx out = d->target;
15658 rtx in0;
6a70badb 15659 HOST_WIDE_INT elt;
ef4bddc2 15660 machine_mode vmode = d->vmode;
91bd4114
JG
15661 rtx lane;
15662
43cacb12
RS
15663 if (d->vec_flags == VEC_SVE_PRED
15664 || d->perm.encoding ().encoded_nelts () != 1
6a70badb 15665 || !d->perm[0].is_constant (&elt))
326ac20e
RS
15666 return false;
15667
43cacb12
RS
15668 if (d->vec_flags == VEC_SVE_DATA && elt >= 64 * GET_MODE_UNIT_SIZE (vmode))
15669 return false;
15670
326ac20e
RS
15671 /* Success! */
15672 if (d->testing_p)
15673 return true;
15674
91bd4114
JG
15675 /* The generic preparation in aarch64_expand_vec_perm_const_1
15676 swaps the operand order and the permute indices if it finds
15677 d->perm[0] to be in the second operand. Thus, we can always
15678 use d->op0 and need not do any extra arithmetic to get the
15679 correct lane number. */
15680 in0 = d->op0;
f901401e 15681 lane = GEN_INT (elt); /* The pattern corrects for big-endian. */
91bd4114 15682
3f8334a5
RS
15683 rtx parallel = gen_rtx_PARALLEL (vmode, gen_rtvec (1, lane));
15684 rtx select = gen_rtx_VEC_SELECT (GET_MODE_INNER (vmode), in0, parallel);
15685 emit_set_insn (out, gen_rtx_VEC_DUPLICATE (vmode, select));
91bd4114
JG
15686 return true;
15687}
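/* For example (illustrative): the permutation { 2, 2, 2, 2 } on V4SImode
   has a single encoded element, so it is matched here and becomes a lane
   duplicate along the lines of "dup    v0.4s, v1.s[2]".  */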
15688
88b08073
JG
15689static bool
15690aarch64_evpc_tbl (struct expand_vec_perm_d *d)
15691{
43cacb12 15692 rtx rperm[MAX_COMPILE_TIME_VEC_BYTES], sel;
ef4bddc2 15693 machine_mode vmode = d->vmode;
6a70badb
RS
15694
15695 /* Make sure that the indices are constant. */
15696 unsigned int encoded_nelts = d->perm.encoding ().encoded_nelts ();
15697 for (unsigned int i = 0; i < encoded_nelts; ++i)
15698 if (!d->perm[i].is_constant ())
15699 return false;
88b08073 15700
88b08073
JG
15701 if (d->testing_p)
15702 return true;
15703
 15704 /* Generic code will try constant permutation twice: once with the
 15705 original mode and again with the elements lowered to QImode.
 15706 So wait and don't do the selector expansion ourselves. */
15707 if (vmode != V8QImode && vmode != V16QImode)
15708 return false;
15709
6a70badb
RS
15710 /* to_constant is safe since this routine is specific to Advanced SIMD
15711 vectors. */
15712 unsigned int nelt = d->perm.length ().to_constant ();
15713 for (unsigned int i = 0; i < nelt; ++i)
15714 /* If big-endian and two vectors we end up with a weird mixed-endian
15715 mode on NEON. Reverse the index within each word but not the word
15716 itself. to_constant is safe because we checked is_constant above. */
15717 rperm[i] = GEN_INT (BYTES_BIG_ENDIAN
15718 ? d->perm[i].to_constant () ^ (nelt - 1)
15719 : d->perm[i].to_constant ());
bbcc9c00 15720
88b08073
JG
15721 sel = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, rperm));
15722 sel = force_reg (vmode, sel);
15723
15724 aarch64_expand_vec_perm_1 (d->target, d->op0, d->op1, sel);
15725 return true;
15726}
15727
43cacb12
RS
15728/* Try to implement D using an SVE TBL instruction. */
15729
15730static bool
15731aarch64_evpc_sve_tbl (struct expand_vec_perm_d *d)
15732{
15733 unsigned HOST_WIDE_INT nelt;
15734
15735 /* Permuting two variable-length vectors could overflow the
15736 index range. */
15737 if (!d->one_vector_p && !d->perm.length ().is_constant (&nelt))
15738 return false;
15739
15740 if (d->testing_p)
15741 return true;
15742
15743 machine_mode sel_mode = mode_for_int_vector (d->vmode).require ();
15744 rtx sel = vec_perm_indices_to_rtx (sel_mode, d->perm);
15745 aarch64_expand_sve_vec_perm (d->target, d->op0, d->op1, sel);
15746 return true;
15747}
15748
88b08073
JG
15749static bool
15750aarch64_expand_vec_perm_const_1 (struct expand_vec_perm_d *d)
15751{
15752 /* The pattern matching functions above are written to look for a small
15753 number to begin the sequence (0, 1, N/2). If we begin with an index
15754 from the second operand, we can swap the operands. */
6a70badb
RS
15755 poly_int64 nelt = d->perm.length ();
15756 if (known_ge (d->perm[0], nelt))
88b08073 15757 {
e3342de4 15758 d->perm.rotate_inputs (1);
cb5c6c29 15759 std::swap (d->op0, d->op1);
88b08073
JG
15760 }
15761
43cacb12
RS
15762 if ((d->vec_flags == VEC_ADVSIMD
15763 || d->vec_flags == VEC_SVE_DATA
15764 || d->vec_flags == VEC_SVE_PRED)
15765 && known_gt (nelt, 1))
cc4d934f 15766 {
43cacb12
RS
15767 if (aarch64_evpc_rev_local (d))
15768 return true;
15769 else if (aarch64_evpc_rev_global (d))
923fcec3
AL
15770 return true;
15771 else if (aarch64_evpc_ext (d))
ae0533da 15772 return true;
f901401e
AL
15773 else if (aarch64_evpc_dup (d))
15774 return true;
ae0533da 15775 else if (aarch64_evpc_zip (d))
cc4d934f
JG
15776 return true;
15777 else if (aarch64_evpc_uzp (d))
15778 return true;
15779 else if (aarch64_evpc_trn (d))
15780 return true;
43cacb12
RS
15781 if (d->vec_flags == VEC_SVE_DATA)
15782 return aarch64_evpc_sve_tbl (d);
 15783 else if (d->vec_flags == VEC_ADVSIMD)
15784 return aarch64_evpc_tbl (d);
cc4d934f 15785 }
88b08073
JG
15786 return false;
15787}
15788
f151c9e1 15789/* Implement TARGET_VECTORIZE_VEC_PERM_CONST. */
88b08073 15790
f151c9e1
RS
15791static bool
15792aarch64_vectorize_vec_perm_const (machine_mode vmode, rtx target, rtx op0,
15793 rtx op1, const vec_perm_indices &sel)
88b08073
JG
15794{
15795 struct expand_vec_perm_d d;
88b08073 15796
326ac20e
RS
15797 /* Check whether the mask can be applied to a single vector. */
15798 if (op0 && rtx_equal_p (op0, op1))
15799 d.one_vector_p = true;
15800 else if (sel.all_from_input_p (0))
88b08073 15801 {
326ac20e
RS
15802 d.one_vector_p = true;
15803 op1 = op0;
88b08073 15804 }
326ac20e 15805 else if (sel.all_from_input_p (1))
88b08073 15806 {
88b08073 15807 d.one_vector_p = true;
326ac20e 15808 op0 = op1;
88b08073 15809 }
326ac20e
RS
15810 else
15811 d.one_vector_p = false;
88b08073 15812
326ac20e
RS
15813 d.perm.new_vector (sel.encoding (), d.one_vector_p ? 1 : 2,
15814 sel.nelts_per_input ());
15815 d.vmode = vmode;
43cacb12 15816 d.vec_flags = aarch64_classify_vector_mode (d.vmode);
326ac20e
RS
15817 d.target = target;
15818 d.op0 = op0;
15819 d.op1 = op1;
15820 d.testing_p = !target;
e3342de4 15821
f151c9e1
RS
15822 if (!d.testing_p)
15823 return aarch64_expand_vec_perm_const_1 (&d);
88b08073 15824
326ac20e 15825 rtx_insn *last = get_last_insn ();
f151c9e1 15826 bool ret = aarch64_expand_vec_perm_const_1 (&d);
326ac20e 15827 gcc_assert (last == get_last_insn ());
88b08073
JG
15828
15829 return ret;
15830}
15831
73e3da51
RS
15832/* Generate a byte permute mask for a register of mode MODE,
15833 which has NUNITS units. */
15834
668046d1 15835rtx
73e3da51 15836aarch64_reverse_mask (machine_mode mode, unsigned int nunits)
668046d1
DS
15837{
 15838 /* We have to reverse each vector because we don't have
15839 a permuted load that can reverse-load according to ABI rules. */
15840 rtx mask;
15841 rtvec v = rtvec_alloc (16);
73e3da51
RS
15842 unsigned int i, j;
15843 unsigned int usize = GET_MODE_UNIT_SIZE (mode);
668046d1
DS
15844
15845 gcc_assert (BYTES_BIG_ENDIAN);
15846 gcc_assert (AARCH64_VALID_SIMD_QREG_MODE (mode));
15847
15848 for (i = 0; i < nunits; i++)
15849 for (j = 0; j < usize; j++)
15850 RTVEC_ELT (v, i * usize + j) = GEN_INT ((i + 1) * usize - 1 - j);
15851 mask = gen_rtx_CONST_VECTOR (V16QImode, v);
15852 return force_reg (V16QImode, mask);
15853}
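/* For example (illustrative): for V4SImode (nunits == 4, unit size 4)
   the mask built above is the byte vector
   { 3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12 },
   i.e. each 4-byte element is byte-reversed in place.  */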
15854
43cacb12
RS
15855/* Return true if X is a valid second operand for the SVE instruction
15856 that implements integer comparison OP_CODE. */
15857
15858static bool
15859aarch64_sve_cmp_operand_p (rtx_code op_code, rtx x)
15860{
15861 if (register_operand (x, VOIDmode))
15862 return true;
15863
15864 switch (op_code)
15865 {
15866 case LTU:
15867 case LEU:
15868 case GEU:
15869 case GTU:
15870 return aarch64_sve_cmp_immediate_p (x, false);
15871 case LT:
15872 case LE:
15873 case GE:
15874 case GT:
15875 case NE:
15876 case EQ:
15877 return aarch64_sve_cmp_immediate_p (x, true);
15878 default:
15879 gcc_unreachable ();
15880 }
15881}
15882
f22d7973
RS
15883/* Use predicated SVE instructions to implement the equivalent of:
15884
15885 (set TARGET OP)
15886
15887 given that PTRUE is an all-true predicate of the appropriate mode. */
15888
15889static void
15890aarch64_emit_sve_ptrue_op (rtx target, rtx ptrue, rtx op)
15891{
15892 rtx unspec = gen_rtx_UNSPEC (GET_MODE (target),
15893 gen_rtvec (2, ptrue, op),
15894 UNSPEC_MERGE_PTRUE);
15895 rtx_insn *insn = emit_set_insn (target, unspec);
15896 set_unique_reg_note (insn, REG_EQUAL, copy_rtx (op));
15897}
15898
15899/* Likewise, but also clobber the condition codes. */
15900
15901static void
15902aarch64_emit_sve_ptrue_op_cc (rtx target, rtx ptrue, rtx op)
15903{
15904 rtx unspec = gen_rtx_UNSPEC (GET_MODE (target),
15905 gen_rtvec (2, ptrue, op),
15906 UNSPEC_MERGE_PTRUE);
15907 rtx_insn *insn = emit_insn (gen_set_clobber_cc (target, unspec));
15908 set_unique_reg_note (insn, REG_EQUAL, copy_rtx (op));
15909}
15910
43cacb12
RS
15911/* Return the UNSPEC_COND_* code for comparison CODE. */
15912
15913static unsigned int
15914aarch64_unspec_cond_code (rtx_code code)
15915{
15916 switch (code)
15917 {
15918 case NE:
15919 return UNSPEC_COND_NE;
15920 case EQ:
15921 return UNSPEC_COND_EQ;
15922 case LT:
15923 return UNSPEC_COND_LT;
15924 case GT:
15925 return UNSPEC_COND_GT;
15926 case LE:
15927 return UNSPEC_COND_LE;
15928 case GE:
15929 return UNSPEC_COND_GE;
43cacb12
RS
15930 default:
15931 gcc_unreachable ();
15932 }
15933}
15934
f22d7973 15935/* Emit:
43cacb12 15936
f22d7973
RS
15937 (set TARGET (unspec [PRED OP0 OP1] UNSPEC_COND_<X>))
15938
15939 where <X> is the operation associated with comparison CODE. This form
15940 of instruction is used when (and (CODE OP0 OP1) PRED) would have different
15941 semantics, such as when PRED might not be all-true and when comparing
15942 inactive lanes could have side effects. */
15943
15944static void
15945aarch64_emit_sve_predicated_cond (rtx target, rtx_code code,
15946 rtx pred, rtx op0, rtx op1)
43cacb12 15947{
f22d7973
RS
15948 rtx unspec = gen_rtx_UNSPEC (GET_MODE (pred),
15949 gen_rtvec (3, pred, op0, op1),
15950 aarch64_unspec_cond_code (code));
15951 emit_set_insn (target, unspec);
43cacb12
RS
15952}
15953
f22d7973 15954/* Expand an SVE integer comparison using the SVE equivalent of:
43cacb12 15955
f22d7973 15956 (set TARGET (CODE OP0 OP1)). */
43cacb12
RS
15957
15958void
15959aarch64_expand_sve_vec_cmp_int (rtx target, rtx_code code, rtx op0, rtx op1)
15960{
15961 machine_mode pred_mode = GET_MODE (target);
15962 machine_mode data_mode = GET_MODE (op0);
15963
15964 if (!aarch64_sve_cmp_operand_p (code, op1))
15965 op1 = force_reg (data_mode, op1);
15966
15967 rtx ptrue = force_reg (pred_mode, CONSTM1_RTX (pred_mode));
f22d7973
RS
15968 rtx cond = gen_rtx_fmt_ee (code, pred_mode, op0, op1);
15969 aarch64_emit_sve_ptrue_op_cc (target, ptrue, cond);
43cacb12
RS
15970}
15971
f22d7973 15972/* Emit the SVE equivalent of:
43cacb12 15973
f22d7973
RS
15974 (set TMP1 (CODE1 OP0 OP1))
15975 (set TMP2 (CODE2 OP0 OP1))
15976 (set TARGET (ior:PRED_MODE TMP1 TMP2))
43cacb12 15977
f22d7973 15978 PTRUE is an all-true predicate with the same mode as TARGET. */
43cacb12
RS
15979
15980static void
f22d7973
RS
15981aarch64_emit_sve_or_conds (rtx target, rtx_code code1, rtx_code code2,
15982 rtx ptrue, rtx op0, rtx op1)
43cacb12 15983{
f22d7973 15984 machine_mode pred_mode = GET_MODE (ptrue);
43cacb12 15985 rtx tmp1 = gen_reg_rtx (pred_mode);
f22d7973
RS
15986 aarch64_emit_sve_ptrue_op (tmp1, ptrue,
15987 gen_rtx_fmt_ee (code1, pred_mode, op0, op1));
43cacb12 15988 rtx tmp2 = gen_reg_rtx (pred_mode);
f22d7973
RS
15989 aarch64_emit_sve_ptrue_op (tmp2, ptrue,
15990 gen_rtx_fmt_ee (code2, pred_mode, op0, op1));
15991 aarch64_emit_binop (target, ior_optab, tmp1, tmp2);
43cacb12
RS
15992}
15993
f22d7973 15994/* Emit the SVE equivalent of:
43cacb12 15995
f22d7973
RS
15996 (set TMP (CODE OP0 OP1))
15997 (set TARGET (not TMP))
43cacb12 15998
f22d7973 15999 PTRUE is an all-true predicate with the same mode as TARGET. */
43cacb12
RS
16000
16001static void
f22d7973
RS
16002aarch64_emit_sve_inverted_cond (rtx target, rtx ptrue, rtx_code code,
16003 rtx op0, rtx op1)
43cacb12 16004{
f22d7973
RS
16005 machine_mode pred_mode = GET_MODE (ptrue);
16006 rtx tmp = gen_reg_rtx (pred_mode);
16007 aarch64_emit_sve_ptrue_op (tmp, ptrue,
16008 gen_rtx_fmt_ee (code, pred_mode, op0, op1));
16009 aarch64_emit_unop (target, one_cmpl_optab, tmp);
43cacb12
RS
16010}
16011
f22d7973 16012/* Expand an SVE floating-point comparison using the SVE equivalent of:
43cacb12 16013
f22d7973 16014 (set TARGET (CODE OP0 OP1))
43cacb12
RS
16015
16016 If CAN_INVERT_P is true, the caller can also handle inverted results;
16017 return true if the result is in fact inverted. */
16018
16019bool
16020aarch64_expand_sve_vec_cmp_float (rtx target, rtx_code code,
16021 rtx op0, rtx op1, bool can_invert_p)
16022{
16023 machine_mode pred_mode = GET_MODE (target);
16024 machine_mode data_mode = GET_MODE (op0);
16025
16026 rtx ptrue = force_reg (pred_mode, CONSTM1_RTX (pred_mode));
16027 switch (code)
16028 {
16029 case UNORDERED:
16030 /* UNORDERED has no immediate form. */
16031 op1 = force_reg (data_mode, op1);
f22d7973 16032 /* fall through */
43cacb12
RS
16033 case LT:
16034 case LE:
16035 case GT:
16036 case GE:
16037 case EQ:
16038 case NE:
f22d7973
RS
16039 {
16040 /* There is native support for the comparison. */
16041 rtx cond = gen_rtx_fmt_ee (code, pred_mode, op0, op1);
16042 aarch64_emit_sve_ptrue_op (target, ptrue, cond);
16043 return false;
16044 }
43cacb12
RS
16045
16046 case LTGT:
16047 /* This is a trapping operation (LT or GT). */
f22d7973 16048 aarch64_emit_sve_or_conds (target, LT, GT, ptrue, op0, op1);
43cacb12
RS
16049 return false;
16050
16051 case UNEQ:
16052 if (!flag_trapping_math)
16053 {
16054 /* This would trap for signaling NaNs. */
16055 op1 = force_reg (data_mode, op1);
f22d7973 16056 aarch64_emit_sve_or_conds (target, UNORDERED, EQ, ptrue, op0, op1);
43cacb12
RS
16057 return false;
16058 }
16059 /* fall through */
43cacb12
RS
16060 case UNLT:
16061 case UNLE:
16062 case UNGT:
16063 case UNGE:
f22d7973
RS
16064 if (flag_trapping_math)
16065 {
16066 /* Work out which elements are ordered. */
16067 rtx ordered = gen_reg_rtx (pred_mode);
16068 op1 = force_reg (data_mode, op1);
16069 aarch64_emit_sve_inverted_cond (ordered, ptrue, UNORDERED, op0, op1);
16070
16071 /* Test the opposite condition for the ordered elements,
16072 then invert the result. */
16073 if (code == UNEQ)
16074 code = NE;
16075 else
16076 code = reverse_condition_maybe_unordered (code);
16077 if (can_invert_p)
16078 {
16079 aarch64_emit_sve_predicated_cond (target, code,
16080 ordered, op0, op1);
16081 return true;
16082 }
16083 rtx tmp = gen_reg_rtx (pred_mode);
16084 aarch64_emit_sve_predicated_cond (tmp, code, ordered, op0, op1);
16085 aarch64_emit_unop (target, one_cmpl_optab, tmp);
16086 return false;
16087 }
16088 break;
16089
16090 case ORDERED:
16091 /* ORDERED has no immediate form. */
16092 op1 = force_reg (data_mode, op1);
16093 break;
43cacb12
RS
16094
16095 default:
16096 gcc_unreachable ();
16097 }
f22d7973
RS
16098
16099 /* There is native support for the inverse comparison. */
16100 code = reverse_condition_maybe_unordered (code);
16101 if (can_invert_p)
16102 {
16103 rtx cond = gen_rtx_fmt_ee (code, pred_mode, op0, op1);
16104 aarch64_emit_sve_ptrue_op (target, ptrue, cond);
16105 return true;
16106 }
16107 aarch64_emit_sve_inverted_cond (target, ptrue, code, op0, op1);
16108 return false;
43cacb12
RS
16109}
16110
16111/* Expand an SVE vcond pattern with operands OPS. DATA_MODE is the mode
16112 of the data being selected and CMP_MODE is the mode of the values being
16113 compared. */
16114
16115void
16116aarch64_expand_sve_vcond (machine_mode data_mode, machine_mode cmp_mode,
16117 rtx *ops)
16118{
16119 machine_mode pred_mode
16120 = aarch64_get_mask_mode (GET_MODE_NUNITS (cmp_mode),
16121 GET_MODE_SIZE (cmp_mode)).require ();
16122 rtx pred = gen_reg_rtx (pred_mode);
16123 if (FLOAT_MODE_P (cmp_mode))
16124 {
16125 if (aarch64_expand_sve_vec_cmp_float (pred, GET_CODE (ops[3]),
16126 ops[4], ops[5], true))
16127 std::swap (ops[1], ops[2]);
16128 }
16129 else
16130 aarch64_expand_sve_vec_cmp_int (pred, GET_CODE (ops[3]), ops[4], ops[5]);
16131
16132 rtvec vec = gen_rtvec (3, pred, ops[1], ops[2]);
16133 emit_set_insn (ops[0], gen_rtx_UNSPEC (data_mode, vec, UNSPEC_SEL));
16134}
16135
99e1629f
RS
16136/* Implement TARGET_MODES_TIEABLE_P. In principle we should always return
16137 true. However due to issues with register allocation it is preferable
 16138 to avoid tying integer scalar and FP scalar modes. Executing integer
16139 operations in general registers is better than treating them as scalar
16140 vector operations. This reduces latency and avoids redundant int<->FP
16141 moves. So tie modes if they are either the same class, or vector modes
16142 with other vector modes, vector structs or any scalar mode. */
97e1ad78 16143
99e1629f 16144static bool
ef4bddc2 16145aarch64_modes_tieable_p (machine_mode mode1, machine_mode mode2)
97e1ad78
JG
16146{
16147 if (GET_MODE_CLASS (mode1) == GET_MODE_CLASS (mode2))
16148 return true;
16149
16150 /* We specifically want to allow elements of "structure" modes to
16151 be tieable to the structure. This more general condition allows
43cacb12
RS
16152 other rarer situations too. The reason we don't extend this to
16153 predicate modes is that there are no predicate structure modes
16154 nor any specific instructions for extracting part of a predicate
16155 register. */
16156 if (aarch64_vector_data_mode_p (mode1)
16157 && aarch64_vector_data_mode_p (mode2))
61f17a5c
WD
16158 return true;
16159
16160 /* Also allow any scalar modes with vectors. */
16161 if (aarch64_vector_mode_supported_p (mode1)
16162 || aarch64_vector_mode_supported_p (mode2))
97e1ad78
JG
16163 return true;
16164
16165 return false;
16166}
16167
e2c75eea
JG
16168/* Return a new RTX holding the result of moving POINTER forward by
16169 AMOUNT bytes. */
16170
16171static rtx
6a70badb 16172aarch64_move_pointer (rtx pointer, poly_int64 amount)
e2c75eea
JG
16173{
16174 rtx next = plus_constant (Pmode, XEXP (pointer, 0), amount);
16175
16176 return adjust_automodify_address (pointer, GET_MODE (pointer),
16177 next, amount);
16178}
16179
16180/* Return a new RTX holding the result of moving POINTER forward by the
16181 size of the mode it points to. */
16182
16183static rtx
16184aarch64_progress_pointer (rtx pointer)
16185{
6a70badb 16186 return aarch64_move_pointer (pointer, GET_MODE_SIZE (GET_MODE (pointer)));
e2c75eea
JG
16187}
16188
16189/* Copy one MODE sized block from SRC to DST, then progress SRC and DST by
16190 MODE bytes. */
16191
16192static void
16193aarch64_copy_one_block_and_progress_pointers (rtx *src, rtx *dst,
ef4bddc2 16194 machine_mode mode)
e2c75eea
JG
16195{
16196 rtx reg = gen_reg_rtx (mode);
16197
16198 /* "Cast" the pointers to the correct mode. */
16199 *src = adjust_address (*src, mode, 0);
16200 *dst = adjust_address (*dst, mode, 0);
16201 /* Emit the memcpy. */
16202 emit_move_insn (reg, *src);
16203 emit_move_insn (*dst, reg);
16204 /* Move the pointers forward. */
16205 *src = aarch64_progress_pointer (*src);
16206 *dst = aarch64_progress_pointer (*dst);
16207}
16208
16209/* Expand movmem, as if from a __builtin_memcpy. Return true if
16210 we succeed, otherwise return false. */
16211
16212bool
16213aarch64_expand_movmem (rtx *operands)
16214{
89c52e5e 16215 int n, mode_bits;
e2c75eea
JG
16216 rtx dst = operands[0];
16217 rtx src = operands[1];
16218 rtx base;
89c52e5e 16219 machine_mode cur_mode = BLKmode, next_mode;
e2c75eea
JG
16220 bool speed_p = !optimize_function_for_size_p (cfun);
16221
16222 /* When optimizing for size, give a better estimate of the length of a
89c52e5e
TC
16223 memcpy call, but use the default otherwise. Moves larger than 8 bytes
16224 will always require an even number of instructions to do now. And each
 16225 operation requires both a load and a store, so divide the max number by 2. */
16226 int max_num_moves = (speed_p ? 16 : AARCH64_CALL_RATIO) / 2;
e2c75eea
JG
16227
16228 /* We can't do anything smart if the amount to copy is not constant. */
16229 if (!CONST_INT_P (operands[2]))
16230 return false;
16231
89c52e5e 16232 n = INTVAL (operands[2]);
e2c75eea 16233
89c52e5e
TC
16234 /* Try to keep the number of instructions low. For all cases we will do at
16235 most two moves for the residual amount, since we'll always overlap the
16236 remainder. */
16237 if (((n / 16) + (n % 16 ? 2 : 0)) > max_num_moves)
e2c75eea
JG
16238 return false;
16239
16240 base = copy_to_mode_reg (Pmode, XEXP (dst, 0));
16241 dst = adjust_automodify_address (dst, VOIDmode, base, 0);
16242
16243 base = copy_to_mode_reg (Pmode, XEXP (src, 0));
16244 src = adjust_automodify_address (src, VOIDmode, base, 0);
16245
89c52e5e
TC
16246 /* Convert n to bits to make the rest of the code simpler. */
16247 n = n * BITS_PER_UNIT;
e2c75eea 16248
89c52e5e 16249 while (n > 0)
e2c75eea 16250 {
89c52e5e
TC
 16251 /* Find the largest mode in which to do the copy without over-reading
 16252 or over-writing. */
16253 opt_scalar_int_mode mode_iter;
16254 FOR_EACH_MODE_IN_CLASS (mode_iter, MODE_INT)
16255 if (GET_MODE_BITSIZE (mode_iter.require ()) <= n)
16256 cur_mode = mode_iter.require ();
e2c75eea 16257
89c52e5e 16258 gcc_assert (cur_mode != BLKmode);
e2c75eea 16259
89c52e5e
TC
16260 mode_bits = GET_MODE_BITSIZE (cur_mode).to_constant ();
16261 aarch64_copy_one_block_and_progress_pointers (&src, &dst, cur_mode);
e2c75eea 16262
89c52e5e 16263 n -= mode_bits;
e2c75eea 16264
89c52e5e
TC
 16265 /* Do certain trailing copies as overlapping if it's going to be
 16266 cheaper, i.e. fewer instructions. For instance, for a 15
 16267 byte copy it's more efficient to do two overlapping 8 byte copies than
 16268 8 + 4 + 2 + 1. */
16269 next_mode = smallest_mode_for_size (n, MODE_INT);
16270 int n_bits = GET_MODE_BITSIZE (next_mode).to_constant ();
16271 if (n > 0 && n_bits > n && n_bits <= 8 * BITS_PER_UNIT)
16272 {
16273 src = aarch64_move_pointer (src, (n - n_bits) / BITS_PER_UNIT);
16274 dst = aarch64_move_pointer (dst, (n - n_bits) / BITS_PER_UNIT);
16275 n = n_bits;
e2c75eea
JG
16276 }
16277 }
16278
16279 return true;
16280}
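/* A worked example of the loop above (illustrative): for a constant
   15-byte copy, the first iteration picks DImode and copies bytes 0-7,
   leaving n == 56 bits.  smallest_mode_for_size then returns DImode
   again, so the source and destination pointers are moved back one byte
   and the final iteration copies bytes 7-14 as a second, overlapping
   8-byte load/store pair: two moves instead of the 8 + 4 + 2 + 1
   sequence a non-overlapping expansion would need.  */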
16281
141a3ccf
KT
16282/* Split a DImode store of a CONST_INT SRC to MEM DST as two
16283 SImode stores. Handle the case when the constant has identical
16284 bottom and top halves. This is beneficial when the two stores can be
16285 merged into an STP and we avoid synthesising potentially expensive
16286 immediates twice. Return true if such a split is possible. */
16287
16288bool
16289aarch64_split_dimode_const_store (rtx dst, rtx src)
16290{
16291 rtx lo = gen_lowpart (SImode, src);
16292 rtx hi = gen_highpart_mode (SImode, DImode, src);
16293
16294 bool size_p = optimize_function_for_size_p (cfun);
16295
16296 if (!rtx_equal_p (lo, hi))
16297 return false;
16298
16299 unsigned int orig_cost
16300 = aarch64_internal_mov_immediate (NULL_RTX, src, false, DImode);
16301 unsigned int lo_cost
16302 = aarch64_internal_mov_immediate (NULL_RTX, lo, false, SImode);
16303
16304 /* We want to transform:
16305 MOV x1, 49370
16306 MOVK x1, 0x140, lsl 16
16307 MOVK x1, 0xc0da, lsl 32
16308 MOVK x1, 0x140, lsl 48
16309 STR x1, [x0]
16310 into:
16311 MOV w1, 49370
16312 MOVK w1, 0x140, lsl 16
16313 STP w1, w1, [x0]
16314 So we want to perform this only when we save two instructions
16315 or more. When optimizing for size, however, accept any code size
16316 savings we can. */
16317 if (size_p && orig_cost <= lo_cost)
16318 return false;
16319
16320 if (!size_p
16321 && (orig_cost <= lo_cost + 1))
16322 return false;
16323
16324 rtx mem_lo = adjust_address (dst, SImode, 0);
16325 if (!aarch64_mem_pair_operand (mem_lo, SImode))
16326 return false;
16327
16328 rtx tmp_reg = gen_reg_rtx (SImode);
16329 aarch64_expand_mov_immediate (tmp_reg, lo);
16330 rtx mem_hi = aarch64_move_pointer (mem_lo, GET_MODE_SIZE (SImode));
16331 /* Don't emit an explicit store pair as this may not be always profitable.
16332 Let the sched-fusion logic decide whether to merge them. */
16333 emit_move_insn (mem_lo, tmp_reg);
16334 emit_move_insn (mem_hi, tmp_reg);
16335
16336 return true;
16337}
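/* A hypothetical source-level trigger for the transformation above
   (illustrative only, matching the MOV/MOVK example in the comment):

     void
     store_repeated (unsigned long long *p)
     {
       *p = 0x0140c0da0140c0daULL;   // low and high 32-bit halves match
     }

   Here the SImode half needs only MOV+MOVK, so the split saves two MOVK
   instructions and exposes an STP opportunity.  */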
16338
30c46053
MC
16339/* Generate RTL for a conditional branch with rtx comparison CODE in
16340 mode CC_MODE. The destination of the unlikely conditional branch
16341 is LABEL_REF. */
16342
16343void
16344aarch64_gen_unlikely_cbranch (enum rtx_code code, machine_mode cc_mode,
16345 rtx label_ref)
16346{
16347 rtx x;
16348 x = gen_rtx_fmt_ee (code, VOIDmode,
16349 gen_rtx_REG (cc_mode, CC_REGNUM),
16350 const0_rtx);
16351
16352 x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
16353 gen_rtx_LABEL_REF (VOIDmode, label_ref),
16354 pc_rtx);
16355 aarch64_emit_unlikely_jump (gen_rtx_SET (pc_rtx, x));
16356}
16357
16358/* Generate DImode scratch registers for 128-bit (TImode) addition.
16359
16360 OP1 represents the TImode destination operand 1
16361 OP2 represents the TImode destination operand 2
16362 LOW_DEST represents the low half (DImode) of TImode operand 0
16363 LOW_IN1 represents the low half (DImode) of TImode operand 1
16364 LOW_IN2 represents the low half (DImode) of TImode operand 2
16365 HIGH_DEST represents the high half (DImode) of TImode operand 0
16366 HIGH_IN1 represents the high half (DImode) of TImode operand 1
16367 HIGH_IN2 represents the high half (DImode) of TImode operand 2. */
16368
16369void
16370aarch64_addti_scratch_regs (rtx op1, rtx op2, rtx *low_dest,
16371 rtx *low_in1, rtx *low_in2,
16372 rtx *high_dest, rtx *high_in1,
16373 rtx *high_in2)
16374{
16375 *low_dest = gen_reg_rtx (DImode);
16376 *low_in1 = gen_lowpart (DImode, op1);
16377 *low_in2 = simplify_gen_subreg (DImode, op2, TImode,
16378 subreg_lowpart_offset (DImode, TImode));
16379 *high_dest = gen_reg_rtx (DImode);
16380 *high_in1 = gen_highpart (DImode, op1);
16381 *high_in2 = simplify_gen_subreg (DImode, op2, TImode,
16382 subreg_highpart_offset (DImode, TImode));
16383}
16384
16385/* Generate DImode scratch registers for 128-bit (TImode) subtraction.
16386
 16387 This function differs from 'aarch64_addti_scratch_regs' in that
16388 OP1 can be an immediate constant (zero). We must call
16389 subreg_highpart_offset with DImode and TImode arguments, otherwise
 16390 VOIDmode will be used for the const_int, which generates an internal
 16391 error from subreg_size_highpart_offset, which does not expect a size of zero.
16392
16393 OP1 represents the TImode destination operand 1
16394 OP2 represents the TImode destination operand 2
16395 LOW_DEST represents the low half (DImode) of TImode operand 0
16396 LOW_IN1 represents the low half (DImode) of TImode operand 1
16397 LOW_IN2 represents the low half (DImode) of TImode operand 2
16398 HIGH_DEST represents the high half (DImode) of TImode operand 0
16399 HIGH_IN1 represents the high half (DImode) of TImode operand 1
16400 HIGH_IN2 represents the high half (DImode) of TImode operand 2. */
16401
16402
16403void
16404aarch64_subvti_scratch_regs (rtx op1, rtx op2, rtx *low_dest,
16405 rtx *low_in1, rtx *low_in2,
16406 rtx *high_dest, rtx *high_in1,
16407 rtx *high_in2)
16408{
16409 *low_dest = gen_reg_rtx (DImode);
16410 *low_in1 = simplify_gen_subreg (DImode, op1, TImode,
16411 subreg_lowpart_offset (DImode, TImode));
16412
16413 *low_in2 = simplify_gen_subreg (DImode, op2, TImode,
16414 subreg_lowpart_offset (DImode, TImode));
16415 *high_dest = gen_reg_rtx (DImode);
16416
16417 *high_in1 = simplify_gen_subreg (DImode, op1, TImode,
16418 subreg_highpart_offset (DImode, TImode));
16419 *high_in2 = simplify_gen_subreg (DImode, op2, TImode,
16420 subreg_highpart_offset (DImode, TImode));
16421}
16422
16423/* Generate RTL for 128-bit (TImode) subtraction with overflow.
16424
16425 OP0 represents the TImode destination operand 0
16426 LOW_DEST represents the low half (DImode) of TImode operand 0
16427 LOW_IN1 represents the low half (DImode) of TImode operand 1
16428 LOW_IN2 represents the low half (DImode) of TImode operand 2
16429 HIGH_DEST represents the high half (DImode) of TImode operand 0
16430 HIGH_IN1 represents the high half (DImode) of TImode operand 1
16431 HIGH_IN2 represents the high half (DImode) of TImode operand 2. */
16432
16433void
16434aarch64_expand_subvti (rtx op0, rtx low_dest, rtx low_in1,
16435 rtx low_in2, rtx high_dest, rtx high_in1,
16436 rtx high_in2)
16437{
16438 if (low_in2 == const0_rtx)
16439 {
16440 low_dest = low_in1;
16441 emit_insn (gen_subdi3_compare1 (high_dest, high_in1,
16442 force_reg (DImode, high_in2)));
16443 }
16444 else
16445 {
16446 if (CONST_INT_P (low_in2))
16447 {
16448 low_in2 = force_reg (DImode, GEN_INT (-UINTVAL (low_in2)));
16449 high_in2 = force_reg (DImode, high_in2);
16450 emit_insn (gen_adddi3_compareC (low_dest, low_in1, low_in2));
16451 }
16452 else
16453 emit_insn (gen_subdi3_compare1 (low_dest, low_in1, low_in2));
16454 emit_insn (gen_subdi3_carryinCV (high_dest,
16455 force_reg (DImode, high_in1),
16456 high_in2));
16457 }
16458
16459 emit_move_insn (gen_lowpart (DImode, op0), low_dest);
16460 emit_move_insn (gen_highpart (DImode, op0), high_dest);
16461
16462}
16463
a3125fc2
CL
16464/* Implement the TARGET_ASAN_SHADOW_OFFSET hook. */
16465
16466static unsigned HOST_WIDE_INT
16467aarch64_asan_shadow_offset (void)
16468{
16469 return (HOST_WIDE_INT_1 << 36);
16470}
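/* Illustrative note (editorial, assuming the usual AddressSanitizer
   mapping): with this offset the shadow byte for an application address
   ADDR lives at (ADDR >> 3) + (1 << 36), i.e. one shadow byte per
   8 bytes of application memory.  */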
16471
5f3bc026 16472static rtx
cb4347e8 16473aarch64_gen_ccmp_first (rtx_insn **prep_seq, rtx_insn **gen_seq,
5f3bc026
ZC
16474 int code, tree treeop0, tree treeop1)
16475{
c8012fbc
WD
16476 machine_mode op_mode, cmp_mode, cc_mode = CCmode;
16477 rtx op0, op1;
5f3bc026 16478 int unsignedp = TYPE_UNSIGNED (TREE_TYPE (treeop0));
c8012fbc 16479 insn_code icode;
5f3bc026
ZC
16480 struct expand_operand ops[4];
16481
5f3bc026
ZC
16482 start_sequence ();
16483 expand_operands (treeop0, treeop1, NULL_RTX, &op0, &op1, EXPAND_NORMAL);
16484
16485 op_mode = GET_MODE (op0);
16486 if (op_mode == VOIDmode)
16487 op_mode = GET_MODE (op1);
16488
16489 switch (op_mode)
16490 {
4e10a5a7
RS
16491 case E_QImode:
16492 case E_HImode:
16493 case E_SImode:
5f3bc026
ZC
16494 cmp_mode = SImode;
16495 icode = CODE_FOR_cmpsi;
16496 break;
16497
4e10a5a7 16498 case E_DImode:
5f3bc026
ZC
16499 cmp_mode = DImode;
16500 icode = CODE_FOR_cmpdi;
16501 break;
16502
4e10a5a7 16503 case E_SFmode:
786e3c06
WD
16504 cmp_mode = SFmode;
16505 cc_mode = aarch64_select_cc_mode ((rtx_code) code, op0, op1);
16506 icode = cc_mode == CCFPEmode ? CODE_FOR_fcmpesf : CODE_FOR_fcmpsf;
16507 break;
16508
4e10a5a7 16509 case E_DFmode:
786e3c06
WD
16510 cmp_mode = DFmode;
16511 cc_mode = aarch64_select_cc_mode ((rtx_code) code, op0, op1);
16512 icode = cc_mode == CCFPEmode ? CODE_FOR_fcmpedf : CODE_FOR_fcmpdf;
16513 break;
16514
5f3bc026
ZC
16515 default:
16516 end_sequence ();
16517 return NULL_RTX;
16518 }
16519
c8012fbc
WD
16520 op0 = prepare_operand (icode, op0, 0, op_mode, cmp_mode, unsignedp);
16521 op1 = prepare_operand (icode, op1, 1, op_mode, cmp_mode, unsignedp);
5f3bc026
ZC
16522 if (!op0 || !op1)
16523 {
16524 end_sequence ();
16525 return NULL_RTX;
16526 }
16527 *prep_seq = get_insns ();
16528 end_sequence ();
16529
c8012fbc
WD
16530 create_fixed_operand (&ops[0], op0);
16531 create_fixed_operand (&ops[1], op1);
5f3bc026
ZC
16532
16533 start_sequence ();
c8012fbc 16534 if (!maybe_expand_insn (icode, 2, ops))
5f3bc026
ZC
16535 {
16536 end_sequence ();
16537 return NULL_RTX;
16538 }
16539 *gen_seq = get_insns ();
16540 end_sequence ();
16541
c8012fbc
WD
16542 return gen_rtx_fmt_ee ((rtx_code) code, cc_mode,
16543 gen_rtx_REG (cc_mode, CC_REGNUM), const0_rtx);
5f3bc026
ZC
16544}
16545
16546static rtx
cb4347e8
TS
16547aarch64_gen_ccmp_next (rtx_insn **prep_seq, rtx_insn **gen_seq, rtx prev,
16548 int cmp_code, tree treeop0, tree treeop1, int bit_code)
5f3bc026 16549{
c8012fbc
WD
16550 rtx op0, op1, target;
16551 machine_mode op_mode, cmp_mode, cc_mode = CCmode;
5f3bc026 16552 int unsignedp = TYPE_UNSIGNED (TREE_TYPE (treeop0));
c8012fbc 16553 insn_code icode;
5f3bc026 16554 struct expand_operand ops[6];
c8012fbc 16555 int aarch64_cond;
5f3bc026 16556
cb4347e8 16557 push_to_sequence (*prep_seq);
5f3bc026
ZC
16558 expand_operands (treeop0, treeop1, NULL_RTX, &op0, &op1, EXPAND_NORMAL);
16559
16560 op_mode = GET_MODE (op0);
16561 if (op_mode == VOIDmode)
16562 op_mode = GET_MODE (op1);
16563
16564 switch (op_mode)
16565 {
4e10a5a7
RS
16566 case E_QImode:
16567 case E_HImode:
16568 case E_SImode:
5f3bc026 16569 cmp_mode = SImode;
c8012fbc 16570 icode = CODE_FOR_ccmpsi;
5f3bc026
ZC
16571 break;
16572
4e10a5a7 16573 case E_DImode:
5f3bc026 16574 cmp_mode = DImode;
c8012fbc 16575 icode = CODE_FOR_ccmpdi;
5f3bc026
ZC
16576 break;
16577
4e10a5a7 16578 case E_SFmode:
786e3c06
WD
16579 cmp_mode = SFmode;
16580 cc_mode = aarch64_select_cc_mode ((rtx_code) cmp_code, op0, op1);
16581 icode = cc_mode == CCFPEmode ? CODE_FOR_fccmpesf : CODE_FOR_fccmpsf;
16582 break;
16583
4e10a5a7 16584 case E_DFmode:
786e3c06
WD
16585 cmp_mode = DFmode;
16586 cc_mode = aarch64_select_cc_mode ((rtx_code) cmp_code, op0, op1);
16587 icode = cc_mode == CCFPEmode ? CODE_FOR_fccmpedf : CODE_FOR_fccmpdf;
16588 break;
16589
5f3bc026
ZC
16590 default:
16591 end_sequence ();
16592 return NULL_RTX;
16593 }
16594
16595 op0 = prepare_operand (icode, op0, 2, op_mode, cmp_mode, unsignedp);
16596 op1 = prepare_operand (icode, op1, 3, op_mode, cmp_mode, unsignedp);
16597 if (!op0 || !op1)
16598 {
16599 end_sequence ();
16600 return NULL_RTX;
16601 }
16602 *prep_seq = get_insns ();
16603 end_sequence ();
16604
16605 target = gen_rtx_REG (cc_mode, CC_REGNUM);
c8012fbc 16606 aarch64_cond = aarch64_get_condition_code_1 (cc_mode, (rtx_code) cmp_code);
5f3bc026 16607
c8012fbc
WD
16608 if (bit_code != AND)
16609 {
16610 prev = gen_rtx_fmt_ee (REVERSE_CONDITION (GET_CODE (prev),
16611 GET_MODE (XEXP (prev, 0))),
16612 VOIDmode, XEXP (prev, 0), const0_rtx);
16613 aarch64_cond = AARCH64_INVERSE_CONDITION_CODE (aarch64_cond);
16614 }
16615
16616 create_fixed_operand (&ops[0], XEXP (prev, 0));
5f3bc026
ZC
16617 create_fixed_operand (&ops[1], target);
16618 create_fixed_operand (&ops[2], op0);
16619 create_fixed_operand (&ops[3], op1);
c8012fbc
WD
16620 create_fixed_operand (&ops[4], prev);
16621 create_fixed_operand (&ops[5], GEN_INT (aarch64_cond));
5f3bc026 16622
cb4347e8 16623 push_to_sequence (*gen_seq);
5f3bc026
ZC
16624 if (!maybe_expand_insn (icode, 6, ops))
16625 {
16626 end_sequence ();
16627 return NULL_RTX;
16628 }
16629
16630 *gen_seq = get_insns ();
16631 end_sequence ();
16632
c8012fbc 16633 return gen_rtx_fmt_ee ((rtx_code) cmp_code, VOIDmode, target, const0_rtx);
5f3bc026
ZC
16634}
16635
16636#undef TARGET_GEN_CCMP_FIRST
16637#define TARGET_GEN_CCMP_FIRST aarch64_gen_ccmp_first
16638
16639#undef TARGET_GEN_CCMP_NEXT
16640#define TARGET_GEN_CCMP_NEXT aarch64_gen_ccmp_next
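/* A sketch of what these hooks enable (illustrative; the register names
   and the exact NZCV immediate below are assumptions, not GCC's fixed
   output):

     int f (int a, int b) { return a == 0 && b == 5; }

   can be compiled into a chained comparison such as

     cmp     w0, 0
     ccmp    w1, 5, 0, eq
     cset    w0, eq

   where aarch64_gen_ccmp_first emits the initial CMP and
   aarch64_gen_ccmp_next emits each dependent CCMP.  */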
16641
6a569cdd
KT
16642/* Implement TARGET_SCHED_MACRO_FUSION_P. Return true if target supports
16643 instruction fusion of some sort. */
16644
16645static bool
16646aarch64_macro_fusion_p (void)
16647{
b175b679 16648 return aarch64_tune_params.fusible_ops != AARCH64_FUSE_NOTHING;
6a569cdd
KT
16649}
16650
16651
16652/* Implement TARGET_SCHED_MACRO_FUSION_PAIR_P. Return true if PREV and CURR
16653 should be kept together during scheduling. */
16654
16655static bool
16656aarch_macro_fusion_pair_p (rtx_insn *prev, rtx_insn *curr)
16657{
16658 rtx set_dest;
16659 rtx prev_set = single_set (prev);
16660 rtx curr_set = single_set (curr);
16661 /* prev and curr are simple SET insns i.e. no flag setting or branching. */
16662 bool simple_sets_p = prev_set && curr_set && !any_condjump_p (curr);
16663
16664 if (!aarch64_macro_fusion_p ())
16665 return false;
16666
d7b03373 16667 if (simple_sets_p && aarch64_fusion_enabled_p (AARCH64_FUSE_MOV_MOVK))
6a569cdd
KT
16668 {
16669 /* We are trying to match:
16670 prev (mov) == (set (reg r0) (const_int imm16))
16671 curr (movk) == (set (zero_extract (reg r0)
16672 (const_int 16)
16673 (const_int 16))
16674 (const_int imm16_1)) */
16675
16676 set_dest = SET_DEST (curr_set);
16677
16678 if (GET_CODE (set_dest) == ZERO_EXTRACT
16679 && CONST_INT_P (SET_SRC (curr_set))
16680 && CONST_INT_P (SET_SRC (prev_set))
16681 && CONST_INT_P (XEXP (set_dest, 2))
16682 && INTVAL (XEXP (set_dest, 2)) == 16
16683 && REG_P (XEXP (set_dest, 0))
16684 && REG_P (SET_DEST (prev_set))
16685 && REGNO (XEXP (set_dest, 0)) == REGNO (SET_DEST (prev_set)))
16686 {
16687 return true;
16688 }
16689 }
16690
d7b03373 16691 if (simple_sets_p && aarch64_fusion_enabled_p (AARCH64_FUSE_ADRP_ADD))
9bbe08fe
KT
16692 {
16693
16694 /* We're trying to match:
16695 prev (adrp) == (set (reg r1)
16696 (high (symbol_ref ("SYM"))))
16697 curr (add) == (set (reg r0)
16698 (lo_sum (reg r1)
16699 (symbol_ref ("SYM"))))
16700 Note that r0 need not necessarily be the same as r1, especially
16701 during pre-regalloc scheduling. */
16702
16703 if (satisfies_constraint_Ush (SET_SRC (prev_set))
16704 && REG_P (SET_DEST (prev_set)) && REG_P (SET_DEST (curr_set)))
16705 {
16706 if (GET_CODE (SET_SRC (curr_set)) == LO_SUM
16707 && REG_P (XEXP (SET_SRC (curr_set), 0))
16708 && REGNO (XEXP (SET_SRC (curr_set), 0))
16709 == REGNO (SET_DEST (prev_set))
16710 && rtx_equal_p (XEXP (SET_SRC (prev_set), 0),
16711 XEXP (SET_SRC (curr_set), 1)))
16712 return true;
16713 }
16714 }
16715
d7b03373 16716 if (simple_sets_p && aarch64_fusion_enabled_p (AARCH64_FUSE_MOVK_MOVK))
cd0cb232
KT
16717 {
16718
16719 /* We're trying to match:
16720 prev (movk) == (set (zero_extract (reg r0)
16721 (const_int 16)
16722 (const_int 32))
16723 (const_int imm16_1))
16724 curr (movk) == (set (zero_extract (reg r0)
16725 (const_int 16)
16726 (const_int 48))
16727 (const_int imm16_2)) */
16728
16729 if (GET_CODE (SET_DEST (prev_set)) == ZERO_EXTRACT
16730 && GET_CODE (SET_DEST (curr_set)) == ZERO_EXTRACT
16731 && REG_P (XEXP (SET_DEST (prev_set), 0))
16732 && REG_P (XEXP (SET_DEST (curr_set), 0))
16733 && REGNO (XEXP (SET_DEST (prev_set), 0))
16734 == REGNO (XEXP (SET_DEST (curr_set), 0))
16735 && CONST_INT_P (XEXP (SET_DEST (prev_set), 2))
16736 && CONST_INT_P (XEXP (SET_DEST (curr_set), 2))
16737 && INTVAL (XEXP (SET_DEST (prev_set), 2)) == 32
16738 && INTVAL (XEXP (SET_DEST (curr_set), 2)) == 48
16739 && CONST_INT_P (SET_SRC (prev_set))
16740 && CONST_INT_P (SET_SRC (curr_set)))
16741 return true;
16742
16743 }
d7b03373 16744 if (simple_sets_p && aarch64_fusion_enabled_p (AARCH64_FUSE_ADRP_LDR))
d8354ad7
KT
16745 {
16746 /* We're trying to match:
16747 prev (adrp) == (set (reg r0)
16748 (high (symbol_ref ("SYM"))))
16749 curr (ldr) == (set (reg r1)
16750 (mem (lo_sum (reg r0)
16751 (symbol_ref ("SYM")))))
16752 or
16753 curr (ldr) == (set (reg r1)
16754 (zero_extend (mem
16755 (lo_sum (reg r0)
16756 (symbol_ref ("SYM")))))) */
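	  /* An illustrative assembly form of such a pair (example register and
	     symbol names only):
	       adrp	x0, sym
	       ldr	x1, [x0, :lo12:sym]  */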
16757 if (satisfies_constraint_Ush (SET_SRC (prev_set))
16758 && REG_P (SET_DEST (prev_set)) && REG_P (SET_DEST (curr_set)))
16759 {
16760 rtx curr_src = SET_SRC (curr_set);
16761
16762 if (GET_CODE (curr_src) == ZERO_EXTEND)
16763 curr_src = XEXP (curr_src, 0);
16764
16765 if (MEM_P (curr_src) && GET_CODE (XEXP (curr_src, 0)) == LO_SUM
16766 && REG_P (XEXP (XEXP (curr_src, 0), 0))
16767 && REGNO (XEXP (XEXP (curr_src, 0), 0))
16768 == REGNO (SET_DEST (prev_set))
16769 && rtx_equal_p (XEXP (XEXP (curr_src, 0), 1),
16770 XEXP (SET_SRC (prev_set), 0)))
16771 return true;
16772 }
16773 }
cd0cb232 16774
d7b03373 16775 if (aarch64_fusion_enabled_p (AARCH64_FUSE_AES_AESMC)
00a8574a
WD
16776 && aarch_crypto_can_dual_issue (prev, curr))
16777 return true;
16778
d7b03373 16779 if (aarch64_fusion_enabled_p (AARCH64_FUSE_CMP_BRANCH)
3759108f
AP
16780 && any_condjump_p (curr))
16781 {
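      /* Fuse a flag-setting ALU instruction with the conditional branch that
	 consumes the flags, e.g. (an illustrative pair):
	   cmp	w0, 5
	   b.ne	.L3  */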
16782 enum attr_type prev_type = get_attr_type (prev);
16783
509f819a
N
16784 unsigned int condreg1, condreg2;
16785 rtx cc_reg_1;
16786 aarch64_fixed_condition_code_regs (&condreg1, &condreg2);
16787 cc_reg_1 = gen_rtx_REG (CCmode, condreg1);
16788
16789 if (reg_referenced_p (cc_reg_1, PATTERN (curr))
16790 && prev
16791 && modified_in_p (cc_reg_1, prev))
16792 {
16793 /* FIXME: this misses some cases that are considered simple arithmetic
16794 instructions for ThunderX. Simple shifts are missed here. */
16795 if (prev_type == TYPE_ALUS_SREG
16796 || prev_type == TYPE_ALUS_IMM
16797 || prev_type == TYPE_LOGICS_REG
16798 || prev_type == TYPE_LOGICS_IMM)
16799 return true;
16800 }
3759108f
AP
16801 }
16802
bee7e0fc
AP
16803 if (prev_set
16804 && curr_set
16805 && aarch64_fusion_enabled_p (AARCH64_FUSE_ALU_BRANCH)
00c7c57f
JB
16806 && any_condjump_p (curr))
16807 {
16808 /* We're trying to match:
16809 prev (alu_insn) == (set (r0) plus ((r0) (r1/imm)))
16810 curr (cbz) == (set (pc) (if_then_else (eq/ne) (r0)
16811 (const_int 0))
16812 (label_ref ("SYM"))
16813 (pc)) */
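      /* An illustrative assembly form of such a pair (example names only):
	   add	x0, x1, x2
	   cbz	x0, .L5  */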
16814 if (SET_DEST (curr_set) == (pc_rtx)
16815 && GET_CODE (SET_SRC (curr_set)) == IF_THEN_ELSE
16816 && REG_P (XEXP (XEXP (SET_SRC (curr_set), 0), 0))
16817 && REG_P (SET_DEST (prev_set))
16818 && REGNO (SET_DEST (prev_set))
16819 == REGNO (XEXP (XEXP (SET_SRC (curr_set), 0), 0)))
16820 {
16821 /* Fuse ALU operations followed by conditional branch instruction. */
16822 switch (get_attr_type (prev))
16823 {
16824 case TYPE_ALU_IMM:
16825 case TYPE_ALU_SREG:
16826 case TYPE_ADC_REG:
16827 case TYPE_ADC_IMM:
16828 case TYPE_ADCS_REG:
16829 case TYPE_ADCS_IMM:
16830 case TYPE_LOGIC_REG:
16831 case TYPE_LOGIC_IMM:
16832 case TYPE_CSEL:
16833 case TYPE_ADR:
16834 case TYPE_MOV_IMM:
16835 case TYPE_SHIFT_REG:
16836 case TYPE_SHIFT_IMM:
16837 case TYPE_BFM:
16838 case TYPE_RBIT:
16839 case TYPE_REV:
16840 case TYPE_EXTEND:
16841 return true;
16842
16843 default:;
16844 }
16845 }
16846 }
16847
6a569cdd
KT
16848 return false;
16849}
16850
f2879a90
KT
16851/* Return true iff the instruction fusion described by OP is enabled. */
16852
16853bool
16854aarch64_fusion_enabled_p (enum aarch64_fusion_pairs op)
16855{
16856 return (aarch64_tune_params.fusible_ops & op) != 0;
16857}
16858
350013bc
BC
16859/* If MEM is in the form of [base+offset], extract the two parts
16860 of the address and store them in BASE and OFFSET; otherwise return
16861 false after clearing BASE and OFFSET. */
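/* For illustration (hypothetical operands, not from the original source):
   for (mem (plus (reg x1) (const_int 16))) this sets *BASE to (reg x1) and
   *OFFSET to (const_int 16), while a plain (mem (reg x1)) yields *BASE of
   (reg x1) and *OFFSET of const0_rtx.  */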
16862
16863bool
16864extract_base_offset_in_addr (rtx mem, rtx *base, rtx *offset)
16865{
16866 rtx addr;
16867
16868 gcc_assert (MEM_P (mem));
16869
16870 addr = XEXP (mem, 0);
16871
16872 if (REG_P (addr))
16873 {
16874 *base = addr;
16875 *offset = const0_rtx;
16876 return true;
16877 }
16878
16879 if (GET_CODE (addr) == PLUS
16880 && REG_P (XEXP (addr, 0)) && CONST_INT_P (XEXP (addr, 1)))
16881 {
16882 *base = XEXP (addr, 0);
16883 *offset = XEXP (addr, 1);
16884 return true;
16885 }
16886
16887 *base = NULL_RTX;
16888 *offset = NULL_RTX;
16889
16890 return false;
16891}
16892
16893/* Types for scheduling fusion. */
16894enum sched_fusion_type
16895{
16896 SCHED_FUSION_NONE = 0,
16897 SCHED_FUSION_LD_SIGN_EXTEND,
16898 SCHED_FUSION_LD_ZERO_EXTEND,
16899 SCHED_FUSION_LD,
16900 SCHED_FUSION_ST,
16901 SCHED_FUSION_NUM
16902};
16903
16904/* If INSN is a load or store whose address is in the form [base+offset],
16905 extract the two parts and store them in BASE and OFFSET. Return the
16906 scheduling fusion type of this INSN. */
16907
16908static enum sched_fusion_type
16909fusion_load_store (rtx_insn *insn, rtx *base, rtx *offset)
16910{
16911 rtx x, dest, src;
16912 enum sched_fusion_type fusion = SCHED_FUSION_LD;
16913
16914 gcc_assert (INSN_P (insn));
16915 x = PATTERN (insn);
16916 if (GET_CODE (x) != SET)
16917 return SCHED_FUSION_NONE;
16918
16919 src = SET_SRC (x);
16920 dest = SET_DEST (x);
16921
abc52318
KT
16922 machine_mode dest_mode = GET_MODE (dest);
16923
16924 if (!aarch64_mode_valid_for_sched_fusion_p (dest_mode))
350013bc
BC
16925 return SCHED_FUSION_NONE;
16926
16927 if (GET_CODE (src) == SIGN_EXTEND)
16928 {
16929 fusion = SCHED_FUSION_LD_SIGN_EXTEND;
16930 src = XEXP (src, 0);
16931 if (GET_CODE (src) != MEM || GET_MODE (src) != SImode)
16932 return SCHED_FUSION_NONE;
16933 }
16934 else if (GET_CODE (src) == ZERO_EXTEND)
16935 {
16936 fusion = SCHED_FUSION_LD_ZERO_EXTEND;
16937 src = XEXP (src, 0);
16938 if (GET_CODE (src) != MEM || GET_MODE (src) != SImode)
16939 return SCHED_FUSION_NONE;
16940 }
16941
16942 if (GET_CODE (src) == MEM && REG_P (dest))
16943 extract_base_offset_in_addr (src, base, offset);
16944 else if (GET_CODE (dest) == MEM && (REG_P (src) || src == const0_rtx))
16945 {
16946 fusion = SCHED_FUSION_ST;
16947 extract_base_offset_in_addr (dest, base, offset);
16948 }
16949 else
16950 return SCHED_FUSION_NONE;
16951
16952 if (*base == NULL_RTX || *offset == NULL_RTX)
16953 fusion = SCHED_FUSION_NONE;
16954
16955 return fusion;
16956}
16957
16958/* Implement the TARGET_SCHED_FUSION_PRIORITY hook.
16959
16960 Currently we only support fusing ldr or str instructions, so FUSION_PRI
16961 and PRI are only calculated for these instructions. For other instructions,
16962 FUSION_PRI and PRI are simply set to MAX_PRI - 1. In the future, other
16963 types of instruction fusion can be added by returning different priorities.
16964
16965 It's important that irrelevant instructions get the largest FUSION_PRI. */
16966
16967static void
16968aarch64_sched_fusion_priority (rtx_insn *insn, int max_pri,
16969 int *fusion_pri, int *pri)
16970{
16971 int tmp, off_val;
16972 rtx base, offset;
16973 enum sched_fusion_type fusion;
16974
16975 gcc_assert (INSN_P (insn));
16976
16977 tmp = max_pri - 1;
16978 fusion = fusion_load_store (insn, &base, &offset);
16979 if (fusion == SCHED_FUSION_NONE)
16980 {
16981 *pri = tmp;
16982 *fusion_pri = tmp;
16983 return;
16984 }
16985
16986 /* Set FUSION_PRI according to fusion type and base register. */
16987 *fusion_pri = tmp - fusion * FIRST_PSEUDO_REGISTER - REGNO (base);
16988
16989 /* Calculate PRI. */
16990 tmp /= 2;
16991
16992 /* INSN with smaller offset goes first. */
16993 off_val = (int)(INTVAL (offset));
16994 if (off_val >= 0)
16995 tmp -= (off_val & 0xfffff);
16996 else
16997 tmp += ((- off_val) & 0xfffff);
16998
16999 *pri = tmp;
17000 return;
17001}
17002
9bca63d4
WD
17003/* Implement the TARGET_SCHED_ADJUST_PRIORITY hook.
17004 Adjust priority of sha1h instructions so they are scheduled before
17005 other SHA1 instructions. */
17006
17007static int
17008aarch64_sched_adjust_priority (rtx_insn *insn, int priority)
17009{
17010 rtx x = PATTERN (insn);
17011
17012 if (GET_CODE (x) == SET)
17013 {
17014 x = SET_SRC (x);
17015
17016 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_SHA1H)
17017 return priority + 10;
17018 }
17019
17020 return priority;
17021}
17022
350013bc
BC
17023/* Given OPERANDS of consecutive load/store, check if we can merge
17024 them into ldp/stp. LOAD is true if they are load instructions.
17025 MODE is the mode of memory operands. */
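/* For illustration (hypothetical registers and offsets), with SImode
   operands this permits
     ldr	w0, [x2]
     ldr	w1, [x2, 4]
   to be combined by the ldp/stp peepholes into
     ldp	w0, w1, [x2]  */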
17026
17027bool
17028aarch64_operands_ok_for_ldpstp (rtx *operands, bool load,
b8506a8a 17029 machine_mode mode)
350013bc
BC
17030{
17031 HOST_WIDE_INT offval_1, offval_2, msize;
17032 enum reg_class rclass_1, rclass_2;
17033 rtx mem_1, mem_2, reg_1, reg_2, base_1, base_2, offset_1, offset_2;
17034
17035 if (load)
17036 {
17037 mem_1 = operands[1];
17038 mem_2 = operands[3];
17039 reg_1 = operands[0];
17040 reg_2 = operands[2];
17041 gcc_assert (REG_P (reg_1) && REG_P (reg_2));
17042 if (REGNO (reg_1) == REGNO (reg_2))
17043 return false;
17044 }
17045 else
17046 {
17047 mem_1 = operands[0];
17048 mem_2 = operands[2];
17049 reg_1 = operands[1];
17050 reg_2 = operands[3];
17051 }
17052
bf84ac44
AP
17053 /* The mems cannot be volatile. */
17054 if (MEM_VOLATILE_P (mem_1) || MEM_VOLATILE_P (mem_2))
17055 return false;
17056
54700e2e
AP
17057 /* If we have SImode and slow unaligned ldp,
17058 check that the alignment is at least 8 bytes. */
17059 if (mode == SImode
17060 && (aarch64_tune_params.extra_tuning_flags
17061 & AARCH64_EXTRA_TUNE_SLOW_UNALIGNED_LDPW)
17062 && !optimize_size
17063 && MEM_ALIGN (mem_1) < 8 * BITS_PER_UNIT)
17064 return false;
17065
350013bc
BC
17066 /* Check if the addresses are in the form of [base+offset]. */
17067 extract_base_offset_in_addr (mem_1, &base_1, &offset_1);
17068 if (base_1 == NULL_RTX || offset_1 == NULL_RTX)
17069 return false;
17070 extract_base_offset_in_addr (mem_2, &base_2, &offset_2);
17071 if (base_2 == NULL_RTX || offset_2 == NULL_RTX)
17072 return false;
17073
17074 /* Check if the bases are the same. */
17075 if (!rtx_equal_p (base_1, base_2))
17076 return false;
17077
dfe1da23
JW
17078 /* The operands must be of the same size. */
17079 gcc_assert (known_eq (GET_MODE_SIZE (GET_MODE (mem_1)),
17080 GET_MODE_SIZE (GET_MODE (mem_2))));
17081
350013bc
BC
17082 offval_1 = INTVAL (offset_1);
17083 offval_2 = INTVAL (offset_2);
6a70badb
RS
17084 /* We should only be trying this for fixed-sized modes. There is no
17085 SVE LDP/STP instruction. */
17086 msize = GET_MODE_SIZE (mode).to_constant ();
350013bc
BC
17087 /* Check if the offsets are consecutive. */
17088 if (offval_1 != (offval_2 + msize) && offval_2 != (offval_1 + msize))
17089 return false;
17090
17091 /* Check if the addresses are clobbered by load. */
17092 if (load)
17093 {
17094 if (reg_mentioned_p (reg_1, mem_1))
17095 return false;
17096
17097 /* In increasing order, the last load can clobber the address. */
17098 if (offval_1 > offval_2 && reg_mentioned_p (reg_2, mem_2))
9b56ec11 17099 return false;
350013bc
BC
17100 }
17101
9b56ec11
JW
17102 /* One of the memory accesses must be a mempair operand.
17103 If it is not the first one, they need to be swapped by the
17104 peephole. */
17105 if (!aarch64_mem_pair_operand (mem_1, GET_MODE (mem_1))
17106 && !aarch64_mem_pair_operand (mem_2, GET_MODE (mem_2)))
17107 return false;
17108
350013bc
BC
17109 if (REG_P (reg_1) && FP_REGNUM_P (REGNO (reg_1)))
17110 rclass_1 = FP_REGS;
17111 else
17112 rclass_1 = GENERAL_REGS;
17113
17114 if (REG_P (reg_2) && FP_REGNUM_P (REGNO (reg_2)))
17115 rclass_2 = FP_REGS;
17116 else
17117 rclass_2 = GENERAL_REGS;
17118
17119 /* Check if the registers are of the same class. */
17120 if (rclass_1 != rclass_2)
17121 return false;
17122
17123 return true;
17124}
17125
9b56ec11
JW
17126/* Given OPERANDS of consecutive load/store that can be merged,
17127 swap them if they are not in ascending order. */
17128void
17129aarch64_swap_ldrstr_operands (rtx* operands, bool load)
17130{
17131 rtx mem_1, mem_2, base_1, base_2, offset_1, offset_2;
17132 HOST_WIDE_INT offval_1, offval_2;
17133
17134 if (load)
17135 {
17136 mem_1 = operands[1];
17137 mem_2 = operands[3];
17138 }
17139 else
17140 {
17141 mem_1 = operands[0];
17142 mem_2 = operands[2];
17143 }
17144
17145 extract_base_offset_in_addr (mem_1, &base_1, &offset_1);
17146 extract_base_offset_in_addr (mem_2, &base_2, &offset_2);
17147
17148 offval_1 = INTVAL (offset_1);
17149 offval_2 = INTVAL (offset_2);
17150
17151 if (offval_1 > offval_2)
17152 {
17153 /* Irrespective of whether this is a load or a store,
17154 we do the same swap. */
17155 std::swap (operands[0], operands[2]);
17156 std::swap (operands[1], operands[3]);
17157 }
17158}
17159
d0b51297
JW
17160/* Taking X and Y to be HOST_WIDE_INT pointers, return the result of a
17161 comparison between the two. */
17162int
17163aarch64_host_wide_int_compare (const void *x, const void *y)
17164{
17165 return wi::cmps (* ((const HOST_WIDE_INT *) x),
17166 * ((const HOST_WIDE_INT *) y));
17167}
17168
17169/* Taking X and Y to be pairs of RTX, each pair consisting of a MEM rtx
17170 and a REG rtx, compare the offsets of the addresses in the two MEM
17171 operands.
17172
17173 Return:
17174
17175 1 iff offset (X) > offset (Y)
17176 0 iff offset (X) == offset (Y)
17177 -1 iff offset (X) < offset (Y) */
17178int
17179aarch64_ldrstr_offset_compare (const void *x, const void *y)
17180{
17181 const rtx * operands_1 = (const rtx *) x;
17182 const rtx * operands_2 = (const rtx *) y;
17183 rtx mem_1, mem_2, base, offset_1, offset_2;
17184
17185 if (MEM_P (operands_1[0]))
17186 mem_1 = operands_1[0];
17187 else
17188 mem_1 = operands_1[1];
17189
17190 if (MEM_P (operands_2[0]))
17191 mem_2 = operands_2[0];
17192 else
17193 mem_2 = operands_2[1];
17194
17195 /* Extract the offsets. */
17196 extract_base_offset_in_addr (mem_1, &base, &offset_1);
17197 extract_base_offset_in_addr (mem_2, &base, &offset_2);
17198
17199 gcc_assert (offset_1 != NULL_RTX && offset_2 != NULL_RTX);
17200
17201 return wi::cmps (INTVAL (offset_1), INTVAL (offset_2));
17202}
17203
350013bc
BC
17204/* Given OPERANDS of consecutive load/store, check if we can merge
17205 them into ldp/stp by adjusting the offset. LOAD is true if they
17206 are load instructions. MODE is the mode of memory operands.
17207
17208 Given the following consecutive stores:
17209
17210 str w1, [xb, 0x100]
17211 str w1, [xb, 0x104]
17212 str w1, [xb, 0x108]
17213 str w1, [xb, 0x10c]
17214
17215 Though the offsets are out of the range supported by stp, we can
17216 still pair them after adjusting the offset, like:
17217
17218 add scratch, xb, 0x100
17219 stp w1, w1, [scratch]
17220 stp w1, w1, [scratch, 0x8]
17221
17222 The peephole patterns detecting this opportunity should guarantee
17223 the scratch register is available. */
17224
17225bool
17226aarch64_operands_adjust_ok_for_ldpstp (rtx *operands, bool load,
146c2e3a 17227 scalar_mode mode)
350013bc 17228{
34d7854d
JW
17229 const int num_insns = 4;
17230 enum reg_class rclass;
17231 HOST_WIDE_INT offvals[num_insns], msize;
17232 rtx mem[num_insns], reg[num_insns], base[num_insns], offset[num_insns];
350013bc
BC
17233
17234 if (load)
17235 {
34d7854d
JW
17236 for (int i = 0; i < num_insns; i++)
17237 {
17238 reg[i] = operands[2 * i];
17239 mem[i] = operands[2 * i + 1];
17240
17241 gcc_assert (REG_P (reg[i]));
17242 }
d0b51297
JW
17243
17244 /* Do not attempt to merge the loads if the loads clobber each other. */
17245 for (int i = 0; i < 8; i += 2)
17246 for (int j = i + 2; j < 8; j += 2)
17247 if (reg_overlap_mentioned_p (operands[i], operands[j]))
17248 return false;
350013bc
BC
17249 }
17250 else
34d7854d
JW
17251 for (int i = 0; i < num_insns; i++)
17252 {
17253 mem[i] = operands[2 * i];
17254 reg[i] = operands[2 * i + 1];
17255 }
350013bc 17256
34d7854d
JW
17257 /* Skip if memory operand is by itself valid for ldp/stp. */
17258 if (!MEM_P (mem[0]) || aarch64_mem_pair_operand (mem[0], mode))
bf84ac44
AP
17259 return false;
17260
34d7854d
JW
17261 for (int i = 0; i < num_insns; i++)
17262 {
17263 /* The mems cannot be volatile. */
17264 if (MEM_VOLATILE_P (mem[i]))
17265 return false;
17266
17267 /* Check if the addresses are in the form of [base+offset]. */
17268 extract_base_offset_in_addr (mem[i], base + i, offset + i);
17269 if (base[i] == NULL_RTX || offset[i] == NULL_RTX)
17270 return false;
17271 }
17272
363b395b
JW
17273 /* Check if the registers are of the same class. */
17274 rclass = REG_P (reg[0]) && FP_REGNUM_P (REGNO (reg[0]))
17275 ? FP_REGS : GENERAL_REGS;
17276
17277 for (int i = 1; i < num_insns; i++)
17278 if (REG_P (reg[i]) && FP_REGNUM_P (REGNO (reg[i])))
17279 {
17280 if (rclass != FP_REGS)
17281 return false;
17282 }
17283 else
17284 {
17285 if (rclass != GENERAL_REGS)
17286 return false;
17287 }
17288
17289 /* Only the last register in the order in which they occur
17290 may be clobbered by the load. */
17291 if (rclass == GENERAL_REGS && load)
17292 for (int i = 0; i < num_insns - 1; i++)
34d7854d
JW
17293 if (reg_mentioned_p (reg[i], mem[i]))
17294 return false;
350013bc
BC
17295
17296 /* Check if the bases are the same. */
34d7854d
JW
17297 for (int i = 0; i < num_insns - 1; i++)
17298 if (!rtx_equal_p (base[i], base[i + 1]))
17299 return false;
17300
17301 for (int i = 0; i < num_insns; i++)
17302 offvals[i] = INTVAL (offset[i]);
350013bc 17303
350013bc 17304 msize = GET_MODE_SIZE (mode);
d0b51297
JW
17305
17306 /* Check if the offsets can be put in the right order to do a ldp/stp. */
34d7854d
JW
17307 qsort (offvals, num_insns, sizeof (HOST_WIDE_INT),
17308 aarch64_host_wide_int_compare);
d0b51297
JW
17309
17310 if (!(offvals[1] == offvals[0] + msize
17311 && offvals[3] == offvals[2] + msize))
350013bc
BC
17312 return false;
17313
d0b51297
JW
17314 /* Check that offsets are within range of each other. The ldp/stp
17315 instructions have 7 bit immediate offsets, so use 0x80. */
17316 if (offvals[2] - offvals[0] >= msize * 0x80)
17317 return false;
350013bc 17318
d0b51297
JW
17319 /* The offsets must be aligned with respect to each other. */
17320 if (offvals[0] % msize != offvals[2] % msize)
17321 return false;
17322
54700e2e
AP
17323 /* If we have SImode and slow unaligned ldp,
17324 check that the alignment is at least 8 bytes. */
17325 if (mode == SImode
17326 && (aarch64_tune_params.extra_tuning_flags
34d7854d 17327 & AARCH64_EXTRA_TUNE_SLOW_UNALIGNED_LDPW)
54700e2e 17328 && !optimize_size
34d7854d 17329 && MEM_ALIGN (mem[0]) < 8 * BITS_PER_UNIT)
54700e2e
AP
17330 return false;
17331
350013bc
BC
17332 return true;
17333}
17334
17335/* Given OPERANDS of consecutive load/store, this function pairs them
d0b51297
JW
17336 into LDP/STP after adjusting the offset. It depends on the fact
17337 that the operands can be sorted so the offsets are correct for STP.
350013bc
BC
17338 MODE is the mode of memory operands. CODE is the rtl operator
17339 which should be applied to all memory operands, it's SIGN_EXTEND,
17340 ZERO_EXTEND or UNKNOWN. */
17341
17342bool
17343aarch64_gen_adjusted_ldpstp (rtx *operands, bool load,
146c2e3a 17344 scalar_mode mode, RTX_CODE code)
350013bc 17345{
d0b51297 17346 rtx base, offset_1, offset_3, t1, t2;
350013bc 17347 rtx mem_1, mem_2, mem_3, mem_4;
d0b51297
JW
17348 rtx temp_operands[8];
17349 HOST_WIDE_INT off_val_1, off_val_3, base_off, new_off_1, new_off_3,
17350 stp_off_upper_limit, stp_off_lower_limit, msize;
9b56ec11 17351
d0b51297
JW
17352 /* We make changes on a copy as we may still bail out. */
17353 for (int i = 0; i < 8; i ++)
17354 temp_operands[i] = operands[i];
9b56ec11 17355
d0b51297
JW
17356 /* Sort the operands. */
17357 qsort (temp_operands, 4, 2 * sizeof (rtx *), aarch64_ldrstr_offset_compare);
9b56ec11 17358
350013bc
BC
17359 if (load)
17360 {
d0b51297
JW
17361 mem_1 = temp_operands[1];
17362 mem_2 = temp_operands[3];
17363 mem_3 = temp_operands[5];
17364 mem_4 = temp_operands[7];
350013bc
BC
17365 }
17366 else
17367 {
d0b51297
JW
17368 mem_1 = temp_operands[0];
17369 mem_2 = temp_operands[2];
17370 mem_3 = temp_operands[4];
17371 mem_4 = temp_operands[6];
350013bc
BC
17372 gcc_assert (code == UNKNOWN);
17373 }
17374
9b56ec11 17375 extract_base_offset_in_addr (mem_1, &base, &offset_1);
d0b51297
JW
17376 extract_base_offset_in_addr (mem_3, &base, &offset_3);
17377 gcc_assert (base != NULL_RTX && offset_1 != NULL_RTX
17378 && offset_3 != NULL_RTX);
350013bc 17379
d0b51297 17380 /* Adjust offset so it can fit in LDP/STP instruction. */
350013bc 17381 msize = GET_MODE_SIZE (mode);
d0b51297
JW
17382 stp_off_upper_limit = msize * (0x40 - 1);
17383 stp_off_lower_limit = - msize * 0x40;
350013bc 17384
d0b51297
JW
17385 off_val_1 = INTVAL (offset_1);
17386 off_val_3 = INTVAL (offset_3);
17387
17388 /* The base offset is optimally halfway between the two STP/LDP offsets. */
17389 if (msize <= 4)
17390 base_off = (off_val_1 + off_val_3) / 2;
17391 else
17392 /* However, due to issues with negative LDP/STP offset generation for
17393 larger modes (DF, DI and vector modes), we must not use negative
17394 addresses smaller than 9 signed unadjusted bits can store. This
17395 provides the most range in this case. */
17396 base_off = off_val_1;
17397
17398 /* Adjust the base so that it is aligned with the addresses but still
17399 optimal. */
17400 if (base_off % msize != off_val_1 % msize)
17401 /* Fix the offset, bearing in mind we want to make it bigger not
17402 smaller. */
17403 base_off += (((base_off % msize) - (off_val_1 % msize)) + msize) % msize;
17404 else if (msize <= 4)
17405 /* The negative range of LDP/STP is one larger than the positive range. */
17406 base_off += msize;
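  /* An illustrative trace (hypothetical values): with msize == 4,
     off_val_1 == 0x100 and off_val_3 == 0x108, the code above leaves
     base_off at 0x108, so the two pairs emitted below get offsets of
     -8 and 0 from the adjusted base.  */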
17407
17408 /* Check if base offset is too big or too small. We can attempt to resolve
17409 this issue by setting it to the maximum value and seeing if the offsets
17410 still fit. */
17411 if (base_off >= 0x1000)
350013bc 17412 {
d0b51297
JW
17413 base_off = 0x1000 - 1;
17414 /* We must still make sure that the base offset is aligned with respect
17415 to the address. But it may not be made any bigger. */
17416 base_off -= (((base_off % msize) - (off_val_1 % msize)) + msize) % msize;
350013bc
BC
17417 }
17418
d0b51297
JW
17419 /* Likewise for the case where the base is too small. */
17420 if (base_off <= -0x1000)
350013bc 17421 {
d0b51297
JW
17422 base_off = -0x1000 + 1;
17423 base_off += (((base_off % msize) - (off_val_1 % msize)) + msize) % msize;
350013bc
BC
17424 }
17425
d0b51297
JW
17426 /* Offset of the first STP/LDP. */
17427 new_off_1 = off_val_1 - base_off;
17428
17429 /* Offset of the second STP/LDP. */
17430 new_off_3 = off_val_3 - base_off;
350013bc 17431
d0b51297
JW
17432 /* The offsets must be within the range of the LDP/STP instructions. */
17433 if (new_off_1 > stp_off_upper_limit || new_off_1 < stp_off_lower_limit
17434 || new_off_3 > stp_off_upper_limit || new_off_3 < stp_off_lower_limit)
350013bc
BC
17435 return false;
17436
d0b51297
JW
17437 replace_equiv_address_nv (mem_1, plus_constant (Pmode, operands[8],
17438 new_off_1), true);
17439 replace_equiv_address_nv (mem_2, plus_constant (Pmode, operands[8],
17440 new_off_1 + msize), true);
17441 replace_equiv_address_nv (mem_3, plus_constant (Pmode, operands[8],
17442 new_off_3), true);
17443 replace_equiv_address_nv (mem_4, plus_constant (Pmode, operands[8],
17444 new_off_3 + msize), true);
17445
17446 if (!aarch64_mem_pair_operand (mem_1, mode)
17447 || !aarch64_mem_pair_operand (mem_3, mode))
17448 return false;
350013bc
BC
17449
17450 if (code == ZERO_EXTEND)
17451 {
17452 mem_1 = gen_rtx_ZERO_EXTEND (DImode, mem_1);
17453 mem_2 = gen_rtx_ZERO_EXTEND (DImode, mem_2);
17454 mem_3 = gen_rtx_ZERO_EXTEND (DImode, mem_3);
17455 mem_4 = gen_rtx_ZERO_EXTEND (DImode, mem_4);
17456 }
17457 else if (code == SIGN_EXTEND)
17458 {
17459 mem_1 = gen_rtx_SIGN_EXTEND (DImode, mem_1);
17460 mem_2 = gen_rtx_SIGN_EXTEND (DImode, mem_2);
17461 mem_3 = gen_rtx_SIGN_EXTEND (DImode, mem_3);
17462 mem_4 = gen_rtx_SIGN_EXTEND (DImode, mem_4);
17463 }
17464
17465 if (load)
17466 {
d0b51297 17467 operands[0] = temp_operands[0];
350013bc 17468 operands[1] = mem_1;
d0b51297 17469 operands[2] = temp_operands[2];
350013bc 17470 operands[3] = mem_2;
d0b51297 17471 operands[4] = temp_operands[4];
350013bc 17472 operands[5] = mem_3;
d0b51297 17473 operands[6] = temp_operands[6];
350013bc
BC
17474 operands[7] = mem_4;
17475 }
17476 else
17477 {
17478 operands[0] = mem_1;
d0b51297 17479 operands[1] = temp_operands[1];
350013bc 17480 operands[2] = mem_2;
d0b51297 17481 operands[3] = temp_operands[3];
350013bc 17482 operands[4] = mem_3;
d0b51297 17483 operands[5] = temp_operands[5];
350013bc 17484 operands[6] = mem_4;
d0b51297 17485 operands[7] = temp_operands[7];
350013bc
BC
17486 }
17487
17488 /* Emit adjusting instruction. */
d0b51297 17489 emit_insn (gen_rtx_SET (operands[8], plus_constant (DImode, base, base_off)));
350013bc 17490 /* Emit ldp/stp instructions. */
f7df4a84
RS
17491 t1 = gen_rtx_SET (operands[0], operands[1]);
17492 t2 = gen_rtx_SET (operands[2], operands[3]);
350013bc 17493 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, t1, t2)));
f7df4a84
RS
17494 t1 = gen_rtx_SET (operands[4], operands[5]);
17495 t2 = gen_rtx_SET (operands[6], operands[7]);
350013bc
BC
17496 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, t1, t2)));
17497 return true;
17498}
17499
76a34e3f
RS
17500/* Implement TARGET_VECTORIZE_EMPTY_MASK_IS_EXPENSIVE. Assume for now that
17501 it isn't worth branching around empty masked ops (including masked
17502 stores). */
17503
17504static bool
17505aarch64_empty_mask_is_expensive (unsigned)
17506{
17507 return false;
17508}
17509
1b1e81f8
JW
17510/* Return true if a pseudo register should be created and used to hold
17511 the GOT address for PIC code. */
17512
17513bool
17514aarch64_use_pseudo_pic_reg (void)
17515{
17516 return aarch64_cmodel == AARCH64_CMODEL_SMALL_SPIC;
17517}
17518
7b841a12
JW
17519/* Implement TARGET_UNSPEC_MAY_TRAP_P. */
17520
17521static int
17522aarch64_unspec_may_trap_p (const_rtx x, unsigned flags)
17523{
17524 switch (XINT (x, 1))
17525 {
17526 case UNSPEC_GOTSMALLPIC:
17527 case UNSPEC_GOTSMALLPIC28K:
17528 case UNSPEC_GOTTINYPIC:
17529 return 0;
17530 default:
17531 break;
17532 }
17533
17534 return default_unspec_may_trap_p (x, flags);
17535}
17536
39252973
KT
17537
17538/* If X is a positive CONST_DOUBLE with a value that is a power of 2
17539 return the log2 of that value. Otherwise return -1. */
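/* For example (illustrative values): 8.0 yields 3, 1.0 yields 0, while
   inputs such as 0.5, 6.0 or -4.0 yield -1.  */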
17540
17541int
17542aarch64_fpconst_pow_of_2 (rtx x)
17543{
17544 const REAL_VALUE_TYPE *r;
17545
17546 if (!CONST_DOUBLE_P (x))
17547 return -1;
17548
17549 r = CONST_DOUBLE_REAL_VALUE (x);
17550
17551 if (REAL_VALUE_NEGATIVE (*r)
17552 || REAL_VALUE_ISNAN (*r)
17553 || REAL_VALUE_ISINF (*r)
17554 || !real_isinteger (r, DFmode))
17555 return -1;
17556
17557 return exact_log2 (real_to_integer (r));
17558}
17559
17560/* If X is a vector of equal CONST_DOUBLE values and that value is
17561 Y, return the aarch64_fpconst_pow_of_2 of Y. Otherwise return -1. */
17562
17563int
17564aarch64_vec_fpconst_pow_of_2 (rtx x)
17565{
6a70badb
RS
17566 int nelts;
17567 if (GET_CODE (x) != CONST_VECTOR
17568 || !CONST_VECTOR_NUNITS (x).is_constant (&nelts))
39252973
KT
17569 return -1;
17570
17571 if (GET_MODE_CLASS (GET_MODE (x)) != MODE_VECTOR_FLOAT)
17572 return -1;
17573
17574 int firstval = aarch64_fpconst_pow_of_2 (CONST_VECTOR_ELT (x, 0));
17575 if (firstval <= 0)
17576 return -1;
17577
6a70badb 17578 for (int i = 1; i < nelts; i++)
39252973
KT
17579 if (aarch64_fpconst_pow_of_2 (CONST_VECTOR_ELT (x, i)) != firstval)
17580 return -1;
17581
17582 return firstval;
17583}
17584
11e554b3
JG
17585/* Implement TARGET_PROMOTED_TYPE to promote 16-bit floating point types
17586 to float.
17587
17588 __fp16 always promotes through this hook.
17589 _Float16 may promote if TARGET_FLT_EVAL_METHOD is 16, but we do that
17590 through the generic excess precision logic rather than here. */
17591
c2ec330c
AL
17592static tree
17593aarch64_promoted_type (const_tree t)
17594{
11e554b3
JG
17595 if (SCALAR_FLOAT_TYPE_P (t)
17596 && TYPE_MAIN_VARIANT (t) == aarch64_fp16_type_node)
c2ec330c 17597 return float_type_node;
11e554b3 17598
c2ec330c
AL
17599 return NULL_TREE;
17600}
ee62a5a6
RS
17601
17602/* Implement the TARGET_OPTAB_SUPPORTED_P hook. */
17603
17604static bool
9acc9cbe 17605aarch64_optab_supported_p (int op, machine_mode mode1, machine_mode,
ee62a5a6
RS
17606 optimization_type opt_type)
17607{
17608 switch (op)
17609 {
17610 case rsqrt_optab:
9acc9cbe 17611 return opt_type == OPTIMIZE_FOR_SPEED && use_rsqrt_p (mode1);
ee62a5a6
RS
17612
17613 default:
17614 return true;
17615 }
17616}
17617
43cacb12
RS
17618/* Implement the TARGET_DWARF_POLY_INDETERMINATE_VALUE hook. */
17619
17620static unsigned int
17621aarch64_dwarf_poly_indeterminate_value (unsigned int i, unsigned int *factor,
17622 int *offset)
17623{
17624 /* Polynomial invariant 1 == (VG / 2) - 1. */
17625 gcc_assert (i == 1);
17626 *factor = 2;
17627 *offset = 1;
17628 return AARCH64_DWARF_VG;
17629}
17630
11e554b3
JG
17631/* Implement TARGET_LIBGCC_FLOATING_MODE_SUPPORTED_P - return TRUE
17632 if MODE is HFmode, and punt to the generic implementation otherwise. */
17633
17634static bool
7c5bd57a 17635aarch64_libgcc_floating_mode_supported_p (scalar_float_mode mode)
11e554b3
JG
17636{
17637 return (mode == HFmode
17638 ? true
17639 : default_libgcc_floating_mode_supported_p (mode));
17640}
17641
2e5f8203
JG
17642/* Implement TARGET_SCALAR_MODE_SUPPORTED_P - return TRUE
17643 if MODE is HFmode, and punt to the generic implementation otherwise. */
17644
17645static bool
18e2a8b8 17646aarch64_scalar_mode_supported_p (scalar_mode mode)
2e5f8203
JG
17647{
17648 return (mode == HFmode
17649 ? true
17650 : default_scalar_mode_supported_p (mode));
17651}
17652
11e554b3
JG
17653/* Set the value of FLT_EVAL_METHOD.
17654 ISO/IEC TS 18661-3 defines two values that we'd like to make use of:
17655
17656 0: evaluate all operations and constants, whose semantic type has at
17657 most the range and precision of type float, to the range and
17658 precision of float; evaluate all other operations and constants to
17659 the range and precision of the semantic type;
17660
17661 N, where _FloatN is a supported interchange floating type:
17662 evaluate all operations and constants, whose semantic type has at
17663 most the range and precision of _FloatN type, to the range and
17664 precision of the _FloatN type; evaluate all other operations and
17665 constants to the range and precision of the semantic type;
17666
17667 If we have the ARMv8.2-A extensions then we support _Float16 in native
17668 precision, so we should set this to 16. Otherwise, we support the type,
17669 but want to evaluate expressions in float precision, so set this to
17670 0. */
17671
17672static enum flt_eval_method
17673aarch64_excess_precision (enum excess_precision_type type)
17674{
17675 switch (type)
17676 {
17677 case EXCESS_PRECISION_TYPE_FAST:
17678 case EXCESS_PRECISION_TYPE_STANDARD:
17679 /* We can calculate either in 16-bit range and precision or
17680 32-bit range and precision. Make that decision based on whether
17681 we have native support for the ARMv8.2-A 16-bit floating-point
17682 instructions or not. */
17683 return (TARGET_FP_F16INST
17684 ? FLT_EVAL_METHOD_PROMOTE_TO_FLOAT16
17685 : FLT_EVAL_METHOD_PROMOTE_TO_FLOAT);
17686 case EXCESS_PRECISION_TYPE_IMPLICIT:
17687 return FLT_EVAL_METHOD_PROMOTE_TO_FLOAT16;
17688 default:
17689 gcc_unreachable ();
17690 }
17691 return FLT_EVAL_METHOD_UNPREDICTABLE;
17692}
17693
b48d6421
KT
17694/* Implement TARGET_SCHED_CAN_SPECULATE_INSN. Return true if INSN can be
17695 scheduled for speculative execution. Reject the long-running division
17696 and square-root instructions. */
17697
17698static bool
17699aarch64_sched_can_speculate_insn (rtx_insn *insn)
17700{
17701 switch (get_attr_type (insn))
17702 {
17703 case TYPE_SDIV:
17704 case TYPE_UDIV:
17705 case TYPE_FDIVS:
17706 case TYPE_FDIVD:
17707 case TYPE_FSQRTS:
17708 case TYPE_FSQRTD:
17709 case TYPE_NEON_FP_SQRT_S:
17710 case TYPE_NEON_FP_SQRT_D:
17711 case TYPE_NEON_FP_SQRT_S_Q:
17712 case TYPE_NEON_FP_SQRT_D_Q:
17713 case TYPE_NEON_FP_DIV_S:
17714 case TYPE_NEON_FP_DIV_D:
17715 case TYPE_NEON_FP_DIV_S_Q:
17716 case TYPE_NEON_FP_DIV_D_Q:
17717 return false;
17718 default:
17719 return true;
17720 }
17721}
17722
43cacb12
RS
17723/* Implement TARGET_COMPUTE_PRESSURE_CLASSES. */
17724
17725static int
17726aarch64_compute_pressure_classes (reg_class *classes)
17727{
17728 int i = 0;
17729 classes[i++] = GENERAL_REGS;
17730 classes[i++] = FP_REGS;
17731 /* PR_REGS isn't a useful pressure class because many predicate pseudo
17732 registers need to go in PR_LO_REGS at some point during their
17733 lifetime. Splitting it into two halves has the effect of making
17734 all predicates count against PR_LO_REGS, so that we try whenever
17735 possible to restrict the number of live predicates to 8. This
17736 greatly reduces the amount of spilling in certain loops. */
17737 classes[i++] = PR_LO_REGS;
17738 classes[i++] = PR_HI_REGS;
17739 return i;
17740}
17741
17742/* Implement TARGET_CAN_CHANGE_MODE_CLASS. */
17743
17744static bool
17745aarch64_can_change_mode_class (machine_mode from,
17746 machine_mode to, reg_class_t)
17747{
002092be
RS
17748 if (BYTES_BIG_ENDIAN)
17749 {
17750 bool from_sve_p = aarch64_sve_data_mode_p (from);
17751 bool to_sve_p = aarch64_sve_data_mode_p (to);
17752
17753 /* Don't allow changes between SVE data modes and non-SVE modes.
17754 See the comment at the head of aarch64-sve.md for details. */
17755 if (from_sve_p != to_sve_p)
17756 return false;
17757
17758 /* Don't allow changes in element size: lane 0 of the new vector
17759 would not then be lane 0 of the old vector. See the comment
17760 above aarch64_maybe_expand_sve_subreg_move for a more detailed
17761 description.
17762
17763 In the worst case, this forces a register to be spilled in
17764 one mode and reloaded in the other, which handles the
17765 endianness correctly. */
17766 if (from_sve_p && GET_MODE_UNIT_SIZE (from) != GET_MODE_UNIT_SIZE (to))
17767 return false;
17768 }
43cacb12
RS
17769 return true;
17770}
17771
5cce8171
RS
17772/* Implement TARGET_EARLY_REMAT_MODES. */
17773
17774static void
17775aarch64_select_early_remat_modes (sbitmap modes)
17776{
17777 /* SVE values are not normally live across a call, so it should be
17778 worth doing early rematerialization even in VL-specific mode. */
17779 for (int i = 0; i < NUM_MACHINE_MODES; ++i)
17780 {
17781 machine_mode mode = (machine_mode) i;
17782 unsigned int vec_flags = aarch64_classify_vector_mode (mode);
17783 if (vec_flags & VEC_ANY_SVE)
17784 bitmap_set_bit (modes, i);
17785 }
17786}
17787
c0111dc4
RE
17788/* Override the default target speculation_safe_value. */
17789static rtx
17790aarch64_speculation_safe_value (machine_mode mode,
17791 rtx result, rtx val, rtx failval)
17792{
17793 /* Maybe we should warn if falling back to hard barriers. They are
17794 likely to be noticeably more expensive than the alternative below. */
17795 if (!aarch64_track_speculation)
17796 return default_speculation_safe_value (mode, result, val, failval);
17797
17798 if (!REG_P (val))
17799 val = copy_to_mode_reg (mode, val);
17800
17801 if (!aarch64_reg_or_zero (failval, mode))
17802 failval = copy_to_mode_reg (mode, failval);
17803
17804 switch (mode)
17805 {
17806 case E_QImode:
17807 emit_insn (gen_despeculate_copyqi (result, val, failval));
17808 break;
17809 case E_HImode:
17810 emit_insn (gen_despeculate_copyhi (result, val, failval));
17811 break;
17812 case E_SImode:
17813 emit_insn (gen_despeculate_copysi (result, val, failval));
17814 break;
17815 case E_DImode:
17816 emit_insn (gen_despeculate_copydi (result, val, failval));
17817 break;
17818 case E_TImode:
17819 emit_insn (gen_despeculate_copyti (result, val, failval));
17820 break;
17821 default:
17822 gcc_unreachable ();
17823 }
17824 return result;
17825}
17826
51b86113
DM
17827/* Target-specific selftests. */
17828
17829#if CHECKING_P
17830
17831namespace selftest {
17832
17833/* Selftest for the RTL loader.
17834 Verify that the RTL loader copes with a dump from
17835 print_rtx_function. This is essentially just a test that class
17836 function_reader can handle a real dump, but it also verifies
17837 that lookup_reg_by_dump_name correctly handles hard regs.
17838 The presence of hard reg names in the dump means that the test is
17839 target-specific, hence it is in this file. */
17840
17841static void
17842aarch64_test_loading_full_dump ()
17843{
17844 rtl_dump_test t (SELFTEST_LOCATION, locate_file ("aarch64/times-two.rtl"));
17845
17846 ASSERT_STREQ ("times_two", IDENTIFIER_POINTER (DECL_NAME (cfun->decl)));
17847
17848 rtx_insn *insn_1 = get_insn_by_uid (1);
17849 ASSERT_EQ (NOTE, GET_CODE (insn_1));
17850
17851 rtx_insn *insn_15 = get_insn_by_uid (15);
17852 ASSERT_EQ (INSN, GET_CODE (insn_15));
17853 ASSERT_EQ (USE, GET_CODE (PATTERN (insn_15)));
17854
17855 /* Verify crtl->return_rtx. */
17856 ASSERT_EQ (REG, GET_CODE (crtl->return_rtx));
17857 ASSERT_EQ (0, REGNO (crtl->return_rtx));
17858 ASSERT_EQ (SImode, GET_MODE (crtl->return_rtx));
17859}
17860
17861/* Run all target-specific selftests. */
17862
17863static void
17864aarch64_run_selftests (void)
17865{
17866 aarch64_test_loading_full_dump ();
17867}
17868
17869} // namespace selftest
17870
17871#endif /* #if CHECKING_P */
17872
43e9d192
IB
17873#undef TARGET_ADDRESS_COST
17874#define TARGET_ADDRESS_COST aarch64_address_cost
17875
17876/* This hook determines whether unnamed bitfields affect the alignment
17877 of the containing structure. The hook returns true if the structure
17878 should inherit the alignment requirements of an unnamed bitfield's
17879 type. */
17880#undef TARGET_ALIGN_ANON_BITFIELD
17881#define TARGET_ALIGN_ANON_BITFIELD hook_bool_void_true
17882
17883#undef TARGET_ASM_ALIGNED_DI_OP
17884#define TARGET_ASM_ALIGNED_DI_OP "\t.xword\t"
17885
17886#undef TARGET_ASM_ALIGNED_HI_OP
17887#define TARGET_ASM_ALIGNED_HI_OP "\t.hword\t"
17888
17889#undef TARGET_ASM_ALIGNED_SI_OP
17890#define TARGET_ASM_ALIGNED_SI_OP "\t.word\t"
17891
17892#undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
17893#define TARGET_ASM_CAN_OUTPUT_MI_THUNK \
17894 hook_bool_const_tree_hwi_hwi_const_tree_true
17895
e1c1ecb0
KT
17896#undef TARGET_ASM_FILE_START
17897#define TARGET_ASM_FILE_START aarch64_start_file
17898
43e9d192
IB
17899#undef TARGET_ASM_OUTPUT_MI_THUNK
17900#define TARGET_ASM_OUTPUT_MI_THUNK aarch64_output_mi_thunk
17901
17902#undef TARGET_ASM_SELECT_RTX_SECTION
17903#define TARGET_ASM_SELECT_RTX_SECTION aarch64_select_rtx_section
17904
17905#undef TARGET_ASM_TRAMPOLINE_TEMPLATE
17906#define TARGET_ASM_TRAMPOLINE_TEMPLATE aarch64_asm_trampoline_template
17907
17908#undef TARGET_BUILD_BUILTIN_VA_LIST
17909#define TARGET_BUILD_BUILTIN_VA_LIST aarch64_build_builtin_va_list
17910
17911#undef TARGET_CALLEE_COPIES
17912#define TARGET_CALLEE_COPIES hook_bool_CUMULATIVE_ARGS_mode_tree_bool_false
17913
17914#undef TARGET_CAN_ELIMINATE
17915#define TARGET_CAN_ELIMINATE aarch64_can_eliminate
17916
1fd8d40c
KT
17917#undef TARGET_CAN_INLINE_P
17918#define TARGET_CAN_INLINE_P aarch64_can_inline_p
17919
43e9d192
IB
17920#undef TARGET_CANNOT_FORCE_CONST_MEM
17921#define TARGET_CANNOT_FORCE_CONST_MEM aarch64_cannot_force_const_mem
17922
50487d79
EM
17923#undef TARGET_CASE_VALUES_THRESHOLD
17924#define TARGET_CASE_VALUES_THRESHOLD aarch64_case_values_threshold
17925
43e9d192
IB
17926#undef TARGET_CONDITIONAL_REGISTER_USAGE
17927#define TARGET_CONDITIONAL_REGISTER_USAGE aarch64_conditional_register_usage
17928
17929/* Only the least significant bit is used for initialization guard
17930 variables. */
17931#undef TARGET_CXX_GUARD_MASK_BIT
17932#define TARGET_CXX_GUARD_MASK_BIT hook_bool_void_true
17933
17934#undef TARGET_C_MODE_FOR_SUFFIX
17935#define TARGET_C_MODE_FOR_SUFFIX aarch64_c_mode_for_suffix
17936
17937#ifdef TARGET_BIG_ENDIAN_DEFAULT
17938#undef TARGET_DEFAULT_TARGET_FLAGS
17939#define TARGET_DEFAULT_TARGET_FLAGS (MASK_BIG_END)
17940#endif
17941
17942#undef TARGET_CLASS_MAX_NREGS
17943#define TARGET_CLASS_MAX_NREGS aarch64_class_max_nregs
17944
119103ca
JG
17945#undef TARGET_BUILTIN_DECL
17946#define TARGET_BUILTIN_DECL aarch64_builtin_decl
17947
a6fc00da
BH
17948#undef TARGET_BUILTIN_RECIPROCAL
17949#define TARGET_BUILTIN_RECIPROCAL aarch64_builtin_reciprocal
17950
11e554b3
JG
17951#undef TARGET_C_EXCESS_PRECISION
17952#define TARGET_C_EXCESS_PRECISION aarch64_excess_precision
17953
43e9d192
IB
17954#undef TARGET_EXPAND_BUILTIN
17955#define TARGET_EXPAND_BUILTIN aarch64_expand_builtin
17956
17957#undef TARGET_EXPAND_BUILTIN_VA_START
17958#define TARGET_EXPAND_BUILTIN_VA_START aarch64_expand_builtin_va_start
17959
9697e620
JG
17960#undef TARGET_FOLD_BUILTIN
17961#define TARGET_FOLD_BUILTIN aarch64_fold_builtin
17962
43e9d192
IB
17963#undef TARGET_FUNCTION_ARG
17964#define TARGET_FUNCTION_ARG aarch64_function_arg
17965
17966#undef TARGET_FUNCTION_ARG_ADVANCE
17967#define TARGET_FUNCTION_ARG_ADVANCE aarch64_function_arg_advance
17968
17969#undef TARGET_FUNCTION_ARG_BOUNDARY
17970#define TARGET_FUNCTION_ARG_BOUNDARY aarch64_function_arg_boundary
17971
76b0cbf8
RS
17972#undef TARGET_FUNCTION_ARG_PADDING
17973#define TARGET_FUNCTION_ARG_PADDING aarch64_function_arg_padding
17974
43cacb12
RS
17975#undef TARGET_GET_RAW_RESULT_MODE
17976#define TARGET_GET_RAW_RESULT_MODE aarch64_get_reg_raw_mode
17977#undef TARGET_GET_RAW_ARG_MODE
17978#define TARGET_GET_RAW_ARG_MODE aarch64_get_reg_raw_mode
17979
43e9d192
IB
17980#undef TARGET_FUNCTION_OK_FOR_SIBCALL
17981#define TARGET_FUNCTION_OK_FOR_SIBCALL aarch64_function_ok_for_sibcall
17982
17983#undef TARGET_FUNCTION_VALUE
17984#define TARGET_FUNCTION_VALUE aarch64_function_value
17985
17986#undef TARGET_FUNCTION_VALUE_REGNO_P
17987#define TARGET_FUNCTION_VALUE_REGNO_P aarch64_function_value_regno_p
17988
fc72cba7
AL
17989#undef TARGET_GIMPLE_FOLD_BUILTIN
17990#define TARGET_GIMPLE_FOLD_BUILTIN aarch64_gimple_fold_builtin
0ac198d3 17991
43e9d192
IB
17992#undef TARGET_GIMPLIFY_VA_ARG_EXPR
17993#define TARGET_GIMPLIFY_VA_ARG_EXPR aarch64_gimplify_va_arg_expr
17994
17995#undef TARGET_INIT_BUILTINS
17996#define TARGET_INIT_BUILTINS aarch64_init_builtins
17997
c64f7d37
WD
17998#undef TARGET_IRA_CHANGE_PSEUDO_ALLOCNO_CLASS
17999#define TARGET_IRA_CHANGE_PSEUDO_ALLOCNO_CLASS \
18000 aarch64_ira_change_pseudo_allocno_class
18001
43e9d192
IB
18002#undef TARGET_LEGITIMATE_ADDRESS_P
18003#define TARGET_LEGITIMATE_ADDRESS_P aarch64_legitimate_address_hook_p
18004
18005#undef TARGET_LEGITIMATE_CONSTANT_P
18006#define TARGET_LEGITIMATE_CONSTANT_P aarch64_legitimate_constant_p
18007
491ec060
WD
18008#undef TARGET_LEGITIMIZE_ADDRESS_DISPLACEMENT
18009#define TARGET_LEGITIMIZE_ADDRESS_DISPLACEMENT \
18010 aarch64_legitimize_address_displacement
18011
43e9d192
IB
18012#undef TARGET_LIBGCC_CMP_RETURN_MODE
18013#define TARGET_LIBGCC_CMP_RETURN_MODE aarch64_libgcc_cmp_return_mode
18014
11e554b3
JG
18015#undef TARGET_LIBGCC_FLOATING_MODE_SUPPORTED_P
18016#define TARGET_LIBGCC_FLOATING_MODE_SUPPORTED_P \
18017aarch64_libgcc_floating_mode_supported_p
18018
ac2b960f
YZ
18019#undef TARGET_MANGLE_TYPE
18020#define TARGET_MANGLE_TYPE aarch64_mangle_type
18021
43e9d192
IB
18022#undef TARGET_MEMORY_MOVE_COST
18023#define TARGET_MEMORY_MOVE_COST aarch64_memory_move_cost
18024
26e0ff94
WD
18025#undef TARGET_MIN_DIVISIONS_FOR_RECIP_MUL
18026#define TARGET_MIN_DIVISIONS_FOR_RECIP_MUL aarch64_min_divisions_for_recip_mul
18027
43e9d192
IB
18028#undef TARGET_MUST_PASS_IN_STACK
18029#define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size
18030
18031/* This target hook should return true if accesses to volatile bitfields
18032 should use the narrowest mode possible. It should return false if these
18033 accesses should use the bitfield container type. */
18034#undef TARGET_NARROW_VOLATILE_BITFIELD
18035#define TARGET_NARROW_VOLATILE_BITFIELD hook_bool_void_false
18036
18037#undef TARGET_OPTION_OVERRIDE
18038#define TARGET_OPTION_OVERRIDE aarch64_override_options
18039
18040#undef TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE
18041#define TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE \
18042 aarch64_override_options_after_change
18043
361fb3ee
KT
18044#undef TARGET_OPTION_SAVE
18045#define TARGET_OPTION_SAVE aarch64_option_save
18046
18047#undef TARGET_OPTION_RESTORE
18048#define TARGET_OPTION_RESTORE aarch64_option_restore
18049
18050#undef TARGET_OPTION_PRINT
18051#define TARGET_OPTION_PRINT aarch64_option_print
18052
5a2c8331
KT
18053#undef TARGET_OPTION_VALID_ATTRIBUTE_P
18054#define TARGET_OPTION_VALID_ATTRIBUTE_P aarch64_option_valid_attribute_p
18055
d78006d9
KT
18056#undef TARGET_SET_CURRENT_FUNCTION
18057#define TARGET_SET_CURRENT_FUNCTION aarch64_set_current_function
18058
43e9d192
IB
18059#undef TARGET_PASS_BY_REFERENCE
18060#define TARGET_PASS_BY_REFERENCE aarch64_pass_by_reference
18061
18062#undef TARGET_PREFERRED_RELOAD_CLASS
18063#define TARGET_PREFERRED_RELOAD_CLASS aarch64_preferred_reload_class
18064
cee66c68
WD
18065#undef TARGET_SCHED_REASSOCIATION_WIDTH
18066#define TARGET_SCHED_REASSOCIATION_WIDTH aarch64_reassociation_width
18067
c2ec330c
AL
18068#undef TARGET_PROMOTED_TYPE
18069#define TARGET_PROMOTED_TYPE aarch64_promoted_type
18070
43e9d192
IB
18071#undef TARGET_SECONDARY_RELOAD
18072#define TARGET_SECONDARY_RELOAD aarch64_secondary_reload
18073
18074#undef TARGET_SHIFT_TRUNCATION_MASK
18075#define TARGET_SHIFT_TRUNCATION_MASK aarch64_shift_truncation_mask
18076
18077#undef TARGET_SETUP_INCOMING_VARARGS
18078#define TARGET_SETUP_INCOMING_VARARGS aarch64_setup_incoming_varargs
18079
18080#undef TARGET_STRUCT_VALUE_RTX
18081#define TARGET_STRUCT_VALUE_RTX aarch64_struct_value_rtx
18082
18083#undef TARGET_REGISTER_MOVE_COST
18084#define TARGET_REGISTER_MOVE_COST aarch64_register_move_cost
18085
18086#undef TARGET_RETURN_IN_MEMORY
18087#define TARGET_RETURN_IN_MEMORY aarch64_return_in_memory
18088
18089#undef TARGET_RETURN_IN_MSB
18090#define TARGET_RETURN_IN_MSB aarch64_return_in_msb
18091
18092#undef TARGET_RTX_COSTS
7cc2145f 18093#define TARGET_RTX_COSTS aarch64_rtx_costs_wrapper
43e9d192 18094
2e5f8203
JG
18095#undef TARGET_SCALAR_MODE_SUPPORTED_P
18096#define TARGET_SCALAR_MODE_SUPPORTED_P aarch64_scalar_mode_supported_p
18097
d126a4ae
AP
18098#undef TARGET_SCHED_ISSUE_RATE
18099#define TARGET_SCHED_ISSUE_RATE aarch64_sched_issue_rate
18100
d03f7e44
MK
18101#undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
18102#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
18103 aarch64_sched_first_cycle_multipass_dfa_lookahead
18104
2d6bc7fa
KT
18105#undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD
18106#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD \
18107 aarch64_first_cycle_multipass_dfa_lookahead_guard
18108
827ab47a
KT
18109#undef TARGET_SHRINK_WRAP_GET_SEPARATE_COMPONENTS
18110#define TARGET_SHRINK_WRAP_GET_SEPARATE_COMPONENTS \
18111 aarch64_get_separate_components
18112
18113#undef TARGET_SHRINK_WRAP_COMPONENTS_FOR_BB
18114#define TARGET_SHRINK_WRAP_COMPONENTS_FOR_BB \
18115 aarch64_components_for_bb
18116
18117#undef TARGET_SHRINK_WRAP_DISQUALIFY_COMPONENTS
18118#define TARGET_SHRINK_WRAP_DISQUALIFY_COMPONENTS \
18119 aarch64_disqualify_components
18120
18121#undef TARGET_SHRINK_WRAP_EMIT_PROLOGUE_COMPONENTS
18122#define TARGET_SHRINK_WRAP_EMIT_PROLOGUE_COMPONENTS \
18123 aarch64_emit_prologue_components
18124
18125#undef TARGET_SHRINK_WRAP_EMIT_EPILOGUE_COMPONENTS
18126#define TARGET_SHRINK_WRAP_EMIT_EPILOGUE_COMPONENTS \
18127 aarch64_emit_epilogue_components
18128
18129#undef TARGET_SHRINK_WRAP_SET_HANDLED_COMPONENTS
18130#define TARGET_SHRINK_WRAP_SET_HANDLED_COMPONENTS \
18131 aarch64_set_handled_components
18132
43e9d192
IB
18133#undef TARGET_TRAMPOLINE_INIT
18134#define TARGET_TRAMPOLINE_INIT aarch64_trampoline_init
18135
18136#undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
18137#define TARGET_USE_BLOCKS_FOR_CONSTANT_P aarch64_use_blocks_for_constant_p
18138
18139#undef TARGET_VECTOR_MODE_SUPPORTED_P
18140#define TARGET_VECTOR_MODE_SUPPORTED_P aarch64_vector_mode_supported_p
18141
7df76747
N
18142#undef TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT
18143#define TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT \
18144 aarch64_builtin_support_vector_misalignment
18145
9f4cbab8
RS
18146#undef TARGET_ARRAY_MODE
18147#define TARGET_ARRAY_MODE aarch64_array_mode
18148
43e9d192
IB
18149#undef TARGET_ARRAY_MODE_SUPPORTED_P
18150#define TARGET_ARRAY_MODE_SUPPORTED_P aarch64_array_mode_supported_p
18151
8990e73a
TB
18152#undef TARGET_VECTORIZE_ADD_STMT_COST
18153#define TARGET_VECTORIZE_ADD_STMT_COST aarch64_add_stmt_cost
18154
18155#undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
18156#define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
18157 aarch64_builtin_vectorization_cost
18158
43e9d192
IB
18159#undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
18160#define TARGET_VECTORIZE_PREFERRED_SIMD_MODE aarch64_preferred_simd_mode
18161
42fc9a7f
JG
18162#undef TARGET_VECTORIZE_BUILTINS
18163#define TARGET_VECTORIZE_BUILTINS
18164
18165#undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
18166#define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
18167 aarch64_builtin_vectorized_function
18168
3b357264
JG
18169#undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES
18170#define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES \
18171 aarch64_autovectorize_vector_sizes
18172
aa87aced
KV
18173#undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV
18174#define TARGET_ATOMIC_ASSIGN_EXPAND_FENV \
18175 aarch64_atomic_assign_expand_fenv
18176
43e9d192
IB
18177/* Section anchor support. */
18178
18179#undef TARGET_MIN_ANCHOR_OFFSET
18180#define TARGET_MIN_ANCHOR_OFFSET -256
18181
18182/* Limit the maximum anchor offset to 4k-1, since that's the limit for a
18183 byte offset; we can do much more for larger data types, but have no way
18184 to determine the size of the access. We assume accesses are aligned. */
18185#undef TARGET_MAX_ANCHOR_OFFSET
18186#define TARGET_MAX_ANCHOR_OFFSET 4095
18187
db0253a4
TB
18188#undef TARGET_VECTOR_ALIGNMENT
18189#define TARGET_VECTOR_ALIGNMENT aarch64_simd_vector_alignment
18190
43cacb12
RS
18191#undef TARGET_VECTORIZE_PREFERRED_VECTOR_ALIGNMENT
18192#define TARGET_VECTORIZE_PREFERRED_VECTOR_ALIGNMENT \
18193 aarch64_vectorize_preferred_vector_alignment
db0253a4
TB
18194#undef TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE
18195#define TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE \
18196 aarch64_simd_vector_alignment_reachable
18197
88b08073
JG
18198/* vec_perm support. */
18199
f151c9e1
RS
18200#undef TARGET_VECTORIZE_VEC_PERM_CONST
18201#define TARGET_VECTORIZE_VEC_PERM_CONST \
18202 aarch64_vectorize_vec_perm_const
88b08073 18203
43cacb12
RS
18204#undef TARGET_VECTORIZE_GET_MASK_MODE
18205#define TARGET_VECTORIZE_GET_MASK_MODE aarch64_get_mask_mode
76a34e3f
RS
18206#undef TARGET_VECTORIZE_EMPTY_MASK_IS_EXPENSIVE
18207#define TARGET_VECTORIZE_EMPTY_MASK_IS_EXPENSIVE \
18208 aarch64_empty_mask_is_expensive
6a86928d
RS
18209#undef TARGET_PREFERRED_ELSE_VALUE
18210#define TARGET_PREFERRED_ELSE_VALUE \
18211 aarch64_preferred_else_value
43cacb12 18212
c2ec330c
AL
18213#undef TARGET_INIT_LIBFUNCS
18214#define TARGET_INIT_LIBFUNCS aarch64_init_libfuncs
70f09188 18215
706b2314 18216#undef TARGET_FIXED_CONDITION_CODE_REGS
70f09188
AP
18217#define TARGET_FIXED_CONDITION_CODE_REGS aarch64_fixed_condition_code_regs
18218
5cb74e90
RR
18219#undef TARGET_FLAGS_REGNUM
18220#define TARGET_FLAGS_REGNUM CC_REGNUM
18221
78607708
TV
18222#undef TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS
18223#define TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS true
18224
a3125fc2
CL
18225#undef TARGET_ASAN_SHADOW_OFFSET
18226#define TARGET_ASAN_SHADOW_OFFSET aarch64_asan_shadow_offset
18227
0c4ec427
RE
18228#undef TARGET_LEGITIMIZE_ADDRESS
18229#define TARGET_LEGITIMIZE_ADDRESS aarch64_legitimize_address
18230
b48d6421
KT
18231#undef TARGET_SCHED_CAN_SPECULATE_INSN
18232#define TARGET_SCHED_CAN_SPECULATE_INSN aarch64_sched_can_speculate_insn
18233
594bdd53
FY
18234#undef TARGET_CAN_USE_DOLOOP_P
18235#define TARGET_CAN_USE_DOLOOP_P can_use_doloop_if_innermost
18236
9bca63d4
WD
18237#undef TARGET_SCHED_ADJUST_PRIORITY
18238#define TARGET_SCHED_ADJUST_PRIORITY aarch64_sched_adjust_priority
18239
6a569cdd
KT
18240#undef TARGET_SCHED_MACRO_FUSION_P
18241#define TARGET_SCHED_MACRO_FUSION_P aarch64_macro_fusion_p
18242
18243#undef TARGET_SCHED_MACRO_FUSION_PAIR_P
18244#define TARGET_SCHED_MACRO_FUSION_PAIR_P aarch_macro_fusion_pair_p
18245
350013bc
BC
18246#undef TARGET_SCHED_FUSION_PRIORITY
18247#define TARGET_SCHED_FUSION_PRIORITY aarch64_sched_fusion_priority
18248
7b841a12
JW
18249#undef TARGET_UNSPEC_MAY_TRAP_P
18250#define TARGET_UNSPEC_MAY_TRAP_P aarch64_unspec_may_trap_p
18251
1b1e81f8
JW
18252#undef TARGET_USE_PSEUDO_PIC_REG
18253#define TARGET_USE_PSEUDO_PIC_REG aarch64_use_pseudo_pic_reg
18254
cc8ca59e
JB
18255#undef TARGET_PRINT_OPERAND
18256#define TARGET_PRINT_OPERAND aarch64_print_operand
18257
18258#undef TARGET_PRINT_OPERAND_ADDRESS
18259#define TARGET_PRINT_OPERAND_ADDRESS aarch64_print_operand_address
18260
ee62a5a6
RS
18261#undef TARGET_OPTAB_SUPPORTED_P
18262#define TARGET_OPTAB_SUPPORTED_P aarch64_optab_supported_p
18263
43203dea
RR
18264#undef TARGET_OMIT_STRUCT_RETURN_REG
18265#define TARGET_OMIT_STRUCT_RETURN_REG true
18266
43cacb12
RS
18267#undef TARGET_DWARF_POLY_INDETERMINATE_VALUE
18268#define TARGET_DWARF_POLY_INDETERMINATE_VALUE \
18269 aarch64_dwarf_poly_indeterminate_value
18270
f46fe37e
EB
18271/* The architecture reserves bits 0 and 1 so use bit 2 for descriptors. */
18272#undef TARGET_CUSTOM_FUNCTION_DESCRIPTORS
18273#define TARGET_CUSTOM_FUNCTION_DESCRIPTORS 4
18274
c43f4279
RS
18275#undef TARGET_HARD_REGNO_NREGS
18276#define TARGET_HARD_REGNO_NREGS aarch64_hard_regno_nregs
f939c3e6
RS
18277#undef TARGET_HARD_REGNO_MODE_OK
18278#define TARGET_HARD_REGNO_MODE_OK aarch64_hard_regno_mode_ok
18279
99e1629f
RS
18280#undef TARGET_MODES_TIEABLE_P
18281#define TARGET_MODES_TIEABLE_P aarch64_modes_tieable_p
18282
80ec73f4
RS
18283#undef TARGET_HARD_REGNO_CALL_PART_CLOBBERED
18284#define TARGET_HARD_REGNO_CALL_PART_CLOBBERED \
18285 aarch64_hard_regno_call_part_clobbered
18286
58e17cf8
RS
18287#undef TARGET_CONSTANT_ALIGNMENT
18288#define TARGET_CONSTANT_ALIGNMENT aarch64_constant_alignment
18289
43cacb12
RS
18290#undef TARGET_COMPUTE_PRESSURE_CLASSES
18291#define TARGET_COMPUTE_PRESSURE_CLASSES aarch64_compute_pressure_classes
18292
18293#undef TARGET_CAN_CHANGE_MODE_CLASS
18294#define TARGET_CAN_CHANGE_MODE_CLASS aarch64_can_change_mode_class
18295
5cce8171
RS
18296#undef TARGET_SELECT_EARLY_REMAT_MODES
18297#define TARGET_SELECT_EARLY_REMAT_MODES aarch64_select_early_remat_modes
18298
c0111dc4
RE
18299#undef TARGET_SPECULATION_SAFE_VALUE
18300#define TARGET_SPECULATION_SAFE_VALUE aarch64_speculation_safe_value
18301
51b86113
DM
18302#if CHECKING_P
18303#undef TARGET_RUN_TARGET_SELFTESTS
18304#define TARGET_RUN_TARGET_SELFTESTS selftest::aarch64_run_selftests
18305#endif /* #if CHECKING_P */
18306
43e9d192
IB
18307struct gcc_target targetm = TARGET_INITIALIZER;
18308
18309#include "gt-aarch64.h"