/* Machine description for AArch64 architecture.
   Copyright (C) 2009-2017 Free Software Foundation, Inc.
   Contributed by ARM Ltd.

   This file is part of GCC.

   GCC is free software; you can redistribute it and/or modify it
   under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 3, or (at your option)
   any later version.

   GCC is distributed in the hope that it will be useful, but
   WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with GCC; see the file COPYING3.  If not see
   <http://www.gnu.org/licenses/>.  */

#include "config.h"
#define INCLUDE_STRING
#include "system.h"
#include "coretypes.h"
#include "backend.h"
#include "target.h"
#include "rtl.h"
#include "tree.h"
#include "memmodel.h"
#include "gimple.h"
#include "cfghooks.h"
#include "cfgloop.h"
#include "df.h"
#include "tm_p.h"
#include "stringpool.h"
#include "optabs.h"
#include "regs.h"
#include "emit-rtl.h"
#include "recog.h"
#include "diagnostic.h"
#include "insn-attr.h"
#include "alias.h"
#include "fold-const.h"
#include "stor-layout.h"
#include "calls.h"
#include "varasm.h"
#include "output.h"
#include "flags.h"
#include "explow.h"
#include "expr.h"
#include "reload.h"
#include "langhooks.h"
#include "opts.h"
#include "params.h"
#include "gimplify.h"
#include "dwarf2.h"
#include "gimple-iterator.h"
#include "tree-vectorizer.h"
#include "aarch64-cost-tables.h"
#include "dumpfile.h"
#include "builtins.h"
#include "rtl-iter.h"
#include "tm-constrs.h"
#include "sched-int.h"
#include "target-globals.h"
#include "common/common-target.h"
#include "selftest.h"
#include "selftest-rtl.h"

/* This file should be included last.  */
#include "target-def.h"

/* Defined for convenience.  */
#define POINTER_BYTES (POINTER_SIZE / BITS_PER_UNIT)

/* Classifies an address.

   ADDRESS_REG_IMM
       A simple base register plus immediate offset.

   ADDRESS_REG_WB
       A base register indexed by immediate offset with writeback.

   ADDRESS_REG_REG
       A base register indexed by (optionally scaled) register.

   ADDRESS_REG_UXTW
       A base register indexed by (optionally scaled) zero-extended register.

   ADDRESS_REG_SXTW
       A base register indexed by (optionally scaled) sign-extended register.

   ADDRESS_LO_SUM
       A LO_SUM rtx with a base register and "LO12" symbol relocation.

   ADDRESS_SYMBOLIC:
       A constant symbolic address, in pc-relative literal pool.  */

enum aarch64_address_type {
  ADDRESS_REG_IMM,
  ADDRESS_REG_WB,
  ADDRESS_REG_REG,
  ADDRESS_REG_UXTW,
  ADDRESS_REG_SXTW,
  ADDRESS_LO_SUM,
  ADDRESS_SYMBOLIC
};

struct aarch64_address_info {
  enum aarch64_address_type type;
  rtx base;
  rtx offset;
  int shift;
  enum aarch64_symbol_type symbol_type;
};
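
/* Illustrative examples, inferred from the descriptions above rather than
   taken verbatim from any other source: an access such as
   "ldr w0, [x1, 16]" uses a base register plus immediate offset and would
   be classified as ADDRESS_REG_IMM, while "ldr w0, [x1, w2, sxtw 2]"
   indexes the base by a scaled, sign-extended register and would be
   ADDRESS_REG_SXTW with shift == 2.  */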

struct simd_immediate_info
{
  rtx value;
  int shift;
  int element_width;
  bool mvn;
  bool msl;
};

/* The current code model.  */
enum aarch64_code_model aarch64_cmodel;

#ifdef HAVE_AS_TLS
#undef TARGET_HAVE_TLS
#define TARGET_HAVE_TLS 1
#endif

static bool aarch64_composite_type_p (const_tree, machine_mode);
static bool aarch64_vfp_is_call_or_return_candidate (machine_mode,
						     const_tree,
						     machine_mode *, int *,
						     bool *);
static void aarch64_elf_asm_constructor (rtx, int) ATTRIBUTE_UNUSED;
static void aarch64_elf_asm_destructor (rtx, int) ATTRIBUTE_UNUSED;
static void aarch64_override_options_after_change (void);
static bool aarch64_vector_mode_supported_p (machine_mode);
static bool aarch64_vectorize_vec_perm_const_ok (machine_mode vmode,
						 const unsigned char *sel);
static int aarch64_address_cost (rtx, machine_mode, addr_space_t, bool);
static bool aarch64_builtin_support_vector_misalignment (machine_mode mode,
							 const_tree type,
							 int misalignment,
							 bool is_packed);

/* Major revision number of the ARM Architecture implemented by the target.  */
unsigned aarch64_architecture_version;

/* The processor for which instructions should be scheduled.  */
enum aarch64_processor aarch64_tune = cortexa53;

/* Mask to specify which instruction scheduling options should be used.  */
unsigned long aarch64_tune_flags = 0;

/* Global flag for PC relative loads.  */
bool aarch64_pcrelative_literal_loads;

/* Support for command line parsing of boolean flags in the tuning
   structures.  */
struct aarch64_flag_desc
{
  const char* name;
  unsigned int flag;
};

#define AARCH64_FUSION_PAIR(name, internal_name) \
  { name, AARCH64_FUSE_##internal_name },
static const struct aarch64_flag_desc aarch64_fusible_pairs[] =
{
  { "none", AARCH64_FUSE_NOTHING },
#include "aarch64-fusion-pairs.def"
  { "all", AARCH64_FUSE_ALL },
  { NULL, AARCH64_FUSE_NOTHING }
};

#define AARCH64_EXTRA_TUNING_OPTION(name, internal_name) \
  { name, AARCH64_EXTRA_TUNE_##internal_name },
static const struct aarch64_flag_desc aarch64_tuning_flags[] =
{
  { "none", AARCH64_EXTRA_TUNE_NONE },
#include "aarch64-tuning-flags.def"
  { "all", AARCH64_EXTRA_TUNE_ALL },
  { NULL, AARCH64_EXTRA_TUNE_NONE }
};

/* Tuning parameters.  */

static const struct cpu_addrcost_table generic_addrcost_table =
{
  {
    1, /* hi */
    0, /* si */
    0, /* di */
    1, /* ti */
  },
  0, /* pre_modify */
  0, /* post_modify */
  0, /* register_offset */
  0, /* register_sextend */
  0, /* register_zextend */
  0 /* imm_offset */
};
208
60bff090
JG
209static const struct cpu_addrcost_table cortexa57_addrcost_table =
210{
60bff090 211 {
bd95e655
JG
212 1, /* hi */
213 0, /* si */
214 0, /* di */
215 1, /* ti */
60bff090 216 },
bd95e655
JG
217 0, /* pre_modify */
218 0, /* post_modify */
219 0, /* register_offset */
783879e6
EM
220 0, /* register_sextend */
221 0, /* register_zextend */
bd95e655 222 0, /* imm_offset */
60bff090
JG
223};
224
5ec1ae3b
EM
225static const struct cpu_addrcost_table exynosm1_addrcost_table =
226{
227 {
228 0, /* hi */
229 0, /* si */
230 0, /* di */
231 2, /* ti */
232 },
233 0, /* pre_modify */
234 0, /* post_modify */
235 1, /* register_offset */
236 1, /* register_sextend */
237 2, /* register_zextend */
238 0, /* imm_offset */
239};
240
381e27aa
PT
241static const struct cpu_addrcost_table xgene1_addrcost_table =
242{
381e27aa 243 {
bd95e655
JG
244 1, /* hi */
245 0, /* si */
246 0, /* di */
247 1, /* ti */
381e27aa 248 },
bd95e655
JG
249 1, /* pre_modify */
250 0, /* post_modify */
251 0, /* register_offset */
783879e6
EM
252 1, /* register_sextend */
253 1, /* register_zextend */
bd95e655 254 0, /* imm_offset */
381e27aa
PT
255};
256
ee446d9f
JW
257static const struct cpu_addrcost_table qdf24xx_addrcost_table =
258{
259 {
260 1, /* hi */
261 0, /* si */
262 0, /* di */
263 1, /* ti */
264 },
265 0, /* pre_modify */
266 0, /* post_modify */
267 0, /* register_offset */
268 0, /* register_sextend */
269 0, /* register_zextend */
270 0 /* imm_offset */
271};
272
d1261ac6 273static const struct cpu_addrcost_table thunderx2t99_addrcost_table =
ad611a4c
VP
274{
275 {
5f407e57
AP
276 1, /* hi */
277 1, /* si */
278 1, /* di */
ad611a4c
VP
279 2, /* ti */
280 },
281 0, /* pre_modify */
282 0, /* post_modify */
283 2, /* register_offset */
284 3, /* register_sextend */
285 3, /* register_zextend */
286 0, /* imm_offset */
287};
288
43e9d192
IB
289static const struct cpu_regmove_cost generic_regmove_cost =
290{
bd95e655 291 1, /* GP2GP */
3969c510
WD
292 /* Avoid the use of slow int<->fp moves for spilling by setting
293 their cost higher than memmov_cost. */
bd95e655
JG
294 5, /* GP2FP */
295 5, /* FP2GP */
296 2 /* FP2FP */
43e9d192
IB
297};
298
e4a9c55a
WD
299static const struct cpu_regmove_cost cortexa57_regmove_cost =
300{
bd95e655 301 1, /* GP2GP */
e4a9c55a
WD
302 /* Avoid the use of slow int<->fp moves for spilling by setting
303 their cost higher than memmov_cost. */
bd95e655
JG
304 5, /* GP2FP */
305 5, /* FP2GP */
306 2 /* FP2FP */
e4a9c55a
WD
307};
308
309static const struct cpu_regmove_cost cortexa53_regmove_cost =
310{
bd95e655 311 1, /* GP2GP */
e4a9c55a
WD
312 /* Avoid the use of slow int<->fp moves for spilling by setting
313 their cost higher than memmov_cost. */
bd95e655
JG
314 5, /* GP2FP */
315 5, /* FP2GP */
316 2 /* FP2FP */
e4a9c55a
WD
317};
318
5ec1ae3b
EM
319static const struct cpu_regmove_cost exynosm1_regmove_cost =
320{
321 1, /* GP2GP */
  /* Avoid the use of slow int<->fp moves for spilling by setting
     their cost higher than memmov_cost (actual costs are 4 and 9).  */
324 9, /* GP2FP */
325 9, /* FP2GP */
326 1 /* FP2FP */
327};
328
d1bcc29f
AP
329static const struct cpu_regmove_cost thunderx_regmove_cost =
330{
bd95e655
JG
331 2, /* GP2GP */
332 2, /* GP2FP */
333 6, /* FP2GP */
334 4 /* FP2FP */
d1bcc29f
AP
335};
336
381e27aa
PT
337static const struct cpu_regmove_cost xgene1_regmove_cost =
338{
bd95e655 339 1, /* GP2GP */
381e27aa
PT
340 /* Avoid the use of slow int<->fp moves for spilling by setting
341 their cost higher than memmov_cost. */
bd95e655
JG
342 8, /* GP2FP */
343 8, /* FP2GP */
344 2 /* FP2FP */
381e27aa
PT
345};
346
ee446d9f
JW
347static const struct cpu_regmove_cost qdf24xx_regmove_cost =
348{
349 2, /* GP2GP */
350 /* Avoid the use of int<->fp moves for spilling. */
351 6, /* GP2FP */
352 6, /* FP2GP */
353 4 /* FP2FP */
354};
355
d1261ac6 356static const struct cpu_regmove_cost thunderx2t99_regmove_cost =
ad611a4c
VP
357{
358 1, /* GP2GP */
359 /* Avoid the use of int<->fp moves for spilling. */
360 8, /* GP2FP */
361 8, /* FP2GP */
362 4 /* FP2FP */
363};
364
8990e73a 365/* Generic costs for vector insn classes. */
8990e73a
TB
366static const struct cpu_vector_cost generic_vector_cost =
367{
cd8ae5ed
AP
368 1, /* scalar_int_stmt_cost */
369 1, /* scalar_fp_stmt_cost */
bd95e655
JG
370 1, /* scalar_load_cost */
371 1, /* scalar_store_cost */
cd8ae5ed
AP
372 1, /* vec_int_stmt_cost */
373 1, /* vec_fp_stmt_cost */
c428f91c 374 2, /* vec_permute_cost */
bd95e655
JG
375 1, /* vec_to_scalar_cost */
376 1, /* scalar_to_vec_cost */
377 1, /* vec_align_load_cost */
378 1, /* vec_unalign_load_cost */
379 1, /* vec_unalign_store_cost */
380 1, /* vec_store_cost */
381 3, /* cond_taken_branch_cost */
382 1 /* cond_not_taken_branch_cost */
8990e73a
TB
383};
384
c3f20327
AP
385/* ThunderX costs for vector insn classes. */
386static const struct cpu_vector_cost thunderx_vector_cost =
387{
cd8ae5ed
AP
388 1, /* scalar_int_stmt_cost */
389 1, /* scalar_fp_stmt_cost */
c3f20327
AP
390 3, /* scalar_load_cost */
391 1, /* scalar_store_cost */
cd8ae5ed
AP
392 4, /* vec_int_stmt_cost */
393 4, /* vec_fp_stmt_cost */
c3f20327
AP
394 4, /* vec_permute_cost */
395 2, /* vec_to_scalar_cost */
396 2, /* scalar_to_vec_cost */
397 3, /* vec_align_load_cost */
398 10, /* vec_unalign_load_cost */
399 10, /* vec_unalign_store_cost */
400 1, /* vec_store_cost */
401 3, /* cond_taken_branch_cost */
402 3 /* cond_not_taken_branch_cost */
403};
404
/* Costs for vector insn classes for Cortex-A57.  */
60bff090
JG
406static const struct cpu_vector_cost cortexa57_vector_cost =
407{
cd8ae5ed
AP
408 1, /* scalar_int_stmt_cost */
409 1, /* scalar_fp_stmt_cost */
bd95e655
JG
410 4, /* scalar_load_cost */
411 1, /* scalar_store_cost */
cd8ae5ed
AP
412 2, /* vec_int_stmt_cost */
413 2, /* vec_fp_stmt_cost */
c428f91c 414 3, /* vec_permute_cost */
bd95e655
JG
415 8, /* vec_to_scalar_cost */
416 8, /* scalar_to_vec_cost */
db4a1c18
WD
417 4, /* vec_align_load_cost */
418 4, /* vec_unalign_load_cost */
bd95e655
JG
419 1, /* vec_unalign_store_cost */
420 1, /* vec_store_cost */
421 1, /* cond_taken_branch_cost */
422 1 /* cond_not_taken_branch_cost */
60bff090
JG
423};
424
5ec1ae3b
EM
425static const struct cpu_vector_cost exynosm1_vector_cost =
426{
cd8ae5ed
AP
427 1, /* scalar_int_stmt_cost */
428 1, /* scalar_fp_stmt_cost */
5ec1ae3b
EM
429 5, /* scalar_load_cost */
430 1, /* scalar_store_cost */
cd8ae5ed
AP
431 3, /* vec_int_stmt_cost */
432 3, /* vec_fp_stmt_cost */
c428f91c 433 3, /* vec_permute_cost */
5ec1ae3b
EM
434 3, /* vec_to_scalar_cost */
435 3, /* scalar_to_vec_cost */
436 5, /* vec_align_load_cost */
437 5, /* vec_unalign_load_cost */
438 1, /* vec_unalign_store_cost */
439 1, /* vec_store_cost */
440 1, /* cond_taken_branch_cost */
441 1 /* cond_not_taken_branch_cost */
442};
443
/* Costs for vector insn classes for X-Gene 1.  */
381e27aa
PT
445static const struct cpu_vector_cost xgene1_vector_cost =
446{
cd8ae5ed
AP
447 1, /* scalar_int_stmt_cost */
448 1, /* scalar_fp_stmt_cost */
bd95e655
JG
449 5, /* scalar_load_cost */
450 1, /* scalar_store_cost */
cd8ae5ed
AP
451 2, /* vec_int_stmt_cost */
452 2, /* vec_fp_stmt_cost */
c428f91c 453 2, /* vec_permute_cost */
bd95e655
JG
454 4, /* vec_to_scalar_cost */
455 4, /* scalar_to_vec_cost */
456 10, /* vec_align_load_cost */
457 10, /* vec_unalign_load_cost */
458 2, /* vec_unalign_store_cost */
459 2, /* vec_store_cost */
460 2, /* cond_taken_branch_cost */
461 1 /* cond_not_taken_branch_cost */
381e27aa
PT
462};
463
/* Costs for vector insn classes for ThunderX2 T99.  */
d1261ac6 465static const struct cpu_vector_cost thunderx2t99_vector_cost =
ad611a4c 466{
cd8ae5ed
AP
467 1, /* scalar_int_stmt_cost */
468 6, /* scalar_fp_stmt_cost */
ad611a4c
VP
469 4, /* scalar_load_cost */
470 1, /* scalar_store_cost */
cd8ae5ed
AP
471 5, /* vec_int_stmt_cost */
472 6, /* vec_fp_stmt_cost */
ad611a4c
VP
473 3, /* vec_permute_cost */
474 6, /* vec_to_scalar_cost */
475 5, /* scalar_to_vec_cost */
476 8, /* vec_align_load_cost */
477 8, /* vec_unalign_load_cost */
478 4, /* vec_unalign_store_cost */
479 4, /* vec_store_cost */
480 2, /* cond_taken_branch_cost */
481 1 /* cond_not_taken_branch_cost */
482};
483
b9066f5a
MW
484/* Generic costs for branch instructions. */
485static const struct cpu_branch_cost generic_branch_cost =
486{
9094d4a4
WD
487 1, /* Predictable. */
488 3 /* Unpredictable. */
b9066f5a
MW
489};
490
67707f65
JG
491/* Branch costs for Cortex-A57. */
492static const struct cpu_branch_cost cortexa57_branch_cost =
493{
494 1, /* Predictable. */
495 3 /* Unpredictable. */
496};
497
/* Branch costs for ThunderX2 T99.  */
d1261ac6 499static const struct cpu_branch_cost thunderx2t99_branch_cost =
ad611a4c
VP
500{
501 1, /* Predictable. */
502 3 /* Unpredictable. */
503};
504
9acc9cbe
EM
505/* Generic approximation modes. */
506static const cpu_approx_modes generic_approx_modes =
507{
79a2bc2d 508 AARCH64_APPROX_NONE, /* division */
98daafa0 509 AARCH64_APPROX_NONE, /* sqrt */
9acc9cbe
EM
510 AARCH64_APPROX_NONE /* recip_sqrt */
511};
512
513/* Approximation modes for Exynos M1. */
514static const cpu_approx_modes exynosm1_approx_modes =
515{
79a2bc2d 516 AARCH64_APPROX_NONE, /* division */
98daafa0 517 AARCH64_APPROX_ALL, /* sqrt */
9acc9cbe
EM
518 AARCH64_APPROX_ALL /* recip_sqrt */
519};
520
521/* Approximation modes for X-Gene 1. */
522static const cpu_approx_modes xgene1_approx_modes =
523{
79a2bc2d 524 AARCH64_APPROX_NONE, /* division */
98daafa0 525 AARCH64_APPROX_NONE, /* sqrt */
9acc9cbe
EM
526 AARCH64_APPROX_ALL /* recip_sqrt */
527};
528
9d2c6e2e
MK
529/* Generic prefetch settings (which disable prefetch). */
530static const cpu_prefetch_tune generic_prefetch_tune =
531{
532 0, /* num_slots */
533 -1, /* l1_cache_size */
534 -1, /* l1_cache_line_size */
16b2cafd
MK
535 -1, /* l2_cache_size */
536 -1 /* default_opt_level */
9d2c6e2e
MK
537};
538
539static const cpu_prefetch_tune exynosm1_prefetch_tune =
540{
541 0, /* num_slots */
542 -1, /* l1_cache_size */
543 64, /* l1_cache_line_size */
16b2cafd
MK
544 -1, /* l2_cache_size */
545 -1 /* default_opt_level */
9d2c6e2e
MK
546};
547
548static const cpu_prefetch_tune qdf24xx_prefetch_tune =
549{
70c51b58
MK
550 4, /* num_slots */
551 32, /* l1_cache_size */
9d2c6e2e 552 64, /* l1_cache_line_size */
70c51b58
MK
553 1024, /* l2_cache_size */
554 3 /* default_opt_level */
9d2c6e2e
MK
555};
556
f1e247d0
AP
557static const cpu_prefetch_tune thunderxt88_prefetch_tune =
558{
559 8, /* num_slots */
560 32, /* l1_cache_size */
561 128, /* l1_cache_line_size */
562 16*1024, /* l2_cache_size */
563 3 /* default_opt_level */
564};
565
566static const cpu_prefetch_tune thunderx_prefetch_tune =
567{
568 8, /* num_slots */
569 32, /* l1_cache_size */
570 128, /* l1_cache_line_size */
571 -1, /* l2_cache_size */
572 -1 /* default_opt_level */
573};
574
9d2c6e2e
MK
575static const cpu_prefetch_tune thunderx2t99_prefetch_tune =
576{
f1e247d0
AP
577 8, /* num_slots */
578 32, /* l1_cache_size */
9d2c6e2e 579 64, /* l1_cache_line_size */
f1e247d0 580 256, /* l2_cache_size */
16b2cafd 581 -1 /* default_opt_level */
9d2c6e2e
MK
582};
583
43e9d192
IB
584static const struct tune_params generic_tunings =
585{
4e2cd668 586 &cortexa57_extra_costs,
43e9d192
IB
587 &generic_addrcost_table,
588 &generic_regmove_cost,
8990e73a 589 &generic_vector_cost,
b9066f5a 590 &generic_branch_cost,
9acc9cbe 591 &generic_approx_modes,
bd95e655
JG
592 4, /* memmov_cost */
593 2, /* issue_rate */
e0701ef0 594 (AARCH64_FUSE_AES_AESMC), /* fusible_ops */
0b82a5a2 595 8, /* function_align. */
6b13482b
WD
596 4, /* jump_align. */
597 8, /* loop_align. */
cee66c68
WD
598 2, /* int_reassoc_width. */
599 4, /* fp_reassoc_width. */
50093a33
WD
600 1, /* vec_reassoc_width. */
601 2, /* min_div_recip_mul_sf. */
dfba575f 602 2, /* min_div_recip_mul_df. */
50487d79 603 0, /* max_case_values. */
3b4c0f7e 604 tune_params::AUTOPREFETCHER_WEAK, /* autoprefetcher_model. */
9d2c6e2e
MK
605 (AARCH64_EXTRA_TUNE_NONE), /* tune_flags. */
606 &generic_prefetch_tune
43e9d192
IB
607};
608
1c72a3ca
JG
609static const struct tune_params cortexa35_tunings =
610{
611 &cortexa53_extra_costs,
612 &generic_addrcost_table,
613 &cortexa53_regmove_cost,
614 &generic_vector_cost,
0bc24338 615 &cortexa57_branch_cost,
9acc9cbe 616 &generic_approx_modes,
1c72a3ca
JG
617 4, /* memmov_cost */
618 1, /* issue_rate */
0bc24338 619 (AARCH64_FUSE_AES_AESMC | AARCH64_FUSE_MOV_MOVK | AARCH64_FUSE_ADRP_ADD
1c72a3ca 620 | AARCH64_FUSE_MOVK_MOVK | AARCH64_FUSE_ADRP_LDR), /* fusible_ops */
d4407370 621 16, /* function_align. */
9779b2e8 622 4, /* jump_align. */
d4407370 623 8, /* loop_align. */
1c72a3ca
JG
624 2, /* int_reassoc_width. */
625 4, /* fp_reassoc_width. */
626 1, /* vec_reassoc_width. */
627 2, /* min_div_recip_mul_sf. */
628 2, /* min_div_recip_mul_df. */
629 0, /* max_case_values. */
1c72a3ca 630 tune_params::AUTOPREFETCHER_WEAK, /* autoprefetcher_model. */
9d2c6e2e
MK
631 (AARCH64_EXTRA_TUNE_NONE), /* tune_flags. */
632 &generic_prefetch_tune
1c72a3ca
JG
633};
634
984239ad
KT
635static const struct tune_params cortexa53_tunings =
636{
637 &cortexa53_extra_costs,
638 &generic_addrcost_table,
e4a9c55a 639 &cortexa53_regmove_cost,
984239ad 640 &generic_vector_cost,
0bc24338 641 &cortexa57_branch_cost,
9acc9cbe 642 &generic_approx_modes,
bd95e655
JG
643 4, /* memmov_cost */
644 2, /* issue_rate */
00a8574a 645 (AARCH64_FUSE_AES_AESMC | AARCH64_FUSE_MOV_MOVK | AARCH64_FUSE_ADRP_ADD
e9a3a175 646 | AARCH64_FUSE_MOVK_MOVK | AARCH64_FUSE_ADRP_LDR), /* fusible_ops */
d4407370 647 16, /* function_align. */
9779b2e8 648 4, /* jump_align. */
d4407370 649 8, /* loop_align. */
cee66c68
WD
650 2, /* int_reassoc_width. */
651 4, /* fp_reassoc_width. */
50093a33
WD
652 1, /* vec_reassoc_width. */
653 2, /* min_div_recip_mul_sf. */
dfba575f 654 2, /* min_div_recip_mul_df. */
50487d79 655 0, /* max_case_values. */
2d6bc7fa 656 tune_params::AUTOPREFETCHER_WEAK, /* autoprefetcher_model. */
9d2c6e2e
MK
657 (AARCH64_EXTRA_TUNE_NONE), /* tune_flags. */
658 &generic_prefetch_tune
984239ad
KT
659};
660
4fd92af6
KT
661static const struct tune_params cortexa57_tunings =
662{
663 &cortexa57_extra_costs,
60bff090 664 &cortexa57_addrcost_table,
e4a9c55a 665 &cortexa57_regmove_cost,
60bff090 666 &cortexa57_vector_cost,
67707f65 667 &cortexa57_branch_cost,
9acc9cbe 668 &generic_approx_modes,
bd95e655
JG
669 4, /* memmov_cost */
670 3, /* issue_rate */
00a8574a 671 (AARCH64_FUSE_AES_AESMC | AARCH64_FUSE_MOV_MOVK | AARCH64_FUSE_ADRP_ADD
e9a3a175 672 | AARCH64_FUSE_MOVK_MOVK), /* fusible_ops */
0b82a5a2 673 16, /* function_align. */
9779b2e8 674 4, /* jump_align. */
d4407370 675 8, /* loop_align. */
cee66c68
WD
676 2, /* int_reassoc_width. */
677 4, /* fp_reassoc_width. */
50093a33
WD
678 1, /* vec_reassoc_width. */
679 2, /* min_div_recip_mul_sf. */
dfba575f 680 2, /* min_div_recip_mul_df. */
50487d79 681 0, /* max_case_values. */
2d6bc7fa 682 tune_params::AUTOPREFETCHER_WEAK, /* autoprefetcher_model. */
9d2c6e2e
MK
683 (AARCH64_EXTRA_TUNE_RENAME_FMA_REGS), /* tune_flags. */
684 &generic_prefetch_tune
dfba575f
JG
685};
686
687static const struct tune_params cortexa72_tunings =
688{
689 &cortexa57_extra_costs,
690 &cortexa57_addrcost_table,
691 &cortexa57_regmove_cost,
692 &cortexa57_vector_cost,
0bc24338 693 &cortexa57_branch_cost,
9acc9cbe 694 &generic_approx_modes,
dfba575f
JG
695 4, /* memmov_cost */
696 3, /* issue_rate */
00a8574a 697 (AARCH64_FUSE_AES_AESMC | AARCH64_FUSE_MOV_MOVK | AARCH64_FUSE_ADRP_ADD
dfba575f
JG
698 | AARCH64_FUSE_MOVK_MOVK), /* fusible_ops */
699 16, /* function_align. */
9779b2e8 700 4, /* jump_align. */
d4407370 701 8, /* loop_align. */
dfba575f
JG
702 2, /* int_reassoc_width. */
703 4, /* fp_reassoc_width. */
704 1, /* vec_reassoc_width. */
705 2, /* min_div_recip_mul_sf. */
706 2, /* min_div_recip_mul_df. */
50487d79 707 0, /* max_case_values. */
0bc24338 708 tune_params::AUTOPREFETCHER_WEAK, /* autoprefetcher_model. */
9d2c6e2e
MK
709 (AARCH64_EXTRA_TUNE_NONE), /* tune_flags. */
710 &generic_prefetch_tune
4fd92af6
KT
711};
712
4fb570c4
KT
713static const struct tune_params cortexa73_tunings =
714{
715 &cortexa57_extra_costs,
716 &cortexa57_addrcost_table,
717 &cortexa57_regmove_cost,
718 &cortexa57_vector_cost,
0bc24338 719 &cortexa57_branch_cost,
4fb570c4
KT
720 &generic_approx_modes,
721 4, /* memmov_cost. */
722 2, /* issue_rate. */
723 (AARCH64_FUSE_AES_AESMC | AARCH64_FUSE_MOV_MOVK | AARCH64_FUSE_ADRP_ADD
724 | AARCH64_FUSE_MOVK_MOVK | AARCH64_FUSE_ADRP_LDR), /* fusible_ops */
725 16, /* function_align. */
9779b2e8 726 4, /* jump_align. */
d4407370 727 8, /* loop_align. */
4fb570c4
KT
728 2, /* int_reassoc_width. */
729 4, /* fp_reassoc_width. */
730 1, /* vec_reassoc_width. */
731 2, /* min_div_recip_mul_sf. */
732 2, /* min_div_recip_mul_df. */
733 0, /* max_case_values. */
4fb570c4 734 tune_params::AUTOPREFETCHER_WEAK, /* autoprefetcher_model. */
9d2c6e2e
MK
735 (AARCH64_EXTRA_TUNE_NONE), /* tune_flags. */
736 &generic_prefetch_tune
4fb570c4
KT
737};
738
9d2c6e2e
MK
739
740
5ec1ae3b
EM
741static const struct tune_params exynosm1_tunings =
742{
743 &exynosm1_extra_costs,
744 &exynosm1_addrcost_table,
745 &exynosm1_regmove_cost,
746 &exynosm1_vector_cost,
747 &generic_branch_cost,
9acc9cbe 748 &exynosm1_approx_modes,
5ec1ae3b
EM
749 4, /* memmov_cost */
750 3, /* issue_rate */
25cc2199 751 (AARCH64_FUSE_AES_AESMC), /* fusible_ops */
5ec1ae3b
EM
752 4, /* function_align. */
753 4, /* jump_align. */
754 4, /* loop_align. */
755 2, /* int_reassoc_width. */
756 4, /* fp_reassoc_width. */
757 1, /* vec_reassoc_width. */
758 2, /* min_div_recip_mul_sf. */
759 2, /* min_div_recip_mul_df. */
760 48, /* max_case_values. */
220379df 761 tune_params::AUTOPREFETCHER_WEAK, /* autoprefetcher_model. */
9d2c6e2e
MK
762 (AARCH64_EXTRA_TUNE_NONE), /* tune_flags. */
763 &exynosm1_prefetch_tune
5ec1ae3b
EM
764};
765
f1e247d0
AP
766static const struct tune_params thunderxt88_tunings =
767{
768 &thunderx_extra_costs,
769 &generic_addrcost_table,
770 &thunderx_regmove_cost,
771 &thunderx_vector_cost,
772 &generic_branch_cost,
773 &generic_approx_modes,
774 6, /* memmov_cost */
775 2, /* issue_rate */
776 AARCH64_FUSE_CMP_BRANCH, /* fusible_ops */
777 8, /* function_align. */
778 8, /* jump_align. */
779 8, /* loop_align. */
780 2, /* int_reassoc_width. */
781 4, /* fp_reassoc_width. */
782 1, /* vec_reassoc_width. */
783 2, /* min_div_recip_mul_sf. */
784 2, /* min_div_recip_mul_df. */
785 0, /* max_case_values. */
786 tune_params::AUTOPREFETCHER_OFF, /* autoprefetcher_model. */
787 (AARCH64_EXTRA_TUNE_SLOW_UNALIGNED_LDPW), /* tune_flags. */
788 &thunderxt88_prefetch_tune
789};
790
d1bcc29f
AP
791static const struct tune_params thunderx_tunings =
792{
793 &thunderx_extra_costs,
794 &generic_addrcost_table,
795 &thunderx_regmove_cost,
c3f20327 796 &thunderx_vector_cost,
b9066f5a 797 &generic_branch_cost,
9acc9cbe 798 &generic_approx_modes,
bd95e655
JG
799 6, /* memmov_cost */
800 2, /* issue_rate */
e9a3a175 801 AARCH64_FUSE_CMP_BRANCH, /* fusible_ops */
0b82a5a2
WD
802 8, /* function_align. */
803 8, /* jump_align. */
804 8, /* loop_align. */
cee66c68
WD
805 2, /* int_reassoc_width. */
806 4, /* fp_reassoc_width. */
50093a33
WD
807 1, /* vec_reassoc_width. */
808 2, /* min_div_recip_mul_sf. */
dfba575f 809 2, /* min_div_recip_mul_df. */
50487d79 810 0, /* max_case_values. */
2d6bc7fa 811 tune_params::AUTOPREFETCHER_OFF, /* autoprefetcher_model. */
b10f1009
AP
812 (AARCH64_EXTRA_TUNE_SLOW_UNALIGNED_LDPW
813 | AARCH64_EXTRA_TUNE_CHEAP_SHIFT_EXTEND), /* tune_flags. */
f1e247d0 814 &thunderx_prefetch_tune
d1bcc29f
AP
815};
816
381e27aa
PT
817static const struct tune_params xgene1_tunings =
818{
819 &xgene1_extra_costs,
820 &xgene1_addrcost_table,
821 &xgene1_regmove_cost,
822 &xgene1_vector_cost,
b9066f5a 823 &generic_branch_cost,
9acc9cbe 824 &xgene1_approx_modes,
bd95e655
JG
825 6, /* memmov_cost */
826 4, /* issue_rate */
e9a3a175 827 AARCH64_FUSE_NOTHING, /* fusible_ops */
381e27aa
PT
828 16, /* function_align. */
829 8, /* jump_align. */
830 16, /* loop_align. */
831 2, /* int_reassoc_width. */
832 4, /* fp_reassoc_width. */
50093a33
WD
833 1, /* vec_reassoc_width. */
834 2, /* min_div_recip_mul_sf. */
dfba575f 835 2, /* min_div_recip_mul_df. */
50487d79 836 0, /* max_case_values. */
2d6bc7fa 837 tune_params::AUTOPREFETCHER_OFF, /* autoprefetcher_model. */
9d2c6e2e
MK
838 (AARCH64_EXTRA_TUNE_NONE), /* tune_flags. */
839 &generic_prefetch_tune
381e27aa
PT
840};
841
ee446d9f
JW
842static const struct tune_params qdf24xx_tunings =
843{
844 &qdf24xx_extra_costs,
845 &qdf24xx_addrcost_table,
846 &qdf24xx_regmove_cost,
847 &generic_vector_cost,
848 &generic_branch_cost,
849 &generic_approx_modes,
850 4, /* memmov_cost */
851 4, /* issue_rate */
  (AARCH64_FUSE_MOV_MOVK | AARCH64_FUSE_ADRP_ADD
   | AARCH64_FUSE_MOVK_MOVK), /* fusible_ops */
854 16, /* function_align. */
855 8, /* jump_align. */
856 16, /* loop_align. */
857 2, /* int_reassoc_width. */
858 4, /* fp_reassoc_width. */
859 1, /* vec_reassoc_width. */
860 2, /* min_div_recip_mul_sf. */
861 2, /* min_div_recip_mul_df. */
862 0, /* max_case_values. */
ee446d9f 863 tune_params::AUTOPREFETCHER_STRONG, /* autoprefetcher_model. */
9d2c6e2e
MK
864 (AARCH64_EXTRA_TUNE_NONE), /* tune_flags. */
865 &qdf24xx_prefetch_tune
ee446d9f
JW
866};
867
d1261ac6 868static const struct tune_params thunderx2t99_tunings =
ad611a4c 869{
d1261ac6
AP
870 &thunderx2t99_extra_costs,
871 &thunderx2t99_addrcost_table,
872 &thunderx2t99_regmove_cost,
873 &thunderx2t99_vector_cost,
874 &thunderx2t99_branch_cost,
ad611a4c
VP
875 &generic_approx_modes,
876 4, /* memmov_cost. */
877 4, /* issue_rate. */
6d5b4f9e 878 (AARCH64_FUSE_CMP_BRANCH | AARCH64_FUSE_AES_AESMC), /* fusible_ops */
ad611a4c
VP
879 16, /* function_align. */
880 8, /* jump_align. */
881 16, /* loop_align. */
882 3, /* int_reassoc_width. */
883 2, /* fp_reassoc_width. */
884 2, /* vec_reassoc_width. */
885 2, /* min_div_recip_mul_sf. */
886 2, /* min_div_recip_mul_df. */
887 0, /* max_case_values. */
f1e247d0 888 tune_params::AUTOPREFETCHER_WEAK, /* autoprefetcher_model. */
9d2c6e2e
MK
889 (AARCH64_EXTRA_TUNE_NONE), /* tune_flags. */
890 &thunderx2t99_prefetch_tune
ad611a4c
VP
891};
892
8dec06f2
JG
893/* Support for fine-grained override of the tuning structures. */
894struct aarch64_tuning_override_function
895{
896 const char* name;
897 void (*parse_override)(const char*, struct tune_params*);
898};
899
900static void aarch64_parse_fuse_string (const char*, struct tune_params*);
901static void aarch64_parse_tune_string (const char*, struct tune_params*);
902
903static const struct aarch64_tuning_override_function
904aarch64_tuning_override_functions[] =
905{
906 { "fuse", aarch64_parse_fuse_string },
907 { "tune", aarch64_parse_tune_string },
908 { NULL, NULL }
909};
910
43e9d192
IB
911/* A processor implementing AArch64. */
912struct processor
913{
914 const char *const name;
46806c44
KT
915 enum aarch64_processor ident;
916 enum aarch64_processor sched_core;
393ae126 917 enum aarch64_arch arch;
0c6caaf8 918 unsigned architecture_version;
43e9d192
IB
919 const unsigned long flags;
920 const struct tune_params *const tune;
921};
922
393ae126
KT
923/* Architectures implementing AArch64. */
924static const struct processor all_architectures[] =
925{
926#define AARCH64_ARCH(NAME, CORE, ARCH_IDENT, ARCH_REV, FLAGS) \
927 {NAME, CORE, CORE, AARCH64_ARCH_##ARCH_IDENT, ARCH_REV, FLAGS, NULL},
928#include "aarch64-arches.def"
393ae126
KT
929 {NULL, aarch64_none, aarch64_none, aarch64_no_arch, 0, 0, NULL}
930};
931
43e9d192
IB
932/* Processor cores implementing AArch64. */
933static const struct processor all_cores[] =
934{
e8fcc9fa 935#define AARCH64_CORE(NAME, IDENT, SCHED, ARCH, FLAGS, COSTS, IMP, PART, VARIANT) \
393ae126
KT
936 {NAME, IDENT, SCHED, AARCH64_ARCH_##ARCH, \
937 all_architectures[AARCH64_ARCH_##ARCH].architecture_version, \
938 FLAGS, &COSTS##_tunings},
43e9d192 939#include "aarch64-cores.def"
393ae126
KT
940 {"generic", generic, cortexa53, AARCH64_ARCH_8A, 8,
941 AARCH64_FL_FOR_ARCH8, &generic_tunings},
942 {NULL, aarch64_none, aarch64_none, aarch64_no_arch, 0, 0, NULL}
43e9d192
IB
943};
944
43e9d192 945
361fb3ee
KT
946/* Target specification. These are populated by the -march, -mtune, -mcpu
947 handling code or by target attributes. */
43e9d192
IB
948static const struct processor *selected_arch;
949static const struct processor *selected_cpu;
950static const struct processor *selected_tune;
951
b175b679
JG
952/* The current tuning set. */
953struct tune_params aarch64_tune_params = generic_tunings;
954
43e9d192
IB
955#define AARCH64_CPU_DEFAULT_FLAGS ((selected_cpu) ? selected_cpu->flags : 0)
956
957/* An ISA extension in the co-processor and main instruction set space. */
958struct aarch64_option_extension
959{
960 const char *const name;
961 const unsigned long flags_on;
962 const unsigned long flags_off;
963};
964
typedef enum aarch64_cond_code
{
  AARCH64_EQ = 0, AARCH64_NE, AARCH64_CS, AARCH64_CC, AARCH64_MI, AARCH64_PL,
  AARCH64_VS, AARCH64_VC, AARCH64_HI, AARCH64_LS, AARCH64_GE, AARCH64_LT,
  AARCH64_GT, AARCH64_LE, AARCH64_AL, AARCH64_NV
}
aarch64_cc;

#define AARCH64_INVERSE_CONDITION_CODE(X) ((aarch64_cc) (((int) X) ^ 1))

/* The condition codes of the processor, and the inverse function.  */
static const char * const aarch64_condition_codes[] =
{
  "eq", "ne", "cs", "cc", "mi", "pl", "vs", "vc",
  "hi", "ls", "ge", "lt", "gt", "le", "al", "nv"
};
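
/* Illustrative example, inferred from the definitions above: the condition
   codes are laid out in inverse pairs, so flipping the low bit maps a code
   to its inverse; e.g. AARCH64_INVERSE_CONDITION_CODE (AARCH64_GE)
   == AARCH64_LT, and aarch64_condition_codes[AARCH64_GE] is "ge" while
   aarch64_condition_codes[AARCH64_LT] is "lt".  */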
981
973d2e01
TP
982/* Generate code to enable conditional branches in functions over 1 MiB. */
983const char *
984aarch64_gen_far_branch (rtx * operands, int pos_label, const char * dest,
985 const char * branch_format)
986{
987 rtx_code_label * tmp_label = gen_label_rtx ();
988 char label_buf[256];
989 char buffer[128];
990 ASM_GENERATE_INTERNAL_LABEL (label_buf, dest,
991 CODE_LABEL_NUMBER (tmp_label));
992 const char *label_ptr = targetm.strip_name_encoding (label_buf);
993 rtx dest_label = operands[pos_label];
994 operands[pos_label] = tmp_label;
995
996 snprintf (buffer, sizeof (buffer), "%s%s", branch_format, label_ptr);
997 output_asm_insn (buffer, operands);
998
999 snprintf (buffer, sizeof (buffer), "b\t%%l%d\n%s:", pos_label, label_ptr);
1000 operands[pos_label] = dest_label;
1001 output_asm_insn (buffer, operands);
1002 return "";
1003}
1004
261fb553
AL
1005void
1006aarch64_err_no_fpadvsimd (machine_mode mode, const char *msg)
1007{
1008 const char *mc = FLOAT_MODE_P (mode) ? "floating-point" : "vector";
1009 if (TARGET_GENERAL_REGS_ONLY)
1010 error ("%qs is incompatible with %s %s", "-mgeneral-regs-only", mc, msg);
1011 else
1012 error ("%qs feature modifier is incompatible with %s %s", "+nofp", mc, msg);
1013}
1014
/* Implement TARGET_IRA_CHANGE_PSEUDO_ALLOCNO_CLASS.
   The register allocator chooses ALL_REGS if FP_REGS and GENERAL_REGS have
   the same cost, even if ALL_REGS has a much larger cost.  ALL_REGS is also
   used if the cost of both FP_REGS and GENERAL_REGS is lower than the memory
   cost (in this case the best class is the lowest cost one).  Using ALL_REGS
   irrespective of its cost results in bad allocations with many redundant
   int<->FP moves which are expensive on various cores.
   To avoid this we don't allow ALL_REGS as the allocno class, but force a
   decision between FP_REGS and GENERAL_REGS.  We use the allocno class if it
   isn't ALL_REGS.  Similarly, use the best class if it isn't ALL_REGS.
   Otherwise set the allocno class depending on the mode.
   The result of this is that it is no longer inefficient to have a higher
   memory move cost than the register move cost.  */

static reg_class_t
aarch64_ira_change_pseudo_allocno_class (int regno, reg_class_t allocno_class,
					 reg_class_t best_class)
{
  enum machine_mode mode;

  if (allocno_class != ALL_REGS)
    return allocno_class;

  if (best_class != ALL_REGS)
    return best_class;

  mode = PSEUDO_REGNO_MODE (regno);
  return FLOAT_MODE_P (mode) || VECTOR_MODE_P (mode) ? FP_REGS : GENERAL_REGS;
}
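
/* Illustrative example of the above: a DFmode pseudo whose allocno class
   and best class both come back as ALL_REGS is placed in FP_REGS, while a
   DImode pseudo in the same situation is placed in GENERAL_REGS.  */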
1045
26e0ff94 1046static unsigned int
50093a33 1047aarch64_min_divisions_for_recip_mul (enum machine_mode mode)
26e0ff94 1048{
50093a33 1049 if (GET_MODE_UNIT_SIZE (mode) == 4)
b175b679
JG
1050 return aarch64_tune_params.min_div_recip_mul_sf;
1051 return aarch64_tune_params.min_div_recip_mul_df;
26e0ff94
WD
1052}
1053
cee66c68
WD
1054static int
1055aarch64_reassociation_width (unsigned opc ATTRIBUTE_UNUSED,
1056 enum machine_mode mode)
1057{
1058 if (VECTOR_MODE_P (mode))
b175b679 1059 return aarch64_tune_params.vec_reassoc_width;
cee66c68 1060 if (INTEGRAL_MODE_P (mode))
b175b679 1061 return aarch64_tune_params.int_reassoc_width;
cee66c68 1062 if (FLOAT_MODE_P (mode))
b175b679 1063 return aarch64_tune_params.fp_reassoc_width;
cee66c68
WD
1064 return 1;
1065}
1066
43e9d192
IB
1067/* Provide a mapping from gcc register numbers to dwarf register numbers. */
1068unsigned
1069aarch64_dbx_register_number (unsigned regno)
1070{
1071 if (GP_REGNUM_P (regno))
1072 return AARCH64_DWARF_R0 + regno - R0_REGNUM;
1073 else if (regno == SP_REGNUM)
1074 return AARCH64_DWARF_SP;
1075 else if (FP_REGNUM_P (regno))
1076 return AARCH64_DWARF_V0 + regno - V0_REGNUM;
1077
1078 /* Return values >= DWARF_FRAME_REGISTERS indicate that there is no
1079 equivalent DWARF register. */
1080 return DWARF_FRAME_REGISTERS;
1081}
1082
1083/* Return TRUE if MODE is any of the large INT modes. */
1084static bool
ef4bddc2 1085aarch64_vect_struct_mode_p (machine_mode mode)
43e9d192
IB
1086{
1087 return mode == OImode || mode == CImode || mode == XImode;
1088}
1089
1090/* Return TRUE if MODE is any of the vector modes. */
1091static bool
ef4bddc2 1092aarch64_vector_mode_p (machine_mode mode)
43e9d192
IB
1093{
1094 return aarch64_vector_mode_supported_p (mode)
1095 || aarch64_vect_struct_mode_p (mode);
1096}
1097
1098/* Implement target hook TARGET_ARRAY_MODE_SUPPORTED_P. */
1099static bool
ef4bddc2 1100aarch64_array_mode_supported_p (machine_mode mode,
43e9d192
IB
1101 unsigned HOST_WIDE_INT nelems)
1102{
1103 if (TARGET_SIMD
635e66fe
AL
1104 && (AARCH64_VALID_SIMD_QREG_MODE (mode)
1105 || AARCH64_VALID_SIMD_DREG_MODE (mode))
43e9d192
IB
1106 && (nelems >= 2 && nelems <= 4))
1107 return true;
1108
1109 return false;
1110}
1111
/* Implement HARD_REGNO_NREGS.  */

int
aarch64_hard_regno_nregs (unsigned regno, machine_mode mode)
{
  switch (aarch64_regno_regclass (regno))
    {
    case FP_REGS:
    case FP_LO_REGS:
      return (GET_MODE_SIZE (mode) + UNITS_PER_VREG - 1) / UNITS_PER_VREG;
    default:
      return (GET_MODE_SIZE (mode) + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
    }
  gcc_unreachable ();
}
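
/* Worked example, assuming UNITS_PER_WORD == 8 and UNITS_PER_VREG == 16:
   a 16-byte TImode value occupies two general-purpose registers but only
   one vector register, so this returns 2 for a GP regno and 1 for an FP
   regno.  */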
1127
1128/* Implement HARD_REGNO_MODE_OK. */
1129
1130int
ef4bddc2 1131aarch64_hard_regno_mode_ok (unsigned regno, machine_mode mode)
43e9d192
IB
1132{
1133 if (GET_MODE_CLASS (mode) == MODE_CC)
1134 return regno == CC_REGNUM;
1135
9259db42
YZ
1136 if (regno == SP_REGNUM)
1137 /* The purpose of comparing with ptr_mode is to support the
1138 global register variable associated with the stack pointer
1139 register via the syntax of asm ("wsp") in ILP32. */
1140 return mode == Pmode || mode == ptr_mode;
1141
1142 if (regno == FRAME_POINTER_REGNUM || regno == ARG_POINTER_REGNUM)
43e9d192
IB
1143 return mode == Pmode;
1144
1145 if (GP_REGNUM_P (regno) && ! aarch64_vect_struct_mode_p (mode))
1146 return 1;
1147
1148 if (FP_REGNUM_P (regno))
1149 {
1150 if (aarch64_vect_struct_mode_p (mode))
1151 return
1152 (regno + aarch64_hard_regno_nregs (regno, mode) - 1) <= V31_REGNUM;
1153 else
1154 return 1;
1155 }
1156
1157 return 0;
1158}
1159
73d9ac6a 1160/* Implement HARD_REGNO_CALLER_SAVE_MODE. */
ef4bddc2 1161machine_mode
73d9ac6a 1162aarch64_hard_regno_caller_save_mode (unsigned regno, unsigned nregs,
ef4bddc2 1163 machine_mode mode)
73d9ac6a
IB
1164{
1165 /* Handle modes that fit within single registers. */
1166 if (nregs == 1 && GET_MODE_SIZE (mode) <= 16)
1167 {
1168 if (GET_MODE_SIZE (mode) >= 4)
1169 return mode;
1170 else
1171 return SImode;
1172 }
1173 /* Fall back to generic for multi-reg and very large modes. */
1174 else
1175 return choose_hard_reg_mode (regno, nregs, false);
1176}
1177
43e9d192
IB
1178/* Return true if calls to DECL should be treated as
1179 long-calls (ie called via a register). */
1180static bool
1181aarch64_decl_is_long_call_p (const_tree decl ATTRIBUTE_UNUSED)
1182{
1183 return false;
1184}
1185
1186/* Return true if calls to symbol-ref SYM should be treated as
1187 long-calls (ie called via a register). */
1188bool
1189aarch64_is_long_call_p (rtx sym)
1190{
1191 return aarch64_decl_is_long_call_p (SYMBOL_REF_DECL (sym));
1192}
1193
b60d63cb
JW
1194/* Return true if calls to symbol-ref SYM should not go through
1195 plt stubs. */
1196
1197bool
1198aarch64_is_noplt_call_p (rtx sym)
1199{
1200 const_tree decl = SYMBOL_REF_DECL (sym);
1201
1202 if (flag_pic
1203 && decl
1204 && (!flag_plt
1205 || lookup_attribute ("noplt", DECL_ATTRIBUTES (decl)))
1206 && !targetm.binds_local_p (decl))
1207 return true;
1208
1209 return false;
1210}
1211
/* Return true if the offsets to a zero/sign-extract operation
   represent an expression that matches an extend operation.  The
   operands represent the parameters from

     (extract:MODE (mult (reg) (MULT_IMM)) (EXTRACT_IMM) (const_int 0)).  */
bool
aarch64_is_extend_from_extract (machine_mode mode, rtx mult_imm,
				rtx extract_imm)
{
  HOST_WIDE_INT mult_val, extract_val;

  if (! CONST_INT_P (mult_imm) || ! CONST_INT_P (extract_imm))
    return false;

  mult_val = INTVAL (mult_imm);
  extract_val = INTVAL (extract_imm);

  if (extract_val > 8
      && extract_val < GET_MODE_BITSIZE (mode)
      && exact_log2 (extract_val & ~7) > 0
      && (extract_val & 7) <= 4
      && mult_val == (1 << (extract_val & 7)))
    return true;

  return false;
}
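
/* Worked example, inferred from the checks above: with mode == DImode,
   extract_imm == 34 and mult_imm == 4 are accepted (34 & ~7 == 32 is a
   power of two, 34 & 7 == 2 <= 4, and 4 == 1 << 2), which would
   correspond to a 32-bit value being extended and scaled by 4.  */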
1238
1239/* Emit an insn that's a simple single-set. Both the operands must be
1240 known to be valid. */
827ab47a 1241inline static rtx_insn *
43e9d192
IB
1242emit_set_insn (rtx x, rtx y)
1243{
f7df4a84 1244 return emit_insn (gen_rtx_SET (x, y));
43e9d192
IB
1245}
1246
1247/* X and Y are two things to compare using CODE. Emit the compare insn and
1248 return the rtx for register 0 in the proper mode. */
1249rtx
1250aarch64_gen_compare_reg (RTX_CODE code, rtx x, rtx y)
1251{
ef4bddc2 1252 machine_mode mode = SELECT_CC_MODE (code, x, y);
43e9d192
IB
1253 rtx cc_reg = gen_rtx_REG (mode, CC_REGNUM);
1254
1255 emit_set_insn (cc_reg, gen_rtx_COMPARE (mode, x, y));
1256 return cc_reg;
1257}
1258
1259/* Build the SYMBOL_REF for __tls_get_addr. */
1260
1261static GTY(()) rtx tls_get_addr_libfunc;
1262
1263rtx
1264aarch64_tls_get_addr (void)
1265{
1266 if (!tls_get_addr_libfunc)
1267 tls_get_addr_libfunc = init_one_libfunc ("__tls_get_addr");
1268 return tls_get_addr_libfunc;
1269}
1270
1271/* Return the TLS model to use for ADDR. */
1272
1273static enum tls_model
1274tls_symbolic_operand_type (rtx addr)
1275{
1276 enum tls_model tls_kind = TLS_MODEL_NONE;
1277 rtx sym, addend;
1278
1279 if (GET_CODE (addr) == CONST)
1280 {
1281 split_const (addr, &sym, &addend);
1282 if (GET_CODE (sym) == SYMBOL_REF)
1283 tls_kind = SYMBOL_REF_TLS_MODEL (sym);
1284 }
1285 else if (GET_CODE (addr) == SYMBOL_REF)
1286 tls_kind = SYMBOL_REF_TLS_MODEL (addr);
1287
1288 return tls_kind;
1289}
1290
1291/* We'll allow lo_sum's in addresses in our legitimate addresses
1292 so that combine would take care of combining addresses where
1293 necessary, but for generation purposes, we'll generate the address
1294 as :
1295 RTL Absolute
1296 tmp = hi (symbol_ref); adrp x1, foo
1297 dest = lo_sum (tmp, symbol_ref); add dest, x1, :lo_12:foo
1298 nop
1299
1300 PIC TLS
1301 adrp x1, :got:foo adrp tmp, :tlsgd:foo
1302 ldr x1, [:got_lo12:foo] add dest, tmp, :tlsgd_lo12:foo
1303 bl __tls_get_addr
1304 nop
1305
1306 Load TLS symbol, depending on TLS mechanism and TLS access model.
1307
1308 Global Dynamic - Traditional TLS:
1309 adrp tmp, :tlsgd:imm
1310 add dest, tmp, #:tlsgd_lo12:imm
1311 bl __tls_get_addr
1312
1313 Global Dynamic - TLS Descriptors:
1314 adrp dest, :tlsdesc:imm
1315 ldr tmp, [dest, #:tlsdesc_lo12:imm]
1316 add dest, dest, #:tlsdesc_lo12:imm
1317 blr tmp
1318 mrs tp, tpidr_el0
1319 add dest, dest, tp
1320
1321 Initial Exec:
1322 mrs tp, tpidr_el0
1323 adrp tmp, :gottprel:imm
1324 ldr dest, [tmp, #:gottprel_lo12:imm]
1325 add dest, dest, tp
1326
1327 Local Exec:
1328 mrs tp, tpidr_el0
0699caae
RL
1329 add t0, tp, #:tprel_hi12:imm, lsl #12
1330 add t0, t0, #:tprel_lo12_nc:imm
43e9d192
IB
1331*/
1332
1333static void
1334aarch64_load_symref_appropriately (rtx dest, rtx imm,
1335 enum aarch64_symbol_type type)
1336{
1337 switch (type)
1338 {
1339 case SYMBOL_SMALL_ABSOLUTE:
1340 {
28514dda 1341 /* In ILP32, the mode of dest can be either SImode or DImode. */
43e9d192 1342 rtx tmp_reg = dest;
ef4bddc2 1343 machine_mode mode = GET_MODE (dest);
28514dda
YZ
1344
1345 gcc_assert (mode == Pmode || mode == ptr_mode);
1346
43e9d192 1347 if (can_create_pseudo_p ())
28514dda 1348 tmp_reg = gen_reg_rtx (mode);
43e9d192 1349
28514dda 1350 emit_move_insn (tmp_reg, gen_rtx_HIGH (mode, imm));
43e9d192
IB
1351 emit_insn (gen_add_losym (dest, tmp_reg, imm));
1352 return;
1353 }
1354
a5350ddc 1355 case SYMBOL_TINY_ABSOLUTE:
f7df4a84 1356 emit_insn (gen_rtx_SET (dest, imm));
a5350ddc
CSS
1357 return;
1358
1b1e81f8
JW
1359 case SYMBOL_SMALL_GOT_28K:
1360 {
1361 machine_mode mode = GET_MODE (dest);
1362 rtx gp_rtx = pic_offset_table_rtx;
53021678
JW
1363 rtx insn;
1364 rtx mem;
1b1e81f8
JW
1365
      /* NOTE: pic_offset_table_rtx can be NULL_RTX, because we can reach
	 here before rtl expand.  Tree IVOPT will generate rtl pattern to
	 decide rtx costs, in which case pic_offset_table_rtx is not
	 initialized.  For that case no need to generate the first adrp
	 instruction as the final cost for global variable access is
	 one instruction.  */
      if (gp_rtx != NULL)
	{
	  /* -fpic for -mcmodel=small allows a 32K GOT table size (but we are
	     using the page base as GOT base, the first page may be wasted,
	     in the worst scenario, there is only 28K space for GOT).

	     The generated instruction sequence for accessing a global
	     variable is:

		ldr reg, [pic_offset_table_rtx, #:gotpage_lo15:sym]

	     Only one instruction is needed.  But we must initialize
	     pic_offset_table_rtx properly.  We generate an initialization
	     insn for every global access, and allow CSE to remove all
	     redundant ones.

	     The final instruction sequence will look like the following
	     for multiple global variable accesses.

		adrp pic_offset_table_rtx, _GLOBAL_OFFSET_TABLE_

		ldr reg, [pic_offset_table_rtx, #:gotpage_lo15:sym1]
		ldr reg, [pic_offset_table_rtx, #:gotpage_lo15:sym2]
		ldr reg, [pic_offset_table_rtx, #:gotpage_lo15:sym3]
		...  */
1b1e81f8
JW
1396
1397 rtx s = gen_rtx_SYMBOL_REF (Pmode, "_GLOBAL_OFFSET_TABLE_");
1398 crtl->uses_pic_offset_table = 1;
1399 emit_move_insn (gp_rtx, gen_rtx_HIGH (Pmode, s));
1400
1401 if (mode != GET_MODE (gp_rtx))
4ba8f0a3
AP
1402 gp_rtx = gen_lowpart (mode, gp_rtx);
1403
1b1e81f8
JW
1404 }
1405
1406 if (mode == ptr_mode)
1407 {
1408 if (mode == DImode)
53021678 1409 insn = gen_ldr_got_small_28k_di (dest, gp_rtx, imm);
1b1e81f8 1410 else
53021678
JW
1411 insn = gen_ldr_got_small_28k_si (dest, gp_rtx, imm);
1412
1413 mem = XVECEXP (SET_SRC (insn), 0, 0);
1b1e81f8
JW
1414 }
1415 else
1416 {
1417 gcc_assert (mode == Pmode);
53021678
JW
1418
1419 insn = gen_ldr_got_small_28k_sidi (dest, gp_rtx, imm);
1420 mem = XVECEXP (XEXP (SET_SRC (insn), 0), 0, 0);
1b1e81f8
JW
1421 }
1422
53021678
JW
1423 /* The operand is expected to be MEM. Whenever the related insn
1424 pattern changed, above code which calculate mem should be
1425 updated. */
1426 gcc_assert (GET_CODE (mem) == MEM);
1427 MEM_READONLY_P (mem) = 1;
1428 MEM_NOTRAP_P (mem) = 1;
1429 emit_insn (insn);
1b1e81f8
JW
1430 return;
1431 }
1432
6642bdb4 1433 case SYMBOL_SMALL_GOT_4G:
43e9d192 1434 {
28514dda
YZ
1435 /* In ILP32, the mode of dest can be either SImode or DImode,
1436 while the got entry is always of SImode size. The mode of
1437 dest depends on how dest is used: if dest is assigned to a
1438 pointer (e.g. in the memory), it has SImode; it may have
	 DImode if dest is dereferenced to access the memory.
1440 This is why we have to handle three different ldr_got_small
1441 patterns here (two patterns for ILP32). */
53021678
JW
1442
1443 rtx insn;
1444 rtx mem;
43e9d192 1445 rtx tmp_reg = dest;
ef4bddc2 1446 machine_mode mode = GET_MODE (dest);
28514dda 1447
43e9d192 1448 if (can_create_pseudo_p ())
28514dda
YZ
1449 tmp_reg = gen_reg_rtx (mode);
1450
1451 emit_move_insn (tmp_reg, gen_rtx_HIGH (mode, imm));
1452 if (mode == ptr_mode)
1453 {
1454 if (mode == DImode)
53021678 1455 insn = gen_ldr_got_small_di (dest, tmp_reg, imm);
28514dda 1456 else
53021678
JW
1457 insn = gen_ldr_got_small_si (dest, tmp_reg, imm);
1458
1459 mem = XVECEXP (SET_SRC (insn), 0, 0);
28514dda
YZ
1460 }
1461 else
1462 {
1463 gcc_assert (mode == Pmode);
53021678
JW
1464
1465 insn = gen_ldr_got_small_sidi (dest, tmp_reg, imm);
1466 mem = XVECEXP (XEXP (SET_SRC (insn), 0), 0, 0);
28514dda
YZ
1467 }
1468
53021678
JW
1469 gcc_assert (GET_CODE (mem) == MEM);
1470 MEM_READONLY_P (mem) = 1;
1471 MEM_NOTRAP_P (mem) = 1;
1472 emit_insn (insn);
43e9d192
IB
1473 return;
1474 }
1475
1476 case SYMBOL_SMALL_TLSGD:
1477 {
5d8a22a5 1478 rtx_insn *insns;
23b88fda
N
1479 machine_mode mode = GET_MODE (dest);
1480 rtx result = gen_rtx_REG (mode, R0_REGNUM);
43e9d192
IB
1481
1482 start_sequence ();
23b88fda
N
1483 if (TARGET_ILP32)
1484 aarch64_emit_call_insn (gen_tlsgd_small_si (result, imm));
1485 else
1486 aarch64_emit_call_insn (gen_tlsgd_small_di (result, imm));
43e9d192
IB
1487 insns = get_insns ();
1488 end_sequence ();
1489
1490 RTL_CONST_CALL_P (insns) = 1;
1491 emit_libcall_block (insns, dest, result, imm);
1492 return;
1493 }
1494
1495 case SYMBOL_SMALL_TLSDESC:
1496 {
ef4bddc2 1497 machine_mode mode = GET_MODE (dest);
621ad2de 1498 rtx x0 = gen_rtx_REG (mode, R0_REGNUM);
43e9d192
IB
1499 rtx tp;
1500
621ad2de
AP
1501 gcc_assert (mode == Pmode || mode == ptr_mode);
1502
2876a13f
JW
1503 /* In ILP32, the got entry is always of SImode size. Unlike
1504 small GOT, the dest is fixed at reg 0. */
1505 if (TARGET_ILP32)
1506 emit_insn (gen_tlsdesc_small_si (imm));
621ad2de 1507 else
2876a13f 1508 emit_insn (gen_tlsdesc_small_di (imm));
43e9d192 1509 tp = aarch64_load_tp (NULL);
621ad2de
AP
1510
1511 if (mode != Pmode)
1512 tp = gen_lowpart (mode, tp);
1513
2876a13f 1514 emit_insn (gen_rtx_SET (dest, gen_rtx_PLUS (mode, tp, x0)));
43e9d192
IB
1515 set_unique_reg_note (get_last_insn (), REG_EQUIV, imm);
1516 return;
1517 }
1518
79496620 1519 case SYMBOL_SMALL_TLSIE:
43e9d192 1520 {
621ad2de
AP
1521 /* In ILP32, the mode of dest can be either SImode or DImode,
1522 while the got entry is always of SImode size. The mode of
1523 dest depends on how dest is used: if dest is assigned to a
1524 pointer (e.g. in the memory), it has SImode; it may have
	 DImode if dest is dereferenced to access the memory.
1526 This is why we have to handle three different tlsie_small
1527 patterns here (two patterns for ILP32). */
ef4bddc2 1528 machine_mode mode = GET_MODE (dest);
621ad2de 1529 rtx tmp_reg = gen_reg_rtx (mode);
43e9d192 1530 rtx tp = aarch64_load_tp (NULL);
621ad2de
AP
1531
1532 if (mode == ptr_mode)
1533 {
1534 if (mode == DImode)
1535 emit_insn (gen_tlsie_small_di (tmp_reg, imm));
1536 else
1537 {
1538 emit_insn (gen_tlsie_small_si (tmp_reg, imm));
1539 tp = gen_lowpart (mode, tp);
1540 }
1541 }
1542 else
1543 {
1544 gcc_assert (mode == Pmode);
1545 emit_insn (gen_tlsie_small_sidi (tmp_reg, imm));
1546 }
1547
f7df4a84 1548 emit_insn (gen_rtx_SET (dest, gen_rtx_PLUS (mode, tp, tmp_reg)));
43e9d192
IB
1549 set_unique_reg_note (get_last_insn (), REG_EQUIV, imm);
1550 return;
1551 }
1552
cbf5629e 1553 case SYMBOL_TLSLE12:
d18ba284 1554 case SYMBOL_TLSLE24:
cbf5629e
JW
1555 case SYMBOL_TLSLE32:
1556 case SYMBOL_TLSLE48:
43e9d192 1557 {
cbf5629e 1558 machine_mode mode = GET_MODE (dest);
43e9d192 1559 rtx tp = aarch64_load_tp (NULL);
e6f7f0e9 1560
cbf5629e
JW
1561 if (mode != Pmode)
1562 tp = gen_lowpart (mode, tp);
1563
1564 switch (type)
1565 {
1566 case SYMBOL_TLSLE12:
1567 emit_insn ((mode == DImode ? gen_tlsle12_di : gen_tlsle12_si)
1568 (dest, tp, imm));
1569 break;
1570 case SYMBOL_TLSLE24:
1571 emit_insn ((mode == DImode ? gen_tlsle24_di : gen_tlsle24_si)
1572 (dest, tp, imm));
1573 break;
1574 case SYMBOL_TLSLE32:
1575 emit_insn ((mode == DImode ? gen_tlsle32_di : gen_tlsle32_si)
1576 (dest, imm));
1577 emit_insn ((mode == DImode ? gen_adddi3 : gen_addsi3)
1578 (dest, dest, tp));
1579 break;
1580 case SYMBOL_TLSLE48:
1581 emit_insn ((mode == DImode ? gen_tlsle48_di : gen_tlsle48_si)
1582 (dest, imm));
1583 emit_insn ((mode == DImode ? gen_adddi3 : gen_addsi3)
1584 (dest, dest, tp));
1585 break;
1586 default:
1587 gcc_unreachable ();
1588 }
e6f7f0e9 1589
43e9d192
IB
1590 set_unique_reg_note (get_last_insn (), REG_EQUIV, imm);
1591 return;
1592 }
1593
87dd8ab0
MS
1594 case SYMBOL_TINY_GOT:
1595 emit_insn (gen_ldr_got_tiny (dest, imm));
1596 return;
1597
5ae7caad
JW
1598 case SYMBOL_TINY_TLSIE:
1599 {
1600 machine_mode mode = GET_MODE (dest);
1601 rtx tp = aarch64_load_tp (NULL);
1602
1603 if (mode == ptr_mode)
1604 {
1605 if (mode == DImode)
1606 emit_insn (gen_tlsie_tiny_di (dest, imm, tp));
1607 else
1608 {
1609 tp = gen_lowpart (mode, tp);
1610 emit_insn (gen_tlsie_tiny_si (dest, imm, tp));
1611 }
1612 }
1613 else
1614 {
1615 gcc_assert (mode == Pmode);
1616 emit_insn (gen_tlsie_tiny_sidi (dest, imm, tp));
1617 }
1618
1619 set_unique_reg_note (get_last_insn (), REG_EQUIV, imm);
1620 return;
1621 }
1622
43e9d192
IB
1623 default:
1624 gcc_unreachable ();
1625 }
1626}
1627
1628/* Emit a move from SRC to DEST. Assume that the move expanders can
1629 handle all moves if !can_create_pseudo_p (). The distinction is
1630 important because, unlike emit_move_insn, the move expanders know
1631 how to force Pmode objects into the constant pool even when the
1632 constant pool address is not itself legitimate. */
1633static rtx
1634aarch64_emit_move (rtx dest, rtx src)
1635{
1636 return (can_create_pseudo_p ()
1637 ? emit_move_insn (dest, src)
1638 : emit_move_insn_1 (dest, src));
1639}
1640
030d03b8
RE
1641/* Split a 128-bit move operation into two 64-bit move operations,
1642 taking care to handle partial overlap of register to register
1643 copies. Special cases are needed when moving between GP regs and
1644 FP regs. SRC can be a register, constant or memory; DST a register
1645 or memory. If either operand is memory it must not have any side
1646 effects. */
43e9d192
IB
1647void
1648aarch64_split_128bit_move (rtx dst, rtx src)
1649{
030d03b8
RE
1650 rtx dst_lo, dst_hi;
1651 rtx src_lo, src_hi;
43e9d192 1652
ef4bddc2 1653 machine_mode mode = GET_MODE (dst);
12dc6974 1654
030d03b8
RE
1655 gcc_assert (mode == TImode || mode == TFmode);
1656 gcc_assert (!(side_effects_p (src) || side_effects_p (dst)));
1657 gcc_assert (mode == GET_MODE (src) || GET_MODE (src) == VOIDmode);
43e9d192
IB
1658
1659 if (REG_P (dst) && REG_P (src))
1660 {
030d03b8
RE
1661 int src_regno = REGNO (src);
1662 int dst_regno = REGNO (dst);
43e9d192 1663
030d03b8 1664 /* Handle FP <-> GP regs. */
43e9d192
IB
1665 if (FP_REGNUM_P (dst_regno) && GP_REGNUM_P (src_regno))
1666 {
030d03b8
RE
1667 src_lo = gen_lowpart (word_mode, src);
1668 src_hi = gen_highpart (word_mode, src);
1669
1670 if (mode == TImode)
1671 {
1672 emit_insn (gen_aarch64_movtilow_di (dst, src_lo));
1673 emit_insn (gen_aarch64_movtihigh_di (dst, src_hi));
1674 }
1675 else
1676 {
1677 emit_insn (gen_aarch64_movtflow_di (dst, src_lo));
1678 emit_insn (gen_aarch64_movtfhigh_di (dst, src_hi));
1679 }
1680 return;
43e9d192
IB
1681 }
1682 else if (GP_REGNUM_P (dst_regno) && FP_REGNUM_P (src_regno))
1683 {
030d03b8
RE
1684 dst_lo = gen_lowpart (word_mode, dst);
1685 dst_hi = gen_highpart (word_mode, dst);
1686
1687 if (mode == TImode)
1688 {
1689 emit_insn (gen_aarch64_movdi_tilow (dst_lo, src));
1690 emit_insn (gen_aarch64_movdi_tihigh (dst_hi, src));
1691 }
1692 else
1693 {
1694 emit_insn (gen_aarch64_movdi_tflow (dst_lo, src));
1695 emit_insn (gen_aarch64_movdi_tfhigh (dst_hi, src));
1696 }
1697 return;
43e9d192 1698 }
43e9d192
IB
1699 }
1700
030d03b8
RE
1701 dst_lo = gen_lowpart (word_mode, dst);
1702 dst_hi = gen_highpart (word_mode, dst);
1703 src_lo = gen_lowpart (word_mode, src);
1704 src_hi = gen_highpart_mode (word_mode, mode, src);
1705
1706 /* At most one pairing may overlap. */
1707 if (reg_overlap_mentioned_p (dst_lo, src_hi))
1708 {
1709 aarch64_emit_move (dst_hi, src_hi);
1710 aarch64_emit_move (dst_lo, src_lo);
1711 }
1712 else
1713 {
1714 aarch64_emit_move (dst_lo, src_lo);
1715 aarch64_emit_move (dst_hi, src_hi);
1716 }
43e9d192
IB
1717}
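
/* Note on the ordering above: if the destination's low half is the same
   register as the source's high half, copying the low halves first would
   clobber the source high half, so the overlap check emits the high-half
   move first in that case.  */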
1718
1719bool
1720aarch64_split_128bit_move_p (rtx dst, rtx src)
1721{
1722 return (! REG_P (src)
1723 || ! (FP_REGNUM_P (REGNO (dst)) && FP_REGNUM_P (REGNO (src))));
1724}
1725
8b033a8a
SN
1726/* Split a complex SIMD combine. */
1727
1728void
1729aarch64_split_simd_combine (rtx dst, rtx src1, rtx src2)
1730{
ef4bddc2
RS
1731 machine_mode src_mode = GET_MODE (src1);
1732 machine_mode dst_mode = GET_MODE (dst);
8b033a8a
SN
1733
1734 gcc_assert (VECTOR_MODE_P (dst_mode));
a977dc0c
MC
1735 gcc_assert (register_operand (dst, dst_mode)
1736 && register_operand (src1, src_mode)
1737 && register_operand (src2, src_mode));
8b033a8a 1738
a977dc0c 1739 rtx (*gen) (rtx, rtx, rtx);
8b033a8a 1740
a977dc0c
MC
1741 switch (src_mode)
1742 {
1743 case V8QImode:
1744 gen = gen_aarch64_simd_combinev8qi;
1745 break;
1746 case V4HImode:
1747 gen = gen_aarch64_simd_combinev4hi;
1748 break;
1749 case V2SImode:
1750 gen = gen_aarch64_simd_combinev2si;
1751 break;
1752 case V4HFmode:
1753 gen = gen_aarch64_simd_combinev4hf;
1754 break;
1755 case V2SFmode:
1756 gen = gen_aarch64_simd_combinev2sf;
1757 break;
1758 case DImode:
1759 gen = gen_aarch64_simd_combinedi;
1760 break;
1761 case DFmode:
1762 gen = gen_aarch64_simd_combinedf;
1763 break;
1764 default:
1765 gcc_unreachable ();
8b033a8a 1766 }
a977dc0c
MC
1767
1768 emit_insn (gen (dst, src1, src2));
1769 return;
8b033a8a
SN
1770}
1771
fd4842cd
SN
1772/* Split a complex SIMD move. */
1773
1774void
1775aarch64_split_simd_move (rtx dst, rtx src)
1776{
ef4bddc2
RS
1777 machine_mode src_mode = GET_MODE (src);
1778 machine_mode dst_mode = GET_MODE (dst);
fd4842cd
SN
1779
1780 gcc_assert (VECTOR_MODE_P (dst_mode));
1781
1782 if (REG_P (dst) && REG_P (src))
1783 {
c59b7e28
SN
1784 rtx (*gen) (rtx, rtx);
1785
fd4842cd
SN
1786 gcc_assert (VECTOR_MODE_P (src_mode));
1787
1788 switch (src_mode)
1789 {
1790 case V16QImode:
c59b7e28 1791 gen = gen_aarch64_split_simd_movv16qi;
fd4842cd
SN
1792 break;
1793 case V8HImode:
c59b7e28 1794 gen = gen_aarch64_split_simd_movv8hi;
fd4842cd
SN
1795 break;
1796 case V4SImode:
c59b7e28 1797 gen = gen_aarch64_split_simd_movv4si;
fd4842cd
SN
1798 break;
1799 case V2DImode:
c59b7e28 1800 gen = gen_aarch64_split_simd_movv2di;
fd4842cd 1801 break;
71a11456
AL
1802 case V8HFmode:
1803 gen = gen_aarch64_split_simd_movv8hf;
1804 break;
fd4842cd 1805 case V4SFmode:
c59b7e28 1806 gen = gen_aarch64_split_simd_movv4sf;
fd4842cd
SN
1807 break;
1808 case V2DFmode:
c59b7e28 1809 gen = gen_aarch64_split_simd_movv2df;
fd4842cd
SN
1810 break;
1811 default:
1812 gcc_unreachable ();
1813 }
c59b7e28
SN
1814
1815 emit_insn (gen (dst, src));
fd4842cd
SN
1816 return;
1817 }
1818}
1819
ef22810a
RH
1820bool
1821aarch64_zero_extend_const_eq (machine_mode xmode, rtx x,
1822 machine_mode ymode, rtx y)
1823{
1824 rtx r = simplify_const_unary_operation (ZERO_EXTEND, xmode, y, ymode);
1825 gcc_assert (r != NULL);
1826 return rtx_equal_p (x, r);
1827}
1828
1829
43e9d192 1830static rtx
ef4bddc2 1831aarch64_force_temporary (machine_mode mode, rtx x, rtx value)
43e9d192
IB
1832{
1833 if (can_create_pseudo_p ())
e18b4a81 1834 return force_reg (mode, value);
43e9d192
IB
1835 else
1836 {
1837 x = aarch64_emit_move (x, value);
1838 return x;
1839 }
1840}
1841
1842
1843static rtx
ef4bddc2 1844aarch64_add_offset (machine_mode mode, rtx temp, rtx reg, HOST_WIDE_INT offset)
43e9d192 1845{
9c023bf0 1846 if (!aarch64_plus_immediate (GEN_INT (offset), mode))
43e9d192
IB
1847 {
1848 rtx high;
1849 /* Load the full offset into a register. This
1850 might be improvable in the future. */
1851 high = GEN_INT (offset);
1852 offset = 0;
e18b4a81
YZ
1853 high = aarch64_force_temporary (mode, temp, high);
1854 reg = aarch64_force_temporary (mode, temp,
1855 gen_rtx_PLUS (mode, high, reg));
43e9d192
IB
1856 }
1857 return plus_constant (mode, reg, offset);
1858}
1859
82614948
RR
1860static int
1861aarch64_internal_mov_immediate (rtx dest, rtx imm, bool generate,
1862 machine_mode mode)
43e9d192 1863{
43e9d192 1864 int i;
9a4865db
WD
1865 unsigned HOST_WIDE_INT val, val2, mask;
1866 int one_match, zero_match;
1867 int num_insns;
43e9d192 1868
9a4865db
WD
1869 val = INTVAL (imm);
1870
1871 if (aarch64_move_imm (val, mode))
43e9d192 1872 {
82614948 1873 if (generate)
f7df4a84 1874 emit_insn (gen_rtx_SET (dest, imm));
9a4865db 1875 return 1;
43e9d192
IB
1876 }
1877
9a4865db 1878 if ((val >> 32) == 0 || mode == SImode)
43e9d192 1879 {
82614948
RR
1880 if (generate)
1881 {
9a4865db
WD
1882 emit_insn (gen_rtx_SET (dest, GEN_INT (val & 0xffff)));
1883 if (mode == SImode)
1884 emit_insn (gen_insv_immsi (dest, GEN_INT (16),
1885 GEN_INT ((val >> 16) & 0xffff)));
1886 else
1887 emit_insn (gen_insv_immdi (dest, GEN_INT (16),
1888 GEN_INT ((val >> 16) & 0xffff)));
82614948 1889 }
9a4865db 1890 return 2;
43e9d192
IB
1891 }
1892
1893 /* Remaining cases are all for DImode. */
1894
43e9d192 1895 mask = 0xffff;
9a4865db
WD
1896 zero_match = ((val & mask) == 0) + ((val & (mask << 16)) == 0) +
1897 ((val & (mask << 32)) == 0) + ((val & (mask << 48)) == 0);
1898 one_match = ((~val & mask) == 0) + ((~val & (mask << 16)) == 0) +
1899 ((~val & (mask << 32)) == 0) + ((~val & (mask << 48)) == 0);
43e9d192 1900
62c8d76c 1901 if (zero_match != 2 && one_match != 2)
43e9d192 1902 {
62c8d76c
WD
1903 /* Try emitting a bitmask immediate with a movk replacing 16 bits.
1904 For a 64-bit bitmask try whether changing 16 bits to all ones or
1905 zeroes creates a valid bitmask. To check any repeated bitmask,
1906 try using 16 bits from the other 32-bit half of val. */
43e9d192 1907
62c8d76c 1908 for (i = 0; i < 64; i += 16, mask <<= 16)
43e9d192 1909 {
62c8d76c
WD
1910 val2 = val & ~mask;
1911 if (val2 != val && aarch64_bitmask_imm (val2, mode))
1912 break;
1913 val2 = val | mask;
1914 if (val2 != val && aarch64_bitmask_imm (val2, mode))
1915 break;
1916 val2 = val2 & ~mask;
1917 val2 = val2 | (((val2 >> 32) | (val2 << 32)) & mask);
1918 if (val2 != val && aarch64_bitmask_imm (val2, mode))
1919 break;
43e9d192 1920 }
62c8d76c 1921 if (i != 64)
43e9d192 1922 {
62c8d76c 1923 if (generate)
43e9d192 1924 {
62c8d76c
WD
1925 emit_insn (gen_rtx_SET (dest, GEN_INT (val2)));
1926 emit_insn (gen_insv_immdi (dest, GEN_INT (i),
9a4865db 1927 GEN_INT ((val >> i) & 0xffff)));
43e9d192 1928 }
1312b1ba 1929 return 2;
43e9d192
IB
1930 }
1931 }
1932
9a4865db
WD
1933 /* Generate 2-4 instructions, skipping 16 bits of all zeroes or ones which
1934 are emitted by the initial mov. If one_match > zero_match, skip set bits,
1935 otherwise skip zero bits. */
2c274197 1936
9a4865db 1937 num_insns = 1;
43e9d192 1938 mask = 0xffff;
9a4865db
WD
1939 val2 = one_match > zero_match ? ~val : val;
1940 i = (val2 & mask) != 0 ? 0 : (val2 & (mask << 16)) != 0 ? 16 : 32;
1941
1942 if (generate)
1943 emit_insn (gen_rtx_SET (dest, GEN_INT (one_match > zero_match
1944 ? (val | ~(mask << i))
1945 : (val & (mask << i)))));
1946 for (i += 16; i < 64; i += 16)
43e9d192 1947 {
9a4865db
WD
1948 if ((val2 & (mask << i)) == 0)
1949 continue;
1950 if (generate)
1951 emit_insn (gen_insv_immdi (dest, GEN_INT (i),
1952 GEN_INT ((val >> i) & 0xffff)));
1953 num_insns ++;
82614948
RR
1954 }
1955
1956 return num_insns;
1957}
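/* Worked example (illustrative only, using x0 for DEST): for the DImode
   constant 0x1234000056780000 two of the four 16-bit chunks are zero, so
   the code above skips them and emits just
       movz x0, 0x5678, lsl 16
       movk x0, 0x1234, lsl 48
   and returns 2.  When GENERATE is false only the instruction count is
   returned, without emitting anything.  */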
1958
1959
1960void
1961aarch64_expand_mov_immediate (rtx dest, rtx imm)
1962{
1963 machine_mode mode = GET_MODE (dest);
1964
1965 gcc_assert (mode == SImode || mode == DImode);
1966
1967 /* Check on what type of symbol it is. */
1968 if (GET_CODE (imm) == SYMBOL_REF
1969 || GET_CODE (imm) == LABEL_REF
1970 || GET_CODE (imm) == CONST)
1971 {
1972 rtx mem, base, offset;
1973 enum aarch64_symbol_type sty;
1974
1975 /* If we have (const (plus symbol offset)), separate out the offset
1976 before we start classifying the symbol. */
1977 split_const (imm, &base, &offset);
1978
a6e0bfa7 1979 sty = aarch64_classify_symbol (base, offset);
82614948
RR
1980 switch (sty)
1981 {
1982 case SYMBOL_FORCE_TO_MEM:
1983 if (offset != const0_rtx
1984 && targetm.cannot_force_const_mem (mode, imm))
1985 {
1986 gcc_assert (can_create_pseudo_p ());
1987 base = aarch64_force_temporary (mode, dest, base);
1988 base = aarch64_add_offset (mode, NULL, base, INTVAL (offset));
1989 aarch64_emit_move (dest, base);
1990 return;
1991 }
b4f50fd4 1992
82614948
RR
1993 mem = force_const_mem (ptr_mode, imm);
1994 gcc_assert (mem);
b4f50fd4
RR
1995
1996 /* If we aren't generating PC relative literals, then
1997 we need to expand the literal pool access carefully.
1998 This is something that needs to be done in a number
1999 of places, so could well live as a separate function. */
9ee6540a 2000 if (!aarch64_pcrelative_literal_loads)
b4f50fd4
RR
2001 {
2002 gcc_assert (can_create_pseudo_p ());
2003 base = gen_reg_rtx (ptr_mode);
2004 aarch64_expand_mov_immediate (base, XEXP (mem, 0));
00eee3fa
WD
2005 if (ptr_mode != Pmode)
2006 base = convert_memory_address (Pmode, base);
b4f50fd4
RR
2007 mem = gen_rtx_MEM (ptr_mode, base);
2008 }
2009
82614948
RR
2010 if (mode != ptr_mode)
2011 mem = gen_rtx_ZERO_EXTEND (mode, mem);
b4f50fd4 2012
f7df4a84 2013 emit_insn (gen_rtx_SET (dest, mem));
b4f50fd4 2014
82614948
RR
2015 return;
2016
2017 case SYMBOL_SMALL_TLSGD:
2018 case SYMBOL_SMALL_TLSDESC:
79496620 2019 case SYMBOL_SMALL_TLSIE:
1b1e81f8 2020 case SYMBOL_SMALL_GOT_28K:
6642bdb4 2021 case SYMBOL_SMALL_GOT_4G:
82614948 2022 case SYMBOL_TINY_GOT:
5ae7caad 2023 case SYMBOL_TINY_TLSIE:
82614948
RR
2024 if (offset != const0_rtx)
2025 {
2026 gcc_assert(can_create_pseudo_p ());
2027 base = aarch64_force_temporary (mode, dest, base);
2028 base = aarch64_add_offset (mode, NULL, base, INTVAL (offset));
2029 aarch64_emit_move (dest, base);
2030 return;
2031 }
2032 /* FALLTHRU */
2033
82614948
RR
2034 case SYMBOL_SMALL_ABSOLUTE:
2035 case SYMBOL_TINY_ABSOLUTE:
cbf5629e 2036 case SYMBOL_TLSLE12:
d18ba284 2037 case SYMBOL_TLSLE24:
cbf5629e
JW
2038 case SYMBOL_TLSLE32:
2039 case SYMBOL_TLSLE48:
82614948
RR
2040 aarch64_load_symref_appropriately (dest, imm, sty);
2041 return;
2042
2043 default:
2044 gcc_unreachable ();
2045 }
2046 }
2047
2048 if (!CONST_INT_P (imm))
2049 {
2050 if (GET_CODE (imm) == HIGH)
f7df4a84 2051 emit_insn (gen_rtx_SET (dest, imm));
82614948
RR
2052 else
2053 {
2054 rtx mem = force_const_mem (mode, imm);
2055 gcc_assert (mem);
f7df4a84 2056 emit_insn (gen_rtx_SET (dest, mem));
43e9d192 2057 }
82614948
RR
2058
2059 return;
43e9d192 2060 }
82614948
RR
2061
2062 aarch64_internal_mov_immediate (dest, imm, true, GET_MODE (dest));
43e9d192
IB
2063}
2064
5be6b295
WD
2065/* Add DELTA to REGNUM in mode MODE. SCRATCHREG can be used to hold a
2066 temporary value if necessary. FRAME_RELATED_P should be true if
2067 the RTX_FRAME_RELATED flag should be set and CFA adjustments added
2068 to the generated instructions. If SCRATCHREG is known to hold
2069 abs (delta), EMIT_MOVE_IMM can be set to false to avoid emitting the
2070 immediate again.
2071
2072 Since this function may be used to adjust the stack pointer, we must
2073 ensure that it cannot cause transient stack deallocation (for example
2074 by first incrementing SP and then decrementing when adjusting by a
2075 large immediate). */
c4ddc43a
JW
2076
2077static void
5be6b295
WD
2078aarch64_add_constant_internal (machine_mode mode, int regnum, int scratchreg,
2079 HOST_WIDE_INT delta, bool frame_related_p,
2080 bool emit_move_imm)
c4ddc43a
JW
2081{
2082 HOST_WIDE_INT mdelta = abs_hwi (delta);
2083 rtx this_rtx = gen_rtx_REG (mode, regnum);
37d6a4b7 2084 rtx_insn *insn;
c4ddc43a 2085
c4ddc43a
JW
2086 if (!mdelta)
2087 return;
2088
5be6b295 2089 /* Single instruction adjustment. */
c4ddc43a
JW
2090 if (aarch64_uimm12_shift (mdelta))
2091 {
37d6a4b7
JW
2092 insn = emit_insn (gen_add2_insn (this_rtx, GEN_INT (delta)));
2093 RTX_FRAME_RELATED_P (insn) = frame_related_p;
c4ddc43a
JW
2094 return;
2095 }
2096
5be6b295
WD
2097 /* Emit 2 additions/subtractions if the adjustment is less than 24 bits.
 2098 Only do this if mdelta cannot be loaded with a single move instruction,
 2099 as adjusting using a move is better in that case. */
2100 if (mdelta < 0x1000000 && !aarch64_move_imm (mdelta, mode))
c4ddc43a
JW
2101 {
2102 HOST_WIDE_INT low_off = mdelta & 0xfff;
2103
2104 low_off = delta < 0 ? -low_off : low_off;
37d6a4b7
JW
2105 insn = emit_insn (gen_add2_insn (this_rtx, GEN_INT (low_off)));
2106 RTX_FRAME_RELATED_P (insn) = frame_related_p;
2107 insn = emit_insn (gen_add2_insn (this_rtx, GEN_INT (delta - low_off)));
2108 RTX_FRAME_RELATED_P (insn) = frame_related_p;
c4ddc43a
JW
2109 return;
2110 }
2111
5be6b295 2112 /* Emit a move immediate if required and an addition/subtraction. */
c4ddc43a 2113 rtx scratch_rtx = gen_rtx_REG (mode, scratchreg);
5be6b295
WD
2114 if (emit_move_imm)
2115 aarch64_internal_mov_immediate (scratch_rtx, GEN_INT (mdelta), true, mode);
2116 insn = emit_insn (delta < 0 ? gen_sub2_insn (this_rtx, scratch_rtx)
2117 : gen_add2_insn (this_rtx, scratch_rtx));
37d6a4b7
JW
2118 if (frame_related_p)
2119 {
2120 RTX_FRAME_RELATED_P (insn) = frame_related_p;
2121 rtx adj = plus_constant (mode, this_rtx, delta);
2122 add_reg_note (insn , REG_CFA_ADJUST_CFA, gen_rtx_SET (this_rtx, adj));
2123 }
c4ddc43a
JW
2124}
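/* Worked example (illustrative only, using SP as the target register):
   for delta = 0x123456 the two-insn path above splits the adjustment into
   a low 12-bit part and a shifted 12-bit part:
       add sp, sp, #0x456
       add sp, sp, #0x123000
   A delta of 0x456 alone would be a single add, while a delta too large
   for 24 bits falls through to the move-immediate-plus-add path.  */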
2125
5be6b295
WD
2126static inline void
2127aarch64_add_constant (machine_mode mode, int regnum, int scratchreg,
2128 HOST_WIDE_INT delta)
2129{
2130 aarch64_add_constant_internal (mode, regnum, scratchreg, delta, false, true);
2131}
2132
2133static inline void
2134aarch64_add_sp (int scratchreg, HOST_WIDE_INT delta, bool emit_move_imm)
2135{
2136 aarch64_add_constant_internal (Pmode, SP_REGNUM, scratchreg, delta,
2137 true, emit_move_imm);
2138}
2139
2140static inline void
2141aarch64_sub_sp (int scratchreg, HOST_WIDE_INT delta, bool frame_related_p)
2142{
2143 aarch64_add_constant_internal (Pmode, SP_REGNUM, scratchreg, -delta,
2144 frame_related_p, true);
2145}
2146
43e9d192 2147static bool
fee9ba42
JW
2148aarch64_function_ok_for_sibcall (tree decl ATTRIBUTE_UNUSED,
2149 tree exp ATTRIBUTE_UNUSED)
43e9d192 2150{
fee9ba42 2151 /* Currently, always true. */
43e9d192
IB
2152 return true;
2153}
2154
2155/* Implement TARGET_PASS_BY_REFERENCE. */
2156
2157static bool
2158aarch64_pass_by_reference (cumulative_args_t pcum ATTRIBUTE_UNUSED,
ef4bddc2 2159 machine_mode mode,
43e9d192
IB
2160 const_tree type,
2161 bool named ATTRIBUTE_UNUSED)
2162{
2163 HOST_WIDE_INT size;
ef4bddc2 2164 machine_mode dummymode;
43e9d192
IB
2165 int nregs;
2166
2167 /* GET_MODE_SIZE (BLKmode) is useless since it is 0. */
2168 size = (mode == BLKmode && type)
2169 ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
2170
aadc1c43
MHD
2171 /* Aggregates are passed by reference based on their size. */
2172 if (type && AGGREGATE_TYPE_P (type))
43e9d192 2173 {
aadc1c43 2174 size = int_size_in_bytes (type);
43e9d192
IB
2175 }
2176
 2177 /* Variable sized arguments are always passed by reference. */
2178 if (size < 0)
2179 return true;
2180
2181 /* Can this be a candidate to be passed in fp/simd register(s)? */
2182 if (aarch64_vfp_is_call_or_return_candidate (mode, type,
2183 &dummymode, &nregs,
2184 NULL))
2185 return false;
2186
2187 /* Arguments which are variable sized or larger than 2 registers are
 2188 passed by reference unless they are a homogeneous floating-point
2189 aggregate. */
2190 return size > 2 * UNITS_PER_WORD;
2191}
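/* Illustrative examples (not part of the original source), assuming LP64:
     struct { int a, b, c; }   12 bytes -> fits in two registers, by value.
     struct { int a[5]; }      20 bytes -> more than two registers, by ref.
     struct { double d[4]; }   32 bytes -> an HFA, so it is still passed by
                                           value in SIMD/FP registers.  */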
2192
2193/* Return TRUE if VALTYPE is padded to its least significant bits. */
2194static bool
2195aarch64_return_in_msb (const_tree valtype)
2196{
ef4bddc2 2197 machine_mode dummy_mode;
43e9d192
IB
2198 int dummy_int;
2199
2200 /* Never happens in little-endian mode. */
2201 if (!BYTES_BIG_ENDIAN)
2202 return false;
2203
2204 /* Only composite types smaller than or equal to 16 bytes can
2205 be potentially returned in registers. */
2206 if (!aarch64_composite_type_p (valtype, TYPE_MODE (valtype))
2207 || int_size_in_bytes (valtype) <= 0
2208 || int_size_in_bytes (valtype) > 16)
2209 return false;
2210
2211 /* But not a composite that is an HFA (Homogeneous Floating-point Aggregate)
2212 or an HVA (Homogeneous Short-Vector Aggregate); such a special composite
2213 is always passed/returned in the least significant bits of fp/simd
2214 register(s). */
2215 if (aarch64_vfp_is_call_or_return_candidate (TYPE_MODE (valtype), valtype,
2216 &dummy_mode, &dummy_int, NULL))
2217 return false;
2218
2219 return true;
2220}
2221
2222/* Implement TARGET_FUNCTION_VALUE.
2223 Define how to find the value returned by a function. */
2224
2225static rtx
2226aarch64_function_value (const_tree type, const_tree func,
2227 bool outgoing ATTRIBUTE_UNUSED)
2228{
ef4bddc2 2229 machine_mode mode;
43e9d192
IB
2230 int unsignedp;
2231 int count;
ef4bddc2 2232 machine_mode ag_mode;
43e9d192
IB
2233
2234 mode = TYPE_MODE (type);
2235 if (INTEGRAL_TYPE_P (type))
2236 mode = promote_function_mode (type, mode, &unsignedp, func, 1);
2237
2238 if (aarch64_return_in_msb (type))
2239 {
2240 HOST_WIDE_INT size = int_size_in_bytes (type);
2241
2242 if (size % UNITS_PER_WORD != 0)
2243 {
2244 size += UNITS_PER_WORD - size % UNITS_PER_WORD;
2245 mode = mode_for_size (size * BITS_PER_UNIT, MODE_INT, 0);
2246 }
2247 }
2248
2249 if (aarch64_vfp_is_call_or_return_candidate (mode, type,
2250 &ag_mode, &count, NULL))
2251 {
2252 if (!aarch64_composite_type_p (type, mode))
2253 {
2254 gcc_assert (count == 1 && mode == ag_mode);
2255 return gen_rtx_REG (mode, V0_REGNUM);
2256 }
2257 else
2258 {
2259 int i;
2260 rtx par;
2261
2262 par = gen_rtx_PARALLEL (mode, rtvec_alloc (count));
2263 for (i = 0; i < count; i++)
2264 {
2265 rtx tmp = gen_rtx_REG (ag_mode, V0_REGNUM + i);
2266 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp,
2267 GEN_INT (i * GET_MODE_SIZE (ag_mode)));
2268 XVECEXP (par, 0, i) = tmp;
2269 }
2270 return par;
2271 }
2272 }
2273 else
2274 return gen_rtx_REG (mode, R0_REGNUM);
2275}
2276
2277/* Implements TARGET_FUNCTION_VALUE_REGNO_P.
2278 Return true if REGNO is the number of a hard register in which the values
 2279 of a called function may come back. */
2280
2281static bool
2282aarch64_function_value_regno_p (const unsigned int regno)
2283{
2284 /* Maximum of 16 bytes can be returned in the general registers. Examples
2285 of 16-byte return values are: 128-bit integers and 16-byte small
2286 structures (excluding homogeneous floating-point aggregates). */
2287 if (regno == R0_REGNUM || regno == R1_REGNUM)
2288 return true;
2289
2290 /* Up to four fp/simd registers can return a function value, e.g. a
2291 homogeneous floating-point aggregate having four members. */
2292 if (regno >= V0_REGNUM && regno < V0_REGNUM + HA_MAX_NUM_FLDS)
d5726973 2293 return TARGET_FLOAT;
43e9d192
IB
2294
2295 return false;
2296}
2297
2298/* Implement TARGET_RETURN_IN_MEMORY.
2299
2300 If the type T of the result of a function is such that
2301 void func (T arg)
2302 would require that arg be passed as a value in a register (or set of
2303 registers) according to the parameter passing rules, then the result
2304 is returned in the same registers as would be used for such an
2305 argument. */
2306
2307static bool
2308aarch64_return_in_memory (const_tree type, const_tree fndecl ATTRIBUTE_UNUSED)
2309{
2310 HOST_WIDE_INT size;
ef4bddc2 2311 machine_mode ag_mode;
43e9d192
IB
2312 int count;
2313
2314 if (!AGGREGATE_TYPE_P (type)
2315 && TREE_CODE (type) != COMPLEX_TYPE
2316 && TREE_CODE (type) != VECTOR_TYPE)
 2317 /* Simple scalar types are always returned in registers. */
2318 return false;
2319
2320 if (aarch64_vfp_is_call_or_return_candidate (TYPE_MODE (type),
2321 type,
2322 &ag_mode,
2323 &count,
2324 NULL))
2325 return false;
2326
 2327 /* Types larger than 2 registers are returned in memory. */
2328 size = int_size_in_bytes (type);
2329 return (size < 0 || size > 2 * UNITS_PER_WORD);
2330}
2331
2332static bool
ef4bddc2 2333aarch64_vfp_is_call_candidate (cumulative_args_t pcum_v, machine_mode mode,
43e9d192
IB
2334 const_tree type, int *nregs)
2335{
2336 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
2337 return aarch64_vfp_is_call_or_return_candidate (mode,
2338 type,
2339 &pcum->aapcs_vfp_rmode,
2340 nregs,
2341 NULL);
2342}
2343
985b8393 2344/* Given MODE and TYPE of a function argument, return the alignment in
43e9d192
IB
2345 bits. The idea is to suppress any stronger alignment requested by
2346 the user and opt for the natural alignment (specified in AAPCS64 \S 4.1).
2347 This is a helper function for local use only. */
2348
985b8393 2349static unsigned int
ef4bddc2 2350aarch64_function_arg_alignment (machine_mode mode, const_tree type)
43e9d192 2351{
75d6cc81 2352 if (!type)
985b8393 2353 return GET_MODE_ALIGNMENT (mode);
2ec07fa6 2354
75d6cc81 2355 if (integer_zerop (TYPE_SIZE (type)))
985b8393 2356 return 0;
43e9d192 2357
75d6cc81
AL
2358 gcc_assert (TYPE_MODE (type) == mode);
2359
2360 if (!AGGREGATE_TYPE_P (type))
985b8393 2361 return TYPE_ALIGN (TYPE_MAIN_VARIANT (type));
75d6cc81
AL
2362
2363 if (TREE_CODE (type) == ARRAY_TYPE)
985b8393 2364 return TYPE_ALIGN (TREE_TYPE (type));
75d6cc81 2365
985b8393 2366 unsigned int alignment = 0;
75d6cc81 2367 for (tree field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
985b8393
JJ
2368 if (TREE_CODE (field) == FIELD_DECL)
2369 alignment = std::max (alignment, DECL_ALIGN (field));
43e9d192 2370
985b8393 2371 return alignment;
43e9d192
IB
2372}
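/* Illustrative example (not part of the original source): for
       struct s { char c; int i; };
   the field alignments are 8 and 32 bits, so this returns 32 even if the
   user declared the struct itself with a larger aligned attribute; the
   over-alignment is deliberately ignored for AAPCS64 purposes.  */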
2373
2374/* Layout a function argument according to the AAPCS64 rules. The rule
2375 numbers refer to the rule numbers in the AAPCS64. */
2376
2377static void
ef4bddc2 2378aarch64_layout_arg (cumulative_args_t pcum_v, machine_mode mode,
43e9d192
IB
2379 const_tree type,
2380 bool named ATTRIBUTE_UNUSED)
2381{
2382 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
2383 int ncrn, nvrn, nregs;
2384 bool allocate_ncrn, allocate_nvrn;
3abf17cf 2385 HOST_WIDE_INT size;
43e9d192
IB
2386
2387 /* We need to do this once per argument. */
2388 if (pcum->aapcs_arg_processed)
2389 return;
2390
2391 pcum->aapcs_arg_processed = true;
2392
3abf17cf
YZ
2393 /* Size in bytes, rounded to the nearest multiple of 8 bytes. */
2394 size
4f59f9f2
UB
2395 = ROUND_UP (type ? int_size_in_bytes (type) : GET_MODE_SIZE (mode),
2396 UNITS_PER_WORD);
3abf17cf 2397
43e9d192
IB
2398 allocate_ncrn = (type) ? !(FLOAT_TYPE_P (type)) : !FLOAT_MODE_P (mode);
2399 allocate_nvrn = aarch64_vfp_is_call_candidate (pcum_v,
2400 mode,
2401 type,
2402 &nregs);
2403
 2404 /* allocate_ncrn may be a false positive, but allocate_nvrn is quite reliable.
2405 The following code thus handles passing by SIMD/FP registers first. */
2406
2407 nvrn = pcum->aapcs_nvrn;
2408
 2409 /* C.1 - C.5 for floating point, homogeneous floating-point aggregates (HFA)
 2410 and homogeneous short-vector aggregates (HVA). */
2411 if (allocate_nvrn)
2412 {
261fb553
AL
2413 if (!TARGET_FLOAT)
2414 aarch64_err_no_fpadvsimd (mode, "argument");
2415
43e9d192
IB
2416 if (nvrn + nregs <= NUM_FP_ARG_REGS)
2417 {
2418 pcum->aapcs_nextnvrn = nvrn + nregs;
2419 if (!aarch64_composite_type_p (type, mode))
2420 {
2421 gcc_assert (nregs == 1);
2422 pcum->aapcs_reg = gen_rtx_REG (mode, V0_REGNUM + nvrn);
2423 }
2424 else
2425 {
2426 rtx par;
2427 int i;
2428 par = gen_rtx_PARALLEL (mode, rtvec_alloc (nregs));
2429 for (i = 0; i < nregs; i++)
2430 {
2431 rtx tmp = gen_rtx_REG (pcum->aapcs_vfp_rmode,
2432 V0_REGNUM + nvrn + i);
2433 tmp = gen_rtx_EXPR_LIST
2434 (VOIDmode, tmp,
2435 GEN_INT (i * GET_MODE_SIZE (pcum->aapcs_vfp_rmode)));
2436 XVECEXP (par, 0, i) = tmp;
2437 }
2438 pcum->aapcs_reg = par;
2439 }
2440 return;
2441 }
2442 else
2443 {
2444 /* C.3 NSRN is set to 8. */
2445 pcum->aapcs_nextnvrn = NUM_FP_ARG_REGS;
2446 goto on_stack;
2447 }
2448 }
2449
2450 ncrn = pcum->aapcs_ncrn;
3abf17cf 2451 nregs = size / UNITS_PER_WORD;
43e9d192
IB
2452
 2453 /* C.6 - C.9, though the sign and zero extension semantics are
 2454 handled elsewhere. This is the case where the argument fits
 2455 entirely in general registers. */
2456 if (allocate_ncrn && (ncrn + nregs <= NUM_ARG_REGS))
2457 {
43e9d192
IB
2458
2459 gcc_assert (nregs == 0 || nregs == 1 || nregs == 2);
2460
2461 /* C.8 if the argument has an alignment of 16 then the NGRN is
2462 rounded up to the next even number. */
985b8393
JJ
2463 if (nregs == 2
2464 && ncrn % 2
2ec07fa6 2465 /* The == 16 * BITS_PER_UNIT instead of >= 16 * BITS_PER_UNIT
985b8393 2466 comparison is there because for > 16 * BITS_PER_UNIT
2ec07fa6
RR
2467 alignment nregs should be > 2 and therefore it should be
2468 passed by reference rather than value. */
985b8393
JJ
2469 && aarch64_function_arg_alignment (mode, type) == 16 * BITS_PER_UNIT)
2470 {
2471 ++ncrn;
2472 gcc_assert (ncrn + nregs <= NUM_ARG_REGS);
43e9d192 2473 }
2ec07fa6 2474
43e9d192
IB
2475 /* NREGS can be 0 when e.g. an empty structure is to be passed.
2476 A reg is still generated for it, but the caller should be smart
2477 enough not to use it. */
2478 if (nregs == 0 || nregs == 1 || GET_MODE_CLASS (mode) == MODE_INT)
2ec07fa6 2479 pcum->aapcs_reg = gen_rtx_REG (mode, R0_REGNUM + ncrn);
43e9d192
IB
2480 else
2481 {
2482 rtx par;
2483 int i;
2484
2485 par = gen_rtx_PARALLEL (mode, rtvec_alloc (nregs));
2486 for (i = 0; i < nregs; i++)
2487 {
2488 rtx tmp = gen_rtx_REG (word_mode, R0_REGNUM + ncrn + i);
2489 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp,
2490 GEN_INT (i * UNITS_PER_WORD));
2491 XVECEXP (par, 0, i) = tmp;
2492 }
2493 pcum->aapcs_reg = par;
2494 }
2495
2496 pcum->aapcs_nextncrn = ncrn + nregs;
2497 return;
2498 }
2499
2500 /* C.11 */
2501 pcum->aapcs_nextncrn = NUM_ARG_REGS;
2502
 2503 /* The argument is passed on the stack; record the needed number of words for
3abf17cf 2504 this argument and align the total size if necessary. */
43e9d192 2505on_stack:
3abf17cf 2506 pcum->aapcs_stack_words = size / UNITS_PER_WORD;
2ec07fa6 2507
985b8393 2508 if (aarch64_function_arg_alignment (mode, type) == 16 * BITS_PER_UNIT)
4f59f9f2
UB
2509 pcum->aapcs_stack_size = ROUND_UP (pcum->aapcs_stack_size,
2510 16 / UNITS_PER_WORD);
43e9d192
IB
2511 return;
2512}
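/* Illustrative examples of the layout above (not part of the original
   source):
     double                    -> next free V register.
     struct { double a, b; }   -> an HFA; two consecutive V registers
                                  wrapped in a PARALLEL.
     struct { long a, b; }     -> two consecutive X registers.
     __int128                  -> 16-byte alignment, so rule C.8 first
                                  rounds the starting X register up to an
                                  even number.  */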
2513
2514/* Implement TARGET_FUNCTION_ARG. */
2515
2516static rtx
ef4bddc2 2517aarch64_function_arg (cumulative_args_t pcum_v, machine_mode mode,
43e9d192
IB
2518 const_tree type, bool named)
2519{
2520 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
2521 gcc_assert (pcum->pcs_variant == ARM_PCS_AAPCS64);
2522
2523 if (mode == VOIDmode)
2524 return NULL_RTX;
2525
2526 aarch64_layout_arg (pcum_v, mode, type, named);
2527 return pcum->aapcs_reg;
2528}
2529
2530void
2531aarch64_init_cumulative_args (CUMULATIVE_ARGS *pcum,
2532 const_tree fntype ATTRIBUTE_UNUSED,
2533 rtx libname ATTRIBUTE_UNUSED,
2534 const_tree fndecl ATTRIBUTE_UNUSED,
2535 unsigned n_named ATTRIBUTE_UNUSED)
2536{
2537 pcum->aapcs_ncrn = 0;
2538 pcum->aapcs_nvrn = 0;
2539 pcum->aapcs_nextncrn = 0;
2540 pcum->aapcs_nextnvrn = 0;
2541 pcum->pcs_variant = ARM_PCS_AAPCS64;
2542 pcum->aapcs_reg = NULL_RTX;
2543 pcum->aapcs_arg_processed = false;
2544 pcum->aapcs_stack_words = 0;
2545 pcum->aapcs_stack_size = 0;
2546
261fb553
AL
2547 if (!TARGET_FLOAT
2548 && fndecl && TREE_PUBLIC (fndecl)
2549 && fntype && fntype != error_mark_node)
2550 {
2551 const_tree type = TREE_TYPE (fntype);
2552 machine_mode mode ATTRIBUTE_UNUSED; /* To pass pointer as argument. */
2553 int nregs ATTRIBUTE_UNUSED; /* Likewise. */
2554 if (aarch64_vfp_is_call_or_return_candidate (TYPE_MODE (type), type,
2555 &mode, &nregs, NULL))
2556 aarch64_err_no_fpadvsimd (TYPE_MODE (type), "return type");
2557 }
43e9d192
IB
2558 return;
2559}
2560
2561static void
2562aarch64_function_arg_advance (cumulative_args_t pcum_v,
ef4bddc2 2563 machine_mode mode,
43e9d192
IB
2564 const_tree type,
2565 bool named)
2566{
2567 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
2568 if (pcum->pcs_variant == ARM_PCS_AAPCS64)
2569 {
2570 aarch64_layout_arg (pcum_v, mode, type, named);
2571 gcc_assert ((pcum->aapcs_reg != NULL_RTX)
2572 != (pcum->aapcs_stack_words != 0));
2573 pcum->aapcs_arg_processed = false;
2574 pcum->aapcs_ncrn = pcum->aapcs_nextncrn;
2575 pcum->aapcs_nvrn = pcum->aapcs_nextnvrn;
2576 pcum->aapcs_stack_size += pcum->aapcs_stack_words;
2577 pcum->aapcs_stack_words = 0;
2578 pcum->aapcs_reg = NULL_RTX;
2579 }
2580}
2581
2582bool
2583aarch64_function_arg_regno_p (unsigned regno)
2584{
2585 return ((GP_REGNUM_P (regno) && regno < R0_REGNUM + NUM_ARG_REGS)
2586 || (FP_REGNUM_P (regno) && regno < V0_REGNUM + NUM_FP_ARG_REGS));
2587}
2588
2589/* Implement FUNCTION_ARG_BOUNDARY. Every parameter gets at least
2590 PARM_BOUNDARY bits of alignment, but will be given anything up
2591 to STACK_BOUNDARY bits if the type requires it. This makes sure
2592 that both before and after the layout of each argument, the Next
2593 Stacked Argument Address (NSAA) will have a minimum alignment of
2594 8 bytes. */
2595
2596static unsigned int
ef4bddc2 2597aarch64_function_arg_boundary (machine_mode mode, const_tree type)
43e9d192 2598{
985b8393
JJ
2599 unsigned int alignment = aarch64_function_arg_alignment (mode, type);
2600 return MIN (MAX (alignment, PARM_BOUNDARY), STACK_BOUNDARY);
43e9d192
IB
2601}
2602
2603/* For use by FUNCTION_ARG_PADDING (MODE, TYPE).
2604
2605 Return true if an argument passed on the stack should be padded upwards,
2606 i.e. if the least-significant byte of the stack slot has useful data.
2607
2608 Small aggregate types are placed in the lowest memory address.
2609
2610 The related parameter passing rules are B.4, C.3, C.5 and C.14. */
2611
2612bool
ef4bddc2 2613aarch64_pad_arg_upward (machine_mode mode, const_tree type)
43e9d192
IB
2614{
2615 /* On little-endian targets, the least significant byte of every stack
2616 argument is passed at the lowest byte address of the stack slot. */
2617 if (!BYTES_BIG_ENDIAN)
2618 return true;
2619
00edcfbe 2620 /* Otherwise, integral, floating-point and pointer types are padded downward:
43e9d192
IB
2621 the least significant byte of a stack argument is passed at the highest
2622 byte address of the stack slot. */
2623 if (type
00edcfbe
YZ
2624 ? (INTEGRAL_TYPE_P (type) || SCALAR_FLOAT_TYPE_P (type)
2625 || POINTER_TYPE_P (type))
43e9d192
IB
2626 : (SCALAR_INT_MODE_P (mode) || SCALAR_FLOAT_MODE_P (mode)))
2627 return false;
2628
2629 /* Everything else padded upward, i.e. data in first byte of stack slot. */
2630 return true;
2631}
2632
2633/* Similarly, for use by BLOCK_REG_PADDING (MODE, TYPE, FIRST).
2634
2635 It specifies padding for the last (may also be the only)
 2636 element of a block move between registers and memory. Assuming
 2637 the block is in memory, padding upward means that the last
 2638 element is padded after its most significant byte, while with
 2639 downward padding the last element is padded at its least
 2640 significant byte side.
2641
2642 Small aggregates and small complex types are always padded
2643 upwards.
2644
2645 We don't need to worry about homogeneous floating-point or
2646 short-vector aggregates; their move is not affected by the
2647 padding direction determined here. Regardless of endianness,
2648 each element of such an aggregate is put in the least
2649 significant bits of a fp/simd register.
2650
2651 Return !BYTES_BIG_ENDIAN if the least significant byte of the
2652 register has useful data, and return the opposite if the most
2653 significant byte does. */
2654
2655bool
ef4bddc2 2656aarch64_pad_reg_upward (machine_mode mode, const_tree type,
43e9d192
IB
2657 bool first ATTRIBUTE_UNUSED)
2658{
2659
2660 /* Small composite types are always padded upward. */
2661 if (BYTES_BIG_ENDIAN && aarch64_composite_type_p (type, mode))
2662 {
2663 HOST_WIDE_INT size = (type ? int_size_in_bytes (type)
2664 : GET_MODE_SIZE (mode));
2665 if (size < 2 * UNITS_PER_WORD)
2666 return true;
2667 }
2668
2669 /* Otherwise, use the default padding. */
2670 return !BYTES_BIG_ENDIAN;
2671}
2672
ef4bddc2 2673static machine_mode
43e9d192
IB
2674aarch64_libgcc_cmp_return_mode (void)
2675{
2676 return SImode;
2677}
2678
a3eb8a52
EB
2679#define PROBE_INTERVAL (1 << STACK_CHECK_PROBE_INTERVAL_EXP)
2680
2681/* We use the 12-bit shifted immediate arithmetic instructions so values
2682 must be multiple of (1 << 12), i.e. 4096. */
2683#define ARITH_FACTOR 4096
2684
2685#if (PROBE_INTERVAL % ARITH_FACTOR) != 0
2686#error Cannot use simple address calculation for stack probing
2687#endif
2688
2689/* The pair of scratch registers used for stack probing. */
2690#define PROBE_STACK_FIRST_REG 9
2691#define PROBE_STACK_SECOND_REG 10
2692
2693/* Emit code to probe a range of stack addresses from FIRST to FIRST+SIZE,
2694 inclusive. These are offsets from the current stack pointer. */
2695
2696static void
2697aarch64_emit_probe_stack_range (HOST_WIDE_INT first, HOST_WIDE_INT size)
2698{
5f5c5e0f 2699 rtx reg1 = gen_rtx_REG (Pmode, PROBE_STACK_FIRST_REG);
a3eb8a52
EB
2700
2701 /* See the same assertion on PROBE_INTERVAL above. */
2702 gcc_assert ((first % ARITH_FACTOR) == 0);
2703
2704 /* See if we have a constant small number of probes to generate. If so,
2705 that's the easy case. */
2706 if (size <= PROBE_INTERVAL)
2707 {
2708 const HOST_WIDE_INT base = ROUND_UP (size, ARITH_FACTOR);
2709
2710 emit_set_insn (reg1,
5f5c5e0f 2711 plus_constant (Pmode,
a3eb8a52 2712 stack_pointer_rtx, -(first + base)));
5f5c5e0f 2713 emit_stack_probe (plus_constant (Pmode, reg1, base - size));
a3eb8a52
EB
2714 }
2715
2716 /* The run-time loop is made up of 8 insns in the generic case while the
 2717 compile-time loop is made up of 4 + 2*(n-2) insns for n intervals. */
2718 else if (size <= 4 * PROBE_INTERVAL)
2719 {
2720 HOST_WIDE_INT i, rem;
2721
2722 emit_set_insn (reg1,
5f5c5e0f 2723 plus_constant (Pmode,
a3eb8a52
EB
2724 stack_pointer_rtx,
2725 -(first + PROBE_INTERVAL)));
2726 emit_stack_probe (reg1);
2727
2728 /* Probe at FIRST + N * PROBE_INTERVAL for values of N from 2 until
2729 it exceeds SIZE. If only two probes are needed, this will not
2730 generate any code. Then probe at FIRST + SIZE. */
2731 for (i = 2 * PROBE_INTERVAL; i < size; i += PROBE_INTERVAL)
2732 {
2733 emit_set_insn (reg1,
5f5c5e0f 2734 plus_constant (Pmode, reg1, -PROBE_INTERVAL));
a3eb8a52
EB
2735 emit_stack_probe (reg1);
2736 }
2737
2738 rem = size - (i - PROBE_INTERVAL);
2739 if (rem > 256)
2740 {
2741 const HOST_WIDE_INT base = ROUND_UP (rem, ARITH_FACTOR);
2742
5f5c5e0f
EB
2743 emit_set_insn (reg1, plus_constant (Pmode, reg1, -base));
2744 emit_stack_probe (plus_constant (Pmode, reg1, base - rem));
a3eb8a52
EB
2745 }
2746 else
5f5c5e0f 2747 emit_stack_probe (plus_constant (Pmode, reg1, -rem));
a3eb8a52
EB
2748 }
2749
2750 /* Otherwise, do the same as above, but in a loop. Note that we must be
2751 extra careful with variables wrapping around because we might be at
2752 the very top (or the very bottom) of the address space and we have
2753 to be able to handle this case properly; in particular, we use an
2754 equality test for the loop condition. */
2755 else
2756 {
5f5c5e0f 2757 rtx reg2 = gen_rtx_REG (Pmode, PROBE_STACK_SECOND_REG);
a3eb8a52
EB
2758
2759 /* Step 1: round SIZE to the previous multiple of the interval. */
2760
2761 HOST_WIDE_INT rounded_size = size & -PROBE_INTERVAL;
2762
2763
2764 /* Step 2: compute initial and final value of the loop counter. */
2765
2766 /* TEST_ADDR = SP + FIRST. */
2767 emit_set_insn (reg1,
5f5c5e0f 2768 plus_constant (Pmode, stack_pointer_rtx, -first));
a3eb8a52
EB
2769
2770 /* LAST_ADDR = SP + FIRST + ROUNDED_SIZE. */
13f752b2
JL
2771 HOST_WIDE_INT adjustment = - (first + rounded_size);
2772 if (! aarch64_uimm12_shift (adjustment))
2773 {
2774 aarch64_internal_mov_immediate (reg2, GEN_INT (adjustment),
2775 true, Pmode);
2776 emit_set_insn (reg2, gen_rtx_PLUS (Pmode, stack_pointer_rtx, reg2));
2777 }
2778 else
2779 {
2780 emit_set_insn (reg2,
2781 plus_constant (Pmode, stack_pointer_rtx, adjustment));
2782 }
2783
a3eb8a52
EB
2784 /* Step 3: the loop
2785
2786 do
2787 {
2788 TEST_ADDR = TEST_ADDR + PROBE_INTERVAL
2789 probe at TEST_ADDR
2790 }
2791 while (TEST_ADDR != LAST_ADDR)
2792
2793 probes at FIRST + N * PROBE_INTERVAL for values of N from 1
2794 until it is equal to ROUNDED_SIZE. */
2795
5f5c5e0f 2796 emit_insn (gen_probe_stack_range (reg1, reg1, reg2));
a3eb8a52
EB
2797
2798
2799 /* Step 4: probe at FIRST + SIZE if we cannot assert at compile-time
2800 that SIZE is equal to ROUNDED_SIZE. */
2801
2802 if (size != rounded_size)
2803 {
2804 HOST_WIDE_INT rem = size - rounded_size;
2805
2806 if (rem > 256)
2807 {
2808 const HOST_WIDE_INT base = ROUND_UP (rem, ARITH_FACTOR);
2809
5f5c5e0f
EB
2810 emit_set_insn (reg2, plus_constant (Pmode, reg2, -base));
2811 emit_stack_probe (plus_constant (Pmode, reg2, base - rem));
a3eb8a52
EB
2812 }
2813 else
5f5c5e0f 2814 emit_stack_probe (plus_constant (Pmode, reg2, -rem));
a3eb8a52
EB
2815 }
2816 }
2817
2818 /* Make sure nothing is scheduled before we are done. */
2819 emit_insn (gen_blockage ());
2820}
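/* Illustrative output (assuming FIRST == 0 and the default 4 KiB
   PROBE_INTERVAL): a 12 KiB SIZE falls into the unrolled case above and
   emits roughly
       sub x9, sp, #4096
       str xzr, [x9]
       sub x9, x9, #4096
       str xzr, [x9]
       sub x9, x9, #4096
       str xzr, [x9]
   i.e. one probe per interval, with the last probe landing at SP - SIZE.  */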
2821
2822/* Probe a range of stack addresses from REG1 to REG2 inclusive. These are
2823 absolute addresses. */
2824
2825const char *
2826aarch64_output_probe_stack_range (rtx reg1, rtx reg2)
2827{
2828 static int labelno = 0;
2829 char loop_lab[32];
2830 rtx xops[2];
2831
2832 ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno++);
2833
2834 /* Loop. */
2835 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);
2836
2837 /* TEST_ADDR = TEST_ADDR + PROBE_INTERVAL. */
2838 xops[0] = reg1;
2839 xops[1] = GEN_INT (PROBE_INTERVAL);
2840 output_asm_insn ("sub\t%0, %0, %1", xops);
2841
2842 /* Probe at TEST_ADDR. */
2843 output_asm_insn ("str\txzr, [%0]", xops);
2844
2845 /* Test if TEST_ADDR == LAST_ADDR. */
2846 xops[1] = reg2;
2847 output_asm_insn ("cmp\t%0, %1", xops);
2848
2849 /* Branch. */
2850 fputs ("\tb.ne\t", asm_out_file);
2851 assemble_name_raw (asm_out_file, loop_lab);
2852 fputc ('\n', asm_out_file);
2853
2854 return "";
2855}
2856
43e9d192
IB
2857static bool
2858aarch64_frame_pointer_required (void)
2859{
0b7f8166
MS
2860 /* In aarch64_override_options_after_change
2861 flag_omit_leaf_frame_pointer turns off the frame pointer by
2862 default. Turn it back on now if we've not got a leaf
2863 function. */
2864 if (flag_omit_leaf_frame_pointer
2865 && (!crtl->is_leaf || df_regs_ever_live_p (LR_REGNUM)))
2866 return true;
43e9d192 2867
8144a493
WD
2868 /* Force a frame pointer for EH returns so the return address is at FP+8. */
2869 if (crtl->calls_eh_return)
2870 return true;
2871
0b7f8166 2872 return false;
43e9d192
IB
2873}
2874
2875/* Mark the registers that need to be saved by the callee and calculate
2876 the size of the callee-saved registers area and frame record (both FP
2877 and LR may be omitted). */
2878static void
2879aarch64_layout_frame (void)
2880{
2881 HOST_WIDE_INT offset = 0;
4b0685d9 2882 int regno, last_fp_reg = INVALID_REGNUM;
43e9d192
IB
2883
2884 if (reload_completed && cfun->machine->frame.laid_out)
2885 return;
2886
97826595
MS
2887#define SLOT_NOT_REQUIRED (-2)
2888#define SLOT_REQUIRED (-1)
2889
71bfb77a
WD
2890 cfun->machine->frame.wb_candidate1 = INVALID_REGNUM;
2891 cfun->machine->frame.wb_candidate2 = INVALID_REGNUM;
363ffa50 2892
43e9d192
IB
2893 /* First mark all the registers that really need to be saved... */
2894 for (regno = R0_REGNUM; regno <= R30_REGNUM; regno++)
97826595 2895 cfun->machine->frame.reg_offset[regno] = SLOT_NOT_REQUIRED;
43e9d192
IB
2896
2897 for (regno = V0_REGNUM; regno <= V31_REGNUM; regno++)
97826595 2898 cfun->machine->frame.reg_offset[regno] = SLOT_NOT_REQUIRED;
43e9d192
IB
2899
2900 /* ... that includes the eh data registers (if needed)... */
2901 if (crtl->calls_eh_return)
2902 for (regno = 0; EH_RETURN_DATA_REGNO (regno) != INVALID_REGNUM; regno++)
97826595
MS
2903 cfun->machine->frame.reg_offset[EH_RETURN_DATA_REGNO (regno)]
2904 = SLOT_REQUIRED;
43e9d192
IB
2905
2906 /* ... and any callee saved register that dataflow says is live. */
2907 for (regno = R0_REGNUM; regno <= R30_REGNUM; regno++)
2908 if (df_regs_ever_live_p (regno)
1c923b60
JW
2909 && (regno == R30_REGNUM
2910 || !call_used_regs[regno]))
97826595 2911 cfun->machine->frame.reg_offset[regno] = SLOT_REQUIRED;
43e9d192
IB
2912
2913 for (regno = V0_REGNUM; regno <= V31_REGNUM; regno++)
2914 if (df_regs_ever_live_p (regno)
2915 && !call_used_regs[regno])
4b0685d9
WD
2916 {
2917 cfun->machine->frame.reg_offset[regno] = SLOT_REQUIRED;
2918 last_fp_reg = regno;
2919 }
43e9d192
IB
2920
2921 if (frame_pointer_needed)
2922 {
2e1cdae5 2923 /* FP and LR are placed in the linkage record. */
43e9d192 2924 cfun->machine->frame.reg_offset[R29_REGNUM] = 0;
363ffa50 2925 cfun->machine->frame.wb_candidate1 = R29_REGNUM;
2e1cdae5 2926 cfun->machine->frame.reg_offset[R30_REGNUM] = UNITS_PER_WORD;
363ffa50 2927 cfun->machine->frame.wb_candidate2 = R30_REGNUM;
2e1cdae5 2928 offset += 2 * UNITS_PER_WORD;
43e9d192
IB
2929 }
2930
2931 /* Now assign stack slots for them. */
2e1cdae5 2932 for (regno = R0_REGNUM; regno <= R30_REGNUM; regno++)
97826595 2933 if (cfun->machine->frame.reg_offset[regno] == SLOT_REQUIRED)
43e9d192
IB
2934 {
2935 cfun->machine->frame.reg_offset[regno] = offset;
71bfb77a 2936 if (cfun->machine->frame.wb_candidate1 == INVALID_REGNUM)
363ffa50 2937 cfun->machine->frame.wb_candidate1 = regno;
71bfb77a 2938 else if (cfun->machine->frame.wb_candidate2 == INVALID_REGNUM)
363ffa50 2939 cfun->machine->frame.wb_candidate2 = regno;
43e9d192
IB
2940 offset += UNITS_PER_WORD;
2941 }
2942
4b0685d9
WD
2943 HOST_WIDE_INT max_int_offset = offset;
2944 offset = ROUND_UP (offset, STACK_BOUNDARY / BITS_PER_UNIT);
2945 bool has_align_gap = offset != max_int_offset;
2946
43e9d192 2947 for (regno = V0_REGNUM; regno <= V31_REGNUM; regno++)
97826595 2948 if (cfun->machine->frame.reg_offset[regno] == SLOT_REQUIRED)
43e9d192 2949 {
4b0685d9
WD
2950 /* If there is an alignment gap between integer and fp callee-saves,
2951 allocate the last fp register to it if possible. */
2952 if (regno == last_fp_reg && has_align_gap && (offset & 8) == 0)
2953 {
2954 cfun->machine->frame.reg_offset[regno] = max_int_offset;
2955 break;
2956 }
2957
43e9d192 2958 cfun->machine->frame.reg_offset[regno] = offset;
71bfb77a 2959 if (cfun->machine->frame.wb_candidate1 == INVALID_REGNUM)
363ffa50 2960 cfun->machine->frame.wb_candidate1 = regno;
71bfb77a 2961 else if (cfun->machine->frame.wb_candidate2 == INVALID_REGNUM
363ffa50
JW
2962 && cfun->machine->frame.wb_candidate1 >= V0_REGNUM)
2963 cfun->machine->frame.wb_candidate2 = regno;
43e9d192
IB
2964 offset += UNITS_PER_WORD;
2965 }
2966
4f59f9f2 2967 offset = ROUND_UP (offset, STACK_BOUNDARY / BITS_PER_UNIT);
43e9d192
IB
2968
2969 cfun->machine->frame.saved_regs_size = offset;
1c960e02 2970
71bfb77a
WD
2971 HOST_WIDE_INT varargs_and_saved_regs_size
2972 = offset + cfun->machine->frame.saved_varargs_size;
2973
1c960e02 2974 cfun->machine->frame.hard_fp_offset
71bfb77a 2975 = ROUND_UP (varargs_and_saved_regs_size + get_frame_size (),
4f59f9f2 2976 STACK_BOUNDARY / BITS_PER_UNIT);
1c960e02
MS
2977
2978 cfun->machine->frame.frame_size
4f59f9f2
UB
2979 = ROUND_UP (cfun->machine->frame.hard_fp_offset
2980 + crtl->outgoing_args_size,
2981 STACK_BOUNDARY / BITS_PER_UNIT);
1c960e02 2982
71bfb77a
WD
2983 cfun->machine->frame.locals_offset = cfun->machine->frame.saved_varargs_size;
2984
2985 cfun->machine->frame.initial_adjust = 0;
2986 cfun->machine->frame.final_adjust = 0;
2987 cfun->machine->frame.callee_adjust = 0;
2988 cfun->machine->frame.callee_offset = 0;
2989
2990 HOST_WIDE_INT max_push_offset = 0;
2991 if (cfun->machine->frame.wb_candidate2 != INVALID_REGNUM)
2992 max_push_offset = 512;
2993 else if (cfun->machine->frame.wb_candidate1 != INVALID_REGNUM)
2994 max_push_offset = 256;
2995
2996 if (cfun->machine->frame.frame_size < max_push_offset
2997 && crtl->outgoing_args_size == 0)
2998 {
2999 /* Simple, small frame with no outgoing arguments:
3000 stp reg1, reg2, [sp, -frame_size]!
3001 stp reg3, reg4, [sp, 16] */
3002 cfun->machine->frame.callee_adjust = cfun->machine->frame.frame_size;
3003 }
3004 else if ((crtl->outgoing_args_size
3005 + cfun->machine->frame.saved_regs_size < 512)
3006 && !(cfun->calls_alloca
3007 && cfun->machine->frame.hard_fp_offset < max_push_offset))
3008 {
3009 /* Frame with small outgoing arguments:
3010 sub sp, sp, frame_size
3011 stp reg1, reg2, [sp, outgoing_args_size]
3012 stp reg3, reg4, [sp, outgoing_args_size + 16] */
3013 cfun->machine->frame.initial_adjust = cfun->machine->frame.frame_size;
3014 cfun->machine->frame.callee_offset
3015 = cfun->machine->frame.frame_size - cfun->machine->frame.hard_fp_offset;
3016 }
3017 else if (cfun->machine->frame.hard_fp_offset < max_push_offset)
3018 {
3019 /* Frame with large outgoing arguments but a small local area:
3020 stp reg1, reg2, [sp, -hard_fp_offset]!
3021 stp reg3, reg4, [sp, 16]
3022 sub sp, sp, outgoing_args_size */
3023 cfun->machine->frame.callee_adjust = cfun->machine->frame.hard_fp_offset;
3024 cfun->machine->frame.final_adjust
3025 = cfun->machine->frame.frame_size - cfun->machine->frame.callee_adjust;
3026 }
3027 else if (!frame_pointer_needed
3028 && varargs_and_saved_regs_size < max_push_offset)
3029 {
3030 /* Frame with large local area and outgoing arguments (this pushes the
3031 callee-saves first, followed by the locals and outgoing area):
3032 stp reg1, reg2, [sp, -varargs_and_saved_regs_size]!
3033 stp reg3, reg4, [sp, 16]
3034 sub sp, sp, frame_size - varargs_and_saved_regs_size */
3035 cfun->machine->frame.callee_adjust = varargs_and_saved_regs_size;
3036 cfun->machine->frame.final_adjust
3037 = cfun->machine->frame.frame_size - cfun->machine->frame.callee_adjust;
3038 cfun->machine->frame.hard_fp_offset = cfun->machine->frame.callee_adjust;
3039 cfun->machine->frame.locals_offset = cfun->machine->frame.hard_fp_offset;
3040 }
3041 else
3042 {
3043 /* Frame with large local area and outgoing arguments using frame pointer:
3044 sub sp, sp, hard_fp_offset
3045 stp x29, x30, [sp, 0]
3046 add x29, sp, 0
3047 stp reg3, reg4, [sp, 16]
3048 sub sp, sp, outgoing_args_size */
3049 cfun->machine->frame.initial_adjust = cfun->machine->frame.hard_fp_offset;
3050 cfun->machine->frame.final_adjust
3051 = cfun->machine->frame.frame_size - cfun->machine->frame.initial_adjust;
3052 }
3053
43e9d192
IB
3054 cfun->machine->frame.laid_out = true;
3055}
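/* Worked example (illustrative only): a function that needs a frame
   pointer, also saves x19 and x20, has 32 bytes of locals and no outgoing
   stack arguments gets saved_regs_size == 32, hard_fp_offset == 64 and
   frame_size == 64.  That is below max_push_offset, so the "simple, small
   frame" case is chosen (callee_adjust == frame_size), giving a prologue
   of the form
       stp x29, x30, [sp, -64]!
       stp x19, x20, [sp, 16]
   with the locals sitting above the callee saves at [sp, 32].  */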
3056
04ddfe06
KT
3057/* Return true if the register REGNO is saved on entry to
3058 the current function. */
3059
43e9d192
IB
3060static bool
3061aarch64_register_saved_on_entry (int regno)
3062{
97826595 3063 return cfun->machine->frame.reg_offset[regno] >= 0;
43e9d192
IB
3064}
3065
04ddfe06
KT
3066/* Return the next register number, from REGNO up to LIMIT, that the
3067 callee needs to save. */
3068
64dedd72
JW
3069static unsigned
3070aarch64_next_callee_save (unsigned regno, unsigned limit)
3071{
3072 while (regno <= limit && !aarch64_register_saved_on_entry (regno))
3073 regno ++;
3074 return regno;
3075}
43e9d192 3076
04ddfe06
KT
3077/* Push the register number REGNO of mode MODE to the stack with write-back
3078 adjusting the stack by ADJUSTMENT. */
3079
c5e1f66e 3080static void
ef4bddc2 3081aarch64_pushwb_single_reg (machine_mode mode, unsigned regno,
c5e1f66e
JW
3082 HOST_WIDE_INT adjustment)
3083 {
3084 rtx base_rtx = stack_pointer_rtx;
3085 rtx insn, reg, mem;
3086
3087 reg = gen_rtx_REG (mode, regno);
3088 mem = gen_rtx_PRE_MODIFY (Pmode, base_rtx,
3089 plus_constant (Pmode, base_rtx, -adjustment));
3090 mem = gen_rtx_MEM (mode, mem);
3091
3092 insn = emit_move_insn (mem, reg);
3093 RTX_FRAME_RELATED_P (insn) = 1;
3094}
3095
04ddfe06
KT
3096/* Generate and return an instruction to store the pair of registers
3097 REG and REG2 of mode MODE to location BASE with write-back adjusting
3098 the stack location BASE by ADJUSTMENT. */
3099
80c11907 3100static rtx
ef4bddc2 3101aarch64_gen_storewb_pair (machine_mode mode, rtx base, rtx reg, rtx reg2,
80c11907
JW
3102 HOST_WIDE_INT adjustment)
3103{
3104 switch (mode)
3105 {
3106 case DImode:
3107 return gen_storewb_pairdi_di (base, base, reg, reg2,
3108 GEN_INT (-adjustment),
3109 GEN_INT (UNITS_PER_WORD - adjustment));
3110 case DFmode:
3111 return gen_storewb_pairdf_di (base, base, reg, reg2,
3112 GEN_INT (-adjustment),
3113 GEN_INT (UNITS_PER_WORD - adjustment));
3114 default:
3115 gcc_unreachable ();
3116 }
3117}
3118
04ddfe06
KT
3119/* Push registers numbered REGNO1 and REGNO2 to the stack, adjusting the
3120 stack pointer by ADJUSTMENT. */
3121
80c11907 3122static void
89ac681e 3123aarch64_push_regs (unsigned regno1, unsigned regno2, HOST_WIDE_INT adjustment)
80c11907 3124{
5d8a22a5 3125 rtx_insn *insn;
89ac681e
WD
3126 machine_mode mode = (regno1 <= R30_REGNUM) ? DImode : DFmode;
3127
71bfb77a 3128 if (regno2 == INVALID_REGNUM)
89ac681e
WD
3129 return aarch64_pushwb_single_reg (mode, regno1, adjustment);
3130
80c11907
JW
3131 rtx reg1 = gen_rtx_REG (mode, regno1);
3132 rtx reg2 = gen_rtx_REG (mode, regno2);
3133
3134 insn = emit_insn (aarch64_gen_storewb_pair (mode, stack_pointer_rtx, reg1,
3135 reg2, adjustment));
3136 RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 2)) = 1;
80c11907
JW
3137 RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 1)) = 1;
3138 RTX_FRAME_RELATED_P (insn) = 1;
3139}
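/* Illustrative example (not part of the original source):
   aarch64_push_regs (R19_REGNUM, R20_REGNUM, 32) emits a single
   write-back store pair,
       stp x19, x20, [sp, -32]!
   with both stores and the SP adjustment marked as frame related.  */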
3140
04ddfe06
KT
3141/* Load the pair of registers REG, REG2 of mode MODE from stack location BASE,
3142 adjusting it by ADJUSTMENT afterwards. */
3143
159313d9 3144static rtx
ef4bddc2 3145aarch64_gen_loadwb_pair (machine_mode mode, rtx base, rtx reg, rtx reg2,
159313d9
JW
3146 HOST_WIDE_INT adjustment)
3147{
3148 switch (mode)
3149 {
3150 case DImode:
3151 return gen_loadwb_pairdi_di (base, base, reg, reg2, GEN_INT (adjustment),
3e322b3f 3152 GEN_INT (UNITS_PER_WORD));
159313d9
JW
3153 case DFmode:
3154 return gen_loadwb_pairdf_di (base, base, reg, reg2, GEN_INT (adjustment),
3e322b3f 3155 GEN_INT (UNITS_PER_WORD));
159313d9
JW
3156 default:
3157 gcc_unreachable ();
3158 }
3159}
3160
04ddfe06
KT
3161/* Pop the two registers numbered REGNO1, REGNO2 from the stack, adjusting it
3162 afterwards by ADJUSTMENT and writing the appropriate REG_CFA_RESTORE notes
3163 into CFI_OPS. */
3164
89ac681e
WD
3165static void
3166aarch64_pop_regs (unsigned regno1, unsigned regno2, HOST_WIDE_INT adjustment,
3167 rtx *cfi_ops)
3168{
3169 machine_mode mode = (regno1 <= R30_REGNUM) ? DImode : DFmode;
3170 rtx reg1 = gen_rtx_REG (mode, regno1);
3171
3172 *cfi_ops = alloc_reg_note (REG_CFA_RESTORE, reg1, *cfi_ops);
3173
71bfb77a 3174 if (regno2 == INVALID_REGNUM)
89ac681e
WD
3175 {
3176 rtx mem = plus_constant (Pmode, stack_pointer_rtx, adjustment);
3177 mem = gen_rtx_POST_MODIFY (Pmode, stack_pointer_rtx, mem);
3178 emit_move_insn (reg1, gen_rtx_MEM (mode, mem));
3179 }
3180 else
3181 {
3182 rtx reg2 = gen_rtx_REG (mode, regno2);
3183 *cfi_ops = alloc_reg_note (REG_CFA_RESTORE, reg2, *cfi_ops);
3184 emit_insn (aarch64_gen_loadwb_pair (mode, stack_pointer_rtx, reg1,
3185 reg2, adjustment));
3186 }
3187}
3188
04ddfe06
KT
3189/* Generate and return a store pair instruction of mode MODE to store
3190 register REG1 to MEM1 and register REG2 to MEM2. */
3191
72df5c1f 3192static rtx
ef4bddc2 3193aarch64_gen_store_pair (machine_mode mode, rtx mem1, rtx reg1, rtx mem2,
72df5c1f
JW
3194 rtx reg2)
3195{
3196 switch (mode)
3197 {
3198 case DImode:
3199 return gen_store_pairdi (mem1, reg1, mem2, reg2);
3200
3201 case DFmode:
3202 return gen_store_pairdf (mem1, reg1, mem2, reg2);
3203
3204 default:
3205 gcc_unreachable ();
3206 }
3207}
3208
04ddfe06
KT
3209/* Generate and return a load pair instruction of mode MODE to load register
3210 REG1 from MEM1 and register REG2 from MEM2. */
3211
72df5c1f 3212static rtx
ef4bddc2 3213aarch64_gen_load_pair (machine_mode mode, rtx reg1, rtx mem1, rtx reg2,
72df5c1f
JW
3214 rtx mem2)
3215{
3216 switch (mode)
3217 {
3218 case DImode:
3219 return gen_load_pairdi (reg1, mem1, reg2, mem2);
3220
3221 case DFmode:
3222 return gen_load_pairdf (reg1, mem1, reg2, mem2);
3223
3224 default:
3225 gcc_unreachable ();
3226 }
3227}
3228
db58fd89
JW
3229/* Return TRUE if return address signing should be enabled for the current
3230 function, otherwise return FALSE. */
3231
3232bool
3233aarch64_return_address_signing_enabled (void)
3234{
3235 /* This function should only be called after frame laid out. */
3236 gcc_assert (cfun->machine->frame.laid_out);
3237
3238 /* If signing scope is AARCH64_FUNCTION_NON_LEAF, we only sign a leaf function
 3239 if its LR is pushed onto the stack. */
3240 return (aarch64_ra_sign_scope == AARCH64_FUNCTION_ALL
3241 || (aarch64_ra_sign_scope == AARCH64_FUNCTION_NON_LEAF
3242 && cfun->machine->frame.reg_offset[LR_REGNUM] >= 0));
3243}
3244
04ddfe06
KT
3245/* Emit code to save the callee-saved registers from register number START
3246 to LIMIT to the stack at the location starting at offset START_OFFSET,
3247 skipping any write-back candidates if SKIP_WB is true. */
43e9d192 3248
43e9d192 3249static void
ef4bddc2 3250aarch64_save_callee_saves (machine_mode mode, HOST_WIDE_INT start_offset,
ae13fce3 3251 unsigned start, unsigned limit, bool skip_wb)
43e9d192 3252{
5d8a22a5 3253 rtx_insn *insn;
ef4bddc2 3254 rtx (*gen_mem_ref) (machine_mode, rtx) = (frame_pointer_needed
a007a21c 3255 ? gen_frame_mem : gen_rtx_MEM);
43e9d192
IB
3256 unsigned regno;
3257 unsigned regno2;
3258
0ec74a1e 3259 for (regno = aarch64_next_callee_save (start, limit);
64dedd72
JW
3260 regno <= limit;
3261 regno = aarch64_next_callee_save (regno + 1, limit))
43e9d192 3262 {
ae13fce3
JW
3263 rtx reg, mem;
3264 HOST_WIDE_INT offset;
64dedd72 3265
ae13fce3
JW
3266 if (skip_wb
3267 && (regno == cfun->machine->frame.wb_candidate1
3268 || regno == cfun->machine->frame.wb_candidate2))
3269 continue;
3270
827ab47a
KT
3271 if (cfun->machine->reg_is_wrapped_separately[regno])
3272 continue;
3273
ae13fce3
JW
3274 reg = gen_rtx_REG (mode, regno);
3275 offset = start_offset + cfun->machine->frame.reg_offset[regno];
0ec74a1e
JW
3276 mem = gen_mem_ref (mode, plus_constant (Pmode, stack_pointer_rtx,
3277 offset));
64dedd72
JW
3278
3279 regno2 = aarch64_next_callee_save (regno + 1, limit);
3280
3281 if (regno2 <= limit
827ab47a 3282 && !cfun->machine->reg_is_wrapped_separately[regno2]
64dedd72
JW
3283 && ((cfun->machine->frame.reg_offset[regno] + UNITS_PER_WORD)
3284 == cfun->machine->frame.reg_offset[regno2]))
3285
43e9d192 3286 {
0ec74a1e 3287 rtx reg2 = gen_rtx_REG (mode, regno2);
64dedd72
JW
3288 rtx mem2;
3289
3290 offset = start_offset + cfun->machine->frame.reg_offset[regno2];
8ed2fc62
JW
3291 mem2 = gen_mem_ref (mode, plus_constant (Pmode, stack_pointer_rtx,
3292 offset));
3293 insn = emit_insn (aarch64_gen_store_pair (mode, mem, reg, mem2,
3294 reg2));
0b4a9743 3295
64dedd72
JW
3296 /* The first part of a frame-related parallel insn is
3297 always assumed to be relevant to the frame
 3298 calculations; subsequent parts are only
3299 frame-related if explicitly marked. */
3300 RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 1)) = 1;
3301 regno = regno2;
3302 }
3303 else
8ed2fc62
JW
3304 insn = emit_move_insn (mem, reg);
3305
3306 RTX_FRAME_RELATED_P (insn) = 1;
3307 }
3308}
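/* Illustrative note (not part of the original source): when two saved
   registers occupy adjacent slots, e.g. x19 at offset 16 and x20 at
   offset 24, the loop above merges them into one
       stp x19, x20, [sp, 16]
   otherwise each register is stored with an individual str.  */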
3309
04ddfe06
KT
3310/* Emit code to restore the callee registers of mode MODE from register
3311 number START up to and including LIMIT. Restore from the stack offset
3312 START_OFFSET, skipping any write-back candidates if SKIP_WB is true.
3313 Write the appropriate REG_CFA_RESTORE notes into CFI_OPS. */
3314
8ed2fc62 3315static void
ef4bddc2 3316aarch64_restore_callee_saves (machine_mode mode,
8ed2fc62 3317 HOST_WIDE_INT start_offset, unsigned start,
dd991abb 3318 unsigned limit, bool skip_wb, rtx *cfi_ops)
8ed2fc62 3319{
8ed2fc62 3320 rtx base_rtx = stack_pointer_rtx;
ef4bddc2 3321 rtx (*gen_mem_ref) (machine_mode, rtx) = (frame_pointer_needed
8ed2fc62
JW
3322 ? gen_frame_mem : gen_rtx_MEM);
3323 unsigned regno;
3324 unsigned regno2;
3325 HOST_WIDE_INT offset;
3326
3327 for (regno = aarch64_next_callee_save (start, limit);
3328 regno <= limit;
3329 regno = aarch64_next_callee_save (regno + 1, limit))
3330 {
827ab47a
KT
3331 if (cfun->machine->reg_is_wrapped_separately[regno])
3332 continue;
3333
ae13fce3 3334 rtx reg, mem;
8ed2fc62 3335
ae13fce3
JW
3336 if (skip_wb
3337 && (regno == cfun->machine->frame.wb_candidate1
3338 || regno == cfun->machine->frame.wb_candidate2))
3339 continue;
3340
3341 reg = gen_rtx_REG (mode, regno);
8ed2fc62
JW
3342 offset = start_offset + cfun->machine->frame.reg_offset[regno];
3343 mem = gen_mem_ref (mode, plus_constant (Pmode, base_rtx, offset));
3344
3345 regno2 = aarch64_next_callee_save (regno + 1, limit);
3346
3347 if (regno2 <= limit
827ab47a 3348 && !cfun->machine->reg_is_wrapped_separately[regno2]
8ed2fc62
JW
3349 && ((cfun->machine->frame.reg_offset[regno] + UNITS_PER_WORD)
3350 == cfun->machine->frame.reg_offset[regno2]))
64dedd72 3351 {
8ed2fc62
JW
3352 rtx reg2 = gen_rtx_REG (mode, regno2);
3353 rtx mem2;
3354
3355 offset = start_offset + cfun->machine->frame.reg_offset[regno2];
3356 mem2 = gen_mem_ref (mode, plus_constant (Pmode, base_rtx, offset));
dd991abb 3357 emit_insn (aarch64_gen_load_pair (mode, reg, mem, reg2, mem2));
8ed2fc62 3358
dd991abb 3359 *cfi_ops = alloc_reg_note (REG_CFA_RESTORE, reg2, *cfi_ops);
8ed2fc62 3360 regno = regno2;
43e9d192 3361 }
8ed2fc62 3362 else
dd991abb
RH
3363 emit_move_insn (reg, mem);
3364 *cfi_ops = alloc_reg_note (REG_CFA_RESTORE, reg, *cfi_ops);
43e9d192 3365 }
43e9d192
IB
3366}
3367
827ab47a
KT
3368static inline bool
3369offset_9bit_signed_unscaled_p (machine_mode mode ATTRIBUTE_UNUSED,
3370 HOST_WIDE_INT offset)
3371{
3372 return offset >= -256 && offset < 256;
3373}
3374
3375static inline bool
3376offset_12bit_unsigned_scaled_p (machine_mode mode, HOST_WIDE_INT offset)
3377{
3378 return (offset >= 0
3379 && offset < 4096 * GET_MODE_SIZE (mode)
3380 && offset % GET_MODE_SIZE (mode) == 0);
3381}
3382
3383bool
3384aarch64_offset_7bit_signed_scaled_p (machine_mode mode, HOST_WIDE_INT offset)
3385{
3386 return (offset >= -64 * GET_MODE_SIZE (mode)
3387 && offset < 64 * GET_MODE_SIZE (mode)
3388 && offset % GET_MODE_SIZE (mode) == 0);
3389}
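/* Illustrative example (not from the upstream source): for DImode (8-byte)
   accesses the three predicates above accept
     offset_9bit_signed_unscaled_p:        -256 .. 255, any alignment
     offset_12bit_unsigned_scaled_p:       0 .. 32760, multiples of 8
     aarch64_offset_7bit_signed_scaled_p:  -512 .. 504, multiples of 8
   roughly matching the unscaled LDUR/STUR, scaled LDR/STR and LDP/STP
   immediate addressing forms respectively.  */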
3390
3391/* Implement TARGET_SHRINK_WRAP_GET_SEPARATE_COMPONENTS. */
3392
3393static sbitmap
3394aarch64_get_separate_components (void)
3395{
3396 aarch64_layout_frame ();
3397
3398 sbitmap components = sbitmap_alloc (LAST_SAVED_REGNUM + 1);
3399 bitmap_clear (components);
3400
3401 /* The registers we need to save to the frame. */
3402 for (unsigned regno = 0; regno <= LAST_SAVED_REGNUM; regno++)
3403 if (aarch64_register_saved_on_entry (regno))
3404 {
3405 HOST_WIDE_INT offset = cfun->machine->frame.reg_offset[regno];
3406 if (!frame_pointer_needed)
3407 offset += cfun->machine->frame.frame_size
3408 - cfun->machine->frame.hard_fp_offset;
3409 /* Check that we can access the stack slot of the register with one
3410 direct load with no adjustments needed. */
3411 if (offset_12bit_unsigned_scaled_p (DImode, offset))
3412 bitmap_set_bit (components, regno);
3413 }
3414
3415 /* Don't mess with the hard frame pointer. */
3416 if (frame_pointer_needed)
3417 bitmap_clear_bit (components, HARD_FRAME_POINTER_REGNUM);
3418
3419 unsigned reg1 = cfun->machine->frame.wb_candidate1;
3420 unsigned reg2 = cfun->machine->frame.wb_candidate2;
3421 /* If aarch64_layout_frame has chosen registers to store/restore with
3423 writeback, don't interfere with them to avoid having to output explicit
3423 stack adjustment instructions. */
3424 if (reg2 != INVALID_REGNUM)
3425 bitmap_clear_bit (components, reg2);
3426 if (reg1 != INVALID_REGNUM)
3427 bitmap_clear_bit (components, reg1);
3428
3429 bitmap_clear_bit (components, LR_REGNUM);
3430 bitmap_clear_bit (components, SP_REGNUM);
3431
3432 return components;
3433}
3434
3435/* Implement TARGET_SHRINK_WRAP_COMPONENTS_FOR_BB. */
3436
3437static sbitmap
3438aarch64_components_for_bb (basic_block bb)
3439{
3440 bitmap in = DF_LIVE_IN (bb);
3441 bitmap gen = &DF_LIVE_BB_INFO (bb)->gen;
3442 bitmap kill = &DF_LIVE_BB_INFO (bb)->kill;
3443
3444 sbitmap components = sbitmap_alloc (LAST_SAVED_REGNUM + 1);
3445 bitmap_clear (components);
3446
3447 /* GPRs are used in a bb if they are in the IN, GEN, or KILL sets. */
3448 for (unsigned regno = 0; regno <= LAST_SAVED_REGNUM; regno++)
3449 if ((!call_used_regs[regno])
3450 && (bitmap_bit_p (in, regno)
3451 || bitmap_bit_p (gen, regno)
3452 || bitmap_bit_p (kill, regno)))
3453 bitmap_set_bit (components, regno);
3454
3455 return components;
3456}
3457
3458/* Implement TARGET_SHRINK_WRAP_DISQUALIFY_COMPONENTS.
3459 Nothing to do for aarch64. */
3460
3461static void
3462aarch64_disqualify_components (sbitmap, edge, sbitmap, bool)
3463{
3464}
3465
3466/* Return the next set bit in BMP from START onwards. Return the total number
3467 of bits in BMP if no set bit is found at or after START. */
3468
3469static unsigned int
3470aarch64_get_next_set_bit (sbitmap bmp, unsigned int start)
3471{
3472 unsigned int nbits = SBITMAP_SIZE (bmp);
3473 if (start == nbits)
3474 return start;
3475
3476 gcc_assert (start < nbits);
3477 for (unsigned int i = start; i < nbits; i++)
3478 if (bitmap_bit_p (bmp, i))
3479 return i;
3480
3481 return nbits;
3482}
3483
3484/* Do the work for aarch64_emit_prologue_components and
3485 aarch64_emit_epilogue_components. COMPONENTS is the bitmap of registers
3486 to save/restore, PROLOGUE_P indicates whether to emit the prologue sequence
3487 for these components or the epilogue sequence. That is, it determines
3488 whether we should emit stores or loads and what kind of CFA notes to attach
3489 to the insns. Otherwise the logic for the two sequences is very
3490 similar. */
3491
3492static void
3493aarch64_process_components (sbitmap components, bool prologue_p)
3494{
3495 rtx ptr_reg = gen_rtx_REG (Pmode, frame_pointer_needed
3496 ? HARD_FRAME_POINTER_REGNUM
3497 : STACK_POINTER_REGNUM);
3498
3499 unsigned last_regno = SBITMAP_SIZE (components);
3500 unsigned regno = aarch64_get_next_set_bit (components, R0_REGNUM);
3501 rtx_insn *insn = NULL;
3502
3503 while (regno != last_regno)
3504 {
3505 /* AAPCS64 section 5.1.2 requires only the bottom 64 bits to be saved,
3506 so DFmode for the vector registers is enough. */
3507 machine_mode mode = GP_REGNUM_P (regno) ? DImode : DFmode;
3508 rtx reg = gen_rtx_REG (mode, regno);
3509 HOST_WIDE_INT offset = cfun->machine->frame.reg_offset[regno];
3510 if (!frame_pointer_needed)
3511 offset += cfun->machine->frame.frame_size
3512 - cfun->machine->frame.hard_fp_offset;
3513 rtx addr = plus_constant (Pmode, ptr_reg, offset);
3514 rtx mem = gen_frame_mem (mode, addr);
3515
3516 rtx set = prologue_p ? gen_rtx_SET (mem, reg) : gen_rtx_SET (reg, mem);
3517 unsigned regno2 = aarch64_get_next_set_bit (components, regno + 1);
3518 /* No more registers to handle after REGNO.
3519 Emit a single save/restore and exit. */
3520 if (regno2 == last_regno)
3521 {
3522 insn = emit_insn (set);
3523 RTX_FRAME_RELATED_P (insn) = 1;
3524 if (prologue_p)
3525 add_reg_note (insn, REG_CFA_OFFSET, copy_rtx (set));
3526 else
3527 add_reg_note (insn, REG_CFA_RESTORE, reg);
3528 break;
3529 }
3530
3531 HOST_WIDE_INT offset2 = cfun->machine->frame.reg_offset[regno2];
3532 /* The next register is not of the same class or its offset is not
3533 mergeable with the current one into a pair. */
3534 if (!satisfies_constraint_Ump (mem)
3535 || GP_REGNUM_P (regno) != GP_REGNUM_P (regno2)
3536 || (offset2 - cfun->machine->frame.reg_offset[regno])
3537 != GET_MODE_SIZE (mode))
3538 {
3539 insn = emit_insn (set);
3540 RTX_FRAME_RELATED_P (insn) = 1;
3541 if (prologue_p)
3542 add_reg_note (insn, REG_CFA_OFFSET, copy_rtx (set));
3543 else
3544 add_reg_note (insn, REG_CFA_RESTORE, reg);
3545
3546 regno = regno2;
3547 continue;
3548 }
3549
3550 /* REGNO2 can be saved/restored in a pair with REGNO. */
3551 rtx reg2 = gen_rtx_REG (mode, regno2);
3552 if (!frame_pointer_needed)
3553 offset2 += cfun->machine->frame.frame_size
3554 - cfun->machine->frame.hard_fp_offset;
3555 rtx addr2 = plus_constant (Pmode, ptr_reg, offset2);
3556 rtx mem2 = gen_frame_mem (mode, addr2);
3557 rtx set2 = prologue_p ? gen_rtx_SET (mem2, reg2)
3558 : gen_rtx_SET (reg2, mem2);
3559
3560 if (prologue_p)
3561 insn = emit_insn (aarch64_gen_store_pair (mode, mem, reg, mem2, reg2));
3562 else
3563 insn = emit_insn (aarch64_gen_load_pair (mode, reg, mem, reg2, mem2));
3564
3565 RTX_FRAME_RELATED_P (insn) = 1;
3566 if (prologue_p)
3567 {
3568 add_reg_note (insn, REG_CFA_OFFSET, set);
3569 add_reg_note (insn, REG_CFA_OFFSET, set2);
3570 }
3571 else
3572 {
3573 add_reg_note (insn, REG_CFA_RESTORE, reg);
3574 add_reg_note (insn, REG_CFA_RESTORE, reg2);
3575 }
3576
3577 regno = aarch64_get_next_set_bit (components, regno2 + 1);
3578 }
3579}
3580
3581/* Implement TARGET_SHRINK_WRAP_EMIT_PROLOGUE_COMPONENTS. */
3582
3583static void
3584aarch64_emit_prologue_components (sbitmap components)
3585{
3586 aarch64_process_components (components, true);
3587}
3588
3589/* Implement TARGET_SHRINK_WRAP_EMIT_EPILOGUE_COMPONENTS. */
3590
3591static void
3592aarch64_emit_epilogue_components (sbitmap components)
3593{
3594 aarch64_process_components (components, false);
3595}
3596
3597/* Implement TARGET_SHRINK_WRAP_SET_HANDLED_COMPONENTS. */
3598
3599static void
3600aarch64_set_handled_components (sbitmap components)
3601{
3602 for (unsigned regno = 0; regno <= LAST_SAVED_REGNUM; regno++)
3603 if (bitmap_bit_p (components, regno))
3604 cfun->machine->reg_is_wrapped_separately[regno] = true;
3605}
3606
43e9d192
IB
3607/* AArch64 stack frames generated by this compiler look like:
3608
3609 +-------------------------------+
3610 | |
3611 | incoming stack arguments |
3612 | |
34834420
MS
3613 +-------------------------------+
3614 | | <-- incoming stack pointer (aligned)
43e9d192
IB
3615 | callee-allocated save area |
3616 | for register varargs |
3617 | |
34834420
MS
3618 +-------------------------------+
3619 | local variables | <-- frame_pointer_rtx
43e9d192
IB
3620 | |
3621 +-------------------------------+
454fdba9
RL
3622 | padding0 | \
3623 +-------------------------------+ |
454fdba9 3624 | callee-saved registers | | frame.saved_regs_size
454fdba9
RL
3625 +-------------------------------+ |
3626 | LR' | |
3627 +-------------------------------+ |
34834420
MS
3628 | FP' | / <- hard_frame_pointer_rtx (aligned)
3629 +-------------------------------+
43e9d192
IB
3630 | dynamic allocation |
3631 +-------------------------------+
34834420
MS
3632 | padding |
3633 +-------------------------------+
3634 | outgoing stack arguments | <-- arg_pointer
3635 | |
3636 +-------------------------------+
3637 | | <-- stack_pointer_rtx (aligned)
43e9d192 3638
34834420
MS
3639 Dynamic stack allocations via alloca() decrease stack_pointer_rtx
3640 but leave frame_pointer_rtx and hard_frame_pointer_rtx
3641 unchanged. */
43e9d192
IB
3642
3643/* Generate the prologue instructions for entry into a function.
3644 Establish the stack frame by decreasing the stack pointer with a
3645 properly calculated size and, if necessary, create a frame record
3646 filled with the values of LR and previous frame pointer. The
6991c977 3647 current FP is also set up if it is in use. */
43e9d192
IB
3648
3649void
3650aarch64_expand_prologue (void)
3651{
43e9d192 3652 aarch64_layout_frame ();
43e9d192 3653
71bfb77a
WD
3654 HOST_WIDE_INT frame_size = cfun->machine->frame.frame_size;
3655 HOST_WIDE_INT initial_adjust = cfun->machine->frame.initial_adjust;
3656 HOST_WIDE_INT callee_adjust = cfun->machine->frame.callee_adjust;
3657 HOST_WIDE_INT final_adjust = cfun->machine->frame.final_adjust;
3658 HOST_WIDE_INT callee_offset = cfun->machine->frame.callee_offset;
3659 unsigned reg1 = cfun->machine->frame.wb_candidate1;
3660 unsigned reg2 = cfun->machine->frame.wb_candidate2;
3661 rtx_insn *insn;
43e9d192 3662
db58fd89
JW
3663 /* Sign return address for functions. */
3664 if (aarch64_return_address_signing_enabled ())
27169e45
JW
3665 {
3666 insn = emit_insn (gen_pacisp ());
3667 add_reg_note (insn, REG_CFA_TOGGLE_RA_MANGLE, const0_rtx);
3668 RTX_FRAME_RELATED_P (insn) = 1;
3669 }
db58fd89 3670
dd991abb
RH
3671 if (flag_stack_usage_info)
3672 current_function_static_stack_size = frame_size;
43e9d192 3673
a3eb8a52
EB
3674 if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK)
3675 {
3676 if (crtl->is_leaf && !cfun->calls_alloca)
3677 {
3678 if (frame_size > PROBE_INTERVAL && frame_size > STACK_CHECK_PROTECT)
3679 aarch64_emit_probe_stack_range (STACK_CHECK_PROTECT,
3680 frame_size - STACK_CHECK_PROTECT);
3681 }
3682 else if (frame_size > 0)
3683 aarch64_emit_probe_stack_range (STACK_CHECK_PROTECT, frame_size);
3684 }
3685
5be6b295 3686 aarch64_sub_sp (IP0_REGNUM, initial_adjust, true);
43e9d192 3687
71bfb77a
WD
3688 if (callee_adjust != 0)
3689 aarch64_push_regs (reg1, reg2, callee_adjust);
43e9d192 3690
71bfb77a 3691 if (frame_pointer_needed)
43e9d192 3692 {
71bfb77a
WD
3693 if (callee_adjust == 0)
3694 aarch64_save_callee_saves (DImode, callee_offset, R29_REGNUM,
3695 R30_REGNUM, false);
3696 insn = emit_insn (gen_add3_insn (hard_frame_pointer_rtx,
3697 stack_pointer_rtx,
3698 GEN_INT (callee_offset)));
3699 RTX_FRAME_RELATED_P (insn) = 1;
3700 emit_insn (gen_stack_tie (stack_pointer_rtx, hard_frame_pointer_rtx));
43e9d192 3701 }
71bfb77a
WD
3702
3703 aarch64_save_callee_saves (DImode, callee_offset, R0_REGNUM, R30_REGNUM,
3704 callee_adjust != 0 || frame_pointer_needed);
3705 aarch64_save_callee_saves (DFmode, callee_offset, V0_REGNUM, V31_REGNUM,
3706 callee_adjust != 0 || frame_pointer_needed);
5be6b295 3707 aarch64_sub_sp (IP1_REGNUM, final_adjust, !frame_pointer_needed);
43e9d192
IB
3708}
3709
4f942779
RL
3710/* Return TRUE if we can use a simple_return insn.
3711
3712 This function checks whether the callee-saved stack is empty, which
3713 means no restore actions are needed. The pro_and_epilogue pass will use
3714 this to check whether the shrink-wrapping optimization is feasible. */
3715
3716bool
3717aarch64_use_return_insn_p (void)
3718{
3719 if (!reload_completed)
3720 return false;
3721
3722 if (crtl->profile)
3723 return false;
3724
3725 aarch64_layout_frame ();
3726
3727 return cfun->machine->frame.frame_size == 0;
3728}
3729
71bfb77a
WD
3730/* Generate the epilogue instructions for returning from a function.
3731 This is almost exactly the reverse of the prologue sequence, except
3732 that we need to insert barriers to avoid scheduling loads that read
3733 from a deallocated stack, and we optimize the unwind records by
3734 emitting them all together if possible. */
43e9d192
IB
3735void
3736aarch64_expand_epilogue (bool for_sibcall)
3737{
43e9d192 3738 aarch64_layout_frame ();
43e9d192 3739
71bfb77a
WD
3740 HOST_WIDE_INT initial_adjust = cfun->machine->frame.initial_adjust;
3741 HOST_WIDE_INT callee_adjust = cfun->machine->frame.callee_adjust;
3742 HOST_WIDE_INT final_adjust = cfun->machine->frame.final_adjust;
3743 HOST_WIDE_INT callee_offset = cfun->machine->frame.callee_offset;
3744 unsigned reg1 = cfun->machine->frame.wb_candidate1;
3745 unsigned reg2 = cfun->machine->frame.wb_candidate2;
3746 rtx cfi_ops = NULL;
3747 rtx_insn *insn;
44c0e7b9 3748
71bfb77a
WD
3749 /* We need to add a memory barrier to prevent reads from the deallocated stack. */
3750 bool need_barrier_p = (get_frame_size ()
3751 + cfun->machine->frame.saved_varargs_size) != 0;
43e9d192 3752
71bfb77a 3753 /* Emit a barrier to prevent loads from a deallocated stack. */
8144a493
WD
3754 if (final_adjust > crtl->outgoing_args_size || cfun->calls_alloca
3755 || crtl->calls_eh_return)
43e9d192 3756 {
71bfb77a
WD
3757 emit_insn (gen_stack_tie (stack_pointer_rtx, stack_pointer_rtx));
3758 need_barrier_p = false;
3759 }
7e8c2bd5 3760
71bfb77a
WD
3761 /* Restore the stack pointer from the frame pointer if it may not
3762 be the same as the stack pointer. */
3763 if (frame_pointer_needed && (final_adjust || cfun->calls_alloca))
3764 {
43e9d192
IB
3765 insn = emit_insn (gen_add3_insn (stack_pointer_rtx,
3766 hard_frame_pointer_rtx,
71bfb77a
WD
3767 GEN_INT (-callee_offset)));
3768 /* If writeback is used when restoring callee-saves, the CFA
3769 is restored on the instruction doing the writeback. */
3770 RTX_FRAME_RELATED_P (insn) = callee_adjust == 0;
43e9d192 3771 }
71bfb77a 3772 else
5be6b295 3773 aarch64_add_sp (IP1_REGNUM, final_adjust, df_regs_ever_live_p (IP1_REGNUM));
43e9d192 3774
71bfb77a
WD
3775 aarch64_restore_callee_saves (DImode, callee_offset, R0_REGNUM, R30_REGNUM,
3776 callee_adjust != 0, &cfi_ops);
3777 aarch64_restore_callee_saves (DFmode, callee_offset, V0_REGNUM, V31_REGNUM,
3778 callee_adjust != 0, &cfi_ops);
43e9d192 3779
71bfb77a
WD
3780 if (need_barrier_p)
3781 emit_insn (gen_stack_tie (stack_pointer_rtx, stack_pointer_rtx));
3782
3783 if (callee_adjust != 0)
3784 aarch64_pop_regs (reg1, reg2, callee_adjust, &cfi_ops);
3785
3786 if (callee_adjust != 0 || initial_adjust > 65536)
3787 {
3788 /* Emit delayed restores and set the CFA to be SP + initial_adjust. */
89ac681e 3789 insn = get_last_insn ();
71bfb77a
WD
3790 rtx new_cfa = plus_constant (Pmode, stack_pointer_rtx, initial_adjust);
3791 REG_NOTES (insn) = alloc_reg_note (REG_CFA_DEF_CFA, new_cfa, cfi_ops);
43e9d192 3792 RTX_FRAME_RELATED_P (insn) = 1;
71bfb77a 3793 cfi_ops = NULL;
43e9d192
IB
3794 }
3795
5be6b295 3796 aarch64_add_sp (IP0_REGNUM, initial_adjust, df_regs_ever_live_p (IP0_REGNUM));
7e8c2bd5 3797
71bfb77a
WD
3798 if (cfi_ops)
3799 {
3800 /* Emit delayed restores and reset the CFA to be SP. */
3801 insn = get_last_insn ();
3802 cfi_ops = alloc_reg_note (REG_CFA_DEF_CFA, stack_pointer_rtx, cfi_ops);
3803 REG_NOTES (insn) = cfi_ops;
3804 RTX_FRAME_RELATED_P (insn) = 1;
dd991abb
RH
3805 }
3806
db58fd89
JW
3807 /* We prefer to emit the combined return/authenticate instruction RETAA;
3808 however, there are three cases in which we must instead emit an explicit
3809 authentication instruction.
3810
3811 1) Sibcalls don't return in a normal way, so if we're about to call one
3812 we must authenticate.
3813
3814 2) The RETAA instruction is not available before ARMv8.3-A, so if we are
3815 generating code for !TARGET_ARMV8_3 we can't use it and must
3816 explicitly authenticate.
3817
3818 3) On an eh_return path we make extra stack adjustments to update the
3819 canonical frame address to be the exception handler's CFA. We want
3820 to authenticate using the CFA of the function which calls eh_return.
3821 */
3822 if (aarch64_return_address_signing_enabled ()
3823 && (for_sibcall || !TARGET_ARMV8_3 || crtl->calls_eh_return))
27169e45
JW
3824 {
3825 insn = emit_insn (gen_autisp ());
3826 add_reg_note (insn, REG_CFA_TOGGLE_RA_MANGLE, const0_rtx);
3827 RTX_FRAME_RELATED_P (insn) = 1;
3828 }
db58fd89 3829
dd991abb
RH
3830 /* Stack adjustment for exception handler. */
3831 if (crtl->calls_eh_return)
3832 {
3833 /* We need to unwind the stack by the offset computed by
3834 EH_RETURN_STACKADJ_RTX. We have already reset the CFA
3835 to be SP; letting the CFA move during this adjustment
3836 is just as correct as retaining the CFA from the body
3837 of the function. Therefore, do nothing special. */
3838 emit_insn (gen_add2_insn (stack_pointer_rtx, EH_RETURN_STACKADJ_RTX));
43e9d192
IB
3839 }
3840
3841 emit_use (gen_rtx_REG (DImode, LR_REGNUM));
3842 if (!for_sibcall)
3843 emit_jump_insn (ret_rtx);
3844}
3845
8144a493
WD
3846/* Implement EH_RETURN_HANDLER_RTX. EH returns need to either return
3847 normally or return to a previous frame after unwinding.
1c960e02 3848
8144a493
WD
3849 An EH return uses a single shared return sequence. The epilogue is
3850 exactly like a normal epilogue except that it has an extra input
3851 register (EH_RETURN_STACKADJ_RTX) which contains the stack adjustment
3852 that must be applied after the frame has been destroyed. An extra label
3853 is inserted before the epilogue which initializes this register to zero,
3854 and this is the entry point for a normal return.
43e9d192 3855
8144a493
WD
3856 An actual EH return updates the return address, initializes the stack
3857 adjustment and jumps directly into the epilogue (bypassing the zeroing
3858 of the adjustment). Since the return address is typically saved on the
3859 stack when a function makes a call, the saved LR must be updated outside
3860 the epilogue.
43e9d192 3861
8144a493
WD
3862 This poses problems as the store is generated well before the epilogue,
3863 so the offset of LR is not known yet. Also optimizations will remove the
3864 store as it appears dead, even after the epilogue is generated (as the
3865 base or offset for loading LR is different in many cases).
43e9d192 3866
8144a493
WD
3867 To avoid these problems this implementation forces the frame pointer
3868 in eh_return functions so that the location of LR is fixed and known early.
3869 It also marks the store volatile, so no optimization is permitted to
3870 remove the store. */
3871rtx
3872aarch64_eh_return_handler_rtx (void)
3873{
3874 rtx tmp = gen_frame_mem (Pmode,
3875 plus_constant (Pmode, hard_frame_pointer_rtx, UNITS_PER_WORD));
43e9d192 3876
8144a493
WD
3877 /* Mark the store volatile, so no optimization is permitted to remove it. */
3878 MEM_VOLATILE_P (tmp) = true;
3879 return tmp;
43e9d192
IB
3880}
3881
43e9d192
IB
3882/* Output code to add DELTA to the first argument, and then jump
3883 to FUNCTION. Used for C++ multiple inheritance. */
3884static void
3885aarch64_output_mi_thunk (FILE *file, tree thunk ATTRIBUTE_UNUSED,
3886 HOST_WIDE_INT delta,
3887 HOST_WIDE_INT vcall_offset,
3888 tree function)
3889{
3890 /* The this pointer is always in x0. Note that this differs from
3891 Arm, where the this pointer may be bumped to r1 if r0 is required
3892 to return a pointer to an aggregate. On AArch64 a result value
3893 pointer will be in x8. */
3894 int this_regno = R0_REGNUM;
5d8a22a5
DM
3895 rtx this_rtx, temp0, temp1, addr, funexp;
3896 rtx_insn *insn;
43e9d192 3897
75f1d6fc
SN
3898 reload_completed = 1;
3899 emit_note (NOTE_INSN_PROLOGUE_END);
43e9d192
IB
3900
3901 if (vcall_offset == 0)
5be6b295 3902 aarch64_add_constant (Pmode, this_regno, IP1_REGNUM, delta);
43e9d192
IB
3903 else
3904 {
28514dda 3905 gcc_assert ((vcall_offset & (POINTER_BYTES - 1)) == 0);
43e9d192 3906
75f1d6fc
SN
3907 this_rtx = gen_rtx_REG (Pmode, this_regno);
3908 temp0 = gen_rtx_REG (Pmode, IP0_REGNUM);
3909 temp1 = gen_rtx_REG (Pmode, IP1_REGNUM);
43e9d192 3910
75f1d6fc
SN
3911 addr = this_rtx;
3912 if (delta != 0)
3913 {
3914 if (delta >= -256 && delta < 256)
3915 addr = gen_rtx_PRE_MODIFY (Pmode, this_rtx,
3916 plus_constant (Pmode, this_rtx, delta));
3917 else
5be6b295 3918 aarch64_add_constant (Pmode, this_regno, IP1_REGNUM, delta);
43e9d192
IB
3919 }
3920
28514dda
YZ
3921 if (Pmode == ptr_mode)
3922 aarch64_emit_move (temp0, gen_rtx_MEM (ptr_mode, addr));
3923 else
3924 aarch64_emit_move (temp0,
3925 gen_rtx_ZERO_EXTEND (Pmode,
3926 gen_rtx_MEM (ptr_mode, addr)));
75f1d6fc 3927
28514dda 3928 if (vcall_offset >= -256 && vcall_offset < 4096 * POINTER_BYTES)
75f1d6fc 3929 addr = plus_constant (Pmode, temp0, vcall_offset);
43e9d192
IB
3930 else
3931 {
f43657b4
JW
3932 aarch64_internal_mov_immediate (temp1, GEN_INT (vcall_offset), true,
3933 Pmode);
75f1d6fc 3934 addr = gen_rtx_PLUS (Pmode, temp0, temp1);
43e9d192
IB
3935 }
3936
28514dda
YZ
3937 if (Pmode == ptr_mode)
3938 aarch64_emit_move (temp1, gen_rtx_MEM (ptr_mode, addr));
3939 else
3940 aarch64_emit_move (temp1,
3941 gen_rtx_SIGN_EXTEND (Pmode,
3942 gen_rtx_MEM (ptr_mode, addr)));
3943
75f1d6fc 3944 emit_insn (gen_add2_insn (this_rtx, temp1));
43e9d192
IB
3945 }
3946
75f1d6fc
SN
3947 /* Generate a tail call to the target function. */
3948 if (!TREE_USED (function))
3949 {
3950 assemble_external (function);
3951 TREE_USED (function) = 1;
3952 }
3953 funexp = XEXP (DECL_RTL (function), 0);
3954 funexp = gen_rtx_MEM (FUNCTION_MODE, funexp);
3955 insn = emit_call_insn (gen_sibcall (funexp, const0_rtx, NULL_RTX));
3956 SIBLING_CALL_P (insn) = 1;
3957
3958 insn = get_insns ();
3959 shorten_branches (insn);
3960 final_start_function (insn, file, 1);
3961 final (insn, file, 1);
43e9d192 3962 final_end_function ();
75f1d6fc
SN
3963
3964 /* Stop pretending to be a post-reload pass. */
3965 reload_completed = 0;
43e9d192
IB
3966}
3967
43e9d192
IB
3968static bool
3969aarch64_tls_referenced_p (rtx x)
3970{
3971 if (!TARGET_HAVE_TLS)
3972 return false;
e7de8563
RS
3973 subrtx_iterator::array_type array;
3974 FOR_EACH_SUBRTX (iter, array, x, ALL)
3975 {
3976 const_rtx x = *iter;
3977 if (GET_CODE (x) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (x) != 0)
3978 return true;
3979 /* Don't recurse into UNSPEC_TLS looking for TLS symbols; these are
3980 TLS offsets, not real symbol references. */
3981 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
3982 iter.skip_subrtxes ();
3983 }
3984 return false;
43e9d192
IB
3985}
3986
3987
43e9d192
IB
3988/* Return true if val can be encoded as a 12-bit unsigned immediate with
3989 a left shift of 0 or 12 bits. */
3990bool
3991aarch64_uimm12_shift (HOST_WIDE_INT val)
3992{
3993 return ((val & (((HOST_WIDE_INT) 0xfff) << 0)) == val
3994 || (val & (((HOST_WIDE_INT) 0xfff) << 12)) == val
3995 );
3996}
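/* Illustrative example (not from the upstream source): 0x123 and
   0x123000 (0x123 << 12) both satisfy aarch64_uimm12_shift, whereas
   0x123456 does not, since its nonzero bits span both halves and cannot be
   covered by a single 12-bit immediate with an optional LSL #12.  */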
3997
3998
3999/* Return true if val is an immediate that can be loaded into a
4000 register by a MOVZ instruction. */
4001static bool
ef4bddc2 4002aarch64_movw_imm (HOST_WIDE_INT val, machine_mode mode)
43e9d192
IB
4003{
4004 if (GET_MODE_SIZE (mode) > 4)
4005 {
4006 if ((val & (((HOST_WIDE_INT) 0xffff) << 32)) == val
4007 || (val & (((HOST_WIDE_INT) 0xffff) << 48)) == val)
4008 return 1;
4009 }
4010 else
4011 {
4012 /* Ignore sign extension. */
4013 val &= (HOST_WIDE_INT) 0xffffffff;
4014 }
4015 return ((val & (((HOST_WIDE_INT) 0xffff) << 0)) == val
4016 || (val & (((HOST_WIDE_INT) 0xffff) << 16)) == val);
4017}
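/* Illustrative example (not from the upstream source): for a 64-bit mode,
   0x12340000 (0x1234 << 16) and 0xabcd00000000 (0xabcd << 32) are both
   MOVZ-loadable, while 0x12340001 is not, because it has nonzero bits in
   more than one 16-bit chunk.  For 32-bit modes the sign-extended upper
   half is masked off first, so only the two low 16-bit positions count.  */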
4018
a64c73a2
WD
4019/* Multipliers for repeating bitmasks of width 32, 16, 8, 4, and 2. */
4020
4021static const unsigned HOST_WIDE_INT bitmask_imm_mul[] =
4022 {
4023 0x0000000100000001ull,
4024 0x0001000100010001ull,
4025 0x0101010101010101ull,
4026 0x1111111111111111ull,
4027 0x5555555555555555ull,
4028 };
4029
43e9d192
IB
4030
4031/* Return true if val is a valid bitmask immediate. */
a64c73a2 4032
43e9d192 4033bool
a64c73a2 4034aarch64_bitmask_imm (HOST_WIDE_INT val_in, machine_mode mode)
43e9d192 4035{
a64c73a2
WD
4036 unsigned HOST_WIDE_INT val, tmp, mask, first_one, next_one;
4037 int bits;
4038
4039 /* Check for a single sequence of one bits and return quickly if so.
4040 The special cases of all ones and all zeroes return false. */
4041 val = (unsigned HOST_WIDE_INT) val_in;
4042 tmp = val + (val & -val);
4043
4044 if (tmp == (tmp & -tmp))
4045 return (val + 1) > 1;
4046
4047 /* Replicate 32-bit immediates so we can treat them as 64-bit. */
4048 if (mode == SImode)
4049 val = (val << 32) | (val & 0xffffffff);
4050
4051 /* Invert if the immediate doesn't start with a zero bit - this means we
4052 only need to search for sequences of one bits. */
4053 if (val & 1)
4054 val = ~val;
4055
4056 /* Find the first set bit and set tmp to val with the first sequence of one
4057 bits removed. Return success if there is a single sequence of ones. */
4058 first_one = val & -val;
4059 tmp = val & (val + first_one);
4060
4061 if (tmp == 0)
4062 return true;
4063
4064 /* Find the next set bit and compute the difference in bit position. */
4065 next_one = tmp & -tmp;
4066 bits = clz_hwi (first_one) - clz_hwi (next_one);
4067 mask = val ^ tmp;
4068
4069 /* Check the bit position difference is a power of 2, and that the first
4070 sequence of one bits fits within 'bits' bits. */
4071 if ((mask >> bits) != 0 || bits != (bits & -bits))
4072 return false;
4073
4074 /* Check the sequence of one bits is repeated 64/bits times. */
4075 return val == mask * bitmask_imm_mul[__builtin_clz (bits) - 26];
43e9d192
IB
4076}
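/* Illustrative worked example (not from the upstream source):
   0x0f0f0f0f0f0f0f0f (a 4-bit run of ones repeated every 8 bits) is a valid
   bitmask immediate: after the initial inversion the repeating-pattern check
   above matches mask 0xf0 against bitmask_imm_mul[2] == 0x0101010101010101.
   Plain zero and all-ones are rejected by the early single-run test.  */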
4077
43fd192f
MC
4078/* Create mask of ones, covering the lowest to highest bits set in VAL_IN.
4079 Assumed precondition: VAL_IN is not zero. */
4080
4081unsigned HOST_WIDE_INT
4082aarch64_and_split_imm1 (HOST_WIDE_INT val_in)
4083{
4084 int lowest_bit_set = ctz_hwi (val_in);
4085 int highest_bit_set = floor_log2 (val_in);
4086 gcc_assert (val_in != 0);
4087
4088 return ((HOST_WIDE_INT_UC (2) << highest_bit_set) -
4089 (HOST_WIDE_INT_1U << lowest_bit_set));
4090}
4091
4092/* Create a constant in which all bits outside the span from the lowest set
4093 bit to the highest set bit of VAL_IN are set to 1. */
4094
4095unsigned HOST_WIDE_INT
4096aarch64_and_split_imm2 (HOST_WIDE_INT val_in)
4097{
4098 return val_in | ~aarch64_and_split_imm1 (val_in);
4099}
4100
4101/* Return true if VAL_IN is a valid 'and' bitmask immediate. */
4102
4103bool
4104aarch64_and_bitmask_imm (unsigned HOST_WIDE_INT val_in, machine_mode mode)
4105{
4106 if (aarch64_bitmask_imm (val_in, mode))
4107 return false;
4108
4109 if (aarch64_move_imm (val_in, mode))
4110 return false;
4111
4112 unsigned HOST_WIDE_INT imm2 = aarch64_and_split_imm2 (val_in);
4113
4114 return aarch64_bitmask_imm (imm2, mode);
4115}
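/* Illustrative note (not from the upstream source): the point of the split
   above is that when VAL_IN is neither a bitmask nor a MOV immediate,
   "x & VAL_IN" can still be done as two AND-immediate instructions, because
   imm1 (the contiguous span covering VAL_IN) and imm2 (VAL_IN with every bit
   outside that span forced to 1) satisfy imm1 & imm2 == VAL_IN.  For
   instance, VAL_IN == 0x00ff0f00 gives imm1 == 0x00ffff00 and
   imm2 == 0xffffffffffff0fff, both valid bitmask immediates.  */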
43e9d192
IB
4116
4117/* Return true if val is an immediate that can be loaded into a
4118 register in a single instruction. */
4119bool
ef4bddc2 4120aarch64_move_imm (HOST_WIDE_INT val, machine_mode mode)
43e9d192
IB
4121{
4122 if (aarch64_movw_imm (val, mode) || aarch64_movw_imm (~val, mode))
4123 return 1;
4124 return aarch64_bitmask_imm (val, mode);
4125}
4126
4127static bool
ef4bddc2 4128aarch64_cannot_force_const_mem (machine_mode mode ATTRIBUTE_UNUSED, rtx x)
43e9d192
IB
4129{
4130 rtx base, offset;
7eda14e1 4131
43e9d192
IB
4132 if (GET_CODE (x) == HIGH)
4133 return true;
4134
4135 split_const (x, &base, &offset);
4136 if (GET_CODE (base) == SYMBOL_REF || GET_CODE (base) == LABEL_REF)
28514dda 4137 {
a6e0bfa7 4138 if (aarch64_classify_symbol (base, offset)
28514dda
YZ
4139 != SYMBOL_FORCE_TO_MEM)
4140 return true;
4141 else
4142 /* Avoid generating a 64-bit relocation in ILP32; leave
4143 to aarch64_expand_mov_immediate to handle it properly. */
4144 return mode != ptr_mode;
4145 }
43e9d192
IB
4146
4147 return aarch64_tls_referenced_p (x);
4148}
4149
e79136e4
WD
4150/* Implement TARGET_CASE_VALUES_THRESHOLD.
4151 The expansion for a table switch is quite expensive due to the number
4152 of instructions, the table lookup and the hard-to-predict indirect jump.
4153 When optimizing for speed at -O3 and above, use the per-core tuning if
4154 set, otherwise use tables for > 16 cases as a tradeoff between size and
4155 performance. When optimizing for size, use the default setting. */
50487d79
EM
4156
4157static unsigned int
4158aarch64_case_values_threshold (void)
4159{
4160 /* Use the specified limit for the number of cases before using jump
4161 tables at higher optimization levels. */
4162 if (optimize > 2
4163 && selected_cpu->tune->max_case_values != 0)
4164 return selected_cpu->tune->max_case_values;
4165 else
e79136e4 4166 return optimize_size ? default_case_values_threshold () : 17;
50487d79
EM
4167}
4168
43e9d192
IB
4169/* Return true if register REGNO is a valid index register.
4170 STRICT_P is true if REG_OK_STRICT is in effect. */
4171
4172bool
4173aarch64_regno_ok_for_index_p (int regno, bool strict_p)
4174{
4175 if (!HARD_REGISTER_NUM_P (regno))
4176 {
4177 if (!strict_p)
4178 return true;
4179
4180 if (!reg_renumber)
4181 return false;
4182
4183 regno = reg_renumber[regno];
4184 }
4185 return GP_REGNUM_P (regno);
4186}
4187
4188/* Return true if register REGNO is a valid base register for mode MODE.
4189 STRICT_P is true if REG_OK_STRICT is in effect. */
4190
4191bool
4192aarch64_regno_ok_for_base_p (int regno, bool strict_p)
4193{
4194 if (!HARD_REGISTER_NUM_P (regno))
4195 {
4196 if (!strict_p)
4197 return true;
4198
4199 if (!reg_renumber)
4200 return false;
4201
4202 regno = reg_renumber[regno];
4203 }
4204
4205 /* The fake registers will be eliminated to either the stack or
4206 hard frame pointer, both of which are usually valid base registers.
4207 Reload deals with the cases where the eliminated form isn't valid. */
4208 return (GP_REGNUM_P (regno)
4209 || regno == SP_REGNUM
4210 || regno == FRAME_POINTER_REGNUM
4211 || regno == ARG_POINTER_REGNUM);
4212}
4213
4214/* Return true if X is a valid base register for mode MODE.
4215 STRICT_P is true if REG_OK_STRICT is in effect. */
4216
4217static bool
4218aarch64_base_register_rtx_p (rtx x, bool strict_p)
4219{
4220 if (!strict_p && GET_CODE (x) == SUBREG)
4221 x = SUBREG_REG (x);
4222
4223 return (REG_P (x) && aarch64_regno_ok_for_base_p (REGNO (x), strict_p));
4224}
4225
4226/* Return true if the address offset X is a valid index. If it is, fill in INFO
4227 appropriately. STRICT_P is true if REG_OK_STRICT is in effect. */
4228
4229static bool
4230aarch64_classify_index (struct aarch64_address_info *info, rtx x,
ef4bddc2 4231 machine_mode mode, bool strict_p)
43e9d192
IB
4232{
4233 enum aarch64_address_type type;
4234 rtx index;
4235 int shift;
4236
4237 /* (reg:P) */
4238 if ((REG_P (x) || GET_CODE (x) == SUBREG)
4239 && GET_MODE (x) == Pmode)
4240 {
4241 type = ADDRESS_REG_REG;
4242 index = x;
4243 shift = 0;
4244 }
4245 /* (sign_extend:DI (reg:SI)) */
4246 else if ((GET_CODE (x) == SIGN_EXTEND
4247 || GET_CODE (x) == ZERO_EXTEND)
4248 && GET_MODE (x) == DImode
4249 && GET_MODE (XEXP (x, 0)) == SImode)
4250 {
4251 type = (GET_CODE (x) == SIGN_EXTEND)
4252 ? ADDRESS_REG_SXTW : ADDRESS_REG_UXTW;
4253 index = XEXP (x, 0);
4254 shift = 0;
4255 }
4256 /* (mult:DI (sign_extend:DI (reg:SI)) (const_int scale)) */
4257 else if (GET_CODE (x) == MULT
4258 && (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
4259 || GET_CODE (XEXP (x, 0)) == ZERO_EXTEND)
4260 && GET_MODE (XEXP (x, 0)) == DImode
4261 && GET_MODE (XEXP (XEXP (x, 0), 0)) == SImode
4262 && CONST_INT_P (XEXP (x, 1)))
4263 {
4264 type = (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND)
4265 ? ADDRESS_REG_SXTW : ADDRESS_REG_UXTW;
4266 index = XEXP (XEXP (x, 0), 0);
4267 shift = exact_log2 (INTVAL (XEXP (x, 1)));
4268 }
4269 /* (ashift:DI (sign_extend:DI (reg:SI)) (const_int shift)) */
4270 else if (GET_CODE (x) == ASHIFT
4271 && (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
4272 || GET_CODE (XEXP (x, 0)) == ZERO_EXTEND)
4273 && GET_MODE (XEXP (x, 0)) == DImode
4274 && GET_MODE (XEXP (XEXP (x, 0), 0)) == SImode
4275 && CONST_INT_P (XEXP (x, 1)))
4276 {
4277 type = (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND)
4278 ? ADDRESS_REG_SXTW : ADDRESS_REG_UXTW;
4279 index = XEXP (XEXP (x, 0), 0);
4280 shift = INTVAL (XEXP (x, 1));
4281 }
4282 /* (sign_extract:DI (mult:DI (reg:DI) (const_int scale)) 32+shift 0) */
4283 else if ((GET_CODE (x) == SIGN_EXTRACT
4284 || GET_CODE (x) == ZERO_EXTRACT)
4285 && GET_MODE (x) == DImode
4286 && GET_CODE (XEXP (x, 0)) == MULT
4287 && GET_MODE (XEXP (XEXP (x, 0), 0)) == DImode
4288 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))
4289 {
4290 type = (GET_CODE (x) == SIGN_EXTRACT)
4291 ? ADDRESS_REG_SXTW : ADDRESS_REG_UXTW;
4292 index = XEXP (XEXP (x, 0), 0);
4293 shift = exact_log2 (INTVAL (XEXP (XEXP (x, 0), 1)));
4294 if (INTVAL (XEXP (x, 1)) != 32 + shift
4295 || INTVAL (XEXP (x, 2)) != 0)
4296 shift = -1;
4297 }
4298 /* (and:DI (mult:DI (reg:DI) (const_int scale))
4299 (const_int 0xffffffff<<shift)) */
4300 else if (GET_CODE (x) == AND
4301 && GET_MODE (x) == DImode
4302 && GET_CODE (XEXP (x, 0)) == MULT
4303 && GET_MODE (XEXP (XEXP (x, 0), 0)) == DImode
4304 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
4305 && CONST_INT_P (XEXP (x, 1)))
4306 {
4307 type = ADDRESS_REG_UXTW;
4308 index = XEXP (XEXP (x, 0), 0);
4309 shift = exact_log2 (INTVAL (XEXP (XEXP (x, 0), 1)));
4310 if (INTVAL (XEXP (x, 1)) != (HOST_WIDE_INT)0xffffffff << shift)
4311 shift = -1;
4312 }
4313 /* (sign_extract:DI (ashift:DI (reg:DI) (const_int shift)) 32+shift 0) */
4314 else if ((GET_CODE (x) == SIGN_EXTRACT
4315 || GET_CODE (x) == ZERO_EXTRACT)
4316 && GET_MODE (x) == DImode
4317 && GET_CODE (XEXP (x, 0)) == ASHIFT
4318 && GET_MODE (XEXP (XEXP (x, 0), 0)) == DImode
4319 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))
4320 {
4321 type = (GET_CODE (x) == SIGN_EXTRACT)
4322 ? ADDRESS_REG_SXTW : ADDRESS_REG_UXTW;
4323 index = XEXP (XEXP (x, 0), 0);
4324 shift = INTVAL (XEXP (XEXP (x, 0), 1));
4325 if (INTVAL (XEXP (x, 1)) != 32 + shift
4326 || INTVAL (XEXP (x, 2)) != 0)
4327 shift = -1;
4328 }
4329 /* (and:DI (ashift:DI (reg:DI) (const_int shift))
4330 (const_int 0xffffffff<<shift)) */
4331 else if (GET_CODE (x) == AND
4332 && GET_MODE (x) == DImode
4333 && GET_CODE (XEXP (x, 0)) == ASHIFT
4334 && GET_MODE (XEXP (XEXP (x, 0), 0)) == DImode
4335 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
4336 && CONST_INT_P (XEXP (x, 1)))
4337 {
4338 type = ADDRESS_REG_UXTW;
4339 index = XEXP (XEXP (x, 0), 0);
4340 shift = INTVAL (XEXP (XEXP (x, 0), 1));
4341 if (INTVAL (XEXP (x, 1)) != (HOST_WIDE_INT)0xffffffff << shift)
4342 shift = -1;
4343 }
4344 /* (mult:P (reg:P) (const_int scale)) */
4345 else if (GET_CODE (x) == MULT
4346 && GET_MODE (x) == Pmode
4347 && GET_MODE (XEXP (x, 0)) == Pmode
4348 && CONST_INT_P (XEXP (x, 1)))
4349 {
4350 type = ADDRESS_REG_REG;
4351 index = XEXP (x, 0);
4352 shift = exact_log2 (INTVAL (XEXP (x, 1)));
4353 }
4354 /* (ashift:P (reg:P) (const_int shift)) */
4355 else if (GET_CODE (x) == ASHIFT
4356 && GET_MODE (x) == Pmode
4357 && GET_MODE (XEXP (x, 0)) == Pmode
4358 && CONST_INT_P (XEXP (x, 1)))
4359 {
4360 type = ADDRESS_REG_REG;
4361 index = XEXP (x, 0);
4362 shift = INTVAL (XEXP (x, 1));
4363 }
4364 else
4365 return false;
4366
4367 if (GET_CODE (index) == SUBREG)
4368 index = SUBREG_REG (index);
4369
4370 if ((shift == 0 ||
4371 (shift > 0 && shift <= 3
4372 && (1 << shift) == GET_MODE_SIZE (mode)))
4373 && REG_P (index)
4374 && aarch64_regno_ok_for_index_p (REGNO (index), strict_p))
4375 {
4376 info->type = type;
4377 info->offset = index;
4378 info->shift = shift;
4379 return true;
4380 }
4381
4382 return false;
4383}
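/* Illustrative example (not from the upstream source): the index rtx
   (mult:DI (sign_extend:DI (reg:SI)) (const_int 4)) used with an SImode
   access is classified above as ADDRESS_REG_SXTW with shift 2, i.e. the
   "[xN, wM, sxtw #2]" addressing form; a nonzero shift is only accepted when
   1 << shift equals the access size.  */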
4384
abc52318
KT
4385/* Return true if MODE is one of the modes for which we
4386 support LDP/STP operations. */
4387
4388static bool
4389aarch64_mode_valid_for_sched_fusion_p (machine_mode mode)
4390{
4391 return mode == SImode || mode == DImode
4392 || mode == SFmode || mode == DFmode
4393 || (aarch64_vector_mode_supported_p (mode)
4394 && GET_MODE_SIZE (mode) == 8);
4395}
4396
9e0218fc
RH
4397/* Return true if REGNO is a virtual pointer register, or an eliminable
4398 "soft" frame register. Like REGNO_PTR_FRAME_P except that we don't
4399 include stack_pointer or hard_frame_pointer. */
4400static bool
4401virt_or_elim_regno_p (unsigned regno)
4402{
4403 return ((regno >= FIRST_VIRTUAL_REGISTER
4404 && regno <= LAST_VIRTUAL_POINTER_REGISTER)
4405 || regno == FRAME_POINTER_REGNUM
4406 || regno == ARG_POINTER_REGNUM);
4407}
4408
43e9d192
IB
4409/* Return true if X is a valid address for machine mode MODE. If it is,
4410 fill in INFO appropriately. STRICT_P is true if REG_OK_STRICT is in
4411 effect. OUTER_CODE is PARALLEL for a load/store pair. */
4412
4413static bool
4414aarch64_classify_address (struct aarch64_address_info *info,
ef4bddc2 4415 rtx x, machine_mode mode,
43e9d192
IB
4416 RTX_CODE outer_code, bool strict_p)
4417{
4418 enum rtx_code code = GET_CODE (x);
4419 rtx op0, op1;
2d8c6dc1 4420
80d43579
WD
4421 /* On BE, we use load/store pair for all large int mode load/stores.
4422 TI/TFmode may also use a load/store pair. */
2d8c6dc1 4423 bool load_store_pair_p = (outer_code == PARALLEL
80d43579
WD
4424 || mode == TImode
4425 || mode == TFmode
2d8c6dc1
AH
4426 || (BYTES_BIG_ENDIAN
4427 && aarch64_vect_struct_mode_p (mode)));
4428
43e9d192 4429 bool allow_reg_index_p =
2d8c6dc1
AH
4430 !load_store_pair_p
4431 && (GET_MODE_SIZE (mode) != 16 || aarch64_vector_mode_supported_p (mode))
4432 && !aarch64_vect_struct_mode_p (mode);
4433
4434 /* On LE, for AdvSIMD, don't support anything other than POST_INC or
4435 REG addressing. */
4436 if (aarch64_vect_struct_mode_p (mode) && !BYTES_BIG_ENDIAN
43e9d192
IB
4437 && (code != POST_INC && code != REG))
4438 return false;
4439
4440 switch (code)
4441 {
4442 case REG:
4443 case SUBREG:
4444 info->type = ADDRESS_REG_IMM;
4445 info->base = x;
4446 info->offset = const0_rtx;
4447 return aarch64_base_register_rtx_p (x, strict_p);
4448
4449 case PLUS:
4450 op0 = XEXP (x, 0);
4451 op1 = XEXP (x, 1);
15c0c5c9
JW
4452
4453 if (! strict_p
4aa81c2e 4454 && REG_P (op0)
9e0218fc 4455 && virt_or_elim_regno_p (REGNO (op0))
4aa81c2e 4456 && CONST_INT_P (op1))
15c0c5c9
JW
4457 {
4458 info->type = ADDRESS_REG_IMM;
4459 info->base = op0;
4460 info->offset = op1;
4461
4462 return true;
4463 }
4464
43e9d192
IB
4465 if (GET_MODE_SIZE (mode) != 0
4466 && CONST_INT_P (op1)
4467 && aarch64_base_register_rtx_p (op0, strict_p))
4468 {
4469 HOST_WIDE_INT offset = INTVAL (op1);
4470
4471 info->type = ADDRESS_REG_IMM;
4472 info->base = op0;
4473 info->offset = op1;
4474
4475 /* TImode and TFmode values are allowed in both pairs of X
4476 registers and individual Q registers. The available
4477 address modes are:
4478 X,X: 7-bit signed scaled offset
4479 Q: 9-bit signed offset
4480 We conservatively require an offset representable in either mode.
8ed49fab
KT
4481 When performing the check for pairs of X registers i.e. LDP/STP
4482 pass down DImode since that is the natural size of the LDP/STP
4483 instruction memory accesses. */
43e9d192 4484 if (mode == TImode || mode == TFmode)
8ed49fab 4485 return (aarch64_offset_7bit_signed_scaled_p (DImode, offset)
8734dfac
WD
4486 && (offset_9bit_signed_unscaled_p (mode, offset)
4487 || offset_12bit_unsigned_scaled_p (mode, offset)));
43e9d192 4488
2d8c6dc1
AH
4489 /* A 7bit offset check because OImode will emit a ldp/stp
4490 instruction (only big endian will get here).
4491 For ldp/stp instructions, the offset is scaled for the size of a
4492 single element of the pair. */
4493 if (mode == OImode)
4494 return aarch64_offset_7bit_signed_scaled_p (TImode, offset);
4495
4496 /* Three 9/12 bit offsets checks because CImode will emit three
4497 ldr/str instructions (only big endian will get here). */
4498 if (mode == CImode)
4499 return (aarch64_offset_7bit_signed_scaled_p (TImode, offset)
4500 && (offset_9bit_signed_unscaled_p (V16QImode, offset + 32)
4501 || offset_12bit_unsigned_scaled_p (V16QImode,
4502 offset + 32)));
4503
4504 /* Two 7bit offsets checks because XImode will emit two ldp/stp
4505 instructions (only big endian will get here). */
4506 if (mode == XImode)
4507 return (aarch64_offset_7bit_signed_scaled_p (TImode, offset)
4508 && aarch64_offset_7bit_signed_scaled_p (TImode,
4509 offset + 32));
4510
4511 if (load_store_pair_p)
43e9d192 4512 return ((GET_MODE_SIZE (mode) == 4 || GET_MODE_SIZE (mode) == 8)
44707478 4513 && aarch64_offset_7bit_signed_scaled_p (mode, offset));
43e9d192
IB
4514 else
4515 return (offset_9bit_signed_unscaled_p (mode, offset)
4516 || offset_12bit_unsigned_scaled_p (mode, offset));
4517 }
4518
4519 if (allow_reg_index_p)
4520 {
4521 /* Look for base + (scaled/extended) index register. */
4522 if (aarch64_base_register_rtx_p (op0, strict_p)
4523 && aarch64_classify_index (info, op1, mode, strict_p))
4524 {
4525 info->base = op0;
4526 return true;
4527 }
4528 if (aarch64_base_register_rtx_p (op1, strict_p)
4529 && aarch64_classify_index (info, op0, mode, strict_p))
4530 {
4531 info->base = op1;
4532 return true;
4533 }
4534 }
4535
4536 return false;
4537
4538 case POST_INC:
4539 case POST_DEC:
4540 case PRE_INC:
4541 case PRE_DEC:
4542 info->type = ADDRESS_REG_WB;
4543 info->base = XEXP (x, 0);
4544 info->offset = NULL_RTX;
4545 return aarch64_base_register_rtx_p (info->base, strict_p);
4546
4547 case POST_MODIFY:
4548 case PRE_MODIFY:
4549 info->type = ADDRESS_REG_WB;
4550 info->base = XEXP (x, 0);
4551 if (GET_CODE (XEXP (x, 1)) == PLUS
4552 && CONST_INT_P (XEXP (XEXP (x, 1), 1))
4553 && rtx_equal_p (XEXP (XEXP (x, 1), 0), info->base)
4554 && aarch64_base_register_rtx_p (info->base, strict_p))
4555 {
4556 HOST_WIDE_INT offset;
4557 info->offset = XEXP (XEXP (x, 1), 1);
4558 offset = INTVAL (info->offset);
4559
4560 /* TImode and TFmode values are allowed in both pairs of X
4561 registers and individual Q registers. The available
4562 address modes are:
4563 X,X: 7-bit signed scaled offset
4564 Q: 9-bit signed offset
4565 We conservatively require an offset representable in either mode.
4566 */
4567 if (mode == TImode || mode == TFmode)
44707478 4568 return (aarch64_offset_7bit_signed_scaled_p (mode, offset)
43e9d192
IB
4569 && offset_9bit_signed_unscaled_p (mode, offset));
4570
2d8c6dc1 4571 if (load_store_pair_p)
43e9d192 4572 return ((GET_MODE_SIZE (mode) == 4 || GET_MODE_SIZE (mode) == 8)
44707478 4573 && aarch64_offset_7bit_signed_scaled_p (mode, offset));
43e9d192
IB
4574 else
4575 return offset_9bit_signed_unscaled_p (mode, offset);
4576 }
4577 return false;
4578
4579 case CONST:
4580 case SYMBOL_REF:
4581 case LABEL_REF:
79517551
SN
4582 /* load literal: pc-relative constant pool entry. Only supported
4583 for SI mode or larger. */
43e9d192 4584 info->type = ADDRESS_SYMBOLIC;
2d8c6dc1
AH
4585
4586 if (!load_store_pair_p && GET_MODE_SIZE (mode) >= 4)
43e9d192
IB
4587 {
4588 rtx sym, addend;
4589
4590 split_const (x, &sym, &addend);
b4f50fd4
RR
4591 return ((GET_CODE (sym) == LABEL_REF
4592 || (GET_CODE (sym) == SYMBOL_REF
4593 && CONSTANT_POOL_ADDRESS_P (sym)
9ee6540a 4594 && aarch64_pcrelative_literal_loads)));
43e9d192
IB
4595 }
4596 return false;
4597
4598 case LO_SUM:
4599 info->type = ADDRESS_LO_SUM;
4600 info->base = XEXP (x, 0);
4601 info->offset = XEXP (x, 1);
4602 if (allow_reg_index_p
4603 && aarch64_base_register_rtx_p (info->base, strict_p))
4604 {
4605 rtx sym, offs;
4606 split_const (info->offset, &sym, &offs);
4607 if (GET_CODE (sym) == SYMBOL_REF
a6e0bfa7 4608 && (aarch64_classify_symbol (sym, offs) == SYMBOL_SMALL_ABSOLUTE))
43e9d192
IB
4609 {
4610 /* The symbol and offset must be aligned to the access size. */
4611 unsigned int align;
4612 unsigned int ref_size;
4613
4614 if (CONSTANT_POOL_ADDRESS_P (sym))
4615 align = GET_MODE_ALIGNMENT (get_pool_mode (sym));
4616 else if (TREE_CONSTANT_POOL_ADDRESS_P (sym))
4617 {
4618 tree exp = SYMBOL_REF_DECL (sym);
4619 align = TYPE_ALIGN (TREE_TYPE (exp));
4620 align = CONSTANT_ALIGNMENT (exp, align);
4621 }
4622 else if (SYMBOL_REF_DECL (sym))
4623 align = DECL_ALIGN (SYMBOL_REF_DECL (sym));
6c031d8d
KV
4624 else if (SYMBOL_REF_HAS_BLOCK_INFO_P (sym)
4625 && SYMBOL_REF_BLOCK (sym) != NULL)
4626 align = SYMBOL_REF_BLOCK (sym)->alignment;
43e9d192
IB
4627 else
4628 align = BITS_PER_UNIT;
4629
4630 ref_size = GET_MODE_SIZE (mode);
4631 if (ref_size == 0)
4632 ref_size = GET_MODE_SIZE (DImode);
4633
4634 return ((INTVAL (offs) & (ref_size - 1)) == 0
4635 && ((align / BITS_PER_UNIT) & (ref_size - 1)) == 0);
4636 }
4637 }
4638 return false;
4639
4640 default:
4641 return false;
4642 }
4643}
4644
9bf2f779
KT
4645/* Return true if the address X is valid for a PRFM instruction.
4646 STRICT_P is true if we should do strict checking with
4647 aarch64_classify_address. */
4648
4649bool
4650aarch64_address_valid_for_prefetch_p (rtx x, bool strict_p)
4651{
4652 struct aarch64_address_info addr;
4653
4654 /* PRFM accepts the same addresses as DImode... */
4655 bool res = aarch64_classify_address (&addr, x, DImode, MEM, strict_p);
4656 if (!res)
4657 return false;
4658
4659 /* ... except writeback forms. */
4660 return addr.type != ADDRESS_REG_WB;
4661}
4662
43e9d192
IB
4663bool
4664aarch64_symbolic_address_p (rtx x)
4665{
4666 rtx offset;
4667
4668 split_const (x, &x, &offset);
4669 return GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == LABEL_REF;
4670}
4671
a6e0bfa7 4672/* Classify the base of symbolic expression X. */
da4f13a4
MS
4673
4674enum aarch64_symbol_type
a6e0bfa7 4675aarch64_classify_symbolic_expression (rtx x)
43e9d192
IB
4676{
4677 rtx offset;
da4f13a4 4678
43e9d192 4679 split_const (x, &x, &offset);
a6e0bfa7 4680 return aarch64_classify_symbol (x, offset);
43e9d192
IB
4681}
4682
4683
4684/* Return TRUE if X is a legitimate address for accessing memory in
4685 mode MODE. */
4686static bool
ef4bddc2 4687aarch64_legitimate_address_hook_p (machine_mode mode, rtx x, bool strict_p)
43e9d192
IB
4688{
4689 struct aarch64_address_info addr;
4690
4691 return aarch64_classify_address (&addr, x, mode, MEM, strict_p);
4692}
4693
4694/* Return TRUE if X is a legitimate address for accessing memory in
4695 mode MODE. OUTER_CODE will be PARALLEL if this is a load/store
4696 pair operation. */
4697bool
ef4bddc2 4698aarch64_legitimate_address_p (machine_mode mode, rtx x,
aef66c94 4699 RTX_CODE outer_code, bool strict_p)
43e9d192
IB
4700{
4701 struct aarch64_address_info addr;
4702
4703 return aarch64_classify_address (&addr, x, mode, outer_code, strict_p);
4704}
4705
491ec060
WD
4706/* Split an out-of-range address displacement into a base and offset.
4707 Use 4KB range for 1- and 2-byte accesses and a 16KB range otherwise
4708 to increase opportunities for sharing the base address across accesses of different sizes.
8734dfac 4709 For unaligned accesses and TI/TF mode use the signed 9-bit range. */
491ec060
WD
4710static bool
4711aarch64_legitimize_address_displacement (rtx *disp, rtx *off, machine_mode mode)
4712{
8734dfac
WD
4713 HOST_WIDE_INT offset = INTVAL (*disp);
4714 HOST_WIDE_INT base = offset & ~(GET_MODE_SIZE (mode) < 4 ? 0xfff : 0x3ffc);
491ec060 4715
8734dfac
WD
4716 if (mode == TImode || mode == TFmode
4717 || (offset & (GET_MODE_SIZE (mode) - 1)) != 0)
4718 base = (offset + 0x100) & ~0x1ff;
491ec060 4719
8734dfac
WD
4720 *off = GEN_INT (base);
4721 *disp = GEN_INT (offset - base);
491ec060
WD
4722 return true;
4723}
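/* Illustrative example (not from the upstream source): an aligned DImode
   access at displacement 0x8008 (just past the 12-bit scaled limit of 32760)
   is split above into an anchor of 0x8000 plus a residual displacement of 8;
   nearby accesses of other sizes can then reuse the same anchor, which is
   what the 16KB range in the comment above refers to.  */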
4724
43e9d192
IB
4725/* Return TRUE if rtx X is the immediate constant 0.0. */
4726bool
3520f7cc 4727aarch64_float_const_zero_rtx_p (rtx x)
43e9d192 4728{
43e9d192
IB
4729 if (GET_MODE (x) == VOIDmode)
4730 return false;
4731
34a72c33 4732 if (REAL_VALUE_MINUS_ZERO (*CONST_DOUBLE_REAL_VALUE (x)))
43e9d192 4733 return !HONOR_SIGNED_ZEROS (GET_MODE (x));
34a72c33 4734 return real_equal (CONST_DOUBLE_REAL_VALUE (x), &dconst0);
43e9d192
IB
4735}
4736
70f09188
AP
4737/* Return the fixed registers used for condition codes. */
4738
4739static bool
4740aarch64_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
4741{
4742 *p1 = CC_REGNUM;
4743 *p2 = INVALID_REGNUM;
4744 return true;
4745}
4746
47210a04
RL
4747/* This function is used by the call expanders of the machine description.
4748 RESULT is the register in which the result is returned. It's NULL for
4749 "call" and "sibcall".
4750 MEM is the location of the function call.
4751 SIBCALL indicates whether this function call is a normal call or a sibling
4752 call; a different insn pattern is generated accordingly. */
4753
4754void
4755aarch64_expand_call (rtx result, rtx mem, bool sibcall)
4756{
4757 rtx call, callee, tmp;
4758 rtvec vec;
4759 machine_mode mode;
4760
4761 gcc_assert (MEM_P (mem));
4762 callee = XEXP (mem, 0);
4763 mode = GET_MODE (callee);
4764 gcc_assert (mode == Pmode);
4765
4766 /* Decide if we should generate indirect calls by loading the
4767 address of the callee into a register before performing
4768 the branch-and-link. */
4769 if (SYMBOL_REF_P (callee)
4770 ? (aarch64_is_long_call_p (callee)
4771 || aarch64_is_noplt_call_p (callee))
4772 : !REG_P (callee))
4773 XEXP (mem, 0) = force_reg (mode, callee);
4774
4775 call = gen_rtx_CALL (VOIDmode, mem, const0_rtx);
4776
4777 if (result != NULL_RTX)
4778 call = gen_rtx_SET (result, call);
4779
4780 if (sibcall)
4781 tmp = ret_rtx;
4782 else
4783 tmp = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (Pmode, LR_REGNUM));
4784
4785 vec = gen_rtvec (2, call, tmp);
4786 call = gen_rtx_PARALLEL (VOIDmode, vec);
4787
4788 aarch64_emit_call_insn (call);
4789}
4790
78607708
TV
4791/* Emit call insn with PAT and do aarch64-specific handling. */
4792
d07a3fed 4793void
78607708
TV
4794aarch64_emit_call_insn (rtx pat)
4795{
4796 rtx insn = emit_call_insn (pat);
4797
4798 rtx *fusage = &CALL_INSN_FUNCTION_USAGE (insn);
4799 clobber_reg (fusage, gen_rtx_REG (word_mode, IP0_REGNUM));
4800 clobber_reg (fusage, gen_rtx_REG (word_mode, IP1_REGNUM));
4801}
4802
ef4bddc2 4803machine_mode
43e9d192
IB
4804aarch64_select_cc_mode (RTX_CODE code, rtx x, rtx y)
4805{
4806 /* All floating point compares return CCFP if it is an equality
4807 comparison, and CCFPE otherwise. */
4808 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
4809 {
4810 switch (code)
4811 {
4812 case EQ:
4813 case NE:
4814 case UNORDERED:
4815 case ORDERED:
4816 case UNLT:
4817 case UNLE:
4818 case UNGT:
4819 case UNGE:
4820 case UNEQ:
4821 case LTGT:
4822 return CCFPmode;
4823
4824 case LT:
4825 case LE:
4826 case GT:
4827 case GE:
4828 return CCFPEmode;
4829
4830 default:
4831 gcc_unreachable ();
4832 }
4833 }
4834
2b8568fe
KT
4835 /* Equality comparisons of short modes against zero can be performed
4836 using the TST instruction with the appropriate bitmask. */
4837 if (y == const0_rtx && REG_P (x)
4838 && (code == EQ || code == NE)
4839 && (GET_MODE (x) == HImode || GET_MODE (x) == QImode))
4840 return CC_NZmode;
4841
b06335f9
KT
4842 /* Similarly, comparisons of zero_extends from shorter modes can
4843 be performed using an ANDS with an immediate mask. */
4844 if (y == const0_rtx && GET_CODE (x) == ZERO_EXTEND
4845 && (GET_MODE (x) == SImode || GET_MODE (x) == DImode)
4846 && (GET_MODE (XEXP (x, 0)) == HImode || GET_MODE (XEXP (x, 0)) == QImode)
4847 && (code == EQ || code == NE))
4848 return CC_NZmode;
4849
43e9d192
IB
4850 if ((GET_MODE (x) == SImode || GET_MODE (x) == DImode)
4851 && y == const0_rtx
4852 && (code == EQ || code == NE || code == LT || code == GE)
b056c910 4853 && (GET_CODE (x) == PLUS || GET_CODE (x) == MINUS || GET_CODE (x) == AND
7325d85a
KT
4854 || GET_CODE (x) == NEG
4855 || (GET_CODE (x) == ZERO_EXTRACT && CONST_INT_P (XEXP (x, 1))
4856 && CONST_INT_P (XEXP (x, 2)))))
43e9d192
IB
4857 return CC_NZmode;
4858
1c992d1e 4859 /* A compare with a shifted operand. Because of canonicalization,
43e9d192
IB
4860 the comparison will have to be swapped when we emit the assembly
4861 code. */
4862 if ((GET_MODE (x) == SImode || GET_MODE (x) == DImode)
ffa8a921 4863 && (REG_P (y) || GET_CODE (y) == SUBREG || y == const0_rtx)
43e9d192
IB
4864 && (GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT
4865 || GET_CODE (x) == LSHIFTRT
1c992d1e 4866 || GET_CODE (x) == ZERO_EXTEND || GET_CODE (x) == SIGN_EXTEND))
43e9d192
IB
4867 return CC_SWPmode;
4868
1c992d1e
RE
4869 /* Similarly for a negated operand, but we can only do this for
4870 equalities. */
4871 if ((GET_MODE (x) == SImode || GET_MODE (x) == DImode)
4aa81c2e 4872 && (REG_P (y) || GET_CODE (y) == SUBREG)
1c992d1e
RE
4873 && (code == EQ || code == NE)
4874 && GET_CODE (x) == NEG)
4875 return CC_Zmode;
4876
ef22810a
RH
4877 /* A test for unsigned overflow. */
4878 if ((GET_MODE (x) == DImode || GET_MODE (x) == TImode)
4879 && code == NE
4880 && GET_CODE (x) == PLUS
4881 && GET_CODE (y) == ZERO_EXTEND)
4882 return CC_Cmode;
4883
43e9d192
IB
4884 /* For everything else, return CCmode. */
4885 return CCmode;
4886}
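/* Illustrative example (not from the upstream source): as an instance of the
   special cases above, (compare (zero_extend:SI (reg:QI)) (const_int 0))
   under an EQ or NE comparison selects CC_NZmode, which lets the comparison
   be emitted as an ANDS/TST with the mask 255 instead of a separate extend
   followed by a compare.  */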
4887
3dfa7055
ZC
4888static int
4889aarch64_get_condition_code_1 (enum machine_mode, enum rtx_code);
4890
cd5660ab 4891int
43e9d192
IB
4892aarch64_get_condition_code (rtx x)
4893{
ef4bddc2 4894 machine_mode mode = GET_MODE (XEXP (x, 0));
43e9d192
IB
4895 enum rtx_code comp_code = GET_CODE (x);
4896
4897 if (GET_MODE_CLASS (mode) != MODE_CC)
4898 mode = SELECT_CC_MODE (comp_code, XEXP (x, 0), XEXP (x, 1));
3dfa7055
ZC
4899 return aarch64_get_condition_code_1 (mode, comp_code);
4900}
43e9d192 4901
3dfa7055
ZC
4902static int
4903aarch64_get_condition_code_1 (enum machine_mode mode, enum rtx_code comp_code)
4904{
43e9d192
IB
4905 switch (mode)
4906 {
4907 case CCFPmode:
4908 case CCFPEmode:
4909 switch (comp_code)
4910 {
4911 case GE: return AARCH64_GE;
4912 case GT: return AARCH64_GT;
4913 case LE: return AARCH64_LS;
4914 case LT: return AARCH64_MI;
4915 case NE: return AARCH64_NE;
4916 case EQ: return AARCH64_EQ;
4917 case ORDERED: return AARCH64_VC;
4918 case UNORDERED: return AARCH64_VS;
4919 case UNLT: return AARCH64_LT;
4920 case UNLE: return AARCH64_LE;
4921 case UNGT: return AARCH64_HI;
4922 case UNGE: return AARCH64_PL;
cd5660ab 4923 default: return -1;
43e9d192
IB
4924 }
4925 break;
4926
4927 case CCmode:
4928 switch (comp_code)
4929 {
4930 case NE: return AARCH64_NE;
4931 case EQ: return AARCH64_EQ;
4932 case GE: return AARCH64_GE;
4933 case GT: return AARCH64_GT;
4934 case LE: return AARCH64_LE;
4935 case LT: return AARCH64_LT;
4936 case GEU: return AARCH64_CS;
4937 case GTU: return AARCH64_HI;
4938 case LEU: return AARCH64_LS;
4939 case LTU: return AARCH64_CC;
cd5660ab 4940 default: return -1;
43e9d192
IB
4941 }
4942 break;
4943
4944 case CC_SWPmode:
43e9d192
IB
4945 switch (comp_code)
4946 {
4947 case NE: return AARCH64_NE;
4948 case EQ: return AARCH64_EQ;
4949 case GE: return AARCH64_LE;
4950 case GT: return AARCH64_LT;
4951 case LE: return AARCH64_GE;
4952 case LT: return AARCH64_GT;
4953 case GEU: return AARCH64_LS;
4954 case GTU: return AARCH64_CC;
4955 case LEU: return AARCH64_CS;
4956 case LTU: return AARCH64_HI;
cd5660ab 4957 default: return -1;
43e9d192
IB
4958 }
4959 break;
4960
4961 case CC_NZmode:
4962 switch (comp_code)
4963 {
4964 case NE: return AARCH64_NE;
4965 case EQ: return AARCH64_EQ;
4966 case GE: return AARCH64_PL;
4967 case LT: return AARCH64_MI;
cd5660ab 4968 default: return -1;
43e9d192
IB
4969 }
4970 break;
4971
1c992d1e
RE
4972 case CC_Zmode:
4973 switch (comp_code)
4974 {
4975 case NE: return AARCH64_NE;
4976 case EQ: return AARCH64_EQ;
cd5660ab 4977 default: return -1;
1c992d1e
RE
4978 }
4979 break;
4980
ef22810a
RH
4981 case CC_Cmode:
4982 switch (comp_code)
4983 {
4984 case NE: return AARCH64_CS;
4985 case EQ: return AARCH64_CC;
4986 default: return -1;
4987 }
4988 break;
4989
43e9d192 4990 default:
cd5660ab 4991 return -1;
43e9d192 4992 }
3dfa7055 4993
3dfa7055 4994 return -1;
43e9d192
IB
4995}
4996
ddeabd3e
AL
4997bool
4998aarch64_const_vec_all_same_in_range_p (rtx x,
4999 HOST_WIDE_INT minval,
5000 HOST_WIDE_INT maxval)
5001{
5002 HOST_WIDE_INT firstval;
5003 int count, i;
5004
5005 if (GET_CODE (x) != CONST_VECTOR
5006 || GET_MODE_CLASS (GET_MODE (x)) != MODE_VECTOR_INT)
5007 return false;
5008
5009 firstval = INTVAL (CONST_VECTOR_ELT (x, 0));
5010 if (firstval < minval || firstval > maxval)
5011 return false;
5012
5013 count = CONST_VECTOR_NUNITS (x);
5014 for (i = 1; i < count; i++)
5015 if (INTVAL (CONST_VECTOR_ELT (x, i)) != firstval)
5016 return false;
5017
5018 return true;
5019}
5020
5021bool
5022aarch64_const_vec_all_same_int_p (rtx x, HOST_WIDE_INT val)
5023{
5024 return aarch64_const_vec_all_same_in_range_p (x, val, val);
5025}
5026
43e9d192 5027
cf670503
ZC
5028/* N Z C V. */
5029#define AARCH64_CC_V 1
5030#define AARCH64_CC_C (1 << 1)
5031#define AARCH64_CC_Z (1 << 2)
5032#define AARCH64_CC_N (1 << 3)
5033
c8012fbc
WD
5034/* N Z C V flags for ccmp. Indexed by AARCH64_COND_CODE. */
5035static const int aarch64_nzcv_codes[] =
5036{
5037 0, /* EQ, Z == 1. */
5038 AARCH64_CC_Z, /* NE, Z == 0. */
5039 0, /* CS, C == 1. */
5040 AARCH64_CC_C, /* CC, C == 0. */
5041 0, /* MI, N == 1. */
5042 AARCH64_CC_N, /* PL, N == 0. */
5043 0, /* VS, V == 1. */
5044 AARCH64_CC_V, /* VC, V == 0. */
5045 0, /* HI, C == 1 && Z == 0. */
5046 AARCH64_CC_C, /* LS, !(C == 1 && Z == 0). */
5047 AARCH64_CC_V, /* GE, N == V. */
5048 0, /* LT, N != V. */
5049 AARCH64_CC_Z, /* GT, Z == 0 && N == V. */
5050 0, /* LE, !(Z == 0 && N == V). */
5051 0, /* AL, Any. */
5052 0 /* NV, Any. */
cf670503
ZC
5053};
5054
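/* A usage sketch: the 'k' operand modifier below prints one of the
   values above as the #nzcv immediate of a conditional compare, e.g.
       ccmp    w0, w1, #4, ge
   In the instruction, the immediate (#4 = just the Z bit, per the
   defines above) is the flag value installed when the "ge" test on the
   incoming flags fails; otherwise the flags of the compare are used.  */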
cc8ca59e
JB
5055static void
5056aarch64_print_operand (FILE *f, rtx x, int code)
43e9d192
IB
5057{
5058 switch (code)
5059 {
f541a481
KT
5060 /* An integer or symbol address without a preceding # sign. */
5061 case 'c':
5062 switch (GET_CODE (x))
5063 {
5064 case CONST_INT:
5065 fprintf (f, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
5066 break;
5067
5068 case SYMBOL_REF:
5069 output_addr_const (f, x);
5070 break;
5071
5072 case CONST:
5073 if (GET_CODE (XEXP (x, 0)) == PLUS
5074 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF)
5075 {
5076 output_addr_const (f, x);
5077 break;
5078 }
5079 /* Fall through. */
5080
5081 default:
5082 output_operand_lossage ("Unsupported operand for code '%c'", code);
5083 }
5084 break;
5085
43e9d192
IB
5086 case 'e':
5087 /* Print the sign/zero-extend size as a character 8->b, 16->h, 32->w. */
5088 {
5089 int n;
5090
4aa81c2e 5091 if (!CONST_INT_P (x)
43e9d192
IB
5092 || (n = exact_log2 (INTVAL (x) & ~7)) <= 0)
5093 {
5094 output_operand_lossage ("invalid operand for '%%%c'", code);
5095 return;
5096 }
5097
5098 switch (n)
5099 {
5100 case 3:
5101 fputc ('b', f);
5102 break;
5103 case 4:
5104 fputc ('h', f);
5105 break;
5106 case 5:
5107 fputc ('w', f);
5108 break;
5109 default:
5110 output_operand_lossage ("invalid operand for '%%%c'", code);
5111 return;
5112 }
5113 }
5114 break;
5115
5116 case 'p':
5117 {
5118 int n;
5119
5120 /* Print N such that 2^N == X. */
4aa81c2e 5121 if (!CONST_INT_P (x) || (n = exact_log2 (INTVAL (x))) < 0)
43e9d192
IB
5122 {
5123 output_operand_lossage ("invalid operand for '%%%c'", code);
5124 return;
5125 }
5126
5127 asm_fprintf (f, "%d", n);
5128 }
5129 break;
5130
5131 case 'P':
5132 /* Print the number of non-zero bits in X (a const_int). */
4aa81c2e 5133 if (!CONST_INT_P (x))
43e9d192
IB
5134 {
5135 output_operand_lossage ("invalid operand for '%%%c'", code);
5136 return;
5137 }
5138
8d55c61b 5139 asm_fprintf (f, "%u", popcount_hwi (INTVAL (x)));
43e9d192
IB
5140 break;
5141
5142 case 'H':
5143 /* Print the higher numbered register of a pair (TImode) of regs. */
4aa81c2e 5144 if (!REG_P (x) || !GP_REGNUM_P (REGNO (x) + 1))
43e9d192
IB
5145 {
5146 output_operand_lossage ("invalid operand for '%%%c'", code);
5147 return;
5148 }
5149
01a3a324 5150 asm_fprintf (f, "%s", reg_names [REGNO (x) + 1]);
43e9d192
IB
5151 break;
5152
43e9d192 5153 case 'M':
c8012fbc 5154 case 'm':
cd5660ab
KT
5155 {
5156 int cond_code;
c8012fbc 5157 /* Print a condition (eq, ne, etc) or its inverse. */
43e9d192 5158
c8012fbc
WD
5159 /* CONST_TRUE_RTX means al/nv (al is the default, don't print it). */
5160 if (x == const_true_rtx)
cd5660ab 5161 {
c8012fbc
WD
5162 if (code == 'M')
5163 fputs ("nv", f);
cd5660ab
KT
5164 return;
5165 }
43e9d192 5166
cd5660ab
KT
5167 if (!COMPARISON_P (x))
5168 {
5169 output_operand_lossage ("invalid operand for '%%%c'", code);
5170 return;
5171 }
c8012fbc 5172
cd5660ab
KT
5173 cond_code = aarch64_get_condition_code (x);
5174 gcc_assert (cond_code >= 0);
c8012fbc
WD
5175 if (code == 'M')
5176 cond_code = AARCH64_INVERSE_CONDITION_CODE (cond_code);
5177 fputs (aarch64_condition_codes[cond_code], f);
cd5660ab 5178 }
43e9d192
IB
5179 break;
5180
5181 case 'b':
5182 case 'h':
5183 case 's':
5184 case 'd':
5185 case 'q':
5186 /* Print a scalar FP/SIMD register name. */
5187 if (!REG_P (x) || !FP_REGNUM_P (REGNO (x)))
5188 {
5189 output_operand_lossage ("incompatible floating point / vector register operand for '%%%c'", code);
5190 return;
5191 }
50ce6f88 5192 asm_fprintf (f, "%c%d", code, REGNO (x) - V0_REGNUM);
43e9d192
IB
5193 break;
5194
5195 case 'S':
5196 case 'T':
5197 case 'U':
5198 case 'V':
5199 /* Print the first FP/SIMD register name in a list. */
5200 if (!REG_P (x) || !FP_REGNUM_P (REGNO (x)))
5201 {
5202 output_operand_lossage ("incompatible floating point / vector register operand for '%%%c'", code);
5203 return;
5204 }
50ce6f88 5205 asm_fprintf (f, "v%d", REGNO (x) - V0_REGNUM + (code - 'S'));
43e9d192
IB
5206 break;
5207
2d8c6dc1
AH
5208 case 'R':
5209 /* Print a scalar FP/SIMD register name + 1. */
5210 if (!REG_P (x) || !FP_REGNUM_P (REGNO (x)))
5211 {
5212 output_operand_lossage ("incompatible floating point / vector register operand for '%%%c'", code);
5213 return;
5214 }
5215 asm_fprintf (f, "q%d", REGNO (x) - V0_REGNUM + 1);
5216 break;
5217
a05c0ddf 5218 case 'X':
50d38551 5219 /* Print bottom 16 bits of integer constant in hex. */
4aa81c2e 5220 if (!CONST_INT_P (x))
a05c0ddf
IB
5221 {
5222 output_operand_lossage ("invalid operand for '%%%c'", code);
5223 return;
5224 }
50d38551 5225 asm_fprintf (f, "0x%wx", UINTVAL (x) & 0xffff);
a05c0ddf
IB
5226 break;
5227
43e9d192
IB
5228 case 'w':
5229 case 'x':
5230 /* Print a general register name or the zero register (32-bit or
5231 64-bit). */
3520f7cc
JG
5232 if (x == const0_rtx
5233 || (CONST_DOUBLE_P (x) && aarch64_float_const_zero_rtx_p (x)))
43e9d192 5234 {
50ce6f88 5235 asm_fprintf (f, "%czr", code);
43e9d192
IB
5236 break;
5237 }
5238
5239 if (REG_P (x) && GP_REGNUM_P (REGNO (x)))
5240 {
50ce6f88 5241 asm_fprintf (f, "%c%d", code, REGNO (x) - R0_REGNUM);
43e9d192
IB
5242 break;
5243 }
5244
5245 if (REG_P (x) && REGNO (x) == SP_REGNUM)
5246 {
50ce6f88 5247 asm_fprintf (f, "%ssp", code == 'w' ? "w" : "");
43e9d192
IB
5248 break;
5249 }
5250
5251 /* Fall through */
5252
5253 case 0:
 5254	    /* Print a normal operand.  If it's a general register, then we
 5255	       assume DImode.  */
5256 if (x == NULL)
5257 {
5258 output_operand_lossage ("missing operand");
5259 return;
5260 }
5261
5262 switch (GET_CODE (x))
5263 {
5264 case REG:
01a3a324 5265 asm_fprintf (f, "%s", reg_names [REGNO (x)]);
43e9d192
IB
5266 break;
5267
5268 case MEM:
cc8ca59e 5269 output_address (GET_MODE (x), XEXP (x, 0));
00eee3fa
WD
5270 /* Check all memory references are Pmode - even with ILP32. */
5271 gcc_assert (GET_MODE (XEXP (x, 0)) == Pmode);
43e9d192
IB
5272 break;
5273
2af16a7c 5274 case CONST:
43e9d192
IB
5275 case LABEL_REF:
5276 case SYMBOL_REF:
5277 output_addr_const (asm_out_file, x);
5278 break;
5279
5280 case CONST_INT:
5281 asm_fprintf (f, "%wd", INTVAL (x));
5282 break;
5283
5284 case CONST_VECTOR:
3520f7cc
JG
5285 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_VECTOR_INT)
5286 {
ddeabd3e
AL
5287 gcc_assert (
5288 aarch64_const_vec_all_same_in_range_p (x,
5289 HOST_WIDE_INT_MIN,
5290 HOST_WIDE_INT_MAX));
3520f7cc
JG
5291 asm_fprintf (f, "%wd", INTVAL (CONST_VECTOR_ELT (x, 0)));
5292 }
5293 else if (aarch64_simd_imm_zero_p (x, GET_MODE (x)))
5294 {
5295 fputc ('0', f);
5296 }
5297 else
5298 gcc_unreachable ();
43e9d192
IB
5299 break;
5300
3520f7cc 5301 case CONST_DOUBLE:
2ca5b430
KT
5302 /* Since we define TARGET_SUPPORTS_WIDE_INT we shouldn't ever
5303 be getting CONST_DOUBLEs holding integers. */
5304 gcc_assert (GET_MODE (x) != VOIDmode);
5305 if (aarch64_float_const_zero_rtx_p (x))
3520f7cc
JG
5306 {
5307 fputc ('0', f);
5308 break;
5309 }
5310 else if (aarch64_float_const_representable_p (x))
5311 {
5312#define buf_size 20
5313 char float_buf[buf_size] = {'\0'};
34a72c33
RS
5314 real_to_decimal_for_mode (float_buf,
5315 CONST_DOUBLE_REAL_VALUE (x),
3520f7cc
JG
5316 buf_size, buf_size,
5317 1, GET_MODE (x));
5318 asm_fprintf (asm_out_file, "%s", float_buf);
5319 break;
5320#undef buf_size
5321 }
5322 output_operand_lossage ("invalid constant");
5323 return;
43e9d192
IB
5324 default:
5325 output_operand_lossage ("invalid operand");
5326 return;
5327 }
5328 break;
5329
5330 case 'A':
5331 if (GET_CODE (x) == HIGH)
5332 x = XEXP (x, 0);
5333
a6e0bfa7 5334 switch (aarch64_classify_symbolic_expression (x))
43e9d192 5335 {
6642bdb4 5336 case SYMBOL_SMALL_GOT_4G:
43e9d192
IB
5337 asm_fprintf (asm_out_file, ":got:");
5338 break;
5339
5340 case SYMBOL_SMALL_TLSGD:
5341 asm_fprintf (asm_out_file, ":tlsgd:");
5342 break;
5343
5344 case SYMBOL_SMALL_TLSDESC:
5345 asm_fprintf (asm_out_file, ":tlsdesc:");
5346 break;
5347
79496620 5348 case SYMBOL_SMALL_TLSIE:
43e9d192
IB
5349 asm_fprintf (asm_out_file, ":gottprel:");
5350 break;
5351
d18ba284 5352 case SYMBOL_TLSLE24:
43e9d192
IB
5353 asm_fprintf (asm_out_file, ":tprel:");
5354 break;
5355
87dd8ab0
MS
5356 case SYMBOL_TINY_GOT:
5357 gcc_unreachable ();
5358 break;
5359
43e9d192
IB
5360 default:
5361 break;
5362 }
5363 output_addr_const (asm_out_file, x);
5364 break;
5365
5366 case 'L':
a6e0bfa7 5367 switch (aarch64_classify_symbolic_expression (x))
43e9d192 5368 {
6642bdb4 5369 case SYMBOL_SMALL_GOT_4G:
43e9d192
IB
5370 asm_fprintf (asm_out_file, ":lo12:");
5371 break;
5372
5373 case SYMBOL_SMALL_TLSGD:
5374 asm_fprintf (asm_out_file, ":tlsgd_lo12:");
5375 break;
5376
5377 case SYMBOL_SMALL_TLSDESC:
5378 asm_fprintf (asm_out_file, ":tlsdesc_lo12:");
5379 break;
5380
79496620 5381 case SYMBOL_SMALL_TLSIE:
43e9d192
IB
5382 asm_fprintf (asm_out_file, ":gottprel_lo12:");
5383 break;
5384
cbf5629e
JW
5385 case SYMBOL_TLSLE12:
5386 asm_fprintf (asm_out_file, ":tprel_lo12:");
5387 break;
5388
d18ba284 5389 case SYMBOL_TLSLE24:
43e9d192
IB
5390 asm_fprintf (asm_out_file, ":tprel_lo12_nc:");
5391 break;
5392
87dd8ab0
MS
5393 case SYMBOL_TINY_GOT:
5394 asm_fprintf (asm_out_file, ":got:");
5395 break;
5396
5ae7caad
JW
5397 case SYMBOL_TINY_TLSIE:
5398 asm_fprintf (asm_out_file, ":gottprel:");
5399 break;
5400
43e9d192
IB
5401 default:
5402 break;
5403 }
5404 output_addr_const (asm_out_file, x);
5405 break;
5406
5407 case 'G':
5408
a6e0bfa7 5409 switch (aarch64_classify_symbolic_expression (x))
43e9d192 5410 {
d18ba284 5411 case SYMBOL_TLSLE24:
43e9d192
IB
5412 asm_fprintf (asm_out_file, ":tprel_hi12:");
5413 break;
5414 default:
5415 break;
5416 }
5417 output_addr_const (asm_out_file, x);
5418 break;
5419
cf670503
ZC
5420 case 'k':
5421 {
c8012fbc 5422 HOST_WIDE_INT cond_code;
cf670503
ZC
5423 /* Print nzcv. */
5424
c8012fbc 5425 if (!CONST_INT_P (x))
cf670503
ZC
5426 {
5427 output_operand_lossage ("invalid operand for '%%%c'", code);
5428 return;
5429 }
5430
c8012fbc
WD
5431 cond_code = INTVAL (x);
5432 gcc_assert (cond_code >= 0 && cond_code <= AARCH64_NV);
5433 asm_fprintf (f, "%d", aarch64_nzcv_codes[cond_code]);
cf670503
ZC
5434 }
5435 break;
5436
43e9d192
IB
5437 default:
5438 output_operand_lossage ("invalid operand prefix '%%%c'", code);
5439 return;
5440 }
5441}
5442
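/* A rough illustration of the modifiers above in an output template
   (the operand numbering here is made up for the example):
       "csel\t%w0, %w1, %w2, %m3"
   would print 32-bit register names for operands 0-2 via 'w' and the
   condition derived from the comparison in operand 3 via 'm'.  */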
cc8ca59e
JB
5443static void
5444aarch64_print_operand_address (FILE *f, machine_mode mode, rtx x)
43e9d192
IB
5445{
5446 struct aarch64_address_info addr;
5447
cc8ca59e 5448 if (aarch64_classify_address (&addr, x, mode, MEM, true))
43e9d192
IB
5449 switch (addr.type)
5450 {
5451 case ADDRESS_REG_IMM:
5452 if (addr.offset == const0_rtx)
01a3a324 5453 asm_fprintf (f, "[%s]", reg_names [REGNO (addr.base)]);
43e9d192 5454 else
16a3246f 5455 asm_fprintf (f, "[%s, %wd]", reg_names [REGNO (addr.base)],
43e9d192
IB
5456 INTVAL (addr.offset));
5457 return;
5458
5459 case ADDRESS_REG_REG:
5460 if (addr.shift == 0)
16a3246f 5461 asm_fprintf (f, "[%s, %s]", reg_names [REGNO (addr.base)],
01a3a324 5462 reg_names [REGNO (addr.offset)]);
43e9d192 5463 else
16a3246f 5464 asm_fprintf (f, "[%s, %s, lsl %u]", reg_names [REGNO (addr.base)],
01a3a324 5465 reg_names [REGNO (addr.offset)], addr.shift);
43e9d192
IB
5466 return;
5467
5468 case ADDRESS_REG_UXTW:
5469 if (addr.shift == 0)
16a3246f 5470 asm_fprintf (f, "[%s, w%d, uxtw]", reg_names [REGNO (addr.base)],
43e9d192
IB
5471 REGNO (addr.offset) - R0_REGNUM);
5472 else
16a3246f 5473 asm_fprintf (f, "[%s, w%d, uxtw %u]", reg_names [REGNO (addr.base)],
43e9d192
IB
5474 REGNO (addr.offset) - R0_REGNUM, addr.shift);
5475 return;
5476
5477 case ADDRESS_REG_SXTW:
5478 if (addr.shift == 0)
16a3246f 5479 asm_fprintf (f, "[%s, w%d, sxtw]", reg_names [REGNO (addr.base)],
43e9d192
IB
5480 REGNO (addr.offset) - R0_REGNUM);
5481 else
16a3246f 5482 asm_fprintf (f, "[%s, w%d, sxtw %u]", reg_names [REGNO (addr.base)],
43e9d192
IB
5483 REGNO (addr.offset) - R0_REGNUM, addr.shift);
5484 return;
5485
5486 case ADDRESS_REG_WB:
5487 switch (GET_CODE (x))
5488 {
5489 case PRE_INC:
16a3246f 5490 asm_fprintf (f, "[%s, %d]!", reg_names [REGNO (addr.base)],
cc8ca59e 5491 GET_MODE_SIZE (mode));
43e9d192
IB
5492 return;
5493 case POST_INC:
16a3246f 5494 asm_fprintf (f, "[%s], %d", reg_names [REGNO (addr.base)],
cc8ca59e 5495 GET_MODE_SIZE (mode));
43e9d192
IB
5496 return;
5497 case PRE_DEC:
16a3246f 5498 asm_fprintf (f, "[%s, -%d]!", reg_names [REGNO (addr.base)],
cc8ca59e 5499 GET_MODE_SIZE (mode));
43e9d192
IB
5500 return;
5501 case POST_DEC:
16a3246f 5502 asm_fprintf (f, "[%s], -%d", reg_names [REGNO (addr.base)],
cc8ca59e 5503 GET_MODE_SIZE (mode));
43e9d192
IB
5504 return;
5505 case PRE_MODIFY:
16a3246f 5506 asm_fprintf (f, "[%s, %wd]!", reg_names [REGNO (addr.base)],
43e9d192
IB
5507 INTVAL (addr.offset));
5508 return;
5509 case POST_MODIFY:
16a3246f 5510 asm_fprintf (f, "[%s], %wd", reg_names [REGNO (addr.base)],
43e9d192
IB
5511 INTVAL (addr.offset));
5512 return;
5513 default:
5514 break;
5515 }
5516 break;
5517
5518 case ADDRESS_LO_SUM:
16a3246f 5519 asm_fprintf (f, "[%s, #:lo12:", reg_names [REGNO (addr.base)]);
43e9d192
IB
5520 output_addr_const (f, addr.offset);
5521 asm_fprintf (f, "]");
5522 return;
5523
5524 case ADDRESS_SYMBOLIC:
5525 break;
5526 }
5527
5528 output_addr_const (f, x);
5529}
5530
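/* Examples of the address syntax printed above ("sym" is just an
   illustrative symbol name):
       [x0]                  [x0, 16]         base (+ immediate)
       [x0, x1, lsl 3]                        base + scaled register
       [x0, w1, sxtw 2]      [x0, w1, uxtw]   base + extended register
       [x0, 32]!             [x0], 32         pre/post writeback
       [x0, #:lo12:sym]                       LO_SUM  */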
43e9d192
IB
5531bool
5532aarch64_label_mentioned_p (rtx x)
5533{
5534 const char *fmt;
5535 int i;
5536
5537 if (GET_CODE (x) == LABEL_REF)
5538 return true;
5539
5540 /* UNSPEC_TLS entries for a symbol include a LABEL_REF for the
5541 referencing instruction, but they are constant offsets, not
5542 symbols. */
5543 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
5544 return false;
5545
5546 fmt = GET_RTX_FORMAT (GET_CODE (x));
5547 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
5548 {
5549 if (fmt[i] == 'E')
5550 {
5551 int j;
5552
5553 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
5554 if (aarch64_label_mentioned_p (XVECEXP (x, i, j)))
5555 return 1;
5556 }
5557 else if (fmt[i] == 'e' && aarch64_label_mentioned_p (XEXP (x, i)))
5558 return 1;
5559 }
5560
5561 return 0;
5562}
5563
5564/* Implement REGNO_REG_CLASS. */
5565
5566enum reg_class
5567aarch64_regno_regclass (unsigned regno)
5568{
5569 if (GP_REGNUM_P (regno))
a4a182c6 5570 return GENERAL_REGS;
43e9d192
IB
5571
5572 if (regno == SP_REGNUM)
5573 return STACK_REG;
5574
5575 if (regno == FRAME_POINTER_REGNUM
5576 || regno == ARG_POINTER_REGNUM)
f24bb080 5577 return POINTER_REGS;
43e9d192
IB
5578
5579 if (FP_REGNUM_P (regno))
5580 return FP_LO_REGNUM_P (regno) ? FP_LO_REGS : FP_REGS;
5581
5582 return NO_REGS;
5583}
5584
0c4ec427 5585static rtx
ef4bddc2 5586aarch64_legitimize_address (rtx x, rtx /* orig_x */, machine_mode mode)
0c4ec427
RE
5587{
5588 /* Try to split X+CONST into Y=X+(CONST & ~mask), Y+(CONST&mask),
5589 where mask is selected by alignment and size of the offset.
5590 We try to pick as large a range for the offset as possible to
5591 maximize the chance of a CSE. However, for aligned addresses
5592 we limit the range to 4k so that structures with different sized
e8426e0a
BC
5593 elements are likely to use the same base. We need to be careful
5594 not to split a CONST for some forms of address expression, otherwise
5595 it will generate sub-optimal code. */
0c4ec427
RE
5596
5597 if (GET_CODE (x) == PLUS && CONST_INT_P (XEXP (x, 1)))
5598 {
9e0218fc 5599 rtx base = XEXP (x, 0);
17d7bdd8 5600 rtx offset_rtx = XEXP (x, 1);
9e0218fc 5601 HOST_WIDE_INT offset = INTVAL (offset_rtx);
0c4ec427 5602
9e0218fc 5603 if (GET_CODE (base) == PLUS)
e8426e0a 5604 {
9e0218fc
RH
5605 rtx op0 = XEXP (base, 0);
5606 rtx op1 = XEXP (base, 1);
5607
5608 /* Force any scaling into a temp for CSE. */
5609 op0 = force_reg (Pmode, op0);
5610 op1 = force_reg (Pmode, op1);
5611
5612 /* Let the pointer register be in op0. */
5613 if (REG_POINTER (op1))
5614 std::swap (op0, op1);
5615
5616 /* If the pointer is virtual or frame related, then we know that
5617 virtual register instantiation or register elimination is going
5618 to apply a second constant. We want the two constants folded
5619 together easily. Therefore, emit as (OP0 + CONST) + OP1. */
5620 if (virt_or_elim_regno_p (REGNO (op0)))
e8426e0a 5621 {
9e0218fc
RH
5622 base = expand_binop (Pmode, add_optab, op0, offset_rtx,
5623 NULL_RTX, true, OPTAB_DIRECT);
5624 return gen_rtx_PLUS (Pmode, base, op1);
e8426e0a 5625 }
e8426e0a 5626
9e0218fc
RH
 5627	  /* Otherwise, in order to encourage CSE (and thence loop strength
 5628	     reduction) of scaled addresses, emit as (OP0 + OP1) + CONST.  */
5629 base = expand_binop (Pmode, add_optab, op0, op1,
5630 NULL_RTX, true, OPTAB_DIRECT);
5631 x = gen_rtx_PLUS (Pmode, base, offset_rtx);
e8426e0a
BC
5632 }
5633
8734dfac 5634 /* Does it look like we'll need a 16-byte load/store-pair operation? */
9e0218fc 5635 HOST_WIDE_INT base_offset;
8734dfac
WD
5636 if (GET_MODE_SIZE (mode) > 16)
5637 base_offset = (offset + 0x400) & ~0x7f0;
0c4ec427
RE
 5638      /* For offsets that aren't a multiple of the access size, the limit is
 5639	 -256...255.  */
5640 else if (offset & (GET_MODE_SIZE (mode) - 1))
ff0f3f1c
WD
5641 {
5642 base_offset = (offset + 0x100) & ~0x1ff;
5643
5644 /* BLKmode typically uses LDP of X-registers. */
5645 if (mode == BLKmode)
5646 base_offset = (offset + 512) & ~0x3ff;
5647 }
5648 /* Small negative offsets are supported. */
5649 else if (IN_RANGE (offset, -256, 0))
5650 base_offset = 0;
8734dfac
WD
5651 else if (mode == TImode || mode == TFmode)
5652 base_offset = (offset + 0x100) & ~0x1ff;
ff0f3f1c 5653      /* Use a 12-bit offset scaled by the access size.  */
0c4ec427 5654 else
ff0f3f1c 5655 base_offset = offset & (~0xfff * GET_MODE_SIZE (mode));
0c4ec427 5656
9e0218fc
RH
5657 if (base_offset != 0)
5658 {
5659 base = plus_constant (Pmode, base, base_offset);
5660 base = force_operand (base, NULL_RTX);
5661 return plus_constant (Pmode, base, offset - base_offset);
5662 }
0c4ec427
RE
5663 }
5664
5665 return x;
5666}
5667
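/* A worked example of the split above, assuming SImode and
   x = (plus (reg X) (const_int 0x13004)): the offset is a multiple of
   the access size, so base_offset = 0x13004 & -0x4000 = 0x10000 and we
   return (plus (plus (reg X) 0x10000) 0x3004); the residual 0x3004 is
   word-aligned and fits the scaled 12-bit LDR/STR offset, and the new
   base is available for CSE with neighbouring accesses.  */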
b4f50fd4
RR
 5668/* Return the reload icode required for a constant-pool load of mode MODE.  */
5669static enum insn_code
5670aarch64_constant_pool_reload_icode (machine_mode mode)
5671{
5672 switch (mode)
5673 {
5674 case SFmode:
5675 return CODE_FOR_aarch64_reload_movcpsfdi;
5676
5677 case DFmode:
5678 return CODE_FOR_aarch64_reload_movcpdfdi;
5679
5680 case TFmode:
5681 return CODE_FOR_aarch64_reload_movcptfdi;
5682
5683 case V8QImode:
5684 return CODE_FOR_aarch64_reload_movcpv8qidi;
5685
5686 case V16QImode:
5687 return CODE_FOR_aarch64_reload_movcpv16qidi;
5688
5689 case V4HImode:
5690 return CODE_FOR_aarch64_reload_movcpv4hidi;
5691
5692 case V8HImode:
5693 return CODE_FOR_aarch64_reload_movcpv8hidi;
5694
5695 case V2SImode:
5696 return CODE_FOR_aarch64_reload_movcpv2sidi;
5697
5698 case V4SImode:
5699 return CODE_FOR_aarch64_reload_movcpv4sidi;
5700
5701 case V2DImode:
5702 return CODE_FOR_aarch64_reload_movcpv2didi;
5703
5704 case V2DFmode:
5705 return CODE_FOR_aarch64_reload_movcpv2dfdi;
5706
5707 default:
5708 gcc_unreachable ();
5709 }
5710
5711 gcc_unreachable ();
5712}
43e9d192
IB
5713static reg_class_t
5714aarch64_secondary_reload (bool in_p ATTRIBUTE_UNUSED, rtx x,
5715 reg_class_t rclass,
ef4bddc2 5716 machine_mode mode,
43e9d192
IB
5717 secondary_reload_info *sri)
5718{
b4f50fd4
RR
5719
5720 /* If we have to disable direct literal pool loads and stores because the
5721 function is too big, then we need a scratch register. */
5722 if (MEM_P (x) && GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x)
5723 && (SCALAR_FLOAT_MODE_P (GET_MODE (x))
5724 || targetm.vector_mode_supported_p (GET_MODE (x)))
9ee6540a 5725 && !aarch64_pcrelative_literal_loads)
b4f50fd4
RR
5726 {
5727 sri->icode = aarch64_constant_pool_reload_icode (mode);
5728 return NO_REGS;
5729 }
5730
43e9d192
IB
5731 /* Without the TARGET_SIMD instructions we cannot move a Q register
5732 to a Q register directly. We need a scratch. */
5733 if (REG_P (x) && (mode == TFmode || mode == TImode) && mode == GET_MODE (x)
5734 && FP_REGNUM_P (REGNO (x)) && !TARGET_SIMD
5735 && reg_class_subset_p (rclass, FP_REGS))
5736 {
5737 if (mode == TFmode)
5738 sri->icode = CODE_FOR_aarch64_reload_movtf;
5739 else if (mode == TImode)
5740 sri->icode = CODE_FOR_aarch64_reload_movti;
5741 return NO_REGS;
5742 }
5743
 5744  /* A TFmode or TImode memory access should be handled via an FP register
5745 because AArch64 has richer addressing modes for LDR/STR instructions
5746 than LDP/STP instructions. */
d5726973 5747 if (TARGET_FLOAT && rclass == GENERAL_REGS
43e9d192
IB
5748 && GET_MODE_SIZE (mode) == 16 && MEM_P (x))
5749 return FP_REGS;
5750
5751 if (rclass == FP_REGS && (mode == TImode || mode == TFmode) && CONSTANT_P(x))
a4a182c6 5752 return GENERAL_REGS;
43e9d192
IB
5753
5754 return NO_REGS;
5755}
5756
5757static bool
5758aarch64_can_eliminate (const int from, const int to)
5759{
5760 /* If we need a frame pointer, we must eliminate FRAME_POINTER_REGNUM into
5761 HARD_FRAME_POINTER_REGNUM and not into STACK_POINTER_REGNUM. */
5762
5763 if (frame_pointer_needed)
5764 {
5765 if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
5766 return true;
5767 if (from == ARG_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
5768 return false;
5769 if (from == FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM
5770 && !cfun->calls_alloca)
5771 return true;
5772 if (from == FRAME_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
5773 return true;
0b7f8166
MS
5774
5775 return false;
43e9d192 5776 }
1c923b60
JW
5777 else
5778 {
5779 /* If we decided that we didn't need a leaf frame pointer but then used
5780 LR in the function, then we'll want a frame pointer after all, so
5781 prevent this elimination to ensure a frame pointer is used. */
5782 if (to == STACK_POINTER_REGNUM
5783 && flag_omit_leaf_frame_pointer
5784 && df_regs_ever_live_p (LR_REGNUM))
5785 return false;
5786 }
777e6976 5787
43e9d192
IB
5788 return true;
5789}
5790
5791HOST_WIDE_INT
5792aarch64_initial_elimination_offset (unsigned from, unsigned to)
5793{
43e9d192 5794 aarch64_layout_frame ();
78c29983
MS
5795
5796 if (to == HARD_FRAME_POINTER_REGNUM)
5797 {
5798 if (from == ARG_POINTER_REGNUM)
71bfb77a 5799 return cfun->machine->frame.hard_fp_offset;
78c29983
MS
5800
5801 if (from == FRAME_POINTER_REGNUM)
71bfb77a
WD
5802 return cfun->machine->frame.hard_fp_offset
5803 - cfun->machine->frame.locals_offset;
78c29983
MS
5804 }
5805
5806 if (to == STACK_POINTER_REGNUM)
5807 {
5808 if (from == FRAME_POINTER_REGNUM)
71bfb77a
WD
5809 return cfun->machine->frame.frame_size
5810 - cfun->machine->frame.locals_offset;
78c29983
MS
5811 }
5812
1c960e02 5813 return cfun->machine->frame.frame_size;
43e9d192
IB
5814}
5815
43e9d192
IB
5816/* Implement RETURN_ADDR_RTX. We do not support moving back to a
5817 previous frame. */
5818
5819rtx
5820aarch64_return_addr (int count, rtx frame ATTRIBUTE_UNUSED)
5821{
5822 if (count != 0)
5823 return const0_rtx;
5824 return get_hard_reg_initial_val (Pmode, LR_REGNUM);
5825}
5826
5827
5828static void
5829aarch64_asm_trampoline_template (FILE *f)
5830{
28514dda
YZ
5831 if (TARGET_ILP32)
5832 {
5833 asm_fprintf (f, "\tldr\tw%d, .+16\n", IP1_REGNUM - R0_REGNUM);
5834 asm_fprintf (f, "\tldr\tw%d, .+16\n", STATIC_CHAIN_REGNUM - R0_REGNUM);
5835 }
5836 else
5837 {
5838 asm_fprintf (f, "\tldr\t%s, .+16\n", reg_names [IP1_REGNUM]);
5839 asm_fprintf (f, "\tldr\t%s, .+20\n", reg_names [STATIC_CHAIN_REGNUM]);
5840 }
01a3a324 5841 asm_fprintf (f, "\tbr\t%s\n", reg_names [IP1_REGNUM]);
43e9d192 5842 assemble_aligned_integer (4, const0_rtx);
28514dda
YZ
5843 assemble_aligned_integer (POINTER_BYTES, const0_rtx);
5844 assemble_aligned_integer (POINTER_BYTES, const0_rtx);
43e9d192
IB
5845}
5846
5847static void
5848aarch64_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
5849{
5850 rtx fnaddr, mem, a_tramp;
28514dda 5851 const int tramp_code_sz = 16;
43e9d192
IB
5852
 5853  /* Don't need to copy the trailing D-words; we fill those in below.  */
5854 emit_block_move (m_tramp, assemble_trampoline_template (),
28514dda
YZ
5855 GEN_INT (tramp_code_sz), BLOCK_OP_NORMAL);
5856 mem = adjust_address (m_tramp, ptr_mode, tramp_code_sz);
43e9d192 5857 fnaddr = XEXP (DECL_RTL (fndecl), 0);
28514dda
YZ
5858 if (GET_MODE (fnaddr) != ptr_mode)
5859 fnaddr = convert_memory_address (ptr_mode, fnaddr);
43e9d192
IB
5860 emit_move_insn (mem, fnaddr);
5861
28514dda 5862 mem = adjust_address (m_tramp, ptr_mode, tramp_code_sz + POINTER_BYTES);
43e9d192
IB
5863 emit_move_insn (mem, chain_value);
5864
5865 /* XXX We should really define a "clear_cache" pattern and use
5866 gen_clear_cache(). */
5867 a_tramp = XEXP (m_tramp, 0);
5868 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__clear_cache"),
28514dda
YZ
5869 LCT_NORMAL, VOIDmode, 2, a_tramp, ptr_mode,
5870 plus_constant (ptr_mode, a_tramp, TRAMPOLINE_SIZE),
5871 ptr_mode);
43e9d192
IB
5872}
5873
5874static unsigned char
ef4bddc2 5875aarch64_class_max_nregs (reg_class_t regclass, machine_mode mode)
43e9d192
IB
5876{
5877 switch (regclass)
5878 {
fee9ba42 5879 case CALLER_SAVE_REGS:
43e9d192
IB
5880 case POINTER_REGS:
5881 case GENERAL_REGS:
5882 case ALL_REGS:
5883 case FP_REGS:
5884 case FP_LO_REGS:
5885 return
7bd11911
KT
5886 aarch64_vector_mode_p (mode)
5887 ? (GET_MODE_SIZE (mode) + UNITS_PER_VREG - 1) / UNITS_PER_VREG
5888 : (GET_MODE_SIZE (mode) + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
43e9d192
IB
5889 case STACK_REG:
5890 return 1;
5891
5892 case NO_REGS:
5893 return 0;
5894
5895 default:
5896 break;
5897 }
5898 gcc_unreachable ();
5899}
5900
5901static reg_class_t
78d8b9f0 5902aarch64_preferred_reload_class (rtx x, reg_class_t regclass)
43e9d192 5903{
51bb310d 5904 if (regclass == POINTER_REGS)
78d8b9f0
IB
5905 return GENERAL_REGS;
5906
51bb310d
MS
5907 if (regclass == STACK_REG)
5908 {
5909 if (REG_P(x)
5910 && reg_class_subset_p (REGNO_REG_CLASS (REGNO (x)), POINTER_REGS))
5911 return regclass;
5912
5913 return NO_REGS;
5914 }
5915
78d8b9f0
IB
5916 /* If it's an integer immediate that MOVI can't handle, then
5917 FP_REGS is not an option, so we return NO_REGS instead. */
5918 if (CONST_INT_P (x) && reg_class_subset_p (regclass, FP_REGS)
5919 && !aarch64_simd_imm_scalar_p (x, GET_MODE (x)))
5920 return NO_REGS;
5921
27bd251b
IB
 5922  /* Register elimination can result in a request for
 5923     SP+constant->FP_REGS.  We cannot support such operations, which
 5924     use SP as source and an FP_REG as destination, so reject them
 5925     outright.  */
5926 if (! reg_class_subset_p (regclass, GENERAL_REGS) && GET_CODE (x) == PLUS)
5927 {
5928 rtx lhs = XEXP (x, 0);
5929
5930 /* Look through a possible SUBREG introduced by ILP32. */
5931 if (GET_CODE (lhs) == SUBREG)
5932 lhs = SUBREG_REG (lhs);
5933
5934 gcc_assert (REG_P (lhs));
5935 gcc_assert (reg_class_subset_p (REGNO_REG_CLASS (REGNO (lhs)),
5936 POINTER_REGS));
5937 return NO_REGS;
5938 }
5939
78d8b9f0 5940 return regclass;
43e9d192
IB
5941}
5942
5943void
5944aarch64_asm_output_labelref (FILE* f, const char *name)
5945{
5946 asm_fprintf (f, "%U%s", name);
5947}
5948
5949static void
5950aarch64_elf_asm_constructor (rtx symbol, int priority)
5951{
5952 if (priority == DEFAULT_INIT_PRIORITY)
5953 default_ctor_section_asm_out_constructor (symbol, priority);
5954 else
5955 {
5956 section *s;
53d190c1
AT
5957 /* While priority is known to be in range [0, 65535], so 18 bytes
5958 would be enough, the compiler might not know that. To avoid
5959 -Wformat-truncation false positive, use a larger size. */
5960 char buf[23];
43e9d192
IB
5961 snprintf (buf, sizeof (buf), ".init_array.%.5u", priority);
5962 s = get_section (buf, SECTION_WRITE, NULL);
5963 switch_to_section (s);
5964 assemble_align (POINTER_SIZE);
28514dda 5965 assemble_aligned_integer (POINTER_BYTES, symbol);
43e9d192
IB
5966 }
5967}
5968
5969static void
5970aarch64_elf_asm_destructor (rtx symbol, int priority)
5971{
5972 if (priority == DEFAULT_INIT_PRIORITY)
5973 default_dtor_section_asm_out_destructor (symbol, priority);
5974 else
5975 {
5976 section *s;
53d190c1
AT
5977 /* While priority is known to be in range [0, 65535], so 18 bytes
5978 would be enough, the compiler might not know that. To avoid
5979 -Wformat-truncation false positive, use a larger size. */
5980 char buf[23];
43e9d192
IB
5981 snprintf (buf, sizeof (buf), ".fini_array.%.5u", priority);
5982 s = get_section (buf, SECTION_WRITE, NULL);
5983 switch_to_section (s);
5984 assemble_align (POINTER_SIZE);
28514dda 5985 assemble_aligned_integer (POINTER_BYTES, symbol);
43e9d192
IB
5986 }
5987}
5988
5989const char*
5990aarch64_output_casesi (rtx *operands)
5991{
5992 char buf[100];
5993 char label[100];
b32d5189 5994 rtx diff_vec = PATTERN (NEXT_INSN (as_a <rtx_insn *> (operands[2])));
43e9d192
IB
5995 int index;
5996 static const char *const patterns[4][2] =
5997 {
5998 {
5999 "ldrb\t%w3, [%0,%w1,uxtw]",
6000 "add\t%3, %4, %w3, sxtb #2"
6001 },
6002 {
6003 "ldrh\t%w3, [%0,%w1,uxtw #1]",
6004 "add\t%3, %4, %w3, sxth #2"
6005 },
6006 {
6007 "ldr\t%w3, [%0,%w1,uxtw #2]",
6008 "add\t%3, %4, %w3, sxtw #2"
6009 },
6010 /* We assume that DImode is only generated when not optimizing and
6011 that we don't really need 64-bit address offsets. That would
6012 imply an object file with 8GB of code in a single function! */
6013 {
6014 "ldr\t%w3, [%0,%w1,uxtw #2]",
6015 "add\t%3, %4, %w3, sxtw #2"
6016 }
6017 };
6018
6019 gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC);
6020
6021 index = exact_log2 (GET_MODE_SIZE (GET_MODE (diff_vec)));
6022
6023 gcc_assert (index >= 0 && index <= 3);
6024
 6025  /* Need to implement table size reduction, by changing the code below.  */
6026 output_asm_insn (patterns[index][0], operands);
6027 ASM_GENERATE_INTERNAL_LABEL (label, "Lrtx", CODE_LABEL_NUMBER (operands[2]));
6028 snprintf (buf, sizeof (buf),
6029 "adr\t%%4, %s", targetm.strip_name_encoding (label));
6030 output_asm_insn (buf, operands);
6031 output_asm_insn (patterns[index][1], operands);
6032 output_asm_insn ("br\t%3", operands);
6033 assemble_label (asm_out_file, label);
6034 return "";
6035}
6036
6037
6038/* Return size in bits of an arithmetic operand which is shifted/scaled and
6039 masked such that it is suitable for a UXTB, UXTH, or UXTW extend
6040 operator. */
6041
6042int
6043aarch64_uxt_size (int shift, HOST_WIDE_INT mask)
6044{
6045 if (shift >= 0 && shift <= 3)
6046 {
6047 int size;
6048 for (size = 8; size <= 32; size *= 2)
6049 {
6050 HOST_WIDE_INT bits = ((HOST_WIDE_INT)1U << size) - 1;
6051 if (mask == bits << shift)
6052 return size;
6053 }
6054 }
6055 return 0;
6056}
6057
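/* For example, aarch64_uxt_size (1, 0x1fe) returns 8: the mask is 0xff
   shifted left by one, so the operand can be treated as a UXTB combined
   with an LSL #1 in an extended-register ALU operation.  */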
e78d485e
RR
 6058/* Constant pools are per-function only when PC-relative literal
 6059   loads are enabled or we are in the large memory
 6060   model.  */
6061
6062static inline bool
6063aarch64_can_use_per_function_literal_pools_p (void)
6064{
9ee6540a 6065 return (aarch64_pcrelative_literal_loads
e78d485e
RR
6066 || aarch64_cmodel == AARCH64_CMODEL_LARGE);
6067}
6068
43e9d192 6069static bool
e78d485e 6070aarch64_use_blocks_for_constant_p (machine_mode, const_rtx)
43e9d192 6071{
3eece53d
RR
6072 /* Fixme:: In an ideal world this would work similar
6073 to the logic in aarch64_select_rtx_section but this
6074 breaks bootstrap in gcc go. For now we workaround
6075 this by returning false here. */
6076 return false;
43e9d192
IB
6077}
6078
e78d485e
RR
6079/* Select appropriate section for constants depending
6080 on where we place literal pools. */
6081
43e9d192 6082static section *
e78d485e
RR
6083aarch64_select_rtx_section (machine_mode mode,
6084 rtx x,
6085 unsigned HOST_WIDE_INT align)
43e9d192 6086{
e78d485e
RR
6087 if (aarch64_can_use_per_function_literal_pools_p ())
6088 return function_section (current_function_decl);
43e9d192 6089
e78d485e
RR
6090 return default_elf_select_rtx_section (mode, x, align);
6091}
43e9d192 6092
5fca7b66
RH
6093/* Implement ASM_OUTPUT_POOL_EPILOGUE. */
6094void
6095aarch64_asm_output_pool_epilogue (FILE *f, const char *, tree,
6096 HOST_WIDE_INT offset)
6097{
6098 /* When using per-function literal pools, we must ensure that any code
6099 section is aligned to the minimal instruction length, lest we get
6100 errors from the assembler re "unaligned instructions". */
6101 if ((offset & 3) && aarch64_can_use_per_function_literal_pools_p ())
6102 ASM_OUTPUT_ALIGN (f, 2);
6103}
6104
43e9d192
IB
6105/* Costs. */
6106
6107/* Helper function for rtx cost calculation. Strip a shift expression
6108 from X. Returns the inner operand if successful, or the original
6109 expression on failure. */
6110static rtx
6111aarch64_strip_shift (rtx x)
6112{
6113 rtx op = x;
6114
57b77d46
RE
6115 /* We accept both ROTATERT and ROTATE: since the RHS must be a constant
6116 we can convert both to ROR during final output. */
43e9d192
IB
6117 if ((GET_CODE (op) == ASHIFT
6118 || GET_CODE (op) == ASHIFTRT
57b77d46
RE
6119 || GET_CODE (op) == LSHIFTRT
6120 || GET_CODE (op) == ROTATERT
6121 || GET_CODE (op) == ROTATE)
43e9d192
IB
6122 && CONST_INT_P (XEXP (op, 1)))
6123 return XEXP (op, 0);
6124
6125 if (GET_CODE (op) == MULT
6126 && CONST_INT_P (XEXP (op, 1))
6127 && ((unsigned) exact_log2 (INTVAL (XEXP (op, 1)))) < 64)
6128 return XEXP (op, 0);
6129
6130 return x;
6131}
6132
4745e701 6133/* Helper function for rtx cost calculation. Strip an extend
43e9d192
IB
6134 expression from X. Returns the inner operand if successful, or the
6135 original expression on failure. We deal with a number of possible
b10f1009
AP
6136 canonicalization variations here. If STRIP_SHIFT is true, then
6137 we can strip off a shift also. */
43e9d192 6138static rtx
b10f1009 6139aarch64_strip_extend (rtx x, bool strip_shift)
43e9d192
IB
6140{
6141 rtx op = x;
6142
6143 /* Zero and sign extraction of a widened value. */
6144 if ((GET_CODE (op) == ZERO_EXTRACT || GET_CODE (op) == SIGN_EXTRACT)
6145 && XEXP (op, 2) == const0_rtx
4745e701 6146 && GET_CODE (XEXP (op, 0)) == MULT
43e9d192
IB
6147 && aarch64_is_extend_from_extract (GET_MODE (op), XEXP (XEXP (op, 0), 1),
6148 XEXP (op, 1)))
6149 return XEXP (XEXP (op, 0), 0);
6150
6151 /* It can also be represented (for zero-extend) as an AND with an
6152 immediate. */
6153 if (GET_CODE (op) == AND
6154 && GET_CODE (XEXP (op, 0)) == MULT
6155 && CONST_INT_P (XEXP (XEXP (op, 0), 1))
6156 && CONST_INT_P (XEXP (op, 1))
6157 && aarch64_uxt_size (exact_log2 (INTVAL (XEXP (XEXP (op, 0), 1))),
6158 INTVAL (XEXP (op, 1))) != 0)
6159 return XEXP (XEXP (op, 0), 0);
6160
6161 /* Now handle extended register, as this may also have an optional
6162 left shift by 1..4. */
b10f1009
AP
6163 if (strip_shift
6164 && GET_CODE (op) == ASHIFT
43e9d192
IB
6165 && CONST_INT_P (XEXP (op, 1))
6166 && ((unsigned HOST_WIDE_INT) INTVAL (XEXP (op, 1))) <= 4)
6167 op = XEXP (op, 0);
6168
6169 if (GET_CODE (op) == ZERO_EXTEND
6170 || GET_CODE (op) == SIGN_EXTEND)
6171 op = XEXP (op, 0);
6172
6173 if (op != x)
6174 return op;
6175
4745e701
JG
6176 return x;
6177}
6178
0a78ebe4
KT
6179/* Return true iff CODE is a shift supported in combination
6180 with arithmetic instructions. */
4d1919ed 6181
0a78ebe4
KT
6182static bool
6183aarch64_shift_p (enum rtx_code code)
6184{
6185 return code == ASHIFT || code == ASHIFTRT || code == LSHIFTRT;
6186}
6187
b10f1009
AP
6188
6189/* Return true iff X is a cheap shift without a sign extend. */
6190
6191static bool
6192aarch64_cheap_mult_shift_p (rtx x)
6193{
6194 rtx op0, op1;
6195
6196 op0 = XEXP (x, 0);
6197 op1 = XEXP (x, 1);
6198
6199 if (!(aarch64_tune_params.extra_tuning_flags
6200 & AARCH64_EXTRA_TUNE_CHEAP_SHIFT_EXTEND))
6201 return false;
6202
6203 if (GET_CODE (op0) == SIGN_EXTEND)
6204 return false;
6205
6206 if (GET_CODE (x) == ASHIFT && CONST_INT_P (op1)
6207 && UINTVAL (op1) <= 4)
6208 return true;
6209
6210 if (GET_CODE (x) != MULT || !CONST_INT_P (op1))
6211 return false;
6212
6213 HOST_WIDE_INT l2 = exact_log2 (INTVAL (op1));
6214
6215 if (l2 > 0 && l2 <= 4)
6216 return true;
6217
6218 return false;
6219}
6220
4745e701 6221/* Helper function for rtx cost calculation. Calculate the cost of
0a78ebe4
KT
6222 a MULT or ASHIFT, which may be part of a compound PLUS/MINUS rtx.
6223 Return the calculated cost of the expression, recursing manually in to
4745e701
JG
6224 operands where needed. */
6225
6226static int
e548c9df 6227aarch64_rtx_mult_cost (rtx x, enum rtx_code code, int outer, bool speed)
4745e701
JG
6228{
6229 rtx op0, op1;
6230 const struct cpu_cost_table *extra_cost
b175b679 6231 = aarch64_tune_params.insn_extra_cost;
4745e701 6232 int cost = 0;
0a78ebe4 6233 bool compound_p = (outer == PLUS || outer == MINUS);
ef4bddc2 6234 machine_mode mode = GET_MODE (x);
4745e701
JG
6235
6236 gcc_checking_assert (code == MULT);
6237
6238 op0 = XEXP (x, 0);
6239 op1 = XEXP (x, 1);
6240
6241 if (VECTOR_MODE_P (mode))
6242 mode = GET_MODE_INNER (mode);
6243
6244 /* Integer multiply/fma. */
6245 if (GET_MODE_CLASS (mode) == MODE_INT)
6246 {
6247 /* The multiply will be canonicalized as a shift, cost it as such. */
0a78ebe4
KT
6248 if (aarch64_shift_p (GET_CODE (x))
6249 || (CONST_INT_P (op1)
6250 && exact_log2 (INTVAL (op1)) > 0))
4745e701 6251 {
0a78ebe4
KT
6252 bool is_extend = GET_CODE (op0) == ZERO_EXTEND
6253 || GET_CODE (op0) == SIGN_EXTEND;
4745e701
JG
6254 if (speed)
6255 {
0a78ebe4
KT
6256 if (compound_p)
6257 {
b10f1009
AP
6258 /* If the shift is considered cheap,
6259 then don't add any cost. */
6260 if (aarch64_cheap_mult_shift_p (x))
6261 ;
6262 else if (REG_P (op1))
0a78ebe4
KT
6263 /* ARITH + shift-by-register. */
6264 cost += extra_cost->alu.arith_shift_reg;
6265 else if (is_extend)
6266 /* ARITH + extended register. We don't have a cost field
6267 for ARITH+EXTEND+SHIFT, so use extend_arith here. */
6268 cost += extra_cost->alu.extend_arith;
6269 else
6270 /* ARITH + shift-by-immediate. */
6271 cost += extra_cost->alu.arith_shift;
6272 }
4745e701
JG
6273 else
6274 /* LSL (immediate). */
0a78ebe4
KT
6275 cost += extra_cost->alu.shift;
6276
4745e701 6277 }
0a78ebe4
KT
6278 /* Strip extends as we will have costed them in the case above. */
6279 if (is_extend)
b10f1009 6280 op0 = aarch64_strip_extend (op0, true);
4745e701 6281
e548c9df 6282 cost += rtx_cost (op0, VOIDmode, code, 0, speed);
4745e701
JG
6283
6284 return cost;
6285 }
6286
d2ac256b
KT
6287 /* MNEG or [US]MNEGL. Extract the NEG operand and indicate that it's a
6288 compound and let the below cases handle it. After all, MNEG is a
6289 special-case alias of MSUB. */
6290 if (GET_CODE (op0) == NEG)
6291 {
6292 op0 = XEXP (op0, 0);
6293 compound_p = true;
6294 }
6295
4745e701
JG
6296 /* Integer multiplies or FMAs have zero/sign extending variants. */
6297 if ((GET_CODE (op0) == ZERO_EXTEND
6298 && GET_CODE (op1) == ZERO_EXTEND)
6299 || (GET_CODE (op0) == SIGN_EXTEND
6300 && GET_CODE (op1) == SIGN_EXTEND))
6301 {
e548c9df
AM
6302 cost += rtx_cost (XEXP (op0, 0), VOIDmode, MULT, 0, speed);
6303 cost += rtx_cost (XEXP (op1, 0), VOIDmode, MULT, 1, speed);
4745e701
JG
6304
6305 if (speed)
6306 {
0a78ebe4 6307 if (compound_p)
d2ac256b 6308 /* SMADDL/UMADDL/UMSUBL/SMSUBL. */
4745e701
JG
6309 cost += extra_cost->mult[0].extend_add;
6310 else
6311 /* MUL/SMULL/UMULL. */
6312 cost += extra_cost->mult[0].extend;
6313 }
6314
6315 return cost;
6316 }
6317
d2ac256b 6318 /* This is either an integer multiply or a MADD. In both cases
4745e701 6319 we want to recurse and cost the operands. */
e548c9df
AM
6320 cost += rtx_cost (op0, mode, MULT, 0, speed);
6321 cost += rtx_cost (op1, mode, MULT, 1, speed);
4745e701
JG
6322
6323 if (speed)
6324 {
0a78ebe4 6325 if (compound_p)
d2ac256b 6326 /* MADD/MSUB. */
4745e701
JG
6327 cost += extra_cost->mult[mode == DImode].add;
6328 else
6329 /* MUL. */
6330 cost += extra_cost->mult[mode == DImode].simple;
6331 }
6332
6333 return cost;
6334 }
6335 else
6336 {
6337 if (speed)
6338 {
3d840f7d 6339 /* Floating-point FMA/FMUL can also support negations of the
d318517d
SN
6340 operands, unless the rounding mode is upward or downward in
 6341	     which case FNMUL is different from FMUL with operand negation.  */
6342 bool neg0 = GET_CODE (op0) == NEG;
6343 bool neg1 = GET_CODE (op1) == NEG;
6344 if (compound_p || !flag_rounding_math || (neg0 && neg1))
6345 {
6346 if (neg0)
6347 op0 = XEXP (op0, 0);
6348 if (neg1)
6349 op1 = XEXP (op1, 0);
6350 }
4745e701 6351
0a78ebe4 6352 if (compound_p)
4745e701
JG
6353 /* FMADD/FNMADD/FNMSUB/FMSUB. */
6354 cost += extra_cost->fp[mode == DFmode].fma;
6355 else
3d840f7d 6356 /* FMUL/FNMUL. */
4745e701
JG
6357 cost += extra_cost->fp[mode == DFmode].mult;
6358 }
6359
e548c9df
AM
6360 cost += rtx_cost (op0, mode, MULT, 0, speed);
6361 cost += rtx_cost (op1, mode, MULT, 1, speed);
4745e701
JG
6362 return cost;
6363 }
43e9d192
IB
6364}
6365
67747367
JG
6366static int
6367aarch64_address_cost (rtx x,
ef4bddc2 6368 machine_mode mode,
67747367
JG
6369 addr_space_t as ATTRIBUTE_UNUSED,
6370 bool speed)
6371{
6372 enum rtx_code c = GET_CODE (x);
b175b679 6373 const struct cpu_addrcost_table *addr_cost = aarch64_tune_params.addr_cost;
67747367
JG
6374 struct aarch64_address_info info;
6375 int cost = 0;
6376 info.shift = 0;
6377
6378 if (!aarch64_classify_address (&info, x, mode, c, false))
6379 {
6380 if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF)
6381 {
6382 /* This is a CONST or SYMBOL ref which will be split
6383 in a different way depending on the code model in use.
6384 Cost it through the generic infrastructure. */
e548c9df 6385 int cost_symbol_ref = rtx_cost (x, Pmode, MEM, 1, speed);
67747367
JG
6386 /* Divide through by the cost of one instruction to
6387 bring it to the same units as the address costs. */
6388 cost_symbol_ref /= COSTS_N_INSNS (1);
6389 /* The cost is then the cost of preparing the address,
6390 followed by an immediate (possibly 0) offset. */
6391 return cost_symbol_ref + addr_cost->imm_offset;
6392 }
6393 else
6394 {
6395 /* This is most likely a jump table from a case
6396 statement. */
6397 return addr_cost->register_offset;
6398 }
6399 }
6400
6401 switch (info.type)
6402 {
6403 case ADDRESS_LO_SUM:
6404 case ADDRESS_SYMBOLIC:
6405 case ADDRESS_REG_IMM:
6406 cost += addr_cost->imm_offset;
6407 break;
6408
6409 case ADDRESS_REG_WB:
6410 if (c == PRE_INC || c == PRE_DEC || c == PRE_MODIFY)
6411 cost += addr_cost->pre_modify;
6412 else if (c == POST_INC || c == POST_DEC || c == POST_MODIFY)
6413 cost += addr_cost->post_modify;
6414 else
6415 gcc_unreachable ();
6416
6417 break;
6418
6419 case ADDRESS_REG_REG:
6420 cost += addr_cost->register_offset;
6421 break;
6422
67747367 6423 case ADDRESS_REG_SXTW:
783879e6
EM
6424 cost += addr_cost->register_sextend;
6425 break;
6426
6427 case ADDRESS_REG_UXTW:
6428 cost += addr_cost->register_zextend;
67747367
JG
6429 break;
6430
6431 default:
6432 gcc_unreachable ();
6433 }
6434
6435
6436 if (info.shift > 0)
6437 {
6438 /* For the sake of calculating the cost of the shifted register
6439 component, we can treat same sized modes in the same way. */
6440 switch (GET_MODE_BITSIZE (mode))
6441 {
6442 case 16:
6443 cost += addr_cost->addr_scale_costs.hi;
6444 break;
6445
6446 case 32:
6447 cost += addr_cost->addr_scale_costs.si;
6448 break;
6449
6450 case 64:
6451 cost += addr_cost->addr_scale_costs.di;
6452 break;
6453
6454 /* We can't tell, or this is a 128-bit vector. */
6455 default:
6456 cost += addr_cost->addr_scale_costs.ti;
6457 break;
6458 }
6459 }
6460
6461 return cost;
6462}
6463
b9066f5a
MW
6464/* Return the cost of a branch. If SPEED_P is true then the compiler is
6465 optimizing for speed. If PREDICTABLE_P is true then the branch is predicted
6466 to be taken. */
6467
6468int
6469aarch64_branch_cost (bool speed_p, bool predictable_p)
6470{
6471 /* When optimizing for speed, use the cost of unpredictable branches. */
6472 const struct cpu_branch_cost *branch_costs =
b175b679 6473 aarch64_tune_params.branch_costs;
b9066f5a
MW
6474
6475 if (!speed_p || predictable_p)
6476 return branch_costs->predictable;
6477 else
6478 return branch_costs->unpredictable;
6479}
6480
7cc2145f
JG
6481/* Return true if the RTX X in mode MODE is a zero or sign extract
6482 usable in an ADD or SUB (extended register) instruction. */
6483static bool
ef4bddc2 6484aarch64_rtx_arith_op_extract_p (rtx x, machine_mode mode)
7cc2145f
JG
6485{
6486 /* Catch add with a sign extract.
6487 This is add_<optab><mode>_multp2. */
6488 if (GET_CODE (x) == SIGN_EXTRACT
6489 || GET_CODE (x) == ZERO_EXTRACT)
6490 {
6491 rtx op0 = XEXP (x, 0);
6492 rtx op1 = XEXP (x, 1);
6493 rtx op2 = XEXP (x, 2);
6494
6495 if (GET_CODE (op0) == MULT
6496 && CONST_INT_P (op1)
6497 && op2 == const0_rtx
6498 && CONST_INT_P (XEXP (op0, 1))
6499 && aarch64_is_extend_from_extract (mode,
6500 XEXP (op0, 1),
6501 op1))
6502 {
6503 return true;
6504 }
6505 }
e47c4031
KT
6506 /* The simple case <ARITH>, XD, XN, XM, [us]xt.
6507 No shift. */
6508 else if (GET_CODE (x) == SIGN_EXTEND
6509 || GET_CODE (x) == ZERO_EXTEND)
6510 return REG_P (XEXP (x, 0));
7cc2145f
JG
6511
6512 return false;
6513}
6514
61263118
KT
6515static bool
6516aarch64_frint_unspec_p (unsigned int u)
6517{
6518 switch (u)
6519 {
6520 case UNSPEC_FRINTZ:
6521 case UNSPEC_FRINTP:
6522 case UNSPEC_FRINTM:
6523 case UNSPEC_FRINTA:
6524 case UNSPEC_FRINTN:
6525 case UNSPEC_FRINTX:
6526 case UNSPEC_FRINTI:
6527 return true;
6528
6529 default:
6530 return false;
6531 }
6532}
6533
fb0cb7fa
KT
6534/* Return true iff X is an rtx that will match an extr instruction
6535 i.e. as described in the *extr<mode>5_insn family of patterns.
6536 OP0 and OP1 will be set to the operands of the shifts involved
6537 on success and will be NULL_RTX otherwise. */
6538
6539static bool
6540aarch64_extr_rtx_p (rtx x, rtx *res_op0, rtx *res_op1)
6541{
6542 rtx op0, op1;
6543 machine_mode mode = GET_MODE (x);
6544
6545 *res_op0 = NULL_RTX;
6546 *res_op1 = NULL_RTX;
6547
6548 if (GET_CODE (x) != IOR)
6549 return false;
6550
6551 op0 = XEXP (x, 0);
6552 op1 = XEXP (x, 1);
6553
6554 if ((GET_CODE (op0) == ASHIFT && GET_CODE (op1) == LSHIFTRT)
6555 || (GET_CODE (op1) == ASHIFT && GET_CODE (op0) == LSHIFTRT))
6556 {
6557 /* Canonicalise locally to ashift in op0, lshiftrt in op1. */
6558 if (GET_CODE (op1) == ASHIFT)
6559 std::swap (op0, op1);
6560
6561 if (!CONST_INT_P (XEXP (op0, 1)) || !CONST_INT_P (XEXP (op1, 1)))
6562 return false;
6563
6564 unsigned HOST_WIDE_INT shft_amnt_0 = UINTVAL (XEXP (op0, 1));
6565 unsigned HOST_WIDE_INT shft_amnt_1 = UINTVAL (XEXP (op1, 1));
6566
6567 if (shft_amnt_0 < GET_MODE_BITSIZE (mode)
6568 && shft_amnt_0 + shft_amnt_1 == GET_MODE_BITSIZE (mode))
6569 {
6570 *res_op0 = XEXP (op0, 0);
6571 *res_op1 = XEXP (op1, 0);
6572 return true;
6573 }
6574 }
6575
6576 return false;
6577}
6578
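/* For instance,
     (ior:DI (ashift:DI (reg A) (const_int 48))
             (lshiftrt:DI (reg B) (const_int 16)))
   passes the check above because 48 + 16 == 64, so it can be emitted as
   a single EXTR of A and B; *RES_OP0 and *RES_OP1 are set to A and B.  */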
2d5ffe46
AP
6579/* Calculate the cost of calculating (if_then_else (OP0) (OP1) (OP2)),
6580 storing it in *COST. Result is true if the total cost of the operation
6581 has now been calculated. */
6582static bool
6583aarch64_if_then_else_costs (rtx op0, rtx op1, rtx op2, int *cost, bool speed)
6584{
b9e3afe9
AP
6585 rtx inner;
6586 rtx comparator;
6587 enum rtx_code cmpcode;
6588
6589 if (COMPARISON_P (op0))
6590 {
6591 inner = XEXP (op0, 0);
6592 comparator = XEXP (op0, 1);
6593 cmpcode = GET_CODE (op0);
6594 }
6595 else
6596 {
6597 inner = op0;
6598 comparator = const0_rtx;
6599 cmpcode = NE;
6600 }
6601
2d5ffe46
AP
6602 if (GET_CODE (op1) == PC || GET_CODE (op2) == PC)
6603 {
6604 /* Conditional branch. */
b9e3afe9 6605 if (GET_MODE_CLASS (GET_MODE (inner)) == MODE_CC)
2d5ffe46
AP
6606 return true;
6607 else
6608 {
b9e3afe9 6609 if (cmpcode == NE || cmpcode == EQ)
2d5ffe46 6610 {
2d5ffe46
AP
6611 if (comparator == const0_rtx)
6612 {
6613 /* TBZ/TBNZ/CBZ/CBNZ. */
6614 if (GET_CODE (inner) == ZERO_EXTRACT)
6615 /* TBZ/TBNZ. */
e548c9df
AM
6616 *cost += rtx_cost (XEXP (inner, 0), VOIDmode,
6617 ZERO_EXTRACT, 0, speed);
6618 else
6619 /* CBZ/CBNZ. */
6620 *cost += rtx_cost (inner, VOIDmode, cmpcode, 0, speed);
2d5ffe46
AP
6621
6622 return true;
6623 }
6624 }
b9e3afe9 6625 else if (cmpcode == LT || cmpcode == GE)
2d5ffe46 6626 {
2d5ffe46
AP
6627 /* TBZ/TBNZ. */
6628 if (comparator == const0_rtx)
6629 return true;
6630 }
6631 }
6632 }
b9e3afe9 6633 else if (GET_MODE_CLASS (GET_MODE (inner)) == MODE_CC)
2d5ffe46 6634 {
786298dc 6635 /* CCMP. */
6dfeb7ce 6636 if (GET_CODE (op1) == COMPARE)
786298dc
WD
6637 {
6638 /* Increase cost of CCMP reg, 0, imm, CC to prefer CMP reg, 0. */
6639 if (XEXP (op1, 1) == const0_rtx)
6640 *cost += 1;
6641 if (speed)
6642 {
6643 machine_mode mode = GET_MODE (XEXP (op1, 0));
6644 const struct cpu_cost_table *extra_cost
6645 = aarch64_tune_params.insn_extra_cost;
6646
6647 if (GET_MODE_CLASS (mode) == MODE_INT)
6648 *cost += extra_cost->alu.arith;
6649 else
6650 *cost += extra_cost->fp[mode == DFmode].compare;
6651 }
6652 return true;
6653 }
6654
2d5ffe46
AP
6655 /* It's a conditional operation based on the status flags,
6656 so it must be some flavor of CSEL. */
6657
6658 /* CSNEG, CSINV, and CSINC are handled for free as part of CSEL. */
6659 if (GET_CODE (op1) == NEG
6660 || GET_CODE (op1) == NOT
6661 || (GET_CODE (op1) == PLUS && XEXP (op1, 1) == const1_rtx))
6662 op1 = XEXP (op1, 0);
bad00732
KT
6663 else if (GET_CODE (op1) == ZERO_EXTEND && GET_CODE (op2) == ZERO_EXTEND)
6664 {
6665 /* CSEL with zero-extension (*cmovdi_insn_uxtw). */
6666 op1 = XEXP (op1, 0);
6667 op2 = XEXP (op2, 0);
6668 }
2d5ffe46 6669
e548c9df
AM
6670 *cost += rtx_cost (op1, VOIDmode, IF_THEN_ELSE, 1, speed);
6671 *cost += rtx_cost (op2, VOIDmode, IF_THEN_ELSE, 2, speed);
2d5ffe46
AP
6672 return true;
6673 }
6674
6675 /* We don't know what this is, cost all operands. */
6676 return false;
6677}
6678
283b6c85
KT
6679/* Check whether X is a bitfield operation of the form shift + extend that
6680 maps down to a UBFIZ/SBFIZ/UBFX/SBFX instruction. If so, return the
6681 operand to which the bitfield operation is applied. Otherwise return
6682 NULL_RTX. */
6683
6684static rtx
6685aarch64_extend_bitfield_pattern_p (rtx x)
6686{
6687 rtx_code outer_code = GET_CODE (x);
6688 machine_mode outer_mode = GET_MODE (x);
6689
6690 if (outer_code != ZERO_EXTEND && outer_code != SIGN_EXTEND
6691 && outer_mode != SImode && outer_mode != DImode)
6692 return NULL_RTX;
6693
6694 rtx inner = XEXP (x, 0);
6695 rtx_code inner_code = GET_CODE (inner);
6696 machine_mode inner_mode = GET_MODE (inner);
6697 rtx op = NULL_RTX;
6698
6699 switch (inner_code)
6700 {
6701 case ASHIFT:
6702 if (CONST_INT_P (XEXP (inner, 1))
6703 && (inner_mode == QImode || inner_mode == HImode))
6704 op = XEXP (inner, 0);
6705 break;
6706 case LSHIFTRT:
6707 if (outer_code == ZERO_EXTEND && CONST_INT_P (XEXP (inner, 1))
6708 && (inner_mode == QImode || inner_mode == HImode))
6709 op = XEXP (inner, 0);
6710 break;
6711 case ASHIFTRT:
6712 if (outer_code == SIGN_EXTEND && CONST_INT_P (XEXP (inner, 1))
6713 && (inner_mode == QImode || inner_mode == HImode))
6714 op = XEXP (inner, 0);
6715 break;
6716 default:
6717 break;
6718 }
6719
6720 return op;
6721}
6722
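/* For example, (zero_extend:SI (lshiftrt:HI (reg R) (const_int 3))) is
   recognised here and R is returned, since the shift plus extension can
   be implemented as a single UBFX of bits [15:3] of R.  */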
8c83f71d
KT
6723/* Return true if the mask and a shift amount from an RTX of the form
6724 (x << SHFT_AMNT) & MASK are valid to combine into a UBFIZ instruction of
6725 mode MODE. See the *andim_ashift<mode>_bfiz pattern. */
6726
6727bool
6728aarch64_mask_and_shift_for_ubfiz_p (machine_mode mode, rtx mask, rtx shft_amnt)
6729{
6730 return CONST_INT_P (mask) && CONST_INT_P (shft_amnt)
6731 && INTVAL (shft_amnt) < GET_MODE_BITSIZE (mode)
6732 && exact_log2 ((INTVAL (mask) >> INTVAL (shft_amnt)) + 1) >= 0
6733 && (INTVAL (mask) & ((1 << INTVAL (shft_amnt)) - 1)) == 0;
6734}
6735
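/* For example, with mode == SImode, mask == 0xff0 and shft_amnt == 4 the
   function returns true: (x << 4) & 0xff0 places an 8-bit field in bits
   [11:4] of the result, which is exactly what UBFIZ w0, w1, #4, #8
   produces.  */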
43e9d192
IB
6736/* Calculate the cost of calculating X, storing it in *COST. Result
6737 is true if the total cost of the operation has now been calculated. */
6738static bool
e548c9df 6739aarch64_rtx_costs (rtx x, machine_mode mode, int outer ATTRIBUTE_UNUSED,
43e9d192
IB
6740 int param ATTRIBUTE_UNUSED, int *cost, bool speed)
6741{
a8eecd00 6742 rtx op0, op1, op2;
73250c4c 6743 const struct cpu_cost_table *extra_cost
b175b679 6744 = aarch64_tune_params.insn_extra_cost;
e548c9df 6745 int code = GET_CODE (x);
43e9d192 6746
7fc5ef02
JG
6747 /* By default, assume that everything has equivalent cost to the
6748 cheapest instruction. Any additional costs are applied as a delta
6749 above this default. */
6750 *cost = COSTS_N_INSNS (1);
6751
43e9d192
IB
6752 switch (code)
6753 {
6754 case SET:
ba123b0d
JG
6755 /* The cost depends entirely on the operands to SET. */
6756 *cost = 0;
43e9d192
IB
6757 op0 = SET_DEST (x);
6758 op1 = SET_SRC (x);
6759
6760 switch (GET_CODE (op0))
6761 {
6762 case MEM:
6763 if (speed)
2961177e
JG
6764 {
6765 rtx address = XEXP (op0, 0);
b6875aac
KV
6766 if (VECTOR_MODE_P (mode))
6767 *cost += extra_cost->ldst.storev;
6768 else if (GET_MODE_CLASS (mode) == MODE_INT)
2961177e
JG
6769 *cost += extra_cost->ldst.store;
6770 else if (mode == SFmode)
6771 *cost += extra_cost->ldst.storef;
6772 else if (mode == DFmode)
6773 *cost += extra_cost->ldst.stored;
6774
6775 *cost +=
6776 COSTS_N_INSNS (aarch64_address_cost (address, mode,
6777 0, speed));
6778 }
43e9d192 6779
e548c9df 6780 *cost += rtx_cost (op1, mode, SET, 1, speed);
43e9d192
IB
6781 return true;
6782
6783 case SUBREG:
6784 if (! REG_P (SUBREG_REG (op0)))
e548c9df 6785 *cost += rtx_cost (SUBREG_REG (op0), VOIDmode, SET, 0, speed);
ba123b0d 6786
43e9d192
IB
6787 /* Fall through. */
6788 case REG:
b6875aac
KV
6789 /* The cost is one per vector-register copied. */
6790 if (VECTOR_MODE_P (GET_MODE (op0)) && REG_P (op1))
6791 {
6792 int n_minus_1 = (GET_MODE_SIZE (GET_MODE (op0)) - 1)
6793 / GET_MODE_SIZE (V4SImode);
6794 *cost = COSTS_N_INSNS (n_minus_1 + 1);
6795 }
ba123b0d
JG
6796 /* const0_rtx is in general free, but we will use an
6797 instruction to set a register to 0. */
b6875aac
KV
6798 else if (REG_P (op1) || op1 == const0_rtx)
6799 {
6800 /* The cost is 1 per register copied. */
6801 int n_minus_1 = (GET_MODE_SIZE (GET_MODE (op0)) - 1)
ba123b0d 6802 / UNITS_PER_WORD;
b6875aac
KV
6803 *cost = COSTS_N_INSNS (n_minus_1 + 1);
6804 }
ba123b0d
JG
6805 else
6806 /* Cost is just the cost of the RHS of the set. */
e548c9df 6807 *cost += rtx_cost (op1, mode, SET, 1, speed);
43e9d192
IB
6808 return true;
6809
ba123b0d 6810 case ZERO_EXTRACT:
43e9d192 6811 case SIGN_EXTRACT:
ba123b0d
JG
6812 /* Bit-field insertion. Strip any redundant widening of
6813 the RHS to meet the width of the target. */
43e9d192
IB
6814 if (GET_CODE (op1) == SUBREG)
6815 op1 = SUBREG_REG (op1);
6816 if ((GET_CODE (op1) == ZERO_EXTEND
6817 || GET_CODE (op1) == SIGN_EXTEND)
4aa81c2e 6818 && CONST_INT_P (XEXP (op0, 1))
43e9d192
IB
6819 && (GET_MODE_BITSIZE (GET_MODE (XEXP (op1, 0)))
6820 >= INTVAL (XEXP (op0, 1))))
6821 op1 = XEXP (op1, 0);
ba123b0d
JG
6822
6823 if (CONST_INT_P (op1))
6824 {
6825 /* MOV immediate is assumed to always be cheap. */
6826 *cost = COSTS_N_INSNS (1);
6827 }
6828 else
6829 {
6830 /* BFM. */
6831 if (speed)
6832 *cost += extra_cost->alu.bfi;
e548c9df 6833 *cost += rtx_cost (op1, VOIDmode, (enum rtx_code) code, 1, speed);
ba123b0d
JG
6834 }
6835
43e9d192
IB
6836 return true;
6837
6838 default:
ba123b0d
JG
6839 /* We can't make sense of this, assume default cost. */
6840 *cost = COSTS_N_INSNS (1);
61263118 6841 return false;
43e9d192
IB
6842 }
6843 return false;
6844
9dfc162c
JG
6845 case CONST_INT:
6846 /* If an instruction can incorporate a constant within the
6847 instruction, the instruction's expression avoids calling
6848 rtx_cost() on the constant. If rtx_cost() is called on a
6849 constant, then it is usually because the constant must be
6850 moved into a register by one or more instructions.
6851
6852 The exception is constant 0, which can be expressed
6853 as XZR/WZR and is therefore free. The exception to this is
6854 if we have (set (reg) (const0_rtx)) in which case we must cost
6855 the move. However, we can catch that when we cost the SET, so
6856 we don't need to consider that here. */
6857 if (x == const0_rtx)
6858 *cost = 0;
6859 else
6860 {
6861 /* To an approximation, building any other constant is
6862 proportionally expensive to the number of instructions
6863 required to build that constant. This is true whether we
6864 are compiling for SPEED or otherwise. */
82614948
RR
6865 *cost = COSTS_N_INSNS (aarch64_internal_mov_immediate
6866 (NULL_RTX, x, false, mode));
9dfc162c
JG
6867 }
6868 return true;
6869
6870 case CONST_DOUBLE:
6871 if (speed)
6872 {
6873 /* mov[df,sf]_aarch64. */
6874 if (aarch64_float_const_representable_p (x))
6875 /* FMOV (scalar immediate). */
6876 *cost += extra_cost->fp[mode == DFmode].fpconst;
6877 else if (!aarch64_float_const_zero_rtx_p (x))
6878 {
6879 /* This will be a load from memory. */
6880 if (mode == DFmode)
6881 *cost += extra_cost->ldst.loadd;
6882 else
6883 *cost += extra_cost->ldst.loadf;
6884 }
6885 else
6886 /* Otherwise this is +0.0. We get this using MOVI d0, #0
6887 or MOV v0.s[0], wzr - neither of which are modeled by the
6888 cost tables. Just use the default cost. */
6889 {
6890 }
6891 }
6892
6893 return true;
6894
43e9d192
IB
6895 case MEM:
6896 if (speed)
2961177e
JG
6897 {
6898 /* For loads we want the base cost of a load, plus an
6899 approximation for the additional cost of the addressing
6900 mode. */
6901 rtx address = XEXP (x, 0);
b6875aac
KV
6902 if (VECTOR_MODE_P (mode))
6903 *cost += extra_cost->ldst.loadv;
6904 else if (GET_MODE_CLASS (mode) == MODE_INT)
2961177e
JG
6905 *cost += extra_cost->ldst.load;
6906 else if (mode == SFmode)
6907 *cost += extra_cost->ldst.loadf;
6908 else if (mode == DFmode)
6909 *cost += extra_cost->ldst.loadd;
6910
6911 *cost +=
6912 COSTS_N_INSNS (aarch64_address_cost (address, mode,
6913 0, speed));
6914 }
43e9d192
IB
6915
6916 return true;
6917
6918 case NEG:
4745e701
JG
6919 op0 = XEXP (x, 0);
6920
b6875aac
KV
6921 if (VECTOR_MODE_P (mode))
6922 {
6923 if (speed)
6924 {
6925 /* FNEG. */
6926 *cost += extra_cost->vect.alu;
6927 }
6928 return false;
6929 }
6930
e548c9df
AM
6931 if (GET_MODE_CLASS (mode) == MODE_INT)
6932 {
4745e701
JG
6933 if (GET_RTX_CLASS (GET_CODE (op0)) == RTX_COMPARE
6934 || GET_RTX_CLASS (GET_CODE (op0)) == RTX_COMM_COMPARE)
6935 {
6936 /* CSETM. */
e548c9df 6937 *cost += rtx_cost (XEXP (op0, 0), VOIDmode, NEG, 0, speed);
4745e701
JG
6938 return true;
6939 }
6940
6941 /* Cost this as SUB wzr, X. */
e548c9df 6942 op0 = CONST0_RTX (mode);
4745e701
JG
6943 op1 = XEXP (x, 0);
6944 goto cost_minus;
6945 }
6946
e548c9df 6947 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
4745e701
JG
6948 {
6949 /* Support (neg(fma...)) as a single instruction only if
6950 sign of zeros is unimportant. This matches the decision
6951 making in aarch64.md. */
6952 if (GET_CODE (op0) == FMA && !HONOR_SIGNED_ZEROS (GET_MODE (op0)))
6953 {
6954 /* FNMADD. */
e548c9df 6955 *cost = rtx_cost (op0, mode, NEG, 0, speed);
4745e701
JG
6956 return true;
6957 }
d318517d
SN
6958 if (GET_CODE (op0) == MULT)
6959 {
6960 /* FNMUL. */
6961 *cost = rtx_cost (op0, mode, NEG, 0, speed);
6962 return true;
6963 }
4745e701
JG
6964 if (speed)
6965 /* FNEG. */
6966 *cost += extra_cost->fp[mode == DFmode].neg;
6967 return false;
6968 }
6969
6970 return false;
43e9d192 6971
781aeb73
KT
6972 case CLRSB:
6973 case CLZ:
6974 if (speed)
b6875aac
KV
6975 {
6976 if (VECTOR_MODE_P (mode))
6977 *cost += extra_cost->vect.alu;
6978 else
6979 *cost += extra_cost->alu.clz;
6980 }
781aeb73
KT
6981
6982 return false;
6983
43e9d192
IB
6984 case COMPARE:
6985 op0 = XEXP (x, 0);
6986 op1 = XEXP (x, 1);
6987
6988 if (op1 == const0_rtx
6989 && GET_CODE (op0) == AND)
6990 {
6991 x = op0;
e548c9df 6992 mode = GET_MODE (op0);
43e9d192
IB
6993 goto cost_logic;
6994 }
6995
a8eecd00
JG
6996 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT)
6997 {
6998 /* TODO: A write to the CC flags possibly costs extra, this
6999 needs encoding in the cost tables. */
7000
e548c9df 7001 mode = GET_MODE (op0);
a8eecd00
JG
7002 /* ANDS. */
7003 if (GET_CODE (op0) == AND)
7004 {
7005 x = op0;
7006 goto cost_logic;
7007 }
7008
7009 if (GET_CODE (op0) == PLUS)
7010 {
7011 /* ADDS (and CMN alias). */
7012 x = op0;
7013 goto cost_plus;
7014 }
7015
7016 if (GET_CODE (op0) == MINUS)
7017 {
7018 /* SUBS. */
7019 x = op0;
7020 goto cost_minus;
7021 }
7022
345854d8
KT
7023 if (GET_CODE (op0) == ZERO_EXTRACT && op1 == const0_rtx
7024 && GET_MODE (x) == CC_NZmode && CONST_INT_P (XEXP (op0, 1))
7025 && CONST_INT_P (XEXP (op0, 2)))
7026 {
7027 /* COMPARE of ZERO_EXTRACT form of TST-immediate.
7028 Handle it here directly rather than going to cost_logic
7029 since we know the immediate generated for the TST is valid
7030 so we can avoid creating an intermediate rtx for it only
7031 for costing purposes. */
7032 if (speed)
7033 *cost += extra_cost->alu.logical;
7034
7035 *cost += rtx_cost (XEXP (op0, 0), GET_MODE (op0),
7036 ZERO_EXTRACT, 0, speed);
7037 return true;
7038 }
7039
a8eecd00
JG
7040 if (GET_CODE (op1) == NEG)
7041 {
7042 /* CMN. */
7043 if (speed)
7044 *cost += extra_cost->alu.arith;
7045
e548c9df
AM
7046 *cost += rtx_cost (op0, mode, COMPARE, 0, speed);
7047 *cost += rtx_cost (XEXP (op1, 0), mode, NEG, 1, speed);
a8eecd00
JG
7048 return true;
7049 }
7050
7051 /* CMP.
7052
7053 Compare can freely swap the order of operands, and
7054 canonicalization puts the more complex operation first.
7055 But the integer MINUS logic expects the shift/extend
7056 operation in op1. */
7057 if (! (REG_P (op0)
7058 || (GET_CODE (op0) == SUBREG && REG_P (SUBREG_REG (op0)))))
7059 {
7060 op0 = XEXP (x, 1);
7061 op1 = XEXP (x, 0);
7062 }
7063 goto cost_minus;
7064 }
7065
7066 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_FLOAT)
7067 {
7068 /* FCMP. */
7069 if (speed)
7070 *cost += extra_cost->fp[mode == DFmode].compare;
7071
7072 if (CONST_DOUBLE_P (op1) && aarch64_float_const_zero_rtx_p (op1))
7073 {
e548c9df 7074 *cost += rtx_cost (op0, VOIDmode, COMPARE, 0, speed);
a8eecd00
JG
7075 /* FCMP supports constant 0.0 for no extra cost. */
7076 return true;
7077 }
7078 return false;
7079 }
7080
b6875aac
KV
7081 if (VECTOR_MODE_P (mode))
7082 {
7083 /* Vector compare. */
7084 if (speed)
7085 *cost += extra_cost->vect.alu;
7086
7087 if (aarch64_float_const_zero_rtx_p (op1))
7088 {
7089 /* Vector cm (eq|ge|gt|lt|le) supports constant 0.0 for no extra
7090 cost. */
7091 return true;
7092 }
7093 return false;
7094 }
a8eecd00 7095 return false;
43e9d192
IB
7096
7097 case MINUS:
4745e701
JG
7098 {
7099 op0 = XEXP (x, 0);
7100 op1 = XEXP (x, 1);
7101
7102cost_minus:
e548c9df 7103 *cost += rtx_cost (op0, mode, MINUS, 0, speed);
23cb6618 7104
4745e701
JG
7105 /* Detect valid immediates. */
7106 if ((GET_MODE_CLASS (mode) == MODE_INT
7107 || (GET_MODE_CLASS (mode) == MODE_CC
7108 && GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT))
7109 && CONST_INT_P (op1)
7110 && aarch64_uimm12_shift (INTVAL (op1)))
7111 {
4745e701
JG
7112 if (speed)
7113 /* SUB(S) (immediate). */
7114 *cost += extra_cost->alu.arith;
7115 return true;
4745e701
JG
7116 }
7117
7cc2145f
JG
7118 /* Look for SUB (extended register). */
7119 if (aarch64_rtx_arith_op_extract_p (op1, mode))
7120 {
7121 if (speed)
2533c820 7122 *cost += extra_cost->alu.extend_arith;
7cc2145f 7123
b10f1009 7124 op1 = aarch64_strip_extend (op1, true);
e47c4031 7125 *cost += rtx_cost (op1, VOIDmode,
e548c9df 7126 (enum rtx_code) GET_CODE (op1), 0, speed);
7cc2145f
JG
7127 return true;
7128 }
7129
b10f1009 7130 rtx new_op1 = aarch64_strip_extend (op1, false);
4745e701
JG
7131
7132 /* Cost this as an FMA-alike operation. */
7133 if ((GET_CODE (new_op1) == MULT
0a78ebe4 7134 || aarch64_shift_p (GET_CODE (new_op1)))
4745e701
JG
7135 && code != COMPARE)
7136 {
7137 *cost += aarch64_rtx_mult_cost (new_op1, MULT,
7138 (enum rtx_code) code,
7139 speed);
4745e701
JG
7140 return true;
7141 }
43e9d192 7142
e548c9df 7143 *cost += rtx_cost (new_op1, VOIDmode, MINUS, 1, speed);
43e9d192 7144
4745e701
JG
7145 if (speed)
7146 {
b6875aac
KV
7147 if (VECTOR_MODE_P (mode))
7148 {
7149 /* Vector SUB. */
7150 *cost += extra_cost->vect.alu;
7151 }
7152 else if (GET_MODE_CLASS (mode) == MODE_INT)
7153 {
7154 /* SUB(S). */
7155 *cost += extra_cost->alu.arith;
7156 }
4745e701 7157 else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
b6875aac
KV
7158 {
7159 /* FSUB. */
7160 *cost += extra_cost->fp[mode == DFmode].addsub;
7161 }
4745e701
JG
7162 }
7163 return true;
7164 }
43e9d192
IB
7165
7166 case PLUS:
4745e701
JG
7167 {
7168 rtx new_op0;
43e9d192 7169
4745e701
JG
7170 op0 = XEXP (x, 0);
7171 op1 = XEXP (x, 1);
43e9d192 7172
a8eecd00 7173cost_plus:
4745e701
JG
7174 if (GET_RTX_CLASS (GET_CODE (op0)) == RTX_COMPARE
7175 || GET_RTX_CLASS (GET_CODE (op0)) == RTX_COMM_COMPARE)
7176 {
7177 /* CSINC. */
e548c9df
AM
7178 *cost += rtx_cost (XEXP (op0, 0), mode, PLUS, 0, speed);
7179 *cost += rtx_cost (op1, mode, PLUS, 1, speed);
4745e701
JG
7180 return true;
7181 }
43e9d192 7182
4745e701
JG
7183 if (GET_MODE_CLASS (mode) == MODE_INT
7184 && CONST_INT_P (op1)
7185 && aarch64_uimm12_shift (INTVAL (op1)))
7186 {
e548c9df 7187 *cost += rtx_cost (op0, mode, PLUS, 0, speed);
43e9d192 7188
4745e701
JG
7189 if (speed)
7190 /* ADD (immediate). */
7191 *cost += extra_cost->alu.arith;
7192 return true;
7193 }
7194
e548c9df 7195 *cost += rtx_cost (op1, mode, PLUS, 1, speed);
23cb6618 7196
7cc2145f
JG
7197 /* Look for ADD (extended register). */
7198 if (aarch64_rtx_arith_op_extract_p (op0, mode))
7199 {
7200 if (speed)
2533c820 7201 *cost += extra_cost->alu.extend_arith;
7cc2145f 7202
b10f1009 7203 op0 = aarch64_strip_extend (op0, true);
e47c4031 7204 *cost += rtx_cost (op0, VOIDmode,
e548c9df 7205 (enum rtx_code) GET_CODE (op0), 0, speed);
7cc2145f
JG
7206 return true;
7207 }
7208
4745e701
JG
7209 /* Strip any extend, leave shifts behind as we will
7210 cost them through mult_cost. */
b10f1009 7211 new_op0 = aarch64_strip_extend (op0, false);
4745e701
JG
7212
7213 if (GET_CODE (new_op0) == MULT
0a78ebe4 7214 || aarch64_shift_p (GET_CODE (new_op0)))
4745e701
JG
7215 {
7216 *cost += aarch64_rtx_mult_cost (new_op0, MULT, PLUS,
7217 speed);
4745e701
JG
7218 return true;
7219 }
7220
e548c9df 7221 *cost += rtx_cost (new_op0, VOIDmode, PLUS, 0, speed);
4745e701
JG
7222
7223 if (speed)
7224 {
b6875aac
KV
7225 if (VECTOR_MODE_P (mode))
7226 {
7227 /* Vector ADD. */
7228 *cost += extra_cost->vect.alu;
7229 }
7230 else if (GET_MODE_CLASS (mode) == MODE_INT)
7231 {
7232 /* ADD. */
7233 *cost += extra_cost->alu.arith;
7234 }
4745e701 7235 else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
b6875aac
KV
7236 {
7237 /* FADD. */
7238 *cost += extra_cost->fp[mode == DFmode].addsub;
7239 }
4745e701
JG
7240 }
7241 return true;
7242 }
43e9d192 7243
18b42b2a
KT
7244 case BSWAP:
7245 *cost = COSTS_N_INSNS (1);
7246
7247 if (speed)
b6875aac
KV
7248 {
7249 if (VECTOR_MODE_P (mode))
7250 *cost += extra_cost->vect.alu;
7251 else
7252 *cost += extra_cost->alu.rev;
7253 }
18b42b2a
KT
7254 return false;
7255
43e9d192 7256 case IOR:
f7d5cf8d
KT
7257 if (aarch_rev16_p (x))
7258 {
7259 *cost = COSTS_N_INSNS (1);
7260
b6875aac
KV
7261 if (speed)
7262 {
7263 if (VECTOR_MODE_P (mode))
7264 *cost += extra_cost->vect.alu;
7265 else
7266 *cost += extra_cost->alu.rev;
7267 }
7268 return true;
f7d5cf8d 7269 }
fb0cb7fa
KT
7270
7271 if (aarch64_extr_rtx_p (x, &op0, &op1))
7272 {
e548c9df
AM
7273 *cost += rtx_cost (op0, mode, IOR, 0, speed);
7274 *cost += rtx_cost (op1, mode, IOR, 1, speed);
fb0cb7fa
KT
7275 if (speed)
7276 *cost += extra_cost->alu.shift;
7277
7278 return true;
7279 }
f7d5cf8d 7280 /* Fall through. */
43e9d192
IB
7281 case XOR:
7282 case AND:
7283 cost_logic:
7284 op0 = XEXP (x, 0);
7285 op1 = XEXP (x, 1);
7286
b6875aac
KV
7287 if (VECTOR_MODE_P (mode))
7288 {
7289 if (speed)
7290 *cost += extra_cost->vect.alu;
7291 return true;
7292 }
7293
268c3b47
JG
7294 if (code == AND
7295 && GET_CODE (op0) == MULT
7296 && CONST_INT_P (XEXP (op0, 1))
7297 && CONST_INT_P (op1)
7298 && aarch64_uxt_size (exact_log2 (INTVAL (XEXP (op0, 1))),
7299 INTVAL (op1)) != 0)
7300 {
7301 /* This is a UBFM/SBFM. */
e548c9df 7302 *cost += rtx_cost (XEXP (op0, 0), mode, ZERO_EXTRACT, 0, speed);
268c3b47
JG
7303 if (speed)
7304 *cost += extra_cost->alu.bfx;
7305 return true;
7306 }
7307
e548c9df 7308 if (GET_MODE_CLASS (mode) == MODE_INT)
43e9d192 7309 {
8c83f71d 7310 if (CONST_INT_P (op1))
43e9d192 7311 {
8c83f71d
KT
7312 /* We have a mask + shift version of a UBFIZ
7313 i.e. the *andim_ashift<mode>_bfiz pattern. */
7314 if (GET_CODE (op0) == ASHIFT
7315 && aarch64_mask_and_shift_for_ubfiz_p (mode, op1,
7316 XEXP (op0, 1)))
7317 {
7318 *cost += rtx_cost (XEXP (op0, 0), mode,
7319 (enum rtx_code) code, 0, speed);
7320 if (speed)
7321 *cost += extra_cost->alu.bfx;
268c3b47 7322
8c83f71d
KT
7323 return true;
7324 }
7325 else if (aarch64_bitmask_imm (INTVAL (op1), mode))
7326 {
7327 /* We possibly get the immediate for free; this is not
7328 modelled. */
7329 *cost += rtx_cost (op0, mode, (enum rtx_code) code, 0, speed);
7330 if (speed)
7331 *cost += extra_cost->alu.logical;
268c3b47 7332
8c83f71d
KT
7333 return true;
7334 }
43e9d192
IB
7335 }
7336 else
7337 {
268c3b47
JG
7338 rtx new_op0 = op0;
7339
7340 /* Handle ORN, EON, or BIC. */
43e9d192
IB
7341 if (GET_CODE (op0) == NOT)
7342 op0 = XEXP (op0, 0);
268c3b47
JG
7343
7344 new_op0 = aarch64_strip_shift (op0);
7345
7346 /* If we had a shift on op0 then this is a logical-shift-
7347 by-register/immediate operation. Otherwise, this is just
7348 a logical operation. */
7349 if (speed)
7350 {
7351 if (new_op0 != op0)
7352 {
7353 /* Shift by immediate. */
7354 if (CONST_INT_P (XEXP (op0, 1)))
7355 *cost += extra_cost->alu.log_shift;
7356 else
7357 *cost += extra_cost->alu.log_shift_reg;
7358 }
7359 else
7360 *cost += extra_cost->alu.logical;
7361 }
7362
7363 /* In both cases we want to cost both operands. */
e548c9df
AM
7364 *cost += rtx_cost (new_op0, mode, (enum rtx_code) code, 0, speed);
7365 *cost += rtx_cost (op1, mode, (enum rtx_code) code, 1, speed);
268c3b47
JG
7366
7367 return true;
43e9d192 7368 }
43e9d192
IB
7369 }
7370 return false;
7371
268c3b47 7372 case NOT:
6365da9e
KT
7373 x = XEXP (x, 0);
7374 op0 = aarch64_strip_shift (x);
7375
b6875aac
KV
7376 if (VECTOR_MODE_P (mode))
7377 {
7378 /* Vector NOT. */
7379 *cost += extra_cost->vect.alu;
7380 return false;
7381 }
7382
6365da9e
KT
7383 /* MVN-shifted-reg. */
7384 if (op0 != x)
7385 {
e548c9df 7386 *cost += rtx_cost (op0, mode, (enum rtx_code) code, 0, speed);
6365da9e
KT
7387
7388 if (speed)
7389 *cost += extra_cost->alu.log_shift;
7390
7391 return true;
7392 }
7393 /* EON can have two forms: (xor (not a) b) but also (not (xor a b)).
7394 Handle the second form here taking care that 'a' in the above can
7395 be a shift. */
7396 else if (GET_CODE (op0) == XOR)
7397 {
7398 rtx newop0 = XEXP (op0, 0);
7399 rtx newop1 = XEXP (op0, 1);
7400 rtx op0_stripped = aarch64_strip_shift (newop0);
7401
e548c9df
AM
7402 *cost += rtx_cost (newop1, mode, (enum rtx_code) code, 1, speed);
7403 *cost += rtx_cost (op0_stripped, mode, XOR, 0, speed);
6365da9e
KT
7404
7405 if (speed)
7406 {
7407 if (op0_stripped != newop0)
7408 *cost += extra_cost->alu.log_shift;
7409 else
7410 *cost += extra_cost->alu.logical;
7411 }
7412
7413 return true;
7414 }
268c3b47
JG
7415 /* MVN. */
7416 if (speed)
7417 *cost += extra_cost->alu.logical;
7418
268c3b47
JG
7419 return false;
7420
43e9d192 7421 case ZERO_EXTEND:
b1685e62
JG
7422
7423 op0 = XEXP (x, 0);
7424 /* If a value is written in SI mode, then zero extended to DI
7425 mode, the operation will in general be free as a write to
7426 a 'w' register implicitly zeroes the upper bits of an 'x'
7427 register. However, if this is
7428
7429 (set (reg) (zero_extend (reg)))
7430
7431 we must cost the explicit register move. */
7432 if (mode == DImode
7433 && GET_MODE (op0) == SImode
7434 && outer == SET)
7435 {
e548c9df 7436 int op_cost = rtx_cost (op0, VOIDmode, ZERO_EXTEND, 0, speed);
b1685e62 7437
dde23f43
KM
7438 /* If OP_COST is non-zero, then the cost of the zero extend
7439 is effectively the cost of the inner operation. Otherwise
7440 we have a MOV instruction and we take the cost from the MOV
7441 itself. This is true independently of whether we are
7442 optimizing for space or time. */
7443 if (op_cost)
b1685e62
JG
7444 *cost = op_cost;
7445
7446 return true;
7447 }
e548c9df 7448 else if (MEM_P (op0))
43e9d192 7449 {
b1685e62 7450 /* All loads can zero extend to any size for free. */
e548c9df 7451 *cost = rtx_cost (op0, VOIDmode, ZERO_EXTEND, param, speed);
43e9d192
IB
7452 return true;
7453 }
b1685e62 7454
283b6c85
KT
7455 op0 = aarch64_extend_bitfield_pattern_p (x);
7456 if (op0)
7457 {
7458 *cost += rtx_cost (op0, mode, ZERO_EXTEND, 0, speed);
7459 if (speed)
7460 *cost += extra_cost->alu.bfx;
7461 return true;
7462 }
7463
b1685e62 7464 if (speed)
b6875aac
KV
7465 {
7466 if (VECTOR_MODE_P (mode))
7467 {
7468 /* UMOV. */
7469 *cost += extra_cost->vect.alu;
7470 }
7471 else
7472 {
63715e5e
WD
7473 /* We generate an AND instead of UXTB/UXTH. */
7474 *cost += extra_cost->alu.logical;
b6875aac
KV
7475 }
7476 }
43e9d192
IB
7477 return false;
7478
7479 case SIGN_EXTEND:
b1685e62 7480 if (MEM_P (XEXP (x, 0)))
43e9d192 7481 {
b1685e62
JG
7482 /* LDRSH. */
7483 if (speed)
7484 {
7485 rtx address = XEXP (XEXP (x, 0), 0);
7486 *cost += extra_cost->ldst.load_sign_extend;
7487
7488 *cost +=
7489 COSTS_N_INSNS (aarch64_address_cost (address, mode,
7490 0, speed));
7491 }
43e9d192
IB
7492 return true;
7493 }
b1685e62 7494
283b6c85
KT
7495 op0 = aarch64_extend_bitfield_pattern_p (x);
7496 if (op0)
7497 {
7498 *cost += rtx_cost (op0, mode, SIGN_EXTEND, 0, speed);
7499 if (speed)
7500 *cost += extra_cost->alu.bfx;
7501 return true;
7502 }
7503
b1685e62 7504 if (speed)
b6875aac
KV
7505 {
7506 if (VECTOR_MODE_P (mode))
7507 *cost += extra_cost->vect.alu;
7508 else
7509 *cost += extra_cost->alu.extend;
7510 }
43e9d192
IB
7511 return false;
7512
ba0cfa17
JG
7513 case ASHIFT:
7514 op0 = XEXP (x, 0);
7515 op1 = XEXP (x, 1);
7516
7517 if (CONST_INT_P (op1))
7518 {
ba0cfa17 7519 if (speed)
b6875aac
KV
7520 {
7521 if (VECTOR_MODE_P (mode))
7522 {
7523 /* Vector shift (immediate). */
7524 *cost += extra_cost->vect.alu;
7525 }
7526 else
7527 {
7528 /* LSL (immediate), UBFM, UBFIZ and friends. These are all
7529 aliases. */
7530 *cost += extra_cost->alu.shift;
7531 }
7532 }
ba0cfa17
JG
7533
7534 /* We can incorporate zero/sign extend for free. */
7535 if (GET_CODE (op0) == ZERO_EXTEND
7536 || GET_CODE (op0) == SIGN_EXTEND)
7537 op0 = XEXP (op0, 0);
7538
e548c9df 7539 *cost += rtx_cost (op0, VOIDmode, ASHIFT, 0, speed);
ba0cfa17
JG
7540 return true;
7541 }
7542 else
7543 {
7813b280 7544 if (VECTOR_MODE_P (mode))
b6875aac 7545 {
7813b280
KT
7546 if (speed)
7547 /* Vector shift (register). */
7548 *cost += extra_cost->vect.alu;
7549 }
7550 else
7551 {
7552 if (speed)
7553 /* LSLV. */
7554 *cost += extra_cost->alu.shift_reg;
7555
7556 if (GET_CODE (op1) == AND && REG_P (XEXP (op1, 0))
7557 && CONST_INT_P (XEXP (op1, 1))
7558 && INTVAL (XEXP (op1, 1)) == GET_MODE_BITSIZE (mode) - 1)
b6875aac 7559 {
7813b280
KT
7560 *cost += rtx_cost (op0, mode, (rtx_code) code, 0, speed);
7561 /* We already demanded XEXP (op1, 0) to be REG_P, so
7562 don't recurse into it. */
7563 return true;
b6875aac
KV
7564 }
7565 }
ba0cfa17
JG
7566 return false; /* All arguments need to be in registers. */
7567 }
7568
43e9d192 7569 case ROTATE:
43e9d192
IB
7570 case ROTATERT:
7571 case LSHIFTRT:
43e9d192 7572 case ASHIFTRT:
ba0cfa17
JG
7573 op0 = XEXP (x, 0);
7574 op1 = XEXP (x, 1);
43e9d192 7575
ba0cfa17
JG
7576 if (CONST_INT_P (op1))
7577 {
7578 /* ASR (immediate) and friends. */
7579 if (speed)
b6875aac
KV
7580 {
7581 if (VECTOR_MODE_P (mode))
7582 *cost += extra_cost->vect.alu;
7583 else
7584 *cost += extra_cost->alu.shift;
7585 }
43e9d192 7586
e548c9df 7587 *cost += rtx_cost (op0, mode, (enum rtx_code) code, 0, speed);
ba0cfa17
JG
7588 return true;
7589 }
7590 else
7591 {
7813b280 7592 if (VECTOR_MODE_P (mode))
b6875aac 7593 {
7813b280
KT
7594 if (speed)
7595 /* Vector shift (register). */
b6875aac 7596 *cost += extra_cost->vect.alu;
7813b280
KT
7597 }
7598 else
7599 {
7600 if (speed)
7601 /* ASR (register) and friends. */
b6875aac 7602 *cost += extra_cost->alu.shift_reg;
7813b280
KT
7603
7604 if (GET_CODE (op1) == AND && REG_P (XEXP (op1, 0))
7605 && CONST_INT_P (XEXP (op1, 1))
7606 && INTVAL (XEXP (op1, 1)) == GET_MODE_BITSIZE (mode) - 1)
7607 {
7608 *cost += rtx_cost (op0, mode, (rtx_code) code, 0, speed);
7609 /* We already demanded XEXP (op1, 0) to be REG_P, so
7610 don't recurse into it. */
7611 return true;
7612 }
b6875aac 7613 }
ba0cfa17
JG
7614 return false; /* All arguments need to be in registers. */
7615 }
43e9d192 7616
909734be
JG
7617 case SYMBOL_REF:
7618
1b1e81f8
JW
7619 if (aarch64_cmodel == AARCH64_CMODEL_LARGE
7620 || aarch64_cmodel == AARCH64_CMODEL_SMALL_SPIC)
909734be
JG
7621 {
7622 /* LDR. */
7623 if (speed)
7624 *cost += extra_cost->ldst.load;
7625 }
7626 else if (aarch64_cmodel == AARCH64_CMODEL_SMALL
7627 || aarch64_cmodel == AARCH64_CMODEL_SMALL_PIC)
7628 {
7629 /* ADRP, followed by ADD. */
7630 *cost += COSTS_N_INSNS (1);
7631 if (speed)
7632 *cost += 2 * extra_cost->alu.arith;
7633 }
7634 else if (aarch64_cmodel == AARCH64_CMODEL_TINY
7635 || aarch64_cmodel == AARCH64_CMODEL_TINY_PIC)
7636 {
7637 /* ADR. */
7638 if (speed)
7639 *cost += extra_cost->alu.arith;
7640 }
7641
7642 if (flag_pic)
7643 {
7644 /* One extra load instruction, after accessing the GOT. */
7645 *cost += COSTS_N_INSNS (1);
7646 if (speed)
7647 *cost += extra_cost->ldst.load;
7648 }
43e9d192
IB
7649 return true;
7650
909734be 7651 case HIGH:
43e9d192 7652 case LO_SUM:
909734be
JG
7653 /* ADRP/ADD (immediate). */
7654 if (speed)
7655 *cost += extra_cost->alu.arith;
43e9d192
IB
7656 return true;
7657
7658 case ZERO_EXTRACT:
7659 case SIGN_EXTRACT:
7cc2145f
JG
7660 /* UBFX/SBFX. */
7661 if (speed)
b6875aac
KV
7662 {
7663 if (VECTOR_MODE_P (mode))
7664 *cost += extra_cost->vect.alu;
7665 else
7666 *cost += extra_cost->alu.bfx;
7667 }
7cc2145f
JG
7668
7669 /* We can trust that the immediates used will be correct (there
7670 are no by-register forms), so we need only cost op0. */
e548c9df 7671 *cost += rtx_cost (XEXP (x, 0), VOIDmode, (enum rtx_code) code, 0, speed);
43e9d192
IB
7672 return true;
7673
7674 case MULT:
4745e701
JG
7675 *cost += aarch64_rtx_mult_cost (x, MULT, 0, speed);
7676 /* aarch64_rtx_mult_cost always handles recursion to its
7677 operands. */
7678 return true;
43e9d192
IB
7679
7680 case MOD:
4f58fe36
KT
7681 /* We can expand signed mod by power of 2 using a NEGS, two parallel
7682 ANDs and a CSNEG. Assume here that the cost of a CSNEG is the same as
7683 that of an unconditional negate. This case should only ever be reached through
7684 the set_smod_pow2_cheap check in expmed.c. */
7685 if (CONST_INT_P (XEXP (x, 1))
7686 && exact_log2 (INTVAL (XEXP (x, 1))) > 0
7687 && (mode == SImode || mode == DImode))
7688 {
7689 /* We expand to 4 instructions. Reset the baseline. */
7690 *cost = COSTS_N_INSNS (4);
7691
7692 if (speed)
7693 *cost += 2 * extra_cost->alu.logical
7694 + 2 * extra_cost->alu.arith;
7695
7696 return true;
7697 }
7698
7699 /* Fall-through. */
43e9d192 7700 case UMOD:
43e9d192
IB
7701 if (speed)
7702 {
cb9ac430 7703 /* Slightly prefer UMOD over SMOD. */
b6875aac
KV
7704 if (VECTOR_MODE_P (mode))
7705 *cost += extra_cost->vect.alu;
e548c9df
AM
7706 else if (GET_MODE_CLASS (mode) == MODE_INT)
7707 *cost += (extra_cost->mult[mode == DImode].add
cb9ac430
TC
7708 + extra_cost->mult[mode == DImode].idiv
7709 + (code == MOD ? 1 : 0));
43e9d192
IB
7710 }
7711 return false; /* All arguments need to be in registers. */
7712
7713 case DIV:
7714 case UDIV:
4105fe38 7715 case SQRT:
43e9d192
IB
7716 if (speed)
7717 {
b6875aac
KV
7718 if (VECTOR_MODE_P (mode))
7719 *cost += extra_cost->vect.alu;
7720 else if (GET_MODE_CLASS (mode) == MODE_INT)
4105fe38
JG
7721 /* There is no integer SQRT, so only DIV and UDIV can get
7722 here. */
cb9ac430
TC
7723 *cost += (extra_cost->mult[mode == DImode].idiv
7724 /* Slightly prefer UDIV over SDIV. */
7725 + (code == DIV ? 1 : 0));
4105fe38
JG
7726 else
7727 *cost += extra_cost->fp[mode == DFmode].div;
43e9d192
IB
7728 }
7729 return false; /* All arguments need to be in registers. */
7730
a8eecd00 7731 case IF_THEN_ELSE:
2d5ffe46
AP
7732 return aarch64_if_then_else_costs (XEXP (x, 0), XEXP (x, 1),
7733 XEXP (x, 2), cost, speed);
a8eecd00
JG
7734
7735 case EQ:
7736 case NE:
7737 case GT:
7738 case GTU:
7739 case LT:
7740 case LTU:
7741 case GE:
7742 case GEU:
7743 case LE:
7744 case LEU:
7745
7746 return false; /* All arguments must be in registers. */
7747
b292109f
JG
7748 case FMA:
7749 op0 = XEXP (x, 0);
7750 op1 = XEXP (x, 1);
7751 op2 = XEXP (x, 2);
7752
7753 if (speed)
b6875aac
KV
7754 {
7755 if (VECTOR_MODE_P (mode))
7756 *cost += extra_cost->vect.alu;
7757 else
7758 *cost += extra_cost->fp[mode == DFmode].fma;
7759 }
b292109f
JG
7760
7761 /* FMSUB, FNMADD, and FNMSUB are free. */
7762 if (GET_CODE (op0) == NEG)
7763 op0 = XEXP (op0, 0);
7764
7765 if (GET_CODE (op2) == NEG)
7766 op2 = XEXP (op2, 0);
7767
7768 /* aarch64_fnma4_elt_to_64v2df has the NEG as operand 1,
7769 and the by-element operand as operand 0. */
7770 if (GET_CODE (op1) == NEG)
7771 op1 = XEXP (op1, 0);
7772
7773 /* Catch vector-by-element operations. The by-element operand can
7774 either be (vec_duplicate (vec_select (x))) or just
7775 (vec_select (x)), depending on whether we are multiplying by
7776 a vector or a scalar.
7777
7778 Canonicalization is not very good in these cases: FMA4 will put the
7779 by-element operand as operand 0, while FNMA4 will have it as operand 1. */
7780 if (GET_CODE (op0) == VEC_DUPLICATE)
7781 op0 = XEXP (op0, 0);
7782 else if (GET_CODE (op1) == VEC_DUPLICATE)
7783 op1 = XEXP (op1, 0);
7784
7785 if (GET_CODE (op0) == VEC_SELECT)
7786 op0 = XEXP (op0, 0);
7787 else if (GET_CODE (op1) == VEC_SELECT)
7788 op1 = XEXP (op1, 0);
7789
7790 /* If the remaining parameters are not registers,
7791 get the cost to put them into registers. */
e548c9df
AM
7792 *cost += rtx_cost (op0, mode, FMA, 0, speed);
7793 *cost += rtx_cost (op1, mode, FMA, 1, speed);
7794 *cost += rtx_cost (op2, mode, FMA, 2, speed);
b292109f
JG
7795 return true;
7796
5e2a765b
KT
7797 case FLOAT:
7798 case UNSIGNED_FLOAT:
7799 if (speed)
7800 *cost += extra_cost->fp[mode == DFmode].fromint;
7801 return false;
7802
b292109f
JG
7803 case FLOAT_EXTEND:
7804 if (speed)
b6875aac
KV
7805 {
7806 if (VECTOR_MODE_P (mode))
7807 {
7808 /* Vector widening conversion. */
7809 *cost += extra_cost->vect.alu;
7810 }
7811 else
7812 *cost += extra_cost->fp[mode == DFmode].widen;
7813 }
b292109f
JG
7814 return false;
7815
7816 case FLOAT_TRUNCATE:
7817 if (speed)
b6875aac
KV
7818 {
7819 if (VECTOR_MODE_P (mode))
7820 {
7821 /* Vector narrowing conversion. */
7822 *cost += extra_cost->vect.alu;
7823 }
7824 else
7825 *cost += extra_cost->fp[mode == DFmode].narrow;
7826 }
b292109f
JG
7827 return false;
7828
61263118
KT
7829 case FIX:
7830 case UNSIGNED_FIX:
7831 x = XEXP (x, 0);
7832 /* Strip the rounding part. They will all be implemented
7833 by the fcvt* family of instructions anyway. */
7834 if (GET_CODE (x) == UNSPEC)
7835 {
7836 unsigned int uns_code = XINT (x, 1);
7837
7838 if (uns_code == UNSPEC_FRINTA
7839 || uns_code == UNSPEC_FRINTM
7840 || uns_code == UNSPEC_FRINTN
7841 || uns_code == UNSPEC_FRINTP
7842 || uns_code == UNSPEC_FRINTZ)
7843 x = XVECEXP (x, 0, 0);
7844 }
7845
7846 if (speed)
b6875aac
KV
7847 {
7848 if (VECTOR_MODE_P (mode))
7849 *cost += extra_cost->vect.alu;
7850 else
7851 *cost += extra_cost->fp[GET_MODE (x) == DFmode].toint;
7852 }
39252973
KT
7853
7854 /* We can combine fmul by a power of 2 followed by a fcvt into a single
7855 fixed-point fcvt. */
7856 if (GET_CODE (x) == MULT
7857 && ((VECTOR_MODE_P (mode)
7858 && aarch64_vec_fpconst_pow_of_2 (XEXP (x, 1)) > 0)
7859 || aarch64_fpconst_pow_of_2 (XEXP (x, 1)) > 0))
7860 {
7861 *cost += rtx_cost (XEXP (x, 0), VOIDmode, (rtx_code) code,
7862 0, speed);
7863 return true;
7864 }
7865
e548c9df 7866 *cost += rtx_cost (x, VOIDmode, (enum rtx_code) code, 0, speed);
61263118
KT
7867 return true;
7868
b292109f 7869 case ABS:
b6875aac
KV
7870 if (VECTOR_MODE_P (mode))
7871 {
7872 /* ABS (vector). */
7873 if (speed)
7874 *cost += extra_cost->vect.alu;
7875 }
7876 else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
b292109f 7877 {
19261b99
KT
7878 op0 = XEXP (x, 0);
7879
7880 /* FABD, which is analogous to FADD. */
7881 if (GET_CODE (op0) == MINUS)
7882 {
e548c9df
AM
7883 *cost += rtx_cost (XEXP (op0, 0), mode, MINUS, 0, speed);
7884 *cost += rtx_cost (XEXP (op0, 1), mode, MINUS, 1, speed);
19261b99
KT
7885 if (speed)
7886 *cost += extra_cost->fp[mode == DFmode].addsub;
7887
7888 return true;
7889 }
7890 /* Simple FABS is analogous to FNEG. */
b292109f
JG
7891 if (speed)
7892 *cost += extra_cost->fp[mode == DFmode].neg;
7893 }
7894 else
7895 {
7896 /* Integer ABS will either be split to
7897 two arithmetic instructions, or will be an ABS
7898 (scalar), which we don't model. */
7899 *cost = COSTS_N_INSNS (2);
7900 if (speed)
7901 *cost += 2 * extra_cost->alu.arith;
7902 }
7903 return false;
7904
7905 case SMAX:
7906 case SMIN:
7907 if (speed)
7908 {
b6875aac
KV
7909 if (VECTOR_MODE_P (mode))
7910 *cost += extra_cost->vect.alu;
7911 else
7912 {
7913 /* FMAXNM/FMINNM/FMAX/FMIN.
7914 TODO: This may not be accurate for all implementations, but
7915 we do not model this in the cost tables. */
7916 *cost += extra_cost->fp[mode == DFmode].addsub;
7917 }
b292109f
JG
7918 }
7919 return false;
7920
61263118
KT
7921 case UNSPEC:
7922 /* The floating point round to integer frint* instructions. */
7923 if (aarch64_frint_unspec_p (XINT (x, 1)))
7924 {
7925 if (speed)
7926 *cost += extra_cost->fp[mode == DFmode].roundint;
7927
7928 return false;
7929 }
781aeb73
KT
7930
7931 if (XINT (x, 1) == UNSPEC_RBIT)
7932 {
7933 if (speed)
7934 *cost += extra_cost->alu.rev;
7935
7936 return false;
7937 }
61263118
KT
7938 break;
7939
fb620c4a
JG
7940 case TRUNCATE:
7941
7942 /* Decompose <su>muldi3_highpart. */
7943 if (/* (truncate:DI */
7944 mode == DImode
7945 /* (lshiftrt:TI */
7946 && GET_MODE (XEXP (x, 0)) == TImode
7947 && GET_CODE (XEXP (x, 0)) == LSHIFTRT
7948 /* (mult:TI */
7949 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
7950 /* (ANY_EXTEND:TI (reg:DI))
7951 (ANY_EXTEND:TI (reg:DI))) */
7952 && ((GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == ZERO_EXTEND
7953 && GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)) == ZERO_EXTEND)
7954 || (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == SIGN_EXTEND
7955 && GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)) == SIGN_EXTEND))
7956 && GET_MODE (XEXP (XEXP (XEXP (XEXP (x, 0), 0), 0), 0)) == DImode
7957 && GET_MODE (XEXP (XEXP (XEXP (XEXP (x, 0), 0), 1), 0)) == DImode
7958 /* (const_int 64) */
7959 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
7960 && UINTVAL (XEXP (XEXP (x, 0), 1)) == 64)
7961 {
7962 /* UMULH/SMULH. */
7963 if (speed)
7964 *cost += extra_cost->mult[mode == DImode].extend;
e548c9df
AM
7965 *cost += rtx_cost (XEXP (XEXP (XEXP (XEXP (x, 0), 0), 0), 0),
7966 mode, MULT, 0, speed);
7967 *cost += rtx_cost (XEXP (XEXP (XEXP (XEXP (x, 0), 0), 1), 0),
7968 mode, MULT, 1, speed);
fb620c4a
JG
7969 return true;
7970 }
7971
7972 /* Fall through. */
43e9d192 7973 default:
61263118 7974 break;
43e9d192 7975 }
61263118 7976
c10e3d7f
AP
7977 if (dump_file
7978 && flag_aarch64_verbose_cost)
61263118
KT
7979 fprintf (dump_file,
7980 "\nFailed to cost RTX. Assuming default cost.\n");
7981
7982 return true;
43e9d192
IB
7983}
7984
0ee859b5
JG
7985/* Wrapper around aarch64_rtx_costs, dumps the partial, or total cost
7986 calculated for X. This cost is stored in *COST. Returns true
7987 if the total cost of X was calculated. */
7988static bool
e548c9df 7989aarch64_rtx_costs_wrapper (rtx x, machine_mode mode, int outer,
0ee859b5
JG
7990 int param, int *cost, bool speed)
7991{
e548c9df 7992 bool result = aarch64_rtx_costs (x, mode, outer, param, cost, speed);
0ee859b5 7993
c10e3d7f
AP
7994 if (dump_file
7995 && flag_aarch64_verbose_cost)
0ee859b5
JG
7996 {
7997 print_rtl_single (dump_file, x);
7998 fprintf (dump_file, "\n%s cost: %d (%s)\n",
7999 speed ? "Hot" : "Cold",
8000 *cost, result ? "final" : "partial");
8001 }
8002
8003 return result;
8004}
8005
43e9d192 8006static int
ef4bddc2 8007aarch64_register_move_cost (machine_mode mode,
8a3a7e67 8008 reg_class_t from_i, reg_class_t to_i)
43e9d192 8009{
8a3a7e67
RH
8010 enum reg_class from = (enum reg_class) from_i;
8011 enum reg_class to = (enum reg_class) to_i;
43e9d192 8012 const struct cpu_regmove_cost *regmove_cost
b175b679 8013 = aarch64_tune_params.regmove_cost;
43e9d192 8014
3be07662 8015 /* Caller save and pointer regs are equivalent to GENERAL_REGS. */
2876a13f 8016 if (to == CALLER_SAVE_REGS || to == POINTER_REGS)
3be07662
WD
8017 to = GENERAL_REGS;
8018
2876a13f 8019 if (from == CALLER_SAVE_REGS || from == POINTER_REGS)
3be07662
WD
8020 from = GENERAL_REGS;
8021
6ee70f81
AP
8022 /* Moving between GPR and stack cost is the same as GP2GP. */
8023 if ((from == GENERAL_REGS && to == STACK_REG)
8024 || (to == GENERAL_REGS && from == STACK_REG))
8025 return regmove_cost->GP2GP;
8026
8027 /* To/From the stack register, we move via the gprs. */
8028 if (to == STACK_REG || from == STACK_REG)
8029 return aarch64_register_move_cost (mode, from, GENERAL_REGS)
8030 + aarch64_register_move_cost (mode, GENERAL_REGS, to);
8031
8919453c
WD
8032 if (GET_MODE_SIZE (mode) == 16)
8033 {
8034 /* 128-bit operations on general registers require 2 instructions. */
8035 if (from == GENERAL_REGS && to == GENERAL_REGS)
8036 return regmove_cost->GP2GP * 2;
8037 else if (from == GENERAL_REGS)
8038 return regmove_cost->GP2FP * 2;
8039 else if (to == GENERAL_REGS)
8040 return regmove_cost->FP2GP * 2;
8041
8042 /* When AdvSIMD instructions are disabled it is not possible to move
8043 a 128-bit value directly between Q registers. This is handled in
8044 secondary reload. A general register is used as a scratch to move
8045 the upper DI value and the lower DI value is moved directly,
8046 hence the cost is the sum of three moves. */
8047 if (! TARGET_SIMD)
8048 return regmove_cost->GP2FP + regmove_cost->FP2GP + regmove_cost->FP2FP;
8049
8050 return regmove_cost->FP2FP;
8051 }
8052
43e9d192
IB
8053 if (from == GENERAL_REGS && to == GENERAL_REGS)
8054 return regmove_cost->GP2GP;
8055 else if (from == GENERAL_REGS)
8056 return regmove_cost->GP2FP;
8057 else if (to == GENERAL_REGS)
8058 return regmove_cost->FP2GP;
8059
43e9d192
IB
8060 return regmove_cost->FP2FP;
8061}
8062
8063static int
ef4bddc2 8064aarch64_memory_move_cost (machine_mode mode ATTRIBUTE_UNUSED,
43e9d192
IB
8065 reg_class_t rclass ATTRIBUTE_UNUSED,
8066 bool in ATTRIBUTE_UNUSED)
8067{
b175b679 8068 return aarch64_tune_params.memmov_cost;
43e9d192
IB
8069}
8070
0c30e0f3
EM
8071/* Return true if it is safe and beneficial to use the approximate rsqrt optabs
8072 to optimize 1.0/sqrt. */
ee62a5a6
RS
8073
8074static bool
9acc9cbe 8075use_rsqrt_p (machine_mode mode)
ee62a5a6
RS
8076{
8077 return (!flag_trapping_math
8078 && flag_unsafe_math_optimizations
9acc9cbe
EM
8079 && ((aarch64_tune_params.approx_modes->recip_sqrt
8080 & AARCH64_APPROX_MODE (mode))
1a33079e 8081 || flag_mrecip_low_precision_sqrt));
ee62a5a6
RS
8082}
8083
0c30e0f3
EM
8084/* Function to decide when to use the approximate reciprocal square root
8085 builtin. */
a6fc00da
BH
8086
8087static tree
ee62a5a6 8088aarch64_builtin_reciprocal (tree fndecl)
a6fc00da 8089{
9acc9cbe
EM
8090 machine_mode mode = TYPE_MODE (TREE_TYPE (fndecl));
8091
8092 if (!use_rsqrt_p (mode))
a6fc00da 8093 return NULL_TREE;
ee62a5a6 8094 return aarch64_builtin_rsqrt (DECL_FUNCTION_CODE (fndecl));
a6fc00da
BH
8095}
8096
8097typedef rtx (*rsqrte_type) (rtx, rtx);
8098
98daafa0
EM
8099/* Select reciprocal square root initial estimate insn depending on machine
8100 mode. */
a6fc00da 8101
98daafa0 8102static rsqrte_type
a6fc00da
BH
8103get_rsqrte_type (machine_mode mode)
8104{
8105 switch (mode)
8106 {
2a823433
JW
8107 case DFmode: return gen_aarch64_rsqrtedf;
8108 case SFmode: return gen_aarch64_rsqrtesf;
8109 case V2DFmode: return gen_aarch64_rsqrtev2df;
8110 case V2SFmode: return gen_aarch64_rsqrtev2sf;
8111 case V4SFmode: return gen_aarch64_rsqrtev4sf;
a6fc00da
BH
8112 default: gcc_unreachable ();
8113 }
8114}
8115
8116typedef rtx (*rsqrts_type) (rtx, rtx, rtx);
8117
98daafa0 8118/* Select reciprocal square root series step insn depending on machine mode. */
a6fc00da 8119
98daafa0 8120static rsqrts_type
a6fc00da
BH
8121get_rsqrts_type (machine_mode mode)
8122{
8123 switch (mode)
8124 {
00ea75d4
JW
8125 case DFmode: return gen_aarch64_rsqrtsdf;
8126 case SFmode: return gen_aarch64_rsqrtssf;
8127 case V2DFmode: return gen_aarch64_rsqrtsv2df;
8128 case V2SFmode: return gen_aarch64_rsqrtsv2sf;
8129 case V4SFmode: return gen_aarch64_rsqrtsv4sf;
a6fc00da
BH
8130 default: gcc_unreachable ();
8131 }
8132}
8133
98daafa0
EM
8134/* Emit instruction sequence to compute either the approximate square root
8135 or its approximate reciprocal, depending on the flag RECP, and return
8136 whether the sequence was emitted or not. */
a6fc00da 8137
98daafa0
EM
8138bool
8139aarch64_emit_approx_sqrt (rtx dst, rtx src, bool recp)
a6fc00da 8140{
98daafa0 8141 machine_mode mode = GET_MODE (dst);
daef0a8c
JW
8142
8143 if (GET_MODE_INNER (mode) == HFmode)
2e19adc8
RE
8144 {
8145 gcc_assert (!recp);
8146 return false;
8147 }
8148
8149 machine_mode mmsk
8150 = mode_for_vector (int_mode_for_mode (GET_MODE_INNER (mode)),
8151 GET_MODE_NUNITS (mode));
8152 if (!recp)
8153 {
8154 if (!(flag_mlow_precision_sqrt
8155 || (aarch64_tune_params.approx_modes->sqrt
8156 & AARCH64_APPROX_MODE (mode))))
8157 return false;
8158
8159 if (flag_finite_math_only
8160 || flag_trapping_math
8161 || !flag_unsafe_math_optimizations
8162 || optimize_function_for_size_p (cfun))
8163 return false;
8164 }
8165 else
8166 /* Caller assumes we cannot fail. */
8167 gcc_assert (use_rsqrt_p (mode));
daef0a8c 8168
a6fc00da 8169
98daafa0
EM
8170 rtx xmsk = gen_reg_rtx (mmsk);
8171 if (!recp)
2e19adc8
RE
8172 /* When calculating the approximate square root, compare the
8173 argument with 0.0 and create a mask. */
8174 emit_insn (gen_rtx_SET (xmsk,
8175 gen_rtx_NEG (mmsk,
8176 gen_rtx_EQ (mmsk, src,
8177 CONST0_RTX (mode)))));
a6fc00da 8178
98daafa0
EM
8179 /* Estimate the approximate reciprocal square root. */
8180 rtx xdst = gen_reg_rtx (mode);
8181 emit_insn ((*get_rsqrte_type (mode)) (xdst, src));
a6fc00da 8182
98daafa0
EM
8183 /* Iterate over the series twice for SF and thrice for DF. */
8184 int iterations = (GET_MODE_INNER (mode) == DFmode) ? 3 : 2;
a6fc00da 8185
98daafa0
EM
8186 /* Optionally iterate over the series once less for faster performance
8187 while sacrificing some accuracy. */
8188 if ((recp && flag_mrecip_low_precision_sqrt)
8189 || (!recp && flag_mlow_precision_sqrt))
a6fc00da
BH
8190 iterations--;
8191
98daafa0
EM
8192 /* Iterate over the series to calculate the approximate reciprocal square
8193 root. */
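 /* Each pass applies one Newton-Raphson refinement step,
      x' = x * (3 - SRC * x * x) / 2,
    where the (3 - SRC * x * x) / 2 factor is the value computed by the
    FRSQRTS instruction emitted below. */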
8194 rtx x1 = gen_reg_rtx (mode);
8195 while (iterations--)
a6fc00da 8196 {
a6fc00da 8197 rtx x2 = gen_reg_rtx (mode);
98daafa0
EM
8198 emit_set_insn (x2, gen_rtx_MULT (mode, xdst, xdst));
8199
8200 emit_insn ((*get_rsqrts_type (mode)) (x1, src, x2));
a6fc00da 8201
98daafa0
EM
8202 if (iterations > 0)
8203 emit_set_insn (xdst, gen_rtx_MULT (mode, xdst, x1));
8204 }
8205
8206 if (!recp)
8207 {
8208 /* Qualify the approximate reciprocal square root when the argument is
8209 0.0 by squashing the intermediary result to 0.0. */
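 /* Without this squash, the reciprocal square root estimate for a 0.0
    argument would be infinity, and the multiplication by SRC below would
    produce 0.0 * inf = NaN instead of the expected 0.0 result. */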
8210 rtx xtmp = gen_reg_rtx (mmsk);
8211 emit_set_insn (xtmp, gen_rtx_AND (mmsk, gen_rtx_NOT (mmsk, xmsk),
8212 gen_rtx_SUBREG (mmsk, xdst, 0)));
8213 emit_move_insn (xdst, gen_rtx_SUBREG (mode, xtmp, 0));
a6fc00da 8214
98daafa0
EM
8215 /* Calculate the approximate square root. */
8216 emit_set_insn (xdst, gen_rtx_MULT (mode, xdst, src));
a6fc00da
BH
8217 }
8218
98daafa0
EM
8219 /* Finalize the approximation. */
8220 emit_set_insn (dst, gen_rtx_MULT (mode, xdst, x1));
8221
8222 return true;
a6fc00da
BH
8223}
8224
79a2bc2d
EM
8225typedef rtx (*recpe_type) (rtx, rtx);
8226
8227/* Select reciprocal initial estimate insn depending on machine mode. */
8228
8229static recpe_type
8230get_recpe_type (machine_mode mode)
8231{
8232 switch (mode)
8233 {
8234 case SFmode: return (gen_aarch64_frecpesf);
8235 case V2SFmode: return (gen_aarch64_frecpev2sf);
8236 case V4SFmode: return (gen_aarch64_frecpev4sf);
8237 case DFmode: return (gen_aarch64_frecpedf);
8238 case V2DFmode: return (gen_aarch64_frecpev2df);
8239 default: gcc_unreachable ();
8240 }
8241}
8242
8243typedef rtx (*recps_type) (rtx, rtx, rtx);
8244
8245/* Select reciprocal series step insn depending on machine mode. */
8246
8247static recps_type
8248get_recps_type (machine_mode mode)
8249{
8250 switch (mode)
8251 {
8252 case SFmode: return (gen_aarch64_frecpssf);
8253 case V2SFmode: return (gen_aarch64_frecpsv2sf);
8254 case V4SFmode: return (gen_aarch64_frecpsv4sf);
8255 case DFmode: return (gen_aarch64_frecpsdf);
8256 case V2DFmode: return (gen_aarch64_frecpsv2df);
8257 default: gcc_unreachable ();
8258 }
8259}
8260
8261/* Emit the instruction sequence to compute the approximation for the division
8262 of NUM by DEN in QUO and return whether the sequence was emitted or not. */
8263
8264bool
8265aarch64_emit_approx_div (rtx quo, rtx num, rtx den)
8266{
8267 machine_mode mode = GET_MODE (quo);
33d72b63
JW
8268
8269 if (GET_MODE_INNER (mode) == HFmode)
8270 return false;
8271
79a2bc2d
EM
8272 bool use_approx_division_p = (flag_mlow_precision_div
8273 || (aarch64_tune_params.approx_modes->division
8274 & AARCH64_APPROX_MODE (mode)));
8275
8276 if (!flag_finite_math_only
8277 || flag_trapping_math
8278 || !flag_unsafe_math_optimizations
8279 || optimize_function_for_size_p (cfun)
8280 || !use_approx_division_p)
8281 return false;
8282
8283 /* Estimate the approximate reciprocal. */
8284 rtx xrcp = gen_reg_rtx (mode);
8285 emit_insn ((*get_recpe_type (mode)) (xrcp, den));
8286
8287 /* Iterate over the series twice for SF and thrice for DF. */
8288 int iterations = (GET_MODE_INNER (mode) == DFmode) ? 3 : 2;
8289
8290 /* Optionally iterate over the series once less for faster performance,
8291 while sacrificing some accuracy. */
8292 if (flag_mlow_precision_div)
8293 iterations--;
8294
8295 /* Iterate over the series to calculate the approximate reciprocal. */
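 /* Each pass applies one Newton-Raphson refinement step,
      x' = x * (2 - DEN * x),
    where the (2 - DEN * x) factor is the value computed by the FRECPS
    instruction emitted below. */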
8296 rtx xtmp = gen_reg_rtx (mode);
8297 while (iterations--)
8298 {
8299 emit_insn ((*get_recps_type (mode)) (xtmp, xrcp, den));
8300
8301 if (iterations > 0)
8302 emit_set_insn (xrcp, gen_rtx_MULT (mode, xrcp, xtmp));
8303 }
8304
8305 if (num != CONST1_RTX (mode))
8306 {
8307 /* As the approximate reciprocal of DEN is already calculated, only
8308 calculate the approximate division when NUM is not 1.0. */
8309 rtx xnum = force_reg (mode, num);
8310 emit_set_insn (xrcp, gen_rtx_MULT (mode, xrcp, xnum));
8311 }
8312
8313 /* Finalize the approximation. */
8314 emit_set_insn (quo, gen_rtx_MULT (mode, xrcp, xtmp));
8315 return true;
8316}
8317
d126a4ae
AP
8318/* Return the number of instructions that can be issued per cycle. */
8319static int
8320aarch64_sched_issue_rate (void)
8321{
b175b679 8322 return aarch64_tune_params.issue_rate;
d126a4ae
AP
8323}
8324
d03f7e44
MK
8325static int
8326aarch64_sched_first_cycle_multipass_dfa_lookahead (void)
8327{
8328 int issue_rate = aarch64_sched_issue_rate ();
8329
8330 return issue_rate > 1 && !sched_fusion ? issue_rate : 0;
8331}
8332
2d6bc7fa
KT
8333
8334/* Implement TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD as
8335 autopref_multipass_dfa_lookahead_guard from haifa-sched.c. It only
8336 has an effect if PARAM_SCHED_AUTOPREF_QUEUE_DEPTH > 0. */
8337
8338static int
8339aarch64_first_cycle_multipass_dfa_lookahead_guard (rtx_insn *insn,
8340 int ready_index)
8341{
8342 return autopref_multipass_dfa_lookahead_guard (insn, ready_index);
8343}
8344
8345
8990e73a
TB
8346/* Vectorizer cost model target hooks. */
8347
8348/* Implement targetm.vectorize.builtin_vectorization_cost. */
8349static int
8350aarch64_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
8351 tree vectype,
8352 int misalign ATTRIBUTE_UNUSED)
8353{
8354 unsigned elements;
cd8ae5ed
AP
8355 const cpu_vector_cost *costs = aarch64_tune_params.vec_costs;
8356 bool fp = false;
8357
8358 if (vectype != NULL)
8359 fp = FLOAT_TYPE_P (vectype);
8990e73a
TB
8360
8361 switch (type_of_cost)
8362 {
8363 case scalar_stmt:
cd8ae5ed 8364 return fp ? costs->scalar_fp_stmt_cost : costs->scalar_int_stmt_cost;
8990e73a
TB
8365
8366 case scalar_load:
cd8ae5ed 8367 return costs->scalar_load_cost;
8990e73a
TB
8368
8369 case scalar_store:
cd8ae5ed 8370 return costs->scalar_store_cost;
8990e73a
TB
8371
8372 case vector_stmt:
cd8ae5ed 8373 return fp ? costs->vec_fp_stmt_cost : costs->vec_int_stmt_cost;
8990e73a
TB
8374
8375 case vector_load:
cd8ae5ed 8376 return costs->vec_align_load_cost;
8990e73a
TB
8377
8378 case vector_store:
cd8ae5ed 8379 return costs->vec_store_cost;
8990e73a
TB
8380
8381 case vec_to_scalar:
cd8ae5ed 8382 return costs->vec_to_scalar_cost;
8990e73a
TB
8383
8384 case scalar_to_vec:
cd8ae5ed 8385 return costs->scalar_to_vec_cost;
8990e73a
TB
8386
8387 case unaligned_load:
cd8ae5ed 8388 return costs->vec_unalign_load_cost;
8990e73a
TB
8389
8390 case unaligned_store:
cd8ae5ed 8391 return costs->vec_unalign_store_cost;
8990e73a
TB
8392
8393 case cond_branch_taken:
cd8ae5ed 8394 return costs->cond_taken_branch_cost;
8990e73a
TB
8395
8396 case cond_branch_not_taken:
cd8ae5ed 8397 return costs->cond_not_taken_branch_cost;
8990e73a
TB
8398
8399 case vec_perm:
cd8ae5ed 8400 return costs->vec_permute_cost;
c428f91c 8401
8990e73a 8402 case vec_promote_demote:
cd8ae5ed 8403 return fp ? costs->vec_fp_stmt_cost : costs->vec_int_stmt_cost;
8990e73a
TB
8404
8405 case vec_construct:
8406 elements = TYPE_VECTOR_SUBPARTS (vectype);
8407 return elements / 2 + 1;
8408
8409 default:
8410 gcc_unreachable ();
8411 }
8412}
8413
8414/* Implement targetm.vectorize.add_stmt_cost. */
8415static unsigned
8416aarch64_add_stmt_cost (void *data, int count, enum vect_cost_for_stmt kind,
8417 struct _stmt_vec_info *stmt_info, int misalign,
8418 enum vect_cost_model_location where)
8419{
8420 unsigned *cost = (unsigned *) data;
8421 unsigned retval = 0;
8422
8423 if (flag_vect_cost_model)
8424 {
8425 tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
8426 int stmt_cost =
8427 aarch64_builtin_vectorization_cost (kind, vectype, misalign);
8428
8429 /* Statements in an inner loop relative to the loop being
8430 vectorized are weighted more heavily. The value here is
058e4c71 8431 arbitrary and could potentially be improved with analysis. */
8990e73a 8432 if (where == vect_body && stmt_info && stmt_in_inner_loop_p (stmt_info))
058e4c71 8433 count *= 50; /* FIXME */
8990e73a
TB
8434
8435 retval = (unsigned) (count * stmt_cost);
8436 cost[where] += retval;
8437 }
8438
8439 return retval;
8440}
8441
0cfff2a1 8442static void initialize_aarch64_code_model (struct gcc_options *);
43e9d192 8443
0cfff2a1
KT
8444/* Parse the TO_PARSE string and put the architecture struct that it
8445 selects into RES and the architectural features into ISA_FLAGS.
8446 Return an aarch64_parse_opt_result describing the parse result.
8447 If there is an error parsing, RES and ISA_FLAGS are left unchanged. */
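/* (For illustration, TO_PARSE is expected to look something like
   "armv8-a+crc": an architecture name optionally followed by "+"-prefixed
   extension modifiers, which are handed to aarch64_parse_extension.) */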
43e9d192 8448
0cfff2a1
KT
8449static enum aarch64_parse_opt_result
8450aarch64_parse_arch (const char *to_parse, const struct processor **res,
8451 unsigned long *isa_flags)
43e9d192
IB
8452{
8453 char *ext;
8454 const struct processor *arch;
0cfff2a1 8455 char *str = (char *) alloca (strlen (to_parse) + 1);
43e9d192
IB
8456 size_t len;
8457
0cfff2a1 8458 strcpy (str, to_parse);
43e9d192
IB
8459
8460 ext = strchr (str, '+');
8461
8462 if (ext != NULL)
8463 len = ext - str;
8464 else
8465 len = strlen (str);
8466
8467 if (len == 0)
0cfff2a1
KT
8468 return AARCH64_PARSE_MISSING_ARG;
8469
43e9d192 8470
0cfff2a1 8471 /* Loop through the list of supported ARCHes to find a match. */
43e9d192
IB
8472 for (arch = all_architectures; arch->name != NULL; arch++)
8473 {
8474 if (strlen (arch->name) == len && strncmp (arch->name, str, len) == 0)
8475 {
0cfff2a1 8476 unsigned long isa_temp = arch->flags;
43e9d192
IB
8477
8478 if (ext != NULL)
8479 {
0cfff2a1
KT
8480 /* TO_PARSE string contains at least one extension. */
8481 enum aarch64_parse_opt_result ext_res
8482 = aarch64_parse_extension (ext, &isa_temp);
43e9d192 8483
0cfff2a1
KT
8484 if (ext_res != AARCH64_PARSE_OK)
8485 return ext_res;
ffee7aa9 8486 }
0cfff2a1
KT
8487 /* Extension parsing was successful. Confirm the result
8488 arch and ISA flags. */
8489 *res = arch;
8490 *isa_flags = isa_temp;
8491 return AARCH64_PARSE_OK;
43e9d192
IB
8492 }
8493 }
8494
8495 /* ARCH name not found in list. */
0cfff2a1 8496 return AARCH64_PARSE_INVALID_ARG;
43e9d192
IB
8497}
8498
0cfff2a1
KT
8499/* Parse the TO_PARSE string and put the result tuning in RES and the
8500 architecture flags in ISA_FLAGS. Return an aarch64_parse_opt_result
8501 describing the parse result. If there is an error parsing, RES and
8502 ISA_FLAGS are left unchanged. */
43e9d192 8503
0cfff2a1
KT
8504static enum aarch64_parse_opt_result
8505aarch64_parse_cpu (const char *to_parse, const struct processor **res,
8506 unsigned long *isa_flags)
43e9d192
IB
8507{
8508 char *ext;
8509 const struct processor *cpu;
0cfff2a1 8510 char *str = (char *) alloca (strlen (to_parse) + 1);
43e9d192
IB
8511 size_t len;
8512
0cfff2a1 8513 strcpy (str, to_parse);
43e9d192
IB
8514
8515 ext = strchr (str, '+');
8516
8517 if (ext != NULL)
8518 len = ext - str;
8519 else
8520 len = strlen (str);
8521
8522 if (len == 0)
0cfff2a1
KT
8523 return AARCH64_PARSE_MISSING_ARG;
8524
43e9d192
IB
8525
8526 /* Loop through the list of supported CPUs to find a match. */
8527 for (cpu = all_cores; cpu->name != NULL; cpu++)
8528 {
8529 if (strlen (cpu->name) == len && strncmp (cpu->name, str, len) == 0)
8530 {
0cfff2a1
KT
8531 unsigned long isa_temp = cpu->flags;
8532
43e9d192
IB
8533
8534 if (ext != NULL)
8535 {
0cfff2a1
KT
8536 /* TO_PARSE string contains at least one extension. */
8537 enum aarch64_parse_opt_result ext_res
8538 = aarch64_parse_extension (ext, &isa_temp);
43e9d192 8539
0cfff2a1
KT
8540 if (ext_res != AARCH64_PARSE_OK)
8541 return ext_res;
8542 }
8543 /* Extension parsing was successful. Confirm the result
8544 cpu and ISA flags. */
8545 *res = cpu;
8546 *isa_flags = isa_temp;
8547 return AARCH64_PARSE_OK;
43e9d192
IB
8548 }
8549 }
8550
8551 /* CPU name not found in list. */
0cfff2a1 8552 return AARCH64_PARSE_INVALID_ARG;
43e9d192
IB
8553}
8554
0cfff2a1
KT
8555/* Parse the TO_PARSE string and put the cpu it selects into RES.
8556 Return an aarch64_parse_opt_result describing the parse result.
8557 If the parsing fails the RES does not change. */
43e9d192 8558
0cfff2a1
KT
8559static enum aarch64_parse_opt_result
8560aarch64_parse_tune (const char *to_parse, const struct processor **res)
43e9d192
IB
8561{
8562 const struct processor *cpu;
0cfff2a1
KT
8563 char *str = (char *) alloca (strlen (to_parse) + 1);
8564
8565 strcpy (str, to_parse);
43e9d192
IB
8566
8567 /* Loop through the list of supported CPUs to find a match. */
8568 for (cpu = all_cores; cpu->name != NULL; cpu++)
8569 {
8570 if (strcmp (cpu->name, str) == 0)
8571 {
0cfff2a1
KT
8572 *res = cpu;
8573 return AARCH64_PARSE_OK;
43e9d192
IB
8574 }
8575 }
8576
8577 /* CPU name not found in list. */
0cfff2a1 8578 return AARCH64_PARSE_INVALID_ARG;
43e9d192
IB
8579}
8580
8dec06f2
JG
8581/* Parse TOKEN, which has length LENGTH to see if it is an option
8582 described in FLAG. If it is, return the index bit for that fusion type.
8583 If not, error (printing OPTION_NAME) and return zero. */
8584
8585static unsigned int
8586aarch64_parse_one_option_token (const char *token,
8587 size_t length,
8588 const struct aarch64_flag_desc *flag,
8589 const char *option_name)
8590{
8591 for (; flag->name != NULL; flag++)
8592 {
8593 if (length == strlen (flag->name)
8594 && !strncmp (flag->name, token, length))
8595 return flag->flag;
8596 }
8597
8598 error ("unknown flag passed in -moverride=%s (%s)", option_name, token);
8599 return 0;
8600}
8601
8602/* Parse OPTION, which is a '.'-separated list of flags to enable.
8603 FLAGS gives the list of flags we understand, INITIAL_STATE gives any
8604 default state we inherit from the CPU tuning structures. OPTION_NAME
8605 gives the top-level option we are parsing in the -moverride string,
8606 for use in error messages. */
8607
8608static unsigned int
8609aarch64_parse_boolean_options (const char *option,
8610 const struct aarch64_flag_desc *flags,
8611 unsigned int initial_state,
8612 const char *option_name)
8613{
8614 const char separator = '.';
8615 const char* specs = option;
8616 const char* ntoken = option;
8617 unsigned int found_flags = initial_state;
8618
8619 while ((ntoken = strchr (specs, separator)))
8620 {
8621 size_t token_length = ntoken - specs;
8622 unsigned token_ops = aarch64_parse_one_option_token (specs,
8623 token_length,
8624 flags,
8625 option_name);
8626 /* If we find "none" (or, for simplicity's sake, an error) anywhere
8627 in the token stream, reset the supported operations. So:
8628
8629 adrp+add.cmp+branch.none.adrp+add
8630
8631 would have the result of turning on only adrp+add fusion. */
8632 if (!token_ops)
8633 found_flags = 0;
8634
8635 found_flags |= token_ops;
8636 specs = ++ntoken;
8637 }
8638
8639 /* The string ended with a trailing separator, which is ill-formed. */
8640 if (!(*specs))
8641 {
8642 error ("%s string ill-formed\n", option_name);
8643 return 0;
8644 }
8645
8646 /* We still have one more token to parse. */
8647 size_t token_length = strlen (specs);
8648 unsigned token_ops = aarch64_parse_one_option_token (specs,
8649 token_length,
8650 flags,
8651 option_name);
8652 if (!token_ops)
8653 found_flags = 0;
8654
8655 found_flags |= token_ops;
8656 return found_flags;
8657}
8658
8659/* Support for overriding instruction fusion. */
8660
8661static void
8662aarch64_parse_fuse_string (const char *fuse_string,
8663 struct tune_params *tune)
8664{
8665 tune->fusible_ops = aarch64_parse_boolean_options (fuse_string,
8666 aarch64_fusible_pairs,
8667 tune->fusible_ops,
8668 "fuse=");
8669}
8670
8671/* Support for overriding other tuning flags. */
8672
8673static void
8674aarch64_parse_tune_string (const char *tune_string,
8675 struct tune_params *tune)
8676{
8677 tune->extra_tuning_flags
8678 = aarch64_parse_boolean_options (tune_string,
8679 aarch64_tuning_flags,
8680 tune->extra_tuning_flags,
8681 "tune=");
8682}
8683
8684/* Parse TOKEN, which has length LENGTH to see if it is a tuning option
8685 we understand. If it is, extract the option string and hand off to
8686 the appropriate function. */
8687
8688void
8689aarch64_parse_one_override_token (const char* token,
8690 size_t length,
8691 struct tune_params *tune)
8692{
8693 const struct aarch64_tuning_override_function *fn
8694 = aarch64_tuning_override_functions;
8695
8696 const char *option_part = strchr (token, '=');
8697 if (!option_part)
8698 {
8699 error ("tuning string missing in option (%s)", token);
8700 return;
8701 }
8702
8703 /* Get the length of the option name. */
8704 length = option_part - token;
8705 /* Skip the '=' to get to the option string. */
8706 option_part++;
8707
8708 for (; fn->name != NULL; fn++)
8709 {
8710 if (!strncmp (fn->name, token, length))
8711 {
8712 fn->parse_override (option_part, tune);
8713 return;
8714 }
8715 }
8716
8717 error ("unknown tuning option (%s)",token);
8718 return;
8719}
8720
5eee3c34
JW
8721/* A checking mechanism for the implementation of the tls size. */
8722
8723static void
8724initialize_aarch64_tls_size (struct gcc_options *opts)
8725{
8726 if (aarch64_tls_size == 0)
8727 aarch64_tls_size = 24;
8728
8729 switch (opts->x_aarch64_cmodel_var)
8730 {
8731 case AARCH64_CMODEL_TINY:
8732 /* Both the default and maximum TLS size allowed under tiny are 1M, which
8733 needs two instructions to address, so we clamp the size to 24. */
8734 if (aarch64_tls_size > 24)
8735 aarch64_tls_size = 24;
8736 break;
8737 case AARCH64_CMODEL_SMALL:
8738 /* The maximum TLS size allowed under small is 4G. */
8739 if (aarch64_tls_size > 32)
8740 aarch64_tls_size = 32;
8741 break;
8742 case AARCH64_CMODEL_LARGE:
8743 /* The maximum TLS size allowed under large is 16E.
8744 FIXME: 16E should be 64-bit; we only support a 48-bit offset now. */
8745 if (aarch64_tls_size > 48)
8746 aarch64_tls_size = 48;
8747 break;
8748 default:
8749 gcc_unreachable ();
8750 }
8751
8752 return;
8753}
8754
8dec06f2
JG
8755/* Parse STRING looking for options in the format:
8756 string :: option:string
8757 option :: name=substring
8758 name :: {a-z}
8759 substring :: defined by option. */
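/* For example, "fuse=adrp+add.cmp+branch" is a single option whose name is
   "fuse" and whose substring "adrp+add.cmp+branch" is handled by
   aarch64_parse_fuse_string above; several such options may be chained
   with ':' separators in a single -moverride string. */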
8760
8761static void
8762aarch64_parse_override_string (const char* input_string,
8763 struct tune_params* tune)
8764{
8765 const char separator = ':';
8766 size_t string_length = strlen (input_string) + 1;
8767 char *string_root = (char *) xmalloc (sizeof (*string_root) * string_length);
8768 char *string = string_root;
8769 strncpy (string, input_string, string_length);
8770 string[string_length - 1] = '\0';
8771
8772 char* ntoken = string;
8773
8774 while ((ntoken = strchr (string, separator)))
8775 {
8776 size_t token_length = ntoken - string;
8777 /* Make this substring look like a string. */
8778 *ntoken = '\0';
8779 aarch64_parse_one_override_token (string, token_length, tune);
8780 string = ++ntoken;
8781 }
8782
8783 /* One last option to parse. */
8784 aarch64_parse_one_override_token (string, strlen (string), tune);
8785 free (string_root);
8786}
43e9d192 8787
43e9d192
IB
8788
8789static void
0cfff2a1 8790aarch64_override_options_after_change_1 (struct gcc_options *opts)
43e9d192 8791{
a3dc8760
NC
8792 /* The logic here is that if we are disabling all frame pointer generation
8793 then we do not need to disable leaf frame pointer generation as a
8794 separate operation. But if we are *only* disabling leaf frame pointer
8795 generation then we set flag_omit_frame_pointer to true, but in
8796 aarch64_frame_pointer_required we return false only for leaf functions.
8797
8798 PR 70044: We have to be careful about being called multiple times for the
8799 same function. Once we have decided to set flag_omit_frame_pointer just
8800 so that we can omit leaf frame pointers, we must then not interpret a
8801 second call as meaning that all frame pointer generation should be
8802 omitted. We do this by setting flag_omit_frame_pointer to a special,
8803 non-zero value. */
8804 if (opts->x_flag_omit_frame_pointer == 2)
8805 opts->x_flag_omit_frame_pointer = 0;
8806
0cfff2a1
KT
8807 if (opts->x_flag_omit_frame_pointer)
8808 opts->x_flag_omit_leaf_frame_pointer = false;
8809 else if (opts->x_flag_omit_leaf_frame_pointer)
a3dc8760 8810 opts->x_flag_omit_frame_pointer = 2;
43e9d192 8811
1be34295 8812 /* If not optimizing for size, set the default
0cfff2a1
KT
8813 alignment to what the target wants. */
8814 if (!opts->x_optimize_size)
43e9d192 8815 {
0cfff2a1
KT
8816 if (opts->x_align_loops <= 0)
8817 opts->x_align_loops = aarch64_tune_params.loop_align;
8818 if (opts->x_align_jumps <= 0)
8819 opts->x_align_jumps = aarch64_tune_params.jump_align;
8820 if (opts->x_align_functions <= 0)
8821 opts->x_align_functions = aarch64_tune_params.function_align;
43e9d192 8822 }
b4f50fd4 8823
9ee6540a
WD
8824 /* We default to no pc-relative literal loads. */
8825
8826 aarch64_pcrelative_literal_loads = false;
8827
8828 /* If -mpc-relative-literal-loads is set on the command line, this
b4f50fd4 8829 implies that the user asked for PC relative literal loads. */
9ee6540a
WD
8830 if (opts->x_pcrelative_literal_loads == 1)
8831 aarch64_pcrelative_literal_loads = true;
b4f50fd4 8832
48bb1a55
CL
8833 /* This is PR70113. When building the Linux kernel with
8834 CONFIG_ARM64_ERRATUM_843419, support for relocations
8835 R_AARCH64_ADR_PREL_PG_HI21 and R_AARCH64_ADR_PREL_PG_HI21_NC is
8836 removed from the kernel to avoid loading objects with possibly
9ee6540a 8837 offending sequences. Without -mpc-relative-literal-loads we would
48bb1a55
CL
8838 generate such relocations, preventing the kernel build from
8839 succeeding. */
9ee6540a
WD
8840 if (opts->x_pcrelative_literal_loads == 2
8841 && TARGET_FIX_ERR_A53_843419)
8842 aarch64_pcrelative_literal_loads = true;
8843
8844 /* In the tiny memory model it makes no sense to disallow PC relative
8845 literal pool loads. */
8846 if (aarch64_cmodel == AARCH64_CMODEL_TINY
8847 || aarch64_cmodel == AARCH64_CMODEL_TINY_PIC)
8848 aarch64_pcrelative_literal_loads = true;
98daafa0
EM
8849
8850 /* When enabling the lower precision Newton series for the square root, also
8851 enable it for the reciprocal square root, since the latter is an
8852 intermediary step for the former. */
8853 if (flag_mlow_precision_sqrt)
8854 flag_mrecip_low_precision_sqrt = true;
0cfff2a1 8855}
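
/* To illustrate the PR 70044 handling above: when only
   -momit-leaf-frame-pointer is in effect, flag_omit_frame_pointer is set to
   the sentinel value 2 on the first call; a later call for the same function
   sees the 2, resets it to 0 and re-derives the same state, rather than
   treating it as a request to omit all frame pointers.  */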
43e9d192 8856
0cfff2a1
KT
8857/* 'Unpack' the internal tuning structs and update the options
 8858   in OPTS.  The caller must have set up selected_tune and selected_arch,
8859 as all the other target-specific codegen decisions are
8860 derived from them. */
8861
e4ea20c8 8862void
0cfff2a1
KT
8863aarch64_override_options_internal (struct gcc_options *opts)
8864{
8865 aarch64_tune_flags = selected_tune->flags;
8866 aarch64_tune = selected_tune->sched_core;
8867 /* Make a copy of the tuning parameters attached to the core, which
8868 we may later overwrite. */
8869 aarch64_tune_params = *(selected_tune->tune);
8870 aarch64_architecture_version = selected_arch->architecture_version;
8871
8872 if (opts->x_aarch64_override_tune_string)
8873 aarch64_parse_override_string (opts->x_aarch64_override_tune_string,
8874 &aarch64_tune_params);
8875
8876 /* This target defaults to strict volatile bitfields. */
8877 if (opts->x_flag_strict_volatile_bitfields < 0 && abi_version_at_least (2))
8878 opts->x_flag_strict_volatile_bitfields = 1;
8879
0cfff2a1 8880 initialize_aarch64_code_model (opts);
5eee3c34 8881 initialize_aarch64_tls_size (opts);
63892fa2 8882
2d6bc7fa
KT
8883 int queue_depth = 0;
8884 switch (aarch64_tune_params.autoprefetcher_model)
8885 {
8886 case tune_params::AUTOPREFETCHER_OFF:
8887 queue_depth = -1;
8888 break;
8889 case tune_params::AUTOPREFETCHER_WEAK:
8890 queue_depth = 0;
8891 break;
8892 case tune_params::AUTOPREFETCHER_STRONG:
8893 queue_depth = max_insn_queue_index + 1;
8894 break;
8895 default:
8896 gcc_unreachable ();
8897 }
8898
8899 /* We don't mind passing in global_options_set here as we don't use
8900 the *options_set structs anyway. */
8901 maybe_set_param_value (PARAM_SCHED_AUTOPREF_QUEUE_DEPTH,
8902 queue_depth,
8903 opts->x_param_values,
8904 global_options_set.x_param_values);
8905
9d2c6e2e
MK
8906 /* Set up parameters to be used in the prefetching algorithm.  Do not
8907 override the defaults unless we are tuning for a core we have
8908 researched values for. */
8909 if (aarch64_tune_params.prefetch->num_slots > 0)
8910 maybe_set_param_value (PARAM_SIMULTANEOUS_PREFETCHES,
8911 aarch64_tune_params.prefetch->num_slots,
8912 opts->x_param_values,
8913 global_options_set.x_param_values);
8914 if (aarch64_tune_params.prefetch->l1_cache_size >= 0)
8915 maybe_set_param_value (PARAM_L1_CACHE_SIZE,
8916 aarch64_tune_params.prefetch->l1_cache_size,
8917 opts->x_param_values,
8918 global_options_set.x_param_values);
8919 if (aarch64_tune_params.prefetch->l1_cache_line_size >= 0)
50487d79 8920 maybe_set_param_value (PARAM_L1_CACHE_LINE_SIZE,
9d2c6e2e
MK
8921 aarch64_tune_params.prefetch->l1_cache_line_size,
8922 opts->x_param_values,
8923 global_options_set.x_param_values);
8924 if (aarch64_tune_params.prefetch->l2_cache_size >= 0)
8925 maybe_set_param_value (PARAM_L2_CACHE_SIZE,
8926 aarch64_tune_params.prefetch->l2_cache_size,
50487d79
EM
8927 opts->x_param_values,
8928 global_options_set.x_param_values);
8929
16b2cafd
MK
8930  /* Enable software prefetching at the specified optimization level for
 8931     CPUs that have prefetch tuning parameters.  Lower the optimization level threshold by 1
8932 when profiling is enabled. */
8933 if (opts->x_flag_prefetch_loop_arrays < 0
8934 && !opts->x_optimize_size
8935 && aarch64_tune_params.prefetch->default_opt_level >= 0
8936 && opts->x_optimize >= aarch64_tune_params.prefetch->default_opt_level)
8937 opts->x_flag_prefetch_loop_arrays = 1;
8938
0cfff2a1
KT
8939 aarch64_override_options_after_change_1 (opts);
8940}
43e9d192 8941
01f44038
KT
8942/* Print a hint with a suggestion for a core or architecture name that
8943 most closely resembles what the user passed in STR. ARCH is true if
8944 the user is asking for an architecture name. ARCH is false if the user
8945 is asking for a core name. */
8946
8947static void
8948aarch64_print_hint_for_core_or_arch (const char *str, bool arch)
8949{
8950 auto_vec<const char *> candidates;
8951 const struct processor *entry = arch ? all_architectures : all_cores;
8952 for (; entry->name != NULL; entry++)
8953 candidates.safe_push (entry->name);
8954 char *s;
8955 const char *hint = candidates_list_and_hint (str, s, candidates);
8956 if (hint)
8957 inform (input_location, "valid arguments are: %s;"
8958 " did you mean %qs?", s, hint);
8959 XDELETEVEC (s);
8960}
8961
8962/* Print a hint with a suggestion for a core name that most closely resembles
8963 what the user passed in STR. */
8964
8965inline static void
8966aarch64_print_hint_for_core (const char *str)
8967{
8968 aarch64_print_hint_for_core_or_arch (str, false);
8969}
8970
8971/* Print a hint with a suggestion for an architecture name that most closely
8972 resembles what the user passed in STR. */
8973
8974inline static void
8975aarch64_print_hint_for_arch (const char *str)
8976{
8977 aarch64_print_hint_for_core_or_arch (str, true);
8978}
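
/* Together with the -mcpu/-march/-mtune validation below, a mistyped core
   name such as -mcpu=cortex-a53x (spelling purely illustrative) would
   produce diagnostics along the lines of:

     error: unknown value 'cortex-a53x' for -mcpu
     note: valid arguments are: ...; did you mean 'cortex-a53'?  */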
8979
0cfff2a1
KT
8980/* Validate a command-line -mcpu option. Parse the cpu and extensions (if any)
8981   specified in STR and throw errors if appropriate.  Put the results, if
361fb3ee
KT
8982   they are valid, in RES and ISA_FLAGS.  Return whether the option is
8983 valid. */
43e9d192 8984
361fb3ee 8985static bool
0cfff2a1
KT
8986aarch64_validate_mcpu (const char *str, const struct processor **res,
8987 unsigned long *isa_flags)
8988{
8989 enum aarch64_parse_opt_result parse_res
8990 = aarch64_parse_cpu (str, res, isa_flags);
8991
8992 if (parse_res == AARCH64_PARSE_OK)
361fb3ee 8993 return true;
0cfff2a1
KT
8994
8995 switch (parse_res)
8996 {
8997 case AARCH64_PARSE_MISSING_ARG:
fb241da2 8998 error ("missing cpu name in %<-mcpu=%s%>", str);
0cfff2a1
KT
8999 break;
9000 case AARCH64_PARSE_INVALID_ARG:
9001 error ("unknown value %qs for -mcpu", str);
01f44038 9002 aarch64_print_hint_for_core (str);
0cfff2a1
KT
9003 break;
9004 case AARCH64_PARSE_INVALID_FEATURE:
fb241da2 9005 error ("invalid feature modifier in %<-mcpu=%s%>", str);
0cfff2a1
KT
9006 break;
9007 default:
9008 gcc_unreachable ();
9009 }
361fb3ee
KT
9010
9011 return false;
0cfff2a1
KT
9012}
9013
9014/* Validate a command-line -march option. Parse the arch and extensions
9015 (if any) specified in STR and throw errors if appropriate. Put the
361fb3ee
KT
9016 results, if they are valid, in RES and ISA_FLAGS. Return whether the
9017 option is valid. */
0cfff2a1 9018
361fb3ee 9019static bool
0cfff2a1 9020aarch64_validate_march (const char *str, const struct processor **res,
01f44038 9021 unsigned long *isa_flags)
0cfff2a1
KT
9022{
9023 enum aarch64_parse_opt_result parse_res
9024 = aarch64_parse_arch (str, res, isa_flags);
9025
9026 if (parse_res == AARCH64_PARSE_OK)
361fb3ee 9027 return true;
0cfff2a1
KT
9028
9029 switch (parse_res)
9030 {
9031 case AARCH64_PARSE_MISSING_ARG:
fb241da2 9032 error ("missing arch name in %<-march=%s%>", str);
0cfff2a1
KT
9033 break;
9034 case AARCH64_PARSE_INVALID_ARG:
9035 error ("unknown value %qs for -march", str);
01f44038 9036 aarch64_print_hint_for_arch (str);
0cfff2a1
KT
9037 break;
9038 case AARCH64_PARSE_INVALID_FEATURE:
fb241da2 9039 error ("invalid feature modifier in %<-march=%s%>", str);
0cfff2a1
KT
9040 break;
9041 default:
9042 gcc_unreachable ();
9043 }
361fb3ee
KT
9044
9045 return false;
0cfff2a1
KT
9046}
9047
9048/* Validate a command-line -mtune option. Parse the cpu
9049 specified in STR and throw errors if appropriate. Put the
361fb3ee
KT
9050 result, if it is valid, in RES. Return whether the option is
9051 valid. */
0cfff2a1 9052
361fb3ee 9053static bool
0cfff2a1
KT
9054aarch64_validate_mtune (const char *str, const struct processor **res)
9055{
9056 enum aarch64_parse_opt_result parse_res
9057 = aarch64_parse_tune (str, res);
9058
9059 if (parse_res == AARCH64_PARSE_OK)
361fb3ee 9060 return true;
0cfff2a1
KT
9061
9062 switch (parse_res)
9063 {
9064 case AARCH64_PARSE_MISSING_ARG:
fb241da2 9065 error ("missing cpu name in %<-mtune=%s%>", str);
0cfff2a1
KT
9066 break;
9067 case AARCH64_PARSE_INVALID_ARG:
9068 error ("unknown value %qs for -mtune", str);
01f44038 9069 aarch64_print_hint_for_core (str);
0cfff2a1
KT
9070 break;
9071 default:
9072 gcc_unreachable ();
9073 }
361fb3ee
KT
9074 return false;
9075}
9076
9077/* Return the CPU corresponding to the enum CPU.
9078 If it doesn't specify a cpu, return the default. */
9079
9080static const struct processor *
9081aarch64_get_tune_cpu (enum aarch64_processor cpu)
9082{
9083 if (cpu != aarch64_none)
9084 return &all_cores[cpu];
9085
9086 /* The & 0x3f is to extract the bottom 6 bits that encode the
9087 default cpu as selected by the --with-cpu GCC configure option
9088 in config.gcc.
9089 ???: The whole TARGET_CPU_DEFAULT and AARCH64_CPU_DEFAULT_FLAGS
9090 flags mechanism should be reworked to make it more sane. */
9091 return &all_cores[TARGET_CPU_DEFAULT & 0x3f];
9092}
9093
9094/* Return the architecture corresponding to the enum ARCH.
9095 If it doesn't specify a valid architecture, return the default. */
9096
9097static const struct processor *
9098aarch64_get_arch (enum aarch64_arch arch)
9099{
9100 if (arch != aarch64_no_arch)
9101 return &all_architectures[arch];
9102
9103 const struct processor *cpu = &all_cores[TARGET_CPU_DEFAULT & 0x3f];
9104
9105 return &all_architectures[cpu->arch];
0cfff2a1
KT
9106}
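
/* A sketch of the TARGET_CPU_DEFAULT encoding assumed by the two functions
   above and by aarch64_override_options below: the bottom 6 bits hold the
   index of the configure-time default CPU in all_cores, and the remaining
   upper bits hold that CPU's default ISA flags, i.e.

     default cpu       = all_cores[TARGET_CPU_DEFAULT & 0x3f]
     default ISA flags = TARGET_CPU_DEFAULT >> 6  */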
9107
9108/* Implement TARGET_OPTION_OVERRIDE.  This is called once at the beginning
 9109   and is used to parse the -m{cpu,tune,arch} strings and set up the initial
9110 tuning structs. In particular it must set selected_tune and
9111 aarch64_isa_flags that define the available ISA features and tuning
9112 decisions. It must also set selected_arch as this will be used to
9113 output the .arch asm tags for each function. */
9114
9115static void
9116aarch64_override_options (void)
9117{
9118 unsigned long cpu_isa = 0;
9119 unsigned long arch_isa = 0;
9120 aarch64_isa_flags = 0;
9121
361fb3ee
KT
9122 bool valid_cpu = true;
9123 bool valid_tune = true;
9124 bool valid_arch = true;
9125
0cfff2a1
KT
9126 selected_cpu = NULL;
9127 selected_arch = NULL;
9128 selected_tune = NULL;
9129
9130 /* -mcpu=CPU is shorthand for -march=ARCH_FOR_CPU, -mtune=CPU.
9131 If either of -march or -mtune is given, they override their
9132 respective component of -mcpu. */
9133 if (aarch64_cpu_string)
361fb3ee
KT
9134 valid_cpu = aarch64_validate_mcpu (aarch64_cpu_string, &selected_cpu,
9135 &cpu_isa);
0cfff2a1
KT
9136
9137 if (aarch64_arch_string)
361fb3ee
KT
9138 valid_arch = aarch64_validate_march (aarch64_arch_string, &selected_arch,
9139 &arch_isa);
0cfff2a1
KT
9140
9141 if (aarch64_tune_string)
361fb3ee 9142 valid_tune = aarch64_validate_mtune (aarch64_tune_string, &selected_tune);
43e9d192
IB
9143
9144 /* If the user did not specify a processor, choose the default
9145 one for them. This will be the CPU set during configuration using
a3cd0246 9146 --with-cpu, otherwise it is "generic". */
43e9d192
IB
9147 if (!selected_cpu)
9148 {
0cfff2a1
KT
9149 if (selected_arch)
9150 {
9151 selected_cpu = &all_cores[selected_arch->ident];
9152 aarch64_isa_flags = arch_isa;
361fb3ee 9153 explicit_arch = selected_arch->arch;
0cfff2a1
KT
9154 }
9155 else
9156 {
361fb3ee
KT
9157 /* Get default configure-time CPU. */
9158 selected_cpu = aarch64_get_tune_cpu (aarch64_none);
0cfff2a1
KT
9159 aarch64_isa_flags = TARGET_CPU_DEFAULT >> 6;
9160 }
361fb3ee
KT
9161
9162 if (selected_tune)
9163 explicit_tune_core = selected_tune->ident;
0cfff2a1
KT
9164 }
9165 /* If both -mcpu and -march are specified check that they are architecturally
9166 compatible, warn if they're not and prefer the -march ISA flags. */
9167 else if (selected_arch)
9168 {
9169 if (selected_arch->arch != selected_cpu->arch)
9170 {
9171 warning (0, "switch -mcpu=%s conflicts with -march=%s switch",
9172 all_architectures[selected_cpu->arch].name,
9173 selected_arch->name);
9174 }
9175 aarch64_isa_flags = arch_isa;
361fb3ee
KT
9176 explicit_arch = selected_arch->arch;
9177 explicit_tune_core = selected_tune ? selected_tune->ident
9178 : selected_cpu->ident;
0cfff2a1
KT
9179 }
9180 else
9181 {
9182 /* -mcpu but no -march. */
9183 aarch64_isa_flags = cpu_isa;
361fb3ee
KT
9184 explicit_tune_core = selected_tune ? selected_tune->ident
9185 : selected_cpu->ident;
9186 gcc_assert (selected_cpu);
9187 selected_arch = &all_architectures[selected_cpu->arch];
9188 explicit_arch = selected_arch->arch;
43e9d192
IB
9189 }
9190
0cfff2a1
KT
9191  /* Set the arch as well, as we will need it when outputting
9192 the .arch directive in assembly. */
9193 if (!selected_arch)
9194 {
9195 gcc_assert (selected_cpu);
9196 selected_arch = &all_architectures[selected_cpu->arch];
9197 }
43e9d192 9198
43e9d192 9199 if (!selected_tune)
3edaf26d 9200 selected_tune = selected_cpu;
43e9d192 9201
0cfff2a1
KT
9202#ifndef HAVE_AS_MABI_OPTION
9203 /* The compiler may have been configured with 2.23.* binutils, which does
9204 not have support for ILP32. */
9205 if (TARGET_ILP32)
9206 error ("Assembler does not support -mabi=ilp32");
9207#endif
43e9d192 9208
db58fd89
JW
9209 if (aarch64_ra_sign_scope != AARCH64_FUNCTION_NONE && TARGET_ILP32)
9210 sorry ("Return address signing is only supported for -mabi=lp64");
9211
361fb3ee
KT
9212 /* Make sure we properly set up the explicit options. */
9213 if ((aarch64_cpu_string && valid_cpu)
9214 || (aarch64_tune_string && valid_tune))
9215 gcc_assert (explicit_tune_core != aarch64_none);
9216
9217 if ((aarch64_cpu_string && valid_cpu)
9218 || (aarch64_arch_string && valid_arch))
9219 gcc_assert (explicit_arch != aarch64_no_arch);
9220
0cfff2a1
KT
9221 aarch64_override_options_internal (&global_options);
9222
9223 /* Save these options as the default ones in case we push and pop them later
9224 while processing functions with potential target attributes. */
9225 target_option_default_node = target_option_current_node
9226 = build_target_option_node (&global_options);
43e9d192
IB
9227}
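
/* Worked examples of the precedence implemented above (core and arch names
   are only illustrative):

     -mcpu=cortex-a57                    tune, arch and ISA flags all come
                                         from cortex-a57.
     -mcpu=cortex-a57 -mtune=generic     arch and ISA flags from the -mcpu
                                         core, tuning from -mtune.
     -mcpu=cortex-a57 -march=armv8.1-a   the -march ISA flags win, and a
                                         warning is issued if the two
                                         architectures differ.  */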
9228
9229/* Implement targetm.override_options_after_change. */
9230
9231static void
9232aarch64_override_options_after_change (void)
9233{
0cfff2a1 9234 aarch64_override_options_after_change_1 (&global_options);
43e9d192
IB
9235}
9236
9237static struct machine_function *
9238aarch64_init_machine_status (void)
9239{
9240 struct machine_function *machine;
766090c2 9241 machine = ggc_cleared_alloc<machine_function> ();
43e9d192
IB
9242 return machine;
9243}
9244
9245void
9246aarch64_init_expanders (void)
9247{
9248 init_machine_status = aarch64_init_machine_status;
9249}
9250
9251/* Work out the effective code model (aarch64_cmodel) from the model requested in OPTS, taking -fpic/-fPIC into account.  */
9252static void
0cfff2a1 9253initialize_aarch64_code_model (struct gcc_options *opts)
43e9d192 9254{
0cfff2a1 9255 if (opts->x_flag_pic)
43e9d192 9256 {
0cfff2a1 9257 switch (opts->x_aarch64_cmodel_var)
43e9d192
IB
9258 {
9259 case AARCH64_CMODEL_TINY:
9260 aarch64_cmodel = AARCH64_CMODEL_TINY_PIC;
9261 break;
9262 case AARCH64_CMODEL_SMALL:
34ecdb0f 9263#ifdef HAVE_AS_SMALL_PIC_RELOCS
1b1e81f8
JW
9264 aarch64_cmodel = (flag_pic == 2
9265 ? AARCH64_CMODEL_SMALL_PIC
9266 : AARCH64_CMODEL_SMALL_SPIC);
34ecdb0f
JW
9267#else
9268 aarch64_cmodel = AARCH64_CMODEL_SMALL_PIC;
9269#endif
43e9d192
IB
9270 break;
9271 case AARCH64_CMODEL_LARGE:
9272 sorry ("code model %qs with -f%s", "large",
0cfff2a1 9273 opts->x_flag_pic > 1 ? "PIC" : "pic");
1c652781 9274 break;
43e9d192
IB
9275 default:
9276 gcc_unreachable ();
9277 }
9278 }
9279 else
0cfff2a1 9280 aarch64_cmodel = opts->x_aarch64_cmodel_var;
43e9d192
IB
9281}
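
/* For example, assuming the assembler supports the small PIC relocations
   (HAVE_AS_SMALL_PIC_RELOCS defined):

     -mcmodel=small -fpic   ->  AARCH64_CMODEL_SMALL_SPIC
     -mcmodel=small -fPIC   ->  AARCH64_CMODEL_SMALL_PIC
     -mcmodel=tiny  -fpic   ->  AARCH64_CMODEL_TINY_PIC
     -mcmodel=large -fpic   ->  rejected with a "sorry" diagnostic
     no -fpic/-fPIC         ->  the model requested by -mcmodel is used.  */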
9282
361fb3ee
KT
9283/* Implement TARGET_OPTION_SAVE. */
9284
9285static void
9286aarch64_option_save (struct cl_target_option *ptr, struct gcc_options *opts)
9287{
9288 ptr->x_aarch64_override_tune_string = opts->x_aarch64_override_tune_string;
9289}
9290
9291/* Implements TARGET_OPTION_RESTORE. Restore the backend codegen decisions
9292 using the information saved in PTR. */
9293
9294static void
9295aarch64_option_restore (struct gcc_options *opts, struct cl_target_option *ptr)
9296{
9297 opts->x_explicit_tune_core = ptr->x_explicit_tune_core;
9298 selected_tune = aarch64_get_tune_cpu (ptr->x_explicit_tune_core);
9299 opts->x_explicit_arch = ptr->x_explicit_arch;
9300 selected_arch = aarch64_get_arch (ptr->x_explicit_arch);
9301 opts->x_aarch64_override_tune_string = ptr->x_aarch64_override_tune_string;
9302
9303 aarch64_override_options_internal (opts);
9304}
9305
9306/* Implement TARGET_OPTION_PRINT. */
9307
9308static void
9309aarch64_option_print (FILE *file, int indent, struct cl_target_option *ptr)
9310{
9311 const struct processor *cpu
9312 = aarch64_get_tune_cpu (ptr->x_explicit_tune_core);
9313 unsigned long isa_flags = ptr->x_aarch64_isa_flags;
9314 const struct processor *arch = aarch64_get_arch (ptr->x_explicit_arch);
054b4005 9315 std::string extension
04a99ebe 9316 = aarch64_get_extension_string_for_isa_flags (isa_flags, arch->flags);
361fb3ee
KT
9317
9318 fprintf (file, "%*sselected tune = %s\n", indent, "", cpu->name);
054b4005
JG
9319 fprintf (file, "%*sselected arch = %s%s\n", indent, "",
9320 arch->name, extension.c_str ());
361fb3ee
KT
9321}
9322
d78006d9
KT
9323static GTY(()) tree aarch64_previous_fndecl;
9324
e4ea20c8
KT
9325void
9326aarch64_reset_previous_fndecl (void)
9327{
9328 aarch64_previous_fndecl = NULL;
9329}
9330
acfc1ac1
KT
9331/* Restore or save the TREE_TARGET_GLOBALS from or to NEW_TREE.
9332 Used by aarch64_set_current_function and aarch64_pragma_target_parse to
9333 make sure optab availability predicates are recomputed when necessary. */
9334
9335void
9336aarch64_save_restore_target_globals (tree new_tree)
9337{
9338 if (TREE_TARGET_GLOBALS (new_tree))
9339 restore_target_globals (TREE_TARGET_GLOBALS (new_tree));
9340 else if (new_tree == target_option_default_node)
9341 restore_target_globals (&default_target_globals);
9342 else
9343 TREE_TARGET_GLOBALS (new_tree) = save_target_globals_default_opts ();
9344}
9345
d78006d9
KT
9346/* Implement TARGET_SET_CURRENT_FUNCTION. Unpack the codegen decisions
9347 like tuning and ISA features from the DECL_FUNCTION_SPECIFIC_TARGET
9348 of the function, if such exists. This function may be called multiple
9349 times on a single function so use aarch64_previous_fndecl to avoid
9350 setting up identical state. */
9351
9352static void
9353aarch64_set_current_function (tree fndecl)
9354{
acfc1ac1
KT
9355 if (!fndecl || fndecl == aarch64_previous_fndecl)
9356 return;
9357
d78006d9
KT
9358 tree old_tree = (aarch64_previous_fndecl
9359 ? DECL_FUNCTION_SPECIFIC_TARGET (aarch64_previous_fndecl)
9360 : NULL_TREE);
9361
acfc1ac1 9362 tree new_tree = DECL_FUNCTION_SPECIFIC_TARGET (fndecl);
d78006d9 9363
acfc1ac1
KT
9364 /* If current function has no attributes but the previous one did,
9365 use the default node. */
9366 if (!new_tree && old_tree)
9367 new_tree = target_option_default_node;
d78006d9 9368
acfc1ac1
KT
9369 /* If nothing to do, return. #pragma GCC reset or #pragma GCC pop to
9370 the default have been handled by aarch64_save_restore_target_globals from
9371 aarch64_pragma_target_parse. */
9372 if (old_tree == new_tree)
9373 return;
d78006d9 9374
acfc1ac1 9375 aarch64_previous_fndecl = fndecl;
6e17a23b 9376
acfc1ac1
KT
9377 /* First set the target options. */
9378 cl_target_option_restore (&global_options, TREE_TARGET_OPTION (new_tree));
6e17a23b 9379
acfc1ac1 9380 aarch64_save_restore_target_globals (new_tree);
d78006d9 9381}
361fb3ee 9382
5a2c8331
KT
9383/* Enum describing the various ways we can handle attributes.
9384 In many cases we can reuse the generic option handling machinery. */
9385
9386enum aarch64_attr_opt_type
9387{
9388 aarch64_attr_mask, /* Attribute should set a bit in target_flags. */
9389 aarch64_attr_bool, /* Attribute sets or unsets a boolean variable. */
9390 aarch64_attr_enum, /* Attribute sets an enum variable. */
9391 aarch64_attr_custom /* Attribute requires a custom handling function. */
9392};
9393
9394/* All the information needed to handle a target attribute.
9395 NAME is the name of the attribute.
9c582551 9396 ATTR_TYPE specifies the type of behavior of the attribute as described
5a2c8331
KT
9397 in the definition of enum aarch64_attr_opt_type.
9398 ALLOW_NEG is true if the attribute supports a "no-" form.
9399 HANDLER is the function that takes the attribute string and whether
9400 it is a pragma or attribute and handles the option. It is needed only
9401 when the ATTR_TYPE is aarch64_attr_custom.
9402 OPT_NUM is the enum specifying the option that the attribute modifies.
9c582551 9403 This is needed for attributes that mirror the behavior of a command-line
5a2c8331
KT
9404 option, that is it has ATTR_TYPE aarch64_attr_mask, aarch64_attr_bool or
9405 aarch64_attr_enum. */
9406
9407struct aarch64_attribute_info
9408{
9409 const char *name;
9410 enum aarch64_attr_opt_type attr_type;
9411 bool allow_neg;
9412 bool (*handler) (const char *, const char *);
9413 enum opt_code opt_num;
9414};
9415
9416/* Handle STR, the argument to the arch= target attribute.
9417 PRAGMA_OR_ATTR is used in potential error messages. */
9418
9419static bool
9420aarch64_handle_attr_arch (const char *str, const char *pragma_or_attr)
9421{
9422 const struct processor *tmp_arch = NULL;
9423 enum aarch64_parse_opt_result parse_res
9424 = aarch64_parse_arch (str, &tmp_arch, &aarch64_isa_flags);
9425
9426 if (parse_res == AARCH64_PARSE_OK)
9427 {
9428 gcc_assert (tmp_arch);
9429 selected_arch = tmp_arch;
9430 explicit_arch = selected_arch->arch;
9431 return true;
9432 }
9433
9434 switch (parse_res)
9435 {
9436 case AARCH64_PARSE_MISSING_ARG:
9437 error ("missing architecture name in 'arch' target %s", pragma_or_attr);
9438 break;
9439 case AARCH64_PARSE_INVALID_ARG:
9440 error ("unknown value %qs for 'arch' target %s", str, pragma_or_attr);
01f44038 9441 aarch64_print_hint_for_arch (str);
5a2c8331
KT
9442 break;
9443 case AARCH64_PARSE_INVALID_FEATURE:
9444 error ("invalid feature modifier %qs for 'arch' target %s",
9445 str, pragma_or_attr);
9446 break;
9447 default:
9448 gcc_unreachable ();
9449 }
9450
9451 return false;
9452}
9453
9454/* Handle STR, the argument to the cpu= target attribute.
9455 PRAGMA_OR_ATTR is used in potential error messages. */
9456
9457static bool
9458aarch64_handle_attr_cpu (const char *str, const char *pragma_or_attr)
9459{
9460 const struct processor *tmp_cpu = NULL;
9461 enum aarch64_parse_opt_result parse_res
9462 = aarch64_parse_cpu (str, &tmp_cpu, &aarch64_isa_flags);
9463
9464 if (parse_res == AARCH64_PARSE_OK)
9465 {
9466 gcc_assert (tmp_cpu);
9467 selected_tune = tmp_cpu;
9468 explicit_tune_core = selected_tune->ident;
9469
9470 selected_arch = &all_architectures[tmp_cpu->arch];
9471 explicit_arch = selected_arch->arch;
9472 return true;
9473 }
9474
9475 switch (parse_res)
9476 {
9477 case AARCH64_PARSE_MISSING_ARG:
9478 error ("missing cpu name in 'cpu' target %s", pragma_or_attr);
9479 break;
9480 case AARCH64_PARSE_INVALID_ARG:
9481 error ("unknown value %qs for 'cpu' target %s", str, pragma_or_attr);
01f44038 9482 aarch64_print_hint_for_core (str);
5a2c8331
KT
9483 break;
9484 case AARCH64_PARSE_INVALID_FEATURE:
9485 error ("invalid feature modifier %qs for 'cpu' target %s",
9486 str, pragma_or_attr);
9487 break;
9488 default:
9489 gcc_unreachable ();
9490 }
9491
9492 return false;
9493}
9494
9495/* Handle the argument STR to the tune= target attribute.
9496 PRAGMA_OR_ATTR is used in potential error messages. */
9497
9498static bool
9499aarch64_handle_attr_tune (const char *str, const char *pragma_or_attr)
9500{
9501 const struct processor *tmp_tune = NULL;
9502 enum aarch64_parse_opt_result parse_res
9503 = aarch64_parse_tune (str, &tmp_tune);
9504
9505 if (parse_res == AARCH64_PARSE_OK)
9506 {
9507 gcc_assert (tmp_tune);
9508 selected_tune = tmp_tune;
9509 explicit_tune_core = selected_tune->ident;
9510 return true;
9511 }
9512
9513 switch (parse_res)
9514 {
9515 case AARCH64_PARSE_INVALID_ARG:
9516 error ("unknown value %qs for 'tune' target %s", str, pragma_or_attr);
01f44038 9517 aarch64_print_hint_for_core (str);
5a2c8331
KT
9518 break;
9519 default:
9520 gcc_unreachable ();
9521 }
9522
9523 return false;
9524}
9525
9526/* Parse an architecture extensions target attribute string specified in STR.
9527 For example "+fp+nosimd". Show any errors if needed. Return TRUE
9528 if successful. Update aarch64_isa_flags to reflect the ISA features
9529 modified.
9530 PRAGMA_OR_ATTR is used in potential error messages. */
9531
9532static bool
9533aarch64_handle_attr_isa_flags (char *str, const char *pragma_or_attr)
9534{
9535 enum aarch64_parse_opt_result parse_res;
9536 unsigned long isa_flags = aarch64_isa_flags;
9537
e4ea20c8
KT
9538 /* We allow "+nothing" in the beginning to clear out all architectural
9539 features if the user wants to handpick specific features. */
9540 if (strncmp ("+nothing", str, 8) == 0)
9541 {
9542 isa_flags = 0;
9543 str += 8;
9544 }
9545
5a2c8331
KT
9546 parse_res = aarch64_parse_extension (str, &isa_flags);
9547
9548 if (parse_res == AARCH64_PARSE_OK)
9549 {
9550 aarch64_isa_flags = isa_flags;
9551 return true;
9552 }
9553
9554 switch (parse_res)
9555 {
9556 case AARCH64_PARSE_MISSING_ARG:
9557 error ("missing feature modifier in target %s %qs",
9558 pragma_or_attr, str);
9559 break;
9560
9561 case AARCH64_PARSE_INVALID_FEATURE:
9562 error ("invalid feature modifier in target %s %qs",
9563 pragma_or_attr, str);
9564 break;
9565
9566 default:
9567 gcc_unreachable ();
9568 }
9569
9570 return false;
9571}
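
/* For instance, a target attribute string of "+nothing+simd" (extension
   names only illustrative) first clears the accumulated ISA flags and then
   adds back only the "simd" extension, whereas "+crc" alone adds "crc" on
   top of whatever aarch64_isa_flags already contains.  */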
9572
9573/* The target attributes that we support. On top of these we also support just
9574 ISA extensions, like __attribute__ ((target ("+crc"))), but that case is
9575 handled explicitly in aarch64_process_one_target_attr. */
9576
9577static const struct aarch64_attribute_info aarch64_attributes[] =
9578{
9579 { "general-regs-only", aarch64_attr_mask, false, NULL,
9580 OPT_mgeneral_regs_only },
9581 { "fix-cortex-a53-835769", aarch64_attr_bool, true, NULL,
9582 OPT_mfix_cortex_a53_835769 },
48bb1a55
CL
9583 { "fix-cortex-a53-843419", aarch64_attr_bool, true, NULL,
9584 OPT_mfix_cortex_a53_843419 },
5a2c8331
KT
9585 { "cmodel", aarch64_attr_enum, false, NULL, OPT_mcmodel_ },
9586 { "strict-align", aarch64_attr_mask, false, NULL, OPT_mstrict_align },
9587 { "omit-leaf-frame-pointer", aarch64_attr_bool, true, NULL,
9588 OPT_momit_leaf_frame_pointer },
9589 { "tls-dialect", aarch64_attr_enum, false, NULL, OPT_mtls_dialect_ },
9590 { "arch", aarch64_attr_custom, false, aarch64_handle_attr_arch,
9591 OPT_march_ },
9592 { "cpu", aarch64_attr_custom, false, aarch64_handle_attr_cpu, OPT_mcpu_ },
9593 { "tune", aarch64_attr_custom, false, aarch64_handle_attr_tune,
9594 OPT_mtune_ },
db58fd89
JW
9595 { "sign-return-address", aarch64_attr_enum, false, NULL,
9596 OPT_msign_return_address_ },
5a2c8331
KT
9597 { NULL, aarch64_attr_custom, false, NULL, OPT____ }
9598};
9599
9600/* Parse ARG_STR which contains the definition of one target attribute.
9601 Show appropriate errors if any or return true if the attribute is valid.
9602 PRAGMA_OR_ATTR holds the string to use in error messages about whether
9603 we're processing a target attribute or pragma. */
9604
9605static bool
9606aarch64_process_one_target_attr (char *arg_str, const char* pragma_or_attr)
9607{
9608 bool invert = false;
9609
9610 size_t len = strlen (arg_str);
9611
9612 if (len == 0)
9613 {
9614 error ("malformed target %s", pragma_or_attr);
9615 return false;
9616 }
9617
9618 char *str_to_check = (char *) alloca (len + 1);
9619 strcpy (str_to_check, arg_str);
9620
9621 /* Skip leading whitespace. */
9622 while (*str_to_check == ' ' || *str_to_check == '\t')
9623 str_to_check++;
9624
9625 /* We have something like __attribute__ ((target ("+fp+nosimd"))).
9626 It is easier to detect and handle it explicitly here rather than going
9627 through the machinery for the rest of the target attributes in this
9628 function. */
9629 if (*str_to_check == '+')
9630 return aarch64_handle_attr_isa_flags (str_to_check, pragma_or_attr);
9631
9632 if (len > 3 && strncmp (str_to_check, "no-", 3) == 0)
9633 {
9634 invert = true;
9635 str_to_check += 3;
9636 }
9637 char *arg = strchr (str_to_check, '=');
9638
9639 /* If we found opt=foo then terminate STR_TO_CHECK at the '='
9640 and point ARG to "foo". */
9641 if (arg)
9642 {
9643 *arg = '\0';
9644 arg++;
9645 }
9646 const struct aarch64_attribute_info *p_attr;
16d12992 9647 bool found = false;
5a2c8331
KT
9648 for (p_attr = aarch64_attributes; p_attr->name; p_attr++)
9649 {
9650 /* If the names don't match up, or the user has given an argument
9651 to an attribute that doesn't accept one, or didn't give an argument
9652 to an attribute that expects one, fail to match. */
9653 if (strcmp (str_to_check, p_attr->name) != 0)
9654 continue;
9655
16d12992 9656 found = true;
5a2c8331
KT
9657 bool attr_need_arg_p = p_attr->attr_type == aarch64_attr_custom
9658 || p_attr->attr_type == aarch64_attr_enum;
9659
9660 if (attr_need_arg_p ^ (arg != NULL))
9661 {
9662 error ("target %s %qs does not accept an argument",
9663 pragma_or_attr, str_to_check);
9664 return false;
9665 }
9666
9667 /* If the name matches but the attribute does not allow "no-" versions
9668 then we can't match. */
9669 if (invert && !p_attr->allow_neg)
9670 {
9671 error ("target %s %qs does not allow a negated form",
9672 pragma_or_attr, str_to_check);
9673 return false;
9674 }
9675
9676 switch (p_attr->attr_type)
9677 {
9678 /* Has a custom handler registered.
9679 For example, cpu=, arch=, tune=. */
9680 case aarch64_attr_custom:
9681 gcc_assert (p_attr->handler);
9682 if (!p_attr->handler (arg, pragma_or_attr))
9683 return false;
9684 break;
9685
9686 /* Either set or unset a boolean option. */
9687 case aarch64_attr_bool:
9688 {
9689 struct cl_decoded_option decoded;
9690
9691 generate_option (p_attr->opt_num, NULL, !invert,
9692 CL_TARGET, &decoded);
9693 aarch64_handle_option (&global_options, &global_options_set,
9694 &decoded, input_location);
9695 break;
9696 }
9697 /* Set or unset a bit in the target_flags. aarch64_handle_option
9698 should know what mask to apply given the option number. */
9699 case aarch64_attr_mask:
9700 {
9701 struct cl_decoded_option decoded;
9702 /* We only need to specify the option number.
9703 aarch64_handle_option will know which mask to apply. */
9704 decoded.opt_index = p_attr->opt_num;
9705 decoded.value = !invert;
9706 aarch64_handle_option (&global_options, &global_options_set,
9707 &decoded, input_location);
9708 break;
9709 }
9710 /* Use the option setting machinery to set an option to an enum. */
9711 case aarch64_attr_enum:
9712 {
9713 gcc_assert (arg);
9714 bool valid;
9715 int value;
9716 valid = opt_enum_arg_to_value (p_attr->opt_num, arg,
9717 &value, CL_TARGET);
9718 if (valid)
9719 {
9720 set_option (&global_options, NULL, p_attr->opt_num, value,
9721 NULL, DK_UNSPECIFIED, input_location,
9722 global_dc);
9723 }
9724 else
9725 {
9726 error ("target %s %s=%s is not valid",
9727 pragma_or_attr, str_to_check, arg);
9728 }
9729 break;
9730 }
9731 default:
9732 gcc_unreachable ();
9733 }
9734 }
9735
16d12992
KT
9736  /* If we reached here, we either found an attribute and validated
 9737     it or didn't match any.  If we matched an attribute but its arguments
9738 were malformed we will have returned false already. */
9739 return found;
5a2c8331
KT
9740}
9741
9742/* Count how many times the character C appears in
9743 NULL-terminated string STR. */
9744
9745static unsigned int
9746num_occurences_in_str (char c, char *str)
9747{
9748 unsigned int res = 0;
9749 while (*str != '\0')
9750 {
9751 if (*str == c)
9752 res++;
9753
9754 str++;
9755 }
9756
9757 return res;
9758}
9759
9760/* Parse the tree in ARGS that contains the target attribute information
9761 and update the global target options space. PRAGMA_OR_ATTR is a string
9762 to be used in error messages, specifying whether this is processing
9763 a target attribute or a target pragma. */
9764
9765bool
9766aarch64_process_target_attr (tree args, const char* pragma_or_attr)
9767{
9768 if (TREE_CODE (args) == TREE_LIST)
9769 {
9770 do
9771 {
9772 tree head = TREE_VALUE (args);
9773 if (head)
9774 {
9775 if (!aarch64_process_target_attr (head, pragma_or_attr))
9776 return false;
9777 }
9778 args = TREE_CHAIN (args);
9779 } while (args);
9780
9781 return true;
9782 }
3b6cb9e3
ML
9783
9784 if (TREE_CODE (args) != STRING_CST)
9785 {
9786 error ("attribute %<target%> argument not a string");
9787 return false;
9788 }
5a2c8331
KT
9789
9790 size_t len = strlen (TREE_STRING_POINTER (args));
9791 char *str_to_check = (char *) alloca (len + 1);
9792 strcpy (str_to_check, TREE_STRING_POINTER (args));
9793
9794 if (len == 0)
9795 {
9796 error ("malformed target %s value", pragma_or_attr);
9797 return false;
9798 }
9799
9800  /* Used to catch empty strings between commas, i.e.
9801 attribute ((target ("attr1,,attr2"))). */
9802 unsigned int num_commas = num_occurences_in_str (',', str_to_check);
9803
9804 /* Handle multiple target attributes separated by ','. */
9805 char *token = strtok (str_to_check, ",");
9806
9807 unsigned int num_attrs = 0;
9808 while (token)
9809 {
9810 num_attrs++;
9811 if (!aarch64_process_one_target_attr (token, pragma_or_attr))
9812 {
9813 error ("target %s %qs is invalid", pragma_or_attr, token);
9814 return false;
9815 }
9816
9817 token = strtok (NULL, ",");
9818 }
9819
9820 if (num_attrs != num_commas + 1)
9821 {
9822 error ("malformed target %s list %qs",
9823 pragma_or_attr, TREE_STRING_POINTER (args));
9824 return false;
9825 }
9826
9827 return true;
9828}
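
/* As an illustration of the parsing above, a declaration such as

     __attribute__ ((target ("no-omit-leaf-frame-pointer,tune=cortex-a53")))

   (attribute values only illustrative) is split on ',' into
   "no-omit-leaf-frame-pointer" and "tune=cortex-a53"; each piece is handled
   by aarch64_process_one_target_attr, which strips an optional "no-" prefix,
   splits at '=' where present and looks the name up in aarch64_attributes.
   A string like "arch=armv8-a,,+crc" is rejected because the number of
   parsed attributes does not match the number of commas plus one.  */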
9829
9830/* Implement TARGET_OPTION_VALID_ATTRIBUTE_P. This is used to
9831 process attribute ((target ("..."))). */
9832
9833static bool
9834aarch64_option_valid_attribute_p (tree fndecl, tree, tree args, int)
9835{
9836 struct cl_target_option cur_target;
9837 bool ret;
9838 tree old_optimize;
9839 tree new_target, new_optimize;
9840 tree existing_target = DECL_FUNCTION_SPECIFIC_TARGET (fndecl);
91d0e8de
KT
9841
9842 /* If what we're processing is the current pragma string then the
9843 target option node is already stored in target_option_current_node
9844 by aarch64_pragma_target_parse in aarch64-c.c. Use that to avoid
9845 having to re-parse the string. This is especially useful to keep
9846 arm_neon.h compile times down since that header contains a lot
9847 of intrinsics enclosed in pragmas. */
9848 if (!existing_target && args == current_target_pragma)
9849 {
9850 DECL_FUNCTION_SPECIFIC_TARGET (fndecl) = target_option_current_node;
9851 return true;
9852 }
5a2c8331
KT
9853 tree func_optimize = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl);
9854
9855 old_optimize = build_optimization_node (&global_options);
9856 func_optimize = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl);
9857
9858 /* If the function changed the optimization levels as well as setting
9859 target options, start with the optimizations specified. */
9860 if (func_optimize && func_optimize != old_optimize)
9861 cl_optimization_restore (&global_options,
9862 TREE_OPTIMIZATION (func_optimize));
9863
9864 /* Save the current target options to restore at the end. */
9865 cl_target_option_save (&cur_target, &global_options);
9866
9867 /* If fndecl already has some target attributes applied to it, unpack
9868 them so that we add this attribute on top of them, rather than
9869 overwriting them. */
9870 if (existing_target)
9871 {
9872 struct cl_target_option *existing_options
9873 = TREE_TARGET_OPTION (existing_target);
9874
9875 if (existing_options)
9876 cl_target_option_restore (&global_options, existing_options);
9877 }
9878 else
9879 cl_target_option_restore (&global_options,
9880 TREE_TARGET_OPTION (target_option_current_node));
9881
9882
9883 ret = aarch64_process_target_attr (args, "attribute");
9884
9885 /* Set up any additional state. */
9886 if (ret)
9887 {
9888 aarch64_override_options_internal (&global_options);
e95a988a
KT
9889 /* Initialize SIMD builtins if we haven't already.
9890 Set current_target_pragma to NULL for the duration so that
9891 the builtin initialization code doesn't try to tag the functions
9892 being built with the attributes specified by any current pragma, thus
9893 going into an infinite recursion. */
9894 if (TARGET_SIMD)
9895 {
9896 tree saved_current_target_pragma = current_target_pragma;
9897 current_target_pragma = NULL;
9898 aarch64_init_simd_builtins ();
9899 current_target_pragma = saved_current_target_pragma;
9900 }
5a2c8331
KT
9901 new_target = build_target_option_node (&global_options);
9902 }
9903 else
9904 new_target = NULL;
9905
9906 new_optimize = build_optimization_node (&global_options);
9907
9908 if (fndecl && ret)
9909 {
9910 DECL_FUNCTION_SPECIFIC_TARGET (fndecl) = new_target;
9911
9912 if (old_optimize != new_optimize)
9913 DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl) = new_optimize;
9914 }
9915
9916 cl_target_option_restore (&global_options, &cur_target);
9917
9918 if (old_optimize != new_optimize)
9919 cl_optimization_restore (&global_options,
9920 TREE_OPTIMIZATION (old_optimize));
9921 return ret;
9922}
9923
1fd8d40c
KT
9924/* Helper for aarch64_can_inline_p.  In the case where CALLER and CALLEE are
 9925   values of tri-bool options (yes, no, don't care) and the default value is
9926 DEF, determine whether to reject inlining. */
9927
9928static bool
9929aarch64_tribools_ok_for_inlining_p (int caller, int callee,
9930 int dont_care, int def)
9931{
9932 /* If the callee doesn't care, always allow inlining. */
9933 if (callee == dont_care)
9934 return true;
9935
9936 /* If the caller doesn't care, always allow inlining. */
9937 if (caller == dont_care)
9938 return true;
9939
9940 /* Otherwise, allow inlining if either the callee and caller values
9941 agree, or if the callee is using the default value. */
9942 return (callee == caller || callee == def);
9943}
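
/* For example, with DONT_CARE == 2 and DEF == 1 (the values used for the
   -momit-leaf-frame-pointer check below), the helper above gives:

     caller  callee  inline OK?
       0       2       yes   (callee doesn't care)
       2       0       yes   (caller doesn't care)
       0       0       yes   (values agree)
       0       1       yes   (callee uses the default)
       1       0       no    (explicit, non-default mismatch)  */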
9944
9945/* Implement TARGET_CAN_INLINE_P. Decide whether it is valid
9946 to inline CALLEE into CALLER based on target-specific info.
9947 Make sure that the caller and callee have compatible architectural
9948 features. Then go through the other possible target attributes
9949 and see if they can block inlining. Try not to reject always_inline
9950 callees unless they are incompatible architecturally. */
9951
9952static bool
9953aarch64_can_inline_p (tree caller, tree callee)
9954{
9955 tree caller_tree = DECL_FUNCTION_SPECIFIC_TARGET (caller);
9956 tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (callee);
9957
9958 /* If callee has no option attributes, then it is ok to inline. */
9959 if (!callee_tree)
9960 return true;
9961
9962 struct cl_target_option *caller_opts
9963 = TREE_TARGET_OPTION (caller_tree ? caller_tree
9964 : target_option_default_node);
9965
9966 struct cl_target_option *callee_opts = TREE_TARGET_OPTION (callee_tree);
9967
9968
9969 /* Callee's ISA flags should be a subset of the caller's. */
9970 if ((caller_opts->x_aarch64_isa_flags & callee_opts->x_aarch64_isa_flags)
9971 != callee_opts->x_aarch64_isa_flags)
9972 return false;
9973
9974  /* Allow non-strict-aligned functions to be inlined into strict-aligned
 9975     ones, but not the other way around.  */
9976 if ((TARGET_STRICT_ALIGN_P (caller_opts->x_target_flags)
9977 != TARGET_STRICT_ALIGN_P (callee_opts->x_target_flags))
9978 && !(!TARGET_STRICT_ALIGN_P (callee_opts->x_target_flags)
9979 && TARGET_STRICT_ALIGN_P (caller_opts->x_target_flags)))
9980 return false;
9981
9982 bool always_inline = lookup_attribute ("always_inline",
9983 DECL_ATTRIBUTES (callee));
9984
9985 /* If the architectural features match up and the callee is always_inline
9986 then the other attributes don't matter. */
9987 if (always_inline)
9988 return true;
9989
9990 if (caller_opts->x_aarch64_cmodel_var
9991 != callee_opts->x_aarch64_cmodel_var)
9992 return false;
9993
9994 if (caller_opts->x_aarch64_tls_dialect
9995 != callee_opts->x_aarch64_tls_dialect)
9996 return false;
9997
9998 /* Honour explicit requests to workaround errata. */
9999 if (!aarch64_tribools_ok_for_inlining_p (
10000 caller_opts->x_aarch64_fix_a53_err835769,
10001 callee_opts->x_aarch64_fix_a53_err835769,
10002 2, TARGET_FIX_ERR_A53_835769_DEFAULT))
10003 return false;
10004
48bb1a55
CL
10005 if (!aarch64_tribools_ok_for_inlining_p (
10006 caller_opts->x_aarch64_fix_a53_err843419,
10007 callee_opts->x_aarch64_fix_a53_err843419,
10008 2, TARGET_FIX_ERR_A53_843419))
10009 return false;
10010
1fd8d40c
KT
10011 /* If the user explicitly specified -momit-leaf-frame-pointer for the
10012     caller and callee and they don't match up, reject inlining.  */
10013 if (!aarch64_tribools_ok_for_inlining_p (
10014 caller_opts->x_flag_omit_leaf_frame_pointer,
10015 callee_opts->x_flag_omit_leaf_frame_pointer,
10016 2, 1))
10017 return false;
10018
10019 /* If the callee has specific tuning overrides, respect them. */
10020 if (callee_opts->x_aarch64_override_tune_string != NULL
10021 && caller_opts->x_aarch64_override_tune_string == NULL)
10022 return false;
10023
10024 /* If the user specified tuning override strings for the
10025 caller and callee and they don't match up, reject inlining.
10026 We just do a string compare here, we don't analyze the meaning
10027 of the string, as it would be too costly for little gain. */
10028 if (callee_opts->x_aarch64_override_tune_string
10029 && caller_opts->x_aarch64_override_tune_string
10030 && (strcmp (callee_opts->x_aarch64_override_tune_string,
10031 caller_opts->x_aarch64_override_tune_string) != 0))
10032 return false;
10033
10034 return true;
10035}
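
/* For example, a callee whose target attribute adds an ISA extension the
   caller lacks (say "+crc", name only illustrative) is rejected above
   because its ISA flags are no longer a subset of the caller's, while the
   opposite direction is fine.  Similarly a strict-align callee is not
   inlined into a non-strict-align caller, but a non-strict-align callee may
   be inlined into a strict-align one.  */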
10036
43e9d192
IB
10037/* Return true if SYMBOL_REF X binds locally. */
10038
10039static bool
10040aarch64_symbol_binds_local_p (const_rtx x)
10041{
10042 return (SYMBOL_REF_DECL (x)
10043 ? targetm.binds_local_p (SYMBOL_REF_DECL (x))
10044 : SYMBOL_REF_LOCAL_P (x));
10045}
10046
10047/* Return true if SYMBOL_REF X is thread local.  */
10048static bool
10049aarch64_tls_symbol_p (rtx x)
10050{
10051 if (! TARGET_HAVE_TLS)
10052 return false;
10053
10054 if (GET_CODE (x) != SYMBOL_REF)
10055 return false;
10056
10057 return SYMBOL_REF_TLS_MODEL (x) != 0;
10058}
10059
10060/* Classify a TLS symbol into one of the TLS kinds. */
10061enum aarch64_symbol_type
10062aarch64_classify_tls_symbol (rtx x)
10063{
10064 enum tls_model tls_kind = tls_symbolic_operand_type (x);
10065
10066 switch (tls_kind)
10067 {
10068 case TLS_MODEL_GLOBAL_DYNAMIC:
10069 case TLS_MODEL_LOCAL_DYNAMIC:
10070 return TARGET_TLS_DESC ? SYMBOL_SMALL_TLSDESC : SYMBOL_SMALL_TLSGD;
10071
10072 case TLS_MODEL_INITIAL_EXEC:
5ae7caad
JW
10073 switch (aarch64_cmodel)
10074 {
10075 case AARCH64_CMODEL_TINY:
10076 case AARCH64_CMODEL_TINY_PIC:
10077 return SYMBOL_TINY_TLSIE;
10078 default:
79496620 10079 return SYMBOL_SMALL_TLSIE;
5ae7caad 10080 }
43e9d192
IB
10081
10082 case TLS_MODEL_LOCAL_EXEC:
cbf5629e
JW
10083 if (aarch64_tls_size == 12)
10084 return SYMBOL_TLSLE12;
10085 else if (aarch64_tls_size == 24)
10086 return SYMBOL_TLSLE24;
10087 else if (aarch64_tls_size == 32)
10088 return SYMBOL_TLSLE32;
10089 else if (aarch64_tls_size == 48)
10090 return SYMBOL_TLSLE48;
10091 else
10092 gcc_unreachable ();
43e9d192
IB
10093
10094 case TLS_MODEL_EMULATED:
10095 case TLS_MODEL_NONE:
10096 return SYMBOL_FORCE_TO_MEM;
10097
10098 default:
10099 gcc_unreachable ();
10100 }
10101}
10102
10103/* Return the method that should be used to access SYMBOL_REF or
a6e0bfa7 10104 LABEL_REF X. */
17f4d4bf 10105
43e9d192 10106enum aarch64_symbol_type
a6e0bfa7 10107aarch64_classify_symbol (rtx x, rtx offset)
43e9d192
IB
10108{
10109 if (GET_CODE (x) == LABEL_REF)
10110 {
10111 switch (aarch64_cmodel)
10112 {
10113 case AARCH64_CMODEL_LARGE:
10114 return SYMBOL_FORCE_TO_MEM;
10115
10116 case AARCH64_CMODEL_TINY_PIC:
10117 case AARCH64_CMODEL_TINY:
a5350ddc
CSS
10118 return SYMBOL_TINY_ABSOLUTE;
10119
1b1e81f8 10120 case AARCH64_CMODEL_SMALL_SPIC:
43e9d192
IB
10121 case AARCH64_CMODEL_SMALL_PIC:
10122 case AARCH64_CMODEL_SMALL:
10123 return SYMBOL_SMALL_ABSOLUTE;
10124
10125 default:
10126 gcc_unreachable ();
10127 }
10128 }
10129
17f4d4bf 10130 if (GET_CODE (x) == SYMBOL_REF)
43e9d192 10131 {
43e9d192
IB
10132 if (aarch64_tls_symbol_p (x))
10133 return aarch64_classify_tls_symbol (x);
10134
17f4d4bf
CSS
10135 switch (aarch64_cmodel)
10136 {
10137 case AARCH64_CMODEL_TINY:
15f6e0da 10138 /* When we retrieve symbol + offset address, we have to make sure
f8b756b7
TB
10139 the offset does not cause overflow of the final address. But
10140	 we have no way of knowing the address of the symbol at compile time,
 10141	 so we can't accurately say if the distance between the PC and
 10142	 symbol + offset is outside the addressable range of +/-1M in the
10143 TINY code model. So we rely on images not being greater than
10144 1M and cap the offset at 1M and anything beyond 1M will have to
15f6e0da
RR
10145 be loaded using an alternative mechanism. Furthermore if the
10146 symbol is a weak reference to something that isn't known to
10147 resolve to a symbol in this module, then force to memory. */
10148 if ((SYMBOL_REF_WEAK (x)
10149 && !aarch64_symbol_binds_local_p (x))
f8b756b7 10150 || INTVAL (offset) < -1048575 || INTVAL (offset) > 1048575)
a5350ddc
CSS
10151 return SYMBOL_FORCE_TO_MEM;
10152 return SYMBOL_TINY_ABSOLUTE;
10153
17f4d4bf 10154 case AARCH64_CMODEL_SMALL:
f8b756b7
TB
10155 /* Same reasoning as the tiny code model, but the offset cap here is
10156 4G. */
15f6e0da
RR
10157 if ((SYMBOL_REF_WEAK (x)
10158 && !aarch64_symbol_binds_local_p (x))
3ff5d1f0
TB
10159 || !IN_RANGE (INTVAL (offset), HOST_WIDE_INT_C (-4294967263),
10160 HOST_WIDE_INT_C (4294967264)))
17f4d4bf
CSS
10161 return SYMBOL_FORCE_TO_MEM;
10162 return SYMBOL_SMALL_ABSOLUTE;
43e9d192 10163
17f4d4bf 10164 case AARCH64_CMODEL_TINY_PIC:
38e6c9a6 10165 if (!aarch64_symbol_binds_local_p (x))
87dd8ab0 10166 return SYMBOL_TINY_GOT;
38e6c9a6
MS
10167 return SYMBOL_TINY_ABSOLUTE;
10168
1b1e81f8 10169 case AARCH64_CMODEL_SMALL_SPIC:
17f4d4bf
CSS
10170 case AARCH64_CMODEL_SMALL_PIC:
10171 if (!aarch64_symbol_binds_local_p (x))
1b1e81f8
JW
10172 return (aarch64_cmodel == AARCH64_CMODEL_SMALL_SPIC
10173 ? SYMBOL_SMALL_GOT_28K : SYMBOL_SMALL_GOT_4G);
17f4d4bf 10174 return SYMBOL_SMALL_ABSOLUTE;
43e9d192 10175
9ee6540a
WD
10176 case AARCH64_CMODEL_LARGE:
10177 /* This is alright even in PIC code as the constant
10178 pool reference is always PC relative and within
10179 the same translation unit. */
10180 if (CONSTANT_POOL_ADDRESS_P (x))
10181 return SYMBOL_SMALL_ABSOLUTE;
10182 else
10183 return SYMBOL_FORCE_TO_MEM;
10184
17f4d4bf
CSS
10185 default:
10186 gcc_unreachable ();
10187 }
43e9d192 10188 }
17f4d4bf 10189
43e9d192
IB
10190 /* By default push everything into the constant pool. */
10191 return SYMBOL_FORCE_TO_MEM;
10192}
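
/* For instance, under the tiny code model a reference such as
   symbol + 0x200000 (a 2M offset, value only illustrative) is classified as
   SYMBOL_FORCE_TO_MEM because it exceeds the +/-1M offset cap above, whereas
   symbol + 16 remains SYMBOL_TINY_ABSOLUTE.  */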
10193
43e9d192
IB
10194bool
10195aarch64_constant_address_p (rtx x)
10196{
10197 return (CONSTANT_P (x) && memory_address_p (DImode, x));
10198}
10199
10200bool
10201aarch64_legitimate_pic_operand_p (rtx x)
10202{
10203 if (GET_CODE (x) == SYMBOL_REF
10204 || (GET_CODE (x) == CONST
10205 && GET_CODE (XEXP (x, 0)) == PLUS
10206 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF))
10207 return false;
10208
10209 return true;
10210}
10211
3520f7cc
JG
10212/* Return true if X holds either a quarter-precision or
10213 floating-point +0.0 constant. */
10214static bool
ef4bddc2 10215aarch64_valid_floating_const (machine_mode mode, rtx x)
3520f7cc
JG
10216{
10217 if (!CONST_DOUBLE_P (x))
10218 return false;
10219
6a0f8c01
JW
10220 if (aarch64_float_const_zero_rtx_p (x))
10221 return true;
10222
10223 /* We only handle moving 0.0 to a TFmode register. */
3520f7cc
JG
10224 if (!(mode == SFmode || mode == DFmode))
10225 return false;
10226
3520f7cc
JG
10227 return aarch64_float_const_representable_p (x);
10228}
10229
43e9d192 10230static bool
ef4bddc2 10231aarch64_legitimate_constant_p (machine_mode mode, rtx x)
43e9d192
IB
10232{
10233 /* Do not allow vector struct mode constants. We could support
10234 0 and -1 easily, but they need support in aarch64-simd.md. */
10235 if (TARGET_SIMD && aarch64_vect_struct_mode_p (mode))
10236 return false;
10237
10238 /* This could probably go away because
10239 we now decompose CONST_INTs according to expand_mov_immediate. */
10240 if ((GET_CODE (x) == CONST_VECTOR
48063b9d 10241 && aarch64_simd_valid_immediate (x, mode, false, NULL))
3520f7cc
JG
10242 || CONST_INT_P (x) || aarch64_valid_floating_const (mode, x))
10243 return !targetm.cannot_force_const_mem (mode, x);
43e9d192
IB
10244
10245 if (GET_CODE (x) == HIGH
10246 && aarch64_valid_symref (XEXP (x, 0), GET_MODE (XEXP (x, 0))))
10247 return true;
10248
f28e54bd
WD
10249 /* Treat symbols as constants. Avoid TLS symbols as they are complex,
10250 so spilling them is better than rematerialization. */
10251 if (SYMBOL_REF_P (x) && !SYMBOL_REF_TLS_MODEL (x))
10252 return true;
10253
43e9d192
IB
10254 return aarch64_constant_address_p (x);
10255}
10256
a5bc806c 10257rtx
43e9d192
IB
10258aarch64_load_tp (rtx target)
10259{
10260 if (!target
10261 || GET_MODE (target) != Pmode
10262 || !register_operand (target, Pmode))
10263 target = gen_reg_rtx (Pmode);
10264
10265 /* Can return in any reg. */
10266 emit_insn (gen_aarch64_load_tp_hard (target));
10267 return target;
10268}
10269
43e9d192
IB
10270/* On AAPCS systems, this is the "struct __va_list". */
10271static GTY(()) tree va_list_type;
10272
10273/* Implement TARGET_BUILD_BUILTIN_VA_LIST.
10274 Return the type to use as __builtin_va_list.
10275
10276 AAPCS64 \S 7.1.4 requires that va_list be a typedef for a type defined as:
10277
10278 struct __va_list
10279 {
10280 void *__stack;
10281 void *__gr_top;
10282 void *__vr_top;
10283 int __gr_offs;
10284 int __vr_offs;
10285 }; */
10286
10287static tree
10288aarch64_build_builtin_va_list (void)
10289{
10290 tree va_list_name;
10291 tree f_stack, f_grtop, f_vrtop, f_groff, f_vroff;
10292
10293 /* Create the type. */
10294 va_list_type = lang_hooks.types.make_type (RECORD_TYPE);
10295 /* Give it the required name. */
10296 va_list_name = build_decl (BUILTINS_LOCATION,
10297 TYPE_DECL,
10298 get_identifier ("__va_list"),
10299 va_list_type);
10300 DECL_ARTIFICIAL (va_list_name) = 1;
10301 TYPE_NAME (va_list_type) = va_list_name;
665c56c6 10302 TYPE_STUB_DECL (va_list_type) = va_list_name;
43e9d192
IB
10303
10304 /* Create the fields. */
10305 f_stack = build_decl (BUILTINS_LOCATION,
10306 FIELD_DECL, get_identifier ("__stack"),
10307 ptr_type_node);
10308 f_grtop = build_decl (BUILTINS_LOCATION,
10309 FIELD_DECL, get_identifier ("__gr_top"),
10310 ptr_type_node);
10311 f_vrtop = build_decl (BUILTINS_LOCATION,
10312 FIELD_DECL, get_identifier ("__vr_top"),
10313 ptr_type_node);
10314 f_groff = build_decl (BUILTINS_LOCATION,
10315 FIELD_DECL, get_identifier ("__gr_offs"),
10316 integer_type_node);
10317 f_vroff = build_decl (BUILTINS_LOCATION,
10318 FIELD_DECL, get_identifier ("__vr_offs"),
10319 integer_type_node);
10320
88e3bdd1 10321 /* Tell tree-stdarg pass about our internal offset fields.
3fd6b9cc
JW
10322     NOTE: va_list_gpr/fpr_counter_field are only used for tree comparison
 10323     purposes, to identify whether the code is updating va_list internal
10324 offset fields through irregular way. */
10325 va_list_gpr_counter_field = f_groff;
10326 va_list_fpr_counter_field = f_vroff;
10327
43e9d192
IB
10328 DECL_ARTIFICIAL (f_stack) = 1;
10329 DECL_ARTIFICIAL (f_grtop) = 1;
10330 DECL_ARTIFICIAL (f_vrtop) = 1;
10331 DECL_ARTIFICIAL (f_groff) = 1;
10332 DECL_ARTIFICIAL (f_vroff) = 1;
10333
10334 DECL_FIELD_CONTEXT (f_stack) = va_list_type;
10335 DECL_FIELD_CONTEXT (f_grtop) = va_list_type;
10336 DECL_FIELD_CONTEXT (f_vrtop) = va_list_type;
10337 DECL_FIELD_CONTEXT (f_groff) = va_list_type;
10338 DECL_FIELD_CONTEXT (f_vroff) = va_list_type;
10339
10340 TYPE_FIELDS (va_list_type) = f_stack;
10341 DECL_CHAIN (f_stack) = f_grtop;
10342 DECL_CHAIN (f_grtop) = f_vrtop;
10343 DECL_CHAIN (f_vrtop) = f_groff;
10344 DECL_CHAIN (f_groff) = f_vroff;
10345
10346 /* Compute its layout. */
10347 layout_type (va_list_type);
10348
10349 return va_list_type;
10350}
10351
10352/* Implement TARGET_EXPAND_BUILTIN_VA_START. */
10353static void
10354aarch64_expand_builtin_va_start (tree valist, rtx nextarg ATTRIBUTE_UNUSED)
10355{
10356 const CUMULATIVE_ARGS *cum;
10357 tree f_stack, f_grtop, f_vrtop, f_groff, f_vroff;
10358 tree stack, grtop, vrtop, groff, vroff;
10359 tree t;
88e3bdd1
JW
10360 int gr_save_area_size = cfun->va_list_gpr_size;
10361 int vr_save_area_size = cfun->va_list_fpr_size;
43e9d192
IB
10362 int vr_offset;
10363
10364 cum = &crtl->args.info;
88e3bdd1
JW
10365 if (cfun->va_list_gpr_size)
10366 gr_save_area_size = MIN ((NUM_ARG_REGS - cum->aapcs_ncrn) * UNITS_PER_WORD,
10367 cfun->va_list_gpr_size);
10368 if (cfun->va_list_fpr_size)
10369 vr_save_area_size = MIN ((NUM_FP_ARG_REGS - cum->aapcs_nvrn)
10370 * UNITS_PER_VREG, cfun->va_list_fpr_size);
43e9d192 10371
d5726973 10372 if (!TARGET_FLOAT)
43e9d192 10373 {
261fb553 10374 gcc_assert (cum->aapcs_nvrn == 0);
43e9d192
IB
10375 vr_save_area_size = 0;
10376 }
10377
10378 f_stack = TYPE_FIELDS (va_list_type_node);
10379 f_grtop = DECL_CHAIN (f_stack);
10380 f_vrtop = DECL_CHAIN (f_grtop);
10381 f_groff = DECL_CHAIN (f_vrtop);
10382 f_vroff = DECL_CHAIN (f_groff);
10383
10384 stack = build3 (COMPONENT_REF, TREE_TYPE (f_stack), valist, f_stack,
10385 NULL_TREE);
10386 grtop = build3 (COMPONENT_REF, TREE_TYPE (f_grtop), valist, f_grtop,
10387 NULL_TREE);
10388 vrtop = build3 (COMPONENT_REF, TREE_TYPE (f_vrtop), valist, f_vrtop,
10389 NULL_TREE);
10390 groff = build3 (COMPONENT_REF, TREE_TYPE (f_groff), valist, f_groff,
10391 NULL_TREE);
10392 vroff = build3 (COMPONENT_REF, TREE_TYPE (f_vroff), valist, f_vroff,
10393 NULL_TREE);
10394
10395 /* Emit code to initialize STACK, which points to the next varargs stack
10396 argument. CUM->AAPCS_STACK_SIZE gives the number of stack words used
10397 by named arguments. STACK is 8-byte aligned. */
10398 t = make_tree (TREE_TYPE (stack), virtual_incoming_args_rtx);
10399 if (cum->aapcs_stack_size > 0)
10400 t = fold_build_pointer_plus_hwi (t, cum->aapcs_stack_size * UNITS_PER_WORD);
10401 t = build2 (MODIFY_EXPR, TREE_TYPE (stack), stack, t);
10402 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
10403
10404 /* Emit code to initialize GRTOP, the top of the GR save area.
10405 virtual_incoming_args_rtx should have been 16 byte aligned. */
10406 t = make_tree (TREE_TYPE (grtop), virtual_incoming_args_rtx);
10407 t = build2 (MODIFY_EXPR, TREE_TYPE (grtop), grtop, t);
10408 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
10409
10410 /* Emit code to initialize VRTOP, the top of the VR save area.
10411 This address is gr_save_area_bytes below GRTOP, rounded
10412 down to the next 16-byte boundary. */
10413 t = make_tree (TREE_TYPE (vrtop), virtual_incoming_args_rtx);
4f59f9f2
UB
10414 vr_offset = ROUND_UP (gr_save_area_size,
10415 STACK_BOUNDARY / BITS_PER_UNIT);
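   /* For example, if gr_save_area_size is 24 and the stack boundary is 16
      bytes, ROUND_UP gives 32, so VRTOP is placed 32 bytes below GRTOP.  */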
43e9d192
IB
10416
10417 if (vr_offset)
10418 t = fold_build_pointer_plus_hwi (t, -vr_offset);
10419 t = build2 (MODIFY_EXPR, TREE_TYPE (vrtop), vrtop, t);
10420 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
10421
10422 /* Emit code to initialize GROFF, the offset from GRTOP of the
10423 next GPR argument. */
10424 t = build2 (MODIFY_EXPR, TREE_TYPE (groff), groff,
10425 build_int_cst (TREE_TYPE (groff), -gr_save_area_size));
10426 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
10427
10428 /* Likewise emit code to initialize VROFF, the offset from VRTOP
10429 of the next VR argument. */
10430 t = build2 (MODIFY_EXPR, TREE_TYPE (vroff), vroff,
10431 build_int_cst (TREE_TYPE (vroff), -vr_save_area_size));
10432 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
10433}
10434
10435/* Implement TARGET_GIMPLIFY_VA_ARG_EXPR. */
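/* Roughly speaking, the tree built below for a register-class argument has
   the shape:

     off = __gr_offs;                     (or __vr_offs)
     if (off >= 0)
       goto on_stack;                     (register save area already used up)
     __gr_offs = off + <register size>;
     if (__gr_offs > 0)
       goto on_stack;                     (this argument did not fit either)
     addr = __gr_top + off;               (argument is in the save area)
     ...
   on_stack:
     addr = __stack;
     __stack = (addr + <argument size> + 7) & -8;

   with extra rounding when the type needs 16-byte alignment, and address
   adjustments for big-endian targets and homogeneous aggregates.  */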
10436
10437static tree
10438aarch64_gimplify_va_arg_expr (tree valist, tree type, gimple_seq *pre_p,
10439 gimple_seq *post_p ATTRIBUTE_UNUSED)
10440{
10441 tree addr;
10442 bool indirect_p;
10443 bool is_ha; /* is HFA or HVA. */
10444 bool dw_align; /* double-word align. */
ef4bddc2 10445 machine_mode ag_mode = VOIDmode;
43e9d192 10446 int nregs;
ef4bddc2 10447 machine_mode mode;
43e9d192
IB
10448
10449 tree f_stack, f_grtop, f_vrtop, f_groff, f_vroff;
10450 tree stack, f_top, f_off, off, arg, roundup, on_stack;
10451 HOST_WIDE_INT size, rsize, adjust, align;
10452 tree t, u, cond1, cond2;
10453
10454 indirect_p = pass_by_reference (NULL, TYPE_MODE (type), type, false);
10455 if (indirect_p)
10456 type = build_pointer_type (type);
10457
10458 mode = TYPE_MODE (type);
10459
10460 f_stack = TYPE_FIELDS (va_list_type_node);
10461 f_grtop = DECL_CHAIN (f_stack);
10462 f_vrtop = DECL_CHAIN (f_grtop);
10463 f_groff = DECL_CHAIN (f_vrtop);
10464 f_vroff = DECL_CHAIN (f_groff);
10465
10466 stack = build3 (COMPONENT_REF, TREE_TYPE (f_stack), unshare_expr (valist),
10467 f_stack, NULL_TREE);
10468 size = int_size_in_bytes (type);
985b8393 10469 align = aarch64_function_arg_alignment (mode, type) / BITS_PER_UNIT;
43e9d192
IB
10470
10471 dw_align = false;
10472 adjust = 0;
10473 if (aarch64_vfp_is_call_or_return_candidate (mode,
10474 type,
10475 &ag_mode,
10476 &nregs,
10477 &is_ha))
10478 {
10479 /* TYPE passed in fp/simd registers. */
d5726973 10480 if (!TARGET_FLOAT)
261fb553 10481 aarch64_err_no_fpadvsimd (mode, "varargs");
43e9d192
IB
10482
10483 f_top = build3 (COMPONENT_REF, TREE_TYPE (f_vrtop),
10484 unshare_expr (valist), f_vrtop, NULL_TREE);
10485 f_off = build3 (COMPONENT_REF, TREE_TYPE (f_vroff),
10486 unshare_expr (valist), f_vroff, NULL_TREE);
10487
10488 rsize = nregs * UNITS_PER_VREG;
10489
10490 if (is_ha)
10491 {
10492 if (BYTES_BIG_ENDIAN && GET_MODE_SIZE (ag_mode) < UNITS_PER_VREG)
10493 adjust = UNITS_PER_VREG - GET_MODE_SIZE (ag_mode);
10494 }
10495 else if (BLOCK_REG_PADDING (mode, type, 1) == downward
10496 && size < UNITS_PER_VREG)
10497 {
10498 adjust = UNITS_PER_VREG - size;
10499 }
10500 }
10501 else
10502 {
10503 /* TYPE passed in general registers. */
10504 f_top = build3 (COMPONENT_REF, TREE_TYPE (f_grtop),
10505 unshare_expr (valist), f_grtop, NULL_TREE);
10506 f_off = build3 (COMPONENT_REF, TREE_TYPE (f_groff),
10507 unshare_expr (valist), f_groff, NULL_TREE);
4f59f9f2 10508 rsize = ROUND_UP (size, UNITS_PER_WORD);
43e9d192
IB
10509 nregs = rsize / UNITS_PER_WORD;
10510
10511 if (align > 8)
10512 dw_align = true;
10513
10514 if (BLOCK_REG_PADDING (mode, type, 1) == downward
10515 && size < UNITS_PER_WORD)
10516 {
10517 adjust = UNITS_PER_WORD - size;
10518 }
10519 }
10520
10521 /* Get a local temporary for the field value. */
10522 off = get_initialized_tmp_var (f_off, pre_p, NULL);
10523
10524 /* Emit code to branch if off >= 0. */
10525 t = build2 (GE_EXPR, boolean_type_node, off,
10526 build_int_cst (TREE_TYPE (off), 0));
10527 cond1 = build3 (COND_EXPR, ptr_type_node, t, NULL_TREE, NULL_TREE);
10528
10529 if (dw_align)
10530 {
10531 /* Emit: offs = (offs + 15) & -16. */
10532 t = build2 (PLUS_EXPR, TREE_TYPE (off), off,
10533 build_int_cst (TREE_TYPE (off), 15));
10534 t = build2 (BIT_AND_EXPR, TREE_TYPE (off), t,
10535 build_int_cst (TREE_TYPE (off), -16));
10536 roundup = build2 (MODIFY_EXPR, TREE_TYPE (off), off, t);
10537 }
10538 else
10539 roundup = NULL;
10540
10541 /* Update ap.__[g|v]r_offs */
10542 t = build2 (PLUS_EXPR, TREE_TYPE (off), off,
10543 build_int_cst (TREE_TYPE (off), rsize));
10544 t = build2 (MODIFY_EXPR, TREE_TYPE (f_off), unshare_expr (f_off), t);
10545
10546 /* String up. */
10547 if (roundup)
10548 t = build2 (COMPOUND_EXPR, TREE_TYPE (t), roundup, t);
10549
10550 /* [cond2] if (ap.__[g|v]r_offs > 0) */
10551 u = build2 (GT_EXPR, boolean_type_node, unshare_expr (f_off),
10552 build_int_cst (TREE_TYPE (f_off), 0));
10553 cond2 = build3 (COND_EXPR, ptr_type_node, u, NULL_TREE, NULL_TREE);
10554
10555 /* String up: make sure the assignment happens before the use. */
10556 t = build2 (COMPOUND_EXPR, TREE_TYPE (cond2), t, cond2);
10557 COND_EXPR_ELSE (cond1) = t;
10558
10559 /* Prepare the trees handling the argument that is passed on the stack;
10560 the top-level node is stored in ON_STACK. */
10561 arg = get_initialized_tmp_var (stack, pre_p, NULL);
10562 if (align > 8)
10563 {
10564 /* if (alignof(type) > 8) (arg = arg + 15) & -16; */
10565 t = fold_convert (intDI_type_node, arg);
10566 t = build2 (PLUS_EXPR, TREE_TYPE (t), t,
10567 build_int_cst (TREE_TYPE (t), 15));
10568 t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
10569 build_int_cst (TREE_TYPE (t), -16));
10570 t = fold_convert (TREE_TYPE (arg), t);
10571 roundup = build2 (MODIFY_EXPR, TREE_TYPE (arg), arg, t);
10572 }
10573 else
10574 roundup = NULL;
10575 /* Advance ap.__stack */
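   /* The new __stack value is the old one plus the argument size rounded up
      to a multiple of 8 bytes; e.g. a 12-byte argument advances it by 16.  */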
10576 t = fold_convert (intDI_type_node, arg);
10577 t = build2 (PLUS_EXPR, TREE_TYPE (t), t,
10578 build_int_cst (TREE_TYPE (t), size + 7));
10579 t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
10580 build_int_cst (TREE_TYPE (t), -8));
10581 t = fold_convert (TREE_TYPE (arg), t);
10582 t = build2 (MODIFY_EXPR, TREE_TYPE (stack), unshare_expr (stack), t);
10583 /* String up roundup and advance. */
10584 if (roundup)
10585 t = build2 (COMPOUND_EXPR, TREE_TYPE (t), roundup, t);
10586 /* String up with arg */
10587 on_stack = build2 (COMPOUND_EXPR, TREE_TYPE (arg), t, arg);
10588 /* Big-endianness related address adjustment. */
10589 if (BLOCK_REG_PADDING (mode, type, 1) == downward
10590 && size < UNITS_PER_WORD)
10591 {
10592 t = build2 (POINTER_PLUS_EXPR, TREE_TYPE (arg), arg,
10593 size_int (UNITS_PER_WORD - size));
10594 on_stack = build2 (COMPOUND_EXPR, TREE_TYPE (arg), on_stack, t);
10595 }
10596
10597 COND_EXPR_THEN (cond1) = unshare_expr (on_stack);
10598 COND_EXPR_THEN (cond2) = unshare_expr (on_stack);
10599
10600 /* Adjustment to OFFSET in the case of BIG_ENDIAN. */
10601 t = off;
10602 if (adjust)
10603 t = build2 (PREINCREMENT_EXPR, TREE_TYPE (off), off,
10604 build_int_cst (TREE_TYPE (off), adjust));
10605
10606 t = fold_convert (sizetype, t);
10607 t = build2 (POINTER_PLUS_EXPR, TREE_TYPE (f_top), f_top, t);
10608
10609 if (is_ha)
10610 {
10611 /* type ha; // treat as "struct {ftype field[n];}"
10612 ... [computing offs]
10613 for (i = 0; i < nregs; ++i, offs += 16)
10614 ha.field[i] = *((ftype *)(ap.__vr_top + offs));
10615 return ha; */
10616 int i;
10617 tree tmp_ha, field_t, field_ptr_t;
10618
10619 /* Declare a local variable. */
10620 tmp_ha = create_tmp_var_raw (type, "ha");
10621 gimple_add_tmp_var (tmp_ha);
10622
10623 /* Establish the base type. */
10624 switch (ag_mode)
10625 {
10626 case SFmode:
10627 field_t = float_type_node;
10628 field_ptr_t = float_ptr_type_node;
10629 break;
10630 case DFmode:
10631 field_t = double_type_node;
10632 field_ptr_t = double_ptr_type_node;
10633 break;
10634 case TFmode:
10635 field_t = long_double_type_node;
10636 field_ptr_t = long_double_ptr_type_node;
10637 break;
43e9d192 10638 case HFmode:
1b62ed4f
JG
10639 field_t = aarch64_fp16_type_node;
10640 field_ptr_t = aarch64_fp16_ptr_type_node;
43e9d192 10641 break;
43e9d192
IB
10642 case V2SImode:
10643 case V4SImode:
10644 {
10645 tree innertype = make_signed_type (GET_MODE_PRECISION (SImode));
10646 field_t = build_vector_type_for_mode (innertype, ag_mode);
10647 field_ptr_t = build_pointer_type (field_t);
10648 }
10649 break;
10650 default:
10651 gcc_assert (0);
10652 }
10653
10654 /* *(field_ptr_t)&ha = *((field_ptr_t)vr_saved_area) */
10655 tmp_ha = build1 (ADDR_EXPR, field_ptr_t, tmp_ha);
10656 addr = t;
10657 t = fold_convert (field_ptr_t, addr);
10658 t = build2 (MODIFY_EXPR, field_t,
10659 build1 (INDIRECT_REF, field_t, tmp_ha),
10660 build1 (INDIRECT_REF, field_t, t));
10661
10662 /* ha.field[i] = *((field_ptr_t)vr_saved_area + i) */
10663 for (i = 1; i < nregs; ++i)
10664 {
10665 addr = fold_build_pointer_plus_hwi (addr, UNITS_PER_VREG);
10666 u = fold_convert (field_ptr_t, addr);
10667 u = build2 (MODIFY_EXPR, field_t,
10668 build2 (MEM_REF, field_t, tmp_ha,
10669 build_int_cst (field_ptr_t,
10670 (i *
10671 int_size_in_bytes (field_t)))),
10672 build1 (INDIRECT_REF, field_t, u));
10673 t = build2 (COMPOUND_EXPR, TREE_TYPE (t), t, u);
10674 }
10675
10676 u = fold_convert (TREE_TYPE (f_top), tmp_ha);
10677 t = build2 (COMPOUND_EXPR, TREE_TYPE (f_top), t, u);
10678 }
10679
10680 COND_EXPR_ELSE (cond2) = t;
10681 addr = fold_convert (build_pointer_type (type), cond1);
10682 addr = build_va_arg_indirect_ref (addr);
10683
10684 if (indirect_p)
10685 addr = build_va_arg_indirect_ref (addr);
10686
10687 return addr;
10688}
10689
10690/* Implement TARGET_SETUP_INCOMING_VARARGS. */
10691
10692static void
ef4bddc2 10693aarch64_setup_incoming_varargs (cumulative_args_t cum_v, machine_mode mode,
43e9d192
IB
10694 tree type, int *pretend_size ATTRIBUTE_UNUSED,
10695 int no_rtl)
10696{
10697 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
10698 CUMULATIVE_ARGS local_cum;
88e3bdd1
JW
10699 int gr_saved = cfun->va_list_gpr_size;
10700 int vr_saved = cfun->va_list_fpr_size;
43e9d192
IB
10701
10702 /* The caller has advanced CUM up to, but not beyond, the last named
10703 argument. Advance a local copy of CUM past the last "real" named
10704 argument, to find out how many registers are left over. */
10705 local_cum = *cum;
10706 aarch64_function_arg_advance (pack_cumulative_args(&local_cum), mode, type, true);
10707
88e3bdd1
JW
10708 /* Find out how many registers we need to save.
10709 Honor the tree-stdarg analysis results. */
10710 if (cfun->va_list_gpr_size)
10711 gr_saved = MIN (NUM_ARG_REGS - local_cum.aapcs_ncrn,
10712 cfun->va_list_gpr_size / UNITS_PER_WORD);
10713 if (cfun->va_list_fpr_size)
10714 vr_saved = MIN (NUM_FP_ARG_REGS - local_cum.aapcs_nvrn,
10715 cfun->va_list_fpr_size / UNITS_PER_VREG);
43e9d192 10716
d5726973 10717 if (!TARGET_FLOAT)
43e9d192 10718 {
261fb553 10719 gcc_assert (local_cum.aapcs_nvrn == 0);
43e9d192
IB
10720 vr_saved = 0;
10721 }
10722
10723 if (!no_rtl)
10724 {
10725 if (gr_saved > 0)
10726 {
10727 rtx ptr, mem;
10728
10729 /* virtual_incoming_args_rtx should have been 16-byte aligned. */
10730 ptr = plus_constant (Pmode, virtual_incoming_args_rtx,
10731 - gr_saved * UNITS_PER_WORD);
10732 mem = gen_frame_mem (BLKmode, ptr);
10733 set_mem_alias_set (mem, get_varargs_alias_set ());
10734
10735 move_block_from_reg (local_cum.aapcs_ncrn + R0_REGNUM,
10736 mem, gr_saved);
10737 }
10738 if (vr_saved > 0)
10739 {
10740 /* We can't use move_block_from_reg, because it will use
10741 the wrong mode, storing D regs only. */
ef4bddc2 10742 machine_mode mode = TImode;
88e3bdd1 10743 int off, i, vr_start;
43e9d192
IB
10744
10745 /* Set OFF to the offset from virtual_incoming_args_rtx of
10746 the first vector register. The VR save area lies below
10747 the GR one, and is aligned to 16 bytes. */
4f59f9f2
UB
10748 off = -ROUND_UP (gr_saved * UNITS_PER_WORD,
10749 STACK_BOUNDARY / BITS_PER_UNIT);
43e9d192
IB
10750 off -= vr_saved * UNITS_PER_VREG;
10751
88e3bdd1
JW
10752 vr_start = V0_REGNUM + local_cum.aapcs_nvrn;
10753 for (i = 0; i < vr_saved; ++i)
43e9d192
IB
10754 {
10755 rtx ptr, mem;
10756
10757 ptr = plus_constant (Pmode, virtual_incoming_args_rtx, off);
10758 mem = gen_frame_mem (mode, ptr);
10759 set_mem_alias_set (mem, get_varargs_alias_set ());
88e3bdd1 10760 aarch64_emit_move (mem, gen_rtx_REG (mode, vr_start + i));
43e9d192
IB
10761 off += UNITS_PER_VREG;
10762 }
10763 }
10764 }
10765
10766 /* We don't save the size into *PRETEND_SIZE because we want to avoid
10767 any complication of having crtl->args.pretend_args_size changed. */
8799637a 10768 cfun->machine->frame.saved_varargs_size
4f59f9f2
UB
10769 = (ROUND_UP (gr_saved * UNITS_PER_WORD,
10770 STACK_BOUNDARY / BITS_PER_UNIT)
43e9d192
IB
10771 + vr_saved * UNITS_PER_VREG);
10772}
10773
10774static void
10775aarch64_conditional_register_usage (void)
10776{
10777 int i;
10778 if (!TARGET_FLOAT)
10779 {
10780 for (i = V0_REGNUM; i <= V31_REGNUM; i++)
10781 {
10782 fixed_regs[i] = 1;
10783 call_used_regs[i] = 1;
10784 }
10785 }
10786}
10787
10788/* Walk down the type tree of TYPE counting consecutive base elements.
10789 If *MODEP is VOIDmode, then set it to the first valid floating point
10790 type. If a non-floating point type is found, or if a floating point
10791 type that doesn't match a non-VOIDmode *MODEP is found, then return -1,
10792 otherwise return the count in the sub-tree. */
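/* For example, for a struct containing three floats this returns 3 with
   *MODEP set to SFmode, which is what makes it an HFA candidate.  */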
10793static int
ef4bddc2 10794aapcs_vfp_sub_candidate (const_tree type, machine_mode *modep)
43e9d192 10795{
ef4bddc2 10796 machine_mode mode;
43e9d192
IB
10797 HOST_WIDE_INT size;
10798
10799 switch (TREE_CODE (type))
10800 {
10801 case REAL_TYPE:
10802 mode = TYPE_MODE (type);
1b62ed4f
JG
10803 if (mode != DFmode && mode != SFmode
10804 && mode != TFmode && mode != HFmode)
43e9d192
IB
10805 return -1;
10806
10807 if (*modep == VOIDmode)
10808 *modep = mode;
10809
10810 if (*modep == mode)
10811 return 1;
10812
10813 break;
10814
10815 case COMPLEX_TYPE:
10816 mode = TYPE_MODE (TREE_TYPE (type));
1b62ed4f
JG
10817 if (mode != DFmode && mode != SFmode
10818 && mode != TFmode && mode != HFmode)
43e9d192
IB
10819 return -1;
10820
10821 if (*modep == VOIDmode)
10822 *modep = mode;
10823
10824 if (*modep == mode)
10825 return 2;
10826
10827 break;
10828
10829 case VECTOR_TYPE:
10830 /* Use V2SImode and V4SImode as representatives of all 64-bit
10831 and 128-bit vector types. */
10832 size = int_size_in_bytes (type);
10833 switch (size)
10834 {
10835 case 8:
10836 mode = V2SImode;
10837 break;
10838 case 16:
10839 mode = V4SImode;
10840 break;
10841 default:
10842 return -1;
10843 }
10844
10845 if (*modep == VOIDmode)
10846 *modep = mode;
10847
10848 /* Vector modes are considered to be opaque: two vectors are
10849 equivalent for the purposes of being homogeneous aggregates
10850 if they are the same size. */
10851 if (*modep == mode)
10852 return 1;
10853
10854 break;
10855
10856 case ARRAY_TYPE:
10857 {
10858 int count;
10859 tree index = TYPE_DOMAIN (type);
10860
807e902e
KZ
10861 /* Can't handle incomplete types nor sizes that are not
10862 fixed. */
10863 if (!COMPLETE_TYPE_P (type)
10864 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
43e9d192
IB
10865 return -1;
10866
10867 count = aapcs_vfp_sub_candidate (TREE_TYPE (type), modep);
10868 if (count == -1
10869 || !index
10870 || !TYPE_MAX_VALUE (index)
cc269bb6 10871 || !tree_fits_uhwi_p (TYPE_MAX_VALUE (index))
43e9d192 10872 || !TYPE_MIN_VALUE (index)
cc269bb6 10873 || !tree_fits_uhwi_p (TYPE_MIN_VALUE (index))
43e9d192
IB
10874 || count < 0)
10875 return -1;
10876
ae7e9ddd
RS
10877 count *= (1 + tree_to_uhwi (TYPE_MAX_VALUE (index))
10878 - tree_to_uhwi (TYPE_MIN_VALUE (index)));
43e9d192
IB
10879
10880 /* There must be no padding. */
807e902e 10881 if (wi::ne_p (TYPE_SIZE (type), count * GET_MODE_BITSIZE (*modep)))
43e9d192
IB
10882 return -1;
10883
10884 return count;
10885 }
10886
10887 case RECORD_TYPE:
10888 {
10889 int count = 0;
10890 int sub_count;
10891 tree field;
10892
807e902e
KZ
10893 /* Can't handle incomplete types nor sizes that are not
10894 fixed. */
10895 if (!COMPLETE_TYPE_P (type)
10896 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
43e9d192
IB
10897 return -1;
10898
10899 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
10900 {
10901 if (TREE_CODE (field) != FIELD_DECL)
10902 continue;
10903
10904 sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep);
10905 if (sub_count < 0)
10906 return -1;
10907 count += sub_count;
10908 }
10909
10910 /* There must be no padding. */
807e902e 10911 if (wi::ne_p (TYPE_SIZE (type), count * GET_MODE_BITSIZE (*modep)))
43e9d192
IB
10912 return -1;
10913
10914 return count;
10915 }
10916
10917 case UNION_TYPE:
10918 case QUAL_UNION_TYPE:
10919 {
10920 /* These aren't very interesting except in a degenerate case. */
10921 int count = 0;
10922 int sub_count;
10923 tree field;
10924
807e902e
KZ
10925 /* Can't handle incomplete types nor sizes that are not
10926 fixed. */
10927 if (!COMPLETE_TYPE_P (type)
10928 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
43e9d192
IB
10929 return -1;
10930
10931 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
10932 {
10933 if (TREE_CODE (field) != FIELD_DECL)
10934 continue;
10935
10936 sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep);
10937 if (sub_count < 0)
10938 return -1;
10939 count = count > sub_count ? count : sub_count;
10940 }
10941
10942 /* There must be no padding. */
807e902e 10943 if (wi::ne_p (TYPE_SIZE (type), count * GET_MODE_BITSIZE (*modep)))
43e9d192
IB
10944 return -1;
10945
10946 return count;
10947 }
10948
10949 default:
10950 break;
10951 }
10952
10953 return -1;
10954}
10955
b6ec6215
KT
10956/* Return TRUE if the type, as described by TYPE and MODE, is a short vector
10957 type as described in AAPCS64 \S 4.1.2.
10958
10959 See the comment above aarch64_composite_type_p for the notes on MODE. */
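/* For example, the 8-byte and 16-byte AdvSIMD types such as int32x2_t and
   int32x4_t count as short vectors; larger or oddly sized vector types do
   not.  */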
10960
10961static bool
10962aarch64_short_vector_p (const_tree type,
10963 machine_mode mode)
10964{
10965 HOST_WIDE_INT size = -1;
10966
10967 if (type && TREE_CODE (type) == VECTOR_TYPE)
10968 size = int_size_in_bytes (type);
10969 else if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
10970 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
10971 size = GET_MODE_SIZE (mode);
10972
10973 return (size == 8 || size == 16);
10974}
10975
43e9d192
IB
10976/* Return TRUE if the type, as described by TYPE and MODE, is a composite
10977 type as described in AAPCS64 \S 4.3. This includes aggregate, union and
10978 array types. The C99 floating-point complex types are also considered
10979 as composite types, according to AAPCS64 \S 7.1.1. The complex integer
10980 types, which are GCC extensions and out of the scope of AAPCS64, are
10981 treated as composite types here as well.
10982
10983 Note that MODE itself is not sufficient in determining whether a type
10984 is such a composite type or not. This is because
10985 stor-layout.c:compute_record_mode may have already changed the MODE
10986 (BLKmode) of a RECORD_TYPE TYPE to some other mode. For example, a
10987 structure with only one field may have its MODE set to the mode of the
10988 field. Also an integer mode whose size matches the size of the
10989 RECORD_TYPE type may be used to substitute the original mode
10990 (i.e. BLKmode) in certain circumstances. In other words, MODE cannot be
10991 solely relied on. */
10992
10993static bool
10994aarch64_composite_type_p (const_tree type,
ef4bddc2 10995 machine_mode mode)
43e9d192 10996{
b6ec6215
KT
10997 if (aarch64_short_vector_p (type, mode))
10998 return false;
10999
43e9d192
IB
11000 if (type && (AGGREGATE_TYPE_P (type) || TREE_CODE (type) == COMPLEX_TYPE))
11001 return true;
11002
11003 if (mode == BLKmode
11004 || GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT
11005 || GET_MODE_CLASS (mode) == MODE_COMPLEX_INT)
11006 return true;
11007
11008 return false;
11009}
11010
43e9d192
IB
11011/* Return TRUE if an argument, whose type is described by TYPE and MODE,
11012 shall be passed or returned in simd/fp register(s) (providing these
11013 parameter passing registers are available).
11014
11015 Upon successful return, *COUNT returns the number of needed registers,
11016 *BASE_MODE returns the mode of the individual register and, when IS_HA
11017 is not NULL, *IS_HA indicates whether or not the argument is a homogeneous
11018 floating-point aggregate or a homogeneous short-vector aggregate. */
11019
11020static bool
ef4bddc2 11021aarch64_vfp_is_call_or_return_candidate (machine_mode mode,
43e9d192 11022 const_tree type,
ef4bddc2 11023 machine_mode *base_mode,
43e9d192
IB
11024 int *count,
11025 bool *is_ha)
11026{
ef4bddc2 11027 machine_mode new_mode = VOIDmode;
43e9d192
IB
11028 bool composite_p = aarch64_composite_type_p (type, mode);
11029
11030 if (is_ha != NULL) *is_ha = false;
11031
11032 if ((!composite_p && GET_MODE_CLASS (mode) == MODE_FLOAT)
11033 || aarch64_short_vector_p (type, mode))
11034 {
11035 *count = 1;
11036 new_mode = mode;
11037 }
11038 else if (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT)
11039 {
11040 if (is_ha != NULL) *is_ha = true;
11041 *count = 2;
11042 new_mode = GET_MODE_INNER (mode);
11043 }
11044 else if (type && composite_p)
11045 {
11046 int ag_count = aapcs_vfp_sub_candidate (type, &new_mode);
11047
11048 if (ag_count > 0 && ag_count <= HA_MAX_NUM_FLDS)
11049 {
11050 if (is_ha != NULL) *is_ha = true;
11051 *count = ag_count;
11052 }
11053 else
11054 return false;
11055 }
11056 else
11057 return false;
11058
11059 *base_mode = new_mode;
11060 return true;
11061}
11062
11063/* Implement TARGET_STRUCT_VALUE_RTX. */
11064
11065static rtx
11066aarch64_struct_value_rtx (tree fndecl ATTRIBUTE_UNUSED,
11067 int incoming ATTRIBUTE_UNUSED)
11068{
11069 return gen_rtx_REG (Pmode, AARCH64_STRUCT_VALUE_REGNUM);
11070}
11071
11072/* Implements target hook vector_mode_supported_p. */
11073static bool
ef4bddc2 11074aarch64_vector_mode_supported_p (machine_mode mode)
43e9d192
IB
11075{
11076 if (TARGET_SIMD
11077 && (mode == V4SImode || mode == V8HImode
11078 || mode == V16QImode || mode == V2DImode
11079 || mode == V2SImode || mode == V4HImode
11080 || mode == V8QImode || mode == V2SFmode
ad7d90cc 11081 || mode == V4SFmode || mode == V2DFmode
71a11456 11082 || mode == V4HFmode || mode == V8HFmode
ad7d90cc 11083 || mode == V1DFmode))
43e9d192
IB
11084 return true;
11085
11086 return false;
11087}
11088
b7342d25
IB
11089/* Return appropriate SIMD container
11090 for MODE within a vector of WIDTH bits. */
ef4bddc2
RS
11091static machine_mode
11092aarch64_simd_container_mode (machine_mode mode, unsigned width)
43e9d192 11093{
b7342d25 11094 gcc_assert (width == 64 || width == 128);
43e9d192 11095 if (TARGET_SIMD)
b7342d25
IB
11096 {
11097 if (width == 128)
11098 switch (mode)
11099 {
11100 case DFmode:
11101 return V2DFmode;
11102 case SFmode:
11103 return V4SFmode;
b719f884
JG
11104 case HFmode:
11105 return V8HFmode;
b7342d25
IB
11106 case SImode:
11107 return V4SImode;
11108 case HImode:
11109 return V8HImode;
11110 case QImode:
11111 return V16QImode;
11112 case DImode:
11113 return V2DImode;
11114 default:
11115 break;
11116 }
11117 else
11118 switch (mode)
11119 {
11120 case SFmode:
11121 return V2SFmode;
b719f884
JG
11122 case HFmode:
11123 return V4HFmode;
b7342d25
IB
11124 case SImode:
11125 return V2SImode;
11126 case HImode:
11127 return V4HImode;
11128 case QImode:
11129 return V8QImode;
11130 default:
11131 break;
11132 }
11133 }
43e9d192
IB
11134 return word_mode;
11135}
11136
b7342d25 11137/* Return 128-bit container as the preferred SIMD mode for MODE. */
ef4bddc2
RS
11138static machine_mode
11139aarch64_preferred_simd_mode (machine_mode mode)
b7342d25
IB
11140{
11141 return aarch64_simd_container_mode (mode, 128);
11142}
11143
3b357264
JG
11144/* Return the bitmask of possible vector sizes for the vectorizer
11145 to iterate over. */
11146static unsigned int
11147aarch64_autovectorize_vector_sizes (void)
11148{
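  /* The values are vector sizes in bytes, so this allows the vectorizer to
     try 128-bit (16-byte) vectors and fall back to 64-bit (8-byte) ones.  */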
11149 return (16 | 8);
11150}
11151
ac2b960f
YZ
11152/* Implement TARGET_MANGLE_TYPE. */
11153
6f549691 11154static const char *
ac2b960f
YZ
11155aarch64_mangle_type (const_tree type)
11156{
11157 /* The AArch64 ABI documents say that "__va_list" has to be
11158 mangled as if it is in the "std" namespace. */
11159 if (lang_hooks.types_compatible_p (CONST_CAST_TREE (type), va_list_type))
11160 return "St9__va_list";
11161
c2ec330c
AL
11162 /* Half-precision float. */
11163 if (TREE_CODE (type) == REAL_TYPE && TYPE_PRECISION (type) == 16)
11164 return "Dh";
11165
f9d53c27
TB
11166 /* Mangle AArch64-specific internal types. TYPE_NAME is non-NULL_TREE for
11167 builtin types. */
11168 if (TYPE_NAME (type) != NULL)
11169 return aarch64_mangle_builtin_type (type);
c6fc9e43 11170
ac2b960f
YZ
11171 /* Use the default mangling. */
11172 return NULL;
11173}
11174
75cf1494
KT
11175/* Find the first rtx_insn before insn that will generate an assembly
11176 instruction. */
11177
11178static rtx_insn *
11179aarch64_prev_real_insn (rtx_insn *insn)
11180{
11181 if (!insn)
11182 return NULL;
11183
11184 do
11185 {
11186 insn = prev_real_insn (insn);
11187 }
11188 while (insn && recog_memoized (insn) < 0);
11189
11190 return insn;
11191}
11192
11193static bool
11194is_madd_op (enum attr_type t1)
11195{
11196 unsigned int i;
11197 /* A number of these may be AArch32 only. */
11198 enum attr_type mlatypes[] = {
11199 TYPE_MLA, TYPE_MLAS, TYPE_SMLAD, TYPE_SMLADX, TYPE_SMLAL, TYPE_SMLALD,
11200 TYPE_SMLALS, TYPE_SMLALXY, TYPE_SMLAWX, TYPE_SMLAWY, TYPE_SMLAXY,
11201 TYPE_SMMLA, TYPE_UMLAL, TYPE_UMLALS, TYPE_SMLSD, TYPE_SMLSDX, TYPE_SMLSLD
11202 };
11203
11204 for (i = 0; i < sizeof (mlatypes) / sizeof (enum attr_type); i++)
11205 {
11206 if (t1 == mlatypes[i])
11207 return true;
11208 }
11209
11210 return false;
11211}
11212
11213/* Check if there is a register dependency between a load and the insn
11214 for which we hold recog_data. */
11215
11216static bool
11217dep_between_memop_and_curr (rtx memop)
11218{
11219 rtx load_reg;
11220 int opno;
11221
8baff86e 11222 gcc_assert (GET_CODE (memop) == SET);
75cf1494
KT
11223
11224 if (!REG_P (SET_DEST (memop)))
11225 return false;
11226
11227 load_reg = SET_DEST (memop);
8baff86e 11228 for (opno = 1; opno < recog_data.n_operands; opno++)
75cf1494
KT
11229 {
11230 rtx operand = recog_data.operand[opno];
11231 if (REG_P (operand)
11232 && reg_overlap_mentioned_p (load_reg, operand))
11233 return true;
11234
11235 }
11236 return false;
11237}
11238
8baff86e
KT
11239
11240/* When working around the Cortex-A53 erratum 835769,
11241 given rtx_insn INSN, return true if it is a 64-bit multiply-accumulate
11242 instruction and has a preceding memory instruction such that a NOP
11243 should be inserted between them. */
11244
75cf1494
KT
11245bool
11246aarch64_madd_needs_nop (rtx_insn* insn)
11247{
11248 enum attr_type attr_type;
11249 rtx_insn *prev;
11250 rtx body;
11251
b32c1043 11252 if (!TARGET_FIX_ERR_A53_835769)
75cf1494
KT
11253 return false;
11254
e322d6e3 11255 if (!INSN_P (insn) || recog_memoized (insn) < 0)
75cf1494
KT
11256 return false;
11257
11258 attr_type = get_attr_type (insn);
11259 if (!is_madd_op (attr_type))
11260 return false;
11261
11262 prev = aarch64_prev_real_insn (insn);
3fea1a75
KT
11263 /* aarch64_prev_real_insn can call recog_memoized on insns other than INSN.
11264 Restore recog state to INSN to avoid state corruption. */
11265 extract_constrain_insn_cached (insn);
11266
550e2205 11267 if (!prev || !contains_mem_rtx_p (PATTERN (prev)))
75cf1494
KT
11268 return false;
11269
11270 body = single_set (prev);
11271
11272 /* If the previous insn is a memory op and there is no dependency between
8baff86e
KT
11273 it and the DImode madd, emit a NOP between them. If body is NULL then we
11274 have a complex memory operation, probably a load/store pair.
11275 Be conservative for now and emit a NOP. */
11276 if (GET_MODE (recog_data.operand[0]) == DImode
11277 && (!body || !dep_between_memop_and_curr (body)))
75cf1494
KT
11278 return true;
11279
11280 return false;
11281
11282}
11283
8baff86e
KT
11284
11285/* Implement FINAL_PRESCAN_INSN. */
11286
75cf1494
KT
11287void
11288aarch64_final_prescan_insn (rtx_insn *insn)
11289{
11290 if (aarch64_madd_needs_nop (insn))
11291 fprintf (asm_out_file, "\tnop // between mem op and mult-accumulate\n");
11292}
11293
11294
43e9d192 11295/* Return the equivalent letter for size. */
81c2dfb9 11296static char
43e9d192
IB
11297sizetochar (int size)
11298{
11299 switch (size)
11300 {
11301 case 64: return 'd';
11302 case 32: return 's';
11303 case 16: return 'h';
11304 case 8 : return 'b';
11305 default: gcc_unreachable ();
11306 }
11307}
11308
3520f7cc
JG
11309/* Return true iff x is a uniform vector of floating-point
11310 constants, and the constant can be represented in
11311 quarter-precision form. Note that, as aarch64_float_const_representable_p
11312 rejects both +0.0 and -0.0, we will also reject them. */
11313static bool
11314aarch64_vect_float_const_representable_p (rtx x)
11315{
92695fbb
RS
11316 rtx elt;
11317 return (GET_MODE_CLASS (GET_MODE (x)) == MODE_VECTOR_FLOAT
11318 && const_vec_duplicate_p (x, &elt)
11319 && aarch64_float_const_representable_p (elt));
3520f7cc
JG
11320}
11321
d8edd899 11322/* Return true for valid and false for invalid. */
3ea63f60 11323bool
ef4bddc2 11324aarch64_simd_valid_immediate (rtx op, machine_mode mode, bool inverse,
48063b9d 11325 struct simd_immediate_info *info)
43e9d192
IB
11326{
11327#define CHECK(STRIDE, ELSIZE, CLASS, TEST, SHIFT, NEG) \
11328 matches = 1; \
11329 for (i = 0; i < idx; i += (STRIDE)) \
11330 if (!(TEST)) \
11331 matches = 0; \
11332 if (matches) \
11333 { \
11334 immtype = (CLASS); \
11335 elsize = (ELSIZE); \
43e9d192
IB
11336 eshift = (SHIFT); \
11337 emvn = (NEG); \
11338 break; \
11339 }
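/* For example, the first CHECK below accepts 32-bit elements that all equal
   0x000000ab, where ab is the low byte of the first element, i.e. a single
   non-zero low byte replicated into every element, a pattern the AdvSIMD
   move-immediate encodings can express.  */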
11340
11341 unsigned int i, elsize = 0, idx = 0, n_elts = CONST_VECTOR_NUNITS (op);
cb5ca315 11342 unsigned int innersize = GET_MODE_UNIT_SIZE (mode);
43e9d192 11343 unsigned char bytes[16];
43e9d192
IB
11344 int immtype = -1, matches;
11345 unsigned int invmask = inverse ? 0xff : 0;
11346 int eshift, emvn;
11347
43e9d192 11348 if (GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
3520f7cc 11349 {
81c2dfb9
IB
11350 if (! (aarch64_simd_imm_zero_p (op, mode)
11351 || aarch64_vect_float_const_representable_p (op)))
d8edd899 11352 return false;
3520f7cc 11353
48063b9d
IB
11354 if (info)
11355 {
11356 info->value = CONST_VECTOR_ELT (op, 0);
81c2dfb9 11357 info->element_width = GET_MODE_BITSIZE (GET_MODE (info->value));
48063b9d
IB
11358 info->mvn = false;
11359 info->shift = 0;
11360 }
3520f7cc 11361
d8edd899 11362 return true;
3520f7cc 11363 }
43e9d192
IB
11364
11365 /* Splat vector constant out into a byte vector. */
11366 for (i = 0; i < n_elts; i++)
11367 {
4b1e108c
AL
11368 /* The vector is provided in gcc endian-neutral fashion. For aarch64_be,
11369 it must be laid out in the vector register in reverse order. */
11370 rtx el = CONST_VECTOR_ELT (op, BYTES_BIG_ENDIAN ? (n_elts - 1 - i) : i);
43e9d192 11371 unsigned HOST_WIDE_INT elpart;
43e9d192 11372
ee78df47
KT
11373 gcc_assert (CONST_INT_P (el));
11374 elpart = INTVAL (el);
11375
11376 for (unsigned int byte = 0; byte < innersize; byte++)
11377 {
11378 bytes[idx++] = (elpart & 0xff) ^ invmask;
11379 elpart >>= BITS_PER_UNIT;
11380 }
43e9d192 11381
43e9d192
IB
11382 }
11383
11384 /* Sanity check. */
11385 gcc_assert (idx == GET_MODE_SIZE (mode));
11386
11387 do
11388 {
11389 CHECK (4, 32, 0, bytes[i] == bytes[0] && bytes[i + 1] == 0
11390 && bytes[i + 2] == 0 && bytes[i + 3] == 0, 0, 0);
11391
11392 CHECK (4, 32, 1, bytes[i] == 0 && bytes[i + 1] == bytes[1]
11393 && bytes[i + 2] == 0 && bytes[i + 3] == 0, 8, 0);
11394
11395 CHECK (4, 32, 2, bytes[i] == 0 && bytes[i + 1] == 0
11396 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0, 16, 0);
11397
11398 CHECK (4, 32, 3, bytes[i] == 0 && bytes[i + 1] == 0
11399 && bytes[i + 2] == 0 && bytes[i + 3] == bytes[3], 24, 0);
11400
11401 CHECK (2, 16, 4, bytes[i] == bytes[0] && bytes[i + 1] == 0, 0, 0);
11402
11403 CHECK (2, 16, 5, bytes[i] == 0 && bytes[i + 1] == bytes[1], 8, 0);
11404
11405 CHECK (4, 32, 6, bytes[i] == bytes[0] && bytes[i + 1] == 0xff
11406 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff, 0, 1);
11407
11408 CHECK (4, 32, 7, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
11409 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff, 8, 1);
11410
11411 CHECK (4, 32, 8, bytes[i] == 0xff && bytes[i + 1] == 0xff
11412 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff, 16, 1);
11413
11414 CHECK (4, 32, 9, bytes[i] == 0xff && bytes[i + 1] == 0xff
11415 && bytes[i + 2] == 0xff && bytes[i + 3] == bytes[3], 24, 1);
11416
11417 CHECK (2, 16, 10, bytes[i] == bytes[0] && bytes[i + 1] == 0xff, 0, 1);
11418
11419 CHECK (2, 16, 11, bytes[i] == 0xff && bytes[i + 1] == bytes[1], 8, 1);
11420
11421 CHECK (4, 32, 12, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
e4f0f84d 11422 && bytes[i + 2] == 0 && bytes[i + 3] == 0, 8, 0);
43e9d192
IB
11423
11424 CHECK (4, 32, 13, bytes[i] == 0 && bytes[i + 1] == bytes[1]
e4f0f84d 11425 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff, 8, 1);
43e9d192
IB
11426
11427 CHECK (4, 32, 14, bytes[i] == 0xff && bytes[i + 1] == 0xff
e4f0f84d 11428 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0, 16, 0);
43e9d192
IB
11429
11430 CHECK (4, 32, 15, bytes[i] == 0 && bytes[i + 1] == 0
e4f0f84d 11431 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff, 16, 1);
43e9d192
IB
11432
11433 CHECK (1, 8, 16, bytes[i] == bytes[0], 0, 0);
11434
11435 CHECK (1, 64, 17, (bytes[i] == 0 || bytes[i] == 0xff)
11436 && bytes[i] == bytes[(i + 8) % idx], 0, 0);
11437 }
11438 while (0);
11439
e4f0f84d 11440 if (immtype == -1)
d8edd899 11441 return false;
43e9d192 11442
48063b9d 11443 if (info)
43e9d192 11444 {
48063b9d 11445 info->element_width = elsize;
48063b9d
IB
11446 info->mvn = emvn != 0;
11447 info->shift = eshift;
11448
43e9d192
IB
11449 unsigned HOST_WIDE_INT imm = 0;
11450
e4f0f84d
TB
11451 if (immtype >= 12 && immtype <= 15)
11452 info->msl = true;
11453
43e9d192
IB
11454 /* Un-invert bytes of recognized vector, if necessary. */
11455 if (invmask != 0)
11456 for (i = 0; i < idx; i++)
11457 bytes[i] ^= invmask;
11458
11459 if (immtype == 17)
11460 {
11461 /* FIXME: Broken on 32-bit H_W_I hosts. */
11462 gcc_assert (sizeof (HOST_WIDE_INT) == 8);
11463
11464 for (i = 0; i < 8; i++)
11465 imm |= (unsigned HOST_WIDE_INT) (bytes[i] ? 0xff : 0)
11466 << (i * BITS_PER_UNIT);
11467
43e9d192 11468
48063b9d
IB
11469 info->value = GEN_INT (imm);
11470 }
11471 else
11472 {
11473 for (i = 0; i < elsize / BITS_PER_UNIT; i++)
11474 imm |= (unsigned HOST_WIDE_INT) bytes[i] << (i * BITS_PER_UNIT);
43e9d192
IB
11475
11476 /* Construct 'abcdefgh' because the assembler cannot handle
48063b9d
IB
11477 generic constants. */
11478 if (info->mvn)
43e9d192 11479 imm = ~imm;
48063b9d
IB
11480 imm = (imm >> info->shift) & 0xff;
11481 info->value = GEN_INT (imm);
11482 }
43e9d192
IB
11483 }
11484
48063b9d 11485 return true;
43e9d192
IB
11486#undef CHECK
11487}
11488
43e9d192
IB
11489/* Check if immediate shift constants are within range. */
11490bool
ef4bddc2 11491aarch64_simd_shift_imm_p (rtx x, machine_mode mode, bool left)
43e9d192
IB
11492{
11493 int bit_width = GET_MODE_UNIT_SIZE (mode) * BITS_PER_UNIT;
11494 if (left)
ddeabd3e 11495 return aarch64_const_vec_all_same_in_range_p (x, 0, bit_width - 1);
43e9d192 11496 else
ddeabd3e 11497 return aarch64_const_vec_all_same_in_range_p (x, 1, bit_width);
43e9d192
IB
11498}
11499
3520f7cc
JG
11500/* Return true if X is a uniform vector where all elements
11501 are either the floating-point constant 0.0 or the
11502 integer constant 0. */
43e9d192 11503bool
ef4bddc2 11504aarch64_simd_imm_zero_p (rtx x, machine_mode mode)
43e9d192 11505{
3520f7cc 11506 return x == CONST0_RTX (mode);
43e9d192
IB
11507}
11508
7325d85a
KT
11509
11510/* Return the bitmask CONST_INT to select the bits required by a zero extract
11511 operation of width WIDTH at bit position POS. */
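/* For example, WIDTH = 4 and POS = 8 give ((1 << 4) - 1) << 8 = 0xf00.  */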
11512
11513rtx
11514aarch64_mask_from_zextract_ops (rtx width, rtx pos)
11515{
11516 gcc_assert (CONST_INT_P (width));
11517 gcc_assert (CONST_INT_P (pos));
11518
11519 unsigned HOST_WIDE_INT mask
11520 = ((unsigned HOST_WIDE_INT) 1 << UINTVAL (width)) - 1;
11521 return GEN_INT (mask << UINTVAL (pos));
11522}
11523
43e9d192 11524bool
ef4bddc2 11525aarch64_simd_imm_scalar_p (rtx x, machine_mode mode ATTRIBUTE_UNUSED)
43e9d192
IB
11526{
11527 HOST_WIDE_INT imm = INTVAL (x);
11528 int i;
11529
11530 for (i = 0; i < 8; i++)
11531 {
11532 unsigned int byte = imm & 0xff;
11533 if (byte != 0xff && byte != 0)
11534 return false;
11535 imm >>= 8;
11536 }
11537
11538 return true;
11539}
11540
83f8c414 11541bool
a6e0bfa7 11542aarch64_mov_operand_p (rtx x, machine_mode mode)
83f8c414 11543{
83f8c414
CSS
11544 if (GET_CODE (x) == HIGH
11545 && aarch64_valid_symref (XEXP (x, 0), GET_MODE (XEXP (x, 0))))
11546 return true;
11547
82614948 11548 if (CONST_INT_P (x))
83f8c414
CSS
11549 return true;
11550
11551 if (GET_CODE (x) == SYMBOL_REF && mode == DImode && CONSTANT_ADDRESS_P (x))
11552 return true;
11553
a6e0bfa7 11554 return aarch64_classify_symbolic_expression (x)
a5350ddc 11555 == SYMBOL_TINY_ABSOLUTE;
83f8c414
CSS
11556}
11557
43e9d192
IB
11558/* Return a const_int vector of VAL. */
11559rtx
ab014eb3 11560aarch64_simd_gen_const_vector_dup (machine_mode mode, HOST_WIDE_INT val)
43e9d192
IB
11561{
11562 int nunits = GET_MODE_NUNITS (mode);
11563 rtvec v = rtvec_alloc (nunits);
11564 int i;
11565
ab014eb3
TC
11566 rtx cache = GEN_INT (val);
11567
43e9d192 11568 for (i=0; i < nunits; i++)
ab014eb3 11569 RTVEC_ELT (v, i) = cache;
43e9d192
IB
11570
11571 return gen_rtx_CONST_VECTOR (mode, v);
11572}
11573
051d0e2f
SN
11574/* Check OP is a legal scalar immediate for the MOVI instruction. */
11575
11576bool
ef4bddc2 11577aarch64_simd_scalar_immediate_valid_for_move (rtx op, machine_mode mode)
051d0e2f 11578{
ef4bddc2 11579 machine_mode vmode;
051d0e2f
SN
11580
11581 gcc_assert (!VECTOR_MODE_P (mode));
11582 vmode = aarch64_preferred_simd_mode (mode);
11583 rtx op_v = aarch64_simd_gen_const_vector_dup (vmode, INTVAL (op));
48063b9d 11584 return aarch64_simd_valid_immediate (op_v, vmode, false, NULL);
051d0e2f
SN
11585}
11586
988fa693
JG
11587/* Construct and return a PARALLEL RTX vector with elements numbering the
11588 lanes of either the high (HIGH == TRUE) or low (HIGH == FALSE) half of
11589 the vector - from the perspective of the architecture. This does not
11590 line up with GCC's perspective on lane numbers, so we end up with
11591 different masks depending on our target endian-ness. The diagram
11592 below may help. We must draw the distinction when building masks
11593 which select one half of the vector. An instruction selecting
11594 architectural low-lanes for a big-endian target, must be described using
11595 a mask selecting GCC high-lanes.
11596
11597 Big-Endian Little-Endian
11598
11599GCC 0 1 2 3 3 2 1 0
11600 | x | x | x | x | | x | x | x | x |
11601Architecture 3 2 1 0 3 2 1 0
11602
11603Low Mask: { 2, 3 } { 0, 1 }
11604High Mask: { 0, 1 } { 2, 3 }
11605*/
11606
43e9d192 11607rtx
ef4bddc2 11608aarch64_simd_vect_par_cnst_half (machine_mode mode, bool high)
43e9d192
IB
11609{
11610 int nunits = GET_MODE_NUNITS (mode);
11611 rtvec v = rtvec_alloc (nunits / 2);
988fa693
JG
11612 int high_base = nunits / 2;
11613 int low_base = 0;
11614 int base;
43e9d192
IB
11615 rtx t1;
11616 int i;
11617
988fa693
JG
11618 if (BYTES_BIG_ENDIAN)
11619 base = high ? low_base : high_base;
11620 else
11621 base = high ? high_base : low_base;
11622
11623 for (i = 0; i < nunits / 2; i++)
43e9d192
IB
11624 RTVEC_ELT (v, i) = GEN_INT (base + i);
11625
11626 t1 = gen_rtx_PARALLEL (mode, v);
11627 return t1;
11628}
11629
988fa693
JG
11630/* Check OP for validity as a PARALLEL RTX vector with elements
11631 numbering the lanes of either the high (HIGH == TRUE) or low (HIGH == FALSE) half,
11632 from the perspective of the architecture. See the diagram above
11633 aarch64_simd_vect_par_cnst_half for more details. */
11634
11635bool
ef4bddc2 11636aarch64_simd_check_vect_par_cnst_half (rtx op, machine_mode mode,
988fa693
JG
11637 bool high)
11638{
11639 rtx ideal = aarch64_simd_vect_par_cnst_half (mode, high);
11640 HOST_WIDE_INT count_op = XVECLEN (op, 0);
11641 HOST_WIDE_INT count_ideal = XVECLEN (ideal, 0);
11642 int i = 0;
11643
11644 if (!VECTOR_MODE_P (mode))
11645 return false;
11646
11647 if (count_op != count_ideal)
11648 return false;
11649
11650 for (i = 0; i < count_ideal; i++)
11651 {
11652 rtx elt_op = XVECEXP (op, 0, i);
11653 rtx elt_ideal = XVECEXP (ideal, 0, i);
11654
4aa81c2e 11655 if (!CONST_INT_P (elt_op)
988fa693
JG
11656 || INTVAL (elt_ideal) != INTVAL (elt_op))
11657 return false;
11658 }
11659 return true;
11660}
11661
43e9d192
IB
11662/* Bounds-check lanes. Ensure OPERAND lies between LOW (inclusive) and
11663 HIGH (exclusive). */
11664void
46ed6024
CB
11665aarch64_simd_lane_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high,
11666 const_tree exp)
43e9d192
IB
11667{
11668 HOST_WIDE_INT lane;
4aa81c2e 11669 gcc_assert (CONST_INT_P (operand));
43e9d192
IB
11670 lane = INTVAL (operand);
11671
11672 if (lane < low || lane >= high)
46ed6024
CB
11673 {
11674 if (exp)
cf0c27ef 11675 error ("%Klane %wd out of range %wd - %wd", exp, lane, low, high - 1);
46ed6024 11676 else
cf0c27ef 11677 error ("lane %wd out of range %wd - %wd", lane, low, high - 1);
46ed6024 11678 }
43e9d192
IB
11679}
11680
43e9d192
IB
11681/* Return TRUE if OP is a valid vector addressing mode. */
11682bool
11683aarch64_simd_mem_operand_p (rtx op)
11684{
11685 return MEM_P (op) && (GET_CODE (XEXP (op, 0)) == POST_INC
4aa81c2e 11686 || REG_P (XEXP (op, 0)));
43e9d192
IB
11687}
11688
2d8c6dc1
AH
11689/* Emit a register copy from operand to operand, taking care not to
11690 early-clobber source registers in the process.
43e9d192 11691
2d8c6dc1
AH
11692 COUNT is the number of components into which the copy needs to be
11693 decomposed. */
43e9d192 11694void
2d8c6dc1
AH
11695aarch64_simd_emit_reg_reg_move (rtx *operands, enum machine_mode mode,
11696 unsigned int count)
43e9d192
IB
11697{
11698 unsigned int i;
2d8c6dc1
AH
11699 int rdest = REGNO (operands[0]);
11700 int rsrc = REGNO (operands[1]);
43e9d192
IB
11701
11702 if (!reg_overlap_mentioned_p (operands[0], operands[1])
2d8c6dc1
AH
11703 || rdest < rsrc)
11704 for (i = 0; i < count; i++)
11705 emit_move_insn (gen_rtx_REG (mode, rdest + i),
11706 gen_rtx_REG (mode, rsrc + i));
43e9d192 11707 else
2d8c6dc1
AH
11708 for (i = 0; i < count; i++)
11709 emit_move_insn (gen_rtx_REG (mode, rdest + count - i - 1),
11710 gen_rtx_REG (mode, rsrc + count - i - 1));
43e9d192
IB
11711}
11712
668046d1 11713/* Compute and return the length of aarch64_simd_reglist<mode>, where <mode> is
6ec0e5b9 11714 one of the VSTRUCT modes: OI, CI, or XI. */
668046d1
DS
11715int
11716aarch64_simd_attr_length_rglist (enum machine_mode mode)
11717{
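  /* That is, the length assumes one 4-byte instruction for each vector
     register in the list.  */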
11718 return (GET_MODE_SIZE (mode) / UNITS_PER_VREG) * 4;
11719}
11720
db0253a4
TB
11721/* Implement target hook TARGET_VECTOR_ALIGNMENT. The AAPCS64 sets the maximum
11722 alignment of a vector to 128 bits. */
11723static HOST_WIDE_INT
11724aarch64_simd_vector_alignment (const_tree type)
11725{
9439e9a1 11726 HOST_WIDE_INT align = tree_to_shwi (TYPE_SIZE (type));
db0253a4
TB
11727 return MIN (align, 128);
11728}
11729
11730/* Implement target hook TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE. */
11731static bool
11732aarch64_simd_vector_alignment_reachable (const_tree type, bool is_packed)
11733{
11734 if (is_packed)
11735 return false;
11736
11737 /* We guarantee alignment for vectors up to 128-bits. */
11738 if (tree_int_cst_compare (TYPE_SIZE (type),
11739 bitsize_int (BIGGEST_ALIGNMENT)) > 0)
11740 return false;
11741
11742 /* Vectors whose size is <= BIGGEST_ALIGNMENT are naturally aligned. */
11743 return true;
11744}
11745
7df76747
N
11746/* Return true if the vector misalignment factor is supported by the
11747 target. */
11748static bool
11749aarch64_builtin_support_vector_misalignment (machine_mode mode,
11750 const_tree type, int misalignment,
11751 bool is_packed)
11752{
11753 if (TARGET_SIMD && STRICT_ALIGNMENT)
11754 {
11755 /* Return if movmisalign pattern is not supported for this mode. */
11756 if (optab_handler (movmisalign_optab, mode) == CODE_FOR_nothing)
11757 return false;
11758
11759 if (misalignment == -1)
11760 {
11761 /* Misalignment factor is unknown at compile time but we know
11762 it's word aligned. */
11763 if (aarch64_simd_vector_alignment_reachable (type, is_packed))
11764 {
11765 int element_size = TREE_INT_CST_LOW (TYPE_SIZE (type));
11766
11767 if (element_size != 64)
11768 return true;
11769 }
11770 return false;
11771 }
11772 }
11773 return default_builtin_support_vector_misalignment (mode, type, misalignment,
11774 is_packed);
11775}
11776
4369c11e
TB
11777/* If VALS is a vector constant that can be loaded into a register
11778 using DUP, generate instructions to do so and return an RTX to
11779 assign to the register. Otherwise return NULL_RTX. */
11780static rtx
11781aarch64_simd_dup_constant (rtx vals)
11782{
ef4bddc2
RS
11783 machine_mode mode = GET_MODE (vals);
11784 machine_mode inner_mode = GET_MODE_INNER (mode);
4369c11e 11785 rtx x;
4369c11e 11786
92695fbb 11787 if (!const_vec_duplicate_p (vals, &x))
4369c11e
TB
11788 return NULL_RTX;
11789
11790 /* We can load this constant by using DUP and a constant in a
11791 single ARM register. This will be cheaper than a vector
11792 load. */
92695fbb 11793 x = copy_to_mode_reg (inner_mode, x);
4369c11e
TB
11794 return gen_rtx_VEC_DUPLICATE (mode, x);
11795}
11796
11797
11798/* Generate code to load VALS, which is a PARALLEL containing only
11799 constants (for vec_init) or CONST_VECTOR, efficiently into a
11800 register. Returns an RTX to copy into the register, or NULL_RTX
11801 for a PARALLEL that can not be converted into a CONST_VECTOR. */
1df3f464 11802static rtx
4369c11e
TB
11803aarch64_simd_make_constant (rtx vals)
11804{
ef4bddc2 11805 machine_mode mode = GET_MODE (vals);
4369c11e
TB
11806 rtx const_dup;
11807 rtx const_vec = NULL_RTX;
11808 int n_elts = GET_MODE_NUNITS (mode);
11809 int n_const = 0;
11810 int i;
11811
11812 if (GET_CODE (vals) == CONST_VECTOR)
11813 const_vec = vals;
11814 else if (GET_CODE (vals) == PARALLEL)
11815 {
11816 /* A CONST_VECTOR must contain only CONST_INTs and
11817 CONST_DOUBLEs, but CONSTANT_P allows more (e.g. SYMBOL_REF).
11818 Only store valid constants in a CONST_VECTOR. */
11819 for (i = 0; i < n_elts; ++i)
11820 {
11821 rtx x = XVECEXP (vals, 0, i);
11822 if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
11823 n_const++;
11824 }
11825 if (n_const == n_elts)
11826 const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0));
11827 }
11828 else
11829 gcc_unreachable ();
11830
11831 if (const_vec != NULL_RTX
48063b9d 11832 && aarch64_simd_valid_immediate (const_vec, mode, false, NULL))
4369c11e
TB
11833 /* Load using MOVI/MVNI. */
11834 return const_vec;
11835 else if ((const_dup = aarch64_simd_dup_constant (vals)) != NULL_RTX)
11836 /* Loaded using DUP. */
11837 return const_dup;
11838 else if (const_vec != NULL_RTX)
11839 /* Load from constant pool. We can not take advantage of single-cycle
11840 LD1 because we need a PC-relative addressing mode. */
11841 return const_vec;
11842 else
11843 /* A PARALLEL containing something not valid inside CONST_VECTOR.
11844 We can not construct an initializer. */
11845 return NULL_RTX;
11846}
11847
35a093b6
JG
11848/* Expand a vector initialisation sequence, such that TARGET is
11849 initialised to contain VALS. */
11850
4369c11e
TB
11851void
11852aarch64_expand_vector_init (rtx target, rtx vals)
11853{
ef4bddc2
RS
11854 machine_mode mode = GET_MODE (target);
11855 machine_mode inner_mode = GET_MODE_INNER (mode);
35a093b6 11856 /* The number of vector elements. */
4369c11e 11857 int n_elts = GET_MODE_NUNITS (mode);
35a093b6 11858 /* The number of vector elements which are not constant. */
8b66a2d4
AL
11859 int n_var = 0;
11860 rtx any_const = NULL_RTX;
35a093b6
JG
11861 /* The first element of vals. */
11862 rtx v0 = XVECEXP (vals, 0, 0);
4369c11e 11863 bool all_same = true;
4369c11e 11864
35a093b6 11865 /* Count the number of variable elements to initialise. */
8b66a2d4 11866 for (int i = 0; i < n_elts; ++i)
4369c11e 11867 {
8b66a2d4 11868 rtx x = XVECEXP (vals, 0, i);
35a093b6 11869 if (!(CONST_INT_P (x) || CONST_DOUBLE_P (x)))
8b66a2d4
AL
11870 ++n_var;
11871 else
11872 any_const = x;
4369c11e 11873
35a093b6 11874 all_same &= rtx_equal_p (x, v0);
4369c11e
TB
11875 }
11876
35a093b6
JG
11877 /* No variable elements, hand off to aarch64_simd_make_constant which knows
11878 how best to handle this. */
4369c11e
TB
11879 if (n_var == 0)
11880 {
11881 rtx constant = aarch64_simd_make_constant (vals);
11882 if (constant != NULL_RTX)
11883 {
11884 emit_move_insn (target, constant);
11885 return;
11886 }
11887 }
11888
11889 /* Splat a single non-constant element if we can. */
11890 if (all_same)
11891 {
35a093b6 11892 rtx x = copy_to_mode_reg (inner_mode, v0);
4369c11e
TB
11893 aarch64_emit_move (target, gen_rtx_VEC_DUPLICATE (mode, x));
11894 return;
11895 }
11896
85c1b6d7
AP
11897 enum insn_code icode = optab_handler (vec_set_optab, mode);
11898 gcc_assert (icode != CODE_FOR_nothing);
11899
11900 /* If there are only variable elements, try to optimize
11901 the insertion using dup for the most common element
11902 followed by insertions. */
11903
11904 /* The algorithm will fill matches[*][0] with the earliest matching element,
11905 and matches[X][1] with the count of duplicate elements (if X is the
11906 earliest element which has duplicates). */
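  /* For example, for the lanes {a, b, a, a} this gives matches[0] = {0, 3},
     matches[1] = {1, 1} and matches[2] = matches[3] = {0, 0}, so lane 0 is
     picked as the element to duplicate first.  */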
11907
11908 if (n_var == n_elts && n_elts <= 16)
11909 {
11910 int matches[16][2] = {0};
11911 for (int i = 0; i < n_elts; i++)
11912 {
11913 for (int j = 0; j <= i; j++)
11914 {
11915 if (rtx_equal_p (XVECEXP (vals, 0, i), XVECEXP (vals, 0, j)))
11916 {
11917 matches[i][0] = j;
11918 matches[j][1]++;
11919 break;
11920 }
11921 }
11922 }
11923 int maxelement = 0;
11924 int maxv = 0;
11925 for (int i = 0; i < n_elts; i++)
11926 if (matches[i][1] > maxv)
11927 {
11928 maxelement = i;
11929 maxv = matches[i][1];
11930 }
11931
11932 /* Create a duplicate of the most common element. */
11933 rtx x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, maxelement));
11934 aarch64_emit_move (target, gen_rtx_VEC_DUPLICATE (mode, x));
11935
11936 /* Insert the rest. */
11937 for (int i = 0; i < n_elts; i++)
11938 {
11939 rtx x = XVECEXP (vals, 0, i);
11940 if (matches[i][0] == maxelement)
11941 continue;
11942 x = copy_to_mode_reg (inner_mode, x);
11943 emit_insn (GEN_FCN (icode) (target, x, GEN_INT (i)));
11944 }
11945 return;
11946 }
11947
35a093b6
JG
11948 /* Initialise a vector which is part-variable. We want to first try
11949 to build those lanes which are constant in the most efficient way we
11950 can. */
11951 if (n_var != n_elts)
4369c11e
TB
11952 {
11953 rtx copy = copy_rtx (vals);
4369c11e 11954
8b66a2d4
AL
11955 /* Load constant part of vector. We really don't care what goes into the
11956 parts we will overwrite, but we're more likely to be able to load the
11957 constant efficiently if it has fewer, larger, repeating parts
11958 (see aarch64_simd_valid_immediate). */
11959 for (int i = 0; i < n_elts; i++)
11960 {
11961 rtx x = XVECEXP (vals, 0, i);
11962 if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
11963 continue;
11964 rtx subst = any_const;
11965 for (int bit = n_elts / 2; bit > 0; bit /= 2)
11966 {
11967 /* Look in the copied vector, as more elements are const. */
11968 rtx test = XVECEXP (copy, 0, i ^ bit);
11969 if (CONST_INT_P (test) || CONST_DOUBLE_P (test))
11970 {
11971 subst = test;
11972 break;
11973 }
11974 }
11975 XVECEXP (copy, 0, i) = subst;
11976 }
4369c11e 11977 aarch64_expand_vector_init (target, copy);
35a093b6 11978 }
4369c11e 11979
35a093b6 11980 /* Insert the variable lanes directly. */
8b66a2d4 11981 for (int i = 0; i < n_elts; i++)
35a093b6
JG
11982 {
11983 rtx x = XVECEXP (vals, 0, i);
11984 if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
11985 continue;
11986 x = copy_to_mode_reg (inner_mode, x);
11987 emit_insn (GEN_FCN (icode) (target, x, GEN_INT (i)));
11988 }
4369c11e
TB
11989}
11990
43e9d192 11991static unsigned HOST_WIDE_INT
ef4bddc2 11992aarch64_shift_truncation_mask (machine_mode mode)
43e9d192
IB
11993{
11994 return
ac59ad4e
KT
11995 (!SHIFT_COUNT_TRUNCATED
11996 || aarch64_vector_mode_supported_p (mode)
43e9d192
IB
11997 || aarch64_vect_struct_mode_p (mode)) ? 0 : (GET_MODE_BITSIZE (mode) - 1);
11998}
11999
43e9d192
IB
12000/* Select a format to encode pointers in exception handling data. */
12001int
12002aarch64_asm_preferred_eh_data_format (int code ATTRIBUTE_UNUSED, int global)
12003{
12004 int type;
12005 switch (aarch64_cmodel)
12006 {
12007 case AARCH64_CMODEL_TINY:
12008 case AARCH64_CMODEL_TINY_PIC:
12009 case AARCH64_CMODEL_SMALL:
12010 case AARCH64_CMODEL_SMALL_PIC:
1b1e81f8 12011 case AARCH64_CMODEL_SMALL_SPIC:
43e9d192
IB
12012 /* text+got+data < 4Gb. 4-byte signed relocs are sufficient
12013 for everything. */
12014 type = DW_EH_PE_sdata4;
12015 break;
12016 default:
12017 /* No assumptions here. 8-byte relocs required. */
12018 type = DW_EH_PE_sdata8;
12019 break;
12020 }
12021 return (global ? DW_EH_PE_indirect : 0) | DW_EH_PE_pcrel | type;
12022}
12023
e1c1ecb0
KT
12024/* The last .arch and .tune assembly strings that we printed. */
12025static std::string aarch64_last_printed_arch_string;
12026static std::string aarch64_last_printed_tune_string;
12027
361fb3ee
KT
12028/* Implement ASM_DECLARE_FUNCTION_NAME. Output the ISA features used
12029 by the function fndecl. */
12030
12031void
12032aarch64_declare_function_name (FILE *stream, const char* name,
12033 tree fndecl)
12034{
12035 tree target_parts = DECL_FUNCTION_SPECIFIC_TARGET (fndecl);
12036
12037 struct cl_target_option *targ_options;
12038 if (target_parts)
12039 targ_options = TREE_TARGET_OPTION (target_parts);
12040 else
12041 targ_options = TREE_TARGET_OPTION (target_option_current_node);
12042 gcc_assert (targ_options);
12043
12044 const struct processor *this_arch
12045 = aarch64_get_arch (targ_options->x_explicit_arch);
12046
054b4005
JG
12047 unsigned long isa_flags = targ_options->x_aarch64_isa_flags;
12048 std::string extension
04a99ebe
JG
12049 = aarch64_get_extension_string_for_isa_flags (isa_flags,
12050 this_arch->flags);
e1c1ecb0
KT
12051 /* Only update the assembler .arch string if it is distinct from the last
12052 such string we printed. */
12053 std::string to_print = this_arch->name + extension;
12054 if (to_print != aarch64_last_printed_arch_string)
12055 {
12056 asm_fprintf (asm_out_file, "\t.arch %s\n", to_print.c_str ());
12057 aarch64_last_printed_arch_string = to_print;
12058 }
361fb3ee
KT
12059
12060 /* Print the cpu name we're tuning for in the comments; it might be
e1c1ecb0
KT
12061 useful to readers of the generated asm. Do it only when it changes
12062 from function to function and verbose assembly is requested. */
361fb3ee
KT
12063 const struct processor *this_tune
12064 = aarch64_get_tune_cpu (targ_options->x_explicit_tune_core);
12065
e1c1ecb0
KT
12066 if (flag_debug_asm && aarch64_last_printed_tune_string != this_tune->name)
12067 {
12068 asm_fprintf (asm_out_file, "\t" ASM_COMMENT_START ".tune %s\n",
12069 this_tune->name);
12070 aarch64_last_printed_tune_string = this_tune->name;
12071 }
361fb3ee
KT
12072
12073 /* Don't forget the type directive for ELF. */
12074 ASM_OUTPUT_TYPE_DIRECTIVE (stream, name, "function");
12075 ASM_OUTPUT_LABEL (stream, name);
12076}
12077
e1c1ecb0
KT
12078/* Implements TARGET_ASM_FILE_START. Output the assembly header. */
12079
12080static void
12081aarch64_start_file (void)
12082{
12083 struct cl_target_option *default_options
12084 = TREE_TARGET_OPTION (target_option_default_node);
12085
12086 const struct processor *default_arch
12087 = aarch64_get_arch (default_options->x_explicit_arch);
12088 unsigned long default_isa_flags = default_options->x_aarch64_isa_flags;
12089 std::string extension
04a99ebe
JG
12090 = aarch64_get_extension_string_for_isa_flags (default_isa_flags,
12091 default_arch->flags);
e1c1ecb0
KT
12092
12093 aarch64_last_printed_arch_string = default_arch->name + extension;
12094 aarch64_last_printed_tune_string = "";
12095 asm_fprintf (asm_out_file, "\t.arch %s\n",
12096 aarch64_last_printed_arch_string.c_str ());
12097
12098 default_file_start ();
12099}
12100
0462169c
SN
12101/* Emit load exclusive. */
12102
12103static void
ef4bddc2 12104aarch64_emit_load_exclusive (machine_mode mode, rtx rval,
0462169c
SN
12105 rtx mem, rtx model_rtx)
12106{
12107 rtx (*gen) (rtx, rtx, rtx);
12108
12109 switch (mode)
12110 {
12111 case QImode: gen = gen_aarch64_load_exclusiveqi; break;
12112 case HImode: gen = gen_aarch64_load_exclusivehi; break;
12113 case SImode: gen = gen_aarch64_load_exclusivesi; break;
12114 case DImode: gen = gen_aarch64_load_exclusivedi; break;
12115 default:
12116 gcc_unreachable ();
12117 }
12118
12119 emit_insn (gen (rval, mem, model_rtx));
12120}
12121
12122/* Emit store exclusive. */
12123
12124static void
ef4bddc2 12125aarch64_emit_store_exclusive (machine_mode mode, rtx bval,
0462169c
SN
12126 rtx rval, rtx mem, rtx model_rtx)
12127{
12128 rtx (*gen) (rtx, rtx, rtx, rtx);
12129
12130 switch (mode)
12131 {
12132 case QImode: gen = gen_aarch64_store_exclusiveqi; break;
12133 case HImode: gen = gen_aarch64_store_exclusivehi; break;
12134 case SImode: gen = gen_aarch64_store_exclusivesi; break;
12135 case DImode: gen = gen_aarch64_store_exclusivedi; break;
12136 default:
12137 gcc_unreachable ();
12138 }
12139
12140 emit_insn (gen (bval, rval, mem, model_rtx));
12141}
12142
12143/* Mark the previous jump instruction as unlikely. */
12144
12145static void
12146aarch64_emit_unlikely_jump (rtx insn)
12147{
e5af9ddd 12148 int very_unlikely = REG_BR_PROB_BASE / 100 - 1;
0462169c 12149
f370536c
TS
12150 rtx_insn *jump = emit_jump_insn (insn);
12151 add_int_reg_note (jump, REG_BR_PROB, very_unlikely);
0462169c
SN
12152}
12153
12154/* Expand a compare and swap pattern. */
12155
12156void
12157aarch64_expand_compare_and_swap (rtx operands[])
12158{
12159 rtx bval, rval, mem, oldval, newval, is_weak, mod_s, mod_f, x;
ef4bddc2 12160 machine_mode mode, cmp_mode;
b0770c0f
MW
12161 typedef rtx (*gen_cas_fn) (rtx, rtx, rtx, rtx, rtx, rtx, rtx);
12162 int idx;
12163 gen_cas_fn gen;
12164 const gen_cas_fn split_cas[] =
12165 {
12166 gen_aarch64_compare_and_swapqi,
12167 gen_aarch64_compare_and_swaphi,
12168 gen_aarch64_compare_and_swapsi,
12169 gen_aarch64_compare_and_swapdi
12170 };
12171 const gen_cas_fn atomic_cas[] =
12172 {
12173 gen_aarch64_compare_and_swapqi_lse,
12174 gen_aarch64_compare_and_swaphi_lse,
12175 gen_aarch64_compare_and_swapsi_lse,
12176 gen_aarch64_compare_and_swapdi_lse
12177 };
0462169c
SN
12178
12179 bval = operands[0];
12180 rval = operands[1];
12181 mem = operands[2];
12182 oldval = operands[3];
12183 newval = operands[4];
12184 is_weak = operands[5];
12185 mod_s = operands[6];
12186 mod_f = operands[7];
12187 mode = GET_MODE (mem);
12188 cmp_mode = mode;
12189
12190 /* Normally the succ memory model must be stronger than fail, but in the
12191 unlikely event of fail being ACQUIRE and succ being RELEASE we need to
12192 promote succ to ACQ_REL so that we don't lose the acquire semantics. */
12193
46b35980
AM
12194 if (is_mm_acquire (memmodel_from_int (INTVAL (mod_f)))
12195 && is_mm_release (memmodel_from_int (INTVAL (mod_s))))
0462169c
SN
12196 mod_s = GEN_INT (MEMMODEL_ACQ_REL);
12197
12198 switch (mode)
12199 {
12200 case QImode:
12201 case HImode:
12202 /* For short modes, we're going to perform the comparison in SImode,
12203 so do the zero-extension now. */
12204 cmp_mode = SImode;
12205 rval = gen_reg_rtx (SImode);
12206 oldval = convert_modes (SImode, mode, oldval, true);
12207 /* Fall through. */
12208
12209 case SImode:
12210 case DImode:
12211 /* Force the value into a register if needed. */
12212 if (!aarch64_plus_operand (oldval, mode))
12213 oldval = force_reg (cmp_mode, oldval);
12214 break;
12215
12216 default:
12217 gcc_unreachable ();
12218 }
12219
12220 switch (mode)
12221 {
b0770c0f
MW
12222 case QImode: idx = 0; break;
12223 case HImode: idx = 1; break;
12224 case SImode: idx = 2; break;
12225 case DImode: idx = 3; break;
0462169c
SN
12226 default:
12227 gcc_unreachable ();
12228 }
b0770c0f
MW
12229 if (TARGET_LSE)
12230 gen = atomic_cas[idx];
12231 else
12232 gen = split_cas[idx];
0462169c
SN
12233
12234 emit_insn (gen (rval, mem, oldval, newval, is_weak, mod_s, mod_f));
12235
12236 if (mode == QImode || mode == HImode)
12237 emit_move_insn (operands[1], gen_lowpart (mode, rval));
12238
12239 x = gen_rtx_REG (CCmode, CC_REGNUM);
12240 x = gen_rtx_EQ (SImode, x, const0_rtx);
f7df4a84 12241 emit_insn (gen_rtx_SET (bval, x));
0462169c
SN
12242}
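/* A minimal user-level sketch of the corner case handled above (assumed
   illustration, not from the sources): a compare-and-swap whose failure
   ordering is ACQUIRE while its success ordering is only RELEASE.  The
   expander promotes the success ordering to ACQ_REL so that the acquire
   semantics of the failure path are not lost.  */

static int
cas_release_acquire (int *p, int expected, int desired)
{
  return __atomic_compare_exchange_n (p, &expected, desired, /* weak */ 0,
				      __ATOMIC_RELEASE, __ATOMIC_ACQUIRE);
}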
12243
641c2f8b
MW
12244/* Test whether the target supports using an atomic load-operate instruction.
12245 CODE is the operation and AFTER is TRUE if the data in memory after the
12246 operation should be returned and FALSE if the data before the operation
12247 should be returned. Returns FALSE if the operation isn't supported by the
12248 architecture. */
12249
12250bool
12251aarch64_atomic_ldop_supported_p (enum rtx_code code)
12252{
12253 if (!TARGET_LSE)
12254 return false;
12255
12256 switch (code)
12257 {
12258 case SET:
12259 case AND:
12260 case IOR:
12261 case XOR:
12262 case MINUS:
12263 case PLUS:
12264 return true;
12265 default:
12266 return false;
12267 }
12268}
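/* Hedged illustration (not from the sources): when the check above succeeds
   (TARGET_LSE, i.e. ARMv8.1-A atomics), a fetch-and-modify built-in using one
   of the accepted codes can be expanded to a single load-operate instruction
   (LDADD/LDSET/LDEOR/LDCLR/SWP) instead of a load/store-exclusive loop.  */

static int
fetch_or_example (int *p, int mask)
{
  /* With -march=armv8.1-a this is expected to use LDSET; without LSE it is
     split into an exclusive-access loop by aarch64_split_atomic_op.  */
  return __atomic_fetch_or (p, mask, __ATOMIC_RELAXED);
}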
12269
f70fb3b6
MW
12270/* Emit a barrier appropriate for memory model MODEL at the end of a
12271 sequence implementing an atomic operation. */
12272
12273static void
12274aarch64_emit_post_barrier (enum memmodel model)
12275{
12276 const enum memmodel base_model = memmodel_base (model);
12277
12278 if (is_mm_sync (model)
12279 && (base_model == MEMMODEL_ACQUIRE
12280 || base_model == MEMMODEL_ACQ_REL
12281 || base_model == MEMMODEL_SEQ_CST))
12282 {
12283 emit_insn (gen_mem_thread_fence (GEN_INT (MEMMODEL_SEQ_CST)));
12284 }
12285}
12286
b0770c0f
MW
12287/* Emit an atomic compare-and-swap operation. RVAL is the destination register
12288 for the data in memory. EXPECTED is the value expected to be in memory.
12289 DESIRED is the value to store to memory. MEM is the memory location. MODEL
12290 is the memory ordering to use. */
12291
12292void
12293aarch64_gen_atomic_cas (rtx rval, rtx mem,
12294 rtx expected, rtx desired,
12295 rtx model)
12296{
12297 rtx (*gen) (rtx, rtx, rtx, rtx);
12298 machine_mode mode;
12299
12300 mode = GET_MODE (mem);
12301
12302 switch (mode)
12303 {
12304 case QImode: gen = gen_aarch64_atomic_casqi; break;
12305 case HImode: gen = gen_aarch64_atomic_cashi; break;
12306 case SImode: gen = gen_aarch64_atomic_cassi; break;
12307 case DImode: gen = gen_aarch64_atomic_casdi; break;
12308 default:
12309 gcc_unreachable ();
12310 }
12311
12312 /* Move the expected value into the CAS destination register. */
12313 emit_insn (gen_rtx_SET (rval, expected));
12314
12315 /* Emit the CAS. */
12316 emit_insn (gen (rval, mem, desired, model));
12317
12318 /* Compare the expected value with the value loaded by the CAS, to establish
12319 whether the swap was made. */
12320 aarch64_gen_compare_reg (EQ, rval, expected);
12321}
12322
0462169c
SN
12323/* Split a compare and swap pattern. */
12324
12325void
12326aarch64_split_compare_and_swap (rtx operands[])
12327{
12328 rtx rval, mem, oldval, newval, scratch;
ef4bddc2 12329 machine_mode mode;
0462169c 12330 bool is_weak;
5d8a22a5
DM
12331 rtx_code_label *label1, *label2;
12332 rtx x, cond;
ab876106
MW
12333 enum memmodel model;
12334 rtx model_rtx;
0462169c
SN
12335
12336 rval = operands[0];
12337 mem = operands[1];
12338 oldval = operands[2];
12339 newval = operands[3];
12340 is_weak = (operands[4] != const0_rtx);
ab876106 12341 model_rtx = operands[5];
0462169c
SN
12342 scratch = operands[7];
12343 mode = GET_MODE (mem);
ab876106 12344 model = memmodel_from_int (INTVAL (model_rtx));
0462169c 12345
17f47f86
KT
12346 /* When OLDVAL is zero and we want the strong version we can emit a tighter
12347 loop:
12348 .label1:
12349 LD[A]XR rval, [mem]
12350 CBNZ rval, .label2
12351 ST[L]XR scratch, newval, [mem]
12352 CBNZ scratch, .label1
12353 .label2:
12354 CMP rval, 0. */
12355 bool strong_zero_p = !is_weak && oldval == const0_rtx;
12356
5d8a22a5 12357 label1 = NULL;
0462169c
SN
12358 if (!is_weak)
12359 {
12360 label1 = gen_label_rtx ();
12361 emit_label (label1);
12362 }
12363 label2 = gen_label_rtx ();
12364
ab876106
MW
12365 /* The initial load can be relaxed for a __sync operation since a final
12366 barrier will be emitted to stop code hoisting. */
12367 if (is_mm_sync (model))
12368 aarch64_emit_load_exclusive (mode, rval, mem,
12369 GEN_INT (MEMMODEL_RELAXED));
12370 else
12371 aarch64_emit_load_exclusive (mode, rval, mem, model_rtx);
0462169c 12372
17f47f86
KT
12373 if (strong_zero_p)
12374 {
12375 x = gen_rtx_NE (VOIDmode, rval, const0_rtx);
12376 x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
12377 gen_rtx_LABEL_REF (Pmode, label2), pc_rtx);
12378 aarch64_emit_unlikely_jump (gen_rtx_SET (pc_rtx, x));
12379 }
12380 else
12381 {
12382 cond = aarch64_gen_compare_reg (NE, rval, oldval);
12383 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
12384 x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
12385 gen_rtx_LABEL_REF (Pmode, label2), pc_rtx);
12386 aarch64_emit_unlikely_jump (gen_rtx_SET (pc_rtx, x));
12387 }
0462169c 12388
ab876106 12389 aarch64_emit_store_exclusive (mode, scratch, mem, newval, model_rtx);
0462169c
SN
12390
12391 if (!is_weak)
12392 {
12393 x = gen_rtx_NE (VOIDmode, scratch, const0_rtx);
12394 x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
12395 gen_rtx_LABEL_REF (Pmode, label1), pc_rtx);
f7df4a84 12396 aarch64_emit_unlikely_jump (gen_rtx_SET (pc_rtx, x));
0462169c
SN
12397 }
12398 else
12399 {
12400 cond = gen_rtx_REG (CCmode, CC_REGNUM);
12401 x = gen_rtx_COMPARE (CCmode, scratch, const0_rtx);
f7df4a84 12402 emit_insn (gen_rtx_SET (cond, x));
0462169c
SN
12403 }
12404
12405 emit_label (label2);
17f47f86
KT
12406 /* If we used a CBNZ in the exchange loop emit an explicit compare with RVAL
12407 to set the condition flags. If this is not used it will be removed by
12408 later passes. */
12409 if (strong_zero_p)
12410 {
12411 cond = gen_rtx_REG (CCmode, CC_REGNUM);
12412 x = gen_rtx_COMPARE (CCmode, rval, const0_rtx);
12413 emit_insn (gen_rtx_SET (cond, x));
12414 }
ab876106
MW
12415 /* Emit any final barrier needed for a __sync operation. */
12416 if (is_mm_sync (model))
12417 aarch64_emit_post_barrier (model);
0462169c
SN
12418}
12419
68729b06
MW
12420/* Emit a BIC instruction. */
12421
12422static void
12423aarch64_emit_bic (machine_mode mode, rtx dst, rtx s1, rtx s2, int shift)
12424{
12425 rtx shift_rtx = GEN_INT (shift);
12426 rtx (*gen) (rtx, rtx, rtx, rtx);
12427
12428 switch (mode)
12429 {
12430 case SImode: gen = gen_and_one_cmpl_lshrsi3; break;
12431 case DImode: gen = gen_and_one_cmpl_lshrdi3; break;
12432 default:
12433 gcc_unreachable ();
12434 }
12435
12436 emit_insn (gen (dst, s2, shift_rtx, s1));
12437}
12438
9cd7b720
MW
12439/* Emit an atomic swap. */
12440
12441static void
12442aarch64_emit_atomic_swap (machine_mode mode, rtx dst, rtx value,
12443 rtx mem, rtx model)
12444{
12445 rtx (*gen) (rtx, rtx, rtx, rtx);
12446
12447 switch (mode)
12448 {
12449 case QImode: gen = gen_aarch64_atomic_swpqi; break;
12450 case HImode: gen = gen_aarch64_atomic_swphi; break;
12451 case SImode: gen = gen_aarch64_atomic_swpsi; break;
12452 case DImode: gen = gen_aarch64_atomic_swpdi; break;
12453 default:
12454 gcc_unreachable ();
12455 }
12456
12457 emit_insn (gen (dst, mem, value, model));
12458}
12459
641c2f8b
MW
12460/* Operations supported by aarch64_emit_atomic_load_op. */
12461
12462enum aarch64_atomic_load_op_code
12463{
12464 AARCH64_LDOP_PLUS, /* A + B */
12465 AARCH64_LDOP_XOR, /* A ^ B */
12466 AARCH64_LDOP_OR, /* A | B */
12467 AARCH64_LDOP_BIC /* A & ~B */
12468};
12469
12470/* Emit an atomic load-operate. */
12471
12472static void
12473aarch64_emit_atomic_load_op (enum aarch64_atomic_load_op_code code,
12474 machine_mode mode, rtx dst, rtx src,
12475 rtx mem, rtx model)
12476{
12477 typedef rtx (*aarch64_atomic_load_op_fn) (rtx, rtx, rtx, rtx);
12478 const aarch64_atomic_load_op_fn plus[] =
12479 {
12480 gen_aarch64_atomic_loadaddqi,
12481 gen_aarch64_atomic_loadaddhi,
12482 gen_aarch64_atomic_loadaddsi,
12483 gen_aarch64_atomic_loadadddi
12484 };
12485 const aarch64_atomic_load_op_fn eor[] =
12486 {
12487 gen_aarch64_atomic_loadeorqi,
12488 gen_aarch64_atomic_loadeorhi,
12489 gen_aarch64_atomic_loadeorsi,
12490 gen_aarch64_atomic_loadeordi
12491 };
12492 const aarch64_atomic_load_op_fn ior[] =
12493 {
12494 gen_aarch64_atomic_loadsetqi,
12495 gen_aarch64_atomic_loadsethi,
12496 gen_aarch64_atomic_loadsetsi,
12497 gen_aarch64_atomic_loadsetdi
12498 };
12499 const aarch64_atomic_load_op_fn bic[] =
12500 {
12501 gen_aarch64_atomic_loadclrqi,
12502 gen_aarch64_atomic_loadclrhi,
12503 gen_aarch64_atomic_loadclrsi,
12504 gen_aarch64_atomic_loadclrdi
12505 };
12506 aarch64_atomic_load_op_fn gen;
12507 int idx = 0;
12508
12509 switch (mode)
12510 {
12511 case QImode: idx = 0; break;
12512 case HImode: idx = 1; break;
12513 case SImode: idx = 2; break;
12514 case DImode: idx = 3; break;
12515 default:
12516 gcc_unreachable ();
12517 }
12518
12519 switch (code)
12520 {
12521 case AARCH64_LDOP_PLUS: gen = plus[idx]; break;
12522 case AARCH64_LDOP_XOR: gen = eor[idx]; break;
12523 case AARCH64_LDOP_OR: gen = ior[idx]; break;
12524 case AARCH64_LDOP_BIC: gen = bic[idx]; break;
12525 default:
12526 gcc_unreachable ();
12527 }
12528
12529 emit_insn (gen (dst, mem, src, model));
12530}
12531
12532/* Emit an atomic load+operate. CODE is the operation. OUT_DATA is the
68729b06
MW
12533 location to store the data read from memory. OUT_RESULT is the location to
12534 store the result of the operation. MEM is the memory location to read and
12535 modify. MODEL_RTX is the memory ordering to use. VALUE is the second
12536 operand for the operation. Either OUT_DATA or OUT_RESULT, but not both, can
12537 be NULL. */
9cd7b720
MW
12538
12539void
68729b06 12540aarch64_gen_atomic_ldop (enum rtx_code code, rtx out_data, rtx out_result,
9cd7b720
MW
12541 rtx mem, rtx value, rtx model_rtx)
12542{
12543 machine_mode mode = GET_MODE (mem);
641c2f8b
MW
12544 machine_mode wmode = (mode == DImode ? DImode : SImode);
12545 const bool short_mode = (mode < SImode);
12546 aarch64_atomic_load_op_code ldop_code;
12547 rtx src;
12548 rtx x;
12549
12550 if (out_data)
12551 out_data = gen_lowpart (mode, out_data);
9cd7b720 12552
68729b06
MW
12553 if (out_result)
12554 out_result = gen_lowpart (mode, out_result);
12555
641c2f8b
MW
12556 /* Make sure the value is in a register, putting it into a destination
12557 register if it needs to be manipulated. */
12558 if (!register_operand (value, mode)
12559 || code == AND || code == MINUS)
12560 {
68729b06 12561 src = out_result ? out_result : out_data;
641c2f8b
MW
12562 emit_move_insn (src, gen_lowpart (mode, value));
12563 }
12564 else
12565 src = value;
12566 gcc_assert (register_operand (src, mode));
9cd7b720 12567
641c2f8b
MW
12568 /* Preprocess the data for the operation as necessary. If the operation is
12569 a SET then emit a swap instruction and finish. */
9cd7b720
MW
12570 switch (code)
12571 {
12572 case SET:
641c2f8b 12573 aarch64_emit_atomic_swap (mode, out_data, src, mem, model_rtx);
9cd7b720
MW
12574 return;
12575
641c2f8b
MW
12576 case MINUS:
12577 /* Negate the value and treat it as a PLUS. */
12578 {
12579 rtx neg_src;
12580
12581 /* Resize the value if necessary. */
12582 if (short_mode)
12583 src = gen_lowpart (wmode, src);
12584
12585 neg_src = gen_rtx_NEG (wmode, src);
12586 emit_insn (gen_rtx_SET (src, neg_src));
12587
12588 if (short_mode)
12589 src = gen_lowpart (mode, src);
12590 }
12591 /* Fall-through. */
12592 case PLUS:
12593 ldop_code = AARCH64_LDOP_PLUS;
12594 break;
12595
12596 case IOR:
12597 ldop_code = AARCH64_LDOP_OR;
12598 break;
12599
12600 case XOR:
12601 ldop_code = AARCH64_LDOP_XOR;
12602 break;
12603
12604 case AND:
12605 {
12606 rtx not_src;
12607
12608 /* Resize the value if necessary. */
12609 if (short_mode)
12610 src = gen_lowpart (wmode, src);
12611
12612 not_src = gen_rtx_NOT (wmode, src);
12613 emit_insn (gen_rtx_SET (src, not_src));
12614
12615 if (short_mode)
12616 src = gen_lowpart (mode, src);
12617 }
12618 ldop_code = AARCH64_LDOP_BIC;
12619 break;
12620
9cd7b720
MW
12621 default:
12622 /* The operation can't be done with atomic instructions. */
12623 gcc_unreachable ();
12624 }
641c2f8b
MW
12625
12626 aarch64_emit_atomic_load_op (ldop_code, mode, out_data, src, mem, model_rtx);
68729b06
MW
12627
12628 /* If necessary, calculate the data in memory after the update by redoing the
12629 operation from values in registers. */
12630 if (!out_result)
12631 return;
12632
12633 if (short_mode)
12634 {
12635 src = gen_lowpart (wmode, src);
12636 out_data = gen_lowpart (wmode, out_data);
12637 out_result = gen_lowpart (wmode, out_result);
12638 }
12639
12640 x = NULL_RTX;
12641
12642 switch (code)
12643 {
12644 case MINUS:
12645 case PLUS:
12646 x = gen_rtx_PLUS (wmode, out_data, src);
12647 break;
12648 case IOR:
12649 x = gen_rtx_IOR (wmode, out_data, src);
12650 break;
12651 case XOR:
12652 x = gen_rtx_XOR (wmode, out_data, src);
12653 break;
12654 case AND:
12655 aarch64_emit_bic (wmode, out_result, out_data, src, 0);
12656 return;
12657 default:
12658 gcc_unreachable ();
12659 }
12660
12661 emit_set_insn (out_result, x);
12662
12663 return;
9cd7b720
MW
12664}
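/* Worked sketch of the two preprocessed cases above (illustration only):
   AND has no direct load-operate encoding, so the value is inverted in the
   destination register and LDCLR (atomically computing A & ~B) is used; if
   the post-operation value is also wanted, it is recomputed afterwards with a
   BIC of the loaded data and the already-inverted value.  MINUS is handled
   the same way by negating the value and using the PLUS/LDADD path.  */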
12665
0462169c
SN
12666/* Split an atomic operation. */
12667
12668void
12669aarch64_split_atomic_op (enum rtx_code code, rtx old_out, rtx new_out, rtx mem,
9cd7b720 12670 rtx value, rtx model_rtx, rtx cond)
0462169c 12671{
ef4bddc2
RS
12672 machine_mode mode = GET_MODE (mem);
12673 machine_mode wmode = (mode == DImode ? DImode : SImode);
f70fb3b6
MW
12674 const enum memmodel model = memmodel_from_int (INTVAL (model_rtx));
12675 const bool is_sync = is_mm_sync (model);
5d8a22a5
DM
12676 rtx_code_label *label;
12677 rtx x;
0462169c 12678
9cd7b720 12679 /* Split the atomic operation into a sequence. */
0462169c
SN
12680 label = gen_label_rtx ();
12681 emit_label (label);
12682
12683 if (new_out)
12684 new_out = gen_lowpart (wmode, new_out);
12685 if (old_out)
12686 old_out = gen_lowpart (wmode, old_out);
12687 else
12688 old_out = new_out;
12689 value = simplify_gen_subreg (wmode, value, mode, 0);
12690
f70fb3b6
MW
12691 /* The initial load can be relaxed for a __sync operation since a final
12692 barrier will be emitted to stop code hoisting. */
12693 if (is_sync)
12694 aarch64_emit_load_exclusive (mode, old_out, mem,
12695 GEN_INT (MEMMODEL_RELAXED));
12696 else
12697 aarch64_emit_load_exclusive (mode, old_out, mem, model_rtx);
0462169c
SN
12698
12699 switch (code)
12700 {
12701 case SET:
12702 new_out = value;
12703 break;
12704
12705 case NOT:
12706 x = gen_rtx_AND (wmode, old_out, value);
f7df4a84 12707 emit_insn (gen_rtx_SET (new_out, x));
0462169c 12708 x = gen_rtx_NOT (wmode, new_out);
f7df4a84 12709 emit_insn (gen_rtx_SET (new_out, x));
0462169c
SN
12710 break;
12711
12712 case MINUS:
12713 if (CONST_INT_P (value))
12714 {
12715 value = GEN_INT (-INTVAL (value));
12716 code = PLUS;
12717 }
12718 /* Fall through. */
12719
12720 default:
12721 x = gen_rtx_fmt_ee (code, wmode, old_out, value);
f7df4a84 12722 emit_insn (gen_rtx_SET (new_out, x));
0462169c
SN
12723 break;
12724 }
12725
12726 aarch64_emit_store_exclusive (mode, cond, mem,
12727 gen_lowpart (mode, new_out), model_rtx);
12728
12729 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
12730 x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
12731 gen_rtx_LABEL_REF (Pmode, label), pc_rtx);
f7df4a84 12732 aarch64_emit_unlikely_jump (gen_rtx_SET (pc_rtx, x));
f70fb3b6
MW
12733
12734 /* Emit any final barrier needed for a __sync operation. */
12735 if (is_sync)
12736 aarch64_emit_post_barrier (model);
0462169c
SN
12737}
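/* Illustrative shape of the split sequence for a relaxed fetch-and-add on a
   32-bit value (register names are arbitrary, shown only as a sketch):

     .retry:
	ldxr	w0, [x1]	; load-exclusive the old value
	add	w2, w0, w3	; apply the operation
	stxr	w4, w2, [x1]	; store-exclusive the new value
	cbnz	w4, .retry	; retry if the exclusive store failed

   Acquire/release orderings use LDAXR/STLXR instead, and a __sync operation
   additionally gets a trailing barrier from aarch64_emit_post_barrier.  */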
12738
c2ec330c
AL
12739static void
12740aarch64_init_libfuncs (void)
12741{
12742 /* Half-precision float operations. The compiler handles all operations
12743 with NULL libfuncs by converting to SFmode. */
12744
12745 /* Conversions. */
12746 set_conv_libfunc (trunc_optab, HFmode, SFmode, "__gnu_f2h_ieee");
12747 set_conv_libfunc (sext_optab, SFmode, HFmode, "__gnu_h2f_ieee");
12748
12749 /* Arithmetic. */
12750 set_optab_libfunc (add_optab, HFmode, NULL);
12751 set_optab_libfunc (sdiv_optab, HFmode, NULL);
12752 set_optab_libfunc (smul_optab, HFmode, NULL);
12753 set_optab_libfunc (neg_optab, HFmode, NULL);
12754 set_optab_libfunc (sub_optab, HFmode, NULL);
12755
12756 /* Comparisons. */
12757 set_optab_libfunc (eq_optab, HFmode, NULL);
12758 set_optab_libfunc (ne_optab, HFmode, NULL);
12759 set_optab_libfunc (lt_optab, HFmode, NULL);
12760 set_optab_libfunc (le_optab, HFmode, NULL);
12761 set_optab_libfunc (ge_optab, HFmode, NULL);
12762 set_optab_libfunc (gt_optab, HFmode, NULL);
12763 set_optab_libfunc (unord_optab, HFmode, NULL);
12764}
12765
43e9d192 12766/* Target hook for c_mode_for_suffix. */
ef4bddc2 12767static machine_mode
43e9d192
IB
12768aarch64_c_mode_for_suffix (char suffix)
12769{
12770 if (suffix == 'q')
12771 return TFmode;
12772
12773 return VOIDmode;
12774}
12775
3520f7cc
JG
12776/* We can only represent floating point constants which will fit in
12777 "quarter-precision" values. These values are characterised by
12778   a sign bit, a 4-bit mantissa and a 3-bit exponent, and are given
12779 by:
12780
12781 (-1)^s * (n/16) * 2^r
12782
12783 Where:
12784 's' is the sign bit.
12785 'n' is an integer in the range 16 <= n <= 31.
12786 'r' is an integer in the range -3 <= r <= 4. */
12787
12788/* Return true iff X can be represented by a quarter-precision
12789   floating point immediate operand.  Note, we cannot represent 0.0. */
12790bool
12791aarch64_float_const_representable_p (rtx x)
12792{
12793 /* This represents our current view of how many bits
12794 make up the mantissa. */
12795 int point_pos = 2 * HOST_BITS_PER_WIDE_INT - 1;
ba96cdfb 12796 int exponent;
3520f7cc 12797 unsigned HOST_WIDE_INT mantissa, mask;
3520f7cc 12798 REAL_VALUE_TYPE r, m;
807e902e 12799 bool fail;
3520f7cc
JG
12800
12801 if (!CONST_DOUBLE_P (x))
12802 return false;
12803
c2ec330c
AL
12804 /* We don't support HFmode constants yet. */
12805 if (GET_MODE (x) == VOIDmode || GET_MODE (x) == HFmode)
94bfa2da
TV
12806 return false;
12807
34a72c33 12808 r = *CONST_DOUBLE_REAL_VALUE (x);
3520f7cc
JG
12809
12810 /* We cannot represent infinities, NaNs or +/-zero. We won't
12811 know if we have +zero until we analyse the mantissa, but we
12812 can reject the other invalid values. */
12813 if (REAL_VALUE_ISINF (r) || REAL_VALUE_ISNAN (r)
12814 || REAL_VALUE_MINUS_ZERO (r))
12815 return false;
12816
ba96cdfb 12817 /* Extract exponent. */
3520f7cc
JG
12818 r = real_value_abs (&r);
12819 exponent = REAL_EXP (&r);
12820
12821 /* For the mantissa, we expand into two HOST_WIDE_INTS, apart from the
12822 highest (sign) bit, with a fixed binary point at bit point_pos.
12823 m1 holds the low part of the mantissa, m2 the high part.
12824 WARNING: If we ever have a representation using more than 2 * H_W_I - 1
12825 bits for the mantissa, this can fail (low bits will be lost). */
12826 real_ldexp (&m, &r, point_pos - exponent);
807e902e 12827 wide_int w = real_to_integer (&m, &fail, HOST_BITS_PER_WIDE_INT * 2);
3520f7cc
JG
12828
12829 /* If the low part of the mantissa has bits set we cannot represent
12830 the value. */
d9074b29 12831 if (w.ulow () != 0)
3520f7cc
JG
12832 return false;
12833 /* We have rejected the lower HOST_WIDE_INT, so update our
12834 understanding of how many bits lie in the mantissa and
12835 look only at the high HOST_WIDE_INT. */
807e902e 12836 mantissa = w.elt (1);
3520f7cc
JG
12837 point_pos -= HOST_BITS_PER_WIDE_INT;
12838
12839 /* We can only represent values with a mantissa of the form 1.xxxx. */
12840 mask = ((unsigned HOST_WIDE_INT)1 << (point_pos - 5)) - 1;
12841 if ((mantissa & mask) != 0)
12842 return false;
12843
12844 /* Having filtered unrepresentable values, we may now remove all
12845 but the highest 5 bits. */
12846 mantissa >>= point_pos - 5;
12847
12848 /* We cannot represent the value 0.0, so reject it. This is handled
12849 elsewhere. */
12850 if (mantissa == 0)
12851 return false;
12852
12853 /* Then, as bit 4 is always set, we can mask it off, leaving
12854 the mantissa in the range [0, 15]. */
12855 mantissa &= ~(1 << 4);
12856 gcc_assert (mantissa <= 15);
12857
12858 /* GCC internally does not use IEEE754-like encoding (where normalized
12859 significands are in the range [1, 2). GCC uses [0.5, 1) (see real.c).
12860 Our mantissa values are shifted 4 places to the left relative to
12861 normalized IEEE754 so we must modify the exponent returned by REAL_EXP
12862 by 5 places to correct for GCC's representation. */
12863 exponent = 5 - exponent;
12864
12865 return (exponent >= 0 && exponent <= 7);
12866}
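/* Standalone sketch (illustration only): enumerate every value the predicate
   above accepts, straight from the formula (-1)^s * (n/16) * 2^r with
   16 <= n <= 31 and -3 <= r <= 4, giving 2 * 16 * 8 = 256 values such as
   0.5, 1.75 and 31.0.  */

#include <stdio.h>
#include <math.h>

int
main (void)
{
  for (int s = 0; s <= 1; s++)
    for (int n = 16; n <= 31; n++)
      for (int r = -3; r <= 4; r++)
	printf ("%g\n", (s ? -1.0 : 1.0) * (n / 16.0) * ldexp (1.0, r));
  return 0;
}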
12867
12868char*
81c2dfb9 12869aarch64_output_simd_mov_immediate (rtx const_vector,
ef4bddc2 12870 machine_mode mode,
3520f7cc
JG
12871 unsigned width)
12872{
3ea63f60 12873 bool is_valid;
3520f7cc 12874 static char templ[40];
3520f7cc 12875 const char *mnemonic;
e4f0f84d 12876 const char *shift_op;
3520f7cc 12877 unsigned int lane_count = 0;
81c2dfb9 12878 char element_char;
3520f7cc 12879
e4f0f84d 12880 struct simd_immediate_info info = { NULL_RTX, 0, 0, false, false };
48063b9d
IB
12881
12882  /* This will return true to show const_vector is legal for use as an
12883     AdvSIMD MOVI (or, implicitly, MVNI) instruction immediate.  It will
12884 also update INFO to show how the immediate should be generated. */
81c2dfb9 12885 is_valid = aarch64_simd_valid_immediate (const_vector, mode, false, &info);
3520f7cc
JG
12886 gcc_assert (is_valid);
12887
81c2dfb9 12888 element_char = sizetochar (info.element_width);
48063b9d
IB
12889 lane_count = width / info.element_width;
12890
3520f7cc 12891 mode = GET_MODE_INNER (mode);
0d8e1702 12892 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
3520f7cc 12893 {
48063b9d 12894 gcc_assert (info.shift == 0 && ! info.mvn);
0d8e1702
KT
12895 /* For FP zero change it to a CONST_INT 0 and use the integer SIMD
12896 move immediate path. */
48063b9d
IB
12897 if (aarch64_float_const_zero_rtx_p (info.value))
12898 info.value = GEN_INT (0);
12899 else
12900 {
83faf7d0 12901 const unsigned int buf_size = 20;
48063b9d 12902 char float_buf[buf_size] = {'\0'};
34a72c33
RS
12903 real_to_decimal_for_mode (float_buf,
12904 CONST_DOUBLE_REAL_VALUE (info.value),
12905 buf_size, buf_size, 1, mode);
48063b9d
IB
12906
12907 if (lane_count == 1)
12908 snprintf (templ, sizeof (templ), "fmov\t%%d0, %s", float_buf);
12909 else
12910 snprintf (templ, sizeof (templ), "fmov\t%%0.%d%c, %s",
81c2dfb9 12911 lane_count, element_char, float_buf);
48063b9d
IB
12912 return templ;
12913 }
3520f7cc 12914 }
3520f7cc 12915
48063b9d 12916 mnemonic = info.mvn ? "mvni" : "movi";
e4f0f84d 12917 shift_op = info.msl ? "msl" : "lsl";
3520f7cc 12918
0d8e1702 12919 gcc_assert (CONST_INT_P (info.value));
3520f7cc 12920 if (lane_count == 1)
48063b9d
IB
12921 snprintf (templ, sizeof (templ), "%s\t%%d0, " HOST_WIDE_INT_PRINT_HEX,
12922 mnemonic, UINTVAL (info.value));
12923 else if (info.shift)
12924 snprintf (templ, sizeof (templ), "%s\t%%0.%d%c, " HOST_WIDE_INT_PRINT_HEX
e4f0f84d
TB
12925 ", %s %d", mnemonic, lane_count, element_char,
12926 UINTVAL (info.value), shift_op, info.shift);
3520f7cc 12927 else
48063b9d 12928 snprintf (templ, sizeof (templ), "%s\t%%0.%d%c, " HOST_WIDE_INT_PRINT_HEX,
81c2dfb9 12929 mnemonic, lane_count, element_char, UINTVAL (info.value));
3520f7cc
JG
12930 return templ;
12931}
12932
b7342d25
IB
12933char*
12934aarch64_output_scalar_simd_mov_immediate (rtx immediate,
ef4bddc2 12935 machine_mode mode)
b7342d25 12936{
ef4bddc2 12937 machine_mode vmode;
b7342d25
IB
12938
12939 gcc_assert (!VECTOR_MODE_P (mode));
12940 vmode = aarch64_simd_container_mode (mode, 64);
12941 rtx v_op = aarch64_simd_gen_const_vector_dup (vmode, INTVAL (immediate));
12942 return aarch64_output_simd_mov_immediate (v_op, vmode, 64);
12943}
12944
88b08073
JG
12945/* Split operands into moves from op[1] + op[2] into op[0]. */
12946
12947void
12948aarch64_split_combinev16qi (rtx operands[3])
12949{
12950 unsigned int dest = REGNO (operands[0]);
12951 unsigned int src1 = REGNO (operands[1]);
12952 unsigned int src2 = REGNO (operands[2]);
ef4bddc2 12953 machine_mode halfmode = GET_MODE (operands[1]);
88b08073
JG
12954 unsigned int halfregs = HARD_REGNO_NREGS (src1, halfmode);
12955 rtx destlo, desthi;
12956
12957 gcc_assert (halfmode == V16QImode);
12958
12959 if (src1 == dest && src2 == dest + halfregs)
12960 {
12961 /* No-op move. Can't split to nothing; emit something. */
12962 emit_note (NOTE_INSN_DELETED);
12963 return;
12964 }
12965
12966 /* Preserve register attributes for variable tracking. */
12967 destlo = gen_rtx_REG_offset (operands[0], halfmode, dest, 0);
12968 desthi = gen_rtx_REG_offset (operands[0], halfmode, dest + halfregs,
12969 GET_MODE_SIZE (halfmode));
12970
12971 /* Special case of reversed high/low parts. */
12972 if (reg_overlap_mentioned_p (operands[2], destlo)
12973 && reg_overlap_mentioned_p (operands[1], desthi))
12974 {
12975 emit_insn (gen_xorv16qi3 (operands[1], operands[1], operands[2]));
12976 emit_insn (gen_xorv16qi3 (operands[2], operands[1], operands[2]));
12977 emit_insn (gen_xorv16qi3 (operands[1], operands[1], operands[2]));
12978 }
12979 else if (!reg_overlap_mentioned_p (operands[2], destlo))
12980 {
12981 /* Try to avoid unnecessary moves if part of the result
12982 is in the right place already. */
12983 if (src1 != dest)
12984 emit_move_insn (destlo, operands[1]);
12985 if (src2 != dest + halfregs)
12986 emit_move_insn (desthi, operands[2]);
12987 }
12988 else
12989 {
12990 if (src2 != dest + halfregs)
12991 emit_move_insn (desthi, operands[2]);
12992 if (src1 != dest)
12993 emit_move_insn (destlo, operands[1]);
12994 }
12995}
12996
12997/* vec_perm support. */
12998
12999#define MAX_VECT_LEN 16
13000
13001struct expand_vec_perm_d
13002{
13003 rtx target, op0, op1;
13004 unsigned char perm[MAX_VECT_LEN];
ef4bddc2 13005 machine_mode vmode;
88b08073
JG
13006 unsigned char nelt;
13007 bool one_vector_p;
13008 bool testing_p;
13009};
13010
13011/* Generate a variable permutation. */
13012
13013static void
13014aarch64_expand_vec_perm_1 (rtx target, rtx op0, rtx op1, rtx sel)
13015{
ef4bddc2 13016 machine_mode vmode = GET_MODE (target);
88b08073
JG
13017 bool one_vector_p = rtx_equal_p (op0, op1);
13018
13019 gcc_checking_assert (vmode == V8QImode || vmode == V16QImode);
13020 gcc_checking_assert (GET_MODE (op0) == vmode);
13021 gcc_checking_assert (GET_MODE (op1) == vmode);
13022 gcc_checking_assert (GET_MODE (sel) == vmode);
13023 gcc_checking_assert (TARGET_SIMD);
13024
13025 if (one_vector_p)
13026 {
13027 if (vmode == V8QImode)
13028 {
13029 /* Expand the argument to a V16QI mode by duplicating it. */
13030 rtx pair = gen_reg_rtx (V16QImode);
13031 emit_insn (gen_aarch64_combinev8qi (pair, op0, op0));
13032 emit_insn (gen_aarch64_tbl1v8qi (target, pair, sel));
13033 }
13034 else
13035 {
13036 emit_insn (gen_aarch64_tbl1v16qi (target, op0, sel));
13037 }
13038 }
13039 else
13040 {
13041 rtx pair;
13042
13043 if (vmode == V8QImode)
13044 {
13045 pair = gen_reg_rtx (V16QImode);
13046 emit_insn (gen_aarch64_combinev8qi (pair, op0, op1));
13047 emit_insn (gen_aarch64_tbl1v8qi (target, pair, sel));
13048 }
13049 else
13050 {
13051 pair = gen_reg_rtx (OImode);
13052 emit_insn (gen_aarch64_combinev16qi (pair, op0, op1));
13053 emit_insn (gen_aarch64_tbl2v16qi (target, pair, sel));
13054 }
13055 }
13056}
13057
13058void
13059aarch64_expand_vec_perm (rtx target, rtx op0, rtx op1, rtx sel)
13060{
ef4bddc2 13061 machine_mode vmode = GET_MODE (target);
c9d1a16a 13062 unsigned int nelt = GET_MODE_NUNITS (vmode);
88b08073 13063 bool one_vector_p = rtx_equal_p (op0, op1);
f7c4e5b8 13064 rtx mask;
88b08073
JG
13065
13066 /* The TBL instruction does not use a modulo index, so we must take care
13067 of that ourselves. */
f7c4e5b8
AL
13068 mask = aarch64_simd_gen_const_vector_dup (vmode,
13069 one_vector_p ? nelt - 1 : 2 * nelt - 1);
88b08073
JG
13070 sel = expand_simple_binop (vmode, AND, sel, mask, NULL, 0, OPTAB_LIB_WIDEN);
13071
f7c4e5b8
AL
13072 /* For big-endian, we also need to reverse the index within the vector
13073 (but not which vector). */
13074 if (BYTES_BIG_ENDIAN)
13075 {
13076 /* If one_vector_p, mask is a vector of (nelt - 1)'s already. */
13077 if (!one_vector_p)
13078 mask = aarch64_simd_gen_const_vector_dup (vmode, nelt - 1);
13079 sel = expand_simple_binop (vmode, XOR, sel, mask,
13080 NULL, 0, OPTAB_LIB_WIDEN);
13081 }
88b08073
JG
13082 aarch64_expand_vec_perm_1 (target, op0, op1, sel);
13083}
13084
cc4d934f
JG
13085/* Recognize patterns suitable for the TRN instructions. */
13086static bool
13087aarch64_evpc_trn (struct expand_vec_perm_d *d)
13088{
13089 unsigned int i, odd, mask, nelt = d->nelt;
13090 rtx out, in0, in1, x;
13091 rtx (*gen) (rtx, rtx, rtx);
ef4bddc2 13092 machine_mode vmode = d->vmode;
cc4d934f
JG
13093
13094 if (GET_MODE_UNIT_SIZE (vmode) > 8)
13095 return false;
13096
13097 /* Note that these are little-endian tests.
13098 We correct for big-endian later. */
13099 if (d->perm[0] == 0)
13100 odd = 0;
13101 else if (d->perm[0] == 1)
13102 odd = 1;
13103 else
13104 return false;
13105 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
13106
13107 for (i = 0; i < nelt; i += 2)
13108 {
13109 if (d->perm[i] != i + odd)
13110 return false;
13111 if (d->perm[i + 1] != ((i + nelt + odd) & mask))
13112 return false;
13113 }
13114
13115 /* Success! */
13116 if (d->testing_p)
13117 return true;
13118
13119 in0 = d->op0;
13120 in1 = d->op1;
13121 if (BYTES_BIG_ENDIAN)
13122 {
13123 x = in0, in0 = in1, in1 = x;
13124 odd = !odd;
13125 }
13126 out = d->target;
13127
13128 if (odd)
13129 {
13130 switch (vmode)
13131 {
13132 case V16QImode: gen = gen_aarch64_trn2v16qi; break;
13133 case V8QImode: gen = gen_aarch64_trn2v8qi; break;
13134 case V8HImode: gen = gen_aarch64_trn2v8hi; break;
13135 case V4HImode: gen = gen_aarch64_trn2v4hi; break;
13136 case V4SImode: gen = gen_aarch64_trn2v4si; break;
13137 case V2SImode: gen = gen_aarch64_trn2v2si; break;
13138 case V2DImode: gen = gen_aarch64_trn2v2di; break;
358decd5
JW
13139 case V4HFmode: gen = gen_aarch64_trn2v4hf; break;
13140 case V8HFmode: gen = gen_aarch64_trn2v8hf; break;
cc4d934f
JG
13141 case V4SFmode: gen = gen_aarch64_trn2v4sf; break;
13142 case V2SFmode: gen = gen_aarch64_trn2v2sf; break;
13143 case V2DFmode: gen = gen_aarch64_trn2v2df; break;
13144 default:
13145 return false;
13146 }
13147 }
13148 else
13149 {
13150 switch (vmode)
13151 {
13152 case V16QImode: gen = gen_aarch64_trn1v16qi; break;
13153 case V8QImode: gen = gen_aarch64_trn1v8qi; break;
13154 case V8HImode: gen = gen_aarch64_trn1v8hi; break;
13155 case V4HImode: gen = gen_aarch64_trn1v4hi; break;
13156 case V4SImode: gen = gen_aarch64_trn1v4si; break;
13157 case V2SImode: gen = gen_aarch64_trn1v2si; break;
13158 case V2DImode: gen = gen_aarch64_trn1v2di; break;
358decd5
JW
13159 case V4HFmode: gen = gen_aarch64_trn1v4hf; break;
13160 case V8HFmode: gen = gen_aarch64_trn1v8hf; break;
cc4d934f
JG
13161 case V4SFmode: gen = gen_aarch64_trn1v4sf; break;
13162 case V2SFmode: gen = gen_aarch64_trn1v2sf; break;
13163 case V2DFmode: gen = gen_aarch64_trn1v2df; break;
13164 default:
13165 return false;
13166 }
13167 }
13168
13169 emit_insn (gen (out, in0, in1));
13170 return true;
13171}
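/* Example of the selector shape matched above (V4SI, inputs {a0,a1,a2,a3} and
   {b0,b1,b2,b3}):
     {0, 4, 2, 6} -> TRN1, giving {a0, b0, a2, b2}
     {1, 5, 3, 7} -> TRN2, giving {a1, b1, a3, b3}  */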
13172
13173/* Recognize patterns suitable for the UZP instructions. */
13174static bool
13175aarch64_evpc_uzp (struct expand_vec_perm_d *d)
13176{
13177 unsigned int i, odd, mask, nelt = d->nelt;
13178 rtx out, in0, in1, x;
13179 rtx (*gen) (rtx, rtx, rtx);
ef4bddc2 13180 machine_mode vmode = d->vmode;
cc4d934f
JG
13181
13182 if (GET_MODE_UNIT_SIZE (vmode) > 8)
13183 return false;
13184
13185 /* Note that these are little-endian tests.
13186 We correct for big-endian later. */
13187 if (d->perm[0] == 0)
13188 odd = 0;
13189 else if (d->perm[0] == 1)
13190 odd = 1;
13191 else
13192 return false;
13193 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
13194
13195 for (i = 0; i < nelt; i++)
13196 {
13197 unsigned elt = (i * 2 + odd) & mask;
13198 if (d->perm[i] != elt)
13199 return false;
13200 }
13201
13202 /* Success! */
13203 if (d->testing_p)
13204 return true;
13205
13206 in0 = d->op0;
13207 in1 = d->op1;
13208 if (BYTES_BIG_ENDIAN)
13209 {
13210 x = in0, in0 = in1, in1 = x;
13211 odd = !odd;
13212 }
13213 out = d->target;
13214
13215 if (odd)
13216 {
13217 switch (vmode)
13218 {
13219 case V16QImode: gen = gen_aarch64_uzp2v16qi; break;
13220 case V8QImode: gen = gen_aarch64_uzp2v8qi; break;
13221 case V8HImode: gen = gen_aarch64_uzp2v8hi; break;
13222 case V4HImode: gen = gen_aarch64_uzp2v4hi; break;
13223 case V4SImode: gen = gen_aarch64_uzp2v4si; break;
13224 case V2SImode: gen = gen_aarch64_uzp2v2si; break;
13225 case V2DImode: gen = gen_aarch64_uzp2v2di; break;
358decd5
JW
13226 case V4HFmode: gen = gen_aarch64_uzp2v4hf; break;
13227 case V8HFmode: gen = gen_aarch64_uzp2v8hf; break;
cc4d934f
JG
13228 case V4SFmode: gen = gen_aarch64_uzp2v4sf; break;
13229 case V2SFmode: gen = gen_aarch64_uzp2v2sf; break;
13230 case V2DFmode: gen = gen_aarch64_uzp2v2df; break;
13231 default:
13232 return false;
13233 }
13234 }
13235 else
13236 {
13237 switch (vmode)
13238 {
13239 case V16QImode: gen = gen_aarch64_uzp1v16qi; break;
13240 case V8QImode: gen = gen_aarch64_uzp1v8qi; break;
13241 case V8HImode: gen = gen_aarch64_uzp1v8hi; break;
13242 case V4HImode: gen = gen_aarch64_uzp1v4hi; break;
13243 case V4SImode: gen = gen_aarch64_uzp1v4si; break;
13244 case V2SImode: gen = gen_aarch64_uzp1v2si; break;
13245 case V2DImode: gen = gen_aarch64_uzp1v2di; break;
358decd5
JW
13246 case V4HFmode: gen = gen_aarch64_uzp1v4hf; break;
13247 case V8HFmode: gen = gen_aarch64_uzp1v8hf; break;
cc4d934f
JG
13248 case V4SFmode: gen = gen_aarch64_uzp1v4sf; break;
13249 case V2SFmode: gen = gen_aarch64_uzp1v2sf; break;
13250 case V2DFmode: gen = gen_aarch64_uzp1v2df; break;
13251 default:
13252 return false;
13253 }
13254 }
13255
13256 emit_insn (gen (out, in0, in1));
13257 return true;
13258}
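/* Example of the selector shape matched above (V4SI, inputs {a0,a1,a2,a3} and
   {b0,b1,b2,b3}):
     {0, 2, 4, 6} -> UZP1, giving {a0, a2, b0, b2}
     {1, 3, 5, 7} -> UZP2, giving {a1, a3, b1, b3}  */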
13259
13260/* Recognize patterns suitable for the ZIP instructions. */
13261static bool
13262aarch64_evpc_zip (struct expand_vec_perm_d *d)
13263{
13264 unsigned int i, high, mask, nelt = d->nelt;
13265 rtx out, in0, in1, x;
13266 rtx (*gen) (rtx, rtx, rtx);
ef4bddc2 13267 machine_mode vmode = d->vmode;
cc4d934f
JG
13268
13269 if (GET_MODE_UNIT_SIZE (vmode) > 8)
13270 return false;
13271
13272 /* Note that these are little-endian tests.
13273 We correct for big-endian later. */
13274 high = nelt / 2;
13275 if (d->perm[0] == high)
13276 /* Do Nothing. */
13277 ;
13278 else if (d->perm[0] == 0)
13279 high = 0;
13280 else
13281 return false;
13282 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
13283
13284 for (i = 0; i < nelt / 2; i++)
13285 {
13286 unsigned elt = (i + high) & mask;
13287 if (d->perm[i * 2] != elt)
13288 return false;
13289 elt = (elt + nelt) & mask;
13290 if (d->perm[i * 2 + 1] != elt)
13291 return false;
13292 }
13293
13294 /* Success! */
13295 if (d->testing_p)
13296 return true;
13297
13298 in0 = d->op0;
13299 in1 = d->op1;
13300 if (BYTES_BIG_ENDIAN)
13301 {
13302 x = in0, in0 = in1, in1 = x;
13303 high = !high;
13304 }
13305 out = d->target;
13306
13307 if (high)
13308 {
13309 switch (vmode)
13310 {
13311 case V16QImode: gen = gen_aarch64_zip2v16qi; break;
13312 case V8QImode: gen = gen_aarch64_zip2v8qi; break;
13313 case V8HImode: gen = gen_aarch64_zip2v8hi; break;
13314 case V4HImode: gen = gen_aarch64_zip2v4hi; break;
13315 case V4SImode: gen = gen_aarch64_zip2v4si; break;
13316 case V2SImode: gen = gen_aarch64_zip2v2si; break;
13317 case V2DImode: gen = gen_aarch64_zip2v2di; break;
358decd5
JW
13318 case V4HFmode: gen = gen_aarch64_zip2v4hf; break;
13319 case V8HFmode: gen = gen_aarch64_zip2v8hf; break;
cc4d934f
JG
13320 case V4SFmode: gen = gen_aarch64_zip2v4sf; break;
13321 case V2SFmode: gen = gen_aarch64_zip2v2sf; break;
13322 case V2DFmode: gen = gen_aarch64_zip2v2df; break;
13323 default:
13324 return false;
13325 }
13326 }
13327 else
13328 {
13329 switch (vmode)
13330 {
13331 case V16QImode: gen = gen_aarch64_zip1v16qi; break;
13332 case V8QImode: gen = gen_aarch64_zip1v8qi; break;
13333 case V8HImode: gen = gen_aarch64_zip1v8hi; break;
13334 case V4HImode: gen = gen_aarch64_zip1v4hi; break;
13335 case V4SImode: gen = gen_aarch64_zip1v4si; break;
13336 case V2SImode: gen = gen_aarch64_zip1v2si; break;
13337 case V2DImode: gen = gen_aarch64_zip1v2di; break;
358decd5
JW
13338 case V4HFmode: gen = gen_aarch64_zip1v4hf; break;
13339 case V8HFmode: gen = gen_aarch64_zip1v8hf; break;
cc4d934f
JG
13340 case V4SFmode: gen = gen_aarch64_zip1v4sf; break;
13341 case V2SFmode: gen = gen_aarch64_zip1v2sf; break;
13342 case V2DFmode: gen = gen_aarch64_zip1v2df; break;
13343 default:
13344 return false;
13345 }
13346 }
13347
13348 emit_insn (gen (out, in0, in1));
13349 return true;
13350}
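/* Example of the selector shape matched above (V4SI, inputs {a0,a1,a2,a3} and
   {b0,b1,b2,b3}):
     {0, 4, 1, 5} -> ZIP1, giving {a0, b0, a1, b1}
     {2, 6, 3, 7} -> ZIP2, giving {a2, b2, a3, b3}  */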
13351
ae0533da
AL
13352/* Recognize patterns for the EXT insn. */
13353
13354static bool
13355aarch64_evpc_ext (struct expand_vec_perm_d *d)
13356{
13357 unsigned int i, nelt = d->nelt;
13358 rtx (*gen) (rtx, rtx, rtx, rtx);
13359 rtx offset;
13360
13361 unsigned int location = d->perm[0]; /* Always < nelt. */
13362
13363 /* Check if the extracted indices are increasing by one. */
13364 for (i = 1; i < nelt; i++)
13365 {
13366 unsigned int required = location + i;
13367 if (d->one_vector_p)
13368 {
13369 /* We'll pass the same vector in twice, so allow indices to wrap. */
13370 required &= (nelt - 1);
13371 }
13372 if (d->perm[i] != required)
13373 return false;
13374 }
13375
ae0533da
AL
13376 switch (d->vmode)
13377 {
13378 case V16QImode: gen = gen_aarch64_extv16qi; break;
13379 case V8QImode: gen = gen_aarch64_extv8qi; break;
13380 case V4HImode: gen = gen_aarch64_extv4hi; break;
13381 case V8HImode: gen = gen_aarch64_extv8hi; break;
13382 case V2SImode: gen = gen_aarch64_extv2si; break;
13383 case V4SImode: gen = gen_aarch64_extv4si; break;
358decd5
JW
13384 case V4HFmode: gen = gen_aarch64_extv4hf; break;
13385 case V8HFmode: gen = gen_aarch64_extv8hf; break;
ae0533da
AL
13386 case V2SFmode: gen = gen_aarch64_extv2sf; break;
13387 case V4SFmode: gen = gen_aarch64_extv4sf; break;
13388 case V2DImode: gen = gen_aarch64_extv2di; break;
13389 case V2DFmode: gen = gen_aarch64_extv2df; break;
13390 default:
13391 return false;
13392 }
13393
13394 /* Success! */
13395 if (d->testing_p)
13396 return true;
13397
b31e65bb
AL
13398 /* The case where (location == 0) is a no-op for both big- and little-endian,
13399 and is removed by the mid-end at optimization levels -O1 and higher. */
13400
13401 if (BYTES_BIG_ENDIAN && (location != 0))
ae0533da
AL
13402 {
13403 /* After setup, we want the high elements of the first vector (stored
13404 at the LSB end of the register), and the low elements of the second
13405 vector (stored at the MSB end of the register). So swap. */
cb5c6c29 13406 std::swap (d->op0, d->op1);
ae0533da
AL
13407 /* location != 0 (above), so safe to assume (nelt - location) < nelt. */
13408 location = nelt - location;
13409 }
13410
13411 offset = GEN_INT (location);
13412 emit_insn (gen (d->target, d->op0, d->op1, offset));
13413 return true;
13414}
13415
923fcec3
AL
13416/* Recognize patterns for the REV insns. */
13417
13418static bool
13419aarch64_evpc_rev (struct expand_vec_perm_d *d)
13420{
13421 unsigned int i, j, diff, nelt = d->nelt;
13422 rtx (*gen) (rtx, rtx);
13423
13424 if (!d->one_vector_p)
13425 return false;
13426
13427 diff = d->perm[0];
13428 switch (diff)
13429 {
13430 case 7:
13431 switch (d->vmode)
13432 {
13433 case V16QImode: gen = gen_aarch64_rev64v16qi; break;
13434 case V8QImode: gen = gen_aarch64_rev64v8qi; break;
13435 default:
13436 return false;
13437 }
13438 break;
13439 case 3:
13440 switch (d->vmode)
13441 {
13442 case V16QImode: gen = gen_aarch64_rev32v16qi; break;
13443 case V8QImode: gen = gen_aarch64_rev32v8qi; break;
13444 case V8HImode: gen = gen_aarch64_rev64v8hi; break;
13445 case V4HImode: gen = gen_aarch64_rev64v4hi; break;
13446 default:
13447 return false;
13448 }
13449 break;
13450 case 1:
13451 switch (d->vmode)
13452 {
13453 case V16QImode: gen = gen_aarch64_rev16v16qi; break;
13454 case V8QImode: gen = gen_aarch64_rev16v8qi; break;
13455 case V8HImode: gen = gen_aarch64_rev32v8hi; break;
13456 case V4HImode: gen = gen_aarch64_rev32v4hi; break;
13457 case V4SImode: gen = gen_aarch64_rev64v4si; break;
13458 case V2SImode: gen = gen_aarch64_rev64v2si; break;
13459 case V4SFmode: gen = gen_aarch64_rev64v4sf; break;
13460 case V2SFmode: gen = gen_aarch64_rev64v2sf; break;
358decd5
JW
13461 case V8HFmode: gen = gen_aarch64_rev64v8hf; break;
13462 case V4HFmode: gen = gen_aarch64_rev64v4hf; break;
923fcec3
AL
13463 default:
13464 return false;
13465 }
13466 break;
13467 default:
13468 return false;
13469 }
13470
13471 for (i = 0; i < nelt ; i += diff + 1)
13472 for (j = 0; j <= diff; j += 1)
13473 {
13474 /* This is guaranteed to be true as the value of diff
13475 is 7, 3, 1 and we should have enough elements in the
13476 queue to generate this. Getting a vector mask with a
13477 value of diff other than these values implies that
13478 something is wrong by the time we get here. */
13479 gcc_assert (i + j < nelt);
13480 if (d->perm[i + j] != i + diff - j)
13481 return false;
13482 }
13483
13484 /* Success! */
13485 if (d->testing_p)
13486 return true;
13487
13488 emit_insn (gen (d->target, d->op0));
13489 return true;
13490}
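/* Example of the selector shape matched above (single V4SI input {a0..a3}):
   {1, 0, 3, 2} has diff == 1 and maps to REV64, which reverses the 32-bit
   elements within each 64-bit doubleword, producing {a1, a0, a3, a2}.  */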
13491
91bd4114
JG
13492static bool
13493aarch64_evpc_dup (struct expand_vec_perm_d *d)
13494{
13495 rtx (*gen) (rtx, rtx, rtx);
13496 rtx out = d->target;
13497 rtx in0;
ef4bddc2 13498 machine_mode vmode = d->vmode;
91bd4114
JG
13499 unsigned int i, elt, nelt = d->nelt;
13500 rtx lane;
13501
91bd4114
JG
13502 elt = d->perm[0];
13503 for (i = 1; i < nelt; i++)
13504 {
13505 if (elt != d->perm[i])
13506 return false;
13507 }
13508
13509 /* The generic preparation in aarch64_expand_vec_perm_const_1
13510 swaps the operand order and the permute indices if it finds
13511 d->perm[0] to be in the second operand. Thus, we can always
13512 use d->op0 and need not do any extra arithmetic to get the
13513 correct lane number. */
13514 in0 = d->op0;
f901401e 13515 lane = GEN_INT (elt); /* The pattern corrects for big-endian. */
91bd4114
JG
13516
13517 switch (vmode)
13518 {
13519 case V16QImode: gen = gen_aarch64_dup_lanev16qi; break;
13520 case V8QImode: gen = gen_aarch64_dup_lanev8qi; break;
13521 case V8HImode: gen = gen_aarch64_dup_lanev8hi; break;
13522 case V4HImode: gen = gen_aarch64_dup_lanev4hi; break;
13523 case V4SImode: gen = gen_aarch64_dup_lanev4si; break;
13524 case V2SImode: gen = gen_aarch64_dup_lanev2si; break;
13525 case V2DImode: gen = gen_aarch64_dup_lanev2di; break;
862abc04
AL
13526 case V8HFmode: gen = gen_aarch64_dup_lanev8hf; break;
13527 case V4HFmode: gen = gen_aarch64_dup_lanev4hf; break;
91bd4114
JG
13528 case V4SFmode: gen = gen_aarch64_dup_lanev4sf; break;
13529 case V2SFmode: gen = gen_aarch64_dup_lanev2sf; break;
13530 case V2DFmode: gen = gen_aarch64_dup_lanev2df; break;
13531 default:
13532 return false;
13533 }
13534
13535 emit_insn (gen (out, in0, lane));
13536 return true;
13537}
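/* Example of the selector shape matched above: a V4SI selector of
   {2, 2, 2, 2} broadcasts a single lane and is emitted as
   "dup v0.4s, v1.s[2]" (the pattern itself corrects the lane number for
   big-endian).  */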
13538
88b08073
JG
13539static bool
13540aarch64_evpc_tbl (struct expand_vec_perm_d *d)
13541{
13542 rtx rperm[MAX_VECT_LEN], sel;
ef4bddc2 13543 machine_mode vmode = d->vmode;
88b08073
JG
13544 unsigned int i, nelt = d->nelt;
13545
88b08073
JG
13546 if (d->testing_p)
13547 return true;
13548
13549 /* Generic code will try constant permutation twice. Once with the
13550 original mode and again with the elements lowered to QImode.
13551 So wait and don't do the selector expansion ourselves. */
13552 if (vmode != V8QImode && vmode != V16QImode)
13553 return false;
13554
13555 for (i = 0; i < nelt; ++i)
bbcc9c00
TB
13556 {
13557 int nunits = GET_MODE_NUNITS (vmode);
13558
13559 /* If big-endian and two vectors we end up with a weird mixed-endian
13560 mode on NEON. Reverse the index within each word but not the word
13561 itself. */
13562 rperm[i] = GEN_INT (BYTES_BIG_ENDIAN ? d->perm[i] ^ (nunits - 1)
13563 : d->perm[i]);
13564 }
88b08073
JG
13565 sel = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, rperm));
13566 sel = force_reg (vmode, sel);
13567
13568 aarch64_expand_vec_perm_1 (d->target, d->op0, d->op1, sel);
13569 return true;
13570}
13571
13572static bool
13573aarch64_expand_vec_perm_const_1 (struct expand_vec_perm_d *d)
13574{
13575 /* The pattern matching functions above are written to look for a small
13576 number to begin the sequence (0, 1, N/2). If we begin with an index
13577 from the second operand, we can swap the operands. */
13578 if (d->perm[0] >= d->nelt)
13579 {
13580 unsigned i, nelt = d->nelt;
88b08073 13581
0696116a 13582 gcc_assert (nelt == (nelt & -nelt));
88b08073 13583 for (i = 0; i < nelt; ++i)
0696116a 13584 d->perm[i] ^= nelt; /* Keep the same index, but in the other vector. */
88b08073 13585
cb5c6c29 13586 std::swap (d->op0, d->op1);
88b08073
JG
13587 }
13588
13589 if (TARGET_SIMD)
cc4d934f 13590 {
923fcec3
AL
13591 if (aarch64_evpc_rev (d))
13592 return true;
13593 else if (aarch64_evpc_ext (d))
ae0533da 13594 return true;
f901401e
AL
13595 else if (aarch64_evpc_dup (d))
13596 return true;
ae0533da 13597 else if (aarch64_evpc_zip (d))
cc4d934f
JG
13598 return true;
13599 else if (aarch64_evpc_uzp (d))
13600 return true;
13601 else if (aarch64_evpc_trn (d))
13602 return true;
13603 return aarch64_evpc_tbl (d);
13604 }
88b08073
JG
13605 return false;
13606}
13607
13608/* Expand a vec_perm_const pattern. */
13609
13610bool
13611aarch64_expand_vec_perm_const (rtx target, rtx op0, rtx op1, rtx sel)
13612{
13613 struct expand_vec_perm_d d;
13614 int i, nelt, which;
13615
13616 d.target = target;
13617 d.op0 = op0;
13618 d.op1 = op1;
13619
13620 d.vmode = GET_MODE (target);
13621 gcc_assert (VECTOR_MODE_P (d.vmode));
13622 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
13623 d.testing_p = false;
13624
13625 for (i = which = 0; i < nelt; ++i)
13626 {
13627 rtx e = XVECEXP (sel, 0, i);
13628 int ei = INTVAL (e) & (2 * nelt - 1);
13629 which |= (ei < nelt ? 1 : 2);
13630 d.perm[i] = ei;
13631 }
13632
13633 switch (which)
13634 {
13635 default:
13636 gcc_unreachable ();
13637
13638 case 3:
13639 d.one_vector_p = false;
13640 if (!rtx_equal_p (op0, op1))
13641 break;
13642
13643 /* The elements of PERM do not suggest that only the first operand
13644 is used, but both operands are identical. Allow easier matching
13645 of the permutation by folding the permutation into the single
13646 input vector. */
13647 /* Fall Through. */
13648 case 2:
13649 for (i = 0; i < nelt; ++i)
13650 d.perm[i] &= nelt - 1;
13651 d.op0 = op1;
13652 d.one_vector_p = true;
13653 break;
13654
13655 case 1:
13656 d.op1 = op0;
13657 d.one_vector_p = true;
13658 break;
13659 }
13660
13661 return aarch64_expand_vec_perm_const_1 (&d);
13662}
13663
13664static bool
ef4bddc2 13665aarch64_vectorize_vec_perm_const_ok (machine_mode vmode,
88b08073
JG
13666 const unsigned char *sel)
13667{
13668 struct expand_vec_perm_d d;
13669 unsigned int i, nelt, which;
13670 bool ret;
13671
13672 d.vmode = vmode;
13673 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
13674 d.testing_p = true;
13675 memcpy (d.perm, sel, nelt);
13676
13677 /* Calculate whether all elements are in one vector. */
13678 for (i = which = 0; i < nelt; ++i)
13679 {
13680 unsigned char e = d.perm[i];
13681 gcc_assert (e < 2 * nelt);
13682 which |= (e < nelt ? 1 : 2);
13683 }
13684
13685 /* If all elements are from the second vector, reindex as if from the
13686 first vector. */
13687 if (which == 2)
13688 for (i = 0; i < nelt; ++i)
13689 d.perm[i] -= nelt;
13690
13691 /* Check whether the mask can be applied to a single vector. */
13692 d.one_vector_p = (which != 3);
13693
13694 d.target = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 1);
13695 d.op1 = d.op0 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 2);
13696 if (!d.one_vector_p)
13697 d.op1 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 3);
13698
13699 start_sequence ();
13700 ret = aarch64_expand_vec_perm_const_1 (&d);
13701 end_sequence ();
13702
13703 return ret;
13704}
13705
668046d1
DS
13706rtx
13707aarch64_reverse_mask (enum machine_mode mode)
13708{
13709  /* We have to reverse each vector because we don't have
13710 a permuted load that can reverse-load according to ABI rules. */
13711 rtx mask;
13712 rtvec v = rtvec_alloc (16);
13713 int i, j;
13714 int nunits = GET_MODE_NUNITS (mode);
13715 int usize = GET_MODE_UNIT_SIZE (mode);
13716
13717 gcc_assert (BYTES_BIG_ENDIAN);
13718 gcc_assert (AARCH64_VALID_SIMD_QREG_MODE (mode));
13719
13720 for (i = 0; i < nunits; i++)
13721 for (j = 0; j < usize; j++)
13722 RTVEC_ELT (v, i * usize + j) = GEN_INT ((i + 1) * usize - 1 - j);
13723 mask = gen_rtx_CONST_VECTOR (V16QImode, v);
13724 return force_reg (V16QImode, mask);
13725}
13726
61f17a5c
WD
13727/* Implement MODES_TIEABLE_P. In principle we should always return true.
13728 However due to issues with register allocation it is preferable to avoid
13729   tying integer scalar and FP scalar modes.  Executing integer operations
13730 in general registers is better than treating them as scalar vector
13731 operations. This reduces latency and avoids redundant int<->FP moves.
13732 So tie modes if they are either the same class, or vector modes with
13733 other vector modes, vector structs or any scalar mode.
13734*/
97e1ad78
JG
13735
13736bool
ef4bddc2 13737aarch64_modes_tieable_p (machine_mode mode1, machine_mode mode2)
97e1ad78
JG
13738{
13739 if (GET_MODE_CLASS (mode1) == GET_MODE_CLASS (mode2))
13740 return true;
13741
13742 /* We specifically want to allow elements of "structure" modes to
13743 be tieable to the structure. This more general condition allows
13744 other rarer situations too. */
61f17a5c
WD
13745 if (aarch64_vector_mode_p (mode1) && aarch64_vector_mode_p (mode2))
13746 return true;
13747
13748 /* Also allow any scalar modes with vectors. */
13749 if (aarch64_vector_mode_supported_p (mode1)
13750 || aarch64_vector_mode_supported_p (mode2))
97e1ad78
JG
13751 return true;
13752
13753 return false;
13754}
13755
e2c75eea
JG
13756/* Return a new RTX holding the result of moving POINTER forward by
13757 AMOUNT bytes. */
13758
13759static rtx
13760aarch64_move_pointer (rtx pointer, int amount)
13761{
13762 rtx next = plus_constant (Pmode, XEXP (pointer, 0), amount);
13763
13764 return adjust_automodify_address (pointer, GET_MODE (pointer),
13765 next, amount);
13766}
13767
13768/* Return a new RTX holding the result of moving POINTER forward by the
13769 size of the mode it points to. */
13770
13771static rtx
13772aarch64_progress_pointer (rtx pointer)
13773{
13774 HOST_WIDE_INT amount = GET_MODE_SIZE (GET_MODE (pointer));
13775
13776 return aarch64_move_pointer (pointer, amount);
13777}
13778
13779/* Copy one MODE sized block from SRC to DST, then progress SRC and DST by
13780 MODE bytes. */
13781
13782static void
13783aarch64_copy_one_block_and_progress_pointers (rtx *src, rtx *dst,
ef4bddc2 13784 machine_mode mode)
e2c75eea
JG
13785{
13786 rtx reg = gen_reg_rtx (mode);
13787
13788 /* "Cast" the pointers to the correct mode. */
13789 *src = adjust_address (*src, mode, 0);
13790 *dst = adjust_address (*dst, mode, 0);
13791 /* Emit the memcpy. */
13792 emit_move_insn (reg, *src);
13793 emit_move_insn (*dst, reg);
13794 /* Move the pointers forward. */
13795 *src = aarch64_progress_pointer (*src);
13796 *dst = aarch64_progress_pointer (*dst);
13797}
13798
13799/* Expand movmem, as if from a __builtin_memcpy. Return true if
13800 we succeed, otherwise return false. */
13801
13802bool
13803aarch64_expand_movmem (rtx *operands)
13804{
13805 unsigned int n;
13806 rtx dst = operands[0];
13807 rtx src = operands[1];
13808 rtx base;
13809 bool speed_p = !optimize_function_for_size_p (cfun);
13810
13811 /* When optimizing for size, give a better estimate of the length of a
13812 memcpy call, but use the default otherwise. */
13813 unsigned int max_instructions = (speed_p ? 15 : AARCH64_CALL_RATIO) / 2;
13814
13815 /* We can't do anything smart if the amount to copy is not constant. */
13816 if (!CONST_INT_P (operands[2]))
13817 return false;
13818
13819 n = UINTVAL (operands[2]);
13820
13821 /* Try to keep the number of instructions low. For cases below 16 bytes we
13822 need to make at most two moves. For cases above 16 bytes it will be one
13823 move for each 16 byte chunk, then at most two additional moves. */
13824 if (((n / 16) + (n % 16 ? 2 : 0)) > max_instructions)
13825 return false;
13826
13827 base = copy_to_mode_reg (Pmode, XEXP (dst, 0));
13828 dst = adjust_automodify_address (dst, VOIDmode, base, 0);
13829
13830 base = copy_to_mode_reg (Pmode, XEXP (src, 0));
13831 src = adjust_automodify_address (src, VOIDmode, base, 0);
13832
13833 /* Simple cases. Copy 0-3 bytes, as (if applicable) a 2-byte, then a
13834 1-byte chunk. */
13835 if (n < 4)
13836 {
13837 if (n >= 2)
13838 {
13839 aarch64_copy_one_block_and_progress_pointers (&src, &dst, HImode);
13840 n -= 2;
13841 }
13842
13843 if (n == 1)
13844 aarch64_copy_one_block_and_progress_pointers (&src, &dst, QImode);
13845
13846 return true;
13847 }
13848
13849 /* Copy 4-7 bytes. First a 4-byte chunk, then (if applicable) a second
13850 4-byte chunk, partially overlapping with the previously copied chunk. */
13851 if (n < 8)
13852 {
13853 aarch64_copy_one_block_and_progress_pointers (&src, &dst, SImode);
13854 n -= 4;
13855 if (n > 0)
13856 {
13857 int move = n - 4;
13858
13859 src = aarch64_move_pointer (src, move);
13860 dst = aarch64_move_pointer (dst, move);
13861 aarch64_copy_one_block_and_progress_pointers (&src, &dst, SImode);
13862 }
13863 return true;
13864 }
13865
13866 /* Copy 8 or more bytes. Copy chunks of 16 bytes until we run out of
13867 them, then (if applicable) an 8-byte chunk. */
13868 while (n >= 8)
13869 {
13870 if (n / 16)
13871 {
13872 aarch64_copy_one_block_and_progress_pointers (&src, &dst, TImode);
13873 n -= 16;
13874 }
13875 else
13876 {
13877 aarch64_copy_one_block_and_progress_pointers (&src, &dst, DImode);
13878 n -= 8;
13879 }
13880 }
13881
13882 /* Finish the final bytes of the copy. We can always do this in one
13883 instruction. We either copy the exact amount we need, or partially
13884 overlap with the previous chunk we copied and copy 4 or 8 bytes. */
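  /* For example, with n == 15 the loop above copies bytes 0-7 and the code
     below copies bytes 7-14, overlapping by a single byte.  */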
13885 if (n == 0)
13886 return true;
13887 else if (n == 1)
13888 aarch64_copy_one_block_and_progress_pointers (&src, &dst, QImode);
13889 else if (n == 2)
13890 aarch64_copy_one_block_and_progress_pointers (&src, &dst, HImode);
13891 else if (n == 4)
13892 aarch64_copy_one_block_and_progress_pointers (&src, &dst, SImode);
13893 else
13894 {
13895 if (n == 3)
13896 {
13897 src = aarch64_move_pointer (src, -1);
13898 dst = aarch64_move_pointer (dst, -1);
13899 aarch64_copy_one_block_and_progress_pointers (&src, &dst, SImode);
13900 }
13901 else
13902 {
13903 int move = n - 8;
13904
13905 src = aarch64_move_pointer (src, move);
13906 dst = aarch64_move_pointer (dst, move);
13907 aarch64_copy_one_block_and_progress_pointers (&src, &dst, DImode);
13908 }
13909 }
13910
13911 return true;
13912}
13913
141a3ccf
KT
13914/* Split a DImode store of a CONST_INT SRC to MEM DST as two
13915 SImode stores. Handle the case when the constant has identical
13916 bottom and top halves. This is beneficial when the two stores can be
13917 merged into an STP and we avoid synthesising potentially expensive
13918 immediates twice. Return true if such a split is possible. */
13919
13920bool
13921aarch64_split_dimode_const_store (rtx dst, rtx src)
13922{
13923 rtx lo = gen_lowpart (SImode, src);
13924 rtx hi = gen_highpart_mode (SImode, DImode, src);
13925
13926 bool size_p = optimize_function_for_size_p (cfun);
13927
13928 if (!rtx_equal_p (lo, hi))
13929 return false;
13930
13931 unsigned int orig_cost
13932 = aarch64_internal_mov_immediate (NULL_RTX, src, false, DImode);
13933 unsigned int lo_cost
13934 = aarch64_internal_mov_immediate (NULL_RTX, lo, false, SImode);
13935
13936 /* We want to transform:
13937 MOV x1, 49370
13938 MOVK x1, 0x140, lsl 16
13939 MOVK x1, 0xc0da, lsl 32
13940 MOVK x1, 0x140, lsl 48
13941 STR x1, [x0]
13942 into:
13943 MOV w1, 49370
13944 MOVK w1, 0x140, lsl 16
13945 STP w1, w1, [x0]
13946 So we want to perform this only when we save two instructions
13947 or more. When optimizing for size, however, accept any code size
13948 savings we can. */
13949 if (size_p && orig_cost <= lo_cost)
13950 return false;
13951
13952 if (!size_p
13953 && (orig_cost <= lo_cost + 1))
13954 return false;
13955
13956 rtx mem_lo = adjust_address (dst, SImode, 0);
13957 if (!aarch64_mem_pair_operand (mem_lo, SImode))
13958 return false;
13959
13960 rtx tmp_reg = gen_reg_rtx (SImode);
13961 aarch64_expand_mov_immediate (tmp_reg, lo);
13962 rtx mem_hi = aarch64_move_pointer (mem_lo, GET_MODE_SIZE (SImode));
13963 /* Don't emit an explicit store pair as this may not always be profitable.
13964 Let the sched-fusion logic decide whether to merge them. */
13965 emit_move_insn (mem_lo, tmp_reg);
13966 emit_move_insn (mem_hi, tmp_reg);
13967
13968 return true;
13969}
13970
a3125fc2
CL
13971/* Implement the TARGET_ASAN_SHADOW_OFFSET hook. */
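/* AddressSanitizer maps an address ADDR to shadow memory at roughly
   (ADDR >> 3) + this offset (assuming the usual 8-byte shadow granularity),
   so the AArch64 shadow region is based at 1 << 36.  */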
13972
13973static unsigned HOST_WIDE_INT
13974aarch64_asan_shadow_offset (void)
13975{
13976 return (HOST_WIDE_INT_1 << 36);
13977}
13978
d3006da6 13979static bool
445d7826 13980aarch64_use_by_pieces_infrastructure_p (unsigned HOST_WIDE_INT size,
d3006da6
JG
13981 unsigned int align,
13982 enum by_pieces_operation op,
13983 bool speed_p)
13984{
13985 /* STORE_BY_PIECES can be used when copying a constant string, but
13986 in that case each 64-bit chunk takes 5 insns instead of 2 (LDR/STR).
13987 For now we always fail this and let the move_by_pieces code copy
13988 the string from read-only memory. */
13989 if (op == STORE_BY_PIECES)
13990 return false;
13991
13992 return default_use_by_pieces_infrastructure_p (size, align, op, speed_p);
13993}
13994
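/* Implement TARGET_GEN_CCMP_FIRST.  Expand the first comparison of a
   conditional-compare (CCMP) sequence for CODE applied to TREEOP0 and
   TREEOP1.  Insns that prepare the operands go into *PREP_SEQ and the
   comparison itself into *GEN_SEQ.  Return a comparison of the CC register
   against zero on success, or NULL_RTX if the mode is not supported.  */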
5f3bc026 13995static rtx
cb4347e8 13996aarch64_gen_ccmp_first (rtx_insn **prep_seq, rtx_insn **gen_seq,
5f3bc026
ZC
13997 int code, tree treeop0, tree treeop1)
13998{
c8012fbc
WD
13999 machine_mode op_mode, cmp_mode, cc_mode = CCmode;
14000 rtx op0, op1;
5f3bc026 14001 int unsignedp = TYPE_UNSIGNED (TREE_TYPE (treeop0));
c8012fbc 14002 insn_code icode;
5f3bc026
ZC
14003 struct expand_operand ops[4];
14004
5f3bc026
ZC
14005 start_sequence ();
14006 expand_operands (treeop0, treeop1, NULL_RTX, &op0, &op1, EXPAND_NORMAL);
14007
14008 op_mode = GET_MODE (op0);
14009 if (op_mode == VOIDmode)
14010 op_mode = GET_MODE (op1);
14011
14012 switch (op_mode)
14013 {
14014 case QImode:
14015 case HImode:
14016 case SImode:
14017 cmp_mode = SImode;
14018 icode = CODE_FOR_cmpsi;
14019 break;
14020
14021 case DImode:
14022 cmp_mode = DImode;
14023 icode = CODE_FOR_cmpdi;
14024 break;
14025
786e3c06
WD
14026 case SFmode:
14027 cmp_mode = SFmode;
14028 cc_mode = aarch64_select_cc_mode ((rtx_code) code, op0, op1);
14029 icode = cc_mode == CCFPEmode ? CODE_FOR_fcmpesf : CODE_FOR_fcmpsf;
14030 break;
14031
14032 case DFmode:
14033 cmp_mode = DFmode;
14034 cc_mode = aarch64_select_cc_mode ((rtx_code) code, op0, op1);
14035 icode = cc_mode == CCFPEmode ? CODE_FOR_fcmpedf : CODE_FOR_fcmpdf;
14036 break;
14037
5f3bc026
ZC
14038 default:
14039 end_sequence ();
14040 return NULL_RTX;
14041 }
14042
c8012fbc
WD
14043 op0 = prepare_operand (icode, op0, 0, op_mode, cmp_mode, unsignedp);
14044 op1 = prepare_operand (icode, op1, 1, op_mode, cmp_mode, unsignedp);
5f3bc026
ZC
14045 if (!op0 || !op1)
14046 {
14047 end_sequence ();
14048 return NULL_RTX;
14049 }
14050 *prep_seq = get_insns ();
14051 end_sequence ();
14052
c8012fbc
WD
14053 create_fixed_operand (&ops[0], op0);
14054 create_fixed_operand (&ops[1], op1);
5f3bc026
ZC
14055
14056 start_sequence ();
c8012fbc 14057 if (!maybe_expand_insn (icode, 2, ops))
5f3bc026
ZC
14058 {
14059 end_sequence ();
14060 return NULL_RTX;
14061 }
14062 *gen_seq = get_insns ();
14063 end_sequence ();
14064
c8012fbc
WD
14065 return gen_rtx_fmt_ee ((rtx_code) code, cc_mode,
14066 gen_rtx_REG (cc_mode, CC_REGNUM), const0_rtx);
5f3bc026
ZC
14067}
14068
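/* Implement TARGET_GEN_CCMP_NEXT.  Expand a conditional compare that is
   chained onto the previous comparison PREV through BIT_CODE (AND or IOR),
   comparing TREEOP0 against TREEOP1 with CMP_CODE.  Insns are appended to
   *PREP_SEQ and *GEN_SEQ as in aarch64_gen_ccmp_first.  Return the new
   comparison rtx, or NULL_RTX on failure.  */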
14069static rtx
cb4347e8
TS
14070aarch64_gen_ccmp_next (rtx_insn **prep_seq, rtx_insn **gen_seq, rtx prev,
14071 int cmp_code, tree treeop0, tree treeop1, int bit_code)
5f3bc026 14072{
c8012fbc
WD
14073 rtx op0, op1, target;
14074 machine_mode op_mode, cmp_mode, cc_mode = CCmode;
5f3bc026 14075 int unsignedp = TYPE_UNSIGNED (TREE_TYPE (treeop0));
c8012fbc 14076 insn_code icode;
5f3bc026 14077 struct expand_operand ops[6];
c8012fbc 14078 int aarch64_cond;
5f3bc026 14079
cb4347e8 14080 push_to_sequence (*prep_seq);
5f3bc026
ZC
14081 expand_operands (treeop0, treeop1, NULL_RTX, &op0, &op1, EXPAND_NORMAL);
14082
14083 op_mode = GET_MODE (op0);
14084 if (op_mode == VOIDmode)
14085 op_mode = GET_MODE (op1);
14086
14087 switch (op_mode)
14088 {
14089 case QImode:
14090 case HImode:
14091 case SImode:
14092 cmp_mode = SImode;
c8012fbc 14093 icode = CODE_FOR_ccmpsi;
5f3bc026
ZC
14094 break;
14095
14096 case DImode:
14097 cmp_mode = DImode;
c8012fbc 14098 icode = CODE_FOR_ccmpdi;
5f3bc026
ZC
14099 break;
14100
786e3c06
WD
14101 case SFmode:
14102 cmp_mode = SFmode;
14103 cc_mode = aarch64_select_cc_mode ((rtx_code) cmp_code, op0, op1);
14104 icode = cc_mode == CCFPEmode ? CODE_FOR_fccmpesf : CODE_FOR_fccmpsf;
14105 break;
14106
14107 case DFmode:
14108 cmp_mode = DFmode;
14109 cc_mode = aarch64_select_cc_mode ((rtx_code) cmp_code, op0, op1);
14110 icode = cc_mode == CCFPEmode ? CODE_FOR_fccmpedf : CODE_FOR_fccmpdf;
14111 break;
14112
5f3bc026
ZC
14113 default:
14114 end_sequence ();
14115 return NULL_RTX;
14116 }
14117
14118 op0 = prepare_operand (icode, op0, 2, op_mode, cmp_mode, unsignedp);
14119 op1 = prepare_operand (icode, op1, 3, op_mode, cmp_mode, unsignedp);
14120 if (!op0 || !op1)
14121 {
14122 end_sequence ();
14123 return NULL_RTX;
14124 }
14125 *prep_seq = get_insns ();
14126 end_sequence ();
14127
14128 target = gen_rtx_REG (cc_mode, CC_REGNUM);
c8012fbc 14129 aarch64_cond = aarch64_get_condition_code_1 (cc_mode, (rtx_code) cmp_code);
5f3bc026 14130
c8012fbc
WD
14131 if (bit_code != AND)
14132 {
14133 prev = gen_rtx_fmt_ee (REVERSE_CONDITION (GET_CODE (prev),
14134 GET_MODE (XEXP (prev, 0))),
14135 VOIDmode, XEXP (prev, 0), const0_rtx);
14136 aarch64_cond = AARCH64_INVERSE_CONDITION_CODE (aarch64_cond);
14137 }
14138
14139 create_fixed_operand (&ops[0], XEXP (prev, 0));
5f3bc026
ZC
14140 create_fixed_operand (&ops[1], target);
14141 create_fixed_operand (&ops[2], op0);
14142 create_fixed_operand (&ops[3], op1);
c8012fbc
WD
14143 create_fixed_operand (&ops[4], prev);
14144 create_fixed_operand (&ops[5], GEN_INT (aarch64_cond));
5f3bc026 14145
cb4347e8 14146 push_to_sequence (*gen_seq);
5f3bc026
ZC
14147 if (!maybe_expand_insn (icode, 6, ops))
14148 {
14149 end_sequence ();
14150 return NULL_RTX;
14151 }
14152
14153 *gen_seq = get_insns ();
14154 end_sequence ();
14155
c8012fbc 14156 return gen_rtx_fmt_ee ((rtx_code) cmp_code, VOIDmode, target, const0_rtx);
5f3bc026
ZC
14157}
14158
14159#undef TARGET_GEN_CCMP_FIRST
14160#define TARGET_GEN_CCMP_FIRST aarch64_gen_ccmp_first
14161
14162#undef TARGET_GEN_CCMP_NEXT
14163#define TARGET_GEN_CCMP_NEXT aarch64_gen_ccmp_next
14164
6a569cdd
KT
14165/* Implement TARGET_SCHED_MACRO_FUSION_P. Return true if target supports
14166 instruction fusion of some sort. */
14167
14168static bool
14169aarch64_macro_fusion_p (void)
14170{
b175b679 14171 return aarch64_tune_params.fusible_ops != AARCH64_FUSE_NOTHING;
6a569cdd
KT
14172}
14173
14174
14175/* Implement TARGET_SCHED_MACRO_FUSION_PAIR_P. Return true if PREV and CURR
14176 should be kept together during scheduling. */
14177
14178static bool
14179aarch_macro_fusion_pair_p (rtx_insn *prev, rtx_insn *curr)
14180{
14181 rtx set_dest;
14182 rtx prev_set = single_set (prev);
14183 rtx curr_set = single_set (curr);
14184 /* prev and curr are simple SET insns i.e. no flag setting or branching. */
14185 bool simple_sets_p = prev_set && curr_set && !any_condjump_p (curr);
14186
14187 if (!aarch64_macro_fusion_p ())
14188 return false;
14189
d7b03373 14190 if (simple_sets_p && aarch64_fusion_enabled_p (AARCH64_FUSE_MOV_MOVK))
6a569cdd
KT
14191 {
14192 /* We are trying to match:
14193 prev (mov) == (set (reg r0) (const_int imm16))
14194 curr (movk) == (set (zero_extract (reg r0)
14195 (const_int 16)
14196 (const_int 16))
14197 (const_int imm16_1)) */
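      /* In assembly this is the pair:
	   mov  x0, #imm16
	   movk x0, #imm16_1, lsl 16  */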
14198
14199 set_dest = SET_DEST (curr_set);
14200
14201 if (GET_CODE (set_dest) == ZERO_EXTRACT
14202 && CONST_INT_P (SET_SRC (curr_set))
14203 && CONST_INT_P (SET_SRC (prev_set))
14204 && CONST_INT_P (XEXP (set_dest, 2))
14205 && INTVAL (XEXP (set_dest, 2)) == 16
14206 && REG_P (XEXP (set_dest, 0))
14207 && REG_P (SET_DEST (prev_set))
14208 && REGNO (XEXP (set_dest, 0)) == REGNO (SET_DEST (prev_set)))
14209 {
14210 return true;
14211 }
14212 }
14213
d7b03373 14214 if (simple_sets_p && aarch64_fusion_enabled_p (AARCH64_FUSE_ADRP_ADD))
9bbe08fe
KT
14215 {
14216
14217 /* We're trying to match:
14218 prev (adrp) == (set (reg r1)
14219 (high (symbol_ref ("SYM"))))
14220 curr (add) == (set (reg r0)
14221 (lo_sum (reg r1)
14222 (symbol_ref ("SYM"))))
14223 Note that r0 need not necessarily be the same as r1, especially
14224 during pre-regalloc scheduling. */
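      /* In assembly this is the pair:
	   adrp x1, SYM
	   add  x0, x1, :lo12:SYM  */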
14225
14226 if (satisfies_constraint_Ush (SET_SRC (prev_set))
14227 && REG_P (SET_DEST (prev_set)) && REG_P (SET_DEST (curr_set)))
14228 {
14229 if (GET_CODE (SET_SRC (curr_set)) == LO_SUM
14230 && REG_P (XEXP (SET_SRC (curr_set), 0))
14231 && REGNO (XEXP (SET_SRC (curr_set), 0))
14232 == REGNO (SET_DEST (prev_set))
14233 && rtx_equal_p (XEXP (SET_SRC (prev_set), 0),
14234 XEXP (SET_SRC (curr_set), 1)))
14235 return true;
14236 }
14237 }
14238
d7b03373 14239 if (simple_sets_p && aarch64_fusion_enabled_p (AARCH64_FUSE_MOVK_MOVK))
cd0cb232
KT
14240 {
14241
14242 /* We're trying to match:
14243 prev (movk) == (set (zero_extract (reg r0)
14244 (const_int 16)
14245 (const_int 32))
14246 (const_int imm16_1))
14247 curr (movk) == (set (zero_extract (reg r0)
14248 (const_int 16)
14249 (const_int 48))
14250 (const_int imm16_2)) */
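      /* In assembly this is the pair:
	   movk x0, #imm16_1, lsl 32
	   movk x0, #imm16_2, lsl 48  */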
14251
14252 if (GET_CODE (SET_DEST (prev_set)) == ZERO_EXTRACT
14253 && GET_CODE (SET_DEST (curr_set)) == ZERO_EXTRACT
14254 && REG_P (XEXP (SET_DEST (prev_set), 0))
14255 && REG_P (XEXP (SET_DEST (curr_set), 0))
14256 && REGNO (XEXP (SET_DEST (prev_set), 0))
14257 == REGNO (XEXP (SET_DEST (curr_set), 0))
14258 && CONST_INT_P (XEXP (SET_DEST (prev_set), 2))
14259 && CONST_INT_P (XEXP (SET_DEST (curr_set), 2))
14260 && INTVAL (XEXP (SET_DEST (prev_set), 2)) == 32
14261 && INTVAL (XEXP (SET_DEST (curr_set), 2)) == 48
14262 && CONST_INT_P (SET_SRC (prev_set))
14263 && CONST_INT_P (SET_SRC (curr_set)))
14264 return true;
14265
14266 }
d7b03373 14267 if (simple_sets_p && aarch64_fusion_enabled_p (AARCH64_FUSE_ADRP_LDR))
d8354ad7
KT
14268 {
14269 /* We're trying to match:
14270 prev (adrp) == (set (reg r0)
14271 (high (symbol_ref ("SYM"))))
14272 curr (ldr) == (set (reg r1)
14273 (mem (lo_sum (reg r0)
14274 (symbol_ref ("SYM")))))
14275 or
14276 curr (ldr) == (set (reg r1)
14277 (zero_extend (mem
14278 (lo_sum (reg r0)
14279 (symbol_ref ("SYM")))))) */
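      /* In assembly this is the pair:
	   adrp x0, SYM
	   ldr  x1, [x0, #:lo12:SYM]  */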
14280 if (satisfies_constraint_Ush (SET_SRC (prev_set))
14281 && REG_P (SET_DEST (prev_set)) && REG_P (SET_DEST (curr_set)))
14282 {
14283 rtx curr_src = SET_SRC (curr_set);
14284
14285 if (GET_CODE (curr_src) == ZERO_EXTEND)
14286 curr_src = XEXP (curr_src, 0);
14287
14288 if (MEM_P (curr_src) && GET_CODE (XEXP (curr_src, 0)) == LO_SUM
14289 && REG_P (XEXP (XEXP (curr_src, 0), 0))
14290 && REGNO (XEXP (XEXP (curr_src, 0), 0))
14291 == REGNO (SET_DEST (prev_set))
14292 && rtx_equal_p (XEXP (XEXP (curr_src, 0), 1),
14293 XEXP (SET_SRC (prev_set), 0)))
14294 return true;
14295 }
14296 }
cd0cb232 14297
d7b03373 14298 if (aarch64_fusion_enabled_p (AARCH64_FUSE_AES_AESMC)
00a8574a
WD
14299 && aarch_crypto_can_dual_issue (prev, curr))
14300 return true;
14301
d7b03373 14302 if (aarch64_fusion_enabled_p (AARCH64_FUSE_CMP_BRANCH)
3759108f
AP
14303 && any_condjump_p (curr))
14304 {
14305 enum attr_type prev_type = get_attr_type (prev);
14306
14307 /* FIXME: this misses some instructions that are considered simple
14308 arithmetic instructions for ThunderX. Simple shifts are missed here. */
14309 if (prev_type == TYPE_ALUS_SREG
14310 || prev_type == TYPE_ALUS_IMM
14311 || prev_type == TYPE_LOGICS_REG
14312 || prev_type == TYPE_LOGICS_IMM)
14313 return true;
14314 }
14315
6a569cdd
KT
14316 return false;
14317}
14318
f2879a90
KT
14319/* Return true iff the instruction fusion described by OP is enabled. */
14320
14321bool
14322aarch64_fusion_enabled_p (enum aarch64_fusion_pairs op)
14323{
14324 return (aarch64_tune_params.fusible_ops & op) != 0;
14325}
14326
350013bc
BC
14327/* If MEM is in the form of [base+offset], extract the two parts of the
14328 address and store them in BASE and OFFSET; otherwise return false after
14329 clearing BASE and OFFSET. */
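/* For example, (mem (plus (reg x2) (const_int 8))) gives BASE = x2 and
   OFFSET = 8, while a bare (mem (reg x2)) gives BASE = x2 and OFFSET = 0.  */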
14330
14331bool
14332extract_base_offset_in_addr (rtx mem, rtx *base, rtx *offset)
14333{
14334 rtx addr;
14335
14336 gcc_assert (MEM_P (mem));
14337
14338 addr = XEXP (mem, 0);
14339
14340 if (REG_P (addr))
14341 {
14342 *base = addr;
14343 *offset = const0_rtx;
14344 return true;
14345 }
14346
14347 if (GET_CODE (addr) == PLUS
14348 && REG_P (XEXP (addr, 0)) && CONST_INT_P (XEXP (addr, 1)))
14349 {
14350 *base = XEXP (addr, 0);
14351 *offset = XEXP (addr, 1);
14352 return true;
14353 }
14354
14355 *base = NULL_RTX;
14356 *offset = NULL_RTX;
14357
14358 return false;
14359}
14360
14361/* Types for scheduling fusion. */
14362enum sched_fusion_type
14363{
14364 SCHED_FUSION_NONE = 0,
14365 SCHED_FUSION_LD_SIGN_EXTEND,
14366 SCHED_FUSION_LD_ZERO_EXTEND,
14367 SCHED_FUSION_LD,
14368 SCHED_FUSION_ST,
14369 SCHED_FUSION_NUM
14370};
14371
14372/* If INSN is a load or store of an address in the form [base+offset],
14373 extract the two parts and store them in BASE and OFFSET. Return the
14374 scheduling fusion type of this INSN. */
14375
14376static enum sched_fusion_type
14377fusion_load_store (rtx_insn *insn, rtx *base, rtx *offset)
14378{
14379 rtx x, dest, src;
14380 enum sched_fusion_type fusion = SCHED_FUSION_LD;
14381
14382 gcc_assert (INSN_P (insn));
14383 x = PATTERN (insn);
14384 if (GET_CODE (x) != SET)
14385 return SCHED_FUSION_NONE;
14386
14387 src = SET_SRC (x);
14388 dest = SET_DEST (x);
14389
abc52318
KT
14390 machine_mode dest_mode = GET_MODE (dest);
14391
14392 if (!aarch64_mode_valid_for_sched_fusion_p (dest_mode))
350013bc
BC
14393 return SCHED_FUSION_NONE;
14394
14395 if (GET_CODE (src) == SIGN_EXTEND)
14396 {
14397 fusion = SCHED_FUSION_LD_SIGN_EXTEND;
14398 src = XEXP (src, 0);
14399 if (GET_CODE (src) != MEM || GET_MODE (src) != SImode)
14400 return SCHED_FUSION_NONE;
14401 }
14402 else if (GET_CODE (src) == ZERO_EXTEND)
14403 {
14404 fusion = SCHED_FUSION_LD_ZERO_EXTEND;
14405 src = XEXP (src, 0);
14406 if (GET_CODE (src) != MEM || GET_MODE (src) != SImode)
14407 return SCHED_FUSION_NONE;
14408 }
14409
14410 if (GET_CODE (src) == MEM && REG_P (dest))
14411 extract_base_offset_in_addr (src, base, offset);
14412 else if (GET_CODE (dest) == MEM && (REG_P (src) || src == const0_rtx))
14413 {
14414 fusion = SCHED_FUSION_ST;
14415 extract_base_offset_in_addr (dest, base, offset);
14416 }
14417 else
14418 return SCHED_FUSION_NONE;
14419
14420 if (*base == NULL_RTX || *offset == NULL_RTX)
14421 fusion = SCHED_FUSION_NONE;
14422
14423 return fusion;
14424}
14425
14426/* Implement the TARGET_SCHED_FUSION_PRIORITY hook.
14427
14428 Currently we only support fusing ldr and str instructions, so FUSION_PRI
14429 and PRI are only calculated for these instructions. For other instructions,
14430 FUSION_PRI and PRI are simply set to MAX_PRI - 1. In the future, other
14431 types of instruction fusion can be added by returning different priorities.
14432
14433 It's important that irrelevant instructions get the largest FUSION_PRI. */
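/* For example, two SImode stores off the same base register get the same
   FUSION_PRI, and the store with the smaller (non-negative) offset gets the
   larger PRI, so it is considered first.  */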
14434
14435static void
14436aarch64_sched_fusion_priority (rtx_insn *insn, int max_pri,
14437 int *fusion_pri, int *pri)
14438{
14439 int tmp, off_val;
14440 rtx base, offset;
14441 enum sched_fusion_type fusion;
14442
14443 gcc_assert (INSN_P (insn));
14444
14445 tmp = max_pri - 1;
14446 fusion = fusion_load_store (insn, &base, &offset);
14447 if (fusion == SCHED_FUSION_NONE)
14448 {
14449 *pri = tmp;
14450 *fusion_pri = tmp;
14451 return;
14452 }
14453
14454 /* Set FUSION_PRI according to fusion type and base register. */
14455 *fusion_pri = tmp - fusion * FIRST_PSEUDO_REGISTER - REGNO (base);
14456
14457 /* Calculate PRI. */
14458 tmp /= 2;
14459
14460 /* The INSN with the smaller offset goes first. */
14461 off_val = (int)(INTVAL (offset));
14462 if (off_val >= 0)
14463 tmp -= (off_val & 0xfffff);
14464 else
14465 tmp += ((- off_val) & 0xfffff);
14466
14467 *pri = tmp;
14468 return;
14469}
14470
9bca63d4
WD
14471/* Implement the TARGET_SCHED_ADJUST_PRIORITY hook.
14472 Adjust priority of sha1h instructions so they are scheduled before
14473 other SHA1 instructions. */
14474
14475static int
14476aarch64_sched_adjust_priority (rtx_insn *insn, int priority)
14477{
14478 rtx x = PATTERN (insn);
14479
14480 if (GET_CODE (x) == SET)
14481 {
14482 x = SET_SRC (x);
14483
14484 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_SHA1H)
14485 return priority + 10;
14486 }
14487
14488 return priority;
14489}
14490
350013bc
BC
14491/* Given OPERANDS of consecutive load/store, check if we can merge
14492 them into ldp/stp. LOAD is true if they are load instructions.
14493 MODE is the mode of memory operands. */
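/* For example, the pair
       ldr  w0, [x2]
       ldr  w1, [x2, 4]
   can become ldp w0, w1, [x2] provided the checks below succeed.  */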
14494
14495bool
14496aarch64_operands_ok_for_ldpstp (rtx *operands, bool load,
14497 enum machine_mode mode)
14498{
14499 HOST_WIDE_INT offval_1, offval_2, msize;
14500 enum reg_class rclass_1, rclass_2;
14501 rtx mem_1, mem_2, reg_1, reg_2, base_1, base_2, offset_1, offset_2;
14502
14503 if (load)
14504 {
14505 mem_1 = operands[1];
14506 mem_2 = operands[3];
14507 reg_1 = operands[0];
14508 reg_2 = operands[2];
14509 gcc_assert (REG_P (reg_1) && REG_P (reg_2));
14510 if (REGNO (reg_1) == REGNO (reg_2))
14511 return false;
14512 }
14513 else
14514 {
14515 mem_1 = operands[0];
14516 mem_2 = operands[2];
14517 reg_1 = operands[1];
14518 reg_2 = operands[3];
14519 }
14520
bf84ac44
AP
14521 /* The mems cannot be volatile. */
14522 if (MEM_VOLATILE_P (mem_1) || MEM_VOLATILE_P (mem_2))
14523 return false;
14524
54700e2e
AP
14525 /* If we have SImode and slow unaligned ldp,
14526 check that the alignment is at least 8 bytes. */
14527 if (mode == SImode
14528 && (aarch64_tune_params.extra_tuning_flags
14529 & AARCH64_EXTRA_TUNE_SLOW_UNALIGNED_LDPW)
14530 && !optimize_size
14531 && MEM_ALIGN (mem_1) < 8 * BITS_PER_UNIT)
14532 return false;
14533
350013bc
BC
14534 /* Check if the addresses are in the form of [base+offset]. */
14535 extract_base_offset_in_addr (mem_1, &base_1, &offset_1);
14536 if (base_1 == NULL_RTX || offset_1 == NULL_RTX)
14537 return false;
14538 extract_base_offset_in_addr (mem_2, &base_2, &offset_2);
14539 if (base_2 == NULL_RTX || offset_2 == NULL_RTX)
14540 return false;
14541
14542 /* Check if the bases are the same. */
14543 if (!rtx_equal_p (base_1, base_2))
14544 return false;
14545
14546 offval_1 = INTVAL (offset_1);
14547 offval_2 = INTVAL (offset_2);
14548 msize = GET_MODE_SIZE (mode);
14549 /* Check if the offsets are consecutive. */
14550 if (offval_1 != (offval_2 + msize) && offval_2 != (offval_1 + msize))
14551 return false;
14552
14553 /* Check if the addresses are clobbered by load. */
14554 if (load)
14555 {
14556 if (reg_mentioned_p (reg_1, mem_1))
14557 return false;
14558
14559 /* In increasing order, the last load can clobber the address. */
14560 if (offval_1 > offval_2 && reg_mentioned_p (reg_2, mem_2))
14561 return false;
14562 }
14563
14564 if (REG_P (reg_1) && FP_REGNUM_P (REGNO (reg_1)))
14565 rclass_1 = FP_REGS;
14566 else
14567 rclass_1 = GENERAL_REGS;
14568
14569 if (REG_P (reg_2) && FP_REGNUM_P (REGNO (reg_2)))
14570 rclass_2 = FP_REGS;
14571 else
14572 rclass_2 = GENERAL_REGS;
14573
14574 /* Check if the registers are of the same class. */
14575 if (rclass_1 != rclass_2)
14576 return false;
14577
14578 return true;
14579}
14580
14581/* Given OPERANDS of consecutive load/store, check if we can merge
14582 them into ldp/stp by adjusting the offset. LOAD is true if they
14583 are load instructions. MODE is the mode of memory operands.
14584
14585 Given the consecutive stores below:
14586
14587 str w1, [xb, 0x100]
14588 str w1, [xb, 0x104]
14589 str w1, [xb, 0x108]
14590 str w1, [xb, 0x10c]
14591
14592 Though the offsets are out of the range supported by stp, we can
14593 still pair them after adjusting the offset, like:
14594
14595 add scratch, xb, 0x100
14596 stp w1, w1, [scratch]
14597 stp w1, w1, [scratch, 0x8]
14598
14599 The peephole patterns detecting this opportunity should guarantee
14600 the scratch register is available. */
14601
14602bool
14603aarch64_operands_adjust_ok_for_ldpstp (rtx *operands, bool load,
14604 enum machine_mode mode)
14605{
14606 enum reg_class rclass_1, rclass_2, rclass_3, rclass_4;
14607 HOST_WIDE_INT offval_1, offval_2, offval_3, offval_4, msize;
14608 rtx mem_1, mem_2, mem_3, mem_4, reg_1, reg_2, reg_3, reg_4;
14609 rtx base_1, base_2, base_3, base_4, offset_1, offset_2, offset_3, offset_4;
14610
14611 if (load)
14612 {
14613 reg_1 = operands[0];
14614 mem_1 = operands[1];
14615 reg_2 = operands[2];
14616 mem_2 = operands[3];
14617 reg_3 = operands[4];
14618 mem_3 = operands[5];
14619 reg_4 = operands[6];
14620 mem_4 = operands[7];
14621 gcc_assert (REG_P (reg_1) && REG_P (reg_2)
14622 && REG_P (reg_3) && REG_P (reg_4));
14623 if (REGNO (reg_1) == REGNO (reg_2) || REGNO (reg_3) == REGNO (reg_4))
14624 return false;
14625 }
14626 else
14627 {
14628 mem_1 = operands[0];
14629 reg_1 = operands[1];
14630 mem_2 = operands[2];
14631 reg_2 = operands[3];
14632 mem_3 = operands[4];
14633 reg_3 = operands[5];
14634 mem_4 = operands[6];
14635 reg_4 = operands[7];
14636 }
14637 /* Skip if the memory operand is by itself valid for ldp/stp. */
14638 if (!MEM_P (mem_1) || aarch64_mem_pair_operand (mem_1, mode))
14639 return false;
14640
bf84ac44
AP
14641 /* The mems cannot be volatile. */
14642 if (MEM_VOLATILE_P (mem_1) || MEM_VOLATILE_P (mem_2)
14643 || MEM_VOLATILE_P (mem_3) || MEM_VOLATILE_P (mem_4))
14644 return false;
14645
350013bc
BC
14646 /* Check if the addresses are in the form of [base+offset]. */
14647 extract_base_offset_in_addr (mem_1, &base_1, &offset_1);
14648 if (base_1 == NULL_RTX || offset_1 == NULL_RTX)
14649 return false;
14650 extract_base_offset_in_addr (mem_2, &base_2, &offset_2);
14651 if (base_2 == NULL_RTX || offset_2 == NULL_RTX)
14652 return false;
14653 extract_base_offset_in_addr (mem_3, &base_3, &offset_3);
14654 if (base_3 == NULL_RTX || offset_3 == NULL_RTX)
14655 return false;
14656 extract_base_offset_in_addr (mem_4, &base_4, &offset_4);
14657 if (base_4 == NULL_RTX || offset_4 == NULL_RTX)
14658 return false;
14659
14660 /* Check if the bases are the same. */
14661 if (!rtx_equal_p (base_1, base_2)
14662 || !rtx_equal_p (base_2, base_3)
14663 || !rtx_equal_p (base_3, base_4))
14664 return false;
14665
14666 offval_1 = INTVAL (offset_1);
14667 offval_2 = INTVAL (offset_2);
14668 offval_3 = INTVAL (offset_3);
14669 offval_4 = INTVAL (offset_4);
14670 msize = GET_MODE_SIZE (mode);
14671 /* Check if the offsets are consecutive. */
14672 if ((offval_1 != (offval_2 + msize)
14673 || offval_1 != (offval_3 + msize * 2)
14674 || offval_1 != (offval_4 + msize * 3))
14675 && (offval_4 != (offval_3 + msize)
14676 || offval_4 != (offval_2 + msize * 2)
14677 || offval_4 != (offval_1 + msize * 3)))
14678 return false;
14679
14680 /* Check if the addresses are clobbered by load. */
14681 if (load)
14682 {
14683 if (reg_mentioned_p (reg_1, mem_1)
14684 || reg_mentioned_p (reg_2, mem_2)
14685 || reg_mentioned_p (reg_3, mem_3))
14686 return false;
14687
14688 /* In increasing order, the last load can clobber the address. */
14689 if (offval_1 > offval_2 && reg_mentioned_p (reg_4, mem_4))
14690 return false;
14691 }
14692
54700e2e
AP
14693 /* If we have SImode and slow unaligned ldp,
14694 check that the alignment is at least 8 bytes. */
14695 if (mode == SImode
14696 && (aarch64_tune_params.extra_tuning_flags
14697 & AARCH64_EXTRA_TUNE_SLOW_UNALIGNED_LDPW)
14698 && !optimize_size
14699 && MEM_ALIGN (mem_1) < 8 * BITS_PER_UNIT)
14700 return false;
14701
350013bc
BC
14702 if (REG_P (reg_1) && FP_REGNUM_P (REGNO (reg_1)))
14703 rclass_1 = FP_REGS;
14704 else
14705 rclass_1 = GENERAL_REGS;
14706
14707 if (REG_P (reg_2) && FP_REGNUM_P (REGNO (reg_2)))
14708 rclass_2 = FP_REGS;
14709 else
14710 rclass_2 = GENERAL_REGS;
14711
14712 if (REG_P (reg_3) && FP_REGNUM_P (REGNO (reg_3)))
14713 rclass_3 = FP_REGS;
14714 else
14715 rclass_3 = GENERAL_REGS;
14716
14717 if (REG_P (reg_4) && FP_REGNUM_P (REGNO (reg_4)))
14718 rclass_4 = FP_REGS;
14719 else
14720 rclass_4 = GENERAL_REGS;
14721
14722 /* Check if the registers are of the same class. */
14723 if (rclass_1 != rclass_2 || rclass_2 != rclass_3 || rclass_3 != rclass_4)
14724 return false;
14725
14726 return true;
14727}
14728
14729/* Given OPERANDS of consecutive load/store, this function pairs them
14730 into ldp/stp after adjusting the offset. It depends on the fact
14731 that addresses of load/store instructions are in increasing order.
14732 MODE is the mode of memory operands. CODE is the rtl operator
14733 which should be applied to all memory operands; it is SIGN_EXTEND,
14734 ZERO_EXTEND or UNKNOWN. */
14735
14736bool
14737aarch64_gen_adjusted_ldpstp (rtx *operands, bool load,
14738 enum machine_mode mode, RTX_CODE code)
14739{
14740 rtx base, offset, t1, t2;
14741 rtx mem_1, mem_2, mem_3, mem_4;
14742 HOST_WIDE_INT off_val, abs_off, adj_off, new_off, stp_off_limit, msize;
14743
14744 if (load)
14745 {
14746 mem_1 = operands[1];
14747 mem_2 = operands[3];
14748 mem_3 = operands[5];
14749 mem_4 = operands[7];
14750 }
14751 else
14752 {
14753 mem_1 = operands[0];
14754 mem_2 = operands[2];
14755 mem_3 = operands[4];
14756 mem_4 = operands[6];
14757 gcc_assert (code == UNKNOWN);
14758 }
14759
14760 extract_base_offset_in_addr (mem_1, &base, &offset);
14761 gcc_assert (base != NULL_RTX && offset != NULL_RTX);
14762
14763 /* Adjust the offset so that it fits in an ldp/stp instruction. */
14764 msize = GET_MODE_SIZE (mode);
14765 stp_off_limit = msize * 0x40;
14766 off_val = INTVAL (offset);
14767 abs_off = (off_val < 0) ? -off_val : off_val;
14768 new_off = abs_off % stp_off_limit;
14769 adj_off = abs_off - new_off;
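  /* For example, an SImode store at offset 0x100 (as in the example of
     consecutive stores above) gives stp_off_limit = 0x100, new_off = 0 and
     adj_off = 0x100, so the scratch register is set to base + 0x100.  */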
14770
14771 /* Further adjust to make sure all offsets are OK. */
14772 if ((new_off + msize * 2) >= stp_off_limit)
14773 {
14774 adj_off += stp_off_limit;
14775 new_off -= stp_off_limit;
14776 }
14777
14778 /* Make sure the adjustment can be done with ADD/SUB instructions. */
14779 if (adj_off >= 0x1000)
14780 return false;
14781
14782 if (off_val < 0)
14783 {
14784 adj_off = -adj_off;
14785 new_off = -new_off;
14786 }
14787
14788 /* Create new memory references. */
14789 mem_1 = change_address (mem_1, VOIDmode,
14790 plus_constant (DImode, operands[8], new_off));
14791
14792 /* Check if the adjusted address is OK for ldp/stp. */
14793 if (!aarch64_mem_pair_operand (mem_1, mode))
14794 return false;
14795
14796 msize = GET_MODE_SIZE (mode);
14797 mem_2 = change_address (mem_2, VOIDmode,
14798 plus_constant (DImode,
14799 operands[8],
14800 new_off + msize));
14801 mem_3 = change_address (mem_3, VOIDmode,
14802 plus_constant (DImode,
14803 operands[8],
14804 new_off + msize * 2));
14805 mem_4 = change_address (mem_4, VOIDmode,
14806 plus_constant (DImode,
14807 operands[8],
14808 new_off + msize * 3));
14809
14810 if (code == ZERO_EXTEND)
14811 {
14812 mem_1 = gen_rtx_ZERO_EXTEND (DImode, mem_1);
14813 mem_2 = gen_rtx_ZERO_EXTEND (DImode, mem_2);
14814 mem_3 = gen_rtx_ZERO_EXTEND (DImode, mem_3);
14815 mem_4 = gen_rtx_ZERO_EXTEND (DImode, mem_4);
14816 }
14817 else if (code == SIGN_EXTEND)
14818 {
14819 mem_1 = gen_rtx_SIGN_EXTEND (DImode, mem_1);
14820 mem_2 = gen_rtx_SIGN_EXTEND (DImode, mem_2);
14821 mem_3 = gen_rtx_SIGN_EXTEND (DImode, mem_3);
14822 mem_4 = gen_rtx_SIGN_EXTEND (DImode, mem_4);
14823 }
14824
14825 if (load)
14826 {
14827 operands[1] = mem_1;
14828 operands[3] = mem_2;
14829 operands[5] = mem_3;
14830 operands[7] = mem_4;
14831 }
14832 else
14833 {
14834 operands[0] = mem_1;
14835 operands[2] = mem_2;
14836 operands[4] = mem_3;
14837 operands[6] = mem_4;
14838 }
14839
14840 /* Emit adjusting instruction. */
f7df4a84 14841 emit_insn (gen_rtx_SET (operands[8], plus_constant (DImode, base, adj_off)));
350013bc 14842 /* Emit ldp/stp instructions. */
f7df4a84
RS
14843 t1 = gen_rtx_SET (operands[0], operands[1]);
14844 t2 = gen_rtx_SET (operands[2], operands[3]);
350013bc 14845 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, t1, t2)));
f7df4a84
RS
14846 t1 = gen_rtx_SET (operands[4], operands[5]);
14847 t2 = gen_rtx_SET (operands[6], operands[7]);
350013bc
BC
14848 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, t1, t2)));
14849 return true;
14850}
14851
1b1e81f8
JW
14852/* Return true if a pseudo register should be created and used to hold
14853 the GOT address for PIC code. */
14854
14855bool
14856aarch64_use_pseudo_pic_reg (void)
14857{
14858 return aarch64_cmodel == AARCH64_CMODEL_SMALL_SPIC;
14859}
14860
7b841a12
JW
14861/* Implement TARGET_UNSPEC_MAY_TRAP_P. */
14862
14863static int
14864aarch64_unspec_may_trap_p (const_rtx x, unsigned flags)
14865{
14866 switch (XINT (x, 1))
14867 {
14868 case UNSPEC_GOTSMALLPIC:
14869 case UNSPEC_GOTSMALLPIC28K:
14870 case UNSPEC_GOTTINYPIC:
14871 return 0;
14872 default:
14873 break;
14874 }
14875
14876 return default_unspec_may_trap_p (x, flags);
14877}
14878
39252973
KT
14879
14880/* If X is a positive CONST_DOUBLE with a value that is a power of 2
14881 return the log2 of that value. Otherwise return -1. */
14882
14883int
14884aarch64_fpconst_pow_of_2 (rtx x)
14885{
14886 const REAL_VALUE_TYPE *r;
14887
14888 if (!CONST_DOUBLE_P (x))
14889 return -1;
14890
14891 r = CONST_DOUBLE_REAL_VALUE (x);
14892
14893 if (REAL_VALUE_NEGATIVE (*r)
14894 || REAL_VALUE_ISNAN (*r)
14895 || REAL_VALUE_ISINF (*r)
14896 || !real_isinteger (r, DFmode))
14897 return -1;
14898
14899 return exact_log2 (real_to_integer (r));
14900}
14901
14902/* If X is a vector of equal CONST_DOUBLE values and that value is
14903 Y, return the aarch64_fpconst_pow_of_2 of Y. Otherwise return -1. */
14904
14905int
14906aarch64_vec_fpconst_pow_of_2 (rtx x)
14907{
14908 if (GET_CODE (x) != CONST_VECTOR)
14909 return -1;
14910
14911 if (GET_MODE_CLASS (GET_MODE (x)) != MODE_VECTOR_FLOAT)
14912 return -1;
14913
14914 int firstval = aarch64_fpconst_pow_of_2 (CONST_VECTOR_ELT (x, 0));
14915 if (firstval <= 0)
14916 return -1;
14917
14918 for (int i = 1; i < CONST_VECTOR_NUNITS (x); i++)
14919 if (aarch64_fpconst_pow_of_2 (CONST_VECTOR_ELT (x, i)) != firstval)
14920 return -1;
14921
14922 return firstval;
14923}
14924
11e554b3
JG
14925/* Implement TARGET_PROMOTED_TYPE to promote 16-bit floating point types
14926 to float.
14927
14928 __fp16 always promotes through this hook.
14929 _Float16 may promote if TARGET_FLT_EVAL_METHOD is 16, but we do that
14930 through the generic excess precision logic rather than here. */
14931
c2ec330c
AL
14932static tree
14933aarch64_promoted_type (const_tree t)
14934{
11e554b3
JG
14935 if (SCALAR_FLOAT_TYPE_P (t)
14936 && TYPE_MAIN_VARIANT (t) == aarch64_fp16_type_node)
c2ec330c 14937 return float_type_node;
11e554b3 14938
c2ec330c
AL
14939 return NULL_TREE;
14940}
ee62a5a6
RS
14941
14942/* Implement the TARGET_OPTAB_SUPPORTED_P hook. */
14943
14944static bool
9acc9cbe 14945aarch64_optab_supported_p (int op, machine_mode mode1, machine_mode,
ee62a5a6
RS
14946 optimization_type opt_type)
14947{
14948 switch (op)
14949 {
14950 case rsqrt_optab:
9acc9cbe 14951 return opt_type == OPTIMIZE_FOR_SPEED && use_rsqrt_p (mode1);
ee62a5a6
RS
14952
14953 default:
14954 return true;
14955 }
14956}
14957
11e554b3
JG
14958/* Implement TARGET_LIBGCC_FLOATING_POINT_MODE_SUPPORTED_P - return TRUE
14959 if MODE is HFmode, and punt to the generic implementation otherwise. */
14960
14961static bool
14962aarch64_libgcc_floating_mode_supported_p (machine_mode mode)
14963{
14964 return (mode == HFmode
14965 ? true
14966 : default_libgcc_floating_mode_supported_p (mode));
14967}
14968
2e5f8203
JG
14969/* Implement TARGET_SCALAR_MODE_SUPPORTED_P - return TRUE
14970 if MODE is HFmode, and punt to the generic implementation otherwise. */
14971
14972static bool
14973aarch64_scalar_mode_supported_p (machine_mode mode)
14974{
14975 return (mode == HFmode
14976 ? true
14977 : default_scalar_mode_supported_p (mode));
14978}
14979
11e554b3
JG
14980/* Set the value of FLT_EVAL_METHOD.
14981 ISO/IEC TS 18661-3 defines two values that we'd like to make use of:
14982
14983 0: evaluate all operations and constants, whose semantic type has at
14984 most the range and precision of type float, to the range and
14985 precision of float; evaluate all other operations and constants to
14986 the range and precision of the semantic type;
14987
14988 N, where _FloatN is a supported interchange floating type:
14989 evaluate all operations and constants, whose semantic type has at
14990 most the range and precision of _FloatN type, to the range and
14991 precision of the _FloatN type; evaluate all other operations and
14992 constants to the range and precision of the semantic type;
14993
14994 If we have the ARMv8.2-A extensions then we support _Float16 in native
14995 precision, so we should set this to 16. Otherwise, we support the type,
14996 but want to evaluate expressions in float precision, so set this to
14997 0. */
14998
14999static enum flt_eval_method
15000aarch64_excess_precision (enum excess_precision_type type)
15001{
15002 switch (type)
15003 {
15004 case EXCESS_PRECISION_TYPE_FAST:
15005 case EXCESS_PRECISION_TYPE_STANDARD:
15006 /* We can calculate either in 16-bit range and precision or
15007 32-bit range and precision. Make that decision based on whether
15008 we have native support for the ARMv8.2-A 16-bit floating-point
15009 instructions or not. */
15010 return (TARGET_FP_F16INST
15011 ? FLT_EVAL_METHOD_PROMOTE_TO_FLOAT16
15012 : FLT_EVAL_METHOD_PROMOTE_TO_FLOAT);
15013 case EXCESS_PRECISION_TYPE_IMPLICIT:
15014 return FLT_EVAL_METHOD_PROMOTE_TO_FLOAT16;
15015 default:
15016 gcc_unreachable ();
15017 }
15018 return FLT_EVAL_METHOD_UNPREDICTABLE;
15019}
15020
b48d6421
KT
15021/* Implement TARGET_SCHED_CAN_SPECULATE_INSN. Return true if INSN can be
15022 scheduled for speculative execution. Reject the long-running division
15023 and square-root instructions. */
15024
15025static bool
15026aarch64_sched_can_speculate_insn (rtx_insn *insn)
15027{
15028 switch (get_attr_type (insn))
15029 {
15030 case TYPE_SDIV:
15031 case TYPE_UDIV:
15032 case TYPE_FDIVS:
15033 case TYPE_FDIVD:
15034 case TYPE_FSQRTS:
15035 case TYPE_FSQRTD:
15036 case TYPE_NEON_FP_SQRT_S:
15037 case TYPE_NEON_FP_SQRT_D:
15038 case TYPE_NEON_FP_SQRT_S_Q:
15039 case TYPE_NEON_FP_SQRT_D_Q:
15040 case TYPE_NEON_FP_DIV_S:
15041 case TYPE_NEON_FP_DIV_D:
15042 case TYPE_NEON_FP_DIV_S_Q:
15043 case TYPE_NEON_FP_DIV_D_Q:
15044 return false;
15045 default:
15046 return true;
15047 }
15048}
15049
51b86113
DM
15050/* Target-specific selftests. */
15051
15052#if CHECKING_P
15053
15054namespace selftest {
15055
15056/* Selftest for the RTL loader.
15057 Verify that the RTL loader copes with a dump from
15058 print_rtx_function. This is essentially just a test that class
15059 function_reader can handle a real dump, but it also verifies
15060 that lookup_reg_by_dump_name correctly handles hard regs.
15061 The presence of hard reg names in the dump means that the test is
15062 target-specific, hence it is in this file. */
15063
15064static void
15065aarch64_test_loading_full_dump ()
15066{
15067 rtl_dump_test t (SELFTEST_LOCATION, locate_file ("aarch64/times-two.rtl"));
15068
15069 ASSERT_STREQ ("times_two", IDENTIFIER_POINTER (DECL_NAME (cfun->decl)));
15070
15071 rtx_insn *insn_1 = get_insn_by_uid (1);
15072 ASSERT_EQ (NOTE, GET_CODE (insn_1));
15073
15074 rtx_insn *insn_15 = get_insn_by_uid (15);
15075 ASSERT_EQ (INSN, GET_CODE (insn_15));
15076 ASSERT_EQ (USE, GET_CODE (PATTERN (insn_15)));
15077
15078 /* Verify crtl->return_rtx. */
15079 ASSERT_EQ (REG, GET_CODE (crtl->return_rtx));
15080 ASSERT_EQ (0, REGNO (crtl->return_rtx));
15081 ASSERT_EQ (SImode, GET_MODE (crtl->return_rtx));
15082}
15083
15084/* Run all target-specific selftests. */
15085
15086static void
15087aarch64_run_selftests (void)
15088{
15089 aarch64_test_loading_full_dump ();
15090}
15091
15092} // namespace selftest
15093
15094#endif /* #if CHECKING_P */
15095
43e9d192
IB
15096#undef TARGET_ADDRESS_COST
15097#define TARGET_ADDRESS_COST aarch64_address_cost
15098
15099/* This hook determines whether unnamed bitfields affect the alignment
15100 of the containing structure. The hook returns true if the structure
15101 should inherit the alignment requirements of an unnamed bitfield's
15102 type. */
15103#undef TARGET_ALIGN_ANON_BITFIELD
15104#define TARGET_ALIGN_ANON_BITFIELD hook_bool_void_true
15105
15106#undef TARGET_ASM_ALIGNED_DI_OP
15107#define TARGET_ASM_ALIGNED_DI_OP "\t.xword\t"
15108
15109#undef TARGET_ASM_ALIGNED_HI_OP
15110#define TARGET_ASM_ALIGNED_HI_OP "\t.hword\t"
15111
15112#undef TARGET_ASM_ALIGNED_SI_OP
15113#define TARGET_ASM_ALIGNED_SI_OP "\t.word\t"
15114
15115#undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
15116#define TARGET_ASM_CAN_OUTPUT_MI_THUNK \
15117 hook_bool_const_tree_hwi_hwi_const_tree_true
15118
e1c1ecb0
KT
15119#undef TARGET_ASM_FILE_START
15120#define TARGET_ASM_FILE_START aarch64_start_file
15121
43e9d192
IB
15122#undef TARGET_ASM_OUTPUT_MI_THUNK
15123#define TARGET_ASM_OUTPUT_MI_THUNK aarch64_output_mi_thunk
15124
15125#undef TARGET_ASM_SELECT_RTX_SECTION
15126#define TARGET_ASM_SELECT_RTX_SECTION aarch64_select_rtx_section
15127
15128#undef TARGET_ASM_TRAMPOLINE_TEMPLATE
15129#define TARGET_ASM_TRAMPOLINE_TEMPLATE aarch64_asm_trampoline_template
15130
15131#undef TARGET_BUILD_BUILTIN_VA_LIST
15132#define TARGET_BUILD_BUILTIN_VA_LIST aarch64_build_builtin_va_list
15133
15134#undef TARGET_CALLEE_COPIES
15135#define TARGET_CALLEE_COPIES hook_bool_CUMULATIVE_ARGS_mode_tree_bool_false
15136
15137#undef TARGET_CAN_ELIMINATE
15138#define TARGET_CAN_ELIMINATE aarch64_can_eliminate
15139
1fd8d40c
KT
15140#undef TARGET_CAN_INLINE_P
15141#define TARGET_CAN_INLINE_P aarch64_can_inline_p
15142
43e9d192
IB
15143#undef TARGET_CANNOT_FORCE_CONST_MEM
15144#define TARGET_CANNOT_FORCE_CONST_MEM aarch64_cannot_force_const_mem
15145
50487d79
EM
15146#undef TARGET_CASE_VALUES_THRESHOLD
15147#define TARGET_CASE_VALUES_THRESHOLD aarch64_case_values_threshold
15148
43e9d192
IB
15149#undef TARGET_CONDITIONAL_REGISTER_USAGE
15150#define TARGET_CONDITIONAL_REGISTER_USAGE aarch64_conditional_register_usage
15151
15152/* Only the least significant bit is used for initialization guard
15153 variables. */
15154#undef TARGET_CXX_GUARD_MASK_BIT
15155#define TARGET_CXX_GUARD_MASK_BIT hook_bool_void_true
15156
15157#undef TARGET_C_MODE_FOR_SUFFIX
15158#define TARGET_C_MODE_FOR_SUFFIX aarch64_c_mode_for_suffix
15159
15160#ifdef TARGET_BIG_ENDIAN_DEFAULT
15161#undef TARGET_DEFAULT_TARGET_FLAGS
15162#define TARGET_DEFAULT_TARGET_FLAGS (MASK_BIG_END)
15163#endif
15164
15165#undef TARGET_CLASS_MAX_NREGS
15166#define TARGET_CLASS_MAX_NREGS aarch64_class_max_nregs
15167
119103ca
JG
15168#undef TARGET_BUILTIN_DECL
15169#define TARGET_BUILTIN_DECL aarch64_builtin_decl
15170
a6fc00da
BH
15171#undef TARGET_BUILTIN_RECIPROCAL
15172#define TARGET_BUILTIN_RECIPROCAL aarch64_builtin_reciprocal
15173
11e554b3
JG
15174#undef TARGET_C_EXCESS_PRECISION
15175#define TARGET_C_EXCESS_PRECISION aarch64_excess_precision
15176
43e9d192
IB
15177#undef TARGET_EXPAND_BUILTIN
15178#define TARGET_EXPAND_BUILTIN aarch64_expand_builtin
15179
15180#undef TARGET_EXPAND_BUILTIN_VA_START
15181#define TARGET_EXPAND_BUILTIN_VA_START aarch64_expand_builtin_va_start
15182
9697e620
JG
15183#undef TARGET_FOLD_BUILTIN
15184#define TARGET_FOLD_BUILTIN aarch64_fold_builtin
15185
43e9d192
IB
15186#undef TARGET_FUNCTION_ARG
15187#define TARGET_FUNCTION_ARG aarch64_function_arg
15188
15189#undef TARGET_FUNCTION_ARG_ADVANCE
15190#define TARGET_FUNCTION_ARG_ADVANCE aarch64_function_arg_advance
15191
15192#undef TARGET_FUNCTION_ARG_BOUNDARY
15193#define TARGET_FUNCTION_ARG_BOUNDARY aarch64_function_arg_boundary
15194
15195#undef TARGET_FUNCTION_OK_FOR_SIBCALL
15196#define TARGET_FUNCTION_OK_FOR_SIBCALL aarch64_function_ok_for_sibcall
15197
15198#undef TARGET_FUNCTION_VALUE
15199#define TARGET_FUNCTION_VALUE aarch64_function_value
15200
15201#undef TARGET_FUNCTION_VALUE_REGNO_P
15202#define TARGET_FUNCTION_VALUE_REGNO_P aarch64_function_value_regno_p
15203
15204#undef TARGET_FRAME_POINTER_REQUIRED
15205#define TARGET_FRAME_POINTER_REQUIRED aarch64_frame_pointer_required
15206
fc72cba7
AL
15207#undef TARGET_GIMPLE_FOLD_BUILTIN
15208#define TARGET_GIMPLE_FOLD_BUILTIN aarch64_gimple_fold_builtin
0ac198d3 15209
43e9d192
IB
15210#undef TARGET_GIMPLIFY_VA_ARG_EXPR
15211#define TARGET_GIMPLIFY_VA_ARG_EXPR aarch64_gimplify_va_arg_expr
15212
15213#undef TARGET_INIT_BUILTINS
15214#define TARGET_INIT_BUILTINS aarch64_init_builtins
15215
c64f7d37
WD
15216#undef TARGET_IRA_CHANGE_PSEUDO_ALLOCNO_CLASS
15217#define TARGET_IRA_CHANGE_PSEUDO_ALLOCNO_CLASS \
15218 aarch64_ira_change_pseudo_allocno_class
15219
43e9d192
IB
15220#undef TARGET_LEGITIMATE_ADDRESS_P
15221#define TARGET_LEGITIMATE_ADDRESS_P aarch64_legitimate_address_hook_p
15222
15223#undef TARGET_LEGITIMATE_CONSTANT_P
15224#define TARGET_LEGITIMATE_CONSTANT_P aarch64_legitimate_constant_p
15225
491ec060
WD
15226#undef TARGET_LEGITIMIZE_ADDRESS_DISPLACEMENT
15227#define TARGET_LEGITIMIZE_ADDRESS_DISPLACEMENT \
15228 aarch64_legitimize_address_displacement
15229
43e9d192
IB
15230#undef TARGET_LIBGCC_CMP_RETURN_MODE
15231#define TARGET_LIBGCC_CMP_RETURN_MODE aarch64_libgcc_cmp_return_mode
15232
11e554b3
JG
15233#undef TARGET_LIBGCC_FLOATING_MODE_SUPPORTED_P
15234#define TARGET_LIBGCC_FLOATING_MODE_SUPPORTED_P \
15235aarch64_libgcc_floating_mode_supported_p
15236
ac2b960f
YZ
15237#undef TARGET_MANGLE_TYPE
15238#define TARGET_MANGLE_TYPE aarch64_mangle_type
15239
43e9d192
IB
15240#undef TARGET_MEMORY_MOVE_COST
15241#define TARGET_MEMORY_MOVE_COST aarch64_memory_move_cost
15242
26e0ff94
WD
15243#undef TARGET_MIN_DIVISIONS_FOR_RECIP_MUL
15244#define TARGET_MIN_DIVISIONS_FOR_RECIP_MUL aarch64_min_divisions_for_recip_mul
15245
43e9d192
IB
15246#undef TARGET_MUST_PASS_IN_STACK
15247#define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size
15248
15249/* This target hook should return true if accesses to volatile bitfields
15250 should use the narrowest mode possible. It should return false if these
15251 accesses should use the bitfield container type. */
15252#undef TARGET_NARROW_VOLATILE_BITFIELD
15253#define TARGET_NARROW_VOLATILE_BITFIELD hook_bool_void_false
15254
15255#undef TARGET_OPTION_OVERRIDE
15256#define TARGET_OPTION_OVERRIDE aarch64_override_options
15257
15258#undef TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE
15259#define TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE \
15260 aarch64_override_options_after_change
15261
361fb3ee
KT
15262#undef TARGET_OPTION_SAVE
15263#define TARGET_OPTION_SAVE aarch64_option_save
15264
15265#undef TARGET_OPTION_RESTORE
15266#define TARGET_OPTION_RESTORE aarch64_option_restore
15267
15268#undef TARGET_OPTION_PRINT
15269#define TARGET_OPTION_PRINT aarch64_option_print
15270
5a2c8331
KT
15271#undef TARGET_OPTION_VALID_ATTRIBUTE_P
15272#define TARGET_OPTION_VALID_ATTRIBUTE_P aarch64_option_valid_attribute_p
15273
d78006d9
KT
15274#undef TARGET_SET_CURRENT_FUNCTION
15275#define TARGET_SET_CURRENT_FUNCTION aarch64_set_current_function
15276
43e9d192
IB
15277#undef TARGET_PASS_BY_REFERENCE
15278#define TARGET_PASS_BY_REFERENCE aarch64_pass_by_reference
15279
15280#undef TARGET_PREFERRED_RELOAD_CLASS
15281#define TARGET_PREFERRED_RELOAD_CLASS aarch64_preferred_reload_class
15282
cee66c68
WD
15283#undef TARGET_SCHED_REASSOCIATION_WIDTH
15284#define TARGET_SCHED_REASSOCIATION_WIDTH aarch64_reassociation_width
15285
c2ec330c
AL
15286#undef TARGET_PROMOTED_TYPE
15287#define TARGET_PROMOTED_TYPE aarch64_promoted_type
15288
43e9d192
IB
15289#undef TARGET_SECONDARY_RELOAD
15290#define TARGET_SECONDARY_RELOAD aarch64_secondary_reload
15291
15292#undef TARGET_SHIFT_TRUNCATION_MASK
15293#define TARGET_SHIFT_TRUNCATION_MASK aarch64_shift_truncation_mask
15294
15295#undef TARGET_SETUP_INCOMING_VARARGS
15296#define TARGET_SETUP_INCOMING_VARARGS aarch64_setup_incoming_varargs
15297
15298#undef TARGET_STRUCT_VALUE_RTX
15299#define TARGET_STRUCT_VALUE_RTX aarch64_struct_value_rtx
15300
15301#undef TARGET_REGISTER_MOVE_COST
15302#define TARGET_REGISTER_MOVE_COST aarch64_register_move_cost
15303
15304#undef TARGET_RETURN_IN_MEMORY
15305#define TARGET_RETURN_IN_MEMORY aarch64_return_in_memory
15306
15307#undef TARGET_RETURN_IN_MSB
15308#define TARGET_RETURN_IN_MSB aarch64_return_in_msb
15309
15310#undef TARGET_RTX_COSTS
7cc2145f 15311#define TARGET_RTX_COSTS aarch64_rtx_costs_wrapper
43e9d192 15312
2e5f8203
JG
15313#undef TARGET_SCALAR_MODE_SUPPORTED_P
15314#define TARGET_SCALAR_MODE_SUPPORTED_P aarch64_scalar_mode_supported_p
15315
d126a4ae
AP
15316#undef TARGET_SCHED_ISSUE_RATE
15317#define TARGET_SCHED_ISSUE_RATE aarch64_sched_issue_rate
15318
d03f7e44
MK
15319#undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
15320#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
15321 aarch64_sched_first_cycle_multipass_dfa_lookahead
15322
2d6bc7fa
KT
15323#undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD
15324#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD \
15325 aarch64_first_cycle_multipass_dfa_lookahead_guard
15326
827ab47a
KT
15327#undef TARGET_SHRINK_WRAP_GET_SEPARATE_COMPONENTS
15328#define TARGET_SHRINK_WRAP_GET_SEPARATE_COMPONENTS \
15329 aarch64_get_separate_components
15330
15331#undef TARGET_SHRINK_WRAP_COMPONENTS_FOR_BB
15332#define TARGET_SHRINK_WRAP_COMPONENTS_FOR_BB \
15333 aarch64_components_for_bb
15334
15335#undef TARGET_SHRINK_WRAP_DISQUALIFY_COMPONENTS
15336#define TARGET_SHRINK_WRAP_DISQUALIFY_COMPONENTS \
15337 aarch64_disqualify_components
15338
15339#undef TARGET_SHRINK_WRAP_EMIT_PROLOGUE_COMPONENTS
15340#define TARGET_SHRINK_WRAP_EMIT_PROLOGUE_COMPONENTS \
15341 aarch64_emit_prologue_components
15342
15343#undef TARGET_SHRINK_WRAP_EMIT_EPILOGUE_COMPONENTS
15344#define TARGET_SHRINK_WRAP_EMIT_EPILOGUE_COMPONENTS \
15345 aarch64_emit_epilogue_components
15346
15347#undef TARGET_SHRINK_WRAP_SET_HANDLED_COMPONENTS
15348#define TARGET_SHRINK_WRAP_SET_HANDLED_COMPONENTS \
15349 aarch64_set_handled_components
15350
43e9d192
IB
15351#undef TARGET_TRAMPOLINE_INIT
15352#define TARGET_TRAMPOLINE_INIT aarch64_trampoline_init
15353
15354#undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
15355#define TARGET_USE_BLOCKS_FOR_CONSTANT_P aarch64_use_blocks_for_constant_p
15356
15357#undef TARGET_VECTOR_MODE_SUPPORTED_P
15358#define TARGET_VECTOR_MODE_SUPPORTED_P aarch64_vector_mode_supported_p
15359
7df76747
N
15360#undef TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT
15361#define TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT \
15362 aarch64_builtin_support_vector_misalignment
15363
43e9d192
IB
15364#undef TARGET_ARRAY_MODE_SUPPORTED_P
15365#define TARGET_ARRAY_MODE_SUPPORTED_P aarch64_array_mode_supported_p
15366
8990e73a
TB
15367#undef TARGET_VECTORIZE_ADD_STMT_COST
15368#define TARGET_VECTORIZE_ADD_STMT_COST aarch64_add_stmt_cost
15369
15370#undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
15371#define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
15372 aarch64_builtin_vectorization_cost
15373
43e9d192
IB
15374#undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
15375#define TARGET_VECTORIZE_PREFERRED_SIMD_MODE aarch64_preferred_simd_mode
15376
42fc9a7f
JG
15377#undef TARGET_VECTORIZE_BUILTINS
15378#define TARGET_VECTORIZE_BUILTINS
15379
15380#undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
15381#define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
15382 aarch64_builtin_vectorized_function
15383
3b357264
JG
15384#undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES
15385#define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES \
15386 aarch64_autovectorize_vector_sizes
15387
aa87aced
KV
15388#undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV
15389#define TARGET_ATOMIC_ASSIGN_EXPAND_FENV \
15390 aarch64_atomic_assign_expand_fenv
15391
43e9d192
IB
15392/* Section anchor support. */
15393
15394#undef TARGET_MIN_ANCHOR_OFFSET
15395#define TARGET_MIN_ANCHOR_OFFSET -256
15396
15397/* Limit the maximum anchor offset to 4k-1, since that's the limit for a
15398 byte offset; we can do much more for larger data types, but have no way
15399 to determine the size of the access. We assume accesses are aligned. */
15400#undef TARGET_MAX_ANCHOR_OFFSET
15401#define TARGET_MAX_ANCHOR_OFFSET 4095
15402
db0253a4
TB
15403#undef TARGET_VECTOR_ALIGNMENT
15404#define TARGET_VECTOR_ALIGNMENT aarch64_simd_vector_alignment
15405
15406#undef TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE
15407#define TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE \
15408 aarch64_simd_vector_alignment_reachable
15409
88b08073
JG
15410/* vec_perm support. */
15411
15412#undef TARGET_VECTORIZE_VEC_PERM_CONST_OK
15413#define TARGET_VECTORIZE_VEC_PERM_CONST_OK \
15414 aarch64_vectorize_vec_perm_const_ok
15415
c2ec330c
AL
15416#undef TARGET_INIT_LIBFUNCS
15417#define TARGET_INIT_LIBFUNCS aarch64_init_libfuncs
70f09188 15418
706b2314 15419#undef TARGET_FIXED_CONDITION_CODE_REGS
70f09188
AP
15420#define TARGET_FIXED_CONDITION_CODE_REGS aarch64_fixed_condition_code_regs
15421
5cb74e90
RR
15422#undef TARGET_FLAGS_REGNUM
15423#define TARGET_FLAGS_REGNUM CC_REGNUM
15424
78607708
TV
15425#undef TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS
15426#define TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS true
15427
a3125fc2
CL
15428#undef TARGET_ASAN_SHADOW_OFFSET
15429#define TARGET_ASAN_SHADOW_OFFSET aarch64_asan_shadow_offset
15430
0c4ec427
RE
15431#undef TARGET_LEGITIMIZE_ADDRESS
15432#define TARGET_LEGITIMIZE_ADDRESS aarch64_legitimize_address
15433
d3006da6
JG
15434#undef TARGET_USE_BY_PIECES_INFRASTRUCTURE_P
15435#define TARGET_USE_BY_PIECES_INFRASTRUCTURE_P \
15436 aarch64_use_by_pieces_infrastructure_p
15437
b48d6421
KT
15438#undef TARGET_SCHED_CAN_SPECULATE_INSN
15439#define TARGET_SCHED_CAN_SPECULATE_INSN aarch64_sched_can_speculate_insn
15440
594bdd53
FY
15441#undef TARGET_CAN_USE_DOLOOP_P
15442#define TARGET_CAN_USE_DOLOOP_P can_use_doloop_if_innermost
15443
9bca63d4
WD
15444#undef TARGET_SCHED_ADJUST_PRIORITY
15445#define TARGET_SCHED_ADJUST_PRIORITY aarch64_sched_adjust_priority
15446
6a569cdd
KT
15447#undef TARGET_SCHED_MACRO_FUSION_P
15448#define TARGET_SCHED_MACRO_FUSION_P aarch64_macro_fusion_p
15449
15450#undef TARGET_SCHED_MACRO_FUSION_PAIR_P
15451#define TARGET_SCHED_MACRO_FUSION_PAIR_P aarch_macro_fusion_pair_p
15452
350013bc
BC
15453#undef TARGET_SCHED_FUSION_PRIORITY
15454#define TARGET_SCHED_FUSION_PRIORITY aarch64_sched_fusion_priority
15455
7b841a12
JW
15456#undef TARGET_UNSPEC_MAY_TRAP_P
15457#define TARGET_UNSPEC_MAY_TRAP_P aarch64_unspec_may_trap_p
15458
1b1e81f8
JW
15459#undef TARGET_USE_PSEUDO_PIC_REG
15460#define TARGET_USE_PSEUDO_PIC_REG aarch64_use_pseudo_pic_reg
15461
cc8ca59e
JB
15462#undef TARGET_PRINT_OPERAND
15463#define TARGET_PRINT_OPERAND aarch64_print_operand
15464
15465#undef TARGET_PRINT_OPERAND_ADDRESS
15466#define TARGET_PRINT_OPERAND_ADDRESS aarch64_print_operand_address
15467
ee62a5a6
RS
15468#undef TARGET_OPTAB_SUPPORTED_P
15469#define TARGET_OPTAB_SUPPORTED_P aarch64_optab_supported_p
15470
43203dea
RR
15471#undef TARGET_OMIT_STRUCT_RETURN_REG
15472#define TARGET_OMIT_STRUCT_RETURN_REG true
15473
f46fe37e
EB
15474/* The architecture reserves bits 0 and 1 so use bit 2 for descriptors. */
15475#undef TARGET_CUSTOM_FUNCTION_DESCRIPTORS
15476#define TARGET_CUSTOM_FUNCTION_DESCRIPTORS 4
15477
51b86113
DM
15478#if CHECKING_P
15479#undef TARGET_RUN_TARGET_SELFTESTS
15480#define TARGET_RUN_TARGET_SELFTESTS selftest::aarch64_run_selftests
15481#endif /* #if CHECKING_P */
15482
43e9d192
IB
15483struct gcc_target targetm = TARGET_INITIALIZER;
15484
15485#include "gt-aarch64.h"