/* Machine description for AArch64 architecture.
   Copyright (C) 2009-2017 Free Software Foundation, Inc.
   Contributed by ARM Ltd.

   This file is part of GCC.

   GCC is free software; you can redistribute it and/or modify it
   under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 3, or (at your option)
   any later version.

   GCC is distributed in the hope that it will be useful, but
   WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with GCC; see the file COPYING3.  If not see
   <http://www.gnu.org/licenses/>.  */

#include "config.h"
#define INCLUDE_STRING
#include "system.h"
#include "coretypes.h"
#include "backend.h"
#include "target.h"
#include "rtl.h"
#include "tree.h"
#include "memmodel.h"
#include "gimple.h"
#include "cfghooks.h"
#include "cfgloop.h"
#include "df.h"
#include "tm_p.h"
#include "stringpool.h"
#include "optabs.h"
#include "regs.h"
#include "emit-rtl.h"
#include "recog.h"
#include "diagnostic.h"
#include "insn-attr.h"
#include "alias.h"
#include "fold-const.h"
#include "stor-layout.h"
#include "calls.h"
#include "varasm.h"
#include "output.h"
#include "flags.h"
#include "explow.h"
#include "expr.h"
#include "reload.h"
#include "langhooks.h"
#include "opts.h"
#include "params.h"
#include "gimplify.h"
#include "dwarf2.h"
#include "gimple-iterator.h"
#include "tree-vectorizer.h"
#include "aarch64-cost-tables.h"
#include "dumpfile.h"
#include "builtins.h"
#include "rtl-iter.h"
#include "tm-constrs.h"
#include "sched-int.h"
#include "target-globals.h"
#include "common/common-target.h"
#include "selftest.h"
#include "selftest-rtl.h"

/* This file should be included last.  */
#include "target-def.h"

/* Defined for convenience.  */
#define POINTER_BYTES (POINTER_SIZE / BITS_PER_UNIT)

/* Classifies an address.

   ADDRESS_REG_IMM
       A simple base register plus immediate offset.

   ADDRESS_REG_WB
       A base register indexed by immediate offset with writeback.

   ADDRESS_REG_REG
       A base register indexed by (optionally scaled) register.

   ADDRESS_REG_UXTW
       A base register indexed by (optionally scaled) zero-extended register.

   ADDRESS_REG_SXTW
       A base register indexed by (optionally scaled) sign-extended register.

   ADDRESS_LO_SUM
       A LO_SUM rtx with a base register and "LO12" symbol relocation.

   ADDRESS_SYMBOLIC
       A constant symbolic address, in a pc-relative literal pool.  */

enum aarch64_address_type {
  ADDRESS_REG_IMM,
  ADDRESS_REG_WB,
  ADDRESS_REG_REG,
  ADDRESS_REG_UXTW,
  ADDRESS_REG_SXTW,
  ADDRESS_LO_SUM,
  ADDRESS_SYMBOLIC
};
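
/* Illustrative examples (added for exposition, not part of the original
   sources): typical assembly forms for the classifications above, assuming
   standard AArch64 assembler syntax:

     ADDRESS_REG_IMM    ldr x0, [x1, #16]
     ADDRESS_REG_WB     ldr x0, [x1, #16]!    or   ldr x0, [x1], #16
     ADDRESS_REG_REG    ldr x0, [x1, x2, lsl #3]
     ADDRESS_REG_UXTW   ldr x0, [x1, w2, uxtw #2]
     ADDRESS_REG_SXTW   ldr x0, [x1, w2, sxtw #2]
     ADDRESS_LO_SUM     ldr x0, [x1, #:lo12:sym]
     ADDRESS_SYMBOLIC   ldr x0, .LCpool_entry  (pc-relative literal load)  */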

struct aarch64_address_info {
  enum aarch64_address_type type;
  rtx base;
  rtx offset;
  int shift;
  enum aarch64_symbol_type symbol_type;
};

struct simd_immediate_info
{
  rtx value;
  int shift;
  int element_width;
  bool mvn;
  bool msl;
};

/* The current code model.  */
enum aarch64_code_model aarch64_cmodel;

#ifdef HAVE_AS_TLS
#undef TARGET_HAVE_TLS
#define TARGET_HAVE_TLS 1
#endif

static bool aarch64_composite_type_p (const_tree, machine_mode);
static bool aarch64_vfp_is_call_or_return_candidate (machine_mode,
                                                     const_tree,
                                                     machine_mode *, int *,
                                                     bool *);
static void aarch64_elf_asm_constructor (rtx, int) ATTRIBUTE_UNUSED;
static void aarch64_elf_asm_destructor (rtx, int) ATTRIBUTE_UNUSED;
static void aarch64_override_options_after_change (void);
static bool aarch64_vector_mode_supported_p (machine_mode);
static bool aarch64_vectorize_vec_perm_const_ok (machine_mode vmode,
                                                 const unsigned char *sel);
static int aarch64_address_cost (rtx, machine_mode, addr_space_t, bool);
static bool aarch64_builtin_support_vector_misalignment (machine_mode mode,
                                                         const_tree type,
                                                         int misalignment,
                                                         bool is_packed);

/* Major revision number of the ARM Architecture implemented by the target.  */
unsigned aarch64_architecture_version;

/* The processor for which instructions should be scheduled.  */
enum aarch64_processor aarch64_tune = cortexa53;

/* Mask to specify which instruction scheduling options should be used.  */
unsigned long aarch64_tune_flags = 0;

/* Global flag for PC relative loads.  */
bool aarch64_pcrelative_literal_loads;

/* Support for command line parsing of boolean flags in the tuning
   structures.  */
struct aarch64_flag_desc
{
  const char* name;
  unsigned int flag;
};

#define AARCH64_FUSION_PAIR(name, internal_name) \
  { name, AARCH64_FUSE_##internal_name },
static const struct aarch64_flag_desc aarch64_fusible_pairs[] =
{
  { "none", AARCH64_FUSE_NOTHING },
#include "aarch64-fusion-pairs.def"
  { "all", AARCH64_FUSE_ALL },
  { NULL, AARCH64_FUSE_NOTHING }
};

#define AARCH64_EXTRA_TUNING_OPTION(name, internal_name) \
  { name, AARCH64_EXTRA_TUNE_##internal_name },
static const struct aarch64_flag_desc aarch64_tuning_flags[] =
{
  { "none", AARCH64_EXTRA_TUNE_NONE },
#include "aarch64-tuning-flags.def"
  { "all", AARCH64_EXTRA_TUNE_ALL },
  { NULL, AARCH64_EXTRA_TUNE_NONE }
};
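
/* Illustrative note (added for exposition): these name/flag tables are the
   lookup tables used when parsing the boolean tuning flags by name.  Assuming
   the port's -moverride=<string> option syntax, strings such as
     -moverride=fuse=all
     -moverride=tune=none
   would be resolved through the entries above, where "none" and "all" are
   always available in addition to the names pulled in from the .def files.  */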

/* Tuning parameters.  */

static const struct cpu_addrcost_table generic_addrcost_table =
{
  {
    1, /* hi  */
    0, /* si  */
    0, /* di  */
    1, /* ti  */
  },
  0, /* pre_modify  */
  0, /* post_modify  */
  0, /* register_offset  */
  0, /* register_sextend  */
  0, /* register_zextend  */
  0 /* imm_offset  */
};

static const struct cpu_addrcost_table cortexa57_addrcost_table =
{
  {
    1, /* hi  */
    0, /* si  */
    0, /* di  */
    1, /* ti  */
  },
  0, /* pre_modify  */
  0, /* post_modify  */
  0, /* register_offset  */
  0, /* register_sextend  */
  0, /* register_zextend  */
  0, /* imm_offset  */
};

static const struct cpu_addrcost_table exynosm1_addrcost_table =
{
  {
    0, /* hi  */
    0, /* si  */
    0, /* di  */
    2, /* ti  */
  },
  0, /* pre_modify  */
  0, /* post_modify  */
  1, /* register_offset  */
  1, /* register_sextend  */
  2, /* register_zextend  */
  0, /* imm_offset  */
};

static const struct cpu_addrcost_table xgene1_addrcost_table =
{
  {
    1, /* hi  */
    0, /* si  */
    0, /* di  */
    1, /* ti  */
  },
  1, /* pre_modify  */
  0, /* post_modify  */
  0, /* register_offset  */
  1, /* register_sextend  */
  1, /* register_zextend  */
  0, /* imm_offset  */
};

static const struct cpu_addrcost_table qdf24xx_addrcost_table =
{
  {
    1, /* hi  */
    0, /* si  */
    0, /* di  */
    1, /* ti  */
  },
  0, /* pre_modify  */
  0, /* post_modify  */
  0, /* register_offset  */
  0, /* register_sextend  */
  0, /* register_zextend  */
  0 /* imm_offset  */
};

static const struct cpu_addrcost_table thunderx2t99_addrcost_table =
{
  {
    1, /* hi  */
    1, /* si  */
    1, /* di  */
    2, /* ti  */
  },
  0, /* pre_modify  */
  0, /* post_modify  */
  2, /* register_offset  */
  3, /* register_sextend  */
  3, /* register_zextend  */
  0, /* imm_offset  */
};

static const struct cpu_regmove_cost generic_regmove_cost =
{
  1, /* GP2GP  */
  /* Avoid the use of slow int<->fp moves for spilling by setting
     their cost higher than memmov_cost.  */
  5, /* GP2FP  */
  5, /* FP2GP  */
  2 /* FP2FP  */
};

static const struct cpu_regmove_cost cortexa57_regmove_cost =
{
  1, /* GP2GP  */
  /* Avoid the use of slow int<->fp moves for spilling by setting
     their cost higher than memmov_cost.  */
  5, /* GP2FP  */
  5, /* FP2GP  */
  2 /* FP2FP  */
};

static const struct cpu_regmove_cost cortexa53_regmove_cost =
{
  1, /* GP2GP  */
  /* Avoid the use of slow int<->fp moves for spilling by setting
     their cost higher than memmov_cost.  */
  5, /* GP2FP  */
  5, /* FP2GP  */
  2 /* FP2FP  */
};

static const struct cpu_regmove_cost exynosm1_regmove_cost =
{
  1, /* GP2GP  */
  /* Avoid the use of slow int<->fp moves for spilling by setting
     their cost higher than memmov_cost (actual, 4 and 9).  */
  9, /* GP2FP  */
  9, /* FP2GP  */
  1 /* FP2FP  */
};

static const struct cpu_regmove_cost thunderx_regmove_cost =
{
  2, /* GP2GP  */
  2, /* GP2FP  */
  6, /* FP2GP  */
  4 /* FP2FP  */
};

static const struct cpu_regmove_cost xgene1_regmove_cost =
{
  1, /* GP2GP  */
  /* Avoid the use of slow int<->fp moves for spilling by setting
     their cost higher than memmov_cost.  */
  8, /* GP2FP  */
  8, /* FP2GP  */
  2 /* FP2FP  */
};

static const struct cpu_regmove_cost qdf24xx_regmove_cost =
{
  2, /* GP2GP  */
  /* Avoid the use of int<->fp moves for spilling.  */
  6, /* GP2FP  */
  6, /* FP2GP  */
  4 /* FP2FP  */
};

static const struct cpu_regmove_cost thunderx2t99_regmove_cost =
{
  1, /* GP2GP  */
  /* Avoid the use of int<->fp moves for spilling.  */
  8, /* GP2FP  */
  8, /* FP2GP  */
  4 /* FP2FP  */
};

/* Generic costs for vector insn classes.  */
static const struct cpu_vector_cost generic_vector_cost =
{
  1, /* scalar_int_stmt_cost  */
  1, /* scalar_fp_stmt_cost  */
  1, /* scalar_load_cost  */
  1, /* scalar_store_cost  */
  1, /* vec_int_stmt_cost  */
  1, /* vec_fp_stmt_cost  */
  2, /* vec_permute_cost  */
  1, /* vec_to_scalar_cost  */
  1, /* scalar_to_vec_cost  */
  1, /* vec_align_load_cost  */
  1, /* vec_unalign_load_cost  */
  1, /* vec_unalign_store_cost  */
  1, /* vec_store_cost  */
  3, /* cond_taken_branch_cost  */
  1 /* cond_not_taken_branch_cost  */
};

/* ThunderX costs for vector insn classes.  */
static const struct cpu_vector_cost thunderx_vector_cost =
{
  1, /* scalar_int_stmt_cost  */
  1, /* scalar_fp_stmt_cost  */
  3, /* scalar_load_cost  */
  1, /* scalar_store_cost  */
  4, /* vec_int_stmt_cost  */
  4, /* vec_fp_stmt_cost  */
  4, /* vec_permute_cost  */
  2, /* vec_to_scalar_cost  */
  2, /* scalar_to_vec_cost  */
  3, /* vec_align_load_cost  */
  10, /* vec_unalign_load_cost  */
  10, /* vec_unalign_store_cost  */
  1, /* vec_store_cost  */
  3, /* cond_taken_branch_cost  */
  3 /* cond_not_taken_branch_cost  */
};

/* Generic costs for vector insn classes.  */
static const struct cpu_vector_cost cortexa57_vector_cost =
{
  1, /* scalar_int_stmt_cost  */
  1, /* scalar_fp_stmt_cost  */
  4, /* scalar_load_cost  */
  1, /* scalar_store_cost  */
  2, /* vec_int_stmt_cost  */
  2, /* vec_fp_stmt_cost  */
  3, /* vec_permute_cost  */
  8, /* vec_to_scalar_cost  */
  8, /* scalar_to_vec_cost  */
  4, /* vec_align_load_cost  */
  4, /* vec_unalign_load_cost  */
  1, /* vec_unalign_store_cost  */
  1, /* vec_store_cost  */
  1, /* cond_taken_branch_cost  */
  1 /* cond_not_taken_branch_cost  */
};

static const struct cpu_vector_cost exynosm1_vector_cost =
{
  1, /* scalar_int_stmt_cost  */
  1, /* scalar_fp_stmt_cost  */
  5, /* scalar_load_cost  */
  1, /* scalar_store_cost  */
  3, /* vec_int_stmt_cost  */
  3, /* vec_fp_stmt_cost  */
  3, /* vec_permute_cost  */
  3, /* vec_to_scalar_cost  */
  3, /* scalar_to_vec_cost  */
  5, /* vec_align_load_cost  */
  5, /* vec_unalign_load_cost  */
  1, /* vec_unalign_store_cost  */
  1, /* vec_store_cost  */
  1, /* cond_taken_branch_cost  */
  1 /* cond_not_taken_branch_cost  */
};

/* Generic costs for vector insn classes.  */
static const struct cpu_vector_cost xgene1_vector_cost =
{
  1, /* scalar_int_stmt_cost  */
  1, /* scalar_fp_stmt_cost  */
  5, /* scalar_load_cost  */
  1, /* scalar_store_cost  */
  2, /* vec_int_stmt_cost  */
  2, /* vec_fp_stmt_cost  */
  2, /* vec_permute_cost  */
  4, /* vec_to_scalar_cost  */
  4, /* scalar_to_vec_cost  */
  10, /* vec_align_load_cost  */
  10, /* vec_unalign_load_cost  */
  2, /* vec_unalign_store_cost  */
  2, /* vec_store_cost  */
  2, /* cond_taken_branch_cost  */
  1 /* cond_not_taken_branch_cost  */
};

/* Costs for vector insn classes for Vulcan.  */
static const struct cpu_vector_cost thunderx2t99_vector_cost =
{
  1, /* scalar_int_stmt_cost  */
  6, /* scalar_fp_stmt_cost  */
  4, /* scalar_load_cost  */
  1, /* scalar_store_cost  */
  5, /* vec_int_stmt_cost  */
  6, /* vec_fp_stmt_cost  */
  3, /* vec_permute_cost  */
  6, /* vec_to_scalar_cost  */
  5, /* scalar_to_vec_cost  */
  8, /* vec_align_load_cost  */
  8, /* vec_unalign_load_cost  */
  4, /* vec_unalign_store_cost  */
  4, /* vec_store_cost  */
  2, /* cond_taken_branch_cost  */
  1 /* cond_not_taken_branch_cost  */
};

/* Generic costs for branch instructions.  */
static const struct cpu_branch_cost generic_branch_cost =
{
  1,  /* Predictable.  */
  3   /* Unpredictable.  */
};

/* Branch costs for Cortex-A57.  */
static const struct cpu_branch_cost cortexa57_branch_cost =
{
  1,  /* Predictable.  */
  3   /* Unpredictable.  */
};

/* Branch costs for Vulcan.  */
static const struct cpu_branch_cost thunderx2t99_branch_cost =
{
  1,  /* Predictable.  */
  3   /* Unpredictable.  */
};

/* Generic approximation modes.  */
static const cpu_approx_modes generic_approx_modes =
{
  AARCH64_APPROX_NONE, /* division  */
  AARCH64_APPROX_NONE, /* sqrt  */
  AARCH64_APPROX_NONE  /* recip_sqrt  */
};

/* Approximation modes for Exynos M1.  */
static const cpu_approx_modes exynosm1_approx_modes =
{
  AARCH64_APPROX_NONE, /* division  */
  AARCH64_APPROX_ALL,  /* sqrt  */
  AARCH64_APPROX_ALL   /* recip_sqrt  */
};

/* Approximation modes for X-Gene 1.  */
static const cpu_approx_modes xgene1_approx_modes =
{
  AARCH64_APPROX_NONE, /* division  */
  AARCH64_APPROX_NONE, /* sqrt  */
  AARCH64_APPROX_ALL   /* recip_sqrt  */
};

static const struct tune_params generic_tunings =
{
  &cortexa57_extra_costs,
  &generic_addrcost_table,
  &generic_regmove_cost,
  &generic_vector_cost,
  &generic_branch_cost,
  &generic_approx_modes,
  4, /* memmov_cost  */
  2, /* issue_rate  */
  (AARCH64_FUSE_AES_AESMC), /* fusible_ops  */
  8, /* function_align.  */
  8, /* jump_align.  */
  4, /* loop_align.  */
  2, /* int_reassoc_width.  */
  4, /* fp_reassoc_width.  */
  1, /* vec_reassoc_width.  */
  2, /* min_div_recip_mul_sf.  */
  2, /* min_div_recip_mul_df.  */
  0, /* max_case_values.  */
  0, /* cache_line_size.  */
  tune_params::AUTOPREFETCHER_OFF, /* autoprefetcher_model.  */
  (AARCH64_EXTRA_TUNE_NONE) /* tune_flags.  */
};

static const struct tune_params cortexa35_tunings =
{
  &cortexa53_extra_costs,
  &generic_addrcost_table,
  &cortexa53_regmove_cost,
  &generic_vector_cost,
  &cortexa57_branch_cost,
  &generic_approx_modes,
  4, /* memmov_cost  */
  1, /* issue_rate  */
  (AARCH64_FUSE_AES_AESMC | AARCH64_FUSE_MOV_MOVK | AARCH64_FUSE_ADRP_ADD
   | AARCH64_FUSE_MOVK_MOVK | AARCH64_FUSE_ADRP_LDR), /* fusible_ops  */
  16, /* function_align.  */
  8, /* jump_align.  */
  8, /* loop_align.  */
  2, /* int_reassoc_width.  */
  4, /* fp_reassoc_width.  */
  1, /* vec_reassoc_width.  */
  2, /* min_div_recip_mul_sf.  */
  2, /* min_div_recip_mul_df.  */
  0, /* max_case_values.  */
  0, /* cache_line_size.  */
  tune_params::AUTOPREFETCHER_WEAK, /* autoprefetcher_model.  */
  (AARCH64_EXTRA_TUNE_NONE) /* tune_flags.  */
};

static const struct tune_params cortexa53_tunings =
{
  &cortexa53_extra_costs,
  &generic_addrcost_table,
  &cortexa53_regmove_cost,
  &generic_vector_cost,
  &cortexa57_branch_cost,
  &generic_approx_modes,
  4, /* memmov_cost  */
  2, /* issue_rate  */
  (AARCH64_FUSE_AES_AESMC | AARCH64_FUSE_MOV_MOVK | AARCH64_FUSE_ADRP_ADD
   | AARCH64_FUSE_MOVK_MOVK | AARCH64_FUSE_ADRP_LDR), /* fusible_ops  */
  16, /* function_align.  */
  8, /* jump_align.  */
  8, /* loop_align.  */
  2, /* int_reassoc_width.  */
  4, /* fp_reassoc_width.  */
  1, /* vec_reassoc_width.  */
  2, /* min_div_recip_mul_sf.  */
  2, /* min_div_recip_mul_df.  */
  0, /* max_case_values.  */
  0, /* cache_line_size.  */
  tune_params::AUTOPREFETCHER_WEAK, /* autoprefetcher_model.  */
  (AARCH64_EXTRA_TUNE_NONE) /* tune_flags.  */
};

static const struct tune_params cortexa57_tunings =
{
  &cortexa57_extra_costs,
  &cortexa57_addrcost_table,
  &cortexa57_regmove_cost,
  &cortexa57_vector_cost,
  &cortexa57_branch_cost,
  &generic_approx_modes,
  4, /* memmov_cost  */
  3, /* issue_rate  */
  (AARCH64_FUSE_AES_AESMC | AARCH64_FUSE_MOV_MOVK | AARCH64_FUSE_ADRP_ADD
   | AARCH64_FUSE_MOVK_MOVK), /* fusible_ops  */
  16, /* function_align.  */
  8, /* jump_align.  */
  8, /* loop_align.  */
  2, /* int_reassoc_width.  */
  4, /* fp_reassoc_width.  */
  1, /* vec_reassoc_width.  */
  2, /* min_div_recip_mul_sf.  */
  2, /* min_div_recip_mul_df.  */
  0, /* max_case_values.  */
  0, /* cache_line_size.  */
  tune_params::AUTOPREFETCHER_WEAK, /* autoprefetcher_model.  */
  (AARCH64_EXTRA_TUNE_RENAME_FMA_REGS) /* tune_flags.  */
};

static const struct tune_params cortexa72_tunings =
{
  &cortexa57_extra_costs,
  &cortexa57_addrcost_table,
  &cortexa57_regmove_cost,
  &cortexa57_vector_cost,
  &cortexa57_branch_cost,
  &generic_approx_modes,
  4, /* memmov_cost  */
  3, /* issue_rate  */
  (AARCH64_FUSE_AES_AESMC | AARCH64_FUSE_MOV_MOVK | AARCH64_FUSE_ADRP_ADD
   | AARCH64_FUSE_MOVK_MOVK), /* fusible_ops  */
  16, /* function_align.  */
  8, /* jump_align.  */
  8, /* loop_align.  */
  2, /* int_reassoc_width.  */
  4, /* fp_reassoc_width.  */
  1, /* vec_reassoc_width.  */
  2, /* min_div_recip_mul_sf.  */
  2, /* min_div_recip_mul_df.  */
  0, /* max_case_values.  */
  0, /* cache_line_size.  */
  tune_params::AUTOPREFETCHER_WEAK, /* autoprefetcher_model.  */
  (AARCH64_EXTRA_TUNE_NONE) /* tune_flags.  */
};

static const struct tune_params cortexa73_tunings =
{
  &cortexa57_extra_costs,
  &cortexa57_addrcost_table,
  &cortexa57_regmove_cost,
  &cortexa57_vector_cost,
  &cortexa57_branch_cost,
  &generic_approx_modes,
  4, /* memmov_cost.  */
  2, /* issue_rate.  */
  (AARCH64_FUSE_AES_AESMC | AARCH64_FUSE_MOV_MOVK | AARCH64_FUSE_ADRP_ADD
   | AARCH64_FUSE_MOVK_MOVK | AARCH64_FUSE_ADRP_LDR), /* fusible_ops  */
  16, /* function_align.  */
  8, /* jump_align.  */
  8, /* loop_align.  */
  2, /* int_reassoc_width.  */
  4, /* fp_reassoc_width.  */
  1, /* vec_reassoc_width.  */
  2, /* min_div_recip_mul_sf.  */
  2, /* min_div_recip_mul_df.  */
  0, /* max_case_values.  */
  0, /* cache_line_size.  */
  tune_params::AUTOPREFETCHER_WEAK, /* autoprefetcher_model.  */
  (AARCH64_EXTRA_TUNE_NONE) /* tune_flags.  */
};

static const struct tune_params exynosm1_tunings =
{
  &exynosm1_extra_costs,
  &exynosm1_addrcost_table,
  &exynosm1_regmove_cost,
  &exynosm1_vector_cost,
  &generic_branch_cost,
  &exynosm1_approx_modes,
  4, /* memmov_cost  */
  3, /* issue_rate  */
  (AARCH64_FUSE_AES_AESMC), /* fusible_ops  */
  4, /* function_align.  */
  4, /* jump_align.  */
  4, /* loop_align.  */
  2, /* int_reassoc_width.  */
  4, /* fp_reassoc_width.  */
  1, /* vec_reassoc_width.  */
  2, /* min_div_recip_mul_sf.  */
  2, /* min_div_recip_mul_df.  */
  48, /* max_case_values.  */
  64, /* cache_line_size.  */
  tune_params::AUTOPREFETCHER_WEAK, /* autoprefetcher_model.  */
  (AARCH64_EXTRA_TUNE_NONE) /* tune_flags.  */
};

static const struct tune_params thunderx_tunings =
{
  &thunderx_extra_costs,
  &generic_addrcost_table,
  &thunderx_regmove_cost,
  &thunderx_vector_cost,
  &generic_branch_cost,
  &generic_approx_modes,
  6, /* memmov_cost  */
  2, /* issue_rate  */
  AARCH64_FUSE_CMP_BRANCH, /* fusible_ops  */
  8, /* function_align.  */
  8, /* jump_align.  */
  8, /* loop_align.  */
  2, /* int_reassoc_width.  */
  4, /* fp_reassoc_width.  */
  1, /* vec_reassoc_width.  */
  2, /* min_div_recip_mul_sf.  */
  2, /* min_div_recip_mul_df.  */
  0, /* max_case_values.  */
  0, /* cache_line_size.  */
  tune_params::AUTOPREFETCHER_OFF, /* autoprefetcher_model.  */
  (AARCH64_EXTRA_TUNE_SLOW_UNALIGNED_LDPW) /* tune_flags.  */
};

static const struct tune_params xgene1_tunings =
{
  &xgene1_extra_costs,
  &xgene1_addrcost_table,
  &xgene1_regmove_cost,
  &xgene1_vector_cost,
  &generic_branch_cost,
  &xgene1_approx_modes,
  6, /* memmov_cost  */
  4, /* issue_rate  */
  AARCH64_FUSE_NOTHING, /* fusible_ops  */
  16, /* function_align.  */
  8, /* jump_align.  */
  16, /* loop_align.  */
  2, /* int_reassoc_width.  */
  4, /* fp_reassoc_width.  */
  1, /* vec_reassoc_width.  */
  2, /* min_div_recip_mul_sf.  */
  2, /* min_div_recip_mul_df.  */
  0, /* max_case_values.  */
  0, /* cache_line_size.  */
  tune_params::AUTOPREFETCHER_OFF, /* autoprefetcher_model.  */
  (AARCH64_EXTRA_TUNE_NONE) /* tune_flags.  */
};

static const struct tune_params qdf24xx_tunings =
{
  &qdf24xx_extra_costs,
  &qdf24xx_addrcost_table,
  &qdf24xx_regmove_cost,
  &generic_vector_cost,
  &generic_branch_cost,
  &generic_approx_modes,
  4, /* memmov_cost  */
  4, /* issue_rate  */
  (AARCH64_FUSE_MOV_MOVK | AARCH64_FUSE_ADRP_ADD
   | AARCH64_FUSE_MOVK_MOVK), /* fuseable_ops  */
  16, /* function_align.  */
  8, /* jump_align.  */
  16, /* loop_align.  */
  2, /* int_reassoc_width.  */
  4, /* fp_reassoc_width.  */
  1, /* vec_reassoc_width.  */
  2, /* min_div_recip_mul_sf.  */
  2, /* min_div_recip_mul_df.  */
  0, /* max_case_values.  */
  64, /* cache_line_size.  */
  tune_params::AUTOPREFETCHER_STRONG, /* autoprefetcher_model.  */
  (AARCH64_EXTRA_TUNE_NONE) /* tune_flags.  */
};

static const struct tune_params thunderx2t99_tunings =
{
  &thunderx2t99_extra_costs,
  &thunderx2t99_addrcost_table,
  &thunderx2t99_regmove_cost,
  &thunderx2t99_vector_cost,
  &thunderx2t99_branch_cost,
  &generic_approx_modes,
  4, /* memmov_cost.  */
  4, /* issue_rate.  */
  (AARCH64_FUSE_CMP_BRANCH | AARCH64_FUSE_AES_AESMC), /* fusible_ops  */
  16, /* function_align.  */
  8, /* jump_align.  */
  16, /* loop_align.  */
  3, /* int_reassoc_width.  */
  2, /* fp_reassoc_width.  */
  2, /* vec_reassoc_width.  */
  2, /* min_div_recip_mul_sf.  */
  2, /* min_div_recip_mul_df.  */
  0, /* max_case_values.  */
  64, /* cache_line_size.  */
  tune_params::AUTOPREFETCHER_OFF, /* autoprefetcher_model.  */
  (AARCH64_EXTRA_TUNE_NONE) /* tune_flags.  */
};

/* Support for fine-grained override of the tuning structures.  */
struct aarch64_tuning_override_function
{
  const char* name;
  void (*parse_override)(const char*, struct tune_params*);
};

static void aarch64_parse_fuse_string (const char*, struct tune_params*);
static void aarch64_parse_tune_string (const char*, struct tune_params*);

static const struct aarch64_tuning_override_function
aarch64_tuning_override_functions[] =
{
  { "fuse", aarch64_parse_fuse_string },
  { "tune", aarch64_parse_tune_string },
  { NULL, NULL }
};

/* A processor implementing AArch64.  */
struct processor
{
  const char *const name;
  enum aarch64_processor ident;
  enum aarch64_processor sched_core;
  enum aarch64_arch arch;
  unsigned architecture_version;
  const unsigned long flags;
  const struct tune_params *const tune;
};

/* Architectures implementing AArch64.  */
static const struct processor all_architectures[] =
{
#define AARCH64_ARCH(NAME, CORE, ARCH_IDENT, ARCH_REV, FLAGS) \
  {NAME, CORE, CORE, AARCH64_ARCH_##ARCH_IDENT, ARCH_REV, FLAGS, NULL},
#include "aarch64-arches.def"
  {NULL, aarch64_none, aarch64_none, aarch64_no_arch, 0, 0, NULL}
};

/* Processor cores implementing AArch64.  */
static const struct processor all_cores[] =
{
#define AARCH64_CORE(NAME, IDENT, SCHED, ARCH, FLAGS, COSTS, IMP, PART, VARIANT) \
  {NAME, IDENT, SCHED, AARCH64_ARCH_##ARCH, \
   all_architectures[AARCH64_ARCH_##ARCH].architecture_version, \
   FLAGS, &COSTS##_tunings},
#include "aarch64-cores.def"
  {"generic", generic, cortexa53, AARCH64_ARCH_8A, 8,
   AARCH64_FL_FOR_ARCH8, &generic_tunings},
  {NULL, aarch64_none, aarch64_none, aarch64_no_arch, 0, 0, NULL}
};


/* Target specification.  These are populated by the -march, -mtune, -mcpu
   handling code or by target attributes.  */
static const struct processor *selected_arch;
static const struct processor *selected_cpu;
static const struct processor *selected_tune;

/* The current tuning set.  */
struct tune_params aarch64_tune_params = generic_tunings;

#define AARCH64_CPU_DEFAULT_FLAGS ((selected_cpu) ? selected_cpu->flags : 0)

/* An ISA extension in the co-processor and main instruction set space.  */
struct aarch64_option_extension
{
  const char *const name;
  const unsigned long flags_on;
  const unsigned long flags_off;
};

typedef enum aarch64_cond_code
{
  AARCH64_EQ = 0, AARCH64_NE, AARCH64_CS, AARCH64_CC, AARCH64_MI, AARCH64_PL,
  AARCH64_VS, AARCH64_VC, AARCH64_HI, AARCH64_LS, AARCH64_GE, AARCH64_LT,
  AARCH64_GT, AARCH64_LE, AARCH64_AL, AARCH64_NV
}
aarch64_cc;

#define AARCH64_INVERSE_CONDITION_CODE(X) ((aarch64_cc) (((int) X) ^ 1))

/* The condition codes of the processor, and the inverse function.  */
static const char * const aarch64_condition_codes[] =
{
  "eq", "ne", "cs", "cc", "mi", "pl", "vs", "vc",
  "hi", "ls", "ge", "lt", "gt", "le", "al", "nv"
};
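
/* Worked example (added for clarity, not part of the original sources):
   AARCH64_INVERSE_CONDITION_CODE (AARCH64_EQ) is AARCH64_NE, because the
   encodings pair up so that flipping the low bit (0 <-> 1, 2 <-> 3, ...)
   maps a condition to its inverse: "eq" <-> "ne", "cs" <-> "cc", "hi" <->
   "ls", and so on, matching the ordering of aarch64_condition_codes.  */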
898
973d2e01
TP
899/* Generate code to enable conditional branches in functions over 1 MiB. */
900const char *
901aarch64_gen_far_branch (rtx * operands, int pos_label, const char * dest,
902 const char * branch_format)
903{
904 rtx_code_label * tmp_label = gen_label_rtx ();
905 char label_buf[256];
906 char buffer[128];
907 ASM_GENERATE_INTERNAL_LABEL (label_buf, dest,
908 CODE_LABEL_NUMBER (tmp_label));
909 const char *label_ptr = targetm.strip_name_encoding (label_buf);
910 rtx dest_label = operands[pos_label];
911 operands[pos_label] = tmp_label;
912
913 snprintf (buffer, sizeof (buffer), "%s%s", branch_format, label_ptr);
914 output_asm_insn (buffer, operands);
915
916 snprintf (buffer, sizeof (buffer), "b\t%%l%d\n%s:", pos_label, label_ptr);
917 operands[pos_label] = dest_label;
918 output_asm_insn (buffer, operands);
919 return "";
920}
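
/* Illustrative sketch (added for exposition; the exact strings come from the
   callers in the .md patterns): when the original conditional branch cannot
   reach its target, the caller passes an inverted condition in BRANCH_FORMAT,
   e.g. "b.ne\t", and the routine above then emits something like

       b.ne    .Lskip           // inverted condition, +/-1 MiB range
       b       original_target  // unconditional, +/-128 MiB range
     .Lskip:

   so the original condition still transfers control to the far target.  */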

void
aarch64_err_no_fpadvsimd (machine_mode mode, const char *msg)
{
  const char *mc = FLOAT_MODE_P (mode) ? "floating-point" : "vector";
  if (TARGET_GENERAL_REGS_ONLY)
    error ("%qs is incompatible with %s %s", "-mgeneral-regs-only", mc, msg);
  else
    error ("%qs feature modifier is incompatible with %s %s", "+nofp", mc, msg);
}

/* Implement TARGET_IRA_CHANGE_PSEUDO_ALLOCNO_CLASS.
   The register allocator chooses ALL_REGS if FP_REGS and GENERAL_REGS have
   the same cost even if ALL_REGS has a much larger cost.  ALL_REGS is also
   used if the cost of both FP_REGS and GENERAL_REGS is lower than the memory
   cost (in this case the best class is the lowest cost one).  Using ALL_REGS
   irrespective of its cost results in bad allocations with many redundant
   int<->FP moves which are expensive on various cores.
   To avoid this we don't allow ALL_REGS as the allocno class, but force a
   decision between FP_REGS and GENERAL_REGS.  We use the allocno class if it
   isn't ALL_REGS.  Similarly, use the best class if it isn't ALL_REGS.
   Otherwise set the allocno class depending on the mode.
   The result of this is that it is no longer inefficient to have a higher
   memory move cost than the register move cost.  */

static reg_class_t
aarch64_ira_change_pseudo_allocno_class (int regno, reg_class_t allocno_class,
                                         reg_class_t best_class)
{
  enum machine_mode mode;

  if (allocno_class != ALL_REGS)
    return allocno_class;

  if (best_class != ALL_REGS)
    return best_class;

  mode = PSEUDO_REGNO_MODE (regno);
  return FLOAT_MODE_P (mode) || VECTOR_MODE_P (mode) ? FP_REGS : GENERAL_REGS;
}

static unsigned int
aarch64_min_divisions_for_recip_mul (enum machine_mode mode)
{
  if (GET_MODE_UNIT_SIZE (mode) == 4)
    return aarch64_tune_params.min_div_recip_mul_sf;
  return aarch64_tune_params.min_div_recip_mul_df;
}

static int
aarch64_reassociation_width (unsigned opc ATTRIBUTE_UNUSED,
                             enum machine_mode mode)
{
  if (VECTOR_MODE_P (mode))
    return aarch64_tune_params.vec_reassoc_width;
  if (INTEGRAL_MODE_P (mode))
    return aarch64_tune_params.int_reassoc_width;
  if (FLOAT_MODE_P (mode))
    return aarch64_tune_params.fp_reassoc_width;
  return 1;
}

/* Provide a mapping from gcc register numbers to dwarf register numbers.  */
unsigned
aarch64_dbx_register_number (unsigned regno)
{
  if (GP_REGNUM_P (regno))
    return AARCH64_DWARF_R0 + regno - R0_REGNUM;
  else if (regno == SP_REGNUM)
    return AARCH64_DWARF_SP;
  else if (FP_REGNUM_P (regno))
    return AARCH64_DWARF_V0 + regno - V0_REGNUM;

  /* Return values >= DWARF_FRAME_REGISTERS indicate that there is no
     equivalent DWARF register.  */
  return DWARF_FRAME_REGISTERS;
}
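
/* Worked examples (added for clarity): with the AArch64 DWARF register
   numbering (AARCH64_DWARF_R0 = 0, AARCH64_DWARF_SP = 31, AARCH64_DWARF_V0
   = 64), x5 maps to 5, the stack pointer maps to 31, and v3 maps to 67.  */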

/* Return TRUE if MODE is any of the large INT modes.  */
static bool
aarch64_vect_struct_mode_p (machine_mode mode)
{
  return mode == OImode || mode == CImode || mode == XImode;
}

/* Return TRUE if MODE is any of the vector modes.  */
static bool
aarch64_vector_mode_p (machine_mode mode)
{
  return aarch64_vector_mode_supported_p (mode)
         || aarch64_vect_struct_mode_p (mode);
}

/* Implement target hook TARGET_ARRAY_MODE_SUPPORTED_P.  */
static bool
aarch64_array_mode_supported_p (machine_mode mode,
                                unsigned HOST_WIDE_INT nelems)
{
  if (TARGET_SIMD
      && (AARCH64_VALID_SIMD_QREG_MODE (mode)
          || AARCH64_VALID_SIMD_DREG_MODE (mode))
      && (nelems >= 2 && nelems <= 4))
    return true;

  return false;
}

/* Implement HARD_REGNO_NREGS.  */

int
aarch64_hard_regno_nregs (unsigned regno, machine_mode mode)
{
  switch (aarch64_regno_regclass (regno))
    {
    case FP_REGS:
    case FP_LO_REGS:
      return (GET_MODE_SIZE (mode) + UNITS_PER_VREG - 1) / UNITS_PER_VREG;
    default:
      return (GET_MODE_SIZE (mode) + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
    }
  gcc_unreachable ();
}

/* Implement HARD_REGNO_MODE_OK.  */

int
aarch64_hard_regno_mode_ok (unsigned regno, machine_mode mode)
{
  if (GET_MODE_CLASS (mode) == MODE_CC)
    return regno == CC_REGNUM;

  if (regno == SP_REGNUM)
    /* The purpose of comparing with ptr_mode is to support the
       global register variable associated with the stack pointer
       register via the syntax of asm ("wsp") in ILP32.  */
    return mode == Pmode || mode == ptr_mode;

  if (regno == FRAME_POINTER_REGNUM || regno == ARG_POINTER_REGNUM)
    return mode == Pmode;

  if (GP_REGNUM_P (regno) && ! aarch64_vect_struct_mode_p (mode))
    return 1;

  if (FP_REGNUM_P (regno))
    {
      if (aarch64_vect_struct_mode_p (mode))
        return
          (regno + aarch64_hard_regno_nregs (regno, mode) - 1) <= V31_REGNUM;
      else
        return 1;
    }

  return 0;
}

/* Implement HARD_REGNO_CALLER_SAVE_MODE.  */
machine_mode
aarch64_hard_regno_caller_save_mode (unsigned regno, unsigned nregs,
                                     machine_mode mode)
{
  /* Handle modes that fit within single registers.  */
  if (nregs == 1 && GET_MODE_SIZE (mode) <= 16)
    {
      if (GET_MODE_SIZE (mode) >= 4)
        return mode;
      else
        return SImode;
    }
  /* Fall back to generic for multi-reg and very large modes.  */
  else
    return choose_hard_reg_mode (regno, nregs, false);
}

/* Return true if calls to DECL should be treated as
   long-calls (i.e. called via a register).  */
static bool
aarch64_decl_is_long_call_p (const_tree decl ATTRIBUTE_UNUSED)
{
  return false;
}

/* Return true if calls to symbol-ref SYM should be treated as
   long-calls (i.e. called via a register).  */
bool
aarch64_is_long_call_p (rtx sym)
{
  return aarch64_decl_is_long_call_p (SYMBOL_REF_DECL (sym));
}

/* Return true if calls to symbol-ref SYM should not go through
   plt stubs.  */

bool
aarch64_is_noplt_call_p (rtx sym)
{
  const_tree decl = SYMBOL_REF_DECL (sym);

  if (flag_pic
      && decl
      && (!flag_plt
          || lookup_attribute ("noplt", DECL_ATTRIBUTES (decl)))
      && !targetm.binds_local_p (decl))
    return true;

  return false;
}

/* Return true if the offsets to a zero/sign-extract operation
   represent an expression that matches an extend operation.  The
   operands represent the parameters from

     (extract:MODE (mult (reg) (MULT_IMM)) (EXTRACT_IMM) (const_int 0)).  */
bool
aarch64_is_extend_from_extract (machine_mode mode, rtx mult_imm,
                                rtx extract_imm)
{
  HOST_WIDE_INT mult_val, extract_val;

  if (! CONST_INT_P (mult_imm) || ! CONST_INT_P (extract_imm))
    return false;

  mult_val = INTVAL (mult_imm);
  extract_val = INTVAL (extract_imm);

  if (extract_val > 8
      && extract_val < GET_MODE_BITSIZE (mode)
      && exact_log2 (extract_val & ~7) > 0
      && (extract_val & 7) <= 4
      && mult_val == (1 << (extract_val & 7)))
    return true;

  return false;
}
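
/* Illustrative example (added for exposition, not part of the original
   sources): an rtx of the shape

     (zero_extract:DI (mult (reg:DI x) (const_int 8))
                      (const_int 35) (const_int 0))

   passes the checks above with extract_val = 35 and mult_val = 8, since
   35 & ~7 = 32 is a power of two, 35 & 7 = 3 <= 4 and 8 == 1 << 3.  This is
   the canonical form combine produces for a 32-bit value extended and then
   shifted left by 3, as in "add x0, x1, w2, uxtw #3" (sign_extract for the
   sxtw variant).  */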

/* Emit an insn that's a simple single-set.  Both the operands must be
   known to be valid.  */
inline static rtx_insn *
emit_set_insn (rtx x, rtx y)
{
  return emit_insn (gen_rtx_SET (x, y));
}

/* X and Y are two things to compare using CODE.  Emit the compare insn and
   return the rtx for register 0 in the proper mode.  */
rtx
aarch64_gen_compare_reg (RTX_CODE code, rtx x, rtx y)
{
  machine_mode mode = SELECT_CC_MODE (code, x, y);
  rtx cc_reg = gen_rtx_REG (mode, CC_REGNUM);

  emit_set_insn (cc_reg, gen_rtx_COMPARE (mode, x, y));
  return cc_reg;
}

/* Build the SYMBOL_REF for __tls_get_addr.  */

static GTY(()) rtx tls_get_addr_libfunc;

rtx
aarch64_tls_get_addr (void)
{
  if (!tls_get_addr_libfunc)
    tls_get_addr_libfunc = init_one_libfunc ("__tls_get_addr");
  return tls_get_addr_libfunc;
}

/* Return the TLS model to use for ADDR.  */

static enum tls_model
tls_symbolic_operand_type (rtx addr)
{
  enum tls_model tls_kind = TLS_MODEL_NONE;
  rtx sym, addend;

  if (GET_CODE (addr) == CONST)
    {
      split_const (addr, &sym, &addend);
      if (GET_CODE (sym) == SYMBOL_REF)
        tls_kind = SYMBOL_REF_TLS_MODEL (sym);
    }
  else if (GET_CODE (addr) == SYMBOL_REF)
    tls_kind = SYMBOL_REF_TLS_MODEL (addr);

  return tls_kind;
}
/* We'll allow lo_sums in our legitimate addresses so that combine can
   take care of combining addresses where necessary, but for generation
   purposes, we'll generate the address as:

     RTL                                Absolute
     tmp = hi (symbol_ref);             adrp x1, foo
     dest = lo_sum (tmp, symbol_ref);   add dest, x1, :lo_12:foo
                                        nop

     PIC                                TLS
     adrp x1, :got:foo                  adrp tmp, :tlsgd:foo
     ldr x1, [:got_lo12:foo]            add dest, tmp, :tlsgd_lo12:foo
                                        bl __tls_get_addr
                                        nop

   Load TLS symbol, depending on TLS mechanism and TLS access model.

   Global Dynamic - Traditional TLS:
     adrp tmp, :tlsgd:imm
     add dest, tmp, #:tlsgd_lo12:imm
     bl __tls_get_addr

   Global Dynamic - TLS Descriptors:
     adrp dest, :tlsdesc:imm
     ldr tmp, [dest, #:tlsdesc_lo12:imm]
     add dest, dest, #:tlsdesc_lo12:imm
     blr tmp
     mrs tp, tpidr_el0
     add dest, dest, tp

   Initial Exec:
     mrs tp, tpidr_el0
     adrp tmp, :gottprel:imm
     ldr dest, [tmp, #:gottprel_lo12:imm]
     add dest, dest, tp

   Local Exec:
     mrs tp, tpidr_el0
     add t0, tp, #:tprel_hi12:imm, lsl #12
     add t0, t0, #:tprel_lo12_nc:imm
*/

static void
aarch64_load_symref_appropriately (rtx dest, rtx imm,
                                   enum aarch64_symbol_type type)
{
  switch (type)
    {
    case SYMBOL_SMALL_ABSOLUTE:
      {
        /* In ILP32, the mode of dest can be either SImode or DImode.  */
        rtx tmp_reg = dest;
        machine_mode mode = GET_MODE (dest);

        gcc_assert (mode == Pmode || mode == ptr_mode);

        if (can_create_pseudo_p ())
          tmp_reg = gen_reg_rtx (mode);

        emit_move_insn (tmp_reg, gen_rtx_HIGH (mode, imm));
        emit_insn (gen_add_losym (dest, tmp_reg, imm));
        return;
      }

    case SYMBOL_TINY_ABSOLUTE:
      emit_insn (gen_rtx_SET (dest, imm));
      return;

    case SYMBOL_SMALL_GOT_28K:
      {
        machine_mode mode = GET_MODE (dest);
        rtx gp_rtx = pic_offset_table_rtx;
        rtx insn;
        rtx mem;

        /* NOTE: pic_offset_table_rtx can be NULL_RTX, because we can reach
           here before rtl expand.  Tree IVOPT will generate an rtl pattern to
           decide rtx costs, in which case pic_offset_table_rtx is not
           initialized.  In that case there is no need to generate the first
           adrp instruction, as the final cost for global variable access is
           one instruction.  */
        if (gp_rtx != NULL)
          {
            /* -fpic for -mcmodel=small allows a 32K GOT table size (but since
               we use the page base as the GOT base, the first page may be
               wasted; in the worst scenario there is only 28K of space for
               the GOT).

               The generated instruction sequence for accessing a global
               variable is:

                 ldr reg, [pic_offset_table_rtx, #:gotpage_lo15:sym]

               Only one instruction is needed.  But we must initialize
               pic_offset_table_rtx properly.  We generate an initialization
               insn for every global access, and allow CSE to remove the
               redundant ones.

               The final instruction sequence will look like the following
               when accessing multiple global variables:

                 adrp pic_offset_table_rtx, _GLOBAL_OFFSET_TABLE_

                 ldr reg, [pic_offset_table_rtx, #:gotpage_lo15:sym1]
                 ldr reg, [pic_offset_table_rtx, #:gotpage_lo15:sym2]
                 ldr reg, [pic_offset_table_rtx, #:gotpage_lo15:sym3]
                 ...  */

            rtx s = gen_rtx_SYMBOL_REF (Pmode, "_GLOBAL_OFFSET_TABLE_");
            crtl->uses_pic_offset_table = 1;
            emit_move_insn (gp_rtx, gen_rtx_HIGH (Pmode, s));

            if (mode != GET_MODE (gp_rtx))
              gp_rtx = gen_lowpart (mode, gp_rtx);
          }

        if (mode == ptr_mode)
          {
            if (mode == DImode)
              insn = gen_ldr_got_small_28k_di (dest, gp_rtx, imm);
            else
              insn = gen_ldr_got_small_28k_si (dest, gp_rtx, imm);

            mem = XVECEXP (SET_SRC (insn), 0, 0);
          }
        else
          {
            gcc_assert (mode == Pmode);

            insn = gen_ldr_got_small_28k_sidi (dest, gp_rtx, imm);
            mem = XVECEXP (XEXP (SET_SRC (insn), 0), 0, 0);
          }

        /* The operand is expected to be a MEM.  Whenever the related insn
           pattern changes, the code above which calculates MEM should be
           updated.  */
        gcc_assert (GET_CODE (mem) == MEM);
        MEM_READONLY_P (mem) = 1;
        MEM_NOTRAP_P (mem) = 1;
        emit_insn (insn);
        return;
      }

    case SYMBOL_SMALL_GOT_4G:
      {
        /* In ILP32, the mode of dest can be either SImode or DImode,
           while the got entry is always of SImode size.  The mode of
           dest depends on how dest is used: if dest is assigned to a
           pointer (e.g. in the memory), it has SImode; it may have
           DImode if dest is dereferenced to access the memory.
           This is why we have to handle three different ldr_got_small
           patterns here (two patterns for ILP32).  */

        rtx insn;
        rtx mem;
        rtx tmp_reg = dest;
        machine_mode mode = GET_MODE (dest);

        if (can_create_pseudo_p ())
          tmp_reg = gen_reg_rtx (mode);

        emit_move_insn (tmp_reg, gen_rtx_HIGH (mode, imm));
        if (mode == ptr_mode)
          {
            if (mode == DImode)
              insn = gen_ldr_got_small_di (dest, tmp_reg, imm);
            else
              insn = gen_ldr_got_small_si (dest, tmp_reg, imm);

            mem = XVECEXP (SET_SRC (insn), 0, 0);
          }
        else
          {
            gcc_assert (mode == Pmode);

            insn = gen_ldr_got_small_sidi (dest, tmp_reg, imm);
            mem = XVECEXP (XEXP (SET_SRC (insn), 0), 0, 0);
          }

        gcc_assert (GET_CODE (mem) == MEM);
        MEM_READONLY_P (mem) = 1;
        MEM_NOTRAP_P (mem) = 1;
        emit_insn (insn);
        return;
      }

    case SYMBOL_SMALL_TLSGD:
      {
        rtx_insn *insns;
        machine_mode mode = GET_MODE (dest);
        rtx result = gen_rtx_REG (mode, R0_REGNUM);

        start_sequence ();
        if (TARGET_ILP32)
          aarch64_emit_call_insn (gen_tlsgd_small_si (result, imm));
        else
          aarch64_emit_call_insn (gen_tlsgd_small_di (result, imm));
        insns = get_insns ();
        end_sequence ();

        RTL_CONST_CALL_P (insns) = 1;
        emit_libcall_block (insns, dest, result, imm);
        return;
      }

    case SYMBOL_SMALL_TLSDESC:
      {
        machine_mode mode = GET_MODE (dest);
        rtx x0 = gen_rtx_REG (mode, R0_REGNUM);
        rtx tp;

        gcc_assert (mode == Pmode || mode == ptr_mode);

        /* In ILP32, the got entry is always of SImode size.  Unlike
           small GOT, the dest is fixed at reg 0.  */
        if (TARGET_ILP32)
          emit_insn (gen_tlsdesc_small_si (imm));
        else
          emit_insn (gen_tlsdesc_small_di (imm));
        tp = aarch64_load_tp (NULL);

        if (mode != Pmode)
          tp = gen_lowpart (mode, tp);

        emit_insn (gen_rtx_SET (dest, gen_rtx_PLUS (mode, tp, x0)));
        set_unique_reg_note (get_last_insn (), REG_EQUIV, imm);
        return;
      }

    case SYMBOL_SMALL_TLSIE:
      {
        /* In ILP32, the mode of dest can be either SImode or DImode,
           while the got entry is always of SImode size.  The mode of
           dest depends on how dest is used: if dest is assigned to a
           pointer (e.g. in the memory), it has SImode; it may have
           DImode if dest is dereferenced to access the memory.
           This is why we have to handle three different tlsie_small
           patterns here (two patterns for ILP32).  */
        machine_mode mode = GET_MODE (dest);
        rtx tmp_reg = gen_reg_rtx (mode);
        rtx tp = aarch64_load_tp (NULL);

        if (mode == ptr_mode)
          {
            if (mode == DImode)
              emit_insn (gen_tlsie_small_di (tmp_reg, imm));
            else
              {
                emit_insn (gen_tlsie_small_si (tmp_reg, imm));
                tp = gen_lowpart (mode, tp);
              }
          }
        else
          {
            gcc_assert (mode == Pmode);
            emit_insn (gen_tlsie_small_sidi (tmp_reg, imm));
          }

        emit_insn (gen_rtx_SET (dest, gen_rtx_PLUS (mode, tp, tmp_reg)));
        set_unique_reg_note (get_last_insn (), REG_EQUIV, imm);
        return;
      }

    case SYMBOL_TLSLE12:
    case SYMBOL_TLSLE24:
    case SYMBOL_TLSLE32:
    case SYMBOL_TLSLE48:
      {
        machine_mode mode = GET_MODE (dest);
        rtx tp = aarch64_load_tp (NULL);

        if (mode != Pmode)
          tp = gen_lowpart (mode, tp);

        switch (type)
          {
          case SYMBOL_TLSLE12:
            emit_insn ((mode == DImode ? gen_tlsle12_di : gen_tlsle12_si)
                       (dest, tp, imm));
            break;
          case SYMBOL_TLSLE24:
            emit_insn ((mode == DImode ? gen_tlsle24_di : gen_tlsle24_si)
                       (dest, tp, imm));
            break;
          case SYMBOL_TLSLE32:
            emit_insn ((mode == DImode ? gen_tlsle32_di : gen_tlsle32_si)
                       (dest, imm));
            emit_insn ((mode == DImode ? gen_adddi3 : gen_addsi3)
                       (dest, dest, tp));
            break;
          case SYMBOL_TLSLE48:
            emit_insn ((mode == DImode ? gen_tlsle48_di : gen_tlsle48_si)
                       (dest, imm));
            emit_insn ((mode == DImode ? gen_adddi3 : gen_addsi3)
                       (dest, dest, tp));
            break;
          default:
            gcc_unreachable ();
          }

        set_unique_reg_note (get_last_insn (), REG_EQUIV, imm);
        return;
      }

    case SYMBOL_TINY_GOT:
      emit_insn (gen_ldr_got_tiny (dest, imm));
      return;

    case SYMBOL_TINY_TLSIE:
      {
        machine_mode mode = GET_MODE (dest);
        rtx tp = aarch64_load_tp (NULL);

        if (mode == ptr_mode)
          {
            if (mode == DImode)
              emit_insn (gen_tlsie_tiny_di (dest, imm, tp));
            else
              {
                tp = gen_lowpart (mode, tp);
                emit_insn (gen_tlsie_tiny_si (dest, imm, tp));
              }
          }
        else
          {
            gcc_assert (mode == Pmode);
            emit_insn (gen_tlsie_tiny_sidi (dest, imm, tp));
          }

        set_unique_reg_note (get_last_insn (), REG_EQUIV, imm);
        return;
      }

    default:
      gcc_unreachable ();
    }
}
/* Emit a move from SRC to DEST.  Assume that the move expanders can
   handle all moves if !can_create_pseudo_p ().  The distinction is
   important because, unlike emit_move_insn, the move expanders know
   how to force Pmode objects into the constant pool even when the
   constant pool address is not itself legitimate.  */
static rtx
aarch64_emit_move (rtx dest, rtx src)
{
  return (can_create_pseudo_p ()
          ? emit_move_insn (dest, src)
          : emit_move_insn_1 (dest, src));
}
/* Split a 128-bit move operation into two 64-bit move operations,
   taking care to handle partial overlap of register to register
   copies.  Special cases are needed when moving between GP regs and
   FP regs.  SRC can be a register, constant or memory; DST a register
   or memory.  If either operand is memory it must not have any side
   effects.  */
void
aarch64_split_128bit_move (rtx dst, rtx src)
{
  rtx dst_lo, dst_hi;
  rtx src_lo, src_hi;

  machine_mode mode = GET_MODE (dst);

  gcc_assert (mode == TImode || mode == TFmode);
  gcc_assert (!(side_effects_p (src) || side_effects_p (dst)));
  gcc_assert (mode == GET_MODE (src) || GET_MODE (src) == VOIDmode);

  if (REG_P (dst) && REG_P (src))
    {
      int src_regno = REGNO (src);
      int dst_regno = REGNO (dst);

      /* Handle FP <-> GP regs.  */
      if (FP_REGNUM_P (dst_regno) && GP_REGNUM_P (src_regno))
        {
          src_lo = gen_lowpart (word_mode, src);
          src_hi = gen_highpart (word_mode, src);

          if (mode == TImode)
            {
              emit_insn (gen_aarch64_movtilow_di (dst, src_lo));
              emit_insn (gen_aarch64_movtihigh_di (dst, src_hi));
            }
          else
            {
              emit_insn (gen_aarch64_movtflow_di (dst, src_lo));
              emit_insn (gen_aarch64_movtfhigh_di (dst, src_hi));
            }
          return;
        }
      else if (GP_REGNUM_P (dst_regno) && FP_REGNUM_P (src_regno))
        {
          dst_lo = gen_lowpart (word_mode, dst);
          dst_hi = gen_highpart (word_mode, dst);

          if (mode == TImode)
            {
              emit_insn (gen_aarch64_movdi_tilow (dst_lo, src));
              emit_insn (gen_aarch64_movdi_tihigh (dst_hi, src));
            }
          else
            {
              emit_insn (gen_aarch64_movdi_tflow (dst_lo, src));
              emit_insn (gen_aarch64_movdi_tfhigh (dst_hi, src));
            }
          return;
        }
    }

  dst_lo = gen_lowpart (word_mode, dst);
  dst_hi = gen_highpart (word_mode, dst);
  src_lo = gen_lowpart (word_mode, src);
  src_hi = gen_highpart_mode (word_mode, mode, src);

  /* At most one pairing may overlap.  */
  if (reg_overlap_mentioned_p (dst_lo, src_hi))
    {
      aarch64_emit_move (dst_hi, src_hi);
      aarch64_emit_move (dst_lo, src_lo);
    }
  else
    {
      aarch64_emit_move (dst_lo, src_lo);
      aarch64_emit_move (dst_hi, src_hi);
    }
}

bool
aarch64_split_128bit_move_p (rtx dst, rtx src)
{
  return (! REG_P (src)
          || ! (FP_REGNUM_P (REGNO (dst)) && FP_REGNUM_P (REGNO (src))));
}

/* Split a complex SIMD combine.  */

void
aarch64_split_simd_combine (rtx dst, rtx src1, rtx src2)
{
  machine_mode src_mode = GET_MODE (src1);
  machine_mode dst_mode = GET_MODE (dst);

  gcc_assert (VECTOR_MODE_P (dst_mode));

  if (REG_P (dst) && REG_P (src1) && REG_P (src2))
    {
      rtx (*gen) (rtx, rtx, rtx);

      switch (src_mode)
        {
        case V8QImode:
          gen = gen_aarch64_simd_combinev8qi;
          break;
        case V4HImode:
          gen = gen_aarch64_simd_combinev4hi;
          break;
        case V2SImode:
          gen = gen_aarch64_simd_combinev2si;
          break;
        case V4HFmode:
          gen = gen_aarch64_simd_combinev4hf;
          break;
        case V2SFmode:
          gen = gen_aarch64_simd_combinev2sf;
          break;
        case DImode:
          gen = gen_aarch64_simd_combinedi;
          break;
        case DFmode:
          gen = gen_aarch64_simd_combinedf;
          break;
        default:
          gcc_unreachable ();
        }

      emit_insn (gen (dst, src1, src2));
      return;
    }
}

/* Split a complex SIMD move.  */

void
aarch64_split_simd_move (rtx dst, rtx src)
{
  machine_mode src_mode = GET_MODE (src);
  machine_mode dst_mode = GET_MODE (dst);

  gcc_assert (VECTOR_MODE_P (dst_mode));

  if (REG_P (dst) && REG_P (src))
    {
      rtx (*gen) (rtx, rtx);

      gcc_assert (VECTOR_MODE_P (src_mode));

      switch (src_mode)
        {
        case V16QImode:
          gen = gen_aarch64_split_simd_movv16qi;
          break;
        case V8HImode:
          gen = gen_aarch64_split_simd_movv8hi;
          break;
        case V4SImode:
          gen = gen_aarch64_split_simd_movv4si;
          break;
        case V2DImode:
          gen = gen_aarch64_split_simd_movv2di;
          break;
        case V8HFmode:
          gen = gen_aarch64_split_simd_movv8hf;
          break;
        case V4SFmode:
          gen = gen_aarch64_split_simd_movv4sf;
          break;
        case V2DFmode:
          gen = gen_aarch64_split_simd_movv2df;
          break;
        default:
          gcc_unreachable ();
        }

      emit_insn (gen (dst, src));
      return;
    }
}

bool
aarch64_zero_extend_const_eq (machine_mode xmode, rtx x,
                              machine_mode ymode, rtx y)
{
  rtx r = simplify_const_unary_operation (ZERO_EXTEND, xmode, y, ymode);
  gcc_assert (r != NULL);
  return rtx_equal_p (x, r);
}

static rtx
aarch64_force_temporary (machine_mode mode, rtx x, rtx value)
{
  if (can_create_pseudo_p ())
    return force_reg (mode, value);
  else
    {
      x = aarch64_emit_move (x, value);
      return x;
    }
}


static rtx
aarch64_add_offset (machine_mode mode, rtx temp, rtx reg, HOST_WIDE_INT offset)
{
  if (!aarch64_plus_immediate (GEN_INT (offset), mode))
    {
      rtx high;
      /* Load the full offset into a register.  This
         might be improvable in the future.  */
      high = GEN_INT (offset);
      offset = 0;
      high = aarch64_force_temporary (mode, temp, high);
      reg = aarch64_force_temporary (mode, temp,
                                     gen_rtx_PLUS (mode, high, reg));
    }
  return plus_constant (mode, reg, offset);
}

static int
aarch64_internal_mov_immediate (rtx dest, rtx imm, bool generate,
                                machine_mode mode)
{
  int i;
  unsigned HOST_WIDE_INT val, val2, mask;
  int one_match, zero_match;
  int num_insns;

  val = INTVAL (imm);

  if (aarch64_move_imm (val, mode))
    {
      if (generate)
        emit_insn (gen_rtx_SET (dest, imm));
      return 1;
    }

  if ((val >> 32) == 0 || mode == SImode)
    {
      if (generate)
        {
          emit_insn (gen_rtx_SET (dest, GEN_INT (val & 0xffff)));
          if (mode == SImode)
            emit_insn (gen_insv_immsi (dest, GEN_INT (16),
                                       GEN_INT ((val >> 16) & 0xffff)));
          else
            emit_insn (gen_insv_immdi (dest, GEN_INT (16),
                                       GEN_INT ((val >> 16) & 0xffff)));
        }
      return 2;
    }
1809
1810 /* Remaining cases are all for DImode. */
1811
43e9d192 1812 mask = 0xffff;
9a4865db
WD
1813 zero_match = ((val & mask) == 0) + ((val & (mask << 16)) == 0) +
1814 ((val & (mask << 32)) == 0) + ((val & (mask << 48)) == 0);
1815 one_match = ((~val & mask) == 0) + ((~val & (mask << 16)) == 0) +
1816 ((~val & (mask << 32)) == 0) + ((~val & (mask << 48)) == 0);
43e9d192 1817
62c8d76c 1818 if (zero_match != 2 && one_match != 2)
43e9d192 1819 {
62c8d76c
WD
1820 /* Try emitting a bitmask immediate with a movk replacing 16 bits.
1821 For a 64-bit bitmask try whether changing 16 bits to all ones or
1822 zeroes creates a valid bitmask. To check any repeated bitmask,
1823 try using 16 bits from the other 32-bit half of val. */
43e9d192 1824
62c8d76c 1825 for (i = 0; i < 64; i += 16, mask <<= 16)
43e9d192 1826 {
62c8d76c
WD
1827 val2 = val & ~mask;
1828 if (val2 != val && aarch64_bitmask_imm (val2, mode))
1829 break;
1830 val2 = val | mask;
1831 if (val2 != val && aarch64_bitmask_imm (val2, mode))
1832 break;
1833 val2 = val2 & ~mask;
1834 val2 = val2 | (((val2 >> 32) | (val2 << 32)) & mask);
1835 if (val2 != val && aarch64_bitmask_imm (val2, mode))
1836 break;
43e9d192 1837 }
62c8d76c 1838 if (i != 64)
43e9d192 1839 {
62c8d76c 1840 if (generate)
43e9d192 1841 {
62c8d76c
WD
1842 emit_insn (gen_rtx_SET (dest, GEN_INT (val2)));
1843 emit_insn (gen_insv_immdi (dest, GEN_INT (i),
9a4865db 1844 GEN_INT ((val >> i) & 0xffff)));
43e9d192 1845 }
1312b1ba 1846 return 2;
43e9d192
IB
1847 }
1848 }
1849
9a4865db
WD
1850 /* Generate 2-4 instructions, skipping 16 bits of all zeroes or ones which
1851 are emitted by the initial mov. If one_match > zero_match, skip set bits,
1852 otherwise skip zero bits. */
2c274197 1853
9a4865db 1854 num_insns = 1;
43e9d192 1855 mask = 0xffff;
9a4865db
WD
1856 val2 = one_match > zero_match ? ~val : val;
1857 i = (val2 & mask) != 0 ? 0 : (val2 & (mask << 16)) != 0 ? 16 : 32;
1858
1859 if (generate)
1860 emit_insn (gen_rtx_SET (dest, GEN_INT (one_match > zero_match
1861 ? (val | ~(mask << i))
1862 : (val & (mask << i)))));
1863 for (i += 16; i < 64; i += 16)
43e9d192 1864 {
9a4865db
WD
1865 if ((val2 & (mask << i)) == 0)
1866 continue;
1867 if (generate)
1868 emit_insn (gen_insv_immdi (dest, GEN_INT (i),
1869 GEN_INT ((val >> i) & 0xffff)));
1870 num_insns ++;
82614948
RR
1871 }
1872
1873 return num_insns;
1874}
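
A minimal standalone sketch (not part of GCC) of the halfword-skipping logic in aarch64_internal_mov_immediate above: it estimates how many MOVZ/MOVN plus MOVK instructions a 64-bit constant needs. The bitmask-immediate (ORR) shortcut and the full aarch64_move_imm check are reduced here to a simple "three matching halfwords" test, so the count is only an approximation.

#include <stdint.h>
#include <stdio.h>

static int
movz_movk_count (uint64_t val)
{
  uint64_t mask = 0xffff;
  int zero_match = 0, one_match = 0;

  /* Count halfwords that are all zeroes or all ones.  */
  for (int i = 0; i < 64; i += 16)
    {
      zero_match += ((val & (mask << i)) == 0);
      one_match += ((~val & (mask << i)) == 0);
    }

  if (zero_match >= 3 || one_match >= 3)
    return 1;			/* a single MOVZ or MOVN suffices */

  /* Start from MOVN if more halfwords are all-ones than all-zeroes,
     then emit one MOVK per halfword that still differs.  */
  uint64_t val2 = one_match > zero_match ? ~val : val;
  int insns = 1;
  int i = (val2 & mask) != 0 ? 0 : (val2 & (mask << 16)) != 0 ? 16 : 32;

  for (i += 16; i < 64; i += 16)
    if ((val2 & (mask << i)) != 0)
      insns++;

  return insns;
}

int
main (void)
{
  printf ("%d\n", movz_movk_count (0x0000000000001234ULL));  /* 1 */
  printf ("%d\n", movz_movk_count (0x0000000012345678ULL));  /* 2 */
  printf ("%d\n", movz_movk_count (0x1234000000005678ULL));  /* 2 */
  printf ("%d\n", movz_movk_count (0x123456789abcdef0ULL));  /* 4 */
  return 0;
}
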
1875
1876
1877void
1878aarch64_expand_mov_immediate (rtx dest, rtx imm)
1879{
1880 machine_mode mode = GET_MODE (dest);
1881
1882 gcc_assert (mode == SImode || mode == DImode);
1883
1884 /* Check on what type of symbol it is. */
1885 if (GET_CODE (imm) == SYMBOL_REF
1886 || GET_CODE (imm) == LABEL_REF
1887 || GET_CODE (imm) == CONST)
1888 {
1889 rtx mem, base, offset;
1890 enum aarch64_symbol_type sty;
1891
1892 /* If we have (const (plus symbol offset)), separate out the offset
1893 before we start classifying the symbol. */
1894 split_const (imm, &base, &offset);
1895
a6e0bfa7 1896 sty = aarch64_classify_symbol (base, offset);
82614948
RR
1897 switch (sty)
1898 {
1899 case SYMBOL_FORCE_TO_MEM:
1900 if (offset != const0_rtx
1901 && targetm.cannot_force_const_mem (mode, imm))
1902 {
1903 gcc_assert (can_create_pseudo_p ());
1904 base = aarch64_force_temporary (mode, dest, base);
1905 base = aarch64_add_offset (mode, NULL, base, INTVAL (offset));
1906 aarch64_emit_move (dest, base);
1907 return;
1908 }
b4f50fd4 1909
82614948
RR
1910 mem = force_const_mem (ptr_mode, imm);
1911 gcc_assert (mem);
b4f50fd4
RR
1912
1913 /* If we aren't generating PC relative literals, then
1914 we need to expand the literal pool access carefully.
1915 This is something that needs to be done in a number
1916 of places, so could well live as a separate function. */
9ee6540a 1917 if (!aarch64_pcrelative_literal_loads)
b4f50fd4
RR
1918 {
1919 gcc_assert (can_create_pseudo_p ());
1920 base = gen_reg_rtx (ptr_mode);
1921 aarch64_expand_mov_immediate (base, XEXP (mem, 0));
1922 mem = gen_rtx_MEM (ptr_mode, base);
1923 }
1924
82614948
RR
1925 if (mode != ptr_mode)
1926 mem = gen_rtx_ZERO_EXTEND (mode, mem);
b4f50fd4 1927
f7df4a84 1928 emit_insn (gen_rtx_SET (dest, mem));
b4f50fd4 1929
82614948
RR
1930 return;
1931
1932 case SYMBOL_SMALL_TLSGD:
1933 case SYMBOL_SMALL_TLSDESC:
79496620 1934 case SYMBOL_SMALL_TLSIE:
1b1e81f8 1935 case SYMBOL_SMALL_GOT_28K:
6642bdb4 1936 case SYMBOL_SMALL_GOT_4G:
82614948 1937 case SYMBOL_TINY_GOT:
5ae7caad 1938 case SYMBOL_TINY_TLSIE:
82614948
RR
1939 if (offset != const0_rtx)
1940 {
1941 gcc_assert(can_create_pseudo_p ());
1942 base = aarch64_force_temporary (mode, dest, base);
1943 base = aarch64_add_offset (mode, NULL, base, INTVAL (offset));
1944 aarch64_emit_move (dest, base);
1945 return;
1946 }
1947 /* FALLTHRU */
1948
82614948
RR
1949 case SYMBOL_SMALL_ABSOLUTE:
1950 case SYMBOL_TINY_ABSOLUTE:
cbf5629e 1951 case SYMBOL_TLSLE12:
d18ba284 1952 case SYMBOL_TLSLE24:
cbf5629e
JW
1953 case SYMBOL_TLSLE32:
1954 case SYMBOL_TLSLE48:
82614948
RR
1955 aarch64_load_symref_appropriately (dest, imm, sty);
1956 return;
1957
1958 default:
1959 gcc_unreachable ();
1960 }
1961 }
1962
1963 if (!CONST_INT_P (imm))
1964 {
1965 if (GET_CODE (imm) == HIGH)
f7df4a84 1966 emit_insn (gen_rtx_SET (dest, imm));
82614948
RR
1967 else
1968 {
1969 rtx mem = force_const_mem (mode, imm);
1970 gcc_assert (mem);
f7df4a84 1971 emit_insn (gen_rtx_SET (dest, mem));
43e9d192 1972 }
82614948
RR
1973
1974 return;
43e9d192 1975 }
82614948
RR
1976
1977 aarch64_internal_mov_immediate (dest, imm, true, GET_MODE (dest));
43e9d192
IB
1978}
1979
5be6b295
WD
1980/* Add DELTA to REGNUM in mode MODE. SCRATCHREG can be used to hold a
1981 temporary value if necessary. FRAME_RELATED_P should be true if
1982 the RTX_FRAME_RELATED flag should be set and CFA adjustments added
1983 to the generated instructions. If SCRATCHREG is known to hold
1984 abs (delta), EMIT_MOVE_IMM can be set to false to avoid emitting the
1985 immediate again.
1986
1987 Since this function may be used to adjust the stack pointer, we must
1988 ensure that it cannot cause transient stack deallocation (for example
1989 by first incrementing SP and then decrementing when adjusting by a
1990 large immediate). */
c4ddc43a
JW
1991
1992static void
5be6b295
WD
1993aarch64_add_constant_internal (machine_mode mode, int regnum, int scratchreg,
1994 HOST_WIDE_INT delta, bool frame_related_p,
1995 bool emit_move_imm)
c4ddc43a
JW
1996{
1997 HOST_WIDE_INT mdelta = abs_hwi (delta);
1998 rtx this_rtx = gen_rtx_REG (mode, regnum);
37d6a4b7 1999 rtx_insn *insn;
c4ddc43a 2000
c4ddc43a
JW
2001 if (!mdelta)
2002 return;
2003
5be6b295 2004 /* Single instruction adjustment. */
c4ddc43a
JW
2005 if (aarch64_uimm12_shift (mdelta))
2006 {
37d6a4b7
JW
2007 insn = emit_insn (gen_add2_insn (this_rtx, GEN_INT (delta)));
2008 RTX_FRAME_RELATED_P (insn) = frame_related_p;
c4ddc43a
JW
2009 return;
2010 }
2011
5be6b295
WD
 2012 /* Emit two additions/subtractions if the adjustment is less than 24 bits.
 2013 Only do this if mdelta is not representable as a single 16-bit move
 2014 immediate, since adjusting via a move is better in that case. */
2015 if (mdelta < 0x1000000 && !aarch64_move_imm (mdelta, mode))
c4ddc43a
JW
2016 {
2017 HOST_WIDE_INT low_off = mdelta & 0xfff;
2018
2019 low_off = delta < 0 ? -low_off : low_off;
37d6a4b7
JW
2020 insn = emit_insn (gen_add2_insn (this_rtx, GEN_INT (low_off)));
2021 RTX_FRAME_RELATED_P (insn) = frame_related_p;
2022 insn = emit_insn (gen_add2_insn (this_rtx, GEN_INT (delta - low_off)));
2023 RTX_FRAME_RELATED_P (insn) = frame_related_p;
c4ddc43a
JW
2024 return;
2025 }
2026
5be6b295 2027 /* Emit a move immediate if required and an addition/subtraction. */
c4ddc43a 2028 rtx scratch_rtx = gen_rtx_REG (mode, scratchreg);
5be6b295
WD
2029 if (emit_move_imm)
2030 aarch64_internal_mov_immediate (scratch_rtx, GEN_INT (mdelta), true, mode);
2031 insn = emit_insn (delta < 0 ? gen_sub2_insn (this_rtx, scratch_rtx)
2032 : gen_add2_insn (this_rtx, scratch_rtx));
37d6a4b7
JW
2033 if (frame_related_p)
2034 {
2035 RTX_FRAME_RELATED_P (insn) = frame_related_p;
2036 rtx adj = plus_constant (mode, this_rtx, delta);
2037 add_reg_note (insn , REG_CFA_ADJUST_CFA, gen_rtx_SET (this_rtx, adj));
2038 }
c4ddc43a
JW
2039}
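
A standalone sketch (not part of GCC) of the decision made by aarch64_add_constant_internal above: classify how a stack adjustment DELTA would be materialized. The real code additionally prefers a move when mdelta is itself a valid move immediate (the !aarch64_move_imm test); that refinement is omitted here.

#include <stdint.h>
#include <stdio.h>

static const char *
classify_adjustment (int64_t delta)
{
  uint64_t mdelta = delta < 0 ? -(uint64_t) delta : (uint64_t) delta;

  if (mdelta == 0)
    return "no adjustment needed";

  /* aarch64_uimm12_shift: a 12-bit immediate, optionally shifted left by 12.  */
  if ((mdelta & ~(uint64_t) 0xfff) == 0
      || (mdelta & ~((uint64_t) 0xfff << 12)) == 0)
    return "single add/sub";

  if (mdelta < 0x1000000)
    return "two add/sub instructions (low 12 bits, then the rest)";

  return "move immediate into the scratch register, then add/sub";
}

int
main (void)
{
  printf ("%s\n", classify_adjustment (496));        /* single add/sub */
  printf ("%s\n", classify_adjustment (0x21000));    /* single add/sub (shifted) */
  printf ("%s\n", classify_adjustment (-0x12345));   /* two add/sub */
  printf ("%s\n", classify_adjustment (0x2000000));  /* mov + add/sub */
  return 0;
}
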
2040
5be6b295
WD
2041static inline void
2042aarch64_add_constant (machine_mode mode, int regnum, int scratchreg,
2043 HOST_WIDE_INT delta)
2044{
2045 aarch64_add_constant_internal (mode, regnum, scratchreg, delta, false, true);
2046}
2047
2048static inline void
2049aarch64_add_sp (int scratchreg, HOST_WIDE_INT delta, bool emit_move_imm)
2050{
2051 aarch64_add_constant_internal (Pmode, SP_REGNUM, scratchreg, delta,
2052 true, emit_move_imm);
2053}
2054
2055static inline void
2056aarch64_sub_sp (int scratchreg, HOST_WIDE_INT delta, bool frame_related_p)
2057{
2058 aarch64_add_constant_internal (Pmode, SP_REGNUM, scratchreg, -delta,
2059 frame_related_p, true);
2060}
2061
43e9d192 2062static bool
fee9ba42
JW
2063aarch64_function_ok_for_sibcall (tree decl ATTRIBUTE_UNUSED,
2064 tree exp ATTRIBUTE_UNUSED)
43e9d192 2065{
fee9ba42 2066 /* Currently, always true. */
43e9d192
IB
2067 return true;
2068}
2069
2070/* Implement TARGET_PASS_BY_REFERENCE. */
2071
2072static bool
2073aarch64_pass_by_reference (cumulative_args_t pcum ATTRIBUTE_UNUSED,
ef4bddc2 2074 machine_mode mode,
43e9d192
IB
2075 const_tree type,
2076 bool named ATTRIBUTE_UNUSED)
2077{
2078 HOST_WIDE_INT size;
ef4bddc2 2079 machine_mode dummymode;
43e9d192
IB
2080 int nregs;
2081
2082 /* GET_MODE_SIZE (BLKmode) is useless since it is 0. */
2083 size = (mode == BLKmode && type)
2084 ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
2085
aadc1c43
MHD
2086 /* Aggregates are passed by reference based on their size. */
2087 if (type && AGGREGATE_TYPE_P (type))
43e9d192 2088 {
aadc1c43 2089 size = int_size_in_bytes (type);
43e9d192
IB
2090 }
2091
 2092 /* Variable-sized arguments are always passed by reference. */
2093 if (size < 0)
2094 return true;
2095
2096 /* Can this be a candidate to be passed in fp/simd register(s)? */
2097 if (aarch64_vfp_is_call_or_return_candidate (mode, type,
2098 &dummymode, &nregs,
2099 NULL))
2100 return false;
2101
 2102 /* Arguments which are variable sized or larger than 2 registers are
 2103 passed by reference unless they are a homogeneous floating-point
 2104 aggregate. */
2105 return size > 2 * UNITS_PER_WORD;
2106}
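
A standalone sketch (not part of GCC) of the size-based rule implemented above; is_hfa_or_hva is a hypothetical stand-in for aarch64_vfp_is_call_or_return_candidate, which in GCC inspects the type tree.

#include <stdbool.h>
#include <stdio.h>

static bool
passed_by_reference (long size_in_bytes, bool is_hfa_or_hva)
{
  if (size_in_bytes < 0)     /* variable-sized argument */
    return true;
  if (is_hfa_or_hva)         /* fits in up to four FP/SIMD registers */
    return false;
  return size_in_bytes > 16; /* more than two X registers */
}

int
main (void)
{
  printf ("%d\n", passed_by_reference (16, false)); /* 0: x0/x1 pair */
  printf ("%d\n", passed_by_reference (24, false)); /* 1: by reference */
  printf ("%d\n", passed_by_reference (32, true));  /* 0: HFA of 4 doubles */
  printf ("%d\n", passed_by_reference (-1, false)); /* 1: variable size */
  return 0;
}
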
2107
2108/* Return TRUE if VALTYPE is padded to its least significant bits. */
2109static bool
2110aarch64_return_in_msb (const_tree valtype)
2111{
ef4bddc2 2112 machine_mode dummy_mode;
43e9d192
IB
2113 int dummy_int;
2114
2115 /* Never happens in little-endian mode. */
2116 if (!BYTES_BIG_ENDIAN)
2117 return false;
2118
2119 /* Only composite types smaller than or equal to 16 bytes can
2120 be potentially returned in registers. */
2121 if (!aarch64_composite_type_p (valtype, TYPE_MODE (valtype))
2122 || int_size_in_bytes (valtype) <= 0
2123 || int_size_in_bytes (valtype) > 16)
2124 return false;
2125
2126 /* But not a composite that is an HFA (Homogeneous Floating-point Aggregate)
2127 or an HVA (Homogeneous Short-Vector Aggregate); such a special composite
2128 is always passed/returned in the least significant bits of fp/simd
2129 register(s). */
2130 if (aarch64_vfp_is_call_or_return_candidate (TYPE_MODE (valtype), valtype,
2131 &dummy_mode, &dummy_int, NULL))
2132 return false;
2133
2134 return true;
2135}
2136
2137/* Implement TARGET_FUNCTION_VALUE.
2138 Define how to find the value returned by a function. */
2139
2140static rtx
2141aarch64_function_value (const_tree type, const_tree func,
2142 bool outgoing ATTRIBUTE_UNUSED)
2143{
ef4bddc2 2144 machine_mode mode;
43e9d192
IB
2145 int unsignedp;
2146 int count;
ef4bddc2 2147 machine_mode ag_mode;
43e9d192
IB
2148
2149 mode = TYPE_MODE (type);
2150 if (INTEGRAL_TYPE_P (type))
2151 mode = promote_function_mode (type, mode, &unsignedp, func, 1);
2152
2153 if (aarch64_return_in_msb (type))
2154 {
2155 HOST_WIDE_INT size = int_size_in_bytes (type);
2156
2157 if (size % UNITS_PER_WORD != 0)
2158 {
2159 size += UNITS_PER_WORD - size % UNITS_PER_WORD;
2160 mode = mode_for_size (size * BITS_PER_UNIT, MODE_INT, 0);
2161 }
2162 }
2163
2164 if (aarch64_vfp_is_call_or_return_candidate (mode, type,
2165 &ag_mode, &count, NULL))
2166 {
2167 if (!aarch64_composite_type_p (type, mode))
2168 {
2169 gcc_assert (count == 1 && mode == ag_mode);
2170 return gen_rtx_REG (mode, V0_REGNUM);
2171 }
2172 else
2173 {
2174 int i;
2175 rtx par;
2176
2177 par = gen_rtx_PARALLEL (mode, rtvec_alloc (count));
2178 for (i = 0; i < count; i++)
2179 {
2180 rtx tmp = gen_rtx_REG (ag_mode, V0_REGNUM + i);
2181 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp,
2182 GEN_INT (i * GET_MODE_SIZE (ag_mode)));
2183 XVECEXP (par, 0, i) = tmp;
2184 }
2185 return par;
2186 }
2187 }
2188 else
2189 return gen_rtx_REG (mode, R0_REGNUM);
2190}
2191
2192/* Implements TARGET_FUNCTION_VALUE_REGNO_P.
2193 Return true if REGNO is the number of a hard register in which the values
2194 of called function may come back. */
2195
2196static bool
2197aarch64_function_value_regno_p (const unsigned int regno)
2198{
2199 /* Maximum of 16 bytes can be returned in the general registers. Examples
2200 of 16-byte return values are: 128-bit integers and 16-byte small
2201 structures (excluding homogeneous floating-point aggregates). */
2202 if (regno == R0_REGNUM || regno == R1_REGNUM)
2203 return true;
2204
2205 /* Up to four fp/simd registers can return a function value, e.g. a
2206 homogeneous floating-point aggregate having four members. */
2207 if (regno >= V0_REGNUM && regno < V0_REGNUM + HA_MAX_NUM_FLDS)
d5726973 2208 return TARGET_FLOAT;
43e9d192
IB
2209
2210 return false;
2211}
2212
2213/* Implement TARGET_RETURN_IN_MEMORY.
2214
2215 If the type T of the result of a function is such that
2216 void func (T arg)
2217 would require that arg be passed as a value in a register (or set of
2218 registers) according to the parameter passing rules, then the result
2219 is returned in the same registers as would be used for such an
2220 argument. */
2221
2222static bool
2223aarch64_return_in_memory (const_tree type, const_tree fndecl ATTRIBUTE_UNUSED)
2224{
2225 HOST_WIDE_INT size;
ef4bddc2 2226 machine_mode ag_mode;
43e9d192
IB
2227 int count;
2228
2229 if (!AGGREGATE_TYPE_P (type)
2230 && TREE_CODE (type) != COMPLEX_TYPE
2231 && TREE_CODE (type) != VECTOR_TYPE)
 2232 /* Simple scalar types are always returned in registers. */
2233 return false;
2234
2235 if (aarch64_vfp_is_call_or_return_candidate (TYPE_MODE (type),
2236 type,
2237 &ag_mode,
2238 &count,
2239 NULL))
2240 return false;
2241
 2242 /* Types larger than 2 registers are returned in memory. */
2243 size = int_size_in_bytes (type);
2244 return (size < 0 || size > 2 * UNITS_PER_WORD);
2245}
2246
2247static bool
ef4bddc2 2248aarch64_vfp_is_call_candidate (cumulative_args_t pcum_v, machine_mode mode,
43e9d192
IB
2249 const_tree type, int *nregs)
2250{
2251 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
2252 return aarch64_vfp_is_call_or_return_candidate (mode,
2253 type,
2254 &pcum->aapcs_vfp_rmode,
2255 nregs,
2256 NULL);
2257}
2258
985b8393 2259/* Given MODE and TYPE of a function argument, return the alignment in
43e9d192
IB
2260 bits. The idea is to suppress any stronger alignment requested by
2261 the user and opt for the natural alignment (specified in AAPCS64 \S 4.1).
2262 This is a helper function for local use only. */
2263
985b8393 2264static unsigned int
ef4bddc2 2265aarch64_function_arg_alignment (machine_mode mode, const_tree type)
43e9d192 2266{
75d6cc81 2267 if (!type)
985b8393 2268 return GET_MODE_ALIGNMENT (mode);
2ec07fa6 2269
75d6cc81 2270 if (integer_zerop (TYPE_SIZE (type)))
985b8393 2271 return 0;
43e9d192 2272
75d6cc81
AL
2273 gcc_assert (TYPE_MODE (type) == mode);
2274
2275 if (!AGGREGATE_TYPE_P (type))
985b8393 2276 return TYPE_ALIGN (TYPE_MAIN_VARIANT (type));
75d6cc81
AL
2277
2278 if (TREE_CODE (type) == ARRAY_TYPE)
985b8393 2279 return TYPE_ALIGN (TREE_TYPE (type));
75d6cc81 2280
985b8393 2281 unsigned int alignment = 0;
75d6cc81 2282 for (tree field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
985b8393
JJ
2283 if (TREE_CODE (field) == FIELD_DECL)
2284 alignment = std::max (alignment, DECL_ALIGN (field));
43e9d192 2285
985b8393 2286 return alignment;
43e9d192
IB
2287}
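
A standalone sketch (not part of GCC) of the record case above: the argument-passing alignment is the maximum field alignment, and any stronger alignment requested on the record itself is ignored. The field description is a hypothetical simplification of GCC's tree representation.

#include <stdio.h>

/* A field's alignment in bits.  */
struct field { unsigned align_bits; };

static unsigned
record_arg_alignment (const struct field *fields, int nfields,
		      unsigned requested_align_bits)
{
  (void) requested_align_bits;	/* deliberately ignored, as above */
  unsigned align = 0;
  for (int i = 0; i < nfields; i++)
    if (fields[i].align_bits > align)
      align = fields[i].align_bits;
  return align;
}

int
main (void)
{
  /* struct S { int a; double b; } __attribute__((aligned (32)));
     is passed with 64-bit alignment, not 256-bit.  */
  struct field f[] = { { 32 }, { 64 } };
  printf ("%u\n", record_arg_alignment (f, 2, 256));
  return 0;
}
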
2288
2289/* Lay out a function argument according to the AAPCS64 rules. The rule
2290 numbers refer to the rule numbers in the AAPCS64. */
2291
2292static void
ef4bddc2 2293aarch64_layout_arg (cumulative_args_t pcum_v, machine_mode mode,
43e9d192
IB
2294 const_tree type,
2295 bool named ATTRIBUTE_UNUSED)
2296{
2297 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
2298 int ncrn, nvrn, nregs;
2299 bool allocate_ncrn, allocate_nvrn;
3abf17cf 2300 HOST_WIDE_INT size;
43e9d192
IB
2301
2302 /* We need to do this once per argument. */
2303 if (pcum->aapcs_arg_processed)
2304 return;
2305
2306 pcum->aapcs_arg_processed = true;
2307
3abf17cf
YZ
2308 /* Size in bytes, rounded to the nearest multiple of 8 bytes. */
2309 size
4f59f9f2
UB
2310 = ROUND_UP (type ? int_size_in_bytes (type) : GET_MODE_SIZE (mode),
2311 UNITS_PER_WORD);
3abf17cf 2312
43e9d192
IB
2313 allocate_ncrn = (type) ? !(FLOAT_TYPE_P (type)) : !FLOAT_MODE_P (mode);
2314 allocate_nvrn = aarch64_vfp_is_call_candidate (pcum_v,
2315 mode,
2316 type,
2317 &nregs);
2318
 2319 /* allocate_ncrn may be a false positive, but allocate_nvrn is quite reliable.
 2320 The following code thus handles passing by SIMD/FP registers first. */
2321
2322 nvrn = pcum->aapcs_nvrn;
2323
 2324 /* C1 - C5 for floating point, homogeneous floating-point aggregates (HFA)
 2325 and homogeneous short-vector aggregates (HVA). */
2326 if (allocate_nvrn)
2327 {
261fb553
AL
2328 if (!TARGET_FLOAT)
2329 aarch64_err_no_fpadvsimd (mode, "argument");
2330
43e9d192
IB
2331 if (nvrn + nregs <= NUM_FP_ARG_REGS)
2332 {
2333 pcum->aapcs_nextnvrn = nvrn + nregs;
2334 if (!aarch64_composite_type_p (type, mode))
2335 {
2336 gcc_assert (nregs == 1);
2337 pcum->aapcs_reg = gen_rtx_REG (mode, V0_REGNUM + nvrn);
2338 }
2339 else
2340 {
2341 rtx par;
2342 int i;
2343 par = gen_rtx_PARALLEL (mode, rtvec_alloc (nregs));
2344 for (i = 0; i < nregs; i++)
2345 {
2346 rtx tmp = gen_rtx_REG (pcum->aapcs_vfp_rmode,
2347 V0_REGNUM + nvrn + i);
2348 tmp = gen_rtx_EXPR_LIST
2349 (VOIDmode, tmp,
2350 GEN_INT (i * GET_MODE_SIZE (pcum->aapcs_vfp_rmode)));
2351 XVECEXP (par, 0, i) = tmp;
2352 }
2353 pcum->aapcs_reg = par;
2354 }
2355 return;
2356 }
2357 else
2358 {
2359 /* C.3 NSRN is set to 8. */
2360 pcum->aapcs_nextnvrn = NUM_FP_ARG_REGS;
2361 goto on_stack;
2362 }
2363 }
2364
2365 ncrn = pcum->aapcs_ncrn;
3abf17cf 2366 nregs = size / UNITS_PER_WORD;
43e9d192
IB
2367
 2368 /* C6 - C9, though the sign and zero extension semantics are
 2369 handled elsewhere. This is the case where the argument fits
 2370 entirely in general registers. */
2371 if (allocate_ncrn && (ncrn + nregs <= NUM_ARG_REGS))
2372 {
43e9d192
IB
2373
2374 gcc_assert (nregs == 0 || nregs == 1 || nregs == 2);
2375
2376 /* C.8 if the argument has an alignment of 16 then the NGRN is
2377 rounded up to the next even number. */
985b8393
JJ
2378 if (nregs == 2
2379 && ncrn % 2
2ec07fa6 2380 /* The == 16 * BITS_PER_UNIT instead of >= 16 * BITS_PER_UNIT
985b8393 2381 comparison is there because for > 16 * BITS_PER_UNIT
2ec07fa6
RR
2382 alignment nregs should be > 2 and therefore it should be
2383 passed by reference rather than value. */
985b8393
JJ
2384 && aarch64_function_arg_alignment (mode, type) == 16 * BITS_PER_UNIT)
2385 {
2386 ++ncrn;
2387 gcc_assert (ncrn + nregs <= NUM_ARG_REGS);
43e9d192 2388 }
2ec07fa6 2389
43e9d192
IB
2390 /* NREGS can be 0 when e.g. an empty structure is to be passed.
2391 A reg is still generated for it, but the caller should be smart
2392 enough not to use it. */
2393 if (nregs == 0 || nregs == 1 || GET_MODE_CLASS (mode) == MODE_INT)
2ec07fa6 2394 pcum->aapcs_reg = gen_rtx_REG (mode, R0_REGNUM + ncrn);
43e9d192
IB
2395 else
2396 {
2397 rtx par;
2398 int i;
2399
2400 par = gen_rtx_PARALLEL (mode, rtvec_alloc (nregs));
2401 for (i = 0; i < nregs; i++)
2402 {
2403 rtx tmp = gen_rtx_REG (word_mode, R0_REGNUM + ncrn + i);
2404 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp,
2405 GEN_INT (i * UNITS_PER_WORD));
2406 XVECEXP (par, 0, i) = tmp;
2407 }
2408 pcum->aapcs_reg = par;
2409 }
2410
2411 pcum->aapcs_nextncrn = ncrn + nregs;
2412 return;
2413 }
2414
2415 /* C.11 */
2416 pcum->aapcs_nextncrn = NUM_ARG_REGS;
2417
 2418 /* The argument is passed on the stack; record the needed number of words for
3abf17cf 2419 this argument and align the total size if necessary. */
43e9d192 2420on_stack:
3abf17cf 2421 pcum->aapcs_stack_words = size / UNITS_PER_WORD;
2ec07fa6 2422
985b8393 2423 if (aarch64_function_arg_alignment (mode, type) == 16 * BITS_PER_UNIT)
4f59f9f2
UB
2424 pcum->aapcs_stack_size = ROUND_UP (pcum->aapcs_stack_size,
2425 16 / UNITS_PER_WORD);
43e9d192
IB
2426 return;
2427}
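
A standalone sketch (not part of GCC) of the general-register path above (rules C.8 - C.11): allocate an argument of SIZE_WORDS words to x0-x7, rounding the next register number up to an even value for 16-byte-aligned two-word arguments, and falling back to the stack once the registers are exhausted. The SIMD/FP path and pass-by-reference handling are omitted.

#include <stdio.h>

#define NUM_ARG_REGS 8

struct alloc_state { int ngrn; int nsaa_words; };

static void
allocate_gp_arg (struct alloc_state *s, int size_words, int align_is_16_bytes)
{
  int ngrn = s->ngrn;

  /* C.8: a 16-byte-aligned argument starts at an even register number.  */
  if (size_words == 2 && align_is_16_bytes && (ngrn & 1))
    ngrn++;

  if (ngrn + size_words <= NUM_ARG_REGS)
    {
      printf ("x%d..x%d\n", ngrn, ngrn + size_words - 1);
      s->ngrn = ngrn + size_words;
      return;
    }

  /* C.11: no more core registers; the argument goes on the stack.  */
  s->ngrn = NUM_ARG_REGS;
  printf ("stack at NSAA + %d bytes\n", s->nsaa_words * 8);
  s->nsaa_words += size_words;
}

int
main (void)
{
  struct alloc_state s = { 0, 0 };
  allocate_gp_arg (&s, 1, 0);  /* x0..x0 */
  allocate_gp_arg (&s, 2, 1);  /* x2..x3 (skips x1 for 16-byte alignment) */
  allocate_gp_arg (&s, 2, 0);  /* x4..x5 */
  allocate_gp_arg (&s, 2, 0);  /* x6..x7 */
  allocate_gp_arg (&s, 1, 0);  /* stack at NSAA + 0 bytes */
  return 0;
}
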
2428
2429/* Implement TARGET_FUNCTION_ARG. */
2430
2431static rtx
ef4bddc2 2432aarch64_function_arg (cumulative_args_t pcum_v, machine_mode mode,
43e9d192
IB
2433 const_tree type, bool named)
2434{
2435 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
2436 gcc_assert (pcum->pcs_variant == ARM_PCS_AAPCS64);
2437
2438 if (mode == VOIDmode)
2439 return NULL_RTX;
2440
2441 aarch64_layout_arg (pcum_v, mode, type, named);
2442 return pcum->aapcs_reg;
2443}
2444
2445void
2446aarch64_init_cumulative_args (CUMULATIVE_ARGS *pcum,
2447 const_tree fntype ATTRIBUTE_UNUSED,
2448 rtx libname ATTRIBUTE_UNUSED,
2449 const_tree fndecl ATTRIBUTE_UNUSED,
2450 unsigned n_named ATTRIBUTE_UNUSED)
2451{
2452 pcum->aapcs_ncrn = 0;
2453 pcum->aapcs_nvrn = 0;
2454 pcum->aapcs_nextncrn = 0;
2455 pcum->aapcs_nextnvrn = 0;
2456 pcum->pcs_variant = ARM_PCS_AAPCS64;
2457 pcum->aapcs_reg = NULL_RTX;
2458 pcum->aapcs_arg_processed = false;
2459 pcum->aapcs_stack_words = 0;
2460 pcum->aapcs_stack_size = 0;
2461
261fb553
AL
2462 if (!TARGET_FLOAT
2463 && fndecl && TREE_PUBLIC (fndecl)
2464 && fntype && fntype != error_mark_node)
2465 {
2466 const_tree type = TREE_TYPE (fntype);
2467 machine_mode mode ATTRIBUTE_UNUSED; /* To pass pointer as argument. */
2468 int nregs ATTRIBUTE_UNUSED; /* Likewise. */
2469 if (aarch64_vfp_is_call_or_return_candidate (TYPE_MODE (type), type,
2470 &mode, &nregs, NULL))
2471 aarch64_err_no_fpadvsimd (TYPE_MODE (type), "return type");
2472 }
43e9d192
IB
2473 return;
2474}
2475
2476static void
2477aarch64_function_arg_advance (cumulative_args_t pcum_v,
ef4bddc2 2478 machine_mode mode,
43e9d192
IB
2479 const_tree type,
2480 bool named)
2481{
2482 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
2483 if (pcum->pcs_variant == ARM_PCS_AAPCS64)
2484 {
2485 aarch64_layout_arg (pcum_v, mode, type, named);
2486 gcc_assert ((pcum->aapcs_reg != NULL_RTX)
2487 != (pcum->aapcs_stack_words != 0));
2488 pcum->aapcs_arg_processed = false;
2489 pcum->aapcs_ncrn = pcum->aapcs_nextncrn;
2490 pcum->aapcs_nvrn = pcum->aapcs_nextnvrn;
2491 pcum->aapcs_stack_size += pcum->aapcs_stack_words;
2492 pcum->aapcs_stack_words = 0;
2493 pcum->aapcs_reg = NULL_RTX;
2494 }
2495}
2496
2497bool
2498aarch64_function_arg_regno_p (unsigned regno)
2499{
2500 return ((GP_REGNUM_P (regno) && regno < R0_REGNUM + NUM_ARG_REGS)
2501 || (FP_REGNUM_P (regno) && regno < V0_REGNUM + NUM_FP_ARG_REGS));
2502}
2503
2504/* Implement FUNCTION_ARG_BOUNDARY. Every parameter gets at least
2505 PARM_BOUNDARY bits of alignment, but will be given anything up
2506 to STACK_BOUNDARY bits if the type requires it. This makes sure
2507 that both before and after the layout of each argument, the Next
2508 Stacked Argument Address (NSAA) will have a minimum alignment of
2509 8 bytes. */
2510
2511static unsigned int
ef4bddc2 2512aarch64_function_arg_boundary (machine_mode mode, const_tree type)
43e9d192 2513{
985b8393
JJ
2514 unsigned int alignment = aarch64_function_arg_alignment (mode, type);
2515 return MIN (MAX (alignment, PARM_BOUNDARY), STACK_BOUNDARY);
43e9d192
IB
2516}
2517
2518/* For use by FUNCTION_ARG_PADDING (MODE, TYPE).
2519
2520 Return true if an argument passed on the stack should be padded upwards,
2521 i.e. if the least-significant byte of the stack slot has useful data.
2522
2523 Small aggregate types are placed in the lowest memory address.
2524
2525 The related parameter passing rules are B.4, C.3, C.5 and C.14. */
2526
2527bool
ef4bddc2 2528aarch64_pad_arg_upward (machine_mode mode, const_tree type)
43e9d192
IB
2529{
2530 /* On little-endian targets, the least significant byte of every stack
2531 argument is passed at the lowest byte address of the stack slot. */
2532 if (!BYTES_BIG_ENDIAN)
2533 return true;
2534
00edcfbe 2535 /* Otherwise, integral, floating-point and pointer types are padded downward:
43e9d192
IB
2536 the least significant byte of a stack argument is passed at the highest
2537 byte address of the stack slot. */
2538 if (type
00edcfbe
YZ
2539 ? (INTEGRAL_TYPE_P (type) || SCALAR_FLOAT_TYPE_P (type)
2540 || POINTER_TYPE_P (type))
43e9d192
IB
2541 : (SCALAR_INT_MODE_P (mode) || SCALAR_FLOAT_MODE_P (mode)))
2542 return false;
2543
2544 /* Everything else padded upward, i.e. data in first byte of stack slot. */
2545 return true;
2546}
2547
2548/* Similarly, for use by BLOCK_REG_PADDING (MODE, TYPE, FIRST).
2549
 2550 It specifies padding for the last (possibly the only)
 2551 element of a block move between registers and memory. Assuming
 2552 the block is in memory, padding upward means that the last
 2553 element is padded after its most significant byte, while with
 2554 downward padding the last element is padded at its least
 2555 significant byte side.
2556
2557 Small aggregates and small complex types are always padded
2558 upwards.
2559
2560 We don't need to worry about homogeneous floating-point or
2561 short-vector aggregates; their move is not affected by the
2562 padding direction determined here. Regardless of endianness,
2563 each element of such an aggregate is put in the least
2564 significant bits of a fp/simd register.
2565
2566 Return !BYTES_BIG_ENDIAN if the least significant byte of the
2567 register has useful data, and return the opposite if the most
2568 significant byte does. */
2569
2570bool
ef4bddc2 2571aarch64_pad_reg_upward (machine_mode mode, const_tree type,
43e9d192
IB
2572 bool first ATTRIBUTE_UNUSED)
2573{
2574
2575 /* Small composite types are always padded upward. */
2576 if (BYTES_BIG_ENDIAN && aarch64_composite_type_p (type, mode))
2577 {
2578 HOST_WIDE_INT size = (type ? int_size_in_bytes (type)
2579 : GET_MODE_SIZE (mode));
2580 if (size < 2 * UNITS_PER_WORD)
2581 return true;
2582 }
2583
2584 /* Otherwise, use the default padding. */
2585 return !BYTES_BIG_ENDIAN;
2586}
2587
ef4bddc2 2588static machine_mode
43e9d192
IB
2589aarch64_libgcc_cmp_return_mode (void)
2590{
2591 return SImode;
2592}
2593
a3eb8a52
EB
2594#define PROBE_INTERVAL (1 << STACK_CHECK_PROBE_INTERVAL_EXP)
2595
2596/* We use the 12-bit shifted immediate arithmetic instructions so values
 2597 must be a multiple of (1 << 12), i.e. 4096. */
2598#define ARITH_FACTOR 4096
2599
2600#if (PROBE_INTERVAL % ARITH_FACTOR) != 0
2601#error Cannot use simple address calculation for stack probing
2602#endif
2603
2604/* The pair of scratch registers used for stack probing. */
2605#define PROBE_STACK_FIRST_REG 9
2606#define PROBE_STACK_SECOND_REG 10
2607
2608/* Emit code to probe a range of stack addresses from FIRST to FIRST+SIZE,
2609 inclusive. These are offsets from the current stack pointer. */
2610
2611static void
2612aarch64_emit_probe_stack_range (HOST_WIDE_INT first, HOST_WIDE_INT size)
2613{
5f5c5e0f 2614 rtx reg1 = gen_rtx_REG (Pmode, PROBE_STACK_FIRST_REG);
a3eb8a52
EB
2615
2616 /* See the same assertion on PROBE_INTERVAL above. */
2617 gcc_assert ((first % ARITH_FACTOR) == 0);
2618
2619 /* See if we have a constant small number of probes to generate. If so,
2620 that's the easy case. */
2621 if (size <= PROBE_INTERVAL)
2622 {
2623 const HOST_WIDE_INT base = ROUND_UP (size, ARITH_FACTOR);
2624
2625 emit_set_insn (reg1,
5f5c5e0f 2626 plus_constant (Pmode,
a3eb8a52 2627 stack_pointer_rtx, -(first + base)));
5f5c5e0f 2628 emit_stack_probe (plus_constant (Pmode, reg1, base - size));
a3eb8a52
EB
2629 }
2630
2631 /* The run-time loop is made up of 8 insns in the generic case while the
2632 compile-time loop is made up of 4+2*(n-2) insns for n # of intervals. */
2633 else if (size <= 4 * PROBE_INTERVAL)
2634 {
2635 HOST_WIDE_INT i, rem;
2636
2637 emit_set_insn (reg1,
5f5c5e0f 2638 plus_constant (Pmode,
a3eb8a52
EB
2639 stack_pointer_rtx,
2640 -(first + PROBE_INTERVAL)));
2641 emit_stack_probe (reg1);
2642
2643 /* Probe at FIRST + N * PROBE_INTERVAL for values of N from 2 until
2644 it exceeds SIZE. If only two probes are needed, this will not
2645 generate any code. Then probe at FIRST + SIZE. */
2646 for (i = 2 * PROBE_INTERVAL; i < size; i += PROBE_INTERVAL)
2647 {
2648 emit_set_insn (reg1,
5f5c5e0f 2649 plus_constant (Pmode, reg1, -PROBE_INTERVAL));
a3eb8a52
EB
2650 emit_stack_probe (reg1);
2651 }
2652
2653 rem = size - (i - PROBE_INTERVAL);
2654 if (rem > 256)
2655 {
2656 const HOST_WIDE_INT base = ROUND_UP (rem, ARITH_FACTOR);
2657
5f5c5e0f
EB
2658 emit_set_insn (reg1, plus_constant (Pmode, reg1, -base));
2659 emit_stack_probe (plus_constant (Pmode, reg1, base - rem));
a3eb8a52
EB
2660 }
2661 else
5f5c5e0f 2662 emit_stack_probe (plus_constant (Pmode, reg1, -rem));
a3eb8a52
EB
2663 }
2664
2665 /* Otherwise, do the same as above, but in a loop. Note that we must be
2666 extra careful with variables wrapping around because we might be at
2667 the very top (or the very bottom) of the address space and we have
2668 to be able to handle this case properly; in particular, we use an
2669 equality test for the loop condition. */
2670 else
2671 {
5f5c5e0f 2672 rtx reg2 = gen_rtx_REG (Pmode, PROBE_STACK_SECOND_REG);
a3eb8a52
EB
2673
2674 /* Step 1: round SIZE to the previous multiple of the interval. */
2675
2676 HOST_WIDE_INT rounded_size = size & -PROBE_INTERVAL;
2677
2678
2679 /* Step 2: compute initial and final value of the loop counter. */
2680
2681 /* TEST_ADDR = SP + FIRST. */
2682 emit_set_insn (reg1,
5f5c5e0f 2683 plus_constant (Pmode, stack_pointer_rtx, -first));
a3eb8a52
EB
2684
2685 /* LAST_ADDR = SP + FIRST + ROUNDED_SIZE. */
2686 emit_set_insn (reg2,
5f5c5e0f 2687 plus_constant (Pmode, stack_pointer_rtx,
a3eb8a52
EB
2688 -(first + rounded_size)));
2689
2690
2691 /* Step 3: the loop
2692
2693 do
2694 {
2695 TEST_ADDR = TEST_ADDR + PROBE_INTERVAL
2696 probe at TEST_ADDR
2697 }
2698 while (TEST_ADDR != LAST_ADDR)
2699
2700 probes at FIRST + N * PROBE_INTERVAL for values of N from 1
2701 until it is equal to ROUNDED_SIZE. */
2702
5f5c5e0f 2703 emit_insn (gen_probe_stack_range (reg1, reg1, reg2));
a3eb8a52
EB
2704
2705
2706 /* Step 4: probe at FIRST + SIZE if we cannot assert at compile-time
2707 that SIZE is equal to ROUNDED_SIZE. */
2708
2709 if (size != rounded_size)
2710 {
2711 HOST_WIDE_INT rem = size - rounded_size;
2712
2713 if (rem > 256)
2714 {
2715 const HOST_WIDE_INT base = ROUND_UP (rem, ARITH_FACTOR);
2716
5f5c5e0f
EB
2717 emit_set_insn (reg2, plus_constant (Pmode, reg2, -base));
2718 emit_stack_probe (plus_constant (Pmode, reg2, base - rem));
a3eb8a52
EB
2719 }
2720 else
5f5c5e0f 2721 emit_stack_probe (plus_constant (Pmode, reg2, -rem));
a3eb8a52
EB
2722 }
2723 }
2724
2725 /* Make sure nothing is scheduled before we are done. */
2726 emit_insn (gen_blockage ());
2727}
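
A standalone sketch (not part of GCC) of which offsets below the incoming stack pointer the function above probes, for the unrolled cases (SIZE at most 4 * PROBE_INTERVAL); the run-time loop case is omitted and PROBE_INTERVAL is assumed to be 4096.

#include <stdio.h>

#define PROBE_INTERVAL 4096

static void
print_probe_offsets (long first, long size)
{
  if (size <= PROBE_INTERVAL)
    {
      /* One probe at the far end of the allocated region.  */
      printf ("probe at SP - %ld\n", first + size);
      return;
    }

  /* One probe per interval, plus a final probe at FIRST + SIZE.  */
  printf ("probe at SP - %ld\n", first + PROBE_INTERVAL);
  for (long i = 2 * PROBE_INTERVAL; i < size; i += PROBE_INTERVAL)
    printf ("probe at SP - %ld\n", first + i);
  printf ("probe at SP - %ld\n", first + size);
}

int
main (void)
{
  print_probe_offsets (0, 3000);    /* SP - 3000 */
  print_probe_offsets (0, 10000);   /* SP - 4096, SP - 8192, SP - 10000 */
  return 0;
}
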
2728
2729/* Probe a range of stack addresses from REG1 to REG2 inclusive. These are
2730 absolute addresses. */
2731
2732const char *
2733aarch64_output_probe_stack_range (rtx reg1, rtx reg2)
2734{
2735 static int labelno = 0;
2736 char loop_lab[32];
2737 rtx xops[2];
2738
2739 ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno++);
2740
2741 /* Loop. */
2742 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);
2743
2744 /* TEST_ADDR = TEST_ADDR + PROBE_INTERVAL. */
2745 xops[0] = reg1;
2746 xops[1] = GEN_INT (PROBE_INTERVAL);
2747 output_asm_insn ("sub\t%0, %0, %1", xops);
2748
2749 /* Probe at TEST_ADDR. */
2750 output_asm_insn ("str\txzr, [%0]", xops);
2751
2752 /* Test if TEST_ADDR == LAST_ADDR. */
2753 xops[1] = reg2;
2754 output_asm_insn ("cmp\t%0, %1", xops);
2755
2756 /* Branch. */
2757 fputs ("\tb.ne\t", asm_out_file);
2758 assemble_name_raw (asm_out_file, loop_lab);
2759 fputc ('\n', asm_out_file);
2760
2761 return "";
2762}
2763
43e9d192
IB
2764static bool
2765aarch64_frame_pointer_required (void)
2766{
0b7f8166
MS
2767 /* In aarch64_override_options_after_change
2768 flag_omit_leaf_frame_pointer turns off the frame pointer by
2769 default. Turn it back on now if we've not got a leaf
2770 function. */
2771 if (flag_omit_leaf_frame_pointer
2772 && (!crtl->is_leaf || df_regs_ever_live_p (LR_REGNUM)))
2773 return true;
43e9d192 2774
8144a493
WD
2775 /* Force a frame pointer for EH returns so the return address is at FP+8. */
2776 if (crtl->calls_eh_return)
2777 return true;
2778
0b7f8166 2779 return false;
43e9d192
IB
2780}
2781
2782/* Mark the registers that need to be saved by the callee and calculate
2783 the size of the callee-saved registers area and frame record (both FP
2784 and LR may be omitted). */
2785static void
2786aarch64_layout_frame (void)
2787{
2788 HOST_WIDE_INT offset = 0;
4b0685d9 2789 int regno, last_fp_reg = INVALID_REGNUM;
43e9d192
IB
2790
2791 if (reload_completed && cfun->machine->frame.laid_out)
2792 return;
2793
97826595
MS
2794#define SLOT_NOT_REQUIRED (-2)
2795#define SLOT_REQUIRED (-1)
2796
71bfb77a
WD
2797 cfun->machine->frame.wb_candidate1 = INVALID_REGNUM;
2798 cfun->machine->frame.wb_candidate2 = INVALID_REGNUM;
363ffa50 2799
43e9d192
IB
2800 /* First mark all the registers that really need to be saved... */
2801 for (regno = R0_REGNUM; regno <= R30_REGNUM; regno++)
97826595 2802 cfun->machine->frame.reg_offset[regno] = SLOT_NOT_REQUIRED;
43e9d192
IB
2803
2804 for (regno = V0_REGNUM; regno <= V31_REGNUM; regno++)
97826595 2805 cfun->machine->frame.reg_offset[regno] = SLOT_NOT_REQUIRED;
43e9d192
IB
2806
2807 /* ... that includes the eh data registers (if needed)... */
2808 if (crtl->calls_eh_return)
2809 for (regno = 0; EH_RETURN_DATA_REGNO (regno) != INVALID_REGNUM; regno++)
97826595
MS
2810 cfun->machine->frame.reg_offset[EH_RETURN_DATA_REGNO (regno)]
2811 = SLOT_REQUIRED;
43e9d192
IB
2812
2813 /* ... and any callee saved register that dataflow says is live. */
2814 for (regno = R0_REGNUM; regno <= R30_REGNUM; regno++)
2815 if (df_regs_ever_live_p (regno)
1c923b60
JW
2816 && (regno == R30_REGNUM
2817 || !call_used_regs[regno]))
97826595 2818 cfun->machine->frame.reg_offset[regno] = SLOT_REQUIRED;
43e9d192
IB
2819
2820 for (regno = V0_REGNUM; regno <= V31_REGNUM; regno++)
2821 if (df_regs_ever_live_p (regno)
2822 && !call_used_regs[regno])
4b0685d9
WD
2823 {
2824 cfun->machine->frame.reg_offset[regno] = SLOT_REQUIRED;
2825 last_fp_reg = regno;
2826 }
43e9d192
IB
2827
2828 if (frame_pointer_needed)
2829 {
2e1cdae5 2830 /* FP and LR are placed in the linkage record. */
43e9d192 2831 cfun->machine->frame.reg_offset[R29_REGNUM] = 0;
363ffa50 2832 cfun->machine->frame.wb_candidate1 = R29_REGNUM;
2e1cdae5 2833 cfun->machine->frame.reg_offset[R30_REGNUM] = UNITS_PER_WORD;
363ffa50 2834 cfun->machine->frame.wb_candidate2 = R30_REGNUM;
2e1cdae5 2835 offset += 2 * UNITS_PER_WORD;
43e9d192
IB
2836 }
2837
2838 /* Now assign stack slots for them. */
2e1cdae5 2839 for (regno = R0_REGNUM; regno <= R30_REGNUM; regno++)
97826595 2840 if (cfun->machine->frame.reg_offset[regno] == SLOT_REQUIRED)
43e9d192
IB
2841 {
2842 cfun->machine->frame.reg_offset[regno] = offset;
71bfb77a 2843 if (cfun->machine->frame.wb_candidate1 == INVALID_REGNUM)
363ffa50 2844 cfun->machine->frame.wb_candidate1 = regno;
71bfb77a 2845 else if (cfun->machine->frame.wb_candidate2 == INVALID_REGNUM)
363ffa50 2846 cfun->machine->frame.wb_candidate2 = regno;
43e9d192
IB
2847 offset += UNITS_PER_WORD;
2848 }
2849
4b0685d9
WD
2850 HOST_WIDE_INT max_int_offset = offset;
2851 offset = ROUND_UP (offset, STACK_BOUNDARY / BITS_PER_UNIT);
2852 bool has_align_gap = offset != max_int_offset;
2853
43e9d192 2854 for (regno = V0_REGNUM; regno <= V31_REGNUM; regno++)
97826595 2855 if (cfun->machine->frame.reg_offset[regno] == SLOT_REQUIRED)
43e9d192 2856 {
4b0685d9
WD
2857 /* If there is an alignment gap between integer and fp callee-saves,
2858 allocate the last fp register to it if possible. */
2859 if (regno == last_fp_reg && has_align_gap && (offset & 8) == 0)
2860 {
2861 cfun->machine->frame.reg_offset[regno] = max_int_offset;
2862 break;
2863 }
2864
43e9d192 2865 cfun->machine->frame.reg_offset[regno] = offset;
71bfb77a 2866 if (cfun->machine->frame.wb_candidate1 == INVALID_REGNUM)
363ffa50 2867 cfun->machine->frame.wb_candidate1 = regno;
71bfb77a 2868 else if (cfun->machine->frame.wb_candidate2 == INVALID_REGNUM
363ffa50
JW
2869 && cfun->machine->frame.wb_candidate1 >= V0_REGNUM)
2870 cfun->machine->frame.wb_candidate2 = regno;
43e9d192
IB
2871 offset += UNITS_PER_WORD;
2872 }
2873
4f59f9f2 2874 offset = ROUND_UP (offset, STACK_BOUNDARY / BITS_PER_UNIT);
43e9d192
IB
2875
2876 cfun->machine->frame.saved_regs_size = offset;
1c960e02 2877
71bfb77a
WD
2878 HOST_WIDE_INT varargs_and_saved_regs_size
2879 = offset + cfun->machine->frame.saved_varargs_size;
2880
1c960e02 2881 cfun->machine->frame.hard_fp_offset
71bfb77a 2882 = ROUND_UP (varargs_and_saved_regs_size + get_frame_size (),
4f59f9f2 2883 STACK_BOUNDARY / BITS_PER_UNIT);
1c960e02
MS
2884
2885 cfun->machine->frame.frame_size
4f59f9f2
UB
2886 = ROUND_UP (cfun->machine->frame.hard_fp_offset
2887 + crtl->outgoing_args_size,
2888 STACK_BOUNDARY / BITS_PER_UNIT);
1c960e02 2889
71bfb77a
WD
2890 cfun->machine->frame.locals_offset = cfun->machine->frame.saved_varargs_size;
2891
2892 cfun->machine->frame.initial_adjust = 0;
2893 cfun->machine->frame.final_adjust = 0;
2894 cfun->machine->frame.callee_adjust = 0;
2895 cfun->machine->frame.callee_offset = 0;
2896
2897 HOST_WIDE_INT max_push_offset = 0;
2898 if (cfun->machine->frame.wb_candidate2 != INVALID_REGNUM)
2899 max_push_offset = 512;
2900 else if (cfun->machine->frame.wb_candidate1 != INVALID_REGNUM)
2901 max_push_offset = 256;
2902
2903 if (cfun->machine->frame.frame_size < max_push_offset
2904 && crtl->outgoing_args_size == 0)
2905 {
2906 /* Simple, small frame with no outgoing arguments:
2907 stp reg1, reg2, [sp, -frame_size]!
2908 stp reg3, reg4, [sp, 16] */
2909 cfun->machine->frame.callee_adjust = cfun->machine->frame.frame_size;
2910 }
2911 else if ((crtl->outgoing_args_size
2912 + cfun->machine->frame.saved_regs_size < 512)
2913 && !(cfun->calls_alloca
2914 && cfun->machine->frame.hard_fp_offset < max_push_offset))
2915 {
2916 /* Frame with small outgoing arguments:
2917 sub sp, sp, frame_size
2918 stp reg1, reg2, [sp, outgoing_args_size]
2919 stp reg3, reg4, [sp, outgoing_args_size + 16] */
2920 cfun->machine->frame.initial_adjust = cfun->machine->frame.frame_size;
2921 cfun->machine->frame.callee_offset
2922 = cfun->machine->frame.frame_size - cfun->machine->frame.hard_fp_offset;
2923 }
2924 else if (cfun->machine->frame.hard_fp_offset < max_push_offset)
2925 {
2926 /* Frame with large outgoing arguments but a small local area:
2927 stp reg1, reg2, [sp, -hard_fp_offset]!
2928 stp reg3, reg4, [sp, 16]
2929 sub sp, sp, outgoing_args_size */
2930 cfun->machine->frame.callee_adjust = cfun->machine->frame.hard_fp_offset;
2931 cfun->machine->frame.final_adjust
2932 = cfun->machine->frame.frame_size - cfun->machine->frame.callee_adjust;
2933 }
2934 else if (!frame_pointer_needed
2935 && varargs_and_saved_regs_size < max_push_offset)
2936 {
2937 /* Frame with large local area and outgoing arguments (this pushes the
2938 callee-saves first, followed by the locals and outgoing area):
2939 stp reg1, reg2, [sp, -varargs_and_saved_regs_size]!
2940 stp reg3, reg4, [sp, 16]
2941 sub sp, sp, frame_size - varargs_and_saved_regs_size */
2942 cfun->machine->frame.callee_adjust = varargs_and_saved_regs_size;
2943 cfun->machine->frame.final_adjust
2944 = cfun->machine->frame.frame_size - cfun->machine->frame.callee_adjust;
2945 cfun->machine->frame.hard_fp_offset = cfun->machine->frame.callee_adjust;
2946 cfun->machine->frame.locals_offset = cfun->machine->frame.hard_fp_offset;
2947 }
2948 else
2949 {
2950 /* Frame with large local area and outgoing arguments using frame pointer:
2951 sub sp, sp, hard_fp_offset
2952 stp x29, x30, [sp, 0]
2953 add x29, sp, 0
2954 stp reg3, reg4, [sp, 16]
2955 sub sp, sp, outgoing_args_size */
2956 cfun->machine->frame.initial_adjust = cfun->machine->frame.hard_fp_offset;
2957 cfun->machine->frame.final_adjust
2958 = cfun->machine->frame.frame_size - cfun->machine->frame.initial_adjust;
2959 }
2960
43e9d192
IB
2961 cfun->machine->frame.laid_out = true;
2962}
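
A standalone sketch (not part of GCC) of how aarch64_layout_frame chooses among the prologue shapes above, given the frame sizes it has just computed. The alloca refinement on the second case and the no-frame-pointer fourth case are omitted, so this only approximates the real selection.

#include <stdio.h>

struct frame_plan { long initial_adjust, callee_adjust, callee_offset, final_adjust; };

static const char *
choose_prologue (long frame_size, long outgoing_args, long hard_fp_offset,
		 long saved_regs_size, long max_push_offset,
		 struct frame_plan *p)
{
  p->initial_adjust = p->callee_adjust = p->callee_offset = p->final_adjust = 0;

  if (frame_size < max_push_offset && outgoing_args == 0)
    {
      p->callee_adjust = frame_size;
      return "push callee-saves with writeback (stp ..., [sp, -frame_size]!)";
    }
  if (outgoing_args + saved_regs_size < 512)
    {
      p->initial_adjust = frame_size;
      p->callee_offset = frame_size - hard_fp_offset;
      return "sub sp once, store callee-saves above the outgoing area";
    }
  if (hard_fp_offset < max_push_offset)
    {
      p->callee_adjust = hard_fp_offset;
      p->final_adjust = frame_size - hard_fp_offset;
      return "push callee-saves, then sub sp for the outgoing area";
    }
  p->initial_adjust = hard_fp_offset;
  p->final_adjust = frame_size - hard_fp_offset;
  return "sub sp for locals, store callee-saves, then sub sp again";
}

int
main (void)
{
  struct frame_plan p;
  puts (choose_prologue (96, 0, 96, 32, 512, &p));
  puts (choose_prologue (4096, 64, 48, 32, 512, &p));
  puts (choose_prologue (70000, 1024, 48, 32, 512, &p));
  return 0;
}
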
2963
04ddfe06
KT
2964/* Return true if the register REGNO is saved on entry to
2965 the current function. */
2966
43e9d192
IB
2967static bool
2968aarch64_register_saved_on_entry (int regno)
2969{
97826595 2970 return cfun->machine->frame.reg_offset[regno] >= 0;
43e9d192
IB
2971}
2972
04ddfe06
KT
2973/* Return the next register, from REGNO up to LIMIT, that the callee
 2974 needs to save. */
2975
64dedd72
JW
2976static unsigned
2977aarch64_next_callee_save (unsigned regno, unsigned limit)
2978{
2979 while (regno <= limit && !aarch64_register_saved_on_entry (regno))
2980 regno ++;
2981 return regno;
2982}
43e9d192 2983
04ddfe06
KT
2984/* Push register number REGNO of mode MODE to the stack, with write-back
 2985 adjusting the stack pointer by ADJUSTMENT. */
2986
c5e1f66e 2987static void
ef4bddc2 2988aarch64_pushwb_single_reg (machine_mode mode, unsigned regno,
c5e1f66e
JW
2989 HOST_WIDE_INT adjustment)
2990 {
2991 rtx base_rtx = stack_pointer_rtx;
2992 rtx insn, reg, mem;
2993
2994 reg = gen_rtx_REG (mode, regno);
2995 mem = gen_rtx_PRE_MODIFY (Pmode, base_rtx,
2996 plus_constant (Pmode, base_rtx, -adjustment));
2997 mem = gen_rtx_MEM (mode, mem);
2998
2999 insn = emit_move_insn (mem, reg);
3000 RTX_FRAME_RELATED_P (insn) = 1;
3001}
3002
04ddfe06
KT
3003/* Generate and return an instruction to store the pair of registers
3004 REG and REG2 of mode MODE to location BASE with write-back adjusting
3005 the stack location BASE by ADJUSTMENT. */
3006
80c11907 3007static rtx
ef4bddc2 3008aarch64_gen_storewb_pair (machine_mode mode, rtx base, rtx reg, rtx reg2,
80c11907
JW
3009 HOST_WIDE_INT adjustment)
3010{
3011 switch (mode)
3012 {
3013 case DImode:
3014 return gen_storewb_pairdi_di (base, base, reg, reg2,
3015 GEN_INT (-adjustment),
3016 GEN_INT (UNITS_PER_WORD - adjustment));
3017 case DFmode:
3018 return gen_storewb_pairdf_di (base, base, reg, reg2,
3019 GEN_INT (-adjustment),
3020 GEN_INT (UNITS_PER_WORD - adjustment));
3021 default:
3022 gcc_unreachable ();
3023 }
3024}
3025
04ddfe06
KT
3026/* Push registers numbered REGNO1 and REGNO2 to the stack, adjusting the
3027 stack pointer by ADJUSTMENT. */
3028
80c11907 3029static void
89ac681e 3030aarch64_push_regs (unsigned regno1, unsigned regno2, HOST_WIDE_INT adjustment)
80c11907 3031{
5d8a22a5 3032 rtx_insn *insn;
89ac681e
WD
3033 machine_mode mode = (regno1 <= R30_REGNUM) ? DImode : DFmode;
3034
71bfb77a 3035 if (regno2 == INVALID_REGNUM)
89ac681e
WD
3036 return aarch64_pushwb_single_reg (mode, regno1, adjustment);
3037
80c11907
JW
3038 rtx reg1 = gen_rtx_REG (mode, regno1);
3039 rtx reg2 = gen_rtx_REG (mode, regno2);
3040
3041 insn = emit_insn (aarch64_gen_storewb_pair (mode, stack_pointer_rtx, reg1,
3042 reg2, adjustment));
3043 RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 2)) = 1;
80c11907
JW
3044 RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 1)) = 1;
3045 RTX_FRAME_RELATED_P (insn) = 1;
3046}
3047
04ddfe06
KT
3048/* Load the pair of register REG, REG2 of mode MODE from stack location BASE,
3049 adjusting it by ADJUSTMENT afterwards. */
3050
159313d9 3051static rtx
ef4bddc2 3052aarch64_gen_loadwb_pair (machine_mode mode, rtx base, rtx reg, rtx reg2,
159313d9
JW
3053 HOST_WIDE_INT adjustment)
3054{
3055 switch (mode)
3056 {
3057 case DImode:
3058 return gen_loadwb_pairdi_di (base, base, reg, reg2, GEN_INT (adjustment),
3e322b3f 3059 GEN_INT (UNITS_PER_WORD));
159313d9
JW
3060 case DFmode:
3061 return gen_loadwb_pairdf_di (base, base, reg, reg2, GEN_INT (adjustment),
3e322b3f 3062 GEN_INT (UNITS_PER_WORD));
159313d9
JW
3063 default:
3064 gcc_unreachable ();
3065 }
3066}
3067
04ddfe06
KT
3068/* Pop the two registers numbered REGNO1, REGNO2 from the stack, adjusting it
3069 afterwards by ADJUSTMENT and writing the appropriate REG_CFA_RESTORE notes
3070 into CFI_OPS. */
3071
89ac681e
WD
3072static void
3073aarch64_pop_regs (unsigned regno1, unsigned regno2, HOST_WIDE_INT adjustment,
3074 rtx *cfi_ops)
3075{
3076 machine_mode mode = (regno1 <= R30_REGNUM) ? DImode : DFmode;
3077 rtx reg1 = gen_rtx_REG (mode, regno1);
3078
3079 *cfi_ops = alloc_reg_note (REG_CFA_RESTORE, reg1, *cfi_ops);
3080
71bfb77a 3081 if (regno2 == INVALID_REGNUM)
89ac681e
WD
3082 {
3083 rtx mem = plus_constant (Pmode, stack_pointer_rtx, adjustment);
3084 mem = gen_rtx_POST_MODIFY (Pmode, stack_pointer_rtx, mem);
3085 emit_move_insn (reg1, gen_rtx_MEM (mode, mem));
3086 }
3087 else
3088 {
3089 rtx reg2 = gen_rtx_REG (mode, regno2);
3090 *cfi_ops = alloc_reg_note (REG_CFA_RESTORE, reg2, *cfi_ops);
3091 emit_insn (aarch64_gen_loadwb_pair (mode, stack_pointer_rtx, reg1,
3092 reg2, adjustment));
3093 }
3094}
3095
04ddfe06
KT
3096/* Generate and return a store pair instruction of mode MODE to store
3097 register REG1 to MEM1 and register REG2 to MEM2. */
3098
72df5c1f 3099static rtx
ef4bddc2 3100aarch64_gen_store_pair (machine_mode mode, rtx mem1, rtx reg1, rtx mem2,
72df5c1f
JW
3101 rtx reg2)
3102{
3103 switch (mode)
3104 {
3105 case DImode:
3106 return gen_store_pairdi (mem1, reg1, mem2, reg2);
3107
3108 case DFmode:
3109 return gen_store_pairdf (mem1, reg1, mem2, reg2);
3110
3111 default:
3112 gcc_unreachable ();
3113 }
3114}
3115
04ddfe06
KT
3116/* Generate and return a load pair instruction of mode MODE to load register
3117 REG1 from MEM1 and register REG2 from MEM2. */
3118
72df5c1f 3119static rtx
ef4bddc2 3120aarch64_gen_load_pair (machine_mode mode, rtx reg1, rtx mem1, rtx reg2,
72df5c1f
JW
3121 rtx mem2)
3122{
3123 switch (mode)
3124 {
3125 case DImode:
3126 return gen_load_pairdi (reg1, mem1, reg2, mem2);
3127
3128 case DFmode:
3129 return gen_load_pairdf (reg1, mem1, reg2, mem2);
3130
3131 default:
3132 gcc_unreachable ();
3133 }
3134}
3135
db58fd89
JW
3136/* Return TRUE if return address signing should be enabled for the current
3137 function, otherwise return FALSE. */
3138
3139bool
3140aarch64_return_address_signing_enabled (void)
3141{
3142 /* This function should only be called after frame laid out. */
3143 gcc_assert (cfun->machine->frame.laid_out);
3144
3145 /* If signing scope is AARCH64_FUNCTION_NON_LEAF, we only sign a leaf function
 3146 if its LR is pushed onto the stack. */
3147 return (aarch64_ra_sign_scope == AARCH64_FUNCTION_ALL
3148 || (aarch64_ra_sign_scope == AARCH64_FUNCTION_NON_LEAF
3149 && cfun->machine->frame.reg_offset[LR_REGNUM] >= 0));
3150}
3151
04ddfe06
KT
3152/* Emit code to save the callee-saved registers from register number START
3153 to LIMIT to the stack at the location starting at offset START_OFFSET,
3154 skipping any write-back candidates if SKIP_WB is true. */
43e9d192 3155
43e9d192 3156static void
ef4bddc2 3157aarch64_save_callee_saves (machine_mode mode, HOST_WIDE_INT start_offset,
ae13fce3 3158 unsigned start, unsigned limit, bool skip_wb)
43e9d192 3159{
5d8a22a5 3160 rtx_insn *insn;
ef4bddc2 3161 rtx (*gen_mem_ref) (machine_mode, rtx) = (frame_pointer_needed
a007a21c 3162 ? gen_frame_mem : gen_rtx_MEM);
43e9d192
IB
3163 unsigned regno;
3164 unsigned regno2;
3165
0ec74a1e 3166 for (regno = aarch64_next_callee_save (start, limit);
64dedd72
JW
3167 regno <= limit;
3168 regno = aarch64_next_callee_save (regno + 1, limit))
43e9d192 3169 {
ae13fce3
JW
3170 rtx reg, mem;
3171 HOST_WIDE_INT offset;
64dedd72 3172
ae13fce3
JW
3173 if (skip_wb
3174 && (regno == cfun->machine->frame.wb_candidate1
3175 || regno == cfun->machine->frame.wb_candidate2))
3176 continue;
3177
827ab47a
KT
3178 if (cfun->machine->reg_is_wrapped_separately[regno])
3179 continue;
3180
ae13fce3
JW
3181 reg = gen_rtx_REG (mode, regno);
3182 offset = start_offset + cfun->machine->frame.reg_offset[regno];
0ec74a1e
JW
3183 mem = gen_mem_ref (mode, plus_constant (Pmode, stack_pointer_rtx,
3184 offset));
64dedd72
JW
3185
3186 regno2 = aarch64_next_callee_save (regno + 1, limit);
3187
3188 if (regno2 <= limit
827ab47a 3189 && !cfun->machine->reg_is_wrapped_separately[regno2]
64dedd72
JW
3190 && ((cfun->machine->frame.reg_offset[regno] + UNITS_PER_WORD)
3191 == cfun->machine->frame.reg_offset[regno2]))
3192
43e9d192 3193 {
0ec74a1e 3194 rtx reg2 = gen_rtx_REG (mode, regno2);
64dedd72
JW
3195 rtx mem2;
3196
3197 offset = start_offset + cfun->machine->frame.reg_offset[regno2];
8ed2fc62
JW
3198 mem2 = gen_mem_ref (mode, plus_constant (Pmode, stack_pointer_rtx,
3199 offset));
3200 insn = emit_insn (aarch64_gen_store_pair (mode, mem, reg, mem2,
3201 reg2));
0b4a9743 3202
64dedd72
JW
3203 /* The first part of a frame-related parallel insn is
3204 always assumed to be relevant to the frame
3205 calculations; subsequent parts, are only
3206 frame-related if explicitly marked. */
3207 RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 1)) = 1;
3208 regno = regno2;
3209 }
3210 else
8ed2fc62
JW
3211 insn = emit_move_insn (mem, reg);
3212
3213 RTX_FRAME_RELATED_P (insn) = 1;
3214 }
3215}
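
A standalone sketch (not part of GCC) of the pairing test in the loop above: two consecutive callee-saves whose slots are 8 bytes apart get merged into one store-pair. The register numbers and offsets below are made-up example data.

#include <stdio.h>

#define NREGS 6

int
main (void)
{
  /* Callee-saved register numbers and their assigned frame offsets,
     already sorted by offset.  */
  int regno[NREGS] = { 19, 20, 21, 23, 24, 25 };
  long offset[NREGS] = { 0, 8, 16, 24, 32, 48 };

  for (int i = 0; i < NREGS; i++)
    {
      if (i + 1 < NREGS && offset[i] + 8 == offset[i + 1])
	{
	  printf ("stp x%d, x%d, [base, %ld]\n",
		  regno[i], regno[i + 1], offset[i]);
	  i++;			/* the pair consumed two registers */
	}
      else
	printf ("str x%d, [base, %ld]\n", regno[i], offset[i]);
    }
  return 0;
}
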
3216
04ddfe06
KT
3217/* Emit code to restore the callee registers of mode MODE from register
3218 number START up to and including LIMIT. Restore from the stack offset
3219 START_OFFSET, skipping any write-back candidates if SKIP_WB is true.
3220 Write the appropriate REG_CFA_RESTORE notes into CFI_OPS. */
3221
8ed2fc62 3222static void
ef4bddc2 3223aarch64_restore_callee_saves (machine_mode mode,
8ed2fc62 3224 HOST_WIDE_INT start_offset, unsigned start,
dd991abb 3225 unsigned limit, bool skip_wb, rtx *cfi_ops)
8ed2fc62 3226{
8ed2fc62 3227 rtx base_rtx = stack_pointer_rtx;
ef4bddc2 3228 rtx (*gen_mem_ref) (machine_mode, rtx) = (frame_pointer_needed
8ed2fc62
JW
3229 ? gen_frame_mem : gen_rtx_MEM);
3230 unsigned regno;
3231 unsigned regno2;
3232 HOST_WIDE_INT offset;
3233
3234 for (regno = aarch64_next_callee_save (start, limit);
3235 regno <= limit;
3236 regno = aarch64_next_callee_save (regno + 1, limit))
3237 {
827ab47a
KT
3238 if (cfun->machine->reg_is_wrapped_separately[regno])
3239 continue;
3240
ae13fce3 3241 rtx reg, mem;
8ed2fc62 3242
ae13fce3
JW
3243 if (skip_wb
3244 && (regno == cfun->machine->frame.wb_candidate1
3245 || regno == cfun->machine->frame.wb_candidate2))
3246 continue;
3247
3248 reg = gen_rtx_REG (mode, regno);
8ed2fc62
JW
3249 offset = start_offset + cfun->machine->frame.reg_offset[regno];
3250 mem = gen_mem_ref (mode, plus_constant (Pmode, base_rtx, offset));
3251
3252 regno2 = aarch64_next_callee_save (regno + 1, limit);
3253
3254 if (regno2 <= limit
827ab47a 3255 && !cfun->machine->reg_is_wrapped_separately[regno2]
8ed2fc62
JW
3256 && ((cfun->machine->frame.reg_offset[regno] + UNITS_PER_WORD)
3257 == cfun->machine->frame.reg_offset[regno2]))
64dedd72 3258 {
8ed2fc62
JW
3259 rtx reg2 = gen_rtx_REG (mode, regno2);
3260 rtx mem2;
3261
3262 offset = start_offset + cfun->machine->frame.reg_offset[regno2];
3263 mem2 = gen_mem_ref (mode, plus_constant (Pmode, base_rtx, offset));
dd991abb 3264 emit_insn (aarch64_gen_load_pair (mode, reg, mem, reg2, mem2));
8ed2fc62 3265
dd991abb 3266 *cfi_ops = alloc_reg_note (REG_CFA_RESTORE, reg2, *cfi_ops);
8ed2fc62 3267 regno = regno2;
43e9d192 3268 }
8ed2fc62 3269 else
dd991abb
RH
3270 emit_move_insn (reg, mem);
3271 *cfi_ops = alloc_reg_note (REG_CFA_RESTORE, reg, *cfi_ops);
43e9d192 3272 }
43e9d192
IB
3273}
3274
827ab47a
KT
3275static inline bool
3276offset_9bit_signed_unscaled_p (machine_mode mode ATTRIBUTE_UNUSED,
3277 HOST_WIDE_INT offset)
3278{
3279 return offset >= -256 && offset < 256;
3280}
3281
3282static inline bool
3283offset_12bit_unsigned_scaled_p (machine_mode mode, HOST_WIDE_INT offset)
3284{
3285 return (offset >= 0
3286 && offset < 4096 * GET_MODE_SIZE (mode)
3287 && offset % GET_MODE_SIZE (mode) == 0);
3288}
3289
3290bool
3291aarch64_offset_7bit_signed_scaled_p (machine_mode mode, HOST_WIDE_INT offset)
3292{
3293 return (offset >= -64 * GET_MODE_SIZE (mode)
3294 && offset < 64 * GET_MODE_SIZE (mode)
3295 && offset % GET_MODE_SIZE (mode) == 0);
3296}
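
A standalone sketch (not part of GCC) of the three offset ranges checked by the predicates above, evaluated for 8-byte (DImode) accesses; the printed flags correspond roughly to the LDUR, LDR and LDP addressing forms.

#include <stdio.h>

#define MODE_SIZE 8  /* DImode */

static void
classify_offset (long offset)
{
  int unscaled_9bit = offset >= -256 && offset < 256;
  int scaled_12bit = (offset >= 0
		      && offset < 4096 * MODE_SIZE
		      && offset % MODE_SIZE == 0);
  int scaled_7bit = (offset >= -64 * MODE_SIZE
		     && offset < 64 * MODE_SIZE
		     && offset % MODE_SIZE == 0);

  printf ("%6ld: ldur %d, ldr %d, ldp %d\n",
	  offset, unscaled_9bit, scaled_12bit, scaled_7bit);
}

int
main (void)
{
  classify_offset (-8);     /* only the unscaled and pair forms */
  classify_offset (248);    /* all three */
  classify_offset (512);    /* only the scaled 12-bit form */
  classify_offset (32760);  /* only the scaled 12-bit form */
  return 0;
}
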
3297
3298/* Implement TARGET_SHRINK_WRAP_GET_SEPARATE_COMPONENTS. */
3299
3300static sbitmap
3301aarch64_get_separate_components (void)
3302{
3303 aarch64_layout_frame ();
3304
3305 sbitmap components = sbitmap_alloc (LAST_SAVED_REGNUM + 1);
3306 bitmap_clear (components);
3307
3308 /* The registers we need saved to the frame. */
3309 for (unsigned regno = 0; regno <= LAST_SAVED_REGNUM; regno++)
3310 if (aarch64_register_saved_on_entry (regno))
3311 {
3312 HOST_WIDE_INT offset = cfun->machine->frame.reg_offset[regno];
3313 if (!frame_pointer_needed)
3314 offset += cfun->machine->frame.frame_size
3315 - cfun->machine->frame.hard_fp_offset;
3316 /* Check that we can access the stack slot of the register with one
3317 direct load with no adjustments needed. */
3318 if (offset_12bit_unsigned_scaled_p (DImode, offset))
3319 bitmap_set_bit (components, regno);
3320 }
3321
3322 /* Don't mess with the hard frame pointer. */
3323 if (frame_pointer_needed)
3324 bitmap_clear_bit (components, HARD_FRAME_POINTER_REGNUM);
3325
3326 unsigned reg1 = cfun->machine->frame.wb_candidate1;
3327 unsigned reg2 = cfun->machine->frame.wb_candidate2;
3328 /* If aarch64_layout_frame has chosen registers to store/restore with
3329 writeback don't interfere with them to avoid having to output explicit
3330 stack adjustment instructions. */
3331 if (reg2 != INVALID_REGNUM)
3332 bitmap_clear_bit (components, reg2);
3333 if (reg1 != INVALID_REGNUM)
3334 bitmap_clear_bit (components, reg1);
3335
3336 bitmap_clear_bit (components, LR_REGNUM);
3337 bitmap_clear_bit (components, SP_REGNUM);
3338
3339 return components;
3340}
3341
3342/* Implement TARGET_SHRINK_WRAP_COMPONENTS_FOR_BB. */
3343
3344static sbitmap
3345aarch64_components_for_bb (basic_block bb)
3346{
3347 bitmap in = DF_LIVE_IN (bb);
3348 bitmap gen = &DF_LIVE_BB_INFO (bb)->gen;
3349 bitmap kill = &DF_LIVE_BB_INFO (bb)->kill;
3350
3351 sbitmap components = sbitmap_alloc (LAST_SAVED_REGNUM + 1);
3352 bitmap_clear (components);
3353
3354 /* GPRs are used in a bb if they are in the IN, GEN, or KILL sets. */
3355 for (unsigned regno = 0; regno <= LAST_SAVED_REGNUM; regno++)
3356 if ((!call_used_regs[regno])
3357 && (bitmap_bit_p (in, regno)
3358 || bitmap_bit_p (gen, regno)
3359 || bitmap_bit_p (kill, regno)))
3360 bitmap_set_bit (components, regno);
3361
3362 return components;
3363}
3364
3365/* Implement TARGET_SHRINK_WRAP_DISQUALIFY_COMPONENTS.
3366 Nothing to do for aarch64. */
3367
3368static void
3369aarch64_disqualify_components (sbitmap, edge, sbitmap, bool)
3370{
3371}
3372
3373/* Return the next set bit in BMP from START onwards. Return the total number
3374 of bits in BMP if no set bit is found at or after START. */
3375
3376static unsigned int
3377aarch64_get_next_set_bit (sbitmap bmp, unsigned int start)
3378{
3379 unsigned int nbits = SBITMAP_SIZE (bmp);
3380 if (start == nbits)
3381 return start;
3382
3383 gcc_assert (start < nbits);
3384 for (unsigned int i = start; i < nbits; i++)
3385 if (bitmap_bit_p (bmp, i))
3386 return i;
3387
3388 return nbits;
3389}
3390
3391/* Do the work for aarch64_emit_prologue_components and
3392 aarch64_emit_epilogue_components. COMPONENTS is the bitmap of registers
 3393 to save/restore; PROLOGUE_P indicates whether to emit the prologue sequence
3394 for these components or the epilogue sequence. That is, it determines
3395 whether we should emit stores or loads and what kind of CFA notes to attach
3396 to the insns. Otherwise the logic for the two sequences is very
3397 similar. */
3398
3399static void
3400aarch64_process_components (sbitmap components, bool prologue_p)
3401{
3402 rtx ptr_reg = gen_rtx_REG (Pmode, frame_pointer_needed
3403 ? HARD_FRAME_POINTER_REGNUM
3404 : STACK_POINTER_REGNUM);
3405
3406 unsigned last_regno = SBITMAP_SIZE (components);
3407 unsigned regno = aarch64_get_next_set_bit (components, R0_REGNUM);
3408 rtx_insn *insn = NULL;
3409
3410 while (regno != last_regno)
3411 {
3412 /* AAPCS64 section 5.1.2 requires only the bottom 64 bits to be saved
3413 so DFmode for the vector registers is enough. */
3414 machine_mode mode = GP_REGNUM_P (regno) ? DImode : DFmode;
3415 rtx reg = gen_rtx_REG (mode, regno);
3416 HOST_WIDE_INT offset = cfun->machine->frame.reg_offset[regno];
3417 if (!frame_pointer_needed)
3418 offset += cfun->machine->frame.frame_size
3419 - cfun->machine->frame.hard_fp_offset;
3420 rtx addr = plus_constant (Pmode, ptr_reg, offset);
3421 rtx mem = gen_frame_mem (mode, addr);
3422
3423 rtx set = prologue_p ? gen_rtx_SET (mem, reg) : gen_rtx_SET (reg, mem);
3424 unsigned regno2 = aarch64_get_next_set_bit (components, regno + 1);
3425 /* No more registers to handle after REGNO.
3426 Emit a single save/restore and exit. */
3427 if (regno2 == last_regno)
3428 {
3429 insn = emit_insn (set);
3430 RTX_FRAME_RELATED_P (insn) = 1;
3431 if (prologue_p)
3432 add_reg_note (insn, REG_CFA_OFFSET, copy_rtx (set));
3433 else
3434 add_reg_note (insn, REG_CFA_RESTORE, reg);
3435 break;
3436 }
3437
3438 HOST_WIDE_INT offset2 = cfun->machine->frame.reg_offset[regno2];
3439 /* The next register is not of the same class or its offset is not
3440 mergeable with the current one into a pair. */
3441 if (!satisfies_constraint_Ump (mem)
3442 || GP_REGNUM_P (regno) != GP_REGNUM_P (regno2)
3443 || (offset2 - cfun->machine->frame.reg_offset[regno])
3444 != GET_MODE_SIZE (mode))
3445 {
3446 insn = emit_insn (set);
3447 RTX_FRAME_RELATED_P (insn) = 1;
3448 if (prologue_p)
3449 add_reg_note (insn, REG_CFA_OFFSET, copy_rtx (set));
3450 else
3451 add_reg_note (insn, REG_CFA_RESTORE, reg);
3452
3453 regno = regno2;
3454 continue;
3455 }
3456
3457 /* REGNO2 can be saved/restored in a pair with REGNO. */
3458 rtx reg2 = gen_rtx_REG (mode, regno2);
3459 if (!frame_pointer_needed)
3460 offset2 += cfun->machine->frame.frame_size
3461 - cfun->machine->frame.hard_fp_offset;
3462 rtx addr2 = plus_constant (Pmode, ptr_reg, offset2);
3463 rtx mem2 = gen_frame_mem (mode, addr2);
3464 rtx set2 = prologue_p ? gen_rtx_SET (mem2, reg2)
3465 : gen_rtx_SET (reg2, mem2);
3466
3467 if (prologue_p)
3468 insn = emit_insn (aarch64_gen_store_pair (mode, mem, reg, mem2, reg2));
3469 else
3470 insn = emit_insn (aarch64_gen_load_pair (mode, reg, mem, reg2, mem2));
3471
3472 RTX_FRAME_RELATED_P (insn) = 1;
3473 if (prologue_p)
3474 {
3475 add_reg_note (insn, REG_CFA_OFFSET, set);
3476 add_reg_note (insn, REG_CFA_OFFSET, set2);
3477 }
3478 else
3479 {
3480 add_reg_note (insn, REG_CFA_RESTORE, reg);
3481 add_reg_note (insn, REG_CFA_RESTORE, reg2);
3482 }
3483
3484 regno = aarch64_get_next_set_bit (components, regno2 + 1);
3485 }
3486}
3487
3488/* Implement TARGET_SHRINK_WRAP_EMIT_PROLOGUE_COMPONENTS. */
3489
3490static void
3491aarch64_emit_prologue_components (sbitmap components)
3492{
3493 aarch64_process_components (components, true);
3494}
3495
3496/* Implement TARGET_SHRINK_WRAP_EMIT_EPILOGUE_COMPONENTS. */
3497
3498static void
3499aarch64_emit_epilogue_components (sbitmap components)
3500{
3501 aarch64_process_components (components, false);
3502}
3503
3504/* Implement TARGET_SHRINK_WRAP_SET_HANDLED_COMPONENTS. */
3505
3506static void
3507aarch64_set_handled_components (sbitmap components)
3508{
3509 for (unsigned regno = 0; regno <= LAST_SAVED_REGNUM; regno++)
3510 if (bitmap_bit_p (components, regno))
3511 cfun->machine->reg_is_wrapped_separately[regno] = true;
3512}
3513
43e9d192
IB
3514/* AArch64 stack frames generated by this compiler look like:
3515
3516 +-------------------------------+
3517 | |
3518 | incoming stack arguments |
3519 | |
34834420
MS
3520 +-------------------------------+
3521 | | <-- incoming stack pointer (aligned)
43e9d192
IB
3522 | callee-allocated save area |
3523 | for register varargs |
3524 | |
34834420
MS
3525 +-------------------------------+
3526 | local variables | <-- frame_pointer_rtx
43e9d192
IB
3527 | |
3528 +-------------------------------+
454fdba9
RL
3529 | padding0 | \
3530 +-------------------------------+ |
454fdba9 3531 | callee-saved registers | | frame.saved_regs_size
454fdba9
RL
3532 +-------------------------------+ |
3533 | LR' | |
3534 +-------------------------------+ |
34834420
MS
3535 | FP' | / <- hard_frame_pointer_rtx (aligned)
3536 +-------------------------------+
43e9d192
IB
3537 | dynamic allocation |
3538 +-------------------------------+
34834420
MS
3539 | padding |
3540 +-------------------------------+
3541 | outgoing stack arguments | <-- arg_pointer
3542 | |
3543 +-------------------------------+
3544 | | <-- stack_pointer_rtx (aligned)
43e9d192 3545
34834420
MS
3546 Dynamic stack allocations via alloca() decrease stack_pointer_rtx
3547 but leave frame_pointer_rtx and hard_frame_pointer_rtx
3548 unchanged. */
43e9d192
IB
3549
3550/* Generate the prologue instructions for entry into a function.
 3551 Establish the stack frame by decreasing the stack pointer by a
3552 properly calculated size and, if necessary, create a frame record
3553 filled with the values of LR and previous frame pointer. The
6991c977 3554 current FP is also set up if it is in use. */
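/* As an illustration (the exact sequence depends on the frame layout chosen
   by aarch64_layout_frame), a small non-leaf function that needs a frame
   record and no other callee saves typically gets a prologue such as:

	stp	x29, x30, [sp, -32]!
	mov	x29, sp

   where the STP with writeback performs callee_adjust and saves the FP/LR
   pair in a single instruction.  */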
43e9d192
IB
3555
3556void
3557aarch64_expand_prologue (void)
3558{
43e9d192 3559 aarch64_layout_frame ();
43e9d192 3560
71bfb77a
WD
3561 HOST_WIDE_INT frame_size = cfun->machine->frame.frame_size;
3562 HOST_WIDE_INT initial_adjust = cfun->machine->frame.initial_adjust;
3563 HOST_WIDE_INT callee_adjust = cfun->machine->frame.callee_adjust;
3564 HOST_WIDE_INT final_adjust = cfun->machine->frame.final_adjust;
3565 HOST_WIDE_INT callee_offset = cfun->machine->frame.callee_offset;
3566 unsigned reg1 = cfun->machine->frame.wb_candidate1;
3567 unsigned reg2 = cfun->machine->frame.wb_candidate2;
3568 rtx_insn *insn;
43e9d192 3569
db58fd89
JW
3570 /* Sign return address for functions. */
3571 if (aarch64_return_address_signing_enabled ())
27169e45
JW
3572 {
3573 insn = emit_insn (gen_pacisp ());
3574 add_reg_note (insn, REG_CFA_TOGGLE_RA_MANGLE, const0_rtx);
3575 RTX_FRAME_RELATED_P (insn) = 1;
3576 }
db58fd89 3577
dd991abb
RH
3578 if (flag_stack_usage_info)
3579 current_function_static_stack_size = frame_size;
43e9d192 3580
a3eb8a52
EB
3581 if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK)
3582 {
3583 if (crtl->is_leaf && !cfun->calls_alloca)
3584 {
3585 if (frame_size > PROBE_INTERVAL && frame_size > STACK_CHECK_PROTECT)
3586 aarch64_emit_probe_stack_range (STACK_CHECK_PROTECT,
3587 frame_size - STACK_CHECK_PROTECT);
3588 }
3589 else if (frame_size > 0)
3590 aarch64_emit_probe_stack_range (STACK_CHECK_PROTECT, frame_size);
3591 }
3592
5be6b295 3593 aarch64_sub_sp (IP0_REGNUM, initial_adjust, true);
43e9d192 3594
71bfb77a
WD
3595 if (callee_adjust != 0)
3596 aarch64_push_regs (reg1, reg2, callee_adjust);
43e9d192 3597
71bfb77a 3598 if (frame_pointer_needed)
43e9d192 3599 {
71bfb77a
WD
3600 if (callee_adjust == 0)
3601 aarch64_save_callee_saves (DImode, callee_offset, R29_REGNUM,
3602 R30_REGNUM, false);
3603 insn = emit_insn (gen_add3_insn (hard_frame_pointer_rtx,
3604 stack_pointer_rtx,
3605 GEN_INT (callee_offset)));
3606 RTX_FRAME_RELATED_P (insn) = 1;
3607 emit_insn (gen_stack_tie (stack_pointer_rtx, hard_frame_pointer_rtx));
43e9d192 3608 }
71bfb77a
WD
3609
3610 aarch64_save_callee_saves (DImode, callee_offset, R0_REGNUM, R30_REGNUM,
3611 callee_adjust != 0 || frame_pointer_needed);
3612 aarch64_save_callee_saves (DFmode, callee_offset, V0_REGNUM, V31_REGNUM,
3613 callee_adjust != 0 || frame_pointer_needed);
5be6b295 3614 aarch64_sub_sp (IP1_REGNUM, final_adjust, !frame_pointer_needed);
43e9d192
IB
3615}
3616
4f942779
RL
3617/* Return TRUE if we can use a simple_return insn.
3618
3619 This function checks whether the callee saved stack is empty, which
 3620 means no restore actions are needed. The pro_and_epilogue pass uses
 3621 this to check whether the shrink-wrapping optimization is feasible. */
3622
3623bool
3624aarch64_use_return_insn_p (void)
3625{
3626 if (!reload_completed)
3627 return false;
3628
3629 if (crtl->profile)
3630 return false;
3631
3632 aarch64_layout_frame ();
3633
3634 return cfun->machine->frame.frame_size == 0;
3635}
3636
71bfb77a
WD
3637/* Generate the epilogue instructions for returning from a function.
 3638 This is almost exactly the reverse of the prologue sequence, except
3639 that we need to insert barriers to avoid scheduling loads that read
3640 from a deallocated stack, and we optimize the unwind records by
3641 emitting them all together if possible. */
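/* For the small-frame prologue sketched above, the matching epilogue is
   typically just:

	ldp	x29, x30, [sp], 32
	ret

   with the load-pair restoring FP/LR and deallocating the frame via
   writeback.  */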
43e9d192
IB
3642void
3643aarch64_expand_epilogue (bool for_sibcall)
3644{
43e9d192 3645 aarch64_layout_frame ();
43e9d192 3646
71bfb77a
WD
3647 HOST_WIDE_INT initial_adjust = cfun->machine->frame.initial_adjust;
3648 HOST_WIDE_INT callee_adjust = cfun->machine->frame.callee_adjust;
3649 HOST_WIDE_INT final_adjust = cfun->machine->frame.final_adjust;
3650 HOST_WIDE_INT callee_offset = cfun->machine->frame.callee_offset;
3651 unsigned reg1 = cfun->machine->frame.wb_candidate1;
3652 unsigned reg2 = cfun->machine->frame.wb_candidate2;
3653 rtx cfi_ops = NULL;
3654 rtx_insn *insn;
44c0e7b9 3655
71bfb77a
WD
 3656 /* We need to add a memory barrier to prevent reads from the deallocated stack. */
3657 bool need_barrier_p = (get_frame_size ()
3658 + cfun->machine->frame.saved_varargs_size) != 0;
43e9d192 3659
71bfb77a 3660 /* Emit a barrier to prevent loads from a deallocated stack. */
8144a493
WD
3661 if (final_adjust > crtl->outgoing_args_size || cfun->calls_alloca
3662 || crtl->calls_eh_return)
43e9d192 3663 {
71bfb77a
WD
3664 emit_insn (gen_stack_tie (stack_pointer_rtx, stack_pointer_rtx));
3665 need_barrier_p = false;
3666 }
7e8c2bd5 3667
71bfb77a
WD
3668 /* Restore the stack pointer from the frame pointer if it may not
3669 be the same as the stack pointer. */
3670 if (frame_pointer_needed && (final_adjust || cfun->calls_alloca))
3671 {
43e9d192
IB
3672 insn = emit_insn (gen_add3_insn (stack_pointer_rtx,
3673 hard_frame_pointer_rtx,
71bfb77a
WD
3674 GEN_INT (-callee_offset)));
3675 /* If writeback is used when restoring callee-saves, the CFA
3676 is restored on the instruction doing the writeback. */
3677 RTX_FRAME_RELATED_P (insn) = callee_adjust == 0;
43e9d192 3678 }
71bfb77a 3679 else
5be6b295 3680 aarch64_add_sp (IP1_REGNUM, final_adjust, df_regs_ever_live_p (IP1_REGNUM));
43e9d192 3681
71bfb77a
WD
3682 aarch64_restore_callee_saves (DImode, callee_offset, R0_REGNUM, R30_REGNUM,
3683 callee_adjust != 0, &cfi_ops);
3684 aarch64_restore_callee_saves (DFmode, callee_offset, V0_REGNUM, V31_REGNUM,
3685 callee_adjust != 0, &cfi_ops);
43e9d192 3686
71bfb77a
WD
3687 if (need_barrier_p)
3688 emit_insn (gen_stack_tie (stack_pointer_rtx, stack_pointer_rtx));
3689
3690 if (callee_adjust != 0)
3691 aarch64_pop_regs (reg1, reg2, callee_adjust, &cfi_ops);
3692
3693 if (callee_adjust != 0 || initial_adjust > 65536)
3694 {
3695 /* Emit delayed restores and set the CFA to be SP + initial_adjust. */
89ac681e 3696 insn = get_last_insn ();
71bfb77a
WD
3697 rtx new_cfa = plus_constant (Pmode, stack_pointer_rtx, initial_adjust);
3698 REG_NOTES (insn) = alloc_reg_note (REG_CFA_DEF_CFA, new_cfa, cfi_ops);
43e9d192 3699 RTX_FRAME_RELATED_P (insn) = 1;
71bfb77a 3700 cfi_ops = NULL;
43e9d192
IB
3701 }
3702
5be6b295 3703 aarch64_add_sp (IP0_REGNUM, initial_adjust, df_regs_ever_live_p (IP0_REGNUM));
7e8c2bd5 3704
71bfb77a
WD
3705 if (cfi_ops)
3706 {
3707 /* Emit delayed restores and reset the CFA to be SP. */
3708 insn = get_last_insn ();
3709 cfi_ops = alloc_reg_note (REG_CFA_DEF_CFA, stack_pointer_rtx, cfi_ops);
3710 REG_NOTES (insn) = cfi_ops;
3711 RTX_FRAME_RELATED_P (insn) = 1;
dd991abb
RH
3712 }
3713
db58fd89
JW
 3714 /* We prefer to emit the combined return/authenticate instruction RETAA;
 3715 however, there are three cases in which we must instead emit an explicit
3716 authentication instruction.
3717
3718 1) Sibcalls don't return in a normal way, so if we're about to call one
3719 we must authenticate.
3720
3721 2) The RETAA instruction is not available before ARMv8.3-A, so if we are
3722 generating code for !TARGET_ARMV8_3 we can't use it and must
3723 explicitly authenticate.
3724
3725 3) On an eh_return path we make extra stack adjustments to update the
3726 canonical frame address to be the exception handler's CFA. We want
3727 to authenticate using the CFA of the function which calls eh_return.
3728 */
3729 if (aarch64_return_address_signing_enabled ()
3730 && (for_sibcall || !TARGET_ARMV8_3 || crtl->calls_eh_return))
27169e45
JW
3731 {
3732 insn = emit_insn (gen_autisp ());
3733 add_reg_note (insn, REG_CFA_TOGGLE_RA_MANGLE, const0_rtx);
3734 RTX_FRAME_RELATED_P (insn) = 1;
3735 }
db58fd89 3736
dd991abb
RH
3737 /* Stack adjustment for exception handler. */
3738 if (crtl->calls_eh_return)
3739 {
3740 /* We need to unwind the stack by the offset computed by
3741 EH_RETURN_STACKADJ_RTX. We have already reset the CFA
3742 to be SP; letting the CFA move during this adjustment
3743 is just as correct as retaining the CFA from the body
3744 of the function. Therefore, do nothing special. */
3745 emit_insn (gen_add2_insn (stack_pointer_rtx, EH_RETURN_STACKADJ_RTX));
43e9d192
IB
3746 }
3747
3748 emit_use (gen_rtx_REG (DImode, LR_REGNUM));
3749 if (!for_sibcall)
3750 emit_jump_insn (ret_rtx);
3751}
3752
8144a493
WD
3753/* Implement EH_RETURN_HANDLER_RTX. EH returns need to either return
3754 normally or return to a previous frame after unwinding.
1c960e02 3755
8144a493
WD
3756 An EH return uses a single shared return sequence. The epilogue is
3757 exactly like a normal epilogue except that it has an extra input
3758 register (EH_RETURN_STACKADJ_RTX) which contains the stack adjustment
3759 that must be applied after the frame has been destroyed. An extra label
3760 is inserted before the epilogue which initializes this register to zero,
3761 and this is the entry point for a normal return.
43e9d192 3762
8144a493
WD
3763 An actual EH return updates the return address, initializes the stack
3764 adjustment and jumps directly into the epilogue (bypassing the zeroing
3765 of the adjustment). Since the return address is typically saved on the
3766 stack when a function makes a call, the saved LR must be updated outside
3767 the epilogue.
43e9d192 3768
8144a493
WD
3769 This poses problems as the store is generated well before the epilogue,
3770 so the offset of LR is not known yet. Also optimizations will remove the
3771 store as it appears dead, even after the epilogue is generated (as the
3772 base or offset for loading LR is different in many cases).
43e9d192 3773
8144a493
WD
3774 To avoid these problems this implementation forces the frame pointer
3775 in eh_return functions so that the location of LR is fixed and known early.
3776 It also marks the store volatile, so no optimization is permitted to
3777 remove the store. */
3778rtx
3779aarch64_eh_return_handler_rtx (void)
3780{
3781 rtx tmp = gen_frame_mem (Pmode,
3782 plus_constant (Pmode, hard_frame_pointer_rtx, UNITS_PER_WORD));
43e9d192 3783
8144a493
WD
3784 /* Mark the store volatile, so no optimization is permitted to remove it. */
3785 MEM_VOLATILE_P (tmp) = true;
3786 return tmp;
43e9d192
IB
3787}
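/* In other words, the handler address is stored into the slot at [x29, #8],
   which is where LR lives in the frame record laid out above, so the shared
   epilogue reloads it as the return address.  */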
3788
43e9d192
IB
3789/* Output code to add DELTA to the first argument, and then jump
3790 to FUNCTION. Used for C++ multiple inheritance. */
3791static void
3792aarch64_output_mi_thunk (FILE *file, tree thunk ATTRIBUTE_UNUSED,
3793 HOST_WIDE_INT delta,
3794 HOST_WIDE_INT vcall_offset,
3795 tree function)
3796{
3797 /* The this pointer is always in x0. Note that this differs from
 3798 Arm where the this pointer may be bumped to r1 if r0 is required
3799 to return a pointer to an aggregate. On AArch64 a result value
3800 pointer will be in x8. */
3801 int this_regno = R0_REGNUM;
5d8a22a5
DM
3802 rtx this_rtx, temp0, temp1, addr, funexp;
3803 rtx_insn *insn;
43e9d192 3804
75f1d6fc
SN
3805 reload_completed = 1;
3806 emit_note (NOTE_INSN_PROLOGUE_END);
43e9d192
IB
3807
3808 if (vcall_offset == 0)
5be6b295 3809 aarch64_add_constant (Pmode, this_regno, IP1_REGNUM, delta);
43e9d192
IB
3810 else
3811 {
28514dda 3812 gcc_assert ((vcall_offset & (POINTER_BYTES - 1)) == 0);
43e9d192 3813
75f1d6fc
SN
3814 this_rtx = gen_rtx_REG (Pmode, this_regno);
3815 temp0 = gen_rtx_REG (Pmode, IP0_REGNUM);
3816 temp1 = gen_rtx_REG (Pmode, IP1_REGNUM);
43e9d192 3817
75f1d6fc
SN
3818 addr = this_rtx;
3819 if (delta != 0)
3820 {
3821 if (delta >= -256 && delta < 256)
3822 addr = gen_rtx_PRE_MODIFY (Pmode, this_rtx,
3823 plus_constant (Pmode, this_rtx, delta));
3824 else
5be6b295 3825 aarch64_add_constant (Pmode, this_regno, IP1_REGNUM, delta);
43e9d192
IB
3826 }
3827
28514dda
YZ
3828 if (Pmode == ptr_mode)
3829 aarch64_emit_move (temp0, gen_rtx_MEM (ptr_mode, addr));
3830 else
3831 aarch64_emit_move (temp0,
3832 gen_rtx_ZERO_EXTEND (Pmode,
3833 gen_rtx_MEM (ptr_mode, addr)));
75f1d6fc 3834
28514dda 3835 if (vcall_offset >= -256 && vcall_offset < 4096 * POINTER_BYTES)
75f1d6fc 3836 addr = plus_constant (Pmode, temp0, vcall_offset);
43e9d192
IB
3837 else
3838 {
f43657b4
JW
3839 aarch64_internal_mov_immediate (temp1, GEN_INT (vcall_offset), true,
3840 Pmode);
75f1d6fc 3841 addr = gen_rtx_PLUS (Pmode, temp0, temp1);
43e9d192
IB
3842 }
3843
28514dda
YZ
3844 if (Pmode == ptr_mode)
3845 aarch64_emit_move (temp1, gen_rtx_MEM (ptr_mode,addr));
3846 else
3847 aarch64_emit_move (temp1,
3848 gen_rtx_SIGN_EXTEND (Pmode,
3849 gen_rtx_MEM (ptr_mode, addr)));
3850
75f1d6fc 3851 emit_insn (gen_add2_insn (this_rtx, temp1));
43e9d192
IB
3852 }
3853
75f1d6fc
SN
3854 /* Generate a tail call to the target function. */
3855 if (!TREE_USED (function))
3856 {
3857 assemble_external (function);
3858 TREE_USED (function) = 1;
3859 }
3860 funexp = XEXP (DECL_RTL (function), 0);
3861 funexp = gen_rtx_MEM (FUNCTION_MODE, funexp);
3862 insn = emit_call_insn (gen_sibcall (funexp, const0_rtx, NULL_RTX));
3863 SIBLING_CALL_P (insn) = 1;
3864
3865 insn = get_insns ();
3866 shorten_branches (insn);
3867 final_start_function (insn, file, 1);
3868 final (insn, file, 1);
43e9d192 3869 final_end_function ();
75f1d6fc
SN
3870
3871 /* Stop pretending to be a post-reload pass. */
3872 reload_completed = 0;
43e9d192
IB
3873}
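/* For example, a non-virtual thunk with DELTA == 16 (and VCALL_OFFSET == 0)
   reduces to something like:

	add	x0, x0, 16
	b	<function>

   while a virtual thunk additionally loads an adjustment through the vtable
   (using the temporaries x16/x17) before the tail call.  */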
3874
43e9d192
IB
3875static bool
3876aarch64_tls_referenced_p (rtx x)
3877{
3878 if (!TARGET_HAVE_TLS)
3879 return false;
e7de8563
RS
3880 subrtx_iterator::array_type array;
3881 FOR_EACH_SUBRTX (iter, array, x, ALL)
3882 {
3883 const_rtx x = *iter;
3884 if (GET_CODE (x) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (x) != 0)
3885 return true;
3886 /* Don't recurse into UNSPEC_TLS looking for TLS symbols; these are
3887 TLS offsets, not real symbol references. */
3888 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
3889 iter.skip_subrtxes ();
3890 }
3891 return false;
43e9d192
IB
3892}
3893
3894
43e9d192
IB
3895/* Return true if val can be encoded as a 12-bit unsigned immediate with
3896 a left shift of 0 or 12 bits. */
3897bool
3898aarch64_uimm12_shift (HOST_WIDE_INT val)
3899{
3900 return ((val & (((HOST_WIDE_INT) 0xfff) << 0)) == val
3901 || (val & (((HOST_WIDE_INT) 0xfff) << 12)) == val
3902 );
3903}
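/* For example, 0xfff and 0xfff000 (0xfff << 12) are both accepted, while
   0x1001 is rejected because its set bits span both 12-bit windows.  */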
3904
3905
3906/* Return true if val is an immediate that can be loaded into a
3907 register by a MOVZ instruction. */
3908static bool
ef4bddc2 3909aarch64_movw_imm (HOST_WIDE_INT val, machine_mode mode)
43e9d192
IB
3910{
3911 if (GET_MODE_SIZE (mode) > 4)
3912 {
3913 if ((val & (((HOST_WIDE_INT) 0xffff) << 32)) == val
3914 || (val & (((HOST_WIDE_INT) 0xffff) << 48)) == val)
3915 return 1;
3916 }
3917 else
3918 {
3919 /* Ignore sign extension. */
3920 val &= (HOST_WIDE_INT) 0xffffffff;
3921 }
3922 return ((val & (((HOST_WIDE_INT) 0xffff) << 0)) == val
3923 || (val & (((HOST_WIDE_INT) 0xffff) << 16)) == val);
3924}
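/* For example, 0xbeef and 0xffff0000 (0xffff << 16) are MOVZ immediates,
   while 0x12345 is not, since its set bits cross a 16-bit boundary and
   would need MOVZ+MOVK or a bitmask immediate instead.  */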
3925
a64c73a2
WD
3926/* Multipliers for repeating bitmasks of width 32, 16, 8, 4, and 2. */
3927
3928static const unsigned HOST_WIDE_INT bitmask_imm_mul[] =
3929 {
3930 0x0000000100000001ull,
3931 0x0001000100010001ull,
3932 0x0101010101010101ull,
3933 0x1111111111111111ull,
3934 0x5555555555555555ull,
3935 };
3936
43e9d192
IB
3937
3938/* Return true if val is a valid bitmask immediate. */
a64c73a2 3939
43e9d192 3940bool
a64c73a2 3941aarch64_bitmask_imm (HOST_WIDE_INT val_in, machine_mode mode)
43e9d192 3942{
a64c73a2
WD
3943 unsigned HOST_WIDE_INT val, tmp, mask, first_one, next_one;
3944 int bits;
3945
3946 /* Check for a single sequence of one bits and return quickly if so.
 3947 The special cases of all ones and all zeroes return false. */
3948 val = (unsigned HOST_WIDE_INT) val_in;
3949 tmp = val + (val & -val);
3950
3951 if (tmp == (tmp & -tmp))
3952 return (val + 1) > 1;
3953
3954 /* Replicate 32-bit immediates so we can treat them as 64-bit. */
3955 if (mode == SImode)
3956 val = (val << 32) | (val & 0xffffffff);
3957
3958 /* Invert if the immediate doesn't start with a zero bit - this means we
3959 only need to search for sequences of one bits. */
3960 if (val & 1)
3961 val = ~val;
3962
3963 /* Find the first set bit and set tmp to val with the first sequence of one
3964 bits removed. Return success if there is a single sequence of ones. */
3965 first_one = val & -val;
3966 tmp = val & (val + first_one);
3967
3968 if (tmp == 0)
3969 return true;
3970
3971 /* Find the next set bit and compute the difference in bit position. */
3972 next_one = tmp & -tmp;
3973 bits = clz_hwi (first_one) - clz_hwi (next_one);
3974 mask = val ^ tmp;
3975
3976 /* Check the bit position difference is a power of 2, and that the first
3977 sequence of one bits fits within 'bits' bits. */
3978 if ((mask >> bits) != 0 || bits != (bits & -bits))
3979 return false;
3980
3981 /* Check the sequence of one bits is repeated 64/bits times. */
3982 return val == mask * bitmask_imm_mul[__builtin_clz (bits) - 26];
43e9d192
IB
3983}
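/* For example, 0x0000ffffffff0000 (a single run of 32 ones) and
   0x0f0f0f0f0f0f0f0f (a 4-bit run replicated every 8 bits) are valid
   bitmask immediates, whereas 0, ~0 and 0x12345678 are not.  */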
3984
43fd192f
MC
3985/* Create a mask of ones covering the range from the lowest to the highest
3986 bit set in VAL_IN. Assumed precondition: VAL_IN is not zero. */
3987
3988unsigned HOST_WIDE_INT
3989aarch64_and_split_imm1 (HOST_WIDE_INT val_in)
3990{
3991 int lowest_bit_set = ctz_hwi (val_in);
3992 int highest_bit_set = floor_log2 (val_in);
3993 gcc_assert (val_in != 0);
3994
3995 return ((HOST_WIDE_INT_UC (2) << highest_bit_set) -
3996 (HOST_WIDE_INT_1U << lowest_bit_set));
3997}
3998
3999/* Create a constant where all bits outside the range from the lowest set
4000 bit to the highest set bit are set to 1. */
4001
4002unsigned HOST_WIDE_INT
4003aarch64_and_split_imm2 (HOST_WIDE_INT val_in)
4004{
4005 return val_in | ~aarch64_and_split_imm1 (val_in);
4006}
4007
4008/* Return true if VAL_IN is a valid 'and' bitmask immediate. */
4009
4010bool
4011aarch64_and_bitmask_imm (unsigned HOST_WIDE_INT val_in, machine_mode mode)
4012{
4013 if (aarch64_bitmask_imm (val_in, mode))
4014 return false;
4015
4016 if (aarch64_move_imm (val_in, mode))
4017 return false;
4018
4019 unsigned HOST_WIDE_INT imm2 = aarch64_and_split_imm2 (val_in);
4020
4021 return aarch64_bitmask_imm (imm2, mode);
4022}
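/* As a worked example (the value is picked purely for illustration):
   VAL_IN == 0x00ffcc00 is neither a bitmask nor a move immediate.
   aarch64_and_split_imm1 gives 0x00fffc00 (ones covering bits 10..23),
   aarch64_and_split_imm2 gives 0xffffffffffffcfff, both of which are
   encodable, and imm1 & imm2 == VAL_IN, so the AND can be emitted as
   two AND instructions.  */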
43e9d192
IB
4023
4024/* Return true if val is an immediate that can be loaded into a
4025 register in a single instruction. */
4026bool
ef4bddc2 4027aarch64_move_imm (HOST_WIDE_INT val, machine_mode mode)
43e9d192
IB
4028{
4029 if (aarch64_movw_imm (val, mode) || aarch64_movw_imm (~val, mode))
4030 return 1;
4031 return aarch64_bitmask_imm (val, mode);
4032}
4033
4034static bool
ef4bddc2 4035aarch64_cannot_force_const_mem (machine_mode mode ATTRIBUTE_UNUSED, rtx x)
43e9d192
IB
4036{
4037 rtx base, offset;
7eda14e1 4038
43e9d192
IB
4039 if (GET_CODE (x) == HIGH)
4040 return true;
4041
4042 split_const (x, &base, &offset);
4043 if (GET_CODE (base) == SYMBOL_REF || GET_CODE (base) == LABEL_REF)
28514dda 4044 {
a6e0bfa7 4045 if (aarch64_classify_symbol (base, offset)
28514dda
YZ
4046 != SYMBOL_FORCE_TO_MEM)
4047 return true;
4048 else
4049 /* Avoid generating a 64-bit relocation in ILP32; leave
4050 to aarch64_expand_mov_immediate to handle it properly. */
4051 return mode != ptr_mode;
4052 }
43e9d192
IB
4053
4054 return aarch64_tls_referenced_p (x);
4055}
4056
e79136e4
WD
4057/* Implement TARGET_CASE_VALUES_THRESHOLD.
4058 The expansion for a table switch is quite expensive due to the number
 4059 of instructions, the table lookup and the hard-to-predict indirect jump.
 4060 When optimizing for speed with -O3 enabled, use the per-core tuning if
 4061 set; otherwise use tables for > 16 cases as a tradeoff between size and
4062 performance. When optimizing for size, use the default setting. */
50487d79
EM
4063
4064static unsigned int
4065aarch64_case_values_threshold (void)
4066{
4067 /* Use the specified limit for the number of cases before using jump
4068 tables at higher optimization levels. */
4069 if (optimize > 2
4070 && selected_cpu->tune->max_case_values != 0)
4071 return selected_cpu->tune->max_case_values;
4072 else
e79136e4 4073 return optimize_size ? default_case_values_threshold () : 17;
50487d79
EM
4074}
4075
43e9d192
IB
4076/* Return true if register REGNO is a valid index register.
4077 STRICT_P is true if REG_OK_STRICT is in effect. */
4078
4079bool
4080aarch64_regno_ok_for_index_p (int regno, bool strict_p)
4081{
4082 if (!HARD_REGISTER_NUM_P (regno))
4083 {
4084 if (!strict_p)
4085 return true;
4086
4087 if (!reg_renumber)
4088 return false;
4089
4090 regno = reg_renumber[regno];
4091 }
4092 return GP_REGNUM_P (regno);
4093}
4094
4095/* Return true if register REGNO is a valid base register for mode MODE.
4096 STRICT_P is true if REG_OK_STRICT is in effect. */
4097
4098bool
4099aarch64_regno_ok_for_base_p (int regno, bool strict_p)
4100{
4101 if (!HARD_REGISTER_NUM_P (regno))
4102 {
4103 if (!strict_p)
4104 return true;
4105
4106 if (!reg_renumber)
4107 return false;
4108
4109 regno = reg_renumber[regno];
4110 }
4111
4112 /* The fake registers will be eliminated to either the stack or
4113 hard frame pointer, both of which are usually valid base registers.
4114 Reload deals with the cases where the eliminated form isn't valid. */
4115 return (GP_REGNUM_P (regno)
4116 || regno == SP_REGNUM
4117 || regno == FRAME_POINTER_REGNUM
4118 || regno == ARG_POINTER_REGNUM);
4119}
4120
4121/* Return true if X is a valid base register for mode MODE.
4122 STRICT_P is true if REG_OK_STRICT is in effect. */
4123
4124static bool
4125aarch64_base_register_rtx_p (rtx x, bool strict_p)
4126{
4127 if (!strict_p && GET_CODE (x) == SUBREG)
4128 x = SUBREG_REG (x);
4129
4130 return (REG_P (x) && aarch64_regno_ok_for_base_p (REGNO (x), strict_p));
4131}
4132
4133/* Return true if address offset is a valid index. If it is, fill in INFO
4134 appropriately. STRICT_P is true if REG_OK_STRICT is in effect. */
4135
4136static bool
4137aarch64_classify_index (struct aarch64_address_info *info, rtx x,
ef4bddc2 4138 machine_mode mode, bool strict_p)
43e9d192
IB
4139{
4140 enum aarch64_address_type type;
4141 rtx index;
4142 int shift;
4143
4144 /* (reg:P) */
4145 if ((REG_P (x) || GET_CODE (x) == SUBREG)
4146 && GET_MODE (x) == Pmode)
4147 {
4148 type = ADDRESS_REG_REG;
4149 index = x;
4150 shift = 0;
4151 }
4152 /* (sign_extend:DI (reg:SI)) */
4153 else if ((GET_CODE (x) == SIGN_EXTEND
4154 || GET_CODE (x) == ZERO_EXTEND)
4155 && GET_MODE (x) == DImode
4156 && GET_MODE (XEXP (x, 0)) == SImode)
4157 {
4158 type = (GET_CODE (x) == SIGN_EXTEND)
4159 ? ADDRESS_REG_SXTW : ADDRESS_REG_UXTW;
4160 index = XEXP (x, 0);
4161 shift = 0;
4162 }
4163 /* (mult:DI (sign_extend:DI (reg:SI)) (const_int scale)) */
4164 else if (GET_CODE (x) == MULT
4165 && (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
4166 || GET_CODE (XEXP (x, 0)) == ZERO_EXTEND)
4167 && GET_MODE (XEXP (x, 0)) == DImode
4168 && GET_MODE (XEXP (XEXP (x, 0), 0)) == SImode
4169 && CONST_INT_P (XEXP (x, 1)))
4170 {
4171 type = (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND)
4172 ? ADDRESS_REG_SXTW : ADDRESS_REG_UXTW;
4173 index = XEXP (XEXP (x, 0), 0);
4174 shift = exact_log2 (INTVAL (XEXP (x, 1)));
4175 }
4176 /* (ashift:DI (sign_extend:DI (reg:SI)) (const_int shift)) */
4177 else if (GET_CODE (x) == ASHIFT
4178 && (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
4179 || GET_CODE (XEXP (x, 0)) == ZERO_EXTEND)
4180 && GET_MODE (XEXP (x, 0)) == DImode
4181 && GET_MODE (XEXP (XEXP (x, 0), 0)) == SImode
4182 && CONST_INT_P (XEXP (x, 1)))
4183 {
4184 type = (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND)
4185 ? ADDRESS_REG_SXTW : ADDRESS_REG_UXTW;
4186 index = XEXP (XEXP (x, 0), 0);
4187 shift = INTVAL (XEXP (x, 1));
4188 }
4189 /* (sign_extract:DI (mult:DI (reg:DI) (const_int scale)) 32+shift 0) */
4190 else if ((GET_CODE (x) == SIGN_EXTRACT
4191 || GET_CODE (x) == ZERO_EXTRACT)
4192 && GET_MODE (x) == DImode
4193 && GET_CODE (XEXP (x, 0)) == MULT
4194 && GET_MODE (XEXP (XEXP (x, 0), 0)) == DImode
4195 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))
4196 {
4197 type = (GET_CODE (x) == SIGN_EXTRACT)
4198 ? ADDRESS_REG_SXTW : ADDRESS_REG_UXTW;
4199 index = XEXP (XEXP (x, 0), 0);
4200 shift = exact_log2 (INTVAL (XEXP (XEXP (x, 0), 1)));
4201 if (INTVAL (XEXP (x, 1)) != 32 + shift
4202 || INTVAL (XEXP (x, 2)) != 0)
4203 shift = -1;
4204 }
4205 /* (and:DI (mult:DI (reg:DI) (const_int scale))
4206 (const_int 0xffffffff<<shift)) */
4207 else if (GET_CODE (x) == AND
4208 && GET_MODE (x) == DImode
4209 && GET_CODE (XEXP (x, 0)) == MULT
4210 && GET_MODE (XEXP (XEXP (x, 0), 0)) == DImode
4211 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
4212 && CONST_INT_P (XEXP (x, 1)))
4213 {
4214 type = ADDRESS_REG_UXTW;
4215 index = XEXP (XEXP (x, 0), 0);
4216 shift = exact_log2 (INTVAL (XEXP (XEXP (x, 0), 1)));
4217 if (INTVAL (XEXP (x, 1)) != (HOST_WIDE_INT)0xffffffff << shift)
4218 shift = -1;
4219 }
4220 /* (sign_extract:DI (ashift:DI (reg:DI) (const_int shift)) 32+shift 0) */
4221 else if ((GET_CODE (x) == SIGN_EXTRACT
4222 || GET_CODE (x) == ZERO_EXTRACT)
4223 && GET_MODE (x) == DImode
4224 && GET_CODE (XEXP (x, 0)) == ASHIFT
4225 && GET_MODE (XEXP (XEXP (x, 0), 0)) == DImode
4226 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))
4227 {
4228 type = (GET_CODE (x) == SIGN_EXTRACT)
4229 ? ADDRESS_REG_SXTW : ADDRESS_REG_UXTW;
4230 index = XEXP (XEXP (x, 0), 0);
4231 shift = INTVAL (XEXP (XEXP (x, 0), 1));
4232 if (INTVAL (XEXP (x, 1)) != 32 + shift
4233 || INTVAL (XEXP (x, 2)) != 0)
4234 shift = -1;
4235 }
4236 /* (and:DI (ashift:DI (reg:DI) (const_int shift))
4237 (const_int 0xffffffff<<shift)) */
4238 else if (GET_CODE (x) == AND
4239 && GET_MODE (x) == DImode
4240 && GET_CODE (XEXP (x, 0)) == ASHIFT
4241 && GET_MODE (XEXP (XEXP (x, 0), 0)) == DImode
4242 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
4243 && CONST_INT_P (XEXP (x, 1)))
4244 {
4245 type = ADDRESS_REG_UXTW;
4246 index = XEXP (XEXP (x, 0), 0);
4247 shift = INTVAL (XEXP (XEXP (x, 0), 1));
4248 if (INTVAL (XEXP (x, 1)) != (HOST_WIDE_INT)0xffffffff << shift)
4249 shift = -1;
4250 }
4251 /* (mult:P (reg:P) (const_int scale)) */
4252 else if (GET_CODE (x) == MULT
4253 && GET_MODE (x) == Pmode
4254 && GET_MODE (XEXP (x, 0)) == Pmode
4255 && CONST_INT_P (XEXP (x, 1)))
4256 {
4257 type = ADDRESS_REG_REG;
4258 index = XEXP (x, 0);
4259 shift = exact_log2 (INTVAL (XEXP (x, 1)));
4260 }
4261 /* (ashift:P (reg:P) (const_int shift)) */
4262 else if (GET_CODE (x) == ASHIFT
4263 && GET_MODE (x) == Pmode
4264 && GET_MODE (XEXP (x, 0)) == Pmode
4265 && CONST_INT_P (XEXP (x, 1)))
4266 {
4267 type = ADDRESS_REG_REG;
4268 index = XEXP (x, 0);
4269 shift = INTVAL (XEXP (x, 1));
4270 }
4271 else
4272 return false;
4273
4274 if (GET_CODE (index) == SUBREG)
4275 index = SUBREG_REG (index);
4276
4277 if ((shift == 0 ||
4278 (shift > 0 && shift <= 3
4279 && (1 << shift) == GET_MODE_SIZE (mode)))
4280 && REG_P (index)
4281 && aarch64_regno_ok_for_index_p (REGNO (index), strict_p))
4282 {
4283 info->type = type;
4284 info->offset = index;
4285 info->shift = shift;
4286 return true;
4287 }
4288
4289 return false;
4290}
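/* For example, within a DImode MEM the forms above describe register-offset
   addresses such as [x0, x1, lsl #3] (the MULT/ASHIFT cases with shift == 3)
   or [x0, w1, sxtw #3] (the SIGN_EXTEND cases); the shift must be either 0
   or log2 of the access size.  */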
4291
abc52318
KT
4292/* Return true if MODE is one of the modes for which we
4293 support LDP/STP operations. */
4294
4295static bool
4296aarch64_mode_valid_for_sched_fusion_p (machine_mode mode)
4297{
4298 return mode == SImode || mode == DImode
4299 || mode == SFmode || mode == DFmode
4300 || (aarch64_vector_mode_supported_p (mode)
4301 && GET_MODE_SIZE (mode) == 8);
4302}
4303
9e0218fc
RH
4304/* Return true if REGNO is a virtual pointer register, or an eliminable
4305 "soft" frame register. Like REGNO_PTR_FRAME_P except that we don't
4306 include stack_pointer or hard_frame_pointer. */
4307static bool
4308virt_or_elim_regno_p (unsigned regno)
4309{
4310 return ((regno >= FIRST_VIRTUAL_REGISTER
4311 && regno <= LAST_VIRTUAL_POINTER_REGISTER)
4312 || regno == FRAME_POINTER_REGNUM
4313 || regno == ARG_POINTER_REGNUM);
4314}
4315
43e9d192
IB
4316/* Return true if X is a valid address for machine mode MODE. If it is,
4317 fill in INFO appropriately. STRICT_P is true if REG_OK_STRICT is in
4318 effect. OUTER_CODE is PARALLEL for a load/store pair. */
4319
4320static bool
4321aarch64_classify_address (struct aarch64_address_info *info,
ef4bddc2 4322 rtx x, machine_mode mode,
43e9d192
IB
4323 RTX_CODE outer_code, bool strict_p)
4324{
4325 enum rtx_code code = GET_CODE (x);
4326 rtx op0, op1;
2d8c6dc1 4327
80d43579
WD
4328 /* On BE, we use load/store pair for all large int mode load/stores.
4329 TI/TFmode may also use a load/store pair. */
2d8c6dc1 4330 bool load_store_pair_p = (outer_code == PARALLEL
80d43579
WD
4331 || mode == TImode
4332 || mode == TFmode
2d8c6dc1
AH
4333 || (BYTES_BIG_ENDIAN
4334 && aarch64_vect_struct_mode_p (mode)));
4335
43e9d192 4336 bool allow_reg_index_p =
2d8c6dc1
AH
4337 !load_store_pair_p
4338 && (GET_MODE_SIZE (mode) != 16 || aarch64_vector_mode_supported_p (mode))
4339 && !aarch64_vect_struct_mode_p (mode);
4340
4341 /* On LE, for AdvSIMD, don't support anything other than POST_INC or
4342 REG addressing. */
4343 if (aarch64_vect_struct_mode_p (mode) && !BYTES_BIG_ENDIAN
43e9d192
IB
4344 && (code != POST_INC && code != REG))
4345 return false;
4346
4347 switch (code)
4348 {
4349 case REG:
4350 case SUBREG:
4351 info->type = ADDRESS_REG_IMM;
4352 info->base = x;
4353 info->offset = const0_rtx;
4354 return aarch64_base_register_rtx_p (x, strict_p);
4355
4356 case PLUS:
4357 op0 = XEXP (x, 0);
4358 op1 = XEXP (x, 1);
15c0c5c9
JW
4359
4360 if (! strict_p
4aa81c2e 4361 && REG_P (op0)
9e0218fc 4362 && virt_or_elim_regno_p (REGNO (op0))
4aa81c2e 4363 && CONST_INT_P (op1))
15c0c5c9
JW
4364 {
4365 info->type = ADDRESS_REG_IMM;
4366 info->base = op0;
4367 info->offset = op1;
4368
4369 return true;
4370 }
4371
43e9d192
IB
4372 if (GET_MODE_SIZE (mode) != 0
4373 && CONST_INT_P (op1)
4374 && aarch64_base_register_rtx_p (op0, strict_p))
4375 {
4376 HOST_WIDE_INT offset = INTVAL (op1);
4377
4378 info->type = ADDRESS_REG_IMM;
4379 info->base = op0;
4380 info->offset = op1;
4381
4382 /* TImode and TFmode values are allowed in both pairs of X
4383 registers and individual Q registers. The available
4384 address modes are:
4385 X,X: 7-bit signed scaled offset
4386 Q: 9-bit signed offset
4387 We conservatively require an offset representable in either mode.
8ed49fab
KT
4388 When performing the check for pairs of X registers i.e. LDP/STP
4389 pass down DImode since that is the natural size of the LDP/STP
4390 instruction memory accesses. */
43e9d192 4391 if (mode == TImode || mode == TFmode)
8ed49fab 4392 return (aarch64_offset_7bit_signed_scaled_p (DImode, offset)
8734dfac
WD
4393 && (offset_9bit_signed_unscaled_p (mode, offset)
4394 || offset_12bit_unsigned_scaled_p (mode, offset)));
43e9d192 4395
2d8c6dc1
AH
4396 /* A 7bit offset check because OImode will emit a ldp/stp
4397 instruction (only big endian will get here).
4398 For ldp/stp instructions, the offset is scaled for the size of a
4399 single element of the pair. */
4400 if (mode == OImode)
4401 return aarch64_offset_7bit_signed_scaled_p (TImode, offset);
4402
4403 /* Three 9/12 bit offsets checks because CImode will emit three
4404 ldr/str instructions (only big endian will get here). */
4405 if (mode == CImode)
4406 return (aarch64_offset_7bit_signed_scaled_p (TImode, offset)
4407 && (offset_9bit_signed_unscaled_p (V16QImode, offset + 32)
4408 || offset_12bit_unsigned_scaled_p (V16QImode,
4409 offset + 32)));
4410
4411 /* Two 7bit offsets checks because XImode will emit two ldp/stp
4412 instructions (only big endian will get here). */
4413 if (mode == XImode)
4414 return (aarch64_offset_7bit_signed_scaled_p (TImode, offset)
4415 && aarch64_offset_7bit_signed_scaled_p (TImode,
4416 offset + 32));
4417
4418 if (load_store_pair_p)
43e9d192 4419 return ((GET_MODE_SIZE (mode) == 4 || GET_MODE_SIZE (mode) == 8)
44707478 4420 && aarch64_offset_7bit_signed_scaled_p (mode, offset));
43e9d192
IB
4421 else
4422 return (offset_9bit_signed_unscaled_p (mode, offset)
4423 || offset_12bit_unsigned_scaled_p (mode, offset));
4424 }
4425
4426 if (allow_reg_index_p)
4427 {
4428 /* Look for base + (scaled/extended) index register. */
4429 if (aarch64_base_register_rtx_p (op0, strict_p)
4430 && aarch64_classify_index (info, op1, mode, strict_p))
4431 {
4432 info->base = op0;
4433 return true;
4434 }
4435 if (aarch64_base_register_rtx_p (op1, strict_p)
4436 && aarch64_classify_index (info, op0, mode, strict_p))
4437 {
4438 info->base = op1;
4439 return true;
4440 }
4441 }
4442
4443 return false;
4444
4445 case POST_INC:
4446 case POST_DEC:
4447 case PRE_INC:
4448 case PRE_DEC:
4449 info->type = ADDRESS_REG_WB;
4450 info->base = XEXP (x, 0);
4451 info->offset = NULL_RTX;
4452 return aarch64_base_register_rtx_p (info->base, strict_p);
4453
4454 case POST_MODIFY:
4455 case PRE_MODIFY:
4456 info->type = ADDRESS_REG_WB;
4457 info->base = XEXP (x, 0);
4458 if (GET_CODE (XEXP (x, 1)) == PLUS
4459 && CONST_INT_P (XEXP (XEXP (x, 1), 1))
4460 && rtx_equal_p (XEXP (XEXP (x, 1), 0), info->base)
4461 && aarch64_base_register_rtx_p (info->base, strict_p))
4462 {
4463 HOST_WIDE_INT offset;
4464 info->offset = XEXP (XEXP (x, 1), 1);
4465 offset = INTVAL (info->offset);
4466
4467 /* TImode and TFmode values are allowed in both pairs of X
4468 registers and individual Q registers. The available
4469 address modes are:
4470 X,X: 7-bit signed scaled offset
4471 Q: 9-bit signed offset
4472 We conservatively require an offset representable in either mode.
4473 */
4474 if (mode == TImode || mode == TFmode)
44707478 4475 return (aarch64_offset_7bit_signed_scaled_p (mode, offset)
43e9d192
IB
4476 && offset_9bit_signed_unscaled_p (mode, offset));
4477
2d8c6dc1 4478 if (load_store_pair_p)
43e9d192 4479 return ((GET_MODE_SIZE (mode) == 4 || GET_MODE_SIZE (mode) == 8)
44707478 4480 && aarch64_offset_7bit_signed_scaled_p (mode, offset));
43e9d192
IB
4481 else
4482 return offset_9bit_signed_unscaled_p (mode, offset);
4483 }
4484 return false;
4485
4486 case CONST:
4487 case SYMBOL_REF:
4488 case LABEL_REF:
79517551
SN
4489 /* load literal: pc-relative constant pool entry. Only supported
4490 for SI mode or larger. */
43e9d192 4491 info->type = ADDRESS_SYMBOLIC;
2d8c6dc1
AH
4492
4493 if (!load_store_pair_p && GET_MODE_SIZE (mode) >= 4)
43e9d192
IB
4494 {
4495 rtx sym, addend;
4496
4497 split_const (x, &sym, &addend);
b4f50fd4
RR
4498 return ((GET_CODE (sym) == LABEL_REF
4499 || (GET_CODE (sym) == SYMBOL_REF
4500 && CONSTANT_POOL_ADDRESS_P (sym)
9ee6540a 4501 && aarch64_pcrelative_literal_loads)));
43e9d192
IB
4502 }
4503 return false;
4504
4505 case LO_SUM:
4506 info->type = ADDRESS_LO_SUM;
4507 info->base = XEXP (x, 0);
4508 info->offset = XEXP (x, 1);
4509 if (allow_reg_index_p
4510 && aarch64_base_register_rtx_p (info->base, strict_p))
4511 {
4512 rtx sym, offs;
4513 split_const (info->offset, &sym, &offs);
4514 if (GET_CODE (sym) == SYMBOL_REF
a6e0bfa7 4515 && (aarch64_classify_symbol (sym, offs) == SYMBOL_SMALL_ABSOLUTE))
43e9d192
IB
4516 {
4517 /* The symbol and offset must be aligned to the access size. */
4518 unsigned int align;
4519 unsigned int ref_size;
4520
4521 if (CONSTANT_POOL_ADDRESS_P (sym))
4522 align = GET_MODE_ALIGNMENT (get_pool_mode (sym));
4523 else if (TREE_CONSTANT_POOL_ADDRESS_P (sym))
4524 {
4525 tree exp = SYMBOL_REF_DECL (sym);
4526 align = TYPE_ALIGN (TREE_TYPE (exp));
4527 align = CONSTANT_ALIGNMENT (exp, align);
4528 }
4529 else if (SYMBOL_REF_DECL (sym))
4530 align = DECL_ALIGN (SYMBOL_REF_DECL (sym));
6c031d8d
KV
4531 else if (SYMBOL_REF_HAS_BLOCK_INFO_P (sym)
4532 && SYMBOL_REF_BLOCK (sym) != NULL)
4533 align = SYMBOL_REF_BLOCK (sym)->alignment;
43e9d192
IB
4534 else
4535 align = BITS_PER_UNIT;
4536
4537 ref_size = GET_MODE_SIZE (mode);
4538 if (ref_size == 0)
4539 ref_size = GET_MODE_SIZE (DImode);
4540
4541 return ((INTVAL (offs) & (ref_size - 1)) == 0
4542 && ((align / BITS_PER_UNIT) & (ref_size - 1)) == 0);
4543 }
4544 }
4545 return false;
4546
4547 default:
4548 return false;
4549 }
4550}
4551
9bf2f779
KT
4552/* Return true if the address X is valid for a PRFM instruction.
4553 STRICT_P is true if we should do strict checking with
4554 aarch64_classify_address. */
4555
4556bool
4557aarch64_address_valid_for_prefetch_p (rtx x, bool strict_p)
4558{
4559 struct aarch64_address_info addr;
4560
4561 /* PRFM accepts the same addresses as DImode... */
4562 bool res = aarch64_classify_address (&addr, x, DImode, MEM, strict_p);
4563 if (!res)
4564 return false;
4565
4566 /* ... except writeback forms. */
4567 return addr.type != ADDRESS_REG_WB;
4568}
4569
43e9d192
IB
4570bool
4571aarch64_symbolic_address_p (rtx x)
4572{
4573 rtx offset;
4574
4575 split_const (x, &x, &offset);
4576 return GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == LABEL_REF;
4577}
4578
a6e0bfa7 4579/* Classify the base of symbolic expression X. */
da4f13a4
MS
4580
4581enum aarch64_symbol_type
a6e0bfa7 4582aarch64_classify_symbolic_expression (rtx x)
43e9d192
IB
4583{
4584 rtx offset;
da4f13a4 4585
43e9d192 4586 split_const (x, &x, &offset);
a6e0bfa7 4587 return aarch64_classify_symbol (x, offset);
43e9d192
IB
4588}
4589
4590
4591/* Return TRUE if X is a legitimate address for accessing memory in
4592 mode MODE. */
4593static bool
ef4bddc2 4594aarch64_legitimate_address_hook_p (machine_mode mode, rtx x, bool strict_p)
43e9d192
IB
4595{
4596 struct aarch64_address_info addr;
4597
4598 return aarch64_classify_address (&addr, x, mode, MEM, strict_p);
4599}
4600
4601/* Return TRUE if X is a legitimate address for accessing memory in
4602 mode MODE. OUTER_CODE will be PARALLEL if this is a load/store
4603 pair operation. */
4604bool
ef4bddc2 4605aarch64_legitimate_address_p (machine_mode mode, rtx x,
aef66c94 4606 RTX_CODE outer_code, bool strict_p)
43e9d192
IB
4607{
4608 struct aarch64_address_info addr;
4609
4610 return aarch64_classify_address (&addr, x, mode, outer_code, strict_p);
4611}
4612
491ec060
WD
4613/* Split an out-of-range address displacement into a base and offset.
4614 Use 4KB range for 1- and 2-byte accesses and a 16KB range otherwise
 4615 to increase opportunities for sharing the base address between accesses of different sizes.
8734dfac 4616 For unaligned accesses and TI/TF mode use the signed 9-bit range. */
491ec060
WD
4617static bool
4618aarch64_legitimize_address_displacement (rtx *disp, rtx *off, machine_mode mode)
4619{
8734dfac
WD
4620 HOST_WIDE_INT offset = INTVAL (*disp);
4621 HOST_WIDE_INT base = offset & ~(GET_MODE_SIZE (mode) < 4 ? 0xfff : 0x3ffc);
491ec060 4622
8734dfac
WD
4623 if (mode == TImode || mode == TFmode
4624 || (offset & (GET_MODE_SIZE (mode) - 1)) != 0)
4625 base = (offset + 0x100) & ~0x1ff;
491ec060 4626
8734dfac
WD
4627 *off = GEN_INT (base);
4628 *disp = GEN_INT (offset - base);
491ec060
WD
4629 return true;
4630}
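/* For example, splitting a DImode access at offset 17000 gives a base part
   of 16384 and a residual offset of 616, which is a valid scaled 12-bit
   offset; nearby accesses of other sizes can then reuse the same anchor.  */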
4631
43e9d192
IB
4632/* Return TRUE if rtx X is the immediate constant 0.0. */
4633bool
3520f7cc 4634aarch64_float_const_zero_rtx_p (rtx x)
43e9d192 4635{
43e9d192
IB
4636 if (GET_MODE (x) == VOIDmode)
4637 return false;
4638
34a72c33 4639 if (REAL_VALUE_MINUS_ZERO (*CONST_DOUBLE_REAL_VALUE (x)))
43e9d192 4640 return !HONOR_SIGNED_ZEROS (GET_MODE (x));
34a72c33 4641 return real_equal (CONST_DOUBLE_REAL_VALUE (x), &dconst0);
43e9d192
IB
4642}
4643
70f09188
AP
4644/* Return the fixed registers used for condition codes. */
4645
4646static bool
4647aarch64_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
4648{
4649 *p1 = CC_REGNUM;
4650 *p2 = INVALID_REGNUM;
4651 return true;
4652}
4653
78607708
TV
4654/* Emit call insn with PAT and do aarch64-specific handling. */
4655
d07a3fed 4656void
78607708
TV
4657aarch64_emit_call_insn (rtx pat)
4658{
4659 rtx insn = emit_call_insn (pat);
4660
4661 rtx *fusage = &CALL_INSN_FUNCTION_USAGE (insn);
4662 clobber_reg (fusage, gen_rtx_REG (word_mode, IP0_REGNUM));
4663 clobber_reg (fusage, gen_rtx_REG (word_mode, IP1_REGNUM));
4664}
4665
ef4bddc2 4666machine_mode
43e9d192
IB
4667aarch64_select_cc_mode (RTX_CODE code, rtx x, rtx y)
4668{
4669 /* All floating point compares return CCFP if it is an equality
4670 comparison, and CCFPE otherwise. */
4671 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
4672 {
4673 switch (code)
4674 {
4675 case EQ:
4676 case NE:
4677 case UNORDERED:
4678 case ORDERED:
4679 case UNLT:
4680 case UNLE:
4681 case UNGT:
4682 case UNGE:
4683 case UNEQ:
4684 case LTGT:
4685 return CCFPmode;
4686
4687 case LT:
4688 case LE:
4689 case GT:
4690 case GE:
4691 return CCFPEmode;
4692
4693 default:
4694 gcc_unreachable ();
4695 }
4696 }
4697
2b8568fe
KT
4698 /* Equality comparisons of short modes against zero can be performed
4699 using the TST instruction with the appropriate bitmask. */
4700 if (y == const0_rtx && REG_P (x)
4701 && (code == EQ || code == NE)
4702 && (GET_MODE (x) == HImode || GET_MODE (x) == QImode))
4703 return CC_NZmode;
4704
b06335f9
KT
4705 /* Similarly, comparisons of zero_extends from shorter modes can
4706 be performed using an ANDS with an immediate mask. */
4707 if (y == const0_rtx && GET_CODE (x) == ZERO_EXTEND
4708 && (GET_MODE (x) == SImode || GET_MODE (x) == DImode)
4709 && (GET_MODE (XEXP (x, 0)) == HImode || GET_MODE (XEXP (x, 0)) == QImode)
4710 && (code == EQ || code == NE))
4711 return CC_NZmode;
4712
43e9d192
IB
4713 if ((GET_MODE (x) == SImode || GET_MODE (x) == DImode)
4714 && y == const0_rtx
4715 && (code == EQ || code == NE || code == LT || code == GE)
b056c910 4716 && (GET_CODE (x) == PLUS || GET_CODE (x) == MINUS || GET_CODE (x) == AND
7325d85a
KT
4717 || GET_CODE (x) == NEG
4718 || (GET_CODE (x) == ZERO_EXTRACT && CONST_INT_P (XEXP (x, 1))
4719 && CONST_INT_P (XEXP (x, 2)))))
43e9d192
IB
4720 return CC_NZmode;
4721
1c992d1e 4722 /* A compare with a shifted operand. Because of canonicalization,
43e9d192
IB
4723 the comparison will have to be swapped when we emit the assembly
4724 code. */
4725 if ((GET_MODE (x) == SImode || GET_MODE (x) == DImode)
4aa81c2e 4726 && (REG_P (y) || GET_CODE (y) == SUBREG)
43e9d192
IB
4727 && (GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT
4728 || GET_CODE (x) == LSHIFTRT
1c992d1e 4729 || GET_CODE (x) == ZERO_EXTEND || GET_CODE (x) == SIGN_EXTEND))
43e9d192
IB
4730 return CC_SWPmode;
4731
1c992d1e
RE
4732 /* Similarly for a negated operand, but we can only do this for
4733 equalities. */
4734 if ((GET_MODE (x) == SImode || GET_MODE (x) == DImode)
4aa81c2e 4735 && (REG_P (y) || GET_CODE (y) == SUBREG)
1c992d1e
RE
4736 && (code == EQ || code == NE)
4737 && GET_CODE (x) == NEG)
4738 return CC_Zmode;
4739
ef22810a
RH
4740 /* A test for unsigned overflow. */
4741 if ((GET_MODE (x) == DImode || GET_MODE (x) == TImode)
4742 && code == NE
4743 && GET_CODE (x) == PLUS
4744 && GET_CODE (y) == ZERO_EXTEND)
4745 return CC_Cmode;
4746
43e9d192
IB
4747 /* For everything else, return CCmode. */
4748 return CCmode;
4749}
4750
3dfa7055
ZC
4751static int
4752aarch64_get_condition_code_1 (enum machine_mode, enum rtx_code);
4753
cd5660ab 4754int
43e9d192
IB
4755aarch64_get_condition_code (rtx x)
4756{
ef4bddc2 4757 machine_mode mode = GET_MODE (XEXP (x, 0));
43e9d192
IB
4758 enum rtx_code comp_code = GET_CODE (x);
4759
4760 if (GET_MODE_CLASS (mode) != MODE_CC)
4761 mode = SELECT_CC_MODE (comp_code, XEXP (x, 0), XEXP (x, 1));
3dfa7055
ZC
4762 return aarch64_get_condition_code_1 (mode, comp_code);
4763}
43e9d192 4764
3dfa7055
ZC
4765static int
4766aarch64_get_condition_code_1 (enum machine_mode mode, enum rtx_code comp_code)
4767{
43e9d192
IB
4768 switch (mode)
4769 {
4770 case CCFPmode:
4771 case CCFPEmode:
4772 switch (comp_code)
4773 {
4774 case GE: return AARCH64_GE;
4775 case GT: return AARCH64_GT;
4776 case LE: return AARCH64_LS;
4777 case LT: return AARCH64_MI;
4778 case NE: return AARCH64_NE;
4779 case EQ: return AARCH64_EQ;
4780 case ORDERED: return AARCH64_VC;
4781 case UNORDERED: return AARCH64_VS;
4782 case UNLT: return AARCH64_LT;
4783 case UNLE: return AARCH64_LE;
4784 case UNGT: return AARCH64_HI;
4785 case UNGE: return AARCH64_PL;
cd5660ab 4786 default: return -1;
43e9d192
IB
4787 }
4788 break;
4789
4790 case CCmode:
4791 switch (comp_code)
4792 {
4793 case NE: return AARCH64_NE;
4794 case EQ: return AARCH64_EQ;
4795 case GE: return AARCH64_GE;
4796 case GT: return AARCH64_GT;
4797 case LE: return AARCH64_LE;
4798 case LT: return AARCH64_LT;
4799 case GEU: return AARCH64_CS;
4800 case GTU: return AARCH64_HI;
4801 case LEU: return AARCH64_LS;
4802 case LTU: return AARCH64_CC;
cd5660ab 4803 default: return -1;
43e9d192
IB
4804 }
4805 break;
4806
4807 case CC_SWPmode:
43e9d192
IB
4808 switch (comp_code)
4809 {
4810 case NE: return AARCH64_NE;
4811 case EQ: return AARCH64_EQ;
4812 case GE: return AARCH64_LE;
4813 case GT: return AARCH64_LT;
4814 case LE: return AARCH64_GE;
4815 case LT: return AARCH64_GT;
4816 case GEU: return AARCH64_LS;
4817 case GTU: return AARCH64_CC;
4818 case LEU: return AARCH64_CS;
4819 case LTU: return AARCH64_HI;
cd5660ab 4820 default: return -1;
43e9d192
IB
4821 }
4822 break;
4823
4824 case CC_NZmode:
4825 switch (comp_code)
4826 {
4827 case NE: return AARCH64_NE;
4828 case EQ: return AARCH64_EQ;
4829 case GE: return AARCH64_PL;
4830 case LT: return AARCH64_MI;
cd5660ab 4831 default: return -1;
43e9d192
IB
4832 }
4833 break;
4834
1c992d1e
RE
4835 case CC_Zmode:
4836 switch (comp_code)
4837 {
4838 case NE: return AARCH64_NE;
4839 case EQ: return AARCH64_EQ;
cd5660ab 4840 default: return -1;
1c992d1e
RE
4841 }
4842 break;
4843
ef22810a
RH
4844 case CC_Cmode:
4845 switch (comp_code)
4846 {
4847 case NE: return AARCH64_CS;
4848 case EQ: return AARCH64_CC;
4849 default: return -1;
4850 }
4851 break;
4852
43e9d192 4853 default:
cd5660ab 4854 return -1;
43e9d192 4855 }
3dfa7055 4856
3dfa7055 4857 return -1;
43e9d192
IB
4858}
4859
ddeabd3e
AL
4860bool
4861aarch64_const_vec_all_same_in_range_p (rtx x,
4862 HOST_WIDE_INT minval,
4863 HOST_WIDE_INT maxval)
4864{
4865 HOST_WIDE_INT firstval;
4866 int count, i;
4867
4868 if (GET_CODE (x) != CONST_VECTOR
4869 || GET_MODE_CLASS (GET_MODE (x)) != MODE_VECTOR_INT)
4870 return false;
4871
4872 firstval = INTVAL (CONST_VECTOR_ELT (x, 0));
4873 if (firstval < minval || firstval > maxval)
4874 return false;
4875
4876 count = CONST_VECTOR_NUNITS (x);
4877 for (i = 1; i < count; i++)
4878 if (INTVAL (CONST_VECTOR_ELT (x, i)) != firstval)
4879 return false;
4880
4881 return true;
4882}
4883
4884bool
4885aarch64_const_vec_all_same_int_p (rtx x, HOST_WIDE_INT val)
4886{
4887 return aarch64_const_vec_all_same_in_range_p (x, val, val);
4888}
4889
43e9d192 4890
cf670503
ZC
4891/* N Z C V. */
4892#define AARCH64_CC_V 1
4893#define AARCH64_CC_C (1 << 1)
4894#define AARCH64_CC_Z (1 << 2)
4895#define AARCH64_CC_N (1 << 3)
4896
c8012fbc
WD
4897/* N Z C V flags for ccmp. Indexed by AARCH64_COND_CODE. */
4898static const int aarch64_nzcv_codes[] =
4899{
4900 0, /* EQ, Z == 1. */
4901 AARCH64_CC_Z, /* NE, Z == 0. */
4902 0, /* CS, C == 1. */
4903 AARCH64_CC_C, /* CC, C == 0. */
4904 0, /* MI, N == 1. */
4905 AARCH64_CC_N, /* PL, N == 0. */
4906 0, /* VS, V == 1. */
4907 AARCH64_CC_V, /* VC, V == 0. */
 4908 0, /* HI, C == 1 && Z == 0. */
4909 AARCH64_CC_C, /* LS, !(C == 1 && Z == 0). */
4910 AARCH64_CC_V, /* GE, N == V. */
4911 0, /* LT, N != V. */
4912 AARCH64_CC_Z, /* GT, Z == 0 && N == V. */
4913 0, /* LE, !(Z == 0 && N == V). */
4914 0, /* AL, Any. */
4915 0 /* NV, Any. */
cf670503
ZC
4916};
4917
cc8ca59e
JB
4918static void
4919aarch64_print_operand (FILE *f, rtx x, int code)
43e9d192
IB
4920{
4921 switch (code)
4922 {
f541a481
KT
4923 /* An integer or symbol address without a preceding # sign. */
4924 case 'c':
4925 switch (GET_CODE (x))
4926 {
4927 case CONST_INT:
4928 fprintf (f, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
4929 break;
4930
4931 case SYMBOL_REF:
4932 output_addr_const (f, x);
4933 break;
4934
4935 case CONST:
4936 if (GET_CODE (XEXP (x, 0)) == PLUS
4937 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF)
4938 {
4939 output_addr_const (f, x);
4940 break;
4941 }
4942 /* Fall through. */
4943
4944 default:
4945 output_operand_lossage ("Unsupported operand for code '%c'", code);
4946 }
4947 break;
4948
43e9d192
IB
4949 case 'e':
4950 /* Print the sign/zero-extend size as a character 8->b, 16->h, 32->w. */
4951 {
4952 int n;
4953
4aa81c2e 4954 if (!CONST_INT_P (x)
43e9d192
IB
4955 || (n = exact_log2 (INTVAL (x) & ~7)) <= 0)
4956 {
4957 output_operand_lossage ("invalid operand for '%%%c'", code);
4958 return;
4959 }
4960
4961 switch (n)
4962 {
4963 case 3:
4964 fputc ('b', f);
4965 break;
4966 case 4:
4967 fputc ('h', f);
4968 break;
4969 case 5:
4970 fputc ('w', f);
4971 break;
4972 default:
4973 output_operand_lossage ("invalid operand for '%%%c'", code);
4974 return;
4975 }
4976 }
4977 break;
4978
4979 case 'p':
4980 {
4981 int n;
4982
4983 /* Print N such that 2^N == X. */
4aa81c2e 4984 if (!CONST_INT_P (x) || (n = exact_log2 (INTVAL (x))) < 0)
43e9d192
IB
4985 {
4986 output_operand_lossage ("invalid operand for '%%%c'", code);
4987 return;
4988 }
4989
4990 asm_fprintf (f, "%d", n);
4991 }
4992 break;
4993
4994 case 'P':
4995 /* Print the number of non-zero bits in X (a const_int). */
4aa81c2e 4996 if (!CONST_INT_P (x))
43e9d192
IB
4997 {
4998 output_operand_lossage ("invalid operand for '%%%c'", code);
4999 return;
5000 }
5001
8d55c61b 5002 asm_fprintf (f, "%u", popcount_hwi (INTVAL (x)));
43e9d192
IB
5003 break;
5004
5005 case 'H':
5006 /* Print the higher numbered register of a pair (TImode) of regs. */
4aa81c2e 5007 if (!REG_P (x) || !GP_REGNUM_P (REGNO (x) + 1))
43e9d192
IB
5008 {
5009 output_operand_lossage ("invalid operand for '%%%c'", code);
5010 return;
5011 }
5012
01a3a324 5013 asm_fprintf (f, "%s", reg_names [REGNO (x) + 1]);
43e9d192
IB
5014 break;
5015
43e9d192 5016 case 'M':
c8012fbc 5017 case 'm':
cd5660ab
KT
5018 {
5019 int cond_code;
c8012fbc 5020 /* Print a condition (eq, ne, etc) or its inverse. */
43e9d192 5021
c8012fbc
WD
5022 /* CONST_TRUE_RTX means al/nv (al is the default, don't print it). */
5023 if (x == const_true_rtx)
cd5660ab 5024 {
c8012fbc
WD
5025 if (code == 'M')
5026 fputs ("nv", f);
cd5660ab
KT
5027 return;
5028 }
43e9d192 5029
cd5660ab
KT
5030 if (!COMPARISON_P (x))
5031 {
5032 output_operand_lossage ("invalid operand for '%%%c'", code);
5033 return;
5034 }
c8012fbc 5035
cd5660ab
KT
5036 cond_code = aarch64_get_condition_code (x);
5037 gcc_assert (cond_code >= 0);
c8012fbc
WD
5038 if (code == 'M')
5039 cond_code = AARCH64_INVERSE_CONDITION_CODE (cond_code);
5040 fputs (aarch64_condition_codes[cond_code], f);
cd5660ab 5041 }
43e9d192
IB
5042 break;
5043
5044 case 'b':
5045 case 'h':
5046 case 's':
5047 case 'd':
5048 case 'q':
5049 /* Print a scalar FP/SIMD register name. */
5050 if (!REG_P (x) || !FP_REGNUM_P (REGNO (x)))
5051 {
5052 output_operand_lossage ("incompatible floating point / vector register operand for '%%%c'", code);
5053 return;
5054 }
50ce6f88 5055 asm_fprintf (f, "%c%d", code, REGNO (x) - V0_REGNUM);
43e9d192
IB
5056 break;
5057
5058 case 'S':
5059 case 'T':
5060 case 'U':
5061 case 'V':
5062 /* Print the first FP/SIMD register name in a list. */
5063 if (!REG_P (x) || !FP_REGNUM_P (REGNO (x)))
5064 {
5065 output_operand_lossage ("incompatible floating point / vector register operand for '%%%c'", code);
5066 return;
5067 }
50ce6f88 5068 asm_fprintf (f, "v%d", REGNO (x) - V0_REGNUM + (code - 'S'));
43e9d192
IB
5069 break;
5070
2d8c6dc1
AH
5071 case 'R':
5072 /* Print a scalar FP/SIMD register name + 1. */
5073 if (!REG_P (x) || !FP_REGNUM_P (REGNO (x)))
5074 {
5075 output_operand_lossage ("incompatible floating point / vector register operand for '%%%c'", code);
5076 return;
5077 }
5078 asm_fprintf (f, "q%d", REGNO (x) - V0_REGNUM + 1);
5079 break;
5080
a05c0ddf 5081 case 'X':
50d38551 5082 /* Print bottom 16 bits of integer constant in hex. */
4aa81c2e 5083 if (!CONST_INT_P (x))
a05c0ddf
IB
5084 {
5085 output_operand_lossage ("invalid operand for '%%%c'", code);
5086 return;
5087 }
50d38551 5088 asm_fprintf (f, "0x%wx", UINTVAL (x) & 0xffff);
a05c0ddf
IB
5089 break;
5090
43e9d192
IB
5091 case 'w':
5092 case 'x':
5093 /* Print a general register name or the zero register (32-bit or
5094 64-bit). */
3520f7cc
JG
5095 if (x == const0_rtx
5096 || (CONST_DOUBLE_P (x) && aarch64_float_const_zero_rtx_p (x)))
43e9d192 5097 {
50ce6f88 5098 asm_fprintf (f, "%czr", code);
43e9d192
IB
5099 break;
5100 }
5101
5102 if (REG_P (x) && GP_REGNUM_P (REGNO (x)))
5103 {
50ce6f88 5104 asm_fprintf (f, "%c%d", code, REGNO (x) - R0_REGNUM);
43e9d192
IB
5105 break;
5106 }
5107
5108 if (REG_P (x) && REGNO (x) == SP_REGNUM)
5109 {
50ce6f88 5110 asm_fprintf (f, "%ssp", code == 'w' ? "w" : "");
43e9d192
IB
5111 break;
5112 }
5113
5114 /* Fall through */
5115
5116 case 0:
5117 /* Print a normal operand, if it's a general register, then we
5118 assume DImode. */
5119 if (x == NULL)
5120 {
5121 output_operand_lossage ("missing operand");
5122 return;
5123 }
5124
5125 switch (GET_CODE (x))
5126 {
5127 case REG:
01a3a324 5128 asm_fprintf (f, "%s", reg_names [REGNO (x)]);
43e9d192
IB
5129 break;
5130
5131 case MEM:
cc8ca59e 5132 output_address (GET_MODE (x), XEXP (x, 0));
43e9d192
IB
5133 break;
5134
2af16a7c 5135 case CONST:
43e9d192
IB
5136 case LABEL_REF:
5137 case SYMBOL_REF:
5138 output_addr_const (asm_out_file, x);
5139 break;
5140
5141 case CONST_INT:
5142 asm_fprintf (f, "%wd", INTVAL (x));
5143 break;
5144
5145 case CONST_VECTOR:
3520f7cc
JG
5146 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_VECTOR_INT)
5147 {
ddeabd3e
AL
5148 gcc_assert (
5149 aarch64_const_vec_all_same_in_range_p (x,
5150 HOST_WIDE_INT_MIN,
5151 HOST_WIDE_INT_MAX));
3520f7cc
JG
5152 asm_fprintf (f, "%wd", INTVAL (CONST_VECTOR_ELT (x, 0)));
5153 }
5154 else if (aarch64_simd_imm_zero_p (x, GET_MODE (x)))
5155 {
5156 fputc ('0', f);
5157 }
5158 else
5159 gcc_unreachable ();
43e9d192
IB
5160 break;
5161
3520f7cc 5162 case CONST_DOUBLE:
2ca5b430
KT
5163 /* Since we define TARGET_SUPPORTS_WIDE_INT we shouldn't ever
5164 be getting CONST_DOUBLEs holding integers. */
5165 gcc_assert (GET_MODE (x) != VOIDmode);
5166 if (aarch64_float_const_zero_rtx_p (x))
3520f7cc
JG
5167 {
5168 fputc ('0', f);
5169 break;
5170 }
5171 else if (aarch64_float_const_representable_p (x))
5172 {
5173#define buf_size 20
5174 char float_buf[buf_size] = {'\0'};
34a72c33
RS
5175 real_to_decimal_for_mode (float_buf,
5176 CONST_DOUBLE_REAL_VALUE (x),
3520f7cc
JG
5177 buf_size, buf_size,
5178 1, GET_MODE (x));
5179 asm_fprintf (asm_out_file, "%s", float_buf);
5180 break;
5181#undef buf_size
5182 }
5183 output_operand_lossage ("invalid constant");
5184 return;
43e9d192
IB
5185 default:
5186 output_operand_lossage ("invalid operand");
5187 return;
5188 }
5189 break;
5190
5191 case 'A':
5192 if (GET_CODE (x) == HIGH)
5193 x = XEXP (x, 0);
5194
a6e0bfa7 5195 switch (aarch64_classify_symbolic_expression (x))
43e9d192 5196 {
6642bdb4 5197 case SYMBOL_SMALL_GOT_4G:
43e9d192
IB
5198 asm_fprintf (asm_out_file, ":got:");
5199 break;
5200
5201 case SYMBOL_SMALL_TLSGD:
5202 asm_fprintf (asm_out_file, ":tlsgd:");
5203 break;
5204
5205 case SYMBOL_SMALL_TLSDESC:
5206 asm_fprintf (asm_out_file, ":tlsdesc:");
5207 break;
5208
79496620 5209 case SYMBOL_SMALL_TLSIE:
43e9d192
IB
5210 asm_fprintf (asm_out_file, ":gottprel:");
5211 break;
5212
d18ba284 5213 case SYMBOL_TLSLE24:
43e9d192
IB
5214 asm_fprintf (asm_out_file, ":tprel:");
5215 break;
5216
87dd8ab0
MS
5217 case SYMBOL_TINY_GOT:
5218 gcc_unreachable ();
5219 break;
5220
43e9d192
IB
5221 default:
5222 break;
5223 }
5224 output_addr_const (asm_out_file, x);
5225 break;
5226
5227 case 'L':
a6e0bfa7 5228 switch (aarch64_classify_symbolic_expression (x))
43e9d192 5229 {
6642bdb4 5230 case SYMBOL_SMALL_GOT_4G:
43e9d192
IB
5231 asm_fprintf (asm_out_file, ":lo12:");
5232 break;
5233
5234 case SYMBOL_SMALL_TLSGD:
5235 asm_fprintf (asm_out_file, ":tlsgd_lo12:");
5236 break;
5237
5238 case SYMBOL_SMALL_TLSDESC:
5239 asm_fprintf (asm_out_file, ":tlsdesc_lo12:");
5240 break;
5241
79496620 5242 case SYMBOL_SMALL_TLSIE:
43e9d192
IB
5243 asm_fprintf (asm_out_file, ":gottprel_lo12:");
5244 break;
5245
cbf5629e
JW
5246 case SYMBOL_TLSLE12:
5247 asm_fprintf (asm_out_file, ":tprel_lo12:");
5248 break;
5249
d18ba284 5250 case SYMBOL_TLSLE24:
43e9d192
IB
5251 asm_fprintf (asm_out_file, ":tprel_lo12_nc:");
5252 break;
5253
87dd8ab0
MS
5254 case SYMBOL_TINY_GOT:
5255 asm_fprintf (asm_out_file, ":got:");
5256 break;
5257
5ae7caad
JW
5258 case SYMBOL_TINY_TLSIE:
5259 asm_fprintf (asm_out_file, ":gottprel:");
5260 break;
5261
43e9d192
IB
5262 default:
5263 break;
5264 }
5265 output_addr_const (asm_out_file, x);
5266 break;
5267
5268 case 'G':
5269
a6e0bfa7 5270 switch (aarch64_classify_symbolic_expression (x))
43e9d192 5271 {
d18ba284 5272 case SYMBOL_TLSLE24:
43e9d192
IB
5273 asm_fprintf (asm_out_file, ":tprel_hi12:");
5274 break;
5275 default:
5276 break;
5277 }
5278 output_addr_const (asm_out_file, x);
5279 break;
5280
cf670503
ZC
5281 case 'k':
5282 {
c8012fbc 5283 HOST_WIDE_INT cond_code;
cf670503
ZC
5284 /* Print nzcv. */
5285
c8012fbc 5286 if (!CONST_INT_P (x))
cf670503
ZC
5287 {
5288 output_operand_lossage ("invalid operand for '%%%c'", code);
5289 return;
5290 }
5291
c8012fbc
WD
5292 cond_code = INTVAL (x);
5293 gcc_assert (cond_code >= 0 && cond_code <= AARCH64_NV);
5294 asm_fprintf (f, "%d", aarch64_nzcv_codes[cond_code]);
cf670503
ZC
5295 }
5296 break;
5297
43e9d192
IB
5298 default:
5299 output_operand_lossage ("invalid operand prefix '%%%c'", code);
5300 return;
5301 }
5302}
5303
cc8ca59e
JB
5304static void
5305aarch64_print_operand_address (FILE *f, machine_mode mode, rtx x)
43e9d192
IB
5306{
5307 struct aarch64_address_info addr;
5308
cc8ca59e 5309 if (aarch64_classify_address (&addr, x, mode, MEM, true))
43e9d192
IB
5310 switch (addr.type)
5311 {
5312 case ADDRESS_REG_IMM:
5313 if (addr.offset == const0_rtx)
01a3a324 5314 asm_fprintf (f, "[%s]", reg_names [REGNO (addr.base)]);
43e9d192 5315 else
16a3246f 5316 asm_fprintf (f, "[%s, %wd]", reg_names [REGNO (addr.base)],
43e9d192
IB
5317 INTVAL (addr.offset));
5318 return;
5319
5320 case ADDRESS_REG_REG:
5321 if (addr.shift == 0)
16a3246f 5322 asm_fprintf (f, "[%s, %s]", reg_names [REGNO (addr.base)],
01a3a324 5323 reg_names [REGNO (addr.offset)]);
43e9d192 5324 else
16a3246f 5325 asm_fprintf (f, "[%s, %s, lsl %u]", reg_names [REGNO (addr.base)],
01a3a324 5326 reg_names [REGNO (addr.offset)], addr.shift);
43e9d192
IB
5327 return;
5328
5329 case ADDRESS_REG_UXTW:
5330 if (addr.shift == 0)
16a3246f 5331 asm_fprintf (f, "[%s, w%d, uxtw]", reg_names [REGNO (addr.base)],
43e9d192
IB
5332 REGNO (addr.offset) - R0_REGNUM);
5333 else
16a3246f 5334 asm_fprintf (f, "[%s, w%d, uxtw %u]", reg_names [REGNO (addr.base)],
43e9d192
IB
5335 REGNO (addr.offset) - R0_REGNUM, addr.shift);
5336 return;
5337
5338 case ADDRESS_REG_SXTW:
5339 if (addr.shift == 0)
16a3246f 5340 asm_fprintf (f, "[%s, w%d, sxtw]", reg_names [REGNO (addr.base)],
43e9d192
IB
5341 REGNO (addr.offset) - R0_REGNUM);
5342 else
16a3246f 5343 asm_fprintf (f, "[%s, w%d, sxtw %u]", reg_names [REGNO (addr.base)],
43e9d192
IB
5344 REGNO (addr.offset) - R0_REGNUM, addr.shift);
5345 return;
5346
5347 case ADDRESS_REG_WB:
5348 switch (GET_CODE (x))
5349 {
5350 case PRE_INC:
16a3246f 5351 asm_fprintf (f, "[%s, %d]!", reg_names [REGNO (addr.base)],
cc8ca59e 5352 GET_MODE_SIZE (mode));
43e9d192
IB
5353 return;
5354 case POST_INC:
16a3246f 5355 asm_fprintf (f, "[%s], %d", reg_names [REGNO (addr.base)],
cc8ca59e 5356 GET_MODE_SIZE (mode));
43e9d192
IB
5357 return;
5358 case PRE_DEC:
16a3246f 5359 asm_fprintf (f, "[%s, -%d]!", reg_names [REGNO (addr.base)],
cc8ca59e 5360 GET_MODE_SIZE (mode));
43e9d192
IB
5361 return;
5362 case POST_DEC:
16a3246f 5363 asm_fprintf (f, "[%s], -%d", reg_names [REGNO (addr.base)],
cc8ca59e 5364 GET_MODE_SIZE (mode));
43e9d192
IB
5365 return;
5366 case PRE_MODIFY:
16a3246f 5367 asm_fprintf (f, "[%s, %wd]!", reg_names [REGNO (addr.base)],
43e9d192
IB
5368 INTVAL (addr.offset));
5369 return;
5370 case POST_MODIFY:
16a3246f 5371 asm_fprintf (f, "[%s], %wd", reg_names [REGNO (addr.base)],
43e9d192
IB
5372 INTVAL (addr.offset));
5373 return;
5374 default:
5375 break;
5376 }
5377 break;
5378
5379 case ADDRESS_LO_SUM:
16a3246f 5380 asm_fprintf (f, "[%s, #:lo12:", reg_names [REGNO (addr.base)]);
43e9d192
IB
5381 output_addr_const (f, addr.offset);
5382 asm_fprintf (f, "]");
5383 return;
5384
5385 case ADDRESS_SYMBOLIC:
5386 break;
5387 }
5388
5389 output_addr_const (f, x);
5390}
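/* For illustration, the cases above produce address syntax such as:
     ADDRESS_REG_IMM    [x0]  or  [x0, 16]
     ADDRESS_REG_REG    [x0, x1]  or  [x0, x1, lsl 3]
     ADDRESS_REG_UXTW   [x0, w1, uxtw 2]
     ADDRESS_REG_SXTW   [x0, w1, sxtw 2]
     ADDRESS_REG_WB     [x0, 16]!  (pre-indexed)  or  [x0], 16  (post-indexed)
     ADDRESS_LO_SUM     [x0, #:lo12:sym]
   (the register numbers here are only example operands).  */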
5391
43e9d192
IB
5392bool
5393aarch64_label_mentioned_p (rtx x)
5394{
5395 const char *fmt;
5396 int i;
5397
5398 if (GET_CODE (x) == LABEL_REF)
5399 return true;
5400
5401 /* UNSPEC_TLS entries for a symbol include a LABEL_REF for the
5402 referencing instruction, but they are constant offsets, not
5403 symbols. */
5404 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
5405 return false;
5406
5407 fmt = GET_RTX_FORMAT (GET_CODE (x));
5408 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
5409 {
5410 if (fmt[i] == 'E')
5411 {
5412 int j;
5413
5414 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
5415 if (aarch64_label_mentioned_p (XVECEXP (x, i, j)))
5416 return 1;
5417 }
5418 else if (fmt[i] == 'e' && aarch64_label_mentioned_p (XEXP (x, i)))
5419 return 1;
5420 }
5421
5422 return 0;
5423}
5424
5425/* Implement REGNO_REG_CLASS. */
5426
5427enum reg_class
5428aarch64_regno_regclass (unsigned regno)
5429{
5430 if (GP_REGNUM_P (regno))
a4a182c6 5431 return GENERAL_REGS;
43e9d192
IB
5432
5433 if (regno == SP_REGNUM)
5434 return STACK_REG;
5435
5436 if (regno == FRAME_POINTER_REGNUM
5437 || regno == ARG_POINTER_REGNUM)
f24bb080 5438 return POINTER_REGS;
43e9d192
IB
5439
5440 if (FP_REGNUM_P (regno))
5441 return FP_LO_REGNUM_P (regno) ? FP_LO_REGS : FP_REGS;
5442
5443 return NO_REGS;
5444}
5445
0c4ec427 5446static rtx
ef4bddc2 5447aarch64_legitimize_address (rtx x, rtx /* orig_x */, machine_mode mode)
0c4ec427
RE
5448{
5449 /* Try to split X+CONST into Y=X+(CONST & ~mask), Y+(CONST&mask),
5450 where mask is selected by alignment and size of the offset.
5451 We try to pick as large a range for the offset as possible to
5452 maximize the chance of a CSE. However, for aligned addresses
5453 we limit the range to 4k so that structures with different sized
e8426e0a
BC
5454 elements are likely to use the same base. We need to be careful
5455 not to split a CONST for some forms of address expression, otherwise
5456 it will generate sub-optimal code. */
0c4ec427
RE
5457
5458 if (GET_CODE (x) == PLUS && CONST_INT_P (XEXP (x, 1)))
5459 {
9e0218fc 5460 rtx base = XEXP (x, 0);
17d7bdd8 5461 rtx offset_rtx = XEXP (x, 1);
9e0218fc 5462 HOST_WIDE_INT offset = INTVAL (offset_rtx);
0c4ec427 5463
9e0218fc 5464 if (GET_CODE (base) == PLUS)
e8426e0a 5465 {
9e0218fc
RH
5466 rtx op0 = XEXP (base, 0);
5467 rtx op1 = XEXP (base, 1);
5468
5469 /* Force any scaling into a temp for CSE. */
5470 op0 = force_reg (Pmode, op0);
5471 op1 = force_reg (Pmode, op1);
5472
5473 /* Let the pointer register be in op0. */
5474 if (REG_POINTER (op1))
5475 std::swap (op0, op1);
5476
5477 /* If the pointer is virtual or frame related, then we know that
5478 virtual register instantiation or register elimination is going
5479 to apply a second constant. We want the two constants folded
5480 together easily. Therefore, emit as (OP0 + CONST) + OP1. */
5481 if (virt_or_elim_regno_p (REGNO (op0)))
e8426e0a 5482 {
9e0218fc
RH
5483 base = expand_binop (Pmode, add_optab, op0, offset_rtx,
5484 NULL_RTX, true, OPTAB_DIRECT);
5485 return gen_rtx_PLUS (Pmode, base, op1);
e8426e0a 5486 }
e8426e0a 5487
9e0218fc
RH
5488 /* Otherwise, in order to encourage CSE (and thence loop strength
5489 reduce) scaled addresses, emit as (OP0 + OP1) + CONST. */
5490 base = expand_binop (Pmode, add_optab, op0, op1,
5491 NULL_RTX, true, OPTAB_DIRECT);
5492 x = gen_rtx_PLUS (Pmode, base, offset_rtx);
e8426e0a
BC
5493 }
5494
8734dfac 5495 /* Does it look like we'll need a 16-byte load/store-pair operation? */
9e0218fc 5496 HOST_WIDE_INT base_offset;
8734dfac
WD
5497 if (GET_MODE_SIZE (mode) > 16)
5498 base_offset = (offset + 0x400) & ~0x7f0;
0c4ec427
RE
 5499 /* For offsets that aren't a multiple of the access size, the limit is
5500 -256...255. */
5501 else if (offset & (GET_MODE_SIZE (mode) - 1))
ff0f3f1c
WD
5502 {
5503 base_offset = (offset + 0x100) & ~0x1ff;
5504
5505 /* BLKmode typically uses LDP of X-registers. */
5506 if (mode == BLKmode)
5507 base_offset = (offset + 512) & ~0x3ff;
5508 }
5509 /* Small negative offsets are supported. */
5510 else if (IN_RANGE (offset, -256, 0))
5511 base_offset = 0;
8734dfac
WD
5512 else if (mode == TImode || mode == TFmode)
5513 base_offset = (offset + 0x100) & ~0x1ff;
ff0f3f1c 5514 /* Otherwise, use a 12-bit offset scaled by the access size. */
0c4ec427 5515 else
ff0f3f1c 5516 base_offset = offset & (~0xfff * GET_MODE_SIZE (mode));
0c4ec427 5517
9e0218fc
RH
5518 if (base_offset != 0)
5519 {
5520 base = plus_constant (Pmode, base, base_offset);
5521 base = force_operand (base, NULL_RTX);
5522 return plus_constant (Pmode, base, offset - base_offset);
5523 }
0c4ec427
RE
5524 }
5525
5526 return x;
5527}
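/* Worked example (illustrative): for a DImode access at offset 0x12348,
   which is a multiple of the access size, the final case above gives
   base_offset = 0x12348 & (~0xfff * 8) = 0x10000, leaving 0x2348 as a
   scaled unsigned 12-bit LDR/STR offset.  For a misaligned offset such
   as 0x2007, base_offset = (0x2007 + 0x100) & ~0x1ff = 0x2000, leaving
   7, which fits the signed 9-bit unscaled range.  */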
5528
b4f50fd4
RR
5529/* Return the reload icode required for a constant pool in mode. */
5530static enum insn_code
5531aarch64_constant_pool_reload_icode (machine_mode mode)
5532{
5533 switch (mode)
5534 {
5535 case SFmode:
5536 return CODE_FOR_aarch64_reload_movcpsfdi;
5537
5538 case DFmode:
5539 return CODE_FOR_aarch64_reload_movcpdfdi;
5540
5541 case TFmode:
5542 return CODE_FOR_aarch64_reload_movcptfdi;
5543
5544 case V8QImode:
5545 return CODE_FOR_aarch64_reload_movcpv8qidi;
5546
5547 case V16QImode:
5548 return CODE_FOR_aarch64_reload_movcpv16qidi;
5549
5550 case V4HImode:
5551 return CODE_FOR_aarch64_reload_movcpv4hidi;
5552
5553 case V8HImode:
5554 return CODE_FOR_aarch64_reload_movcpv8hidi;
5555
5556 case V2SImode:
5557 return CODE_FOR_aarch64_reload_movcpv2sidi;
5558
5559 case V4SImode:
5560 return CODE_FOR_aarch64_reload_movcpv4sidi;
5561
5562 case V2DImode:
5563 return CODE_FOR_aarch64_reload_movcpv2didi;
5564
5565 case V2DFmode:
5566 return CODE_FOR_aarch64_reload_movcpv2dfdi;
5567
5568 default:
5569 gcc_unreachable ();
5570 }
5571
5572 gcc_unreachable ();
5573}
43e9d192
IB
5574static reg_class_t
5575aarch64_secondary_reload (bool in_p ATTRIBUTE_UNUSED, rtx x,
5576 reg_class_t rclass,
ef4bddc2 5577 machine_mode mode,
43e9d192
IB
5578 secondary_reload_info *sri)
5579{
b4f50fd4
RR
5580
5581 /* If we have to disable direct literal pool loads and stores because the
5582 function is too big, then we need a scratch register. */
5583 if (MEM_P (x) && GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x)
5584 && (SCALAR_FLOAT_MODE_P (GET_MODE (x))
5585 || targetm.vector_mode_supported_p (GET_MODE (x)))
9ee6540a 5586 && !aarch64_pcrelative_literal_loads)
b4f50fd4
RR
5587 {
5588 sri->icode = aarch64_constant_pool_reload_icode (mode);
5589 return NO_REGS;
5590 }
5591
43e9d192
IB
5592 /* Without the TARGET_SIMD instructions we cannot move a Q register
5593 to a Q register directly. We need a scratch. */
5594 if (REG_P (x) && (mode == TFmode || mode == TImode) && mode == GET_MODE (x)
5595 && FP_REGNUM_P (REGNO (x)) && !TARGET_SIMD
5596 && reg_class_subset_p (rclass, FP_REGS))
5597 {
5598 if (mode == TFmode)
5599 sri->icode = CODE_FOR_aarch64_reload_movtf;
5600 else if (mode == TImode)
5601 sri->icode = CODE_FOR_aarch64_reload_movti;
5602 return NO_REGS;
5603 }
5604
 5605 /* A TFmode or TImode memory access should be handled via FP_REGS
5606 because AArch64 has richer addressing modes for LDR/STR instructions
5607 than LDP/STP instructions. */
d5726973 5608 if (TARGET_FLOAT && rclass == GENERAL_REGS
43e9d192
IB
5609 && GET_MODE_SIZE (mode) == 16 && MEM_P (x))
5610 return FP_REGS;
5611
5612 if (rclass == FP_REGS && (mode == TImode || mode == TFmode) && CONSTANT_P(x))
a4a182c6 5613 return GENERAL_REGS;
43e9d192
IB
5614
5615 return NO_REGS;
5616}
5617
5618static bool
5619aarch64_can_eliminate (const int from, const int to)
5620{
5621 /* If we need a frame pointer, we must eliminate FRAME_POINTER_REGNUM into
5622 HARD_FRAME_POINTER_REGNUM and not into STACK_POINTER_REGNUM. */
5623
5624 if (frame_pointer_needed)
5625 {
5626 if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
5627 return true;
5628 if (from == ARG_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
5629 return false;
5630 if (from == FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM
5631 && !cfun->calls_alloca)
5632 return true;
5633 if (from == FRAME_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
5634 return true;
0b7f8166
MS
5635
5636 return false;
43e9d192 5637 }
1c923b60
JW
5638 else
5639 {
5640 /* If we decided that we didn't need a leaf frame pointer but then used
5641 LR in the function, then we'll want a frame pointer after all, so
5642 prevent this elimination to ensure a frame pointer is used. */
5643 if (to == STACK_POINTER_REGNUM
5644 && flag_omit_leaf_frame_pointer
5645 && df_regs_ever_live_p (LR_REGNUM))
5646 return false;
5647 }
777e6976 5648
43e9d192
IB
5649 return true;
5650}
5651
5652HOST_WIDE_INT
5653aarch64_initial_elimination_offset (unsigned from, unsigned to)
5654{
43e9d192 5655 aarch64_layout_frame ();
78c29983
MS
5656
5657 if (to == HARD_FRAME_POINTER_REGNUM)
5658 {
5659 if (from == ARG_POINTER_REGNUM)
71bfb77a 5660 return cfun->machine->frame.hard_fp_offset;
78c29983
MS
5661
5662 if (from == FRAME_POINTER_REGNUM)
71bfb77a
WD
5663 return cfun->machine->frame.hard_fp_offset
5664 - cfun->machine->frame.locals_offset;
78c29983
MS
5665 }
5666
5667 if (to == STACK_POINTER_REGNUM)
5668 {
5669 if (from == FRAME_POINTER_REGNUM)
71bfb77a
WD
5670 return cfun->machine->frame.frame_size
5671 - cfun->machine->frame.locals_offset;
78c29983
MS
5672 }
5673
1c960e02 5674 return cfun->machine->frame.frame_size;
43e9d192
IB
5675}
5676
43e9d192
IB
5677/* Implement RETURN_ADDR_RTX. We do not support moving back to a
5678 previous frame. */
5679
5680rtx
5681aarch64_return_addr (int count, rtx frame ATTRIBUTE_UNUSED)
5682{
5683 if (count != 0)
5684 return const0_rtx;
5685 return get_hard_reg_initial_val (Pmode, LR_REGNUM);
5686}
5687
5688
5689static void
5690aarch64_asm_trampoline_template (FILE *f)
5691{
28514dda
YZ
5692 if (TARGET_ILP32)
5693 {
5694 asm_fprintf (f, "\tldr\tw%d, .+16\n", IP1_REGNUM - R0_REGNUM);
5695 asm_fprintf (f, "\tldr\tw%d, .+16\n", STATIC_CHAIN_REGNUM - R0_REGNUM);
5696 }
5697 else
5698 {
5699 asm_fprintf (f, "\tldr\t%s, .+16\n", reg_names [IP1_REGNUM]);
5700 asm_fprintf (f, "\tldr\t%s, .+20\n", reg_names [STATIC_CHAIN_REGNUM]);
5701 }
01a3a324 5702 asm_fprintf (f, "\tbr\t%s\n", reg_names [IP1_REGNUM]);
43e9d192 5703 assemble_aligned_integer (4, const0_rtx);
28514dda
YZ
5704 assemble_aligned_integer (POINTER_BYTES, const0_rtx);
5705 assemble_aligned_integer (POINTER_BYTES, const0_rtx);
43e9d192
IB
5706}
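/* Illustrative example: assuming the usual LP64 register assignments
   (IP1 is x17 and the static chain register is x18), the template above
   assembles to roughly
	ldr	x17, .+16
	ldr	x18, .+20
	br	x17
   followed by one word of padding and two pointer-sized data slots,
   which aarch64_trampoline_init below fills with the target function
   address and the static chain value.  */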
5707
5708static void
5709aarch64_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
5710{
5711 rtx fnaddr, mem, a_tramp;
28514dda 5712 const int tramp_code_sz = 16;
43e9d192
IB
5713
5714 /* Don't need to copy the trailing D-words, we fill those in below. */
5715 emit_block_move (m_tramp, assemble_trampoline_template (),
28514dda
YZ
5716 GEN_INT (tramp_code_sz), BLOCK_OP_NORMAL);
5717 mem = adjust_address (m_tramp, ptr_mode, tramp_code_sz);
43e9d192 5718 fnaddr = XEXP (DECL_RTL (fndecl), 0);
28514dda
YZ
5719 if (GET_MODE (fnaddr) != ptr_mode)
5720 fnaddr = convert_memory_address (ptr_mode, fnaddr);
43e9d192
IB
5721 emit_move_insn (mem, fnaddr);
5722
28514dda 5723 mem = adjust_address (m_tramp, ptr_mode, tramp_code_sz + POINTER_BYTES);
43e9d192
IB
5724 emit_move_insn (mem, chain_value);
5725
5726 /* XXX We should really define a "clear_cache" pattern and use
5727 gen_clear_cache(). */
5728 a_tramp = XEXP (m_tramp, 0);
5729 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__clear_cache"),
28514dda
YZ
5730 LCT_NORMAL, VOIDmode, 2, a_tramp, ptr_mode,
5731 plus_constant (ptr_mode, a_tramp, TRAMPOLINE_SIZE),
5732 ptr_mode);
43e9d192
IB
5733}
5734
5735static unsigned char
ef4bddc2 5736aarch64_class_max_nregs (reg_class_t regclass, machine_mode mode)
43e9d192
IB
5737{
5738 switch (regclass)
5739 {
fee9ba42 5740 case CALLER_SAVE_REGS:
43e9d192
IB
5741 case POINTER_REGS:
5742 case GENERAL_REGS:
5743 case ALL_REGS:
5744 case FP_REGS:
5745 case FP_LO_REGS:
5746 return
7bd11911
KT
5747 aarch64_vector_mode_p (mode)
5748 ? (GET_MODE_SIZE (mode) + UNITS_PER_VREG - 1) / UNITS_PER_VREG
5749 : (GET_MODE_SIZE (mode) + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
43e9d192
IB
5750 case STACK_REG:
5751 return 1;
5752
5753 case NO_REGS:
5754 return 0;
5755
5756 default:
5757 break;
5758 }
5759 gcc_unreachable ();
5760}
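/* Worked example (illustrative): TImode (16 bytes) is not a vector mode,
   so it needs (16 + 8 - 1) / 8 = 2 general registers, whereas V4SImode
   (also 16 bytes) is a vector mode and needs (16 + 16 - 1) / 16 = 1
   FP/SIMD register.  */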
5761
5762static reg_class_t
78d8b9f0 5763aarch64_preferred_reload_class (rtx x, reg_class_t regclass)
43e9d192 5764{
51bb310d 5765 if (regclass == POINTER_REGS)
78d8b9f0
IB
5766 return GENERAL_REGS;
5767
51bb310d
MS
5768 if (regclass == STACK_REG)
5769 {
5770 if (REG_P(x)
5771 && reg_class_subset_p (REGNO_REG_CLASS (REGNO (x)), POINTER_REGS))
5772 return regclass;
5773
5774 return NO_REGS;
5775 }
5776
78d8b9f0
IB
5777 /* If it's an integer immediate that MOVI can't handle, then
5778 FP_REGS is not an option, so we return NO_REGS instead. */
5779 if (CONST_INT_P (x) && reg_class_subset_p (regclass, FP_REGS)
5780 && !aarch64_simd_imm_scalar_p (x, GET_MODE (x)))
5781 return NO_REGS;
5782
27bd251b
IB
 5783 /* Register elimination can result in a request for
 5784 SP+constant->FP_REGS. We cannot support such operations, which
 5785 use SP as source and an FP_REG as destination, so reject them
 5786 outright. */
5787 if (! reg_class_subset_p (regclass, GENERAL_REGS) && GET_CODE (x) == PLUS)
5788 {
5789 rtx lhs = XEXP (x, 0);
5790
5791 /* Look through a possible SUBREG introduced by ILP32. */
5792 if (GET_CODE (lhs) == SUBREG)
5793 lhs = SUBREG_REG (lhs);
5794
5795 gcc_assert (REG_P (lhs));
5796 gcc_assert (reg_class_subset_p (REGNO_REG_CLASS (REGNO (lhs)),
5797 POINTER_REGS));
5798 return NO_REGS;
5799 }
5800
78d8b9f0 5801 return regclass;
43e9d192
IB
5802}
5803
5804void
5805aarch64_asm_output_labelref (FILE* f, const char *name)
5806{
5807 asm_fprintf (f, "%U%s", name);
5808}
5809
5810static void
5811aarch64_elf_asm_constructor (rtx symbol, int priority)
5812{
5813 if (priority == DEFAULT_INIT_PRIORITY)
5814 default_ctor_section_asm_out_constructor (symbol, priority);
5815 else
5816 {
5817 section *s;
53d190c1
AT
 5818 /* Priority is known to be in the range [0, 65535], so 18 bytes
 5819 would be enough, but the compiler might not know that. To avoid
 5820 a -Wformat-truncation false positive, use a larger size. */
5821 char buf[23];
43e9d192
IB
5822 snprintf (buf, sizeof (buf), ".init_array.%.5u", priority);
5823 s = get_section (buf, SECTION_WRITE, NULL);
5824 switch_to_section (s);
5825 assemble_align (POINTER_SIZE);
28514dda 5826 assemble_aligned_integer (POINTER_BYTES, symbol);
43e9d192
IB
5827 }
5828}
5829
5830static void
5831aarch64_elf_asm_destructor (rtx symbol, int priority)
5832{
5833 if (priority == DEFAULT_INIT_PRIORITY)
5834 default_dtor_section_asm_out_destructor (symbol, priority);
5835 else
5836 {
5837 section *s;
53d190c1
AT
 5838 /* Priority is known to be in the range [0, 65535], so 18 bytes
 5839 would be enough, but the compiler might not know that. To avoid
 5840 a -Wformat-truncation false positive, use a larger size. */
5841 char buf[23];
43e9d192
IB
5842 snprintf (buf, sizeof (buf), ".fini_array.%.5u", priority);
5843 s = get_section (buf, SECTION_WRITE, NULL);
5844 switch_to_section (s);
5845 assemble_align (POINTER_SIZE);
28514dda 5846 assemble_aligned_integer (POINTER_BYTES, symbol);
43e9d192
IB
5847 }
5848}
5849
5850const char*
5851aarch64_output_casesi (rtx *operands)
5852{
5853 char buf[100];
5854 char label[100];
b32d5189 5855 rtx diff_vec = PATTERN (NEXT_INSN (as_a <rtx_insn *> (operands[2])));
43e9d192
IB
5856 int index;
5857 static const char *const patterns[4][2] =
5858 {
5859 {
5860 "ldrb\t%w3, [%0,%w1,uxtw]",
5861 "add\t%3, %4, %w3, sxtb #2"
5862 },
5863 {
5864 "ldrh\t%w3, [%0,%w1,uxtw #1]",
5865 "add\t%3, %4, %w3, sxth #2"
5866 },
5867 {
5868 "ldr\t%w3, [%0,%w1,uxtw #2]",
5869 "add\t%3, %4, %w3, sxtw #2"
5870 },
5871 /* We assume that DImode is only generated when not optimizing and
5872 that we don't really need 64-bit address offsets. That would
5873 imply an object file with 8GB of code in a single function! */
5874 {
5875 "ldr\t%w3, [%0,%w1,uxtw #2]",
5876 "add\t%3, %4, %w3, sxtw #2"
5877 }
5878 };
5879
5880 gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC);
5881
5882 index = exact_log2 (GET_MODE_SIZE (GET_MODE (diff_vec)));
5883
5884 gcc_assert (index >= 0 && index <= 3);
5885
 5886 /* Need to implement table size reduction, by changing the code below. */
5887 output_asm_insn (patterns[index][0], operands);
5888 ASM_GENERATE_INTERNAL_LABEL (label, "Lrtx", CODE_LABEL_NUMBER (operands[2]));
5889 snprintf (buf, sizeof (buf),
5890 "adr\t%%4, %s", targetm.strip_name_encoding (label));
5891 output_asm_insn (buf, operands);
5892 output_asm_insn (patterns[index][1], operands);
5893 output_asm_insn ("br\t%3", operands);
5894 assemble_label (asm_out_file, label);
5895 return "";
5896}
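/* Illustrative example: for a HImode dispatch table (index 1 above) and
   example operand registers x0 (table base), w1 (index), x3 and x4
   (scratch), the emitted sequence is roughly
	ldrh	w3, [x0,w1,uxtw #1]
	adr	x4, .Lrtx<N>
	add	x3, x4, w3, sxth #2
	br	x3
   with the .Lrtx<N> label emitted immediately after the branch.  */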
5897
5898
5899/* Return size in bits of an arithmetic operand which is shifted/scaled and
5900 masked such that it is suitable for a UXTB, UXTH, or UXTW extend
5901 operator. */
5902
5903int
5904aarch64_uxt_size (int shift, HOST_WIDE_INT mask)
5905{
5906 if (shift >= 0 && shift <= 3)
5907 {
5908 int size;
5909 for (size = 8; size <= 32; size *= 2)
5910 {
5911 HOST_WIDE_INT bits = ((HOST_WIDE_INT)1U << size) - 1;
5912 if (mask == bits << shift)
5913 return size;
5914 }
5915 }
5916 return 0;
5917}
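/* Worked example (illustrative): aarch64_uxt_size (2, 0x3fc) returns 8,
   since 0x3fc == 0xff << 2, so an AND with 0x3fc of a value scaled by 4
   corresponds to a UXTB operand with an extra left shift of 2.  */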
5918
e78d485e
RR
5919/* Constant pools are per function only when PC relative
 5920 literal loads are enabled or we are in the large memory
5921 model. */
5922
5923static inline bool
5924aarch64_can_use_per_function_literal_pools_p (void)
5925{
9ee6540a 5926 return (aarch64_pcrelative_literal_loads
e78d485e
RR
5927 || aarch64_cmodel == AARCH64_CMODEL_LARGE);
5928}
5929
43e9d192 5930static bool
e78d485e 5931aarch64_use_blocks_for_constant_p (machine_mode, const_rtx)
43e9d192 5932{
3eece53d
RR
 5933 /* FIXME: In an ideal world this would work similarly
 5934 to the logic in aarch64_select_rtx_section, but that
 5935 breaks bootstrap in GCC Go. For now we work around
 5936 this by returning false here. */
5937 return false;
43e9d192
IB
5938}
5939
e78d485e
RR
5940/* Select appropriate section for constants depending
5941 on where we place literal pools. */
5942
43e9d192 5943static section *
e78d485e
RR
5944aarch64_select_rtx_section (machine_mode mode,
5945 rtx x,
5946 unsigned HOST_WIDE_INT align)
43e9d192 5947{
e78d485e
RR
5948 if (aarch64_can_use_per_function_literal_pools_p ())
5949 return function_section (current_function_decl);
43e9d192 5950
e78d485e
RR
5951 return default_elf_select_rtx_section (mode, x, align);
5952}
43e9d192 5953
5fca7b66
RH
5954/* Implement ASM_OUTPUT_POOL_EPILOGUE. */
5955void
5956aarch64_asm_output_pool_epilogue (FILE *f, const char *, tree,
5957 HOST_WIDE_INT offset)
5958{
5959 /* When using per-function literal pools, we must ensure that any code
5960 section is aligned to the minimal instruction length, lest we get
5961 errors from the assembler re "unaligned instructions". */
5962 if ((offset & 3) && aarch64_can_use_per_function_literal_pools_p ())
5963 ASM_OUTPUT_ALIGN (f, 2);
5964}
5965
43e9d192
IB
5966/* Costs. */
5967
5968/* Helper function for rtx cost calculation. Strip a shift expression
5969 from X. Returns the inner operand if successful, or the original
5970 expression on failure. */
5971static rtx
5972aarch64_strip_shift (rtx x)
5973{
5974 rtx op = x;
5975
57b77d46
RE
5976 /* We accept both ROTATERT and ROTATE: since the RHS must be a constant
5977 we can convert both to ROR during final output. */
43e9d192
IB
5978 if ((GET_CODE (op) == ASHIFT
5979 || GET_CODE (op) == ASHIFTRT
57b77d46
RE
5980 || GET_CODE (op) == LSHIFTRT
5981 || GET_CODE (op) == ROTATERT
5982 || GET_CODE (op) == ROTATE)
43e9d192
IB
5983 && CONST_INT_P (XEXP (op, 1)))
5984 return XEXP (op, 0);
5985
5986 if (GET_CODE (op) == MULT
5987 && CONST_INT_P (XEXP (op, 1))
5988 && ((unsigned) exact_log2 (INTVAL (XEXP (op, 1)))) < 64)
5989 return XEXP (op, 0);
5990
5991 return x;
5992}
5993
4745e701 5994/* Helper function for rtx cost calculation. Strip an extend
43e9d192
IB
5995 expression from X. Returns the inner operand if successful, or the
5996 original expression on failure. We deal with a number of possible
5997 canonicalization variations here. */
5998static rtx
4745e701 5999aarch64_strip_extend (rtx x)
43e9d192
IB
6000{
6001 rtx op = x;
6002
6003 /* Zero and sign extraction of a widened value. */
6004 if ((GET_CODE (op) == ZERO_EXTRACT || GET_CODE (op) == SIGN_EXTRACT)
6005 && XEXP (op, 2) == const0_rtx
4745e701 6006 && GET_CODE (XEXP (op, 0)) == MULT
43e9d192
IB
6007 && aarch64_is_extend_from_extract (GET_MODE (op), XEXP (XEXP (op, 0), 1),
6008 XEXP (op, 1)))
6009 return XEXP (XEXP (op, 0), 0);
6010
6011 /* It can also be represented (for zero-extend) as an AND with an
6012 immediate. */
6013 if (GET_CODE (op) == AND
6014 && GET_CODE (XEXP (op, 0)) == MULT
6015 && CONST_INT_P (XEXP (XEXP (op, 0), 1))
6016 && CONST_INT_P (XEXP (op, 1))
6017 && aarch64_uxt_size (exact_log2 (INTVAL (XEXP (XEXP (op, 0), 1))),
6018 INTVAL (XEXP (op, 1))) != 0)
6019 return XEXP (XEXP (op, 0), 0);
6020
6021 /* Now handle extended register, as this may also have an optional
6022 left shift by 1..4. */
6023 if (GET_CODE (op) == ASHIFT
6024 && CONST_INT_P (XEXP (op, 1))
6025 && ((unsigned HOST_WIDE_INT) INTVAL (XEXP (op, 1))) <= 4)
6026 op = XEXP (op, 0);
6027
6028 if (GET_CODE (op) == ZERO_EXTEND
6029 || GET_CODE (op) == SIGN_EXTEND)
6030 op = XEXP (op, 0);
6031
6032 if (op != x)
6033 return op;
6034
4745e701
JG
6035 return x;
6036}
6037
0a78ebe4
KT
6038/* Return true iff CODE is a shift supported in combination
6039 with arithmetic instructions. */
4d1919ed 6040
0a78ebe4
KT
6041static bool
6042aarch64_shift_p (enum rtx_code code)
6043{
6044 return code == ASHIFT || code == ASHIFTRT || code == LSHIFTRT;
6045}
6046
4745e701 6047/* Helper function for rtx cost calculation. Calculate the cost of
0a78ebe4
KT
6048 a MULT or ASHIFT, which may be part of a compound PLUS/MINUS rtx.
 6049 Return the calculated cost of the expression, recursing manually into
4745e701
JG
6050 operands where needed. */
6051
6052static int
e548c9df 6053aarch64_rtx_mult_cost (rtx x, enum rtx_code code, int outer, bool speed)
4745e701
JG
6054{
6055 rtx op0, op1;
6056 const struct cpu_cost_table *extra_cost
b175b679 6057 = aarch64_tune_params.insn_extra_cost;
4745e701 6058 int cost = 0;
0a78ebe4 6059 bool compound_p = (outer == PLUS || outer == MINUS);
ef4bddc2 6060 machine_mode mode = GET_MODE (x);
4745e701
JG
6061
6062 gcc_checking_assert (code == MULT);
6063
6064 op0 = XEXP (x, 0);
6065 op1 = XEXP (x, 1);
6066
6067 if (VECTOR_MODE_P (mode))
6068 mode = GET_MODE_INNER (mode);
6069
6070 /* Integer multiply/fma. */
6071 if (GET_MODE_CLASS (mode) == MODE_INT)
6072 {
6073 /* The multiply will be canonicalized as a shift, cost it as such. */
0a78ebe4
KT
6074 if (aarch64_shift_p (GET_CODE (x))
6075 || (CONST_INT_P (op1)
6076 && exact_log2 (INTVAL (op1)) > 0))
4745e701 6077 {
0a78ebe4
KT
6078 bool is_extend = GET_CODE (op0) == ZERO_EXTEND
6079 || GET_CODE (op0) == SIGN_EXTEND;
4745e701
JG
6080 if (speed)
6081 {
0a78ebe4
KT
6082 if (compound_p)
6083 {
6084 if (REG_P (op1))
6085 /* ARITH + shift-by-register. */
6086 cost += extra_cost->alu.arith_shift_reg;
6087 else if (is_extend)
6088 /* ARITH + extended register. We don't have a cost field
6089 for ARITH+EXTEND+SHIFT, so use extend_arith here. */
6090 cost += extra_cost->alu.extend_arith;
6091 else
6092 /* ARITH + shift-by-immediate. */
6093 cost += extra_cost->alu.arith_shift;
6094 }
4745e701
JG
6095 else
6096 /* LSL (immediate). */
0a78ebe4
KT
6097 cost += extra_cost->alu.shift;
6098
4745e701 6099 }
0a78ebe4
KT
6100 /* Strip extends as we will have costed them in the case above. */
6101 if (is_extend)
6102 op0 = aarch64_strip_extend (op0);
4745e701 6103
e548c9df 6104 cost += rtx_cost (op0, VOIDmode, code, 0, speed);
4745e701
JG
6105
6106 return cost;
6107 }
6108
d2ac256b
KT
6109 /* MNEG or [US]MNEGL. Extract the NEG operand and indicate that it's a
6110 compound and let the below cases handle it. After all, MNEG is a
6111 special-case alias of MSUB. */
6112 if (GET_CODE (op0) == NEG)
6113 {
6114 op0 = XEXP (op0, 0);
6115 compound_p = true;
6116 }
6117
4745e701
JG
6118 /* Integer multiplies or FMAs have zero/sign extending variants. */
6119 if ((GET_CODE (op0) == ZERO_EXTEND
6120 && GET_CODE (op1) == ZERO_EXTEND)
6121 || (GET_CODE (op0) == SIGN_EXTEND
6122 && GET_CODE (op1) == SIGN_EXTEND))
6123 {
e548c9df
AM
6124 cost += rtx_cost (XEXP (op0, 0), VOIDmode, MULT, 0, speed);
6125 cost += rtx_cost (XEXP (op1, 0), VOIDmode, MULT, 1, speed);
4745e701
JG
6126
6127 if (speed)
6128 {
0a78ebe4 6129 if (compound_p)
d2ac256b 6130 /* SMADDL/UMADDL/UMSUBL/SMSUBL. */
4745e701
JG
6131 cost += extra_cost->mult[0].extend_add;
6132 else
6133 /* MUL/SMULL/UMULL. */
6134 cost += extra_cost->mult[0].extend;
6135 }
6136
6137 return cost;
6138 }
6139
d2ac256b 6140 /* This is either an integer multiply or a MADD. In both cases
4745e701 6141 we want to recurse and cost the operands. */
e548c9df
AM
6142 cost += rtx_cost (op0, mode, MULT, 0, speed);
6143 cost += rtx_cost (op1, mode, MULT, 1, speed);
4745e701
JG
6144
6145 if (speed)
6146 {
0a78ebe4 6147 if (compound_p)
d2ac256b 6148 /* MADD/MSUB. */
4745e701
JG
6149 cost += extra_cost->mult[mode == DImode].add;
6150 else
6151 /* MUL. */
6152 cost += extra_cost->mult[mode == DImode].simple;
6153 }
6154
6155 return cost;
6156 }
6157 else
6158 {
6159 if (speed)
6160 {
3d840f7d 6161 /* Floating-point FMA/FMUL can also support negations of the
d318517d
SN
6162 operands, unless the rounding mode is upward or downward in
6163 which case FNMUL is different than FMUL with operand negation. */
6164 bool neg0 = GET_CODE (op0) == NEG;
6165 bool neg1 = GET_CODE (op1) == NEG;
6166 if (compound_p || !flag_rounding_math || (neg0 && neg1))
6167 {
6168 if (neg0)
6169 op0 = XEXP (op0, 0);
6170 if (neg1)
6171 op1 = XEXP (op1, 0);
6172 }
4745e701 6173
0a78ebe4 6174 if (compound_p)
4745e701
JG
6175 /* FMADD/FNMADD/FNMSUB/FMSUB. */
6176 cost += extra_cost->fp[mode == DFmode].fma;
6177 else
3d840f7d 6178 /* FMUL/FNMUL. */
4745e701
JG
6179 cost += extra_cost->fp[mode == DFmode].mult;
6180 }
6181
e548c9df
AM
6182 cost += rtx_cost (op0, mode, MULT, 0, speed);
6183 cost += rtx_cost (op1, mode, MULT, 1, speed);
4745e701
JG
6184 return cost;
6185 }
43e9d192
IB
6186}
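/* Illustrative example: costing (plus (mult (reg) (const_int 8)) (reg))
   reaches this function with OUTER == PLUS, so compound_p is true and
   the multiply by 8 is treated as a shift by 3; with SPEED costing the
   function returns extra_cost->alu.arith_shift plus the recursive cost
   of the multiplied operand, matching an ADD with shift-by-immediate
   such as "add x0, x2, x1, lsl 3".  */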
6187
67747367
JG
6188static int
6189aarch64_address_cost (rtx x,
ef4bddc2 6190 machine_mode mode,
67747367
JG
6191 addr_space_t as ATTRIBUTE_UNUSED,
6192 bool speed)
6193{
6194 enum rtx_code c = GET_CODE (x);
b175b679 6195 const struct cpu_addrcost_table *addr_cost = aarch64_tune_params.addr_cost;
67747367
JG
6196 struct aarch64_address_info info;
6197 int cost = 0;
6198 info.shift = 0;
6199
6200 if (!aarch64_classify_address (&info, x, mode, c, false))
6201 {
6202 if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF)
6203 {
6204 /* This is a CONST or SYMBOL ref which will be split
6205 in a different way depending on the code model in use.
6206 Cost it through the generic infrastructure. */
e548c9df 6207 int cost_symbol_ref = rtx_cost (x, Pmode, MEM, 1, speed);
67747367
JG
6208 /* Divide through by the cost of one instruction to
6209 bring it to the same units as the address costs. */
6210 cost_symbol_ref /= COSTS_N_INSNS (1);
6211 /* The cost is then the cost of preparing the address,
6212 followed by an immediate (possibly 0) offset. */
6213 return cost_symbol_ref + addr_cost->imm_offset;
6214 }
6215 else
6216 {
6217 /* This is most likely a jump table from a case
6218 statement. */
6219 return addr_cost->register_offset;
6220 }
6221 }
6222
6223 switch (info.type)
6224 {
6225 case ADDRESS_LO_SUM:
6226 case ADDRESS_SYMBOLIC:
6227 case ADDRESS_REG_IMM:
6228 cost += addr_cost->imm_offset;
6229 break;
6230
6231 case ADDRESS_REG_WB:
6232 if (c == PRE_INC || c == PRE_DEC || c == PRE_MODIFY)
6233 cost += addr_cost->pre_modify;
6234 else if (c == POST_INC || c == POST_DEC || c == POST_MODIFY)
6235 cost += addr_cost->post_modify;
6236 else
6237 gcc_unreachable ();
6238
6239 break;
6240
6241 case ADDRESS_REG_REG:
6242 cost += addr_cost->register_offset;
6243 break;
6244
67747367 6245 case ADDRESS_REG_SXTW:
783879e6
EM
6246 cost += addr_cost->register_sextend;
6247 break;
6248
6249 case ADDRESS_REG_UXTW:
6250 cost += addr_cost->register_zextend;
67747367
JG
6251 break;
6252
6253 default:
6254 gcc_unreachable ();
6255 }
6256
6257
6258 if (info.shift > 0)
6259 {
6260 /* For the sake of calculating the cost of the shifted register
6261 component, we can treat same sized modes in the same way. */
6262 switch (GET_MODE_BITSIZE (mode))
6263 {
6264 case 16:
6265 cost += addr_cost->addr_scale_costs.hi;
6266 break;
6267
6268 case 32:
6269 cost += addr_cost->addr_scale_costs.si;
6270 break;
6271
6272 case 64:
6273 cost += addr_cost->addr_scale_costs.di;
6274 break;
6275
6276 /* We can't tell, or this is a 128-bit vector. */
6277 default:
6278 cost += addr_cost->addr_scale_costs.ti;
6279 break;
6280 }
6281 }
6282
6283 return cost;
6284}
6285
b9066f5a
MW
6286/* Return the cost of a branch. If SPEED_P is true then the compiler is
6287 optimizing for speed. If PREDICTABLE_P is true then the branch is predicted
6288 to be taken. */
6289
6290int
6291aarch64_branch_cost (bool speed_p, bool predictable_p)
6292{
6293 /* When optimizing for speed, use the cost of unpredictable branches. */
6294 const struct cpu_branch_cost *branch_costs =
b175b679 6295 aarch64_tune_params.branch_costs;
b9066f5a
MW
6296
6297 if (!speed_p || predictable_p)
6298 return branch_costs->predictable;
6299 else
6300 return branch_costs->unpredictable;
6301}
6302
7cc2145f
JG
6303/* Return true if the RTX X in mode MODE is a zero or sign extract
6304 usable in an ADD or SUB (extended register) instruction. */
6305static bool
ef4bddc2 6306aarch64_rtx_arith_op_extract_p (rtx x, machine_mode mode)
7cc2145f
JG
6307{
6308 /* Catch add with a sign extract.
6309 This is add_<optab><mode>_multp2. */
6310 if (GET_CODE (x) == SIGN_EXTRACT
6311 || GET_CODE (x) == ZERO_EXTRACT)
6312 {
6313 rtx op0 = XEXP (x, 0);
6314 rtx op1 = XEXP (x, 1);
6315 rtx op2 = XEXP (x, 2);
6316
6317 if (GET_CODE (op0) == MULT
6318 && CONST_INT_P (op1)
6319 && op2 == const0_rtx
6320 && CONST_INT_P (XEXP (op0, 1))
6321 && aarch64_is_extend_from_extract (mode,
6322 XEXP (op0, 1),
6323 op1))
6324 {
6325 return true;
6326 }
6327 }
e47c4031
KT
6328 /* The simple case <ARITH>, XD, XN, XM, [us]xt.
6329 No shift. */
6330 else if (GET_CODE (x) == SIGN_EXTEND
6331 || GET_CODE (x) == ZERO_EXTEND)
6332 return REG_P (XEXP (x, 0));
7cc2145f
JG
6333
6334 return false;
6335}
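/* Illustrative example: (sign_extend:DI (reg:SI)) used as the second
   operand of a PLUS satisfies the simple case above, allowing an ADD
   (extended register) such as "add x0, x2, w1, sxtw".  */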
6336
61263118
KT
6337static bool
6338aarch64_frint_unspec_p (unsigned int u)
6339{
6340 switch (u)
6341 {
6342 case UNSPEC_FRINTZ:
6343 case UNSPEC_FRINTP:
6344 case UNSPEC_FRINTM:
6345 case UNSPEC_FRINTA:
6346 case UNSPEC_FRINTN:
6347 case UNSPEC_FRINTX:
6348 case UNSPEC_FRINTI:
6349 return true;
6350
6351 default:
6352 return false;
6353 }
6354}
6355
fb0cb7fa
KT
6356/* Return true iff X is an rtx that will match an extr instruction
6357 i.e. as described in the *extr<mode>5_insn family of patterns.
6358 OP0 and OP1 will be set to the operands of the shifts involved
6359 on success and will be NULL_RTX otherwise. */
6360
6361static bool
6362aarch64_extr_rtx_p (rtx x, rtx *res_op0, rtx *res_op1)
6363{
6364 rtx op0, op1;
6365 machine_mode mode = GET_MODE (x);
6366
6367 *res_op0 = NULL_RTX;
6368 *res_op1 = NULL_RTX;
6369
6370 if (GET_CODE (x) != IOR)
6371 return false;
6372
6373 op0 = XEXP (x, 0);
6374 op1 = XEXP (x, 1);
6375
6376 if ((GET_CODE (op0) == ASHIFT && GET_CODE (op1) == LSHIFTRT)
6377 || (GET_CODE (op1) == ASHIFT && GET_CODE (op0) == LSHIFTRT))
6378 {
6379 /* Canonicalise locally to ashift in op0, lshiftrt in op1. */
6380 if (GET_CODE (op1) == ASHIFT)
6381 std::swap (op0, op1);
6382
6383 if (!CONST_INT_P (XEXP (op0, 1)) || !CONST_INT_P (XEXP (op1, 1)))
6384 return false;
6385
6386 unsigned HOST_WIDE_INT shft_amnt_0 = UINTVAL (XEXP (op0, 1));
6387 unsigned HOST_WIDE_INT shft_amnt_1 = UINTVAL (XEXP (op1, 1));
6388
6389 if (shft_amnt_0 < GET_MODE_BITSIZE (mode)
6390 && shft_amnt_0 + shft_amnt_1 == GET_MODE_BITSIZE (mode))
6391 {
6392 *res_op0 = XEXP (op0, 0);
6393 *res_op1 = XEXP (op1, 0);
6394 return true;
6395 }
6396 }
6397
6398 return false;
6399}
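/* Illustrative example: for DImode,
     (ior:DI (ashift:DI (reg:DI) (const_int 16))
	     (lshiftrt:DI (reg:DI) (const_int 48)))
   has shift amounts summing to 64, so the function succeeds and the
   pattern can be emitted as a single EXTR with an lsb of 48, e.g.
   "extr x0, x1, x2, #48".  */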
6400
2d5ffe46
AP
6401/* Calculate the cost of calculating (if_then_else (OP0) (OP1) (OP2)),
6402 storing it in *COST. Result is true if the total cost of the operation
6403 has now been calculated. */
6404static bool
6405aarch64_if_then_else_costs (rtx op0, rtx op1, rtx op2, int *cost, bool speed)
6406{
b9e3afe9
AP
6407 rtx inner;
6408 rtx comparator;
6409 enum rtx_code cmpcode;
6410
6411 if (COMPARISON_P (op0))
6412 {
6413 inner = XEXP (op0, 0);
6414 comparator = XEXP (op0, 1);
6415 cmpcode = GET_CODE (op0);
6416 }
6417 else
6418 {
6419 inner = op0;
6420 comparator = const0_rtx;
6421 cmpcode = NE;
6422 }
6423
2d5ffe46
AP
6424 if (GET_CODE (op1) == PC || GET_CODE (op2) == PC)
6425 {
6426 /* Conditional branch. */
b9e3afe9 6427 if (GET_MODE_CLASS (GET_MODE (inner)) == MODE_CC)
2d5ffe46
AP
6428 return true;
6429 else
6430 {
b9e3afe9 6431 if (cmpcode == NE || cmpcode == EQ)
2d5ffe46 6432 {
2d5ffe46
AP
6433 if (comparator == const0_rtx)
6434 {
6435 /* TBZ/TBNZ/CBZ/CBNZ. */
6436 if (GET_CODE (inner) == ZERO_EXTRACT)
6437 /* TBZ/TBNZ. */
e548c9df
AM
6438 *cost += rtx_cost (XEXP (inner, 0), VOIDmode,
6439 ZERO_EXTRACT, 0, speed);
6440 else
6441 /* CBZ/CBNZ. */
6442 *cost += rtx_cost (inner, VOIDmode, cmpcode, 0, speed);
2d5ffe46
AP
6443
6444 return true;
6445 }
6446 }
b9e3afe9 6447 else if (cmpcode == LT || cmpcode == GE)
2d5ffe46 6448 {
2d5ffe46
AP
6449 /* TBZ/TBNZ. */
6450 if (comparator == const0_rtx)
6451 return true;
6452 }
6453 }
6454 }
b9e3afe9 6455 else if (GET_MODE_CLASS (GET_MODE (inner)) == MODE_CC)
2d5ffe46 6456 {
786298dc 6457 /* CCMP. */
6dfeb7ce 6458 if (GET_CODE (op1) == COMPARE)
786298dc
WD
6459 {
6460 /* Increase cost of CCMP reg, 0, imm, CC to prefer CMP reg, 0. */
6461 if (XEXP (op1, 1) == const0_rtx)
6462 *cost += 1;
6463 if (speed)
6464 {
6465 machine_mode mode = GET_MODE (XEXP (op1, 0));
6466 const struct cpu_cost_table *extra_cost
6467 = aarch64_tune_params.insn_extra_cost;
6468
6469 if (GET_MODE_CLASS (mode) == MODE_INT)
6470 *cost += extra_cost->alu.arith;
6471 else
6472 *cost += extra_cost->fp[mode == DFmode].compare;
6473 }
6474 return true;
6475 }
6476
2d5ffe46
AP
6477 /* It's a conditional operation based on the status flags,
6478 so it must be some flavor of CSEL. */
6479
6480 /* CSNEG, CSINV, and CSINC are handled for free as part of CSEL. */
6481 if (GET_CODE (op1) == NEG
6482 || GET_CODE (op1) == NOT
6483 || (GET_CODE (op1) == PLUS && XEXP (op1, 1) == const1_rtx))
6484 op1 = XEXP (op1, 0);
bad00732
KT
6485 else if (GET_CODE (op1) == ZERO_EXTEND && GET_CODE (op2) == ZERO_EXTEND)
6486 {
6487 /* CSEL with zero-extension (*cmovdi_insn_uxtw). */
6488 op1 = XEXP (op1, 0);
6489 op2 = XEXP (op2, 0);
6490 }
2d5ffe46 6491
e548c9df
AM
6492 *cost += rtx_cost (op1, VOIDmode, IF_THEN_ELSE, 1, speed);
6493 *cost += rtx_cost (op2, VOIDmode, IF_THEN_ELSE, 2, speed);
2d5ffe46
AP
6494 return true;
6495 }
6496
 6497 /* We don't know what this is; cost all operands. */
6498 return false;
6499}
6500
283b6c85
KT
6501/* Check whether X is a bitfield operation of the form shift + extend that
6502 maps down to a UBFIZ/SBFIZ/UBFX/SBFX instruction. If so, return the
6503 operand to which the bitfield operation is applied. Otherwise return
6504 NULL_RTX. */
6505
6506static rtx
6507aarch64_extend_bitfield_pattern_p (rtx x)
6508{
6509 rtx_code outer_code = GET_CODE (x);
6510 machine_mode outer_mode = GET_MODE (x);
6511
6512 if (outer_code != ZERO_EXTEND && outer_code != SIGN_EXTEND
6513 && outer_mode != SImode && outer_mode != DImode)
6514 return NULL_RTX;
6515
6516 rtx inner = XEXP (x, 0);
6517 rtx_code inner_code = GET_CODE (inner);
6518 machine_mode inner_mode = GET_MODE (inner);
6519 rtx op = NULL_RTX;
6520
6521 switch (inner_code)
6522 {
6523 case ASHIFT:
6524 if (CONST_INT_P (XEXP (inner, 1))
6525 && (inner_mode == QImode || inner_mode == HImode))
6526 op = XEXP (inner, 0);
6527 break;
6528 case LSHIFTRT:
6529 if (outer_code == ZERO_EXTEND && CONST_INT_P (XEXP (inner, 1))
6530 && (inner_mode == QImode || inner_mode == HImode))
6531 op = XEXP (inner, 0);
6532 break;
6533 case ASHIFTRT:
6534 if (outer_code == SIGN_EXTEND && CONST_INT_P (XEXP (inner, 1))
6535 && (inner_mode == QImode || inner_mode == HImode))
6536 op = XEXP (inner, 0);
6537 break;
6538 default:
6539 break;
6540 }
6541
6542 return op;
6543}
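/* Illustrative example: (zero_extend:SI (lshiftrt:HI (reg:HI) (const_int 3)))
   returns the inner register here; that shift + extend combination maps
   to a single UBFX, while the ASHIFT forms map to UBFIZ/SBFIZ.  */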
6544
8c83f71d
KT
6545/* Return true if the mask and a shift amount from an RTX of the form
6546 (x << SHFT_AMNT) & MASK are valid to combine into a UBFIZ instruction of
6547 mode MODE. See the *andim_ashift<mode>_bfiz pattern. */
6548
6549bool
6550aarch64_mask_and_shift_for_ubfiz_p (machine_mode mode, rtx mask, rtx shft_amnt)
6551{
6552 return CONST_INT_P (mask) && CONST_INT_P (shft_amnt)
6553 && INTVAL (shft_amnt) < GET_MODE_BITSIZE (mode)
6554 && exact_log2 ((INTVAL (mask) >> INTVAL (shft_amnt)) + 1) >= 0
6555 && (INTVAL (mask) & ((1 << INTVAL (shft_amnt)) - 1)) == 0;
6556}
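/* Worked example (illustrative): for SImode, mask == 0xff0 and
   shft_amnt == 4 pass all the checks above (4 < 32, (0xff0 >> 4) + 1 is
   a power of two, and 0xff0 & 0xf == 0), so (x << 4) & 0xff0 can become
   a single UBFIZ with position 4 and width 8.  */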
6557
43e9d192
IB
6558/* Calculate the cost of calculating X, storing it in *COST. Result
6559 is true if the total cost of the operation has now been calculated. */
6560static bool
e548c9df 6561aarch64_rtx_costs (rtx x, machine_mode mode, int outer ATTRIBUTE_UNUSED,
43e9d192
IB
6562 int param ATTRIBUTE_UNUSED, int *cost, bool speed)
6563{
a8eecd00 6564 rtx op0, op1, op2;
73250c4c 6565 const struct cpu_cost_table *extra_cost
b175b679 6566 = aarch64_tune_params.insn_extra_cost;
e548c9df 6567 int code = GET_CODE (x);
43e9d192 6568
7fc5ef02
JG
6569 /* By default, assume that everything has equivalent cost to the
6570 cheapest instruction. Any additional costs are applied as a delta
6571 above this default. */
6572 *cost = COSTS_N_INSNS (1);
6573
43e9d192
IB
6574 switch (code)
6575 {
6576 case SET:
ba123b0d
JG
6577 /* The cost depends entirely on the operands to SET. */
6578 *cost = 0;
43e9d192
IB
6579 op0 = SET_DEST (x);
6580 op1 = SET_SRC (x);
6581
6582 switch (GET_CODE (op0))
6583 {
6584 case MEM:
6585 if (speed)
2961177e
JG
6586 {
6587 rtx address = XEXP (op0, 0);
b6875aac
KV
6588 if (VECTOR_MODE_P (mode))
6589 *cost += extra_cost->ldst.storev;
6590 else if (GET_MODE_CLASS (mode) == MODE_INT)
2961177e
JG
6591 *cost += extra_cost->ldst.store;
6592 else if (mode == SFmode)
6593 *cost += extra_cost->ldst.storef;
6594 else if (mode == DFmode)
6595 *cost += extra_cost->ldst.stored;
6596
6597 *cost +=
6598 COSTS_N_INSNS (aarch64_address_cost (address, mode,
6599 0, speed));
6600 }
43e9d192 6601
e548c9df 6602 *cost += rtx_cost (op1, mode, SET, 1, speed);
43e9d192
IB
6603 return true;
6604
6605 case SUBREG:
6606 if (! REG_P (SUBREG_REG (op0)))
e548c9df 6607 *cost += rtx_cost (SUBREG_REG (op0), VOIDmode, SET, 0, speed);
ba123b0d 6608
43e9d192
IB
6609 /* Fall through. */
6610 case REG:
b6875aac
KV
6611 /* The cost is one per vector-register copied. */
6612 if (VECTOR_MODE_P (GET_MODE (op0)) && REG_P (op1))
6613 {
6614 int n_minus_1 = (GET_MODE_SIZE (GET_MODE (op0)) - 1)
6615 / GET_MODE_SIZE (V4SImode);
6616 *cost = COSTS_N_INSNS (n_minus_1 + 1);
6617 }
ba123b0d
JG
6618 /* const0_rtx is in general free, but we will use an
6619 instruction to set a register to 0. */
b6875aac
KV
6620 else if (REG_P (op1) || op1 == const0_rtx)
6621 {
6622 /* The cost is 1 per register copied. */
6623 int n_minus_1 = (GET_MODE_SIZE (GET_MODE (op0)) - 1)
ba123b0d 6624 / UNITS_PER_WORD;
b6875aac
KV
6625 *cost = COSTS_N_INSNS (n_minus_1 + 1);
6626 }
ba123b0d
JG
6627 else
6628 /* Cost is just the cost of the RHS of the set. */
e548c9df 6629 *cost += rtx_cost (op1, mode, SET, 1, speed);
43e9d192
IB
6630 return true;
6631
ba123b0d 6632 case ZERO_EXTRACT:
43e9d192 6633 case SIGN_EXTRACT:
ba123b0d
JG
6634 /* Bit-field insertion. Strip any redundant widening of
6635 the RHS to meet the width of the target. */
43e9d192
IB
6636 if (GET_CODE (op1) == SUBREG)
6637 op1 = SUBREG_REG (op1);
6638 if ((GET_CODE (op1) == ZERO_EXTEND
6639 || GET_CODE (op1) == SIGN_EXTEND)
4aa81c2e 6640 && CONST_INT_P (XEXP (op0, 1))
43e9d192
IB
6641 && (GET_MODE_BITSIZE (GET_MODE (XEXP (op1, 0)))
6642 >= INTVAL (XEXP (op0, 1))))
6643 op1 = XEXP (op1, 0);
ba123b0d
JG
6644
6645 if (CONST_INT_P (op1))
6646 {
6647 /* MOV immediate is assumed to always be cheap. */
6648 *cost = COSTS_N_INSNS (1);
6649 }
6650 else
6651 {
6652 /* BFM. */
6653 if (speed)
6654 *cost += extra_cost->alu.bfi;
e548c9df 6655 *cost += rtx_cost (op1, VOIDmode, (enum rtx_code) code, 1, speed);
ba123b0d
JG
6656 }
6657
43e9d192
IB
6658 return true;
6659
6660 default:
ba123b0d
JG
 6661 /* We can't make sense of this; assume the default cost. */
6662 *cost = COSTS_N_INSNS (1);
61263118 6663 return false;
43e9d192
IB
6664 }
6665 return false;
6666
9dfc162c
JG
6667 case CONST_INT:
6668 /* If an instruction can incorporate a constant within the
6669 instruction, the instruction's expression avoids calling
6670 rtx_cost() on the constant. If rtx_cost() is called on a
6671 constant, then it is usually because the constant must be
6672 moved into a register by one or more instructions.
6673
6674 The exception is constant 0, which can be expressed
6675 as XZR/WZR and is therefore free. The exception to this is
6676 if we have (set (reg) (const0_rtx)) in which case we must cost
6677 the move. However, we can catch that when we cost the SET, so
6678 we don't need to consider that here. */
6679 if (x == const0_rtx)
6680 *cost = 0;
6681 else
6682 {
 6683 /* To an approximation, the cost of building any other constant
 6684 is proportional to the number of instructions required to
 6685 build that constant. This is true whether we
6686 are compiling for SPEED or otherwise. */
82614948
RR
6687 *cost = COSTS_N_INSNS (aarch64_internal_mov_immediate
6688 (NULL_RTX, x, false, mode));
9dfc162c
JG
6689 }
6690 return true;
6691
6692 case CONST_DOUBLE:
6693 if (speed)
6694 {
6695 /* mov[df,sf]_aarch64. */
6696 if (aarch64_float_const_representable_p (x))
6697 /* FMOV (scalar immediate). */
6698 *cost += extra_cost->fp[mode == DFmode].fpconst;
6699 else if (!aarch64_float_const_zero_rtx_p (x))
6700 {
6701 /* This will be a load from memory. */
6702 if (mode == DFmode)
6703 *cost += extra_cost->ldst.loadd;
6704 else
6705 *cost += extra_cost->ldst.loadf;
6706 }
6707 else
6708 /* Otherwise this is +0.0. We get this using MOVI d0, #0
6709 or MOV v0.s[0], wzr - neither of which are modeled by the
6710 cost tables. Just use the default cost. */
6711 {
6712 }
6713 }
6714
6715 return true;
6716
43e9d192
IB
6717 case MEM:
6718 if (speed)
2961177e
JG
6719 {
6720 /* For loads we want the base cost of a load, plus an
6721 approximation for the additional cost of the addressing
6722 mode. */
6723 rtx address = XEXP (x, 0);
b6875aac
KV
6724 if (VECTOR_MODE_P (mode))
6725 *cost += extra_cost->ldst.loadv;
6726 else if (GET_MODE_CLASS (mode) == MODE_INT)
2961177e
JG
6727 *cost += extra_cost->ldst.load;
6728 else if (mode == SFmode)
6729 *cost += extra_cost->ldst.loadf;
6730 else if (mode == DFmode)
6731 *cost += extra_cost->ldst.loadd;
6732
6733 *cost +=
6734 COSTS_N_INSNS (aarch64_address_cost (address, mode,
6735 0, speed));
6736 }
43e9d192
IB
6737
6738 return true;
6739
6740 case NEG:
4745e701
JG
6741 op0 = XEXP (x, 0);
6742
b6875aac
KV
6743 if (VECTOR_MODE_P (mode))
6744 {
6745 if (speed)
6746 {
6747 /* FNEG. */
6748 *cost += extra_cost->vect.alu;
6749 }
6750 return false;
6751 }
6752
e548c9df
AM
6753 if (GET_MODE_CLASS (mode) == MODE_INT)
6754 {
4745e701
JG
6755 if (GET_RTX_CLASS (GET_CODE (op0)) == RTX_COMPARE
6756 || GET_RTX_CLASS (GET_CODE (op0)) == RTX_COMM_COMPARE)
6757 {
6758 /* CSETM. */
e548c9df 6759 *cost += rtx_cost (XEXP (op0, 0), VOIDmode, NEG, 0, speed);
4745e701
JG
6760 return true;
6761 }
6762
6763 /* Cost this as SUB wzr, X. */
e548c9df 6764 op0 = CONST0_RTX (mode);
4745e701
JG
6765 op1 = XEXP (x, 0);
6766 goto cost_minus;
6767 }
6768
e548c9df 6769 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
4745e701
JG
6770 {
6771 /* Support (neg(fma...)) as a single instruction only if
6772 sign of zeros is unimportant. This matches the decision
6773 making in aarch64.md. */
6774 if (GET_CODE (op0) == FMA && !HONOR_SIGNED_ZEROS (GET_MODE (op0)))
6775 {
6776 /* FNMADD. */
e548c9df 6777 *cost = rtx_cost (op0, mode, NEG, 0, speed);
4745e701
JG
6778 return true;
6779 }
d318517d
SN
6780 if (GET_CODE (op0) == MULT)
6781 {
6782 /* FNMUL. */
6783 *cost = rtx_cost (op0, mode, NEG, 0, speed);
6784 return true;
6785 }
4745e701
JG
6786 if (speed)
6787 /* FNEG. */
6788 *cost += extra_cost->fp[mode == DFmode].neg;
6789 return false;
6790 }
6791
6792 return false;
43e9d192 6793
781aeb73
KT
6794 case CLRSB:
6795 case CLZ:
6796 if (speed)
b6875aac
KV
6797 {
6798 if (VECTOR_MODE_P (mode))
6799 *cost += extra_cost->vect.alu;
6800 else
6801 *cost += extra_cost->alu.clz;
6802 }
781aeb73
KT
6803
6804 return false;
6805
43e9d192
IB
6806 case COMPARE:
6807 op0 = XEXP (x, 0);
6808 op1 = XEXP (x, 1);
6809
6810 if (op1 == const0_rtx
6811 && GET_CODE (op0) == AND)
6812 {
6813 x = op0;
e548c9df 6814 mode = GET_MODE (op0);
43e9d192
IB
6815 goto cost_logic;
6816 }
6817
a8eecd00
JG
6818 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT)
6819 {
6820 /* TODO: A write to the CC flags possibly costs extra, this
6821 needs encoding in the cost tables. */
6822
e548c9df 6823 mode = GET_MODE (op0);
a8eecd00
JG
6824 /* ANDS. */
6825 if (GET_CODE (op0) == AND)
6826 {
6827 x = op0;
6828 goto cost_logic;
6829 }
6830
6831 if (GET_CODE (op0) == PLUS)
6832 {
6833 /* ADDS (and CMN alias). */
6834 x = op0;
6835 goto cost_plus;
6836 }
6837
6838 if (GET_CODE (op0) == MINUS)
6839 {
6840 /* SUBS. */
6841 x = op0;
6842 goto cost_minus;
6843 }
6844
345854d8
KT
6845 if (GET_CODE (op0) == ZERO_EXTRACT && op1 == const0_rtx
6846 && GET_MODE (x) == CC_NZmode && CONST_INT_P (XEXP (op0, 1))
6847 && CONST_INT_P (XEXP (op0, 2)))
6848 {
6849 /* COMPARE of ZERO_EXTRACT form of TST-immediate.
6850 Handle it here directly rather than going to cost_logic
6851 since we know the immediate generated for the TST is valid
6852 so we can avoid creating an intermediate rtx for it only
6853 for costing purposes. */
6854 if (speed)
6855 *cost += extra_cost->alu.logical;
6856
6857 *cost += rtx_cost (XEXP (op0, 0), GET_MODE (op0),
6858 ZERO_EXTRACT, 0, speed);
6859 return true;
6860 }
6861
a8eecd00
JG
6862 if (GET_CODE (op1) == NEG)
6863 {
6864 /* CMN. */
6865 if (speed)
6866 *cost += extra_cost->alu.arith;
6867
e548c9df
AM
6868 *cost += rtx_cost (op0, mode, COMPARE, 0, speed);
6869 *cost += rtx_cost (XEXP (op1, 0), mode, NEG, 1, speed);
a8eecd00
JG
6870 return true;
6871 }
6872
6873 /* CMP.
6874
6875 Compare can freely swap the order of operands, and
6876 canonicalization puts the more complex operation first.
6877 But the integer MINUS logic expects the shift/extend
6878 operation in op1. */
6879 if (! (REG_P (op0)
6880 || (GET_CODE (op0) == SUBREG && REG_P (SUBREG_REG (op0)))))
6881 {
6882 op0 = XEXP (x, 1);
6883 op1 = XEXP (x, 0);
6884 }
6885 goto cost_minus;
6886 }
6887
6888 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_FLOAT)
6889 {
6890 /* FCMP. */
6891 if (speed)
6892 *cost += extra_cost->fp[mode == DFmode].compare;
6893
6894 if (CONST_DOUBLE_P (op1) && aarch64_float_const_zero_rtx_p (op1))
6895 {
e548c9df 6896 *cost += rtx_cost (op0, VOIDmode, COMPARE, 0, speed);
a8eecd00
JG
6897 /* FCMP supports constant 0.0 for no extra cost. */
6898 return true;
6899 }
6900 return false;
6901 }
6902
b6875aac
KV
6903 if (VECTOR_MODE_P (mode))
6904 {
6905 /* Vector compare. */
6906 if (speed)
6907 *cost += extra_cost->vect.alu;
6908
6909 if (aarch64_float_const_zero_rtx_p (op1))
6910 {
6911 /* Vector cm (eq|ge|gt|lt|le) supports constant 0.0 for no extra
6912 cost. */
6913 return true;
6914 }
6915 return false;
6916 }
a8eecd00 6917 return false;
43e9d192
IB
6918
6919 case MINUS:
4745e701
JG
6920 {
6921 op0 = XEXP (x, 0);
6922 op1 = XEXP (x, 1);
6923
6924cost_minus:
e548c9df 6925 *cost += rtx_cost (op0, mode, MINUS, 0, speed);
23cb6618 6926
4745e701
JG
6927 /* Detect valid immediates. */
6928 if ((GET_MODE_CLASS (mode) == MODE_INT
6929 || (GET_MODE_CLASS (mode) == MODE_CC
6930 && GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT))
6931 && CONST_INT_P (op1)
6932 && aarch64_uimm12_shift (INTVAL (op1)))
6933 {
4745e701
JG
6934 if (speed)
6935 /* SUB(S) (immediate). */
6936 *cost += extra_cost->alu.arith;
6937 return true;
4745e701
JG
6938 }
6939
7cc2145f
JG
6940 /* Look for SUB (extended register). */
6941 if (aarch64_rtx_arith_op_extract_p (op1, mode))
6942 {
6943 if (speed)
2533c820 6944 *cost += extra_cost->alu.extend_arith;
7cc2145f 6945
e47c4031
KT
6946 op1 = aarch64_strip_extend (op1);
6947 *cost += rtx_cost (op1, VOIDmode,
e548c9df 6948 (enum rtx_code) GET_CODE (op1), 0, speed);
7cc2145f
JG
6949 return true;
6950 }
6951
4745e701
JG
6952 rtx new_op1 = aarch64_strip_extend (op1);
6953
6954 /* Cost this as an FMA-alike operation. */
6955 if ((GET_CODE (new_op1) == MULT
0a78ebe4 6956 || aarch64_shift_p (GET_CODE (new_op1)))
4745e701
JG
6957 && code != COMPARE)
6958 {
6959 *cost += aarch64_rtx_mult_cost (new_op1, MULT,
6960 (enum rtx_code) code,
6961 speed);
4745e701
JG
6962 return true;
6963 }
43e9d192 6964
e548c9df 6965 *cost += rtx_cost (new_op1, VOIDmode, MINUS, 1, speed);
43e9d192 6966
4745e701
JG
6967 if (speed)
6968 {
b6875aac
KV
6969 if (VECTOR_MODE_P (mode))
6970 {
6971 /* Vector SUB. */
6972 *cost += extra_cost->vect.alu;
6973 }
6974 else if (GET_MODE_CLASS (mode) == MODE_INT)
6975 {
6976 /* SUB(S). */
6977 *cost += extra_cost->alu.arith;
6978 }
4745e701 6979 else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
b6875aac
KV
6980 {
6981 /* FSUB. */
6982 *cost += extra_cost->fp[mode == DFmode].addsub;
6983 }
4745e701
JG
6984 }
6985 return true;
6986 }
43e9d192
IB
6987
6988 case PLUS:
4745e701
JG
6989 {
6990 rtx new_op0;
43e9d192 6991
4745e701
JG
6992 op0 = XEXP (x, 0);
6993 op1 = XEXP (x, 1);
43e9d192 6994
a8eecd00 6995cost_plus:
4745e701
JG
6996 if (GET_RTX_CLASS (GET_CODE (op0)) == RTX_COMPARE
6997 || GET_RTX_CLASS (GET_CODE (op0)) == RTX_COMM_COMPARE)
6998 {
6999 /* CSINC. */
e548c9df
AM
7000 *cost += rtx_cost (XEXP (op0, 0), mode, PLUS, 0, speed);
7001 *cost += rtx_cost (op1, mode, PLUS, 1, speed);
4745e701
JG
7002 return true;
7003 }
43e9d192 7004
4745e701
JG
7005 if (GET_MODE_CLASS (mode) == MODE_INT
7006 && CONST_INT_P (op1)
7007 && aarch64_uimm12_shift (INTVAL (op1)))
7008 {
e548c9df 7009 *cost += rtx_cost (op0, mode, PLUS, 0, speed);
43e9d192 7010
4745e701
JG
7011 if (speed)
7012 /* ADD (immediate). */
7013 *cost += extra_cost->alu.arith;
7014 return true;
7015 }
7016
e548c9df 7017 *cost += rtx_cost (op1, mode, PLUS, 1, speed);
23cb6618 7018
7cc2145f
JG
7019 /* Look for ADD (extended register). */
7020 if (aarch64_rtx_arith_op_extract_p (op0, mode))
7021 {
7022 if (speed)
2533c820 7023 *cost += extra_cost->alu.extend_arith;
7cc2145f 7024
e47c4031
KT
7025 op0 = aarch64_strip_extend (op0);
7026 *cost += rtx_cost (op0, VOIDmode,
e548c9df 7027 (enum rtx_code) GET_CODE (op0), 0, speed);
7cc2145f
JG
7028 return true;
7029 }
7030
4745e701
JG
7031 /* Strip any extend, leave shifts behind as we will
7032 cost them through mult_cost. */
7033 new_op0 = aarch64_strip_extend (op0);
7034
7035 if (GET_CODE (new_op0) == MULT
0a78ebe4 7036 || aarch64_shift_p (GET_CODE (new_op0)))
4745e701
JG
7037 {
7038 *cost += aarch64_rtx_mult_cost (new_op0, MULT, PLUS,
7039 speed);
4745e701
JG
7040 return true;
7041 }
7042
e548c9df 7043 *cost += rtx_cost (new_op0, VOIDmode, PLUS, 0, speed);
4745e701
JG
7044
7045 if (speed)
7046 {
b6875aac
KV
7047 if (VECTOR_MODE_P (mode))
7048 {
7049 /* Vector ADD. */
7050 *cost += extra_cost->vect.alu;
7051 }
7052 else if (GET_MODE_CLASS (mode) == MODE_INT)
7053 {
7054 /* ADD. */
7055 *cost += extra_cost->alu.arith;
7056 }
4745e701 7057 else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
b6875aac
KV
7058 {
7059 /* FADD. */
7060 *cost += extra_cost->fp[mode == DFmode].addsub;
7061 }
4745e701
JG
7062 }
7063 return true;
7064 }
43e9d192 7065
18b42b2a
KT
7066 case BSWAP:
7067 *cost = COSTS_N_INSNS (1);
7068
7069 if (speed)
b6875aac
KV
7070 {
7071 if (VECTOR_MODE_P (mode))
7072 *cost += extra_cost->vect.alu;
7073 else
7074 *cost += extra_cost->alu.rev;
7075 }
18b42b2a
KT
7076 return false;
7077
43e9d192 7078 case IOR:
f7d5cf8d
KT
7079 if (aarch_rev16_p (x))
7080 {
7081 *cost = COSTS_N_INSNS (1);
7082
b6875aac
KV
7083 if (speed)
7084 {
7085 if (VECTOR_MODE_P (mode))
7086 *cost += extra_cost->vect.alu;
7087 else
7088 *cost += extra_cost->alu.rev;
7089 }
7090 return true;
f7d5cf8d 7091 }
fb0cb7fa
KT
7092
7093 if (aarch64_extr_rtx_p (x, &op0, &op1))
7094 {
e548c9df
AM
7095 *cost += rtx_cost (op0, mode, IOR, 0, speed);
7096 *cost += rtx_cost (op1, mode, IOR, 1, speed);
fb0cb7fa
KT
7097 if (speed)
7098 *cost += extra_cost->alu.shift;
7099
7100 return true;
7101 }
f7d5cf8d 7102 /* Fall through. */
43e9d192
IB
7103 case XOR:
7104 case AND:
7105 cost_logic:
7106 op0 = XEXP (x, 0);
7107 op1 = XEXP (x, 1);
7108
b6875aac
KV
7109 if (VECTOR_MODE_P (mode))
7110 {
7111 if (speed)
7112 *cost += extra_cost->vect.alu;
7113 return true;
7114 }
7115
268c3b47
JG
7116 if (code == AND
7117 && GET_CODE (op0) == MULT
7118 && CONST_INT_P (XEXP (op0, 1))
7119 && CONST_INT_P (op1)
7120 && aarch64_uxt_size (exact_log2 (INTVAL (XEXP (op0, 1))),
7121 INTVAL (op1)) != 0)
7122 {
7123 /* This is a UBFM/SBFM. */
e548c9df 7124 *cost += rtx_cost (XEXP (op0, 0), mode, ZERO_EXTRACT, 0, speed);
268c3b47
JG
7125 if (speed)
7126 *cost += extra_cost->alu.bfx;
7127 return true;
7128 }
7129
e548c9df 7130 if (GET_MODE_CLASS (mode) == MODE_INT)
43e9d192 7131 {
8c83f71d 7132 if (CONST_INT_P (op1))
43e9d192 7133 {
8c83f71d
KT
7134 /* We have a mask + shift version of a UBFIZ
7135 i.e. the *andim_ashift<mode>_bfiz pattern. */
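	      /* Illustrative only (an assumed example, not compiler output):
		 an expression such as "(x << 4) & 0xff0" inserts the low
		 byte of x at bit 4 and can be emitted as a single UBFIZ,
		 e.g. "ubfiz w0, w1, #4, #8", hence the single alu.bfx cost
		 charged below.  */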
7136 if (GET_CODE (op0) == ASHIFT
7137 && aarch64_mask_and_shift_for_ubfiz_p (mode, op1,
7138 XEXP (op0, 1)))
7139 {
7140 *cost += rtx_cost (XEXP (op0, 0), mode,
7141 (enum rtx_code) code, 0, speed);
7142 if (speed)
7143 *cost += extra_cost->alu.bfx;
268c3b47 7144
8c83f71d
KT
7145 return true;
7146 }
7147 else if (aarch64_bitmask_imm (INTVAL (op1), mode))
7148 {
 7149 /* We may get the immediate for free; this is not
 7150 modelled. */
7151 *cost += rtx_cost (op0, mode, (enum rtx_code) code, 0, speed);
7152 if (speed)
7153 *cost += extra_cost->alu.logical;
268c3b47 7154
8c83f71d
KT
7155 return true;
7156 }
43e9d192
IB
7157 }
7158 else
7159 {
268c3b47
JG
7160 rtx new_op0 = op0;
7161
7162 /* Handle ORN, EON, or BIC. */
43e9d192
IB
7163 if (GET_CODE (op0) == NOT)
7164 op0 = XEXP (op0, 0);
268c3b47
JG
7165
7166 new_op0 = aarch64_strip_shift (op0);
7167
7168 /* If we had a shift on op0 then this is a logical-shift-
7169 by-register/immediate operation. Otherwise, this is just
7170 a logical operation. */
7171 if (speed)
7172 {
7173 if (new_op0 != op0)
7174 {
7175 /* Shift by immediate. */
7176 if (CONST_INT_P (XEXP (op0, 1)))
7177 *cost += extra_cost->alu.log_shift;
7178 else
7179 *cost += extra_cost->alu.log_shift_reg;
7180 }
7181 else
7182 *cost += extra_cost->alu.logical;
7183 }
7184
7185 /* In both cases we want to cost both operands. */
e548c9df
AM
7186 *cost += rtx_cost (new_op0, mode, (enum rtx_code) code, 0, speed);
7187 *cost += rtx_cost (op1, mode, (enum rtx_code) code, 1, speed);
268c3b47
JG
7188
7189 return true;
43e9d192 7190 }
43e9d192
IB
7191 }
7192 return false;
7193
268c3b47 7194 case NOT:
6365da9e
KT
7195 x = XEXP (x, 0);
7196 op0 = aarch64_strip_shift (x);
7197
b6875aac
KV
7198 if (VECTOR_MODE_P (mode))
7199 {
7200 /* Vector NOT. */
7201 *cost += extra_cost->vect.alu;
7202 return false;
7203 }
7204
6365da9e
KT
7205 /* MVN-shifted-reg. */
7206 if (op0 != x)
7207 {
e548c9df 7208 *cost += rtx_cost (op0, mode, (enum rtx_code) code, 0, speed);
6365da9e
KT
7209
7210 if (speed)
7211 *cost += extra_cost->alu.log_shift;
7212
7213 return true;
7214 }
7215 /* EON can have two forms: (xor (not a) b) but also (not (xor a b)).
 7216 Handle the second form here, taking care that 'a' in the above can
7217 be a shift. */
7218 else if (GET_CODE (op0) == XOR)
7219 {
7220 rtx newop0 = XEXP (op0, 0);
7221 rtx newop1 = XEXP (op0, 1);
7222 rtx op0_stripped = aarch64_strip_shift (newop0);
7223
e548c9df
AM
7224 *cost += rtx_cost (newop1, mode, (enum rtx_code) code, 1, speed);
7225 *cost += rtx_cost (op0_stripped, mode, XOR, 0, speed);
6365da9e
KT
7226
7227 if (speed)
7228 {
7229 if (op0_stripped != newop0)
7230 *cost += extra_cost->alu.log_shift;
7231 else
7232 *cost += extra_cost->alu.logical;
7233 }
7234
7235 return true;
7236 }
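      /* Illustration of the identity behind the two EON forms (a sketch,
	 for reference only): for any a and b, ~(a ^ b) == (~a) ^ b, so
	 both RTL shapes can map onto a single EON instruction, possibly
	 with a shifted register operand.  */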
268c3b47
JG
7237 /* MVN. */
7238 if (speed)
7239 *cost += extra_cost->alu.logical;
7240
268c3b47
JG
7241 return false;
7242
43e9d192 7243 case ZERO_EXTEND:
b1685e62
JG
7244
7245 op0 = XEXP (x, 0);
7246 /* If a value is written in SI mode, then zero extended to DI
7247 mode, the operation will in general be free as a write to
7248 a 'w' register implicitly zeroes the upper bits of an 'x'
7249 register. However, if this is
7250
7251 (set (reg) (zero_extend (reg)))
7252
7253 we must cost the explicit register move. */
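      /* Concrete illustration (assumed, standard AArch64 behaviour): an
	 instruction such as "add w0, w1, w2" already zeroes bits [63:32]
	 of x0, so a later (zero_extend:DI ...) of that result needs no
	 code; only a bare register-to-register zero extend costs a MOV.  */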
7254 if (mode == DImode
7255 && GET_MODE (op0) == SImode
7256 && outer == SET)
7257 {
e548c9df 7258 int op_cost = rtx_cost (op0, VOIDmode, ZERO_EXTEND, 0, speed);
b1685e62 7259
dde23f43
KM
7260 /* If OP_COST is non-zero, then the cost of the zero extend
7261 is effectively the cost of the inner operation. Otherwise
7262 we have a MOV instruction and we take the cost from the MOV
7263 itself. This is true independently of whether we are
7264 optimizing for space or time. */
7265 if (op_cost)
b1685e62
JG
7266 *cost = op_cost;
7267
7268 return true;
7269 }
e548c9df 7270 else if (MEM_P (op0))
43e9d192 7271 {
b1685e62 7272 /* All loads can zero extend to any size for free. */
e548c9df 7273 *cost = rtx_cost (op0, VOIDmode, ZERO_EXTEND, param, speed);
43e9d192
IB
7274 return true;
7275 }
b1685e62 7276
283b6c85
KT
7277 op0 = aarch64_extend_bitfield_pattern_p (x);
7278 if (op0)
7279 {
7280 *cost += rtx_cost (op0, mode, ZERO_EXTEND, 0, speed);
7281 if (speed)
7282 *cost += extra_cost->alu.bfx;
7283 return true;
7284 }
7285
b1685e62 7286 if (speed)
b6875aac
KV
7287 {
7288 if (VECTOR_MODE_P (mode))
7289 {
7290 /* UMOV. */
7291 *cost += extra_cost->vect.alu;
7292 }
7293 else
7294 {
63715e5e
WD
7295 /* We generate an AND instead of UXTB/UXTH. */
7296 *cost += extra_cost->alu.logical;
b6875aac
KV
7297 }
7298 }
43e9d192
IB
7299 return false;
7300
7301 case SIGN_EXTEND:
b1685e62 7302 if (MEM_P (XEXP (x, 0)))
43e9d192 7303 {
b1685e62
JG
7304 /* LDRSH. */
7305 if (speed)
7306 {
7307 rtx address = XEXP (XEXP (x, 0), 0);
7308 *cost += extra_cost->ldst.load_sign_extend;
7309
7310 *cost +=
7311 COSTS_N_INSNS (aarch64_address_cost (address, mode,
7312 0, speed));
7313 }
43e9d192
IB
7314 return true;
7315 }
b1685e62 7316
283b6c85
KT
7317 op0 = aarch64_extend_bitfield_pattern_p (x);
7318 if (op0)
7319 {
7320 *cost += rtx_cost (op0, mode, SIGN_EXTEND, 0, speed);
7321 if (speed)
7322 *cost += extra_cost->alu.bfx;
7323 return true;
7324 }
7325
b1685e62 7326 if (speed)
b6875aac
KV
7327 {
7328 if (VECTOR_MODE_P (mode))
7329 *cost += extra_cost->vect.alu;
7330 else
7331 *cost += extra_cost->alu.extend;
7332 }
43e9d192
IB
7333 return false;
7334
ba0cfa17
JG
7335 case ASHIFT:
7336 op0 = XEXP (x, 0);
7337 op1 = XEXP (x, 1);
7338
7339 if (CONST_INT_P (op1))
7340 {
ba0cfa17 7341 if (speed)
b6875aac
KV
7342 {
7343 if (VECTOR_MODE_P (mode))
7344 {
7345 /* Vector shift (immediate). */
7346 *cost += extra_cost->vect.alu;
7347 }
7348 else
7349 {
 7350 /* LSL (immediate), UBFM, UBFIZ and friends. These are all
7351 aliases. */
7352 *cost += extra_cost->alu.shift;
7353 }
7354 }
ba0cfa17
JG
7355
7356 /* We can incorporate zero/sign extend for free. */
7357 if (GET_CODE (op0) == ZERO_EXTEND
7358 || GET_CODE (op0) == SIGN_EXTEND)
7359 op0 = XEXP (op0, 0);
7360
e548c9df 7361 *cost += rtx_cost (op0, VOIDmode, ASHIFT, 0, speed);
ba0cfa17
JG
7362 return true;
7363 }
7364 else
7365 {
ba0cfa17 7366 if (speed)
b6875aac
KV
7367 {
7368 if (VECTOR_MODE_P (mode))
7369 {
7370 /* Vector shift (register). */
7371 *cost += extra_cost->vect.alu;
7372 }
7373 else
7374 {
7375 /* LSLV. */
7376 *cost += extra_cost->alu.shift_reg;
7377 }
7378 }
ba0cfa17
JG
7379 return false; /* All arguments need to be in registers. */
7380 }
7381
43e9d192 7382 case ROTATE:
43e9d192
IB
7383 case ROTATERT:
7384 case LSHIFTRT:
43e9d192 7385 case ASHIFTRT:
ba0cfa17
JG
7386 op0 = XEXP (x, 0);
7387 op1 = XEXP (x, 1);
43e9d192 7388
ba0cfa17
JG
7389 if (CONST_INT_P (op1))
7390 {
7391 /* ASR (immediate) and friends. */
7392 if (speed)
b6875aac
KV
7393 {
7394 if (VECTOR_MODE_P (mode))
7395 *cost += extra_cost->vect.alu;
7396 else
7397 *cost += extra_cost->alu.shift;
7398 }
43e9d192 7399
e548c9df 7400 *cost += rtx_cost (op0, mode, (enum rtx_code) code, 0, speed);
ba0cfa17
JG
7401 return true;
7402 }
7403 else
7404 {
7405
7406 /* ASR (register) and friends. */
7407 if (speed)
b6875aac
KV
7408 {
7409 if (VECTOR_MODE_P (mode))
7410 *cost += extra_cost->vect.alu;
7411 else
7412 *cost += extra_cost->alu.shift_reg;
7413 }
ba0cfa17
JG
7414 return false; /* All arguments need to be in registers. */
7415 }
43e9d192 7416
909734be
JG
7417 case SYMBOL_REF:
7418
1b1e81f8
JW
7419 if (aarch64_cmodel == AARCH64_CMODEL_LARGE
7420 || aarch64_cmodel == AARCH64_CMODEL_SMALL_SPIC)
909734be
JG
7421 {
7422 /* LDR. */
7423 if (speed)
7424 *cost += extra_cost->ldst.load;
7425 }
7426 else if (aarch64_cmodel == AARCH64_CMODEL_SMALL
7427 || aarch64_cmodel == AARCH64_CMODEL_SMALL_PIC)
7428 {
7429 /* ADRP, followed by ADD. */
7430 *cost += COSTS_N_INSNS (1);
7431 if (speed)
7432 *cost += 2 * extra_cost->alu.arith;
7433 }
7434 else if (aarch64_cmodel == AARCH64_CMODEL_TINY
7435 || aarch64_cmodel == AARCH64_CMODEL_TINY_PIC)
7436 {
7437 /* ADR. */
7438 if (speed)
7439 *cost += extra_cost->alu.arith;
7440 }
7441
7442 if (flag_pic)
7443 {
7444 /* One extra load instruction, after accessing the GOT. */
7445 *cost += COSTS_N_INSNS (1);
7446 if (speed)
7447 *cost += extra_cost->ldst.load;
7448 }
43e9d192
IB
7449 return true;
7450
909734be 7451 case HIGH:
43e9d192 7452 case LO_SUM:
909734be
JG
7453 /* ADRP/ADD (immediate). */
7454 if (speed)
7455 *cost += extra_cost->alu.arith;
43e9d192
IB
7456 return true;
7457
7458 case ZERO_EXTRACT:
7459 case SIGN_EXTRACT:
7cc2145f
JG
7460 /* UBFX/SBFX. */
7461 if (speed)
b6875aac
KV
7462 {
7463 if (VECTOR_MODE_P (mode))
7464 *cost += extra_cost->vect.alu;
7465 else
7466 *cost += extra_cost->alu.bfx;
7467 }
7cc2145f
JG
7468
7469 /* We can trust that the immediates used will be correct (there
7470 are no by-register forms), so we need only cost op0. */
e548c9df 7471 *cost += rtx_cost (XEXP (x, 0), VOIDmode, (enum rtx_code) code, 0, speed);
43e9d192
IB
7472 return true;
7473
7474 case MULT:
4745e701
JG
7475 *cost += aarch64_rtx_mult_cost (x, MULT, 0, speed);
7476 /* aarch64_rtx_mult_cost always handles recursion to its
7477 operands. */
7478 return true;
43e9d192
IB
7479
7480 case MOD:
4f58fe36
KT
7481 /* We can expand signed mod by power of 2 using a NEGS, two parallel
 7482 ANDs and a CSNEG. Assume here that a CSNEG costs the same as
 7483 an unconditional negate. This case should only ever be reached through
7484 the set_smod_pow2_cheap check in expmed.c. */
7485 if (CONST_INT_P (XEXP (x, 1))
7486 && exact_log2 (INTVAL (XEXP (x, 1))) > 0
7487 && (mode == SImode || mode == DImode))
7488 {
7489 /* We expand to 4 instructions. Reset the baseline. */
7490 *cost = COSTS_N_INSNS (4);
7491
7492 if (speed)
7493 *cost += 2 * extra_cost->alu.logical
7494 + 2 * extra_cost->alu.arith;
7495
7496 return true;
7497 }
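      /* For reference (a sketch; the actual sequence is chosen at expand
	 time, not here), "x % 8" for signed x can be emitted as:

	     negs	w1, w0
	     and	w0, w0, #7
	     and	w1, w1, #7
	     csneg	w0, w0, w1, mi

	 i.e. one flag-setting negate, two parallel ANDs and a CSNEG,
	 matching the four-instruction baseline assumed above.  */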
7498
7499 /* Fall-through. */
43e9d192 7500 case UMOD:
43e9d192
IB
7501 if (speed)
7502 {
b6875aac
KV
7503 if (VECTOR_MODE_P (mode))
7504 *cost += extra_cost->vect.alu;
e548c9df
AM
7505 else if (GET_MODE_CLASS (mode) == MODE_INT)
7506 *cost += (extra_cost->mult[mode == DImode].add
7507 + extra_cost->mult[mode == DImode].idiv);
7508 else if (mode == DFmode)
73250c4c
KT
7509 *cost += (extra_cost->fp[1].mult
7510 + extra_cost->fp[1].div);
e548c9df 7511 else if (mode == SFmode)
73250c4c
KT
7512 *cost += (extra_cost->fp[0].mult
7513 + extra_cost->fp[0].div);
43e9d192
IB
7514 }
7515 return false; /* All arguments need to be in registers. */
7516
7517 case DIV:
7518 case UDIV:
4105fe38 7519 case SQRT:
43e9d192
IB
7520 if (speed)
7521 {
b6875aac
KV
7522 if (VECTOR_MODE_P (mode))
7523 *cost += extra_cost->vect.alu;
7524 else if (GET_MODE_CLASS (mode) == MODE_INT)
4105fe38
JG
7525 /* There is no integer SQRT, so only DIV and UDIV can get
7526 here. */
7527 *cost += extra_cost->mult[mode == DImode].idiv;
7528 else
7529 *cost += extra_cost->fp[mode == DFmode].div;
43e9d192
IB
7530 }
7531 return false; /* All arguments need to be in registers. */
7532
a8eecd00 7533 case IF_THEN_ELSE:
2d5ffe46
AP
7534 return aarch64_if_then_else_costs (XEXP (x, 0), XEXP (x, 1),
7535 XEXP (x, 2), cost, speed);
a8eecd00
JG
7536
7537 case EQ:
7538 case NE:
7539 case GT:
7540 case GTU:
7541 case LT:
7542 case LTU:
7543 case GE:
7544 case GEU:
7545 case LE:
7546 case LEU:
7547
7548 return false; /* All arguments must be in registers. */
7549
b292109f
JG
7550 case FMA:
7551 op0 = XEXP (x, 0);
7552 op1 = XEXP (x, 1);
7553 op2 = XEXP (x, 2);
7554
7555 if (speed)
b6875aac
KV
7556 {
7557 if (VECTOR_MODE_P (mode))
7558 *cost += extra_cost->vect.alu;
7559 else
7560 *cost += extra_cost->fp[mode == DFmode].fma;
7561 }
b292109f
JG
7562
7563 /* FMSUB, FNMADD, and FNMSUB are free. */
7564 if (GET_CODE (op0) == NEG)
7565 op0 = XEXP (op0, 0);
7566
7567 if (GET_CODE (op2) == NEG)
7568 op2 = XEXP (op2, 0);
7569
7570 /* aarch64_fnma4_elt_to_64v2df has the NEG as operand 1,
7571 and the by-element operand as operand 0. */
7572 if (GET_CODE (op1) == NEG)
7573 op1 = XEXP (op1, 0);
7574
7575 /* Catch vector-by-element operations. The by-element operand can
7576 either be (vec_duplicate (vec_select (x))) or just
7577 (vec_select (x)), depending on whether we are multiplying by
7578 a vector or a scalar.
7579
 7580 Canonicalization is not very good in these cases: FMA4 will put the
 7581 by-element operand as operand 0, while FNMA4 will have it as operand 1. */
7582 if (GET_CODE (op0) == VEC_DUPLICATE)
7583 op0 = XEXP (op0, 0);
7584 else if (GET_CODE (op1) == VEC_DUPLICATE)
7585 op1 = XEXP (op1, 0);
7586
7587 if (GET_CODE (op0) == VEC_SELECT)
7588 op0 = XEXP (op0, 0);
7589 else if (GET_CODE (op1) == VEC_SELECT)
7590 op1 = XEXP (op1, 0);
7591
7592 /* If the remaining parameters are not registers,
7593 get the cost to put them into registers. */
e548c9df
AM
7594 *cost += rtx_cost (op0, mode, FMA, 0, speed);
7595 *cost += rtx_cost (op1, mode, FMA, 1, speed);
7596 *cost += rtx_cost (op2, mode, FMA, 2, speed);
b292109f
JG
7597 return true;
7598
5e2a765b
KT
7599 case FLOAT:
7600 case UNSIGNED_FLOAT:
7601 if (speed)
7602 *cost += extra_cost->fp[mode == DFmode].fromint;
7603 return false;
7604
b292109f
JG
7605 case FLOAT_EXTEND:
7606 if (speed)
b6875aac
KV
7607 {
7608 if (VECTOR_MODE_P (mode))
7609 {
 7610 /* Vector widening conversion. */
7611 *cost += extra_cost->vect.alu;
7612 }
7613 else
7614 *cost += extra_cost->fp[mode == DFmode].widen;
7615 }
b292109f
JG
7616 return false;
7617
7618 case FLOAT_TRUNCATE:
7619 if (speed)
b6875aac
KV
7620 {
7621 if (VECTOR_MODE_P (mode))
7622 {
 7623 /* Vector narrowing conversion. */
7624 *cost += extra_cost->vect.alu;
7625 }
7626 else
7627 *cost += extra_cost->fp[mode == DFmode].narrow;
7628 }
b292109f
JG
7629 return false;
7630
61263118
KT
7631 case FIX:
7632 case UNSIGNED_FIX:
7633 x = XEXP (x, 0);
7634 /* Strip the rounding part. They will all be implemented
7635 by the fcvt* family of instructions anyway. */
7636 if (GET_CODE (x) == UNSPEC)
7637 {
7638 unsigned int uns_code = XINT (x, 1);
7639
7640 if (uns_code == UNSPEC_FRINTA
7641 || uns_code == UNSPEC_FRINTM
7642 || uns_code == UNSPEC_FRINTN
7643 || uns_code == UNSPEC_FRINTP
7644 || uns_code == UNSPEC_FRINTZ)
7645 x = XVECEXP (x, 0, 0);
7646 }
7647
7648 if (speed)
b6875aac
KV
7649 {
7650 if (VECTOR_MODE_P (mode))
7651 *cost += extra_cost->vect.alu;
7652 else
7653 *cost += extra_cost->fp[GET_MODE (x) == DFmode].toint;
7654 }
39252973
KT
7655
7656 /* We can combine fmul by a power of 2 followed by a fcvt into a single
7657 fixed-point fcvt. */
7658 if (GET_CODE (x) == MULT
7659 && ((VECTOR_MODE_P (mode)
7660 && aarch64_vec_fpconst_pow_of_2 (XEXP (x, 1)) > 0)
7661 || aarch64_fpconst_pow_of_2 (XEXP (x, 1)) > 0))
7662 {
7663 *cost += rtx_cost (XEXP (x, 0), VOIDmode, (rtx_code) code,
7664 0, speed);
7665 return true;
7666 }
7667
e548c9df 7668 *cost += rtx_cost (x, VOIDmode, (enum rtx_code) code, 0, speed);
61263118
KT
7669 return true;
7670
b292109f 7671 case ABS:
b6875aac
KV
7672 if (VECTOR_MODE_P (mode))
7673 {
7674 /* ABS (vector). */
7675 if (speed)
7676 *cost += extra_cost->vect.alu;
7677 }
7678 else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
b292109f 7679 {
19261b99
KT
7680 op0 = XEXP (x, 0);
7681
7682 /* FABD, which is analogous to FADD. */
7683 if (GET_CODE (op0) == MINUS)
7684 {
e548c9df
AM
7685 *cost += rtx_cost (XEXP (op0, 0), mode, MINUS, 0, speed);
7686 *cost += rtx_cost (XEXP (op0, 1), mode, MINUS, 1, speed);
19261b99
KT
7687 if (speed)
7688 *cost += extra_cost->fp[mode == DFmode].addsub;
7689
7690 return true;
7691 }
7692 /* Simple FABS is analogous to FNEG. */
b292109f
JG
7693 if (speed)
7694 *cost += extra_cost->fp[mode == DFmode].neg;
7695 }
7696 else
7697 {
 7698 /* Integer ABS will either be split into
7699 two arithmetic instructions, or will be an ABS
7700 (scalar), which we don't model. */
7701 *cost = COSTS_N_INSNS (2);
7702 if (speed)
7703 *cost += 2 * extra_cost->alu.arith;
7704 }
7705 return false;
7706
7707 case SMAX:
7708 case SMIN:
7709 if (speed)
7710 {
b6875aac
KV
7711 if (VECTOR_MODE_P (mode))
7712 *cost += extra_cost->vect.alu;
7713 else
7714 {
7715 /* FMAXNM/FMINNM/FMAX/FMIN.
7716 TODO: This may not be accurate for all implementations, but
7717 we do not model this in the cost tables. */
7718 *cost += extra_cost->fp[mode == DFmode].addsub;
7719 }
b292109f
JG
7720 }
7721 return false;
7722
61263118
KT
7723 case UNSPEC:
7724 /* The floating point round to integer frint* instructions. */
7725 if (aarch64_frint_unspec_p (XINT (x, 1)))
7726 {
7727 if (speed)
7728 *cost += extra_cost->fp[mode == DFmode].roundint;
7729
7730 return false;
7731 }
781aeb73
KT
7732
7733 if (XINT (x, 1) == UNSPEC_RBIT)
7734 {
7735 if (speed)
7736 *cost += extra_cost->alu.rev;
7737
7738 return false;
7739 }
61263118
KT
7740 break;
7741
fb620c4a
JG
7742 case TRUNCATE:
7743
7744 /* Decompose <su>muldi3_highpart. */
7745 if (/* (truncate:DI */
7746 mode == DImode
7747 /* (lshiftrt:TI */
7748 && GET_MODE (XEXP (x, 0)) == TImode
7749 && GET_CODE (XEXP (x, 0)) == LSHIFTRT
7750 /* (mult:TI */
7751 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
7752 /* (ANY_EXTEND:TI (reg:DI))
7753 (ANY_EXTEND:TI (reg:DI))) */
7754 && ((GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == ZERO_EXTEND
7755 && GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)) == ZERO_EXTEND)
7756 || (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == SIGN_EXTEND
7757 && GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)) == SIGN_EXTEND))
7758 && GET_MODE (XEXP (XEXP (XEXP (XEXP (x, 0), 0), 0), 0)) == DImode
7759 && GET_MODE (XEXP (XEXP (XEXP (XEXP (x, 0), 0), 1), 0)) == DImode
7760 /* (const_int 64) */
7761 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
7762 && UINTVAL (XEXP (XEXP (x, 0), 1)) == 64)
7763 {
7764 /* UMULH/SMULH. */
7765 if (speed)
7766 *cost += extra_cost->mult[mode == DImode].extend;
e548c9df
AM
7767 *cost += rtx_cost (XEXP (XEXP (XEXP (XEXP (x, 0), 0), 0), 0),
7768 mode, MULT, 0, speed);
7769 *cost += rtx_cost (XEXP (XEXP (XEXP (XEXP (x, 0), 0), 1), 0),
7770 mode, MULT, 1, speed);
fb620c4a
JG
7771 return true;
7772 }
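      /* At the source level this RTL shape typically comes from something
	 like (illustrative):

	     uint64_t hi = (uint64_t) (((unsigned __int128) a * b) >> 64);

	 which can be emitted as a single "umulh" (or "smulh" for the
	 signed variant).  */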
7773
7774 /* Fall through. */
43e9d192 7775 default:
61263118 7776 break;
43e9d192 7777 }
61263118 7778
c10e3d7f
AP
7779 if (dump_file
7780 && flag_aarch64_verbose_cost)
61263118
KT
7781 fprintf (dump_file,
7782 "\nFailed to cost RTX. Assuming default cost.\n");
7783
7784 return true;
43e9d192
IB
7785}
7786
0ee859b5
JG
7787/* Wrapper around aarch64_rtx_costs, dumps the partial, or total cost
7788 calculated for X. This cost is stored in *COST. Returns true
7789 if the total cost of X was calculated. */
7790static bool
e548c9df 7791aarch64_rtx_costs_wrapper (rtx x, machine_mode mode, int outer,
0ee859b5
JG
7792 int param, int *cost, bool speed)
7793{
e548c9df 7794 bool result = aarch64_rtx_costs (x, mode, outer, param, cost, speed);
0ee859b5 7795
c10e3d7f
AP
7796 if (dump_file
7797 && flag_aarch64_verbose_cost)
0ee859b5
JG
7798 {
7799 print_rtl_single (dump_file, x);
7800 fprintf (dump_file, "\n%s cost: %d (%s)\n",
7801 speed ? "Hot" : "Cold",
7802 *cost, result ? "final" : "partial");
7803 }
7804
7805 return result;
7806}
7807
43e9d192 7808static int
ef4bddc2 7809aarch64_register_move_cost (machine_mode mode,
8a3a7e67 7810 reg_class_t from_i, reg_class_t to_i)
43e9d192 7811{
8a3a7e67
RH
7812 enum reg_class from = (enum reg_class) from_i;
7813 enum reg_class to = (enum reg_class) to_i;
43e9d192 7814 const struct cpu_regmove_cost *regmove_cost
b175b679 7815 = aarch64_tune_params.regmove_cost;
43e9d192 7816
3be07662 7817 /* Caller save and pointer regs are equivalent to GENERAL_REGS. */
2876a13f 7818 if (to == CALLER_SAVE_REGS || to == POINTER_REGS)
3be07662
WD
7819 to = GENERAL_REGS;
7820
2876a13f 7821 if (from == CALLER_SAVE_REGS || from == POINTER_REGS)
3be07662
WD
7822 from = GENERAL_REGS;
7823
6ee70f81
AP
7824 /* Moving between GPR and stack cost is the same as GP2GP. */
7825 if ((from == GENERAL_REGS && to == STACK_REG)
7826 || (to == GENERAL_REGS && from == STACK_REG))
7827 return regmove_cost->GP2GP;
7828
7829 /* To/From the stack register, we move via the gprs. */
7830 if (to == STACK_REG || from == STACK_REG)
7831 return aarch64_register_move_cost (mode, from, GENERAL_REGS)
7832 + aarch64_register_move_cost (mode, GENERAL_REGS, to);
7833
8919453c
WD
7834 if (GET_MODE_SIZE (mode) == 16)
7835 {
7836 /* 128-bit operations on general registers require 2 instructions. */
7837 if (from == GENERAL_REGS && to == GENERAL_REGS)
7838 return regmove_cost->GP2GP * 2;
7839 else if (from == GENERAL_REGS)
7840 return regmove_cost->GP2FP * 2;
7841 else if (to == GENERAL_REGS)
7842 return regmove_cost->FP2GP * 2;
7843
7844 /* When AdvSIMD instructions are disabled it is not possible to move
7845 a 128-bit value directly between Q registers. This is handled in
7846 secondary reload. A general register is used as a scratch to move
 7847 the upper DI value, and the lower DI value is moved directly;
7848 hence the cost is the sum of three moves. */
7849 if (! TARGET_SIMD)
7850 return regmove_cost->GP2FP + regmove_cost->FP2GP + regmove_cost->FP2FP;
7851
7852 return regmove_cost->FP2FP;
7853 }
7854
43e9d192
IB
7855 if (from == GENERAL_REGS && to == GENERAL_REGS)
7856 return regmove_cost->GP2GP;
7857 else if (from == GENERAL_REGS)
7858 return regmove_cost->GP2FP;
7859 else if (to == GENERAL_REGS)
7860 return regmove_cost->FP2GP;
7861
43e9d192
IB
7862 return regmove_cost->FP2FP;
7863}
7864
7865static int
ef4bddc2 7866aarch64_memory_move_cost (machine_mode mode ATTRIBUTE_UNUSED,
43e9d192
IB
7867 reg_class_t rclass ATTRIBUTE_UNUSED,
7868 bool in ATTRIBUTE_UNUSED)
7869{
b175b679 7870 return aarch64_tune_params.memmov_cost;
43e9d192
IB
7871}
7872
0c30e0f3
EM
7873/* Return true if it is safe and beneficial to use the approximate rsqrt optabs
7874 to optimize 1.0/sqrt. */
ee62a5a6
RS
7875
7876static bool
9acc9cbe 7877use_rsqrt_p (machine_mode mode)
ee62a5a6
RS
7878{
7879 return (!flag_trapping_math
7880 && flag_unsafe_math_optimizations
9acc9cbe
EM
7881 && ((aarch64_tune_params.approx_modes->recip_sqrt
7882 & AARCH64_APPROX_MODE (mode))
1a33079e 7883 || flag_mrecip_low_precision_sqrt));
ee62a5a6
RS
7884}
7885
0c30e0f3
EM
7886/* Function to decide when to use the approximate reciprocal square root
7887 builtin. */
a6fc00da
BH
7888
7889static tree
ee62a5a6 7890aarch64_builtin_reciprocal (tree fndecl)
a6fc00da 7891{
9acc9cbe
EM
7892 machine_mode mode = TYPE_MODE (TREE_TYPE (fndecl));
7893
7894 if (!use_rsqrt_p (mode))
a6fc00da 7895 return NULL_TREE;
ee62a5a6 7896 return aarch64_builtin_rsqrt (DECL_FUNCTION_CODE (fndecl));
a6fc00da
BH
7897}
7898
7899typedef rtx (*rsqrte_type) (rtx, rtx);
7900
98daafa0
EM
7901/* Select reciprocal square root initial estimate insn depending on machine
7902 mode. */
a6fc00da 7903
98daafa0 7904static rsqrte_type
a6fc00da
BH
7905get_rsqrte_type (machine_mode mode)
7906{
7907 switch (mode)
7908 {
2a823433
JW
7909 case DFmode: return gen_aarch64_rsqrtedf;
7910 case SFmode: return gen_aarch64_rsqrtesf;
7911 case V2DFmode: return gen_aarch64_rsqrtev2df;
7912 case V2SFmode: return gen_aarch64_rsqrtev2sf;
7913 case V4SFmode: return gen_aarch64_rsqrtev4sf;
a6fc00da
BH
7914 default: gcc_unreachable ();
7915 }
7916}
7917
7918typedef rtx (*rsqrts_type) (rtx, rtx, rtx);
7919
98daafa0 7920/* Select reciprocal square root series step insn depending on machine mode. */
a6fc00da 7921
98daafa0 7922static rsqrts_type
a6fc00da
BH
7923get_rsqrts_type (machine_mode mode)
7924{
7925 switch (mode)
7926 {
00ea75d4
JW
7927 case DFmode: return gen_aarch64_rsqrtsdf;
7928 case SFmode: return gen_aarch64_rsqrtssf;
7929 case V2DFmode: return gen_aarch64_rsqrtsv2df;
7930 case V2SFmode: return gen_aarch64_rsqrtsv2sf;
7931 case V4SFmode: return gen_aarch64_rsqrtsv4sf;
a6fc00da
BH
7932 default: gcc_unreachable ();
7933 }
7934}
7935
98daafa0
EM
7936/* Emit instruction sequence to compute either the approximate square root
7937 or its approximate reciprocal, depending on the flag RECP, and return
7938 whether the sequence was emitted or not. */
a6fc00da 7939
98daafa0
EM
7940bool
7941aarch64_emit_approx_sqrt (rtx dst, rtx src, bool recp)
a6fc00da 7942{
98daafa0 7943 machine_mode mode = GET_MODE (dst);
daef0a8c
JW
7944
7945 if (GET_MODE_INNER (mode) == HFmode)
2e19adc8
RE
7946 {
7947 gcc_assert (!recp);
7948 return false;
7949 }
7950
7951 machine_mode mmsk
7952 = mode_for_vector (int_mode_for_mode (GET_MODE_INNER (mode)),
7953 GET_MODE_NUNITS (mode));
7954 if (!recp)
7955 {
7956 if (!(flag_mlow_precision_sqrt
7957 || (aarch64_tune_params.approx_modes->sqrt
7958 & AARCH64_APPROX_MODE (mode))))
7959 return false;
7960
7961 if (flag_finite_math_only
7962 || flag_trapping_math
7963 || !flag_unsafe_math_optimizations
7964 || optimize_function_for_size_p (cfun))
7965 return false;
7966 }
7967 else
7968 /* Caller assumes we cannot fail. */
7969 gcc_assert (use_rsqrt_p (mode));
daef0a8c 7970
a6fc00da 7971
98daafa0
EM
7972 rtx xmsk = gen_reg_rtx (mmsk);
7973 if (!recp)
2e19adc8
RE
7974 /* When calculating the approximate square root, compare the
7975 argument with 0.0 and create a mask. */
7976 emit_insn (gen_rtx_SET (xmsk,
7977 gen_rtx_NEG (mmsk,
7978 gen_rtx_EQ (mmsk, src,
7979 CONST0_RTX (mode)))));
a6fc00da 7980
98daafa0
EM
7981 /* Estimate the approximate reciprocal square root. */
7982 rtx xdst = gen_reg_rtx (mode);
7983 emit_insn ((*get_rsqrte_type (mode)) (xdst, src));
a6fc00da 7984
98daafa0
EM
7985 /* Iterate over the series twice for SF and thrice for DF. */
7986 int iterations = (GET_MODE_INNER (mode) == DFmode) ? 3 : 2;
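  /* Each step of the loop below is a Newton-Raphson iteration for
     1/sqrt(d), roughly x' = x * (3 - d * x * x) / 2, where the FRSQRTS
     instruction supplies the (3 - d * x * x) / 2 factor (a sketch of the
     maths, for reference only).  */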
a6fc00da 7987
98daafa0
EM
 7988 /* Optionally iterate over the series once less for faster performance,
 7989 at the cost of some accuracy. */
7990 if ((recp && flag_mrecip_low_precision_sqrt)
7991 || (!recp && flag_mlow_precision_sqrt))
a6fc00da
BH
7992 iterations--;
7993
98daafa0
EM
7994 /* Iterate over the series to calculate the approximate reciprocal square
7995 root. */
7996 rtx x1 = gen_reg_rtx (mode);
7997 while (iterations--)
a6fc00da 7998 {
a6fc00da 7999 rtx x2 = gen_reg_rtx (mode);
98daafa0
EM
8000 emit_set_insn (x2, gen_rtx_MULT (mode, xdst, xdst));
8001
8002 emit_insn ((*get_rsqrts_type (mode)) (x1, src, x2));
a6fc00da 8003
98daafa0
EM
8004 if (iterations > 0)
8005 emit_set_insn (xdst, gen_rtx_MULT (mode, xdst, x1));
8006 }
8007
8008 if (!recp)
8009 {
8010 /* Qualify the approximate reciprocal square root when the argument is
 8011 0.0 by squashing the intermediate result to 0.0. */
8012 rtx xtmp = gen_reg_rtx (mmsk);
8013 emit_set_insn (xtmp, gen_rtx_AND (mmsk, gen_rtx_NOT (mmsk, xmsk),
8014 gen_rtx_SUBREG (mmsk, xdst, 0)));
8015 emit_move_insn (xdst, gen_rtx_SUBREG (mode, xtmp, 0));
a6fc00da 8016
98daafa0
EM
8017 /* Calculate the approximate square root. */
8018 emit_set_insn (xdst, gen_rtx_MULT (mode, xdst, src));
a6fc00da
BH
8019 }
8020
98daafa0
EM
8021 /* Finalize the approximation. */
8022 emit_set_insn (dst, gen_rtx_MULT (mode, xdst, x1));
8023
8024 return true;
a6fc00da
BH
8025}
8026
79a2bc2d
EM
8027typedef rtx (*recpe_type) (rtx, rtx);
8028
8029/* Select reciprocal initial estimate insn depending on machine mode. */
8030
8031static recpe_type
8032get_recpe_type (machine_mode mode)
8033{
8034 switch (mode)
8035 {
8036 case SFmode: return (gen_aarch64_frecpesf);
8037 case V2SFmode: return (gen_aarch64_frecpev2sf);
8038 case V4SFmode: return (gen_aarch64_frecpev4sf);
8039 case DFmode: return (gen_aarch64_frecpedf);
8040 case V2DFmode: return (gen_aarch64_frecpev2df);
8041 default: gcc_unreachable ();
8042 }
8043}
8044
8045typedef rtx (*recps_type) (rtx, rtx, rtx);
8046
8047/* Select reciprocal series step insn depending on machine mode. */
8048
8049static recps_type
8050get_recps_type (machine_mode mode)
8051{
8052 switch (mode)
8053 {
8054 case SFmode: return (gen_aarch64_frecpssf);
8055 case V2SFmode: return (gen_aarch64_frecpsv2sf);
8056 case V4SFmode: return (gen_aarch64_frecpsv4sf);
8057 case DFmode: return (gen_aarch64_frecpsdf);
8058 case V2DFmode: return (gen_aarch64_frecpsv2df);
8059 default: gcc_unreachable ();
8060 }
8061}
8062
8063/* Emit the instruction sequence to compute the approximation for the division
8064 of NUM by DEN in QUO and return whether the sequence was emitted or not. */
8065
8066bool
8067aarch64_emit_approx_div (rtx quo, rtx num, rtx den)
8068{
8069 machine_mode mode = GET_MODE (quo);
33d72b63
JW
8070
8071 if (GET_MODE_INNER (mode) == HFmode)
8072 return false;
8073
79a2bc2d
EM
8074 bool use_approx_division_p = (flag_mlow_precision_div
8075 || (aarch64_tune_params.approx_modes->division
8076 & AARCH64_APPROX_MODE (mode)));
8077
8078 if (!flag_finite_math_only
8079 || flag_trapping_math
8080 || !flag_unsafe_math_optimizations
8081 || optimize_function_for_size_p (cfun)
8082 || !use_approx_division_p)
8083 return false;
8084
8085 /* Estimate the approximate reciprocal. */
8086 rtx xrcp = gen_reg_rtx (mode);
8087 emit_insn ((*get_recpe_type (mode)) (xrcp, den));
8088
8089 /* Iterate over the series twice for SF and thrice for DF. */
8090 int iterations = (GET_MODE_INNER (mode) == DFmode) ? 3 : 2;
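  /* Each step of the loop below is a Newton-Raphson iteration for 1/d,
     x' = x * (2 - d * x), where the FRECPS instruction supplies the
     (2 - d * x) factor (a sketch of the maths, for reference only).  */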
8091
8092 /* Optionally iterate over the series once less for faster performance,
 8093 at the cost of some accuracy. */
8094 if (flag_mlow_precision_div)
8095 iterations--;
8096
8097 /* Iterate over the series to calculate the approximate reciprocal. */
8098 rtx xtmp = gen_reg_rtx (mode);
8099 while (iterations--)
8100 {
8101 emit_insn ((*get_recps_type (mode)) (xtmp, xrcp, den));
8102
8103 if (iterations > 0)
8104 emit_set_insn (xrcp, gen_rtx_MULT (mode, xrcp, xtmp));
8105 }
8106
8107 if (num != CONST1_RTX (mode))
8108 {
8109 /* As the approximate reciprocal of DEN is already calculated, only
8110 calculate the approximate division when NUM is not 1.0. */
8111 rtx xnum = force_reg (mode, num);
8112 emit_set_insn (xrcp, gen_rtx_MULT (mode, xrcp, xnum));
8113 }
8114
8115 /* Finalize the approximation. */
8116 emit_set_insn (quo, gen_rtx_MULT (mode, xrcp, xtmp));
8117 return true;
8118}
8119
d126a4ae
AP
8120/* Return the number of instructions that can be issued per cycle. */
8121static int
8122aarch64_sched_issue_rate (void)
8123{
b175b679 8124 return aarch64_tune_params.issue_rate;
d126a4ae
AP
8125}
8126
d03f7e44
MK
8127static int
8128aarch64_sched_first_cycle_multipass_dfa_lookahead (void)
8129{
8130 int issue_rate = aarch64_sched_issue_rate ();
8131
8132 return issue_rate > 1 && !sched_fusion ? issue_rate : 0;
8133}
8134
2d6bc7fa
KT
8135
8136/* Implement TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD as
8137 autopref_multipass_dfa_lookahead_guard from haifa-sched.c. It only
8138 has an effect if PARAM_SCHED_AUTOPREF_QUEUE_DEPTH > 0. */
8139
8140static int
8141aarch64_first_cycle_multipass_dfa_lookahead_guard (rtx_insn *insn,
8142 int ready_index)
8143{
8144 return autopref_multipass_dfa_lookahead_guard (insn, ready_index);
8145}
8146
8147
8990e73a
TB
8148/* Vectorizer cost model target hooks. */
8149
8150/* Implement targetm.vectorize.builtin_vectorization_cost. */
8151static int
8152aarch64_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
8153 tree vectype,
8154 int misalign ATTRIBUTE_UNUSED)
8155{
8156 unsigned elements;
cd8ae5ed
AP
8157 const cpu_vector_cost *costs = aarch64_tune_params.vec_costs;
8158 bool fp = false;
8159
8160 if (vectype != NULL)
8161 fp = FLOAT_TYPE_P (vectype);
8990e73a
TB
8162
8163 switch (type_of_cost)
8164 {
8165 case scalar_stmt:
cd8ae5ed 8166 return fp ? costs->scalar_fp_stmt_cost : costs->scalar_int_stmt_cost;
8990e73a
TB
8167
8168 case scalar_load:
cd8ae5ed 8169 return costs->scalar_load_cost;
8990e73a
TB
8170
8171 case scalar_store:
cd8ae5ed 8172 return costs->scalar_store_cost;
8990e73a
TB
8173
8174 case vector_stmt:
cd8ae5ed 8175 return fp ? costs->vec_fp_stmt_cost : costs->vec_int_stmt_cost;
8990e73a
TB
8176
8177 case vector_load:
cd8ae5ed 8178 return costs->vec_align_load_cost;
8990e73a
TB
8179
8180 case vector_store:
cd8ae5ed 8181 return costs->vec_store_cost;
8990e73a
TB
8182
8183 case vec_to_scalar:
cd8ae5ed 8184 return costs->vec_to_scalar_cost;
8990e73a
TB
8185
8186 case scalar_to_vec:
cd8ae5ed 8187 return costs->scalar_to_vec_cost;
8990e73a
TB
8188
8189 case unaligned_load:
cd8ae5ed 8190 return costs->vec_unalign_load_cost;
8990e73a
TB
8191
8192 case unaligned_store:
cd8ae5ed 8193 return costs->vec_unalign_store_cost;
8990e73a
TB
8194
8195 case cond_branch_taken:
cd8ae5ed 8196 return costs->cond_taken_branch_cost;
8990e73a
TB
8197
8198 case cond_branch_not_taken:
cd8ae5ed 8199 return costs->cond_not_taken_branch_cost;
8990e73a
TB
8200
8201 case vec_perm:
cd8ae5ed 8202 return costs->vec_permute_cost;
c428f91c 8203
8990e73a 8204 case vec_promote_demote:
cd8ae5ed 8205 return fp ? costs->vec_fp_stmt_cost : costs->vec_int_stmt_cost;
8990e73a
TB
8206
8207 case vec_construct:
8208 elements = TYPE_VECTOR_SUBPARTS (vectype);
8209 return elements / 2 + 1;
8210
8211 default:
8212 gcc_unreachable ();
8213 }
8214}
8215
8216/* Implement targetm.vectorize.add_stmt_cost. */
8217static unsigned
8218aarch64_add_stmt_cost (void *data, int count, enum vect_cost_for_stmt kind,
8219 struct _stmt_vec_info *stmt_info, int misalign,
8220 enum vect_cost_model_location where)
8221{
8222 unsigned *cost = (unsigned *) data;
8223 unsigned retval = 0;
8224
8225 if (flag_vect_cost_model)
8226 {
8227 tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
8228 int stmt_cost =
8229 aarch64_builtin_vectorization_cost (kind, vectype, misalign);
8230
8231 /* Statements in an inner loop relative to the loop being
8232 vectorized are weighted more heavily. The value here is
058e4c71 8233 arbitrary and could potentially be improved with analysis. */
8990e73a 8234 if (where == vect_body && stmt_info && stmt_in_inner_loop_p (stmt_info))
058e4c71 8235 count *= 50; /* FIXME */
8990e73a
TB
8236
8237 retval = (unsigned) (count * stmt_cost);
8238 cost[where] += retval;
8239 }
8240
8241 return retval;
8242}
8243
0cfff2a1 8244static void initialize_aarch64_code_model (struct gcc_options *);
43e9d192 8245
0cfff2a1
KT
8246/* Parse the TO_PARSE string and put the architecture struct that it
8247 selects into RES and the architectural features into ISA_FLAGS.
8248 Return an aarch64_parse_opt_result describing the parse result.
8249 If there is an error parsing, RES and ISA_FLAGS are left unchanged. */
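/* For example (illustrative values only), TO_PARSE might be "armv8-a" or
   "armv8-a+crc+crypto": the architecture name must match an entry in
   all_architectures, and each "+ext" suffix is handled by
   aarch64_parse_extension.  */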
43e9d192 8250
0cfff2a1
KT
8251static enum aarch64_parse_opt_result
8252aarch64_parse_arch (const char *to_parse, const struct processor **res,
8253 unsigned long *isa_flags)
43e9d192
IB
8254{
8255 char *ext;
8256 const struct processor *arch;
0cfff2a1 8257 char *str = (char *) alloca (strlen (to_parse) + 1);
43e9d192
IB
8258 size_t len;
8259
0cfff2a1 8260 strcpy (str, to_parse);
43e9d192
IB
8261
8262 ext = strchr (str, '+');
8263
8264 if (ext != NULL)
8265 len = ext - str;
8266 else
8267 len = strlen (str);
8268
8269 if (len == 0)
0cfff2a1
KT
8270 return AARCH64_PARSE_MISSING_ARG;
8271
43e9d192 8272
0cfff2a1 8273 /* Loop through the list of supported ARCHes to find a match. */
43e9d192
IB
8274 for (arch = all_architectures; arch->name != NULL; arch++)
8275 {
8276 if (strlen (arch->name) == len && strncmp (arch->name, str, len) == 0)
8277 {
0cfff2a1 8278 unsigned long isa_temp = arch->flags;
43e9d192
IB
8279
8280 if (ext != NULL)
8281 {
0cfff2a1
KT
8282 /* TO_PARSE string contains at least one extension. */
8283 enum aarch64_parse_opt_result ext_res
8284 = aarch64_parse_extension (ext, &isa_temp);
43e9d192 8285
0cfff2a1
KT
8286 if (ext_res != AARCH64_PARSE_OK)
8287 return ext_res;
ffee7aa9 8288 }
0cfff2a1
KT
8289 /* Extension parsing was successful. Confirm the result
8290 arch and ISA flags. */
8291 *res = arch;
8292 *isa_flags = isa_temp;
8293 return AARCH64_PARSE_OK;
43e9d192
IB
8294 }
8295 }
8296
8297 /* ARCH name not found in list. */
0cfff2a1 8298 return AARCH64_PARSE_INVALID_ARG;
43e9d192
IB
8299}
8300
0cfff2a1
KT
8301/* Parse the TO_PARSE string and put the result tuning in RES and the
8302 architecture flags in ISA_FLAGS. Return an aarch64_parse_opt_result
8303 describing the parse result. If there is an error parsing, RES and
8304 ISA_FLAGS are left unchanged. */
43e9d192 8305
0cfff2a1
KT
8306static enum aarch64_parse_opt_result
8307aarch64_parse_cpu (const char *to_parse, const struct processor **res,
8308 unsigned long *isa_flags)
43e9d192
IB
8309{
8310 char *ext;
8311 const struct processor *cpu;
0cfff2a1 8312 char *str = (char *) alloca (strlen (to_parse) + 1);
43e9d192
IB
8313 size_t len;
8314
0cfff2a1 8315 strcpy (str, to_parse);
43e9d192
IB
8316
8317 ext = strchr (str, '+');
8318
8319 if (ext != NULL)
8320 len = ext - str;
8321 else
8322 len = strlen (str);
8323
8324 if (len == 0)
0cfff2a1
KT
8325 return AARCH64_PARSE_MISSING_ARG;
8326
43e9d192
IB
8327
8328 /* Loop through the list of supported CPUs to find a match. */
8329 for (cpu = all_cores; cpu->name != NULL; cpu++)
8330 {
8331 if (strlen (cpu->name) == len && strncmp (cpu->name, str, len) == 0)
8332 {
0cfff2a1
KT
8333 unsigned long isa_temp = cpu->flags;
8334
43e9d192
IB
8335
8336 if (ext != NULL)
8337 {
0cfff2a1
KT
8338 /* TO_PARSE string contains at least one extension. */
8339 enum aarch64_parse_opt_result ext_res
8340 = aarch64_parse_extension (ext, &isa_temp);
43e9d192 8341
0cfff2a1
KT
8342 if (ext_res != AARCH64_PARSE_OK)
8343 return ext_res;
8344 }
 8345 /* Extension parsing was successful. Confirm the result
8346 cpu and ISA flags. */
8347 *res = cpu;
8348 *isa_flags = isa_temp;
8349 return AARCH64_PARSE_OK;
43e9d192
IB
8350 }
8351 }
8352
8353 /* CPU name not found in list. */
0cfff2a1 8354 return AARCH64_PARSE_INVALID_ARG;
43e9d192
IB
8355}
8356
0cfff2a1
KT
8357/* Parse the TO_PARSE string and put the cpu it selects into RES.
8358 Return an aarch64_parse_opt_result describing the parse result.
 8359 If the parsing fails, RES does not change. */
43e9d192 8360
0cfff2a1
KT
8361static enum aarch64_parse_opt_result
8362aarch64_parse_tune (const char *to_parse, const struct processor **res)
43e9d192
IB
8363{
8364 const struct processor *cpu;
0cfff2a1
KT
8365 char *str = (char *) alloca (strlen (to_parse) + 1);
8366
8367 strcpy (str, to_parse);
43e9d192
IB
8368
8369 /* Loop through the list of supported CPUs to find a match. */
8370 for (cpu = all_cores; cpu->name != NULL; cpu++)
8371 {
8372 if (strcmp (cpu->name, str) == 0)
8373 {
0cfff2a1
KT
8374 *res = cpu;
8375 return AARCH64_PARSE_OK;
43e9d192
IB
8376 }
8377 }
8378
8379 /* CPU name not found in list. */
0cfff2a1 8380 return AARCH64_PARSE_INVALID_ARG;
43e9d192
IB
8381}
8382
8dec06f2
JG
 8383/* Parse TOKEN, which has length LENGTH, to see if it is an option
8384 described in FLAG. If it is, return the index bit for that fusion type.
8385 If not, error (printing OPTION_NAME) and return zero. */
8386
8387static unsigned int
8388aarch64_parse_one_option_token (const char *token,
8389 size_t length,
8390 const struct aarch64_flag_desc *flag,
8391 const char *option_name)
8392{
8393 for (; flag->name != NULL; flag++)
8394 {
8395 if (length == strlen (flag->name)
8396 && !strncmp (flag->name, token, length))
8397 return flag->flag;
8398 }
8399
8400 error ("unknown flag passed in -moverride=%s (%s)", option_name, token);
8401 return 0;
8402}
8403
8404/* Parse OPTION which is a comma-separated list of flags to enable.
8405 FLAGS gives the list of flags we understand, INITIAL_STATE gives any
8406 default state we inherit from the CPU tuning structures. OPTION_NAME
8407 gives the top-level option we are parsing in the -moverride string,
8408 for use in error messages. */
8409
8410static unsigned int
8411aarch64_parse_boolean_options (const char *option,
8412 const struct aarch64_flag_desc *flags,
8413 unsigned int initial_state,
8414 const char *option_name)
8415{
8416 const char separator = '.';
8417 const char* specs = option;
8418 const char* ntoken = option;
8419 unsigned int found_flags = initial_state;
8420
8421 while ((ntoken = strchr (specs, separator)))
8422 {
8423 size_t token_length = ntoken - specs;
8424 unsigned token_ops = aarch64_parse_one_option_token (specs,
8425 token_length,
8426 flags,
8427 option_name);
8428 /* If we find "none" (or, for simplicity's sake, an error) anywhere
8429 in the token stream, reset the supported operations. So:
8430
8431 adrp+add.cmp+branch.none.adrp+add
8432
8433 would have the result of turning on only adrp+add fusion. */
8434 if (!token_ops)
8435 found_flags = 0;
8436
8437 found_flags |= token_ops;
8438 specs = ++ntoken;
8439 }
8440
 8441 /* The string ended with a trailing separator; report it as ill-formed. */
8442 if (!(*specs))
8443 {
8444 error ("%s string ill-formed\n", option_name);
8445 return 0;
8446 }
8447
8448 /* We still have one more token to parse. */
8449 size_t token_length = strlen (specs);
8450 unsigned token_ops = aarch64_parse_one_option_token (specs,
8451 token_length,
8452 flags,
8453 option_name);
8454 if (!token_ops)
8455 found_flags = 0;
8456
8457 found_flags |= token_ops;
8458 return found_flags;
8459}
8460
8461/* Support for overriding instruction fusion. */
8462
8463static void
8464aarch64_parse_fuse_string (const char *fuse_string,
8465 struct tune_params *tune)
8466{
8467 tune->fusible_ops = aarch64_parse_boolean_options (fuse_string,
8468 aarch64_fusible_pairs,
8469 tune->fusible_ops,
8470 "fuse=");
8471}
8472
8473/* Support for overriding other tuning flags. */
8474
8475static void
8476aarch64_parse_tune_string (const char *tune_string,
8477 struct tune_params *tune)
8478{
8479 tune->extra_tuning_flags
8480 = aarch64_parse_boolean_options (tune_string,
8481 aarch64_tuning_flags,
8482 tune->extra_tuning_flags,
8483 "tune=");
8484}
8485
 8486/* Parse TOKEN, which has length LENGTH, to see if it is a tuning option
 8487 we understand. If it is, extract the option string and hand it off to
 8488 the appropriate function. */
8489
8490void
8491aarch64_parse_one_override_token (const char* token,
8492 size_t length,
8493 struct tune_params *tune)
8494{
8495 const struct aarch64_tuning_override_function *fn
8496 = aarch64_tuning_override_functions;
8497
8498 const char *option_part = strchr (token, '=');
8499 if (!option_part)
8500 {
8501 error ("tuning string missing in option (%s)", token);
8502 return;
8503 }
8504
8505 /* Get the length of the option name. */
8506 length = option_part - token;
8507 /* Skip the '=' to get to the option string. */
8508 option_part++;
8509
8510 for (; fn->name != NULL; fn++)
8511 {
8512 if (!strncmp (fn->name, token, length))
8513 {
8514 fn->parse_override (option_part, tune);
8515 return;
8516 }
8517 }
8518
 8519 error ("unknown tuning option (%s)", token);
8520 return;
8521}
8522
5eee3c34
JW
 8523/* Validate and clamp the requested TLS size for the selected code model. */
8524
8525static void
8526initialize_aarch64_tls_size (struct gcc_options *opts)
8527{
8528 if (aarch64_tls_size == 0)
8529 aarch64_tls_size = 24;
8530
8531 switch (opts->x_aarch64_cmodel_var)
8532 {
8533 case AARCH64_CMODEL_TINY:
 8534 /* Both the default and the maximum TLS size allowed under tiny are 1M,
 8535 which needs two instructions to address, so we clamp the size to 24. */
8536 if (aarch64_tls_size > 24)
8537 aarch64_tls_size = 24;
8538 break;
8539 case AARCH64_CMODEL_SMALL:
8540 /* The maximum TLS size allowed under small is 4G. */
8541 if (aarch64_tls_size > 32)
8542 aarch64_tls_size = 32;
8543 break;
8544 case AARCH64_CMODEL_LARGE:
8545 /* The maximum TLS size allowed under large is 16E.
8546 FIXME: 16E should be 64bit, we only support 48bit offset now. */
8547 if (aarch64_tls_size > 48)
8548 aarch64_tls_size = 48;
8549 break;
8550 default:
8551 gcc_unreachable ();
8552 }
8553
8554 return;
8555}
8556
8dec06f2
JG
8557/* Parse STRING looking for options in the format:
8558 string :: option:string
8559 option :: name=substring
8560 name :: {a-z}
8561 substring :: defined by option. */
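/* An illustrative -moverride string in this format (the option and flag
   names here are examples drawn from the fusion/tuning tables and may
   differ):

     -moverride=fuse=adrp+add.cmp+branch:tune=rename_fma_regs

   which enables the adrp+add and cmp+branch fusion pairs and the
   rename_fma_regs tuning flag.  */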
8562
8563static void
8564aarch64_parse_override_string (const char* input_string,
8565 struct tune_params* tune)
8566{
8567 const char separator = ':';
8568 size_t string_length = strlen (input_string) + 1;
8569 char *string_root = (char *) xmalloc (sizeof (*string_root) * string_length);
8570 char *string = string_root;
8571 strncpy (string, input_string, string_length);
8572 string[string_length - 1] = '\0';
8573
8574 char* ntoken = string;
8575
8576 while ((ntoken = strchr (string, separator)))
8577 {
8578 size_t token_length = ntoken - string;
8579 /* Make this substring look like a string. */
8580 *ntoken = '\0';
8581 aarch64_parse_one_override_token (string, token_length, tune);
8582 string = ++ntoken;
8583 }
8584
8585 /* One last option to parse. */
8586 aarch64_parse_one_override_token (string, strlen (string), tune);
8587 free (string_root);
8588}
43e9d192 8589
43e9d192
IB
8590
8591static void
0cfff2a1 8592aarch64_override_options_after_change_1 (struct gcc_options *opts)
43e9d192 8593{
a3dc8760
NC
8594 /* The logic here is that if we are disabling all frame pointer generation
8595 then we do not need to disable leaf frame pointer generation as a
8596 separate operation. But if we are *only* disabling leaf frame pointer
8597 generation then we set flag_omit_frame_pointer to true, but in
8598 aarch64_frame_pointer_required we return false only for leaf functions.
8599
8600 PR 70044: We have to be careful about being called multiple times for the
8601 same function. Once we have decided to set flag_omit_frame_pointer just
8602 so that we can omit leaf frame pointers, we must then not interpret a
8603 second call as meaning that all frame pointer generation should be
8604 omitted. We do this by setting flag_omit_frame_pointer to a special,
8605 non-zero value. */
8606 if (opts->x_flag_omit_frame_pointer == 2)
8607 opts->x_flag_omit_frame_pointer = 0;
8608
0cfff2a1
KT
8609 if (opts->x_flag_omit_frame_pointer)
8610 opts->x_flag_omit_leaf_frame_pointer = false;
8611 else if (opts->x_flag_omit_leaf_frame_pointer)
a3dc8760 8612 opts->x_flag_omit_frame_pointer = 2;
43e9d192 8613
1be34295 8614 /* If not optimizing for size, set the default
0cfff2a1
KT
8615 alignment to what the target wants. */
8616 if (!opts->x_optimize_size)
43e9d192 8617 {
0cfff2a1
KT
8618 if (opts->x_align_loops <= 0)
8619 opts->x_align_loops = aarch64_tune_params.loop_align;
8620 if (opts->x_align_jumps <= 0)
8621 opts->x_align_jumps = aarch64_tune_params.jump_align;
8622 if (opts->x_align_functions <= 0)
8623 opts->x_align_functions = aarch64_tune_params.function_align;
43e9d192 8624 }
b4f50fd4 8625
9ee6540a
WD
8626 /* We default to no pc-relative literal loads. */
8627
8628 aarch64_pcrelative_literal_loads = false;
8629
8630 /* If -mpc-relative-literal-loads is set on the command line, this
b4f50fd4 8631 implies that the user asked for PC relative literal loads. */
9ee6540a
WD
8632 if (opts->x_pcrelative_literal_loads == 1)
8633 aarch64_pcrelative_literal_loads = true;
b4f50fd4 8634
48bb1a55
CL
8635 /* This is PR70113. When building the Linux kernel with
8636 CONFIG_ARM64_ERRATUM_843419, support for relocations
8637 R_AARCH64_ADR_PREL_PG_HI21 and R_AARCH64_ADR_PREL_PG_HI21_NC is
8638 removed from the kernel to avoid loading objects with possibly
9ee6540a 8639 offending sequences. Without -mpc-relative-literal-loads we would
48bb1a55
CL
8640 generate such relocations, preventing the kernel build from
8641 succeeding. */
9ee6540a
WD
8642 if (opts->x_pcrelative_literal_loads == 2
8643 && TARGET_FIX_ERR_A53_843419)
8644 aarch64_pcrelative_literal_loads = true;
8645
8646 /* In the tiny memory model it makes no sense to disallow PC relative
8647 literal pool loads. */
8648 if (aarch64_cmodel == AARCH64_CMODEL_TINY
8649 || aarch64_cmodel == AARCH64_CMODEL_TINY_PIC)
8650 aarch64_pcrelative_literal_loads = true;
98daafa0
EM
8651
8652 /* When enabling the lower precision Newton series for the square root, also
8653 enable it for the reciprocal square root, since the latter is an
8654 intermediary step for the former. */
8655 if (flag_mlow_precision_sqrt)
8656 flag_mrecip_low_precision_sqrt = true;
0cfff2a1 8657}
43e9d192 8658
0cfff2a1
KT
8659/* 'Unpack' up the internal tuning structs and update the options
8660 in OPTS. The caller must have set up selected_tune and selected_arch
8661 as all the other target-specific codegen decisions are
8662 derived from them. */
8663
e4ea20c8 8664void
0cfff2a1
KT
8665aarch64_override_options_internal (struct gcc_options *opts)
8666{
8667 aarch64_tune_flags = selected_tune->flags;
8668 aarch64_tune = selected_tune->sched_core;
8669 /* Make a copy of the tuning parameters attached to the core, which
8670 we may later overwrite. */
8671 aarch64_tune_params = *(selected_tune->tune);
8672 aarch64_architecture_version = selected_arch->architecture_version;
8673
8674 if (opts->x_aarch64_override_tune_string)
8675 aarch64_parse_override_string (opts->x_aarch64_override_tune_string,
8676 &aarch64_tune_params);
8677
8678 /* This target defaults to strict volatile bitfields. */
8679 if (opts->x_flag_strict_volatile_bitfields < 0 && abi_version_at_least (2))
8680 opts->x_flag_strict_volatile_bitfields = 1;
8681
0cfff2a1 8682 initialize_aarch64_code_model (opts);
5eee3c34 8683 initialize_aarch64_tls_size (opts);
63892fa2 8684
2d6bc7fa
KT
8685 int queue_depth = 0;
8686 switch (aarch64_tune_params.autoprefetcher_model)
8687 {
8688 case tune_params::AUTOPREFETCHER_OFF:
8689 queue_depth = -1;
8690 break;
8691 case tune_params::AUTOPREFETCHER_WEAK:
8692 queue_depth = 0;
8693 break;
8694 case tune_params::AUTOPREFETCHER_STRONG:
8695 queue_depth = max_insn_queue_index + 1;
8696 break;
8697 default:
8698 gcc_unreachable ();
8699 }
8700
8701 /* We don't mind passing in global_options_set here as we don't use
8702 the *options_set structs anyway. */
8703 maybe_set_param_value (PARAM_SCHED_AUTOPREF_QUEUE_DEPTH,
8704 queue_depth,
8705 opts->x_param_values,
8706 global_options_set.x_param_values);
8707
50487d79
EM
8708 /* Set the L1 cache line size. */
8709 if (selected_cpu->tune->cache_line_size != 0)
8710 maybe_set_param_value (PARAM_L1_CACHE_LINE_SIZE,
8711 selected_cpu->tune->cache_line_size,
8712 opts->x_param_values,
8713 global_options_set.x_param_values);
8714
0cfff2a1
KT
8715 aarch64_override_options_after_change_1 (opts);
8716}
43e9d192 8717
01f44038
KT
8718/* Print a hint with a suggestion for a core or architecture name that
8719 most closely resembles what the user passed in STR. ARCH is true if
8720 the user is asking for an architecture name. ARCH is false if the user
8721 is asking for a core name. */
8722
8723static void
8724aarch64_print_hint_for_core_or_arch (const char *str, bool arch)
8725{
8726 auto_vec<const char *> candidates;
8727 const struct processor *entry = arch ? all_architectures : all_cores;
8728 for (; entry->name != NULL; entry++)
8729 candidates.safe_push (entry->name);
8730 char *s;
8731 const char *hint = candidates_list_and_hint (str, s, candidates);
8732 if (hint)
8733 inform (input_location, "valid arguments are: %s;"
8734 " did you mean %qs?", s, hint);
8735 XDELETEVEC (s);
8736}
8737
8738/* Print a hint with a suggestion for a core name that most closely resembles
8739 what the user passed in STR. */
8740
8741inline static void
8742aarch64_print_hint_for_core (const char *str)
8743{
8744 aarch64_print_hint_for_core_or_arch (str, false);
8745}
8746
8747/* Print a hint with a suggestion for an architecture name that most closely
8748 resembles what the user passed in STR. */
8749
8750inline static void
8751aarch64_print_hint_for_arch (const char *str)
8752{
8753 aarch64_print_hint_for_core_or_arch (str, true);
8754}
8755
0cfff2a1
KT
8756/* Validate a command-line -mcpu option. Parse the cpu and extensions (if any)
8757 specified in STR and throw errors if appropriate. Put the results if
361fb3ee
KT
8758 they are valid in RES and ISA_FLAGS. Return whether the option is
8759 valid. */
43e9d192 8760
361fb3ee 8761static bool
0cfff2a1
KT
8762aarch64_validate_mcpu (const char *str, const struct processor **res,
8763 unsigned long *isa_flags)
8764{
8765 enum aarch64_parse_opt_result parse_res
8766 = aarch64_parse_cpu (str, res, isa_flags);
8767
8768 if (parse_res == AARCH64_PARSE_OK)
361fb3ee 8769 return true;
0cfff2a1
KT
8770
8771 switch (parse_res)
8772 {
8773 case AARCH64_PARSE_MISSING_ARG:
fb241da2 8774 error ("missing cpu name in %<-mcpu=%s%>", str);
0cfff2a1
KT
8775 break;
8776 case AARCH64_PARSE_INVALID_ARG:
8777 error ("unknown value %qs for -mcpu", str);
01f44038 8778 aarch64_print_hint_for_core (str);
0cfff2a1
KT
8779 break;
8780 case AARCH64_PARSE_INVALID_FEATURE:
fb241da2 8781 error ("invalid feature modifier in %<-mcpu=%s%>", str);
0cfff2a1
KT
8782 break;
8783 default:
8784 gcc_unreachable ();
8785 }
361fb3ee
KT
8786
8787 return false;
0cfff2a1
KT
8788}
8789
8790/* Validate a command-line -march option. Parse the arch and extensions
8791 (if any) specified in STR and throw errors if appropriate. Put the
361fb3ee
KT
8792 results, if they are valid, in RES and ISA_FLAGS. Return whether the
8793 option is valid. */
0cfff2a1 8794
361fb3ee 8795static bool
0cfff2a1 8796aarch64_validate_march (const char *str, const struct processor **res,
01f44038 8797 unsigned long *isa_flags)
0cfff2a1
KT
8798{
8799 enum aarch64_parse_opt_result parse_res
8800 = aarch64_parse_arch (str, res, isa_flags);
8801
8802 if (parse_res == AARCH64_PARSE_OK)
361fb3ee 8803 return true;
0cfff2a1
KT
8804
8805 switch (parse_res)
8806 {
8807 case AARCH64_PARSE_MISSING_ARG:
fb241da2 8808 error ("missing arch name in %<-march=%s%>", str);
0cfff2a1
KT
8809 break;
8810 case AARCH64_PARSE_INVALID_ARG:
8811 error ("unknown value %qs for -march", str);
01f44038 8812 aarch64_print_hint_for_arch (str);
0cfff2a1
KT
8813 break;
8814 case AARCH64_PARSE_INVALID_FEATURE:
fb241da2 8815 error ("invalid feature modifier in %<-march=%s%>", str);
0cfff2a1
KT
8816 break;
8817 default:
8818 gcc_unreachable ();
8819 }
361fb3ee
KT
8820
8821 return false;
0cfff2a1
KT
8822}
8823
8824/* Validate a command-line -mtune option. Parse the cpu
8825 specified in STR and throw errors if appropriate. Put the
361fb3ee
KT
8826 result, if it is valid, in RES. Return whether the option is
8827 valid. */
0cfff2a1 8828
361fb3ee 8829static bool
0cfff2a1
KT
8830aarch64_validate_mtune (const char *str, const struct processor **res)
8831{
8832 enum aarch64_parse_opt_result parse_res
8833 = aarch64_parse_tune (str, res);
8834
8835 if (parse_res == AARCH64_PARSE_OK)
361fb3ee 8836 return true;
0cfff2a1
KT
8837
8838 switch (parse_res)
8839 {
8840 case AARCH64_PARSE_MISSING_ARG:
fb241da2 8841 error ("missing cpu name in %<-mtune=%s%>", str);
0cfff2a1
KT
8842 break;
8843 case AARCH64_PARSE_INVALID_ARG:
8844 error ("unknown value %qs for -mtune", str);
01f44038 8845 aarch64_print_hint_for_core (str);
0cfff2a1
KT
8846 break;
8847 default:
8848 gcc_unreachable ();
8849 }
361fb3ee
KT
8850 return false;
8851}
8852
8853/* Return the CPU corresponding to the enum CPU.
8854 If it doesn't specify a cpu, return the default. */
8855
8856static const struct processor *
8857aarch64_get_tune_cpu (enum aarch64_processor cpu)
8858{
8859 if (cpu != aarch64_none)
8860 return &all_cores[cpu];
8861
8862 /* The & 0x3f is to extract the bottom 6 bits that encode the
8863 default cpu as selected by the --with-cpu GCC configure option
8864 in config.gcc.
8865 ???: The whole TARGET_CPU_DEFAULT and AARCH64_CPU_DEFAULT_FLAGS
8866 flags mechanism should be reworked to make it more sane. */
8867 return &all_cores[TARGET_CPU_DEFAULT & 0x3f];
8868}
8869
8870/* Return the architecture corresponding to the enum ARCH.
8871 If it doesn't specify a valid architecture, return the default. */
8872
8873static const struct processor *
8874aarch64_get_arch (enum aarch64_arch arch)
8875{
8876 if (arch != aarch64_no_arch)
8877 return &all_architectures[arch];
8878
8879 const struct processor *cpu = &all_cores[TARGET_CPU_DEFAULT & 0x3f];
8880
8881 return &all_architectures[cpu->arch];
0cfff2a1
KT
8882}
8883
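/* [Editor's illustration -- not part of the original aarch64.c.]  A minimal
   sketch of how the configure-time default is unpacked, assuming the packing
   described above: the bottom 6 bits hold the CPU enum and the remaining
   bits hold its ISA flags (matching the ">> 6" used in
   aarch64_override_options below).

     unsigned long packed = TARGET_CPU_DEFAULT;
     enum aarch64_processor default_cpu
       = (enum aarch64_processor) (packed & 0x3f);
     unsigned long default_isa = packed >> 6;
*/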
8884/* Implement TARGET_OPTION_OVERRIDE. This is called once in the beginning
8885 and is used to parse the -m{cpu,tune,arch} strings and setup the initial
8886 tuning structs. In particular it must set selected_tune and
8887 aarch64_isa_flags that define the available ISA features and tuning
8888 decisions. It must also set selected_arch as this will be used to
8889 output the .arch asm tags for each function. */
8890
8891static void
8892aarch64_override_options (void)
8893{
8894 unsigned long cpu_isa = 0;
8895 unsigned long arch_isa = 0;
8896 aarch64_isa_flags = 0;
8897
361fb3ee
KT
8898 bool valid_cpu = true;
8899 bool valid_tune = true;
8900 bool valid_arch = true;
8901
0cfff2a1
KT
8902 selected_cpu = NULL;
8903 selected_arch = NULL;
8904 selected_tune = NULL;
8905
8906 /* -mcpu=CPU is shorthand for -march=ARCH_FOR_CPU, -mtune=CPU.
8907 If either of -march or -mtune is given, they override their
8908 respective component of -mcpu. */
8909 if (aarch64_cpu_string)
361fb3ee
KT
8910 valid_cpu = aarch64_validate_mcpu (aarch64_cpu_string, &selected_cpu,
8911 &cpu_isa);
0cfff2a1
KT
8912
8913 if (aarch64_arch_string)
361fb3ee
KT
8914 valid_arch = aarch64_validate_march (aarch64_arch_string, &selected_arch,
8915 &arch_isa);
0cfff2a1
KT
8916
8917 if (aarch64_tune_string)
361fb3ee 8918 valid_tune = aarch64_validate_mtune (aarch64_tune_string, &selected_tune);
43e9d192
IB
8919
8920 /* If the user did not specify a processor, choose the default
8921 one for them. This will be the CPU set during configuration using
a3cd0246 8922 --with-cpu, otherwise it is "generic". */
43e9d192
IB
8923 if (!selected_cpu)
8924 {
0cfff2a1
KT
8925 if (selected_arch)
8926 {
8927 selected_cpu = &all_cores[selected_arch->ident];
8928 aarch64_isa_flags = arch_isa;
361fb3ee 8929 explicit_arch = selected_arch->arch;
0cfff2a1
KT
8930 }
8931 else
8932 {
361fb3ee
KT
8933 /* Get default configure-time CPU. */
8934 selected_cpu = aarch64_get_tune_cpu (aarch64_none);
0cfff2a1
KT
8935 aarch64_isa_flags = TARGET_CPU_DEFAULT >> 6;
8936 }
361fb3ee
KT
8937
8938 if (selected_tune)
8939 explicit_tune_core = selected_tune->ident;
0cfff2a1
KT
8940 }
8941 /* If both -mcpu and -march are specified, check that they are architecturally
8942 compatible, warn if they're not and prefer the -march ISA flags. */
8943 else if (selected_arch)
8944 {
8945 if (selected_arch->arch != selected_cpu->arch)
8946 {
8947 warning (0, "switch -mcpu=%s conflicts with -march=%s switch",
8948 all_architectures[selected_cpu->arch].name,
8949 selected_arch->name);
8950 }
8951 aarch64_isa_flags = arch_isa;
361fb3ee
KT
8952 explicit_arch = selected_arch->arch;
8953 explicit_tune_core = selected_tune ? selected_tune->ident
8954 : selected_cpu->ident;
0cfff2a1
KT
8955 }
8956 else
8957 {
8958 /* -mcpu but no -march. */
8959 aarch64_isa_flags = cpu_isa;
361fb3ee
KT
8960 explicit_tune_core = selected_tune ? selected_tune->ident
8961 : selected_cpu->ident;
8962 gcc_assert (selected_cpu);
8963 selected_arch = &all_architectures[selected_cpu->arch];
8964 explicit_arch = selected_arch->arch;
43e9d192
IB
8965 }
8966
0cfff2a1
KT
8967 /* Set the arch as well, as we will need it when outputting
8968 the .arch directive in assembly. */
8969 if (!selected_arch)
8970 {
8971 gcc_assert (selected_cpu);
8972 selected_arch = &all_architectures[selected_cpu->arch];
8973 }
43e9d192 8974
43e9d192 8975 if (!selected_tune)
3edaf26d 8976 selected_tune = selected_cpu;
43e9d192 8977
0cfff2a1
KT
8978#ifndef HAVE_AS_MABI_OPTION
8979 /* The compiler may have been configured with 2.23.* binutils, which does
8980 not have support for ILP32. */
8981 if (TARGET_ILP32)
8982 error ("Assembler does not support -mabi=ilp32");
8983#endif
43e9d192 8984
db58fd89
JW
8985 if (aarch64_ra_sign_scope != AARCH64_FUNCTION_NONE && TARGET_ILP32)
8986 sorry ("Return address signing is only supported for -mabi=lp64");
8987
361fb3ee
KT
8988 /* Make sure we properly set up the explicit options. */
8989 if ((aarch64_cpu_string && valid_cpu)
8990 || (aarch64_tune_string && valid_tune))
8991 gcc_assert (explicit_tune_core != aarch64_none);
8992
8993 if ((aarch64_cpu_string && valid_cpu)
8994 || (aarch64_arch_string && valid_arch))
8995 gcc_assert (explicit_arch != aarch64_no_arch);
8996
0cfff2a1
KT
8997 aarch64_override_options_internal (&global_options);
8998
8999 /* Save these options as the default ones in case we push and pop them later
9000 while processing functions with potential target attributes. */
9001 target_option_default_node = target_option_current_node
9002 = build_target_option_node (&global_options);
43e9d192
IB
9003}
9004
9005/* Implement targetm.override_options_after_change. */
9006
9007static void
9008aarch64_override_options_after_change (void)
9009{
0cfff2a1 9010 aarch64_override_options_after_change_1 (&global_options);
43e9d192
IB
9011}
9012
9013static struct machine_function *
9014aarch64_init_machine_status (void)
9015{
9016 struct machine_function *machine;
766090c2 9017 machine = ggc_cleared_alloc<machine_function> ();
43e9d192
IB
9018 return machine;
9019}
9020
9021void
9022aarch64_init_expanders (void)
9023{
9024 init_machine_status = aarch64_init_machine_status;
9025}
9026
9027/* Set aarch64_cmodel from OPTS, checking it against the PIC options. */
9028static void
0cfff2a1 9029initialize_aarch64_code_model (struct gcc_options *opts)
43e9d192 9030{
0cfff2a1 9031 if (opts->x_flag_pic)
43e9d192 9032 {
0cfff2a1 9033 switch (opts->x_aarch64_cmodel_var)
43e9d192
IB
9034 {
9035 case AARCH64_CMODEL_TINY:
9036 aarch64_cmodel = AARCH64_CMODEL_TINY_PIC;
9037 break;
9038 case AARCH64_CMODEL_SMALL:
34ecdb0f 9039#ifdef HAVE_AS_SMALL_PIC_RELOCS
1b1e81f8
JW
9040 aarch64_cmodel = (flag_pic == 2
9041 ? AARCH64_CMODEL_SMALL_PIC
9042 : AARCH64_CMODEL_SMALL_SPIC);
34ecdb0f
JW
9043#else
9044 aarch64_cmodel = AARCH64_CMODEL_SMALL_PIC;
9045#endif
43e9d192
IB
9046 break;
9047 case AARCH64_CMODEL_LARGE:
9048 sorry ("code model %qs with -f%s", "large",
0cfff2a1 9049 opts->x_flag_pic > 1 ? "PIC" : "pic");
1c652781 9050 break;
43e9d192
IB
9051 default:
9052 gcc_unreachable ();
9053 }
9054 }
9055 else
0cfff2a1 9056 aarch64_cmodel = opts->x_aarch64_cmodel_var;
43e9d192
IB
9057}
9058
361fb3ee
KT
9059/* Implement TARGET_OPTION_SAVE. */
9060
9061static void
9062aarch64_option_save (struct cl_target_option *ptr, struct gcc_options *opts)
9063{
9064 ptr->x_aarch64_override_tune_string = opts->x_aarch64_override_tune_string;
9065}
9066
9067/* Implement TARGET_OPTION_RESTORE. Restore the backend codegen decisions
9068 using the information saved in PTR. */
9069
9070static void
9071aarch64_option_restore (struct gcc_options *opts, struct cl_target_option *ptr)
9072{
9073 opts->x_explicit_tune_core = ptr->x_explicit_tune_core;
9074 selected_tune = aarch64_get_tune_cpu (ptr->x_explicit_tune_core);
9075 opts->x_explicit_arch = ptr->x_explicit_arch;
9076 selected_arch = aarch64_get_arch (ptr->x_explicit_arch);
9077 opts->x_aarch64_override_tune_string = ptr->x_aarch64_override_tune_string;
9078
9079 aarch64_override_options_internal (opts);
9080}
9081
9082/* Implement TARGET_OPTION_PRINT. */
9083
9084static void
9085aarch64_option_print (FILE *file, int indent, struct cl_target_option *ptr)
9086{
9087 const struct processor *cpu
9088 = aarch64_get_tune_cpu (ptr->x_explicit_tune_core);
9089 unsigned long isa_flags = ptr->x_aarch64_isa_flags;
9090 const struct processor *arch = aarch64_get_arch (ptr->x_explicit_arch);
054b4005 9091 std::string extension
04a99ebe 9092 = aarch64_get_extension_string_for_isa_flags (isa_flags, arch->flags);
361fb3ee
KT
9093
9094 fprintf (file, "%*sselected tune = %s\n", indent, "", cpu->name);
054b4005
JG
9095 fprintf (file, "%*sselected arch = %s%s\n", indent, "",
9096 arch->name, extension.c_str ());
361fb3ee
KT
9097}
9098
d78006d9
KT
9099static GTY(()) tree aarch64_previous_fndecl;
9100
e4ea20c8
KT
9101void
9102aarch64_reset_previous_fndecl (void)
9103{
9104 aarch64_previous_fndecl = NULL;
9105}
9106
acfc1ac1
KT
9107/* Restore or save the TREE_TARGET_GLOBALS from or to NEW_TREE.
9108 Used by aarch64_set_current_function and aarch64_pragma_target_parse to
9109 make sure optab availability predicates are recomputed when necessary. */
9110
9111void
9112aarch64_save_restore_target_globals (tree new_tree)
9113{
9114 if (TREE_TARGET_GLOBALS (new_tree))
9115 restore_target_globals (TREE_TARGET_GLOBALS (new_tree));
9116 else if (new_tree == target_option_default_node)
9117 restore_target_globals (&default_target_globals);
9118 else
9119 TREE_TARGET_GLOBALS (new_tree) = save_target_globals_default_opts ();
9120}
9121
d78006d9
KT
9122/* Implement TARGET_SET_CURRENT_FUNCTION. Unpack the codegen decisions
9123 like tuning and ISA features from the DECL_FUNCTION_SPECIFIC_TARGET
9124 of the function, if such exists. This function may be called multiple
9125 times on a single function so use aarch64_previous_fndecl to avoid
9126 setting up identical state. */
9127
9128static void
9129aarch64_set_current_function (tree fndecl)
9130{
acfc1ac1
KT
9131 if (!fndecl || fndecl == aarch64_previous_fndecl)
9132 return;
9133
d78006d9
KT
9134 tree old_tree = (aarch64_previous_fndecl
9135 ? DECL_FUNCTION_SPECIFIC_TARGET (aarch64_previous_fndecl)
9136 : NULL_TREE);
9137
acfc1ac1 9138 tree new_tree = DECL_FUNCTION_SPECIFIC_TARGET (fndecl);
d78006d9 9139
acfc1ac1
KT
9140 /* If current function has no attributes but the previous one did,
9141 use the default node. */
9142 if (!new_tree && old_tree)
9143 new_tree = target_option_default_node;
d78006d9 9144
acfc1ac1
KT
9145 /* If nothing to do, return. #pragma GCC reset or #pragma GCC pop to
9146 the default have been handled by aarch64_save_restore_target_globals from
9147 aarch64_pragma_target_parse. */
9148 if (old_tree == new_tree)
9149 return;
d78006d9 9150
acfc1ac1 9151 aarch64_previous_fndecl = fndecl;
6e17a23b 9152
acfc1ac1
KT
9153 /* First set the target options. */
9154 cl_target_option_restore (&global_options, TREE_TARGET_OPTION (new_tree));
6e17a23b 9155
acfc1ac1 9156 aarch64_save_restore_target_globals (new_tree);
d78006d9 9157}
361fb3ee 9158
5a2c8331
KT
9159/* Enum describing the various ways we can handle attributes.
9160 In many cases we can reuse the generic option handling machinery. */
9161
9162enum aarch64_attr_opt_type
9163{
9164 aarch64_attr_mask, /* Attribute should set a bit in target_flags. */
9165 aarch64_attr_bool, /* Attribute sets or unsets a boolean variable. */
9166 aarch64_attr_enum, /* Attribute sets an enum variable. */
9167 aarch64_attr_custom /* Attribute requires a custom handling function. */
9168};
9169
9170/* All the information needed to handle a target attribute.
9171 NAME is the name of the attribute.
9c582551 9172 ATTR_TYPE specifies the type of behavior of the attribute as described
5a2c8331
KT
9173 in the definition of enum aarch64_attr_opt_type.
9174 ALLOW_NEG is true if the attribute supports a "no-" form.
9175 HANDLER is the function that takes the attribute string and whether
9176 it is a pragma or attribute and handles the option. It is needed only
9177 when the ATTR_TYPE is aarch64_attr_custom.
9178 OPT_NUM is the enum specifying the option that the attribute modifies.
9c582551 9179 This is needed for attributes that mirror the behavior of a command-line
5a2c8331
KT
9180 option, that is, one whose ATTR_TYPE is aarch64_attr_mask, aarch64_attr_bool or
9181 aarch64_attr_enum. */
9182
9183struct aarch64_attribute_info
9184{
9185 const char *name;
9186 enum aarch64_attr_opt_type attr_type;
9187 bool allow_neg;
9188 bool (*handler) (const char *, const char *);
9189 enum opt_code opt_num;
9190};
9191
9192/* Handle the ARCH_STR argument to the arch= target attribute.
9193 PRAGMA_OR_ATTR is used in potential error messages. */
9194
9195static bool
9196aarch64_handle_attr_arch (const char *str, const char *pragma_or_attr)
9197{
9198 const struct processor *tmp_arch = NULL;
9199 enum aarch64_parse_opt_result parse_res
9200 = aarch64_parse_arch (str, &tmp_arch, &aarch64_isa_flags);
9201
9202 if (parse_res == AARCH64_PARSE_OK)
9203 {
9204 gcc_assert (tmp_arch);
9205 selected_arch = tmp_arch;
9206 explicit_arch = selected_arch->arch;
9207 return true;
9208 }
9209
9210 switch (parse_res)
9211 {
9212 case AARCH64_PARSE_MISSING_ARG:
9213 error ("missing architecture name in 'arch' target %s", pragma_or_attr);
9214 break;
9215 case AARCH64_PARSE_INVALID_ARG:
9216 error ("unknown value %qs for 'arch' target %s", str, pragma_or_attr);
01f44038 9217 aarch64_print_hint_for_arch (str);
5a2c8331
KT
9218 break;
9219 case AARCH64_PARSE_INVALID_FEATURE:
9220 error ("invalid feature modifier %qs for 'arch' target %s",
9221 str, pragma_or_attr);
9222 break;
9223 default:
9224 gcc_unreachable ();
9225 }
9226
9227 return false;
9228}
9229
9230/* Handle the argument CPU_STR to the cpu= target attribute.
9231 PRAGMA_OR_ATTR is used in potential error messages. */
9232
9233static bool
9234aarch64_handle_attr_cpu (const char *str, const char *pragma_or_attr)
9235{
9236 const struct processor *tmp_cpu = NULL;
9237 enum aarch64_parse_opt_result parse_res
9238 = aarch64_parse_cpu (str, &tmp_cpu, &aarch64_isa_flags);
9239
9240 if (parse_res == AARCH64_PARSE_OK)
9241 {
9242 gcc_assert (tmp_cpu);
9243 selected_tune = tmp_cpu;
9244 explicit_tune_core = selected_tune->ident;
9245
9246 selected_arch = &all_architectures[tmp_cpu->arch];
9247 explicit_arch = selected_arch->arch;
9248 return true;
9249 }
9250
9251 switch (parse_res)
9252 {
9253 case AARCH64_PARSE_MISSING_ARG:
9254 error ("missing cpu name in 'cpu' target %s", pragma_or_attr);
9255 break;
9256 case AARCH64_PARSE_INVALID_ARG:
9257 error ("unknown value %qs for 'cpu' target %s", str, pragma_or_attr);
01f44038 9258 aarch64_print_hint_for_core (str);
5a2c8331
KT
9259 break;
9260 case AARCH64_PARSE_INVALID_FEATURE:
9261 error ("invalid feature modifier %qs for 'cpu' target %s",
9262 str, pragma_or_attr);
9263 break;
9264 default:
9265 gcc_unreachable ();
9266 }
9267
9268 return false;
9269}
9270
9271/* Handle the argument STR to the tune= target attribute.
9272 PRAGMA_OR_ATTR is used in potential error messages. */
9273
9274static bool
9275aarch64_handle_attr_tune (const char *str, const char *pragma_or_attr)
9276{
9277 const struct processor *tmp_tune = NULL;
9278 enum aarch64_parse_opt_result parse_res
9279 = aarch64_parse_tune (str, &tmp_tune);
9280
9281 if (parse_res == AARCH64_PARSE_OK)
9282 {
9283 gcc_assert (tmp_tune);
9284 selected_tune = tmp_tune;
9285 explicit_tune_core = selected_tune->ident;
9286 return true;
9287 }
9288
9289 switch (parse_res)
9290 {
9291 case AARCH64_PARSE_INVALID_ARG:
9292 error ("unknown value %qs for 'tune' target %s", str, pragma_or_attr);
01f44038 9293 aarch64_print_hint_for_core (str);
5a2c8331
KT
9294 break;
9295 default:
9296 gcc_unreachable ();
9297 }
9298
9299 return false;
9300}
9301
9302/* Parse an architecture extensions target attribute string specified in STR.
9303 For example "+fp+nosimd". Show any errors if needed. Return TRUE
9304 if successful. Update aarch64_isa_flags to reflect the ISA features
9305 modified.
9306 PRAGMA_OR_ATTR is used in potential error messages. */
9307
9308static bool
9309aarch64_handle_attr_isa_flags (char *str, const char *pragma_or_attr)
9310{
9311 enum aarch64_parse_opt_result parse_res;
9312 unsigned long isa_flags = aarch64_isa_flags;
9313
e4ea20c8
KT
9314 /* We allow "+nothing" in the beginning to clear out all architectural
9315 features if the user wants to handpick specific features. */
9316 if (strncmp ("+nothing", str, 8) == 0)
9317 {
9318 isa_flags = 0;
9319 str += 8;
9320 }
9321
5a2c8331
KT
9322 parse_res = aarch64_parse_extension (str, &isa_flags);
9323
9324 if (parse_res == AARCH64_PARSE_OK)
9325 {
9326 aarch64_isa_flags = isa_flags;
9327 return true;
9328 }
9329
9330 switch (parse_res)
9331 {
9332 case AARCH64_PARSE_MISSING_ARG:
9333 error ("missing feature modifier in target %s %qs",
9334 pragma_or_attr, str);
9335 break;
9336
9337 case AARCH64_PARSE_INVALID_FEATURE:
9338 error ("invalid feature modifier in target %s %qs",
9339 pragma_or_attr, str);
9340 break;
9341
9342 default:
9343 gcc_unreachable ();
9344 }
9345
9346 return false;
9347}
9348
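/* [Editor's illustration -- not part of the original aarch64.c.]  User code
   whose attribute string is handled by aarch64_handle_attr_isa_flags above:
   "+nothing" clears the ISA flags first, then the listed features are added
   back.  The function name is invented for the example.  */

__attribute__ ((target ("+nothing+fp+simd")))
int
example_fp_simd_only (int x)
{
  return x + 1;
}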
9349/* The target attributes that we support. On top of these we also support just
9350 ISA extensions, like __attribute__ ((target ("+crc"))), but that case is
9351 handled explicitly in aarch64_process_one_target_attr. */
9352
9353static const struct aarch64_attribute_info aarch64_attributes[] =
9354{
9355 { "general-regs-only", aarch64_attr_mask, false, NULL,
9356 OPT_mgeneral_regs_only },
9357 { "fix-cortex-a53-835769", aarch64_attr_bool, true, NULL,
9358 OPT_mfix_cortex_a53_835769 },
48bb1a55
CL
9359 { "fix-cortex-a53-843419", aarch64_attr_bool, true, NULL,
9360 OPT_mfix_cortex_a53_843419 },
5a2c8331
KT
9361 { "cmodel", aarch64_attr_enum, false, NULL, OPT_mcmodel_ },
9362 { "strict-align", aarch64_attr_mask, false, NULL, OPT_mstrict_align },
9363 { "omit-leaf-frame-pointer", aarch64_attr_bool, true, NULL,
9364 OPT_momit_leaf_frame_pointer },
9365 { "tls-dialect", aarch64_attr_enum, false, NULL, OPT_mtls_dialect_ },
9366 { "arch", aarch64_attr_custom, false, aarch64_handle_attr_arch,
9367 OPT_march_ },
9368 { "cpu", aarch64_attr_custom, false, aarch64_handle_attr_cpu, OPT_mcpu_ },
9369 { "tune", aarch64_attr_custom, false, aarch64_handle_attr_tune,
9370 OPT_mtune_ },
db58fd89
JW
9371 { "sign-return-address", aarch64_attr_enum, false, NULL,
9372 OPT_msign_return_address_ },
5a2c8331
KT
9373 { NULL, aarch64_attr_custom, false, NULL, OPT____ }
9374};
9375
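/* [Editor's illustration -- not part of the original aarch64.c.]  User-level
   code exercising entries from the table above: a custom-handled attribute
   ("arch="), a negated boolean attribute, and an enum-valued attribute,
   combined in a comma-separated list.  The function names are invented.  */

__attribute__ ((target ("arch=armv8-a+crc")))
unsigned int
example_crc_user (unsigned int x)
{
  return x * 2;
}

__attribute__ ((target ("no-omit-leaf-frame-pointer,tls-dialect=trad")))
int
example_leaf_fp (int x)
{
  return x - 1;
}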
9376/* Parse ARG_STR which contains the definition of one target attribute.
9377 Show appropriate errors if any or return true if the attribute is valid.
9378 PRAGMA_OR_ATTR holds the string to use in error messages about whether
9379 we're processing a target attribute or pragma. */
9380
9381static bool
9382aarch64_process_one_target_attr (char *arg_str, const char* pragma_or_attr)
9383{
9384 bool invert = false;
9385
9386 size_t len = strlen (arg_str);
9387
9388 if (len == 0)
9389 {
9390 error ("malformed target %s", pragma_or_attr);
9391 return false;
9392 }
9393
9394 char *str_to_check = (char *) alloca (len + 1);
9395 strcpy (str_to_check, arg_str);
9396
9397 /* Skip leading whitespace. */
9398 while (*str_to_check == ' ' || *str_to_check == '\t')
9399 str_to_check++;
9400
9401 /* We have something like __attribute__ ((target ("+fp+nosimd"))).
9402 It is easier to detect and handle it explicitly here rather than going
9403 through the machinery for the rest of the target attributes in this
9404 function. */
9405 if (*str_to_check == '+')
9406 return aarch64_handle_attr_isa_flags (str_to_check, pragma_or_attr);
9407
9408 if (len > 3 && strncmp (str_to_check, "no-", 3) == 0)
9409 {
9410 invert = true;
9411 str_to_check += 3;
9412 }
9413 char *arg = strchr (str_to_check, '=');
9414
9415 /* If we found opt=foo then terminate STR_TO_CHECK at the '='
9416 and point ARG to "foo". */
9417 if (arg)
9418 {
9419 *arg = '\0';
9420 arg++;
9421 }
9422 const struct aarch64_attribute_info *p_attr;
16d12992 9423 bool found = false;
5a2c8331
KT
9424 for (p_attr = aarch64_attributes; p_attr->name; p_attr++)
9425 {
9426 /* If the names don't match up, or the user has given an argument
9427 to an attribute that doesn't accept one, or didn't give an argument
9428 to an attribute that expects one, fail to match. */
9429 if (strcmp (str_to_check, p_attr->name) != 0)
9430 continue;
9431
16d12992 9432 found = true;
5a2c8331
KT
9433 bool attr_need_arg_p = p_attr->attr_type == aarch64_attr_custom
9434 || p_attr->attr_type == aarch64_attr_enum;
9435
9436 if (attr_need_arg_p ^ (arg != NULL))
9437 {
9438 error ("target %s %qs does not accept an argument",
9439 pragma_or_attr, str_to_check);
9440 return false;
9441 }
9442
9443 /* If the name matches but the attribute does not allow "no-" versions
9444 then we can't match. */
9445 if (invert && !p_attr->allow_neg)
9446 {
9447 error ("target %s %qs does not allow a negated form",
9448 pragma_or_attr, str_to_check);
9449 return false;
9450 }
9451
9452 switch (p_attr->attr_type)
9453 {
9454 /* Has a custom handler registered.
9455 For example, cpu=, arch=, tune=. */
9456 case aarch64_attr_custom:
9457 gcc_assert (p_attr->handler);
9458 if (!p_attr->handler (arg, pragma_or_attr))
9459 return false;
9460 break;
9461
9462 /* Either set or unset a boolean option. */
9463 case aarch64_attr_bool:
9464 {
9465 struct cl_decoded_option decoded;
9466
9467 generate_option (p_attr->opt_num, NULL, !invert,
9468 CL_TARGET, &decoded);
9469 aarch64_handle_option (&global_options, &global_options_set,
9470 &decoded, input_location);
9471 break;
9472 }
9473 /* Set or unset a bit in the target_flags. aarch64_handle_option
9474 should know what mask to apply given the option number. */
9475 case aarch64_attr_mask:
9476 {
9477 struct cl_decoded_option decoded;
9478 /* We only need to specify the option number.
9479 aarch64_handle_option will know which mask to apply. */
9480 decoded.opt_index = p_attr->opt_num;
9481 decoded.value = !invert;
9482 aarch64_handle_option (&global_options, &global_options_set,
9483 &decoded, input_location);
9484 break;
9485 }
9486 /* Use the option setting machinery to set an option to an enum. */
9487 case aarch64_attr_enum:
9488 {
9489 gcc_assert (arg);
9490 bool valid;
9491 int value;
9492 valid = opt_enum_arg_to_value (p_attr->opt_num, arg,
9493 &value, CL_TARGET);
9494 if (valid)
9495 {
9496 set_option (&global_options, NULL, p_attr->opt_num, value,
9497 NULL, DK_UNSPECIFIED, input_location,
9498 global_dc);
9499 }
9500 else
9501 {
9502 error ("target %s %s=%s is not valid",
9503 pragma_or_attr, str_to_check, arg);
9504 }
9505 break;
9506 }
9507 default:
9508 gcc_unreachable ();
9509 }
9510 }
9511
16d12992
KT
9512 /* If we reached here we either have found an attribute and validated
9513 it or didn't match any. If we matched an attribute but its arguments
9514 were malformed we will have returned false already. */
9515 return found;
5a2c8331
KT
9516}
9517
9518/* Count how many times the character C appears in
9519 NULL-terminated string STR. */
9520
9521static unsigned int
9522num_occurences_in_str (char c, char *str)
9523{
9524 unsigned int res = 0;
9525 while (*str != '\0')
9526 {
9527 if (*str == c)
9528 res++;
9529
9530 str++;
9531 }
9532
9533 return res;
9534}
9535
9536/* Parse the tree in ARGS that contains the target attribute information
9537 and update the global target options space. PRAGMA_OR_ATTR is a string
9538 to be used in error messages, specifying whether this is processing
9539 a target attribute or a target pragma. */
9540
9541bool
9542aarch64_process_target_attr (tree args, const char* pragma_or_attr)
9543{
9544 if (TREE_CODE (args) == TREE_LIST)
9545 {
9546 do
9547 {
9548 tree head = TREE_VALUE (args);
9549 if (head)
9550 {
9551 if (!aarch64_process_target_attr (head, pragma_or_attr))
9552 return false;
9553 }
9554 args = TREE_CHAIN (args);
9555 } while (args);
9556
9557 return true;
9558 }
3b6cb9e3
ML
9559
9560 if (TREE_CODE (args) != STRING_CST)
9561 {
9562 error ("attribute %<target%> argument not a string");
9563 return false;
9564 }
5a2c8331
KT
9565
9566 size_t len = strlen (TREE_STRING_POINTER (args));
9567 char *str_to_check = (char *) alloca (len + 1);
9568 strcpy (str_to_check, TREE_STRING_POINTER (args));
9569
9570 if (len == 0)
9571 {
9572 error ("malformed target %s value", pragma_or_attr);
9573 return false;
9574 }
9575
9576 /* Used to catch empty strings between commas, i.e.
9577 attribute ((target ("attr1,,attr2"))). */
9578 unsigned int num_commas = num_occurences_in_str (',', str_to_check);
9579
9580 /* Handle multiple target attributes separated by ','. */
9581 char *token = strtok (str_to_check, ",");
9582
9583 unsigned int num_attrs = 0;
9584 while (token)
9585 {
9586 num_attrs++;
9587 if (!aarch64_process_one_target_attr (token, pragma_or_attr))
9588 {
9589 error ("target %s %qs is invalid", pragma_or_attr, token);
9590 return false;
9591 }
9592
9593 token = strtok (NULL, ",");
9594 }
9595
9596 if (num_attrs != num_commas + 1)
9597 {
9598 error ("malformed target %s list %qs",
9599 pragma_or_attr, TREE_STRING_POINTER (args));
9600 return false;
9601 }
9602
9603 return true;
9604}
9605
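/* [Editor's illustration -- not part of the original aarch64.c.]  The same
   parser serves #pragma GCC target, and the num_commas check above rejects
   empty list entries such as "attr1,,attr2".  The function name below is
   invented.  */

#pragma GCC push_options
#pragma GCC target ("cpu=cortex-a57,strict-align")
int
example_pragma_scoped (int x)
{
  return x + 3;
}
#pragma GCC pop_options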
9606/* Implement TARGET_OPTION_VALID_ATTRIBUTE_P. This is used to
9607 process attribute ((target ("..."))). */
9608
9609static bool
9610aarch64_option_valid_attribute_p (tree fndecl, tree, tree args, int)
9611{
9612 struct cl_target_option cur_target;
9613 bool ret;
9614 tree old_optimize;
9615 tree new_target, new_optimize;
9616 tree existing_target = DECL_FUNCTION_SPECIFIC_TARGET (fndecl);
91d0e8de
KT
9617
9618 /* If what we're processing is the current pragma string then the
9619 target option node is already stored in target_option_current_node
9620 by aarch64_pragma_target_parse in aarch64-c.c. Use that to avoid
9621 having to re-parse the string. This is especially useful to keep
9622 arm_neon.h compile times down since that header contains a lot
9623 of intrinsics enclosed in pragmas. */
9624 if (!existing_target && args == current_target_pragma)
9625 {
9626 DECL_FUNCTION_SPECIFIC_TARGET (fndecl) = target_option_current_node;
9627 return true;
9628 }
5a2c8331
KT
9629 tree func_optimize = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl);
9630
9631 old_optimize = build_optimization_node (&global_options);
9632 func_optimize = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl);
9633
9634 /* If the function changed the optimization levels as well as setting
9635 target options, start with the optimizations specified. */
9636 if (func_optimize && func_optimize != old_optimize)
9637 cl_optimization_restore (&global_options,
9638 TREE_OPTIMIZATION (func_optimize));
9639
9640 /* Save the current target options to restore at the end. */
9641 cl_target_option_save (&cur_target, &global_options);
9642
9643 /* If fndecl already has some target attributes applied to it, unpack
9644 them so that we add this attribute on top of them, rather than
9645 overwriting them. */
9646 if (existing_target)
9647 {
9648 struct cl_target_option *existing_options
9649 = TREE_TARGET_OPTION (existing_target);
9650
9651 if (existing_options)
9652 cl_target_option_restore (&global_options, existing_options);
9653 }
9654 else
9655 cl_target_option_restore (&global_options,
9656 TREE_TARGET_OPTION (target_option_current_node));
9657
9658
9659 ret = aarch64_process_target_attr (args, "attribute");
9660
9661 /* Set up any additional state. */
9662 if (ret)
9663 {
9664 aarch64_override_options_internal (&global_options);
e95a988a
KT
9665 /* Initialize SIMD builtins if we haven't already.
9666 Set current_target_pragma to NULL for the duration so that
9667 the builtin initialization code doesn't try to tag the functions
9668 being built with the attributes specified by any current pragma, thus
9669 going into an infinite recursion. */
9670 if (TARGET_SIMD)
9671 {
9672 tree saved_current_target_pragma = current_target_pragma;
9673 current_target_pragma = NULL;
9674 aarch64_init_simd_builtins ();
9675 current_target_pragma = saved_current_target_pragma;
9676 }
5a2c8331
KT
9677 new_target = build_target_option_node (&global_options);
9678 }
9679 else
9680 new_target = NULL;
9681
9682 new_optimize = build_optimization_node (&global_options);
9683
9684 if (fndecl && ret)
9685 {
9686 DECL_FUNCTION_SPECIFIC_TARGET (fndecl) = new_target;
9687
9688 if (old_optimize != new_optimize)
9689 DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl) = new_optimize;
9690 }
9691
9692 cl_target_option_restore (&global_options, &cur_target);
9693
9694 if (old_optimize != new_optimize)
9695 cl_optimization_restore (&global_options,
9696 TREE_OPTIMIZATION (old_optimize));
9697 return ret;
9698}
9699
1fd8d40c
KT
9700/* Helper for aarch64_can_inline_p. In the case where CALLER and CALLEE are
9701 tri-bool options (yes, no, don't care) and the default value is
9702 DEF, determine whether to reject inlining. */
9703
9704static bool
9705aarch64_tribools_ok_for_inlining_p (int caller, int callee,
9706 int dont_care, int def)
9707{
9708 /* If the callee doesn't care, always allow inlining. */
9709 if (callee == dont_care)
9710 return true;
9711
9712 /* If the caller doesn't care, always allow inlining. */
9713 if (caller == dont_care)
9714 return true;
9715
9716 /* Otherwise, allow inlining if either the callee and caller values
9717 agree, or if the callee is using the default value. */
9718 return (callee == caller || callee == def);
9719}
9720
9721/* Implement TARGET_CAN_INLINE_P. Decide whether it is valid
9722 to inline CALLEE into CALLER based on target-specific info.
9723 Make sure that the caller and callee have compatible architectural
9724 features. Then go through the other possible target attributes
9725 and see if they can block inlining. Try not to reject always_inline
9726 callees unless they are incompatible architecturally. */
9727
9728static bool
9729aarch64_can_inline_p (tree caller, tree callee)
9730{
9731 tree caller_tree = DECL_FUNCTION_SPECIFIC_TARGET (caller);
9732 tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (callee);
9733
9734 /* If callee has no option attributes, then it is ok to inline. */
9735 if (!callee_tree)
9736 return true;
9737
9738 struct cl_target_option *caller_opts
9739 = TREE_TARGET_OPTION (caller_tree ? caller_tree
9740 : target_option_default_node);
9741
9742 struct cl_target_option *callee_opts = TREE_TARGET_OPTION (callee_tree);
9743
9744
9745 /* Callee's ISA flags should be a subset of the caller's. */
9746 if ((caller_opts->x_aarch64_isa_flags & callee_opts->x_aarch64_isa_flags)
9747 != callee_opts->x_aarch64_isa_flags)
9748 return false;
9749
9750 /* Allow non-strict aligned functions inlining into strict
9751 aligned ones. */
9752 if ((TARGET_STRICT_ALIGN_P (caller_opts->x_target_flags)
9753 != TARGET_STRICT_ALIGN_P (callee_opts->x_target_flags))
9754 && !(!TARGET_STRICT_ALIGN_P (callee_opts->x_target_flags)
9755 && TARGET_STRICT_ALIGN_P (caller_opts->x_target_flags)))
9756 return false;
9757
9758 bool always_inline = lookup_attribute ("always_inline",
9759 DECL_ATTRIBUTES (callee));
9760
9761 /* If the architectural features match up and the callee is always_inline
9762 then the other attributes don't matter. */
9763 if (always_inline)
9764 return true;
9765
9766 if (caller_opts->x_aarch64_cmodel_var
9767 != callee_opts->x_aarch64_cmodel_var)
9768 return false;
9769
9770 if (caller_opts->x_aarch64_tls_dialect
9771 != callee_opts->x_aarch64_tls_dialect)
9772 return false;
9773
9774 /* Honour explicit requests to work around errata. */
9775 if (!aarch64_tribools_ok_for_inlining_p (
9776 caller_opts->x_aarch64_fix_a53_err835769,
9777 callee_opts->x_aarch64_fix_a53_err835769,
9778 2, TARGET_FIX_ERR_A53_835769_DEFAULT))
9779 return false;
9780
48bb1a55
CL
9781 if (!aarch64_tribools_ok_for_inlining_p (
9782 caller_opts->x_aarch64_fix_a53_err843419,
9783 callee_opts->x_aarch64_fix_a53_err843419,
9784 2, TARGET_FIX_ERR_A53_843419))
9785 return false;
9786
1fd8d40c
KT
9787 /* If the user explicitly specified -momit-leaf-frame-pointer for the
9788 caller and callee and they don't match up, reject inlining. */
9789 if (!aarch64_tribools_ok_for_inlining_p (
9790 caller_opts->x_flag_omit_leaf_frame_pointer,
9791 callee_opts->x_flag_omit_leaf_frame_pointer,
9792 2, 1))
9793 return false;
9794
9795 /* If the callee has specific tuning overrides, respect them. */
9796 if (callee_opts->x_aarch64_override_tune_string != NULL
9797 && caller_opts->x_aarch64_override_tune_string == NULL)
9798 return false;
9799
9800 /* If the user specified tuning override strings for the
9801 caller and callee and they don't match up, reject inlining.
9802 We just do a string compare here, we don't analyze the meaning
9803 of the string, as it would be too costly for little gain. */
9804 if (callee_opts->x_aarch64_override_tune_string
9805 && caller_opts->x_aarch64_override_tune_string
9806 && (strcmp (callee_opts->x_aarch64_override_tune_string,
9807 caller_opts->x_aarch64_override_tune_string) != 0))
9808 return false;
9809
9810 return true;
9811}
9812
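/* [Editor's illustration -- not part of the original aarch64.c.]  A sketch of
   the ISA-subset rule above: a callee can be inlined only if its ISA flags
   are a subset of the caller's.  All function names are invented.  */

__attribute__ ((target ("+crc"))) static inline int
callee_needs_crc (int x)
{
  return x ^ 1;
}

static inline int
callee_plain (int x)
{
  return x + 1;
}

__attribute__ ((target ("+crc")))
int
caller_with_crc (int x)
{
  /* Both calls are inline candidates: callee_plain needs no extra ISA bits
     and callee_needs_crc uses only flags this caller already enables.  */
  return callee_plain (x) + callee_needs_crc (x);
}

int
caller_plain (int x)
{
  /* Not inlinable: the callee requires +crc, which this caller lacks.  */
  return callee_needs_crc (x);
}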
43e9d192
IB
9813/* Return true if SYMBOL_REF X binds locally. */
9814
9815static bool
9816aarch64_symbol_binds_local_p (const_rtx x)
9817{
9818 return (SYMBOL_REF_DECL (x)
9819 ? targetm.binds_local_p (SYMBOL_REF_DECL (x))
9820 : SYMBOL_REF_LOCAL_P (x));
9821}
9822
9823/* Return true if SYMBOL_REF X is thread local */
9824static bool
9825aarch64_tls_symbol_p (rtx x)
9826{
9827 if (! TARGET_HAVE_TLS)
9828 return false;
9829
9830 if (GET_CODE (x) != SYMBOL_REF)
9831 return false;
9832
9833 return SYMBOL_REF_TLS_MODEL (x) != 0;
9834}
9835
9836/* Classify a TLS symbol into one of the TLS kinds. */
9837enum aarch64_symbol_type
9838aarch64_classify_tls_symbol (rtx x)
9839{
9840 enum tls_model tls_kind = tls_symbolic_operand_type (x);
9841
9842 switch (tls_kind)
9843 {
9844 case TLS_MODEL_GLOBAL_DYNAMIC:
9845 case TLS_MODEL_LOCAL_DYNAMIC:
9846 return TARGET_TLS_DESC ? SYMBOL_SMALL_TLSDESC : SYMBOL_SMALL_TLSGD;
9847
9848 case TLS_MODEL_INITIAL_EXEC:
5ae7caad
JW
9849 switch (aarch64_cmodel)
9850 {
9851 case AARCH64_CMODEL_TINY:
9852 case AARCH64_CMODEL_TINY_PIC:
9853 return SYMBOL_TINY_TLSIE;
9854 default:
79496620 9855 return SYMBOL_SMALL_TLSIE;
5ae7caad 9856 }
43e9d192
IB
9857
9858 case TLS_MODEL_LOCAL_EXEC:
cbf5629e
JW
9859 if (aarch64_tls_size == 12)
9860 return SYMBOL_TLSLE12;
9861 else if (aarch64_tls_size == 24)
9862 return SYMBOL_TLSLE24;
9863 else if (aarch64_tls_size == 32)
9864 return SYMBOL_TLSLE32;
9865 else if (aarch64_tls_size == 48)
9866 return SYMBOL_TLSLE48;
9867 else
9868 gcc_unreachable ();
43e9d192
IB
9869
9870 case TLS_MODEL_EMULATED:
9871 case TLS_MODEL_NONE:
9872 return SYMBOL_FORCE_TO_MEM;
9873
9874 default:
9875 gcc_unreachable ();
9876 }
9877}
9878
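/* [Editor's illustration -- not part of the original aarch64.c.]  Variables
   whose access sequences depend on the TLS classification above.  The model
   actually chosen also depends on -fpic/-fPIC, -ftls-model, -mtls-dialect,
   -mtls-size and symbol binding, so the comments are only indicative.  */

__thread int tls_counter;            /* often local-exec in an executable  */
static __thread int tls_private;     /* can typically use local-exec       */
extern __thread int tls_imported;    /* initial-exec or global-dynamic     */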
9879/* Return the method that should be used to access SYMBOL_REF or
a6e0bfa7 9880 LABEL_REF X. */
17f4d4bf 9881
43e9d192 9882enum aarch64_symbol_type
a6e0bfa7 9883aarch64_classify_symbol (rtx x, rtx offset)
43e9d192
IB
9884{
9885 if (GET_CODE (x) == LABEL_REF)
9886 {
9887 switch (aarch64_cmodel)
9888 {
9889 case AARCH64_CMODEL_LARGE:
9890 return SYMBOL_FORCE_TO_MEM;
9891
9892 case AARCH64_CMODEL_TINY_PIC:
9893 case AARCH64_CMODEL_TINY:
a5350ddc
CSS
9894 return SYMBOL_TINY_ABSOLUTE;
9895
1b1e81f8 9896 case AARCH64_CMODEL_SMALL_SPIC:
43e9d192
IB
9897 case AARCH64_CMODEL_SMALL_PIC:
9898 case AARCH64_CMODEL_SMALL:
9899 return SYMBOL_SMALL_ABSOLUTE;
9900
9901 default:
9902 gcc_unreachable ();
9903 }
9904 }
9905
17f4d4bf 9906 if (GET_CODE (x) == SYMBOL_REF)
43e9d192 9907 {
43e9d192
IB
9908 if (aarch64_tls_symbol_p (x))
9909 return aarch64_classify_tls_symbol (x);
9910
17f4d4bf
CSS
9911 switch (aarch64_cmodel)
9912 {
9913 case AARCH64_CMODEL_TINY:
15f6e0da 9914 /* When we retrieve symbol + offset address, we have to make sure
f8b756b7
TB
9915 the offset does not cause overflow of the final address. But
9916 we have no way of knowing the address of symbol at compile time
9917 so we can't accurately say if the distance between the PC and
9918 symbol + offset is outside the addressable range of +/-1M in the
9919 TINY code model. So we rely on images not being greater than
9920 1M and cap the offset at 1M and anything beyond 1M will have to
15f6e0da
RR
9921 be loaded using an alternative mechanism. Furthermore if the
9922 symbol is a weak reference to something that isn't known to
9923 resolve to a symbol in this module, then force to memory. */
9924 if ((SYMBOL_REF_WEAK (x)
9925 && !aarch64_symbol_binds_local_p (x))
f8b756b7 9926 || INTVAL (offset) < -1048575 || INTVAL (offset) > 1048575)
a5350ddc
CSS
9927 return SYMBOL_FORCE_TO_MEM;
9928 return SYMBOL_TINY_ABSOLUTE;
9929
17f4d4bf 9930 case AARCH64_CMODEL_SMALL:
f8b756b7
TB
9931 /* Same reasoning as the tiny code model, but the offset cap here is
9932 4G. */
15f6e0da
RR
9933 if ((SYMBOL_REF_WEAK (x)
9934 && !aarch64_symbol_binds_local_p (x))
3ff5d1f0
TB
9935 || !IN_RANGE (INTVAL (offset), HOST_WIDE_INT_C (-4294967263),
9936 HOST_WIDE_INT_C (4294967264)))
17f4d4bf
CSS
9937 return SYMBOL_FORCE_TO_MEM;
9938 return SYMBOL_SMALL_ABSOLUTE;
43e9d192 9939
17f4d4bf 9940 case AARCH64_CMODEL_TINY_PIC:
38e6c9a6 9941 if (!aarch64_symbol_binds_local_p (x))
87dd8ab0 9942 return SYMBOL_TINY_GOT;
38e6c9a6
MS
9943 return SYMBOL_TINY_ABSOLUTE;
9944
1b1e81f8 9945 case AARCH64_CMODEL_SMALL_SPIC:
17f4d4bf
CSS
9946 case AARCH64_CMODEL_SMALL_PIC:
9947 if (!aarch64_symbol_binds_local_p (x))
1b1e81f8
JW
9948 return (aarch64_cmodel == AARCH64_CMODEL_SMALL_SPIC
9949 ? SYMBOL_SMALL_GOT_28K : SYMBOL_SMALL_GOT_4G);
17f4d4bf 9950 return SYMBOL_SMALL_ABSOLUTE;
43e9d192 9951
9ee6540a
WD
9952 case AARCH64_CMODEL_LARGE:
9953 /* This is alright even in PIC code as the constant
9954 pool reference is always PC relative and within
9955 the same translation unit. */
9956 if (CONSTANT_POOL_ADDRESS_P (x))
9957 return SYMBOL_SMALL_ABSOLUTE;
9958 else
9959 return SYMBOL_FORCE_TO_MEM;
9960
17f4d4bf
CSS
9961 default:
9962 gcc_unreachable ();
9963 }
43e9d192 9964 }
17f4d4bf 9965
43e9d192
IB
9966 /* By default push everything into the constant pool. */
9967 return SYMBOL_FORCE_TO_MEM;
9968}
9969
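/* [Editor's illustration -- not part of the original aarch64.c.]  Two cases
   the small code model (non-PIC) classification above separates: a normal
   external symbol is addressed directly (SYMBOL_SMALL_ABSOLUTE, an adrp/add
   pair), while a weak reference that may not bind locally is forced to
   memory because its final address may be 0 or out of range.  */

extern int ordinary_var;
extern int weak_var __attribute__ ((weak));

void
example_take_addresses (int **p)
{
  p[0] = &ordinary_var;   /* SYMBOL_SMALL_ABSOLUTE                          */
  p[1] = &weak_var;       /* SYMBOL_FORCE_TO_MEM: address loaded from pool  */
}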
43e9d192
IB
9970bool
9971aarch64_constant_address_p (rtx x)
9972{
9973 return (CONSTANT_P (x) && memory_address_p (DImode, x));
9974}
9975
9976bool
9977aarch64_legitimate_pic_operand_p (rtx x)
9978{
9979 if (GET_CODE (x) == SYMBOL_REF
9980 || (GET_CODE (x) == CONST
9981 && GET_CODE (XEXP (x, 0)) == PLUS
9982 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF))
9983 return false;
9984
9985 return true;
9986}
9987
3520f7cc
JG
9988/* Return true if X holds a floating-point constant that is either +0.0
9989 or representable as a "quarter-precision" (8-bit) FP immediate. */
9990static bool
ef4bddc2 9991aarch64_valid_floating_const (machine_mode mode, rtx x)
3520f7cc
JG
9992{
9993 if (!CONST_DOUBLE_P (x))
9994 return false;
9995
6a0f8c01
JW
9996 if (aarch64_float_const_zero_rtx_p (x))
9997 return true;
9998
9999 /* We only handle moving 0.0 to a TFmode register. */
3520f7cc
JG
10000 if (!(mode == SFmode || mode == DFmode))
10001 return false;
10002
3520f7cc
JG
10003 return aarch64_float_const_representable_p (x);
10004}
10005
43e9d192 10006static bool
ef4bddc2 10007aarch64_legitimate_constant_p (machine_mode mode, rtx x)
43e9d192
IB
10008{
10009 /* Do not allow vector struct mode constants. We could support
10010 0 and -1 easily, but they need support in aarch64-simd.md. */
10011 if (TARGET_SIMD && aarch64_vect_struct_mode_p (mode))
10012 return false;
10013
10014 /* This could probably go away because
10015 we now decompose CONST_INTs according to expand_mov_immediate. */
10016 if ((GET_CODE (x) == CONST_VECTOR
48063b9d 10017 && aarch64_simd_valid_immediate (x, mode, false, NULL))
3520f7cc
JG
10018 || CONST_INT_P (x) || aarch64_valid_floating_const (mode, x))
10019 return !targetm.cannot_force_const_mem (mode, x);
43e9d192
IB
10020
10021 if (GET_CODE (x) == HIGH
10022 && aarch64_valid_symref (XEXP (x, 0), GET_MODE (XEXP (x, 0))))
10023 return true;
10024
10025 return aarch64_constant_address_p (x);
10026}
10027
a5bc806c 10028rtx
43e9d192
IB
10029aarch64_load_tp (rtx target)
10030{
10031 if (!target
10032 || GET_MODE (target) != Pmode
10033 || !register_operand (target, Pmode))
10034 target = gen_reg_rtx (Pmode);
10035
10036 /* Can return in any reg. */
10037 emit_insn (gen_aarch64_load_tp_hard (target));
10038 return target;
10039}
10040
43e9d192
IB
10041/* On AAPCS systems, this is the "struct __va_list". */
10042static GTY(()) tree va_list_type;
10043
10044/* Implement TARGET_BUILD_BUILTIN_VA_LIST.
10045 Return the type to use as __builtin_va_list.
10046
10047 AAPCS64 \S 7.1.4 requires that va_list be a typedef for a type defined as:
10048
10049 struct __va_list
10050 {
10051 void *__stack;
10052 void *__gr_top;
10053 void *__vr_top;
10054 int __gr_offs;
10055 int __vr_offs;
10056 }; */
10057
10058static tree
10059aarch64_build_builtin_va_list (void)
10060{
10061 tree va_list_name;
10062 tree f_stack, f_grtop, f_vrtop, f_groff, f_vroff;
10063
10064 /* Create the type. */
10065 va_list_type = lang_hooks.types.make_type (RECORD_TYPE);
10066 /* Give it the required name. */
10067 va_list_name = build_decl (BUILTINS_LOCATION,
10068 TYPE_DECL,
10069 get_identifier ("__va_list"),
10070 va_list_type);
10071 DECL_ARTIFICIAL (va_list_name) = 1;
10072 TYPE_NAME (va_list_type) = va_list_name;
665c56c6 10073 TYPE_STUB_DECL (va_list_type) = va_list_name;
43e9d192
IB
10074
10075 /* Create the fields. */
10076 f_stack = build_decl (BUILTINS_LOCATION,
10077 FIELD_DECL, get_identifier ("__stack"),
10078 ptr_type_node);
10079 f_grtop = build_decl (BUILTINS_LOCATION,
10080 FIELD_DECL, get_identifier ("__gr_top"),
10081 ptr_type_node);
10082 f_vrtop = build_decl (BUILTINS_LOCATION,
10083 FIELD_DECL, get_identifier ("__vr_top"),
10084 ptr_type_node);
10085 f_groff = build_decl (BUILTINS_LOCATION,
10086 FIELD_DECL, get_identifier ("__gr_offs"),
10087 integer_type_node);
10088 f_vroff = build_decl (BUILTINS_LOCATION,
10089 FIELD_DECL, get_identifier ("__vr_offs"),
10090 integer_type_node);
10091
88e3bdd1 10092 /* Tell tree-stdarg pass about our internal offset fields.
3fd6b9cc
JW
10093 NOTE: va_list_gpr/fpr_counter_field are only used for tree comparison
10094 purposes, to identify whether the code is updating va_list internal
10095 offset fields in an irregular way. */
10096 va_list_gpr_counter_field = f_groff;
10097 va_list_fpr_counter_field = f_vroff;
10098
43e9d192
IB
10099 DECL_ARTIFICIAL (f_stack) = 1;
10100 DECL_ARTIFICIAL (f_grtop) = 1;
10101 DECL_ARTIFICIAL (f_vrtop) = 1;
10102 DECL_ARTIFICIAL (f_groff) = 1;
10103 DECL_ARTIFICIAL (f_vroff) = 1;
10104
10105 DECL_FIELD_CONTEXT (f_stack) = va_list_type;
10106 DECL_FIELD_CONTEXT (f_grtop) = va_list_type;
10107 DECL_FIELD_CONTEXT (f_vrtop) = va_list_type;
10108 DECL_FIELD_CONTEXT (f_groff) = va_list_type;
10109 DECL_FIELD_CONTEXT (f_vroff) = va_list_type;
10110
10111 TYPE_FIELDS (va_list_type) = f_stack;
10112 DECL_CHAIN (f_stack) = f_grtop;
10113 DECL_CHAIN (f_grtop) = f_vrtop;
10114 DECL_CHAIN (f_vrtop) = f_groff;
10115 DECL_CHAIN (f_groff) = f_vroff;
10116
10117 /* Compute its layout. */
10118 layout_type (va_list_type);
10119
10120 return va_list_type;
10121}
10122
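/* [Editor's illustration -- not part of the original aarch64.c.]  A typical
   stdarg consumer: on AArch64 the va_list object below has the struct
   __va_list layout documented above, va_start expands to the code built by
   aarch64_expand_builtin_va_start and each va_arg is gimplified by
   aarch64_gimplify_va_arg_expr further down.  */

#include <stdarg.h>

int
example_sum_ints (int count, ...)
{
  va_list ap;
  int total = 0;

  va_start (ap, count);
  for (int i = 0; i < count; i++)
    total += va_arg (ap, int);
  va_end (ap);

  return total;
}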
10123/* Implement TARGET_EXPAND_BUILTIN_VA_START. */
10124static void
10125aarch64_expand_builtin_va_start (tree valist, rtx nextarg ATTRIBUTE_UNUSED)
10126{
10127 const CUMULATIVE_ARGS *cum;
10128 tree f_stack, f_grtop, f_vrtop, f_groff, f_vroff;
10129 tree stack, grtop, vrtop, groff, vroff;
10130 tree t;
88e3bdd1
JW
10131 int gr_save_area_size = cfun->va_list_gpr_size;
10132 int vr_save_area_size = cfun->va_list_fpr_size;
43e9d192
IB
10133 int vr_offset;
10134
10135 cum = &crtl->args.info;
88e3bdd1
JW
10136 if (cfun->va_list_gpr_size)
10137 gr_save_area_size = MIN ((NUM_ARG_REGS - cum->aapcs_ncrn) * UNITS_PER_WORD,
10138 cfun->va_list_gpr_size);
10139 if (cfun->va_list_fpr_size)
10140 vr_save_area_size = MIN ((NUM_FP_ARG_REGS - cum->aapcs_nvrn)
10141 * UNITS_PER_VREG, cfun->va_list_fpr_size);
43e9d192 10142
d5726973 10143 if (!TARGET_FLOAT)
43e9d192 10144 {
261fb553 10145 gcc_assert (cum->aapcs_nvrn == 0);
43e9d192
IB
10146 vr_save_area_size = 0;
10147 }
10148
10149 f_stack = TYPE_FIELDS (va_list_type_node);
10150 f_grtop = DECL_CHAIN (f_stack);
10151 f_vrtop = DECL_CHAIN (f_grtop);
10152 f_groff = DECL_CHAIN (f_vrtop);
10153 f_vroff = DECL_CHAIN (f_groff);
10154
10155 stack = build3 (COMPONENT_REF, TREE_TYPE (f_stack), valist, f_stack,
10156 NULL_TREE);
10157 grtop = build3 (COMPONENT_REF, TREE_TYPE (f_grtop), valist, f_grtop,
10158 NULL_TREE);
10159 vrtop = build3 (COMPONENT_REF, TREE_TYPE (f_vrtop), valist, f_vrtop,
10160 NULL_TREE);
10161 groff = build3 (COMPONENT_REF, TREE_TYPE (f_groff), valist, f_groff,
10162 NULL_TREE);
10163 vroff = build3 (COMPONENT_REF, TREE_TYPE (f_vroff), valist, f_vroff,
10164 NULL_TREE);
10165
10166 /* Emit code to initialize STACK, which points to the next varargs stack
10167 argument. CUM->AAPCS_STACK_SIZE gives the number of stack words used
10168 by named arguments. STACK is 8-byte aligned. */
10169 t = make_tree (TREE_TYPE (stack), virtual_incoming_args_rtx);
10170 if (cum->aapcs_stack_size > 0)
10171 t = fold_build_pointer_plus_hwi (t, cum->aapcs_stack_size * UNITS_PER_WORD);
10172 t = build2 (MODIFY_EXPR, TREE_TYPE (stack), stack, t);
10173 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
10174
10175 /* Emit code to initialize GRTOP, the top of the GR save area.
10176 virtual_incoming_args_rtx should have been 16 byte aligned. */
10177 t = make_tree (TREE_TYPE (grtop), virtual_incoming_args_rtx);
10178 t = build2 (MODIFY_EXPR, TREE_TYPE (grtop), grtop, t);
10179 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
10180
10181 /* Emit code to initialize VRTOP, the top of the VR save area.
10182 This address is gr_save_area_bytes below GRTOP, rounded
10183 down to the next 16-byte boundary. */
10184 t = make_tree (TREE_TYPE (vrtop), virtual_incoming_args_rtx);
4f59f9f2
UB
10185 vr_offset = ROUND_UP (gr_save_area_size,
10186 STACK_BOUNDARY / BITS_PER_UNIT);
43e9d192
IB
10187
10188 if (vr_offset)
10189 t = fold_build_pointer_plus_hwi (t, -vr_offset);
10190 t = build2 (MODIFY_EXPR, TREE_TYPE (vrtop), vrtop, t);
10191 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
10192
10193 /* Emit code to initialize GROFF, the offset from GRTOP of the
10194 next GPR argument. */
10195 t = build2 (MODIFY_EXPR, TREE_TYPE (groff), groff,
10196 build_int_cst (TREE_TYPE (groff), -gr_save_area_size));
10197 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
10198
10199 /* Likewise emit code to initialize VROFF, the offset from FTOP
10200 of the next VR argument. */
10201 t = build2 (MODIFY_EXPR, TREE_TYPE (vroff), vroff,
10202 build_int_cst (TREE_TYPE (vroff), -vr_save_area_size));
10203 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
10204}
10205
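/* [Editor's worked example -- not part of the original aarch64.c.]  Assuming
   the usual AAPCS64 values (NUM_ARG_REGS == 8, UNITS_PER_WORD == 8,
   NUM_FP_ARG_REGS == 8, UNITS_PER_VREG == 16) and no tree-stdarg trimming,
   va_start in

     int f (int a, int b, int c, ...);

   finds the named arguments in x0-x2, so the code above sets up

     gr_save_area_size = (8 - 3) * 8  = 40   =>  __gr_offs = -40
     vr_save_area_size = (8 - 0) * 16 = 128  =>  __vr_offs = -128

   and places __vr_top 48 bytes (40 rounded up to the 16-byte stack
   boundary) below __gr_top.  */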
10206/* Implement TARGET_GIMPLIFY_VA_ARG_EXPR. */
10207
10208static tree
10209aarch64_gimplify_va_arg_expr (tree valist, tree type, gimple_seq *pre_p,
10210 gimple_seq *post_p ATTRIBUTE_UNUSED)
10211{
10212 tree addr;
10213 bool indirect_p;
10214 bool is_ha; /* is HFA or HVA. */
10215 bool dw_align; /* double-word align. */
ef4bddc2 10216 machine_mode ag_mode = VOIDmode;
43e9d192 10217 int nregs;
ef4bddc2 10218 machine_mode mode;
43e9d192
IB
10219
10220 tree f_stack, f_grtop, f_vrtop, f_groff, f_vroff;
10221 tree stack, f_top, f_off, off, arg, roundup, on_stack;
10222 HOST_WIDE_INT size, rsize, adjust, align;
10223 tree t, u, cond1, cond2;
10224
10225 indirect_p = pass_by_reference (NULL, TYPE_MODE (type), type, false);
10226 if (indirect_p)
10227 type = build_pointer_type (type);
10228
10229 mode = TYPE_MODE (type);
10230
10231 f_stack = TYPE_FIELDS (va_list_type_node);
10232 f_grtop = DECL_CHAIN (f_stack);
10233 f_vrtop = DECL_CHAIN (f_grtop);
10234 f_groff = DECL_CHAIN (f_vrtop);
10235 f_vroff = DECL_CHAIN (f_groff);
10236
10237 stack = build3 (COMPONENT_REF, TREE_TYPE (f_stack), unshare_expr (valist),
10238 f_stack, NULL_TREE);
10239 size = int_size_in_bytes (type);
985b8393 10240 align = aarch64_function_arg_alignment (mode, type) / BITS_PER_UNIT;
43e9d192
IB
10241
10242 dw_align = false;
10243 adjust = 0;
10244 if (aarch64_vfp_is_call_or_return_candidate (mode,
10245 type,
10246 &ag_mode,
10247 &nregs,
10248 &is_ha))
10249 {
10250 /* TYPE passed in fp/simd registers. */
d5726973 10251 if (!TARGET_FLOAT)
261fb553 10252 aarch64_err_no_fpadvsimd (mode, "varargs");
43e9d192
IB
10253
10254 f_top = build3 (COMPONENT_REF, TREE_TYPE (f_vrtop),
10255 unshare_expr (valist), f_vrtop, NULL_TREE);
10256 f_off = build3 (COMPONENT_REF, TREE_TYPE (f_vroff),
10257 unshare_expr (valist), f_vroff, NULL_TREE);
10258
10259 rsize = nregs * UNITS_PER_VREG;
10260
10261 if (is_ha)
10262 {
10263 if (BYTES_BIG_ENDIAN && GET_MODE_SIZE (ag_mode) < UNITS_PER_VREG)
10264 adjust = UNITS_PER_VREG - GET_MODE_SIZE (ag_mode);
10265 }
10266 else if (BLOCK_REG_PADDING (mode, type, 1) == downward
10267 && size < UNITS_PER_VREG)
10268 {
10269 adjust = UNITS_PER_VREG - size;
10270 }
10271 }
10272 else
10273 {
10274 /* TYPE passed in general registers. */
10275 f_top = build3 (COMPONENT_REF, TREE_TYPE (f_grtop),
10276 unshare_expr (valist), f_grtop, NULL_TREE);
10277 f_off = build3 (COMPONENT_REF, TREE_TYPE (f_groff),
10278 unshare_expr (valist), f_groff, NULL_TREE);
4f59f9f2 10279 rsize = ROUND_UP (size, UNITS_PER_WORD);
43e9d192
IB
10280 nregs = rsize / UNITS_PER_WORD;
10281
10282 if (align > 8)
10283 dw_align = true;
10284
10285 if (BLOCK_REG_PADDING (mode, type, 1) == downward
10286 && size < UNITS_PER_WORD)
10287 {
10288 adjust = UNITS_PER_WORD - size;
10289 }
10290 }
10291
10292 /* Get a local temporary for the field value. */
10293 off = get_initialized_tmp_var (f_off, pre_p, NULL);
10294
10295 /* Emit code to branch if off >= 0. */
10296 t = build2 (GE_EXPR, boolean_type_node, off,
10297 build_int_cst (TREE_TYPE (off), 0));
10298 cond1 = build3 (COND_EXPR, ptr_type_node, t, NULL_TREE, NULL_TREE);
10299
10300 if (dw_align)
10301 {
10302 /* Emit: offs = (offs + 15) & -16. */
10303 t = build2 (PLUS_EXPR, TREE_TYPE (off), off,
10304 build_int_cst (TREE_TYPE (off), 15));
10305 t = build2 (BIT_AND_EXPR, TREE_TYPE (off), t,
10306 build_int_cst (TREE_TYPE (off), -16));
10307 roundup = build2 (MODIFY_EXPR, TREE_TYPE (off), off, t);
10308 }
10309 else
10310 roundup = NULL;
10311
10312 /* Update ap.__[g|v]r_offs */
10313 t = build2 (PLUS_EXPR, TREE_TYPE (off), off,
10314 build_int_cst (TREE_TYPE (off), rsize));
10315 t = build2 (MODIFY_EXPR, TREE_TYPE (f_off), unshare_expr (f_off), t);
10316
10317 /* String up. */
10318 if (roundup)
10319 t = build2 (COMPOUND_EXPR, TREE_TYPE (t), roundup, t);
10320
10321 /* [cond2] if (ap.__[g|v]r_offs > 0) */
10322 u = build2 (GT_EXPR, boolean_type_node, unshare_expr (f_off),
10323 build_int_cst (TREE_TYPE (f_off), 0));
10324 cond2 = build3 (COND_EXPR, ptr_type_node, u, NULL_TREE, NULL_TREE);
10325
10326 /* String up: make sure the assignment happens before the use. */
10327 t = build2 (COMPOUND_EXPR, TREE_TYPE (cond2), t, cond2);
10328 COND_EXPR_ELSE (cond1) = t;
10329
10330 /* Prepare the trees handling the argument that is passed on the stack;
10331 the top-level node will be stored in ON_STACK. */
10332 arg = get_initialized_tmp_var (stack, pre_p, NULL);
10333 if (align > 8)
10334 {
10335 /* if (alignof(type) > 8) (arg = arg + 15) & -16; */
10336 t = fold_convert (intDI_type_node, arg);
10337 t = build2 (PLUS_EXPR, TREE_TYPE (t), t,
10338 build_int_cst (TREE_TYPE (t), 15));
10339 t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
10340 build_int_cst (TREE_TYPE (t), -16));
10341 t = fold_convert (TREE_TYPE (arg), t);
10342 roundup = build2 (MODIFY_EXPR, TREE_TYPE (arg), arg, t);
10343 }
10344 else
10345 roundup = NULL;
10346 /* Advance ap.__stack */
10347 t = fold_convert (intDI_type_node, arg);
10348 t = build2 (PLUS_EXPR, TREE_TYPE (t), t,
10349 build_int_cst (TREE_TYPE (t), size + 7));
10350 t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
10351 build_int_cst (TREE_TYPE (t), -8));
10352 t = fold_convert (TREE_TYPE (arg), t);
10353 t = build2 (MODIFY_EXPR, TREE_TYPE (stack), unshare_expr (stack), t);
10354 /* String up roundup and advance. */
10355 if (roundup)
10356 t = build2 (COMPOUND_EXPR, TREE_TYPE (t), roundup, t);
10357 /* String up with arg */
10358 on_stack = build2 (COMPOUND_EXPR, TREE_TYPE (arg), t, arg);
10359 /* Big-endianness related address adjustment. */
10360 if (BLOCK_REG_PADDING (mode, type, 1) == downward
10361 && size < UNITS_PER_WORD)
10362 {
10363 t = build2 (POINTER_PLUS_EXPR, TREE_TYPE (arg), arg,
10364 size_int (UNITS_PER_WORD - size));
10365 on_stack = build2 (COMPOUND_EXPR, TREE_TYPE (arg), on_stack, t);
10366 }
10367
10368 COND_EXPR_THEN (cond1) = unshare_expr (on_stack);
10369 COND_EXPR_THEN (cond2) = unshare_expr (on_stack);
10370
10371 /* Adjustment to OFFSET in the case of BIG_ENDIAN. */
10372 t = off;
10373 if (adjust)
10374 t = build2 (PREINCREMENT_EXPR, TREE_TYPE (off), off,
10375 build_int_cst (TREE_TYPE (off), adjust));
10376
10377 t = fold_convert (sizetype, t);
10378 t = build2 (POINTER_PLUS_EXPR, TREE_TYPE (f_top), f_top, t);
10379
10380 if (is_ha)
10381 {
10382 /* type ha; // treat as "struct {ftype field[n];}"
10383 ... [computing offs]
10384 for (i = 0; i < nregs; ++i, offs += 16)
10385 ha.field[i] = *((ftype *)(ap.__vr_top + offs));
10386 return ha; */
10387 int i;
10388 tree tmp_ha, field_t, field_ptr_t;
10389
10390 /* Declare a local variable. */
10391 tmp_ha = create_tmp_var_raw (type, "ha");
10392 gimple_add_tmp_var (tmp_ha);
10393
10394 /* Establish the base type. */
10395 switch (ag_mode)
10396 {
10397 case SFmode:
10398 field_t = float_type_node;
10399 field_ptr_t = float_ptr_type_node;
10400 break;
10401 case DFmode:
10402 field_t = double_type_node;
10403 field_ptr_t = double_ptr_type_node;
10404 break;
10405 case TFmode:
10406 field_t = long_double_type_node;
10407 field_ptr_t = long_double_ptr_type_node;
10408 break;
43e9d192 10409 case HFmode:
1b62ed4f
JG
10410 field_t = aarch64_fp16_type_node;
10411 field_ptr_t = aarch64_fp16_ptr_type_node;
43e9d192 10412 break;
43e9d192
IB
10413 case V2SImode:
10414 case V4SImode:
10415 {
10416 tree innertype = make_signed_type (GET_MODE_PRECISION (SImode));
10417 field_t = build_vector_type_for_mode (innertype, ag_mode);
10418 field_ptr_t = build_pointer_type (field_t);
10419 }
10420 break;
10421 default:
10422 gcc_assert (0);
10423 }
10424
 10425 /* *(field_ptr_t)&ha = *((field_ptr_t)vr_saved_area) */
10426 tmp_ha = build1 (ADDR_EXPR, field_ptr_t, tmp_ha);
10427 addr = t;
10428 t = fold_convert (field_ptr_t, addr);
10429 t = build2 (MODIFY_EXPR, field_t,
10430 build1 (INDIRECT_REF, field_t, tmp_ha),
10431 build1 (INDIRECT_REF, field_t, t));
10432
10433 /* ha.field[i] = *((field_ptr_t)vr_saved_area + i) */
10434 for (i = 1; i < nregs; ++i)
10435 {
10436 addr = fold_build_pointer_plus_hwi (addr, UNITS_PER_VREG);
10437 u = fold_convert (field_ptr_t, addr);
10438 u = build2 (MODIFY_EXPR, field_t,
10439 build2 (MEM_REF, field_t, tmp_ha,
10440 build_int_cst (field_ptr_t,
10441 (i *
10442 int_size_in_bytes (field_t)))),
10443 build1 (INDIRECT_REF, field_t, u));
10444 t = build2 (COMPOUND_EXPR, TREE_TYPE (t), t, u);
10445 }
10446
10447 u = fold_convert (TREE_TYPE (f_top), tmp_ha);
10448 t = build2 (COMPOUND_EXPR, TREE_TYPE (f_top), t, u);
10449 }
10450
10451 COND_EXPR_ELSE (cond2) = t;
10452 addr = fold_convert (build_pointer_type (type), cond1);
10453 addr = build_va_arg_indirect_ref (addr);
10454
10455 if (indirect_p)
10456 addr = build_va_arg_indirect_ref (addr);
10457
10458 return addr;
10459}
10460
10461/* Implement TARGET_SETUP_INCOMING_VARARGS. */
10462
10463static void
ef4bddc2 10464aarch64_setup_incoming_varargs (cumulative_args_t cum_v, machine_mode mode,
43e9d192
IB
10465 tree type, int *pretend_size ATTRIBUTE_UNUSED,
10466 int no_rtl)
10467{
10468 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
10469 CUMULATIVE_ARGS local_cum;
88e3bdd1
JW
10470 int gr_saved = cfun->va_list_gpr_size;
10471 int vr_saved = cfun->va_list_fpr_size;
43e9d192
IB
10472
10473 /* The caller has advanced CUM up to, but not beyond, the last named
10474 argument. Advance a local copy of CUM past the last "real" named
10475 argument, to find out how many registers are left over. */
10476 local_cum = *cum;
 10477 aarch64_function_arg_advance (pack_cumulative_args (&local_cum), mode, type, true);
10478
88e3bdd1
JW
 10479 /* Find out how many registers we need to save.
 10480 Honor tree-stdarg analysis results. */
10481 if (cfun->va_list_gpr_size)
10482 gr_saved = MIN (NUM_ARG_REGS - local_cum.aapcs_ncrn,
10483 cfun->va_list_gpr_size / UNITS_PER_WORD);
10484 if (cfun->va_list_fpr_size)
10485 vr_saved = MIN (NUM_FP_ARG_REGS - local_cum.aapcs_nvrn,
10486 cfun->va_list_fpr_size / UNITS_PER_VREG);
43e9d192 10487
d5726973 10488 if (!TARGET_FLOAT)
43e9d192 10489 {
261fb553 10490 gcc_assert (local_cum.aapcs_nvrn == 0);
43e9d192
IB
10491 vr_saved = 0;
10492 }
10493
10494 if (!no_rtl)
10495 {
10496 if (gr_saved > 0)
10497 {
10498 rtx ptr, mem;
10499
10500 /* virtual_incoming_args_rtx should have been 16-byte aligned. */
10501 ptr = plus_constant (Pmode, virtual_incoming_args_rtx,
10502 - gr_saved * UNITS_PER_WORD);
10503 mem = gen_frame_mem (BLKmode, ptr);
10504 set_mem_alias_set (mem, get_varargs_alias_set ());
10505
10506 move_block_from_reg (local_cum.aapcs_ncrn + R0_REGNUM,
10507 mem, gr_saved);
10508 }
10509 if (vr_saved > 0)
10510 {
10511 /* We can't use move_block_from_reg, because it will use
10512 the wrong mode, storing D regs only. */
ef4bddc2 10513 machine_mode mode = TImode;
88e3bdd1 10514 int off, i, vr_start;
43e9d192
IB
10515
10516 /* Set OFF to the offset from virtual_incoming_args_rtx of
10517 the first vector register. The VR save area lies below
10518 the GR one, and is aligned to 16 bytes. */
4f59f9f2
UB
10519 off = -ROUND_UP (gr_saved * UNITS_PER_WORD,
10520 STACK_BOUNDARY / BITS_PER_UNIT);
43e9d192
IB
10521 off -= vr_saved * UNITS_PER_VREG;
10522
88e3bdd1
JW
10523 vr_start = V0_REGNUM + local_cum.aapcs_nvrn;
10524 for (i = 0; i < vr_saved; ++i)
43e9d192
IB
10525 {
10526 rtx ptr, mem;
10527
10528 ptr = plus_constant (Pmode, virtual_incoming_args_rtx, off);
10529 mem = gen_frame_mem (mode, ptr);
10530 set_mem_alias_set (mem, get_varargs_alias_set ());
88e3bdd1 10531 aarch64_emit_move (mem, gen_rtx_REG (mode, vr_start + i));
43e9d192
IB
10532 off += UNITS_PER_VREG;
10533 }
10534 }
10535 }
10536
10537 /* We don't save the size into *PRETEND_SIZE because we want to avoid
10538 any complication of having crtl->args.pretend_args_size changed. */
8799637a 10539 cfun->machine->frame.saved_varargs_size
4f59f9f2
UB
10540 = (ROUND_UP (gr_saved * UNITS_PER_WORD,
10541 STACK_BOUNDARY / BITS_PER_UNIT)
43e9d192
IB
10542 + vr_saved * UNITS_PER_VREG);
10543}
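/* A rough worked example of the save-area sizing above (hypothetical
   signature, assuming UNITS_PER_WORD == 8 and UNITS_PER_VREG == 16): for
   "int f (int n, ...)" with the full va_list in use, one GP register is
   consumed by the named argument N, so gr_saved == 7 and vr_saved == 8;
   the GP save area takes ROUND_UP (7 * 8, 16) == 64 bytes and the VR save
   area a further 8 * 16 == 128 bytes, giving saved_varargs_size == 192.  */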
10544
10545static void
10546aarch64_conditional_register_usage (void)
10547{
10548 int i;
10549 if (!TARGET_FLOAT)
10550 {
10551 for (i = V0_REGNUM; i <= V31_REGNUM; i++)
10552 {
10553 fixed_regs[i] = 1;
10554 call_used_regs[i] = 1;
10555 }
10556 }
10557}
10558
10559/* Walk down the type tree of TYPE counting consecutive base elements.
10560 If *MODEP is VOIDmode, then set it to the first valid floating point
10561 type. If a non-floating point type is found, or if a floating point
10562 type that doesn't match a non-VOIDmode *MODEP is found, then return -1,
10563 otherwise return the count in the sub-tree. */
10564static int
ef4bddc2 10565aapcs_vfp_sub_candidate (const_tree type, machine_mode *modep)
43e9d192 10566{
ef4bddc2 10567 machine_mode mode;
43e9d192
IB
10568 HOST_WIDE_INT size;
10569
10570 switch (TREE_CODE (type))
10571 {
10572 case REAL_TYPE:
10573 mode = TYPE_MODE (type);
1b62ed4f
JG
10574 if (mode != DFmode && mode != SFmode
10575 && mode != TFmode && mode != HFmode)
43e9d192
IB
10576 return -1;
10577
10578 if (*modep == VOIDmode)
10579 *modep = mode;
10580
10581 if (*modep == mode)
10582 return 1;
10583
10584 break;
10585
10586 case COMPLEX_TYPE:
10587 mode = TYPE_MODE (TREE_TYPE (type));
1b62ed4f
JG
10588 if (mode != DFmode && mode != SFmode
10589 && mode != TFmode && mode != HFmode)
43e9d192
IB
10590 return -1;
10591
10592 if (*modep == VOIDmode)
10593 *modep = mode;
10594
10595 if (*modep == mode)
10596 return 2;
10597
10598 break;
10599
10600 case VECTOR_TYPE:
10601 /* Use V2SImode and V4SImode as representatives of all 64-bit
10602 and 128-bit vector types. */
10603 size = int_size_in_bytes (type);
10604 switch (size)
10605 {
10606 case 8:
10607 mode = V2SImode;
10608 break;
10609 case 16:
10610 mode = V4SImode;
10611 break;
10612 default:
10613 return -1;
10614 }
10615
10616 if (*modep == VOIDmode)
10617 *modep = mode;
10618
10619 /* Vector modes are considered to be opaque: two vectors are
10620 equivalent for the purposes of being homogeneous aggregates
10621 if they are the same size. */
10622 if (*modep == mode)
10623 return 1;
10624
10625 break;
10626
10627 case ARRAY_TYPE:
10628 {
10629 int count;
10630 tree index = TYPE_DOMAIN (type);
10631
807e902e
KZ
10632 /* Can't handle incomplete types nor sizes that are not
10633 fixed. */
10634 if (!COMPLETE_TYPE_P (type)
10635 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
43e9d192
IB
10636 return -1;
10637
10638 count = aapcs_vfp_sub_candidate (TREE_TYPE (type), modep);
10639 if (count == -1
10640 || !index
10641 || !TYPE_MAX_VALUE (index)
cc269bb6 10642 || !tree_fits_uhwi_p (TYPE_MAX_VALUE (index))
43e9d192 10643 || !TYPE_MIN_VALUE (index)
cc269bb6 10644 || !tree_fits_uhwi_p (TYPE_MIN_VALUE (index))
43e9d192
IB
10645 || count < 0)
10646 return -1;
10647
ae7e9ddd
RS
10648 count *= (1 + tree_to_uhwi (TYPE_MAX_VALUE (index))
10649 - tree_to_uhwi (TYPE_MIN_VALUE (index)));
43e9d192
IB
10650
10651 /* There must be no padding. */
807e902e 10652 if (wi::ne_p (TYPE_SIZE (type), count * GET_MODE_BITSIZE (*modep)))
43e9d192
IB
10653 return -1;
10654
10655 return count;
10656 }
10657
10658 case RECORD_TYPE:
10659 {
10660 int count = 0;
10661 int sub_count;
10662 tree field;
10663
807e902e
KZ
10664 /* Can't handle incomplete types nor sizes that are not
10665 fixed. */
10666 if (!COMPLETE_TYPE_P (type)
10667 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
43e9d192
IB
10668 return -1;
10669
10670 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
10671 {
10672 if (TREE_CODE (field) != FIELD_DECL)
10673 continue;
10674
10675 sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep);
10676 if (sub_count < 0)
10677 return -1;
10678 count += sub_count;
10679 }
10680
10681 /* There must be no padding. */
807e902e 10682 if (wi::ne_p (TYPE_SIZE (type), count * GET_MODE_BITSIZE (*modep)))
43e9d192
IB
10683 return -1;
10684
10685 return count;
10686 }
10687
10688 case UNION_TYPE:
10689 case QUAL_UNION_TYPE:
10690 {
10691 /* These aren't very interesting except in a degenerate case. */
10692 int count = 0;
10693 int sub_count;
10694 tree field;
10695
807e902e
KZ
10696 /* Can't handle incomplete types nor sizes that are not
10697 fixed. */
10698 if (!COMPLETE_TYPE_P (type)
10699 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
43e9d192
IB
10700 return -1;
10701
10702 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
10703 {
10704 if (TREE_CODE (field) != FIELD_DECL)
10705 continue;
10706
10707 sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep);
10708 if (sub_count < 0)
10709 return -1;
10710 count = count > sub_count ? count : sub_count;
10711 }
10712
10713 /* There must be no padding. */
807e902e 10714 if (wi::ne_p (TYPE_SIZE (type), count * GET_MODE_BITSIZE (*modep)))
43e9d192
IB
10715 return -1;
10716
10717 return count;
10718 }
10719
10720 default:
10721 break;
10722 }
10723
10724 return -1;
10725}
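/* For example (hypothetical types), walking

     struct hfa { double a, b, c; };

   visits a RECORD_TYPE with three DFmode REAL_TYPE fields and returns 3
   with *MODEP set to DFmode, whereas

     struct mixed { double a; int b; };

   fails as soon as the INTEGER_TYPE field is reached and returns -1.  */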
10726
b6ec6215
KT
10727/* Return TRUE if the type, as described by TYPE and MODE, is a short vector
10728 type as described in AAPCS64 \S 4.1.2.
10729
10730 See the comment above aarch64_composite_type_p for the notes on MODE. */
10731
10732static bool
10733aarch64_short_vector_p (const_tree type,
10734 machine_mode mode)
10735{
10736 HOST_WIDE_INT size = -1;
10737
10738 if (type && TREE_CODE (type) == VECTOR_TYPE)
10739 size = int_size_in_bytes (type);
10740 else if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
10741 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
10742 size = GET_MODE_SIZE (mode);
10743
10744 return (size == 8 || size == 16);
10745}
10746
43e9d192
IB
10747/* Return TRUE if the type, as described by TYPE and MODE, is a composite
10748 type as described in AAPCS64 \S 4.3. This includes aggregate, union and
10749 array types. The C99 floating-point complex types are also considered
10750 as composite types, according to AAPCS64 \S 7.1.1. The complex integer
10751 types, which are GCC extensions and out of the scope of AAPCS64, are
10752 treated as composite types here as well.
10753
10754 Note that MODE itself is not sufficient in determining whether a type
10755 is such a composite type or not. This is because
10756 stor-layout.c:compute_record_mode may have already changed the MODE
10757 (BLKmode) of a RECORD_TYPE TYPE to some other mode. For example, a
10758 structure with only one field may have its MODE set to the mode of the
10759 field. Also an integer mode whose size matches the size of the
10760 RECORD_TYPE type may be used to substitute the original mode
10761 (i.e. BLKmode) in certain circumstances. In other words, MODE cannot be
10762 solely relied on. */
10763
10764static bool
10765aarch64_composite_type_p (const_tree type,
ef4bddc2 10766 machine_mode mode)
43e9d192 10767{
b6ec6215
KT
10768 if (aarch64_short_vector_p (type, mode))
10769 return false;
10770
43e9d192
IB
10771 if (type && (AGGREGATE_TYPE_P (type) || TREE_CODE (type) == COMPLEX_TYPE))
10772 return true;
10773
10774 if (mode == BLKmode
10775 || GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT
10776 || GET_MODE_CLASS (mode) == MODE_COMPLEX_INT)
10777 return true;
10778
10779 return false;
10780}
10781
43e9d192
IB
10782/* Return TRUE if an argument, whose type is described by TYPE and MODE,
10783 shall be passed or returned in simd/fp register(s) (providing these
10784 parameter passing registers are available).
10785
10786 Upon successful return, *COUNT returns the number of needed registers,
 10787 *BASE_MODE returns the mode of the individual register and when IS_HA
 10788 is not NULL, *IS_HA indicates whether or not the argument is a homogeneous
10789 floating-point aggregate or a homogeneous short-vector aggregate. */
10790
10791static bool
ef4bddc2 10792aarch64_vfp_is_call_or_return_candidate (machine_mode mode,
43e9d192 10793 const_tree type,
ef4bddc2 10794 machine_mode *base_mode,
43e9d192
IB
10795 int *count,
10796 bool *is_ha)
10797{
ef4bddc2 10798 machine_mode new_mode = VOIDmode;
43e9d192
IB
10799 bool composite_p = aarch64_composite_type_p (type, mode);
10800
10801 if (is_ha != NULL) *is_ha = false;
10802
10803 if ((!composite_p && GET_MODE_CLASS (mode) == MODE_FLOAT)
10804 || aarch64_short_vector_p (type, mode))
10805 {
10806 *count = 1;
10807 new_mode = mode;
10808 }
10809 else if (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT)
10810 {
10811 if (is_ha != NULL) *is_ha = true;
10812 *count = 2;
10813 new_mode = GET_MODE_INNER (mode);
10814 }
10815 else if (type && composite_p)
10816 {
10817 int ag_count = aapcs_vfp_sub_candidate (type, &new_mode);
10818
10819 if (ag_count > 0 && ag_count <= HA_MAX_NUM_FLDS)
10820 {
10821 if (is_ha != NULL) *is_ha = true;
10822 *count = ag_count;
10823 }
10824 else
10825 return false;
10826 }
10827 else
10828 return false;
10829
10830 *base_mode = new_mode;
10831 return true;
10832}
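/* Two illustrative cases for the classification above (hypothetical
   arguments): a "_Complex double" takes the MODE_COMPLEX_FLOAT path,
   giving *COUNT == 2, *BASE_MODE == DFmode and *IS_HA == true, while a
   128-bit short vector such as int32x4_t is accepted directly with
   *COUNT == 1 and *BASE_MODE equal to its own vector mode.  */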
10833
10834/* Implement TARGET_STRUCT_VALUE_RTX. */
10835
10836static rtx
10837aarch64_struct_value_rtx (tree fndecl ATTRIBUTE_UNUSED,
10838 int incoming ATTRIBUTE_UNUSED)
10839{
10840 return gen_rtx_REG (Pmode, AARCH64_STRUCT_VALUE_REGNUM);
10841}
10842
10843/* Implements target hook vector_mode_supported_p. */
10844static bool
ef4bddc2 10845aarch64_vector_mode_supported_p (machine_mode mode)
43e9d192
IB
10846{
10847 if (TARGET_SIMD
10848 && (mode == V4SImode || mode == V8HImode
10849 || mode == V16QImode || mode == V2DImode
10850 || mode == V2SImode || mode == V4HImode
10851 || mode == V8QImode || mode == V2SFmode
ad7d90cc 10852 || mode == V4SFmode || mode == V2DFmode
71a11456 10853 || mode == V4HFmode || mode == V8HFmode
ad7d90cc 10854 || mode == V1DFmode))
43e9d192
IB
10855 return true;
10856
10857 return false;
10858}
10859
b7342d25
IB
10860/* Return appropriate SIMD container
10861 for MODE within a vector of WIDTH bits. */
ef4bddc2
RS
10862static machine_mode
10863aarch64_simd_container_mode (machine_mode mode, unsigned width)
43e9d192 10864{
b7342d25 10865 gcc_assert (width == 64 || width == 128);
43e9d192 10866 if (TARGET_SIMD)
b7342d25
IB
10867 {
10868 if (width == 128)
10869 switch (mode)
10870 {
10871 case DFmode:
10872 return V2DFmode;
10873 case SFmode:
10874 return V4SFmode;
b719f884
JG
10875 case HFmode:
10876 return V8HFmode;
b7342d25
IB
10877 case SImode:
10878 return V4SImode;
10879 case HImode:
10880 return V8HImode;
10881 case QImode:
10882 return V16QImode;
10883 case DImode:
10884 return V2DImode;
10885 default:
10886 break;
10887 }
10888 else
10889 switch (mode)
10890 {
10891 case SFmode:
10892 return V2SFmode;
b719f884
JG
10893 case HFmode:
10894 return V4HFmode;
b7342d25
IB
10895 case SImode:
10896 return V2SImode;
10897 case HImode:
10898 return V4HImode;
10899 case QImode:
10900 return V8QImode;
10901 default:
10902 break;
10903 }
10904 }
43e9d192
IB
10905 return word_mode;
10906}
10907
b7342d25 10908/* Return 128-bit container as the preferred SIMD mode for MODE. */
ef4bddc2
RS
10909static machine_mode
10910aarch64_preferred_simd_mode (machine_mode mode)
b7342d25
IB
10911{
10912 return aarch64_simd_container_mode (mode, 128);
10913}
10914
3b357264
JG
10915/* Return the bitmask of possible vector sizes for the vectorizer
10916 to iterate over. */
10917static unsigned int
10918aarch64_autovectorize_vector_sizes (void)
10919{
10920 return (16 | 8);
10921}
10922
ac2b960f
YZ
10923/* Implement TARGET_MANGLE_TYPE. */
10924
6f549691 10925static const char *
ac2b960f
YZ
10926aarch64_mangle_type (const_tree type)
10927{
10928 /* The AArch64 ABI documents say that "__va_list" has to be
 10929 mangled as if it is in the "std" namespace. */
10930 if (lang_hooks.types_compatible_p (CONST_CAST_TREE (type), va_list_type))
10931 return "St9__va_list";
10932
c2ec330c
AL
10933 /* Half-precision float. */
10934 if (TREE_CODE (type) == REAL_TYPE && TYPE_PRECISION (type) == 16)
10935 return "Dh";
10936
f9d53c27
TB
10937 /* Mangle AArch64-specific internal types. TYPE_NAME is non-NULL_TREE for
10938 builtin types. */
10939 if (TYPE_NAME (type) != NULL)
10940 return aarch64_mangle_builtin_type (type);
c6fc9e43 10941
ac2b960f
YZ
10942 /* Use the default mangling. */
10943 return NULL;
10944}
10945
75cf1494
KT
10946/* Find the first rtx_insn before insn that will generate an assembly
10947 instruction. */
10948
10949static rtx_insn *
10950aarch64_prev_real_insn (rtx_insn *insn)
10951{
10952 if (!insn)
10953 return NULL;
10954
10955 do
10956 {
10957 insn = prev_real_insn (insn);
10958 }
10959 while (insn && recog_memoized (insn) < 0);
10960
10961 return insn;
10962}
10963
10964static bool
10965is_madd_op (enum attr_type t1)
10966{
10967 unsigned int i;
10968 /* A number of these may be AArch32 only. */
10969 enum attr_type mlatypes[] = {
10970 TYPE_MLA, TYPE_MLAS, TYPE_SMLAD, TYPE_SMLADX, TYPE_SMLAL, TYPE_SMLALD,
10971 TYPE_SMLALS, TYPE_SMLALXY, TYPE_SMLAWX, TYPE_SMLAWY, TYPE_SMLAXY,
10972 TYPE_SMMLA, TYPE_UMLAL, TYPE_UMLALS,TYPE_SMLSD, TYPE_SMLSDX, TYPE_SMLSLD
10973 };
10974
10975 for (i = 0; i < sizeof (mlatypes) / sizeof (enum attr_type); i++)
10976 {
10977 if (t1 == mlatypes[i])
10978 return true;
10979 }
10980
10981 return false;
10982}
10983
10984/* Check if there is a register dependency between a load and the insn
10985 for which we hold recog_data. */
10986
10987static bool
10988dep_between_memop_and_curr (rtx memop)
10989{
10990 rtx load_reg;
10991 int opno;
10992
8baff86e 10993 gcc_assert (GET_CODE (memop) == SET);
75cf1494
KT
10994
10995 if (!REG_P (SET_DEST (memop)))
10996 return false;
10997
10998 load_reg = SET_DEST (memop);
8baff86e 10999 for (opno = 1; opno < recog_data.n_operands; opno++)
75cf1494
KT
11000 {
11001 rtx operand = recog_data.operand[opno];
11002 if (REG_P (operand)
11003 && reg_overlap_mentioned_p (load_reg, operand))
11004 return true;
11005
11006 }
11007 return false;
11008}
11009
8baff86e
KT
11010
11011/* When working around the Cortex-A53 erratum 835769,
11012 given rtx_insn INSN, return true if it is a 64-bit multiply-accumulate
11013 instruction and has a preceding memory instruction such that a NOP
11014 should be inserted between them. */
11015
75cf1494
KT
11016bool
11017aarch64_madd_needs_nop (rtx_insn* insn)
11018{
11019 enum attr_type attr_type;
11020 rtx_insn *prev;
11021 rtx body;
11022
b32c1043 11023 if (!TARGET_FIX_ERR_A53_835769)
75cf1494
KT
11024 return false;
11025
e322d6e3 11026 if (!INSN_P (insn) || recog_memoized (insn) < 0)
75cf1494
KT
11027 return false;
11028
11029 attr_type = get_attr_type (insn);
11030 if (!is_madd_op (attr_type))
11031 return false;
11032
11033 prev = aarch64_prev_real_insn (insn);
3fea1a75
KT
11034 /* aarch64_prev_real_insn can call recog_memoized on insns other than INSN.
11035 Restore recog state to INSN to avoid state corruption. */
11036 extract_constrain_insn_cached (insn);
11037
550e2205 11038 if (!prev || !contains_mem_rtx_p (PATTERN (prev)))
75cf1494
KT
11039 return false;
11040
11041 body = single_set (prev);
11042
11043 /* If the previous insn is a memory op and there is no dependency between
8baff86e
KT
11044 it and the DImode madd, emit a NOP between them. If body is NULL then we
11045 have a complex memory operation, probably a load/store pair.
11046 Be conservative for now and emit a NOP. */
11047 if (GET_MODE (recog_data.operand[0]) == DImode
11048 && (!body || !dep_between_memop_and_curr (body)))
75cf1494
KT
11049 return true;
11050
11051 return false;
11052
11053}
11054
8baff86e
KT
11055
11056/* Implement FINAL_PRESCAN_INSN. */
11057
75cf1494
KT
11058void
11059aarch64_final_prescan_insn (rtx_insn *insn)
11060{
11061 if (aarch64_madd_needs_nop (insn))
11062 fprintf (asm_out_file, "\tnop // between mem op and mult-accumulate\n");
11063}
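/* The effect on the emitted assembly is roughly the following (hypothetical
   registers), when a 64-bit multiply-accumulate that does not consume the
   loaded value directly follows a memory operation:

	ldr	x2, [x0]
	nop	// between mem op and mult-accumulate
	madd	x3, x4, x5, x6

   The inserted nop breaks the sequence that can trigger erratum 835769.  */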
11064
11065
43e9d192 11066/* Return the equivalent letter for size. */
81c2dfb9 11067static char
43e9d192
IB
11068sizetochar (int size)
11069{
11070 switch (size)
11071 {
11072 case 64: return 'd';
11073 case 32: return 's';
11074 case 16: return 'h';
11075 case 8 : return 'b';
11076 default: gcc_unreachable ();
11077 }
11078}
11079
3520f7cc
JG
11080/* Return true iff x is a uniform vector of floating-point
11081 constants, and the constant can be represented in
 11082 quarter-precision form. Note, as aarch64_float_const_representable_p
11083 rejects both +0.0 and -0.0, we will also reject +0.0 and -0.0. */
11084static bool
11085aarch64_vect_float_const_representable_p (rtx x)
11086{
92695fbb
RS
11087 rtx elt;
11088 return (GET_MODE_CLASS (GET_MODE (x)) == MODE_VECTOR_FLOAT
11089 && const_vec_duplicate_p (x, &elt)
11090 && aarch64_float_const_representable_p (elt));
3520f7cc
JG
11091}
11092
d8edd899 11093/* Return true for valid and false for invalid. */
3ea63f60 11094bool
ef4bddc2 11095aarch64_simd_valid_immediate (rtx op, machine_mode mode, bool inverse,
48063b9d 11096 struct simd_immediate_info *info)
43e9d192
IB
11097{
11098#define CHECK(STRIDE, ELSIZE, CLASS, TEST, SHIFT, NEG) \
11099 matches = 1; \
11100 for (i = 0; i < idx; i += (STRIDE)) \
11101 if (!(TEST)) \
11102 matches = 0; \
11103 if (matches) \
11104 { \
11105 immtype = (CLASS); \
11106 elsize = (ELSIZE); \
43e9d192
IB
11107 eshift = (SHIFT); \
11108 emvn = (NEG); \
11109 break; \
11110 }
11111
11112 unsigned int i, elsize = 0, idx = 0, n_elts = CONST_VECTOR_NUNITS (op);
cb5ca315 11113 unsigned int innersize = GET_MODE_UNIT_SIZE (mode);
43e9d192 11114 unsigned char bytes[16];
43e9d192
IB
11115 int immtype = -1, matches;
11116 unsigned int invmask = inverse ? 0xff : 0;
11117 int eshift, emvn;
11118
43e9d192 11119 if (GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
3520f7cc 11120 {
81c2dfb9
IB
11121 if (! (aarch64_simd_imm_zero_p (op, mode)
11122 || aarch64_vect_float_const_representable_p (op)))
d8edd899 11123 return false;
3520f7cc 11124
48063b9d
IB
11125 if (info)
11126 {
11127 info->value = CONST_VECTOR_ELT (op, 0);
81c2dfb9 11128 info->element_width = GET_MODE_BITSIZE (GET_MODE (info->value));
48063b9d
IB
11129 info->mvn = false;
11130 info->shift = 0;
11131 }
3520f7cc 11132
d8edd899 11133 return true;
3520f7cc 11134 }
43e9d192
IB
11135
11136 /* Splat vector constant out into a byte vector. */
11137 for (i = 0; i < n_elts; i++)
11138 {
4b1e108c
AL
11139 /* The vector is provided in gcc endian-neutral fashion. For aarch64_be,
11140 it must be laid out in the vector register in reverse order. */
11141 rtx el = CONST_VECTOR_ELT (op, BYTES_BIG_ENDIAN ? (n_elts - 1 - i) : i);
43e9d192 11142 unsigned HOST_WIDE_INT elpart;
43e9d192 11143
ee78df47
KT
11144 gcc_assert (CONST_INT_P (el));
11145 elpart = INTVAL (el);
11146
11147 for (unsigned int byte = 0; byte < innersize; byte++)
11148 {
11149 bytes[idx++] = (elpart & 0xff) ^ invmask;
11150 elpart >>= BITS_PER_UNIT;
11151 }
43e9d192 11152
43e9d192
IB
11153 }
11154
11155 /* Sanity check. */
11156 gcc_assert (idx == GET_MODE_SIZE (mode));
11157
11158 do
11159 {
11160 CHECK (4, 32, 0, bytes[i] == bytes[0] && bytes[i + 1] == 0
11161 && bytes[i + 2] == 0 && bytes[i + 3] == 0, 0, 0);
11162
11163 CHECK (4, 32, 1, bytes[i] == 0 && bytes[i + 1] == bytes[1]
11164 && bytes[i + 2] == 0 && bytes[i + 3] == 0, 8, 0);
11165
11166 CHECK (4, 32, 2, bytes[i] == 0 && bytes[i + 1] == 0
11167 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0, 16, 0);
11168
11169 CHECK (4, 32, 3, bytes[i] == 0 && bytes[i + 1] == 0
11170 && bytes[i + 2] == 0 && bytes[i + 3] == bytes[3], 24, 0);
11171
11172 CHECK (2, 16, 4, bytes[i] == bytes[0] && bytes[i + 1] == 0, 0, 0);
11173
11174 CHECK (2, 16, 5, bytes[i] == 0 && bytes[i + 1] == bytes[1], 8, 0);
11175
11176 CHECK (4, 32, 6, bytes[i] == bytes[0] && bytes[i + 1] == 0xff
11177 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff, 0, 1);
11178
11179 CHECK (4, 32, 7, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
11180 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff, 8, 1);
11181
11182 CHECK (4, 32, 8, bytes[i] == 0xff && bytes[i + 1] == 0xff
11183 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff, 16, 1);
11184
11185 CHECK (4, 32, 9, bytes[i] == 0xff && bytes[i + 1] == 0xff
11186 && bytes[i + 2] == 0xff && bytes[i + 3] == bytes[3], 24, 1);
11187
11188 CHECK (2, 16, 10, bytes[i] == bytes[0] && bytes[i + 1] == 0xff, 0, 1);
11189
11190 CHECK (2, 16, 11, bytes[i] == 0xff && bytes[i + 1] == bytes[1], 8, 1);
11191
11192 CHECK (4, 32, 12, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
e4f0f84d 11193 && bytes[i + 2] == 0 && bytes[i + 3] == 0, 8, 0);
43e9d192
IB
11194
11195 CHECK (4, 32, 13, bytes[i] == 0 && bytes[i + 1] == bytes[1]
e4f0f84d 11196 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff, 8, 1);
43e9d192
IB
11197
11198 CHECK (4, 32, 14, bytes[i] == 0xff && bytes[i + 1] == 0xff
e4f0f84d 11199 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0, 16, 0);
43e9d192
IB
11200
11201 CHECK (4, 32, 15, bytes[i] == 0 && bytes[i + 1] == 0
e4f0f84d 11202 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff, 16, 1);
43e9d192
IB
11203
11204 CHECK (1, 8, 16, bytes[i] == bytes[0], 0, 0);
11205
11206 CHECK (1, 64, 17, (bytes[i] == 0 || bytes[i] == 0xff)
11207 && bytes[i] == bytes[(i + 8) % idx], 0, 0);
11208 }
11209 while (0);
11210
e4f0f84d 11211 if (immtype == -1)
d8edd899 11212 return false;
43e9d192 11213
48063b9d 11214 if (info)
43e9d192 11215 {
48063b9d 11216 info->element_width = elsize;
48063b9d
IB
11217 info->mvn = emvn != 0;
11218 info->shift = eshift;
11219
43e9d192
IB
11220 unsigned HOST_WIDE_INT imm = 0;
11221
e4f0f84d
TB
11222 if (immtype >= 12 && immtype <= 15)
11223 info->msl = true;
11224
43e9d192
IB
11225 /* Un-invert bytes of recognized vector, if necessary. */
11226 if (invmask != 0)
11227 for (i = 0; i < idx; i++)
11228 bytes[i] ^= invmask;
11229
11230 if (immtype == 17)
11231 {
11232 /* FIXME: Broken on 32-bit H_W_I hosts. */
11233 gcc_assert (sizeof (HOST_WIDE_INT) == 8);
11234
11235 for (i = 0; i < 8; i++)
11236 imm |= (unsigned HOST_WIDE_INT) (bytes[i] ? 0xff : 0)
11237 << (i * BITS_PER_UNIT);
11238
43e9d192 11239
48063b9d
IB
11240 info->value = GEN_INT (imm);
11241 }
11242 else
11243 {
11244 for (i = 0; i < elsize / BITS_PER_UNIT; i++)
11245 imm |= (unsigned HOST_WIDE_INT) bytes[i] << (i * BITS_PER_UNIT);
43e9d192
IB
11246
11247 /* Construct 'abcdefgh' because the assembler cannot handle
48063b9d
IB
11248 generic constants. */
11249 if (info->mvn)
43e9d192 11250 imm = ~imm;
48063b9d
IB
11251 imm = (imm >> info->shift) & 0xff;
11252 info->value = GEN_INT (imm);
11253 }
43e9d192
IB
11254 }
11255
48063b9d 11256 return true;
43e9d192
IB
11257#undef CHECK
11258}
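/* A small worked example of the matching above (hypothetical constant): a
   V4SImode vector whose elements are all 0x0000ab00 splats to the byte
   pattern { 0x00, 0xab, 0x00, 0x00, ... }, which satisfies the CHECK for
   immtype 1 (only byte 1 of each 32-bit element is non-zero).  On success
   INFO then describes a 32-bit element with shift 8, mvn false and value
   0xab, i.e. the operand of a MOVI with an "lsl #8" shifted 8-bit
   immediate.  */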
11259
43e9d192
IB
11260/* Check if immediate shift constants are within range. */
11261bool
ef4bddc2 11262aarch64_simd_shift_imm_p (rtx x, machine_mode mode, bool left)
43e9d192
IB
11263{
11264 int bit_width = GET_MODE_UNIT_SIZE (mode) * BITS_PER_UNIT;
11265 if (left)
ddeabd3e 11266 return aarch64_const_vec_all_same_in_range_p (x, 0, bit_width - 1);
43e9d192 11267 else
ddeabd3e 11268 return aarch64_const_vec_all_same_in_range_p (x, 1, bit_width);
43e9d192
IB
11269}
11270
3520f7cc
JG
11271/* Return true if X is a uniform vector where all elements
11272 are either the floating-point constant 0.0 or the
11273 integer constant 0. */
43e9d192 11274bool
ef4bddc2 11275aarch64_simd_imm_zero_p (rtx x, machine_mode mode)
43e9d192 11276{
3520f7cc 11277 return x == CONST0_RTX (mode);
43e9d192
IB
11278}
11279
7325d85a
KT
11280
11281/* Return the bitmask CONST_INT to select the bits required by a zero extract
11282 operation of width WIDTH at bit position POS. */
11283
11284rtx
11285aarch64_mask_from_zextract_ops (rtx width, rtx pos)
11286{
11287 gcc_assert (CONST_INT_P (width));
11288 gcc_assert (CONST_INT_P (pos));
11289
11290 unsigned HOST_WIDE_INT mask
11291 = ((unsigned HOST_WIDE_INT) 1 << UINTVAL (width)) - 1;
11292 return GEN_INT (mask << UINTVAL (pos));
11293}
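/* For instance, WIDTH == 8 and POS == 16 give
   (((unsigned HOST_WIDE_INT) 1 << 8) - 1) << 16 == 0xff0000, the mask
   selecting bits 16..23.  */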
11294
43e9d192 11295bool
ef4bddc2 11296aarch64_simd_imm_scalar_p (rtx x, machine_mode mode ATTRIBUTE_UNUSED)
43e9d192
IB
11297{
11298 HOST_WIDE_INT imm = INTVAL (x);
11299 int i;
11300
11301 for (i = 0; i < 8; i++)
11302 {
11303 unsigned int byte = imm & 0xff;
11304 if (byte != 0xff && byte != 0)
11305 return false;
11306 imm >>= 8;
11307 }
11308
11309 return true;
11310}
11311
83f8c414 11312bool
a6e0bfa7 11313aarch64_mov_operand_p (rtx x, machine_mode mode)
83f8c414 11314{
83f8c414
CSS
11315 if (GET_CODE (x) == HIGH
11316 && aarch64_valid_symref (XEXP (x, 0), GET_MODE (XEXP (x, 0))))
11317 return true;
11318
82614948 11319 if (CONST_INT_P (x))
83f8c414
CSS
11320 return true;
11321
11322 if (GET_CODE (x) == SYMBOL_REF && mode == DImode && CONSTANT_ADDRESS_P (x))
11323 return true;
11324
a6e0bfa7 11325 return aarch64_classify_symbolic_expression (x)
a5350ddc 11326 == SYMBOL_TINY_ABSOLUTE;
83f8c414
CSS
11327}
11328
43e9d192
IB
11329/* Return a const_int vector of VAL. */
11330rtx
ab014eb3 11331aarch64_simd_gen_const_vector_dup (machine_mode mode, HOST_WIDE_INT val)
43e9d192
IB
11332{
11333 int nunits = GET_MODE_NUNITS (mode);
11334 rtvec v = rtvec_alloc (nunits);
11335 int i;
11336
ab014eb3
TC
11337 rtx cache = GEN_INT (val);
11338
43e9d192 11339 for (i=0; i < nunits; i++)
ab014eb3 11340 RTVEC_ELT (v, i) = cache;
43e9d192
IB
11341
11342 return gen_rtx_CONST_VECTOR (mode, v);
11343}
11344
051d0e2f
SN
11345/* Check OP is a legal scalar immediate for the MOVI instruction. */
11346
11347bool
ef4bddc2 11348aarch64_simd_scalar_immediate_valid_for_move (rtx op, machine_mode mode)
051d0e2f 11349{
ef4bddc2 11350 machine_mode vmode;
051d0e2f
SN
11351
11352 gcc_assert (!VECTOR_MODE_P (mode));
11353 vmode = aarch64_preferred_simd_mode (mode);
11354 rtx op_v = aarch64_simd_gen_const_vector_dup (vmode, INTVAL (op));
48063b9d 11355 return aarch64_simd_valid_immediate (op_v, vmode, false, NULL);
051d0e2f
SN
11356}
11357
988fa693
JG
11358/* Construct and return a PARALLEL RTX vector with elements numbering the
11359 lanes of either the high (HIGH == TRUE) or low (HIGH == FALSE) half of
11360 the vector - from the perspective of the architecture. This does not
11361 line up with GCC's perspective on lane numbers, so we end up with
11362 different masks depending on our target endian-ness. The diagram
11363 below may help. We must draw the distinction when building masks
11364 which select one half of the vector. An instruction selecting
11365 architectural low-lanes for a big-endian target, must be described using
11366 a mask selecting GCC high-lanes.
11367
11368 Big-Endian Little-Endian
11369
11370GCC 0 1 2 3 3 2 1 0
11371 | x | x | x | x | | x | x | x | x |
11372Architecture 3 2 1 0 3 2 1 0
11373
11374Low Mask: { 2, 3 } { 0, 1 }
11375High Mask: { 0, 1 } { 2, 3 }
11376*/
11377
43e9d192 11378rtx
ef4bddc2 11379aarch64_simd_vect_par_cnst_half (machine_mode mode, bool high)
43e9d192
IB
11380{
11381 int nunits = GET_MODE_NUNITS (mode);
11382 rtvec v = rtvec_alloc (nunits / 2);
988fa693
JG
11383 int high_base = nunits / 2;
11384 int low_base = 0;
11385 int base;
43e9d192
IB
11386 rtx t1;
11387 int i;
11388
988fa693
JG
11389 if (BYTES_BIG_ENDIAN)
11390 base = high ? low_base : high_base;
11391 else
11392 base = high ? high_base : low_base;
11393
11394 for (i = 0; i < nunits / 2; i++)
43e9d192
IB
11395 RTVEC_ELT (v, i) = GEN_INT (base + i);
11396
11397 t1 = gen_rtx_PARALLEL (mode, v);
11398 return t1;
11399}
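/* Concretely, for V4SImode this returns (parallel [2 3]) when HIGH is true
   on a little-endian target and (parallel [0 1]) on a big-endian one,
   matching the mask table in the comment above.  */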
11400
988fa693
JG
11401/* Check OP for validity as a PARALLEL RTX vector with elements
11402 numbering the lanes of either the high (HIGH == TRUE) or low lanes,
11403 from the perspective of the architecture. See the diagram above
11404 aarch64_simd_vect_par_cnst_half for more details. */
11405
11406bool
ef4bddc2 11407aarch64_simd_check_vect_par_cnst_half (rtx op, machine_mode mode,
988fa693
JG
11408 bool high)
11409{
11410 rtx ideal = aarch64_simd_vect_par_cnst_half (mode, high);
11411 HOST_WIDE_INT count_op = XVECLEN (op, 0);
11412 HOST_WIDE_INT count_ideal = XVECLEN (ideal, 0);
11413 int i = 0;
11414
11415 if (!VECTOR_MODE_P (mode))
11416 return false;
11417
11418 if (count_op != count_ideal)
11419 return false;
11420
11421 for (i = 0; i < count_ideal; i++)
11422 {
11423 rtx elt_op = XVECEXP (op, 0, i);
11424 rtx elt_ideal = XVECEXP (ideal, 0, i);
11425
4aa81c2e 11426 if (!CONST_INT_P (elt_op)
988fa693
JG
11427 || INTVAL (elt_ideal) != INTVAL (elt_op))
11428 return false;
11429 }
11430 return true;
11431}
11432
43e9d192
IB
11433/* Bounds-check lanes. Ensure OPERAND lies between LOW (inclusive) and
11434 HIGH (exclusive). */
11435void
46ed6024
CB
11436aarch64_simd_lane_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high,
11437 const_tree exp)
43e9d192
IB
11438{
11439 HOST_WIDE_INT lane;
4aa81c2e 11440 gcc_assert (CONST_INT_P (operand));
43e9d192
IB
11441 lane = INTVAL (operand);
11442
11443 if (lane < low || lane >= high)
46ed6024
CB
11444 {
11445 if (exp)
cf0c27ef 11446 error ("%Klane %wd out of range %wd - %wd", exp, lane, low, high - 1);
46ed6024 11447 else
cf0c27ef 11448 error ("lane %wd out of range %wd - %wd", lane, low, high - 1);
46ed6024 11449 }
43e9d192
IB
11450}
11451
43e9d192
IB
11452/* Return TRUE if OP is a valid vector addressing mode. */
11453bool
11454aarch64_simd_mem_operand_p (rtx op)
11455{
11456 return MEM_P (op) && (GET_CODE (XEXP (op, 0)) == POST_INC
4aa81c2e 11457 || REG_P (XEXP (op, 0)));
43e9d192
IB
11458}
11459
2d8c6dc1
AH
11460/* Emit a register copy from operand to operand, taking care not to
11461 early-clobber source registers in the process.
43e9d192 11462
2d8c6dc1
AH
11463 COUNT is the number of components into which the copy needs to be
11464 decomposed. */
43e9d192 11465void
2d8c6dc1
AH
11466aarch64_simd_emit_reg_reg_move (rtx *operands, enum machine_mode mode,
11467 unsigned int count)
43e9d192
IB
11468{
11469 unsigned int i;
2d8c6dc1
AH
11470 int rdest = REGNO (operands[0]);
11471 int rsrc = REGNO (operands[1]);
43e9d192
IB
11472
11473 if (!reg_overlap_mentioned_p (operands[0], operands[1])
2d8c6dc1
AH
11474 || rdest < rsrc)
11475 for (i = 0; i < count; i++)
11476 emit_move_insn (gen_rtx_REG (mode, rdest + i),
11477 gen_rtx_REG (mode, rsrc + i));
43e9d192 11478 else
2d8c6dc1
AH
11479 for (i = 0; i < count; i++)
11480 emit_move_insn (gen_rtx_REG (mode, rdest + count - i - 1),
11481 gen_rtx_REG (mode, rsrc + count - i - 1));
43e9d192
IB
11482}
11483
668046d1 11484/* Compute and return the length of aarch64_simd_reglist<mode>, where <mode> is
6ec0e5b9 11485 one of VSTRUCT modes: OI, CI, or XI. */
668046d1
DS
11486int
11487aarch64_simd_attr_length_rglist (enum machine_mode mode)
11488{
11489 return (GET_MODE_SIZE (mode) / UNITS_PER_VREG) * 4;
11490}
11491
db0253a4
TB
11492/* Implement target hook TARGET_VECTOR_ALIGNMENT. The AAPCS64 sets the maximum
11493 alignment of a vector to 128 bits. */
11494static HOST_WIDE_INT
11495aarch64_simd_vector_alignment (const_tree type)
11496{
9439e9a1 11497 HOST_WIDE_INT align = tree_to_shwi (TYPE_SIZE (type));
db0253a4
TB
11498 return MIN (align, 128);
11499}
11500
11501/* Implement target hook TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE. */
11502static bool
11503aarch64_simd_vector_alignment_reachable (const_tree type, bool is_packed)
11504{
11505 if (is_packed)
11506 return false;
11507
11508 /* We guarantee alignment for vectors up to 128-bits. */
11509 if (tree_int_cst_compare (TYPE_SIZE (type),
11510 bitsize_int (BIGGEST_ALIGNMENT)) > 0)
11511 return false;
11512
11513 /* Vectors whose size is <= BIGGEST_ALIGNMENT are naturally aligned. */
11514 return true;
11515}
11516
7df76747
N
11517/* Return true if the vector misalignment factor is supported by the
11518 target. */
11519static bool
11520aarch64_builtin_support_vector_misalignment (machine_mode mode,
11521 const_tree type, int misalignment,
11522 bool is_packed)
11523{
11524 if (TARGET_SIMD && STRICT_ALIGNMENT)
11525 {
11526 /* Return if movmisalign pattern is not supported for this mode. */
11527 if (optab_handler (movmisalign_optab, mode) == CODE_FOR_nothing)
11528 return false;
11529
11530 if (misalignment == -1)
11531 {
11532 /* Misalignment factor is unknown at compile time but we know
11533 it's word aligned. */
11534 if (aarch64_simd_vector_alignment_reachable (type, is_packed))
11535 {
11536 int element_size = TREE_INT_CST_LOW (TYPE_SIZE (type));
11537
11538 if (element_size != 64)
11539 return true;
11540 }
11541 return false;
11542 }
11543 }
11544 return default_builtin_support_vector_misalignment (mode, type, misalignment,
11545 is_packed);
11546}
11547
4369c11e
TB
11548/* If VALS is a vector constant that can be loaded into a register
11549 using DUP, generate instructions to do so and return an RTX to
11550 assign to the register. Otherwise return NULL_RTX. */
11551static rtx
11552aarch64_simd_dup_constant (rtx vals)
11553{
ef4bddc2
RS
11554 machine_mode mode = GET_MODE (vals);
11555 machine_mode inner_mode = GET_MODE_INNER (mode);
4369c11e 11556 rtx x;
4369c11e 11557
92695fbb 11558 if (!const_vec_duplicate_p (vals, &x))
4369c11e
TB
11559 return NULL_RTX;
11560
11561 /* We can load this constant by using DUP and a constant in a
11562 single ARM register. This will be cheaper than a vector
11563 load. */
92695fbb 11564 x = copy_to_mode_reg (inner_mode, x);
4369c11e
TB
11565 return gen_rtx_VEC_DUPLICATE (mode, x);
11566}
11567
11568
11569/* Generate code to load VALS, which is a PARALLEL containing only
11570 constants (for vec_init) or CONST_VECTOR, efficiently into a
11571 register. Returns an RTX to copy into the register, or NULL_RTX
11572 for a PARALLEL that can not be converted into a CONST_VECTOR. */
1df3f464 11573static rtx
4369c11e
TB
11574aarch64_simd_make_constant (rtx vals)
11575{
ef4bddc2 11576 machine_mode mode = GET_MODE (vals);
4369c11e
TB
11577 rtx const_dup;
11578 rtx const_vec = NULL_RTX;
11579 int n_elts = GET_MODE_NUNITS (mode);
11580 int n_const = 0;
11581 int i;
11582
11583 if (GET_CODE (vals) == CONST_VECTOR)
11584 const_vec = vals;
11585 else if (GET_CODE (vals) == PARALLEL)
11586 {
11587 /* A CONST_VECTOR must contain only CONST_INTs and
11588 CONST_DOUBLEs, but CONSTANT_P allows more (e.g. SYMBOL_REF).
11589 Only store valid constants in a CONST_VECTOR. */
11590 for (i = 0; i < n_elts; ++i)
11591 {
11592 rtx x = XVECEXP (vals, 0, i);
11593 if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
11594 n_const++;
11595 }
11596 if (n_const == n_elts)
11597 const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0));
11598 }
11599 else
11600 gcc_unreachable ();
11601
11602 if (const_vec != NULL_RTX
48063b9d 11603 && aarch64_simd_valid_immediate (const_vec, mode, false, NULL))
4369c11e
TB
11604 /* Load using MOVI/MVNI. */
11605 return const_vec;
11606 else if ((const_dup = aarch64_simd_dup_constant (vals)) != NULL_RTX)
11607 /* Loaded using DUP. */
11608 return const_dup;
11609 else if (const_vec != NULL_RTX)
11610 /* Load from constant pool. We can not take advantage of single-cycle
11611 LD1 because we need a PC-relative addressing mode. */
11612 return const_vec;
11613 else
11614 /* A PARALLEL containing something not valid inside CONST_VECTOR.
11615 We can not construct an initializer. */
11616 return NULL_RTX;
11617}
11618
35a093b6
JG
11619/* Expand a vector initialisation sequence, such that TARGET is
11620 initialised to contain VALS. */
11621
4369c11e
TB
11622void
11623aarch64_expand_vector_init (rtx target, rtx vals)
11624{
ef4bddc2
RS
11625 machine_mode mode = GET_MODE (target);
11626 machine_mode inner_mode = GET_MODE_INNER (mode);
35a093b6 11627 /* The number of vector elements. */
4369c11e 11628 int n_elts = GET_MODE_NUNITS (mode);
35a093b6 11629 /* The number of vector elements which are not constant. */
8b66a2d4
AL
11630 int n_var = 0;
11631 rtx any_const = NULL_RTX;
35a093b6
JG
11632 /* The first element of vals. */
11633 rtx v0 = XVECEXP (vals, 0, 0);
4369c11e 11634 bool all_same = true;
4369c11e 11635
35a093b6 11636 /* Count the number of variable elements to initialise. */
8b66a2d4 11637 for (int i = 0; i < n_elts; ++i)
4369c11e 11638 {
8b66a2d4 11639 rtx x = XVECEXP (vals, 0, i);
35a093b6 11640 if (!(CONST_INT_P (x) || CONST_DOUBLE_P (x)))
8b66a2d4
AL
11641 ++n_var;
11642 else
11643 any_const = x;
4369c11e 11644
35a093b6 11645 all_same &= rtx_equal_p (x, v0);
4369c11e
TB
11646 }
11647
35a093b6
JG
11648 /* No variable elements, hand off to aarch64_simd_make_constant which knows
11649 how best to handle this. */
4369c11e
TB
11650 if (n_var == 0)
11651 {
11652 rtx constant = aarch64_simd_make_constant (vals);
11653 if (constant != NULL_RTX)
11654 {
11655 emit_move_insn (target, constant);
11656 return;
11657 }
11658 }
11659
11660 /* Splat a single non-constant element if we can. */
11661 if (all_same)
11662 {
35a093b6 11663 rtx x = copy_to_mode_reg (inner_mode, v0);
4369c11e
TB
11664 aarch64_emit_move (target, gen_rtx_VEC_DUPLICATE (mode, x));
11665 return;
11666 }
11667
35a093b6
JG
11668 /* Initialise a vector which is part-variable. We want to first try
11669 to build those lanes which are constant in the most efficient way we
11670 can. */
11671 if (n_var != n_elts)
4369c11e
TB
11672 {
11673 rtx copy = copy_rtx (vals);
4369c11e 11674
8b66a2d4
AL
11675 /* Load constant part of vector. We really don't care what goes into the
11676 parts we will overwrite, but we're more likely to be able to load the
11677 constant efficiently if it has fewer, larger, repeating parts
11678 (see aarch64_simd_valid_immediate). */
11679 for (int i = 0; i < n_elts; i++)
11680 {
11681 rtx x = XVECEXP (vals, 0, i);
11682 if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
11683 continue;
11684 rtx subst = any_const;
11685 for (int bit = n_elts / 2; bit > 0; bit /= 2)
11686 {
11687 /* Look in the copied vector, as more elements are const. */
11688 rtx test = XVECEXP (copy, 0, i ^ bit);
11689 if (CONST_INT_P (test) || CONST_DOUBLE_P (test))
11690 {
11691 subst = test;
11692 break;
11693 }
11694 }
11695 XVECEXP (copy, 0, i) = subst;
11696 }
4369c11e 11697 aarch64_expand_vector_init (target, copy);
35a093b6 11698 }
4369c11e 11699
35a093b6 11700 /* Insert the variable lanes directly. */
8b66a2d4 11701
35a093b6
JG
11702 enum insn_code icode = optab_handler (vec_set_optab, mode);
11703 gcc_assert (icode != CODE_FOR_nothing);
4369c11e 11704
8b66a2d4 11705 for (int i = 0; i < n_elts; i++)
35a093b6
JG
11706 {
11707 rtx x = XVECEXP (vals, 0, i);
11708 if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
11709 continue;
11710 x = copy_to_mode_reg (inner_mode, x);
11711 emit_insn (GEN_FCN (icode) (target, x, GEN_INT (i)));
11712 }
4369c11e
TB
11713}
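/* A sketch of the mixed case above for a hypothetical initializer
   { x, 1, 2, 3 } in V4SImode: a copy of VALS with the variable lane
   replaced by a neighbouring constant lane is materialized first
   (recursing into this function, which hands the now-constant vector to
   aarch64_simd_make_constant), and the single variable lane X is then
   written with the vec_set pattern.  */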
11714
43e9d192 11715static unsigned HOST_WIDE_INT
ef4bddc2 11716aarch64_shift_truncation_mask (machine_mode mode)
43e9d192
IB
11717{
11718 return
ac59ad4e
KT
11719 (!SHIFT_COUNT_TRUNCATED
11720 || aarch64_vector_mode_supported_p (mode)
43e9d192
IB
11721 || aarch64_vect_struct_mode_p (mode)) ? 0 : (GET_MODE_BITSIZE (mode) - 1);
11722}
11723
43e9d192
IB
11724/* Select a format to encode pointers in exception handling data. */
11725int
11726aarch64_asm_preferred_eh_data_format (int code ATTRIBUTE_UNUSED, int global)
11727{
11728 int type;
11729 switch (aarch64_cmodel)
11730 {
11731 case AARCH64_CMODEL_TINY:
11732 case AARCH64_CMODEL_TINY_PIC:
11733 case AARCH64_CMODEL_SMALL:
11734 case AARCH64_CMODEL_SMALL_PIC:
1b1e81f8 11735 case AARCH64_CMODEL_SMALL_SPIC:
43e9d192
IB
11736 /* text+got+data < 4Gb. 4-byte signed relocs are sufficient
11737 for everything. */
11738 type = DW_EH_PE_sdata4;
11739 break;
11740 default:
11741 /* No assumptions here. 8-byte relocs required. */
11742 type = DW_EH_PE_sdata8;
11743 break;
11744 }
11745 return (global ? DW_EH_PE_indirect : 0) | DW_EH_PE_pcrel | type;
11746}
11747
e1c1ecb0
KT
11748/* The last .arch and .tune assembly strings that we printed. */
11749static std::string aarch64_last_printed_arch_string;
11750static std::string aarch64_last_printed_tune_string;
11751
361fb3ee
KT
11752/* Implement ASM_DECLARE_FUNCTION_NAME. Output the ISA features used
11753 by the function fndecl. */
11754
11755void
11756aarch64_declare_function_name (FILE *stream, const char* name,
11757 tree fndecl)
11758{
11759 tree target_parts = DECL_FUNCTION_SPECIFIC_TARGET (fndecl);
11760
11761 struct cl_target_option *targ_options;
11762 if (target_parts)
11763 targ_options = TREE_TARGET_OPTION (target_parts);
11764 else
11765 targ_options = TREE_TARGET_OPTION (target_option_current_node);
11766 gcc_assert (targ_options);
11767
11768 const struct processor *this_arch
11769 = aarch64_get_arch (targ_options->x_explicit_arch);
11770
054b4005
JG
11771 unsigned long isa_flags = targ_options->x_aarch64_isa_flags;
11772 std::string extension
04a99ebe
JG
11773 = aarch64_get_extension_string_for_isa_flags (isa_flags,
11774 this_arch->flags);
e1c1ecb0
KT
11775 /* Only update the assembler .arch string if it is distinct from the last
11776 such string we printed. */
11777 std::string to_print = this_arch->name + extension;
11778 if (to_print != aarch64_last_printed_arch_string)
11779 {
11780 asm_fprintf (asm_out_file, "\t.arch %s\n", to_print.c_str ());
11781 aarch64_last_printed_arch_string = to_print;
11782 }
361fb3ee
KT
11783
 11784 /* Print the cpu name we're tuning for in the comments; it might be
e1c1ecb0
KT
11785 useful to readers of the generated asm. Do it only when it changes
11786 from function to function and verbose assembly is requested. */
361fb3ee
KT
11787 const struct processor *this_tune
11788 = aarch64_get_tune_cpu (targ_options->x_explicit_tune_core);
11789
e1c1ecb0
KT
11790 if (flag_debug_asm && aarch64_last_printed_tune_string != this_tune->name)
11791 {
11792 asm_fprintf (asm_out_file, "\t" ASM_COMMENT_START ".tune %s\n",
11793 this_tune->name);
11794 aarch64_last_printed_tune_string = this_tune->name;
11795 }
361fb3ee
KT
11796
11797 /* Don't forget the type directive for ELF. */
11798 ASM_OUTPUT_TYPE_DIRECTIVE (stream, name, "function");
11799 ASM_OUTPUT_LABEL (stream, name);
11800}
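/* As a sketch of the output (illustrative only; the exact extension string
   comes from aarch64_get_extension_string_for_isa_flags), compiling with
   -mcpu=cortex-a57+crc and -dA might emit something like:

	.arch	armv8-a+crc
	// .tune cortex-a57
	.type	foo, %function
   foo:
*/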
11801
e1c1ecb0
KT
11802/* Implements TARGET_ASM_FILE_START. Output the assembly header. */
11803
11804static void
11805aarch64_start_file (void)
11806{
11807 struct cl_target_option *default_options
11808 = TREE_TARGET_OPTION (target_option_default_node);
11809
11810 const struct processor *default_arch
11811 = aarch64_get_arch (default_options->x_explicit_arch);
11812 unsigned long default_isa_flags = default_options->x_aarch64_isa_flags;
11813 std::string extension
04a99ebe
JG
11814 = aarch64_get_extension_string_for_isa_flags (default_isa_flags,
11815 default_arch->flags);
e1c1ecb0
KT
11816
11817 aarch64_last_printed_arch_string = default_arch->name + extension;
11818 aarch64_last_printed_tune_string = "";
11819 asm_fprintf (asm_out_file, "\t.arch %s\n",
11820 aarch64_last_printed_arch_string.c_str ());
11821
11822 default_file_start ();
11823}
11824
0462169c
SN
11825/* Emit load exclusive. */
11826
11827static void
ef4bddc2 11828aarch64_emit_load_exclusive (machine_mode mode, rtx rval,
0462169c
SN
11829 rtx mem, rtx model_rtx)
11830{
11831 rtx (*gen) (rtx, rtx, rtx);
11832
11833 switch (mode)
11834 {
11835 case QImode: gen = gen_aarch64_load_exclusiveqi; break;
11836 case HImode: gen = gen_aarch64_load_exclusivehi; break;
11837 case SImode: gen = gen_aarch64_load_exclusivesi; break;
11838 case DImode: gen = gen_aarch64_load_exclusivedi; break;
11839 default:
11840 gcc_unreachable ();
11841 }
11842
11843 emit_insn (gen (rval, mem, model_rtx));
11844}
11845
11846/* Emit store exclusive. */
11847
11848static void
ef4bddc2 11849aarch64_emit_store_exclusive (machine_mode mode, rtx bval,
0462169c
SN
11850 rtx rval, rtx mem, rtx model_rtx)
11851{
11852 rtx (*gen) (rtx, rtx, rtx, rtx);
11853
11854 switch (mode)
11855 {
11856 case QImode: gen = gen_aarch64_store_exclusiveqi; break;
11857 case HImode: gen = gen_aarch64_store_exclusivehi; break;
11858 case SImode: gen = gen_aarch64_store_exclusivesi; break;
11859 case DImode: gen = gen_aarch64_store_exclusivedi; break;
11860 default:
11861 gcc_unreachable ();
11862 }
11863
11864 emit_insn (gen (bval, rval, mem, model_rtx));
11865}
11866
11867/* Mark the previous jump instruction as unlikely. */
11868
11869static void
11870aarch64_emit_unlikely_jump (rtx insn)
11871{
e5af9ddd 11872 int very_unlikely = REG_BR_PROB_BASE / 100 - 1;
0462169c 11873
f370536c
TS
11874 rtx_insn *jump = emit_jump_insn (insn);
11875 add_int_reg_note (jump, REG_BR_PROB, very_unlikely);
0462169c
SN
11876}
11877
11878/* Expand a compare and swap pattern. */
11879
11880void
11881aarch64_expand_compare_and_swap (rtx operands[])
11882{
11883 rtx bval, rval, mem, oldval, newval, is_weak, mod_s, mod_f, x;
ef4bddc2 11884 machine_mode mode, cmp_mode;
b0770c0f
MW
11885 typedef rtx (*gen_cas_fn) (rtx, rtx, rtx, rtx, rtx, rtx, rtx);
11886 int idx;
11887 gen_cas_fn gen;
11888 const gen_cas_fn split_cas[] =
11889 {
11890 gen_aarch64_compare_and_swapqi,
11891 gen_aarch64_compare_and_swaphi,
11892 gen_aarch64_compare_and_swapsi,
11893 gen_aarch64_compare_and_swapdi
11894 };
11895 const gen_cas_fn atomic_cas[] =
11896 {
11897 gen_aarch64_compare_and_swapqi_lse,
11898 gen_aarch64_compare_and_swaphi_lse,
11899 gen_aarch64_compare_and_swapsi_lse,
11900 gen_aarch64_compare_and_swapdi_lse
11901 };
0462169c
SN
11902
11903 bval = operands[0];
11904 rval = operands[1];
11905 mem = operands[2];
11906 oldval = operands[3];
11907 newval = operands[4];
11908 is_weak = operands[5];
11909 mod_s = operands[6];
11910 mod_f = operands[7];
11911 mode = GET_MODE (mem);
11912 cmp_mode = mode;
11913
11914 /* Normally the succ memory model must be stronger than fail, but in the
11915 unlikely event of fail being ACQUIRE and succ being RELEASE we need to
11916 promote succ to ACQ_REL so that we don't lose the acquire semantics. */
11917
46b35980
AM
11918 if (is_mm_acquire (memmodel_from_int (INTVAL (mod_f)))
11919 && is_mm_release (memmodel_from_int (INTVAL (mod_s))))
0462169c
SN
11920 mod_s = GEN_INT (MEMMODEL_ACQ_REL);
11921
11922 switch (mode)
11923 {
11924 case QImode:
11925 case HImode:
11926 /* For short modes, we're going to perform the comparison in SImode,
11927 so do the zero-extension now. */
11928 cmp_mode = SImode;
11929 rval = gen_reg_rtx (SImode);
11930 oldval = convert_modes (SImode, mode, oldval, true);
11931 /* Fall through. */
11932
11933 case SImode:
11934 case DImode:
11935 /* Force the value into a register if needed. */
11936 if (!aarch64_plus_operand (oldval, mode))
11937 oldval = force_reg (cmp_mode, oldval);
11938 break;
11939
11940 default:
11941 gcc_unreachable ();
11942 }
11943
11944 switch (mode)
11945 {
b0770c0f
MW
11946 case QImode: idx = 0; break;
11947 case HImode: idx = 1; break;
11948 case SImode: idx = 2; break;
11949 case DImode: idx = 3; break;
0462169c
SN
11950 default:
11951 gcc_unreachable ();
11952 }
b0770c0f
MW
11953 if (TARGET_LSE)
11954 gen = atomic_cas[idx];
11955 else
11956 gen = split_cas[idx];
0462169c
SN
11957
11958 emit_insn (gen (rval, mem, oldval, newval, is_weak, mod_s, mod_f));
11959
11960 if (mode == QImode || mode == HImode)
11961 emit_move_insn (operands[1], gen_lowpart (mode, rval));
11962
11963 x = gen_rtx_REG (CCmode, CC_REGNUM);
11964 x = gen_rtx_EQ (SImode, x, const0_rtx);
f7df4a84 11965 emit_insn (gen_rtx_SET (bval, x));
0462169c
SN
11966}
11967
641c2f8b
MW
11968/* Test whether the target supports using an atomic load-operate instruction.
11969 CODE is the operation and AFTER is TRUE if the data in memory after the
11970 operation should be returned and FALSE if the data before the operation
11971 should be returned. Returns FALSE if the operation isn't supported by the
11972 architecture. */
11973
11974bool
11975aarch64_atomic_ldop_supported_p (enum rtx_code code)
11976{
11977 if (!TARGET_LSE)
11978 return false;
11979
11980 switch (code)
11981 {
11982 case SET:
11983 case AND:
11984 case IOR:
11985 case XOR:
11986 case MINUS:
11987 case PLUS:
11988 return true;
11989 default:
11990 return false;
11991 }
11992}
11993
f70fb3b6
MW
11994/* Emit a barrier appropriate for memory model MODEL at the end of a
11995 sequence implementing an atomic operation. */
11996
11997static void
11998aarch64_emit_post_barrier (enum memmodel model)
11999{
12000 const enum memmodel base_model = memmodel_base (model);
12001
12002 if (is_mm_sync (model)
12003 && (base_model == MEMMODEL_ACQUIRE
12004 || base_model == MEMMODEL_ACQ_REL
12005 || base_model == MEMMODEL_SEQ_CST))
12006 {
12007 emit_insn (gen_mem_thread_fence (GEN_INT (MEMMODEL_SEQ_CST)));
12008 }
12009}
12010
b0770c0f
MW
12011/* Emit an atomic compare-and-swap operation. RVAL is the destination register
12012 for the data in memory. EXPECTED is the value expected to be in memory.
12013 DESIRED is the value to store to memory. MEM is the memory location. MODEL
12014 is the memory ordering to use. */
12015
12016void
12017aarch64_gen_atomic_cas (rtx rval, rtx mem,
12018 rtx expected, rtx desired,
12019 rtx model)
12020{
12021 rtx (*gen) (rtx, rtx, rtx, rtx);
12022 machine_mode mode;
12023
12024 mode = GET_MODE (mem);
12025
12026 switch (mode)
12027 {
12028 case QImode: gen = gen_aarch64_atomic_casqi; break;
12029 case HImode: gen = gen_aarch64_atomic_cashi; break;
12030 case SImode: gen = gen_aarch64_atomic_cassi; break;
12031 case DImode: gen = gen_aarch64_atomic_casdi; break;
12032 default:
12033 gcc_unreachable ();
12034 }
12035
12036 /* Move the expected value into the CAS destination register. */
12037 emit_insn (gen_rtx_SET (rval, expected));
12038
12039 /* Emit the CAS. */
12040 emit_insn (gen (rval, mem, desired, model));
12041
12042 /* Compare the expected value with the value loaded by the CAS, to establish
12043 whether the swap was made. */
12044 aarch64_gen_compare_reg (EQ, rval, expected);
12045}
12046
0462169c
SN
12047/* Split a compare and swap pattern. */
12048
12049void
12050aarch64_split_compare_and_swap (rtx operands[])
12051{
12052 rtx rval, mem, oldval, newval, scratch;
ef4bddc2 12053 machine_mode mode;
0462169c 12054 bool is_weak;
5d8a22a5
DM
12055 rtx_code_label *label1, *label2;
12056 rtx x, cond;
ab876106
MW
12057 enum memmodel model;
12058 rtx model_rtx;
0462169c
SN
12059
12060 rval = operands[0];
12061 mem = operands[1];
12062 oldval = operands[2];
12063 newval = operands[3];
12064 is_weak = (operands[4] != const0_rtx);
ab876106 12065 model_rtx = operands[5];
0462169c
SN
12066 scratch = operands[7];
12067 mode = GET_MODE (mem);
ab876106 12068 model = memmodel_from_int (INTVAL (model_rtx));
0462169c 12069
5d8a22a5 12070 label1 = NULL;
0462169c
SN
12071 if (!is_weak)
12072 {
12073 label1 = gen_label_rtx ();
12074 emit_label (label1);
12075 }
12076 label2 = gen_label_rtx ();
12077
ab876106
MW
12078 /* The initial load can be relaxed for a __sync operation since a final
12079 barrier will be emitted to stop code hoisting. */
12080 if (is_mm_sync (model))
12081 aarch64_emit_load_exclusive (mode, rval, mem,
12082 GEN_INT (MEMMODEL_RELAXED));
12083 else
12084 aarch64_emit_load_exclusive (mode, rval, mem, model_rtx);
0462169c
SN
12085
12086 cond = aarch64_gen_compare_reg (NE, rval, oldval);
12087 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
12088 x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
12089 gen_rtx_LABEL_REF (Pmode, label2), pc_rtx);
f7df4a84 12090 aarch64_emit_unlikely_jump (gen_rtx_SET (pc_rtx, x));
0462169c 12091
ab876106 12092 aarch64_emit_store_exclusive (mode, scratch, mem, newval, model_rtx);
0462169c
SN
12093
12094 if (!is_weak)
12095 {
12096 x = gen_rtx_NE (VOIDmode, scratch, const0_rtx);
12097 x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
12098 gen_rtx_LABEL_REF (Pmode, label1), pc_rtx);
f7df4a84 12099 aarch64_emit_unlikely_jump (gen_rtx_SET (pc_rtx, x));
0462169c
SN
12100 }
12101 else
12102 {
12103 cond = gen_rtx_REG (CCmode, CC_REGNUM);
12104 x = gen_rtx_COMPARE (CCmode, scratch, const0_rtx);
f7df4a84 12105 emit_insn (gen_rtx_SET (cond, x));
0462169c
SN
12106 }
12107
12108 emit_label (label2);
ab876106
MW
12109
12110 /* Emit any final barrier needed for a __sync operation. */
12111 if (is_mm_sync (model))
12112 aarch64_emit_post_barrier (model);
0462169c
SN
12113}
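/* For a strong SImode compare-and-swap with acquire-release ordering the
   split sequence above looks roughly like this (hypothetical registers;
   the exact exclusive-access mnemonics depend on the memory model):

   .L1:	ldaxr	w0, [x1]
	cmp	w0, w2
	b.ne	.L2
	stlxr	w3, w4, [x1]
	cbnz	w3, .L1
   .L2:
*/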
12114
68729b06
MW
12115/* Emit a BIC instruction. */
12116
12117static void
12118aarch64_emit_bic (machine_mode mode, rtx dst, rtx s1, rtx s2, int shift)
12119{
12120 rtx shift_rtx = GEN_INT (shift);
12121 rtx (*gen) (rtx, rtx, rtx, rtx);
12122
12123 switch (mode)
12124 {
12125 case SImode: gen = gen_and_one_cmpl_lshrsi3; break;
12126 case DImode: gen = gen_and_one_cmpl_lshrdi3; break;
12127 default:
12128 gcc_unreachable ();
12129 }
12130
12131 emit_insn (gen (dst, s2, shift_rtx, s1));
12132}
12133
9cd7b720
MW
12134/* Emit an atomic swap. */
12135
12136static void
12137aarch64_emit_atomic_swap (machine_mode mode, rtx dst, rtx value,
12138 rtx mem, rtx model)
12139{
12140 rtx (*gen) (rtx, rtx, rtx, rtx);
12141
12142 switch (mode)
12143 {
12144 case QImode: gen = gen_aarch64_atomic_swpqi; break;
12145 case HImode: gen = gen_aarch64_atomic_swphi; break;
12146 case SImode: gen = gen_aarch64_atomic_swpsi; break;
12147 case DImode: gen = gen_aarch64_atomic_swpdi; break;
12148 default:
12149 gcc_unreachable ();
12150 }
12151
12152 emit_insn (gen (dst, mem, value, model));
12153}
12154
641c2f8b
MW
12155/* Operations supported by aarch64_emit_atomic_load_op. */
12156
12157enum aarch64_atomic_load_op_code
12158{
12159 AARCH64_LDOP_PLUS, /* A + B */
12160 AARCH64_LDOP_XOR, /* A ^ B */
12161 AARCH64_LDOP_OR, /* A | B */
12162 AARCH64_LDOP_BIC /* A & ~B */
12163};
12164
12165/* Emit an atomic load-operate. */
12166
12167static void
12168aarch64_emit_atomic_load_op (enum aarch64_atomic_load_op_code code,
12169 machine_mode mode, rtx dst, rtx src,
12170 rtx mem, rtx model)
12171{
12172 typedef rtx (*aarch64_atomic_load_op_fn) (rtx, rtx, rtx, rtx);
12173 const aarch64_atomic_load_op_fn plus[] =
12174 {
12175 gen_aarch64_atomic_loadaddqi,
12176 gen_aarch64_atomic_loadaddhi,
12177 gen_aarch64_atomic_loadaddsi,
12178 gen_aarch64_atomic_loadadddi
12179 };
12180 const aarch64_atomic_load_op_fn eor[] =
12181 {
12182 gen_aarch64_atomic_loadeorqi,
12183 gen_aarch64_atomic_loadeorhi,
12184 gen_aarch64_atomic_loadeorsi,
12185 gen_aarch64_atomic_loadeordi
12186 };
12187 const aarch64_atomic_load_op_fn ior[] =
12188 {
12189 gen_aarch64_atomic_loadsetqi,
12190 gen_aarch64_atomic_loadsethi,
12191 gen_aarch64_atomic_loadsetsi,
12192 gen_aarch64_atomic_loadsetdi
12193 };
12194 const aarch64_atomic_load_op_fn bic[] =
12195 {
12196 gen_aarch64_atomic_loadclrqi,
12197 gen_aarch64_atomic_loadclrhi,
12198 gen_aarch64_atomic_loadclrsi,
12199 gen_aarch64_atomic_loadclrdi
12200 };
12201 aarch64_atomic_load_op_fn gen;
12202 int idx = 0;
12203
12204 switch (mode)
12205 {
12206 case QImode: idx = 0; break;
12207 case HImode: idx = 1; break;
12208 case SImode: idx = 2; break;
12209 case DImode: idx = 3; break;
12210 default:
12211 gcc_unreachable ();
12212 }
12213
12214 switch (code)
12215 {
12216 case AARCH64_LDOP_PLUS: gen = plus[idx]; break;
12217 case AARCH64_LDOP_XOR: gen = eor[idx]; break;
12218 case AARCH64_LDOP_OR: gen = ior[idx]; break;
12219 case AARCH64_LDOP_BIC: gen = bic[idx]; break;
12220 default:
12221 gcc_unreachable ();
12222 }
12223
12224 emit_insn (gen (dst, mem, src, model));
12225}
12226
12227/* Emit an atomic load+operate. CODE is the operation. OUT_DATA is the
68729b06
MW
12228 location to store the data read from memory. OUT_RESULT is the location to
12229 store the result of the operation. MEM is the memory location to read and
12230 modify. MODEL_RTX is the memory ordering to use. VALUE is the second
12231 operand for the operation. Either OUT_DATA or OUT_RESULT, but not both, can
12232 be NULL. */
9cd7b720
MW
12233
12234void
68729b06 12235aarch64_gen_atomic_ldop (enum rtx_code code, rtx out_data, rtx out_result,
9cd7b720
MW
12236 rtx mem, rtx value, rtx model_rtx)
12237{
12238 machine_mode mode = GET_MODE (mem);
641c2f8b
MW
12239 machine_mode wmode = (mode == DImode ? DImode : SImode);
12240 const bool short_mode = (mode < SImode);
12241 aarch64_atomic_load_op_code ldop_code;
12242 rtx src;
12243 rtx x;
12244
12245 if (out_data)
12246 out_data = gen_lowpart (mode, out_data);
9cd7b720 12247
68729b06
MW
12248 if (out_result)
12249 out_result = gen_lowpart (mode, out_result);
12250
641c2f8b
MW
12251 /* Make sure the value is in a register, putting it into a destination
12252 register if it needs to be manipulated. */
12253 if (!register_operand (value, mode)
12254 || code == AND || code == MINUS)
12255 {
68729b06 12256 src = out_result ? out_result : out_data;
641c2f8b
MW
12257 emit_move_insn (src, gen_lowpart (mode, value));
12258 }
12259 else
12260 src = value;
12261 gcc_assert (register_operand (src, mode));
9cd7b720 12262
641c2f8b
MW
12263 /* Preprocess the data for the operation as necessary. If the operation is
12264 a SET then emit a swap instruction and finish. */
9cd7b720
MW
12265 switch (code)
12266 {
12267 case SET:
641c2f8b 12268 aarch64_emit_atomic_swap (mode, out_data, src, mem, model_rtx);
9cd7b720
MW
12269 return;
12270
641c2f8b
MW
12271 case MINUS:
12272 /* Negate the value and treat it as a PLUS. */
12273 {
12274 rtx neg_src;
12275
12276 /* Resize the value if necessary. */
12277 if (short_mode)
12278 src = gen_lowpart (wmode, src);
12279
12280 neg_src = gen_rtx_NEG (wmode, src);
12281 emit_insn (gen_rtx_SET (src, neg_src));
12282
12283 if (short_mode)
12284 src = gen_lowpart (mode, src);
12285 }
12286 /* Fall-through. */
12287 case PLUS:
12288 ldop_code = AARCH64_LDOP_PLUS;
12289 break;
12290
12291 case IOR:
12292 ldop_code = AARCH64_LDOP_OR;
12293 break;
12294
12295 case XOR:
12296 ldop_code = AARCH64_LDOP_XOR;
12297 break;
12298
12299 case AND:
12300 {
12301 rtx not_src;
12302
12303 /* Resize the value if necessary. */
12304 if (short_mode)
12305 src = gen_lowpart (wmode, src);
12306
12307 not_src = gen_rtx_NOT (wmode, src);
12308 emit_insn (gen_rtx_SET (src, not_src));
12309
12310 if (short_mode)
12311 src = gen_lowpart (mode, src);
12312 }
12313 ldop_code = AARCH64_LDOP_BIC;
12314 break;
12315
9cd7b720
MW
12316 default:
12317 /* The operation can't be done with atomic instructions. */
12318 gcc_unreachable ();
12319 }
641c2f8b
MW
12320
12321 aarch64_emit_atomic_load_op (ldop_code, mode, out_data, src, mem, model_rtx);
68729b06
MW
12322
12323 /* If necessary, calculate the data in memory after the update by redoing the
12324 operation from values in registers. */
12325 if (!out_result)
12326 return;
12327
12328 if (short_mode)
12329 {
12330 src = gen_lowpart (wmode, src);
12331 out_data = gen_lowpart (wmode, out_data);
12332 out_result = gen_lowpart (wmode, out_result);
12333 }
12334
12335 x = NULL_RTX;
12336
12337 switch (code)
12338 {
12339 case MINUS:
12340 case PLUS:
12341 x = gen_rtx_PLUS (wmode, out_data, src);
12342 break;
12343 case IOR:
12344 x = gen_rtx_IOR (wmode, out_data, src);
12345 break;
12346 case XOR:
12347 x = gen_rtx_XOR (wmode, out_data, src);
12348 break;
12349 case AND:
12350 aarch64_emit_bic (wmode, out_result, out_data, src, 0);
12351 return;
12352 default:
12353 gcc_unreachable ();
12354 }
12355
12356 emit_set_insn (out_result, x);
12357
12358 return;
9cd7b720
MW
12359}
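/* For illustration: with the LSE atomics, an __atomic_fetch_sub is handled
   above by negating the value and emitting an LDADD, and an
   __atomic_fetch_and by complementing the value and emitting an LDCLR
   (A & ~B).  Roughly, a sequentially consistent fetch-and-subtract on an
   int might come out as (registers chosen for illustration only):

	neg	w1, w1
	ldaddal	w1, w2, [x0]	// w2 = old value, *x0 += w1  */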
12360
0462169c
SN
12361/* Split an atomic operation. */
12362
12363void
12364aarch64_split_atomic_op (enum rtx_code code, rtx old_out, rtx new_out, rtx mem,
9cd7b720 12365 rtx value, rtx model_rtx, rtx cond)
0462169c 12366{
ef4bddc2
RS
12367 machine_mode mode = GET_MODE (mem);
12368 machine_mode wmode = (mode == DImode ? DImode : SImode);
f70fb3b6
MW
12369 const enum memmodel model = memmodel_from_int (INTVAL (model_rtx));
12370 const bool is_sync = is_mm_sync (model);
5d8a22a5
DM
12371 rtx_code_label *label;
12372 rtx x;
0462169c 12373
9cd7b720 12374 /* Split the atomic operation into a sequence. */
0462169c
SN
12375 label = gen_label_rtx ();
12376 emit_label (label);
12377
12378 if (new_out)
12379 new_out = gen_lowpart (wmode, new_out);
12380 if (old_out)
12381 old_out = gen_lowpart (wmode, old_out);
12382 else
12383 old_out = new_out;
12384 value = simplify_gen_subreg (wmode, value, mode, 0);
12385
f70fb3b6
MW
12386 /* The initial load can be relaxed for a __sync operation since a final
12387 barrier will be emitted to stop code hoisting. */
12388 if (is_sync)
12389 aarch64_emit_load_exclusive (mode, old_out, mem,
12390 GEN_INT (MEMMODEL_RELAXED));
12391 else
12392 aarch64_emit_load_exclusive (mode, old_out, mem, model_rtx);
0462169c
SN
12393
12394 switch (code)
12395 {
12396 case SET:
12397 new_out = value;
12398 break;
12399
12400 case NOT:
12401 x = gen_rtx_AND (wmode, old_out, value);
f7df4a84 12402 emit_insn (gen_rtx_SET (new_out, x));
0462169c 12403 x = gen_rtx_NOT (wmode, new_out);
f7df4a84 12404 emit_insn (gen_rtx_SET (new_out, x));
0462169c
SN
12405 break;
12406
12407 case MINUS:
12408 if (CONST_INT_P (value))
12409 {
12410 value = GEN_INT (-INTVAL (value));
12411 code = PLUS;
12412 }
12413 /* Fall through. */
12414
12415 default:
12416 x = gen_rtx_fmt_ee (code, wmode, old_out, value);
f7df4a84 12417 emit_insn (gen_rtx_SET (new_out, x));
0462169c
SN
12418 break;
12419 }
12420
12421 aarch64_emit_store_exclusive (mode, cond, mem,
12422 gen_lowpart (mode, new_out), model_rtx);
12423
12424 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
12425 x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
12426 gen_rtx_LABEL_REF (Pmode, label), pc_rtx);
f7df4a84 12427 aarch64_emit_unlikely_jump (gen_rtx_SET (pc_rtx, x));
f70fb3b6
MW
12428
12429 /* Emit any final barrier needed for a __sync operation. */
12430 if (is_sync)
12431 aarch64_emit_post_barrier (model);
0462169c
SN
12432}
12433
c2ec330c
AL
12434static void
12435aarch64_init_libfuncs (void)
12436{
12437 /* Half-precision float operations. The compiler handles all operations
12438 whose libfuncs are NULL by converting to SFmode. */
12439
12440 /* Conversions. */
12441 set_conv_libfunc (trunc_optab, HFmode, SFmode, "__gnu_f2h_ieee");
12442 set_conv_libfunc (sext_optab, SFmode, HFmode, "__gnu_h2f_ieee");
12443
12444 /* Arithmetic. */
12445 set_optab_libfunc (add_optab, HFmode, NULL);
12446 set_optab_libfunc (sdiv_optab, HFmode, NULL);
12447 set_optab_libfunc (smul_optab, HFmode, NULL);
12448 set_optab_libfunc (neg_optab, HFmode, NULL);
12449 set_optab_libfunc (sub_optab, HFmode, NULL);
12450
12451 /* Comparisons. */
12452 set_optab_libfunc (eq_optab, HFmode, NULL);
12453 set_optab_libfunc (ne_optab, HFmode, NULL);
12454 set_optab_libfunc (lt_optab, HFmode, NULL);
12455 set_optab_libfunc (le_optab, HFmode, NULL);
12456 set_optab_libfunc (ge_optab, HFmode, NULL);
12457 set_optab_libfunc (gt_optab, HFmode, NULL);
12458 set_optab_libfunc (unord_optab, HFmode, NULL);
12459}
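/* For example, with these settings an addition of two __fp16 values is not
   done by a library call for HFmode; both operands are widened with
   __gnu_f2h_ieee's counterpart __gnu_h2f_ieee, the add is performed in
   SFmode, and any narrowing back to __fp16 uses __gnu_f2h_ieee
   (illustrative restatement of the comment above).  */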
12460
43e9d192 12461/* Target hook for c_mode_for_suffix. */
ef4bddc2 12462static machine_mode
43e9d192
IB
12463aarch64_c_mode_for_suffix (char suffix)
12464{
12465 if (suffix == 'q')
12466 return TFmode;
12467
12468 return VOIDmode;
12469}
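/* For instance, this is what lets a constant written as 1.0q be given
   TFmode, the 128-bit IEEE binary128 format that AArch64 also uses for
   long double (illustrative example).  */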
12470
3520f7cc
JG
12471/* We can only represent floating point constants which will fit in
12472 "quarter-precision" values. These values are characterised by
12473 a sign bit, a 4-bit mantissa and a 3-bit exponent, and are given
12474 by:
12475
12476 (-1)^s * (n/16) * 2^r
12477
12478 Where:
12479 's' is the sign bit.
12480 'n' is an integer in the range 16 <= n <= 31.
12481 'r' is an integer in the range -3 <= r <= 4. */
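/* Worked examples of the formula above: 1.0 = (16/16) * 2^0 and
   0.25 = (16/16) * 2^-2 are representable, as is the largest value
   31.0 = (31/16) * 2^4; a value such as 0.1 has no exact n/16 * 2^r
   form and so cannot be encoded.  */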
12482
12483/* Return true iff X can be represented by a quarter-precision
12484 floating point immediate operand.  Note, we cannot represent 0.0. */
12485bool
12486aarch64_float_const_representable_p (rtx x)
12487{
12488 /* This represents our current view of how many bits
12489 make up the mantissa. */
12490 int point_pos = 2 * HOST_BITS_PER_WIDE_INT - 1;
ba96cdfb 12491 int exponent;
3520f7cc 12492 unsigned HOST_WIDE_INT mantissa, mask;
3520f7cc 12493 REAL_VALUE_TYPE r, m;
807e902e 12494 bool fail;
3520f7cc
JG
12495
12496 if (!CONST_DOUBLE_P (x))
12497 return false;
12498
c2ec330c
AL
12499 /* We don't support HFmode constants yet. */
12500 if (GET_MODE (x) == VOIDmode || GET_MODE (x) == HFmode)
94bfa2da
TV
12501 return false;
12502
34a72c33 12503 r = *CONST_DOUBLE_REAL_VALUE (x);
3520f7cc
JG
12504
12505 /* We cannot represent infinities, NaNs or +/-zero. We won't
12506 know if we have +zero until we analyse the mantissa, but we
12507 can reject the other invalid values. */
12508 if (REAL_VALUE_ISINF (r) || REAL_VALUE_ISNAN (r)
12509 || REAL_VALUE_MINUS_ZERO (r))
12510 return false;
12511
ba96cdfb 12512 /* Extract exponent. */
3520f7cc
JG
12513 r = real_value_abs (&r);
12514 exponent = REAL_EXP (&r);
12515
12516 /* For the mantissa, we expand into two HOST_WIDE_INTS, apart from the
12517 highest (sign) bit, with a fixed binary point at bit point_pos.
12518 m1 holds the low part of the mantissa, m2 the high part.
12519 WARNING: If we ever have a representation using more than 2 * H_W_I - 1
12520 bits for the mantissa, this can fail (low bits will be lost). */
12521 real_ldexp (&m, &r, point_pos - exponent);
807e902e 12522 wide_int w = real_to_integer (&m, &fail, HOST_BITS_PER_WIDE_INT * 2);
3520f7cc
JG
12523
12524 /* If the low part of the mantissa has bits set we cannot represent
12525 the value. */
d9074b29 12526 if (w.ulow () != 0)
3520f7cc
JG
12527 return false;
12528 /* We have rejected the lower HOST_WIDE_INT, so update our
12529 understanding of how many bits lie in the mantissa and
12530 look only at the high HOST_WIDE_INT. */
807e902e 12531 mantissa = w.elt (1);
3520f7cc
JG
12532 point_pos -= HOST_BITS_PER_WIDE_INT;
12533
12534 /* We can only represent values with a mantissa of the form 1.xxxx. */
12535 mask = ((unsigned HOST_WIDE_INT)1 << (point_pos - 5)) - 1;
12536 if ((mantissa & mask) != 0)
12537 return false;
12538
12539 /* Having filtered unrepresentable values, we may now remove all
12540 but the highest 5 bits. */
12541 mantissa >>= point_pos - 5;
12542
12543 /* We cannot represent the value 0.0, so reject it. This is handled
12544 elsewhere. */
12545 if (mantissa == 0)
12546 return false;
12547
12548 /* Then, as bit 4 is always set, we can mask it off, leaving
12549 the mantissa in the range [0, 15]. */
12550 mantissa &= ~(1 << 4);
12551 gcc_assert (mantissa <= 15);
12552
12553 /* GCC internally does not use IEEE754-like encoding (where normalized
12554 significands are in the range [1, 2). GCC uses [0.5, 1) (see real.c).
12555 Our mantissa values are shifted 4 places to the left relative to
12556 normalized IEEE754 so we must modify the exponent returned by REAL_EXP
12557 by 5 places to correct for GCC's representation. */
12558 exponent = 5 - exponent;
12559
12560 return (exponent >= 0 && exponent <= 7);
12561}
12562
12563char*
81c2dfb9 12564aarch64_output_simd_mov_immediate (rtx const_vector,
ef4bddc2 12565 machine_mode mode,
3520f7cc
JG
12566 unsigned width)
12567{
3ea63f60 12568 bool is_valid;
3520f7cc 12569 static char templ[40];
3520f7cc 12570 const char *mnemonic;
e4f0f84d 12571 const char *shift_op;
3520f7cc 12572 unsigned int lane_count = 0;
81c2dfb9 12573 char element_char;
3520f7cc 12574
e4f0f84d 12575 struct simd_immediate_info info = { NULL_RTX, 0, 0, false, false };
48063b9d
IB
12576
12577 /* This will return true to show const_vector is legal for use as either
12578 an AdvSIMD MOVI instruction (or, implicitly, MVNI) immediate. It will
12579 also update INFO to show how the immediate should be generated. */
81c2dfb9 12580 is_valid = aarch64_simd_valid_immediate (const_vector, mode, false, &info);
3520f7cc
JG
12581 gcc_assert (is_valid);
12582
81c2dfb9 12583 element_char = sizetochar (info.element_width);
48063b9d
IB
12584 lane_count = width / info.element_width;
12585
3520f7cc 12586 mode = GET_MODE_INNER (mode);
0d8e1702 12587 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
3520f7cc 12588 {
48063b9d 12589 gcc_assert (info.shift == 0 && ! info.mvn);
0d8e1702
KT
12590 /* For FP zero change it to a CONST_INT 0 and use the integer SIMD
12591 move immediate path. */
48063b9d
IB
12592 if (aarch64_float_const_zero_rtx_p (info.value))
12593 info.value = GEN_INT (0);
12594 else
12595 {
83faf7d0 12596 const unsigned int buf_size = 20;
48063b9d 12597 char float_buf[buf_size] = {'\0'};
34a72c33
RS
12598 real_to_decimal_for_mode (float_buf,
12599 CONST_DOUBLE_REAL_VALUE (info.value),
12600 buf_size, buf_size, 1, mode);
48063b9d
IB
12601
12602 if (lane_count == 1)
12603 snprintf (templ, sizeof (templ), "fmov\t%%d0, %s", float_buf);
12604 else
12605 snprintf (templ, sizeof (templ), "fmov\t%%0.%d%c, %s",
81c2dfb9 12606 lane_count, element_char, float_buf);
48063b9d
IB
12607 return templ;
12608 }
3520f7cc 12609 }
3520f7cc 12610
48063b9d 12611 mnemonic = info.mvn ? "mvni" : "movi";
e4f0f84d 12612 shift_op = info.msl ? "msl" : "lsl";
3520f7cc 12613
0d8e1702 12614 gcc_assert (CONST_INT_P (info.value));
3520f7cc 12615 if (lane_count == 1)
48063b9d
IB
12616 snprintf (templ, sizeof (templ), "%s\t%%d0, " HOST_WIDE_INT_PRINT_HEX,
12617 mnemonic, UINTVAL (info.value));
12618 else if (info.shift)
12619 snprintf (templ, sizeof (templ), "%s\t%%0.%d%c, " HOST_WIDE_INT_PRINT_HEX
e4f0f84d
TB
12620 ", %s %d", mnemonic, lane_count, element_char,
12621 UINTVAL (info.value), shift_op, info.shift);
3520f7cc 12622 else
48063b9d 12623 snprintf (templ, sizeof (templ), "%s\t%%0.%d%c, " HOST_WIDE_INT_PRINT_HEX,
81c2dfb9 12624 mnemonic, lane_count, element_char, UINTVAL (info.value));
3520f7cc
JG
12625 return templ;
12626}
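/* For instance, a V4SImode constant with every element equal to 1 goes
   down the integer path above and produces a template along the lines of
   "movi\t%0.4s, 0x1" (illustrative; the exact text depends on the
   simd_immediate_info computed for the constant).  */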
12627
b7342d25
IB
12628char*
12629aarch64_output_scalar_simd_mov_immediate (rtx immediate,
ef4bddc2 12630 machine_mode mode)
b7342d25 12631{
ef4bddc2 12632 machine_mode vmode;
b7342d25
IB
12633
12634 gcc_assert (!VECTOR_MODE_P (mode));
12635 vmode = aarch64_simd_container_mode (mode, 64);
12636 rtx v_op = aarch64_simd_gen_const_vector_dup (vmode, INTVAL (immediate));
12637 return aarch64_output_simd_mov_immediate (v_op, vmode, 64);
12638}
12639
88b08073
JG
12640/* Split operands into moves from op[1] + op[2] into op[0]. */
12641
12642void
12643aarch64_split_combinev16qi (rtx operands[3])
12644{
12645 unsigned int dest = REGNO (operands[0]);
12646 unsigned int src1 = REGNO (operands[1]);
12647 unsigned int src2 = REGNO (operands[2]);
ef4bddc2 12648 machine_mode halfmode = GET_MODE (operands[1]);
88b08073
JG
12649 unsigned int halfregs = HARD_REGNO_NREGS (src1, halfmode);
12650 rtx destlo, desthi;
12651
12652 gcc_assert (halfmode == V16QImode);
12653
12654 if (src1 == dest && src2 == dest + halfregs)
12655 {
12656 /* No-op move. Can't split to nothing; emit something. */
12657 emit_note (NOTE_INSN_DELETED);
12658 return;
12659 }
12660
12661 /* Preserve register attributes for variable tracking. */
12662 destlo = gen_rtx_REG_offset (operands[0], halfmode, dest, 0);
12663 desthi = gen_rtx_REG_offset (operands[0], halfmode, dest + halfregs,
12664 GET_MODE_SIZE (halfmode));
12665
12666 /* Special case of reversed high/low parts: swap them with three XORs. */
12667 if (reg_overlap_mentioned_p (operands[2], destlo)
12668 && reg_overlap_mentioned_p (operands[1], desthi))
12669 {
12670 emit_insn (gen_xorv16qi3 (operands[1], operands[1], operands[2]));
12671 emit_insn (gen_xorv16qi3 (operands[2], operands[1], operands[2]));
12672 emit_insn (gen_xorv16qi3 (operands[1], operands[1], operands[2]));
12673 }
12674 else if (!reg_overlap_mentioned_p (operands[2], destlo))
12675 {
12676 /* Try to avoid unnecessary moves if part of the result
12677 is in the right place already. */
12678 if (src1 != dest)
12679 emit_move_insn (destlo, operands[1]);
12680 if (src2 != dest + halfregs)
12681 emit_move_insn (desthi, operands[2]);
12682 }
12683 else
12684 {
12685 if (src2 != dest + halfregs)
12686 emit_move_insn (desthi, operands[2]);
12687 if (src1 != dest)
12688 emit_move_insn (destlo, operands[1]);
12689 }
12690}
12691
12692/* vec_perm support. */
12693
12694#define MAX_VECT_LEN 16
12695
12696struct expand_vec_perm_d
12697{
12698 rtx target, op0, op1;
12699 unsigned char perm[MAX_VECT_LEN];
ef4bddc2 12700 machine_mode vmode;
88b08073
JG
12701 unsigned char nelt;
12702 bool one_vector_p;
12703 bool testing_p;
12704};
12705
12706/* Generate a variable permutation. */
12707
12708static void
12709aarch64_expand_vec_perm_1 (rtx target, rtx op0, rtx op1, rtx sel)
12710{
ef4bddc2 12711 machine_mode vmode = GET_MODE (target);
88b08073
JG
12712 bool one_vector_p = rtx_equal_p (op0, op1);
12713
12714 gcc_checking_assert (vmode == V8QImode || vmode == V16QImode);
12715 gcc_checking_assert (GET_MODE (op0) == vmode);
12716 gcc_checking_assert (GET_MODE (op1) == vmode);
12717 gcc_checking_assert (GET_MODE (sel) == vmode);
12718 gcc_checking_assert (TARGET_SIMD);
12719
12720 if (one_vector_p)
12721 {
12722 if (vmode == V8QImode)
12723 {
12724 /* Expand the argument to a V16QI mode by duplicating it. */
12725 rtx pair = gen_reg_rtx (V16QImode);
12726 emit_insn (gen_aarch64_combinev8qi (pair, op0, op0));
12727 emit_insn (gen_aarch64_tbl1v8qi (target, pair, sel));
12728 }
12729 else
12730 {
12731 emit_insn (gen_aarch64_tbl1v16qi (target, op0, sel));
12732 }
12733 }
12734 else
12735 {
12736 rtx pair;
12737
12738 if (vmode == V8QImode)
12739 {
12740 pair = gen_reg_rtx (V16QImode);
12741 emit_insn (gen_aarch64_combinev8qi (pair, op0, op1));
12742 emit_insn (gen_aarch64_tbl1v8qi (target, pair, sel));
12743 }
12744 else
12745 {
12746 pair = gen_reg_rtx (OImode);
12747 emit_insn (gen_aarch64_combinev16qi (pair, op0, op1));
12748 emit_insn (gen_aarch64_tbl2v16qi (target, pair, sel));
12749 }
12750 }
12751}
12752
12753void
12754aarch64_expand_vec_perm (rtx target, rtx op0, rtx op1, rtx sel)
12755{
ef4bddc2 12756 machine_mode vmode = GET_MODE (target);
c9d1a16a 12757 unsigned int nelt = GET_MODE_NUNITS (vmode);
88b08073 12758 bool one_vector_p = rtx_equal_p (op0, op1);
f7c4e5b8 12759 rtx mask;
88b08073
JG
12760
12761 /* The TBL instruction does not use a modulo index, so we must take care
12762 of that ourselves. */
f7c4e5b8
AL
12763 mask = aarch64_simd_gen_const_vector_dup (vmode,
12764 one_vector_p ? nelt - 1 : 2 * nelt - 1);
88b08073
JG
12765 sel = expand_simple_binop (vmode, AND, sel, mask, NULL, 0, OPTAB_LIB_WIDEN);
12766
f7c4e5b8
AL
12767 /* For big-endian, we also need to reverse the index within the vector
12768 (but not which vector). */
12769 if (BYTES_BIG_ENDIAN)
12770 {
12771 /* If one_vector_p, mask is a vector of (nelt - 1)'s already. */
12772 if (!one_vector_p)
12773 mask = aarch64_simd_gen_const_vector_dup (vmode, nelt - 1);
12774 sel = expand_simple_binop (vmode, XOR, sel, mask,
12775 NULL, 0, OPTAB_LIB_WIDEN);
12776 }
88b08073
JG
12777 aarch64_expand_vec_perm_1 (target, op0, op1, sel);
12778}
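/* For example, with two V8QImode inputs a selector element of 17 is
   reduced to 17 & 15 = 1 before the TBL is emitted: vec_perm semantics
   take the index modulo twice the element count, whereas TBL does not
   wrap out-of-range indices itself (it would return 0 for them).
   Illustrative only.  */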
12779
cc4d934f
JG
12780/* Recognize patterns suitable for the TRN instructions. */
12781static bool
12782aarch64_evpc_trn (struct expand_vec_perm_d *d)
12783{
12784 unsigned int i, odd, mask, nelt = d->nelt;
12785 rtx out, in0, in1, x;
12786 rtx (*gen) (rtx, rtx, rtx);
ef4bddc2 12787 machine_mode vmode = d->vmode;
cc4d934f
JG
12788
12789 if (GET_MODE_UNIT_SIZE (vmode) > 8)
12790 return false;
12791
12792 /* Note that these are little-endian tests.
12793 We correct for big-endian later. */
12794 if (d->perm[0] == 0)
12795 odd = 0;
12796 else if (d->perm[0] == 1)
12797 odd = 1;
12798 else
12799 return false;
12800 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
12801
12802 for (i = 0; i < nelt; i += 2)
12803 {
12804 if (d->perm[i] != i + odd)
12805 return false;
12806 if (d->perm[i + 1] != ((i + nelt + odd) & mask))
12807 return false;
12808 }
12809
12810 /* Success! */
12811 if (d->testing_p)
12812 return true;
12813
12814 in0 = d->op0;
12815 in1 = d->op1;
12816 if (BYTES_BIG_ENDIAN)
12817 {
12818 x = in0, in0 = in1, in1 = x;
12819 odd = !odd;
12820 }
12821 out = d->target;
12822
12823 if (odd)
12824 {
12825 switch (vmode)
12826 {
12827 case V16QImode: gen = gen_aarch64_trn2v16qi; break;
12828 case V8QImode: gen = gen_aarch64_trn2v8qi; break;
12829 case V8HImode: gen = gen_aarch64_trn2v8hi; break;
12830 case V4HImode: gen = gen_aarch64_trn2v4hi; break;
12831 case V4SImode: gen = gen_aarch64_trn2v4si; break;
12832 case V2SImode: gen = gen_aarch64_trn2v2si; break;
12833 case V2DImode: gen = gen_aarch64_trn2v2di; break;
358decd5
JW
12834 case V4HFmode: gen = gen_aarch64_trn2v4hf; break;
12835 case V8HFmode: gen = gen_aarch64_trn2v8hf; break;
cc4d934f
JG
12836 case V4SFmode: gen = gen_aarch64_trn2v4sf; break;
12837 case V2SFmode: gen = gen_aarch64_trn2v2sf; break;
12838 case V2DFmode: gen = gen_aarch64_trn2v2df; break;
12839 default:
12840 return false;
12841 }
12842 }
12843 else
12844 {
12845 switch (vmode)
12846 {
12847 case V16QImode: gen = gen_aarch64_trn1v16qi; break;
12848 case V8QImode: gen = gen_aarch64_trn1v8qi; break;
12849 case V8HImode: gen = gen_aarch64_trn1v8hi; break;
12850 case V4HImode: gen = gen_aarch64_trn1v4hi; break;
12851 case V4SImode: gen = gen_aarch64_trn1v4si; break;
12852 case V2SImode: gen = gen_aarch64_trn1v2si; break;
12853 case V2DImode: gen = gen_aarch64_trn1v2di; break;
358decd5
JW
12854 case V4HFmode: gen = gen_aarch64_trn1v4hf; break;
12855 case V8HFmode: gen = gen_aarch64_trn1v8hf; break;
cc4d934f
JG
12856 case V4SFmode: gen = gen_aarch64_trn1v4sf; break;
12857 case V2SFmode: gen = gen_aarch64_trn1v2sf; break;
12858 case V2DFmode: gen = gen_aarch64_trn1v2df; break;
12859 default:
12860 return false;
12861 }
12862 }
12863
12864 emit_insn (gen (out, in0, in1));
12865 return true;
12866}
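/* Illustration: for V4SImode inputs {a0,a1,a2,a3} and {b0,b1,b2,b3},
   the permutation {0,4,2,6} is matched here as TRN1 (giving
   {a0,b0,a2,b2}) and {1,5,3,7} as TRN2 (giving {a1,b1,a3,b3}).  */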
12867
12868/* Recognize patterns suitable for the UZP instructions. */
12869static bool
12870aarch64_evpc_uzp (struct expand_vec_perm_d *d)
12871{
12872 unsigned int i, odd, mask, nelt = d->nelt;
12873 rtx out, in0, in1, x;
12874 rtx (*gen) (rtx, rtx, rtx);
ef4bddc2 12875 machine_mode vmode = d->vmode;
cc4d934f
JG
12876
12877 if (GET_MODE_UNIT_SIZE (vmode) > 8)
12878 return false;
12879
12880 /* Note that these are little-endian tests.
12881 We correct for big-endian later. */
12882 if (d->perm[0] == 0)
12883 odd = 0;
12884 else if (d->perm[0] == 1)
12885 odd = 1;
12886 else
12887 return false;
12888 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
12889
12890 for (i = 0; i < nelt; i++)
12891 {
12892 unsigned elt = (i * 2 + odd) & mask;
12893 if (d->perm[i] != elt)
12894 return false;
12895 }
12896
12897 /* Success! */
12898 if (d->testing_p)
12899 return true;
12900
12901 in0 = d->op0;
12902 in1 = d->op1;
12903 if (BYTES_BIG_ENDIAN)
12904 {
12905 x = in0, in0 = in1, in1 = x;
12906 odd = !odd;
12907 }
12908 out = d->target;
12909
12910 if (odd)
12911 {
12912 switch (vmode)
12913 {
12914 case V16QImode: gen = gen_aarch64_uzp2v16qi; break;
12915 case V8QImode: gen = gen_aarch64_uzp2v8qi; break;
12916 case V8HImode: gen = gen_aarch64_uzp2v8hi; break;
12917 case V4HImode: gen = gen_aarch64_uzp2v4hi; break;
12918 case V4SImode: gen = gen_aarch64_uzp2v4si; break;
12919 case V2SImode: gen = gen_aarch64_uzp2v2si; break;
12920 case V2DImode: gen = gen_aarch64_uzp2v2di; break;
358decd5
JW
12921 case V4HFmode: gen = gen_aarch64_uzp2v4hf; break;
12922 case V8HFmode: gen = gen_aarch64_uzp2v8hf; break;
cc4d934f
JG
12923 case V4SFmode: gen = gen_aarch64_uzp2v4sf; break;
12924 case V2SFmode: gen = gen_aarch64_uzp2v2sf; break;
12925 case V2DFmode: gen = gen_aarch64_uzp2v2df; break;
12926 default:
12927 return false;
12928 }
12929 }
12930 else
12931 {
12932 switch (vmode)
12933 {
12934 case V16QImode: gen = gen_aarch64_uzp1v16qi; break;
12935 case V8QImode: gen = gen_aarch64_uzp1v8qi; break;
12936 case V8HImode: gen = gen_aarch64_uzp1v8hi; break;
12937 case V4HImode: gen = gen_aarch64_uzp1v4hi; break;
12938 case V4SImode: gen = gen_aarch64_uzp1v4si; break;
12939 case V2SImode: gen = gen_aarch64_uzp1v2si; break;
12940 case V2DImode: gen = gen_aarch64_uzp1v2di; break;
358decd5
JW
12941 case V4HFmode: gen = gen_aarch64_uzp1v4hf; break;
12942 case V8HFmode: gen = gen_aarch64_uzp1v8hf; break;
cc4d934f
JG
12943 case V4SFmode: gen = gen_aarch64_uzp1v4sf; break;
12944 case V2SFmode: gen = gen_aarch64_uzp1v2sf; break;
12945 case V2DFmode: gen = gen_aarch64_uzp1v2df; break;
12946 default:
12947 return false;
12948 }
12949 }
12950
12951 emit_insn (gen (out, in0, in1));
12952 return true;
12953}
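/* Illustration: for V4SImode inputs {a0,a1,a2,a3} and {b0,b1,b2,b3},
   the permutation {0,2,4,6} is matched as UZP1 (the even-numbered
   elements, {a0,a2,b0,b2}) and {1,3,5,7} as UZP2 (the odd-numbered
   elements, {a1,a3,b1,b3}).  */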
12954
12955/* Recognize patterns suitable for the ZIP instructions. */
12956static bool
12957aarch64_evpc_zip (struct expand_vec_perm_d *d)
12958{
12959 unsigned int i, high, mask, nelt = d->nelt;
12960 rtx out, in0, in1, x;
12961 rtx (*gen) (rtx, rtx, rtx);
ef4bddc2 12962 machine_mode vmode = d->vmode;
cc4d934f
JG
12963
12964 if (GET_MODE_UNIT_SIZE (vmode) > 8)
12965 return false;
12966
12967 /* Note that these are little-endian tests.
12968 We correct for big-endian later. */
12969 high = nelt / 2;
12970 if (d->perm[0] == high)
12971 /* Do Nothing. */
12972 ;
12973 else if (d->perm[0] == 0)
12974 high = 0;
12975 else
12976 return false;
12977 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
12978
12979 for (i = 0; i < nelt / 2; i++)
12980 {
12981 unsigned elt = (i + high) & mask;
12982 if (d->perm[i * 2] != elt)
12983 return false;
12984 elt = (elt + nelt) & mask;
12985 if (d->perm[i * 2 + 1] != elt)
12986 return false;
12987 }
12988
12989 /* Success! */
12990 if (d->testing_p)
12991 return true;
12992
12993 in0 = d->op0;
12994 in1 = d->op1;
12995 if (BYTES_BIG_ENDIAN)
12996 {
12997 x = in0, in0 = in1, in1 = x;
12998 high = !high;
12999 }
13000 out = d->target;
13001
13002 if (high)
13003 {
13004 switch (vmode)
13005 {
13006 case V16QImode: gen = gen_aarch64_zip2v16qi; break;
13007 case V8QImode: gen = gen_aarch64_zip2v8qi; break;
13008 case V8HImode: gen = gen_aarch64_zip2v8hi; break;
13009 case V4HImode: gen = gen_aarch64_zip2v4hi; break;
13010 case V4SImode: gen = gen_aarch64_zip2v4si; break;
13011 case V2SImode: gen = gen_aarch64_zip2v2si; break;
13012 case V2DImode: gen = gen_aarch64_zip2v2di; break;
358decd5
JW
13013 case V4HFmode: gen = gen_aarch64_zip2v4hf; break;
13014 case V8HFmode: gen = gen_aarch64_zip2v8hf; break;
cc4d934f
JG
13015 case V4SFmode: gen = gen_aarch64_zip2v4sf; break;
13016 case V2SFmode: gen = gen_aarch64_zip2v2sf; break;
13017 case V2DFmode: gen = gen_aarch64_zip2v2df; break;
13018 default:
13019 return false;
13020 }
13021 }
13022 else
13023 {
13024 switch (vmode)
13025 {
13026 case V16QImode: gen = gen_aarch64_zip1v16qi; break;
13027 case V8QImode: gen = gen_aarch64_zip1v8qi; break;
13028 case V8HImode: gen = gen_aarch64_zip1v8hi; break;
13029 case V4HImode: gen = gen_aarch64_zip1v4hi; break;
13030 case V4SImode: gen = gen_aarch64_zip1v4si; break;
13031 case V2SImode: gen = gen_aarch64_zip1v2si; break;
13032 case V2DImode: gen = gen_aarch64_zip1v2di; break;
358decd5
JW
13033 case V4HFmode: gen = gen_aarch64_zip1v4hf; break;
13034 case V8HFmode: gen = gen_aarch64_zip1v8hf; break;
cc4d934f
JG
13035 case V4SFmode: gen = gen_aarch64_zip1v4sf; break;
13036 case V2SFmode: gen = gen_aarch64_zip1v2sf; break;
13037 case V2DFmode: gen = gen_aarch64_zip1v2df; break;
13038 default:
13039 return false;
13040 }
13041 }
13042
13043 emit_insn (gen (out, in0, in1));
13044 return true;
13045}
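/* Illustration: for V4SImode inputs {a0,a1,a2,a3} and {b0,b1,b2,b3},
   the permutation {0,4,1,5} is matched as ZIP1 (interleaving the low
   halves, {a0,b0,a1,b1}) and {2,6,3,7} as ZIP2 (interleaving the high
   halves, {a2,b2,a3,b3}).  */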
13046
ae0533da
AL
13047/* Recognize patterns for the EXT insn. */
13048
13049static bool
13050aarch64_evpc_ext (struct expand_vec_perm_d *d)
13051{
13052 unsigned int i, nelt = d->nelt;
13053 rtx (*gen) (rtx, rtx, rtx, rtx);
13054 rtx offset;
13055
13056 unsigned int location = d->perm[0]; /* Always < nelt. */
13057
13058 /* Check if the extracted indices are increasing by one. */
13059 for (i = 1; i < nelt; i++)
13060 {
13061 unsigned int required = location + i;
13062 if (d->one_vector_p)
13063 {
13064 /* We'll pass the same vector in twice, so allow indices to wrap. */
13065 required &= (nelt - 1);
13066 }
13067 if (d->perm[i] != required)
13068 return false;
13069 }
13070
ae0533da
AL
13071 switch (d->vmode)
13072 {
13073 case V16QImode: gen = gen_aarch64_extv16qi; break;
13074 case V8QImode: gen = gen_aarch64_extv8qi; break;
13075 case V4HImode: gen = gen_aarch64_extv4hi; break;
13076 case V8HImode: gen = gen_aarch64_extv8hi; break;
13077 case V2SImode: gen = gen_aarch64_extv2si; break;
13078 case V4SImode: gen = gen_aarch64_extv4si; break;
358decd5
JW
13079 case V4HFmode: gen = gen_aarch64_extv4hf; break;
13080 case V8HFmode: gen = gen_aarch64_extv8hf; break;
ae0533da
AL
13081 case V2SFmode: gen = gen_aarch64_extv2sf; break;
13082 case V4SFmode: gen = gen_aarch64_extv4sf; break;
13083 case V2DImode: gen = gen_aarch64_extv2di; break;
13084 case V2DFmode: gen = gen_aarch64_extv2df; break;
13085 default:
13086 return false;
13087 }
13088
13089 /* Success! */
13090 if (d->testing_p)
13091 return true;
13092
b31e65bb
AL
13093 /* The case where (location == 0) is a no-op for both big- and little-endian,
13094 and is removed by the mid-end at optimization levels -O1 and higher. */
13095
13096 if (BYTES_BIG_ENDIAN && (location != 0))
ae0533da
AL
13097 {
13098 /* After setup, we want the high elements of the first vector (stored
13099 at the LSB end of the register), and the low elements of the second
13100 vector (stored at the MSB end of the register). So swap. */
cb5c6c29 13101 std::swap (d->op0, d->op1);
ae0533da
AL
13102 /* location != 0 (above), so safe to assume (nelt - location) < nelt. */
13103 location = nelt - location;
13104 }
13105
13106 offset = GEN_INT (location);
13107 emit_insn (gen (d->target, d->op0, d->op1, offset));
13108 return true;
13109}
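/* Illustration: for V4SImode inputs {a0,a1,a2,a3} and {b0,b1,b2,b3},
   the permutation {1,2,3,4} (consecutive indices starting at 1) is
   matched as an EXT with element offset 1, producing {a1,a2,a3,b0};
   with a single input, {3,0,1,2} wraps around and becomes a rotation.  */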
13110
923fcec3
AL
13111/* Recognize patterns for the REV insns. */
13112
13113static bool
13114aarch64_evpc_rev (struct expand_vec_perm_d *d)
13115{
13116 unsigned int i, j, diff, nelt = d->nelt;
13117 rtx (*gen) (rtx, rtx);
13118
13119 if (!d->one_vector_p)
13120 return false;
13121
13122 diff = d->perm[0];
13123 switch (diff)
13124 {
13125 case 7:
13126 switch (d->vmode)
13127 {
13128 case V16QImode: gen = gen_aarch64_rev64v16qi; break;
13129 case V8QImode: gen = gen_aarch64_rev64v8qi; break;
13130 default:
13131 return false;
13132 }
13133 break;
13134 case 3:
13135 switch (d->vmode)
13136 {
13137 case V16QImode: gen = gen_aarch64_rev32v16qi; break;
13138 case V8QImode: gen = gen_aarch64_rev32v8qi; break;
13139 case V8HImode: gen = gen_aarch64_rev64v8hi; break;
13140 case V4HImode: gen = gen_aarch64_rev64v4hi; break;
13141 default:
13142 return false;
13143 }
13144 break;
13145 case 1:
13146 switch (d->vmode)
13147 {
13148 case V16QImode: gen = gen_aarch64_rev16v16qi; break;
13149 case V8QImode: gen = gen_aarch64_rev16v8qi; break;
13150 case V8HImode: gen = gen_aarch64_rev32v8hi; break;
13151 case V4HImode: gen = gen_aarch64_rev32v4hi; break;
13152 case V4SImode: gen = gen_aarch64_rev64v4si; break;
13153 case V2SImode: gen = gen_aarch64_rev64v2si; break;
13154 case V4SFmode: gen = gen_aarch64_rev64v4sf; break;
13155 case V2SFmode: gen = gen_aarch64_rev64v2sf; break;
358decd5
JW
13156 case V8HFmode: gen = gen_aarch64_rev64v8hf; break;
13157 case V4HFmode: gen = gen_aarch64_rev64v4hf; break;
923fcec3
AL
13158 default:
13159 return false;
13160 }
13161 break;
13162 default:
13163 return false;
13164 }
13165
13166 for (i = 0; i < nelt ; i += diff + 1)
13167 for (j = 0; j <= diff; j += 1)
13168 {
13169 /* This is guaranteed to be true as the value of diff
13170 is 7, 3 or 1 and we should have enough elements in the
13171 queue to generate this. Getting a vector mask with a
13172 value of diff other than these values implies that
13173 something is wrong by the time we get here. */
13174 gcc_assert (i + j < nelt);
13175 if (d->perm[i + j] != i + diff - j)
13176 return false;
13177 }
13178
13179 /* Success! */
13180 if (d->testing_p)
13181 return true;
13182
13183 emit_insn (gen (d->target, d->op0));
13184 return true;
13185}
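/* Illustration: with a single V8QImode input, d->perm[0] == 3 selects the
   REV32 form and the required permutation is {3,2,1,0,7,6,5,4}, i.e. the
   bytes are reversed within each 32-bit word while the words themselves
   stay in place.  */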
13186
91bd4114
JG
13187static bool
13188aarch64_evpc_dup (struct expand_vec_perm_d *d)
13189{
13190 rtx (*gen) (rtx, rtx, rtx);
13191 rtx out = d->target;
13192 rtx in0;
ef4bddc2 13193 machine_mode vmode = d->vmode;
91bd4114
JG
13194 unsigned int i, elt, nelt = d->nelt;
13195 rtx lane;
13196
91bd4114
JG
13197 elt = d->perm[0];
13198 for (i = 1; i < nelt; i++)
13199 {
13200 if (elt != d->perm[i])
13201 return false;
13202 }
13203
13204 /* The generic preparation in aarch64_expand_vec_perm_const_1
13205 swaps the operand order and the permute indices if it finds
13206 d->perm[0] to be in the second operand. Thus, we can always
13207 use d->op0 and need not do any extra arithmetic to get the
13208 correct lane number. */
13209 in0 = d->op0;
f901401e 13210 lane = GEN_INT (elt); /* The pattern corrects for big-endian. */
91bd4114
JG
13211
13212 switch (vmode)
13213 {
13214 case V16QImode: gen = gen_aarch64_dup_lanev16qi; break;
13215 case V8QImode: gen = gen_aarch64_dup_lanev8qi; break;
13216 case V8HImode: gen = gen_aarch64_dup_lanev8hi; break;
13217 case V4HImode: gen = gen_aarch64_dup_lanev4hi; break;
13218 case V4SImode: gen = gen_aarch64_dup_lanev4si; break;
13219 case V2SImode: gen = gen_aarch64_dup_lanev2si; break;
13220 case V2DImode: gen = gen_aarch64_dup_lanev2di; break;
862abc04
AL
13221 case V8HFmode: gen = gen_aarch64_dup_lanev8hf; break;
13222 case V4HFmode: gen = gen_aarch64_dup_lanev4hf; break;
91bd4114
JG
13223 case V4SFmode: gen = gen_aarch64_dup_lanev4sf; break;
13224 case V2SFmode: gen = gen_aarch64_dup_lanev2sf; break;
13225 case V2DFmode: gen = gen_aarch64_dup_lanev2df; break;
13226 default:
13227 return false;
13228 }
13229
13230 emit_insn (gen (out, in0, lane));
13231 return true;
13232}
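/* Illustration: a permutation such as {2,2,2,2} on a V4SImode input is
   matched here and becomes a single lane duplicate, along the lines of
   "dup v0.4s, v1.s[2]" (registers chosen for illustration).  */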
13233
88b08073
JG
13234static bool
13235aarch64_evpc_tbl (struct expand_vec_perm_d *d)
13236{
13237 rtx rperm[MAX_VECT_LEN], sel;
ef4bddc2 13238 machine_mode vmode = d->vmode;
88b08073
JG
13239 unsigned int i, nelt = d->nelt;
13240
88b08073
JG
13241 if (d->testing_p)
13242 return true;
13243
13244 /* Generic code will try constant permutation twice: once with the
13245 original mode and again with the elements lowered to QImode.
13246 So wait and don't do the selector expansion ourselves. */
13247 if (vmode != V8QImode && vmode != V16QImode)
13248 return false;
13249
13250 for (i = 0; i < nelt; ++i)
bbcc9c00
TB
13251 {
13252 int nunits = GET_MODE_NUNITS (vmode);
13253
13254 /* If big-endian and two vectors, we end up with a weird mixed-endian
13255 mode on NEON. Reverse the index within each word but not the word
13256 itself. */
13257 rperm[i] = GEN_INT (BYTES_BIG_ENDIAN ? d->perm[i] ^ (nunits - 1)
13258 : d->perm[i]);
13259 }
88b08073
JG
13260 sel = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, rperm));
13261 sel = force_reg (vmode, sel);
13262
13263 aarch64_expand_vec_perm_1 (d->target, d->op0, d->op1, sel);
13264 return true;
13265}
13266
13267static bool
13268aarch64_expand_vec_perm_const_1 (struct expand_vec_perm_d *d)
13269{
13270 /* The pattern matching functions above are written to look for a small
13271 number to begin the sequence (0, 1, N/2). If we begin with an index
13272 from the second operand, we can swap the operands. */
13273 if (d->perm[0] >= d->nelt)
13274 {
13275 unsigned i, nelt = d->nelt;
88b08073 13276
0696116a 13277 gcc_assert (nelt == (nelt & -nelt));
88b08073 13278 for (i = 0; i < nelt; ++i)
0696116a 13279 d->perm[i] ^= nelt; /* Keep the same index, but in the other vector. */
88b08073 13280
cb5c6c29 13281 std::swap (d->op0, d->op1);
88b08073
JG
13282 }
13283
13284 if (TARGET_SIMD)
cc4d934f 13285 {
923fcec3
AL
13286 if (aarch64_evpc_rev (d))
13287 return true;
13288 else if (aarch64_evpc_ext (d))
ae0533da 13289 return true;
f901401e
AL
13290 else if (aarch64_evpc_dup (d))
13291 return true;
ae0533da 13292 else if (aarch64_evpc_zip (d))
cc4d934f
JG
13293 return true;
13294 else if (aarch64_evpc_uzp (d))
13295 return true;
13296 else if (aarch64_evpc_trn (d))
13297 return true;
13298 return aarch64_evpc_tbl (d);
13299 }
88b08073
JG
13300 return false;
13301}
13302
13303/* Expand a vec_perm_const pattern. */
13304
13305bool
13306aarch64_expand_vec_perm_const (rtx target, rtx op0, rtx op1, rtx sel)
13307{
13308 struct expand_vec_perm_d d;
13309 int i, nelt, which;
13310
13311 d.target = target;
13312 d.op0 = op0;
13313 d.op1 = op1;
13314
13315 d.vmode = GET_MODE (target);
13316 gcc_assert (VECTOR_MODE_P (d.vmode));
13317 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
13318 d.testing_p = false;
13319
13320 for (i = which = 0; i < nelt; ++i)
13321 {
13322 rtx e = XVECEXP (sel, 0, i);
13323 int ei = INTVAL (e) & (2 * nelt - 1);
13324 which |= (ei < nelt ? 1 : 2);
13325 d.perm[i] = ei;
13326 }
13327
13328 switch (which)
13329 {
13330 default:
13331 gcc_unreachable ();
13332
13333 case 3:
13334 d.one_vector_p = false;
13335 if (!rtx_equal_p (op0, op1))
13336 break;
13337
13338 /* The elements of PERM do not suggest that only the first operand
13339 is used, but both operands are identical. Allow easier matching
13340 of the permutation by folding the permutation into the single
13341 input vector. */
13342 /* Fall Through. */
13343 case 2:
13344 for (i = 0; i < nelt; ++i)
13345 d.perm[i] &= nelt - 1;
13346 d.op0 = op1;
13347 d.one_vector_p = true;
13348 break;
13349
13350 case 1:
13351 d.op1 = op0;
13352 d.one_vector_p = true;
13353 break;
13354 }
13355
13356 return aarch64_expand_vec_perm_const_1 (&d);
13357}
13358
13359static bool
ef4bddc2 13360aarch64_vectorize_vec_perm_const_ok (machine_mode vmode,
88b08073
JG
13361 const unsigned char *sel)
13362{
13363 struct expand_vec_perm_d d;
13364 unsigned int i, nelt, which;
13365 bool ret;
13366
13367 d.vmode = vmode;
13368 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
13369 d.testing_p = true;
13370 memcpy (d.perm, sel, nelt);
13371
13372 /* Calculate whether all elements are in one vector. */
13373 for (i = which = 0; i < nelt; ++i)
13374 {
13375 unsigned char e = d.perm[i];
13376 gcc_assert (e < 2 * nelt);
13377 which |= (e < nelt ? 1 : 2);
13378 }
13379
13380 /* If all elements are from the second vector, reindex as if from the
13381 first vector. */
13382 if (which == 2)
13383 for (i = 0; i < nelt; ++i)
13384 d.perm[i] -= nelt;
13385
13386 /* Check whether the mask can be applied to a single vector. */
13387 d.one_vector_p = (which != 3);
13388
13389 d.target = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 1);
13390 d.op1 = d.op0 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 2);
13391 if (!d.one_vector_p)
13392 d.op1 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 3);
13393
13394 start_sequence ();
13395 ret = aarch64_expand_vec_perm_const_1 (&d);
13396 end_sequence ();
13397
13398 return ret;
13399}
13400
668046d1
DS
13401rtx
13402aarch64_reverse_mask (enum machine_mode mode)
13403{
13404 /* We have to reverse each vector because we don't have
13405 a permuted load that can reverse-load according to ABI rules. */
13406 rtx mask;
13407 rtvec v = rtvec_alloc (16);
13408 int i, j;
13409 int nunits = GET_MODE_NUNITS (mode);
13410 int usize = GET_MODE_UNIT_SIZE (mode);
13411
13412 gcc_assert (BYTES_BIG_ENDIAN);
13413 gcc_assert (AARCH64_VALID_SIMD_QREG_MODE (mode));
13414
13415 for (i = 0; i < nunits; i++)
13416 for (j = 0; j < usize; j++)
13417 RTVEC_ELT (v, i * usize + j) = GEN_INT ((i + 1) * usize - 1 - j);
13418 mask = gen_rtx_CONST_VECTOR (V16QImode, v);
13419 return force_reg (V16QImode, mask);
13420}
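/* Illustration: for V4SImode the constant built above is the byte selector
   { 3,2,1,0, 7,6,5,4, 11,10,9,8, 15,14,13,12 }, i.e. a TBL mask that
   reverses the bytes within each 4-byte element.  */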
13421
61f17a5c
WD
13422/* Implement MODES_TIEABLE_P. In principle we should always return true.
13423 However due to issues with register allocation it is preferable to avoid
13424 tieing integer scalar and FP scalar modes. Executing integer operations
13425 in general registers is better than treating them as scalar vector
13426 operations. This reduces latency and avoids redundant int<->FP moves.
13427 So tie modes if they are either the same class, or vector modes with
13428 other vector modes, vector structs or any scalar mode.
13429*/
97e1ad78
JG
13430
13431bool
ef4bddc2 13432aarch64_modes_tieable_p (machine_mode mode1, machine_mode mode2)
97e1ad78
JG
13433{
13434 if (GET_MODE_CLASS (mode1) == GET_MODE_CLASS (mode2))
13435 return true;
13436
13437 /* We specifically want to allow elements of "structure" modes to
13438 be tieable to the structure. This more general condition allows
13439 other rarer situations too. */
61f17a5c
WD
13440 if (aarch64_vector_mode_p (mode1) && aarch64_vector_mode_p (mode2))
13441 return true;
13442
13443 /* Also allow any scalar modes with vectors. */
13444 if (aarch64_vector_mode_supported_p (mode1)
13445 || aarch64_vector_mode_supported_p (mode2))
97e1ad78
JG
13446 return true;
13447
13448 return false;
13449}
13450
e2c75eea
JG
13451/* Return a new RTX holding the result of moving POINTER forward by
13452 AMOUNT bytes. */
13453
13454static rtx
13455aarch64_move_pointer (rtx pointer, int amount)
13456{
13457 rtx next = plus_constant (Pmode, XEXP (pointer, 0), amount);
13458
13459 return adjust_automodify_address (pointer, GET_MODE (pointer),
13460 next, amount);
13461}
13462
13463/* Return a new RTX holding the result of moving POINTER forward by the
13464 size of the mode it points to. */
13465
13466static rtx
13467aarch64_progress_pointer (rtx pointer)
13468{
13469 HOST_WIDE_INT amount = GET_MODE_SIZE (GET_MODE (pointer));
13470
13471 return aarch64_move_pointer (pointer, amount);
13472}
13473
13474/* Copy one MODE sized block from SRC to DST, then progress SRC and DST by
13475 MODE bytes. */
13476
13477static void
13478aarch64_copy_one_block_and_progress_pointers (rtx *src, rtx *dst,
ef4bddc2 13479 machine_mode mode)
e2c75eea
JG
13480{
13481 rtx reg = gen_reg_rtx (mode);
13482
13483 /* "Cast" the pointers to the correct mode. */
13484 *src = adjust_address (*src, mode, 0);
13485 *dst = adjust_address (*dst, mode, 0);
13486 /* Emit the memcpy. */
13487 emit_move_insn (reg, *src);
13488 emit_move_insn (*dst, reg);
13489 /* Move the pointers forward. */
13490 *src = aarch64_progress_pointer (*src);
13491 *dst = aarch64_progress_pointer (*dst);
13492}
13493
13494/* Expand movmem, as if from a __builtin_memcpy. Return true if
13495 we succeed, otherwise return false. */
13496
13497bool
13498aarch64_expand_movmem (rtx *operands)
13499{
13500 unsigned int n;
13501 rtx dst = operands[0];
13502 rtx src = operands[1];
13503 rtx base;
13504 bool speed_p = !optimize_function_for_size_p (cfun);
13505
13506 /* When optimizing for size, give a better estimate of the length of a
13507 memcpy call, but use the default otherwise. */
13508 unsigned int max_instructions = (speed_p ? 15 : AARCH64_CALL_RATIO) / 2;
13509
13510 /* We can't do anything smart if the amount to copy is not constant. */
13511 if (!CONST_INT_P (operands[2]))
13512 return false;
13513
13514 n = UINTVAL (operands[2]);
13515
13516 /* Try to keep the number of instructions low. For cases below 16 bytes we
13517 need to make at most two moves. For cases above 16 bytes it will be one
13518 move for each 16 byte chunk, then at most two additional moves. */
13519 if (((n / 16) + (n % 16 ? 2 : 0)) > max_instructions)
13520 return false;
13521
13522 base = copy_to_mode_reg (Pmode, XEXP (dst, 0));
13523 dst = adjust_automodify_address (dst, VOIDmode, base, 0);
13524
13525 base = copy_to_mode_reg (Pmode, XEXP (src, 0));
13526 src = adjust_automodify_address (src, VOIDmode, base, 0);
13527
13528 /* Simple cases. Copy 0-3 bytes, as (if applicable) a 2-byte, then a
13529 1-byte chunk. */
13530 if (n < 4)
13531 {
13532 if (n >= 2)
13533 {
13534 aarch64_copy_one_block_and_progress_pointers (&src, &dst, HImode);
13535 n -= 2;
13536 }
13537
13538 if (n == 1)
13539 aarch64_copy_one_block_and_progress_pointers (&src, &dst, QImode);
13540
13541 return true;
13542 }
13543
13544 /* Copy 4-8 bytes. First a 4-byte chunk, then (if applicable) a second
13545 4-byte chunk, partially overlapping with the previously copied chunk. */
13546 if (n < 8)
13547 {
13548 aarch64_copy_one_block_and_progress_pointers (&src, &dst, SImode);
13549 n -= 4;
13550 if (n > 0)
13551 {
13552 int move = n - 4;
13553
13554 src = aarch64_move_pointer (src, move);
13555 dst = aarch64_move_pointer (dst, move);
13556 aarch64_copy_one_block_and_progress_pointers (&src, &dst, SImode);
13557 }
13558 return true;
13559 }
13560
13561 /* Copy more than 8 bytes. Copy chunks of 16 bytes until we run out of
13562 them, then (if applicable) an 8-byte chunk. */
13563 while (n >= 8)
13564 {
13565 if (n / 16)
13566 {
13567 aarch64_copy_one_block_and_progress_pointers (&src, &dst, TImode);
13568 n -= 16;
13569 }
13570 else
13571 {
13572 aarch64_copy_one_block_and_progress_pointers (&src, &dst, DImode);
13573 n -= 8;
13574 }
13575 }
13576
13577 /* Finish the final bytes of the copy. We can always do this in one
13578 instruction. We either copy the exact amount we need, or partially
13579 overlap with the previous chunk we copied and copy 8 bytes. */
13580 if (n == 0)
13581 return true;
13582 else if (n == 1)
13583 aarch64_copy_one_block_and_progress_pointers (&src, &dst, QImode);
13584 else if (n == 2)
13585 aarch64_copy_one_block_and_progress_pointers (&src, &dst, HImode);
13586 else if (n == 4)
13587 aarch64_copy_one_block_and_progress_pointers (&src, &dst, SImode);
13588 else
13589 {
13590 if (n == 3)
13591 {
13592 src = aarch64_move_pointer (src, -1);
13593 dst = aarch64_move_pointer (dst, -1);
13594 aarch64_copy_one_block_and_progress_pointers (&src, &dst, SImode);
13595 }
13596 else
13597 {
13598 int move = n - 8;
13599
13600 src = aarch64_move_pointer (src, move);
13601 dst = aarch64_move_pointer (dst, move);
13602 aarch64_copy_one_block_and_progress_pointers (&src, &dst, DImode);
13603 }
13604 }
13605
13606 return true;
13607}
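/* Worked example: a constant 7-byte copy takes the "n < 8" path above and
   is emitted as one 4-byte copy of bytes 0-3 followed by an overlapping
   4-byte copy of bytes 3-6, i.e. two loads and two stores rather than a
   4 + 2 + 1 byte sequence.  */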
13608
141a3ccf
KT
13609/* Split a DImode store of a CONST_INT SRC to MEM DST as two
13610 SImode stores. Handle the case when the constant has identical
13611 bottom and top halves. This is beneficial when the two stores can be
13612 merged into an STP and we avoid synthesising potentially expensive
13613 immediates twice. Return true if such a split is possible. */
13614
13615bool
13616aarch64_split_dimode_const_store (rtx dst, rtx src)
13617{
13618 rtx lo = gen_lowpart (SImode, src);
13619 rtx hi = gen_highpart_mode (SImode, DImode, src);
13620
13621 bool size_p = optimize_function_for_size_p (cfun);
13622
13623 if (!rtx_equal_p (lo, hi))
13624 return false;
13625
13626 unsigned int orig_cost
13627 = aarch64_internal_mov_immediate (NULL_RTX, src, false, DImode);
13628 unsigned int lo_cost
13629 = aarch64_internal_mov_immediate (NULL_RTX, lo, false, SImode);
13630
13631 /* We want to transform:
13632 MOV x1, 49370
13633 MOVK x1, 0x140, lsl 16
13634 MOVK x1, 0xc0da, lsl 32
13635 MOVK x1, 0x140, lsl 48
13636 STR x1, [x0]
13637 into:
13638 MOV w1, 49370
13639 MOVK w1, 0x140, lsl 16
13640 STP w1, w1, [x0]
13641 So we want to perform this only when we save two instructions
13642 or more. When optimizing for size, however, accept any code size
13643 savings we can. */
13644 if (size_p && orig_cost <= lo_cost)
13645 return false;
13646
13647 if (!size_p
13648 && (orig_cost <= lo_cost + 1))
13649 return false;
13650
13651 rtx mem_lo = adjust_address (dst, SImode, 0);
13652 if (!aarch64_mem_pair_operand (mem_lo, SImode))
13653 return false;
13654
13655 rtx tmp_reg = gen_reg_rtx (SImode);
13656 aarch64_expand_mov_immediate (tmp_reg, lo);
13657 rtx mem_hi = aarch64_move_pointer (mem_lo, GET_MODE_SIZE (SImode));
13658 /* Don't emit an explicit store pair as this may not be always profitable.
13659 Let the sched-fusion logic decide whether to merge them. */
13660 emit_move_insn (mem_lo, tmp_reg);
13661 emit_move_insn (mem_hi, tmp_reg);
13662
13663 return true;
13664}
13665
a3125fc2
CL
13666/* Implement the TARGET_ASAN_SHADOW_OFFSET hook. */
13667
13668static unsigned HOST_WIDE_INT
13669aarch64_asan_shadow_offset (void)
13670{
13671 return (HOST_WIDE_INT_1 << 36);
13672}
13673
d3006da6 13674static bool
445d7826 13675aarch64_use_by_pieces_infrastructure_p (unsigned HOST_WIDE_INT size,
d3006da6
JG
13676 unsigned int align,
13677 enum by_pieces_operation op,
13678 bool speed_p)
13679{
13680 /* STORE_BY_PIECES can be used when copying a constant string, but
13681 in that case each 64-bit chunk takes 5 insns instead of 2 (LDR/STR).
13682 For now we always fail this and let the move_by_pieces code copy
13683 the string from read-only memory. */
13684 if (op == STORE_BY_PIECES)
13685 return false;
13686
13687 return default_use_by_pieces_infrastructure_p (size, align, op, speed_p);
13688}
13689
5f3bc026 13690static rtx
cb4347e8 13691aarch64_gen_ccmp_first (rtx_insn **prep_seq, rtx_insn **gen_seq,
5f3bc026
ZC
13692 int code, tree treeop0, tree treeop1)
13693{
c8012fbc
WD
13694 machine_mode op_mode, cmp_mode, cc_mode = CCmode;
13695 rtx op0, op1;
5f3bc026 13696 int unsignedp = TYPE_UNSIGNED (TREE_TYPE (treeop0));
c8012fbc 13697 insn_code icode;
5f3bc026
ZC
13698 struct expand_operand ops[4];
13699
5f3bc026
ZC
13700 start_sequence ();
13701 expand_operands (treeop0, treeop1, NULL_RTX, &op0, &op1, EXPAND_NORMAL);
13702
13703 op_mode = GET_MODE (op0);
13704 if (op_mode == VOIDmode)
13705 op_mode = GET_MODE (op1);
13706
13707 switch (op_mode)
13708 {
13709 case QImode:
13710 case HImode:
13711 case SImode:
13712 cmp_mode = SImode;
13713 icode = CODE_FOR_cmpsi;
13714 break;
13715
13716 case DImode:
13717 cmp_mode = DImode;
13718 icode = CODE_FOR_cmpdi;
13719 break;
13720
786e3c06
WD
13721 case SFmode:
13722 cmp_mode = SFmode;
13723 cc_mode = aarch64_select_cc_mode ((rtx_code) code, op0, op1);
13724 icode = cc_mode == CCFPEmode ? CODE_FOR_fcmpesf : CODE_FOR_fcmpsf;
13725 break;
13726
13727 case DFmode:
13728 cmp_mode = DFmode;
13729 cc_mode = aarch64_select_cc_mode ((rtx_code) code, op0, op1);
13730 icode = cc_mode == CCFPEmode ? CODE_FOR_fcmpedf : CODE_FOR_fcmpdf;
13731 break;
13732
5f3bc026
ZC
13733 default:
13734 end_sequence ();
13735 return NULL_RTX;
13736 }
13737
c8012fbc
WD
13738 op0 = prepare_operand (icode, op0, 0, op_mode, cmp_mode, unsignedp);
13739 op1 = prepare_operand (icode, op1, 1, op_mode, cmp_mode, unsignedp);
5f3bc026
ZC
13740 if (!op0 || !op1)
13741 {
13742 end_sequence ();
13743 return NULL_RTX;
13744 }
13745 *prep_seq = get_insns ();
13746 end_sequence ();
13747
c8012fbc
WD
13748 create_fixed_operand (&ops[0], op0);
13749 create_fixed_operand (&ops[1], op1);
5f3bc026
ZC
13750
13751 start_sequence ();
c8012fbc 13752 if (!maybe_expand_insn (icode, 2, ops))
5f3bc026
ZC
13753 {
13754 end_sequence ();
13755 return NULL_RTX;
13756 }
13757 *gen_seq = get_insns ();
13758 end_sequence ();
13759
c8012fbc
WD
13760 return gen_rtx_fmt_ee ((rtx_code) code, cc_mode,
13761 gen_rtx_REG (cc_mode, CC_REGNUM), const0_rtx);
5f3bc026
ZC
13762}
13763
13764static rtx
cb4347e8
TS
13765aarch64_gen_ccmp_next (rtx_insn **prep_seq, rtx_insn **gen_seq, rtx prev,
13766 int cmp_code, tree treeop0, tree treeop1, int bit_code)
5f3bc026 13767{
c8012fbc
WD
13768 rtx op0, op1, target;
13769 machine_mode op_mode, cmp_mode, cc_mode = CCmode;
5f3bc026 13770 int unsignedp = TYPE_UNSIGNED (TREE_TYPE (treeop0));
c8012fbc 13771 insn_code icode;
5f3bc026 13772 struct expand_operand ops[6];
c8012fbc 13773 int aarch64_cond;
5f3bc026 13774
cb4347e8 13775 push_to_sequence (*prep_seq);
5f3bc026
ZC
13776 expand_operands (treeop0, treeop1, NULL_RTX, &op0, &op1, EXPAND_NORMAL);
13777
13778 op_mode = GET_MODE (op0);
13779 if (op_mode == VOIDmode)
13780 op_mode = GET_MODE (op1);
13781
13782 switch (op_mode)
13783 {
13784 case QImode:
13785 case HImode:
13786 case SImode:
13787 cmp_mode = SImode;
c8012fbc 13788 icode = CODE_FOR_ccmpsi;
5f3bc026
ZC
13789 break;
13790
13791 case DImode:
13792 cmp_mode = DImode;
c8012fbc 13793 icode = CODE_FOR_ccmpdi;
5f3bc026
ZC
13794 break;
13795
786e3c06
WD
13796 case SFmode:
13797 cmp_mode = SFmode;
13798 cc_mode = aarch64_select_cc_mode ((rtx_code) cmp_code, op0, op1);
13799 icode = cc_mode == CCFPEmode ? CODE_FOR_fccmpesf : CODE_FOR_fccmpsf;
13800 break;
13801
13802 case DFmode:
13803 cmp_mode = DFmode;
13804 cc_mode = aarch64_select_cc_mode ((rtx_code) cmp_code, op0, op1);
13805 icode = cc_mode == CCFPEmode ? CODE_FOR_fccmpedf : CODE_FOR_fccmpdf;
13806 break;
13807
5f3bc026
ZC
13808 default:
13809 end_sequence ();
13810 return NULL_RTX;
13811 }
13812
13813 op0 = prepare_operand (icode, op0, 2, op_mode, cmp_mode, unsignedp);
13814 op1 = prepare_operand (icode, op1, 3, op_mode, cmp_mode, unsignedp);
13815 if (!op0 || !op1)
13816 {
13817 end_sequence ();
13818 return NULL_RTX;
13819 }
13820 *prep_seq = get_insns ();
13821 end_sequence ();
13822
13823 target = gen_rtx_REG (cc_mode, CC_REGNUM);
c8012fbc 13824 aarch64_cond = aarch64_get_condition_code_1 (cc_mode, (rtx_code) cmp_code);
5f3bc026 13825
c8012fbc
WD
13826 if (bit_code != AND)
13827 {
13828 prev = gen_rtx_fmt_ee (REVERSE_CONDITION (GET_CODE (prev),
13829 GET_MODE (XEXP (prev, 0))),
13830 VOIDmode, XEXP (prev, 0), const0_rtx);
13831 aarch64_cond = AARCH64_INVERSE_CONDITION_CODE (aarch64_cond);
13832 }
13833
13834 create_fixed_operand (&ops[0], XEXP (prev, 0));
5f3bc026
ZC
13835 create_fixed_operand (&ops[1], target);
13836 create_fixed_operand (&ops[2], op0);
13837 create_fixed_operand (&ops[3], op1);
c8012fbc
WD
13838 create_fixed_operand (&ops[4], prev);
13839 create_fixed_operand (&ops[5], GEN_INT (aarch64_cond));
5f3bc026 13840
cb4347e8 13841 push_to_sequence (*gen_seq);
5f3bc026
ZC
13842 if (!maybe_expand_insn (icode, 6, ops))
13843 {
13844 end_sequence ();
13845 return NULL_RTX;
13846 }
13847
13848 *gen_seq = get_insns ();
13849 end_sequence ();
13850
c8012fbc 13851 return gen_rtx_fmt_ee ((rtx_code) cmp_code, VOIDmode, target, const0_rtx);
5f3bc026
ZC
13852}
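/* Illustration: together, the two hooks above let a condition such as
   "a == 0 && b > 42" be evaluated without an intermediate branch,
   roughly as (registers and the NZCV immediate chosen for illustration):

	cmp	w0, #0
	ccmp	w1, #42, #<nzcv>, eq	// only compares if the first test held

   followed by a single conditional branch or conditional select on the
   resulting "gt" condition.  */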
13853
13854#undef TARGET_GEN_CCMP_FIRST
13855#define TARGET_GEN_CCMP_FIRST aarch64_gen_ccmp_first
13856
13857#undef TARGET_GEN_CCMP_NEXT
13858#define TARGET_GEN_CCMP_NEXT aarch64_gen_ccmp_next
13859
6a569cdd
KT
13860/* Implement TARGET_SCHED_MACRO_FUSION_P. Return true if target supports
13861 instruction fusion of some sort. */
13862
13863static bool
13864aarch64_macro_fusion_p (void)
13865{
b175b679 13866 return aarch64_tune_params.fusible_ops != AARCH64_FUSE_NOTHING;
6a569cdd
KT
13867}
13868
13869
13870/* Implement TARGET_SCHED_MACRO_FUSION_PAIR_P. Return true if PREV and CURR
13871 should be kept together during scheduling. */
13872
13873static bool
13874aarch_macro_fusion_pair_p (rtx_insn *prev, rtx_insn *curr)
13875{
13876 rtx set_dest;
13877 rtx prev_set = single_set (prev);
13878 rtx curr_set = single_set (curr);
13879 /* prev and curr are simple SET insns i.e. no flag setting or branching. */
13880 bool simple_sets_p = prev_set && curr_set && !any_condjump_p (curr);
13881
13882 if (!aarch64_macro_fusion_p ())
13883 return false;
13884
d7b03373 13885 if (simple_sets_p && aarch64_fusion_enabled_p (AARCH64_FUSE_MOV_MOVK))
6a569cdd
KT
13886 {
13887 /* We are trying to match:
13888 prev (mov) == (set (reg r0) (const_int imm16))
13889 curr (movk) == (set (zero_extract (reg r0)
13890 (const_int 16)
13891 (const_int 16))
13892 (const_int imm16_1)) */
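      /* For example (illustrative):
	   mov	w0, #0x1234
	   movk	w0, #0x5678, lsl #16  */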
13893
13894 set_dest = SET_DEST (curr_set);
13895
13896 if (GET_CODE (set_dest) == ZERO_EXTRACT
13897 && CONST_INT_P (SET_SRC (curr_set))
13898 && CONST_INT_P (SET_SRC (prev_set))
13899 && CONST_INT_P (XEXP (set_dest, 2))
13900 && INTVAL (XEXP (set_dest, 2)) == 16
13901 && REG_P (XEXP (set_dest, 0))
13902 && REG_P (SET_DEST (prev_set))
13903 && REGNO (XEXP (set_dest, 0)) == REGNO (SET_DEST (prev_set)))
13904 {
13905 return true;
13906 }
13907 }
13908
d7b03373 13909 if (simple_sets_p && aarch64_fusion_enabled_p (AARCH64_FUSE_ADRP_ADD))
9bbe08fe
KT
13910 {
13911
13912 /* We're trying to match:
13913 prev (adrp) == (set (reg r1)
13914 (high (symbol_ref ("SYM"))))
13915 curr (add) == (set (reg r0)
13916 (lo_sum (reg r1)
13917 (symbol_ref ("SYM"))))
13918 Note that r0 need not necessarily be the same as r1, especially
13919 during pre-regalloc scheduling. */
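      /* For example (illustrative):
	   adrp	x1, SYM
	   add	x0, x1, :lo12:SYM  */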
13920
13921 if (satisfies_constraint_Ush (SET_SRC (prev_set))
13922 && REG_P (SET_DEST (prev_set)) && REG_P (SET_DEST (curr_set)))
13923 {
13924 if (GET_CODE (SET_SRC (curr_set)) == LO_SUM
13925 && REG_P (XEXP (SET_SRC (curr_set), 0))
13926 && REGNO (XEXP (SET_SRC (curr_set), 0))
13927 == REGNO (SET_DEST (prev_set))
13928 && rtx_equal_p (XEXP (SET_SRC (prev_set), 0),
13929 XEXP (SET_SRC (curr_set), 1)))
13930 return true;
13931 }
13932 }
13933
d7b03373 13934 if (simple_sets_p && aarch64_fusion_enabled_p (AARCH64_FUSE_MOVK_MOVK))
cd0cb232
KT
13935 {
13936
13937 /* We're trying to match:
13938 prev (movk) == (set (zero_extract (reg r0)
13939 (const_int 16)
13940 (const_int 32))
13941 (const_int imm16_1))
13942 curr (movk) == (set (zero_extract (reg r0)
13943 (const_int 16)
13944 (const_int 48))
13945 (const_int imm16_2)) */
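      /* For example (illustrative):
	   movk	x0, #0x1234, lsl #32
	   movk	x0, #0x5678, lsl #48  */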
13946
13947 if (GET_CODE (SET_DEST (prev_set)) == ZERO_EXTRACT
13948 && GET_CODE (SET_DEST (curr_set)) == ZERO_EXTRACT
13949 && REG_P (XEXP (SET_DEST (prev_set), 0))
13950 && REG_P (XEXP (SET_DEST (curr_set), 0))
13951 && REGNO (XEXP (SET_DEST (prev_set), 0))
13952 == REGNO (XEXP (SET_DEST (curr_set), 0))
13953 && CONST_INT_P (XEXP (SET_DEST (prev_set), 2))
13954 && CONST_INT_P (XEXP (SET_DEST (curr_set), 2))
13955 && INTVAL (XEXP (SET_DEST (prev_set), 2)) == 32
13956 && INTVAL (XEXP (SET_DEST (curr_set), 2)) == 48
13957 && CONST_INT_P (SET_SRC (prev_set))
13958 && CONST_INT_P (SET_SRC (curr_set)))
13959 return true;
13960
13961 }
d7b03373 13962 if (simple_sets_p && aarch64_fusion_enabled_p (AARCH64_FUSE_ADRP_LDR))
d8354ad7
KT
13963 {
13964 /* We're trying to match:
13965 prev (adrp) == (set (reg r0)
13966 (high (symbol_ref ("SYM"))))
13967 curr (ldr) == (set (reg r1)
13968 (mem (lo_sum (reg r0)
13969 (symbol_ref ("SYM")))))
13970 or
13971 curr (ldr) == (set (reg r1)
13972 (zero_extend (mem
13973 (lo_sum (reg r0)
13974 (symbol_ref ("SYM")))))) */
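      /* For example (illustrative):
	   adrp	x0, SYM
	   ldr	x1, [x0, #:lo12:SYM]  */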
13975 if (satisfies_constraint_Ush (SET_SRC (prev_set))
13976 && REG_P (SET_DEST (prev_set)) && REG_P (SET_DEST (curr_set)))
13977 {
13978 rtx curr_src = SET_SRC (curr_set);
13979
13980 if (GET_CODE (curr_src) == ZERO_EXTEND)
13981 curr_src = XEXP (curr_src, 0);
13982
13983 if (MEM_P (curr_src) && GET_CODE (XEXP (curr_src, 0)) == LO_SUM
13984 && REG_P (XEXP (XEXP (curr_src, 0), 0))
13985 && REGNO (XEXP (XEXP (curr_src, 0), 0))
13986 == REGNO (SET_DEST (prev_set))
13987 && rtx_equal_p (XEXP (XEXP (curr_src, 0), 1),
13988 XEXP (SET_SRC (prev_set), 0)))
13989 return true;
13990 }
13991 }
cd0cb232 13992
d7b03373 13993 if (aarch64_fusion_enabled_p (AARCH64_FUSE_AES_AESMC)
00a8574a
WD
13994 && aarch_crypto_can_dual_issue (prev, curr))
13995 return true;
13996
d7b03373 13997 if (aarch64_fusion_enabled_p (AARCH64_FUSE_CMP_BRANCH)
3759108f
AP
13998 && any_condjump_p (curr))
13999 {
14000 enum attr_type prev_type = get_attr_type (prev);
14001
14002      /* FIXME: this misses some instructions that are considered simple
14003         arithmetic for ThunderX.  Simple shifts are missed here.  */
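      /* For example (illustrative), a flag-setting ALU instruction followed
         by a conditional branch:
	   cmp	w0, #4
	   b.ne	.L2  */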
14004 if (prev_type == TYPE_ALUS_SREG
14005 || prev_type == TYPE_ALUS_IMM
14006 || prev_type == TYPE_LOGICS_REG
14007 || prev_type == TYPE_LOGICS_IMM)
14008 return true;
14009 }
14010
6a569cdd
KT
14011 return false;
14012}
14013
f2879a90
KT
14014/* Return true iff the instruction fusion described by OP is enabled. */
14015
14016bool
14017aarch64_fusion_enabled_p (enum aarch64_fusion_pairs op)
14018{
14019 return (aarch64_tune_params.fusible_ops & op) != 0;
14020}
14021
350013bc
BC
14022/* If MEM is in the form of [base+offset], extract the two parts of the
14023   address and store them in BASE and OFFSET; otherwise return false
14024   after clearing BASE and OFFSET.  */
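
/* For example (illustrative): an address of the form
   (plus (reg x0) (const_int 16)) yields BASE = (reg x0) and
   OFFSET = (const_int 16), while a bare (reg x0) yields OFFSET = const0_rtx.  */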
14025
14026bool
14027extract_base_offset_in_addr (rtx mem, rtx *base, rtx *offset)
14028{
14029 rtx addr;
14030
14031 gcc_assert (MEM_P (mem));
14032
14033 addr = XEXP (mem, 0);
14034
14035 if (REG_P (addr))
14036 {
14037 *base = addr;
14038 *offset = const0_rtx;
14039 return true;
14040 }
14041
14042 if (GET_CODE (addr) == PLUS
14043 && REG_P (XEXP (addr, 0)) && CONST_INT_P (XEXP (addr, 1)))
14044 {
14045 *base = XEXP (addr, 0);
14046 *offset = XEXP (addr, 1);
14047 return true;
14048 }
14049
14050 *base = NULL_RTX;
14051 *offset = NULL_RTX;
14052
14053 return false;
14054}
14055
14056/* Types for scheduling fusion. */
14057enum sched_fusion_type
14058{
14059 SCHED_FUSION_NONE = 0,
14060 SCHED_FUSION_LD_SIGN_EXTEND,
14061 SCHED_FUSION_LD_ZERO_EXTEND,
14062 SCHED_FUSION_LD,
14063 SCHED_FUSION_ST,
14064 SCHED_FUSION_NUM
14065};
14066
14067/* If INSN is a load or store whose address is in the form [base+offset],
14068   extract the two parts and store them in BASE and OFFSET.  Return the
14069   scheduling fusion type of INSN.  */
14070
14071static enum sched_fusion_type
14072fusion_load_store (rtx_insn *insn, rtx *base, rtx *offset)
14073{
14074 rtx x, dest, src;
14075 enum sched_fusion_type fusion = SCHED_FUSION_LD;
14076
14077 gcc_assert (INSN_P (insn));
14078 x = PATTERN (insn);
14079 if (GET_CODE (x) != SET)
14080 return SCHED_FUSION_NONE;
14081
14082 src = SET_SRC (x);
14083 dest = SET_DEST (x);
14084
abc52318
KT
14085 machine_mode dest_mode = GET_MODE (dest);
14086
14087 if (!aarch64_mode_valid_for_sched_fusion_p (dest_mode))
350013bc
BC
14088 return SCHED_FUSION_NONE;
14089
14090 if (GET_CODE (src) == SIGN_EXTEND)
14091 {
14092 fusion = SCHED_FUSION_LD_SIGN_EXTEND;
14093 src = XEXP (src, 0);
14094 if (GET_CODE (src) != MEM || GET_MODE (src) != SImode)
14095 return SCHED_FUSION_NONE;
14096 }
14097 else if (GET_CODE (src) == ZERO_EXTEND)
14098 {
14099 fusion = SCHED_FUSION_LD_ZERO_EXTEND;
14100 src = XEXP (src, 0);
14101 if (GET_CODE (src) != MEM || GET_MODE (src) != SImode)
14102 return SCHED_FUSION_NONE;
14103 }
14104
14105 if (GET_CODE (src) == MEM && REG_P (dest))
14106 extract_base_offset_in_addr (src, base, offset);
14107 else if (GET_CODE (dest) == MEM && (REG_P (src) || src == const0_rtx))
14108 {
14109 fusion = SCHED_FUSION_ST;
14110 extract_base_offset_in_addr (dest, base, offset);
14111 }
14112 else
14113 return SCHED_FUSION_NONE;
14114
14115 if (*base == NULL_RTX || *offset == NULL_RTX)
14116 fusion = SCHED_FUSION_NONE;
14117
14118 return fusion;
14119}
14120
14121/* Implement the TARGET_SCHED_FUSION_PRIORITY hook.
14122
14123   Currently we only support fusing ldr or str instructions, so FUSION_PRI
14124   and PRI are only calculated for these instructions.  For other instructions,
14125   FUSION_PRI and PRI are simply set to MAX_PRI - 1.  In the future, other
14126   types of instruction fusion can be added by returning different priorities.
14127
14128 It's important that irrelevant instructions get the largest FUSION_PRI. */
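
/* For example (illustrative): two loads such as "ldr w0, [x3, 8]" and
   "ldr w1, [x3, 12]" receive the same FUSION_PRI because they share a fusion
   type and a base register, and the one with the smaller offset receives the
   larger PRI so that it is scheduled first.  */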
14129
14130static void
14131aarch64_sched_fusion_priority (rtx_insn *insn, int max_pri,
14132 int *fusion_pri, int *pri)
14133{
14134 int tmp, off_val;
14135 rtx base, offset;
14136 enum sched_fusion_type fusion;
14137
14138 gcc_assert (INSN_P (insn));
14139
14140 tmp = max_pri - 1;
14141 fusion = fusion_load_store (insn, &base, &offset);
14142 if (fusion == SCHED_FUSION_NONE)
14143 {
14144 *pri = tmp;
14145 *fusion_pri = tmp;
14146 return;
14147 }
14148
14149 /* Set FUSION_PRI according to fusion type and base register. */
14150 *fusion_pri = tmp - fusion * FIRST_PSEUDO_REGISTER - REGNO (base);
14151
14152 /* Calculate PRI. */
14153 tmp /= 2;
14154
14155 /* INSN with smaller offset goes first. */
14156 off_val = (int)(INTVAL (offset));
14157 if (off_val >= 0)
14158 tmp -= (off_val & 0xfffff);
14159 else
14160 tmp += ((- off_val) & 0xfffff);
14161
14162 *pri = tmp;
14163 return;
14164}
14165
9bca63d4
WD
14166/* Implement the TARGET_SCHED_ADJUST_PRIORITY hook.
14167 Adjust priority of sha1h instructions so they are scheduled before
14168 other SHA1 instructions. */
14169
14170static int
14171aarch64_sched_adjust_priority (rtx_insn *insn, int priority)
14172{
14173 rtx x = PATTERN (insn);
14174
14175 if (GET_CODE (x) == SET)
14176 {
14177 x = SET_SRC (x);
14178
14179 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_SHA1H)
14180 return priority + 10;
14181 }
14182
14183 return priority;
14184}
14185
350013bc
BC
14186/* Given OPERANDS of consecutive load/store, check if we can merge
14187 them into ldp/stp. LOAD is true if they are load instructions.
14188 MODE is the mode of memory operands. */
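
/* For example (illustrative): "ldr w0, [x2]" followed by "ldr w1, [x2, 4]"
   satisfies these checks and can be merged into "ldp w0, w1, [x2]".  */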
14189
14190bool
14191aarch64_operands_ok_for_ldpstp (rtx *operands, bool load,
14192 enum machine_mode mode)
14193{
14194 HOST_WIDE_INT offval_1, offval_2, msize;
14195 enum reg_class rclass_1, rclass_2;
14196 rtx mem_1, mem_2, reg_1, reg_2, base_1, base_2, offset_1, offset_2;
14197
14198 if (load)
14199 {
14200 mem_1 = operands[1];
14201 mem_2 = operands[3];
14202 reg_1 = operands[0];
14203 reg_2 = operands[2];
14204 gcc_assert (REG_P (reg_1) && REG_P (reg_2));
14205 if (REGNO (reg_1) == REGNO (reg_2))
14206 return false;
14207 }
14208 else
14209 {
14210 mem_1 = operands[0];
14211 mem_2 = operands[2];
14212 reg_1 = operands[1];
14213 reg_2 = operands[3];
14214 }
14215
bf84ac44
AP
14216 /* The mems cannot be volatile. */
14217 if (MEM_VOLATILE_P (mem_1) || MEM_VOLATILE_P (mem_2))
14218 return false;
14219
54700e2e
AP
14220  /* If we have SImode and slow unaligned ldp,
14221     check that the alignment is at least 8 bytes.  */
14222 if (mode == SImode
14223 && (aarch64_tune_params.extra_tuning_flags
14224 & AARCH64_EXTRA_TUNE_SLOW_UNALIGNED_LDPW)
14225 && !optimize_size
14226 && MEM_ALIGN (mem_1) < 8 * BITS_PER_UNIT)
14227 return false;
14228
350013bc
BC
14229 /* Check if the addresses are in the form of [base+offset]. */
14230 extract_base_offset_in_addr (mem_1, &base_1, &offset_1);
14231 if (base_1 == NULL_RTX || offset_1 == NULL_RTX)
14232 return false;
14233 extract_base_offset_in_addr (mem_2, &base_2, &offset_2);
14234 if (base_2 == NULL_RTX || offset_2 == NULL_RTX)
14235 return false;
14236
14237  /* Check if the bases are the same.  */
14238 if (!rtx_equal_p (base_1, base_2))
14239 return false;
14240
14241 offval_1 = INTVAL (offset_1);
14242 offval_2 = INTVAL (offset_2);
14243 msize = GET_MODE_SIZE (mode);
14244 /* Check if the offsets are consecutive. */
14245 if (offval_1 != (offval_2 + msize) && offval_2 != (offval_1 + msize))
14246 return false;
14247
14248 /* Check if the addresses are clobbered by load. */
14249 if (load)
14250 {
14251 if (reg_mentioned_p (reg_1, mem_1))
14252 return false;
14253
14254 /* In increasing order, the last load can clobber the address. */
14255 if (offval_1 > offval_2 && reg_mentioned_p (reg_2, mem_2))
14256 return false;
14257 }
14258
14259 if (REG_P (reg_1) && FP_REGNUM_P (REGNO (reg_1)))
14260 rclass_1 = FP_REGS;
14261 else
14262 rclass_1 = GENERAL_REGS;
14263
14264 if (REG_P (reg_2) && FP_REGNUM_P (REGNO (reg_2)))
14265 rclass_2 = FP_REGS;
14266 else
14267 rclass_2 = GENERAL_REGS;
14268
14269  /* Check if the registers are of the same class.  */
14270 if (rclass_1 != rclass_2)
14271 return false;
14272
14273 return true;
14274}
14275
14276/* Given OPERANDS of consecutive load/store, check if we can merge
14277 them into ldp/stp by adjusting the offset. LOAD is true if they
14278 are load instructions. MODE is the mode of memory operands.
14279
14280   Given the following consecutive stores:
14281
14282 str w1, [xb, 0x100]
14283 str w1, [xb, 0x104]
14284 str w1, [xb, 0x108]
14285 str w1, [xb, 0x10c]
14286
14287 Though the offsets are out of the range supported by stp, we can
14288 still pair them after adjusting the offset, like:
14289
14290 add scratch, xb, 0x100
14291 stp w1, w1, [scratch]
14292 stp w1, w1, [scratch, 0x8]
14293
14294   The peephole patterns detecting this opportunity should guarantee
14295   that the scratch register is available.  */
14296
14297bool
14298aarch64_operands_adjust_ok_for_ldpstp (rtx *operands, bool load,
14299 enum machine_mode mode)
14300{
14301 enum reg_class rclass_1, rclass_2, rclass_3, rclass_4;
14302 HOST_WIDE_INT offval_1, offval_2, offval_3, offval_4, msize;
14303 rtx mem_1, mem_2, mem_3, mem_4, reg_1, reg_2, reg_3, reg_4;
14304 rtx base_1, base_2, base_3, base_4, offset_1, offset_2, offset_3, offset_4;
14305
14306 if (load)
14307 {
14308 reg_1 = operands[0];
14309 mem_1 = operands[1];
14310 reg_2 = operands[2];
14311 mem_2 = operands[3];
14312 reg_3 = operands[4];
14313 mem_3 = operands[5];
14314 reg_4 = operands[6];
14315 mem_4 = operands[7];
14316 gcc_assert (REG_P (reg_1) && REG_P (reg_2)
14317 && REG_P (reg_3) && REG_P (reg_4));
14318 if (REGNO (reg_1) == REGNO (reg_2) || REGNO (reg_3) == REGNO (reg_4))
14319 return false;
14320 }
14321 else
14322 {
14323 mem_1 = operands[0];
14324 reg_1 = operands[1];
14325 mem_2 = operands[2];
14326 reg_2 = operands[3];
14327 mem_3 = operands[4];
14328 reg_3 = operands[5];
14329 mem_4 = operands[6];
14330 reg_4 = operands[7];
14331 }
14332  /* Skip if the memory operand is by itself valid for ldp/stp.  */
14333 if (!MEM_P (mem_1) || aarch64_mem_pair_operand (mem_1, mode))
14334 return false;
14335
bf84ac44
AP
14336 /* The mems cannot be volatile. */
14337 if (MEM_VOLATILE_P (mem_1) || MEM_VOLATILE_P (mem_2)
14338 || MEM_VOLATILE_P (mem_3) ||MEM_VOLATILE_P (mem_4))
14339 return false;
14340
350013bc
BC
14341 /* Check if the addresses are in the form of [base+offset]. */
14342 extract_base_offset_in_addr (mem_1, &base_1, &offset_1);
14343 if (base_1 == NULL_RTX || offset_1 == NULL_RTX)
14344 return false;
14345 extract_base_offset_in_addr (mem_2, &base_2, &offset_2);
14346 if (base_2 == NULL_RTX || offset_2 == NULL_RTX)
14347 return false;
14348 extract_base_offset_in_addr (mem_3, &base_3, &offset_3);
14349 if (base_3 == NULL_RTX || offset_3 == NULL_RTX)
14350 return false;
14351 extract_base_offset_in_addr (mem_4, &base_4, &offset_4);
14352 if (base_4 == NULL_RTX || offset_4 == NULL_RTX)
14353 return false;
14354
14355  /* Check if the bases are the same.  */
14356 if (!rtx_equal_p (base_1, base_2)
14357 || !rtx_equal_p (base_2, base_3)
14358 || !rtx_equal_p (base_3, base_4))
14359 return false;
14360
14361 offval_1 = INTVAL (offset_1);
14362 offval_2 = INTVAL (offset_2);
14363 offval_3 = INTVAL (offset_3);
14364 offval_4 = INTVAL (offset_4);
14365 msize = GET_MODE_SIZE (mode);
14366 /* Check if the offsets are consecutive. */
14367 if ((offval_1 != (offval_2 + msize)
14368 || offval_1 != (offval_3 + msize * 2)
14369 || offval_1 != (offval_4 + msize * 3))
14370 && (offval_4 != (offval_3 + msize)
14371 || offval_4 != (offval_2 + msize * 2)
14372 || offval_4 != (offval_1 + msize * 3)))
14373 return false;
14374
14375 /* Check if the addresses are clobbered by load. */
14376 if (load)
14377 {
14378 if (reg_mentioned_p (reg_1, mem_1)
14379 || reg_mentioned_p (reg_2, mem_2)
14380 || reg_mentioned_p (reg_3, mem_3))
14381 return false;
14382
14383 /* In increasing order, the last load can clobber the address. */
14384 if (offval_1 > offval_2 && reg_mentioned_p (reg_4, mem_4))
14385 return false;
14386 }
14387
54700e2e
AP
14388  /* If we have SImode and slow unaligned ldp,
14389     check that the alignment is at least 8 bytes.  */
14390 if (mode == SImode
14391 && (aarch64_tune_params.extra_tuning_flags
14392 & AARCH64_EXTRA_TUNE_SLOW_UNALIGNED_LDPW)
14393 && !optimize_size
14394 && MEM_ALIGN (mem_1) < 8 * BITS_PER_UNIT)
14395 return false;
14396
350013bc
BC
14397 if (REG_P (reg_1) && FP_REGNUM_P (REGNO (reg_1)))
14398 rclass_1 = FP_REGS;
14399 else
14400 rclass_1 = GENERAL_REGS;
14401
14402 if (REG_P (reg_2) && FP_REGNUM_P (REGNO (reg_2)))
14403 rclass_2 = FP_REGS;
14404 else
14405 rclass_2 = GENERAL_REGS;
14406
14407 if (REG_P (reg_3) && FP_REGNUM_P (REGNO (reg_3)))
14408 rclass_3 = FP_REGS;
14409 else
14410 rclass_3 = GENERAL_REGS;
14411
14412 if (REG_P (reg_4) && FP_REGNUM_P (REGNO (reg_4)))
14413 rclass_4 = FP_REGS;
14414 else
14415 rclass_4 = GENERAL_REGS;
14416
14417  /* Check if the registers are of the same class.  */
14418 if (rclass_1 != rclass_2 || rclass_2 != rclass_3 || rclass_3 != rclass_4)
14419 return false;
14420
14421 return true;
14422}
14423
14424/* Given OPERANDS of consecutive load/store, this function pairs them
14425 into ldp/stp after adjusting the offset. It depends on the fact
14426 that addresses of load/store instructions are in increasing order.
14427   MODE is the mode of memory operands.  CODE is the rtl operator
14428   which should be applied to all memory operands; it is SIGN_EXTEND,
14429   ZERO_EXTEND or UNKNOWN.  */
14430
14431bool
14432aarch64_gen_adjusted_ldpstp (rtx *operands, bool load,
14433 enum machine_mode mode, RTX_CODE code)
14434{
14435 rtx base, offset, t1, t2;
14436 rtx mem_1, mem_2, mem_3, mem_4;
14437 HOST_WIDE_INT off_val, abs_off, adj_off, new_off, stp_off_limit, msize;
14438
14439 if (load)
14440 {
14441 mem_1 = operands[1];
14442 mem_2 = operands[3];
14443 mem_3 = operands[5];
14444 mem_4 = operands[7];
14445 }
14446 else
14447 {
14448 mem_1 = operands[0];
14449 mem_2 = operands[2];
14450 mem_3 = operands[4];
14451 mem_4 = operands[6];
14452 gcc_assert (code == UNKNOWN);
14453 }
14454
14455 extract_base_offset_in_addr (mem_1, &base, &offset);
14456 gcc_assert (base != NULL_RTX && offset != NULL_RTX);
14457
14458  /* Adjust the offset so that it can fit in an ldp/stp instruction.  */
14459 msize = GET_MODE_SIZE (mode);
14460 stp_off_limit = msize * 0x40;
14461 off_val = INTVAL (offset);
14462 abs_off = (off_val < 0) ? -off_val : off_val;
14463 new_off = abs_off % stp_off_limit;
14464 adj_off = abs_off - new_off;
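
  /* Worked example (illustrative): for SImode, msize = 4 and
     stp_off_limit = 0x100, so an original offset of 0x10c gives
     new_off = 0xc and adj_off = 0x100; the base is advanced by 0x100
     and the four accesses then use offsets 0xc, 0x10, 0x14 and 0x18.  */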
14465
14466 /* Further adjust to make sure all offsets are OK. */
14467 if ((new_off + msize * 2) >= stp_off_limit)
14468 {
14469 adj_off += stp_off_limit;
14470 new_off -= stp_off_limit;
14471 }
14472
14473 /* Make sure the adjustment can be done with ADD/SUB instructions. */
14474 if (adj_off >= 0x1000)
14475 return false;
14476
14477 if (off_val < 0)
14478 {
14479 adj_off = -adj_off;
14480 new_off = -new_off;
14481 }
14482
14483 /* Create new memory references. */
14484 mem_1 = change_address (mem_1, VOIDmode,
14485 plus_constant (DImode, operands[8], new_off));
14486
14487 /* Check if the adjusted address is OK for ldp/stp. */
14488 if (!aarch64_mem_pair_operand (mem_1, mode))
14489 return false;
14490
14491 msize = GET_MODE_SIZE (mode);
14492 mem_2 = change_address (mem_2, VOIDmode,
14493 plus_constant (DImode,
14494 operands[8],
14495 new_off + msize));
14496 mem_3 = change_address (mem_3, VOIDmode,
14497 plus_constant (DImode,
14498 operands[8],
14499 new_off + msize * 2));
14500 mem_4 = change_address (mem_4, VOIDmode,
14501 plus_constant (DImode,
14502 operands[8],
14503 new_off + msize * 3));
14504
14505 if (code == ZERO_EXTEND)
14506 {
14507 mem_1 = gen_rtx_ZERO_EXTEND (DImode, mem_1);
14508 mem_2 = gen_rtx_ZERO_EXTEND (DImode, mem_2);
14509 mem_3 = gen_rtx_ZERO_EXTEND (DImode, mem_3);
14510 mem_4 = gen_rtx_ZERO_EXTEND (DImode, mem_4);
14511 }
14512 else if (code == SIGN_EXTEND)
14513 {
14514 mem_1 = gen_rtx_SIGN_EXTEND (DImode, mem_1);
14515 mem_2 = gen_rtx_SIGN_EXTEND (DImode, mem_2);
14516 mem_3 = gen_rtx_SIGN_EXTEND (DImode, mem_3);
14517 mem_4 = gen_rtx_SIGN_EXTEND (DImode, mem_4);
14518 }
14519
14520 if (load)
14521 {
14522 operands[1] = mem_1;
14523 operands[3] = mem_2;
14524 operands[5] = mem_3;
14525 operands[7] = mem_4;
14526 }
14527 else
14528 {
14529 operands[0] = mem_1;
14530 operands[2] = mem_2;
14531 operands[4] = mem_3;
14532 operands[6] = mem_4;
14533 }
14534
14535 /* Emit adjusting instruction. */
f7df4a84 14536 emit_insn (gen_rtx_SET (operands[8], plus_constant (DImode, base, adj_off)));
350013bc 14537 /* Emit ldp/stp instructions. */
f7df4a84
RS
14538 t1 = gen_rtx_SET (operands[0], operands[1]);
14539 t2 = gen_rtx_SET (operands[2], operands[3]);
350013bc 14540 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, t1, t2)));
f7df4a84
RS
14541 t1 = gen_rtx_SET (operands[4], operands[5]);
14542 t2 = gen_rtx_SET (operands[6], operands[7]);
350013bc
BC
14543 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, t1, t2)));
14544 return true;
14545}
14546
1b1e81f8
JW
14547/* Return 1 if pseudo register should be created and used to hold
14548 GOT address for PIC code. */
14549
14550bool
14551aarch64_use_pseudo_pic_reg (void)
14552{
14553 return aarch64_cmodel == AARCH64_CMODEL_SMALL_SPIC;
14554}
14555
7b841a12
JW
14556/* Implement TARGET_UNSPEC_MAY_TRAP_P. */
14557
14558static int
14559aarch64_unspec_may_trap_p (const_rtx x, unsigned flags)
14560{
14561 switch (XINT (x, 1))
14562 {
14563 case UNSPEC_GOTSMALLPIC:
14564 case UNSPEC_GOTSMALLPIC28K:
14565 case UNSPEC_GOTTINYPIC:
14566 return 0;
14567 default:
14568 break;
14569 }
14570
14571 return default_unspec_may_trap_p (x, flags);
14572}
14573
39252973
KT
14574
14575/* If X is a positive CONST_DOUBLE with a value that is a power of 2
14576 return the log2 of that value. Otherwise return -1. */
14577
14578int
14579aarch64_fpconst_pow_of_2 (rtx x)
14580{
14581 const REAL_VALUE_TYPE *r;
14582
14583 if (!CONST_DOUBLE_P (x))
14584 return -1;
14585
14586 r = CONST_DOUBLE_REAL_VALUE (x);
14587
14588 if (REAL_VALUE_NEGATIVE (*r)
14589 || REAL_VALUE_ISNAN (*r)
14590 || REAL_VALUE_ISINF (*r)
14591 || !real_isinteger (r, DFmode))
14592 return -1;
14593
14594 return exact_log2 (real_to_integer (r));
14595}
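
/* Illustrative examples for the above: a CONST_DOUBLE of 8.0 yields 3,
   1.0 yields 0, and values such as 0.5, 3.5 or -4.0 yield -1.  */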
14596
14597/* If X is a vector of equal CONST_DOUBLE values and that value is
14598 Y, return the aarch64_fpconst_pow_of_2 of Y. Otherwise return -1. */
14599
14600int
14601aarch64_vec_fpconst_pow_of_2 (rtx x)
14602{
14603 if (GET_CODE (x) != CONST_VECTOR)
14604 return -1;
14605
14606 if (GET_MODE_CLASS (GET_MODE (x)) != MODE_VECTOR_FLOAT)
14607 return -1;
14608
14609 int firstval = aarch64_fpconst_pow_of_2 (CONST_VECTOR_ELT (x, 0));
14610 if (firstval <= 0)
14611 return -1;
14612
14613 for (int i = 1; i < CONST_VECTOR_NUNITS (x); i++)
14614 if (aarch64_fpconst_pow_of_2 (CONST_VECTOR_ELT (x, i)) != firstval)
14615 return -1;
14616
14617 return firstval;
14618}
14619
11e554b3
JG
14620/* Implement TARGET_PROMOTED_TYPE to promote 16-bit floating point types
14621 to float.
14622
14623 __fp16 always promotes through this hook.
14624 _Float16 may promote if TARGET_FLT_EVAL_METHOD is 16, but we do that
14625 through the generic excess precision logic rather than here. */
14626
c2ec330c
AL
14627static tree
14628aarch64_promoted_type (const_tree t)
14629{
11e554b3
JG
14630 if (SCALAR_FLOAT_TYPE_P (t)
14631 && TYPE_MAIN_VARIANT (t) == aarch64_fp16_type_node)
c2ec330c 14632 return float_type_node;
11e554b3 14633
c2ec330c
AL
14634 return NULL_TREE;
14635}
ee62a5a6
RS
14636
14637/* Implement the TARGET_OPTAB_SUPPORTED_P hook. */
14638
14639static bool
9acc9cbe 14640aarch64_optab_supported_p (int op, machine_mode mode1, machine_mode,
ee62a5a6
RS
14641 optimization_type opt_type)
14642{
14643 switch (op)
14644 {
14645 case rsqrt_optab:
9acc9cbe 14646 return opt_type == OPTIMIZE_FOR_SPEED && use_rsqrt_p (mode1);
ee62a5a6
RS
14647
14648 default:
14649 return true;
14650 }
14651}
14652
11e554b3
JG
14653/* Implement TARGET_LIBGCC_FLOATING_POINT_MODE_SUPPORTED_P - return TRUE
14654 if MODE is HFmode, and punt to the generic implementation otherwise. */
14655
14656static bool
14657aarch64_libgcc_floating_mode_supported_p (machine_mode mode)
14658{
14659 return (mode == HFmode
14660 ? true
14661 : default_libgcc_floating_mode_supported_p (mode));
14662}
14663
2e5f8203
JG
14664/* Implement TARGET_SCALAR_MODE_SUPPORTED_P - return TRUE
14665 if MODE is HFmode, and punt to the generic implementation otherwise. */
14666
14667static bool
14668aarch64_scalar_mode_supported_p (machine_mode mode)
14669{
14670 return (mode == HFmode
14671 ? true
14672 : default_scalar_mode_supported_p (mode));
14673}
14674
11e554b3
JG
14675/* Set the value of FLT_EVAL_METHOD.
14676 ISO/IEC TS 18661-3 defines two values that we'd like to make use of:
14677
14678 0: evaluate all operations and constants, whose semantic type has at
14679 most the range and precision of type float, to the range and
14680 precision of float; evaluate all other operations and constants to
14681 the range and precision of the semantic type;
14682
14683 N, where _FloatN is a supported interchange floating type
14684 evaluate all operations and constants, whose semantic type has at
14685 most the range and precision of _FloatN type, to the range and
14686 precision of the _FloatN type; evaluate all other operations and
14687 constants to the range and precision of the semantic type;
14688
14689 If we have the ARMv8.2-A extensions then we support _Float16 in native
14690 precision, so we should set this to 16. Otherwise, we support the type,
14691 but want to evaluate expressions in float precision, so set this to
14692 0. */
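
/* For example (illustrative): with native FP16 instructions available, an
   addition of two _Float16 values is evaluated directly in _Float16
   (FLT_EVAL_METHOD of 16); otherwise the operands are promoted and the
   addition is evaluated in float (FLT_EVAL_METHOD of 0).  */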
14693
14694static enum flt_eval_method
14695aarch64_excess_precision (enum excess_precision_type type)
14696{
14697 switch (type)
14698 {
14699 case EXCESS_PRECISION_TYPE_FAST:
14700 case EXCESS_PRECISION_TYPE_STANDARD:
14701 /* We can calculate either in 16-bit range and precision or
14702 32-bit range and precision. Make that decision based on whether
14703 we have native support for the ARMv8.2-A 16-bit floating-point
14704 instructions or not. */
14705 return (TARGET_FP_F16INST
14706 ? FLT_EVAL_METHOD_PROMOTE_TO_FLOAT16
14707 : FLT_EVAL_METHOD_PROMOTE_TO_FLOAT);
14708 case EXCESS_PRECISION_TYPE_IMPLICIT:
14709 return FLT_EVAL_METHOD_PROMOTE_TO_FLOAT16;
14710 default:
14711 gcc_unreachable ();
14712 }
14713 return FLT_EVAL_METHOD_UNPREDICTABLE;
14714}
14715
b48d6421
KT
14716/* Implement TARGET_SCHED_CAN_SPECULATE_INSN. Return true if INSN can be
14717 scheduled for speculative execution. Reject the long-running division
14718 and square-root instructions. */
14719
14720static bool
14721aarch64_sched_can_speculate_insn (rtx_insn *insn)
14722{
14723 switch (get_attr_type (insn))
14724 {
14725 case TYPE_SDIV:
14726 case TYPE_UDIV:
14727 case TYPE_FDIVS:
14728 case TYPE_FDIVD:
14729 case TYPE_FSQRTS:
14730 case TYPE_FSQRTD:
14731 case TYPE_NEON_FP_SQRT_S:
14732 case TYPE_NEON_FP_SQRT_D:
14733 case TYPE_NEON_FP_SQRT_S_Q:
14734 case TYPE_NEON_FP_SQRT_D_Q:
14735 case TYPE_NEON_FP_DIV_S:
14736 case TYPE_NEON_FP_DIV_D:
14737 case TYPE_NEON_FP_DIV_S_Q:
14738 case TYPE_NEON_FP_DIV_D_Q:
14739 return false;
14740 default:
14741 return true;
14742 }
14743}
14744
51b86113
DM
14745/* Target-specific selftests. */
14746
14747#if CHECKING_P
14748
14749namespace selftest {
14750
14751/* Selftest for the RTL loader.
14752 Verify that the RTL loader copes with a dump from
14753 print_rtx_function. This is essentially just a test that class
14754 function_reader can handle a real dump, but it also verifies
14755 that lookup_reg_by_dump_name correctly handles hard regs.
14756 The presence of hard reg names in the dump means that the test is
14757 target-specific, hence it is in this file. */
14758
14759static void
14760aarch64_test_loading_full_dump ()
14761{
14762 rtl_dump_test t (SELFTEST_LOCATION, locate_file ("aarch64/times-two.rtl"));
14763
14764 ASSERT_STREQ ("times_two", IDENTIFIER_POINTER (DECL_NAME (cfun->decl)));
14765
14766 rtx_insn *insn_1 = get_insn_by_uid (1);
14767 ASSERT_EQ (NOTE, GET_CODE (insn_1));
14768
14769 rtx_insn *insn_15 = get_insn_by_uid (15);
14770 ASSERT_EQ (INSN, GET_CODE (insn_15));
14771 ASSERT_EQ (USE, GET_CODE (PATTERN (insn_15)));
14772
14773 /* Verify crtl->return_rtx. */
14774 ASSERT_EQ (REG, GET_CODE (crtl->return_rtx));
14775 ASSERT_EQ (0, REGNO (crtl->return_rtx));
14776 ASSERT_EQ (SImode, GET_MODE (crtl->return_rtx));
14777}
14778
14779/* Run all target-specific selftests. */
14780
14781static void
14782aarch64_run_selftests (void)
14783{
14784 aarch64_test_loading_full_dump ();
14785}
14786
14787} // namespace selftest
14788
14789#endif /* #if CHECKING_P */
14790
43e9d192
IB
14791#undef TARGET_ADDRESS_COST
14792#define TARGET_ADDRESS_COST aarch64_address_cost
14793
14794/* This hook determines whether unnamed bitfields affect the alignment
14795 of the containing structure. The hook returns true if the structure
14796 should inherit the alignment requirements of an unnamed bitfield's
14797 type. */
14798#undef TARGET_ALIGN_ANON_BITFIELD
14799#define TARGET_ALIGN_ANON_BITFIELD hook_bool_void_true
14800
14801#undef TARGET_ASM_ALIGNED_DI_OP
14802#define TARGET_ASM_ALIGNED_DI_OP "\t.xword\t"
14803
14804#undef TARGET_ASM_ALIGNED_HI_OP
14805#define TARGET_ASM_ALIGNED_HI_OP "\t.hword\t"
14806
14807#undef TARGET_ASM_ALIGNED_SI_OP
14808#define TARGET_ASM_ALIGNED_SI_OP "\t.word\t"
14809
14810#undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
14811#define TARGET_ASM_CAN_OUTPUT_MI_THUNK \
14812 hook_bool_const_tree_hwi_hwi_const_tree_true
14813
e1c1ecb0
KT
14814#undef TARGET_ASM_FILE_START
14815#define TARGET_ASM_FILE_START aarch64_start_file
14816
43e9d192
IB
14817#undef TARGET_ASM_OUTPUT_MI_THUNK
14818#define TARGET_ASM_OUTPUT_MI_THUNK aarch64_output_mi_thunk
14819
14820#undef TARGET_ASM_SELECT_RTX_SECTION
14821#define TARGET_ASM_SELECT_RTX_SECTION aarch64_select_rtx_section
14822
14823#undef TARGET_ASM_TRAMPOLINE_TEMPLATE
14824#define TARGET_ASM_TRAMPOLINE_TEMPLATE aarch64_asm_trampoline_template
14825
14826#undef TARGET_BUILD_BUILTIN_VA_LIST
14827#define TARGET_BUILD_BUILTIN_VA_LIST aarch64_build_builtin_va_list
14828
14829#undef TARGET_CALLEE_COPIES
14830#define TARGET_CALLEE_COPIES hook_bool_CUMULATIVE_ARGS_mode_tree_bool_false
14831
14832#undef TARGET_CAN_ELIMINATE
14833#define TARGET_CAN_ELIMINATE aarch64_can_eliminate
14834
1fd8d40c
KT
14835#undef TARGET_CAN_INLINE_P
14836#define TARGET_CAN_INLINE_P aarch64_can_inline_p
14837
43e9d192
IB
14838#undef TARGET_CANNOT_FORCE_CONST_MEM
14839#define TARGET_CANNOT_FORCE_CONST_MEM aarch64_cannot_force_const_mem
14840
50487d79
EM
14841#undef TARGET_CASE_VALUES_THRESHOLD
14842#define TARGET_CASE_VALUES_THRESHOLD aarch64_case_values_threshold
14843
43e9d192
IB
14844#undef TARGET_CONDITIONAL_REGISTER_USAGE
14845#define TARGET_CONDITIONAL_REGISTER_USAGE aarch64_conditional_register_usage
14846
14847/* Only the least significant bit is used for initialization guard
14848 variables. */
14849#undef TARGET_CXX_GUARD_MASK_BIT
14850#define TARGET_CXX_GUARD_MASK_BIT hook_bool_void_true
14851
14852#undef TARGET_C_MODE_FOR_SUFFIX
14853#define TARGET_C_MODE_FOR_SUFFIX aarch64_c_mode_for_suffix
14854
14855#ifdef TARGET_BIG_ENDIAN_DEFAULT
14856#undef TARGET_DEFAULT_TARGET_FLAGS
14857#define TARGET_DEFAULT_TARGET_FLAGS (MASK_BIG_END)
14858#endif
14859
14860#undef TARGET_CLASS_MAX_NREGS
14861#define TARGET_CLASS_MAX_NREGS aarch64_class_max_nregs
14862
119103ca
JG
14863#undef TARGET_BUILTIN_DECL
14864#define TARGET_BUILTIN_DECL aarch64_builtin_decl
14865
a6fc00da
BH
14866#undef TARGET_BUILTIN_RECIPROCAL
14867#define TARGET_BUILTIN_RECIPROCAL aarch64_builtin_reciprocal
14868
11e554b3
JG
14869#undef TARGET_C_EXCESS_PRECISION
14870#define TARGET_C_EXCESS_PRECISION aarch64_excess_precision
14871
43e9d192
IB
14872#undef TARGET_EXPAND_BUILTIN
14873#define TARGET_EXPAND_BUILTIN aarch64_expand_builtin
14874
14875#undef TARGET_EXPAND_BUILTIN_VA_START
14876#define TARGET_EXPAND_BUILTIN_VA_START aarch64_expand_builtin_va_start
14877
9697e620
JG
14878#undef TARGET_FOLD_BUILTIN
14879#define TARGET_FOLD_BUILTIN aarch64_fold_builtin
14880
43e9d192
IB
14881#undef TARGET_FUNCTION_ARG
14882#define TARGET_FUNCTION_ARG aarch64_function_arg
14883
14884#undef TARGET_FUNCTION_ARG_ADVANCE
14885#define TARGET_FUNCTION_ARG_ADVANCE aarch64_function_arg_advance
14886
14887#undef TARGET_FUNCTION_ARG_BOUNDARY
14888#define TARGET_FUNCTION_ARG_BOUNDARY aarch64_function_arg_boundary
14889
14890#undef TARGET_FUNCTION_OK_FOR_SIBCALL
14891#define TARGET_FUNCTION_OK_FOR_SIBCALL aarch64_function_ok_for_sibcall
14892
14893#undef TARGET_FUNCTION_VALUE
14894#define TARGET_FUNCTION_VALUE aarch64_function_value
14895
14896#undef TARGET_FUNCTION_VALUE_REGNO_P
14897#define TARGET_FUNCTION_VALUE_REGNO_P aarch64_function_value_regno_p
14898
14899#undef TARGET_FRAME_POINTER_REQUIRED
14900#define TARGET_FRAME_POINTER_REQUIRED aarch64_frame_pointer_required
14901
fc72cba7
AL
14902#undef TARGET_GIMPLE_FOLD_BUILTIN
14903#define TARGET_GIMPLE_FOLD_BUILTIN aarch64_gimple_fold_builtin
0ac198d3 14904
43e9d192
IB
14905#undef TARGET_GIMPLIFY_VA_ARG_EXPR
14906#define TARGET_GIMPLIFY_VA_ARG_EXPR aarch64_gimplify_va_arg_expr
14907
14908#undef TARGET_INIT_BUILTINS
14909#define TARGET_INIT_BUILTINS aarch64_init_builtins
14910
c64f7d37
WD
14911#undef TARGET_IRA_CHANGE_PSEUDO_ALLOCNO_CLASS
14912#define TARGET_IRA_CHANGE_PSEUDO_ALLOCNO_CLASS \
14913 aarch64_ira_change_pseudo_allocno_class
14914
43e9d192
IB
14915#undef TARGET_LEGITIMATE_ADDRESS_P
14916#define TARGET_LEGITIMATE_ADDRESS_P aarch64_legitimate_address_hook_p
14917
14918#undef TARGET_LEGITIMATE_CONSTANT_P
14919#define TARGET_LEGITIMATE_CONSTANT_P aarch64_legitimate_constant_p
14920
491ec060
WD
14921#undef TARGET_LEGITIMIZE_ADDRESS_DISPLACEMENT
14922#define TARGET_LEGITIMIZE_ADDRESS_DISPLACEMENT \
14923 aarch64_legitimize_address_displacement
14924
43e9d192
IB
14925#undef TARGET_LIBGCC_CMP_RETURN_MODE
14926#define TARGET_LIBGCC_CMP_RETURN_MODE aarch64_libgcc_cmp_return_mode
14927
11e554b3
JG
14928#undef TARGET_LIBGCC_FLOATING_MODE_SUPPORTED_P
14929#define TARGET_LIBGCC_FLOATING_MODE_SUPPORTED_P \
14930aarch64_libgcc_floating_mode_supported_p
14931
ac2b960f
YZ
14932#undef TARGET_MANGLE_TYPE
14933#define TARGET_MANGLE_TYPE aarch64_mangle_type
14934
43e9d192
IB
14935#undef TARGET_MEMORY_MOVE_COST
14936#define TARGET_MEMORY_MOVE_COST aarch64_memory_move_cost
14937
26e0ff94
WD
14938#undef TARGET_MIN_DIVISIONS_FOR_RECIP_MUL
14939#define TARGET_MIN_DIVISIONS_FOR_RECIP_MUL aarch64_min_divisions_for_recip_mul
14940
43e9d192
IB
14941#undef TARGET_MUST_PASS_IN_STACK
14942#define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size
14943
14944/* This target hook should return true if accesses to volatile bitfields
14945 should use the narrowest mode possible. It should return false if these
14946 accesses should use the bitfield container type. */
14947#undef TARGET_NARROW_VOLATILE_BITFIELD
14948#define TARGET_NARROW_VOLATILE_BITFIELD hook_bool_void_false
14949
14950#undef TARGET_OPTION_OVERRIDE
14951#define TARGET_OPTION_OVERRIDE aarch64_override_options
14952
14953#undef TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE
14954#define TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE \
14955 aarch64_override_options_after_change
14956
361fb3ee
KT
14957#undef TARGET_OPTION_SAVE
14958#define TARGET_OPTION_SAVE aarch64_option_save
14959
14960#undef TARGET_OPTION_RESTORE
14961#define TARGET_OPTION_RESTORE aarch64_option_restore
14962
14963#undef TARGET_OPTION_PRINT
14964#define TARGET_OPTION_PRINT aarch64_option_print
14965
5a2c8331
KT
14966#undef TARGET_OPTION_VALID_ATTRIBUTE_P
14967#define TARGET_OPTION_VALID_ATTRIBUTE_P aarch64_option_valid_attribute_p
14968
d78006d9
KT
14969#undef TARGET_SET_CURRENT_FUNCTION
14970#define TARGET_SET_CURRENT_FUNCTION aarch64_set_current_function
14971
43e9d192
IB
14972#undef TARGET_PASS_BY_REFERENCE
14973#define TARGET_PASS_BY_REFERENCE aarch64_pass_by_reference
14974
14975#undef TARGET_PREFERRED_RELOAD_CLASS
14976#define TARGET_PREFERRED_RELOAD_CLASS aarch64_preferred_reload_class
14977
cee66c68
WD
14978#undef TARGET_SCHED_REASSOCIATION_WIDTH
14979#define TARGET_SCHED_REASSOCIATION_WIDTH aarch64_reassociation_width
14980
c2ec330c
AL
14981#undef TARGET_PROMOTED_TYPE
14982#define TARGET_PROMOTED_TYPE aarch64_promoted_type
14983
43e9d192
IB
14984#undef TARGET_SECONDARY_RELOAD
14985#define TARGET_SECONDARY_RELOAD aarch64_secondary_reload
14986
14987#undef TARGET_SHIFT_TRUNCATION_MASK
14988#define TARGET_SHIFT_TRUNCATION_MASK aarch64_shift_truncation_mask
14989
14990#undef TARGET_SETUP_INCOMING_VARARGS
14991#define TARGET_SETUP_INCOMING_VARARGS aarch64_setup_incoming_varargs
14992
14993#undef TARGET_STRUCT_VALUE_RTX
14994#define TARGET_STRUCT_VALUE_RTX aarch64_struct_value_rtx
14995
14996#undef TARGET_REGISTER_MOVE_COST
14997#define TARGET_REGISTER_MOVE_COST aarch64_register_move_cost
14998
14999#undef TARGET_RETURN_IN_MEMORY
15000#define TARGET_RETURN_IN_MEMORY aarch64_return_in_memory
15001
15002#undef TARGET_RETURN_IN_MSB
15003#define TARGET_RETURN_IN_MSB aarch64_return_in_msb
15004
15005#undef TARGET_RTX_COSTS
7cc2145f 15006#define TARGET_RTX_COSTS aarch64_rtx_costs_wrapper
43e9d192 15007
2e5f8203
JG
15008#undef TARGET_SCALAR_MODE_SUPPORTED_P
15009#define TARGET_SCALAR_MODE_SUPPORTED_P aarch64_scalar_mode_supported_p
15010
d126a4ae
AP
15011#undef TARGET_SCHED_ISSUE_RATE
15012#define TARGET_SCHED_ISSUE_RATE aarch64_sched_issue_rate
15013
d03f7e44
MK
15014#undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
15015#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
15016 aarch64_sched_first_cycle_multipass_dfa_lookahead
15017
2d6bc7fa
KT
15018#undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD
15019#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD \
15020 aarch64_first_cycle_multipass_dfa_lookahead_guard
15021
827ab47a
KT
15022#undef TARGET_SHRINK_WRAP_GET_SEPARATE_COMPONENTS
15023#define TARGET_SHRINK_WRAP_GET_SEPARATE_COMPONENTS \
15024 aarch64_get_separate_components
15025
15026#undef TARGET_SHRINK_WRAP_COMPONENTS_FOR_BB
15027#define TARGET_SHRINK_WRAP_COMPONENTS_FOR_BB \
15028 aarch64_components_for_bb
15029
15030#undef TARGET_SHRINK_WRAP_DISQUALIFY_COMPONENTS
15031#define TARGET_SHRINK_WRAP_DISQUALIFY_COMPONENTS \
15032 aarch64_disqualify_components
15033
15034#undef TARGET_SHRINK_WRAP_EMIT_PROLOGUE_COMPONENTS
15035#define TARGET_SHRINK_WRAP_EMIT_PROLOGUE_COMPONENTS \
15036 aarch64_emit_prologue_components
15037
15038#undef TARGET_SHRINK_WRAP_EMIT_EPILOGUE_COMPONENTS
15039#define TARGET_SHRINK_WRAP_EMIT_EPILOGUE_COMPONENTS \
15040 aarch64_emit_epilogue_components
15041
15042#undef TARGET_SHRINK_WRAP_SET_HANDLED_COMPONENTS
15043#define TARGET_SHRINK_WRAP_SET_HANDLED_COMPONENTS \
15044 aarch64_set_handled_components
15045
43e9d192
IB
15046#undef TARGET_TRAMPOLINE_INIT
15047#define TARGET_TRAMPOLINE_INIT aarch64_trampoline_init
15048
15049#undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
15050#define TARGET_USE_BLOCKS_FOR_CONSTANT_P aarch64_use_blocks_for_constant_p
15051
15052#undef TARGET_VECTOR_MODE_SUPPORTED_P
15053#define TARGET_VECTOR_MODE_SUPPORTED_P aarch64_vector_mode_supported_p
15054
7df76747
N
15055#undef TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT
15056#define TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT \
15057 aarch64_builtin_support_vector_misalignment
15058
43e9d192
IB
15059#undef TARGET_ARRAY_MODE_SUPPORTED_P
15060#define TARGET_ARRAY_MODE_SUPPORTED_P aarch64_array_mode_supported_p
15061
8990e73a
TB
15062#undef TARGET_VECTORIZE_ADD_STMT_COST
15063#define TARGET_VECTORIZE_ADD_STMT_COST aarch64_add_stmt_cost
15064
15065#undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
15066#define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
15067 aarch64_builtin_vectorization_cost
15068
43e9d192
IB
15069#undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
15070#define TARGET_VECTORIZE_PREFERRED_SIMD_MODE aarch64_preferred_simd_mode
15071
42fc9a7f
JG
15072#undef TARGET_VECTORIZE_BUILTINS
15073#define TARGET_VECTORIZE_BUILTINS
15074
15075#undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
15076#define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
15077 aarch64_builtin_vectorized_function
15078
3b357264
JG
15079#undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES
15080#define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES \
15081 aarch64_autovectorize_vector_sizes
15082
aa87aced
KV
15083#undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV
15084#define TARGET_ATOMIC_ASSIGN_EXPAND_FENV \
15085 aarch64_atomic_assign_expand_fenv
15086
43e9d192
IB
15087/* Section anchor support. */
15088
15089#undef TARGET_MIN_ANCHOR_OFFSET
15090#define TARGET_MIN_ANCHOR_OFFSET -256
15091
15092/* Limit the maximum anchor offset to 4k-1, since that's the limit for a
15093 byte offset; we can do much more for larger data types, but have no way
15094 to determine the size of the access. We assume accesses are aligned. */
15095#undef TARGET_MAX_ANCHOR_OFFSET
15096#define TARGET_MAX_ANCHOR_OFFSET 4095
15097
db0253a4
TB
15098#undef TARGET_VECTOR_ALIGNMENT
15099#define TARGET_VECTOR_ALIGNMENT aarch64_simd_vector_alignment
15100
15101#undef TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE
15102#define TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE \
15103 aarch64_simd_vector_alignment_reachable
15104
88b08073
JG
15105/* vec_perm support. */
15106
15107#undef TARGET_VECTORIZE_VEC_PERM_CONST_OK
15108#define TARGET_VECTORIZE_VEC_PERM_CONST_OK \
15109 aarch64_vectorize_vec_perm_const_ok
15110
c2ec330c
AL
15111#undef TARGET_INIT_LIBFUNCS
15112#define TARGET_INIT_LIBFUNCS aarch64_init_libfuncs
70f09188 15113
706b2314 15114#undef TARGET_FIXED_CONDITION_CODE_REGS
70f09188
AP
15115#define TARGET_FIXED_CONDITION_CODE_REGS aarch64_fixed_condition_code_regs
15116
5cb74e90
RR
15117#undef TARGET_FLAGS_REGNUM
15118#define TARGET_FLAGS_REGNUM CC_REGNUM
15119
78607708
TV
15120#undef TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS
15121#define TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS true
15122
a3125fc2
CL
15123#undef TARGET_ASAN_SHADOW_OFFSET
15124#define TARGET_ASAN_SHADOW_OFFSET aarch64_asan_shadow_offset
15125
0c4ec427
RE
15126#undef TARGET_LEGITIMIZE_ADDRESS
15127#define TARGET_LEGITIMIZE_ADDRESS aarch64_legitimize_address
15128
d3006da6
JG
15129#undef TARGET_USE_BY_PIECES_INFRASTRUCTURE_P
15130#define TARGET_USE_BY_PIECES_INFRASTRUCTURE_P \
15131 aarch64_use_by_pieces_infrastructure_p
15132
b48d6421
KT
15133#undef TARGET_SCHED_CAN_SPECULATE_INSN
15134#define TARGET_SCHED_CAN_SPECULATE_INSN aarch64_sched_can_speculate_insn
15135
594bdd53
FY
15136#undef TARGET_CAN_USE_DOLOOP_P
15137#define TARGET_CAN_USE_DOLOOP_P can_use_doloop_if_innermost
15138
9bca63d4
WD
15139#undef TARGET_SCHED_ADJUST_PRIORITY
15140#define TARGET_SCHED_ADJUST_PRIORITY aarch64_sched_adjust_priority
15141
6a569cdd
KT
15142#undef TARGET_SCHED_MACRO_FUSION_P
15143#define TARGET_SCHED_MACRO_FUSION_P aarch64_macro_fusion_p
15144
15145#undef TARGET_SCHED_MACRO_FUSION_PAIR_P
15146#define TARGET_SCHED_MACRO_FUSION_PAIR_P aarch_macro_fusion_pair_p
15147
350013bc
BC
15148#undef TARGET_SCHED_FUSION_PRIORITY
15149#define TARGET_SCHED_FUSION_PRIORITY aarch64_sched_fusion_priority
15150
7b841a12
JW
15151#undef TARGET_UNSPEC_MAY_TRAP_P
15152#define TARGET_UNSPEC_MAY_TRAP_P aarch64_unspec_may_trap_p
15153
1b1e81f8
JW
15154#undef TARGET_USE_PSEUDO_PIC_REG
15155#define TARGET_USE_PSEUDO_PIC_REG aarch64_use_pseudo_pic_reg
15156
cc8ca59e
JB
15157#undef TARGET_PRINT_OPERAND
15158#define TARGET_PRINT_OPERAND aarch64_print_operand
15159
15160#undef TARGET_PRINT_OPERAND_ADDRESS
15161#define TARGET_PRINT_OPERAND_ADDRESS aarch64_print_operand_address
15162
ee62a5a6
RS
15163#undef TARGET_OPTAB_SUPPORTED_P
15164#define TARGET_OPTAB_SUPPORTED_P aarch64_optab_supported_p
15165
43203dea
RR
15166#undef TARGET_OMIT_STRUCT_RETURN_REG
15167#define TARGET_OMIT_STRUCT_RETURN_REG true
15168
f46fe37e
EB
15169/* The architecture reserves bits 0 and 1 so use bit 2 for descriptors. */
15170#undef TARGET_CUSTOM_FUNCTION_DESCRIPTORS
15171#define TARGET_CUSTOM_FUNCTION_DESCRIPTORS 4
15172
51b86113
DM
15173#if CHECKING_P
15174#undef TARGET_RUN_TARGET_SELFTESTS
15175#define TARGET_RUN_TARGET_SELFTESTS selftest::aarch64_run_selftests
15176#endif /* #if CHECKING_P */
15177
43e9d192
IB
15178struct gcc_target targetm = TARGET_INITIALIZER;
15179
15180#include "gt-aarch64.h"