801070f5 1/* Machine description for AArch64 architecture.
f1717362 2 Copyright (C) 2009-2016 Free Software Foundation, Inc.
df401d54 3 Contributed by ARM Ltd.
4
5 This file is part of GCC.
6
7 GCC is free software; you can redistribute it and/or modify it
8 under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 3, or (at your option)
10 any later version.
11
12 GCC is distributed in the hope that it will be useful, but
13 WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 General Public License for more details.
16
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
20
21#include "config.h"
22#include "system.h"
23#include "coretypes.h"
9ef16211 24#include "backend.h"
c1eb80de 25#include "target.h"
26#include "rtl.h"
9ef16211 27#include "tree.h"
28#include "gimple.h"
c1eb80de 29#include "cfghooks.h"
30#include "cfgloop.h"
9ef16211 31#include "df.h"
c1eb80de 32#include "tm_p.h"
33#include "stringpool.h"
34#include "optabs.h"
35#include "regs.h"
36#include "emit-rtl.h"
37#include "recog.h"
38#include "diagnostic.h"
df401d54 39#include "insn-attr.h"
b20a8bb4 40#include "alias.h"
b20a8bb4 41#include "fold-const.h"
9ed99284 42#include "stor-layout.h"
43#include "calls.h"
44#include "varasm.h"
df401d54 45#include "output.h"
d53441c8 46#include "flags.h"
d53441c8 47#include "explow.h"
df401d54 48#include "expr.h"
49#include "reload.h"
df401d54 50#include "langhooks.h"
aadb8e17 51#include "opts.h"
5f73ddf0 52#include "params.h"
a8783bee 53#include "gimplify.h"
df401d54 54#include "dwarf2.h"
4c7587f5 55#include "gimple-iterator.h"
61d9499e 56#include "tree-vectorizer.h"
d14cac46 57#include "aarch64-cost-tables.h"
e50295b3 58#include "dumpfile.h"
f7715905 59#include "builtins.h"
1d45170f 60#include "rtl-iter.h"
a1b874a7 61#include "tm-constrs.h"
65d538fd 62#include "sched-int.h"
ee7ef7ab 63#include "cortex-a57-fma-steering.h"
f59387ab 64#include "target-globals.h"
6a979f73 65#include "common/common-target.h"
df401d54 66
0c71fb4f 67/* This file should be included last. */
4b498588 68#include "target-def.h"
69
011aed18 70/* Defined for convenience. */
71#define POINTER_BYTES (POINTER_SIZE / BITS_PER_UNIT)
72
df401d54 73/* Classifies an address.
74
75 ADDRESS_REG_IMM
76 A simple base register plus immediate offset.
77
78 ADDRESS_REG_WB
79 A base register indexed by immediate offset with writeback.
80
81 ADDRESS_REG_REG
82 A base register indexed by (optionally scaled) register.
83
84 ADDRESS_REG_UXTW
85 A base register indexed by (optionally scaled) zero-extended register.
86
87 ADDRESS_REG_SXTW
88 A base register indexed by (optionally scaled) sign-extended register.
89
90 ADDRESS_LO_SUM
91 A LO_SUM rtx with a base register and "LO12" symbol relocation.
92
93 ADDRESS_SYMBOLIC:
94 A constant symbolic address, in pc-relative literal pool. */
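/* Illustrative assembly forms of the classifications above (examples only):

     ADDRESS_REG_IMM	ldr x0, [x1, 16]
     ADDRESS_REG_WB	ldr x0, [x1, 16]!   or   ldr x0, [x1], 16
     ADDRESS_REG_REG	ldr x0, [x1, x2, lsl 3]
     ADDRESS_REG_UXTW	ldr x0, [x1, w2, uxtw 3]
     ADDRESS_REG_SXTW	ldr x0, [x1, w2, sxtw 3]
     ADDRESS_LO_SUM	ldr x0, [x1, #:lo12:sym]
     ADDRESS_SYMBOLIC	ldr x0, .LC0	(PC-relative literal load)  */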
95
96enum aarch64_address_type {
97 ADDRESS_REG_IMM,
98 ADDRESS_REG_WB,
99 ADDRESS_REG_REG,
100 ADDRESS_REG_UXTW,
101 ADDRESS_REG_SXTW,
102 ADDRESS_LO_SUM,
103 ADDRESS_SYMBOLIC
104};
105
106struct aarch64_address_info {
107 enum aarch64_address_type type;
108 rtx base;
109 rtx offset;
110 int shift;
111 enum aarch64_symbol_type symbol_type;
112};
113
fc3eb658 114struct simd_immediate_info
115{
116 rtx value;
117 int shift;
118 int element_width;
fc3eb658 119 bool mvn;
8458c9e9 120 bool msl;
fc3eb658 121};
122
df401d54 123/* The current code model. */
124enum aarch64_code_model aarch64_cmodel;
125
126#ifdef HAVE_AS_TLS
127#undef TARGET_HAVE_TLS
128#define TARGET_HAVE_TLS 1
129#endif
130
3754d046 131static bool aarch64_composite_type_p (const_tree, machine_mode);
132static bool aarch64_vfp_is_call_or_return_candidate (machine_mode,
df401d54 133 const_tree,
3754d046 134 machine_mode *, int *,
df401d54 135 bool *);
136static void aarch64_elf_asm_constructor (rtx, int) ATTRIBUTE_UNUSED;
137static void aarch64_elf_asm_destructor (rtx, int) ATTRIBUTE_UNUSED;
df401d54 138static void aarch64_override_options_after_change (void);
3754d046 139static bool aarch64_vector_mode_supported_p (machine_mode);
3754d046 140static bool aarch64_vectorize_vec_perm_const_ok (machine_mode vmode,
5de1fcdb 141 const unsigned char *sel);
3754d046 142static int aarch64_address_cost (rtx, machine_mode, addr_space_t, bool);
5de1fcdb 143
b92f2770 144/* Major revision number of the ARM Architecture implemented by the target. */
145unsigned aarch64_architecture_version;
146
df401d54 147/* The processor for which instructions should be scheduled. */
29be9de5 148enum aarch64_processor aarch64_tune = cortexa53;
df401d54 149
df401d54 150/* Mask to specify which instruction scheduling options should be used. */
151unsigned long aarch64_tune_flags = 0;
152
acb1dac7 153/* Global flag for PC relative loads. */
154bool aarch64_nopcrelative_literal_loads;
155
4d2c6420 156/* Support for command line parsing of boolean flags in the tuning
157 structures. */
158struct aarch64_flag_desc
159{
160 const char* name;
161 unsigned int flag;
162};
163
a3541110 164#define AARCH64_FUSION_PAIR(name, internal_name) \
4d2c6420 165 { name, AARCH64_FUSE_##internal_name },
166static const struct aarch64_flag_desc aarch64_fusible_pairs[] =
167{
168 { "none", AARCH64_FUSE_NOTHING },
169#include "aarch64-fusion-pairs.def"
170 { "all", AARCH64_FUSE_ALL },
171 { NULL, AARCH64_FUSE_NOTHING }
172};
 173#undef AARCH64_FUSION_PAIR
174
7b30250d 175#define AARCH64_EXTRA_TUNING_OPTION(name, internal_name) \
4d2c6420 176 { name, AARCH64_EXTRA_TUNE_##internal_name },
177static const struct aarch64_flag_desc aarch64_tuning_flags[] =
178{
179 { "none", AARCH64_EXTRA_TUNE_NONE },
180#include "aarch64-tuning-flags.def"
181 { "all", AARCH64_EXTRA_TUNE_ALL },
182 { NULL, AARCH64_EXTRA_TUNE_NONE }
183};
184#undef AARCH64_EXTRA_TUNING_OPTION
185
df401d54 186/* Tuning parameters. */
187
df401d54 188static const struct cpu_addrcost_table generic_addrcost_table =
189{
3d70178f 190 {
f05bfe67 191 0, /* hi */
192 0, /* si */
193 0, /* di */
194 0, /* ti */
3d70178f 195 },
f05bfe67 196 0, /* pre_modify */
197 0, /* post_modify */
198 0, /* register_offset */
85258792 199 0, /* register_sextend */
200 0, /* register_zextend */
f05bfe67 201 0 /* imm_offset */
df401d54 202};
203
3172ca83 204static const struct cpu_addrcost_table cortexa57_addrcost_table =
205{
3172ca83 206 {
f05bfe67 207 1, /* hi */
208 0, /* si */
209 0, /* di */
210 1, /* ti */
3172ca83 211 },
f05bfe67 212 0, /* pre_modify */
213 0, /* post_modify */
214 0, /* register_offset */
85258792 215 0, /* register_sextend */
216 0, /* register_zextend */
f05bfe67 217 0, /* imm_offset */
3172ca83 218};
219
ceb58cce 220static const struct cpu_addrcost_table exynosm1_addrcost_table =
221{
222 {
223 0, /* hi */
224 0, /* si */
225 0, /* di */
226 2, /* ti */
227 },
228 0, /* pre_modify */
229 0, /* post_modify */
230 1, /* register_offset */
231 1, /* register_sextend */
232 2, /* register_zextend */
233 0, /* imm_offset */
234};
235
552fa694 236static const struct cpu_addrcost_table xgene1_addrcost_table =
237{
552fa694 238 {
f05bfe67 239 1, /* hi */
240 0, /* si */
241 0, /* di */
242 1, /* ti */
552fa694 243 },
f05bfe67 244 1, /* pre_modify */
245 0, /* post_modify */
246 0, /* register_offset */
85258792 247 1, /* register_sextend */
248 1, /* register_zextend */
f05bfe67 249 0, /* imm_offset */
552fa694 250};
251
df401d54 252static const struct cpu_regmove_cost generic_regmove_cost =
253{
f05bfe67 254 1, /* GP2GP */
aa577f0f 255 /* Avoid the use of slow int<->fp moves for spilling by setting
256 their cost higher than memmov_cost. */
f05bfe67 257 5, /* GP2FP */
258 5, /* FP2GP */
259 2 /* FP2FP */
df401d54 260};
261
dda093af 262static const struct cpu_regmove_cost cortexa57_regmove_cost =
263{
f05bfe67 264 1, /* GP2GP */
dda093af 265 /* Avoid the use of slow int<->fp moves for spilling by setting
266 their cost higher than memmov_cost. */
f05bfe67 267 5, /* GP2FP */
268 5, /* FP2GP */
269 2 /* FP2FP */
dda093af 270};
271
272static const struct cpu_regmove_cost cortexa53_regmove_cost =
273{
f05bfe67 274 1, /* GP2GP */
dda093af 275 /* Avoid the use of slow int<->fp moves for spilling by setting
276 their cost higher than memmov_cost. */
f05bfe67 277 5, /* GP2FP */
278 5, /* FP2GP */
279 2 /* FP2FP */
dda093af 280};
281
ceb58cce 282static const struct cpu_regmove_cost exynosm1_regmove_cost =
283{
284 1, /* GP2GP */
285 /* Avoid the use of slow int<->fp moves for spilling by setting
 286	 their cost higher than memmov_cost (the actual costs are 4 and 9).  */
287 9, /* GP2FP */
288 9, /* FP2GP */
289 1 /* FP2FP */
290};
291
d14cac46 292static const struct cpu_regmove_cost thunderx_regmove_cost =
293{
f05bfe67 294 2, /* GP2GP */
295 2, /* GP2FP */
296 6, /* FP2GP */
297 4 /* FP2FP */
d14cac46 298};
299
552fa694 300static const struct cpu_regmove_cost xgene1_regmove_cost =
301{
f05bfe67 302 1, /* GP2GP */
552fa694 303 /* Avoid the use of slow int<->fp moves for spilling by setting
304 their cost higher than memmov_cost. */
f05bfe67 305 8, /* GP2FP */
306 8, /* FP2GP */
307 2 /* FP2FP */
552fa694 308};
309
61d9499e 310/* Generic costs for vector insn classes. */
61d9499e 311static const struct cpu_vector_cost generic_vector_cost =
312{
f05bfe67 313 1, /* scalar_stmt_cost */
314 1, /* scalar_load_cost */
315 1, /* scalar_store_cost */
316 1, /* vec_stmt_cost */
317 1, /* vec_to_scalar_cost */
318 1, /* scalar_to_vec_cost */
319 1, /* vec_align_load_cost */
320 1, /* vec_unalign_load_cost */
321 1, /* vec_unalign_store_cost */
322 1, /* vec_store_cost */
323 3, /* cond_taken_branch_cost */
324 1 /* cond_not_taken_branch_cost */
61d9499e 325};
326
3172ca83 327/* Costs for vector insn classes for Cortex-A57.  */
3172ca83 328static const struct cpu_vector_cost cortexa57_vector_cost =
329{
f05bfe67 330 1, /* scalar_stmt_cost */
331 4, /* scalar_load_cost */
332 1, /* scalar_store_cost */
333 3, /* vec_stmt_cost */
334 8, /* vec_to_scalar_cost */
335 8, /* scalar_to_vec_cost */
336 5, /* vec_align_load_cost */
337 5, /* vec_unalign_load_cost */
338 1, /* vec_unalign_store_cost */
339 1, /* vec_store_cost */
340 1, /* cond_taken_branch_cost */
341 1 /* cond_not_taken_branch_cost */
3172ca83 342};
343
ceb58cce 344static const struct cpu_vector_cost exynosm1_vector_cost =
345{
346 1, /* scalar_stmt_cost */
347 5, /* scalar_load_cost */
348 1, /* scalar_store_cost */
349 3, /* vec_stmt_cost */
350 3, /* vec_to_scalar_cost */
351 3, /* scalar_to_vec_cost */
352 5, /* vec_align_load_cost */
353 5, /* vec_unalign_load_cost */
354 1, /* vec_unalign_store_cost */
355 1, /* vec_store_cost */
356 1, /* cond_taken_branch_cost */
357 1 /* cond_not_taken_branch_cost */
358};
359
552fa694 360/* Costs for vector insn classes for X-Gene 1.  */
552fa694 361static const struct cpu_vector_cost xgene1_vector_cost =
362{
f05bfe67 363 1, /* scalar_stmt_cost */
364 5, /* scalar_load_cost */
365 1, /* scalar_store_cost */
366 2, /* vec_stmt_cost */
367 4, /* vec_to_scalar_cost */
368 4, /* scalar_to_vec_cost */
369 10, /* vec_align_load_cost */
370 10, /* vec_unalign_load_cost */
371 2, /* vec_unalign_store_cost */
372 2, /* vec_store_cost */
373 2, /* cond_taken_branch_cost */
374 1 /* cond_not_taken_branch_cost */
552fa694 375};
376
d05ee6d2 377/* Generic costs for branch instructions. */
378static const struct cpu_branch_cost generic_branch_cost =
379{
380 2, /* Predictable. */
381 2 /* Unpredictable. */
382};
383
13ebc37b 384/* Branch costs for Cortex-A57. */
385static const struct cpu_branch_cost cortexa57_branch_cost =
386{
387 1, /* Predictable. */
388 3 /* Unpredictable. */
389};
390
df401d54 391static const struct tune_params generic_tunings =
392{
1a3ae6f9 393 &cortexa57_extra_costs,
df401d54 394 &generic_addrcost_table,
395 &generic_regmove_cost,
61d9499e 396 &generic_vector_cost,
d05ee6d2 397 &generic_branch_cost,
f05bfe67 398 4, /* memmov_cost */
399 2, /* issue_rate */
37ef9e23 400 AARCH64_FUSE_NOTHING, /* fusible_ops */
a6f68480 401 8, /* function_align. */
402 8, /* jump_align. */
403 4, /* loop_align. */
88227718 404 2, /* int_reassoc_width. */
405 4, /* fp_reassoc_width. */
f7429d30 406 1, /* vec_reassoc_width. */
407 2, /* min_div_recip_mul_sf. */
5e9fcc70 408 2, /* min_div_recip_mul_df. */
35c51aa0 409 0, /* max_case_values. */
410 0, /* cache_line_size. */
5f73ddf0 411 tune_params::AUTOPREFETCHER_OFF, /* autoprefetcher_model. */
5e9fcc70 412 (AARCH64_EXTRA_TUNE_NONE) /* tune_flags. */
df401d54 413};
414
5478f5dd 415static const struct tune_params cortexa35_tunings =
416{
417 &cortexa53_extra_costs,
418 &generic_addrcost_table,
419 &cortexa53_regmove_cost,
420 &generic_vector_cost,
421 &generic_branch_cost,
422 4, /* memmov_cost */
423 1, /* issue_rate */
424 (AARCH64_FUSE_MOV_MOVK | AARCH64_FUSE_ADRP_ADD
425 | AARCH64_FUSE_MOVK_MOVK | AARCH64_FUSE_ADRP_LDR), /* fusible_ops */
426 8, /* function_align. */
427 8, /* jump_align. */
428 4, /* loop_align. */
429 2, /* int_reassoc_width. */
430 4, /* fp_reassoc_width. */
431 1, /* vec_reassoc_width. */
432 2, /* min_div_recip_mul_sf. */
433 2, /* min_div_recip_mul_df. */
434 0, /* max_case_values. */
435 0, /* cache_line_size. */
436 tune_params::AUTOPREFETCHER_WEAK, /* autoprefetcher_model. */
437 (AARCH64_EXTRA_TUNE_NONE) /* tune_flags. */
438};
439
7f3a8898 440static const struct tune_params cortexa53_tunings =
441{
442 &cortexa53_extra_costs,
443 &generic_addrcost_table,
dda093af 444 &cortexa53_regmove_cost,
7f3a8898 445 &generic_vector_cost,
d05ee6d2 446 &generic_branch_cost,
f05bfe67 447 4, /* memmov_cost */
448 2, /* issue_rate */
449 (AARCH64_FUSE_MOV_MOVK | AARCH64_FUSE_ADRP_ADD
37ef9e23 450 | AARCH64_FUSE_MOVK_MOVK | AARCH64_FUSE_ADRP_LDR), /* fusible_ops */
a6f68480 451 8, /* function_align. */
452 8, /* jump_align. */
453 4, /* loop_align. */
88227718 454 2, /* int_reassoc_width. */
455 4, /* fp_reassoc_width. */
f7429d30 456 1, /* vec_reassoc_width. */
457 2, /* min_div_recip_mul_sf. */
5e9fcc70 458 2, /* min_div_recip_mul_df. */
35c51aa0 459 0, /* max_case_values. */
460 0, /* cache_line_size. */
5f73ddf0 461 tune_params::AUTOPREFETCHER_WEAK, /* autoprefetcher_model. */
5e9fcc70 462 (AARCH64_EXTRA_TUNE_NONE) /* tune_flags. */
7f3a8898 463};
464
d6426295 465static const struct tune_params cortexa57_tunings =
466{
467 &cortexa57_extra_costs,
3172ca83 468 &cortexa57_addrcost_table,
dda093af 469 &cortexa57_regmove_cost,
3172ca83 470 &cortexa57_vector_cost,
13ebc37b 471 &cortexa57_branch_cost,
f05bfe67 472 4, /* memmov_cost */
473 3, /* issue_rate */
474 (AARCH64_FUSE_MOV_MOVK | AARCH64_FUSE_ADRP_ADD
37ef9e23 475 | AARCH64_FUSE_MOVK_MOVK), /* fusible_ops */
a6f68480 476 16, /* function_align. */
477 8, /* jump_align. */
478 4, /* loop_align. */
88227718 479 2, /* int_reassoc_width. */
480 4, /* fp_reassoc_width. */
f7429d30 481 1, /* vec_reassoc_width. */
482 2, /* min_div_recip_mul_sf. */
5e9fcc70 483 2, /* min_div_recip_mul_df. */
35c51aa0 484 0, /* max_case_values. */
485 0, /* cache_line_size. */
5f73ddf0 486 tune_params::AUTOPREFETCHER_WEAK, /* autoprefetcher_model. */
e1a2ea91 487 (AARCH64_EXTRA_TUNE_RENAME_FMA_REGS
488 | AARCH64_EXTRA_TUNE_RECIP_SQRT) /* tune_flags. */
5e9fcc70 489};
490
491static const struct tune_params cortexa72_tunings =
492{
493 &cortexa57_extra_costs,
494 &cortexa57_addrcost_table,
495 &cortexa57_regmove_cost,
496 &cortexa57_vector_cost,
497 &generic_branch_cost,
498 4, /* memmov_cost */
499 3, /* issue_rate */
500 (AARCH64_FUSE_MOV_MOVK | AARCH64_FUSE_ADRP_ADD
501 | AARCH64_FUSE_MOVK_MOVK), /* fusible_ops */
502 16, /* function_align. */
503 8, /* jump_align. */
504 4, /* loop_align. */
505 2, /* int_reassoc_width. */
506 4, /* fp_reassoc_width. */
507 1, /* vec_reassoc_width. */
508 2, /* min_div_recip_mul_sf. */
509 2, /* min_div_recip_mul_df. */
35c51aa0 510 0, /* max_case_values. */
511 0, /* cache_line_size. */
5f73ddf0 512 tune_params::AUTOPREFETCHER_OFF, /* autoprefetcher_model. */
5e9fcc70 513 (AARCH64_EXTRA_TUNE_NONE) /* tune_flags. */
d6426295 514};
515
ceb58cce 516static const struct tune_params exynosm1_tunings =
517{
518 &exynosm1_extra_costs,
519 &exynosm1_addrcost_table,
520 &exynosm1_regmove_cost,
521 &exynosm1_vector_cost,
522 &generic_branch_cost,
523 4, /* memmov_cost */
524 3, /* issue_rate */
525 (AARCH64_FUSE_NOTHING), /* fusible_ops */
526 4, /* function_align. */
527 4, /* jump_align. */
528 4, /* loop_align. */
529 2, /* int_reassoc_width. */
530 4, /* fp_reassoc_width. */
531 1, /* vec_reassoc_width. */
532 2, /* min_div_recip_mul_sf. */
533 2, /* min_div_recip_mul_df. */
534 48, /* max_case_values. */
535 64, /* cache_line_size. */
536 tune_params::AUTOPREFETCHER_OFF, /* autoprefetcher_model. */
537 (AARCH64_EXTRA_TUNE_NONE) /* tune_flags. */
538};
539
d14cac46 540static const struct tune_params thunderx_tunings =
541{
542 &thunderx_extra_costs,
543 &generic_addrcost_table,
544 &thunderx_regmove_cost,
545 &generic_vector_cost,
d05ee6d2 546 &generic_branch_cost,
f05bfe67 547 6, /* memmov_cost */
548 2, /* issue_rate */
37ef9e23 549 AARCH64_FUSE_CMP_BRANCH, /* fusible_ops */
a6f68480 550 8, /* function_align. */
551 8, /* jump_align. */
552 8, /* loop_align. */
88227718 553 2, /* int_reassoc_width. */
554 4, /* fp_reassoc_width. */
f7429d30 555 1, /* vec_reassoc_width. */
556 2, /* min_div_recip_mul_sf. */
5e9fcc70 557 2, /* min_div_recip_mul_df. */
35c51aa0 558 0, /* max_case_values. */
559 0, /* cache_line_size. */
5f73ddf0 560 tune_params::AUTOPREFETCHER_OFF, /* autoprefetcher_model. */
5e9fcc70 561 (AARCH64_EXTRA_TUNE_NONE) /* tune_flags. */
d14cac46 562};
563
552fa694 564static const struct tune_params xgene1_tunings =
565{
566 &xgene1_extra_costs,
567 &xgene1_addrcost_table,
568 &xgene1_regmove_cost,
569 &xgene1_vector_cost,
d05ee6d2 570 &generic_branch_cost,
f05bfe67 571 6, /* memmov_cost */
572 4, /* issue_rate */
37ef9e23 573 AARCH64_FUSE_NOTHING, /* fusible_ops */
552fa694 574 16, /* function_align. */
575 8, /* jump_align. */
576 16, /* loop_align. */
577 2, /* int_reassoc_width. */
578 4, /* fp_reassoc_width. */
f7429d30 579 1, /* vec_reassoc_width. */
580 2, /* min_div_recip_mul_sf. */
5e9fcc70 581 2, /* min_div_recip_mul_df. */
35c51aa0 582 0, /* max_case_values. */
583 0, /* cache_line_size. */
5f73ddf0 584 tune_params::AUTOPREFETCHER_OFF, /* autoprefetcher_model. */
e1a2ea91 585 (AARCH64_EXTRA_TUNE_RECIP_SQRT) /* tune_flags. */
552fa694 586};
587
4d2c6420 588/* Support for fine-grained override of the tuning structures. */
589struct aarch64_tuning_override_function
590{
591 const char* name;
592 void (*parse_override)(const char*, struct tune_params*);
593};
594
595static void aarch64_parse_fuse_string (const char*, struct tune_params*);
596static void aarch64_parse_tune_string (const char*, struct tune_params*);
597
598static const struct aarch64_tuning_override_function
599aarch64_tuning_override_functions[] =
600{
601 { "fuse", aarch64_parse_fuse_string },
602 { "tune", aarch64_parse_tune_string },
603 { NULL, NULL }
604};
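/* A sketch of how these are used (the exact syntax accepted is defined by the
   -moverride option and the parse functions above): -moverride=tune=none or
   -moverride=fuse=all picks the corresponding "none"/"all" entries in the
   aarch64_tuning_flags and aarch64_fusible_pairs tables.  */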
605
df401d54 606/* A processor implementing AArch64. */
607struct processor
608{
609 const char *const name;
245cad52 610 enum aarch64_processor ident;
611 enum aarch64_processor sched_core;
9356ca16 612 enum aarch64_arch arch;
b92f2770 613 unsigned architecture_version;
df401d54 614 const unsigned long flags;
615 const struct tune_params *const tune;
616};
617
9356ca16 618/* Architectures implementing AArch64. */
619static const struct processor all_architectures[] =
620{
621#define AARCH64_ARCH(NAME, CORE, ARCH_IDENT, ARCH_REV, FLAGS) \
622 {NAME, CORE, CORE, AARCH64_ARCH_##ARCH_IDENT, ARCH_REV, FLAGS, NULL},
623#include "aarch64-arches.def"
624#undef AARCH64_ARCH
625 {NULL, aarch64_none, aarch64_none, aarch64_no_arch, 0, 0, NULL}
626};
627
df401d54 628/* Processor cores implementing AArch64. */
629static const struct processor all_cores[] =
630{
8b81ce60 631#define AARCH64_CORE(NAME, IDENT, SCHED, ARCH, FLAGS, COSTS, IMP, PART) \
9356ca16 632 {NAME, IDENT, SCHED, AARCH64_ARCH_##ARCH, \
633 all_architectures[AARCH64_ARCH_##ARCH].architecture_version, \
634 FLAGS, &COSTS##_tunings},
df401d54 635#include "aarch64-cores.def"
636#undef AARCH64_CORE
9356ca16 637 {"generic", generic, cortexa53, AARCH64_ARCH_8A, 8,
638 AARCH64_FL_FOR_ARCH8, &generic_tunings},
639 {NULL, aarch64_none, aarch64_none, aarch64_no_arch, 0, 0, NULL}
df401d54 640};
641
df401d54 642
a0db861f 643/* Target specification. These are populated by the -march, -mtune, -mcpu
644 handling code or by target attributes. */
df401d54 645static const struct processor *selected_arch;
646static const struct processor *selected_cpu;
647static const struct processor *selected_tune;
648
14677da9 649/* The current tuning set. */
650struct tune_params aarch64_tune_params = generic_tunings;
651
df401d54 652#define AARCH64_CPU_DEFAULT_FLAGS ((selected_cpu) ? selected_cpu->flags : 0)
653
654/* An ISA extension in the co-processor and main instruction set space. */
655struct aarch64_option_extension
656{
657 const char *const name;
658 const unsigned long flags_on;
659 const unsigned long flags_off;
660};
661
662/* ISA extensions in AArch64. */
663static const struct aarch64_option_extension all_extensions[] =
664{
8b81ce60 665#define AARCH64_OPT_EXTENSION(NAME, FLAGS_ON, FLAGS_OFF, FEATURE_STRING) \
df401d54 666 {NAME, FLAGS_ON, FLAGS_OFF},
667#include "aarch64-option-extensions.def"
668#undef AARCH64_OPT_EXTENSION
669 {NULL, 0, 0}
670};
671
df401d54 672typedef enum aarch64_cond_code
673{
674 AARCH64_EQ = 0, AARCH64_NE, AARCH64_CS, AARCH64_CC, AARCH64_MI, AARCH64_PL,
675 AARCH64_VS, AARCH64_VC, AARCH64_HI, AARCH64_LS, AARCH64_GE, AARCH64_LT,
676 AARCH64_GT, AARCH64_LE, AARCH64_AL, AARCH64_NV
677}
678aarch64_cc;
679
680#define AARCH64_INVERSE_CONDITION_CODE(X) ((aarch64_cc) (((int) X) ^ 1))
681
682/* The condition codes of the processor, and the inverse function. */
683static const char * const aarch64_condition_codes[] =
684{
685 "eq", "ne", "cs", "cc", "mi", "pl", "vs", "vc",
686 "hi", "ls", "ge", "lt", "gt", "le", "al", "nv"
687};
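/* The enumeration above pairs each condition with its inverse at adjacent
   even/odd values, so AARCH64_INVERSE_CONDITION_CODE simply flips the low
   bit: for example AARCH64_EQ (0) maps to AARCH64_NE (1) and AARCH64_GE (10)
   maps to AARCH64_LT (11).  */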
688
050af05b 689/* Generate code to enable conditional branches in functions over 1 MiB. */
690const char *
691aarch64_gen_far_branch (rtx * operands, int pos_label, const char * dest,
692 const char * branch_format)
693{
694 rtx_code_label * tmp_label = gen_label_rtx ();
695 char label_buf[256];
696 char buffer[128];
697 ASM_GENERATE_INTERNAL_LABEL (label_buf, dest,
698 CODE_LABEL_NUMBER (tmp_label));
699 const char *label_ptr = targetm.strip_name_encoding (label_buf);
700 rtx dest_label = operands[pos_label];
701 operands[pos_label] = tmp_label;
702
703 snprintf (buffer, sizeof (buffer), "%s%s", branch_format, label_ptr);
704 output_asm_insn (buffer, operands);
705
706 snprintf (buffer, sizeof (buffer), "b\t%%l%d\n%s:", pos_label, label_ptr);
707 operands[pos_label] = dest_label;
708 output_asm_insn (buffer, operands);
709 return "";
710}
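/* A sketch of the resulting code (assuming the caller passes the *inverted*
   condition in BRANCH_FORMAT, as the far-branch patterns in aarch64.md do):
   an out-of-range "b.eq target" is emitted as

	b.ne	.Ltmp		// short-range inverted branch
	b	target		// unconditional branch, +/-128 MiB range
   .Ltmp:
*/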
711
b37104f6 712void
713aarch64_err_no_fpadvsimd (machine_mode mode, const char *msg)
714{
715 const char *mc = FLOAT_MODE_P (mode) ? "floating-point" : "vector";
716 if (TARGET_GENERAL_REGS_ONLY)
717 error ("%qs is incompatible with %s %s", "-mgeneral-regs-only", mc, msg);
718 else
719 error ("%qs feature modifier is incompatible with %s %s", "+nofp", mc, msg);
720}
721
c8203104 722static unsigned int
f7429d30 723aarch64_min_divisions_for_recip_mul (enum machine_mode mode)
c8203104 724{
f7429d30 725 if (GET_MODE_UNIT_SIZE (mode) == 4)
14677da9 726 return aarch64_tune_params.min_div_recip_mul_sf;
727 return aarch64_tune_params.min_div_recip_mul_df;
c8203104 728}
729
88227718 730static int
731aarch64_reassociation_width (unsigned opc ATTRIBUTE_UNUSED,
732 enum machine_mode mode)
733{
734 if (VECTOR_MODE_P (mode))
14677da9 735 return aarch64_tune_params.vec_reassoc_width;
88227718 736 if (INTEGRAL_MODE_P (mode))
14677da9 737 return aarch64_tune_params.int_reassoc_width;
88227718 738 if (FLOAT_MODE_P (mode))
14677da9 739 return aarch64_tune_params.fp_reassoc_width;
88227718 740 return 1;
741}
742
df401d54 743/* Provide a mapping from gcc register numbers to dwarf register numbers. */
744unsigned
745aarch64_dbx_register_number (unsigned regno)
746{
747 if (GP_REGNUM_P (regno))
748 return AARCH64_DWARF_R0 + regno - R0_REGNUM;
749 else if (regno == SP_REGNUM)
750 return AARCH64_DWARF_SP;
751 else if (FP_REGNUM_P (regno))
752 return AARCH64_DWARF_V0 + regno - V0_REGNUM;
753
754 /* Return values >= DWARF_FRAME_REGISTERS indicate that there is no
755 equivalent DWARF register. */
756 return DWARF_FRAME_REGISTERS;
757}
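/* For example, with the AArch64 DWARF register numbering (x0-x30 -> 0-30,
   sp -> 31, v0-v31 -> 64-95), x5 maps to 5, sp to 31 and v3 to 67.  */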
758
759/* Return TRUE if MODE is any of the large INT modes. */
760static bool
3754d046 761aarch64_vect_struct_mode_p (machine_mode mode)
df401d54 762{
763 return mode == OImode || mode == CImode || mode == XImode;
764}
765
766/* Return TRUE if MODE is any of the vector modes. */
767static bool
3754d046 768aarch64_vector_mode_p (machine_mode mode)
df401d54 769{
770 return aarch64_vector_mode_supported_p (mode)
771 || aarch64_vect_struct_mode_p (mode);
772}
773
774/* Implement target hook TARGET_ARRAY_MODE_SUPPORTED_P. */
775static bool
3754d046 776aarch64_array_mode_supported_p (machine_mode mode,
df401d54 777 unsigned HOST_WIDE_INT nelems)
778{
779 if (TARGET_SIMD
e1a00927 780 && (AARCH64_VALID_SIMD_QREG_MODE (mode)
781 || AARCH64_VALID_SIMD_DREG_MODE (mode))
df401d54 782 && (nelems >= 2 && nelems <= 4))
783 return true;
784
785 return false;
786}
787
788/* Implement HARD_REGNO_NREGS. */
789
790int
3754d046 791aarch64_hard_regno_nregs (unsigned regno, machine_mode mode)
df401d54 792{
793 switch (aarch64_regno_regclass (regno))
794 {
795 case FP_REGS:
796 case FP_LO_REGS:
797 return (GET_MODE_SIZE (mode) + UNITS_PER_VREG - 1) / UNITS_PER_VREG;
798 default:
799 return (GET_MODE_SIZE (mode) + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
800 }
801 gcc_unreachable ();
802}
803
804/* Implement HARD_REGNO_MODE_OK. */
805
806int
3754d046 807aarch64_hard_regno_mode_ok (unsigned regno, machine_mode mode)
df401d54 808{
809 if (GET_MODE_CLASS (mode) == MODE_CC)
810 return regno == CC_REGNUM;
811
ec8b8726 812 if (regno == SP_REGNUM)
813 /* The purpose of comparing with ptr_mode is to support the
814 global register variable associated with the stack pointer
815 register via the syntax of asm ("wsp") in ILP32. */
816 return mode == Pmode || mode == ptr_mode;
817
818 if (regno == FRAME_POINTER_REGNUM || regno == ARG_POINTER_REGNUM)
df401d54 819 return mode == Pmode;
820
821 if (GP_REGNUM_P (regno) && ! aarch64_vect_struct_mode_p (mode))
822 return 1;
823
824 if (FP_REGNUM_P (regno))
825 {
826 if (aarch64_vect_struct_mode_p (mode))
827 return
828 (regno + aarch64_hard_regno_nregs (regno, mode) - 1) <= V31_REGNUM;
829 else
830 return 1;
831 }
832
833 return 0;
834}
835
d5a682f1 836/* Implement HARD_REGNO_CALLER_SAVE_MODE. */
3754d046 837machine_mode
d5a682f1 838aarch64_hard_regno_caller_save_mode (unsigned regno, unsigned nregs,
3754d046 839 machine_mode mode)
d5a682f1 840{
841 /* Handle modes that fit within single registers. */
842 if (nregs == 1 && GET_MODE_SIZE (mode) <= 16)
843 {
844 if (GET_MODE_SIZE (mode) >= 4)
845 return mode;
846 else
847 return SImode;
848 }
849 /* Fall back to generic for multi-reg and very large modes. */
850 else
851 return choose_hard_reg_mode (regno, nregs, false);
852}
853
df401d54 854/* Return true if calls to DECL should be treated as
 855	 long-calls (i.e. called via a register).  */
856static bool
857aarch64_decl_is_long_call_p (const_tree decl ATTRIBUTE_UNUSED)
858{
859 return false;
860}
861
862/* Return true if calls to symbol-ref SYM should be treated as
 863	 long-calls (i.e. called via a register).  */
864bool
865aarch64_is_long_call_p (rtx sym)
866{
867 return aarch64_decl_is_long_call_p (SYMBOL_REF_DECL (sym));
868}
869
2bcb7473 870/* Return true if calls to symbol-ref SYM should not go through
871 plt stubs. */
872
873bool
874aarch64_is_noplt_call_p (rtx sym)
875{
876 const_tree decl = SYMBOL_REF_DECL (sym);
877
878 if (flag_pic
879 && decl
880 && (!flag_plt
881 || lookup_attribute ("noplt", DECL_ATTRIBUTES (decl)))
882 && !targetm.binds_local_p (decl))
883 return true;
884
885 return false;
886}
887
df401d54 888/* Return true if the offsets to a zero/sign-extract operation
889 represent an expression that matches an extend operation. The
 890	 operands represent the parameters from
891
2f0038b0 892 (extract:MODE (mult (reg) (MULT_IMM)) (EXTRACT_IMM) (const_int 0)). */
df401d54 893bool
3754d046 894aarch64_is_extend_from_extract (machine_mode mode, rtx mult_imm,
df401d54 895 rtx extract_imm)
896{
897 HOST_WIDE_INT mult_val, extract_val;
898
899 if (! CONST_INT_P (mult_imm) || ! CONST_INT_P (extract_imm))
900 return false;
901
902 mult_val = INTVAL (mult_imm);
903 extract_val = INTVAL (extract_imm);
904
905 if (extract_val > 8
906 && extract_val < GET_MODE_BITSIZE (mode)
907 && exact_log2 (extract_val & ~7) > 0
908 && (extract_val & 7) <= 4
909 && mult_val == (1 << (extract_val & 7)))
910 return true;
911
912 return false;
913}
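/* For example, with MODE == DImode, MULT_IMM == 4 and EXTRACT_IMM == 34:
   34 & ~7 == 32 (a power of two), 34 & 7 == 2 and 4 == 1 << 2, so this
   returns true; extracting the low 34 bits of (reg * 4) is equivalent to
   extending a 32-bit value and shifting it left by 2.  */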
914
915/* Emit an insn that's a simple single-set. Both the operands must be
916 known to be valid. */
917inline static rtx
918emit_set_insn (rtx x, rtx y)
919{
d1f9b275 920 return emit_insn (gen_rtx_SET (x, y));
df401d54 921}
922
 923/* X and Y are two things to compare using CODE.  Emit the compare insn and
 924 return the rtx for the CC register in the proper mode.  */
925rtx
926aarch64_gen_compare_reg (RTX_CODE code, rtx x, rtx y)
927{
3754d046 928 machine_mode mode = SELECT_CC_MODE (code, x, y);
df401d54 929 rtx cc_reg = gen_rtx_REG (mode, CC_REGNUM);
930
931 emit_set_insn (cc_reg, gen_rtx_COMPARE (mode, x, y));
932 return cc_reg;
933}
934
935/* Build the SYMBOL_REF for __tls_get_addr. */
936
937static GTY(()) rtx tls_get_addr_libfunc;
938
939rtx
940aarch64_tls_get_addr (void)
941{
942 if (!tls_get_addr_libfunc)
943 tls_get_addr_libfunc = init_one_libfunc ("__tls_get_addr");
944 return tls_get_addr_libfunc;
945}
946
947/* Return the TLS model to use for ADDR. */
948
949static enum tls_model
950tls_symbolic_operand_type (rtx addr)
951{
952 enum tls_model tls_kind = TLS_MODEL_NONE;
953 rtx sym, addend;
954
955 if (GET_CODE (addr) == CONST)
956 {
957 split_const (addr, &sym, &addend);
958 if (GET_CODE (sym) == SYMBOL_REF)
959 tls_kind = SYMBOL_REF_TLS_MODEL (sym);
960 }
961 else if (GET_CODE (addr) == SYMBOL_REF)
962 tls_kind = SYMBOL_REF_TLS_MODEL (addr);
963
964 return tls_kind;
965}
966
 967/* We'll allow lo_sum's in our legitimate addresses so that combine
 968 can take care of combining addresses where necessary, but for
 969 generation purposes, we'll generate the address
 970 as:
971 RTL Absolute
972 tmp = hi (symbol_ref); adrp x1, foo
973 dest = lo_sum (tmp, symbol_ref); add dest, x1, :lo_12:foo
974 nop
975
976 PIC TLS
977 adrp x1, :got:foo adrp tmp, :tlsgd:foo
978 ldr x1, [:got_lo12:foo] add dest, tmp, :tlsgd_lo12:foo
979 bl __tls_get_addr
980 nop
981
982 Load TLS symbol, depending on TLS mechanism and TLS access model.
983
984 Global Dynamic - Traditional TLS:
985 adrp tmp, :tlsgd:imm
986 add dest, tmp, #:tlsgd_lo12:imm
987 bl __tls_get_addr
988
989 Global Dynamic - TLS Descriptors:
990 adrp dest, :tlsdesc:imm
991 ldr tmp, [dest, #:tlsdesc_lo12:imm]
992 add dest, dest, #:tlsdesc_lo12:imm
993 blr tmp
994 mrs tp, tpidr_el0
995 add dest, dest, tp
996
997 Initial Exec:
998 mrs tp, tpidr_el0
999 adrp tmp, :gottprel:imm
1000 ldr dest, [tmp, #:gottprel_lo12:imm]
1001 add dest, dest, tp
1002
1003 Local Exec:
1004 mrs tp, tpidr_el0
208e784b 1005 add t0, tp, #:tprel_hi12:imm, lsl #12
1006 add t0, t0, #:tprel_lo12_nc:imm
df401d54 1007*/
1008
1009static void
1010aarch64_load_symref_appropriately (rtx dest, rtx imm,
1011 enum aarch64_symbol_type type)
1012{
1013 switch (type)
1014 {
1015 case SYMBOL_SMALL_ABSOLUTE:
1016 {
011aed18 1017 /* In ILP32, the mode of dest can be either SImode or DImode. */
df401d54 1018 rtx tmp_reg = dest;
3754d046 1019 machine_mode mode = GET_MODE (dest);
011aed18 1020
1021 gcc_assert (mode == Pmode || mode == ptr_mode);
1022
df401d54 1023 if (can_create_pseudo_p ())
011aed18 1024 tmp_reg = gen_reg_rtx (mode);
df401d54 1025
011aed18 1026 emit_move_insn (tmp_reg, gen_rtx_HIGH (mode, imm));
df401d54 1027 emit_insn (gen_add_losym (dest, tmp_reg, imm));
1028 return;
1029 }
1030
5137d3cb 1031 case SYMBOL_TINY_ABSOLUTE:
d1f9b275 1032 emit_insn (gen_rtx_SET (dest, imm));
5137d3cb 1033 return;
1034
65f988f7 1035 case SYMBOL_SMALL_GOT_28K:
1036 {
1037 machine_mode mode = GET_MODE (dest);
1038 rtx gp_rtx = pic_offset_table_rtx;
f1e4a138 1039 rtx insn;
1040 rtx mem;
65f988f7 1041
 1042	 /* NOTE: pic_offset_table_rtx can be NULL_RTX, because we can reach
 1043	 here before rtl expansion.  Tree IVOPTS will generate rtl patterns to
 1044	 decide rtx costs, in which case pic_offset_table_rtx is not
 1045	 initialized.  In that case there is no need to generate the first adrp
47ae02b7 1046	 instruction, as the final cost for global variable access is
65f988f7 1047	 one instruction.  */
1048 if (gp_rtx != NULL)
1049 {
 1050	 /* -fpic for -mcmodel=small allows a 32K GOT table size (but since we
 1051	 use the page base as the GOT base, the first page may be wasted;
 1052	 in the worst case there is only 28K of space for the GOT).
1053
 1054	 The generated instruction sequence for accessing a global variable
 1055	 is:
1056
b79ac70f 1057 ldr reg, [pic_offset_table_rtx, #:gotpage_lo15:sym]
65f988f7 1058
 1059	 Only one instruction is needed.  But we must initialize
 1060	 pic_offset_table_rtx properly.  We generate an initialization insn for
 1061	 every global access, and allow CSE to remove all redundant ones.
1062
 1063	 The final instruction sequences will look like the following
b79ac70f 1064	 for multiple global variable accesses.
1065
b79ac70f 1066 adrp pic_offset_table_rtx, _GLOBAL_OFFSET_TABLE_
65f988f7 1067
b79ac70f 1068 ldr reg, [pic_offset_table_rtx, #:gotpage_lo15:sym1]
1069 ldr reg, [pic_offset_table_rtx, #:gotpage_lo15:sym2]
1070 ldr reg, [pic_offset_table_rtx, #:gotpage_lo15:sym3]
1071 ... */
65f988f7 1072
1073 rtx s = gen_rtx_SYMBOL_REF (Pmode, "_GLOBAL_OFFSET_TABLE_");
1074 crtl->uses_pic_offset_table = 1;
1075 emit_move_insn (gp_rtx, gen_rtx_HIGH (Pmode, s));
1076
1077 if (mode != GET_MODE (gp_rtx))
1078 gp_rtx = simplify_gen_subreg (mode, gp_rtx, GET_MODE (gp_rtx), 0);
1079 }
1080
1081 if (mode == ptr_mode)
1082 {
1083 if (mode == DImode)
f1e4a138 1084 insn = gen_ldr_got_small_28k_di (dest, gp_rtx, imm);
65f988f7 1085 else
f1e4a138 1086 insn = gen_ldr_got_small_28k_si (dest, gp_rtx, imm);
1087
1088 mem = XVECEXP (SET_SRC (insn), 0, 0);
65f988f7 1089 }
1090 else
1091 {
1092 gcc_assert (mode == Pmode);
f1e4a138 1093
1094 insn = gen_ldr_got_small_28k_sidi (dest, gp_rtx, imm);
1095 mem = XVECEXP (XEXP (SET_SRC (insn), 0), 0, 0);
65f988f7 1096 }
1097
f1e4a138 1098	 /* The operand is expected to be a MEM.  Whenever the related insn
 1099	 pattern changes, the code above which calculates mem should be
 1100	 updated.  */
1101 gcc_assert (GET_CODE (mem) == MEM);
1102 MEM_READONLY_P (mem) = 1;
1103 MEM_NOTRAP_P (mem) = 1;
1104 emit_insn (insn);
65f988f7 1105 return;
1106 }
1107
41754803 1108 case SYMBOL_SMALL_GOT_4G:
df401d54 1109 {
011aed18 1110 /* In ILP32, the mode of dest can be either SImode or DImode,
1111 while the got entry is always of SImode size. The mode of
1112 dest depends on how dest is used: if dest is assigned to a
 1113	 pointer (e.g. in memory), it has SImode; it may have
 1114	 DImode if dest is dereferenced to access the memory.
1115 This is why we have to handle three different ldr_got_small
1116 patterns here (two patterns for ILP32). */
f1e4a138 1117
1118 rtx insn;
1119 rtx mem;
df401d54 1120 rtx tmp_reg = dest;
3754d046 1121 machine_mode mode = GET_MODE (dest);
011aed18 1122
df401d54 1123 if (can_create_pseudo_p ())
011aed18 1124 tmp_reg = gen_reg_rtx (mode);
1125
1126 emit_move_insn (tmp_reg, gen_rtx_HIGH (mode, imm));
1127 if (mode == ptr_mode)
1128 {
1129 if (mode == DImode)
f1e4a138 1130 insn = gen_ldr_got_small_di (dest, tmp_reg, imm);
011aed18 1131 else
f1e4a138 1132 insn = gen_ldr_got_small_si (dest, tmp_reg, imm);
1133
1134 mem = XVECEXP (SET_SRC (insn), 0, 0);
011aed18 1135 }
1136 else
1137 {
1138 gcc_assert (mode == Pmode);
f1e4a138 1139
1140 insn = gen_ldr_got_small_sidi (dest, tmp_reg, imm);
1141 mem = XVECEXP (XEXP (SET_SRC (insn), 0), 0, 0);
011aed18 1142 }
1143
f1e4a138 1144 gcc_assert (GET_CODE (mem) == MEM);
1145 MEM_READONLY_P (mem) = 1;
1146 MEM_NOTRAP_P (mem) = 1;
1147 emit_insn (insn);
df401d54 1148 return;
1149 }
1150
1151 case SYMBOL_SMALL_TLSGD:
1152 {
ff38b261 1153 rtx_insn *insns;
df401d54 1154 rtx result = gen_rtx_REG (Pmode, R0_REGNUM);
1155
1156 start_sequence ();
ba189be5 1157 aarch64_emit_call_insn (gen_tlsgd_small (result, imm));
df401d54 1158 insns = get_insns ();
1159 end_sequence ();
1160
1161 RTL_CONST_CALL_P (insns) = 1;
1162 emit_libcall_block (insns, dest, result, imm);
1163 return;
1164 }
1165
1166 case SYMBOL_SMALL_TLSDESC:
1167 {
3754d046 1168 machine_mode mode = GET_MODE (dest);
718da8fb 1169 rtx x0 = gen_rtx_REG (mode, R0_REGNUM);
df401d54 1170 rtx tp;
1171
718da8fb 1172 gcc_assert (mode == Pmode || mode == ptr_mode);
1173
4c5898f7 1174 /* In ILP32, the got entry is always of SImode size. Unlike
1175 small GOT, the dest is fixed at reg 0. */
1176 if (TARGET_ILP32)
1177 emit_insn (gen_tlsdesc_small_si (imm));
718da8fb 1178 else
4c5898f7 1179 emit_insn (gen_tlsdesc_small_di (imm));
df401d54 1180 tp = aarch64_load_tp (NULL);
718da8fb 1181
1182 if (mode != Pmode)
1183 tp = gen_lowpart (mode, tp);
1184
4c5898f7 1185 emit_insn (gen_rtx_SET (dest, gen_rtx_PLUS (mode, tp, x0)));
df401d54 1186 set_unique_reg_note (get_last_insn (), REG_EQUIV, imm);
1187 return;
1188 }
1189
7bff97c2 1190 case SYMBOL_SMALL_TLSIE:
df401d54 1191 {
718da8fb 1192 /* In ILP32, the mode of dest can be either SImode or DImode,
1193 while the got entry is always of SImode size. The mode of
1194 dest depends on how dest is used: if dest is assigned to a
 1195	 pointer (e.g. in memory), it has SImode; it may have
 1196	 DImode if dest is dereferenced to access the memory.
1197 This is why we have to handle three different tlsie_small
1198 patterns here (two patterns for ILP32). */
3754d046 1199 machine_mode mode = GET_MODE (dest);
718da8fb 1200 rtx tmp_reg = gen_reg_rtx (mode);
df401d54 1201 rtx tp = aarch64_load_tp (NULL);
718da8fb 1202
1203 if (mode == ptr_mode)
1204 {
1205 if (mode == DImode)
1206 emit_insn (gen_tlsie_small_di (tmp_reg, imm));
1207 else
1208 {
1209 emit_insn (gen_tlsie_small_si (tmp_reg, imm));
1210 tp = gen_lowpart (mode, tp);
1211 }
1212 }
1213 else
1214 {
1215 gcc_assert (mode == Pmode);
1216 emit_insn (gen_tlsie_small_sidi (tmp_reg, imm));
1217 }
1218
d1f9b275 1219 emit_insn (gen_rtx_SET (dest, gen_rtx_PLUS (mode, tp, tmp_reg)));
df401d54 1220 set_unique_reg_note (get_last_insn (), REG_EQUIV, imm);
1221 return;
1222 }
1223
57507fa5 1224 case SYMBOL_TLSLE12:
950cf06f 1225 case SYMBOL_TLSLE24:
57507fa5 1226 case SYMBOL_TLSLE32:
1227 case SYMBOL_TLSLE48:
df401d54 1228 {
57507fa5 1229 machine_mode mode = GET_MODE (dest);
df401d54 1230 rtx tp = aarch64_load_tp (NULL);
8f46e8a7 1231
57507fa5 1232 if (mode != Pmode)
1233 tp = gen_lowpart (mode, tp);
1234
1235 switch (type)
1236 {
1237 case SYMBOL_TLSLE12:
1238 emit_insn ((mode == DImode ? gen_tlsle12_di : gen_tlsle12_si)
1239 (dest, tp, imm));
1240 break;
1241 case SYMBOL_TLSLE24:
1242 emit_insn ((mode == DImode ? gen_tlsle24_di : gen_tlsle24_si)
1243 (dest, tp, imm));
1244 break;
1245 case SYMBOL_TLSLE32:
1246 emit_insn ((mode == DImode ? gen_tlsle32_di : gen_tlsle32_si)
1247 (dest, imm));
1248 emit_insn ((mode == DImode ? gen_adddi3 : gen_addsi3)
1249 (dest, dest, tp));
1250 break;
1251 case SYMBOL_TLSLE48:
1252 emit_insn ((mode == DImode ? gen_tlsle48_di : gen_tlsle48_si)
1253 (dest, imm));
1254 emit_insn ((mode == DImode ? gen_adddi3 : gen_addsi3)
1255 (dest, dest, tp));
1256 break;
1257 default:
1258 gcc_unreachable ();
1259 }
8f46e8a7 1260
df401d54 1261 set_unique_reg_note (get_last_insn (), REG_EQUIV, imm);
1262 return;
1263 }
1264
2c97ec73 1265 case SYMBOL_TINY_GOT:
1266 emit_insn (gen_ldr_got_tiny (dest, imm));
1267 return;
1268
f546e2d1 1269 case SYMBOL_TINY_TLSIE:
1270 {
1271 machine_mode mode = GET_MODE (dest);
1272 rtx tp = aarch64_load_tp (NULL);
1273
1274 if (mode == ptr_mode)
1275 {
1276 if (mode == DImode)
1277 emit_insn (gen_tlsie_tiny_di (dest, imm, tp));
1278 else
1279 {
1280 tp = gen_lowpart (mode, tp);
1281 emit_insn (gen_tlsie_tiny_si (dest, imm, tp));
1282 }
1283 }
1284 else
1285 {
1286 gcc_assert (mode == Pmode);
1287 emit_insn (gen_tlsie_tiny_sidi (dest, imm, tp));
1288 }
1289
1290 set_unique_reg_note (get_last_insn (), REG_EQUIV, imm);
1291 return;
1292 }
1293
df401d54 1294 default:
1295 gcc_unreachable ();
1296 }
1297}
1298
1299/* Emit a move from SRC to DEST. Assume that the move expanders can
1300 handle all moves if !can_create_pseudo_p (). The distinction is
1301 important because, unlike emit_move_insn, the move expanders know
1302 how to force Pmode objects into the constant pool even when the
1303 constant pool address is not itself legitimate. */
1304static rtx
1305aarch64_emit_move (rtx dest, rtx src)
1306{
1307 return (can_create_pseudo_p ()
1308 ? emit_move_insn (dest, src)
1309 : emit_move_insn_1 (dest, src));
1310}
1311
2b5ec7ce 1312/* Split a 128-bit move operation into two 64-bit move operations,
1313 taking care to handle partial overlap of register to register
1314 copies. Special cases are needed when moving between GP regs and
1315 FP regs. SRC can be a register, constant or memory; DST a register
1316 or memory. If either operand is memory it must not have any side
1317 effects. */
df401d54 1318void
1319aarch64_split_128bit_move (rtx dst, rtx src)
1320{
2b5ec7ce 1321 rtx dst_lo, dst_hi;
1322 rtx src_lo, src_hi;
df401d54 1323
3754d046 1324 machine_mode mode = GET_MODE (dst);
eff87958 1325
2b5ec7ce 1326 gcc_assert (mode == TImode || mode == TFmode);
1327 gcc_assert (!(side_effects_p (src) || side_effects_p (dst)));
1328 gcc_assert (mode == GET_MODE (src) || GET_MODE (src) == VOIDmode);
df401d54 1329
1330 if (REG_P (dst) && REG_P (src))
1331 {
2b5ec7ce 1332 int src_regno = REGNO (src);
1333 int dst_regno = REGNO (dst);
df401d54 1334
2b5ec7ce 1335 /* Handle FP <-> GP regs. */
df401d54 1336 if (FP_REGNUM_P (dst_regno) && GP_REGNUM_P (src_regno))
1337 {
2b5ec7ce 1338 src_lo = gen_lowpart (word_mode, src);
1339 src_hi = gen_highpart (word_mode, src);
1340
1341 if (mode == TImode)
1342 {
1343 emit_insn (gen_aarch64_movtilow_di (dst, src_lo));
1344 emit_insn (gen_aarch64_movtihigh_di (dst, src_hi));
1345 }
1346 else
1347 {
1348 emit_insn (gen_aarch64_movtflow_di (dst, src_lo));
1349 emit_insn (gen_aarch64_movtfhigh_di (dst, src_hi));
1350 }
1351 return;
df401d54 1352 }
1353 else if (GP_REGNUM_P (dst_regno) && FP_REGNUM_P (src_regno))
1354 {
2b5ec7ce 1355 dst_lo = gen_lowpart (word_mode, dst);
1356 dst_hi = gen_highpart (word_mode, dst);
1357
1358 if (mode == TImode)
1359 {
1360 emit_insn (gen_aarch64_movdi_tilow (dst_lo, src));
1361 emit_insn (gen_aarch64_movdi_tihigh (dst_hi, src));
1362 }
1363 else
1364 {
1365 emit_insn (gen_aarch64_movdi_tflow (dst_lo, src));
1366 emit_insn (gen_aarch64_movdi_tfhigh (dst_hi, src));
1367 }
1368 return;
df401d54 1369 }
df401d54 1370 }
1371
2b5ec7ce 1372 dst_lo = gen_lowpart (word_mode, dst);
1373 dst_hi = gen_highpart (word_mode, dst);
1374 src_lo = gen_lowpart (word_mode, src);
1375 src_hi = gen_highpart_mode (word_mode, mode, src);
1376
1377 /* At most one pairing may overlap. */
1378 if (reg_overlap_mentioned_p (dst_lo, src_hi))
1379 {
1380 aarch64_emit_move (dst_hi, src_hi);
1381 aarch64_emit_move (dst_lo, src_lo);
1382 }
1383 else
1384 {
1385 aarch64_emit_move (dst_lo, src_lo);
1386 aarch64_emit_move (dst_hi, src_hi);
1387 }
df401d54 1388}
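/* For example, on a little-endian target a TImode copy from the pair x0:x1
   to the overlapping pair x1:x2 has dst_lo == src_hi == x1, so the high
   halves are moved before the low halves to avoid clobbering x1.  */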
1389
1390bool
1391aarch64_split_128bit_move_p (rtx dst, rtx src)
1392{
1393 return (! REG_P (src)
1394 || ! (FP_REGNUM_P (REGNO (dst)) && FP_REGNUM_P (REGNO (src))));
1395}
1396
d820433c 1397/* Split a complex SIMD combine. */
1398
1399void
1400aarch64_split_simd_combine (rtx dst, rtx src1, rtx src2)
1401{
3754d046 1402 machine_mode src_mode = GET_MODE (src1);
1403 machine_mode dst_mode = GET_MODE (dst);
d820433c 1404
1405 gcc_assert (VECTOR_MODE_P (dst_mode));
1406
1407 if (REG_P (dst) && REG_P (src1) && REG_P (src2))
1408 {
1409 rtx (*gen) (rtx, rtx, rtx);
1410
1411 switch (src_mode)
1412 {
1413 case V8QImode:
1414 gen = gen_aarch64_simd_combinev8qi;
1415 break;
1416 case V4HImode:
1417 gen = gen_aarch64_simd_combinev4hi;
1418 break;
1419 case V2SImode:
1420 gen = gen_aarch64_simd_combinev2si;
1421 break;
07c40439 1422 case V4HFmode:
1423 gen = gen_aarch64_simd_combinev4hf;
1424 break;
d820433c 1425 case V2SFmode:
1426 gen = gen_aarch64_simd_combinev2sf;
1427 break;
1428 case DImode:
1429 gen = gen_aarch64_simd_combinedi;
1430 break;
1431 case DFmode:
1432 gen = gen_aarch64_simd_combinedf;
1433 break;
1434 default:
1435 gcc_unreachable ();
1436 }
1437
1438 emit_insn (gen (dst, src1, src2));
1439 return;
1440 }
1441}
1442
e0e03aa1 1443/* Split a complex SIMD move. */
1444
1445void
1446aarch64_split_simd_move (rtx dst, rtx src)
1447{
3754d046 1448 machine_mode src_mode = GET_MODE (src);
1449 machine_mode dst_mode = GET_MODE (dst);
e0e03aa1 1450
1451 gcc_assert (VECTOR_MODE_P (dst_mode));
1452
1453 if (REG_P (dst) && REG_P (src))
1454 {
e30d3162 1455 rtx (*gen) (rtx, rtx);
1456
e0e03aa1 1457 gcc_assert (VECTOR_MODE_P (src_mode));
1458
1459 switch (src_mode)
1460 {
1461 case V16QImode:
e30d3162 1462 gen = gen_aarch64_split_simd_movv16qi;
e0e03aa1 1463 break;
1464 case V8HImode:
e30d3162 1465 gen = gen_aarch64_split_simd_movv8hi;
e0e03aa1 1466 break;
1467 case V4SImode:
e30d3162 1468 gen = gen_aarch64_split_simd_movv4si;
e0e03aa1 1469 break;
1470 case V2DImode:
e30d3162 1471 gen = gen_aarch64_split_simd_movv2di;
e0e03aa1 1472 break;
aea31229 1473 case V8HFmode:
1474 gen = gen_aarch64_split_simd_movv8hf;
1475 break;
e0e03aa1 1476 case V4SFmode:
e30d3162 1477 gen = gen_aarch64_split_simd_movv4sf;
e0e03aa1 1478 break;
1479 case V2DFmode:
e30d3162 1480 gen = gen_aarch64_split_simd_movv2df;
e0e03aa1 1481 break;
1482 default:
1483 gcc_unreachable ();
1484 }
e30d3162 1485
1486 emit_insn (gen (dst, src));
e0e03aa1 1487 return;
1488 }
1489}
1490
df401d54 1491static rtx
3754d046 1492aarch64_force_temporary (machine_mode mode, rtx x, rtx value)
df401d54 1493{
1494 if (can_create_pseudo_p ())
6b765f96 1495 return force_reg (mode, value);
df401d54 1496 else
1497 {
1498 x = aarch64_emit_move (x, value);
1499 return x;
1500 }
1501}
1502
1503
1504static rtx
3754d046 1505aarch64_add_offset (machine_mode mode, rtx temp, rtx reg, HOST_WIDE_INT offset)
df401d54 1506{
7f2c00e6 1507 if (!aarch64_plus_immediate (GEN_INT (offset), mode))
df401d54 1508 {
1509 rtx high;
1510 /* Load the full offset into a register. This
1511 might be improvable in the future. */
1512 high = GEN_INT (offset);
1513 offset = 0;
6b765f96 1514 high = aarch64_force_temporary (mode, temp, high);
1515 reg = aarch64_force_temporary (mode, temp,
1516 gen_rtx_PLUS (mode, high, reg));
df401d54 1517 }
1518 return plus_constant (mode, reg, offset);
1519}
1520
a6cab7d4 1521static int
1522aarch64_internal_mov_immediate (rtx dest, rtx imm, bool generate,
1523 machine_mode mode)
df401d54 1524{
df401d54 1525 int i;
5a1a2872 1526 unsigned HOST_WIDE_INT val, val2, mask;
1527 int one_match, zero_match;
1528 int num_insns;
df401d54 1529
5a1a2872 1530 val = INTVAL (imm);
1531
1532 if (aarch64_move_imm (val, mode))
df401d54 1533 {
a6cab7d4 1534 if (generate)
d1f9b275 1535 emit_insn (gen_rtx_SET (dest, imm));
5a1a2872 1536 return 1;
df401d54 1537 }
1538
5a1a2872 1539 if ((val >> 32) == 0 || mode == SImode)
df401d54 1540 {
a6cab7d4 1541 if (generate)
1542 {
5a1a2872 1543 emit_insn (gen_rtx_SET (dest, GEN_INT (val & 0xffff)));
1544 if (mode == SImode)
1545 emit_insn (gen_insv_immsi (dest, GEN_INT (16),
1546 GEN_INT ((val >> 16) & 0xffff)));
1547 else
1548 emit_insn (gen_insv_immdi (dest, GEN_INT (16),
1549 GEN_INT ((val >> 16) & 0xffff)));
a6cab7d4 1550 }
5a1a2872 1551 return 2;
df401d54 1552 }
1553
1554 /* Remaining cases are all for DImode. */
1555
df401d54 1556 mask = 0xffff;
5a1a2872 1557 zero_match = ((val & mask) == 0) + ((val & (mask << 16)) == 0) +
1558 ((val & (mask << 32)) == 0) + ((val & (mask << 48)) == 0);
1559 one_match = ((~val & mask) == 0) + ((~val & (mask << 16)) == 0) +
1560 ((~val & (mask << 32)) == 0) + ((~val & (mask << 48)) == 0);
df401d54 1561
3ab32114 1562 if (zero_match != 2 && one_match != 2)
df401d54 1563 {
3ab32114 1564 /* Try emitting a bitmask immediate with a movk replacing 16 bits.
1565 For a 64-bit bitmask try whether changing 16 bits to all ones or
1566 zeroes creates a valid bitmask. To check any repeated bitmask,
1567 try using 16 bits from the other 32-bit half of val. */
df401d54 1568
3ab32114 1569 for (i = 0; i < 64; i += 16, mask <<= 16)
df401d54 1570 {
3ab32114 1571 val2 = val & ~mask;
1572 if (val2 != val && aarch64_bitmask_imm (val2, mode))
1573 break;
1574 val2 = val | mask;
1575 if (val2 != val && aarch64_bitmask_imm (val2, mode))
1576 break;
1577 val2 = val2 & ~mask;
1578 val2 = val2 | (((val2 >> 32) | (val2 << 32)) & mask);
1579 if (val2 != val && aarch64_bitmask_imm (val2, mode))
1580 break;
df401d54 1581 }
3ab32114 1582 if (i != 64)
df401d54 1583 {
3ab32114 1584 if (generate)
df401d54 1585 {
3ab32114 1586 emit_insn (gen_rtx_SET (dest, GEN_INT (val2)));
1587 emit_insn (gen_insv_immdi (dest, GEN_INT (i),
5a1a2872 1588 GEN_INT ((val >> i) & 0xffff)));
df401d54 1589 }
df401d54 1590 }
1591 }
1592
5a1a2872 1593 /* Generate 2-4 instructions, skipping 16-bit chunks that are all zeroes or
 1594 all ones, since those are set correctly by the initial mov.  If one_match >
 1595 zero_match, skip set bits, otherwise skip zero bits.  */
a2588a5b 1596
5a1a2872 1597 num_insns = 1;
df401d54 1598 mask = 0xffff;
5a1a2872 1599 val2 = one_match > zero_match ? ~val : val;
1600 i = (val2 & mask) != 0 ? 0 : (val2 & (mask << 16)) != 0 ? 16 : 32;
1601
1602 if (generate)
1603 emit_insn (gen_rtx_SET (dest, GEN_INT (one_match > zero_match
1604 ? (val | ~(mask << i))
1605 : (val & (mask << i)))));
1606 for (i += 16; i < 64; i += 16)
df401d54 1607 {
5a1a2872 1608 if ((val2 & (mask << i)) == 0)
1609 continue;
1610 if (generate)
1611 emit_insn (gen_insv_immdi (dest, GEN_INT (i),
1612 GEN_INT ((val >> i) & 0xffff)));
1613 num_insns ++;
a6cab7d4 1614 }
1615
1616 return num_insns;
1617}
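/* For example, for the DImode constant 0x0000123400005678 two of the four
   16-bit chunks are zero, so aarch64_internal_mov_immediate emits

	mov	dest, 0x5678
	movk	dest, 0x1234, lsl 32

   and returns 2.  */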
1618
1619
1620void
1621aarch64_expand_mov_immediate (rtx dest, rtx imm)
1622{
1623 machine_mode mode = GET_MODE (dest);
1624
1625 gcc_assert (mode == SImode || mode == DImode);
1626
1627 /* Check on what type of symbol it is. */
1628 if (GET_CODE (imm) == SYMBOL_REF
1629 || GET_CODE (imm) == LABEL_REF
1630 || GET_CODE (imm) == CONST)
1631 {
1632 rtx mem, base, offset;
1633 enum aarch64_symbol_type sty;
1634
1635 /* If we have (const (plus symbol offset)), separate out the offset
1636 before we start classifying the symbol. */
1637 split_const (imm, &base, &offset);
1638
82882dbd 1639 sty = aarch64_classify_symbol (base, offset);
a6cab7d4 1640 switch (sty)
1641 {
1642 case SYMBOL_FORCE_TO_MEM:
1643 if (offset != const0_rtx
1644 && targetm.cannot_force_const_mem (mode, imm))
1645 {
1646 gcc_assert (can_create_pseudo_p ());
1647 base = aarch64_force_temporary (mode, dest, base);
1648 base = aarch64_add_offset (mode, NULL, base, INTVAL (offset));
1649 aarch64_emit_move (dest, base);
1650 return;
1651 }
ae1cefe6 1652
a6cab7d4 1653 mem = force_const_mem (ptr_mode, imm);
1654 gcc_assert (mem);
ae1cefe6 1655
1656 /* If we aren't generating PC relative literals, then
1657 we need to expand the literal pool access carefully.
1658 This is something that needs to be done in a number
1659 of places, so could well live as a separate function. */
acb1dac7 1660 if (aarch64_nopcrelative_literal_loads)
ae1cefe6 1661 {
1662 gcc_assert (can_create_pseudo_p ());
1663 base = gen_reg_rtx (ptr_mode);
1664 aarch64_expand_mov_immediate (base, XEXP (mem, 0));
1665 mem = gen_rtx_MEM (ptr_mode, base);
1666 }
1667
a6cab7d4 1668 if (mode != ptr_mode)
1669 mem = gen_rtx_ZERO_EXTEND (mode, mem);
ae1cefe6 1670
d1f9b275 1671 emit_insn (gen_rtx_SET (dest, mem));
ae1cefe6 1672
a6cab7d4 1673 return;
1674
1675 case SYMBOL_SMALL_TLSGD:
1676 case SYMBOL_SMALL_TLSDESC:
7bff97c2 1677 case SYMBOL_SMALL_TLSIE:
65f988f7 1678 case SYMBOL_SMALL_GOT_28K:
41754803 1679 case SYMBOL_SMALL_GOT_4G:
a6cab7d4 1680 case SYMBOL_TINY_GOT:
f546e2d1 1681 case SYMBOL_TINY_TLSIE:
a6cab7d4 1682 if (offset != const0_rtx)
1683 {
 1684	 gcc_assert (can_create_pseudo_p ());
1685 base = aarch64_force_temporary (mode, dest, base);
1686 base = aarch64_add_offset (mode, NULL, base, INTVAL (offset));
1687 aarch64_emit_move (dest, base);
1688 return;
1689 }
1690 /* FALLTHRU */
1691
a6cab7d4 1692 case SYMBOL_SMALL_ABSOLUTE:
1693 case SYMBOL_TINY_ABSOLUTE:
57507fa5 1694 case SYMBOL_TLSLE12:
950cf06f 1695 case SYMBOL_TLSLE24:
57507fa5 1696 case SYMBOL_TLSLE32:
1697 case SYMBOL_TLSLE48:
a6cab7d4 1698 aarch64_load_symref_appropriately (dest, imm, sty);
1699 return;
1700
1701 default:
1702 gcc_unreachable ();
1703 }
1704 }
1705
1706 if (!CONST_INT_P (imm))
1707 {
1708 if (GET_CODE (imm) == HIGH)
d1f9b275 1709 emit_insn (gen_rtx_SET (dest, imm));
a6cab7d4 1710 else
1711 {
1712 rtx mem = force_const_mem (mode, imm);
1713 gcc_assert (mem);
d1f9b275 1714 emit_insn (gen_rtx_SET (dest, mem));
df401d54 1715 }
a6cab7d4 1716
1717 return;
df401d54 1718 }
a6cab7d4 1719
1720 aarch64_internal_mov_immediate (dest, imm, true, GET_MODE (dest));
df401d54 1721}
1722
1723static bool
1718b6c1 1724aarch64_function_ok_for_sibcall (tree decl ATTRIBUTE_UNUSED,
1725 tree exp ATTRIBUTE_UNUSED)
df401d54 1726{
1718b6c1 1727 /* Currently, always true. */
df401d54 1728 return true;
1729}
1730
1731/* Implement TARGET_PASS_BY_REFERENCE. */
1732
1733static bool
1734aarch64_pass_by_reference (cumulative_args_t pcum ATTRIBUTE_UNUSED,
3754d046 1735 machine_mode mode,
df401d54 1736 const_tree type,
1737 bool named ATTRIBUTE_UNUSED)
1738{
1739 HOST_WIDE_INT size;
3754d046 1740 machine_mode dummymode;
df401d54 1741 int nregs;
1742
1743 /* GET_MODE_SIZE (BLKmode) is useless since it is 0. */
1744 size = (mode == BLKmode && type)
1745 ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
1746
d7a05d41 1747 /* Aggregates are passed by reference based on their size. */
1748 if (type && AGGREGATE_TYPE_P (type))
df401d54 1749 {
d7a05d41 1750 size = int_size_in_bytes (type);
df401d54 1751 }
1752
1753 /* Variable sized arguments are always returned by reference. */
1754 if (size < 0)
1755 return true;
1756
1757 /* Can this be a candidate to be passed in fp/simd register(s)? */
1758 if (aarch64_vfp_is_call_or_return_candidate (mode, type,
1759 &dummymode, &nregs,
1760 NULL))
1761 return false;
1762
1763 /* Arguments which are variable sized or larger than 2 registers are
 1764	 passed by reference unless they are a homogeneous floating-point
1765 aggregate. */
1766 return size > 2 * UNITS_PER_WORD;
1767}
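/* For example, under the rules above a structure of four doubles is a
   homogeneous floating-point aggregate and so is passed in SIMD registers
   rather than by reference, whereas a 24-byte structure of three 64-bit
   integers exceeds two registers and is passed by reference.  */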
1768
1769/* Return TRUE if VALTYPE is padded to its least significant bits. */
1770static bool
1771aarch64_return_in_msb (const_tree valtype)
1772{
3754d046 1773 machine_mode dummy_mode;
df401d54 1774 int dummy_int;
1775
1776 /* Never happens in little-endian mode. */
1777 if (!BYTES_BIG_ENDIAN)
1778 return false;
1779
1780 /* Only composite types smaller than or equal to 16 bytes can
1781 be potentially returned in registers. */
1782 if (!aarch64_composite_type_p (valtype, TYPE_MODE (valtype))
1783 || int_size_in_bytes (valtype) <= 0
1784 || int_size_in_bytes (valtype) > 16)
1785 return false;
1786
1787 /* But not a composite that is an HFA (Homogeneous Floating-point Aggregate)
1788 or an HVA (Homogeneous Short-Vector Aggregate); such a special composite
1789 is always passed/returned in the least significant bits of fp/simd
1790 register(s). */
1791 if (aarch64_vfp_is_call_or_return_candidate (TYPE_MODE (valtype), valtype,
1792 &dummy_mode, &dummy_int, NULL))
1793 return false;
1794
1795 return true;
1796}
1797
1798/* Implement TARGET_FUNCTION_VALUE.
1799 Define how to find the value returned by a function. */
1800
1801static rtx
1802aarch64_function_value (const_tree type, const_tree func,
1803 bool outgoing ATTRIBUTE_UNUSED)
1804{
3754d046 1805 machine_mode mode;
df401d54 1806 int unsignedp;
1807 int count;
3754d046 1808 machine_mode ag_mode;
df401d54 1809
1810 mode = TYPE_MODE (type);
1811 if (INTEGRAL_TYPE_P (type))
1812 mode = promote_function_mode (type, mode, &unsignedp, func, 1);
1813
1814 if (aarch64_return_in_msb (type))
1815 {
1816 HOST_WIDE_INT size = int_size_in_bytes (type);
1817
1818 if (size % UNITS_PER_WORD != 0)
1819 {
1820 size += UNITS_PER_WORD - size % UNITS_PER_WORD;
1821 mode = mode_for_size (size * BITS_PER_UNIT, MODE_INT, 0);
1822 }
1823 }
1824
1825 if (aarch64_vfp_is_call_or_return_candidate (mode, type,
1826 &ag_mode, &count, NULL))
1827 {
1828 if (!aarch64_composite_type_p (type, mode))
1829 {
1830 gcc_assert (count == 1 && mode == ag_mode);
1831 return gen_rtx_REG (mode, V0_REGNUM);
1832 }
1833 else
1834 {
1835 int i;
1836 rtx par;
1837
1838 par = gen_rtx_PARALLEL (mode, rtvec_alloc (count));
1839 for (i = 0; i < count; i++)
1840 {
1841 rtx tmp = gen_rtx_REG (ag_mode, V0_REGNUM + i);
1842 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp,
1843 GEN_INT (i * GET_MODE_SIZE (ag_mode)));
1844 XVECEXP (par, 0, i) = tmp;
1845 }
1846 return par;
1847 }
1848 }
1849 else
1850 return gen_rtx_REG (mode, R0_REGNUM);
1851}
1852
1853/* Implements TARGET_FUNCTION_VALUE_REGNO_P.
1854 Return true if REGNO is the number of a hard register in which the values
 1855 of a called function may come back. */
1856
1857static bool
1858aarch64_function_value_regno_p (const unsigned int regno)
1859{
1860 /* Maximum of 16 bytes can be returned in the general registers. Examples
1861 of 16-byte return values are: 128-bit integers and 16-byte small
1862 structures (excluding homogeneous floating-point aggregates). */
1863 if (regno == R0_REGNUM || regno == R1_REGNUM)
1864 return true;
1865
1866 /* Up to four fp/simd registers can return a function value, e.g. a
1867 homogeneous floating-point aggregate having four members. */
1868 if (regno >= V0_REGNUM && regno < V0_REGNUM + HA_MAX_NUM_FLDS)
a0c7b470 1869 return TARGET_FLOAT;
df401d54 1870
1871 return false;
1872}
1873
1874/* Implement TARGET_RETURN_IN_MEMORY.
1875
1876 If the type T of the result of a function is such that
1877 void func (T arg)
1878 would require that arg be passed as a value in a register (or set of
1879 registers) according to the parameter passing rules, then the result
1880 is returned in the same registers as would be used for such an
1881 argument. */
1882
1883static bool
1884aarch64_return_in_memory (const_tree type, const_tree fndecl ATTRIBUTE_UNUSED)
1885{
1886 HOST_WIDE_INT size;
3754d046 1887 machine_mode ag_mode;
df401d54 1888 int count;
1889
1890 if (!AGGREGATE_TYPE_P (type)
1891 && TREE_CODE (type) != COMPLEX_TYPE
1892 && TREE_CODE (type) != VECTOR_TYPE)
 1893 /* Simple scalar types are always returned in registers. */
1894 return false;
1895
1896 if (aarch64_vfp_is_call_or_return_candidate (TYPE_MODE (type),
1897 type,
1898 &ag_mode,
1899 &count,
1900 NULL))
1901 return false;
1902
 1903 /* Types larger than 2 registers are returned in memory. */
1904 size = int_size_in_bytes (type);
1905 return (size < 0 || size > 2 * UNITS_PER_WORD);
1906}
1907
1908static bool
3754d046 1909aarch64_vfp_is_call_candidate (cumulative_args_t pcum_v, machine_mode mode,
df401d54 1910 const_tree type, int *nregs)
1911{
1912 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
1913 return aarch64_vfp_is_call_or_return_candidate (mode,
1914 type,
1915 &pcum->aapcs_vfp_rmode,
1916 nregs,
1917 NULL);
1918}
1919
1920/* Given MODE and TYPE of a function argument, return the alignment in
1921 bits. The idea is to suppress any stronger alignment requested by
1922 the user and opt for the natural alignment (specified in AAPCS64 \S 4.1).
1923 This is a helper function for local use only. */
1924
1925static unsigned int
3754d046 1926aarch64_function_arg_alignment (machine_mode mode, const_tree type)
df401d54 1927{
1928 unsigned int alignment;
1929
1930 if (type)
1931 {
1932 if (!integer_zerop (TYPE_SIZE (type)))
1933 {
1934 if (TYPE_MODE (type) == mode)
1935 alignment = TYPE_ALIGN (type);
1936 else
1937 alignment = GET_MODE_ALIGNMENT (mode);
1938 }
1939 else
1940 alignment = 0;
1941 }
1942 else
1943 alignment = GET_MODE_ALIGNMENT (mode);
1944
1945 return alignment;
1946}
1947
1948/* Layout a function argument according to the AAPCS64 rules. The rule
1949 numbers refer to the rule numbers in the AAPCS64. */
1950
1951static void
3754d046 1952aarch64_layout_arg (cumulative_args_t pcum_v, machine_mode mode,
df401d54 1953 const_tree type,
1954 bool named ATTRIBUTE_UNUSED)
1955{
1956 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
1957 int ncrn, nvrn, nregs;
1958 bool allocate_ncrn, allocate_nvrn;
ba5f7cbc 1959 HOST_WIDE_INT size;
df401d54 1960
1961 /* We need to do this once per argument. */
1962 if (pcum->aapcs_arg_processed)
1963 return;
1964
1965 pcum->aapcs_arg_processed = true;
1966
ba5f7cbc 1967 /* Size in bytes, rounded up to the nearest multiple of 8 bytes. */
1968 size
74d63f1e 1969 = ROUND_UP (type ? int_size_in_bytes (type) : GET_MODE_SIZE (mode),
1970 UNITS_PER_WORD);
ba5f7cbc 1971
df401d54 1972 allocate_ncrn = (type) ? !(FLOAT_TYPE_P (type)) : !FLOAT_MODE_P (mode);
1973 allocate_nvrn = aarch64_vfp_is_call_candidate (pcum_v,
1974 mode,
1975 type,
1976 &nregs);
1977
 1978 /* allocate_ncrn may be a false positive, but allocate_nvrn is quite reliable.
1979 The following code thus handles passing by SIMD/FP registers first. */
1980
1981 nvrn = pcum->aapcs_nvrn;
1982
 1983 /* C1 - C5 for floating point, homogeneous floating point aggregates (HFA)
 1984 and homogeneous short-vector aggregates (HVA). */
1985 if (allocate_nvrn)
1986 {
b37104f6 1987 if (!TARGET_FLOAT)
1988 aarch64_err_no_fpadvsimd (mode, "argument");
1989
df401d54 1990 if (nvrn + nregs <= NUM_FP_ARG_REGS)
1991 {
1992 pcum->aapcs_nextnvrn = nvrn + nregs;
1993 if (!aarch64_composite_type_p (type, mode))
1994 {
1995 gcc_assert (nregs == 1);
1996 pcum->aapcs_reg = gen_rtx_REG (mode, V0_REGNUM + nvrn);
1997 }
1998 else
1999 {
2000 rtx par;
2001 int i;
2002 par = gen_rtx_PARALLEL (mode, rtvec_alloc (nregs));
2003 for (i = 0; i < nregs; i++)
2004 {
2005 rtx tmp = gen_rtx_REG (pcum->aapcs_vfp_rmode,
2006 V0_REGNUM + nvrn + i);
2007 tmp = gen_rtx_EXPR_LIST
2008 (VOIDmode, tmp,
2009 GEN_INT (i * GET_MODE_SIZE (pcum->aapcs_vfp_rmode)));
2010 XVECEXP (par, 0, i) = tmp;
2011 }
2012 pcum->aapcs_reg = par;
2013 }
2014 return;
2015 }
2016 else
2017 {
2018 /* C.3 NSRN is set to 8. */
2019 pcum->aapcs_nextnvrn = NUM_FP_ARG_REGS;
2020 goto on_stack;
2021 }
2022 }
2023
2024 ncrn = pcum->aapcs_ncrn;
ba5f7cbc 2025 nregs = size / UNITS_PER_WORD;
df401d54 2026
 2027 /* C6 - C9, though the sign and zero extension semantics are
 2028 handled elsewhere. This is the case where the argument fits
 2029 entirely in general registers. */
2030 if (allocate_ncrn && (ncrn + nregs <= NUM_ARG_REGS))
2031 {
2032 unsigned int alignment = aarch64_function_arg_alignment (mode, type);
2033
2034 gcc_assert (nregs == 0 || nregs == 1 || nregs == 2);
2035
2036 /* C.8 if the argument has an alignment of 16 then the NGRN is
2037 rounded up to the next even number. */
2038 if (nregs == 2 && alignment == 16 * BITS_PER_UNIT && ncrn % 2)
2039 {
2040 ++ncrn;
2041 gcc_assert (ncrn + nregs <= NUM_ARG_REGS);
2042 }
2043 /* NREGS can be 0 when e.g. an empty structure is to be passed.
2044 A reg is still generated for it, but the caller should be smart
2045 enough not to use it. */
2046 if (nregs == 0 || nregs == 1 || GET_MODE_CLASS (mode) == MODE_INT)
2047 {
2048 pcum->aapcs_reg = gen_rtx_REG (mode, R0_REGNUM + ncrn);
2049 }
2050 else
2051 {
2052 rtx par;
2053 int i;
2054
2055 par = gen_rtx_PARALLEL (mode, rtvec_alloc (nregs));
2056 for (i = 0; i < nregs; i++)
2057 {
2058 rtx tmp = gen_rtx_REG (word_mode, R0_REGNUM + ncrn + i);
2059 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp,
2060 GEN_INT (i * UNITS_PER_WORD));
2061 XVECEXP (par, 0, i) = tmp;
2062 }
2063 pcum->aapcs_reg = par;
2064 }
2065
2066 pcum->aapcs_nextncrn = ncrn + nregs;
2067 return;
2068 }
2069
2070 /* C.11 */
2071 pcum->aapcs_nextncrn = NUM_ARG_REGS;
2072
 2073 /* The argument is passed on the stack; record the needed number of words for
ba5f7cbc 2074 this argument and align the total size if necessary. */
df401d54 2075on_stack:
ba5f7cbc 2076 pcum->aapcs_stack_words = size / UNITS_PER_WORD;
df401d54 2077 if (aarch64_function_arg_alignment (mode, type) == 16 * BITS_PER_UNIT)
74d63f1e 2078 pcum->aapcs_stack_size = ROUND_UP (pcum->aapcs_stack_size,
2079 16 / UNITS_PER_WORD);
df401d54 2080 return;
2081}
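
/* Worked example of the rules above (illustrative only): for a call
   such as `f (int a, __int128 b)', the int is allocated to w0, leaving
   NGRN at 1; the __int128 needs two registers and has 16-byte
   alignment, so rule C.8 rounds NGRN up to 2 and the value goes in
   x2/x3, leaving x1 unused for that call.  */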
2082
2083/* Implement TARGET_FUNCTION_ARG. */
2084
2085static rtx
3754d046 2086aarch64_function_arg (cumulative_args_t pcum_v, machine_mode mode,
df401d54 2087 const_tree type, bool named)
2088{
2089 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
2090 gcc_assert (pcum->pcs_variant == ARM_PCS_AAPCS64);
2091
2092 if (mode == VOIDmode)
2093 return NULL_RTX;
2094
2095 aarch64_layout_arg (pcum_v, mode, type, named);
2096 return pcum->aapcs_reg;
2097}
2098
2099void
2100aarch64_init_cumulative_args (CUMULATIVE_ARGS *pcum,
2101 const_tree fntype ATTRIBUTE_UNUSED,
2102 rtx libname ATTRIBUTE_UNUSED,
2103 const_tree fndecl ATTRIBUTE_UNUSED,
2104 unsigned n_named ATTRIBUTE_UNUSED)
2105{
2106 pcum->aapcs_ncrn = 0;
2107 pcum->aapcs_nvrn = 0;
2108 pcum->aapcs_nextncrn = 0;
2109 pcum->aapcs_nextnvrn = 0;
2110 pcum->pcs_variant = ARM_PCS_AAPCS64;
2111 pcum->aapcs_reg = NULL_RTX;
2112 pcum->aapcs_arg_processed = false;
2113 pcum->aapcs_stack_words = 0;
2114 pcum->aapcs_stack_size = 0;
2115
b37104f6 2116 if (!TARGET_FLOAT
2117 && fndecl && TREE_PUBLIC (fndecl)
2118 && fntype && fntype != error_mark_node)
2119 {
2120 const_tree type = TREE_TYPE (fntype);
2121 machine_mode mode ATTRIBUTE_UNUSED; /* To pass pointer as argument. */
2122 int nregs ATTRIBUTE_UNUSED; /* Likewise. */
2123 if (aarch64_vfp_is_call_or_return_candidate (TYPE_MODE (type), type,
2124 &mode, &nregs, NULL))
2125 aarch64_err_no_fpadvsimd (TYPE_MODE (type), "return type");
2126 }
df401d54 2127 return;
2128}
2129
2130static void
2131aarch64_function_arg_advance (cumulative_args_t pcum_v,
3754d046 2132 machine_mode mode,
df401d54 2133 const_tree type,
2134 bool named)
2135{
2136 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
2137 if (pcum->pcs_variant == ARM_PCS_AAPCS64)
2138 {
2139 aarch64_layout_arg (pcum_v, mode, type, named);
2140 gcc_assert ((pcum->aapcs_reg != NULL_RTX)
2141 != (pcum->aapcs_stack_words != 0));
2142 pcum->aapcs_arg_processed = false;
2143 pcum->aapcs_ncrn = pcum->aapcs_nextncrn;
2144 pcum->aapcs_nvrn = pcum->aapcs_nextnvrn;
2145 pcum->aapcs_stack_size += pcum->aapcs_stack_words;
2146 pcum->aapcs_stack_words = 0;
2147 pcum->aapcs_reg = NULL_RTX;
2148 }
2149}
2150
2151bool
2152aarch64_function_arg_regno_p (unsigned regno)
2153{
2154 return ((GP_REGNUM_P (regno) && regno < R0_REGNUM + NUM_ARG_REGS)
2155 || (FP_REGNUM_P (regno) && regno < V0_REGNUM + NUM_FP_ARG_REGS));
2156}
2157
2158/* Implement FUNCTION_ARG_BOUNDARY. Every parameter gets at least
2159 PARM_BOUNDARY bits of alignment, but will be given anything up
2160 to STACK_BOUNDARY bits if the type requires it. This makes sure
2161 that both before and after the layout of each argument, the Next
2162 Stacked Argument Address (NSAA) will have a minimum alignment of
2163 8 bytes. */
2164
2165static unsigned int
3754d046 2166aarch64_function_arg_boundary (machine_mode mode, const_tree type)
df401d54 2167{
2168 unsigned int alignment = aarch64_function_arg_alignment (mode, type);
2169
2170 if (alignment < PARM_BOUNDARY)
2171 alignment = PARM_BOUNDARY;
2172 if (alignment > STACK_BOUNDARY)
2173 alignment = STACK_BOUNDARY;
2174 return alignment;
2175}
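
/* For instance (assuming the usual AArch64 values of PARM_BOUNDARY ==
   64 and STACK_BOUNDARY == 128): a `char' argument has a natural
   alignment of 8 bits and is bumped up to 64, while an `__int128'
   argument requiring 128-bit alignment yields 128; the result is
   always clamped to the range [64, 128].  */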
2176
2177/* For use by FUNCTION_ARG_PADDING (MODE, TYPE).
2178
2179 Return true if an argument passed on the stack should be padded upwards,
2180 i.e. if the least-significant byte of the stack slot has useful data.
2181
2182 Small aggregate types are placed in the lowest memory address.
2183
2184 The related parameter passing rules are B.4, C.3, C.5 and C.14. */
2185
2186bool
3754d046 2187aarch64_pad_arg_upward (machine_mode mode, const_tree type)
df401d54 2188{
2189 /* On little-endian targets, the least significant byte of every stack
2190 argument is passed at the lowest byte address of the stack slot. */
2191 if (!BYTES_BIG_ENDIAN)
2192 return true;
2193
1ae2a9c0 2194 /* Otherwise, integral, floating-point and pointer types are padded downward:
df401d54 2195 the least significant byte of a stack argument is passed at the highest
2196 byte address of the stack slot. */
2197 if (type
1ae2a9c0 2198 ? (INTEGRAL_TYPE_P (type) || SCALAR_FLOAT_TYPE_P (type)
2199 || POINTER_TYPE_P (type))
df401d54 2200 : (SCALAR_INT_MODE_P (mode) || SCALAR_FLOAT_MODE_P (mode)))
2201 return false;
2202
2203 /* Everything else padded upward, i.e. data in first byte of stack slot. */
2204 return true;
2205}
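
/* As an example of the above: a single `char' passed on the stack
   occupies an 8-byte slot.  On little-endian targets it lives at the
   lowest byte address of that slot (padded upward); on big-endian
   targets returning false here places it at the highest byte address
   (padded downward).  */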
2206
2207/* Similarly, for use by BLOCK_REG_PADDING (MODE, TYPE, FIRST).
2208
 2209 It specifies padding for the last (which may also be the only)
 2210 element of a block move between registers and memory.  Assuming
 2211 the block is in memory, padding upward means that the last
 2212 element is padded after its most significant byte, while with
 2213 downward padding the last element is padded at its least
 2214 significant byte side.
2215
2216 Small aggregates and small complex types are always padded
2217 upwards.
2218
2219 We don't need to worry about homogeneous floating-point or
2220 short-vector aggregates; their move is not affected by the
2221 padding direction determined here. Regardless of endianness,
2222 each element of such an aggregate is put in the least
2223 significant bits of a fp/simd register.
2224
2225 Return !BYTES_BIG_ENDIAN if the least significant byte of the
2226 register has useful data, and return the opposite if the most
2227 significant byte does. */
2228
2229bool
3754d046 2230aarch64_pad_reg_upward (machine_mode mode, const_tree type,
df401d54 2231 bool first ATTRIBUTE_UNUSED)
2232{
2233
2234 /* Small composite types are always padded upward. */
2235 if (BYTES_BIG_ENDIAN && aarch64_composite_type_p (type, mode))
2236 {
2237 HOST_WIDE_INT size = (type ? int_size_in_bytes (type)
2238 : GET_MODE_SIZE (mode));
2239 if (size < 2 * UNITS_PER_WORD)
2240 return true;
2241 }
2242
2243 /* Otherwise, use the default padding. */
2244 return !BYTES_BIG_ENDIAN;
2245}
2246
3754d046 2247static machine_mode
df401d54 2248aarch64_libgcc_cmp_return_mode (void)
2249{
2250 return SImode;
2251}
2252
6a979f73 2253#define PROBE_INTERVAL (1 << STACK_CHECK_PROBE_INTERVAL_EXP)
2254
2255/* We use the 12-bit shifted immediate arithmetic instructions so values
2256 must be multiple of (1 << 12), i.e. 4096. */
2257#define ARITH_FACTOR 4096
2258
2259#if (PROBE_INTERVAL % ARITH_FACTOR) != 0
2260#error Cannot use simple address calculation for stack probing
2261#endif
2262
2263/* The pair of scratch registers used for stack probing. */
2264#define PROBE_STACK_FIRST_REG 9
2265#define PROBE_STACK_SECOND_REG 10
2266
2267/* Emit code to probe a range of stack addresses from FIRST to FIRST+SIZE,
2268 inclusive. These are offsets from the current stack pointer. */
2269
2270static void
2271aarch64_emit_probe_stack_range (HOST_WIDE_INT first, HOST_WIDE_INT size)
2272{
2273 rtx reg1 = gen_rtx_REG (ptr_mode, PROBE_STACK_FIRST_REG);
2274
2275 /* See the same assertion on PROBE_INTERVAL above. */
2276 gcc_assert ((first % ARITH_FACTOR) == 0);
2277
2278 /* See if we have a constant small number of probes to generate. If so,
2279 that's the easy case. */
2280 if (size <= PROBE_INTERVAL)
2281 {
2282 const HOST_WIDE_INT base = ROUND_UP (size, ARITH_FACTOR);
2283
2284 emit_set_insn (reg1,
2285 plus_constant (ptr_mode,
2286 stack_pointer_rtx, -(first + base)));
2287 emit_stack_probe (plus_constant (ptr_mode, reg1, base - size));
2288 }
2289
2290 /* The run-time loop is made up of 8 insns in the generic case while the
 2291 compile-time loop is made up of 4+2*(n-2) insns for n intervals. */
2292 else if (size <= 4 * PROBE_INTERVAL)
2293 {
2294 HOST_WIDE_INT i, rem;
2295
2296 emit_set_insn (reg1,
2297 plus_constant (ptr_mode,
2298 stack_pointer_rtx,
2299 -(first + PROBE_INTERVAL)));
2300 emit_stack_probe (reg1);
2301
2302 /* Probe at FIRST + N * PROBE_INTERVAL for values of N from 2 until
2303 it exceeds SIZE. If only two probes are needed, this will not
2304 generate any code. Then probe at FIRST + SIZE. */
2305 for (i = 2 * PROBE_INTERVAL; i < size; i += PROBE_INTERVAL)
2306 {
2307 emit_set_insn (reg1,
2308 plus_constant (ptr_mode, reg1, -PROBE_INTERVAL));
2309 emit_stack_probe (reg1);
2310 }
2311
2312 rem = size - (i - PROBE_INTERVAL);
2313 if (rem > 256)
2314 {
2315 const HOST_WIDE_INT base = ROUND_UP (rem, ARITH_FACTOR);
2316
2317 emit_set_insn (reg1, plus_constant (ptr_mode, reg1, -base));
2318 emit_stack_probe (plus_constant (ptr_mode, reg1, base - rem));
2319 }
2320 else
2321 emit_stack_probe (plus_constant (ptr_mode, reg1, -rem));
2322 }
2323
2324 /* Otherwise, do the same as above, but in a loop. Note that we must be
2325 extra careful with variables wrapping around because we might be at
2326 the very top (or the very bottom) of the address space and we have
2327 to be able to handle this case properly; in particular, we use an
2328 equality test for the loop condition. */
2329 else
2330 {
2331 rtx reg2 = gen_rtx_REG (ptr_mode, PROBE_STACK_SECOND_REG);
2332
2333 /* Step 1: round SIZE to the previous multiple of the interval. */
2334
2335 HOST_WIDE_INT rounded_size = size & -PROBE_INTERVAL;
2336
2337
2338 /* Step 2: compute initial and final value of the loop counter. */
2339
2340 /* TEST_ADDR = SP + FIRST. */
2341 emit_set_insn (reg1,
2342 plus_constant (ptr_mode, stack_pointer_rtx, -first));
2343
2344 /* LAST_ADDR = SP + FIRST + ROUNDED_SIZE. */
2345 emit_set_insn (reg2,
2346 plus_constant (ptr_mode, stack_pointer_rtx,
2347 -(first + rounded_size)));
2348
2349
2350 /* Step 3: the loop
2351
2352 do
2353 {
2354 TEST_ADDR = TEST_ADDR + PROBE_INTERVAL
2355 probe at TEST_ADDR
2356 }
2357 while (TEST_ADDR != LAST_ADDR)
2358
2359 probes at FIRST + N * PROBE_INTERVAL for values of N from 1
2360 until it is equal to ROUNDED_SIZE. */
2361
2362 if (ptr_mode == DImode)
2363 emit_insn (gen_probe_stack_range_di (reg1, reg1, reg2));
2364 else
2365 emit_insn (gen_probe_stack_range_si (reg1, reg1, reg2));
2366
2367
2368 /* Step 4: probe at FIRST + SIZE if we cannot assert at compile-time
2369 that SIZE is equal to ROUNDED_SIZE. */
2370
2371 if (size != rounded_size)
2372 {
2373 HOST_WIDE_INT rem = size - rounded_size;
2374
2375 if (rem > 256)
2376 {
2377 const HOST_WIDE_INT base = ROUND_UP (rem, ARITH_FACTOR);
2378
2379 emit_set_insn (reg2, plus_constant (ptr_mode, reg2, -base));
2380 emit_stack_probe (plus_constant (ptr_mode, reg2, base - rem));
2381 }
2382 else
2383 emit_stack_probe (plus_constant (ptr_mode, reg2, -rem));
2384 }
2385 }
2386
2387 /* Make sure nothing is scheduled before we are done. */
2388 emit_insn (gen_blockage ());
2389}
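
/* Worked example of the loop case above (illustrative; assumes
   PROBE_INTERVAL == 4096): with FIRST == 8192 and SIZE == 20000,
   ROUNDED_SIZE is 16384, TEST_ADDR starts at SP - 8192 and LAST_ADDR is
   SP - 24576.  The loop probes at SP - 12288, -16384, -20480 and -24576;
   since SIZE != ROUNDED_SIZE, the residual 3616 bytes get one more
   adjustment and a final probe at SP - 28192 == SP - (FIRST + SIZE).  */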
2390
2391/* Probe a range of stack addresses from REG1 to REG2 inclusive. These are
2392 absolute addresses. */
2393
2394const char *
2395aarch64_output_probe_stack_range (rtx reg1, rtx reg2)
2396{
2397 static int labelno = 0;
2398 char loop_lab[32];
2399 rtx xops[2];
2400
2401 ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno++);
2402
2403 /* Loop. */
2404 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);
2405
2406 /* TEST_ADDR = TEST_ADDR + PROBE_INTERVAL. */
2407 xops[0] = reg1;
2408 xops[1] = GEN_INT (PROBE_INTERVAL);
2409 output_asm_insn ("sub\t%0, %0, %1", xops);
2410
2411 /* Probe at TEST_ADDR. */
2412 output_asm_insn ("str\txzr, [%0]", xops);
2413
2414 /* Test if TEST_ADDR == LAST_ADDR. */
2415 xops[1] = reg2;
2416 output_asm_insn ("cmp\t%0, %1", xops);
2417
2418 /* Branch. */
2419 fputs ("\tb.ne\t", asm_out_file);
2420 assemble_name_raw (asm_out_file, loop_lab);
2421 fputc ('\n', asm_out_file);
2422
2423 return "";
2424}
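
/* With the register choices above (x9 and x10 on an LP64 target) and a
   4 KiB probe interval, the emitted sequence is roughly:

	.LPSRL0:
	sub	x9, x9, 4096
	str	xzr, [x9]
	cmp	x9, x10
	b.ne	.LPSRL0

   (shown only as an illustration of the output templates; the label
   name depends on the internal label counter).  */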
2425
df401d54 2426static bool
2427aarch64_frame_pointer_required (void)
2428{
743de8ed 2429 /* In aarch64_override_options_after_change
2430 flag_omit_leaf_frame_pointer turns off the frame pointer by
2431 default. Turn it back on now if we've not got a leaf
2432 function. */
2433 if (flag_omit_leaf_frame_pointer
2434 && (!crtl->is_leaf || df_regs_ever_live_p (LR_REGNUM)))
2435 return true;
df401d54 2436
743de8ed 2437 return false;
df401d54 2438}
2439
2440/* Mark the registers that need to be saved by the callee and calculate
2441 the size of the callee-saved registers area and frame record (both FP
2442 and LR may be omitted). */
2443static void
2444aarch64_layout_frame (void)
2445{
2446 HOST_WIDE_INT offset = 0;
2447 int regno;
2448
2449 if (reload_completed && cfun->machine->frame.laid_out)
2450 return;
2451
4fae7ffc 2452#define SLOT_NOT_REQUIRED (-2)
2453#define SLOT_REQUIRED (-1)
2454
0f3313e1 2455 cfun->machine->frame.wb_candidate1 = FIRST_PSEUDO_REGISTER;
2456 cfun->machine->frame.wb_candidate2 = FIRST_PSEUDO_REGISTER;
2457
df401d54 2458 /* First mark all the registers that really need to be saved... */
2459 for (regno = R0_REGNUM; regno <= R30_REGNUM; regno++)
4fae7ffc 2460 cfun->machine->frame.reg_offset[regno] = SLOT_NOT_REQUIRED;
df401d54 2461
2462 for (regno = V0_REGNUM; regno <= V31_REGNUM; regno++)
4fae7ffc 2463 cfun->machine->frame.reg_offset[regno] = SLOT_NOT_REQUIRED;
df401d54 2464
2465 /* ... that includes the eh data registers (if needed)... */
2466 if (crtl->calls_eh_return)
2467 for (regno = 0; EH_RETURN_DATA_REGNO (regno) != INVALID_REGNUM; regno++)
4fae7ffc 2468 cfun->machine->frame.reg_offset[EH_RETURN_DATA_REGNO (regno)]
2469 = SLOT_REQUIRED;
df401d54 2470
2471 /* ... and any callee saved register that dataflow says is live. */
2472 for (regno = R0_REGNUM; regno <= R30_REGNUM; regno++)
2473 if (df_regs_ever_live_p (regno)
376e0a4f 2474 && (regno == R30_REGNUM
2475 || !call_used_regs[regno]))
4fae7ffc 2476 cfun->machine->frame.reg_offset[regno] = SLOT_REQUIRED;
df401d54 2477
2478 for (regno = V0_REGNUM; regno <= V31_REGNUM; regno++)
2479 if (df_regs_ever_live_p (regno)
2480 && !call_used_regs[regno])
4fae7ffc 2481 cfun->machine->frame.reg_offset[regno] = SLOT_REQUIRED;
df401d54 2482
2483 if (frame_pointer_needed)
2484 {
5286c9d9 2485 /* FP and LR are placed in the linkage record. */
df401d54 2486 cfun->machine->frame.reg_offset[R29_REGNUM] = 0;
0f3313e1 2487 cfun->machine->frame.wb_candidate1 = R29_REGNUM;
5286c9d9 2488 cfun->machine->frame.reg_offset[R30_REGNUM] = UNITS_PER_WORD;
0f3313e1 2489 cfun->machine->frame.wb_candidate2 = R30_REGNUM;
df401d54 2490 cfun->machine->frame.hardfp_offset = 2 * UNITS_PER_WORD;
5286c9d9 2491 offset += 2 * UNITS_PER_WORD;
df401d54 2492 }
2493
2494 /* Now assign stack slots for them. */
5286c9d9 2495 for (regno = R0_REGNUM; regno <= R30_REGNUM; regno++)
4fae7ffc 2496 if (cfun->machine->frame.reg_offset[regno] == SLOT_REQUIRED)
df401d54 2497 {
2498 cfun->machine->frame.reg_offset[regno] = offset;
0f3313e1 2499 if (cfun->machine->frame.wb_candidate1 == FIRST_PSEUDO_REGISTER)
2500 cfun->machine->frame.wb_candidate1 = regno;
2501 else if (cfun->machine->frame.wb_candidate2 == FIRST_PSEUDO_REGISTER)
2502 cfun->machine->frame.wb_candidate2 = regno;
df401d54 2503 offset += UNITS_PER_WORD;
2504 }
2505
2506 for (regno = V0_REGNUM; regno <= V31_REGNUM; regno++)
4fae7ffc 2507 if (cfun->machine->frame.reg_offset[regno] == SLOT_REQUIRED)
df401d54 2508 {
2509 cfun->machine->frame.reg_offset[regno] = offset;
0f3313e1 2510 if (cfun->machine->frame.wb_candidate1 == FIRST_PSEUDO_REGISTER)
2511 cfun->machine->frame.wb_candidate1 = regno;
2512 else if (cfun->machine->frame.wb_candidate2 == FIRST_PSEUDO_REGISTER
2513 && cfun->machine->frame.wb_candidate1 >= V0_REGNUM)
2514 cfun->machine->frame.wb_candidate2 = regno;
df401d54 2515 offset += UNITS_PER_WORD;
2516 }
2517
df401d54 2518 cfun->machine->frame.padding0 =
74d63f1e 2519 (ROUND_UP (offset, STACK_BOUNDARY / BITS_PER_UNIT) - offset);
2520 offset = ROUND_UP (offset, STACK_BOUNDARY / BITS_PER_UNIT);
df401d54 2521
2522 cfun->machine->frame.saved_regs_size = offset;
ee32c719 2523
2524 cfun->machine->frame.hard_fp_offset
74d63f1e 2525 = ROUND_UP (cfun->machine->frame.saved_varargs_size
2526 + get_frame_size ()
2527 + cfun->machine->frame.saved_regs_size,
2528 STACK_BOUNDARY / BITS_PER_UNIT);
ee32c719 2529
2530 cfun->machine->frame.frame_size
74d63f1e 2531 = ROUND_UP (cfun->machine->frame.hard_fp_offset
2532 + crtl->outgoing_args_size,
2533 STACK_BOUNDARY / BITS_PER_UNIT);
ee32c719 2534
df401d54 2535 cfun->machine->frame.laid_out = true;
2536}
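
/* Small worked example of the layout above (illustrative only): for a
   function that needs a frame pointer and in which x19 and d8 are the
   only other live callee-saved registers, reg_offset becomes x29 -> 0,
   x30 -> 8, x19 -> 16 and d8 -> 24, so saved_regs_size is 32 with no
   trailing padding, and x29/x30 are the write-back candidates.  */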
2537
df401d54 2538static bool
2539aarch64_register_saved_on_entry (int regno)
2540{
4fae7ffc 2541 return cfun->machine->frame.reg_offset[regno] >= 0;
df401d54 2542}
2543
f138c228 2544static unsigned
2545aarch64_next_callee_save (unsigned regno, unsigned limit)
2546{
2547 while (regno <= limit && !aarch64_register_saved_on_entry (regno))
2548 regno ++;
2549 return regno;
2550}
df401d54 2551
0904ffa7 2552static void
3754d046 2553aarch64_pushwb_single_reg (machine_mode mode, unsigned regno,
0904ffa7 2554 HOST_WIDE_INT adjustment)
2555 {
2556 rtx base_rtx = stack_pointer_rtx;
2557 rtx insn, reg, mem;
2558
2559 reg = gen_rtx_REG (mode, regno);
2560 mem = gen_rtx_PRE_MODIFY (Pmode, base_rtx,
2561 plus_constant (Pmode, base_rtx, -adjustment));
2562 mem = gen_rtx_MEM (mode, mem);
2563
2564 insn = emit_move_insn (mem, reg);
2565 RTX_FRAME_RELATED_P (insn) = 1;
2566}
2567
76868f75 2568static rtx
3754d046 2569aarch64_gen_storewb_pair (machine_mode mode, rtx base, rtx reg, rtx reg2,
76868f75 2570 HOST_WIDE_INT adjustment)
2571{
2572 switch (mode)
2573 {
2574 case DImode:
2575 return gen_storewb_pairdi_di (base, base, reg, reg2,
2576 GEN_INT (-adjustment),
2577 GEN_INT (UNITS_PER_WORD - adjustment));
2578 case DFmode:
2579 return gen_storewb_pairdf_di (base, base, reg, reg2,
2580 GEN_INT (-adjustment),
2581 GEN_INT (UNITS_PER_WORD - adjustment));
2582 default:
2583 gcc_unreachable ();
2584 }
2585}
2586
2587static void
3754d046 2588aarch64_pushwb_pair_reg (machine_mode mode, unsigned regno1,
76868f75 2589 unsigned regno2, HOST_WIDE_INT adjustment)
2590{
ff38b261 2591 rtx_insn *insn;
76868f75 2592 rtx reg1 = gen_rtx_REG (mode, regno1);
2593 rtx reg2 = gen_rtx_REG (mode, regno2);
2594
2595 insn = emit_insn (aarch64_gen_storewb_pair (mode, stack_pointer_rtx, reg1,
2596 reg2, adjustment));
2597 RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 2)) = 1;
76868f75 2598 RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 1)) = 1;
2599 RTX_FRAME_RELATED_P (insn) = 1;
2600}
2601
b855409a 2602static rtx
3754d046 2603aarch64_gen_loadwb_pair (machine_mode mode, rtx base, rtx reg, rtx reg2,
b855409a 2604 HOST_WIDE_INT adjustment)
2605{
2606 switch (mode)
2607 {
2608 case DImode:
2609 return gen_loadwb_pairdi_di (base, base, reg, reg2, GEN_INT (adjustment),
2341013f 2610 GEN_INT (UNITS_PER_WORD));
b855409a 2611 case DFmode:
2612 return gen_loadwb_pairdf_di (base, base, reg, reg2, GEN_INT (adjustment),
2341013f 2613 GEN_INT (UNITS_PER_WORD));
b855409a 2614 default:
2615 gcc_unreachable ();
2616 }
2617}
2618
468fb693 2619static rtx
3754d046 2620aarch64_gen_store_pair (machine_mode mode, rtx mem1, rtx reg1, rtx mem2,
468fb693 2621 rtx reg2)
2622{
2623 switch (mode)
2624 {
2625 case DImode:
2626 return gen_store_pairdi (mem1, reg1, mem2, reg2);
2627
2628 case DFmode:
2629 return gen_store_pairdf (mem1, reg1, mem2, reg2);
2630
2631 default:
2632 gcc_unreachable ();
2633 }
2634}
2635
2636static rtx
3754d046 2637aarch64_gen_load_pair (machine_mode mode, rtx reg1, rtx mem1, rtx reg2,
468fb693 2638 rtx mem2)
2639{
2640 switch (mode)
2641 {
2642 case DImode:
2643 return gen_load_pairdi (reg1, mem1, reg2, mem2);
2644
2645 case DFmode:
2646 return gen_load_pairdf (reg1, mem1, reg2, mem2);
2647
2648 default:
2649 gcc_unreachable ();
2650 }
2651}
2652
df401d54 2653
df401d54 2654static void
3754d046 2655aarch64_save_callee_saves (machine_mode mode, HOST_WIDE_INT start_offset,
ea12ddae 2656 unsigned start, unsigned limit, bool skip_wb)
df401d54 2657{
ff38b261 2658 rtx_insn *insn;
3754d046 2659 rtx (*gen_mem_ref) (machine_mode, rtx) = (frame_pointer_needed
4b6c4dda 2660 ? gen_frame_mem : gen_rtx_MEM);
df401d54 2661 unsigned regno;
2662 unsigned regno2;
2663
9a5fab4d 2664 for (regno = aarch64_next_callee_save (start, limit);
f138c228 2665 regno <= limit;
2666 regno = aarch64_next_callee_save (regno + 1, limit))
df401d54 2667 {
ea12ddae 2668 rtx reg, mem;
2669 HOST_WIDE_INT offset;
f138c228 2670
ea12ddae 2671 if (skip_wb
2672 && (regno == cfun->machine->frame.wb_candidate1
2673 || regno == cfun->machine->frame.wb_candidate2))
2674 continue;
2675
2676 reg = gen_rtx_REG (mode, regno);
2677 offset = start_offset + cfun->machine->frame.reg_offset[regno];
9a5fab4d 2678 mem = gen_mem_ref (mode, plus_constant (Pmode, stack_pointer_rtx,
2679 offset));
f138c228 2680
2681 regno2 = aarch64_next_callee_save (regno + 1, limit);
2682
2683 if (regno2 <= limit
2684 && ((cfun->machine->frame.reg_offset[regno] + UNITS_PER_WORD)
2685 == cfun->machine->frame.reg_offset[regno2]))
2686
df401d54 2687 {
9a5fab4d 2688 rtx reg2 = gen_rtx_REG (mode, regno2);
f138c228 2689 rtx mem2;
2690
2691 offset = start_offset + cfun->machine->frame.reg_offset[regno2];
3b6e7368 2692 mem2 = gen_mem_ref (mode, plus_constant (Pmode, stack_pointer_rtx,
2693 offset));
2694 insn = emit_insn (aarch64_gen_store_pair (mode, mem, reg, mem2,
2695 reg2));
adc234e0 2696
f138c228 2697 /* The first part of a frame-related parallel insn is
2698 always assumed to be relevant to the frame
 2699 calculations; subsequent parts are only
2700 frame-related if explicitly marked. */
2701 RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 1)) = 1;
2702 regno = regno2;
2703 }
2704 else
3b6e7368 2705 insn = emit_move_insn (mem, reg);
2706
2707 RTX_FRAME_RELATED_P (insn) = 1;
2708 }
2709}
2710
2711static void
3754d046 2712aarch64_restore_callee_saves (machine_mode mode,
3b6e7368 2713 HOST_WIDE_INT start_offset, unsigned start,
ea3264ed 2714 unsigned limit, bool skip_wb, rtx *cfi_ops)
3b6e7368 2715{
3b6e7368 2716 rtx base_rtx = stack_pointer_rtx;
3754d046 2717 rtx (*gen_mem_ref) (machine_mode, rtx) = (frame_pointer_needed
3b6e7368 2718 ? gen_frame_mem : gen_rtx_MEM);
2719 unsigned regno;
2720 unsigned regno2;
2721 HOST_WIDE_INT offset;
2722
2723 for (regno = aarch64_next_callee_save (start, limit);
2724 regno <= limit;
2725 regno = aarch64_next_callee_save (regno + 1, limit))
2726 {
ea12ddae 2727 rtx reg, mem;
3b6e7368 2728
ea12ddae 2729 if (skip_wb
2730 && (regno == cfun->machine->frame.wb_candidate1
2731 || regno == cfun->machine->frame.wb_candidate2))
2732 continue;
2733
2734 reg = gen_rtx_REG (mode, regno);
3b6e7368 2735 offset = start_offset + cfun->machine->frame.reg_offset[regno];
2736 mem = gen_mem_ref (mode, plus_constant (Pmode, base_rtx, offset));
2737
2738 regno2 = aarch64_next_callee_save (regno + 1, limit);
2739
2740 if (regno2 <= limit
2741 && ((cfun->machine->frame.reg_offset[regno] + UNITS_PER_WORD)
2742 == cfun->machine->frame.reg_offset[regno2]))
f138c228 2743 {
3b6e7368 2744 rtx reg2 = gen_rtx_REG (mode, regno2);
2745 rtx mem2;
2746
2747 offset = start_offset + cfun->machine->frame.reg_offset[regno2];
2748 mem2 = gen_mem_ref (mode, plus_constant (Pmode, base_rtx, offset));
ea3264ed 2749 emit_insn (aarch64_gen_load_pair (mode, reg, mem, reg2, mem2));
3b6e7368 2750
ea3264ed 2751 *cfi_ops = alloc_reg_note (REG_CFA_RESTORE, reg2, *cfi_ops);
3b6e7368 2752 regno = regno2;
df401d54 2753 }
3b6e7368 2754 else
ea3264ed 2755 emit_move_insn (reg, mem);
2756 *cfi_ops = alloc_reg_note (REG_CFA_RESTORE, reg, *cfi_ops);
df401d54 2757 }
df401d54 2758}
2759
2760/* AArch64 stack frames generated by this compiler look like:
2761
2762 +-------------------------------+
2763 | |
2764 | incoming stack arguments |
2765 | |
37f9433c 2766 +-------------------------------+
2767 | | <-- incoming stack pointer (aligned)
df401d54 2768 | callee-allocated save area |
2769 | for register varargs |
2770 | |
37f9433c 2771 +-------------------------------+
2772 | local variables | <-- frame_pointer_rtx
df401d54 2773 | |
2774 +-------------------------------+
02bf77ed 2775 | padding0 | \
2776 +-------------------------------+ |
02bf77ed 2777 | callee-saved registers | | frame.saved_regs_size
02bf77ed 2778 +-------------------------------+ |
2779 | LR' | |
2780 +-------------------------------+ |
37f9433c 2781 | FP' | / <- hard_frame_pointer_rtx (aligned)
2782 +-------------------------------+
df401d54 2783 | dynamic allocation |
2784 +-------------------------------+
37f9433c 2785 | padding |
2786 +-------------------------------+
2787 | outgoing stack arguments | <-- arg_pointer
2788 | |
2789 +-------------------------------+
2790 | | <-- stack_pointer_rtx (aligned)
df401d54 2791
37f9433c 2792 Dynamic stack allocations via alloca() decrease stack_pointer_rtx
2793 but leave frame_pointer_rtx and hard_frame_pointer_rtx
2794 unchanged. */
df401d54 2795
2796/* Generate the prologue instructions for entry into a function.
2797 Establish the stack frame by decreasing the stack pointer with a
2798 properly calculated size and, if necessary, create a frame record
2799 filled with the values of LR and previous frame pointer. The
1ba4a1db 2800 current FP is also set up if it is in use. */
df401d54 2801
2802void
2803aarch64_expand_prologue (void)
2804{
2805 /* sub sp, sp, #<frame_size>
2806 stp {fp, lr}, [sp, #<frame_size> - 16]
2807 add fp, sp, #<frame_size> - hardfp_offset
2808 stp {cs_reg}, [fp, #-16] etc.
2809
2810 sub sp, sp, <final_adjustment_if_any>
2811 */
df401d54 2812 HOST_WIDE_INT frame_size, offset;
ee32c719 2813 HOST_WIDE_INT fp_offset; /* Offset from hard FP to SP. */
ea3264ed 2814 HOST_WIDE_INT hard_fp_offset;
ff38b261 2815 rtx_insn *insn;
df401d54 2816
2817 aarch64_layout_frame ();
df401d54 2818
ea3264ed 2819 offset = frame_size = cfun->machine->frame.frame_size;
2820 hard_fp_offset = cfun->machine->frame.hard_fp_offset;
2821 fp_offset = frame_size - hard_fp_offset;
df401d54 2822
ea3264ed 2823 if (flag_stack_usage_info)
2824 current_function_static_stack_size = frame_size;
df401d54 2825
6a979f73 2826 if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK)
2827 {
2828 if (crtl->is_leaf && !cfun->calls_alloca)
2829 {
2830 if (frame_size > PROBE_INTERVAL && frame_size > STACK_CHECK_PROTECT)
2831 aarch64_emit_probe_stack_range (STACK_CHECK_PROTECT,
2832 frame_size - STACK_CHECK_PROTECT);
2833 }
2834 else if (frame_size > 0)
2835 aarch64_emit_probe_stack_range (STACK_CHECK_PROTECT, frame_size);
2836 }
2837
cfc0f4b2 2838 /* Store pairs and load pairs have a range of only -512 to 504. */
df401d54 2839 if (offset >= 512)
2840 {
2841 /* When the frame has a large size, an initial decrease is done on
2842 the stack pointer to jump over the callee-allocated save area for
2843 register varargs, the local variable area and/or the callee-saved
2844 register area. This will allow the pre-index write-back
2845 store pair instructions to be used for setting up the stack frame
2846 efficiently. */
ea3264ed 2847 offset = hard_fp_offset;
df401d54 2848 if (offset >= 512)
2849 offset = cfun->machine->frame.saved_regs_size;
2850
2851 frame_size -= (offset + crtl->outgoing_args_size);
2852 fp_offset = 0;
2853
2854 if (frame_size >= 0x1000000)
2855 {
2856 rtx op0 = gen_rtx_REG (Pmode, IP0_REGNUM);
2857 emit_move_insn (op0, GEN_INT (-frame_size));
ea3264ed 2858 insn = emit_insn (gen_add2_insn (stack_pointer_rtx, op0));
2859
2860 add_reg_note (insn, REG_CFA_ADJUST_CFA,
d1f9b275 2861 gen_rtx_SET (stack_pointer_rtx,
ea3264ed 2862 plus_constant (Pmode, stack_pointer_rtx,
2863 -frame_size)));
2864 RTX_FRAME_RELATED_P (insn) = 1;
df401d54 2865 }
2866 else if (frame_size > 0)
2867 {
ea3264ed 2868 int hi_ofs = frame_size & 0xfff000;
2869 int lo_ofs = frame_size & 0x000fff;
2870
2871 if (hi_ofs)
df401d54 2872 {
2873 insn = emit_insn (gen_add2_insn
ea3264ed 2874 (stack_pointer_rtx, GEN_INT (-hi_ofs)));
df401d54 2875 RTX_FRAME_RELATED_P (insn) = 1;
2876 }
ea3264ed 2877 if (lo_ofs)
df401d54 2878 {
2879 insn = emit_insn (gen_add2_insn
ea3264ed 2880 (stack_pointer_rtx, GEN_INT (-lo_ofs)));
df401d54 2881 RTX_FRAME_RELATED_P (insn) = 1;
2882 }
2883 }
2884 }
2885 else
2886 frame_size = -1;
2887
2888 if (offset > 0)
2889 {
ea12ddae 2890 bool skip_wb = false;
2891
df401d54 2892 if (frame_pointer_needed)
2893 {
0904ffa7 2894 skip_wb = true;
2895
df401d54 2896 if (fp_offset)
2897 {
2898 insn = emit_insn (gen_add2_insn (stack_pointer_rtx,
2899 GEN_INT (-offset)));
2900 RTX_FRAME_RELATED_P (insn) = 1;
76868f75 2901
2902 aarch64_save_callee_saves (DImode, fp_offset, R29_REGNUM,
0904ffa7 2903 R30_REGNUM, false);
df401d54 2904 }
2905 else
76868f75 2906 aarch64_pushwb_pair_reg (DImode, R29_REGNUM, R30_REGNUM, offset);
df401d54 2907
2908 /* Set up frame pointer to point to the location of the
2909 previous frame pointer on the stack. */
2910 insn = emit_insn (gen_add3_insn (hard_frame_pointer_rtx,
2911 stack_pointer_rtx,
2912 GEN_INT (fp_offset)));
df401d54 2913 RTX_FRAME_RELATED_P (insn) = 1;
ea3264ed 2914 emit_insn (gen_stack_tie (stack_pointer_rtx, hard_frame_pointer_rtx));
df401d54 2915 }
2916 else
2917 {
0904ffa7 2918 unsigned reg1 = cfun->machine->frame.wb_candidate1;
2919 unsigned reg2 = cfun->machine->frame.wb_candidate2;
76868f75 2920
0904ffa7 2921 if (fp_offset
2922 || reg1 == FIRST_PSEUDO_REGISTER
2923 || (reg2 == FIRST_PSEUDO_REGISTER
2924 && offset >= 256))
2925 {
2926 insn = emit_insn (gen_add2_insn (stack_pointer_rtx,
2927 GEN_INT (-offset)));
2928 RTX_FRAME_RELATED_P (insn) = 1;
2929 }
2930 else
2931 {
3754d046 2932 machine_mode mode1 = (reg1 <= R30_REGNUM) ? DImode : DFmode;
0904ffa7 2933
2934 skip_wb = true;
2935
2936 if (reg2 == FIRST_PSEUDO_REGISTER)
2937 aarch64_pushwb_single_reg (mode1, reg1, offset);
2938 else
2939 aarch64_pushwb_pair_reg (mode1, reg1, reg2, offset);
2940 }
df401d54 2941 }
2942
0904ffa7 2943 aarch64_save_callee_saves (DImode, fp_offset, R0_REGNUM, R30_REGNUM,
2944 skip_wb);
ea12ddae 2945 aarch64_save_callee_saves (DFmode, fp_offset, V0_REGNUM, V31_REGNUM,
2946 skip_wb);
df401d54 2947 }
2948
2949 /* when offset >= 512,
2950 sub sp, sp, #<outgoing_args_size> */
2951 if (frame_size > -1)
2952 {
2953 if (crtl->outgoing_args_size > 0)
2954 {
2955 insn = emit_insn (gen_add2_insn
2956 (stack_pointer_rtx,
2957 GEN_INT (- crtl->outgoing_args_size)));
2958 RTX_FRAME_RELATED_P (insn) = 1;
2959 }
2960 }
2961}
2962
a45f86df 2963/* Return TRUE if we can use a simple_return insn.
2964
 2965 This function checks whether the callee-saved stack is empty, which
 2966 means no restore actions are needed.  The pro_and_epilogue pass will use
 2967 this to check whether the shrink-wrapping optimization is feasible. */
2968
2969bool
2970aarch64_use_return_insn_p (void)
2971{
2972 if (!reload_completed)
2973 return false;
2974
2975 if (crtl->profile)
2976 return false;
2977
2978 aarch64_layout_frame ();
2979
2980 return cfun->machine->frame.frame_size == 0;
2981}
2982
df401d54 2983/* Generate the epilogue instructions for returning from a function. */
2984void
2985aarch64_expand_epilogue (bool for_sibcall)
2986{
ee32c719 2987 HOST_WIDE_INT frame_size, offset;
df401d54 2988 HOST_WIDE_INT fp_offset;
ea3264ed 2989 HOST_WIDE_INT hard_fp_offset;
ff38b261 2990 rtx_insn *insn;
d3213b83 2991 /* We need to add a memory barrier to prevent reads from the deallocated stack. */
2992 bool need_barrier_p = (get_frame_size () != 0
2993 || cfun->machine->frame.saved_varargs_size);
df401d54 2994
2995 aarch64_layout_frame ();
df401d54 2996
ee32c719 2997 offset = frame_size = cfun->machine->frame.frame_size;
ea3264ed 2998 hard_fp_offset = cfun->machine->frame.hard_fp_offset;
2999 fp_offset = frame_size - hard_fp_offset;
cfc0f4b2 3000
 3001 /* Store pairs and load pairs have a range of only -512 to 504. */
df401d54 3002 if (offset >= 512)
3003 {
ea3264ed 3004 offset = hard_fp_offset;
df401d54 3005 if (offset >= 512)
3006 offset = cfun->machine->frame.saved_regs_size;
3007
3008 frame_size -= (offset + crtl->outgoing_args_size);
3009 fp_offset = 0;
3010 if (!frame_pointer_needed && crtl->outgoing_args_size > 0)
3011 {
3012 insn = emit_insn (gen_add2_insn
3013 (stack_pointer_rtx,
3014 GEN_INT (crtl->outgoing_args_size)));
3015 RTX_FRAME_RELATED_P (insn) = 1;
3016 }
3017 }
3018 else
3019 frame_size = -1;
3020
3021 /* If there were outgoing arguments or we've done dynamic stack
3022 allocation, then restore the stack pointer from the frame
3023 pointer. This is at most one insn and more efficient than using
3024 GCC's internal mechanism. */
3025 if (frame_pointer_needed
3026 && (crtl->outgoing_args_size || cfun->calls_alloca))
3027 {
d3213b83 3028 if (cfun->calls_alloca)
3029 emit_insn (gen_stack_tie (stack_pointer_rtx, stack_pointer_rtx));
3030
df401d54 3031 insn = emit_insn (gen_add3_insn (stack_pointer_rtx,
3032 hard_frame_pointer_rtx,
e72fc098 3033 GEN_INT (0)));
3034 offset = offset - fp_offset;
df401d54 3035 }
3036
df401d54 3037 if (offset > 0)
3038 {
e34a5acb 3039 unsigned reg1 = cfun->machine->frame.wb_candidate1;
3040 unsigned reg2 = cfun->machine->frame.wb_candidate2;
3041 bool skip_wb = true;
ea3264ed 3042 rtx cfi_ops = NULL;
e34a5acb 3043
df401d54 3044 if (frame_pointer_needed)
e34a5acb 3045 fp_offset = 0;
3046 else if (fp_offset
3047 || reg1 == FIRST_PSEUDO_REGISTER
3048 || (reg2 == FIRST_PSEUDO_REGISTER
3049 && offset >= 256))
3050 skip_wb = false;
3051
3052 aarch64_restore_callee_saves (DImode, fp_offset, R0_REGNUM, R30_REGNUM,
ea3264ed 3053 skip_wb, &cfi_ops);
e34a5acb 3054 aarch64_restore_callee_saves (DFmode, fp_offset, V0_REGNUM, V31_REGNUM,
ea3264ed 3055 skip_wb, &cfi_ops);
e34a5acb 3056
d3213b83 3057 if (need_barrier_p)
3058 emit_insn (gen_stack_tie (stack_pointer_rtx, stack_pointer_rtx));
3059
e34a5acb 3060 if (skip_wb)
df401d54 3061 {
3754d046 3062 machine_mode mode1 = (reg1 <= R30_REGNUM) ? DImode : DFmode;
ea3264ed 3063 rtx rreg1 = gen_rtx_REG (mode1, reg1);
e34a5acb 3064
ea3264ed 3065 cfi_ops = alloc_reg_note (REG_CFA_RESTORE, rreg1, cfi_ops);
e34a5acb 3066 if (reg2 == FIRST_PSEUDO_REGISTER)
ea3264ed 3067 {
3068 rtx mem = plus_constant (Pmode, stack_pointer_rtx, offset);
3069 mem = gen_rtx_POST_MODIFY (Pmode, stack_pointer_rtx, mem);
3070 mem = gen_rtx_MEM (mode1, mem);
3071 insn = emit_move_insn (rreg1, mem);
3072 }
e34a5acb 3073 else
3074 {
ea3264ed 3075 rtx rreg2 = gen_rtx_REG (mode1, reg2);
e34a5acb 3076
ea3264ed 3077 cfi_ops = alloc_reg_note (REG_CFA_RESTORE, rreg2, cfi_ops);
3078 insn = emit_insn (aarch64_gen_loadwb_pair
3079 (mode1, stack_pointer_rtx, rreg1,
3080 rreg2, offset));
e34a5acb 3081 }
df401d54 3082 }
df401d54 3083 else
3084 {
3085 insn = emit_insn (gen_add2_insn (stack_pointer_rtx,
3086 GEN_INT (offset)));
df401d54 3087 }
df401d54 3088
ea3264ed 3089 /* Reset the CFA to be SP + FRAME_SIZE. */
3090 rtx new_cfa = stack_pointer_rtx;
3091 if (frame_size > 0)
3092 new_cfa = plus_constant (Pmode, new_cfa, frame_size);
3093 cfi_ops = alloc_reg_note (REG_CFA_DEF_CFA, new_cfa, cfi_ops);
3094 REG_NOTES (insn) = cfi_ops;
df401d54 3095 RTX_FRAME_RELATED_P (insn) = 1;
df401d54 3096 }
3097
ea3264ed 3098 if (frame_size > 0)
df401d54 3099 {
d3213b83 3100 if (need_barrier_p)
3101 emit_insn (gen_stack_tie (stack_pointer_rtx, stack_pointer_rtx));
3102
df401d54 3103 if (frame_size >= 0x1000000)
3104 {
3105 rtx op0 = gen_rtx_REG (Pmode, IP0_REGNUM);
3106 emit_move_insn (op0, GEN_INT (frame_size));
ea3264ed 3107 insn = emit_insn (gen_add2_insn (stack_pointer_rtx, op0));
df401d54 3108 }
ea3264ed 3109 else
df401d54 3110 {
ea3264ed 3111 int hi_ofs = frame_size & 0xfff000;
3112 int lo_ofs = frame_size & 0x000fff;
3113
3114 if (hi_ofs && lo_ofs)
df401d54 3115 {
3116 insn = emit_insn (gen_add2_insn
ea3264ed 3117 (stack_pointer_rtx, GEN_INT (hi_ofs)));
df401d54 3118 RTX_FRAME_RELATED_P (insn) = 1;
ea3264ed 3119 frame_size = lo_ofs;
df401d54 3120 }
ea3264ed 3121 insn = emit_insn (gen_add2_insn
3122 (stack_pointer_rtx, GEN_INT (frame_size)));
df401d54 3123 }
3124
ea3264ed 3125 /* Reset the CFA to be SP + 0. */
3126 add_reg_note (insn, REG_CFA_DEF_CFA, stack_pointer_rtx);
3127 RTX_FRAME_RELATED_P (insn) = 1;
3128 }
3129
3130 /* Stack adjustment for exception handler. */
3131 if (crtl->calls_eh_return)
3132 {
3133 /* We need to unwind the stack by the offset computed by
3134 EH_RETURN_STACKADJ_RTX. We have already reset the CFA
3135 to be SP; letting the CFA move during this adjustment
3136 is just as correct as retaining the CFA from the body
3137 of the function. Therefore, do nothing special. */
3138 emit_insn (gen_add2_insn (stack_pointer_rtx, EH_RETURN_STACKADJ_RTX));
df401d54 3139 }
3140
3141 emit_use (gen_rtx_REG (DImode, LR_REGNUM));
3142 if (!for_sibcall)
3143 emit_jump_insn (ret_rtx);
3144}
3145
3146/* Return the place to copy the exception unwinding return address to.
 3147 This will probably be a stack slot, but could (in theory) be the
 3148 return register. */
3149rtx
3150aarch64_final_eh_return_addr (void)
3151{
ee32c719 3152 HOST_WIDE_INT fp_offset;
3153
df401d54 3154 aarch64_layout_frame ();
ee32c719 3155
3156 fp_offset = cfun->machine->frame.frame_size
3157 - cfun->machine->frame.hard_fp_offset;
df401d54 3158
3159 if (cfun->machine->frame.reg_offset[LR_REGNUM] < 0)
3160 return gen_rtx_REG (DImode, LR_REGNUM);
3161
3162 /* DSE and CSELIB do not detect an alias between sp+k1 and fp+k2. This can
3163 result in a store to save LR introduced by builtin_eh_return () being
3164 incorrectly deleted because the alias is not detected.
3165 So in the calculation of the address to copy the exception unwinding
3166 return address to, we note 2 cases.
3167 If FP is needed and the fp_offset is 0, it means that SP = FP and hence
3168 we return a SP-relative location since all the addresses are SP-relative
3169 in this case. This prevents the store from being optimized away.
3170 If the fp_offset is not 0, then the addresses will be FP-relative and
3171 therefore we return a FP-relative location. */
3172
3173 if (frame_pointer_needed)
3174 {
3175 if (fp_offset)
3176 return gen_frame_mem (DImode,
3177 plus_constant (Pmode, hard_frame_pointer_rtx, UNITS_PER_WORD));
3178 else
3179 return gen_frame_mem (DImode,
3180 plus_constant (Pmode, stack_pointer_rtx, UNITS_PER_WORD));
3181 }
3182
3183 /* If FP is not needed, we calculate the location of LR, which would be
3184 at the top of the saved registers block. */
3185
3186 return gen_frame_mem (DImode,
3187 plus_constant (Pmode,
3188 stack_pointer_rtx,
3189 fp_offset
3190 + cfun->machine->frame.saved_regs_size
3191 - 2 * UNITS_PER_WORD));
3192}
3193
3967ee63 3194/* Possibly output code to build up a constant in a register. For
3195 the benefit of the costs infrastructure, returns the number of
3196 instructions which would be emitted. GENERATE inhibits or
3197 enables code generation. */
3198
3199static int
3200aarch64_build_constant (int regnum, HOST_WIDE_INT val, bool generate)
df401d54 3201{
3967ee63 3202 int insns = 0;
3203
df401d54 3204 if (aarch64_bitmask_imm (val, DImode))
3967ee63 3205 {
3206 if (generate)
3207 emit_move_insn (gen_rtx_REG (Pmode, regnum), GEN_INT (val));
3208 insns = 1;
3209 }
df401d54 3210 else
3211 {
3212 int i;
3213 int ncount = 0;
3214 int zcount = 0;
3215 HOST_WIDE_INT valp = val >> 16;
3216 HOST_WIDE_INT valm;
3217 HOST_WIDE_INT tval;
3218
3219 for (i = 16; i < 64; i += 16)
3220 {
3221 valm = (valp & 0xffff);
3222
3223 if (valm != 0)
3224 ++ zcount;
3225
3226 if (valm != 0xffff)
3227 ++ ncount;
3228
3229 valp >>= 16;
3230 }
3231
3232 /* zcount contains the number of additional MOVK instructions
3233 required if the constant is built up with an initial MOVZ instruction,
3234 while ncount is the number of MOVK instructions required if starting
 3235 with a MOVN instruction.  Choose the sequence that yields the
 3236 fewest instructions, preferring MOVZ instructions when both counts
 3237 are the same. */
3238 if (ncount < zcount)
3239 {
3967ee63 3240 if (generate)
3241 emit_move_insn (gen_rtx_REG (Pmode, regnum),
3242 GEN_INT (val | ~(HOST_WIDE_INT) 0xffff));
df401d54 3243 tval = 0xffff;
3967ee63 3244 insns++;
df401d54 3245 }
3246 else
3247 {
3967ee63 3248 if (generate)
3249 emit_move_insn (gen_rtx_REG (Pmode, regnum),
3250 GEN_INT (val & 0xffff));
df401d54 3251 tval = 0;
3967ee63 3252 insns++;
df401d54 3253 }
3254
3255 val >>= 16;
3256
3257 for (i = 16; i < 64; i += 16)
3258 {
3259 if ((val & 0xffff) != tval)
3967ee63 3260 {
3261 if (generate)
3262 emit_insn (gen_insv_immdi (gen_rtx_REG (Pmode, regnum),
3263 GEN_INT (i),
3264 GEN_INT (val & 0xffff)));
3265 insns++;
3266 }
df401d54 3267 val >>= 16;
3268 }
3269 }
3967ee63 3270 return insns;
df401d54 3271}
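
/* Worked example (illustrative only): VAL == 0x1234000000005678 is not
   a bitmask immediate; of its upper three 16-bit chunks, one is nonzero
   (zcount == 1) and all three differ from 0xffff (ncount == 3), so the
   MOVZ sequence wins and two instructions are emitted, roughly
   "movz reg, #0x5678" followed by "movk reg, #0x1234, lsl #48", and the
   function returns 2.  */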
3272
3273static void
85513da8 3274aarch64_add_constant (int regnum, int scratchreg, HOST_WIDE_INT delta)
df401d54 3275{
3276 HOST_WIDE_INT mdelta = delta;
85513da8 3277 rtx this_rtx = gen_rtx_REG (Pmode, regnum);
3278 rtx scratch_rtx = gen_rtx_REG (Pmode, scratchreg);
df401d54 3279
3280 if (mdelta < 0)
3281 mdelta = -mdelta;
3282
3283 if (mdelta >= 4096 * 4096)
3284 {
3967ee63 3285 (void) aarch64_build_constant (scratchreg, delta, true);
85513da8 3286 emit_insn (gen_add3_insn (this_rtx, this_rtx, scratch_rtx));
df401d54 3287 }
3288 else if (mdelta > 0)
3289 {
df401d54 3290 if (mdelta >= 4096)
85513da8 3291 {
d1f9b275 3292 emit_insn (gen_rtx_SET (scratch_rtx, GEN_INT (mdelta / 4096)));
85513da8 3293 rtx shift = gen_rtx_ASHIFT (Pmode, scratch_rtx, GEN_INT (12));
3294 if (delta < 0)
d1f9b275 3295 emit_insn (gen_rtx_SET (this_rtx,
85513da8 3296 gen_rtx_MINUS (Pmode, this_rtx, shift)));
3297 else
d1f9b275 3298 emit_insn (gen_rtx_SET (this_rtx,
85513da8 3299 gen_rtx_PLUS (Pmode, this_rtx, shift)));
3300 }
df401d54 3301 if (mdelta % 4096 != 0)
85513da8 3302 {
3303 scratch_rtx = GEN_INT ((delta < 0 ? -1 : 1) * (mdelta % 4096));
d1f9b275 3304 emit_insn (gen_rtx_SET (this_rtx,
85513da8 3305 gen_rtx_PLUS (Pmode, this_rtx, scratch_rtx)));
3306 }
df401d54 3307 }
3308}
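
/* For example (a sketch of the path above): DELTA == 5000 gives MDELTA
   == 5000, which is >= 4096, so the scratch register is loaded with
   5000 / 4096 == 1 and added shifted left by 12 (+4096); the remainder
   5000 % 4096 == 904 is then added directly, for a net adjustment of
   +5000.  */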
3309
3310/* Output code to add DELTA to the first argument, and then jump
3311 to FUNCTION. Used for C++ multiple inheritance. */
3312static void
3313aarch64_output_mi_thunk (FILE *file, tree thunk ATTRIBUTE_UNUSED,
3314 HOST_WIDE_INT delta,
3315 HOST_WIDE_INT vcall_offset,
3316 tree function)
3317{
3318 /* The this pointer is always in x0. Note that this differs from
 3319 Arm where the this pointer may be bumped to r1 if r0 is required
3320 to return a pointer to an aggregate. On AArch64 a result value
3321 pointer will be in x8. */
3322 int this_regno = R0_REGNUM;
ff38b261 3323 rtx this_rtx, temp0, temp1, addr, funexp;
3324 rtx_insn *insn;
df401d54 3325
e468b4c1 3326 reload_completed = 1;
3327 emit_note (NOTE_INSN_PROLOGUE_END);
df401d54 3328
3329 if (vcall_offset == 0)
85513da8 3330 aarch64_add_constant (this_regno, IP1_REGNUM, delta);
df401d54 3331 else
3332 {
011aed18 3333 gcc_assert ((vcall_offset & (POINTER_BYTES - 1)) == 0);
df401d54 3334
e468b4c1 3335 this_rtx = gen_rtx_REG (Pmode, this_regno);
3336 temp0 = gen_rtx_REG (Pmode, IP0_REGNUM);
3337 temp1 = gen_rtx_REG (Pmode, IP1_REGNUM);
df401d54 3338
e468b4c1 3339 addr = this_rtx;
3340 if (delta != 0)
3341 {
3342 if (delta >= -256 && delta < 256)
3343 addr = gen_rtx_PRE_MODIFY (Pmode, this_rtx,
3344 plus_constant (Pmode, this_rtx, delta));
3345 else
85513da8 3346 aarch64_add_constant (this_regno, IP1_REGNUM, delta);
df401d54 3347 }
3348
011aed18 3349 if (Pmode == ptr_mode)
3350 aarch64_emit_move (temp0, gen_rtx_MEM (ptr_mode, addr));
3351 else
3352 aarch64_emit_move (temp0,
3353 gen_rtx_ZERO_EXTEND (Pmode,
3354 gen_rtx_MEM (ptr_mode, addr)));
e468b4c1 3355
011aed18 3356 if (vcall_offset >= -256 && vcall_offset < 4096 * POINTER_BYTES)
e468b4c1 3357 addr = plus_constant (Pmode, temp0, vcall_offset);
df401d54 3358 else
3359 {
3967ee63 3360 (void) aarch64_build_constant (IP1_REGNUM, vcall_offset, true);
e468b4c1 3361 addr = gen_rtx_PLUS (Pmode, temp0, temp1);
df401d54 3362 }
3363
011aed18 3364 if (Pmode == ptr_mode)
3365 aarch64_emit_move (temp1, gen_rtx_MEM (ptr_mode,addr));
3366 else
3367 aarch64_emit_move (temp1,
3368 gen_rtx_SIGN_EXTEND (Pmode,
3369 gen_rtx_MEM (ptr_mode, addr)));
3370
e468b4c1 3371 emit_insn (gen_add2_insn (this_rtx, temp1));
df401d54 3372 }
3373
e468b4c1 3374 /* Generate a tail call to the target function. */
3375 if (!TREE_USED (function))
3376 {
3377 assemble_external (function);
3378 TREE_USED (function) = 1;
3379 }
3380 funexp = XEXP (DECL_RTL (function), 0);
3381 funexp = gen_rtx_MEM (FUNCTION_MODE, funexp);
3382 insn = emit_call_insn (gen_sibcall (funexp, const0_rtx, NULL_RTX));
3383 SIBLING_CALL_P (insn) = 1;
3384
3385 insn = get_insns ();
3386 shorten_branches (insn);
3387 final_start_function (insn, file, 1);
3388 final (insn, file, 1);
df401d54 3389 final_end_function ();
e468b4c1 3390
3391 /* Stop pretending to be a post-reload pass. */
3392 reload_completed = 0;
df401d54 3393}
3394
df401d54 3395static bool
3396aarch64_tls_referenced_p (rtx x)
3397{
3398 if (!TARGET_HAVE_TLS)
3399 return false;
868d8521 3400 subrtx_iterator::array_type array;
3401 FOR_EACH_SUBRTX (iter, array, x, ALL)
3402 {
3403 const_rtx x = *iter;
3404 if (GET_CODE (x) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (x) != 0)
3405 return true;
3406 /* Don't recurse into UNSPEC_TLS looking for TLS symbols; these are
3407 TLS offsets, not real symbol references. */
3408 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
3409 iter.skip_subrtxes ();
3410 }
3411 return false;
df401d54 3412}
3413
3414
df401d54 3415/* Return true if val can be encoded as a 12-bit unsigned immediate with
3416 a left shift of 0 or 12 bits. */
3417bool
3418aarch64_uimm12_shift (HOST_WIDE_INT val)
3419{
3420 return ((val & (((HOST_WIDE_INT) 0xfff) << 0)) == val
3421 || (val & (((HOST_WIDE_INT) 0xfff) << 12)) == val
3422 );
3423}
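
/* For example, 0xabc and 0xabc000 both satisfy this test (a 12-bit
   value shifted by 0 or by 12 bits), while 0x1001 does not, because its
   set bits span both halves.  */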
3424
3425
3426/* Return true if val is an immediate that can be loaded into a
3427 register by a MOVZ instruction. */
3428static bool
3754d046 3429aarch64_movw_imm (HOST_WIDE_INT val, machine_mode mode)
df401d54 3430{
3431 if (GET_MODE_SIZE (mode) > 4)
3432 {
3433 if ((val & (((HOST_WIDE_INT) 0xffff) << 32)) == val
3434 || (val & (((HOST_WIDE_INT) 0xffff) << 48)) == val)
3435 return 1;
3436 }
3437 else
3438 {
3439 /* Ignore sign extension. */
3440 val &= (HOST_WIDE_INT) 0xffffffff;
3441 }
3442 return ((val & (((HOST_WIDE_INT) 0xffff) << 0)) == val
3443 || (val & (((HOST_WIDE_INT) 0xffff) << 16)) == val);
3444}
3445
41f9090f 3446/* Multipliers for repeating bitmasks of width 32, 16, 8, 4, and 2. */
3447
3448static const unsigned HOST_WIDE_INT bitmask_imm_mul[] =
3449 {
3450 0x0000000100000001ull,
3451 0x0001000100010001ull,
3452 0x0101010101010101ull,
3453 0x1111111111111111ull,
3454 0x5555555555555555ull,
3455 };
3456
df401d54 3457
3458/* Return true if val is a valid bitmask immediate. */
41f9090f 3459
df401d54 3460bool
41f9090f 3461aarch64_bitmask_imm (HOST_WIDE_INT val_in, machine_mode mode)
df401d54 3462{
41f9090f 3463 unsigned HOST_WIDE_INT val, tmp, mask, first_one, next_one;
3464 int bits;
3465
3466 /* Check for a single sequence of one bits and return quickly if so.
 3467 The special cases of all ones and all zeroes return false. */
3468 val = (unsigned HOST_WIDE_INT) val_in;
3469 tmp = val + (val & -val);
3470
3471 if (tmp == (tmp & -tmp))
3472 return (val + 1) > 1;
3473
3474 /* Replicate 32-bit immediates so we can treat them as 64-bit. */
3475 if (mode == SImode)
3476 val = (val << 32) | (val & 0xffffffff);
3477
3478 /* Invert if the immediate doesn't start with a zero bit - this means we
3479 only need to search for sequences of one bits. */
3480 if (val & 1)
3481 val = ~val;
3482
3483 /* Find the first set bit and set tmp to val with the first sequence of one
3484 bits removed. Return success if there is a single sequence of ones. */
3485 first_one = val & -val;
3486 tmp = val & (val + first_one);
3487
3488 if (tmp == 0)
3489 return true;
3490
3491 /* Find the next set bit and compute the difference in bit position. */
3492 next_one = tmp & -tmp;
3493 bits = clz_hwi (first_one) - clz_hwi (next_one);
3494 mask = val ^ tmp;
3495
3496 /* Check the bit position difference is a power of 2, and that the first
3497 sequence of one bits fits within 'bits' bits. */
3498 if ((mask >> bits) != 0 || bits != (bits & -bits))
3499 return false;
3500
3501 /* Check the sequence of one bits is repeated 64/bits times. */
3502 return val == mask * bitmask_imm_mul[__builtin_clz (bits) - 26];
df401d54 3503}
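
/* Examples (illustrative): 0xfff000 is a single run of ones and is
   accepted by the quick check above; 0x00ff00ff00ff00ff is the 16-bit
   element 0x00ff replicated four times and is also a valid bitmask
   immediate; 0 and ~0 are rejected, as is a non-repeating value such as
   0x0123456789abcdef.  */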
3504
3505
3506/* Return true if val is an immediate that can be loaded into a
3507 register in a single instruction. */
3508bool
3754d046 3509aarch64_move_imm (HOST_WIDE_INT val, machine_mode mode)
df401d54 3510{
3511 if (aarch64_movw_imm (val, mode) || aarch64_movw_imm (~val, mode))
3512 return 1;
3513 return aarch64_bitmask_imm (val, mode);
3514}
3515
3516static bool
3754d046 3517aarch64_cannot_force_const_mem (machine_mode mode ATTRIBUTE_UNUSED, rtx x)
df401d54 3518{
3519 rtx base, offset;
beac24f9 3520
df401d54 3521 if (GET_CODE (x) == HIGH)
3522 return true;
3523
3524 split_const (x, &base, &offset);
3525 if (GET_CODE (base) == SYMBOL_REF || GET_CODE (base) == LABEL_REF)
011aed18 3526 {
82882dbd 3527 if (aarch64_classify_symbol (base, offset)
011aed18 3528 != SYMBOL_FORCE_TO_MEM)
3529 return true;
3530 else
3531 /* Avoid generating a 64-bit relocation in ILP32; leave
3532 to aarch64_expand_mov_immediate to handle it properly. */
3533 return mode != ptr_mode;
3534 }
df401d54 3535
3536 return aarch64_tls_referenced_p (x);
3537}
3538
35c51aa0 3539/* Implement TARGET_CASE_VALUES_THRESHOLD. */
3540
3541static unsigned int
3542aarch64_case_values_threshold (void)
3543{
3544 /* Use the specified limit for the number of cases before using jump
3545 tables at higher optimization levels. */
3546 if (optimize > 2
3547 && selected_cpu->tune->max_case_values != 0)
3548 return selected_cpu->tune->max_case_values;
3549 else
3550 return default_case_values_threshold ();
3551}
3552
df401d54 3553/* Return true if register REGNO is a valid index register.
3554 STRICT_P is true if REG_OK_STRICT is in effect. */
3555
3556bool
3557aarch64_regno_ok_for_index_p (int regno, bool strict_p)
3558{
3559 if (!HARD_REGISTER_NUM_P (regno))
3560 {
3561 if (!strict_p)
3562 return true;
3563
3564 if (!reg_renumber)
3565 return false;
3566
3567 regno = reg_renumber[regno];
3568 }
3569 return GP_REGNUM_P (regno);
3570}
3571
3572/* Return true if register REGNO is a valid base register for mode MODE.
3573 STRICT_P is true if REG_OK_STRICT is in effect. */
3574
3575bool
3576aarch64_regno_ok_for_base_p (int regno, bool strict_p)
3577{
3578 if (!HARD_REGISTER_NUM_P (regno))
3579 {
3580 if (!strict_p)
3581 return true;
3582
3583 if (!reg_renumber)
3584 return false;
3585
3586 regno = reg_renumber[regno];
3587 }
3588
3589 /* The fake registers will be eliminated to either the stack or
3590 hard frame pointer, both of which are usually valid base registers.
3591 Reload deals with the cases where the eliminated form isn't valid. */
3592 return (GP_REGNUM_P (regno)
3593 || regno == SP_REGNUM
3594 || regno == FRAME_POINTER_REGNUM
3595 || regno == ARG_POINTER_REGNUM);
3596}
3597
3598/* Return true if X is a valid base register for mode MODE.
3599 STRICT_P is true if REG_OK_STRICT is in effect. */
3600
3601static bool
3602aarch64_base_register_rtx_p (rtx x, bool strict_p)
3603{
3604 if (!strict_p && GET_CODE (x) == SUBREG)
3605 x = SUBREG_REG (x);
3606
3607 return (REG_P (x) && aarch64_regno_ok_for_base_p (REGNO (x), strict_p));
3608}
3609
3610/* Return true if address offset is a valid index. If it is, fill in INFO
3611 appropriately. STRICT_P is true if REG_OK_STRICT is in effect. */
3612
3613static bool
3614aarch64_classify_index (struct aarch64_address_info *info, rtx x,
3754d046 3615 machine_mode mode, bool strict_p)
df401d54 3616{
3617 enum aarch64_address_type type;
3618 rtx index;
3619 int shift;
3620
3621 /* (reg:P) */
3622 if ((REG_P (x) || GET_CODE (x) == SUBREG)
3623 && GET_MODE (x) == Pmode)
3624 {
3625 type = ADDRESS_REG_REG;
3626 index = x;
3627 shift = 0;
3628 }
3629 /* (sign_extend:DI (reg:SI)) */
3630 else if ((GET_CODE (x) == SIGN_EXTEND
3631 || GET_CODE (x) == ZERO_EXTEND)
3632 && GET_MODE (x) == DImode
3633 && GET_MODE (XEXP (x, 0)) == SImode)
3634 {
3635 type = (GET_CODE (x) == SIGN_EXTEND)
3636 ? ADDRESS_REG_SXTW : ADDRESS_REG_UXTW;
3637 index = XEXP (x, 0);
3638 shift = 0;
3639 }
3640 /* (mult:DI (sign_extend:DI (reg:SI)) (const_int scale)) */
3641 else if (GET_CODE (x) == MULT
3642 && (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
3643 || GET_CODE (XEXP (x, 0)) == ZERO_EXTEND)
3644 && GET_MODE (XEXP (x, 0)) == DImode
3645 && GET_MODE (XEXP (XEXP (x, 0), 0)) == SImode
3646 && CONST_INT_P (XEXP (x, 1)))
3647 {
3648 type = (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND)
3649 ? ADDRESS_REG_SXTW : ADDRESS_REG_UXTW;
3650 index = XEXP (XEXP (x, 0), 0);
3651 shift = exact_log2 (INTVAL (XEXP (x, 1)));
3652 }
3653 /* (ashift:DI (sign_extend:DI (reg:SI)) (const_int shift)) */
3654 else if (GET_CODE (x) == ASHIFT
3655 && (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
3656 || GET_CODE (XEXP (x, 0)) == ZERO_EXTEND)
3657 && GET_MODE (XEXP (x, 0)) == DImode
3658 && GET_MODE (XEXP (XEXP (x, 0), 0)) == SImode
3659 && CONST_INT_P (XEXP (x, 1)))
3660 {
3661 type = (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND)
3662 ? ADDRESS_REG_SXTW : ADDRESS_REG_UXTW;
3663 index = XEXP (XEXP (x, 0), 0);
3664 shift = INTVAL (XEXP (x, 1));
3665 }
3666 /* (sign_extract:DI (mult:DI (reg:DI) (const_int scale)) 32+shift 0) */
3667 else if ((GET_CODE (x) == SIGN_EXTRACT
3668 || GET_CODE (x) == ZERO_EXTRACT)
3669 && GET_MODE (x) == DImode
3670 && GET_CODE (XEXP (x, 0)) == MULT
3671 && GET_MODE (XEXP (XEXP (x, 0), 0)) == DImode
3672 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))
3673 {
3674 type = (GET_CODE (x) == SIGN_EXTRACT)
3675 ? ADDRESS_REG_SXTW : ADDRESS_REG_UXTW;
3676 index = XEXP (XEXP (x, 0), 0);
3677 shift = exact_log2 (INTVAL (XEXP (XEXP (x, 0), 1)));
3678 if (INTVAL (XEXP (x, 1)) != 32 + shift
3679 || INTVAL (XEXP (x, 2)) != 0)
3680 shift = -1;
3681 }
3682 /* (and:DI (mult:DI (reg:DI) (const_int scale))
3683 (const_int 0xffffffff<<shift)) */
3684 else if (GET_CODE (x) == AND
3685 && GET_MODE (x) == DImode
3686 && GET_CODE (XEXP (x, 0)) == MULT
3687 && GET_MODE (XEXP (XEXP (x, 0), 0)) == DImode
3688 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
3689 && CONST_INT_P (XEXP (x, 1)))
3690 {
3691 type = ADDRESS_REG_UXTW;
3692 index = XEXP (XEXP (x, 0), 0);
3693 shift = exact_log2 (INTVAL (XEXP (XEXP (x, 0), 1)));
3694 if (INTVAL (XEXP (x, 1)) != (HOST_WIDE_INT)0xffffffff << shift)
3695 shift = -1;
3696 }
3697 /* (sign_extract:DI (ashift:DI (reg:DI) (const_int shift)) 32+shift 0) */
3698 else if ((GET_CODE (x) == SIGN_EXTRACT
3699 || GET_CODE (x) == ZERO_EXTRACT)
3700 && GET_MODE (x) == DImode
3701 && GET_CODE (XEXP (x, 0)) == ASHIFT
3702 && GET_MODE (XEXP (XEXP (x, 0), 0)) == DImode
3703 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))
3704 {
3705 type = (GET_CODE (x) == SIGN_EXTRACT)
3706 ? ADDRESS_REG_SXTW : ADDRESS_REG_UXTW;
3707 index = XEXP (XEXP (x, 0), 0);
3708 shift = INTVAL (XEXP (XEXP (x, 0), 1));
3709 if (INTVAL (XEXP (x, 1)) != 32 + shift
3710 || INTVAL (XEXP (x, 2)) != 0)
3711 shift = -1;
3712 }
3713 /* (and:DI (ashift:DI (reg:DI) (const_int shift))
3714 (const_int 0xffffffff<<shift)) */
3715 else if (GET_CODE (x) == AND
3716 && GET_MODE (x) == DImode
3717 && GET_CODE (XEXP (x, 0)) == ASHIFT
3718 && GET_MODE (XEXP (XEXP (x, 0), 0)) == DImode
3719 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
3720 && CONST_INT_P (XEXP (x, 1)))
3721 {
3722 type = ADDRESS_REG_UXTW;
3723 index = XEXP (XEXP (x, 0), 0);
3724 shift = INTVAL (XEXP (XEXP (x, 0), 1));
3725 if (INTVAL (XEXP (x, 1)) != (HOST_WIDE_INT)0xffffffff << shift)
3726 shift = -1;
3727 }
3728 /* (mult:P (reg:P) (const_int scale)) */
3729 else if (GET_CODE (x) == MULT
3730 && GET_MODE (x) == Pmode
3731 && GET_MODE (XEXP (x, 0)) == Pmode
3732 && CONST_INT_P (XEXP (x, 1)))
3733 {
3734 type = ADDRESS_REG_REG;
3735 index = XEXP (x, 0);
3736 shift = exact_log2 (INTVAL (XEXP (x, 1)));
3737 }
3738 /* (ashift:P (reg:P) (const_int shift)) */
3739 else if (GET_CODE (x) == ASHIFT
3740 && GET_MODE (x) == Pmode
3741 && GET_MODE (XEXP (x, 0)) == Pmode
3742 && CONST_INT_P (XEXP (x, 1)))
3743 {
3744 type = ADDRESS_REG_REG;
3745 index = XEXP (x, 0);
3746 shift = INTVAL (XEXP (x, 1));
3747 }
3748 else
3749 return false;
3750
3751 if (GET_CODE (index) == SUBREG)
3752 index = SUBREG_REG (index);
3753
3754 if ((shift == 0 ||
3755 (shift > 0 && shift <= 3
3756 && (1 << shift) == GET_MODE_SIZE (mode)))
3757 && REG_P (index)
3758 && aarch64_regno_ok_for_index_p (REGNO (index), strict_p))
3759 {
3760 info->type = type;
3761 info->offset = index;
3762 info->shift = shift;
3763 return true;
3764 }
3765
3766 return false;
3767}
3768
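/* Example of a match (illustrative, x1 being an arbitrary index
   register): for a DImode access the rtx
     (mult:DI (reg:DI x1) (const_int 8))
   is classified as ADDRESS_REG_REG with shift == 3, since 1 << 3
   equals the 8-byte access size, and prints as [base, x1, lsl 3].
   A scale that does not match the access size (e.g. const_int 4 with
   DImode) is rejected by the final check above.  */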
3ba73ce3 3769bool
3754d046 3770aarch64_offset_7bit_signed_scaled_p (machine_mode mode, HOST_WIDE_INT offset)
df401d54 3771{
3772 return (offset >= -64 * GET_MODE_SIZE (mode)
3773 && offset < 64 * GET_MODE_SIZE (mode)
3774 && offset % GET_MODE_SIZE (mode) == 0);
3775}
3776
3777static inline bool
3754d046 3778offset_9bit_signed_unscaled_p (machine_mode mode ATTRIBUTE_UNUSED,
df401d54 3779 HOST_WIDE_INT offset)
3780{
3781 return offset >= -256 && offset < 256;
3782}
3783
3784static inline bool
3754d046 3785offset_12bit_unsigned_scaled_p (machine_mode mode, HOST_WIDE_INT offset)
df401d54 3786{
3787 return (offset >= 0
3788 && offset < 4096 * GET_MODE_SIZE (mode)
3789 && offset % GET_MODE_SIZE (mode) == 0);
3790}
3791
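/* For a DImode (8-byte) access these predicates accept, respectively:
   multiples of 8 in [-512, 504] (7-bit signed, scaled),
   any offset in [-256, 255] (9-bit signed, unscaled), and
   multiples of 8 in [0, 32760] (12-bit unsigned, scaled).  */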
9202af54 3792/* Return true if MODE is one of the modes for which we
3793 support LDP/STP operations. */
3794
3795static bool
3796aarch64_mode_valid_for_sched_fusion_p (machine_mode mode)
3797{
3798 return mode == SImode || mode == DImode
3799 || mode == SFmode || mode == DFmode
3800 || (aarch64_vector_mode_supported_p (mode)
3801 && GET_MODE_SIZE (mode) == 8);
3802}
3803
df401d54 3804/* Return true if X is a valid address for machine mode MODE. If it is,
3805 fill in INFO appropriately. STRICT_P is true if REG_OK_STRICT is in
3806 effect. OUTER_CODE is PARALLEL for a load/store pair. */
3807
3808static bool
3809aarch64_classify_address (struct aarch64_address_info *info,
3754d046 3810 rtx x, machine_mode mode,
df401d54 3811 RTX_CODE outer_code, bool strict_p)
3812{
3813 enum rtx_code code = GET_CODE (x);
3814 rtx op0, op1;
8bcdf19e 3815
3816 /* On BE, we use load/store pair for all large int mode load/stores. */
3817 bool load_store_pair_p = (outer_code == PARALLEL
3818 || (BYTES_BIG_ENDIAN
3819 && aarch64_vect_struct_mode_p (mode)));
3820
df401d54 3821 bool allow_reg_index_p =
8bcdf19e 3822 !load_store_pair_p
3823 && (GET_MODE_SIZE (mode) != 16 || aarch64_vector_mode_supported_p (mode))
3824 && !aarch64_vect_struct_mode_p (mode);
3825
3826 /* On LE, for AdvSIMD, don't support anything other than POST_INC or
3827 REG addressing. */
3828 if (aarch64_vect_struct_mode_p (mode) && !BYTES_BIG_ENDIAN
df401d54 3829 && (code != POST_INC && code != REG))
3830 return false;
3831
3832 switch (code)
3833 {
3834 case REG:
3835 case SUBREG:
3836 info->type = ADDRESS_REG_IMM;
3837 info->base = x;
3838 info->offset = const0_rtx;
3839 return aarch64_base_register_rtx_p (x, strict_p);
3840
3841 case PLUS:
3842 op0 = XEXP (x, 0);
3843 op1 = XEXP (x, 1);
ee907931 3844
3845 if (! strict_p
978f06f4 3846 && REG_P (op0)
ee907931 3847 && (op0 == virtual_stack_vars_rtx
3848 || op0 == frame_pointer_rtx
3849 || op0 == arg_pointer_rtx)
978f06f4 3850 && CONST_INT_P (op1))
ee907931 3851 {
3852 info->type = ADDRESS_REG_IMM;
3853 info->base = op0;
3854 info->offset = op1;
3855
3856 return true;
3857 }
3858
df401d54 3859 if (GET_MODE_SIZE (mode) != 0
3860 && CONST_INT_P (op1)
3861 && aarch64_base_register_rtx_p (op0, strict_p))
3862 {
3863 HOST_WIDE_INT offset = INTVAL (op1);
3864
3865 info->type = ADDRESS_REG_IMM;
3866 info->base = op0;
3867 info->offset = op1;
3868
3869 /* TImode and TFmode values are allowed in both pairs of X
3870 registers and individual Q registers. The available
3871 address modes are:
3872 X,X: 7-bit signed scaled offset
3873 Q: 9-bit signed offset
 3874 We conservatively require an offset representable in both modes.
3875 */
3876 if (mode == TImode || mode == TFmode)
3ba73ce3 3877 return (aarch64_offset_7bit_signed_scaled_p (mode, offset)
df401d54 3878 && offset_9bit_signed_unscaled_p (mode, offset));
3879
8bcdf19e 3880 /* A 7-bit offset check because OImode will emit an ldp/stp
 3881 instruction (only big endian will get here).
 3882 For ldp/stp instructions, the offset is scaled by the size of a
 3883 single element of the pair. */
3884 if (mode == OImode)
3885 return aarch64_offset_7bit_signed_scaled_p (TImode, offset);
3886
 3887 /* Three 9/12-bit offset checks because CImode will emit three
 3888 ldr/str instructions (only big endian will get here). */
3889 if (mode == CImode)
3890 return (aarch64_offset_7bit_signed_scaled_p (TImode, offset)
3891 && (offset_9bit_signed_unscaled_p (V16QImode, offset + 32)
3892 || offset_12bit_unsigned_scaled_p (V16QImode,
3893 offset + 32)));
3894
 3895 /* Two 7-bit offset checks because XImode will emit two ldp/stp
 3896 instructions (only big endian will get here). */
3897 if (mode == XImode)
3898 return (aarch64_offset_7bit_signed_scaled_p (TImode, offset)
3899 && aarch64_offset_7bit_signed_scaled_p (TImode,
3900 offset + 32));
3901
3902 if (load_store_pair_p)
df401d54 3903 return ((GET_MODE_SIZE (mode) == 4 || GET_MODE_SIZE (mode) == 8)
3ba73ce3 3904 && aarch64_offset_7bit_signed_scaled_p (mode, offset));
df401d54 3905 else
3906 return (offset_9bit_signed_unscaled_p (mode, offset)
3907 || offset_12bit_unsigned_scaled_p (mode, offset));
3908 }
3909
3910 if (allow_reg_index_p)
3911 {
3912 /* Look for base + (scaled/extended) index register. */
3913 if (aarch64_base_register_rtx_p (op0, strict_p)
3914 && aarch64_classify_index (info, op1, mode, strict_p))
3915 {
3916 info->base = op0;
3917 return true;
3918 }
3919 if (aarch64_base_register_rtx_p (op1, strict_p)
3920 && aarch64_classify_index (info, op0, mode, strict_p))
3921 {
3922 info->base = op1;
3923 return true;
3924 }
3925 }
3926
3927 return false;
3928
3929 case POST_INC:
3930 case POST_DEC:
3931 case PRE_INC:
3932 case PRE_DEC:
3933 info->type = ADDRESS_REG_WB;
3934 info->base = XEXP (x, 0);
3935 info->offset = NULL_RTX;
3936 return aarch64_base_register_rtx_p (info->base, strict_p);
3937
3938 case POST_MODIFY:
3939 case PRE_MODIFY:
3940 info->type = ADDRESS_REG_WB;
3941 info->base = XEXP (x, 0);
3942 if (GET_CODE (XEXP (x, 1)) == PLUS
3943 && CONST_INT_P (XEXP (XEXP (x, 1), 1))
3944 && rtx_equal_p (XEXP (XEXP (x, 1), 0), info->base)
3945 && aarch64_base_register_rtx_p (info->base, strict_p))
3946 {
3947 HOST_WIDE_INT offset;
3948 info->offset = XEXP (XEXP (x, 1), 1);
3949 offset = INTVAL (info->offset);
3950
3951 /* TImode and TFmode values are allowed in both pairs of X
3952 registers and individual Q registers. The available
3953 address modes are:
3954 X,X: 7-bit signed scaled offset
3955 Q: 9-bit signed offset
 3956 We conservatively require an offset representable in both modes.
3957 */
3958 if (mode == TImode || mode == TFmode)
3ba73ce3 3959 return (aarch64_offset_7bit_signed_scaled_p (mode, offset)
df401d54 3960 && offset_9bit_signed_unscaled_p (mode, offset));
3961
8bcdf19e 3962 if (load_store_pair_p)
df401d54 3963 return ((GET_MODE_SIZE (mode) == 4 || GET_MODE_SIZE (mode) == 8)
3ba73ce3 3964 && aarch64_offset_7bit_signed_scaled_p (mode, offset));
df401d54 3965 else
3966 return offset_9bit_signed_unscaled_p (mode, offset);
3967 }
3968 return false;
3969
3970 case CONST:
3971 case SYMBOL_REF:
3972 case LABEL_REF:
df67a123 3973 /* load literal: pc-relative constant pool entry. Only supported
3974 for SI mode or larger. */
df401d54 3975 info->type = ADDRESS_SYMBOLIC;
8bcdf19e 3976
3977 if (!load_store_pair_p && GET_MODE_SIZE (mode) >= 4)
df401d54 3978 {
3979 rtx sym, addend;
3980
3981 split_const (x, &sym, &addend);
ae1cefe6 3982 return ((GET_CODE (sym) == LABEL_REF
3983 || (GET_CODE (sym) == SYMBOL_REF
3984 && CONSTANT_POOL_ADDRESS_P (sym)
acb1dac7 3985 && !aarch64_nopcrelative_literal_loads)));
df401d54 3986 }
3987 return false;
3988
3989 case LO_SUM:
3990 info->type = ADDRESS_LO_SUM;
3991 info->base = XEXP (x, 0);
3992 info->offset = XEXP (x, 1);
3993 if (allow_reg_index_p
3994 && aarch64_base_register_rtx_p (info->base, strict_p))
3995 {
3996 rtx sym, offs;
3997 split_const (info->offset, &sym, &offs);
3998 if (GET_CODE (sym) == SYMBOL_REF
82882dbd 3999 && (aarch64_classify_symbol (sym, offs) == SYMBOL_SMALL_ABSOLUTE))
df401d54 4000 {
4001 /* The symbol and offset must be aligned to the access size. */
4002 unsigned int align;
4003 unsigned int ref_size;
4004
4005 if (CONSTANT_POOL_ADDRESS_P (sym))
4006 align = GET_MODE_ALIGNMENT (get_pool_mode (sym));
4007 else if (TREE_CONSTANT_POOL_ADDRESS_P (sym))
4008 {
4009 tree exp = SYMBOL_REF_DECL (sym);
4010 align = TYPE_ALIGN (TREE_TYPE (exp));
4011 align = CONSTANT_ALIGNMENT (exp, align);
4012 }
4013 else if (SYMBOL_REF_DECL (sym))
4014 align = DECL_ALIGN (SYMBOL_REF_DECL (sym));
f79e87ea 4015 else if (SYMBOL_REF_HAS_BLOCK_INFO_P (sym)
4016 && SYMBOL_REF_BLOCK (sym) != NULL)
4017 align = SYMBOL_REF_BLOCK (sym)->alignment;
df401d54 4018 else
4019 align = BITS_PER_UNIT;
4020
4021 ref_size = GET_MODE_SIZE (mode);
4022 if (ref_size == 0)
4023 ref_size = GET_MODE_SIZE (DImode);
4024
4025 return ((INTVAL (offs) & (ref_size - 1)) == 0
4026 && ((align / BITS_PER_UNIT) & (ref_size - 1)) == 0);
4027 }
4028 }
4029 return false;
4030
4031 default:
4032 return false;
4033 }
4034}
4035
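/* Example of the combined TImode restriction above (illustrative):
   with a valid base register, offset 240 is accepted for a TImode
   access (a multiple of 16 that also fits the 9-bit unscaled range),
   whereas offset 256 is rejected because it falls outside [-256, 255],
   even though it satisfies the 7-bit scaled check.  */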
4036bool
4037aarch64_symbolic_address_p (rtx x)
4038{
4039 rtx offset;
4040
4041 split_const (x, &x, &offset);
4042 return GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == LABEL_REF;
4043}
4044
82882dbd 4045/* Classify the base of symbolic expression X. */
8fd61aba 4046
4047enum aarch64_symbol_type
82882dbd 4048aarch64_classify_symbolic_expression (rtx x)
df401d54 4049{
4050 rtx offset;
8fd61aba 4051
df401d54 4052 split_const (x, &x, &offset);
82882dbd 4053 return aarch64_classify_symbol (x, offset);
df401d54 4054}
4055
4056
4057/* Return TRUE if X is a legitimate address for accessing memory in
4058 mode MODE. */
4059static bool
3754d046 4060aarch64_legitimate_address_hook_p (machine_mode mode, rtx x, bool strict_p)
df401d54 4061{
4062 struct aarch64_address_info addr;
4063
4064 return aarch64_classify_address (&addr, x, mode, MEM, strict_p);
4065}
4066
4067/* Return TRUE if X is a legitimate address for accessing memory in
4068 mode MODE. OUTER_CODE will be PARALLEL if this is a load/store
4069 pair operation. */
4070bool
3754d046 4071aarch64_legitimate_address_p (machine_mode mode, rtx x,
e79f23c4 4072 RTX_CODE outer_code, bool strict_p)
df401d54 4073{
4074 struct aarch64_address_info addr;
4075
4076 return aarch64_classify_address (&addr, x, mode, outer_code, strict_p);
4077}
4078
 4079 /* Return TRUE if rtx X is the immediate constant 0.0. */
4080bool
72841352 4081aarch64_float_const_zero_rtx_p (rtx x)
df401d54 4082{
df401d54 4083 if (GET_MODE (x) == VOIDmode)
4084 return false;
4085
945f7b03 4086 if (REAL_VALUE_MINUS_ZERO (*CONST_DOUBLE_REAL_VALUE (x)))
df401d54 4087 return !HONOR_SIGNED_ZEROS (GET_MODE (x));
945f7b03 4088 return real_equal (CONST_DOUBLE_REAL_VALUE (x), &dconst0);
df401d54 4089}
4090
b49f35d1 4091/* Return the fixed registers used for condition codes. */
4092
4093static bool
4094aarch64_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
4095{
4096 *p1 = CC_REGNUM;
4097 *p2 = INVALID_REGNUM;
4098 return true;
4099}
4100
ba189be5 4101/* Emit call insn with PAT and do aarch64-specific handling. */
4102
3bc53c66 4103void
ba189be5 4104aarch64_emit_call_insn (rtx pat)
4105{
4106 rtx insn = emit_call_insn (pat);
4107
4108 rtx *fusage = &CALL_INSN_FUNCTION_USAGE (insn);
4109 clobber_reg (fusage, gen_rtx_REG (word_mode, IP0_REGNUM));
4110 clobber_reg (fusage, gen_rtx_REG (word_mode, IP1_REGNUM));
4111}
4112
3754d046 4113machine_mode
df401d54 4114aarch64_select_cc_mode (RTX_CODE code, rtx x, rtx y)
4115{
4116 /* All floating point compares return CCFP if it is an equality
4117 comparison, and CCFPE otherwise. */
4118 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
4119 {
4120 switch (code)
4121 {
4122 case EQ:
4123 case NE:
4124 case UNORDERED:
4125 case ORDERED:
4126 case UNLT:
4127 case UNLE:
4128 case UNGT:
4129 case UNGE:
4130 case UNEQ:
4131 case LTGT:
4132 return CCFPmode;
4133
4134 case LT:
4135 case LE:
4136 case GT:
4137 case GE:
4138 return CCFPEmode;
4139
4140 default:
4141 gcc_unreachable ();
4142 }
4143 }
4144
4145 if ((GET_MODE (x) == SImode || GET_MODE (x) == DImode)
4146 && y == const0_rtx
4147 && (code == EQ || code == NE || code == LT || code == GE)
62bad789 4148 && (GET_CODE (x) == PLUS || GET_CODE (x) == MINUS || GET_CODE (x) == AND
050a2b40 4149 || GET_CODE (x) == NEG
4150 || (GET_CODE (x) == ZERO_EXTRACT && CONST_INT_P (XEXP (x, 1))
4151 && CONST_INT_P (XEXP (x, 2)))))
df401d54 4152 return CC_NZmode;
4153
46b590a1 4154 /* A compare with a shifted operand. Because of canonicalization,
df401d54 4155 the comparison will have to be swapped when we emit the assembly
4156 code. */
4157 if ((GET_MODE (x) == SImode || GET_MODE (x) == DImode)
978f06f4 4158 && (REG_P (y) || GET_CODE (y) == SUBREG)
df401d54 4159 && (GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT
4160 || GET_CODE (x) == LSHIFTRT
46b590a1 4161 || GET_CODE (x) == ZERO_EXTEND || GET_CODE (x) == SIGN_EXTEND))
df401d54 4162 return CC_SWPmode;
4163
46b590a1 4164 /* Similarly for a negated operand, but we can only do this for
4165 equalities. */
4166 if ((GET_MODE (x) == SImode || GET_MODE (x) == DImode)
978f06f4 4167 && (REG_P (y) || GET_CODE (y) == SUBREG)
46b590a1 4168 && (code == EQ || code == NE)
4169 && GET_CODE (x) == NEG)
4170 return CC_Zmode;
4171
df401d54 4172 /* A compare of a mode narrower than SI mode against zero can be done
4173 by extending the value in the comparison. */
4174 if ((GET_MODE (x) == QImode || GET_MODE (x) == HImode)
4175 && y == const0_rtx)
4176 /* Only use sign-extension if we really need it. */
4177 return ((code == GT || code == GE || code == LE || code == LT)
4178 ? CC_SESWPmode : CC_ZESWPmode);
4179
4180 /* For everything else, return CCmode. */
4181 return CCmode;
4182}
4183
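/* For example (illustrative): comparing (plus:DI x y) against zero
   with EQ/NE/LT/GE selects CC_NZmode, so the addition itself can set
   the flags (e.g. with an ADDS).  Comparing
   (ashift:DI x (const_int 3)) against a register selects CC_SWPmode;
   the operands end up reversed, and the condition is swapped
   accordingly when the assembly is printed.  */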
4ac2176a 4184static int
4185aarch64_get_condition_code_1 (enum machine_mode, enum rtx_code);
4186
1e757156 4187int
df401d54 4188aarch64_get_condition_code (rtx x)
4189{
3754d046 4190 machine_mode mode = GET_MODE (XEXP (x, 0));
df401d54 4191 enum rtx_code comp_code = GET_CODE (x);
4192
4193 if (GET_MODE_CLASS (mode) != MODE_CC)
4194 mode = SELECT_CC_MODE (comp_code, XEXP (x, 0), XEXP (x, 1));
4ac2176a 4195 return aarch64_get_condition_code_1 (mode, comp_code);
4196}
df401d54 4197
4ac2176a 4198static int
4199aarch64_get_condition_code_1 (enum machine_mode mode, enum rtx_code comp_code)
4200{
4201 int ne = -1, eq = -1;
df401d54 4202 switch (mode)
4203 {
4204 case CCFPmode:
4205 case CCFPEmode:
4206 switch (comp_code)
4207 {
4208 case GE: return AARCH64_GE;
4209 case GT: return AARCH64_GT;
4210 case LE: return AARCH64_LS;
4211 case LT: return AARCH64_MI;
4212 case NE: return AARCH64_NE;
4213 case EQ: return AARCH64_EQ;
4214 case ORDERED: return AARCH64_VC;
4215 case UNORDERED: return AARCH64_VS;
4216 case UNLT: return AARCH64_LT;
4217 case UNLE: return AARCH64_LE;
4218 case UNGT: return AARCH64_HI;
4219 case UNGE: return AARCH64_PL;
1e757156 4220 default: return -1;
df401d54 4221 }
4222 break;
4223
4ac2176a 4224 case CC_DNEmode:
4225 ne = AARCH64_NE;
4226 eq = AARCH64_EQ;
4227 break;
4228
4229 case CC_DEQmode:
4230 ne = AARCH64_EQ;
4231 eq = AARCH64_NE;
4232 break;
4233
4234 case CC_DGEmode:
4235 ne = AARCH64_GE;
4236 eq = AARCH64_LT;
4237 break;
4238
4239 case CC_DLTmode:
4240 ne = AARCH64_LT;
4241 eq = AARCH64_GE;
4242 break;
4243
4244 case CC_DGTmode:
4245 ne = AARCH64_GT;
4246 eq = AARCH64_LE;
4247 break;
4248
4249 case CC_DLEmode:
4250 ne = AARCH64_LE;
4251 eq = AARCH64_GT;
4252 break;
4253
4254 case CC_DGEUmode:
4255 ne = AARCH64_CS;
4256 eq = AARCH64_CC;
4257 break;
4258
4259 case CC_DLTUmode:
4260 ne = AARCH64_CC;
4261 eq = AARCH64_CS;
4262 break;
4263
4264 case CC_DGTUmode:
4265 ne = AARCH64_HI;
4266 eq = AARCH64_LS;
4267 break;
4268
4269 case CC_DLEUmode:
4270 ne = AARCH64_LS;
4271 eq = AARCH64_HI;
4272 break;
4273
df401d54 4274 case CCmode:
4275 switch (comp_code)
4276 {
4277 case NE: return AARCH64_NE;
4278 case EQ: return AARCH64_EQ;
4279 case GE: return AARCH64_GE;
4280 case GT: return AARCH64_GT;
4281 case LE: return AARCH64_LE;
4282 case LT: return AARCH64_LT;
4283 case GEU: return AARCH64_CS;
4284 case GTU: return AARCH64_HI;
4285 case LEU: return AARCH64_LS;
4286 case LTU: return AARCH64_CC;
1e757156 4287 default: return -1;
df401d54 4288 }
4289 break;
4290
4291 case CC_SWPmode:
4292 case CC_ZESWPmode:
4293 case CC_SESWPmode:
4294 switch (comp_code)
4295 {
4296 case NE: return AARCH64_NE;
4297 case EQ: return AARCH64_EQ;
4298 case GE: return AARCH64_LE;
4299 case GT: return AARCH64_LT;
4300 case LE: return AARCH64_GE;
4301 case LT: return AARCH64_GT;
4302 case GEU: return AARCH64_LS;
4303 case GTU: return AARCH64_CC;
4304 case LEU: return AARCH64_CS;
4305 case LTU: return AARCH64_HI;
1e757156 4306 default: return -1;
df401d54 4307 }
4308 break;
4309
4310 case CC_NZmode:
4311 switch (comp_code)
4312 {
4313 case NE: return AARCH64_NE;
4314 case EQ: return AARCH64_EQ;
4315 case GE: return AARCH64_PL;
4316 case LT: return AARCH64_MI;
1e757156 4317 default: return -1;
df401d54 4318 }
4319 break;
4320
46b590a1 4321 case CC_Zmode:
4322 switch (comp_code)
4323 {
4324 case NE: return AARCH64_NE;
4325 case EQ: return AARCH64_EQ;
1e757156 4326 default: return -1;
46b590a1 4327 }
4328 break;
4329
df401d54 4330 default:
1e757156 4331 return -1;
df401d54 4332 break;
4333 }
4ac2176a 4334
4335 if (comp_code == NE)
4336 return ne;
4337
4338 if (comp_code == EQ)
4339 return eq;
4340
4341 return -1;
df401d54 4342}
4343
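/* Illustration of the swapped mapping above: in CC_SWPmode a GT
   comparison returns AARCH64_LT and GEU returns AARCH64_LS, because
   the operands were reversed when the flags were set, so the printed
   condition must be the swapped (not the inverted) one.  */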
bead021f 4344bool
4345aarch64_const_vec_all_same_in_range_p (rtx x,
4346 HOST_WIDE_INT minval,
4347 HOST_WIDE_INT maxval)
4348{
4349 HOST_WIDE_INT firstval;
4350 int count, i;
4351
4352 if (GET_CODE (x) != CONST_VECTOR
4353 || GET_MODE_CLASS (GET_MODE (x)) != MODE_VECTOR_INT)
4354 return false;
4355
4356 firstval = INTVAL (CONST_VECTOR_ELT (x, 0));
4357 if (firstval < minval || firstval > maxval)
4358 return false;
4359
4360 count = CONST_VECTOR_NUNITS (x);
4361 for (i = 1; i < count; i++)
4362 if (INTVAL (CONST_VECTOR_ELT (x, i)) != firstval)
4363 return false;
4364
4365 return true;
4366}
4367
4368bool
4369aarch64_const_vec_all_same_int_p (rtx x, HOST_WIDE_INT val)
4370{
4371 return aarch64_const_vec_all_same_in_range_p (x, val, val);
4372}
4373
df401d54 4374
478473ed 4375/* N Z C V. */
4376#define AARCH64_CC_V 1
4377#define AARCH64_CC_C (1 << 1)
4378#define AARCH64_CC_Z (1 << 2)
4379#define AARCH64_CC_N (1 << 3)
4380
4381/* N Z C V flags for ccmp. The first code is for AND op and the other
4382 is for IOR op. Indexed by AARCH64_COND_CODE. */
4383static const int aarch64_nzcv_codes[][2] =
4384{
4385 {AARCH64_CC_Z, 0}, /* EQ, Z == 1. */
4386 {0, AARCH64_CC_Z}, /* NE, Z == 0. */
4387 {AARCH64_CC_C, 0}, /* CS, C == 1. */
4388 {0, AARCH64_CC_C}, /* CC, C == 0. */
4389 {AARCH64_CC_N, 0}, /* MI, N == 1. */
4390 {0, AARCH64_CC_N}, /* PL, N == 0. */
4391 {AARCH64_CC_V, 0}, /* VS, V == 1. */
4392 {0, AARCH64_CC_V}, /* VC, V == 0. */
 4393 {AARCH64_CC_C, 0}, /* HI, C == 1 && Z == 0. */
4394 {0, AARCH64_CC_C}, /* LS, !(C == 1 && Z == 0). */
4395 {0, AARCH64_CC_V}, /* GE, N == V. */
4396 {AARCH64_CC_V, 0}, /* LT, N != V. */
4397 {0, AARCH64_CC_Z}, /* GT, Z == 0 && N == V. */
4398 {AARCH64_CC_Z, 0}, /* LE, !(Z == 0 && N == V). */
4399 {0, 0}, /* AL, Any. */
4400 {0, 0}, /* NV, Any. */
4401};
4402
4403int
4404aarch64_ccmp_mode_to_code (enum machine_mode mode)
4405{
4406 switch (mode)
4407 {
4408 case CC_DNEmode:
4409 return NE;
4410
4411 case CC_DEQmode:
4412 return EQ;
4413
4414 case CC_DLEmode:
4415 return LE;
4416
4417 case CC_DGTmode:
4418 return GT;
4419
4420 case CC_DLTmode:
4421 return LT;
4422
4423 case CC_DGEmode:
4424 return GE;
4425
4426 case CC_DLEUmode:
4427 return LEU;
4428
4429 case CC_DGTUmode:
4430 return GTU;
4431
4432 case CC_DLTUmode:
4433 return LTU;
4434
4435 case CC_DGEUmode:
4436 return GEU;
4437
4438 default:
4439 gcc_unreachable ();
4440 }
4441}
4442
4443
3c047fe9 4444static void
4445aarch64_print_operand (FILE *f, rtx x, int code)
df401d54 4446{
4447 switch (code)
4448 {
e0aaea4d 4449 /* An integer or symbol address without a preceding # sign. */
4450 case 'c':
4451 switch (GET_CODE (x))
4452 {
4453 case CONST_INT:
4454 fprintf (f, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
4455 break;
4456
4457 case SYMBOL_REF:
4458 output_addr_const (f, x);
4459 break;
4460
4461 case CONST:
4462 if (GET_CODE (XEXP (x, 0)) == PLUS
4463 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF)
4464 {
4465 output_addr_const (f, x);
4466 break;
4467 }
4468 /* Fall through. */
4469
4470 default:
4471 output_operand_lossage ("Unsupported operand for code '%c'", code);
4472 }
4473 break;
4474
df401d54 4475 case 'e':
4476 /* Print the sign/zero-extend size as a character 8->b, 16->h, 32->w. */
4477 {
4478 int n;
4479
978f06f4 4480 if (!CONST_INT_P (x)
df401d54 4481 || (n = exact_log2 (INTVAL (x) & ~7)) <= 0)
4482 {
4483 output_operand_lossage ("invalid operand for '%%%c'", code);
4484 return;
4485 }
4486
4487 switch (n)
4488 {
4489 case 3:
4490 fputc ('b', f);
4491 break;
4492 case 4:
4493 fputc ('h', f);
4494 break;
4495 case 5:
4496 fputc ('w', f);
4497 break;
4498 default:
4499 output_operand_lossage ("invalid operand for '%%%c'", code);
4500 return;
4501 }
4502 }
4503 break;
4504
4505 case 'p':
4506 {
4507 int n;
4508
4509 /* Print N such that 2^N == X. */
978f06f4 4510 if (!CONST_INT_P (x) || (n = exact_log2 (INTVAL (x))) < 0)
df401d54 4511 {
4512 output_operand_lossage ("invalid operand for '%%%c'", code);
4513 return;
4514 }
4515
4516 asm_fprintf (f, "%d", n);
4517 }
4518 break;
4519
4520 case 'P':
4521 /* Print the number of non-zero bits in X (a const_int). */
978f06f4 4522 if (!CONST_INT_P (x))
df401d54 4523 {
4524 output_operand_lossage ("invalid operand for '%%%c'", code);
4525 return;
4526 }
4527
34bf6b7f 4528 asm_fprintf (f, "%u", popcount_hwi (INTVAL (x)));
df401d54 4529 break;
4530
4531 case 'H':
4532 /* Print the higher numbered register of a pair (TImode) of regs. */
978f06f4 4533 if (!REG_P (x) || !GP_REGNUM_P (REGNO (x) + 1))
df401d54 4534 {
4535 output_operand_lossage ("invalid operand for '%%%c'", code);
4536 return;
4537 }
4538
b9ccfbdf 4539 asm_fprintf (f, "%s", reg_names [REGNO (x) + 1]);
df401d54 4540 break;
4541
df401d54 4542 case 'm':
1e757156 4543 {
4544 int cond_code;
4545 /* Print a condition (eq, ne, etc). */
df401d54 4546
1e757156 4547 /* CONST_TRUE_RTX means always -- that's the default. */
4548 if (x == const_true_rtx)
df401d54 4549 return;
df401d54 4550
1e757156 4551 if (!COMPARISON_P (x))
4552 {
4553 output_operand_lossage ("invalid operand for '%%%c'", code);
4554 return;
4555 }
4556
4557 cond_code = aarch64_get_condition_code (x);
4558 gcc_assert (cond_code >= 0);
4559 fputs (aarch64_condition_codes[cond_code], f);
4560 }
df401d54 4561 break;
4562
4563 case 'M':
1e757156 4564 {
4565 int cond_code;
4566 /* Print the inverse of a condition (eq <-> ne, etc). */
df401d54 4567
1e757156 4568 /* CONST_TRUE_RTX means never -- that's the default. */
4569 if (x == const_true_rtx)
4570 {
4571 fputs ("nv", f);
4572 return;
4573 }
df401d54 4574
1e757156 4575 if (!COMPARISON_P (x))
4576 {
4577 output_operand_lossage ("invalid operand for '%%%c'", code);
4578 return;
4579 }
4580 cond_code = aarch64_get_condition_code (x);
4581 gcc_assert (cond_code >= 0);
4582 fputs (aarch64_condition_codes[AARCH64_INVERSE_CONDITION_CODE
4583 (cond_code)], f);
4584 }
df401d54 4585 break;
4586
4587 case 'b':
4588 case 'h':
4589 case 's':
4590 case 'd':
4591 case 'q':
4592 /* Print a scalar FP/SIMD register name. */
4593 if (!REG_P (x) || !FP_REGNUM_P (REGNO (x)))
4594 {
4595 output_operand_lossage ("incompatible floating point / vector register operand for '%%%c'", code);
4596 return;
4597 }
1b457b76 4598 asm_fprintf (f, "%c%d", code, REGNO (x) - V0_REGNUM);
df401d54 4599 break;
4600
4601 case 'S':
4602 case 'T':
4603 case 'U':
4604 case 'V':
4605 /* Print the first FP/SIMD register name in a list. */
4606 if (!REG_P (x) || !FP_REGNUM_P (REGNO (x)))
4607 {
4608 output_operand_lossage ("incompatible floating point / vector register operand for '%%%c'", code);
4609 return;
4610 }
1b457b76 4611 asm_fprintf (f, "v%d", REGNO (x) - V0_REGNUM + (code - 'S'));
df401d54 4612 break;
4613
8bcdf19e 4614 case 'R':
4615 /* Print a scalar FP/SIMD register name + 1. */
4616 if (!REG_P (x) || !FP_REGNUM_P (REGNO (x)))
4617 {
4618 output_operand_lossage ("incompatible floating point / vector register operand for '%%%c'", code);
4619 return;
4620 }
4621 asm_fprintf (f, "q%d", REGNO (x) - V0_REGNUM + 1);
4622 break;
4623
c6ed7a1a 4624 case 'X':
2f42b453 4625 /* Print bottom 16 bits of integer constant in hex. */
978f06f4 4626 if (!CONST_INT_P (x))
c6ed7a1a 4627 {
4628 output_operand_lossage ("invalid operand for '%%%c'", code);
4629 return;
4630 }
2f42b453 4631 asm_fprintf (f, "0x%wx", UINTVAL (x) & 0xffff);
c6ed7a1a 4632 break;
4633
df401d54 4634 case 'w':
4635 case 'x':
4636 /* Print a general register name or the zero register (32-bit or
4637 64-bit). */
72841352 4638 if (x == const0_rtx
4639 || (CONST_DOUBLE_P (x) && aarch64_float_const_zero_rtx_p (x)))
df401d54 4640 {
1b457b76 4641 asm_fprintf (f, "%czr", code);
df401d54 4642 break;
4643 }
4644
4645 if (REG_P (x) && GP_REGNUM_P (REGNO (x)))
4646 {
1b457b76 4647 asm_fprintf (f, "%c%d", code, REGNO (x) - R0_REGNUM);
df401d54 4648 break;
4649 }
4650
4651 if (REG_P (x) && REGNO (x) == SP_REGNUM)
4652 {
1b457b76 4653 asm_fprintf (f, "%ssp", code == 'w' ? "w" : "");
df401d54 4654 break;
4655 }
4656
4657 /* Fall through */
4658
4659 case 0:
 4660 /* Print a normal operand. If it's a general register, then we
 4661 assume DImode. */
4662 if (x == NULL)
4663 {
4664 output_operand_lossage ("missing operand");
4665 return;
4666 }
4667
4668 switch (GET_CODE (x))
4669 {
4670 case REG:
b9ccfbdf 4671 asm_fprintf (f, "%s", reg_names [REGNO (x)]);
df401d54 4672 break;
4673
4674 case MEM:
3c047fe9 4675 output_address (GET_MODE (x), XEXP (x, 0));
df401d54 4676 break;
4677
1d46fab9 4678 case CONST:
df401d54 4679 case LABEL_REF:
4680 case SYMBOL_REF:
4681 output_addr_const (asm_out_file, x);
4682 break;
4683
4684 case CONST_INT:
4685 asm_fprintf (f, "%wd", INTVAL (x));
4686 break;
4687
4688 case CONST_VECTOR:
72841352 4689 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_VECTOR_INT)
4690 {
bead021f 4691 gcc_assert (
4692 aarch64_const_vec_all_same_in_range_p (x,
4693 HOST_WIDE_INT_MIN,
4694 HOST_WIDE_INT_MAX));
72841352 4695 asm_fprintf (f, "%wd", INTVAL (CONST_VECTOR_ELT (x, 0)));
4696 }
4697 else if (aarch64_simd_imm_zero_p (x, GET_MODE (x)))
4698 {
4699 fputc ('0', f);
4700 }
4701 else
4702 gcc_unreachable ();
df401d54 4703 break;
4704
72841352 4705 case CONST_DOUBLE:
cac639db 4706 /* Since we define TARGET_SUPPORTS_WIDE_INT we shouldn't ever
4707 be getting CONST_DOUBLEs holding integers. */
4708 gcc_assert (GET_MODE (x) != VOIDmode);
4709 if (aarch64_float_const_zero_rtx_p (x))
72841352 4710 {
4711 fputc ('0', f);
4712 break;
4713 }
4714 else if (aarch64_float_const_representable_p (x))
4715 {
4716#define buf_size 20
4717 char float_buf[buf_size] = {'\0'};
945f7b03 4718 real_to_decimal_for_mode (float_buf,
4719 CONST_DOUBLE_REAL_VALUE (x),
72841352 4720 buf_size, buf_size,
4721 1, GET_MODE (x));
4722 asm_fprintf (asm_out_file, "%s", float_buf);
4723 break;
4724#undef buf_size
4725 }
4726 output_operand_lossage ("invalid constant");
4727 return;
df401d54 4728 default:
4729 output_operand_lossage ("invalid operand");
4730 return;
4731 }
4732 break;
4733
4734 case 'A':
4735 if (GET_CODE (x) == HIGH)
4736 x = XEXP (x, 0);
4737
82882dbd 4738 switch (aarch64_classify_symbolic_expression (x))
df401d54 4739 {
41754803 4740 case SYMBOL_SMALL_GOT_4G:
df401d54 4741 asm_fprintf (asm_out_file, ":got:");
4742 break;
4743
4744 case SYMBOL_SMALL_TLSGD:
4745 asm_fprintf (asm_out_file, ":tlsgd:");
4746 break;
4747
4748 case SYMBOL_SMALL_TLSDESC:
4749 asm_fprintf (asm_out_file, ":tlsdesc:");
4750 break;
4751
7bff97c2 4752 case SYMBOL_SMALL_TLSIE:
df401d54 4753 asm_fprintf (asm_out_file, ":gottprel:");
4754 break;
4755
950cf06f 4756 case SYMBOL_TLSLE24:
df401d54 4757 asm_fprintf (asm_out_file, ":tprel:");
4758 break;
4759
2c97ec73 4760 case SYMBOL_TINY_GOT:
4761 gcc_unreachable ();
4762 break;
4763
df401d54 4764 default:
4765 break;
4766 }
4767 output_addr_const (asm_out_file, x);
4768 break;
4769
4770 case 'L':
82882dbd 4771 switch (aarch64_classify_symbolic_expression (x))
df401d54 4772 {
41754803 4773 case SYMBOL_SMALL_GOT_4G:
df401d54 4774 asm_fprintf (asm_out_file, ":lo12:");
4775 break;
4776
4777 case SYMBOL_SMALL_TLSGD:
4778 asm_fprintf (asm_out_file, ":tlsgd_lo12:");
4779 break;
4780
4781 case SYMBOL_SMALL_TLSDESC:
4782 asm_fprintf (asm_out_file, ":tlsdesc_lo12:");
4783 break;
4784
7bff97c2 4785 case SYMBOL_SMALL_TLSIE:
df401d54 4786 asm_fprintf (asm_out_file, ":gottprel_lo12:");
4787 break;
4788
57507fa5 4789 case SYMBOL_TLSLE12:
4790 asm_fprintf (asm_out_file, ":tprel_lo12:");
4791 break;
4792
950cf06f 4793 case SYMBOL_TLSLE24:
df401d54 4794 asm_fprintf (asm_out_file, ":tprel_lo12_nc:");
4795 break;
4796
2c97ec73 4797 case SYMBOL_TINY_GOT:
4798 asm_fprintf (asm_out_file, ":got:");
4799 break;
4800
f546e2d1 4801 case SYMBOL_TINY_TLSIE:
4802 asm_fprintf (asm_out_file, ":gottprel:");
4803 break;
4804
df401d54 4805 default:
4806 break;
4807 }
4808 output_addr_const (asm_out_file, x);
4809 break;
4810
4811 case 'G':
4812
82882dbd 4813 switch (aarch64_classify_symbolic_expression (x))
df401d54 4814 {
950cf06f 4815 case SYMBOL_TLSLE24:
df401d54 4816 asm_fprintf (asm_out_file, ":tprel_hi12:");
4817 break;
4818 default:
4819 break;
4820 }
4821 output_addr_const (asm_out_file, x);
4822 break;
4823
478473ed 4824 case 'K':
4825 {
4826 int cond_code;
4827 /* Print nzcv. */
4828
4829 if (!COMPARISON_P (x))
4830 {
4831 output_operand_lossage ("invalid operand for '%%%c'", code);
4832 return;
4833 }
4834
4835 cond_code = aarch64_get_condition_code_1 (CCmode, GET_CODE (x));
4836 gcc_assert (cond_code >= 0);
4837 asm_fprintf (f, "%d", aarch64_nzcv_codes[cond_code][0]);
4838 }
4839 break;
4840
4841 case 'k':
4842 {
4843 int cond_code;
4844 /* Print nzcv. */
4845
4846 if (!COMPARISON_P (x))
4847 {
4848 output_operand_lossage ("invalid operand for '%%%c'", code);
4849 return;
4850 }
4851
4852 cond_code = aarch64_get_condition_code_1 (CCmode, GET_CODE (x));
4853 gcc_assert (cond_code >= 0);
4854 asm_fprintf (f, "%d", aarch64_nzcv_codes[cond_code][1]);
4855 }
4856 break;
4857
df401d54 4858 default:
4859 output_operand_lossage ("invalid operand prefix '%%%c'", code);
4860 return;
4861 }
4862}
4863
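/* A few examples of the modifiers handled above (illustrative):
   %w0 / %x0  print the 32-bit or 64-bit name of a general register
              (or wzr/xzr for a zero constant);
   %e         prints b, h or w for an extend size of 8, 16 or 32;
   %m / %M    print a condition (eq, ne, ...) and its inverse;
   %H         prints the second register of a TImode register pair.  */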
3c047fe9 4864static void
4865aarch64_print_operand_address (FILE *f, machine_mode mode, rtx x)
df401d54 4866{
4867 struct aarch64_address_info addr;
4868
3c047fe9 4869 if (aarch64_classify_address (&addr, x, mode, MEM, true))
df401d54 4870 switch (addr.type)
4871 {
4872 case ADDRESS_REG_IMM:
4873 if (addr.offset == const0_rtx)
b9ccfbdf 4874 asm_fprintf (f, "[%s]", reg_names [REGNO (addr.base)]);
df401d54 4875 else
9a9ae780 4876 asm_fprintf (f, "[%s, %wd]", reg_names [REGNO (addr.base)],
df401d54 4877 INTVAL (addr.offset));
4878 return;
4879
4880 case ADDRESS_REG_REG:
4881 if (addr.shift == 0)
9a9ae780 4882 asm_fprintf (f, "[%s, %s]", reg_names [REGNO (addr.base)],
b9ccfbdf 4883 reg_names [REGNO (addr.offset)]);
df401d54 4884 else
9a9ae780 4885 asm_fprintf (f, "[%s, %s, lsl %u]", reg_names [REGNO (addr.base)],
b9ccfbdf 4886 reg_names [REGNO (addr.offset)], addr.shift);
df401d54 4887 return;
4888
4889 case ADDRESS_REG_UXTW:
4890 if (addr.shift == 0)
9a9ae780 4891 asm_fprintf (f, "[%s, w%d, uxtw]", reg_names [REGNO (addr.base)],
df401d54 4892 REGNO (addr.offset) - R0_REGNUM);
4893 else
9a9ae780 4894 asm_fprintf (f, "[%s, w%d, uxtw %u]", reg_names [REGNO (addr.base)],
df401d54 4895 REGNO (addr.offset) - R0_REGNUM, addr.shift);
4896 return;
4897
4898 case ADDRESS_REG_SXTW:
4899 if (addr.shift == 0)
9a9ae780 4900 asm_fprintf (f, "[%s, w%d, sxtw]", reg_names [REGNO (addr.base)],
df401d54 4901 REGNO (addr.offset) - R0_REGNUM);
4902 else
9a9ae780 4903 asm_fprintf (f, "[%s, w%d, sxtw %u]", reg_names [REGNO (addr.base)],
df401d54 4904 REGNO (addr.offset) - R0_REGNUM, addr.shift);
4905 return;
4906
4907 case ADDRESS_REG_WB:
4908 switch (GET_CODE (x))
4909 {
4910 case PRE_INC:
9a9ae780 4911 asm_fprintf (f, "[%s, %d]!", reg_names [REGNO (addr.base)],
3c047fe9 4912 GET_MODE_SIZE (mode));
df401d54 4913 return;
4914 case POST_INC:
9a9ae780 4915 asm_fprintf (f, "[%s], %d", reg_names [REGNO (addr.base)],
3c047fe9 4916 GET_MODE_SIZE (mode));
df401d54 4917 return;
4918 case PRE_DEC:
9a9ae780 4919 asm_fprintf (f, "[%s, -%d]!", reg_names [REGNO (addr.base)],
3c047fe9 4920 GET_MODE_SIZE (mode));
df401d54 4921 return;
4922 case POST_DEC:
9a9ae780 4923 asm_fprintf (f, "[%s], -%d", reg_names [REGNO (addr.base)],
3c047fe9 4924 GET_MODE_SIZE (mode));
df401d54 4925 return;
4926 case PRE_MODIFY:
9a9ae780 4927 asm_fprintf (f, "[%s, %wd]!", reg_names [REGNO (addr.base)],
df401d54 4928 INTVAL (addr.offset));
4929 return;
4930 case POST_MODIFY:
9a9ae780 4931 asm_fprintf (f, "[%s], %wd", reg_names [REGNO (addr.base)],
df401d54 4932 INTVAL (addr.offset));
4933 return;
4934 default:
4935 break;
4936 }
4937 break;
4938
4939 case ADDRESS_LO_SUM:
9a9ae780 4940 asm_fprintf (f, "[%s, #:lo12:", reg_names [REGNO (addr.base)]);
df401d54 4941 output_addr_const (f, addr.offset);
4942 asm_fprintf (f, "]");
4943 return;
4944
4945 case ADDRESS_SYMBOLIC:
4946 break;
4947 }
4948
4949 output_addr_const (f, x);
4950}
4951
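/* Example outputs (illustrative, with x0/x1 as base/index registers
   and "symbol" standing for some symbolic operand):
   ADDRESS_REG_IMM with offset 16        ->  [x0, 16]
   ADDRESS_REG_REG with shift 3          ->  [x0, x1, lsl 3]
   ADDRESS_REG_WB, PRE_MODIFY by 32      ->  [x0, 32]!
   ADDRESS_REG_WB, POST_INC in DImode    ->  [x0], 8
   ADDRESS_LO_SUM                        ->  [x0, #:lo12:symbol]  */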
df401d54 4952bool
4953aarch64_label_mentioned_p (rtx x)
4954{
4955 const char *fmt;
4956 int i;
4957
4958 if (GET_CODE (x) == LABEL_REF)
4959 return true;
4960
4961 /* UNSPEC_TLS entries for a symbol include a LABEL_REF for the
4962 referencing instruction, but they are constant offsets, not
4963 symbols. */
4964 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
4965 return false;
4966
4967 fmt = GET_RTX_FORMAT (GET_CODE (x));
4968 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
4969 {
4970 if (fmt[i] == 'E')
4971 {
4972 int j;
4973
4974 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
4975 if (aarch64_label_mentioned_p (XVECEXP (x, i, j)))
4976 return 1;
4977 }
4978 else if (fmt[i] == 'e' && aarch64_label_mentioned_p (XEXP (x, i)))
4979 return 1;
4980 }
4981
4982 return 0;
4983}
4984
4985/* Implement REGNO_REG_CLASS. */
4986
4987enum reg_class
4988aarch64_regno_regclass (unsigned regno)
4989{
4990 if (GP_REGNUM_P (regno))
e5160b8b 4991 return GENERAL_REGS;
df401d54 4992
4993 if (regno == SP_REGNUM)
4994 return STACK_REG;
4995
4996 if (regno == FRAME_POINTER_REGNUM
4997 || regno == ARG_POINTER_REGNUM)
11e0f6d7 4998 return POINTER_REGS;
df401d54 4999
5000 if (FP_REGNUM_P (regno))
5001 return FP_LO_REGNUM_P (regno) ? FP_LO_REGS : FP_REGS;
5002
5003 return NO_REGS;
5004}
5005
10635db9 5006static rtx
3754d046 5007aarch64_legitimize_address (rtx x, rtx /* orig_x */, machine_mode mode)
10635db9 5008{
5009 /* Try to split X+CONST into Y=X+(CONST & ~mask), Y+(CONST&mask),
5010 where mask is selected by alignment and size of the offset.
5011 We try to pick as large a range for the offset as possible to
5012 maximize the chance of a CSE. However, for aligned addresses
5013 we limit the range to 4k so that structures with different sized
c8a9f728 5014 elements are likely to use the same base. We need to be careful
5015 not to split a CONST for some forms of address expression, otherwise
5016 it will generate sub-optimal code. */
10635db9 5017
5018 if (GET_CODE (x) == PLUS && CONST_INT_P (XEXP (x, 1)))
5019 {
5020 HOST_WIDE_INT offset = INTVAL (XEXP (x, 1));
5021 HOST_WIDE_INT base_offset;
5022
c8a9f728 5023 if (GET_CODE (XEXP (x, 0)) == PLUS)
5024 {
5025 rtx op0 = XEXP (XEXP (x, 0), 0);
5026 rtx op1 = XEXP (XEXP (x, 0), 1);
5027
5028 /* Address expressions of the form Ra + Rb + CONST.
5029
5030 If CONST is within the range supported by the addressing
5031 mode "reg+offset", do not split CONST and use the
5032 sequence
5033 Rt = Ra + Rb;
5034 addr = Rt + CONST. */
5035 if (REG_P (op0) && REG_P (op1))
5036 {
5037 machine_mode addr_mode = GET_MODE (x);
5038 rtx base = gen_reg_rtx (addr_mode);
5039 rtx addr = plus_constant (addr_mode, base, offset);
5040
5041 if (aarch64_legitimate_address_hook_p (mode, addr, false))
5042 {
5043 emit_insn (gen_adddi3 (base, op0, op1));
5044 return addr;
5045 }
5046 }
5047 /* Address expressions of the form Ra + Rb<<SCALE + CONST.
5048
5049 If Reg + Rb<<SCALE is a valid address expression, do not
5050 split CONST and use the sequence
5051 Rc = CONST;
5052 Rt = Ra + Rc;
5053 addr = Rt + Rb<<SCALE.
5054
 5055 Here we split CONST out of the memory reference because:
 5056 a) We depend on GIMPLE optimizers to pick up common
 5057 subexpressions involving the scaling operation.
 5058 b) The index Rb is likely a loop iv, so it's better to split
 5059 the CONST so that computation of the new base Rt is a loop
 5060 invariant and can be moved out of the loop. This is more
 5061 important when the original base Ra is sfp-related. */
5062 else if (REG_P (op0) || REG_P (op1))
5063 {
5064 machine_mode addr_mode = GET_MODE (x);
5065 rtx base = gen_reg_rtx (addr_mode);
5066
5067 /* Switch to make sure that register is in op0. */
5068 if (REG_P (op1))
5069 std::swap (op0, op1);
5070
5071 rtx addr = gen_rtx_PLUS (addr_mode, op1, base);
5072
5073 if (aarch64_legitimate_address_hook_p (mode, addr, false))
5074 {
5075 base = force_operand (plus_constant (addr_mode,
5076 op0, offset),
5077 NULL_RTX);
5078 return gen_rtx_PLUS (addr_mode, op1, base);
5079 }
5080 }
5081 }
5082
10635db9 5083 /* Does it look like we'll need a load/store-pair operation? */
5084 if (GET_MODE_SIZE (mode) > 16
5085 || mode == TImode)
5086 base_offset = ((offset + 64 * GET_MODE_SIZE (mode))
5087 & ~((128 * GET_MODE_SIZE (mode)) - 1));
 5088 /* For offsets that aren't a multiple of the access size, the limit is
5089 -256...255. */
5090 else if (offset & (GET_MODE_SIZE (mode) - 1))
5091 base_offset = (offset + 0x100) & ~0x1ff;
5092 else
5093 base_offset = offset & ~0xfff;
5094
5095 if (base_offset == 0)
5096 return x;
5097
5098 offset -= base_offset;
5099 rtx base_reg = gen_reg_rtx (Pmode);
5100 rtx val = force_operand (plus_constant (Pmode, XEXP (x, 0), base_offset),
5101 NULL_RTX);
5102 emit_move_insn (base_reg, val);
5103 x = plus_constant (Pmode, base_reg, offset);
5104 }
5105
5106 return x;
5107}
5108
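/* Worked example (illustrative): legitimizing
   (plus X (const_int 0x12340)) for a DImode access.  The offset is a
   multiple of the access size, so base_offset = 0x12340 & ~0xfff
   = 0x12000; the code emits
     T = X + 0x12000
   and returns T + 0x340, where 0x340 fits the scaled 12-bit immediate
   form of a single LDR/STR.  */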
df401d54 5109/* Try a machine-dependent way of reloading an illegitimate address
5110 operand. If we find one, push the reload and return the new rtx. */
5111
5112rtx
5113aarch64_legitimize_reload_address (rtx *x_p,
3754d046 5114 machine_mode mode,
df401d54 5115 int opnum, int type,
5116 int ind_levels ATTRIBUTE_UNUSED)
5117{
5118 rtx x = *x_p;
5119
8013ad1b 5120 /* Do not allow mem (plus (reg, const)) if vector struct mode. */
5121 if (aarch64_vect_struct_mode_p (mode)
df401d54 5122 && GET_CODE (x) == PLUS
5123 && REG_P (XEXP (x, 0))
5124 && CONST_INT_P (XEXP (x, 1)))
5125 {
5126 rtx orig_rtx = x;
5127 x = copy_rtx (x);
5128 push_reload (orig_rtx, NULL_RTX, x_p, NULL,
5129 BASE_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0,
5130 opnum, (enum reload_type) type);
5131 return x;
5132 }
5133
5134 /* We must recognize output that we have already generated ourselves. */
5135 if (GET_CODE (x) == PLUS
5136 && GET_CODE (XEXP (x, 0)) == PLUS
5137 && REG_P (XEXP (XEXP (x, 0), 0))
5138 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
5139 && CONST_INT_P (XEXP (x, 1)))
5140 {
5141 push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
5142 BASE_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0,
5143 opnum, (enum reload_type) type);
5144 return x;
5145 }
5146
5147 /* We wish to handle large displacements off a base register by splitting
5148 the addend across an add and the mem insn. This can cut the number of
5149 extra insns needed from 3 to 1. It is only useful for load/store of a
 5150 single register with a 12-bit offset field. */
5151 if (GET_CODE (x) == PLUS
5152 && REG_P (XEXP (x, 0))
5153 && CONST_INT_P (XEXP (x, 1))
5154 && HARD_REGISTER_P (XEXP (x, 0))
5155 && mode != TImode
5156 && mode != TFmode
5157 && aarch64_regno_ok_for_base_p (REGNO (XEXP (x, 0)), true))
5158 {
5159 HOST_WIDE_INT val = INTVAL (XEXP (x, 1));
5160 HOST_WIDE_INT low = val & 0xfff;
5161 HOST_WIDE_INT high = val - low;
5162 HOST_WIDE_INT offs;
5163 rtx cst;
3754d046 5164 machine_mode xmode = GET_MODE (x);
011aed18 5165
5166 /* In ILP32, xmode can be either DImode or SImode. */
5167 gcc_assert (xmode == DImode || xmode == SImode);
df401d54 5168
5169 /* Reload non-zero BLKmode offsets. This is because we cannot ascertain
5170 BLKmode alignment. */
5171 if (GET_MODE_SIZE (mode) == 0)
5172 return NULL_RTX;
5173
5174 offs = low % GET_MODE_SIZE (mode);
5175
5176 /* Align misaligned offset by adjusting high part to compensate. */
5177 if (offs != 0)
5178 {
5179 if (aarch64_uimm12_shift (high + offs))
5180 {
5181 /* Align down. */
5182 low = low - offs;
5183 high = high + offs;
5184 }
5185 else
5186 {
5187 /* Align up. */
5188 offs = GET_MODE_SIZE (mode) - offs;
5189 low = low + offs;
5190 high = high + (low & 0x1000) - offs;
5191 low &= 0xfff;
5192 }
5193 }
5194
5195 /* Check for overflow. */
5196 if (high + low != val)
5197 return NULL_RTX;
5198
5199 cst = GEN_INT (high);
5200 if (!aarch64_uimm12_shift (high))
011aed18 5201 cst = force_const_mem (xmode, cst);
df401d54 5202
5203 /* Reload high part into base reg, leaving the low part
ce071c3c 5204 in the mem instruction.
5205 Note that replacing this gen_rtx_PLUS with plus_constant is
5206 wrong in this case because we rely on the
5207 (plus (plus reg c1) c2) structure being preserved so that
5208 XEXP (*p, 0) in push_reload below uses the correct term. */
67358f72 5209 x = gen_rtx_PLUS (xmode,
5210 gen_rtx_PLUS (xmode, XEXP (x, 0), cst),
5211 GEN_INT (low));
df401d54 5212
5213 push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
011aed18 5214 BASE_REG_CLASS, xmode, VOIDmode, 0, 0,
df401d54 5215 opnum, (enum reload_type) type);
5216 return x;
5217 }
5218
5219 return NULL_RTX;
5220}
5221
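/* Worked example (illustrative): reloading
   (plus (reg) (const_int 0x1008)) for a DImode access.  low = 0x008
   and high = 0x1000; the low part is already aligned and high is a
   shifted 12-bit immediate, so the transformation keeps
     (plus (plus (reg) 0x1000) 0x8)
   and push_reload moves only the inner PLUS into the base register,
   leaving the #8 offset in the memory reference itself.  */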
5222
ae1cefe6 5223/* Return the reload icode required for a constant pool in mode. */
5224static enum insn_code
5225aarch64_constant_pool_reload_icode (machine_mode mode)
5226{
5227 switch (mode)
5228 {
5229 case SFmode:
5230 return CODE_FOR_aarch64_reload_movcpsfdi;
5231
5232 case DFmode:
5233 return CODE_FOR_aarch64_reload_movcpdfdi;
5234
5235 case TFmode:
5236 return CODE_FOR_aarch64_reload_movcptfdi;
5237
5238 case V8QImode:
5239 return CODE_FOR_aarch64_reload_movcpv8qidi;
5240
5241 case V16QImode:
5242 return CODE_FOR_aarch64_reload_movcpv16qidi;
5243
5244 case V4HImode:
5245 return CODE_FOR_aarch64_reload_movcpv4hidi;
5246
5247 case V8HImode:
5248 return CODE_FOR_aarch64_reload_movcpv8hidi;
5249
5250 case V2SImode:
5251 return CODE_FOR_aarch64_reload_movcpv2sidi;
5252
5253 case V4SImode:
5254 return CODE_FOR_aarch64_reload_movcpv4sidi;
5255
5256 case V2DImode:
5257 return CODE_FOR_aarch64_reload_movcpv2didi;
5258
5259 case V2DFmode:
5260 return CODE_FOR_aarch64_reload_movcpv2dfdi;
5261
5262 default:
5263 gcc_unreachable ();
5264 }
5265
5266 gcc_unreachable ();
5267}
df401d54 5268static reg_class_t
5269aarch64_secondary_reload (bool in_p ATTRIBUTE_UNUSED, rtx x,
5270 reg_class_t rclass,
3754d046 5271 machine_mode mode,
df401d54 5272 secondary_reload_info *sri)
5273{
ae1cefe6 5274
5275 /* If we have to disable direct literal pool loads and stores because the
5276 function is too big, then we need a scratch register. */
5277 if (MEM_P (x) && GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x)
5278 && (SCALAR_FLOAT_MODE_P (GET_MODE (x))
5279 || targetm.vector_mode_supported_p (GET_MODE (x)))
acb1dac7 5280 && aarch64_nopcrelative_literal_loads)
ae1cefe6 5281 {
5282 sri->icode = aarch64_constant_pool_reload_icode (mode);
5283 return NO_REGS;
5284 }
5285
df401d54 5286 /* Without the TARGET_SIMD instructions we cannot move a Q register
5287 to a Q register directly. We need a scratch. */
5288 if (REG_P (x) && (mode == TFmode || mode == TImode) && mode == GET_MODE (x)
5289 && FP_REGNUM_P (REGNO (x)) && !TARGET_SIMD
5290 && reg_class_subset_p (rclass, FP_REGS))
5291 {
5292 if (mode == TFmode)
5293 sri->icode = CODE_FOR_aarch64_reload_movtf;
5294 else if (mode == TImode)
5295 sri->icode = CODE_FOR_aarch64_reload_movti;
5296 return NO_REGS;
5297 }
5298
 5299 /* A TFmode or TImode memory access should be handled via FP_REGS
5300 because AArch64 has richer addressing modes for LDR/STR instructions
5301 than LDP/STP instructions. */
a0c7b470 5302 if (TARGET_FLOAT && rclass == GENERAL_REGS
df401d54 5303 && GET_MODE_SIZE (mode) == 16 && MEM_P (x))
5304 return FP_REGS;
5305
 5306 if (rclass == FP_REGS && (mode == TImode || mode == TFmode) && CONSTANT_P (x))
e5160b8b 5307 return GENERAL_REGS;
df401d54 5308
5309 return NO_REGS;
5310}
5311
5312static bool
5313aarch64_can_eliminate (const int from, const int to)
5314{
5315 /* If we need a frame pointer, we must eliminate FRAME_POINTER_REGNUM into
5316 HARD_FRAME_POINTER_REGNUM and not into STACK_POINTER_REGNUM. */
5317
5318 if (frame_pointer_needed)
5319 {
5320 if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
5321 return true;
5322 if (from == ARG_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
5323 return false;
5324 if (from == FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM
5325 && !cfun->calls_alloca)
5326 return true;
5327 if (from == FRAME_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
5328 return true;
743de8ed 5329
5330 return false;
df401d54 5331 }
376e0a4f 5332 else
5333 {
5334 /* If we decided that we didn't need a leaf frame pointer but then used
5335 LR in the function, then we'll want a frame pointer after all, so
5336 prevent this elimination to ensure a frame pointer is used. */
5337 if (to == STACK_POINTER_REGNUM
5338 && flag_omit_leaf_frame_pointer
5339 && df_regs_ever_live_p (LR_REGNUM))
5340 return false;
5341 }
0d62a093 5342
df401d54 5343 return true;
5344}
5345
5346HOST_WIDE_INT
5347aarch64_initial_elimination_offset (unsigned from, unsigned to)
5348{
df401d54 5349 aarch64_layout_frame ();
df401d54 5350
57c414e7 5351 if (to == HARD_FRAME_POINTER_REGNUM)
5352 {
5353 if (from == ARG_POINTER_REGNUM)
ee32c719 5354 return cfun->machine->frame.frame_size - crtl->outgoing_args_size;
df401d54 5355
57c414e7 5356 if (from == FRAME_POINTER_REGNUM)
ee32c719 5357 return (cfun->machine->frame.hard_fp_offset
5358 - cfun->machine->frame.saved_varargs_size);
57c414e7 5359 }
df401d54 5360
57c414e7 5361 if (to == STACK_POINTER_REGNUM)
5362 {
5363 if (from == FRAME_POINTER_REGNUM)
ee32c719 5364 return (cfun->machine->frame.frame_size
5365 - cfun->machine->frame.saved_varargs_size);
57c414e7 5366 }
df401d54 5367
ee32c719 5368 return cfun->machine->frame.frame_size;
df401d54 5369}
5370
df401d54 5371/* Implement RETURN_ADDR_RTX. We do not support moving back to a
5372 previous frame. */
5373
5374rtx
5375aarch64_return_addr (int count, rtx frame ATTRIBUTE_UNUSED)
5376{
5377 if (count != 0)
5378 return const0_rtx;
5379 return get_hard_reg_initial_val (Pmode, LR_REGNUM);
5380}
5381
5382
5383static void
5384aarch64_asm_trampoline_template (FILE *f)
5385{
011aed18 5386 if (TARGET_ILP32)
5387 {
5388 asm_fprintf (f, "\tldr\tw%d, .+16\n", IP1_REGNUM - R0_REGNUM);
5389 asm_fprintf (f, "\tldr\tw%d, .+16\n", STATIC_CHAIN_REGNUM - R0_REGNUM);
5390 }
5391 else
5392 {
5393 asm_fprintf (f, "\tldr\t%s, .+16\n", reg_names [IP1_REGNUM]);
5394 asm_fprintf (f, "\tldr\t%s, .+20\n", reg_names [STATIC_CHAIN_REGNUM]);
5395 }
b9ccfbdf 5396 asm_fprintf (f, "\tbr\t%s\n", reg_names [IP1_REGNUM]);
df401d54 5397 assemble_aligned_integer (4, const0_rtx);
011aed18 5398 assemble_aligned_integer (POINTER_BYTES, const0_rtx);
5399 assemble_aligned_integer (POINTER_BYTES, const0_rtx);
df401d54 5400}
5401
5402static void
5403aarch64_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
5404{
5405 rtx fnaddr, mem, a_tramp;
011aed18 5406 const int tramp_code_sz = 16;
df401d54 5407
 5408 /* Don't need to copy the trailing D-words; we fill those in below. */
5409 emit_block_move (m_tramp, assemble_trampoline_template (),
011aed18 5410 GEN_INT (tramp_code_sz), BLOCK_OP_NORMAL);
5411 mem = adjust_address (m_tramp, ptr_mode, tramp_code_sz);
df401d54 5412 fnaddr = XEXP (DECL_RTL (fndecl), 0);
011aed18 5413 if (GET_MODE (fnaddr) != ptr_mode)
5414 fnaddr = convert_memory_address (ptr_mode, fnaddr);
df401d54 5415 emit_move_insn (mem, fnaddr);
5416
011aed18 5417 mem = adjust_address (m_tramp, ptr_mode, tramp_code_sz + POINTER_BYTES);
df401d54 5418 emit_move_insn (mem, chain_value);
5419
5420 /* XXX We should really define a "clear_cache" pattern and use
5421 gen_clear_cache(). */
5422 a_tramp = XEXP (m_tramp, 0);
5423 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__clear_cache"),
011aed18 5424 LCT_NORMAL, VOIDmode, 2, a_tramp, ptr_mode,
5425 plus_constant (ptr_mode, a_tramp, TRAMPOLINE_SIZE),
5426 ptr_mode);
df401d54 5427}
5428
5429static unsigned char
3754d046 5430aarch64_class_max_nregs (reg_class_t regclass, machine_mode mode)
df401d54 5431{
5432 switch (regclass)
5433 {
1718b6c1 5434 case CALLER_SAVE_REGS:
df401d54 5435 case POINTER_REGS:
5436 case GENERAL_REGS:
5437 case ALL_REGS:
5438 case FP_REGS:
5439 case FP_LO_REGS:
5440 return
c4985cef 5441 aarch64_vector_mode_p (mode)
5442 ? (GET_MODE_SIZE (mode) + UNITS_PER_VREG - 1) / UNITS_PER_VREG
5443 : (GET_MODE_SIZE (mode) + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
df401d54 5444 case STACK_REG:
5445 return 1;
5446
5447 case NO_REGS:
5448 return 0;
5449
5450 default:
5451 break;
5452 }
5453 gcc_unreachable ();
5454}
5455
5456static reg_class_t
5c5ef2f3 5457aarch64_preferred_reload_class (rtx x, reg_class_t regclass)
df401d54 5458{
e57c0318 5459 if (regclass == POINTER_REGS)
5c5ef2f3 5460 return GENERAL_REGS;
5461
e57c0318 5462 if (regclass == STACK_REG)
5463 {
 5464 if (REG_P (x)
5465 && reg_class_subset_p (REGNO_REG_CLASS (REGNO (x)), POINTER_REGS))
5466 return regclass;
5467
5468 return NO_REGS;
5469 }
5470
5c5ef2f3 5471 /* If it's an integer immediate that MOVI can't handle, then
5472 FP_REGS is not an option, so we return NO_REGS instead. */
5473 if (CONST_INT_P (x) && reg_class_subset_p (regclass, FP_REGS)
5474 && !aarch64_simd_imm_scalar_p (x, GET_MODE (x)))
5475 return NO_REGS;
5476
ec44ca7c 5477 /* Register elimination can result in a request for
 5478 SP+constant->FP_REGS. We cannot support such operations, which
 5479 use SP as the source and an FP_REG as the destination, so
 5480 reject them outright here. */
5481 if (! reg_class_subset_p (regclass, GENERAL_REGS) && GET_CODE (x) == PLUS)
5482 {
5483 rtx lhs = XEXP (x, 0);
5484
5485 /* Look through a possible SUBREG introduced by ILP32. */
5486 if (GET_CODE (lhs) == SUBREG)
5487 lhs = SUBREG_REG (lhs);
5488
5489 gcc_assert (REG_P (lhs));
5490 gcc_assert (reg_class_subset_p (REGNO_REG_CLASS (REGNO (lhs)),
5491 POINTER_REGS));
5492 return NO_REGS;
5493 }
5494
5c5ef2f3 5495 return regclass;
df401d54 5496}
5497
5498void
5499aarch64_asm_output_labelref (FILE* f, const char *name)
5500{
5501 asm_fprintf (f, "%U%s", name);
5502}
5503
5504static void
5505aarch64_elf_asm_constructor (rtx symbol, int priority)
5506{
5507 if (priority == DEFAULT_INIT_PRIORITY)
5508 default_ctor_section_asm_out_constructor (symbol, priority);
5509 else
5510 {
5511 section *s;
5512 char buf[18];
5513 snprintf (buf, sizeof (buf), ".init_array.%.5u", priority);
5514 s = get_section (buf, SECTION_WRITE, NULL);
5515 switch_to_section (s);
5516 assemble_align (POINTER_SIZE);
011aed18 5517 assemble_aligned_integer (POINTER_BYTES, symbol);
df401d54 5518 }
5519}
5520
5521static void
5522aarch64_elf_asm_destructor (rtx symbol, int priority)
5523{
5524 if (priority == DEFAULT_INIT_PRIORITY)
5525 default_dtor_section_asm_out_destructor (symbol, priority);
5526 else
5527 {
5528 section *s;
5529 char buf[18];
5530 snprintf (buf, sizeof (buf), ".fini_array.%.5u", priority);
5531 s = get_section (buf, SECTION_WRITE, NULL);
5532 switch_to_section (s);
5533 assemble_align (POINTER_SIZE);
011aed18 5534 assemble_aligned_integer (POINTER_BYTES, symbol);
df401d54 5535 }
5536}
5537
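/* Output the dispatch sequence for a casesi jump table: load the (possibly
   byte- or halfword-sized) table entry selected by operand 1, materialise
   the table base address with ADR, add the sign-extended entry scaled by 4
   to that base and branch to the result.  This is a rough summary of the
   patterns emitted below.  */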
5538const char*
5539aarch64_output_casesi (rtx *operands)
5540{
5541 char buf[100];
5542 char label[100];
91a55c11 5543 rtx diff_vec = PATTERN (NEXT_INSN (as_a <rtx_insn *> (operands[2])));
df401d54 5544 int index;
5545 static const char *const patterns[4][2] =
5546 {
5547 {
5548 "ldrb\t%w3, [%0,%w1,uxtw]",
5549 "add\t%3, %4, %w3, sxtb #2"
5550 },
5551 {
5552 "ldrh\t%w3, [%0,%w1,uxtw #1]",
5553 "add\t%3, %4, %w3, sxth #2"
5554 },
5555 {
5556 "ldr\t%w3, [%0,%w1,uxtw #2]",
5557 "add\t%3, %4, %w3, sxtw #2"
5558 },
5559 /* We assume that DImode is only generated when not optimizing and
5560 that we don't really need 64-bit address offsets. That would
5561 imply an object file with 8GB of code in a single function! */
5562 {
5563 "ldr\t%w3, [%0,%w1,uxtw #2]",
5564 "add\t%3, %4, %w3, sxtw #2"
5565 }
5566 };
5567
5568 gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC);
5569
5570 index = exact_log2 (GET_MODE_SIZE (GET_MODE (diff_vec)));
5571
5572 gcc_assert (index >= 0 && index <= 3);
5573
 5574  /* Need to implement table size reduction, by changing the code below. */
5575 output_asm_insn (patterns[index][0], operands);
5576 ASM_GENERATE_INTERNAL_LABEL (label, "Lrtx", CODE_LABEL_NUMBER (operands[2]));
5577 snprintf (buf, sizeof (buf),
5578 "adr\t%%4, %s", targetm.strip_name_encoding (label));
5579 output_asm_insn (buf, operands);
5580 output_asm_insn (patterns[index][1], operands);
5581 output_asm_insn ("br\t%3", operands);
5582 assemble_label (asm_out_file, label);
5583 return "";
5584}
5585
5586
5587/* Return size in bits of an arithmetic operand which is shifted/scaled and
5588 masked such that it is suitable for a UXTB, UXTH, or UXTW extend
5589 operator. */
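/* For example, a shift of 2 with a mask of 0x3fc (0xff << 2) matches a UXTB
   operand, so we return 8; a mask that does not line up with one of the
   extend widths returns 0.  */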
5590
5591int
5592aarch64_uxt_size (int shift, HOST_WIDE_INT mask)
5593{
5594 if (shift >= 0 && shift <= 3)
5595 {
5596 int size;
5597 for (size = 8; size <= 32; size *= 2)
5598 {
5599 HOST_WIDE_INT bits = ((HOST_WIDE_INT)1U << size) - 1;
5600 if (mask == bits << shift)
5601 return size;
5602 }
5603 }
5604 return 0;
5605}
5606
cd4fcdb8 5607/* Constant pools are per-function only when PC-relative
 5608   literal loads are enabled or we are in the large memory
 5609   model. */
5610
5611static inline bool
5612aarch64_can_use_per_function_literal_pools_p (void)
5613{
5614 return (!aarch64_nopcrelative_literal_loads
5615 || aarch64_cmodel == AARCH64_CMODEL_LARGE);
5616}
5617
df401d54 5618static bool
cd4fcdb8 5619aarch64_use_blocks_for_constant_p (machine_mode, const_rtx)
df401d54 5620{
bc443a71 5621  /* FIXME: In an ideal world this would work similarly
 5622     to the logic in aarch64_select_rtx_section, but this
 5623     breaks bootstrap in gccgo.  For now we work around
 5624     this by returning false here. */
5625 return false;
df401d54 5626}
5627
cd4fcdb8 5628/* Select appropriate section for constants depending
5629 on where we place literal pools. */
5630
df401d54 5631static section *
cd4fcdb8 5632aarch64_select_rtx_section (machine_mode mode,
5633 rtx x,
5634 unsigned HOST_WIDE_INT align)
df401d54 5635{
cd4fcdb8 5636 if (aarch64_can_use_per_function_literal_pools_p ())
5637 return function_section (current_function_decl);
df401d54 5638
cd4fcdb8 5639 return default_elf_select_rtx_section (mode, x, align);
5640}
df401d54 5641
5642/* Costs. */
5643
5644/* Helper function for rtx cost calculation. Strip a shift expression
5645 from X. Returns the inner operand if successful, or the original
5646 expression on failure. */
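/* For example, (ashift (reg) (const_int 2)) and the equivalent
   (mult (reg) (const_int 4)) both strip down to the inner register, while a
   plain (reg) is returned unchanged.  */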
5647static rtx
5648aarch64_strip_shift (rtx x)
5649{
5650 rtx op = x;
5651
e523eda9 5652 /* We accept both ROTATERT and ROTATE: since the RHS must be a constant
5653 we can convert both to ROR during final output. */
df401d54 5654 if ((GET_CODE (op) == ASHIFT
5655 || GET_CODE (op) == ASHIFTRT
e523eda9 5656 || GET_CODE (op) == LSHIFTRT
5657 || GET_CODE (op) == ROTATERT
5658 || GET_CODE (op) == ROTATE)
df401d54 5659 && CONST_INT_P (XEXP (op, 1)))
5660 return XEXP (op, 0);
5661
5662 if (GET_CODE (op) == MULT
5663 && CONST_INT_P (XEXP (op, 1))
5664 && ((unsigned) exact_log2 (INTVAL (XEXP (op, 1)))) < 64)
5665 return XEXP (op, 0);
5666
5667 return x;
5668}
5669
2f0038b0 5670/* Helper function for rtx cost calculation. Strip an extend
df401d54 5671 expression from X. Returns the inner operand if successful, or the
5672 original expression on failure. We deal with a number of possible
5673 canonicalization variations here. */
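/* For example, (zero_extend (reg)) strips to the inner register, as does the
   shifted form (ashift (zero_extend (reg)) (const_int 2)) used for extended
   register operands.  */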
5674static rtx
2f0038b0 5675aarch64_strip_extend (rtx x)
df401d54 5676{
5677 rtx op = x;
5678
5679 /* Zero and sign extraction of a widened value. */
5680 if ((GET_CODE (op) == ZERO_EXTRACT || GET_CODE (op) == SIGN_EXTRACT)
5681 && XEXP (op, 2) == const0_rtx
2f0038b0 5682 && GET_CODE (XEXP (op, 0)) == MULT
df401d54 5683 && aarch64_is_extend_from_extract (GET_MODE (op), XEXP (XEXP (op, 0), 1),
5684 XEXP (op, 1)))
5685 return XEXP (XEXP (op, 0), 0);
5686
5687 /* It can also be represented (for zero-extend) as an AND with an
5688 immediate. */
5689 if (GET_CODE (op) == AND
5690 && GET_CODE (XEXP (op, 0)) == MULT
5691 && CONST_INT_P (XEXP (XEXP (op, 0), 1))
5692 && CONST_INT_P (XEXP (op, 1))
5693 && aarch64_uxt_size (exact_log2 (INTVAL (XEXP (XEXP (op, 0), 1))),
5694 INTVAL (XEXP (op, 1))) != 0)
5695 return XEXP (XEXP (op, 0), 0);
5696
5697 /* Now handle extended register, as this may also have an optional
5698 left shift by 1..4. */
5699 if (GET_CODE (op) == ASHIFT
5700 && CONST_INT_P (XEXP (op, 1))
5701 && ((unsigned HOST_WIDE_INT) INTVAL (XEXP (op, 1))) <= 4)
5702 op = XEXP (op, 0);
5703
5704 if (GET_CODE (op) == ZERO_EXTEND
5705 || GET_CODE (op) == SIGN_EXTEND)
5706 op = XEXP (op, 0);
5707
5708 if (op != x)
5709 return op;
5710
2f0038b0 5711 return x;
5712}
5713
4ab3c0df 5714/* Return true iff CODE is a shift supported in combination
5715 with arithmetic instructions. */
21907b0f 5716
4ab3c0df 5717static bool
5718aarch64_shift_p (enum rtx_code code)
5719{
5720 return code == ASHIFT || code == ASHIFTRT || code == LSHIFTRT;
5721}
5722
2f0038b0 5723/* Helper function for rtx cost calculation. Calculate the cost of
4ab3c0df 5724 a MULT or ASHIFT, which may be part of a compound PLUS/MINUS rtx.
 5725   Return the calculated cost of the expression, recursing manually into
2f0038b0 5726 operands where needed. */
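/* For example, with OUTER == PLUS, (mult (reg) (const_int 4)) is costed here
   as the shift part of an ADD-with-shift-by-immediate rather than as a true
   multiply.  */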
5727
5728static int
5ae4887d 5729aarch64_rtx_mult_cost (rtx x, enum rtx_code code, int outer, bool speed)
2f0038b0 5730{
5731 rtx op0, op1;
5732 const struct cpu_cost_table *extra_cost
14677da9 5733 = aarch64_tune_params.insn_extra_cost;
2f0038b0 5734 int cost = 0;
4ab3c0df 5735 bool compound_p = (outer == PLUS || outer == MINUS);
3754d046 5736 machine_mode mode = GET_MODE (x);
2f0038b0 5737
5738 gcc_checking_assert (code == MULT);
5739
5740 op0 = XEXP (x, 0);
5741 op1 = XEXP (x, 1);
5742
5743 if (VECTOR_MODE_P (mode))
5744 mode = GET_MODE_INNER (mode);
5745
5746 /* Integer multiply/fma. */
5747 if (GET_MODE_CLASS (mode) == MODE_INT)
5748 {
 5749      /* The multiply will be canonicalized as a shift; cost it as such.  */
4ab3c0df 5750 if (aarch64_shift_p (GET_CODE (x))
5751 || (CONST_INT_P (op1)
5752 && exact_log2 (INTVAL (op1)) > 0))
2f0038b0 5753 {
4ab3c0df 5754 bool is_extend = GET_CODE (op0) == ZERO_EXTEND
5755 || GET_CODE (op0) == SIGN_EXTEND;
2f0038b0 5756 if (speed)
5757 {
4ab3c0df 5758 if (compound_p)
5759 {
5760 if (REG_P (op1))
5761 /* ARITH + shift-by-register. */
5762 cost += extra_cost->alu.arith_shift_reg;
5763 else if (is_extend)
5764 /* ARITH + extended register. We don't have a cost field
5765 for ARITH+EXTEND+SHIFT, so use extend_arith here. */
5766 cost += extra_cost->alu.extend_arith;
5767 else
5768 /* ARITH + shift-by-immediate. */
5769 cost += extra_cost->alu.arith_shift;
5770 }
2f0038b0 5771 else
5772 /* LSL (immediate). */
4ab3c0df 5773 cost += extra_cost->alu.shift;
5774
2f0038b0 5775 }
4ab3c0df 5776 /* Strip extends as we will have costed them in the case above. */
5777 if (is_extend)
5778 op0 = aarch64_strip_extend (op0);
2f0038b0 5779
5ae4887d 5780 cost += rtx_cost (op0, VOIDmode, code, 0, speed);
2f0038b0 5781
5782 return cost;
5783 }
5784
e087eb70 5785      /* MNEG or [US]MNEGL.  Extract the NEG operand, mark the expression as a
 5786	 compound operation and let the cases below handle it.  After all, MNEG
 5787	 is a special-case alias of MSUB. */
5788 if (GET_CODE (op0) == NEG)
5789 {
5790 op0 = XEXP (op0, 0);
5791 compound_p = true;
5792 }
5793
2f0038b0 5794 /* Integer multiplies or FMAs have zero/sign extending variants. */
5795 if ((GET_CODE (op0) == ZERO_EXTEND
5796 && GET_CODE (op1) == ZERO_EXTEND)
5797 || (GET_CODE (op0) == SIGN_EXTEND
5798 && GET_CODE (op1) == SIGN_EXTEND))
5799 {
5ae4887d 5800 cost += rtx_cost (XEXP (op0, 0), VOIDmode, MULT, 0, speed);
5801 cost += rtx_cost (XEXP (op1, 0), VOIDmode, MULT, 1, speed);
2f0038b0 5802
5803 if (speed)
5804 {
4ab3c0df 5805 if (compound_p)
e087eb70 5806 /* SMADDL/UMADDL/UMSUBL/SMSUBL. */
2f0038b0 5807 cost += extra_cost->mult[0].extend_add;
5808 else
5809 /* MUL/SMULL/UMULL. */
5810 cost += extra_cost->mult[0].extend;
5811 }
5812
5813 return cost;
5814 }
5815
e087eb70 5816 /* This is either an integer multiply or a MADD. In both cases
2f0038b0 5817 we want to recurse and cost the operands. */
5ae4887d 5818 cost += rtx_cost (op0, mode, MULT, 0, speed);
5819 cost += rtx_cost (op1, mode, MULT, 1, speed);
2f0038b0 5820
5821 if (speed)
5822 {
4ab3c0df 5823 if (compound_p)
e087eb70 5824 /* MADD/MSUB. */
2f0038b0 5825 cost += extra_cost->mult[mode == DImode].add;
5826 else
5827 /* MUL. */
5828 cost += extra_cost->mult[mode == DImode].simple;
5829 }
5830
5831 return cost;
5832 }
5833 else
5834 {
5835 if (speed)
5836 {
40018c37 5837 /* Floating-point FMA/FMUL can also support negations of the
32c25e89 5838	     operands, unless the rounding mode is upward or downward, in
 5839	     which case FNMUL differs from FMUL with operand negation. */
5840 bool neg0 = GET_CODE (op0) == NEG;
5841 bool neg1 = GET_CODE (op1) == NEG;
5842 if (compound_p || !flag_rounding_math || (neg0 && neg1))
5843 {
5844 if (neg0)
5845 op0 = XEXP (op0, 0);
5846 if (neg1)
5847 op1 = XEXP (op1, 0);
5848 }
2f0038b0 5849
4ab3c0df 5850 if (compound_p)
2f0038b0 5851 /* FMADD/FNMADD/FNMSUB/FMSUB. */
5852 cost += extra_cost->fp[mode == DFmode].fma;
5853 else
40018c37 5854 /* FMUL/FNMUL. */
2f0038b0 5855 cost += extra_cost->fp[mode == DFmode].mult;
5856 }
5857
5ae4887d 5858 cost += rtx_cost (op0, mode, MULT, 0, speed);
5859 cost += rtx_cost (op1, mode, MULT, 1, speed);
2f0038b0 5860 return cost;
5861 }
df401d54 5862}
5863
3d70178f 5864static int
5865aarch64_address_cost (rtx x,
3754d046 5866 machine_mode mode,
3d70178f 5867 addr_space_t as ATTRIBUTE_UNUSED,
5868 bool speed)
5869{
5870 enum rtx_code c = GET_CODE (x);
14677da9 5871 const struct cpu_addrcost_table *addr_cost = aarch64_tune_params.addr_cost;
3d70178f 5872 struct aarch64_address_info info;
5873 int cost = 0;
5874 info.shift = 0;
5875
5876 if (!aarch64_classify_address (&info, x, mode, c, false))
5877 {
5878 if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF)
5879 {
5880 /* This is a CONST or SYMBOL ref which will be split
5881 in a different way depending on the code model in use.
5882 Cost it through the generic infrastructure. */
5ae4887d 5883 int cost_symbol_ref = rtx_cost (x, Pmode, MEM, 1, speed);
3d70178f 5884 /* Divide through by the cost of one instruction to
5885 bring it to the same units as the address costs. */
5886 cost_symbol_ref /= COSTS_N_INSNS (1);
5887 /* The cost is then the cost of preparing the address,
5888 followed by an immediate (possibly 0) offset. */
5889 return cost_symbol_ref + addr_cost->imm_offset;
5890 }
5891 else
5892 {
5893 /* This is most likely a jump table from a case
5894 statement. */
5895 return addr_cost->register_offset;
5896 }
5897 }
5898
5899 switch (info.type)
5900 {
5901 case ADDRESS_LO_SUM:
5902 case ADDRESS_SYMBOLIC:
5903 case ADDRESS_REG_IMM:
5904 cost += addr_cost->imm_offset;
5905 break;
5906
5907 case ADDRESS_REG_WB:
5908 if (c == PRE_INC || c == PRE_DEC || c == PRE_MODIFY)
5909 cost += addr_cost->pre_modify;
5910 else if (c == POST_INC || c == POST_DEC || c == POST_MODIFY)
5911 cost += addr_cost->post_modify;
5912 else
5913 gcc_unreachable ();
5914
5915 break;
5916
5917 case ADDRESS_REG_REG:
5918 cost += addr_cost->register_offset;
5919 break;
5920
3d70178f 5921 case ADDRESS_REG_SXTW:
85258792 5922 cost += addr_cost->register_sextend;
5923 break;
5924
5925 case ADDRESS_REG_UXTW:
5926 cost += addr_cost->register_zextend;
3d70178f 5927 break;
5928
5929 default:
5930 gcc_unreachable ();
5931 }
5932
5933
5934 if (info.shift > 0)
5935 {
5936 /* For the sake of calculating the cost of the shifted register
5937 component, we can treat same sized modes in the same way. */
5938 switch (GET_MODE_BITSIZE (mode))
5939 {
5940 case 16:
5941 cost += addr_cost->addr_scale_costs.hi;
5942 break;
5943
5944 case 32:
5945 cost += addr_cost->addr_scale_costs.si;
5946 break;
5947
5948 case 64:
5949 cost += addr_cost->addr_scale_costs.di;
5950 break;
5951
5952 /* We can't tell, or this is a 128-bit vector. */
5953 default:
5954 cost += addr_cost->addr_scale_costs.ti;
5955 break;
5956 }
5957 }
5958
5959 return cost;
5960}
5961
d05ee6d2 5962/* Return the cost of a branch. If SPEED_P is true then the compiler is
5963 optimizing for speed. If PREDICTABLE_P is true then the branch is predicted
5964 to be taken. */
5965
5966int
5967aarch64_branch_cost (bool speed_p, bool predictable_p)
5968{
5969 /* When optimizing for speed, use the cost of unpredictable branches. */
5970 const struct cpu_branch_cost *branch_costs =
14677da9 5971 aarch64_tune_params.branch_costs;
d05ee6d2 5972
5973 if (!speed_p || predictable_p)
5974 return branch_costs->predictable;
5975 else
5976 return branch_costs->unpredictable;
5977}
5978
d515bbc9 5979/* Return true if the RTX X in mode MODE is a zero or sign extract
5980 usable in an ADD or SUB (extended register) instruction. */
5981static bool
3754d046 5982aarch64_rtx_arith_op_extract_p (rtx x, machine_mode mode)
d515bbc9 5983{
5984 /* Catch add with a sign extract.
5985 This is add_<optab><mode>_multp2. */
5986 if (GET_CODE (x) == SIGN_EXTRACT
5987 || GET_CODE (x) == ZERO_EXTRACT)
5988 {
5989 rtx op0 = XEXP (x, 0);
5990 rtx op1 = XEXP (x, 1);
5991 rtx op2 = XEXP (x, 2);
5992
5993 if (GET_CODE (op0) == MULT
5994 && CONST_INT_P (op1)
5995 && op2 == const0_rtx
5996 && CONST_INT_P (XEXP (op0, 1))
5997 && aarch64_is_extend_from_extract (mode,
5998 XEXP (op0, 1),
5999 op1))
6000 {
6001 return true;
6002 }
6003 }
9d8e046e 6004 /* The simple case <ARITH>, XD, XN, XM, [us]xt.
6005 No shift. */
6006 else if (GET_CODE (x) == SIGN_EXTEND
6007 || GET_CODE (x) == ZERO_EXTEND)
6008 return REG_P (XEXP (x, 0));
d515bbc9 6009
6010 return false;
6011}
6012
fb53b6aa 6013static bool
6014aarch64_frint_unspec_p (unsigned int u)
6015{
6016 switch (u)
6017 {
6018 case UNSPEC_FRINTZ:
6019 case UNSPEC_FRINTP:
6020 case UNSPEC_FRINTM:
6021 case UNSPEC_FRINTA:
6022 case UNSPEC_FRINTN:
6023 case UNSPEC_FRINTX:
6024 case UNSPEC_FRINTI:
6025 return true;
6026
6027 default:
6028 return false;
6029 }
6030}
6031
7e733b67 6032/* Return true iff X is an rtx that will match an extr instruction,
 6033   i.e. one described by the *extr<mode>5_insn family of patterns.
6034 OP0 and OP1 will be set to the operands of the shifts involved
6035 on success and will be NULL_RTX otherwise. */
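/* For example, on DImode operands,
     (ior (ashift (reg:DI a) (const_int 16))
	  (lshiftrt (reg:DI b) (const_int 48)))
   has shift amounts summing to 64, so it matches EXTR and *RES_OP0/*RES_OP1
   are set to a and b respectively.  */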
6036
6037static bool
6038aarch64_extr_rtx_p (rtx x, rtx *res_op0, rtx *res_op1)
6039{
6040 rtx op0, op1;
6041 machine_mode mode = GET_MODE (x);
6042
6043 *res_op0 = NULL_RTX;
6044 *res_op1 = NULL_RTX;
6045
6046 if (GET_CODE (x) != IOR)
6047 return false;
6048
6049 op0 = XEXP (x, 0);
6050 op1 = XEXP (x, 1);
6051
6052 if ((GET_CODE (op0) == ASHIFT && GET_CODE (op1) == LSHIFTRT)
6053 || (GET_CODE (op1) == ASHIFT && GET_CODE (op0) == LSHIFTRT))
6054 {
6055 /* Canonicalise locally to ashift in op0, lshiftrt in op1. */
6056 if (GET_CODE (op1) == ASHIFT)
6057 std::swap (op0, op1);
6058
6059 if (!CONST_INT_P (XEXP (op0, 1)) || !CONST_INT_P (XEXP (op1, 1)))
6060 return false;
6061
6062 unsigned HOST_WIDE_INT shft_amnt_0 = UINTVAL (XEXP (op0, 1));
6063 unsigned HOST_WIDE_INT shft_amnt_1 = UINTVAL (XEXP (op1, 1));
6064
6065 if (shft_amnt_0 < GET_MODE_BITSIZE (mode)
6066 && shft_amnt_0 + shft_amnt_1 == GET_MODE_BITSIZE (mode))
6067 {
6068 *res_op0 = XEXP (op0, 0);
6069 *res_op1 = XEXP (op1, 0);
6070 return true;
6071 }
6072 }
6073
6074 return false;
6075}
6076
39f4504d 6077/* Calculate the cost of calculating (if_then_else (OP0) (OP1) (OP2)),
6078 storing it in *COST. Result is true if the total cost of the operation
6079 has now been calculated. */
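/* For example, a branch on (ne (reg) (const_int 0)) is costed as a CBZ/CBNZ
   of that register, while a select whose condition is already in the flags
   (MODE_CC) is costed as some flavor of CSEL.  */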
6080static bool
6081aarch64_if_then_else_costs (rtx op0, rtx op1, rtx op2, int *cost, bool speed)
6082{
2f76a745 6083 rtx inner;
6084 rtx comparator;
6085 enum rtx_code cmpcode;
6086
6087 if (COMPARISON_P (op0))
6088 {
6089 inner = XEXP (op0, 0);
6090 comparator = XEXP (op0, 1);
6091 cmpcode = GET_CODE (op0);
6092 }
6093 else
6094 {
6095 inner = op0;
6096 comparator = const0_rtx;
6097 cmpcode = NE;
6098 }
6099
39f4504d 6100 if (GET_CODE (op1) == PC || GET_CODE (op2) == PC)
6101 {
6102 /* Conditional branch. */
2f76a745 6103 if (GET_MODE_CLASS (GET_MODE (inner)) == MODE_CC)
39f4504d 6104 return true;
6105 else
6106 {
2f76a745 6107 if (cmpcode == NE || cmpcode == EQ)
39f4504d 6108 {
39f4504d 6109 if (comparator == const0_rtx)
6110 {
6111 /* TBZ/TBNZ/CBZ/CBNZ. */
6112 if (GET_CODE (inner) == ZERO_EXTRACT)
6113 /* TBZ/TBNZ. */
5ae4887d 6114 *cost += rtx_cost (XEXP (inner, 0), VOIDmode,
6115 ZERO_EXTRACT, 0, speed);
6116 else
6117 /* CBZ/CBNZ. */
6118 *cost += rtx_cost (inner, VOIDmode, cmpcode, 0, speed);
39f4504d 6119
6120 return true;
6121 }
6122 }
2f76a745 6123 else if (cmpcode == LT || cmpcode == GE)
39f4504d 6124 {
39f4504d 6125 /* TBZ/TBNZ. */
6126 if (comparator == const0_rtx)
6127 return true;
6128 }
6129 }
6130 }
2f76a745 6131 else if (GET_MODE_CLASS (GET_MODE (inner)) == MODE_CC)
39f4504d 6132 {
6133 /* It's a conditional operation based on the status flags,
6134 so it must be some flavor of CSEL. */
6135
6136 /* CSNEG, CSINV, and CSINC are handled for free as part of CSEL. */
6137 if (GET_CODE (op1) == NEG
6138 || GET_CODE (op1) == NOT
6139 || (GET_CODE (op1) == PLUS && XEXP (op1, 1) == const1_rtx))
6140 op1 = XEXP (op1, 0);
6141
5ae4887d 6142 *cost += rtx_cost (op1, VOIDmode, IF_THEN_ELSE, 1, speed);
6143 *cost += rtx_cost (op2, VOIDmode, IF_THEN_ELSE, 2, speed);
39f4504d 6144 return true;
6145 }
6146
6147 /* We don't know what this is, cost all operands. */
6148 return false;
6149}
6150
82fc3cbd 6151/* Check whether X is a bitfield operation of the form shift + extend that
6152 maps down to a UBFIZ/SBFIZ/UBFX/SBFX instruction. If so, return the
6153 operand to which the bitfield operation is applied. Otherwise return
6154 NULL_RTX. */
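/* For example, (zero_extend:SI (lshiftrt:HI (reg:HI r) (const_int 3)))
   corresponds to a UBFX of the inner HImode register, which is what we
   return here.  */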
6155
6156static rtx
6157aarch64_extend_bitfield_pattern_p (rtx x)
6158{
6159 rtx_code outer_code = GET_CODE (x);
6160 machine_mode outer_mode = GET_MODE (x);
6161
6162 if (outer_code != ZERO_EXTEND && outer_code != SIGN_EXTEND
6163 && outer_mode != SImode && outer_mode != DImode)
6164 return NULL_RTX;
6165
6166 rtx inner = XEXP (x, 0);
6167 rtx_code inner_code = GET_CODE (inner);
6168 machine_mode inner_mode = GET_MODE (inner);
6169 rtx op = NULL_RTX;
6170
6171 switch (inner_code)
6172 {
6173 case ASHIFT:
6174 if (CONST_INT_P (XEXP (inner, 1))
6175 && (inner_mode == QImode || inner_mode == HImode))
6176 op = XEXP (inner, 0);
6177 break;
6178 case LSHIFTRT:
6179 if (outer_code == ZERO_EXTEND && CONST_INT_P (XEXP (inner, 1))
6180 && (inner_mode == QImode || inner_mode == HImode))
6181 op = XEXP (inner, 0);
6182 break;
6183 case ASHIFTRT:
6184 if (outer_code == SIGN_EXTEND && CONST_INT_P (XEXP (inner, 1))
6185 && (inner_mode == QImode || inner_mode == HImode))
6186 op = XEXP (inner, 0);
6187 break;
6188 default:
6189 break;
6190 }
6191
6192 return op;
6193}
6194
df401d54 6195/* Calculate the cost of calculating X, storing it in *COST. Result
6196 is true if the total cost of the operation has now been calculated. */
6197static bool
5ae4887d 6198aarch64_rtx_costs (rtx x, machine_mode mode, int outer ATTRIBUTE_UNUSED,
df401d54 6199 int param ATTRIBUTE_UNUSED, int *cost, bool speed)
6200{
7105f3a4 6201 rtx op0, op1, op2;
b29be4e7 6202 const struct cpu_cost_table *extra_cost
14677da9 6203 = aarch64_tune_params.insn_extra_cost;
5ae4887d 6204 int code = GET_CODE (x);
df401d54 6205
8c31cc62 6206 /* By default, assume that everything has equivalent cost to the
6207 cheapest instruction. Any additional costs are applied as a delta
6208 above this default. */
6209 *cost = COSTS_N_INSNS (1);
6210
df401d54 6211 switch (code)
6212 {
6213 case SET:
600207df 6214 /* The cost depends entirely on the operands to SET. */
6215 *cost = 0;
df401d54 6216 op0 = SET_DEST (x);
6217 op1 = SET_SRC (x);
6218
6219 switch (GET_CODE (op0))
6220 {
6221 case MEM:
6222 if (speed)
af274096 6223 {
6224 rtx address = XEXP (op0, 0);
155e90fe 6225 if (VECTOR_MODE_P (mode))
6226 *cost += extra_cost->ldst.storev;
6227 else if (GET_MODE_CLASS (mode) == MODE_INT)
af274096 6228 *cost += extra_cost->ldst.store;
6229 else if (mode == SFmode)
6230 *cost += extra_cost->ldst.storef;
6231 else if (mode == DFmode)
6232 *cost += extra_cost->ldst.stored;
6233
6234 *cost +=
6235 COSTS_N_INSNS (aarch64_address_cost (address, mode,
6236 0, speed));
6237 }
df401d54 6238
5ae4887d 6239 *cost += rtx_cost (op1, mode, SET, 1, speed);
df401d54 6240 return true;
6241
6242 case SUBREG:
6243 if (! REG_P (SUBREG_REG (op0)))
5ae4887d 6244 *cost += rtx_cost (SUBREG_REG (op0), VOIDmode, SET, 0, speed);
600207df 6245
df401d54 6246 /* Fall through. */
6247 case REG:
155e90fe 6248 /* The cost is one per vector-register copied. */
6249 if (VECTOR_MODE_P (GET_MODE (op0)) && REG_P (op1))
6250 {
6251 int n_minus_1 = (GET_MODE_SIZE (GET_MODE (op0)) - 1)
6252 / GET_MODE_SIZE (V4SImode);
6253 *cost = COSTS_N_INSNS (n_minus_1 + 1);
6254 }
600207df 6255 /* const0_rtx is in general free, but we will use an
6256 instruction to set a register to 0. */
155e90fe 6257 else if (REG_P (op1) || op1 == const0_rtx)
6258 {
6259 /* The cost is 1 per register copied. */
6260 int n_minus_1 = (GET_MODE_SIZE (GET_MODE (op0)) - 1)
600207df 6261 / UNITS_PER_WORD;
155e90fe 6262 *cost = COSTS_N_INSNS (n_minus_1 + 1);
6263 }
600207df 6264 else
6265 /* Cost is just the cost of the RHS of the set. */
5ae4887d 6266 *cost += rtx_cost (op1, mode, SET, 1, speed);
df401d54 6267 return true;
6268
600207df 6269 case ZERO_EXTRACT:
df401d54 6270 case SIGN_EXTRACT:
600207df 6271 /* Bit-field insertion. Strip any redundant widening of
6272 the RHS to meet the width of the target. */
df401d54 6273 if (GET_CODE (op1) == SUBREG)
6274 op1 = SUBREG_REG (op1);
6275 if ((GET_CODE (op1) == ZERO_EXTEND
6276 || GET_CODE (op1) == SIGN_EXTEND)
978f06f4 6277 && CONST_INT_P (XEXP (op0, 1))
df401d54 6278 && (GET_MODE_BITSIZE (GET_MODE (XEXP (op1, 0)))
6279 >= INTVAL (XEXP (op0, 1))))
6280 op1 = XEXP (op1, 0);
600207df 6281
6282 if (CONST_INT_P (op1))
6283 {
6284 /* MOV immediate is assumed to always be cheap. */
6285 *cost = COSTS_N_INSNS (1);
6286 }
6287 else
6288 {
6289 /* BFM. */
6290 if (speed)
6291 *cost += extra_cost->alu.bfi;
5ae4887d 6292 *cost += rtx_cost (op1, VOIDmode, (enum rtx_code) code, 1, speed);
600207df 6293 }
6294
df401d54 6295 return true;
6296
6297 default:
600207df 6298 /* We can't make sense of this, assume default cost. */
6299 *cost = COSTS_N_INSNS (1);
fb53b6aa 6300 return false;
df401d54 6301 }
6302 return false;
6303
3967ee63 6304 case CONST_INT:
6305 /* If an instruction can incorporate a constant within the
6306 instruction, the instruction's expression avoids calling
6307 rtx_cost() on the constant. If rtx_cost() is called on a
6308 constant, then it is usually because the constant must be
6309 moved into a register by one or more instructions.
6310
6311 The exception is constant 0, which can be expressed
 6312	 as XZR/WZR and is therefore free.  The only case where it is
 6313	 not free is (set (reg) (const0_rtx)), in which case we must cost
 6314	 the move.  However, we can catch that when we cost the SET, so
6315 we don't need to consider that here. */
6316 if (x == const0_rtx)
6317 *cost = 0;
6318 else
6319 {
6320 /* To an approximation, building any other constant is
6321 proportionally expensive to the number of instructions
6322 required to build that constant. This is true whether we
6323 are compiling for SPEED or otherwise. */
a6cab7d4 6324 *cost = COSTS_N_INSNS (aarch64_internal_mov_immediate
6325 (NULL_RTX, x, false, mode));
3967ee63 6326 }
6327 return true;
6328
6329 case CONST_DOUBLE:
6330 if (speed)
6331 {
6332 /* mov[df,sf]_aarch64. */
6333 if (aarch64_float_const_representable_p (x))
6334 /* FMOV (scalar immediate). */
6335 *cost += extra_cost->fp[mode == DFmode].fpconst;
6336 else if (!aarch64_float_const_zero_rtx_p (x))
6337 {
6338 /* This will be a load from memory. */
6339 if (mode == DFmode)
6340 *cost += extra_cost->ldst.loadd;
6341 else
6342 *cost += extra_cost->ldst.loadf;
6343 }
6344 else
6345 /* Otherwise this is +0.0. We get this using MOVI d0, #0
6346 or MOV v0.s[0], wzr - neither of which are modeled by the
6347 cost tables. Just use the default cost. */
6348 {
6349 }
6350 }
6351
6352 return true;
6353
df401d54 6354 case MEM:
6355 if (speed)
af274096 6356 {
6357 /* For loads we want the base cost of a load, plus an
6358 approximation for the additional cost of the addressing
6359 mode. */
6360 rtx address = XEXP (x, 0);
155e90fe 6361 if (VECTOR_MODE_P (mode))
6362 *cost += extra_cost->ldst.loadv;
6363 else if (GET_MODE_CLASS (mode) == MODE_INT)
af274096 6364 *cost += extra_cost->ldst.load;
6365 else if (mode == SFmode)
6366 *cost += extra_cost->ldst.loadf;
6367 else if (mode == DFmode)
6368 *cost += extra_cost->ldst.loadd;
6369
6370 *cost +=
6371 COSTS_N_INSNS (aarch64_address_cost (address, mode,
6372 0, speed));
6373 }
df401d54 6374
6375 return true;
6376
6377 case NEG:
2f0038b0 6378 op0 = XEXP (x, 0);
6379
155e90fe 6380 if (VECTOR_MODE_P (mode))
6381 {
6382 if (speed)
6383 {
6384 /* FNEG. */
6385 *cost += extra_cost->vect.alu;
6386 }
6387 return false;
6388 }
6389
5ae4887d 6390 if (GET_MODE_CLASS (mode) == MODE_INT)
6391 {
2f0038b0 6392 if (GET_RTX_CLASS (GET_CODE (op0)) == RTX_COMPARE
6393 || GET_RTX_CLASS (GET_CODE (op0)) == RTX_COMM_COMPARE)
6394 {
6395 /* CSETM. */
5ae4887d 6396 *cost += rtx_cost (XEXP (op0, 0), VOIDmode, NEG, 0, speed);
2f0038b0 6397 return true;
6398 }
6399
6400 /* Cost this as SUB wzr, X. */
5ae4887d 6401 op0 = CONST0_RTX (mode);
2f0038b0 6402 op1 = XEXP (x, 0);
6403 goto cost_minus;
6404 }
6405
5ae4887d 6406 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
2f0038b0 6407 {
6408 /* Support (neg(fma...)) as a single instruction only if
6409 sign of zeros is unimportant. This matches the decision
6410 making in aarch64.md. */
6411 if (GET_CODE (op0) == FMA && !HONOR_SIGNED_ZEROS (GET_MODE (op0)))
6412 {
6413 /* FNMADD. */
5ae4887d 6414 *cost = rtx_cost (op0, mode, NEG, 0, speed);
2f0038b0 6415 return true;
6416 }
32c25e89 6417 if (GET_CODE (op0) == MULT)
6418 {
6419 /* FNMUL. */
6420 *cost = rtx_cost (op0, mode, NEG, 0, speed);
6421 return true;
6422 }
2f0038b0 6423 if (speed)
6424 /* FNEG. */
6425 *cost += extra_cost->fp[mode == DFmode].neg;
6426 return false;
6427 }
6428
6429 return false;
df401d54 6430
4c3697c8 6431 case CLRSB:
6432 case CLZ:
6433 if (speed)
155e90fe 6434 {
6435 if (VECTOR_MODE_P (mode))
6436 *cost += extra_cost->vect.alu;
6437 else
6438 *cost += extra_cost->alu.clz;
6439 }
4c3697c8 6440
6441 return false;
6442
df401d54 6443 case COMPARE:
6444 op0 = XEXP (x, 0);
6445 op1 = XEXP (x, 1);
6446
6447 if (op1 == const0_rtx
6448 && GET_CODE (op0) == AND)
6449 {
6450 x = op0;
5ae4887d 6451 mode = GET_MODE (op0);
df401d54 6452 goto cost_logic;
6453 }
6454
7105f3a4 6455 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT)
6456 {
6457 /* TODO: A write to the CC flags possibly costs extra, this
6458 needs encoding in the cost tables. */
6459
6460 /* CC_ZESWPmode supports zero extend for free. */
5ae4887d 6461 if (mode == CC_ZESWPmode && GET_CODE (op0) == ZERO_EXTEND)
7105f3a4 6462 op0 = XEXP (op0, 0);
6463
5ae4887d 6464 mode = GET_MODE (op0);
7105f3a4 6465 /* ANDS. */
6466 if (GET_CODE (op0) == AND)
6467 {
6468 x = op0;
6469 goto cost_logic;
6470 }
6471
6472 if (GET_CODE (op0) == PLUS)
6473 {
6474 /* ADDS (and CMN alias). */
6475 x = op0;
6476 goto cost_plus;
6477 }
6478
6479 if (GET_CODE (op0) == MINUS)
6480 {
6481 /* SUBS. */
6482 x = op0;
6483 goto cost_minus;
6484 }
6485
6486 if (GET_CODE (op1) == NEG)
6487 {
6488 /* CMN. */
6489 if (speed)
6490 *cost += extra_cost->alu.arith;
6491
5ae4887d 6492 *cost += rtx_cost (op0, mode, COMPARE, 0, speed);
6493 *cost += rtx_cost (XEXP (op1, 0), mode, NEG, 1, speed);
7105f3a4 6494 return true;
6495 }
6496
6497 /* CMP.
6498
6499 Compare can freely swap the order of operands, and
6500 canonicalization puts the more complex operation first.
6501 But the integer MINUS logic expects the shift/extend
6502 operation in op1. */
6503 if (! (REG_P (op0)
6504 || (GET_CODE (op0) == SUBREG && REG_P (SUBREG_REG (op0)))))
6505 {
6506 op0 = XEXP (x, 1);
6507 op1 = XEXP (x, 0);
6508 }
6509 goto cost_minus;
6510 }
6511
6512 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_FLOAT)
6513 {
6514 /* FCMP. */
6515 if (speed)
6516 *cost += extra_cost->fp[mode == DFmode].compare;
6517
6518 if (CONST_DOUBLE_P (op1) && aarch64_float_const_zero_rtx_p (op1))
6519 {
5ae4887d 6520 *cost += rtx_cost (op0, VOIDmode, COMPARE, 0, speed);
7105f3a4 6521 /* FCMP supports constant 0.0 for no extra cost. */
6522 return true;
6523 }
6524 return false;
6525 }
6526
155e90fe 6527 if (VECTOR_MODE_P (mode))
6528 {
6529 /* Vector compare. */
6530 if (speed)
6531 *cost += extra_cost->vect.alu;
6532
6533 if (aarch64_float_const_zero_rtx_p (op1))
6534 {
6535 /* Vector cm (eq|ge|gt|lt|le) supports constant 0.0 for no extra
6536 cost. */
6537 return true;
6538 }
6539 return false;
6540 }
7105f3a4 6541 return false;
df401d54 6542
6543 case MINUS:
2f0038b0 6544 {
6545 op0 = XEXP (x, 0);
6546 op1 = XEXP (x, 1);
6547
6548cost_minus:
5ae4887d 6549 *cost += rtx_cost (op0, mode, MINUS, 0, speed);
aa32f5e1 6550
2f0038b0 6551 /* Detect valid immediates. */
6552 if ((GET_MODE_CLASS (mode) == MODE_INT
6553 || (GET_MODE_CLASS (mode) == MODE_CC
6554 && GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT))
6555 && CONST_INT_P (op1)
6556 && aarch64_uimm12_shift (INTVAL (op1)))
6557 {
2f0038b0 6558 if (speed)
6559 /* SUB(S) (immediate). */
6560 *cost += extra_cost->alu.arith;
6561 return true;
2f0038b0 6562 }
6563
d515bbc9 6564 /* Look for SUB (extended register). */
6565 if (aarch64_rtx_arith_op_extract_p (op1, mode))
6566 {
6567 if (speed)
f6935cdd 6568 *cost += extra_cost->alu.extend_arith;
d515bbc9 6569
9d8e046e 6570 op1 = aarch64_strip_extend (op1);
6571 *cost += rtx_cost (op1, VOIDmode,
5ae4887d 6572 (enum rtx_code) GET_CODE (op1), 0, speed);
d515bbc9 6573 return true;
6574 }
6575
2f0038b0 6576 rtx new_op1 = aarch64_strip_extend (op1);
6577
6578 /* Cost this as an FMA-alike operation. */
6579 if ((GET_CODE (new_op1) == MULT
4ab3c0df 6580 || aarch64_shift_p (GET_CODE (new_op1)))
2f0038b0 6581 && code != COMPARE)
6582 {
6583 *cost += aarch64_rtx_mult_cost (new_op1, MULT,
6584 (enum rtx_code) code,
6585 speed);
2f0038b0 6586 return true;
6587 }
df401d54 6588
5ae4887d 6589 *cost += rtx_cost (new_op1, VOIDmode, MINUS, 1, speed);
df401d54 6590
2f0038b0 6591 if (speed)
6592 {
155e90fe 6593 if (VECTOR_MODE_P (mode))
6594 {
6595 /* Vector SUB. */
6596 *cost += extra_cost->vect.alu;
6597 }
6598 else if (GET_MODE_CLASS (mode) == MODE_INT)
6599 {
6600 /* SUB(S). */
6601 *cost += extra_cost->alu.arith;
6602 }
2f0038b0 6603 else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
155e90fe 6604 {
6605 /* FSUB. */
6606 *cost += extra_cost->fp[mode == DFmode].addsub;
6607 }
2f0038b0 6608 }
6609 return true;
6610 }
df401d54 6611
6612 case PLUS:
2f0038b0 6613 {
6614 rtx new_op0;
df401d54 6615
2f0038b0 6616 op0 = XEXP (x, 0);
6617 op1 = XEXP (x, 1);
df401d54 6618
7105f3a4 6619cost_plus:
2f0038b0 6620 if (GET_RTX_CLASS (GET_CODE (op0)) == RTX_COMPARE
6621 || GET_RTX_CLASS (GET_CODE (op0)) == RTX_COMM_COMPARE)
6622 {
6623 /* CSINC. */
5ae4887d 6624 *cost += rtx_cost (XEXP (op0, 0), mode, PLUS, 0, speed);
6625 *cost += rtx_cost (op1, mode, PLUS, 1, speed);
2f0038b0 6626 return true;
6627 }
df401d54 6628
2f0038b0 6629 if (GET_MODE_CLASS (mode) == MODE_INT
6630 && CONST_INT_P (op1)
6631 && aarch64_uimm12_shift (INTVAL (op1)))
6632 {
5ae4887d 6633 *cost += rtx_cost (op0, mode, PLUS, 0, speed);
df401d54 6634
2f0038b0 6635 if (speed)
6636 /* ADD (immediate). */
6637 *cost += extra_cost->alu.arith;
6638 return true;
6639 }
6640
5ae4887d 6641 *cost += rtx_cost (op1, mode, PLUS, 1, speed);
aa32f5e1 6642
d515bbc9 6643 /* Look for ADD (extended register). */
6644 if (aarch64_rtx_arith_op_extract_p (op0, mode))
6645 {
6646 if (speed)
f6935cdd 6647 *cost += extra_cost->alu.extend_arith;
d515bbc9 6648
9d8e046e 6649 op0 = aarch64_strip_extend (op0);
6650 *cost += rtx_cost (op0, VOIDmode,
5ae4887d 6651 (enum rtx_code) GET_CODE (op0), 0, speed);
d515bbc9 6652 return true;
6653 }
6654
2f0038b0 6655 /* Strip any extend, leave shifts behind as we will
6656 cost them through mult_cost. */
6657 new_op0 = aarch64_strip_extend (op0);
6658
6659 if (GET_CODE (new_op0) == MULT
4ab3c0df 6660 || aarch64_shift_p (GET_CODE (new_op0)))
2f0038b0 6661 {
6662 *cost += aarch64_rtx_mult_cost (new_op0, MULT, PLUS,
6663 speed);
2f0038b0 6664 return true;
6665 }
6666
5ae4887d 6667 *cost += rtx_cost (new_op0, VOIDmode, PLUS, 0, speed);
2f0038b0 6668
6669 if (speed)
6670 {
155e90fe 6671 if (VECTOR_MODE_P (mode))
6672 {
6673 /* Vector ADD. */
6674 *cost += extra_cost->vect.alu;
6675 }
6676 else if (GET_MODE_CLASS (mode) == MODE_INT)
6677 {
6678 /* ADD. */
6679 *cost += extra_cost->alu.arith;
6680 }
2f0038b0 6681 else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
155e90fe 6682 {
6683 /* FADD. */
6684 *cost += extra_cost->fp[mode == DFmode].addsub;
6685 }
2f0038b0 6686 }
6687 return true;
6688 }
df401d54 6689
b1aadd97 6690 case BSWAP:
6691 *cost = COSTS_N_INSNS (1);
6692
6693 if (speed)
155e90fe 6694 {
6695 if (VECTOR_MODE_P (mode))
6696 *cost += extra_cost->vect.alu;
6697 else
6698 *cost += extra_cost->alu.rev;
6699 }
b1aadd97 6700 return false;
6701
df401d54 6702 case IOR:
d049924d 6703 if (aarch_rev16_p (x))
6704 {
6705 *cost = COSTS_N_INSNS (1);
6706
155e90fe 6707 if (speed)
6708 {
6709 if (VECTOR_MODE_P (mode))
6710 *cost += extra_cost->vect.alu;
6711 else
6712 *cost += extra_cost->alu.rev;
6713 }
6714 return true;
d049924d 6715 }
7e733b67 6716
6717 if (aarch64_extr_rtx_p (x, &op0, &op1))
6718 {
5ae4887d 6719 *cost += rtx_cost (op0, mode, IOR, 0, speed);
6720 *cost += rtx_cost (op1, mode, IOR, 1, speed);
7e733b67 6721 if (speed)
6722 *cost += extra_cost->alu.shift;
6723
6724 return true;
6725 }
d049924d 6726 /* Fall through. */
df401d54 6727 case XOR:
6728 case AND:
6729 cost_logic:
6730 op0 = XEXP (x, 0);
6731 op1 = XEXP (x, 1);
6732
155e90fe 6733 if (VECTOR_MODE_P (mode))
6734 {
6735 if (speed)
6736 *cost += extra_cost->vect.alu;
6737 return true;
6738 }
6739
69ddbb92 6740 if (code == AND
6741 && GET_CODE (op0) == MULT
6742 && CONST_INT_P (XEXP (op0, 1))
6743 && CONST_INT_P (op1)
6744 && aarch64_uxt_size (exact_log2 (INTVAL (XEXP (op0, 1))),
6745 INTVAL (op1)) != 0)
6746 {
6747 /* This is a UBFM/SBFM. */
5ae4887d 6748 *cost += rtx_cost (XEXP (op0, 0), mode, ZERO_EXTRACT, 0, speed);
69ddbb92 6749 if (speed)
6750 *cost += extra_cost->alu.bfx;
6751 return true;
6752 }
6753
5ae4887d 6754 if (GET_MODE_CLASS (mode) == MODE_INT)
df401d54 6755 {
69ddbb92 6756	  /* We possibly get the immediate for free; this is not
 6757	     modelled. */
df401d54 6758 if (CONST_INT_P (op1)
5ae4887d 6759 && aarch64_bitmask_imm (INTVAL (op1), mode))
df401d54 6760 {
5ae4887d 6761 *cost += rtx_cost (op0, mode, (enum rtx_code) code, 0, speed);
69ddbb92 6762
6763 if (speed)
6764 *cost += extra_cost->alu.logical;
6765
6766 return true;
df401d54 6767 }
6768 else
6769 {
69ddbb92 6770 rtx new_op0 = op0;
6771
6772 /* Handle ORN, EON, or BIC. */
df401d54 6773 if (GET_CODE (op0) == NOT)
6774 op0 = XEXP (op0, 0);
69ddbb92 6775
6776 new_op0 = aarch64_strip_shift (op0);
6777
6778 /* If we had a shift on op0 then this is a logical-shift-
6779 by-register/immediate operation. Otherwise, this is just
6780 a logical operation. */
6781 if (speed)
6782 {
6783 if (new_op0 != op0)
6784 {
6785 /* Shift by immediate. */
6786 if (CONST_INT_P (XEXP (op0, 1)))
6787 *cost += extra_cost->alu.log_shift;
6788 else
6789 *cost += extra_cost->alu.log_shift_reg;
6790 }
6791 else
6792 *cost += extra_cost->alu.logical;
6793 }
6794
6795 /* In both cases we want to cost both operands. */
5ae4887d 6796 *cost += rtx_cost (new_op0, mode, (enum rtx_code) code, 0, speed);
6797 *cost += rtx_cost (op1, mode, (enum rtx_code) code, 1, speed);
69ddbb92 6798
6799 return true;
df401d54 6800 }
df401d54 6801 }
6802 return false;
6803
69ddbb92 6804 case NOT:
17a5981f 6805 x = XEXP (x, 0);
6806 op0 = aarch64_strip_shift (x);
6807
155e90fe 6808 if (VECTOR_MODE_P (mode))
6809 {
6810 /* Vector NOT. */
6811 *cost += extra_cost->vect.alu;
6812 return false;
6813 }
6814
17a5981f 6815 /* MVN-shifted-reg. */
6816 if (op0 != x)
6817 {
5ae4887d 6818 *cost += rtx_cost (op0, mode, (enum rtx_code) code, 0, speed);
17a5981f 6819
6820 if (speed)
6821 *cost += extra_cost->alu.log_shift;
6822
6823 return true;
6824 }
 6825	/* EON can have two forms: (xor (not a) b) and (not (xor a b)).
 6826	   Handle the second form here, taking care that 'a' above can
 6827	   be a shift. */
6828 else if (GET_CODE (op0) == XOR)
6829 {
6830 rtx newop0 = XEXP (op0, 0);
6831 rtx newop1 = XEXP (op0, 1);
6832 rtx op0_stripped = aarch64_strip_shift (newop0);
6833
5ae4887d 6834 *cost += rtx_cost (newop1, mode, (enum rtx_code) code, 1, speed);
6835 *cost += rtx_cost (op0_stripped, mode, XOR, 0, speed);
17a5981f 6836
6837 if (speed)
6838 {
6839 if (op0_stripped != newop0)
6840 *cost += extra_cost->alu.log_shift;
6841 else
6842 *cost += extra_cost->alu.logical;
6843 }
6844
6845 return true;
6846 }
69ddbb92 6847 /* MVN. */
6848 if (speed)
6849 *cost += extra_cost->alu.logical;
6850
69ddbb92 6851 return false;
6852
df401d54 6853 case ZERO_EXTEND:
81d1eb9d 6854
6855 op0 = XEXP (x, 0);
 6856      /* If a value is written in SI mode and then zero-extended to DI
 6857	 mode, the operation will in general be free, since a write to
 6858	 a 'w' register implicitly zeroes the upper bits of an 'x'
 6859	 register.  However, if this is
6860
6861 (set (reg) (zero_extend (reg)))
6862
6863 we must cost the explicit register move. */
6864 if (mode == DImode
6865 && GET_MODE (op0) == SImode
6866 && outer == SET)
6867 {
5ae4887d 6868 int op_cost = rtx_cost (op0, VOIDmode, ZERO_EXTEND, 0, speed);
81d1eb9d 6869
6870 if (!op_cost && speed)
6871 /* MOV. */
6872 *cost += extra_cost->alu.extend;
6873 else
6874 /* Free, the cost is that of the SI mode operation. */
6875 *cost = op_cost;
6876
6877 return true;
6878 }
5ae4887d 6879 else if (MEM_P (op0))
df401d54 6880 {
81d1eb9d 6881 /* All loads can zero extend to any size for free. */
5ae4887d 6882 *cost = rtx_cost (op0, VOIDmode, ZERO_EXTEND, param, speed);
df401d54 6883 return true;
6884 }
81d1eb9d 6885
82fc3cbd 6886 op0 = aarch64_extend_bitfield_pattern_p (x);
6887 if (op0)
6888 {
6889 *cost += rtx_cost (op0, mode, ZERO_EXTEND, 0, speed);
6890 if (speed)
6891 *cost += extra_cost->alu.bfx;
6892 return true;
6893 }
6894
81d1eb9d 6895 if (speed)
155e90fe 6896 {
6897 if (VECTOR_MODE_P (mode))
6898 {
6899 /* UMOV. */
6900 *cost += extra_cost->vect.alu;
6901 }
6902 else
6903 {
6904 /* UXTB/UXTH. */
6905 *cost += extra_cost->alu.extend;
6906 }
6907 }
df401d54 6908 return false;
6909
6910 case SIGN_EXTEND:
81d1eb9d 6911 if (MEM_P (XEXP (x, 0)))
df401d54 6912 {
81d1eb9d 6913 /* LDRSH. */
6914 if (speed)
6915 {
6916 rtx address = XEXP (XEXP (x, 0), 0);
6917 *cost += extra_cost->ldst.load_sign_extend;
6918
6919 *cost +=
6920 COSTS_N_INSNS (aarch64_address_cost (address, mode,
6921 0, speed));
6922 }
df401d54 6923 return true;
6924 }
81d1eb9d 6925
82fc3cbd 6926 op0 = aarch64_extend_bitfield_pattern_p (x);
6927 if (op0)
6928 {
6929 *cost += rtx_cost (op0, mode, SIGN_EXTEND, 0, speed);
6930 if (speed)
6931 *cost += extra_cost->alu.bfx;
6932 return true;
6933 }
6934
81d1eb9d 6935 if (speed)
155e90fe 6936 {
6937 if (VECTOR_MODE_P (mode))
6938 *cost += extra_cost->vect.alu;
6939 else
6940 *cost += extra_cost->alu.extend;
6941 }
df401d54 6942 return false;
6943
89821004 6944 case ASHIFT:
6945 op0 = XEXP (x, 0);
6946 op1 = XEXP (x, 1);
6947
6948 if (CONST_INT_P (op1))
6949 {
89821004 6950 if (speed)
155e90fe 6951 {
6952 if (VECTOR_MODE_P (mode))
6953 {
6954 /* Vector shift (immediate). */
6955 *cost += extra_cost->vect.alu;
6956 }
6957 else
6958 {
 6959		  /* LSL (immediate), UBFM, UBFIZ and friends.  These are all
6960 aliases. */
6961 *cost += extra_cost->alu.shift;
6962 }
6963 }
89821004 6964
6965 /* We can incorporate zero/sign extend for free. */
6966 if (GET_CODE (op0) == ZERO_EXTEND
6967 || GET_CODE (op0) == SIGN_EXTEND)
6968 op0 = XEXP (op0, 0);
6969
5ae4887d 6970 *cost += rtx_cost (op0, VOIDmode, ASHIFT, 0, speed);
89821004 6971 return true;
6972 }
6973 else
6974 {
89821004 6975 if (speed)
155e90fe 6976 {
6977 if (VECTOR_MODE_P (mode))
6978 {
6979 /* Vector shift (register). */
6980 *cost += extra_cost->vect.alu;
6981 }
6982 else
6983 {
6984 /* LSLV. */
6985 *cost += extra_cost->alu.shift_reg;
6986 }
6987 }
89821004 6988 return false; /* All arguments need to be in registers. */
6989 }
6990
df401d54 6991 case ROTATE:
df401d54 6992 case ROTATERT:
6993 case LSHIFTRT:
df401d54 6994 case ASHIFTRT:
89821004 6995 op0 = XEXP (x, 0);
6996 op1 = XEXP (x, 1);
df401d54 6997
89821004 6998 if (CONST_INT_P (op1))
6999 {
7000 /* ASR (immediate) and friends. */
7001 if (speed)
155e90fe 7002 {
7003 if (VECTOR_MODE_P (mode))
7004 *cost += extra_cost->vect.alu;
7005 else
7006 *cost += extra_cost->alu.shift;
7007 }
df401d54 7008
5ae4887d 7009 *cost += rtx_cost (op0, mode, (enum rtx_code) code, 0, speed);
89821004 7010 return true;
7011 }
7012 else
7013 {
7014
7015 /* ASR (register) and friends. */
7016 if (speed)
155e90fe 7017 {
7018 if (VECTOR_MODE_P (mode))
7019 *cost += extra_cost->vect.alu;
7020 else
7021 *cost += extra_cost->alu.shift_reg;
7022 }
89821004 7023 return false; /* All arguments need to be in registers. */
7024 }
df401d54 7025
d369e66f 7026 case SYMBOL_REF:
7027
65f988f7 7028 if (aarch64_cmodel == AARCH64_CMODEL_LARGE
7029 || aarch64_cmodel == AARCH64_CMODEL_SMALL_SPIC)
d369e66f 7030 {
7031 /* LDR. */
7032 if (speed)
7033 *cost += extra_cost->ldst.load;
7034 }
7035 else if (aarch64_cmodel == AARCH64_CMODEL_SMALL
7036 || aarch64_cmodel == AARCH64_CMODEL_SMALL_PIC)
7037 {
7038 /* ADRP, followed by ADD. */
7039 *cost += COSTS_N_INSNS (1);
7040 if (speed)
7041 *cost += 2 * extra_cost->alu.arith;
7042 }
7043 else if (aarch64_cmodel == AARCH64_CMODEL_TINY
7044 || aarch64_cmodel == AARCH64_CMODEL_TINY_PIC)
7045 {
7046 /* ADR. */
7047 if (speed)
7048 *cost += extra_cost->alu.arith;
7049 }
7050
7051 if (flag_pic)
7052 {
7053 /* One extra load instruction, after accessing the GOT. */
7054 *cost += COSTS_N_INSNS (1);
7055 if (speed)
7056 *cost += extra_cost->ldst.load;
7057 }
df401d54 7058 return true;
7059
d369e66f 7060 case HIGH:
df401d54 7061 case LO_SUM:
d369e66f 7062 /* ADRP/ADD (immediate). */
7063 if (speed)
7064 *cost += extra_cost->alu.arith;
df401d54 7065 return true;
7066
7067 case ZERO_EXTRACT:
7068 case SIGN_EXTRACT:
d515bbc9 7069 /* UBFX/SBFX. */
7070 if (speed)
155e90fe 7071 {
7072 if (VECTOR_MODE_P (mode))
7073 *cost += extra_cost->vect.alu;
7074 else
7075 *cost += extra_cost->alu.bfx;
7076 }
d515bbc9 7077
7078 /* We can trust that the immediates used will be correct (there
7079 are no by-register forms), so we need only cost op0. */
5ae4887d 7080 *cost += rtx_cost (XEXP (x, 0), VOIDmode, (enum rtx_code) code, 0, speed);
df401d54 7081 return true;
7082
7083 case MULT:
2f0038b0 7084 *cost += aarch64_rtx_mult_cost (x, MULT, 0, speed);
7085 /* aarch64_rtx_mult_cost always handles recursion to its
7086 operands. */
7087 return true;
df401d54 7088
7089 case MOD:
5e3b2855 7090 /* We can expand signed mod by power of 2 using a NEGS, two parallel
 7091	 ANDs and a CSNEG.  Assume here that CSNEG costs the same as
7092 an unconditional negate. This case should only ever be reached through
7093 the set_smod_pow2_cheap check in expmed.c. */
7094 if (CONST_INT_P (XEXP (x, 1))
7095 && exact_log2 (INTVAL (XEXP (x, 1))) > 0
7096 && (mode == SImode || mode == DImode))
7097 {
7098 /* We expand to 4 instructions. Reset the baseline. */
7099 *cost = COSTS_N_INSNS (4);
7100
7101 if (speed)
7102 *cost += 2 * extra_cost->alu.logical
7103 + 2 * extra_cost->alu.arith;
7104
7105 return true;
7106 }
7107
7108 /* Fall-through. */
df401d54 7109 case UMOD:
df401d54 7110 if (speed)
7111 {
155e90fe 7112 if (VECTOR_MODE_P (mode))
7113 *cost += extra_cost->vect.alu;
5ae4887d 7114 else if (GET_MODE_CLASS (mode) == MODE_INT)
7115 *cost += (extra_cost->mult[mode == DImode].add
7116 + extra_cost->mult[mode == DImode].idiv);
7117 else if (mode == DFmode)
b29be4e7 7118 *cost += (extra_cost->fp[1].mult
7119 + extra_cost->fp[1].div);
5ae4887d 7120 else if (mode == SFmode)
b29be4e7 7121 *cost += (extra_cost->fp[0].mult
7122 + extra_cost->fp[0].div);
df401d54 7123 }
7124 return false; /* All arguments need to be in registers. */
7125
7126 case DIV:
7127 case UDIV:
8065e8b4 7128 case SQRT:
df401d54 7129 if (speed)
7130 {
155e90fe 7131 if (VECTOR_MODE_P (mode))
7132 *cost += extra_cost->vect.alu;
7133 else if (GET_MODE_CLASS (mode) == MODE_INT)
8065e8b4 7134 /* There is no integer SQRT, so only DIV and UDIV can get
7135 here. */
7136 *cost += extra_cost->mult[mode == DImode].idiv;
7137 else
7138 *cost += extra_cost->fp[mode == DFmode].div;
df401d54 7139 }
7140 return false; /* All arguments need to be in registers. */
7141
7105f3a4 7142 case IF_THEN_ELSE:
39f4504d 7143 return aarch64_if_then_else_costs (XEXP (x, 0), XEXP (x, 1),
7144 XEXP (x, 2), cost, speed);
7105f3a4 7145
7146 case EQ:
7147 case NE:
7148 case GT:
7149 case GTU:
7150 case LT:
7151 case LTU:
7152 case GE:
7153 case GEU:
7154 case LE:
7155 case LEU:
7156
7157 return false; /* All arguments must be in registers. */
7158
d31e8ed0 7159 case FMA:
7160 op0 = XEXP (x, 0);
7161 op1 = XEXP (x, 1);
7162 op2 = XEXP (x, 2);
7163
7164 if (speed)
155e90fe 7165 {
7166 if (VECTOR_MODE_P (mode))
7167 *cost += extra_cost->vect.alu;
7168 else
7169 *cost += extra_cost->fp[mode == DFmode].fma;
7170 }
d31e8ed0 7171
7172 /* FMSUB, FNMADD, and FNMSUB are free. */
7173 if (GET_CODE (op0) == NEG)
7174 op0 = XEXP (op0, 0);
7175
7176 if (GET_CODE (op2) == NEG)
7177 op2 = XEXP (op2, 0);
7178
7179 /* aarch64_fnma4_elt_to_64v2df has the NEG as operand 1,
7180 and the by-element operand as operand 0. */
7181 if (GET_CODE (op1) == NEG)
7182 op1 = XEXP (op1, 0);
7183
7184 /* Catch vector-by-element operations. The by-element operand can
7185 either be (vec_duplicate (vec_select (x))) or just
7186 (vec_select (x)), depending on whether we are multiplying by
7187 a vector or a scalar.
7188
 7189	 Canonicalization is not very good in these cases: FMA4 will put the
 7190	 by-element operand as operand 0, while FNMA4 will have it as operand 1. */
7191 if (GET_CODE (op0) == VEC_DUPLICATE)
7192 op0 = XEXP (op0, 0);
7193 else if (GET_CODE (op1) == VEC_DUPLICATE)
7194 op1 = XEXP (op1, 0);
7195
7196 if (GET_CODE (op0) == VEC_SELECT)
7197 op0 = XEXP (op0, 0);
7198 else if (GET_CODE (op1) == VEC_SELECT)
7199 op1 = XEXP (op1, 0);
7200
7201 /* If the remaining parameters are not registers,
7202 get the cost to put them into registers. */
5ae4887d 7203 *cost += rtx_cost (op0, mode, FMA, 0, speed);
7204 *cost += rtx_cost (op1, mode, FMA, 1, speed);
7205 *cost += rtx_cost (op2, mode, FMA, 2, speed);
d31e8ed0 7206 return true;
7207
61f1e9bc 7208 case FLOAT:
7209 case UNSIGNED_FLOAT:
7210 if (speed)
7211 *cost += extra_cost->fp[mode == DFmode].fromint;
7212 return false;
7213
d31e8ed0 7214 case FLOAT_EXTEND:
7215 if (speed)
155e90fe 7216 {
7217 if (VECTOR_MODE_P (mode))
7218 {
 7219		/* Vector widening conversion.  */
7220 *cost += extra_cost->vect.alu;
7221 }
7222 else
7223 *cost += extra_cost->fp[mode == DFmode].widen;
7224 }
d31e8ed0 7225 return false;
7226
7227 case FLOAT_TRUNCATE:
7228 if (speed)
155e90fe 7229 {
7230 if (VECTOR_MODE_P (mode))
7231 {
 7232		/* Vector narrowing conversion.  */
7233 *cost += extra_cost->vect.alu;
7234 }
7235 else
7236 *cost += extra_cost->fp[mode == DFmode].narrow;
7237 }
d31e8ed0 7238 return false;
7239
fb53b6aa 7240 case FIX:
7241 case UNSIGNED_FIX:
7242 x = XEXP (x, 0);
7243 /* Strip the rounding part. They will all be implemented
7244 by the fcvt* family of instructions anyway. */
7245 if (GET_CODE (x) == UNSPEC)
7246 {
7247 unsigned int uns_code = XINT (x, 1);
7248
7249 if (uns_code == UNSPEC_FRINTA
7250 || uns_code == UNSPEC_FRINTM
7251 || uns_code == UNSPEC_FRINTN
7252 || uns_code == UNSPEC_FRINTP
7253 || uns_code == UNSPEC_FRINTZ)
7254 x = XVECEXP (x, 0, 0);
7255 }
7256
7257 if (speed)
155e90fe 7258 {
7259 if (VECTOR_MODE_P (mode))
7260 *cost += extra_cost->vect.alu;
7261 else
7262 *cost += extra_cost->fp[GET_MODE (x) == DFmode].toint;
7263 }
ac80c076 7264
7265 /* We can combine fmul by a power of 2 followed by a fcvt into a single
7266 fixed-point fcvt. */
7267 if (GET_CODE (x) == MULT
7268 && ((VECTOR_MODE_P (mode)
7269 && aarch64_vec_fpconst_pow_of_2 (XEXP (x, 1)) > 0)
7270 || aarch64_fpconst_pow_of_2 (XEXP (x, 1)) > 0))
7271 {
7272 *cost += rtx_cost (XEXP (x, 0), VOIDmode, (rtx_code) code,
7273 0, speed);
7274 return true;
7275 }
7276
5ae4887d 7277 *cost += rtx_cost (x, VOIDmode, (enum rtx_code) code, 0, speed);
fb53b6aa 7278 return true;
7279
d31e8ed0 7280 case ABS:
155e90fe 7281 if (VECTOR_MODE_P (mode))
7282 {
7283 /* ABS (vector). */
7284 if (speed)
7285 *cost += extra_cost->vect.alu;
7286 }
7287 else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
d31e8ed0 7288 {
1db16df8 7289 op0 = XEXP (x, 0);
7290
7291 /* FABD, which is analogous to FADD. */
7292 if (GET_CODE (op0) == MINUS)
7293 {
5ae4887d 7294 *cost += rtx_cost (XEXP (op0, 0), mode, MINUS, 0, speed);
7295 *cost += rtx_cost (XEXP (op0, 1), mode, MINUS, 1, speed);
1db16df8 7296 if (speed)
7297 *cost += extra_cost->fp[mode == DFmode].addsub;
7298
7299 return true;
7300 }
7301 /* Simple FABS is analogous to FNEG. */
d31e8ed0 7302 if (speed)
7303 *cost += extra_cost->fp[mode == DFmode].neg;
7304 }
7305 else
7306 {
 7307	  /* Integer ABS will either be split into
7308 two arithmetic instructions, or will be an ABS
7309 (scalar), which we don't model. */
7310 *cost = COSTS_N_INSNS (2);
7311 if (speed)
7312 *cost += 2 * extra_cost->alu.arith;
7313 }
7314 return false;
7315
7316 case SMAX:
7317 case SMIN:
7318 if (speed)
7319 {
155e90fe 7320 if (VECTOR_MODE_P (mode))
7321 *cost += extra_cost->vect.alu;
7322 else
7323 {
7324 /* FMAXNM/FMINNM/FMAX/FMIN.
7325 TODO: This may not be accurate for all implementations, but
7326 we do not model this in the cost tables. */
7327 *cost += extra_cost->fp[mode == DFmode].addsub;
7328 }
d31e8ed0 7329 }
7330 return false;
7331
fb53b6aa 7332 case UNSPEC:
7333 /* The floating point round to integer frint* instructions. */
7334 if (aarch64_frint_unspec_p (XINT (x, 1)))
7335 {
7336 if (speed)
7337 *cost += extra_cost->fp[mode == DFmode].roundint;
7338
7339 return false;
7340 }
4c3697c8 7341
7342 if (XINT (x, 1) == UNSPEC_RBIT)
7343 {
7344 if (speed)
7345 *cost += extra_cost->alu.rev;
7346
7347 return false;
7348 }
fb53b6aa 7349 break;
7350
4a8f9d52 7351 case TRUNCATE:
7352
7353 /* Decompose <su>muldi3_highpart. */
7354 if (/* (truncate:DI */
7355 mode == DImode
7356 /* (lshiftrt:TI */
7357 && GET_MODE (XEXP (x, 0)) == TImode
7358 && GET_CODE (XEXP (x, 0)) == LSHIFTRT
7359 /* (mult:TI */
7360 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
7361 /* (ANY_EXTEND:TI (reg:DI))
7362 (ANY_EXTEND:TI (reg:DI))) */
7363 && ((GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == ZERO_EXTEND
7364 && GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)) == ZERO_EXTEND)
7365 || (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == SIGN_EXTEND
7366 && GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)) == SIGN_EXTEND))
7367 && GET_MODE (XEXP (XEXP (XEXP (XEXP (x, 0), 0), 0), 0)) == DImode
7368 && GET_MODE (XEXP (XEXP (XEXP (XEXP (x, 0), 0), 1), 0)) == DImode
7369 /* (const_int 64) */
7370 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
7371 && UINTVAL (XEXP (XEXP (x, 0), 1)) == 64)
7372 {
7373 /* UMULH/SMULH. */
7374 if (speed)
7375 *cost += extra_cost->mult[mode == DImode].extend;
5ae4887d 7376 *cost += rtx_cost (XEXP (XEXP (XEXP (XEXP (x, 0), 0), 0), 0),
7377 mode, MULT, 0, speed);
7378 *cost += rtx_cost (XEXP (XEXP (XEXP (XEXP (x, 0), 0), 1), 0),
7379 mode, MULT, 1, speed);
4a8f9d52 7380 return true;
7381 }
7382
7383 /* Fall through. */
df401d54 7384 default:
fb53b6aa 7385 break;
df401d54 7386 }
fb53b6aa 7387
7388 if (dump_file && (dump_flags & TDF_DETAILS))
7389 fprintf (dump_file,
7390 "\nFailed to cost RTX. Assuming default cost.\n");
7391
7392 return true;
df401d54 7393}
7394
e50295b3 7395/* Wrapper around aarch64_rtx_costs, dumps the partial, or total cost
7396 calculated for X. This cost is stored in *COST. Returns true
7397 if the total cost of X was calculated. */
7398static bool
5ae4887d 7399aarch64_rtx_costs_wrapper (rtx x, machine_mode mode, int outer,
e50295b3 7400 int param, int *cost, bool speed)
7401{
5ae4887d 7402 bool result = aarch64_rtx_costs (x, mode, outer, param, cost, speed);
e50295b3 7403
7404 if (dump_file && (dump_flags & TDF_DETAILS))
7405 {
7406 print_rtl_single (dump_file, x);
7407 fprintf (dump_file, "\n%s cost: %d (%s)\n",
7408 speed ? "Hot" : "Cold",
7409 *cost, result ? "final" : "partial");
7410 }
7411
7412 return result;
7413}
7414
df401d54 7415static int
3754d046 7416aarch64_register_move_cost (machine_mode mode,
52e9ac01 7417 reg_class_t from_i, reg_class_t to_i)
df401d54 7418{
52e9ac01 7419 enum reg_class from = (enum reg_class) from_i;
7420 enum reg_class to = (enum reg_class) to_i;
df401d54 7421 const struct cpu_regmove_cost *regmove_cost
14677da9 7422 = aarch64_tune_params.regmove_cost;
df401d54 7423
32accfd7 7424 /* Caller save and pointer regs are equivalent to GENERAL_REGS. */
4c5898f7 7425 if (to == CALLER_SAVE_REGS || to == POINTER_REGS)
32accfd7 7426 to = GENERAL_REGS;
7427
4c5898f7 7428 if (from == CALLER_SAVE_REGS || from == POINTER_REGS)
32accfd7 7429 from = GENERAL_REGS;
7430
0c3180b6 7431  /* Moving between a GPR and the stack register costs the same as GP2GP. */
7432 if ((from == GENERAL_REGS && to == STACK_REG)
7433 || (to == GENERAL_REGS && from == STACK_REG))
7434 return regmove_cost->GP2GP;
7435
 7436  /* To/from the stack register, we move via the GPRs. */
7437 if (to == STACK_REG || from == STACK_REG)
7438 return aarch64_register_move_cost (mode, from, GENERAL_REGS)
7439 + aarch64_register_move_cost (mode, GENERAL_REGS, to);
7440
396b062c 7441 if (GET_MODE_SIZE (mode) == 16)
7442 {
7443 /* 128-bit operations on general registers require 2 instructions. */
7444 if (from == GENERAL_REGS && to == GENERAL_REGS)
7445 return regmove_cost->GP2GP * 2;
7446 else if (from == GENERAL_REGS)
7447 return regmove_cost->GP2FP * 2;
7448 else if (to == GENERAL_REGS)
7449 return regmove_cost->FP2GP * 2;
7450
7451 /* When AdvSIMD instructions are disabled it is not possible to move
7452 a 128-bit value directly between Q registers. This is handled in
7453 secondary reload. A general register is used as a scratch to move
7454 the upper DI value and the lower DI value is moved directly,
7455 hence the cost is the sum of three moves. */
7456 if (! TARGET_SIMD)
7457 return regmove_cost->GP2FP + regmove_cost->FP2GP + regmove_cost->FP2FP;
7458
7459 return regmove_cost->FP2FP;
7460 }
7461
df401d54 7462 if (from == GENERAL_REGS && to == GENERAL_REGS)
7463 return regmove_cost->GP2GP;
7464 else if (from == GENERAL_REGS)
7465 return regmove_cost->GP2FP;
7466 else if (to == GENERAL_REGS)
7467 return regmove_cost->FP2GP;
7468
df401d54 7469 return regmove_cost->FP2FP;
7470}
7471
7472static int
3754d046 7473aarch64_memory_move_cost (machine_mode mode ATTRIBUTE_UNUSED,
df401d54 7474 reg_class_t rclass ATTRIBUTE_UNUSED,
7475 bool in ATTRIBUTE_UNUSED)
7476{
14677da9 7477 return aarch64_tune_params.memmov_cost;
df401d54 7478}
7479
4cfd27a5 7480/* Return true if it is safe and beneficial to use the rsqrt optabs to
7481 optimize 1.0/sqrt. */
7482
7483static bool
7484use_rsqrt_p (void)
7485{
7486 return (!flag_trapping_math
7487 && flag_unsafe_math_optimizations
7488 && (aarch64_tune_params.extra_tuning_flags
7489 & AARCH64_EXTRA_TUNE_RECIP_SQRT));
7490}
7491
e1a2ea91 7492/* Function to decide when to use
7493 reciprocal square root builtins. */
7494
7495static tree
4cfd27a5 7496aarch64_builtin_reciprocal (tree fndecl)
e1a2ea91 7497{
4cfd27a5 7498 if (!use_rsqrt_p ())
e1a2ea91 7499 return NULL_TREE;
4cfd27a5 7500 return aarch64_builtin_rsqrt (DECL_FUNCTION_CODE (fndecl));
e1a2ea91 7501}
7502
7503typedef rtx (*rsqrte_type) (rtx, rtx);
7504
7505/* Select reciprocal square root initial estimate
7506 insn depending on machine mode. */
7507
7508rsqrte_type
7509get_rsqrte_type (machine_mode mode)
7510{
7511 switch (mode)
7512 {
7513 case DFmode: return gen_aarch64_rsqrte_df2;
7514 case SFmode: return gen_aarch64_rsqrte_sf2;
7515 case V2DFmode: return gen_aarch64_rsqrte_v2df2;
7516 case V2SFmode: return gen_aarch64_rsqrte_v2sf2;
7517 case V4SFmode: return gen_aarch64_rsqrte_v4sf2;
7518 default: gcc_unreachable ();
7519 }
7520}
7521
7522typedef rtx (*rsqrts_type) (rtx, rtx, rtx);
7523
7524/* Select reciprocal square root Newton-Raphson step
7525 insn depending on machine mode. */
7526
7527rsqrts_type
7528get_rsqrts_type (machine_mode mode)
7529{
7530 switch (mode)
7531 {
7532 case DFmode: return gen_aarch64_rsqrts_df3;
7533 case SFmode: return gen_aarch64_rsqrts_sf3;
7534 case V2DFmode: return gen_aarch64_rsqrts_v2df3;
7535 case V2SFmode: return gen_aarch64_rsqrts_v2sf3;
7536 case V4SFmode: return gen_aarch64_rsqrts_v4sf3;
7537 default: gcc_unreachable ();
7538 }
7539}
7540
7541/* Emit instruction sequence to compute
7542 reciprocal square root. Use two Newton-Raphson steps
7543 for single precision and three for double precision. */
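/* A sketch of the math behind the loop below (stated here for clarity,
   not taken verbatim from any reference): with d = SRC and x0 the
   FRSQRTE estimate, each iteration computes

     x2 = x0 * x0
     x3 = (3 - d * x2) / 2    <- the FRSQRTS step
     x1 = x0 * x3

   which is the standard Newton-Raphson update for 1/sqrt(d), so the
   approximation error roughly squares on every step.  */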
7544
7545void
7546aarch64_emit_swrsqrt (rtx dst, rtx src)
7547{
7548 machine_mode mode = GET_MODE (src);
7549 gcc_assert (
7550 mode == SFmode || mode == V2SFmode || mode == V4SFmode
7551 || mode == DFmode || mode == V2DFmode);
7552
7553 rtx xsrc = gen_reg_rtx (mode);
7554 emit_move_insn (xsrc, src);
7555 rtx x0 = gen_reg_rtx (mode);
7556
7557 emit_insn ((*get_rsqrte_type (mode)) (x0, xsrc));
7558
7559 bool double_mode = (mode == DFmode || mode == V2DFmode);
7560
7561 int iterations = double_mode ? 3 : 2;
7562
7563 if (flag_mrecip_low_precision_sqrt)
7564 iterations--;
7565
7566 for (int i = 0; i < iterations; ++i)
7567 {
7568 rtx x1 = gen_reg_rtx (mode);
7569 rtx x2 = gen_reg_rtx (mode);
7570 rtx x3 = gen_reg_rtx (mode);
7571 emit_set_insn (x2, gen_rtx_MULT (mode, x0, x0));
7572
7573 emit_insn ((*get_rsqrts_type (mode)) (x3, xsrc, x2));
7574
7575 emit_set_insn (x1, gen_rtx_MULT (mode, x0, x3));
7576 x0 = x1;
7577 }
7578
7579 emit_move_insn (dst, x0);
7580}
7581
268a662f 7582/* Return the number of instructions that can be issued per cycle. */
7583static int
7584aarch64_sched_issue_rate (void)
7585{
14677da9 7586 return aarch64_tune_params.issue_rate;
268a662f 7587}
7588
65d538fd 7589static int
7590aarch64_sched_first_cycle_multipass_dfa_lookahead (void)
7591{
7592 int issue_rate = aarch64_sched_issue_rate ();
7593
7594 return issue_rate > 1 && !sched_fusion ? issue_rate : 0;
7595}
7596
5f73ddf0 7597
7598/* Implement TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD as
7599 autopref_multipass_dfa_lookahead_guard from haifa-sched.c. It only
7600 has an effect if PARAM_SCHED_AUTOPREF_QUEUE_DEPTH > 0. */
7601
7602static int
7603aarch64_first_cycle_multipass_dfa_lookahead_guard (rtx_insn *insn,
7604 int ready_index)
7605{
7606 return autopref_multipass_dfa_lookahead_guard (insn, ready_index);
7607}
7608
7609
61d9499e 7610/* Vectorizer cost model target hooks. */
7611
7612/* Implement targetm.vectorize.builtin_vectorization_cost. */
7613static int
7614aarch64_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
7615 tree vectype,
7616 int misalign ATTRIBUTE_UNUSED)
7617{
7618 unsigned elements;
7619
7620 switch (type_of_cost)
7621 {
7622 case scalar_stmt:
14677da9 7623 return aarch64_tune_params.vec_costs->scalar_stmt_cost;
61d9499e 7624
7625 case scalar_load:
14677da9 7626 return aarch64_tune_params.vec_costs->scalar_load_cost;
61d9499e 7627
7628 case scalar_store:
14677da9 7629 return aarch64_tune_params.vec_costs->scalar_store_cost;
61d9499e 7630
7631 case vector_stmt:
14677da9 7632 return aarch64_tune_params.vec_costs->vec_stmt_cost;
61d9499e 7633
7634 case vector_load:
14677da9 7635 return aarch64_tune_params.vec_costs->vec_align_load_cost;
61d9499e 7636
7637 case vector_store:
14677da9 7638 return aarch64_tune_params.vec_costs->vec_store_cost;
61d9499e 7639
7640 case vec_to_scalar:
14677da9 7641 return aarch64_tune_params.vec_costs->vec_to_scalar_cost;
61d9499e 7642
7643 case scalar_to_vec:
14677da9 7644 return aarch64_tune_params.vec_costs->scalar_to_vec_cost;
61d9499e 7645
7646 case unaligned_load:
14677da9 7647 return aarch64_tune_params.vec_costs->vec_unalign_load_cost;
61d9499e 7648
7649 case unaligned_store:
14677da9 7650 return aarch64_tune_params.vec_costs->vec_unalign_store_cost;
61d9499e 7651
7652 case cond_branch_taken:
14677da9 7653 return aarch64_tune_params.vec_costs->cond_taken_branch_cost;
61d9499e 7654
7655 case cond_branch_not_taken:
14677da9 7656 return aarch64_tune_params.vec_costs->cond_not_taken_branch_cost;
61d9499e 7657
7658 case vec_perm:
7659 case vec_promote_demote:
14677da9 7660 return aarch64_tune_params.vec_costs->vec_stmt_cost;
61d9499e 7661
7662 case vec_construct:
7663 elements = TYPE_VECTOR_SUBPARTS (vectype);
7664 return elements / 2 + 1;
7665
7666 default:
7667 gcc_unreachable ();
7668 }
7669}
7670
7671/* Implement targetm.vectorize.add_stmt_cost. */
7672static unsigned
7673aarch64_add_stmt_cost (void *data, int count, enum vect_cost_for_stmt kind,
7674 struct _stmt_vec_info *stmt_info, int misalign,
7675 enum vect_cost_model_location where)
7676{
7677 unsigned *cost = (unsigned *) data;
7678 unsigned retval = 0;
7679
7680 if (flag_vect_cost_model)
7681 {
7682 tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
7683 int stmt_cost =
7684 aarch64_builtin_vectorization_cost (kind, vectype, misalign);
7685
7686 /* Statements in an inner loop relative to the loop being
7687 vectorized are weighted more heavily. The value here is
c8b3c5dd 7688 arbitrary and could potentially be improved with analysis. */
61d9499e 7689 if (where == vect_body && stmt_info && stmt_in_inner_loop_p (stmt_info))
c8b3c5dd 7690 count *= 50; /* FIXME */
61d9499e 7691
7692 retval = (unsigned) (count * stmt_cost);
7693 cost[where] += retval;
7694 }
7695
7696 return retval;
7697}
7698
50021c32 7699static void initialize_aarch64_code_model (struct gcc_options *);
df401d54 7700
50021c32 7701/* Enum describing the various ways that the
7702 aarch64_parse_{arch,tune,cpu,extension} functions can fail.
7703 This way their callers can choose what kind of error to give. */
df401d54 7704
50021c32 7705enum aarch64_parse_opt_result
7706{
7707 AARCH64_PARSE_OK, /* Parsing was successful. */
7708 AARCH64_PARSE_MISSING_ARG, /* Missing argument. */
7709 AARCH64_PARSE_INVALID_FEATURE, /* Invalid feature modifier. */
7710 AARCH64_PARSE_INVALID_ARG /* Invalid arch, tune, cpu arg. */
7711};
7712
7713/* Parse the architecture extension string STR and update ISA_FLAGS
 7714   with the architecture features turned on or off.  Return an
7715 aarch64_parse_opt_result describing the result. */
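/* Illustrative example (the modifier names are only examples; the
   authoritative list lives in aarch64-option-extensions.def): parsing
   "+crc+nocrypto" walks the string left to right, first OR-ing in the
   CRC feature bits and then clearing the crypto ones.  */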
7716
7717static enum aarch64_parse_opt_result
7718aarch64_parse_extension (char *str, unsigned long *isa_flags)
df401d54 7719{
7720 /* The extension string is parsed left to right. */
7721 const struct aarch64_option_extension *opt = NULL;
7722
7723 /* Flag to say whether we are adding or removing an extension. */
7724 int adding_ext = -1;
7725
7726 while (str != NULL && *str != 0)
7727 {
7728 char *ext;
7729 size_t len;
7730
7731 str++;
7732 ext = strchr (str, '+');
7733
7734 if (ext != NULL)
7735 len = ext - str;
7736 else
7737 len = strlen (str);
7738
7739 if (len >= 2 && strncmp (str, "no", 2) == 0)
7740 {
7741 adding_ext = 0;
7742 len -= 2;
7743 str += 2;
7744 }
7745 else if (len > 0)
7746 adding_ext = 1;
7747
7748 if (len == 0)
50021c32 7749 return AARCH64_PARSE_MISSING_ARG;
7750
df401d54 7751
7752 /* Scan over the extensions table trying to find an exact match. */
7753 for (opt = all_extensions; opt->name != NULL; opt++)
7754 {
7755 if (strlen (opt->name) == len && strncmp (opt->name, str, len) == 0)
7756 {
7757 /* Add or remove the extension. */
7758 if (adding_ext)
50021c32 7759 *isa_flags |= opt->flags_on;
df401d54 7760 else
50021c32 7761 *isa_flags &= ~(opt->flags_off);
df401d54 7762 break;
7763 }
7764 }
7765
7766 if (opt->name == NULL)
7767 {
7768 /* Extension not found in list. */
50021c32 7769 return AARCH64_PARSE_INVALID_FEATURE;
df401d54 7770 }
7771
7772 str = ext;
 7773     }
7774
50021c32 7775 return AARCH64_PARSE_OK;
df401d54 7776}
7777
50021c32 7778/* Parse the TO_PARSE string and put the architecture struct that it
7779 selects into RES and the architectural features into ISA_FLAGS.
7780 Return an aarch64_parse_opt_result describing the parse result.
7781 If there is an error parsing, RES and ISA_FLAGS are left unchanged. */
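/* For instance, the value of "-march=armv8-a+crc" arrives here as
   "armv8-a+crc": the loop below matches "armv8-a" against
   all_architectures and hands the trailing "+crc" to
   aarch64_parse_extension.  (The names shown are illustrative.)  */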
df401d54 7782
50021c32 7783static enum aarch64_parse_opt_result
7784aarch64_parse_arch (const char *to_parse, const struct processor **res,
7785 unsigned long *isa_flags)
df401d54 7786{
7787 char *ext;
7788 const struct processor *arch;
50021c32 7789 char *str = (char *) alloca (strlen (to_parse) + 1);
df401d54 7790 size_t len;
7791
50021c32 7792 strcpy (str, to_parse);
df401d54 7793
7794 ext = strchr (str, '+');
7795
7796 if (ext != NULL)
7797 len = ext - str;
7798 else
7799 len = strlen (str);
7800
7801 if (len == 0)
50021c32 7802 return AARCH64_PARSE_MISSING_ARG;
7803
df401d54 7804
50021c32 7805 /* Loop through the list of supported ARCHes to find a match. */
df401d54 7806 for (arch = all_architectures; arch->name != NULL; arch++)
7807 {
7808 if (strlen (arch->name) == len && strncmp (arch->name, str, len) == 0)
7809 {
50021c32 7810 unsigned long isa_temp = arch->flags;
df401d54 7811
7812 if (ext != NULL)
7813 {
50021c32 7814 /* TO_PARSE string contains at least one extension. */
7815 enum aarch64_parse_opt_result ext_res
7816 = aarch64_parse_extension (ext, &isa_temp);
df401d54 7817
50021c32 7818 if (ext_res != AARCH64_PARSE_OK)
7819 return ext_res;
03189b5b 7820 }
50021c32 7821 /* Extension parsing was successful. Confirm the result
7822 arch and ISA flags. */
7823 *res = arch;
7824 *isa_flags = isa_temp;
7825 return AARCH64_PARSE_OK;
df401d54 7826 }
7827 }
7828
7829 /* ARCH name not found in list. */
50021c32 7830 return AARCH64_PARSE_INVALID_ARG;
df401d54 7831}
7832
50021c32 7833/* Parse the TO_PARSE string and put the cpu it selects into RES and the
7834 architecture flags in ISA_FLAGS. Return an aarch64_parse_opt_result
7835 describing the parse result. If there is an error parsing, RES and
7836 ISA_FLAGS are left unchanged. */
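/* Likewise for -mcpu: a value such as "cortex-a57+nofp" selects the
   matching entry in all_cores and lets aarch64_parse_extension clear
   the FP feature bits.  (Illustrative names; the accepted cores are
   whatever all_cores lists.)  */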
df401d54 7837
50021c32 7838static enum aarch64_parse_opt_result
7839aarch64_parse_cpu (const char *to_parse, const struct processor **res,
7840 unsigned long *isa_flags)
df401d54 7841{
7842 char *ext;
7843 const struct processor *cpu;
50021c32 7844 char *str = (char *) alloca (strlen (to_parse) + 1);
df401d54 7845 size_t len;
7846
50021c32 7847 strcpy (str, to_parse);
df401d54 7848
7849 ext = strchr (str, '+');
7850
7851 if (ext != NULL)
7852 len = ext - str;
7853 else
7854 len = strlen (str);
7855
7856 if (len == 0)
50021c32 7857 return AARCH64_PARSE_MISSING_ARG;
7858
df401d54 7859
7860 /* Loop through the list of supported CPUs to find a match. */
7861 for (cpu = all_cores; cpu->name != NULL; cpu++)
7862 {
7863 if (strlen (cpu->name) == len && strncmp (cpu->name, str, len) == 0)
7864 {
50021c32 7865 unsigned long isa_temp = cpu->flags;
7866
df401d54 7867
7868 if (ext != NULL)
7869 {
50021c32 7870 /* TO_PARSE string contains at least one extension. */
7871 enum aarch64_parse_opt_result ext_res
7872 = aarch64_parse_extension (ext, &isa_temp);
df401d54 7873
50021c32 7874 if (ext_res != AARCH64_PARSE_OK)
7875 return ext_res;
7876 }
 7877	      /* Extension parsing was successful.  Confirm the result
7878 cpu and ISA flags. */
7879 *res = cpu;
7880 *isa_flags = isa_temp;
7881 return AARCH64_PARSE_OK;
df401d54 7882 }
7883 }
7884
7885 /* CPU name not found in list. */
50021c32 7886 return AARCH64_PARSE_INVALID_ARG;
df401d54 7887}
7888
50021c32 7889/* Parse the TO_PARSE string and put the cpu it selects into RES.
7890 Return an aarch64_parse_opt_result describing the parse result.
 7891   If the parsing fails, RES does not change.  */
df401d54 7892
50021c32 7893static enum aarch64_parse_opt_result
7894aarch64_parse_tune (const char *to_parse, const struct processor **res)
df401d54 7895{
7896 const struct processor *cpu;
50021c32 7897 char *str = (char *) alloca (strlen (to_parse) + 1);
7898
7899 strcpy (str, to_parse);
df401d54 7900
7901 /* Loop through the list of supported CPUs to find a match. */
7902 for (cpu = all_cores; cpu->name != NULL; cpu++)
7903 {
7904 if (strcmp (cpu->name, str) == 0)
7905 {
50021c32 7906 *res = cpu;
7907 return AARCH64_PARSE_OK;
df401d54 7908 }
7909 }
7910
7911 /* CPU name not found in list. */
50021c32 7912 return AARCH64_PARSE_INVALID_ARG;
df401d54 7913}
7914
4d2c6420 7915/* Parse TOKEN, which has length LENGTH, to see if it is an option
7916 described in FLAG. If it is, return the index bit for that fusion type.
7917 If not, error (printing OPTION_NAME) and return zero. */
7918
7919static unsigned int
7920aarch64_parse_one_option_token (const char *token,
7921 size_t length,
7922 const struct aarch64_flag_desc *flag,
7923 const char *option_name)
7924{
7925 for (; flag->name != NULL; flag++)
7926 {
7927 if (length == strlen (flag->name)
7928 && !strncmp (flag->name, token, length))
7929 return flag->flag;
7930 }
7931
7932 error ("unknown flag passed in -moverride=%s (%s)", option_name, token);
7933 return 0;
7934}
7935
7936/* Parse OPTION which is a comma-separated list of flags to enable.
7937 FLAGS gives the list of flags we understand, INITIAL_STATE gives any
7938 default state we inherit from the CPU tuning structures. OPTION_NAME
7939 gives the top-level option we are parsing in the -moverride string,
7940 for use in error messages. */
7941
7942static unsigned int
7943aarch64_parse_boolean_options (const char *option,
7944 const struct aarch64_flag_desc *flags,
7945 unsigned int initial_state,
7946 const char *option_name)
7947{
7948 const char separator = '.';
7949 const char* specs = option;
7950 const char* ntoken = option;
7951 unsigned int found_flags = initial_state;
7952
7953 while ((ntoken = strchr (specs, separator)))
7954 {
7955 size_t token_length = ntoken - specs;
7956 unsigned token_ops = aarch64_parse_one_option_token (specs,
7957 token_length,
7958 flags,
7959 option_name);
7960 /* If we find "none" (or, for simplicity's sake, an error) anywhere
7961 in the token stream, reset the supported operations. So:
7962
7963 adrp+add.cmp+branch.none.adrp+add
7964
7965 would have the result of turning on only adrp+add fusion. */
7966 if (!token_ops)
7967 found_flags = 0;
7968
7969 found_flags |= token_ops;
7970 specs = ++ntoken;
7971 }
7972
 7973  /* The string ended with a trailing separator; report it as ill-formed.  */
7974 if (!(*specs))
7975 {
7976 error ("%s string ill-formed\n", option_name);
7977 return 0;
7978 }
7979
7980 /* We still have one more token to parse. */
7981 size_t token_length = strlen (specs);
7982 unsigned token_ops = aarch64_parse_one_option_token (specs,
7983 token_length,
7984 flags,
7985 option_name);
7986 if (!token_ops)
7987 found_flags = 0;
7988
7989 found_flags |= token_ops;
7990 return found_flags;
7991}
7992
7993/* Support for overriding instruction fusion. */
7994
7995static void
7996aarch64_parse_fuse_string (const char *fuse_string,
7997 struct tune_params *tune)
7998{
7999 tune->fusible_ops = aarch64_parse_boolean_options (fuse_string,
8000 aarch64_fusible_pairs,
8001 tune->fusible_ops,
8002 "fuse=");
8003}
8004
8005/* Support for overriding other tuning flags. */
8006
8007static void
8008aarch64_parse_tune_string (const char *tune_string,
8009 struct tune_params *tune)
8010{
8011 tune->extra_tuning_flags
8012 = aarch64_parse_boolean_options (tune_string,
8013 aarch64_tuning_flags,
8014 tune->extra_tuning_flags,
8015 "tune=");
8016}
8017
 8018/* Parse TOKEN, which has length LENGTH, to see if it is a tuning option
 8019   we understand.  If it is, extract the option string and hand it off to
8020 the appropriate function. */
8021
8022void
8023aarch64_parse_one_override_token (const char* token,
8024 size_t length,
8025 struct tune_params *tune)
8026{
8027 const struct aarch64_tuning_override_function *fn
8028 = aarch64_tuning_override_functions;
8029
8030 const char *option_part = strchr (token, '=');
8031 if (!option_part)
8032 {
8033 error ("tuning string missing in option (%s)", token);
8034 return;
8035 }
8036
8037 /* Get the length of the option name. */
8038 length = option_part - token;
8039 /* Skip the '=' to get to the option string. */
8040 option_part++;
8041
8042 for (; fn->name != NULL; fn++)
8043 {
8044 if (!strncmp (fn->name, token, length))
8045 {
8046 fn->parse_override (option_part, tune);
8047 return;
8048 }
8049 }
8050
8051 error ("unknown tuning option (%s)",token);
8052 return;
8053}
8054
02c8de1f 8055/* Choose a default -mtls-size and clamp it to what the code model allows.  */
8056
8057static void
8058initialize_aarch64_tls_size (struct gcc_options *opts)
8059{
8060 if (aarch64_tls_size == 0)
8061 aarch64_tls_size = 24;
8062
8063 switch (opts->x_aarch64_cmodel_var)
8064 {
8065 case AARCH64_CMODEL_TINY:
 8066    /* Both the default and the maximum TLS size allowed under tiny are 1M, which
8067 needs two instructions to address, so we clamp the size to 24. */
8068 if (aarch64_tls_size > 24)
8069 aarch64_tls_size = 24;
8070 break;
8071 case AARCH64_CMODEL_SMALL:
8072 /* The maximum TLS size allowed under small is 4G. */
8073 if (aarch64_tls_size > 32)
8074 aarch64_tls_size = 32;
8075 break;
8076 case AARCH64_CMODEL_LARGE:
8077 /* The maximum TLS size allowed under large is 16E.
8078 FIXME: 16E should be 64bit, we only support 48bit offset now. */
8079 if (aarch64_tls_size > 48)
8080 aarch64_tls_size = 48;
8081 break;
8082 default:
8083 gcc_unreachable ();
8084 }
8085
8086 return;
8087}
8088
4d2c6420 8089/* Parse STRING looking for options in the format:
8090 string :: option:string
8091 option :: name=substring
8092 name :: {a-z}
8093 substring :: defined by option. */
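/* A concrete -moverride value following this grammar could be
   "fuse=adrp+add.cmp+branch:tune=...", i.e. ':'-separated name=value
   options whose values are '.'-separated flag lists.  The option names
   recognised here come from aarch64_tuning_override_functions (the
   fuse= and tune= handlers above); the flag spellings shown are only
   illustrative.  */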
8094
8095static void
8096aarch64_parse_override_string (const char* input_string,
8097 struct tune_params* tune)
8098{
8099 const char separator = ':';
8100 size_t string_length = strlen (input_string) + 1;
8101 char *string_root = (char *) xmalloc (sizeof (*string_root) * string_length);
8102 char *string = string_root;
8103 strncpy (string, input_string, string_length);
8104 string[string_length - 1] = '\0';
8105
8106 char* ntoken = string;
8107
8108 while ((ntoken = strchr (string, separator)))
8109 {
8110 size_t token_length = ntoken - string;
8111 /* Make this substring look like a string. */
8112 *ntoken = '\0';
8113 aarch64_parse_one_override_token (string, token_length, tune);
8114 string = ++ntoken;
8115 }
8116
8117 /* One last option to parse. */
8118 aarch64_parse_one_override_token (string, strlen (string), tune);
8119 free (string_root);
8120}
df401d54 8121
df401d54 8122
8123static void
50021c32 8124aarch64_override_options_after_change_1 (struct gcc_options *opts)
df401d54 8125{
50021c32 8126 if (opts->x_flag_omit_frame_pointer)
8127 opts->x_flag_omit_leaf_frame_pointer = false;
8128 else if (opts->x_flag_omit_leaf_frame_pointer)
8129 opts->x_flag_omit_frame_pointer = true;
df401d54 8130
acb1dac7 8131 /* If not optimizing for size, set the default
50021c32 8132 alignment to what the target wants. */
8133 if (!opts->x_optimize_size)
df401d54 8134 {
50021c32 8135 if (opts->x_align_loops <= 0)
8136 opts->x_align_loops = aarch64_tune_params.loop_align;
8137 if (opts->x_align_jumps <= 0)
8138 opts->x_align_jumps = aarch64_tune_params.jump_align;
8139 if (opts->x_align_functions <= 0)
8140 opts->x_align_functions = aarch64_tune_params.function_align;
df401d54 8141 }
ae1cefe6 8142
8143 /* If nopcrelative_literal_loads is set on the command line, this
8144 implies that the user asked for PC relative literal loads. */
acb1dac7 8145 if (opts->x_nopcrelative_literal_loads == 1)
8146 aarch64_nopcrelative_literal_loads = false;
ae1cefe6 8147
8148 /* If it is not set on the command line, we default to no
8149 pc relative literal loads. */
acb1dac7 8150 if (opts->x_nopcrelative_literal_loads == 2)
8151 aarch64_nopcrelative_literal_loads = true;
ae1cefe6 8152
8153 /* In the tiny memory model it makes no sense
8154 to disallow non PC relative literal pool loads
8155 as many other things will break anyway. */
acb1dac7 8156 if (opts->x_nopcrelative_literal_loads
ae1cefe6 8157 && (aarch64_cmodel == AARCH64_CMODEL_TINY
8158 || aarch64_cmodel == AARCH64_CMODEL_TINY_PIC))
acb1dac7 8159 aarch64_nopcrelative_literal_loads = false;
50021c32 8160}
df401d54 8161
50021c32 8162/* 'Unpack' the internal tuning structs and update the options
8163 in OPTS. The caller must have set up selected_tune and selected_arch
8164 as all the other target-specific codegen decisions are
8165 derived from them. */
8166
26db5325 8167void
50021c32 8168aarch64_override_options_internal (struct gcc_options *opts)
8169{
8170 aarch64_tune_flags = selected_tune->flags;
8171 aarch64_tune = selected_tune->sched_core;
8172 /* Make a copy of the tuning parameters attached to the core, which
8173 we may later overwrite. */
8174 aarch64_tune_params = *(selected_tune->tune);
8175 aarch64_architecture_version = selected_arch->architecture_version;
8176
8177 if (opts->x_aarch64_override_tune_string)
8178 aarch64_parse_override_string (opts->x_aarch64_override_tune_string,
8179 &aarch64_tune_params);
8180
8181 /* This target defaults to strict volatile bitfields. */
8182 if (opts->x_flag_strict_volatile_bitfields < 0 && abi_version_at_least (2))
8183 opts->x_flag_strict_volatile_bitfields = 1;
8184
50021c32 8185 initialize_aarch64_code_model (opts);
02c8de1f 8186 initialize_aarch64_tls_size (opts);
891f2090 8187
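  /* Map the selected autoprefetcher model onto the scheduler's
     PARAM_SCHED_AUTOPREF_QUEUE_DEPTH parameter set below; how that
     queue depth is interpreted is defined by haifa-sched.c.  */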
5f73ddf0 8188 int queue_depth = 0;
8189 switch (aarch64_tune_params.autoprefetcher_model)
8190 {
8191 case tune_params::AUTOPREFETCHER_OFF:
8192 queue_depth = -1;
8193 break;
8194 case tune_params::AUTOPREFETCHER_WEAK:
8195 queue_depth = 0;
8196 break;
8197 case tune_params::AUTOPREFETCHER_STRONG:
8198 queue_depth = max_insn_queue_index + 1;
8199 break;
8200 default:
8201 gcc_unreachable ();
8202 }
8203
8204 /* We don't mind passing in global_options_set here as we don't use
8205 the *options_set structs anyway. */
8206 maybe_set_param_value (PARAM_SCHED_AUTOPREF_QUEUE_DEPTH,
8207 queue_depth,
8208 opts->x_param_values,
8209 global_options_set.x_param_values);
8210
35c51aa0 8211 /* Set the L1 cache line size. */
8212 if (selected_cpu->tune->cache_line_size != 0)
8213 maybe_set_param_value (PARAM_L1_CACHE_LINE_SIZE,
8214 selected_cpu->tune->cache_line_size,
8215 opts->x_param_values,
8216 global_options_set.x_param_values);
8217
50021c32 8218 aarch64_override_options_after_change_1 (opts);
8219}
df401d54 8220
50021c32 8221/* Validate a command-line -mcpu option. Parse the cpu and extensions (if any)
8222 specified in STR and throw errors if appropriate. Put the results if
a0db861f 8223 they are valid in RES and ISA_FLAGS. Return whether the option is
8224 valid. */
df401d54 8225
a0db861f 8226static bool
50021c32 8227aarch64_validate_mcpu (const char *str, const struct processor **res,
8228 unsigned long *isa_flags)
8229{
8230 enum aarch64_parse_opt_result parse_res
8231 = aarch64_parse_cpu (str, res, isa_flags);
8232
8233 if (parse_res == AARCH64_PARSE_OK)
a0db861f 8234 return true;
50021c32 8235
8236 switch (parse_res)
8237 {
8238 case AARCH64_PARSE_MISSING_ARG:
8239 error ("missing cpu name in -mcpu=%qs", str);
8240 break;
8241 case AARCH64_PARSE_INVALID_ARG:
8242 error ("unknown value %qs for -mcpu", str);
8243 break;
8244 case AARCH64_PARSE_INVALID_FEATURE:
8245 error ("invalid feature modifier in -mcpu=%qs", str);
8246 break;
8247 default:
8248 gcc_unreachable ();
8249 }
a0db861f 8250
8251 return false;
50021c32 8252}
8253
8254/* Validate a command-line -march option. Parse the arch and extensions
8255 (if any) specified in STR and throw errors if appropriate. Put the
a0db861f 8256 results, if they are valid, in RES and ISA_FLAGS. Return whether the
8257 option is valid. */
50021c32 8258
a0db861f 8259static bool
50021c32 8260aarch64_validate_march (const char *str, const struct processor **res,
8261 unsigned long *isa_flags)
8262{
8263 enum aarch64_parse_opt_result parse_res
8264 = aarch64_parse_arch (str, res, isa_flags);
8265
8266 if (parse_res == AARCH64_PARSE_OK)
a0db861f 8267 return true;
50021c32 8268
8269 switch (parse_res)
8270 {
8271 case AARCH64_PARSE_MISSING_ARG:
8272 error ("missing arch name in -march=%qs", str);
8273 break;
8274 case AARCH64_PARSE_INVALID_ARG:
8275 error ("unknown value %qs for -march", str);
8276 break;
8277 case AARCH64_PARSE_INVALID_FEATURE:
8278 error ("invalid feature modifier in -march=%qs", str);
8279 break;
8280 default:
8281 gcc_unreachable ();
8282 }
a0db861f 8283
8284 return false;
50021c32 8285}
8286
8287/* Validate a command-line -mtune option. Parse the cpu
8288 specified in STR and throw errors if appropriate. Put the
a0db861f 8289 result, if it is valid, in RES. Return whether the option is
8290 valid. */
50021c32 8291
a0db861f 8292static bool
50021c32 8293aarch64_validate_mtune (const char *str, const struct processor **res)
8294{
8295 enum aarch64_parse_opt_result parse_res
8296 = aarch64_parse_tune (str, res);
8297
8298 if (parse_res == AARCH64_PARSE_OK)
a0db861f 8299 return true;
50021c32 8300
8301 switch (parse_res)
8302 {
8303 case AARCH64_PARSE_MISSING_ARG:
8304 error ("missing cpu name in -mtune=%qs", str);
8305 break;
8306 case AARCH64_PARSE_INVALID_ARG:
8307 error ("unknown value %qs for -mtune", str);
8308 break;
8309 default:
8310 gcc_unreachable ();
8311 }
a0db861f 8312 return false;
8313}
8314
8315/* Return the CPU corresponding to the enum CPU.
8316 If it doesn't specify a cpu, return the default. */
8317
8318static const struct processor *
8319aarch64_get_tune_cpu (enum aarch64_processor cpu)
8320{
8321 if (cpu != aarch64_none)
8322 return &all_cores[cpu];
8323
8324 /* The & 0x3f is to extract the bottom 6 bits that encode the
8325 default cpu as selected by the --with-cpu GCC configure option
8326 in config.gcc.
8327 ???: The whole TARGET_CPU_DEFAULT and AARCH64_CPU_DEFAULT_FLAGS
8328 flags mechanism should be reworked to make it more sane. */
8329 return &all_cores[TARGET_CPU_DEFAULT & 0x3f];
8330}
8331
8332/* Return the architecture corresponding to the enum ARCH.
8333 If it doesn't specify a valid architecture, return the default. */
8334
8335static const struct processor *
8336aarch64_get_arch (enum aarch64_arch arch)
8337{
8338 if (arch != aarch64_no_arch)
8339 return &all_architectures[arch];
8340
8341 const struct processor *cpu = &all_cores[TARGET_CPU_DEFAULT & 0x3f];
8342
8343 return &all_architectures[cpu->arch];
50021c32 8344}
8345
8346/* Implement TARGET_OPTION_OVERRIDE. This is called once in the beginning
8347 and is used to parse the -m{cpu,tune,arch} strings and setup the initial
8348 tuning structs. In particular it must set selected_tune and
8349 aarch64_isa_flags that define the available ISA features and tuning
8350 decisions. It must also set selected_arch as this will be used to
8351 output the .arch asm tags for each function. */
8352
8353static void
8354aarch64_override_options (void)
8355{
8356 unsigned long cpu_isa = 0;
8357 unsigned long arch_isa = 0;
8358 aarch64_isa_flags = 0;
8359
a0db861f 8360 bool valid_cpu = true;
8361 bool valid_tune = true;
8362 bool valid_arch = true;
8363
50021c32 8364 selected_cpu = NULL;
8365 selected_arch = NULL;
8366 selected_tune = NULL;
8367
8368 /* -mcpu=CPU is shorthand for -march=ARCH_FOR_CPU, -mtune=CPU.
8369 If either of -march or -mtune is given, they override their
8370 respective component of -mcpu. */
8371 if (aarch64_cpu_string)
a0db861f 8372 valid_cpu = aarch64_validate_mcpu (aarch64_cpu_string, &selected_cpu,
8373 &cpu_isa);
50021c32 8374
8375 if (aarch64_arch_string)
a0db861f 8376 valid_arch = aarch64_validate_march (aarch64_arch_string, &selected_arch,
8377 &arch_isa);
50021c32 8378
8379 if (aarch64_tune_string)
a0db861f 8380 valid_tune = aarch64_validate_mtune (aarch64_tune_string, &selected_tune);
df401d54 8381
8382 /* If the user did not specify a processor, choose the default
8383 one for them. This will be the CPU set during configuration using
39543dd4 8384 --with-cpu, otherwise it is "generic". */
df401d54 8385 if (!selected_cpu)
8386 {
50021c32 8387 if (selected_arch)
8388 {
8389 selected_cpu = &all_cores[selected_arch->ident];
8390 aarch64_isa_flags = arch_isa;
a0db861f 8391 explicit_arch = selected_arch->arch;
50021c32 8392 }
8393 else
8394 {
a0db861f 8395 /* Get default configure-time CPU. */
8396 selected_cpu = aarch64_get_tune_cpu (aarch64_none);
50021c32 8397 aarch64_isa_flags = TARGET_CPU_DEFAULT >> 6;
8398 }
a0db861f 8399
8400 if (selected_tune)
8401 explicit_tune_core = selected_tune->ident;
50021c32 8402 }
 8403  /* If both -mcpu and -march are specified, check that they are architecturally
8404 compatible, warn if they're not and prefer the -march ISA flags. */
8405 else if (selected_arch)
8406 {
8407 if (selected_arch->arch != selected_cpu->arch)
8408 {
8409 warning (0, "switch -mcpu=%s conflicts with -march=%s switch",
8410 all_architectures[selected_cpu->arch].name,
8411 selected_arch->name);
8412 }
8413 aarch64_isa_flags = arch_isa;
a0db861f 8414 explicit_arch = selected_arch->arch;
8415 explicit_tune_core = selected_tune ? selected_tune->ident
8416 : selected_cpu->ident;
50021c32 8417 }
8418 else
8419 {
8420 /* -mcpu but no -march. */
8421 aarch64_isa_flags = cpu_isa;
a0db861f 8422 explicit_tune_core = selected_tune ? selected_tune->ident
8423 : selected_cpu->ident;
8424 gcc_assert (selected_cpu);
8425 selected_arch = &all_architectures[selected_cpu->arch];
8426 explicit_arch = selected_arch->arch;
df401d54 8427 }
8428
50021c32 8429  /* Set the arch as well, as we will need it when outputting
8430 the .arch directive in assembly. */
8431 if (!selected_arch)
8432 {
8433 gcc_assert (selected_cpu);
8434 selected_arch = &all_architectures[selected_cpu->arch];
8435 }
df401d54 8436
df401d54 8437 if (!selected_tune)
64df3808 8438 selected_tune = selected_cpu;
df401d54 8439
50021c32 8440#ifndef HAVE_AS_MABI_OPTION
8441 /* The compiler may have been configured with 2.23.* binutils, which does
8442 not have support for ILP32. */
8443 if (TARGET_ILP32)
8444 error ("Assembler does not support -mabi=ilp32");
8445#endif
df401d54 8446
a0db861f 8447 /* Make sure we properly set up the explicit options. */
8448 if ((aarch64_cpu_string && valid_cpu)
8449 || (aarch64_tune_string && valid_tune))
8450 gcc_assert (explicit_tune_core != aarch64_none);
8451
8452 if ((aarch64_cpu_string && valid_cpu)
8453 || (aarch64_arch_string && valid_arch))
8454 gcc_assert (explicit_arch != aarch64_no_arch);
8455
50021c32 8456 aarch64_override_options_internal (&global_options);
8457
8458 /* Save these options as the default ones in case we push and pop them later
8459 while processing functions with potential target attributes. */
8460 target_option_default_node = target_option_current_node
8461 = build_target_option_node (&global_options);
c71e113d 8462
993a7330 8463 aarch64_register_fma_steering ();
ee7ef7ab 8464
df401d54 8465}
8466
8467/* Implement targetm.override_options_after_change. */
8468
8469static void
8470aarch64_override_options_after_change (void)
8471{
50021c32 8472 aarch64_override_options_after_change_1 (&global_options);
df401d54 8473}
8474
8475static struct machine_function *
8476aarch64_init_machine_status (void)
8477{
8478 struct machine_function *machine;
25a27413 8479 machine = ggc_cleared_alloc<machine_function> ();
df401d54 8480 return machine;
8481}
8482
8483void
8484aarch64_init_expanders (void)
8485{
8486 init_machine_status = aarch64_init_machine_status;
8487}
8488
 8489/* Resolve the code model that will be used, taking -fpic/-fPIC into account.  */
8490static void
50021c32 8491initialize_aarch64_code_model (struct gcc_options *opts)
df401d54 8492{
50021c32 8493 if (opts->x_flag_pic)
df401d54 8494 {
50021c32 8495 switch (opts->x_aarch64_cmodel_var)
df401d54 8496 {
8497 case AARCH64_CMODEL_TINY:
8498 aarch64_cmodel = AARCH64_CMODEL_TINY_PIC;
8499 break;
8500 case AARCH64_CMODEL_SMALL:
d65d3062 8501#ifdef HAVE_AS_SMALL_PIC_RELOCS
65f988f7 8502 aarch64_cmodel = (flag_pic == 2
8503 ? AARCH64_CMODEL_SMALL_PIC
8504 : AARCH64_CMODEL_SMALL_SPIC);
d65d3062 8505#else
8506 aarch64_cmodel = AARCH64_CMODEL_SMALL_PIC;
8507#endif
df401d54 8508 break;
8509 case AARCH64_CMODEL_LARGE:
8510 sorry ("code model %qs with -f%s", "large",
50021c32 8511 opts->x_flag_pic > 1 ? "PIC" : "pic");
4dcd2731 8512 break;
df401d54 8513 default:
8514 gcc_unreachable ();
8515 }
8516 }
8517 else
50021c32 8518 aarch64_cmodel = opts->x_aarch64_cmodel_var;
df401d54 8519}
8520
a0db861f 8521/* Implement TARGET_OPTION_SAVE. */
8522
8523static void
8524aarch64_option_save (struct cl_target_option *ptr, struct gcc_options *opts)
8525{
8526 ptr->x_aarch64_override_tune_string = opts->x_aarch64_override_tune_string;
8527}
8528
 8529/* Implement TARGET_OPTION_RESTORE.  Restore the backend codegen decisions
8530 using the information saved in PTR. */
8531
8532static void
8533aarch64_option_restore (struct gcc_options *opts, struct cl_target_option *ptr)
8534{
8535 opts->x_explicit_tune_core = ptr->x_explicit_tune_core;
8536 selected_tune = aarch64_get_tune_cpu (ptr->x_explicit_tune_core);
8537 opts->x_explicit_arch = ptr->x_explicit_arch;
8538 selected_arch = aarch64_get_arch (ptr->x_explicit_arch);
8539 opts->x_aarch64_override_tune_string = ptr->x_aarch64_override_tune_string;
8540
8541 aarch64_override_options_internal (opts);
8542}
8543
8544/* Implement TARGET_OPTION_PRINT. */
8545
8546static void
8547aarch64_option_print (FILE *file, int indent, struct cl_target_option *ptr)
8548{
8549 const struct processor *cpu
8550 = aarch64_get_tune_cpu (ptr->x_explicit_tune_core);
8551 unsigned long isa_flags = ptr->x_aarch64_isa_flags;
8552 const struct processor *arch = aarch64_get_arch (ptr->x_explicit_arch);
9dc3dccb 8553 std::string extension
8554 = aarch64_get_extension_string_for_isa_flags (isa_flags);
a0db861f 8555
8556 fprintf (file, "%*sselected tune = %s\n", indent, "", cpu->name);
9dc3dccb 8557 fprintf (file, "%*sselected arch = %s%s\n", indent, "",
8558 arch->name, extension.c_str ());
a0db861f 8559}
8560
f59387ab 8561static GTY(()) tree aarch64_previous_fndecl;
8562
26db5325 8563void
8564aarch64_reset_previous_fndecl (void)
8565{
8566 aarch64_previous_fndecl = NULL;
8567}
8568
f59387ab 8569/* Implement TARGET_SET_CURRENT_FUNCTION. Unpack the codegen decisions
8570 like tuning and ISA features from the DECL_FUNCTION_SPECIFIC_TARGET
8571 of the function, if such exists. This function may be called multiple
8572 times on a single function so use aarch64_previous_fndecl to avoid
8573 setting up identical state. */
8574
8575static void
8576aarch64_set_current_function (tree fndecl)
8577{
8578 tree old_tree = (aarch64_previous_fndecl
8579 ? DECL_FUNCTION_SPECIFIC_TARGET (aarch64_previous_fndecl)
8580 : NULL_TREE);
8581
8582 tree new_tree = (fndecl
8583 ? DECL_FUNCTION_SPECIFIC_TARGET (fndecl)
8584 : NULL_TREE);
8585
8586
8587 if (fndecl && fndecl != aarch64_previous_fndecl)
8588 {
8589 aarch64_previous_fndecl = fndecl;
8590 if (old_tree == new_tree)
8591 ;
8592
8593 else if (new_tree && new_tree != target_option_default_node)
8594 {
8595 cl_target_option_restore (&global_options,
8596 TREE_TARGET_OPTION (new_tree));
8597 if (TREE_TARGET_GLOBALS (new_tree))
8598 restore_target_globals (TREE_TARGET_GLOBALS (new_tree));
8599 else
8600 TREE_TARGET_GLOBALS (new_tree)
8601 = save_target_globals_default_opts ();
8602 }
8603
8604 else if (old_tree && old_tree != target_option_default_node)
8605 {
8606 new_tree = target_option_current_node;
8607 cl_target_option_restore (&global_options,
8608 TREE_TARGET_OPTION (new_tree));
8609 if (TREE_TARGET_GLOBALS (new_tree))
8610 restore_target_globals (TREE_TARGET_GLOBALS (new_tree));
8611 else if (new_tree == target_option_default_node)
8612 restore_target_globals (&default_target_globals);
8613 else
8614 TREE_TARGET_GLOBALS (new_tree)
8615 = save_target_globals_default_opts ();
8616 }
8617 }
5a065cf8 8618
8619 if (!fndecl)
8620 return;
8621
 8622  /* If we turned on SIMD, make sure that any vector parameters are re-laid out
8623 so that they use proper vector modes. */
8624 if (TARGET_SIMD)
8625 {
8626 tree parms = DECL_ARGUMENTS (fndecl);
8627 for (; parms && parms != void_list_node; parms = TREE_CHAIN (parms))
8628 {
8629 if (TREE_CODE (parms) == PARM_DECL
8630 && VECTOR_TYPE_P (TREE_TYPE (parms))
8631 && DECL_MODE (parms) != TYPE_MODE (TREE_TYPE (parms)))
8632 relayout_decl (parms);
8633 }
8634 }
f59387ab 8635}
a0db861f 8636
aadb8e17 8637/* Enum describing the various ways we can handle attributes.
8638 In many cases we can reuse the generic option handling machinery. */
8639
8640enum aarch64_attr_opt_type
8641{
8642 aarch64_attr_mask, /* Attribute should set a bit in target_flags. */
8643 aarch64_attr_bool, /* Attribute sets or unsets a boolean variable. */
8644 aarch64_attr_enum, /* Attribute sets an enum variable. */
8645 aarch64_attr_custom /* Attribute requires a custom handling function. */
8646};
8647
8648/* All the information needed to handle a target attribute.
8649 NAME is the name of the attribute.
8650 ATTR_TYPE specifies the type of behaviour of the attribute as described
8651 in the definition of enum aarch64_attr_opt_type.
8652 ALLOW_NEG is true if the attribute supports a "no-" form.
8653 HANDLER is the function that takes the attribute string and whether
8654 it is a pragma or attribute and handles the option. It is needed only
8655 when the ATTR_TYPE is aarch64_attr_custom.
8656 OPT_NUM is the enum specifying the option that the attribute modifies.
8657 This is needed for attributes that mirror the behaviour of a command-line
8658 option, that is it has ATTR_TYPE aarch64_attr_mask, aarch64_attr_bool or
8659 aarch64_attr_enum. */
8660
8661struct aarch64_attribute_info
8662{
8663 const char *name;
8664 enum aarch64_attr_opt_type attr_type;
8665 bool allow_neg;
8666 bool (*handler) (const char *, const char *);
8667 enum opt_code opt_num;
8668};
8669
8670/* Handle the ARCH_STR argument to the arch= target attribute.
8671 PRAGMA_OR_ATTR is used in potential error messages. */
8672
8673static bool
8674aarch64_handle_attr_arch (const char *str, const char *pragma_or_attr)
8675{
8676 const struct processor *tmp_arch = NULL;
8677 enum aarch64_parse_opt_result parse_res
8678 = aarch64_parse_arch (str, &tmp_arch, &aarch64_isa_flags);
8679
8680 if (parse_res == AARCH64_PARSE_OK)
8681 {
8682 gcc_assert (tmp_arch);
8683 selected_arch = tmp_arch;
8684 explicit_arch = selected_arch->arch;
8685 return true;
8686 }
8687
8688 switch (parse_res)
8689 {
8690 case AARCH64_PARSE_MISSING_ARG:
8691 error ("missing architecture name in 'arch' target %s", pragma_or_attr);
8692 break;
8693 case AARCH64_PARSE_INVALID_ARG:
8694 error ("unknown value %qs for 'arch' target %s", str, pragma_or_attr);
8695 break;
8696 case AARCH64_PARSE_INVALID_FEATURE:
8697 error ("invalid feature modifier %qs for 'arch' target %s",
8698 str, pragma_or_attr);
8699 break;
8700 default:
8701 gcc_unreachable ();
8702 }
8703
8704 return false;
8705}
8706
8707/* Handle the argument CPU_STR to the cpu= target attribute.
8708 PRAGMA_OR_ATTR is used in potential error messages. */
8709
8710static bool
8711aarch64_handle_attr_cpu (const char *str, const char *pragma_or_attr)
8712{
8713 const struct processor *tmp_cpu = NULL;
8714 enum aarch64_parse_opt_result parse_res
8715 = aarch64_parse_cpu (str, &tmp_cpu, &aarch64_isa_flags);
8716
8717 if (parse_res == AARCH64_PARSE_OK)
8718 {
8719 gcc_assert (tmp_cpu);
8720 selected_tune = tmp_cpu;
8721 explicit_tune_core = selected_tune->ident;
8722
8723 selected_arch = &all_architectures[tmp_cpu->arch];
8724 explicit_arch = selected_arch->arch;
8725 return true;
8726 }
8727
8728 switch (parse_res)
8729 {
8730 case AARCH64_PARSE_MISSING_ARG:
8731 error ("missing cpu name in 'cpu' target %s", pragma_or_attr);
8732 break;
8733 case AARCH64_PARSE_INVALID_ARG:
8734 error ("unknown value %qs for 'cpu' target %s", str, pragma_or_attr);
8735 break;
8736 case AARCH64_PARSE_INVALID_FEATURE:
8737 error ("invalid feature modifier %qs for 'cpu' target %s",
8738 str, pragma_or_attr);
8739 break;
8740 default:
8741 gcc_unreachable ();
8742 }
8743
8744 return false;
8745}
8746
8747/* Handle the argument STR to the tune= target attribute.
8748 PRAGMA_OR_ATTR is used in potential error messages. */
8749
8750static bool
8751aarch64_handle_attr_tune (const char *str, const char *pragma_or_attr)
8752{
8753 const struct processor *tmp_tune = NULL;
8754 enum aarch64_parse_opt_result parse_res
8755 = aarch64_parse_tune (str, &tmp_tune);
8756
8757 if (parse_res == AARCH64_PARSE_OK)
8758 {
8759 gcc_assert (tmp_tune);
8760 selected_tune = tmp_tune;
8761 explicit_tune_core = selected_tune->ident;
8762 return true;
8763 }
8764
8765 switch (parse_res)
8766 {
8767 case AARCH64_PARSE_INVALID_ARG:
8768 error ("unknown value %qs for 'tune' target %s", str, pragma_or_attr);
8769 break;
8770 default:
8771 gcc_unreachable ();
8772 }
8773
8774 return false;
8775}
8776
8777/* Parse an architecture extensions target attribute string specified in STR.
8778 For example "+fp+nosimd". Show any errors if needed. Return TRUE
8779 if successful. Update aarch64_isa_flags to reflect the ISA features
8780 modified.
8781 PRAGMA_OR_ATTR is used in potential error messages. */
8782
8783static bool
8784aarch64_handle_attr_isa_flags (char *str, const char *pragma_or_attr)
8785{
8786 enum aarch64_parse_opt_result parse_res;
8787 unsigned long isa_flags = aarch64_isa_flags;
8788
26db5325 8789  /* We allow "+nothing" at the beginning to clear out all architectural
8790 features if the user wants to handpick specific features. */
8791 if (strncmp ("+nothing", str, 8) == 0)
8792 {
8793 isa_flags = 0;
8794 str += 8;
8795 }
8796
aadb8e17 8797 parse_res = aarch64_parse_extension (str, &isa_flags);
8798
8799 if (parse_res == AARCH64_PARSE_OK)
8800 {
8801 aarch64_isa_flags = isa_flags;
8802 return true;
8803 }
8804
8805 switch (parse_res)
8806 {
8807 case AARCH64_PARSE_MISSING_ARG:
8808 error ("missing feature modifier in target %s %qs",
8809 pragma_or_attr, str);
8810 break;
8811
8812 case AARCH64_PARSE_INVALID_FEATURE:
8813 error ("invalid feature modifier in target %s %qs",
8814 pragma_or_attr, str);
8815 break;
8816
8817 default:
8818 gcc_unreachable ();
8819 }
8820
8821 return false;
8822}
8823
8824/* The target attributes that we support. On top of these we also support just
8825 ISA extensions, like __attribute__ ((target ("+crc"))), but that case is
8826 handled explicitly in aarch64_process_one_target_attr. */
8827
8828static const struct aarch64_attribute_info aarch64_attributes[] =
8829{
8830 { "general-regs-only", aarch64_attr_mask, false, NULL,
8831 OPT_mgeneral_regs_only },
8832 { "fix-cortex-a53-835769", aarch64_attr_bool, true, NULL,
8833 OPT_mfix_cortex_a53_835769 },
8834 { "cmodel", aarch64_attr_enum, false, NULL, OPT_mcmodel_ },
8835 { "strict-align", aarch64_attr_mask, false, NULL, OPT_mstrict_align },
8836 { "omit-leaf-frame-pointer", aarch64_attr_bool, true, NULL,
8837 OPT_momit_leaf_frame_pointer },
8838 { "tls-dialect", aarch64_attr_enum, false, NULL, OPT_mtls_dialect_ },
8839 { "arch", aarch64_attr_custom, false, aarch64_handle_attr_arch,
8840 OPT_march_ },
8841 { "cpu", aarch64_attr_custom, false, aarch64_handle_attr_cpu, OPT_mcpu_ },
8842 { "tune", aarch64_attr_custom, false, aarch64_handle_attr_tune,
8843 OPT_mtune_ },
8844 { NULL, aarch64_attr_custom, false, NULL, OPT____ }
8845};
8846
8847/* Parse ARG_STR which contains the definition of one target attribute.
8848 Show appropriate errors if any or return true if the attribute is valid.
8849 PRAGMA_OR_ATTR holds the string to use in error messages about whether
8850 we're processing a target attribute or pragma. */
8851
8852static bool
8853aarch64_process_one_target_attr (char *arg_str, const char* pragma_or_attr)
8854{
8855 bool invert = false;
8856
8857 size_t len = strlen (arg_str);
8858
8859 if (len == 0)
8860 {
8861 error ("malformed target %s", pragma_or_attr);
8862 return false;
8863 }
8864
8865 char *str_to_check = (char *) alloca (len + 1);
8866 strcpy (str_to_check, arg_str);
8867
8868 /* Skip leading whitespace. */
8869 while (*str_to_check == ' ' || *str_to_check == '\t')
8870 str_to_check++;
8871
8872 /* We have something like __attribute__ ((target ("+fp+nosimd"))).
8873 It is easier to detect and handle it explicitly here rather than going
8874 through the machinery for the rest of the target attributes in this
8875 function. */
8876 if (*str_to_check == '+')
8877 return aarch64_handle_attr_isa_flags (str_to_check, pragma_or_attr);
8878
8879 if (len > 3 && strncmp (str_to_check, "no-", 3) == 0)
8880 {
8881 invert = true;
8882 str_to_check += 3;
8883 }
8884 char *arg = strchr (str_to_check, '=');
8885
8886 /* If we found opt=foo then terminate STR_TO_CHECK at the '='
8887 and point ARG to "foo". */
8888 if (arg)
8889 {
8890 *arg = '\0';
8891 arg++;
8892 }
8893 const struct aarch64_attribute_info *p_attr;
8894 for (p_attr = aarch64_attributes; p_attr->name; p_attr++)
8895 {
8896 /* If the names don't match up, or the user has given an argument
8897 to an attribute that doesn't accept one, or didn't give an argument
8898 to an attribute that expects one, fail to match. */
8899 if (strcmp (str_to_check, p_attr->name) != 0)
8900 continue;
8901
8902 bool attr_need_arg_p = p_attr->attr_type == aarch64_attr_custom
8903 || p_attr->attr_type == aarch64_attr_enum;
8904
8905 if (attr_need_arg_p ^ (arg != NULL))
8906 {
8907 error ("target %s %qs does not accept an argument",
8908 pragma_or_attr, str_to_check);
8909 return false;
8910 }
8911
8912 /* If the name matches but the attribute does not allow "no-" versions
8913 then we can't match. */
8914 if (invert && !p_attr->allow_neg)
8915 {
8916 error ("target %s %qs does not allow a negated form",
8917 pragma_or_attr, str_to_check);
8918 return false;
8919 }
8920
8921 switch (p_attr->attr_type)
8922 {
8923 /* Has a custom handler registered.
8924 For example, cpu=, arch=, tune=. */
8925 case aarch64_attr_custom:
8926 gcc_assert (p_attr->handler);
8927 if (!p_attr->handler (arg, pragma_or_attr))
8928 return false;
8929 break;
8930
8931 /* Either set or unset a boolean option. */
8932 case aarch64_attr_bool:
8933 {
8934 struct cl_decoded_option decoded;
8935
8936 generate_option (p_attr->opt_num, NULL, !invert,
8937 CL_TARGET, &decoded);
8938 aarch64_handle_option (&global_options, &global_options_set,
8939 &decoded, input_location);
8940 break;
8941 }
8942 /* Set or unset a bit in the target_flags. aarch64_handle_option
8943 should know what mask to apply given the option number. */
8944 case aarch64_attr_mask:
8945 {
8946 struct cl_decoded_option decoded;
8947 /* We only need to specify the option number.
8948 aarch64_handle_option will know which mask to apply. */
8949 decoded.opt_index = p_attr->opt_num;
8950 decoded.value = !invert;
8951 aarch64_handle_option (&global_options, &global_options_set,
8952 &decoded, input_location);
8953 break;
8954 }
8955 /* Use the option setting machinery to set an option to an enum. */
8956 case aarch64_attr_enum:
8957 {
8958 gcc_assert (arg);
8959 bool valid;
8960 int value;
8961 valid = opt_enum_arg_to_value (p_attr->opt_num, arg,
8962 &value, CL_TARGET);
8963 if (valid)
8964 {
8965 set_option (&global_options, NULL, p_attr->opt_num, value,
8966 NULL, DK_UNSPECIFIED, input_location,
8967 global_dc);
8968 }
8969 else
8970 {
8971 error ("target %s %s=%s is not valid",
8972 pragma_or_attr, str_to_check, arg);
8973 }
8974 break;
8975 }
8976 default:
8977 gcc_unreachable ();
8978 }
8979 }
8980
8981 return true;
8982}
8983
8984/* Count how many times the character C appears in
8985 NULL-terminated string STR. */
8986
8987static unsigned int
8988num_occurences_in_str (char c, char *str)
8989{
8990 unsigned int res = 0;
8991 while (*str != '\0')
8992 {
8993 if (*str == c)
8994 res++;
8995
8996 str++;
8997 }
8998
8999 return res;
9000}
9001
9002/* Parse the tree in ARGS that contains the target attribute information
9003 and update the global target options space. PRAGMA_OR_ATTR is a string
9004 to be used in error messages, specifying whether this is processing
9005 a target attribute or a target pragma. */
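/* For example, __attribute__ ((target ("arch=armv8-a+crc,strict-align")))
   reaches this function as a single STRING_CST, which is split on ',' and
   each piece handled by aarch64_process_one_target_attr.  (The attribute
   names used here are taken from the aarch64_attributes table above.)  */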
9006
9007bool
9008aarch64_process_target_attr (tree args, const char* pragma_or_attr)
9009{
9010 if (TREE_CODE (args) == TREE_LIST)
9011 {
9012 do
9013 {
9014 tree head = TREE_VALUE (args);
9015 if (head)
9016 {
9017 if (!aarch64_process_target_attr (head, pragma_or_attr))
9018 return false;
9019 }
9020 args = TREE_CHAIN (args);
9021 } while (args);
9022
9023 return true;
9024 }
9025 /* We expect to find a string to parse. */
9026 gcc_assert (TREE_CODE (args) == STRING_CST);
9027
9028 size_t len = strlen (TREE_STRING_POINTER (args));
9029 char *str_to_check = (char *) alloca (len + 1);
9030 strcpy (str_to_check, TREE_STRING_POINTER (args));
9031
9032 if (len == 0)
9033 {
9034 error ("malformed target %s value", pragma_or_attr);
9035 return false;
9036 }
9037
 9038  /* Used to catch empty entries between commas, i.e.
9039 attribute ((target ("attr1,,attr2"))). */
9040 unsigned int num_commas = num_occurences_in_str (',', str_to_check);
9041
9042 /* Handle multiple target attributes separated by ','. */
9043 char *token = strtok (str_to_check, ",");
9044
9045 unsigned int num_attrs = 0;
9046 while (token)
9047 {
9048 num_attrs++;
9049 if (!aarch64_process_one_target_attr (token, pragma_or_attr))
9050 {
9051 error ("target %s %qs is invalid", pragma_or_attr, token);
9052 return false;
9053 }
9054
9055 token = strtok (NULL, ",");
9056 }
9057
9058 if (num_attrs != num_commas + 1)
9059 {
9060 error ("malformed target %s list %qs",
9061 pragma_or_attr, TREE_STRING_POINTER (args));
9062 return false;
9063 }
9064
9065 return true;
9066}
9067
9068/* Implement TARGET_OPTION_VALID_ATTRIBUTE_P. This is used to
9069 process attribute ((target ("..."))). */
9070
9071static bool
9072aarch64_option_valid_attribute_p (tree fndecl, tree, tree args, int)
9073{
9074 struct cl_target_option cur_target;
9075 bool ret;
9076 tree old_optimize;
9077 tree new_target, new_optimize;
9078 tree existing_target = DECL_FUNCTION_SPECIFIC_TARGET (fndecl);
cd333292 9079
9080 /* If what we're processing is the current pragma string then the
9081 target option node is already stored in target_option_current_node
9082 by aarch64_pragma_target_parse in aarch64-c.c. Use that to avoid
9083 having to re-parse the string. This is especially useful to keep
9084 arm_neon.h compile times down since that header contains a lot
9085 of intrinsics enclosed in pragmas. */
9086 if (!existing_target && args == current_target_pragma)
9087 {
9088 DECL_FUNCTION_SPECIFIC_TARGET (fndecl) = target_option_current_node;
9089 return true;
9090 }
aadb8e17 9091 tree func_optimize = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl);
9092
9093 old_optimize = build_optimization_node (&global_options);
9094 func_optimize = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl);
9095
9096 /* If the function changed the optimization levels as well as setting
9097 target options, start with the optimizations specified. */
9098 if (func_optimize && func_optimize != old_optimize)
9099 cl_optimization_restore (&global_options,
9100 TREE_OPTIMIZATION (func_optimize));
9101
9102 /* Save the current target options to restore at the end. */
9103 cl_target_option_save (&cur_target, &global_options);
9104
9105 /* If fndecl already has some target attributes applied to it, unpack
9106 them so that we add this attribute on top of them, rather than
9107 overwriting them. */
9108 if (existing_target)
9109 {
9110 struct cl_target_option *existing_options
9111 = TREE_TARGET_OPTION (existing_target);
9112
9113 if (existing_options)
9114 cl_target_option_restore (&global_options, existing_options);
9115 }
9116 else
9117 cl_target_option_restore (&global_options,
9118 TREE_TARGET_OPTION (target_option_current_node));
9119
9120
9121 ret = aarch64_process_target_attr (args, "attribute");
9122
9123 /* Set up any additional state. */
9124 if (ret)
9125 {
9126 aarch64_override_options_internal (&global_options);
dad9014c 9127 /* Initialize SIMD builtins if we haven't already.
9128 Set current_target_pragma to NULL for the duration so that
9129 the builtin initialization code doesn't try to tag the functions
9130 being built with the attributes specified by any current pragma, thus
9131 going into an infinite recursion. */
9132 if (TARGET_SIMD)
9133 {
9134 tree saved_current_target_pragma = current_target_pragma;
9135 current_target_pragma = NULL;
9136 aarch64_init_simd_builtins ();
9137 current_target_pragma = saved_current_target_pragma;
9138 }
aadb8e17 9139 new_target = build_target_option_node (&global_options);
9140 }
9141 else
9142 new_target = NULL;
9143
9144 new_optimize = build_optimization_node (&global_options);
9145
9146 if (fndecl && ret)
9147 {
9148 DECL_FUNCTION_SPECIFIC_TARGET (fndecl) = new_target;
9149
9150 if (old_optimize != new_optimize)
9151 DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl) = new_optimize;
9152 }
9153
9154 cl_target_option_restore (&global_options, &cur_target);
9155
9156 if (old_optimize != new_optimize)
9157 cl_optimization_restore (&global_options,
9158 TREE_OPTIMIZATION (old_optimize));
9159 return ret;
9160}
9161
b0269e32 9162/* Helper for aarch64_can_inline_p. In the case where CALLER and CALLEE are
9163 tri-bool options (yes, no, don't care) and the default value is
 9164   DEF, return true if inlining should be allowed.  */
9165
9166static bool
9167aarch64_tribools_ok_for_inlining_p (int caller, int callee,
9168 int dont_care, int def)
9169{
9170 /* If the callee doesn't care, always allow inlining. */
9171 if (callee == dont_care)
9172 return true;
9173
9174 /* If the caller doesn't care, always allow inlining. */
9175 if (caller == dont_care)
9176 return true;
9177
9178 /* Otherwise, allow inlining if either the callee and caller values
9179 agree, or if the callee is using the default value. */
9180 return (callee == caller || callee == def);
9181}
9182
9183/* Implement TARGET_CAN_INLINE_P. Decide whether it is valid
9184 to inline CALLEE into CALLER based on target-specific info.
9185 Make sure that the caller and callee have compatible architectural
9186 features. Then go through the other possible target attributes
9187 and see if they can block inlining. Try not to reject always_inline
9188 callees unless they are incompatible architecturally. */
9189
9190static bool
9191aarch64_can_inline_p (tree caller, tree callee)
9192{
9193 tree caller_tree = DECL_FUNCTION_SPECIFIC_TARGET (caller);
9194 tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (callee);
9195
9196 /* If callee has no option attributes, then it is ok to inline. */
9197 if (!callee_tree)
9198 return true;
9199
9200 struct cl_target_option *caller_opts
9201 = TREE_TARGET_OPTION (caller_tree ? caller_tree
9202 : target_option_default_node);
9203
9204 struct cl_target_option *callee_opts = TREE_TARGET_OPTION (callee_tree);
9205
9206
9207 /* Callee's ISA flags should be a subset of the caller's. */
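  /* For instance, a callee compiled with +crypto must not be inlined into
     a caller built without the crypto ISA bits, as the inlined body could
     then use instructions the caller's context does not permit
     (illustrative example).  */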
9208 if ((caller_opts->x_aarch64_isa_flags & callee_opts->x_aarch64_isa_flags)
9209 != callee_opts->x_aarch64_isa_flags)
9210 return false;
9211
 9212	  /* Allow a non-strict-aligned function to be inlined into a
 9213	     strict-aligned one, but not the reverse.  */
9214 if ((TARGET_STRICT_ALIGN_P (caller_opts->x_target_flags)
9215 != TARGET_STRICT_ALIGN_P (callee_opts->x_target_flags))
9216 && !(!TARGET_STRICT_ALIGN_P (callee_opts->x_target_flags)
9217 && TARGET_STRICT_ALIGN_P (caller_opts->x_target_flags)))
9218 return false;
9219
9220 bool always_inline = lookup_attribute ("always_inline",
9221 DECL_ATTRIBUTES (callee));
9222
9223 /* If the architectural features match up and the callee is always_inline
9224 then the other attributes don't matter. */
9225 if (always_inline)
9226 return true;
9227
9228 if (caller_opts->x_aarch64_cmodel_var
9229 != callee_opts->x_aarch64_cmodel_var)
9230 return false;
9231
9232 if (caller_opts->x_aarch64_tls_dialect
9233 != callee_opts->x_aarch64_tls_dialect)
9234 return false;
9235
 9236	  /* Honour explicit requests to work around errata.  */
9237 if (!aarch64_tribools_ok_for_inlining_p (
9238 caller_opts->x_aarch64_fix_a53_err835769,
9239 callee_opts->x_aarch64_fix_a53_err835769,
9240 2, TARGET_FIX_ERR_A53_835769_DEFAULT))
9241 return false;
9242
9243 /* If the user explicitly specified -momit-leaf-frame-pointer for the
 9244	     caller and callee and they don't match up, reject inlining.  */
9245 if (!aarch64_tribools_ok_for_inlining_p (
9246 caller_opts->x_flag_omit_leaf_frame_pointer,
9247 callee_opts->x_flag_omit_leaf_frame_pointer,
9248 2, 1))
9249 return false;
9250
9251 /* If the callee has specific tuning overrides, respect them. */
9252 if (callee_opts->x_aarch64_override_tune_string != NULL
9253 && caller_opts->x_aarch64_override_tune_string == NULL)
9254 return false;
9255
9256 /* If the user specified tuning override strings for the
9257 caller and callee and they don't match up, reject inlining.
9258 We just do a string compare here, we don't analyze the meaning
9259 of the string, as it would be too costly for little gain. */
9260 if (callee_opts->x_aarch64_override_tune_string
9261 && caller_opts->x_aarch64_override_tune_string
9262 && (strcmp (callee_opts->x_aarch64_override_tune_string,
9263 caller_opts->x_aarch64_override_tune_string) != 0))
9264 return false;
9265
9266 return true;
9267}
9268
df401d54 9269/* Return true if SYMBOL_REF X binds locally. */
9270
9271static bool
9272aarch64_symbol_binds_local_p (const_rtx x)
9273{
9274 return (SYMBOL_REF_DECL (x)
9275 ? targetm.binds_local_p (SYMBOL_REF_DECL (x))
9276 : SYMBOL_REF_LOCAL_P (x));
9277}
9278
 9279/* Return true if SYMBOL_REF X is thread-local.  */
9280static bool
9281aarch64_tls_symbol_p (rtx x)
9282{
9283 if (! TARGET_HAVE_TLS)
9284 return false;
9285
9286 if (GET_CODE (x) != SYMBOL_REF)
9287 return false;
9288
9289 return SYMBOL_REF_TLS_MODEL (x) != 0;
9290}
9291
9292/* Classify a TLS symbol into one of the TLS kinds. */
9293enum aarch64_symbol_type
9294aarch64_classify_tls_symbol (rtx x)
9295{
9296 enum tls_model tls_kind = tls_symbolic_operand_type (x);
9297
9298 switch (tls_kind)
9299 {
9300 case TLS_MODEL_GLOBAL_DYNAMIC:
9301 case TLS_MODEL_LOCAL_DYNAMIC:
9302 return TARGET_TLS_DESC ? SYMBOL_SMALL_TLSDESC : SYMBOL_SMALL_TLSGD;
9303
9304 case TLS_MODEL_INITIAL_EXEC:
f546e2d1 9305 switch (aarch64_cmodel)
9306 {
9307 case AARCH64_CMODEL_TINY:
9308 case AARCH64_CMODEL_TINY_PIC:
9309 return SYMBOL_TINY_TLSIE;
9310 default:
7bff97c2 9311 return SYMBOL_SMALL_TLSIE;
f546e2d1 9312 }
df401d54 9313
9314 case TLS_MODEL_LOCAL_EXEC:
57507fa5 9315 if (aarch64_tls_size == 12)
9316 return SYMBOL_TLSLE12;
9317 else if (aarch64_tls_size == 24)
9318 return SYMBOL_TLSLE24;
9319 else if (aarch64_tls_size == 32)
9320 return SYMBOL_TLSLE32;
9321 else if (aarch64_tls_size == 48)
9322 return SYMBOL_TLSLE48;
9323 else
9324 gcc_unreachable ();
df401d54 9325
9326 case TLS_MODEL_EMULATED:
9327 case TLS_MODEL_NONE:
9328 return SYMBOL_FORCE_TO_MEM;
9329
9330 default:
9331 gcc_unreachable ();
9332 }
9333}
9334
9335/* Return the method that should be used to access SYMBOL_REF or
82882dbd 9336 LABEL_REF X. */
e03531ec 9337
df401d54 9338enum aarch64_symbol_type
82882dbd 9339aarch64_classify_symbol (rtx x, rtx offset)
df401d54 9340{
9341 if (GET_CODE (x) == LABEL_REF)
9342 {
9343 switch (aarch64_cmodel)
9344 {
9345 case AARCH64_CMODEL_LARGE:
9346 return SYMBOL_FORCE_TO_MEM;
9347
9348 case AARCH64_CMODEL_TINY_PIC:
9349 case AARCH64_CMODEL_TINY:
5137d3cb 9350 return SYMBOL_TINY_ABSOLUTE;
9351
65f988f7 9352 case AARCH64_CMODEL_SMALL_SPIC:
df401d54 9353 case AARCH64_CMODEL_SMALL_PIC:
9354 case AARCH64_CMODEL_SMALL:
9355 return SYMBOL_SMALL_ABSOLUTE;
9356
9357 default:
9358 gcc_unreachable ();
9359 }
9360 }
9361
e03531ec 9362 if (GET_CODE (x) == SYMBOL_REF)
df401d54 9363 {
c85e352b 9364 if (aarch64_cmodel == AARCH64_CMODEL_LARGE)
ae1cefe6 9365 {
9366 /* This is alright even in PIC code as the constant
9367 pool reference is always PC relative and within
9368 the same translation unit. */
9369 if (nopcrelative_literal_loads
9370 && CONSTANT_POOL_ADDRESS_P (x))
9371 return SYMBOL_SMALL_ABSOLUTE;
9372 else
9373 return SYMBOL_FORCE_TO_MEM;
9374 }
df401d54 9375
9376 if (aarch64_tls_symbol_p (x))
9377 return aarch64_classify_tls_symbol (x);
9378
e03531ec 9379 switch (aarch64_cmodel)
9380 {
9381 case AARCH64_CMODEL_TINY:
73d7dc3e 9382	      /* When we retrieve a symbol + offset address, we have to make sure
         9383		 the offset does not cause overflow of the final address.  But we
         9384		 have no way of knowing the address of the symbol at compile time,
         9385		 so we can't accurately say whether the distance between the PC and
         9386		 symbol + offset is outside the addressable range of +/-1M in the
         9387		 TINY code model.  So we rely on images not being greater than 1M,
         9388		 cap the offset at 1M, and require anything beyond 1M to be loaded
         9389		 using an alternative mechanism.  */
9390 if (SYMBOL_REF_WEAK (x)
9391 || INTVAL (offset) < -1048575 || INTVAL (offset) > 1048575)
5137d3cb 9392 return SYMBOL_FORCE_TO_MEM;
9393 return SYMBOL_TINY_ABSOLUTE;
9394
e03531ec 9395 case AARCH64_CMODEL_SMALL:
73d7dc3e 9396 /* Same reasoning as the tiny code model, but the offset cap here is
9397 4G. */
9398 if (SYMBOL_REF_WEAK (x)
73626f6a 9399 || !IN_RANGE (INTVAL (offset), HOST_WIDE_INT_C (-4294967263),
9400 HOST_WIDE_INT_C (4294967264)))
e03531ec 9401 return SYMBOL_FORCE_TO_MEM;
9402 return SYMBOL_SMALL_ABSOLUTE;
df401d54 9403
e03531ec 9404 case AARCH64_CMODEL_TINY_PIC:
12a60807 9405 if (!aarch64_symbol_binds_local_p (x))
2c97ec73 9406 return SYMBOL_TINY_GOT;
12a60807 9407 return SYMBOL_TINY_ABSOLUTE;
9408
65f988f7 9409 case AARCH64_CMODEL_SMALL_SPIC:
e03531ec 9410 case AARCH64_CMODEL_SMALL_PIC:
9411 if (!aarch64_symbol_binds_local_p (x))
65f988f7 9412 return (aarch64_cmodel == AARCH64_CMODEL_SMALL_SPIC
9413 ? SYMBOL_SMALL_GOT_28K : SYMBOL_SMALL_GOT_4G);
e03531ec 9414 return SYMBOL_SMALL_ABSOLUTE;
df401d54 9415
e03531ec 9416 default:
9417 gcc_unreachable ();
9418 }
df401d54 9419 }
e03531ec 9420
df401d54 9421 /* By default push everything into the constant pool. */
9422 return SYMBOL_FORCE_TO_MEM;
9423}
9424
df401d54 9425bool
9426aarch64_constant_address_p (rtx x)
9427{
9428 return (CONSTANT_P (x) && memory_address_p (DImode, x));
9429}
9430
9431bool
9432aarch64_legitimate_pic_operand_p (rtx x)
9433{
9434 if (GET_CODE (x) == SYMBOL_REF
9435 || (GET_CODE (x) == CONST
9436 && GET_CODE (XEXP (x, 0)) == PLUS
9437 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF))
9438 return false;
9439
9440 return true;
9441}
9442
72841352 9443	/* Return true if X holds either a valid quarter-precision floating-point
         9444	   constant or the floating-point constant +0.0.  */
9445static bool
3754d046 9446aarch64_valid_floating_const (machine_mode mode, rtx x)
72841352 9447{
9448 if (!CONST_DOUBLE_P (x))
9449 return false;
9450
ef59e3de 9451 if (aarch64_float_const_zero_rtx_p (x))
9452 return true;
9453
 9454	  /* Apart from 0.0 (handled above), we only handle SFmode and DFmode values.  */
72841352 9455 if (!(mode == SFmode || mode == DFmode))
9456 return false;
9457
72841352 9458 return aarch64_float_const_representable_p (x);
9459}
9460
df401d54 9461static bool
3754d046 9462aarch64_legitimate_constant_p (machine_mode mode, rtx x)
df401d54 9463{
9464 /* Do not allow vector struct mode constants. We could support
9465 0 and -1 easily, but they need support in aarch64-simd.md. */
9466 if (TARGET_SIMD && aarch64_vect_struct_mode_p (mode))
9467 return false;
9468
9469 /* This could probably go away because
9470 we now decompose CONST_INTs according to expand_mov_immediate. */
9471 if ((GET_CODE (x) == CONST_VECTOR
fc3eb658 9472 && aarch64_simd_valid_immediate (x, mode, false, NULL))
72841352 9473 || CONST_INT_P (x) || aarch64_valid_floating_const (mode, x))
9474 return !targetm.cannot_force_const_mem (mode, x);
df401d54 9475
9476 if (GET_CODE (x) == HIGH
9477 && aarch64_valid_symref (XEXP (x, 0), GET_MODE (XEXP (x, 0))))
9478 return true;
9479
9480 return aarch64_constant_address_p (x);
9481}
9482
c844530e 9483rtx
df401d54 9484aarch64_load_tp (rtx target)
9485{
9486 if (!target
9487 || GET_MODE (target) != Pmode
9488 || !register_operand (target, Pmode))
9489 target = gen_reg_rtx (Pmode);
9490
9491 /* Can return in any reg. */
9492 emit_insn (gen_aarch64_load_tp_hard (target));
9493 return target;
9494}
9495
df401d54 9496/* On AAPCS systems, this is the "struct __va_list". */
9497static GTY(()) tree va_list_type;
9498
9499/* Implement TARGET_BUILD_BUILTIN_VA_LIST.
9500 Return the type to use as __builtin_va_list.
9501
9502 AAPCS64 \S 7.1.4 requires that va_list be a typedef for a type defined as:
9503
9504 struct __va_list
9505 {
9506 void *__stack;
9507 void *__gr_top;
9508 void *__vr_top;
9509 int __gr_offs;
9510 int __vr_offs;
9511 }; */
9512
9513static tree
9514aarch64_build_builtin_va_list (void)
9515{
9516 tree va_list_name;
9517 tree f_stack, f_grtop, f_vrtop, f_groff, f_vroff;
9518
9519 /* Create the type. */
9520 va_list_type = lang_hooks.types.make_type (RECORD_TYPE);
9521 /* Give it the required name. */
9522 va_list_name = build_decl (BUILTINS_LOCATION,
9523 TYPE_DECL,
9524 get_identifier ("__va_list"),
9525 va_list_type);
9526 DECL_ARTIFICIAL (va_list_name) = 1;
9527 TYPE_NAME (va_list_type) = va_list_name;
57835af6 9528 TYPE_STUB_DECL (va_list_type) = va_list_name;
df401d54 9529
9530 /* Create the fields. */
9531 f_stack = build_decl (BUILTINS_LOCATION,
9532 FIELD_DECL, get_identifier ("__stack"),
9533 ptr_type_node);
9534 f_grtop = build_decl (BUILTINS_LOCATION,
9535 FIELD_DECL, get_identifier ("__gr_top"),
9536 ptr_type_node);
9537 f_vrtop = build_decl (BUILTINS_LOCATION,
9538 FIELD_DECL, get_identifier ("__vr_top"),
9539 ptr_type_node);
9540 f_groff = build_decl (BUILTINS_LOCATION,
9541 FIELD_DECL, get_identifier ("__gr_offs"),
9542 integer_type_node);
9543 f_vroff = build_decl (BUILTINS_LOCATION,
9544 FIELD_DECL, get_identifier ("__vr_offs"),
9545 integer_type_node);
9546
9547 DECL_ARTIFICIAL (f_stack) = 1;
9548 DECL_ARTIFICIAL (f_grtop) = 1;
9549 DECL_ARTIFICIAL (f_vrtop) = 1;
9550 DECL_ARTIFICIAL (f_groff) = 1;
9551 DECL_ARTIFICIAL (f_vroff) = 1;
9552
9553 DECL_FIELD_CONTEXT (f_stack) = va_list_type;
9554 DECL_FIELD_CONTEXT (f_grtop) = va_list_type;
9555 DECL_FIELD_CONTEXT (f_vrtop) = va_list_type;
9556 DECL_FIELD_CONTEXT (f_groff) = va_list_type;
9557 DECL_FIELD_CONTEXT (f_vroff) = va_list_type;
9558
9559 TYPE_FIELDS (va_list_type) = f_stack;
9560 DECL_CHAIN (f_stack) = f_grtop;
9561 DECL_CHAIN (f_grtop) = f_vrtop;
9562 DECL_CHAIN (f_vrtop) = f_groff;
9563 DECL_CHAIN (f_groff) = f_vroff;
9564
9565 /* Compute its layout. */
9566 layout_type (va_list_type);
9567
9568 return va_list_type;
9569}
9570
9571/* Implement TARGET_EXPAND_BUILTIN_VA_START. */
9572static void
9573aarch64_expand_builtin_va_start (tree valist, rtx nextarg ATTRIBUTE_UNUSED)
9574{
9575 const CUMULATIVE_ARGS *cum;
9576 tree f_stack, f_grtop, f_vrtop, f_groff, f_vroff;
9577 tree stack, grtop, vrtop, groff, vroff;
9578 tree t;
9579 int gr_save_area_size;
9580 int vr_save_area_size;
9581 int vr_offset;
9582
9583 cum = &crtl->args.info;
9584 gr_save_area_size
9585 = (NUM_ARG_REGS - cum->aapcs_ncrn) * UNITS_PER_WORD;
9586 vr_save_area_size
9587 = (NUM_FP_ARG_REGS - cum->aapcs_nvrn) * UNITS_PER_VREG;
9588
a0c7b470 9589 if (!TARGET_FLOAT)
df401d54 9590 {
b37104f6 9591 gcc_assert (cum->aapcs_nvrn == 0);
df401d54 9592 vr_save_area_size = 0;
9593 }
9594
9595 f_stack = TYPE_FIELDS (va_list_type_node);
9596 f_grtop = DECL_CHAIN (f_stack);
9597 f_vrtop = DECL_CHAIN (f_grtop);
9598 f_groff = DECL_CHAIN (f_vrtop);
9599 f_vroff = DECL_CHAIN (f_groff);
9600
9601 stack = build3 (COMPONENT_REF, TREE_TYPE (f_stack), valist, f_stack,
9602 NULL_TREE);
9603 grtop = build3 (COMPONENT_REF, TREE_TYPE (f_grtop), valist, f_grtop,
9604 NULL_TREE);
9605 vrtop = build3 (COMPONENT_REF, TREE_TYPE (f_vrtop), valist, f_vrtop,
9606 NULL_TREE);
9607 groff = build3 (COMPONENT_REF, TREE_TYPE (f_groff), valist, f_groff,
9608 NULL_TREE);
9609 vroff = build3 (COMPONENT_REF, TREE_TYPE (f_vroff), valist, f_vroff,
9610 NULL_TREE);
9611
9612 /* Emit code to initialize STACK, which points to the next varargs stack
9613 argument. CUM->AAPCS_STACK_SIZE gives the number of stack words used
9614 by named arguments. STACK is 8-byte aligned. */
9615 t = make_tree (TREE_TYPE (stack), virtual_incoming_args_rtx);
9616 if (cum->aapcs_stack_size > 0)
9617 t = fold_build_pointer_plus_hwi (t, cum->aapcs_stack_size * UNITS_PER_WORD);
9618 t = build2 (MODIFY_EXPR, TREE_TYPE (stack), stack, t);
9619 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
9620
9621 /* Emit code to initialize GRTOP, the top of the GR save area.
9622 virtual_incoming_args_rtx should have been 16 byte aligned. */
9623 t = make_tree (TREE_TYPE (grtop), virtual_incoming_args_rtx);
9624 t = build2 (MODIFY_EXPR, TREE_TYPE (grtop), grtop, t);
9625 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
9626
9627 /* Emit code to initialize VRTOP, the top of the VR save area.
9628 This address is gr_save_area_bytes below GRTOP, rounded
9629 down to the next 16-byte boundary. */
9630 t = make_tree (TREE_TYPE (vrtop), virtual_incoming_args_rtx);
74d63f1e 9631 vr_offset = ROUND_UP (gr_save_area_size,
9632 STACK_BOUNDARY / BITS_PER_UNIT);
df401d54 9633
9634 if (vr_offset)
9635 t = fold_build_pointer_plus_hwi (t, -vr_offset);
9636 t = build2 (MODIFY_EXPR, TREE_TYPE (vrtop), vrtop, t);
9637 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
9638
9639 /* Emit code to initialize GROFF, the offset from GRTOP of the
9640 next GPR argument. */
9641 t = build2 (MODIFY_EXPR, TREE_TYPE (groff), groff,
9642 build_int_cst (TREE_TYPE (groff), -gr_save_area_size));
9643 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
9644
 9645	  /* Likewise emit code to initialize VROFF, the offset from VRTOP
9646 of the next VR argument. */
9647 t = build2 (MODIFY_EXPR, TREE_TYPE (vroff), vroff,
9648 build_int_cst (TREE_TYPE (vroff), -vr_save_area_size));
9649 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
9650}
9651
9652/* Implement TARGET_GIMPLIFY_VA_ARG_EXPR. */
9653
9654static tree
9655aarch64_gimplify_va_arg_expr (tree valist, tree type, gimple_seq *pre_p,
9656 gimple_seq *post_p ATTRIBUTE_UNUSED)
9657{
9658 tree addr;
9659 bool indirect_p;
9660 bool is_ha; /* is HFA or HVA. */
9661 bool dw_align; /* double-word align. */
3754d046 9662 machine_mode ag_mode = VOIDmode;
df401d54 9663 int nregs;
3754d046 9664 machine_mode mode;
df401d54 9665
9666 tree f_stack, f_grtop, f_vrtop, f_groff, f_vroff;
9667 tree stack, f_top, f_off, off, arg, roundup, on_stack;
9668 HOST_WIDE_INT size, rsize, adjust, align;
9669 tree t, u, cond1, cond2;
9670
9671 indirect_p = pass_by_reference (NULL, TYPE_MODE (type), type, false);
9672 if (indirect_p)
9673 type = build_pointer_type (type);
9674
9675 mode = TYPE_MODE (type);
9676
9677 f_stack = TYPE_FIELDS (va_list_type_node);
9678 f_grtop = DECL_CHAIN (f_stack);
9679 f_vrtop = DECL_CHAIN (f_grtop);
9680 f_groff = DECL_CHAIN (f_vrtop);
9681 f_vroff = DECL_CHAIN (f_groff);
9682
9683 stack = build3 (COMPONENT_REF, TREE_TYPE (f_stack), unshare_expr (valist),
9684 f_stack, NULL_TREE);
9685 size = int_size_in_bytes (type);
9686 align = aarch64_function_arg_alignment (mode, type) / BITS_PER_UNIT;
9687
9688 dw_align = false;
9689 adjust = 0;
9690 if (aarch64_vfp_is_call_or_return_candidate (mode,
9691 type,
9692 &ag_mode,
9693 &nregs,
9694 &is_ha))
9695 {
9696 /* TYPE passed in fp/simd registers. */
a0c7b470 9697 if (!TARGET_FLOAT)
b37104f6 9698 aarch64_err_no_fpadvsimd (mode, "varargs");
df401d54 9699
9700 f_top = build3 (COMPONENT_REF, TREE_TYPE (f_vrtop),
9701 unshare_expr (valist), f_vrtop, NULL_TREE);
9702 f_off = build3 (COMPONENT_REF, TREE_TYPE (f_vroff),
9703 unshare_expr (valist), f_vroff, NULL_TREE);
9704
9705 rsize = nregs * UNITS_PER_VREG;
9706
9707 if (is_ha)
9708 {
9709 if (BYTES_BIG_ENDIAN && GET_MODE_SIZE (ag_mode) < UNITS_PER_VREG)
9710 adjust = UNITS_PER_VREG - GET_MODE_SIZE (ag_mode);
9711 }
9712 else if (BLOCK_REG_PADDING (mode, type, 1) == downward
9713 && size < UNITS_PER_VREG)
9714 {
9715 adjust = UNITS_PER_VREG - size;
9716 }
9717 }
9718 else
9719 {
9720 /* TYPE passed in general registers. */
9721 f_top = build3 (COMPONENT_REF, TREE_TYPE (f_grtop),
9722 unshare_expr (valist), f_grtop, NULL_TREE);
9723 f_off = build3 (COMPONENT_REF, TREE_TYPE (f_groff),
9724 unshare_expr (valist), f_groff, NULL_TREE);
74d63f1e 9725 rsize = ROUND_UP (size, UNITS_PER_WORD);
df401d54 9726 nregs = rsize / UNITS_PER_WORD;
9727
9728 if (align > 8)
9729 dw_align = true;
9730
9731 if (BLOCK_REG_PADDING (mode, type, 1) == downward
9732 && size < UNITS_PER_WORD)
9733 {
9734 adjust = UNITS_PER_WORD - size;
9735 }
9736 }
9737
9738 /* Get a local temporary for the field value. */
9739 off = get_initialized_tmp_var (f_off, pre_p, NULL);
9740
9741 /* Emit code to branch if off >= 0. */
9742 t = build2 (GE_EXPR, boolean_type_node, off,
9743 build_int_cst (TREE_TYPE (off), 0));
9744 cond1 = build3 (COND_EXPR, ptr_type_node, t, NULL_TREE, NULL_TREE);
9745
9746 if (dw_align)
9747 {
9748 /* Emit: offs = (offs + 15) & -16. */
9749 t = build2 (PLUS_EXPR, TREE_TYPE (off), off,
9750 build_int_cst (TREE_TYPE (off), 15));
9751 t = build2 (BIT_AND_EXPR, TREE_TYPE (off), t,
9752 build_int_cst (TREE_TYPE (off), -16));
9753 roundup = build2 (MODIFY_EXPR, TREE_TYPE (off), off, t);
9754 }
9755 else
9756 roundup = NULL;
9757
9758 /* Update ap.__[g|v]r_offs */
9759 t = build2 (PLUS_EXPR, TREE_TYPE (off), off,
9760 build_int_cst (TREE_TYPE (off), rsize));
9761 t = build2 (MODIFY_EXPR, TREE_TYPE (f_off), unshare_expr (f_off), t);
9762
9763 /* String up. */
9764 if (roundup)
9765 t = build2 (COMPOUND_EXPR, TREE_TYPE (t), roundup, t);
9766
9767 /* [cond2] if (ap.__[g|v]r_offs > 0) */
9768 u = build2 (GT_EXPR, boolean_type_node, unshare_expr (f_off),
9769 build_int_cst (TREE_TYPE (f_off), 0));
9770 cond2 = build3 (COND_EXPR, ptr_type_node, u, NULL_TREE, NULL_TREE);
9771
9772 /* String up: make sure the assignment happens before the use. */
9773 t = build2 (COMPOUND_EXPR, TREE_TYPE (cond2), t, cond2);
9774 COND_EXPR_ELSE (cond1) = t;
9775
9776 /* Prepare the trees handling the argument that is passed on the stack;
9777 the top level node will store in ON_STACK. */
9778 arg = get_initialized_tmp_var (stack, pre_p, NULL);
9779 if (align > 8)
9780 {
9781 /* if (alignof(type) > 8) (arg = arg + 15) & -16; */
9782 t = fold_convert (intDI_type_node, arg);
9783 t = build2 (PLUS_EXPR, TREE_TYPE (t), t,
9784 build_int_cst (TREE_TYPE (t), 15));
9785 t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
9786 build_int_cst (TREE_TYPE (t), -16));
9787 t = fold_convert (TREE_TYPE (arg), t);
9788 roundup = build2 (MODIFY_EXPR, TREE_TYPE (arg), arg, t);
9789 }
9790 else
9791 roundup = NULL;
9792 /* Advance ap.__stack */
9793 t = fold_convert (intDI_type_node, arg);
9794 t = build2 (PLUS_EXPR, TREE_TYPE (t), t,
9795 build_int_cst (TREE_TYPE (t), size + 7));
9796 t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
9797 build_int_cst (TREE_TYPE (t), -8));
9798 t = fold_convert (TREE_TYPE (arg), t);
9799 t = build2 (MODIFY_EXPR, TREE_TYPE (stack), unshare_expr (stack), t);
9800 /* String up roundup and advance. */
9801 if (roundup)
9802 t = build2 (COMPOUND_EXPR, TREE_TYPE (t), roundup, t);
9803 /* String up with arg */
9804 on_stack = build2 (COMPOUND_EXPR, TREE_TYPE (arg), t, arg);
9805 /* Big-endianness related address adjustment. */
9806 if (BLOCK_REG_PADDING (mode, type, 1) == downward
9807 && size < UNITS_PER_WORD)
9808 {
9809 t = build2 (POINTER_PLUS_EXPR, TREE_TYPE (arg), arg,
9810 size_int (UNITS_PER_WORD - size));
9811 on_stack = build2 (COMPOUND_EXPR, TREE_TYPE (arg), on_stack, t);
9812 }
9813
9814 COND_EXPR_THEN (cond1) = unshare_expr (on_stack);
9815 COND_EXPR_THEN (cond2) = unshare_expr (on_stack);
9816
9817 /* Adjustment to OFFSET in the case of BIG_ENDIAN. */
9818 t = off;
9819 if (adjust)
9820 t = build2 (PREINCREMENT_EXPR, TREE_TYPE (off), off,
9821 build_int_cst (TREE_TYPE (off), adjust));
9822
9823 t = fold_convert (sizetype, t);
9824 t = build2 (POINTER_PLUS_EXPR, TREE_TYPE (f_top), f_top, t);
9825
9826 if (is_ha)
9827 {
9828 /* type ha; // treat as "struct {ftype field[n];}"
9829 ... [computing offs]
9830 for (i = 0; i <nregs; ++i, offs += 16)
9831 ha.field[i] = *((ftype *)(ap.__vr_top + offs));
9832 return ha; */
9833 int i;
9834 tree tmp_ha, field_t, field_ptr_t;
9835
9836 /* Declare a local variable. */
9837 tmp_ha = create_tmp_var_raw (type, "ha");
9838 gimple_add_tmp_var (tmp_ha);
9839
9840 /* Establish the base type. */
9841 switch (ag_mode)
9842 {
9843 case SFmode:
9844 field_t = float_type_node;
9845 field_ptr_t = float_ptr_type_node;
9846 break;
9847 case DFmode:
9848 field_t = double_type_node;
9849 field_ptr_t = double_ptr_type_node;
9850 break;
9851 case TFmode:
9852 field_t = long_double_type_node;
9853 field_ptr_t = long_double_ptr_type_node;
9854 break;
 9855/* Half-precision and quad-precision floats are not fully supported yet.
 9856   Enable the following code once that support is complete; the correct
 9857   type node for __fp16 * still needs to be found.  */
9858#if 0
9859 case HFmode:
9860 field_t = float_type_node;
9861 field_ptr_t = float_ptr_type_node;
9862 break;
9863#endif
9864 case V2SImode:
9865 case V4SImode:
9866 {
9867 tree innertype = make_signed_type (GET_MODE_PRECISION (SImode));
9868 field_t = build_vector_type_for_mode (innertype, ag_mode);
9869 field_ptr_t = build_pointer_type (field_t);
9870 }
9871 break;
9872 default:
9873 gcc_assert (0);
9874 }
9875
 9876	      /* *(field_ptr_t)&ha = *((field_ptr_t)vr_saved_area).  */
9877 tmp_ha = build1 (ADDR_EXPR, field_ptr_t, tmp_ha);
9878 addr = t;
9879 t = fold_convert (field_ptr_t, addr);
9880 t = build2 (MODIFY_EXPR, field_t,
9881 build1 (INDIRECT_REF, field_t, tmp_ha),
9882 build1 (INDIRECT_REF, field_t, t));
9883
9884 /* ha.field[i] = *((field_ptr_t)vr_saved_area + i) */
9885 for (i = 1; i < nregs; ++i)
9886 {
9887 addr = fold_build_pointer_plus_hwi (addr, UNITS_PER_VREG);
9888 u = fold_convert (field_ptr_t, addr);
9889 u = build2 (MODIFY_EXPR, field_t,
9890 build2 (MEM_REF, field_t, tmp_ha,
9891 build_int_cst (field_ptr_t,
9892 (i *
9893 int_size_in_bytes (field_t)))),
9894 build1 (INDIRECT_REF, field_t, u));
9895 t = build2 (COMPOUND_EXPR, TREE_TYPE (t), t, u);
9896 }
9897
9898 u = fold_convert (TREE_TYPE (f_top), tmp_ha);
9899 t = build2 (COMPOUND_EXPR, TREE_TYPE (f_top), t, u);
9900 }
9901
9902 COND_EXPR_ELSE (cond2) = t;
9903 addr = fold_convert (build_pointer_type (type), cond1);
9904 addr = build_va_arg_indirect_ref (addr);
9905
9906 if (indirect_p)
9907 addr = build_va_arg_indirect_ref (addr);
9908
9909 return addr;
9910}
9911
9912/* Implement TARGET_SETUP_INCOMING_VARARGS. */
9913
9914static void
3754d046 9915aarch64_setup_incoming_varargs (cumulative_args_t cum_v, machine_mode mode,
df401d54 9916 tree type, int *pretend_size ATTRIBUTE_UNUSED,
9917 int no_rtl)
9918{
9919 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
9920 CUMULATIVE_ARGS local_cum;
9921 int gr_saved, vr_saved;
9922
9923 /* The caller has advanced CUM up to, but not beyond, the last named
9924 argument. Advance a local copy of CUM past the last "real" named
9925 argument, to find out how many registers are left over. */
9926 local_cum = *cum;
9927 aarch64_function_arg_advance (pack_cumulative_args(&local_cum), mode, type, true);
9928
 9929	  /* Find out how many registers we need to save.  */
9930 gr_saved = NUM_ARG_REGS - local_cum.aapcs_ncrn;
9931 vr_saved = NUM_FP_ARG_REGS - local_cum.aapcs_nvrn;
9932
a0c7b470 9933 if (!TARGET_FLOAT)
df401d54 9934 {
b37104f6 9935 gcc_assert (local_cum.aapcs_nvrn == 0);
df401d54 9936 vr_saved = 0;
9937 }
9938
9939 if (!no_rtl)
9940 {
9941 if (gr_saved > 0)
9942 {
9943 rtx ptr, mem;
9944
9945 /* virtual_incoming_args_rtx should have been 16-byte aligned. */
9946 ptr = plus_constant (Pmode, virtual_incoming_args_rtx,
9947 - gr_saved * UNITS_PER_WORD);
9948 mem = gen_frame_mem (BLKmode, ptr);
9949 set_mem_alias_set (mem, get_varargs_alias_set ());
9950
9951 move_block_from_reg (local_cum.aapcs_ncrn + R0_REGNUM,
9952 mem, gr_saved);
9953 }
9954 if (vr_saved > 0)
9955 {
9956 /* We can't use move_block_from_reg, because it will use
9957 the wrong mode, storing D regs only. */
3754d046 9958 machine_mode mode = TImode;
df401d54 9959 int off, i;
9960
9961 /* Set OFF to the offset from virtual_incoming_args_rtx of
9962 the first vector register. The VR save area lies below
9963 the GR one, and is aligned to 16 bytes. */
74d63f1e 9964 off = -ROUND_UP (gr_saved * UNITS_PER_WORD,
9965 STACK_BOUNDARY / BITS_PER_UNIT);
df401d54 9966 off -= vr_saved * UNITS_PER_VREG;
9967
9968 for (i = local_cum.aapcs_nvrn; i < NUM_FP_ARG_REGS; ++i)
9969 {
9970 rtx ptr, mem;
9971
9972 ptr = plus_constant (Pmode, virtual_incoming_args_rtx, off);
9973 mem = gen_frame_mem (mode, ptr);
9974 set_mem_alias_set (mem, get_varargs_alias_set ());
9975 aarch64_emit_move (mem, gen_rtx_REG (mode, V0_REGNUM + i));
9976 off += UNITS_PER_VREG;
9977 }
9978 }
9979 }
9980
9981 /* We don't save the size into *PRETEND_SIZE because we want to avoid
9982 any complication of having crtl->args.pretend_args_size changed. */
a42e2676 9983 cfun->machine->frame.saved_varargs_size
74d63f1e 9984 = (ROUND_UP (gr_saved * UNITS_PER_WORD,
9985 STACK_BOUNDARY / BITS_PER_UNIT)
df401d54 9986 + vr_saved * UNITS_PER_VREG);
9987}
9988
9989static void
9990aarch64_conditional_register_usage (void)
9991{
9992 int i;
9993 if (!TARGET_FLOAT)
9994 {
9995 for (i = V0_REGNUM; i <= V31_REGNUM; i++)
9996 {
9997 fixed_regs[i] = 1;
9998 call_used_regs[i] = 1;
9999 }
10000 }
10001}
10002
10003/* Walk down the type tree of TYPE counting consecutive base elements.
10004 If *MODEP is VOIDmode, then set it to the first valid floating point
10005 type. If a non-floating point type is found, or if a floating point
10006 type that doesn't match a non-VOIDmode *MODEP is found, then return -1,
10007 otherwise return the count in the sub-tree. */
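/* For example (illustrative only), struct { double x; double y; } gives a
   count of 2 with *MODEP set to DFmode, whereas struct { double x; float y; }
   gives -1 because the element modes differ.  */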
10008static int
3754d046 10009aapcs_vfp_sub_candidate (const_tree type, machine_mode *modep)
df401d54 10010{
3754d046 10011 machine_mode mode;
df401d54 10012 HOST_WIDE_INT size;
10013
10014 switch (TREE_CODE (type))
10015 {
10016 case REAL_TYPE:
10017 mode = TYPE_MODE (type);
10018 if (mode != DFmode && mode != SFmode && mode != TFmode)
10019 return -1;
10020
10021 if (*modep == VOIDmode)
10022 *modep = mode;
10023
10024 if (*modep == mode)
10025 return 1;
10026
10027 break;
10028
10029 case COMPLEX_TYPE:
10030 mode = TYPE_MODE (TREE_TYPE (type));
10031 if (mode != DFmode && mode != SFmode && mode != TFmode)
10032 return -1;
10033
10034 if (*modep == VOIDmode)
10035 *modep = mode;
10036
10037 if (*modep == mode)
10038 return 2;
10039
10040 break;
10041
10042 case VECTOR_TYPE:
10043 /* Use V2SImode and V4SImode as representatives of all 64-bit
10044 and 128-bit vector types. */
10045 size = int_size_in_bytes (type);
10046 switch (size)
10047 {
10048 case 8:
10049 mode = V2SImode;
10050 break;
10051 case 16:
10052 mode = V4SImode;
10053 break;
10054 default:
10055 return -1;
10056 }
10057
10058 if (*modep == VOIDmode)
10059 *modep = mode;
10060
10061 /* Vector modes are considered to be opaque: two vectors are
10062 equivalent for the purposes of being homogeneous aggregates
10063 if they are the same size. */
10064 if (*modep == mode)
10065 return 1;
10066
10067 break;
10068
10069 case ARRAY_TYPE:
10070 {
10071 int count;
10072 tree index = TYPE_DOMAIN (type);
10073
221f0461 10074	      /* Can't handle incomplete types or sizes that are not
10075 fixed. */
10076 if (!COMPLETE_TYPE_P (type)
10077 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
df401d54 10078 return -1;
10079
10080 count = aapcs_vfp_sub_candidate (TREE_TYPE (type), modep);
10081 if (count == -1
10082 || !index
10083 || !TYPE_MAX_VALUE (index)
e913b5cd 10084 || !tree_fits_uhwi_p (TYPE_MAX_VALUE (index))
df401d54 10085 || !TYPE_MIN_VALUE (index)
e913b5cd 10086 || !tree_fits_uhwi_p (TYPE_MIN_VALUE (index))
df401d54 10087 || count < 0)
10088 return -1;
10089
e913b5cd 10090 count *= (1 + tree_to_uhwi (TYPE_MAX_VALUE (index))
10091 - tree_to_uhwi (TYPE_MIN_VALUE (index)));
df401d54 10092
10093 /* There must be no padding. */
c0d2fd0b 10094 if (wi::ne_p (TYPE_SIZE (type), count * GET_MODE_BITSIZE (*modep)))
df401d54 10095 return -1;
10096
10097 return count;
10098 }
10099
10100 case RECORD_TYPE:
10101 {
10102 int count = 0;
10103 int sub_count;
10104 tree field;
10105
221f0461 10106	      /* Can't handle incomplete types or sizes that are not
10107 fixed. */
10108 if (!COMPLETE_TYPE_P (type)
10109 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
df401d54 10110 return -1;
10111
10112 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
10113 {
10114 if (TREE_CODE (field) != FIELD_DECL)
10115 continue;
10116
10117 sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep);
10118 if (sub_count < 0)
10119 return -1;
10120 count += sub_count;
10121 }
10122
10123 /* There must be no padding. */
c0d2fd0b 10124 if (wi::ne_p (TYPE_SIZE (type), count * GET_MODE_BITSIZE (*modep)))
df401d54 10125 return -1;
10126
10127 return count;
10128 }
10129
10130 case UNION_TYPE:
10131 case QUAL_UNION_TYPE:
10132 {
10133 /* These aren't very interesting except in a degenerate case. */
10134 int count = 0;
10135 int sub_count;
10136 tree field;
10137
221f0461 10138	      /* Can't handle incomplete types or sizes that are not
10139 fixed. */
10140 if (!COMPLETE_TYPE_P (type)
10141 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
df401d54 10142 return -1;
10143
10144 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
10145 {
10146 if (TREE_CODE (field) != FIELD_DECL)
10147 continue;
10148
10149 sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep);
10150 if (sub_count < 0)
10151 return -1;
10152 count = count > sub_count ? count : sub_count;
10153 }
10154
10155 /* There must be no padding. */
c0d2fd0b 10156 if (wi::ne_p (TYPE_SIZE (type), count * GET_MODE_BITSIZE (*modep)))
df401d54 10157 return -1;
10158
10159 return count;
10160 }
10161
10162 default:
10163 break;
10164 }
10165
10166 return -1;
10167}
10168
d86303d8 10169/* Return TRUE if the type, as described by TYPE and MODE, is a short vector
10170 type as described in AAPCS64 \S 4.1.2.
10171
10172 See the comment above aarch64_composite_type_p for the notes on MODE. */
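/* For example, the 8-byte and 16-byte AdvSIMD vector types (such as
   float32x2_t and int32x4_t from arm_neon.h) are short vectors; vector types
   of any other size are not.  */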
10173
10174static bool
10175aarch64_short_vector_p (const_tree type,
10176 machine_mode mode)
10177{
10178 HOST_WIDE_INT size = -1;
10179
10180 if (type && TREE_CODE (type) == VECTOR_TYPE)
10181 size = int_size_in_bytes (type);
10182 else if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
10183 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
10184 size = GET_MODE_SIZE (mode);
10185
10186 return (size == 8 || size == 16);
10187}
10188
df401d54 10189/* Return TRUE if the type, as described by TYPE and MODE, is a composite
10190 type as described in AAPCS64 \S 4.3. This includes aggregate, union and
10191 array types. The C99 floating-point complex types are also considered
10192 as composite types, according to AAPCS64 \S 7.1.1. The complex integer
10193 types, which are GCC extensions and out of the scope of AAPCS64, are
10194 treated as composite types here as well.
10195
10196 Note that MODE itself is not sufficient in determining whether a type
10197 is such a composite type or not. This is because
10198 stor-layout.c:compute_record_mode may have already changed the MODE
10199 (BLKmode) of a RECORD_TYPE TYPE to some other mode. For example, a
10200 structure with only one field may have its MODE set to the mode of the
10201 field. Also an integer mode whose size matches the size of the
10202 RECORD_TYPE type may be used to substitute the original mode
10203 (i.e. BLKmode) in certain circumstances. In other words, MODE cannot be
10204 solely relied on. */
10205
10206static bool
10207aarch64_composite_type_p (const_tree type,
3754d046 10208 machine_mode mode)
df401d54 10209{
d86303d8 10210 if (aarch64_short_vector_p (type, mode))
10211 return false;
10212
df401d54 10213 if (type && (AGGREGATE_TYPE_P (type) || TREE_CODE (type) == COMPLEX_TYPE))
10214 return true;
10215
10216 if (mode == BLKmode
10217 || GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT
10218 || GET_MODE_CLASS (mode) == MODE_COMPLEX_INT)
10219 return true;
10220
10221 return false;
10222}
10223
df401d54 10224/* Return TRUE if an argument, whose type is described by TYPE and MODE,
10225 shall be passed or returned in simd/fp register(s) (providing these
10226 parameter passing registers are available).
10227
10228 Upon successful return, *COUNT returns the number of needed registers,
 10229	   *BASE_MODE returns the mode of the individual register and when IS_HA
10230 is not NULL, *IS_HA indicates whether or not the argument is a homogeneous
10231 floating-point aggregate or a homogeneous short-vector aggregate. */
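/* For example (illustrative only), struct { float a, b, c; } is a homogeneous
   floating-point aggregate: on success *COUNT is 3, *BASE_MODE is SFmode and,
   if requested, *IS_HA is set to true.  */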
10232
10233static bool
3754d046 10234aarch64_vfp_is_call_or_return_candidate (machine_mode mode,
df401d54 10235 const_tree type,
3754d046 10236 machine_mode *base_mode,
df401d54 10237 int *count,
10238 bool *is_ha)
10239{
3754d046 10240 machine_mode new_mode = VOIDmode;
df401d54 10241 bool composite_p = aarch64_composite_type_p (type, mode);
10242
10243 if (is_ha != NULL) *is_ha = false;
10244
10245 if ((!composite_p && GET_MODE_CLASS (mode) == MODE_FLOAT)
10246 || aarch64_short_vector_p (type, mode))
10247 {
10248 *count = 1;
10249 new_mode = mode;
10250 }
10251 else if (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT)
10252 {
10253 if (is_ha != NULL) *is_ha = true;
10254 *count = 2;
10255 new_mode = GET_MODE_INNER (mode);
10256 }
10257 else if (type && composite_p)
10258 {
10259 int ag_count = aapcs_vfp_sub_candidate (type, &new_mode);
10260
10261 if (ag_count > 0 && ag_count <= HA_MAX_NUM_FLDS)
10262 {
10263 if (is_ha != NULL) *is_ha = true;
10264 *count = ag_count;
10265 }
10266 else
10267 return false;
10268 }
10269 else
10270 return false;
10271
10272 *base_mode = new_mode;
10273 return true;
10274}
10275
10276/* Implement TARGET_STRUCT_VALUE_RTX. */
10277
10278static rtx
10279aarch64_struct_value_rtx (tree fndecl ATTRIBUTE_UNUSED,
10280 int incoming ATTRIBUTE_UNUSED)
10281{
10282 return gen_rtx_REG (Pmode, AARCH64_STRUCT_VALUE_REGNUM);
10283}
10284
10285/* Implements target hook vector_mode_supported_p. */
10286static bool
3754d046 10287aarch64_vector_mode_supported_p (machine_mode mode)
df401d54 10288{
10289 if (TARGET_SIMD
10290 && (mode == V4SImode || mode == V8HImode
10291 || mode == V16QImode || mode == V2DImode
10292 || mode == V2SImode || mode == V4HImode
10293 || mode == V8QImode || mode == V2SFmode
d70050b8 10294 || mode == V4SFmode || mode == V2DFmode
aea31229 10295 || mode == V4HFmode || mode == V8HFmode
d70050b8 10296 || mode == V1DFmode))
df401d54 10297 return true;
10298
10299 return false;
10300}
10301
dae88020 10302/* Return appropriate SIMD container
10303 for MODE within a vector of WIDTH bits. */
3754d046 10304static machine_mode
10305aarch64_simd_container_mode (machine_mode mode, unsigned width)
df401d54 10306{
dae88020 10307 gcc_assert (width == 64 || width == 128);
df401d54 10308 if (TARGET_SIMD)
dae88020 10309 {
10310 if (width == 128)
10311 switch (mode)
10312 {
10313 case DFmode:
10314 return V2DFmode;
10315 case SFmode:
10316 return V4SFmode;
10317 case SImode:
10318 return V4SImode;
10319 case HImode:
10320 return V8HImode;
10321 case QImode:
10322 return V16QImode;
10323 case DImode:
10324 return V2DImode;
10325 default:
10326 break;
10327 }
10328 else
10329 switch (mode)
10330 {
10331 case SFmode:
10332 return V2SFmode;
10333 case SImode:
10334 return V2SImode;
10335 case HImode:
10336 return V4HImode;
10337 case QImode:
10338 return V8QImode;
10339 default:
10340 break;
10341 }
10342 }
df401d54 10343 return word_mode;
10344}
10345
dae88020 10346/* Return 128-bit container as the preferred SIMD mode for MODE. */
3754d046 10347static machine_mode
10348aarch64_preferred_simd_mode (machine_mode mode)
dae88020 10349{
10350 return aarch64_simd_container_mode (mode, 128);
10351}
10352
b9ed2299 10353/* Return the bitmask of possible vector sizes for the vectorizer
10354 to iterate over. */
10355static unsigned int
10356aarch64_autovectorize_vector_sizes (void)
10357{
10358 return (16 | 8);
10359}
10360
ea092ff3 10361/* Implement TARGET_MANGLE_TYPE. */
10362
147f0a69 10363static const char *
ea092ff3 10364aarch64_mangle_type (const_tree type)
10365{
10366 /* The AArch64 ABI documents say that "__va_list" has to be
 10367	     mangled as if it were in the "std" namespace.  */
10368 if (lang_hooks.types_compatible_p (CONST_CAST_TREE (type), va_list_type))
10369 return "St9__va_list";
10370
6f520654 10371 /* Half-precision float. */
10372 if (TREE_CODE (type) == REAL_TYPE && TYPE_PRECISION (type) == 16)
10373 return "Dh";
10374
f3731846 10375 /* Mangle AArch64-specific internal types. TYPE_NAME is non-NULL_TREE for
10376 builtin types. */
10377 if (TYPE_NAME (type) != NULL)
10378 return aarch64_mangle_builtin_type (type);
52b39e44 10379
ea092ff3 10380 /* Use the default mangling. */
10381 return NULL;
10382}
10383
1d45170f 10384
10385/* Return true if the rtx_insn contains a MEM RTX somewhere
10386 in it. */
aa96b575 10387
10388static bool
1d45170f 10389has_memory_op (rtx_insn *mem_insn)
aa96b575 10390{
1d45170f 10391 subrtx_iterator::array_type array;
10392 FOR_EACH_SUBRTX (iter, array, PATTERN (mem_insn), ALL)
10393 if (MEM_P (*iter))
10394 return true;
10395
10396 return false;
aa96b575 10397}
10398
10399/* Find the first rtx_insn before insn that will generate an assembly
10400 instruction. */
10401
10402static rtx_insn *
10403aarch64_prev_real_insn (rtx_insn *insn)
10404{
10405 if (!insn)
10406 return NULL;
10407
10408 do
10409 {
10410 insn = prev_real_insn (insn);
10411 }
10412 while (insn && recog_memoized (insn) < 0);
10413
10414 return insn;
10415}
10416
10417static bool
10418is_madd_op (enum attr_type t1)
10419{
10420 unsigned int i;
10421 /* A number of these may be AArch32 only. */
10422 enum attr_type mlatypes[] = {
10423 TYPE_MLA, TYPE_MLAS, TYPE_SMLAD, TYPE_SMLADX, TYPE_SMLAL, TYPE_SMLALD,
10424 TYPE_SMLALS, TYPE_SMLALXY, TYPE_SMLAWX, TYPE_SMLAWY, TYPE_SMLAXY,
 10425	    TYPE_SMMLA, TYPE_UMLAL, TYPE_UMLALS, TYPE_SMLSD, TYPE_SMLSDX, TYPE_SMLSLD
10426 };
10427
10428 for (i = 0; i < sizeof (mlatypes) / sizeof (enum attr_type); i++)
10429 {
10430 if (t1 == mlatypes[i])
10431 return true;
10432 }
10433
10434 return false;
10435}
10436
10437/* Check if there is a register dependency between a load and the insn
10438 for which we hold recog_data. */
10439
10440static bool
10441dep_between_memop_and_curr (rtx memop)
10442{
10443 rtx load_reg;
10444 int opno;
10445
1d45170f 10446 gcc_assert (GET_CODE (memop) == SET);
aa96b575 10447
10448 if (!REG_P (SET_DEST (memop)))
10449 return false;
10450
10451 load_reg = SET_DEST (memop);
1d45170f 10452 for (opno = 1; opno < recog_data.n_operands; opno++)
aa96b575 10453 {
10454 rtx operand = recog_data.operand[opno];
10455 if (REG_P (operand)
10456 && reg_overlap_mentioned_p (load_reg, operand))
10457 return true;
10458
10459 }
10460 return false;
10461}
10462
1d45170f 10463
10464/* When working around the Cortex-A53 erratum 835769,
10465 given rtx_insn INSN, return true if it is a 64-bit multiply-accumulate
10466 instruction and has a preceding memory instruction such that a NOP
10467 should be inserted between them. */
10468
aa96b575 10469bool
10470aarch64_madd_needs_nop (rtx_insn* insn)
10471{
10472 enum attr_type attr_type;
10473 rtx_insn *prev;
10474 rtx body;
10475
ee92bc9a 10476 if (!TARGET_FIX_ERR_A53_835769)
aa96b575 10477 return false;
10478
af7bcbf7 10479 if (!INSN_P (insn) || recog_memoized (insn) < 0)
aa96b575 10480 return false;
10481
10482 attr_type = get_attr_type (insn);
10483 if (!is_madd_op (attr_type))
10484 return false;
10485
10486 prev = aarch64_prev_real_insn (insn);
fcf02f90 10487 /* aarch64_prev_real_insn can call recog_memoized on insns other than INSN.
10488 Restore recog state to INSN to avoid state corruption. */
10489 extract_constrain_insn_cached (insn);
10490
1d45170f 10491 if (!prev || !has_memory_op (prev))
aa96b575 10492 return false;
10493
10494 body = single_set (prev);
10495
10496 /* If the previous insn is a memory op and there is no dependency between
1d45170f 10497 it and the DImode madd, emit a NOP between them. If body is NULL then we
10498 have a complex memory operation, probably a load/store pair.
10499 Be conservative for now and emit a NOP. */
10500 if (GET_MODE (recog_data.operand[0]) == DImode
10501 && (!body || !dep_between_memop_and_curr (body)))
aa96b575 10502 return true;
10503
10504 return false;
10505
10506}
10507
1d45170f 10508
10509/* Implement FINAL_PRESCAN_INSN. */
10510
aa96b575 10511void
10512aarch64_final_prescan_insn (rtx_insn *insn)
10513{
10514 if (aarch64_madd_needs_nop (insn))
10515 fprintf (asm_out_file, "\tnop // between mem op and mult-accumulate\n");
10516}
10517
10518
df401d54 10519/* Return the equivalent letter for size. */
04b042b2 10520static char
df401d54 10521sizetochar (int size)
10522{
10523 switch (size)
10524 {
10525 case 64: return 'd';
10526 case 32: return 's';
10527 case 16: return 'h';
10528 case 8 : return 'b';
10529 default: gcc_unreachable ();
10530 }
10531}
10532
72841352 10533/* Return true iff x is a uniform vector of floating-point
10534 constants, and the constant can be represented in
10535 quarter-precision form. Note, as aarch64_float_const_representable
10536 rejects both +0.0 and -0.0, we will also reject +0.0 and -0.0. */
10537static bool
10538aarch64_vect_float_const_representable_p (rtx x)
10539{
62fdb8e4 10540 rtx elt;
10541 return (GET_MODE_CLASS (GET_MODE (x)) == MODE_VECTOR_FLOAT
10542 && const_vec_duplicate_p (x, &elt)
10543 && aarch64_float_const_representable_p (elt));
72841352 10544}
10545
4eb118f1 10546	/* Return true if OP is a valid SIMD immediate for MODE, false otherwise; if INFO is nonnull, fill it with details of the immediate.  */
74c40875 10547bool
3754d046 10548aarch64_simd_valid_immediate (rtx op, machine_mode mode, bool inverse,
fc3eb658 10549 struct simd_immediate_info *info)
df401d54 10550{
10551#define CHECK(STRIDE, ELSIZE, CLASS, TEST, SHIFT, NEG) \
10552 matches = 1; \
10553 for (i = 0; i < idx; i += (STRIDE)) \
10554 if (!(TEST)) \
10555 matches = 0; \
10556 if (matches) \
10557 { \
10558 immtype = (CLASS); \
10559 elsize = (ELSIZE); \
df401d54 10560 eshift = (SHIFT); \
10561 emvn = (NEG); \
10562 break; \
10563 }
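  /* Each CHECK invocation below probes one AdvSIMD immediate encoding:
     STRIDE is the byte stride of the repeating pattern, ELSIZE the element
     size in bits, CLASS the encoding number recorded in IMMTYPE, and
     SHIFT/NEG the left-shift amount and whether the value is encoded in
     negated (MVN) form.  */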
10564
10565 unsigned int i, elsize = 0, idx = 0, n_elts = CONST_VECTOR_NUNITS (op);
6e256598 10566 unsigned int innersize = GET_MODE_UNIT_SIZE (mode);
df401d54 10567 unsigned char bytes[16];
df401d54 10568 int immtype = -1, matches;
10569 unsigned int invmask = inverse ? 0xff : 0;
10570 int eshift, emvn;
10571
df401d54 10572 if (GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
72841352 10573 {
04b042b2 10574 if (! (aarch64_simd_imm_zero_p (op, mode)
10575 || aarch64_vect_float_const_representable_p (op)))
4eb118f1 10576 return false;
72841352 10577
fc3eb658 10578 if (info)
10579 {
10580 info->value = CONST_VECTOR_ELT (op, 0);
04b042b2 10581 info->element_width = GET_MODE_BITSIZE (GET_MODE (info->value));
fc3eb658 10582 info->mvn = false;
10583 info->shift = 0;
10584 }
72841352 10585
4eb118f1 10586 return true;
72841352 10587 }
df401d54 10588
10589 /* Splat vector constant out into a byte vector. */
10590 for (i = 0; i < n_elts; i++)
10591 {
0c1c0992 10592 /* The vector is provided in gcc endian-neutral fashion. For aarch64_be,
10593 it must be laid out in the vector register in reverse order. */
10594 rtx el = CONST_VECTOR_ELT (op, BYTES_BIG_ENDIAN ? (n_elts - 1 - i) : i);
df401d54 10595 unsigned HOST_WIDE_INT elpart;
df401d54 10596
245b193f 10597 gcc_assert (CONST_INT_P (el));
10598 elpart = INTVAL (el);
10599
10600 for (unsigned int byte = 0; byte < innersize; byte++)
10601 {
10602 bytes[idx++] = (elpart & 0xff) ^ invmask;
10603 elpart >>= BITS_PER_UNIT;
10604 }
df401d54 10605
df401d54 10606 }
10607
10608 /* Sanity check. */
10609 gcc_assert (idx == GET_MODE_SIZE (mode));
10610
10611 do
10612 {
10613 CHECK (4, 32, 0, bytes[i] == bytes[0] && bytes[i + 1] == 0
10614 && bytes[i + 2] == 0 && bytes[i + 3] == 0, 0, 0);
10615
10616 CHECK (4, 32, 1, bytes[i] == 0 && bytes[i + 1] == bytes[1]
10617 && bytes[i + 2] == 0 && bytes[i + 3] == 0, 8, 0);
10618
10619 CHECK (4, 32, 2, bytes[i] == 0 && bytes[i + 1] == 0
10620 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0, 16, 0);
10621
10622 CHECK (4, 32, 3, bytes[i] == 0 && bytes[i + 1] == 0
10623 && bytes[i + 2] == 0 && bytes[i + 3] == bytes[3], 24, 0);
10624
10625 CHECK (2, 16, 4, bytes[i] == bytes[0] && bytes[i + 1] == 0, 0, 0);
10626
10627 CHECK (2, 16, 5, bytes[i] == 0 && bytes[i + 1] == bytes[1], 8, 0);
10628
10629 CHECK (4, 32, 6, bytes[i] == bytes[0] && bytes[i + 1] == 0xff
10630 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff, 0, 1);
10631
10632 CHECK (4, 32, 7, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
10633 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff, 8, 1);
10634
10635 CHECK (4, 32, 8, bytes[i] == 0xff && bytes[i + 1] == 0xff
10636 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff, 16, 1);
10637
10638 CHECK (4, 32, 9, bytes[i] == 0xff && bytes[i + 1] == 0xff
10639 && bytes[i + 2] == 0xff && bytes[i + 3] == bytes[3], 24, 1);
10640
10641 CHECK (2, 16, 10, bytes[i] == bytes[0] && bytes[i + 1] == 0xff, 0, 1);
10642
10643 CHECK (2, 16, 11, bytes[i] == 0xff && bytes[i + 1] == bytes[1], 8, 1);
10644
10645 CHECK (4, 32, 12, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
8458c9e9 10646 && bytes[i + 2] == 0 && bytes[i + 3] == 0, 8, 0);
df401d54 10647
10648 CHECK (4, 32, 13, bytes[i] == 0 && bytes[i + 1] == bytes[1]
8458c9e9 10649 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff, 8, 1);
df401d54 10650
10651 CHECK (4, 32, 14, bytes[i] == 0xff && bytes[i + 1] == 0xff
8458c9e9 10652 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0, 16, 0);
df401d54 10653
10654 CHECK (4, 32, 15, bytes[i] == 0 && bytes[i + 1] == 0
8458c9e9 10655 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff, 16, 1);
df401d54 10656
10657 CHECK (1, 8, 16, bytes[i] == bytes[0], 0, 0);
10658
10659 CHECK (1, 64, 17, (bytes[i] == 0 || bytes[i] == 0xff)
10660 && bytes[i] == bytes[(i + 8) % idx], 0, 0);
10661 }
10662 while (0);
10663
8458c9e9 10664 if (immtype == -1)
4eb118f1 10665 return false;
df401d54 10666
fc3eb658 10667 if (info)
df401d54 10668 {
fc3eb658 10669 info->element_width = elsize;
fc3eb658 10670 info->mvn = emvn != 0;
10671 info->shift = eshift;
10672
df401d54 10673 unsigned HOST_WIDE_INT imm = 0;
10674
8458c9e9 10675 if (immtype >= 12 && immtype <= 15)
10676 info->msl = true;
10677
df401d54 10678 /* Un-invert bytes of recognized vector, if necessary. */
10679 if (invmask != 0)
10680 for (i = 0; i < idx; i++)
10681 bytes[i] ^= invmask;
10682
10683 if (immtype == 17)
10684 {
10685 /* FIXME: Broken on 32-bit H_W_I hosts. */
10686 gcc_assert (sizeof (HOST_WIDE_INT) == 8);
10687
10688 for (i = 0; i < 8; i++)
10689 imm |= (unsigned HOST_WIDE_INT) (bytes[i] ? 0xff : 0)
10690 << (i * BITS_PER_UNIT);
10691
df401d54 10692
fc3eb658 10693 info->value = GEN_INT (imm);
10694 }
10695 else
10696 {
10697 for (i = 0; i < elsize / BITS_PER_UNIT; i++)
10698 imm |= (unsigned HOST_WIDE_INT) bytes[i] << (i * BITS_PER_UNIT);
df401d54 10699
10700 /* Construct 'abcdefgh' because the assembler cannot handle
fc3eb658 10701 generic constants. */
10702 if (info->mvn)
df401d54 10703 imm = ~imm;
fc3eb658 10704 imm = (imm >> info->shift) & 0xff;
10705 info->value = GEN_INT (imm);
10706 }
df401d54 10707 }
10708
fc3eb658 10709 return true;
df401d54 10710#undef CHECK
10711}
10712
df401d54 10713	/* Check if immediate shift constants are within range.  */
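/* For example, for V4SImode (32-bit elements) a valid left-shift count lies
   in the range [0, 31] and a valid right-shift count in [1, 32].  */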
10714bool
3754d046 10715aarch64_simd_shift_imm_p (rtx x, machine_mode mode, bool left)
df401d54 10716{
10717 int bit_width = GET_MODE_UNIT_SIZE (mode) * BITS_PER_UNIT;
10718 if (left)
bead021f 10719 return aarch64_const_vec_all_same_in_range_p (x, 0, bit_width - 1);
df401d54 10720 else
bead021f 10721 return aarch64_const_vec_all_same_in_range_p (x, 1, bit_width);
df401d54 10722}
10723
72841352 10724/* Return true if X is a uniform vector where all elements
10725 are either the floating-point constant 0.0 or the
10726 integer constant 0. */
df401d54 10727bool
3754d046 10728aarch64_simd_imm_zero_p (rtx x, machine_mode mode)
df401d54 10729{
72841352 10730 return x == CONST0_RTX (mode);
df401d54 10731}
10732
050a2b40 10733
10734/* Return the bitmask CONST_INT to select the bits required by a zero extract
10735 operation of width WIDTH at bit position POS. */
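/* For example, WIDTH == 4 and POS == 8 give the mask 0xf00,
   i.e. ((1 << 4) - 1) << 8.  */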
10736
10737rtx
10738aarch64_mask_from_zextract_ops (rtx width, rtx pos)
10739{
10740 gcc_assert (CONST_INT_P (width));
10741 gcc_assert (CONST_INT_P (pos));
10742
10743 unsigned HOST_WIDE_INT mask
10744 = ((unsigned HOST_WIDE_INT) 1 << UINTVAL (width)) - 1;
10745 return GEN_INT (mask << UINTVAL (pos));
10746}
10747
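/* Return true if every byte of the 64-bit value X is either 0x00 or 0xff,
   e.g. 0xff00ff00ff00ff00; these are the values representable by the 64-bit,
   per-byte form of the AdvSIMD MOVI immediate.  */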
df401d54 10748bool
3754d046 10749aarch64_simd_imm_scalar_p (rtx x, machine_mode mode ATTRIBUTE_UNUSED)
df401d54 10750{
10751 HOST_WIDE_INT imm = INTVAL (x);
10752 int i;
10753
10754 for (i = 0; i < 8; i++)
10755 {
10756 unsigned int byte = imm & 0xff;
10757 if (byte != 0xff && byte != 0)
10758 return false;
10759 imm >>= 8;
10760 }
10761
10762 return true;
10763}
10764
fda698d7 10765bool
82882dbd 10766aarch64_mov_operand_p (rtx x, machine_mode mode)
fda698d7 10767{
fda698d7 10768 if (GET_CODE (x) == HIGH
10769 && aarch64_valid_symref (XEXP (x, 0), GET_MODE (XEXP (x, 0))))
10770 return true;
10771
a6cab7d4 10772 if (CONST_INT_P (x))
fda698d7 10773 return true;
10774
10775 if (GET_CODE (x) == SYMBOL_REF && mode == DImode && CONSTANT_ADDRESS_P (x))
10776 return true;
10777
82882dbd 10778 return aarch64_classify_symbolic_expression (x)
5137d3cb 10779 == SYMBOL_TINY_ABSOLUTE;
fda698d7 10780}
10781
df401d54 10782/* Return a const_int vector of VAL. */
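/* For example, aarch64_simd_gen_const_vector_dup (V4SImode, 3) returns the
   CONST_VECTOR {3, 3, 3, 3}.  */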
10783rtx
3754d046 10784aarch64_simd_gen_const_vector_dup (machine_mode mode, int val)
df401d54 10785{
10786 int nunits = GET_MODE_NUNITS (mode);
10787 rtvec v = rtvec_alloc (nunits);
10788 int i;
10789
10790 for (i=0; i < nunits; i++)
10791 RTVEC_ELT (v, i) = GEN_INT (val);
10792
10793 return gen_rtx_CONST_VECTOR (mode, v);
10794}
10795
43bd8f5c 10796	/* Check that OP is a legal scalar immediate for the MOVI instruction.  */
10797
10798bool
3754d046 10799aarch64_simd_scalar_immediate_valid_for_move (rtx op, machine_mode mode)
43bd8f5c 10800{
3754d046 10801 machine_mode vmode;
43bd8f5c 10802
10803 gcc_assert (!VECTOR_MODE_P (mode));
10804 vmode = aarch64_preferred_simd_mode (mode);
10805 rtx op_v = aarch64_simd_gen_const_vector_dup (vmode, INTVAL (op));
fc3eb658 10806 return aarch64_simd_valid_immediate (op_v, vmode, false, NULL);
43bd8f5c 10807}
10808
e71cd518 10809/* Construct and return a PARALLEL RTX vector with elements numbering the
10810 lanes of either the high (HIGH == TRUE) or low (HIGH == FALSE) half of
10811 the vector - from the perspective of the architecture. This does not
10812 line up with GCC's perspective on lane numbers, so we end up with
10813 different masks depending on our target endian-ness. The diagram
10814 below may help. We must draw the distinction when building masks
10815 which select one half of the vector. An instruction selecting
10816 architectural low-lanes for a big-endian target, must be described using
10817 a mask selecting GCC high-lanes.
10818
10819 Big-Endian Little-Endian
10820
10821GCC 0 1 2 3 3 2 1 0
10822 | x | x | x | x | | x | x | x | x |
10823Architecture 3 2 1 0 3 2 1 0
10824
10825Low Mask: { 2, 3 } { 0, 1 }
10826High Mask: { 0, 1 } { 2, 3 }
10827*/
10828
df401d54 10829rtx
3754d046 10830aarch64_simd_vect_par_cnst_half (machine_mode mode, bool high)
df401d54 10831{
10832 int nunits = GET_MODE_NUNITS (mode);
10833 rtvec v = rtvec_alloc (nunits / 2);
e71cd518 10834 int high_base = nunits / 2;
10835 int low_base = 0;
10836 int base;
df401d54 10837 rtx t1;
10838 int i;
10839
e71cd518 10840 if (BYTES_BIG_ENDIAN)
10841 base = high ? low_base : high_base;
10842 else
10843 base = high ? high_base : low_base;
10844
10845 for (i = 0; i < nunits / 2; i++)
df401d54 10846 RTVEC_ELT (v, i) = GEN_INT (base + i);
10847
10848 t1 = gen_rtx_PARALLEL (mode, v);
10849 return t1;
10850}
10851
e71cd518 10852/* Check OP for validity as a PARALLEL RTX vector with elements
10853 numbering the lanes of either the high (HIGH == TRUE) or low lanes,
10854 from the perspective of the architecture. See the diagram above
10855 aarch64_simd_vect_par_cnst_half for more details. */
10856
10857bool
3754d046 10858aarch64_simd_check_vect_par_cnst_half (rtx op, machine_mode mode,
e71cd518 10859 bool high)
10860{
10861 rtx ideal = aarch64_simd_vect_par_cnst_half (mode, high);
10862 HOST_WIDE_INT count_op = XVECLEN (op, 0);
10863 HOST_WIDE_INT count_ideal = XVECLEN (ideal, 0);
10864 int i = 0;
10865
10866 if (!VECTOR_MODE_P (mode))
10867 return false;
10868
10869 if (count_op != count_ideal)
10870 return false;
10871
10872 for (i = 0; i < count_ideal; i++)
10873 {
10874 rtx elt_op = XVECEXP (op, 0, i);
10875 rtx elt_ideal = XVECEXP (ideal, 0, i);
10876
978f06f4 10877 if (!CONST_INT_P (elt_op)
e71cd518 10878 || INTVAL (elt_ideal) != INTVAL (elt_op))
10879 return false;
10880 }
10881 return true;
10882}
10883
df401d54 10884/* Bounds-check lanes. Ensure OPERAND lies between LOW (inclusive) and
10885 HIGH (exclusive). */
10886void
650ad49e 10887aarch64_simd_lane_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high,
10888 const_tree exp)
df401d54 10889{
10890 HOST_WIDE_INT lane;
978f06f4 10891 gcc_assert (CONST_INT_P (operand));
df401d54 10892 lane = INTVAL (operand);
10893
10894 if (lane < low || lane >= high)
650ad49e 10895 {
10896 if (exp)
3357e1c7 10897 error ("%Klane %wd out of range %wd - %wd", exp, lane, low, high - 1);
650ad49e 10898 else
3357e1c7 10899 error ("lane %wd out of range %wd - %wd", lane, low, high - 1);
650ad49e 10900 }
df401d54 10901}
10902
df401d54 10903/* Return TRUE if OP is a valid vector addressing mode. */
10904bool
10905aarch64_simd_mem_operand_p (rtx op)
10906{
10907 return MEM_P (op) && (GET_CODE (XEXP (op, 0)) == POST_INC
978f06f4 10908 || REG_P (XEXP (op, 0)));
df401d54 10909}
10910
8bcdf19e 10911	/* Emit a register copy from operands[1] to operands[0], taking care not to
10912 early-clobber source registers in the process.
df401d54 10913
8bcdf19e 10914 COUNT is the number of components into which the copy needs to be
10915 decomposed. */
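/* For example (illustrative only), copying an OImode value from V0-V1 to
   V1-V2 overlaps: writing V1 first would clobber half of the source, so in
   that case the loop below copies the highest-numbered component first.  */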
df401d54 10916void
8bcdf19e 10917aarch64_simd_emit_reg_reg_move (rtx *operands, enum machine_mode mode,
10918 unsigned int count)
df401d54 10919{
10920 unsigned int i;
8bcdf19e 10921 int rdest = REGNO (operands[0]);
10922 int rsrc = REGNO (operands[1]);
df401d54 10923
10924 if (!reg_overlap_mentioned_p (operands[0], operands[1])
8bcdf19e 10925 || rdest < rsrc)
10926 for (i = 0; i < count; i++)
10927 emit_move_insn (gen_rtx_REG (mode, rdest + i),
10928 gen_rtx_REG (mode, rsrc + i));
df401d54 10929 else
8bcdf19e 10930 for (i = 0; i < count; i++)
10931 emit_move_insn (gen_rtx_REG (mode, rdest + count - i - 1),
10932 gen_rtx_REG (mode, rsrc + count - i - 1));
df401d54 10933}
10934
10935/* Compute and return the length of aarch64_simd_mov<mode>, where <mode> is
10936 one of VSTRUCT modes: OI, CI or XI. */
10937int
ed3e6e5d 10938aarch64_simd_attr_length_move (rtx_insn *insn)
df401d54 10939{
3754d046 10940 machine_mode mode;
df401d54 10941
10942 extract_insn_cached (insn);
10943
10944 if (REG_P (recog_data.operand[0]) && REG_P (recog_data.operand[1]))
10945 {
10946 mode = GET_MODE (recog_data.operand[0]);
10947 switch (mode)
10948 {
10949 case OImode:
10950 return 8;
10951 case CImode:
10952 return 12;
10953 case XImode:
10954 return 16;
10955 default:
10956 gcc_unreachable ();
10957 }
10958 }
10959 return 4;
10960}
10961
a91cc579 10962/* Compute and return the length of aarch64_simd_reglist<mode>, where <mode> is
40fd1973 10963 one of VSTRUCT modes: OI, CI, or XI. */
a91cc579 10964int
10965aarch64_simd_attr_length_rglist (enum machine_mode mode)
10966{
10967 return (GET_MODE_SIZE (mode) / UNITS_PER_VREG) * 4;
10968}
10969
bb374c4d 10970/* Implement target hook TARGET_VECTOR_ALIGNMENT. The AAPCS64 sets the maximum
10971 alignment of a vector to 128 bits. */
10972static HOST_WIDE_INT
10973aarch64_simd_vector_alignment (const_tree type)
10974{
e913b5cd 10975 HOST_WIDE_INT align = tree_to_shwi (TYPE_SIZE (type));
bb374c4d 10976 return MIN (align, 128);
10977}
10978
10979/* Implement target hook TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE. */
10980static bool
10981aarch64_simd_vector_alignment_reachable (const_tree type, bool is_packed)
10982{
10983 if (is_packed)
10984 return false;
10985
10986  /* We guarantee alignment for vectors up to 128 bits.  */
10987 if (tree_int_cst_compare (TYPE_SIZE (type),
10988 bitsize_int (BIGGEST_ALIGNMENT)) > 0)
10989 return false;
10990
10991 /* Vectors whose size is <= BIGGEST_ALIGNMENT are naturally aligned. */
10992 return true;
10993}
10994
501336f7 10995/* If VALS is a vector constant that can be loaded into a register
10996 using DUP, generate instructions to do so and return an RTX to
10997 assign to the register. Otherwise return NULL_RTX. */
10998static rtx
10999aarch64_simd_dup_constant (rtx vals)
11000{
3754d046 11001 machine_mode mode = GET_MODE (vals);
11002 machine_mode inner_mode = GET_MODE_INNER (mode);
501336f7 11003 rtx x;
501336f7 11004
62fdb8e4 11005 if (!const_vec_duplicate_p (vals, &x))
501336f7 11006 return NULL_RTX;
11007
11008 /* We can load this constant by using DUP and a constant in a
11009 single ARM register. This will be cheaper than a vector
11010 load. */
62fdb8e4 11011 x = copy_to_mode_reg (inner_mode, x);
501336f7 11012 return gen_rtx_VEC_DUPLICATE (mode, x);
11013}
11014
11015
11016/* Generate code to load VALS, which is a PARALLEL containing only
11017 constants (for vec_init) or CONST_VECTOR, efficiently into a
11018 register. Returns an RTX to copy into the register, or NULL_RTX
11019   for a PARALLEL that cannot be converted into a CONST_VECTOR.  */
6d57cecd 11020static rtx
501336f7 11021aarch64_simd_make_constant (rtx vals)
11022{
3754d046 11023 machine_mode mode = GET_MODE (vals);
501336f7 11024 rtx const_dup;
11025 rtx const_vec = NULL_RTX;
11026 int n_elts = GET_MODE_NUNITS (mode);
11027 int n_const = 0;
11028 int i;
11029
11030 if (GET_CODE (vals) == CONST_VECTOR)
11031 const_vec = vals;
11032 else if (GET_CODE (vals) == PARALLEL)
11033 {
11034 /* A CONST_VECTOR must contain only CONST_INTs and
11035 CONST_DOUBLEs, but CONSTANT_P allows more (e.g. SYMBOL_REF).
11036 Only store valid constants in a CONST_VECTOR. */
11037 for (i = 0; i < n_elts; ++i)
11038 {
11039 rtx x = XVECEXP (vals, 0, i);
11040 if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
11041 n_const++;
11042 }
11043 if (n_const == n_elts)
11044 const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0));
11045 }
11046 else
11047 gcc_unreachable ();
11048
11049 if (const_vec != NULL_RTX
fc3eb658 11050 && aarch64_simd_valid_immediate (const_vec, mode, false, NULL))
501336f7 11051 /* Load using MOVI/MVNI. */
11052 return const_vec;
11053 else if ((const_dup = aarch64_simd_dup_constant (vals)) != NULL_RTX)
11054 /* Loaded using DUP. */
11055 return const_dup;
11056 else if (const_vec != NULL_RTX)
11057    /* Load from constant pool.  We cannot take advantage of single-cycle
11058 LD1 because we need a PC-relative addressing mode. */
11059 return const_vec;
11060 else
11061 /* A PARALLEL containing something not valid inside CONST_VECTOR.
11062        We cannot construct an initializer.  */
11063 return NULL_RTX;
11064}
11065
11066void
11067aarch64_expand_vector_init (rtx target, rtx vals)
11068{
3754d046 11069 machine_mode mode = GET_MODE (target);
11070 machine_mode inner_mode = GET_MODE_INNER (mode);
501336f7 11071 int n_elts = GET_MODE_NUNITS (mode);
e956e9df 11072 int n_var = 0;
11073 rtx any_const = NULL_RTX;
501336f7 11074 bool all_same = true;
501336f7 11075
e956e9df 11076 for (int i = 0; i < n_elts; ++i)
501336f7 11077 {
e956e9df 11078 rtx x = XVECEXP (vals, 0, i);
501336f7 11079 if (!CONST_INT_P (x) && !CONST_DOUBLE_P (x))
e956e9df 11080 ++n_var;
11081 else
11082 any_const = x;
501336f7 11083
e956e9df 11084 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
501336f7 11085 all_same = false;
11086 }
11087
11088 if (n_var == 0)
11089 {
11090 rtx constant = aarch64_simd_make_constant (vals);
11091 if (constant != NULL_RTX)
11092 {
11093 emit_move_insn (target, constant);
11094 return;
11095 }
11096 }
11097
11098 /* Splat a single non-constant element if we can. */
11099 if (all_same)
11100 {
e956e9df 11101 rtx x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, 0));
501336f7 11102 aarch64_emit_move (target, gen_rtx_VEC_DUPLICATE (mode, x));
11103 return;
11104 }
11105
e956e9df 11106  /* Half the fields (or fewer) are non-constant.  Load constant then overwrite
11107 varying fields. Hope that this is more efficient than using the stack. */
11108 if (n_var <= n_elts/2)
501336f7 11109 {
11110 rtx copy = copy_rtx (vals);
501336f7 11111
e956e9df 11112 /* Load constant part of vector. We really don't care what goes into the
11113 parts we will overwrite, but we're more likely to be able to load the
11114 constant efficiently if it has fewer, larger, repeating parts
11115 (see aarch64_simd_valid_immediate). */
11116 for (int i = 0; i < n_elts; i++)
11117 {
11118 rtx x = XVECEXP (vals, 0, i);
11119 if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
11120 continue;
11121 rtx subst = any_const;
11122 for (int bit = n_elts / 2; bit > 0; bit /= 2)
11123 {
11124 /* Look in the copied vector, as more elements are const. */
11125 rtx test = XVECEXP (copy, 0, i ^ bit);
11126 if (CONST_INT_P (test) || CONST_DOUBLE_P (test))
11127 {
11128 subst = test;
11129 break;
11130 }
11131 }
11132 XVECEXP (copy, 0, i) = subst;
11133 }
501336f7 11134 aarch64_expand_vector_init (target, copy);
11135
e956e9df 11136 /* Insert variables. */
11137 enum insn_code icode = optab_handler (vec_set_optab, mode);
501336f7 11138 gcc_assert (icode != CODE_FOR_nothing);
e956e9df 11139
11140 for (int i = 0; i < n_elts; i++)
11141 {
11142 rtx x = XVECEXP (vals, 0, i);
11143 if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
11144 continue;
11145 x = copy_to_mode_reg (inner_mode, x);
11146 emit_insn (GEN_FCN (icode) (target, x, GEN_INT (i)));
11147 }
501336f7 11148 return;
11149 }
11150
11151 /* Construct the vector in memory one field at a time
11152 and load the whole vector. */
e956e9df 11153 rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
11154 for (int i = 0; i < n_elts; i++)
501336f7 11155 emit_move_insn (adjust_address_nv (mem, inner_mode,
11156 i * GET_MODE_SIZE (inner_mode)),
11157 XVECEXP (vals, 0, i));
11158 emit_move_insn (target, mem);
11159
11160}
11161
df401d54 11162static unsigned HOST_WIDE_INT
3754d046 11163aarch64_shift_truncation_mask (machine_mode mode)
df401d54 11164{
11165 return
11166 (aarch64_vector_mode_supported_p (mode)
11167 || aarch64_vect_struct_mode_p (mode)) ? 0 : (GET_MODE_BITSIZE (mode) - 1);
11168}
11169
df401d54 11170/* Select a format to encode pointers in exception handling data. */
11171int
11172aarch64_asm_preferred_eh_data_format (int code ATTRIBUTE_UNUSED, int global)
11173{
11174 int type;
11175 switch (aarch64_cmodel)
11176 {
11177 case AARCH64_CMODEL_TINY:
11178 case AARCH64_CMODEL_TINY_PIC:
11179 case AARCH64_CMODEL_SMALL:
11180 case AARCH64_CMODEL_SMALL_PIC:
65f988f7 11181 case AARCH64_CMODEL_SMALL_SPIC:
df401d54 11182 /* text+got+data < 4Gb. 4-byte signed relocs are sufficient
11183 for everything. */
11184 type = DW_EH_PE_sdata4;
11185 break;
11186 default:
11187 /* No assumptions here. 8-byte relocs required. */
11188 type = DW_EH_PE_sdata8;
11189 break;
11190 }
11191 return (global ? DW_EH_PE_indirect : 0) | DW_EH_PE_pcrel | type;
11192}
11193
a0db861f 11194/* Implement ASM_DECLARE_FUNCTION_NAME. Output the ISA features used
11195 by the function fndecl. */
11196
11197void
11198aarch64_declare_function_name (FILE *stream, const char* name,
11199 tree fndecl)
11200{
11201 tree target_parts = DECL_FUNCTION_SPECIFIC_TARGET (fndecl);
11202
11203 struct cl_target_option *targ_options;
11204 if (target_parts)
11205 targ_options = TREE_TARGET_OPTION (target_parts);
11206 else
11207 targ_options = TREE_TARGET_OPTION (target_option_current_node);
11208 gcc_assert (targ_options);
11209
11210 const struct processor *this_arch
11211 = aarch64_get_arch (targ_options->x_explicit_arch);
11212
9dc3dccb 11213 unsigned long isa_flags = targ_options->x_aarch64_isa_flags;
11214 std::string extension
11215 = aarch64_get_extension_string_for_isa_flags (isa_flags);
11216 asm_fprintf (asm_out_file, "\t.arch %s%s\n",
11217 this_arch->name, extension.c_str ());
a0db861f 11218
11219 /* Print the cpu name we're tuning for in the comments, might be
11220 useful to readers of the generated asm. */
11221
11222 const struct processor *this_tune
11223 = aarch64_get_tune_cpu (targ_options->x_explicit_tune_core);
11224
11225 asm_fprintf (asm_out_file, "\t" ASM_COMMENT_START ".tune %s\n",
11226 this_tune->name);
11227
11228 /* Don't forget the type directive for ELF. */
11229 ASM_OUTPUT_TYPE_DIRECTIVE (stream, name, "function");
11230 ASM_OUTPUT_LABEL (stream, name);
11231}
11232
3667870b 11233/* Emit load exclusive. */
11234
11235static void
3754d046 11236aarch64_emit_load_exclusive (machine_mode mode, rtx rval,
3667870b 11237 rtx mem, rtx model_rtx)
11238{
11239 rtx (*gen) (rtx, rtx, rtx);
11240
11241 switch (mode)
11242 {
11243 case QImode: gen = gen_aarch64_load_exclusiveqi; break;
11244 case HImode: gen = gen_aarch64_load_exclusivehi; break;
11245 case SImode: gen = gen_aarch64_load_exclusivesi; break;
11246 case DImode: gen = gen_aarch64_load_exclusivedi; break;
11247 default:
11248 gcc_unreachable ();
11249 }
11250
11251 emit_insn (gen (rval, mem, model_rtx));
11252}
11253
11254/* Emit store exclusive. */
11255
11256static void
3754d046 11257aarch64_emit_store_exclusive (machine_mode mode, rtx bval,
3667870b 11258 rtx rval, rtx mem, rtx model_rtx)
11259{
11260 rtx (*gen) (rtx, rtx, rtx, rtx);
11261
11262 switch (mode)
11263 {
11264 case QImode: gen = gen_aarch64_store_exclusiveqi; break;
11265 case HImode: gen = gen_aarch64_store_exclusivehi; break;
11266 case SImode: gen = gen_aarch64_store_exclusivesi; break;
11267 case DImode: gen = gen_aarch64_store_exclusivedi; break;
11268 default:
11269 gcc_unreachable ();
11270 }
11271
11272 emit_insn (gen (bval, rval, mem, model_rtx));
11273}
11274
11275/* Mark the previous jump instruction as unlikely. */
11276
11277static void
11278aarch64_emit_unlikely_jump (rtx insn)
11279{
9eb946de 11280 int very_unlikely = REG_BR_PROB_BASE / 100 - 1;
3667870b 11281
11282 insn = emit_jump_insn (insn);
9eb946de 11283 add_int_reg_note (insn, REG_BR_PROB, very_unlikely);
3667870b 11284}
11285
11286/* Expand a compare and swap pattern. */
11287
11288void
11289aarch64_expand_compare_and_swap (rtx operands[])
11290{
11291 rtx bval, rval, mem, oldval, newval, is_weak, mod_s, mod_f, x;
3754d046 11292 machine_mode mode, cmp_mode;
b6582827 11293 typedef rtx (*gen_cas_fn) (rtx, rtx, rtx, rtx, rtx, rtx, rtx);
11294 int idx;
11295 gen_cas_fn gen;
11296 const gen_cas_fn split_cas[] =
11297 {
11298 gen_aarch64_compare_and_swapqi,
11299 gen_aarch64_compare_and_swaphi,
11300 gen_aarch64_compare_and_swapsi,
11301 gen_aarch64_compare_and_swapdi
11302 };
11303 const gen_cas_fn atomic_cas[] =
11304 {
11305 gen_aarch64_compare_and_swapqi_lse,
11306 gen_aarch64_compare_and_swaphi_lse,
11307 gen_aarch64_compare_and_swapsi_lse,
11308 gen_aarch64_compare_and_swapdi_lse
11309 };
3667870b 11310
11311 bval = operands[0];
11312 rval = operands[1];
11313 mem = operands[2];
11314 oldval = operands[3];
11315 newval = operands[4];
11316 is_weak = operands[5];
11317 mod_s = operands[6];
11318 mod_f = operands[7];
11319 mode = GET_MODE (mem);
11320 cmp_mode = mode;
11321
11322 /* Normally the succ memory model must be stronger than fail, but in the
11323 unlikely event of fail being ACQUIRE and succ being RELEASE we need to
11324 promote succ to ACQ_REL so that we don't lose the acquire semantics. */
11325
a372f7ca 11326 if (is_mm_acquire (memmodel_from_int (INTVAL (mod_f)))
11327 && is_mm_release (memmodel_from_int (INTVAL (mod_s))))
3667870b 11328 mod_s = GEN_INT (MEMMODEL_ACQ_REL);
11329
11330 switch (mode)
11331 {
11332 case QImode:
11333 case HImode:
11334 /* For short modes, we're going to perform the comparison in SImode,
11335 so do the zero-extension now. */
11336 cmp_mode = SImode;
11337 rval = gen_reg_rtx (SImode);
11338 oldval = convert_modes (SImode, mode, oldval, true);
11339 /* Fall through. */
11340
11341 case SImode:
11342 case DImode:
11343 /* Force the value into a register if needed. */
11344 if (!aarch64_plus_operand (oldval, mode))
11345 oldval = force_reg (cmp_mode, oldval);
11346 break;
11347
11348 default:
11349 gcc_unreachable ();
11350 }
11351
11352 switch (mode)
11353 {
b6582827 11354 case QImode: idx = 0; break;
11355 case HImode: idx = 1; break;
11356 case SImode: idx = 2; break;
11357 case DImode: idx = 3; break;
3667870b 11358 default:
11359 gcc_unreachable ();
11360 }
b6582827 11361 if (TARGET_LSE)
11362 gen = atomic_cas[idx];
11363 else
11364 gen = split_cas[idx];
3667870b 11365
11366 emit_insn (gen (rval, mem, oldval, newval, is_weak, mod_s, mod_f));
11367
11368 if (mode == QImode || mode == HImode)
11369 emit_move_insn (operands[1], gen_lowpart (mode, rval));
11370
11371 x = gen_rtx_REG (CCmode, CC_REGNUM);
11372 x = gen_rtx_EQ (SImode, x, const0_rtx);
d1f9b275 11373 emit_insn (gen_rtx_SET (bval, x));
3667870b 11374}
11375
b8a8b19c 11376/* Test whether the target supports using an atomic load-operate instruction.
11377 CODE is the operation and AFTER is TRUE if the data in memory after the
11378 operation should be returned and FALSE if the data before the operation
11379 should be returned. Returns FALSE if the operation isn't supported by the
11380 architecture. */
11381
11382bool
11383aarch64_atomic_ldop_supported_p (enum rtx_code code)
11384{
11385 if (!TARGET_LSE)
11386 return false;
11387
11388 switch (code)
11389 {
11390 case SET:
11391 case AND:
11392 case IOR:
11393 case XOR:
11394 case MINUS:
11395 case PLUS:
11396 return true;
11397 default:
11398 return false;
11399 }
11400}
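/* With LSE these operations map onto single instructions: SWP for SET,
   LDADD for PLUS (and for MINUS, by negating the operand), LDSET for IOR,
   LDEOR for XOR, and LDCLR for AND (by inverting the operand); see
   aarch64_gen_atomic_ldop below.  */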
11401
b53436f9 11402/* Emit a barrier appropriate for memory model MODEL at the end of a
11403 sequence implementing an atomic operation. */
11404
11405static void
11406aarch64_emit_post_barrier (enum memmodel model)
11407{
11408 const enum memmodel base_model = memmodel_base (model);
11409
11410 if (is_mm_sync (model)
11411 && (base_model == MEMMODEL_ACQUIRE
11412 || base_model == MEMMODEL_ACQ_REL
11413 || base_model == MEMMODEL_SEQ_CST))
11414 {
11415 emit_insn (gen_mem_thread_fence (GEN_INT (MEMMODEL_SEQ_CST)));
11416 }
11417}
11418
b6582827 11419/* Emit an atomic compare-and-swap operation. RVAL is the destination register
11420 for the data in memory. EXPECTED is the value expected to be in memory.
11421 DESIRED is the value to store to memory. MEM is the memory location. MODEL
11422 is the memory ordering to use. */
11423
11424void
11425aarch64_gen_atomic_cas (rtx rval, rtx mem,
11426 rtx expected, rtx desired,
11427 rtx model)
11428{
11429 rtx (*gen) (rtx, rtx, rtx, rtx);
11430 machine_mode mode;
11431
11432 mode = GET_MODE (mem);
11433
11434 switch (mode)
11435 {
11436 case QImode: gen = gen_aarch64_atomic_casqi; break;
11437 case HImode: gen = gen_aarch64_atomic_cashi; break;
11438 case SImode: gen = gen_aarch64_atomic_cassi; break;
11439 case DImode: gen = gen_aarch64_atomic_casdi; break;
11440 default:
11441 gcc_unreachable ();
11442 }
11443
11444 /* Move the expected value into the CAS destination register. */
11445 emit_insn (gen_rtx_SET (rval, expected));
11446
11447 /* Emit the CAS. */
11448 emit_insn (gen (rval, mem, desired, model));
11449
11450 /* Compare the expected value with the value loaded by the CAS, to establish
11451 whether the swap was made. */
11452 aarch64_gen_compare_reg (EQ, rval, expected);
11453}
11454
3667870b 11455/* Split a compare and swap pattern. */
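/* A rough sketch of the sequence generated for a strong SImode
   compare-and-swap (register names illustrative; the exact exclusive
   load/store variants depend on the memory model):

     .Lretry: ldaxr   w_rval, [mem]
              cmp     w_rval, w_oldval
              b.ne    .Ldone
              stlxr   w_scratch, w_newval, [mem]
              cbnz    w_scratch, .Lretry
     .Ldone:                                                          */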
11456
11457void
11458aarch64_split_compare_and_swap (rtx operands[])
11459{
11460 rtx rval, mem, oldval, newval, scratch;
3754d046 11461 machine_mode mode;
3667870b 11462 bool is_weak;
ff38b261 11463 rtx_code_label *label1, *label2;
11464 rtx x, cond;
db0e33b3 11465 enum memmodel model;
11466 rtx model_rtx;
3667870b 11467
11468 rval = operands[0];
11469 mem = operands[1];
11470 oldval = operands[2];
11471 newval = operands[3];
11472 is_weak = (operands[4] != const0_rtx);
db0e33b3 11473 model_rtx = operands[5];
3667870b 11474 scratch = operands[7];
11475 mode = GET_MODE (mem);
db0e33b3 11476 model = memmodel_from_int (INTVAL (model_rtx));
3667870b 11477
ff38b261 11478 label1 = NULL;
3667870b 11479 if (!is_weak)
11480 {
11481 label1 = gen_label_rtx ();
11482 emit_label (label1);
11483 }
11484 label2 = gen_label_rtx ();
11485
db0e33b3 11486 /* The initial load can be relaxed for a __sync operation since a final
11487 barrier will be emitted to stop code hoisting. */
11488 if (is_mm_sync (model))
11489 aarch64_emit_load_exclusive (mode, rval, mem,
11490 GEN_INT (MEMMODEL_RELAXED));
11491 else
11492 aarch64_emit_load_exclusive (mode, rval, mem, model_rtx);
3667870b 11493
11494 cond = aarch64_gen_compare_reg (NE, rval, oldval);
11495 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
11496 x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
11497 gen_rtx_LABEL_REF (Pmode, label2), pc_rtx);
d1f9b275 11498 aarch64_emit_unlikely_jump (gen_rtx_SET (pc_rtx, x));
3667870b 11499
db0e33b3 11500 aarch64_emit_store_exclusive (mode, scratch, mem, newval, model_rtx);
3667870b 11501
11502 if (!is_weak)
11503 {
11504 x = gen_rtx_NE (VOIDmode, scratch, const0_rtx);
11505 x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
11506 gen_rtx_LABEL_REF (Pmode, label1), pc_rtx);
d1f9b275 11507 aarch64_emit_unlikely_jump (gen_rtx_SET (pc_rtx, x));
3667870b 11508 }
11509 else
11510 {
11511 cond = gen_rtx_REG (CCmode, CC_REGNUM);
11512 x = gen_rtx_COMPARE (CCmode, scratch, const0_rtx);
d1f9b275 11513 emit_insn (gen_rtx_SET (cond, x));
3667870b 11514 }
11515
11516 emit_label (label2);
db0e33b3 11517
11518 /* Emit any final barrier needed for a __sync operation. */
11519 if (is_mm_sync (model))
11520 aarch64_emit_post_barrier (model);
3667870b 11521}
11522
eeae6940 11523/* Emit a BIC instruction. */
11524
11525static void
11526aarch64_emit_bic (machine_mode mode, rtx dst, rtx s1, rtx s2, int shift)
11527{
11528 rtx shift_rtx = GEN_INT (shift);
11529 rtx (*gen) (rtx, rtx, rtx, rtx);
11530
11531 switch (mode)
11532 {
11533 case SImode: gen = gen_and_one_cmpl_lshrsi3; break;
11534 case DImode: gen = gen_and_one_cmpl_lshrdi3; break;
11535 default:
11536 gcc_unreachable ();
11537 }
11538
11539 emit_insn (gen (dst, s2, shift_rtx, s1));
11540}
11541
82c9d053 11542/* Emit an atomic swap. */
11543
11544static void
11545aarch64_emit_atomic_swap (machine_mode mode, rtx dst, rtx value,
11546 rtx mem, rtx model)
11547{
11548 rtx (*gen) (rtx, rtx, rtx, rtx);
11549
11550 switch (mode)
11551 {
11552 case QImode: gen = gen_aarch64_atomic_swpqi; break;
11553 case HImode: gen = gen_aarch64_atomic_swphi; break;
11554 case SImode: gen = gen_aarch64_atomic_swpsi; break;
11555 case DImode: gen = gen_aarch64_atomic_swpdi; break;
11556 default:
11557 gcc_unreachable ();
11558 }
11559
11560 emit_insn (gen (dst, mem, value, model));
11561}
11562
b8a8b19c 11563/* Operations supported by aarch64_emit_atomic_load_op. */
11564
11565enum aarch64_atomic_load_op_code
11566{
11567 AARCH64_LDOP_PLUS, /* A + B */
11568 AARCH64_LDOP_XOR, /* A ^ B */
11569 AARCH64_LDOP_OR, /* A | B */
11570 AARCH64_LDOP_BIC /* A & ~B */
11571};
11572
11573/* Emit an atomic load-operate. */
11574
11575static void
11576aarch64_emit_atomic_load_op (enum aarch64_atomic_load_op_code code,
11577 machine_mode mode, rtx dst, rtx src,
11578 rtx mem, rtx model)
11579{
11580 typedef rtx (*aarch64_atomic_load_op_fn) (rtx, rtx, rtx, rtx);
11581 const aarch64_atomic_load_op_fn plus[] =
11582 {
11583 gen_aarch64_atomic_loadaddqi,
11584 gen_aarch64_atomic_loadaddhi,
11585 gen_aarch64_atomic_loadaddsi,
11586 gen_aarch64_atomic_loadadddi
11587 };
11588 const aarch64_atomic_load_op_fn eor[] =
11589 {
11590 gen_aarch64_atomic_loadeorqi,
11591 gen_aarch64_atomic_loadeorhi,
11592 gen_aarch64_atomic_loadeorsi,
11593 gen_aarch64_atomic_loadeordi
11594 };
11595 const aarch64_atomic_load_op_fn ior[] =
11596 {
11597 gen_aarch64_atomic_loadsetqi,
11598 gen_aarch64_atomic_loadsethi,
11599 gen_aarch64_atomic_loadsetsi,
11600 gen_aarch64_atomic_loadsetdi
11601 };
11602 const aarch64_atomic_load_op_fn bic[] =
11603 {
11604 gen_aarch64_atomic_loadclrqi,
11605 gen_aarch64_atomic_loadclrhi,
11606 gen_aarch64_atomic_loadclrsi,
11607 gen_aarch64_atomic_loadclrdi
11608 };
11609 aarch64_atomic_load_op_fn gen;
11610 int idx = 0;
11611
11612 switch (mode)
11613 {
11614 case QImode: idx = 0; break;
11615 case HImode: idx = 1; break;
11616 case SImode: idx = 2; break;
11617 case DImode: idx = 3; break;
11618 default:
11619 gcc_unreachable ();
11620 }
11621
11622 switch (code)
11623 {
11624 case AARCH64_LDOP_PLUS: gen = plus[idx]; break;
11625 case AARCH64_LDOP_XOR: gen = eor[idx]; break;
11626 case AARCH64_LDOP_OR: gen = ior[idx]; break;
11627 case AARCH64_LDOP_BIC: gen = bic[idx]; break;
11628 default:
11629 gcc_unreachable ();
11630 }
11631
11632 emit_insn (gen (dst, mem, src, model));
11633}
11634
11635/* Emit an atomic load+operate. CODE is the operation. OUT_DATA is the
eeae6940 11636 location to store the data read from memory. OUT_RESULT is the location to
11637 store the result of the operation. MEM is the memory location to read and
11638 modify. MODEL_RTX is the memory ordering to use. VALUE is the second
11639 operand for the operation. Either OUT_DATA or OUT_RESULT, but not both, can
11640 be NULL. */
82c9d053 11641
11642void
eeae6940 11643aarch64_gen_atomic_ldop (enum rtx_code code, rtx out_data, rtx out_result,
82c9d053 11644 rtx mem, rtx value, rtx model_rtx)
11645{
11646 machine_mode mode = GET_MODE (mem);
b8a8b19c 11647 machine_mode wmode = (mode == DImode ? DImode : SImode);
11648 const bool short_mode = (mode < SImode);
11649 aarch64_atomic_load_op_code ldop_code;
11650 rtx src;
11651 rtx x;
11652
11653 if (out_data)
11654 out_data = gen_lowpart (mode, out_data);
82c9d053 11655
eeae6940 11656 if (out_result)
11657 out_result = gen_lowpart (mode, out_result);
11658
b8a8b19c 11659 /* Make sure the value is in a register, putting it into a destination
11660 register if it needs to be manipulated. */
11661 if (!register_operand (value, mode)
11662 || code == AND || code == MINUS)
11663 {
eeae6940 11664 src = out_result ? out_result : out_data;
b8a8b19c 11665 emit_move_insn (src, gen_lowpart (mode, value));
11666 }
11667 else
11668 src = value;
11669 gcc_assert (register_operand (src, mode));
82c9d053 11670
b8a8b19c 11671 /* Preprocess the data for the operation as necessary. If the operation is
11672 a SET then emit a swap instruction and finish. */
82c9d053 11673 switch (code)
11674 {
11675 case SET:
b8a8b19c 11676 aarch64_emit_atomic_swap (mode, out_data, src, mem, model_rtx);
82c9d053 11677 return;
11678
b8a8b19c 11679 case MINUS:
11680 /* Negate the value and treat it as a PLUS. */
11681 {
11682 rtx neg_src;
11683
11684 /* Resize the value if necessary. */
11685 if (short_mode)
11686 src = gen_lowpart (wmode, src);
11687
11688 neg_src = gen_rtx_NEG (wmode, src);
11689 emit_insn (gen_rtx_SET (src, neg_src));
11690
11691 if (short_mode)
11692 src = gen_lowpart (mode, src);
11693 }
11694 /* Fall-through. */
11695 case PLUS:
11696 ldop_code = AARCH64_LDOP_PLUS;
11697 break;
11698
11699 case IOR:
11700 ldop_code = AARCH64_LDOP_OR;
11701 break;
11702
11703 case XOR:
11704 ldop_code = AARCH64_LDOP_XOR;
11705 break;
11706
11707 case AND:
11708 {
11709 rtx not_src;
11710
11711 /* Resize the value if necessary. */
11712 if (short_mode)
11713 src = gen_lowpart (wmode, src);
11714
11715 not_src = gen_rtx_NOT (wmode, src);
11716 emit_insn (gen_rtx_SET (src, not_src));
11717
11718 if (short_mode)
11719 src = gen_lowpart (mode, src);
11720 }
11721 ldop_code = AARCH64_LDOP_BIC;
11722 break;
11723
82c9d053 11724 default:
11725 /* The operation can't be done with atomic instructions. */
11726 gcc_unreachable ();
11727 }
b8a8b19c 11728
11729 aarch64_emit_atomic_load_op (ldop_code, mode, out_data, src, mem, model_rtx);
eeae6940 11730
11731 /* If necessary, calculate the data in memory after the update by redoing the
11732 operation from values in registers. */
11733 if (!out_result)
11734 return;
11735
11736 if (short_mode)
11737 {
11738 src = gen_lowpart (wmode, src);
11739 out_data = gen_lowpart (wmode, out_data);
11740 out_result = gen_lowpart (wmode, out_result);
11741 }
11742
11743 x = NULL_RTX;
11744
11745 switch (code)
11746 {
11747 case MINUS:
11748 case PLUS:
11749 x = gen_rtx_PLUS (wmode, out_data, src);
11750 break;
11751 case IOR:
11752 x = gen_rtx_IOR (wmode, out_data, src);
11753 break;
11754 case XOR:
11755 x = gen_rtx_XOR (wmode, out_data, src);
11756 break;
11757 case AND:
11758 aarch64_emit_bic (wmode, out_result, out_data, src, 0);
11759 return;
11760 default:
11761 gcc_unreachable ();
11762 }
11763
11764 emit_set_insn (out_result, x);
11765
11766 return;
82c9d053 11767}
11768
3667870b 11769/* Split an atomic operation. */
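/* For example, a relaxed SImode fetch-and-add splits into roughly:

     .Lretry: ldxr    w_old, [mem]
              add     w_new, w_old, w_value
              stxr    w_cond, w_new, [mem]
              cbnz    w_cond, .Lretry

   (register names illustrative).  */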
11770
11771void
11772aarch64_split_atomic_op (enum rtx_code code, rtx old_out, rtx new_out, rtx mem,
82c9d053 11773 rtx value, rtx model_rtx, rtx cond)
3667870b 11774{
3754d046 11775 machine_mode mode = GET_MODE (mem);
11776 machine_mode wmode = (mode == DImode ? DImode : SImode);
b53436f9 11777 const enum memmodel model = memmodel_from_int (INTVAL (model_rtx));
11778 const bool is_sync = is_mm_sync (model);
ff38b261 11779 rtx_code_label *label;
11780 rtx x;
3667870b 11781
82c9d053 11782 /* Split the atomic operation into a sequence. */
3667870b 11783 label = gen_label_rtx ();
11784 emit_label (label);
11785
11786 if (new_out)
11787 new_out = gen_lowpart (wmode, new_out);
11788 if (old_out)
11789 old_out = gen_lowpart (wmode, old_out);
11790 else
11791 old_out = new_out;
11792 value = simplify_gen_subreg (wmode, value, mode, 0);
11793
b53436f9 11794 /* The initial load can be relaxed for a __sync operation since a final
11795 barrier will be emitted to stop code hoisting. */
11796 if (is_sync)
11797 aarch64_emit_load_exclusive (mode, old_out, mem,
11798 GEN_INT (MEMMODEL_RELAXED));
11799 else
11800 aarch64_emit_load_exclusive (mode, old_out, mem, model_rtx);
3667870b 11801
11802 switch (code)
11803 {
11804 case SET:
11805 new_out = value;
11806 break;
11807
11808 case NOT:
11809 x = gen_rtx_AND (wmode, old_out, value);
d1f9b275 11810 emit_insn (gen_rtx_SET (new_out, x));
3667870b 11811 x = gen_rtx_NOT (wmode, new_out);
d1f9b275 11812 emit_insn (gen_rtx_SET (new_out, x));
3667870b 11813 break;
11814
11815 case MINUS:
11816 if (CONST_INT_P (value))
11817 {
11818 value = GEN_INT (-INTVAL (value));
11819 code = PLUS;
11820 }
11821 /* Fall through. */
11822
11823 default:
11824 x = gen_rtx_fmt_ee (code, wmode, old_out, value);
d1f9b275 11825 emit_insn (gen_rtx_SET (new_out, x));
3667870b 11826 break;
11827 }
11828
11829 aarch64_emit_store_exclusive (mode, cond, mem,
11830 gen_lowpart (mode, new_out), model_rtx);
11831
11832 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
11833 x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
11834 gen_rtx_LABEL_REF (Pmode, label), pc_rtx);
d1f9b275 11835 aarch64_emit_unlikely_jump (gen_rtx_SET (pc_rtx, x));
b53436f9 11836
11837 /* Emit any final barrier needed for a __sync operation. */
11838 if (is_sync)
11839 aarch64_emit_post_barrier (model);
3667870b 11840}
11841
6f520654 11842static void
11843aarch64_init_libfuncs (void)
11844{
11845 /* Half-precision float operations. The compiler handles all operations
11846 with NULL libfuncs by converting to SFmode. */
11847
11848 /* Conversions. */
11849 set_conv_libfunc (trunc_optab, HFmode, SFmode, "__gnu_f2h_ieee");
11850 set_conv_libfunc (sext_optab, SFmode, HFmode, "__gnu_h2f_ieee");
11851
11852 /* Arithmetic. */
11853 set_optab_libfunc (add_optab, HFmode, NULL);
11854 set_optab_libfunc (sdiv_optab, HFmode, NULL);
11855 set_optab_libfunc (smul_optab, HFmode, NULL);
11856 set_optab_libfunc (neg_optab, HFmode, NULL);
11857 set_optab_libfunc (sub_optab, HFmode, NULL);
11858
11859 /* Comparisons. */
11860 set_optab_libfunc (eq_optab, HFmode, NULL);
11861 set_optab_libfunc (ne_optab, HFmode, NULL);
11862 set_optab_libfunc (lt_optab, HFmode, NULL);
11863 set_optab_libfunc (le_optab, HFmode, NULL);
11864 set_optab_libfunc (ge_optab, HFmode, NULL);
11865 set_optab_libfunc (gt_optab, HFmode, NULL);
11866 set_optab_libfunc (unord_optab, HFmode, NULL);
11867}
11868
df401d54 11869/* Target hook for c_mode_for_suffix. */
3754d046 11870static machine_mode
df401d54 11871aarch64_c_mode_for_suffix (char suffix)
11872{
11873 if (suffix == 'q')
11874 return TFmode;
11875
11876 return VOIDmode;
11877}
11878
72841352 11879/* We can only represent floating point constants which will fit in
11880 "quarter-precision" values. These values are characterised by
11881   a sign bit, a 4-bit mantissa and a 3-bit exponent, and are given
11882 by:
11883
11884 (-1)^s * (n/16) * 2^r
11885
11886 Where:
11887 's' is the sign bit.
11888 'n' is an integer in the range 16 <= n <= 31.
11889 'r' is an integer in the range -3 <= r <= 4. */
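/* For example, 0.25 is representable as (16/16) * 2^-2; the representable
   magnitudes therefore run from 0.125 ((16/16) * 2^-3) up to 31.0
   ((31/16) * 2^4), and zero itself is not representable.  */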
11890
11891/* Return true iff X can be represented by a quarter-precision
11892   floating point immediate operand.  Note, we cannot represent 0.0.  */
11893bool
11894aarch64_float_const_representable_p (rtx x)
11895{
11896 /* This represents our current view of how many bits
11897 make up the mantissa. */
11898 int point_pos = 2 * HOST_BITS_PER_WIDE_INT - 1;
9439bdfe 11899 int exponent;
72841352 11900 unsigned HOST_WIDE_INT mantissa, mask;
72841352 11901 REAL_VALUE_TYPE r, m;
c0d2fd0b 11902 bool fail;
72841352 11903
11904 if (!CONST_DOUBLE_P (x))
11905 return false;
11906
6f520654 11907 /* We don't support HFmode constants yet. */
11908 if (GET_MODE (x) == VOIDmode || GET_MODE (x) == HFmode)
16a5eb2d 11909 return false;
11910
945f7b03 11911 r = *CONST_DOUBLE_REAL_VALUE (x);
72841352 11912
11913 /* We cannot represent infinities, NaNs or +/-zero. We won't
11914 know if we have +zero until we analyse the mantissa, but we
11915 can reject the other invalid values. */
11916 if (REAL_VALUE_ISINF (r) || REAL_VALUE_ISNAN (r)
11917 || REAL_VALUE_MINUS_ZERO (r))
11918 return false;
11919
9439bdfe 11920 /* Extract exponent. */
72841352 11921 r = real_value_abs (&r);
11922 exponent = REAL_EXP (&r);
11923
11924 /* For the mantissa, we expand into two HOST_WIDE_INTS, apart from the
11925 highest (sign) bit, with a fixed binary point at bit point_pos.
11926 m1 holds the low part of the mantissa, m2 the high part.
11927 WARNING: If we ever have a representation using more than 2 * H_W_I - 1
11928 bits for the mantissa, this can fail (low bits will be lost). */
11929 real_ldexp (&m, &r, point_pos - exponent);
c0d2fd0b 11930 wide_int w = real_to_integer (&m, &fail, HOST_BITS_PER_WIDE_INT * 2);
72841352 11931
11932 /* If the low part of the mantissa has bits set we cannot represent
11933 the value. */
e913b5cd 11934 if (w.elt (0) != 0)
72841352 11935 return false;
11936 /* We have rejected the lower HOST_WIDE_INT, so update our
11937 understanding of how many bits lie in the mantissa and
11938 look only at the high HOST_WIDE_INT. */
e913b5cd 11939 mantissa = w.elt (1);
72841352 11940 point_pos -= HOST_BITS_PER_WIDE_INT;
11941
11942 /* We can only represent values with a mantissa of the form 1.xxxx. */
11943 mask = ((unsigned HOST_WIDE_INT)1 << (point_pos - 5)) - 1;
11944 if ((mantissa & mask) != 0)
11945 return false;
11946
11947 /* Having filtered unrepresentable values, we may now remove all
11948 but the highest 5 bits. */
11949 mantissa >>= point_pos - 5;
11950
11951 /* We cannot represent the value 0.0, so reject it. This is handled
11952 elsewhere. */
11953 if (mantissa == 0)
11954 return false;
11955
11956 /* Then, as bit 4 is always set, we can mask it off, leaving
11957 the mantissa in the range [0, 15]. */
11958 mantissa &= ~(1 << 4);
11959 gcc_assert (mantissa <= 15);
11960
11961 /* GCC internally does not use IEEE754-like encoding (where normalized
11962     significands are in the range [1, 2)).  GCC uses [0.5, 1) (see real.c).
11963 Our mantissa values are shifted 4 places to the left relative to
11964 normalized IEEE754 so we must modify the exponent returned by REAL_EXP
11965 by 5 places to correct for GCC's representation. */
11966 exponent = 5 - exponent;
11967
11968 return (exponent >= 0 && exponent <= 7);
11969}
11970
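/* Output the asm template for moving CONST_VECTOR into a SIMD register of
   WIDTH bits: roughly, a MOVI/MVNI with an optional shift for integer
   immediates, or an FMOV for representable floating-point immediates
   (see the templates constructed below).  */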
11971char*
04b042b2 11972aarch64_output_simd_mov_immediate (rtx const_vector,
3754d046 11973 machine_mode mode,
72841352 11974 unsigned width)
11975{
74c40875 11976 bool is_valid;
72841352 11977 static char templ[40];
72841352 11978 const char *mnemonic;
8458c9e9 11979 const char *shift_op;
72841352 11980 unsigned int lane_count = 0;
04b042b2 11981 char element_char;
72841352 11982
8458c9e9 11983 struct simd_immediate_info info = { NULL_RTX, 0, 0, false, false };
fc3eb658 11984
11985 /* This will return true to show const_vector is legal for use as either
11986     an AdvSIMD MOVI instruction (or, implicitly, MVNI) immediate.  It will
11987 also update INFO to show how the immediate should be generated. */
04b042b2 11988 is_valid = aarch64_simd_valid_immediate (const_vector, mode, false, &info);
72841352 11989 gcc_assert (is_valid);
11990
04b042b2 11991 element_char = sizetochar (info.element_width);
fc3eb658 11992 lane_count = width / info.element_width;
11993
72841352 11994 mode = GET_MODE_INNER (mode);
8a73696a 11995 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
72841352 11996 {
fc3eb658 11997 gcc_assert (info.shift == 0 && ! info.mvn);
8a73696a 11998 /* For FP zero change it to a CONST_INT 0 and use the integer SIMD
11999 move immediate path. */
fc3eb658 12000 if (aarch64_float_const_zero_rtx_p (info.value))
12001 info.value = GEN_INT (0);
12002 else
12003 {
12004#define buf_size 20
fc3eb658 12005 char float_buf[buf_size] = {'\0'};
945f7b03 12006 real_to_decimal_for_mode (float_buf,
12007 CONST_DOUBLE_REAL_VALUE (info.value),
12008 buf_size, buf_size, 1, mode);
fc3eb658 12009#undef buf_size
12010
12011 if (lane_count == 1)
12012 snprintf (templ, sizeof (templ), "fmov\t%%d0, %s", float_buf);
12013 else
12014 snprintf (templ, sizeof (templ), "fmov\t%%0.%d%c, %s",
04b042b2 12015 lane_count, element_char, float_buf);
fc3eb658 12016 return templ;
12017 }
72841352 12018 }
72841352 12019
fc3eb658 12020 mnemonic = info.mvn ? "mvni" : "movi";
8458c9e9 12021 shift_op = info.msl ? "msl" : "lsl";
72841352 12022
8a73696a 12023 gcc_assert (CONST_INT_P (info.value));
72841352 12024 if (lane_count == 1)
fc3eb658 12025 snprintf (templ, sizeof (templ), "%s\t%%d0, " HOST_WIDE_INT_PRINT_HEX,
12026 mnemonic, UINTVAL (info.value));
12027 else if (info.shift)
12028 snprintf (templ, sizeof (templ), "%s\t%%0.%d%c, " HOST_WIDE_INT_PRINT_HEX
8458c9e9 12029 ", %s %d", mnemonic, lane_count, element_char,
12030 UINTVAL (info.value), shift_op, info.shift);
72841352 12031 else
fc3eb658 12032 snprintf (templ, sizeof (templ), "%s\t%%0.%d%c, " HOST_WIDE_INT_PRINT_HEX,
04b042b2 12033 mnemonic, lane_count, element_char, UINTVAL (info.value));
72841352 12034 return templ;
12035}
12036
dae88020 12037char*
12038aarch64_output_scalar_simd_mov_immediate (rtx immediate,
3754d046 12039 machine_mode mode)
dae88020 12040{
3754d046 12041 machine_mode vmode;
dae88020 12042
12043 gcc_assert (!VECTOR_MODE_P (mode));
12044 vmode = aarch64_simd_container_mode (mode, 64);
12045 rtx v_op = aarch64_simd_gen_const_vector_dup (vmode, INTVAL (immediate));
12046 return aarch64_output_simd_mov_immediate (v_op, vmode, 64);
12047}
12048
5de1fcdb 12049/* Split operands into moves from op[1] + op[2] into op[0]. */
12050
12051void
12052aarch64_split_combinev16qi (rtx operands[3])
12053{
12054 unsigned int dest = REGNO (operands[0]);
12055 unsigned int src1 = REGNO (operands[1]);
12056 unsigned int src2 = REGNO (operands[2]);
3754d046 12057 machine_mode halfmode = GET_MODE (operands[1]);
5de1fcdb 12058 unsigned int halfregs = HARD_REGNO_NREGS (src1, halfmode);
12059 rtx destlo, desthi;
12060
12061 gcc_assert (halfmode == V16QImode);
12062
12063 if (src1 == dest && src2 == dest + halfregs)
12064 {
12065 /* No-op move. Can't split to nothing; emit something. */
12066 emit_note (NOTE_INSN_DELETED);
12067 return;
12068 }
12069
12070 /* Preserve register attributes for variable tracking. */
12071 destlo = gen_rtx_REG_offset (operands[0], halfmode, dest, 0);
12072 desthi = gen_rtx_REG_offset (operands[0], halfmode, dest + halfregs,
12073 GET_MODE_SIZE (halfmode));
12074
12075 /* Special case of reversed high/low parts. */
12076 if (reg_overlap_mentioned_p (operands[2], destlo)
12077 && reg_overlap_mentioned_p (operands[1], desthi))
12078 {
12079 emit_insn (gen_xorv16qi3 (operands[1], operands[1], operands[2]));
12080 emit_insn (gen_xorv16qi3 (operands[2], operands[1], operands[2]));
12081 emit_insn (gen_xorv16qi3 (operands[1], operands[1], operands[2]));
12082 }
12083 else if (!reg_overlap_mentioned_p (operands[2], destlo))
12084 {
12085 /* Try to avoid unnecessary moves if part of the result
12086 is in the right place already. */
12087 if (src1 != dest)
12088 emit_move_insn (destlo, operands[1]);
12089 if (src2 != dest + halfregs)
12090 emit_move_insn (desthi, operands[2]);
12091 }
12092 else
12093 {
12094 if (src2 != dest + halfregs)
12095 emit_move_insn (desthi, operands[2]);
12096 if (src1 != dest)
12097 emit_move_insn (destlo, operands[1]);
12098 }
12099}
12100
12101/* vec_perm support. */
12102
12103#define MAX_VECT_LEN 16
12104
12105struct expand_vec_perm_d
12106{
12107 rtx target, op0, op1;
12108 unsigned char perm[MAX_VECT_LEN];
3754d046 12109 machine_mode vmode;
5de1fcdb 12110 unsigned char nelt;
12111 bool one_vector_p;
12112 bool testing_p;
12113};
12114
12115/* Generate a variable permutation. */
12116
12117static void
12118aarch64_expand_vec_perm_1 (rtx target, rtx op0, rtx op1, rtx sel)
12119{
3754d046 12120 machine_mode vmode = GET_MODE (target);
5de1fcdb 12121 bool one_vector_p = rtx_equal_p (op0, op1);
12122
12123 gcc_checking_assert (vmode == V8QImode || vmode == V16QImode);
12124 gcc_checking_assert (GET_MODE (op0) == vmode);
12125 gcc_checking_assert (GET_MODE (op1) == vmode);
12126 gcc_checking_assert (GET_MODE (sel) == vmode);
12127 gcc_checking_assert (TARGET_SIMD);
12128
12129 if (one_vector_p)
12130 {
12131 if (vmode == V8QImode)
12132 {
12133 /* Expand the argument to a V16QI mode by duplicating it. */
12134 rtx pair = gen_reg_rtx (V16QImode);
12135 emit_insn (gen_aarch64_combinev8qi (pair, op0, op0));
12136 emit_insn (gen_aarch64_tbl1v8qi (target, pair, sel));
12137 }
12138 else
12139 {
12140 emit_insn (gen_aarch64_tbl1v16qi (target, op0, sel));
12141 }
12142 }
12143 else
12144 {
12145 rtx pair;
12146
12147 if (vmode == V8QImode)
12148 {
12149 pair = gen_reg_rtx (V16QImode);
12150 emit_insn (gen_aarch64_combinev8qi (pair, op0, op1));
12151 emit_insn (gen_aarch64_tbl1v8qi (target, pair, sel));
12152 }
12153 else
12154 {
12155 pair = gen_reg_rtx (OImode);
12156 emit_insn (gen_aarch64_combinev16qi (pair, op0, op1));
12157 emit_insn (gen_aarch64_tbl2v16qi (target, pair, sel));
12158 }
12159 }
12160}
12161
12162void
12163aarch64_expand_vec_perm (rtx target, rtx op0, rtx op1, rtx sel)
12164{
3754d046 12165 machine_mode vmode = GET_MODE (target);
ad314c0e 12166 unsigned int nelt = GET_MODE_NUNITS (vmode);
5de1fcdb 12167 bool one_vector_p = rtx_equal_p (op0, op1);
27cdb632 12168 rtx mask;
5de1fcdb 12169
12170 /* The TBL instruction does not use a modulo index, so we must take care
12171 of that ourselves. */
27cdb632 12172 mask = aarch64_simd_gen_const_vector_dup (vmode,
12173 one_vector_p ? nelt - 1 : 2 * nelt - 1);
5de1fcdb 12174 sel = expand_simple_binop (vmode, AND, sel, mask, NULL, 0, OPTAB_LIB_WIDEN);
12175
27cdb632 12176 /* For big-endian, we also need to reverse the index within the vector
12177 (but not which vector). */
12178 if (BYTES_BIG_ENDIAN)
12179 {
12180 /* If one_vector_p, mask is a vector of (nelt - 1)'s already. */
12181 if (!one_vector_p)
12182 mask = aarch64_simd_gen_const_vector_dup (vmode, nelt - 1);
12183 sel = expand_simple_binop (vmode, XOR, sel, mask,
12184 NULL, 0, OPTAB_LIB_WIDEN);
12185 }
5de1fcdb 12186 aarch64_expand_vec_perm_1 (target, op0, op1, sel);
12187}
12188
14799b23 12189/* Recognize patterns suitable for the TRN instructions. */
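/* For example, with V4SImode inputs {a0,a1,a2,a3} and {b0,b1,b2,b3},
   TRN1 gives {a0,b0,a2,b2} (selector 0,4,2,6) and TRN2 gives
   {a1,b1,a3,b3} (selector 1,5,3,7).  */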
12190static bool
12191aarch64_evpc_trn (struct expand_vec_perm_d *d)
12192{
12193 unsigned int i, odd, mask, nelt = d->nelt;
12194 rtx out, in0, in1, x;
12195 rtx (*gen) (rtx, rtx, rtx);
3754d046 12196 machine_mode vmode = d->vmode;
14799b23 12197
12198 if (GET_MODE_UNIT_SIZE (vmode) > 8)
12199 return false;
12200
12201 /* Note that these are little-endian tests.
12202 We correct for big-endian later. */
12203 if (d->perm[0] == 0)
12204 odd = 0;
12205 else if (d->perm[0] == 1)
12206 odd = 1;
12207 else
12208 return false;
12209 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
12210
12211 for (i = 0; i < nelt; i += 2)
12212 {
12213 if (d->perm[i] != i + odd)
12214 return false;
12215 if (d->perm[i + 1] != ((i + nelt + odd) & mask))
12216 return false;
12217 }
12218
12219 /* Success! */
12220 if (d->testing_p)
12221 return true;
12222
12223 in0 = d->op0;
12224 in1 = d->op1;
12225 if (BYTES_BIG_ENDIAN)
12226 {
12227 x = in0, in0 = in1, in1 = x;
12228 odd = !odd;
12229 }
12230 out = d->target;
12231
12232 if (odd)
12233 {
12234 switch (vmode)
12235 {
12236 case V16QImode: gen = gen_aarch64_trn2v16qi; break;
12237 case V8QImode: gen = gen_aarch64_trn2v8qi; break;
12238 case V8HImode: gen = gen_aarch64_trn2v8hi; break;
12239 case V4HImode: gen = gen_aarch64_trn2v4hi; break;
12240 case V4SImode: gen = gen_aarch64_trn2v4si; break;
12241 case V2SImode: gen = gen_aarch64_trn2v2si; break;
12242 case V2DImode: gen = gen_aarch64_trn2v2di; break;
12243 case V4SFmode: gen = gen_aarch64_trn2v4sf; break;
12244 case V2SFmode: gen = gen_aarch64_trn2v2sf; break;
12245 case V2DFmode: gen = gen_aarch64_trn2v2df; break;
12246 default:
12247 return false;
12248 }
12249 }
12250 else
12251 {
12252 switch (vmode)
12253 {
12254 case V16QImode: gen = gen_aarch64_trn1v16qi; break;
12255 case V8QImode: gen = gen_aarch64_trn1v8qi; break;
12256 case V8HImode: gen = gen_aarch64_trn1v8hi; break;
12257 case V4HImode: gen = gen_aarch64_trn1v4hi; break;
12258 case V4SImode: gen = gen_aarch64_trn1v4si; break;
12259 case V2SImode: gen = gen_aarch64_trn1v2si; break;
12260 case V2DImode: gen = gen_aarch64_trn1v2di; break;
12261 case V4SFmode: gen = gen_aarch64_trn1v4sf; break;
12262 case V2SFmode: gen = gen_aarch64_trn1v2sf; break;
12263 case V2DFmode: gen = gen_aarch64_trn1v2df; break;
12264 default:
12265 return false;
12266 }
12267 }
12268
12269 emit_insn (gen (out, in0, in1));
12270 return true;
12271}
12272
12273/* Recognize patterns suitable for the UZP instructions. */
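/* For example, with V4SImode inputs {a0,a1,a2,a3} and {b0,b1,b2,b3},
   UZP1 gives {a0,a2,b0,b2} (selector 0,2,4,6) and UZP2 gives
   {a1,a3,b1,b3} (selector 1,3,5,7).  */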
12274static bool
12275aarch64_evpc_uzp (struct expand_vec_perm_d *d)
12276{
12277 unsigned int i, odd, mask, nelt = d->nelt;
12278 rtx out, in0, in1, x;
12279 rtx (*gen) (rtx, rtx, rtx);
3754d046 12280 machine_mode vmode = d->vmode;
14799b23 12281
12282 if (GET_MODE_UNIT_SIZE (vmode) > 8)
12283 return false;
12284
12285 /* Note that these are little-endian tests.
12286 We correct for big-endian later. */
12287 if (d->perm[0] == 0)
12288 odd = 0;
12289 else if (d->perm[0] == 1)
12290 odd = 1;
12291 else
12292 return false;
12293 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
12294
12295 for (i = 0; i < nelt; i++)
12296 {
12297 unsigned elt = (i * 2 + odd) & mask;
12298 if (d->perm[i] != elt)
12299 return false;
12300 }
12301
12302 /* Success! */
12303 if (d->testing_p)
12304 return true;
12305
12306 in0 = d->op0;
12307 in1 = d->op1;
12308 if (BYTES_BIG_ENDIAN)
12309 {
12310 x = in0, in0 = in1, in1 = x;
12311 odd = !odd;
12312 }
12313 out = d->target;
12314
12315 if (odd)
12316 {
12317 switch (vmode)
12318 {
12319 case V16QImode: gen = gen_aarch64_uzp2v16qi; break;
12320 case V8QImode: gen = gen_aarch64_uzp2v8qi; break;
12321 case V8HImode: gen = gen_aarch64_uzp2v8hi; break;
12322 case V4HImode: gen = gen_aarch64_uzp2v4hi; break;
12323 case V4SImode: gen = gen_aarch64_uzp2v4si; break;
12324 case V2SImode: gen = gen_aarch64_uzp2v2si; break;
12325 case V2DImode: gen = gen_aarch64_uzp2v2di; break;
12326 case V4SFmode: gen = gen_aarch64_uzp2v4sf; break;
12327 case V2SFmode: gen = gen_aarch64_uzp2v2sf; break;
12328 case V2DFmode: gen = gen_aarch64_uzp2v2df; break;
12329 default:
12330 return false;
12331 }
12332 }
12333 else
12334 {
12335 switch (vmode)
12336 {
12337 case V16QImode: gen = gen_aarch64_uzp1v16qi; break;
12338 case V8QImode: gen = gen_aarch64_uzp1v8qi; break;
12339 case V8HImode: gen = gen_aarch64_uzp1v8hi; break;
12340 case V4HImode: gen = gen_aarch64_uzp1v4hi; break;
12341 case V4SImode: gen = gen_aarch64_uzp1v4si; break;
12342 case V2SImode: gen = gen_aarch64_uzp1v2si; break;
12343 case V2DImode: gen = gen_aarch64_uzp1v2di; break;
12344 case V4SFmode: gen = gen_aarch64_uzp1v4sf; break;
12345 case V2SFmode: gen = gen_aarch64_uzp1v2sf; break;
12346 case V2DFmode: gen = gen_aarch64_uzp1v2df; break;
12347 default:
12348 return false;
12349 }
12350 }
12351
12352 emit_insn (gen (out, in0, in1));
12353 return true;
12354}
12355
12356/* Recognize patterns suitable for the ZIP instructions. */
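/* For example, with V4SImode inputs {a0,a1,a2,a3} and {b0,b1,b2,b3},
   ZIP1 gives {a0,b0,a1,b1} (selector 0,4,1,5) and ZIP2 gives
   {a2,b2,a3,b3} (selector 2,6,3,7).  */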
12357static bool
12358aarch64_evpc_zip (struct expand_vec_perm_d *d)
12359{
12360 unsigned int i, high, mask, nelt = d->nelt;
12361 rtx out, in0, in1, x;
12362 rtx (*gen) (rtx, rtx, rtx);
3754d046 12363 machine_mode vmode = d->vmode;
14799b23 12364
12365 if (GET_MODE_UNIT_SIZE (vmode) > 8)
12366 return false;
12367
12368 /* Note that these are little-endian tests.
12369 We correct for big-endian later. */
12370 high = nelt / 2;
12371 if (d->perm[0] == high)
12372 /* Do Nothing. */
12373 ;
12374 else if (d->perm[0] == 0)
12375 high = 0;
12376 else
12377 return false;
12378 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
12379
12380 for (i = 0; i < nelt / 2; i++)
12381 {
12382 unsigned elt = (i + high) & mask;
12383 if (d->perm[i * 2] != elt)
12384 return false;
12385 elt = (elt + nelt) & mask;
12386 if (d->perm[i * 2 + 1] != elt)
12387 return false;
12388 }
12389
12390 /* Success! */
12391 if (d->testing_p)
12392 return true;
12393
12394 in0 = d->op0;
12395 in1 = d->op1;
12396 if (BYTES_BIG_ENDIAN)
12397 {
12398 x = in0, in0 = in1, in1 = x;
12399 high = !high;
12400 }
12401 out = d->target;
12402
12403 if (high)
12404 {
12405 switch (vmode)
12406 {
12407 case V16QImode: gen = gen_aarch64_zip2v16qi; break;
12408 case V8QImode: gen = gen_aarch64_zip2v8qi; break;
12409 case V8HImode: gen = gen_aarch64_zip2v8hi; break;
12410 case V4HImode: gen = gen_aarch64_zip2v4hi; break;
12411 case V4SImode: gen = gen_aarch64_zip2v4si; break;
12412 case V2SImode: gen = gen_aarch64_zip2v2si; break;
12413 case V2DImode: gen = gen_aarch64_zip2v2di; break;
12414 case V4SFmode: gen = gen_aarch64_zip2v4sf; break;
12415 case V2SFmode: gen = gen_aarch64_zip2v2sf; break;
12416 case V2DFmode: gen = gen_aarch64_zip2v2df; break;
12417 default:
12418 return false;
12419 }
12420 }
12421 else
12422 {
12423 switch (vmode)
12424 {
12425 case V16QImode: gen = gen_aarch64_zip1v16qi; break;
12426 case V8QImode: gen = gen_aarch64_zip1v8qi; break;
12427 case V8HImode: gen = gen_aarch64_zip1v8hi; break;
12428 case V4HImode: gen = gen_aarch64_zip1v4hi; break;
12429 case V4SImode: gen = gen_aarch64_zip1v4si; break;
12430 case V2SImode: gen = gen_aarch64_zip1v2si; break;
12431 case V2DImode: gen = gen_aarch64_zip1v2di; break;
12432 case V4SFmode: gen = gen_aarch64_zip1v4sf; break;
12433 case V2SFmode: gen = gen_aarch64_zip1v2sf; break;
12434 case V2DFmode: gen = gen_aarch64_zip1v2df; break;
12435 default:
12436 return false;
12437 }
12438 }
12439
12440 emit_insn (gen (out, in0, in1));
12441 return true;
12442}
12443
582dc50a 12444/* Recognize patterns for the EXT insn. */
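/* EXT concatenates the two inputs and extracts a contiguous run of elements
   starting at a given index.  For example, with V4SImode inputs
   {a0,a1,a2,a3} and {b0,b1,b2,b3} and start index 1, the result is
   {a1,a2,a3,b0} (selector 1,2,3,4).  */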
12445
12446static bool
12447aarch64_evpc_ext (struct expand_vec_perm_d *d)
12448{
12449 unsigned int i, nelt = d->nelt;
12450 rtx (*gen) (rtx, rtx, rtx, rtx);
12451 rtx offset;
12452
12453 unsigned int location = d->perm[0]; /* Always < nelt. */
12454
12455 /* Check if the extracted indices are increasing by one. */
12456 for (i = 1; i < nelt; i++)
12457 {
12458 unsigned int required = location + i;
12459 if (d->one_vector_p)
12460 {
12461 /* We'll pass the same vector in twice, so allow indices to wrap. */
12462 required &= (nelt - 1);
12463 }
12464 if (d->perm[i] != required)
12465 return false;
12466 }
12467
582dc50a 12468 switch (d->vmode)
12469 {
12470 case V16QImode: gen = gen_aarch64_extv16qi; break;
12471 case V8QImode: gen = gen_aarch64_extv8qi; break;
12472 case V4HImode: gen = gen_aarch64_extv4hi; break;
12473 case V8HImode: gen = gen_aarch64_extv8hi; break;
12474 case V2SImode: gen = gen_aarch64_extv2si; break;
12475 case V4SImode: gen = gen_aarch64_extv4si; break;
12476 case V2SFmode: gen = gen_aarch64_extv2sf; break;
12477 case V4SFmode: gen = gen_aarch64_extv4sf; break;
12478 case V2DImode: gen = gen_aarch64_extv2di; break;
12479 case V2DFmode: gen = gen_aarch64_extv2df; break;
12480 default:
12481 return false;
12482 }
12483
12484 /* Success! */
12485 if (d->testing_p)
12486 return true;
12487
6bfe261e 12488 /* The case where (location == 0) is a no-op for both big- and little-endian,
12489 and is removed by the mid-end at optimization levels -O1 and higher. */
12490
12491 if (BYTES_BIG_ENDIAN && (location != 0))
582dc50a 12492 {
12493 /* After setup, we want the high elements of the first vector (stored
12494 at the LSB end of the register), and the low elements of the second
12495 vector (stored at the MSB end of the register). So swap. */
9c110f15 12496 std::swap (d->op0, d->op1);
582dc50a 12497 /* location != 0 (above), so safe to assume (nelt - location) < nelt. */
12498 location = nelt - location;
12499 }
12500
12501 offset = GEN_INT (location);
12502 emit_insn (gen (d->target, d->op0, d->op1, offset));
12503 return true;
12504}
12505
5f5fccf7 12506/* Recognize patterns for the REV insns. */
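/* REV64, REV32 and REV16 reverse the elements within each 64-bit, 32-bit or
   16-bit container respectively.  For example, REV64 on a V4HImode vector
   {a0,a1,a2,a3} gives {a3,a2,a1,a0} (selector 3,2,1,0, i.e. diff == 3
   below).  */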
12507
12508static bool
12509aarch64_evpc_rev (struct expand_vec_perm_d *d)
12510{
12511 unsigned int i, j, diff, nelt = d->nelt;
12512 rtx (*gen) (rtx, rtx);
12513
12514 if (!d->one_vector_p)
12515 return false;
12516
12517 diff = d->perm[0];
12518 switch (diff)
12519 {
12520 case 7:
12521 switch (d->vmode)
12522 {
12523 case V16QImode: gen = gen_aarch64_rev64v16qi; break;
12524 case V8QImode: gen = gen_aarch64_rev64v8qi; break;
12525 default:
12526 return false;
12527 }
12528 break;
12529 case 3:
12530 switch (d->vmode)
12531 {
12532 case V16QImode: gen = gen_aarch64_rev32v16qi; break;
12533 case V8QImode: gen = gen_aarch64_rev32v8qi; break;
12534 case V8HImode: gen = gen_aarch64_rev64v8hi; break;
12535 case V4HImode: gen = gen_aarch64_rev64v4hi; break;
12536 default:
12537 return false;
12538 }
12539 break;
12540 case 1:
12541 switch (d->vmode)
12542 {
12543 case V16QImode: gen = gen_aarch64_rev16v16qi; break;
12544 case V8QImode: gen = gen_aarch64_rev16v8qi; break;
12545 case V8HImode: gen = gen_aarch64_rev32v8hi; break;
12546 case V4HImode: gen = gen_aarch64_rev32v4hi; break;
12547 case V4SImode: gen = gen_aarch64_rev64v4si; break;
12548 case V2SImode: gen = gen_aarch64_rev64v2si; break;
12549 case V4SFmode: gen = gen_aarch64_rev64v4sf; break;
12550 case V2SFmode: gen = gen_aarch64_rev64v2sf; break;
12551 default:
12552 return false;
12553 }
12554 break;
12555 default:
12556 return false;
12557 }
12558
12559 for (i = 0; i < nelt ; i += diff + 1)
12560 for (j = 0; j <= diff; j += 1)
12561 {
12562 /* This is guaranteed to be true as the value of diff
12563 	 is 7, 3 or 1 and we should have enough elements in the
12564 queue to generate this. Getting a vector mask with a
12565 value of diff other than these values implies that
12566 something is wrong by the time we get here. */
12567 gcc_assert (i + j < nelt);
12568 if (d->perm[i + j] != i + diff - j)
12569 return false;
12570 }
12571
12572 /* Success! */
12573 if (d->testing_p)
12574 return true;
12575
12576 emit_insn (gen (d->target, d->op0));
12577 return true;
12578}
12579
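/* Recognize selectors in which every element names the same lane, which can
   be implemented as a single DUP-by-lane.  For example, the selector
   {2,2,2,2} on a V4SImode operand becomes "dup v0.4s, v1.s[2]"
   (register numbers illustrative).  */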
2d67c34c 12580static bool
12581aarch64_evpc_dup (struct expand_vec_perm_d *d)
12582{
12583 rtx (*gen) (rtx, rtx, rtx);
12584 rtx out = d->target;
12585 rtx in0;
3754d046 12586 machine_mode vmode = d->vmode;
2d67c34c 12587 unsigned int i, elt, nelt = d->nelt;
12588 rtx lane;
12589
2d67c34c 12590 elt = d->perm[0];
12591 for (i = 1; i < nelt; i++)
12592 {
12593 if (elt != d->perm[i])
12594 return false;
12595 }
12596
12597 /* The generic preparation in aarch64_expand_vec_perm_const_1
12598 swaps the operand order and the permute indices if it finds
12599 d->perm[0] to be in the second operand. Thus, we can always
12600 use d->op0 and need not do any extra arithmetic to get the
12601 correct lane number. */
12602 in0 = d->op0;
f828b3c0 12603 lane = GEN_INT (elt); /* The pattern corrects for big-endian. */
2d67c34c 12604
12605 switch (vmode)
12606 {
12607 case V16QImode: gen = gen_aarch64_dup_lanev16qi; break;
12608 case V8QImode: gen = gen_aarch64_dup_lanev8qi; break;
12609 case V8HImode: gen = gen_aarch64_dup_lanev8hi; break;
12610 case V4HImode: gen = gen_aarch64_dup_lanev4hi; break;
12611 case V4SImode: gen = gen_aarch64_dup_lanev4si; break;
12612 case V2SImode: gen = gen_aarch64_dup_lanev2si; break;
12613 case V2DImode: gen = gen_aarch64_dup_lanev2di; break;
57887f75 12614 case V8HFmode: gen = gen_aarch64_dup_lanev8hf; break;
12615 case V4HFmode: gen = gen_aarch64_dup_lanev4hf; break;
2d67c34c 12616 case V4SFmode: gen = gen_aarch64_dup_lanev4sf; break;
12617 case V2SFmode: gen = gen_aarch64_dup_lanev2sf; break;
12618 case V2DFmode: gen = gen_aarch64_dup_lanev2df; break;
12619 default:
12620 return false;
12621 }
12622
12623 emit_insn (gen (out, in0, lane));
12624 return true;
12625}
12626
5de1fcdb 12627static bool
12628aarch64_evpc_tbl (struct expand_vec_perm_d *d)
12629{
12630 rtx rperm[MAX_VECT_LEN], sel;
3754d046 12631 machine_mode vmode = d->vmode;
5de1fcdb 12632 unsigned int i, nelt = d->nelt;
12633
5de1fcdb 12634 if (d->testing_p)
12635 return true;
12636
12637 /* Generic code will try constant permutation twice. Once with the
12638 original mode and again with the elements lowered to QImode.
12639 So wait and don't do the selector expansion ourselves. */
12640 if (vmode != V8QImode && vmode != V16QImode)
12641 return false;
12642
12643 for (i = 0; i < nelt; ++i)
bc92d1f7 12644 {
12645 int nunits = GET_MODE_NUNITS (vmode);
12646
12647 /* If big-endian and two vectors we end up with a weird mixed-endian
12648 mode on NEON. Reverse the index within each word but not the word
12649 itself. */
12650 rperm[i] = GEN_INT (BYTES_BIG_ENDIAN ? d->perm[i] ^ (nunits - 1)
12651 : d->perm[i]);
12652 }
5de1fcdb 12653 sel = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, rperm));
12654 sel = force_reg (vmode, sel);
12655
12656 aarch64_expand_vec_perm_1 (d->target, d->op0, d->op1, sel);
12657 return true;
12658}
12659
12660static bool
12661aarch64_expand_vec_perm_const_1 (struct expand_vec_perm_d *d)
12662{
12663 /* The pattern matching functions above are written to look for a small
12664 number to begin the sequence (0, 1, N/2). If we begin with an index
12665 from the second operand, we can swap the operands. */
12666 if (d->perm[0] >= d->nelt)
12667 {
12668 unsigned i, nelt = d->nelt;
5de1fcdb 12669
d44f2f7c 12670 gcc_assert (nelt == (nelt & -nelt));
5de1fcdb 12671 for (i = 0; i < nelt; ++i)
d44f2f7c 12672 d->perm[i] ^= nelt; /* Keep the same index, but in the other vector. */
5de1fcdb 12673
9c110f15 12674 std::swap (d->op0, d->op1);
5de1fcdb 12675 }
12676
12677 if (TARGET_SIMD)
14799b23 12678 {
5f5fccf7 12679 if (aarch64_evpc_rev (d))
12680 return true;
12681 else if (aarch64_evpc_ext (d))
582dc50a 12682 return true;
f828b3c0 12683 else if (aarch64_evpc_dup (d))
12684 return true;
582dc50a 12685 else if (aarch64_evpc_zip (d))
14799b23 12686 return true;
12687 else if (aarch64_evpc_uzp (d))
12688 return true;
12689 else if (aarch64_evpc_trn (d))
12690 return true;
12691 return aarch64_evpc_tbl (d);
12692 }
5de1fcdb 12693 return false;
12694}
12695
12696/* Expand a vec_perm_const pattern. */
12697
12698bool
12699aarch64_expand_vec_perm_const (rtx target, rtx op0, rtx op1, rtx sel)
12700{
12701 struct expand_vec_perm_d d;
12702 int i, nelt, which;
12703
12704 d.target = target;
12705 d.op0 = op0;
12706 d.op1 = op1;
12707
12708 d.vmode = GET_MODE (target);
12709 gcc_assert (VECTOR_MODE_P (d.vmode));
12710 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
12711 d.testing_p = false;
12712
12713 for (i = which = 0; i < nelt; ++i)
12714 {
12715 rtx e = XVECEXP (sel, 0, i);
12716 int ei = INTVAL (e) & (2 * nelt - 1);
12717 which |= (ei < nelt ? 1 : 2);
12718 d.perm[i] = ei;
12719 }
12720
12721 switch (which)
12722 {
12723 default:
12724 gcc_unreachable ();
12725
12726 case 3:
12727 d.one_vector_p = false;
12728 if (!rtx_equal_p (op0, op1))
12729 break;
12730
12731 /* The elements of PERM do not suggest that only the first operand
12732 is used, but both operands are identical. Allow easier matching
12733 of the permutation by folding the permutation into the single
12734 input vector. */
12735 /* Fall Through. */
12736 case 2:
12737 for (i = 0; i < nelt; ++i)
12738 d.perm[i] &= nelt - 1;
12739 d.op0 = op1;
12740 d.one_vector_p = true;
12741 break;
12742
12743 case 1:
12744 d.op1 = op0;
12745 d.one_vector_p = true;
12746 break;
12747 }
12748
12749 return aarch64_expand_vec_perm_const_1 (&d);
12750}
12751
12752static bool
3754d046 12753aarch64_vectorize_vec_perm_const_ok (machine_mode vmode,
5de1fcdb 12754 const unsigned char *sel)
12755{
12756 struct expand_vec_perm_d d;
12757 unsigned int i, nelt, which;
12758 bool ret;
12759
12760 d.vmode = vmode;
12761 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
12762 d.testing_p = true;
12763 memcpy (d.perm, sel, nelt);
12764
12765 /* Calculate whether all elements are in one vector. */
12766 for (i = which = 0; i < nelt; ++i)
12767 {
12768 unsigned char e = d.perm[i];
12769 gcc_assert (e < 2 * nelt);
12770 which |= (e < nelt ? 1 : 2);
12771 }
12772
12773 /* If all elements are from the second vector, reindex as if from the
12774 first vector. */
12775 if (which == 2)
12776 for (i = 0; i < nelt; ++i)
12777 d.perm[i] -= nelt;
12778
12779 /* Check whether the mask can be applied to a single vector. */
12780 d.one_vector_p = (which != 3);
12781
12782 d.target = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 1);
12783 d.op1 = d.op0 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 2);
12784 if (!d.one_vector_p)
12785 d.op1 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 3);
12786
12787 start_sequence ();
12788 ret = aarch64_expand_vec_perm_const_1 (&d);
12789 end_sequence ();
12790
12791 return ret;
12792}
12793
4e12ee82 12794/* Implement target hook CANNOT_CHANGE_MODE_CLASS. */
12795bool
12796aarch64_cannot_change_mode_class (machine_mode from,
12797 machine_mode to,
12798 enum reg_class rclass)
12799{
12800 /* We cannot allow word_mode subregs of full vector modes.
12801 Otherwise the middle-end will assume it's ok to store to
12802 (subreg:DI (reg:TI 100) 0) in order to modify only the low 64 bits
12803 of the 128-bit register. However, after reload the subreg will
12804 be dropped leaving a plain DImode store. See PR67609 for a more
 12805	 detailed discussion. In all other cases, we want to be permissive
12806 and return false. */
12807 return (reg_classes_intersect_p (FP_REGS, rclass)
12808 && GET_MODE_SIZE (to) == UNITS_PER_WORD
12809 && GET_MODE_SIZE (from) > UNITS_PER_WORD);
12810}
12811
a91cc579 12812rtx
12813aarch64_reverse_mask (enum machine_mode mode)
12814{
 12815	 /* We have to reverse each vector because we don't have
12816 a permuted load that can reverse-load according to ABI rules. */
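  /* Worked example (illustrative): for V4SImode, NUNITS is 4 and the unit
     size is 4, so the byte selector built below is
     { 3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12 },
     i.e. the bytes within each 32-bit element are reversed.  */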
12817 rtx mask;
12818 rtvec v = rtvec_alloc (16);
12819 int i, j;
12820 int nunits = GET_MODE_NUNITS (mode);
12821 int usize = GET_MODE_UNIT_SIZE (mode);
12822
12823 gcc_assert (BYTES_BIG_ENDIAN);
12824 gcc_assert (AARCH64_VALID_SIMD_QREG_MODE (mode));
12825
12826 for (i = 0; i < nunits; i++)
12827 for (j = 0; j < usize; j++)
12828 RTVEC_ELT (v, i * usize + j) = GEN_INT ((i + 1) * usize - 1 - j);
12829 mask = gen_rtx_CONST_VECTOR (V16QImode, v);
12830 return force_reg (V16QImode, mask);
12831}
12832
610b1acb 12833/* Implement MODES_TIEABLE_P. */
12834
12835bool
3754d046 12836aarch64_modes_tieable_p (machine_mode mode1, machine_mode mode2)
610b1acb 12837{
12838 if (GET_MODE_CLASS (mode1) == GET_MODE_CLASS (mode2))
12839 return true;
12840
12841 /* We specifically want to allow elements of "structure" modes to
12842 be tieable to the structure. This more general condition allows
12843 other rarer situations too. */
12844 if (TARGET_SIMD
12845 && aarch64_vector_mode_p (mode1)
12846 && aarch64_vector_mode_p (mode2))
12847 return true;
12848
12849 return false;
12850}
12851
a7007121 12852/* Return a new RTX holding the result of moving POINTER forward by
12853 AMOUNT bytes. */
12854
12855static rtx
12856aarch64_move_pointer (rtx pointer, int amount)
12857{
12858 rtx next = plus_constant (Pmode, XEXP (pointer, 0), amount);
12859
12860 return adjust_automodify_address (pointer, GET_MODE (pointer),
12861 next, amount);
12862}
12863
12864/* Return a new RTX holding the result of moving POINTER forward by the
12865 size of the mode it points to. */
12866
12867static rtx
12868aarch64_progress_pointer (rtx pointer)
12869{
12870 HOST_WIDE_INT amount = GET_MODE_SIZE (GET_MODE (pointer));
12871
12872 return aarch64_move_pointer (pointer, amount);
12873}
12874
12875/* Copy one MODE sized block from SRC to DST, then progress SRC and DST by
12876 MODE bytes. */
12877
12878static void
12879aarch64_copy_one_block_and_progress_pointers (rtx *src, rtx *dst,
3754d046 12880 machine_mode mode)
a7007121 12881{
12882 rtx reg = gen_reg_rtx (mode);
12883
12884 /* "Cast" the pointers to the correct mode. */
12885 *src = adjust_address (*src, mode, 0);
12886 *dst = adjust_address (*dst, mode, 0);
12887 /* Emit the memcpy. */
12888 emit_move_insn (reg, *src);
12889 emit_move_insn (*dst, reg);
12890 /* Move the pointers forward. */
12891 *src = aarch64_progress_pointer (*src);
12892 *dst = aarch64_progress_pointer (*dst);
12893}
12894
12895/* Expand movmem, as if from a __builtin_memcpy. Return true if
12896 we succeed, otherwise return false. */
12897
12898bool
12899aarch64_expand_movmem (rtx *operands)
12900{
12901 unsigned int n;
12902 rtx dst = operands[0];
12903 rtx src = operands[1];
12904 rtx base;
12905 bool speed_p = !optimize_function_for_size_p (cfun);
12906
12907 /* When optimizing for size, give a better estimate of the length of a
12908 memcpy call, but use the default otherwise. */
12909 unsigned int max_instructions = (speed_p ? 15 : AARCH64_CALL_RATIO) / 2;
12910
12911 /* We can't do anything smart if the amount to copy is not constant. */
12912 if (!CONST_INT_P (operands[2]))
12913 return false;
12914
12915 n = UINTVAL (operands[2]);
12916
12917 /* Try to keep the number of instructions low. For cases below 16 bytes we
12918 need to make at most two moves. For cases above 16 bytes it will be one
12919 move for each 16 byte chunk, then at most two additional moves. */
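  /* Worked example (illustrative): when optimizing for speed
     max_instructions is 7, so a 35-byte copy (two 16-byte chunks plus two
     tail moves, four moves in total) is expanded inline, whereas a 200-byte
     copy falls back to a library call.  */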
12920 if (((n / 16) + (n % 16 ? 2 : 0)) > max_instructions)
12921 return false;
12922
12923 base = copy_to_mode_reg (Pmode, XEXP (dst, 0));
12924 dst = adjust_automodify_address (dst, VOIDmode, base, 0);
12925
12926 base = copy_to_mode_reg (Pmode, XEXP (src, 0));
12927 src = adjust_automodify_address (src, VOIDmode, base, 0);
12928
 12929	 /* Simple cases. Copy 0-3 bytes: (if applicable) a 2-byte chunk, then a
 12930	 1-byte chunk. */
12931 if (n < 4)
12932 {
12933 if (n >= 2)
12934 {
12935 aarch64_copy_one_block_and_progress_pointers (&src, &dst, HImode);
12936 n -= 2;
12937 }
12938
12939 if (n == 1)
12940 aarch64_copy_one_block_and_progress_pointers (&src, &dst, QImode);
12941
12942 return true;
12943 }
12944
12945 /* Copy 4-8 bytes. First a 4-byte chunk, then (if applicable) a second
12946 4-byte chunk, partially overlapping with the previously copied chunk. */
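  /* Worked example (illustrative): for n == 7 the first SImode copy handles
     bytes 0-3 and advances both pointers by 4; "move" is then 3 - 4 == -1,
     so the second SImode copy covers bytes 3-6, overlapping byte 3.  */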
12947 if (n < 8)
12948 {
12949 aarch64_copy_one_block_and_progress_pointers (&src, &dst, SImode);
12950 n -= 4;
12951 if (n > 0)
12952 {
12953 int move = n - 4;
12954
12955 src = aarch64_move_pointer (src, move);
12956 dst = aarch64_move_pointer (dst, move);
12957 aarch64_copy_one_block_and_progress_pointers (&src, &dst, SImode);
12958 }
12959 return true;
12960 }
12961
12962 /* Copy more than 8 bytes. Copy chunks of 16 bytes until we run out of
12963 them, then (if applicable) an 8-byte chunk. */
12964 while (n >= 8)
12965 {
12966 if (n / 16)
12967 {
12968 aarch64_copy_one_block_and_progress_pointers (&src, &dst, TImode);
12969 n -= 16;
12970 }
12971 else
12972 {
12973 aarch64_copy_one_block_and_progress_pointers (&src, &dst, DImode);
12974 n -= 8;
12975 }
12976 }
12977
12978 /* Finish the final bytes of the copy. We can always do this in one
12979 instruction. We either copy the exact amount we need, or partially
 12980	 overlap with the previous chunk we copied and copy 8 bytes. */
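  /* Worked example (illustrative): if 5 bytes remain, both pointers are
     stepped back by 3 (move == 5 - 8) and a single DImode copy is issued,
     overlapping three bytes already written by the previous chunk.  */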
12981 if (n == 0)
12982 return true;
12983 else if (n == 1)
12984 aarch64_copy_one_block_and_progress_pointers (&src, &dst, QImode);
12985 else if (n == 2)
12986 aarch64_copy_one_block_and_progress_pointers (&src, &dst, HImode);
12987 else if (n == 4)
12988 aarch64_copy_one_block_and_progress_pointers (&src, &dst, SImode);
12989 else
12990 {
12991 if (n == 3)
12992 {
12993 src = aarch64_move_pointer (src, -1);
12994 dst = aarch64_move_pointer (dst, -1);
12995 aarch64_copy_one_block_and_progress_pointers (&src, &dst, SImode);
12996 }
12997 else
12998 {
12999 int move = n - 8;
13000
13001 src = aarch64_move_pointer (src, move);
13002 dst = aarch64_move_pointer (dst, move);
13003 aarch64_copy_one_block_and_progress_pointers (&src, &dst, DImode);
13004 }
13005 }
13006
13007 return true;
13008}
13009
6153b62d 13010/* Implement the TARGET_ASAN_SHADOW_OFFSET hook. */
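/* Note (added for clarity): AddressSanitizer maps an address to its shadow
   byte roughly as shadow = (addr >> 3) + offset, so the value returned below
   places the shadow region at a fixed 1 << 36 byte offset.  */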
13011
13012static unsigned HOST_WIDE_INT
13013aarch64_asan_shadow_offset (void)
13014{
13015 return (HOST_WIDE_INT_1 << 36);
13016}
13017
16b382d1 13018static bool
89da42b6 13019aarch64_use_by_pieces_infrastructure_p (unsigned HOST_WIDE_INT size,
16b382d1 13020 unsigned int align,
13021 enum by_pieces_operation op,
13022 bool speed_p)
13023{
13024 /* STORE_BY_PIECES can be used when copying a constant string, but
13025 in that case each 64-bit chunk takes 5 insns instead of 2 (LDR/STR).
13026 For now we always fail this and let the move_by_pieces code copy
13027 the string from read-only memory. */
13028 if (op == STORE_BY_PIECES)
13029 return false;
13030
13031 return default_use_by_pieces_infrastructure_p (size, align, op, speed_p);
13032}
13033
636fb80b 13034static enum machine_mode
13035aarch64_code_to_ccmode (enum rtx_code code)
13036{
13037 switch (code)
13038 {
13039 case NE:
13040 return CC_DNEmode;
13041
13042 case EQ:
13043 return CC_DEQmode;
13044
13045 case LE:
13046 return CC_DLEmode;
13047
13048 case LT:
13049 return CC_DLTmode;
13050
13051 case GE:
13052 return CC_DGEmode;
13053
13054 case GT:
13055 return CC_DGTmode;
13056
13057 case LEU:
13058 return CC_DLEUmode;
13059
13060 case LTU:
13061 return CC_DLTUmode;
13062
13063 case GEU:
13064 return CC_DGEUmode;
13065
13066 case GTU:
13067 return CC_DGTUmode;
13068
13069 default:
13070 return CCmode;
13071 }
13072}
13073
13074static rtx
13075aarch64_gen_ccmp_first (rtx *prep_seq, rtx *gen_seq,
13076 int code, tree treeop0, tree treeop1)
13077{
13078 enum machine_mode op_mode, cmp_mode, cc_mode;
13079 rtx op0, op1, cmp, target;
13080 int unsignedp = TYPE_UNSIGNED (TREE_TYPE (treeop0));
13081 enum insn_code icode;
13082 struct expand_operand ops[4];
13083
13084 cc_mode = aarch64_code_to_ccmode ((enum rtx_code) code);
13085 if (cc_mode == CCmode)
13086 return NULL_RTX;
13087
13088 start_sequence ();
13089 expand_operands (treeop0, treeop1, NULL_RTX, &op0, &op1, EXPAND_NORMAL);
13090
13091 op_mode = GET_MODE (op0);
13092 if (op_mode == VOIDmode)
13093 op_mode = GET_MODE (op1);
13094
13095 switch (op_mode)
13096 {
13097 case QImode:
13098 case HImode:
13099 case SImode:
13100 cmp_mode = SImode;
13101 icode = CODE_FOR_cmpsi;
13102 break;
13103
13104 case DImode:
13105 cmp_mode = DImode;
13106 icode = CODE_FOR_cmpdi;
13107 break;
13108
13109 default:
13110 end_sequence ();
13111 return NULL_RTX;
13112 }
13113
13114 op0 = prepare_operand (icode, op0, 2, op_mode, cmp_mode, unsignedp);
13115 op1 = prepare_operand (icode, op1, 3, op_mode, cmp_mode, unsignedp);
13116 if (!op0 || !op1)
13117 {
13118 end_sequence ();
13119 return NULL_RTX;
13120 }
13121 *prep_seq = get_insns ();
13122 end_sequence ();
13123
13124 cmp = gen_rtx_fmt_ee ((enum rtx_code) code, cmp_mode, op0, op1);
13125 target = gen_rtx_REG (CCmode, CC_REGNUM);
13126
13127 create_output_operand (&ops[0], target, CCmode);
13128 create_fixed_operand (&ops[1], cmp);
13129 create_fixed_operand (&ops[2], op0);
13130 create_fixed_operand (&ops[3], op1);
13131
13132 start_sequence ();
13133 if (!maybe_expand_insn (icode, 4, ops))
13134 {
13135 end_sequence ();
13136 return NULL_RTX;
13137 }
13138 *gen_seq = get_insns ();
13139 end_sequence ();
13140
13141 return gen_rtx_REG (cc_mode, CC_REGNUM);
13142}
13143
13144static rtx
13145aarch64_gen_ccmp_next (rtx *prep_seq, rtx *gen_seq, rtx prev, int cmp_code,
13146 tree treeop0, tree treeop1, int bit_code)
13147{
13148 rtx op0, op1, cmp0, cmp1, target;
13149 enum machine_mode op_mode, cmp_mode, cc_mode;
13150 int unsignedp = TYPE_UNSIGNED (TREE_TYPE (treeop0));
13151 enum insn_code icode = CODE_FOR_ccmp_andsi;
13152 struct expand_operand ops[6];
13153
13154 cc_mode = aarch64_code_to_ccmode ((enum rtx_code) cmp_code);
13155 if (cc_mode == CCmode)
13156 return NULL_RTX;
13157
13158 push_to_sequence ((rtx_insn*) *prep_seq);
13159 expand_operands (treeop0, treeop1, NULL_RTX, &op0, &op1, EXPAND_NORMAL);
13160
13161 op_mode = GET_MODE (op0);
13162 if (op_mode == VOIDmode)
13163 op_mode = GET_MODE (op1);
13164
13165 switch (op_mode)
13166 {
13167 case QImode:
13168 case HImode:
13169 case SImode:
13170 cmp_mode = SImode;
13171 icode = (enum rtx_code) bit_code == AND ? CODE_FOR_ccmp_andsi
13172 : CODE_FOR_ccmp_iorsi;
13173 break;
13174
13175 case DImode:
13176 cmp_mode = DImode;
13177 icode = (enum rtx_code) bit_code == AND ? CODE_FOR_ccmp_anddi
13178 : CODE_FOR_ccmp_iordi;
13179 break;
13180
13181 default:
13182 end_sequence ();
13183 return NULL_RTX;
13184 }
13185
13186 op0 = prepare_operand (icode, op0, 2, op_mode, cmp_mode, unsignedp);
13187 op1 = prepare_operand (icode, op1, 3, op_mode, cmp_mode, unsignedp);
13188 if (!op0 || !op1)
13189 {
13190 end_sequence ();
13191 return NULL_RTX;
13192 }
13193 *prep_seq = get_insns ();
13194 end_sequence ();
13195
13196 target = gen_rtx_REG (cc_mode, CC_REGNUM);
13197 cmp1 = gen_rtx_fmt_ee ((enum rtx_code) cmp_code, cmp_mode, op0, op1);
13198 cmp0 = gen_rtx_fmt_ee (NE, cmp_mode, prev, const0_rtx);
13199
13200 create_fixed_operand (&ops[0], prev);
13201 create_fixed_operand (&ops[1], target);
13202 create_fixed_operand (&ops[2], op0);
13203 create_fixed_operand (&ops[3], op1);
13204 create_fixed_operand (&ops[4], cmp0);
13205 create_fixed_operand (&ops[5], cmp1);
13206
13207 push_to_sequence ((rtx_insn*) *gen_seq);
13208 if (!maybe_expand_insn (icode, 6, ops))
13209 {
13210 end_sequence ();
13211 return NULL_RTX;
13212 }
13213
13214 *gen_seq = get_insns ();
13215 end_sequence ();
13216
13217 return target;
13218}
13219
13220#undef TARGET_GEN_CCMP_FIRST
13221#define TARGET_GEN_CCMP_FIRST aarch64_gen_ccmp_first
13222
13223#undef TARGET_GEN_CCMP_NEXT
13224#define TARGET_GEN_CCMP_NEXT aarch64_gen_ccmp_next
13225
498fec4d 13226/* Implement TARGET_SCHED_MACRO_FUSION_P. Return true if target supports
13227 instruction fusion of some sort. */
13228
13229static bool
13230aarch64_macro_fusion_p (void)
13231{
14677da9 13232 return aarch64_tune_params.fusible_ops != AARCH64_FUSE_NOTHING;
498fec4d 13233}
13234
13235
13236/* Implement TARGET_SCHED_MACRO_FUSION_PAIR_P. Return true if PREV and CURR
13237 should be kept together during scheduling. */
13238
13239static bool
13240aarch_macro_fusion_pair_p (rtx_insn *prev, rtx_insn *curr)
13241{
13242 rtx set_dest;
13243 rtx prev_set = single_set (prev);
13244 rtx curr_set = single_set (curr);
13245 /* prev and curr are simple SET insns i.e. no flag setting or branching. */
13246 bool simple_sets_p = prev_set && curr_set && !any_condjump_p (curr);
13247
13248 if (!aarch64_macro_fusion_p ())
13249 return false;
13250
13251 if (simple_sets_p
14677da9 13252 && (aarch64_tune_params.fusible_ops & AARCH64_FUSE_MOV_MOVK))
498fec4d 13253 {
13254 /* We are trying to match:
13255 prev (mov) == (set (reg r0) (const_int imm16))
13256 curr (movk) == (set (zero_extract (reg r0)
13257 (const_int 16)
13258 (const_int 16))
13259 (const_int imm16_1)) */
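      /* In assembly terms this is the usual immediate-building pair, e.g.
	 (illustrative example only):
	   mov	x0, #0x1234
	   movk	x0, #0x5678, lsl #16  */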
13260
13261 set_dest = SET_DEST (curr_set);
13262
13263 if (GET_CODE (set_dest) == ZERO_EXTRACT
13264 && CONST_INT_P (SET_SRC (curr_set))
13265 && CONST_INT_P (SET_SRC (prev_set))
13266 && CONST_INT_P (XEXP (set_dest, 2))
13267 && INTVAL (XEXP (set_dest, 2)) == 16
13268 && REG_P (XEXP (set_dest, 0))
13269 && REG_P (SET_DEST (prev_set))
13270 && REGNO (XEXP (set_dest, 0)) == REGNO (SET_DEST (prev_set)))
13271 {
13272 return true;
13273 }
13274 }
13275
a1b874a7 13276 if (simple_sets_p
14677da9 13277 && (aarch64_tune_params.fusible_ops & AARCH64_FUSE_ADRP_ADD))
a1b874a7 13278 {
13279
13280 /* We're trying to match:
13281 prev (adrp) == (set (reg r1)
13282 (high (symbol_ref ("SYM"))))
13283 curr (add) == (set (reg r0)
13284 (lo_sum (reg r1)
13285 (symbol_ref ("SYM"))))
13286 Note that r0 need not necessarily be the same as r1, especially
13287 during pre-regalloc scheduling. */
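      /* In assembly terms (illustrative example only):
	   adrp	x1, sym
	   add	x0, x1, :lo12:sym  */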
13288
13289 if (satisfies_constraint_Ush (SET_SRC (prev_set))
13290 && REG_P (SET_DEST (prev_set)) && REG_P (SET_DEST (curr_set)))
13291 {
13292 if (GET_CODE (SET_SRC (curr_set)) == LO_SUM
13293 && REG_P (XEXP (SET_SRC (curr_set), 0))
13294 && REGNO (XEXP (SET_SRC (curr_set), 0))
13295 == REGNO (SET_DEST (prev_set))
13296 && rtx_equal_p (XEXP (SET_SRC (prev_set), 0),
13297 XEXP (SET_SRC (curr_set), 1)))
13298 return true;
13299 }
13300 }
13301
93d965bb 13302 if (simple_sets_p
14677da9 13303 && (aarch64_tune_params.fusible_ops & AARCH64_FUSE_MOVK_MOVK))
93d965bb 13304 {
13305
13306 /* We're trying to match:
13307 prev (movk) == (set (zero_extract (reg r0)
13308 (const_int 16)
13309 (const_int 32))
13310 (const_int imm16_1))
13311 curr (movk) == (set (zero_extract (reg r0)
13312 (const_int 16)
13313 (const_int 48))
13314 (const_int imm16_2)) */
13315
13316 if (GET_CODE (SET_DEST (prev_set)) == ZERO_EXTRACT
13317 && GET_CODE (SET_DEST (curr_set)) == ZERO_EXTRACT
13318 && REG_P (XEXP (SET_DEST (prev_set), 0))
13319 && REG_P (XEXP (SET_DEST (curr_set), 0))
13320 && REGNO (XEXP (SET_DEST (prev_set), 0))
13321 == REGNO (XEXP (SET_DEST (curr_set), 0))
13322 && CONST_INT_P (XEXP (SET_DEST (prev_set), 2))
13323 && CONST_INT_P (XEXP (SET_DEST (curr_set), 2))
13324 && INTVAL (XEXP (SET_DEST (prev_set), 2)) == 32
13325 && INTVAL (XEXP (SET_DEST (curr_set), 2)) == 48
13326 && CONST_INT_P (SET_SRC (prev_set))
13327 && CONST_INT_P (SET_SRC (curr_set)))
13328 return true;
13329
13330 }
3901abbb 13331 if (simple_sets_p
14677da9 13332 && (aarch64_tune_params.fusible_ops & AARCH64_FUSE_ADRP_LDR))
3901abbb 13333 {
13334 /* We're trying to match:
13335 prev (adrp) == (set (reg r0)
13336 (high (symbol_ref ("SYM"))))
13337 curr (ldr) == (set (reg r1)
13338 (mem (lo_sum (reg r0)
13339 (symbol_ref ("SYM")))))
13340 or
13341 curr (ldr) == (set (reg r1)
13342 (zero_extend (mem
13343 (lo_sum (reg r0)
13344 (symbol_ref ("SYM")))))) */
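      /* In assembly terms (illustrative example only):
	   adrp	x0, sym
	   ldr	x1, [x0, #:lo12:sym]  */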
13345 if (satisfies_constraint_Ush (SET_SRC (prev_set))
13346 && REG_P (SET_DEST (prev_set)) && REG_P (SET_DEST (curr_set)))
13347 {
13348 rtx curr_src = SET_SRC (curr_set);
13349
13350 if (GET_CODE (curr_src) == ZERO_EXTEND)
13351 curr_src = XEXP (curr_src, 0);
13352
13353 if (MEM_P (curr_src) && GET_CODE (XEXP (curr_src, 0)) == LO_SUM
13354 && REG_P (XEXP (XEXP (curr_src, 0), 0))
13355 && REGNO (XEXP (XEXP (curr_src, 0), 0))
13356 == REGNO (SET_DEST (prev_set))
13357 && rtx_equal_p (XEXP (XEXP (curr_src, 0), 1),
13358 XEXP (SET_SRC (prev_set), 0)))
13359 return true;
13360 }
13361 }
93d965bb 13362
14677da9 13363 if ((aarch64_tune_params.fusible_ops & AARCH64_FUSE_CMP_BRANCH)
2416dc4f 13364 && any_condjump_p (curr))
13365 {
13366 enum attr_type prev_type = get_attr_type (prev);
13367
 13368	 /* FIXME: this misses some instructions which are considered simple
 13369	 arithmetic instructions for ThunderX. Simple shifts are missed here. */
13370 if (prev_type == TYPE_ALUS_SREG
13371 || prev_type == TYPE_ALUS_IMM
13372 || prev_type == TYPE_LOGICS_REG
13373 || prev_type == TYPE_LOGICS_IMM)
13374 return true;
13375 }
13376
498fec4d 13377 return false;
13378}
13379
18852586 13380/* If MEM is in the form of [base+offset], extract the two parts
 13381	 of the address and store them in BASE and OFFSET; otherwise return false
 13382	 after clearing BASE and OFFSET. */
13383
13384bool
13385extract_base_offset_in_addr (rtx mem, rtx *base, rtx *offset)
13386{
13387 rtx addr;
13388
13389 gcc_assert (MEM_P (mem));
13390
13391 addr = XEXP (mem, 0);
13392
13393 if (REG_P (addr))
13394 {
13395 *base = addr;
13396 *offset = const0_rtx;
13397 return true;
13398 }
13399
13400 if (GET_CODE (addr) == PLUS
13401 && REG_P (XEXP (addr, 0)) && CONST_INT_P (XEXP (addr, 1)))
13402 {
13403 *base = XEXP (addr, 0);
13404 *offset = XEXP (addr, 1);
13405 return true;
13406 }
13407
13408 *base = NULL_RTX;
13409 *offset = NULL_RTX;
13410
13411 return false;
13412}
13413
13414/* Types for scheduling fusion. */
13415enum sched_fusion_type
13416{
13417 SCHED_FUSION_NONE = 0,
13418 SCHED_FUSION_LD_SIGN_EXTEND,
13419 SCHED_FUSION_LD_ZERO_EXTEND,
13420 SCHED_FUSION_LD,
13421 SCHED_FUSION_ST,
13422 SCHED_FUSION_NUM
13423};
13424
 13425	/* If INSN is a load or store of an address in the form of [base+offset],
 13426	 extract the two parts and store them in BASE and OFFSET. Return the
 13427	 scheduling fusion type of this INSN. */
13428
13429static enum sched_fusion_type
13430fusion_load_store (rtx_insn *insn, rtx *base, rtx *offset)
13431{
13432 rtx x, dest, src;
13433 enum sched_fusion_type fusion = SCHED_FUSION_LD;
13434
13435 gcc_assert (INSN_P (insn));
13436 x = PATTERN (insn);
13437 if (GET_CODE (x) != SET)
13438 return SCHED_FUSION_NONE;
13439
13440 src = SET_SRC (x);
13441 dest = SET_DEST (x);
13442
9202af54 13443 machine_mode dest_mode = GET_MODE (dest);
13444
13445 if (!aarch64_mode_valid_for_sched_fusion_p (dest_mode))
18852586 13446 return SCHED_FUSION_NONE;
13447
13448 if (GET_CODE (src) == SIGN_EXTEND)
13449 {
13450 fusion = SCHED_FUSION_LD_SIGN_EXTEND;
13451 src = XEXP (src, 0);
13452 if (GET_CODE (src) != MEM || GET_MODE (src) != SImode)
13453 return SCHED_FUSION_NONE;
13454 }
13455 else if (GET_CODE (src) == ZERO_EXTEND)
13456 {
13457 fusion = SCHED_FUSION_LD_ZERO_EXTEND;
13458 src = XEXP (src, 0);
13459 if (GET_CODE (src) != MEM || GET_MODE (src) != SImode)
13460 return SCHED_FUSION_NONE;
13461 }
13462
13463 if (GET_CODE (src) == MEM && REG_P (dest))
13464 extract_base_offset_in_addr (src, base, offset);
13465 else if (GET_CODE (dest) == MEM && (REG_P (src) || src == const0_rtx))
13466 {
13467 fusion = SCHED_FUSION_ST;
13468 extract_base_offset_in_addr (dest, base, offset);
13469 }
13470 else
13471 return SCHED_FUSION_NONE;
13472
13473 if (*base == NULL_RTX || *offset == NULL_RTX)
13474 fusion = SCHED_FUSION_NONE;
13475
13476 return fusion;
13477}
13478
13479/* Implement the TARGET_SCHED_FUSION_PRIORITY hook.
13480
 13481	 Currently we only support fusing ldr and str instructions, so FUSION_PRI
 13482	 and PRI are only calculated for these instructions. For other instructions,
 13483	 FUSION_PRI and PRI are simply set to MAX_PRI - 1. In the future, other
 13484	 types of instruction fusion can be added by returning different priorities.
13485
13486 It's important that irrelevant instructions get the largest FUSION_PRI. */
13487
13488static void
13489aarch64_sched_fusion_priority (rtx_insn *insn, int max_pri,
13490 int *fusion_pri, int *pri)
13491{
13492 int tmp, off_val;
13493 rtx base, offset;
13494 enum sched_fusion_type fusion;
13495
13496 gcc_assert (INSN_P (insn));
13497
13498 tmp = max_pri - 1;
13499 fusion = fusion_load_store (insn, &base, &offset);
13500 if (fusion == SCHED_FUSION_NONE)
13501 {
13502 *pri = tmp;
13503 *fusion_pri = tmp;
13504 return;
13505 }
13506
13507 /* Set FUSION_PRI according to fusion type and base register. */
13508 *fusion_pri = tmp - fusion * FIRST_PSEUDO_REGISTER - REGNO (base);
13509
13510 /* Calculate PRI. */
13511 tmp /= 2;
13512
13513 /* INSN with smaller offset goes first. */
13514 off_val = (int)(INTVAL (offset));
13515 if (off_val >= 0)
13516 tmp -= (off_val & 0xfffff);
13517 else
13518 tmp += ((- off_val) & 0xfffff);
13519
13520 *pri = tmp;
13521 return;
13522}
13523
13524/* Given OPERANDS of consecutive load/store, check if we can merge
13525 them into ldp/stp. LOAD is true if they are load instructions.
13526 MODE is the mode of memory operands. */
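/* For instance (illustrative), two consecutive SImode loads
     ldr	w0, [x2]
     ldr	w1, [x2, 4]
   can be merged into
     ldp	w0, w1, [x2].  */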
13527
13528bool
13529aarch64_operands_ok_for_ldpstp (rtx *operands, bool load,
13530 enum machine_mode mode)
13531{
13532 HOST_WIDE_INT offval_1, offval_2, msize;
13533 enum reg_class rclass_1, rclass_2;
13534 rtx mem_1, mem_2, reg_1, reg_2, base_1, base_2, offset_1, offset_2;
13535
13536 if (load)
13537 {
13538 mem_1 = operands[1];
13539 mem_2 = operands[3];
13540 reg_1 = operands[0];
13541 reg_2 = operands[2];
13542 gcc_assert (REG_P (reg_1) && REG_P (reg_2));
13543 if (REGNO (reg_1) == REGNO (reg_2))
13544 return false;
13545 }
13546 else
13547 {
13548 mem_1 = operands[0];
13549 mem_2 = operands[2];
13550 reg_1 = operands[1];
13551 reg_2 = operands[3];
13552 }
13553
8bf5b2f8 13554 /* The mems cannot be volatile. */
13555 if (MEM_VOLATILE_P (mem_1) || MEM_VOLATILE_P (mem_2))
13556 return false;
13557
18852586 13558 /* Check if the addresses are in the form of [base+offset]. */
13559 extract_base_offset_in_addr (mem_1, &base_1, &offset_1);
13560 if (base_1 == NULL_RTX || offset_1 == NULL_RTX)
13561 return false;
13562 extract_base_offset_in_addr (mem_2, &base_2, &offset_2);
13563 if (base_2 == NULL_RTX || offset_2 == NULL_RTX)
13564 return false;
13565
 13566	 /* Check if the bases are the same. */
13567 if (!rtx_equal_p (base_1, base_2))
13568 return false;
13569
13570 offval_1 = INTVAL (offset_1);
13571 offval_2 = INTVAL (offset_2);
13572 msize = GET_MODE_SIZE (mode);
13573 /* Check if the offsets are consecutive. */
13574 if (offval_1 != (offval_2 + msize) && offval_2 != (offval_1 + msize))
13575 return false;
13576
13577 /* Check if the addresses are clobbered by load. */
13578 if (load)
13579 {
13580 if (reg_mentioned_p (reg_1, mem_1))
13581 return false;
13582
13583 /* In increasing order, the last load can clobber the address. */
13584 if (offval_1 > offval_2 && reg_mentioned_p (reg_2, mem_2))
13585 return false;
13586 }
13587
13588 if (REG_P (reg_1) && FP_REGNUM_P (REGNO (reg_1)))
13589 rclass_1 = FP_REGS;
13590 else
13591 rclass_1 = GENERAL_REGS;
13592
13593 if (REG_P (reg_2) && FP_REGNUM_P (REGNO (reg_2)))
13594 rclass_2 = FP_REGS;
13595 else
13596 rclass_2 = GENERAL_REGS;
13597
 13598	 /* Check if the registers are of the same class. */
13599 if (rclass_1 != rclass_2)
13600 return false;
13601
13602 return true;
13603}
13604
13605/* Given OPERANDS of consecutive load/store, check if we can merge
13606 them into ldp/stp by adjusting the offset. LOAD is true if they
13607 are load instructions. MODE is the mode of memory operands.
13608
 13609	 Given the following consecutive stores:
13610
13611 str w1, [xb, 0x100]
13612 str w1, [xb, 0x104]
13613 str w1, [xb, 0x108]
13614 str w1, [xb, 0x10c]
13615
13616 Though the offsets are out of the range supported by stp, we can
13617 still pair them after adjusting the offset, like:
13618
13619 add scratch, xb, 0x100
13620 stp w1, w1, [scratch]
13621 stp w1, w1, [scratch, 0x8]
13622
13623 The peephole patterns detecting this opportunity should guarantee
 13624	 the scratch register is available. */
13625
13626bool
13627aarch64_operands_adjust_ok_for_ldpstp (rtx *operands, bool load,
13628 enum machine_mode mode)
13629{
13630 enum reg_class rclass_1, rclass_2, rclass_3, rclass_4;
13631 HOST_WIDE_INT offval_1, offval_2, offval_3, offval_4, msize;
13632 rtx mem_1, mem_2, mem_3, mem_4, reg_1, reg_2, reg_3, reg_4;
13633 rtx base_1, base_2, base_3, base_4, offset_1, offset_2, offset_3, offset_4;
13634
13635 if (load)
13636 {
13637 reg_1 = operands[0];
13638 mem_1 = operands[1];
13639 reg_2 = operands[2];
13640 mem_2 = operands[3];
13641 reg_3 = operands[4];
13642 mem_3 = operands[5];
13643 reg_4 = operands[6];
13644 mem_4 = operands[7];
13645 gcc_assert (REG_P (reg_1) && REG_P (reg_2)
13646 && REG_P (reg_3) && REG_P (reg_4));
13647 if (REGNO (reg_1) == REGNO (reg_2) || REGNO (reg_3) == REGNO (reg_4))
13648 return false;
13649 }
13650 else
13651 {
13652 mem_1 = operands[0];
13653 reg_1 = operands[1];
13654 mem_2 = operands[2];
13655 reg_2 = operands[3];
13656 mem_3 = operands[4];
13657 reg_3 = operands[5];
13658 mem_4 = operands[6];
13659 reg_4 = operands[7];
13660 }
 13661	 /* Skip if the memory operand is by itself valid for ldp/stp. */
13662 if (!MEM_P (mem_1) || aarch64_mem_pair_operand (mem_1, mode))
13663 return false;
13664
8bf5b2f8 13665 /* The mems cannot be volatile. */
13666 if (MEM_VOLATILE_P (mem_1) || MEM_VOLATILE_P (mem_2)
13667 || MEM_VOLATILE_P (mem_3) ||MEM_VOLATILE_P (mem_4))
13668 return false;
13669
18852586 13670 /* Check if the addresses are in the form of [base+offset]. */
13671 extract_base_offset_in_addr (mem_1, &base_1, &offset_1);
13672 if (base_1 == NULL_RTX || offset_1 == NULL_RTX)
13673 return false;
13674 extract_base_offset_in_addr (mem_2, &base_2, &offset_2);
13675 if (base_2 == NULL_RTX || offset_2 == NULL_RTX)
13676 return false;
13677 extract_base_offset_in_addr (mem_3, &base_3, &offset_3);
13678 if (base_3 == NULL_RTX || offset_3 == NULL_RTX)
13679 return false;
13680 extract_base_offset_in_addr (mem_4, &base_4, &offset_4);
13681 if (base_4 == NULL_RTX || offset_4 == NULL_RTX)
13682 return false;
13683
 13684	 /* Check if the bases are the same. */
13685 if (!rtx_equal_p (base_1, base_2)
13686 || !rtx_equal_p (base_2, base_3)
13687 || !rtx_equal_p (base_3, base_4))
13688 return false;
13689
13690 offval_1 = INTVAL (offset_1);
13691 offval_2 = INTVAL (offset_2);
13692 offval_3 = INTVAL (offset_3);
13693 offval_4 = INTVAL (offset_4);
13694 msize = GET_MODE_SIZE (mode);
13695 /* Check if the offsets are consecutive. */
13696 if ((offval_1 != (offval_2 + msize)
13697 || offval_1 != (offval_3 + msize * 2)
13698 || offval_1 != (offval_4 + msize * 3))
13699 && (offval_4 != (offval_3 + msize)
13700 || offval_4 != (offval_2 + msize * 2)
13701 || offval_4 != (offval_1 + msize * 3)))
13702 return false;
13703
13704 /* Check if the addresses are clobbered by load. */
13705 if (load)
13706 {
13707 if (reg_mentioned_p (reg_1, mem_1)
13708 || reg_mentioned_p (reg_2, mem_2)
13709 || reg_mentioned_p (reg_3, mem_3))
13710 return false;
13711
13712 /* In increasing order, the last load can clobber the address. */
13713 if (offval_1 > offval_2 && reg_mentioned_p (reg_4, mem_4))
13714 return false;
13715 }
13716
13717 if (REG_P (reg_1) && FP_REGNUM_P (REGNO (reg_1)))
13718 rclass_1 = FP_REGS;
13719 else
13720 rclass_1 = GENERAL_REGS;
13721
13722 if (REG_P (reg_2) && FP_REGNUM_P (REGNO (reg_2)))
13723 rclass_2 = FP_REGS;
13724 else
13725 rclass_2 = GENERAL_REGS;
13726
13727 if (REG_P (reg_3) && FP_REGNUM_P (REGNO (reg_3)))
13728 rclass_3 = FP_REGS;
13729 else
13730 rclass_3 = GENERAL_REGS;
13731
13732 if (REG_P (reg_4) && FP_REGNUM_P (REGNO (reg_4)))
13733 rclass_4 = FP_REGS;
13734 else
13735 rclass_4 = GENERAL_REGS;
13736
 13737	 /* Check if the registers are of the same class. */
13738 if (rclass_1 != rclass_2 || rclass_2 != rclass_3 || rclass_3 != rclass_4)
13739 return false;
13740
13741 return true;
13742}
13743
13744/* Given OPERANDS of consecutive load/store, this function pairs them
13745 into ldp/stp after adjusting the offset. It depends on the fact
13746 that addresses of load/store instructions are in increasing order.
13747 MODE is the mode of memory operands. CODE is the rtl operator
13748 which should be applied to all memory operands, it's SIGN_EXTEND,
13749 ZERO_EXTEND or UNKNOWN. */
13750
13751bool
13752aarch64_gen_adjusted_ldpstp (rtx *operands, bool load,
13753 enum machine_mode mode, RTX_CODE code)
13754{
13755 rtx base, offset, t1, t2;
13756 rtx mem_1, mem_2, mem_3, mem_4;
13757 HOST_WIDE_INT off_val, abs_off, adj_off, new_off, stp_off_limit, msize;
13758
13759 if (load)
13760 {
13761 mem_1 = operands[1];
13762 mem_2 = operands[3];
13763 mem_3 = operands[5];
13764 mem_4 = operands[7];
13765 }
13766 else
13767 {
13768 mem_1 = operands[0];
13769 mem_2 = operands[2];
13770 mem_3 = operands[4];
13771 mem_4 = operands[6];
13772 gcc_assert (code == UNKNOWN);
13773 }
13774
13775 extract_base_offset_in_addr (mem_1, &base, &offset);
13776 gcc_assert (base != NULL_RTX && offset != NULL_RTX);
13777
 13778	 /* Adjust the offset so that it can fit in an ldp/stp instruction. */
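  /* Worked example (illustrative): for SImode, msize is 4 and the limit is
     0x100; an incoming offset of 0x104 splits into adj_off == 0x100 and
     new_off == 0x4, so the scratch register is set to base + 0x100 and the
     two pair instructions use offsets 0x4 and 0xc.  */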
13779 msize = GET_MODE_SIZE (mode);
13780 stp_off_limit = msize * 0x40;
13781 off_val = INTVAL (offset);
13782 abs_off = (off_val < 0) ? -off_val : off_val;
13783 new_off = abs_off % stp_off_limit;
13784 adj_off = abs_off - new_off;
13785
13786 /* Further adjust to make sure all offsets are OK. */
13787 if ((new_off + msize * 2) >= stp_off_limit)
13788 {
13789 adj_off += stp_off_limit;
13790 new_off -= stp_off_limit;
13791 }
13792
13793 /* Make sure the adjustment can be done with ADD/SUB instructions. */
13794 if (adj_off >= 0x1000)
13795 return false;
13796
13797 if (off_val < 0)
13798 {
13799 adj_off = -adj_off;
13800 new_off = -new_off;
13801 }
13802
13803 /* Create new memory references. */
13804 mem_1 = change_address (mem_1, VOIDmode,
13805 plus_constant (DImode, operands[8], new_off));
13806
13807 /* Check if the adjusted address is OK for ldp/stp. */
13808 if (!aarch64_mem_pair_operand (mem_1, mode))
13809 return false;
13810
13811 msize = GET_MODE_SIZE (mode);
13812 mem_2 = change_address (mem_2, VOIDmode,
13813 plus_constant (DImode,
13814 operands[8],
13815 new_off + msize));
13816 mem_3 = change_address (mem_3, VOIDmode,
13817 plus_constant (DImode,
13818 operands[8],
13819 new_off + msize * 2));
13820 mem_4 = change_address (mem_4, VOIDmode,
13821 plus_constant (DImode,
13822 operands[8],
13823 new_off + msize * 3));
13824
13825 if (code == ZERO_EXTEND)
13826 {
13827 mem_1 = gen_rtx_ZERO_EXTEND (DImode, mem_1);
13828 mem_2 = gen_rtx_ZERO_EXTEND (DImode, mem_2);
13829 mem_3 = gen_rtx_ZERO_EXTEND (DImode, mem_3);
13830 mem_4 = gen_rtx_ZERO_EXTEND (DImode, mem_4);
13831 }
13832 else if (code == SIGN_EXTEND)
13833 {
13834 mem_1 = gen_rtx_SIGN_EXTEND (DImode, mem_1);
13835 mem_2 = gen_rtx_SIGN_EXTEND (DImode, mem_2);
13836 mem_3 = gen_rtx_SIGN_EXTEND (DImode, mem_3);
13837 mem_4 = gen_rtx_SIGN_EXTEND (DImode, mem_4);
13838 }
13839
13840 if (load)
13841 {
13842 operands[1] = mem_1;
13843 operands[3] = mem_2;
13844 operands[5] = mem_3;
13845 operands[7] = mem_4;
13846 }
13847 else
13848 {
13849 operands[0] = mem_1;
13850 operands[2] = mem_2;
13851 operands[4] = mem_3;
13852 operands[6] = mem_4;
13853 }
13854
13855 /* Emit adjusting instruction. */
d1f9b275 13856 emit_insn (gen_rtx_SET (operands[8], plus_constant (DImode, base, adj_off)));
18852586 13857 /* Emit ldp/stp instructions. */
d1f9b275 13858 t1 = gen_rtx_SET (operands[0], operands[1]);
13859 t2 = gen_rtx_SET (operands[2], operands[3]);
18852586 13860 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, t1, t2)));
d1f9b275 13861 t1 = gen_rtx_SET (operands[4], operands[5]);
13862 t2 = gen_rtx_SET (operands[6], operands[7]);
18852586 13863 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, t1, t2)));
13864 return true;
13865}
13866
65f988f7 13867/* Return true if a pseudo register should be created and used to hold
 13868	 the GOT address for PIC code. */
13869
13870bool
13871aarch64_use_pseudo_pic_reg (void)
13872{
13873 return aarch64_cmodel == AARCH64_CMODEL_SMALL_SPIC;
13874}
13875
3c70ff33 13876/* Implement TARGET_UNSPEC_MAY_TRAP_P. */
13877
13878static int
13879aarch64_unspec_may_trap_p (const_rtx x, unsigned flags)
13880{
13881 switch (XINT (x, 1))
13882 {
13883 case UNSPEC_GOTSMALLPIC:
13884 case UNSPEC_GOTSMALLPIC28K:
13885 case UNSPEC_GOTTINYPIC:
13886 return 0;
13887 default:
13888 break;
13889 }
13890
13891 return default_unspec_may_trap_p (x, flags);
13892}
13893
ac80c076 13894
13895/* If X is a positive CONST_DOUBLE with a value that is a power of 2
13896 return the log2 of that value. Otherwise return -1. */
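/* For example (illustrative): 4.0 yields 2 and 1.0 yields 0, while 3.0,
   -2.0 and 0.5 all yield -1.  */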
13897
13898int
13899aarch64_fpconst_pow_of_2 (rtx x)
13900{
13901 const REAL_VALUE_TYPE *r;
13902
13903 if (!CONST_DOUBLE_P (x))
13904 return -1;
13905
13906 r = CONST_DOUBLE_REAL_VALUE (x);
13907
13908 if (REAL_VALUE_NEGATIVE (*r)
13909 || REAL_VALUE_ISNAN (*r)
13910 || REAL_VALUE_ISINF (*r)
13911 || !real_isinteger (r, DFmode))
13912 return -1;
13913
13914 return exact_log2 (real_to_integer (r));
13915}
13916
13917/* If X is a vector of equal CONST_DOUBLE values and that value is
13918 Y, return the aarch64_fpconst_pow_of_2 of Y. Otherwise return -1. */
13919
13920int
13921aarch64_vec_fpconst_pow_of_2 (rtx x)
13922{
13923 if (GET_CODE (x) != CONST_VECTOR)
13924 return -1;
13925
13926 if (GET_MODE_CLASS (GET_MODE (x)) != MODE_VECTOR_FLOAT)
13927 return -1;
13928
13929 int firstval = aarch64_fpconst_pow_of_2 (CONST_VECTOR_ELT (x, 0));
13930 if (firstval <= 0)
13931 return -1;
13932
13933 for (int i = 1; i < CONST_VECTOR_NUNITS (x); i++)
13934 if (aarch64_fpconst_pow_of_2 (CONST_VECTOR_ELT (x, i)) != firstval)
13935 return -1;
13936
13937 return firstval;
13938}
13939
6f520654 13940/* Implement TARGET_PROMOTED_TYPE to promote __fp16 to float. */
13941static tree
13942aarch64_promoted_type (const_tree t)
13943{
13944 if (SCALAR_FLOAT_TYPE_P (t) && TYPE_PRECISION (t) == 16)
13945 return float_type_node;
13946 return NULL_TREE;
13947}
4cfd27a5 13948
13949/* Implement the TARGET_OPTAB_SUPPORTED_P hook. */
13950
13951static bool
13952aarch64_optab_supported_p (int op, machine_mode, machine_mode,
13953 optimization_type opt_type)
13954{
13955 switch (op)
13956 {
13957 case rsqrt_optab:
13958 return opt_type == OPTIMIZE_FOR_SPEED && use_rsqrt_p ();
13959
13960 default:
13961 return true;
13962 }
13963}
13964
df401d54 13965#undef TARGET_ADDRESS_COST
13966#define TARGET_ADDRESS_COST aarch64_address_cost
13967
 13968	/* This hook determines whether unnamed bitfields affect the alignment
13969 of the containing structure. The hook returns true if the structure
13970 should inherit the alignment requirements of an unnamed bitfield's
13971 type. */
13972#undef TARGET_ALIGN_ANON_BITFIELD
13973#define TARGET_ALIGN_ANON_BITFIELD hook_bool_void_true
13974
13975#undef TARGET_ASM_ALIGNED_DI_OP
13976#define TARGET_ASM_ALIGNED_DI_OP "\t.xword\t"
13977
13978#undef TARGET_ASM_ALIGNED_HI_OP
13979#define TARGET_ASM_ALIGNED_HI_OP "\t.hword\t"
13980
13981#undef TARGET_ASM_ALIGNED_SI_OP
13982#define TARGET_ASM_ALIGNED_SI_OP "\t.word\t"
13983
13984#undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
13985#define TARGET_ASM_CAN_OUTPUT_MI_THUNK \
13986 hook_bool_const_tree_hwi_hwi_const_tree_true
13987
df401d54 13988#undef TARGET_ASM_OUTPUT_MI_THUNK
13989#define TARGET_ASM_OUTPUT_MI_THUNK aarch64_output_mi_thunk
13990
13991#undef TARGET_ASM_SELECT_RTX_SECTION
13992#define TARGET_ASM_SELECT_RTX_SECTION aarch64_select_rtx_section
13993
13994#undef TARGET_ASM_TRAMPOLINE_TEMPLATE
13995#define TARGET_ASM_TRAMPOLINE_TEMPLATE aarch64_asm_trampoline_template
13996
13997#undef TARGET_BUILD_BUILTIN_VA_LIST
13998#define TARGET_BUILD_BUILTIN_VA_LIST aarch64_build_builtin_va_list
13999
14000#undef TARGET_CALLEE_COPIES
14001#define TARGET_CALLEE_COPIES hook_bool_CUMULATIVE_ARGS_mode_tree_bool_false
14002
14003#undef TARGET_CAN_ELIMINATE
14004#define TARGET_CAN_ELIMINATE aarch64_can_eliminate
14005
b0269e32 14006#undef TARGET_CAN_INLINE_P
14007#define TARGET_CAN_INLINE_P aarch64_can_inline_p
14008
df401d54 14009#undef TARGET_CANNOT_FORCE_CONST_MEM
14010#define TARGET_CANNOT_FORCE_CONST_MEM aarch64_cannot_force_const_mem
14011
35c51aa0 14012#undef TARGET_CASE_VALUES_THRESHOLD
14013#define TARGET_CASE_VALUES_THRESHOLD aarch64_case_values_threshold
14014
df401d54 14015#undef TARGET_CONDITIONAL_REGISTER_USAGE
14016#define TARGET_CONDITIONAL_REGISTER_USAGE aarch64_conditional_register_usage
14017
14018/* Only the least significant bit is used for initialization guard
14019 variables. */
14020#undef TARGET_CXX_GUARD_MASK_BIT
14021#define TARGET_CXX_GUARD_MASK_BIT hook_bool_void_true
14022
14023#undef TARGET_C_MODE_FOR_SUFFIX
14024#define TARGET_C_MODE_FOR_SUFFIX aarch64_c_mode_for_suffix
14025
14026#ifdef TARGET_BIG_ENDIAN_DEFAULT
14027#undef TARGET_DEFAULT_TARGET_FLAGS
14028#define TARGET_DEFAULT_TARGET_FLAGS (MASK_BIG_END)
14029#endif
14030
14031#undef TARGET_CLASS_MAX_NREGS
14032#define TARGET_CLASS_MAX_NREGS aarch64_class_max_nregs
14033
18cee018 14034#undef TARGET_BUILTIN_DECL
14035#define TARGET_BUILTIN_DECL aarch64_builtin_decl
14036
e1a2ea91 14037#undef TARGET_BUILTIN_RECIPROCAL
14038#define TARGET_BUILTIN_RECIPROCAL aarch64_builtin_reciprocal
14039
df401d54 14040#undef TARGET_EXPAND_BUILTIN
14041#define TARGET_EXPAND_BUILTIN aarch64_expand_builtin
14042
14043#undef TARGET_EXPAND_BUILTIN_VA_START
14044#define TARGET_EXPAND_BUILTIN_VA_START aarch64_expand_builtin_va_start
14045
f164e9a5 14046#undef TARGET_FOLD_BUILTIN
14047#define TARGET_FOLD_BUILTIN aarch64_fold_builtin
14048
df401d54 14049#undef TARGET_FUNCTION_ARG
14050#define TARGET_FUNCTION_ARG aarch64_function_arg
14051
14052#undef TARGET_FUNCTION_ARG_ADVANCE
14053#define TARGET_FUNCTION_ARG_ADVANCE aarch64_function_arg_advance
14054
14055#undef TARGET_FUNCTION_ARG_BOUNDARY
14056#define TARGET_FUNCTION_ARG_BOUNDARY aarch64_function_arg_boundary
14057
14058#undef TARGET_FUNCTION_OK_FOR_SIBCALL
14059#define TARGET_FUNCTION_OK_FOR_SIBCALL aarch64_function_ok_for_sibcall
14060
14061#undef TARGET_FUNCTION_VALUE
14062#define TARGET_FUNCTION_VALUE aarch64_function_value
14063
14064#undef TARGET_FUNCTION_VALUE_REGNO_P
14065#define TARGET_FUNCTION_VALUE_REGNO_P aarch64_function_value_regno_p
14066
14067#undef TARGET_FRAME_POINTER_REQUIRED
14068#define TARGET_FRAME_POINTER_REQUIRED aarch64_frame_pointer_required
14069
1ecd979f 14070#undef TARGET_GIMPLE_FOLD_BUILTIN
14071#define TARGET_GIMPLE_FOLD_BUILTIN aarch64_gimple_fold_builtin
58aab7ce 14072
df401d54 14073#undef TARGET_GIMPLIFY_VA_ARG_EXPR
14074#define TARGET_GIMPLIFY_VA_ARG_EXPR aarch64_gimplify_va_arg_expr
14075
14076#undef TARGET_INIT_BUILTINS
14077#define TARGET_INIT_BUILTINS aarch64_init_builtins
14078
14079#undef TARGET_LEGITIMATE_ADDRESS_P
14080#define TARGET_LEGITIMATE_ADDRESS_P aarch64_legitimate_address_hook_p
14081
14082#undef TARGET_LEGITIMATE_CONSTANT_P
14083#define TARGET_LEGITIMATE_CONSTANT_P aarch64_legitimate_constant_p
14084
14085#undef TARGET_LIBGCC_CMP_RETURN_MODE
14086#define TARGET_LIBGCC_CMP_RETURN_MODE aarch64_libgcc_cmp_return_mode
14087
07ca5686 14088#undef TARGET_LRA_P
757bc674 14089#define TARGET_LRA_P hook_bool_void_true
07ca5686 14090
ea092ff3 14091#undef TARGET_MANGLE_TYPE
14092#define TARGET_MANGLE_TYPE aarch64_mangle_type
14093
df401d54 14094#undef TARGET_MEMORY_MOVE_COST
14095#define TARGET_MEMORY_MOVE_COST aarch64_memory_move_cost
14096
c8203104 14097#undef TARGET_MIN_DIVISIONS_FOR_RECIP_MUL
14098#define TARGET_MIN_DIVISIONS_FOR_RECIP_MUL aarch64_min_divisions_for_recip_mul
14099
df401d54 14100#undef TARGET_MUST_PASS_IN_STACK
14101#define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size
14102
14103/* This target hook should return true if accesses to volatile bitfields
14104 should use the narrowest mode possible. It should return false if these
14105 accesses should use the bitfield container type. */
14106#undef TARGET_NARROW_VOLATILE_BITFIELD
14107#define TARGET_NARROW_VOLATILE_BITFIELD hook_bool_void_false
14108
14109#undef TARGET_OPTION_OVERRIDE
14110#define TARGET_OPTION_OVERRIDE aarch64_override_options
14111
14112#undef TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE
14113#define TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE \
14114 aarch64_override_options_after_change
14115
a0db861f 14116#undef TARGET_OPTION_SAVE
14117#define TARGET_OPTION_SAVE aarch64_option_save
14118
14119#undef TARGET_OPTION_RESTORE
14120#define TARGET_OPTION_RESTORE aarch64_option_restore
14121
14122#undef TARGET_OPTION_PRINT
14123#define TARGET_OPTION_PRINT aarch64_option_print
14124
aadb8e17 14125#undef TARGET_OPTION_VALID_ATTRIBUTE_P
14126#define TARGET_OPTION_VALID_ATTRIBUTE_P aarch64_option_valid_attribute_p
14127
f59387ab 14128#undef TARGET_SET_CURRENT_FUNCTION
14129#define TARGET_SET_CURRENT_FUNCTION aarch64_set_current_function
14130
df401d54 14131#undef TARGET_PASS_BY_REFERENCE
14132#define TARGET_PASS_BY_REFERENCE aarch64_pass_by_reference
14133
14134#undef TARGET_PREFERRED_RELOAD_CLASS
14135#define TARGET_PREFERRED_RELOAD_CLASS aarch64_preferred_reload_class
14136
88227718 14137#undef TARGET_SCHED_REASSOCIATION_WIDTH
14138#define TARGET_SCHED_REASSOCIATION_WIDTH aarch64_reassociation_width
14139
6f520654 14140#undef TARGET_PROMOTED_TYPE
14141#define TARGET_PROMOTED_TYPE aarch64_promoted_type
14142
df401d54 14143#undef TARGET_SECONDARY_RELOAD
14144#define TARGET_SECONDARY_RELOAD aarch64_secondary_reload
14145
14146#undef TARGET_SHIFT_TRUNCATION_MASK
14147#define TARGET_SHIFT_TRUNCATION_MASK aarch64_shift_truncation_mask
14148
14149#undef TARGET_SETUP_INCOMING_VARARGS
14150#define TARGET_SETUP_INCOMING_VARARGS aarch64_setup_incoming_varargs
14151
14152#undef TARGET_STRUCT_VALUE_RTX
14153#define TARGET_STRUCT_VALUE_RTX aarch64_struct_value_rtx
14154
14155#undef TARGET_REGISTER_MOVE_COST
14156#define TARGET_REGISTER_MOVE_COST aarch64_register_move_cost
14157
14158#undef TARGET_RETURN_IN_MEMORY
14159#define TARGET_RETURN_IN_MEMORY aarch64_return_in_memory
14160
14161#undef TARGET_RETURN_IN_MSB
14162#define TARGET_RETURN_IN_MSB aarch64_return_in_msb
14163
14164#undef TARGET_RTX_COSTS
d515bbc9 14165#define TARGET_RTX_COSTS aarch64_rtx_costs_wrapper
df401d54 14166
268a662f 14167#undef TARGET_SCHED_ISSUE_RATE
14168#define TARGET_SCHED_ISSUE_RATE aarch64_sched_issue_rate
14169
65d538fd 14170#undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
14171#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
14172 aarch64_sched_first_cycle_multipass_dfa_lookahead
14173
5f73ddf0 14174#undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD
14175#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD \
14176 aarch64_first_cycle_multipass_dfa_lookahead_guard
14177
df401d54 14178#undef TARGET_TRAMPOLINE_INIT
14179#define TARGET_TRAMPOLINE_INIT aarch64_trampoline_init
14180
14181#undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
14182#define TARGET_USE_BLOCKS_FOR_CONSTANT_P aarch64_use_blocks_for_constant_p
14183
14184#undef TARGET_VECTOR_MODE_SUPPORTED_P
14185#define TARGET_VECTOR_MODE_SUPPORTED_P aarch64_vector_mode_supported_p
14186
14187#undef TARGET_ARRAY_MODE_SUPPORTED_P
14188#define TARGET_ARRAY_MODE_SUPPORTED_P aarch64_array_mode_supported_p
14189
61d9499e 14190#undef TARGET_VECTORIZE_ADD_STMT_COST
14191#define TARGET_VECTORIZE_ADD_STMT_COST aarch64_add_stmt_cost
14192
14193#undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
14194#define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
14195 aarch64_builtin_vectorization_cost
14196
df401d54 14197#undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
14198#define TARGET_VECTORIZE_PREFERRED_SIMD_MODE aarch64_preferred_simd_mode
14199
ba640418 14200#undef TARGET_VECTORIZE_BUILTINS
14201#define TARGET_VECTORIZE_BUILTINS
14202
14203#undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
14204#define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
14205 aarch64_builtin_vectorized_function
14206
b9ed2299 14207#undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES
14208#define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES \
14209 aarch64_autovectorize_vector_sizes
14210
1343c5e0 14211#undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV
14212#define TARGET_ATOMIC_ASSIGN_EXPAND_FENV \
14213 aarch64_atomic_assign_expand_fenv
14214
df401d54 14215/* Section anchor support. */
14216
14217#undef TARGET_MIN_ANCHOR_OFFSET
14218#define TARGET_MIN_ANCHOR_OFFSET -256
14219
14220/* Limit the maximum anchor offset to 4k-1, since that's the limit for a
14221 byte offset; we can do much more for larger data types, but have no way
14222 to determine the size of the access. We assume accesses are aligned. */
14223#undef TARGET_MAX_ANCHOR_OFFSET
14224#define TARGET_MAX_ANCHOR_OFFSET 4095
14225
bb374c4d 14226#undef TARGET_VECTOR_ALIGNMENT
14227#define TARGET_VECTOR_ALIGNMENT aarch64_simd_vector_alignment
14228
14229#undef TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE
14230#define TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE \
14231 aarch64_simd_vector_alignment_reachable
14232
5de1fcdb 14233/* vec_perm support. */
14234
14235#undef TARGET_VECTORIZE_VEC_PERM_CONST_OK
14236#define TARGET_VECTORIZE_VEC_PERM_CONST_OK \
14237 aarch64_vectorize_vec_perm_const_ok
14238
6f520654 14239#undef TARGET_INIT_LIBFUNCS
14240#define TARGET_INIT_LIBFUNCS aarch64_init_libfuncs
b49f35d1 14241
9fc8b0bf 14242#undef TARGET_FIXED_CONDITION_CODE_REGS
b49f35d1 14243#define TARGET_FIXED_CONDITION_CODE_REGS aarch64_fixed_condition_code_regs
14244
c0277521 14245#undef TARGET_FLAGS_REGNUM
14246#define TARGET_FLAGS_REGNUM CC_REGNUM
14247
ba189be5 14248#undef TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS
14249#define TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS true
14250
6153b62d 14251#undef TARGET_ASAN_SHADOW_OFFSET
14252#define TARGET_ASAN_SHADOW_OFFSET aarch64_asan_shadow_offset
14253
10635db9 14254#undef TARGET_LEGITIMIZE_ADDRESS
14255#define TARGET_LEGITIMIZE_ADDRESS aarch64_legitimize_address
14256
16b382d1 14257#undef TARGET_USE_BY_PIECES_INFRASTRUCTURE_P
14258#define TARGET_USE_BY_PIECES_INFRASTRUCTURE_P \
14259 aarch64_use_by_pieces_infrastructure_p
14260
9d296e7d 14261#undef TARGET_CAN_USE_DOLOOP_P
14262#define TARGET_CAN_USE_DOLOOP_P can_use_doloop_if_innermost
14263
498fec4d 14264#undef TARGET_SCHED_MACRO_FUSION_P
14265#define TARGET_SCHED_MACRO_FUSION_P aarch64_macro_fusion_p
14266
14267#undef TARGET_SCHED_MACRO_FUSION_PAIR_P
14268#define TARGET_SCHED_MACRO_FUSION_PAIR_P aarch_macro_fusion_pair_p
14269
18852586 14270#undef TARGET_SCHED_FUSION_PRIORITY
14271#define TARGET_SCHED_FUSION_PRIORITY aarch64_sched_fusion_priority
14272
3c70ff33 14273#undef TARGET_UNSPEC_MAY_TRAP_P
14274#define TARGET_UNSPEC_MAY_TRAP_P aarch64_unspec_may_trap_p
14275
65f988f7 14276#undef TARGET_USE_PSEUDO_PIC_REG
14277#define TARGET_USE_PSEUDO_PIC_REG aarch64_use_pseudo_pic_reg
14278
3c047fe9 14279#undef TARGET_PRINT_OPERAND
14280#define TARGET_PRINT_OPERAND aarch64_print_operand
14281
14282#undef TARGET_PRINT_OPERAND_ADDRESS
14283#define TARGET_PRINT_OPERAND_ADDRESS aarch64_print_operand_address
14284
4cfd27a5 14285#undef TARGET_OPTAB_SUPPORTED_P
14286#define TARGET_OPTAB_SUPPORTED_P aarch64_optab_supported_p
14287
df401d54 14288struct gcc_target targetm = TARGET_INITIALIZER;
14289
14290#include "gt-aarch64.h"