/* Machine description for AArch64 architecture.
   Copyright (C) 2009-2016 Free Software Foundation, Inc.
   Contributed by ARM Ltd.

   This file is part of GCC.

   GCC is free software; you can redistribute it and/or modify it
   under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 3, or (at your option)
   any later version.

   GCC is distributed in the hope that it will be useful, but
   WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with GCC; see the file COPYING3.  If not see
   <http://www.gnu.org/licenses/>.  */

#include "config.h"
#define INCLUDE_STRING
#include "system.h"
#include "coretypes.h"
#include "backend.h"
#include "target.h"
#include "rtl.h"
#include "tree.h"
#include "memmodel.h"
#include "gimple.h"
#include "cfghooks.h"
#include "cfgloop.h"
#include "df.h"
#include "tm_p.h"
#include "stringpool.h"
#include "optabs.h"
#include "regs.h"
#include "emit-rtl.h"
#include "recog.h"
#include "diagnostic.h"
#include "insn-attr.h"
#include "alias.h"
#include "fold-const.h"
#include "stor-layout.h"
#include "calls.h"
#include "varasm.h"
#include "output.h"
#include "flags.h"
#include "explow.h"
#include "expr.h"
#include "reload.h"
#include "langhooks.h"
#include "opts.h"
#include "params.h"
#include "gimplify.h"
#include "dwarf2.h"
#include "gimple-iterator.h"
#include "tree-vectorizer.h"
#include "aarch64-cost-tables.h"
#include "dumpfile.h"
#include "builtins.h"
#include "rtl-iter.h"
#include "tm-constrs.h"
#include "sched-int.h"
#include "target-globals.h"
#include "common/common-target.h"

/* This file should be included last.  */
#include "target-def.h"

/* Defined for convenience.  */
#define POINTER_BYTES (POINTER_SIZE / BITS_PER_UNIT)

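/* POINTER_BYTES is 8 for the LP64 ABI; under ILP32, where POINTER_SIZE is
   32, it evaluates to 4.  */
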
/* Classifies an address.

   ADDRESS_REG_IMM
       A simple base register plus immediate offset.

   ADDRESS_REG_WB
       A base register indexed by immediate offset with writeback.

   ADDRESS_REG_REG
       A base register indexed by (optionally scaled) register.

   ADDRESS_REG_UXTW
       A base register indexed by (optionally scaled) zero-extended register.

   ADDRESS_REG_SXTW
       A base register indexed by (optionally scaled) sign-extended register.

   ADDRESS_LO_SUM
       A LO_SUM rtx with a base register and "LO12" symbol relocation.

   ADDRESS_SYMBOLIC:
       A constant symbolic address, in pc-relative literal pool.  */

enum aarch64_address_type {
  ADDRESS_REG_IMM,
  ADDRESS_REG_WB,
  ADDRESS_REG_REG,
  ADDRESS_REG_UXTW,
  ADDRESS_REG_SXTW,
  ADDRESS_LO_SUM,
  ADDRESS_SYMBOLIC
};

struct aarch64_address_info {
  enum aarch64_address_type type;
  rtx base;
  rtx offset;
  int shift;
  enum aarch64_symbol_type symbol_type;
};

struct simd_immediate_info
{
  rtx value;
  int shift;
  int element_width;
  bool mvn;
  bool msl;
};

/* The current code model.  */
enum aarch64_code_model aarch64_cmodel;

#ifdef HAVE_AS_TLS
#undef TARGET_HAVE_TLS
#define TARGET_HAVE_TLS 1
#endif

static bool aarch64_composite_type_p (const_tree, machine_mode);
static bool aarch64_vfp_is_call_or_return_candidate (machine_mode,
                                                     const_tree,
                                                     machine_mode *, int *,
                                                     bool *);
static void aarch64_elf_asm_constructor (rtx, int) ATTRIBUTE_UNUSED;
static void aarch64_elf_asm_destructor (rtx, int) ATTRIBUTE_UNUSED;
static void aarch64_override_options_after_change (void);
static bool aarch64_vector_mode_supported_p (machine_mode);
static bool aarch64_vectorize_vec_perm_const_ok (machine_mode vmode,
                                                 const unsigned char *sel);
static int aarch64_address_cost (rtx, machine_mode, addr_space_t, bool);
static bool aarch64_builtin_support_vector_misalignment (machine_mode mode,
                                                         const_tree type,
                                                         int misalignment,
                                                         bool is_packed);

/* Major revision number of the ARM Architecture implemented by the target.  */
unsigned aarch64_architecture_version;

/* The processor for which instructions should be scheduled.  */
enum aarch64_processor aarch64_tune = cortexa53;

/* Mask to specify which instruction scheduling options should be used.  */
unsigned long aarch64_tune_flags = 0;

/* Global flag for PC relative loads.  */
bool aarch64_pcrelative_literal_loads;

/* Support for command line parsing of boolean flags in the tuning
   structures.  */
struct aarch64_flag_desc
{
  const char* name;
  unsigned int flag;
};

#define AARCH64_FUSION_PAIR(name, internal_name) \
  { name, AARCH64_FUSE_##internal_name },
static const struct aarch64_flag_desc aarch64_fusible_pairs[] =
{
  { "none", AARCH64_FUSE_NOTHING },
#include "aarch64-fusion-pairs.def"
  { "all", AARCH64_FUSE_ALL },
  { NULL, AARCH64_FUSE_NOTHING }
};

#define AARCH64_EXTRA_TUNING_OPTION(name, internal_name) \
  { name, AARCH64_EXTRA_TUNE_##internal_name },
static const struct aarch64_flag_desc aarch64_tuning_flags[] =
{
  { "none", AARCH64_EXTRA_TUNE_NONE },
#include "aarch64-tuning-flags.def"
  { "all", AARCH64_EXTRA_TUNE_ALL },
  { NULL, AARCH64_EXTRA_TUNE_NONE }
};

/* Tuning parameters.  */

static const struct cpu_addrcost_table generic_addrcost_table =
{
  {
    0, /* hi */
    0, /* si */
    0, /* di */
    0, /* ti */
  },
  0, /* pre_modify */
  0, /* post_modify */
  0, /* register_offset */
  0, /* register_sextend */
  0, /* register_zextend */
  0 /* imm_offset */
};

static const struct cpu_addrcost_table cortexa57_addrcost_table =
{
  {
    1, /* hi */
    0, /* si */
    0, /* di */
    1, /* ti */
  },
  0, /* pre_modify */
  0, /* post_modify */
  0, /* register_offset */
  0, /* register_sextend */
  0, /* register_zextend */
  0, /* imm_offset */
};

static const struct cpu_addrcost_table exynosm1_addrcost_table =
{
  {
    0, /* hi */
    0, /* si */
    0, /* di */
    2, /* ti */
  },
  0, /* pre_modify */
  0, /* post_modify */
  1, /* register_offset */
  1, /* register_sextend */
  2, /* register_zextend */
  0, /* imm_offset */
};

static const struct cpu_addrcost_table xgene1_addrcost_table =
{
  {
    1, /* hi */
    0, /* si */
    0, /* di */
    1, /* ti */
  },
  1, /* pre_modify */
  0, /* post_modify */
  0, /* register_offset */
  1, /* register_sextend */
  1, /* register_zextend */
  0, /* imm_offset */
};

static const struct cpu_addrcost_table qdf24xx_addrcost_table =
{
  {
    1, /* hi */
    0, /* si */
    0, /* di */
    1, /* ti */
  },
  0, /* pre_modify */
  0, /* post_modify */
  0, /* register_offset */
  0, /* register_sextend */
  0, /* register_zextend */
  0 /* imm_offset */
};

static const struct cpu_addrcost_table vulcan_addrcost_table =
{
  {
    0, /* hi */
    0, /* si */
    0, /* di */
    2, /* ti */
  },
  0, /* pre_modify */
  0, /* post_modify */
  2, /* register_offset */
  3, /* register_sextend */
  3, /* register_zextend */
  0, /* imm_offset */
};

static const struct cpu_regmove_cost generic_regmove_cost =
{
  1, /* GP2GP */
  /* Avoid the use of slow int<->fp moves for spilling by setting
     their cost higher than memmov_cost.  */
  5, /* GP2FP */
  5, /* FP2GP */
  2 /* FP2FP */
};

static const struct cpu_regmove_cost cortexa57_regmove_cost =
{
  1, /* GP2GP */
  /* Avoid the use of slow int<->fp moves for spilling by setting
     their cost higher than memmov_cost.  */
  5, /* GP2FP */
  5, /* FP2GP */
  2 /* FP2FP */
};

static const struct cpu_regmove_cost cortexa53_regmove_cost =
{
  1, /* GP2GP */
  /* Avoid the use of slow int<->fp moves for spilling by setting
     their cost higher than memmov_cost.  */
  5, /* GP2FP */
  5, /* FP2GP */
  2 /* FP2FP */
};

static const struct cpu_regmove_cost exynosm1_regmove_cost =
{
  1, /* GP2GP */
  /* Avoid the use of slow int<->fp moves for spilling by setting
     their cost higher than memmov_cost (actual, 4 and 9).  */
  9, /* GP2FP */
  9, /* FP2GP */
  1 /* FP2FP */
};

static const struct cpu_regmove_cost thunderx_regmove_cost =
{
  2, /* GP2GP */
  2, /* GP2FP */
  6, /* FP2GP */
  4 /* FP2FP */
};

static const struct cpu_regmove_cost xgene1_regmove_cost =
{
  1, /* GP2GP */
  /* Avoid the use of slow int<->fp moves for spilling by setting
     their cost higher than memmov_cost.  */
  8, /* GP2FP */
  8, /* FP2GP */
  2 /* FP2FP */
};

static const struct cpu_regmove_cost qdf24xx_regmove_cost =
{
  2, /* GP2GP */
  /* Avoid the use of int<->fp moves for spilling.  */
  6, /* GP2FP */
  6, /* FP2GP */
  4 /* FP2FP */
};

static const struct cpu_regmove_cost vulcan_regmove_cost =
{
  1, /* GP2GP */
  /* Avoid the use of int<->fp moves for spilling.  */
  8, /* GP2FP */
  8, /* FP2GP */
  4 /* FP2FP */
};

/* Generic costs for vector insn classes.  */
static const struct cpu_vector_cost generic_vector_cost =
{
  1, /* scalar_stmt_cost */
  1, /* scalar_load_cost */
  1, /* scalar_store_cost */
  1, /* vec_stmt_cost */
  2, /* vec_permute_cost */
  1, /* vec_to_scalar_cost */
  1, /* scalar_to_vec_cost */
  1, /* vec_align_load_cost */
  1, /* vec_unalign_load_cost */
  1, /* vec_unalign_store_cost */
  1, /* vec_store_cost */
  3, /* cond_taken_branch_cost */
  1 /* cond_not_taken_branch_cost */
};

/* ThunderX costs for vector insn classes.  */
static const struct cpu_vector_cost thunderx_vector_cost =
{
  1, /* scalar_stmt_cost */
  3, /* scalar_load_cost */
  1, /* scalar_store_cost */
  4, /* vec_stmt_cost */
  4, /* vec_permute_cost */
  2, /* vec_to_scalar_cost */
  2, /* scalar_to_vec_cost */
  3, /* vec_align_load_cost */
  10, /* vec_unalign_load_cost */
  10, /* vec_unalign_store_cost */
  1, /* vec_store_cost */
  3, /* cond_taken_branch_cost */
  3 /* cond_not_taken_branch_cost */
};

/* Cortex-A57 costs for vector insn classes.  */
static const struct cpu_vector_cost cortexa57_vector_cost =
{
  1, /* scalar_stmt_cost */
  4, /* scalar_load_cost */
  1, /* scalar_store_cost */
  2, /* vec_stmt_cost */
  3, /* vec_permute_cost */
  8, /* vec_to_scalar_cost */
  8, /* scalar_to_vec_cost */
  4, /* vec_align_load_cost */
  4, /* vec_unalign_load_cost */
  1, /* vec_unalign_store_cost */
  1, /* vec_store_cost */
  1, /* cond_taken_branch_cost */
  1 /* cond_not_taken_branch_cost */
};

static const struct cpu_vector_cost exynosm1_vector_cost =
{
  1, /* scalar_stmt_cost */
  5, /* scalar_load_cost */
  1, /* scalar_store_cost */
  3, /* vec_stmt_cost */
  3, /* vec_permute_cost */
  3, /* vec_to_scalar_cost */
  3, /* scalar_to_vec_cost */
  5, /* vec_align_load_cost */
  5, /* vec_unalign_load_cost */
  1, /* vec_unalign_store_cost */
  1, /* vec_store_cost */
  1, /* cond_taken_branch_cost */
  1 /* cond_not_taken_branch_cost */
};

/* X-Gene 1 costs for vector insn classes.  */
static const struct cpu_vector_cost xgene1_vector_cost =
{
  1, /* scalar_stmt_cost */
  5, /* scalar_load_cost */
  1, /* scalar_store_cost */
  2, /* vec_stmt_cost */
  2, /* vec_permute_cost */
  4, /* vec_to_scalar_cost */
  4, /* scalar_to_vec_cost */
  10, /* vec_align_load_cost */
  10, /* vec_unalign_load_cost */
  2, /* vec_unalign_store_cost */
  2, /* vec_store_cost */
  2, /* cond_taken_branch_cost */
  1 /* cond_not_taken_branch_cost */
};

/* Costs for vector insn classes for Vulcan.  */
static const struct cpu_vector_cost vulcan_vector_cost =
{
  6, /* scalar_stmt_cost */
  4, /* scalar_load_cost */
  1, /* scalar_store_cost */
  6, /* vec_stmt_cost */
  3, /* vec_permute_cost */
  6, /* vec_to_scalar_cost */
  5, /* scalar_to_vec_cost */
  8, /* vec_align_load_cost */
  8, /* vec_unalign_load_cost */
  4, /* vec_unalign_store_cost */
  4, /* vec_store_cost */
  2, /* cond_taken_branch_cost */
  1 /* cond_not_taken_branch_cost */
};

/* Generic costs for branch instructions.  */
static const struct cpu_branch_cost generic_branch_cost =
{
  2,  /* Predictable.  */
  2   /* Unpredictable.  */
};

/* Branch costs for Cortex-A57.  */
static const struct cpu_branch_cost cortexa57_branch_cost =
{
  1,  /* Predictable.  */
  3   /* Unpredictable.  */
};

/* Branch costs for Vulcan.  */
static const struct cpu_branch_cost vulcan_branch_cost =
{
  1,  /* Predictable.  */
  3   /* Unpredictable.  */
};

/* Generic approximation modes.  */
static const cpu_approx_modes generic_approx_modes =
{
  AARCH64_APPROX_NONE, /* division */
  AARCH64_APPROX_NONE, /* sqrt */
  AARCH64_APPROX_NONE  /* recip_sqrt */
};

/* Approximation modes for Exynos M1.  */
static const cpu_approx_modes exynosm1_approx_modes =
{
  AARCH64_APPROX_NONE, /* division */
  AARCH64_APPROX_ALL,  /* sqrt */
  AARCH64_APPROX_ALL   /* recip_sqrt */
};

/* Approximation modes for X-Gene 1.  */
static const cpu_approx_modes xgene1_approx_modes =
{
  AARCH64_APPROX_NONE, /* division */
  AARCH64_APPROX_NONE, /* sqrt */
  AARCH64_APPROX_ALL   /* recip_sqrt */
};

static const struct tune_params generic_tunings =
{
  &cortexa57_extra_costs,
  &generic_addrcost_table,
  &generic_regmove_cost,
  &generic_vector_cost,
  &generic_branch_cost,
  &generic_approx_modes,
  4, /* memmov_cost */
  2, /* issue_rate */
  AARCH64_FUSE_NOTHING, /* fusible_ops */
  8,  /* function_align.  */
  8,  /* jump_align.  */
  4,  /* loop_align.  */
  2,  /* int_reassoc_width.  */
  4,  /* fp_reassoc_width.  */
  1,  /* vec_reassoc_width.  */
  2,  /* min_div_recip_mul_sf.  */
  2,  /* min_div_recip_mul_df.  */
  0,  /* max_case_values.  */
  0,  /* cache_line_size.  */
  tune_params::AUTOPREFETCHER_OFF, /* autoprefetcher_model.  */
  (AARCH64_EXTRA_TUNE_NONE) /* tune_flags.  */
};

static const struct tune_params cortexa35_tunings =
{
  &cortexa53_extra_costs,
  &generic_addrcost_table,
  &cortexa53_regmove_cost,
  &generic_vector_cost,
  &cortexa57_branch_cost,
  &generic_approx_modes,
  4, /* memmov_cost */
  1, /* issue_rate */
  (AARCH64_FUSE_AES_AESMC | AARCH64_FUSE_MOV_MOVK | AARCH64_FUSE_ADRP_ADD
   | AARCH64_FUSE_MOVK_MOVK | AARCH64_FUSE_ADRP_LDR), /* fusible_ops */
  16, /* function_align.  */
  8,  /* jump_align.  */
  8,  /* loop_align.  */
  2,  /* int_reassoc_width.  */
  4,  /* fp_reassoc_width.  */
  1,  /* vec_reassoc_width.  */
  2,  /* min_div_recip_mul_sf.  */
  2,  /* min_div_recip_mul_df.  */
  0,  /* max_case_values.  */
  0,  /* cache_line_size.  */
  tune_params::AUTOPREFETCHER_WEAK, /* autoprefetcher_model.  */
  (AARCH64_EXTRA_TUNE_NONE) /* tune_flags.  */
};

static const struct tune_params cortexa53_tunings =
{
  &cortexa53_extra_costs,
  &generic_addrcost_table,
  &cortexa53_regmove_cost,
  &generic_vector_cost,
  &cortexa57_branch_cost,
  &generic_approx_modes,
  4, /* memmov_cost */
  2, /* issue_rate */
  (AARCH64_FUSE_AES_AESMC | AARCH64_FUSE_MOV_MOVK | AARCH64_FUSE_ADRP_ADD
   | AARCH64_FUSE_MOVK_MOVK | AARCH64_FUSE_ADRP_LDR), /* fusible_ops */
  16, /* function_align.  */
  8,  /* jump_align.  */
  8,  /* loop_align.  */
  2,  /* int_reassoc_width.  */
  4,  /* fp_reassoc_width.  */
  1,  /* vec_reassoc_width.  */
  2,  /* min_div_recip_mul_sf.  */
  2,  /* min_div_recip_mul_df.  */
  0,  /* max_case_values.  */
  0,  /* cache_line_size.  */
  tune_params::AUTOPREFETCHER_WEAK, /* autoprefetcher_model.  */
  (AARCH64_EXTRA_TUNE_NONE) /* tune_flags.  */
};

static const struct tune_params cortexa57_tunings =
{
  &cortexa57_extra_costs,
  &cortexa57_addrcost_table,
  &cortexa57_regmove_cost,
  &cortexa57_vector_cost,
  &cortexa57_branch_cost,
  &generic_approx_modes,
  4, /* memmov_cost */
  3, /* issue_rate */
  (AARCH64_FUSE_AES_AESMC | AARCH64_FUSE_MOV_MOVK | AARCH64_FUSE_ADRP_ADD
   | AARCH64_FUSE_MOVK_MOVK), /* fusible_ops */
  16, /* function_align.  */
  8,  /* jump_align.  */
  8,  /* loop_align.  */
  2,  /* int_reassoc_width.  */
  4,  /* fp_reassoc_width.  */
  1,  /* vec_reassoc_width.  */
  2,  /* min_div_recip_mul_sf.  */
  2,  /* min_div_recip_mul_df.  */
  0,  /* max_case_values.  */
  0,  /* cache_line_size.  */
  tune_params::AUTOPREFETCHER_WEAK, /* autoprefetcher_model.  */
  (AARCH64_EXTRA_TUNE_RENAME_FMA_REGS) /* tune_flags.  */
};

static const struct tune_params cortexa72_tunings =
{
  &cortexa57_extra_costs,
  &cortexa57_addrcost_table,
  &cortexa57_regmove_cost,
  &cortexa57_vector_cost,
  &cortexa57_branch_cost,
  &generic_approx_modes,
  4, /* memmov_cost */
  3, /* issue_rate */
  (AARCH64_FUSE_AES_AESMC | AARCH64_FUSE_MOV_MOVK | AARCH64_FUSE_ADRP_ADD
   | AARCH64_FUSE_MOVK_MOVK), /* fusible_ops */
  16, /* function_align.  */
  8,  /* jump_align.  */
  8,  /* loop_align.  */
  2,  /* int_reassoc_width.  */
  4,  /* fp_reassoc_width.  */
  1,  /* vec_reassoc_width.  */
  2,  /* min_div_recip_mul_sf.  */
  2,  /* min_div_recip_mul_df.  */
  0,  /* max_case_values.  */
  0,  /* cache_line_size.  */
  tune_params::AUTOPREFETCHER_WEAK, /* autoprefetcher_model.  */
  (AARCH64_EXTRA_TUNE_NONE) /* tune_flags.  */
};

static const struct tune_params cortexa73_tunings =
{
  &cortexa57_extra_costs,
  &cortexa57_addrcost_table,
  &cortexa57_regmove_cost,
  &cortexa57_vector_cost,
  &cortexa57_branch_cost,
  &generic_approx_modes,
  4, /* memmov_cost.  */
  2, /* issue_rate.  */
  (AARCH64_FUSE_AES_AESMC | AARCH64_FUSE_MOV_MOVK | AARCH64_FUSE_ADRP_ADD
   | AARCH64_FUSE_MOVK_MOVK | AARCH64_FUSE_ADRP_LDR), /* fusible_ops */
  16, /* function_align.  */
  8,  /* jump_align.  */
  8,  /* loop_align.  */
  2,  /* int_reassoc_width.  */
  4,  /* fp_reassoc_width.  */
  1,  /* vec_reassoc_width.  */
  2,  /* min_div_recip_mul_sf.  */
  2,  /* min_div_recip_mul_df.  */
  0,  /* max_case_values.  */
  0,  /* cache_line_size.  */
  tune_params::AUTOPREFETCHER_WEAK, /* autoprefetcher_model.  */
  (AARCH64_EXTRA_TUNE_NONE) /* tune_flags.  */
};

static const struct tune_params exynosm1_tunings =
{
  &exynosm1_extra_costs,
  &exynosm1_addrcost_table,
  &exynosm1_regmove_cost,
  &exynosm1_vector_cost,
  &generic_branch_cost,
  &exynosm1_approx_modes,
  4, /* memmov_cost */
  3, /* issue_rate */
  (AARCH64_FUSE_AES_AESMC), /* fusible_ops */
  4,  /* function_align.  */
  4,  /* jump_align.  */
  4,  /* loop_align.  */
  2,  /* int_reassoc_width.  */
  4,  /* fp_reassoc_width.  */
  1,  /* vec_reassoc_width.  */
  2,  /* min_div_recip_mul_sf.  */
  2,  /* min_div_recip_mul_df.  */
  48, /* max_case_values.  */
  64, /* cache_line_size.  */
  tune_params::AUTOPREFETCHER_WEAK, /* autoprefetcher_model.  */
  (AARCH64_EXTRA_TUNE_NONE) /* tune_flags.  */
};

static const struct tune_params thunderx_tunings =
{
  &thunderx_extra_costs,
  &generic_addrcost_table,
  &thunderx_regmove_cost,
  &thunderx_vector_cost,
  &generic_branch_cost,
  &generic_approx_modes,
  6, /* memmov_cost */
  2, /* issue_rate */
  AARCH64_FUSE_CMP_BRANCH, /* fusible_ops */
  8,  /* function_align.  */
  8,  /* jump_align.  */
  8,  /* loop_align.  */
  2,  /* int_reassoc_width.  */
  4,  /* fp_reassoc_width.  */
  1,  /* vec_reassoc_width.  */
  2,  /* min_div_recip_mul_sf.  */
  2,  /* min_div_recip_mul_df.  */
  0,  /* max_case_values.  */
  0,  /* cache_line_size.  */
  tune_params::AUTOPREFETCHER_OFF, /* autoprefetcher_model.  */
  (AARCH64_EXTRA_TUNE_SLOW_UNALIGNED_LDPW) /* tune_flags.  */
};

static const struct tune_params xgene1_tunings =
{
  &xgene1_extra_costs,
  &xgene1_addrcost_table,
  &xgene1_regmove_cost,
  &xgene1_vector_cost,
  &generic_branch_cost,
  &xgene1_approx_modes,
  6, /* memmov_cost */
  4, /* issue_rate */
  AARCH64_FUSE_NOTHING, /* fusible_ops */
  16, /* function_align.  */
  8,  /* jump_align.  */
  16, /* loop_align.  */
  2,  /* int_reassoc_width.  */
  4,  /* fp_reassoc_width.  */
  1,  /* vec_reassoc_width.  */
  2,  /* min_div_recip_mul_sf.  */
  2,  /* min_div_recip_mul_df.  */
  0,  /* max_case_values.  */
  0,  /* cache_line_size.  */
  tune_params::AUTOPREFETCHER_OFF, /* autoprefetcher_model.  */
  (AARCH64_EXTRA_TUNE_NONE) /* tune_flags.  */
};

static const struct tune_params qdf24xx_tunings =
{
  &qdf24xx_extra_costs,
  &qdf24xx_addrcost_table,
  &qdf24xx_regmove_cost,
  &generic_vector_cost,
  &generic_branch_cost,
  &generic_approx_modes,
  4, /* memmov_cost */
  4, /* issue_rate */
  (AARCH64_FUSE_MOV_MOVK | AARCH64_FUSE_ADRP_ADD
   | AARCH64_FUSE_MOVK_MOVK), /* fusible_ops */
  16, /* function_align.  */
  8,  /* jump_align.  */
  16, /* loop_align.  */
  2,  /* int_reassoc_width.  */
  4,  /* fp_reassoc_width.  */
  1,  /* vec_reassoc_width.  */
  2,  /* min_div_recip_mul_sf.  */
  2,  /* min_div_recip_mul_df.  */
  0,  /* max_case_values.  */
  64, /* cache_line_size.  */
  tune_params::AUTOPREFETCHER_STRONG, /* autoprefetcher_model.  */
  (AARCH64_EXTRA_TUNE_NONE) /* tune_flags.  */
};

static const struct tune_params vulcan_tunings =
{
  &vulcan_extra_costs,
  &vulcan_addrcost_table,
  &vulcan_regmove_cost,
  &vulcan_vector_cost,
  &vulcan_branch_cost,
  &generic_approx_modes,
  4, /* memmov_cost.  */
  4, /* issue_rate.  */
  AARCH64_FUSE_NOTHING, /* fusible_ops.  */
  16, /* function_align.  */
  8,  /* jump_align.  */
  16, /* loop_align.  */
  3,  /* int_reassoc_width.  */
  2,  /* fp_reassoc_width.  */
  2,  /* vec_reassoc_width.  */
  2,  /* min_div_recip_mul_sf.  */
  2,  /* min_div_recip_mul_df.  */
  0,  /* max_case_values.  */
  64, /* cache_line_size.  */
  tune_params::AUTOPREFETCHER_OFF, /* autoprefetcher_model.  */
  (AARCH64_EXTRA_TUNE_NONE) /* tune_flags.  */
};

/* Support for fine-grained override of the tuning structures.  */
struct aarch64_tuning_override_function
{
  const char* name;
  void (*parse_override)(const char*, struct tune_params*);
};

static void aarch64_parse_fuse_string (const char*, struct tune_params*);
static void aarch64_parse_tune_string (const char*, struct tune_params*);

static const struct aarch64_tuning_override_function
aarch64_tuning_override_functions[] =
{
  { "fuse", aarch64_parse_fuse_string },
  { "tune", aarch64_parse_tune_string },
  { NULL, NULL }
};

/* A processor implementing AArch64.  */
struct processor
{
  const char *const name;
  enum aarch64_processor ident;
  enum aarch64_processor sched_core;
  enum aarch64_arch arch;
  unsigned architecture_version;
  const unsigned long flags;
  const struct tune_params *const tune;
};

/* Architectures implementing AArch64.  */
static const struct processor all_architectures[] =
{
#define AARCH64_ARCH(NAME, CORE, ARCH_IDENT, ARCH_REV, FLAGS) \
  {NAME, CORE, CORE, AARCH64_ARCH_##ARCH_IDENT, ARCH_REV, FLAGS, NULL},
#include "aarch64-arches.def"
  {NULL, aarch64_none, aarch64_none, aarch64_no_arch, 0, 0, NULL}
};

/* Processor cores implementing AArch64.  */
static const struct processor all_cores[] =
{
#define AARCH64_CORE(NAME, IDENT, SCHED, ARCH, FLAGS, COSTS, IMP, PART, VARIANT) \
  {NAME, IDENT, SCHED, AARCH64_ARCH_##ARCH, \
  all_architectures[AARCH64_ARCH_##ARCH].architecture_version, \
  FLAGS, &COSTS##_tunings},
#include "aarch64-cores.def"
  {"generic", generic, cortexa53, AARCH64_ARCH_8A, 8,
   AARCH64_FL_FOR_ARCH8, &generic_tunings},
  {NULL, aarch64_none, aarch64_none, aarch64_no_arch, 0, 0, NULL}
};

/* Target specification.  These are populated by the -march, -mtune, -mcpu
   handling code or by target attributes.  */
static const struct processor *selected_arch;
static const struct processor *selected_cpu;
static const struct processor *selected_tune;

/* The current tuning set.  */
struct tune_params aarch64_tune_params = generic_tunings;

#define AARCH64_CPU_DEFAULT_FLAGS ((selected_cpu) ? selected_cpu->flags : 0)

/* An ISA extension in the co-processor and main instruction set space.  */
struct aarch64_option_extension
{
  const char *const name;
  const unsigned long flags_on;
  const unsigned long flags_off;
};

typedef enum aarch64_cond_code
{
  AARCH64_EQ = 0, AARCH64_NE, AARCH64_CS, AARCH64_CC, AARCH64_MI, AARCH64_PL,
  AARCH64_VS, AARCH64_VC, AARCH64_HI, AARCH64_LS, AARCH64_GE, AARCH64_LT,
  AARCH64_GT, AARCH64_LE, AARCH64_AL, AARCH64_NV
}
aarch64_cc;

#define AARCH64_INVERSE_CONDITION_CODE(X) ((aarch64_cc) (((int) X) ^ 1))

/* The condition codes of the processor, and the inverse function.  */
static const char * const aarch64_condition_codes[] =
{
  "eq", "ne", "cs", "cc", "mi", "pl", "vs", "vc",
  "hi", "ls", "ge", "lt", "gt", "le", "al", "nv"
};

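/* The condition codes above are laid out in complementary pairs (EQ/NE,
   CS/CC, MI/PL, ...), so AARCH64_INVERSE_CONDITION_CODE only has to flip
   the low-order bit: for example it maps AARCH64_GE to AARCH64_LT and
   AARCH64_EQ to AARCH64_NE.  */
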
/* Generate code to enable conditional branches in functions over 1 MiB.  */
const char *
aarch64_gen_far_branch (rtx * operands, int pos_label, const char * dest,
                        const char * branch_format)
{
  rtx_code_label * tmp_label = gen_label_rtx ();
  char label_buf[256];
  char buffer[128];
  ASM_GENERATE_INTERNAL_LABEL (label_buf, dest,
                               CODE_LABEL_NUMBER (tmp_label));
  const char *label_ptr = targetm.strip_name_encoding (label_buf);
  rtx dest_label = operands[pos_label];
  operands[pos_label] = tmp_label;

  snprintf (buffer, sizeof (buffer), "%s%s", branch_format, label_ptr);
  output_asm_insn (buffer, operands);

  snprintf (buffer, sizeof (buffer), "b\t%%l%d\n%s:", pos_label, label_ptr);
  operands[pos_label] = dest_label;
  output_asm_insn (buffer, operands);
  return "";
}

void
aarch64_err_no_fpadvsimd (machine_mode mode, const char *msg)
{
  const char *mc = FLOAT_MODE_P (mode) ? "floating-point" : "vector";
  if (TARGET_GENERAL_REGS_ONLY)
    error ("%qs is incompatible with %s %s", "-mgeneral-regs-only", mc, msg);
  else
    error ("%qs feature modifier is incompatible with %s %s", "+nofp", mc, msg);
}

/* Implement TARGET_IRA_CHANGE_PSEUDO_ALLOCNO_CLASS.
   The register allocator chooses ALL_REGS if FP_REGS and GENERAL_REGS have
   the same cost even if ALL_REGS has a much larger cost.  ALL_REGS is also
   used if the cost of both FP_REGS and GENERAL_REGS is lower than the memory
   cost (in this case the best class is the lowest cost one).  Using ALL_REGS
   irrespectively of its cost results in bad allocations with many redundant
   int<->FP moves which are expensive on various cores.
   To avoid this we don't allow ALL_REGS as the allocno class, but force a
   decision between FP_REGS and GENERAL_REGS.  We use the allocno class if it
   isn't ALL_REGS.  Similarly, use the best class if it isn't ALL_REGS.
   Otherwise set the allocno class depending on the mode.
   The result of this is that it is no longer inefficient to have a higher
   memory move cost than the register move cost.  */

static reg_class_t
aarch64_ira_change_pseudo_allocno_class (int regno, reg_class_t allocno_class,
                                         reg_class_t best_class)
{
  enum machine_mode mode;

  if (allocno_class != ALL_REGS)
    return allocno_class;

  if (best_class != ALL_REGS)
    return best_class;

  mode = PSEUDO_REGNO_MODE (regno);
  return FLOAT_MODE_P (mode) || VECTOR_MODE_P (mode) ? FP_REGS : GENERAL_REGS;
}

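/* As an illustration of the hook above: when both the allocno class and the
   best class come back as ALL_REGS, a pseudo holding a DFmode or V4SImode
   value is steered to FP_REGS, while an SImode or DImode pseudo is steered
   to GENERAL_REGS.  */
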
static unsigned int
aarch64_min_divisions_for_recip_mul (enum machine_mode mode)
{
  if (GET_MODE_UNIT_SIZE (mode) == 4)
    return aarch64_tune_params.min_div_recip_mul_sf;
  return aarch64_tune_params.min_div_recip_mul_df;
}

static int
aarch64_reassociation_width (unsigned opc ATTRIBUTE_UNUSED,
                             enum machine_mode mode)
{
  if (VECTOR_MODE_P (mode))
    return aarch64_tune_params.vec_reassoc_width;
  if (INTEGRAL_MODE_P (mode))
    return aarch64_tune_params.int_reassoc_width;
  if (FLOAT_MODE_P (mode))
    return aarch64_tune_params.fp_reassoc_width;
  return 1;
}

/* Provide a mapping from gcc register numbers to dwarf register numbers.  */
unsigned
aarch64_dbx_register_number (unsigned regno)
{
  if (GP_REGNUM_P (regno))
    return AARCH64_DWARF_R0 + regno - R0_REGNUM;
  else if (regno == SP_REGNUM)
    return AARCH64_DWARF_SP;
  else if (FP_REGNUM_P (regno))
    return AARCH64_DWARF_V0 + regno - V0_REGNUM;

  /* Return values >= DWARF_FRAME_REGISTERS indicate that there is no
     equivalent DWARF register.  */
  return DWARF_FRAME_REGISTERS;
}

/* Return TRUE if MODE is any of the large INT modes.  */
static bool
aarch64_vect_struct_mode_p (machine_mode mode)
{
  return mode == OImode || mode == CImode || mode == XImode;
}

/* Return TRUE if MODE is any of the vector modes.  */
static bool
aarch64_vector_mode_p (machine_mode mode)
{
  return aarch64_vector_mode_supported_p (mode)
         || aarch64_vect_struct_mode_p (mode);
}

/* Implement target hook TARGET_ARRAY_MODE_SUPPORTED_P.  */
static bool
aarch64_array_mode_supported_p (machine_mode mode,
                                unsigned HOST_WIDE_INT nelems)
{
  if (TARGET_SIMD
      && (AARCH64_VALID_SIMD_QREG_MODE (mode)
          || AARCH64_VALID_SIMD_DREG_MODE (mode))
      && (nelems >= 2 && nelems <= 4))
    return true;

  return false;
}

/* Implement HARD_REGNO_NREGS.  */

int
aarch64_hard_regno_nregs (unsigned regno, machine_mode mode)
{
  switch (aarch64_regno_regclass (regno))
    {
    case FP_REGS:
    case FP_LO_REGS:
      return (GET_MODE_SIZE (mode) + UNITS_PER_VREG - 1) / UNITS_PER_VREG;
    default:
      return (GET_MODE_SIZE (mode) + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
    }
  gcc_unreachable ();
}

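/* For example, a 16-byte TImode value needs a single 128-bit vector register
   (UNITS_PER_VREG is 16) but two 64-bit general registers (UNITS_PER_WORD
   is 8), so the rounding above yields 1 and 2 respectively.  */
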
/* Implement HARD_REGNO_MODE_OK.  */

int
aarch64_hard_regno_mode_ok (unsigned regno, machine_mode mode)
{
  if (GET_MODE_CLASS (mode) == MODE_CC)
    return regno == CC_REGNUM;

  if (regno == SP_REGNUM)
    /* The purpose of comparing with ptr_mode is to support the
       global register variable associated with the stack pointer
       register via the syntax of asm ("wsp") in ILP32.  */
    return mode == Pmode || mode == ptr_mode;

  if (regno == FRAME_POINTER_REGNUM || regno == ARG_POINTER_REGNUM)
    return mode == Pmode;

  if (GP_REGNUM_P (regno) && ! aarch64_vect_struct_mode_p (mode))
    return 1;

  if (FP_REGNUM_P (regno))
    {
      if (aarch64_vect_struct_mode_p (mode))
        return
          (regno + aarch64_hard_regno_nregs (regno, mode) - 1) <= V31_REGNUM;
      else
        return 1;
    }

  return 0;
}

/* Implement HARD_REGNO_CALLER_SAVE_MODE.  */
machine_mode
aarch64_hard_regno_caller_save_mode (unsigned regno, unsigned nregs,
                                     machine_mode mode)
{
  /* Handle modes that fit within single registers.  */
  if (nregs == 1 && GET_MODE_SIZE (mode) <= 16)
    {
      if (GET_MODE_SIZE (mode) >= 4)
        return mode;
      else
        return SImode;
    }
  /* Fall back to generic for multi-reg and very large modes.  */
  else
    return choose_hard_reg_mode (regno, nregs, false);
}

/* Return true if calls to DECL should be treated as
   long-calls (ie called via a register).  */
static bool
aarch64_decl_is_long_call_p (const_tree decl ATTRIBUTE_UNUSED)
{
  return false;
}

/* Return true if calls to symbol-ref SYM should be treated as
   long-calls (ie called via a register).  */
bool
aarch64_is_long_call_p (rtx sym)
{
  return aarch64_decl_is_long_call_p (SYMBOL_REF_DECL (sym));
}

/* Return true if calls to symbol-ref SYM should not go through
   plt stubs.  */

bool
aarch64_is_noplt_call_p (rtx sym)
{
  const_tree decl = SYMBOL_REF_DECL (sym);

  if (flag_pic
      && decl
      && (!flag_plt
          || lookup_attribute ("noplt", DECL_ATTRIBUTES (decl)))
      && !targetm.binds_local_p (decl))
    return true;

  return false;
}

/* Return true if the offsets to a zero/sign-extract operation
   represent an expression that matches an extend operation.  The
   operands represent the parameters from

   (extract:MODE (mult (reg) (MULT_IMM)) (EXTRACT_IMM) (const_int 0)).  */
bool
aarch64_is_extend_from_extract (machine_mode mode, rtx mult_imm,
                                rtx extract_imm)
{
  HOST_WIDE_INT mult_val, extract_val;

  if (! CONST_INT_P (mult_imm) || ! CONST_INT_P (extract_imm))
    return false;

  mult_val = INTVAL (mult_imm);
  extract_val = INTVAL (extract_imm);

  if (extract_val > 8
      && extract_val < GET_MODE_BITSIZE (mode)
      && exact_log2 (extract_val & ~7) > 0
      && (extract_val & 7) <= 4
      && mult_val == (1 << (extract_val & 7)))
    return true;

  return false;
}

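/* Roughly speaking, EXTRACT_IMM encodes an extend width in its upper bits
   and a left-shift amount (0-4) in its low three bits; e.g. an EXTRACT_IMM
   of 34 (32 + 2) is only accepted when MULT_IMM is 4, i.e. a 32-bit value
   extended and shifted left by two.  */
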
/* Emit an insn that's a simple single-set.  Both the operands must be
   known to be valid.  */
inline static rtx_insn *
emit_set_insn (rtx x, rtx y)
{
  return emit_insn (gen_rtx_SET (x, y));
}

/* X and Y are two things to compare using CODE.  Emit the compare insn and
   return the rtx for register 0 in the proper mode.  */
rtx
aarch64_gen_compare_reg (RTX_CODE code, rtx x, rtx y)
{
  machine_mode mode = SELECT_CC_MODE (code, x, y);
  rtx cc_reg = gen_rtx_REG (mode, CC_REGNUM);

  emit_set_insn (cc_reg, gen_rtx_COMPARE (mode, x, y));
  return cc_reg;
}

/* Build the SYMBOL_REF for __tls_get_addr.  */

static GTY(()) rtx tls_get_addr_libfunc;

rtx
aarch64_tls_get_addr (void)
{
  if (!tls_get_addr_libfunc)
    tls_get_addr_libfunc = init_one_libfunc ("__tls_get_addr");
  return tls_get_addr_libfunc;
}

/* Return the TLS model to use for ADDR.  */

static enum tls_model
tls_symbolic_operand_type (rtx addr)
{
  enum tls_model tls_kind = TLS_MODEL_NONE;
  rtx sym, addend;

  if (GET_CODE (addr) == CONST)
    {
      split_const (addr, &sym, &addend);
      if (GET_CODE (sym) == SYMBOL_REF)
        tls_kind = SYMBOL_REF_TLS_MODEL (sym);
    }
  else if (GET_CODE (addr) == SYMBOL_REF)
    tls_kind = SYMBOL_REF_TLS_MODEL (addr);

  return tls_kind;
}

/* We'll allow lo_sum's in addresses in our legitimate addresses
   so that combine would take care of combining addresses where
   necessary, but for generation purposes, we'll generate the address
   as :
   RTL                               Absolute
   tmp = hi (symbol_ref);            adrp  x1, foo
   dest = lo_sum (tmp, symbol_ref);  add dest, x1, :lo_12:foo
                                     nop

   PIC                               TLS
   adrp x1, :got:foo                 adrp tmp, :tlsgd:foo
   ldr  x1, [:got_lo12:foo]          add  dest, tmp, :tlsgd_lo12:foo
                                     bl   __tls_get_addr
                                     nop

   Load TLS symbol, depending on TLS mechanism and TLS access model.

   Global Dynamic - Traditional TLS:
   adrp tmp, :tlsgd:imm
   add  dest, tmp, #:tlsgd_lo12:imm
   bl   __tls_get_addr

   Global Dynamic - TLS Descriptors:
   adrp dest, :tlsdesc:imm
   ldr  tmp, [dest, #:tlsdesc_lo12:imm]
   add  dest, dest, #:tlsdesc_lo12:imm
   blr  tmp
   mrs  tp, tpidr_el0
   add  dest, dest, tp

   Initial Exec:
   mrs  tp, tpidr_el0
   adrp tmp, :gottprel:imm
   ldr  dest, [tmp, #:gottprel_lo12:imm]
   add  dest, dest, tp

   Local Exec:
   mrs  tp, tpidr_el0
   add  t0, tp, #:tprel_hi12:imm, lsl #12
   add  t0, t0, #:tprel_lo12_nc:imm
*/

1236static void
1237aarch64_load_symref_appropriately (rtx dest, rtx imm,
1238 enum aarch64_symbol_type type)
1239{
1240 switch (type)
1241 {
1242 case SYMBOL_SMALL_ABSOLUTE:
1243 {
28514dda 1244 /* In ILP32, the mode of dest can be either SImode or DImode. */
43e9d192 1245 rtx tmp_reg = dest;
ef4bddc2 1246 machine_mode mode = GET_MODE (dest);
28514dda
YZ
1247
1248 gcc_assert (mode == Pmode || mode == ptr_mode);
1249
43e9d192 1250 if (can_create_pseudo_p ())
28514dda 1251 tmp_reg = gen_reg_rtx (mode);
43e9d192 1252
28514dda 1253 emit_move_insn (tmp_reg, gen_rtx_HIGH (mode, imm));
43e9d192
IB
1254 emit_insn (gen_add_losym (dest, tmp_reg, imm));
1255 return;
1256 }
1257
a5350ddc 1258 case SYMBOL_TINY_ABSOLUTE:
f7df4a84 1259 emit_insn (gen_rtx_SET (dest, imm));
a5350ddc
CSS
1260 return;
1261
1b1e81f8
JW
1262 case SYMBOL_SMALL_GOT_28K:
1263 {
1264 machine_mode mode = GET_MODE (dest);
1265 rtx gp_rtx = pic_offset_table_rtx;
53021678
JW
1266 rtx insn;
1267 rtx mem;
1b1e81f8
JW
1268
1269 /* NOTE: pic_offset_table_rtx can be NULL_RTX, because we can reach
1270 here before rtl expand. Tree IVOPT will generate rtl pattern to
1271 decide rtx costs, in which case pic_offset_table_rtx is not
1272 initialized. For that case no need to generate the first adrp
026c3cfd 1273 instruction as the final cost for global variable access is
1b1e81f8
JW
1274 one instruction. */
1275 if (gp_rtx != NULL)
1276 {
1277 /* -fpic for -mcmodel=small allow 32K GOT table size (but we are
1278 using the page base as GOT base, the first page may be wasted,
1279 in the worst scenario, there is only 28K space for GOT).
1280
1281 The generate instruction sequence for accessing global variable
1282 is:
1283
a3957742 1284 ldr reg, [pic_offset_table_rtx, #:gotpage_lo15:sym]
1b1e81f8
JW
1285
1286 Only one instruction needed. But we must initialize
1287 pic_offset_table_rtx properly. We generate initialize insn for
1288 every global access, and allow CSE to remove all redundant.
1289
1290 The final instruction sequences will look like the following
1291 for multiply global variables access.
1292
a3957742 1293 adrp pic_offset_table_rtx, _GLOBAL_OFFSET_TABLE_
1b1e81f8 1294
a3957742
JW
1295 ldr reg, [pic_offset_table_rtx, #:gotpage_lo15:sym1]
1296 ldr reg, [pic_offset_table_rtx, #:gotpage_lo15:sym2]
1297 ldr reg, [pic_offset_table_rtx, #:gotpage_lo15:sym3]
1298 ... */
1b1e81f8
JW
1299
1300 rtx s = gen_rtx_SYMBOL_REF (Pmode, "_GLOBAL_OFFSET_TABLE_");
1301 crtl->uses_pic_offset_table = 1;
1302 emit_move_insn (gp_rtx, gen_rtx_HIGH (Pmode, s));
1303
1304 if (mode != GET_MODE (gp_rtx))
4ba8f0a3
AP
1305 gp_rtx = gen_lowpart (mode, gp_rtx);
1306
1b1e81f8
JW
1307 }
1308
1309 if (mode == ptr_mode)
1310 {
1311 if (mode == DImode)
53021678 1312 insn = gen_ldr_got_small_28k_di (dest, gp_rtx, imm);
1b1e81f8 1313 else
53021678
JW
1314 insn = gen_ldr_got_small_28k_si (dest, gp_rtx, imm);
1315
1316 mem = XVECEXP (SET_SRC (insn), 0, 0);
1b1e81f8
JW
1317 }
1318 else
1319 {
1320 gcc_assert (mode == Pmode);
53021678
JW
1321
1322 insn = gen_ldr_got_small_28k_sidi (dest, gp_rtx, imm);
1323 mem = XVECEXP (XEXP (SET_SRC (insn), 0), 0, 0);
1b1e81f8
JW
1324 }
1325
53021678
JW
1326 /* The operand is expected to be MEM. Whenever the related insn
1327 pattern changed, above code which calculate mem should be
1328 updated. */
1329 gcc_assert (GET_CODE (mem) == MEM);
1330 MEM_READONLY_P (mem) = 1;
1331 MEM_NOTRAP_P (mem) = 1;
1332 emit_insn (insn);
1b1e81f8
JW
1333 return;
1334 }
1335
6642bdb4 1336 case SYMBOL_SMALL_GOT_4G:
43e9d192 1337 {
28514dda
YZ
1338 /* In ILP32, the mode of dest can be either SImode or DImode,
1339 while the got entry is always of SImode size. The mode of
1340 dest depends on how dest is used: if dest is assigned to a
1341 pointer (e.g. in the memory), it has SImode; it may have
1342 DImode if dest is dereferenced to access the memeory.
1343 This is why we have to handle three different ldr_got_small
1344 patterns here (two patterns for ILP32). */
53021678
JW
1345
1346 rtx insn;
1347 rtx mem;
43e9d192 1348 rtx tmp_reg = dest;
ef4bddc2 1349 machine_mode mode = GET_MODE (dest);
28514dda 1350
43e9d192 1351 if (can_create_pseudo_p ())
28514dda
YZ
1352 tmp_reg = gen_reg_rtx (mode);
1353
1354 emit_move_insn (tmp_reg, gen_rtx_HIGH (mode, imm));
1355 if (mode == ptr_mode)
1356 {
1357 if (mode == DImode)
53021678 1358 insn = gen_ldr_got_small_di (dest, tmp_reg, imm);
28514dda 1359 else
53021678
JW
1360 insn = gen_ldr_got_small_si (dest, tmp_reg, imm);
1361
1362 mem = XVECEXP (SET_SRC (insn), 0, 0);
28514dda
YZ
1363 }
1364 else
1365 {
1366 gcc_assert (mode == Pmode);
53021678
JW
1367
1368 insn = gen_ldr_got_small_sidi (dest, tmp_reg, imm);
1369 mem = XVECEXP (XEXP (SET_SRC (insn), 0), 0, 0);
28514dda
YZ
1370 }
1371
53021678
JW
1372 gcc_assert (GET_CODE (mem) == MEM);
1373 MEM_READONLY_P (mem) = 1;
1374 MEM_NOTRAP_P (mem) = 1;
1375 emit_insn (insn);
43e9d192
IB
1376 return;
1377 }
1378
1379 case SYMBOL_SMALL_TLSGD:
1380 {
5d8a22a5 1381 rtx_insn *insns;
23b88fda
N
1382 machine_mode mode = GET_MODE (dest);
1383 rtx result = gen_rtx_REG (mode, R0_REGNUM);
43e9d192
IB
1384
1385 start_sequence ();
23b88fda
N
1386 if (TARGET_ILP32)
1387 aarch64_emit_call_insn (gen_tlsgd_small_si (result, imm));
1388 else
1389 aarch64_emit_call_insn (gen_tlsgd_small_di (result, imm));
43e9d192
IB
1390 insns = get_insns ();
1391 end_sequence ();
1392
1393 RTL_CONST_CALL_P (insns) = 1;
1394 emit_libcall_block (insns, dest, result, imm);
1395 return;
1396 }
1397
1398 case SYMBOL_SMALL_TLSDESC:
1399 {
ef4bddc2 1400 machine_mode mode = GET_MODE (dest);
621ad2de 1401 rtx x0 = gen_rtx_REG (mode, R0_REGNUM);
43e9d192
IB
1402 rtx tp;
1403
621ad2de
AP
1404 gcc_assert (mode == Pmode || mode == ptr_mode);
1405
2876a13f
JW
1406 /* In ILP32, the got entry is always of SImode size. Unlike
1407 small GOT, the dest is fixed at reg 0. */
1408 if (TARGET_ILP32)
1409 emit_insn (gen_tlsdesc_small_si (imm));
621ad2de 1410 else
2876a13f 1411 emit_insn (gen_tlsdesc_small_di (imm));
43e9d192 1412 tp = aarch64_load_tp (NULL);
621ad2de
AP
1413
1414 if (mode != Pmode)
1415 tp = gen_lowpart (mode, tp);
1416
2876a13f 1417 emit_insn (gen_rtx_SET (dest, gen_rtx_PLUS (mode, tp, x0)));
43e9d192
IB
1418 set_unique_reg_note (get_last_insn (), REG_EQUIV, imm);
1419 return;
1420 }
1421
79496620 1422 case SYMBOL_SMALL_TLSIE:
43e9d192 1423 {
621ad2de
AP
1424 /* In ILP32, the mode of dest can be either SImode or DImode,
1425 while the got entry is always of SImode size. The mode of
1426 dest depends on how dest is used: if dest is assigned to a
1427 pointer (e.g. in the memory), it has SImode; it may have
1428 DImode if dest is dereferenced to access the memeory.
1429 This is why we have to handle three different tlsie_small
1430 patterns here (two patterns for ILP32). */
ef4bddc2 1431 machine_mode mode = GET_MODE (dest);
621ad2de 1432 rtx tmp_reg = gen_reg_rtx (mode);
43e9d192 1433 rtx tp = aarch64_load_tp (NULL);
621ad2de
AP
1434
1435 if (mode == ptr_mode)
1436 {
1437 if (mode == DImode)
1438 emit_insn (gen_tlsie_small_di (tmp_reg, imm));
1439 else
1440 {
1441 emit_insn (gen_tlsie_small_si (tmp_reg, imm));
1442 tp = gen_lowpart (mode, tp);
1443 }
1444 }
1445 else
1446 {
1447 gcc_assert (mode == Pmode);
1448 emit_insn (gen_tlsie_small_sidi (tmp_reg, imm));
1449 }
1450
f7df4a84 1451 emit_insn (gen_rtx_SET (dest, gen_rtx_PLUS (mode, tp, tmp_reg)));
43e9d192
IB
1452 set_unique_reg_note (get_last_insn (), REG_EQUIV, imm);
1453 return;
1454 }
1455
cbf5629e 1456 case SYMBOL_TLSLE12:
d18ba284 1457 case SYMBOL_TLSLE24:
cbf5629e
JW
1458 case SYMBOL_TLSLE32:
1459 case SYMBOL_TLSLE48:
43e9d192 1460 {
cbf5629e 1461 machine_mode mode = GET_MODE (dest);
43e9d192 1462 rtx tp = aarch64_load_tp (NULL);
e6f7f0e9 1463
cbf5629e
JW
1464 if (mode != Pmode)
1465 tp = gen_lowpart (mode, tp);
1466
1467 switch (type)
1468 {
1469 case SYMBOL_TLSLE12:
1470 emit_insn ((mode == DImode ? gen_tlsle12_di : gen_tlsle12_si)
1471 (dest, tp, imm));
1472 break;
1473 case SYMBOL_TLSLE24:
1474 emit_insn ((mode == DImode ? gen_tlsle24_di : gen_tlsle24_si)
1475 (dest, tp, imm));
1476 break;
1477 case SYMBOL_TLSLE32:
1478 emit_insn ((mode == DImode ? gen_tlsle32_di : gen_tlsle32_si)
1479 (dest, imm));
1480 emit_insn ((mode == DImode ? gen_adddi3 : gen_addsi3)
1481 (dest, dest, tp));
1482 break;
1483 case SYMBOL_TLSLE48:
1484 emit_insn ((mode == DImode ? gen_tlsle48_di : gen_tlsle48_si)
1485 (dest, imm));
1486 emit_insn ((mode == DImode ? gen_adddi3 : gen_addsi3)
1487 (dest, dest, tp));
1488 break;
1489 default:
1490 gcc_unreachable ();
1491 }
e6f7f0e9 1492
43e9d192
IB
1493 set_unique_reg_note (get_last_insn (), REG_EQUIV, imm);
1494 return;
1495 }
1496
87dd8ab0
MS
1497 case SYMBOL_TINY_GOT:
1498 emit_insn (gen_ldr_got_tiny (dest, imm));
1499 return;
1500
5ae7caad
JW
1501 case SYMBOL_TINY_TLSIE:
1502 {
1503 machine_mode mode = GET_MODE (dest);
1504 rtx tp = aarch64_load_tp (NULL);
1505
1506 if (mode == ptr_mode)
1507 {
1508 if (mode == DImode)
1509 emit_insn (gen_tlsie_tiny_di (dest, imm, tp));
1510 else
1511 {
1512 tp = gen_lowpart (mode, tp);
1513 emit_insn (gen_tlsie_tiny_si (dest, imm, tp));
1514 }
1515 }
1516 else
1517 {
1518 gcc_assert (mode == Pmode);
1519 emit_insn (gen_tlsie_tiny_sidi (dest, imm, tp));
1520 }
1521
1522 set_unique_reg_note (get_last_insn (), REG_EQUIV, imm);
1523 return;
1524 }
1525
43e9d192
IB
1526 default:
1527 gcc_unreachable ();
1528 }
1529}
1530
1531/* Emit a move from SRC to DEST. Assume that the move expanders can
1532 handle all moves if !can_create_pseudo_p (). The distinction is
1533 important because, unlike emit_move_insn, the move expanders know
1534 how to force Pmode objects into the constant pool even when the
1535 constant pool address is not itself legitimate. */
1536static rtx
1537aarch64_emit_move (rtx dest, rtx src)
1538{
1539 return (can_create_pseudo_p ()
1540 ? emit_move_insn (dest, src)
1541 : emit_move_insn_1 (dest, src));
1542}
1543
030d03b8
RE
1544/* Split a 128-bit move operation into two 64-bit move operations,
1545 taking care to handle partial overlap of register to register
1546 copies. Special cases are needed when moving between GP regs and
1547 FP regs. SRC can be a register, constant or memory; DST a register
1548 or memory. If either operand is memory it must not have any side
1549 effects. */
43e9d192
IB
1550void
1551aarch64_split_128bit_move (rtx dst, rtx src)
1552{
030d03b8
RE
1553 rtx dst_lo, dst_hi;
1554 rtx src_lo, src_hi;
43e9d192 1555
ef4bddc2 1556 machine_mode mode = GET_MODE (dst);
12dc6974 1557
030d03b8
RE
1558 gcc_assert (mode == TImode || mode == TFmode);
1559 gcc_assert (!(side_effects_p (src) || side_effects_p (dst)));
1560 gcc_assert (mode == GET_MODE (src) || GET_MODE (src) == VOIDmode);
43e9d192
IB
1561
1562 if (REG_P (dst) && REG_P (src))
1563 {
030d03b8
RE
1564 int src_regno = REGNO (src);
1565 int dst_regno = REGNO (dst);
43e9d192 1566
030d03b8 1567 /* Handle FP <-> GP regs. */
43e9d192
IB
1568 if (FP_REGNUM_P (dst_regno) && GP_REGNUM_P (src_regno))
1569 {
030d03b8
RE
1570 src_lo = gen_lowpart (word_mode, src);
1571 src_hi = gen_highpart (word_mode, src);
1572
1573 if (mode == TImode)
1574 {
1575 emit_insn (gen_aarch64_movtilow_di (dst, src_lo));
1576 emit_insn (gen_aarch64_movtihigh_di (dst, src_hi));
1577 }
1578 else
1579 {
1580 emit_insn (gen_aarch64_movtflow_di (dst, src_lo));
1581 emit_insn (gen_aarch64_movtfhigh_di (dst, src_hi));
1582 }
1583 return;
43e9d192
IB
1584 }
1585 else if (GP_REGNUM_P (dst_regno) && FP_REGNUM_P (src_regno))
1586 {
030d03b8
RE
1587 dst_lo = gen_lowpart (word_mode, dst);
1588 dst_hi = gen_highpart (word_mode, dst);
1589
1590 if (mode == TImode)
1591 {
1592 emit_insn (gen_aarch64_movdi_tilow (dst_lo, src));
1593 emit_insn (gen_aarch64_movdi_tihigh (dst_hi, src));
1594 }
1595 else
1596 {
1597 emit_insn (gen_aarch64_movdi_tflow (dst_lo, src));
1598 emit_insn (gen_aarch64_movdi_tfhigh (dst_hi, src));
1599 }
1600 return;
43e9d192 1601 }
43e9d192
IB
1602 }
1603
030d03b8
RE
1604 dst_lo = gen_lowpart (word_mode, dst);
1605 dst_hi = gen_highpart (word_mode, dst);
1606 src_lo = gen_lowpart (word_mode, src);
1607 src_hi = gen_highpart_mode (word_mode, mode, src);
1608
1609 /* At most one pairing may overlap. */
1610 if (reg_overlap_mentioned_p (dst_lo, src_hi))
1611 {
1612 aarch64_emit_move (dst_hi, src_hi);
1613 aarch64_emit_move (dst_lo, src_lo);
1614 }
1615 else
1616 {
1617 aarch64_emit_move (dst_lo, src_lo);
1618 aarch64_emit_move (dst_hi, src_hi);
1619 }
43e9d192
IB
1620}
1621
1622bool
1623aarch64_split_128bit_move_p (rtx dst, rtx src)
1624{
1625 return (! REG_P (src)
1626 || ! (FP_REGNUM_P (REGNO (dst)) && FP_REGNUM_P (REGNO (src))));
1627}
1628
8b033a8a
SN
1629/* Split a complex SIMD combine. */
1630
1631void
1632aarch64_split_simd_combine (rtx dst, rtx src1, rtx src2)
1633{
ef4bddc2
RS
1634 machine_mode src_mode = GET_MODE (src1);
1635 machine_mode dst_mode = GET_MODE (dst);
8b033a8a
SN
1636
1637 gcc_assert (VECTOR_MODE_P (dst_mode));
1638
1639 if (REG_P (dst) && REG_P (src1) && REG_P (src2))
1640 {
1641 rtx (*gen) (rtx, rtx, rtx);
1642
1643 switch (src_mode)
1644 {
1645 case V8QImode:
1646 gen = gen_aarch64_simd_combinev8qi;
1647 break;
1648 case V4HImode:
1649 gen = gen_aarch64_simd_combinev4hi;
1650 break;
1651 case V2SImode:
1652 gen = gen_aarch64_simd_combinev2si;
1653 break;
7c369485
AL
1654 case V4HFmode:
1655 gen = gen_aarch64_simd_combinev4hf;
1656 break;
8b033a8a
SN
1657 case V2SFmode:
1658 gen = gen_aarch64_simd_combinev2sf;
1659 break;
1660 case DImode:
1661 gen = gen_aarch64_simd_combinedi;
1662 break;
1663 case DFmode:
1664 gen = gen_aarch64_simd_combinedf;
1665 break;
1666 default:
1667 gcc_unreachable ();
1668 }
1669
1670 emit_insn (gen (dst, src1, src2));
1671 return;
1672 }
1673}
1674
fd4842cd
SN
1675/* Split a complex SIMD move. */
1676
1677void
1678aarch64_split_simd_move (rtx dst, rtx src)
1679{
ef4bddc2
RS
1680 machine_mode src_mode = GET_MODE (src);
1681 machine_mode dst_mode = GET_MODE (dst);
fd4842cd
SN
1682
1683 gcc_assert (VECTOR_MODE_P (dst_mode));
1684
1685 if (REG_P (dst) && REG_P (src))
1686 {
c59b7e28
SN
1687 rtx (*gen) (rtx, rtx);
1688
fd4842cd
SN
1689 gcc_assert (VECTOR_MODE_P (src_mode));
1690
1691 switch (src_mode)
1692 {
1693 case V16QImode:
c59b7e28 1694 gen = gen_aarch64_split_simd_movv16qi;
fd4842cd
SN
1695 break;
1696 case V8HImode:
c59b7e28 1697 gen = gen_aarch64_split_simd_movv8hi;
fd4842cd
SN
1698 break;
1699 case V4SImode:
c59b7e28 1700 gen = gen_aarch64_split_simd_movv4si;
fd4842cd
SN
1701 break;
1702 case V2DImode:
c59b7e28 1703 gen = gen_aarch64_split_simd_movv2di;
fd4842cd 1704 break;
71a11456
AL
1705 case V8HFmode:
1706 gen = gen_aarch64_split_simd_movv8hf;
1707 break;
fd4842cd 1708 case V4SFmode:
c59b7e28 1709 gen = gen_aarch64_split_simd_movv4sf;
fd4842cd
SN
1710 break;
1711 case V2DFmode:
c59b7e28 1712 gen = gen_aarch64_split_simd_movv2df;
fd4842cd
SN
1713 break;
1714 default:
1715 gcc_unreachable ();
1716 }
c59b7e28
SN
1717
1718 emit_insn (gen (dst, src));
fd4842cd
SN
1719 return;
1720 }
1721}
1722
ef22810a
RH
1723bool
1724aarch64_zero_extend_const_eq (machine_mode xmode, rtx x,
1725 machine_mode ymode, rtx y)
1726{
1727 rtx r = simplify_const_unary_operation (ZERO_EXTEND, xmode, y, ymode);
1728 gcc_assert (r != NULL);
1729 return rtx_equal_p (x, r);
1730}
1731
1732
43e9d192 1733static rtx
ef4bddc2 1734aarch64_force_temporary (machine_mode mode, rtx x, rtx value)
43e9d192
IB
1735{
1736 if (can_create_pseudo_p ())
e18b4a81 1737 return force_reg (mode, value);
43e9d192
IB
1738 else
1739 {
1740 x = aarch64_emit_move (x, value);
1741 return x;
1742 }
1743}
1744
1745
1746static rtx
ef4bddc2 1747aarch64_add_offset (machine_mode mode, rtx temp, rtx reg, HOST_WIDE_INT offset)
43e9d192 1748{
9c023bf0 1749 if (!aarch64_plus_immediate (GEN_INT (offset), mode))
43e9d192
IB
1750 {
1751 rtx high;
1752 /* Load the full offset into a register. This
1753 might be improvable in the future. */
1754 high = GEN_INT (offset);
1755 offset = 0;
e18b4a81
YZ
1756 high = aarch64_force_temporary (mode, temp, high);
1757 reg = aarch64_force_temporary (mode, temp,
1758 gen_rtx_PLUS (mode, high, reg));
43e9d192
IB
1759 }
1760 return plus_constant (mode, reg, offset);
1761}
1762
82614948
RR
1763static int
1764aarch64_internal_mov_immediate (rtx dest, rtx imm, bool generate,
1765 machine_mode mode)
43e9d192 1766{
43e9d192 1767 int i;
9a4865db
WD
1768 unsigned HOST_WIDE_INT val, val2, mask;
1769 int one_match, zero_match;
1770 int num_insns;
43e9d192 1771
9a4865db
WD
1772 val = INTVAL (imm);
1773
1774 if (aarch64_move_imm (val, mode))
43e9d192 1775 {
82614948 1776 if (generate)
f7df4a84 1777 emit_insn (gen_rtx_SET (dest, imm));
9a4865db 1778 return 1;
43e9d192
IB
1779 }
1780
9a4865db 1781 if ((val >> 32) == 0 || mode == SImode)
43e9d192 1782 {
82614948
RR
1783 if (generate)
1784 {
9a4865db
WD
1785 emit_insn (gen_rtx_SET (dest, GEN_INT (val & 0xffff)));
1786 if (mode == SImode)
1787 emit_insn (gen_insv_immsi (dest, GEN_INT (16),
1788 GEN_INT ((val >> 16) & 0xffff)));
1789 else
1790 emit_insn (gen_insv_immdi (dest, GEN_INT (16),
1791 GEN_INT ((val >> 16) & 0xffff)));
82614948 1792 }
9a4865db 1793 return 2;
43e9d192
IB
1794 }
1795
1796 /* Remaining cases are all for DImode. */
1797
43e9d192 1798 mask = 0xffff;
9a4865db
WD
1799 zero_match = ((val & mask) == 0) + ((val & (mask << 16)) == 0) +
1800 ((val & (mask << 32)) == 0) + ((val & (mask << 48)) == 0);
1801 one_match = ((~val & mask) == 0) + ((~val & (mask << 16)) == 0) +
1802 ((~val & (mask << 32)) == 0) + ((~val & (mask << 48)) == 0);
43e9d192 1803
62c8d76c 1804 if (zero_match != 2 && one_match != 2)
43e9d192 1805 {
62c8d76c
WD
1806 /* Try emitting a bitmask immediate with a movk replacing 16 bits.
1807 For a 64-bit bitmask try whether changing 16 bits to all ones or
1808 zeroes creates a valid bitmask. To check any repeated bitmask,
1809 try using 16 bits from the other 32-bit half of val. */
43e9d192 1810
62c8d76c 1811 for (i = 0; i < 64; i += 16, mask <<= 16)
43e9d192 1812 {
62c8d76c
WD
1813 val2 = val & ~mask;
1814 if (val2 != val && aarch64_bitmask_imm (val2, mode))
1815 break;
1816 val2 = val | mask;
1817 if (val2 != val && aarch64_bitmask_imm (val2, mode))
1818 break;
1819 val2 = val2 & ~mask;
1820 val2 = val2 | (((val2 >> 32) | (val2 << 32)) & mask);
1821 if (val2 != val && aarch64_bitmask_imm (val2, mode))
1822 break;
43e9d192 1823 }
62c8d76c 1824 if (i != 64)
43e9d192 1825 {
62c8d76c 1826 if (generate)
43e9d192 1827 {
62c8d76c
WD
1828 emit_insn (gen_rtx_SET (dest, GEN_INT (val2)));
1829 emit_insn (gen_insv_immdi (dest, GEN_INT (i),
9a4865db 1830 GEN_INT ((val >> i) & 0xffff)));
43e9d192 1831 }
1312b1ba 1832 return 2;
43e9d192
IB
1833 }
1834 }
1835
9a4865db
WD
 1836 /* Generate 2-4 instructions, skipping 16-bit chunks that are all zeroes or
 1837 all ones, since those are already covered by the initial mov. If
 1838 one_match > zero_match, skip set bits, otherwise skip zero bits. */
2c274197 1839
9a4865db 1840 num_insns = 1;
43e9d192 1841 mask = 0xffff;
9a4865db
WD
1842 val2 = one_match > zero_match ? ~val : val;
1843 i = (val2 & mask) != 0 ? 0 : (val2 & (mask << 16)) != 0 ? 16 : 32;
1844
1845 if (generate)
1846 emit_insn (gen_rtx_SET (dest, GEN_INT (one_match > zero_match
1847 ? (val | ~(mask << i))
1848 : (val & (mask << i)))));
1849 for (i += 16; i < 64; i += 16)
43e9d192 1850 {
9a4865db
WD
1851 if ((val2 & (mask << i)) == 0)
1852 continue;
1853 if (generate)
1854 emit_insn (gen_insv_immdi (dest, GEN_INT (i),
1855 GEN_INT ((val >> i) & 0xffff)));
1856 num_insns ++;
82614948
RR
1857 }
1858
1859 return num_insns;
1860}
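/* As an illustration of the sequences counted above (register and values
   chosen arbitrarily): a constant whose upper half is zero takes two
   instructions,

	mov	x0, 0x5678
	movk	x0, 0x1234, lsl 16	// x0 = 0x12345678

   while a constant with no 16-bit chunk of all zeroes or all ones takes
   four,

	mov	x0, 0xcdef
	movk	x0, 0x90ab, lsl 16
	movk	x0, 0x5678, lsl 32
	movk	x0, 0x1234, lsl 48	// x0 = 0x1234567890abcdef  */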
1861
1862
1863void
1864aarch64_expand_mov_immediate (rtx dest, rtx imm)
1865{
1866 machine_mode mode = GET_MODE (dest);
1867
1868 gcc_assert (mode == SImode || mode == DImode);
1869
1870 /* Check on what type of symbol it is. */
1871 if (GET_CODE (imm) == SYMBOL_REF
1872 || GET_CODE (imm) == LABEL_REF
1873 || GET_CODE (imm) == CONST)
1874 {
1875 rtx mem, base, offset;
1876 enum aarch64_symbol_type sty;
1877
1878 /* If we have (const (plus symbol offset)), separate out the offset
1879 before we start classifying the symbol. */
1880 split_const (imm, &base, &offset);
1881
a6e0bfa7 1882 sty = aarch64_classify_symbol (base, offset);
82614948
RR
1883 switch (sty)
1884 {
1885 case SYMBOL_FORCE_TO_MEM:
1886 if (offset != const0_rtx
1887 && targetm.cannot_force_const_mem (mode, imm))
1888 {
1889 gcc_assert (can_create_pseudo_p ());
1890 base = aarch64_force_temporary (mode, dest, base);
1891 base = aarch64_add_offset (mode, NULL, base, INTVAL (offset));
1892 aarch64_emit_move (dest, base);
1893 return;
1894 }
b4f50fd4 1895
82614948
RR
1896 mem = force_const_mem (ptr_mode, imm);
1897 gcc_assert (mem);
b4f50fd4
RR
1898
1899 /* If we aren't generating PC relative literals, then
1900 we need to expand the literal pool access carefully.
1901 This is something that needs to be done in a number
1902 of places, so could well live as a separate function. */
9ee6540a 1903 if (!aarch64_pcrelative_literal_loads)
b4f50fd4
RR
1904 {
1905 gcc_assert (can_create_pseudo_p ());
1906 base = gen_reg_rtx (ptr_mode);
1907 aarch64_expand_mov_immediate (base, XEXP (mem, 0));
1908 mem = gen_rtx_MEM (ptr_mode, base);
1909 }
1910
82614948
RR
1911 if (mode != ptr_mode)
1912 mem = gen_rtx_ZERO_EXTEND (mode, mem);
b4f50fd4 1913
f7df4a84 1914 emit_insn (gen_rtx_SET (dest, mem));
b4f50fd4 1915
82614948
RR
1916 return;
1917
1918 case SYMBOL_SMALL_TLSGD:
1919 case SYMBOL_SMALL_TLSDESC:
79496620 1920 case SYMBOL_SMALL_TLSIE:
1b1e81f8 1921 case SYMBOL_SMALL_GOT_28K:
6642bdb4 1922 case SYMBOL_SMALL_GOT_4G:
82614948 1923 case SYMBOL_TINY_GOT:
5ae7caad 1924 case SYMBOL_TINY_TLSIE:
82614948
RR
1925 if (offset != const0_rtx)
1926 {
 1927 gcc_assert (can_create_pseudo_p ());
1928 base = aarch64_force_temporary (mode, dest, base);
1929 base = aarch64_add_offset (mode, NULL, base, INTVAL (offset));
1930 aarch64_emit_move (dest, base);
1931 return;
1932 }
1933 /* FALLTHRU */
1934
82614948
RR
1935 case SYMBOL_SMALL_ABSOLUTE:
1936 case SYMBOL_TINY_ABSOLUTE:
cbf5629e 1937 case SYMBOL_TLSLE12:
d18ba284 1938 case SYMBOL_TLSLE24:
cbf5629e
JW
1939 case SYMBOL_TLSLE32:
1940 case SYMBOL_TLSLE48:
82614948
RR
1941 aarch64_load_symref_appropriately (dest, imm, sty);
1942 return;
1943
1944 default:
1945 gcc_unreachable ();
1946 }
1947 }
1948
1949 if (!CONST_INT_P (imm))
1950 {
1951 if (GET_CODE (imm) == HIGH)
f7df4a84 1952 emit_insn (gen_rtx_SET (dest, imm));
82614948
RR
1953 else
1954 {
1955 rtx mem = force_const_mem (mode, imm);
1956 gcc_assert (mem);
f7df4a84 1957 emit_insn (gen_rtx_SET (dest, mem));
43e9d192 1958 }
82614948
RR
1959
1960 return;
43e9d192 1961 }
82614948
RR
1962
1963 aarch64_internal_mov_immediate (dest, imm, true, GET_MODE (dest));
43e9d192
IB
1964}
1965
5be6b295
WD
1966/* Add DELTA to REGNUM in mode MODE. SCRATCHREG can be used to hold a
1967 temporary value if necessary. FRAME_RELATED_P should be true if
1968 the RTX_FRAME_RELATED flag should be set and CFA adjustments added
1969 to the generated instructions. If SCRATCHREG is known to hold
1970 abs (delta), EMIT_MOVE_IMM can be set to false to avoid emitting the
1971 immediate again.
1972
1973 Since this function may be used to adjust the stack pointer, we must
1974 ensure that it cannot cause transient stack deallocation (for example
1975 by first incrementing SP and then decrementing when adjusting by a
1976 large immediate). */
c4ddc43a
JW
1977
1978static void
5be6b295
WD
1979aarch64_add_constant_internal (machine_mode mode, int regnum, int scratchreg,
1980 HOST_WIDE_INT delta, bool frame_related_p,
1981 bool emit_move_imm)
c4ddc43a
JW
1982{
1983 HOST_WIDE_INT mdelta = abs_hwi (delta);
1984 rtx this_rtx = gen_rtx_REG (mode, regnum);
37d6a4b7 1985 rtx_insn *insn;
c4ddc43a 1986
c4ddc43a
JW
1987 if (!mdelta)
1988 return;
1989
5be6b295 1990 /* Single instruction adjustment. */
c4ddc43a
JW
1991 if (aarch64_uimm12_shift (mdelta))
1992 {
37d6a4b7
JW
1993 insn = emit_insn (gen_add2_insn (this_rtx, GEN_INT (delta)));
1994 RTX_FRAME_RELATED_P (insn) = frame_related_p;
c4ddc43a
JW
1995 return;
1996 }
1997
5be6b295
WD
1998 /* Emit 2 additions/subtractions if the adjustment is less than 24 bits.
 1999 Only do this if mdelta is not a 16-bit move immediate, as adjusting
 2000 using a move is better in that case. */
2001 if (mdelta < 0x1000000 && !aarch64_move_imm (mdelta, mode))
c4ddc43a
JW
2002 {
2003 HOST_WIDE_INT low_off = mdelta & 0xfff;
2004
2005 low_off = delta < 0 ? -low_off : low_off;
37d6a4b7
JW
2006 insn = emit_insn (gen_add2_insn (this_rtx, GEN_INT (low_off)));
2007 RTX_FRAME_RELATED_P (insn) = frame_related_p;
2008 insn = emit_insn (gen_add2_insn (this_rtx, GEN_INT (delta - low_off)));
2009 RTX_FRAME_RELATED_P (insn) = frame_related_p;
c4ddc43a
JW
2010 return;
2011 }
2012
5be6b295 2013 /* Emit a move immediate if required and an addition/subtraction. */
c4ddc43a 2014 rtx scratch_rtx = gen_rtx_REG (mode, scratchreg);
5be6b295
WD
2015 if (emit_move_imm)
2016 aarch64_internal_mov_immediate (scratch_rtx, GEN_INT (mdelta), true, mode);
2017 insn = emit_insn (delta < 0 ? gen_sub2_insn (this_rtx, scratch_rtx)
2018 : gen_add2_insn (this_rtx, scratch_rtx));
37d6a4b7
JW
2019 if (frame_related_p)
2020 {
2021 RTX_FRAME_RELATED_P (insn) = frame_related_p;
2022 rtx adj = plus_constant (mode, this_rtx, delta);
 2023 add_reg_note (insn, REG_CFA_ADJUST_CFA, gen_rtx_SET (this_rtx, adj));
2024 }
c4ddc43a
JW
2025}
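/* For illustration (the register and offsets below are arbitrary examples):
   an adjustment that fits a 12-bit, possibly shifted, immediate becomes a
   single add or sub; an adjustment such as 0x123456 is split into two,

	add	sp, sp, #0x456
	add	sp, sp, #0x123, lsl 12

   and larger adjustments go through the scratch register, e.g.

	mov	x16, #0x4567
	movk	x16, #0x123, lsl 16
	sub	sp, sp, x16  */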
2026
5be6b295
WD
2027static inline void
2028aarch64_add_constant (machine_mode mode, int regnum, int scratchreg,
2029 HOST_WIDE_INT delta)
2030{
2031 aarch64_add_constant_internal (mode, regnum, scratchreg, delta, false, true);
2032}
2033
2034static inline void
2035aarch64_add_sp (int scratchreg, HOST_WIDE_INT delta, bool emit_move_imm)
2036{
2037 aarch64_add_constant_internal (Pmode, SP_REGNUM, scratchreg, delta,
2038 true, emit_move_imm);
2039}
2040
2041static inline void
2042aarch64_sub_sp (int scratchreg, HOST_WIDE_INT delta, bool frame_related_p)
2043{
2044 aarch64_add_constant_internal (Pmode, SP_REGNUM, scratchreg, -delta,
2045 frame_related_p, true);
2046}
2047
43e9d192 2048static bool
fee9ba42
JW
2049aarch64_function_ok_for_sibcall (tree decl ATTRIBUTE_UNUSED,
2050 tree exp ATTRIBUTE_UNUSED)
43e9d192 2051{
fee9ba42 2052 /* Currently, always true. */
43e9d192
IB
2053 return true;
2054}
2055
2056/* Implement TARGET_PASS_BY_REFERENCE. */
2057
2058static bool
2059aarch64_pass_by_reference (cumulative_args_t pcum ATTRIBUTE_UNUSED,
ef4bddc2 2060 machine_mode mode,
43e9d192
IB
2061 const_tree type,
2062 bool named ATTRIBUTE_UNUSED)
2063{
2064 HOST_WIDE_INT size;
ef4bddc2 2065 machine_mode dummymode;
43e9d192
IB
2066 int nregs;
2067
2068 /* GET_MODE_SIZE (BLKmode) is useless since it is 0. */
2069 size = (mode == BLKmode && type)
2070 ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
2071
aadc1c43
MHD
2072 /* Aggregates are passed by reference based on their size. */
2073 if (type && AGGREGATE_TYPE_P (type))
43e9d192 2074 {
aadc1c43 2075 size = int_size_in_bytes (type);
43e9d192
IB
2076 }
2077
 2078 /* Variable sized arguments are always passed by reference. */
2079 if (size < 0)
2080 return true;
2081
2082 /* Can this be a candidate to be passed in fp/simd register(s)? */
2083 if (aarch64_vfp_is_call_or_return_candidate (mode, type,
2084 &dummymode, &nregs,
2085 NULL))
2086 return false;
2087
2088 /* Arguments which are variable sized or larger than 2 registers are
 2089 passed by reference unless they are a homogeneous floating-point
2090 aggregate. */
2091 return size > 2 * UNITS_PER_WORD;
2092}
2093
2094/* Return TRUE if VALTYPE is padded to its least significant bits. */
2095static bool
2096aarch64_return_in_msb (const_tree valtype)
2097{
ef4bddc2 2098 machine_mode dummy_mode;
43e9d192
IB
2099 int dummy_int;
2100
2101 /* Never happens in little-endian mode. */
2102 if (!BYTES_BIG_ENDIAN)
2103 return false;
2104
2105 /* Only composite types smaller than or equal to 16 bytes can
2106 be potentially returned in registers. */
2107 if (!aarch64_composite_type_p (valtype, TYPE_MODE (valtype))
2108 || int_size_in_bytes (valtype) <= 0
2109 || int_size_in_bytes (valtype) > 16)
2110 return false;
2111
2112 /* But not a composite that is an HFA (Homogeneous Floating-point Aggregate)
2113 or an HVA (Homogeneous Short-Vector Aggregate); such a special composite
2114 is always passed/returned in the least significant bits of fp/simd
2115 register(s). */
2116 if (aarch64_vfp_is_call_or_return_candidate (TYPE_MODE (valtype), valtype,
2117 &dummy_mode, &dummy_int, NULL))
2118 return false;
2119
2120 return true;
2121}
2122
2123/* Implement TARGET_FUNCTION_VALUE.
2124 Define how to find the value returned by a function. */
2125
2126static rtx
2127aarch64_function_value (const_tree type, const_tree func,
2128 bool outgoing ATTRIBUTE_UNUSED)
2129{
ef4bddc2 2130 machine_mode mode;
43e9d192
IB
2131 int unsignedp;
2132 int count;
ef4bddc2 2133 machine_mode ag_mode;
43e9d192
IB
2134
2135 mode = TYPE_MODE (type);
2136 if (INTEGRAL_TYPE_P (type))
2137 mode = promote_function_mode (type, mode, &unsignedp, func, 1);
2138
2139 if (aarch64_return_in_msb (type))
2140 {
2141 HOST_WIDE_INT size = int_size_in_bytes (type);
2142
2143 if (size % UNITS_PER_WORD != 0)
2144 {
2145 size += UNITS_PER_WORD - size % UNITS_PER_WORD;
2146 mode = mode_for_size (size * BITS_PER_UNIT, MODE_INT, 0);
2147 }
2148 }
2149
2150 if (aarch64_vfp_is_call_or_return_candidate (mode, type,
2151 &ag_mode, &count, NULL))
2152 {
2153 if (!aarch64_composite_type_p (type, mode))
2154 {
2155 gcc_assert (count == 1 && mode == ag_mode);
2156 return gen_rtx_REG (mode, V0_REGNUM);
2157 }
2158 else
2159 {
2160 int i;
2161 rtx par;
2162
2163 par = gen_rtx_PARALLEL (mode, rtvec_alloc (count));
2164 for (i = 0; i < count; i++)
2165 {
2166 rtx tmp = gen_rtx_REG (ag_mode, V0_REGNUM + i);
2167 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp,
2168 GEN_INT (i * GET_MODE_SIZE (ag_mode)));
2169 XVECEXP (par, 0, i) = tmp;
2170 }
2171 return par;
2172 }
2173 }
2174 else
2175 return gen_rtx_REG (mode, R0_REGNUM);
2176}
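/* For example, under the rules implemented above a homogeneous
   floating-point aggregate such as "struct { float x, y, z, w; }" is
   returned with one member in each of V0-V3 (the PARALLEL case), while a
   scalar such as "long" is returned in X0.  */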
2177
2178/* Implements TARGET_FUNCTION_VALUE_REGNO_P.
2179 Return true if REGNO is the number of a hard register in which the values
2180 of called function may come back. */
2181
2182static bool
2183aarch64_function_value_regno_p (const unsigned int regno)
2184{
2185 /* Maximum of 16 bytes can be returned in the general registers. Examples
2186 of 16-byte return values are: 128-bit integers and 16-byte small
2187 structures (excluding homogeneous floating-point aggregates). */
2188 if (regno == R0_REGNUM || regno == R1_REGNUM)
2189 return true;
2190
2191 /* Up to four fp/simd registers can return a function value, e.g. a
2192 homogeneous floating-point aggregate having four members. */
2193 if (regno >= V0_REGNUM && regno < V0_REGNUM + HA_MAX_NUM_FLDS)
d5726973 2194 return TARGET_FLOAT;
43e9d192
IB
2195
2196 return false;
2197}
2198
2199/* Implement TARGET_RETURN_IN_MEMORY.
2200
2201 If the type T of the result of a function is such that
2202 void func (T arg)
2203 would require that arg be passed as a value in a register (or set of
2204 registers) according to the parameter passing rules, then the result
2205 is returned in the same registers as would be used for such an
2206 argument. */
2207
2208static bool
2209aarch64_return_in_memory (const_tree type, const_tree fndecl ATTRIBUTE_UNUSED)
2210{
2211 HOST_WIDE_INT size;
ef4bddc2 2212 machine_mode ag_mode;
43e9d192
IB
2213 int count;
2214
2215 if (!AGGREGATE_TYPE_P (type)
2216 && TREE_CODE (type) != COMPLEX_TYPE
2217 && TREE_CODE (type) != VECTOR_TYPE)
2218 /* Simple scalar types always returned in registers. */
2219 return false;
2220
2221 if (aarch64_vfp_is_call_or_return_candidate (TYPE_MODE (type),
2222 type,
2223 &ag_mode,
2224 &count,
2225 NULL))
2226 return false;
2227
2228 /* Types larger than 2 registers returned in memory. */
2229 size = int_size_in_bytes (type);
2230 return (size < 0 || size > 2 * UNITS_PER_WORD);
2231}
2232
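/* Return true if an argument of mode MODE and type TYPE can be passed in
   SIMD/FP registers, setting *NREGS to the number of registers required and
   recording the per-register mode in PCUM_V.  */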
2233static bool
ef4bddc2 2234aarch64_vfp_is_call_candidate (cumulative_args_t pcum_v, machine_mode mode,
43e9d192
IB
2235 const_tree type, int *nregs)
2236{
2237 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
2238 return aarch64_vfp_is_call_or_return_candidate (mode,
2239 type,
2240 &pcum->aapcs_vfp_rmode,
2241 nregs,
2242 NULL);
2243}
2244
2245/* Given MODE and TYPE of a function argument, return the alignment in
2246 bits. The idea is to suppress any stronger alignment requested by
2247 the user and opt for the natural alignment (specified in AAPCS64 \S 4.1).
2248 This is a helper function for local use only. */
2249
2250static unsigned int
ef4bddc2 2251aarch64_function_arg_alignment (machine_mode mode, const_tree type)
43e9d192 2252{
75d6cc81
AL
2253 if (!type)
2254 return GET_MODE_ALIGNMENT (mode);
2255 if (integer_zerop (TYPE_SIZE (type)))
2256 return 0;
43e9d192 2257
75d6cc81
AL
2258 gcc_assert (TYPE_MODE (type) == mode);
2259
2260 if (!AGGREGATE_TYPE_P (type))
2261 return TYPE_ALIGN (TYPE_MAIN_VARIANT (type));
2262
2263 if (TREE_CODE (type) == ARRAY_TYPE)
2264 return TYPE_ALIGN (TREE_TYPE (type));
2265
2266 unsigned int alignment = 0;
2267
2268 for (tree field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
2269 alignment = std::max (alignment, DECL_ALIGN (field));
43e9d192
IB
2270
2271 return alignment;
2272}
2273
 2274/* Lay out a function argument according to the AAPCS64 rules. The rule
2275 numbers refer to the rule numbers in the AAPCS64. */
2276
2277static void
ef4bddc2 2278aarch64_layout_arg (cumulative_args_t pcum_v, machine_mode mode,
43e9d192
IB
2279 const_tree type,
2280 bool named ATTRIBUTE_UNUSED)
2281{
2282 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
2283 int ncrn, nvrn, nregs;
2284 bool allocate_ncrn, allocate_nvrn;
3abf17cf 2285 HOST_WIDE_INT size;
43e9d192
IB
2286
2287 /* We need to do this once per argument. */
2288 if (pcum->aapcs_arg_processed)
2289 return;
2290
2291 pcum->aapcs_arg_processed = true;
2292
3abf17cf
YZ
2293 /* Size in bytes, rounded to the nearest multiple of 8 bytes. */
2294 size
4f59f9f2
UB
2295 = ROUND_UP (type ? int_size_in_bytes (type) : GET_MODE_SIZE (mode),
2296 UNITS_PER_WORD);
3abf17cf 2297
43e9d192
IB
2298 allocate_ncrn = (type) ? !(FLOAT_TYPE_P (type)) : !FLOAT_MODE_P (mode);
2299 allocate_nvrn = aarch64_vfp_is_call_candidate (pcum_v,
2300 mode,
2301 type,
2302 &nregs);
2303
2304 /* allocate_ncrn may be false-positive, but allocate_nvrn is quite reliable.
2305 The following code thus handles passing by SIMD/FP registers first. */
2306
2307 nvrn = pcum->aapcs_nvrn;
2308
 2309 /* C1 - C5 for floating point, homogeneous floating-point aggregates (HFA)
 2310 and homogeneous short-vector aggregates (HVA). */
2311 if (allocate_nvrn)
2312 {
261fb553
AL
2313 if (!TARGET_FLOAT)
2314 aarch64_err_no_fpadvsimd (mode, "argument");
2315
43e9d192
IB
2316 if (nvrn + nregs <= NUM_FP_ARG_REGS)
2317 {
2318 pcum->aapcs_nextnvrn = nvrn + nregs;
2319 if (!aarch64_composite_type_p (type, mode))
2320 {
2321 gcc_assert (nregs == 1);
2322 pcum->aapcs_reg = gen_rtx_REG (mode, V0_REGNUM + nvrn);
2323 }
2324 else
2325 {
2326 rtx par;
2327 int i;
2328 par = gen_rtx_PARALLEL (mode, rtvec_alloc (nregs));
2329 for (i = 0; i < nregs; i++)
2330 {
2331 rtx tmp = gen_rtx_REG (pcum->aapcs_vfp_rmode,
2332 V0_REGNUM + nvrn + i);
2333 tmp = gen_rtx_EXPR_LIST
2334 (VOIDmode, tmp,
2335 GEN_INT (i * GET_MODE_SIZE (pcum->aapcs_vfp_rmode)));
2336 XVECEXP (par, 0, i) = tmp;
2337 }
2338 pcum->aapcs_reg = par;
2339 }
2340 return;
2341 }
2342 else
2343 {
2344 /* C.3 NSRN is set to 8. */
2345 pcum->aapcs_nextnvrn = NUM_FP_ARG_REGS;
2346 goto on_stack;
2347 }
2348 }
2349
2350 ncrn = pcum->aapcs_ncrn;
3abf17cf 2351 nregs = size / UNITS_PER_WORD;
43e9d192
IB
2352
 2353 /* C6 - C9, though the sign and zero extension semantics are
 2354 handled elsewhere. This is the case where the argument fits
 2355 entirely in general registers. */
2356 if (allocate_ncrn && (ncrn + nregs <= NUM_ARG_REGS))
2357 {
2358 unsigned int alignment = aarch64_function_arg_alignment (mode, type);
2359
2360 gcc_assert (nregs == 0 || nregs == 1 || nregs == 2);
2361
2362 /* C.8 if the argument has an alignment of 16 then the NGRN is
2363 rounded up to the next even number. */
2364 if (nregs == 2 && alignment == 16 * BITS_PER_UNIT && ncrn % 2)
2365 {
2366 ++ncrn;
2367 gcc_assert (ncrn + nregs <= NUM_ARG_REGS);
2368 }
2369 /* NREGS can be 0 when e.g. an empty structure is to be passed.
2370 A reg is still generated for it, but the caller should be smart
2371 enough not to use it. */
2372 if (nregs == 0 || nregs == 1 || GET_MODE_CLASS (mode) == MODE_INT)
2373 {
2374 pcum->aapcs_reg = gen_rtx_REG (mode, R0_REGNUM + ncrn);
2375 }
2376 else
2377 {
2378 rtx par;
2379 int i;
2380
2381 par = gen_rtx_PARALLEL (mode, rtvec_alloc (nregs));
2382 for (i = 0; i < nregs; i++)
2383 {
2384 rtx tmp = gen_rtx_REG (word_mode, R0_REGNUM + ncrn + i);
2385 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp,
2386 GEN_INT (i * UNITS_PER_WORD));
2387 XVECEXP (par, 0, i) = tmp;
2388 }
2389 pcum->aapcs_reg = par;
2390 }
2391
2392 pcum->aapcs_nextncrn = ncrn + nregs;
2393 return;
2394 }
2395
2396 /* C.11 */
2397 pcum->aapcs_nextncrn = NUM_ARG_REGS;
2398
2399 /* The argument is passed on stack; record the needed number of words for
3abf17cf 2400 this argument and align the total size if necessary. */
43e9d192 2401on_stack:
3abf17cf 2402 pcum->aapcs_stack_words = size / UNITS_PER_WORD;
43e9d192 2403 if (aarch64_function_arg_alignment (mode, type) == 16 * BITS_PER_UNIT)
4f59f9f2
UB
2404 pcum->aapcs_stack_size = ROUND_UP (pcum->aapcs_stack_size,
2405 16 / UNITS_PER_WORD);
43e9d192
IB
2406 return;
2407}
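/* As an example of rule C.8 handled above: for "f (int a, __int128 b)",
   A occupies W0, and B, which needs two registers and has 16-byte alignment,
   is allocated to the even-numbered pair X2/X3 rather than to X1/X2.  */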
2408
2409/* Implement TARGET_FUNCTION_ARG. */
2410
2411static rtx
ef4bddc2 2412aarch64_function_arg (cumulative_args_t pcum_v, machine_mode mode,
43e9d192
IB
2413 const_tree type, bool named)
2414{
2415 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
2416 gcc_assert (pcum->pcs_variant == ARM_PCS_AAPCS64);
2417
2418 if (mode == VOIDmode)
2419 return NULL_RTX;
2420
2421 aarch64_layout_arg (pcum_v, mode, type, named);
2422 return pcum->aapcs_reg;
2423}
2424
2425void
2426aarch64_init_cumulative_args (CUMULATIVE_ARGS *pcum,
2427 const_tree fntype ATTRIBUTE_UNUSED,
2428 rtx libname ATTRIBUTE_UNUSED,
2429 const_tree fndecl ATTRIBUTE_UNUSED,
2430 unsigned n_named ATTRIBUTE_UNUSED)
2431{
2432 pcum->aapcs_ncrn = 0;
2433 pcum->aapcs_nvrn = 0;
2434 pcum->aapcs_nextncrn = 0;
2435 pcum->aapcs_nextnvrn = 0;
2436 pcum->pcs_variant = ARM_PCS_AAPCS64;
2437 pcum->aapcs_reg = NULL_RTX;
2438 pcum->aapcs_arg_processed = false;
2439 pcum->aapcs_stack_words = 0;
2440 pcum->aapcs_stack_size = 0;
2441
261fb553
AL
2442 if (!TARGET_FLOAT
2443 && fndecl && TREE_PUBLIC (fndecl)
2444 && fntype && fntype != error_mark_node)
2445 {
2446 const_tree type = TREE_TYPE (fntype);
2447 machine_mode mode ATTRIBUTE_UNUSED; /* To pass pointer as argument. */
2448 int nregs ATTRIBUTE_UNUSED; /* Likewise. */
2449 if (aarch64_vfp_is_call_or_return_candidate (TYPE_MODE (type), type,
2450 &mode, &nregs, NULL))
2451 aarch64_err_no_fpadvsimd (TYPE_MODE (type), "return type");
2452 }
43e9d192
IB
2453 return;
2454}
2455
2456static void
2457aarch64_function_arg_advance (cumulative_args_t pcum_v,
ef4bddc2 2458 machine_mode mode,
43e9d192
IB
2459 const_tree type,
2460 bool named)
2461{
2462 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
2463 if (pcum->pcs_variant == ARM_PCS_AAPCS64)
2464 {
2465 aarch64_layout_arg (pcum_v, mode, type, named);
2466 gcc_assert ((pcum->aapcs_reg != NULL_RTX)
2467 != (pcum->aapcs_stack_words != 0));
2468 pcum->aapcs_arg_processed = false;
2469 pcum->aapcs_ncrn = pcum->aapcs_nextncrn;
2470 pcum->aapcs_nvrn = pcum->aapcs_nextnvrn;
2471 pcum->aapcs_stack_size += pcum->aapcs_stack_words;
2472 pcum->aapcs_stack_words = 0;
2473 pcum->aapcs_reg = NULL_RTX;
2474 }
2475}
2476
2477bool
2478aarch64_function_arg_regno_p (unsigned regno)
2479{
2480 return ((GP_REGNUM_P (regno) && regno < R0_REGNUM + NUM_ARG_REGS)
2481 || (FP_REGNUM_P (regno) && regno < V0_REGNUM + NUM_FP_ARG_REGS));
2482}
2483
2484/* Implement FUNCTION_ARG_BOUNDARY. Every parameter gets at least
2485 PARM_BOUNDARY bits of alignment, but will be given anything up
2486 to STACK_BOUNDARY bits if the type requires it. This makes sure
2487 that both before and after the layout of each argument, the Next
2488 Stacked Argument Address (NSAA) will have a minimum alignment of
2489 8 bytes. */
2490
2491static unsigned int
ef4bddc2 2492aarch64_function_arg_boundary (machine_mode mode, const_tree type)
43e9d192
IB
2493{
2494 unsigned int alignment = aarch64_function_arg_alignment (mode, type);
2495
2496 if (alignment < PARM_BOUNDARY)
2497 alignment = PARM_BOUNDARY;
2498 if (alignment > STACK_BOUNDARY)
2499 alignment = STACK_BOUNDARY;
2500 return alignment;
2501}
2502
2503/* For use by FUNCTION_ARG_PADDING (MODE, TYPE).
2504
2505 Return true if an argument passed on the stack should be padded upwards,
2506 i.e. if the least-significant byte of the stack slot has useful data.
2507
2508 Small aggregate types are placed in the lowest memory address.
2509
2510 The related parameter passing rules are B.4, C.3, C.5 and C.14. */
2511
2512bool
ef4bddc2 2513aarch64_pad_arg_upward (machine_mode mode, const_tree type)
43e9d192
IB
2514{
2515 /* On little-endian targets, the least significant byte of every stack
2516 argument is passed at the lowest byte address of the stack slot. */
2517 if (!BYTES_BIG_ENDIAN)
2518 return true;
2519
00edcfbe 2520 /* Otherwise, integral, floating-point and pointer types are padded downward:
43e9d192
IB
2521 the least significant byte of a stack argument is passed at the highest
2522 byte address of the stack slot. */
2523 if (type
00edcfbe
YZ
2524 ? (INTEGRAL_TYPE_P (type) || SCALAR_FLOAT_TYPE_P (type)
2525 || POINTER_TYPE_P (type))
43e9d192
IB
2526 : (SCALAR_INT_MODE_P (mode) || SCALAR_FLOAT_MODE_P (mode)))
2527 return false;
2528
2529 /* Everything else padded upward, i.e. data in first byte of stack slot. */
2530 return true;
2531}
2532
2533/* Similarly, for use by BLOCK_REG_PADDING (MODE, TYPE, FIRST).
2534
2535 It specifies padding for the last (may also be the only)
2536 element of a block move between registers and memory. If
2537 assuming the block is in the memory, padding upward means that
2538 the last element is padded after its highest significant byte,
 2539 while in downward padding, the last element is padded at its
 2540 least significant byte side.
2541
2542 Small aggregates and small complex types are always padded
2543 upwards.
2544
2545 We don't need to worry about homogeneous floating-point or
2546 short-vector aggregates; their move is not affected by the
2547 padding direction determined here. Regardless of endianness,
2548 each element of such an aggregate is put in the least
2549 significant bits of a fp/simd register.
2550
2551 Return !BYTES_BIG_ENDIAN if the least significant byte of the
2552 register has useful data, and return the opposite if the most
2553 significant byte does. */
2554
2555bool
ef4bddc2 2556aarch64_pad_reg_upward (machine_mode mode, const_tree type,
43e9d192
IB
2557 bool first ATTRIBUTE_UNUSED)
2558{
2559
2560 /* Small composite types are always padded upward. */
2561 if (BYTES_BIG_ENDIAN && aarch64_composite_type_p (type, mode))
2562 {
2563 HOST_WIDE_INT size = (type ? int_size_in_bytes (type)
2564 : GET_MODE_SIZE (mode));
2565 if (size < 2 * UNITS_PER_WORD)
2566 return true;
2567 }
2568
2569 /* Otherwise, use the default padding. */
2570 return !BYTES_BIG_ENDIAN;
2571}
2572
ef4bddc2 2573static machine_mode
43e9d192
IB
2574aarch64_libgcc_cmp_return_mode (void)
2575{
2576 return SImode;
2577}
2578
a3eb8a52
EB
2579#define PROBE_INTERVAL (1 << STACK_CHECK_PROBE_INTERVAL_EXP)
2580
2581/* We use the 12-bit shifted immediate arithmetic instructions so values
 2582 must be a multiple of (1 << 12), i.e. 4096. */
2583#define ARITH_FACTOR 4096
2584
2585#if (PROBE_INTERVAL % ARITH_FACTOR) != 0
2586#error Cannot use simple address calculation for stack probing
2587#endif
2588
2589/* The pair of scratch registers used for stack probing. */
2590#define PROBE_STACK_FIRST_REG 9
2591#define PROBE_STACK_SECOND_REG 10
2592
2593/* Emit code to probe a range of stack addresses from FIRST to FIRST+SIZE,
2594 inclusive. These are offsets from the current stack pointer. */
2595
2596static void
2597aarch64_emit_probe_stack_range (HOST_WIDE_INT first, HOST_WIDE_INT size)
2598{
2599 rtx reg1 = gen_rtx_REG (ptr_mode, PROBE_STACK_FIRST_REG);
2600
2601 /* See the same assertion on PROBE_INTERVAL above. */
2602 gcc_assert ((first % ARITH_FACTOR) == 0);
2603
2604 /* See if we have a constant small number of probes to generate. If so,
2605 that's the easy case. */
2606 if (size <= PROBE_INTERVAL)
2607 {
2608 const HOST_WIDE_INT base = ROUND_UP (size, ARITH_FACTOR);
2609
2610 emit_set_insn (reg1,
2611 plus_constant (ptr_mode,
2612 stack_pointer_rtx, -(first + base)));
2613 emit_stack_probe (plus_constant (ptr_mode, reg1, base - size));
2614 }
2615
2616 /* The run-time loop is made up of 8 insns in the generic case while the
2617 compile-time loop is made up of 4+2*(n-2) insns for n # of intervals. */
2618 else if (size <= 4 * PROBE_INTERVAL)
2619 {
2620 HOST_WIDE_INT i, rem;
2621
2622 emit_set_insn (reg1,
2623 plus_constant (ptr_mode,
2624 stack_pointer_rtx,
2625 -(first + PROBE_INTERVAL)));
2626 emit_stack_probe (reg1);
2627
2628 /* Probe at FIRST + N * PROBE_INTERVAL for values of N from 2 until
2629 it exceeds SIZE. If only two probes are needed, this will not
2630 generate any code. Then probe at FIRST + SIZE. */
2631 for (i = 2 * PROBE_INTERVAL; i < size; i += PROBE_INTERVAL)
2632 {
2633 emit_set_insn (reg1,
2634 plus_constant (ptr_mode, reg1, -PROBE_INTERVAL));
2635 emit_stack_probe (reg1);
2636 }
2637
2638 rem = size - (i - PROBE_INTERVAL);
2639 if (rem > 256)
2640 {
2641 const HOST_WIDE_INT base = ROUND_UP (rem, ARITH_FACTOR);
2642
2643 emit_set_insn (reg1, plus_constant (ptr_mode, reg1, -base));
2644 emit_stack_probe (plus_constant (ptr_mode, reg1, base - rem));
2645 }
2646 else
2647 emit_stack_probe (plus_constant (ptr_mode, reg1, -rem));
2648 }
2649
2650 /* Otherwise, do the same as above, but in a loop. Note that we must be
2651 extra careful with variables wrapping around because we might be at
2652 the very top (or the very bottom) of the address space and we have
2653 to be able to handle this case properly; in particular, we use an
2654 equality test for the loop condition. */
2655 else
2656 {
2657 rtx reg2 = gen_rtx_REG (ptr_mode, PROBE_STACK_SECOND_REG);
2658
2659 /* Step 1: round SIZE to the previous multiple of the interval. */
2660
2661 HOST_WIDE_INT rounded_size = size & -PROBE_INTERVAL;
2662
2663
2664 /* Step 2: compute initial and final value of the loop counter. */
2665
2666 /* TEST_ADDR = SP + FIRST. */
2667 emit_set_insn (reg1,
2668 plus_constant (ptr_mode, stack_pointer_rtx, -first));
2669
2670 /* LAST_ADDR = SP + FIRST + ROUNDED_SIZE. */
2671 emit_set_insn (reg2,
2672 plus_constant (ptr_mode, stack_pointer_rtx,
2673 -(first + rounded_size)));
2674
2675
2676 /* Step 3: the loop
2677
2678 do
2679 {
2680 TEST_ADDR = TEST_ADDR + PROBE_INTERVAL
2681 probe at TEST_ADDR
2682 }
2683 while (TEST_ADDR != LAST_ADDR)
2684
2685 probes at FIRST + N * PROBE_INTERVAL for values of N from 1
2686 until it is equal to ROUNDED_SIZE. */
2687
2688 if (ptr_mode == DImode)
2689 emit_insn (gen_probe_stack_range_di (reg1, reg1, reg2));
2690 else
2691 emit_insn (gen_probe_stack_range_si (reg1, reg1, reg2));
2692
2693
2694 /* Step 4: probe at FIRST + SIZE if we cannot assert at compile-time
2695 that SIZE is equal to ROUNDED_SIZE. */
2696
2697 if (size != rounded_size)
2698 {
2699 HOST_WIDE_INT rem = size - rounded_size;
2700
2701 if (rem > 256)
2702 {
2703 const HOST_WIDE_INT base = ROUND_UP (rem, ARITH_FACTOR);
2704
2705 emit_set_insn (reg2, plus_constant (ptr_mode, reg2, -base));
2706 emit_stack_probe (plus_constant (ptr_mode, reg2, base - rem));
2707 }
2708 else
2709 emit_stack_probe (plus_constant (ptr_mode, reg2, -rem));
2710 }
2711 }
2712
2713 /* Make sure nothing is scheduled before we are done. */
2714 emit_insn (gen_blockage ());
2715}
2716
2717/* Probe a range of stack addresses from REG1 to REG2 inclusive. These are
2718 absolute addresses. */
2719
2720const char *
2721aarch64_output_probe_stack_range (rtx reg1, rtx reg2)
2722{
2723 static int labelno = 0;
2724 char loop_lab[32];
2725 rtx xops[2];
2726
2727 ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno++);
2728
2729 /* Loop. */
2730 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);
2731
2732 /* TEST_ADDR = TEST_ADDR + PROBE_INTERVAL. */
2733 xops[0] = reg1;
2734 xops[1] = GEN_INT (PROBE_INTERVAL);
2735 output_asm_insn ("sub\t%0, %0, %1", xops);
2736
2737 /* Probe at TEST_ADDR. */
2738 output_asm_insn ("str\txzr, [%0]", xops);
2739
2740 /* Test if TEST_ADDR == LAST_ADDR. */
2741 xops[1] = reg2;
2742 output_asm_insn ("cmp\t%0, %1", xops);
2743
2744 /* Branch. */
2745 fputs ("\tb.ne\t", asm_out_file);
2746 assemble_name_raw (asm_out_file, loop_lab);
2747 fputc ('\n', asm_out_file);
2748
2749 return "";
2750}
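/* With the default 4kB probe interval and the two scratch registers defined
   above, the loop emitted by this function looks like:

	.LPSRL0:
	sub	x9, x9, 4096
	str	xzr, [x9]
	cmp	x9, x10
	b.ne	.LPSRL0  */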
2751
43e9d192
IB
2752static bool
2753aarch64_frame_pointer_required (void)
2754{
0b7f8166
MS
2755 /* In aarch64_override_options_after_change
2756 flag_omit_leaf_frame_pointer turns off the frame pointer by
2757 default. Turn it back on now if we've not got a leaf
2758 function. */
2759 if (flag_omit_leaf_frame_pointer
2760 && (!crtl->is_leaf || df_regs_ever_live_p (LR_REGNUM)))
2761 return true;
43e9d192 2762
0b7f8166 2763 return false;
43e9d192
IB
2764}
2765
2766/* Mark the registers that need to be saved by the callee and calculate
2767 the size of the callee-saved registers area and frame record (both FP
2768 and LR may be omitted). */
2769static void
2770aarch64_layout_frame (void)
2771{
2772 HOST_WIDE_INT offset = 0;
4b0685d9 2773 int regno, last_fp_reg = INVALID_REGNUM;
43e9d192
IB
2774
2775 if (reload_completed && cfun->machine->frame.laid_out)
2776 return;
2777
97826595
MS
2778#define SLOT_NOT_REQUIRED (-2)
2779#define SLOT_REQUIRED (-1)
2780
71bfb77a
WD
2781 cfun->machine->frame.wb_candidate1 = INVALID_REGNUM;
2782 cfun->machine->frame.wb_candidate2 = INVALID_REGNUM;
363ffa50 2783
43e9d192
IB
2784 /* First mark all the registers that really need to be saved... */
2785 for (regno = R0_REGNUM; regno <= R30_REGNUM; regno++)
97826595 2786 cfun->machine->frame.reg_offset[regno] = SLOT_NOT_REQUIRED;
43e9d192
IB
2787
2788 for (regno = V0_REGNUM; regno <= V31_REGNUM; regno++)
97826595 2789 cfun->machine->frame.reg_offset[regno] = SLOT_NOT_REQUIRED;
43e9d192
IB
2790
2791 /* ... that includes the eh data registers (if needed)... */
2792 if (crtl->calls_eh_return)
2793 for (regno = 0; EH_RETURN_DATA_REGNO (regno) != INVALID_REGNUM; regno++)
97826595
MS
2794 cfun->machine->frame.reg_offset[EH_RETURN_DATA_REGNO (regno)]
2795 = SLOT_REQUIRED;
43e9d192
IB
2796
2797 /* ... and any callee saved register that dataflow says is live. */
2798 for (regno = R0_REGNUM; regno <= R30_REGNUM; regno++)
2799 if (df_regs_ever_live_p (regno)
1c923b60
JW
2800 && (regno == R30_REGNUM
2801 || !call_used_regs[regno]))
97826595 2802 cfun->machine->frame.reg_offset[regno] = SLOT_REQUIRED;
43e9d192
IB
2803
2804 for (regno = V0_REGNUM; regno <= V31_REGNUM; regno++)
2805 if (df_regs_ever_live_p (regno)
2806 && !call_used_regs[regno])
4b0685d9
WD
2807 {
2808 cfun->machine->frame.reg_offset[regno] = SLOT_REQUIRED;
2809 last_fp_reg = regno;
2810 }
43e9d192
IB
2811
2812 if (frame_pointer_needed)
2813 {
2e1cdae5 2814 /* FP and LR are placed in the linkage record. */
43e9d192 2815 cfun->machine->frame.reg_offset[R29_REGNUM] = 0;
363ffa50 2816 cfun->machine->frame.wb_candidate1 = R29_REGNUM;
2e1cdae5 2817 cfun->machine->frame.reg_offset[R30_REGNUM] = UNITS_PER_WORD;
363ffa50 2818 cfun->machine->frame.wb_candidate2 = R30_REGNUM;
2e1cdae5 2819 offset += 2 * UNITS_PER_WORD;
43e9d192
IB
2820 }
2821
2822 /* Now assign stack slots for them. */
2e1cdae5 2823 for (regno = R0_REGNUM; regno <= R30_REGNUM; regno++)
97826595 2824 if (cfun->machine->frame.reg_offset[regno] == SLOT_REQUIRED)
43e9d192
IB
2825 {
2826 cfun->machine->frame.reg_offset[regno] = offset;
71bfb77a 2827 if (cfun->machine->frame.wb_candidate1 == INVALID_REGNUM)
363ffa50 2828 cfun->machine->frame.wb_candidate1 = regno;
71bfb77a 2829 else if (cfun->machine->frame.wb_candidate2 == INVALID_REGNUM)
363ffa50 2830 cfun->machine->frame.wb_candidate2 = regno;
43e9d192
IB
2831 offset += UNITS_PER_WORD;
2832 }
2833
4b0685d9
WD
2834 HOST_WIDE_INT max_int_offset = offset;
2835 offset = ROUND_UP (offset, STACK_BOUNDARY / BITS_PER_UNIT);
2836 bool has_align_gap = offset != max_int_offset;
2837
43e9d192 2838 for (regno = V0_REGNUM; regno <= V31_REGNUM; regno++)
97826595 2839 if (cfun->machine->frame.reg_offset[regno] == SLOT_REQUIRED)
43e9d192 2840 {
4b0685d9
WD
2841 /* If there is an alignment gap between integer and fp callee-saves,
2842 allocate the last fp register to it if possible. */
2843 if (regno == last_fp_reg && has_align_gap && (offset & 8) == 0)
2844 {
2845 cfun->machine->frame.reg_offset[regno] = max_int_offset;
2846 break;
2847 }
2848
43e9d192 2849 cfun->machine->frame.reg_offset[regno] = offset;
71bfb77a 2850 if (cfun->machine->frame.wb_candidate1 == INVALID_REGNUM)
363ffa50 2851 cfun->machine->frame.wb_candidate1 = regno;
71bfb77a 2852 else if (cfun->machine->frame.wb_candidate2 == INVALID_REGNUM
363ffa50
JW
2853 && cfun->machine->frame.wb_candidate1 >= V0_REGNUM)
2854 cfun->machine->frame.wb_candidate2 = regno;
43e9d192
IB
2855 offset += UNITS_PER_WORD;
2856 }
2857
4f59f9f2 2858 offset = ROUND_UP (offset, STACK_BOUNDARY / BITS_PER_UNIT);
43e9d192
IB
2859
2860 cfun->machine->frame.saved_regs_size = offset;
1c960e02 2861
71bfb77a
WD
2862 HOST_WIDE_INT varargs_and_saved_regs_size
2863 = offset + cfun->machine->frame.saved_varargs_size;
2864
1c960e02 2865 cfun->machine->frame.hard_fp_offset
71bfb77a 2866 = ROUND_UP (varargs_and_saved_regs_size + get_frame_size (),
4f59f9f2 2867 STACK_BOUNDARY / BITS_PER_UNIT);
1c960e02
MS
2868
2869 cfun->machine->frame.frame_size
4f59f9f2
UB
2870 = ROUND_UP (cfun->machine->frame.hard_fp_offset
2871 + crtl->outgoing_args_size,
2872 STACK_BOUNDARY / BITS_PER_UNIT);
1c960e02 2873
71bfb77a
WD
2874 cfun->machine->frame.locals_offset = cfun->machine->frame.saved_varargs_size;
2875
2876 cfun->machine->frame.initial_adjust = 0;
2877 cfun->machine->frame.final_adjust = 0;
2878 cfun->machine->frame.callee_adjust = 0;
2879 cfun->machine->frame.callee_offset = 0;
2880
2881 HOST_WIDE_INT max_push_offset = 0;
2882 if (cfun->machine->frame.wb_candidate2 != INVALID_REGNUM)
2883 max_push_offset = 512;
2884 else if (cfun->machine->frame.wb_candidate1 != INVALID_REGNUM)
2885 max_push_offset = 256;
2886
2887 if (cfun->machine->frame.frame_size < max_push_offset
2888 && crtl->outgoing_args_size == 0)
2889 {
2890 /* Simple, small frame with no outgoing arguments:
2891 stp reg1, reg2, [sp, -frame_size]!
2892 stp reg3, reg4, [sp, 16] */
2893 cfun->machine->frame.callee_adjust = cfun->machine->frame.frame_size;
2894 }
2895 else if ((crtl->outgoing_args_size
2896 + cfun->machine->frame.saved_regs_size < 512)
2897 && !(cfun->calls_alloca
2898 && cfun->machine->frame.hard_fp_offset < max_push_offset))
2899 {
2900 /* Frame with small outgoing arguments:
2901 sub sp, sp, frame_size
2902 stp reg1, reg2, [sp, outgoing_args_size]
2903 stp reg3, reg4, [sp, outgoing_args_size + 16] */
2904 cfun->machine->frame.initial_adjust = cfun->machine->frame.frame_size;
2905 cfun->machine->frame.callee_offset
2906 = cfun->machine->frame.frame_size - cfun->machine->frame.hard_fp_offset;
2907 }
2908 else if (cfun->machine->frame.hard_fp_offset < max_push_offset)
2909 {
2910 /* Frame with large outgoing arguments but a small local area:
2911 stp reg1, reg2, [sp, -hard_fp_offset]!
2912 stp reg3, reg4, [sp, 16]
2913 sub sp, sp, outgoing_args_size */
2914 cfun->machine->frame.callee_adjust = cfun->machine->frame.hard_fp_offset;
2915 cfun->machine->frame.final_adjust
2916 = cfun->machine->frame.frame_size - cfun->machine->frame.callee_adjust;
2917 }
2918 else if (!frame_pointer_needed
2919 && varargs_and_saved_regs_size < max_push_offset)
2920 {
2921 /* Frame with large local area and outgoing arguments (this pushes the
2922 callee-saves first, followed by the locals and outgoing area):
2923 stp reg1, reg2, [sp, -varargs_and_saved_regs_size]!
2924 stp reg3, reg4, [sp, 16]
2925 sub sp, sp, frame_size - varargs_and_saved_regs_size */
2926 cfun->machine->frame.callee_adjust = varargs_and_saved_regs_size;
2927 cfun->machine->frame.final_adjust
2928 = cfun->machine->frame.frame_size - cfun->machine->frame.callee_adjust;
2929 cfun->machine->frame.hard_fp_offset = cfun->machine->frame.callee_adjust;
2930 cfun->machine->frame.locals_offset = cfun->machine->frame.hard_fp_offset;
2931 }
2932 else
2933 {
2934 /* Frame with large local area and outgoing arguments using frame pointer:
2935 sub sp, sp, hard_fp_offset
2936 stp x29, x30, [sp, 0]
2937 add x29, sp, 0
2938 stp reg3, reg4, [sp, 16]
2939 sub sp, sp, outgoing_args_size */
2940 cfun->machine->frame.initial_adjust = cfun->machine->frame.hard_fp_offset;
2941 cfun->machine->frame.final_adjust
2942 = cfun->machine->frame.frame_size - cfun->machine->frame.initial_adjust;
2943 }
2944
43e9d192
IB
2945 cfun->machine->frame.laid_out = true;
2946}
2947
04ddfe06
KT
2948/* Return true if the register REGNO is saved on entry to
2949 the current function. */
2950
43e9d192
IB
2951static bool
2952aarch64_register_saved_on_entry (int regno)
2953{
97826595 2954 return cfun->machine->frame.reg_offset[regno] >= 0;
43e9d192
IB
2955}
2956
04ddfe06
KT
2957/* Return the next register up from REGNO up to LIMIT for the callee
2958 to save. */
2959
64dedd72
JW
2960static unsigned
2961aarch64_next_callee_save (unsigned regno, unsigned limit)
2962{
2963 while (regno <= limit && !aarch64_register_saved_on_entry (regno))
2964 regno ++;
2965 return regno;
2966}
43e9d192 2967
04ddfe06
KT
2968/* Push the register number REGNO of mode MODE to the stack with write-back
2969 adjusting the stack by ADJUSTMENT. */
2970
c5e1f66e 2971static void
ef4bddc2 2972aarch64_pushwb_single_reg (machine_mode mode, unsigned regno,
c5e1f66e
JW
2973 HOST_WIDE_INT adjustment)
2974 {
2975 rtx base_rtx = stack_pointer_rtx;
2976 rtx insn, reg, mem;
2977
2978 reg = gen_rtx_REG (mode, regno);
2979 mem = gen_rtx_PRE_MODIFY (Pmode, base_rtx,
2980 plus_constant (Pmode, base_rtx, -adjustment));
2981 mem = gen_rtx_MEM (mode, mem);
2982
2983 insn = emit_move_insn (mem, reg);
2984 RTX_FRAME_RELATED_P (insn) = 1;
2985}
2986
04ddfe06
KT
2987/* Generate and return an instruction to store the pair of registers
2988 REG and REG2 of mode MODE to location BASE with write-back adjusting
2989 the stack location BASE by ADJUSTMENT. */
2990
80c11907 2991static rtx
ef4bddc2 2992aarch64_gen_storewb_pair (machine_mode mode, rtx base, rtx reg, rtx reg2,
80c11907
JW
2993 HOST_WIDE_INT adjustment)
2994{
2995 switch (mode)
2996 {
2997 case DImode:
2998 return gen_storewb_pairdi_di (base, base, reg, reg2,
2999 GEN_INT (-adjustment),
3000 GEN_INT (UNITS_PER_WORD - adjustment));
3001 case DFmode:
3002 return gen_storewb_pairdf_di (base, base, reg, reg2,
3003 GEN_INT (-adjustment),
3004 GEN_INT (UNITS_PER_WORD - adjustment));
3005 default:
3006 gcc_unreachable ();
3007 }
3008}
3009
04ddfe06
KT
3010/* Push registers numbered REGNO1 and REGNO2 to the stack, adjusting the
3011 stack pointer by ADJUSTMENT. */
3012
80c11907 3013static void
89ac681e 3014aarch64_push_regs (unsigned regno1, unsigned regno2, HOST_WIDE_INT adjustment)
80c11907 3015{
5d8a22a5 3016 rtx_insn *insn;
89ac681e
WD
3017 machine_mode mode = (regno1 <= R30_REGNUM) ? DImode : DFmode;
3018
71bfb77a 3019 if (regno2 == INVALID_REGNUM)
89ac681e
WD
3020 return aarch64_pushwb_single_reg (mode, regno1, adjustment);
3021
80c11907
JW
3022 rtx reg1 = gen_rtx_REG (mode, regno1);
3023 rtx reg2 = gen_rtx_REG (mode, regno2);
3024
3025 insn = emit_insn (aarch64_gen_storewb_pair (mode, stack_pointer_rtx, reg1,
3026 reg2, adjustment));
3027 RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 2)) = 1;
80c11907
JW
3028 RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 1)) = 1;
3029 RTX_FRAME_RELATED_P (insn) = 1;
3030}
3031
04ddfe06
KT
3032/* Load the pair of registers REG and REG2 of mode MODE from stack location BASE,
3033 adjusting it by ADJUSTMENT afterwards. */
3034
159313d9 3035static rtx
ef4bddc2 3036aarch64_gen_loadwb_pair (machine_mode mode, rtx base, rtx reg, rtx reg2,
159313d9
JW
3037 HOST_WIDE_INT adjustment)
3038{
3039 switch (mode)
3040 {
3041 case DImode:
3042 return gen_loadwb_pairdi_di (base, base, reg, reg2, GEN_INT (adjustment),
3e322b3f 3043 GEN_INT (UNITS_PER_WORD));
159313d9
JW
3044 case DFmode:
3045 return gen_loadwb_pairdf_di (base, base, reg, reg2, GEN_INT (adjustment),
3e322b3f 3046 GEN_INT (UNITS_PER_WORD));
159313d9
JW
3047 default:
3048 gcc_unreachable ();
3049 }
3050}
3051
04ddfe06
KT
3052/* Pop the two registers numbered REGNO1, REGNO2 from the stack, adjusting it
3053 afterwards by ADJUSTMENT and writing the appropriate REG_CFA_RESTORE notes
3054 into CFI_OPS. */
3055
89ac681e
WD
3056static void
3057aarch64_pop_regs (unsigned regno1, unsigned regno2, HOST_WIDE_INT adjustment,
3058 rtx *cfi_ops)
3059{
3060 machine_mode mode = (regno1 <= R30_REGNUM) ? DImode : DFmode;
3061 rtx reg1 = gen_rtx_REG (mode, regno1);
3062
3063 *cfi_ops = alloc_reg_note (REG_CFA_RESTORE, reg1, *cfi_ops);
3064
71bfb77a 3065 if (regno2 == INVALID_REGNUM)
89ac681e
WD
3066 {
3067 rtx mem = plus_constant (Pmode, stack_pointer_rtx, adjustment);
3068 mem = gen_rtx_POST_MODIFY (Pmode, stack_pointer_rtx, mem);
3069 emit_move_insn (reg1, gen_rtx_MEM (mode, mem));
3070 }
3071 else
3072 {
3073 rtx reg2 = gen_rtx_REG (mode, regno2);
3074 *cfi_ops = alloc_reg_note (REG_CFA_RESTORE, reg2, *cfi_ops);
3075 emit_insn (aarch64_gen_loadwb_pair (mode, stack_pointer_rtx, reg1,
3076 reg2, adjustment));
3077 }
3078}
3079
04ddfe06
KT
3080/* Generate and return a store pair instruction of mode MODE to store
3081 register REG1 to MEM1 and register REG2 to MEM2. */
3082
72df5c1f 3083static rtx
ef4bddc2 3084aarch64_gen_store_pair (machine_mode mode, rtx mem1, rtx reg1, rtx mem2,
72df5c1f
JW
3085 rtx reg2)
3086{
3087 switch (mode)
3088 {
3089 case DImode:
3090 return gen_store_pairdi (mem1, reg1, mem2, reg2);
3091
3092 case DFmode:
3093 return gen_store_pairdf (mem1, reg1, mem2, reg2);
3094
3095 default:
3096 gcc_unreachable ();
3097 }
3098}
3099
04ddfe06
KT
3100/* Generate and return a load pair instruction of mode MODE to load register
3101 REG1 from MEM1 and register REG2 from MEM2. */
3102
72df5c1f 3103static rtx
ef4bddc2 3104aarch64_gen_load_pair (machine_mode mode, rtx reg1, rtx mem1, rtx reg2,
72df5c1f
JW
3105 rtx mem2)
3106{
3107 switch (mode)
3108 {
3109 case DImode:
3110 return gen_load_pairdi (reg1, mem1, reg2, mem2);
3111
3112 case DFmode:
3113 return gen_load_pairdf (reg1, mem1, reg2, mem2);
3114
3115 default:
3116 gcc_unreachable ();
3117 }
3118}
3119
04ddfe06
KT
3120/* Emit code to save the callee-saved registers from register number START
3121 to LIMIT to the stack at the location starting at offset START_OFFSET,
3122 skipping any write-back candidates if SKIP_WB is true. */
43e9d192 3123
43e9d192 3124static void
ef4bddc2 3125aarch64_save_callee_saves (machine_mode mode, HOST_WIDE_INT start_offset,
ae13fce3 3126 unsigned start, unsigned limit, bool skip_wb)
43e9d192 3127{
5d8a22a5 3128 rtx_insn *insn;
ef4bddc2 3129 rtx (*gen_mem_ref) (machine_mode, rtx) = (frame_pointer_needed
a007a21c 3130 ? gen_frame_mem : gen_rtx_MEM);
43e9d192
IB
3131 unsigned regno;
3132 unsigned regno2;
3133
0ec74a1e 3134 for (regno = aarch64_next_callee_save (start, limit);
64dedd72
JW
3135 regno <= limit;
3136 regno = aarch64_next_callee_save (regno + 1, limit))
43e9d192 3137 {
ae13fce3
JW
3138 rtx reg, mem;
3139 HOST_WIDE_INT offset;
64dedd72 3140
ae13fce3
JW
3141 if (skip_wb
3142 && (regno == cfun->machine->frame.wb_candidate1
3143 || regno == cfun->machine->frame.wb_candidate2))
3144 continue;
3145
827ab47a
KT
3146 if (cfun->machine->reg_is_wrapped_separately[regno])
3147 continue;
3148
ae13fce3
JW
3149 reg = gen_rtx_REG (mode, regno);
3150 offset = start_offset + cfun->machine->frame.reg_offset[regno];
0ec74a1e
JW
3151 mem = gen_mem_ref (mode, plus_constant (Pmode, stack_pointer_rtx,
3152 offset));
64dedd72
JW
3153
3154 regno2 = aarch64_next_callee_save (regno + 1, limit);
3155
3156 if (regno2 <= limit
827ab47a 3157 && !cfun->machine->reg_is_wrapped_separately[regno2]
64dedd72
JW
3158 && ((cfun->machine->frame.reg_offset[regno] + UNITS_PER_WORD)
3159 == cfun->machine->frame.reg_offset[regno2]))
3160
43e9d192 3161 {
0ec74a1e 3162 rtx reg2 = gen_rtx_REG (mode, regno2);
64dedd72
JW
3163 rtx mem2;
3164
3165 offset = start_offset + cfun->machine->frame.reg_offset[regno2];
8ed2fc62
JW
3166 mem2 = gen_mem_ref (mode, plus_constant (Pmode, stack_pointer_rtx,
3167 offset));
3168 insn = emit_insn (aarch64_gen_store_pair (mode, mem, reg, mem2,
3169 reg2));
0b4a9743 3170
64dedd72
JW
3171 /* The first part of a frame-related parallel insn is
3172 always assumed to be relevant to the frame
3173 calculations; subsequent parts are only
3174 frame-related if explicitly marked. */
3175 RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 1)) = 1;
3176 regno = regno2;
3177 }
3178 else
8ed2fc62
JW
3179 insn = emit_move_insn (mem, reg);
3180
3181 RTX_FRAME_RELATED_P (insn) = 1;
3182 }
3183}
3184
04ddfe06
KT
3185/* Emit code to restore the callee-saved registers of mode MODE from register
3186 number START up to and including LIMIT. Restore from the stack offset
3187 START_OFFSET, skipping any write-back candidates if SKIP_WB is true.
3188 Write the appropriate REG_CFA_RESTORE notes into CFI_OPS. */
3189
8ed2fc62 3190static void
ef4bddc2 3191aarch64_restore_callee_saves (machine_mode mode,
8ed2fc62 3192 HOST_WIDE_INT start_offset, unsigned start,
dd991abb 3193 unsigned limit, bool skip_wb, rtx *cfi_ops)
8ed2fc62 3194{
8ed2fc62 3195 rtx base_rtx = stack_pointer_rtx;
ef4bddc2 3196 rtx (*gen_mem_ref) (machine_mode, rtx) = (frame_pointer_needed
8ed2fc62
JW
3197 ? gen_frame_mem : gen_rtx_MEM);
3198 unsigned regno;
3199 unsigned regno2;
3200 HOST_WIDE_INT offset;
3201
3202 for (regno = aarch64_next_callee_save (start, limit);
3203 regno <= limit;
3204 regno = aarch64_next_callee_save (regno + 1, limit))
3205 {
827ab47a
KT
3206 if (cfun->machine->reg_is_wrapped_separately[regno])
3207 continue;
3208
ae13fce3 3209 rtx reg, mem;
8ed2fc62 3210
ae13fce3
JW
3211 if (skip_wb
3212 && (regno == cfun->machine->frame.wb_candidate1
3213 || regno == cfun->machine->frame.wb_candidate2))
3214 continue;
3215
3216 reg = gen_rtx_REG (mode, regno);
8ed2fc62
JW
3217 offset = start_offset + cfun->machine->frame.reg_offset[regno];
3218 mem = gen_mem_ref (mode, plus_constant (Pmode, base_rtx, offset));
3219
3220 regno2 = aarch64_next_callee_save (regno + 1, limit);
3221
3222 if (regno2 <= limit
827ab47a 3223 && !cfun->machine->reg_is_wrapped_separately[regno2]
8ed2fc62
JW
3224 && ((cfun->machine->frame.reg_offset[regno] + UNITS_PER_WORD)
3225 == cfun->machine->frame.reg_offset[regno2]))
64dedd72 3226 {
8ed2fc62
JW
3227 rtx reg2 = gen_rtx_REG (mode, regno2);
3228 rtx mem2;
3229
3230 offset = start_offset + cfun->machine->frame.reg_offset[regno2];
3231 mem2 = gen_mem_ref (mode, plus_constant (Pmode, base_rtx, offset));
dd991abb 3232 emit_insn (aarch64_gen_load_pair (mode, reg, mem, reg2, mem2));
8ed2fc62 3233
dd991abb 3234 *cfi_ops = alloc_reg_note (REG_CFA_RESTORE, reg2, *cfi_ops);
8ed2fc62 3235 regno = regno2;
43e9d192 3236 }
8ed2fc62 3237 else
dd991abb
RH
3238 emit_move_insn (reg, mem);
3239 *cfi_ops = alloc_reg_note (REG_CFA_RESTORE, reg, *cfi_ops);
43e9d192 3240 }
43e9d192
IB
3241}
3242
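/* Return true if OFFSET is a valid signed 9-bit unscaled addressing offset,
   i.e. in the range [-256, 255].  */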
827ab47a
KT
3243static inline bool
3244offset_9bit_signed_unscaled_p (machine_mode mode ATTRIBUTE_UNUSED,
3245 HOST_WIDE_INT offset)
3246{
3247 return offset >= -256 && offset < 256;
3248}
3249
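/* Return true if OFFSET is a valid unsigned 12-bit scaled addressing offset
   for mode MODE, i.e. a non-negative multiple of GET_MODE_SIZE (MODE) that
   is less than 4096 times that size.  */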
3250static inline bool
3251offset_12bit_unsigned_scaled_p (machine_mode mode, HOST_WIDE_INT offset)
3252{
3253 return (offset >= 0
3254 && offset < 4096 * GET_MODE_SIZE (mode)
3255 && offset % GET_MODE_SIZE (mode) == 0);
3256}
3257
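/* Return true if OFFSET is a valid signed 7-bit scaled offset for mode MODE,
   i.e. a multiple of GET_MODE_SIZE (MODE) in the range
   [-64 * size, 63 * size].  */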
3258bool
3259aarch64_offset_7bit_signed_scaled_p (machine_mode mode, HOST_WIDE_INT offset)
3260{
3261 return (offset >= -64 * GET_MODE_SIZE (mode)
3262 && offset < 64 * GET_MODE_SIZE (mode)
3263 && offset % GET_MODE_SIZE (mode) == 0);
3264}
3265
3266/* Implement TARGET_SHRINK_WRAP_GET_SEPARATE_COMPONENTS. */
3267
3268static sbitmap
3269aarch64_get_separate_components (void)
3270{
3271 aarch64_layout_frame ();
3272
3273 sbitmap components = sbitmap_alloc (LAST_SAVED_REGNUM + 1);
3274 bitmap_clear (components);
3275
3276 /* The registers we need saved to the frame. */
3277 for (unsigned regno = 0; regno <= LAST_SAVED_REGNUM; regno++)
3278 if (aarch64_register_saved_on_entry (regno))
3279 {
3280 HOST_WIDE_INT offset = cfun->machine->frame.reg_offset[regno];
3281 if (!frame_pointer_needed)
3282 offset += cfun->machine->frame.frame_size
3283 - cfun->machine->frame.hard_fp_offset;
3284 /* Check that we can access the stack slot of the register with one
3285 direct load with no adjustments needed. */
3286 if (offset_12bit_unsigned_scaled_p (DImode, offset))
3287 bitmap_set_bit (components, regno);
3288 }
3289
3290 /* Don't mess with the hard frame pointer. */
3291 if (frame_pointer_needed)
3292 bitmap_clear_bit (components, HARD_FRAME_POINTER_REGNUM);
3293
3294 unsigned reg1 = cfun->machine->frame.wb_candidate1;
3295 unsigned reg2 = cfun->machine->frame.wb_candidate2;
3296 /* If aarch64_layout_frame has chosen registers to store/restore with
3297 writeback don't interfere with them to avoid having to output explicit
3298 stack adjustment instructions. */
3299 if (reg2 != INVALID_REGNUM)
3300 bitmap_clear_bit (components, reg2);
3301 if (reg1 != INVALID_REGNUM)
3302 bitmap_clear_bit (components, reg1);
3303
3304 bitmap_clear_bit (components, LR_REGNUM);
3305 bitmap_clear_bit (components, SP_REGNUM);
3306
3307 return components;
3308}
3309
3310/* Implement TARGET_SHRINK_WRAP_COMPONENTS_FOR_BB. */
3311
3312static sbitmap
3313aarch64_components_for_bb (basic_block bb)
3314{
3315 bitmap in = DF_LIVE_IN (bb);
3316 bitmap gen = &DF_LIVE_BB_INFO (bb)->gen;
3317 bitmap kill = &DF_LIVE_BB_INFO (bb)->kill;
3318
3319 sbitmap components = sbitmap_alloc (LAST_SAVED_REGNUM + 1);
3320 bitmap_clear (components);
3321
3322 /* GPRs are used in a bb if they are in the IN, GEN, or KILL sets. */
3323 for (unsigned regno = 0; regno <= LAST_SAVED_REGNUM; regno++)
3324 if ((!call_used_regs[regno])
3325 && (bitmap_bit_p (in, regno)
3326 || bitmap_bit_p (gen, regno)
3327 || bitmap_bit_p (kill, regno)))
3328 bitmap_set_bit (components, regno);
3329
3330 return components;
3331}
3332
3333/* Implement TARGET_SHRINK_WRAP_DISQUALIFY_COMPONENTS.
3334 Nothing to do for aarch64. */
3335
3336static void
3337aarch64_disqualify_components (sbitmap, edge, sbitmap, bool)
3338{
3339}
3340
3341/* Return the next set bit in BMP from START onwards. Return the total number
3342 of bits in BMP if no set bit is found at or after START. */
3343
3344static unsigned int
3345aarch64_get_next_set_bit (sbitmap bmp, unsigned int start)
3346{
3347 unsigned int nbits = SBITMAP_SIZE (bmp);
3348 if (start == nbits)
3349 return start;
3350
3351 gcc_assert (start < nbits);
3352 for (unsigned int i = start; i < nbits; i++)
3353 if (bitmap_bit_p (bmp, i))
3354 return i;
3355
3356 return nbits;
3357}
3358
3359/* Do the work for aarch64_emit_prologue_components and
3360 aarch64_emit_epilogue_components. COMPONENTS is the bitmap of registers
3361 to save/restore, PROLOGUE_P indicates whether to emit the prologue sequence
3362 for these components or the epilogue sequence. That is, it determines
3363 whether we should emit stores or loads and what kind of CFA notes to attach
3364 to the insns. Otherwise the logic for the two sequences is very
3365 similar. */
3366
3367static void
3368aarch64_process_components (sbitmap components, bool prologue_p)
3369{
3370 rtx ptr_reg = gen_rtx_REG (Pmode, frame_pointer_needed
3371 ? HARD_FRAME_POINTER_REGNUM
3372 : STACK_POINTER_REGNUM);
3373
3374 unsigned last_regno = SBITMAP_SIZE (components);
3375 unsigned regno = aarch64_get_next_set_bit (components, R0_REGNUM);
3376 rtx_insn *insn = NULL;
3377
3378 while (regno != last_regno)
3379 {
3380 /* AAPCS64 section 5.1.2 requires only the bottom 64 bits to be saved
3381 so DFmode for the vector registers is enough. */
3382 machine_mode mode = GP_REGNUM_P (regno) ? DImode : DFmode;
3383 rtx reg = gen_rtx_REG (mode, regno);
3384 HOST_WIDE_INT offset = cfun->machine->frame.reg_offset[regno];
3385 if (!frame_pointer_needed)
3386 offset += cfun->machine->frame.frame_size
3387 - cfun->machine->frame.hard_fp_offset;
3388 rtx addr = plus_constant (Pmode, ptr_reg, offset);
3389 rtx mem = gen_frame_mem (mode, addr);
3390
3391 rtx set = prologue_p ? gen_rtx_SET (mem, reg) : gen_rtx_SET (reg, mem);
3392 unsigned regno2 = aarch64_get_next_set_bit (components, regno + 1);
3393 /* No more registers to handle after REGNO.
3394 Emit a single save/restore and exit. */
3395 if (regno2 == last_regno)
3396 {
3397 insn = emit_insn (set);
3398 RTX_FRAME_RELATED_P (insn) = 1;
3399 if (prologue_p)
3400 add_reg_note (insn, REG_CFA_OFFSET, copy_rtx (set));
3401 else
3402 add_reg_note (insn, REG_CFA_RESTORE, reg);
3403 break;
3404 }
3405
3406 HOST_WIDE_INT offset2 = cfun->machine->frame.reg_offset[regno2];
3407 /* The next register is not of the same class or its offset is not
3408 mergeable with the current one into a pair. */
3409 if (!satisfies_constraint_Ump (mem)
3410 || GP_REGNUM_P (regno) != GP_REGNUM_P (regno2)
3411 || (offset2 - cfun->machine->frame.reg_offset[regno])
3412 != GET_MODE_SIZE (mode))
3413 {
3414 insn = emit_insn (set);
3415 RTX_FRAME_RELATED_P (insn) = 1;
3416 if (prologue_p)
3417 add_reg_note (insn, REG_CFA_OFFSET, copy_rtx (set));
3418 else
3419 add_reg_note (insn, REG_CFA_RESTORE, reg);
3420
3421 regno = regno2;
3422 continue;
3423 }
3424
3425 /* REGNO2 can be saved/restored in a pair with REGNO. */
3426 rtx reg2 = gen_rtx_REG (mode, regno2);
3427 if (!frame_pointer_needed)
3428 offset2 += cfun->machine->frame.frame_size
3429 - cfun->machine->frame.hard_fp_offset;
3430 rtx addr2 = plus_constant (Pmode, ptr_reg, offset2);
3431 rtx mem2 = gen_frame_mem (mode, addr2);
3432 rtx set2 = prologue_p ? gen_rtx_SET (mem2, reg2)
3433 : gen_rtx_SET (reg2, mem2);
3434
3435 if (prologue_p)
3436 insn = emit_insn (aarch64_gen_store_pair (mode, mem, reg, mem2, reg2));
3437 else
3438 insn = emit_insn (aarch64_gen_load_pair (mode, reg, mem, reg2, mem2));
3439
3440 RTX_FRAME_RELATED_P (insn) = 1;
3441 if (prologue_p)
3442 {
3443 add_reg_note (insn, REG_CFA_OFFSET, set);
3444 add_reg_note (insn, REG_CFA_OFFSET, set2);
3445 }
3446 else
3447 {
3448 add_reg_note (insn, REG_CFA_RESTORE, reg);
3449 add_reg_note (insn, REG_CFA_RESTORE, reg2);
3450 }
3451
3452 regno = aarch64_get_next_set_bit (components, regno2 + 1);
3453 }
3454}
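/* Illustrative example (register numbers and offsets assumed): if COMPONENTS
   contains x22 and x23 and their frame offsets differ by 8 (say 32 and 40),
   the prologue call emits a single store pair and the epilogue call the
   matching load pair, roughly:

     stp	x22, x23, [sp, 32]	// prologue_p == true
     ldp	x22, x23, [sp, 32]	// prologue_p == false

   A lone FP component such as d8 would instead get a DFmode str/ldr.  */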
3455
3456/* Implement TARGET_SHRINK_WRAP_EMIT_PROLOGUE_COMPONENTS. */
3457
3458static void
3459aarch64_emit_prologue_components (sbitmap components)
3460{
3461 aarch64_process_components (components, true);
3462}
3463
3464/* Implement TARGET_SHRINK_WRAP_EMIT_EPILOGUE_COMPONENTS. */
3465
3466static void
3467aarch64_emit_epilogue_components (sbitmap components)
3468{
3469 aarch64_process_components (components, false);
3470}
3471
3472/* Implement TARGET_SHRINK_WRAP_SET_HANDLED_COMPONENTS. */
3473
3474static void
3475aarch64_set_handled_components (sbitmap components)
3476{
3477 for (unsigned regno = 0; regno <= LAST_SAVED_REGNUM; regno++)
3478 if (bitmap_bit_p (components, regno))
3479 cfun->machine->reg_is_wrapped_separately[regno] = true;
3480}
3481
43e9d192
IB
3482/* AArch64 stack frames generated by this compiler look like:
3483
3484 +-------------------------------+
3485 | |
3486 | incoming stack arguments |
3487 | |
34834420
MS
3488 +-------------------------------+
3489 | | <-- incoming stack pointer (aligned)
43e9d192
IB
3490 | callee-allocated save area |
3491 | for register varargs |
3492 | |
34834420
MS
3493 +-------------------------------+
3494 | local variables | <-- frame_pointer_rtx
43e9d192
IB
3495 | |
3496 +-------------------------------+
454fdba9
RL
3497 | padding0 | \
3498 +-------------------------------+ |
454fdba9 3499 | callee-saved registers | | frame.saved_regs_size
454fdba9
RL
3500 +-------------------------------+ |
3501 | LR' | |
3502 +-------------------------------+ |
34834420
MS
3503 | FP' | / <- hard_frame_pointer_rtx (aligned)
3504 +-------------------------------+
43e9d192
IB
3505 | dynamic allocation |
3506 +-------------------------------+
34834420
MS
3507 | padding |
3508 +-------------------------------+
3509 | outgoing stack arguments | <-- arg_pointer
3510 | |
3511 +-------------------------------+
3512 | | <-- stack_pointer_rtx (aligned)
43e9d192 3513
34834420
MS
3514 Dynamic stack allocations via alloca() decrease stack_pointer_rtx
3515 but leave frame_pointer_rtx and hard_frame_pointer_rtx
3516 unchanged. */
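/* For instance, a small frame that needs a frame pointer and has no
   outgoing arguments is typically established by something like

     stp	x29, x30, [sp, -32]!
     mov	x29, sp

   (a push of FP/LR with writeback, then setting up the frame pointer),
   with the epilogue undoing it in reverse.  The exact sequence depends on
   the initial_adjust/callee_adjust/final_adjust split computed by
   aarch64_layout_frame.  */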
43e9d192
IB
3517
3518/* Generate the prologue instructions for entry into a function.
3519 Establish the stack frame by decreasing the stack pointer with a
3520 properly calculated size and, if necessary, create a frame record
3521   filled with the values of LR and the previous frame pointer.  The
6991c977 3522 current FP is also set up if it is in use. */
43e9d192
IB
3523
3524void
3525aarch64_expand_prologue (void)
3526{
43e9d192 3527 aarch64_layout_frame ();
43e9d192 3528
71bfb77a
WD
3529 HOST_WIDE_INT frame_size = cfun->machine->frame.frame_size;
3530 HOST_WIDE_INT initial_adjust = cfun->machine->frame.initial_adjust;
3531 HOST_WIDE_INT callee_adjust = cfun->machine->frame.callee_adjust;
3532 HOST_WIDE_INT final_adjust = cfun->machine->frame.final_adjust;
3533 HOST_WIDE_INT callee_offset = cfun->machine->frame.callee_offset;
3534 unsigned reg1 = cfun->machine->frame.wb_candidate1;
3535 unsigned reg2 = cfun->machine->frame.wb_candidate2;
3536 rtx_insn *insn;
43e9d192 3537
dd991abb
RH
3538 if (flag_stack_usage_info)
3539 current_function_static_stack_size = frame_size;
43e9d192 3540
a3eb8a52
EB
3541 if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK)
3542 {
3543 if (crtl->is_leaf && !cfun->calls_alloca)
3544 {
3545 if (frame_size > PROBE_INTERVAL && frame_size > STACK_CHECK_PROTECT)
3546 aarch64_emit_probe_stack_range (STACK_CHECK_PROTECT,
3547 frame_size - STACK_CHECK_PROTECT);
3548 }
3549 else if (frame_size > 0)
3550 aarch64_emit_probe_stack_range (STACK_CHECK_PROTECT, frame_size);
3551 }
3552
5be6b295 3553 aarch64_sub_sp (IP0_REGNUM, initial_adjust, true);
43e9d192 3554
71bfb77a
WD
3555 if (callee_adjust != 0)
3556 aarch64_push_regs (reg1, reg2, callee_adjust);
43e9d192 3557
71bfb77a 3558 if (frame_pointer_needed)
43e9d192 3559 {
71bfb77a
WD
3560 if (callee_adjust == 0)
3561 aarch64_save_callee_saves (DImode, callee_offset, R29_REGNUM,
3562 R30_REGNUM, false);
3563 insn = emit_insn (gen_add3_insn (hard_frame_pointer_rtx,
3564 stack_pointer_rtx,
3565 GEN_INT (callee_offset)));
3566 RTX_FRAME_RELATED_P (insn) = 1;
3567 emit_insn (gen_stack_tie (stack_pointer_rtx, hard_frame_pointer_rtx));
43e9d192 3568 }
71bfb77a
WD
3569
3570 aarch64_save_callee_saves (DImode, callee_offset, R0_REGNUM, R30_REGNUM,
3571 callee_adjust != 0 || frame_pointer_needed);
3572 aarch64_save_callee_saves (DFmode, callee_offset, V0_REGNUM, V31_REGNUM,
3573 callee_adjust != 0 || frame_pointer_needed);
5be6b295 3574 aarch64_sub_sp (IP1_REGNUM, final_adjust, !frame_pointer_needed);
43e9d192
IB
3575}
3576
4f942779
RL
3577/* Return TRUE if we can use a simple_return insn.
3578
3579 This function checks whether the callee saved stack is empty, which
3580   means no restore actions are needed. The pro_and_epilogue pass will use
3581   this to check whether the shrink-wrapping optimization is feasible. */
3582
3583bool
3584aarch64_use_return_insn_p (void)
3585{
3586 if (!reload_completed)
3587 return false;
3588
3589 if (crtl->profile)
3590 return false;
3591
3592 aarch64_layout_frame ();
3593
3594 return cfun->machine->frame.frame_size == 0;
3595}
3596
71bfb77a
WD
3597/* Generate the epilogue instructions for returning from a function.
3598   This is almost exactly the reverse of the prologue sequence, except
3599 that we need to insert barriers to avoid scheduling loads that read
3600 from a deallocated stack, and we optimize the unwind records by
3601 emitting them all together if possible. */
43e9d192
IB
3602void
3603aarch64_expand_epilogue (bool for_sibcall)
3604{
43e9d192 3605 aarch64_layout_frame ();
43e9d192 3606
71bfb77a
WD
3607 HOST_WIDE_INT initial_adjust = cfun->machine->frame.initial_adjust;
3608 HOST_WIDE_INT callee_adjust = cfun->machine->frame.callee_adjust;
3609 HOST_WIDE_INT final_adjust = cfun->machine->frame.final_adjust;
3610 HOST_WIDE_INT callee_offset = cfun->machine->frame.callee_offset;
3611 unsigned reg1 = cfun->machine->frame.wb_candidate1;
3612 unsigned reg2 = cfun->machine->frame.wb_candidate2;
3613 rtx cfi_ops = NULL;
3614 rtx_insn *insn;
44c0e7b9 3615
71bfb77a
WD
3616   /* We need a memory barrier to prevent reads from the deallocated stack. */
3617 bool need_barrier_p = (get_frame_size ()
3618 + cfun->machine->frame.saved_varargs_size) != 0;
43e9d192 3619
71bfb77a
WD
3620 /* Emit a barrier to prevent loads from a deallocated stack. */
3621 if (final_adjust > crtl->outgoing_args_size || cfun->calls_alloca)
43e9d192 3622 {
71bfb77a
WD
3623 emit_insn (gen_stack_tie (stack_pointer_rtx, stack_pointer_rtx));
3624 need_barrier_p = false;
3625 }
7e8c2bd5 3626
71bfb77a
WD
3627 /* Restore the stack pointer from the frame pointer if it may not
3628 be the same as the stack pointer. */
3629 if (frame_pointer_needed && (final_adjust || cfun->calls_alloca))
3630 {
43e9d192
IB
3631 insn = emit_insn (gen_add3_insn (stack_pointer_rtx,
3632 hard_frame_pointer_rtx,
71bfb77a
WD
3633 GEN_INT (-callee_offset)));
3634 /* If writeback is used when restoring callee-saves, the CFA
3635 is restored on the instruction doing the writeback. */
3636 RTX_FRAME_RELATED_P (insn) = callee_adjust == 0;
43e9d192 3637 }
71bfb77a 3638 else
5be6b295 3639 aarch64_add_sp (IP1_REGNUM, final_adjust, df_regs_ever_live_p (IP1_REGNUM));
43e9d192 3640
71bfb77a
WD
3641 aarch64_restore_callee_saves (DImode, callee_offset, R0_REGNUM, R30_REGNUM,
3642 callee_adjust != 0, &cfi_ops);
3643 aarch64_restore_callee_saves (DFmode, callee_offset, V0_REGNUM, V31_REGNUM,
3644 callee_adjust != 0, &cfi_ops);
43e9d192 3645
71bfb77a
WD
3646 if (need_barrier_p)
3647 emit_insn (gen_stack_tie (stack_pointer_rtx, stack_pointer_rtx));
3648
3649 if (callee_adjust != 0)
3650 aarch64_pop_regs (reg1, reg2, callee_adjust, &cfi_ops);
3651
3652 if (callee_adjust != 0 || initial_adjust > 65536)
3653 {
3654 /* Emit delayed restores and set the CFA to be SP + initial_adjust. */
89ac681e 3655 insn = get_last_insn ();
71bfb77a
WD
3656 rtx new_cfa = plus_constant (Pmode, stack_pointer_rtx, initial_adjust);
3657 REG_NOTES (insn) = alloc_reg_note (REG_CFA_DEF_CFA, new_cfa, cfi_ops);
43e9d192 3658 RTX_FRAME_RELATED_P (insn) = 1;
71bfb77a 3659 cfi_ops = NULL;
43e9d192
IB
3660 }
3661
5be6b295 3662 aarch64_add_sp (IP0_REGNUM, initial_adjust, df_regs_ever_live_p (IP0_REGNUM));
7e8c2bd5 3663
71bfb77a
WD
3664 if (cfi_ops)
3665 {
3666 /* Emit delayed restores and reset the CFA to be SP. */
3667 insn = get_last_insn ();
3668 cfi_ops = alloc_reg_note (REG_CFA_DEF_CFA, stack_pointer_rtx, cfi_ops);
3669 REG_NOTES (insn) = cfi_ops;
3670 RTX_FRAME_RELATED_P (insn) = 1;
dd991abb
RH
3671 }
3672
3673 /* Stack adjustment for exception handler. */
3674 if (crtl->calls_eh_return)
3675 {
3676 /* We need to unwind the stack by the offset computed by
3677 EH_RETURN_STACKADJ_RTX. We have already reset the CFA
3678 to be SP; letting the CFA move during this adjustment
3679 is just as correct as retaining the CFA from the body
3680 of the function. Therefore, do nothing special. */
3681 emit_insn (gen_add2_insn (stack_pointer_rtx, EH_RETURN_STACKADJ_RTX));
43e9d192
IB
3682 }
3683
3684 emit_use (gen_rtx_REG (DImode, LR_REGNUM));
3685 if (!for_sibcall)
3686 emit_jump_insn (ret_rtx);
3687}
3688
3689/* Return the place to copy the exception unwinding return address to.
3690   This will probably be a stack slot, but could (in theory) be the
3691   return register.  */
3692rtx
3693aarch64_final_eh_return_addr (void)
3694{
1c960e02
MS
3695 HOST_WIDE_INT fp_offset;
3696
43e9d192 3697 aarch64_layout_frame ();
1c960e02
MS
3698
3699 fp_offset = cfun->machine->frame.frame_size
3700 - cfun->machine->frame.hard_fp_offset;
43e9d192
IB
3701
3702 if (cfun->machine->frame.reg_offset[LR_REGNUM] < 0)
3703 return gen_rtx_REG (DImode, LR_REGNUM);
3704
3705 /* DSE and CSELIB do not detect an alias between sp+k1 and fp+k2. This can
3706 result in a store to save LR introduced by builtin_eh_return () being
3707 incorrectly deleted because the alias is not detected.
3708 So in the calculation of the address to copy the exception unwinding
3709 return address to, we note 2 cases.
3710 If FP is needed and the fp_offset is 0, it means that SP = FP and hence
3711 we return a SP-relative location since all the addresses are SP-relative
3712 in this case. This prevents the store from being optimized away.
3713 If the fp_offset is not 0, then the addresses will be FP-relative and
3714 therefore we return a FP-relative location. */
3715
3716 if (frame_pointer_needed)
3717 {
3718 if (fp_offset)
3719 return gen_frame_mem (DImode,
3720 plus_constant (Pmode, hard_frame_pointer_rtx, UNITS_PER_WORD));
3721 else
3722 return gen_frame_mem (DImode,
3723 plus_constant (Pmode, stack_pointer_rtx, UNITS_PER_WORD));
3724 }
3725
3726 /* If FP is not needed, we calculate the location of LR, which would be
3727 at the top of the saved registers block. */
3728
3729 return gen_frame_mem (DImode,
3730 plus_constant (Pmode,
3731 stack_pointer_rtx,
3732 fp_offset
3733 + cfun->machine->frame.saved_regs_size
3734 - 2 * UNITS_PER_WORD));
3735}
3736
43e9d192
IB
3737/* Output code to add DELTA to the first argument, and then jump
3738 to FUNCTION. Used for C++ multiple inheritance. */
3739static void
3740aarch64_output_mi_thunk (FILE *file, tree thunk ATTRIBUTE_UNUSED,
3741 HOST_WIDE_INT delta,
3742 HOST_WIDE_INT vcall_offset,
3743 tree function)
3744{
3745 /* The this pointer is always in x0. Note that this differs from
3746      Arm where the this pointer may be bumped to r1 if r0 is required
3747 to return a pointer to an aggregate. On AArch64 a result value
3748 pointer will be in x8. */
3749 int this_regno = R0_REGNUM;
5d8a22a5
DM
3750 rtx this_rtx, temp0, temp1, addr, funexp;
3751 rtx_insn *insn;
43e9d192 3752
75f1d6fc
SN
3753 reload_completed = 1;
3754 emit_note (NOTE_INSN_PROLOGUE_END);
43e9d192
IB
3755
3756 if (vcall_offset == 0)
5be6b295 3757 aarch64_add_constant (Pmode, this_regno, IP1_REGNUM, delta);
43e9d192
IB
3758 else
3759 {
28514dda 3760 gcc_assert ((vcall_offset & (POINTER_BYTES - 1)) == 0);
43e9d192 3761
75f1d6fc
SN
3762 this_rtx = gen_rtx_REG (Pmode, this_regno);
3763 temp0 = gen_rtx_REG (Pmode, IP0_REGNUM);
3764 temp1 = gen_rtx_REG (Pmode, IP1_REGNUM);
43e9d192 3765
75f1d6fc
SN
3766 addr = this_rtx;
3767 if (delta != 0)
3768 {
3769 if (delta >= -256 && delta < 256)
3770 addr = gen_rtx_PRE_MODIFY (Pmode, this_rtx,
3771 plus_constant (Pmode, this_rtx, delta));
3772 else
5be6b295 3773 aarch64_add_constant (Pmode, this_regno, IP1_REGNUM, delta);
43e9d192
IB
3774 }
3775
28514dda
YZ
3776 if (Pmode == ptr_mode)
3777 aarch64_emit_move (temp0, gen_rtx_MEM (ptr_mode, addr));
3778 else
3779 aarch64_emit_move (temp0,
3780 gen_rtx_ZERO_EXTEND (Pmode,
3781 gen_rtx_MEM (ptr_mode, addr)));
75f1d6fc 3782
28514dda 3783 if (vcall_offset >= -256 && vcall_offset < 4096 * POINTER_BYTES)
75f1d6fc 3784 addr = plus_constant (Pmode, temp0, vcall_offset);
43e9d192
IB
3785 else
3786 {
f43657b4
JW
3787 aarch64_internal_mov_immediate (temp1, GEN_INT (vcall_offset), true,
3788 Pmode);
75f1d6fc 3789 addr = gen_rtx_PLUS (Pmode, temp0, temp1);
43e9d192
IB
3790 }
3791
28514dda
YZ
3792 if (Pmode == ptr_mode)
3793 aarch64_emit_move (temp1, gen_rtx_MEM (ptr_mode,addr));
3794 else
3795 aarch64_emit_move (temp1,
3796 gen_rtx_SIGN_EXTEND (Pmode,
3797 gen_rtx_MEM (ptr_mode, addr)));
3798
75f1d6fc 3799 emit_insn (gen_add2_insn (this_rtx, temp1));
43e9d192
IB
3800 }
3801
75f1d6fc
SN
3802 /* Generate a tail call to the target function. */
3803 if (!TREE_USED (function))
3804 {
3805 assemble_external (function);
3806 TREE_USED (function) = 1;
3807 }
3808 funexp = XEXP (DECL_RTL (function), 0);
3809 funexp = gen_rtx_MEM (FUNCTION_MODE, funexp);
3810 insn = emit_call_insn (gen_sibcall (funexp, const0_rtx, NULL_RTX));
3811 SIBLING_CALL_P (insn) = 1;
3812
3813 insn = get_insns ();
3814 shorten_branches (insn);
3815 final_start_function (insn, file, 1);
3816 final (insn, file, 1);
43e9d192 3817 final_end_function ();
75f1d6fc
SN
3818
3819 /* Stop pretending to be a post-reload pass. */
3820 reload_completed = 0;
43e9d192
IB
3821}
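/* As an illustration (values assumed), for a thunk with DELTA == 16 and
   VCALL_OFFSET == 0 the code above emits, in essence:

     add	x0, x0, 16
     b	<function>

   i.e. bump the this pointer and tail-call the target.  With a non-zero
   VCALL_OFFSET the adjustment is instead loaded from the vtable, using
   x16/x17 (IP0/IP1) as scratch registers.  */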
3822
43e9d192
IB
3823static bool
3824aarch64_tls_referenced_p (rtx x)
3825{
3826 if (!TARGET_HAVE_TLS)
3827 return false;
e7de8563
RS
3828 subrtx_iterator::array_type array;
3829 FOR_EACH_SUBRTX (iter, array, x, ALL)
3830 {
3831 const_rtx x = *iter;
3832 if (GET_CODE (x) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (x) != 0)
3833 return true;
3834 /* Don't recurse into UNSPEC_TLS looking for TLS symbols; these are
3835 TLS offsets, not real symbol references. */
3836 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
3837 iter.skip_subrtxes ();
3838 }
3839 return false;
43e9d192
IB
3840}
3841
3842
43e9d192
IB
3843/* Return true if val can be encoded as a 12-bit unsigned immediate with
3844 a left shift of 0 or 12 bits. */
3845bool
3846aarch64_uimm12_shift (HOST_WIDE_INT val)
3847{
3848 return ((val & (((HOST_WIDE_INT) 0xfff) << 0)) == val
3849 || (val & (((HOST_WIDE_INT) 0xfff) << 12)) == val
3850 );
3851}
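/* For example, 0xabc and 0xabc000 are accepted (shift 0 and shift 12
   respectively), whereas 0xabc00 is rejected because its set bits span
   both 12-bit fields.  */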
3852
3853
3854/* Return true if val is an immediate that can be loaded into a
3855 register by a MOVZ instruction. */
3856static bool
ef4bddc2 3857aarch64_movw_imm (HOST_WIDE_INT val, machine_mode mode)
43e9d192
IB
3858{
3859 if (GET_MODE_SIZE (mode) > 4)
3860 {
3861 if ((val & (((HOST_WIDE_INT) 0xffff) << 32)) == val
3862 || (val & (((HOST_WIDE_INT) 0xffff) << 48)) == val)
3863 return 1;
3864 }
3865 else
3866 {
3867 /* Ignore sign extension. */
3868 val &= (HOST_WIDE_INT) 0xffffffff;
3869 }
3870 return ((val & (((HOST_WIDE_INT) 0xffff) << 0)) == val
3871 || (val & (((HOST_WIDE_INT) 0xffff) << 16)) == val);
3872}
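/* For example, 0xf000, 0xbeef0000 and (for 64-bit modes) 0x123400000000 are
   all representable by a single MOVZ, since each has all of its set bits
   within one aligned 16-bit halfword; 0x10001 is not.  */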
3873
a64c73a2
WD
3874/* Multipliers for repeating bitmasks of width 32, 16, 8, 4, and 2. */
3875
3876static const unsigned HOST_WIDE_INT bitmask_imm_mul[] =
3877 {
3878 0x0000000100000001ull,
3879 0x0001000100010001ull,
3880 0x0101010101010101ull,
3881 0x1111111111111111ull,
3882 0x5555555555555555ull,
3883 };
3884
43e9d192
IB
3885
3886/* Return true if val is a valid bitmask immediate. */
a64c73a2 3887
43e9d192 3888bool
a64c73a2 3889aarch64_bitmask_imm (HOST_WIDE_INT val_in, machine_mode mode)
43e9d192 3890{
a64c73a2
WD
3891 unsigned HOST_WIDE_INT val, tmp, mask, first_one, next_one;
3892 int bits;
3893
3894 /* Check for a single sequence of one bits and return quickly if so.
3895      The special cases of all ones and all zeroes return false.  */
3896 val = (unsigned HOST_WIDE_INT) val_in;
3897 tmp = val + (val & -val);
3898
3899 if (tmp == (tmp & -tmp))
3900 return (val + 1) > 1;
3901
3902 /* Replicate 32-bit immediates so we can treat them as 64-bit. */
3903 if (mode == SImode)
3904 val = (val << 32) | (val & 0xffffffff);
3905
3906 /* Invert if the immediate doesn't start with a zero bit - this means we
3907 only need to search for sequences of one bits. */
3908 if (val & 1)
3909 val = ~val;
3910
3911 /* Find the first set bit and set tmp to val with the first sequence of one
3912 bits removed. Return success if there is a single sequence of ones. */
3913 first_one = val & -val;
3914 tmp = val & (val + first_one);
3915
3916 if (tmp == 0)
3917 return true;
3918
3919 /* Find the next set bit and compute the difference in bit position. */
3920 next_one = tmp & -tmp;
3921 bits = clz_hwi (first_one) - clz_hwi (next_one);
3922 mask = val ^ tmp;
3923
3924 /* Check the bit position difference is a power of 2, and that the first
3925 sequence of one bits fits within 'bits' bits. */
3926 if ((mask >> bits) != 0 || bits != (bits & -bits))
3927 return false;
3928
3929 /* Check the sequence of one bits is repeated 64/bits times. */
3930 return val == mask * bitmask_imm_mul[__builtin_clz (bits) - 26];
43e9d192
IB
3931}
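/* Examples: 0x00ff00ff00ff00ff (a run of 8 ones repeated every 16 bits) and
   0x0000ffff0000ffff (16 ones repeated every 32 bits) are valid bitmask
   immediates, while 0x00ff00ff00fe00ff is not, because the run of ones is
   not replicated uniformly.  All-zeros and all-ones are rejected as noted
   above.  */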
3932
43fd192f
MC
3933/* Create mask of ones, covering the lowest to highest bits set in VAL_IN.
3934   Assumed precondition: VAL_IN is not zero. */
3935
3936unsigned HOST_WIDE_INT
3937aarch64_and_split_imm1 (HOST_WIDE_INT val_in)
3938{
3939 int lowest_bit_set = ctz_hwi (val_in);
3940 int highest_bit_set = floor_log2 (val_in);
3941 gcc_assert (val_in != 0);
3942
3943 return ((HOST_WIDE_INT_UC (2) << highest_bit_set) -
3944 (HOST_WIDE_INT_1U << lowest_bit_set));
3945}
3946
3947/* Create a constant in which all bits outside the range from the lowest set
3948   bit to the highest set bit of VAL_IN are set to 1. */
3949
3950unsigned HOST_WIDE_INT
3951aarch64_and_split_imm2 (HOST_WIDE_INT val_in)
3952{
3953 return val_in | ~aarch64_and_split_imm1 (val_in);
3954}
3955
3956/* Return true if VAL_IN is a valid 'and' bitmask immediate. */
3957
3958bool
3959aarch64_and_bitmask_imm (unsigned HOST_WIDE_INT val_in, machine_mode mode)
3960{
3961 if (aarch64_bitmask_imm (val_in, mode))
3962 return false;
3963
3964 if (aarch64_move_imm (val_in, mode))
3965 return false;
3966
3967 unsigned HOST_WIDE_INT imm2 = aarch64_and_split_imm2 (val_in);
3968
3969 return aarch64_bitmask_imm (imm2, mode);
3970}
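/* Worked example (value assumed for illustration): VAL_IN == 0x00fff00f is
   neither a bitmask nor a MOVZ/MOVN immediate.  aarch64_and_split_imm1 gives
   0x00ffffff (ones covering bits 0-23) and aarch64_and_split_imm2 gives
   0xfffffffffffff00f, both of which are valid bitmask immediates, so a
   DImode AND with 0x00fff00f can be split into two AND instructions.  */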
43e9d192
IB
3971
3972/* Return true if val is an immediate that can be loaded into a
3973 register in a single instruction. */
3974bool
ef4bddc2 3975aarch64_move_imm (HOST_WIDE_INT val, machine_mode mode)
43e9d192
IB
3976{
3977 if (aarch64_movw_imm (val, mode) || aarch64_movw_imm (~val, mode))
3978 return 1;
3979 return aarch64_bitmask_imm (val, mode);
3980}
3981
3982static bool
ef4bddc2 3983aarch64_cannot_force_const_mem (machine_mode mode ATTRIBUTE_UNUSED, rtx x)
43e9d192
IB
3984{
3985 rtx base, offset;
7eda14e1 3986
43e9d192
IB
3987 if (GET_CODE (x) == HIGH)
3988 return true;
3989
3990 split_const (x, &base, &offset);
3991 if (GET_CODE (base) == SYMBOL_REF || GET_CODE (base) == LABEL_REF)
28514dda 3992 {
a6e0bfa7 3993 if (aarch64_classify_symbol (base, offset)
28514dda
YZ
3994 != SYMBOL_FORCE_TO_MEM)
3995 return true;
3996 else
3997 /* Avoid generating a 64-bit relocation in ILP32; leave
3998 to aarch64_expand_mov_immediate to handle it properly. */
3999 return mode != ptr_mode;
4000 }
43e9d192
IB
4001
4002 return aarch64_tls_referenced_p (x);
4003}
4004
e79136e4
WD
4005/* Implement TARGET_CASE_VALUES_THRESHOLD.
4006 The expansion for a table switch is quite expensive due to the number
4007   of instructions, the table lookup and the hard-to-predict indirect jump.
4008 When optimizing for speed, and -O3 enabled, use the per-core tuning if
4009 set, otherwise use tables for > 16 cases as a tradeoff between size and
4010 performance. When optimizing for size, use the default setting. */
50487d79
EM
4011
4012static unsigned int
4013aarch64_case_values_threshold (void)
4014{
4015 /* Use the specified limit for the number of cases before using jump
4016 tables at higher optimization levels. */
4017 if (optimize > 2
4018 && selected_cpu->tune->max_case_values != 0)
4019 return selected_cpu->tune->max_case_values;
4020 else
e79136e4 4021 return optimize_size ? default_case_values_threshold () : 17;
50487d79
EM
4022}
4023
43e9d192
IB
4024/* Return true if register REGNO is a valid index register.
4025 STRICT_P is true if REG_OK_STRICT is in effect. */
4026
4027bool
4028aarch64_regno_ok_for_index_p (int regno, bool strict_p)
4029{
4030 if (!HARD_REGISTER_NUM_P (regno))
4031 {
4032 if (!strict_p)
4033 return true;
4034
4035 if (!reg_renumber)
4036 return false;
4037
4038 regno = reg_renumber[regno];
4039 }
4040 return GP_REGNUM_P (regno);
4041}
4042
4043/* Return true if register REGNO is a valid base register for mode MODE.
4044 STRICT_P is true if REG_OK_STRICT is in effect. */
4045
4046bool
4047aarch64_regno_ok_for_base_p (int regno, bool strict_p)
4048{
4049 if (!HARD_REGISTER_NUM_P (regno))
4050 {
4051 if (!strict_p)
4052 return true;
4053
4054 if (!reg_renumber)
4055 return false;
4056
4057 regno = reg_renumber[regno];
4058 }
4059
4060 /* The fake registers will be eliminated to either the stack or
4061 hard frame pointer, both of which are usually valid base registers.
4062 Reload deals with the cases where the eliminated form isn't valid. */
4063 return (GP_REGNUM_P (regno)
4064 || regno == SP_REGNUM
4065 || regno == FRAME_POINTER_REGNUM
4066 || regno == ARG_POINTER_REGNUM);
4067}
4068
4069/* Return true if X is a valid base register for mode MODE.
4070 STRICT_P is true if REG_OK_STRICT is in effect. */
4071
4072static bool
4073aarch64_base_register_rtx_p (rtx x, bool strict_p)
4074{
4075 if (!strict_p && GET_CODE (x) == SUBREG)
4076 x = SUBREG_REG (x);
4077
4078 return (REG_P (x) && aarch64_regno_ok_for_base_p (REGNO (x), strict_p));
4079}
4080
4081/* Return true if address offset is a valid index. If it is, fill in INFO
4082 appropriately. STRICT_P is true if REG_OK_STRICT is in effect. */
4083
4084static bool
4085aarch64_classify_index (struct aarch64_address_info *info, rtx x,
ef4bddc2 4086 machine_mode mode, bool strict_p)
43e9d192
IB
4087{
4088 enum aarch64_address_type type;
4089 rtx index;
4090 int shift;
4091
4092 /* (reg:P) */
4093 if ((REG_P (x) || GET_CODE (x) == SUBREG)
4094 && GET_MODE (x) == Pmode)
4095 {
4096 type = ADDRESS_REG_REG;
4097 index = x;
4098 shift = 0;
4099 }
4100 /* (sign_extend:DI (reg:SI)) */
4101 else if ((GET_CODE (x) == SIGN_EXTEND
4102 || GET_CODE (x) == ZERO_EXTEND)
4103 && GET_MODE (x) == DImode
4104 && GET_MODE (XEXP (x, 0)) == SImode)
4105 {
4106 type = (GET_CODE (x) == SIGN_EXTEND)
4107 ? ADDRESS_REG_SXTW : ADDRESS_REG_UXTW;
4108 index = XEXP (x, 0);
4109 shift = 0;
4110 }
4111 /* (mult:DI (sign_extend:DI (reg:SI)) (const_int scale)) */
4112 else if (GET_CODE (x) == MULT
4113 && (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
4114 || GET_CODE (XEXP (x, 0)) == ZERO_EXTEND)
4115 && GET_MODE (XEXP (x, 0)) == DImode
4116 && GET_MODE (XEXP (XEXP (x, 0), 0)) == SImode
4117 && CONST_INT_P (XEXP (x, 1)))
4118 {
4119 type = (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND)
4120 ? ADDRESS_REG_SXTW : ADDRESS_REG_UXTW;
4121 index = XEXP (XEXP (x, 0), 0);
4122 shift = exact_log2 (INTVAL (XEXP (x, 1)));
4123 }
4124 /* (ashift:DI (sign_extend:DI (reg:SI)) (const_int shift)) */
4125 else if (GET_CODE (x) == ASHIFT
4126 && (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
4127 || GET_CODE (XEXP (x, 0)) == ZERO_EXTEND)
4128 && GET_MODE (XEXP (x, 0)) == DImode
4129 && GET_MODE (XEXP (XEXP (x, 0), 0)) == SImode
4130 && CONST_INT_P (XEXP (x, 1)))
4131 {
4132 type = (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND)
4133 ? ADDRESS_REG_SXTW : ADDRESS_REG_UXTW;
4134 index = XEXP (XEXP (x, 0), 0);
4135 shift = INTVAL (XEXP (x, 1));
4136 }
4137 /* (sign_extract:DI (mult:DI (reg:DI) (const_int scale)) 32+shift 0) */
4138 else if ((GET_CODE (x) == SIGN_EXTRACT
4139 || GET_CODE (x) == ZERO_EXTRACT)
4140 && GET_MODE (x) == DImode
4141 && GET_CODE (XEXP (x, 0)) == MULT
4142 && GET_MODE (XEXP (XEXP (x, 0), 0)) == DImode
4143 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))
4144 {
4145 type = (GET_CODE (x) == SIGN_EXTRACT)
4146 ? ADDRESS_REG_SXTW : ADDRESS_REG_UXTW;
4147 index = XEXP (XEXP (x, 0), 0);
4148 shift = exact_log2 (INTVAL (XEXP (XEXP (x, 0), 1)));
4149 if (INTVAL (XEXP (x, 1)) != 32 + shift
4150 || INTVAL (XEXP (x, 2)) != 0)
4151 shift = -1;
4152 }
4153 /* (and:DI (mult:DI (reg:DI) (const_int scale))
4154 (const_int 0xffffffff<<shift)) */
4155 else if (GET_CODE (x) == AND
4156 && GET_MODE (x) == DImode
4157 && GET_CODE (XEXP (x, 0)) == MULT
4158 && GET_MODE (XEXP (XEXP (x, 0), 0)) == DImode
4159 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
4160 && CONST_INT_P (XEXP (x, 1)))
4161 {
4162 type = ADDRESS_REG_UXTW;
4163 index = XEXP (XEXP (x, 0), 0);
4164 shift = exact_log2 (INTVAL (XEXP (XEXP (x, 0), 1)));
4165 if (INTVAL (XEXP (x, 1)) != (HOST_WIDE_INT)0xffffffff << shift)
4166 shift = -1;
4167 }
4168 /* (sign_extract:DI (ashift:DI (reg:DI) (const_int shift)) 32+shift 0) */
4169 else if ((GET_CODE (x) == SIGN_EXTRACT
4170 || GET_CODE (x) == ZERO_EXTRACT)
4171 && GET_MODE (x) == DImode
4172 && GET_CODE (XEXP (x, 0)) == ASHIFT
4173 && GET_MODE (XEXP (XEXP (x, 0), 0)) == DImode
4174 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))
4175 {
4176 type = (GET_CODE (x) == SIGN_EXTRACT)
4177 ? ADDRESS_REG_SXTW : ADDRESS_REG_UXTW;
4178 index = XEXP (XEXP (x, 0), 0);
4179 shift = INTVAL (XEXP (XEXP (x, 0), 1));
4180 if (INTVAL (XEXP (x, 1)) != 32 + shift
4181 || INTVAL (XEXP (x, 2)) != 0)
4182 shift = -1;
4183 }
4184 /* (and:DI (ashift:DI (reg:DI) (const_int shift))
4185 (const_int 0xffffffff<<shift)) */
4186 else if (GET_CODE (x) == AND
4187 && GET_MODE (x) == DImode
4188 && GET_CODE (XEXP (x, 0)) == ASHIFT
4189 && GET_MODE (XEXP (XEXP (x, 0), 0)) == DImode
4190 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
4191 && CONST_INT_P (XEXP (x, 1)))
4192 {
4193 type = ADDRESS_REG_UXTW;
4194 index = XEXP (XEXP (x, 0), 0);
4195 shift = INTVAL (XEXP (XEXP (x, 0), 1));
4196 if (INTVAL (XEXP (x, 1)) != (HOST_WIDE_INT)0xffffffff << shift)
4197 shift = -1;
4198 }
4199 /* (mult:P (reg:P) (const_int scale)) */
4200 else if (GET_CODE (x) == MULT
4201 && GET_MODE (x) == Pmode
4202 && GET_MODE (XEXP (x, 0)) == Pmode
4203 && CONST_INT_P (XEXP (x, 1)))
4204 {
4205 type = ADDRESS_REG_REG;
4206 index = XEXP (x, 0);
4207 shift = exact_log2 (INTVAL (XEXP (x, 1)));
4208 }
4209 /* (ashift:P (reg:P) (const_int shift)) */
4210 else if (GET_CODE (x) == ASHIFT
4211 && GET_MODE (x) == Pmode
4212 && GET_MODE (XEXP (x, 0)) == Pmode
4213 && CONST_INT_P (XEXP (x, 1)))
4214 {
4215 type = ADDRESS_REG_REG;
4216 index = XEXP (x, 0);
4217 shift = INTVAL (XEXP (x, 1));
4218 }
4219 else
4220 return false;
4221
4222 if (GET_CODE (index) == SUBREG)
4223 index = SUBREG_REG (index);
4224
4225 if ((shift == 0 ||
4226 (shift > 0 && shift <= 3
4227 && (1 << shift) == GET_MODE_SIZE (mode)))
4228 && REG_P (index)
4229 && aarch64_regno_ok_for_index_p (REGNO (index), strict_p))
4230 {
4231 info->type = type;
4232 info->offset = index;
4233 info->shift = shift;
4234 return true;
4235 }
4236
4237 return false;
4238}
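/* For example, for an SImode access the index expression
   (mult:DI (reg:DI x2) (const_int 4)) is accepted with shift == 2, giving
   addresses such as [x1, x2, lsl 2], while (sign_extend:DI (reg:SI w2)) is
   classified as ADDRESS_REG_SXTW, giving [x1, w2, sxtw].  */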
4239
abc52318
KT
4240/* Return true if MODE is one of the modes for which we
4241 support LDP/STP operations. */
4242
4243static bool
4244aarch64_mode_valid_for_sched_fusion_p (machine_mode mode)
4245{
4246 return mode == SImode || mode == DImode
4247 || mode == SFmode || mode == DFmode
4248 || (aarch64_vector_mode_supported_p (mode)
4249 && GET_MODE_SIZE (mode) == 8);
4250}
4251
9e0218fc
RH
4252/* Return true if REGNO is a virtual pointer register, or an eliminable
4253 "soft" frame register. Like REGNO_PTR_FRAME_P except that we don't
4254 include stack_pointer or hard_frame_pointer. */
4255static bool
4256virt_or_elim_regno_p (unsigned regno)
4257{
4258 return ((regno >= FIRST_VIRTUAL_REGISTER
4259 && regno <= LAST_VIRTUAL_POINTER_REGISTER)
4260 || regno == FRAME_POINTER_REGNUM
4261 || regno == ARG_POINTER_REGNUM);
4262}
4263
43e9d192
IB
4264/* Return true if X is a valid address for machine mode MODE. If it is,
4265 fill in INFO appropriately. STRICT_P is true if REG_OK_STRICT is in
4266 effect. OUTER_CODE is PARALLEL for a load/store pair. */
4267
4268static bool
4269aarch64_classify_address (struct aarch64_address_info *info,
ef4bddc2 4270 rtx x, machine_mode mode,
43e9d192
IB
4271 RTX_CODE outer_code, bool strict_p)
4272{
4273 enum rtx_code code = GET_CODE (x);
4274 rtx op0, op1;
2d8c6dc1 4275
80d43579
WD
4276 /* On BE, we use load/store pair for all large int mode load/stores.
4277 TI/TFmode may also use a load/store pair. */
2d8c6dc1 4278 bool load_store_pair_p = (outer_code == PARALLEL
80d43579
WD
4279 || mode == TImode
4280 || mode == TFmode
2d8c6dc1
AH
4281 || (BYTES_BIG_ENDIAN
4282 && aarch64_vect_struct_mode_p (mode)));
4283
43e9d192 4284 bool allow_reg_index_p =
2d8c6dc1
AH
4285 !load_store_pair_p
4286 && (GET_MODE_SIZE (mode) != 16 || aarch64_vector_mode_supported_p (mode))
4287 && !aarch64_vect_struct_mode_p (mode);
4288
4289 /* On LE, for AdvSIMD, don't support anything other than POST_INC or
4290 REG addressing. */
4291 if (aarch64_vect_struct_mode_p (mode) && !BYTES_BIG_ENDIAN
43e9d192
IB
4292 && (code != POST_INC && code != REG))
4293 return false;
4294
4295 switch (code)
4296 {
4297 case REG:
4298 case SUBREG:
4299 info->type = ADDRESS_REG_IMM;
4300 info->base = x;
4301 info->offset = const0_rtx;
4302 return aarch64_base_register_rtx_p (x, strict_p);
4303
4304 case PLUS:
4305 op0 = XEXP (x, 0);
4306 op1 = XEXP (x, 1);
15c0c5c9
JW
4307
4308 if (! strict_p
4aa81c2e 4309 && REG_P (op0)
9e0218fc 4310 && virt_or_elim_regno_p (REGNO (op0))
4aa81c2e 4311 && CONST_INT_P (op1))
15c0c5c9
JW
4312 {
4313 info->type = ADDRESS_REG_IMM;
4314 info->base = op0;
4315 info->offset = op1;
4316
4317 return true;
4318 }
4319
43e9d192
IB
4320 if (GET_MODE_SIZE (mode) != 0
4321 && CONST_INT_P (op1)
4322 && aarch64_base_register_rtx_p (op0, strict_p))
4323 {
4324 HOST_WIDE_INT offset = INTVAL (op1);
4325
4326 info->type = ADDRESS_REG_IMM;
4327 info->base = op0;
4328 info->offset = op1;
4329
4330 /* TImode and TFmode values are allowed in both pairs of X
4331 registers and individual Q registers. The available
4332 address modes are:
4333 X,X: 7-bit signed scaled offset
4334 Q: 9-bit signed offset
4335 We conservatively require an offset representable in either mode.
8ed49fab
KT
4336 When performing the check for pairs of X registers i.e. LDP/STP
4337 pass down DImode since that is the natural size of the LDP/STP
4338 instruction memory accesses. */
43e9d192 4339 if (mode == TImode || mode == TFmode)
8ed49fab 4340 return (aarch64_offset_7bit_signed_scaled_p (DImode, offset)
8734dfac
WD
4341 && (offset_9bit_signed_unscaled_p (mode, offset)
4342 || offset_12bit_unsigned_scaled_p (mode, offset)));
43e9d192 4343
2d8c6dc1
AH
4344 /* A 7bit offset check because OImode will emit a ldp/stp
4345 instruction (only big endian will get here).
4346 For ldp/stp instructions, the offset is scaled for the size of a
4347 single element of the pair. */
4348 if (mode == OImode)
4349 return aarch64_offset_7bit_signed_scaled_p (TImode, offset);
4350
4351 /* Three 9/12 bit offsets checks because CImode will emit three
4352 ldr/str instructions (only big endian will get here). */
4353 if (mode == CImode)
4354 return (aarch64_offset_7bit_signed_scaled_p (TImode, offset)
4355 && (offset_9bit_signed_unscaled_p (V16QImode, offset + 32)
4356 || offset_12bit_unsigned_scaled_p (V16QImode,
4357 offset + 32)));
4358
4359 /* Two 7bit offsets checks because XImode will emit two ldp/stp
4360 instructions (only big endian will get here). */
4361 if (mode == XImode)
4362 return (aarch64_offset_7bit_signed_scaled_p (TImode, offset)
4363 && aarch64_offset_7bit_signed_scaled_p (TImode,
4364 offset + 32));
4365
4366 if (load_store_pair_p)
43e9d192 4367 return ((GET_MODE_SIZE (mode) == 4 || GET_MODE_SIZE (mode) == 8)
44707478 4368 && aarch64_offset_7bit_signed_scaled_p (mode, offset));
43e9d192
IB
4369 else
4370 return (offset_9bit_signed_unscaled_p (mode, offset)
4371 || offset_12bit_unsigned_scaled_p (mode, offset));
4372 }
4373
4374 if (allow_reg_index_p)
4375 {
4376 /* Look for base + (scaled/extended) index register. */
4377 if (aarch64_base_register_rtx_p (op0, strict_p)
4378 && aarch64_classify_index (info, op1, mode, strict_p))
4379 {
4380 info->base = op0;
4381 return true;
4382 }
4383 if (aarch64_base_register_rtx_p (op1, strict_p)
4384 && aarch64_classify_index (info, op0, mode, strict_p))
4385 {
4386 info->base = op1;
4387 return true;
4388 }
4389 }
4390
4391 return false;
4392
4393 case POST_INC:
4394 case POST_DEC:
4395 case PRE_INC:
4396 case PRE_DEC:
4397 info->type = ADDRESS_REG_WB;
4398 info->base = XEXP (x, 0);
4399 info->offset = NULL_RTX;
4400 return aarch64_base_register_rtx_p (info->base, strict_p);
4401
4402 case POST_MODIFY:
4403 case PRE_MODIFY:
4404 info->type = ADDRESS_REG_WB;
4405 info->base = XEXP (x, 0);
4406 if (GET_CODE (XEXP (x, 1)) == PLUS
4407 && CONST_INT_P (XEXP (XEXP (x, 1), 1))
4408 && rtx_equal_p (XEXP (XEXP (x, 1), 0), info->base)
4409 && aarch64_base_register_rtx_p (info->base, strict_p))
4410 {
4411 HOST_WIDE_INT offset;
4412 info->offset = XEXP (XEXP (x, 1), 1);
4413 offset = INTVAL (info->offset);
4414
4415 /* TImode and TFmode values are allowed in both pairs of X
4416 registers and individual Q registers. The available
4417 address modes are:
4418 X,X: 7-bit signed scaled offset
4419 Q: 9-bit signed offset
4420 We conservatively require an offset representable in either mode.
4421 */
4422 if (mode == TImode || mode == TFmode)
44707478 4423 return (aarch64_offset_7bit_signed_scaled_p (mode, offset)
43e9d192
IB
4424 && offset_9bit_signed_unscaled_p (mode, offset));
4425
2d8c6dc1 4426 if (load_store_pair_p)
43e9d192 4427 return ((GET_MODE_SIZE (mode) == 4 || GET_MODE_SIZE (mode) == 8)
44707478 4428 && aarch64_offset_7bit_signed_scaled_p (mode, offset));
43e9d192
IB
4429 else
4430 return offset_9bit_signed_unscaled_p (mode, offset);
4431 }
4432 return false;
4433
4434 case CONST:
4435 case SYMBOL_REF:
4436 case LABEL_REF:
79517551
SN
4437 /* load literal: pc-relative constant pool entry. Only supported
4438 for SI mode or larger. */
43e9d192 4439 info->type = ADDRESS_SYMBOLIC;
2d8c6dc1
AH
4440
4441 if (!load_store_pair_p && GET_MODE_SIZE (mode) >= 4)
43e9d192
IB
4442 {
4443 rtx sym, addend;
4444
4445 split_const (x, &sym, &addend);
b4f50fd4
RR
4446 return ((GET_CODE (sym) == LABEL_REF
4447 || (GET_CODE (sym) == SYMBOL_REF
4448 && CONSTANT_POOL_ADDRESS_P (sym)
9ee6540a 4449 && aarch64_pcrelative_literal_loads)));
43e9d192
IB
4450 }
4451 return false;
4452
4453 case LO_SUM:
4454 info->type = ADDRESS_LO_SUM;
4455 info->base = XEXP (x, 0);
4456 info->offset = XEXP (x, 1);
4457 if (allow_reg_index_p
4458 && aarch64_base_register_rtx_p (info->base, strict_p))
4459 {
4460 rtx sym, offs;
4461 split_const (info->offset, &sym, &offs);
4462 if (GET_CODE (sym) == SYMBOL_REF
a6e0bfa7 4463 && (aarch64_classify_symbol (sym, offs) == SYMBOL_SMALL_ABSOLUTE))
43e9d192
IB
4464 {
4465 /* The symbol and offset must be aligned to the access size. */
4466 unsigned int align;
4467 unsigned int ref_size;
4468
4469 if (CONSTANT_POOL_ADDRESS_P (sym))
4470 align = GET_MODE_ALIGNMENT (get_pool_mode (sym));
4471 else if (TREE_CONSTANT_POOL_ADDRESS_P (sym))
4472 {
4473 tree exp = SYMBOL_REF_DECL (sym);
4474 align = TYPE_ALIGN (TREE_TYPE (exp));
4475 align = CONSTANT_ALIGNMENT (exp, align);
4476 }
4477 else if (SYMBOL_REF_DECL (sym))
4478 align = DECL_ALIGN (SYMBOL_REF_DECL (sym));
6c031d8d
KV
4479 else if (SYMBOL_REF_HAS_BLOCK_INFO_P (sym)
4480 && SYMBOL_REF_BLOCK (sym) != NULL)
4481 align = SYMBOL_REF_BLOCK (sym)->alignment;
43e9d192
IB
4482 else
4483 align = BITS_PER_UNIT;
4484
4485 ref_size = GET_MODE_SIZE (mode);
4486 if (ref_size == 0)
4487 ref_size = GET_MODE_SIZE (DImode);
4488
4489 return ((INTVAL (offs) & (ref_size - 1)) == 0
4490 && ((align / BITS_PER_UNIT) & (ref_size - 1)) == 0);
4491 }
4492 }
4493 return false;
4494
4495 default:
4496 return false;
4497 }
4498}
4499
4500bool
4501aarch64_symbolic_address_p (rtx x)
4502{
4503 rtx offset;
4504
4505 split_const (x, &x, &offset);
4506 return GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == LABEL_REF;
4507}
4508
a6e0bfa7 4509/* Classify the base of symbolic expression X. */
da4f13a4
MS
4510
4511enum aarch64_symbol_type
a6e0bfa7 4512aarch64_classify_symbolic_expression (rtx x)
43e9d192
IB
4513{
4514 rtx offset;
da4f13a4 4515
43e9d192 4516 split_const (x, &x, &offset);
a6e0bfa7 4517 return aarch64_classify_symbol (x, offset);
43e9d192
IB
4518}
4519
4520
4521/* Return TRUE if X is a legitimate address for accessing memory in
4522 mode MODE. */
4523static bool
ef4bddc2 4524aarch64_legitimate_address_hook_p (machine_mode mode, rtx x, bool strict_p)
43e9d192
IB
4525{
4526 struct aarch64_address_info addr;
4527
4528 return aarch64_classify_address (&addr, x, mode, MEM, strict_p);
4529}
4530
4531/* Return TRUE if X is a legitimate address for accessing memory in
4532 mode MODE. OUTER_CODE will be PARALLEL if this is a load/store
4533 pair operation. */
4534bool
ef4bddc2 4535aarch64_legitimate_address_p (machine_mode mode, rtx x,
aef66c94 4536 RTX_CODE outer_code, bool strict_p)
43e9d192
IB
4537{
4538 struct aarch64_address_info addr;
4539
4540 return aarch64_classify_address (&addr, x, mode, outer_code, strict_p);
4541}
4542
491ec060
WD
4543/* Split an out-of-range address displacement into a base and offset.
4544 Use 4KB range for 1- and 2-byte accesses and a 16KB range otherwise
4545   to increase opportunities for sharing the base address between accesses of different sizes.
8734dfac 4546 For unaligned accesses and TI/TF mode use the signed 9-bit range. */
491ec060
WD
4547static bool
4548aarch64_legitimize_address_displacement (rtx *disp, rtx *off, machine_mode mode)
4549{
8734dfac
WD
4550 HOST_WIDE_INT offset = INTVAL (*disp);
4551 HOST_WIDE_INT base = offset & ~(GET_MODE_SIZE (mode) < 4 ? 0xfff : 0x3ffc);
491ec060 4552
8734dfac
WD
4553 if (mode == TImode || mode == TFmode
4554 || (offset & (GET_MODE_SIZE (mode) - 1)) != 0)
4555 base = (offset + 0x100) & ~0x1ff;
491ec060 4556
8734dfac
WD
4557 *off = GEN_INT (base);
4558 *disp = GEN_INT (offset - base);
491ec060
WD
4559 return true;
4560}
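/* For instance, an aligned DImode access at offset 0x10008 is split into the
   anchor 0x10000 plus a residual offset of 8, so that neighbouring accesses
   can reuse the same anchor; an unaligned or TImode access would instead be
   rebased to a multiple of 0x200 so the residual fits the signed 9-bit
   range.  */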
4561
43e9d192
IB
4562/* Return TRUE if rtx X is immediate constant 0.0 */
4563bool
3520f7cc 4564aarch64_float_const_zero_rtx_p (rtx x)
43e9d192 4565{
43e9d192
IB
4566 if (GET_MODE (x) == VOIDmode)
4567 return false;
4568
34a72c33 4569 if (REAL_VALUE_MINUS_ZERO (*CONST_DOUBLE_REAL_VALUE (x)))
43e9d192 4570 return !HONOR_SIGNED_ZEROS (GET_MODE (x));
34a72c33 4571 return real_equal (CONST_DOUBLE_REAL_VALUE (x), &dconst0);
43e9d192
IB
4572}
4573
70f09188
AP
4574/* Return the fixed registers used for condition codes. */
4575
4576static bool
4577aarch64_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
4578{
4579 *p1 = CC_REGNUM;
4580 *p2 = INVALID_REGNUM;
4581 return true;
4582}
4583
78607708
TV
4584/* Emit call insn with PAT and do aarch64-specific handling. */
4585
d07a3fed 4586void
78607708
TV
4587aarch64_emit_call_insn (rtx pat)
4588{
4589 rtx insn = emit_call_insn (pat);
4590
4591 rtx *fusage = &CALL_INSN_FUNCTION_USAGE (insn);
4592 clobber_reg (fusage, gen_rtx_REG (word_mode, IP0_REGNUM));
4593 clobber_reg (fusage, gen_rtx_REG (word_mode, IP1_REGNUM));
4594}
4595
ef4bddc2 4596machine_mode
43e9d192
IB
4597aarch64_select_cc_mode (RTX_CODE code, rtx x, rtx y)
4598{
4599 /* All floating point compares return CCFP if it is an equality
4600 comparison, and CCFPE otherwise. */
4601 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
4602 {
4603 switch (code)
4604 {
4605 case EQ:
4606 case NE:
4607 case UNORDERED:
4608 case ORDERED:
4609 case UNLT:
4610 case UNLE:
4611 case UNGT:
4612 case UNGE:
4613 case UNEQ:
4614 case LTGT:
4615 return CCFPmode;
4616
4617 case LT:
4618 case LE:
4619 case GT:
4620 case GE:
4621 return CCFPEmode;
4622
4623 default:
4624 gcc_unreachable ();
4625 }
4626 }
4627
2b8568fe
KT
4628 /* Equality comparisons of short modes against zero can be performed
4629 using the TST instruction with the appropriate bitmask. */
4630 if (y == const0_rtx && REG_P (x)
4631 && (code == EQ || code == NE)
4632 && (GET_MODE (x) == HImode || GET_MODE (x) == QImode))
4633 return CC_NZmode;
4634
b06335f9
KT
4635 /* Similarly, comparisons of zero_extends from shorter modes can
4636 be performed using an ANDS with an immediate mask. */
4637 if (y == const0_rtx && GET_CODE (x) == ZERO_EXTEND
4638 && (GET_MODE (x) == SImode || GET_MODE (x) == DImode)
4639 && (GET_MODE (XEXP (x, 0)) == HImode || GET_MODE (XEXP (x, 0)) == QImode)
4640 && (code == EQ || code == NE))
4641 return CC_NZmode;
4642
43e9d192
IB
4643 if ((GET_MODE (x) == SImode || GET_MODE (x) == DImode)
4644 && y == const0_rtx
4645 && (code == EQ || code == NE || code == LT || code == GE)
b056c910 4646 && (GET_CODE (x) == PLUS || GET_CODE (x) == MINUS || GET_CODE (x) == AND
7325d85a
KT
4647 || GET_CODE (x) == NEG
4648 || (GET_CODE (x) == ZERO_EXTRACT && CONST_INT_P (XEXP (x, 1))
4649 && CONST_INT_P (XEXP (x, 2)))))
43e9d192
IB
4650 return CC_NZmode;
4651
1c992d1e 4652 /* A compare with a shifted operand. Because of canonicalization,
43e9d192
IB
4653 the comparison will have to be swapped when we emit the assembly
4654 code. */
4655 if ((GET_MODE (x) == SImode || GET_MODE (x) == DImode)
4aa81c2e 4656 && (REG_P (y) || GET_CODE (y) == SUBREG)
43e9d192
IB
4657 && (GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT
4658 || GET_CODE (x) == LSHIFTRT
1c992d1e 4659 || GET_CODE (x) == ZERO_EXTEND || GET_CODE (x) == SIGN_EXTEND))
43e9d192
IB
4660 return CC_SWPmode;
4661
1c992d1e
RE
4662 /* Similarly for a negated operand, but we can only do this for
4663 equalities. */
4664 if ((GET_MODE (x) == SImode || GET_MODE (x) == DImode)
4aa81c2e 4665 && (REG_P (y) || GET_CODE (y) == SUBREG)
1c992d1e
RE
4666 && (code == EQ || code == NE)
4667 && GET_CODE (x) == NEG)
4668 return CC_Zmode;
4669
ef22810a
RH
4670 /* A test for unsigned overflow. */
4671 if ((GET_MODE (x) == DImode || GET_MODE (x) == TImode)
4672 && code == NE
4673 && GET_CODE (x) == PLUS
4674 && GET_CODE (y) == ZERO_EXTEND)
4675 return CC_Cmode;
4676
43e9d192
IB
4677 /* For everything else, return CCmode. */
4678 return CCmode;
4679}
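/* For example, (compare (ashift:DI x 2) y) gets CC_SWPmode: the shifted
   operand can only appear as the second operand of CMP, so the comparison
   is output as "cmp y, x, lsl 2" and aarch64_get_condition_code_1 below
   maps the condition codes through the swapped table.  */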
4680
3dfa7055
ZC
4681static int
4682aarch64_get_condition_code_1 (enum machine_mode, enum rtx_code);
4683
cd5660ab 4684int
43e9d192
IB
4685aarch64_get_condition_code (rtx x)
4686{
ef4bddc2 4687 machine_mode mode = GET_MODE (XEXP (x, 0));
43e9d192
IB
4688 enum rtx_code comp_code = GET_CODE (x);
4689
4690 if (GET_MODE_CLASS (mode) != MODE_CC)
4691 mode = SELECT_CC_MODE (comp_code, XEXP (x, 0), XEXP (x, 1));
3dfa7055
ZC
4692 return aarch64_get_condition_code_1 (mode, comp_code);
4693}
43e9d192 4694
3dfa7055
ZC
4695static int
4696aarch64_get_condition_code_1 (enum machine_mode mode, enum rtx_code comp_code)
4697{
43e9d192
IB
4698 switch (mode)
4699 {
4700 case CCFPmode:
4701 case CCFPEmode:
4702 switch (comp_code)
4703 {
4704 case GE: return AARCH64_GE;
4705 case GT: return AARCH64_GT;
4706 case LE: return AARCH64_LS;
4707 case LT: return AARCH64_MI;
4708 case NE: return AARCH64_NE;
4709 case EQ: return AARCH64_EQ;
4710 case ORDERED: return AARCH64_VC;
4711 case UNORDERED: return AARCH64_VS;
4712 case UNLT: return AARCH64_LT;
4713 case UNLE: return AARCH64_LE;
4714 case UNGT: return AARCH64_HI;
4715 case UNGE: return AARCH64_PL;
cd5660ab 4716 default: return -1;
43e9d192
IB
4717 }
4718 break;
4719
4720 case CCmode:
4721 switch (comp_code)
4722 {
4723 case NE: return AARCH64_NE;
4724 case EQ: return AARCH64_EQ;
4725 case GE: return AARCH64_GE;
4726 case GT: return AARCH64_GT;
4727 case LE: return AARCH64_LE;
4728 case LT: return AARCH64_LT;
4729 case GEU: return AARCH64_CS;
4730 case GTU: return AARCH64_HI;
4731 case LEU: return AARCH64_LS;
4732 case LTU: return AARCH64_CC;
cd5660ab 4733 default: return -1;
43e9d192
IB
4734 }
4735 break;
4736
4737 case CC_SWPmode:
43e9d192
IB
4738 switch (comp_code)
4739 {
4740 case NE: return AARCH64_NE;
4741 case EQ: return AARCH64_EQ;
4742 case GE: return AARCH64_LE;
4743 case GT: return AARCH64_LT;
4744 case LE: return AARCH64_GE;
4745 case LT: return AARCH64_GT;
4746 case GEU: return AARCH64_LS;
4747 case GTU: return AARCH64_CC;
4748 case LEU: return AARCH64_CS;
4749 case LTU: return AARCH64_HI;
cd5660ab 4750 default: return -1;
43e9d192
IB
4751 }
4752 break;
4753
4754 case CC_NZmode:
4755 switch (comp_code)
4756 {
4757 case NE: return AARCH64_NE;
4758 case EQ: return AARCH64_EQ;
4759 case GE: return AARCH64_PL;
4760 case LT: return AARCH64_MI;
cd5660ab 4761 default: return -1;
43e9d192
IB
4762 }
4763 break;
4764
1c992d1e
RE
4765 case CC_Zmode:
4766 switch (comp_code)
4767 {
4768 case NE: return AARCH64_NE;
4769 case EQ: return AARCH64_EQ;
cd5660ab 4770 default: return -1;
1c992d1e
RE
4771 }
4772 break;
4773
ef22810a
RH
4774 case CC_Cmode:
4775 switch (comp_code)
4776 {
4777 case NE: return AARCH64_CS;
4778 case EQ: return AARCH64_CC;
4779 default: return -1;
4780 }
4781 break;
4782
43e9d192 4783 default:
cd5660ab 4784 return -1;
43e9d192 4785 }
3dfa7055 4786
3dfa7055 4787 return -1;
43e9d192
IB
4788}
4789
ddeabd3e
AL
4790bool
4791aarch64_const_vec_all_same_in_range_p (rtx x,
4792 HOST_WIDE_INT minval,
4793 HOST_WIDE_INT maxval)
4794{
4795 HOST_WIDE_INT firstval;
4796 int count, i;
4797
4798 if (GET_CODE (x) != CONST_VECTOR
4799 || GET_MODE_CLASS (GET_MODE (x)) != MODE_VECTOR_INT)
4800 return false;
4801
4802 firstval = INTVAL (CONST_VECTOR_ELT (x, 0));
4803 if (firstval < minval || firstval > maxval)
4804 return false;
4805
4806 count = CONST_VECTOR_NUNITS (x);
4807 for (i = 1; i < count; i++)
4808 if (INTVAL (CONST_VECTOR_ELT (x, i)) != firstval)
4809 return false;
4810
4811 return true;
4812}
4813
4814bool
4815aarch64_const_vec_all_same_int_p (rtx x, HOST_WIDE_INT val)
4816{
4817 return aarch64_const_vec_all_same_in_range_p (x, val, val);
4818}
4819
43e9d192 4820
cf670503
ZC
4821/* N Z C V. */
4822#define AARCH64_CC_V 1
4823#define AARCH64_CC_C (1 << 1)
4824#define AARCH64_CC_Z (1 << 2)
4825#define AARCH64_CC_N (1 << 3)
4826
c8012fbc
WD
4827/* N Z C V flags for ccmp. Indexed by AARCH64_COND_CODE. */
4828static const int aarch64_nzcv_codes[] =
4829{
4830 0, /* EQ, Z == 1. */
4831 AARCH64_CC_Z, /* NE, Z == 0. */
4832 0, /* CS, C == 1. */
4833 AARCH64_CC_C, /* CC, C == 0. */
4834 0, /* MI, N == 1. */
4835 AARCH64_CC_N, /* PL, N == 0. */
4836 0, /* VS, V == 1. */
4837 AARCH64_CC_V, /* VC, V == 0. */
4838  0, /* HI, C == 1 && Z == 0.  */
4839 AARCH64_CC_C, /* LS, !(C == 1 && Z == 0). */
4840 AARCH64_CC_V, /* GE, N == V. */
4841 0, /* LT, N != V. */
4842 AARCH64_CC_Z, /* GT, Z == 0 && N == V. */
4843 0, /* LE, !(Z == 0 && N == V). */
4844 0, /* AL, Any. */
4845 0 /* NV, Any. */
cf670503
ZC
4846};
4847
cc8ca59e
JB
4848static void
4849aarch64_print_operand (FILE *f, rtx x, int code)
43e9d192
IB
4850{
4851 switch (code)
4852 {
f541a481
KT
4853 /* An integer or symbol address without a preceding # sign. */
4854 case 'c':
4855 switch (GET_CODE (x))
4856 {
4857 case CONST_INT:
4858 fprintf (f, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
4859 break;
4860
4861 case SYMBOL_REF:
4862 output_addr_const (f, x);
4863 break;
4864
4865 case CONST:
4866 if (GET_CODE (XEXP (x, 0)) == PLUS
4867 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF)
4868 {
4869 output_addr_const (f, x);
4870 break;
4871 }
4872 /* Fall through. */
4873
4874 default:
4875 output_operand_lossage ("Unsupported operand for code '%c'", code);
4876 }
4877 break;
4878
43e9d192
IB
4879 case 'e':
4880 /* Print the sign/zero-extend size as a character 8->b, 16->h, 32->w. */
4881 {
4882 int n;
4883
4aa81c2e 4884 if (!CONST_INT_P (x)
43e9d192
IB
4885 || (n = exact_log2 (INTVAL (x) & ~7)) <= 0)
4886 {
4887 output_operand_lossage ("invalid operand for '%%%c'", code);
4888 return;
4889 }
4890
4891 switch (n)
4892 {
4893 case 3:
4894 fputc ('b', f);
4895 break;
4896 case 4:
4897 fputc ('h', f);
4898 break;
4899 case 5:
4900 fputc ('w', f);
4901 break;
4902 default:
4903 output_operand_lossage ("invalid operand for '%%%c'", code);
4904 return;
4905 }
4906 }
4907 break;
4908
4909 case 'p':
4910 {
4911 int n;
4912
4913 /* Print N such that 2^N == X. */
4aa81c2e 4914 if (!CONST_INT_P (x) || (n = exact_log2 (INTVAL (x))) < 0)
43e9d192
IB
4915 {
4916 output_operand_lossage ("invalid operand for '%%%c'", code);
4917 return;
4918 }
4919
4920 asm_fprintf (f, "%d", n);
4921 }
4922 break;
4923
4924 case 'P':
4925 /* Print the number of non-zero bits in X (a const_int). */
4aa81c2e 4926 if (!CONST_INT_P (x))
43e9d192
IB
4927 {
4928 output_operand_lossage ("invalid operand for '%%%c'", code);
4929 return;
4930 }
4931
8d55c61b 4932 asm_fprintf (f, "%u", popcount_hwi (INTVAL (x)));
43e9d192
IB
4933 break;
4934
4935 case 'H':
4936 /* Print the higher numbered register of a pair (TImode) of regs. */
4aa81c2e 4937 if (!REG_P (x) || !GP_REGNUM_P (REGNO (x) + 1))
43e9d192
IB
4938 {
4939 output_operand_lossage ("invalid operand for '%%%c'", code);
4940 return;
4941 }
4942
01a3a324 4943 asm_fprintf (f, "%s", reg_names [REGNO (x) + 1]);
43e9d192
IB
4944 break;
4945
43e9d192 4946 case 'M':
c8012fbc 4947 case 'm':
cd5660ab
KT
4948 {
4949 int cond_code;
c8012fbc 4950 /* Print a condition (eq, ne, etc) or its inverse. */
43e9d192 4951
c8012fbc
WD
4952 /* CONST_TRUE_RTX means al/nv (al is the default, don't print it). */
4953 if (x == const_true_rtx)
cd5660ab 4954 {
c8012fbc
WD
4955 if (code == 'M')
4956 fputs ("nv", f);
cd5660ab
KT
4957 return;
4958 }
43e9d192 4959
cd5660ab
KT
4960 if (!COMPARISON_P (x))
4961 {
4962 output_operand_lossage ("invalid operand for '%%%c'", code);
4963 return;
4964 }
c8012fbc 4965
cd5660ab
KT
4966 cond_code = aarch64_get_condition_code (x);
4967 gcc_assert (cond_code >= 0);
c8012fbc
WD
4968 if (code == 'M')
4969 cond_code = AARCH64_INVERSE_CONDITION_CODE (cond_code);
4970 fputs (aarch64_condition_codes[cond_code], f);
cd5660ab 4971 }
43e9d192
IB
4972 break;
4973
4974 case 'b':
4975 case 'h':
4976 case 's':
4977 case 'd':
4978 case 'q':
4979 /* Print a scalar FP/SIMD register name. */
4980 if (!REG_P (x) || !FP_REGNUM_P (REGNO (x)))
4981 {
4982 output_operand_lossage ("incompatible floating point / vector register operand for '%%%c'", code);
4983 return;
4984 }
50ce6f88 4985 asm_fprintf (f, "%c%d", code, REGNO (x) - V0_REGNUM);
43e9d192
IB
4986 break;
4987
4988 case 'S':
4989 case 'T':
4990 case 'U':
4991 case 'V':
4992 /* Print the first FP/SIMD register name in a list. */
4993 if (!REG_P (x) || !FP_REGNUM_P (REGNO (x)))
4994 {
4995 output_operand_lossage ("incompatible floating point / vector register operand for '%%%c'", code);
4996 return;
4997 }
50ce6f88 4998 asm_fprintf (f, "v%d", REGNO (x) - V0_REGNUM + (code - 'S'));
43e9d192
IB
4999 break;
5000
2d8c6dc1
AH
5001 case 'R':
5002 /* Print a scalar FP/SIMD register name + 1. */
5003 if (!REG_P (x) || !FP_REGNUM_P (REGNO (x)))
5004 {
5005 output_operand_lossage ("incompatible floating point / vector register operand for '%%%c'", code);
5006 return;
5007 }
5008 asm_fprintf (f, "q%d", REGNO (x) - V0_REGNUM + 1);
5009 break;
5010
a05c0ddf 5011 case 'X':
50d38551 5012 /* Print bottom 16 bits of integer constant in hex. */
4aa81c2e 5013 if (!CONST_INT_P (x))
a05c0ddf
IB
5014 {
5015 output_operand_lossage ("invalid operand for '%%%c'", code);
5016 return;
5017 }
50d38551 5018 asm_fprintf (f, "0x%wx", UINTVAL (x) & 0xffff);
a05c0ddf
IB
5019 break;
5020
43e9d192
IB
5021 case 'w':
5022 case 'x':
5023 /* Print a general register name or the zero register (32-bit or
5024 64-bit). */
3520f7cc
JG
5025 if (x == const0_rtx
5026 || (CONST_DOUBLE_P (x) && aarch64_float_const_zero_rtx_p (x)))
43e9d192 5027 {
50ce6f88 5028 asm_fprintf (f, "%czr", code);
43e9d192
IB
5029 break;
5030 }
5031
5032 if (REG_P (x) && GP_REGNUM_P (REGNO (x)))
5033 {
50ce6f88 5034 asm_fprintf (f, "%c%d", code, REGNO (x) - R0_REGNUM);
43e9d192
IB
5035 break;
5036 }
5037
5038 if (REG_P (x) && REGNO (x) == SP_REGNUM)
5039 {
50ce6f88 5040 asm_fprintf (f, "%ssp", code == 'w' ? "w" : "");
43e9d192
IB
5041 break;
5042 }
5043
5044 /* Fall through */
5045
5046 case 0:
5047 /* Print a normal operand, if it's a general register, then we
5048 assume DImode. */
5049 if (x == NULL)
5050 {
5051 output_operand_lossage ("missing operand");
5052 return;
5053 }
5054
5055 switch (GET_CODE (x))
5056 {
5057 case REG:
01a3a324 5058 asm_fprintf (f, "%s", reg_names [REGNO (x)]);
43e9d192
IB
5059 break;
5060
5061 case MEM:
cc8ca59e 5062 output_address (GET_MODE (x), XEXP (x, 0));
43e9d192
IB
5063 break;
5064
2af16a7c 5065 case CONST:
43e9d192
IB
5066 case LABEL_REF:
5067 case SYMBOL_REF:
5068 output_addr_const (asm_out_file, x);
5069 break;
5070
5071 case CONST_INT:
5072 asm_fprintf (f, "%wd", INTVAL (x));
5073 break;
5074
5075 case CONST_VECTOR:
3520f7cc
JG
5076 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_VECTOR_INT)
5077 {
ddeabd3e
AL
5078 gcc_assert (
5079 aarch64_const_vec_all_same_in_range_p (x,
5080 HOST_WIDE_INT_MIN,
5081 HOST_WIDE_INT_MAX));
3520f7cc
JG
5082 asm_fprintf (f, "%wd", INTVAL (CONST_VECTOR_ELT (x, 0)));
5083 }
5084 else if (aarch64_simd_imm_zero_p (x, GET_MODE (x)))
5085 {
5086 fputc ('0', f);
5087 }
5088 else
5089 gcc_unreachable ();
43e9d192
IB
5090 break;
5091
3520f7cc 5092 case CONST_DOUBLE:
2ca5b430
KT
5093 /* Since we define TARGET_SUPPORTS_WIDE_INT we shouldn't ever
5094 be getting CONST_DOUBLEs holding integers. */
5095 gcc_assert (GET_MODE (x) != VOIDmode);
5096 if (aarch64_float_const_zero_rtx_p (x))
3520f7cc
JG
5097 {
5098 fputc ('0', f);
5099 break;
5100 }
5101 else if (aarch64_float_const_representable_p (x))
5102 {
5103#define buf_size 20
5104 char float_buf[buf_size] = {'\0'};
34a72c33
RS
5105 real_to_decimal_for_mode (float_buf,
5106 CONST_DOUBLE_REAL_VALUE (x),
3520f7cc
JG
5107 buf_size, buf_size,
5108 1, GET_MODE (x));
5109 asm_fprintf (asm_out_file, "%s", float_buf);
5110 break;
5111#undef buf_size
5112 }
5113 output_operand_lossage ("invalid constant");
5114 return;
43e9d192
IB
5115 default:
5116 output_operand_lossage ("invalid operand");
5117 return;
5118 }
5119 break;
5120
5121 case 'A':
5122 if (GET_CODE (x) == HIGH)
5123 x = XEXP (x, 0);
5124
a6e0bfa7 5125 switch (aarch64_classify_symbolic_expression (x))
43e9d192 5126 {
6642bdb4 5127 case SYMBOL_SMALL_GOT_4G:
43e9d192
IB
5128 asm_fprintf (asm_out_file, ":got:");
5129 break;
5130
5131 case SYMBOL_SMALL_TLSGD:
5132 asm_fprintf (asm_out_file, ":tlsgd:");
5133 break;
5134
5135 case SYMBOL_SMALL_TLSDESC:
5136 asm_fprintf (asm_out_file, ":tlsdesc:");
5137 break;
5138
79496620 5139 case SYMBOL_SMALL_TLSIE:
43e9d192
IB
5140 asm_fprintf (asm_out_file, ":gottprel:");
5141 break;
5142
d18ba284 5143 case SYMBOL_TLSLE24:
43e9d192
IB
5144 asm_fprintf (asm_out_file, ":tprel:");
5145 break;
5146
87dd8ab0
MS
5147 case SYMBOL_TINY_GOT:
5148 gcc_unreachable ();
5149 break;
5150
43e9d192
IB
5151 default:
5152 break;
5153 }
5154 output_addr_const (asm_out_file, x);
5155 break;
5156
5157 case 'L':
a6e0bfa7 5158 switch (aarch64_classify_symbolic_expression (x))
43e9d192 5159 {
6642bdb4 5160 case SYMBOL_SMALL_GOT_4G:
43e9d192
IB
5161 asm_fprintf (asm_out_file, ":lo12:");
5162 break;
5163
5164 case SYMBOL_SMALL_TLSGD:
5165 asm_fprintf (asm_out_file, ":tlsgd_lo12:");
5166 break;
5167
5168 case SYMBOL_SMALL_TLSDESC:
5169 asm_fprintf (asm_out_file, ":tlsdesc_lo12:");
5170 break;
5171
79496620 5172 case SYMBOL_SMALL_TLSIE:
43e9d192
IB
5173 asm_fprintf (asm_out_file, ":gottprel_lo12:");
5174 break;
5175
cbf5629e
JW
5176 case SYMBOL_TLSLE12:
5177 asm_fprintf (asm_out_file, ":tprel_lo12:");
5178 break;
5179
d18ba284 5180 case SYMBOL_TLSLE24:
43e9d192
IB
5181 asm_fprintf (asm_out_file, ":tprel_lo12_nc:");
5182 break;
5183
87dd8ab0
MS
5184 case SYMBOL_TINY_GOT:
5185 asm_fprintf (asm_out_file, ":got:");
5186 break;
5187
5ae7caad
JW
5188 case SYMBOL_TINY_TLSIE:
5189 asm_fprintf (asm_out_file, ":gottprel:");
5190 break;
5191
43e9d192
IB
5192 default:
5193 break;
5194 }
5195 output_addr_const (asm_out_file, x);
5196 break;
5197
5198 case 'G':
5199
a6e0bfa7 5200 switch (aarch64_classify_symbolic_expression (x))
43e9d192 5201 {
d18ba284 5202 case SYMBOL_TLSLE24:
43e9d192
IB
5203 asm_fprintf (asm_out_file, ":tprel_hi12:");
5204 break;
5205 default:
5206 break;
5207 }
5208 output_addr_const (asm_out_file, x);
5209 break;
5210
cf670503
ZC
5211 case 'k':
5212 {
c8012fbc 5213 HOST_WIDE_INT cond_code;
cf670503
ZC
5214 /* Print nzcv. */
5215
c8012fbc 5216 if (!CONST_INT_P (x))
cf670503
ZC
5217 {
5218 output_operand_lossage ("invalid operand for '%%%c'", code);
5219 return;
5220 }
5221
c8012fbc
WD
5222 cond_code = INTVAL (x);
5223 gcc_assert (cond_code >= 0 && cond_code <= AARCH64_NV);
5224 asm_fprintf (f, "%d", aarch64_nzcv_codes[cond_code]);
cf670503
ZC
5225 }
5226 break;
5227
43e9d192
IB
5228 default:
5229 output_operand_lossage ("invalid operand prefix '%%%c'", code);
5230 return;
5231 }
5232}
5233
cc8ca59e
JB
5234static void
5235aarch64_print_operand_address (FILE *f, machine_mode mode, rtx x)
43e9d192
IB
5236{
5237 struct aarch64_address_info addr;
5238
cc8ca59e 5239 if (aarch64_classify_address (&addr, x, mode, MEM, true))
43e9d192
IB
5240 switch (addr.type)
5241 {
5242 case ADDRESS_REG_IMM:
5243 if (addr.offset == const0_rtx)
01a3a324 5244 asm_fprintf (f, "[%s]", reg_names [REGNO (addr.base)]);
43e9d192 5245 else
16a3246f 5246 asm_fprintf (f, "[%s, %wd]", reg_names [REGNO (addr.base)],
43e9d192
IB
5247 INTVAL (addr.offset));
5248 return;
5249
5250 case ADDRESS_REG_REG:
5251 if (addr.shift == 0)
16a3246f 5252 asm_fprintf (f, "[%s, %s]", reg_names [REGNO (addr.base)],
01a3a324 5253 reg_names [REGNO (addr.offset)]);
43e9d192 5254 else
16a3246f 5255 asm_fprintf (f, "[%s, %s, lsl %u]", reg_names [REGNO (addr.base)],
01a3a324 5256 reg_names [REGNO (addr.offset)], addr.shift);
43e9d192
IB
5257 return;
5258
5259 case ADDRESS_REG_UXTW:
5260 if (addr.shift == 0)
16a3246f 5261 asm_fprintf (f, "[%s, w%d, uxtw]", reg_names [REGNO (addr.base)],
43e9d192
IB
5262 REGNO (addr.offset) - R0_REGNUM);
5263 else
16a3246f 5264 asm_fprintf (f, "[%s, w%d, uxtw %u]", reg_names [REGNO (addr.base)],
43e9d192
IB
5265 REGNO (addr.offset) - R0_REGNUM, addr.shift);
5266 return;
5267
5268 case ADDRESS_REG_SXTW:
5269 if (addr.shift == 0)
16a3246f 5270 asm_fprintf (f, "[%s, w%d, sxtw]", reg_names [REGNO (addr.base)],
43e9d192
IB
5271 REGNO (addr.offset) - R0_REGNUM);
5272 else
16a3246f 5273 asm_fprintf (f, "[%s, w%d, sxtw %u]", reg_names [REGNO (addr.base)],
43e9d192
IB
5274 REGNO (addr.offset) - R0_REGNUM, addr.shift);
5275 return;
5276
5277 case ADDRESS_REG_WB:
5278 switch (GET_CODE (x))
5279 {
5280 case PRE_INC:
16a3246f 5281 asm_fprintf (f, "[%s, %d]!", reg_names [REGNO (addr.base)],
cc8ca59e 5282 GET_MODE_SIZE (mode));
43e9d192
IB
5283 return;
5284 case POST_INC:
16a3246f 5285 asm_fprintf (f, "[%s], %d", reg_names [REGNO (addr.base)],
cc8ca59e 5286 GET_MODE_SIZE (mode));
43e9d192
IB
5287 return;
5288 case PRE_DEC:
16a3246f 5289 asm_fprintf (f, "[%s, -%d]!", reg_names [REGNO (addr.base)],
cc8ca59e 5290 GET_MODE_SIZE (mode));
43e9d192
IB
5291 return;
5292 case POST_DEC:
16a3246f 5293 asm_fprintf (f, "[%s], -%d", reg_names [REGNO (addr.base)],
cc8ca59e 5294 GET_MODE_SIZE (mode));
43e9d192
IB
5295 return;
5296 case PRE_MODIFY:
16a3246f 5297 asm_fprintf (f, "[%s, %wd]!", reg_names [REGNO (addr.base)],
43e9d192
IB
5298 INTVAL (addr.offset));
5299 return;
5300 case POST_MODIFY:
16a3246f 5301 asm_fprintf (f, "[%s], %wd", reg_names [REGNO (addr.base)],
43e9d192
IB
5302 INTVAL (addr.offset));
5303 return;
5304 default:
5305 break;
5306 }
5307 break;
5308
5309 case ADDRESS_LO_SUM:
16a3246f 5310 asm_fprintf (f, "[%s, #:lo12:", reg_names [REGNO (addr.base)]);
43e9d192
IB
5311 output_addr_const (f, addr.offset);
5312 asm_fprintf (f, "]");
5313 return;
5314
5315 case ADDRESS_SYMBOLIC:
5316 break;
5317 }
5318
5319 output_addr_const (f, x);
5320}
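/* A sketch of the output forms handled above (register numbers are
   arbitrary examples, not taken from the source):

     [x0]                 base register, zero offset      (ADDRESS_REG_IMM)
     [x0, 16]             base plus immediate             (ADDRESS_REG_IMM)
     [x1, x2, lsl 3]      base plus scaled register       (ADDRESS_REG_REG)
     [x1, w2, sxtw 2]     sign-extended scaled index      (ADDRESS_REG_SXTW)
     [x0, 16]!            pre-modify writeback            (ADDRESS_REG_WB)
     [x0], 16             post-modify writeback           (ADDRESS_REG_WB)
     [x0, #:lo12:sym]     low 12 bits of a symbol         (ADDRESS_LO_SUM)

   Which form is chosen depends on aarch64_classify_address above. */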
5321
43e9d192
IB
5322bool
5323aarch64_label_mentioned_p (rtx x)
5324{
5325 const char *fmt;
5326 int i;
5327
5328 if (GET_CODE (x) == LABEL_REF)
5329 return true;
5330
5331 /* UNSPEC_TLS entries for a symbol include a LABEL_REF for the
5332 referencing instruction, but they are constant offsets, not
5333 symbols. */
5334 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
5335 return false;
5336
5337 fmt = GET_RTX_FORMAT (GET_CODE (x));
5338 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
5339 {
5340 if (fmt[i] == 'E')
5341 {
5342 int j;
5343
5344 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
5345 if (aarch64_label_mentioned_p (XVECEXP (x, i, j)))
5346 return 1;
5347 }
5348 else if (fmt[i] == 'e' && aarch64_label_mentioned_p (XEXP (x, i)))
5349 return 1;
5350 }
5351
5352 return 0;
5353}
5354
5355/* Implement REGNO_REG_CLASS. */
5356
5357enum reg_class
5358aarch64_regno_regclass (unsigned regno)
5359{
5360 if (GP_REGNUM_P (regno))
a4a182c6 5361 return GENERAL_REGS;
43e9d192
IB
5362
5363 if (regno == SP_REGNUM)
5364 return STACK_REG;
5365
5366 if (regno == FRAME_POINTER_REGNUM
5367 || regno == ARG_POINTER_REGNUM)
f24bb080 5368 return POINTER_REGS;
43e9d192
IB
5369
5370 if (FP_REGNUM_P (regno))
5371 return FP_LO_REGNUM_P (regno) ? FP_LO_REGS : FP_REGS;
5372
5373 return NO_REGS;
5374}
5375
0c4ec427 5376static rtx
ef4bddc2 5377aarch64_legitimize_address (rtx x, rtx /* orig_x */, machine_mode mode)
0c4ec427
RE
5378{
5379 /* Try to split X+CONST into Y=X+(CONST & ~mask), Y+(CONST&mask),
5380 where mask is selected by alignment and size of the offset.
5381 We try to pick as large a range for the offset as possible to
5382 maximize the chance of a CSE. However, for aligned addresses
5383 we limit the range to 4k so that structures with different sized
e8426e0a
BC
5384 elements are likely to use the same base. We need to be careful
5385 not to split a CONST for some forms of address expression, otherwise
5386 it will generate sub-optimal code. */
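    /* For illustration, assuming a DImode access to X + 0x10008: the offset
       is a multiple of the access size, so the final case below computes
	 base_offset = 0x10008 & (~0xfff * 8) = 0x10000,
       and we emit Y = X + 0x10000, addressing the memory as Y + 8.  The
       residue stays inside the scaled 12-bit immediate range and nearby
       accesses get a chance to CSE the same base Y.  */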
0c4ec427
RE
5387
5388 if (GET_CODE (x) == PLUS && CONST_INT_P (XEXP (x, 1)))
5389 {
9e0218fc 5390 rtx base = XEXP (x, 0);
17d7bdd8 5391 rtx offset_rtx = XEXP (x, 1);
9e0218fc 5392 HOST_WIDE_INT offset = INTVAL (offset_rtx);
0c4ec427 5393
9e0218fc 5394 if (GET_CODE (base) == PLUS)
e8426e0a 5395 {
9e0218fc
RH
5396 rtx op0 = XEXP (base, 0);
5397 rtx op1 = XEXP (base, 1);
5398
5399 /* Force any scaling into a temp for CSE. */
5400 op0 = force_reg (Pmode, op0);
5401 op1 = force_reg (Pmode, op1);
5402
5403 /* Let the pointer register be in op0. */
5404 if (REG_POINTER (op1))
5405 std::swap (op0, op1);
5406
5407 /* If the pointer is virtual or frame related, then we know that
5408 virtual register instantiation or register elimination is going
5409 to apply a second constant. We want the two constants folded
5410 together easily. Therefore, emit as (OP0 + CONST) + OP1. */
5411 if (virt_or_elim_regno_p (REGNO (op0)))
e8426e0a 5412 {
9e0218fc
RH
5413 base = expand_binop (Pmode, add_optab, op0, offset_rtx,
5414 NULL_RTX, true, OPTAB_DIRECT);
5415 return gen_rtx_PLUS (Pmode, base, op1);
e8426e0a 5416 }
e8426e0a 5417
9e0218fc
RH
5418 /* Otherwise, in order to encourage CSE (and thence loop strength
5419 reduce) scaled addresses, emit as (OP0 + OP1) + CONST. */
5420 base = expand_binop (Pmode, add_optab, op0, op1,
5421 NULL_RTX, true, OPTAB_DIRECT);
5422 x = gen_rtx_PLUS (Pmode, base, offset_rtx);
e8426e0a
BC
5423 }
5424
8734dfac 5425 /* Does it look like we'll need a 16-byte load/store-pair operation? */
9e0218fc 5426 HOST_WIDE_INT base_offset;
8734dfac
WD
5427 if (GET_MODE_SIZE (mode) > 16)
5428 base_offset = (offset + 0x400) & ~0x7f0;
0c4ec427
RE
5429 /* For offsets that aren't a multiple of the access size, the limit is
5430 -256...255. */
5431 else if (offset & (GET_MODE_SIZE (mode) - 1))
ff0f3f1c
WD
5432 {
5433 base_offset = (offset + 0x100) & ~0x1ff;
5434
5435 /* BLKmode typically uses LDP of X-registers. */
5436 if (mode == BLKmode)
5437 base_offset = (offset + 512) & ~0x3ff;
5438 }
5439 /* Small negative offsets are supported. */
5440 else if (IN_RANGE (offset, -256, 0))
5441 base_offset = 0;
8734dfac
WD
5442 else if (mode == TImode || mode == TFmode)
5443 base_offset = (offset + 0x100) & ~0x1ff;
ff0f3f1c 5444 /* Use 12-bit offset by access size. */
0c4ec427 5445 else
ff0f3f1c 5446 base_offset = offset & (~0xfff * GET_MODE_SIZE (mode));
0c4ec427 5447
9e0218fc
RH
5448 if (base_offset != 0)
5449 {
5450 base = plus_constant (Pmode, base, base_offset);
5451 base = force_operand (base, NULL_RTX);
5452 return plus_constant (Pmode, base, offset - base_offset);
5453 }
0c4ec427
RE
5454 }
5455
5456 return x;
5457}
5458
b4f50fd4
RR
5459/* Return the reload icode required for a constant pool access in mode MODE. */
5460static enum insn_code
5461aarch64_constant_pool_reload_icode (machine_mode mode)
5462{
5463 switch (mode)
5464 {
5465 case SFmode:
5466 return CODE_FOR_aarch64_reload_movcpsfdi;
5467
5468 case DFmode:
5469 return CODE_FOR_aarch64_reload_movcpdfdi;
5470
5471 case TFmode:
5472 return CODE_FOR_aarch64_reload_movcptfdi;
5473
5474 case V8QImode:
5475 return CODE_FOR_aarch64_reload_movcpv8qidi;
5476
5477 case V16QImode:
5478 return CODE_FOR_aarch64_reload_movcpv16qidi;
5479
5480 case V4HImode:
5481 return CODE_FOR_aarch64_reload_movcpv4hidi;
5482
5483 case V8HImode:
5484 return CODE_FOR_aarch64_reload_movcpv8hidi;
5485
5486 case V2SImode:
5487 return CODE_FOR_aarch64_reload_movcpv2sidi;
5488
5489 case V4SImode:
5490 return CODE_FOR_aarch64_reload_movcpv4sidi;
5491
5492 case V2DImode:
5493 return CODE_FOR_aarch64_reload_movcpv2didi;
5494
5495 case V2DFmode:
5496 return CODE_FOR_aarch64_reload_movcpv2dfdi;
5497
5498 default:
5499 gcc_unreachable ();
5500 }
5501
5502 gcc_unreachable ();
5503}
43e9d192
IB
5504static reg_class_t
5505aarch64_secondary_reload (bool in_p ATTRIBUTE_UNUSED, rtx x,
5506 reg_class_t rclass,
ef4bddc2 5507 machine_mode mode,
43e9d192
IB
5508 secondary_reload_info *sri)
5509{
b4f50fd4
RR
5510
5511 /* If we have to disable direct literal pool loads and stores because the
5512 function is too big, then we need a scratch register. */
5513 if (MEM_P (x) && GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x)
5514 && (SCALAR_FLOAT_MODE_P (GET_MODE (x))
5515 || targetm.vector_mode_supported_p (GET_MODE (x)))
9ee6540a 5516 && !aarch64_pcrelative_literal_loads)
b4f50fd4
RR
5517 {
5518 sri->icode = aarch64_constant_pool_reload_icode (mode);
5519 return NO_REGS;
5520 }
5521
43e9d192
IB
5522 /* Without the TARGET_SIMD instructions we cannot move a Q register
5523 to a Q register directly. We need a scratch. */
5524 if (REG_P (x) && (mode == TFmode || mode == TImode) && mode == GET_MODE (x)
5525 && FP_REGNUM_P (REGNO (x)) && !TARGET_SIMD
5526 && reg_class_subset_p (rclass, FP_REGS))
5527 {
5528 if (mode == TFmode)
5529 sri->icode = CODE_FOR_aarch64_reload_movtf;
5530 else if (mode == TImode)
5531 sri->icode = CODE_FOR_aarch64_reload_movti;
5532 return NO_REGS;
5533 }
5534
5535 /* A TFmode or TImode memory access should be handled via an FP_REGS
5536 because AArch64 has richer addressing modes for LDR/STR instructions
5537 than LDP/STP instructions. */
d5726973 5538 if (TARGET_FLOAT && rclass == GENERAL_REGS
43e9d192
IB
5539 && GET_MODE_SIZE (mode) == 16 && MEM_P (x))
5540 return FP_REGS;
5541
5542 if (rclass == FP_REGS && (mode == TImode || mode == TFmode) && CONSTANT_P(x))
a4a182c6 5543 return GENERAL_REGS;
43e9d192
IB
5544
5545 return NO_REGS;
5546}
5547
5548static bool
5549aarch64_can_eliminate (const int from, const int to)
5550{
5551 /* If we need a frame pointer, we must eliminate FRAME_POINTER_REGNUM into
5552 HARD_FRAME_POINTER_REGNUM and not into STACK_POINTER_REGNUM. */
5553
5554 if (frame_pointer_needed)
5555 {
5556 if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
5557 return true;
5558 if (from == ARG_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
5559 return false;
5560 if (from == FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM
5561 && !cfun->calls_alloca)
5562 return true;
5563 if (from == FRAME_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
5564 return true;
0b7f8166
MS
5565
5566 return false;
43e9d192 5567 }
1c923b60
JW
5568 else
5569 {
5570 /* If we decided that we didn't need a leaf frame pointer but then used
5571 LR in the function, then we'll want a frame pointer after all, so
5572 prevent this elimination to ensure a frame pointer is used. */
5573 if (to == STACK_POINTER_REGNUM
5574 && flag_omit_leaf_frame_pointer
5575 && df_regs_ever_live_p (LR_REGNUM))
5576 return false;
5577 }
777e6976 5578
43e9d192
IB
5579 return true;
5580}
5581
5582HOST_WIDE_INT
5583aarch64_initial_elimination_offset (unsigned from, unsigned to)
5584{
43e9d192 5585 aarch64_layout_frame ();
78c29983
MS
5586
5587 if (to == HARD_FRAME_POINTER_REGNUM)
5588 {
5589 if (from == ARG_POINTER_REGNUM)
71bfb77a 5590 return cfun->machine->frame.hard_fp_offset;
78c29983
MS
5591
5592 if (from == FRAME_POINTER_REGNUM)
71bfb77a
WD
5593 return cfun->machine->frame.hard_fp_offset
5594 - cfun->machine->frame.locals_offset;
78c29983
MS
5595 }
5596
5597 if (to == STACK_POINTER_REGNUM)
5598 {
5599 if (from == FRAME_POINTER_REGNUM)
71bfb77a
WD
5600 return cfun->machine->frame.frame_size
5601 - cfun->machine->frame.locals_offset;
78c29983
MS
5602 }
5603
1c960e02 5604 return cfun->machine->frame.frame_size;
43e9d192
IB
5605}
5606
43e9d192
IB
5607/* Implement RETURN_ADDR_RTX. We do not support moving back to a
5608 previous frame. */
5609
5610rtx
5611aarch64_return_addr (int count, rtx frame ATTRIBUTE_UNUSED)
5612{
5613 if (count != 0)
5614 return const0_rtx;
5615 return get_hard_reg_initial_val (Pmode, LR_REGNUM);
5616}
5617
5618
5619static void
5620aarch64_asm_trampoline_template (FILE *f)
5621{
28514dda
YZ
5622 if (TARGET_ILP32)
5623 {
5624 asm_fprintf (f, "\tldr\tw%d, .+16\n", IP1_REGNUM - R0_REGNUM);
5625 asm_fprintf (f, "\tldr\tw%d, .+16\n", STATIC_CHAIN_REGNUM - R0_REGNUM);
5626 }
5627 else
5628 {
5629 asm_fprintf (f, "\tldr\t%s, .+16\n", reg_names [IP1_REGNUM]);
5630 asm_fprintf (f, "\tldr\t%s, .+20\n", reg_names [STATIC_CHAIN_REGNUM]);
5631 }
01a3a324 5632 asm_fprintf (f, "\tbr\t%s\n", reg_names [IP1_REGNUM]);
43e9d192 5633 assemble_aligned_integer (4, const0_rtx);
28514dda
YZ
5634 assemble_aligned_integer (POINTER_BYTES, const0_rtx);
5635 assemble_aligned_integer (POINTER_BYTES, const0_rtx);
43e9d192
IB
5636}
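/* For illustration only: on LP64, and assuming the usual register
   assignments (x17 for IP1, x18 for the static chain), the template above
   assembles to roughly

	ldr	x17, .+16	// target function address
	ldr	x18, .+20	// static chain value
	br	x17
	.word	0		// pad the code part out to 16 bytes
	.xword	0		// slot for the function address
	.xword	0		// slot for the static chain

   with the two data slots filled in by aarch64_trampoline_init below.  */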
5637
5638static void
5639aarch64_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
5640{
5641 rtx fnaddr, mem, a_tramp;
28514dda 5642 const int tramp_code_sz = 16;
43e9d192
IB
5643
5644 /* We don't need to copy the trailing D-words; we fill those in below. */
5645 emit_block_move (m_tramp, assemble_trampoline_template (),
28514dda
YZ
5646 GEN_INT (tramp_code_sz), BLOCK_OP_NORMAL);
5647 mem = adjust_address (m_tramp, ptr_mode, tramp_code_sz);
43e9d192 5648 fnaddr = XEXP (DECL_RTL (fndecl), 0);
28514dda
YZ
5649 if (GET_MODE (fnaddr) != ptr_mode)
5650 fnaddr = convert_memory_address (ptr_mode, fnaddr);
43e9d192
IB
5651 emit_move_insn (mem, fnaddr);
5652
28514dda 5653 mem = adjust_address (m_tramp, ptr_mode, tramp_code_sz + POINTER_BYTES);
43e9d192
IB
5654 emit_move_insn (mem, chain_value);
5655
5656 /* XXX We should really define a "clear_cache" pattern and use
5657 gen_clear_cache(). */
5658 a_tramp = XEXP (m_tramp, 0);
5659 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__clear_cache"),
28514dda
YZ
5660 LCT_NORMAL, VOIDmode, 2, a_tramp, ptr_mode,
5661 plus_constant (ptr_mode, a_tramp, TRAMPOLINE_SIZE),
5662 ptr_mode);
43e9d192
IB
5663}
5664
5665static unsigned char
ef4bddc2 5666aarch64_class_max_nregs (reg_class_t regclass, machine_mode mode)
43e9d192
IB
5667{
5668 switch (regclass)
5669 {
fee9ba42 5670 case CALLER_SAVE_REGS:
43e9d192
IB
5671 case POINTER_REGS:
5672 case GENERAL_REGS:
5673 case ALL_REGS:
5674 case FP_REGS:
5675 case FP_LO_REGS:
5676 return
7bd11911
KT
5677 aarch64_vector_mode_p (mode)
5678 ? (GET_MODE_SIZE (mode) + UNITS_PER_VREG - 1) / UNITS_PER_VREG
5679 : (GET_MODE_SIZE (mode) + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
43e9d192
IB
5680 case STACK_REG:
5681 return 1;
5682
5683 case NO_REGS:
5684 return 0;
5685
5686 default:
5687 break;
5688 }
5689 gcc_unreachable ();
5690}
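/* For illustration: with the rules above a V4SImode value needs a single
   vector register (one 16-byte Q register), a TImode value in GENERAL_REGS
   needs two X registers, and STACK_REG always reports one register.  */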
5691
5692static reg_class_t
78d8b9f0 5693aarch64_preferred_reload_class (rtx x, reg_class_t regclass)
43e9d192 5694{
51bb310d 5695 if (regclass == POINTER_REGS)
78d8b9f0
IB
5696 return GENERAL_REGS;
5697
51bb310d
MS
5698 if (regclass == STACK_REG)
5699 {
5700 if (REG_P(x)
5701 && reg_class_subset_p (REGNO_REG_CLASS (REGNO (x)), POINTER_REGS))
5702 return regclass;
5703
5704 return NO_REGS;
5705 }
5706
78d8b9f0
IB
5707 /* If it's an integer immediate that MOVI can't handle, then
5708 FP_REGS is not an option, so we return NO_REGS instead. */
5709 if (CONST_INT_P (x) && reg_class_subset_p (regclass, FP_REGS)
5710 && !aarch64_simd_imm_scalar_p (x, GET_MODE (x)))
5711 return NO_REGS;
5712
27bd251b
IB
5713 /* Register elimination can result in a request for
5714 SP+constant->FP_REGS. We cannot support such operations, which
5715 use SP as source and an FP_REG as destination, so reject them
5716 right now. */
5717 if (! reg_class_subset_p (regclass, GENERAL_REGS) && GET_CODE (x) == PLUS)
5718 {
5719 rtx lhs = XEXP (x, 0);
5720
5721 /* Look through a possible SUBREG introduced by ILP32. */
5722 if (GET_CODE (lhs) == SUBREG)
5723 lhs = SUBREG_REG (lhs);
5724
5725 gcc_assert (REG_P (lhs));
5726 gcc_assert (reg_class_subset_p (REGNO_REG_CLASS (REGNO (lhs)),
5727 POINTER_REGS));
5728 return NO_REGS;
5729 }
5730
78d8b9f0 5731 return regclass;
43e9d192
IB
5732}
5733
5734void
5735aarch64_asm_output_labelref (FILE* f, const char *name)
5736{
5737 asm_fprintf (f, "%U%s", name);
5738}
5739
5740static void
5741aarch64_elf_asm_constructor (rtx symbol, int priority)
5742{
5743 if (priority == DEFAULT_INIT_PRIORITY)
5744 default_ctor_section_asm_out_constructor (symbol, priority);
5745 else
5746 {
5747 section *s;
5748 char buf[18];
5749 snprintf (buf, sizeof (buf), ".init_array.%.5u", priority);
5750 s = get_section (buf, SECTION_WRITE, NULL);
5751 switch_to_section (s);
5752 assemble_align (POINTER_SIZE);
28514dda 5753 assemble_aligned_integer (POINTER_BYTES, symbol);
43e9d192
IB
5754 }
5755}
5756
5757static void
5758aarch64_elf_asm_destructor (rtx symbol, int priority)
5759{
5760 if (priority == DEFAULT_INIT_PRIORITY)
5761 default_dtor_section_asm_out_destructor (symbol, priority);
5762 else
5763 {
5764 section *s;
5765 char buf[18];
5766 snprintf (buf, sizeof (buf), ".fini_array.%.5u", priority);
5767 s = get_section (buf, SECTION_WRITE, NULL);
5768 switch_to_section (s);
5769 assemble_align (POINTER_SIZE);
28514dda 5770 assemble_aligned_integer (POINTER_BYTES, symbol);
43e9d192
IB
5771 }
5772}
5773
5774const char*
5775aarch64_output_casesi (rtx *operands)
5776{
5777 char buf[100];
5778 char label[100];
b32d5189 5779 rtx diff_vec = PATTERN (NEXT_INSN (as_a <rtx_insn *> (operands[2])));
43e9d192
IB
5780 int index;
5781 static const char *const patterns[4][2] =
5782 {
5783 {
5784 "ldrb\t%w3, [%0,%w1,uxtw]",
5785 "add\t%3, %4, %w3, sxtb #2"
5786 },
5787 {
5788 "ldrh\t%w3, [%0,%w1,uxtw #1]",
5789 "add\t%3, %4, %w3, sxth #2"
5790 },
5791 {
5792 "ldr\t%w3, [%0,%w1,uxtw #2]",
5793 "add\t%3, %4, %w3, sxtw #2"
5794 },
5795 /* We assume that DImode is only generated when not optimizing and
5796 that we don't really need 64-bit address offsets. That would
5797 imply an object file with 8GB of code in a single function! */
5798 {
5799 "ldr\t%w3, [%0,%w1,uxtw #2]",
5800 "add\t%3, %4, %w3, sxtw #2"
5801 }
5802 };
5803
5804 gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC);
5805
5806 index = exact_log2 (GET_MODE_SIZE (GET_MODE (diff_vec)));
5807
5808 gcc_assert (index >= 0 && index <= 3);
5809
5810 /* Need to implement table size reduction, by changing the code below. */
5811 output_asm_insn (patterns[index][0], operands);
5812 ASM_GENERATE_INTERNAL_LABEL (label, "Lrtx", CODE_LABEL_NUMBER (operands[2]));
5813 snprintf (buf, sizeof (buf),
5814 "adr\t%%4, %s", targetm.strip_name_encoding (label));
5815 output_asm_insn (buf, operands);
5816 output_asm_insn (patterns[index][1], operands);
5817 output_asm_insn ("br\t%3", operands);
5818 assemble_label (asm_out_file, label);
5819 return "";
5820}
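/* For illustration: for a HImode dispatch table (index 1 above), and with
   x0/w1/x3/x4 standing in for operands 0, 1, 3 and 4, the emitted sequence
   is along the lines of

	ldrh	w3, [x0,w1,uxtw #1]	// load the table entry
	adr	x4, .Lrtx<N>		// anchor label emitted just below
	add	x3, x4, w3, sxth #2	// entry is (label - anchor) / 4
	br	x3
   .Lrtx<N>:

   where <N> is the label number generated for operand 2.  */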
5821
5822
5823/* Return size in bits of an arithmetic operand which is shifted/scaled and
5824 masked such that it is suitable for a UXTB, UXTH, or UXTW extend
5825 operator. */
5826
5827int
5828aarch64_uxt_size (int shift, HOST_WIDE_INT mask)
5829{
5830 if (shift >= 0 && shift <= 3)
5831 {
5832 int size;
5833 for (size = 8; size <= 32; size *= 2)
5834 {
5835 HOST_WIDE_INT bits = ((HOST_WIDE_INT)1U << size) - 1;
5836 if (mask == bits << shift)
5837 return size;
5838 }
5839 }
5840 return 0;
5841}
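/* For illustration: aarch64_uxt_size (1, 0x1fe) returns 8, since 0x1fe is
   0xff shifted left by one, i.e. the operand acts as a UXTB combined with
   LSL #1.  Any mask that is not a contiguous 8-, 16- or 32-bit field
   starting at the shift position yields 0.  */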
5842
e78d485e
RR
5843/* Constant pools are per-function only when PC-relative
5844 literal loads are enabled or we are in the large memory
5845 model. */
5846
5847static inline bool
5848aarch64_can_use_per_function_literal_pools_p (void)
5849{
9ee6540a 5850 return (aarch64_pcrelative_literal_loads
e78d485e
RR
5851 || aarch64_cmodel == AARCH64_CMODEL_LARGE);
5852}
5853
43e9d192 5854static bool
e78d485e 5855aarch64_use_blocks_for_constant_p (machine_mode, const_rtx)
43e9d192 5856{
3eece53d
RR
5857 /* FIXME: In an ideal world this would work similarly
5858 to the logic in aarch64_select_rtx_section, but this
5859 breaks bootstrap in GCC Go. For now we work around
5860 this by returning false here. */
5861 return false;
43e9d192
IB
5862}
5863
e78d485e
RR
5864/* Select appropriate section for constants depending
5865 on where we place literal pools. */
5866
43e9d192 5867static section *
e78d485e
RR
5868aarch64_select_rtx_section (machine_mode mode,
5869 rtx x,
5870 unsigned HOST_WIDE_INT align)
43e9d192 5871{
e78d485e
RR
5872 if (aarch64_can_use_per_function_literal_pools_p ())
5873 return function_section (current_function_decl);
43e9d192 5874
e78d485e
RR
5875 return default_elf_select_rtx_section (mode, x, align);
5876}
43e9d192 5877
5fca7b66
RH
5878/* Implement ASM_OUTPUT_POOL_EPILOGUE. */
5879void
5880aarch64_asm_output_pool_epilogue (FILE *f, const char *, tree,
5881 HOST_WIDE_INT offset)
5882{
5883 /* When using per-function literal pools, we must ensure that any code
5884 section is aligned to the minimal instruction length, lest we get
5885 errors from the assembler re "unaligned instructions". */
5886 if ((offset & 3) && aarch64_can_use_per_function_literal_pools_p ())
5887 ASM_OUTPUT_ALIGN (f, 2);
5888}
5889
43e9d192
IB
5890/* Costs. */
5891
5892/* Helper function for rtx cost calculation. Strip a shift expression
5893 from X. Returns the inner operand if successful, or the original
5894 expression on failure. */
5895static rtx
5896aarch64_strip_shift (rtx x)
5897{
5898 rtx op = x;
5899
57b77d46
RE
5900 /* We accept both ROTATERT and ROTATE: since the RHS must be a constant
5901 we can convert both to ROR during final output. */
43e9d192
IB
5902 if ((GET_CODE (op) == ASHIFT
5903 || GET_CODE (op) == ASHIFTRT
57b77d46
RE
5904 || GET_CODE (op) == LSHIFTRT
5905 || GET_CODE (op) == ROTATERT
5906 || GET_CODE (op) == ROTATE)
43e9d192
IB
5907 && CONST_INT_P (XEXP (op, 1)))
5908 return XEXP (op, 0);
5909
5910 if (GET_CODE (op) == MULT
5911 && CONST_INT_P (XEXP (op, 1))
5912 && ((unsigned) exact_log2 (INTVAL (XEXP (op, 1)))) < 64)
5913 return XEXP (op, 0);
5914
5915 return x;
5916}
5917
4745e701 5918/* Helper function for rtx cost calculation. Strip an extend
43e9d192
IB
5919 expression from X. Returns the inner operand if successful, or the
5920 original expression on failure. We deal with a number of possible
5921 canonicalization variations here. */
5922static rtx
4745e701 5923aarch64_strip_extend (rtx x)
43e9d192
IB
5924{
5925 rtx op = x;
5926
5927 /* Zero and sign extraction of a widened value. */
5928 if ((GET_CODE (op) == ZERO_EXTRACT || GET_CODE (op) == SIGN_EXTRACT)
5929 && XEXP (op, 2) == const0_rtx
4745e701 5930 && GET_CODE (XEXP (op, 0)) == MULT
43e9d192
IB
5931 && aarch64_is_extend_from_extract (GET_MODE (op), XEXP (XEXP (op, 0), 1),
5932 XEXP (op, 1)))
5933 return XEXP (XEXP (op, 0), 0);
5934
5935 /* It can also be represented (for zero-extend) as an AND with an
5936 immediate. */
5937 if (GET_CODE (op) == AND
5938 && GET_CODE (XEXP (op, 0)) == MULT
5939 && CONST_INT_P (XEXP (XEXP (op, 0), 1))
5940 && CONST_INT_P (XEXP (op, 1))
5941 && aarch64_uxt_size (exact_log2 (INTVAL (XEXP (XEXP (op, 0), 1))),
5942 INTVAL (XEXP (op, 1))) != 0)
5943 return XEXP (XEXP (op, 0), 0);
5944
5945 /* Now handle extended register, as this may also have an optional
5946 left shift by 1..4. */
5947 if (GET_CODE (op) == ASHIFT
5948 && CONST_INT_P (XEXP (op, 1))
5949 && ((unsigned HOST_WIDE_INT) INTVAL (XEXP (op, 1))) <= 4)
5950 op = XEXP (op, 0);
5951
5952 if (GET_CODE (op) == ZERO_EXTEND
5953 || GET_CODE (op) == SIGN_EXTEND)
5954 op = XEXP (op, 0);
5955
5956 if (op != x)
5957 return op;
5958
4745e701
JG
5959 return x;
5960}
5961
0a78ebe4
KT
5962/* Return true iff CODE is a shift supported in combination
5963 with arithmetic instructions. */
4d1919ed 5964
0a78ebe4
KT
5965static bool
5966aarch64_shift_p (enum rtx_code code)
5967{
5968 return code == ASHIFT || code == ASHIFTRT || code == LSHIFTRT;
5969}
5970
4745e701 5971/* Helper function for rtx cost calculation. Calculate the cost of
0a78ebe4
KT
5972 a MULT or ASHIFT, which may be part of a compound PLUS/MINUS rtx.
5973 Return the calculated cost of the expression, recursing manually in to
4745e701
JG
5974 operands where needed. */
5975
5976static int
e548c9df 5977aarch64_rtx_mult_cost (rtx x, enum rtx_code code, int outer, bool speed)
4745e701
JG
5978{
5979 rtx op0, op1;
5980 const struct cpu_cost_table *extra_cost
b175b679 5981 = aarch64_tune_params.insn_extra_cost;
4745e701 5982 int cost = 0;
0a78ebe4 5983 bool compound_p = (outer == PLUS || outer == MINUS);
ef4bddc2 5984 machine_mode mode = GET_MODE (x);
4745e701
JG
5985
5986 gcc_checking_assert (code == MULT);
5987
5988 op0 = XEXP (x, 0);
5989 op1 = XEXP (x, 1);
5990
5991 if (VECTOR_MODE_P (mode))
5992 mode = GET_MODE_INNER (mode);
5993
5994 /* Integer multiply/fma. */
5995 if (GET_MODE_CLASS (mode) == MODE_INT)
5996 {
5997 /* The multiply will be canonicalized as a shift, so cost it as such. */
0a78ebe4
KT
5998 if (aarch64_shift_p (GET_CODE (x))
5999 || (CONST_INT_P (op1)
6000 && exact_log2 (INTVAL (op1)) > 0))
4745e701 6001 {
0a78ebe4
KT
6002 bool is_extend = GET_CODE (op0) == ZERO_EXTEND
6003 || GET_CODE (op0) == SIGN_EXTEND;
4745e701
JG
6004 if (speed)
6005 {
0a78ebe4
KT
6006 if (compound_p)
6007 {
6008 if (REG_P (op1))
6009 /* ARITH + shift-by-register. */
6010 cost += extra_cost->alu.arith_shift_reg;
6011 else if (is_extend)
6012 /* ARITH + extended register. We don't have a cost field
6013 for ARITH+EXTEND+SHIFT, so use extend_arith here. */
6014 cost += extra_cost->alu.extend_arith;
6015 else
6016 /* ARITH + shift-by-immediate. */
6017 cost += extra_cost->alu.arith_shift;
6018 }
4745e701
JG
6019 else
6020 /* LSL (immediate). */
0a78ebe4
KT
6021 cost += extra_cost->alu.shift;
6022
4745e701 6023 }
0a78ebe4
KT
6024 /* Strip extends as we will have costed them in the case above. */
6025 if (is_extend)
6026 op0 = aarch64_strip_extend (op0);
4745e701 6027
e548c9df 6028 cost += rtx_cost (op0, VOIDmode, code, 0, speed);
4745e701
JG
6029
6030 return cost;
6031 }
6032
d2ac256b
KT
6033 /* MNEG or [US]MNEGL. Extract the NEG operand and indicate that it's a
6034 compound and let the below cases handle it. After all, MNEG is a
6035 special-case alias of MSUB. */
6036 if (GET_CODE (op0) == NEG)
6037 {
6038 op0 = XEXP (op0, 0);
6039 compound_p = true;
6040 }
6041
4745e701
JG
6042 /* Integer multiplies or FMAs have zero/sign extending variants. */
6043 if ((GET_CODE (op0) == ZERO_EXTEND
6044 && GET_CODE (op1) == ZERO_EXTEND)
6045 || (GET_CODE (op0) == SIGN_EXTEND
6046 && GET_CODE (op1) == SIGN_EXTEND))
6047 {
e548c9df
AM
6048 cost += rtx_cost (XEXP (op0, 0), VOIDmode, MULT, 0, speed);
6049 cost += rtx_cost (XEXP (op1, 0), VOIDmode, MULT, 1, speed);
4745e701
JG
6050
6051 if (speed)
6052 {
0a78ebe4 6053 if (compound_p)
d2ac256b 6054 /* SMADDL/UMADDL/UMSUBL/SMSUBL. */
4745e701
JG
6055 cost += extra_cost->mult[0].extend_add;
6056 else
6057 /* MUL/SMULL/UMULL. */
6058 cost += extra_cost->mult[0].extend;
6059 }
6060
6061 return cost;
6062 }
6063
d2ac256b 6064 /* This is either an integer multiply or a MADD. In both cases
4745e701 6065 we want to recurse and cost the operands. */
e548c9df
AM
6066 cost += rtx_cost (op0, mode, MULT, 0, speed);
6067 cost += rtx_cost (op1, mode, MULT, 1, speed);
4745e701
JG
6068
6069 if (speed)
6070 {
0a78ebe4 6071 if (compound_p)
d2ac256b 6072 /* MADD/MSUB. */
4745e701
JG
6073 cost += extra_cost->mult[mode == DImode].add;
6074 else
6075 /* MUL. */
6076 cost += extra_cost->mult[mode == DImode].simple;
6077 }
6078
6079 return cost;
6080 }
6081 else
6082 {
6083 if (speed)
6084 {
3d840f7d 6085 /* Floating-point FMA/FMUL can also support negations of the
d318517d
SN
6086 operands, unless the rounding mode is upward or downward in
6087 which case FNMUL is different than FMUL with operand negation. */
6088 bool neg0 = GET_CODE (op0) == NEG;
6089 bool neg1 = GET_CODE (op1) == NEG;
6090 if (compound_p || !flag_rounding_math || (neg0 && neg1))
6091 {
6092 if (neg0)
6093 op0 = XEXP (op0, 0);
6094 if (neg1)
6095 op1 = XEXP (op1, 0);
6096 }
4745e701 6097
0a78ebe4 6098 if (compound_p)
4745e701
JG
6099 /* FMADD/FNMADD/FNMSUB/FMSUB. */
6100 cost += extra_cost->fp[mode == DFmode].fma;
6101 else
3d840f7d 6102 /* FMUL/FNMUL. */
4745e701
JG
6103 cost += extra_cost->fp[mode == DFmode].mult;
6104 }
6105
e548c9df
AM
6106 cost += rtx_cost (op0, mode, MULT, 0, speed);
6107 cost += rtx_cost (op1, mode, MULT, 1, speed);
4745e701
JG
6108 return cost;
6109 }
43e9d192
IB
6110}
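/* For illustration: a compound operand such as
     (plus (mult (reg X) (const_int 4)) (reg Y))
   reaches the function above with OUTER == PLUS; the multiply is treated as
   a shift-by-immediate, so a speed costing adds extra_cost->alu.arith_shift
   plus the recursive cost of X, mirroring "add Xd, Xy, Xx, lsl #2".  */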
6111
67747367
JG
6112static int
6113aarch64_address_cost (rtx x,
ef4bddc2 6114 machine_mode mode,
67747367
JG
6115 addr_space_t as ATTRIBUTE_UNUSED,
6116 bool speed)
6117{
6118 enum rtx_code c = GET_CODE (x);
b175b679 6119 const struct cpu_addrcost_table *addr_cost = aarch64_tune_params.addr_cost;
67747367
JG
6120 struct aarch64_address_info info;
6121 int cost = 0;
6122 info.shift = 0;
6123
6124 if (!aarch64_classify_address (&info, x, mode, c, false))
6125 {
6126 if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF)
6127 {
6128 /* This is a CONST or SYMBOL ref which will be split
6129 in a different way depending on the code model in use.
6130 Cost it through the generic infrastructure. */
e548c9df 6131 int cost_symbol_ref = rtx_cost (x, Pmode, MEM, 1, speed);
67747367
JG
6132 /* Divide through by the cost of one instruction to
6133 bring it to the same units as the address costs. */
6134 cost_symbol_ref /= COSTS_N_INSNS (1);
6135 /* The cost is then the cost of preparing the address,
6136 followed by an immediate (possibly 0) offset. */
6137 return cost_symbol_ref + addr_cost->imm_offset;
6138 }
6139 else
6140 {
6141 /* This is most likely a jump table from a case
6142 statement. */
6143 return addr_cost->register_offset;
6144 }
6145 }
6146
6147 switch (info.type)
6148 {
6149 case ADDRESS_LO_SUM:
6150 case ADDRESS_SYMBOLIC:
6151 case ADDRESS_REG_IMM:
6152 cost += addr_cost->imm_offset;
6153 break;
6154
6155 case ADDRESS_REG_WB:
6156 if (c == PRE_INC || c == PRE_DEC || c == PRE_MODIFY)
6157 cost += addr_cost->pre_modify;
6158 else if (c == POST_INC || c == POST_DEC || c == POST_MODIFY)
6159 cost += addr_cost->post_modify;
6160 else
6161 gcc_unreachable ();
6162
6163 break;
6164
6165 case ADDRESS_REG_REG:
6166 cost += addr_cost->register_offset;
6167 break;
6168
67747367 6169 case ADDRESS_REG_SXTW:
783879e6
EM
6170 cost += addr_cost->register_sextend;
6171 break;
6172
6173 case ADDRESS_REG_UXTW:
6174 cost += addr_cost->register_zextend;
67747367
JG
6175 break;
6176
6177 default:
6178 gcc_unreachable ();
6179 }
6180
6181
6182 if (info.shift > 0)
6183 {
6184 /* For the sake of calculating the cost of the shifted register
6185 component, we can treat same sized modes in the same way. */
6186 switch (GET_MODE_BITSIZE (mode))
6187 {
6188 case 16:
6189 cost += addr_cost->addr_scale_costs.hi;
6190 break;
6191
6192 case 32:
6193 cost += addr_cost->addr_scale_costs.si;
6194 break;
6195
6196 case 64:
6197 cost += addr_cost->addr_scale_costs.di;
6198 break;
6199
6200 /* We can't tell, or this is a 128-bit vector. */
6201 default:
6202 cost += addr_cost->addr_scale_costs.ti;
6203 break;
6204 }
6205 }
6206
6207 return cost;
6208}
6209
b9066f5a
MW
6210/* Return the cost of a branch. If SPEED_P is true then the compiler is
6211 optimizing for speed. If PREDICTABLE_P is true then the branch is predicted
6212 to be taken. */
6213
6214int
6215aarch64_branch_cost (bool speed_p, bool predictable_p)
6216{
6217 /* When optimizing for speed, use the cost of unpredictable branches. */
6218 const struct cpu_branch_cost *branch_costs =
b175b679 6219 aarch64_tune_params.branch_costs;
b9066f5a
MW
6220
6221 if (!speed_p || predictable_p)
6222 return branch_costs->predictable;
6223 else
6224 return branch_costs->unpredictable;
6225}
6226
7cc2145f
JG
6227/* Return true if the RTX X in mode MODE is a zero or sign extract
6228 usable in an ADD or SUB (extended register) instruction. */
6229static bool
ef4bddc2 6230aarch64_rtx_arith_op_extract_p (rtx x, machine_mode mode)
7cc2145f
JG
6231{
6232 /* Catch add with a sign extract.
6233 This is add_<optab><mode>_multp2. */
6234 if (GET_CODE (x) == SIGN_EXTRACT
6235 || GET_CODE (x) == ZERO_EXTRACT)
6236 {
6237 rtx op0 = XEXP (x, 0);
6238 rtx op1 = XEXP (x, 1);
6239 rtx op2 = XEXP (x, 2);
6240
6241 if (GET_CODE (op0) == MULT
6242 && CONST_INT_P (op1)
6243 && op2 == const0_rtx
6244 && CONST_INT_P (XEXP (op0, 1))
6245 && aarch64_is_extend_from_extract (mode,
6246 XEXP (op0, 1),
6247 op1))
6248 {
6249 return true;
6250 }
6251 }
e47c4031
KT
6252 /* The simple case <ARITH>, XD, XN, XM, [us]xt.
6253 No shift. */
6254 else if (GET_CODE (x) == SIGN_EXTEND
6255 || GET_CODE (x) == ZERO_EXTEND)
6256 return REG_P (XEXP (x, 0));
7cc2145f
JG
6257
6258 return false;
6259}
6260
61263118
KT
6261static bool
6262aarch64_frint_unspec_p (unsigned int u)
6263{
6264 switch (u)
6265 {
6266 case UNSPEC_FRINTZ:
6267 case UNSPEC_FRINTP:
6268 case UNSPEC_FRINTM:
6269 case UNSPEC_FRINTA:
6270 case UNSPEC_FRINTN:
6271 case UNSPEC_FRINTX:
6272 case UNSPEC_FRINTI:
6273 return true;
6274
6275 default:
6276 return false;
6277 }
6278}
6279
fb0cb7fa
KT
6280/* Return true iff X is an rtx that will match an extr instruction
6281 i.e. as described in the *extr<mode>5_insn family of patterns.
6282 OP0 and OP1 will be set to the operands of the shifts involved
6283 on success and will be NULL_RTX otherwise. */
6284
6285static bool
6286aarch64_extr_rtx_p (rtx x, rtx *res_op0, rtx *res_op1)
6287{
6288 rtx op0, op1;
6289 machine_mode mode = GET_MODE (x);
6290
6291 *res_op0 = NULL_RTX;
6292 *res_op1 = NULL_RTX;
6293
6294 if (GET_CODE (x) != IOR)
6295 return false;
6296
6297 op0 = XEXP (x, 0);
6298 op1 = XEXP (x, 1);
6299
6300 if ((GET_CODE (op0) == ASHIFT && GET_CODE (op1) == LSHIFTRT)
6301 || (GET_CODE (op1) == ASHIFT && GET_CODE (op0) == LSHIFTRT))
6302 {
6303 /* Canonicalise locally to ashift in op0, lshiftrt in op1. */
6304 if (GET_CODE (op1) == ASHIFT)
6305 std::swap (op0, op1);
6306
6307 if (!CONST_INT_P (XEXP (op0, 1)) || !CONST_INT_P (XEXP (op1, 1)))
6308 return false;
6309
6310 unsigned HOST_WIDE_INT shft_amnt_0 = UINTVAL (XEXP (op0, 1));
6311 unsigned HOST_WIDE_INT shft_amnt_1 = UINTVAL (XEXP (op1, 1));
6312
6313 if (shft_amnt_0 < GET_MODE_BITSIZE (mode)
6314 && shft_amnt_0 + shft_amnt_1 == GET_MODE_BITSIZE (mode))
6315 {
6316 *res_op0 = XEXP (op0, 0);
6317 *res_op1 = XEXP (op1, 0);
6318 return true;
6319 }
6320 }
6321
6322 return false;
6323}
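/* For illustration: in DImode the expression
     (ior (ashift (reg A) (const_int 16)) (lshiftrt (reg B) (const_int 48)))
   passes the checks above because 16 + 48 == 64, so *RES_OP0 and *RES_OP1
   are set to A and B and the whole expression maps onto a single EXTR.  */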
6324
2d5ffe46
AP
6325/* Calculate the cost of calculating (if_then_else (OP0) (OP1) (OP2)),
6326 storing it in *COST. Result is true if the total cost of the operation
6327 has now been calculated. */
6328static bool
6329aarch64_if_then_else_costs (rtx op0, rtx op1, rtx op2, int *cost, bool speed)
6330{
b9e3afe9
AP
6331 rtx inner;
6332 rtx comparator;
6333 enum rtx_code cmpcode;
6334
6335 if (COMPARISON_P (op0))
6336 {
6337 inner = XEXP (op0, 0);
6338 comparator = XEXP (op0, 1);
6339 cmpcode = GET_CODE (op0);
6340 }
6341 else
6342 {
6343 inner = op0;
6344 comparator = const0_rtx;
6345 cmpcode = NE;
6346 }
6347
2d5ffe46
AP
6348 if (GET_CODE (op1) == PC || GET_CODE (op2) == PC)
6349 {
6350 /* Conditional branch. */
b9e3afe9 6351 if (GET_MODE_CLASS (GET_MODE (inner)) == MODE_CC)
2d5ffe46
AP
6352 return true;
6353 else
6354 {
b9e3afe9 6355 if (cmpcode == NE || cmpcode == EQ)
2d5ffe46 6356 {
2d5ffe46
AP
6357 if (comparator == const0_rtx)
6358 {
6359 /* TBZ/TBNZ/CBZ/CBNZ. */
6360 if (GET_CODE (inner) == ZERO_EXTRACT)
6361 /* TBZ/TBNZ. */
e548c9df
AM
6362 *cost += rtx_cost (XEXP (inner, 0), VOIDmode,
6363 ZERO_EXTRACT, 0, speed);
6364 else
6365 /* CBZ/CBNZ. */
6366 *cost += rtx_cost (inner, VOIDmode, cmpcode, 0, speed);
2d5ffe46
AP
6367
6368 return true;
6369 }
6370 }
b9e3afe9 6371 else if (cmpcode == LT || cmpcode == GE)
2d5ffe46 6372 {
2d5ffe46
AP
6373 /* TBZ/TBNZ. */
6374 if (comparator == const0_rtx)
6375 return true;
6376 }
6377 }
6378 }
b9e3afe9 6379 else if (GET_MODE_CLASS (GET_MODE (inner)) == MODE_CC)
2d5ffe46 6380 {
786298dc 6381 /* CCMP. */
6dfeb7ce 6382 if (GET_CODE (op1) == COMPARE)
786298dc
WD
6383 {
6384 /* Increase cost of CCMP reg, 0, imm, CC to prefer CMP reg, 0. */
6385 if (XEXP (op1, 1) == const0_rtx)
6386 *cost += 1;
6387 if (speed)
6388 {
6389 machine_mode mode = GET_MODE (XEXP (op1, 0));
6390 const struct cpu_cost_table *extra_cost
6391 = aarch64_tune_params.insn_extra_cost;
6392
6393 if (GET_MODE_CLASS (mode) == MODE_INT)
6394 *cost += extra_cost->alu.arith;
6395 else
6396 *cost += extra_cost->fp[mode == DFmode].compare;
6397 }
6398 return true;
6399 }
6400
2d5ffe46
AP
6401 /* It's a conditional operation based on the status flags,
6402 so it must be some flavor of CSEL. */
6403
6404 /* CSNEG, CSINV, and CSINC are handled for free as part of CSEL. */
6405 if (GET_CODE (op1) == NEG
6406 || GET_CODE (op1) == NOT
6407 || (GET_CODE (op1) == PLUS && XEXP (op1, 1) == const1_rtx))
6408 op1 = XEXP (op1, 0);
bad00732
KT
6409 else if (GET_CODE (op1) == ZERO_EXTEND && GET_CODE (op2) == ZERO_EXTEND)
6410 {
6411 /* CSEL with zero-extension (*cmovdi_insn_uxtw). */
6412 op1 = XEXP (op1, 0);
6413 op2 = XEXP (op2, 0);
6414 }
2d5ffe46 6415
e548c9df
AM
6416 *cost += rtx_cost (op1, VOIDmode, IF_THEN_ELSE, 1, speed);
6417 *cost += rtx_cost (op2, VOIDmode, IF_THEN_ELSE, 2, speed);
2d5ffe46
AP
6418 return true;
6419 }
6420
6421 /* We don't know what this is, cost all operands. */
6422 return false;
6423}
6424
283b6c85
KT
6425/* Check whether X is a bitfield operation of the form shift + extend that
6426 maps down to a UBFIZ/SBFIZ/UBFX/SBFX instruction. If so, return the
6427 operand to which the bitfield operation is applied. Otherwise return
6428 NULL_RTX. */
6429
6430static rtx
6431aarch64_extend_bitfield_pattern_p (rtx x)
6432{
6433 rtx_code outer_code = GET_CODE (x);
6434 machine_mode outer_mode = GET_MODE (x);
6435
6436 if (outer_code != ZERO_EXTEND && outer_code != SIGN_EXTEND
6437 && outer_mode != SImode && outer_mode != DImode)
6438 return NULL_RTX;
6439
6440 rtx inner = XEXP (x, 0);
6441 rtx_code inner_code = GET_CODE (inner);
6442 machine_mode inner_mode = GET_MODE (inner);
6443 rtx op = NULL_RTX;
6444
6445 switch (inner_code)
6446 {
6447 case ASHIFT:
6448 if (CONST_INT_P (XEXP (inner, 1))
6449 && (inner_mode == QImode || inner_mode == HImode))
6450 op = XEXP (inner, 0);
6451 break;
6452 case LSHIFTRT:
6453 if (outer_code == ZERO_EXTEND && CONST_INT_P (XEXP (inner, 1))
6454 && (inner_mode == QImode || inner_mode == HImode))
6455 op = XEXP (inner, 0);
6456 break;
6457 case ASHIFTRT:
6458 if (outer_code == SIGN_EXTEND && CONST_INT_P (XEXP (inner, 1))
6459 && (inner_mode == QImode || inner_mode == HImode))
6460 op = XEXP (inner, 0);
6461 break;
6462 default:
6463 break;
6464 }
6465
6466 return op;
6467}
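/* For illustration: the function above returns the inner register R for
   patterns such as
     (zero_extend:SI (lshiftrt:HI (reg:HI R) (const_int 3)))   ~ UBFX
     (sign_extend:DI (ashiftrt:QI (reg:QI R) (const_int 2)))   ~ SBFX
     (zero_extend:DI (ashift:HI (reg:HI R) (const_int 4)))     ~ UBFIZ
   and NULL_RTX for anything else.  */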
6468
8c83f71d
KT
6469/* Return true if the mask and a shift amount from an RTX of the form
6470 (x << SHFT_AMNT) & MASK are valid to combine into a UBFIZ instruction of
6471 mode MODE. See the *andim_ashift<mode>_bfiz pattern. */
6472
6473bool
6474aarch64_mask_and_shift_for_ubfiz_p (machine_mode mode, rtx mask, rtx shft_amnt)
6475{
6476 return CONST_INT_P (mask) && CONST_INT_P (shft_amnt)
6477 && INTVAL (shft_amnt) < GET_MODE_BITSIZE (mode)
6478 && exact_log2 ((INTVAL (mask) >> INTVAL (shft_amnt)) + 1) >= 0
6479 && (INTVAL (mask) & ((1 << INTVAL (shft_amnt)) - 1)) == 0;
6480}
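/* For illustration: in SImode a mask of 0x7f8 with a shift amount of 3
   satisfies all four conditions above ((0x7f8 >> 3) + 1 == 0x100 is a power
   of two and the low three mask bits are clear), describing an 8-bit field
   inserted at bit 3, i.e. a UBFIZ.  A mask such as 0x7f4 is rejected.  */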
6481
43e9d192
IB
6482/* Calculate the cost of calculating X, storing it in *COST. Result
6483 is true if the total cost of the operation has now been calculated. */
6484static bool
e548c9df 6485aarch64_rtx_costs (rtx x, machine_mode mode, int outer ATTRIBUTE_UNUSED,
43e9d192
IB
6486 int param ATTRIBUTE_UNUSED, int *cost, bool speed)
6487{
a8eecd00 6488 rtx op0, op1, op2;
73250c4c 6489 const struct cpu_cost_table *extra_cost
b175b679 6490 = aarch64_tune_params.insn_extra_cost;
e548c9df 6491 int code = GET_CODE (x);
43e9d192 6492
7fc5ef02
JG
6493 /* By default, assume that everything has equivalent cost to the
6494 cheapest instruction. Any additional costs are applied as a delta
6495 above this default. */
6496 *cost = COSTS_N_INSNS (1);
6497
43e9d192
IB
6498 switch (code)
6499 {
6500 case SET:
ba123b0d
JG
6501 /* The cost depends entirely on the operands to SET. */
6502 *cost = 0;
43e9d192
IB
6503 op0 = SET_DEST (x);
6504 op1 = SET_SRC (x);
6505
6506 switch (GET_CODE (op0))
6507 {
6508 case MEM:
6509 if (speed)
2961177e
JG
6510 {
6511 rtx address = XEXP (op0, 0);
b6875aac
KV
6512 if (VECTOR_MODE_P (mode))
6513 *cost += extra_cost->ldst.storev;
6514 else if (GET_MODE_CLASS (mode) == MODE_INT)
2961177e
JG
6515 *cost += extra_cost->ldst.store;
6516 else if (mode == SFmode)
6517 *cost += extra_cost->ldst.storef;
6518 else if (mode == DFmode)
6519 *cost += extra_cost->ldst.stored;
6520
6521 *cost +=
6522 COSTS_N_INSNS (aarch64_address_cost (address, mode,
6523 0, speed));
6524 }
43e9d192 6525
e548c9df 6526 *cost += rtx_cost (op1, mode, SET, 1, speed);
43e9d192
IB
6527 return true;
6528
6529 case SUBREG:
6530 if (! REG_P (SUBREG_REG (op0)))
e548c9df 6531 *cost += rtx_cost (SUBREG_REG (op0), VOIDmode, SET, 0, speed);
ba123b0d 6532
43e9d192
IB
6533 /* Fall through. */
6534 case REG:
b6875aac
KV
6535 /* The cost is one per vector-register copied. */
6536 if (VECTOR_MODE_P (GET_MODE (op0)) && REG_P (op1))
6537 {
6538 int n_minus_1 = (GET_MODE_SIZE (GET_MODE (op0)) - 1)
6539 / GET_MODE_SIZE (V4SImode);
6540 *cost = COSTS_N_INSNS (n_minus_1 + 1);
6541 }
ba123b0d
JG
6542 /* const0_rtx is in general free, but we will use an
6543 instruction to set a register to 0. */
b6875aac
KV
6544 else if (REG_P (op1) || op1 == const0_rtx)
6545 {
6546 /* The cost is 1 per register copied. */
6547 int n_minus_1 = (GET_MODE_SIZE (GET_MODE (op0)) - 1)
ba123b0d 6548 / UNITS_PER_WORD;
b6875aac
KV
6549 *cost = COSTS_N_INSNS (n_minus_1 + 1);
6550 }
ba123b0d
JG
6551 else
6552 /* Cost is just the cost of the RHS of the set. */
e548c9df 6553 *cost += rtx_cost (op1, mode, SET, 1, speed);
43e9d192
IB
6554 return true;
6555
ba123b0d 6556 case ZERO_EXTRACT:
43e9d192 6557 case SIGN_EXTRACT:
ba123b0d
JG
6558 /* Bit-field insertion. Strip any redundant widening of
6559 the RHS to meet the width of the target. */
43e9d192
IB
6560 if (GET_CODE (op1) == SUBREG)
6561 op1 = SUBREG_REG (op1);
6562 if ((GET_CODE (op1) == ZERO_EXTEND
6563 || GET_CODE (op1) == SIGN_EXTEND)
4aa81c2e 6564 && CONST_INT_P (XEXP (op0, 1))
43e9d192
IB
6565 && (GET_MODE_BITSIZE (GET_MODE (XEXP (op1, 0)))
6566 >= INTVAL (XEXP (op0, 1))))
6567 op1 = XEXP (op1, 0);
ba123b0d
JG
6568
6569 if (CONST_INT_P (op1))
6570 {
6571 /* MOV immediate is assumed to always be cheap. */
6572 *cost = COSTS_N_INSNS (1);
6573 }
6574 else
6575 {
6576 /* BFM. */
6577 if (speed)
6578 *cost += extra_cost->alu.bfi;
e548c9df 6579 *cost += rtx_cost (op1, VOIDmode, (enum rtx_code) code, 1, speed);
ba123b0d
JG
6580 }
6581
43e9d192
IB
6582 return true;
6583
6584 default:
ba123b0d
JG
6585 /* We can't make sense of this, assume default cost. */
6586 *cost = COSTS_N_INSNS (1);
61263118 6587 return false;
43e9d192
IB
6588 }
6589 return false;
6590
9dfc162c
JG
6591 case CONST_INT:
6592 /* If an instruction can incorporate a constant within the
6593 instruction, the instruction's expression avoids calling
6594 rtx_cost() on the constant. If rtx_cost() is called on a
6595 constant, then it is usually because the constant must be
6596 moved into a register by one or more instructions.
6597
6598 The exception is constant 0, which can be expressed
6599 as XZR/WZR and is therefore free. The caveat is that
6600 if we have (set (reg) (const0_rtx)) we must still cost
6601 the move. However, we catch that when we cost the SET, so
6602 we don't need to consider that here. */
6603 if (x == const0_rtx)
6604 *cost = 0;
6605 else
6606 {
6607 /* To an approximation, building any other constant is
6608 proportionally expensive to the number of instructions
6609 required to build that constant. This is true whether we
6610 are compiling for SPEED or otherwise. */
82614948
RR
6611 *cost = COSTS_N_INSNS (aarch64_internal_mov_immediate
6612 (NULL_RTX, x, false, mode));
9dfc162c
JG
6613 }
6614 return true;
6615
6616 case CONST_DOUBLE:
6617 if (speed)
6618 {
6619 /* mov[df,sf]_aarch64. */
6620 if (aarch64_float_const_representable_p (x))
6621 /* FMOV (scalar immediate). */
6622 *cost += extra_cost->fp[mode == DFmode].fpconst;
6623 else if (!aarch64_float_const_zero_rtx_p (x))
6624 {
6625 /* This will be a load from memory. */
6626 if (mode == DFmode)
6627 *cost += extra_cost->ldst.loadd;
6628 else
6629 *cost += extra_cost->ldst.loadf;
6630 }
6631 else
6632 /* Otherwise this is +0.0. We get this using MOVI d0, #0
6633 or MOV v0.s[0], wzr - neither of which are modeled by the
6634 cost tables. Just use the default cost. */
6635 {
6636 }
6637 }
6638
6639 return true;
6640
43e9d192
IB
6641 case MEM:
6642 if (speed)
2961177e
JG
6643 {
6644 /* For loads we want the base cost of a load, plus an
6645 approximation for the additional cost of the addressing
6646 mode. */
6647 rtx address = XEXP (x, 0);
b6875aac
KV
6648 if (VECTOR_MODE_P (mode))
6649 *cost += extra_cost->ldst.loadv;
6650 else if (GET_MODE_CLASS (mode) == MODE_INT)
2961177e
JG
6651 *cost += extra_cost->ldst.load;
6652 else if (mode == SFmode)
6653 *cost += extra_cost->ldst.loadf;
6654 else if (mode == DFmode)
6655 *cost += extra_cost->ldst.loadd;
6656
6657 *cost +=
6658 COSTS_N_INSNS (aarch64_address_cost (address, mode,
6659 0, speed));
6660 }
43e9d192
IB
6661
6662 return true;
6663
6664 case NEG:
4745e701
JG
6665 op0 = XEXP (x, 0);
6666
b6875aac
KV
6667 if (VECTOR_MODE_P (mode))
6668 {
6669 if (speed)
6670 {
6671 /* FNEG. */
6672 *cost += extra_cost->vect.alu;
6673 }
6674 return false;
6675 }
6676
e548c9df
AM
6677 if (GET_MODE_CLASS (mode) == MODE_INT)
6678 {
4745e701
JG
6679 if (GET_RTX_CLASS (GET_CODE (op0)) == RTX_COMPARE
6680 || GET_RTX_CLASS (GET_CODE (op0)) == RTX_COMM_COMPARE)
6681 {
6682 /* CSETM. */
e548c9df 6683 *cost += rtx_cost (XEXP (op0, 0), VOIDmode, NEG, 0, speed);
4745e701
JG
6684 return true;
6685 }
6686
6687 /* Cost this as SUB wzr, X. */
e548c9df 6688 op0 = CONST0_RTX (mode);
4745e701
JG
6689 op1 = XEXP (x, 0);
6690 goto cost_minus;
6691 }
6692
e548c9df 6693 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
4745e701
JG
6694 {
6695 /* Support (neg(fma...)) as a single instruction only if
6696 sign of zeros is unimportant. This matches the decision
6697 making in aarch64.md. */
6698 if (GET_CODE (op0) == FMA && !HONOR_SIGNED_ZEROS (GET_MODE (op0)))
6699 {
6700 /* FNMADD. */
e548c9df 6701 *cost = rtx_cost (op0, mode, NEG, 0, speed);
4745e701
JG
6702 return true;
6703 }
d318517d
SN
6704 if (GET_CODE (op0) == MULT)
6705 {
6706 /* FNMUL. */
6707 *cost = rtx_cost (op0, mode, NEG, 0, speed);
6708 return true;
6709 }
4745e701
JG
6710 if (speed)
6711 /* FNEG. */
6712 *cost += extra_cost->fp[mode == DFmode].neg;
6713 return false;
6714 }
6715
6716 return false;
43e9d192 6717
781aeb73
KT
6718 case CLRSB:
6719 case CLZ:
6720 if (speed)
b6875aac
KV
6721 {
6722 if (VECTOR_MODE_P (mode))
6723 *cost += extra_cost->vect.alu;
6724 else
6725 *cost += extra_cost->alu.clz;
6726 }
781aeb73
KT
6727
6728 return false;
6729
43e9d192
IB
6730 case COMPARE:
6731 op0 = XEXP (x, 0);
6732 op1 = XEXP (x, 1);
6733
6734 if (op1 == const0_rtx
6735 && GET_CODE (op0) == AND)
6736 {
6737 x = op0;
e548c9df 6738 mode = GET_MODE (op0);
43e9d192
IB
6739 goto cost_logic;
6740 }
6741
a8eecd00
JG
6742 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT)
6743 {
6744 /* TODO: A write to the CC flags possibly costs extra, this
6745 needs encoding in the cost tables. */
6746
e548c9df 6747 mode = GET_MODE (op0);
a8eecd00
JG
6748 /* ANDS. */
6749 if (GET_CODE (op0) == AND)
6750 {
6751 x = op0;
6752 goto cost_logic;
6753 }
6754
6755 if (GET_CODE (op0) == PLUS)
6756 {
6757 /* ADDS (and CMN alias). */
6758 x = op0;
6759 goto cost_plus;
6760 }
6761
6762 if (GET_CODE (op0) == MINUS)
6763 {
6764 /* SUBS. */
6765 x = op0;
6766 goto cost_minus;
6767 }
6768
345854d8
KT
6769 if (GET_CODE (op0) == ZERO_EXTRACT && op1 == const0_rtx
6770 && GET_MODE (x) == CC_NZmode && CONST_INT_P (XEXP (op0, 1))
6771 && CONST_INT_P (XEXP (op0, 2)))
6772 {
6773 /* COMPARE of ZERO_EXTRACT form of TST-immediate.
6774 Handle it here directly rather than going to cost_logic
6775 since we know the immediate generated for the TST is valid
6776 so we can avoid creating an intermediate rtx for it only
6777 for costing purposes. */
6778 if (speed)
6779 *cost += extra_cost->alu.logical;
6780
6781 *cost += rtx_cost (XEXP (op0, 0), GET_MODE (op0),
6782 ZERO_EXTRACT, 0, speed);
6783 return true;
6784 }
6785
a8eecd00
JG
6786 if (GET_CODE (op1) == NEG)
6787 {
6788 /* CMN. */
6789 if (speed)
6790 *cost += extra_cost->alu.arith;
6791
e548c9df
AM
6792 *cost += rtx_cost (op0, mode, COMPARE, 0, speed);
6793 *cost += rtx_cost (XEXP (op1, 0), mode, NEG, 1, speed);
a8eecd00
JG
6794 return true;
6795 }
6796
6797 /* CMP.
6798
6799 Compare can freely swap the order of operands, and
6800 canonicalization puts the more complex operation first.
6801 But the integer MINUS logic expects the shift/extend
6802 operation in op1. */
6803 if (! (REG_P (op0)
6804 || (GET_CODE (op0) == SUBREG && REG_P (SUBREG_REG (op0)))))
6805 {
6806 op0 = XEXP (x, 1);
6807 op1 = XEXP (x, 0);
6808 }
6809 goto cost_minus;
6810 }
6811
6812 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_FLOAT)
6813 {
6814 /* FCMP. */
6815 if (speed)
6816 *cost += extra_cost->fp[mode == DFmode].compare;
6817
6818 if (CONST_DOUBLE_P (op1) && aarch64_float_const_zero_rtx_p (op1))
6819 {
e548c9df 6820 *cost += rtx_cost (op0, VOIDmode, COMPARE, 0, speed);
a8eecd00
JG
6821 /* FCMP supports constant 0.0 for no extra cost. */
6822 return true;
6823 }
6824 return false;
6825 }
6826
b6875aac
KV
6827 if (VECTOR_MODE_P (mode))
6828 {
6829 /* Vector compare. */
6830 if (speed)
6831 *cost += extra_cost->vect.alu;
6832
6833 if (aarch64_float_const_zero_rtx_p (op1))
6834 {
6835 /* Vector cm (eq|ge|gt|lt|le) supports constant 0.0 for no extra
6836 cost. */
6837 return true;
6838 }
6839 return false;
6840 }
a8eecd00 6841 return false;
43e9d192
IB
6842
6843 case MINUS:
4745e701
JG
6844 {
6845 op0 = XEXP (x, 0);
6846 op1 = XEXP (x, 1);
6847
6848cost_minus:
e548c9df 6849 *cost += rtx_cost (op0, mode, MINUS, 0, speed);
23cb6618 6850
4745e701
JG
6851 /* Detect valid immediates. */
6852 if ((GET_MODE_CLASS (mode) == MODE_INT
6853 || (GET_MODE_CLASS (mode) == MODE_CC
6854 && GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT))
6855 && CONST_INT_P (op1)
6856 && aarch64_uimm12_shift (INTVAL (op1)))
6857 {
4745e701
JG
6858 if (speed)
6859 /* SUB(S) (immediate). */
6860 *cost += extra_cost->alu.arith;
6861 return true;
4745e701
JG
6862 }
6863
7cc2145f
JG
6864 /* Look for SUB (extended register). */
6865 if (aarch64_rtx_arith_op_extract_p (op1, mode))
6866 {
6867 if (speed)
2533c820 6868 *cost += extra_cost->alu.extend_arith;
7cc2145f 6869
e47c4031
KT
6870 op1 = aarch64_strip_extend (op1);
6871 *cost += rtx_cost (op1, VOIDmode,
e548c9df 6872 (enum rtx_code) GET_CODE (op1), 0, speed);
7cc2145f
JG
6873 return true;
6874 }
6875
4745e701
JG
6876 rtx new_op1 = aarch64_strip_extend (op1);
6877
6878 /* Cost this as an FMA-alike operation. */
6879 if ((GET_CODE (new_op1) == MULT
0a78ebe4 6880 || aarch64_shift_p (GET_CODE (new_op1)))
4745e701
JG
6881 && code != COMPARE)
6882 {
6883 *cost += aarch64_rtx_mult_cost (new_op1, MULT,
6884 (enum rtx_code) code,
6885 speed);
4745e701
JG
6886 return true;
6887 }
43e9d192 6888
e548c9df 6889 *cost += rtx_cost (new_op1, VOIDmode, MINUS, 1, speed);
43e9d192 6890
4745e701
JG
6891 if (speed)
6892 {
b6875aac
KV
6893 if (VECTOR_MODE_P (mode))
6894 {
6895 /* Vector SUB. */
6896 *cost += extra_cost->vect.alu;
6897 }
6898 else if (GET_MODE_CLASS (mode) == MODE_INT)
6899 {
6900 /* SUB(S). */
6901 *cost += extra_cost->alu.arith;
6902 }
4745e701 6903 else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
b6875aac
KV
6904 {
6905 /* FSUB. */
6906 *cost += extra_cost->fp[mode == DFmode].addsub;
6907 }
4745e701
JG
6908 }
6909 return true;
6910 }
43e9d192
IB
6911
6912 case PLUS:
4745e701
JG
6913 {
6914 rtx new_op0;
43e9d192 6915
4745e701
JG
6916 op0 = XEXP (x, 0);
6917 op1 = XEXP (x, 1);
43e9d192 6918
a8eecd00 6919cost_plus:
4745e701
JG
6920 if (GET_RTX_CLASS (GET_CODE (op0)) == RTX_COMPARE
6921 || GET_RTX_CLASS (GET_CODE (op0)) == RTX_COMM_COMPARE)
6922 {
6923 /* CSINC. */
e548c9df
AM
6924 *cost += rtx_cost (XEXP (op0, 0), mode, PLUS, 0, speed);
6925 *cost += rtx_cost (op1, mode, PLUS, 1, speed);
4745e701
JG
6926 return true;
6927 }
43e9d192 6928
4745e701
JG
6929 if (GET_MODE_CLASS (mode) == MODE_INT
6930 && CONST_INT_P (op1)
6931 && aarch64_uimm12_shift (INTVAL (op1)))
6932 {
e548c9df 6933 *cost += rtx_cost (op0, mode, PLUS, 0, speed);
43e9d192 6934
4745e701
JG
6935 if (speed)
6936 /* ADD (immediate). */
6937 *cost += extra_cost->alu.arith;
6938 return true;
6939 }
6940
e548c9df 6941 *cost += rtx_cost (op1, mode, PLUS, 1, speed);
23cb6618 6942
7cc2145f
JG
6943 /* Look for ADD (extended register). */
6944 if (aarch64_rtx_arith_op_extract_p (op0, mode))
6945 {
6946 if (speed)
2533c820 6947 *cost += extra_cost->alu.extend_arith;
7cc2145f 6948
e47c4031
KT
6949 op0 = aarch64_strip_extend (op0);
6950 *cost += rtx_cost (op0, VOIDmode,
e548c9df 6951 (enum rtx_code) GET_CODE (op0), 0, speed);
7cc2145f
JG
6952 return true;
6953 }
6954
4745e701
JG
6955 /* Strip any extend, leave shifts behind as we will
6956 cost them through mult_cost. */
6957 new_op0 = aarch64_strip_extend (op0);
6958
6959 if (GET_CODE (new_op0) == MULT
0a78ebe4 6960 || aarch64_shift_p (GET_CODE (new_op0)))
4745e701
JG
6961 {
6962 *cost += aarch64_rtx_mult_cost (new_op0, MULT, PLUS,
6963 speed);
4745e701
JG
6964 return true;
6965 }
6966
e548c9df 6967 *cost += rtx_cost (new_op0, VOIDmode, PLUS, 0, speed);
4745e701
JG
6968
6969 if (speed)
6970 {
b6875aac
KV
6971 if (VECTOR_MODE_P (mode))
6972 {
6973 /* Vector ADD. */
6974 *cost += extra_cost->vect.alu;
6975 }
6976 else if (GET_MODE_CLASS (mode) == MODE_INT)
6977 {
6978 /* ADD. */
6979 *cost += extra_cost->alu.arith;
6980 }
4745e701 6981 else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
b6875aac
KV
6982 {
6983 /* FADD. */
6984 *cost += extra_cost->fp[mode == DFmode].addsub;
6985 }
4745e701
JG
6986 }
6987 return true;
6988 }
43e9d192 6989
18b42b2a
KT
6990 case BSWAP:
6991 *cost = COSTS_N_INSNS (1);
6992
6993 if (speed)
b6875aac
KV
6994 {
6995 if (VECTOR_MODE_P (mode))
6996 *cost += extra_cost->vect.alu;
6997 else
6998 *cost += extra_cost->alu.rev;
6999 }
18b42b2a
KT
7000 return false;
7001
43e9d192 7002 case IOR:
f7d5cf8d
KT
7003 if (aarch_rev16_p (x))
7004 {
7005 *cost = COSTS_N_INSNS (1);
7006
b6875aac
KV
7007 if (speed)
7008 {
7009 if (VECTOR_MODE_P (mode))
7010 *cost += extra_cost->vect.alu;
7011 else
7012 *cost += extra_cost->alu.rev;
7013 }
7014 return true;
f7d5cf8d 7015 }
fb0cb7fa
KT
7016
7017 if (aarch64_extr_rtx_p (x, &op0, &op1))
7018 {
e548c9df
AM
7019 *cost += rtx_cost (op0, mode, IOR, 0, speed);
7020 *cost += rtx_cost (op1, mode, IOR, 1, speed);
fb0cb7fa
KT
7021 if (speed)
7022 *cost += extra_cost->alu.shift;
7023
7024 return true;
7025 }
f7d5cf8d 7026 /* Fall through. */
43e9d192
IB
7027 case XOR:
7028 case AND:
7029 cost_logic:
7030 op0 = XEXP (x, 0);
7031 op1 = XEXP (x, 1);
7032
b6875aac
KV
7033 if (VECTOR_MODE_P (mode))
7034 {
7035 if (speed)
7036 *cost += extra_cost->vect.alu;
7037 return true;
7038 }
7039
268c3b47
JG
7040 if (code == AND
7041 && GET_CODE (op0) == MULT
7042 && CONST_INT_P (XEXP (op0, 1))
7043 && CONST_INT_P (op1)
7044 && aarch64_uxt_size (exact_log2 (INTVAL (XEXP (op0, 1))),
7045 INTVAL (op1)) != 0)
7046 {
7047 /* This is a UBFM/SBFM. */
e548c9df 7048 *cost += rtx_cost (XEXP (op0, 0), mode, ZERO_EXTRACT, 0, speed);
268c3b47
JG
7049 if (speed)
7050 *cost += extra_cost->alu.bfx;
7051 return true;
7052 }
7053
e548c9df 7054 if (GET_MODE_CLASS (mode) == MODE_INT)
43e9d192 7055 {
8c83f71d 7056 if (CONST_INT_P (op1))
43e9d192 7057 {
8c83f71d
KT
7058 /* We have a mask + shift version of a UBFIZ
7059 i.e. the *andim_ashift<mode>_bfiz pattern. */
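	      /* For example, (x << 4) & 0xff0 inserts an 8-bit field at bit 4
		 and can be emitted as a single "ubfiz w0, w0, #4, #8".  */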
7060 if (GET_CODE (op0) == ASHIFT
7061 && aarch64_mask_and_shift_for_ubfiz_p (mode, op1,
7062 XEXP (op0, 1)))
7063 {
7064 *cost += rtx_cost (XEXP (op0, 0), mode,
7065 (enum rtx_code) code, 0, speed);
7066 if (speed)
7067 *cost += extra_cost->alu.bfx;
268c3b47 7068
8c83f71d
KT
7069 return true;
7070 }
7071 else if (aarch64_bitmask_imm (INTVAL (op1), mode))
7072 {
7073 /* We possibly get the immediate for free, this is not
7074 modelled. */
7075 *cost += rtx_cost (op0, mode, (enum rtx_code) code, 0, speed);
7076 if (speed)
7077 *cost += extra_cost->alu.logical;
268c3b47 7078
8c83f71d
KT
7079 return true;
7080 }
43e9d192
IB
7081 }
7082 else
7083 {
268c3b47
JG
7084 rtx new_op0 = op0;
7085
7086 /* Handle ORN, EON, or BIC. */
43e9d192
IB
7087 if (GET_CODE (op0) == NOT)
7088 op0 = XEXP (op0, 0);
268c3b47
JG
7089
7090 new_op0 = aarch64_strip_shift (op0);
7091
7092 /* If we had a shift on op0 then this is a logical-shift-
7093 by-register/immediate operation. Otherwise, this is just
7094 a logical operation. */
7095 if (speed)
7096 {
7097 if (new_op0 != op0)
7098 {
7099 /* Shift by immediate. */
7100 if (CONST_INT_P (XEXP (op0, 1)))
7101 *cost += extra_cost->alu.log_shift;
7102 else
7103 *cost += extra_cost->alu.log_shift_reg;
7104 }
7105 else
7106 *cost += extra_cost->alu.logical;
7107 }
7108
7109 /* In both cases we want to cost both operands. */
e548c9df
AM
7110 *cost += rtx_cost (new_op0, mode, (enum rtx_code) code, 0, speed);
7111 *cost += rtx_cost (op1, mode, (enum rtx_code) code, 1, speed);
268c3b47
JG
7112
7113 return true;
43e9d192 7114 }
43e9d192
IB
7115 }
7116 return false;
7117
268c3b47 7118 case NOT:
6365da9e
KT
7119 x = XEXP (x, 0);
7120 op0 = aarch64_strip_shift (x);
7121
b6875aac
KV
7122 if (VECTOR_MODE_P (mode))
7123 {
7124 /* Vector NOT. */
7125 *cost += extra_cost->vect.alu;
7126 return false;
7127 }
7128
6365da9e
KT
7129 /* MVN-shifted-reg. */
7130 if (op0 != x)
7131 {
e548c9df 7132 *cost += rtx_cost (op0, mode, (enum rtx_code) code, 0, speed);
6365da9e
KT
7133
7134 if (speed)
7135 *cost += extra_cost->alu.log_shift;
7136
7137 return true;
7138 }
7139 /* EON can have two forms: (xor (not a) b) but also (not (xor a b)).
 7140 Handle the second form here, taking care that 'a' in the above can
7141 be a shift. */
7142 else if (GET_CODE (op0) == XOR)
7143 {
7144 rtx newop0 = XEXP (op0, 0);
7145 rtx newop1 = XEXP (op0, 1);
7146 rtx op0_stripped = aarch64_strip_shift (newop0);
7147
e548c9df
AM
7148 *cost += rtx_cost (newop1, mode, (enum rtx_code) code, 1, speed);
7149 *cost += rtx_cost (op0_stripped, mode, XOR, 0, speed);
6365da9e
KT
7150
7151 if (speed)
7152 {
7153 if (op0_stripped != newop0)
7154 *cost += extra_cost->alu.log_shift;
7155 else
7156 *cost += extra_cost->alu.logical;
7157 }
7158
7159 return true;
7160 }
268c3b47
JG
7161 /* MVN. */
7162 if (speed)
7163 *cost += extra_cost->alu.logical;
7164
268c3b47
JG
7165 return false;
7166
43e9d192 7167 case ZERO_EXTEND:
b1685e62
JG
7168
7169 op0 = XEXP (x, 0);
7170 /* If a value is written in SI mode, then zero extended to DI
7171 mode, the operation will in general be free as a write to
7172 a 'w' register implicitly zeroes the upper bits of an 'x'
7173 register. However, if this is
7174
7175 (set (reg) (zero_extend (reg)))
7176
7177 we must cost the explicit register move. */
7178 if (mode == DImode
7179 && GET_MODE (op0) == SImode
7180 && outer == SET)
7181 {
e548c9df 7182 int op_cost = rtx_cost (op0, VOIDmode, ZERO_EXTEND, 0, speed);
b1685e62 7183
dde23f43
KM
7184 /* If OP_COST is non-zero, then the cost of the zero extend
7185 is effectively the cost of the inner operation. Otherwise
7186 we have a MOV instruction and we take the cost from the MOV
7187 itself. This is true independently of whether we are
7188 optimizing for space or time. */
7189 if (op_cost)
b1685e62
JG
7190 *cost = op_cost;
7191
7192 return true;
7193 }
e548c9df 7194 else if (MEM_P (op0))
43e9d192 7195 {
b1685e62 7196 /* All loads can zero extend to any size for free. */
e548c9df 7197 *cost = rtx_cost (op0, VOIDmode, ZERO_EXTEND, param, speed);
43e9d192
IB
7198 return true;
7199 }
b1685e62 7200
283b6c85
KT
7201 op0 = aarch64_extend_bitfield_pattern_p (x);
7202 if (op0)
7203 {
7204 *cost += rtx_cost (op0, mode, ZERO_EXTEND, 0, speed);
7205 if (speed)
7206 *cost += extra_cost->alu.bfx;
7207 return true;
7208 }
7209
b1685e62 7210 if (speed)
b6875aac
KV
7211 {
7212 if (VECTOR_MODE_P (mode))
7213 {
7214 /* UMOV. */
7215 *cost += extra_cost->vect.alu;
7216 }
7217 else
7218 {
63715e5e
WD
7219 /* We generate an AND instead of UXTB/UXTH. */
7220 *cost += extra_cost->alu.logical;
b6875aac
KV
7221 }
7222 }
43e9d192
IB
7223 return false;
7224
7225 case SIGN_EXTEND:
b1685e62 7226 if (MEM_P (XEXP (x, 0)))
43e9d192 7227 {
b1685e62
JG
7228 /* LDRSH. */
7229 if (speed)
7230 {
7231 rtx address = XEXP (XEXP (x, 0), 0);
7232 *cost += extra_cost->ldst.load_sign_extend;
7233
7234 *cost +=
7235 COSTS_N_INSNS (aarch64_address_cost (address, mode,
7236 0, speed));
7237 }
43e9d192
IB
7238 return true;
7239 }
b1685e62 7240
283b6c85
KT
7241 op0 = aarch64_extend_bitfield_pattern_p (x);
7242 if (op0)
7243 {
7244 *cost += rtx_cost (op0, mode, SIGN_EXTEND, 0, speed);
7245 if (speed)
7246 *cost += extra_cost->alu.bfx;
7247 return true;
7248 }
7249
b1685e62 7250 if (speed)
b6875aac
KV
7251 {
7252 if (VECTOR_MODE_P (mode))
7253 *cost += extra_cost->vect.alu;
7254 else
7255 *cost += extra_cost->alu.extend;
7256 }
43e9d192
IB
7257 return false;
7258
ba0cfa17
JG
7259 case ASHIFT:
7260 op0 = XEXP (x, 0);
7261 op1 = XEXP (x, 1);
7262
7263 if (CONST_INT_P (op1))
7264 {
ba0cfa17 7265 if (speed)
b6875aac
KV
7266 {
7267 if (VECTOR_MODE_P (mode))
7268 {
7269 /* Vector shift (immediate). */
7270 *cost += extra_cost->vect.alu;
7271 }
7272 else
7273 {
 7274 /* LSL (immediate), UBFM, UBFIZ and friends. These are all
7275 aliases. */
7276 *cost += extra_cost->alu.shift;
7277 }
7278 }
ba0cfa17
JG
7279
7280 /* We can incorporate zero/sign extend for free. */
7281 if (GET_CODE (op0) == ZERO_EXTEND
7282 || GET_CODE (op0) == SIGN_EXTEND)
7283 op0 = XEXP (op0, 0);
7284
e548c9df 7285 *cost += rtx_cost (op0, VOIDmode, ASHIFT, 0, speed);
ba0cfa17
JG
7286 return true;
7287 }
7288 else
7289 {
ba0cfa17 7290 if (speed)
b6875aac
KV
7291 {
7292 if (VECTOR_MODE_P (mode))
7293 {
7294 /* Vector shift (register). */
7295 *cost += extra_cost->vect.alu;
7296 }
7297 else
7298 {
7299 /* LSLV. */
7300 *cost += extra_cost->alu.shift_reg;
7301 }
7302 }
ba0cfa17
JG
7303 return false; /* All arguments need to be in registers. */
7304 }
7305
43e9d192 7306 case ROTATE:
43e9d192
IB
7307 case ROTATERT:
7308 case LSHIFTRT:
43e9d192 7309 case ASHIFTRT:
ba0cfa17
JG
7310 op0 = XEXP (x, 0);
7311 op1 = XEXP (x, 1);
43e9d192 7312
ba0cfa17
JG
7313 if (CONST_INT_P (op1))
7314 {
7315 /* ASR (immediate) and friends. */
7316 if (speed)
b6875aac
KV
7317 {
7318 if (VECTOR_MODE_P (mode))
7319 *cost += extra_cost->vect.alu;
7320 else
7321 *cost += extra_cost->alu.shift;
7322 }
43e9d192 7323
e548c9df 7324 *cost += rtx_cost (op0, mode, (enum rtx_code) code, 0, speed);
ba0cfa17
JG
7325 return true;
7326 }
7327 else
7328 {
7329
7330 /* ASR (register) and friends. */
7331 if (speed)
b6875aac
KV
7332 {
7333 if (VECTOR_MODE_P (mode))
7334 *cost += extra_cost->vect.alu;
7335 else
7336 *cost += extra_cost->alu.shift_reg;
7337 }
ba0cfa17
JG
7338 return false; /* All arguments need to be in registers. */
7339 }
43e9d192 7340
909734be
JG
7341 case SYMBOL_REF:
7342
1b1e81f8
JW
7343 if (aarch64_cmodel == AARCH64_CMODEL_LARGE
7344 || aarch64_cmodel == AARCH64_CMODEL_SMALL_SPIC)
909734be
JG
7345 {
7346 /* LDR. */
7347 if (speed)
7348 *cost += extra_cost->ldst.load;
7349 }
7350 else if (aarch64_cmodel == AARCH64_CMODEL_SMALL
7351 || aarch64_cmodel == AARCH64_CMODEL_SMALL_PIC)
7352 {
7353 /* ADRP, followed by ADD. */
7354 *cost += COSTS_N_INSNS (1);
7355 if (speed)
7356 *cost += 2 * extra_cost->alu.arith;
7357 }
7358 else if (aarch64_cmodel == AARCH64_CMODEL_TINY
7359 || aarch64_cmodel == AARCH64_CMODEL_TINY_PIC)
7360 {
7361 /* ADR. */
7362 if (speed)
7363 *cost += extra_cost->alu.arith;
7364 }
7365
7366 if (flag_pic)
7367 {
7368 /* One extra load instruction, after accessing the GOT. */
7369 *cost += COSTS_N_INSNS (1);
7370 if (speed)
7371 *cost += extra_cost->ldst.load;
7372 }
43e9d192
IB
7373 return true;
7374
909734be 7375 case HIGH:
43e9d192 7376 case LO_SUM:
909734be
JG
7377 /* ADRP/ADD (immediate). */
7378 if (speed)
7379 *cost += extra_cost->alu.arith;
43e9d192
IB
7380 return true;
7381
7382 case ZERO_EXTRACT:
7383 case SIGN_EXTRACT:
7cc2145f
JG
7384 /* UBFX/SBFX. */
7385 if (speed)
b6875aac
KV
7386 {
7387 if (VECTOR_MODE_P (mode))
7388 *cost += extra_cost->vect.alu;
7389 else
7390 *cost += extra_cost->alu.bfx;
7391 }
7cc2145f
JG
7392
7393 /* We can trust that the immediates used will be correct (there
7394 are no by-register forms), so we need only cost op0. */
e548c9df 7395 *cost += rtx_cost (XEXP (x, 0), VOIDmode, (enum rtx_code) code, 0, speed);
43e9d192
IB
7396 return true;
7397
7398 case MULT:
4745e701
JG
7399 *cost += aarch64_rtx_mult_cost (x, MULT, 0, speed);
7400 /* aarch64_rtx_mult_cost always handles recursion to its
7401 operands. */
7402 return true;
43e9d192
IB
7403
7404 case MOD:
4f58fe36
KT
7405 /* We can expand signed mod by power of 2 using a NEGS, two parallel
 7406 ANDs and a CSNEG. Assume here that the cost of a CSNEG is the same as
 7407 that of an unconditional negate. This case should only ever be reached
 7408 through the set_smod_pow2_cheap check in expmed.c. */
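	  /* An illustrative expansion for "x % 4" in SImode (the exact
	     sequence and condition code may vary):
	       negs	w1, w0
	       and	w0, w0, 3
	       and	w1, w1, 3
	       csneg	w0, w0, w1, mi  */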
7409 if (CONST_INT_P (XEXP (x, 1))
7410 && exact_log2 (INTVAL (XEXP (x, 1))) > 0
7411 && (mode == SImode || mode == DImode))
7412 {
7413 /* We expand to 4 instructions. Reset the baseline. */
7414 *cost = COSTS_N_INSNS (4);
7415
7416 if (speed)
7417 *cost += 2 * extra_cost->alu.logical
7418 + 2 * extra_cost->alu.arith;
7419
7420 return true;
7421 }
7422
7423 /* Fall-through. */
43e9d192 7424 case UMOD:
43e9d192
IB
7425 if (speed)
7426 {
b6875aac
KV
7427 if (VECTOR_MODE_P (mode))
7428 *cost += extra_cost->vect.alu;
e548c9df
AM
7429 else if (GET_MODE_CLASS (mode) == MODE_INT)
7430 *cost += (extra_cost->mult[mode == DImode].add
7431 + extra_cost->mult[mode == DImode].idiv);
7432 else if (mode == DFmode)
73250c4c
KT
7433 *cost += (extra_cost->fp[1].mult
7434 + extra_cost->fp[1].div);
e548c9df 7435 else if (mode == SFmode)
73250c4c
KT
7436 *cost += (extra_cost->fp[0].mult
7437 + extra_cost->fp[0].div);
43e9d192
IB
7438 }
7439 return false; /* All arguments need to be in registers. */
7440
7441 case DIV:
7442 case UDIV:
4105fe38 7443 case SQRT:
43e9d192
IB
7444 if (speed)
7445 {
b6875aac
KV
7446 if (VECTOR_MODE_P (mode))
7447 *cost += extra_cost->vect.alu;
7448 else if (GET_MODE_CLASS (mode) == MODE_INT)
4105fe38
JG
7449 /* There is no integer SQRT, so only DIV and UDIV can get
7450 here. */
7451 *cost += extra_cost->mult[mode == DImode].idiv;
7452 else
7453 *cost += extra_cost->fp[mode == DFmode].div;
43e9d192
IB
7454 }
7455 return false; /* All arguments need to be in registers. */
7456
a8eecd00 7457 case IF_THEN_ELSE:
2d5ffe46
AP
7458 return aarch64_if_then_else_costs (XEXP (x, 0), XEXP (x, 1),
7459 XEXP (x, 2), cost, speed);
a8eecd00
JG
7460
7461 case EQ:
7462 case NE:
7463 case GT:
7464 case GTU:
7465 case LT:
7466 case LTU:
7467 case GE:
7468 case GEU:
7469 case LE:
7470 case LEU:
7471
7472 return false; /* All arguments must be in registers. */
7473
b292109f
JG
7474 case FMA:
7475 op0 = XEXP (x, 0);
7476 op1 = XEXP (x, 1);
7477 op2 = XEXP (x, 2);
7478
7479 if (speed)
b6875aac
KV
7480 {
7481 if (VECTOR_MODE_P (mode))
7482 *cost += extra_cost->vect.alu;
7483 else
7484 *cost += extra_cost->fp[mode == DFmode].fma;
7485 }
b292109f
JG
7486
7487 /* FMSUB, FNMADD, and FNMSUB are free. */
7488 if (GET_CODE (op0) == NEG)
7489 op0 = XEXP (op0, 0);
7490
7491 if (GET_CODE (op2) == NEG)
7492 op2 = XEXP (op2, 0);
7493
7494 /* aarch64_fnma4_elt_to_64v2df has the NEG as operand 1,
7495 and the by-element operand as operand 0. */
7496 if (GET_CODE (op1) == NEG)
7497 op1 = XEXP (op1, 0);
7498
7499 /* Catch vector-by-element operations. The by-element operand can
7500 either be (vec_duplicate (vec_select (x))) or just
7501 (vec_select (x)), depending on whether we are multiplying by
7502 a vector or a scalar.
7503
 7504 Canonicalization is not very good in these cases: FMA4 will put the
 7505 by-element operand as operand 0, while FNMA4 will have it as operand 1. */
7506 if (GET_CODE (op0) == VEC_DUPLICATE)
7507 op0 = XEXP (op0, 0);
7508 else if (GET_CODE (op1) == VEC_DUPLICATE)
7509 op1 = XEXP (op1, 0);
7510
7511 if (GET_CODE (op0) == VEC_SELECT)
7512 op0 = XEXP (op0, 0);
7513 else if (GET_CODE (op1) == VEC_SELECT)
7514 op1 = XEXP (op1, 0);
7515
7516 /* If the remaining parameters are not registers,
7517 get the cost to put them into registers. */
e548c9df
AM
7518 *cost += rtx_cost (op0, mode, FMA, 0, speed);
7519 *cost += rtx_cost (op1, mode, FMA, 1, speed);
7520 *cost += rtx_cost (op2, mode, FMA, 2, speed);
b292109f
JG
7521 return true;
7522
5e2a765b
KT
7523 case FLOAT:
7524 case UNSIGNED_FLOAT:
7525 if (speed)
7526 *cost += extra_cost->fp[mode == DFmode].fromint;
7527 return false;
7528
b292109f
JG
7529 case FLOAT_EXTEND:
7530 if (speed)
b6875aac
KV
7531 {
7532 if (VECTOR_MODE_P (mode))
7533 {
 7534 /* Vector conversion (widen). */
7535 *cost += extra_cost->vect.alu;
7536 }
7537 else
7538 *cost += extra_cost->fp[mode == DFmode].widen;
7539 }
b292109f
JG
7540 return false;
7541
7542 case FLOAT_TRUNCATE:
7543 if (speed)
b6875aac
KV
7544 {
7545 if (VECTOR_MODE_P (mode))
7546 {
 7547 /* Vector conversion. */
7548 *cost += extra_cost->vect.alu;
7549 }
7550 else
7551 *cost += extra_cost->fp[mode == DFmode].narrow;
7552 }
b292109f
JG
7553 return false;
7554
61263118
KT
7555 case FIX:
7556 case UNSIGNED_FIX:
7557 x = XEXP (x, 0);
7558 /* Strip the rounding part. They will all be implemented
7559 by the fcvt* family of instructions anyway. */
7560 if (GET_CODE (x) == UNSPEC)
7561 {
7562 unsigned int uns_code = XINT (x, 1);
7563
7564 if (uns_code == UNSPEC_FRINTA
7565 || uns_code == UNSPEC_FRINTM
7566 || uns_code == UNSPEC_FRINTN
7567 || uns_code == UNSPEC_FRINTP
7568 || uns_code == UNSPEC_FRINTZ)
7569 x = XVECEXP (x, 0, 0);
7570 }
7571
7572 if (speed)
b6875aac
KV
7573 {
7574 if (VECTOR_MODE_P (mode))
7575 *cost += extra_cost->vect.alu;
7576 else
7577 *cost += extra_cost->fp[GET_MODE (x) == DFmode].toint;
7578 }
39252973
KT
7579
7580 /* We can combine fmul by a power of 2 followed by a fcvt into a single
7581 fixed-point fcvt. */
7582 if (GET_CODE (x) == MULT
7583 && ((VECTOR_MODE_P (mode)
7584 && aarch64_vec_fpconst_pow_of_2 (XEXP (x, 1)) > 0)
7585 || aarch64_fpconst_pow_of_2 (XEXP (x, 1)) > 0))
7586 {
7587 *cost += rtx_cost (XEXP (x, 0), VOIDmode, (rtx_code) code,
7588 0, speed);
7589 return true;
7590 }
7591
e548c9df 7592 *cost += rtx_cost (x, VOIDmode, (enum rtx_code) code, 0, speed);
61263118
KT
7593 return true;
7594
b292109f 7595 case ABS:
b6875aac
KV
7596 if (VECTOR_MODE_P (mode))
7597 {
7598 /* ABS (vector). */
7599 if (speed)
7600 *cost += extra_cost->vect.alu;
7601 }
7602 else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
b292109f 7603 {
19261b99
KT
7604 op0 = XEXP (x, 0);
7605
7606 /* FABD, which is analogous to FADD. */
7607 if (GET_CODE (op0) == MINUS)
7608 {
e548c9df
AM
7609 *cost += rtx_cost (XEXP (op0, 0), mode, MINUS, 0, speed);
7610 *cost += rtx_cost (XEXP (op0, 1), mode, MINUS, 1, speed);
19261b99
KT
7611 if (speed)
7612 *cost += extra_cost->fp[mode == DFmode].addsub;
7613
7614 return true;
7615 }
7616 /* Simple FABS is analogous to FNEG. */
b292109f
JG
7617 if (speed)
7618 *cost += extra_cost->fp[mode == DFmode].neg;
7619 }
7620 else
7621 {
 7622 /* Integer ABS will either be split into
7623 two arithmetic instructions, or will be an ABS
7624 (scalar), which we don't model. */
7625 *cost = COSTS_N_INSNS (2);
7626 if (speed)
7627 *cost += 2 * extra_cost->alu.arith;
7628 }
7629 return false;
7630
7631 case SMAX:
7632 case SMIN:
7633 if (speed)
7634 {
b6875aac
KV
7635 if (VECTOR_MODE_P (mode))
7636 *cost += extra_cost->vect.alu;
7637 else
7638 {
7639 /* FMAXNM/FMINNM/FMAX/FMIN.
7640 TODO: This may not be accurate for all implementations, but
7641 we do not model this in the cost tables. */
7642 *cost += extra_cost->fp[mode == DFmode].addsub;
7643 }
b292109f
JG
7644 }
7645 return false;
7646
61263118
KT
7647 case UNSPEC:
7648 /* The floating point round to integer frint* instructions. */
7649 if (aarch64_frint_unspec_p (XINT (x, 1)))
7650 {
7651 if (speed)
7652 *cost += extra_cost->fp[mode == DFmode].roundint;
7653
7654 return false;
7655 }
781aeb73
KT
7656
7657 if (XINT (x, 1) == UNSPEC_RBIT)
7658 {
7659 if (speed)
7660 *cost += extra_cost->alu.rev;
7661
7662 return false;
7663 }
61263118
KT
7664 break;
7665
fb620c4a
JG
7666 case TRUNCATE:
7667
7668 /* Decompose <su>muldi3_highpart. */
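	  /* The fragments tested below assemble into the following shape:
	       (truncate:DI
		 (lshiftrt:TI
		   (mult:TI (ANY_EXTEND:TI (reg:DI))
			    (ANY_EXTEND:TI (reg:DI)))
		   (const_int 64)))
	     i.e. the high half of a widening 64x64->128-bit multiply, which
	     maps to a single UMULH/SMULH.  */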
7669 if (/* (truncate:DI */
7670 mode == DImode
7671 /* (lshiftrt:TI */
7672 && GET_MODE (XEXP (x, 0)) == TImode
7673 && GET_CODE (XEXP (x, 0)) == LSHIFTRT
7674 /* (mult:TI */
7675 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
7676 /* (ANY_EXTEND:TI (reg:DI))
7677 (ANY_EXTEND:TI (reg:DI))) */
7678 && ((GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == ZERO_EXTEND
7679 && GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)) == ZERO_EXTEND)
7680 || (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == SIGN_EXTEND
7681 && GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)) == SIGN_EXTEND))
7682 && GET_MODE (XEXP (XEXP (XEXP (XEXP (x, 0), 0), 0), 0)) == DImode
7683 && GET_MODE (XEXP (XEXP (XEXP (XEXP (x, 0), 0), 1), 0)) == DImode
7684 /* (const_int 64) */
7685 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
7686 && UINTVAL (XEXP (XEXP (x, 0), 1)) == 64)
7687 {
7688 /* UMULH/SMULH. */
7689 if (speed)
7690 *cost += extra_cost->mult[mode == DImode].extend;
e548c9df
AM
7691 *cost += rtx_cost (XEXP (XEXP (XEXP (XEXP (x, 0), 0), 0), 0),
7692 mode, MULT, 0, speed);
7693 *cost += rtx_cost (XEXP (XEXP (XEXP (XEXP (x, 0), 0), 1), 0),
7694 mode, MULT, 1, speed);
fb620c4a
JG
7695 return true;
7696 }
7697
7698 /* Fall through. */
43e9d192 7699 default:
61263118 7700 break;
43e9d192 7701 }
61263118 7702
c10e3d7f
AP
7703 if (dump_file
7704 && flag_aarch64_verbose_cost)
61263118
KT
7705 fprintf (dump_file,
7706 "\nFailed to cost RTX. Assuming default cost.\n");
7707
7708 return true;
43e9d192
IB
7709}
7710
0ee859b5
JG
 7711/* Wrapper around aarch64_rtx_costs that dumps the partial or total cost
7712 calculated for X. This cost is stored in *COST. Returns true
7713 if the total cost of X was calculated. */
7714static bool
e548c9df 7715aarch64_rtx_costs_wrapper (rtx x, machine_mode mode, int outer,
0ee859b5
JG
7716 int param, int *cost, bool speed)
7717{
e548c9df 7718 bool result = aarch64_rtx_costs (x, mode, outer, param, cost, speed);
0ee859b5 7719
c10e3d7f
AP
7720 if (dump_file
7721 && flag_aarch64_verbose_cost)
0ee859b5
JG
7722 {
7723 print_rtl_single (dump_file, x);
7724 fprintf (dump_file, "\n%s cost: %d (%s)\n",
7725 speed ? "Hot" : "Cold",
7726 *cost, result ? "final" : "partial");
7727 }
7728
7729 return result;
7730}
7731
43e9d192 7732static int
ef4bddc2 7733aarch64_register_move_cost (machine_mode mode,
8a3a7e67 7734 reg_class_t from_i, reg_class_t to_i)
43e9d192 7735{
8a3a7e67
RH
7736 enum reg_class from = (enum reg_class) from_i;
7737 enum reg_class to = (enum reg_class) to_i;
43e9d192 7738 const struct cpu_regmove_cost *regmove_cost
b175b679 7739 = aarch64_tune_params.regmove_cost;
43e9d192 7740
3be07662 7741 /* Caller save and pointer regs are equivalent to GENERAL_REGS. */
2876a13f 7742 if (to == CALLER_SAVE_REGS || to == POINTER_REGS)
3be07662
WD
7743 to = GENERAL_REGS;
7744
2876a13f 7745 if (from == CALLER_SAVE_REGS || from == POINTER_REGS)
3be07662
WD
7746 from = GENERAL_REGS;
7747
6ee70f81
AP
 7748 /* The cost of moving between a GPR and the stack is the same as GP2GP. */
7749 if ((from == GENERAL_REGS && to == STACK_REG)
7750 || (to == GENERAL_REGS && from == STACK_REG))
7751 return regmove_cost->GP2GP;
7752
7753 /* To/From the stack register, we move via the gprs. */
7754 if (to == STACK_REG || from == STACK_REG)
7755 return aarch64_register_move_cost (mode, from, GENERAL_REGS)
7756 + aarch64_register_move_cost (mode, GENERAL_REGS, to);
7757
8919453c
WD
7758 if (GET_MODE_SIZE (mode) == 16)
7759 {
7760 /* 128-bit operations on general registers require 2 instructions. */
7761 if (from == GENERAL_REGS && to == GENERAL_REGS)
7762 return regmove_cost->GP2GP * 2;
7763 else if (from == GENERAL_REGS)
7764 return regmove_cost->GP2FP * 2;
7765 else if (to == GENERAL_REGS)
7766 return regmove_cost->FP2GP * 2;
7767
7768 /* When AdvSIMD instructions are disabled it is not possible to move
7769 a 128-bit value directly between Q registers. This is handled in
7770 secondary reload. A general register is used as a scratch to move
7771 the upper DI value and the lower DI value is moved directly,
7772 hence the cost is the sum of three moves. */
7773 if (! TARGET_SIMD)
7774 return regmove_cost->GP2FP + regmove_cost->FP2GP + regmove_cost->FP2FP;
7775
7776 return regmove_cost->FP2FP;
7777 }
7778
43e9d192
IB
7779 if (from == GENERAL_REGS && to == GENERAL_REGS)
7780 return regmove_cost->GP2GP;
7781 else if (from == GENERAL_REGS)
7782 return regmove_cost->GP2FP;
7783 else if (to == GENERAL_REGS)
7784 return regmove_cost->FP2GP;
7785
43e9d192
IB
7786 return regmove_cost->FP2FP;
7787}
7788
7789static int
ef4bddc2 7790aarch64_memory_move_cost (machine_mode mode ATTRIBUTE_UNUSED,
43e9d192
IB
7791 reg_class_t rclass ATTRIBUTE_UNUSED,
7792 bool in ATTRIBUTE_UNUSED)
7793{
b175b679 7794 return aarch64_tune_params.memmov_cost;
43e9d192
IB
7795}
7796
0c30e0f3
EM
7797/* Return true if it is safe and beneficial to use the approximate rsqrt optabs
7798 to optimize 1.0/sqrt. */
ee62a5a6
RS
7799
7800static bool
9acc9cbe 7801use_rsqrt_p (machine_mode mode)
ee62a5a6
RS
7802{
7803 return (!flag_trapping_math
7804 && flag_unsafe_math_optimizations
9acc9cbe
EM
7805 && ((aarch64_tune_params.approx_modes->recip_sqrt
7806 & AARCH64_APPROX_MODE (mode))
1a33079e 7807 || flag_mrecip_low_precision_sqrt));
ee62a5a6
RS
7808}
7809
0c30e0f3
EM
7810/* Function to decide when to use the approximate reciprocal square root
7811 builtin. */
a6fc00da
BH
7812
7813static tree
ee62a5a6 7814aarch64_builtin_reciprocal (tree fndecl)
a6fc00da 7815{
9acc9cbe
EM
7816 machine_mode mode = TYPE_MODE (TREE_TYPE (fndecl));
7817
7818 if (!use_rsqrt_p (mode))
a6fc00da 7819 return NULL_TREE;
ee62a5a6 7820 return aarch64_builtin_rsqrt (DECL_FUNCTION_CODE (fndecl));
a6fc00da
BH
7821}
7822
7823typedef rtx (*rsqrte_type) (rtx, rtx);
7824
98daafa0
EM
7825/* Select reciprocal square root initial estimate insn depending on machine
7826 mode. */
a6fc00da 7827
98daafa0 7828static rsqrte_type
a6fc00da
BH
7829get_rsqrte_type (machine_mode mode)
7830{
7831 switch (mode)
7832 {
2a823433
JW
7833 case DFmode: return gen_aarch64_rsqrtedf;
7834 case SFmode: return gen_aarch64_rsqrtesf;
7835 case V2DFmode: return gen_aarch64_rsqrtev2df;
7836 case V2SFmode: return gen_aarch64_rsqrtev2sf;
7837 case V4SFmode: return gen_aarch64_rsqrtev4sf;
a6fc00da
BH
7838 default: gcc_unreachable ();
7839 }
7840}
7841
7842typedef rtx (*rsqrts_type) (rtx, rtx, rtx);
7843
98daafa0 7844/* Select reciprocal square root series step insn depending on machine mode. */
a6fc00da 7845
98daafa0 7846static rsqrts_type
a6fc00da
BH
7847get_rsqrts_type (machine_mode mode)
7848{
7849 switch (mode)
7850 {
00ea75d4
JW
7851 case DFmode: return gen_aarch64_rsqrtsdf;
7852 case SFmode: return gen_aarch64_rsqrtssf;
7853 case V2DFmode: return gen_aarch64_rsqrtsv2df;
7854 case V2SFmode: return gen_aarch64_rsqrtsv2sf;
7855 case V4SFmode: return gen_aarch64_rsqrtsv4sf;
a6fc00da
BH
7856 default: gcc_unreachable ();
7857 }
7858}
7859
98daafa0
EM
7860/* Emit instruction sequence to compute either the approximate square root
7861 or its approximate reciprocal, depending on the flag RECP, and return
7862 whether the sequence was emitted or not. */
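/* A sketch of the underlying math, assuming the usual Newton-Raphson
   refinement implemented by FRSQRTE/FRSQRTS:

     x0      = FRSQRTE (a)                      rough estimate of 1/sqrt(a)
     x(n+1)  = xn * FRSQRTS (a, xn * xn)
	     = xn * (3 - a * xn * xn) / 2

   Two steps are used for SFmode and three for DFmode; for the square root
   itself the refined estimate is additionally multiplied by a, since
   a/sqrt(a) == sqrt(a).  */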
a6fc00da 7863
98daafa0
EM
7864bool
7865aarch64_emit_approx_sqrt (rtx dst, rtx src, bool recp)
a6fc00da 7866{
98daafa0 7867 machine_mode mode = GET_MODE (dst);
daef0a8c
JW
7868
7869 if (GET_MODE_INNER (mode) == HFmode)
7870 return false;
7871
98daafa0
EM
7872 machine_mode mmsk = mode_for_vector
7873 (int_mode_for_mode (GET_MODE_INNER (mode)),
7874 GET_MODE_NUNITS (mode));
7875 bool use_approx_sqrt_p = (!recp
7876 && (flag_mlow_precision_sqrt
7877 || (aarch64_tune_params.approx_modes->sqrt
7878 & AARCH64_APPROX_MODE (mode))));
7879 bool use_approx_rsqrt_p = (recp
7880 && (flag_mrecip_low_precision_sqrt
7881 || (aarch64_tune_params.approx_modes->recip_sqrt
7882 & AARCH64_APPROX_MODE (mode))));
7883
7884 if (!flag_finite_math_only
7885 || flag_trapping_math
7886 || !flag_unsafe_math_optimizations
7887 || !(use_approx_sqrt_p || use_approx_rsqrt_p)
7888 || optimize_function_for_size_p (cfun))
7889 return false;
a6fc00da 7890
98daafa0
EM
7891 rtx xmsk = gen_reg_rtx (mmsk);
7892 if (!recp)
7893 /* When calculating the approximate square root, compare the argument with
7894 0.0 and create a mask. */
7895 emit_insn (gen_rtx_SET (xmsk, gen_rtx_NEG (mmsk, gen_rtx_EQ (mmsk, src,
7896 CONST0_RTX (mode)))));
a6fc00da 7897
98daafa0
EM
7898 /* Estimate the approximate reciprocal square root. */
7899 rtx xdst = gen_reg_rtx (mode);
7900 emit_insn ((*get_rsqrte_type (mode)) (xdst, src));
a6fc00da 7901
98daafa0
EM
7902 /* Iterate over the series twice for SF and thrice for DF. */
7903 int iterations = (GET_MODE_INNER (mode) == DFmode) ? 3 : 2;
a6fc00da 7904
98daafa0
EM
 7905 /* Optionally iterate over the series once less for faster performance
 7906 at the expense of some accuracy. */
7907 if ((recp && flag_mrecip_low_precision_sqrt)
7908 || (!recp && flag_mlow_precision_sqrt))
a6fc00da
BH
7909 iterations--;
7910
98daafa0
EM
7911 /* Iterate over the series to calculate the approximate reciprocal square
7912 root. */
7913 rtx x1 = gen_reg_rtx (mode);
7914 while (iterations--)
a6fc00da 7915 {
a6fc00da 7916 rtx x2 = gen_reg_rtx (mode);
98daafa0
EM
7917 emit_set_insn (x2, gen_rtx_MULT (mode, xdst, xdst));
7918
7919 emit_insn ((*get_rsqrts_type (mode)) (x1, src, x2));
a6fc00da 7920
98daafa0
EM
7921 if (iterations > 0)
7922 emit_set_insn (xdst, gen_rtx_MULT (mode, xdst, x1));
7923 }
7924
7925 if (!recp)
7926 {
7927 /* Qualify the approximate reciprocal square root when the argument is
7928 0.0 by squashing the intermediary result to 0.0. */
7929 rtx xtmp = gen_reg_rtx (mmsk);
7930 emit_set_insn (xtmp, gen_rtx_AND (mmsk, gen_rtx_NOT (mmsk, xmsk),
7931 gen_rtx_SUBREG (mmsk, xdst, 0)));
7932 emit_move_insn (xdst, gen_rtx_SUBREG (mode, xtmp, 0));
a6fc00da 7933
98daafa0
EM
7934 /* Calculate the approximate square root. */
7935 emit_set_insn (xdst, gen_rtx_MULT (mode, xdst, src));
a6fc00da
BH
7936 }
7937
98daafa0
EM
7938 /* Finalize the approximation. */
7939 emit_set_insn (dst, gen_rtx_MULT (mode, xdst, x1));
7940
7941 return true;
a6fc00da
BH
7942}
7943
79a2bc2d
EM
7944typedef rtx (*recpe_type) (rtx, rtx);
7945
7946/* Select reciprocal initial estimate insn depending on machine mode. */
7947
7948static recpe_type
7949get_recpe_type (machine_mode mode)
7950{
7951 switch (mode)
7952 {
7953 case SFmode: return (gen_aarch64_frecpesf);
7954 case V2SFmode: return (gen_aarch64_frecpev2sf);
7955 case V4SFmode: return (gen_aarch64_frecpev4sf);
7956 case DFmode: return (gen_aarch64_frecpedf);
7957 case V2DFmode: return (gen_aarch64_frecpev2df);
7958 default: gcc_unreachable ();
7959 }
7960}
7961
7962typedef rtx (*recps_type) (rtx, rtx, rtx);
7963
7964/* Select reciprocal series step insn depending on machine mode. */
7965
7966static recps_type
7967get_recps_type (machine_mode mode)
7968{
7969 switch (mode)
7970 {
7971 case SFmode: return (gen_aarch64_frecpssf);
7972 case V2SFmode: return (gen_aarch64_frecpsv2sf);
7973 case V4SFmode: return (gen_aarch64_frecpsv4sf);
7974 case DFmode: return (gen_aarch64_frecpsdf);
7975 case V2DFmode: return (gen_aarch64_frecpsv2df);
7976 default: gcc_unreachable ();
7977 }
7978}
7979
7980/* Emit the instruction sequence to compute the approximation for the division
7981 of NUM by DEN in QUO and return whether the sequence was emitted or not. */
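/* A sketch of the underlying math, assuming the usual Newton-Raphson
   refinement implemented by FRECPE/FRECPS:

     x0      = FRECPE (d)                       rough estimate of 1/d
     x(n+1)  = xn * FRECPS (d, xn)
	     = xn * (2 - d * xn)

   after which QUO is approximated as NUM * x.  */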
7982
7983bool
7984aarch64_emit_approx_div (rtx quo, rtx num, rtx den)
7985{
7986 machine_mode mode = GET_MODE (quo);
33d72b63
JW
7987
7988 if (GET_MODE_INNER (mode) == HFmode)
7989 return false;
7990
79a2bc2d
EM
7991 bool use_approx_division_p = (flag_mlow_precision_div
7992 || (aarch64_tune_params.approx_modes->division
7993 & AARCH64_APPROX_MODE (mode)));
7994
7995 if (!flag_finite_math_only
7996 || flag_trapping_math
7997 || !flag_unsafe_math_optimizations
7998 || optimize_function_for_size_p (cfun)
7999 || !use_approx_division_p)
8000 return false;
8001
8002 /* Estimate the approximate reciprocal. */
8003 rtx xrcp = gen_reg_rtx (mode);
8004 emit_insn ((*get_recpe_type (mode)) (xrcp, den));
8005
8006 /* Iterate over the series twice for SF and thrice for DF. */
8007 int iterations = (GET_MODE_INNER (mode) == DFmode) ? 3 : 2;
8008
 8009 /* Optionally iterate over the series once less for faster performance,
 8010 at the expense of some accuracy. */
8011 if (flag_mlow_precision_div)
8012 iterations--;
8013
8014 /* Iterate over the series to calculate the approximate reciprocal. */
8015 rtx xtmp = gen_reg_rtx (mode);
8016 while (iterations--)
8017 {
8018 emit_insn ((*get_recps_type (mode)) (xtmp, xrcp, den));
8019
8020 if (iterations > 0)
8021 emit_set_insn (xrcp, gen_rtx_MULT (mode, xrcp, xtmp));
8022 }
8023
8024 if (num != CONST1_RTX (mode))
8025 {
8026 /* As the approximate reciprocal of DEN is already calculated, only
8027 calculate the approximate division when NUM is not 1.0. */
8028 rtx xnum = force_reg (mode, num);
8029 emit_set_insn (xrcp, gen_rtx_MULT (mode, xrcp, xnum));
8030 }
8031
8032 /* Finalize the approximation. */
8033 emit_set_insn (quo, gen_rtx_MULT (mode, xrcp, xtmp));
8034 return true;
8035}
8036
d126a4ae
AP
8037/* Return the number of instructions that can be issued per cycle. */
8038static int
8039aarch64_sched_issue_rate (void)
8040{
b175b679 8041 return aarch64_tune_params.issue_rate;
d126a4ae
AP
8042}
8043
d03f7e44
MK
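/* Return the number of instructions to consider for first-cycle multipass
   DFA lookahead (descriptive comment): the issue rate when more than one
   instruction can issue per cycle and scheduling fusion is not active,
   otherwise 0, which disables the lookahead.  */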
8044static int
8045aarch64_sched_first_cycle_multipass_dfa_lookahead (void)
8046{
8047 int issue_rate = aarch64_sched_issue_rate ();
8048
8049 return issue_rate > 1 && !sched_fusion ? issue_rate : 0;
8050}
8051
2d6bc7fa
KT
8052
8053/* Implement TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD as
8054 autopref_multipass_dfa_lookahead_guard from haifa-sched.c. It only
8055 has an effect if PARAM_SCHED_AUTOPREF_QUEUE_DEPTH > 0. */
8056
8057static int
8058aarch64_first_cycle_multipass_dfa_lookahead_guard (rtx_insn *insn,
8059 int ready_index)
8060{
8061 return autopref_multipass_dfa_lookahead_guard (insn, ready_index);
8062}
8063
8064
8990e73a
TB
8065/* Vectorizer cost model target hooks. */
8066
8067/* Implement targetm.vectorize.builtin_vectorization_cost. */
8068static int
8069aarch64_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
8070 tree vectype,
8071 int misalign ATTRIBUTE_UNUSED)
8072{
8073 unsigned elements;
8074
8075 switch (type_of_cost)
8076 {
8077 case scalar_stmt:
b175b679 8078 return aarch64_tune_params.vec_costs->scalar_stmt_cost;
8990e73a
TB
8079
8080 case scalar_load:
b175b679 8081 return aarch64_tune_params.vec_costs->scalar_load_cost;
8990e73a
TB
8082
8083 case scalar_store:
b175b679 8084 return aarch64_tune_params.vec_costs->scalar_store_cost;
8990e73a
TB
8085
8086 case vector_stmt:
b175b679 8087 return aarch64_tune_params.vec_costs->vec_stmt_cost;
8990e73a
TB
8088
8089 case vector_load:
b175b679 8090 return aarch64_tune_params.vec_costs->vec_align_load_cost;
8990e73a
TB
8091
8092 case vector_store:
b175b679 8093 return aarch64_tune_params.vec_costs->vec_store_cost;
8990e73a
TB
8094
8095 case vec_to_scalar:
b175b679 8096 return aarch64_tune_params.vec_costs->vec_to_scalar_cost;
8990e73a
TB
8097
8098 case scalar_to_vec:
b175b679 8099 return aarch64_tune_params.vec_costs->scalar_to_vec_cost;
8990e73a
TB
8100
8101 case unaligned_load:
b175b679 8102 return aarch64_tune_params.vec_costs->vec_unalign_load_cost;
8990e73a
TB
8103
8104 case unaligned_store:
b175b679 8105 return aarch64_tune_params.vec_costs->vec_unalign_store_cost;
8990e73a
TB
8106
8107 case cond_branch_taken:
b175b679 8108 return aarch64_tune_params.vec_costs->cond_taken_branch_cost;
8990e73a
TB
8109
8110 case cond_branch_not_taken:
b175b679 8111 return aarch64_tune_params.vec_costs->cond_not_taken_branch_cost;
8990e73a
TB
8112
8113 case vec_perm:
c428f91c
WD
8114 return aarch64_tune_params.vec_costs->vec_permute_cost;
8115
8990e73a 8116 case vec_promote_demote:
b175b679 8117 return aarch64_tune_params.vec_costs->vec_stmt_cost;
8990e73a
TB
8118
8119 case vec_construct:
8120 elements = TYPE_VECTOR_SUBPARTS (vectype);
8121 return elements / 2 + 1;
8122
8123 default:
8124 gcc_unreachable ();
8125 }
8126}
8127
8128/* Implement targetm.vectorize.add_stmt_cost. */
8129static unsigned
8130aarch64_add_stmt_cost (void *data, int count, enum vect_cost_for_stmt kind,
8131 struct _stmt_vec_info *stmt_info, int misalign,
8132 enum vect_cost_model_location where)
8133{
8134 unsigned *cost = (unsigned *) data;
8135 unsigned retval = 0;
8136
8137 if (flag_vect_cost_model)
8138 {
8139 tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
8140 int stmt_cost =
8141 aarch64_builtin_vectorization_cost (kind, vectype, misalign);
8142
8143 /* Statements in an inner loop relative to the loop being
8144 vectorized are weighted more heavily. The value here is
058e4c71 8145 arbitrary and could potentially be improved with analysis. */
8990e73a 8146 if (where == vect_body && stmt_info && stmt_in_inner_loop_p (stmt_info))
058e4c71 8147 count *= 50; /* FIXME */
8990e73a
TB
8148
8149 retval = (unsigned) (count * stmt_cost);
8150 cost[where] += retval;
8151 }
8152
8153 return retval;
8154}
8155
0cfff2a1 8156static void initialize_aarch64_code_model (struct gcc_options *);
43e9d192 8157
0cfff2a1
KT
8158/* Parse the TO_PARSE string and put the architecture struct that it
8159 selects into RES and the architectural features into ISA_FLAGS.
8160 Return an aarch64_parse_opt_result describing the parse result.
8161 If there is an error parsing, RES and ISA_FLAGS are left unchanged. */
43e9d192 8162
0cfff2a1
KT
8163static enum aarch64_parse_opt_result
8164aarch64_parse_arch (const char *to_parse, const struct processor **res,
8165 unsigned long *isa_flags)
43e9d192
IB
8166{
8167 char *ext;
8168 const struct processor *arch;
0cfff2a1 8169 char *str = (char *) alloca (strlen (to_parse) + 1);
43e9d192
IB
8170 size_t len;
8171
0cfff2a1 8172 strcpy (str, to_parse);
43e9d192
IB
8173
8174 ext = strchr (str, '+');
8175
8176 if (ext != NULL)
8177 len = ext - str;
8178 else
8179 len = strlen (str);
8180
8181 if (len == 0)
0cfff2a1
KT
8182 return AARCH64_PARSE_MISSING_ARG;
8183
43e9d192 8184
0cfff2a1 8185 /* Loop through the list of supported ARCHes to find a match. */
43e9d192
IB
8186 for (arch = all_architectures; arch->name != NULL; arch++)
8187 {
8188 if (strlen (arch->name) == len && strncmp (arch->name, str, len) == 0)
8189 {
0cfff2a1 8190 unsigned long isa_temp = arch->flags;
43e9d192
IB
8191
8192 if (ext != NULL)
8193 {
0cfff2a1
KT
8194 /* TO_PARSE string contains at least one extension. */
8195 enum aarch64_parse_opt_result ext_res
8196 = aarch64_parse_extension (ext, &isa_temp);
43e9d192 8197
0cfff2a1
KT
8198 if (ext_res != AARCH64_PARSE_OK)
8199 return ext_res;
ffee7aa9 8200 }
0cfff2a1
KT
8201 /* Extension parsing was successful. Confirm the result
8202 arch and ISA flags. */
8203 *res = arch;
8204 *isa_flags = isa_temp;
8205 return AARCH64_PARSE_OK;
43e9d192
IB
8206 }
8207 }
8208
8209 /* ARCH name not found in list. */
0cfff2a1 8210 return AARCH64_PARSE_INVALID_ARG;
43e9d192
IB
8211}
8212
0cfff2a1
KT
 8213/* Parse the TO_PARSE string and put the CPU struct that it selects into RES
 8214 and the architectural features into ISA_FLAGS. Return an aarch64_parse_opt_result
8215 describing the parse result. If there is an error parsing, RES and
8216 ISA_FLAGS are left unchanged. */
43e9d192 8217
0cfff2a1
KT
8218static enum aarch64_parse_opt_result
8219aarch64_parse_cpu (const char *to_parse, const struct processor **res,
8220 unsigned long *isa_flags)
43e9d192
IB
8221{
8222 char *ext;
8223 const struct processor *cpu;
0cfff2a1 8224 char *str = (char *) alloca (strlen (to_parse) + 1);
43e9d192
IB
8225 size_t len;
8226
0cfff2a1 8227 strcpy (str, to_parse);
43e9d192
IB
8228
8229 ext = strchr (str, '+');
8230
8231 if (ext != NULL)
8232 len = ext - str;
8233 else
8234 len = strlen (str);
8235
8236 if (len == 0)
0cfff2a1
KT
8237 return AARCH64_PARSE_MISSING_ARG;
8238
43e9d192
IB
8239
8240 /* Loop through the list of supported CPUs to find a match. */
8241 for (cpu = all_cores; cpu->name != NULL; cpu++)
8242 {
8243 if (strlen (cpu->name) == len && strncmp (cpu->name, str, len) == 0)
8244 {
0cfff2a1
KT
8245 unsigned long isa_temp = cpu->flags;
8246
43e9d192
IB
8247
8248 if (ext != NULL)
8249 {
0cfff2a1
KT
8250 /* TO_PARSE string contains at least one extension. */
8251 enum aarch64_parse_opt_result ext_res
8252 = aarch64_parse_extension (ext, &isa_temp);
43e9d192 8253
0cfff2a1
KT
8254 if (ext_res != AARCH64_PARSE_OK)
8255 return ext_res;
8256 }
 8257 /* Extension parsing was successful. Confirm the result
8258 cpu and ISA flags. */
8259 *res = cpu;
8260 *isa_flags = isa_temp;
8261 return AARCH64_PARSE_OK;
43e9d192
IB
8262 }
8263 }
8264
8265 /* CPU name not found in list. */
0cfff2a1 8266 return AARCH64_PARSE_INVALID_ARG;
43e9d192
IB
8267}
8268
0cfff2a1
KT
8269/* Parse the TO_PARSE string and put the cpu it selects into RES.
8270 Return an aarch64_parse_opt_result describing the parse result.
 8271 If the parsing fails, RES is not changed. */
43e9d192 8272
0cfff2a1
KT
8273static enum aarch64_parse_opt_result
8274aarch64_parse_tune (const char *to_parse, const struct processor **res)
43e9d192
IB
8275{
8276 const struct processor *cpu;
0cfff2a1
KT
8277 char *str = (char *) alloca (strlen (to_parse) + 1);
8278
8279 strcpy (str, to_parse);
43e9d192
IB
8280
8281 /* Loop through the list of supported CPUs to find a match. */
8282 for (cpu = all_cores; cpu->name != NULL; cpu++)
8283 {
8284 if (strcmp (cpu->name, str) == 0)
8285 {
0cfff2a1
KT
8286 *res = cpu;
8287 return AARCH64_PARSE_OK;
43e9d192
IB
8288 }
8289 }
8290
8291 /* CPU name not found in list. */
0cfff2a1 8292 return AARCH64_PARSE_INVALID_ARG;
43e9d192
IB
8293}
8294
8dec06f2
JG
8295/* Parse TOKEN, which has length LENGTH to see if it is an option
8296 described in FLAG. If it is, return the index bit for that fusion type.
8297 If not, error (printing OPTION_NAME) and return zero. */
8298
8299static unsigned int
8300aarch64_parse_one_option_token (const char *token,
8301 size_t length,
8302 const struct aarch64_flag_desc *flag,
8303 const char *option_name)
8304{
8305 for (; flag->name != NULL; flag++)
8306 {
8307 if (length == strlen (flag->name)
8308 && !strncmp (flag->name, token, length))
8309 return flag->flag;
8310 }
8311
8312 error ("unknown flag passed in -moverride=%s (%s)", option_name, token);
8313 return 0;
8314}
8315
8316/* Parse OPTION which is a comma-separated list of flags to enable.
8317 FLAGS gives the list of flags we understand, INITIAL_STATE gives any
8318 default state we inherit from the CPU tuning structures. OPTION_NAME
8319 gives the top-level option we are parsing in the -moverride string,
8320 for use in error messages. */
8321
8322static unsigned int
8323aarch64_parse_boolean_options (const char *option,
8324 const struct aarch64_flag_desc *flags,
8325 unsigned int initial_state,
8326 const char *option_name)
8327{
8328 const char separator = '.';
8329 const char* specs = option;
8330 const char* ntoken = option;
8331 unsigned int found_flags = initial_state;
8332
8333 while ((ntoken = strchr (specs, separator)))
8334 {
8335 size_t token_length = ntoken - specs;
8336 unsigned token_ops = aarch64_parse_one_option_token (specs,
8337 token_length,
8338 flags,
8339 option_name);
8340 /* If we find "none" (or, for simplicity's sake, an error) anywhere
8341 in the token stream, reset the supported operations. So:
8342
8343 adrp+add.cmp+branch.none.adrp+add
8344
8345 would have the result of turning on only adrp+add fusion. */
8346 if (!token_ops)
8347 found_flags = 0;
8348
8349 found_flags |= token_ops;
8350 specs = ++ntoken;
8351 }
8352
 8353 /* The string ended with the separator, so the final token is empty;
 report the option as ill-formed. */
8354 if (!(*specs))
8355 {
8356 error ("%s string ill-formed\n", option_name);
8357 return 0;
8358 }
8359
8360 /* We still have one more token to parse. */
8361 size_t token_length = strlen (specs);
8362 unsigned token_ops = aarch64_parse_one_option_token (specs,
8363 token_length,
8364 flags,
8365 option_name);
8366 if (!token_ops)
8367 found_flags = 0;
8368
8369 found_flags |= token_ops;
8370 return found_flags;
8371}
8372
8373/* Support for overriding instruction fusion. */
8374
8375static void
8376aarch64_parse_fuse_string (const char *fuse_string,
8377 struct tune_params *tune)
8378{
8379 tune->fusible_ops = aarch64_parse_boolean_options (fuse_string,
8380 aarch64_fusible_pairs,
8381 tune->fusible_ops,
8382 "fuse=");
8383}
8384
8385/* Support for overriding other tuning flags. */
8386
8387static void
8388aarch64_parse_tune_string (const char *tune_string,
8389 struct tune_params *tune)
8390{
8391 tune->extra_tuning_flags
8392 = aarch64_parse_boolean_options (tune_string,
8393 aarch64_tuning_flags,
8394 tune->extra_tuning_flags,
8395 "tune=");
8396}
8397
8398/* Parse TOKEN, which has length LENGTH to see if it is a tuning option
8399 we understand. If it is, extract the option string and handoff to
8400 the appropriate function. */
8401
8402void
8403aarch64_parse_one_override_token (const char* token,
8404 size_t length,
8405 struct tune_params *tune)
8406{
8407 const struct aarch64_tuning_override_function *fn
8408 = aarch64_tuning_override_functions;
8409
8410 const char *option_part = strchr (token, '=');
8411 if (!option_part)
8412 {
8413 error ("tuning string missing in option (%s)", token);
8414 return;
8415 }
8416
8417 /* Get the length of the option name. */
8418 length = option_part - token;
8419 /* Skip the '=' to get to the option string. */
8420 option_part++;
8421
8422 for (; fn->name != NULL; fn++)
8423 {
8424 if (!strncmp (fn->name, token, length))
8425 {
8426 fn->parse_override (option_part, tune);
8427 return;
8428 }
8429 }
8430
8431 error ("unknown tuning option (%s)",token);
8432 return;
8433}
8434
5eee3c34
JW
 8435/* Validate and clamp the requested TLS size against the selected code model. */
8436
8437static void
8438initialize_aarch64_tls_size (struct gcc_options *opts)
8439{
8440 if (aarch64_tls_size == 0)
8441 aarch64_tls_size = 24;
8442
8443 switch (opts->x_aarch64_cmodel_var)
8444 {
8445 case AARCH64_CMODEL_TINY:
 8446 /* Both the default and the maximum TLS size allowed under tiny are 1M, which
 8447 needs two instructions to address, so we clamp the size to 24. */
8448 if (aarch64_tls_size > 24)
8449 aarch64_tls_size = 24;
8450 break;
8451 case AARCH64_CMODEL_SMALL:
8452 /* The maximum TLS size allowed under small is 4G. */
8453 if (aarch64_tls_size > 32)
8454 aarch64_tls_size = 32;
8455 break;
8456 case AARCH64_CMODEL_LARGE:
8457 /* The maximum TLS size allowed under large is 16E.
 8458 FIXME: 16E needs a 64-bit offset, but we only support a 48-bit offset now. */
8459 if (aarch64_tls_size > 48)
8460 aarch64_tls_size = 48;
8461 break;
8462 default:
8463 gcc_unreachable ();
8464 }
8465
8466 return;
8467}
8468
8dec06f2
JG
8469/* Parse STRING looking for options in the format:
8470 string :: option:string
8471 option :: name=substring
8472 name :: {a-z}
8473 substring :: defined by option. */
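/* For example, "-moverride=fuse=adrp+add.cmp+branch" (using the fusion pair
   names seen above) enables only those two fusion opportunities; tuning
   flags can be overridden in the same way through a "tune=" option.  */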
8474
8475static void
8476aarch64_parse_override_string (const char* input_string,
8477 struct tune_params* tune)
8478{
8479 const char separator = ':';
8480 size_t string_length = strlen (input_string) + 1;
8481 char *string_root = (char *) xmalloc (sizeof (*string_root) * string_length);
8482 char *string = string_root;
8483 strncpy (string, input_string, string_length);
8484 string[string_length - 1] = '\0';
8485
8486 char* ntoken = string;
8487
8488 while ((ntoken = strchr (string, separator)))
8489 {
8490 size_t token_length = ntoken - string;
8491 /* Make this substring look like a string. */
8492 *ntoken = '\0';
8493 aarch64_parse_one_override_token (string, token_length, tune);
8494 string = ++ntoken;
8495 }
8496
8497 /* One last option to parse. */
8498 aarch64_parse_one_override_token (string, strlen (string), tune);
8499 free (string_root);
8500}
43e9d192 8501
43e9d192
IB
8502
8503static void
0cfff2a1 8504aarch64_override_options_after_change_1 (struct gcc_options *opts)
43e9d192 8505{
a3dc8760
NC
8506 /* The logic here is that if we are disabling all frame pointer generation
8507 then we do not need to disable leaf frame pointer generation as a
8508 separate operation. But if we are *only* disabling leaf frame pointer
8509 generation then we set flag_omit_frame_pointer to true, but in
8510 aarch64_frame_pointer_required we return false only for leaf functions.
8511
8512 PR 70044: We have to be careful about being called multiple times for the
8513 same function. Once we have decided to set flag_omit_frame_pointer just
8514 so that we can omit leaf frame pointers, we must then not interpret a
8515 second call as meaning that all frame pointer generation should be
8516 omitted. We do this by setting flag_omit_frame_pointer to a special,
8517 non-zero value. */
8518 if (opts->x_flag_omit_frame_pointer == 2)
8519 opts->x_flag_omit_frame_pointer = 0;
8520
0cfff2a1
KT
8521 if (opts->x_flag_omit_frame_pointer)
8522 opts->x_flag_omit_leaf_frame_pointer = false;
8523 else if (opts->x_flag_omit_leaf_frame_pointer)
a3dc8760 8524 opts->x_flag_omit_frame_pointer = 2;
43e9d192 8525
1be34295 8526 /* If not optimizing for size, set the default
0cfff2a1
KT
8527 alignment to what the target wants. */
8528 if (!opts->x_optimize_size)
43e9d192 8529 {
0cfff2a1
KT
8530 if (opts->x_align_loops <= 0)
8531 opts->x_align_loops = aarch64_tune_params.loop_align;
8532 if (opts->x_align_jumps <= 0)
8533 opts->x_align_jumps = aarch64_tune_params.jump_align;
8534 if (opts->x_align_functions <= 0)
8535 opts->x_align_functions = aarch64_tune_params.function_align;
43e9d192 8536 }
b4f50fd4 8537
9ee6540a
WD
8538 /* We default to no pc-relative literal loads. */
8539
8540 aarch64_pcrelative_literal_loads = false;
8541
8542 /* If -mpc-relative-literal-loads is set on the command line, this
b4f50fd4 8543 implies that the user asked for PC relative literal loads. */
9ee6540a
WD
8544 if (opts->x_pcrelative_literal_loads == 1)
8545 aarch64_pcrelative_literal_loads = true;
b4f50fd4 8546
48bb1a55
CL
8547 /* This is PR70113. When building the Linux kernel with
8548 CONFIG_ARM64_ERRATUM_843419, support for relocations
8549 R_AARCH64_ADR_PREL_PG_HI21 and R_AARCH64_ADR_PREL_PG_HI21_NC is
8550 removed from the kernel to avoid loading objects with possibly
9ee6540a 8551 offending sequences. Without -mpc-relative-literal-loads we would
48bb1a55
CL
8552 generate such relocations, preventing the kernel build from
8553 succeeding. */
9ee6540a
WD
8554 if (opts->x_pcrelative_literal_loads == 2
8555 && TARGET_FIX_ERR_A53_843419)
8556 aarch64_pcrelative_literal_loads = true;
8557
8558 /* In the tiny memory model it makes no sense to disallow PC relative
8559 literal pool loads. */
8560 if (aarch64_cmodel == AARCH64_CMODEL_TINY
8561 || aarch64_cmodel == AARCH64_CMODEL_TINY_PIC)
8562 aarch64_pcrelative_literal_loads = true;
98daafa0
EM
8563
8564 /* When enabling the lower precision Newton series for the square root, also
8565 enable it for the reciprocal square root, since the latter is an
8566 intermediary step for the former. */
8567 if (flag_mlow_precision_sqrt)
8568 flag_mrecip_low_precision_sqrt = true;
0cfff2a1 8569}
43e9d192 8570
0cfff2a1
KT
 8571/* 'Unpack' the internal tuning structs and update the options
8572 in OPTS. The caller must have set up selected_tune and selected_arch
8573 as all the other target-specific codegen decisions are
8574 derived from them. */
8575
e4ea20c8 8576void
0cfff2a1
KT
8577aarch64_override_options_internal (struct gcc_options *opts)
8578{
8579 aarch64_tune_flags = selected_tune->flags;
8580 aarch64_tune = selected_tune->sched_core;
8581 /* Make a copy of the tuning parameters attached to the core, which
8582 we may later overwrite. */
8583 aarch64_tune_params = *(selected_tune->tune);
8584 aarch64_architecture_version = selected_arch->architecture_version;
8585
8586 if (opts->x_aarch64_override_tune_string)
8587 aarch64_parse_override_string (opts->x_aarch64_override_tune_string,
8588 &aarch64_tune_params);
8589
8590 /* This target defaults to strict volatile bitfields. */
8591 if (opts->x_flag_strict_volatile_bitfields < 0 && abi_version_at_least (2))
8592 opts->x_flag_strict_volatile_bitfields = 1;
8593
0cfff2a1 8594 initialize_aarch64_code_model (opts);
5eee3c34 8595 initialize_aarch64_tls_size (opts);
63892fa2 8596
2d6bc7fa
KT
8597 int queue_depth = 0;
8598 switch (aarch64_tune_params.autoprefetcher_model)
8599 {
8600 case tune_params::AUTOPREFETCHER_OFF:
8601 queue_depth = -1;
8602 break;
8603 case tune_params::AUTOPREFETCHER_WEAK:
8604 queue_depth = 0;
8605 break;
8606 case tune_params::AUTOPREFETCHER_STRONG:
8607 queue_depth = max_insn_queue_index + 1;
8608 break;
8609 default:
8610 gcc_unreachable ();
8611 }
8612
8613 /* We don't mind passing in global_options_set here as we don't use
8614 the *options_set structs anyway. */
8615 maybe_set_param_value (PARAM_SCHED_AUTOPREF_QUEUE_DEPTH,
8616 queue_depth,
8617 opts->x_param_values,
8618 global_options_set.x_param_values);
8619
50487d79
EM
8620 /* Set the L1 cache line size. */
8621 if (selected_cpu->tune->cache_line_size != 0)
8622 maybe_set_param_value (PARAM_L1_CACHE_LINE_SIZE,
8623 selected_cpu->tune->cache_line_size,
8624 opts->x_param_values,
8625 global_options_set.x_param_values);
8626
0cfff2a1
KT
8627 aarch64_override_options_after_change_1 (opts);
8628}
43e9d192 8629
01f44038
KT
8630/* Print a hint with a suggestion for a core or architecture name that
8631 most closely resembles what the user passed in STR. ARCH is true if
8632 the user is asking for an architecture name. ARCH is false if the user
8633 is asking for a core name. */
8634
8635static void
8636aarch64_print_hint_for_core_or_arch (const char *str, bool arch)
8637{
8638 auto_vec<const char *> candidates;
8639 const struct processor *entry = arch ? all_architectures : all_cores;
8640 for (; entry->name != NULL; entry++)
8641 candidates.safe_push (entry->name);
8642 char *s;
8643 const char *hint = candidates_list_and_hint (str, s, candidates);
8644 if (hint)
8645 inform (input_location, "valid arguments are: %s;"
8646 " did you mean %qs?", s, hint);
8647 XDELETEVEC (s);
8648}
8649
8650/* Print a hint with a suggestion for a core name that most closely resembles
8651 what the user passed in STR. */
8652
8653inline static void
8654aarch64_print_hint_for_core (const char *str)
8655{
8656 aarch64_print_hint_for_core_or_arch (str, false);
8657}
8658
8659/* Print a hint with a suggestion for an architecture name that most closely
8660 resembles what the user passed in STR. */
8661
8662inline static void
8663aarch64_print_hint_for_arch (const char *str)
8664{
8665 aarch64_print_hint_for_core_or_arch (str, true);
8666}
8667
0cfff2a1
KT
8668/* Validate a command-line -mcpu option. Parse the cpu and extensions (if any)
 8669 specified in STR and throw errors if appropriate. Put the results, if
361fb3ee
KT
 8670 they are valid, in RES and ISA_FLAGS. Return whether the option is
8671 valid. */
43e9d192 8672
361fb3ee 8673static bool
0cfff2a1
KT
8674aarch64_validate_mcpu (const char *str, const struct processor **res,
8675 unsigned long *isa_flags)
8676{
8677 enum aarch64_parse_opt_result parse_res
8678 = aarch64_parse_cpu (str, res, isa_flags);
8679
8680 if (parse_res == AARCH64_PARSE_OK)
361fb3ee 8681 return true;
0cfff2a1
KT
8682
8683 switch (parse_res)
8684 {
8685 case AARCH64_PARSE_MISSING_ARG:
8686 error ("missing cpu name in -mcpu=%qs", str);
8687 break;
8688 case AARCH64_PARSE_INVALID_ARG:
8689 error ("unknown value %qs for -mcpu", str);
01f44038 8690 aarch64_print_hint_for_core (str);
0cfff2a1
KT
8691 break;
8692 case AARCH64_PARSE_INVALID_FEATURE:
8693 error ("invalid feature modifier in -mcpu=%qs", str);
8694 break;
8695 default:
8696 gcc_unreachable ();
8697 }
361fb3ee
KT
8698
8699 return false;
0cfff2a1
KT
8700}
8701
8702/* Validate a command-line -march option. Parse the arch and extensions
8703 (if any) specified in STR and throw errors if appropriate. Put the
361fb3ee
KT
8704 results, if they are valid, in RES and ISA_FLAGS. Return whether the
8705 option is valid. */
0cfff2a1 8706
361fb3ee 8707static bool
0cfff2a1 8708aarch64_validate_march (const char *str, const struct processor **res,
01f44038 8709 unsigned long *isa_flags)
0cfff2a1
KT
8710{
8711 enum aarch64_parse_opt_result parse_res
8712 = aarch64_parse_arch (str, res, isa_flags);
8713
8714 if (parse_res == AARCH64_PARSE_OK)
361fb3ee 8715 return true;
0cfff2a1
KT
8716
8717 switch (parse_res)
8718 {
8719 case AARCH64_PARSE_MISSING_ARG:
8720 error ("missing arch name in -march=%qs", str);
8721 break;
8722 case AARCH64_PARSE_INVALID_ARG:
8723 error ("unknown value %qs for -march", str);
01f44038 8724 aarch64_print_hint_for_arch (str);
0cfff2a1
KT
8725 break;
8726 case AARCH64_PARSE_INVALID_FEATURE:
8727 error ("invalid feature modifier in -march=%qs", str);
8728 break;
8729 default:
8730 gcc_unreachable ();
8731 }
361fb3ee
KT
8732
8733 return false;
0cfff2a1
KT
8734}
8735
8736/* Validate a command-line -mtune option. Parse the cpu
8737 specified in STR and throw errors if appropriate. Put the
361fb3ee
KT
8738 result, if it is valid, in RES. Return whether the option is
8739 valid. */
0cfff2a1 8740
361fb3ee 8741static bool
0cfff2a1
KT
8742aarch64_validate_mtune (const char *str, const struct processor **res)
8743{
8744 enum aarch64_parse_opt_result parse_res
8745 = aarch64_parse_tune (str, res);
8746
8747 if (parse_res == AARCH64_PARSE_OK)
361fb3ee 8748 return true;
0cfff2a1
KT
8749
8750 switch (parse_res)
8751 {
8752 case AARCH64_PARSE_MISSING_ARG:
8753 error ("missing cpu name in -mtune=%qs", str);
8754 break;
8755 case AARCH64_PARSE_INVALID_ARG:
8756 error ("unknown value %qs for -mtune", str);
01f44038 8757 aarch64_print_hint_for_core (str);
0cfff2a1
KT
8758 break;
8759 default:
8760 gcc_unreachable ();
8761 }
361fb3ee
KT
8762 return false;
8763}
8764
8765/* Return the CPU corresponding to the enum CPU.
8766 If it doesn't specify a cpu, return the default. */
8767
8768static const struct processor *
8769aarch64_get_tune_cpu (enum aarch64_processor cpu)
8770{
8771 if (cpu != aarch64_none)
8772 return &all_cores[cpu];
8773
8774 /* The & 0x3f is to extract the bottom 6 bits that encode the
8775 default cpu as selected by the --with-cpu GCC configure option
8776 in config.gcc.
8777 ???: The whole TARGET_CPU_DEFAULT and AARCH64_CPU_DEFAULT_FLAGS
8778 flags mechanism should be reworked to make it more sane. */
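  /* For illustration, based on the uses in this file: TARGET_CPU_DEFAULT is
     packed as <default cpu index> | (<default ISA flags> << 6), so the low
     six bits are decoded here and aarch64_override_options recovers the
     flags with TARGET_CPU_DEFAULT >> 6.  */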
8779 return &all_cores[TARGET_CPU_DEFAULT & 0x3f];
8780}
8781
8782/* Return the architecture corresponding to the enum ARCH.
8783 If it doesn't specify a valid architecture, return the default. */
8784
8785static const struct processor *
8786aarch64_get_arch (enum aarch64_arch arch)
8787{
8788 if (arch != aarch64_no_arch)
8789 return &all_architectures[arch];
8790
8791 const struct processor *cpu = &all_cores[TARGET_CPU_DEFAULT & 0x3f];
8792
8793 return &all_architectures[cpu->arch];
0cfff2a1
KT
8794}
8795
8796/* Implement TARGET_OPTION_OVERRIDE. This is called once in the beginning
8797 and is used to parse the -m{cpu,tune,arch} strings and setup the initial
8798 tuning structs. In particular it must set selected_tune and
8799 aarch64_isa_flags that define the available ISA features and tuning
8800 decisions. It must also set selected_arch as this will be used to
8801 output the .arch asm tags for each function. */
8802
8803static void
8804aarch64_override_options (void)
8805{
8806 unsigned long cpu_isa = 0;
8807 unsigned long arch_isa = 0;
8808 aarch64_isa_flags = 0;
8809
361fb3ee
KT
8810 bool valid_cpu = true;
8811 bool valid_tune = true;
8812 bool valid_arch = true;
8813
0cfff2a1
KT
8814 selected_cpu = NULL;
8815 selected_arch = NULL;
8816 selected_tune = NULL;
8817
8818 /* -mcpu=CPU is shorthand for -march=ARCH_FOR_CPU, -mtune=CPU.
8819 If either of -march or -mtune is given, they override their
8820 respective component of -mcpu. */
8821 if (aarch64_cpu_string)
361fb3ee
KT
8822 valid_cpu = aarch64_validate_mcpu (aarch64_cpu_string, &selected_cpu,
8823 &cpu_isa);
0cfff2a1
KT
8824
8825 if (aarch64_arch_string)
361fb3ee
KT
8826 valid_arch = aarch64_validate_march (aarch64_arch_string, &selected_arch,
8827 &arch_isa);
0cfff2a1
KT
8828
8829 if (aarch64_tune_string)
361fb3ee 8830 valid_tune = aarch64_validate_mtune (aarch64_tune_string, &selected_tune);
43e9d192
IB
8831
8832 /* If the user did not specify a processor, choose the default
8833 one for them. This will be the CPU set during configuration using
a3cd0246 8834 --with-cpu, otherwise it is "generic". */
43e9d192
IB
8835 if (!selected_cpu)
8836 {
0cfff2a1
KT
8837 if (selected_arch)
8838 {
8839 selected_cpu = &all_cores[selected_arch->ident];
8840 aarch64_isa_flags = arch_isa;
361fb3ee 8841 explicit_arch = selected_arch->arch;
0cfff2a1
KT
8842 }
8843 else
8844 {
361fb3ee
KT
8845 /* Get default configure-time CPU. */
8846 selected_cpu = aarch64_get_tune_cpu (aarch64_none);
0cfff2a1
KT
8847 aarch64_isa_flags = TARGET_CPU_DEFAULT >> 6;
8848 }
361fb3ee
KT
8849
8850 if (selected_tune)
8851 explicit_tune_core = selected_tune->ident;
0cfff2a1
KT
8852 }
 8853	  /* If both -mcpu and -march are specified, check that they are architecturally
 8854	     compatible, warn if they're not and prefer the -march ISA flags.  */
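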
8855 else if (selected_arch)
8856 {
8857 if (selected_arch->arch != selected_cpu->arch)
8858 {
8859 warning (0, "switch -mcpu=%s conflicts with -march=%s switch",
8860 all_architectures[selected_cpu->arch].name,
8861 selected_arch->name);
8862 }
8863 aarch64_isa_flags = arch_isa;
361fb3ee
KT
8864 explicit_arch = selected_arch->arch;
8865 explicit_tune_core = selected_tune ? selected_tune->ident
8866 : selected_cpu->ident;
0cfff2a1
KT
8867 }
8868 else
8869 {
8870 /* -mcpu but no -march. */
8871 aarch64_isa_flags = cpu_isa;
361fb3ee
KT
8872 explicit_tune_core = selected_tune ? selected_tune->ident
8873 : selected_cpu->ident;
8874 gcc_assert (selected_cpu);
8875 selected_arch = &all_architectures[selected_cpu->arch];
8876 explicit_arch = selected_arch->arch;
43e9d192
IB
8877 }
8878
0cfff2a1
KT
 8879	  /* Set the arch as well, as we will need it when outputting
 8880	     the .arch directive in assembly.  */
8881 if (!selected_arch)
8882 {
8883 gcc_assert (selected_cpu);
8884 selected_arch = &all_architectures[selected_cpu->arch];
8885 }
43e9d192 8886
43e9d192 8887 if (!selected_tune)
3edaf26d 8888 selected_tune = selected_cpu;
43e9d192 8889
0cfff2a1
KT
8890#ifndef HAVE_AS_MABI_OPTION
8891 /* The compiler may have been configured with 2.23.* binutils, which does
8892 not have support for ILP32. */
8893 if (TARGET_ILP32)
8894 error ("Assembler does not support -mabi=ilp32");
8895#endif
43e9d192 8896
361fb3ee
KT
8897 /* Make sure we properly set up the explicit options. */
8898 if ((aarch64_cpu_string && valid_cpu)
8899 || (aarch64_tune_string && valid_tune))
8900 gcc_assert (explicit_tune_core != aarch64_none);
8901
8902 if ((aarch64_cpu_string && valid_cpu)
8903 || (aarch64_arch_string && valid_arch))
8904 gcc_assert (explicit_arch != aarch64_no_arch);
8905
0cfff2a1
KT
8906 aarch64_override_options_internal (&global_options);
8907
8908 /* Save these options as the default ones in case we push and pop them later
8909 while processing functions with potential target attributes. */
8910 target_option_default_node = target_option_current_node
8911 = build_target_option_node (&global_options);
43e9d192
IB
8912}
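
/* Worked examples of the precedence implemented above (illustrative only):

     -mcpu=cortex-a57                     arch, tune and ISA all from cortex-a57
     -mcpu=cortex-a57 -mtune=cortex-a53   ISA/arch from cortex-a57, tuning from
                                          cortex-a53
     -mcpu=cortex-a57 -march=armv8-a+crc  ISA flags from -march (with a warning
                                          if the architectures conflict), tuning
                                          still from cortex-a57
     (no option)                          configure-time --with-cpu default, or
                                          "generic" if none was given.  */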
8913
8914/* Implement targetm.override_options_after_change. */
8915
8916static void
8917aarch64_override_options_after_change (void)
8918{
0cfff2a1 8919 aarch64_override_options_after_change_1 (&global_options);
43e9d192
IB
8920}
8921
8922static struct machine_function *
8923aarch64_init_machine_status (void)
8924{
8925 struct machine_function *machine;
766090c2 8926 machine = ggc_cleared_alloc<machine_function> ();
43e9d192
IB
8927 return machine;
8928}
8929
8930void
8931aarch64_init_expanders (void)
8932{
8933 init_machine_status = aarch64_init_machine_status;
8934}
8935
8936/* A checking mechanism for the implementation of the various code models. */
8937static void
0cfff2a1 8938initialize_aarch64_code_model (struct gcc_options *opts)
43e9d192 8939{
0cfff2a1 8940 if (opts->x_flag_pic)
43e9d192 8941 {
0cfff2a1 8942 switch (opts->x_aarch64_cmodel_var)
43e9d192
IB
8943 {
8944 case AARCH64_CMODEL_TINY:
8945 aarch64_cmodel = AARCH64_CMODEL_TINY_PIC;
8946 break;
8947 case AARCH64_CMODEL_SMALL:
34ecdb0f 8948#ifdef HAVE_AS_SMALL_PIC_RELOCS
1b1e81f8
JW
8949 aarch64_cmodel = (flag_pic == 2
8950 ? AARCH64_CMODEL_SMALL_PIC
8951 : AARCH64_CMODEL_SMALL_SPIC);
34ecdb0f
JW
8952#else
8953 aarch64_cmodel = AARCH64_CMODEL_SMALL_PIC;
8954#endif
43e9d192
IB
8955 break;
8956 case AARCH64_CMODEL_LARGE:
8957 sorry ("code model %qs with -f%s", "large",
0cfff2a1 8958 opts->x_flag_pic > 1 ? "PIC" : "pic");
1c652781 8959 break;
43e9d192
IB
8960 default:
8961 gcc_unreachable ();
8962 }
8963 }
8964 else
0cfff2a1 8965 aarch64_cmodel = opts->x_aarch64_cmodel_var;
43e9d192
IB
8966}
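
/* Example of the mapping above (illustrative): with the default small code
   model,

     gcc -fpic foo.c    ->  AARCH64_CMODEL_SMALL_SPIC if the assembler supports
                            the small GOT relocations (HAVE_AS_SMALL_PIC_RELOCS),
                            else AARCH64_CMODEL_SMALL_PIC
     gcc -fPIC foo.c    ->  AARCH64_CMODEL_SMALL_PIC
     gcc -mcmodel=large -fpic foo.c  ->  rejected with the "sorry" above.  */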
8967
361fb3ee
KT
8968/* Implement TARGET_OPTION_SAVE. */
8969
8970static void
8971aarch64_option_save (struct cl_target_option *ptr, struct gcc_options *opts)
8972{
8973 ptr->x_aarch64_override_tune_string = opts->x_aarch64_override_tune_string;
8974}
8975
8976/* Implements TARGET_OPTION_RESTORE. Restore the backend codegen decisions
8977 using the information saved in PTR. */
8978
8979static void
8980aarch64_option_restore (struct gcc_options *opts, struct cl_target_option *ptr)
8981{
8982 opts->x_explicit_tune_core = ptr->x_explicit_tune_core;
8983 selected_tune = aarch64_get_tune_cpu (ptr->x_explicit_tune_core);
8984 opts->x_explicit_arch = ptr->x_explicit_arch;
8985 selected_arch = aarch64_get_arch (ptr->x_explicit_arch);
8986 opts->x_aarch64_override_tune_string = ptr->x_aarch64_override_tune_string;
8987
8988 aarch64_override_options_internal (opts);
8989}
8990
8991/* Implement TARGET_OPTION_PRINT. */
8992
8993static void
8994aarch64_option_print (FILE *file, int indent, struct cl_target_option *ptr)
8995{
8996 const struct processor *cpu
8997 = aarch64_get_tune_cpu (ptr->x_explicit_tune_core);
8998 unsigned long isa_flags = ptr->x_aarch64_isa_flags;
8999 const struct processor *arch = aarch64_get_arch (ptr->x_explicit_arch);
054b4005 9000 std::string extension
04a99ebe 9001 = aarch64_get_extension_string_for_isa_flags (isa_flags, arch->flags);
361fb3ee
KT
9002
9003 fprintf (file, "%*sselected tune = %s\n", indent, "", cpu->name);
054b4005
JG
9004 fprintf (file, "%*sselected arch = %s%s\n", indent, "",
9005 arch->name, extension.c_str ());
361fb3ee
KT
9006}
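
/* For reference, aarch64_option_print above emits output roughly of the form
   (the exact extension string depends on the saved ISA flags):

     selected tune = cortex-a57
     selected arch = armv8-a+crc
*/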
9007
d78006d9
KT
9008static GTY(()) tree aarch64_previous_fndecl;
9009
e4ea20c8
KT
9010void
9011aarch64_reset_previous_fndecl (void)
9012{
9013 aarch64_previous_fndecl = NULL;
9014}
9015
acfc1ac1
KT
9016/* Restore or save the TREE_TARGET_GLOBALS from or to NEW_TREE.
9017 Used by aarch64_set_current_function and aarch64_pragma_target_parse to
9018 make sure optab availability predicates are recomputed when necessary. */
9019
9020void
9021aarch64_save_restore_target_globals (tree new_tree)
9022{
9023 if (TREE_TARGET_GLOBALS (new_tree))
9024 restore_target_globals (TREE_TARGET_GLOBALS (new_tree));
9025 else if (new_tree == target_option_default_node)
9026 restore_target_globals (&default_target_globals);
9027 else
9028 TREE_TARGET_GLOBALS (new_tree) = save_target_globals_default_opts ();
9029}
9030
d78006d9
KT
9031/* Implement TARGET_SET_CURRENT_FUNCTION. Unpack the codegen decisions
9032 like tuning and ISA features from the DECL_FUNCTION_SPECIFIC_TARGET
9033 of the function, if such exists. This function may be called multiple
9034 times on a single function so use aarch64_previous_fndecl to avoid
9035 setting up identical state. */
9036
9037static void
9038aarch64_set_current_function (tree fndecl)
9039{
acfc1ac1
KT
9040 if (!fndecl || fndecl == aarch64_previous_fndecl)
9041 return;
9042
d78006d9
KT
9043 tree old_tree = (aarch64_previous_fndecl
9044 ? DECL_FUNCTION_SPECIFIC_TARGET (aarch64_previous_fndecl)
9045 : NULL_TREE);
9046
acfc1ac1 9047 tree new_tree = DECL_FUNCTION_SPECIFIC_TARGET (fndecl);
d78006d9 9048
acfc1ac1
KT
9049 /* If current function has no attributes but the previous one did,
9050 use the default node. */
9051 if (!new_tree && old_tree)
9052 new_tree = target_option_default_node;
d78006d9 9053
acfc1ac1
KT
9054 /* If nothing to do, return. #pragma GCC reset or #pragma GCC pop to
9055 the default have been handled by aarch64_save_restore_target_globals from
9056 aarch64_pragma_target_parse. */
9057 if (old_tree == new_tree)
9058 return;
d78006d9 9059
acfc1ac1 9060 aarch64_previous_fndecl = fndecl;
6e17a23b 9061
acfc1ac1
KT
9062 /* First set the target options. */
9063 cl_target_option_restore (&global_options, TREE_TARGET_OPTION (new_tree));
6e17a23b 9064
acfc1ac1 9065 aarch64_save_restore_target_globals (new_tree);
d78006d9 9066}
361fb3ee 9067
5a2c8331
KT
9068/* Enum describing the various ways we can handle attributes.
9069 In many cases we can reuse the generic option handling machinery. */
9070
9071enum aarch64_attr_opt_type
9072{
9073 aarch64_attr_mask, /* Attribute should set a bit in target_flags. */
9074 aarch64_attr_bool, /* Attribute sets or unsets a boolean variable. */
9075 aarch64_attr_enum, /* Attribute sets an enum variable. */
9076 aarch64_attr_custom /* Attribute requires a custom handling function. */
9077};
9078
9079/* All the information needed to handle a target attribute.
9080 NAME is the name of the attribute.
9c582551 9081 ATTR_TYPE specifies the type of behavior of the attribute as described
5a2c8331
KT
9082 in the definition of enum aarch64_attr_opt_type.
9083 ALLOW_NEG is true if the attribute supports a "no-" form.
9084 HANDLER is the function that takes the attribute string and whether
9085 it is a pragma or attribute and handles the option. It is needed only
9086 when the ATTR_TYPE is aarch64_attr_custom.
9087 OPT_NUM is the enum specifying the option that the attribute modifies.
9c582551 9088 This is needed for attributes that mirror the behavior of a command-line
5a2c8331
KT
9089 option, that is it has ATTR_TYPE aarch64_attr_mask, aarch64_attr_bool or
9090 aarch64_attr_enum. */
9091
9092struct aarch64_attribute_info
9093{
9094 const char *name;
9095 enum aarch64_attr_opt_type attr_type;
9096 bool allow_neg;
9097 bool (*handler) (const char *, const char *);
9098 enum opt_code opt_num;
9099};
9100
9101/* Handle the ARCH_STR argument to the arch= target attribute.
9102 PRAGMA_OR_ATTR is used in potential error messages. */
9103
9104static bool
9105aarch64_handle_attr_arch (const char *str, const char *pragma_or_attr)
9106{
9107 const struct processor *tmp_arch = NULL;
9108 enum aarch64_parse_opt_result parse_res
9109 = aarch64_parse_arch (str, &tmp_arch, &aarch64_isa_flags);
9110
9111 if (parse_res == AARCH64_PARSE_OK)
9112 {
9113 gcc_assert (tmp_arch);
9114 selected_arch = tmp_arch;
9115 explicit_arch = selected_arch->arch;
9116 return true;
9117 }
9118
9119 switch (parse_res)
9120 {
9121 case AARCH64_PARSE_MISSING_ARG:
9122 error ("missing architecture name in 'arch' target %s", pragma_or_attr);
9123 break;
9124 case AARCH64_PARSE_INVALID_ARG:
9125 error ("unknown value %qs for 'arch' target %s", str, pragma_or_attr);
01f44038 9126 aarch64_print_hint_for_arch (str);
5a2c8331
KT
9127 break;
9128 case AARCH64_PARSE_INVALID_FEATURE:
9129 error ("invalid feature modifier %qs for 'arch' target %s",
9130 str, pragma_or_attr);
9131 break;
9132 default:
9133 gcc_unreachable ();
9134 }
9135
9136 return false;
9137}
9138
9139/* Handle the argument CPU_STR to the cpu= target attribute.
9140 PRAGMA_OR_ATTR is used in potential error messages. */
9141
9142static bool
9143aarch64_handle_attr_cpu (const char *str, const char *pragma_or_attr)
9144{
9145 const struct processor *tmp_cpu = NULL;
9146 enum aarch64_parse_opt_result parse_res
9147 = aarch64_parse_cpu (str, &tmp_cpu, &aarch64_isa_flags);
9148
9149 if (parse_res == AARCH64_PARSE_OK)
9150 {
9151 gcc_assert (tmp_cpu);
9152 selected_tune = tmp_cpu;
9153 explicit_tune_core = selected_tune->ident;
9154
9155 selected_arch = &all_architectures[tmp_cpu->arch];
9156 explicit_arch = selected_arch->arch;
9157 return true;
9158 }
9159
9160 switch (parse_res)
9161 {
9162 case AARCH64_PARSE_MISSING_ARG:
9163 error ("missing cpu name in 'cpu' target %s", pragma_or_attr);
9164 break;
9165 case AARCH64_PARSE_INVALID_ARG:
9166 error ("unknown value %qs for 'cpu' target %s", str, pragma_or_attr);
01f44038 9167 aarch64_print_hint_for_core (str);
5a2c8331
KT
9168 break;
9169 case AARCH64_PARSE_INVALID_FEATURE:
9170 error ("invalid feature modifier %qs for 'cpu' target %s",
9171 str, pragma_or_attr);
9172 break;
9173 default:
9174 gcc_unreachable ();
9175 }
9176
9177 return false;
9178}
9179
9180/* Handle the argument STR to the tune= target attribute.
9181 PRAGMA_OR_ATTR is used in potential error messages. */
9182
9183static bool
9184aarch64_handle_attr_tune (const char *str, const char *pragma_or_attr)
9185{
9186 const struct processor *tmp_tune = NULL;
9187 enum aarch64_parse_opt_result parse_res
9188 = aarch64_parse_tune (str, &tmp_tune);
9189
9190 if (parse_res == AARCH64_PARSE_OK)
9191 {
9192 gcc_assert (tmp_tune);
9193 selected_tune = tmp_tune;
9194 explicit_tune_core = selected_tune->ident;
9195 return true;
9196 }
9197
9198 switch (parse_res)
9199 {
9200 case AARCH64_PARSE_INVALID_ARG:
9201 error ("unknown value %qs for 'tune' target %s", str, pragma_or_attr);
01f44038 9202 aarch64_print_hint_for_core (str);
5a2c8331
KT
9203 break;
9204 default:
9205 gcc_unreachable ();
9206 }
9207
9208 return false;
9209}
9210
9211/* Parse an architecture extensions target attribute string specified in STR.
9212 For example "+fp+nosimd". Show any errors if needed. Return TRUE
9213 if successful. Update aarch64_isa_flags to reflect the ISA features
9214 modified.
9215 PRAGMA_OR_ATTR is used in potential error messages. */
9216
9217static bool
9218aarch64_handle_attr_isa_flags (char *str, const char *pragma_or_attr)
9219{
9220 enum aarch64_parse_opt_result parse_res;
9221 unsigned long isa_flags = aarch64_isa_flags;
9222
e4ea20c8
KT
9223 /* We allow "+nothing" in the beginning to clear out all architectural
9224 features if the user wants to handpick specific features. */
9225 if (strncmp ("+nothing", str, 8) == 0)
9226 {
9227 isa_flags = 0;
9228 str += 8;
9229 }
9230
5a2c8331
KT
9231 parse_res = aarch64_parse_extension (str, &isa_flags);
9232
9233 if (parse_res == AARCH64_PARSE_OK)
9234 {
9235 aarch64_isa_flags = isa_flags;
9236 return true;
9237 }
9238
9239 switch (parse_res)
9240 {
9241 case AARCH64_PARSE_MISSING_ARG:
9242 error ("missing feature modifier in target %s %qs",
9243 pragma_or_attr, str);
9244 break;
9245
9246 case AARCH64_PARSE_INVALID_FEATURE:
9247 error ("invalid feature modifier in target %s %qs",
9248 pragma_or_attr, str);
9249 break;
9250
9251 default:
9252 gcc_unreachable ();
9253 }
9254
9255 return false;
9256}
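
/* Illustrative attribute strings handled above (not part of this file):

     __attribute__ ((target ("+crc")))          add CRC to the current ISA
     __attribute__ ((target ("+nothing+fp")))   start from an empty ISA and
                                                enable only FP
     __attribute__ ((target ("+pixiedust")))    rejected as an invalid
                                                feature modifier.  */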
9257
9258/* The target attributes that we support. On top of these we also support just
9259 ISA extensions, like __attribute__ ((target ("+crc"))), but that case is
9260 handled explicitly in aarch64_process_one_target_attr. */
9261
9262static const struct aarch64_attribute_info aarch64_attributes[] =
9263{
9264 { "general-regs-only", aarch64_attr_mask, false, NULL,
9265 OPT_mgeneral_regs_only },
9266 { "fix-cortex-a53-835769", aarch64_attr_bool, true, NULL,
9267 OPT_mfix_cortex_a53_835769 },
48bb1a55
CL
9268 { "fix-cortex-a53-843419", aarch64_attr_bool, true, NULL,
9269 OPT_mfix_cortex_a53_843419 },
5a2c8331
KT
9270 { "cmodel", aarch64_attr_enum, false, NULL, OPT_mcmodel_ },
9271 { "strict-align", aarch64_attr_mask, false, NULL, OPT_mstrict_align },
9272 { "omit-leaf-frame-pointer", aarch64_attr_bool, true, NULL,
9273 OPT_momit_leaf_frame_pointer },
9274 { "tls-dialect", aarch64_attr_enum, false, NULL, OPT_mtls_dialect_ },
9275 { "arch", aarch64_attr_custom, false, aarch64_handle_attr_arch,
9276 OPT_march_ },
9277 { "cpu", aarch64_attr_custom, false, aarch64_handle_attr_cpu, OPT_mcpu_ },
9278 { "tune", aarch64_attr_custom, false, aarch64_handle_attr_tune,
9279 OPT_mtune_ },
9280 { NULL, aarch64_attr_custom, false, NULL, OPT____ }
9281};
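
/* Examples of attributes accepted through the table above (illustrative):

     __attribute__ ((target ("strict-align")))                aarch64_attr_mask
     __attribute__ ((target ("no-omit-leaf-frame-pointer")))  aarch64_attr_bool,
                                                              negated form
     __attribute__ ((target ("cmodel=large")))                aarch64_attr_enum
     __attribute__ ((target ("arch=armv8-a+crc")))            aarch64_attr_custom
*/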
9282
9283/* Parse ARG_STR which contains the definition of one target attribute.
9284 Show appropriate errors if any or return true if the attribute is valid.
9285 PRAGMA_OR_ATTR holds the string to use in error messages about whether
9286 we're processing a target attribute or pragma. */
9287
9288static bool
9289aarch64_process_one_target_attr (char *arg_str, const char* pragma_or_attr)
9290{
9291 bool invert = false;
9292
9293 size_t len = strlen (arg_str);
9294
9295 if (len == 0)
9296 {
9297 error ("malformed target %s", pragma_or_attr);
9298 return false;
9299 }
9300
9301 char *str_to_check = (char *) alloca (len + 1);
9302 strcpy (str_to_check, arg_str);
9303
9304 /* Skip leading whitespace. */
9305 while (*str_to_check == ' ' || *str_to_check == '\t')
9306 str_to_check++;
9307
9308 /* We have something like __attribute__ ((target ("+fp+nosimd"))).
9309 It is easier to detect and handle it explicitly here rather than going
9310 through the machinery for the rest of the target attributes in this
9311 function. */
9312 if (*str_to_check == '+')
9313 return aarch64_handle_attr_isa_flags (str_to_check, pragma_or_attr);
9314
9315 if (len > 3 && strncmp (str_to_check, "no-", 3) == 0)
9316 {
9317 invert = true;
9318 str_to_check += 3;
9319 }
9320 char *arg = strchr (str_to_check, '=');
9321
9322 /* If we found opt=foo then terminate STR_TO_CHECK at the '='
9323 and point ARG to "foo". */
9324 if (arg)
9325 {
9326 *arg = '\0';
9327 arg++;
9328 }
9329 const struct aarch64_attribute_info *p_attr;
16d12992 9330 bool found = false;
5a2c8331
KT
9331 for (p_attr = aarch64_attributes; p_attr->name; p_attr++)
9332 {
9333 /* If the names don't match up, or the user has given an argument
9334 to an attribute that doesn't accept one, or didn't give an argument
9335 to an attribute that expects one, fail to match. */
9336 if (strcmp (str_to_check, p_attr->name) != 0)
9337 continue;
9338
16d12992 9339 found = true;
5a2c8331
KT
9340 bool attr_need_arg_p = p_attr->attr_type == aarch64_attr_custom
9341 || p_attr->attr_type == aarch64_attr_enum;
9342
9343 if (attr_need_arg_p ^ (arg != NULL))
9344 {
9345 error ("target %s %qs does not accept an argument",
9346 pragma_or_attr, str_to_check);
9347 return false;
9348 }
9349
9350 /* If the name matches but the attribute does not allow "no-" versions
9351 then we can't match. */
9352 if (invert && !p_attr->allow_neg)
9353 {
9354 error ("target %s %qs does not allow a negated form",
9355 pragma_or_attr, str_to_check);
9356 return false;
9357 }
9358
9359 switch (p_attr->attr_type)
9360 {
9361 /* Has a custom handler registered.
9362 For example, cpu=, arch=, tune=. */
9363 case aarch64_attr_custom:
9364 gcc_assert (p_attr->handler);
9365 if (!p_attr->handler (arg, pragma_or_attr))
9366 return false;
9367 break;
9368
9369 /* Either set or unset a boolean option. */
9370 case aarch64_attr_bool:
9371 {
9372 struct cl_decoded_option decoded;
9373
9374 generate_option (p_attr->opt_num, NULL, !invert,
9375 CL_TARGET, &decoded);
9376 aarch64_handle_option (&global_options, &global_options_set,
9377 &decoded, input_location);
9378 break;
9379 }
9380 /* Set or unset a bit in the target_flags. aarch64_handle_option
9381 should know what mask to apply given the option number. */
9382 case aarch64_attr_mask:
9383 {
9384 struct cl_decoded_option decoded;
9385 /* We only need to specify the option number.
9386 aarch64_handle_option will know which mask to apply. */
9387 decoded.opt_index = p_attr->opt_num;
9388 decoded.value = !invert;
9389 aarch64_handle_option (&global_options, &global_options_set,
9390 &decoded, input_location);
9391 break;
9392 }
9393 /* Use the option setting machinery to set an option to an enum. */
9394 case aarch64_attr_enum:
9395 {
9396 gcc_assert (arg);
9397 bool valid;
9398 int value;
9399 valid = opt_enum_arg_to_value (p_attr->opt_num, arg,
9400 &value, CL_TARGET);
9401 if (valid)
9402 {
9403 set_option (&global_options, NULL, p_attr->opt_num, value,
9404 NULL, DK_UNSPECIFIED, input_location,
9405 global_dc);
9406 }
9407 else
9408 {
9409 error ("target %s %s=%s is not valid",
9410 pragma_or_attr, str_to_check, arg);
9411 }
9412 break;
9413 }
9414 default:
9415 gcc_unreachable ();
9416 }
9417 }
9418
16d12992
KT
9419 /* If we reached here we either have found an attribute and validated
9420 it or didn't match any. If we matched an attribute but its arguments
9421 were malformed we will have returned false already. */
9422 return found;
5a2c8331
KT
9423}
9424
9425/* Count how many times the character C appears in
9426 NULL-terminated string STR. */
9427
9428static unsigned int
9429num_occurences_in_str (char c, char *str)
9430{
9431 unsigned int res = 0;
9432 while (*str != '\0')
9433 {
9434 if (*str == c)
9435 res++;
9436
9437 str++;
9438 }
9439
9440 return res;
9441}
9442
9443/* Parse the tree in ARGS that contains the target attribute information
9444 and update the global target options space. PRAGMA_OR_ATTR is a string
9445 to be used in error messages, specifying whether this is processing
9446 a target attribute or a target pragma. */
9447
9448bool
9449aarch64_process_target_attr (tree args, const char* pragma_or_attr)
9450{
9451 if (TREE_CODE (args) == TREE_LIST)
9452 {
9453 do
9454 {
9455 tree head = TREE_VALUE (args);
9456 if (head)
9457 {
9458 if (!aarch64_process_target_attr (head, pragma_or_attr))
9459 return false;
9460 }
9461 args = TREE_CHAIN (args);
9462 } while (args);
9463
9464 return true;
9465 }
9466 /* We expect to find a string to parse. */
9467 gcc_assert (TREE_CODE (args) == STRING_CST);
9468
9469 size_t len = strlen (TREE_STRING_POINTER (args));
9470 char *str_to_check = (char *) alloca (len + 1);
9471 strcpy (str_to_check, TREE_STRING_POINTER (args));
9472
9473 if (len == 0)
9474 {
9475 error ("malformed target %s value", pragma_or_attr);
9476 return false;
9477 }
9478
 9479	  /* Used to catch empty attribute strings between commas, i.e.
 9480	     attribute ((target ("attr1,,attr2"))).  */
9481 unsigned int num_commas = num_occurences_in_str (',', str_to_check);
9482
9483 /* Handle multiple target attributes separated by ','. */
9484 char *token = strtok (str_to_check, ",");
9485
9486 unsigned int num_attrs = 0;
9487 while (token)
9488 {
9489 num_attrs++;
9490 if (!aarch64_process_one_target_attr (token, pragma_or_attr))
9491 {
9492 error ("target %s %qs is invalid", pragma_or_attr, token);
9493 return false;
9494 }
9495
9496 token = strtok (NULL, ",");
9497 }
9498
9499 if (num_attrs != num_commas + 1)
9500 {
9501 error ("malformed target %s list %qs",
9502 pragma_or_attr, TREE_STRING_POINTER (args));
9503 return false;
9504 }
9505
9506 return true;
9507}
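
/* Illustrative inputs to aarch64_process_target_attr (not part of this file):

     target ("arch=armv8-a,strict-align")    two tokens, both processed
     target ("+crc,tune=cortex-a57")         an ISA extension plus a tune token
     target ("strict-align,,arch=armv8-a")   two valid tokens but three
                                             comma-separated fields, so the
                                             num_commas check above reports a
                                             malformed list.  */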
9508
9509/* Implement TARGET_OPTION_VALID_ATTRIBUTE_P. This is used to
9510 process attribute ((target ("..."))). */
9511
9512static bool
9513aarch64_option_valid_attribute_p (tree fndecl, tree, tree args, int)
9514{
9515 struct cl_target_option cur_target;
9516 bool ret;
9517 tree old_optimize;
9518 tree new_target, new_optimize;
9519 tree existing_target = DECL_FUNCTION_SPECIFIC_TARGET (fndecl);
91d0e8de
KT
9520
9521 /* If what we're processing is the current pragma string then the
9522 target option node is already stored in target_option_current_node
9523 by aarch64_pragma_target_parse in aarch64-c.c. Use that to avoid
9524 having to re-parse the string. This is especially useful to keep
9525 arm_neon.h compile times down since that header contains a lot
9526 of intrinsics enclosed in pragmas. */
9527 if (!existing_target && args == current_target_pragma)
9528 {
9529 DECL_FUNCTION_SPECIFIC_TARGET (fndecl) = target_option_current_node;
9530 return true;
9531 }
5a2c8331
KT
9532 tree func_optimize = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl);
9533
9534 old_optimize = build_optimization_node (&global_options);
9535 func_optimize = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl);
9536
9537 /* If the function changed the optimization levels as well as setting
9538 target options, start with the optimizations specified. */
9539 if (func_optimize && func_optimize != old_optimize)
9540 cl_optimization_restore (&global_options,
9541 TREE_OPTIMIZATION (func_optimize));
9542
9543 /* Save the current target options to restore at the end. */
9544 cl_target_option_save (&cur_target, &global_options);
9545
9546 /* If fndecl already has some target attributes applied to it, unpack
9547 them so that we add this attribute on top of them, rather than
9548 overwriting them. */
9549 if (existing_target)
9550 {
9551 struct cl_target_option *existing_options
9552 = TREE_TARGET_OPTION (existing_target);
9553
9554 if (existing_options)
9555 cl_target_option_restore (&global_options, existing_options);
9556 }
9557 else
9558 cl_target_option_restore (&global_options,
9559 TREE_TARGET_OPTION (target_option_current_node));
9560
9561
9562 ret = aarch64_process_target_attr (args, "attribute");
9563
9564 /* Set up any additional state. */
9565 if (ret)
9566 {
9567 aarch64_override_options_internal (&global_options);
e95a988a
KT
9568 /* Initialize SIMD builtins if we haven't already.
9569 Set current_target_pragma to NULL for the duration so that
9570 the builtin initialization code doesn't try to tag the functions
9571 being built with the attributes specified by any current pragma, thus
9572 going into an infinite recursion. */
9573 if (TARGET_SIMD)
9574 {
9575 tree saved_current_target_pragma = current_target_pragma;
9576 current_target_pragma = NULL;
9577 aarch64_init_simd_builtins ();
9578 current_target_pragma = saved_current_target_pragma;
9579 }
5a2c8331
KT
9580 new_target = build_target_option_node (&global_options);
9581 }
9582 else
9583 new_target = NULL;
9584
9585 new_optimize = build_optimization_node (&global_options);
9586
9587 if (fndecl && ret)
9588 {
9589 DECL_FUNCTION_SPECIFIC_TARGET (fndecl) = new_target;
9590
9591 if (old_optimize != new_optimize)
9592 DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl) = new_optimize;
9593 }
9594
9595 cl_target_option_restore (&global_options, &cur_target);
9596
9597 if (old_optimize != new_optimize)
9598 cl_optimization_restore (&global_options,
9599 TREE_OPTIMIZATION (old_optimize));
9600 return ret;
9601}
9602
1fd8d40c
KT
9603/* Helper for aarch64_can_inline_p. In the case where CALLER and CALLEE are
9604 tri-bool options (yes, no, don't care) and the default value is
9605 DEF, determine whether to reject inlining. */
9606
9607static bool
9608aarch64_tribools_ok_for_inlining_p (int caller, int callee,
9609 int dont_care, int def)
9610{
9611 /* If the callee doesn't care, always allow inlining. */
9612 if (callee == dont_care)
9613 return true;
9614
9615 /* If the caller doesn't care, always allow inlining. */
9616 if (caller == dont_care)
9617 return true;
9618
9619 /* Otherwise, allow inlining if either the callee and caller values
9620 agree, or if the callee is using the default value. */
9621 return (callee == caller || callee == def);
9622}
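
/* Example with the -momit-leaf-frame-pointer call below
   (dont_care = 2, def = 1):

     caller  callee   inline?
       1       2        yes   (callee doesn't care)
       2       0        yes   (caller doesn't care)
       0       1        yes   (callee uses the default)
       1       0        no    (explicit mismatch)                     */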
9623
9624/* Implement TARGET_CAN_INLINE_P. Decide whether it is valid
9625 to inline CALLEE into CALLER based on target-specific info.
9626 Make sure that the caller and callee have compatible architectural
9627 features. Then go through the other possible target attributes
9628 and see if they can block inlining. Try not to reject always_inline
9629 callees unless they are incompatible architecturally. */
9630
9631static bool
9632aarch64_can_inline_p (tree caller, tree callee)
9633{
9634 tree caller_tree = DECL_FUNCTION_SPECIFIC_TARGET (caller);
9635 tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (callee);
9636
9637 /* If callee has no option attributes, then it is ok to inline. */
9638 if (!callee_tree)
9639 return true;
9640
9641 struct cl_target_option *caller_opts
9642 = TREE_TARGET_OPTION (caller_tree ? caller_tree
9643 : target_option_default_node);
9644
9645 struct cl_target_option *callee_opts = TREE_TARGET_OPTION (callee_tree);
9646
9647
9648 /* Callee's ISA flags should be a subset of the caller's. */
9649 if ((caller_opts->x_aarch64_isa_flags & callee_opts->x_aarch64_isa_flags)
9650 != callee_opts->x_aarch64_isa_flags)
9651 return false;
9652
9653 /* Allow non-strict aligned functions inlining into strict
9654 aligned ones. */
9655 if ((TARGET_STRICT_ALIGN_P (caller_opts->x_target_flags)
9656 != TARGET_STRICT_ALIGN_P (callee_opts->x_target_flags))
9657 && !(!TARGET_STRICT_ALIGN_P (callee_opts->x_target_flags)
9658 && TARGET_STRICT_ALIGN_P (caller_opts->x_target_flags)))
9659 return false;
9660
9661 bool always_inline = lookup_attribute ("always_inline",
9662 DECL_ATTRIBUTES (callee));
9663
9664 /* If the architectural features match up and the callee is always_inline
9665 then the other attributes don't matter. */
9666 if (always_inline)
9667 return true;
9668
9669 if (caller_opts->x_aarch64_cmodel_var
9670 != callee_opts->x_aarch64_cmodel_var)
9671 return false;
9672
9673 if (caller_opts->x_aarch64_tls_dialect
9674 != callee_opts->x_aarch64_tls_dialect)
9675 return false;
9676
9677 /* Honour explicit requests to workaround errata. */
9678 if (!aarch64_tribools_ok_for_inlining_p (
9679 caller_opts->x_aarch64_fix_a53_err835769,
9680 callee_opts->x_aarch64_fix_a53_err835769,
9681 2, TARGET_FIX_ERR_A53_835769_DEFAULT))
9682 return false;
9683
48bb1a55
CL
9684 if (!aarch64_tribools_ok_for_inlining_p (
9685 caller_opts->x_aarch64_fix_a53_err843419,
9686 callee_opts->x_aarch64_fix_a53_err843419,
9687 2, TARGET_FIX_ERR_A53_843419))
9688 return false;
9689
1fd8d40c
KT
9690 /* If the user explicitly specified -momit-leaf-frame-pointer for the
 9691	     caller and callee and they don't match up, reject inlining.  */
9692 if (!aarch64_tribools_ok_for_inlining_p (
9693 caller_opts->x_flag_omit_leaf_frame_pointer,
9694 callee_opts->x_flag_omit_leaf_frame_pointer,
9695 2, 1))
9696 return false;
9697
9698 /* If the callee has specific tuning overrides, respect them. */
9699 if (callee_opts->x_aarch64_override_tune_string != NULL
9700 && caller_opts->x_aarch64_override_tune_string == NULL)
9701 return false;
9702
9703 /* If the user specified tuning override strings for the
9704 caller and callee and they don't match up, reject inlining.
9705 We just do a string compare here, we don't analyze the meaning
9706 of the string, as it would be too costly for little gain. */
9707 if (callee_opts->x_aarch64_override_tune_string
9708 && caller_opts->x_aarch64_override_tune_string
9709 && (strcmp (callee_opts->x_aarch64_override_tune_string,
9710 caller_opts->x_aarch64_override_tune_string) != 0))
9711 return false;
9712
9713 return true;
9714}
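
/* Illustrative consequence of the ISA subset rule above (not part of this
   file): a callee declared as

     __attribute__ ((target ("+crc"))) static inline unsigned helper (unsigned);

   can be inlined into a caller compiled with -march=armv8-a+crc, but not into
   one compiled with plain -march=armv8-a, because the callee's CRC bit is not
   a subset of the caller's ISA flags.  */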
9715
43e9d192
IB
9716/* Return true if SYMBOL_REF X binds locally. */
9717
9718static bool
9719aarch64_symbol_binds_local_p (const_rtx x)
9720{
9721 return (SYMBOL_REF_DECL (x)
9722 ? targetm.binds_local_p (SYMBOL_REF_DECL (x))
9723 : SYMBOL_REF_LOCAL_P (x));
9724}
9725
9726/* Return true if SYMBOL_REF X is thread local */
9727static bool
9728aarch64_tls_symbol_p (rtx x)
9729{
9730 if (! TARGET_HAVE_TLS)
9731 return false;
9732
9733 if (GET_CODE (x) != SYMBOL_REF)
9734 return false;
9735
9736 return SYMBOL_REF_TLS_MODEL (x) != 0;
9737}
9738
9739/* Classify a TLS symbol into one of the TLS kinds. */
9740enum aarch64_symbol_type
9741aarch64_classify_tls_symbol (rtx x)
9742{
9743 enum tls_model tls_kind = tls_symbolic_operand_type (x);
9744
9745 switch (tls_kind)
9746 {
9747 case TLS_MODEL_GLOBAL_DYNAMIC:
9748 case TLS_MODEL_LOCAL_DYNAMIC:
9749 return TARGET_TLS_DESC ? SYMBOL_SMALL_TLSDESC : SYMBOL_SMALL_TLSGD;
9750
9751 case TLS_MODEL_INITIAL_EXEC:
5ae7caad
JW
9752 switch (aarch64_cmodel)
9753 {
9754 case AARCH64_CMODEL_TINY:
9755 case AARCH64_CMODEL_TINY_PIC:
9756 return SYMBOL_TINY_TLSIE;
9757 default:
79496620 9758 return SYMBOL_SMALL_TLSIE;
5ae7caad 9759 }
43e9d192
IB
9760
9761 case TLS_MODEL_LOCAL_EXEC:
cbf5629e
JW
9762 if (aarch64_tls_size == 12)
9763 return SYMBOL_TLSLE12;
9764 else if (aarch64_tls_size == 24)
9765 return SYMBOL_TLSLE24;
9766 else if (aarch64_tls_size == 32)
9767 return SYMBOL_TLSLE32;
9768 else if (aarch64_tls_size == 48)
9769 return SYMBOL_TLSLE48;
9770 else
9771 gcc_unreachable ();
43e9d192
IB
9772
9773 case TLS_MODEL_EMULATED:
9774 case TLS_MODEL_NONE:
9775 return SYMBOL_FORCE_TO_MEM;
9776
9777 default:
9778 gcc_unreachable ();
9779 }
9780}
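
/* For instance, compiling with -ftls-model=local-exec selects one of the
   SYMBOL_TLSLE* classifications above according to -mtls-size:
   -mtls-size=12 gives SYMBOL_TLSLE12, 24 gives SYMBOL_TLSLE24, and so on.
   Initial-exec accesses use SYMBOL_TINY_TLSIE only under the tiny code
   models and SYMBOL_SMALL_TLSIE otherwise.  */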
9781
9782/* Return the method that should be used to access SYMBOL_REF or
a6e0bfa7 9783 LABEL_REF X. */
17f4d4bf 9784
43e9d192 9785enum aarch64_symbol_type
a6e0bfa7 9786aarch64_classify_symbol (rtx x, rtx offset)
43e9d192
IB
9787{
9788 if (GET_CODE (x) == LABEL_REF)
9789 {
9790 switch (aarch64_cmodel)
9791 {
9792 case AARCH64_CMODEL_LARGE:
9793 return SYMBOL_FORCE_TO_MEM;
9794
9795 case AARCH64_CMODEL_TINY_PIC:
9796 case AARCH64_CMODEL_TINY:
a5350ddc
CSS
9797 return SYMBOL_TINY_ABSOLUTE;
9798
1b1e81f8 9799 case AARCH64_CMODEL_SMALL_SPIC:
43e9d192
IB
9800 case AARCH64_CMODEL_SMALL_PIC:
9801 case AARCH64_CMODEL_SMALL:
9802 return SYMBOL_SMALL_ABSOLUTE;
9803
9804 default:
9805 gcc_unreachable ();
9806 }
9807 }
9808
17f4d4bf 9809 if (GET_CODE (x) == SYMBOL_REF)
43e9d192 9810 {
43e9d192
IB
9811 if (aarch64_tls_symbol_p (x))
9812 return aarch64_classify_tls_symbol (x);
9813
17f4d4bf
CSS
9814 switch (aarch64_cmodel)
9815 {
9816 case AARCH64_CMODEL_TINY:
15f6e0da 9817 /* When we retrieve symbol + offset address, we have to make sure
f8b756b7
TB
9818 the offset does not cause overflow of the final address. But
9819 we have no way of knowing the address of symbol at compile time
9820 so we can't accurately say if the distance between the PC and
 9821	     symbol + offset is outside the addressable range of +/-1M in the
9822 TINY code model. So we rely on images not being greater than
9823 1M and cap the offset at 1M and anything beyond 1M will have to
15f6e0da
RR
9824 be loaded using an alternative mechanism. Furthermore if the
9825 symbol is a weak reference to something that isn't known to
9826 resolve to a symbol in this module, then force to memory. */
9827 if ((SYMBOL_REF_WEAK (x)
9828 && !aarch64_symbol_binds_local_p (x))
f8b756b7 9829 || INTVAL (offset) < -1048575 || INTVAL (offset) > 1048575)
a5350ddc
CSS
9830 return SYMBOL_FORCE_TO_MEM;
9831 return SYMBOL_TINY_ABSOLUTE;
9832
17f4d4bf 9833 case AARCH64_CMODEL_SMALL:
f8b756b7
TB
9834 /* Same reasoning as the tiny code model, but the offset cap here is
9835 4G. */
15f6e0da
RR
9836 if ((SYMBOL_REF_WEAK (x)
9837 && !aarch64_symbol_binds_local_p (x))
3ff5d1f0
TB
9838 || !IN_RANGE (INTVAL (offset), HOST_WIDE_INT_C (-4294967263),
9839 HOST_WIDE_INT_C (4294967264)))
17f4d4bf
CSS
9840 return SYMBOL_FORCE_TO_MEM;
9841 return SYMBOL_SMALL_ABSOLUTE;
43e9d192 9842
17f4d4bf 9843 case AARCH64_CMODEL_TINY_PIC:
38e6c9a6 9844 if (!aarch64_symbol_binds_local_p (x))
87dd8ab0 9845 return SYMBOL_TINY_GOT;
38e6c9a6
MS
9846 return SYMBOL_TINY_ABSOLUTE;
9847
1b1e81f8 9848 case AARCH64_CMODEL_SMALL_SPIC:
17f4d4bf
CSS
9849 case AARCH64_CMODEL_SMALL_PIC:
9850 if (!aarch64_symbol_binds_local_p (x))
1b1e81f8
JW
9851 return (aarch64_cmodel == AARCH64_CMODEL_SMALL_SPIC
9852 ? SYMBOL_SMALL_GOT_28K : SYMBOL_SMALL_GOT_4G);
17f4d4bf 9853 return SYMBOL_SMALL_ABSOLUTE;
43e9d192 9854
9ee6540a
WD
9855 case AARCH64_CMODEL_LARGE:
9856 /* This is alright even in PIC code as the constant
9857 pool reference is always PC relative and within
9858 the same translation unit. */
9859 if (CONSTANT_POOL_ADDRESS_P (x))
9860 return SYMBOL_SMALL_ABSOLUTE;
9861 else
9862 return SYMBOL_FORCE_TO_MEM;
9863
17f4d4bf
CSS
9864 default:
9865 gcc_unreachable ();
9866 }
43e9d192 9867 }
17f4d4bf 9868
43e9d192
IB
9869 /* By default push everything into the constant pool. */
9870 return SYMBOL_FORCE_TO_MEM;
9871}
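
/* As an illustration of the offset capping above: for "extern char a[];",
   an access such as &a[2000000] under -mcmodel=tiny yields an offset outside
   +/-1M, so the address is classified SYMBOL_FORCE_TO_MEM and materialised
   via the constant pool, whereas &a[100] stays SYMBOL_TINY_ABSOLUTE and can
   typically be formed with a single pc-relative address computation.  */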
9872
43e9d192
IB
9873bool
9874aarch64_constant_address_p (rtx x)
9875{
9876 return (CONSTANT_P (x) && memory_address_p (DImode, x));
9877}
9878
9879bool
9880aarch64_legitimate_pic_operand_p (rtx x)
9881{
9882 if (GET_CODE (x) == SYMBOL_REF
9883 || (GET_CODE (x) == CONST
9884 && GET_CODE (XEXP (x, 0)) == PLUS
9885 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF))
9886 return false;
9887
9888 return true;
9889}
9890
3520f7cc
JG
9891/* Return true if X holds either a quarter-precision or
9892 floating-point +0.0 constant. */
9893static bool
ef4bddc2 9894aarch64_valid_floating_const (machine_mode mode, rtx x)
3520f7cc
JG
9895{
9896 if (!CONST_DOUBLE_P (x))
9897 return false;
9898
6a0f8c01
JW
9899 if (aarch64_float_const_zero_rtx_p (x))
9900 return true;
9901
9902 /* We only handle moving 0.0 to a TFmode register. */
3520f7cc
JG
9903 if (!(mode == SFmode || mode == DFmode))
9904 return false;
9905
3520f7cc
JG
9906 return aarch64_float_const_representable_p (x);
9907}
9908
43e9d192 9909static bool
ef4bddc2 9910aarch64_legitimate_constant_p (machine_mode mode, rtx x)
43e9d192
IB
9911{
9912 /* Do not allow vector struct mode constants. We could support
9913 0 and -1 easily, but they need support in aarch64-simd.md. */
9914 if (TARGET_SIMD && aarch64_vect_struct_mode_p (mode))
9915 return false;
9916
9917 /* This could probably go away because
9918 we now decompose CONST_INTs according to expand_mov_immediate. */
9919 if ((GET_CODE (x) == CONST_VECTOR
48063b9d 9920 && aarch64_simd_valid_immediate (x, mode, false, NULL))
3520f7cc
JG
9921 || CONST_INT_P (x) || aarch64_valid_floating_const (mode, x))
9922 return !targetm.cannot_force_const_mem (mode, x);
43e9d192
IB
9923
9924 if (GET_CODE (x) == HIGH
9925 && aarch64_valid_symref (XEXP (x, 0), GET_MODE (XEXP (x, 0))))
9926 return true;
9927
9928 return aarch64_constant_address_p (x);
9929}
9930
a5bc806c 9931rtx
43e9d192
IB
9932aarch64_load_tp (rtx target)
9933{
9934 if (!target
9935 || GET_MODE (target) != Pmode
9936 || !register_operand (target, Pmode))
9937 target = gen_reg_rtx (Pmode);
9938
9939 /* Can return in any reg. */
9940 emit_insn (gen_aarch64_load_tp_hard (target));
9941 return target;
9942}
9943
43e9d192
IB
9944/* On AAPCS systems, this is the "struct __va_list". */
9945static GTY(()) tree va_list_type;
9946
9947/* Implement TARGET_BUILD_BUILTIN_VA_LIST.
9948 Return the type to use as __builtin_va_list.
9949
9950 AAPCS64 \S 7.1.4 requires that va_list be a typedef for a type defined as:
9951
9952 struct __va_list
9953 {
9954 void *__stack;
9955 void *__gr_top;
9956 void *__vr_top;
9957 int __gr_offs;
9958 int __vr_offs;
9959 }; */
9960
9961static tree
9962aarch64_build_builtin_va_list (void)
9963{
9964 tree va_list_name;
9965 tree f_stack, f_grtop, f_vrtop, f_groff, f_vroff;
9966
9967 /* Create the type. */
9968 va_list_type = lang_hooks.types.make_type (RECORD_TYPE);
9969 /* Give it the required name. */
9970 va_list_name = build_decl (BUILTINS_LOCATION,
9971 TYPE_DECL,
9972 get_identifier ("__va_list"),
9973 va_list_type);
9974 DECL_ARTIFICIAL (va_list_name) = 1;
9975 TYPE_NAME (va_list_type) = va_list_name;
665c56c6 9976 TYPE_STUB_DECL (va_list_type) = va_list_name;
43e9d192
IB
9977
9978 /* Create the fields. */
9979 f_stack = build_decl (BUILTINS_LOCATION,
9980 FIELD_DECL, get_identifier ("__stack"),
9981 ptr_type_node);
9982 f_grtop = build_decl (BUILTINS_LOCATION,
9983 FIELD_DECL, get_identifier ("__gr_top"),
9984 ptr_type_node);
9985 f_vrtop = build_decl (BUILTINS_LOCATION,
9986 FIELD_DECL, get_identifier ("__vr_top"),
9987 ptr_type_node);
9988 f_groff = build_decl (BUILTINS_LOCATION,
9989 FIELD_DECL, get_identifier ("__gr_offs"),
9990 integer_type_node);
9991 f_vroff = build_decl (BUILTINS_LOCATION,
9992 FIELD_DECL, get_identifier ("__vr_offs"),
9993 integer_type_node);
9994
88e3bdd1 9995 /* Tell tree-stdarg pass about our internal offset fields.
3fd6b9cc
JW
 9996	     NOTE: va_list_gpr/fpr_counter_field are only used for tree comparison
 9997	     purposes, to identify whether the code is updating va_list internal
 9998	     offset fields in an irregular way.  */
9999 va_list_gpr_counter_field = f_groff;
10000 va_list_fpr_counter_field = f_vroff;
10001
43e9d192
IB
10002 DECL_ARTIFICIAL (f_stack) = 1;
10003 DECL_ARTIFICIAL (f_grtop) = 1;
10004 DECL_ARTIFICIAL (f_vrtop) = 1;
10005 DECL_ARTIFICIAL (f_groff) = 1;
10006 DECL_ARTIFICIAL (f_vroff) = 1;
10007
10008 DECL_FIELD_CONTEXT (f_stack) = va_list_type;
10009 DECL_FIELD_CONTEXT (f_grtop) = va_list_type;
10010 DECL_FIELD_CONTEXT (f_vrtop) = va_list_type;
10011 DECL_FIELD_CONTEXT (f_groff) = va_list_type;
10012 DECL_FIELD_CONTEXT (f_vroff) = va_list_type;
10013
10014 TYPE_FIELDS (va_list_type) = f_stack;
10015 DECL_CHAIN (f_stack) = f_grtop;
10016 DECL_CHAIN (f_grtop) = f_vrtop;
10017 DECL_CHAIN (f_vrtop) = f_groff;
10018 DECL_CHAIN (f_groff) = f_vroff;
10019
10020 /* Compute its layout. */
10021 layout_type (va_list_type);
10022
10023 return va_list_type;
10024}
10025
10026/* Implement TARGET_EXPAND_BUILTIN_VA_START. */
10027static void
10028aarch64_expand_builtin_va_start (tree valist, rtx nextarg ATTRIBUTE_UNUSED)
10029{
10030 const CUMULATIVE_ARGS *cum;
10031 tree f_stack, f_grtop, f_vrtop, f_groff, f_vroff;
10032 tree stack, grtop, vrtop, groff, vroff;
10033 tree t;
88e3bdd1
JW
10034 int gr_save_area_size = cfun->va_list_gpr_size;
10035 int vr_save_area_size = cfun->va_list_fpr_size;
43e9d192
IB
10036 int vr_offset;
10037
10038 cum = &crtl->args.info;
88e3bdd1
JW
10039 if (cfun->va_list_gpr_size)
10040 gr_save_area_size = MIN ((NUM_ARG_REGS - cum->aapcs_ncrn) * UNITS_PER_WORD,
10041 cfun->va_list_gpr_size);
10042 if (cfun->va_list_fpr_size)
10043 vr_save_area_size = MIN ((NUM_FP_ARG_REGS - cum->aapcs_nvrn)
10044 * UNITS_PER_VREG, cfun->va_list_fpr_size);
43e9d192 10045
d5726973 10046 if (!TARGET_FLOAT)
43e9d192 10047 {
261fb553 10048 gcc_assert (cum->aapcs_nvrn == 0);
43e9d192
IB
10049 vr_save_area_size = 0;
10050 }
10051
10052 f_stack = TYPE_FIELDS (va_list_type_node);
10053 f_grtop = DECL_CHAIN (f_stack);
10054 f_vrtop = DECL_CHAIN (f_grtop);
10055 f_groff = DECL_CHAIN (f_vrtop);
10056 f_vroff = DECL_CHAIN (f_groff);
10057
10058 stack = build3 (COMPONENT_REF, TREE_TYPE (f_stack), valist, f_stack,
10059 NULL_TREE);
10060 grtop = build3 (COMPONENT_REF, TREE_TYPE (f_grtop), valist, f_grtop,
10061 NULL_TREE);
10062 vrtop = build3 (COMPONENT_REF, TREE_TYPE (f_vrtop), valist, f_vrtop,
10063 NULL_TREE);
10064 groff = build3 (COMPONENT_REF, TREE_TYPE (f_groff), valist, f_groff,
10065 NULL_TREE);
10066 vroff = build3 (COMPONENT_REF, TREE_TYPE (f_vroff), valist, f_vroff,
10067 NULL_TREE);
10068
10069 /* Emit code to initialize STACK, which points to the next varargs stack
10070 argument. CUM->AAPCS_STACK_SIZE gives the number of stack words used
10071 by named arguments. STACK is 8-byte aligned. */
10072 t = make_tree (TREE_TYPE (stack), virtual_incoming_args_rtx);
10073 if (cum->aapcs_stack_size > 0)
10074 t = fold_build_pointer_plus_hwi (t, cum->aapcs_stack_size * UNITS_PER_WORD);
10075 t = build2 (MODIFY_EXPR, TREE_TYPE (stack), stack, t);
10076 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
10077
10078 /* Emit code to initialize GRTOP, the top of the GR save area.
10079 virtual_incoming_args_rtx should have been 16 byte aligned. */
10080 t = make_tree (TREE_TYPE (grtop), virtual_incoming_args_rtx);
10081 t = build2 (MODIFY_EXPR, TREE_TYPE (grtop), grtop, t);
10082 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
10083
10084 /* Emit code to initialize VRTOP, the top of the VR save area.
10085 This address is gr_save_area_bytes below GRTOP, rounded
10086 down to the next 16-byte boundary. */
10087 t = make_tree (TREE_TYPE (vrtop), virtual_incoming_args_rtx);
4f59f9f2
UB
10088 vr_offset = ROUND_UP (gr_save_area_size,
10089 STACK_BOUNDARY / BITS_PER_UNIT);
43e9d192
IB
10090
10091 if (vr_offset)
10092 t = fold_build_pointer_plus_hwi (t, -vr_offset);
10093 t = build2 (MODIFY_EXPR, TREE_TYPE (vrtop), vrtop, t);
10094 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
10095
10096 /* Emit code to initialize GROFF, the offset from GRTOP of the
10097 next GPR argument. */
10098 t = build2 (MODIFY_EXPR, TREE_TYPE (groff), groff,
10099 build_int_cst (TREE_TYPE (groff), -gr_save_area_size));
10100 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
10101
10102 /* Likewise emit code to initialize VROFF, the offset from FTOP
10103 of the next VR argument. */
10104 t = build2 (MODIFY_EXPR, TREE_TYPE (vroff), vroff,
10105 build_int_cst (TREE_TYPE (vroff), -vr_save_area_size));
10106 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
10107}
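
/* A sketch of the resulting va_list for a prototype such as
   "int f (int n, ...)", assuming the usual 8 general and 8 vector argument
   registers with 8- and 16-byte save slots, and ignoring any shrinking of
   the save areas by the tree-stdarg pass:

     __gr_offs = -(8 - 1) * 8  = -56    seven unnamed GP regs saved
     __vr_offs = -8 * 16       = -128   all eight vector regs saved
     __gr_top and __vr_top point just past their respective save areas and
     __stack points at the first stack-passed anonymous argument.  */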
10108
10109/* Implement TARGET_GIMPLIFY_VA_ARG_EXPR. */
10110
10111static tree
10112aarch64_gimplify_va_arg_expr (tree valist, tree type, gimple_seq *pre_p,
10113 gimple_seq *post_p ATTRIBUTE_UNUSED)
10114{
10115 tree addr;
10116 bool indirect_p;
10117 bool is_ha; /* is HFA or HVA. */
10118 bool dw_align; /* double-word align. */
ef4bddc2 10119 machine_mode ag_mode = VOIDmode;
43e9d192 10120 int nregs;
ef4bddc2 10121 machine_mode mode;
43e9d192
IB
10122
10123 tree f_stack, f_grtop, f_vrtop, f_groff, f_vroff;
10124 tree stack, f_top, f_off, off, arg, roundup, on_stack;
10125 HOST_WIDE_INT size, rsize, adjust, align;
10126 tree t, u, cond1, cond2;
10127
10128 indirect_p = pass_by_reference (NULL, TYPE_MODE (type), type, false);
10129 if (indirect_p)
10130 type = build_pointer_type (type);
10131
10132 mode = TYPE_MODE (type);
10133
10134 f_stack = TYPE_FIELDS (va_list_type_node);
10135 f_grtop = DECL_CHAIN (f_stack);
10136 f_vrtop = DECL_CHAIN (f_grtop);
10137 f_groff = DECL_CHAIN (f_vrtop);
10138 f_vroff = DECL_CHAIN (f_groff);
10139
10140 stack = build3 (COMPONENT_REF, TREE_TYPE (f_stack), unshare_expr (valist),
10141 f_stack, NULL_TREE);
10142 size = int_size_in_bytes (type);
10143 align = aarch64_function_arg_alignment (mode, type) / BITS_PER_UNIT;
10144
10145 dw_align = false;
10146 adjust = 0;
10147 if (aarch64_vfp_is_call_or_return_candidate (mode,
10148 type,
10149 &ag_mode,
10150 &nregs,
10151 &is_ha))
10152 {
10153 /* TYPE passed in fp/simd registers. */
d5726973 10154 if (!TARGET_FLOAT)
261fb553 10155 aarch64_err_no_fpadvsimd (mode, "varargs");
43e9d192
IB
10156
10157 f_top = build3 (COMPONENT_REF, TREE_TYPE (f_vrtop),
10158 unshare_expr (valist), f_vrtop, NULL_TREE);
10159 f_off = build3 (COMPONENT_REF, TREE_TYPE (f_vroff),
10160 unshare_expr (valist), f_vroff, NULL_TREE);
10161
10162 rsize = nregs * UNITS_PER_VREG;
10163
10164 if (is_ha)
10165 {
10166 if (BYTES_BIG_ENDIAN && GET_MODE_SIZE (ag_mode) < UNITS_PER_VREG)
10167 adjust = UNITS_PER_VREG - GET_MODE_SIZE (ag_mode);
10168 }
10169 else if (BLOCK_REG_PADDING (mode, type, 1) == downward
10170 && size < UNITS_PER_VREG)
10171 {
10172 adjust = UNITS_PER_VREG - size;
10173 }
10174 }
10175 else
10176 {
10177 /* TYPE passed in general registers. */
10178 f_top = build3 (COMPONENT_REF, TREE_TYPE (f_grtop),
10179 unshare_expr (valist), f_grtop, NULL_TREE);
10180 f_off = build3 (COMPONENT_REF, TREE_TYPE (f_groff),
10181 unshare_expr (valist), f_groff, NULL_TREE);
4f59f9f2 10182 rsize = ROUND_UP (size, UNITS_PER_WORD);
43e9d192
IB
10183 nregs = rsize / UNITS_PER_WORD;
10184
10185 if (align > 8)
10186 dw_align = true;
10187
10188 if (BLOCK_REG_PADDING (mode, type, 1) == downward
10189 && size < UNITS_PER_WORD)
10190 {
10191 adjust = UNITS_PER_WORD - size;
10192 }
10193 }
10194
10195 /* Get a local temporary for the field value. */
10196 off = get_initialized_tmp_var (f_off, pre_p, NULL);
10197
10198 /* Emit code to branch if off >= 0. */
10199 t = build2 (GE_EXPR, boolean_type_node, off,
10200 build_int_cst (TREE_TYPE (off), 0));
10201 cond1 = build3 (COND_EXPR, ptr_type_node, t, NULL_TREE, NULL_TREE);
10202
10203 if (dw_align)
10204 {
10205 /* Emit: offs = (offs + 15) & -16. */
10206 t = build2 (PLUS_EXPR, TREE_TYPE (off), off,
10207 build_int_cst (TREE_TYPE (off), 15));
10208 t = build2 (BIT_AND_EXPR, TREE_TYPE (off), t,
10209 build_int_cst (TREE_TYPE (off), -16));
10210 roundup = build2 (MODIFY_EXPR, TREE_TYPE (off), off, t);
10211 }
10212 else
10213 roundup = NULL;
10214
10215 /* Update ap.__[g|v]r_offs */
10216 t = build2 (PLUS_EXPR, TREE_TYPE (off), off,
10217 build_int_cst (TREE_TYPE (off), rsize));
10218 t = build2 (MODIFY_EXPR, TREE_TYPE (f_off), unshare_expr (f_off), t);
10219
10220 /* String up. */
10221 if (roundup)
10222 t = build2 (COMPOUND_EXPR, TREE_TYPE (t), roundup, t);
10223
10224 /* [cond2] if (ap.__[g|v]r_offs > 0) */
10225 u = build2 (GT_EXPR, boolean_type_node, unshare_expr (f_off),
10226 build_int_cst (TREE_TYPE (f_off), 0));
10227 cond2 = build3 (COND_EXPR, ptr_type_node, u, NULL_TREE, NULL_TREE);
10228
10229 /* String up: make sure the assignment happens before the use. */
10230 t = build2 (COMPOUND_EXPR, TREE_TYPE (cond2), t, cond2);
10231 COND_EXPR_ELSE (cond1) = t;
10232
10233 /* Prepare the trees handling the argument that is passed on the stack;
10234 the top level node will store in ON_STACK. */
10235 arg = get_initialized_tmp_var (stack, pre_p, NULL);
10236 if (align > 8)
10237 {
10238 /* if (alignof(type) > 8) (arg = arg + 15) & -16; */
10239 t = fold_convert (intDI_type_node, arg);
10240 t = build2 (PLUS_EXPR, TREE_TYPE (t), t,
10241 build_int_cst (TREE_TYPE (t), 15));
10242 t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
10243 build_int_cst (TREE_TYPE (t), -16));
10244 t = fold_convert (TREE_TYPE (arg), t);
10245 roundup = build2 (MODIFY_EXPR, TREE_TYPE (arg), arg, t);
10246 }
10247 else
10248 roundup = NULL;
10249 /* Advance ap.__stack */
10250 t = fold_convert (intDI_type_node, arg);
10251 t = build2 (PLUS_EXPR, TREE_TYPE (t), t,
10252 build_int_cst (TREE_TYPE (t), size + 7));
10253 t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
10254 build_int_cst (TREE_TYPE (t), -8));
10255 t = fold_convert (TREE_TYPE (arg), t);
10256 t = build2 (MODIFY_EXPR, TREE_TYPE (stack), unshare_expr (stack), t);
10257 /* String up roundup and advance. */
10258 if (roundup)
10259 t = build2 (COMPOUND_EXPR, TREE_TYPE (t), roundup, t);
10260 /* String up with arg */
10261 on_stack = build2 (COMPOUND_EXPR, TREE_TYPE (arg), t, arg);
10262 /* Big-endianness related address adjustment. */
10263 if (BLOCK_REG_PADDING (mode, type, 1) == downward
10264 && size < UNITS_PER_WORD)
10265 {
10266 t = build2 (POINTER_PLUS_EXPR, TREE_TYPE (arg), arg,
10267 size_int (UNITS_PER_WORD - size));
10268 on_stack = build2 (COMPOUND_EXPR, TREE_TYPE (arg), on_stack, t);
10269 }
10270
10271 COND_EXPR_THEN (cond1) = unshare_expr (on_stack);
10272 COND_EXPR_THEN (cond2) = unshare_expr (on_stack);
10273
10274 /* Adjustment to OFFSET in the case of BIG_ENDIAN. */
10275 t = off;
10276 if (adjust)
10277 t = build2 (PREINCREMENT_EXPR, TREE_TYPE (off), off,
10278 build_int_cst (TREE_TYPE (off), adjust));
10279
10280 t = fold_convert (sizetype, t);
10281 t = build2 (POINTER_PLUS_EXPR, TREE_TYPE (f_top), f_top, t);
10282
10283 if (is_ha)
10284 {
10285 /* type ha; // treat as "struct {ftype field[n];}"
10286 ... [computing offs]
10287 for (i = 0; i <nregs; ++i, offs += 16)
10288 ha.field[i] = *((ftype *)(ap.__vr_top + offs));
10289 return ha; */
10290 int i;
10291 tree tmp_ha, field_t, field_ptr_t;
10292
10293 /* Declare a local variable. */
10294 tmp_ha = create_tmp_var_raw (type, "ha");
10295 gimple_add_tmp_var (tmp_ha);
10296
10297 /* Establish the base type. */
10298 switch (ag_mode)
10299 {
10300 case SFmode:
10301 field_t = float_type_node;
10302 field_ptr_t = float_ptr_type_node;
10303 break;
10304 case DFmode:
10305 field_t = double_type_node;
10306 field_ptr_t = double_ptr_type_node;
10307 break;
10308 case TFmode:
10309 field_t = long_double_type_node;
10310 field_ptr_t = long_double_ptr_type_node;
10311 break;
43e9d192 10312 case HFmode:
1b62ed4f
JG
10313 field_t = aarch64_fp16_type_node;
10314 field_ptr_t = aarch64_fp16_ptr_type_node;
43e9d192 10315 break;
43e9d192
IB
10316 case V2SImode:
10317 case V4SImode:
10318 {
10319 tree innertype = make_signed_type (GET_MODE_PRECISION (SImode));
10320 field_t = build_vector_type_for_mode (innertype, ag_mode);
10321 field_ptr_t = build_pointer_type (field_t);
10322 }
10323 break;
10324 default:
10325 gcc_assert (0);
10326 }
10327
 10328	      /* *(field_ptr_t)&ha = *((field_ptr_t)vr_saved_area)  */
10329 tmp_ha = build1 (ADDR_EXPR, field_ptr_t, tmp_ha);
10330 addr = t;
10331 t = fold_convert (field_ptr_t, addr);
10332 t = build2 (MODIFY_EXPR, field_t,
10333 build1 (INDIRECT_REF, field_t, tmp_ha),
10334 build1 (INDIRECT_REF, field_t, t));
10335
10336 /* ha.field[i] = *((field_ptr_t)vr_saved_area + i) */
10337 for (i = 1; i < nregs; ++i)
10338 {
10339 addr = fold_build_pointer_plus_hwi (addr, UNITS_PER_VREG);
10340 u = fold_convert (field_ptr_t, addr);
10341 u = build2 (MODIFY_EXPR, field_t,
10342 build2 (MEM_REF, field_t, tmp_ha,
10343 build_int_cst (field_ptr_t,
10344 (i *
10345 int_size_in_bytes (field_t)))),
10346 build1 (INDIRECT_REF, field_t, u));
10347 t = build2 (COMPOUND_EXPR, TREE_TYPE (t), t, u);
10348 }
10349
10350 u = fold_convert (TREE_TYPE (f_top), tmp_ha);
10351 t = build2 (COMPOUND_EXPR, TREE_TYPE (f_top), t, u);
10352 }
10353
10354 COND_EXPR_ELSE (cond2) = t;
10355 addr = fold_convert (build_pointer_type (type), cond1);
10356 addr = build_va_arg_indirect_ref (addr);
10357
10358 if (indirect_p)
10359 addr = build_va_arg_indirect_ref (addr);
10360
10361 return addr;
10362}
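
/* Illustrative case for the is_ha path above: fetching

     struct hfa { double x, y; };
     ... va_arg (ap, struct hfa) ...

   treats the value as a two-element homogeneous FP aggregate, loads each
   double from successive 16-byte vector-register save slots starting at
   __vr_top + __vr_offs, and assembles them into the local temporary "ha"
   before returning its address.  */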
10363
10364/* Implement TARGET_SETUP_INCOMING_VARARGS. */
10365
10366static void
ef4bddc2 10367aarch64_setup_incoming_varargs (cumulative_args_t cum_v, machine_mode mode,
43e9d192
IB
10368 tree type, int *pretend_size ATTRIBUTE_UNUSED,
10369 int no_rtl)
10370{
10371 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
10372 CUMULATIVE_ARGS local_cum;
88e3bdd1
JW
10373 int gr_saved = cfun->va_list_gpr_size;
10374 int vr_saved = cfun->va_list_fpr_size;
43e9d192
IB
10375
10376 /* The caller has advanced CUM up to, but not beyond, the last named
10377 argument. Advance a local copy of CUM past the last "real" named
10378 argument, to find out how many registers are left over. */
10379 local_cum = *cum;
10380 aarch64_function_arg_advance (pack_cumulative_args(&local_cum), mode, type, true);
10381
88e3bdd1
JW
 10382	  /* Find out how many registers we need to save.
 10383	     Honor the tree-stdarg analysis results.  */
10384 if (cfun->va_list_gpr_size)
10385 gr_saved = MIN (NUM_ARG_REGS - local_cum.aapcs_ncrn,
10386 cfun->va_list_gpr_size / UNITS_PER_WORD);
10387 if (cfun->va_list_fpr_size)
10388 vr_saved = MIN (NUM_FP_ARG_REGS - local_cum.aapcs_nvrn,
10389 cfun->va_list_fpr_size / UNITS_PER_VREG);
43e9d192 10390
d5726973 10391 if (!TARGET_FLOAT)
43e9d192 10392 {
261fb553 10393 gcc_assert (local_cum.aapcs_nvrn == 0);
43e9d192
IB
10394 vr_saved = 0;
10395 }
10396
10397 if (!no_rtl)
10398 {
10399 if (gr_saved > 0)
10400 {
10401 rtx ptr, mem;
10402
10403 /* virtual_incoming_args_rtx should have been 16-byte aligned. */
10404 ptr = plus_constant (Pmode, virtual_incoming_args_rtx,
10405 - gr_saved * UNITS_PER_WORD);
10406 mem = gen_frame_mem (BLKmode, ptr);
10407 set_mem_alias_set (mem, get_varargs_alias_set ());
10408
10409 move_block_from_reg (local_cum.aapcs_ncrn + R0_REGNUM,
10410 mem, gr_saved);
10411 }
10412 if (vr_saved > 0)
10413 {
10414 /* We can't use move_block_from_reg, because it will use
10415 the wrong mode, storing D regs only. */
ef4bddc2 10416 machine_mode mode = TImode;
88e3bdd1 10417 int off, i, vr_start;
43e9d192
IB
10418
10419 /* Set OFF to the offset from virtual_incoming_args_rtx of
10420 the first vector register. The VR save area lies below
10421 the GR one, and is aligned to 16 bytes. */
4f59f9f2
UB
10422 off = -ROUND_UP (gr_saved * UNITS_PER_WORD,
10423 STACK_BOUNDARY / BITS_PER_UNIT);
43e9d192
IB
10424 off -= vr_saved * UNITS_PER_VREG;
10425
88e3bdd1
JW
10426 vr_start = V0_REGNUM + local_cum.aapcs_nvrn;
10427 for (i = 0; i < vr_saved; ++i)
43e9d192
IB
10428 {
10429 rtx ptr, mem;
10430
10431 ptr = plus_constant (Pmode, virtual_incoming_args_rtx, off);
10432 mem = gen_frame_mem (mode, ptr);
10433 set_mem_alias_set (mem, get_varargs_alias_set ());
88e3bdd1 10434 aarch64_emit_move (mem, gen_rtx_REG (mode, vr_start + i));
43e9d192
IB
10435 off += UNITS_PER_VREG;
10436 }
10437 }
10438 }
10439
10440 /* We don't save the size into *PRETEND_SIZE because we want to avoid
10441 any complication of having crtl->args.pretend_args_size changed. */
8799637a 10442 cfun->machine->frame.saved_varargs_size
4f59f9f2
UB
10443 = (ROUND_UP (gr_saved * UNITS_PER_WORD,
10444 STACK_BOUNDARY / BITS_PER_UNIT)
43e9d192
IB
10445 + vr_saved * UNITS_PER_VREG);
10446}
10447
10448static void
10449aarch64_conditional_register_usage (void)
10450{
10451 int i;
10452 if (!TARGET_FLOAT)
10453 {
10454 for (i = V0_REGNUM; i <= V31_REGNUM; i++)
10455 {
10456 fixed_regs[i] = 1;
10457 call_used_regs[i] = 1;
10458 }
10459 }
10460}
10461
10462/* Walk down the type tree of TYPE counting consecutive base elements.
10463 If *MODEP is VOIDmode, then set it to the first valid floating point
10464 type. If a non-floating point type is found, or if a floating point
10465 type that doesn't match a non-VOIDmode *MODEP is found, then return -1,
10466 otherwise return the count in the sub-tree. */
10467static int
ef4bddc2 10468aapcs_vfp_sub_candidate (const_tree type, machine_mode *modep)
43e9d192 10469{
ef4bddc2 10470 machine_mode mode;
43e9d192
IB
10471 HOST_WIDE_INT size;
10472
10473 switch (TREE_CODE (type))
10474 {
10475 case REAL_TYPE:
10476 mode = TYPE_MODE (type);
1b62ed4f
JG
10477 if (mode != DFmode && mode != SFmode
10478 && mode != TFmode && mode != HFmode)
43e9d192
IB
10479 return -1;
10480
10481 if (*modep == VOIDmode)
10482 *modep = mode;
10483
10484 if (*modep == mode)
10485 return 1;
10486
10487 break;
10488
10489 case COMPLEX_TYPE:
10490 mode = TYPE_MODE (TREE_TYPE (type));
1b62ed4f
JG
10491 if (mode != DFmode && mode != SFmode
10492 && mode != TFmode && mode != HFmode)
43e9d192
IB
10493 return -1;
10494
10495 if (*modep == VOIDmode)
10496 *modep = mode;
10497
10498 if (*modep == mode)
10499 return 2;
10500
10501 break;
10502
10503 case VECTOR_TYPE:
10504 /* Use V2SImode and V4SImode as representatives of all 64-bit
10505 and 128-bit vector types. */
10506 size = int_size_in_bytes (type);
10507 switch (size)
10508 {
10509 case 8:
10510 mode = V2SImode;
10511 break;
10512 case 16:
10513 mode = V4SImode;
10514 break;
10515 default:
10516 return -1;
10517 }
10518
10519 if (*modep == VOIDmode)
10520 *modep = mode;
10521
10522 /* Vector modes are considered to be opaque: two vectors are
10523 equivalent for the purposes of being homogeneous aggregates
10524 if they are the same size. */
10525 if (*modep == mode)
10526 return 1;
10527
10528 break;
10529
10530 case ARRAY_TYPE:
10531 {
10532 int count;
10533 tree index = TYPE_DOMAIN (type);
10534
807e902e
KZ
10535 /* Can't handle incomplete types nor sizes that are not
10536 fixed. */
10537 if (!COMPLETE_TYPE_P (type)
10538 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
43e9d192
IB
10539 return -1;
10540
10541 count = aapcs_vfp_sub_candidate (TREE_TYPE (type), modep);
10542 if (count == -1
10543 || !index
10544 || !TYPE_MAX_VALUE (index)
cc269bb6 10545 || !tree_fits_uhwi_p (TYPE_MAX_VALUE (index))
43e9d192 10546 || !TYPE_MIN_VALUE (index)
cc269bb6 10547 || !tree_fits_uhwi_p (TYPE_MIN_VALUE (index))
43e9d192
IB
10548 || count < 0)
10549 return -1;
10550
ae7e9ddd
RS
10551 count *= (1 + tree_to_uhwi (TYPE_MAX_VALUE (index))
10552 - tree_to_uhwi (TYPE_MIN_VALUE (index)));
43e9d192
IB
10553
10554 /* There must be no padding. */
807e902e 10555 if (wi::ne_p (TYPE_SIZE (type), count * GET_MODE_BITSIZE (*modep)))
43e9d192
IB
10556 return -1;
10557
10558 return count;
10559 }
10560
10561 case RECORD_TYPE:
10562 {
10563 int count = 0;
10564 int sub_count;
10565 tree field;
10566
807e902e
KZ
10567 /* Can't handle incomplete types nor sizes that are not
10568 fixed. */
10569 if (!COMPLETE_TYPE_P (type)
10570 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
43e9d192
IB
10571 return -1;
10572
10573 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
10574 {
10575 if (TREE_CODE (field) != FIELD_DECL)
10576 continue;
10577
10578 sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep);
10579 if (sub_count < 0)
10580 return -1;
10581 count += sub_count;
10582 }
10583
10584 /* There must be no padding. */
807e902e 10585 if (wi::ne_p (TYPE_SIZE (type), count * GET_MODE_BITSIZE (*modep)))
43e9d192
IB
10586 return -1;
10587
10588 return count;
10589 }
10590
10591 case UNION_TYPE:
10592 case QUAL_UNION_TYPE:
10593 {
10594 /* These aren't very interesting except in a degenerate case. */
10595 int count = 0;
10596 int sub_count;
10597 tree field;
10598
807e902e
KZ
10599 /* Can't handle incomplete types nor sizes that are not
10600 fixed. */
10601 if (!COMPLETE_TYPE_P (type)
10602 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
43e9d192
IB
10603 return -1;
10604
10605 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
10606 {
10607 if (TREE_CODE (field) != FIELD_DECL)
10608 continue;
10609
10610 sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep);
10611 if (sub_count < 0)
10612 return -1;
10613 count = count > sub_count ? count : sub_count;
10614 }
10615
10616 /* There must be no padding. */
807e902e 10617 if (wi::ne_p (TYPE_SIZE (type), count * GET_MODE_BITSIZE (*modep)))
43e9d192
IB
10618 return -1;
10619
10620 return count;
10621 }
10622
10623 default:
10624 break;
10625 }
10626
10627 return -1;
10628}
10629
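/* Illustrative C-level view of the classification above (types invented
   for exposition):

     struct quat { float x, y, z, w; };   // four SFmode fields
     struct bad  { float x; double y; };  // mixed SFmode / DFmode

   For "struct quat" the walk sets *MODEP to SFmode and returns 4, so the
   struct is a homogeneous aggregate candidate; "struct bad" returns -1
   because the DFmode field does not match the established *MODEP.  */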
b6ec6215
KT
10630/* Return TRUE if the type, as described by TYPE and MODE, is a short vector
10631 type as described in AAPCS64 \S 4.1.2.
10632
10633 See the comment above aarch64_composite_type_p for the notes on MODE. */
10634
10635static bool
10636aarch64_short_vector_p (const_tree type,
10637 machine_mode mode)
10638{
10639 HOST_WIDE_INT size = -1;
10640
10641 if (type && TREE_CODE (type) == VECTOR_TYPE)
10642 size = int_size_in_bytes (type);
10643 else if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
10644 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
10645 size = GET_MODE_SIZE (mode);
10646
10647 return (size == 8 || size == 16);
10648}
10649
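/* For illustration (using the arm_neon.h names, which this file does not
   otherwise rely on): int32x2_t and float32x2_t are 8 bytes, and int32x4_t
   and float64x2_t are 16 bytes, so all of them are AAPCS64 short vectors;
   a 32-byte GNU vector type would not be.  */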
43e9d192
IB
10650/* Return TRUE if the type, as described by TYPE and MODE, is a composite
10651 type as described in AAPCS64 \S 4.3. This includes aggregate, union and
10652 array types. The C99 floating-point complex types are also considered
10653 as composite types, according to AAPCS64 \S 7.1.1. The complex integer
10654 types, which are GCC extensions and out of the scope of AAPCS64, are
10655 treated as composite types here as well.
10656
10657 Note that MODE itself is not sufficient in determining whether a type
10658 is such a composite type or not. This is because
10659 stor-layout.c:compute_record_mode may have already changed the MODE
10660 (BLKmode) of a RECORD_TYPE TYPE to some other mode. For example, a
10661 structure with only one field may have its MODE set to the mode of the
10662 field. Also an integer mode whose size matches the size of the
10663 RECORD_TYPE type may be used to substitute the original mode
10664 (i.e. BLKmode) in certain circumstances. In other words, MODE cannot be
10665 solely relied on. */
10666
10667static bool
10668aarch64_composite_type_p (const_tree type,
ef4bddc2 10669 machine_mode mode)
43e9d192 10670{
b6ec6215
KT
10671 if (aarch64_short_vector_p (type, mode))
10672 return false;
10673
43e9d192
IB
10674 if (type && (AGGREGATE_TYPE_P (type) || TREE_CODE (type) == COMPLEX_TYPE))
10675 return true;
10676
10677 if (mode == BLKmode
10678 || GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT
10679 || GET_MODE_CLASS (mode) == MODE_COMPLEX_INT)
10680 return true;
10681
10682 return false;
10683}
10684
43e9d192
IB
10685/* Return TRUE if an argument, whose type is described by TYPE and MODE,
10686 shall be passed or returned in simd/fp register(s) (providing these
10687 parameter passing registers are available).
10688
10689 Upon successful return, *COUNT returns the number of needed registers,
 10690 *BASE_MODE returns the mode of the individual register and when IS_HA
10691 is not NULL, *IS_HA indicates whether or not the argument is a homogeneous
10692 floating-point aggregate or a homogeneous short-vector aggregate. */
10693
10694static bool
ef4bddc2 10695aarch64_vfp_is_call_or_return_candidate (machine_mode mode,
43e9d192 10696 const_tree type,
ef4bddc2 10697 machine_mode *base_mode,
43e9d192
IB
10698 int *count,
10699 bool *is_ha)
10700{
ef4bddc2 10701 machine_mode new_mode = VOIDmode;
43e9d192
IB
10702 bool composite_p = aarch64_composite_type_p (type, mode);
10703
10704 if (is_ha != NULL) *is_ha = false;
10705
10706 if ((!composite_p && GET_MODE_CLASS (mode) == MODE_FLOAT)
10707 || aarch64_short_vector_p (type, mode))
10708 {
10709 *count = 1;
10710 new_mode = mode;
10711 }
10712 else if (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT)
10713 {
10714 if (is_ha != NULL) *is_ha = true;
10715 *count = 2;
10716 new_mode = GET_MODE_INNER (mode);
10717 }
10718 else if (type && composite_p)
10719 {
10720 int ag_count = aapcs_vfp_sub_candidate (type, &new_mode);
10721
10722 if (ag_count > 0 && ag_count <= HA_MAX_NUM_FLDS)
10723 {
10724 if (is_ha != NULL) *is_ha = true;
10725 *count = ag_count;
10726 }
10727 else
10728 return false;
10729 }
10730 else
10731 return false;
10732
10733 *base_mode = new_mode;
10734 return true;
10735}
10736
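/* Sketch of a typical outcome of the classification above (assumed
   example): for

     struct hfa3 { double a, b, c; };

   the call sets *BASE_MODE = DFmode, *COUNT = 3 and *IS_HA = true, so the
   argument or return value is allocated to three consecutive D registers
   (d0-d2 for the first such argument) when enough remain.  */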
10737/* Implement TARGET_STRUCT_VALUE_RTX. */
10738
10739static rtx
10740aarch64_struct_value_rtx (tree fndecl ATTRIBUTE_UNUSED,
10741 int incoming ATTRIBUTE_UNUSED)
10742{
10743 return gen_rtx_REG (Pmode, AARCH64_STRUCT_VALUE_REGNUM);
10744}
10745
10746/* Implements target hook vector_mode_supported_p. */
10747static bool
ef4bddc2 10748aarch64_vector_mode_supported_p (machine_mode mode)
43e9d192
IB
10749{
10750 if (TARGET_SIMD
10751 && (mode == V4SImode || mode == V8HImode
10752 || mode == V16QImode || mode == V2DImode
10753 || mode == V2SImode || mode == V4HImode
10754 || mode == V8QImode || mode == V2SFmode
ad7d90cc 10755 || mode == V4SFmode || mode == V2DFmode
71a11456 10756 || mode == V4HFmode || mode == V8HFmode
ad7d90cc 10757 || mode == V1DFmode))
43e9d192
IB
10758 return true;
10759
10760 return false;
10761}
10762
b7342d25
IB
10763/* Return appropriate SIMD container
10764 for MODE within a vector of WIDTH bits. */
ef4bddc2
RS
10765static machine_mode
10766aarch64_simd_container_mode (machine_mode mode, unsigned width)
43e9d192 10767{
b7342d25 10768 gcc_assert (width == 64 || width == 128);
43e9d192 10769 if (TARGET_SIMD)
b7342d25
IB
10770 {
10771 if (width == 128)
10772 switch (mode)
10773 {
10774 case DFmode:
10775 return V2DFmode;
10776 case SFmode:
10777 return V4SFmode;
10778 case SImode:
10779 return V4SImode;
10780 case HImode:
10781 return V8HImode;
10782 case QImode:
10783 return V16QImode;
10784 case DImode:
10785 return V2DImode;
10786 default:
10787 break;
10788 }
10789 else
10790 switch (mode)
10791 {
10792 case SFmode:
10793 return V2SFmode;
10794 case SImode:
10795 return V2SImode;
10796 case HImode:
10797 return V4HImode;
10798 case QImode:
10799 return V8QImode;
10800 default:
10801 break;
10802 }
10803 }
43e9d192
IB
10804 return word_mode;
10805}
10806
b7342d25 10807/* Return 128-bit container as the preferred SIMD mode for MODE. */
ef4bddc2
RS
10808static machine_mode
10809aarch64_preferred_simd_mode (machine_mode mode)
b7342d25
IB
10810{
10811 return aarch64_simd_container_mode (mode, 128);
10812}
10813
3b357264
JG
10814/* Return the bitmask of possible vector sizes for the vectorizer
10815 to iterate over. */
10816static unsigned int
10817aarch64_autovectorize_vector_sizes (void)
10818{
10819 return (16 | 8);
10820}
10821
ac2b960f
YZ
10822/* Implement TARGET_MANGLE_TYPE. */
10823
6f549691 10824static const char *
ac2b960f
YZ
10825aarch64_mangle_type (const_tree type)
10826{
10827 /* The AArch64 ABI documents say that "__va_list" has to be
 10828 mangled as if it is in the "std" namespace. */
10829 if (lang_hooks.types_compatible_p (CONST_CAST_TREE (type), va_list_type))
10830 return "St9__va_list";
10831
c2ec330c
AL
10832 /* Half-precision float. */
10833 if (TREE_CODE (type) == REAL_TYPE && TYPE_PRECISION (type) == 16)
10834 return "Dh";
10835
f9d53c27
TB
10836 /* Mangle AArch64-specific internal types. TYPE_NAME is non-NULL_TREE for
10837 builtin types. */
10838 if (TYPE_NAME (type) != NULL)
10839 return aarch64_mangle_builtin_type (type);
c6fc9e43 10840
ac2b960f
YZ
10841 /* Use the default mangling. */
10842 return NULL;
10843}
10844
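/* Mangling examples for the special cases above (standard Itanium C++ ABI
   spellings, shown for illustration): a __builtin_va_list parameter uses
   "St9__va_list", so "void f (__builtin_va_list)" becomes
   "_Z1fSt9__va_list", and a __fp16 parameter uses "Dh", giving "_Z1fDh"
   for "void f (__fp16)".  */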
8baff86e
KT
10845
10846/* Return true if the rtx_insn contains a MEM RTX somewhere
10847 in it. */
75cf1494
KT
10848
10849static bool
8baff86e 10850has_memory_op (rtx_insn *mem_insn)
75cf1494 10851{
8baff86e
KT
10852 subrtx_iterator::array_type array;
10853 FOR_EACH_SUBRTX (iter, array, PATTERN (mem_insn), ALL)
10854 if (MEM_P (*iter))
10855 return true;
10856
10857 return false;
75cf1494
KT
10858}
10859
10860/* Find the first rtx_insn before insn that will generate an assembly
10861 instruction. */
10862
10863static rtx_insn *
10864aarch64_prev_real_insn (rtx_insn *insn)
10865{
10866 if (!insn)
10867 return NULL;
10868
10869 do
10870 {
10871 insn = prev_real_insn (insn);
10872 }
10873 while (insn && recog_memoized (insn) < 0);
10874
10875 return insn;
10876}
10877
10878static bool
10879is_madd_op (enum attr_type t1)
10880{
10881 unsigned int i;
10882 /* A number of these may be AArch32 only. */
10883 enum attr_type mlatypes[] = {
10884 TYPE_MLA, TYPE_MLAS, TYPE_SMLAD, TYPE_SMLADX, TYPE_SMLAL, TYPE_SMLALD,
10885 TYPE_SMLALS, TYPE_SMLALXY, TYPE_SMLAWX, TYPE_SMLAWY, TYPE_SMLAXY,
 10886 TYPE_SMMLA, TYPE_UMLAL, TYPE_UMLALS, TYPE_SMLSD, TYPE_SMLSDX, TYPE_SMLSLD
10887 };
10888
10889 for (i = 0; i < sizeof (mlatypes) / sizeof (enum attr_type); i++)
10890 {
10891 if (t1 == mlatypes[i])
10892 return true;
10893 }
10894
10895 return false;
10896}
10897
10898/* Check if there is a register dependency between a load and the insn
10899 for which we hold recog_data. */
10900
10901static bool
10902dep_between_memop_and_curr (rtx memop)
10903{
10904 rtx load_reg;
10905 int opno;
10906
8baff86e 10907 gcc_assert (GET_CODE (memop) == SET);
75cf1494
KT
10908
10909 if (!REG_P (SET_DEST (memop)))
10910 return false;
10911
10912 load_reg = SET_DEST (memop);
8baff86e 10913 for (opno = 1; opno < recog_data.n_operands; opno++)
75cf1494
KT
10914 {
10915 rtx operand = recog_data.operand[opno];
10916 if (REG_P (operand)
10917 && reg_overlap_mentioned_p (load_reg, operand))
10918 return true;
10919
10920 }
10921 return false;
10922}
10923
8baff86e
KT
10924
10925/* When working around the Cortex-A53 erratum 835769,
10926 given rtx_insn INSN, return true if it is a 64-bit multiply-accumulate
10927 instruction and has a preceding memory instruction such that a NOP
10928 should be inserted between them. */
10929
75cf1494
KT
10930bool
10931aarch64_madd_needs_nop (rtx_insn* insn)
10932{
10933 enum attr_type attr_type;
10934 rtx_insn *prev;
10935 rtx body;
10936
b32c1043 10937 if (!TARGET_FIX_ERR_A53_835769)
75cf1494
KT
10938 return false;
10939
e322d6e3 10940 if (!INSN_P (insn) || recog_memoized (insn) < 0)
75cf1494
KT
10941 return false;
10942
10943 attr_type = get_attr_type (insn);
10944 if (!is_madd_op (attr_type))
10945 return false;
10946
10947 prev = aarch64_prev_real_insn (insn);
3fea1a75
KT
10948 /* aarch64_prev_real_insn can call recog_memoized on insns other than INSN.
10949 Restore recog state to INSN to avoid state corruption. */
10950 extract_constrain_insn_cached (insn);
10951
8baff86e 10952 if (!prev || !has_memory_op (prev))
75cf1494
KT
10953 return false;
10954
10955 body = single_set (prev);
10956
10957 /* If the previous insn is a memory op and there is no dependency between
8baff86e
KT
10958 it and the DImode madd, emit a NOP between them. If body is NULL then we
10959 have a complex memory operation, probably a load/store pair.
10960 Be conservative for now and emit a NOP. */
10961 if (GET_MODE (recog_data.operand[0]) == DImode
10962 && (!body || !dep_between_memop_and_curr (body)))
75cf1494
KT
10963 return true;
10964
10965 return false;
10966
10967}
10968
8baff86e
KT
10969
10970/* Implement FINAL_PRESCAN_INSN. */
10971
75cf1494
KT
10972void
10973aarch64_final_prescan_insn (rtx_insn *insn)
10974{
10975 if (aarch64_madd_needs_nop (insn))
10976 fprintf (asm_out_file, "\tnop // between mem op and mult-accumulate\n");
10977}
10978
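/* Indicative effect of the erratum 835769 workaround above (the assembly is
   a sketch; register choices depend on the surrounding code):

     ldr  x1, [x2]
     nop // between mem op and mult-accumulate
     madd x0, x3, x4, x5

   The nop is emitted only when -mfix-cortex-a53-835769
   (TARGET_FIX_ERR_A53_835769) is in effect.  */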
10979
43e9d192 10980/* Return the equivalent letter for size. */
81c2dfb9 10981static char
43e9d192
IB
10982sizetochar (int size)
10983{
10984 switch (size)
10985 {
10986 case 64: return 'd';
10987 case 32: return 's';
10988 case 16: return 'h';
10989 case 8 : return 'b';
10990 default: gcc_unreachable ();
10991 }
10992}
10993
3520f7cc
JG
10994/* Return true iff x is a uniform vector of floating-point
10995 constants, and the constant can be represented in
10996 quarter-precision form. Note, as aarch64_float_const_representable
10997 rejects both +0.0 and -0.0, we will also reject +0.0 and -0.0. */
10998static bool
10999aarch64_vect_float_const_representable_p (rtx x)
11000{
92695fbb
RS
11001 rtx elt;
11002 return (GET_MODE_CLASS (GET_MODE (x)) == MODE_VECTOR_FLOAT
11003 && const_vec_duplicate_p (x, &elt)
11004 && aarch64_float_const_representable_p (elt));
3520f7cc
JG
11005}
11006
d8edd899 11007/* Return true for valid and false for invalid. */
3ea63f60 11008bool
ef4bddc2 11009aarch64_simd_valid_immediate (rtx op, machine_mode mode, bool inverse,
48063b9d 11010 struct simd_immediate_info *info)
43e9d192
IB
11011{
11012#define CHECK(STRIDE, ELSIZE, CLASS, TEST, SHIFT, NEG) \
11013 matches = 1; \
11014 for (i = 0; i < idx; i += (STRIDE)) \
11015 if (!(TEST)) \
11016 matches = 0; \
11017 if (matches) \
11018 { \
11019 immtype = (CLASS); \
11020 elsize = (ELSIZE); \
43e9d192
IB
11021 eshift = (SHIFT); \
11022 emvn = (NEG); \
11023 break; \
11024 }
11025
11026 unsigned int i, elsize = 0, idx = 0, n_elts = CONST_VECTOR_NUNITS (op);
cb5ca315 11027 unsigned int innersize = GET_MODE_UNIT_SIZE (mode);
43e9d192 11028 unsigned char bytes[16];
43e9d192
IB
11029 int immtype = -1, matches;
11030 unsigned int invmask = inverse ? 0xff : 0;
11031 int eshift, emvn;
11032
43e9d192 11033 if (GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
3520f7cc 11034 {
81c2dfb9
IB
11035 if (! (aarch64_simd_imm_zero_p (op, mode)
11036 || aarch64_vect_float_const_representable_p (op)))
d8edd899 11037 return false;
3520f7cc 11038
48063b9d
IB
11039 if (info)
11040 {
11041 info->value = CONST_VECTOR_ELT (op, 0);
81c2dfb9 11042 info->element_width = GET_MODE_BITSIZE (GET_MODE (info->value));
48063b9d
IB
11043 info->mvn = false;
11044 info->shift = 0;
11045 }
3520f7cc 11046
d8edd899 11047 return true;
3520f7cc 11048 }
43e9d192
IB
11049
11050 /* Splat vector constant out into a byte vector. */
11051 for (i = 0; i < n_elts; i++)
11052 {
4b1e108c
AL
11053 /* The vector is provided in gcc endian-neutral fashion. For aarch64_be,
11054 it must be laid out in the vector register in reverse order. */
11055 rtx el = CONST_VECTOR_ELT (op, BYTES_BIG_ENDIAN ? (n_elts - 1 - i) : i);
43e9d192 11056 unsigned HOST_WIDE_INT elpart;
43e9d192 11057
ee78df47
KT
11058 gcc_assert (CONST_INT_P (el));
11059 elpart = INTVAL (el);
11060
11061 for (unsigned int byte = 0; byte < innersize; byte++)
11062 {
11063 bytes[idx++] = (elpart & 0xff) ^ invmask;
11064 elpart >>= BITS_PER_UNIT;
11065 }
43e9d192 11066
43e9d192
IB
11067 }
11068
11069 /* Sanity check. */
11070 gcc_assert (idx == GET_MODE_SIZE (mode));
11071
11072 do
11073 {
11074 CHECK (4, 32, 0, bytes[i] == bytes[0] && bytes[i + 1] == 0
11075 && bytes[i + 2] == 0 && bytes[i + 3] == 0, 0, 0);
11076
11077 CHECK (4, 32, 1, bytes[i] == 0 && bytes[i + 1] == bytes[1]
11078 && bytes[i + 2] == 0 && bytes[i + 3] == 0, 8, 0);
11079
11080 CHECK (4, 32, 2, bytes[i] == 0 && bytes[i + 1] == 0
11081 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0, 16, 0);
11082
11083 CHECK (4, 32, 3, bytes[i] == 0 && bytes[i + 1] == 0
11084 && bytes[i + 2] == 0 && bytes[i + 3] == bytes[3], 24, 0);
11085
11086 CHECK (2, 16, 4, bytes[i] == bytes[0] && bytes[i + 1] == 0, 0, 0);
11087
11088 CHECK (2, 16, 5, bytes[i] == 0 && bytes[i + 1] == bytes[1], 8, 0);
11089
11090 CHECK (4, 32, 6, bytes[i] == bytes[0] && bytes[i + 1] == 0xff
11091 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff, 0, 1);
11092
11093 CHECK (4, 32, 7, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
11094 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff, 8, 1);
11095
11096 CHECK (4, 32, 8, bytes[i] == 0xff && bytes[i + 1] == 0xff
11097 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff, 16, 1);
11098
11099 CHECK (4, 32, 9, bytes[i] == 0xff && bytes[i + 1] == 0xff
11100 && bytes[i + 2] == 0xff && bytes[i + 3] == bytes[3], 24, 1);
11101
11102 CHECK (2, 16, 10, bytes[i] == bytes[0] && bytes[i + 1] == 0xff, 0, 1);
11103
11104 CHECK (2, 16, 11, bytes[i] == 0xff && bytes[i + 1] == bytes[1], 8, 1);
11105
11106 CHECK (4, 32, 12, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
e4f0f84d 11107 && bytes[i + 2] == 0 && bytes[i + 3] == 0, 8, 0);
43e9d192
IB
11108
11109 CHECK (4, 32, 13, bytes[i] == 0 && bytes[i + 1] == bytes[1]
e4f0f84d 11110 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff, 8, 1);
43e9d192
IB
11111
11112 CHECK (4, 32, 14, bytes[i] == 0xff && bytes[i + 1] == 0xff
e4f0f84d 11113 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0, 16, 0);
43e9d192
IB
11114
11115 CHECK (4, 32, 15, bytes[i] == 0 && bytes[i + 1] == 0
e4f0f84d 11116 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff, 16, 1);
43e9d192
IB
11117
11118 CHECK (1, 8, 16, bytes[i] == bytes[0], 0, 0);
11119
11120 CHECK (1, 64, 17, (bytes[i] == 0 || bytes[i] == 0xff)
11121 && bytes[i] == bytes[(i + 8) % idx], 0, 0);
11122 }
11123 while (0);
11124
e4f0f84d 11125 if (immtype == -1)
d8edd899 11126 return false;
43e9d192 11127
48063b9d 11128 if (info)
43e9d192 11129 {
48063b9d 11130 info->element_width = elsize;
48063b9d
IB
11131 info->mvn = emvn != 0;
11132 info->shift = eshift;
11133
43e9d192
IB
11134 unsigned HOST_WIDE_INT imm = 0;
11135
e4f0f84d
TB
11136 if (immtype >= 12 && immtype <= 15)
11137 info->msl = true;
11138
43e9d192
IB
11139 /* Un-invert bytes of recognized vector, if necessary. */
11140 if (invmask != 0)
11141 for (i = 0; i < idx; i++)
11142 bytes[i] ^= invmask;
11143
11144 if (immtype == 17)
11145 {
11146 /* FIXME: Broken on 32-bit H_W_I hosts. */
11147 gcc_assert (sizeof (HOST_WIDE_INT) == 8);
11148
11149 for (i = 0; i < 8; i++)
11150 imm |= (unsigned HOST_WIDE_INT) (bytes[i] ? 0xff : 0)
11151 << (i * BITS_PER_UNIT);
11152
43e9d192 11153
48063b9d
IB
11154 info->value = GEN_INT (imm);
11155 }
11156 else
11157 {
11158 for (i = 0; i < elsize / BITS_PER_UNIT; i++)
11159 imm |= (unsigned HOST_WIDE_INT) bytes[i] << (i * BITS_PER_UNIT);
43e9d192
IB
11160
11161 /* Construct 'abcdefgh' because the assembler cannot handle
48063b9d
IB
11162 generic constants. */
11163 if (info->mvn)
43e9d192 11164 imm = ~imm;
48063b9d
IB
11165 imm = (imm >> info->shift) & 0xff;
11166 info->value = GEN_INT (imm);
11167 }
43e9d192
IB
11168 }
11169
48063b9d 11170 return true;
43e9d192
IB
11171#undef CHECK
11172}
11173
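/* Worked example for the recognizer above (value chosen for illustration):
   a V4SImode constant whose every element is 0x00ff0000 splats to the byte
   pattern 00 00 ff 00 per element, matching the CHECK (4, 32, 2, ...) case:
   element width 32, shift 16, not inverted.  That is exactly the immediate
   that "movi v0.4s, #0xff, lsl #16" can encode.  */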
43e9d192
IB
11174/* Check if immediate shift constants are within range. */
11175bool
ef4bddc2 11176aarch64_simd_shift_imm_p (rtx x, machine_mode mode, bool left)
43e9d192
IB
11177{
11178 int bit_width = GET_MODE_UNIT_SIZE (mode) * BITS_PER_UNIT;
11179 if (left)
ddeabd3e 11180 return aarch64_const_vec_all_same_in_range_p (x, 0, bit_width - 1);
43e9d192 11181 else
ddeabd3e 11182 return aarch64_const_vec_all_same_in_range_p (x, 1, bit_width);
43e9d192
IB
11183}
11184
3520f7cc
JG
11185/* Return true if X is a uniform vector where all elements
11186 are either the floating-point constant 0.0 or the
11187 integer constant 0. */
43e9d192 11188bool
ef4bddc2 11189aarch64_simd_imm_zero_p (rtx x, machine_mode mode)
43e9d192 11190{
3520f7cc 11191 return x == CONST0_RTX (mode);
43e9d192
IB
11192}
11193
7325d85a
KT
11194
11195/* Return the bitmask CONST_INT to select the bits required by a zero extract
11196 operation of width WIDTH at bit position POS. */
11197
11198rtx
11199aarch64_mask_from_zextract_ops (rtx width, rtx pos)
11200{
11201 gcc_assert (CONST_INT_P (width));
11202 gcc_assert (CONST_INT_P (pos));
11203
11204 unsigned HOST_WIDE_INT mask
11205 = ((unsigned HOST_WIDE_INT) 1 << UINTVAL (width)) - 1;
11206 return GEN_INT (mask << UINTVAL (pos));
11207}
11208
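/* For example (simple arithmetic illustration): WIDTH = 8 and POS = 16
   yield the mask ((HOST_WIDE_INT) 1 << 8) - 1 = 0xff shifted left by 16,
   i.e. GEN_INT (0xff0000), selecting bits 16-23.  */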
43e9d192 11209bool
ef4bddc2 11210aarch64_simd_imm_scalar_p (rtx x, machine_mode mode ATTRIBUTE_UNUSED)
43e9d192
IB
11211{
11212 HOST_WIDE_INT imm = INTVAL (x);
11213 int i;
11214
11215 for (i = 0; i < 8; i++)
11216 {
11217 unsigned int byte = imm & 0xff;
11218 if (byte != 0xff && byte != 0)
11219 return false;
11220 imm >>= 8;
11221 }
11222
11223 return true;
11224}
11225
83f8c414 11226bool
a6e0bfa7 11227aarch64_mov_operand_p (rtx x, machine_mode mode)
83f8c414 11228{
83f8c414
CSS
11229 if (GET_CODE (x) == HIGH
11230 && aarch64_valid_symref (XEXP (x, 0), GET_MODE (XEXP (x, 0))))
11231 return true;
11232
82614948 11233 if (CONST_INT_P (x))
83f8c414
CSS
11234 return true;
11235
11236 if (GET_CODE (x) == SYMBOL_REF && mode == DImode && CONSTANT_ADDRESS_P (x))
11237 return true;
11238
a6e0bfa7 11239 return aarch64_classify_symbolic_expression (x)
a5350ddc 11240 == SYMBOL_TINY_ABSOLUTE;
83f8c414
CSS
11241}
11242
43e9d192
IB
11243/* Return a const_int vector of VAL. */
11244rtx
ef4bddc2 11245aarch64_simd_gen_const_vector_dup (machine_mode mode, int val)
43e9d192
IB
11246{
11247 int nunits = GET_MODE_NUNITS (mode);
11248 rtvec v = rtvec_alloc (nunits);
11249 int i;
11250
11251 for (i=0; i < nunits; i++)
11252 RTVEC_ELT (v, i) = GEN_INT (val);
11253
11254 return gen_rtx_CONST_VECTOR (mode, v);
11255}
11256
051d0e2f
SN
11257/* Check OP is a legal scalar immediate for the MOVI instruction. */
11258
11259bool
ef4bddc2 11260aarch64_simd_scalar_immediate_valid_for_move (rtx op, machine_mode mode)
051d0e2f 11261{
ef4bddc2 11262 machine_mode vmode;
051d0e2f
SN
11263
11264 gcc_assert (!VECTOR_MODE_P (mode));
11265 vmode = aarch64_preferred_simd_mode (mode);
11266 rtx op_v = aarch64_simd_gen_const_vector_dup (vmode, INTVAL (op));
48063b9d 11267 return aarch64_simd_valid_immediate (op_v, vmode, false, NULL);
051d0e2f
SN
11268}
11269
988fa693
JG
11270/* Construct and return a PARALLEL RTX vector with elements numbering the
11271 lanes of either the high (HIGH == TRUE) or low (HIGH == FALSE) half of
11272 the vector - from the perspective of the architecture. This does not
11273 line up with GCC's perspective on lane numbers, so we end up with
11274 different masks depending on our target endian-ness. The diagram
11275 below may help. We must draw the distinction when building masks
11276 which select one half of the vector. An instruction selecting
11277 architectural low-lanes for a big-endian target, must be described using
11278 a mask selecting GCC high-lanes.
11279
11280 Big-Endian Little-Endian
11281
11282GCC 0 1 2 3 3 2 1 0
11283 | x | x | x | x | | x | x | x | x |
11284Architecture 3 2 1 0 3 2 1 0
11285
11286Low Mask: { 2, 3 } { 0, 1 }
11287High Mask: { 0, 1 } { 2, 3 }
11288*/
11289
43e9d192 11290rtx
ef4bddc2 11291aarch64_simd_vect_par_cnst_half (machine_mode mode, bool high)
43e9d192
IB
11292{
11293 int nunits = GET_MODE_NUNITS (mode);
11294 rtvec v = rtvec_alloc (nunits / 2);
988fa693
JG
11295 int high_base = nunits / 2;
11296 int low_base = 0;
11297 int base;
43e9d192
IB
11298 rtx t1;
11299 int i;
11300
988fa693
JG
11301 if (BYTES_BIG_ENDIAN)
11302 base = high ? low_base : high_base;
11303 else
11304 base = high ? high_base : low_base;
11305
11306 for (i = 0; i < nunits / 2; i++)
43e9d192
IB
11307 RTVEC_ELT (v, i) = GEN_INT (base + i);
11308
11309 t1 = gen_rtx_PARALLEL (mode, v);
11310 return t1;
11311}
11312
988fa693
JG
11313/* Check OP for validity as a PARALLEL RTX vector with elements
11314 numbering the lanes of either the high (HIGH == TRUE) or low lanes,
11315 from the perspective of the architecture. See the diagram above
11316 aarch64_simd_vect_par_cnst_half for more details. */
11317
11318bool
ef4bddc2 11319aarch64_simd_check_vect_par_cnst_half (rtx op, machine_mode mode,
988fa693
JG
11320 bool high)
11321{
11322 rtx ideal = aarch64_simd_vect_par_cnst_half (mode, high);
11323 HOST_WIDE_INT count_op = XVECLEN (op, 0);
11324 HOST_WIDE_INT count_ideal = XVECLEN (ideal, 0);
11325 int i = 0;
11326
11327 if (!VECTOR_MODE_P (mode))
11328 return false;
11329
11330 if (count_op != count_ideal)
11331 return false;
11332
11333 for (i = 0; i < count_ideal; i++)
11334 {
11335 rtx elt_op = XVECEXP (op, 0, i);
11336 rtx elt_ideal = XVECEXP (ideal, 0, i);
11337
4aa81c2e 11338 if (!CONST_INT_P (elt_op)
988fa693
JG
11339 || INTVAL (elt_ideal) != INTVAL (elt_op))
11340 return false;
11341 }
11342 return true;
11343}
11344
43e9d192
IB
11345/* Bounds-check lanes. Ensure OPERAND lies between LOW (inclusive) and
11346 HIGH (exclusive). */
11347void
46ed6024
CB
11348aarch64_simd_lane_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high,
11349 const_tree exp)
43e9d192
IB
11350{
11351 HOST_WIDE_INT lane;
4aa81c2e 11352 gcc_assert (CONST_INT_P (operand));
43e9d192
IB
11353 lane = INTVAL (operand);
11354
11355 if (lane < low || lane >= high)
46ed6024
CB
11356 {
11357 if (exp)
cf0c27ef 11358 error ("%Klane %wd out of range %wd - %wd", exp, lane, low, high - 1);
46ed6024 11359 else
cf0c27ef 11360 error ("lane %wd out of range %wd - %wd", lane, low, high - 1);
46ed6024 11361 }
43e9d192
IB
11362}
11363
43e9d192
IB
11364/* Return TRUE if OP is a valid vector addressing mode. */
11365bool
11366aarch64_simd_mem_operand_p (rtx op)
11367{
11368 return MEM_P (op) && (GET_CODE (XEXP (op, 0)) == POST_INC
4aa81c2e 11369 || REG_P (XEXP (op, 0)));
43e9d192
IB
11370}
11371
2d8c6dc1
AH
11372/* Emit a register copy from operand to operand, taking care not to
11373 early-clobber source registers in the process.
43e9d192 11374
2d8c6dc1
AH
11375 COUNT is the number of components into which the copy needs to be
11376 decomposed. */
43e9d192 11377void
2d8c6dc1
AH
11378aarch64_simd_emit_reg_reg_move (rtx *operands, enum machine_mode mode,
11379 unsigned int count)
43e9d192
IB
11380{
11381 unsigned int i;
2d8c6dc1
AH
11382 int rdest = REGNO (operands[0]);
11383 int rsrc = REGNO (operands[1]);
43e9d192
IB
11384
11385 if (!reg_overlap_mentioned_p (operands[0], operands[1])
2d8c6dc1
AH
11386 || rdest < rsrc)
11387 for (i = 0; i < count; i++)
11388 emit_move_insn (gen_rtx_REG (mode, rdest + i),
11389 gen_rtx_REG (mode, rsrc + i));
43e9d192 11390 else
2d8c6dc1
AH
11391 for (i = 0; i < count; i++)
11392 emit_move_insn (gen_rtx_REG (mode, rdest + count - i - 1),
11393 gen_rtx_REG (mode, rsrc + count - i - 1));
43e9d192
IB
11394}
11395
668046d1 11396/* Compute and return the length of aarch64_simd_reglist<mode>, where <mode> is
6ec0e5b9 11397 one of VSTRUCT modes: OI, CI, or XI. */
668046d1
DS
11398int
11399aarch64_simd_attr_length_rglist (enum machine_mode mode)
11400{
11401 return (GET_MODE_SIZE (mode) / UNITS_PER_VREG) * 4;
11402}
11403
db0253a4
TB
11404/* Implement target hook TARGET_VECTOR_ALIGNMENT. The AAPCS64 sets the maximum
11405 alignment of a vector to 128 bits. */
11406static HOST_WIDE_INT
11407aarch64_simd_vector_alignment (const_tree type)
11408{
9439e9a1 11409 HOST_WIDE_INT align = tree_to_shwi (TYPE_SIZE (type));
db0253a4
TB
11410 return MIN (align, 128);
11411}
11412
11413/* Implement target hook TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE. */
11414static bool
11415aarch64_simd_vector_alignment_reachable (const_tree type, bool is_packed)
11416{
11417 if (is_packed)
11418 return false;
11419
11420 /* We guarantee alignment for vectors up to 128-bits. */
11421 if (tree_int_cst_compare (TYPE_SIZE (type),
11422 bitsize_int (BIGGEST_ALIGNMENT)) > 0)
11423 return false;
11424
11425 /* Vectors whose size is <= BIGGEST_ALIGNMENT are naturally aligned. */
11426 return true;
11427}
11428
7df76747
N
11429/* Return true if the vector misalignment factor is supported by the
11430 target. */
11431static bool
11432aarch64_builtin_support_vector_misalignment (machine_mode mode,
11433 const_tree type, int misalignment,
11434 bool is_packed)
11435{
11436 if (TARGET_SIMD && STRICT_ALIGNMENT)
11437 {
 11438 /* Return false if the movmisalign pattern is not supported for this mode. */
11439 if (optab_handler (movmisalign_optab, mode) == CODE_FOR_nothing)
11440 return false;
11441
11442 if (misalignment == -1)
11443 {
11444 /* Misalignment factor is unknown at compile time but we know
11445 it's word aligned. */
11446 if (aarch64_simd_vector_alignment_reachable (type, is_packed))
11447 {
11448 int element_size = TREE_INT_CST_LOW (TYPE_SIZE (type));
11449
11450 if (element_size != 64)
11451 return true;
11452 }
11453 return false;
11454 }
11455 }
11456 return default_builtin_support_vector_misalignment (mode, type, misalignment,
11457 is_packed);
11458}
11459
4369c11e
TB
11460/* If VALS is a vector constant that can be loaded into a register
11461 using DUP, generate instructions to do so and return an RTX to
11462 assign to the register. Otherwise return NULL_RTX. */
11463static rtx
11464aarch64_simd_dup_constant (rtx vals)
11465{
ef4bddc2
RS
11466 machine_mode mode = GET_MODE (vals);
11467 machine_mode inner_mode = GET_MODE_INNER (mode);
4369c11e 11468 rtx x;
4369c11e 11469
92695fbb 11470 if (!const_vec_duplicate_p (vals, &x))
4369c11e
TB
11471 return NULL_RTX;
11472
11473 /* We can load this constant by using DUP and a constant in a
11474 single ARM register. This will be cheaper than a vector
11475 load. */
92695fbb 11476 x = copy_to_mode_reg (inner_mode, x);
4369c11e
TB
11477 return gen_rtx_VEC_DUPLICATE (mode, x);
11478}
11479
11480
11481/* Generate code to load VALS, which is a PARALLEL containing only
11482 constants (for vec_init) or CONST_VECTOR, efficiently into a
11483 register. Returns an RTX to copy into the register, or NULL_RTX
11484 for a PARALLEL that can not be converted into a CONST_VECTOR. */
1df3f464 11485static rtx
4369c11e
TB
11486aarch64_simd_make_constant (rtx vals)
11487{
ef4bddc2 11488 machine_mode mode = GET_MODE (vals);
4369c11e
TB
11489 rtx const_dup;
11490 rtx const_vec = NULL_RTX;
11491 int n_elts = GET_MODE_NUNITS (mode);
11492 int n_const = 0;
11493 int i;
11494
11495 if (GET_CODE (vals) == CONST_VECTOR)
11496 const_vec = vals;
11497 else if (GET_CODE (vals) == PARALLEL)
11498 {
11499 /* A CONST_VECTOR must contain only CONST_INTs and
11500 CONST_DOUBLEs, but CONSTANT_P allows more (e.g. SYMBOL_REF).
11501 Only store valid constants in a CONST_VECTOR. */
11502 for (i = 0; i < n_elts; ++i)
11503 {
11504 rtx x = XVECEXP (vals, 0, i);
11505 if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
11506 n_const++;
11507 }
11508 if (n_const == n_elts)
11509 const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0));
11510 }
11511 else
11512 gcc_unreachable ();
11513
11514 if (const_vec != NULL_RTX
48063b9d 11515 && aarch64_simd_valid_immediate (const_vec, mode, false, NULL))
4369c11e
TB
11516 /* Load using MOVI/MVNI. */
11517 return const_vec;
11518 else if ((const_dup = aarch64_simd_dup_constant (vals)) != NULL_RTX)
11519 /* Loaded using DUP. */
11520 return const_dup;
11521 else if (const_vec != NULL_RTX)
11522 /* Load from constant pool. We can not take advantage of single-cycle
11523 LD1 because we need a PC-relative addressing mode. */
11524 return const_vec;
11525 else
11526 /* A PARALLEL containing something not valid inside CONST_VECTOR.
11527 We can not construct an initializer. */
11528 return NULL_RTX;
11529}
11530
35a093b6
JG
11531/* Expand a vector initialisation sequence, such that TARGET is
11532 initialised to contain VALS. */
11533
4369c11e
TB
11534void
11535aarch64_expand_vector_init (rtx target, rtx vals)
11536{
ef4bddc2
RS
11537 machine_mode mode = GET_MODE (target);
11538 machine_mode inner_mode = GET_MODE_INNER (mode);
35a093b6 11539 /* The number of vector elements. */
4369c11e 11540 int n_elts = GET_MODE_NUNITS (mode);
35a093b6 11541 /* The number of vector elements which are not constant. */
8b66a2d4
AL
11542 int n_var = 0;
11543 rtx any_const = NULL_RTX;
35a093b6
JG
11544 /* The first element of vals. */
11545 rtx v0 = XVECEXP (vals, 0, 0);
4369c11e 11546 bool all_same = true;
4369c11e 11547
35a093b6 11548 /* Count the number of variable elements to initialise. */
8b66a2d4 11549 for (int i = 0; i < n_elts; ++i)
4369c11e 11550 {
8b66a2d4 11551 rtx x = XVECEXP (vals, 0, i);
35a093b6 11552 if (!(CONST_INT_P (x) || CONST_DOUBLE_P (x)))
8b66a2d4
AL
11553 ++n_var;
11554 else
11555 any_const = x;
4369c11e 11556
35a093b6 11557 all_same &= rtx_equal_p (x, v0);
4369c11e
TB
11558 }
11559
35a093b6
JG
11560 /* No variable elements, hand off to aarch64_simd_make_constant which knows
11561 how best to handle this. */
4369c11e
TB
11562 if (n_var == 0)
11563 {
11564 rtx constant = aarch64_simd_make_constant (vals);
11565 if (constant != NULL_RTX)
11566 {
11567 emit_move_insn (target, constant);
11568 return;
11569 }
11570 }
11571
11572 /* Splat a single non-constant element if we can. */
11573 if (all_same)
11574 {
35a093b6 11575 rtx x = copy_to_mode_reg (inner_mode, v0);
4369c11e
TB
11576 aarch64_emit_move (target, gen_rtx_VEC_DUPLICATE (mode, x));
11577 return;
11578 }
11579
35a093b6
JG
11580 /* Initialise a vector which is part-variable. We want to first try
11581 to build those lanes which are constant in the most efficient way we
11582 can. */
11583 if (n_var != n_elts)
4369c11e
TB
11584 {
11585 rtx copy = copy_rtx (vals);
4369c11e 11586
8b66a2d4
AL
11587 /* Load constant part of vector. We really don't care what goes into the
11588 parts we will overwrite, but we're more likely to be able to load the
11589 constant efficiently if it has fewer, larger, repeating parts
11590 (see aarch64_simd_valid_immediate). */
11591 for (int i = 0; i < n_elts; i++)
11592 {
11593 rtx x = XVECEXP (vals, 0, i);
11594 if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
11595 continue;
11596 rtx subst = any_const;
11597 for (int bit = n_elts / 2; bit > 0; bit /= 2)
11598 {
11599 /* Look in the copied vector, as more elements are const. */
11600 rtx test = XVECEXP (copy, 0, i ^ bit);
11601 if (CONST_INT_P (test) || CONST_DOUBLE_P (test))
11602 {
11603 subst = test;
11604 break;
11605 }
11606 }
11607 XVECEXP (copy, 0, i) = subst;
11608 }
4369c11e 11609 aarch64_expand_vector_init (target, copy);
35a093b6 11610 }
4369c11e 11611
35a093b6 11612 /* Insert the variable lanes directly. */
8b66a2d4 11613
35a093b6
JG
11614 enum insn_code icode = optab_handler (vec_set_optab, mode);
11615 gcc_assert (icode != CODE_FOR_nothing);
4369c11e 11616
8b66a2d4 11617 for (int i = 0; i < n_elts; i++)
35a093b6
JG
11618 {
11619 rtx x = XVECEXP (vals, 0, i);
11620 if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
11621 continue;
11622 x = copy_to_mode_reg (inner_mode, x);
11623 emit_insn (GEN_FCN (icode) (target, x, GEN_INT (i)));
11624 }
4369c11e
TB
11625}
11626
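/* Sketch of the strategy above for a part-variable V4SF initialiser such
   as { x, 1.0f, 2.0f, 3.0f } (example values only): the constant copy
   { 2.0f, 1.0f, 2.0f, 3.0f } is built first, the variable lane having
   borrowed a constant from lane 2 so the vector has fewer distinct parts,
   and lane 0 is then overwritten through the vec_set pattern (typically an
   INS instruction).  */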
43e9d192 11627static unsigned HOST_WIDE_INT
ef4bddc2 11628aarch64_shift_truncation_mask (machine_mode mode)
43e9d192
IB
11629{
11630 return
ac59ad4e
KT
11631 (!SHIFT_COUNT_TRUNCATED
11632 || aarch64_vector_mode_supported_p (mode)
43e9d192
IB
11633 || aarch64_vect_struct_mode_p (mode)) ? 0 : (GET_MODE_BITSIZE (mode) - 1);
11634}
11635
43e9d192
IB
11636/* Select a format to encode pointers in exception handling data. */
11637int
11638aarch64_asm_preferred_eh_data_format (int code ATTRIBUTE_UNUSED, int global)
11639{
11640 int type;
11641 switch (aarch64_cmodel)
11642 {
11643 case AARCH64_CMODEL_TINY:
11644 case AARCH64_CMODEL_TINY_PIC:
11645 case AARCH64_CMODEL_SMALL:
11646 case AARCH64_CMODEL_SMALL_PIC:
1b1e81f8 11647 case AARCH64_CMODEL_SMALL_SPIC:
43e9d192
IB
11648 /* text+got+data < 4Gb. 4-byte signed relocs are sufficient
11649 for everything. */
11650 type = DW_EH_PE_sdata4;
11651 break;
11652 default:
11653 /* No assumptions here. 8-byte relocs required. */
11654 type = DW_EH_PE_sdata8;
11655 break;
11656 }
11657 return (global ? DW_EH_PE_indirect : 0) | DW_EH_PE_pcrel | type;
11658}
11659
e1c1ecb0
KT
11660/* The last .arch and .tune assembly strings that we printed. */
11661static std::string aarch64_last_printed_arch_string;
11662static std::string aarch64_last_printed_tune_string;
11663
361fb3ee
KT
11664/* Implement ASM_DECLARE_FUNCTION_NAME. Output the ISA features used
11665 by the function fndecl. */
11666
11667void
11668aarch64_declare_function_name (FILE *stream, const char* name,
11669 tree fndecl)
11670{
11671 tree target_parts = DECL_FUNCTION_SPECIFIC_TARGET (fndecl);
11672
11673 struct cl_target_option *targ_options;
11674 if (target_parts)
11675 targ_options = TREE_TARGET_OPTION (target_parts);
11676 else
11677 targ_options = TREE_TARGET_OPTION (target_option_current_node);
11678 gcc_assert (targ_options);
11679
11680 const struct processor *this_arch
11681 = aarch64_get_arch (targ_options->x_explicit_arch);
11682
054b4005
JG
11683 unsigned long isa_flags = targ_options->x_aarch64_isa_flags;
11684 std::string extension
04a99ebe
JG
11685 = aarch64_get_extension_string_for_isa_flags (isa_flags,
11686 this_arch->flags);
e1c1ecb0
KT
11687 /* Only update the assembler .arch string if it is distinct from the last
11688 such string we printed. */
11689 std::string to_print = this_arch->name + extension;
11690 if (to_print != aarch64_last_printed_arch_string)
11691 {
11692 asm_fprintf (asm_out_file, "\t.arch %s\n", to_print.c_str ());
11693 aarch64_last_printed_arch_string = to_print;
11694 }
361fb3ee
KT
11695
 11696 /* Print the cpu name we're tuning for in the comments; it might be
e1c1ecb0
KT
11697 useful to readers of the generated asm. Do it only when it changes
11698 from function to function and verbose assembly is requested. */
361fb3ee
KT
11699 const struct processor *this_tune
11700 = aarch64_get_tune_cpu (targ_options->x_explicit_tune_core);
11701
e1c1ecb0
KT
11702 if (flag_debug_asm && aarch64_last_printed_tune_string != this_tune->name)
11703 {
11704 asm_fprintf (asm_out_file, "\t" ASM_COMMENT_START ".tune %s\n",
11705 this_tune->name);
11706 aarch64_last_printed_tune_string = this_tune->name;
11707 }
361fb3ee
KT
11708
11709 /* Don't forget the type directive for ELF. */
11710 ASM_OUTPUT_TYPE_DIRECTIVE (stream, name, "function");
11711 ASM_OUTPUT_LABEL (stream, name);
11712}
11713
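/* Indicative output of the hook above (the exact extension suffix depends
   on the selected feature flags): a function declared as

     __attribute__ ((target ("arch=armv8.1-a")))
     int f (void) { return 0; }

   would be preceded by something like

     .arch armv8.1-a
     .type f, %function
     f:

   with the .arch directive omitted when it matches the last one printed.  */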
e1c1ecb0
KT
11714/* Implements TARGET_ASM_FILE_START. Output the assembly header. */
11715
11716static void
11717aarch64_start_file (void)
11718{
11719 struct cl_target_option *default_options
11720 = TREE_TARGET_OPTION (target_option_default_node);
11721
11722 const struct processor *default_arch
11723 = aarch64_get_arch (default_options->x_explicit_arch);
11724 unsigned long default_isa_flags = default_options->x_aarch64_isa_flags;
11725 std::string extension
04a99ebe
JG
11726 = aarch64_get_extension_string_for_isa_flags (default_isa_flags,
11727 default_arch->flags);
e1c1ecb0
KT
11728
11729 aarch64_last_printed_arch_string = default_arch->name + extension;
11730 aarch64_last_printed_tune_string = "";
11731 asm_fprintf (asm_out_file, "\t.arch %s\n",
11732 aarch64_last_printed_arch_string.c_str ());
11733
11734 default_file_start ();
11735}
11736
0462169c
SN
11737/* Emit load exclusive. */
11738
11739static void
ef4bddc2 11740aarch64_emit_load_exclusive (machine_mode mode, rtx rval,
0462169c
SN
11741 rtx mem, rtx model_rtx)
11742{
11743 rtx (*gen) (rtx, rtx, rtx);
11744
11745 switch (mode)
11746 {
11747 case QImode: gen = gen_aarch64_load_exclusiveqi; break;
11748 case HImode: gen = gen_aarch64_load_exclusivehi; break;
11749 case SImode: gen = gen_aarch64_load_exclusivesi; break;
11750 case DImode: gen = gen_aarch64_load_exclusivedi; break;
11751 default:
11752 gcc_unreachable ();
11753 }
11754
11755 emit_insn (gen (rval, mem, model_rtx));
11756}
11757
11758/* Emit store exclusive. */
11759
11760static void
ef4bddc2 11761aarch64_emit_store_exclusive (machine_mode mode, rtx bval,
0462169c
SN
11762 rtx rval, rtx mem, rtx model_rtx)
11763{
11764 rtx (*gen) (rtx, rtx, rtx, rtx);
11765
11766 switch (mode)
11767 {
11768 case QImode: gen = gen_aarch64_store_exclusiveqi; break;
11769 case HImode: gen = gen_aarch64_store_exclusivehi; break;
11770 case SImode: gen = gen_aarch64_store_exclusivesi; break;
11771 case DImode: gen = gen_aarch64_store_exclusivedi; break;
11772 default:
11773 gcc_unreachable ();
11774 }
11775
11776 emit_insn (gen (bval, rval, mem, model_rtx));
11777}
11778
11779/* Emit jump instruction INSN and mark it as unlikely to be taken. */
11780
11781static void
11782aarch64_emit_unlikely_jump (rtx insn)
11783{
e5af9ddd 11784 int very_unlikely = REG_BR_PROB_BASE / 100 - 1;
0462169c 11785
f370536c
TS
11786 rtx_insn *jump = emit_jump_insn (insn);
11787 add_int_reg_note (jump, REG_BR_PROB, very_unlikely);
0462169c
SN
11788}
11789
11790/* Expand a compare and swap pattern. */
11791
11792void
11793aarch64_expand_compare_and_swap (rtx operands[])
11794{
11795 rtx bval, rval, mem, oldval, newval, is_weak, mod_s, mod_f, x;
ef4bddc2 11796 machine_mode mode, cmp_mode;
b0770c0f
MW
11797 typedef rtx (*gen_cas_fn) (rtx, rtx, rtx, rtx, rtx, rtx, rtx);
11798 int idx;
11799 gen_cas_fn gen;
11800 const gen_cas_fn split_cas[] =
11801 {
11802 gen_aarch64_compare_and_swapqi,
11803 gen_aarch64_compare_and_swaphi,
11804 gen_aarch64_compare_and_swapsi,
11805 gen_aarch64_compare_and_swapdi
11806 };
11807 const gen_cas_fn atomic_cas[] =
11808 {
11809 gen_aarch64_compare_and_swapqi_lse,
11810 gen_aarch64_compare_and_swaphi_lse,
11811 gen_aarch64_compare_and_swapsi_lse,
11812 gen_aarch64_compare_and_swapdi_lse
11813 };
0462169c
SN
11814
11815 bval = operands[0];
11816 rval = operands[1];
11817 mem = operands[2];
11818 oldval = operands[3];
11819 newval = operands[4];
11820 is_weak = operands[5];
11821 mod_s = operands[6];
11822 mod_f = operands[7];
11823 mode = GET_MODE (mem);
11824 cmp_mode = mode;
11825
11826 /* Normally the succ memory model must be stronger than fail, but in the
11827 unlikely event of fail being ACQUIRE and succ being RELEASE we need to
11828 promote succ to ACQ_REL so that we don't lose the acquire semantics. */
11829
46b35980
AM
11830 if (is_mm_acquire (memmodel_from_int (INTVAL (mod_f)))
11831 && is_mm_release (memmodel_from_int (INTVAL (mod_s))))
0462169c
SN
11832 mod_s = GEN_INT (MEMMODEL_ACQ_REL);
11833
11834 switch (mode)
11835 {
11836 case QImode:
11837 case HImode:
11838 /* For short modes, we're going to perform the comparison in SImode,
11839 so do the zero-extension now. */
11840 cmp_mode = SImode;
11841 rval = gen_reg_rtx (SImode);
11842 oldval = convert_modes (SImode, mode, oldval, true);
11843 /* Fall through. */
11844
11845 case SImode:
11846 case DImode:
11847 /* Force the value into a register if needed. */
11848 if (!aarch64_plus_operand (oldval, mode))
11849 oldval = force_reg (cmp_mode, oldval);
11850 break;
11851
11852 default:
11853 gcc_unreachable ();
11854 }
11855
11856 switch (mode)
11857 {
b0770c0f
MW
11858 case QImode: idx = 0; break;
11859 case HImode: idx = 1; break;
11860 case SImode: idx = 2; break;
11861 case DImode: idx = 3; break;
0462169c
SN
11862 default:
11863 gcc_unreachable ();
11864 }
b0770c0f
MW
11865 if (TARGET_LSE)
11866 gen = atomic_cas[idx];
11867 else
11868 gen = split_cas[idx];
0462169c
SN
11869
11870 emit_insn (gen (rval, mem, oldval, newval, is_weak, mod_s, mod_f));
11871
11872 if (mode == QImode || mode == HImode)
11873 emit_move_insn (operands[1], gen_lowpart (mode, rval));
11874
11875 x = gen_rtx_REG (CCmode, CC_REGNUM);
11876 x = gen_rtx_EQ (SImode, x, const0_rtx);
f7df4a84 11877 emit_insn (gen_rtx_SET (bval, x));
0462169c
SN
11878}
11879
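/* Indicative code for a strong __atomic_compare_exchange on a 32-bit
   object with SEQ_CST ordering (registers are illustrative).  Without LSE
   the split form (see aarch64_split_compare_and_swap) produces a
   load/store-exclusive loop:

     .L1: ldaxr w0, [x2]
          cmp   w0, w3
          bne   .L2
          stlxr w4, w5, [x2]
          cbnz  w4, .L1
     .L2:

   With TARGET_LSE the whole exchange is a single "casal w0, w5, [x2]"
   followed by the comparison that produces the boolean result.  */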
641c2f8b
MW
11880/* Test whether the target supports using an atomic load-operate instruction
 11881 for operation CODE.  Such instructions are only available with the LSE
 11882 (ARMv8.1-A atomics) extension, so this returns FALSE when !TARGET_LSE.
 11883 It also returns FALSE when the operation itself cannot be done with a
 11884 single atomic instruction on this architecture. */
11885
11886bool
11887aarch64_atomic_ldop_supported_p (enum rtx_code code)
11888{
11889 if (!TARGET_LSE)
11890 return false;
11891
11892 switch (code)
11893 {
11894 case SET:
11895 case AND:
11896 case IOR:
11897 case XOR:
11898 case MINUS:
11899 case PLUS:
11900 return true;
11901 default:
11902 return false;
11903 }
11904}
11905
f70fb3b6
MW
11906/* Emit a barrier appropriate for memory model MODEL at the end of a
11907 sequence implementing an atomic operation. */
11908
11909static void
11910aarch64_emit_post_barrier (enum memmodel model)
11911{
11912 const enum memmodel base_model = memmodel_base (model);
11913
11914 if (is_mm_sync (model)
11915 && (base_model == MEMMODEL_ACQUIRE
11916 || base_model == MEMMODEL_ACQ_REL
11917 || base_model == MEMMODEL_SEQ_CST))
11918 {
11919 emit_insn (gen_mem_thread_fence (GEN_INT (MEMMODEL_SEQ_CST)));
11920 }
11921}
11922
b0770c0f
MW
11923/* Emit an atomic compare-and-swap operation. RVAL is the destination register
11924 for the data in memory. EXPECTED is the value expected to be in memory.
11925 DESIRED is the value to store to memory. MEM is the memory location. MODEL
11926 is the memory ordering to use. */
11927
11928void
11929aarch64_gen_atomic_cas (rtx rval, rtx mem,
11930 rtx expected, rtx desired,
11931 rtx model)
11932{
11933 rtx (*gen) (rtx, rtx, rtx, rtx);
11934 machine_mode mode;
11935
11936 mode = GET_MODE (mem);
11937
11938 switch (mode)
11939 {
11940 case QImode: gen = gen_aarch64_atomic_casqi; break;
11941 case HImode: gen = gen_aarch64_atomic_cashi; break;
11942 case SImode: gen = gen_aarch64_atomic_cassi; break;
11943 case DImode: gen = gen_aarch64_atomic_casdi; break;
11944 default:
11945 gcc_unreachable ();
11946 }
11947
11948 /* Move the expected value into the CAS destination register. */
11949 emit_insn (gen_rtx_SET (rval, expected));
11950
11951 /* Emit the CAS. */
11952 emit_insn (gen (rval, mem, desired, model));
11953
11954 /* Compare the expected value with the value loaded by the CAS, to establish
11955 whether the swap was made. */
11956 aarch64_gen_compare_reg (EQ, rval, expected);
11957}
11958
0462169c
SN
11959/* Split a compare and swap pattern. */
11960
11961void
11962aarch64_split_compare_and_swap (rtx operands[])
11963{
11964 rtx rval, mem, oldval, newval, scratch;
ef4bddc2 11965 machine_mode mode;
0462169c 11966 bool is_weak;
5d8a22a5
DM
11967 rtx_code_label *label1, *label2;
11968 rtx x, cond;
ab876106
MW
11969 enum memmodel model;
11970 rtx model_rtx;
0462169c
SN
11971
11972 rval = operands[0];
11973 mem = operands[1];
11974 oldval = operands[2];
11975 newval = operands[3];
11976 is_weak = (operands[4] != const0_rtx);
ab876106 11977 model_rtx = operands[5];
0462169c
SN
11978 scratch = operands[7];
11979 mode = GET_MODE (mem);
ab876106 11980 model = memmodel_from_int (INTVAL (model_rtx));
0462169c 11981
5d8a22a5 11982 label1 = NULL;
0462169c
SN
11983 if (!is_weak)
11984 {
11985 label1 = gen_label_rtx ();
11986 emit_label (label1);
11987 }
11988 label2 = gen_label_rtx ();
11989
ab876106
MW
11990 /* The initial load can be relaxed for a __sync operation since a final
11991 barrier will be emitted to stop code hoisting. */
11992 if (is_mm_sync (model))
11993 aarch64_emit_load_exclusive (mode, rval, mem,
11994 GEN_INT (MEMMODEL_RELAXED));
11995 else
11996 aarch64_emit_load_exclusive (mode, rval, mem, model_rtx);
0462169c
SN
11997
11998 cond = aarch64_gen_compare_reg (NE, rval, oldval);
11999 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
12000 x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
12001 gen_rtx_LABEL_REF (Pmode, label2), pc_rtx);
f7df4a84 12002 aarch64_emit_unlikely_jump (gen_rtx_SET (pc_rtx, x));
0462169c 12003
ab876106 12004 aarch64_emit_store_exclusive (mode, scratch, mem, newval, model_rtx);
0462169c
SN
12005
12006 if (!is_weak)
12007 {
12008 x = gen_rtx_NE (VOIDmode, scratch, const0_rtx);
12009 x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
12010 gen_rtx_LABEL_REF (Pmode, label1), pc_rtx);
f7df4a84 12011 aarch64_emit_unlikely_jump (gen_rtx_SET (pc_rtx, x));
0462169c
SN
12012 }
12013 else
12014 {
12015 cond = gen_rtx_REG (CCmode, CC_REGNUM);
12016 x = gen_rtx_COMPARE (CCmode, scratch, const0_rtx);
f7df4a84 12017 emit_insn (gen_rtx_SET (cond, x));
0462169c
SN
12018 }
12019
12020 emit_label (label2);
ab876106
MW
12021
12022 /* Emit any final barrier needed for a __sync operation. */
12023 if (is_mm_sync (model))
12024 aarch64_emit_post_barrier (model);
0462169c
SN
12025}
12026
68729b06
MW
12027/* Emit a BIC instruction. */
12028
12029static void
12030aarch64_emit_bic (machine_mode mode, rtx dst, rtx s1, rtx s2, int shift)
12031{
12032 rtx shift_rtx = GEN_INT (shift);
12033 rtx (*gen) (rtx, rtx, rtx, rtx);
12034
12035 switch (mode)
12036 {
12037 case SImode: gen = gen_and_one_cmpl_lshrsi3; break;
12038 case DImode: gen = gen_and_one_cmpl_lshrdi3; break;
12039 default:
12040 gcc_unreachable ();
12041 }
12042
12043 emit_insn (gen (dst, s2, shift_rtx, s1));
12044}
12045
9cd7b720
MW
12046/* Emit an atomic swap. */
12047
12048static void
12049aarch64_emit_atomic_swap (machine_mode mode, rtx dst, rtx value,
12050 rtx mem, rtx model)
12051{
12052 rtx (*gen) (rtx, rtx, rtx, rtx);
12053
12054 switch (mode)
12055 {
12056 case QImode: gen = gen_aarch64_atomic_swpqi; break;
12057 case HImode: gen = gen_aarch64_atomic_swphi; break;
12058 case SImode: gen = gen_aarch64_atomic_swpsi; break;
12059 case DImode: gen = gen_aarch64_atomic_swpdi; break;
12060 default:
12061 gcc_unreachable ();
12062 }
12063
12064 emit_insn (gen (dst, mem, value, model));
12065}
12066
641c2f8b
MW
12067/* Operations supported by aarch64_emit_atomic_load_op. */
12068
12069enum aarch64_atomic_load_op_code
12070{
12071 AARCH64_LDOP_PLUS, /* A + B */
12072 AARCH64_LDOP_XOR, /* A ^ B */
12073 AARCH64_LDOP_OR, /* A | B */
12074 AARCH64_LDOP_BIC /* A & ~B */
12075};
12076
12077/* Emit an atomic load-operate. */
12078
12079static void
12080aarch64_emit_atomic_load_op (enum aarch64_atomic_load_op_code code,
12081 machine_mode mode, rtx dst, rtx src,
12082 rtx mem, rtx model)
12083{
12084 typedef rtx (*aarch64_atomic_load_op_fn) (rtx, rtx, rtx, rtx);
12085 const aarch64_atomic_load_op_fn plus[] =
12086 {
12087 gen_aarch64_atomic_loadaddqi,
12088 gen_aarch64_atomic_loadaddhi,
12089 gen_aarch64_atomic_loadaddsi,
12090 gen_aarch64_atomic_loadadddi
12091 };
12092 const aarch64_atomic_load_op_fn eor[] =
12093 {
12094 gen_aarch64_atomic_loadeorqi,
12095 gen_aarch64_atomic_loadeorhi,
12096 gen_aarch64_atomic_loadeorsi,
12097 gen_aarch64_atomic_loadeordi
12098 };
12099 const aarch64_atomic_load_op_fn ior[] =
12100 {
12101 gen_aarch64_atomic_loadsetqi,
12102 gen_aarch64_atomic_loadsethi,
12103 gen_aarch64_atomic_loadsetsi,
12104 gen_aarch64_atomic_loadsetdi
12105 };
12106 const aarch64_atomic_load_op_fn bic[] =
12107 {
12108 gen_aarch64_atomic_loadclrqi,
12109 gen_aarch64_atomic_loadclrhi,
12110 gen_aarch64_atomic_loadclrsi,
12111 gen_aarch64_atomic_loadclrdi
12112 };
12113 aarch64_atomic_load_op_fn gen;
12114 int idx = 0;
12115
12116 switch (mode)
12117 {
12118 case QImode: idx = 0; break;
12119 case HImode: idx = 1; break;
12120 case SImode: idx = 2; break;
12121 case DImode: idx = 3; break;
12122 default:
12123 gcc_unreachable ();
12124 }
12125
12126 switch (code)
12127 {
12128 case AARCH64_LDOP_PLUS: gen = plus[idx]; break;
12129 case AARCH64_LDOP_XOR: gen = eor[idx]; break;
12130 case AARCH64_LDOP_OR: gen = ior[idx]; break;
12131 case AARCH64_LDOP_BIC: gen = bic[idx]; break;
12132 default:
12133 gcc_unreachable ();
12134 }
12135
12136 emit_insn (gen (dst, mem, src, model));
12137}
12138
12139/* Emit an atomic load+operate. CODE is the operation. OUT_DATA is the
68729b06
MW
12140 location to store the data read from memory. OUT_RESULT is the location to
12141 store the result of the operation. MEM is the memory location to read and
12142 modify. MODEL_RTX is the memory ordering to use. VALUE is the second
12143 operand for the operation. Either OUT_DATA or OUT_RESULT, but not both, can
12144 be NULL. */
9cd7b720
MW
12145
12146void
68729b06 12147aarch64_gen_atomic_ldop (enum rtx_code code, rtx out_data, rtx out_result,
9cd7b720
MW
12148 rtx mem, rtx value, rtx model_rtx)
12149{
12150 machine_mode mode = GET_MODE (mem);
641c2f8b
MW
12151 machine_mode wmode = (mode == DImode ? DImode : SImode);
12152 const bool short_mode = (mode < SImode);
12153 aarch64_atomic_load_op_code ldop_code;
12154 rtx src;
12155 rtx x;
12156
12157 if (out_data)
12158 out_data = gen_lowpart (mode, out_data);
9cd7b720 12159
68729b06
MW
12160 if (out_result)
12161 out_result = gen_lowpart (mode, out_result);
12162
641c2f8b
MW
12163 /* Make sure the value is in a register, putting it into a destination
12164 register if it needs to be manipulated. */
12165 if (!register_operand (value, mode)
12166 || code == AND || code == MINUS)
12167 {
68729b06 12168 src = out_result ? out_result : out_data;
641c2f8b
MW
12169 emit_move_insn (src, gen_lowpart (mode, value));
12170 }
12171 else
12172 src = value;
12173 gcc_assert (register_operand (src, mode));
9cd7b720 12174
641c2f8b
MW
12175 /* Preprocess the data for the operation as necessary. If the operation is
12176 a SET then emit a swap instruction and finish. */
9cd7b720
MW
12177 switch (code)
12178 {
12179 case SET:
641c2f8b 12180 aarch64_emit_atomic_swap (mode, out_data, src, mem, model_rtx);
9cd7b720
MW
12181 return;
12182
641c2f8b
MW
12183 case MINUS:
12184 /* Negate the value and treat it as a PLUS. */
12185 {
12186 rtx neg_src;
12187
12188 /* Resize the value if necessary. */
12189 if (short_mode)
12190 src = gen_lowpart (wmode, src);
12191
12192 neg_src = gen_rtx_NEG (wmode, src);
12193 emit_insn (gen_rtx_SET (src, neg_src));
12194
12195 if (short_mode)
12196 src = gen_lowpart (mode, src);
12197 }
12198 /* Fall-through. */
12199 case PLUS:
12200 ldop_code = AARCH64_LDOP_PLUS;
12201 break;
12202
12203 case IOR:
12204 ldop_code = AARCH64_LDOP_OR;
12205 break;
12206
12207 case XOR:
12208 ldop_code = AARCH64_LDOP_XOR;
12209 break;
12210
12211 case AND:
12212 {
12213 rtx not_src;
12214
12215 /* Resize the value if necessary. */
12216 if (short_mode)
12217 src = gen_lowpart (wmode, src);
12218
12219 not_src = gen_rtx_NOT (wmode, src);
12220 emit_insn (gen_rtx_SET (src, not_src));
12221
12222 if (short_mode)
12223 src = gen_lowpart (mode, src);
12224 }
12225 ldop_code = AARCH64_LDOP_BIC;
12226 break;
12227
9cd7b720
MW
12228 default:
12229 /* The operation can't be done with atomic instructions. */
12230 gcc_unreachable ();
12231 }
641c2f8b
MW
12232
12233 aarch64_emit_atomic_load_op (ldop_code, mode, out_data, src, mem, model_rtx);
68729b06
MW
12234
12235 /* If necessary, calculate the data in memory after the update by redoing the
12236 operation from values in registers. */
12237 if (!out_result)
12238 return;
12239
12240 if (short_mode)
12241 {
12242 src = gen_lowpart (wmode, src);
12243 out_data = gen_lowpart (wmode, out_data);
12244 out_result = gen_lowpart (wmode, out_result);
12245 }
12246
12247 x = NULL_RTX;
12248
12249 switch (code)
12250 {
12251 case MINUS:
12252 case PLUS:
12253 x = gen_rtx_PLUS (wmode, out_data, src);
12254 break;
12255 case IOR:
12256 x = gen_rtx_IOR (wmode, out_data, src);
12257 break;
12258 case XOR:
12259 x = gen_rtx_XOR (wmode, out_data, src);
12260 break;
12261 case AND:
12262 aarch64_emit_bic (wmode, out_result, out_data, src, 0);
12263 return;
12264 default:
12265 gcc_unreachable ();
12266 }
12267
12268 emit_set_insn (out_result, x);
12269
12270 return;
9cd7b720
MW
12271}
12272
0462169c
SN
12273/* Split an atomic operation. */
12274
12275void
12276aarch64_split_atomic_op (enum rtx_code code, rtx old_out, rtx new_out, rtx mem,
9cd7b720 12277 rtx value, rtx model_rtx, rtx cond)
0462169c 12278{
ef4bddc2
RS
12279 machine_mode mode = GET_MODE (mem);
12280 machine_mode wmode = (mode == DImode ? DImode : SImode);
f70fb3b6
MW
12281 const enum memmodel model = memmodel_from_int (INTVAL (model_rtx));
12282 const bool is_sync = is_mm_sync (model);
5d8a22a5
DM
12283 rtx_code_label *label;
12284 rtx x;
0462169c 12285
9cd7b720 12286 /* Split the atomic operation into a sequence. */
0462169c
SN
12287 label = gen_label_rtx ();
12288 emit_label (label);
12289
12290 if (new_out)
12291 new_out = gen_lowpart (wmode, new_out);
12292 if (old_out)
12293 old_out = gen_lowpart (wmode, old_out);
12294 else
12295 old_out = new_out;
12296 value = simplify_gen_subreg (wmode, value, mode, 0);
12297
f70fb3b6
MW
12298 /* The initial load can be relaxed for a __sync operation since a final
12299 barrier will be emitted to stop code hoisting. */
12300 if (is_sync)
12301 aarch64_emit_load_exclusive (mode, old_out, mem,
12302 GEN_INT (MEMMODEL_RELAXED));
12303 else
12304 aarch64_emit_load_exclusive (mode, old_out, mem, model_rtx);
0462169c
SN
12305
12306 switch (code)
12307 {
12308 case SET:
12309 new_out = value;
12310 break;
12311
12312 case NOT:
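      /* NAND: new_out = NOT (old_out AND value).  */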
12313 x = gen_rtx_AND (wmode, old_out, value);
f7df4a84 12314 emit_insn (gen_rtx_SET (new_out, x));
0462169c 12315 x = gen_rtx_NOT (wmode, new_out);
f7df4a84 12316 emit_insn (gen_rtx_SET (new_out, x));
0462169c
SN
12317 break;
12318
12319 case MINUS:
12320 if (CONST_INT_P (value))
12321 {
12322 value = GEN_INT (-INTVAL (value));
12323 code = PLUS;
12324 }
12325 /* Fall through. */
12326
12327 default:
12328 x = gen_rtx_fmt_ee (code, wmode, old_out, value);
f7df4a84 12329 emit_insn (gen_rtx_SET (new_out, x));
0462169c
SN
12330 break;
12331 }
12332
12333 aarch64_emit_store_exclusive (mode, cond, mem,
12334 gen_lowpart (mode, new_out), model_rtx);
12335
12336 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
12337 x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
12338 gen_rtx_LABEL_REF (Pmode, label), pc_rtx);
f7df4a84 12339 aarch64_emit_unlikely_jump (gen_rtx_SET (pc_rtx, x));
f70fb3b6
MW
12340
12341 /* Emit any final barrier needed for a __sync operation. */
12342 if (is_sync)
12343 aarch64_emit_post_barrier (model);
0462169c
SN
12344}
12345
c2ec330c
AL
12346static void
12347aarch64_init_libfuncs (void)
12348{
12349 /* Half-precision float operations. The compiler handles all operations
12350 with NULL libfuncs by converting to SFmode. */
12351
12352 /* Conversions. */
12353 set_conv_libfunc (trunc_optab, HFmode, SFmode, "__gnu_f2h_ieee");
12354 set_conv_libfunc (sext_optab, SFmode, HFmode, "__gnu_h2f_ieee");
12355
12356 /* Arithmetic. */
12357 set_optab_libfunc (add_optab, HFmode, NULL);
12358 set_optab_libfunc (sdiv_optab, HFmode, NULL);
12359 set_optab_libfunc (smul_optab, HFmode, NULL);
12360 set_optab_libfunc (neg_optab, HFmode, NULL);
12361 set_optab_libfunc (sub_optab, HFmode, NULL);
12362
12363 /* Comparisons. */
12364 set_optab_libfunc (eq_optab, HFmode, NULL);
12365 set_optab_libfunc (ne_optab, HFmode, NULL);
12366 set_optab_libfunc (lt_optab, HFmode, NULL);
12367 set_optab_libfunc (le_optab, HFmode, NULL);
12368 set_optab_libfunc (ge_optab, HFmode, NULL);
12369 set_optab_libfunc (gt_optab, HFmode, NULL);
12370 set_optab_libfunc (unord_optab, HFmode, NULL);
12371}
12372
43e9d192 12373/* Target hook for c_mode_for_suffix. */
ef4bddc2 12374static machine_mode
43e9d192
IB
12375aarch64_c_mode_for_suffix (char suffix)
12376{
12377 if (suffix == 'q')
12378 return TFmode;
12379
12380 return VOIDmode;
12381}
12382
3520f7cc
JG
12383/* We can only represent floating point constants which will fit in
12384 "quarter-precision" values. These values are characterised by
12385 a sign bit, a 4-bit mantissa and a 3-bit exponent. And are given
12386 by:
12387
12388 (-1)^s * (n/16) * 2^r
12389
12390 Where:
12391 's' is the sign bit.
12392 'n' is an integer in the range 16 <= n <= 31.
12393 'r' is an integer in the range -3 <= r <= 4. */
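   For example, 1.0 is encoded with n == 16 and r == 0; the largest
   representable value is (31/16) * 2^4 == 31.0 and the smallest positive
   value is (16/16) * 2^-3 == 0.125.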
12394
 12395/* Return true iff X can be represented as a quarter-precision
 12396 floating point immediate operand.  Note, we cannot represent 0.0. */
12397bool
12398aarch64_float_const_representable_p (rtx x)
12399{
12400 /* This represents our current view of how many bits
12401 make up the mantissa. */
12402 int point_pos = 2 * HOST_BITS_PER_WIDE_INT - 1;
ba96cdfb 12403 int exponent;
3520f7cc 12404 unsigned HOST_WIDE_INT mantissa, mask;
3520f7cc 12405 REAL_VALUE_TYPE r, m;
807e902e 12406 bool fail;
3520f7cc
JG
12407
12408 if (!CONST_DOUBLE_P (x))
12409 return false;
12410
c2ec330c
AL
12411 /* We don't support HFmode constants yet. */
12412 if (GET_MODE (x) == VOIDmode || GET_MODE (x) == HFmode)
94bfa2da
TV
12413 return false;
12414
34a72c33 12415 r = *CONST_DOUBLE_REAL_VALUE (x);
3520f7cc
JG
12416
12417 /* We cannot represent infinities, NaNs or +/-zero. We won't
12418 know if we have +zero until we analyse the mantissa, but we
12419 can reject the other invalid values. */
12420 if (REAL_VALUE_ISINF (r) || REAL_VALUE_ISNAN (r)
12421 || REAL_VALUE_MINUS_ZERO (r))
12422 return false;
12423
ba96cdfb 12424 /* Extract exponent. */
3520f7cc
JG
12425 r = real_value_abs (&r);
12426 exponent = REAL_EXP (&r);
12427
 12428 /* For the mantissa, we expand into two HOST_WIDE_INTs, apart from the
 12429 highest (sign) bit, with a fixed binary point at bit point_pos.
 12430 w.elt (0) holds the low part of the mantissa, w.elt (1) the high part.
 12431 WARNING: If we ever have a representation using more than 2 * H_W_I - 1
 12432 bits for the mantissa, this can fail (low bits will be lost). */
12433 real_ldexp (&m, &r, point_pos - exponent);
807e902e 12434 wide_int w = real_to_integer (&m, &fail, HOST_BITS_PER_WIDE_INT * 2);
3520f7cc
JG
12435
12436 /* If the low part of the mantissa has bits set we cannot represent
12437 the value. */
807e902e 12438 if (w.elt (0) != 0)
3520f7cc
JG
12439 return false;
12440 /* We have rejected the lower HOST_WIDE_INT, so update our
12441 understanding of how many bits lie in the mantissa and
12442 look only at the high HOST_WIDE_INT. */
807e902e 12443 mantissa = w.elt (1);
3520f7cc
JG
12444 point_pos -= HOST_BITS_PER_WIDE_INT;
12445
12446 /* We can only represent values with a mantissa of the form 1.xxxx. */
12447 mask = ((unsigned HOST_WIDE_INT)1 << (point_pos - 5)) - 1;
12448 if ((mantissa & mask) != 0)
12449 return false;
12450
12451 /* Having filtered unrepresentable values, we may now remove all
12452 but the highest 5 bits. */
12453 mantissa >>= point_pos - 5;
12454
12455 /* We cannot represent the value 0.0, so reject it. This is handled
12456 elsewhere. */
12457 if (mantissa == 0)
12458 return false;
12459
12460 /* Then, as bit 4 is always set, we can mask it off, leaving
12461 the mantissa in the range [0, 15]. */
12462 mantissa &= ~(1 << 4);
12463 gcc_assert (mantissa <= 15);
12464
12465 /* GCC internally does not use IEEE754-like encoding (where normalized
12466 significands are in the range [1, 2). GCC uses [0.5, 1) (see real.c).
12467 Our mantissa values are shifted 4 places to the left relative to
12468 normalized IEEE754 so we must modify the exponent returned by REAL_EXP
12469 by 5 places to correct for GCC's representation. */
12470 exponent = 5 - exponent;
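  /* For example, for 1.0 REAL_EXP is 1 (GCC stores 1.0 as 0.5 * 2^1),
     giving an adjusted exponent of 4, which is within [0, 7].  */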
12471
12472 return (exponent >= 0 && exponent <= 7);
12473}
12474
12475char*
81c2dfb9 12476aarch64_output_simd_mov_immediate (rtx const_vector,
ef4bddc2 12477 machine_mode mode,
3520f7cc
JG
12478 unsigned width)
12479{
3ea63f60 12480 bool is_valid;
3520f7cc 12481 static char templ[40];
3520f7cc 12482 const char *mnemonic;
e4f0f84d 12483 const char *shift_op;
3520f7cc 12484 unsigned int lane_count = 0;
81c2dfb9 12485 char element_char;
3520f7cc 12486
e4f0f84d 12487 struct simd_immediate_info info = { NULL_RTX, 0, 0, false, false };
48063b9d
IB
12488
 12489 /* This will return true to show CONST_VECTOR is legal for use as either
 12490 an AdvSIMD MOVI (or, implicitly, MVNI) immediate.  It will
 12491 also update INFO to show how the immediate should be generated. */
81c2dfb9 12492 is_valid = aarch64_simd_valid_immediate (const_vector, mode, false, &info);
3520f7cc
JG
12493 gcc_assert (is_valid);
12494
81c2dfb9 12495 element_char = sizetochar (info.element_width);
48063b9d
IB
12496 lane_count = width / info.element_width;
12497
3520f7cc 12498 mode = GET_MODE_INNER (mode);
0d8e1702 12499 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
3520f7cc 12500 {
48063b9d 12501 gcc_assert (info.shift == 0 && ! info.mvn);
0d8e1702
KT
12502 /* For FP zero change it to a CONST_INT 0 and use the integer SIMD
12503 move immediate path. */
48063b9d
IB
12504 if (aarch64_float_const_zero_rtx_p (info.value))
12505 info.value = GEN_INT (0);
12506 else
12507 {
83faf7d0 12508 const unsigned int buf_size = 20;
48063b9d 12509 char float_buf[buf_size] = {'\0'};
34a72c33
RS
12510 real_to_decimal_for_mode (float_buf,
12511 CONST_DOUBLE_REAL_VALUE (info.value),
12512 buf_size, buf_size, 1, mode);
48063b9d
IB
12513
12514 if (lane_count == 1)
12515 snprintf (templ, sizeof (templ), "fmov\t%%d0, %s", float_buf);
12516 else
12517 snprintf (templ, sizeof (templ), "fmov\t%%0.%d%c, %s",
81c2dfb9 12518 lane_count, element_char, float_buf);
48063b9d
IB
12519 return templ;
12520 }
3520f7cc 12521 }
3520f7cc 12522
48063b9d 12523 mnemonic = info.mvn ? "mvni" : "movi";
e4f0f84d 12524 shift_op = info.msl ? "msl" : "lsl";
3520f7cc 12525
0d8e1702 12526 gcc_assert (CONST_INT_P (info.value));
3520f7cc 12527 if (lane_count == 1)
48063b9d
IB
12528 snprintf (templ, sizeof (templ), "%s\t%%d0, " HOST_WIDE_INT_PRINT_HEX,
12529 mnemonic, UINTVAL (info.value));
12530 else if (info.shift)
12531 snprintf (templ, sizeof (templ), "%s\t%%0.%d%c, " HOST_WIDE_INT_PRINT_HEX
e4f0f84d
TB
12532 ", %s %d", mnemonic, lane_count, element_char,
12533 UINTVAL (info.value), shift_op, info.shift);
3520f7cc 12534 else
48063b9d 12535 snprintf (templ, sizeof (templ), "%s\t%%0.%d%c, " HOST_WIDE_INT_PRINT_HEX,
81c2dfb9 12536 mnemonic, lane_count, element_char, UINTVAL (info.value));
3520f7cc
JG
12537 return templ;
12538}
12539
b7342d25
IB
12540char*
12541aarch64_output_scalar_simd_mov_immediate (rtx immediate,
ef4bddc2 12542 machine_mode mode)
b7342d25 12543{
ef4bddc2 12544 machine_mode vmode;
b7342d25
IB
12545
12546 gcc_assert (!VECTOR_MODE_P (mode));
12547 vmode = aarch64_simd_container_mode (mode, 64);
12548 rtx v_op = aarch64_simd_gen_const_vector_dup (vmode, INTVAL (immediate));
12549 return aarch64_output_simd_mov_immediate (v_op, vmode, 64);
12550}
12551
88b08073
JG
12552/* Split operands into moves from op[1] + op[2] into op[0]. */
12553
12554void
12555aarch64_split_combinev16qi (rtx operands[3])
12556{
12557 unsigned int dest = REGNO (operands[0]);
12558 unsigned int src1 = REGNO (operands[1]);
12559 unsigned int src2 = REGNO (operands[2]);
ef4bddc2 12560 machine_mode halfmode = GET_MODE (operands[1]);
88b08073
JG
12561 unsigned int halfregs = HARD_REGNO_NREGS (src1, halfmode);
12562 rtx destlo, desthi;
12563
12564 gcc_assert (halfmode == V16QImode);
12565
12566 if (src1 == dest && src2 == dest + halfregs)
12567 {
12568 /* No-op move. Can't split to nothing; emit something. */
12569 emit_note (NOTE_INSN_DELETED);
12570 return;
12571 }
12572
12573 /* Preserve register attributes for variable tracking. */
12574 destlo = gen_rtx_REG_offset (operands[0], halfmode, dest, 0);
12575 desthi = gen_rtx_REG_offset (operands[0], halfmode, dest + halfregs,
12576 GET_MODE_SIZE (halfmode));
12577
12578 /* Special case of reversed high/low parts. */
12579 if (reg_overlap_mentioned_p (operands[2], destlo)
12580 && reg_overlap_mentioned_p (operands[1], desthi))
12581 {
12582 emit_insn (gen_xorv16qi3 (operands[1], operands[1], operands[2]));
12583 emit_insn (gen_xorv16qi3 (operands[2], operands[1], operands[2]));
12584 emit_insn (gen_xorv16qi3 (operands[1], operands[1], operands[2]));
12585 }
12586 else if (!reg_overlap_mentioned_p (operands[2], destlo))
12587 {
12588 /* Try to avoid unnecessary moves if part of the result
12589 is in the right place already. */
12590 if (src1 != dest)
12591 emit_move_insn (destlo, operands[1]);
12592 if (src2 != dest + halfregs)
12593 emit_move_insn (desthi, operands[2]);
12594 }
12595 else
12596 {
12597 if (src2 != dest + halfregs)
12598 emit_move_insn (desthi, operands[2]);
12599 if (src1 != dest)
12600 emit_move_insn (destlo, operands[1]);
12601 }
12602}
12603
12604/* vec_perm support. */
12605
12606#define MAX_VECT_LEN 16
12607
12608struct expand_vec_perm_d
12609{
12610 rtx target, op0, op1;
12611 unsigned char perm[MAX_VECT_LEN];
ef4bddc2 12612 machine_mode vmode;
88b08073
JG
12613 unsigned char nelt;
12614 bool one_vector_p;
12615 bool testing_p;
12616};
12617
12618/* Generate a variable permutation. */
12619
12620static void
12621aarch64_expand_vec_perm_1 (rtx target, rtx op0, rtx op1, rtx sel)
12622{
ef4bddc2 12623 machine_mode vmode = GET_MODE (target);
88b08073
JG
12624 bool one_vector_p = rtx_equal_p (op0, op1);
12625
12626 gcc_checking_assert (vmode == V8QImode || vmode == V16QImode);
12627 gcc_checking_assert (GET_MODE (op0) == vmode);
12628 gcc_checking_assert (GET_MODE (op1) == vmode);
12629 gcc_checking_assert (GET_MODE (sel) == vmode);
12630 gcc_checking_assert (TARGET_SIMD);
12631
12632 if (one_vector_p)
12633 {
12634 if (vmode == V8QImode)
12635 {
12636 /* Expand the argument to a V16QI mode by duplicating it. */
12637 rtx pair = gen_reg_rtx (V16QImode);
12638 emit_insn (gen_aarch64_combinev8qi (pair, op0, op0));
12639 emit_insn (gen_aarch64_tbl1v8qi (target, pair, sel));
12640 }
12641 else
12642 {
12643 emit_insn (gen_aarch64_tbl1v16qi (target, op0, sel));
12644 }
12645 }
12646 else
12647 {
12648 rtx pair;
12649
12650 if (vmode == V8QImode)
12651 {
12652 pair = gen_reg_rtx (V16QImode);
12653 emit_insn (gen_aarch64_combinev8qi (pair, op0, op1));
12654 emit_insn (gen_aarch64_tbl1v8qi (target, pair, sel));
12655 }
12656 else
12657 {
12658 pair = gen_reg_rtx (OImode);
12659 emit_insn (gen_aarch64_combinev16qi (pair, op0, op1));
12660 emit_insn (gen_aarch64_tbl2v16qi (target, pair, sel));
12661 }
12662 }
12663}
12664
12665void
12666aarch64_expand_vec_perm (rtx target, rtx op0, rtx op1, rtx sel)
12667{
ef4bddc2 12668 machine_mode vmode = GET_MODE (target);
c9d1a16a 12669 unsigned int nelt = GET_MODE_NUNITS (vmode);
88b08073 12670 bool one_vector_p = rtx_equal_p (op0, op1);
f7c4e5b8 12671 rtx mask;
88b08073
JG
12672
12673 /* The TBL instruction does not use a modulo index, so we must take care
12674 of that ourselves. */
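  /* An out-of-range TBL index writes zero to the result element, whereas
     vec_perm semantics require the index to be reduced modulo the number
     of input elements; as that count is a power of two, the AND below
     implements the modulo.  */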
f7c4e5b8
AL
12675 mask = aarch64_simd_gen_const_vector_dup (vmode,
12676 one_vector_p ? nelt - 1 : 2 * nelt - 1);
88b08073
JG
12677 sel = expand_simple_binop (vmode, AND, sel, mask, NULL, 0, OPTAB_LIB_WIDEN);
12678
f7c4e5b8
AL
12679 /* For big-endian, we also need to reverse the index within the vector
12680 (but not which vector). */
12681 if (BYTES_BIG_ENDIAN)
12682 {
12683 /* If one_vector_p, mask is a vector of (nelt - 1)'s already. */
12684 if (!one_vector_p)
12685 mask = aarch64_simd_gen_const_vector_dup (vmode, nelt - 1);
12686 sel = expand_simple_binop (vmode, XOR, sel, mask,
12687 NULL, 0, OPTAB_LIB_WIDEN);
12688 }
88b08073
JG
12689 aarch64_expand_vec_perm_1 (target, op0, op1, sel);
12690}
12691
cc4d934f
JG
12692/* Recognize patterns suitable for the TRN instructions. */
12693static bool
12694aarch64_evpc_trn (struct expand_vec_perm_d *d)
12695{
12696 unsigned int i, odd, mask, nelt = d->nelt;
12697 rtx out, in0, in1, x;
12698 rtx (*gen) (rtx, rtx, rtx);
ef4bddc2 12699 machine_mode vmode = d->vmode;
cc4d934f
JG
12700
12701 if (GET_MODE_UNIT_SIZE (vmode) > 8)
12702 return false;
12703
12704 /* Note that these are little-endian tests.
12705 We correct for big-endian later. */
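     For example, for V4SImode with two input vectors the permutation
     { 0, 4, 2, 6 } selects TRN1 (odd == 0) and { 1, 5, 3, 7 } selects
     TRN2 (odd == 1).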
12706 if (d->perm[0] == 0)
12707 odd = 0;
12708 else if (d->perm[0] == 1)
12709 odd = 1;
12710 else
12711 return false;
12712 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
12713
12714 for (i = 0; i < nelt; i += 2)
12715 {
12716 if (d->perm[i] != i + odd)
12717 return false;
12718 if (d->perm[i + 1] != ((i + nelt + odd) & mask))
12719 return false;
12720 }
12721
12722 /* Success! */
12723 if (d->testing_p)
12724 return true;
12725
12726 in0 = d->op0;
12727 in1 = d->op1;
12728 if (BYTES_BIG_ENDIAN)
12729 {
12730 x = in0, in0 = in1, in1 = x;
12731 odd = !odd;
12732 }
12733 out = d->target;
12734
12735 if (odd)
12736 {
12737 switch (vmode)
12738 {
12739 case V16QImode: gen = gen_aarch64_trn2v16qi; break;
12740 case V8QImode: gen = gen_aarch64_trn2v8qi; break;
12741 case V8HImode: gen = gen_aarch64_trn2v8hi; break;
12742 case V4HImode: gen = gen_aarch64_trn2v4hi; break;
12743 case V4SImode: gen = gen_aarch64_trn2v4si; break;
12744 case V2SImode: gen = gen_aarch64_trn2v2si; break;
12745 case V2DImode: gen = gen_aarch64_trn2v2di; break;
358decd5
JW
12746 case V4HFmode: gen = gen_aarch64_trn2v4hf; break;
12747 case V8HFmode: gen = gen_aarch64_trn2v8hf; break;
cc4d934f
JG
12748 case V4SFmode: gen = gen_aarch64_trn2v4sf; break;
12749 case V2SFmode: gen = gen_aarch64_trn2v2sf; break;
12750 case V2DFmode: gen = gen_aarch64_trn2v2df; break;
12751 default:
12752 return false;
12753 }
12754 }
12755 else
12756 {
12757 switch (vmode)
12758 {
12759 case V16QImode: gen = gen_aarch64_trn1v16qi; break;
12760 case V8QImode: gen = gen_aarch64_trn1v8qi; break;
12761 case V8HImode: gen = gen_aarch64_trn1v8hi; break;
12762 case V4HImode: gen = gen_aarch64_trn1v4hi; break;
12763 case V4SImode: gen = gen_aarch64_trn1v4si; break;
12764 case V2SImode: gen = gen_aarch64_trn1v2si; break;
12765 case V2DImode: gen = gen_aarch64_trn1v2di; break;
358decd5
JW
12766 case V4HFmode: gen = gen_aarch64_trn1v4hf; break;
12767 case V8HFmode: gen = gen_aarch64_trn1v8hf; break;
cc4d934f
JG
12768 case V4SFmode: gen = gen_aarch64_trn1v4sf; break;
12769 case V2SFmode: gen = gen_aarch64_trn1v2sf; break;
12770 case V2DFmode: gen = gen_aarch64_trn1v2df; break;
12771 default:
12772 return false;
12773 }
12774 }
12775
12776 emit_insn (gen (out, in0, in1));
12777 return true;
12778}
12779
12780/* Recognize patterns suitable for the UZP instructions. */
12781static bool
12782aarch64_evpc_uzp (struct expand_vec_perm_d *d)
12783{
12784 unsigned int i, odd, mask, nelt = d->nelt;
12785 rtx out, in0, in1, x;
12786 rtx (*gen) (rtx, rtx, rtx);
ef4bddc2 12787 machine_mode vmode = d->vmode;
cc4d934f
JG
12788
12789 if (GET_MODE_UNIT_SIZE (vmode) > 8)
12790 return false;
12791
12792 /* Note that these are little-endian tests.
12793 We correct for big-endian later. */
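     For example, for V4SImode with two input vectors the permutation
     { 0, 2, 4, 6 } selects UZP1 (odd == 0) and { 1, 3, 5, 7 } selects
     UZP2 (odd == 1).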
12794 if (d->perm[0] == 0)
12795 odd = 0;
12796 else if (d->perm[0] == 1)
12797 odd = 1;
12798 else
12799 return false;
12800 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
12801
12802 for (i = 0; i < nelt; i++)
12803 {
12804 unsigned elt = (i * 2 + odd) & mask;
12805 if (d->perm[i] != elt)
12806 return false;
12807 }
12808
12809 /* Success! */
12810 if (d->testing_p)
12811 return true;
12812
12813 in0 = d->op0;
12814 in1 = d->op1;
12815 if (BYTES_BIG_ENDIAN)
12816 {
12817 x = in0, in0 = in1, in1 = x;
12818 odd = !odd;
12819 }
12820 out = d->target;
12821
12822 if (odd)
12823 {
12824 switch (vmode)
12825 {
12826 case V16QImode: gen = gen_aarch64_uzp2v16qi; break;
12827 case V8QImode: gen = gen_aarch64_uzp2v8qi; break;
12828 case V8HImode: gen = gen_aarch64_uzp2v8hi; break;
12829 case V4HImode: gen = gen_aarch64_uzp2v4hi; break;
12830 case V4SImode: gen = gen_aarch64_uzp2v4si; break;
12831 case V2SImode: gen = gen_aarch64_uzp2v2si; break;
12832 case V2DImode: gen = gen_aarch64_uzp2v2di; break;
358decd5
JW
12833 case V4HFmode: gen = gen_aarch64_uzp2v4hf; break;
12834 case V8HFmode: gen = gen_aarch64_uzp2v8hf; break;
cc4d934f
JG
12835 case V4SFmode: gen = gen_aarch64_uzp2v4sf; break;
12836 case V2SFmode: gen = gen_aarch64_uzp2v2sf; break;
12837 case V2DFmode: gen = gen_aarch64_uzp2v2df; break;
12838 default:
12839 return false;
12840 }
12841 }
12842 else
12843 {
12844 switch (vmode)
12845 {
12846 case V16QImode: gen = gen_aarch64_uzp1v16qi; break;
12847 case V8QImode: gen = gen_aarch64_uzp1v8qi; break;
12848 case V8HImode: gen = gen_aarch64_uzp1v8hi; break;
12849 case V4HImode: gen = gen_aarch64_uzp1v4hi; break;
12850 case V4SImode: gen = gen_aarch64_uzp1v4si; break;
12851 case V2SImode: gen = gen_aarch64_uzp1v2si; break;
12852 case V2DImode: gen = gen_aarch64_uzp1v2di; break;
358decd5
JW
12853 case V4HFmode: gen = gen_aarch64_uzp1v4hf; break;
12854 case V8HFmode: gen = gen_aarch64_uzp1v8hf; break;
cc4d934f
JG
12855 case V4SFmode: gen = gen_aarch64_uzp1v4sf; break;
12856 case V2SFmode: gen = gen_aarch64_uzp1v2sf; break;
12857 case V2DFmode: gen = gen_aarch64_uzp1v2df; break;
12858 default:
12859 return false;
12860 }
12861 }
12862
12863 emit_insn (gen (out, in0, in1));
12864 return true;
12865}
12866
12867/* Recognize patterns suitable for the ZIP instructions. */
12868static bool
12869aarch64_evpc_zip (struct expand_vec_perm_d *d)
12870{
12871 unsigned int i, high, mask, nelt = d->nelt;
12872 rtx out, in0, in1, x;
12873 rtx (*gen) (rtx, rtx, rtx);
ef4bddc2 12874 machine_mode vmode = d->vmode;
cc4d934f
JG
12875
12876 if (GET_MODE_UNIT_SIZE (vmode) > 8)
12877 return false;
12878
12879 /* Note that these are little-endian tests.
12880 We correct for big-endian later. */
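     For example, for V4SImode with two input vectors the permutation
     { 0, 4, 1, 5 } selects ZIP1 (high == 0) and { 2, 6, 3, 7 } selects
     ZIP2 (high == nelt / 2).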
12881 high = nelt / 2;
12882 if (d->perm[0] == high)
12883 /* Do Nothing. */
12884 ;
12885 else if (d->perm[0] == 0)
12886 high = 0;
12887 else
12888 return false;
12889 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
12890
12891 for (i = 0; i < nelt / 2; i++)
12892 {
12893 unsigned elt = (i + high) & mask;
12894 if (d->perm[i * 2] != elt)
12895 return false;
12896 elt = (elt + nelt) & mask;
12897 if (d->perm[i * 2 + 1] != elt)
12898 return false;
12899 }
12900
12901 /* Success! */
12902 if (d->testing_p)
12903 return true;
12904
12905 in0 = d->op0;
12906 in1 = d->op1;
12907 if (BYTES_BIG_ENDIAN)
12908 {
12909 x = in0, in0 = in1, in1 = x;
12910 high = !high;
12911 }
12912 out = d->target;
12913
12914 if (high)
12915 {
12916 switch (vmode)
12917 {
12918 case V16QImode: gen = gen_aarch64_zip2v16qi; break;
12919 case V8QImode: gen = gen_aarch64_zip2v8qi; break;
12920 case V8HImode: gen = gen_aarch64_zip2v8hi; break;
12921 case V4HImode: gen = gen_aarch64_zip2v4hi; break;
12922 case V4SImode: gen = gen_aarch64_zip2v4si; break;
12923 case V2SImode: gen = gen_aarch64_zip2v2si; break;
12924 case V2DImode: gen = gen_aarch64_zip2v2di; break;
358decd5
JW
12925 case V4HFmode: gen = gen_aarch64_zip2v4hf; break;
12926 case V8HFmode: gen = gen_aarch64_zip2v8hf; break;
cc4d934f
JG
12927 case V4SFmode: gen = gen_aarch64_zip2v4sf; break;
12928 case V2SFmode: gen = gen_aarch64_zip2v2sf; break;
12929 case V2DFmode: gen = gen_aarch64_zip2v2df; break;
12930 default:
12931 return false;
12932 }
12933 }
12934 else
12935 {
12936 switch (vmode)
12937 {
12938 case V16QImode: gen = gen_aarch64_zip1v16qi; break;
12939 case V8QImode: gen = gen_aarch64_zip1v8qi; break;
12940 case V8HImode: gen = gen_aarch64_zip1v8hi; break;
12941 case V4HImode: gen = gen_aarch64_zip1v4hi; break;
12942 case V4SImode: gen = gen_aarch64_zip1v4si; break;
12943 case V2SImode: gen = gen_aarch64_zip1v2si; break;
12944 case V2DImode: gen = gen_aarch64_zip1v2di; break;
358decd5
JW
12945 case V4HFmode: gen = gen_aarch64_zip1v4hf; break;
12946 case V8HFmode: gen = gen_aarch64_zip1v8hf; break;
cc4d934f
JG
12947 case V4SFmode: gen = gen_aarch64_zip1v4sf; break;
12948 case V2SFmode: gen = gen_aarch64_zip1v2sf; break;
12949 case V2DFmode: gen = gen_aarch64_zip1v2df; break;
12950 default:
12951 return false;
12952 }
12953 }
12954
12955 emit_insn (gen (out, in0, in1));
12956 return true;
12957}
12958
ae0533da
AL
12959/* Recognize patterns for the EXT insn. */
12960
12961static bool
12962aarch64_evpc_ext (struct expand_vec_perm_d *d)
12963{
12964 unsigned int i, nelt = d->nelt;
12965 rtx (*gen) (rtx, rtx, rtx, rtx);
12966 rtx offset;
12967
12968 unsigned int location = d->perm[0]; /* Always < nelt. */
12969
12970 /* Check if the extracted indices are increasing by one. */
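  /* For example, for V4SImode with two input vectors the permutation
     { 1, 2, 3, 4 } is an EXT by one element: the last three elements of
     the first input followed by the first element of the second.  */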
12971 for (i = 1; i < nelt; i++)
12972 {
12973 unsigned int required = location + i;
12974 if (d->one_vector_p)
12975 {
12976 /* We'll pass the same vector in twice, so allow indices to wrap. */
12977 required &= (nelt - 1);
12978 }
12979 if (d->perm[i] != required)
12980 return false;
12981 }
12982
ae0533da
AL
12983 switch (d->vmode)
12984 {
12985 case V16QImode: gen = gen_aarch64_extv16qi; break;
12986 case V8QImode: gen = gen_aarch64_extv8qi; break;
12987 case V4HImode: gen = gen_aarch64_extv4hi; break;
12988 case V8HImode: gen = gen_aarch64_extv8hi; break;
12989 case V2SImode: gen = gen_aarch64_extv2si; break;
12990 case V4SImode: gen = gen_aarch64_extv4si; break;
358decd5
JW
12991 case V4HFmode: gen = gen_aarch64_extv4hf; break;
12992 case V8HFmode: gen = gen_aarch64_extv8hf; break;
ae0533da
AL
12993 case V2SFmode: gen = gen_aarch64_extv2sf; break;
12994 case V4SFmode: gen = gen_aarch64_extv4sf; break;
12995 case V2DImode: gen = gen_aarch64_extv2di; break;
12996 case V2DFmode: gen = gen_aarch64_extv2df; break;
12997 default:
12998 return false;
12999 }
13000
13001 /* Success! */
13002 if (d->testing_p)
13003 return true;
13004
b31e65bb
AL
13005 /* The case where (location == 0) is a no-op for both big- and little-endian,
13006 and is removed by the mid-end at optimization levels -O1 and higher. */
13007
13008 if (BYTES_BIG_ENDIAN && (location != 0))
ae0533da
AL
13009 {
13010 /* After setup, we want the high elements of the first vector (stored
13011 at the LSB end of the register), and the low elements of the second
13012 vector (stored at the MSB end of the register). So swap. */
cb5c6c29 13013 std::swap (d->op0, d->op1);
ae0533da
AL
13014 /* location != 0 (above), so safe to assume (nelt - location) < nelt. */
13015 location = nelt - location;
13016 }
13017
13018 offset = GEN_INT (location);
13019 emit_insn (gen (d->target, d->op0, d->op1, offset));
13020 return true;
13021}
13022
923fcec3
AL
13023/* Recognize patterns for the REV insns. */
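   The permutation reverses each group of DIFF + 1 elements, where DIFF is
   the first index: e.g. for V4SImode the permutation { 1, 0, 3, 2 } has
   DIFF == 1 and maps to rev64 (reverse the 32-bit elements within each
   64-bit chunk).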
13024
13025static bool
13026aarch64_evpc_rev (struct expand_vec_perm_d *d)
13027{
13028 unsigned int i, j, diff, nelt = d->nelt;
13029 rtx (*gen) (rtx, rtx);
13030
13031 if (!d->one_vector_p)
13032 return false;
13033
13034 diff = d->perm[0];
13035 switch (diff)
13036 {
13037 case 7:
13038 switch (d->vmode)
13039 {
13040 case V16QImode: gen = gen_aarch64_rev64v16qi; break;
13041 case V8QImode: gen = gen_aarch64_rev64v8qi; break;
13042 default:
13043 return false;
13044 }
13045 break;
13046 case 3:
13047 switch (d->vmode)
13048 {
13049 case V16QImode: gen = gen_aarch64_rev32v16qi; break;
13050 case V8QImode: gen = gen_aarch64_rev32v8qi; break;
13051 case V8HImode: gen = gen_aarch64_rev64v8hi; break;
13052 case V4HImode: gen = gen_aarch64_rev64v4hi; break;
13053 default:
13054 return false;
13055 }
13056 break;
13057 case 1:
13058 switch (d->vmode)
13059 {
13060 case V16QImode: gen = gen_aarch64_rev16v16qi; break;
13061 case V8QImode: gen = gen_aarch64_rev16v8qi; break;
13062 case V8HImode: gen = gen_aarch64_rev32v8hi; break;
13063 case V4HImode: gen = gen_aarch64_rev32v4hi; break;
13064 case V4SImode: gen = gen_aarch64_rev64v4si; break;
13065 case V2SImode: gen = gen_aarch64_rev64v2si; break;
13066 case V4SFmode: gen = gen_aarch64_rev64v4sf; break;
13067 case V2SFmode: gen = gen_aarch64_rev64v2sf; break;
358decd5
JW
13068 case V8HFmode: gen = gen_aarch64_rev64v8hf; break;
13069 case V4HFmode: gen = gen_aarch64_rev64v4hf; break;
923fcec3
AL
13070 default:
13071 return false;
13072 }
13073 break;
13074 default:
13075 return false;
13076 }
13077
13078 for (i = 0; i < nelt ; i += diff + 1)
13079 for (j = 0; j <= diff; j += 1)
13080 {
 13081 /* This is guaranteed to be true, as the value of diff is
 13082 7, 3 or 1 and we should have enough elements in the
 13083 vector to satisfy the assertion.  A vector mask with a
 13084 diff value other than one of these would mean something
 13085 has already gone wrong by the time we get here. */
13086 gcc_assert (i + j < nelt);
13087 if (d->perm[i + j] != i + diff - j)
13088 return false;
13089 }
13090
13091 /* Success! */
13092 if (d->testing_p)
13093 return true;
13094
13095 emit_insn (gen (d->target, d->op0));
13096 return true;
13097}
13098
91bd4114
JG
13099static bool
13100aarch64_evpc_dup (struct expand_vec_perm_d *d)
13101{
13102 rtx (*gen) (rtx, rtx, rtx);
13103 rtx out = d->target;
13104 rtx in0;
ef4bddc2 13105 machine_mode vmode = d->vmode;
91bd4114
JG
13106 unsigned int i, elt, nelt = d->nelt;
13107 rtx lane;
13108
91bd4114
JG
13109 elt = d->perm[0];
13110 for (i = 1; i < nelt; i++)
13111 {
13112 if (elt != d->perm[i])
13113 return false;
13114 }
13115
13116 /* The generic preparation in aarch64_expand_vec_perm_const_1
13117 swaps the operand order and the permute indices if it finds
13118 d->perm[0] to be in the second operand. Thus, we can always
13119 use d->op0 and need not do any extra arithmetic to get the
13120 correct lane number. */
13121 in0 = d->op0;
f901401e 13122 lane = GEN_INT (elt); /* The pattern corrects for big-endian. */
91bd4114
JG
13123
13124 switch (vmode)
13125 {
13126 case V16QImode: gen = gen_aarch64_dup_lanev16qi; break;
13127 case V8QImode: gen = gen_aarch64_dup_lanev8qi; break;
13128 case V8HImode: gen = gen_aarch64_dup_lanev8hi; break;
13129 case V4HImode: gen = gen_aarch64_dup_lanev4hi; break;
13130 case V4SImode: gen = gen_aarch64_dup_lanev4si; break;
13131 case V2SImode: gen = gen_aarch64_dup_lanev2si; break;
13132 case V2DImode: gen = gen_aarch64_dup_lanev2di; break;
862abc04
AL
13133 case V8HFmode: gen = gen_aarch64_dup_lanev8hf; break;
13134 case V4HFmode: gen = gen_aarch64_dup_lanev4hf; break;
91bd4114
JG
13135 case V4SFmode: gen = gen_aarch64_dup_lanev4sf; break;
13136 case V2SFmode: gen = gen_aarch64_dup_lanev2sf; break;
13137 case V2DFmode: gen = gen_aarch64_dup_lanev2df; break;
13138 default:
13139 return false;
13140 }
13141
13142 emit_insn (gen (out, in0, lane));
13143 return true;
13144}
13145
88b08073
JG
13146static bool
13147aarch64_evpc_tbl (struct expand_vec_perm_d *d)
13148{
13149 rtx rperm[MAX_VECT_LEN], sel;
ef4bddc2 13150 machine_mode vmode = d->vmode;
88b08073
JG
13151 unsigned int i, nelt = d->nelt;
13152
88b08073
JG
13153 if (d->testing_p)
13154 return true;
13155
13156 /* Generic code will try constant permutation twice. Once with the
13157 original mode and again with the elements lowered to QImode.
13158 So wait and don't do the selector expansion ourselves. */
13159 if (vmode != V8QImode && vmode != V16QImode)
13160 return false;
13161
13162 for (i = 0; i < nelt; ++i)
bbcc9c00
TB
13163 {
13164 int nunits = GET_MODE_NUNITS (vmode);
13165
13166 /* If big-endian and two vectors we end up with a weird mixed-endian
13167 mode on NEON. Reverse the index within each word but not the word
13168 itself. */
13169 rperm[i] = GEN_INT (BYTES_BIG_ENDIAN ? d->perm[i] ^ (nunits - 1)
13170 : d->perm[i]);
13171 }
88b08073
JG
13172 sel = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, rperm));
13173 sel = force_reg (vmode, sel);
13174
13175 aarch64_expand_vec_perm_1 (d->target, d->op0, d->op1, sel);
13176 return true;
13177}
13178
13179static bool
13180aarch64_expand_vec_perm_const_1 (struct expand_vec_perm_d *d)
13181{
13182 /* The pattern matching functions above are written to look for a small
13183 number to begin the sequence (0, 1, N/2). If we begin with an index
13184 from the second operand, we can swap the operands. */
13185 if (d->perm[0] >= d->nelt)
13186 {
13187 unsigned i, nelt = d->nelt;
88b08073 13188
0696116a 13189 gcc_assert (nelt == (nelt & -nelt));
88b08073 13190 for (i = 0; i < nelt; ++i)
0696116a 13191 d->perm[i] ^= nelt; /* Keep the same index, but in the other vector. */
88b08073 13192
cb5c6c29 13193 std::swap (d->op0, d->op1);
88b08073
JG
13194 }
13195
13196 if (TARGET_SIMD)
cc4d934f 13197 {
923fcec3
AL
13198 if (aarch64_evpc_rev (d))
13199 return true;
13200 else if (aarch64_evpc_ext (d))
ae0533da 13201 return true;
f901401e
AL
13202 else if (aarch64_evpc_dup (d))
13203 return true;
ae0533da 13204 else if (aarch64_evpc_zip (d))
cc4d934f
JG
13205 return true;
13206 else if (aarch64_evpc_uzp (d))
13207 return true;
13208 else if (aarch64_evpc_trn (d))
13209 return true;
13210 return aarch64_evpc_tbl (d);
13211 }
88b08073
JG
13212 return false;
13213}
13214
13215/* Expand a vec_perm_const pattern. */
13216
13217bool
13218aarch64_expand_vec_perm_const (rtx target, rtx op0, rtx op1, rtx sel)
13219{
13220 struct expand_vec_perm_d d;
13221 int i, nelt, which;
13222
13223 d.target = target;
13224 d.op0 = op0;
13225 d.op1 = op1;
13226
13227 d.vmode = GET_MODE (target);
13228 gcc_assert (VECTOR_MODE_P (d.vmode));
13229 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
13230 d.testing_p = false;
13231
13232 for (i = which = 0; i < nelt; ++i)
13233 {
13234 rtx e = XVECEXP (sel, 0, i);
13235 int ei = INTVAL (e) & (2 * nelt - 1);
13236 which |= (ei < nelt ? 1 : 2);
13237 d.perm[i] = ei;
13238 }
13239
13240 switch (which)
13241 {
13242 default:
13243 gcc_unreachable ();
13244
13245 case 3:
13246 d.one_vector_p = false;
13247 if (!rtx_equal_p (op0, op1))
13248 break;
13249
13250 /* The elements of PERM do not suggest that only the first operand
13251 is used, but both operands are identical. Allow easier matching
13252 of the permutation by folding the permutation into the single
13253 input vector. */
13254 /* Fall Through. */
13255 case 2:
13256 for (i = 0; i < nelt; ++i)
13257 d.perm[i] &= nelt - 1;
13258 d.op0 = op1;
13259 d.one_vector_p = true;
13260 break;
13261
13262 case 1:
13263 d.op1 = op0;
13264 d.one_vector_p = true;
13265 break;
13266 }
13267
13268 return aarch64_expand_vec_perm_const_1 (&d);
13269}
13270
13271static bool
ef4bddc2 13272aarch64_vectorize_vec_perm_const_ok (machine_mode vmode,
88b08073
JG
13273 const unsigned char *sel)
13274{
13275 struct expand_vec_perm_d d;
13276 unsigned int i, nelt, which;
13277 bool ret;
13278
13279 d.vmode = vmode;
13280 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
13281 d.testing_p = true;
13282 memcpy (d.perm, sel, nelt);
13283
13284 /* Calculate whether all elements are in one vector. */
13285 for (i = which = 0; i < nelt; ++i)
13286 {
13287 unsigned char e = d.perm[i];
13288 gcc_assert (e < 2 * nelt);
13289 which |= (e < nelt ? 1 : 2);
13290 }
13291
13292 /* If all elements are from the second vector, reindex as if from the
13293 first vector. */
13294 if (which == 2)
13295 for (i = 0; i < nelt; ++i)
13296 d.perm[i] -= nelt;
13297
13298 /* Check whether the mask can be applied to a single vector. */
13299 d.one_vector_p = (which != 3);
13300
13301 d.target = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 1);
13302 d.op1 = d.op0 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 2);
13303 if (!d.one_vector_p)
13304 d.op1 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 3);
13305
13306 start_sequence ();
13307 ret = aarch64_expand_vec_perm_const_1 (&d);
13308 end_sequence ();
13309
13310 return ret;
13311}
13312
668046d1
DS
13313rtx
13314aarch64_reverse_mask (enum machine_mode mode)
13315{
 13316 /* We have to reverse each vector because we don't have
 13317 a permuted load that can reverse-load according to ABI rules. */
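  /* For example, for V4SImode (usize == 4) the mask bytes are
     { 3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12 }: the bytes
     within each element are reversed but the elements stay in place.  */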
13318 rtx mask;
13319 rtvec v = rtvec_alloc (16);
13320 int i, j;
13321 int nunits = GET_MODE_NUNITS (mode);
13322 int usize = GET_MODE_UNIT_SIZE (mode);
13323
13324 gcc_assert (BYTES_BIG_ENDIAN);
13325 gcc_assert (AARCH64_VALID_SIMD_QREG_MODE (mode));
13326
13327 for (i = 0; i < nunits; i++)
13328 for (j = 0; j < usize; j++)
13329 RTVEC_ELT (v, i * usize + j) = GEN_INT ((i + 1) * usize - 1 - j);
13330 mask = gen_rtx_CONST_VECTOR (V16QImode, v);
13331 return force_reg (V16QImode, mask);
13332}
13333
61f17a5c
WD
 13334/* Implement MODES_TIEABLE_P.  In principle we should always return true.
 13335 However, due to issues with register allocation it is preferable to avoid
 13336 tying integer scalar and FP scalar modes.  Executing integer operations
 13337 in general registers is better than treating them as scalar vector
 13338 operations.  This reduces latency and avoids redundant int<->FP moves.
 13339 So tie modes if they are either the same class, or vector modes with
 13340 other vector modes, vector structs or any scalar mode. */
97e1ad78
JG
13342
13343bool
ef4bddc2 13344aarch64_modes_tieable_p (machine_mode mode1, machine_mode mode2)
97e1ad78
JG
13345{
13346 if (GET_MODE_CLASS (mode1) == GET_MODE_CLASS (mode2))
13347 return true;
13348
13349 /* We specifically want to allow elements of "structure" modes to
13350 be tieable to the structure. This more general condition allows
13351 other rarer situations too. */
61f17a5c
WD
13352 if (aarch64_vector_mode_p (mode1) && aarch64_vector_mode_p (mode2))
13353 return true;
13354
13355 /* Also allow any scalar modes with vectors. */
13356 if (aarch64_vector_mode_supported_p (mode1)
13357 || aarch64_vector_mode_supported_p (mode2))
97e1ad78
JG
13358 return true;
13359
13360 return false;
13361}
13362
e2c75eea
JG
13363/* Return a new RTX holding the result of moving POINTER forward by
13364 AMOUNT bytes. */
13365
13366static rtx
13367aarch64_move_pointer (rtx pointer, int amount)
13368{
13369 rtx next = plus_constant (Pmode, XEXP (pointer, 0), amount);
13370
13371 return adjust_automodify_address (pointer, GET_MODE (pointer),
13372 next, amount);
13373}
13374
13375/* Return a new RTX holding the result of moving POINTER forward by the
13376 size of the mode it points to. */
13377
13378static rtx
13379aarch64_progress_pointer (rtx pointer)
13380{
13381 HOST_WIDE_INT amount = GET_MODE_SIZE (GET_MODE (pointer));
13382
13383 return aarch64_move_pointer (pointer, amount);
13384}
13385
13386/* Copy one MODE sized block from SRC to DST, then progress SRC and DST by
13387 MODE bytes. */
13388
13389static void
13390aarch64_copy_one_block_and_progress_pointers (rtx *src, rtx *dst,
ef4bddc2 13391 machine_mode mode)
e2c75eea
JG
13392{
13393 rtx reg = gen_reg_rtx (mode);
13394
13395 /* "Cast" the pointers to the correct mode. */
13396 *src = adjust_address (*src, mode, 0);
13397 *dst = adjust_address (*dst, mode, 0);
13398 /* Emit the memcpy. */
13399 emit_move_insn (reg, *src);
13400 emit_move_insn (*dst, reg);
13401 /* Move the pointers forward. */
13402 *src = aarch64_progress_pointer (*src);
13403 *dst = aarch64_progress_pointer (*dst);
13404}
13405
13406/* Expand movmem, as if from a __builtin_memcpy. Return true if
13407 we succeed, otherwise return false. */
13408
13409bool
13410aarch64_expand_movmem (rtx *operands)
13411{
13412 unsigned int n;
13413 rtx dst = operands[0];
13414 rtx src = operands[1];
13415 rtx base;
13416 bool speed_p = !optimize_function_for_size_p (cfun);
13417
13418 /* When optimizing for size, give a better estimate of the length of a
13419 memcpy call, but use the default otherwise. */
13420 unsigned int max_instructions = (speed_p ? 15 : AARCH64_CALL_RATIO) / 2;
13421
13422 /* We can't do anything smart if the amount to copy is not constant. */
13423 if (!CONST_INT_P (operands[2]))
13424 return false;
13425
13426 n = UINTVAL (operands[2]);
13427
13428 /* Try to keep the number of instructions low. For cases below 16 bytes we
13429 need to make at most two moves. For cases above 16 bytes it will be one
13430 move for each 16 byte chunk, then at most two additional moves. */
13431 if (((n / 16) + (n % 16 ? 2 : 0)) > max_instructions)
13432 return false;
13433
13434 base = copy_to_mode_reg (Pmode, XEXP (dst, 0));
13435 dst = adjust_automodify_address (dst, VOIDmode, base, 0);
13436
13437 base = copy_to_mode_reg (Pmode, XEXP (src, 0));
13438 src = adjust_automodify_address (src, VOIDmode, base, 0);
13439
13440 /* Simple cases. Copy 0-3 bytes, as (if applicable) a 2-byte, then a
13441 1-byte chunk. */
13442 if (n < 4)
13443 {
13444 if (n >= 2)
13445 {
13446 aarch64_copy_one_block_and_progress_pointers (&src, &dst, HImode);
13447 n -= 2;
13448 }
13449
13450 if (n == 1)
13451 aarch64_copy_one_block_and_progress_pointers (&src, &dst, QImode);
13452
13453 return true;
13454 }
13455
13456 /* Copy 4-8 bytes. First a 4-byte chunk, then (if applicable) a second
13457 4-byte chunk, partially overlapping with the previously copied chunk. */
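  /* For example, a 7-byte copy becomes a 4-byte copy of bytes 0-3 followed
     by a 4-byte copy of bytes 3-6, so byte 3 is written twice rather than
     using smaller accesses for the tail.  */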
13458 if (n < 8)
13459 {
13460 aarch64_copy_one_block_and_progress_pointers (&src, &dst, SImode);
13461 n -= 4;
13462 if (n > 0)
13463 {
13464 int move = n - 4;
13465
13466 src = aarch64_move_pointer (src, move);
13467 dst = aarch64_move_pointer (dst, move);
13468 aarch64_copy_one_block_and_progress_pointers (&src, &dst, SImode);
13469 }
13470 return true;
13471 }
13472
13473 /* Copy more than 8 bytes. Copy chunks of 16 bytes until we run out of
13474 them, then (if applicable) an 8-byte chunk. */
13475 while (n >= 8)
13476 {
13477 if (n / 16)
13478 {
13479 aarch64_copy_one_block_and_progress_pointers (&src, &dst, TImode);
13480 n -= 16;
13481 }
13482 else
13483 {
13484 aarch64_copy_one_block_and_progress_pointers (&src, &dst, DImode);
13485 n -= 8;
13486 }
13487 }
13488
13489 /* Finish the final bytes of the copy. We can always do this in one
13490 instruction. We either copy the exact amount we need, or partially
 13491 overlap with the previous chunk we copied and copy 8 bytes. */
13492 if (n == 0)
13493 return true;
13494 else if (n == 1)
13495 aarch64_copy_one_block_and_progress_pointers (&src, &dst, QImode);
13496 else if (n == 2)
13497 aarch64_copy_one_block_and_progress_pointers (&src, &dst, HImode);
13498 else if (n == 4)
13499 aarch64_copy_one_block_and_progress_pointers (&src, &dst, SImode);
13500 else
13501 {
13502 if (n == 3)
13503 {
13504 src = aarch64_move_pointer (src, -1);
13505 dst = aarch64_move_pointer (dst, -1);
13506 aarch64_copy_one_block_and_progress_pointers (&src, &dst, SImode);
13507 }
13508 else
13509 {
13510 int move = n - 8;
13511
13512 src = aarch64_move_pointer (src, move);
13513 dst = aarch64_move_pointer (dst, move);
13514 aarch64_copy_one_block_and_progress_pointers (&src, &dst, DImode);
13515 }
13516 }
13517
13518 return true;
13519}
13520
141a3ccf
KT
13521/* Split a DImode store of a CONST_INT SRC to MEM DST as two
13522 SImode stores. Handle the case when the constant has identical
13523 bottom and top halves. This is beneficial when the two stores can be
13524 merged into an STP and we avoid synthesising potentially expensive
13525 immediates twice. Return true if such a split is possible. */
13526
13527bool
13528aarch64_split_dimode_const_store (rtx dst, rtx src)
13529{
13530 rtx lo = gen_lowpart (SImode, src);
13531 rtx hi = gen_highpart_mode (SImode, DImode, src);
13532
13533 bool size_p = optimize_function_for_size_p (cfun);
13534
13535 if (!rtx_equal_p (lo, hi))
13536 return false;
13537
13538 unsigned int orig_cost
13539 = aarch64_internal_mov_immediate (NULL_RTX, src, false, DImode);
13540 unsigned int lo_cost
13541 = aarch64_internal_mov_immediate (NULL_RTX, lo, false, SImode);
13542
13543 /* We want to transform:
13544 MOV x1, 49370
13545 MOVK x1, 0x140, lsl 16
13546 MOVK x1, 0xc0da, lsl 32
13547 MOVK x1, 0x140, lsl 48
13548 STR x1, [x0]
13549 into:
13550 MOV w1, 49370
13551 MOVK w1, 0x140, lsl 16
13552 STP w1, w1, [x0]
13553 So we want to perform this only when we save two instructions
13554 or more. When optimizing for size, however, accept any code size
13555 savings we can. */
13556 if (size_p && orig_cost <= lo_cost)
13557 return false;
13558
13559 if (!size_p
13560 && (orig_cost <= lo_cost + 1))
13561 return false;
13562
13563 rtx mem_lo = adjust_address (dst, SImode, 0);
13564 if (!aarch64_mem_pair_operand (mem_lo, SImode))
13565 return false;
13566
13567 rtx tmp_reg = gen_reg_rtx (SImode);
13568 aarch64_expand_mov_immediate (tmp_reg, lo);
13569 rtx mem_hi = aarch64_move_pointer (mem_lo, GET_MODE_SIZE (SImode));
 13570 /* Don't emit an explicit store pair as this may not always be profitable.
13571 Let the sched-fusion logic decide whether to merge them. */
13572 emit_move_insn (mem_lo, tmp_reg);
13573 emit_move_insn (mem_hi, tmp_reg);
13574
13575 return true;
13576}
13577
a3125fc2
CL
13578/* Implement the TARGET_ASAN_SHADOW_OFFSET hook. */
13579
13580static unsigned HOST_WIDE_INT
13581aarch64_asan_shadow_offset (void)
13582{
13583 return (HOST_WIDE_INT_1 << 36);
13584}
13585
d3006da6 13586static bool
445d7826 13587aarch64_use_by_pieces_infrastructure_p (unsigned HOST_WIDE_INT size,
d3006da6
JG
13588 unsigned int align,
13589 enum by_pieces_operation op,
13590 bool speed_p)
13591{
13592 /* STORE_BY_PIECES can be used when copying a constant string, but
13593 in that case each 64-bit chunk takes 5 insns instead of 2 (LDR/STR).
13594 For now we always fail this and let the move_by_pieces code copy
13595 the string from read-only memory. */
13596 if (op == STORE_BY_PIECES)
13597 return false;
13598
13599 return default_use_by_pieces_infrastructure_p (size, align, op, speed_p);
13600}
13601
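/* Implement TARGET_GEN_CCMP_FIRST (see the #define below).  Emit the first
   comparison of a conditional-compare chain: compare TREEOP0 against TREEOP1
   using CODE, record the operand-preparation and comparison instruction
   sequences in *PREP_SEQ and *GEN_SEQ, and return the comparison rtx on the
   CC register, or NULL_RTX if the operand mode is not supported.  */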
5f3bc026 13602static rtx
cb4347e8 13603aarch64_gen_ccmp_first (rtx_insn **prep_seq, rtx_insn **gen_seq,
5f3bc026
ZC
13604 int code, tree treeop0, tree treeop1)
13605{
c8012fbc
WD
13606 machine_mode op_mode, cmp_mode, cc_mode = CCmode;
13607 rtx op0, op1;
5f3bc026 13608 int unsignedp = TYPE_UNSIGNED (TREE_TYPE (treeop0));
c8012fbc 13609 insn_code icode;
5f3bc026
ZC
13610 struct expand_operand ops[4];
13611
5f3bc026
ZC
13612 start_sequence ();
13613 expand_operands (treeop0, treeop1, NULL_RTX, &op0, &op1, EXPAND_NORMAL);
13614
13615 op_mode = GET_MODE (op0);
13616 if (op_mode == VOIDmode)
13617 op_mode = GET_MODE (op1);
13618
13619 switch (op_mode)
13620 {
13621 case QImode:
13622 case HImode:
13623 case SImode:
13624 cmp_mode = SImode;
13625 icode = CODE_FOR_cmpsi;
13626 break;
13627
13628 case DImode:
13629 cmp_mode = DImode;
13630 icode = CODE_FOR_cmpdi;
13631 break;
13632
786e3c06
WD
13633 case SFmode:
13634 cmp_mode = SFmode;
13635 cc_mode = aarch64_select_cc_mode ((rtx_code) code, op0, op1);
13636 icode = cc_mode == CCFPEmode ? CODE_FOR_fcmpesf : CODE_FOR_fcmpsf;
13637 break;
13638
13639 case DFmode:
13640 cmp_mode = DFmode;
13641 cc_mode = aarch64_select_cc_mode ((rtx_code) code, op0, op1);
13642 icode = cc_mode == CCFPEmode ? CODE_FOR_fcmpedf : CODE_FOR_fcmpdf;
13643 break;
13644
5f3bc026
ZC
13645 default:
13646 end_sequence ();
13647 return NULL_RTX;
13648 }
13649
c8012fbc
WD
13650 op0 = prepare_operand (icode, op0, 0, op_mode, cmp_mode, unsignedp);
13651 op1 = prepare_operand (icode, op1, 1, op_mode, cmp_mode, unsignedp);
5f3bc026
ZC
13652 if (!op0 || !op1)
13653 {
13654 end_sequence ();
13655 return NULL_RTX;
13656 }
13657 *prep_seq = get_insns ();
13658 end_sequence ();
13659
c8012fbc
WD
13660 create_fixed_operand (&ops[0], op0);
13661 create_fixed_operand (&ops[1], op1);
5f3bc026
ZC
13662
13663 start_sequence ();
c8012fbc 13664 if (!maybe_expand_insn (icode, 2, ops))
5f3bc026
ZC
13665 {
13666 end_sequence ();
13667 return NULL_RTX;
13668 }
13669 *gen_seq = get_insns ();
13670 end_sequence ();
13671
c8012fbc
WD
13672 return gen_rtx_fmt_ee ((rtx_code) code, cc_mode,
13673 gen_rtx_REG (cc_mode, CC_REGNUM), const0_rtx);
5f3bc026
ZC
13674}
13675
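/* Implement TARGET_GEN_CCMP_NEXT (see the #define below).  Emit a subsequent
   comparison of a conditional-compare chain: compare TREEOP0 against TREEOP1
   using CMP_CODE, predicated on PREV, the result of the previous comparison,
   with BIT_CODE (AND or IOR) describing how the two results are combined.
   Append to *PREP_SEQ and *GEN_SEQ and return the new comparison rtx, or
   NULL_RTX if the operand mode is not supported.  */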
13676static rtx
cb4347e8
TS
13677aarch64_gen_ccmp_next (rtx_insn **prep_seq, rtx_insn **gen_seq, rtx prev,
13678 int cmp_code, tree treeop0, tree treeop1, int bit_code)
5f3bc026 13679{
c8012fbc
WD
13680 rtx op0, op1, target;
13681 machine_mode op_mode, cmp_mode, cc_mode = CCmode;
5f3bc026 13682 int unsignedp = TYPE_UNSIGNED (TREE_TYPE (treeop0));
c8012fbc 13683 insn_code icode;
5f3bc026 13684 struct expand_operand ops[6];
c8012fbc 13685 int aarch64_cond;
5f3bc026 13686
cb4347e8 13687 push_to_sequence (*prep_seq);
5f3bc026
ZC
13688 expand_operands (treeop0, treeop1, NULL_RTX, &op0, &op1, EXPAND_NORMAL);
13689
13690 op_mode = GET_MODE (op0);
13691 if (op_mode == VOIDmode)
13692 op_mode = GET_MODE (op1);
13693
13694 switch (op_mode)
13695 {
13696 case QImode:
13697 case HImode:
13698 case SImode:
13699 cmp_mode = SImode;
c8012fbc 13700 icode = CODE_FOR_ccmpsi;
5f3bc026
ZC
13701 break;
13702
13703 case DImode:
13704 cmp_mode = DImode;
c8012fbc 13705 icode = CODE_FOR_ccmpdi;
5f3bc026
ZC
13706 break;
13707
786e3c06
WD
13708 case SFmode:
13709 cmp_mode = SFmode;
13710 cc_mode = aarch64_select_cc_mode ((rtx_code) cmp_code, op0, op1);
13711 icode = cc_mode == CCFPEmode ? CODE_FOR_fccmpesf : CODE_FOR_fccmpsf;
13712 break;
13713
13714 case DFmode:
13715 cmp_mode = DFmode;
13716 cc_mode = aarch64_select_cc_mode ((rtx_code) cmp_code, op0, op1);
13717 icode = cc_mode == CCFPEmode ? CODE_FOR_fccmpedf : CODE_FOR_fccmpdf;
13718 break;
13719
5f3bc026
ZC
13720 default:
13721 end_sequence ();
13722 return NULL_RTX;
13723 }
13724
13725 op0 = prepare_operand (icode, op0, 2, op_mode, cmp_mode, unsignedp);
13726 op1 = prepare_operand (icode, op1, 3, op_mode, cmp_mode, unsignedp);
13727 if (!op0 || !op1)
13728 {
13729 end_sequence ();
13730 return NULL_RTX;
13731 }
13732 *prep_seq = get_insns ();
13733 end_sequence ();
13734
13735 target = gen_rtx_REG (cc_mode, CC_REGNUM);
c8012fbc 13736 aarch64_cond = aarch64_get_condition_code_1 (cc_mode, (rtx_code) cmp_code);
5f3bc026 13737
c8012fbc
WD
13738 if (bit_code != AND)
13739 {
13740 prev = gen_rtx_fmt_ee (REVERSE_CONDITION (GET_CODE (prev),
13741 GET_MODE (XEXP (prev, 0))),
13742 VOIDmode, XEXP (prev, 0), const0_rtx);
13743 aarch64_cond = AARCH64_INVERSE_CONDITION_CODE (aarch64_cond);
13744 }
13745
13746 create_fixed_operand (&ops[0], XEXP (prev, 0));
5f3bc026
ZC
13747 create_fixed_operand (&ops[1], target);
13748 create_fixed_operand (&ops[2], op0);
13749 create_fixed_operand (&ops[3], op1);
c8012fbc
WD
13750 create_fixed_operand (&ops[4], prev);
13751 create_fixed_operand (&ops[5], GEN_INT (aarch64_cond));
5f3bc026 13752
cb4347e8 13753 push_to_sequence (*gen_seq);
5f3bc026
ZC
13754 if (!maybe_expand_insn (icode, 6, ops))
13755 {
13756 end_sequence ();
13757 return NULL_RTX;
13758 }
13759
13760 *gen_seq = get_insns ();
13761 end_sequence ();
13762
c8012fbc 13763 return gen_rtx_fmt_ee ((rtx_code) cmp_code, VOIDmode, target, const0_rtx);
5f3bc026
ZC
13764}
13765
13766#undef TARGET_GEN_CCMP_FIRST
13767#define TARGET_GEN_CCMP_FIRST aarch64_gen_ccmp_first
13768
13769#undef TARGET_GEN_CCMP_NEXT
13770#define TARGET_GEN_CCMP_NEXT aarch64_gen_ccmp_next
13771
6a569cdd
KT
13772/* Implement TARGET_SCHED_MACRO_FUSION_P. Return true if target supports
13773 instruction fusion of some sort. */
13774
13775static bool
13776aarch64_macro_fusion_p (void)
13777{
b175b679 13778 return aarch64_tune_params.fusible_ops != AARCH64_FUSE_NOTHING;
6a569cdd
KT
13779}
13780
13781
13782/* Implement TARGET_SCHED_MACRO_FUSION_PAIR_P. Return true if PREV and CURR
13783 should be kept together during scheduling. */
13784
13785static bool
13786aarch_macro_fusion_pair_p (rtx_insn *prev, rtx_insn *curr)
13787{
13788 rtx set_dest;
13789 rtx prev_set = single_set (prev);
13790 rtx curr_set = single_set (curr);
13791 /* prev and curr are simple SET insns i.e. no flag setting or branching. */
13792 bool simple_sets_p = prev_set && curr_set && !any_condjump_p (curr);
13793
13794 if (!aarch64_macro_fusion_p ())
13795 return false;
13796
d7b03373 13797 if (simple_sets_p && aarch64_fusion_enabled_p (AARCH64_FUSE_MOV_MOVK))
6a569cdd
KT
13798 {
13799 /* We are trying to match:
13800 prev (mov) == (set (reg r0) (const_int imm16))
13801 curr (movk) == (set (zero_extract (reg r0)
13802 (const_int 16)
13803 (const_int 16))
13804 (const_int imm16_1)) */
13805
13806 set_dest = SET_DEST (curr_set);
13807
13808 if (GET_CODE (set_dest) == ZERO_EXTRACT
13809 && CONST_INT_P (SET_SRC (curr_set))
13810 && CONST_INT_P (SET_SRC (prev_set))
13811 && CONST_INT_P (XEXP (set_dest, 2))
13812 && INTVAL (XEXP (set_dest, 2)) == 16
13813 && REG_P (XEXP (set_dest, 0))
13814 && REG_P (SET_DEST (prev_set))
13815 && REGNO (XEXP (set_dest, 0)) == REGNO (SET_DEST (prev_set)))
13816 {
13817 return true;
13818 }
13819 }
13820
d7b03373 13821 if (simple_sets_p && aarch64_fusion_enabled_p (AARCH64_FUSE_ADRP_ADD))
9bbe08fe
KT
13822 {
13823
13824 /* We're trying to match:
13825 prev (adrp) == (set (reg r1)
13826 (high (symbol_ref ("SYM"))))
13827 curr (add) == (set (reg r0)
13828 (lo_sum (reg r1)
13829 (symbol_ref ("SYM"))))
 13830 Note that r0 need not be the same as r1, especially
13831 during pre-regalloc scheduling. */
13832
13833 if (satisfies_constraint_Ush (SET_SRC (prev_set))
13834 && REG_P (SET_DEST (prev_set)) && REG_P (SET_DEST (curr_set)))
13835 {
13836 if (GET_CODE (SET_SRC (curr_set)) == LO_SUM
13837 && REG_P (XEXP (SET_SRC (curr_set), 0))
13838 && REGNO (XEXP (SET_SRC (curr_set), 0))
13839 == REGNO (SET_DEST (prev_set))
13840 && rtx_equal_p (XEXP (SET_SRC (prev_set), 0),
13841 XEXP (SET_SRC (curr_set), 1)))
13842 return true;
13843 }
13844 }
13845
d7b03373 13846 if (simple_sets_p && aarch64_fusion_enabled_p (AARCH64_FUSE_MOVK_MOVK))
cd0cb232
KT
13847 {
13848
13849 /* We're trying to match:
13850 prev (movk) == (set (zero_extract (reg r0)
13851 (const_int 16)
13852 (const_int 32))
13853 (const_int imm16_1))
13854 curr (movk) == (set (zero_extract (reg r0)
13855 (const_int 16)
13856 (const_int 48))
13857 (const_int imm16_2)) */
13858
13859 if (GET_CODE (SET_DEST (prev_set)) == ZERO_EXTRACT
13860 && GET_CODE (SET_DEST (curr_set)) == ZERO_EXTRACT
13861 && REG_P (XEXP (SET_DEST (prev_set), 0))
13862 && REG_P (XEXP (SET_DEST (curr_set), 0))
13863 && REGNO (XEXP (SET_DEST (prev_set), 0))
13864 == REGNO (XEXP (SET_DEST (curr_set), 0))
13865 && CONST_INT_P (XEXP (SET_DEST (prev_set), 2))
13866 && CONST_INT_P (XEXP (SET_DEST (curr_set), 2))
13867 && INTVAL (XEXP (SET_DEST (prev_set), 2)) == 32
13868 && INTVAL (XEXP (SET_DEST (curr_set), 2)) == 48
13869 && CONST_INT_P (SET_SRC (prev_set))
13870 && CONST_INT_P (SET_SRC (curr_set)))
13871 return true;
13872
13873 }
d7b03373 13874 if (simple_sets_p && aarch64_fusion_enabled_p (AARCH64_FUSE_ADRP_LDR))
d8354ad7
KT
13875 {
13876 /* We're trying to match:
13877 prev (adrp) == (set (reg r0)
13878 (high (symbol_ref ("SYM"))))
13879 curr (ldr) == (set (reg r1)
13880 (mem (lo_sum (reg r0)
13881 (symbol_ref ("SYM")))))
13882 or
13883 curr (ldr) == (set (reg r1)
13884 (zero_extend (mem
13885 (lo_sum (reg r0)
13886 (symbol_ref ("SYM")))))) */
13887 if (satisfies_constraint_Ush (SET_SRC (prev_set))
13888 && REG_P (SET_DEST (prev_set)) && REG_P (SET_DEST (curr_set)))
13889 {
13890 rtx curr_src = SET_SRC (curr_set);
13891
13892 if (GET_CODE (curr_src) == ZERO_EXTEND)
13893 curr_src = XEXP (curr_src, 0);
13894
13895 if (MEM_P (curr_src) && GET_CODE (XEXP (curr_src, 0)) == LO_SUM
13896 && REG_P (XEXP (XEXP (curr_src, 0), 0))
13897 && REGNO (XEXP (XEXP (curr_src, 0), 0))
13898 == REGNO (SET_DEST (prev_set))
13899 && rtx_equal_p (XEXP (XEXP (curr_src, 0), 1),
13900 XEXP (SET_SRC (prev_set), 0)))
13901 return true;
13902 }
13903 }
cd0cb232 13904
d7b03373 13905 if (aarch64_fusion_enabled_p (AARCH64_FUSE_AES_AESMC)
00a8574a
WD
13906 && aarch_crypto_can_dual_issue (prev, curr))
13907 return true;
13908
d7b03373 13909 if (aarch64_fusion_enabled_p (AARCH64_FUSE_CMP_BRANCH)
3759108f
AP
13910 && any_condjump_p (curr))
13911 {
13912 enum attr_type prev_type = get_attr_type (prev);
13913
 13914 /* FIXME: this misses some instructions which are considered simple
 13915 arithmetic instructions for ThunderX.  Simple shifts are missed here. */
13916 if (prev_type == TYPE_ALUS_SREG
13917 || prev_type == TYPE_ALUS_IMM
13918 || prev_type == TYPE_LOGICS_REG
13919 || prev_type == TYPE_LOGICS_IMM)
13920 return true;
13921 }
13922
6a569cdd
KT
13923 return false;
13924}
13925
f2879a90
KT
13926/* Return true iff the instruction fusion described by OP is enabled. */
13927
13928bool
13929aarch64_fusion_enabled_p (enum aarch64_fusion_pairs op)
13930{
13931 return (aarch64_tune_params.fusible_ops & op) != 0;
13932}
13933
350013bc
BC
 13934	/* If MEM is in the form of [base+offset], extract the two parts
 13935	   of the address and store them in BASE and OFFSET; otherwise return
 13936	   false after clearing BASE and OFFSET. */
13937
13938bool
13939extract_base_offset_in_addr (rtx mem, rtx *base, rtx *offset)
13940{
13941 rtx addr;
13942
13943 gcc_assert (MEM_P (mem));
13944
13945 addr = XEXP (mem, 0);
13946
13947 if (REG_P (addr))
13948 {
13949 *base = addr;
13950 *offset = const0_rtx;
13951 return true;
13952 }
13953
13954 if (GET_CODE (addr) == PLUS
13955 && REG_P (XEXP (addr, 0)) && CONST_INT_P (XEXP (addr, 1)))
13956 {
13957 *base = XEXP (addr, 0);
13958 *offset = XEXP (addr, 1);
13959 return true;
13960 }
13961
13962 *base = NULL_RTX;
13963 *offset = NULL_RTX;
13964
13965 return false;
13966}
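/* For example, for a MEM whose address is (reg x1) the function above sets
   BASE to that register and OFFSET to const0_rtx; for an address of the form
   (plus (reg x1) (const_int 16)) it sets BASE to x1 and OFFSET to 16.  Any
   other address form (register-indexed, pre/post-modify, ...) yields false
   with BASE and OFFSET cleared.  */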
13967
13968/* Types for scheduling fusion. */
13969enum sched_fusion_type
13970{
13971 SCHED_FUSION_NONE = 0,
13972 SCHED_FUSION_LD_SIGN_EXTEND,
13973 SCHED_FUSION_LD_ZERO_EXTEND,
13974 SCHED_FUSION_LD,
13975 SCHED_FUSION_ST,
13976 SCHED_FUSION_NUM
13977};
13978
 13979	/* If INSN is a load or store whose address is in the form of
 13980	   [base+offset], extract the two parts and store them in BASE and
 13981	   OFFSET.  Return the scheduling fusion type of this INSN. */
13982
13983static enum sched_fusion_type
13984fusion_load_store (rtx_insn *insn, rtx *base, rtx *offset)
13985{
13986 rtx x, dest, src;
13987 enum sched_fusion_type fusion = SCHED_FUSION_LD;
13988
13989 gcc_assert (INSN_P (insn));
13990 x = PATTERN (insn);
13991 if (GET_CODE (x) != SET)
13992 return SCHED_FUSION_NONE;
13993
13994 src = SET_SRC (x);
13995 dest = SET_DEST (x);
13996
abc52318
KT
13997 machine_mode dest_mode = GET_MODE (dest);
13998
13999 if (!aarch64_mode_valid_for_sched_fusion_p (dest_mode))
350013bc
BC
14000 return SCHED_FUSION_NONE;
14001
14002 if (GET_CODE (src) == SIGN_EXTEND)
14003 {
14004 fusion = SCHED_FUSION_LD_SIGN_EXTEND;
14005 src = XEXP (src, 0);
14006 if (GET_CODE (src) != MEM || GET_MODE (src) != SImode)
14007 return SCHED_FUSION_NONE;
14008 }
14009 else if (GET_CODE (src) == ZERO_EXTEND)
14010 {
14011 fusion = SCHED_FUSION_LD_ZERO_EXTEND;
14012 src = XEXP (src, 0);
14013 if (GET_CODE (src) != MEM || GET_MODE (src) != SImode)
14014 return SCHED_FUSION_NONE;
14015 }
14016
14017 if (GET_CODE (src) == MEM && REG_P (dest))
14018 extract_base_offset_in_addr (src, base, offset);
14019 else if (GET_CODE (dest) == MEM && (REG_P (src) || src == const0_rtx))
14020 {
14021 fusion = SCHED_FUSION_ST;
14022 extract_base_offset_in_addr (dest, base, offset);
14023 }
14024 else
14025 return SCHED_FUSION_NONE;
14026
14027 if (*base == NULL_RTX || *offset == NULL_RTX)
14028 fusion = SCHED_FUSION_NONE;
14029
14030 return fusion;
14031}
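/* A sketch of how the classification above applies (pseudo-RTL, register
   names arbitrary):

     (set (reg:DI x0) (sign_extend:DI (mem:SI [x1 + 8])))   ldrsw x0, [x1, 8]
       -> SCHED_FUSION_LD_SIGN_EXTEND, BASE = x1, OFFSET = 8
     (set (reg:SI w0) (mem:SI [x1 + 4]))                     ldr w0, [x1, 4]
       -> SCHED_FUSION_LD
     (set (mem:SI [x1 + 4]) (reg:SI w0))                     str w0, [x1, 4]
       -> SCHED_FUSION_ST (storing const0_rtx, i.e. wzr, also qualifies).  */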
14032
14033/* Implement the TARGET_SCHED_FUSION_PRIORITY hook.
14034
 14035	   Currently we only support fusing ldr and str instructions, so FUSION_PRI
 14036	   and PRI are only calculated for these instructions.  For other instructions,
 14037	   FUSION_PRI and PRI are simply set to MAX_PRI - 1.  In the future, other
 14038	   types of instruction fusion can be added by returning different priorities.
14039
14040 It's important that irrelevant instructions get the largest FUSION_PRI. */
14041
14042static void
14043aarch64_sched_fusion_priority (rtx_insn *insn, int max_pri,
14044 int *fusion_pri, int *pri)
14045{
14046 int tmp, off_val;
14047 rtx base, offset;
14048 enum sched_fusion_type fusion;
14049
14050 gcc_assert (INSN_P (insn));
14051
14052 tmp = max_pri - 1;
14053 fusion = fusion_load_store (insn, &base, &offset);
14054 if (fusion == SCHED_FUSION_NONE)
14055 {
14056 *pri = tmp;
14057 *fusion_pri = tmp;
14058 return;
14059 }
14060
14061 /* Set FUSION_PRI according to fusion type and base register. */
14062 *fusion_pri = tmp - fusion * FIRST_PSEUDO_REGISTER - REGNO (base);
14063
14064 /* Calculate PRI. */
14065 tmp /= 2;
14066
14067 /* INSN with smaller offset goes first. */
14068 off_val = (int)(INTVAL (offset));
14069 if (off_val >= 0)
14070 tmp -= (off_val & 0xfffff);
14071 else
14072 tmp += ((- off_val) & 0xfffff);
14073
14074 *pri = tmp;
14075 return;
14076}
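/* A worked example of the priorities computed above: for two loads
   ldr w0, [x3, 4] and ldr w1, [x3, 8] (both SCHED_FUSION_LD from base x3),
   FUSION_PRI is the same for both,
   (max_pri - 1) - SCHED_FUSION_LD * FIRST_PSEUDO_REGISTER - REGNO (x3),
   so the scheduler keeps them adjacent, while PRI is
   (max_pri - 1) / 2 - 4 and (max_pri - 1) / 2 - 8 respectively, so the load
   with the smaller offset is issued first.  */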
14077
14078/* Given OPERANDS of consecutive load/store, check if we can merge
14079 them into ldp/stp. LOAD is true if they are load instructions.
14080 MODE is the mode of memory operands. */
14081
14082bool
14083aarch64_operands_ok_for_ldpstp (rtx *operands, bool load,
14084 enum machine_mode mode)
14085{
14086 HOST_WIDE_INT offval_1, offval_2, msize;
14087 enum reg_class rclass_1, rclass_2;
14088 rtx mem_1, mem_2, reg_1, reg_2, base_1, base_2, offset_1, offset_2;
14089
14090 if (load)
14091 {
14092 mem_1 = operands[1];
14093 mem_2 = operands[3];
14094 reg_1 = operands[0];
14095 reg_2 = operands[2];
14096 gcc_assert (REG_P (reg_1) && REG_P (reg_2));
14097 if (REGNO (reg_1) == REGNO (reg_2))
14098 return false;
14099 }
14100 else
14101 {
14102 mem_1 = operands[0];
14103 mem_2 = operands[2];
14104 reg_1 = operands[1];
14105 reg_2 = operands[3];
14106 }
14107
bf84ac44
AP
14108 /* The mems cannot be volatile. */
14109 if (MEM_VOLATILE_P (mem_1) || MEM_VOLATILE_P (mem_2))
14110 return false;
14111
54700e2e
AP
 14112	  /* If we have SImode and slow unaligned ldp,
 14113	     check that the alignment is at least 8 bytes. */
14114 if (mode == SImode
14115 && (aarch64_tune_params.extra_tuning_flags
14116 & AARCH64_EXTRA_TUNE_SLOW_UNALIGNED_LDPW)
14117 && !optimize_size
14118 && MEM_ALIGN (mem_1) < 8 * BITS_PER_UNIT)
14119 return false;
14120
350013bc
BC
14121 /* Check if the addresses are in the form of [base+offset]. */
14122 extract_base_offset_in_addr (mem_1, &base_1, &offset_1);
14123 if (base_1 == NULL_RTX || offset_1 == NULL_RTX)
14124 return false;
14125 extract_base_offset_in_addr (mem_2, &base_2, &offset_2);
14126 if (base_2 == NULL_RTX || offset_2 == NULL_RTX)
14127 return false;
14128
 14129	  /* Check if the bases are the same. */
14130 if (!rtx_equal_p (base_1, base_2))
14131 return false;
14132
14133 offval_1 = INTVAL (offset_1);
14134 offval_2 = INTVAL (offset_2);
14135 msize = GET_MODE_SIZE (mode);
14136 /* Check if the offsets are consecutive. */
14137 if (offval_1 != (offval_2 + msize) && offval_2 != (offval_1 + msize))
14138 return false;
14139
14140 /* Check if the addresses are clobbered by load. */
14141 if (load)
14142 {
14143 if (reg_mentioned_p (reg_1, mem_1))
14144 return false;
14145
14146 /* In increasing order, the last load can clobber the address. */
14147 if (offval_1 > offval_2 && reg_mentioned_p (reg_2, mem_2))
14148 return false;
14149 }
14150
14151 if (REG_P (reg_1) && FP_REGNUM_P (REGNO (reg_1)))
14152 rclass_1 = FP_REGS;
14153 else
14154 rclass_1 = GENERAL_REGS;
14155
14156 if (REG_P (reg_2) && FP_REGNUM_P (REGNO (reg_2)))
14157 rclass_2 = FP_REGS;
14158 else
14159 rclass_2 = GENERAL_REGS;
14160
 14161	  /* Check if the registers are of the same class. */
14162 if (rclass_1 != rclass_2)
14163 return false;
14164
14165 return true;
14166}
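/* For illustration (register names arbitrary), a pair such as

     ldr w0, [x3]
     ldr w1, [x3, 4]

   passes the checks above (same base, consecutive SImode offsets, distinct
   destination registers of the same class) and can be merged by the ldp/stp
   peepholes into

     ldp w0, w1, [x3]

   whereas volatile accesses, mismatched register classes or a load that
   clobbers its own address register cause the function to return false.  */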
14167
14168/* Given OPERANDS of consecutive load/store, check if we can merge
14169 them into ldp/stp by adjusting the offset. LOAD is true if they
14170 are load instructions. MODE is the mode of memory operands.
14171
 14172	   Given the following consecutive stores:
14173
14174 str w1, [xb, 0x100]
14175 str w1, [xb, 0x104]
14176 str w1, [xb, 0x108]
14177 str w1, [xb, 0x10c]
14178
14179 Though the offsets are out of the range supported by stp, we can
14180 still pair them after adjusting the offset, like:
14181
14182 add scratch, xb, 0x100
14183 stp w1, w1, [scratch]
14184 stp w1, w1, [scratch, 0x8]
14185
 14186	   The peephole patterns detecting this opportunity should guarantee
 14187	   the scratch register is available. */
14188
14189bool
14190aarch64_operands_adjust_ok_for_ldpstp (rtx *operands, bool load,
14191 enum machine_mode mode)
14192{
14193 enum reg_class rclass_1, rclass_2, rclass_3, rclass_4;
14194 HOST_WIDE_INT offval_1, offval_2, offval_3, offval_4, msize;
14195 rtx mem_1, mem_2, mem_3, mem_4, reg_1, reg_2, reg_3, reg_4;
14196 rtx base_1, base_2, base_3, base_4, offset_1, offset_2, offset_3, offset_4;
14197
14198 if (load)
14199 {
14200 reg_1 = operands[0];
14201 mem_1 = operands[1];
14202 reg_2 = operands[2];
14203 mem_2 = operands[3];
14204 reg_3 = operands[4];
14205 mem_3 = operands[5];
14206 reg_4 = operands[6];
14207 mem_4 = operands[7];
14208 gcc_assert (REG_P (reg_1) && REG_P (reg_2)
14209 && REG_P (reg_3) && REG_P (reg_4));
14210 if (REGNO (reg_1) == REGNO (reg_2) || REGNO (reg_3) == REGNO (reg_4))
14211 return false;
14212 }
14213 else
14214 {
14215 mem_1 = operands[0];
14216 reg_1 = operands[1];
14217 mem_2 = operands[2];
14218 reg_2 = operands[3];
14219 mem_3 = operands[4];
14220 reg_3 = operands[5];
14221 mem_4 = operands[6];
14222 reg_4 = operands[7];
14223 }
 14224	  /* Skip if the memory operand is by itself valid for ldp/stp. */
14225 if (!MEM_P (mem_1) || aarch64_mem_pair_operand (mem_1, mode))
14226 return false;
14227
bf84ac44
AP
14228 /* The mems cannot be volatile. */
14229 if (MEM_VOLATILE_P (mem_1) || MEM_VOLATILE_P (mem_2)
 14230	      || MEM_VOLATILE_P (mem_3) || MEM_VOLATILE_P (mem_4))
14231 return false;
14232
350013bc
BC
14233 /* Check if the addresses are in the form of [base+offset]. */
14234 extract_base_offset_in_addr (mem_1, &base_1, &offset_1);
14235 if (base_1 == NULL_RTX || offset_1 == NULL_RTX)
14236 return false;
14237 extract_base_offset_in_addr (mem_2, &base_2, &offset_2);
14238 if (base_2 == NULL_RTX || offset_2 == NULL_RTX)
14239 return false;
14240 extract_base_offset_in_addr (mem_3, &base_3, &offset_3);
14241 if (base_3 == NULL_RTX || offset_3 == NULL_RTX)
14242 return false;
14243 extract_base_offset_in_addr (mem_4, &base_4, &offset_4);
14244 if (base_4 == NULL_RTX || offset_4 == NULL_RTX)
14245 return false;
14246
 14247	  /* Check if the bases are the same. */
14248 if (!rtx_equal_p (base_1, base_2)
14249 || !rtx_equal_p (base_2, base_3)
14250 || !rtx_equal_p (base_3, base_4))
14251 return false;
14252
14253 offval_1 = INTVAL (offset_1);
14254 offval_2 = INTVAL (offset_2);
14255 offval_3 = INTVAL (offset_3);
14256 offval_4 = INTVAL (offset_4);
14257 msize = GET_MODE_SIZE (mode);
14258 /* Check if the offsets are consecutive. */
14259 if ((offval_1 != (offval_2 + msize)
14260 || offval_1 != (offval_3 + msize * 2)
14261 || offval_1 != (offval_4 + msize * 3))
14262 && (offval_4 != (offval_3 + msize)
14263 || offval_4 != (offval_2 + msize * 2)
14264 || offval_4 != (offval_1 + msize * 3)))
14265 return false;
14266
14267 /* Check if the addresses are clobbered by load. */
14268 if (load)
14269 {
14270 if (reg_mentioned_p (reg_1, mem_1)
14271 || reg_mentioned_p (reg_2, mem_2)
14272 || reg_mentioned_p (reg_3, mem_3))
14273 return false;
14274
14275 /* In increasing order, the last load can clobber the address. */
14276 if (offval_1 > offval_2 && reg_mentioned_p (reg_4, mem_4))
14277 return false;
14278 }
14279
54700e2e
AP
 14280	  /* If we have SImode and slow unaligned ldp,
 14281	     check that the alignment is at least 8 bytes. */
14282 if (mode == SImode
14283 && (aarch64_tune_params.extra_tuning_flags
14284 & AARCH64_EXTRA_TUNE_SLOW_UNALIGNED_LDPW)
14285 && !optimize_size
14286 && MEM_ALIGN (mem_1) < 8 * BITS_PER_UNIT)
14287 return false;
14288
350013bc
BC
14289 if (REG_P (reg_1) && FP_REGNUM_P (REGNO (reg_1)))
14290 rclass_1 = FP_REGS;
14291 else
14292 rclass_1 = GENERAL_REGS;
14293
14294 if (REG_P (reg_2) && FP_REGNUM_P (REGNO (reg_2)))
14295 rclass_2 = FP_REGS;
14296 else
14297 rclass_2 = GENERAL_REGS;
14298
14299 if (REG_P (reg_3) && FP_REGNUM_P (REGNO (reg_3)))
14300 rclass_3 = FP_REGS;
14301 else
14302 rclass_3 = GENERAL_REGS;
14303
14304 if (REG_P (reg_4) && FP_REGNUM_P (REGNO (reg_4)))
14305 rclass_4 = FP_REGS;
14306 else
14307 rclass_4 = GENERAL_REGS;
14308
 14309	  /* Check if the registers are of the same class. */
14310 if (rclass_1 != rclass_2 || rclass_2 != rclass_3 || rclass_3 != rclass_4)
14311 return false;
14312
14313 return true;
14314}
14315
14316/* Given OPERANDS of consecutive load/store, this function pairs them
14317 into ldp/stp after adjusting the offset. It depends on the fact
14318 that addresses of load/store instructions are in increasing order.
 14319	   MODE is the mode of the memory operands.  CODE is the rtl operator
 14320	   which should be applied to all memory operands; it is SIGN_EXTEND,
 14321	   ZERO_EXTEND or UNKNOWN. */
14322
14323bool
14324aarch64_gen_adjusted_ldpstp (rtx *operands, bool load,
14325 enum machine_mode mode, RTX_CODE code)
14326{
14327 rtx base, offset, t1, t2;
14328 rtx mem_1, mem_2, mem_3, mem_4;
14329 HOST_WIDE_INT off_val, abs_off, adj_off, new_off, stp_off_limit, msize;
14330
14331 if (load)
14332 {
14333 mem_1 = operands[1];
14334 mem_2 = operands[3];
14335 mem_3 = operands[5];
14336 mem_4 = operands[7];
14337 }
14338 else
14339 {
14340 mem_1 = operands[0];
14341 mem_2 = operands[2];
14342 mem_3 = operands[4];
14343 mem_4 = operands[6];
14344 gcc_assert (code == UNKNOWN);
14345 }
14346
14347 extract_base_offset_in_addr (mem_1, &base, &offset);
14348 gcc_assert (base != NULL_RTX && offset != NULL_RTX);
14349
 14350	  /* Adjust the offset so that it can fit in an ldp/stp instruction. */
14351 msize = GET_MODE_SIZE (mode);
14352 stp_off_limit = msize * 0x40;
14353 off_val = INTVAL (offset);
14354 abs_off = (off_val < 0) ? -off_val : off_val;
14355 new_off = abs_off % stp_off_limit;
14356 adj_off = abs_off - new_off;
14357
14358 /* Further adjust to make sure all offsets are OK. */
14359 if ((new_off + msize * 2) >= stp_off_limit)
14360 {
14361 adj_off += stp_off_limit;
14362 new_off -= stp_off_limit;
14363 }
14364
14365 /* Make sure the adjustment can be done with ADD/SUB instructions. */
14366 if (adj_off >= 0x1000)
14367 return false;
14368
14369 if (off_val < 0)
14370 {
14371 adj_off = -adj_off;
14372 new_off = -new_off;
14373 }
14374
14375 /* Create new memory references. */
14376 mem_1 = change_address (mem_1, VOIDmode,
14377 plus_constant (DImode, operands[8], new_off));
14378
14379 /* Check if the adjusted address is OK for ldp/stp. */
14380 if (!aarch64_mem_pair_operand (mem_1, mode))
14381 return false;
14382
14383 msize = GET_MODE_SIZE (mode);
14384 mem_2 = change_address (mem_2, VOIDmode,
14385 plus_constant (DImode,
14386 operands[8],
14387 new_off + msize));
14388 mem_3 = change_address (mem_3, VOIDmode,
14389 plus_constant (DImode,
14390 operands[8],
14391 new_off + msize * 2));
14392 mem_4 = change_address (mem_4, VOIDmode,
14393 plus_constant (DImode,
14394 operands[8],
14395 new_off + msize * 3));
14396
14397 if (code == ZERO_EXTEND)
14398 {
14399 mem_1 = gen_rtx_ZERO_EXTEND (DImode, mem_1);
14400 mem_2 = gen_rtx_ZERO_EXTEND (DImode, mem_2);
14401 mem_3 = gen_rtx_ZERO_EXTEND (DImode, mem_3);
14402 mem_4 = gen_rtx_ZERO_EXTEND (DImode, mem_4);
14403 }
14404 else if (code == SIGN_EXTEND)
14405 {
14406 mem_1 = gen_rtx_SIGN_EXTEND (DImode, mem_1);
14407 mem_2 = gen_rtx_SIGN_EXTEND (DImode, mem_2);
14408 mem_3 = gen_rtx_SIGN_EXTEND (DImode, mem_3);
14409 mem_4 = gen_rtx_SIGN_EXTEND (DImode, mem_4);
14410 }
14411
14412 if (load)
14413 {
14414 operands[1] = mem_1;
14415 operands[3] = mem_2;
14416 operands[5] = mem_3;
14417 operands[7] = mem_4;
14418 }
14419 else
14420 {
14421 operands[0] = mem_1;
14422 operands[2] = mem_2;
14423 operands[4] = mem_3;
14424 operands[6] = mem_4;
14425 }
14426
14427 /* Emit adjusting instruction. */
f7df4a84 14428 emit_insn (gen_rtx_SET (operands[8], plus_constant (DImode, base, adj_off)));
350013bc 14429 /* Emit ldp/stp instructions. */
f7df4a84
RS
14430 t1 = gen_rtx_SET (operands[0], operands[1]);
14431 t2 = gen_rtx_SET (operands[2], operands[3]);
350013bc 14432 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, t1, t2)));
f7df4a84
RS
14433 t1 = gen_rtx_SET (operands[4], operands[5]);
14434 t2 = gen_rtx_SET (operands[6], operands[7]);
350013bc
BC
14435 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, t1, t2)));
14436 return true;
14437}
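/* A worked example of the adjustment above, using the SImode stores from the
   comment before aarch64_operands_adjust_ok_for_ldpstp: msize is 4, so
   stp_off_limit = 4 * 0x40 = 0x100.  With a first offset of 0x100 we get
   new_off = 0x100 % 0x100 = 0 and adj_off = 0x100, so operands[8] is set to
   xb + 0x100 and the four memory references become [scratch], [scratch, 4],
   [scratch, 8] and [scratch, 12], which are then emitted as two stp
   instructions.  */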
14438
1b1e81f8
JW
 14439	/* Return true if a pseudo register should be created and used to hold
 14440	   the GOT address for PIC code. */
14441
14442bool
14443aarch64_use_pseudo_pic_reg (void)
14444{
14445 return aarch64_cmodel == AARCH64_CMODEL_SMALL_SPIC;
14446}
14447
7b841a12
JW
14448/* Implement TARGET_UNSPEC_MAY_TRAP_P. */
14449
14450static int
14451aarch64_unspec_may_trap_p (const_rtx x, unsigned flags)
14452{
14453 switch (XINT (x, 1))
14454 {
14455 case UNSPEC_GOTSMALLPIC:
14456 case UNSPEC_GOTSMALLPIC28K:
14457 case UNSPEC_GOTTINYPIC:
14458 return 0;
14459 default:
14460 break;
14461 }
14462
14463 return default_unspec_may_trap_p (x, flags);
14464}
14465
39252973
KT
14466
14467/* If X is a positive CONST_DOUBLE with a value that is a power of 2
14468 return the log2 of that value. Otherwise return -1. */
14469
14470int
14471aarch64_fpconst_pow_of_2 (rtx x)
14472{
14473 const REAL_VALUE_TYPE *r;
14474
14475 if (!CONST_DOUBLE_P (x))
14476 return -1;
14477
14478 r = CONST_DOUBLE_REAL_VALUE (x);
14479
14480 if (REAL_VALUE_NEGATIVE (*r)
14481 || REAL_VALUE_ISNAN (*r)
14482 || REAL_VALUE_ISINF (*r)
14483 || !real_isinteger (r, DFmode))
14484 return -1;
14485
14486 return exact_log2 (real_to_integer (r));
14487}
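/* For example, a CONST_DOUBLE of 8.0 yields 3 and 1.0 yields 0, while 0.5,
   6.0, -4.0, NaNs and infinities all yield -1.  */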
14488
14489/* If X is a vector of equal CONST_DOUBLE values and that value is
14490 Y, return the aarch64_fpconst_pow_of_2 of Y. Otherwise return -1. */
14491
14492int
14493aarch64_vec_fpconst_pow_of_2 (rtx x)
14494{
14495 if (GET_CODE (x) != CONST_VECTOR)
14496 return -1;
14497
14498 if (GET_MODE_CLASS (GET_MODE (x)) != MODE_VECTOR_FLOAT)
14499 return -1;
14500
14501 int firstval = aarch64_fpconst_pow_of_2 (CONST_VECTOR_ELT (x, 0));
14502 if (firstval <= 0)
14503 return -1;
14504
14505 for (int i = 1; i < CONST_VECTOR_NUNITS (x); i++)
14506 if (aarch64_fpconst_pow_of_2 (CONST_VECTOR_ELT (x, i)) != firstval)
14507 return -1;
14508
14509 return firstval;
14510}
14511
11e554b3
JG
14512/* Implement TARGET_PROMOTED_TYPE to promote 16-bit floating point types
14513 to float.
14514
14515 __fp16 always promotes through this hook.
14516 _Float16 may promote if TARGET_FLT_EVAL_METHOD is 16, but we do that
14517 through the generic excess precision logic rather than here. */
14518
c2ec330c
AL
14519static tree
14520aarch64_promoted_type (const_tree t)
14521{
11e554b3
JG
14522 if (SCALAR_FLOAT_TYPE_P (t)
14523 && TYPE_MAIN_VARIANT (t) == aarch64_fp16_type_node)
c2ec330c 14524 return float_type_node;
11e554b3 14525
c2ec330c
AL
14526 return NULL_TREE;
14527}
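/* A sketch of the effect on user code: with this hook in place,

     __fp16 a, b, c;
     c = a + b;

   is evaluated as (__fp16) ((float) a + (float) b), i.e. the arithmetic is
   performed in float and the result converted back on assignment.  */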
ee62a5a6
RS
14528
14529/* Implement the TARGET_OPTAB_SUPPORTED_P hook. */
14530
14531static bool
9acc9cbe 14532aarch64_optab_supported_p (int op, machine_mode mode1, machine_mode,
ee62a5a6
RS
14533 optimization_type opt_type)
14534{
14535 switch (op)
14536 {
14537 case rsqrt_optab:
9acc9cbe 14538 return opt_type == OPTIMIZE_FOR_SPEED && use_rsqrt_p (mode1);
ee62a5a6
RS
14539
14540 default:
14541 return true;
14542 }
14543}
14544
11e554b3
JG
 14545	/* Implement TARGET_LIBGCC_FLOATING_MODE_SUPPORTED_P - return TRUE
14546 if MODE is HFmode, and punt to the generic implementation otherwise. */
14547
14548static bool
14549aarch64_libgcc_floating_mode_supported_p (machine_mode mode)
14550{
14551 return (mode == HFmode
14552 ? true
14553 : default_libgcc_floating_mode_supported_p (mode));
14554}
14555
2e5f8203
JG
14556/* Implement TARGET_SCALAR_MODE_SUPPORTED_P - return TRUE
14557 if MODE is HFmode, and punt to the generic implementation otherwise. */
14558
14559static bool
14560aarch64_scalar_mode_supported_p (machine_mode mode)
14561{
14562 return (mode == HFmode
14563 ? true
14564 : default_scalar_mode_supported_p (mode));
14565}
14566
11e554b3
JG
14567/* Set the value of FLT_EVAL_METHOD.
14568 ISO/IEC TS 18661-3 defines two values that we'd like to make use of:
14569
14570 0: evaluate all operations and constants, whose semantic type has at
14571 most the range and precision of type float, to the range and
14572 precision of float; evaluate all other operations and constants to
14573 the range and precision of the semantic type;
14574
 14575	   N, where _FloatN is a supported interchange floating type:
14576 evaluate all operations and constants, whose semantic type has at
14577 most the range and precision of _FloatN type, to the range and
14578 precision of the _FloatN type; evaluate all other operations and
14579 constants to the range and precision of the semantic type;
14580
14581 If we have the ARMv8.2-A extensions then we support _Float16 in native
14582 precision, so we should set this to 16. Otherwise, we support the type,
14583 but want to evaluate expressions in float precision, so set this to
14584 0. */
14585
14586static enum flt_eval_method
14587aarch64_excess_precision (enum excess_precision_type type)
14588{
14589 switch (type)
14590 {
14591 case EXCESS_PRECISION_TYPE_FAST:
14592 case EXCESS_PRECISION_TYPE_STANDARD:
14593 /* We can calculate either in 16-bit range and precision or
14594 32-bit range and precision. Make that decision based on whether
14595 we have native support for the ARMv8.2-A 16-bit floating-point
14596 instructions or not. */
14597 return (TARGET_FP_F16INST
14598 ? FLT_EVAL_METHOD_PROMOTE_TO_FLOAT16
14599 : FLT_EVAL_METHOD_PROMOTE_TO_FLOAT);
14600 case EXCESS_PRECISION_TYPE_IMPLICIT:
14601 return FLT_EVAL_METHOD_PROMOTE_TO_FLOAT16;
14602 default:
14603 gcc_unreachable ();
14604 }
14605 return FLT_EVAL_METHOD_UNPREDICTABLE;
14606}
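/* A sketch of the effect of the rules above: given

     _Float16 x, y;
     _Float16 z = x * y;

   the multiplication is performed in _Float16 when TARGET_FP_F16INST is
   available (FLT_EVAL_METHOD == 16) and in float otherwise
   (FLT_EVAL_METHOD == 0), with the result converted back to _Float16.  */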
14607
43e9d192
IB
14608#undef TARGET_ADDRESS_COST
14609#define TARGET_ADDRESS_COST aarch64_address_cost
14610
 14611	/* This hook determines whether unnamed bitfields affect the alignment
14612 of the containing structure. The hook returns true if the structure
14613 should inherit the alignment requirements of an unnamed bitfield's
14614 type. */
14615#undef TARGET_ALIGN_ANON_BITFIELD
14616#define TARGET_ALIGN_ANON_BITFIELD hook_bool_void_true
14617
14618#undef TARGET_ASM_ALIGNED_DI_OP
14619#define TARGET_ASM_ALIGNED_DI_OP "\t.xword\t"
14620
14621#undef TARGET_ASM_ALIGNED_HI_OP
14622#define TARGET_ASM_ALIGNED_HI_OP "\t.hword\t"
14623
14624#undef TARGET_ASM_ALIGNED_SI_OP
14625#define TARGET_ASM_ALIGNED_SI_OP "\t.word\t"
14626
14627#undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
14628#define TARGET_ASM_CAN_OUTPUT_MI_THUNK \
14629 hook_bool_const_tree_hwi_hwi_const_tree_true
14630
e1c1ecb0
KT
14631#undef TARGET_ASM_FILE_START
14632#define TARGET_ASM_FILE_START aarch64_start_file
14633
43e9d192
IB
14634#undef TARGET_ASM_OUTPUT_MI_THUNK
14635#define TARGET_ASM_OUTPUT_MI_THUNK aarch64_output_mi_thunk
14636
14637#undef TARGET_ASM_SELECT_RTX_SECTION
14638#define TARGET_ASM_SELECT_RTX_SECTION aarch64_select_rtx_section
14639
14640#undef TARGET_ASM_TRAMPOLINE_TEMPLATE
14641#define TARGET_ASM_TRAMPOLINE_TEMPLATE aarch64_asm_trampoline_template
14642
14643#undef TARGET_BUILD_BUILTIN_VA_LIST
14644#define TARGET_BUILD_BUILTIN_VA_LIST aarch64_build_builtin_va_list
14645
14646#undef TARGET_CALLEE_COPIES
14647#define TARGET_CALLEE_COPIES hook_bool_CUMULATIVE_ARGS_mode_tree_bool_false
14648
14649#undef TARGET_CAN_ELIMINATE
14650#define TARGET_CAN_ELIMINATE aarch64_can_eliminate
14651
1fd8d40c
KT
14652#undef TARGET_CAN_INLINE_P
14653#define TARGET_CAN_INLINE_P aarch64_can_inline_p
14654
43e9d192
IB
14655#undef TARGET_CANNOT_FORCE_CONST_MEM
14656#define TARGET_CANNOT_FORCE_CONST_MEM aarch64_cannot_force_const_mem
14657
50487d79
EM
14658#undef TARGET_CASE_VALUES_THRESHOLD
14659#define TARGET_CASE_VALUES_THRESHOLD aarch64_case_values_threshold
14660
43e9d192
IB
14661#undef TARGET_CONDITIONAL_REGISTER_USAGE
14662#define TARGET_CONDITIONAL_REGISTER_USAGE aarch64_conditional_register_usage
14663
14664/* Only the least significant bit is used for initialization guard
14665 variables. */
14666#undef TARGET_CXX_GUARD_MASK_BIT
14667#define TARGET_CXX_GUARD_MASK_BIT hook_bool_void_true
14668
14669#undef TARGET_C_MODE_FOR_SUFFIX
14670#define TARGET_C_MODE_FOR_SUFFIX aarch64_c_mode_for_suffix
14671
14672#ifdef TARGET_BIG_ENDIAN_DEFAULT
14673#undef TARGET_DEFAULT_TARGET_FLAGS
14674#define TARGET_DEFAULT_TARGET_FLAGS (MASK_BIG_END)
14675#endif
14676
14677#undef TARGET_CLASS_MAX_NREGS
14678#define TARGET_CLASS_MAX_NREGS aarch64_class_max_nregs
14679
119103ca
JG
14680#undef TARGET_BUILTIN_DECL
14681#define TARGET_BUILTIN_DECL aarch64_builtin_decl
14682
a6fc00da
BH
14683#undef TARGET_BUILTIN_RECIPROCAL
14684#define TARGET_BUILTIN_RECIPROCAL aarch64_builtin_reciprocal
14685
11e554b3
JG
14686#undef TARGET_C_EXCESS_PRECISION
14687#define TARGET_C_EXCESS_PRECISION aarch64_excess_precision
14688
43e9d192
IB
14689#undef TARGET_EXPAND_BUILTIN
14690#define TARGET_EXPAND_BUILTIN aarch64_expand_builtin
14691
14692#undef TARGET_EXPAND_BUILTIN_VA_START
14693#define TARGET_EXPAND_BUILTIN_VA_START aarch64_expand_builtin_va_start
14694
9697e620
JG
14695#undef TARGET_FOLD_BUILTIN
14696#define TARGET_FOLD_BUILTIN aarch64_fold_builtin
14697
43e9d192
IB
14698#undef TARGET_FUNCTION_ARG
14699#define TARGET_FUNCTION_ARG aarch64_function_arg
14700
14701#undef TARGET_FUNCTION_ARG_ADVANCE
14702#define TARGET_FUNCTION_ARG_ADVANCE aarch64_function_arg_advance
14703
14704#undef TARGET_FUNCTION_ARG_BOUNDARY
14705#define TARGET_FUNCTION_ARG_BOUNDARY aarch64_function_arg_boundary
14706
14707#undef TARGET_FUNCTION_OK_FOR_SIBCALL
14708#define TARGET_FUNCTION_OK_FOR_SIBCALL aarch64_function_ok_for_sibcall
14709
14710#undef TARGET_FUNCTION_VALUE
14711#define TARGET_FUNCTION_VALUE aarch64_function_value
14712
14713#undef TARGET_FUNCTION_VALUE_REGNO_P
14714#define TARGET_FUNCTION_VALUE_REGNO_P aarch64_function_value_regno_p
14715
14716#undef TARGET_FRAME_POINTER_REQUIRED
14717#define TARGET_FRAME_POINTER_REQUIRED aarch64_frame_pointer_required
14718
fc72cba7
AL
14719#undef TARGET_GIMPLE_FOLD_BUILTIN
14720#define TARGET_GIMPLE_FOLD_BUILTIN aarch64_gimple_fold_builtin
0ac198d3 14721
43e9d192
IB
14722#undef TARGET_GIMPLIFY_VA_ARG_EXPR
14723#define TARGET_GIMPLIFY_VA_ARG_EXPR aarch64_gimplify_va_arg_expr
14724
14725#undef TARGET_INIT_BUILTINS
14726#define TARGET_INIT_BUILTINS aarch64_init_builtins
14727
c64f7d37
WD
14728#undef TARGET_IRA_CHANGE_PSEUDO_ALLOCNO_CLASS
14729#define TARGET_IRA_CHANGE_PSEUDO_ALLOCNO_CLASS \
14730 aarch64_ira_change_pseudo_allocno_class
14731
43e9d192
IB
14732#undef TARGET_LEGITIMATE_ADDRESS_P
14733#define TARGET_LEGITIMATE_ADDRESS_P aarch64_legitimate_address_hook_p
14734
14735#undef TARGET_LEGITIMATE_CONSTANT_P
14736#define TARGET_LEGITIMATE_CONSTANT_P aarch64_legitimate_constant_p
14737
491ec060
WD
14738#undef TARGET_LEGITIMIZE_ADDRESS_DISPLACEMENT
14739#define TARGET_LEGITIMIZE_ADDRESS_DISPLACEMENT \
14740 aarch64_legitimize_address_displacement
14741
43e9d192
IB
14742#undef TARGET_LIBGCC_CMP_RETURN_MODE
14743#define TARGET_LIBGCC_CMP_RETURN_MODE aarch64_libgcc_cmp_return_mode
14744
11e554b3
JG
14745#undef TARGET_LIBGCC_FLOATING_MODE_SUPPORTED_P
14746#define TARGET_LIBGCC_FLOATING_MODE_SUPPORTED_P \
14747aarch64_libgcc_floating_mode_supported_p
14748
ac2b960f
YZ
14749#undef TARGET_MANGLE_TYPE
14750#define TARGET_MANGLE_TYPE aarch64_mangle_type
14751
43e9d192
IB
14752#undef TARGET_MEMORY_MOVE_COST
14753#define TARGET_MEMORY_MOVE_COST aarch64_memory_move_cost
14754
26e0ff94
WD
14755#undef TARGET_MIN_DIVISIONS_FOR_RECIP_MUL
14756#define TARGET_MIN_DIVISIONS_FOR_RECIP_MUL aarch64_min_divisions_for_recip_mul
14757
43e9d192
IB
14758#undef TARGET_MUST_PASS_IN_STACK
14759#define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size
14760
14761/* This target hook should return true if accesses to volatile bitfields
14762 should use the narrowest mode possible. It should return false if these
14763 accesses should use the bitfield container type. */
14764#undef TARGET_NARROW_VOLATILE_BITFIELD
14765#define TARGET_NARROW_VOLATILE_BITFIELD hook_bool_void_false
14766
14767#undef TARGET_OPTION_OVERRIDE
14768#define TARGET_OPTION_OVERRIDE aarch64_override_options
14769
14770#undef TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE
14771#define TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE \
14772 aarch64_override_options_after_change
14773
361fb3ee
KT
14774#undef TARGET_OPTION_SAVE
14775#define TARGET_OPTION_SAVE aarch64_option_save
14776
14777#undef TARGET_OPTION_RESTORE
14778#define TARGET_OPTION_RESTORE aarch64_option_restore
14779
14780#undef TARGET_OPTION_PRINT
14781#define TARGET_OPTION_PRINT aarch64_option_print
14782
5a2c8331
KT
14783#undef TARGET_OPTION_VALID_ATTRIBUTE_P
14784#define TARGET_OPTION_VALID_ATTRIBUTE_P aarch64_option_valid_attribute_p
14785
d78006d9
KT
14786#undef TARGET_SET_CURRENT_FUNCTION
14787#define TARGET_SET_CURRENT_FUNCTION aarch64_set_current_function
14788
43e9d192
IB
14789#undef TARGET_PASS_BY_REFERENCE
14790#define TARGET_PASS_BY_REFERENCE aarch64_pass_by_reference
14791
14792#undef TARGET_PREFERRED_RELOAD_CLASS
14793#define TARGET_PREFERRED_RELOAD_CLASS aarch64_preferred_reload_class
14794
cee66c68
WD
14795#undef TARGET_SCHED_REASSOCIATION_WIDTH
14796#define TARGET_SCHED_REASSOCIATION_WIDTH aarch64_reassociation_width
14797
c2ec330c
AL
14798#undef TARGET_PROMOTED_TYPE
14799#define TARGET_PROMOTED_TYPE aarch64_promoted_type
14800
43e9d192
IB
14801#undef TARGET_SECONDARY_RELOAD
14802#define TARGET_SECONDARY_RELOAD aarch64_secondary_reload
14803
14804#undef TARGET_SHIFT_TRUNCATION_MASK
14805#define TARGET_SHIFT_TRUNCATION_MASK aarch64_shift_truncation_mask
14806
14807#undef TARGET_SETUP_INCOMING_VARARGS
14808#define TARGET_SETUP_INCOMING_VARARGS aarch64_setup_incoming_varargs
14809
14810#undef TARGET_STRUCT_VALUE_RTX
14811#define TARGET_STRUCT_VALUE_RTX aarch64_struct_value_rtx
14812
14813#undef TARGET_REGISTER_MOVE_COST
14814#define TARGET_REGISTER_MOVE_COST aarch64_register_move_cost
14815
14816#undef TARGET_RETURN_IN_MEMORY
14817#define TARGET_RETURN_IN_MEMORY aarch64_return_in_memory
14818
14819#undef TARGET_RETURN_IN_MSB
14820#define TARGET_RETURN_IN_MSB aarch64_return_in_msb
14821
14822#undef TARGET_RTX_COSTS
7cc2145f 14823#define TARGET_RTX_COSTS aarch64_rtx_costs_wrapper
43e9d192 14824
2e5f8203
JG
14825#undef TARGET_SCALAR_MODE_SUPPORTED_P
14826#define TARGET_SCALAR_MODE_SUPPORTED_P aarch64_scalar_mode_supported_p
14827
d126a4ae
AP
14828#undef TARGET_SCHED_ISSUE_RATE
14829#define TARGET_SCHED_ISSUE_RATE aarch64_sched_issue_rate
14830
d03f7e44
MK
14831#undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
14832#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
14833 aarch64_sched_first_cycle_multipass_dfa_lookahead
14834
2d6bc7fa
KT
14835#undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD
14836#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD \
14837 aarch64_first_cycle_multipass_dfa_lookahead_guard
14838
827ab47a
KT
14839#undef TARGET_SHRINK_WRAP_GET_SEPARATE_COMPONENTS
14840#define TARGET_SHRINK_WRAP_GET_SEPARATE_COMPONENTS \
14841 aarch64_get_separate_components
14842
14843#undef TARGET_SHRINK_WRAP_COMPONENTS_FOR_BB
14844#define TARGET_SHRINK_WRAP_COMPONENTS_FOR_BB \
14845 aarch64_components_for_bb
14846
14847#undef TARGET_SHRINK_WRAP_DISQUALIFY_COMPONENTS
14848#define TARGET_SHRINK_WRAP_DISQUALIFY_COMPONENTS \
14849 aarch64_disqualify_components
14850
14851#undef TARGET_SHRINK_WRAP_EMIT_PROLOGUE_COMPONENTS
14852#define TARGET_SHRINK_WRAP_EMIT_PROLOGUE_COMPONENTS \
14853 aarch64_emit_prologue_components
14854
14855#undef TARGET_SHRINK_WRAP_EMIT_EPILOGUE_COMPONENTS
14856#define TARGET_SHRINK_WRAP_EMIT_EPILOGUE_COMPONENTS \
14857 aarch64_emit_epilogue_components
14858
14859#undef TARGET_SHRINK_WRAP_SET_HANDLED_COMPONENTS
14860#define TARGET_SHRINK_WRAP_SET_HANDLED_COMPONENTS \
14861 aarch64_set_handled_components
14862
43e9d192
IB
14863#undef TARGET_TRAMPOLINE_INIT
14864#define TARGET_TRAMPOLINE_INIT aarch64_trampoline_init
14865
14866#undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
14867#define TARGET_USE_BLOCKS_FOR_CONSTANT_P aarch64_use_blocks_for_constant_p
14868
14869#undef TARGET_VECTOR_MODE_SUPPORTED_P
14870#define TARGET_VECTOR_MODE_SUPPORTED_P aarch64_vector_mode_supported_p
14871
7df76747
N
14872#undef TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT
14873#define TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT \
14874 aarch64_builtin_support_vector_misalignment
14875
43e9d192
IB
14876#undef TARGET_ARRAY_MODE_SUPPORTED_P
14877#define TARGET_ARRAY_MODE_SUPPORTED_P aarch64_array_mode_supported_p
14878
8990e73a
TB
14879#undef TARGET_VECTORIZE_ADD_STMT_COST
14880#define TARGET_VECTORIZE_ADD_STMT_COST aarch64_add_stmt_cost
14881
14882#undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
14883#define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
14884 aarch64_builtin_vectorization_cost
14885
43e9d192
IB
14886#undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
14887#define TARGET_VECTORIZE_PREFERRED_SIMD_MODE aarch64_preferred_simd_mode
14888
42fc9a7f
JG
14889#undef TARGET_VECTORIZE_BUILTINS
14890#define TARGET_VECTORIZE_BUILTINS
14891
14892#undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
14893#define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
14894 aarch64_builtin_vectorized_function
14895
3b357264
JG
14896#undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES
14897#define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES \
14898 aarch64_autovectorize_vector_sizes
14899
aa87aced
KV
14900#undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV
14901#define TARGET_ATOMIC_ASSIGN_EXPAND_FENV \
14902 aarch64_atomic_assign_expand_fenv
14903
43e9d192
IB
14904/* Section anchor support. */
14905
14906#undef TARGET_MIN_ANCHOR_OFFSET
14907#define TARGET_MIN_ANCHOR_OFFSET -256
14908
14909/* Limit the maximum anchor offset to 4k-1, since that's the limit for a
14910 byte offset; we can do much more for larger data types, but have no way
14911 to determine the size of the access. We assume accesses are aligned. */
14912#undef TARGET_MAX_ANCHOR_OFFSET
14913#define TARGET_MAX_ANCHOR_OFFSET 4095
14914
db0253a4
TB
14915#undef TARGET_VECTOR_ALIGNMENT
14916#define TARGET_VECTOR_ALIGNMENT aarch64_simd_vector_alignment
14917
14918#undef TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE
14919#define TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE \
14920 aarch64_simd_vector_alignment_reachable
14921
88b08073
JG
14922/* vec_perm support. */
14923
14924#undef TARGET_VECTORIZE_VEC_PERM_CONST_OK
14925#define TARGET_VECTORIZE_VEC_PERM_CONST_OK \
14926 aarch64_vectorize_vec_perm_const_ok
14927
c2ec330c
AL
14928#undef TARGET_INIT_LIBFUNCS
14929#define TARGET_INIT_LIBFUNCS aarch64_init_libfuncs
70f09188 14930
706b2314 14931#undef TARGET_FIXED_CONDITION_CODE_REGS
70f09188
AP
14932#define TARGET_FIXED_CONDITION_CODE_REGS aarch64_fixed_condition_code_regs
14933
5cb74e90
RR
14934#undef TARGET_FLAGS_REGNUM
14935#define TARGET_FLAGS_REGNUM CC_REGNUM
14936
78607708
TV
14937#undef TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS
14938#define TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS true
14939
a3125fc2
CL
14940#undef TARGET_ASAN_SHADOW_OFFSET
14941#define TARGET_ASAN_SHADOW_OFFSET aarch64_asan_shadow_offset
14942
0c4ec427
RE
14943#undef TARGET_LEGITIMIZE_ADDRESS
14944#define TARGET_LEGITIMIZE_ADDRESS aarch64_legitimize_address
14945
d3006da6
JG
14946#undef TARGET_USE_BY_PIECES_INFRASTRUCTURE_P
14947#define TARGET_USE_BY_PIECES_INFRASTRUCTURE_P \
14948 aarch64_use_by_pieces_infrastructure_p
14949
594bdd53
FY
14950#undef TARGET_CAN_USE_DOLOOP_P
14951#define TARGET_CAN_USE_DOLOOP_P can_use_doloop_if_innermost
14952
6a569cdd
KT
14953#undef TARGET_SCHED_MACRO_FUSION_P
14954#define TARGET_SCHED_MACRO_FUSION_P aarch64_macro_fusion_p
14955
14956#undef TARGET_SCHED_MACRO_FUSION_PAIR_P
14957#define TARGET_SCHED_MACRO_FUSION_PAIR_P aarch_macro_fusion_pair_p
14958
350013bc
BC
14959#undef TARGET_SCHED_FUSION_PRIORITY
14960#define TARGET_SCHED_FUSION_PRIORITY aarch64_sched_fusion_priority
14961
7b841a12
JW
14962#undef TARGET_UNSPEC_MAY_TRAP_P
14963#define TARGET_UNSPEC_MAY_TRAP_P aarch64_unspec_may_trap_p
14964
1b1e81f8
JW
14965#undef TARGET_USE_PSEUDO_PIC_REG
14966#define TARGET_USE_PSEUDO_PIC_REG aarch64_use_pseudo_pic_reg
14967
cc8ca59e
JB
14968#undef TARGET_PRINT_OPERAND
14969#define TARGET_PRINT_OPERAND aarch64_print_operand
14970
14971#undef TARGET_PRINT_OPERAND_ADDRESS
14972#define TARGET_PRINT_OPERAND_ADDRESS aarch64_print_operand_address
14973
ee62a5a6
RS
14974#undef TARGET_OPTAB_SUPPORTED_P
14975#define TARGET_OPTAB_SUPPORTED_P aarch64_optab_supported_p
14976
43203dea
RR
14977#undef TARGET_OMIT_STRUCT_RETURN_REG
14978#define TARGET_OMIT_STRUCT_RETURN_REG true
14979
43e9d192
IB
14980struct gcc_target targetm = TARGET_INITIALIZER;
14981
14982#include "gt-aarch64.h"